import os

import pandas as pd
import openai
from openai.embeddings_utils import get_embedding, cosine_similarity

# Read the API key from the environment rather than hard-coding it.
openai.api_key = os.getenv('END_OF_WORLD')
# Load the pre-computed summaries and their embeddings; the converter parses
# the stringified embedding column back into a Python list of floats.
df = pd.read_csv("setup_dataWithSummaryEmbed.csv",
                 converters={'embedding_summary': pd.eval})
def search_code(df, query, n=3):
    # Embed the query with the same model used for the stored summaries,
    # score every row by cosine similarity, and return the top-n matches.
    query_embedding = get_embedding(query, engine="text-embedding-ada-002")
    df["similarity"] = df.embedding_summary.apply(
        lambda x: cosine_similarity(x, query_embedding)
    )
    return df.sort_values("similarity", ascending=False).head(n)
def generate_answer(question):
    # Retrieve the three most relevant code chunks and build a prompt that
    # pairs each summary with its source blob.
    results = search_code(df, question, n=3)
    prompt = ''
    for _, row in results.iterrows():
        prompt += row["summary"] + "\n" + row["blob"] + "\n"
    prompt += ("\nAnswer the following question using the code context "
               "given above, and show an example with 'Example'\nQ: "
               + question + "\nA: ")
    print("PROMPT:")
    print(prompt)
    # Send the assembled context-plus-question prompt to the completion endpoint.
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0.7,
        max_tokens=1000,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        stop=["\"\"\""]
    )
    return response["choices"][0]["text"]
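# Example usage: ask a question about the indexed setup.py source and print
# the model's answer.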
question = "how does the code in setup.py parse Python source code using the ast library?"
ans = generate_answer(question)
print(ans)
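
# --- Hypothetical helper (not part of the original script) -----------------
# A minimal sketch of how a file like setup_dataWithSummaryEmbed.csv could be
# produced, assuming each row already has a 'summary' and a 'blob' (raw source
# text) field; the function name and argument layout are illustrative only.
def build_embeddings_csv(rows, out_path="setup_dataWithSummaryEmbed.csv"):
    # rows: iterable of dicts with 'summary' and 'blob' keys.
    data = pd.DataFrame(rows)
    # Embed each summary with the same model the search step uses, so the
    # cosine similarities above are computed in a single vector space.
    data["embedding_summary"] = data["summary"].apply(
        lambda s: get_embedding(s, engine="text-embedding-ada-002")
    )
    data.to_csv(out_path, index=False)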