Diffstat (limited to 'search_code.py')
-rw-r--r-- | search_code.py | 66 |
1 file changed, 66 insertions(+), 0 deletions(-)
diff --git a/search_code.py b/search_code.py
new file mode 100644
index 0000000..9d9ecee
--- /dev/null
+++ b/search_code.py
@@ -0,0 +1,66 @@
+import ast
+from collections import defaultdict
+import os
+import pandas as pd
+import openai
+import tiktoken
+from openai.embeddings_utils import get_embedding, cosine_similarity
+
+openai.api_key = os.getenv('END_OF_WORLD')
+
+#def clean():
+#    df[col1] = df[col1].apply(lambda x: literal_eval(x) if "[" in x else x)
+
+df=pd.read_csv("setup_dataWithSummaryEmbed.csv", converters={'embedding_summary': pd.eval})
+
+#def phony(x):
+#    print(type(x))
+#    print(x)
+#    exit()
+
+def search_code(df, query, n=3, pprint=True):
+    query_embedding = get_embedding(
+        query,
+        engine="text-embedding-ada-002"
+    )
+
+    #print(type(query_embedding))
+    #print(query_embedding)
+
+    #df["similarity"] = df.embedding_summary.apply(lambda x: phony(x))
+    df["similarity"] = df.embedding_summary.apply(lambda x: cosine_similarity(x, query_embedding))
+
+    results = (
+        df.sort_values("similarity", ascending=False)
+
+    )
+    return results
+
+def generate_answer(question):
+    results = search_code(df, question, n=3)
+    prompt = ''
+    for i in range(3):
+        prompt += results.iloc[i]["summary"] + "\n" + results.iloc[i]["blob"] + "\n"
+    #prompt += "\n" + "Q: " + question + "\nA: "
+
+    prompt += "\n" + "Answer the following question using the code context\
+ given above, and show an example with 'Example'\nQ: " + question + "\nA: "
+
+    print("PROMPT:")
+    print(prompt)
+
+    response = openai.Completion.create(
+        model="text-davinci-003",
+        prompt=prompt,
+        temperature=0.7,
+        max_tokens=1000,
+        top_p=1.0,
+        frequency_penalty=0.0,
+        presence_penalty=0.0,
+        stop=["\"\"\""]
+    )
+    return response["choices"][0]["text"]
+
+question = "how does the code in setup.py parse Python source code using the ast library?"
+ans = generate_answer(question)
+print(ans)
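
Note on the retrieval step above: search_code() embeds the query, scores every row of setup_dataWithSummaryEmbed.csv by cosine similarity against the precomputed embedding_summary vectors, and sorts the frame in descending order; generate_answer() then stuffs the top three summary/blob pairs into a completion prompt. The sketch below reproduces just the ranking logic with plain numpy/pandas so it runs without an API key; fake_embed() is a hypothetical stand-in for get_embedding(), and the toy DataFrame only mimics the CSV's summary, blob, and embedding_summary columns.

import numpy as np
import pandas as pd

def cosine_similarity(a, b):
    # dot(a, b) / (|a| * |b|), the same formula implemented by
    # openai.embeddings_utils.cosine_similarity
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

def fake_embed(text, dim=8):
    # Hypothetical stand-in for get_embedding(): returns an arbitrary
    # fixed-length vector (seeded by text length, demo purposes only)
    rng = np.random.default_rng(len(text))
    return rng.normal(size=dim).tolist()

# Toy frame shaped like setup_dataWithSummaryEmbed.csv
df = pd.DataFrame({
    "summary": ["parses source files with ast", "reads setup.cfg options", "writes build output"],
    "blob": ["def parse(src): ...", "def read_cfg(path): ...", "def write(out): ..."],
})
df["embedding_summary"] = df["summary"].apply(fake_embed)

# Same ranking step as search_code(): score each row, sort descending
query_embedding = fake_embed("how is the ast library used?")
df["similarity"] = df["embedding_summary"].apply(lambda v: cosine_similarity(v, query_embedding))
print(df.sort_values("similarity", ascending=False)[["summary", "similarity"]])

In the committed script this ranking is followed by a text-davinci-003 completion call; that part is omitted here because it depends on the pre-1.0 openai SDK and a live API key.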