author	Nate Buttke <nate-web@riseup.net>	2023-07-25 22:40:00 -0700
committer	Nate Buttke <nate-web@riseup.net>	2023-07-25 22:40:00 -0700
commit	0985b7f2d467ecbeba0c6ca51ba03236cd4ff929 (patch)
tree	5c24e8f12cd4416c69c5a37c365af34a1119a47f /search_code.py
hi dan
Diffstat (limited to 'search_code.py')
-rw-r--r--	search_code.py	66
1 file changed, 66 insertions, 0 deletions
diff --git a/search_code.py b/search_code.py
new file mode 100644
index 0000000..9d9ecee
--- /dev/null
+++ b/search_code.py
@@ -0,0 +1,66 @@
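+# Semantic code search: embed a natural-language query, rank pre-computed
+# code-summary embeddings by cosine similarity, and pass the top matches to a
+# completion model to answer questions about the code base.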
+import ast
+from collections import defaultdict
+import os
+import pandas as pd
+import openai
+import tiktoken
+from openai.embeddings_utils import get_embedding, cosine_similarity
+
+openai.api_key = os.getenv('END_OF_WORLD')
+
+# Load the indexed summaries; the converter parses each stored embedding string
+# back into a numeric vector.
+df = pd.read_csv("setup_dataWithSummaryEmbed.csv", converters={'embedding_summary': pd.eval})
+
+def search_code(df, query, n=3, pprint=True):
+    """Rank the indexed code snippets by similarity to a natural-language query."""
+    # Embed the query with the same model used for the stored summary embeddings.
+    query_embedding = get_embedding(
+        query,
+        engine="text-embedding-ada-002"
+    )
+
+    # Score every row against the query embedding.
+    df["similarity"] = df.embedding_summary.apply(
+        lambda x: cosine_similarity(x, query_embedding)
+    )
+
+    # Keep only the n closest matches.
+    results = df.sort_values("similarity", ascending=False).head(n)
+    if pprint:
+        print(results[["summary", "similarity"]])
+    return results
+
+def generate_answer(question):
+    # Retrieve the three best matches and build a prompt from their summaries and code.
+    results = search_code(df, question, n=3)
+    prompt = ''
+    for i in range(3):
+        prompt += results.iloc[i]["summary"] + "\n" + results.iloc[i]["blob"] + "\n"
+
+    prompt += ("\nAnswer the following question using the code context "
+               "given above, and show an example with 'Example'\nQ: " + question + "\nA: ")
+
+ print("PROMPT:")
+ print(prompt)
+
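+    # Ask the completion model to answer using the retrieved code as context.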
+ response = openai.Completion.create(
+ model="text-davinci-003",
+ prompt=prompt,
+ temperature=0.7,
+ max_tokens=1000,
+ top_p=1.0,
+ frequency_penalty=0.0,
+ presence_penalty=0.0,
+ stop=["\"\"\""]
+ )
+ return response["choices"][0]["text"]
+
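+# Example query against the indexed repository.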
+question = "how does the code in setup.py parse Python source code using the ast library?"
+ans = generate_answer(question)
+print(ans)