Diffstat (limited to 'search_code.py')
-rw-r--r-- | search_code.py | 66 |
1 file changed, 66 insertions(+), 0 deletions(-)
diff --git a/search_code.py b/search_code.py
new file mode 100644
index 0000000..9d9ecee
--- /dev/null
+++ b/search_code.py
@@ -0,0 +1,66 @@
+import ast
+from collections import defaultdict
+import os
+import pandas as pd
+import openai
+import tiktoken
+from openai.embeddings_utils import get_embedding, cosine_similarity
+
+openai.api_key = os.getenv('END_OF_WORLD')
+
+#def clean():
+#    df[col1] = df[col1].apply(lambda x: literal_eval(x) if "[" in x else x)
+
+df=pd.read_csv("setup_dataWithSummaryEmbed.csv", converters={'embedding_summary': pd.eval})
+
+#def phony(x):
+#    print(type(x))
+#    print(x)
+#    exit()
+
+def search_code(df, query, n=3, pprint=True):
+    query_embedding = get_embedding(
+        query,
+        engine="text-embedding-ada-002"
+    )
+
+    #print(type(query_embedding))
+    #print(query_embedding)
+
+    #df["similarity"] = df.embedding_summary.apply(lambda x: phony(x))
+    df["similarity"] = df.embedding_summary.apply(lambda x: cosine_similarity(x, query_embedding))
+
+    results = (
+        df.sort_values("similarity", ascending=False)
+
+    )
+    return results
+
+def generate_answer(question):
+    results = search_code(df, question, n=3)
+    prompt = ''
+    for i in range(3):
+        prompt += results.iloc[i]["summary"] + "\n" + results.iloc[i]["blob"] + "\n"
+    #prompt += "\n" + "Q: " + question + "\nA: "
+
+    prompt += "\n" + "Answer the following question using the code context\
+ given above, and show an example with 'Example'\nQ: " + question + "\nA: "
+
+    print("PROMPT:")
+    print(prompt)
+
+    response = openai.Completion.create(
+        model="text-davinci-003",
+        prompt=prompt,
+        temperature=0.7,
+        max_tokens=1000,
+        top_p=1.0,
+        frequency_penalty=0.0,
+        presence_penalty=0.0,
+        stop=["\"\"\""]
+    )
+    return response["choices"][0]["text"]
+
+question = "how does the code in setup.py parse Python source code using the ast library?"
+ans = generate_answer(question)
+print(ans)
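
Note on the retrieval step above: search_code() embeds the query, scores every row of setup_dataWithSummaryEmbed.csv by cosine similarity against the precomputed embedding_summary vectors, and sorts the frame in descending order; generate_answer() then stuffs the top three summary/blob pairs into a completion prompt. The sketch below reproduces just the ranking logic with plain numpy/pandas so it runs without an API key; fake_embed() is a hypothetical stand-in for get_embedding(), and the toy DataFrame only mimics the CSV's summary, blob, and embedding_summary columns.

import numpy as np
import pandas as pd

def cosine_similarity(a, b):
    # dot(a, b) / (|a| * |b|), the same formula implemented by
    # openai.embeddings_utils.cosine_similarity
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

def fake_embed(text, dim=8):
    # Hypothetical stand-in for get_embedding(): returns an arbitrary
    # fixed-length vector (seeded by text length, demo purposes only)
    rng = np.random.default_rng(len(text))
    return rng.normal(size=dim).tolist()

# Toy frame shaped like setup_dataWithSummaryEmbed.csv
df = pd.DataFrame({
    "summary": ["parses source files with ast", "reads setup.cfg options", "writes build output"],
    "blob": ["def parse(src): ...", "def read_cfg(path): ...", "def write(out): ..."],
})
df["embedding_summary"] = df["summary"].apply(fake_embed)

# Same ranking step as search_code(): score each row, sort descending
query_embedding = fake_embed("how is the ast library used?")
df["similarity"] = df["embedding_summary"].apply(lambda v: cosine_similarity(v, query_embedding))
print(df.sort_values("similarity", ascending=False)[["summary", "similarity"]])

In the committed script this ranking is followed by a text-davinci-003 completion call; that part is omitted here because it depends on the pre-1.0 openai SDK and a live API key.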