1 files changed, 135 insertions, 0 deletions
diff --git a/server.py b/server.py
new file mode 100644
index 0000000..05e726f
--- /dev/null
+++ b/server.py
@@ -0,0 +1,135 @@
+from flask import Flask, request, Response, jsonify, render_template
+import pandas as pd
+from collections import defaultdict
+import os
+import openai
+from openai.embeddings_utils import get_embedding, cosine_similarity
+openai.api_key = os.getenv('OPENAI_KEY')
+import numpy as np
+
+# Templates and static files both come from ./frontend; static_url_path="" drops the /static URL prefix.
+app = Flask(__name__, static_url_path="", static_folder="./frontend", template_folder="./frontend")
+
+
+def search_code(df, query, n=4):
+    """Return the `n` rows of `df` most similar to `query`, best match first.
+
+    `query` is embedded with text-embedding-ada-002 and compared by cosine
+    similarity against each row's `embedding_summary`; the score is returned
+    in a `similarity` column. `df` itself is left unmodified.
+    """
+    query_embedding = get_embedding(query, engine="text-embedding-ada-002")
+    scores = df.embedding_summary.apply(lambda x: cosine_similarity(x, query_embedding))
+    # assign() works on a copy, so the caller's (module-level) frame is not rewritten on every call.
+    ranked = df.assign(similarity=scores).sort_values("similarity", ascending=False)
+    return ranked.head(n)
+
+
+def generate_answer(question):
+    """Answer `question` with text-davinci-003, using the best-matching code as context.
+
+    The prompt holds the summary and blob of up to three top search results,
+    followed by the question; the text of the first completion is returned.
+    """
+    # Slice instead of indexing rows 0..2 so fewer than three matches cannot raise IndexError.
+    results = search_code(df, question, n=4).head(3)
+    context = "".join(s + "\n" + b + "\n" for s, b in zip(results["summary"], results["blob"]))
+    instruction = "Answer the following question using the code context given above, and show an example with 'Example'"
+    prompt = context + "\n" + instruction + "\nQ: " + question + "\nA: "
+    response = openai.Completion.create(
+        model="text-davinci-003",
+        prompt=prompt,
+        temperature=0.7, max_tokens=1000, top_p=1.0,
+        frequency_penalty=0.0, presence_penalty=0.0, stop=["\"\"\""],
+    )
+    return response["choices"][0]["text"]
+
+# The above two functions should already work with the Go version.
+
+def get_code_structure(df):
+    """Build lookup tables describing the directory tree behind `df['file_path']`.
+
+    Returns `(kids, parents)`, both keyed by a single path component (not the
+    full path): `kids` maps a directory to the sorted names directly below it
+    and a file to `["file"]`; `parents` maps a component to the path leading to
+    it, or "./" for a top-level component.
+    """
+    kids_structure = defaultdict(set)
+    parents_structure = {}
+    for path in df['file_path'].unique():
+        parts = path.split("/")
+        for depth, name in enumerate(parts):
+            if depth < len(parts) - 1:
+                kids_structure[name].add(parts[depth + 1])
+            else:
+                kids_structure[name] = {"file"}
+            parents_structure[name] = "/".join(parts[:depth]) if depth else "./"
+    # Sorted so listings come out in the same order on every run (set order is arbitrary).
+    return {name: sorted(kids) for name, kids in kids_structure.items()}, parents_structure
+
+
+# Nate: these are from the original project. My own csv is below
+#df = pd.read_csv("./frontend/data/embedded_summarized.csv")
+#df = pd.read_csv("./frontend/data/withsummary2.csv")
+
+# Dataset currently in use, loaded once at import. Embeddings are stored in the
+# CSV as text, so pd.eval parses that column while reading; the commented-out
+# eval() lines below are the original project's equivalent for its own columns.
+df = pd.read_csv("./frontend/data/test_with_summary_and_embeddings.csv",
+                 converters={'embedding_summary': pd.eval})
+
+#df['summary_embeddings'] = df['summary_embeddings'].apply(lambda x: eval(x))
+#df['embeddings'] = df['embeddings'].apply(lambda x: eval(x))
+
+# File-name suffixes that home() uses to tell source files apart from folders.
+filetypes = ['go']
+
+@app.route('/')
+def home():
+    """Render the code browser for the path component named by `?path=`.
+
+    `path` defaults to "dirserver". The payload marks the component as missing
+    ("nan"), a source file (its `source`/`summary` rows) or a folder (the names
+    directly below it).
+    """
+    stub = request.args.get('path', 'dirserver').strip()
+    kids_structure, parents_structure = get_code_structure(df)
+    # Unknown components have no parent entry either, so fall back to the root
+    # rather than raising KeyError while building the "not available" payload.
+    parent = parents_structure.get(stub, "./")
+    if stub not in kids_structure:
+        loctype = "nan"
+        text = [["Path not available!"], [""]]
+    elif stub.endswith(tuple("." + ext.lstrip(".") for ext in filetypes)):
+        # Compare against ".go" rather than a bare "go" suffix so that a folder
+        # such as "cargo" is not mistaken for a source file.
+        loctype = "file"
+        # A top-level file's file_path is just its name, with no "./" prefix.
+        fullpath = stub if parent == "./" else f"{parent}/{stub}"
+        rows = df[df['file_path'] == fullpath]
+        text = [[x, y] for x, y in zip(rows['source'], rows['summary'])]
+    else:
+        loctype = "folder"
+        text = [[x, ""] for x in kids_structure[stub]]
+    res = {'parents': parent, 'loctype': loctype, 'text': text, 'current': stub}
+    return render_template('index.html', payload=res)
+
+
+@app.route('/answer')
+def answer():
+    """Return, as JSON, the blob and summary of the rows best matching `?q=`."""
+    q = request.args.get('q', '').strip()
+    if not q:  # nothing to embed; skip the embedding API call entirely
+        return jsonify([])
+    return jsonify(search_code(df, q)[['blob', 'summary']].to_dict('records'))
+
+@app.route('/explain')
+def explain():
+    """Return, as JSON, a generated answer to `?q=`, or "" when no question is given."""
+    question = request.args.get('q', '').strip()
+    return jsonify(generate_answer(question) if question else "")
+
+
+if __name__ == '__main__':
+    # Development server on port 5001. Do not pair debug=True with host="0.0.0.0": the debugger can run code.
+    app.run(port=5001, debug=True)