summary | refs | log | tree | commit | diff
path: root/server.py
diff options
context:
space:
mode:
Diffstat (limited to 'server.py')
-rw-r--r-- server.py 135
1 files changed, 135 insertions, 0 deletions
diff --git a/server.py b/server.py
new file mode 100644
index 0000000..05e726f
--- /dev/null
+++ b/server.py
@@ -0,0 +1,135 @@
+from flask import Flask, request, Response, jsonify, render_template
+import pandas as pd
+from collections import defaultdict
+import os
+import openai
+from openai.embeddings_utils import get_embedding, cosine_similarity
+openai.api_key = os.getenv('OPENAI_KEY')
+import numpy as np
+
# Flask application object. Templates and static assets both live in
# ./frontend, and static files are exposed at the URL root (empty prefix).
app = Flask(
    __name__,
    static_url_path="",
    static_folder="./frontend",
    template_folder="./frontend",
)
+
+
+
def search_code(df, query, n=4):
    """Return the rows of ``df`` whose summary embedding best matches ``query``.

    The query is embedded with the ``text-embedding-ada-002`` engine and
    compared, by cosine similarity, against every entry of the
    ``embedding_summary`` column.

    Args:
        df: DataFrame with an ``embedding_summary`` column holding one
            embedding per row.
        query: Free-text search string.
        n: Maximum number of rows to return.

    Returns:
        A new DataFrame with at most ``n`` rows, ordered by descending
        similarity, carrying an extra ``similarity`` column. ``df`` itself
        is not modified.
    """
    query_embedding = get_embedding(query, engine="text-embedding-ada-002")
    scores = df.embedding_summary.apply(
        lambda embedding: cosine_similarity(embedding, query_embedding)
    )
    # assign() works on a copy, so the shared module-level frame is never
    # written to; overlapping requests can no longer clobber each other's
    # "similarity" column between scoring and sorting.
    ranked = df.assign(similarity=scores).sort_values("similarity", ascending=False)
    return ranked.head(n)
+
+
def generate_answer(question):
    """Answer ``question`` with a completion grounded in the best-matching code.

    The closest rows are looked up with ``search_code``; the summary and
    code blob of the top three are concatenated into a context section,
    followed by the instruction and the question, and sent to the
    ``text-davinci-003`` completion model.

    Args:
        question: The user's question as plain text.

    Returns:
        The text of the first completion choice.
    """
    results = search_code(df, question, n=4)
    # Only the three best matches go into the prompt. head(3) simply yields
    # fewer rows when the data set is small, whereas indexing iloc[0..2]
    # raised IndexError in that case.
    context = "".join(
        row["summary"] + "\n" + row["blob"] + "\n"
        for row in results.head(3).to_dict("records")
    )
    prompt = (
        context
        + "\n"
        + "Answer the following question using the code context given above, and show an example with 'Example'\nQ: "
        + question
        + "\nA: "
    )
    response = openai.Completion.create(
        model="text-davinci-003",
        # model="code-davinci-002",
        prompt=prompt,
        temperature=0.7,
        max_tokens=1000,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        stop=["\"\"\""]
    )
    return response["choices"][0]["text"]
+
+# The above two functions should already work with the Go version.
+
def get_code_structure(df):
    """Build child and parent lookups from the ``file_path`` column of ``df``.

    Every path is split on ``"/"``. Lookups are keyed by the bare component
    name (not the full path), so equal names in different directories share
    one entry.

    Args:
        df: DataFrame with a ``file_path`` column of ``/``-separated paths.
            Missing values are skipped.

    Returns:
        A ``(kids, parents)`` tuple of plain dicts:

        * ``kids`` maps a component name to the sorted, de-duplicated list
          of names directly below it; the last component of a path maps to
          ``["file"]``.
        * ``parents`` maps a component name to the ``/``-joined path leading
          to it, or ``"./"`` for the first component of a path.
    """
    kids_structure = defaultdict(list)
    parents_structure = {}
    # dropna(): a missing path has no .split and would abort the whole build.
    for path in df["file_path"].dropna().unique():
        parts = path.split("/")
        last = len(parts) - 1
        for depth, name in enumerate(parts):
            if depth < last:
                # A directory: record the next component as its child.
                kids_structure[name].append(parts[depth + 1])
            else:
                # The final component is the file itself.
                kids_structure[name] = ["file"]
            parents_structure[name] = "/".join(parts[:depth]) if depth else "./"

    # Sorting after de-duplication keeps listings stable between runs;
    # a bare list(set(...)) depends on string hash ordering.
    kids = {
        name: sorted(set(children))
        for name, children in kids_structure.items()
    }
    return kids, parents_structure
+
+
# Nate: the two CSVs below come from the original project; my own CSV is
# loaded further down.
# df = pd.read_csv("./frontend/data/embedded_summarized.csv")
# df = pd.read_csv("./frontend/data/withsummary2.csv")

# The embedding column is stored as text in the CSV, so each cell has to be
# evaluated back into an object while loading; that is what the pd.eval
# converter is for (the older variants below used eval() after loading).
# NOTE(review): pd.eval evaluates cell content as an expression -- only load
# CSV files from a trusted source.
df = pd.read_csv(
    "./frontend/data/test_with_summary_and_embeddings.csv",
    converters={"embedding_summary": pd.eval},
)

# df['summary_embeddings'] = df['summary_embeddings'].apply(lambda x: eval(x))
# df['embeddings'] = df['embeddings'].apply(lambda x: eval(x))

# Name endings that home() uses to tell source files from folders.
filetypes = ["go"]
+
+
# Browser view for one entry (folder or file) of the indexed code tree.
@app.route('/')
def home():
    """Render ``index.html`` for the entry named by the ``path`` query argument.

    ``path`` is a bare component name as keyed by ``get_code_structure``
    (default ``'dirserver'``). The template receives a ``payload`` dict with:

    * ``parents``: path leading to the entry (``"./"`` for a top-level or
      unknown entry),
    * ``loctype``: ``"file"``, ``"folder"`` or ``"nan"`` (unknown entry),
    * ``text``: ``[source, summary]`` pairs for a file, ``[child, ""]``
      pairs for a folder, or a "not available" message,
    * ``current``: the requested name.
    """
    stub = request.args.get('path', 'dirserver').strip()
    kids_structure, parents_structure = get_code_structure(df)
    app.logger.debug("kids_structure %s", kids_structure)
    app.logger.debug("parents_structure %s", parents_structure)
    app.logger.debug("stub %s", stub)

    # Match on a real extension (".go"), so that a directory whose name
    # merely ends in the letters (e.g. "go", "cargo") is not taken for a file.
    file_suffixes = tuple("." + ext.lstrip(".") for ext in filetypes)

    if stub not in kids_structure:
        loctype = "nan"
        text = [["Path not available!"], [""]]
    elif stub.endswith(file_suffixes):
        loctype = "file"
        parent = parents_structure[stub]
        # Top-level entries have the "./" marker as parent; their file_path
        # is just the bare name, so the marker must not be prepended.
        fullpath = stub if parent == "./" else f"{parent}/{stub}"
        app.logger.debug("fullpath %s", fullpath)
        rows = df[df['file_path'] == fullpath]
        text = [
            [source, summary]
            for source, summary in zip(rows['source'], rows['summary'])
        ]
    else:
        loctype = "folder"
        text = [[x, ""] for x in kids_structure[stub]]

    res = {
        # An unknown stub has no parent entry; fall back to the root marker
        # instead of raising KeyError after the "nan" branch above.
        'parents': parents_structure.get(stub, "./"),
        'loctype': loctype,
        'text': text,
        'current': stub
    }
    return render_template('index.html', payload=res)
+
+
@app.route('/answer')
def answer():
    """Return the code entries that best match the ``q`` query argument.

    Responds with a JSON list of ``{"blob": ..., "summary": ...}`` objects
    taken from the rows returned by ``search_code``. A missing or blank
    ``q`` yields a 400 response with a JSON ``error`` message.
    """
    q = request.args.get('q', '').strip()
    if not q:
        # Nothing to search for; reject early rather than sending an empty
        # string to the embedding call.
        return jsonify({'error': "Missing required query parameter 'q'."}), 400

    matches = search_code(df, q)
    res = [
        {'blob': row['blob'], 'summary': row['summary']}
        for row in matches.to_dict('records')
    ]
    return jsonify(res)
+
@app.route('/explain')
def explain():
    """Return a generated answer to the question in the ``q`` query argument.

    Responds with the text produced by ``generate_answer`` as a JSON string.
    A missing or blank ``q`` yields a 400 response with a JSON ``error``
    message.
    """
    q = request.args.get('q', '').strip()
    if not q:
        # Nothing to answer; reject early rather than spending an embedding
        # and a completion request on an empty question.
        return jsonify({'error': "Missing required query parameter 'q'."}), 400

    return jsonify(generate_answer(q))
+
+
# Script entry point: start Flask's built-in server on port 5001 with debug
# mode enabled. The commented variant additionally binds to all interfaces.
if __name__ == "__main__":
    app.run(debug=True, port=5001)
    # app.run(host="0.0.0.0", port=5001, debug=True)