# Reconstructed from a git patch (cgit export) that creates server.py:
#   commit:  db824e067d17eba3469a49dffb04566aed3449b2
#   author:  Nate Buttke
#   date:    Thu, 27 Jul 2023 17:29:36 -0700
#   subject: add frontend components with my fixes. still messy.
from collections import defaultdict
import os

from flask import Flask, request, Response, jsonify, render_template
import numpy as np
import openai
from openai.embeddings_utils import get_embedding, cosine_similarity
import pandas as pd

# The API key is read from the environment; it is None when OPENAI_KEY is unset.
openai.api_key = os.getenv('OPENAI_KEY')

app = Flask(__name__, template_folder="./frontend", static_folder="./frontend", static_url_path="")


def search_code(df, query, n=4):
    """Return the ``n`` rows of ``df`` whose summary embedding best matches ``query``.

    Args:
        df: DataFrame with an ``embedding_summary`` column holding one
            embedding vector per row.
        query: Free-text search string; it is embedded with
            ``text-embedding-ada-002``.
        n: Maximum number of rows to return.

    Returns:
        A new DataFrame with the top ``n`` rows, sorted by descending cosine
        similarity, carrying an extra ``similarity`` column.
    """
    query_embedding = get_embedding(
        query,
        engine="text-embedding-ada-002"
    )
    similarity = df.embedding_summary.apply(
        lambda emb: cosine_similarity(emb, query_embedding)
    )
    # Rank on a copy: the caller's frame is the module-level dataset shared by
    # every request, so it must not be mutated per query.
    ranked = df.assign(similarity=similarity).sort_values("similarity", ascending=False)
    return ranked.head(n)


def generate_answer(question):
    """Answer ``question`` with a completion model, using retrieved code as context.

    The best-matching rows of the module-level ``df`` (at most three) are
    concatenated as ``summary``/``blob`` pairs in front of the question and
    sent to ``text-davinci-003``.

    Args:
        question: The user's question.

    Returns:
        The text of the first completion choice.
    """
    context_size = 3
    results = search_code(df, question, n=context_size)
    # Iterate over the rows actually returned instead of indexing 0..2, so a
    # dataset with fewer than three rows no longer raises IndexError.
    context = "".join(
        row["summary"] + "\n" + row["blob"] + "\n"
        for _, row in results.iterrows()
    )
    prompt = (
        context
        + "\n"
        + "Answer the following question using the code context given above, and show an example with 'Example'\nQ: "
        + question
        + "\nA: "
    )
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0.7,
        max_tokens=1000,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        stop=["\"\"\""]
    )
    return response["choices"][0]["text"]

# The above two functions should already work with the Go version.


def get_code_structure(df):
    """Build child and parent lookup tables from the ``file_path`` column.

    Every path is split on ``/``. Entries are keyed by a single path
    component (not the full path), so components that share a name in
    different locations end up in the same entry.

    Args:
        df: DataFrame with a ``file_path`` column of ``/``-separated paths.

    Returns:
        A ``(kids_structure, parents_structure)`` tuple:

        * ``kids_structure`` maps a component to the sorted, de-duplicated
          list of components that follow it; the last component of a path
          (the file itself) maps to ``["file"]``.
        * ``parents_structure`` maps a component to the ``/``-joined prefix
          that precedes it, or ``"./"`` for the first component of a path.
    """
    kids_structure = defaultdict(list)
    parents_structure = {}
    # Missing paths are skipped: NaN is a float and cannot be split.
    for path in df['file_path'].dropna().unique():
        parts = path.split("/")
        last = len(parts) - 1
        for depth, name in enumerate(parts):
            if depth < last:
                # A directory: record the next path component as its child.
                kids_structure[name].append(parts[depth + 1])
            else:
                kids_structure[name] = ["file"]
            parents_structure[name] = "/".join(parts[:depth]) if depth else "./"

    # Sort after de-duplicating so listings come out in the same order on
    # every run (plain set iteration order is not stable across processes).
    children = {name: sorted(set(kids)) for name, kids in kids_structure.items()}
    return children, parents_structure


# Nate: these are from the original project. My own csv is below
#df = pd.read_csv("./frontend/data/embedded_summarized.csv")
#df = pd.read_csv("./frontend/data/withsummary2.csv")

# My line
# The embedding column is stored in the CSV as text, so it has to be parsed
# back into numbers while loading; that is why pd.eval is used as a converter
# here (and why the original project's eval() calls appear below).
# NOTE(review): pd.eval evaluates each cell as an expression -- only load CSV
# files from a trusted source.
df = pd.read_csv(
    "./frontend/data/test_with_summary_and_embeddings.csv",
    converters={'embedding_summary': pd.eval},
)

# Column parsing used by the original project's CSV files (kept for reference):
#df['summary_embeddings'] = df['summary_embeddings'].apply(lambda x: eval(x))
#df['embeddings'] = df['embeddings'].apply(lambda x: eval(x))

# File extensions (without the dot) that the browser view renders as source files.
filetypes = ['go']


@app.route('/')
def home():
    """Render the browser page for the path component given in ``?path=``.

    The payload passed to ``index.html`` has four keys: ``parents`` (the
    prefix of the requested component), ``loctype`` (``"file"``, ``"folder"``
    or ``"nan"`` for an unknown path), ``text`` (a list of
    ``[source, summary]`` pairs for a file, ``[child, ""]`` pairs for a
    folder) and ``current`` (the requested component).
    """
    stub = request.args.get('path', 'dirserver').strip()
    kids_structure, parents_structure = get_code_structure(df)
    app.logger.debug('kids_structure %s', kids_structure)
    app.logger.debug('parents_structure %s', parents_structure)
    app.logger.debug('stub %s', stub)

    # Unknown components have no parent entry either; fall back to the root
    # marker instead of raising KeyError after the error text was prepared.
    parent = parents_structure.get(stub, "./")
    # Match on ".ext" so that a directory merely ending in the letters of an
    # extension (e.g. "go", "cargo") is not mistaken for a source file.
    suffixes = tuple("." + ext.lstrip(".") for ext in filetypes)

    if stub not in kids_structure:
        loctype = "nan"
        text = [["Path not available!"], [""]]
    elif stub.endswith(suffixes):
        loctype = "file"
        # A first-level component has the parent marker "./"; its file_path is
        # just the name, so no prefix must be prepended.
        fullpath = stub if parent == "./" else f"{parent}/{stub}"
        app.logger.debug('fullpath %s', fullpath)
        rows = df[df['file_path'] == fullpath]
        text = [[source, summary] for source, summary in zip(rows['source'], rows['summary'])]
    else:
        loctype = "folder"
        text = [[child, ""] for child in kids_structure[stub]]

    res = {
        'parents': parent,
        'loctype': loctype,
        'text': text,
        'current': stub
    }
    return render_template('index.html', payload=res)


@app.route('/answer')
def answer():
    """Return the best-matching code blobs and summaries for ``?q=`` as JSON."""
    q = request.args.get('q', '').strip()
    if not q:
        # Nothing to search for; skip the embedding request entirely.
        return jsonify([])
    matches = search_code(df, q)
    res = [{'blob': row['blob'], 'summary': row['summary']} for row in matches.to_dict('records')]
    return jsonify(res)


@app.route('/explain')
def explain():
    """Return a generated answer to the question in ``?q=`` as a JSON string."""
    q = request.args.get('q', '').strip()
    if not q:
        # No question given; skip the completion request entirely.
        return jsonify("")
    return jsonify(generate_answer(q))


if __name__ == '__main__':
    app.run(port=5001, debug=True)
    #app.run(host="0.0.0.0", port=5001, debug=True)

# End of patch payload (exported by cgit v1.2.3).