"""Flask app for semantic search and Q&A over summarized source-code blobs.

A CSV of code blobs, their summaries and the summaries' embeddings is loaded
once at import time. The routes rank those rows against a user query with
OpenAI embeddings and optionally ask a completion model to answer a question
using the best-matching rows as context.
"""
from collections import defaultdict
import json
import os
from pathlib import PurePosixPath

from flask import Flask, request, Response, jsonify, render_template
import numpy as np
import openai
from openai.embeddings_utils import get_embedding, cosine_similarity
import pandas as pd

openai.api_key = os.getenv('OPENAI_KEY')

FILETYPES = ['.sh', '.c', '.h', '.cpp', '.cxx', '.hxx', '.hpp', '.go', '.hs', '.js', '.py', '.rs']

app = Flask(__name__, template_folder="./frontend", static_folder="./frontend", static_url_path="")


def search_code(df, query, n=4):
    """Return the ``n`` rows of ``df`` most similar to ``query``.

    Args:
        df: DataFrame with an ``embedding_summary`` column of embedding vectors.
        query: Free-text search string; it is embedded with
            ``text-embedding-ada-002``.
        n: Maximum number of rows to return.

    Returns:
        A new DataFrame holding the top ``n`` rows, sorted by descending
        cosine similarity, with an added ``similarity`` column.
    """
    query_embedding = get_embedding(query, engine="text-embedding-ada-002")
    similarity = df.embedding_summary.apply(
        lambda embedding: cosine_similarity(embedding, query_embedding)
    )
    # assign() builds a new frame, so the caller's DataFrame (the module-level
    # one shared by every request handler) is left untouched.
    ranked = df.assign(similarity=similarity).sort_values("similarity", ascending=False)
    return ranked.head(n)


def generate_answer(question):
    """Answer ``question`` with a completion model, using the best code matches as context.

    The summaries and blobs of up to three best-matching rows are placed in
    front of the question in the prompt.

    Args:
        question: The user's question.

    Returns:
        The text of the first completion choice.
    """
    context_hits = 3
    results = search_code(df, question, n=context_hits)
    # Iterate over the rows actually returned: a corpus with fewer than three
    # rows must not raise IndexError.
    context = "".join(
        summary + "\n" + blob + "\n"
        for summary, blob in zip(results["summary"], results["blob"])
    )
    prompt = (
        context
        + "\n"
        + "Answer the following question using the code context given above, and show an example with 'Example'\nQ: "
        + question
        + "\nA: "
    )
    response = openai.Completion.create(
        model="text-davinci-003",
        # Alternative model: "code-davinci-002"
        prompt=prompt,
        temperature=0.7,
        max_tokens=1000,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        stop=["\"\"\""]
    )
    return response["choices"][0]["text"]


def add_to_tree(tree: dict, path: str):
    """Insert ``path`` into the nested directory ``tree`` in place.

    Every path component becomes a node ``{"filetype": "dir", "children": {...}}``
    except the last one, which becomes ``{"filetype": "file"}``. Nodes that
    already exist are reused.

    Args:
        tree: Mapping from top-level component name to node; it is mutated.
        path: POSIX-style path to insert.
    """
    parts = PurePosixPath(path).parts
    last_index = len(parts) - 1
    current = tree
    for index, part in enumerate(parts):
        node = current.get(part)
        if node is None:
            if index == last_index:
                node = {"filetype": "file"}
            else:
                node = {"filetype": "dir", "children": {}}
            current[part] = node
        elif index != last_index and "children" not in node:
            # The node was first recorded as a file but is now being used as a
            # directory prefix: promote it so the deeper entries are not lost.
            node["filetype"] = "dir"
            node["children"] = {}
        current = node.get("children", {})


def create_directory_tree(df):
    """Build a nested directory tree from the unique values of ``df['file_path']``.

    Returns:
        A dict in the node format produced by ``add_to_tree``.
    """
    directory_tree = {}
    for path in df['file_path'].unique():
        add_to_tree(directory_tree, path)
    return directory_tree


# Nate: these two CSVs are from the original project. My own CSV is loaded below.
# df = pd.read_csv("./frontend/data/embedded_summarized.csv")
# df = pd.read_csv("./frontend/data/withsummary2.csv")

# My line. The embedding column does not come back from the CSV in usable form
# on its own (presumably it is stored as the text of a list), so it is run
# through pd.eval while loading.
# NOTE(review): pd.eval evaluates text taken from the file; only load CSVs you trust.
df = pd.read_csv(
    "./frontend/data/test_with_summary_and_embeddings.csv",
    converters={'embedding_summary': pd.eval},
)
# Earlier approach, kept for reference:
# df['embeddings'] = df['embeddings'].apply(lambda x: eval(x))


def check_path(path, dirstructure):
    """Look ``path`` up in a tree built by ``create_directory_tree``.

    Args:
        path: POSIX-style path to resolve.
        dirstructure: Mapping from top-level component name to node.

    Returns:
        A tuple ``(found, filetype, children)``. ``found`` is False, and the
        other two are None, when the path is empty or any component is
        missing. ``children`` is the node's children dict for a directory and
        None for a file.
    """
    components = PurePosixPath(path).parts
    # An empty path has no components; treat it as "not found" instead of
    # failing on components[0].
    if not components or components[0] not in dirstructure:
        return False, None, None

    # Original author's note: the outermost component is solitary by setup.py.
    node = dirstructure[components[0]]
    for component in components[1:]:
        children = node.get("children", {})
        if component not in children:
            return False, None, None
        node = children[component]

    filetype = node["filetype"]
    return True, filetype, (node["children"] if filetype == "dir" else None)


@app.route('/')
def home():
    """Render the index page.

    Without a ``path`` query argument the payload lists every unique file
    path; with one it lists the ``[source, summary]`` pairs of the rows whose
    ``file_path`` equals it.
    """
    req_path = request.args.get('path')
    if req_path is None:
        loctype = "folder"
        text = [[file_path, ""] for file_path in df['file_path'].unique()]
    else:
        loctype = "file"
        # Filter once and read both columns from the same selection.
        rows = df[df['file_path'] == req_path]
        text = [[source, summary] for source, summary in zip(rows['source'], rows['summary'])]
    res = {
        'loctype': loctype,
        'text': text,
        'current': 'root directory' if (req_path is None) else req_path
    }
    return render_template('index.html', payload=res)


@app.route('/answer')
def answer():
    """Return the best-matching rows for the ``q`` query argument as JSON.

    The response is a list of ``{"blob", "summary"}`` objects, or a 400 error
    object when ``q`` is missing or blank.
    """
    q = request.args.get('q', '').strip()
    if not q:
        # Nothing to embed or search for; reject before spending an API call.
        return jsonify({'error': "missing query parameter 'q'"}), 400
    hits = search_code(df, q)
    res = [{'blob': row['blob'], 'summary': row['summary']} for row in hits.to_dict('records')]
    return jsonify(res)


@app.route('/explain')
def explain():
    """Return a generated answer to the ``q`` query argument as a JSON string.

    Responds with a 400 error object when ``q`` is missing or blank.
    """
    q = request.args.get('q', '').strip()
    if not q:
        # Nothing to embed or search for; reject before spending an API call.
        return jsonify({'error': "missing query parameter 'q'"}), 400
    return jsonify(generate_answer(q))


if __name__ == '__main__':
    # Development entry point only: debug=True enables Flask's debugger and
    # reloader and must not be exposed in production.
    app.run(port=8080, debug=True)