diff options
-rw-r--r-- | frontend/index.html | 2 | ||||
-rw-r--r-- | server.py | 96 |
2 files changed, 54 insertions, 44 deletions
diff --git a/frontend/index.html b/frontend/index.html index f0284eb..22c142c 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -84,7 +84,7 @@ text = ""; } $("#main").html(text); - $("#main-title").html(res['parents'] + "/" + res['current']); + $("#main-title").html(res['current']); hljs.highlightAll(); $("#talk-button").click(()=>{ @@ -2,15 +2,15 @@ from flask import Flask, request, Response, jsonify, render_template import pandas as pd from collections import defaultdict import os +import json import openai from openai.embeddings_utils import get_embedding, cosine_similarity openai.api_key = os.getenv('OPENAI_KEY') import numpy as np +from pathlib import PurePosixPath app = Flask(__name__, template_folder="./frontend", static_folder="./frontend", static_url_path="") - - def search_code(df, query, n=4): query_embedding = get_embedding( query, @@ -44,37 +44,34 @@ def generate_answer(question): ) return response["choices"][0]["text"] -# The above two functions should already work with the Go version. - -def get_code_structure(df): - kids_structure = defaultdict(list) - parents_structure = {} - for path in list(df['file_path'].unique()): - t = path.split("/") - for e in range(len(t)): - # if we know it's a dir name, record the next path component as a - # child of the current dir. - if e < len(t) - 1: - kids_structure[t[e]].append(t[e+1]) - else: - kids_structure[t[e]] = ["file"] - if e == 0: - parents_structure[t[e]] = "./" - else: - parents_structure[t[e]] = "/".join(t[:e]) - for k, v in kids_structure.items(): - kids_structure[k] = list(set(v)) - return dict(kids_structure), parents_structure +def add_to_tree(tree: dict, path: str): + parts = PurePosixPath(path).parts + current = tree + for i, part in enumerate(parts): + if part not in current: + current[part] = {} + if i == len(parts) - 1: + current[part]["filetype"] = "file" + else: + current[part]["filetype"] = "dir" + current[part]["children"] = {} + current = current[part].get("children", {}) +def create_directory_tree(df): + paths = list(df['file_path'].unique()) + directory_tree = {} + for path in paths: + add_to_tree(directory_tree, path) + return directory_tree # Nate: these are from the original project. My own csv is below #df = pd.read_csv("./frontend/data/embedded_summarized.csv") #df = pd.read_csv("./frontend/data/withsummary2.csv") # My line -#df=pd.read_csv("./frontend/data/test_with_summary_and_embeddings.csv", converters={'embedding_summary': pd.eval}) -df=pd.read_csv("./frontend/data/rs.csv", converters={'embedding_summary': pd.eval}) +df=pd.read_csv("./frontend/data/test_with_summary_and_embeddings.csv", converters={'embedding_summary': pd.eval}) +#df=pd.read_csv("./frontend/data/rs.csv", converters={'embedding_summary': pd.eval}) # need to do funny stuff to read in the data frame correctly from csv. that's # why the eval() is below. and pd.eval is above. @@ -83,31 +80,44 @@ df=pd.read_csv("./frontend/data/rs.csv", converters={'embedding_summary': pd.eva #df['embeddings'] = df['embeddings'].apply(lambda x: eval(x)) filetypes = ['.sh', '.c', '.h', '.cpp', '.cxx', '.hxx', '.hpp', '.go', '.hs', '.js', '.py', '.rs'] +def check_path(path, dirstructure): + children = None + components = PurePosixPath(path).parts + if(components[0] in dirstructure.keys()): + print('SCREAM') + + currentdict = dirstructure.get(components[0]) #outermost is solitary by setup.py + print(len(components)) + for component in components[1:]: + print('COMP', component) + if component in currentdict.get("children", {}): + currentdict = currentdict["children"][component] + else: + return False, None, children + if currentdict["filetype"] == "dir": + children = currentdict["children"] + return True, currentdict["filetype"], children + else: + return False, None, None -# messed this area up for debugging @app.route('/') def home(): - stub = request.args.get('path', 'hello').strip() - kids_structure, parents_structure = get_code_structure(df) - if stub not in kids_structure: - loctype = "nan" - text = [["Path not available!"], [""]] - elif any([stub.endswith(x) for x in filetypes]): - loctype = "file" - fullpath = f"{parents_structure[stub]}/{stub}" - print(fullpath) - text = [[x, y] for x, y in zip( - list(df[df['file_path'] == fullpath]['source']), - list(df[df['file_path'] == fullpath]['summary']) - )] - else: + req_path = request.args.get('path') + dirstructure = create_directory_tree(df) + if req_path is None: loctype = "folder" - text = [[x, ""] for x in kids_structure[stub]] + text = [[x, ""] for x in list(df['file_path'].unique())] + else: + text = [[x, y] for x, y in zip( + list(df[df['file_path'] == req_path]['source']), + list(df[df['file_path'] == req_path]['summary']) + )] + loctype = "file" + res = { - 'parents': parents_structure[stub], 'loctype': loctype, 'text': text, - 'current': stub + 'current': 'root directory' if (req_path is None) else req_path } return render_template('index.html', payload=res) |