summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--frontend/index.html2
-rw-r--r--server.py96
2 files changed, 54 insertions, 44 deletions
diff --git a/frontend/index.html b/frontend/index.html
index f0284eb..22c142c 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -84,7 +84,7 @@
text = "";
}
$("#main").html(text);
- $("#main-title").html(res['parents'] + "/" + res['current']);
+ $("#main-title").html(res['current']);
hljs.highlightAll();
$("#talk-button").click(()=>{
diff --git a/server.py b/server.py
index 663f8b4..08f20dd 100644
--- a/server.py
+++ b/server.py
@@ -2,15 +2,15 @@ from flask import Flask, request, Response, jsonify, render_template
import pandas as pd
from collections import defaultdict
import os
+import json
import openai
from openai.embeddings_utils import get_embedding, cosine_similarity
openai.api_key = os.getenv('OPENAI_KEY')
import numpy as np
+from pathlib import PurePosixPath
app = Flask(__name__, template_folder="./frontend", static_folder="./frontend", static_url_path="")
-
-
def search_code(df, query, n=4):
query_embedding = get_embedding(
query,
@@ -44,37 +44,34 @@ def generate_answer(question):
)
return response["choices"][0]["text"]
-# The above two functions should already work with the Go version.
-
-def get_code_structure(df):
- kids_structure = defaultdict(list)
- parents_structure = {}
- for path in list(df['file_path'].unique()):
- t = path.split("/")
- for e in range(len(t)):
- # if we know it's a dir name, record the next path component as a
- # child of the current dir.
- if e < len(t) - 1:
- kids_structure[t[e]].append(t[e+1])
- else:
- kids_structure[t[e]] = ["file"]
- if e == 0:
- parents_structure[t[e]] = "./"
- else:
- parents_structure[t[e]] = "/".join(t[:e])
- for k, v in kids_structure.items():
- kids_structure[k] = list(set(v))
- return dict(kids_structure), parents_structure
+def add_to_tree(tree: dict, path: str):
+ parts = PurePosixPath(path).parts
+ current = tree
+ for i, part in enumerate(parts):
+ if part not in current:
+ current[part] = {}
+ if i == len(parts) - 1:
+ current[part]["filetype"] = "file"
+ else:
+ current[part]["filetype"] = "dir"
+ current[part]["children"] = {}
+ current = current[part].get("children", {})
+def create_directory_tree(df):
+ paths = list(df['file_path'].unique())
+ directory_tree = {}
+ for path in paths:
+ add_to_tree(directory_tree, path)
+ return directory_tree
# Nate: these are from the original project. My own csv is below
#df = pd.read_csv("./frontend/data/embedded_summarized.csv")
#df = pd.read_csv("./frontend/data/withsummary2.csv")
# My line
-#df=pd.read_csv("./frontend/data/test_with_summary_and_embeddings.csv", converters={'embedding_summary': pd.eval})
-df=pd.read_csv("./frontend/data/rs.csv", converters={'embedding_summary': pd.eval})
+df=pd.read_csv("./frontend/data/test_with_summary_and_embeddings.csv", converters={'embedding_summary': pd.eval})
+#df=pd.read_csv("./frontend/data/rs.csv", converters={'embedding_summary': pd.eval})
# need to do funny stuff to read in the data frame correctly from csv. that's
# why the eval() is below. and pd.eval is above.
@@ -83,31 +80,44 @@ df=pd.read_csv("./frontend/data/rs.csv", converters={'embedding_summary': pd.eva
#df['embeddings'] = df['embeddings'].apply(lambda x: eval(x))
filetypes = ['.sh', '.c', '.h', '.cpp', '.cxx', '.hxx', '.hpp', '.go', '.hs', '.js', '.py', '.rs']
+def check_path(path, dirstructure):
+ children = None
+ components = PurePosixPath(path).parts
+ if(components[0] in dirstructure.keys()):
+ print('SCREAM')
+
+ currentdict = dirstructure.get(components[0]) #outermost is solitary by setup.py
+ print(len(components))
+ for component in components[1:]:
+ print('COMP', component)
+ if component in currentdict.get("children", {}):
+ currentdict = currentdict["children"][component]
+ else:
+ return False, None, children
+ if currentdict["filetype"] == "dir":
+ children = currentdict["children"]
+ return True, currentdict["filetype"], children
+ else:
+ return False, None, None
-# messed this area up for debugging
@app.route('/')
def home():
- stub = request.args.get('path', 'hello').strip()
- kids_structure, parents_structure = get_code_structure(df)
- if stub not in kids_structure:
- loctype = "nan"
- text = [["Path not available!"], [""]]
- elif any([stub.endswith(x) for x in filetypes]):
- loctype = "file"
- fullpath = f"{parents_structure[stub]}/{stub}"
- print(fullpath)
- text = [[x, y] for x, y in zip(
- list(df[df['file_path'] == fullpath]['source']),
- list(df[df['file_path'] == fullpath]['summary'])
- )]
- else:
+ req_path = request.args.get('path')
+ dirstructure = create_directory_tree(df)
+ if req_path is None:
loctype = "folder"
- text = [[x, ""] for x in kids_structure[stub]]
+ text = [[x, ""] for x in list(df['file_path'].unique())]
+ else:
+ text = [[x, y] for x, y in zip(
+ list(df[df['file_path'] == req_path]['source']),
+ list(df[df['file_path'] == req_path]['summary'])
+ )]
+ loctype = "file"
+
res = {
- 'parents': parents_structure[stub],
'loctype': loctype,
'text': text,
- 'current': stub
+ 'current': 'root directory' if (req_path is None) else req_path
}
return render_template('index.html', payload=res)