author     Nate Buttke <nate-web@riseup.net>   2023-08-14 20:35:45 -0700
committer  Nate Buttke <nate-web@riseup.net>   2023-08-14 20:35:45 -0700
commit     7435e423776c7b35b9c6c9bebba25a44691554bf (patch)
tree       2196a0e68bc9ff2c2df92590444eee8b785f4a11 /server.py
parent     f334391613e01057d572e0228aa4f3c2f24346dc (diff)
huge fix to path handling. added clean cli, cost estimate, ignorefile.
Diffstat (limited to 'server.py')
-rw-r--r--  server.py  208
1 file changed, 127 insertions, 81 deletions
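
Most of the rewrite below is about how file paths travel between the browser and the server: home() now expects the path query parameter as a URL-safe base64 token, decodes it before walking the directory tree, and encodes every link it emits the same way. A minimal standalone sketch of that round trip (the path here is made up, not taken from the repo):

import base64

path = "frontend/data/embeddings.csv"  # hypothetical repo-relative path

# Encode the way the new home() handler does when it builds links:
token = base64.urlsafe_b64encode(bytes(path, "utf-8")).decode("utf-8")

# Decode the way it handles the incoming ?path=... query parameter:
decoded = base64.urlsafe_b64decode(token).decode("utf-8")

assert decoded == path  # the URL-safe alphabet never emits '/', so the token is safe in a query string
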
diff --git a/server.py b/server.py
index 2bb203d..1cd914c 100644
--- a/server.py
+++ b/server.py
@@ -1,49 +1,56 @@
-from flask import Flask, request, Response, jsonify, render_template
-import pandas as pd
from collections import defaultdict
import os
-import json
+import sys
+from pathlib import PurePosixPath
+import base64
+from flask import Flask, request, jsonify, render_template
+import pandas as pd
import openai
from openai.embeddings_utils import get_embedding, cosine_similarity
openai.api_key = os.getenv('OPENAI_KEY')
-import numpy as np
-from pathlib import PurePosixPath
-FILETYPES = ['.sh', '.c', '.h', '.cpp', '.cxx', '.hxx', '.hpp', '.go', '.hs', '.js', '.py', '.rs']
+if len(sys.argv) != 2:
+ print("USAGE: python server.py PATH_TO_CSV")
+ print("wrong number of arguments", file=sys.stderr)
+ sys.exit(1)
+try:
+ df=pd.read_csv(sys.argv[1], converters={"embedding_summary": pd.eval})
+except Exception:
+ print(f"Problem opening {sys.argv[1]}", file=sys.stderr)
+ sys.exit(1)
+
app = Flask(__name__, template_folder="./frontend", static_folder="./frontend", static_url_path="")
def search_code(df, query, n=4):
query_embedding = get_embedding(
- query,
- engine="text-embedding-ada-002"
- )
+ query,
+ engine="text-embedding-ada-002"
+ )
df["similarity"] = df.embedding_summary.apply(lambda x: cosine_similarity(x, query_embedding))
results = (
- df.sort_values("similarity", ascending=False)
- )
+ df.sort_values("similarity", ascending=False)
+ )
return results.head(n)
-
def generate_answer(question):
- results = search_code(df, question, n=4)
- prompt = ''
- for i in range(3):
- prompt += results.iloc[i]["summary"] + "\n" + results.iloc[i]["blob"] + "\n"
- prompt += "\n" + "Answer the following question using the code context given above, and show an example with 'Example'\nQ: " + question + "\nA: "
- response = openai.Completion.create(
- model="text-davinci-003",
- # model="code-davinci-002",
- prompt=prompt,
- temperature=0.7,
- max_tokens=1000,
- top_p=1.0,
- frequency_penalty=0.0,
- presence_penalty=0.0,
- stop=["\"\"\""]
- )
- return response["choices"][0]["text"]
-
+ results = search_code(df, question, n=4)
+ prompt = ''
+ for i in range(3):
+ prompt += results.iloc[i]["summary"] + "\n" + results.iloc[i]["blob"] + "\n"
+ prompt += "\n" + "Answer the following question using the code context given above, and show an example with 'Example'\nQ: " + question + "\nA: "
+ response = openai.Completion.create(
+ model="text-davinci-003",
+ # model="code-davinci-002",
+ prompt=prompt,
+ temperature=0.7,
+ max_tokens=1000,
+ top_p=1.0,
+ frequency_penalty=0.0,
+ presence_penalty=0.0,
+ stop=["\"\"\""]
+ )
+ return response["choices"][0]["text"]
def add_to_tree(tree: dict, path: str):
parts = PurePosixPath(path).parts
@@ -65,72 +72,111 @@ def create_directory_tree(df):
add_to_tree(directory_tree, path)
return directory_tree
-# Nate: these are from the original project. My own csv is below
-#df = pd.read_csv("./frontend/data/embedded_summarized.csv")
-#df = pd.read_csv("./frontend/data/withsummary2.csv")
+def get_outermost_item(dirstructure):
+ return list(dirstructure.keys())[0]
-# My line
-df=pd.read_csv("./frontend/data/test_with_summary_and_embeddings.csv", converters={'embedding_summary': pd.eval})
+def get_kids_of_root(dirstructure):
+ return list(dirstructure.values())[0].get("children").keys()
-# need to do funny stuff to read in the data frame correctly from csv. that's
-# why the eval() is below. and pd.eval is above.
-# df['embeddings'] = df['embeddings'].apply(lambda x: eval(x))
def check_path(path, dirstructure):
- children = None
- components = PurePosixPath(path).parts
- if(components[0] in dirstructure.keys()):
- print('SCREAM')
-
- currentdict = dirstructure.get(components[0]) #outermost is solitary by setup.py
- print(len(components))
- for component in components[1:]:
- print('COMP', component)
- if component in currentdict.get("children", {}):
- currentdict = currentdict["children"][component]
- else:
- return False, None, children
- if currentdict["filetype"] == "dir":
- children = currentdict["children"]
- return True, currentdict["filetype"], children
- else:
+ components = PurePosixPath(path).parts
+ if components[0] in dirstructure.keys():
+ if dirstructure[components[0]]["filetype"] == "dir":
+ subdict = dirstructure.get(components[0])
+ if len(components) == 1:
+ ftype = subdict["filetype"]
+ kids = list(subdict["children"].keys())
+ return True, str(ftype), kids
+ else:
+ for c in components[1:]:
+ if c in subdict["children"]:
+ found = True
+ subdict = subdict["children"].get(c)
+ ftype = subdict["filetype"]
+ kids = list(subdict["children"].keys()) if ftype == 'dir' else None
+ else:
+                        return False, None, None  # missing component: not a valid path
+ if found:
+ return found, str(ftype), kids
+ else:
+ return True, "file", None
return False, None, None
+
+# The embedding column is stored as a string in the CSV, so it has to be parsed
+# back with a converter (pd.eval) when the data frame is read in.
+#df=pd.read_csv("./frontend/data/test_with_summary_and_embeddings.csv", converters={'embedding_summary': pd.eval})
+#df=pd.read_csv("./frontend/data/r2.csv", converters={'embedding_summary': pd.eval})
+
+# old strategy:
+# df['embeddings'] = df['embeddings'].apply(lambda x: eval(x))
+
@app.route('/')
def home():
- req_path = request.args.get('path')
- dirstructure = create_directory_tree(df)
- if req_path is None:
- loctype = "folder"
- text = [[x, ""] for x in list(df['file_path'].unique())]
- else:
- text = [[x, y] for x, y in zip(
- list(df[df['file_path'] == req_path]['source']),
- list(df[df['file_path'] == req_path]['summary'])
- )]
- loctype = "file"
-
- res = {
- 'loctype': loctype,
- 'text': text,
- 'current': 'root directory' if (req_path is None) else req_path
- }
- return render_template('index.html', payload=res)
+ req_path = request.args.get('path')
+ path_decode = None if req_path is None else base64.urlsafe_b64decode(req_path).decode('utf-8')
+ dirstructure = create_directory_tree(df)
+ if req_path is None:
+ rootname = get_outermost_item(dirstructure)
+        if check_path(rootname, dirstructure)[1] == "file":
+ if len(dirstructure.keys()) == 1:
+ loctype = "file"
+ text = [[(x, None), y] for x, y in zip(
+ list(df[df["file_path"] == rootname]["source"]),
+ list(df[df["file_path"] == rootname]["summary"])
+ )]
+ else:
+ loctype = "folder"
+ files = list(dirstructure.keys())
+ text = [[(x, base64.urlsafe_b64encode(bytes(x, 'utf-8')).decode("utf-8")), ""] for x in files]
+ else:
+ loctype = "folder"
+ if not rootname.endswith("/"):
+ rootname += '/'
+ kids = get_kids_of_root(dirstructure)
+ text = [[(k, base64.urlsafe_b64encode(bytes(rootname + k, 'utf-8')).decode('utf-8')), ""] for k in kids]
+ else:
+ path_info = check_path(path_decode, dirstructure)
+ if path_info[0] is False:
+ text = [["error", "path not found"]]
+ loctype = "file"
+ elif path_info[1] == "file":
+ loctype = "file"
+ text = [[(x, None), y] for x, y in zip(
+ list(df[df["file_path"] == path_decode]["source"]),
+ list(df[df["file_path"] == path_decode]["summary"])
+ )]
+ elif path_info[1] == "dir":
+ loctype = "folder"
+ text = [[(x, base64.urlsafe_b64encode(bytes(path_decode + "/" + x, 'utf-8')).decode("utf-8")), ""] for x in path_info[2]]
+
+ if req_path is not None:
+ curr = path_decode
+ elif loctype == "folder":
+ curr = 'root directory'
+ else:
+ curr = get_outermost_item(dirstructure)
+
+ res = {
+ 'loctype': loctype,
+ 'text': text,
+ 'current': curr
+ }
+ return render_template('index.html', payload=res)
@app.route('/answer')
def answer():
- q = request.args.get('q', '').strip()
- a = search_code(df, q)
- res = [{'blob': x['blob'], 'summary': x['summary']} for x in a.to_dict('records')]
-
- return jsonify(res)
+ q = request.args.get('q', '').strip()
+ a = search_code(df, q)
+ res = [{'blob': x['blob'], 'summary': x['summary']} for x in a.to_dict('records')]
+ return jsonify(res)
@app.route('/explain')
def explain():
- q = request.args.get('q', '').strip()
- a = generate_answer(q)
- return jsonify(a)
+ q = request.args.get('q', '').strip()
+ a = generate_answer(q)
+ return jsonify(a)
if __name__ == '__main__':
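
Usage note on the new command line: server.py now takes the embeddings CSV as its only argument instead of hard-coding a path under ./frontend/data/. The sketch below is only an illustration of the expected CSV shape, with a fabricated row, a dummy embedding, and a made-up file name; the column names (file_path, source, summary, blob, embedding_summary) are the ones the server reads.

import pandas as pd

# Sketch only: one fake row with a placeholder embedding. Real rows would carry
# text-embedding-ada-002 vectors produced by the project's indexing step.
rows = [{
    "file_path": "demo/hello.py",
    "source": "def hello():\n    return 'hi'\n",
    "summary": "A function that returns a greeting.",
    "blob": "def hello():\n    return 'hi'\n",
    "embedding_summary": [0.0] * 1536,  # placeholder; ada-002 embeddings are 1536-dimensional
}]
pd.DataFrame(rows).to_csv("demo_embeddings.csv", index=False)

# server.py parses embedding_summary back from its string form with the pd.eval
# converter and then serves the UI:
#     python server.py demo_embeddings.csv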