# server.py

from flask import Flask, request, Response, jsonify, render_template
import pandas as pd
from collections import defaultdict
import os
import json
import openai
from openai.embeddings_utils import get_embedding, cosine_similarity
openai.api_key = os.getenv('OPENAI_KEY')
import numpy as np
from pathlib import PurePosixPath

# Flask application object. Templates and static assets are both looked up in
# ./frontend, and static files are served with an empty URL prefix.
app = Flask(
    __name__,
    static_url_path="",
    static_folder="./frontend",
    template_folder="./frontend",
)

def search_code(df, query, n=4):
    """Return the ``n`` rows of ``df`` most similar to ``query``.

    The query is embedded with the ``text-embedding-ada-002`` engine and
    compared, by cosine similarity, against each row's ``embedding_summary``
    value.

    Args:
        df: DataFrame with an ``embedding_summary`` column holding one
            embedding per row.
        query: Text to search for.
        n: Maximum number of rows to return.

    Returns:
        A new DataFrame carrying an extra ``similarity`` column, sorted from
        most to least similar and truncated to ``n`` rows. ``df`` itself is
        left unmodified.
    """
    query_embedding = get_embedding(query, engine="text-embedding-ada-002")
    scores = df["embedding_summary"].apply(
        lambda embedding: cosine_similarity(embedding, query_embedding)
    )
    # Attach the scores to a copy rather than to ``df``: the frame passed in
    # is shared module state, and writing a per-query column into it lets
    # overlapping requests overwrite each other's scores before the sort.
    ranked = df.assign(similarity=scores).sort_values("similarity", ascending=False)
    return ranked.head(n)


def generate_answer(question):
    """Answer ``question`` with a completion grounded in the best search hits.

    The ``summary`` and ``blob`` of the top hits are concatenated into a
    context section, followed by an instruction asking the model to answer
    the question and include an example.

    Args:
        question: Natural-language question about the indexed code.

    Returns:
        The text of the first completion choice in the API response.
    """
    results = search_code(df, question, n=4)
    # Only the three best hits go into the prompt. Slicing (instead of
    # indexing rows 0..2 one by one) keeps this working when the search
    # returns fewer than three rows.
    top = results.iloc[:3]
    context = "".join(
        summary + "\n" + blob + "\n"
        for summary, blob in zip(top["summary"], top["blob"])
    )
    prompt = (
        context
        + "\n"
        + "Answer the following question using the code context given above, and show an example with 'Example'\nQ: "
        + question
        + "\nA: "
    )
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0.7,
        max_tokens=1000,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        stop=["\"\"\""]
    )
    return response["choices"][0]["text"]


def add_to_tree(tree: dict, path: str):
    """Insert ``path`` into the nested directory mapping ``tree`` in place.

    Each path component becomes a node dict keyed by its name. A component
    that is last in the path is recorded as ``{"filetype": "file"}``; any
    earlier one as ``{"filetype": "dir", "children": {...}}``. Components
    that already exist at their level are left untouched.
    """
    segments = PurePosixPath(path).parts
    leaf_index = len(segments) - 1
    level = tree
    for depth, name in enumerate(segments):
        if name not in level:
            if depth == leaf_index:
                level[name] = {"filetype": "file"}
            else:
                level[name] = {"filetype": "dir", "children": {}}
        # A file node has no "children" entry; descending past one continues
        # in a throwaway dict, so nothing further is recorded beneath it.
        level = level[name].get("children", {})

def create_directory_tree(df):
    """Build a nested directory mapping from the ``file_path`` column of ``df``.

    Every distinct path is inserted with ``add_to_tree`` and the populated
    mapping is returned.
    """
    tree = {}
    for file_path in df['file_path'].unique():
        add_to_tree(tree, file_path)
    return tree

# Dataset backing the search index. The CSV stores each summary embedding as
# text, so ``pd.eval`` is passed as a converter to turn the
# ``embedding_summary`` cells back into Python values while loading.
# NOTE(review): ``pd.eval`` evaluates expressions read from the file; only
# point this at CSVs you trust.
#
# Datasets used at other times (the first two come from the original project):
#   ./frontend/data/embedded_summarized.csv
#   ./frontend/data/withsummary2.csv
#   ./frontend/data/rs.csv   (same ``embedding_summary`` converter as below)
# The original-project files kept their embeddings in ``summary_embeddings``
# and ``embeddings`` columns, decoded with ``eval`` after loading.
df = pd.read_csv(
    "./frontend/data/test_with_summary_and_embeddings.csv",
    converters={"embedding_summary": pd.eval},
)

# File extensions of source languages.
# NOTE(review): not referenced anywhere in the visible part of this file.
filetypes = [
    '.sh', '.c', '.h', '.cpp', '.cxx', '.hxx', '.hpp',
    '.go', '.hs', '.js', '.py', '.rs',
]
def check_path(path, dirstructure):
    """Look up ``path`` in a nested directory mapping.

    ``dirstructure`` maps names to node dicts of the form
    ``{"filetype": "file"}`` or ``{"filetype": "dir", "children": {...}}``,
    where ``children`` is again such a mapping.

    Args:
        path: POSIX-style path whose first component is a top-level key of
            ``dirstructure``.
        dirstructure: The nested mapping to search.

    Returns:
        A ``(found, filetype, children)`` tuple. When the full path exists,
        ``found`` is True, ``filetype`` is the node's ``"filetype"`` value and
        ``children`` is the node's child mapping for a directory or None for
        a file. When any component is missing (or ``path`` is empty) the
        result is ``(False, None, None)``.
    """
    components = PurePosixPath(path).parts
    # An empty path has no components; treat it like any other miss instead
    # of failing on components[0].
    if not components or components[0] not in dirstructure:
        return False, None, None

    # NOTE(review): the original comment here said the outermost entry "is
    # solitary by setup.py"; nothing in this lookup relies on that.
    node = dirstructure[components[0]]
    for component in components[1:]:
        siblings = node.get("children", {})
        if component not in siblings:
            return False, None, None
        node = siblings[component]

    # Report on the node the whole path resolves to; this is reached only
    # after every component has matched, including single-component paths.
    children = node["children"] if node["filetype"] == "dir" else None
    return True, node["filetype"], children

@app.route('/')
def home():
    """Render the index page for the dataset root or for a single file.

    Without a ``path`` query argument the page lists every distinct
    ``file_path`` in the dataset. With one, it shows the ``source`` /
    ``summary`` pairs of the rows whose ``file_path`` equals that argument.

    Returns:
        The rendered ``index.html`` template, given a ``payload`` dict with
        the keys ``loctype``, ``text`` and ``current``.
    """
    req_path = request.args.get('path')
    if req_path is None:
        loctype = "folder"
        current = 'root directory'
        text = [[file_path, ""] for file_path in df['file_path'].unique()]
    else:
        loctype = "file"
        current = req_path
        # Select the matching rows once and read both columns from that slice.
        rows = df[df['file_path'] == req_path]
        text = [
            [source, summary]
            for source, summary in zip(rows['source'], rows['summary'])
        ]

    payload = {
        'loctype': loctype,
        'text': text,
        'current': current,
    }
    return render_template('index.html', payload=payload)


@app.route('/answer')
def answer():
    """Return the best search hits for the ``q`` query argument as JSON.

    The response is a list of objects, each holding the ``blob`` and
    ``summary`` of one row returned by ``search_code``.
    """
    query = request.args.get('q', '').strip()
    matches = search_code(df, query)
    payload = []
    for record in matches.to_dict('records'):
        payload.append({'blob': record['blob'], 'summary': record['summary']})
    return jsonify(payload)

@app.route('/explain')
def explain():
    """Return, as JSON, the generated answer for the ``q`` query argument."""
    question = request.args.get('q', '').strip()
    return jsonify(generate_answer(question))


if __name__ == "__main__":
    # Start Flask's built-in server on port 8080 with debug mode enabled.
    # Alternative launch that binds to every interface:
    #   app.run(host="0.0.0.0", port=5001, debug=True)
    app.run(debug=True, port=8080)