# server.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135

from flask import Flask, request, Response, jsonify, render_template
import pandas as pd
from collections import defaultdict
import os
import openai
from openai.embeddings_utils import get_embedding, cosine_similarity
openai.api_key = os.getenv('OPENAI_KEY')
import numpy as np

# Templates and static assets are both read from ./frontend.
# NOTE(review): the empty static_url_path presumably serves those assets
# without a URL prefix -- confirm against the frontend's asset links.
app = Flask(__name__, template_folder="./frontend", static_folder="./frontend", static_url_path="")


def search_code(df, query, n=4):
    """Return the ``n`` rows of ``df`` whose summary embedding best matches ``query``.

    The query is embedded with ``text-embedding-ada-002`` and compared with
    each row's ``embedding_summary`` value by cosine similarity.

    Args:
        df: DataFrame with an ``embedding_summary`` column holding one
            embedding vector per row.
        query: Free-text search string.
        n: Maximum number of rows to return.

    Returns:
        A new DataFrame holding the best-matching rows, most similar first,
        with an added ``similarity`` column. ``df`` itself is not modified.
    """
    query_embedding = get_embedding(query, engine="text-embedding-ada-002")
    scores = df.embedding_summary.apply(
        lambda row_embedding: cosine_similarity(row_embedding, query_embedding)
    )
    # Score on a copy rather than writing the column into ``df``: callers pass
    # the shared module-level frame, and a per-query column stored there can
    # be overwritten by a concurrent request before this one has sorted it.
    ranked = df.assign(similarity=scores).sort_values("similarity", ascending=False)
    return ranked.head(n)


def generate_answer(question):
    """Answer ``question`` with a completion grounded in the best search hits.

    The summary and blob of up to three top hits from ``search_code`` are
    concatenated as context, the question is appended, and the prompt is sent
    to the ``text-davinci-003`` completion model.

    Args:
        question: The question text to answer.

    Returns:
        The text of the first completion choice.
    """
    results = search_code(df, question, n=4)
    # Only the three best hits go into the prompt. Taking head(3) instead of
    # indexing rows 0..2 keeps this working when fewer than three rows exist.
    context = "".join(
        row["summary"] + "\n" + row["blob"] + "\n"
        for _, row in results.head(3).iterrows()
    )
    prompt = (
        context
        + "\n"
        + "Answer the following question using the code context given above, and show an example with 'Example'\nQ: "
        + question
        + "\nA: "
    )
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0.7,
        max_tokens=1000,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        stop=["\"\"\""],
    )
    return response["choices"][0]["text"]

# The above two functions should already work with the Go version.

def get_code_structure(df):
    """Build child and parent lookup tables from the ``file_path`` column.

    Every distinct path is split on ``/``. Each component is keyed by its bare
    name (not its full path), so components that share a name in different
    directories share one entry.

    Args:
        df: DataFrame with a ``file_path`` column of ``/``-separated paths.

    Returns:
        A ``(kids, parents)`` tuple of plain dicts:

        * ``kids`` maps a component name to the sorted, de-duplicated names
          found directly beneath it; the last component of a path maps to
          ``["file"]``.
        * ``parents`` maps a component name to the ``/``-joined path leading
          to it, or ``"./"`` for a first component.
    """
    kids_structure = defaultdict(list)
    parents_structure = {}
    for path in df['file_path'].unique():
        parts = path.split("/")
        last = len(parts) - 1
        for depth, name in enumerate(parts):
            if depth < last:
                # Not the final component, so it is a directory: record the
                # next component as one of its children.
                kids_structure[name].append(parts[depth + 1])
            else:
                kids_structure[name] = ["file"]
            parents_structure[name] = "/".join(parts[:depth]) if depth else "./"

    # Sort the de-duplicated children so listings come out in the same order
    # on every run instead of following set iteration order.
    kids = {
        name: sorted(set(children))
        for name, children in kids_structure.items()
    }
    return kids, parents_structure


# Nate: these are from the original project. My own CSV is loaded below.
#df = pd.read_csv("./frontend/data/embedded_summarized.csv")
#df = pd.read_csv("./frontend/data/withsummary2.csv")

# Load the summarized, embedded code table once at import time. Each
# 'embedding_summary' cell is passed through pd.eval while the file is read.
# NOTE(review): pd.eval evaluates the cell text as an expression, so this CSV
# has to be trusted. Presumably every cell is a plain list literal, in which
# case ast.literal_eval or json.loads would be a safer converter -- confirm.
df=pd.read_csv("./frontend/data/test_with_summary_and_embeddings.csv", converters={'embedding_summary': pd.eval})

# The embedding columns need converting when the frame is read back from CSV;
# that is why pd.eval is used above and eval() in the legacy lines below.

#df['summary_embeddings'] = df['summary_embeddings'].apply(lambda x: eval(x))
#df['embeddings'] = df['embeddings'].apply(lambda x: eval(x))

# Extensions (without the dot) that home() uses to decide whether a path
# component is a source file rather than a folder.
filetypes = ['go']


# messed this area up for debugging
@app.route('/')
def home():
    """Render the browser page for the path component named by ``?path=``.

    ``path`` is a single component name (a directory or file name as keyed by
    ``get_code_structure``), defaulting to ``dirserver``. The template gets a
    payload with:

    * ``loctype``: ``"nan"`` for an unknown name, ``"file"`` for a name with
      one of the ``filetypes`` extensions, otherwise ``"folder"``.
    * ``text``: ``[source, summary]`` pairs for a file, ``[child, ""]`` pairs
      for a folder, or a "Path not available!" placeholder.
    * ``parents``: the parent path of the component (``"./"`` at the root or
      for an unknown name).
    * ``current``: the requested name.
    """
    stub = request.args.get('path', 'dirserver').strip()
    kids_structure, parents_structure = get_code_structure(df)
    print('kids_structure', kids_structure)
    print('parents_structure', parents_structure)
    print('stub', stub)

    # An unknown name is missing from both tables; fall back to the root
    # marker so the "not available" page renders instead of raising KeyError.
    parent = parents_structure.get(stub, "./")
    # Require the dot so that a directory merely ending in the letters of an
    # extension (e.g. "go" or "cargo") is not mistaken for a source file.
    file_suffixes = tuple("." + ext.lstrip(".") for ext in filetypes)

    if stub not in kids_structure:
        loctype = "nan"
        text = [["Path not available!"], [""]]
    elif stub.endswith(file_suffixes):
        loctype = "file"
        # Root-level files are stored in file_path without any prefix, so the
        # "./" marker must not be glued onto the name.
        fullpath = stub if parent == "./" else f"{parent}/{stub}"
        print(fullpath)
        rows = df[df['file_path'] == fullpath]
        text = [[source, summary] for source, summary in zip(rows['source'], rows['summary'])]
    else:
        loctype = "folder"
        text = [[child, ""] for child in kids_structure[stub]]

    res = {
        'parents': parent,
        'loctype': loctype,
        'text': text,
        'current': stub
    }
    return render_template('index.html', payload=res)


@app.route('/answer')
def answer():
    """Return the top search hits for ``?q=`` as JSON.

    The response is a list of ``{'blob': ..., 'summary': ...}`` objects, one
    per row returned by ``search_code``. A missing or blank query yields an
    empty list.
    """
    q = request.args.get('q', '').strip()
    if not q:
        # Nothing to search for: skip the embedding request entirely.
        return jsonify([])
    hits = search_code(df, q)
    res = [
        {'blob': record['blob'], 'summary': record['summary']}
        for record in hits.to_dict('records')
    ]
    return jsonify(res)

@app.route('/explain')
def explain():
    """Return the generated answer for ``?q=`` as a JSON string.

    A missing or blank query yields an empty string without calling
    ``generate_answer``.
    """
    q = request.args.get('q', '').strip()
    if not q:
        # No question was asked: skip the embedding and completion requests.
        return jsonify("")
    return jsonify(generate_answer(q))


if __name__ == '__main__':
    # Development entry point: serves on port 5001 with Flask's debug flag on.
    # NOTE(review): debug mode is presumably unsafe to expose beyond
    # localhost -- confirm before enabling the 0.0.0.0 binding below.
    app.run(port=5001, debug=True)
    #app.run(host="0.0.0.0", port=5001, debug=True)