summary refs log tree commit diff
path: root/setup_cont.py
diff options
context:
space:
mode:
author Nate Buttke <nate-web@riseup.net> 2023-07-26 23:40:32 -0700
committer Nate Buttke <nate-web@riseup.net> 2023-07-26 23:40:32 -0700
commit ad9ebbe7c78c2cf7c717d7898534371d59f325d9 (patch)
tree a8b4986e7649ec1c74802c0306339919d4304875 /setup_cont.py
parent 0985b7f2d467ecbeba0c6ca51ba03236cd4ff929 (diff)
today's work. fixed multi-file parsing. don't send too-large files up to API. Generate embeddings.
Diffstat (limited to 'setup_cont.py')
-rw-r--r-- setup_cont.py 16
1 files changed, 0 insertions, 16 deletions
diff --git a/setup_cont.py b/setup_cont.py
deleted file mode 100644
index 360c9f9..0000000
--- a/setup_cont.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import ast
-from collections import defaultdict
-import os
-import pandas as pd
-import openai
-import tiktoken
-from openai.embeddings_utils import get_embedding, cosine_similarity
-
-openai.api_key = os.getenv('END_OF_WORLD')
-
-df=pd.read_csv("setup_dataWithSummary.csv")
-embedding_model = "text-embedding-ada-002"
-df["embedding_summary"] = df.summary.apply([lambda x: get_embedding(x, engine=embedding_model)])
-print(df)
-
-df.to_csv('setup_dataWithSummaryEmbed.csv')