diff options
author | Nate Buttke <nate-web@riseup.net> | 2023-07-26 23:40:32 -0700 |
---|---|---|
committer | Nate Buttke <nate-web@riseup.net> | 2023-07-26 23:40:32 -0700 |
commit | ad9ebbe7c78c2cf7c717d7898534371d59f325d9 (patch) | |
tree | a8b4986e7649ec1c74802c0306339919d4304875 /setup_cont.py | |
parent | 0985b7f2d467ecbeba0c6ca51ba03236cd4ff929 (diff) |
today's work. fixed multi-file parsing. don't send too-large files up to API. Generate embeddings.
Diffstat (limited to 'setup_cont.py')
-rw-r--r-- | setup_cont.py | 16 |
1 files changed, 0 insertions, 16 deletions
```diff
diff --git a/setup_cont.py b/setup_cont.py
deleted file mode 100644
index 360c9f9..0000000
--- a/setup_cont.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import ast
-from collections import defaultdict
-import os
-import pandas as pd
-import openai
-import tiktoken
-from openai.embeddings_utils import get_embedding, cosine_similarity
-
-openai.api_key = os.getenv('END_OF_WORLD')
-
-df=pd.read_csv("setup_dataWithSummary.csv")
-embedding_model = "text-embedding-ada-002"
-df["embedding_summary"] = df.summary.apply([lambda x: get_embedding(x, engine=embedding_model)])
-print(df)
-
-df.to_csv('setup_dataWithSummaryEmbed.csv')
```