import ast
from collections import defaultdict
import os
import pandas as pd
import openai 
import tiktoken
from openai.embeddings_utils import get_embedding, cosine_similarity

from tree_sitter import Language, Parser

SOURCE_DIR = './'

openai.api_key = os.getenv('END_OF_WORLD')

#def get_block(code, node, code_type, file_path):
#    """combine a bunch of data about a function. return dictionary"""
#    blob = f"{node['pretext']}{ast.get_source_segment(code, node['node'])}"
#    return {
#        'code_type': code_type, 
#        'source': blob,
#        'start_line': node['node'].lineno,
#        'end_line': node['node'].end_lineno,
#        'chars': len(blob),
#        'file_path': file_path
#    }

def ts_query(lang, tree, sexp):
    """Run a tree-sitter query against an entire parse tree.

    The S-expression pattern ``sexp`` is compiled with ``lang.query`` and
    matched starting at ``tree.root_node``; whatever ``captures`` returns is
    handed back unchanged.
    """
    return lang.query(sexp).captures(tree.root_node)

def ts_get_all_code_blocks(lang, code_blocks, file_path, tree, code):
    """Use treesitter to collect every function and method declaration.

    Two queries (``function_declaration`` and ``method_declaration``) are run
    against ``tree``; one dictionary per capture is appended to
    ``code_blocks``.

    Args:
        lang: language object forwarded to ``ts_query``.
        code_blocks: list that is extended in place with the result dicts.
        file_path: stored verbatim under the ``'file_path'`` key.
        tree: parse tree to query.
        code: the bytes the tree was parsed from; node byte offsets are used
            to slice it and the slice is decoded as UTF-8.

    Each appended dict has the keys ``'code_type'`` (the capture name),
    ``'source'``, ``'start_line'`` / ``'end_line'`` (rows taken from the
    node's ``start_point`` / ``end_point``), ``'chars'`` and ``'file_path'``.

    Returns:
        None; results are delivered through ``code_blocks``.
    """
    # TODO need way to switch between declaration and definition ..
    # e.g. golang does not have function definitions according to treesitter
    patterns = (
        """(function_declaration) @function""",
        """(method_declaration) @method""",
    )

    # Run every query before touching code_blocks so a failing query leaves
    # the caller's list unmodified.
    captures = []
    for pattern in patterns:
        captures.extend(ts_query(lang, tree, pattern))

    def to_block(node, capture_name):
        source = code[node.start_byte:node.end_byte].decode('utf-8')
        return {
            'code_type': capture_name,
            'source': source,
            'start_line': node.start_point[0],
            'end_line': node.end_point[0],
            # Count decoded characters, not bytes: the two differ as soon as
            # the source contains non-ASCII text.
            'chars': len(source),
            'file_path': file_path
        }

    code_blocks.extend(to_block(node, name) for node, name in captures)


def ts_get_all_code_blocks_old(code_blocks, file_path, node):
    """Walk a treesitter node recursively and record every descendant.

    For each descendant of ``node`` (visited parent-before-children) a dict
    with the keys ``'code_type'``, ``'start_line'``, ``'end_line'``,
    ``'chars'`` and ``'file_path'`` is appended to ``code_blocks``. Unlike
    ``ts_get_all_code_blocks`` no ``'source'`` key is produced, and
    ``'chars'`` is the node's byte span (``end_byte - start_byte``).

    Args:
        code_blocks: list that is extended in place.
        file_path: stored verbatim under the ``'file_path'`` key.
        node: node whose ``children`` are traversed; ``node`` itself is not
            recorded.

    Returns:
        None; results are delivered through ``code_blocks``.
    """
    for child in node.children:
        code_blocks.append({
            'code_type': child.type,
            'start_line': child.start_point[0],
            'end_line': child.end_point[0],
            'chars': child.end_byte - child.start_byte,
            'file_path': file_path
        })
        # Recurse into this same walker; the query-based
        # ts_get_all_code_blocks takes a different argument list.
        ts_get_all_code_blocks_old(code_blocks, file_path, child)

def parse_file(file_path):
    """Take a source code file and return a pd dataframe of its code blocks.

    The file is parsed with the Go grammar loaded from ``./tree-go.so``;
    function and method declarations are collected via
    ``ts_get_all_code_blocks`` and then passed through the collation step,
    which merges consecutive ``'import'`` / ``'assign'`` blocks.

    Args:
        file_path: path of the source file; it is read as UTF-8 text.

    Returns:
        A ``pandas.DataFrame`` with one row per (possibly collated) block.
    """
    # Decode explicitly instead of relying on the locale's default encoding,
    # and encode once so the parser and the slicer see the same bytes.
    with open(file_path, 'r', encoding='utf-8') as f:
        source_bytes = f.read().encode('utf8')

    # Tree-Sitter
    parser = Parser()
    lang = Language("./tree-go.so", "go")
    parser.set_language(lang)
    tree = parser.parse(source_bytes)

    code_blocks = []
    ts_get_all_code_blocks(lang, code_blocks, file_path, tree, source_bytes)

    #TODO
    # collate imports, assign
    return pd.DataFrame(_collate_blocks(code_blocks, ('import', 'assign')))


def _collate_blocks(code_blocks, collate_types):
    """Merge each run of consecutive same-typed blocks listed in collate_types.

    Blocks whose ``'code_type'`` is not in ``collate_types`` pass through
    untouched. Input dicts are never mutated; merged runs are new dicts.
    """
    merged = []
    pending = None  # the run currently being accumulated, if any

    for block in code_blocks:
        if block['code_type'] not in collate_types:
            if pending is not None:
                merged.append(pending)
                pending = None
            merged.append(block)
        elif pending is None:
            pending = dict(block)
        elif pending['code_type'] == block['code_type']:
            pending['source'] += f"\n{block['source']}"
            pending['start_line'] = min(pending['start_line'], block['start_line'])
            # Compare against the run's current end, not its start, so the
            # merged span never shrinks.
            pending['end_line'] = max(pending['end_line'], block['end_line'])
            # +1 accounts for the newline inserted between the two sources.
            pending['chars'] += block['chars'] + 1
        else:
            merged.append(pending)
            pending = dict(block)

    # A run that reaches the end of the input must still be emitted.
    if pending is not None:
        merged.append(pending)
    return merged


def get_files_to_parse(root_path, files_extensions_to_parse=('py',), dirs_to_ignore=('tests',)) -> list:
    """Get all source file paths under ``root_path`` as a list.

    Args:
        root_path: directory to walk recursively.
        files_extensions_to_parse: extensions (without the leading dot) a
            file must have to be included. Files with no dot in their name
            have no extension and are never included.
        dirs_to_ignore: directory names to skip. A matching directory is
            pruned from the walk, so nothing beneath it is returned; files
            directly inside ``root_path`` are also skipped when its own
            final path component matches.

    Returns:
        List of paths, each built by joining the walked directory and the
        file name.
    """
    files_to_parse = []
    for root, dirs, files in os.walk(root_path):
        # In-place edit of ``dirs`` stops os.walk from descending into
        # ignored directories at all.
        dirs[:] = [d for d in dirs if d not in dirs_to_ignore]
        if os.path.basename(root) in dirs_to_ignore:
            continue
        for name in files:
            _, dot, extension = name.rpartition('.')
            if dot and extension in files_extensions_to_parse:
                files_to_parse.append(os.path.join(root, name))
    return files_to_parse

def generate_summary(prompt):
    """Request a summary of ``prompt`` from the OpenAI completion API.

    A fixed instruction line is appended to ``prompt`` and the result is sent
    to ``openai.Completion.create`` with the ``text-davinci-003`` model.

    Returns:
        The ``"text"`` field of the first entry in the response's
        ``"choices"`` list.
    """
    request = {
        "model": "text-davinci-003",
        "prompt": prompt + '\nSummarize the above code: ',
        "temperature": 0.7,
        "max_tokens": 1024,
        "top_p": 1.0,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
        "stop": ["\"\"\""],
    }
    completion = openai.Completion.create(**request)
    first_choice = completion["choices"][0]
    return first_choice["text"]

# nate function to create blob. the blob just contains the file path and the source code.
def blobify(pandaSeries):
    """Build the text blob for one row: the file path line, then the source.

    ``pandaSeries`` is indexed with the keys ``'file_path'`` and ``'source'``.
    """
    path = pandaSeries['file_path']
    body = pandaSeries['source']
    return f"file path: {path}\n {body}"


### doing stuff!!
# Pipeline: parse one Go file, attach a text blob per block, summarize each
# blob, and dump a CSV snapshot after every stage.

df = parse_file("../../dirserver/src/dirserver/fdpoller.go")
df.to_csv('test.csv')

# Row-wise: combine file path and source into the text sent for summarizing.
df["blob"] = df.apply(blobify, axis=1)

print(type(df))
print(df)

df.to_csv('test_with_blob.csv')

print('startng to generate summary')
# One completion request per row.
df["summary"] = df["blob"].apply(generate_summary)
print('done with generate summary')

df.to_csv('test_with_summary.csv')