import html
import os

from langchain.chains import AnalyzeDocumentChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.llms import OpenAI
# SECURITY: credentials are read from the environment instead of being
# committed to source control. The secret key that used to be hard-coded on
# this line has been published with the file and must be revoked/rotated.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before starting the server."
    )

# The organization id is an identifier rather than a secret; it stays as the
# default but can be overridden without editing the code.
openai_organization = os.environ.get("OPENAI_ORGANIZATION", "org-cyAZLE0i6Fr1PUR0zuh3aJ31")

# Chat model shared by every chain in this module (temperature 0.0).
lm = ChatOpenAI(model="gpt-4-1106-preview", temperature=0.0, api_key=openai_api_key, organization=openai_organization)
#lm = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0, api_key=openai_api_key, organization=openai_organization)

import csv
from langchain.schema import Document

from langchain.text_splitter import RecursiveCharacterTextSplitter

import pandas as pd

import http.server
import socketserver
from urllib.parse import urlparse, parse_qs
import cgi

from PyPDF2 import PdfReader
import re
import os

from docx2python import docx2python

# TODO: filter options to filter out yes, partial, no
# TODO: fix table UI (pretty)

# TODO: generic ask box for user submittable question

# TODO: load csv from excel

# Splitter configured for chunks of up to 2000 characters (measured with
# len) with a 10-character overlap, splitting on newlines only.
# NOTE(review): nothing in the visible file references `text_splitter` —
# confirm whether it is still needed.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000, chunk_overlap=10, length_function = len, separators=["\n"],
)

# Clause libraries are tab-separated files in the working directory:
# 'clauses_v3.csv' for EULA/SOW/MSA and 'nda_clauses.csv' for NDA.
def load_csv_as_documents(file_path: str, contract_type: str) -> list[Document]:
    """Load the clause library for ``contract_type`` as a list of Documents.

    Args:
        file_path: Kept for backward compatibility only. The file that is
            actually read is chosen from ``contract_type`` (existing callers
            rely on that override), so this value is not opened any more;
            previously it was read once and the result thrown away.
        contract_type: "EULA", "SOW" or "MSA" select rows of
            ``clauses_v3.csv`` whose 'Contract Type' column contains the
            type; "NDA" selects rows of ``nda_clauses.csv`` that have a
            non-empty 'Provision'.

    Returns:
        One Document per selected row. For EULA/SOW/MSA the page content is
        the 'Text' column and the metadata keys are 'subject_section' and
        'area'; for NDA the page content is the 'Carlyle Standard' column and
        the metadata keys are 'provision' and 'notes'. Any other contract
        type yields an empty list.
    """
    print(f"Using contract type {contract_type}")

    if contract_type in ["EULA", "SOW", "MSA"]:
        table = pd.read_csv("clauses_v3.csv", delimiter='\t')
        # na=False drops rows whose 'Contract Type' cell is empty.
        selected = table[table['Contract Type'].str.contains(contract_type, na=False)]
        text_column = 'Text'
        metadata_columns = {'subject_section': 'Subject/Section', 'area': 'Area'}
    elif contract_type == "NDA":
        table = pd.read_csv("nda_clauses.csv", delimiter='\t')
        selected = table[table['Provision'].str.len() > 0]
        text_column = 'Carlyle Standard'
        metadata_columns = {'provision': 'Provision', 'notes': 'Notes'}
    else:
        # Unsupported type: there is no clause library to load.
        return []

    print(f"Got {len(selected)} rows")
    return [
        Document(
            page_content=row[text_column],
            metadata={key: row[column] for key, column in metadata_columns.items()},
        )
        for _, row in selected.iterrows()
    ]

# Contract types this application distinguishes.
contract_types = ("EULA", "SOW", "MSA", "NDA")

# Module-level clause list, loaded with the MSA clauses at import time;
# SimpleHTTPRequestHandler.do_fake_submit rebinds it for the selected type.
docs = load_csv_as_documents('clauses_v3.csv', "MSA")

from langchain.chains.question_answering import load_qa_chain

# Question-answering chain over the chat model, using the "map_reduce" chain type.
qa_chain = load_qa_chain(lm, chain_type="map_reduce")

# Function to analyze contract
def analyze_contract(contract_text, guidelines, guidelines_index, contract_type):
    """Ask the QA chain whether a contract satisfies the selected guidelines.

    Args:
        contract_text: Full text of the contract under review.
        guidelines: Indexable collection of guideline Documents.
        guidelines_index: Iterable of indexes into ``guidelines`` to check.
        contract_type: "EULA", "SOW" or "MSA" (results keyed by the
            guideline's 'subject_section' metadata) or "NDA" (keyed by
            'provision').

    Returns:
        Dict mapping the guideline key to the chain's answer. Empty when
        ``guidelines_index`` is empty or ``contract_type`` is unsupported.
    """
    if contract_type in ["EULA", "SOW", "MSA"]:
        result_key = 'subject_section'
    elif contract_type == "NDA":
        result_key = 'provision'
    else:
        result_key = None

    results = {}
    if result_key is None:
        # Answers for an unsupported type were never stored, so do not spend
        # model calls on them.
        print(results)
        return results

    document_text = f"Contract: {contract_text}"
    for g in guidelines_index:
        guideline = guidelines[g]
        # The question names the actual contract type; it used to say "NDA"
        # for every type.
        question = f"Guideline: {guideline}\n\nDoes the following {contract_type} contract comply or mention this guideline? Please include the section number from the document and you must start your response with Yes, No, or Partial. \n"
        print(question)
        response = qa_document_chain.run(question=question, input_document=document_text)
        results[guideline.metadata[result_key]] = response
    print(results)
    return results

def print_results(results):
    """Print every entry of ``results``: the key followed by ': ' on one
    line, then the value on the next line behind a tab."""
    for label, verdict in results.items():
        print(f'{label}: ', "\t" + f'{verdict}', sep="\n")

# Document-level chain built around qa_chain; analyze_contract calls its
# run() with a question and the contract text as `input_document`.
qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)

# Module-level cache filled by SimpleHTTPRequestHandler.get_settings with the
# keys "filename", "file" and "doc_type".
settings = {}

class SimpleHTTPRequestHandler(http.server.BaseHTTPRequestHandler):
    """HTTP front end for the clause-compliance checker.

    GET serves ``page.html``, or a file from ``samples/`` when the request
    path contains a ``pdf`` segment. POST accepts ``multipart/form-data``:
    an uploaded "document" and the "options" choice are persisted through
    ``settings.txt``, then the request is routed to ``/api/check/<n>``,
    ``/api/text/<n>`` or, for non-API paths, to the clause table.

    State is shared through the module-level ``docs`` and ``settings``
    globals and through ``settings.txt`` in the working directory.
    """

    # Value of the "options" form field -> contract type stored in settings.txt.
    _OPTION_TO_CONTRACT_TYPE = {"1": "EULA", "2": "SOW", "3": "MSA", "4": "NDA"}

    def respond_with(self, out, filename, contenttype=""):
        """Send the contents of ``filename`` as a 200 response.

        Args:
            out: Callable that writes the body bytes (``self.wfile.write``).
            filename: Path of the file to send.
            contenttype: Optional value for the Content-type header.

        Replies 404 when the file cannot be read.
        """
        # Read first so that a missing file produces a 404 instead of a 200
        # status line followed by an exception.
        try:
            with open(filename, 'rb') as h:
                payload = h.read()
        except OSError:
            self.send_error(404, "File not found")
            return

        self.send_response(200)
        if contenttype != "":
            self.send_header("Content-type", contenttype)
        self.end_headers()
        out(payload)
        # log_message() applies %-formatting, so the path is passed as an
        # argument; a '%' in the URL would otherwise break the format string.
        self.log_message("Responded to %s with %s", self.path, filename)

    def do_GET(self):
        """Serve a sample file for paths with a ``pdf`` segment, else ``page.html``."""
        out = self.wfile.write
        split = self.path.split("/")
        print(split)
        if len(split) >= 3 and "pdf" in split:
            # Keep only the final path component so the request cannot leave
            # samples/ (backslashes act as separators on Windows).
            requested = os.path.basename(split[-1].replace("\\", "/"))
            if requested in ("", ".", ".."):
                self.send_error(404, "File not found")
            else:
                self.respond_with(out, "samples/" + requested)
            return

        with open("page.html", "r") as page:
            response_data = page.read()
        self.send_response(200)
        self.end_headers()
        self.wfile.write(bytes(response_data, "utf-8"))

    def do_fake_submit(self):
        """Reload the clause list for the configured contract type and send
        an HTML table with one row (label, submit form, answer frame, clause
        text) per clause."""
        # replace later
        global docs
        self.get_settings()

        contract_type = settings["doc_type"]

        # The path argument is not what selects the file; the loader picks
        # the clause library from contract_type.
        docs = load_csv_as_documents('nda_clauses.csv', contract_type)

        with open("table.css", "r") as css:
            table_style = css.read()

        pieces = [f"""<head><style>{table_style}</style><meta charset="UTF-8"></head>""" + """<body><table border="1">
    <tr>
        <th>Subject/Section</th>
        <th>Submit</th>
        <th width="80%">Results</th>
        <th width="3%">Text</th>
    </tr>
"""]

        # generate response
        for di, d in enumerate(docs):
            # Relative action: the form posts back to whichever host/port
            # served this table.
            form = f"""
            <form action="/api/check/{di}" method="post" target="answer_frame_{di}" enctype="multipart/form-data">
            <input class="submit_btn" id="submit_{di}" type="submit" value="Submit">
            </form>
            """

            response_content = ""
            if contract_type in ["EULA", "SOW", "MSA"]:
                response_content = d.metadata["subject_section"]
            elif contract_type == "NDA":
                response_content = d.metadata["provision"]

            # CSV cells may be non-strings (e.g. NaN) and may contain
            # characters that are special in HTML.
            label = html.escape(str(response_content))
            clause_text = html.escape(str(d.page_content))

            answer_frame = f'<iframe name="answer_frame_{di}" width="100%" height="100"></iframe>'
            text_content = (
                '<details class="clausedetails"><summary><strong>&lt;</strong></summary><p>'
                + clause_text
                + "</p></details>"
            )

            pieces.append(
                "<tr>\n<td class=\"subjectcolumn\">" + label
                + "</td>\n<td class=\"submitcolumn\">" + form
                + "</td>\n<td>" + answer_frame
                + "</td>\n<td>" + text_content
                + "</td>\n</tr>\n"  # every row is closed, not only the last
            )

        pieces.append("</table></body>")

        self.send_response(200)
        self.end_headers()
        self.wfile.write(bytes("".join(pieces), "utf-8"))

    def parse_api(self, url: str):
        """Return the path segments after ``/api/``, or ``None`` for other URLs.

        Example: ``"/api/check/3"`` gives ``["check", "3"]``.
        """
        split = url.split('/')
        print(split)

        if len(split) >= 3 and split[1] == 'api':
            return split[2:]
        return None

    def _doc_index(self, doc_num):
        """Return ``doc_num`` as a valid index into ``docs``.

        Sends a 400 (not an integer) or 404 (out of range) response and
        returns ``None`` when the value is unusable.
        """
        try:
            index = int(doc_num)
        except (TypeError, ValueError):
            self.send_error(400, "Clause index must be an integer")
            return None
        if not 0 <= index < len(docs):
            self.send_error(404, "No such clause")
            return None
        return index

    def do_text(self, doc_num):
        """Send the text of clause number ``doc_num`` as the response body."""
        index = self._doc_index(doc_num)
        if index is None:
            return

        response = docs[index].page_content

        self.send_response(200)
        self.end_headers()
        self.wfile.write(bytes(str(response), "utf-8"))

    def get_settings(self):
        """Refresh the module-level ``settings`` dict from ``settings.txt``.

        The first line of the file is the path of the extracted contract
        text and the second line is the contract type. Sets the keys
        "filename", "file" (the contract text) and "doc_type".

        Raises:
            ValueError: ``settings.txt`` has fewer than two lines.
            OSError: ``settings.txt`` or the contract text cannot be read.
        """
        global settings

        with open("settings.txt", "r") as s:
            # splitlines() removes the line terminator without eating a real
            # character when the last line has no trailing newline.
            lines = s.read().splitlines()

        if len(lines) < 2:
            raise ValueError(
                "settings.txt must contain a document path and a contract type; "
                "upload a .docx document and choose a contract type first"
            )

        filename, doc_type = lines[0], lines[1]
        with open(filename, "r", encoding="utf-8", errors="replace") as contract:
            contract_text = contract.read()

        # Assigned only after every read succeeded, so a failure does not
        # leave the dict half-updated.
        settings["filename"] = filename
        settings["file"] = contract_text
        settings["doc_type"] = doc_type

    def do_analysis(self, doc_num):
        """Check the stored contract against clause ``doc_num`` and send a
        colour-coded HTML verdict (red for No, green for Yes, else orange)."""
        index = self._doc_index(doc_num)
        if index is None:
            return
        self.get_settings()

        contract_text = settings["file"]

        compliance_results = analyze_contract(contract_text, docs, range(index, index + 1), settings["doc_type"])

        # Only one guideline is analysed, so there is at most one answer.
        result = ""
        for v in compliance_results.values():
            result = str(v)

        # Leading whitespace in the model's answer must not hide the verdict.
        verdict = result.lstrip()
        if verdict.startswith("No"):
            color = "red"
        elif verdict.startswith("Yes"):
            color = "green"
        else:
            color = "orange"

        # The answer is model output derived from an uploaded document, so it
        # is escaped before being placed in the page.
        response = f"""<body style="margin: 0; ">
        <div style="display: flex; height: 100px;">
        <div style="min-width: 10px; background-color: {color};"></div>
        <div style="margin-left: 20px;">
            {html.escape(result)}
        </div>
    </div>
    </body>
"""

        self.send_response(200)
        self.end_headers()
        self.wfile.write(bytes(response, "utf-8"))

    def do_POST(self):
        """Parse a form submission and route it to exactly one responder.

        ``/api/check/<n>`` runs the analysis, ``/api/text/<n>`` returns the
        clause text, any other ``/api/...`` path gets a plain acknowledgement,
        and non-API paths return the clause table.
        """
        print(self.path)
        p = self.parse_api(self.path)

        # Extract the boundary from the Content-Type header (quoted or not).
        content_type_header = self.headers.get('Content-Type') or ""
        boundary_match = re.search(r'boundary="?([^";]+)"?', content_type_header, re.IGNORECASE)

        # Read the POST data
        content_length = int(self.headers.get('Content-Length') or 0)
        post_data = self.rfile.read(content_length)

        if boundary_match:
            boundary = boundary_match.group(1).strip().encode()  # data is bytes
            parsed_data = self.parse_multipart_form_data(post_data, boundary)
        else:
            print("No multipart boundary in Content-Type; ignoring request body")
            parsed_data = {}

        # Handle the parsed data (printing for demonstration)
        for name, value in parsed_data.items():
            print("Name:", name)
            if value['filename']:
                print("Filename:", value['filename'])
            print("Content Type:", value['content_type'])
            print("Data length:", len(value['data']))

        # Each branch sends one complete response. A second response used to
        # be appended after the routed handler had already replied.
        if p is None:
            self.do_fake_submit()
        elif len(p) >= 2 and p[0] == "check":
            print(f"p[1] = {p[1]}")
            self.do_analysis(p[1])
        elif len(p) >= 2 and p[0] == "text":
            print(f"p[1] = {p[1]}")
            self.do_text(p[1])
        else:
            self.send_response(200)
            self.end_headers()
            self.wfile.write(bytes("POST request received", "utf-8"))

    def parse_multipart_form_data(self, form_data, boundary):
        """Parse a ``multipart/form-data`` body and persist any submission.

        Args:
            form_data: Raw request body (bytes).
            boundary: Boundary token from the Content-Type header (bytes).

        Returns:
            Dict mapping each field name to a dict with 'filename' (or
            ``None``), 'content_type' (default 'text/plain') and 'data'
            (bytes).

        Side effects:
            When a "document" or "options" field is present, the upload is
            saved under ``uploads/`` and ``settings.txt`` is rewritten.
        """
        parsed_data = {}

        # Splitting on the bare boundary token leaves every piece ending with
        # the CRLF and "--" that precede the next delimiter.
        for part in form_data.split(boundary):
            # Remove leading and trailing CRLF
            part = part.strip(b'\r\n')

            # Split the headers and the body
            headers, _, body = part.partition(b'\r\n\r\n')
            headers = headers.decode(errors="replace")

            # Drop the delimiter prefix that trails the field content.
            if body.endswith(b'\r\n--'):
                body = body[:-4]

            # Extract name and filename from Content-Disposition header
            disposition_match = re.search(r'Content-Disposition: form-data; name="(.*?)";? ?(filename="(.*?)")?', headers, re.IGNORECASE)
            if not disposition_match:
                continue
            content_type_match = re.search(r'Content-Type: (\S+)', headers, re.IGNORECASE)

            parsed_data[disposition_match.group(1)] = {
                'filename': disposition_match.group(3) or None,
                'content_type': content_type_match.group(1) if content_type_match else 'text/plain',
                'data': body,  # Body is still in bytes
            }

        if parsed_data.get("document") or parsed_data.get("options"):
            self._store_submission(parsed_data)

        return parsed_data

    def _store_submission(self, parsed_data):
        """Save the uploaded document and rewrite ``settings.txt``.

        ``settings.txt`` receives the path of the extracted text (only for
        ``.docx`` uploads) followed by the chosen contract type (only when an
        "options" field was posted).
        """
        settings_lines = []

        document = parsed_data.get("document")
        if document:
            # Only the final component of the client-supplied name is used,
            # so an upload cannot be written outside uploads/.
            safe_name = os.path.basename((document['filename'] or "").replace("\\", "/"))
            if safe_name in ("", ".", ".."):
                print("Ignoring upload without a usable filename")
            else:
                os.makedirs("uploads", exist_ok=True)
                filename = "uploads/" + safe_name
                print("Writing to \"" + filename + "\"...")
                with open(filename, "wb") as f:
                    f.write(document['data'])

                # if docx
                if filename.endswith(".docx"):
                    with docx2python(filename) as docx_content:
                        print(docx_content.text)

                        txt_filename = filename[:-5] + ".txt"
                        with open(txt_filename, "w", encoding="utf-8") as f:
                            f.write(docx_content.text)

                    settings_lines.append(txt_filename + "\n")
                # NOTE: other formats (including .pdf) are stored but not
                # converted to text, so no document path is recorded for them.

        options = parsed_data.get("options")
        if options:
            # The last byte of the field selects the contract type; anything
            # unrecognised is stored as an empty choice.
            selected = options["data"][-1:].decode("ascii", "replace")
            settings_lines.append(self._OPTION_TO_CONTRACT_TYPE.get(selected, "") + "\n")

        # Written in one step after the upload was processed, so a failure
        # above no longer leaves a truncated, unclosed settings file.
        with open("settings.txt", "w") as settings_file:
            settings_file.writelines(settings_lines)


def run(server_class=http.server.HTTPServer, handler_class=SimpleHTTPRequestHandler, port=8000):
    """Serve the application on all interfaces until interrupted.

    Args:
        server_class: Server type, called as ``server_class(address, handler_class)``;
            it must support the context-manager protocol, as
            ``http.server.HTTPServer`` does.
        handler_class: Request handler class passed to the server.
        port: TCP port to listen on (default 8000).
    """
    server_address = ('', port)
    # The context manager closes the listening socket when serving stops.
    with server_class(server_address, handler_class) as httpd:
        print(f"Starting httpd server on port {port}...")
        print(f"Open chrome to http://localhost:{port}/")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop the development server.
            print("Shutting down httpd server")

if __name__ == "__main__":
    run()

