diff --git a/__pycache__/language_models.cpython-311.pyc b/__pycache__/language_models.cpython-311.pyc
index 75c02867db46e28b9eb205285c3f469574629e0d..a29c3bfa5775e8793b283e942707ba8fa68b2a2a 100644
Binary files a/__pycache__/language_models.cpython-311.pyc and b/__pycache__/language_models.cpython-311.pyc differ
diff --git a/chatbot_complet.py b/chatbot_complet.py
index e819f5ef340fc1c9d4ac6850b23427e602ca5b3e..e37cc51af19fc8ee6a1465c59a9918ac857cd891 100644
--- a/chatbot_complet.py
+++ b/chatbot_complet.py
@@ -194,7 +194,7 @@ class ChatbotInterface(QWidget):
         self.conversation_text.setSizePolicy(size_policy)
 
         # Définir la fenêtre principale
-        icon = QIcon("chatbot.png")
+        icon = QIcon("public/chatbot.png")
         self.setWindowIcon(icon)
         self.setWindowTitle('chatbot')
         self.setGeometry(100, 100, 800, 600)
diff --git a/visuel.py b/chatbot_with_summarizer.py
similarity index 100%
rename from visuel.py
rename to chatbot_with_summarizer.py
diff --git a/chroma_db/baf33d61-bd22-46b8-8f57-12158edf1e13/data_level0.bin b/chroma_db/baf33d61-bd22-46b8-8f57-12158edf1e13/data_level0.bin
deleted file mode 100644
index 1a5701dd124346f3b834ca45a9e11c0f445c4dac..0000000000000000000000000000000000000000
Binary files a/chroma_db/baf33d61-bd22-46b8-8f57-12158edf1e13/data_level0.bin and /dev/null differ
diff --git a/chroma_db/baf33d61-bd22-46b8-8f57-12158edf1e13/header.bin b/chroma_db/baf33d61-bd22-46b8-8f57-12158edf1e13/header.bin
deleted file mode 100644
index 074f5b8bbdc6cd0eaee77b7377f939bb31d39dcf..0000000000000000000000000000000000000000
Binary files a/chroma_db/baf33d61-bd22-46b8-8f57-12158edf1e13/header.bin and /dev/null differ
diff --git a/chroma_db/baf33d61-bd22-46b8-8f57-12158edf1e13/length.bin b/chroma_db/baf33d61-bd22-46b8-8f57-12158edf1e13/length.bin
deleted file mode 100644
index 92b3bc0a74d1941c6c3b2b14de16e69ed449d0df..0000000000000000000000000000000000000000
Binary files a/chroma_db/baf33d61-bd22-46b8-8f57-12158edf1e13/length.bin and /dev/null differ
diff --git a/chroma_db/baf33d61-bd22-46b8-8f57-12158edf1e13/link_lists.bin b/chroma_db/baf33d61-bd22-46b8-8f57-12158edf1e13/link_lists.bin
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/chroma_db/chroma.sqlite3 b/chroma_db/chroma.sqlite3
deleted file mode 100644
index f086ce344ccddc73f22a79f9c3286ea12296e8d5..0000000000000000000000000000000000000000
Binary files a/chroma_db/chroma.sqlite3 and /dev/null differ
diff --git a/chroma_db/vector/chroma.sqlite3 b/chroma_db/vector/chroma.sqlite3
deleted file mode 100644
index f7db24fdddbc6a26dda0dbe0bfe086318d3b54e4..0000000000000000000000000000000000000000
Binary files a/chroma_db/vector/chroma.sqlite3 and /dev/null differ
diff --git a/doc_output.py b/doc_output.py
deleted file mode 100644
index 7559e95b9b20505c6818a9841fa30d437892b7a5..0000000000000000000000000000000000000000
--- a/doc_output.py
+++ /dev/null
@@ -1,27 +0,0 @@
-print("----------------------------------------------> Importation des modules")
-import os
-
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import Chroma
-
-embedding_function = HuggingFaceEmbeddings()
-VECTORS_DIRECTORY = "text_files" # Change this directory if needed
-
-# Charge des documents locaux
-
-print("----------------------------------------------> Charge des documents locaux")
-persist_directory = os.path.join(VECTORS_DIRECTORY, "vector")
-new_db = Chroma(persist_directory=persist_directory, embedding_function=embedding_function)
-print("Entrez votre requête :")
-
-query = input()
-embedding_vector = embedding_function.embed_query(query)
-
-tot_docs = ""
-print("----------------------------------------------> Recherche dans la documentation")
-docs = new_db.similarity_search_by_vector(embedding_vector, k=3)
-
-for i in range(len(docs)):
-    tot_docs += f'data {str(i)} : {docs[i].page_content}\n'
-
-print(tot_docs)
diff --git a/drive_data_nlp/nb_chunks_by_ids_drive_data_nlp.npy b/drive_data_nlp/nb_chunks_by_ids_drive_data_nlp.npy
deleted file mode 100644
index a8e4e20e847da4a30a1482115ab438b20f8a5352..0000000000000000000000000000000000000000
Binary files a/drive_data_nlp/nb_chunks_by_ids_drive_data_nlp.npy and /dev/null differ
diff --git a/drive_data_nlp/update.csv b/drive_data_nlp/update.csv
deleted file mode 100644
index ceccb8901e7819838d5c5cd39295ac25c536db81..0000000000000000000000000000000000000000
--- a/drive_data_nlp/update.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-date
-2023-11-26 23:50:41.434831
diff --git a/drive_data_nlp/vector/815d4669-cbfd-4eb1-856f-75a8f3544364/data_level0.bin b/drive_data_nlp/vector/815d4669-cbfd-4eb1-856f-75a8f3544364/data_level0.bin
deleted file mode 100644
index e2a709f29d3724014ce55f923d2340d4549daaff..0000000000000000000000000000000000000000
Binary files a/drive_data_nlp/vector/815d4669-cbfd-4eb1-856f-75a8f3544364/data_level0.bin and /dev/null differ
diff --git a/drive_data_nlp/vector/815d4669-cbfd-4eb1-856f-75a8f3544364/header.bin b/drive_data_nlp/vector/815d4669-cbfd-4eb1-856f-75a8f3544364/header.bin
deleted file mode 100644
index a08d580958b127d4c884e2993b7235076c2f7b4b..0000000000000000000000000000000000000000
Binary files a/drive_data_nlp/vector/815d4669-cbfd-4eb1-856f-75a8f3544364/header.bin and /dev/null differ
diff --git a/drive_data_nlp/vector/815d4669-cbfd-4eb1-856f-75a8f3544364/index_metadata.pickle b/drive_data_nlp/vector/815d4669-cbfd-4eb1-856f-75a8f3544364/index_metadata.pickle
deleted file mode 100644
index f7a0cb7a35149677608fd11cde33bf99f200d9aa..0000000000000000000000000000000000000000
Binary files a/drive_data_nlp/vector/815d4669-cbfd-4eb1-856f-75a8f3544364/index_metadata.pickle and /dev/null differ
diff --git a/drive_data_nlp/vector/815d4669-cbfd-4eb1-856f-75a8f3544364/length.bin b/drive_data_nlp/vector/815d4669-cbfd-4eb1-856f-75a8f3544364/length.bin
deleted file mode 100644
index c5267ad281e1d34552f7e89cb03e7784a7a9b7c8..0000000000000000000000000000000000000000
Binary files a/drive_data_nlp/vector/815d4669-cbfd-4eb1-856f-75a8f3544364/length.bin and /dev/null differ
diff --git a/drive_data_nlp/vector/815d4669-cbfd-4eb1-856f-75a8f3544364/link_lists.bin b/drive_data_nlp/vector/815d4669-cbfd-4eb1-856f-75a8f3544364/link_lists.bin
deleted file mode 100644
index 42e35b0d6f1b66c3dd1db20da38a7c42b135d22f..0000000000000000000000000000000000000000
Binary files a/drive_data_nlp/vector/815d4669-cbfd-4eb1-856f-75a8f3544364/link_lists.bin and /dev/null differ
diff --git a/drive_data_nlp/vector/chroma.sqlite3 b/drive_data_nlp/vector/chroma.sqlite3
deleted file mode 100644
index 32dc3ff59e94138eafef9087440ebe170774557d..0000000000000000000000000000000000000000
Binary files a/drive_data_nlp/vector/chroma.sqlite3 and /dev/null differ
diff --git a/gdrive_ingest.py b/gdrive_ingest.py
deleted file mode 100644
index 3cac90be58d217e57ba851fd86eae38d82c35cee..0000000000000000000000000000000000000000
--- a/gdrive_ingest.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import os
-import spacy
-from langchain.docstore.document import Document
-from langchain.vectorstores import Chroma
-from transformers import AutoTokenizer, AutoModel
-
-# Load spaCy model for French
-nlp = spacy.load("fr_core_news_sm")
-
-# Directory containing text files
-text_directory = "text_files"
-
-# Function to extract paragraphs from text
-def extract_paragraphs(text):
-    doc = nlp(text)
-    paragraphs = []
-    current_paragraph = ""
-    for sent in doc.sents:
-        if '\n' in sent.text and current_paragraph:
-            paragraphs.append(current_paragraph.strip())
-            current_paragraph = ""
-        current_paragraph += sent.text.strip() + " "
-    if current_paragraph:
-        paragraphs.append(current_paragraph.strip())
-    return paragraphs
-
-# Initialize Hugging Face model and tokenizer
-model_name = "bert-base-multilingual-cased"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModel.from_pretrained(model_name)
-
-def embed(text):
-    inputs = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
-    outputs = model(**inputs)
-    return outputs.last_hidden_state.mean(dim=1).detach().numpy()
-
-# Prepare ChromaDB
-chroma_db = Chroma()
-
-# Process each text file
-documents = []
-for file_name in os.listdir(text_directory):
-    if file_name.endswith('.txt'):
-        print(f"Processing file: {file_name}")
-        file_path = os.path.join(text_directory, file_name)
-        with open(file_path, 'r', encoding='utf-8') as file:
-            text = file.read()
-            paragraphs = extract_paragraphs(text)
-            for paragraph in paragraphs:
-                vector = embed(paragraph)
-                document = Document(page_content=paragraph, embedding=vector)
-                documents.append(document)
-
-# Add documents to ChromaDB and save it
-chroma_db.add_documents(documents)
-
-persist_directory = "chroma_db"
-os.makedirs(persist_directory, exist_ok=True)
-chroma_db.save(persist_directory)
-
-print("All paragraphs have been processed and stored in ChromaDB.")
diff --git a/gdrive_ingest_with_nlp.py b/gdrive_ingest_with_nlp.py
deleted file mode 100644
index fa515c1c62dab96cfec67497f2fb2670ff6d3f2b..0000000000000000000000000000000000000000
--- a/gdrive_ingest_with_nlp.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import csv
-import datetime
-import io
-import os
-import pickle
-
-import numpy as np
-import spacy
-from PyPDF2 import PdfReader
-from google.auth.transport.requests import Request
-from google_auth_oauthlib.flow import InstalledAppFlow
-from googleapiclient.discovery import build
-from googleapiclient.http import MediaIoBaseDownload
-from langchain.docstore.document import Document
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import Chroma
-
-# Load the French language model
-nlp = spacy.load("fr_core_news_sm")
-# Google Drive API setup
-SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
-
-creds = None
-# The file token.pickle stores the user's access and refresh tokens, and is
-# created automatically when the authorization flow completes for the first time.
-if os.path.exists('token.pickle'):
-    with open('token.pickle', 'rb') as token:
-        creds = pickle.load(token)
-
-# If there are no (valid) credentials available, let the user log in.
-if not creds or not creds.valid:
-    if creds and creds.expired and creds.refresh_token:
-        creds.refresh(Request())
-    else:
-        # Path to the credentials.json file
-        credentials_path = 'credentials.json'
-        flow = InstalledAppFlow.from_client_secrets_file(credentials_path, SCOPES)
-        creds = flow.run_local_server(port=5454)
-    # Save the credentials for the next run
-    with open('token.pickle', 'wb') as token:
-        pickle.dump(creds, token)
-
-service = build('drive', 'v3', credentials=creds)
-
-# Configuration for Google Drive extraction
-print("----------------------------------------------> Importing Google Drive documents")
-folder_id = "1W4Kh4yWkqJ81d-M2eP1o6HXJE6fay8s3" # Replace with your Google Drive folder ID
-
-query = f"'{folder_id}' in parents"
-response = service.files().list(q=query,
-                                spaces='drive',
-                                fields='nextPageToken, files(id, name, mimeType)').execute()
-
-files = response.get('files', [])
-
-documents = []
-
-for file in files:
-    if file.get('mimeType') == 'application/pdf':
-        request = service.files().get_media(fileId=file['id'])
-        file_io = io.BytesIO()
-        downloader = MediaIoBaseDownload(file_io, request)
-        done = False
-        while done is False:
-            status, done = downloader.next_chunk()
-        file_io.seek(0)
-        try:
-            reader = PdfReader(file_io)
-            full_text = ""
-            for page in reader.pages:
-                full_text += page.extract_text() + "\n"
-
-            # Use spaCy to split text into paragraphs
-            doc = nlp(full_text)
-            paragraphs = []
-            current_paragraph = ""
-            for sent in doc.sents:
-                # Add sentence to the current paragraph
-                current_paragraph += sent.text + " "
-
-                # Check for paragraph breaks
-                if '\n\n' in sent.text: # Assuming two line breaks as a new paragraph indicator
-                    # Save the current paragraph and start a new one
-                    paragraphs.append(current_paragraph.strip())
-                    current_paragraph = ""
-
-            # Add the last paragraph if it's not empty
-            if current_paragraph:
-                paragraphs.append(current_paragraph.strip())
-
-            # Process each paragraph
-            for paragraph in paragraphs:
-                documents.append(Document(page_content=paragraph, metadata={'id': file['id'], 'title': file['name']}))
-
-        except Exception as e:
-            print(f"Error processing file {file['name']}: {e}")
-
-ids = []
-dict = {} # Adjusted to directly work with the documents list
-for doc in documents:
-    id_doc = doc.metadata["id"]
-    if id_doc in dict:
-        dict[id_doc] += 1
-    else:
-        dict[id_doc] = 1
-    id_total = f"{id_doc}_{dict[id_doc] - 1}"
-    ids.append(id_total)
-
-# Directory for saving data
-drive_folder = "drive_data_nlp" # Change this to your desired folder name
-os.makedirs(drive_folder, exist_ok=True)
-
-nom_fichier = f'nb_chunks_by_ids_{drive_folder}.npy'
-chemin_complet_fichier = os.path.join(drive_folder, nom_fichier)
-np.save(chemin_complet_fichier, dict) # Save the number of chunks per document for easier updates
-
-print("----------------------------------------------> Create the open-source embedding function")
-# Create the open-source embedding function
-embedding_function = HuggingFaceEmbeddings()
-
-print("----------------------------------------------> Save it into Chroma")
-# Load it into Chroma
-persist_directory = os.path.join(drive_folder, "vector")
-db = Chroma.from_documents(documents, embedding_function, ids=ids, persist_directory=persist_directory)
-
-chemin_csv = os.path.join(drive_folder, "update.csv")
-# Get the current date and time
-current_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
-with open(chemin_csv, 'w', newline='') as csvfile:
-    writer = csv.writer(csvfile)
-    writer.writerow(['date']) # Write the header row
-    writer.writerow([current_date]) # Write the updated date row
-
-print("----------------------------------------------> Saved with success")
diff --git a/ingest_txt_files.py b/ingest_txt_files.py
deleted file mode 100644
index b1e8848fff422a467d268695c0851bd9abb11d03..0000000000000000000000000000000000000000
--- a/ingest_txt_files.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# import
-from langchain.document_loaders import ConfluenceLoader
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.vectorstores import Chroma
-from langchain.docstore.document import Document
-import numpy as np
-import csv
-import datetime
-import os
-
-# Configuration de l'extraction Confluence
-print("----------------------------------------------> Importing Confluence documents")
-config = {"persist_directory": "./chroma_db/",
-          "space_key": "text_files"
-          }
-persist_directory = 'text_data'
-space_key = config.get("space_key", None)
-
-
-def load_documents_from_directory(directory):
-    documents = []
-    for filename in os.listdir(directory):
-        if filename.endswith(".txt"):
-            with open(os.path.join(directory, filename), 'r', encoding='utf-8') as file:
-                # Create the document structure
-                document = {
-                    'content': file.read(),
-                    'metadata': {'id': filename, 'title': filename}
-                }
-                documents.append(document)
-    return documents
-
-
-documents = load_documents_from_directory(space_key)
-
-# split it into chunks
-text_splitter = RecursiveCharacterTextSplitter(
-    chunk_size=1000,
-    chunk_overlap=0,
-    separators=[" ", ",", "\n"]
-)
-# Create Document objects as required by text_splitter
-document_objects = [Document(page_content=doc['content'], metadata=doc['metadata']) for doc in documents]
-
-# Split the documents
-docs = text_splitter.split_documents(document_objects)
-ids = []
-ids_doc = []
-dict = {} # creation d'un dictionnaire avec comme clés l'id du doc et son nombre chunks
-for i in range(len(docs)):
-    id_doc = docs[i].metadata["id"] # id du document duquel est extrait le chunk
-    id_supp = ids_doc.count(
-        id_doc) # nombre de chunk issus du même du document déjà identifiés -> fin de l'id total du chunk
-    id_total = str(id_doc) + "_" + str(id_supp) # id total = id du doc_nombre de chunk issu de ce document déjà mis
-    ids_doc.append(id_doc) # on rajoute l'id du doc pour compter le nombre de chunk par doc déjà identifiés
-    ids.append(id_total) # on rajoute l'id total du chunk à la liste des id totaux
-    dict[str(id_doc)] = str(id_supp + 1)
-
-nom_fichier = 'nb_chunks_by_ids' + '_' + space_key + '.npy'
-
-chemin_complet_fichier = os.path.join(space_key, nom_fichier)
-
-np.save(chemin_complet_fichier, dict) # on sauvegarde le nb de chunks par documents pour faciliter la maj
-
-print("----------------------------------------------> Create the open-source embedding function")
-# create the open-source embedding function
-embedding_function = HuggingFaceEmbeddings()
-print(f"----------------------------------------------> Save it into Chroma {space_key}")
-# load it into Chroma
-
-persist_directory = os.path.join(space_key, "vector")
-
-db = Chroma.from_documents(docs, embedding_function, ids=ids, persist_directory=persist_directory)
-
-chemin_csv = os.path.join(space_key, "update.csv")
-
-# Get the current date and time
-current_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
-with open(chemin_csv, 'w', newline='') as csvfile:
-    writer = csv.writer(csvfile)
-    writer.writerow(['date']) # Write the header row
-    writer.writerow([current_date]) # Write the updated date row
-
-print("----------------------------------------------> Saved with success")
diff --git a/preprocessing.py b/preprocessing.py
deleted file mode 100644
index edc3c5461ee2b380bf4c6d2d59746a4c01accfb4..0000000000000000000000000000000000000000
--- a/preprocessing.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import os
-import io
-import pickle
-
-from google.auth.transport.requests import Request
-from google_auth_oauthlib.flow import InstalledAppFlow
-from googleapiclient.discovery import build
-from googleapiclient.http import MediaIoBaseDownload
-from PyPDF2 import PdfReader
-
-# Google Drive API setup
-SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
-creds = None
-
-# Load credentials
-if os.path.exists('token.pickle'):
-    with open('token.pickle', 'rb') as token:
-        creds = pickle.load(token)
-
-# Refresh or obtain new credentials
-if not creds or not creds.valid:
-    if creds and creds.expired and creds.refresh_token:
-        creds.refresh(Request())
-    else:
-        flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
-        creds = flow.run_local_server(port=5454)
-    with open('token.pickle', 'wb') as token:
-        pickle.dump(creds, token)
-
-service = build('drive', 'v3', credentials=creds)
-
-# Replace with your Google Drive folder ID
-folder_id = "1W4Kh4yWkqJ81d-M2eP1o6HXJE6fay8s3" # Replace with your Google Drive folder ID
-
-# Query to list files in the folder
-query = f"'{folder_id}' in parents"
-response = service.files().list(q=query, spaces='drive', fields='nextPageToken, files(id, name, mimeType)').execute()
-files = response.get('files', [])
-
-# Directory to save text files
-text_directory = "text_files"
-os.makedirs(text_directory, exist_ok=True)
-
-print(
-    "----------------------------------------------> Importing Google Drive documents and converting them into text files")
-# Process each PDF file
-for file in files:
-    if file.get('mimeType') == 'application/pdf':
-        request = service.files().get_media(fileId=file['id'])
-        file_io = io.BytesIO()
-        downloader = MediaIoBaseDownload(file_io, request)
-        done = False
-        while not done:
-            _, done = downloader.next_chunk()
-        file_io.seek(0)
-        reader = PdfReader(file_io)
-        full_text = ""
-        for page in reader.pages:
-            full_text += page.extract_text() + "\n"
-
-        # Save the extracted text to a local file
-        text_file_path = os.path.join(text_directory, f"{file['name']}.txt")
-        with open(text_file_path, 'w', encoding='utf-8') as text_file:
-            text_file.write(full_text)
-
-print("PDF files have been converted to text and saved locally.")
diff --git a/chatbot.png b/public/chatbot.png
similarity index 100%
rename from chatbot.png
rename to public/chatbot.png
diff --git a/chatbot_architecture.jpg b/public/chatbot_architecture.jpg
similarity index 100%
rename from chatbot_architecture.jpg
rename to public/chatbot_architecture.jpg
diff --git a/readme.md b/readme.md
index b66f0a835b2235f8e115672784c181ff7b995ed6..ace6312e593afc4e0c12fbf387ace0b0ecea6a1f 100644
--- a/readme.md
+++ b/readme.md
@@ -1,30 +1,32 @@
 # Prérequis :
+
 Afin de pouvoir executer le code il faut créer un environnement virtuel python :
+
 ```
 python3 -m venv venv
 ```
+
 naviguer dans le virtual env :
+
 ```
 source venv/bin/activate
 ```
-Ensuite installer les bibliothèques nécessaires qui se trouvent dans le fichier `requirements.txt`:
+
+Ensuite installer les bibliothèques nécessaires qui se trouvent dans le fichier `requirements.txt`:
+
 ```
 pip install requirements.txt
 ```
 
 # Architecture du code
 
-Le code créée a pour objectif de reproduire cette architecture :
-
-## preprocessing.py
-Le code `preprocessing.py` récupère les fichiers pdf depuis le répertoire drive où ils sont enregistrés, les transfome
-en texte et opère des traitements dessus et les enregistre sous le répertoire `text_files`
+Le code créée a pour objectif de reproduire cette architecture :
+
+
+## Lancement de l'application :
 
-## ingest_txt_files, gdrive_ingest_with_nlp, gdrive_ingest
-ces trois codes pythons récupèrent les fichiers text enregistrés dans `text_files` (ou les pdf pour les drive_data_*),
-opère un traitement dessus de telle sorte à découper chaque document en plus petit documents (de la taille d'un paragraphe
-ou de quelques lignes en fonction du paramètrage) et les vectorise pour les enregistrer dans chromadb une bdd vectorielle.
+Pour lancer l'application il faut soit executer le script `chatbot_complet.py` qui est en fait un algo de retrieving de
+documents, il ne contient pas la partie LLM.
 
-## doc_output.py
-Ce code prend une requête utilisateur, la vectorise, cherche les n documents qui minimisent la distance cos par rapport
-à la requête et les affiche.
\ No newline at end of file
+Pour la partie LLM il faut lancer le script `chatbot_with_summarizer.py` qui contient à la fois le modèle de Q/A et
+l'affichage des documents qui sont pertinents pour la question posée.
\ No newline at end of file
diff --git a/test_combine.py b/tests/test_combine.py
similarity index 99%
rename from test_combine.py
rename to tests/test_combine.py
index f912be5f4ee801e13e4d32c1a5e9a69f592c4441..3c88f89dd3afb66737c478a8d9975d971422d5d7 100644
--- a/test_combine.py
+++ b/tests/test_combine.py
@@ -159,7 +159,7 @@ class ChatbotInterface(QWidget):
     """
     def __init__(self):
         super().__init__()
-        file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt'
+        file_path = '../reglementdescolarite-ingegeneraliste2324-1.docx.txt'
         self.text_lines = read_text_file(file_path)
 
         if not self.text_lines:
@@ -217,7 +217,7 @@ class ChatbotInterface(QWidget):
         self.conversation_text.setSizePolicy(size_policy)
 
         # Définir la fenêtre principale
-        icon = QIcon("chatbot.png")
+        icon = QIcon("../public/chatbot.png")
         self.setWindowIcon(icon)
         self.setWindowTitle('chatbot')
         self.setGeometry(100, 100, 800, 600)
diff --git a/test_doc2vect.py b/tests/test_doc2vect.py
similarity index 99%
rename from test_doc2vect.py
rename to tests/test_doc2vect.py
index 43c8d232bac885cb6616e4daf1be9151ba4547a9..3c105132cf278c5991072d9ab262c0e681f947ac 100644
--- a/test_doc2vect.py
+++ b/tests/test_doc2vect.py
@@ -175,7 +175,7 @@ class ChatbotInterface(QWidget):
     """
     def __init__(self):
         super().__init__()
-        file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt'
+        file_path = '../reglementdescolarite-ingegeneraliste2324-1.docx.txt'
         self.text_lines = read_text_file(file_path)
 
         if not self.text_lines:
@@ -233,7 +233,7 @@ class ChatbotInterface(QWidget):
         self.conversation_text.setSizePolicy(size_policy)
 
         # Définir la fenêtre principale
-        icon = QIcon("chatbot.png")
+        icon = QIcon("../public/chatbot.png")
         self.setWindowIcon(icon)
         self.setWindowTitle('chatbot')
         self.setGeometry(100, 100, 800, 600)
diff --git a/test_interface.py b/tests/test_interface.py
similarity index 96%
rename from test_interface.py
rename to tests/test_interface.py
index 0173d235502d7ab8f25e7db94d463c5d8e98ea49..1f48a78c08bed5f2893bf71674ffa557db496798 100644
--- a/test_interface.py
+++ b/tests/test_interface.py
@@ -40,7 +40,7 @@ def get_best_answers(question, text_lines, vectorizer, vectorial_base):
 class ChatbotInterface(QWidget):
     def __init__(self):
         super().__init__()
-        file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt' # Replace with the path to your text file
+        file_path = '../reglementdescolarite-ingegeneraliste2324-1.docx.txt' # Replace with the path to your text file
         self.text_lines = read_text_file(file_path)
 
         if not self.text_lines:
diff --git a/test_jacard.py b/tests/test_jacard.py
similarity index 98%
rename from test_jacard.py
rename to tests/test_jacard.py
index 18d2fda84eeca245c44568cf6f0e4adfd42fb516..db2431e765937483f6ebc9f61321532eeac024b3 100644
--- a/test_jacard.py
+++ b/tests/test_jacard.py
@@ -167,7 +167,7 @@ class ChatbotInterface(QWidget):
     """
    def __init__(self):
         super().__init__()
-        file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt'
+        file_path = '../reglementdescolarite-ingegeneraliste2324-1.docx.txt'
         self.text_lines = read_text_file(file_path)
 
         if not self.text_lines:
@@ -225,7 +225,7 @@ class ChatbotInterface(QWidget):
         self.conversation_text.setSizePolicy(size_policy)
 
         # Définir la fenêtre principale
-        icon = QIcon("chatbot.png")
+        icon = QIcon("../public/chatbot.png")
         self.setWindowIcon(icon)
         self.setWindowTitle('chatbot')
         self.setGeometry(100, 100, 800, 600)
@@ -350,7 +350,7 @@ def read_text_file(file_path):
 
 if __name__ == '__main__':
     # Example usage
-    file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt'
+    file_path = '../reglementdescolarite-ingegeneraliste2324-1.docx.txt'
     text_lines = read_text_file(file_path)
 
     target_string = "quels sont les baremes d'évaluation"
diff --git a/testcosine.py b/tests/testcosine.py
similarity index 94%
rename from testcosine.py
rename to tests/testcosine.py
index 3f4b21cd4336ef2e051ac442936cc5b4c3b6d221..cb48b2c598c01055d2bd572a733610fc4a9e6e8e 100644
--- a/testcosine.py
+++ b/tests/testcosine.py
@@ -37,7 +37,7 @@ def get_best_answers(question, text_lines, vectorizer, vectorial_base):
     return best_answers
 
 def main():
-    file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt' # Replace with the path to your text file
+    file_path = '../reglementdescolarite-ingegeneraliste2324-1.docx.txt' # Replace with the path to your text file
     text_lines = read_text_file(file_path)
 
     if not text_lines:
diff --git a/token.pickle b/token.pickle
deleted file mode 100644
index aeaabd298eccc1b97f74eb6ba61574b86a4902d4..0000000000000000000000000000000000000000
Binary files a/token.pickle and /dev/null differ
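
Note on the layout introduced by this patch: the scripts relocated to tests/ still reach their fixtures through parent-relative paths (`../public/chatbot.png`, `../reglementdescolarite-ingegeneraliste2324-1.docx.txt`), so they only resolve when the interpreter is started from inside tests/. Below is a minimal sketch, not part of the patch itself, of how such a test script could anchor those paths to its own location instead; the tests/ placement and the file names are taken from the diff above, while the helper itself is an illustrative assumption.

# Hypothetical path helper for a script living in tests/ (not in this patch):
# build absolute paths from the file's own location rather than the current
# working directory, so it behaves the same from the repo root or from tests/.
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parent.parent  # tests/ -> repository root

ICON_PATH = REPO_ROOT / "public" / "chatbot.png"
CORPUS_PATH = REPO_ROOT / "reglementdescolarite-ingegeneraliste2324-1.docx.txt"

if __name__ == "__main__":
    # Quick sanity check that the relocated assets are where the patch puts them.
    for path in (ICON_PATH, CORPUS_PATH):
        print(f"{path} -> {'found' if path.exists() else 'missing'}")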