Skip to content
Snippets Groups Projects
Commit 4ccb0b4f authored by Bouchafaa Mohamed's avatar Bouchafaa Mohamed
Browse files

Merge branch 'fix-chatbot-complet' into 'main'

Fixed chatbot_complet, which was behind main

See merge request !9
parents 860df6a2 59bf51ba
Branches
No related tags found
1 merge request: !9 Fixed chatbot_complet, which was behind main
...@@ -7,6 +7,7 @@ from sklearn.metrics.pairwise import cosine_similarity ...@@ -7,6 +7,7 @@ from sklearn.metrics.pairwise import cosine_similarity
import numpy as np import numpy as np
from nltk.corpus import stopwords from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize from nltk.tokenize import word_tokenize
from unidecode import unidecode
def read_text_file(file_path): def read_text_file(file_path):
""" """
...@@ -18,7 +19,7 @@ def read_text_file(file_path): ...@@ -18,7 +19,7 @@ def read_text_file(file_path):
""" """
with open(file_path, 'r', encoding='utf-8') as file: with open(file_path, 'r', encoding='utf-8') as file:
content = file.read().split('\n\n') content = file.read().split('\n\n')
content1 = [item for item in content if item != ""] content1 = [unidecode(item) for item in content if item != ""]
return content1 return content1
def extract_keywords_french(sentence): def extract_keywords_french(sentence):
...@@ -36,6 +37,9 @@ def extract_keywords_french(sentence): ...@@ -36,6 +37,9 @@ def extract_keywords_french(sentence):
keywords = [word for word in words if word.lower() not in stop_words] keywords = [word for word in words if word.lower() not in stop_words]
return ' '.join(keywords) return ' '.join(keywords)
def calculate_combined_score(tfidf_score, jaccard_score,
                             tfidf_weight=0.7, jaccard_weight=0.3):
    """
    Combines a TF-IDF similarity score and a Jaccard similarity score into a single score.

    Args:
        tfidf_score: Similarity score coming from the TF-IDF comparison.
        jaccard_score: Similarity score coming from the Jaccard comparison.
        tfidf_weight: Weight applied to tfidf_score (default 0.7).
        jaccard_weight: Weight applied to jaccard_score (default 0.3).

    Returns:
        The weighted sum tfidf_weight * tfidf_score + jaccard_weight * jaccard_score.
    """
    # The weights are parameters so the relative importance of each score can
    # be tuned by the caller without editing this function.
    return tfidf_weight * tfidf_score + jaccard_weight * jaccard_score
def create_vectorial_base(text_lines, min_chars=10): def create_vectorial_base(text_lines, min_chars=10):
""" """
...@@ -58,6 +62,20 @@ def create_vectorial_base(text_lines, min_chars=10): ...@@ -58,6 +62,20 @@ def create_vectorial_base(text_lines, min_chars=10):
return vectorizer, vectorial_base, feature_names return vectorizer, vectorial_base, feature_names
def jaccard_similarity(str1, str2):
    """
    Computes the Jaccard similarity between two strings after removing French stop words.

    Both strings are lower-cased, tokenized with word_tokenize and turned into
    sets of tokens; French stop words are removed from each set.

    Args:
        str1: First string to compare.
        str2: Second string to compare.

    Returns:
        len(intersection) / len(union) of the two token sets, or 0.0 when both
        sets are empty.
    """
    stop_words = set(stopwords.words('french'))
    # The corpus and the user question are passed through unidecode before
    # reaching this function, so accented stop words would never match their
    # accent-stripped tokens. Filter out the accent-stripped forms too.
    stop_words |= {unidecode(word) for word in stop_words}

    tokens_str1 = set(word_tokenize(str1.lower())) - stop_words
    tokens_str2 = set(word_tokenize(str2.lower())) - stop_words

    union = len(tokens_str1 | tokens_str2)
    if union == 0:
        # Nothing left to compare once stop words are removed.
        return 0.0
    return len(tokens_str1 & tokens_str2) / union
def get_best_answers(question, text_lines, vectorizer, vectorial_base): def get_best_answers(question, text_lines, vectorizer, vectorial_base):
""" """
Retrieves the top 3 most similar text lines to a given question based on cosine similarity. Retrieves the top 3 most similar text lines to a given question based on cosine similarity.
...@@ -74,10 +92,15 @@ def get_best_answers(question, text_lines, vectorizer, vectorial_base): ...@@ -74,10 +92,15 @@ def get_best_answers(question, text_lines, vectorizer, vectorial_base):
# Calculate cosine similarity between the question and each text line # Calculate cosine similarity between the question and each text line
similarities = cosine_similarity(question_vector, vectorial_base).flatten() similarities = cosine_similarity(question_vector, vectorial_base).flatten()
jaccard_similarities = [jaccard_similarity(question, text) for text in text_lines]
combined_scores = [calculate_combined_score(similarities, jaccard_score)
for similarities, jaccard_score in zip(similarities, jaccard_similarities)]
# Get the indices of the top 3 most similar text lines # Get the indices of the top 3 most similar text lines
top_indices = np.argsort(similarities)[-3:][::-1] top_indices = np.argsort(combined_scores)[-3:][::-1]
# Retrieve the corresponding text lines # Retrieve the corresponding text lines
best_answers = [text_lines[i]+"\n" for i in top_indices] best_answers = [text_lines[i]+"\n"+"score TFIDF : "+str(similarities[i])+" score jacard : "+str(jaccard_similarities[i])+"\n" for i in top_indices]
return best_answers return best_answers
...@@ -136,7 +159,7 @@ class ChatbotInterface(QWidget): ...@@ -136,7 +159,7 @@ class ChatbotInterface(QWidget):
""" """
def __init__(self): def __init__(self):
super().__init__() super().__init__()
file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt' file_path = '../reglementdescolarite-ingegeneraliste2324-1.docx.txt'
self.text_lines = read_text_file(file_path) self.text_lines = read_text_file(file_path)
if not self.text_lines: if not self.text_lines:
...@@ -194,7 +217,7 @@ class ChatbotInterface(QWidget): ...@@ -194,7 +217,7 @@ class ChatbotInterface(QWidget):
self.conversation_text.setSizePolicy(size_policy) self.conversation_text.setSizePolicy(size_policy)
# Définir la fenêtre principale # Définir la fenêtre principale
icon = QIcon("public/chatbot.png") icon = QIcon("../public/chatbot.png")
self.setWindowIcon(icon) self.setWindowIcon(icon)
self.setWindowTitle('chatbot') self.setWindowTitle('chatbot')
self.setGeometry(100, 100, 800, 600) self.setGeometry(100, 100, 800, 600)
...@@ -229,6 +252,7 @@ class ChatbotInterface(QWidget): ...@@ -229,6 +252,7 @@ class ChatbotInterface(QWidget):
Handles the user's input, processes it, and displays the chatbot's response. Handles the user's input, processes it, and displays the chatbot's response.
""" """
user_command = self.user_input_entry.text() user_command = self.user_input_entry.text()
user_command=unidecode(user_command)
if len(user_command)>0: if len(user_command)>0:
self.conversation_text.clear() self.conversation_text.clear()
self.conversation_text.append(f"demande élève: {user_command}") self.conversation_text.append(f"demande élève: {user_command}")
......
...@@ -16,3 +16,4 @@ unidecode ...@@ -16,3 +16,4 @@ unidecode
transformers transformers
torch torch
sentencepiece sentencepiece
unidecode
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment