From 3665a50a48c3601395437f63cd2e40206ed4b9fe Mon Sep 17 00:00:00 2001
From: Bouchafaa <mohamed.bouchafaa@ec-lyon.fr>
Date: Thu, 28 Mar 2024 08:21:40 +0100
Subject: [PATCH] Fix rules-file path; add Jaccard re-ranking, unidecode normalisation and docstrings

---
 chatbot_complet.py         |   2 +-
 chatbot_with_summarizer.py | 159 +++++++++++++++++++++++++++++--------
 2 files changed, 128 insertions(+), 33 deletions(-)

diff --git a/chatbot_complet.py b/chatbot_complet.py
index fd3d200..7a6bceb 100644
--- a/chatbot_complet.py
+++ b/chatbot_complet.py
@@ -159,7 +159,7 @@ class ChatbotInterface(QWidget):
     """
     def __init__(self):
         super().__init__()
-        file_path = '../reglementdescolarite-ingegeneraliste2324-1.docx.txt'
+        file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt'
         self.text_lines = read_text_file(file_path)
 
         if not self.text_lines:
diff --git a/chatbot_with_summarizer.py b/chatbot_with_summarizer.py
index 11adeb6..e3a28a2 100644
--- a/chatbot_with_summarizer.py
+++ b/chatbot_with_summarizer.py
@@ -1,26 +1,40 @@
 import sys
-
-import numpy as np
-from PyQt5.QtCore import Qt
-from PyQt5.QtGui import QPalette, QColor, QPixmap
 from PyQt5.QtWidgets import QApplication, QWidget, QHBoxLayout, QVBoxLayout, QTextEdit, QLineEdit, QPushButton, \
     QSizePolicy, QListWidget, QListWidgetItem, QLabel
-from nltk.corpus import stopwords
-from nltk.tokenize import word_tokenize
+from PyQt5.QtCore import Qt
+from PyQt5.QtGui import QPalette, QColor, QFont, QIcon
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
+import numpy as np
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+from unidecode import unidecode
 
 from language_models import flant5_summarizer
 
 
 def read_text_file(file_path):
+    """
+    Reads the content of a text file specified by `file_path` and splits it into paragraphs based on double line breaks (`'\n\n'`).
+    Parameters:
+    - file_path (str): The path to the text file.
+    Returns:
+    - list: A list of non-empty paragraphs from the file, each passed through unidecode().
+    """
     with open(file_path, 'r', encoding='utf-8') as file:
         content = file.read().split('\n\n')
-        content1 = [item for item in content if item != ""]
+        content1 = [unidecode(item) for item in content if item != ""]
     return content1
 
 
-def extract_keywords_textblob_french(sentence):
+def extract_keywords_french(sentence):
+    """
+    Tokenizes and filters a given sentence to extract keywords in French. Removes stop words and focuses on meaningful terms.
+    Parameters:
+    - sentence (str): The input sentence.
+    Returns:
+    - str: A string containing the extracted keywords.
+    """
     stop_words = set(stopwords.words('french'))
     mots_questions = ['qui', 'quoi', 'où', 'quand', 'pourquoi', 'comment', 'quel', 'quelle', 'quels', 'quelles',
                       'est-ce que', 'y a-t-il', 'peut-on', 'sont-ils', 'sont-elles', 'combien', 'lequel', 'laquelle',
@@ -32,41 +46,93 @@ def extract_keywords_textblob_french(sentence):
     return ' '.join(keywords)
 
 
-def create_vectorial_base(text_lines, min_chars=50):
+def calculate_combined_score(tfidf_score, jaccard_score):
+    # You can adjust the weights based on the importance of each score
+    return 0.7 * tfidf_score + 0.3 * jaccard_score
+
+
+def create_vectorial_base(text_lines, min_chars=10):
+    """
+    Creates a TF-IDF vectorial base from a list of text lines.
+    Parameters:
+    - text_lines (list): List of text lines.
+    - min_chars (int): Minimum number of characters required for a line to be included (default is 10).
+    Returns:
+    - tuple: (vectorizer, TF-IDF matrix as a dense array, feature names), or (None, None, None) if no line is long enough.
+    """
     filtered_lines = [line for line in text_lines if len(line) >= min_chars]
 
     if not filtered_lines:
-        print("No lines with at least 50 characters found.")
+        print("No lines with at least 10 characters found.")
         return None, None, None
 
-    vectorizer = TfidfVectorizer()
+    vectorizer = TfidfVectorizer()  # optional, still to be tested: stop_words=list(stopwords.words('french'))
     vectorial_base = vectorizer.fit_transform(filtered_lines).toarray()
     feature_names = vectorizer.get_feature_names_out()
 
     return vectorizer, vectorial_base, feature_names
 
 
+def jaccard_similarity(str1, str2):
+    tokens_str1 = set(word_tokenize(str1.lower()))
+    tokens_str2 = set(word_tokenize(str2.lower()))
+
+    stop_words = set(stopwords.words('french'))
+    tokens_str1 = tokens_str1 - stop_words
+    tokens_str2 = tokens_str2 - stop_words
+
+    intersection = len(tokens_str1.intersection(tokens_str2))
+    union = len(tokens_str1) + len(tokens_str2) - intersection
+
+    similarity = intersection / union if union != 0 else 0.0
+    return similarity
+
+
 def get_best_answers(question, text_lines, vectorizer, vectorial_base):
+    """
+    Retrieves the top 3 text lines for a given question, ranked by a weighted combination of TF-IDF cosine similarity and Jaccard similarity.
+    Parameters:
+    - question (str): The user's question.
+    - text_lines (list): List of text lines.
+    - vectorizer: The TF-IDF vectorizer.
+    - vectorial_base: The TF-IDF matrix (vectorial base).
+    Returns:
+    - list: The top 3 text lines, best first, each followed by a line reporting its TF-IDF and Jaccard scores.
+    """
     question_vector = vectorizer.transform([question]).toarray()
 
     # Calculate cosine similarity between the question and each text line
     similarities = cosine_similarity(question_vector, vectorial_base).flatten()
 
-    # Get the indices of the top 5 most similar text lines
-    top_indices = np.argsort(similarities)[-3:][::-1]
+    jaccard_similarities = [jaccard_similarity(question, text) for text in text_lines]
+
+    combined_scores = [calculate_combined_score(similarities, jaccard_score)
+                       for similarities, jaccard_score in zip(similarities, jaccard_similarities)]
+
+    # Get the indices of the top 3 most similar text lines
+    top_indices = np.argsort(combined_scores)[-3:][::-1]
     # Retrieve the corresponding text lines
-    best_answers = [text_lines[i] + "\n" for i in top_indices]
+    best_answers = [text_lines[i] + "\n" + "score TFIDF : " + str(similarities[i]) + " score jacard : " + str(
+        jaccard_similarities[i]) + "\n" for i in top_indices]
 
     return best_answers
 
 
 class WrappingLabel(QLabel):
+    """
+    Subclass of QLabel with word wrapping enabled. Used for displaying text in the GUI.
+    """
+
     def __init__(self, text='', parent=None):
         super(WrappingLabel, self).__init__(text, parent)
         self.setWordWrap(True)
 
 
 class StyledListWidgetItem(QListWidgetItem):
+    """
+    Subclass of QListWidgetItem with custom styling for the chat history list.
+    """
+
     def __init__(self, text='', parent=None):
         super(StyledListWidgetItem, self).__init__(parent)
         self.setText(text)
@@ -81,6 +147,10 @@ class StyledListWidgetItem(QListWidgetItem):
 
 
 class StyledListWidget(QListWidget):
+    """
+    Subclass of QListWidget with custom styling for the chat history list.
+    """
+
     def __init__(self, parent=None):
         super(StyledListWidget, self).__init__(parent)
         self.setAlternatingRowColors(False)
@@ -93,12 +163,21 @@ class StyledListWidget(QListWidget):
         """)
 
     def addStyledItem(self, text):
+        """
+        Adds a styled item to the list widget.
+        Parameters:
+        - text (str): The text to be added to the list.
+        """
         item = StyledListWidgetItem(text)
         item.initStyle()
         self.addItem(item)
 
 
 class ChatbotInterface(QWidget):
+    """
+    Main class representing the chatbot interface. Initializes the UI and handles user interactions.
+    """
+
     def __init__(self):
         super().__init__()
         file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt'
@@ -111,10 +190,15 @@ class ChatbotInterface(QWidget):
         self.init_ui()
         self.command_history = []  # Pour stocker l'historique des commandes
         self.dico = {}
+        self.dico2 = {}
 
     def init_ui(self):
+        """
+        Initializes the user interface.
+        """
         # Créer des widgets
         self.conversation_text = QTextEdit(self)
+        self.conversation_text.setFont(QFont("consolas", 9))
         self.conversation_text.setReadOnly(True)
 
         self.user_input_entry = QLineEdit(self)
@@ -139,6 +223,7 @@ class ChatbotInterface(QWidget):
         left_layout = QVBoxLayout()
         left_layout.addWidget(self.conversation_text)
         left_layout.addWidget(self.user_input_entry)
+
         # Ajouter le bouton "Envoyer" avec une taille réduite
         self.send_button.setMaximumWidth(self.send_button.width() // 3)
         left_layout.addWidget(self.send_button, alignment=Qt.AlignRight)
@@ -153,7 +238,9 @@ class ChatbotInterface(QWidget):
         self.conversation_text.setSizePolicy(size_policy)
 
         # Définir la fenêtre principale
-        self.setWindowTitle('Chatbot Interface')
+        icon = QIcon("../public/chatbot.png")
+        self.setWindowIcon(icon)
+        self.setWindowTitle('chatbot')
         self.setGeometry(100, 100, 800, 600)
 
         # Appliquer les styles
@@ -177,18 +264,22 @@ class ChatbotInterface(QWidget):
 
             
         """)
+
         self.user_input_entry.returnPressed.connect(self.send_message)
         self.history_list_widget.itemClicked.connect(self.history_item_clicked)
 
     def send_message(self):
-
+        """
+        Handles the user's input, processes it, and displays the chatbot's response.
+        """
         user_command = self.user_input_entry.text()
+        user_command = unidecode(user_command)
         if len(user_command) > 0:
             self.conversation_text.clear()
-            self.conversation_text.append(f"Commande utilisateur: {user_command}")
+            self.conversation_text.append(f"demande élève: {user_command}")
+            self.conversation_text.append("Réponse du chatbot pour la demande: ")
 
             best_answers = get_best_answers(user_command, self.text_lines, self.vectorizer, self.vectorial_base)
-
             flant5_summarized_text = "La réponse formulée par flan-T5 (1024) : \n" + flant5_summarizer(
                 " ".join(best_answers))
 
@@ -199,8 +290,9 @@ class ChatbotInterface(QWidget):
             self.conversation_text.append(chatbot_response)
 
             # Ajouter la commande à l'historique
-            user_command1 = extract_keywords_textblob_french(user_command)
+            user_command1 = extract_keywords_french(user_command)
             self.command_history.append(user_command1)
+            self.dico2[user_command1] = user_command
             self.dico[user_command1] = chatbot_response
 
             # Mettre à jour la liste d'historique
@@ -210,40 +302,43 @@ class ChatbotInterface(QWidget):
         else:
             pass
 
-    def set_logo(self, image_path):
-        pixmap = QPixmap(image_path)
-        self.logo_label.setPixmap(pixmap)
-        self.logo_label.setScaledContents(True)
-
     def update_history_list(self):
+        """
+        Updates the chat history list in the UI.
+        """
         self.history_list_widget.clear()
         for command in self.command_history:
             self.history_list_widget.addStyledItem(command)
 
     def history_item_clicked(self, item):
+        """
+        Redisplays the original request and the stored chatbot response for the clicked history item.
+        Parameters:
+        - item: The clicked item.
+        """
         self.conversation_text.clear()
         # Réafficher le contenu dans la conversation_text lorsque l'élément de l'historique est cliqué
         selected_index = self.history_list_widget.row(item)
         if selected_index < len(self.command_history):
             selected_command = self.command_history[selected_index]
-            self.conversation_text.append(f"Commande utilisateur: {selected_command}")
+            self.conversation_text.append(f"demande élève: {self.dico2[selected_command]}")
             # Traiter la commande et obtenir la réponse du chatbot (vous devrez ajuster cela en fonction de votre application)
-            chatbot_response = f"Réponse du chatbot pour la commande: {self.dico[selected_command]}"
+            chatbot_response = f"Réponse du chatbot pour la demande: \n{self.dico[selected_command]}"
             self.conversation_text.append(chatbot_response)
 
 
 if __name__ == '__main__':
     app = QApplication(sys.argv)
     chatbot_app = ChatbotInterface()
-
-    # Centrer la fenêtre
     screen = app.primaryScreen()
-    center_point = screen.availableGeometry().center()
-    chatbot_app.move(center_point - chatbot_app.rect().center())
 
     # Ajuster la taille de la fenêtre
-    new_width = screen.availableGeometry().width() // 2
-    chatbot_app.resize(new_width, screen.availableGeometry().height() * round(7 / 8))
+    new_width = screen.availableGeometry().width() * 3 // 5
+    chatbot_app.resize(new_width, int(screen.availableGeometry().height() - 48))
+
+    # Centrer la fenêtre
+    center_point = screen.availableGeometry().center().x() - chatbot_app.rect().center().x()
+    chatbot_app.move(center_point, 0)
 
     chatbot_app.show()
     sys.exit(app.exec_())
-- 
GitLab