From 3665a50a48c3601395437f63cd2e40206ed4b9fe Mon Sep 17 00:00:00 2001 From: Bouchafaa <mohamed.bouchafaa@ec-lyon.fr> Date: Thu, 28 Mar 2024 08:21:40 +0100 Subject: [PATCH] fixed --- chatbot_complet.py | 2 +- chatbot_with_summarizer.py | 159 +++++++++++++++++++++++++++++-------- 2 files changed, 128 insertions(+), 33 deletions(-) diff --git a/chatbot_complet.py b/chatbot_complet.py index fd3d200..7a6bceb 100644 --- a/chatbot_complet.py +++ b/chatbot_complet.py @@ -159,7 +159,7 @@ class ChatbotInterface(QWidget): """ def __init__(self): super().__init__() - file_path = '../reglementdescolarite-ingegeneraliste2324-1.docx.txt' + file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt' self.text_lines = read_text_file(file_path) if not self.text_lines: diff --git a/chatbot_with_summarizer.py b/chatbot_with_summarizer.py index 11adeb6..e3a28a2 100644 --- a/chatbot_with_summarizer.py +++ b/chatbot_with_summarizer.py @@ -1,26 +1,40 @@ import sys - -import numpy as np -from PyQt5.QtCore import Qt -from PyQt5.QtGui import QPalette, QColor, QPixmap from PyQt5.QtWidgets import QApplication, QWidget, QHBoxLayout, QVBoxLayout, QTextEdit, QLineEdit, QPushButton, \ QSizePolicy, QListWidget, QListWidgetItem, QLabel -from nltk.corpus import stopwords -from nltk.tokenize import word_tokenize +from PyQt5.QtCore import Qt +from PyQt5.QtGui import QPalette, QColor, QFont, QIcon from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity +import numpy as np +from nltk.corpus import stopwords +from nltk.tokenize import word_tokenize +from unidecode import unidecode from language_models import flant5_summarizer def read_text_file(file_path): + """ + Reads the content of a text file specified by `file_path` and splits it into paragraphs based on double line breaks (`'\n\n'`). + Parameters: + - file_path (str): The path to the text file. + Returns: + - list: A list of non-empty paragraphs from the file. + """ with open(file_path, 'r', encoding='utf-8') as file: content = file.read().split('\n\n') - content1 = [item for item in content if item != ""] + content1 = [unidecode(item) for item in content if item != ""] return content1 -def extract_keywords_textblob_french(sentence): +def extract_keywords_french(sentence): + """ + Tokenizes and filters a given sentence to extract keywords in French. Removes stop words and focuses on meaningful terms. + Parameters: + - sentence (str): The input sentence. + Returns: + - str: A string containing the extracted keywords. + """ stop_words = set(stopwords.words('french')) mots_questions = ['qui', 'quoi', 'où', 'quand', 'pourquoi', 'comment', 'quel', 'quelle', 'quels', 'quelles', 'est-ce que', 'y a-t-il', 'peut-on', 'sont-ils', 'sont-elles', 'combien', 'lequel', 'laquelle', @@ -32,41 +46,93 @@ def extract_keywords_textblob_french(sentence): return ' '.join(keywords) -def create_vectorial_base(text_lines, min_chars=50): +def calculate_combined_score(tfidf_score, jaccard_score): + # You can adjust the weights based on the importance of each score + return 0.7 * tfidf_score + 0.3 * jaccard_score + + +def create_vectorial_base(text_lines, min_chars=10): + """ + Creates a TF-IDF vectorial base from a list of text lines. + Parameters: + - text_lines (list): List of text lines. + - min_chars (int): Minimum number of characters required for a line to be included (default is 10). + Returns: + - tuple: A tuple containing the TF-IDF vectorizer, the TF-IDF matrix (vectorial base), and the feature names. + """ filtered_lines = [line for line in text_lines if len(line) >= min_chars] if not filtered_lines: - print("No lines with at least 50 characters found.") + print("No lines with at least 10 characters found.") return None, None, None - vectorizer = TfidfVectorizer() + vectorizer = TfidfVectorizer() # a tester en option : stop_words=list(stopwords.words('french')) vectorial_base = vectorizer.fit_transform(filtered_lines).toarray() feature_names = vectorizer.get_feature_names_out() return vectorizer, vectorial_base, feature_names +def jaccard_similarity(str1, str2): + tokens_str1 = set(word_tokenize(str1.lower())) + tokens_str2 = set(word_tokenize(str2.lower())) + + stop_words = set(stopwords.words('french')) + tokens_str1 = tokens_str1 - stop_words + tokens_str2 = tokens_str2 - stop_words + + intersection = len(tokens_str1.intersection(tokens_str2)) + union = len(tokens_str1) + len(tokens_str2) - intersection + + similarity = intersection / union if union != 0 else 0.0 + return similarity + + def get_best_answers(question, text_lines, vectorizer, vectorial_base): + """ + Retrieves the top 3 most similar text lines to a given question based on cosine similarity. + Parameters: + - question (str): The user's question. + - text_lines (list): List of text lines. + - vectorizer: The TF-IDF vectorizer. + - vectorial_base: The TF-IDF matrix (vectorial base). + Returns: + - list: A list of the top 3 most similar text lines as answers. + """ question_vector = vectorizer.transform([question]).toarray() # Calculate cosine similarity between the question and each text line similarities = cosine_similarity(question_vector, vectorial_base).flatten() - # Get the indices of the top 5 most similar text lines - top_indices = np.argsort(similarities)[-3:][::-1] + jaccard_similarities = [jaccard_similarity(question, text) for text in text_lines] + + combined_scores = [calculate_combined_score(similarities, jaccard_score) + for similarities, jaccard_score in zip(similarities, jaccard_similarities)] + + # Get the indices of the top 3 most similar text lines + top_indices = np.argsort(combined_scores)[-3:][::-1] # Retrieve the corresponding text lines - best_answers = [text_lines[i] + "\n" for i in top_indices] + best_answers = [text_lines[i] + "\n" + "score TFIDF : " + str(similarities[i]) + " score jacard : " + str( + jaccard_similarities[i]) + "\n" for i in top_indices] return best_answers class WrappingLabel(QLabel): + """ + Subclass of QLabel with word wrapping enabled. Used for displaying text in the GUI. + """ + def __init__(self, text='', parent=None): super(WrappingLabel, self).__init__(text, parent) self.setWordWrap(True) class StyledListWidgetItem(QListWidgetItem): + """ + Subclass of QListWidgetItem with custom styling for the chat history list. + """ + def __init__(self, text='', parent=None): super(StyledListWidgetItem, self).__init__(parent) self.setText(text) @@ -81,6 +147,10 @@ class StyledListWidgetItem(QListWidgetItem): class StyledListWidget(QListWidget): + """ + Subclass of QListWidget with custom styling for the chat history list. + """ + def __init__(self, parent=None): super(StyledListWidget, self).__init__(parent) self.setAlternatingRowColors(False) @@ -93,12 +163,21 @@ class StyledListWidget(QListWidget): """) def addStyledItem(self, text): + """ + Adds a styled item to the list widget. + Parameters: + - text (str): The text to be added to the list. + """ item = StyledListWidgetItem(text) item.initStyle() self.addItem(item) class ChatbotInterface(QWidget): + """ + Main class representing the chatbot interface. Initializes the UI and handles user interactions. + """ + def __init__(self): super().__init__() file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt' @@ -111,10 +190,15 @@ class ChatbotInterface(QWidget): self.init_ui() self.command_history = [] # Pour stocker l'historique des commandes self.dico = {} + self.dico2 = {} def init_ui(self): + """ + Initializes the user interface. + """ # Créer des widgets self.conversation_text = QTextEdit(self) + self.conversation_text.setFont(QFont("consolas", 9)) self.conversation_text.setReadOnly(True) self.user_input_entry = QLineEdit(self) @@ -139,6 +223,7 @@ class ChatbotInterface(QWidget): left_layout = QVBoxLayout() left_layout.addWidget(self.conversation_text) left_layout.addWidget(self.user_input_entry) + # Ajouter le bouton "Envoyer" avec une taille réduite self.send_button.setMaximumWidth(self.send_button.width() // 3) left_layout.addWidget(self.send_button, alignment=Qt.AlignRight) @@ -153,7 +238,9 @@ class ChatbotInterface(QWidget): self.conversation_text.setSizePolicy(size_policy) # Définir la fenêtre principale - self.setWindowTitle('Chatbot Interface') + icon = QIcon("../public/chatbot.png") + self.setWindowIcon(icon) + self.setWindowTitle('chatbot') self.setGeometry(100, 100, 800, 600) # Appliquer les styles @@ -177,18 +264,22 @@ class ChatbotInterface(QWidget): """) + self.user_input_entry.returnPressed.connect(self.send_message) self.history_list_widget.itemClicked.connect(self.history_item_clicked) def send_message(self): - + """ + Handles the user's input, processes it, and displays the chatbot's response. + """ user_command = self.user_input_entry.text() + user_command = unidecode(user_command) if len(user_command) > 0: self.conversation_text.clear() - self.conversation_text.append(f"Commande utilisateur: {user_command}") + self.conversation_text.append(f"demande élève: {user_command}") + self.conversation_text.append("Réponse du chatbot pour la demande: ") best_answers = get_best_answers(user_command, self.text_lines, self.vectorizer, self.vectorial_base) - flant5_summarized_text = "La réponse formulée par flan-T5 (1024) : \n" + flant5_summarizer( " ".join(best_answers)) @@ -199,8 +290,9 @@ class ChatbotInterface(QWidget): self.conversation_text.append(chatbot_response) # Ajouter la commande à l'historique - user_command1 = extract_keywords_textblob_french(user_command) + user_command1 = extract_keywords_french(user_command) self.command_history.append(user_command1) + self.dico2[user_command1] = user_command self.dico[user_command1] = chatbot_response # Mettre à jour la liste d'historique @@ -210,40 +302,43 @@ class ChatbotInterface(QWidget): else: pass - def set_logo(self, image_path): - pixmap = QPixmap(image_path) - self.logo_label.setPixmap(pixmap) - self.logo_label.setScaledContents(True) - def update_history_list(self): + """ + Updates the chat history list in the UI. + """ self.history_list_widget.clear() for command in self.command_history: self.history_list_widget.addStyledItem(command) def history_item_clicked(self, item): + """ + Displays the chat history when an item is clicked. + Parameters: + - item: The clicked item. + """ self.conversation_text.clear() # Réafficher le contenu dans la conversation_text lorsque l'élément de l'historique est cliqué selected_index = self.history_list_widget.row(item) if selected_index < len(self.command_history): selected_command = self.command_history[selected_index] - self.conversation_text.append(f"Commande utilisateur: {selected_command}") + self.conversation_text.append(f"demande élève: {self.dico2[selected_command]}") # Traiter la commande et obtenir la réponse du chatbot (vous devrez ajuster cela en fonction de votre application) - chatbot_response = f"Réponse du chatbot pour la commande: {self.dico[selected_command]}" + chatbot_response = f"Réponse du chatbot pour la demande: \n{self.dico[selected_command]}" self.conversation_text.append(chatbot_response) if __name__ == '__main__': app = QApplication(sys.argv) chatbot_app = ChatbotInterface() - - # Centrer la fenêtre screen = app.primaryScreen() - center_point = screen.availableGeometry().center() - chatbot_app.move(center_point - chatbot_app.rect().center()) # Ajuster la taille de la fenêtre - new_width = screen.availableGeometry().width() // 2 - chatbot_app.resize(new_width, screen.availableGeometry().height() * round(7 / 8)) + new_width = screen.availableGeometry().width() * 3 // 5 + chatbot_app.resize(new_width, int(screen.availableGeometry().height() - 48)) + + # Centrer la fenêtre + center_point = screen.availableGeometry().center().x() - chatbot_app.rect().center().x() + chatbot_app.move(center_point, 0) chatbot_app.show() sys.exit(app.exec_()) -- GitLab