Skip to content
Snippets Groups Projects
Commit 44bfc9d8 authored by Bouchafaa Mohamed's avatar Bouchafaa Mohamed
Browse files

Merge branch 'fix-chatbot-with-summarizer' into 'main'

fixed

See merge request !10
parents 4ccb0b4f 3665a50a
Branches
No related tags found
1 merge request: !10 "fixed"
...@@ -159,7 +159,7 @@ class ChatbotInterface(QWidget): ...@@ -159,7 +159,7 @@ class ChatbotInterface(QWidget):
""" """
def __init__(self): def __init__(self):
super().__init__() super().__init__()
file_path = '../reglementdescolarite-ingegeneraliste2324-1.docx.txt' file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt'
self.text_lines = read_text_file(file_path) self.text_lines = read_text_file(file_path)
if not self.text_lines: if not self.text_lines:
......
import sys import sys
import numpy as np
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QPalette, QColor, QPixmap
from PyQt5.QtWidgets import QApplication, QWidget, QHBoxLayout, QVBoxLayout, QTextEdit, QLineEdit, QPushButton, \ from PyQt5.QtWidgets import QApplication, QWidget, QHBoxLayout, QVBoxLayout, QTextEdit, QLineEdit, QPushButton, \
QSizePolicy, QListWidget, QListWidgetItem, QLabel QSizePolicy, QListWidget, QListWidgetItem, QLabel
from nltk.corpus import stopwords from PyQt5.QtCore import Qt
from nltk.tokenize import word_tokenize from PyQt5.QtGui import QPalette, QColor, QFont, QIcon
from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from unidecode import unidecode
from language_models import flant5_summarizer from language_models import flant5_summarizer
def read_text_file(file_path):
    """
    Reads a UTF-8 text file and splits it into paragraphs.

    Paragraphs are separated by a blank line (two consecutive newline
    characters). Each paragraph is passed through `unidecode`, so accented
    characters are transliterated to plain ASCII; `send_message` applies the
    same transliteration to the user's question, which keeps both sides
    comparable.

    Parameters:
    - file_path (str): The path to the text file.

    Returns:
    - list: The transliterated paragraphs, without empty or whitespace-only ones.

    Raises:
    - OSError: If the file cannot be opened (for example, it does not exist).
    - UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        paragraphs = file.read().split('\n\n')
    # `strip()` also rejects paragraphs made only of spaces or stray newlines,
    # which would otherwise be treated as candidate answers.
    return [unidecode(paragraph) for paragraph in paragraphs if paragraph.strip()]
def extract_keywords_textblob_french(sentence): def extract_keywords_french(sentence):
"""
Tokenizes and filters a given sentence to extract keywords in French. Removes stop words and focuses on meaningful terms.
Parameters:
- sentence (str): The input sentence.
Returns:
- str: A string containing the extracted keywords.
"""
stop_words = set(stopwords.words('french')) stop_words = set(stopwords.words('french'))
mots_questions = ['qui', 'quoi', '', 'quand', 'pourquoi', 'comment', 'quel', 'quelle', 'quels', 'quelles', mots_questions = ['qui', 'quoi', '', 'quand', 'pourquoi', 'comment', 'quel', 'quelle', 'quels', 'quelles',
'est-ce que', 'y a-t-il', 'peut-on', 'sont-ils', 'sont-elles', 'combien', 'lequel', 'laquelle', 'est-ce que', 'y a-t-il', 'peut-on', 'sont-ils', 'sont-elles', 'combien', 'lequel', 'laquelle',
...@@ -32,41 +46,93 @@ def extract_keywords_textblob_french(sentence): ...@@ -32,41 +46,93 @@ def extract_keywords_textblob_french(sentence):
return ' '.join(keywords) return ' '.join(keywords)
def calculate_combined_score(tfidf_score, jaccard_score, tfidf_weight=0.7, jaccard_weight=0.3):
    """
    Combines a TF-IDF cosine score and a Jaccard score into one weighted score.

    Parameters:
    - tfidf_score (float): Cosine similarity computed on the TF-IDF vectors.
    - jaccard_score (float): Jaccard similarity computed on the token sets.
    - tfidf_weight (float): Weight given to `tfidf_score` (default is 0.7).
    - jaccard_weight (float): Weight given to `jaccard_score` (default is 0.3).

    Returns:
    - float: `tfidf_weight * tfidf_score + jaccard_weight * jaccard_score`.
    """
    # The weights express the relative importance of each score; the defaults
    # keep the original 70 % / 30 % split.
    return tfidf_weight * tfidf_score + jaccard_weight * jaccard_score
def create_vectorial_base(text_lines, min_chars=10):
    """
    Creates a TF-IDF vectorial base from a list of text lines.

    Lines shorter than `min_chars` are left out, so row `i` of the returned
    matrix corresponds to the i-th *kept* line, not to `text_lines[i]`.

    Parameters:
    - text_lines (list): List of text lines.
    - min_chars (int): Minimum number of characters required for a line to be included (default is 10).

    Returns:
    - tuple: The TF-IDF vectorizer, the dense TF-IDF matrix (vectorial base) and the feature names,
      or `(None, None, None)` when no line is long enough.
    """
    filtered_lines = [line for line in text_lines if len(line) >= min_chars]
    if not filtered_lines:
        # Report the threshold actually used rather than a hard-coded value.
        print(f"No lines with at least {min_chars} characters found.")
        return None, None, None

    # Option still to be evaluated: stop_words=list(stopwords.words('french'))
    vectorizer = TfidfVectorizer()
    vectorial_base = vectorizer.fit_transform(filtered_lines).toarray()
    feature_names = vectorizer.get_feature_names_out()
    return vectorizer, vectorial_base, feature_names
# Cache for the French stop-word set, so the NLTK corpus is read only once
# instead of once per compared paragraph.
_STOP_WORDS_CACHE = {}


def _french_stop_words():
    """Return the French stop words, in raw and accent-stripped form, built once."""
    if 'french' not in _STOP_WORDS_CACHE:
        raw_words = set(stopwords.words('french'))
        # The document and the question are transliterated with `unidecode`,
        # so the accent-stripped spelling of each stop word must match too.
        _STOP_WORDS_CACHE['french'] = frozenset(
            raw_words | {unidecode(word) for word in raw_words})
    return _STOP_WORDS_CACHE['french']


def jaccard_similarity(str1, str2):
    """
    Computes the Jaccard similarity between the keyword sets of two strings.

    Both strings are lower-cased and tokenized; French stop words and
    punctuation-only tokens are discarded before comparing the sets.

    Parameters:
    - str1 (str): First text.
    - str2 (str): Second text.

    Returns:
    - float: |A ∩ B| / |A ∪ B| for the two token sets, or 0.0 when both are empty.
    """
    stop_words = _french_stop_words()

    def keywords(text):
        # Tokens without any letter or digit (",", ".", "?") carry no meaning
        # and would otherwise count as shared words between unrelated texts.
        return {token for token in word_tokenize(text.lower())
                if token not in stop_words and any(char.isalnum() for char in token)}

    tokens_str1 = keywords(str1)
    tokens_str2 = keywords(str2)
    union = tokens_str1 | tokens_str2
    if not union:
        return 0.0
    return len(tokens_str1 & tokens_str2) / len(union)
def get_best_answers(question, text_lines, vectorizer, vectorial_base, min_chars=10):
    """
    Retrieves the top 3 text lines that best match a question.

    Each candidate line gets a combined score built from the TF-IDF cosine
    similarity and the Jaccard similarity with the question
    (see `calculate_combined_score`).

    Parameters:
    - question (str): The user's question.
    - text_lines (list): List of text lines.
    - vectorizer: The fitted TF-IDF vectorizer.
    - vectorial_base: The TF-IDF matrix (vectorial base), one row per indexed line.
    - min_chars (int): Minimum line length that was used to build `vectorial_base`
      (default is 10, the default of `create_vectorial_base`).

    Returns:
    - list: Up to 3 answers, best first; each is the text line followed by its two scores.

    Raises:
    - ValueError: If `text_lines` cannot be aligned with the rows of `vectorial_base`.
    """
    # `create_vectorial_base` drops lines shorter than `min_chars`, so the rows
    # of the matrix follow the filtered list. Apply the same filter here, or the
    # row indices would point at the wrong paragraphs.
    candidates = list(text_lines)
    if len(candidates) != len(vectorial_base):
        candidates = [line for line in candidates if len(line) >= min_chars]
        if len(candidates) != len(vectorial_base):
            raise ValueError(
                "text_lines does not match vectorial_base: "
                f"{len(candidates)} candidate lines for {len(vectorial_base)} rows")

    question_vector = vectorizer.transform([question]).toarray()

    # Cosine similarity between the question and each indexed line
    similarities = cosine_similarity(question_vector, vectorial_base).flatten()
    jaccard_similarities = [jaccard_similarity(question, text) for text in candidates]

    combined_scores = [calculate_combined_score(tfidf_score, jaccard_score)
                       for tfidf_score, jaccard_score in zip(similarities, jaccard_similarities)]

    # Indices of the 3 highest combined scores, best first
    top_indices = np.argsort(combined_scores)[-3:][::-1]

    # Corresponding lines, each followed by the two scores it obtained
    best_answers = [candidates[i] + "\n" + "score TFIDF : " + str(similarities[i]) + " score jacard : " + str(
        jaccard_similarities[i]) + "\n" for i in top_indices]
    return best_answers
class WrappingLabel(QLabel):
    """
    Subclass of QLabel with word wrapping enabled. Used for displaying text in the GUI.
    """

    def __init__(self, text='', parent=None):
        """
        Parameters:
        - text (str): Initial label text (default is empty).
        - parent: Optional parent widget.
        """
        super().__init__(text, parent)
        self.setWordWrap(True)
class StyledListWidgetItem(QListWidgetItem): class StyledListWidgetItem(QListWidgetItem):
"""
Subclass of QListWidgetItem with custom styling for the chat history list.
"""
def __init__(self, text='', parent=None): def __init__(self, text='', parent=None):
super(StyledListWidgetItem, self).__init__(parent) super(StyledListWidgetItem, self).__init__(parent)
self.setText(text) self.setText(text)
...@@ -81,6 +147,10 @@ class StyledListWidgetItem(QListWidgetItem): ...@@ -81,6 +147,10 @@ class StyledListWidgetItem(QListWidgetItem):
class StyledListWidget(QListWidget): class StyledListWidget(QListWidget):
"""
Subclass of QListWidget with custom styling for the chat history list.
"""
def __init__(self, parent=None): def __init__(self, parent=None):
super(StyledListWidget, self).__init__(parent) super(StyledListWidget, self).__init__(parent)
self.setAlternatingRowColors(False) self.setAlternatingRowColors(False)
...@@ -93,12 +163,21 @@ class StyledListWidget(QListWidget): ...@@ -93,12 +163,21 @@ class StyledListWidget(QListWidget):
""") """)
def addStyledItem(self, text):
    """
    Adds a styled item to the list widget.

    Parameters:
    - text (str): The text to be added to the list.
    """
    item = StyledListWidgetItem(text)
    # The item's style must be initialised before the item is inserted.
    item.initStyle()
    self.addItem(item)
class ChatbotInterface(QWidget): class ChatbotInterface(QWidget):
"""
Main class representing the chatbot interface. Initializes the UI and handles user interactions.
"""
def __init__(self): def __init__(self):
super().__init__() super().__init__()
file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt' file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt'
...@@ -111,10 +190,15 @@ class ChatbotInterface(QWidget): ...@@ -111,10 +190,15 @@ class ChatbotInterface(QWidget):
self.init_ui() self.init_ui()
self.command_history = [] # Pour stocker l'historique des commandes self.command_history = [] # Pour stocker l'historique des commandes
self.dico = {} self.dico = {}
self.dico2 = {}
def init_ui(self): def init_ui(self):
"""
Initializes the user interface.
"""
# Créer des widgets # Créer des widgets
self.conversation_text = QTextEdit(self) self.conversation_text = QTextEdit(self)
self.conversation_text.setFont(QFont("consolas", 9))
self.conversation_text.setReadOnly(True) self.conversation_text.setReadOnly(True)
self.user_input_entry = QLineEdit(self) self.user_input_entry = QLineEdit(self)
...@@ -139,6 +223,7 @@ class ChatbotInterface(QWidget): ...@@ -139,6 +223,7 @@ class ChatbotInterface(QWidget):
left_layout = QVBoxLayout() left_layout = QVBoxLayout()
left_layout.addWidget(self.conversation_text) left_layout.addWidget(self.conversation_text)
left_layout.addWidget(self.user_input_entry) left_layout.addWidget(self.user_input_entry)
# Ajouter le bouton "Envoyer" avec une taille réduite # Ajouter le bouton "Envoyer" avec une taille réduite
self.send_button.setMaximumWidth(self.send_button.width() // 3) self.send_button.setMaximumWidth(self.send_button.width() // 3)
left_layout.addWidget(self.send_button, alignment=Qt.AlignRight) left_layout.addWidget(self.send_button, alignment=Qt.AlignRight)
...@@ -153,7 +238,9 @@ class ChatbotInterface(QWidget): ...@@ -153,7 +238,9 @@ class ChatbotInterface(QWidget):
self.conversation_text.setSizePolicy(size_policy) self.conversation_text.setSizePolicy(size_policy)
# Définir la fenêtre principale # Définir la fenêtre principale
self.setWindowTitle('Chatbot Interface') icon = QIcon("../public/chatbot.png")
self.setWindowIcon(icon)
self.setWindowTitle('chatbot')
self.setGeometry(100, 100, 800, 600) self.setGeometry(100, 100, 800, 600)
# Appliquer les styles # Appliquer les styles
...@@ -177,18 +264,22 @@ class ChatbotInterface(QWidget): ...@@ -177,18 +264,22 @@ class ChatbotInterface(QWidget):
""") """)
self.user_input_entry.returnPressed.connect(self.send_message) self.user_input_entry.returnPressed.connect(self.send_message)
self.history_list_widget.itemClicked.connect(self.history_item_clicked) self.history_list_widget.itemClicked.connect(self.history_item_clicked)
def send_message(self): def send_message(self):
"""
Handles the user's input, processes it, and displays the chatbot's response.
"""
user_command = self.user_input_entry.text() user_command = self.user_input_entry.text()
user_command = unidecode(user_command)
if len(user_command) > 0: if len(user_command) > 0:
self.conversation_text.clear() self.conversation_text.clear()
self.conversation_text.append(f"Commande utilisateur: {user_command}") self.conversation_text.append(f"demande élève: {user_command}")
self.conversation_text.append("Réponse du chatbot pour la demande: ")
best_answers = get_best_answers(user_command, self.text_lines, self.vectorizer, self.vectorial_base) best_answers = get_best_answers(user_command, self.text_lines, self.vectorizer, self.vectorial_base)
flant5_summarized_text = "La réponse formulée par flan-T5 (1024) : \n" + flant5_summarizer( flant5_summarized_text = "La réponse formulée par flan-T5 (1024) : \n" + flant5_summarizer(
" ".join(best_answers)) " ".join(best_answers))
...@@ -199,8 +290,9 @@ class ChatbotInterface(QWidget): ...@@ -199,8 +290,9 @@ class ChatbotInterface(QWidget):
self.conversation_text.append(chatbot_response) self.conversation_text.append(chatbot_response)
# Ajouter la commande à l'historique # Ajouter la commande à l'historique
user_command1 = extract_keywords_textblob_french(user_command) user_command1 = extract_keywords_french(user_command)
self.command_history.append(user_command1) self.command_history.append(user_command1)
self.dico2[user_command1] = user_command
self.dico[user_command1] = chatbot_response self.dico[user_command1] = chatbot_response
# Mettre à jour la liste d'historique # Mettre à jour la liste d'historique
...@@ -210,40 +302,43 @@ class ChatbotInterface(QWidget): ...@@ -210,40 +302,43 @@ class ChatbotInterface(QWidget):
else: else:
pass pass
# Loads the image at `image_path` into a QPixmap and shows it in
# `self.logo_label`, with scaled contents enabled on that label.
# NOTE(review): this method appears only on the old (left) side of the diff,
# and `self.logo_label` is not created in any code visible here; confirm
# whether it is still needed.
def set_logo(self, image_path):
pixmap = QPixmap(image_path)
self.logo_label.setPixmap(pixmap)
self.logo_label.setScaledContents(True)
def update_history_list(self):
    """
    Updates the chat history list in the UI.

    The list widget is emptied and refilled with one styled item per entry of
    `self.command_history`, in order.
    """
    self.history_list_widget.clear()
    for command in self.command_history:
        self.history_list_widget.addStyledItem(command)
def history_item_clicked(self, item):
    """
    Displays the stored question and answer when a history item is clicked.

    Parameters:
    - item: The clicked item of `self.history_list_widget`.
    """
    self.conversation_text.clear()

    # `row()` returns -1 for an item that is not in the list; without the lower
    # bound, -1 would silently select the last history entry.
    selected_index = self.history_list_widget.row(item)
    if not 0 <= selected_index < len(self.command_history):
        return

    # History entries are keyword strings; `dico2` maps them back to the
    # original question and `dico` to the chatbot's answer.
    selected_command = self.command_history[selected_index]
    self.conversation_text.append(f"demande élève: {self.dico2[selected_command]}")

    chatbot_response = f"Réponse du chatbot pour la demande: \n{self.dico[selected_command]}"
    self.conversation_text.append(chatbot_response)
if __name__ == '__main__':
    app = QApplication(sys.argv)
    chatbot_app = ChatbotInterface()

    # Screen area left once task bars and docks are excluded
    available = app.primaryScreen().availableGeometry()

    # Resize the window: 3/5 of the available width, nearly the full height.
    # NOTE(review): the 48 px margin presumably leaves room for the window
    # title bar; confirm.
    new_width = available.width() * 3 // 5
    chatbot_app.resize(new_width, available.height() - 48)

    # Centre the window horizontally and align it with the top of the available
    # area (not always y = 0, e.g. when a panel sits at the top of the screen).
    center_point = available.center().x() - chatbot_app.rect().center().x()
    chatbot_app.move(center_point, available.y())

    chatbot_app.show()
    sys.exit(app.exec_())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment