Skip to content
Snippets Groups Projects
Select Git revision
  • 1ea04d6e1ca1b94a354f3afb96a936518ff4852d
  • main default protected
2 results

streamlit_app.py

Blame
  • chatbot_with_summarizer.py 13.48 KiB
    import sys
    from PyQt5.QtWidgets import QApplication, QWidget, QHBoxLayout, QVBoxLayout, QTextEdit, QLineEdit, QPushButton, \
        QSizePolicy, QListWidget, QListWidgetItem, QLabel
    from PyQt5.QtCore import Qt
    from PyQt5.QtGui import QPalette, QColor, QFont, QIcon
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity
    import numpy as np
    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize
    from unidecode import unidecode
    
    from language_models import flant5_summarizer
    
    
    def read_text_file(file_path):
        """
        Loads a UTF-8 text file and returns its paragraphs.

        The file content is cut on blank lines (the `'\n\n'` separator); chunks that are exactly the
        empty string are dropped and every remaining chunk is passed through `unidecode`
        (ASCII transliteration).
        Parameters:
        - file_path (str): The path to the text file.
        Returns:
        - list: The transliterated, non-empty paragraphs in file order.
        """
        with open(file_path, 'r', encoding='utf-8') as handle:
            raw_text = handle.read()

        paragraphs = []
        for chunk in raw_text.split('\n\n'):
            if chunk == "":
                continue
            paragraphs.append(unidecode(chunk))
        return paragraphs
    
    
    def extract_keywords_french(sentence):
        """
        Reduces a French sentence to its keywords.

        The sentence is tokenized with NLTK's French tokenizer; every token whose lowercase form is
        a French stop word or one of the interrogative expressions listed below is discarded.
        Parameters:
        - sentence (str): The input sentence.
        Returns:
        - str: The surviving tokens, in their original order and casing, joined by single spaces.
        """
        question_words = ['qui', 'quoi', '', 'quand', 'pourquoi', 'comment', 'quel', 'quelle', 'quels', 'quelles',
                          'est-ce que', 'y a-t-il', 'peut-on', 'sont-ils', 'sont-elles', 'combien', 'lequel', 'laquelle',
                          'lesquels', 'lesquelles', 'est-ce', 'n\'est-ce pas', 'savoir', 'pouvez-vous', 'êtes-vous',
                          'avez-vous', 'dois-je', 'quelqu\'un', 'quelque chose']
        ignored = set(stopwords.words('french')).union(question_words)

        kept_tokens = []
        for token in word_tokenize(sentence, language='french'):
            if token.lower() in ignored:
                continue
            kept_tokens.append(token)
        return ' '.join(kept_tokens)
    
    
    def calculate_combined_score(tfidf_score, jaccard_score, tfidf_weight=0.7, jaccard_weight=0.3):
        """
        Blends a TF-IDF similarity score and a Jaccard similarity score into one weighted sum.
        Parameters:
        - tfidf_score (float): The TF-IDF (cosine) similarity score.
        - jaccard_score (float): The Jaccard similarity score.
        - tfidf_weight (float): Weight applied to `tfidf_score` (default is 0.7).
        - jaccard_weight (float): Weight applied to `jaccard_score` (default is 0.3).
        Returns:
        - float: `tfidf_weight * tfidf_score + jaccard_weight * jaccard_score`.
        """
        # The weights express the relative importance of each score; the defaults
        # reproduce the historical 70 % / 30 % split.
        return tfidf_weight * tfidf_score + jaccard_weight * jaccard_score
    
    
    def create_vectorial_base(text_lines, min_chars=10):
        """
        Creates a TF-IDF vectorial base from a list of text lines.

        Lines shorter than `min_chars` characters are left out, so row `i` of the returned matrix
        corresponds to the i-th line that passed the filter, not necessarily to `text_lines[i]`.
        Parameters:
        - text_lines (list): List of text lines.
        - min_chars (int): Minimum number of characters required for a line to be included (default is 10).
        Returns:
        - tuple: A tuple containing the TF-IDF vectorizer, the dense TF-IDF matrix (vectorial base), and the
          feature names. `(None, None, None)` is returned when no line passes the length filter.
        """
        filtered_lines = [line for line in text_lines if len(line) >= min_chars]

        if not filtered_lines:
            # Report the threshold actually in use rather than the default value.
            print(f"No lines with at least {min_chars} characters found.")
            return None, None, None

        # Optional variant still to be evaluated: stop_words=list(stopwords.words('french'))
        vectorizer = TfidfVectorizer()
        vectorial_base = vectorizer.fit_transform(filtered_lines).toarray()
        feature_names = vectorizer.get_feature_names_out()

        return vectorizer, vectorial_base, feature_names
    
    
    def jaccard_similarity(str1, str2):
        """
        Computes the Jaccard similarity between the token sets of two French strings.

        Both strings are lowercased and tokenized with the French tokenizer model (the same one
        `extract_keywords_french` uses), and French stop words are removed before comparison.
        Parameters:
        - str1 (str): The first text.
        - str2 (str): The second text.
        Returns:
        - float: Size of the token intersection divided by the size of the token union, or 0.0 when
          both token sets are empty.
        """
        stop_words = set(stopwords.words('french'))

        tokens_str1 = set(word_tokenize(str1.lower(), language='french')) - stop_words
        tokens_str2 = set(word_tokenize(str2.lower(), language='french')) - stop_words

        shared = len(tokens_str1 & tokens_str2)
        total = len(tokens_str1 | tokens_str2)

        # Two empty token sets have no meaningful overlap; avoid dividing by zero.
        return shared / total if total else 0.0
    
    
    def get_best_answers(question, text_lines, vectorizer, vectorial_base, top_k=3):
        """
        Retrieves the text lines that best match a question.

        Each line is ranked by a combined score (see `calculate_combined_score`) mixing the cosine
        similarity between the question's TF-IDF vector and the line's row in `vectorial_base` with
        the Jaccard similarity between the question and the line.
        Parameters:
        - question (str): The user's question.
        - text_lines (list): List of text lines. Row `i` of `vectorial_base` is paired with
          `text_lines[i]`, so both must describe the same lines in the same order.
        - vectorizer: The fitted TF-IDF vectorizer.
        - vectorial_base: The TF-IDF matrix (vectorial base).
        - top_k (int): Maximum number of answers to return (default is 3).
        Returns:
        - list: Up to `top_k` strings, best match first. Each string is the text line followed by a
          line reporting its TF-IDF and Jaccard scores.
        Raises:
        - ValueError: If `top_k` is negative.
        """
        if top_k < 0:
            raise ValueError("top_k must be non-negative")

        question_vector = vectorizer.transform([question]).toarray()

        # Cosine similarity between the question and every row of the vectorial base.
        tfidf_scores = cosine_similarity(question_vector, vectorial_base).flatten()

        jaccard_scores = [jaccard_similarity(question, text) for text in text_lines]

        combined_scores = [calculate_combined_score(tfidf_score, jaccard_score)
                           for tfidf_score, jaccard_score in zip(tfidf_scores, jaccard_scores)]

        # Indices sorted by decreasing combined score, truncated to the requested count.
        top_indices = np.argsort(combined_scores)[::-1][:top_k]

        # Each answer carries its two raw scores so the caller can display them.
        best_answers = [text_lines[i] + "\n" + "score TFIDF : " + str(tfidf_scores[i]) + " score jacard : " + str(
            jaccard_scores[i]) + "\n" for i in top_indices]

        return best_answers
    
    
    class WrappingLabel(QLabel):
        """
        QLabel variant that turns word wrapping on at construction time.
        """

        def __init__(self, text='', parent=None):
            super().__init__(text, parent)
            # Let long text flow onto several lines.
            self.setWordWrap(True)
    
    
    class StyledListWidgetItem(QListWidgetItem):
        """
        QListWidgetItem variant for the chat history list that can attach a selection palette to itself.
        """

        def __init__(self, text='', parent=None):
            super().__init__(parent)
            self.setText(text)

        def initStyle(self):
            """
            Builds a palette holding the selection colours and stores it on the item under `Qt.UserRole`.
            """
            selection_palette = QPalette()
            # Background colour of the selected entry in the history list.
            selection_palette.setColor(QPalette.Highlight, QColor("#4b5261"))
            # Text colour of the selected entry in the history list.
            selection_palette.setColor(QPalette.HighlightedText, QColor("#ff0000"))
            self.setData(Qt.UserRole, selection_palette)
    
    
    class StyledListWidget(QListWidget):
        """
        QListWidget variant used for the chat history: dark stylesheet, no alternating row colours.
        """

        def __init__(self, parent=None):
            super().__init__(parent)
            self.setAlternatingRowColors(False)
            # Dark background, light text and rounded corners for the history list.
            self.setStyleSheet("""
                QListWidget {
                    background-color: #282c34;  /* Couleur de fond pour la liste d'historique */
                    color: #abb2bf;  /* Couleur du texte dans la liste d'historique */
                    border-radius: 10px;  /* Coins arrondis */
                }
            """)

        def addStyledItem(self, text):
            """
            Appends a new `StyledListWidgetItem` to the list after initialising its style.
            Parameters:
            - text (str): The text shown by the new item.
            """
            styled_item = StyledListWidgetItem(text)
            styled_item.initStyle()
            self.addItem(styled_item)
    
    
    class ChatbotInterface(QWidget):
        """
        Main class representing the chatbot interface. Initializes the UI and handles user interactions.

        The window shows the current exchange on the left and a clickable history of past requests
        on the right. Answers are retrieved from the paragraphs of a text file with
        `get_best_answers` and summarized with `flant5_summarizer`.
        """

        def __init__(self):
            super().__init__()

            # History state is created first so every instance owns these attributes,
            # even when loading the document fails below.
            self.command_history = []  # Labels shown in the history list, oldest first
            self.history_entries = []  # (question, response) pairs, parallel to command_history
            self.dico = {}  # label -> response (kept for backward compatibility)
            self.dico2 = {}  # label -> original question (kept for backward compatibility)
            self.text_lines = []
            self.vectorizer = None
            self.vectorial_base = None

            file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt'
            # Must match the length filter applied inside create_vectorial_base so that
            # row i of the TF-IDF matrix really describes self.text_lines[i].
            min_chars = 10

            try:
                paragraphs = read_text_file(file_path)
            except OSError:
                # A missing or unreadable file is reported like an empty one.
                paragraphs = []

            if not paragraphs:
                print("The file is empty or doesn't exist.")
                return

            self.text_lines = [line for line in paragraphs if len(line) >= min_chars]
            self.vectorizer, self.vectorial_base, _ = create_vectorial_base(self.text_lines, min_chars=min_chars)
            if self.vectorizer is None:
                # create_vectorial_base already reported that no usable line was found.
                return

            self.init_ui()

        def init_ui(self):
            """
            Initializes the user interface.
            """
            # Create the widgets
            self.conversation_text = QTextEdit(self)
            self.conversation_text.setFont(QFont("consolas", 9))
            self.conversation_text.setReadOnly(True)

            self.user_input_entry = QLineEdit(self)
            self.user_input_entry.setPlaceholderText("Saisissez votre message...")
            self.user_input_entry.setMinimumHeight(40)

            self.send_button = QPushButton("Envoyer", self)
            self.send_button.setMinimumSize(self.user_input_entry.width(), 30)  # Adjust as needed
            self.send_button.setMaximumSize(200, 60)
            self.send_button.clicked.connect(self.send_message)

            # History list on the right
            self.history_list_widget = StyledListWidget(self)
            self.history_list_widget.setFixedWidth(200)  # Adjust the width as needed

            # Set up the layouts
            layout = QVBoxLayout(self)
            h_layout = QHBoxLayout()

            # Widgets on the left
            left_layout = QVBoxLayout()
            left_layout.addWidget(self.conversation_text)
            left_layout.addWidget(self.user_input_entry)

            # Add the "Envoyer" button with a reduced size
            self.send_button.setMaximumWidth(self.send_button.width() // 3)
            left_layout.addWidget(self.send_button, alignment=Qt.AlignRight)
            h_layout.addLayout(left_layout)

            # History list on the right
            h_layout.addWidget(self.history_list_widget)
            layout.addLayout(h_layout)

            # Size policy letting the conversation area grow vertically
            size_policy = QSizePolicy(QSizePolicy.Preferred, QSizePolicy.Expanding)
            self.conversation_text.setSizePolicy(size_policy)

            # Configure the main window
            icon = QIcon("../public/chatbot.png")
            self.setWindowIcon(icon)
            self.setWindowTitle('chatbot')
            self.setGeometry(100, 100, 800, 600)

            # Apply the styles
            self.setStyleSheet("""
                QWidget {
                    background-color: #282c34;  /* Couleur principale de fond pour l'application */
                    color: #abb2bf;  /* Couleur du texte principal */
                }

                QTextEdit, QLineEdit {
                    background-color: #2c313a;  /* Couleur de fond pour la zone de texte et d'entrée utilisateur */
                    color: #abb2bf;  /* Couleur du texte dans la zone de texte et d'entrée utilisateur */
                    border-radius: 10px;  /* Coins arrondis */
                }

                QPushButton {
                    background-color: #61afef;  /* Couleur de fond pour le bouton Envoyer */
                    color: #282c34;  /* Couleur du texte sur le bouton Envoyer */
                    border-radius: 10px;  /* Coins arrondis */
                }


            """)

            self.user_input_entry.returnPressed.connect(self.send_message)
            # Connected exactly once: a second connection would run the slot twice per click.
            self.history_list_widget.itemClicked.connect(self.history_item_clicked)

        def send_message(self):
            """
            Handles the user's input, processes it, and displays the chatbot's response.

            Empty or whitespace-only input is ignored. Otherwise the best matching paragraphs are
            retrieved, summarized, displayed, and the exchange is appended to the history.
            """
            user_command = unidecode(self.user_input_entry.text()).strip()
            if not user_command:
                return

            self.conversation_text.clear()
            self.conversation_text.append(f"demande élève: {user_command}")
            self.conversation_text.append("Réponse du chatbot pour la demande: ")

            best_answers = get_best_answers(user_command, self.text_lines, self.vectorizer, self.vectorial_base)
            flant5_summarized_text = "La réponse formulée par flan-T5 (1024) : \n" + flant5_summarizer(
                " ".join(best_answers))

            sources = "".join(f"{i}. {answer.strip()}\n\n" for i, answer in enumerate(best_answers, start=1))
            chatbot_response = (flant5_summarized_text
                                + "\n\n Les documents utilisés pour formuler cette réponse sont : \n\n\n"
                                + sources)
            self.conversation_text.append(chatbot_response)

            # Add the request to the history. The label is the keyword summary of the
            # question; fall back to the question itself when no keyword survives so the
            # history never shows a blank entry.
            history_label = extract_keywords_french(user_command) or user_command
            self.command_history.append(history_label)
            # Stored per position so two questions sharing the same keywords keep
            # their own question and response.
            self.history_entries.append((user_command, chatbot_response))
            self.dico2[history_label] = user_command
            self.dico[history_label] = chatbot_response

            # Refresh the history list
            self.update_history_list()

            self.user_input_entry.clear()

        def update_history_list(self):
            """
            Updates the chat history list in the UI.
            """
            self.history_list_widget.clear()
            for command in self.command_history:
                self.history_list_widget.addStyledItem(command)

        def history_item_clicked(self, item):
            """
            Displays the chat history when an item is clicked.
            Parameters:
            - item: The clicked item.
            """
            self.conversation_text.clear()
            # Show the stored question and response of the clicked history entry again.
            selected_index = self.history_list_widget.row(item)
            if not 0 <= selected_index < len(self.history_entries):
                return

            question, response = self.history_entries[selected_index]
            self.conversation_text.append(f"demande élève: {question}")
            chatbot_response = f"Réponse du chatbot pour la demande: \n{response}"
            self.conversation_text.append(chatbot_response)
    
    
    if __name__ == '__main__':
        # Script entry point: build the application, size and place the window, run the event loop.
        app = QApplication(sys.argv)
        chatbot_app = ChatbotInterface()
        screen = app.primaryScreen()
        available = screen.availableGeometry()

        # Window takes three fifths of the available width and the available height minus 48 pixels.
        new_width = available.width() * 3 // 5
        new_height = available.height() - 48
        chatbot_app.resize(new_width, new_height)

        # Centre the window horizontally and pin it to the top of the screen.
        screen_center_x = available.center().x()
        window_center_x = chatbot_app.rect().center().x()
        center_point = screen_center_x - window_center_x
        chatbot_app.move(center_point, 0)

        chatbot_app.show()
        sys.exit(app.exec_())