diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
index aeee4828d31dfa4c4d8df17901b0aecf753c2d51..552f88ff80c30cc3e7e22953469faadf6c8a7974 100644
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -4,7 +4,7 @@
     <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
       <option name="ignoredPackages">
         <value>
-          <list size="11">
+          <list size="14">
             <item index="0" class="java.lang.String" itemvalue="google-auth-oauthlib" />
             <item index="1" class="java.lang.String" itemvalue="langchain" />
             <item index="2" class="java.lang.String" itemvalue="google-auth-httplib2" />
@@ -16,6 +16,9 @@
             <item index="8" class="java.lang.String" itemvalue="sentence-transformers" />
             <item index="9" class="java.lang.String" itemvalue="chromadb" />
             <item index="10" class="java.lang.String" itemvalue="spacy" />
+            <item index="11" class="java.lang.String" itemvalue="unidecode" />
+            <item index="12" class="java.lang.String" itemvalue="sklearn" />
+            <item index="13" class="java.lang.String" itemvalue="sys" />
           </list>
         </value>
       </option>
diff --git a/__pycache__/language_models.cpython-311.pyc b/__pycache__/language_models.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..75c02867db46e28b9eb205285c3f469574629e0d
Binary files /dev/null and b/__pycache__/language_models.cpython-311.pyc differ
diff --git a/language_models.py b/language_models.py
new file mode 100644
index 0000000000000000000000000000000000000000..12e7ec11d38bd4f61c83971b02685d0515609bb4
--- /dev/null
+++ b/language_models.py
@@ -0,0 +1,38 @@
+import torch
+from transformers import RobertaTokenizerFast, EncoderDecoderModel, pipeline
+
+
+def summarize_text(text_input):  # text_input should always be a string
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    ckpt = 'mrm8488/camembert2camembert_shared-finetuned-french-summarization'
+    tokenizer = RobertaTokenizerFast.from_pretrained(ckpt)
+    model = EncoderDecoderModel.from_pretrained(ckpt).to(device)
+    inputs = tokenizer([text_input], padding="max_length", truncation=True, max_length=512, return_tensors="pt")
+    input_ids = inputs.input_ids.to(device)
+    attention_mask = inputs.attention_mask.to(device)
+    output = model.generate(input_ids, attention_mask=attention_mask)
+    return tokenizer.decode(output[0], skip_special_tokens=True)
+
+
+def qa_pipeline(text_input, user_question):
+    qa_engine = pipeline(
+        "question-answering",
+        model="cmarkea/distilcamembert-base-qa",
+        tokenizer="cmarkea/distilcamembert-base-qa"
+    )
+    result = qa_engine(
+        context=text_input,
+        question=user_question
+    )
+    return result
+
+
+def flant5_summarizer(text_input):
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+    pipe = pipeline(
+        "summarization",
+        model="bofenghuang/flan-t5-large-dialogsum-fr",
+        device=device,
+    )
+    summarized_text = pipe(text_input, max_new_tokens=1024)[0]["summary_text"]  # greedy decoding; max_new_tokens bounds the summary length
+    return summarized_text
diff --git a/requirements.txt b/requirements.txt
index 8268cfcedbd320a495b8ac0f095de49009c82515..02e58833b96f796e8d5f2edc700fd59d63c3bee0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,3 +13,6 @@ scikit-learn
 nltk
 sys
 unidecode
+transformers
+torch
+sentencepiece
\ No newline at end of file
diff --git a/visuel.py b/visuel.py
new file mode 100644
index 0000000000000000000000000000000000000000..11adeb6bf03d6af04cb3355a73045a9ae6c3c274
--- /dev/null
+++ b/visuel.py
@@ -0,0 +1,249 @@
+import sys
+
+import numpy as np
+from PyQt5.QtCore import Qt
+from PyQt5.QtGui import QPalette, QColor, QPixmap
+from PyQt5.QtWidgets import QApplication, QWidget, QHBoxLayout, QVBoxLayout, QTextEdit, QLineEdit, QPushButton, \
+    QSizePolicy, QListWidget, QListWidgetItem, QLabel
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+
+from language_models import flant5_summarizer
+
+
+def read_text_file(file_path):
+    with open(file_path, 'r', encoding='utf-8') as file:
+        content = file.read().split('\n\n')
+        content1 = [item for item in content if item != ""]
+    return content1
+
+
+def extract_keywords_textblob_french(sentence):
+    stop_words = set(stopwords.words('french'))
+    mots_questions = ['qui', 'quoi', 'où', 'quand', 'pourquoi', 'comment', 'quel', 'quelle', 'quels', 'quelles',
+                      'est-ce que', 'y a-t-il', 'peut-on', 'sont-ils', 'sont-elles', 'combien', 'lequel', 'laquelle',
+                      'lesquels', 'lesquelles', 'est-ce', 'n\'est-ce pas', 'savoir', 'pouvez-vous', 'êtes-vous',
+                      'avez-vous', 'dois-je', 'quelqu\'un', 'quelque chose']
+    stop_words = stop_words.union(mots_questions)
+    words = word_tokenize(sentence, language='french')
+    keywords = [word for word in words if word.lower() not in stop_words]
+    return ' '.join(keywords)
+
+
+def create_vectorial_base(text_lines, min_chars=50):
+    filtered_lines = [line for line in text_lines if len(line) >= min_chars]
+
+    if not filtered_lines:
+        print(f"No lines with at least {min_chars} characters found.")
+        return None, None, None
+
+    vectorizer = TfidfVectorizer()
+    vectorial_base = vectorizer.fit_transform(filtered_lines).toarray()
+    feature_names = vectorizer.get_feature_names_out()
+
+    return vectorizer, vectorial_base, feature_names
+
+
+def get_best_answers(question, text_lines, vectorizer, vectorial_base):
+    question_vector = vectorizer.transform([question]).toarray()
+
+    # Calculate cosine similarity between the question and each text line
+    similarities = cosine_similarity(question_vector, vectorial_base).flatten()
+
+    # Get the indices of the top 3 most similar text lines
+    top_indices = np.argsort(similarities)[-3:][::-1]
+    # Retrieve the corresponding text lines
+    best_answers = [text_lines[i] + "\n" for i in top_indices]
+
+    return best_answers
+
+
+class WrappingLabel(QLabel):
+    def __init__(self, text='', parent=None):
+        super(WrappingLabel, self).__init__(text, parent)
+        self.setWordWrap(True)
+
+
+class StyledListWidgetItem(QListWidgetItem):
+    def __init__(self, text='', parent=None):
+        super(StyledListWidgetItem, self).__init__(parent)
+        self.setText(text)
+
+    def initStyle(self):
+        palette = QPalette()
+        palette.setColor(QPalette.Highlight,
+                         QColor("#4b5261"))  # Background colour for the selected item in the history list
+        palette.setColor(QPalette.HighlightedText,
+                         QColor("#ff0000"))  # Text colour for the selected item in the history list
+        self.setData(Qt.UserRole, palette)
+
+
+class StyledListWidget(QListWidget):
+    def __init__(self, parent=None):
+        super(StyledListWidget, self).__init__(parent)
+        self.setAlternatingRowColors(False)
+        self.setStyleSheet("""
+            QListWidget {
+                background-color: #282c34; /* Background colour of the history list */
+                color: #abb2bf; /* Text colour in the history list */
+                border-radius: 10px; /* Rounded corners */
+            }
+        """)
+
+    def addStyledItem(self, text):
+        item = StyledListWidgetItem(text)
+        item.initStyle()
+        self.addItem(item)
+
+
+class ChatbotInterface(QWidget):
+    def __init__(self):
+        super().__init__()
+        file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt'
+        self.text_lines = [line for line in read_text_file(file_path) if len(line) >= 50]  # keep only paragraphs long enough to be indexed, so TF-IDF row indices match self.text_lines
+
+        if not self.text_lines:
+            print("No usable paragraphs were found in the file.")
+            return
+        self.vectorizer, self.vectorial_base, _ = create_vectorial_base(self.text_lines)
+        self.init_ui()
+        self.command_history = []  # Stores the command history
+        self.dico = {}
+
+    def init_ui(self):
+        # Create the widgets
+        self.conversation_text = QTextEdit(self)
+        self.conversation_text.setReadOnly(True)
+
+        self.user_input_entry = QLineEdit(self)
+        self.user_input_entry.setPlaceholderText("Saisissez votre message...")
+        self.user_input_entry.setMinimumHeight(40)
+
+        self.send_button = QPushButton("Envoyer", self)
+        self.send_button.setMinimumSize(self.user_input_entry.width(), 30)  # adjust as needed
+        self.send_button.setMaximumSize(200, 60)
+        self.send_button.clicked.connect(self.send_message)
+
+        # History panel on the right
+        self.history_list_widget = StyledListWidget(self)
+        self.history_list_widget.itemClicked.connect(self.history_item_clicked)
+        self.history_list_widget.setFixedWidth(200)  # adjust the width as needed
+
+        # Set up the layout
+        layout = QVBoxLayout(self)
+        h_layout = QHBoxLayout()
+
+        # Widgets on the left
+        left_layout = QVBoxLayout()
+        left_layout.addWidget(self.conversation_text)
+        left_layout.addWidget(self.user_input_entry)
+        # Add the "Envoyer" button at a reduced size
+        self.send_button.setMaximumWidth(self.send_button.width() // 3)
+        left_layout.addWidget(self.send_button, alignment=Qt.AlignRight)
+        h_layout.addLayout(left_layout)
+
+        # History list on the right
+        h_layout.addWidget(self.history_list_widget)
+        layout.addLayout(h_layout)
+
+        # Size policy so the conversation area expands vertically
+        size_policy = QSizePolicy(QSizePolicy.Preferred, QSizePolicy.Expanding)
+        self.conversation_text.setSizePolicy(size_policy)
+
+        # Main window settings
+        self.setWindowTitle('Chatbot Interface')
+        self.setGeometry(100, 100, 800, 600)
+
+        # Apply the styles
+        self.setStyleSheet("""
+            QWidget {
+                background-color: #282c34; /* Main background colour of the application */
+                color: #abb2bf; /* Main text colour */
+            }
+
+            QTextEdit, QLineEdit {
+                background-color: #2c313a; /* Background colour of the text area and user input */
+                color: #abb2bf; /* Text colour in the text area and user input */
+                border-radius: 10px; /* Rounded corners */
+            }
+
+            QPushButton {
+                background-color: #61afef; /* Background colour of the Envoyer button */
+                color: #282c34; /* Text colour of the Envoyer button */
+                border-radius: 10px; /* Rounded corners */
+            }
+
+
+        """)
+        self.user_input_entry.returnPressed.connect(self.send_message)
+
+    def send_message(self):
+
+        user_command = self.user_input_entry.text()
+        if len(user_command) > 0:
+            self.conversation_text.clear()
+            self.conversation_text.append(f"Commande utilisateur: {user_command}")
+
+            # Retrieve the closest paragraphs by TF-IDF similarity, then let flan-T5 phrase an answer from them
+            best_answers = get_best_answers(user_command, self.text_lines, self.vectorizer, self.vectorial_base)
+
+            flant5_summarized_text = "La réponse formulée par flan-T5 (1024) : \n" + flant5_summarizer(
+                " ".join(best_answers))
+
+            chatbot_response = flant5_summarized_text + \
+                               "\n\n Les documents utilisés pour formuler cette réponse sont : \n\n\n"
+            for i, answer in enumerate(best_answers, start=1):
+                chatbot_response += f"{i}. {answer.strip()}\n\n"
+            self.conversation_text.append(chatbot_response)
+
+            # Add the command to the history
+            user_command1 = extract_keywords_textblob_french(user_command)
+            self.command_history.append(user_command1)
+            self.dico[user_command1] = chatbot_response
+
+            # Refresh the history list
+            self.update_history_list()
+
+            self.user_input_entry.clear()
+
+    def set_logo(self, image_path):
+        if not hasattr(self, 'logo_label'):  # the logo label is created on demand; it is not added to a layout here
+            self.logo_label = QLabel(self)
+        pixmap = QPixmap(image_path)
+        self.logo_label.setPixmap(pixmap)
+        self.logo_label.setScaledContents(True)
+
+    def update_history_list(self):
+        self.history_list_widget.clear()
+        for command in self.command_history:
+            self.history_list_widget.addStyledItem(command)
+
+    def history_item_clicked(self, item):
+        self.conversation_text.clear()
+        # Show the stored content again in conversation_text when a history item is clicked
+        selected_index = self.history_list_widget.row(item)
+        if selected_index < len(self.command_history):
+            selected_command = self.command_history[selected_index]
+            self.conversation_text.append(f"Commande utilisateur: {selected_command}")
+            # Look up the stored chatbot response for this command (adjust as needed for your application)
+            chatbot_response = f"Réponse du chatbot pour la commande: {self.dico[selected_command]}"
+            self.conversation_text.append(chatbot_response)
+
+
+if __name__ == '__main__':
+    app = QApplication(sys.argv)
+    chatbot_app = ChatbotInterface()
+
+    # Centre the window
+    screen = app.primaryScreen()
+    center_point = screen.availableGeometry().center()
+    chatbot_app.move(center_point - chatbot_app.rect().center())
+
+    # Size the window to half the available width and 7/8 of the available height
+    new_width = screen.availableGeometry().width() // 2
+    chatbot_app.resize(new_width, round(screen.availableGeometry().height() * 7 / 8))
+
+    chatbot_app.show()
+    sys.exit(app.exec_())
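
Not part of the diff: a minimal, GUI-free sketch of how the retrieval and summarization pieces above fit together, assuming the corpus file referenced in visuel.py is present, the NLTK punkt and stopwords data are installed, and the Hugging Face models can be downloaded. The script name smoke_test.py and the question string are placeholders, not code from this change.

    # smoke_test.py -- illustrative only, not included in this change
    from language_models import flant5_summarizer, qa_pipeline
    from visuel import read_text_file, create_vectorial_base, get_best_answers

    # Mirror ChatbotInterface.__init__: keep only paragraphs long enough to be indexed,
    # so that row indices in the TF-IDF matrix line up with the paragraph list.
    paragraphs = [p for p in read_text_file('reglementdescolarite-ingegeneraliste2324-1.docx.txt') if len(p) >= 50]
    vectorizer, base, _ = create_vectorial_base(paragraphs)

    question = "Comment une année est-elle validée ?"  # placeholder question
    answers = get_best_answers(question, paragraphs, vectorizer, base)

    # Abstractive answer: flan-T5 summarizes the retrieved paragraphs
    print(flant5_summarizer(" ".join(answers)))

    # The extractive QA helper can be exercised the same way
    print(qa_pipeline(" ".join(answers), question)["answer"])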