Skip to content
Snippets Groups Projects
Commit dd808d6b authored by Bouchafaa's avatar Bouchafaa
Browse files

summarizer works

parent 7f7ff25d
No related branches found
No related tags found
1 merge request: !7 summarizer works
......@@ -4,7 +4,7 @@
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="11">
<list size="14">
<item index="0" class="java.lang.String" itemvalue="google-auth-oauthlib" />
<item index="1" class="java.lang.String" itemvalue="langchain" />
<item index="2" class="java.lang.String" itemvalue="google-auth-httplib2" />
......@@ -16,6 +16,9 @@
<item index="8" class="java.lang.String" itemvalue="sentence-transformers" />
<item index="9" class="java.lang.String" itemvalue="chromadb" />
<item index="10" class="java.lang.String" itemvalue="spacy" />
<item index="11" class="java.lang.String" itemvalue="unidecode" />
<item index="12" class="java.lang.String" itemvalue="sklearn" />
<item index="13" class="java.lang.String" itemvalue="sys" />
</list>
</value>
</option>
......
File added
import torch
from transformers import RobertaTokenizerFast, EncoderDecoderModel, pipeline
def summarize_text(text_input):
    """Summarize a text with the CamemBERT2CamemBERT summarization checkpoint.

    Args:
        text_input: The text to summarize; it should always be a string.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    checkpoint = 'mrm8488/camembert2camembert_shared-finetuned-french-summarization'
    if torch.cuda.is_available():
        target_device = 'cuda'
    else:
        target_device = 'cpu'

    tokenizer = RobertaTokenizerFast.from_pretrained(checkpoint)
    model = EncoderDecoderModel.from_pretrained(checkpoint).to(target_device)

    # The single input is padded/truncated to exactly 512 tokens.
    encoded = tokenizer(
        [text_input],
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    token_ids = encoded.input_ids.to(target_device)
    mask = encoded.attention_mask.to(target_device)

    generated = model.generate(token_ids, attention_mask=mask)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
def qa_pipeline(text_input, user_question):
    """Answer a question about a context with the DistilCamemBERT QA model.

    Args:
        text_input: Context passage in which the answer is searched.
        user_question: The question to answer.

    Returns:
        The result object produced by the "question-answering" pipeline
        for this context/question pair, returned unchanged.
    """
    model_name = "cmarkea/distilcamembert-base-qa"
    answerer = pipeline(
        "question-answering",
        model=model_name,
        tokenizer=model_name,
    )
    return answerer(context=text_input, question=user_question)
# Summarization pipelines already built, keyed by device string. Building the
# pipeline loads the whole model, so it is done once per device instead of on
# every call.
_FLANT5_PIPELINES = {}


def flant5_summarizer(text_input):
    """Summarize a text with the flan-T5 dialogue-summarization model.

    The pipeline is created on first use and reused by later calls, so only
    the first call pays the model-loading cost.

    Args:
        text_input: The text to summarize.

    Returns:
        The ``summary_text`` field of the first pipeline result.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    cache_key = str(device)
    pipe = _FLANT5_PIPELINES.get(cache_key)
    if pipe is None:
        pipe = pipeline(
            "summarization",
            model="bofenghuang/flan-t5-large-dialogsum-fr",
            device=device,
        )
        _FLANT5_PIPELINES[cache_key] = pipe
    # NOTE(review): both length limits are forwarded unchanged; which one
    # takes precedence is decided by transformers -- confirm against the
    # installed version.
    summarized_text = pipe(text_input, max_new_tokens=1024, max_length=2048)[0]["summary_text"]  # greedy
    return summarized_text
......@@ -13,3 +13,6 @@ scikit-learn
nltk
sys
unidecode
transformers
torch
sentencepiece
\ No newline at end of file
visuel.py 0 → 100644
import sys
import numpy as np
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QPalette, QColor, QPixmap
from PyQt5.QtWidgets import QApplication, QWidget, QHBoxLayout, QVBoxLayout, QTextEdit, QLineEdit, QPushButton, \
QSizePolicy, QListWidget, QListWidgetItem, QLabel
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from language_models import flant5_summarizer
def read_text_file(file_path):
    """Read a UTF-8 text file and split it into paragraphs.

    Paragraphs are the chunks separated by an empty line (two consecutive
    newline characters); empty chunks are dropped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The list of non-empty paragraphs, or an empty list when the file
        does not exist, so callers can treat "missing" and "empty" alike.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            raw_text = file.read()
    except FileNotFoundError:
        # The caller reports "empty or doesn't exist" on an empty result.
        return []
    return [chunk for chunk in raw_text.split('\n\n') if chunk != ""]
def extract_keywords_textblob_french(sentence):
    """Drop French stop words and question words from a sentence.

    The sentence is tokenized with NLTK's French tokenizer; every token whose
    lowercase form is a French stop word or one of the question words listed
    below is removed.

    Args:
        sentence: The sentence to reduce to its keywords.

    Returns:
        The remaining tokens, in their original order, joined by single spaces.
    """
    # NOTE(review): the empty-string entry looks like a lost word (possibly
    # "où") and the multi-word entries presumably never equal a single
    # token -- confirm before changing the list.
    question_words = ['qui', 'quoi', '', 'quand', 'pourquoi', 'comment', 'quel', 'quelle', 'quels', 'quelles',
                      'est-ce que', 'y a-t-il', 'peut-on', 'sont-ils', 'sont-elles', 'combien', 'lequel', 'laquelle',
                      'lesquels', 'lesquelles', 'est-ce', 'n\'est-ce pas', 'savoir', 'pouvez-vous', 'êtes-vous',
                      'avez-vous', 'dois-je', 'quelqu\'un', 'quelque chose']
    ignored = set(stopwords.words('french')) | set(question_words)

    kept_tokens = []
    for token in word_tokenize(sentence, language='french'):
        if token.lower() in ignored:
            continue
        kept_tokens.append(token)
    return ' '.join(kept_tokens)
def create_vectorial_base(text_lines, min_chars=50):
    """Build a TF-IDF matrix for the given text lines.

    The vocabulary and IDF weights are learned only from the lines that are
    at least ``min_chars`` characters long. The returned matrix nevertheless
    has one row per entry of ``text_lines`` so that row ``i`` always
    describes ``text_lines[i]``; rows of lines that are too short are all
    zeros.

    Args:
        text_lines: The text lines (paragraphs) to index.
        min_chars: Minimum length a line needs to take part in the index.

    Returns:
        A ``(vectorizer, vectorial_base, feature_names)`` tuple, or
        ``(None, None, None)`` when no line is long enough.
    """
    text_lines = list(text_lines)
    long_enough = [len(line) >= min_chars for line in text_lines]
    filtered_lines = [line for line, keep in zip(text_lines, long_enough) if keep]
    if not filtered_lines:
        print(f"No lines with at least {min_chars} characters found.")
        return None, None, None

    vectorizer = TfidfVectorizer()
    vectorizer.fit(filtered_lines)
    # Transform every line (not only the kept ones) so that row indices stay
    # aligned with text_lines, which is what callers use to look results up.
    vectorial_base = vectorizer.transform(text_lines).toarray()
    for row, keep in enumerate(long_enough):
        if not keep:
            # Too-short lines must not contribute any weight.
            vectorial_base[row] = 0.0
    feature_names = vectorizer.get_feature_names_out()
    return vectorizer, vectorial_base, feature_names
def get_best_answers(question, text_lines, vectorizer, vectorial_base, top_k=3):
    """Return the text lines most similar to a question.

    The question is vectorized with ``vectorizer`` and compared, by cosine
    similarity, with every row of ``vectorial_base``.

    Args:
        question: The user's question.
        text_lines: The text lines; row indices of ``vectorial_base`` are
            used to index this sequence.
        vectorizer: Fitted vectorizer providing ``transform``.
        vectorial_base: Matrix with one vector per candidate line.
        top_k: Number of lines to return (defaults to 3).

    Returns:
        Up to ``top_k`` lines, most similar first, each with a trailing
        newline appended.
    """
    if top_k <= 0:
        # A slice of [-0:] would select every row, so handle this explicitly.
        return []
    question_vector = vectorizer.transform([question]).toarray()
    # Cosine similarity between the question and each text line.
    similarities = cosine_similarity(question_vector, vectorial_base).flatten()
    # Indices of the top_k most similar lines, best match first.
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    return [text_lines[i] + "\n" for i in top_indices]
class WrappingLabel(QLabel):
    """QLabel with word wrapping switched on at construction time."""

    def __init__(self, text='', parent=None):
        """Create the label with ``text`` and ``parent`` and enable word wrap."""
        super().__init__(text, parent)
        self.setWordWrap(True)
class StyledListWidgetItem(QListWidgetItem):
    """List item that can carry a selection palette in its user data."""

    def __init__(self, text='', parent=None):
        """Create the item under ``parent`` and set its displayed text."""
        super().__init__(parent)
        self.setText(text)

    def initStyle(self):
        """Store the selection colors as a QPalette under ``Qt.UserRole``."""
        selection_colors = (
            # Background color of the selected item in the history list.
            (QPalette.Highlight, "#4b5261"),
            # Text color of the selected item in the history list.
            (QPalette.HighlightedText, "#ff0000"),
        )
        item_palette = QPalette()
        for role, hex_code in selection_colors:
            item_palette.setColor(role, QColor(hex_code))
        self.setData(Qt.UserRole, item_palette)
class StyledListWidget(QListWidget):
    """List widget with the dark history-list style sheet applied."""

    def __init__(self, parent=None):
        """Create the list, disable alternating row colors and apply the style."""
        super().__init__(parent)
        self.setAlternatingRowColors(False)
        style_sheet = """
            QListWidget {
                background-color: #282c34; /* Couleur de fond pour la liste d'historique */
                color: #abb2bf; /* Couleur du texte dans la liste d'historique */
                border-radius: 10px; /* Coins arrondis */
            }
        """
        self.setStyleSheet(style_sheet)

    def addStyledItem(self, text):
        """Append a ``StyledListWidgetItem`` showing ``text`` to the list."""
        styled_item = StyledListWidgetItem(text)
        styled_item.initStyle()
        self.addItem(styled_item)
class ChatbotInterface(QWidget):
    """Chat window that answers questions about an indexed text file.

    The left side holds the conversation view, the input field and the send
    button; the right side lists the previous commands. Clicking a history
    entry shows the stored answer again.
    """

    def __init__(self):
        """Load the source document, build its TF-IDF index and the UI.

        When the document yields no text, a message is printed and the
        interface is not built.
        """
        super().__init__()
        # State is created before the UI so that every slot can rely on it.
        self.command_history = []  # Labels of the commands already sent
        self.dico = {}  # Maps a history label to the answer that was shown
        self.logo_label = None  # Created on demand by set_logo
        self.vectorizer = None
        self.vectorial_base = None

        file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt'
        self.text_lines = read_text_file(file_path)
        if not self.text_lines:
            print("The file is empty or doesn't exist.")
            return
        self.vectorizer, self.vectorial_base, _ = create_vectorial_base(self.text_lines)
        self.init_ui()

    def init_ui(self):
        """Create the widgets, lay them out, style them and connect signals."""
        # Create the widgets
        self.conversation_text = QTextEdit(self)
        self.conversation_text.setReadOnly(True)
        self.user_input_entry = QLineEdit(self)
        self.user_input_entry.setPlaceholderText("Saisissez votre message...")
        self.user_input_entry.setMinimumHeight(40)
        self.send_button = QPushButton("Envoyer", self)
        self.send_button.setMinimumSize(self.user_input_entry.width(), 30)  # Adjust as needed
        self.send_button.setMaximumSize(200, 60)
        self.send_button.clicked.connect(self.send_message)

        # History on the right. The click signal is connected exactly once;
        # a second connection would run the slot twice per click.
        self.history_list_widget = StyledListWidget(self)
        self.history_list_widget.itemClicked.connect(self.history_item_clicked)
        self.history_list_widget.setFixedWidth(200)  # Adjust the width as needed

        # Set up the layout
        layout = QVBoxLayout(self)
        h_layout = QHBoxLayout()

        # Widgets on the left
        left_layout = QVBoxLayout()
        left_layout.addWidget(self.conversation_text)
        left_layout.addWidget(self.user_input_entry)
        # Add the "Envoyer" button with a reduced size
        self.send_button.setMaximumWidth(self.send_button.width() // 3)
        left_layout.addWidget(self.send_button, alignment=Qt.AlignRight)
        h_layout.addLayout(left_layout)

        # History on the right
        h_layout.addWidget(self.history_list_widget)
        layout.addLayout(h_layout)

        # Let the conversation area expand vertically
        size_policy = QSizePolicy(QSizePolicy.Preferred, QSizePolicy.Expanding)
        self.conversation_text.setSizePolicy(size_policy)

        # Main window settings
        self.setWindowTitle('Chatbot Interface')
        self.setGeometry(100, 100, 800, 600)

        # Apply the styles
        self.setStyleSheet("""
            QWidget {
                background-color: #282c34; /* Couleur principale de fond pour l'application */
                color: #abb2bf; /* Couleur du texte principal */
            }
            QTextEdit, QLineEdit {
                background-color: #2c313a; /* Couleur de fond pour la zone de texte et d'entrée utilisateur */
                color: #abb2bf; /* Couleur du texte dans la zone de texte et d'entrée utilisateur */
                border-radius: 10px; /* Coins arrondis */
            }
            QPushButton {
                background-color: #61afef; /* Couleur de fond pour le bouton Envoyer */
                color: #282c34; /* Couleur du texte sur le bouton Envoyer */
                border-radius: 10px; /* Coins arrondis */
            }
        """)
        self.user_input_entry.returnPressed.connect(self.send_message)

    def send_message(self):
        """Answer the command typed by the user and record it in the history.

        Does nothing when the input field is empty.
        """
        user_command = self.user_input_entry.text()
        if not user_command:
            return

        self.conversation_text.clear()
        self.conversation_text.append(f"Commande utilisateur: {user_command}")

        if self.vectorizer is None:
            # create_vectorial_base found no line long enough to index, so
            # there is nothing to search; say so instead of crashing.
            self.conversation_text.append(
                "Aucun document exploitable n'a été indexé ; impossible de répondre.")
            self.user_input_entry.clear()
            return

        best_answers = get_best_answers(user_command, self.text_lines, self.vectorizer, self.vectorial_base)
        flant5_summarized_text = "La réponse formulée par flan-T5 (1024) : \n" + flant5_summarizer(
            " ".join(best_answers))
        chatbot_response = flant5_summarized_text + \
            "\n\n Les documents utilisés pour formuler cette réponse sont : \n\n\n"
        for i, answer in enumerate(best_answers, start=1):
            chatbot_response += f"{i}. {answer.strip()}\n\n"
        self.conversation_text.append(chatbot_response)

        # Add the command to the history, labelled by its keywords
        user_command1 = extract_keywords_textblob_french(user_command)
        self.command_history.append(user_command1)
        self.dico[user_command1] = chatbot_response

        # Refresh the history list
        self.update_history_list()
        self.user_input_entry.clear()

    def set_logo(self, image_path):
        """Show the image at ``image_path`` in the logo label.

        The label is created on first use and placed at the top of the
        window's layout when one is installed.
        """
        if getattr(self, 'logo_label', None) is None:
            self.logo_label = QLabel(self)
            main_layout = self.layout()
            if main_layout is not None:
                main_layout.insertWidget(0, self.logo_label)
        pixmap = QPixmap(image_path)
        self.logo_label.setPixmap(pixmap)
        self.logo_label.setScaledContents(True)

    def update_history_list(self):
        """Rebuild the history list widget from ``command_history``."""
        self.history_list_widget.clear()
        for command in self.command_history:
            self.history_list_widget.addStyledItem(command)

    def history_item_clicked(self, item):
        """Show again the command and stored answer of a clicked history item."""
        self.conversation_text.clear()
        selected_index = self.history_list_widget.row(item)
        if selected_index < len(self.command_history):
            selected_command = self.command_history[selected_index]
            self.conversation_text.append(f"Commande utilisateur: {selected_command}")
            # The answer was stored under the same label when it was produced
            chatbot_response = f"Réponse du chatbot pour la commande: {self.dico[selected_command]}"
            self.conversation_text.append(chatbot_response)
if __name__ == '__main__':
    # Script entry point: create the application and show the chat window.
    app = QApplication(sys.argv)
    chatbot_app = ChatbotInterface()

    screen = app.primaryScreen()
    available = screen.availableGeometry()

    # Size the window to half the available width and 7/8 of the available
    # height. The factor is applied to the height itself: round(7 / 8) is 1
    # and would leave the full height.
    chatbot_app.resize(available.width() // 2, available.height() * 7 // 8)

    # Center the window once it has its final size.
    chatbot_app.move(available.center() - chatbot_app.rect().center())

    chatbot_app.show()
    sys.exit(app.exec_())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment