Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
chatbot
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
option3a
chatbot
Commits
7daa37dd
Commit
7daa37dd
authored
Mar 18, 2024
by
Tiravy Amaury
Browse files
Options
Downloads
Patches
Plain Diff
add_doc2vect
parent
f395eb54
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
test_doc2vect.py
+335
-0
335 additions, 0 deletions
test_doc2vect.py
with
335 additions
and
0 deletions
test_doc2vect.py
0 → 100644
+
335
−
0
View file @
7daa37dd
import
sys
from
PyQt5.QtWidgets
import
QApplication
,
QWidget
,
QHBoxLayout
,
QVBoxLayout
,
QTextEdit
,
QLineEdit
,
QPushButton
,
QSizePolicy
,
QListWidget
,
QListWidgetItem
,
QLabel
from
PyQt5.QtCore
import
Qt
from
PyQt5.QtGui
import
QPalette
,
QColor
,
QFont
,
QIcon
from
sklearn.feature_extraction.text
import
TfidfVectorizer
from
sklearn.metrics.pairwise
import
cosine_similarity
import
numpy
as
np
from
nltk.corpus
import
stopwords
from
nltk.tokenize
import
word_tokenize
from
unidecode
import
unidecode
from
gensim.models.doc2vec
import
Doc2Vec
,
TaggedDocument
def read_text_file(file_path):
    """
    Read a UTF-8 text file and return its paragraphs, transliterated with `unidecode`.

    The content is split wherever two consecutive newlines occur, and every
    paragraph that is kept is passed through `unidecode`.

    Parameters:
    - file_path (str): The path to the text file.

    Returns:
    - list: The paragraphs of the file, in file order, without the empty or
      whitespace-only ones.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        paragraphs = file.read().split('\n\n')
    # A run of three or more newlines, or a newline at the very end of the
    # file, leaves fragments such as "\n" behind after the split. They hold no
    # text, so they are dropped together with the truly empty strings.
    return [unidecode(paragraph) for paragraph in paragraphs if paragraph.strip()]
def extract_keywords_french(sentence):
    """
    Extract the keywords of a French sentence by removing stop words.

    The sentence is tokenized with NLTK's French settings; tokens whose
    lower-cased form is an NLTK French stop word or one of the question
    words listed below are dropped.

    Parameters:
    - sentence (str): The input sentence.

    Returns:
    - str: The remaining tokens joined by single spaces (possibly empty).
    """
    mots_questions = [
        'qui', 'quoi', 'où', 'quand', 'pourquoi', 'comment', 'quel', 'quelle',
        'quels', 'quelles', 'est-ce que', 'y a-t-il', 'peut-on', 'sont-ils',
        'sont-elles', 'combien', 'lequel', 'laquelle', 'lesquels',
        'lesquelles', 'est-ce', 'n\'est-ce pas', 'savoir', 'pouvez-vous',
        'êtes-vous', 'avez-vous', 'dois-je', 'quelqu\'un', 'quelque chose',
    ]
    # NOTE(review): entries containing a space presumably never match, since
    # tokens are compared one at a time below - confirm and prune if so.
    stop_words = set(stopwords.words('french')).union(mots_questions)
    # The caller in this file transliterates the sentence with unidecode before
    # passing it in, so accented stop words ('où', 'été', ...) would never
    # match. Add the transliterated spellings while keeping the accented ones.
    stop_words |= {unidecode(word) for word in stop_words}

    tokens = word_tokenize(sentence, language='french')
    return ' '.join(token for token in tokens if token.lower() not in stop_words)
def calculate_combined_score(tfidf_score, jaccard_score, tfidf_weight=1, jaccard_weight=0):
    """
    Combine two similarity scores into one weighted sum.

    Parameters:
    - tfidf_score: The first score. Despite its name, the caller in this file
      passes the cosine similarity of Doc2Vec vectors.
    - jaccard_score: The Jaccard similarity score.
    - tfidf_weight: Weight applied to `tfidf_score` (default 1).
    - jaccard_weight: Weight applied to `jaccard_score` (default 0, i.e. the
      Jaccard score does not influence the ranking unless a weight is given).

    Returns:
    - The value `tfidf_weight * tfidf_score + jaccard_weight * jaccard_score`.
    """
    return tfidf_weight * tfidf_score + jaccard_weight * jaccard_score
def create_vectorial_base(text_lines, min_chars=10):
    """
    Train a Doc2Vec model on a list of text lines and vectorize those lines.

    Only lines of at least `min_chars` characters are used. The returned
    vectors and tags follow the order of those kept lines, so position `i`
    refers to the i-th kept line and not to `text_lines[i]` when shorter lines
    were skipped.

    Parameters:
    - text_lines (list): List of text lines.
    - min_chars (int): Minimum number of characters required for a line to be included (default is 10).

    Returns:
    - tuple: `(model, vectorial_base, tag_labels)` where `model` is the trained
      Doc2Vec model, `vectorial_base` is a list with one inferred vector per
      kept line, and `tag_labels` is the list of their tags ("0", "1", ...).
      `(None, None, None)` is returned when no line is long enough.
    """
    filtered_lines = [line for line in text_lines if len(line) >= min_chars]

    if not filtered_lines:
        # Report the threshold actually in use rather than a fixed number.
        print(f"No lines with at least {min_chars} characters found.")
        return None, None, None

    # Each kept line becomes one document, split on whitespace and tagged
    # with its position in the filtered list.
    tagged_data = [
        TaggedDocument(words=line.split(), tags=[str(i)])
        for i, line in enumerate(filtered_lines)
    ]

    # Train Doc2Vec model
    model = Doc2Vec(vector_size=100, window=5, min_count=1, workers=4, epochs=20)
    model.build_vocab(tagged_data)
    model.train(tagged_data, total_examples=model.corpus_count, epochs=model.epochs)

    # Generate vectors by inference, the same way a question is vectorized later
    vectorial_base = [model.infer_vector(doc.words) for doc in tagged_data]
    tag_labels = [doc.tags[0] for doc in tagged_data]

    return model, vectorial_base, tag_labels
def jaccard_similarity(str1, str2):
    """
    Compute the Jaccard similarity of two texts over their non-stop-word tokens.

    Both texts are lower-cased and tokenized with NLTK's French settings, the
    same ones `extract_keywords_french` uses. French stop words are removed
    before the sets are compared.

    Parameters:
    - str1 (str): The first text.
    - str2 (str): The second text.

    Returns:
    - float: Size of the intersection divided by the size of the union of the
      two token sets, or 0.0 when both sets are empty.
    """
    stop_words = set(stopwords.words('french'))
    # The texts compared in this file have been transliterated with unidecode,
    # so accented stop words ('été', 'même', ...) would never match. Add the
    # transliterated spellings while keeping the accented ones.
    stop_words |= {unidecode(word) for word in stop_words}

    tokens_str1 = set(word_tokenize(str1.lower(), language='french')) - stop_words
    tokens_str2 = set(word_tokenize(str2.lower(), language='french')) - stop_words

    combined = tokens_str1 | tokens_str2
    if not combined:
        return 0.0
    return len(tokens_str1 & tokens_str2) / len(combined)
def get_best_answers(question, text_lines, model, vectorial_base):
    """
    Return the text lines that score highest against a question (at most 3).

    The question is split on whitespace and vectorized with `model`; each
    document vector is scored by cosine similarity, and the scores are merged
    with the Jaccard similarity through `calculate_combined_score`. The
    positions of the best scores are then used to index `text_lines`, so
    `text_lines` and `vectorial_base` have to be in the same order.

    Parameters:
    - question (str): The user's question.
    - text_lines (list): List of text lines.
    - model: The trained Doc2Vec model used to infer the question vector.
    - vectorial_base: One document vector per text line.

    Returns:
    - list: Up to 3 strings, best match first, each made of the text line
      followed by its two scores.
    """
    # Vectorize the question as a single-row matrix
    query_vector = model.infer_vector(question.split()).reshape(1, -1)
    document_matrix = np.array(vectorial_base)

    # One cosine score per document vector
    similarities = cosine_similarity(query_vector, document_matrix).flatten()

    # One Jaccard score per text line
    jaccard_similarities = [jaccard_similarity(question, text) for text in text_lines]

    combined_scores = [
        calculate_combined_score(cosine_score, jaccard_score)
        for cosine_score, jaccard_score in zip(similarities, jaccard_similarities)
    ]

    # Ascending sort, keep the last three, then reverse to get best first
    top_indices = np.argsort(combined_scores)[-3:][::-1]

    best_answers = []
    for i in top_indices:
        # str() is kept explicit so the numpy scores print exactly as before
        best_answers.append(
            text_lines[i] + "\n"
            + "score doc2vect : " + str(similarities[i])
            + " score jacard : " + str(jaccard_similarities[i]) + "\n"
        )
    return best_answers
class WrappingLabel(QLabel):
    """
    QLabel that has word wrapping switched on from construction.
    """

    def __init__(self, text='', parent=None):
        super().__init__(text, parent)
        # Wrap long text instead of letting the label grow horizontally
        self.setWordWrap(True)
class StyledListWidgetItem(QListWidgetItem):
    """
    QListWidgetItem used for the entries of the chat history list.
    """

    def __init__(self, text='', parent=None):
        super().__init__(parent)
        self.setText(text)

    def initStyle(self):
        """
        Build a palette with selection colours and attach it to the item.

        NOTE(review): the palette is stored under Qt.UserRole; nothing visible
        in this file reads it back, so confirm it has a visual effect.
        """
        selection_palette = QPalette()
        role_colours = (
            (QPalette.Highlight, "#4b5261"),        # background of the selected item
            (QPalette.HighlightedText, "#ff0000"),  # text of the selected item
        )
        for role, colour in role_colours:
            selection_palette.setColor(role, QColor(colour))
        self.setData(Qt.UserRole, selection_palette)
class StyledListWidget(QListWidget):
    """
    QListWidget with the dark style sheet used for the chat history list.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setAlternatingRowColors(False)
        style_sheet = """
            QListWidget {
                background-color: #282c34; /* Couleur de fond pour la liste d'historique */
                color: #abb2bf; /* Couleur du texte dans la liste d'historique */
                border-radius: 10px; /* Coins arrondis */
            }
        """
        self.setStyleSheet(style_sheet)

    def addStyledItem(self, text):
        """
        Append a new history entry to the list.

        Parameters:
        - text (str): The text to be added to the list.
        """
        entry = StyledListWidgetItem(text)
        entry.initStyle()
        self.addItem(entry)
class ChatbotInterface(QWidget):
    """
    Main window of the chatbot: builds the UI, answers questions from a
    Doc2Vec base trained on a text file, and keeps a clickable history.
    """

    def __init__(self):
        super().__init__()

        # State used by the signal handlers. It is created before the UI so
        # that no handler can ever run against a half-initialised object.
        self.command_history = []   # history labels, in display order
        self.dico = {}              # label -> chatbot response (latest for that label)
        self.dico2 = {}             # label -> original question (latest for that label)
        # One (question, response) pair per row of the history list. Two
        # questions can produce the same label, so rows cannot be looked up
        # by label without mixing them up.
        self._history_entries = []
        self.text_lines = []
        self.model = None
        self.vectorial_base = None

        file_path = 'reglementdescolarite-ingegeneraliste2324-1.docx.txt'
        min_chars = 10
        try:
            paragraphs = read_text_file(file_path)
        except OSError:
            # A missing or unreadable file is reported like an empty one
            paragraphs = []

        if not paragraphs:
            print("The file is empty or doesn't exist.")
        else:
            # create_vectorial_base only vectorises lines of at least
            # `min_chars` characters. Keeping exactly those lines here makes
            # position i of the vector base refer to self.text_lines[i],
            # which is what get_best_answers relies on.
            self.text_lines = [line for line in paragraphs if len(line) >= min_chars]
            self.model, self.vectorial_base, _ = create_vectorial_base(
                self.text_lines, min_chars=min_chars
            )

        self.init_ui()

    def init_ui(self):
        """
        Initializes the user interface.
        """
        # Create the widgets
        self.conversation_text = QTextEdit(self)
        self.conversation_text.setFont(QFont("consolas", 9))
        self.conversation_text.setReadOnly(True)

        self.user_input_entry = QLineEdit(self)
        self.user_input_entry.setPlaceholderText("Saisissez votre message...")
        self.user_input_entry.setMinimumHeight(40)

        self.send_button = QPushButton("Envoyer", self)
        self.send_button.setMinimumSize(self.user_input_entry.width(), 30)  # Adjust as needed
        self.send_button.setMaximumSize(200, 60)
        self.send_button.clicked.connect(self.send_message)

        # History on the right. The click signal is connected once only: a
        # second connection would run the handler twice for every click.
        self.history_list_widget = StyledListWidget(self)
        self.history_list_widget.itemClicked.connect(self.history_item_clicked)
        self.history_list_widget.setFixedWidth(200)  # Adjust the width as needed

        # Set up the layout
        layout = QVBoxLayout(self)
        h_layout = QHBoxLayout()

        # Widgets on the left
        left_layout = QVBoxLayout()
        left_layout.addWidget(self.conversation_text)
        left_layout.addWidget(self.user_input_entry)

        # Add the "Envoyer" button with a reduced size
        self.send_button.setMaximumWidth(self.send_button.width() // 3)
        left_layout.addWidget(self.send_button, alignment=Qt.AlignRight)

        h_layout.addLayout(left_layout)

        # History on the right
        h_layout.addWidget(self.history_list_widget)

        layout.addLayout(h_layout)

        # Let the conversation area expand vertically
        size_policy = QSizePolicy(QSizePolicy.Preferred, QSizePolicy.Expanding)
        self.conversation_text.setSizePolicy(size_policy)

        # Main window settings
        icon = QIcon("chatbot.png")
        self.setWindowIcon(icon)
        self.setWindowTitle('chatbot')
        self.setGeometry(100, 100, 800, 600)

        # Apply the styles
        self.setStyleSheet("""
            QWidget {
                background-color: #282c34; /* Couleur principale de fond pour l'application */
                color: #abb2bf; /* Couleur du texte principal */
            }
            QTextEdit, QLineEdit {
                background-color: #2c313a; /* Couleur de fond pour la zone de texte et d'entrée utilisateur */
                color: #abb2bf; /* Couleur du texte dans la zone de texte et d'entrée utilisateur */
                border-radius: 10px; /* Coins arrondis */
            }
            QPushButton {
                background-color: #61afef; /* Couleur de fond pour le bouton Envoyer */
                color: #282c34; /* Couleur du texte sur le bouton Envoyer */
                border-radius: 10px; /* Coins arrondis */
            }
        """)

        # Pressing Enter in the input field sends the message too
        self.user_input_entry.returnPressed.connect(self.send_message)

    def send_message(self):
        """
        Handles the user's input, processes it, and displays the chatbot's response.

        Empty or whitespace-only input is ignored. When no model could be
        built at start-up, a notice is shown instead of an answer and nothing
        is added to the history.
        """
        user_command = unidecode(self.user_input_entry.text()).strip()
        if not user_command:
            return

        self.conversation_text.clear()
        self.conversation_text.append(f"demande élève: {user_command}")
        self.conversation_text.append("Réponse du chatbot pour la demande:")

        if self.model is None:
            # No text could be loaded or vectorised, so there is nothing to search
            self.conversation_text.append("Aucune base de connaissances n'est disponible.")
            self.user_input_entry.clear()
            return

        best_answers = get_best_answers(
            user_command, self.text_lines, self.model, self.vectorial_base
        )
        chatbot_response = "".join(
            f"{i}. {answer.strip()}\n\n"
            for i, answer in enumerate(best_answers, start=1)
        )
        self.conversation_text.append(chatbot_response)

        # Add the command to the history. A question made only of stop words
        # yields an empty keyword string; fall back to the question itself so
        # the history row is never blank.
        history_label = extract_keywords_french(user_command) or user_command
        self.command_history.append(history_label)
        self._history_entries.append((user_command, chatbot_response))
        self.dico2[history_label] = user_command
        self.dico[history_label] = chatbot_response

        # Refresh the history list
        self.update_history_list()

        self.user_input_entry.clear()

    def update_history_list(self):
        """
        Updates the chat history list in the UI.
        """
        self.history_list_widget.clear()
        for command in self.command_history:
            self.history_list_widget.addStyledItem(command)

    def history_item_clicked(self, item):
        """
        Displays the stored question and response of the clicked history row.

        Parameters:
        - item: The clicked item.
        """
        self.conversation_text.clear()

        # Look the exchange up by row so that entries sharing the same label
        # each show their own question and response.
        selected_index = self.history_list_widget.row(item)
        if 0 <= selected_index < len(self._history_entries):
            question, response = self._history_entries[selected_index]
            self.conversation_text.append(f"demande élève: {question}")
            chatbot_response = f"Réponse du chatbot pour la demande:\n{response}"
            self.conversation_text.append(chatbot_response)
if __name__ == '__main__':
    # Script entry point: create the application and the chatbot window
    app = QApplication(sys.argv)
    chatbot_app = ChatbotInterface()

    # Usable area of the primary screen, queried once and reused below
    available = app.primaryScreen().availableGeometry()

    # Size the window: three fifths of the width, full height minus 48 pixels
    window_width = available.width() * 3 // 5
    window_height = available.height() - 48
    chatbot_app.resize(window_width, window_height)

    # Centre the window horizontally (computed after the resize) at the top
    left_edge = available.center().x() - chatbot_app.rect().center().x()
    chatbot_app.move(left_edge, 0)

    chatbot_app.show()
    sys.exit(app.exec_())
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment