Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
chatbot
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
option3a
chatbot
Commits
98a6d660
Commit
98a6d660
authored
Jan 31, 2024
by
Tiravy Amaury
Browse files
Options
Downloads
Patches
Plain Diff
add_score
parent
c3b642e5
No related branches found
No related tags found
1 merge request
!6
Test amau
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
test_combine.py
+4
-29
4 additions, 29 deletions
test_combine.py
with
4 additions
and
29 deletions
test_combine.py
+
4
−
29
View file @
98a6d660
def calculate_combined_score(tfidf_score, jaccard_score, *,
                             tfidf_weight=0.7, jaccard_weight=0.3):
    """Return the weighted sum of a TF-IDF score and a Jaccard score.

    Args:
        tfidf_score: Score derived from the TF-IDF representation.
        jaccard_score: Jaccard similarity score.
        tfidf_weight: Weight applied to ``tfidf_score`` (default 0.7).
        jaccard_weight: Weight applied to ``jaccard_score`` (default 0.3).

    Returns:
        ``tfidf_weight * tfidf_score + jaccard_weight * jaccard_score``.
    """
    # The weights are exposed as keyword-only parameters so callers can tune
    # the relative importance of each score without editing this function.
    return tfidf_weight * tfidf_score + jaccard_weight * jaccard_score
def get_best_answers(question, text_lines, vectorizer, vectorial_base, top_n=3):
    """Return the text lines that best match ``question``.

    Each line is scored by combining the cosine similarity between the
    vectorized question and the line's row in ``vectorial_base`` with the
    Jaccard similarity between the question and the line's text.

    Args:
        question: The question string.
        text_lines: Candidate text lines, in the same order as the rows of
            ``vectorial_base``.
        vectorizer: Fitted vectorizer exposing ``transform``.
        vectorial_base: Vectorized form of ``text_lines``.
        top_n: Maximum number of answers to return (default 3).

    Returns:
        A list of up to ``top_n`` text lines, best match first, each with a
        trailing newline appended.
    """
    # Nothing to rank: avoid calling the similarity helpers on empty input.
    if top_n <= 0 or len(text_lines) == 0:
        return []

    question_vector = vectorizer.transform([question]).toarray()

    # Cosine similarity between the question and each text line; this is the
    # TF-IDF based score (one scalar per line).
    similarities = cosine_similarity(question_vector, vectorial_base).flatten()

    jaccard_similarities = [jaccard_similarity(question, text) for text in text_lines]

    # Combine the TF-IDF cosine similarity with the Jaccard similarity.
    # The previous code combined raw TF-IDF vectors instead of the scalar
    # similarities, which produced arrays rather than sortable scores.
    combined_scores = [
        calculate_combined_score(tfidf_score, jaccard_score)
        for tfidf_score, jaccard_score in zip(similarities, jaccard_similarities)
    ]

    # Indices of the top_n highest combined scores, best first.
    top_indices = np.argsort(combined_scores)[-top_n:][::-1]

    # Retrieve the corresponding text lines.
    best_answers = [text_lines[i] + "\n" for i in top_indices]

    return best_answers
import
sys
from
PyQt5.QtWidgets
import
QApplication
,
QWidget
,
QHBoxLayout
,
QVBoxLayout
,
QTextEdit
,
QLineEdit
,
QPushButton
,
QSizePolicy
,
QListWidget
,
QListWidgetItem
,
QLabel
from
PyQt5.QtCore
import
Qt
...
...
@@ -64,6 +36,9 @@ def extract_keywords_french(sentence):
keywords
=
[
word
for
word
in
words
if
word
.
lower
()
not
in
stop_words
]
return
'
'
.
join
(
keywords
)
def calculate_combined_score(tfidf_score, jaccard_score, *,
                             tfidf_weight=0.6, jaccard_weight=0.4):
    """Return the weighted sum of a TF-IDF score and a Jaccard score.

    Args:
        tfidf_score: Score derived from the TF-IDF representation.
        jaccard_score: Jaccard similarity score.
        tfidf_weight: Weight applied to ``tfidf_score`` (default 0.6).
        jaccard_weight: Weight applied to ``jaccard_score`` (default 0.4).

    Returns:
        ``tfidf_weight * tfidf_score + jaccard_weight * jaccard_score``.
    """
    # The weights are exposed as keyword-only parameters so callers can tune
    # the relative importance of each score without editing this function.
    return tfidf_weight * tfidf_score + jaccard_weight * jaccard_score
def
create_vectorial_base
(
text_lines
,
min_chars
=
10
):
"""
...
...
@@ -124,7 +99,7 @@ def get_best_answers(question, text_lines, vectorizer, vectorial_base):
# Get the indices of the top 3 most similar text lines
top_indices
=
np
.
argsort
(
combined_scores
)[
-
3
:][::
-
1
]
# Retrieve the corresponding text lines
best_answers
=
[
text_lines
[
i
]
+
"
\n
"
for
i
in
top_indices
]
best_answers
=
[
text_lines
[
i
]
+
"
\n
"
+
"
score TFIDF :
"
+
str
(
similarities
[
i
])
+
"
score jacard :
"
+
str
(
jaccard_similarities
[
i
])
+
"
\n
"
for
i
in
top_indices
]
return
best_answers
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment