Skip to content
Snippets Groups Projects
Commit 98a6d660 authored by Tiravy Amaury
Browse files

add_score

parent c3b642e5
No related branches found
No related tags found
1 merge request!6Test amau
def calculate_combined_score(tfidf_score, jaccard_score, *, tfidf_weight=0.7, jaccard_weight=0.3):
    """Blend a TF-IDF based score and a Jaccard score into one weighted score.

    Args:
        tfidf_score: Score derived from the TF-IDF representation.
        jaccard_score: Jaccard similarity score.
        tfidf_weight: Multiplier applied to ``tfidf_score``. Defaults to 0.7.
        jaccard_weight: Multiplier applied to ``jaccard_score``. Defaults to 0.3.

    Returns:
        ``tfidf_weight * tfidf_score + jaccard_weight * jaccard_score``.
    """
    # The weights are keyword-only so that existing two-argument calls keep
    # their meaning while callers can still tune the balance explicitly.
    return tfidf_weight * tfidf_score + jaccard_weight * jaccard_score
def get_best_answers(question, text_lines, vectorizer, vectorial_base, top_n=3):
    """Return the text lines that best match ``question``.

    Each line is ranked by a weighted blend (see ``calculate_combined_score``)
    of the cosine similarity between the vectorized question and the line's
    row in ``vectorial_base``, and the Jaccard similarity between the question
    and the raw line.

    Args:
        question: The question string to match.
        text_lines: Candidate text lines.
        vectorizer: Fitted vectorizer; its ``transform`` result must expose
            ``toarray()``.
        vectorial_base: Vectorized candidates compared against the question.
            NOTE(review): presumably row ``i`` corresponds to
            ``text_lines[i]`` -- confirm against the code that builds it.
        top_n: Maximum number of lines to return. Defaults to 3.

    Returns:
        Up to ``top_n`` lines, best first, each with a trailing newline.
    """
    # Nothing to rank: bail out before touching the vectorizer.
    if len(text_lines) == 0 or top_n <= 0:
        return []

    question_vector = vectorizer.transform([question]).toarray()

    # The cosine similarity against the vectorial base is the scalar TF-IDF
    # score of each line. Re-vectorizing every line would produce whole
    # vectors rather than scores, which cannot be ranked.
    similarities = cosine_similarity(question_vector, vectorial_base).flatten()
    jaccard_similarities = [jaccard_similarity(question, text) for text in text_lines]

    combined_scores = [
        calculate_combined_score(tfidf_score, jaccard_score)
        for tfidf_score, jaccard_score in zip(similarities, jaccard_similarities)
    ]

    # argsort is ascending: keep the last ``top_n`` entries and reverse them
    # so that the best match comes first.
    top_indices = np.argsort(combined_scores)[-top_n:][::-1]
    return [text_lines[i] + "\n" for i in top_indices]
import sys
from PyQt5.QtWidgets import QApplication, QWidget, QHBoxLayout, QVBoxLayout, QTextEdit, QLineEdit, QPushButton, QSizePolicy, QListWidget, QListWidgetItem, QLabel
from PyQt5.QtCore import Qt
......@@ -64,6 +36,9 @@ def extract_keywords_french(sentence):
keywords = [word for word in words if word.lower() not in stop_words]
return ' '.join(keywords)
def calculate_combined_score(tfidf_score, jaccard_score, *, tfidf_weight=0.6, jaccard_weight=0.4):
    """Blend a TF-IDF based score and a Jaccard score into one weighted score.

    Args:
        tfidf_score: Score derived from the TF-IDF representation.
        jaccard_score: Jaccard similarity score.
        tfidf_weight: Multiplier applied to ``tfidf_score``. Defaults to 0.6.
        jaccard_weight: Multiplier applied to ``jaccard_score``. Defaults to 0.4.

    Returns:
        ``tfidf_weight * tfidf_score + jaccard_weight * jaccard_score``.
    """
    # The weights are keyword-only so that existing two-argument calls keep
    # their meaning while callers can still tune the balance explicitly.
    return tfidf_weight * tfidf_score + jaccard_weight * jaccard_score
def create_vectorial_base(text_lines, min_chars=10):
"""
......@@ -124,7 +99,7 @@ def get_best_answers(question, text_lines, vectorizer, vectorial_base):
# Get the indices of the top 3 most similar text lines
top_indices = np.argsort(combined_scores)[-3:][::-1]
# Retrieve the corresponding text lines
best_answers = [text_lines[i]+"\n" for i in top_indices]
best_answers = [text_lines[i]+"\n"+"score TFIDF : "+str(similarities[i])+" score jacard : "+str(jaccard_similarities[i])+"\n" for i in top_indices]
return best_answers
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment