From 98a6d660d88ae4c1b9c863404a9a0e2d2d9c97f7 Mon Sep 17 00:00:00 2001
From: amau345 <amaury.tiravy@ecl20.ec-lyon.fr>
Date: Wed, 31 Jan 2024 17:23:25 +0100
Subject: [PATCH] add_score

---
 test_combine.py | 33 ++++-----------------------------
 1 file changed, 4 insertions(+), 29 deletions(-)

diff --git a/test_combine.py b/test_combine.py
index 1a829df..8aea1c2 100644
--- a/test_combine.py
+++ b/test_combine.py
@@ -1,31 +1,3 @@
-def calculate_combined_score(tfidf_score, jaccard_score):
-    # You can adjust the weights based on the importance of each score
-    return 0.7 * tfidf_score + 0.3 * jaccard_score
-
-def get_best_answers(question, text_lines, vectorizer, vectorial_base):
-    
-    question_vector = vectorizer.transform([question]).toarray()
-
-    # Calculate cosine similarity between the question and each text line
-    similarities = cosine_similarity(question_vector, vectorial_base).flatten()
-
-    jaccard_similarities = [jaccard_similarity(question, text) for text in text_lines]
-
-    # Calculate TF-IDF score for each text line
-    tfidf_scores = [vectorizer.transform([text]).toarray() for text in text_lines]
-
-    # Calculate combined scores using both TF-IDF and cosine similarity
-    combined_scores = [calculate_combined_score(tfidf_score, jaccard_score) 
-                       for tfidf_score, jaccard_score in zip(tfidf_scores, jaccard_similarities)]
-
-    # Get the indices of the top 3 most similar text lines based on the combined scores
-    top_indices = np.argsort(combined_scores)[-3:][::-1]
-    
-    # Retrieve the corresponding text lines along with their combined scores
-    best_answers = [text_lines[i]+"\n" for i in top_indices]
-
-    return best_answers
-
 import sys
 from PyQt5.QtWidgets import QApplication, QWidget, QHBoxLayout, QVBoxLayout, QTextEdit, QLineEdit, QPushButton, QSizePolicy, QListWidget, QListWidgetItem, QLabel
 from PyQt5.QtCore import Qt
@@ -64,6 +36,9 @@ def extract_keywords_french(sentence):
     keywords = [word for word in words if word.lower() not in stop_words]
     return ' '.join(keywords)
 
+def calculate_combined_score(tfidf_score, jaccard_score):
+    # Weighted sum of the two scores (0.6 for tfidf_score, 0.4 for jaccard_score); adjust the weights to change their relative importance
+    return 0.6 * tfidf_score + 0.4 * jaccard_score
 
 def create_vectorial_base(text_lines, min_chars=10):
     """
@@ -124,7 +99,7 @@ def get_best_answers(question, text_lines, vectorizer, vectorial_base):
     # Get the indices of the top 3 most similar text lines
     top_indices = np.argsort(combined_scores)[-3:][::-1]
     # Retrieve the corresponding text lines
-    best_answers = [text_lines[i]+"\n" for i in top_indices]
+    best_answers = [text_lines[i]+"\n"+"score TFIDF : "+str(similarities[i])+" score jacard : "+str(jaccard_similarities[i])+"\n" for i in top_indices]
 
     return best_answers
 
-- 
GitLab