add_score

98a6d660 · Tiravy Amaury · c3b642e5 · 98a6d660
Commit 98a6d660 authored Jan 31, 2024 by Tiravy Amaury
--- a/test_combine.py
+++ b/test_combine.py
-def calculate_combined_score(tfidf_score, jaccard_score):
-    # Weighted sum of the two scores (0.7 for tfidf_score, 0.3 for jaccard_score); tune the weights to change each score's importance
-    return 0.7 * tfidf_score + 0.3 * jaccard_score
-
-def get_best_answers(question, text_lines, vectorizer, vectorial_base):
-    # Score every entry of text_lines against the question and return three of them, each with a trailing "\n"
-    question_vector = vectorizer.transform([question]).toarray()
-
-    # Cosine similarity between the question vector and vectorial_base (NOTE(review): `similarities` is not read again in this version)
-    similarities = cosine_similarity(question_vector, vectorial_base).flatten()
-
-    jaccard_similarities = [jaccard_similarity(question, text) for text in text_lines]
-
-    # Transform each text line with the vectorizer (NOTE(review): presumably yields a dense vector per line, not a scalar score — confirm)
-    tfidf_scores = [vectorizer.transform([text]).toarray() for text in text_lines]
-
-    # Combine each vectorizer output with the matching Jaccard similarity (the cosine similarities above are not used here)
-    combined_scores = [calculate_combined_score(tfidf_score, jaccard_score) 
-                       for tfidf_score, jaccard_score in zip(tfidf_scores, jaccard_similarities)]
-
-    # Get the indices of the top 3 most similar text lines based on the combined scores
-    top_indices = np.argsort(combined_scores)[-3:][::-1]
-    
-    # Retrieve the corresponding text lines, each followed by a newline (scores are not included in the output)
-    best_answers = [text_lines[i]+"\n" for i in top_indices]
-
-    return best_answers
-
 import sys
 from PyQt5.QtWidgets import QApplication, QWidget, QHBoxLayout, QVBoxLayout, QTextEdit, QLineEdit, QPushButton, QSizePolicy, QListWidget, QListWidgetItem, QLabel
 from PyQt5.QtCore import Qt
@@ -64,6 +36,9 @@ def extract_keywords_french(sentence):
    keywords = [word for word in words if word.lower() not in stop_words]
    return ' '.join(keywords)

+def calculate_combined_score(tfidf_score, jaccard_score):
+    # Weighted sum of the two scores (0.6 for tfidf_score, 0.4 for jaccard_score); tune the weights to change each score's importance
+    return 0.6 * tfidf_score + 0.4 * jaccard_score

 def create_vectorial_base(text_lines, min_chars=10):
    """
@@ -124,7 +99,7 @@ def get_best_answers(question, text_lines, vectorizer, vectorial_base):
    # Get the indices of the top 3 most similar text lines
    top_indices = np.argsort(combined_scores)[-3:][::-1]
    # Retrieve the corresponding text lines
-    best_answers = [text_lines[i]+"\n" for i in top_indices]
+    best_answers = [text_lines[i]+"\n"+"score TFIDF : "+str(similarities[i])+" score jacard : "+str(jaccard_similarities[i])+"\n" for i in top_indices]

    return best_answers