Skip to content
Snippets Groups Projects
Commit f94ef1b7 authored by Elkhadri Doha's avatar Elkhadri Doha
Browse files

Upload New File

parent 756eda29
No related branches found
No related tags found
No related merge requests found
knn.py 0 → 100644
import numpy as np
import matplotlib.pyplot as plt
import os
def distance_matrix(A, B):
    """Return the pairwise Euclidean distances between the rows of A and B.

    The computation uses the expansion
    ``||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b`` so that the whole matrix is
    obtained from a single matrix product instead of an explicit double loop.

    Args:
        A: Array-like of shape (n, d).
        B: Array-like of shape (m, d).

    Returns:
        Array of shape (n, m); entry (i, j) is the distance between ``A[i]``
        and ``B[j]``.
    """
    A = np.asarray(A)
    B = np.asarray(B)
    # Integer inputs (e.g. uint8 pixel values) would overflow inside
    # np.square, so promote anything that is not already floating point.
    if not np.issubdtype(A.dtype, np.floating):
        A = A.astype(np.float64)
    if not np.issubdtype(B.dtype, np.floating):
        B = B.astype(np.float64)

    A_square = np.sum(np.square(A), axis=1)
    B_square = np.sum(np.square(B), axis=1)
    squared = A_square[:, None] + B_square[None, :] - 2 * np.dot(A, B.T)
    # Rounding error can leave tiny negative values where the true squared
    # distance is zero; clamp them so the square root never yields NaN.
    squared = np.maximum(squared, 0)
    return np.sqrt(squared)
def knn_predict(dists, labels_train, k):
    """Predict a label for each test sample by majority vote of k neighbours.

    Args:
        dists: Array of shape (num_test, num_train); row i holds the distances
            from test sample i to every training sample.
        labels_train: Array-like of num_train integer labels.
        k: Number of nearest neighbours that vote. Must be at least 1. If k
            exceeds num_train, every training sample votes.

    Returns:
        Integer array of shape (num_test,) with the predicted labels. When
        several labels are tied for the most votes, the smallest one wins.

    Raises:
        ValueError: If k is smaller than 1.
    """
    # A negative k would silently slice "all but the last |k|" neighbours and
    # k == 0 would fail later with an obscure argmax error, so reject both.
    if k < 1:
        raise ValueError("k must be a positive integer, got {}".format(k))

    dists = np.asarray(dists)
    labels_train = np.asarray(labels_train)
    num_test = dists.shape[0]
    predicted_labels = np.zeros(num_test, dtype=int)
    for i in range(num_test):
        # A stable sort keeps equidistant neighbours in training order, which
        # makes the selected neighbourhood deterministic.
        nearest_neighbor_indices = np.argsort(dists[i], kind="stable")[:k]
        k_nearest_labels = labels_train[nearest_neighbor_indices]
        # np.unique returns labels sorted ascending, and argmax returns the
        # first maximum, so ties resolve to the smallest label.
        unique_labels, counts = np.unique(k_nearest_labels, return_counts=True)
        predicted_labels[i] = unique_labels[np.argmax(counts)]
    return predicted_labels
def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    """Return the classification accuracy of k-NN on the test set.

    Args:
        data_train: Training samples, one per row.
        labels_train: Labels of the training samples.
        data_test: Test samples, one per row.
        labels_test: Ground-truth labels of the test samples.
        k: Number of nearest neighbours used for the vote.

    Returns:
        Fraction of test samples whose predicted label equals the true label.
    """
    # knn_predict reads row i as "distances from test sample i to every
    # training sample", so the test data must be the first argument here.
    # Passing the training data first yields the transposed matrix, which
    # only avoids an indexing error when both sets have the same size and
    # still produces wrong neighbours.
    dists = distance_matrix(data_test, data_train)
    predicted_labels = knn_predict(dists, labels_train, k)
    return np.mean(predicted_labels == np.asarray(labels_test))
def plot_accuracy_vs_k(data_train, labels_train, data_test, labels_test, split_factor=0.9, k_values=None):
    """Plot k-NN test accuracy against k and save the figure.

    The plot is written to ``results/knn.png`` (the directory is created if
    needed) and then displayed with ``plt.show()``.

    Args:
        data_train: Training samples, one per row.
        labels_train: Labels of the training samples.
        data_test: Test samples, one per row.
        labels_test: Ground-truth labels of the test samples.
        split_factor: Unused by this function; kept so existing callers that
            pass it keep working.
        k_values: Iterable of neighbour counts to evaluate. Defaults to
            1 through 20 inclusive.
    """
    if k_values is None:
        k_values = range(1, 21)
    k_values = list(k_values)

    accuracies = [
        evaluate_knn(data_train, labels_train, data_test, labels_test, k)
        for k in k_values
    ]

    # Create the "results" directory if it doesn't exist
    os.makedirs("results", exist_ok=True)
    plt.plot(k_values, accuracies)
    plt.xlabel('k')
    plt.ylabel('Accuracy')
    plt.title('Accuracy vs. k for KNN')
    plt.grid(True)
    # Save before show(): some backends clear the figure once it is shown.
    plt.savefig('results/knn.png')
    plt.show()
if __name__ == "__main__":
    # Demo run on random placeholder data; swap in a real dataset split into
    # data_train / labels_train / data_test / labels_test.
    n_samples, n_features = 100, 2

    data_train = np.random.rand(n_samples, n_features)  # Replace with your actual data
    labels_train = np.random.randint(0, 2, size=n_samples)  # Replace with your actual labels

    # The test set is generated with the same number of samples as the
    # training set.
    data_test = np.random.rand(n_samples, n_features)
    labels_test = np.random.randint(0, 2, size=n_samples)

    plot_accuracy_vs_k(data_train, labels_train, data_test, labels_test)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment