From db2fb3d56fa071320af1457e4fdc0abbc0b1ef01 Mon Sep 17 00:00:00 2001
From: Loris <loris.duperret@etu.ec-lyon.fr>
Date: Fri, 20 Oct 2023 17:45:01 +0200
Subject: [PATCH] End of first session

---
 .gitignore             |  1 +
 .idea/.gitignore       |  8 ++++
 .idea/BE1_IA.iml       | 11 ++++++
 .idea/misc.xml         |  9 +++++
 .idea/modules.xml      |  8 ++++
 .idea/vcs.xml          |  6 +++
 knn.py                 | 83 +++++++++++++++++++++++++++++++++++++++++
 main.py.txt => main.py |  0
 read_cifar.py          | 85 ++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 211 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .idea/.gitignore
 create mode 100644 .idea/BE1_IA.iml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/vcs.xml
 create mode 100644 knn.py
 rename main.py.txt => main.py (100%)
 create mode 100644 read_cifar.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6320cd2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+data
\ No newline at end of file
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/BE1_IA.iml b/.idea/BE1_IA.iml
new file mode 100644
index 0000000..2cdb1e3
--- /dev/null
+++ b/.idea/BE1_IA.iml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="JAVA_MODULE" version="4">
+  <component name="NewModuleRootManager" inherit-compiler-output="true">
+    <exclude-output />
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/venv" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..fc2988f
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.9 (BE1_IA)" />
+  </component>
+  <component name="ProjectRootManager" version="2" languageLevel="JDK_21" project-jdk-name="Python 3.9 (BE1_IA)" project-jdk-type="Python SDK">
+    <output url="file://$PROJECT_DIR$/out" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..43fcbf0
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/BE1_IA.iml" filepath="$PROJECT_DIR$/.idea/BE1_IA.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/knn.py b/knn.py
new file mode 100644
index 0000000..ff2624e
--- /dev/null
+++ b/knn.py
@@ -0,0 +1,83 @@
+import os
+import numpy as np
+from sklearn.metrics import accuracy_score
+import matplotlib.pyplot as plt
+from read_cifar import read_cifar, split_dataset
+
+def distance_matrix(matrix1, matrix2):
+    # Calculate the squared norms of each row in the input matrices
+    norms1 = np.sum(matrix1**2, axis=1, keepdims=True)
+    norms2 = np.sum(matrix2**2, axis=1, keepdims=True)
+
+    # Compute the dot product between the matrices
+    dot_product = np.dot(matrix1, matrix2.T)
+
+    # Calculate the L2 (Euclidean) distances using the expansion ||a - b||^2 = ||a||^2 - 2*a.b + ||b||^2
+    dists = np.sqrt(norms1 - 2 * dot_product + norms2.T)
+
+    return dists
+
+def knn_predict(dists, labels_train, k):
+    # Number of test samples
+    num_test_samples = dists.shape[0]
+
+    # Initialize an array to store the predicted labels
+    predicted_labels = np.zeros(num_test_samples, dtype=labels_train.dtype)
+
+    for i in range(num_test_samples):
+        # Get the distances for the current test sample
+        distances = dists[i]
+
+        # Find the indices of the k nearest neighbors
+        k_nearest_indices = np.argsort(distances)[:k]
+
+        # Get the labels of the k nearest neighbors
+        k_nearest_labels = labels_train[k_nearest_indices]
+
+        # Use np.bincount to count the occurrences of each label
+        # and choose the label with the highest count
+        predicted_label = np.argmax(np.bincount(k_nearest_labels))
+
+        # Assign the predicted label to the current test sample
+        predicted_labels[i] = predicted_label
+
+    return predicted_labels
+
+
+
+def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
+    # Use the previously defined knn_predict function to get predictions
+    predicted_labels = knn_predict(distance_matrix(data_test, data_train), labels_train, k)
+
+    # Calculate the accuracy by comparing predicted labels to actual labels
+    accuracy = accuracy_score(labels_test, predicted_labels)
+
+    return accuracy
+
+if __name__ == '__main__':
+    # Load the complete CIFAR-10 dataset and split it into training and test sets
+    data, labels = read_cifar('data/cifar-10-python/cifar-10-batches-py')
+    split_factor = 0.9
+    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split_factor)
+
+    k_values = range(1, 21)
+    accuracies = []
+
+    for k in k_values:
+        accuracy = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
+        accuracies.append(accuracy)
+
+    # Create the plot
+    plt.figure(figsize=(8, 6))
+    plt.plot(k_values, accuracies, marker='o')
+    plt.title('KNN Accuracy vs. k')
+    plt.xlabel('k')
+    plt.ylabel('Accuracy')
+    plt.grid(True)
+
+    # Save the plot as "knn.png" in the "results" directory
+    os.makedirs('results', exist_ok=True)
+    plt.savefig('results/knn.png')
+
+    # Show the plot (optional)
+    plt.show()
diff --git a/main.py.txt b/main.py
similarity index 100%
rename from main.py.txt
rename to main.py
diff --git a/read_cifar.py b/read_cifar.py
new file mode 100644
index 0000000..73fe59a
--- /dev/null
+++ b/read_cifar.py
@@ -0,0 +1,85 @@
+import numpy as np
+import pickle
+import os
+
+def read_cifar_batch(batch_path):
+    with open(batch_path, 'rb') as file:
+        # Load the batch data
+        batch_data = pickle.load(file, encoding='bytes')
+
+    # Extract data and labels from the batch
+    data = batch_data[b'data']      # CIFAR-10 data
+    labels = batch_data[b'labels']  # Class labels
+
+    # Convert data and labels to the desired data types
+    data = np.array(data, dtype=np.float32)
+    labels = np.array(labels, dtype=np.int64)
+
+    return data, labels
+
+
+def read_cifar(directory_path):
+    data_batches = []
+    label_batches = []
+
+    # Iterate through the batch files in the directory
+    for batch_file in ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5', 'test_batch']:
+        batch_path = os.path.join(directory_path, batch_file)
+
+        with open(batch_path, 'rb') as file:
+            # Load the batch data
+            batch_data = pickle.load(file, encoding='bytes')
+
+        # Extract data and labels from the batch
+        data = batch_data[b'data']      # CIFAR-10 data
+        labels = batch_data[b'labels']  # Class labels
+
+        data_batches.append(data)
+        label_batches.extend(labels)
+
+    # Combine all batches into a single data matrix and label vector
+    data = np.concatenate(data_batches, axis=0)
+    labels = np.array(label_batches, dtype=np.int64)
+
+    # Convert data to the desired data type
+    data = data.astype(np.float32)
+
+    return data, labels
+
+def split_dataset(data, labels, split):
+    # Check if the split parameter is within the valid range (0 to 1)
+    if split < 0 or split > 1:
+        raise ValueError("Split must be a float between 0 and 1.")
+
+    # Get the number of samples in the dataset
+    num_samples = len(data)
+
+    # Calculate the number of samples for training and testing
+    num_train_samples = int(num_samples * split)
+    num_test_samples = num_samples - num_train_samples
+
+    # Create a random shuffle order for the indices
+    shuffle_indices = np.random.permutation(num_samples)
+
+    # Use the shuffled indices to split the data and labels
+    data_train = data[shuffle_indices[:num_train_samples]]
+    labels_train = labels[shuffle_indices[:num_train_samples]]
+    data_test = data[shuffle_indices[num_train_samples:]]
+    labels_test = labels[shuffle_indices[num_train_samples:]]
+
+    return data_train, labels_train, data_test, labels_test
+
+
+
+
+
+
+
+
+if __name__ == '__main__':
+    batch_path = os.path.join("data", "cifar-10-python", "cifar-10-batches-py", "data_batch_1")  # Update with your path
+    data, labels = read_cifar_batch(batch_path)
+    print("Data shape:", data.shape)
+    print("Labels shape:", labels.shape)
+
+
-- 
GitLab
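
Note (not part of the patch): distance_matrix in knn.py computes all pairwise L2 distances at once through the expansion ||a - b||^2 = ||a||^2 - 2*a.b + ||b||^2. The sketch below is a minimal sanity check of that vectorized computation against a naive NumPy broadcast on small random matrices; it assumes knn.py and read_cifar.py sit in the working directory, and since the CIFAR-10 loading in knn.py is behind the __main__ guard, importing it does not read any data.

import numpy as np

from knn import distance_matrix

# Small random stand-ins for CIFAR rows: shapes (n1, d) and (n2, d)
rng = np.random.default_rng(0)
a = rng.normal(size=(5, 8))
b = rng.normal(size=(7, 8))

# Reference result: explicit pairwise Euclidean distances via broadcasting
reference = np.linalg.norm(a[:, None, :] - b[None, :, :], axis=2)

# The vectorized version should agree up to floating-point rounding
assert np.allclose(distance_matrix(a, b), reference)
print("distance_matrix matches the naive computation")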
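
Note (not part of the patch): a similarly minimal end-to-end check of read_cifar and split_dataset, assuming the CIFAR-10 python batches are extracted under data/cifar-10-python/cifar-10-batches-py (the directory layout used in read_cifar.py). Seeding NumPy first makes the np.random.permutation shuffle inside split_dataset reproducible.

import numpy as np

from read_cifar import read_cifar, split_dataset

# Fix the global NumPy seed so the shuffle in split_dataset is reproducible
np.random.seed(0)

# CIFAR-10: 60000 images of 32x32x3 = 3072 values (five training batches + one test batch)
data, labels = read_cifar('data/cifar-10-python/cifar-10-batches-py')
data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.9)

print(data.shape, labels.shape)            # expected: (60000, 3072) (60000,)
print(data_train.shape, data_test.shape)   # expected: (54000, 3072) (6000, 3072)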