Commit db2fb3d5 authored by Duperret Loris

End of first session

parent 450e4145
1 merge request: !2 End of first session
data
\ No newline at end of file
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$">
      <excludeFolder url="file://$MODULE_DIR$/venv" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="Black">
    <option name="sdkName" value="Python 3.9 (BE1_IA)" />
  </component>
  <component name="ProjectRootManager" version="2" languageLevel="JDK_21" project-jdk-name="Python 3.9 (BE1_IA)" project-jdk-type="Python SDK">
    <output url="file://$PROJECT_DIR$/out" />
  </component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/BE1_IA.iml" filepath="$PROJECT_DIR$/.idea/BE1_IA.iml" />
    </modules>
  </component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="" vcs="Git" />
  </component>
</project>
\ No newline at end of file
knn.py 0 → 100644
import numpy as np
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import os

# The CIFAR loading helpers added in the other file of this commit are assumed
# to be importable as a module named read_cifar (module name is an assumption).
from read_cifar import read_cifar, split_dataset


def distance_matrix(matrix1, matrix2):
    # Calculate the squared norms of each row in the input matrices
    norms1 = np.sum(matrix1**2, axis=1, keepdims=True)
    norms2 = np.sum(matrix2**2, axis=1, keepdims=True)
    # Compute the dot product between the matrices
    dot_product = np.dot(matrix1, matrix2.T)
    # Calculate the L2 Euclidean distance using the hint formula
    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2
    # (clamped at 0 so rounding errors cannot produce NaN under the square root)
    dists = np.sqrt(np.maximum(norms1 - 2 * dot_product + norms2.T, 0))
    return dists


def knn_predict(dists, labels_train, k):
    # Number of test samples
    num_test_samples = dists.shape[0]
    # Initialize an array to store the predicted labels
    predicted_labels = np.zeros(num_test_samples, dtype=labels_train.dtype)
    for i in range(num_test_samples):
        # Get the distances for the current test sample
        distances = dists[i]
        # Find the indices of the k nearest neighbors
        k_nearest_indices = np.argsort(distances)[:k]
        # Get the labels of the k nearest neighbors
        k_nearest_labels = labels_train[k_nearest_indices]
        # Use np.bincount to count the occurrences of each label
        # and choose the label with the highest count (majority vote)
        predicted_label = np.argmax(np.bincount(k_nearest_labels))
        # Assign the predicted label to the current test sample
        predicted_labels[i] = predicted_label
    return predicted_labels


def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    # Use the previously defined knn_predict function to get predictions
    predicted_labels = knn_predict(distance_matrix(data_test, data_train), labels_train, k)
    # Calculate the accuracy by comparing predicted labels to actual labels
    accuracy = accuracy_score(labels_test, predicted_labels)
    return accuracy


if __name__ == '__main__':
    # Load the full CIFAR-10 dataset and split it; the original script used
    # data_train/labels_train/data_test/labels_test without defining them.
    split_factor = 0.9
    data, labels = read_cifar("data/cifar-10-python/cifar-10-batches-py")  # update with your path
    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split_factor)

    k_values = range(1, 21)
    accuracies = []
    for k in k_values:
        accuracy = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
        accuracies.append(accuracy)

    # Create the plot
    plt.figure(figsize=(8, 6))
    plt.plot(k_values, accuracies, marker='o')
    plt.title('KNN Accuracy vs. k')
    plt.xlabel('k')
    plt.ylabel('Accuracy')
    plt.grid(True)

    # Save the plot as "knn.png" in the "results" directory
    os.makedirs('results', exist_ok=True)
    plt.savefig('results/knn.png')
    # Show the plot (optional)
    plt.show()
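A quick sanity check for the vectorized distance computation and the majority vote above is to run them on a tiny hand-made dataset where the nearest neighbours are obvious. The following sketch is not part of the commit; it only assumes the functions above can be imported from knn.py (the file name shown in this diff).

import numpy as np
from knn import distance_matrix, knn_predict

# Two well-separated clusters; each test point should inherit the label
# of the cluster it sits in.
data_train = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0], [5.1, 5.0]], dtype=np.float32)
labels_train = np.array([0, 0, 1, 1], dtype=np.int64)
data_test = np.array([[0.05, 0.0], [5.05, 5.0]], dtype=np.float32)

dists = distance_matrix(data_test, data_train)   # shape (2, 4)
print(np.round(dists, 3))
print(knn_predict(dists, labels_train, 3))       # expected: [0 1]

With k = 3 the first test point has two label-0 neighbours and one label-1 neighbour, so the vote returns 0; symmetrically the second point returns 1.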
File moved
import numpy as np
import pickle
import os


def read_cifar_batch(batch_path):
    with open(batch_path, 'rb') as file:
        # Load the batch data
        batch_data = pickle.load(file, encoding='bytes')
        # Extract data and labels from the batch
        data = batch_data[b'data']  # CIFAR-10 data
        labels = batch_data[b'labels']  # Class labels
        # Convert data and labels to the desired data types
        data = np.array(data, dtype=np.float32)
        labels = np.array(labels, dtype=np.int64)
    return data, labels


def read_cifar(directory_path):
    data_batches = []
    label_batches = []
    # Iterate through the batch files in the directory
    for batch_file in ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5', 'test_batch']:
        batch_path = os.path.join(directory_path, batch_file)
        with open(batch_path, 'rb') as file:
            # Load the batch data
            batch_data = pickle.load(file, encoding='bytes')
            # Extract data and labels from the batch
            data = batch_data[b'data']  # CIFAR-10 data
            labels = batch_data[b'labels']  # Class labels
            data_batches.append(data)
            label_batches.extend(labels)
    # Combine all batches into a single data matrix and label vector
    data = np.concatenate(data_batches, axis=0)
    labels = np.array(label_batches, dtype=np.int64)
    # Convert data to the desired data type
    data = data.astype(np.float32)
    return data, labels


def split_dataset(data, labels, split):
    # Check if the split parameter is within the valid range (0 to 1)
    if split < 0 or split > 1:
        raise ValueError("Split must be a float between 0 and 1.")
    # Get the number of samples in the dataset
    num_samples = len(data)
    # Calculate the number of samples for training and testing
    num_train_samples = int(num_samples * split)
    num_test_samples = num_samples - num_train_samples
    # Create a random shuffle order for the indices
    shuffle_indices = np.random.permutation(num_samples)
    # Use the shuffled indices to split the data and labels
    data_train = data[shuffle_indices[:num_train_samples]]
    labels_train = labels[shuffle_indices[:num_train_samples]]
    data_test = data[shuffle_indices[num_train_samples:]]
    labels_test = labels[shuffle_indices[num_train_samples:]]
    return data_train, labels_train, data_test, labels_test


if __name__ == '__main__':
    batch_path = "data/cifar-10-python/cifar-10-batches-py/data_batch_1"  # Update with your path
    data, labels = read_cifar_batch(batch_path)
    print("Data shape:", data.shape)
    print("Labels shape:", labels.shape)