Commit db2fb3d5 authored by Duperret Loris

End of first session

parent 450e4145
1 merge request: !2 End of first session
data
\ No newline at end of file
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$">
      <excludeFolder url="file://$MODULE_DIR$/venv" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="Black">
    <option name="sdkName" value="Python 3.9 (BE1_IA)" />
  </component>
  <component name="ProjectRootManager" version="2" languageLevel="JDK_21" project-jdk-name="Python 3.9 (BE1_IA)" project-jdk-type="Python SDK">
    <output url="file://$PROJECT_DIR$/out" />
  </component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/BE1_IA.iml" filepath="$PROJECT_DIR$/.idea/BE1_IA.iml" />
    </modules>
  </component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="" vcs="Git" />
  </component>
</project>
\ No newline at end of file
knn.py 0 → 100644
import numpy as np
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import os

# The CIFAR loading helpers added in the other file of this commit are assumed
# to be importable as a module named read_cifar (module name is an assumption).
from read_cifar import read_cifar, split_dataset


def distance_matrix(matrix1, matrix2):
    # Calculate the squared norms of each row in the input matrices
    norms1 = np.sum(matrix1**2, axis=1, keepdims=True)
    norms2 = np.sum(matrix2**2, axis=1, keepdims=True)
    # Compute the dot product between the matrices
    dot_product = np.dot(matrix1, matrix2.T)
    # Calculate the L2 Euclidean distance using the hint formula
    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2
    # (clamped at 0 so rounding errors cannot produce NaN under the square root)
    dists = np.sqrt(np.maximum(norms1 - 2 * dot_product + norms2.T, 0))
    return dists


def knn_predict(dists, labels_train, k):
    # Number of test samples
    num_test_samples = dists.shape[0]
    # Initialize an array to store the predicted labels
    predicted_labels = np.zeros(num_test_samples, dtype=labels_train.dtype)
    for i in range(num_test_samples):
        # Get the distances for the current test sample
        distances = dists[i]
        # Find the indices of the k nearest neighbors
        k_nearest_indices = np.argsort(distances)[:k]
        # Get the labels of the k nearest neighbors
        k_nearest_labels = labels_train[k_nearest_indices]
        # Use np.bincount to count the occurrences of each label
        # and choose the label with the highest count (majority vote)
        predicted_label = np.argmax(np.bincount(k_nearest_labels))
        # Assign the predicted label to the current test sample
        predicted_labels[i] = predicted_label
    return predicted_labels


def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    # Use the previously defined knn_predict function to get predictions
    predicted_labels = knn_predict(distance_matrix(data_test, data_train), labels_train, k)
    # Calculate the accuracy by comparing predicted labels to actual labels
    accuracy = accuracy_score(labels_test, predicted_labels)
    return accuracy


if __name__ == '__main__':
    # Load the full CIFAR-10 dataset and split it; the original script used
    # data_train/labels_train/data_test/labels_test without defining them.
    split_factor = 0.9
    data, labels = read_cifar("data/cifar-10-python/cifar-10-batches-py")  # update with your path
    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split_factor)

    k_values = range(1, 21)
    accuracies = []
    for k in k_values:
        accuracy = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
        accuracies.append(accuracy)

    # Create the plot
    plt.figure(figsize=(8, 6))
    plt.plot(k_values, accuracies, marker='o')
    plt.title('KNN Accuracy vs. k')
    plt.xlabel('k')
    plt.ylabel('Accuracy')
    plt.grid(True)

    # Save the plot as "knn.png" in the "results" directory
    os.makedirs('results', exist_ok=True)
    plt.savefig('results/knn.png')
    # Show the plot (optional)
    plt.show()
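A quick sanity check for the vectorized distance computation and the majority vote above is to run them on a tiny hand-made dataset where the nearest neighbours are obvious. The following sketch is not part of the commit; it only assumes the functions above can be imported from knn.py (the file name shown in this diff).

import numpy as np
from knn import distance_matrix, knn_predict

# Two well-separated clusters; each test point should inherit the label
# of the cluster it sits in.
data_train = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0], [5.1, 5.0]], dtype=np.float32)
labels_train = np.array([0, 0, 1, 1], dtype=np.int64)
data_test = np.array([[0.05, 0.0], [5.05, 5.0]], dtype=np.float32)

dists = distance_matrix(data_test, data_train)   # shape (2, 4)
print(np.round(dists, 3))
print(knn_predict(dists, labels_train, 3))       # expected: [0 1]

With k = 3 the first test point has two label-0 neighbours and one label-1 neighbour, so the vote returns 0; symmetrically the second point returns 1.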
File moved
import numpy as np
import pickle
import os


def read_cifar_batch(batch_path):
    with open(batch_path, 'rb') as file:
        # Load the batch data
        batch_data = pickle.load(file, encoding='bytes')
        # Extract data and labels from the batch
        data = batch_data[b'data']  # CIFAR-10 data
        labels = batch_data[b'labels']  # Class labels
        # Convert data and labels to the desired data types
        data = np.array(data, dtype=np.float32)
        labels = np.array(labels, dtype=np.int64)
    return data, labels


def read_cifar(directory_path):
    data_batches = []
    label_batches = []
    # Iterate through the batch files in the directory
    for batch_file in ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5', 'test_batch']:
        batch_path = os.path.join(directory_path, batch_file)
        with open(batch_path, 'rb') as file:
            # Load the batch data
            batch_data = pickle.load(file, encoding='bytes')
            # Extract data and labels from the batch
            data = batch_data[b'data']  # CIFAR-10 data
            labels = batch_data[b'labels']  # Class labels
            data_batches.append(data)
            label_batches.extend(labels)
    # Combine all batches into a single data matrix and label vector
    data = np.concatenate(data_batches, axis=0)
    labels = np.array(label_batches, dtype=np.int64)
    # Convert data to the desired data type
    data = data.astype(np.float32)
    return data, labels


def split_dataset(data, labels, split):
    # Check if the split parameter is within the valid range (0 to 1)
    if split < 0 or split > 1:
        raise ValueError("Split must be a float between 0 and 1.")
    # Get the number of samples in the dataset
    num_samples = len(data)
    # Calculate the number of samples for training and testing
    num_train_samples = int(num_samples * split)
    num_test_samples = num_samples - num_train_samples
    # Create a random shuffle order for the indices
    shuffle_indices = np.random.permutation(num_samples)
    # Use the shuffled indices to split the data and labels
    data_train = data[shuffle_indices[:num_train_samples]]
    labels_train = labels[shuffle_indices[:num_train_samples]]
    data_test = data[shuffle_indices[num_train_samples:]]
    labels_test = labels[shuffle_indices[num_train_samples:]]
    return data_train, labels_train, data_test, labels_test


if __name__ == '__main__':
    batch_path = "data/cifar-10-python/cifar-10-batches-py/data_batch_1"  # Update with your path
    data, labels = read_cifar_batch(batch_path)
    print("Data shape:", data.shape)
    print("Labels shape:", labels.shape)