Commit 162fad1b authored by Duperret Loris's avatar Duperret Loris
Added files

parent db2fb3d5
...@@ -2,6 +2,7 @@ import numpy as np
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

def distance_matrix(matrix1, matrix2):
    # Calculate the squared norms of each row in the input matrices
    norms1 = np.sum(matrix1**2, axis=1, keepdims=True)
...@@ -52,25 +53,3 @@ def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    return accuracy
split_factor = 0.9
k_values = range(1, 21)
accuracies = []
for k in k_values:
    accuracy = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
    accuracies.append(accuracy)
# Create the plot
plt.figure(figsize=(8, 6))
plt.plot(k_values, accuracies, marker='o')
plt.title('KNN Accuracy vs. k')
plt.xlabel('k')
plt.ylabel('Accuracy')
plt.grid(True)
# Save the plot as "knn.png" in the "results" directory
plt.savefig('results/knn.png')
# Show the plot (optional)
plt.show()
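The distance_matrix hunk above is cut off by the diff view after the first norm. As a reference point, here is a minimal sketch of the vectorized pairwise Euclidean distance it appears to compute, using the identity ||a - b||^2 = ||a||^2 + ||b||^2 - 2*a.b; everything past the norms1 line is an assumption, not the committed code.

import numpy as np

def distance_matrix(matrix1, matrix2):
    # Calculate the squared norms of each row in the input matrices
    norms1 = np.sum(matrix1**2, axis=1, keepdims=True)       # shape (n1, 1)
    norms2 = np.sum(matrix2**2, axis=1, keepdims=True).T     # shape (1, n2)
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2*a.b; clip tiny negatives from rounding
    squared = np.maximum(norms1 + norms2 - 2 * matrix1 @ matrix2.T, 0.0)
    return np.sqrt(squared)                                  # shape (n1, n2)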
import read_cifar
import knn
import matplotlib.pyplot as plt
import mlp
split = 0.9
d_h = 64
learning_rate = 0.1
num_epochs = 2

batch_path = "data/cifar-10-python/cifar-10-batches-py"
data, labels = read_cifar.read_cifar(batch_path)
data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, split)
"""k_values = range(1, 21)
accuracies = []
for k in k_values:
accuracy = knn.evaluate_knn(data_train, labels_train, data_test, labels_test, k)
accuracies.append(accuracy)
plt.figure(figsize=(8, 6))
plt.plot(k_values, accuracies, marker='o')
plt.title('KNN Accuracy vs. k')
plt.xlabel('k')
plt.ylabel('Accuracy')
plt.grid(True)
# On enregistre le graphique dans Results
plt.savefig('results/knn.png')
plt.show()"""
train_accuracies, test_accuracy = mlp.run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epochs)
def plot_learning_accuracy(train_accuracies):
    plt.figure()
    plt.plot(range(1, len(train_accuracies) + 1), train_accuracies)
    plt.xlabel("Epoch")
    plt.ylabel("Training Accuracy")
    plt.title("MLP Training Accuracy")
    plt.savefig("results/mlp.png")

plot_learning_accuracy(train_accuracies)
\ No newline at end of file
mlp.py 0 → 100644
import numpy as np
import matplotlib.pyplot as plt
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    # Expects the *activation* a = sigmoid(z) as input, so the derivative
    # sigmoid'(z) = a * (1 - a) can be computed without recomputing sigmoid.
    return x * (1 - x)
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
    # Forward pass
    a0 = data
    z1 = np.matmul(a0, w1) + b1
    a1 = sigmoid(z1)
    z2 = np.matmul(a1, w2) + b2
    a2 = sigmoid(z2)
    predictions = a2

    # Compute loss (MSE)
    loss = np.mean(np.square(predictions - targets))

    # Backpropagation
    delta_a2 = 2 * (predictions - targets) / data.shape[0]
    delta_z2 = delta_a2 * sigmoid_derivative(a2)
    delta_a1 = np.matmul(delta_z2, w2.T)
    delta_z1 = delta_a1 * sigmoid_derivative(a1)

    # Update weights and biases
    w2 -= learning_rate * np.matmul(a1.T, delta_z2)
    b2 -= learning_rate * np.sum(delta_z2, axis=0, keepdims=True)
    w1 -= learning_rate * np.matmul(a0.T, delta_z1)
    b1 -= learning_rate * np.sum(delta_z1, axis=0, keepdims=True)

    return w1, b1, w2, b2, loss
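# Illustrative usage of learn_once_mse (a sketch, not part of the original
# commit): one gradient step on a tiny random problem with d_in=4, d_h=3,
# d_out=2 and a batch of 5 samples.
#     w1 = np.random.randn(4, 3); b1 = np.zeros((1, 3))
#     w2 = np.random.randn(3, 2); b2 = np.zeros((1, 2))
#     x = np.random.rand(5, 4);   y = np.random.rand(5, 2)
#     w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, x, y, 0.1)
# The parameters are updated in place and returned along with the scalar
# MSE loss for this batch.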
def one_hot(labels, num_classes):
    one_hot_matrix = np.zeros((len(labels), num_classes))
    one_hot_matrix[np.arange(len(labels)), labels] = 1
    return one_hot_matrix
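# Example (illustrative): one_hot(np.array([0, 2, 1]), num_classes=3) returns
#     [[1., 0., 0.],
#      [0., 0., 1.],
#      [0., 1., 0.]]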
def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
    # Forward pass
    a0 = data
    z1 = np.matmul(a0, w1) + b1
    a1 = sigmoid(z1)
    z2 = np.matmul(a1, w2) + b2
    a2 = sigmoid(z2)
    predictions = a2

    # Compute loss (binary cross-entropy averaged over the batch)
    m = len(labels_train)
    one_hot_labels = one_hot(labels_train, num_classes=w2.shape[1])
    loss = -1/m * np.sum(one_hot_labels * np.log(predictions) + (1 - one_hot_labels) * np.log(1 - predictions))

    # Backpropagation: for sigmoid outputs with this cross-entropy loss, the
    # sigmoid derivative cancels, leaving a2 - one_hot_labels. Note this is
    # the gradient of the *summed* loss (no 1/m factor).
    delta_z2 = a2 - one_hot_labels
    delta_a1 = np.matmul(delta_z2, w2.T)
    delta_z1 = delta_a1 * sigmoid_derivative(a1)

    # Update weights and biases
    w2 -= learning_rate * np.matmul(a1.T, delta_z2)
    b2 -= learning_rate * np.sum(delta_z2, axis=0, keepdims=True)
    w1 -= learning_rate * np.matmul(a0.T, delta_z1)
    b1 -= learning_rate * np.sum(delta_z1, axis=0, keepdims=True)

    return w1, b1, w2, b2, loss
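# Hypothetical helper (not in the original commit): a finite-difference
# sanity check of the analytic output-layer gradient above, on a tiny random
# problem. All shapes and values are illustrative assumptions.
def _check_ce_gradient(eps=1e-6):
    rng = np.random.default_rng(0)
    w1, b1 = rng.standard_normal((4, 3)), np.zeros((1, 3))
    w2, b2 = rng.standard_normal((3, 2)), np.zeros((1, 2))
    x = rng.random((5, 4))
    y = rng.integers(0, 2, size=5)
    m = len(y)
    t = one_hot(y, num_classes=2)

    def loss_only(w2_val):
        # Forward pass with a candidate w2, without updating any parameters
        a1 = sigmoid(np.matmul(x, w1) + b1)
        a2 = sigmoid(np.matmul(a1, w2_val) + b2)
        return -1/m * np.sum(t * np.log(a2) + (1 - t) * np.log(1 - a2))

    # Analytic gradient of the averaged loss w.r.t. w2[0, 0]
    a1 = sigmoid(np.matmul(x, w1) + b1)
    a2 = sigmoid(np.matmul(a1, w2) + b2)
    analytic = np.matmul(a1.T, (a2 - t) / m)[0, 0]

    # Centered finite difference on the same entry
    w2p, w2m = w2.copy(), w2.copy()
    w2p[0, 0] += eps
    w2m[0, 0] -= eps
    numeric = (loss_only(w2p) - loss_only(w2m)) / (2 * eps)
    print("analytic:", analytic, "numeric:", numeric)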
def compute_accuracy(predictions, labels_train):
    predicted_labels = np.argmax(predictions, axis=1)
    correct = np.sum(predicted_labels == labels_train)
    accuracy = correct / len(labels_train)
    return accuracy
def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs):
    train_accuracies = []
    for epoch in range(num_epochs):
        for i in range(len(data_train)):
            data = data_train[i:i+1]
            labels = labels_train[i:i+1]

            # Forward pass
            a0 = data
            z1 = np.matmul(a0, w1) + b1
            a1 = sigmoid(z1)
            z2 = np.matmul(a1, w2) + b2
            a2 = sigmoid(z2)
            predictions = a2

            # Compute loss (cross-entropy)
            one_hot_labels = one_hot(labels, num_classes=w2.shape[1])
            loss = -np.sum(one_hot_labels * np.log(predictions) + (1 - one_hot_labels) * np.log(1 - predictions))

            # Backpropagation
            delta_z2 = a2 - one_hot_labels
            delta_a1 = np.matmul(delta_z2, w2.T)
            delta_z1 = delta_a1 * sigmoid_derivative(a1)

            # Update weights and biases
            w2 -= learning_rate * np.matmul(a1.T, delta_z2)
            b2 -= learning_rate * np.sum(delta_z2, axis=0, keepdims=True)
            w1 -= learning_rate * np.matmul(a0.T, delta_z1)
            b1 -= learning_rate * np.sum(delta_z1, axis=0, keepdims=True)

        # Calculate training accuracy for this epoch
        a0 = data_train
        z1 = np.matmul(a0, w1) + b1
        a1 = sigmoid(z1)
        z2 = np.matmul(a1, w2) + b2
        a2 = sigmoid(z2)
        train_accuracy = compute_accuracy(a2, labels_train)
        train_accuracies.append(train_accuracy)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss:.4f}, Training Accuracy: {train_accuracy:.4f}")

    return w1, b1, w2, b2, train_accuracies
def test_mlp(w1, b1, w2, b2, data_test, labels_test):
    a0 = data_test
    z1 = np.matmul(a0, w1) + b1
    a1 = sigmoid(z1)
    z2 = np.matmul(a1, w2) + b2
    a2 = sigmoid(z2)
    predicted_labels = np.argmax(a2, axis=1)
    correct = np.sum(predicted_labels == labels_test)
    test_accuracy = correct / len(labels_test)
    return test_accuracy
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epochs):
    d_in = data_train.shape[1]
    d_out = len(np.unique(labels_train))

    # Random weights in [-1, 1), zero biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1
    b1 = np.zeros((1, d_h))
    w2 = 2 * np.random.rand(d_h, d_out) - 1
    b2 = np.zeros((1, d_out))

    train_accuracies = []
    for epoch in range(num_epochs):
        for i in range(len(data_train)):
            data = data_train[i:i+1]
            labels = labels_train[i:i+1]

            # Forward pass
            a0 = data
            z1 = np.matmul(a0, w1) + b1
            a1 = sigmoid(z1)
            z2 = np.matmul(a1, w2) + b2
            a2 = sigmoid(z2)

            # Compute loss (cross-entropy)
            one_hot_labels = one_hot(labels, num_classes=d_out)
            loss = -np.sum(one_hot_labels * np.log(a2) + (1 - one_hot_labels) * np.log(1 - a2))

            # Backpropagation
            delta_z2 = a2 - one_hot_labels
            delta_a1 = np.matmul(delta_z2, w2.T)
            delta_z1 = delta_a1 * sigmoid_derivative(a1)

            # Update weights and biases
            w2 -= learning_rate * np.matmul(a1.T, delta_z2)
            b2 -= learning_rate * np.sum(delta_z2, axis=0, keepdims=True)
            w1 -= learning_rate * np.matmul(a0.T, delta_z1)
            b1 -= learning_rate * np.sum(delta_z1, axis=0, keepdims=True)

        # Training accuracy for this epoch
        train_accuracy = test_mlp(w1, b1, w2, b2, data_train, labels_train)
        train_accuracies.append(train_accuracy)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss:.4f}, Training Accuracy: {train_accuracy:.4f}")

    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
    return train_accuracies, test_accuracy
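# A minimal smoke test for this module, a sketch assuming it is run directly;
# the synthetic shapes and hyperparameters below are illustrative, not CIFAR-10.
if __name__ == '__main__':
    np.random.seed(0)
    x_train = np.random.rand(200, 8)
    y_train = np.random.randint(0, 3, size=200)
    x_test = np.random.rand(50, 8)
    y_test = np.random.randint(0, 3, size=50)
    accs, test_acc = run_mlp_training(x_train, y_train, x_test, y_test,
                                      d_h=16, learning_rate=0.05, num_epochs=2)
    print("Final test accuracy:", test_acc)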
...@@ -4,14 +4,12 @@ import os
 def read_cifar_batch(batch_path):
     with open(batch_path, 'rb') as file:
-        # Load the batch data
         batch_data = pickle.load(file, encoding='bytes')

-        # Extract data and labels from the batch
         data = batch_data[b'data']  # CIFAR-10 data
         labels = batch_data[b'labels']  # Class labels

-        # Convert data and labels to the desired data types
+        # Convert data and labels to the desired types
         data = np.array(data, dtype=np.float32)
         labels = np.array(labels, dtype=np.int64)
...@@ -22,34 +20,21 @@ def read_cifar(directory_path):
     data_batches = []
     label_batches = []

-    # Iterate through the batch files in the directory
     for batch_file in ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5', 'test_batch']:
         batch_path = os.path.join(directory_path, batch_file)
-        with open(batch_path, 'rb') as file:
-            # Load the batch data
-            batch_data = pickle.load(file, encoding='bytes')
-            # Extract data and labels from the batch
-            data = batch_data[b'data']  # CIFAR-10 data
-            labels = batch_data[b'labels']  # Class labels
+        data, labels = read_cifar_batch(batch_path)
         data_batches.append(data)
-        label_batches.extend(labels)
+        label_batches.append(labels)

-    # Combine all batches into a single data matrix and label vector
+    # Concatenate the data into a single matrix and a single vector
     data = np.concatenate(data_batches, axis=0)
-    labels = np.array(label_batches, dtype=np.int64)
+    labels = np.concatenate(label_batches)
-    # Convert data to the desired data type
-    data = data.astype(np.float32)
     return data, labels

 def split_dataset(data, labels, split):
-    # Check if the split parameter is within the valid range (0 to 1)
-    if split < 0 or split > 1:
-        raise ValueError("Split must be a float between 0 and 1.")
     # Get the number of samples in the dataset
     num_samples = len(data)
...@@ -58,10 +43,10 @@ def split_dataset(data, labels, split):
     num_train_samples = int(num_samples * split)
     num_test_samples = num_samples - num_train_samples

-    # Create a random shuffle order for the indices
+    # Create a random permutation of the indices
     shuffle_indices = np.random.permutation(num_samples)

-    # Use the shuffled indices to split the data and labels
+    # Shuffle the data
     data_train = data[shuffle_indices[:num_train_samples]]
     labels_train = labels[shuffle_indices[:num_train_samples]]
     data_test = data[shuffle_indices[num_train_samples:]]
...@@ -71,15 +56,12 @@ def split_dataset(data, labels, split):
 if __name__ == '__main__':
-    batch_path = "data/cifar-10-python/cifar-10-batches-py/data_batch_1"  # Update with your path
+    batch_path = "data/cifar-10-python/cifar-10-batches-py"
-    data, labels = read_cifar_batch(batch_path)
+    data, labels = read_cifar(batch_path)
     print("Data shape:", data.shape)
     print("Labels shape:", labels.shape)
-    split = 0.9
-    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split)
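For orientation, a quick shape check on the updated loader (a sketch, assuming the data layout used above): with the five training batches plus test_batch concatenated, CIFAR-10 yields 60000 images of 32x32x3 = 3072 values each.

import read_cifar

data, labels = read_cifar.read_cifar("data/cifar-10-python/cifar-10-batches-py")
assert data.shape == (60000, 3072)   # float32 image rows
assert labels.shape == (60000,)      # int64 class labels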
results/knn.png (31.1 KiB)
results/mlp.png (30.9 KiB)