Skip to content
Snippets Groups Projects
Commit f3334295 authored by unknown's avatar unknown
Browse files

Ajout des fichiers

parent df7ad104
Branches
No related tags found
No related merge requests found
knn.py 0 → 100644
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 26 15:59:25 2023
@author: DE SIMEIS
"""
from read_cifar import *
from collections import Counter
import matplotlib.pyplot as plt
def distance_matrix(data_train, data_test):
    """Compute the matrix of pairwise Euclidean distances.

    Uses the expansion ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2 so the whole
    matrix is computed with three vectorized operations.

    Parameters:
        data_train (numpy.ndarray): shape (n_train, d) training samples.
        data_test (numpy.ndarray): shape (n_test, d) test samples.
    Returns:
        numpy.ndarray: shape (n_train, n_test) distance matrix.
    """
    train_sq = np.sum(data_train ** 2, axis=1, keepdims=True)
    test_sq = np.sum(data_test ** 2, axis=1, keepdims=True)
    cross = np.dot(data_train, data_test.T)
    # Floating-point cancellation can make the squared distance slightly
    # negative (e.g. for identical rows), which turns sqrt into NaN.
    # Clamp at zero before taking the root.
    sq_dists = np.maximum(train_sq - 2 * cross + test_sq.T, 0.0)
    return np.sqrt(sq_dists)
def knn_predict(dists, labels_train, k):
    """Predict a label for every test sample by majority vote.

    Parameters:
        dists (numpy.ndarray): shape (n_train, n_test) distance matrix.
        labels_train (numpy.ndarray): labels of the training samples.
        k (int): number of nearest neighbours to vote.
    Returns:
        list: one predicted label per test sample.
    """
    predictions = []
    # Each column of `dists` holds one test sample's distances to the
    # whole training set; iterate over columns via the transpose.
    for col in dists.T:
        # argpartition gives the indices of the k smallest distances
        # in O(n) without fully sorting the column.
        nearest = np.argpartition(col, k)[:k]
        votes = Counter(labels_train[nearest])
        predictions.append(votes.most_common(1)[0][0])
    return predictions
def evaluate_knn(predictions, labels_test):
    """Return the classification accuracy of the predictions.

    Parameters:
        predictions (sequence): predicted labels.
        labels_test (sequence): ground-truth labels, same length.
    Returns:
        float: fraction of correct predictions in [0, 1];
        0.0 for empty input (instead of raising ZeroDivisionError).
    """
    if len(predictions) == 0:
        return 0.0
    # zip pairs predictions with truths; avoids indexing and avoids
    # shadowing the builtin `sum` as the original did.
    correct = sum(p == t for p, t in zip(predictions, labels_test))
    return correct / len(predictions)
def main():
    """Run k-NN on CIFAR-10 for k = 1..20 and plot the accuracy curve."""
    print('#START#')
    folder_path = 'data/cifar-10-batches-py'
    data, labels = read_cifar(folder_path)
    data_train, data_test, labels_train, labels_test = split_dataset(data, labels, 0.9)
    max_k = 20
    accuracies = []
    # The distance matrix is independent of k, so compute it once.
    dists = distance_matrix(data_train, data_test)
    for k in range(1, max_k + 1):
        preds = knn_predict(dists, labels_train, k)
        acc = evaluate_knn(preds, labels_test)
        print(acc)
        accuracies.append(acc)
    # Plot accuracy as a function of k and save the figure.
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, max_k + 1), accuracies)
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.title('Training Accuracy Evolution')
    plt.grid()
    plt.savefig('KNN.png')
    plt.show()


main()
mlp.py 0 → 100644
import numpy as np
import os
import matplotlib.pyplot as plt
from read_cifar import read_cifar, split_dataset
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + e^(-x))."""
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)
def sigmoid_derivative(x):
    """Derivative of the sigmoid, expressed in terms of its output.

    Expects x to already be a sigmoid activation: d/dz sigmoid(z) = x(1-x).
    """
    one_minus = 1 - x
    return x * one_minus
def mean_squared_error(predictions, targets):
    """Mean of the squared element-wise differences between the arrays."""
    diff = predictions - targets
    return np.mean(np.square(diff))
def one_hot(labels, num_classes):
    """Convert integer labels to a (len(labels), num_classes) one-hot matrix."""
    n = len(labels)
    encoded = np.zeros((n, num_classes))
    # Fancy indexing sets exactly one 1 per row, at the label's column.
    encoded[np.arange(n), labels] = 1
    return encoded
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
    """One gradient-descent step on a 2-layer sigmoid MLP with MSE loss.

    Parameters:
        w1, b1, w2, b2: network parameters (updated in place).
        data (numpy.ndarray): input batch, shape (n, d_in).
        targets (numpy.ndarray): one-hot targets, shape (n, d_out).
        learning_rate (float): gradient-descent step size.
    Returns:
        tuple: (w1, b1, w2, b2, loss) where loss is the MSE of the
        forward pass computed *before* the parameter update.
    """
    n = len(data)
    # Forward pass.
    hidden = sigmoid(np.dot(data, w1) + b1)
    out = sigmoid(np.dot(hidden, w2) + b2)
    # Backward pass (chain rule through the MSE and both sigmoids).
    grad_out = (out - targets) * sigmoid_derivative(out)
    grad_hidden = np.dot(grad_out, w2.T) * sigmoid_derivative(hidden)
    # Gradient-descent updates, averaged over the batch.
    w1 -= learning_rate * np.dot(data.T, grad_hidden) / n
    b1 -= learning_rate * np.sum(grad_hidden, axis=0) / n
    w2 -= learning_rate * np.dot(hidden.T, grad_out) / n
    b2 -= learning_rate * np.sum(grad_out, axis=0) / n
    loss = mean_squared_error(out, targets)
    return w1, b1, w2, b2, loss
def initialize_weights(input_size, hidden_size, output_size):
    """Create the parameters of a 2-layer MLP.

    Weights are Gaussian, scaled by 1/sqrt(fan-in); biases start at zero.
    Returns (w1, b1, w2, b2).
    """
    w1 = np.random.randn(input_size, hidden_size) / np.sqrt(input_size)
    w2 = np.random.randn(hidden_size, output_size) / np.sqrt(hidden_size)
    b1 = np.zeros((1, hidden_size))
    b2 = np.zeros((1, output_size))
    return w1, b1, w2, b2
def predict_mlp(w1, b1, w2, b2, data):
    """Forward pass of the 2-layer MLP; returns the output activations.

    Both layers use the logistic activation, inlined here so the
    function is self-contained.
    """
    hidden = 1 / (1 + np.exp(-(np.dot(data, w1) + b1)))
    output = 1 / (1 + np.exp(-(np.dot(hidden, w2) + b2)))
    return output
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epochs, results_dir='results'):
    """Train the 2-layer MLP and save the training-accuracy curve.

    Parameters:
        data_train, labels_train: training set and integer labels.
        data_test, labels_test: held-out set (currently unused by the loop;
            kept for interface compatibility with callers).
        d_h (int): hidden-layer width.
        learning_rate (float): gradient-descent step size.
        num_epochs (int): number of full-batch training steps.
        results_dir (str): directory where the accuracy plot is saved.
    Returns:
        tuple: (w1, b1, w2, b2, train_accuracies, losses).
    """
    # Create the results directory if it does not exist.
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    d_in = data_train.shape[1]
    d_out = len(np.unique(labels_train))
    w1, b1, w2, b2 = initialize_weights(d_in, d_h, d_out)
    # The one-hot targets never change: build them once, not every epoch.
    targets = one_hot(labels_train, d_out)
    train_accuracies = []
    losses = []
    for epoch in range(num_epochs):
        w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data_train, targets, learning_rate)
        predictions = predict_mlp(w1, b1, w2, b2, data_train)
        accuracy = np.mean(np.argmax(predictions, axis=1) == labels_train)
        train_accuracies.append(accuracy)
        losses.append(loss)
        print(f"Epoch {epoch + 1}/{num_epochs} - Loss: {loss:.4f} - Accuracy: {accuracy:.4f}")
    # Save the training-accuracy curve.
    plt.figure(figsize=(8, 6))
    plt.plot(range(1, num_epochs + 1), train_accuracies)
    plt.xlabel("Epoch")
    plt.ylabel("Training Accuracy")
    plt.title("MLP Training Accuracy Evolution")
    plt.grid(True)
    save_path = os.path.join(results_dir, "mlp_training_accuracy.png")
    plt.savefig(save_path)
    plt.show()
    # BUG FIX: the module-level caller unpacks six values, but the original
    # function returned None, which raised a TypeError at runtime.
    return w1, b1, w2, b2, train_accuracies, losses
# Script entry point: load CIFAR-10, split it, then train the MLP.
split_factor = 0.9
folder_path = 'data/cifar-10-batches-py'
data, labels = read_cifar(folder_path)
data_train, data_test, labels_train, labels_test = split_dataset(data, labels, split_factor)
d_h = 64  # hidden-layer width
learning_rate = 0.01
num_epochs = 100
# Plots are written to a 'results' directory next to the code.
results_directory = 'results'
# Run the MLP training.
w1, b1, w2, b2, train_accuracies, losses = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epochs, results_directory)
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 26 15:53:56 2023
@author: DE SIMEIS
"""
import pickle
import numpy as np
import os
from sklearn.model_selection import train_test_split
def read_cifar_batch(batch):
    """Load a single pickled CIFAR-10 batch file.

    Parameters:
        batch (str): path to the batch file.
    Returns:
        data: the batch's image data (the b'data' entry of the pickle).
        labels: the labels associated with the data (the b'labels' entry).
    """
    with open(batch, 'rb') as fo:
        # Use a descriptive name instead of shadowing the builtin `dict`.
        batch_dict = pickle.load(fo, encoding='bytes')
    data = batch_dict[b'data']
    labels = batch_dict[b'labels']
    return data, labels
def read_cifar(path):
    """Load every CIFAR-10 batch found in the given directory.

    Parameters:
        path (str): directory containing the CIFAR-10 batch files.
    Returns:
        data (numpy.ndarray): float32 array of shape (n_images, 3072).
        labels (numpy.ndarray): int64 array of shape (n_images,).
    """
    data, labels = [], []
    for batch in os.listdir(path):
        # Skip the metadata and readme files shipped with the dataset.
        if batch in ('batches.meta', 'readme.html'):
            continue
        data_batch, labels_batch = read_cifar_batch(os.path.join(path, batch))
        data.append(data_batch)
        labels.append(labels_batch)
    # Reshape with -1 instead of the hard-coded 60000 rows so the loader
    # also works when only a subset of the batches is present.
    return (np.array(data, dtype=np.float32).reshape(-1, 3072),
            np.array(labels, dtype=np.int64).reshape(-1))
def split_dataset(data, labels, split):
    """Shuffle the dataset and split it into train and test parts.

    Parameters:
        data, labels: full dataset and its labels.
        split (float): fraction of samples kept for training.
    Returns:
        data_train (numpy.ndarray): training data.
        data_test (numpy.ndarray): test data.
        labels_train (numpy.ndarray): training labels.
        labels_test (numpy.ndarray): test labels.
    """
    test_fraction = 1 - split
    data_train, data_test, labels_train, labels_test = train_test_split(
        data, labels, test_size=test_fraction, shuffle=True)
    return data_train, data_test, labels_train, labels_test
results/._mlp.png

4 KiB

results/mlp_training_accuracy.png

18.7 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment