diff --git a/README.md b/README.md index c7d2a292450fa11009dc8c22cf2a9cd78e688d78..4a2028e42c50696c5d04edebebeff33daca6dd73 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,33 @@ # Image Classification Ce projet de Classification d'image a été réalisé dans le cadre du cours d'Apprentissage Profond et Intelligence Artificielle de l'Ecole Centrale Lyon. -Le but du projet est de développer grâce à diférentes méthodes des algorithmes de machone learning pour la classification d'images. +Le but du projet est de développer grâce à différents algorithmes de machine learning pour la classification d'images. ## Introduction Deux algorithmes de classification d'images sont développés dans ce projet : - k-nearest neighbors -- Artificial Neural Network +- Artificial Neural Network (réseau de neurones artificiels) + +## Installation + +Ce projet nécessite Python3 ainsi que les librairies suivantes : +- Numpy +- Pickle +- Matplotlib + ## Dataset La base de données CIFAR-10 est utilisée dans ce projet pour entrainer et tester les algorithmes de classification. Cette base de données peut être trouvée a l'adresse suivante : https://www.cs.toronto.edu/~kriz/cifar.html. 
+ +## Structure du projet + +Le projet est divisé en trois sections ayant chacune un script distinct : +- lecture et préparation du dataset (code read_cifar.py) +- algorithme k-nearest neighbors (code knn.py) +- algorithme de réseau de neurones artificiels (code mlp.py) + +## Auteur +Oscar CHAUFOUR + diff --git a/knn.py b/knn.py index 4dc7d61363961d8093d446d6df5ff8eca607d050..f3006bde1c5699e67ce407b33bb71d85af6da9df 100644 --- a/knn.py +++ b/knn.py @@ -10,80 +10,59 @@ import statistics from statistics import mode import time import matplotlib.pyplot as plt +from tqdm import tqdm def distance_matrix(A,B) : - print("test0") sum_of_squaresA= np.sum(A**2, axis = 1, keepdims = True) sum_of_squaresB = np.sum(B**2, axis = 1) - print("test1") # sum_of_squaresA = np.tile(sum_of_squaresAVect, (np.shape(B)[0], 1)) # sum_of_squaresB = np.tile(sum_of_squaresBVect, (np.shape(A)[0], 1)) # Calculate the dot product between the two matrices - # dot_product = np.matmul(A, B.T) - dot_product = np.einsum('ij,jk', A, B.T) - print("test2") + dot_product = np.dot(A, B.T) + # dot_product = np.einsum('ij,jk', A, B.T) # Calculate the Euclidean distance matrix using the hint provided dists = np.sqrt(sum_of_squaresA + sum_of_squaresB - 2 * dot_product) - print("test3") return dists def knn_predict(dists, labels_train, k) : - number_train, number_test = dists.shape + number_train, number_test = np.shape(dists) # initialze the predicted labels to zeros labels_predicted = np.zeros(number_test) for j in range(number_test) : sorted_indices = np.argsort(dists[:, j]) - print(len(dists[:, j])) - break knn_indices = sorted_indices[ : k] knn_labels = labels_train[knn_indices] label_predicted = mode(knn_labels) labels_predicted[j] = label_predicted - return labels_predicted def evaluate_knn(data_train, labels_train, data_test, labels_test, k) : dists = distance_matrix(data_train, data_test) labels_predicted = knn_predict(dists, labels_train, k) number_true_prediction = np.sum(labels_test == 
labels_predicted) - number_total_prediction = labels_test.shape[0] + number_total_prediction = len(labels_test) classification_rate = number_true_prediction/number_total_prediction - + print(classification_rate) return classification_rate if __name__ == "__main__" : - t1 = time.time() - # # Example distance matrix, training labels, and k value - # dists = np.array([[1000, 2, 3], - # [4, 0.1, 6], - # [7, 8, 0]]) - # labels_train = np.array([0, 1, 5]) - # k = 2 - - # # Predict labels for the test set using k-NN - # predicted_labels = knn_predict(dists, labels_train, k) - - - # classification_rate = evaluate_knn(np.array([[1, 27], [100, 300]]), np.array([0.002, 9000]), np.array([[25, 350]]), np.array([9000]), 1) - # print("Classification rate:") - # print(classification_rate) - file = "./data/cifar-10-python/" data, labels = read_cifar.read_cifar(file) data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, 0.9) - k = 10 - print(len(data_train)) - print(len(data_test)) - print(len(data_train[0])) - print(len(data_test[0])) - # dists = distance_matrix(data_train, data_test) - # knn_predict(dists, labels_train, k) - classification_rate = evaluate_knn(data_train, labels_train, data_test, labels_test, k) - print("classification rate :", classification_rate) - # plot_accuracy(data_train, labels_train, data_test, labels_test, 4) - t2 = time.time() - print('run time (second): ') - print(t2-t1) \ No newline at end of file + + k = 8 + evaluations = [] + for k in tqdm(range(1, k)) : + evaluations.append(evaluate_knn(data_train, labels_train, data_test, labels_test, k)) + + fig=plt.figure() + plt.title("Prediction accuracy as a function of k") + plt.xlabel("k-nearest neighbors") + plt.ylabel("Accuracy (%)") + plt.plot(evaluations) + plt.show() + plt.savefig('results/knn.png') + diff --git a/test1.py b/test1.py deleted file mode 100644 index 5bc93fb8ee1dd43c732eb6da06ce8dc62c77586b..0000000000000000000000000000000000000000 --- a/test1.py +++ 
/dev/null @@ -1,52 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Mon Oct 23 19:43:47 2023 - -@author: oscar -""" - -import numpy as np -from collections import Counter -import read_cifar - -def distance_matrix(M1,M2): - # dists(i,j) = dist entre ième ligne de M1 et jème ligne de M1, soit la racine de sum((M1i,p - M2j,p)²)) - # qu'on peut simplifier en sum(M1i,p²) + sum(M2j,p²) - sum(2* M1j,p * M2i,p) - - l1=np.shape(M1)[0] - l2=np.shape(M2)[0] - Vect1=np.sum(M1**2,1) - Vect2=np.sum(M2**2,1) - - Mat1=np.tile(Vect1, (l2,1)) - Mat2=np.tile(Vect2, (l1,1)) - Mat3=2*np.dot(M1,M2.T) - - dists=np.sqrt(Mat1.T+Mat2-Mat3) - - return dists - -def knn_predict(dists,labels_train,k): - labels_predict=np.array([]) - size_test=np.shape(dists)[1] - for j in range(size_test): - list_arg_min=np.argsort(dists[:,j]) - labels_sorted=[labels_train[i] for i in list_arg_min] - k_labels=labels_sorted[:k] - count = Counter(k_labels) - - labels_predict=np.append(labels_predict,count.most_common(1)[0][0]) - - return labels_predict - -def evaluate_knn(data_train,data_test,labels_train,labels_test,k): - dists=distance_matrix(data_train,data_test) - labels_predict=knn_predict(dists,labels_train,k) - count=np.sum(labels_predict==labels_test) - return count/np.shape(labels_predict) - -if __name__ == "__main__": - file = "./data/cifar-10-python/" - data, labels = read_cifar.read_cifar(file) - data_train,labels_train,data_test,labels_test=read_cifar.split_dataset(data,labels,0.9) - print(evaluate_knn(data_train,data_test,labels_train,labels_test,20)) \ No newline at end of file