Skip to content
Snippets Groups Projects
Commit e5a25f27 authored by oscarchaufour's avatar oscarchaufour
Browse files

knn modifications

parent fe63dfd7
Branches
No related tags found
No related merge requests found
# Image Classification # Image Classification
Ce projet de Classification d'image a été réalisé dans le cadre du cours d'Apprentissage Profond et Intelligence Artificielle de l'Ecole Centrale Lyon. Ce projet de Classification d'image a été réalisé dans le cadre du cours d'Apprentissage Profond et Intelligence Artificielle de l'Ecole Centrale Lyon.
Le but du projet est de développer, grâce à différents algorithmes de machine learning, des méthodes de classification d'images.
## Introduction ## Introduction
Deux algorithmes de classification d'images sont développés dans ce projet : Deux algorithmes de classification d'images sont développés dans ce projet :
- k-nearest neighbors - k-nearest neighbors
- Artificial Neural Network (réseau de neurones artificiels)
## Installation
Ce projet nécessite Python3 ainsi que les librairies suivantes :
- Numpy
- Pickle
- Matplotlib
## Dataset ## Dataset
La base de données CIFAR-10 est utilisée dans ce projet pour entrainer et tester les algorithmes de classification. Cette base de données peut être trouvée a l'adresse suivante : https://www.cs.toronto.edu/~kriz/cifar.html. La base de données CIFAR-10 est utilisée dans ce projet pour entrainer et tester les algorithmes de classification. Cette base de données peut être trouvée a l'adresse suivante : https://www.cs.toronto.edu/~kriz/cifar.html.
## Structure du projet
Le projet est divisé en trois sections ayant chacune un script distinct :
- lecture et préparation du dataset (code read_cifar.py)
- algorithme k-nearest neighbors (code knn.py)
- algorithme de réseau de neurones artificiels (code mlp.py)
## Auteur
Oscar CHAUFOUR
...@@ -10,80 +10,59 @@ import statistics ...@@ -10,80 +10,59 @@ import statistics
from statistics import mode from statistics import mode
import time import time
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from tqdm import tqdm
def distance_matrix(A, B):
    """Compute the pairwise Euclidean distance matrix between rows of A and rows of B.

    Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b so that the whole
    matrix is obtained from one matrix product instead of an explicit double loop.

    Parameters
    ----------
    A : ndarray of shape (n_a, d)
    B : ndarray of shape (n_b, d)

    Returns
    -------
    ndarray of shape (n_a, n_b) where entry (i, j) is ||A[i] - B[j]||.
    """
    # Row-wise squared norms; keepdims on A makes the sum broadcast to (n_a, n_b).
    sum_of_squares_A = np.sum(A**2, axis=1, keepdims=True)  # (n_a, 1)
    sum_of_squares_B = np.sum(B**2, axis=1)                 # (n_b,)
    dot_product = np.dot(A, B.T)                            # (n_a, n_b)
    # Floating-point rounding can make the expansion slightly negative for
    # (near-)identical rows, which would produce NaN under sqrt — clip at 0.
    squared_dists = np.maximum(sum_of_squares_A + sum_of_squares_B - 2 * dot_product, 0.0)
    return np.sqrt(squared_dists)
def knn_predict(dists, labels_train, k):
    """Predict a label for every test sample by majority vote of its k nearest neighbors.

    Parameters
    ----------
    dists : ndarray of shape (number_train, number_test)
        Distance matrix; column j holds the distances from every training
        sample to test sample j.
    labels_train : ndarray of shape (number_train,)
        Labels of the training samples.
    k : int
        Number of nearest neighbors taking part in the vote.

    Returns
    -------
    ndarray of shape (number_test,) with the predicted label of each test sample.
    """
    number_train, number_test = dists.shape
    labels_predicted = np.zeros(number_test)
    for j in range(number_test):
        # Indices of the k training samples closest to test sample j.
        knn_indices = np.argsort(dists[:, j])[:k]
        # statistics.mode returns the first encountered value on ties (Python >= 3.8).
        labels_predicted[j] = mode(labels_train[knn_indices])
    return labels_predicted
def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    """Run k-NN classification on the test set and return the accuracy.

    Parameters
    ----------
    data_train : ndarray of shape (number_train, d) — training samples.
    labels_train : ndarray of shape (number_train,) — training labels.
    data_test : ndarray of shape (number_test, d) — test samples.
    labels_test : ndarray of shape (number_test,) — ground-truth test labels.
    k : int — number of neighbors used by the vote.

    Returns
    -------
    float — fraction of test samples whose predicted label matches the truth.
    """
    dists = distance_matrix(data_train, data_test)
    labels_predicted = knn_predict(dists, labels_train, k)
    number_true_prediction = np.sum(labels_test == labels_predicted)
    number_total_prediction = len(labels_test)
    return number_true_prediction / number_total_prediction
if __name__ == "__main__":
    # Load CIFAR-10 from disk and split 90% train / 10% test.
    file = "./data/cifar-10-python/"
    data, labels = read_cifar.read_cifar(file)
    data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, 0.9)

    # Evaluate accuracy for every k in 1 .. max_k-1 (separate name so the
    # loop variable does not shadow the upper bound).
    max_k = 8
    k_values = list(range(1, max_k))
    evaluations = []
    for k in tqdm(k_values):
        evaluations.append(evaluate_knn(data_train, labels_train, data_test, labels_test, k))

    # Plot accuracy against the actual k values (not the list index).
    plt.figure()
    plt.title("Prediction accuracy as a function of k")
    plt.xlabel("k-nearest neighbors")
    plt.ylabel("Accuracy")
    plt.plot(k_values, evaluations)
    # savefig must come BEFORE show(): show() blocks and the figure is cleared
    # when its window closes, so saving afterwards writes a blank image.
    plt.savefig('results/knn.png')
    plt.show()
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 23 19:43:47 2023
@author: oscar
"""
import numpy as np
from collections import Counter
import read_cifar
def distance_matrix(M1, M2):
    """Pairwise Euclidean distances between the rows of M1 and the rows of M2.

    dists[i, j] is the distance between row i of M1 and row j of M2, computed
    via the identity ||a - b||^2 = sum(a^2) + sum(b^2) - 2 a.b.

    Parameters
    ----------
    M1 : ndarray of shape (l1, d)
    M2 : ndarray of shape (l2, d)

    Returns
    -------
    ndarray of shape (l1, l2).
    """
    # Broadcasting a (l1, 1) column against a (l2,) row replaces the previous
    # np.tile calls, avoiding two full (l1, l2) temporary matrices.
    sq1 = np.sum(M1**2, axis=1, keepdims=True)  # (l1, 1)
    sq2 = np.sum(M2**2, axis=1)                 # (l2,)
    cross = 2 * np.dot(M1, M2.T)                # (l1, l2)
    # Rounding can leave tiny negative values where rows coincide; clip so
    # sqrt never returns NaN.
    return np.sqrt(np.maximum(sq1 + sq2 - cross, 0.0))
def knn_predict(dists, labels_train, k):
    """Predict test-set labels by majority vote among the k nearest training samples.

    Parameters
    ----------
    dists : ndarray of shape (number_train, number_test)
        Column j holds the distances from all training samples to test sample j.
    labels_train : ndarray of shape (number_train,)
    k : int

    Returns
    -------
    ndarray of shape (number_test,) with one predicted label per test sample.
    """
    predictions = np.array([])
    num_test = np.shape(dists)[1]
    for col in range(num_test):
        # Training indices ordered from closest to farthest; keep the first k.
        nearest = np.argsort(dists[:, col])[:k]
        # Counter.most_common breaks ties by first appearance among the neighbors.
        votes = Counter(labels_train[idx] for idx in nearest)
        winner = votes.most_common(1)[0][0]
        predictions = np.append(predictions, winner)
    return predictions
def evaluate_knn(data_train, data_test, labels_train, labels_test, k):
    """Classify the test set with k-NN and return the accuracy as a float.

    Parameters
    ----------
    data_train : ndarray of shape (number_train, d)
    data_test : ndarray of shape (number_test, d)
    labels_train : ndarray of shape (number_train,)
    labels_test : ndarray of shape (number_test,)
    k : int — number of voting neighbors.

    Returns
    -------
    float — fraction of correctly classified test samples.
    """
    dists = distance_matrix(data_train, data_test)
    labels_predict = knn_predict(dists, labels_train, k)
    count = np.sum(labels_predict == labels_test)
    # Bug fix: the original divided by np.shape(labels_predict) — a *tuple* —
    # which broadcasting turned into a length-1 array instead of a scalar rate.
    return count / np.shape(labels_predict)[0]
if __name__ == "__main__":
    # Directory containing the extracted CIFAR-10 python batches
    # (download link in the README).
    file = "./data/cifar-10-python/"
    # Load the full dataset, then split 90% train / 10% test.
    data, labels = read_cifar.read_cifar(file)
    data_train,labels_train,data_test,labels_test=read_cifar.split_dataset(data,labels,0.9)
    # Print the k-NN accuracy for k = 20 neighbors.
    print(evaluate_knn(data_train,data_test,labels_train,labels_test,20))
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment