Skip to content
Snippets Groups Projects
Commit e5a25f27 authored by oscarchaufour's avatar oscarchaufour
Browse files

knn modifications

parent fe63dfd7
Branches
No related tags found
No related merge requests found
# Image Classification # Image Classification
Ce projet de Classification d'image a été réalisé dans le cadre du cours d'Apprentissage Profond et Intelligence Artificielle de l'Ecole Centrale Lyon. Ce projet de Classification d'image a été réalisé dans le cadre du cours d'Apprentissage Profond et Intelligence Artificielle de l'Ecole Centrale Lyon.
Le but du projet est de développer, grâce à différents algorithmes de machine learning, des méthodes de classification d'images.
## Introduction ## Introduction
Deux algorithmes de classification d'images sont développés dans ce projet : Deux algorithmes de classification d'images sont développés dans ce projet :
- k-nearest neighbors - k-nearest neighbors
- Artificial Neural Network (réseau de neurones artificiels)
## Installation
Ce projet nécessite Python3 ainsi que les librairies suivantes :
- Numpy
- Pickle
- Matplotlib
## Dataset ## Dataset
La base de données CIFAR-10 est utilisée dans ce projet pour entrainer et tester les algorithmes de classification. Cette base de données peut être trouvée a l'adresse suivante : https://www.cs.toronto.edu/~kriz/cifar.html. La base de données CIFAR-10 est utilisée dans ce projet pour entrainer et tester les algorithmes de classification. Cette base de données peut être trouvée a l'adresse suivante : https://www.cs.toronto.edu/~kriz/cifar.html.
## Structure du projet
Le projet est divisé en trois sections ayant chacune un script distinct :
- lecture et préparation du dataset (code read_cifar.py)
- algorithme k-nearest neighbors (code knn.py)
- algorithme de réseau de neurones artificiels (code mlp.py)
## Auteur
Oscar CHAUFOUR
...@@ -10,80 +10,59 @@ import statistics ...@@ -10,80 +10,59 @@ import statistics
from statistics import mode from statistics import mode
import time import time
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from tqdm import tqdm
def distance_matrix(A, B):
    """Compute the pairwise Euclidean distance matrix between rows of A and rows of B.

    Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b so that the whole
    matrix is obtained from one matrix product instead of an explicit double loop.

    Parameters
    ----------
    A : ndarray of shape (n_a, d)
    B : ndarray of shape (n_b, d)

    Returns
    -------
    ndarray of shape (n_a, n_b) where entry (i, j) is ||A[i] - B[j]||.
    """
    # Row-wise squared norms; keepdims on A makes the sum broadcast to (n_a, n_b).
    sum_of_squares_A = np.sum(A**2, axis=1, keepdims=True)  # (n_a, 1)
    sum_of_squares_B = np.sum(B**2, axis=1)                 # (n_b,)
    dot_product = np.dot(A, B.T)                            # (n_a, n_b)
    # Floating-point rounding can make the expansion slightly negative for
    # (near-)identical rows, which would produce NaN under sqrt — clip at 0.
    squared_dists = np.maximum(sum_of_squares_A + sum_of_squares_B - 2 * dot_product, 0.0)
    return np.sqrt(squared_dists)
def knn_predict(dists, labels_train, k):
    """Predict a label for every test sample by majority vote of its k nearest neighbors.

    Parameters
    ----------
    dists : ndarray of shape (number_train, number_test)
        Distance matrix; column j holds the distances from every training
        sample to test sample j.
    labels_train : ndarray of shape (number_train,)
        Labels of the training samples.
    k : int
        Number of nearest neighbors taking part in the vote.

    Returns
    -------
    ndarray of shape (number_test,) with the predicted label of each test sample.
    """
    number_train, number_test = dists.shape
    labels_predicted = np.zeros(number_test)
    for j in range(number_test):
        # Indices of the k training samples closest to test sample j.
        knn_indices = np.argsort(dists[:, j])[:k]
        # statistics.mode returns the first encountered value on ties (Python >= 3.8).
        labels_predicted[j] = mode(labels_train[knn_indices])
    return labels_predicted
def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    """Run k-NN classification on the test set and return the accuracy.

    Parameters
    ----------
    data_train : ndarray of shape (number_train, d) — training samples.
    labels_train : ndarray of shape (number_train,) — training labels.
    data_test : ndarray of shape (number_test, d) — test samples.
    labels_test : ndarray of shape (number_test,) — ground-truth test labels.
    k : int — number of neighbors used by the vote.

    Returns
    -------
    float — fraction of test samples whose predicted label matches the truth.
    """
    dists = distance_matrix(data_train, data_test)
    labels_predicted = knn_predict(dists, labels_train, k)
    number_true_prediction = np.sum(labels_test == labels_predicted)
    number_total_prediction = len(labels_test)
    return number_true_prediction / number_total_prediction
if __name__ == "__main__":
    # Load CIFAR-10 from disk and split 90% train / 10% test.
    file = "./data/cifar-10-python/"
    data, labels = read_cifar.read_cifar(file)
    data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, 0.9)

    # Evaluate accuracy for every k in 1 .. max_k-1 (separate name so the
    # loop variable does not shadow the upper bound).
    max_k = 8
    k_values = list(range(1, max_k))
    evaluations = []
    for k in tqdm(k_values):
        evaluations.append(evaluate_knn(data_train, labels_train, data_test, labels_test, k))

    # Plot accuracy against the actual k values (not the list index).
    plt.figure()
    plt.title("Prediction accuracy as a function of k")
    plt.xlabel("k-nearest neighbors")
    plt.ylabel("Accuracy")
    plt.plot(k_values, evaluations)
    # savefig must come BEFORE show(): show() blocks and the figure is cleared
    # when its window closes, so saving afterwards writes a blank image.
    plt.savefig('results/knn.png')
    plt.show()
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 23 19:43:47 2023
@author: oscar
"""
import numpy as np
from collections import Counter
import read_cifar
def distance_matrix(M1, M2):
    """Pairwise Euclidean distances between the rows of M1 and the rows of M2.

    dists[i, j] is the distance between row i of M1 and row j of M2, computed
    via the identity ||a - b||^2 = sum(a^2) + sum(b^2) - 2 a.b.

    Parameters
    ----------
    M1 : ndarray of shape (l1, d)
    M2 : ndarray of shape (l2, d)

    Returns
    -------
    ndarray of shape (l1, l2).
    """
    # Broadcasting a (l1, 1) column against a (l2,) row replaces the previous
    # np.tile calls, avoiding two full (l1, l2) temporary matrices.
    sq1 = np.sum(M1**2, axis=1, keepdims=True)  # (l1, 1)
    sq2 = np.sum(M2**2, axis=1)                 # (l2,)
    cross = 2 * np.dot(M1, M2.T)                # (l1, l2)
    # Rounding can leave tiny negative values where rows coincide; clip so
    # sqrt never returns NaN.
    return np.sqrt(np.maximum(sq1 + sq2 - cross, 0.0))
def knn_predict(dists, labels_train, k):
    """Predict test-set labels by majority vote among the k nearest training samples.

    Parameters
    ----------
    dists : ndarray of shape (number_train, number_test)
        Column j holds the distances from all training samples to test sample j.
    labels_train : ndarray of shape (number_train,)
    k : int

    Returns
    -------
    ndarray of shape (number_test,) with one predicted label per test sample.
    """
    predictions = np.array([])
    num_test = np.shape(dists)[1]
    for col in range(num_test):
        # Training indices ordered from closest to farthest; keep the first k.
        nearest = np.argsort(dists[:, col])[:k]
        # Counter.most_common breaks ties by first appearance among the neighbors.
        votes = Counter(labels_train[idx] for idx in nearest)
        winner = votes.most_common(1)[0][0]
        predictions = np.append(predictions, winner)
    return predictions
def evaluate_knn(data_train, data_test, labels_train, labels_test, k):
    """Classify the test set with k-NN and return the accuracy as a float.

    Parameters
    ----------
    data_train : ndarray of shape (number_train, d)
    data_test : ndarray of shape (number_test, d)
    labels_train : ndarray of shape (number_train,)
    labels_test : ndarray of shape (number_test,)
    k : int — number of voting neighbors.

    Returns
    -------
    float — fraction of correctly classified test samples.
    """
    dists = distance_matrix(data_train, data_test)
    labels_predict = knn_predict(dists, labels_train, k)
    count = np.sum(labels_predict == labels_test)
    # Bug fix: the original divided by np.shape(labels_predict) — a *tuple* —
    # which broadcasting turned into a length-1 array instead of a scalar rate.
    return count / np.shape(labels_predict)[0]
if __name__ == "__main__":
    # Directory containing the extracted CIFAR-10 python batches
    # (download link in the README).
    file = "./data/cifar-10-python/"
    # Load the full dataset, then split 90% train / 10% test.
    data, labels = read_cifar.read_cifar(file)
    data_train,labels_train,data_test,labels_test=read_cifar.split_dataset(data,labels,0.9)
    # Print the k-NN accuracy for k = 20 neighbors.
    print(evaluate_knn(data_train,data_test,labels_train,labels_test,20))
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment