Commit e5a25f27 authored by oscarchaufour

knn modifications

parent fe63dfd7
# Image Classification
This Image Classification project was carried out as part of the Deep Learning and Artificial Intelligence course at Ecole Centrale Lyon.
The goal of the project is to develop image classification algorithms using different machine learning methods.
## Introduction
Two image classification algorithms are developed in this project:
- k-nearest neighbors
- Artificial Neural Network
## Installation
This project requires Python 3 as well as the following libraries:
- Numpy
- Pickle
- Matplotlib
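
These third-party packages can typically be installed with `pip install numpy matplotlib` (Pickle ships with the Python standard library). Note that knn.py also imports tqdm for its progress bar, so installing tqdm may be needed as well.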
## Dataset
The CIFAR-10 dataset is used in this project to train and test the classification algorithms. It can be found at the following address: https://www.cs.toronto.edu/~kriz/cifar.html.
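
For reference, each batch of the Python version of CIFAR-10 is a pickled dictionary holding the images and their labels. Below is a minimal loading sketch; read_cifar.py is the project's actual loader, and the exact extracted file path under `./data/` is an assumption:

```python
import pickle
import numpy as np

def unpickle(file):
    # each CIFAR-10 batch file holds a dict with b'data' (10000 x 3072 uint8 pixel rows)
    # and b'labels' (a list of 10000 integers in 0..9)
    with open(file, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')
    return batch

batch = unpickle("./data/cifar-10-python/data_batch_1")  # hypothetical path
images = np.array(batch[b'data'])    # shape (10000, 3072)
labels = np.array(batch[b'labels'])  # shape (10000,)
```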
## Project structure
The project is divided into three sections, each with its own script (a short usage sketch follows the list):
- reading and preparing the dataset (read_cifar.py)
- k-nearest neighbors algorithm (knn.py)
- artificial neural network algorithm (mlp.py)
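
A minimal usage sketch of how these scripts fit together, based on the function names that appear in knn.py (the dataset path is the one used in the scripts):

```python
import read_cifar
import knn

# load CIFAR-10 and keep 90% of the samples for training
data, labels = read_cifar.read_cifar("./data/cifar-10-python/")
data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, 0.9)

# accuracy of the k-nearest-neighbors classifier for an arbitrary k = 5
accuracy = knn.evaluate_knn(data_train, labels_train, data_test, labels_test, 5)
print("classification rate:", accuracy)
```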
## Author
Oscar CHAUFOUR
@@ -10,80 +10,59 @@
import statistics
from statistics import mode
import time
import matplotlib.pyplot as plt
from tqdm import tqdm
# note: numpy (np) and read_cifar are imported earlier in the file, above this hunk
def distance_matrix(A, B):
    # squared L2 norm of every row of A (kept as a column) and of B (kept as a row)
    sum_of_squaresA = np.sum(A**2, axis=1, keepdims=True)
    sum_of_squaresB = np.sum(B**2, axis=1)
    dot_product = np.dot(A, B.T)
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b, broadcast over every pair of rows
    dists = np.sqrt(sum_of_squaresA + sum_of_squaresB - 2 * dot_product)
    return dists
def knn_predict(dists, labels_train, k):
    number_train, number_test = dists.shape
    # initialize the predicted labels to zeros
    labels_predicted = np.zeros(number_test)
    for j in range(number_test):
        # training samples sorted by increasing distance to test sample j
        sorted_indices = np.argsort(dists[:, j])
        knn_indices = sorted_indices[:k]
        knn_labels = labels_train[knn_indices]
        # majority vote among the k nearest neighbours
        labels_predicted[j] = mode(knn_labels)
    return labels_predicted
def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    dists = distance_matrix(data_train, data_test)
    labels_predicted = knn_predict(dists, labels_train, k)
    # fraction of test samples whose predicted label matches the true label
    number_true_prediction = np.sum(labels_test == labels_predicted)
    number_total_prediction = labels_test.shape[0]
    classification_rate = number_true_prediction / number_total_prediction
    return classification_rate
if __name__ == "__main__" :
t1 = time.time()
# # Example distance matrix, training labels, and k value
# dists = np.array([[1000, 2, 3],
# [4, 0.1, 6],
# [7, 8, 0]])
# labels_train = np.array([0, 1, 5])
# k = 2
# # Predict labels for the test set using k-NN
# predicted_labels = knn_predict(dists, labels_train, k)
# classification_rate = evaluate_knn(np.array([[1, 27], [100, 300]]), np.array([0.002, 9000]), np.array([[25, 350]]), np.array([9000]), 1)
# print("Classification rate:")
# print(classification_rate)
file = "./data/cifar-10-python/"
data, labels = read_cifar.read_cifar(file)
data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, 0.9)
k = 10
print(len(data_train))
print(len(data_test))
print(len(data_train[0]))
print(len(data_test[0]))
# dists = distance_matrix(data_train, data_test)
# knn_predict(dists, labels_train, k)
classification_rate = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
print("classification rate :", classification_rate)
# plot_accuracy(data_train, labels_train, data_test, labels_test, 4)
t2 = time.time()
print('run time (second): ')
print(t2-t1)
\ No newline at end of file
k = 8
evaluations = []
for k in tqdm(range(1, k)) :
evaluations.append(evaluate_knn(data_train, labels_train, data_test, labels_test, k))
fig=plt.figure()
plt.title("Prediction accuracy as a function of k")
plt.xlabel("k-nearest neighbors")
plt.ylabel("Accuracy (%)")
plt.plot(evaluations)
plt.show()
plt.savefig('results/knn.png')
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 23 19:43:47 2023
@author: oscar
"""
import numpy as np
from collections import Counter
import read_cifar
def distance_matrix(M1, M2):
    # dists(i, j) = distance between the i-th row of M1 and the j-th row of M2,
    # i.e. sqrt(sum_p (M1[i, p] - M2[j, p])^2),
    # which expands to sum_p M1[i, p]^2 + sum_p M2[j, p]^2 - 2 * sum_p M1[i, p] * M2[j, p]
    l1 = np.shape(M1)[0]
    l2 = np.shape(M2)[0]
    Vect1 = np.sum(M1**2, 1)
    Vect2 = np.sum(M2**2, 1)
    Mat1 = np.tile(Vect1, (l2, 1))  # shape (l2, l1): squared norms of the rows of M1
    Mat2 = np.tile(Vect2, (l1, 1))  # shape (l1, l2): squared norms of the rows of M2
    Mat3 = 2 * np.dot(M1, M2.T)     # shape (l1, l2): cross terms
    dists = np.sqrt(Mat1.T + Mat2 - Mat3)
    return dists
def knn_predict(dists, labels_train, k):
    labels_predict = np.array([])
    size_test = np.shape(dists)[1]
    for j in range(size_test):
        # training indices sorted by increasing distance to test sample j
        list_arg_min = np.argsort(dists[:, j])
        labels_sorted = [labels_train[i] for i in list_arg_min]
        k_labels = labels_sorted[:k]
        # majority vote among the k nearest neighbours
        count = Counter(k_labels)
        labels_predict = np.append(labels_predict, count.most_common(1)[0][0])
    return labels_predict
def evaluate_knn(data_train, data_test, labels_train, labels_test, k):
    dists = distance_matrix(data_train, data_test)
    labels_predict = knn_predict(dists, labels_train, k)
    # classification rate: fraction of correctly predicted test labels
    count = np.sum(labels_predict == labels_test)
    return count / labels_predict.shape[0]
if __name__ == "__main__":
file = "./data/cifar-10-python/"
data, labels = read_cifar.read_cifar(file)
data_train,labels_train,data_test,labels_test=read_cifar.split_dataset(data,labels,0.9)
print(evaluate_knn(data_train,data_test,labels_train,labels_test,20))