Skip to content
Snippets Groups Projects
Commit bde18979 authored by widad174's avatar widad174
Browse files

Partie 1

parent 69411637
Branches
No related tags found
No related merge requests found
DATA data
Env_Deep Env_Deep
File added
File added
knn.py 0 → 100644
import numpy as np
import matplotlib.pyplot as plt
def distance_matrix(train, test):
    """Compute the pairwise Euclidean (L2) distances between two sets of row vectors.

    The squared distances are obtained from the expansion
    ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b, so a single matrix product
    covers every (train, test) pair.

    Args:
        train (np.ndarray): Training samples, one per row, shape (n, m).
        test (np.ndarray): Test samples, one per row, shape (p, m).

    Returns:
        np.ndarray: Matrix of shape (n, p); entry (i, j) is the L2 distance
        between train[i] and test[j].
    """
    train_sq = np.sum(train * train, axis=1, keepdims=True)   # shape (n, 1)
    test_sq = np.sum(test * test, axis=1, keepdims=True).T    # shape (1, p)
    sq_dists = -2 * np.matmul(train, test.T) + train_sq + test_sq
    # Floating-point rounding can push a (near-)zero squared distance slightly
    # below zero; clamp it so the square root never produces NaN.
    dists = np.sqrt(np.maximum(sq_dists, 0))
    print('finished calculating dists')
    return dists
def mode(x):
    """Return the most frequent value found in ``x``.

    Args:
        x (array-like): Values to inspect.

    Returns:
        The value that occurs most often. When several values are tied, the
        smallest one is returned (np.unique yields sorted values and argmax
        keeps the first maximum).
    """
    distinct, occurrences = np.unique(x, return_counts=True)
    winner = occurrences.argmax()
    return distinct[winner]
def knn_predict(dists, labels_train, k):
    """Predict one label per test sample by majority vote among its k nearest training samples.

    Args:
        dists (np.ndarray): Distance matrix of shape (num_train, num_test);
            column j holds the distances from every training sample to test
            sample j.
        labels_train (array-like): Training labels, one per row of ``dists``.
        k (int): Number of nearest neighbours that vote; must be at least 1.

    Returns:
        np.ndarray: Predicted labels, shape (num_test,).

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")
    # Fancy indexing below needs an ndarray; this also accepts plain lists.
    labels_train = np.asarray(labels_train)
    # Sort each column (one test sample) and keep the k closest training rows.
    nearest_idx = np.argsort(dists, axis=0)[:k, :]
    nearest_labels = labels_train[nearest_idx]  # shape (k, num_test)
    return np.array([mode(votes) for votes in nearest_labels.T])
def evaluate_knn_for_k(data_train, labels_train, data_test, labels_test, K_max):
    """Measure k-NN accuracy on the test set for every k from 1 to K_max.

    The distance matrix is computed once and reused for every value of k.

    Args:
        data_train (np.ndarray): Training data.
        labels_train (np.ndarray): Training labels.
        data_test (np.ndarray): Test data.
        labels_test (np.ndarray): Test labels.
        K_max (int): Largest number of neighbours to evaluate.

    Returns:
        list: Element ``k - 1`` is the fraction of test labels predicted
        correctly with k neighbours.
    """
    neighbour_counts = range(1, K_max + 1)
    pairwise = distance_matrix(data_train, data_test)
    return [
        np.sum(knn_predict(pairwise, labels_train, k) == labels_test) / len(labels_test)
        for k in neighbour_counts
    ]
def plot_accuracy_versus_k(accuracies, save_path=r'C:\Users\hp\Desktop\BE\image-classification\resultats\Knn.png'):
    """Plot the accuracy as a function of k and save the figure to disk.

    Args:
        accuracies (sequence of float): ``accuracies[i]`` is the accuracy
            obtained with ``k = i + 1`` neighbours.
        save_path (str): Destination image file. The default is the location
            that used to be hard-coded, so existing callers behave as before;
            pass another path to run on a different machine.
    """
    k_values = np.arange(1, len(accuracies) + 1, 1)
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        ax.plot(k_values, accuracies, 'o-r')
        ax.set_title("Variation of the accuracy as a function of k")
        ax.set_xlabel("k (number of neighbors)")
        ax.set_ylabel("Accuracy")
        ax.set_xticks(k_values)  # one tick per evaluated k
        ax.grid(axis='both', which='both')
        fig.savefig(save_path)
    finally:
        # Release the figure even if saving fails, so repeated calls do not
        # accumulate open figures.
        plt.close(fig)
\ No newline at end of file
main.py 0 → 100644
from read_cifar import *
from knn import *
# Directory that holds the CIFAR batch files.
path = r'C:\Users\hp\Desktop\BE\image-classification\data'
if __name__ == "__main__":
    # Fraction of the samples used for training; the rest is held out for testing.
    split_factor = 0.9
    # Largest number of neighbours to evaluate.
    K_max = 20

    X, y = read_cifar(path)
    X_train, y_train, X_test, y_test = split_dataset(X, y, split=split_factor)
    accuracies = evaluate_knn_for_k(X_train, y_train, X_test, y_test, K_max)
    plot_accuracy_versus_k(accuracies)
\ No newline at end of file
import os
import pickle

import numpy as np
#read_cifaar_batch
def unpickle(file):
    """Load and return the object stored in a pickle file.

    Args:
        file (str): Path of the pickle file to read.

    Returns:
        The unpickled object. The file is loaded with ``encoding='bytes'``,
        so byte strings pickled by Python 2 stay ``bytes`` (e.g. ``b'data'``).
    """
    # NOTE: unpickling can execute arbitrary code; only load files from a
    # trusted source.
    with open(file, 'rb') as fo:
        return pickle.load(fo, encoding='bytes')
# Note (translated from the original comment, which describes read_cifar):
# the reading function takes the path of the directory containing the data
# and returns a matrix X of size N x D, where N is the number of available
# samples and D the dimension of each sample (number of numeric values
# describing it), together with a vector Y of size N whose values are the
# class code of the sample with the same index in X.
# X and Y are numpy objects.
def read_cifar_batch(file):
    """Read a single CIFAR batch file.

    Args:
        file (str): Path of the batch file.

    Returns:
        tuple:
            - data (np.ndarray, np.float32): matrix of size
              (batch_size x data_size).
            - labels (np.ndarray, np.int64): vector of size batch_size.
    """
    batch = unpickle(file)
    data = batch[b'data'].astype(np.float32)
    labels = np.array(batch[b'labels'], dtype=np.int64)
    # Make sure the labels come back as a flat vector of length batch_size.
    labels = labels.reshape(labels.shape[0])
    return data, labels
def read_cifar(path):
    """Read every CIFAR batch stored in a directory and stack them together.

    The files ``data_batch_1`` ... ``data_batch_5`` and ``test_batch`` are
    read in that order.

    Args:
        path (str): Directory containing all the batch files (including
            ``test_batch``).

    Returns:
        tuple:
            - data (np.ndarray, np.float32): samples of all batches stacked
              row-wise.
            - labels (np.ndarray, np.int64): labels of all batches, in the
              same order as the rows of ``data``.
    """
    batch_names = ["data_batch_" + str(i) for i in range(1, 6)] + ['test_batch']
    batches = [read_cifar_batch(os.path.join(path, name)) for name in batch_names]
    data_parts, label_parts = zip(*batches)
    # Concatenate once at the end instead of re-copying the growing arrays
    # after every batch.
    data = np.concatenate(data_parts, axis=0).astype(np.float32, copy=False)
    labels = np.concatenate(label_parts, axis=-1).astype(np.int64, copy=False)
    return data, labels
def split_dataset(data, labels, split=0.6):
    """Randomly split a dataset into a training set and a test set.

    The samples are shuffled with ``np.random.permutation`` before being cut,
    and every sample stays paired with its label.

    Args:
        data (np.ndarray): Samples, one per row.
        labels (np.ndarray): One label per row of ``data``.
        split (float): Fraction of the samples, between 0 and 1, that goes to
            the training set.

    Returns:
        tuple:
            - data_train (np.float32): the training data,
            - labels_train (np.int64): the corresponding labels,
            - data_test (np.float32): the testing data, and
            - labels_test (np.int64): the corresponding labels.

    Raises:
        ValueError: If ``split`` is outside [0, 1] or if ``data`` and
            ``labels`` do not contain the same number of samples.
    """
    data = np.asarray(data)
    labels = np.asarray(labels)
    n = data.shape[0]
    if labels.shape[0] != n:
        raise ValueError(
            f"data and labels must have the same length, got {n} and {labels.shape[0]}"
        )
    if not 0 <= split <= 1:
        raise ValueError(f"split must be between 0 and 1, got {split}")

    indices = np.random.permutation(n)
    cut = int(split * n)  # number of samples assigned to the training set
    train_idx, test_idx = indices[:cut], indices[cut:]
    data_train = data[train_idx].astype(np.float32)
    data_test = data[test_idx].astype(np.float32)
    labels_train = labels[train_idx].astype(np.int64)
    labels_test = labels[test_idx].astype(np.int64)
    return data_train, labels_train, data_test, labels_test
\ No newline at end of file
resultats/Knn.png

44.7 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment