diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..1269488f7fb1f4b56a8c0e5eb48cecbfadfa9219
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+data
diff --git a/__pycache__/read_cifar.cpython-37.pyc b/__pycache__/read_cifar.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b48dc30065475775d3fe048e26ff3ba78fcc652d
Binary files /dev/null and b/__pycache__/read_cifar.cpython-37.pyc differ
diff --git a/knn.py b/knn.py
new file mode 100644
index 0000000000000000000000000000000000000000..90154a7544087d48693e1e8f488a4942c85a0b03
--- /dev/null
+++ b/knn.py
@@ -0,0 +1,64 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import read_cifar
+
+
+def distance_matrix(A, B):
+    # Euclidean distance matrix between the rows of A and the rows of B,
+    # using the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b
+    k1 = np.sum(A**2, axis=1)[:, np.newaxis]
+    k2 = np.sum(B**2, axis=1)
+    k3 = -2 * np.dot(A, B.T)
+    dists = np.round(np.sqrt(np.maximum(k1 + k2 + k3, 0)), 2)  # clamp tiny negative values before sqrt
+    return dists
+
+
+def knn_predict(dists, labels_train, k):
+    # Predicts the label of each test point as the majority label among its
+    # k nearest neighbors in the training (reference) set.
+    num_test = dists.shape[0]
+    labels_predict = np.zeros(num_test, dtype=np.int64)
+    for i in range(num_test):
+        # labels of the k nearest neighbors of the i-th test point
+        L = labels_train[np.argsort(dists[i])][:k]
+        labels_predict[i] = np.argmax(np.bincount(L))
+    return labels_predict
+
+
+def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
+    # data_train: the training data,
+    # labels_train: the corresponding labels,
+    # data_test: the testing data,
+    # labels_test: the corresponding labels,
+    # k: the number of neighbors.
+    # Returns the classification accuracy (in %) on the test set.
+    num_test = labels_test.shape[0]
+    dists = distance_matrix(data_test, data_train)
+    labels_predict = knn_predict(dists, labels_train, k)
+    num_correct = np.sum(labels_predict == labels_test)
+    accuracy = (float(num_correct) / num_test) * 100
+    return accuracy
+
+
+# For split=0.9, plot the variation of the accuracy as a function of k
+# (from 1 to 20)
+
+path = "data/cifar-10-batches-py"
+data, labels = read_cifar.read_cifar(path)
+split = 0.9
+data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, split)
+
+ks = np.arange(1, 21)
+accuracies = [evaluate_knn(data_train, labels_train, data_test, labels_test, k) for k in ks]
+plt.plot(ks, accuracies)
+plt.title("Variation of the accuracy as a function of k")
+plt.xlabel("k")
+plt.ylabel("Accuracy (%)")
+plt.show()
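+
+# Quick sanity check (illustrative sketch, not part of the assignment): verify
+# distance_matrix against hand-computed distances on two tiny, made-up arrays.
+if __name__ == "__main__":
+    A_check = np.array([[0., 0.], [3., 4.]])
+    B_check = np.array([[0., 0.], [6., 8.]])
+    print(distance_matrix(A_check, B_check))  # expected: [[0, 10], [5, 5]]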
diff --git a/mlp.py b/mlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..226844d97a3f7ffb95ecc9e39513e629908d90f2
--- /dev/null
+++ b/mlp.py
@@ -0,0 +1,221 @@
+import numpy as np
+import read_cifar
+
+N = 30  # number of input data
+d_in = 3  # input dimension
+d_h = 3  # number of neurons in the hidden layer
+d_out = 2  # output dimension (number of neurons of the output layer)
+
+# Random initialization of the network weights and biases
+w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
+b1 = np.zeros((1, d_h))  # first layer biases
+w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+b2 = np.zeros((1, d_out))  # second layer biases
+
+data = np.random.rand(N, d_in)  # random input data
+targets = np.random.rand(N, d_out)  # random targets
+
+# Random toy dataset with integer labels in {0, ..., d_out - 1}
+data_train = np.random.rand(N, d_in)
+labels_train = np.random.randint(0, d_out, N)
+
+data_test = np.random.rand(N, d_in)
+labels_test = np.random.randint(0, d_out, N)
+
+
+split_factor = 0.9
+# d_h = 64
+learning_rate = 0.1
+num_epoch = 100
+
+path = "data/cifar-10-batches-py"
+# data, labels = read_cifar.read_cifar(path)
+# data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, split_factor)
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+
+def sigmoid_derived(x):
+    return sigmoid(x) * (1 - sigmoid(x))
+
+
+def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
+    # Performs one gradient descent step with the MSE loss and returns the
+    # updated weights and biases together with the loss before the update.
+
+    n_batch = data.shape[0]
+
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # Compute loss (MSE) before the update
+    loss = np.mean(np.square(predictions - targets))
+
+    # Backward pass: calculation of dw1, db1, dw2, db2
+    dz2 = 2 / targets.shape[1] * (a2 - targets) * a2 * (1 - a2)  # includes the sigmoid derivative
+    dw2 = 1 / n_batch * np.matmul(a1.T, dz2)
+    db2 = 1 / n_batch * np.sum(dz2, axis=0, keepdims=True)
+    dz1 = np.matmul(dz2, w2.T) * sigmoid_derived(z1)
+    dw1 = 1 / n_batch * np.matmul(a0.T, dz1)
+    db1 = 1 / n_batch * np.sum(dz1, axis=0, keepdims=True)
+
+    # Gradient descent step
+    w1 = w1 - learning_rate * dw1
+    b1 = b1 - learning_rate * db1
+    w2 = w2 - learning_rate * dw2
+    b2 = b2 - learning_rate * db2
+
+    return w1, b1, w2, b2, loss
+
+
+def one_hot(M: np.ndarray):
+    # Takes a 1-D array of integer labels and returns the corresponding
+    # 2-D one-hot matrix (one row per label).
+    M_one = np.zeros((M.size, M.max() + 1))
+    M_one[np.arange(M.size), M] = 1
+    return M_one
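+
+# Worked example (illustrative sketch) on a small, made-up label vector:
+#   one_hot(np.array([0, 2, 1]))
+#   -> [[1., 0., 0.],
+#       [0., 0., 1.],
+#       [0., 1., 0.]]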
+
+
+def learn_once_cross_entropy_0(w1, b1, w2, b2, data, targets, learning_rate):
+    # Same as learn_once_cross_entropy below, but takes targets that are
+    # already one-hot encoded. Performs one gradient descent step with a
+    # softmax output layer and the cross-entropy loss.
+
+    n_batch = data.shape[0]
+
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)  # softmax output
+    predictions = a2  # predicted class probabilities
+
+    # Compute loss (cross-entropy) before the update:
+    # average of -log(probability assigned to the true class)
+    loss = -np.mean(np.sum(targets * np.log(predictions + 1e-12), axis=1))
+
+    # Backward pass (softmax + cross-entropy gives dz2 = a2 - targets)
+    dz2 = a2 - targets
+    dw2 = 1 / n_batch * np.matmul(a1.T, dz2)
+    db2 = 1 / n_batch * np.sum(dz2, axis=0, keepdims=True)
+    dz1 = np.matmul(dz2, w2.T) * sigmoid_derived(z1)
+    dw1 = 1 / n_batch * np.matmul(a0.T, dz1)
+    db1 = 1 / n_batch * np.sum(dz1, axis=0, keepdims=True)
+
+    # Gradient descent step
+    w1 = w1 - learning_rate * dw1
+    b1 = b1 - learning_rate * db1
+    w2 = w2 - learning_rate * dw2
+    b2 = b2 - learning_rate * db2
+
+    return w1, b1, w2, b2, loss
+
+
+def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
+    # labels_train: a vector of size batch_size containing the class indices.
+    # Performs one gradient descent step with a softmax output layer and the
+    # cross-entropy loss; returns the updated parameters and the loss.
+
+    n_batch = data.shape[0]
+    labels_train_one = one_hot(labels_train)
+
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)  # softmax output
+    predictions = a2  # predicted class probabilities
+
+    # Compute loss (cross-entropy) before the update
+    loss = -np.mean(np.sum(labels_train_one * np.log(predictions + 1e-12), axis=1))
+
+    # Backward pass (softmax + cross-entropy gives dz2 = a2 - one-hot targets)
+    dz2 = a2 - labels_train_one
+    dw2 = 1 / n_batch * np.matmul(a1.T, dz2)
+    db2 = 1 / n_batch * np.sum(dz2, axis=0, keepdims=True)
+    dz1 = np.matmul(dz2, w2.T) * sigmoid_derived(z1)
+    dw1 = 1 / n_batch * np.matmul(a0.T, dz1)
+    db1 = 1 / n_batch * np.sum(dz1, axis=0, keepdims=True)
+
+    # Gradient descent step
+    w1 = w1 - learning_rate * dw1
+    b1 = b1 - learning_rate * db1
+    w2 = w2 - learning_rate * dw2
+    b2 = b2 - learning_rate * db2
+
+    return w1, b1, w2, b2, loss
+
+
+def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
+    # Performs num_epoch full-batch training steps and returns the updated
+    # parameters and the list of training accuracies (one value per epoch).
+
+    train_accuracies = []
+    for epoch in range(num_epoch):
+        w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
+
+        # Training accuracy after this epoch
+        z1 = np.matmul(data_train, w1) + b1
+        a1 = sigmoid(z1)
+        z2 = np.matmul(a1, w2) + b2
+        a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)
+        predictions = np.argmax(a2, axis=1)
+        train_accuracies.append(np.mean(predictions == labels_train))
+
+    return w1, b1, w2, b2, train_accuracies
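+
+# Usage sketch (illustrative, commented out): one way to exercise train_mlp on
+# the random toy data and hyper-parameters defined at the top of this file.
+#   w1_t, b1_t, w2_t, b2_t, accs = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
+#   print("final training accuracy:", accs[-1])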
+
+
+def test_mlp(w1, b1, w2, b2, data_test, labels_test):
+    # w1, b1, w2 and b2: the weights and biases of the network,
+    # data_test: a matrix of shape (batch_size x d_in),
+    # labels_test: a vector of size batch_size.
+    # Tests the network on the test set and returns test_accuracy, the testing accuracy.
+
+    z1 = np.matmul(data_test, w1) + b1
+    a1 = sigmoid(z1)
+    z2 = np.matmul(a1, w2) + b2
+    a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)
+    predictions = np.argmax(a2, axis=1)
+    test_accuracy = np.mean(predictions == labels_test)
+
+    return test_accuracy
+
+
+def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
+    # Trains an MLP classifier with one hidden layer of size d_h and returns
+    # the training accuracies across epochs (list of floats) and the final
+    # testing accuracy (float).
+
+    d_in = data_train.shape[1]
+    d_out = int(labels_train.max()) + 1
+
+    # Random initialization of the network weights and biases
+    w1 = 2 * np.random.rand(d_in, d_h) - 1
+    b1 = np.zeros((1, d_h))
+    w2 = 2 * np.random.rand(d_h, d_out) - 1
+    b2 = np.zeros((1, d_out))
+
+    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
+    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
+
+    return train_accuracies, test_accuracy
diff --git a/read_cifar.py b/read_cifar.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3694a251596e334bc735b92c8b1b542ce8eaaf3
--- /dev/null
+++ b/read_cifar.py
@@ -0,0 +1,59 @@
+import pickle
+from typing import Tuple
+import os
+import numpy as np
+
+
+def read_cifar_batch(path: str) -> Tuple[np.ndarray, np.ndarray]:
+    # Reads one CIFAR-10 batch file and returns the data (float32) and the labels (int64).
+    with open(path, "rb") as file:
+        batch = pickle.load(file, encoding="bytes")
+        data = np.array(batch[b"data"], dtype=np.float32)
+        labels = np.array(batch[b"labels"], dtype=np.int64)
+
+    return data, labels
+
+
+def read_cifar(path):
+    # Reads the five training batches and the test batch and concatenates them
+    # into a single dataset.
+    data = np.array([], dtype=np.float32)
+    labels = np.array([], dtype=np.int64)
+    for b in range(1, 6):
+        files = os.path.join(path, 'data_batch_%d' % (b,))
+        X, Y = read_cifar_batch(files)
+        if np.size(data) == 0:
+            data, labels = X, Y
+        else:
+            data = np.concatenate((data, X))
+            labels = np.concatenate((labels, Y))
+    data_test, labels_test = read_cifar_batch(os.path.join(path, 'test_batch'))
+    data = np.concatenate((data, data_test))
+    labels = np.concatenate((labels, labels_test))
+
+    return data, labels
+
+
+def split_dataset(data, labels, split):
+    # Shuffles the samples, then splits them into a training part (a fraction
+    # `split` of the samples) and a test part (the remaining samples).
+    n = data.shape[0]
+    k = np.random.permutation(n)
+    data, labels = data[k], labels[k]
+
+    n_train = int(split * n)
+
+    data_train = data[:n_train]
+    labels_train = labels[:n_train]
+    data_test = data[n_train:]
+    labels_test = labels[n_train:]
+
+    return data_train, labels_train, data_test, labels_test
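+
+
+# Minimal usage sketch. The relative path below is an assumption (it matches the
+# "data" folder listed in .gitignore); adjust it to wherever the CIFAR-10 batches live.
+if __name__ == "__main__":
+    data, labels = read_cifar("data/cifar-10-batches-py")
+    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.9)
+    print(data_train.shape, labels_train.shape, data_test.shape, labels_test.shape)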