diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..1269488f7fb1f4b56a8c0e5eb48cecbfadfa9219
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+data
+__pycache__/
diff --git a/knn.py b/knn.py
new file mode 100644
index 0000000000000000000000000000000000000000..90154a7544087d48693e1e8f488a4942c85a0b03
--- /dev/null
+++ b/knn.py
@@ -0,0 +1,63 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import read_cifar
+
+
+
+def distance_matrix(A, B):
+    # L2 (Euclidean) distance between every row of A and every row of B,
+    # using ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b
+    k1 = np.sum(A**2, axis=1)[:, np.newaxis]
+    k2 = np.sum(B**2, axis=1)
+    k3 = -2 * np.dot(A, B.T)
+    # clamp tiny negative values caused by floating point error before the sqrt
+    dists = np.round(np.sqrt(np.maximum(k1 + k2 + k3, 0)), 2)
+    return dists
+    
+    
+
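+# Illustrative sanity check (an addition for clarity, not required by the
+# assignment): the vectorized identity ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b
+# used above should agree with a naive double loop on small random inputs.
+_A = np.random.rand(4, 3)
+_B = np.random.rand(5, 3)
+_naive = np.array([[np.sqrt(np.sum((a - b) ** 2)) for b in _B] for a in _A])
+assert np.allclose(distance_matrix(_A, _B), _naive, atol=0.01)
+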
+def knn_predict(dists, labels_train, k):
+    # predicts the label of each test point by a majority vote among its
+    # k nearest neighbors (the reference set is the training set)
+    num_test = dists.shape[0]
+    labels_predict = np.zeros(num_test)
+    for i in range(num_test):
+        # labels of the k nearest neighbors of the i-th test point
+        L = labels_train[np.argsort(dists[i])][0:k]
+        labels_predict[i] = np.argmax(np.bincount(L))
+    return labels_predict
+    
+    
+    
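+# Illustrative example (added sketch): with three training points labelled
+# [0, 1, 1] and a test point closest to the last two, a 3-NN vote returns 1.
+_toy_train = np.array([[0.0, 0.0], [1.0, 1.0], [1.1, 1.0]])
+_toy_labels = np.array([0, 1, 1])
+_toy_test = np.array([[1.0, 0.9]])
+assert knn_predict(distance_matrix(_toy_test, _toy_train), _toy_labels, 3)[0] == 1
+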
+def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
+    # data_train the training data,
+    # labels_train the corresponding labels,
+    # data_test the testing data,
+    # labels_test the corresponding labels, and
+    # k the number of neighbors.
+    num_test = labels_test.shape[0]
+    dists = distance_matrix(data_test, data_train)
+    labels_predict = knn_predict(dists, labels_train, k)
+    num_correct = np.sum(labels_predict == labels_test)
+    accuracy = (float(num_correct) / num_test) * 100
+
+    return accuracy
+
+
+
+# For split=0.9, plot the variation of the accuracy as a function of k 
+# (from 1 to 20)
+
+path = "C:/Users/mylaa/Documents/ECL_2022/MOD/MOD_04_6_Apprentissage_profond_Intelligence_Artificielle/TD1/image-classification/data/cifar-10-batches-py"
+data,labels = read_cifar.read_cifar(path)
+split = 0.9
+data_train,labels_train,data_test,labels_test = read_cifar.split_dataset(data,labels,split)
+
+k_values = np.arange(1, 21)
+accuracies = [evaluate_knn(data_train, labels_train, data_test, labels_test, k) for k in k_values]
+plt.plot(k_values, accuracies)
+plt.title("Variation of the accuracy as a function of k")
+plt.xlabel("k")
+plt.ylabel("Accuracy (%)")
+plt.show()
+
diff --git a/mlp.py b/mlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..226844d97a3f7ffb95ecc9e39513e629908d90f2
--- /dev/null
+++ b/mlp.py
@@ -0,0 +1,254 @@
+import numpy as np
+import read_cifar
+
+N = 30  # number of input data
+d_in = 3  # input dimension
+d_h = 3  # number of neurons in the hidden layer
+d_out = 2  # output dimension (number of neurons of the output layer)
+
+# Random initialization of the network weights and biases
+w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
+b1 = np.zeros((1, d_h))  # first layer biases
+w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+b2 = np.zeros((1, d_out))  # second layer biases
+    
+data = np.random.rand(N, d_in)  # random input data
+targets = np.random.rand(N, d_out)  # random targets
+
+data_train = np.random.rand(N, d_in)
+labels_train = np.random.randint(0, d_out, N)  # integer class labels in [0, d_out)
+
+data_test = np.random.rand(N, d_in)
+labels_test = np.random.randint(0, d_out, N)
+
+
+
+split_factor=0.9
+#d_h=64 
+learning_rate=0.1 
+num_epoch=100
+
+path = "C:/Users/mylaa/Documents/ECL_2022/MOD/MOD_04_6_Apprentissage_profond_Intelligence_Artificielle/TD1/image-classification/data/cifar-10-batches-py"
+#data,labels = read_cifar.read_cifar(path)
+#data_train,labels_train,data_test,labels_test = read_cifar.split_dataset(data,labels,split_factor)
+
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+def sigmoid_derived(x):
+    return sigmoid(x)*(1-sigmoid(x))
+
+
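+# Quick illustrative check (an addition): sigmoid(0) = 0.5 and its derivative
+# at 0 is 0.25, consistent with sigmoid(x) * (1 - sigmoid(x)).
+assert np.isclose(sigmoid(0.0), 0.5) and np.isclose(sigmoid_derived(0.0), 0.25)
+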
+
+def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
+    # performs one gradient descent step with the MSE loss
+    N = data.shape[0]  # batch size (do not rely on the global N)
+
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # calculation of dw1, db1, dw2, db2
+    dz2 = 2 * (a2 - targets) * sigmoid_derived(z2)  # dC/dz2 for the MSE loss (chain rule through the output sigmoid)
+    dw2 = 1 / N * (np.matmul(a1.T, dz2))
+    db2 = 1 / N * (np.sum(dz2, axis=0, keepdims=True))  # sum over the batch dimension
+    dz1 = np.matmul(dz2, w2.T) * sigmoid_derived(z1)
+    dw1 = 1 / N * (np.matmul(a0.T, dz1))
+    db1 = 1 / N * (np.sum(dz1, axis=0, keepdims=True))
+
+    # gradient descent
+    w1 = w1 - learning_rate * dw1
+    b1 = b1 - learning_rate * db1
+    w2 = w2 - learning_rate * dw2
+    b2 = b2 - learning_rate * db2
+
+    # Compute loss (MSE) with the updated weights
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    loss = np.mean(np.square(predictions - targets))
+
+    return w1, b1, w2, b2, loss
+
+
+
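+# Illustrative usage (an added sketch): one MSE step on the random toy data
+# defined above should return a finite scalar loss and arrays of unchanged shape.
+_w1, _b1, _w2, _b2, _mse_loss = learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate)
+assert _w1.shape == w1.shape and np.isfinite(_mse_loss)
+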
+
+def one_hot(M: np.ndarray):
+    # takes a 1-D array of integer labels and returns the corresponding 2-D one-hot matrix
+    M_one = np.zeros((M.size, M.max() + 1))
+    M_one[np.arange(M.size), M] = 1
+    return M_one
+
+
+
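+# Illustrative example (an addition): one_hot on the labels [0, 2, 1] gives a
+# 3 x 3 matrix with a single 1 per row.
+assert np.array_equal(one_hot(np.array([0, 2, 1])),
+                      np.array([[1., 0., 0.], [0., 0., 1.], [0., 1., 0.]]))
+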
+
+def learn_once_cross_entropy_0(w1, b1, w2, b2, data, targets, learning_rate):
+    # one gradient descent step with the cross entropy loss, for targets that
+    # are already one-hot encoded
+    N = data.shape[0]  # batch size
+
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # calculation of dw1, db1, dw2, db2 (using dC/dz2 = a2 - targets)
+    dz2 = a2 - targets
+    dw2 = 1 / N * (np.matmul(a1.T, dz2))
+    db2 = 1 / N * (np.sum(dz2, axis=0, keepdims=True))  # sum over the batch dimension
+    dz1 = np.matmul(dz2, w2.T) * sigmoid_derived(z1)
+    dw1 = 1 / N * (np.matmul(a0.T, dz1))
+    db1 = 1 / N * (np.sum(dz1, axis=0, keepdims=True))
+
+    # gradient descent
+    w1 = w1 - learning_rate * dw1
+    b1 = b1 - learning_rate * db1
+    w2 = w2 - learning_rate * dw2
+    b2 = b2 - learning_rate * db2
+
+    # Compute loss (cross entropy) with the updated weights
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    # row-wise softmax over the output units to obtain class probabilities
+    predictions = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)
+
+    loss = -np.mean(np.sum(targets * np.log(predictions + 1e-12), axis=1))
+
+    return w1, b1, w2, b2, loss
+
+
+
+
+def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
+    # labels_train a vector of size batch_size (integer class labels)
+    # performs one gradient descent step using a cross entropy loss
+    N = data.shape[0]  # batch size
+
+    labels_train_one = one_hot(labels_train)
+
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # calculation of dw1, db1, dw2, db2 (using dC/dz2 = a2 - one-hot targets)
+    dz2 = a2 - labels_train_one
+    dw2 = 1 / N * (np.matmul(a1.T, dz2))
+    db2 = 1 / N * (np.sum(dz2, axis=0, keepdims=True))  # sum over the batch dimension
+    dz1 = np.matmul(dz2, w2.T) * sigmoid_derived(z1)
+    dw1 = 1 / N * (np.matmul(a0.T, dz1))
+    db1 = 1 / N * (np.sum(dz1, axis=0, keepdims=True))
+
+    # gradient descent
+    w1 = w1 - learning_rate * dw1
+    b1 = b1 - learning_rate * db1
+    w2 = w2 - learning_rate * dw2
+    b2 = b2 - learning_rate * db2
+
+    # Compute loss (cross entropy) with the updated weights
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    # row-wise softmax over the output units to obtain class probabilities
+    predictions = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)
+
+    loss = -np.mean(np.sum(labels_train_one * np.log(predictions + 1e-12), axis=1))
+
+    return w1, b1, w2, b2, loss
+
+
+
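+# Illustrative usage (an added sketch): one cross-entropy step on the toy data
+# and integer labels defined above; the loss should be a finite positive scalar.
+_w1, _b1, _w2, _b2, _ce_loss = learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate)
+assert np.isfinite(_ce_loss) and _ce_loss > 0
+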
+
+def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
+    # performs num_epoch training steps and returns the updated weights together
+    # with train_accuracies, the list of training accuracies (one value per epoch)
+    N = data_train.shape[0]  # batch size
+
+    labels_train_one = one_hot(labels_train)
+    train_accuracies = []
+
+    for i in range(num_epoch):
+        # Forward pass
+        a0 = data_train  # the data are the input of the first layer
+        z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+        a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+        z2 = np.matmul(a1, w2) + b2  # input of the output layer
+        a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
+
+        # calculation of dw1, db1, dw2, db2 (using dC/dz2 = a2 - one-hot targets)
+        dz2 = a2 - labels_train_one
+        dw2 = 1 / N * (np.matmul(a1.T, dz2))
+        db2 = 1 / N * (np.sum(dz2, axis=0, keepdims=True))  # sum over the batch dimension
+        dz1 = np.matmul(dz2, w2.T) * sigmoid_derived(z1)
+        dw1 = 1 / N * (np.matmul(a0.T, dz1))
+        db1 = 1 / N * (np.sum(dz1, axis=0, keepdims=True))
+
+        # gradient descent
+        w1 = w1 - learning_rate * dw1
+        b1 = b1 - learning_rate * db1
+        w2 = w2 - learning_rate * dw2
+        b2 = b2 - learning_rate * db2
+
+        # training accuracy with the updated weights
+        z1 = np.matmul(a0, w1) + b1
+        a1 = sigmoid(z1)
+        z2 = np.matmul(a1, w2) + b2
+        a2 = sigmoid(z2)
+        predicted_labels = np.argmax(a2, axis=1)
+        train_accuracies.append(np.mean(predicted_labels == labels_train))
+
+    return w1, b1, w2, b2, train_accuracies
+
+
+
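+# Illustrative usage (an added sketch): a short training run on the toy data
+# defined at the top of the file; accuracies should be floats between 0 and 1.
+_, _, _, _, _toy_accs = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, 10)
+assert len(_toy_accs) == 10 and all(0.0 <= a <= 1.0 for a in _toy_accs)
+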
+
+def test_mlp(w1, b1, w2, b2, data_test, labels_test):
+    # w1, b1, w2 and b2 the weights and biases of the network,
+    # data_test a matrix of shape (batch_size x d_in), and
+    # labels_test a vector of size batch_size,
+    # tests the network on the test set and returns test_accuracy the testing accuracy
+
+    # forward pass on the test set, then predict the class with the highest output
+    a1 = sigmoid(np.matmul(data_test, w1) + b1)
+    a2 = sigmoid(np.matmul(a1, w2) + b2)
+    predicted_labels = np.argmax(a2, axis=1)
+    test_accuracy = np.mean(predicted_labels == labels_test)
+
+    return test_accuracy
+
+
+
+
+def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
+    # trains an MLP classifier and returns the training accuracies across epochs
+    # as a list of floats and the final testing accuracy as a float
+    d_in = data_train.shape[1]
+    d_out = int(labels_train.max()) + 1  # number of classes
+    # random initialization of the weights and biases
+    w1 = 2 * np.random.rand(d_in, d_h) - 1
+    b1 = np.zeros((1, d_h))
+    w2 = 2 * np.random.rand(d_h, d_out) - 1
+    b2 = np.zeros((1, d_out))
+    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
+    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
+
+    return train_accuracies, test_accuracy
+
+
+    
+    
+
+
+
+
+
+
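+# Illustrative end-to-end run (a sketch added for clarity, not part of the
+# original script): train on CIFAR-10 with the hyper-parameters suggested above
+# (d_h = 64, learning_rate = 0.1, num_epoch = 100) and plot the training
+# accuracy per epoch. Assumes `path` points at the CIFAR-10 batches and that
+# matplotlib is available; the division by 255 is an assumed normalization.
+if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+    data, labels = read_cifar.read_cifar(path)
+    data = data / 255.0  # scale pixels to [0, 1] so the sigmoid does not saturate
+    data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, split_factor)
+    train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train,
+                                                       data_test, labels_test,
+                                                       d_h=64, learning_rate=0.1,
+                                                       num_epoch=100)
+    print("final test accuracy:", test_accuracy)
+    plt.plot(range(1, len(train_accuracies) + 1), train_accuracies)
+    plt.xlabel("epoch")
+    plt.ylabel("training accuracy")
+    plt.title("MLP training accuracy across epochs")
+    plt.show()
+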
diff --git a/read_cifar.py b/read_cifar.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3694a251596e334bc735b92c8b1b542ce8eaaf3
--- /dev/null
+++ b/read_cifar.py
@@ -0,0 +1,49 @@
+import pickle
+from typing import Tuple
+import os
+import numpy as np
+
+
+
+def read_cifar_batch(path: str) -> Tuple[np.ndarray, np.ndarray]:
+    # reads one CIFAR-10 batch file and returns its images and labels
+    with open(path, "rb") as file:
+        batch = pickle.load(file, encoding="bytes")
+    data = np.array(batch[b"data"], dtype=np.float32)
+    labels = np.array(batch[b"labels"], dtype=np.int64)
+
+    return data, labels
+
+
+
+def read_cifar(path):
+    # reads the five training batches and the test batch, and returns the whole
+    # dataset as a single (data, labels) pair
+    data = np.array([], dtype=np.float32)
+    labels = np.array([], dtype=np.int64)
+    for b in range(1, 6):
+        file = os.path.join(path, 'data_batch_%d' % b)
+        X, Y = read_cifar_batch(file)
+        if np.size(data) == 0:
+            data, labels = X, Y
+        else:
+            data = np.concatenate((data, X))
+            labels = np.concatenate((labels, Y))
+    data_test, labels_test = read_cifar_batch(os.path.join(path, 'test_batch'))
+    data = np.concatenate((data, data_test))
+    labels = np.concatenate((labels, labels_test))
+
+    return data, labels
+
+
+
+def split_dataset(data, labels, split):
+    # shuffles the dataset and splits it into a training part (a `split`
+    # fraction of the rows) and a testing part
+    k = np.random.permutation(data.shape[0])  # shuffle rows and labels together
+    data, labels = data[k], labels[k]
+
+    n_train = int(split * data.shape[0])  # number of training samples
+
+    data_train = data[:n_train]
+    labels_train = labels[:n_train]
+    data_test = data[n_train:]
+    labels_test = labels[n_train:]
+
+    return data_train, labels_train, data_test, labels_test
+
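+
+# Illustrative check (an addition, not part of the original module): with
+# split = 0.9, 9 of 10 rows should land in the training split, and rows and
+# labels must stay aligned after the shuffle.
+if __name__ == "__main__":
+    X = np.arange(20, dtype=np.float32).reshape(10, 2)  # row i is [2i, 2i+1]
+    y = np.arange(10, dtype=np.int64)                   # label of row i is i
+    X_tr, y_tr, X_te, y_te = split_dataset(X, y, 0.9)
+    assert X_tr.shape == (9, 2) and X_te.shape == (1, 2)
+    assert np.all(X_tr[:, 0] == 2 * y_tr)  # shuffling kept rows and labels together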