Update mlp.py

12fab373 · Saidi Aya · c4364fff · 12fab373
Commit 12fab373 authored 2 years ago by Saidi Aya
--- a/mlp.py
+++ b/mlp.py
@@ -18,4 +18,130 @@ def learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate):
    A1=segmoid(np.matmul(A0, w1) + b1)
    A2=segmoid(np.matmul(A1,w2) + b2)
    #Let calculate the partial derivates
+    #2
+    D_A2=2*(A2-tragets)
+    D_A2_T=np.matmul(A2,(1-A2).T)
+    D_Z2=np.matmul(D_A2_T,D_A2)
+    D_W2=np.matmul(A1.T,D_Z2)
+    D_B2=D_Z2
+    #1
+    D_A1=np.matmul(D_Z2,w2.T)
+    D_Z1=np.matmul(np.matmul(A1,(1-A1).T),D_A1)
+    D_B1=D_Z1
+    D_W1=np.matmul(A0.T,D_Z1)
+    #The backpropagation of the gradient
+    w1=w1-learning_rate*D_W1
+    w2=w2-learning_rate*D_W2
+    b1=b1-learning_rate*D_B1
+    b2=b2-learning_rate*D_B2
+    # Forward pass
+    G1 = np.matmul(A0, w1) + b1
+    C1 = segmoid(G1)
+    G2 = np.matmul(C1, w2) + b2
+    C2 = segmoid(G2)
+    predictions = C2

+    # Compute loss (MSE)
+    loss = np.mean(np.square(predictions - targets))
+
+    return(w1,b1,w2,b2,loss)
+
def one_hot(D_array):
    """Return the one-hot encoding of an array of integer class labels.

    Args:
        D_array: array-like of non-negative class indices. It is flattened
            to 1-D and truncated to ``int`` (so ``2.0`` is class ``2``).

    Returns:
        A float array of shape ``(n, max_label + 1)`` with exactly one ``1``
        per row, or an empty ``(0, 0)`` array when ``D_array`` is empty.

    Raises:
        ValueError: if any label is negative (a negative index would
            otherwise silently wrap around to the last columns).
    """
    labels = np.asarray(D_array).reshape(-1).astype(int)
    if labels.size == 0:
        # np.max() of an empty array raises; there is nothing to encode.
        return np.zeros((0, 0))
    if labels.min() < 0:
        raise ValueError("one_hot expects non-negative class labels")

    o_h_matrix = np.zeros((labels.size, int(labels.max()) + 1))
    # Set one entry per row in a single vectorised assignment.
    o_h_matrix[np.arange(labels.size), labels] = 1
    return o_h_matrix
+
def softmax(x):
    """Apply the softmax activation along axis 1.

    Args:
        x: array of scores; normalisation is done over axis 1, so for a
            2-D input each row of the result sums to 1.

    Returns:
        Array of the same shape as ``x`` holding the softmax probabilities.
    """
    x = np.asarray(x)
    # Subtracting the per-row maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (and the ratio
    # from becoming nan) when the scores are large.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / exp_x.sum(axis=1, keepdims=True)
+
def learn_once_cross_entropy(w1,b1,w2,b2,data,targets,learning_rate):
    """Perform one gradient-descent step on a softmax cross-entropy loss.

    The network is a sigmoid hidden layer (``segmoid``) followed by a
    softmax output layer. The loss is the categorical cross-entropy between
    the softmax output and the one-hot encoding of ``targets``, averaged
    over the batch.

    Args:
        w1, b1: weights and biases of the hidden layer.
        w2, b2: weights and biases of the output layer.
        data: input batch, one sample per row.
        targets: integer class label of each sample (encoded with
            ``one_hot``, which infers the class count from the largest
            label present -- it must match the width of ``w2``).
        learning_rate: step size of the update.

    Returns:
        ``(w1, b1, w2, b2, loss)`` where the parameters are the updated
        ones and ``loss`` is evaluated *after* the update.
    """
    A0 = data
    Targets = one_hot(targets)
    batch_size = A0.shape[0]

    # Forward pass.
    A1 = segmoid(np.matmul(A0, w1) + b1)
    A2 = softmax(np.matmul(A1, w2) + b2)

    # Backward pass.
    # For softmax + cross-entropy the gradient w.r.t. the pre-activation is
    # simply (prediction - target); dividing by the batch size matches the
    # batch-mean loss returned below.
    D_Z2 = (A2 - Targets) / batch_size
    D_W2 = np.matmul(A1.T, D_Z2)
    # Bias gradients are summed over the batch so the biases keep their
    # (1, width) row shape instead of growing to one row per sample.
    D_B2 = np.sum(D_Z2, axis=0, keepdims=True)

    D_A1 = np.matmul(D_Z2, w2.T)
    # The sigmoid derivative is element-wise: a * (1 - a).
    D_Z1 = D_A1 * A1 * (1 - A1)
    D_W1 = np.matmul(A0.T, D_Z1)
    D_B1 = np.sum(D_Z1, axis=0, keepdims=True)

    # Gradient-descent update.
    w1 = w1 - learning_rate * D_W1
    w2 = w2 - learning_rate * D_W2
    b1 = b1 - learning_rate * D_B1
    b2 = b2 - learning_rate * D_B2

    # Forward pass with the updated parameters to report the loss.
    C1 = segmoid(np.matmul(A0, w1) + b1)
    C2 = softmax(np.matmul(C1, w2) + b2)

    # Cross-entropy loss; probabilities are clipped away from 0 so that
    # 0 * log(0) cannot turn the loss into nan.
    loss = -np.sum(np.multiply(Targets, np.log(np.clip(C2, 1e-12, 1.0)))) / float(batch_size)
    return (w1, b1, w2, b2, loss)
+
def train_mlp(w1,b1,w2,b2,data_train,labels_train,learning_rate,num_epoch):
    """Train the MLP for ``num_epoch`` epochs and record the training accuracy.

    Each epoch performs one ``learn_once_cross_entropy`` step on the whole
    training set, then runs a forward pass to measure the accuracy.

    Args:
        w1, b1: weights and biases of the hidden layer.
        w2, b2: weights and biases of the output layer.
        data_train: training samples, one per row.
        labels_train: integer class label of each training sample.
        learning_rate: step size passed to ``learn_once_cross_entropy``.
        num_epoch: number of training steps to run.

    Returns:
        ``(w1, w2, b1, b2, train_accuracies)``. NOTE: the weights come
        first, then the biases -- this differs from the argument order.
        ``train_accuracies`` holds one accuracy (in percent) per epoch.
    """
    train_accuracies = []
    for _ in range(num_epoch):
        (w1, b1, w2, b2, _loss) = learn_once_cross_entropy(
            w1, b1, w2, b2, data_train, labels_train, learning_rate
        )
        # Forward pass with the updated parameters to measure the accuracy.
        C1 = segmoid(np.matmul(data_train, w1) + b1)
        C2 = softmax(np.matmul(C1, w2) + b2)
        predictions = np.argmax(C2, axis=1)
        acc = (np.sum(predictions == labels_train) / predictions.shape[0]) * 100
        train_accuracies.append(acc)
    # Order kept as (w1, w2, b1, b2, ...) because callers unpack it that way.
    return (w1, w2, b1, b2, train_accuracies)
+
def test_mlp(w1,b1,w2,b2,data_test,labels_test):
    """Evaluate the network on a test set and return its accuracy in percent.

    Runs a forward pass (sigmoid hidden layer via ``segmoid``, softmax
    output), takes the arg-max class of each row as the prediction and
    compares it with ``labels_test``.
    """
    # Forward pass through both layers.
    hidden = segmoid(np.matmul(data_test, w1) + b1)
    probabilities = softmax(np.matmul(hidden, w2) + b2)

    # Predicted class = index of the largest probability in each row.
    predicted = np.argmax(probabilities, axis=1)

    # Share of correct predictions, expressed as a percentage.
    n_correct = np.sum(predicted == labels_test)
    return (n_correct / predicted.shape[0]) * 100
+
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
    """Initialise, train and evaluate an MLP classifier.

    The hidden layer has ``d_h`` units and the output layer has 10 units.
    Weights are drawn uniformly from [-1, 1) and biases start at zero.

    Returns:
        ``(train_accuracies, final_accuracy)``: the list of per-epoch
        training accuracies from ``train_mlp`` and the accuracy reported by
        ``test_mlp`` on the test set.
    """
    n_inputs = data_train.shape[1]
    n_outputs = 10

    # Parameter initialisation (w1 is drawn before w2).
    w1 = np.random.rand(n_inputs, d_h) * 2 - 1
    b1 = np.zeros((1, d_h))
    w2 = np.random.rand(d_h, n_outputs) * 2 - 1
    b2 = np.zeros((1, n_outputs))

    # Training -- train_mlp returns the weights first, then the biases.
    trained = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
    w1, w2, b1, b2, train_accuracies = trained

    # Evaluation on the held-out data.
    final_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
    return train_accuracies, final_accuracy
+
def evolution_learning_acc(split_factor,d_h,num_epoch,learning_rate=0.1):
    """Plot the training accuracy as a function of the number of epochs.

    Loads the data with ``read_cifar``, splits it with ``split_dataset``,
    trains an MLP through ``run_mlp_training`` and plots one accuracy value
    per epoch. The figure is written to ``results\\mlp.png`` and then shown.

    Args:
        split_factor: forwarded to ``split_dataset``.
        d_h: number of hidden units, forwarded to ``run_mlp_training``.
        num_epoch: number of training epochs.
        learning_rate: gradient-descent step size (new optional argument;
            the previous code referenced an undefined ``learning_rate``).
    """
    # NOTE(review): dirname and num_batch are not defined in this function;
    # they are assumed to be module-level names -- confirm. The paths below
    # use Windows-style separators.
    path_batches=str(dirname)+"\\data\\cifar-10-batches-py"+str(num_batch)
    # Use the path built above (the old code passed an undefined name).
    (data,labels)=read_cifar(path_batches)
    (data_train,data_test,labels_train,labels_test)=split_dataset(data,labels,split_factor)

    # run_mlp_training creates the initial weights itself, so no
    # pre-existing w1/b1/w2/b2 are needed here.
    accuracy, _final_accuracy = run_mlp_training(
        data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch
    )
    epochs = list(range(1, num_epoch + 1))

    plt.plot(epochs,accuracy)
    # Save before show(): once the window is closed the current figure is
    # typically empty, so saving afterwards tends to write a blank image.
    plt.savefig("results\\mlp.png")
    plt.show()