Commit d9474802 authored by Chauvin Hugo

Add new file

parent 600437e4
mlp.py 0 → 100644
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
import random
def sigma(x):
    return 1 / (1 + np.exp(-x))
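# Note: the derivative of the sigmoid can be expressed from its output,
# sigma'(x) = sigma(x) * (1 - sigma(x)); the a * (1 - a) factors in the
# backward passes below rely on this identity.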
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
    N = data.shape[0]
    # Forward pass
    a0 = data  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = sigma(z1)  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = sigma(z2)  # output of the output layer (sigmoid activation function)
    predictions = a2  # the predicted values are the outputs of the output layer
    # Compute loss (MSE)
    loss = np.mean(np.square(predictions - targets))
    # Backward pass: gradients of the loss with respect to w2, b2, w1, b1
    dCda2 = 2 / N * (a2 - targets)
    dCdz2 = dCda2 * a2 * (1 - a2)
    dCdw2 = np.matmul(a1.T, dCdz2)
    dCdb2 = np.sum(dCdz2, axis=0, keepdims=True)
    dCdz1 = np.matmul(dCdz2, w2.T) * a1 * (1 - a1)
    dCdw1 = np.matmul(a0.T, dCdz1)
    dCdb1 = np.sum(dCdz1, axis=0, keepdims=True)
    # Gradient descent step on w1, b1, w2, b2
    w1 -= learning_rate * dCdw1
    b1 -= learning_rate * dCdb1
    w2 -= learning_rate * dCdw2
    b2 -= learning_rate * dCdb2
    return w1, b1, w2, b2, loss
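# Minimal usage sketch for learn_once_mse (hypothetical shapes, not part of
# the assignment): one MSE learning step on a tiny random batch, just to
# illustrate the expected argument shapes.
# rng = np.random.default_rng(0)
# w1 = 2 * rng.random((3, 4)) - 1; b1 = np.zeros((1, 4))
# w2 = 2 * rng.random((4, 2)) - 1; b2 = np.zeros((1, 2))
# data = rng.random((10, 3)); targets = rng.random((10, 2))
# w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, 0.1)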
def one_hot(labels):
    labels = np.asarray(labels).flatten()
    num_classes = int(np.max(labels)) + 1
    array = np.zeros((len(labels), num_classes), dtype=np.int64)
    for i in range(len(labels)):
        array[i, labels[i]] = 1
    return array
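# Example (hypothetical input): one_hot(np.array([1, 0, 2])) returns
# [[0, 1, 0],
#  [1, 0, 0],
#  [0, 0, 1]]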
def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
    # Forward pass
    a0 = data
    z1 = np.matmul(a0, w1) + b1
    a1 = sigma(z1)
    z2 = np.matmul(a1, w2) + b2
    a2 = sigma(z2)
    # One-hot encoding of the targets and gradient computation
    Y = one_hot(labels_train)
    dCdz2 = a2 - Y
    dCdw2 = np.matmul(a1.T, dCdz2)
    dCdb2 = np.sum(dCdz2, axis=0, keepdims=True)
    dCdz1 = np.matmul(dCdz2, w2.T) * a1 * (1 - a1)
    dCdw1 = np.matmul(a0.T, dCdz1)
    dCdb1 = np.sum(dCdz1, axis=0, keepdims=True)
    # Gradient descent
    w1 -= learning_rate * dCdw1
    b1 -= learning_rate * dCdb1
    w2 -= learning_rate * dCdw2
    b2 -= learning_rate * dCdb2
    # Loss calculation (binary cross-entropy; the small epsilon avoids log(0))
    loss = -np.mean(Y * np.log(a2 + 1e-9) + (1 - Y) * np.log(1 - a2 + 1e-9))
    return w1, b1, w2, b2, loss
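# Note: dCdz2 = a2 - Y is the exact output-layer gradient (up to the 1/N
# normalisation) when the sigmoid output is paired with the binary
# cross-entropy loss computed above; with the MSE loss of learn_once_mse an
# extra a2 * (1 - a2) factor appears, as in the function above.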
def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
    train_accuracies = np.zeros(num_epoch)
    for i in range(num_epoch):
        # One gradient descent step on the whole training set
        w1, b1, w2, b2, _ = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
        # Forward pass to compute the output a2
        a0 = data_train
        z1 = np.matmul(a0, w1) + b1
        a1 = sigma(z1)
        z2 = np.matmul(a1, w2) + b2
        a2 = sigma(z2)
        # Accuracy: fraction of samples whose predicted class (argmax of a2)
        # matches the true label
        predictions = np.argmax(a2, axis=1)
        train_accuracies[i] = np.mean(predictions == np.asarray(labels_train).flatten())
    return w1, b1, w2, b2, train_accuracies
def test_mlp(w1, b1, w2, b2, data_test, labels_test):
    # Forward pass to compute the output a2
    a0 = data_test
    z1 = np.matmul(a0, w1) + b1
    a1 = sigma(z1)
    z2 = np.matmul(a1, w2) + b2
    a2 = sigma(z2)
    # Accuracy calculation on the test set
    predictions = np.argmax(a2, axis=1)
    test_accuracy = np.mean(predictions == np.asarray(labels_test).flatten())
    return test_accuracy
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
    # Network dimensions: input size from the data, output size from the labels
    d_in = data_train.shape[1]
    d_out = int(np.max(labels_train)) + 1  # number of classes
    # Random initialization of the network weights and biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases
    # Train on the train set, then evaluate the accuracy on the test set
    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
    return train_accuracies, test_accuracy
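# Smoke-test sketch (synthetic data, hypothetical sizes, not from the
# assignment): runs the full pipeline on random inputs and labels just to
# check shapes and the training loop; accuracy should stay near chance level.
# rng = np.random.default_rng(0)
# X = rng.random((100, 8))
# y = rng.integers(0, 3, size=100)
# accs, test_acc = run_mlp_training(X[:80], y[:80], X[80:], y[80:],
#                                   d_h=16, learning_rate=0.1, num_epoch=20)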
##
# Copy of readcifar.py, inlined because importing it failed.
def unpickle(file):
    with open(file, 'rb') as f:
        batch = pickle.load(f, encoding='bytes')
    return batch
def read_cifar_batch(batch_path):
    with open(batch_path, 'rb') as file:
        # Unpickle the batch
        batch = pickle.load(file, encoding='bytes')
    # Extract data and labels, scaling pixel values to [0, 1]
    data = np.array(batch[b'data'], dtype=np.float32) / 255.0
    labels = np.array(batch[b'labels'], dtype=np.int64)
    return data, labels
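# For reference: in the CIFAR-10 python version, each batch file unpickles to
# a dict whose b'data' entry is a 10000 x 3072 uint8 array (32x32 RGB images,
# flattened) and whose b'labels' entry is a list of 10000 integers in [0, 9].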
def read_cifar(batch_dir):
    data_batches = []
    label_batches = []
    # Iterate over the batch files
    for file_name in os.listdir(batch_dir):
        if file_name.startswith("data_batch") or file_name.startswith("test_batch"):
            batch_path = os.path.join(batch_dir, file_name)
            data, labels = read_cifar_batch(batch_path)
            data_batches.append(data)
            label_batches.append(labels)
    # Combine data and labels from all batches
    data = np.concatenate(data_batches, axis=0)
    labels = np.concatenate(label_batches, axis=0)
    return data, labels
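# With the standard CIFAR-10 folder (data_batch_1..5 plus test_batch), this
# returns the whole dataset: data of shape (60000, 3072) and 60000 labels.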
def split_dataset(data, labels, split):
    # Check that data and labels have the same number of rows
    if data.shape[0] != labels.shape[0]:
        raise ValueError("data and labels must have the same number of rows!")
    # Size of the train split
    train_size = round(data.shape[0] * split)
    # Shuffle data and labels with the same random permutation
    shuffle_index = list(range(data.shape[0]))
    random.shuffle(shuffle_index)
    data_shuffled = data[shuffle_index]
    labels_shuffled = labels[shuffle_index]
    # Split into train and test sets
    data_train = data_shuffled[:train_size]
    labels_train = labels_shuffled[:train_size]
    data_test = data_shuffled[train_size:]
    labels_test = labels_shuffled[train_size:]
    return data_train, labels_train, data_test, labels_test
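# Example (hypothetical arrays): with 60000 rows and split=0.9, split_dataset
# returns 54000 training samples and 6000 test samples.
# data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.9)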
##
if __name__ == "__main__":
    data_folder = 'C:\\Users\\hugol\\Desktop\\Centrale Lyon\\Centrale Lyon 4A\\Informatique\\Machine Learning\\BE1\\cifar-10-batches-py'
    data, labels = read_cifar(data_folder)
    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.9)
    train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h=64, learning_rate=0.1, num_epoch=100)
    # Plot the training accuracy across epochs
    plt.figure()
    plt.plot(range(len(train_accuracies)), train_accuracies)
    plt.xlabel("Epoch")
    plt.ylabel("Train accuracy")
    plt.show()