Commit c9406460 authored by toto
final_results

parent e983f6c4
.gitignore 0 → 100644
/data
# Environments
.env
.venv
env/
venv/
ENV/
knn.py 0 → 100644
import numpy as np
import matplotlib.pyplot as plt


def distance_matrix(train, test):
    print('Computing distance matrix between train and test sets')
    # ||x - y||^2 = ||x||^2 + ||y||^2 - 2 x.y, computed for all pairs at once.
    # Clip tiny negative values caused by floating-point error before the sqrt.
    squared = (-2 * np.matmul(train, test.T) +
               np.sum(train * train, axis=1, keepdims=True) +
               np.sum(test * test, axis=1, keepdims=True).T)
    dists = np.sqrt(np.maximum(squared, 0))
    print('finished calculating dists')
    return dists
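# A quick self-check (a minimal sketch, not part of the original file): the
# vectorized expansion above should agree with a naive pairwise loop on small
# random arrays.
#   rng = np.random.default_rng(0)
#   a, b = rng.random((4, 3)), rng.random((5, 3))
#   naive = np.sqrt(((a[:, None, :] - b[None, :, :]) ** 2).sum(axis=-1))
#   assert np.allclose(distance_matrix(a, b), naive)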
def mode(x):
    vals, counts = np.unique(x, return_counts=True)
    return vals[np.argmax(counts)]
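# For example, mode(np.array([1, 2, 2, 3])) returns 2. On ties, np.unique
# returns values in sorted order, so the smallest of the tied labels wins.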
def knn_predict(dists, labels_train, k):
    # dists has shape [num_train, num_test]
    indexes_of_knn = np.argsort(dists, axis=0)[0:k, :]  # k nearest train indices per test column
    nearest_labels_pred = labels_train[indexes_of_knn]
    labels_pred = np.array([mode(label) for label in nearest_labels_pred.T])
    return labels_pred
def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    print(f"Evaluating the k-NN with k = {k}")
    dists = distance_matrix(data_train, data_test)
    labels_pred = knn_predict(dists, labels_train, k)
    accuracy = np.sum(labels_pred == labels_test) / len(labels_test)
    return accuracy
def evaluate_knn_for_k(data_train, labels_train, data_test, labels_test, k_max):
    print(f"Evaluating the k-NN for k in range [1, {k_max}]")
    accuracies = [0] * k_max
    dists = distance_matrix(data_train, data_test)
    for k in range(1, k_max + 1):
        labels_pred = knn_predict(dists, labels_train, k)
        accuracy = np.sum(labels_pred == labels_test) / len(labels_test)
        accuracies[k - 1] = accuracy
    return accuracies
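# Note: the distance matrix is the expensive part, so it is computed once and
# reused for every value of k; only the neighbour selection and voting inside
# knn_predict depend on k.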
def plot_accuracy_versus_k(accuracies):
    k = len(accuracies)
    fig = plt.figure(figsize=(12, 8))
    plt.plot(np.arange(1, k + 1, 1), accuracies)
    plt.title("Variation of the accuracy as a function of k")
    plt.xlabel("k (number of neighbors)")
    plt.ylabel("Accuracy")
    # ax = fig.gca()
    # ax.set_xticks(np.arange(1, k + 1, 1))
    plt.grid(axis='both', which='both')
    plt.savefig('./results/knn.png')
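# Note: plt.savefig writes into ./results/, so that directory must already
# exist (e.g. created with os.makedirs('results', exist_ok=True)); otherwise
# savefig raises FileNotFoundError.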
main.py 0 → 100644
from read_cifar import read_cifar, split_dataset
from knn import evaluate_knn_for_k, plot_accuracy_versus_k
import matplotlib.pyplot as plt
from mlp import run_mlp_training, plot_accuracy_versus_epoch

if __name__ == "__main__":
    # data, labels = read_cifar("data/cifar-10-batches-py")
    # split = 0.9
    # data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split)
    # data_train, data_test = data_train / 255.0, data_test / 255.0
    # kmax = 20
    # accuracies = evaluate_knn_for_k(data_train, labels_train, data_test, labels_test, kmax)
    # accuracies = [0.351,
    #               0.31316666666666665,
    #               0.329,
    #               0.33666666666666667,
    #               0.33616666666666667,
    #               0.3413333333333333,
    #               0.343,
    #               0.3428333333333333,
    #               0.341,
    #               0.3335,
    #               0.3325,
    #               0.3328333333333333,
    #               0.33016666666666666,
    #               0.3295,
    #               0.32766666666666666,
    #               0.3285,
    #               0.327,
    #               0.32716666666666666,
    #               0.32916666666666666,
    #               0.3305]
    # plot_accuracy_versus_k(accuracies)

    ####################################
    # Parameters of the MLP:
    split_factor = 0.9
    data, labels = read_cifar("data/cifar-10-batches-py")
    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split=split_factor)
    data_train, data_test = data_train / 255.0, data_test / 255.0  # normalize our data
    d_h = 64
    lr = 0.1
    num_epoch = 100
    # run_mlp_training returns the per-epoch training accuracies and the final test accuracy
    accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test,
                                                 labels_test, d_h, lr, num_epoch)
    # accuracies = [0.08788888888888889, 0.08990740740740741, 0.09135185185185185, 0.09296296296296297, 0.09514814814814815, 0.09631481481481481, 0.09724074074074074, 0.09787037037037037, 0.09820370370370371, 0.09883333333333333, 0.09844444444444445, 0.09859259259259259, 0.09857407407407408, 0.09885185185185186, 0.09872222222222223, 0.09855555555555555, 0.09872222222222223, 0.09883333333333333, 0.0989074074074074, 0.09881481481481481, 0.0987962962962963, 0.09898148148148148, 0.09916666666666667, 0.09938888888888889, 0.09961111111111111, 0.09975925925925926, 0.09975925925925926, 0.1, 0.10003703703703704, 0.09998148148148148, 0.10007407407407408, 0.10011111111111111, 0.10001851851851852, 0.10014814814814815, 0.10012962962962962, 0.09998148148148148, 0.1000925925925926, 0.1000925925925926, 0.10007407407407408, 0.10005555555555555, 0.10014814814814815, 0.10018518518518518, 0.1002037037037037, 0.10018518518518518, 0.10016666666666667, 0.10011111111111111, 0.10016666666666667, 0.10012962962962962, 0.10007407407407408, 0.10005555555555555, 0.1, 0.1, 0.1, 0.1, 0.1, 0.09998148148148148, 0.09998148148148148, 0.09996296296296296, 0.09996296296296296, 0.09996296296296296, 0.09994444444444445, 0.09994444444444445, 0.09994444444444445, 0.0999074074074074, 0.09994444444444445, 0.09996296296296296, 0.09996296296296296, 0.09996296296296296, 0.09998148148148148, 0.09996296296296296, 0.09998148148148148, 0.1, 0.1, 0.10003703703703704, 0.10003703703703704, 0.10005555555555555, 0.10007407407407408, 0.10007407407407408, 0.10007407407407408, 0.10003703703703704, 0.10001851851851852, 0.10003703703703704, 0.10003703703703704, 0.10003703703703704, 0.10001851851851852, 0.10001851851851852, 0.10003703703703704, 0.10003703703703704, 0.10005555555555555, 0.10007407407407408, 0.10007407407407408, 0.10007407407407408, 0.10007407407407408, 0.10005555555555555, 0.10005555555555555, 0.10005555555555555, 0.10007407407407408, 0.10007407407407408, 0.10007407407407408, 0.10007407407407408]
    # print(accuracies)
    plot_accuracy_versus_epoch(accuracies)
    # Result for k = 1
    # Reading data from disk
    # [INFO] Splitting data into train/test with split=70
    # [INFO] Training set has 42000 samples and testing set has 18000 samples.
    # [INFO] Time taken 0
    # Evaluating the k-NN with k = 1
    # Computing distance matrix between train and test sets
    # finished calculating dists
    # Running the prediction using k-NN with k = 1
    # [INFO] computing accuracy of the predictions
    # accuracy = 0.3388888888888889
    # Reading data from disk
    # [INFO] Splitting data into train/test with split=70
    # [INFO] Training set has 42000 samples and testing set has 18000 samples.
    # [INFO] Time taken 0
    # Evaluating the k-NN with k = 3
    # Computing distance matrix between train and test sets
    # finished calculating dists
    # Running the prediction using k-NN with k = 3
    # [INFO] computing accuracy of the predictions
    # 0.3308333333333333
mlp.py 0 → 100644
import numpy as np
import matplotlib.pyplot as plt
import time


def learn_once_mse(w1, b1, w2, b2, data, targets, lr):
    # Forward pass
    a0 = data  # Input of the first layer
    z1 = np.matmul(a0, w1) + b1  # Input of the hidden layer
    a1 = 1 / (1 + np.exp(-z1))  # Output of the hidden layer (sigmoid activation)
    z2 = np.matmul(a1, w2) + b2  # Input of the output layer
    a2 = 1 / (1 + np.exp(-z2))  # Output of the output layer (sigmoid activation)
    predictions = a2  # Predicted values are the outputs of the output layer

    # Compute loss (MSE)
    loss = np.mean(np.square(predictions - targets))

    # Compute gradients; the MSE gradient is propagated through the output sigmoid
    delta2 = 2 * (predictions - targets) * a2 * (1 - a2)  # Gradient at the output layer
    delta1 = np.dot(delta2, w2.T) * a1 * (1 - a1)  # Gradient for the hidden layer

    # Update weights and biases using gradients
    w2 -= lr * np.dot(a1.T, delta2) / len(data)
    b2 -= lr * np.sum(delta2, axis=0) / len(data)
    w1 -= lr * np.dot(a0.T, delta1) / len(data)
    b1 -= lr * np.sum(delta1, axis=0) / len(data)
    return w1, b1, w2, b2, loss
def one_hot(x):
    n_classes = 10
    return np.eye(n_classes)[x]


def softmax(x):
    # Row-wise softmax, shifted by each row's max for numerical stability
    e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e_x / e_x.sum(axis=-1, keepdims=True)
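# Quick check (a sketch, not in the original file): each row of the output is a
# probability distribution over the classes, e.g.
#   p = softmax(np.array([[1.0, 2.0, 3.0], [0.0, 0.0, 0.0]]))
#   assert np.allclose(p.sum(axis=1), 1.0)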
def learn_once_cross_entropy(w1, b1, w2, b2, data, targets, learning_rate):
    N = data.shape[0]

    # Forward pass
    a0 = data  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = softmax(z2)  # output of the output layer (softmax activation function)
    predictions = a2  # the predicted values are the outputs of the output layer

    # One-hot encode the targets
    oh_targets = one_hot(targets)

    # Compute the cross-entropy loss
    loss = -np.sum(oh_targets * np.log(predictions + 1e-9)) / N

    # Backward pass
    dz2 = predictions - oh_targets
    dw2 = np.dot(a1.T, dz2) / N
    db2 = np.sum(dz2, axis=0, keepdims=True) / N
    da1 = np.dot(dz2, w2.T)
    dz1 = da1 * a1 * (1 - a1)
    dw1 = np.dot(a0.T, dz1) / N
    db1 = np.sum(dz1, axis=0, keepdims=True) / N

    # One step of gradient descent
    w1 -= learning_rate * dw1
    w2 -= learning_rate * dw2
    b1 -= learning_rate * db1
    b2 -= learning_rate * db2
    return w1, b1, w2, b2, loss
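# Note on the backward pass above: for softmax combined with cross-entropy, the
# gradient with respect to the pre-activation simplifies to
# dL/dz2 = softmax(z2) - one_hot(targets), which is why no explicit softmax
# Jacobian appears in the code.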
def predict_mlp(w1, b1, w2, b2, data):
    # Forward pass
    a0 = data  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = softmax(z2)  # output of the output layer (softmax activation function)
    predictions = np.argmax(a2, axis=1)  # predicted class = most probable class
    return predictions
def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
    # Perform num_epoch training steps; the updated parameters are passed on to
    # the next iteration.
    losses = []
    train_accuracies = [0] * num_epoch
    for epoch in range(num_epoch):
        w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
        losses.append(loss)
        labels_pred = predict_mlp(w1, b1, w2, b2, data_train)
        accuracy = np.mean(labels_pred == labels_train)
        train_accuracies[epoch] = accuracy
        print(f"Epoch [{epoch + 1}/{num_epoch}] --- loss: {loss} --- accuracy: {accuracy}")
    return w1, b1, w2, b2, train_accuracies
def test_mlp(w1, b1, w2, b2, data_test, labels_test):
    # Evaluate the network on the test set
    labels_pred = predict_mlp(w1, b1, w2, b2, data_test)
    test_accuracy = np.mean(labels_pred == labels_test)
    return test_accuracy
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, lr, num_epoch):
    """Train an MLP with the given parameters, then evaluate it on the test set."""
    print("Starting Training...")
    tic = time.time()
    d_in = data_train.shape[1]
    d_out = len(set(labels_train))

    # Random initialization of the network weights and biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights, uniform in [-1, 1)
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights, uniform in [-1, 1)
    b2 = np.zeros((1, d_out))  # second layer biases

    w1, b1, w2, b2, accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, lr, num_epoch)
    toc = time.time()
    print("Finished Training.")
    print('Time taken for training: ', toc - tic)

    print("Starting Testing...")
    tic = time.time()
    accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
    toc = time.time()
    print("Finished Testing.")
    print('Time taken for Testing: ', toc - tic)
    return accuracies, accuracy
def plot_accuracy_versus_epoch(accuracies):
    plt.figure(figsize=(18, 10))
    plt.plot(accuracies, 'o-b')
    plt.title("Variation of the accuracy over the epochs")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.grid(axis='both', which='both')
    plt.savefig('./results/mlp.png')
read_cifar.py 0 → 100644
import numpy as np
import os
import pickle


def unpickle(file):
    with open(file, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')
    return batch
def read_cifar_batch(file):
    batch = unpickle(file)
    data = batch[b'data'].astype(np.float32)
    labels = np.array(batch[b'labels'], dtype=np.int64)
    labels = labels.reshape(labels.shape[0])
    return data, labels
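# In the CIFAR-10 python batches, b'data' is a [10000, 3072] uint8 array (each
# row is one 32x32 image stored as 1024 red, then 1024 green, then 1024 blue
# values) and b'labels' is a list of 10000 integers in [0, 9].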
def read_cifar(path):
    print('Reading data from disk')
    data_batches = ["data_batch_" + str(i) for i in range(1, 6)] + ['test_batch']
    flag = True
    for db in data_batches:
        data, labels = read_cifar_batch(os.path.join(path, db))
        if flag:
            DATA = data
            LABELS = labels
            flag = False
        else:
            DATA = np.concatenate((DATA, data), axis=0, dtype=np.float32)
            LABELS = np.concatenate((LABELS, labels), axis=-1, dtype=np.int64)
    return DATA, LABELS
def split_dataset(data, labels, split=0.6):
    print(f"Splitting data into train/test with split={split}")
    n = data.shape[0]
    indices = np.random.permutation(n)
    train_idx, test_idx = indices[:int(split * n)], indices[int(split * n):]
    data_train, data_test = data[train_idx, :].astype(np.float32), data[test_idx, :].astype(np.float32)
    labels_train, labels_test = labels[train_idx].astype(np.int64), labels[test_idx].astype(np.int64)
    return data_train, labels_train, data_test, labels_test
\ No newline at end of file
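# Usage sketch (assumes the CIFAR-10 python batches sit under ./data): on the
# full 60000-image set, split=0.9 yields 54000 training and 6000 test samples.
#   data, labels = read_cifar("data/cifar-10-batches-py")  # [60000, 3072]
#   data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split=0.9)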
results/knn.png (49.8 KiB)
results/mlp.png (44.4 KiB)
