diff --git a/knn.py b/knn.py
new file mode 100644
index 0000000000000000000000000000000000000000..6332eddc448d7c139f676123fdab10b02b355f2c
--- /dev/null
+++ b/knn.py
@@ -0,0 +1,58 @@
+import read_cifar
+import numpy as np
+
+def distance_matrix(matrix1, matrix2):
+    # matrix1 = X_test and matrix2 = X_train, in this order
+    sum_of_squares_matrix1 = np.sum(np.square(matrix1), axis=1, keepdims=True)
+    sum_of_squares_matrix2 = np.sum(np.square(matrix2), axis=1, keepdims=True)
+
+    dot_product = np.dot(matrix1, matrix2.T)
+
+    dists = np.sqrt(sum_of_squares_matrix1 + sum_of_squares_matrix2.T - 2 * dot_product)
+    return dists
+
+def knn_predict(dists, labels_train, k):
+    output = []
+    for i in range(len(dists)):
+        res = [0] * 10
+        b = np.argsort(dists[i])[:k]
+        for lab in b:
+            res[labels_train[lab]] += 1
+        label_temp = np.argmax(res)  # majority vote; in case of a tie, argmax keeps the smallest label
+        output.append(label_temp)
+    return np.array(output)
+
+def evaluate_knn(data_train, labels_train, data_test, labels_tests, k):
+    dist = distance_matrix(data_test, data_train)
+    result_test = knn_predict(dist, labels_train, k)
+
+    # accuracy
+    N = labels_tests.shape[0]
+    accuracy = (labels_tests == result_test).sum() / N
+    return accuracy
+
+
+
+
+
+
+if __name__ == "__main__":
+
+    data, labels = read_cifar.read_cifar('image-classification/data/cifar-10-batches-py')
+    X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.8)
+    print(evaluate_knn(X_train[:1000], y_train[:1000], X_test, y_test, 5))
+
+
+
+    # print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
+
+    # y_test = []
+    # x_test = np.array([[1,2],[4,6]])
+    # x_train = np.array([[2,4],[7,2],[4,6]])
+    # y_train = [1,2,1]
+    # dist = distance_matrix(x_test,x_train)
+
+
+
+
+
\ No newline at end of file
diff --git a/mlp.py b/mlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..36635fb25f0d27ad67a7f01951b80867322ffce5
--- /dev/null
+++ b/mlp.py
@@ -0,0 +1,254 @@
+import numpy as np
+import read_cifar
+import matplotlib.pyplot as plt
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+
+def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
+
+    N_out = len(targets)  # number of training examples
+
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # Compute loss (MSE)
+    loss = np.mean(np.square(predictions - targets))
+    print(f'loss: {loss}')
+    # print('shape a1', a1.shape)
+    # print('shape w1', w1.shape)
+    # print('shape b1', b1.shape)
+
+    # print('shape a2', a2.shape)
+    # print('shape w2', w2.shape)
+    # print('shape b2', b2.shape)
+
+    # Backpropagation
+    delta_a2 = 2 / N_out * (a2 - targets)
+    # print('shape delta_a2', delta_a2.shape)
+    delta_z2 = delta_a2 * (a2 * (1 - a2))
+    # print('shape delta_z2', delta_z2.shape)
+    delta_w2 = np.dot(a1.T, delta_z2)
+    # print('shape delta_w2', delta_w2.shape)
+    delta_b2 = delta_z2
+
+    delta_a1 = np.dot(delta_z2, w2.T)
+    # print('shape delta_a1', delta_a1.shape)
+    delta_z1 = delta_a1 * (a1 * (1 - a1))
+    # print('shape delta_z1', delta_z1.shape)
+    delta_w1 = np.dot(a0.T, delta_z1)
+    # print('shape delta_w1', delta_w1.shape)
+    delta_b1 = delta_z1
+
+    # Update weights and biases
+    w2 -= learning_rate * delta_w2
+    b2 -= learning_rate * np.sum(delta_b2, axis=0, keepdims=True)
+
+    w1 -= learning_rate * delta_w1
+    b1 -= learning_rate * np.sum(delta_b1, axis=0, keepdims=True)
+
+    return w1, b1, w2, b2, loss
+
+def one_hot(labels):
+    # num_classes = np.max(labels) + 1; we hard-code it to 10 here
+    num_classes = 10
+    one_hot_matrix = np.eye(num_classes)[labels]
+    return one_hot_matrix
+
+def softmax_stable(x):
+    # numerically stable softmax, computed row-wise, to avoid overflowing np.exp with large values
+    return np.exp(x - np.max(x, axis=-1, keepdims=True)) / np.exp(x - np.max(x, axis=-1, keepdims=True)).sum(axis=-1, keepdims=True)
+
+def cross_entropy_loss(y_pred, y_true):
+    loss = -np.sum(y_true * np.log(y_pred)) / len(y_pred)
+    return loss
+
+
+def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
+
+    N_out = len(labels_train)  # number of training examples
+
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = softmax_stable(z2)  # output of the output layer (softmax activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+
+    # Compute loss (cross-entropy loss)
+    y_true_one_hot = one_hot(labels_train)
+    loss = cross_entropy_loss(predictions, y_true_one_hot)
+
+
+    # Backpropagation
+    # delta_a2 = 2 / N_out * (a2 - labels_train) is no longer needed here: with softmax + cross-entropy, dL/dz2 simplifies to (a2 - y_true)
+    delta_z2 = (a2 - y_true_one_hot)
+    delta_w2 = np.dot(a1.T, delta_z2) / N_out  # divide by N_out to average the gradient over the batch and avoid overly large update steps
+    delta_b2 = delta_z2 / N_out
+
+
+    delta_a1 = np.dot(delta_z2, w2.T)
+    delta_z1 = delta_a1 * (a1 * (1 - a1))
+    delta_w1 = np.dot(a0.T, delta_z1) / N_out
+    delta_b1 = delta_z1 / N_out
+
+    # Update weights and biases
+    w2 -= learning_rate * delta_w2
+    b2 -= learning_rate * np.sum(delta_b2, axis=0, keepdims=True)
+
+    w1 -= learning_rate * delta_w1
+    b1 -= learning_rate * np.sum(delta_b1, axis=0, keepdims=True)
+
+    return w1, b1, w2, b2, loss
+
+def learn_once_cross_entropy_2(w1, w2, data, labels_train, learning_rate):
+
+    N_out = len(labels_train)  # number of training examples
+
+    # Forward pass
+    # Feedforward propagation (this variant has no bias terms)
+    z1 = np.dot(data, w1)
+    a1 = sigmoid(z1)
+    z2 = np.dot(a1, w2)
+    a2 = sigmoid(z2)
+
+
+    # Compute loss (cross-entropy loss)
+    y_true_one_hot = one_hot(labels_train)
+    loss = cross_entropy_loss(a2, y_true_one_hot)
+
+    # Backpropagation (note: E1/dw1 refer to the output layer, E2/dw2 to the hidden layer)
+    E1 = a2 - np.eye(10)[labels_train]
+    dw1 = E1 * a2 * (1 - a2)
+    E2 = np.dot(dw1, w2.T)
+    dw2 = E2 * a1 * (1 - a1)
+
+    # Update weights
+    W2_update = np.dot(a1.T, dw1) / N_out
+    W1_update = np.dot(data.T, dw2) / N_out
+    w2 = w2 - learning_rate * W2_update
+    w1 = w1 - learning_rate * W1_update
+
+    return w1, w2, loss
+
+def forward_2(w1, w2, data):
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1)  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2)  # input of the output layer
+    a2 = softmax_stable(z2)  # output of the output layer (softmax activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+    return predictions
+
+def forward(w1, b1, w2, b2, data):
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = softmax_stable(z2)  # output of the output layer (softmax activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+    return predictions
+
+def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
+    train_accuracies = []
+    for epoch in range(num_epoch):
+        w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
+
+        # Compute accuracy
+        predictions = forward(w1, b1, w2, b2, data_train)
+        predicted_labels = np.argmax(predictions, axis=1)
+        # print(predictions.shape)
+        # print(predicted_labels.shape)
+        # print(labels_train.shape)
+        accuracy = np.mean(predicted_labels == labels_train)
+        train_accuracies.append(accuracy)
+
+        print(f'Epoch {epoch + 1}/{num_epoch}, Loss: {loss:.3f}, Train Accuracy: {accuracy:.2f}')
+
+    return w1, b1, w2, b2, train_accuracies
+def train_mlp_2(w1, w2, data_train, labels_train, learning_rate, num_epoch):
+    train_accuracies = []
+    for epoch in range(num_epoch):
+        w1, w2, loss = learn_once_cross_entropy_2(w1, w2, data_train, labels_train, learning_rate)
+        # Compute accuracy
+        predictions = forward_2(w1, w2, data_train)
+        predicted_labels = np.argmax(predictions, axis=1)
+        # print(predictions.shape)
+        # print(predicted_labels.shape)
+        # print(labels_train.shape)
+        accuracy = np.mean(predicted_labels == labels_train)
+        train_accuracies.append(accuracy)
+
+        print(f'Epoch {epoch + 1}/{num_epoch}, Loss: {loss:.3f}, Train Accuracy: {accuracy:.2f}')
+
+    return w1, w2, train_accuracies
+
+def test_mlp(w1, b1, w2, b2, data_test, labels_test):
+
+    # Compute accuracy
+    predictions = forward(w1, b1, w2, b2, data_test)
+    predicted_labels = np.argmax(predictions, axis=1)
+    print(predicted_labels)
+    test_accuracy = np.mean(predicted_labels == labels_test)
+    print(f'Test Accuracy: {test_accuracy:.2f}')
+    return test_accuracy
+
+def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
+
+    d_in = data_train.shape[1]
+    d_out = 10  # number of classes: hard-coded here, could also be len(np.unique(labels_train))
+
+    # Random initialisation of weights and biases
+    w1 = np.random.randn(d_in, d_h)
+    b1 = np.random.randn(1, d_h)
+
+    w2 = np.random.randn(d_h, d_out)
+    b2 = np.random.randn(1, d_out)
+
+    # Train MLP
+    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
+
+    # Test MLP
+    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
+    return train_accuracies, test_accuracy
+
+
+if __name__ == '__main__':
+    data, labels = read_cifar.read_cifar('image-classification/data/cifar-10-batches-py')
+    X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.9)
+
+    d_in, d_h, d_out = 3072, 728, 10
+    w1 = np.random.normal(scale=0.5, size=(d_in, d_h))
+    b1 = np.random.randn(1, d_h)
+    w2 = np.random.normal(scale=0.5, size=(d_h, d_out))
+    b2 = np.random.randn(1, d_out)
+
+    # print(forward(w1, b1, w2, b2, X_train[:1]))
+    # for i in range(100):
+    #     learn_once_cross_entropy(w1, b1, w2, b2, X_train[:1000], y_train[:1000], 0.005)
+    train_mlp(w1, b1, w2, b2, X_train[:10000], y_train[:10000], 0.1, 100)
+    # train_mlp_2(w1, w2, X_train[:10000], y_train[:10000], 0.05, 100)
+    # test_mlp(w1, b1, w2, b2, X_test[:50], y_test[:50])
+
+
+
+    # values = [2, 4, 5, 3]
+    # # Output achieved
+    # output = softmax_stable(values)
+    # y_true = [3, 1]  # 2 observations
+    # y_true_one_hot = one_hot(y_true)
+    # print(y_true_one_hot)
+    # y_pred = [[0.1, 0.1, 0.1, 0.7],[0.1, 0.1, 0.1, 0.7]]
+    # loss = cross_entropy_loss(y_pred, y_true_one_hot)
+    # print(loss)
\ No newline at end of file
diff --git a/read_cifar.py b/read_cifar.py
new file mode 100644
index 0000000000000000000000000000000000000000..72f04ee304c076525190a54b8679cf7d2419cbf5
--- /dev/null
+++ b/read_cifar.py
@@ -0,0 +1,57 @@
+import numpy as np
+import pickle
+from sklearn.model_selection import train_test_split
+import pandas as pd
+
+
+
+# batch.meta
+# {b'num_cases_per_batch': 10000, b'label_names': [b'airplane', b'automobile', b'bird', b'cat', b'deer', b'dog', b'frog', b'horse', b'ship', b'truck'], b'num_vis': 3072}
+
+def read_cifar_batch(file):
+    with open(file, 'rb') as fo:
+        batch = pickle.load(fo, encoding='bytes')
+    # keys = [b'batch_label',
+    #         b'labels',
+    #         b'data',
+    #         b'filenames']
+    return (np.array(batch[b'data']).astype('float32'), np.array(batch[b'labels']).astype('int64'))
+
+def read_cifar(path):
+    data = []
+    labels = []
+
+    # Add the 5 training batches
+    for i in range(1, 6):
+        data_temp, labels_temp = read_cifar_batch(f'{path}/data_batch_{i}')
+        data.append(data_temp)
+        labels.append(labels_temp)
+
+    # Add the test batch
+    data_temp, labels_temp = read_cifar_batch(f'{path}/test_batch')
+    data.append(data_temp)
+    labels.append(labels_temp)
+
+    # Concatenate all the batches into a single big one
+    data = np.concatenate(data, axis=0)
+    labels = np.concatenate(labels, axis=0)
+
+    return data, labels
+
+def split_dataset(data, labels, split):
+    X_train, X_test, y_train, y_test = train_test_split(
+        data, labels, test_size=(1 - split), random_state=0)
+
+    return X_train, X_test, y_train, y_test
+
+
+
+
+if __name__ == "__main__":
+    path = 'image-classification/data/cifar-10-batches-py/data_batch_1'
+    main_path = 'image-classification/data/cifar-10-batches-py'
+    data, labels = read_cifar_batch(path)
+    data, labels = read_cifar(main_path)
+    X_train, X_test, y_train, y_test = split_dataset(data, labels, 0.8)
+    # print(X_train, X_test, y_train, y_test)
+    # print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)