diff --git a/knn.py b/knn.py
index 370e49b9cda0c37ca5d5d4a80d20655b75365311..8f7755b712d4121e39a3df9b0c7ba888b14b2d8d 100644
--- a/knn.py
+++ b/knn.py
@@ -1,67 +1,76 @@
 from read_cifar import *
 from collections import Counter
+import matplotlib.pyplot as plt

-def compute_distance(m1, m2):
-    if m1.shape != m2.shape:
-        raise ValueError("Dimensions must be identical")
-    #distance = np.linalg.norm(m1 - m2)
-    x = (m1 - m2) ** 2
-    y = np.sum(x)
-    dist = np.sqrt(y)
-
-    return dist
+# Compute the Euclidean distance matrix between training data (rows) and testing data (columns):
+# dists[i][j] holds the Euclidean distance between the i-th data_train image and the j-th data_test image

 def distance_matrix(data_train, data_test):
-    dists = []
-    for test in data_test:
-        dist = []
-        for train in data_train:
-            dist.append(compute_distance(test, train))
-        dists.append(dist)
+    # Expand ||train - test||^2 = ||train||^2 - 2 train.test + ||test||^2 to avoid Python loops
+    train_squared = np.sum(data_train ** 2, axis=1, keepdims=True)
+    test_squared = np.sum(data_test ** 2, axis=1, keepdims=True)
+    dot_product = np.dot(data_train, data_test.T)
+    # Clamp tiny negatives caused by floating-point error before the square root
+    dists = np.sqrt(np.maximum(train_squared - 2 * dot_product + test_squared.T, 0))

     return dists

 def knn_predict(dists, labels_train, k):
-    predictions=[]
+
+    # For each data_test image, look for the k training images at minimum distance
+    # and assign the most frequent class among those k
+    # (I personally prefer having the testing data on the rows)
+    dists = dists.T
+    predictions = []
+
     for distances in dists:
         min_indexes = np.argpartition(distances, k)[:k]
         possible_pred = labels_train[min_indexes]
         counted = Counter(possible_pred)
         pred = counted.most_common(1)[0][0]
         predictions.append(pred)
+
     return predictions

-def evaluate_knn(predictions, labels_test):
-    sum=0
-    for i in range(len(predictions)):
-        if predictions[i] == labels_test[i]:
-            sum+=1
+def evaluate_knn(dists, labels_train, labels_test, k):

-    return sum / len(predictions)
+    # Run the knn algorithm, then compare the predictions with the labels
+    predictions = knn_predict(dists, labels_train, k)

-'''def evaluate_knn(data_train , labels_train,data_test ,labels_test, k):
-
-    return'''
+    return np.mean(predictions == labels_test)

 def main():
-    folder_path = 'data/cifar-10-batches-py'
-    data, labels = read_cifar(folder_path)
-    print((data.shape))
-    print((labels.shape))
+    print('#START#')

-    data_train, data_test, labels_train, labels_test = split_dataset(data, labels, 0.9)
-
-    print("Training set shape:", data_train.shape, labels_train.shape)
-    print("Testing set shape:", data_test.shape, labels_test.shape)
-
-    dists=distance_matrix(data_train, data_test)
+    # Set hyperparameters
+    num_k = 20

-    prediction=knn_predict(dists, labels_train, 4)
+    # Load the CIFAR dataset and split the data and labels into the two phases (train and test)
+    folder_path = 'data/cifar-10-batches-py'
+    data, labels = read_cifar(folder_path)

-    accuracy = evaluate_knn(prediction, labels_test)
+    data_train, data_test, labels_train, labels_test = split_dataset(data, labels, 0.9)

-    print(accuracy)
+    # Compute the distance matrix once
+    dists = distance_matrix(data_train, data_test)
+
+    # Evaluate the knn algorithm for each value of k
+    accuracies = []
+    for k in range(1, num_k + 1):
+        accuracy = evaluate_knn(dists, labels_train, labels_test, k)
+        print('For k = ' + str(k) + ' accuracy : ' + str(round(accuracy, 4)))
+        accuracies.append(accuracy)
+
+    # Plot the accuracy for each k
+    plt.figure(figsize=(10, 6))
+    x = range(1, num_k + 1)
+    plt.plot(x, accuracies)
+    plt.xlabel('K')
+    plt.ylabel('Accuracy')
+    plt.title('Accuracy evolution')
+    plt.grid()
+    plt.savefig('results/knn.png')
+    plt.show()

 if __name__ == "__main__":
     main()
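Review note: the vectorized `distance_matrix` rests on the identity ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2. A minimal, self-contained sanity check (not part of the patch; it duplicates the patched function so it runs standalone) verifies it against a naive double loop:

```python
# Sanity check: vectorized Euclidean distances vs. a naive double loop.
import numpy as np

def distance_matrix(data_train, data_test):
    # Mirrors the patched knn.py implementation
    train_squared = np.sum(data_train ** 2, axis=1, keepdims=True)
    test_squared = np.sum(data_test ** 2, axis=1, keepdims=True)
    dot_product = np.dot(data_train, data_test.T)
    return np.sqrt(np.maximum(train_squared - 2 * dot_product + test_squared.T, 0))

rng = np.random.default_rng(0)
train = rng.random((5, 8))
test = rng.random((3, 8))

# dists[i][j] = distance between the i-th train row and the j-th test row
naive = np.array([[np.linalg.norm(tr - te) for te in test] for tr in train])
assert np.allclose(distance_matrix(train, test), naive)
print("vectorized distances match the naive loop")
```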
diff --git a/mlp.py b/mlp.py
index 1505d0891bcd976023dec9ebefc33936311780bd..c2f9b0eaa648e82416a4b628d492b462e812c98d 100644
--- a/mlp.py
+++ b/mlp.py
@@ -1,172 +1,141 @@
 import numpy as np
 import matplotlib.pyplot as plt
-import pylab as pl
-
-def sigmoid(x):
-    return 1/(1 + np.exp(-x))
-
-def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
-
-    # Forward pass
-    a0 = data # the data are the input of the first layer
-    z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
-    a1 = sigmoid(z1) # output of the hidden layer (sigmoid activation function)
-    z2 = np.matmul(a1, w2) + b2 # input of the output layer
-    a2 = sigmoid(z2) # output of the output layer (sigmoid activation function)
-    predictions = a2 # the predicted values are the outputs of the output layer
-
-    # Compute loss (MSE)
-    loss = np.mean(np.square(predictions - targets))
+from read_cifar import *
+
+# Encode a vector of class indices as a one-hot matrix
+def one_hot(y):
+    one_hot_matrix = np.zeros((y.shape[0], np.max(y) + 1))
+    for i in range(y.shape[0]):
+        one_hot_matrix[i, y[i]] = 1
+    return one_hot_matrix
+
+# Sigmoid activation function
+def sigmoid(z):
+    return 1 / (1 + np.exp(-z))
+
+# Perform one gradient descent step with the binary cross-entropy loss
+def learn_once_cross_entropy(w1, b1, w2, b2, data, labels, learning_rate):
+    m = data.shape[0] # Batch size
+
+    # Forward pass
+    a0 = data
+    z1 = np.matmul(a0, w1) + b1
+    a1 = sigmoid(z1)
+    z2 = np.matmul(a1, w2) + b2
+    a2 = sigmoid(z2)
+
+    # Compute the binary cross-entropy loss (epsilon avoids log(0))
+    epsilon = 1e-15
+    loss = -np.sum(labels * np.log(a2 + epsilon) + (1 - labels) * np.log(1 - a2 + epsilon)) / m
+
+    # Backward pass: gradients of the loss with respect to each parameter.
+    # For sigmoid outputs with the cross-entropy loss, the output error
+    # signal simplifies to (a2 - labels) / m.
+    d_z2 = (a2 - labels) / m
+    d_w2 = np.matmul(a1.T, d_z2)
+    d_b2 = np.sum(d_z2, axis=0, keepdims=True)
+
+    d_a1 = np.matmul(d_z2, w2.T)
+    d_z1 = d_a1 * a1 * (1 - a1)
+    d_w1 = np.matmul(a0.T, d_z1)
+    d_b1 = np.sum(d_z1, axis=0, keepdims=True)
+
+    # Update the weights and biases to reduce the loss during training,
+    # as shown in the plot 'loss.png'
+    w1 -= learning_rate * d_w1
+    b1 -= learning_rate * d_b1
+    w2 -= learning_rate * d_w2
+    b2 -= learning_rate * d_b2

     return w1, b1, w2, b2, loss
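Review note: the backward pass above is easy to get subtly wrong, and a finite-difference check catches that quickly. A self-contained sketch (names like `bce_loss` are hypothetical, chosen here to mirror the patched forward pass on a tiny random network):

```python
# Gradient check: analytic gradient of the BCE loss w.r.t. w2 vs. a
# central finite-difference estimate.
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def bce_loss(w1, b1, w2, b2, data, labels):
    a1 = sigmoid(data @ w1 + b1)
    a2 = sigmoid(a1 @ w2 + b2)
    eps = 1e-15
    return -np.sum(labels * np.log(a2 + eps) + (1 - labels) * np.log(1 - a2 + eps)) / data.shape[0]

rng = np.random.default_rng(1)
data = rng.random((4, 6))
labels = np.eye(3)[rng.integers(0, 3, size=4)]  # one-hot targets
w1, b1 = rng.normal(size=(6, 5)), np.zeros((1, 5))
w2, b2 = rng.normal(size=(5, 3)), np.zeros((1, 3))

# Analytic gradient w.r.t. w2, matching the patched backward pass
a1 = sigmoid(data @ w1 + b1)
a2 = sigmoid(a1 @ w2 + b2)
d_z2 = (a2 - labels) / data.shape[0]  # sigmoid + cross-entropy error signal
d_w2 = a1.T @ d_z2

# Central finite difference on a single entry of w2
h, i, j = 1e-6, 0, 0
w2[i, j] += h
loss_plus = bce_loss(w1, b1, w2, b2, data, labels)
w2[i, j] -= 2 * h
loss_minus = bce_loss(w1, b1, w2, b2, data, labels)
w2[i, j] += h
numeric = (loss_plus - loss_minus) / (2 * h)
print(abs(numeric - d_w2[i, j]))  # should be ~1e-9 or smaller
```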
-
-def one_hot(vet):
-    encoded = np.zeros((len(vet), max(vet) + 1), dtype=int)
-    encoded[np.arange(len(vet)), vet]=1
-    return encoded
-
-
-def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
-    # Forward Pass
-    Z1 = np.dot(data, w1) + b1
-    A1 = sigmoid(Z1)
-    Z2 = np.dot(A1, w2) + b2
-    A2 = sigmoid(Z2)
-
-    # Calculate loss (Binary Cross Entropy)
-    m = labels_train.shape[0]
-    epsilon = 1e-15 # small constant to avoid log(0)
-    loss = (-1.0 / m) * np.sum(labels_train * np.log(A2 + epsilon) + (1 - labels_train) * np.log(1 - A2 + epsilon))
-
-    # Backward Pass
-    dZ2 = A2 - labels_train
-    dW2 = (1 / data.shape[0]) * np.dot(A1.T, dZ2)
-    db2 = (1 / data.shape[0]) * np.sum(dZ2, axis=0)
-    dZ1 = np.dot(dZ2, w2.T) * A1 * (1 - A1)
-    dW1 = (1 / data.shape[0]) * np.dot(data.T, dZ1)
-    db1 = (1 / data.shape[0]) * np.sum(dZ1, axis=0)
-
-    # Update weights and biases
-    w1 -= learning_rate * dW1
-    b1 -= learning_rate * db1
-    w2 -= learning_rate * dW2
-    b2 -= learning_rate * db2
-
-    return w1, b1, w2, b2, loss
-
-
-def accuracy(Y, Y_pred):
-    m = Y.shape[0]
-    correct_predictions = np.sum(Y == Y_pred)
-    return correct_predictions / m
-
-
+# Train the MLP for a specified number of epochs
 def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs):
-    train_accuracies = []
+    train_accuracies = []
+    losses = []
+    labels_coded = one_hot(labels_train) # encode the labels once, outside the loop
     for epoch in range(num_epochs):
-        for i in range(data_train.shape[0]):
-            x = data_train[i:i+1]
-            y = labels_train[i:i+1]
-            w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, x, y, learning_rate)
-
-        # Calculate accuracy for the epoch
-        Z1 = np.dot(data_train, w1) + b1
-        A1 = sigmoid(Z1)
-        Z2 = np.dot(A1, w2) + b2
-        A2 = sigmoid(Z2)
-        train_pred = (A2 > 0.5).astype(int)
-        acc = accuracy(labels_train, train_pred)
-        train_accuracies.append(acc)
-
-    return w1, b1, w2, b2, train_accuracies
+        print('EPOCH ' + str(epoch + 1))
+        w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_coded, learning_rate)
+        losses.append(loss) # record the loss so it can be returned and plotted

-def test_mlp(w1, b1, w2, b2, data_test, labels_test):
-    Z1 = np.dot(data_test, w1) + b1
-    A1 = sigmoid(Z1)
-    Z2 = np.dot(A1, w2) + b2
-    A2 = sigmoid(Z2)
-    test_pred = (A2 > 0.5).astype(int)
-    test_acc = accuracy(labels_test, test_pred)
-    return test_acc
+        # Calculate training accuracy for this epoch
+        a0 = data_train
+        z1 = np.matmul(a0, w1) + b1
+        a1 = sigmoid(z1)
+        z2 = np.matmul(a1, w2) + b2
+        a2 = sigmoid(z2)
+        predictions = np.argmax(a2, axis=1)

-def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epochs):
-    d_in = data_train.shape[1]
-    w1 = np.random.randn(d_in, d_h)
-    b1 = np.zeros((1, d_h))
-    w2 = np.random.randn(d_h, 1)
-    b2 = np.zeros((1, 1))
+        accuracy = np.mean(predictions == labels_train)
+        train_accuracies.append(accuracy)

-    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs)
-    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
+        print('Loss : ' + str(round(loss, 4)) + '\n')

-    return train_accuracies, test_accuracy
+    return w1, b1, w2, b2, train_accuracies, losses
+
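Review note: the same forward pass is now written out three times (in `learn_once_cross_entropy`, in `train_mlp`'s accuracy computation, and in `test_mlp` below). A small helper would keep the copies from drifting apart; a minimal sketch, where `forward` is a hypothetical name not in the patch:

```python
# Possible refactor: factor the repeated forward pass into one helper.
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def forward(w1, b1, w2, b2, data):
    a1 = sigmoid(np.matmul(data, w1) + b1)  # hidden layer activations
    a2 = sigmoid(np.matmul(a1, w2) + b2)    # output layer activations
    return a1, a2

# e.g. test_mlp would reduce to:
#     _, a2 = forward(w1, b1, w2, b2, data_test)
#     predictions = np.argmax(a2, axis=1)
#     return np.mean(predictions == labels_test)
```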
+# Test the MLP on a test set
+def test_mlp(w1, b1, w2, b2, data_test, labels_test):
+    a0 = data_test
+    z1 = np.matmul(a0, w1) + b1
+    a1 = sigmoid(z1)
+    z2 = np.matmul(a1, w2) + b2
+    a2 = sigmoid(z2)
+    predictions = np.argmax(a2, axis=1)
+    test_accuracy = np.mean(predictions == labels_test)

-def main_MSE():
-    N = 30 # number of input data
-    d_in = 3 # input dimension
-    d_h = 3 # number of neurons in the hidden layer
-    d_out = 2 # output dimension (number of neurons of the output layer)
-    learning_rate = 0.1
+    return test_accuracy

+# Run the entire MLP training and testing process
+def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epochs):

-    # Random initialization of the network weights and biaises
-    w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights
-    b1 = np.zeros((1, d_h)) # first layer biaises
-    w2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights
-    b2 = np.zeros((1, d_out)) # second layer biaises
+    d_in = data_train.shape[1]
+    num_classes = len(np.unique(labels_train))

-    data = np.random.rand(N, d_in) # create a random data
-    targets = np.random.rand(N, d_out) # create a random targets
+    # Initialize the first and second layer weights and biases
+    w1 = 2 * np.random.rand(d_in, d_h) - 1
+    b1 = np.zeros((1, d_h))
+    w2 = 2 * np.random.rand(d_h, num_classes) - 1
+    b2 = np.zeros((1, num_classes))

-    w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate)
+    # Train the MLP on the training data
+    w1, b1, w2, b2, train_accuracies, losses = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs)

-    print('Loss (MSE) : ' + str(loss))
+    # Test the MLP on the testing data
+    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)

+    return train_accuracies, test_accuracy, losses
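Review note: the old `main_CrossEntropy` exercised the pipeline on random data before the patch removed it (see the deleted lines below). That remains useful as a shape/plumbing smoke test; a sketch, assuming `mlp.py` is importable from the same folder and its dependencies are installed:

```python
# Smoke test: run the full pipeline on tiny fake data before a CIFAR run.
import numpy as np
from mlp import run_mlp_training

data_train = np.random.rand(100, 10)
labels_train = np.random.randint(3, size=100)
data_test = np.random.rand(20, 10)
labels_test = np.random.randint(3, size=20)

train_accuracies, test_accuracy, losses = run_mlp_training(
    data_train, labels_train, data_test, labels_test,
    d_h=16, learning_rate=0.1, num_epochs=5)

print('smoke-test accuracy:', test_accuracy)  # random data, so ~1/3 expected
```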
+def main():
+    print('#START#')

-def main_CrossEntropy():
+    # Set hyperparameters
     split_factor = 0.9
     d_h = 64
     learning_rate = 0.1
     num_epochs = 100

-    # Define your data, labels, and parameters here
-
-    # Generate some sample data for demonstration
-    # Replace this with your actual data
-    data_train = np.random.rand(100, 10)
-    labels_train = np.random.randint(2, size=100)
-    data_test = np.random.rand(20, 10)
-    labels_test = np.random.randint(2, size=20)
-
-    # Call run_mlp_training with your data and parameters
-    train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h,
-                                                       learning_rate, num_epochs)
-
-    # Create a plot of training accuracies across epochs
-    plt.figure(figsize=(10, 6))
-    x=range(1, num_epochs +1)
-    plt.plot(x, train_accuracies)
-    plt.xlabel('Epochs')
-    plt.ylabel('Accuracy')
-    plt.title('Training Accuracy Evolution')
-    pl.grid()
-
-    plt.savefig('mlp.png')
+    # Load the CIFAR dataset and split the data and labels into the two phases (train and test)
+    folder_path = 'data/cifar-10-batches-py'
+    data, labels = read_cifar(folder_path)
+    data_train, data_test, labels_train, labels_test = split_dataset(data, labels, split_factor)
+
+    # Run MLP training and testing
+    train_accuracies, test_accuracy, losses = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h,
+                                                               learning_rate, num_epochs)
+
+    # Test accuracy after 'num_epochs' epochs of training
+    print('FINAL ACCURACY : ' + str(round(test_accuracy, 4)) + '\n')
+
+    # Plot the training loss for each epoch
+    x = range(1, num_epochs + 1)
+    plt.plot(x, losses)
+    plt.xlabel('Epoch')
+    plt.ylabel('Training Loss')
+    plt.title('Training Loss vs. Epoch')
+    plt.grid()
+    plt.savefig('results/loss.png')
     plt.show()

-
 if __name__ == "__main__":
-    main_MSE()
-    main_CrossEntropy()
-
-
-
+    main()
diff --git a/read_cifar.py b/read_cifar.py
index fb1251ff048017ef0bc6d975e72c0706165cf858..1e8d727b9cad306b8a15e59b29cbe48598027e79 100644
--- a/read_cifar.py
+++ b/read_cifar.py
@@ -4,26 +4,36 @@ import os
 from sklearn.model_selection import train_test_split

 def read_cifar_batch(batch):
-    with open(batch, 'rb') as fo:
-        dict = pickle.load(fo, encoding='bytes')
-        data = dict[b'data']
-        labels = dict[b'labels']
-        print(dict[b'batch_label'])
-    return data, labels
+
+    # Load a single CIFAR batch from its pickle file
+    # (batch_dict avoids shadowing the built-in 'dict')
+    with open(batch, 'rb') as file:
+
+        batch_dict = pickle.load(file, encoding='bytes')
+        batch_data = batch_dict[b'data']
+        batch_labels = batch_dict[b'labels']
+
+    return batch_data, batch_labels

 def read_cifar(path):
+
+    # Read every data batch in the folder and stack them into single arrays
+    batches_list = os.listdir(path)
     data, labels = [], []
+
     for batch in batches_list:
         if(batch == 'batches.meta' or batch == 'readme.html'):
             continue
         data_batch, labels_batch = read_cifar_batch(path + '/' + batch)
         data.append(data_batch)
         labels.append(labels_batch)
-    return np.array(data, dtype=np.float32).reshape((60000, 3072)), np.array(labels, dtype=np.int64).reshape(-1)

-def split_dataset(data, labels, split):
-    data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=1-split, shuffle=True)
+    data = np.array(data, dtype=np.float32).reshape((60000, 3072))
+    labels = np.array(labels, dtype=np.int64).reshape(-1)
+
+    return data, labels
+
+def split_dataset(data, labels, split_factor):
+
+    # Shuffle, then keep a split_factor fraction of the data for training
+    data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=1-split_factor, shuffle=True)
+
     return data_train, data_test, labels_train, labels_test
diff --git a/results/knn.png b/results/knn.png
new file mode 100644
index 0000000000000000000000000000000000000000..3c22b972abd98317d1d75ca5111d7a1405d3f9e1
Binary files /dev/null and b/results/knn.png differ
diff --git a/results/loss.png b/results/loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..81e6421abf7e8d4eb5c400fd933cc0416ac4f2f8
Binary files /dev/null and b/results/loss.png differ
diff --git a/results/mlp.png b/results/mlp.png
new file mode 100644
index 0000000000000000000000000000000000000000..e2e197c23034254cea0468048191f99bb0594821
Binary files /dev/null and b/results/mlp.png differ
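Review note: `read_cifar` hardcodes the reshape to `(60000, 3072)`, which assumes all six CIFAR-10 batches are present. A shape-agnostic alternative would stack whatever batches were read; a minimal sketch, where `stack_batches` is a hypothetical helper name:

```python
# Possible hardening: stack an arbitrary number of CIFAR batches without
# assuming the total row count in advance.
import numpy as np

def stack_batches(data_list, labels_list):
    data = np.concatenate([np.asarray(d, dtype=np.float32) for d in data_list], axis=0)
    labels = np.concatenate([np.asarray(l, dtype=np.int64) for l in labels_list], axis=0)
    return data, labels
```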