Commit 02e604a2 authored by BaptisteBrd
mlp final

import numpy as np
import matplotlib.pyplot as plt
import scipy.special as sp
from tqdm import tqdm
import read_cifar
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
"""
Perform one iteration of training using Mean Squared Error (MSE) loss.
"""
# Forward propagation
a0 = data # Input layer receives the data
z1 = np.matmul(a0, w1) + b1 # Compute hidden layer input
a1 = 1 / (1 + np.exp(-z1)) # Apply sigmoid activation in hidden layer
z2 = np.matmul(a1, w2) + b2 # Compute output layer input
a2 = 1 / (1 + np.exp(-z2)) # Apply sigmoid activation in output layer
predictions = a2 # Final predictions from the network
# Calculate MSE loss
loss = np.mean(np.square(predictions - targets))
    # Backward propagation to compute gradients
    grad_a2 = 2 * (predictions - targets)  # dLoss/da2 for squared error (up to the averaging constant)
    grad_z2 = grad_a2 * a2 * (1 - a2)      # chain through the output sigmoid: sigma'(z) = a * (1 - a)
    grad_w2 = np.matmul(a1.T, grad_z2)     # dLoss/dw2
    grad_b2 = np.sum(grad_z2, axis=0)      # dLoss/db2
    grad_a1 = np.matmul(grad_z2, w2.T)     # backpropagate into the hidden layer
    grad_z1 = grad_a1 * a1 * (1 - a1)      # chain through the hidden sigmoid
    grad_w1 = np.matmul(a0.T, grad_z1)     # dLoss/dw1
    grad_b1 = np.sum(grad_z1, axis=0)      # dLoss/db1
# Update the network parameters
w1 -= learning_rate * grad_w1
w2 -= learning_rate * grad_w2
b1 -= learning_rate * grad_b1
b2 -= learning_rate * grad_b2
return w1, b1, w2, b2, loss
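

# Illustrative sketch (not part of the original training flow and never called
# by default): one MSE step on tiny random data, just to check shapes and print
# the loss over a few updates. The sizes (2 samples, 4 inputs, 3 hidden units,
# 10 outputs) and the learning rate are arbitrary assumptions for the demo.
def _demo_learn_once_mse():
    rng = np.random.default_rng(0)
    w1 = rng.uniform(-1, 1, (4, 3))
    b1 = np.zeros((1, 3))
    w2 = rng.uniform(-1, 1, (3, 10))
    b2 = np.zeros((1, 10))
    data = rng.uniform(0, 1, (2, 4))
    targets = np.eye(10)[[1, 7]]  # one-hot targets for assumed classes 1 and 7
    for _ in range(3):
        w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, 0.1)
        print(f"demo MSE loss: {loss:.4f}")
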
def one_hot(labels):
"""
Convert labels to one-hot encoded format.
"""
if isinstance(labels, np.int64):
labels = np.array([labels])
one_hot_matrix = np.zeros((len(labels), 10))
one_hot_matrix[np.arange(len(labels)), labels] = 1
return one_hot_matrix
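

# Example (assumed input) of the encoding produced by one_hot:
#   one_hot(np.array([2, 0])) ->
#       [[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
#        [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
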
def learn_once_cross_entropy(w1, b1, w2, b2, data, targets, learning_rate, batch_size):
"""
Perform one iteration of training using Cross-Entropy loss.
"""
# Forward propagation
a0 = data
z1 = np.matmul(a0, w1) + b1
a1 = 1 / (1 + np.exp(-z1))
z2 = np.matmul(a1, w2) + b2
a2 = sp.softmax(z2, axis=1)
predictions = a2
# Convert targets to one-hot format and calculate accuracy
pred_labels = a2.argmax(axis=1)
correct_predictions = np.sum(pred_labels == targets)
targets_one_hot = one_hot(targets)
    # Compute Cross-Entropy loss (for reference; only accuracy is returned)
    loss = -np.sum(targets_one_hot * np.log(predictions + 1e-8)) / batch_size
    # Backward propagation to compute gradients
    grad_z2 = (predictions - targets_one_hot) / batch_size
grad_w2 = np.matmul(a1.T, grad_z2)
grad_b2 = np.sum(grad_z2, axis=0)
grad_a1 = np.matmul(grad_z2, w2.T)
grad_z1 = grad_a1 * a1 * (1 - a1)
    grad_w1 = np.matmul(a0.T, grad_z1)  # use the transpose directly; reshaping a0 would scramble the samples
grad_b1 = np.sum(grad_z1, axis=0)
# Update weights and biases
w1 -= learning_rate * grad_w1
w2 -= learning_rate * grad_w2
b1 -= learning_rate * grad_b1
b2 -= learning_rate * grad_b2
accuracy = correct_predictions / len(pred_labels)
return w1, b1, w2, b2, accuracy
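

# Note on the gradient above: with a softmax output a2 = softmax(z2) and the
# cross-entropy loss L = -sum(y * log(a2)), the chain rule collapses to
# dL/dz2 = a2 - y, which is why grad_z2 is simply (predictions - targets_one_hot);
# dividing by batch_size averages the gradient, matching the averaged loss.
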
def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate=0.01, nb_epochs=100, batch_size=1):
    """
    Train the MLP with mini-batch gradient descent for nb_epochs epochs and
    return the updated parameters together with the per-epoch training accuracies.
    """
training_accuracies = []
for epoch in range(nb_epochs):
batch_accuracies = []
batch_count = len(data_train) // batch_size
for i in tqdm(range(batch_count)):
batch_start, batch_end = i * batch_size, (i + 1) * batch_size
            w1, b1, w2, b2, acc = learn_once_cross_entropy(
                w1, b1, w2, b2,
                data_train[batch_start:batch_end], labels_train[batch_start:batch_end],
                learning_rate, batch_size,
            )
batch_accuracies.append(acc)
# Handling remaining data if total data is not a multiple of batch size
if len(data_train) % batch_size != 0:
remaining = len(data_train) - batch_count * batch_size
            w1, b1, w2, b2, acc = learn_once_cross_entropy(
                w1, b1, w2, b2,
                data_train[-remaining:], labels_train[-remaining:],
                learning_rate, remaining,
            )
batch_accuracies.append(acc)
epoch_accuracy = sum(batch_accuracies) / len(batch_accuracies)
print(f"Epoch {epoch + 1} Accuracy: {epoch_accuracy:.4f}")
training_accuracies.append(epoch_accuracy)
return w1, b1, w2, b2, training_accuracies
def test_mlp(w1, b1, w2, b2, data_test, labels_test):
    """
    Evaluate the trained network on the test set and return its accuracy.
    """
    # Forward pass
    a0 = data_test
    z1 = np.matmul(a0, w1) + b1
    a1 = 1 / (1 + np.exp(-z1))
    z2 = np.matmul(a1, w2) + b2
    a2 = sp.softmax(z2, axis=1)  # softmax over the class dimension
# Compute accuracy
correct_count = np.sum(a2.argmax(axis=1) == labels_test)
return correct_count / len(labels_test)
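

# Optional sketch (not used by run_mlp_training): the same evaluation performed
# in fixed-size chunks, which keeps memory bounded on large test splits. The
# function name and the chunk size of 500 are assumptions for illustration.
def test_mlp_batched(w1, b1, w2, b2, data_test, labels_test, chunk_size=500):
    correct_count = 0
    for start in range(0, len(data_test), chunk_size):
        chunk = data_test[start:start + chunk_size]
        a1 = 1 / (1 + np.exp(-(np.matmul(chunk, w1) + b1)))
        z2 = np.matmul(a1, w2) + b2
        # argmax of the logits equals argmax of the softmax, so softmax can be skipped here
        correct_count += np.sum(z2.argmax(axis=1) == labels_test[start:start + chunk_size])
    return correct_count / len(labels_test)
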
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate=0.1, nb_epochs=100, batch_size=200):
    """
    Initialize a 3072-d_h-10 MLP, train it on the training split and evaluate it on the test split.
    """
    # Initialize network parameters (3072 = 32 * 32 * 3 flattened CIFAR-10 pixels, 10 output classes)
    w1 = np.random.uniform(-1, 1, (3072, d_h))
    b1 = np.zeros((1, d_h))
    w2 = np.random.uniform(-1, 1, (d_h, 10))
    b2 = np.zeros((1, 10))
# Training phase
w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, nb_epochs, batch_size)
# Testing phase
test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
print(f"Test Accuracy: {test_accuracy:.4f}")
return train_accuracies, test_accuracy
if __name__ == "__main__":
# Load and preprocess data
data, labels = read_cifar.read_cifar('data/cifar-10-batches-py')
data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, 0.9)
# Execute training and testing
train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, 64, 0.1, 100, 100)
    plt.plot(train_accuracies, label="Training Accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.show()