Commit 157f7118 authored by selalimi

Final Update

parent d227b04d
......@@ -90,7 +90,7 @@ Unfortunately, the performance of the KNN algorithm was disappointing, with accu
4. *Lack of Feature Abstraction*: KNN uses raw pixels directly as features. More advanced feature extraction could improve performance (see the sketch below).
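As an illustration of this last point, the sketch below (not part of this repository) reduces the raw CIFAR-10 pixels to a handful of principal components before running KNN. It assumes scikit-learn is available; `pca_knn_accuracy` is a hypothetical helper name.

```python
import numpy as np
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier

def pca_knn_accuracy(X_train, y_train, X_test, y_test, n_components=100, k=5):
    # Project flattened images onto the top principal components instead of raw pixels
    pca = PCA(n_components=n_components).fit(X_train)
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(pca.transform(X_train), y_train)
    # Return classification accuracy on the reduced test set
    return knn.score(pca.transform(X_test), y_test)
```

Even a modest number of components tends to make the nearest-neighbour distances less sensitive to pixel-level noise, which is one plausible way to lift the accuracy ceiling noted above.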
## Analysis of ANN Results
The deep learning algorithm (ANN) used for our dataset has relatively low performance, with test set accuracy plateauing around 15% over 100 epochs.
The deep learning algorithm (ANN) used for our dataset has relatively low performance, with test set accuracy plateauing around 14% over 100 epochs.
These results suggest that adjustments to certain aspects of the model, such as complexity, hyperparameters, or weight initialization, may be necessary to improve its ability to generalize to new data. Further exploration of these aspects could be beneficial in optimizing model performance.
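One hedged, concrete example of the weight-initialization point: replacing the uniform [-1, 1) initialization used in `mlp.py` with a Xavier-style scheme that scales each layer's weights by 1/sqrt(fan_in). This is only a sketch; `xavier_initialization` is an illustrative name, not a function of this repository.

```python
import numpy as np

def xavier_initialization(d_in, d_h, d_out, seed=10):
    # Scale weights by 1/sqrt(fan_in) so the sigmoid pre-activations stay in a useful range
    rng = np.random.default_rng(seed)
    w1 = rng.normal(0.0, 1.0 / np.sqrt(d_in), size=(d_in, d_h))
    b1 = np.zeros((1, d_h))
    w2 = rng.normal(0.0, 1.0 / np.sqrt(d_h), size=(d_h, d_out))
    b2 = np.zeros((1, d_out))
    return w1, b1, w2, b2
```

Swapping this in (together with a smaller learning rate) would be a low-effort experiment to check whether the ~14% plateau is caused by poorly scaled initial weights rather than by the architecture itself.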
......
Results/mlp.png (image replaced: 36 KiB → 29.8 KiB)
......@@ -2,7 +2,7 @@ import numpy as np
import os
import pickle
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import read_cifar as rc
......@@ -77,3 +77,15 @@ def plot_KNN(X_train, y_train, X_test, y_test, max_k=20):
plt.ylabel('Accuracy')
plt.title('Variation of Accuracy with K')
plt.savefig("Results/knn.png")
# The following code block is executed only if the script is run as the main program
if __name__ == "__main__":
    # Read the CIFAR-10 dataset from the specified path
    X, y = rc.read_cifar('data/cifar-10-batches-py')
    # Split the dataset into training and testing sets
    X_train, y_train, X_test, y_test = rc.split_dataset(X, y, split=0.9)
    # Plot the evolution of accuracy across the number of neighbors (K) using the 'plot_KNN' function
    plot_KNN(X_train, y_train, X_test, y_test, max_k=20)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
N = 30 # number of input data
d_in = 3 # input dimension
d_h = 3 # number of neurons in the hidden layer
d_out = 2 # output dimension (number of neurons of the output layer)
learning_rate = 0.1
num_epochs=100
# Random initialization of the network weights and biases
def initialization(d_in, d_h, d_out):
    np.random.seed(10) # To get the same random values
    W1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights
    b1 = np.zeros((1, d_h)) # first layer biases
    W2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights
    b2 = np.zeros((1, d_out)) # second layer biases
    return W1, b1, W2, b2
data = np.random.rand(N, d_in) # create random input data
targets = np.random.rand(N, d_out) # create random targets
# Define the sigmoid activation function (with derivate=True, x is expected to be the already-activated output)
def sigmoid(x, derivate):
    if derivate == False:
        return 1 / (1 + np.exp(-x))
    else:
        return x * (1 - x)
# Define the softmax activation function (with derivate=True, x is expected to be the already-activated output)
def softmax(x, derivate):
    if derivate == False:
        return np.exp(x) / np.exp(np.array(x)).sum(axis=1, keepdims=True)
    else:
        return x * (1 - x)
# Define the loss metrics:
def loss_metrics(predictions, targets, metric, status):
    if metric == "MSE":
        if status == "forward":
            return np.mean((predictions - targets) ** 2)
        elif status == "backward":
            return 2 * (predictions - targets) / len(predictions) # Gradient of MSE loss
    elif metric == "BCE":
        # Binary Cross-Entropy Loss
        epsilon = 1e-15 # Small constant to prevent log(0)
        predictions = np.clip(predictions, epsilon, 1 - epsilon)
        if status == "forward":
            return - (targets * np.log(predictions) + (1 - targets) * np.log(1 - predictions)).mean()
        elif status == "backward":
            return (predictions - targets) / ((1 - predictions) * predictions) # Gradient of BCE loss
    else:
        raise ValueError("Metric not supported: " + metric)
import read_cifar as rc
# learn_once_mse
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
"""
Update the weights and biases of the network for one gradient descent step using Mean Squared Error (MSE) loss.
......@@ -74,42 +24,38 @@ def loss_metrics(predictions, targets, metric, status):
- b2: Updated bias vector of the second layer.
- loss: Mean Squared Error (MSE) loss for monitoring.
"""
def learn_once_mse(W1, b1, W2, b2, data, targets, learning_rate):
a0 = data
# Forward pass
# Calculate the input and output of the hidden layer
hidden_layer_input = np.matmul(data, W1) + b1
hidden_layer_output = sigmoid(hidden_layer_input, derivate=False) # Apply the sigmoid activation
z1 = np.matmul(a0, w1) + b1 # Calculate the weighted sum for the hidden layer
a1 = 1 / (1 + np.exp(-z1)) # Apply the sigmoid activation function to hidden layer
z2 = np.matmul(a1, w2) + b2 # Calculate the weighted sum for the output layer
a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True) # Apply the softmax activation to the output layer
predictions = a2 # The network's predictions
# Calculate the input and output of the output layer
output_layer_input = np.matmul(hidden_layer_output, W2) + b2
output_layer_output = softmax(output_layer_input, derivate=False) # Apply the softmax activation
n = data.shape[0] # Number of samples (batch size)
# Backpropagation phase
# Calculate the error at the output layer
output_error = output_layer_output - targets
# Backpropagation
e2 = predictions - targets # Compute the error in the output layer
# Calculate gradients for the output layer
output_layer_gradients = output_error * softmax(output_layer_output, derivate=True)
dw2 = e2 * a2 * (1 - a2) / n # Gradient for w2
update_w2 = np.dot(a1.T, dw2) / n # Update for w2
update_b2 = (1/a1.shape[1])*dw2.sum(axis=0, keepdims=True) # Update for b2
# Update weights and biases of the output layer
W2 = W2 - learning_rate * np.dot(hidden_layer_output.T, output_layer_gradients) / data.shape[0]
b2 = b2 - learning_rate * (1 / hidden_layer_output.shape[1]) * output_layer_gradients.sum(axis=0, keepdims=True)
e1 = np.dot(e2, w2.T) # Compute the error in the hidden layer
dw1 = e1 * a1 * (1 - a1) # Gradient for w1
update_b1 = (1/data.shape[1])*dw1.sum(axis=0, keepdims=True) # Update for b1
update_w1 = np.dot(data.T, dw1) / n # Update for w1
# Calculate the error at the hidden layer
hidden_layer_error = np.dot(output_layer_gradients, W2.T)
# Calculate gradients for the hidden layer
hidden_layer_gradients = hidden_layer_error * sigmoid(hidden_layer_output, derivate=True)
# Update weights and biases of the hidden layer
W1 = W1 - learning_rate * np.dot(data.T, hidden_layer_gradients) / data.shape[0]
b1 = b1 - learning_rate * (1 / data.shape[1]) * hidden_layer_gradients.sum(axis=0)
# Gradient descent
w2 = w2 - learning_rate * update_w2
b2 = b2 - learning_rate * update_b2
w1 = w1 - learning_rate * update_w1
b1 = b1 - learning_rate * update_b1
# Calculate the loss using the specified metric
loss = loss_metrics(output_layer_output, targets,metric="MSE",status="forward")
# Calculate the Mean Squared Error (MSE) loss
loss = compute_error(predictions, targets, loss_type = 'MSE')
return W1, b1, W2, b2, loss
return w1, b1, w2, b2, loss
#One Hot Function :
def one_hot(targets):
......@@ -133,9 +79,9 @@ def one_hot(targets):
return one_hot_matrix
#learn_once_cross_entropy
def learn_once_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate):
# The function learn_once_cross_entropy:
def learn_once_cross_entropy(w1, b1, w2, b2, data, targets, learning_rate):
"""
Perform one gradient descent step using binary cross-entropy loss.
......@@ -149,57 +95,45 @@ def learn_once_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate):
- Updated weights and biases (W1, b1, W2, b2) of the network.
- Loss value for monitoring.
"""
# Forward pass
# Implement feedforward propagation on the hidden layer
hidden_layer_input = np.matmul(data, W1) + b1
hidden_layer_output = sigmoid(hidden_layer_input, derivate=False) # Apply the Sigmoid activation function
# Implement feedforward propagation on the output layer
output_layer_input = np.matmul(hidden_layer_output, W2) + b2
output_layer_output = softmax(output_layer_input, derivate=False) # Apply the Softmax activation function
# Backpropagation phase
# Updating W2 and b2
output_error = output_layer_output - targets
dW2 = output_error * softmax(output_layer_output, derivate=True)
W2_update = np.dot(hidden_layer_output.T, dW2) / data.shape[0]
update_b2 = (1 / hidden_layer_output.shape[1]) * dW2.sum(axis=0, keepdims=True)
# Updating W1 and b1
hidden_layer_error = np.dot(dW2, W2.T)
dW1 = hidden_layer_error * sigmoid(hidden_layer_output, derivate=True)
W1_update = np.dot(data.T, dW1) / data.shape[0]
update_b1 = (1 / data.shape[1]) * dW1.sum(axis=0, keepdims=True)
z1 = np.matmul(data, w1) + b1
a1 = 1 / (1 + np.exp(-z1))
z2 = np.matmul(a1, w2) + b2
a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)
# Gradient descent
W2 = W2 - learning_rate * W2_update
W1 = W1 - learning_rate * W1_update
b2 = b2 - learning_rate * update_b2
b1 = b1 - learning_rate * update_b1
predictions = a2
# Compute loss (Binary Cross Entropy)
loss = loss_metrics(output_layer_output, targets, metric="BCE", status="forward")
one_hot_matrix = one_hot(targets)
return W1, b1, W2, b2, loss
n = data.shape[0]
# Backpropagation
e2 = predictions - one_hot_matrix
dw2 = e2 * a2 * (1 - a2) / n
update_w2 = np.dot(a1.T, dw2) / n
update_b2 = (1/a1.shape[1])*dw2.sum(axis=0, keepdims=True)
def calculate_accuracy(predictions, actual_values):
"""
calculate_accuracy: Compute the accuracy of the model.
e1 = np.dot(e2, w2.T)
dw1 = e1 * a1 * (1 - a1)
update_b1 = (1/data.shape[1])*dw1.sum(axis=0, keepdims=True)
update_w1 = np.dot(data.T, dw1) / n
Parameters:
- predictions: Predicted values.
- actual_values: Ground truth observations.
# Gradient descent
w2 = w2 - learning_rate * update_w2
b2 = b2 - learning_rate * update_b2
w1 = w1 - learning_rate * update_w1
b1 = b1 - learning_rate * update_b1
Returns:
- Accuracy as a float.
"""
correct_predictions = predictions.argmax(axis=1) == actual_values.argmax(axis=1)
accuracy = correct_predictions.mean()
return accuracy
# Calculate binary cross-entropy loss
loss = compute_error(predictions, one_hot_matrix, loss_type = 'binary cross-entropy')
def train_mlp(W1, b1, W2, b2, data, targets, learning_rate):
# Calculate the accuracy for a single batch
batch_accuracy = accuracy(predictions, one_hot_matrix)
return w1, b1, w2, b2, loss, batch_accuracy
# The function train_mlp:
def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
"""
Perform training steps for a specified number of epochs.
......@@ -215,43 +149,25 @@ def train_mlp(W1, b1, W2, b2, data, targets, learning_rate):
- Updated weights and biases (W1, b1, W2, b2) of the network.
- List of training accuracies across epochs as a list of floats.
"""
train_accuracies = [] # To store training accuracies across epochs
# Forward pass
hidden_layer_input = np.matmul(data, W1) + b1
hidden_layer_output = sigmoid(hidden_layer_input, derivate=False)
output_layer_input = np.matmul(hidden_layer_output, W2) + b2
output_layer_output = softmax(output_layer_input, derivate=False)
# Iterate through the specified number of epochs
for epoch in range(num_epoch):
N = data.shape[0]
# Call the 'learn_once_cross_entropy' function to update weights, calculate loss, and obtain batch accuracy
w1, b1, w2, b2, loss, batch_accuracy = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
# Backpropagation phase
output_error = output_layer_output - targets
output_layer_gradients = output_error * softmax(output_layer_output, derivate=True)
# Append the batch accuracy to the 'train_accuracies' list for tracking progress
train_accuracies.append(batch_accuracy)
W2_update = np.dot(hidden_layer_output.T, output_layer_gradients) / N
update_b2 = (1 / hidden_layer_output.shape[1]) * output_layer_gradients.sum(axis=0, keepdims=True)
# Print the current epoch's progress
print("Epoch {}/{}".format(epoch+1, num_epoch))
print("[=======] Train_Accuracies : {}".format(round(batch_accuracy, 5)))
hidden_layer_error = np.dot(output_layer_gradients, W2.T)
hidden_layer_gradients = hidden_layer_error * sigmoid(hidden_layer_output, derivate=True)
return w1, b1, w2, b2, train_accuracies
W1_update = np.dot(data.T, hidden_layer_gradients) / N
update_b1 = (1 / data.shape[1]) * hidden_layer_gradients.sum(axis=0, keepdims=True)
# Gradient descent
W2 = W2 - learning_rate * W2_update
W1 = W1 - learning_rate * W1_update
b2 = b2 - learning_rate * update_b2
b1 = b1 - learning_rate * update_b1
# Calculate loss and accuracy
loss = loss_metrics(output_layer_output, targets,metric="BCE",status="forward")
train_accuracies=calculate_accuracy(output_layer_output, targets)
return W1, b1, W2, b2, loss, train_accuracies
def test_mlp(W1, b1, W2, b2, data_test, labels_test):
# The function test_mlp:
def test_mlp(w1,b1,w2,b2,data_test,labels_test):
"""
Evaluate the network's performance on the test set.
......@@ -263,18 +179,19 @@ def test_mlp(W1, b1, W2, b2, data_test, labels_test):
Returns:
- test_accuracy: The testing accuracy as a float.
"""
# Forward pass
hidden_layer_input = np.matmul(data_test, W1) + b1
hidden_layer_output = sigmoid(hidden_layer_input, derivate=False)
z1 = np.matmul(data_test, w1) + b1
a1 = 1 / (1 + np.exp(-z1))
z2 = np.matmul(a1, w2) + b2
a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)
output_layer_input = np.matmul(hidden_layer_output, W2) + b2
output_layer_output = softmax(output_layer_input, derivate=False)
# Compute the testing accuracy using the 'accuracy' function
test_accuracy = accuracy(a2, labels_test)
# Compute testing accuracy
test_accuracy = calculate_accuracy(output_layer_output, labels_test)
return test_accuracy
def run_mlp_training(X_train, labels_train, data_test, labels_test, num_hidden_units, learning_rate, num_epochs):
# The function run_mlp_training:
def run_mlp_training(X_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
"""
Train an MLP classifier and evaluate its performance.
......@@ -291,30 +208,31 @@ def run_mlp_training(X_train, labels_train, data_test, labels_test, num_hidden_u
- train_accuracies: List of training accuracies across epochs.
- test_accuracy: The final testing accuracy.
"""
#input_dimension = X_train.shape[1]
#output_dimension = np.unique(labels_train).shape[0] # Number of classes
# Initialize weights and biases
W1, b1, W2, b2 = initialization(d_in, d_h, d_out)
d_in = X_train.shape[1] # Input dimension
d_out = 10 # Output dimension: 10 classes
train_accuracies = [] # List to store training accuracies
np.random.seed(10) # Set a random seed for reproducibility
# Training loop
for epoch in range(num_epochs):
W1, b1, W2, b2, loss, train_accuracy = train_mlp(W1, b1, W2, b2, X_train, one_hot(labels_train), learning_rate)
test_accuracy = test_mlp(W1, b1, W2, b2, data_test, one_hot(labels_test))
train_accuracies.append(train_accuracy)
# Initialize weights and biases for the neural network
w1 = 2 * np.random.rand(d_in, d_h) - 1 # First layer weights
b1 = np.zeros((1, d_h)) # First layer biases
w2 = 2 * np.random.rand(d_h, d_out) - 1 # Second layer weights
b2 = np.zeros((1, d_out)) # Second layer biases
print("Epoch {}/{}".format(epoch + 1, num_epochs))
print("Train Accuracy: {:.6f} Test Accuracy: {:.6f}".format(round(train_accuracy, 6), round(test_accuracy, 6)))
# Train the MLP using the provided training data and parameters
w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, X_train, labels_train, learning_rate, num_epoch)
return train_accuracies, test_accuracy
# Test the trained MLP on the testing data and compute the test accuracy
test_accuracy = test_mlp(w1, b1, w2, b2, data_test, one_hot(labels_test))
# plot_ANN
# Print the test set accuracy
print("test accuracy:", test_accuracy)
import matplotlib.pyplot as plt
return train_accuracies, test_accuracy
def plot_ANN(X_train, y_train, X_test, y_test):
# Plot of the evolution of learning accuracy across learning epochs:
def plot_ANN(data_train, labels_train, data_test, labels_test):
"""
Plot the variation of accuracy in terms of the number of epochs.
......@@ -324,29 +242,61 @@ def plot_ANN(X_train, y_train, X_test, y_test):
- X_test: Test data matrix.
- y_test: True labels for the test data.
"""
# Train an MLP and obtain training accuracies and final test accuracy
train_accuracies, test_accuracy = run_mlp_training(X_train, y_train, X_test, y_test, num_hidden_units=64, learning_rate=0.1, num_epochs=100)
# Train the MLP and obtain training accuracies and test accuracy
train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, 64, 0.1, 100)
# Display the test accuracy
print("Test Set Accuracy: {}".format(test_accuracy))
# Create a DataFrame from the accuracy values
df = pd.DataFrame({'Epoch': range(1, len(train_accuracies) + 1), 'Accuracy': train_accuracies})
# Create a Matplotlib plot
plt.plot(list(range(1, len(train_accuracies) + 1)), train_accuracies)
plt.title('Accuracy Variation Over Epochs')
# Create a line plot using Matplotlib
plt.figure(figsize=(10, 6))
plt.plot(df['Epoch'], df['Accuracy'], 'b')
# Add labels and title to the plot
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('The Variation of Accuracy')
# Save the figure (optional)
# Save the plot as an image file
plt.savefig("Results/mlp.png")
# Show the plot (optional)
plt.show()
# Define accuracy function
def accuracy(y_pred, y_true):
    """
    Compute the accuracy of the model.

    Parameters:
    - y_pred: Predicted class scores, one row per sample.
    - y_true: Ground-truth labels, one-hot encoded.

    Returns:
    - Accuracy as a float.
    """
    accuracy = (y_pred.argmax(axis=1) == y_true.argmax(axis=1)).mean()
    return accuracy
def compute_error(predictions, targets, loss_type):
    # Calculate the loss based on the specified loss type
    if loss_type == 'MSE': # Mean Squared Error loss
        loss = np.mean(np.square(predictions - targets))
    elif loss_type == 'binary cross-entropy': # Binary Cross-Entropy loss, averaged element-wise
        loss = -np.mean(targets * np.log(predictions + 1e-7) + (1 - targets) * np.log(1 - predictions + 1e-7))
    else:
        raise ValueError("Unsupported loss type. Use 'MSE' or 'binary cross-entropy'.")
    return loss
# The following code block is executed only if the script is run as the main program
if __name__ == "__main__":
    # Read the CIFAR-10 dataset from the specified path
    X, y = rc.read_cifar('data/cifar-10-batches-py')
    # Split the dataset into training and testing sets
    X_train, y_train, X_test, y_test = rc.split_dataset(X, y, split=0.9)
    # Plot the evolution of learning accuracy across learning epochs using the 'plot_ANN' function
    plot_ANN(X_train, y_train, X_test, y_test)
\ No newline at end of file
File suppressed by a .gitattributes entry, the file's encoding is unsupported, or the file size exceeds the limit.
import numpy as np
# Import the functions from mlp
from mlp import initialization, train_mlp, calculate_accuracy
from mlp import train_mlp, accuracy
def test_mlp_training():
# Test parameters
......@@ -11,6 +11,9 @@ def test_mlp_training():
num_hidden_units = 5
learning_rate = 0.1
num_epochs = 10
d_in= 3
d_out=2
d_h=3
# Generate dummy data for the test
X_train = np.random.randn(num_samples, num_features)
......@@ -19,10 +22,13 @@ def test_mlp_training():
y_test = np.random.randint(0, num_classes, num_samples)
# Initialize the weights and biases
W1, b1, W2, b2 = initialization(num_features, num_hidden_units, num_classes)
w1 = 2 * np.random.rand(d_in, d_h) - 1 # First layer weights
b1 = np.zeros((1, d_h)) # First layer biases
w2 = 2 * np.random.rand(d_h, d_out) - 1 # Second layer weights
b2 = np.zeros((1, d_out)) # Second layer biases
# Train the model
train_accuracies, test_accuracy = train_mlp(W1, b1, W2, b2, X_train, y_train, learning_rate, num_epochs)
w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, X_train, y_train, learning_rate, num_epochs)
# Check that each recorded training accuracy is a number between 0 and 1
assert all(0 <= acc <= 1 for acc in train_accuracies)
......