Compare revisions: mduhalde/image-classification

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (4)
@@ -16,8 +16,8 @@ This project contains several files and directories. A brief description of each is given below.
 - `results/`: This folder contains some of the results generated by the program.
 - `knn.py`: Contains functions related to the KNN algorithm.
 - `read_cifar.py`: Contains functions related to reading and parsing the CIFAR-10 dataset.
-- `nn.py`: Contains functions related to the Neural Network algorithm.
-- `main.ipynb`: Jupyter Notebook containing the main program. It is used to the algorithms and generate the results.
+- `mlp.py`: Contains functions related to the Neural Network algorithm.
+- `main.ipynb`: Jupyter Notebook containing the main program. It is used to test the algorithms and generate the results. It also contains some descriptions of the algorithms, notably a mathematical description of the Neural Network algorithm.

 ## Usage
@@ -37,3 +37,4 @@ The main program is contained in the `main.ipynb` file. It can be run using Jupyter.
 ## References
 - Data Source: The CIFAR-10 Dataset. <https://www.cs.toronto.edu/~kriz/cifar.html>
+- Assignment repository: <https://gitlab.ec-lyon.fr/edelland/mod_4_6-td1>
mlp.py (new file)
import numpy as np
def learn_once_mse(
w1: np.ndarray,
b1: np.ndarray,
w2: np.ndarray,
b2: np.ndarray,
data: np.ndarray,
targets: np.ndarray,
learning_rate: float,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, float]:
"""Perform one step of gradient descent on the given data and targets.
Args:
w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
data (np.ndarray): The data, of shape (N, d_in).
targets (np.ndarray): The targets, of shape (N, d_out).
learning_rate (float): The learning rate.
Returns:
(np.ndarray, np.ndarray, np.ndarray, np.ndarray, float): A tuple containing the updated weights and biases, and the loss.
"""
# Forward pass
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
# Compute loss (MSE)
loss = np.mean(np.square(predictions - targets))
# Backward pass
# Compute gradients
    # gradient of the squared error w.r.t. the predictions; the remaining
    # 1 / d_out factor from np.mean is a constant that only rescales the
    # effective learning rate
    dC_da2 = 2 * (predictions - targets) / predictions.shape[0]
dC_dz2 = dC_da2 * a2 * (1 - a2)
dC_dw2 = np.matmul(a1.T, dC_dz2)
dC_db2 = np.sum(dC_dz2, axis=0, keepdims=True)
dC_da1 = np.matmul(dC_dz2, w2.T)
dC_dz1 = dC_da1 * a1 * (1 - a1)
dC_dw1 = np.matmul(a0.T, dC_dz1)
dC_db1 = np.sum(dC_dz1, axis=0, keepdims=True)
# Update weights and biases
w1 -= learning_rate * dC_dw1
b1 -= learning_rate * dC_db1
w2 -= learning_rate * dC_dw2
b2 -= learning_rate * dC_db2
return w1, b1, w2, b2, loss
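
# Illustrative usage sketch (not part of the original file): a few gradient
# steps of learn_once_mse on toy data. The shapes and hyper-parameters below
# are invented for the demo; the printed MSE loss should decrease.
def _demo_learn_once_mse():
    rng = np.random.default_rng(0)
    n, d_in, d_h, d_out = 8, 4, 5, 3
    w1 = 2 * rng.random((d_in, d_h)) - 1  # weights drawn in [-1, 1)
    b1 = np.zeros((1, d_h))
    w2 = 2 * rng.random((d_h, d_out)) - 1
    b2 = np.zeros((1, d_out))
    data = rng.random((n, d_in))
    targets = rng.random((n, d_out))  # targets in (0, 1), the sigmoid output range
    for step in range(5):
        w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, 0.1)
        print(f"step {step}: loss = {loss:.4f}")
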
def one_hot(labels: np.ndarray) -> np.ndarray:
"""Calculates the one-hot matrix of the given labels.
Args:
        labels (np.ndarray): The labels, as integer class indices of shape (N,).
Returns:
np.ndarray: The one-hot matrix of the labels.
"""
return np.eye(labels.max() + 1)[labels]
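
# Quick check (demo only): one_hot maps integer labels to rows of the identity
# matrix, e.g. with three classes the label 2 becomes [0., 0., 1.].
def _demo_one_hot():
    print(one_hot(np.array([0, 2, 1])))
    # [[1. 0. 0.]
    #  [0. 0. 1.]
    #  [0. 1. 0.]]
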
def learn_once_cross_entropy(
w1: np.ndarray,
b1: np.ndarray,
w2: np.ndarray,
b2: np.ndarray,
data: np.ndarray,
labels_train: np.ndarray,
learning_rate: float,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, float]:
"""Perform one step of gradient descent using a binary cross-entropy loss on the given data and targets.
Args:
w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
data (np.ndarray): The data, of shape (N, d_in).
        labels_train (np.ndarray): The training labels (integer class indices), of shape (N,).
learning_rate (float): The learning rate.
Returns:
(np.ndarray, np.ndarray, np.ndarray, np.ndarray, float): A tuple containing the updated weights and biases, and the loss.
"""
# Forward pass
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
one_hot_targets = one_hot(labels_train)
    # Compute loss (binary cross-entropy, averaged over all outputs)
    # https://arize.com/blog-course/binary-cross-entropy-log-loss/
    eps = 1e-12  # clip to avoid log(0) when a sigmoid output saturates
    predictions = np.clip(predictions, eps, 1 - eps)
    loss = -np.mean(
        one_hot_targets * np.log(predictions)
        + (1 - one_hot_targets) * np.log(1 - predictions)
    )
# Backward pass
# Compute gradients
    # for sigmoid outputs with a (summed) cross-entropy loss, the gradient
    # w.r.t. z2 simplifies to the prediction error; the batch-size factor is
    # absorbed into the learning rate
    dC_dz2 = a2 - one_hot_targets
dC_dw2 = np.matmul(a1.T, dC_dz2)
dC_db2 = np.sum(dC_dz2, axis=0, keepdims=True)
dC_da1 = np.matmul(dC_dz2, w2.T)
dC_dz1 = dC_da1 * a1 * (1 - a1)
dC_dw1 = np.matmul(a0.T, dC_dz1)
dC_db1 = np.sum(dC_dz1, axis=0, keepdims=True)
# Update weights and biases
w1 -= learning_rate * dC_dw1
b1 -= learning_rate * dC_db1
w2 -= learning_rate * dC_dw2
b2 -= learning_rate * dC_db2
return w1, b1, w2, b2, loss
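
# Illustrative usage sketch (not part of the original file): a few
# cross-entropy steps on toy integer labels. The label layout guarantees all
# n_classes appear, so one_hot produces a (N, n_classes) matrix matching w2.
def _demo_learn_once_cross_entropy():
    rng = np.random.default_rng(1)
    n, d_in, d_h, n_classes = 16, 4, 8, 3
    w1 = 2 * rng.random((d_in, d_h)) - 1
    b1 = np.zeros((1, d_h))
    w2 = 2 * rng.random((d_h, n_classes)) - 1
    b2 = np.zeros((1, n_classes))
    data = rng.random((n, d_in))
    labels = np.arange(n) % n_classes  # integer class indices, shape (N,)
    for step in range(5):
        w1, b1, w2, b2, loss = learn_once_cross_entropy(
            w1, b1, w2, b2, data, labels, 0.1
        )
        print(f"step {step}: loss = {loss:.4f}")
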
def train_mlp(
w1: np.ndarray,
b1: np.ndarray,
w2: np.ndarray,
b2: np.ndarray,
data_train: np.ndarray,
labels_train: np.ndarray,
learning_rate: float,
num_epoch: int,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, list[float]]:
"""Perform num_epoch training steps.
Args:
w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
data_train (np.ndarray): The data, of shape (N, d_in).
        labels_train (np.ndarray): The training labels (integer class indices), of shape (N,).
learning_rate (float): The learning rate.
num_epoch (int): The number of epochs.
Returns:
(np.ndarray, np.ndarray, np.ndarray, np.ndarray, list[float]): A tuple containing the resulting weights and biases, and the list of accuracy values of each epoch.
"""
# Starting accuracy (random weights)
accuracy = test_mlp(w1, b1, w2, b2, data_train, labels_train)
accuracies = [accuracy]
    for _ in range(num_epoch):
        # Train once (cross-entropy on the integer labels; learn_once_mse
        # expects (N, d_out) targets, not class indices)
        w1, b1, w2, b2, _ = learn_once_cross_entropy(
            w1, b1, w2, b2, data_train, labels_train, learning_rate
        )
# Compute current model training accuracy
accuracy = test_mlp(w1, b1, w2, b2, data_train, labels_train)
accuracies.append(accuracy)
return w1, b1, w2, b2, accuracies
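
# Illustrative usage sketch (not part of the original file): train for a few
# epochs on toy data and plot the accuracy curve. matplotlib is assumed to be
# installed; everything else comes from this module.
def _demo_train_mlp():
    import matplotlib.pyplot as plt

    rng = np.random.default_rng(2)
    n, d_in, d_h, n_classes = 32, 4, 8, 3
    w1 = 2 * rng.random((d_in, d_h)) - 1
    b1 = np.zeros((1, d_h))
    w2 = 2 * rng.random((d_h, n_classes)) - 1
    b2 = np.zeros((1, n_classes))
    data = rng.random((n, d_in))
    labels = np.arange(n) % n_classes
    _, _, _, _, accuracies = train_mlp(w1, b1, w2, b2, data, labels, 0.1, 50)
    plt.plot(accuracies)
    plt.xlabel("epoch")
    plt.ylabel("training accuracy")
    plt.show()
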
def test_mlp(
w1: np.ndarray,
b1: np.ndarray,
w2: np.ndarray,
b2: np.ndarray,
data_test: np.ndarray,
labels_test: np.ndarray,
) -> float:
"""Test the network on the given test set.
Args:
w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
        data_test (np.ndarray): The test data, of shape (N, d_in).
        labels_test (np.ndarray): The test labels (integer class indices), of shape (N,).
Returns:
float: The testing accuracy of the model on the given data.
"""
# Forward pass
a0 = data_test # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
# Compute accuracy
accuracy = np.mean(np.argmax(predictions, axis=1) == labels_test)
return accuracy
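
# Sanity check (demo only): with random weights and labels drawn independently
# of the data, test_mlp should sit near chance level, i.e. about 1 / n_classes.
def _demo_test_mlp():
    rng = np.random.default_rng(3)
    n, d_in, d_h, n_classes = 300, 4, 8, 3
    w1 = 2 * rng.random((d_in, d_h)) - 1
    b1 = np.zeros((1, d_h))
    w2 = 2 * rng.random((d_h, n_classes)) - 1
    b2 = np.zeros((1, n_classes))
    data = rng.random((n, d_in))
    labels = rng.integers(0, n_classes, size=n)
    print(test_mlp(w1, b1, w2, b2, data, labels))  # roughly 0.33
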
def run_mlp_training(
data_train: np.ndarray,
labels_train: np.ndarray,
data_test: np.ndarray,
labels_test: np.ndarray,
d_h: int,
learning_rate: float,
num_epoch: int,
) -> tuple[list[float], float]:
"""Train an MLP classifier.
Args:
        data_train (np.ndarray): The training data, of shape (N, d_in).
        labels_train (np.ndarray): The training labels, of shape (N,).
        data_test (np.ndarray): The test data, of shape (N, d_in).
        labels_test (np.ndarray): The test labels, of shape (N,).
        d_h (int): The number of neurons in the hidden layer.
        learning_rate (float): The learning rate.
        num_epoch (int): The number of training epochs.
    Returns:
        (list[float], float): A tuple containing the list of training accuracy values across epochs and the final test accuracy.
"""
    d_in = data_train.shape[1]
    d_out = labels_train.max() + 1  # number of classes, not the number of samples
    # Random initialization of the network weights and biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases
# Train the network
w1, b1, w2, b2, accuracy_values = train_mlp(
w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch
)
# Test the network
accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
return accuracy_values, accuracy
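
# End-to-end sketch (not part of the original file): run_mlp_training on
# synthetic arrays standing in for CIFAR-10. In the project the data comes
# from read_cifar.py; the shapes and hyper-parameters here are invented.
if __name__ == "__main__":
    rng = np.random.default_rng(4)
    n_train, n_test, d_in, n_classes = 200, 50, 12, 3
    data_train = rng.random((n_train, d_in))
    labels_train = rng.integers(0, n_classes, size=n_train)
    data_test = rng.random((n_test, d_in))
    labels_test = rng.integers(0, n_classes, size=n_test)
    accuracies, final_accuracy = run_mlp_training(
        data_train, labels_train, data_test, labels_test,
        d_h=16, learning_rate=0.1, num_epoch=20,
    )
    print(f"final test accuracy: {final_accuracy:.3f}")
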
results/knn.png: image replaced (22 KiB → 28.2 KiB).