Commit f710c639 authored by choukri's avatar choukri

commit mlp

parent be92e118
/data
/.pytest_cache
/__pycache__
/tests/__pycache__
2. We have C = (a2 - Y)^2, so dC/da2 = 2(a2 - Y)
3. We have dC/dz2 = dC/da2 * da2/dz2
Since z2 = a1*w2 + b2 and a2 = sigmoid(z2), da2/dz2 = sigmoid(z2) * (1 - sigmoid(z2))
Hence dC/dz2 = dC/da2 * a2 * (1 - a2)
4. dC/dw2 = dC/dz2 * dz2/dw2
Since z2 = a1*w2 + b2, dz2/dw2 = a1
Hence dC/dw2 = dC/dz2 * a1
5. dC/db2 = dz2/db2 * dC/dz2
Since dz2/db2 = 1,
dC/db2 = dC/dz2
6. dC/da1 = dC/dz2 * dz2/da1 = dC/dz2 * w2
7. dC/dz1 = dC/da1 * da1/dz1 = dC/da1 * a1 * (1 - a1)
8. dC/dw1 = dC/dz1 * dz1/dw1 = dC/dz1 * a0
9. dC/db1 = dC/dz1 * dz1/db1 = dC/dz1 * 1 = dC/dz1
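
These formulas can be checked numerically with a finite difference on a tiny network. The sketch below (plain NumPy; the single sample, the 2-2-1 layer sizes and the epsilon value are arbitrary choices for illustration, not part of the assignment) compares the analytic dC/dw2 = a1.T * dC/dz2 with a numerical estimate:

import numpy as np

rng = np.random.default_rng(0)
a0 = rng.random((1, 2))                  # one sample with 2 features
Y = rng.random((1, 1))                   # one target value
w1, b1 = rng.random((2, 2)), np.zeros((1, 2))
w2, b2 = rng.random((2, 1)), np.zeros((1, 1))

def cost(w2_):
    a1 = 1 / (1 + np.exp(-(a0 @ w1 + b1)))
    a2 = 1 / (1 + np.exp(-(a1 @ w2_ + b2)))
    return np.sum((a2 - Y) ** 2)

# analytic gradient, following steps 2 to 4 above
a1 = 1 / (1 + np.exp(-(a0 @ w1 + b1)))
a2 = 1 / (1 + np.exp(-(a1 @ w2 + b2)))
dC_dz2 = 2 * (a2 - Y) * a2 * (1 - a2)
dC_dw2 = a1.T @ dC_dz2

# finite-difference estimate for the first coefficient of w2
eps = 1e-6
w2_eps = w2.copy()
w2_eps[0, 0] += eps
numeric = (cost(w2_eps) - cost(w2)) / eps
print(dC_dw2[0, 0], numeric)             # the two values should agree closely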
cifar10.png (286 KiB)
mlp.py 0 → 100644
"""
2. We have C = (a2 - Y)^2, so dC/da2 = 2(a2 - Y)
3. We have dC/dz2 = dC/da2 * da2/dz2
Since z2 = a1*w2 + b2 and a2 = sigmoid(z2), da2/dz2 = sigmoid(z2) * (1 - sigmoid(z2))
Hence dC/dz2 = dC/da2 * a2 * (1 - a2)
4. dC/dw2 = dC/dz2 * dz2/dw2
Since z2 = a1*w2 + b2, dz2/dw2 = a1
Hence dC/dw2 = dC/dz2 * a1
5. dC/db2 = dz2/db2 * dC/dz2
Since dz2/db2 = 1,
dC/db2 = dC/dz2
6. dC/da1 = dC/dz2 * dz2/da1 = dC/dz2 * w2
7. dC/dz1 = dC/da1 * da1/dz1 = dC/da1 * a1 * (1 - a1)
8. dC/dw1 = dC/dz1 * dz1/dw1 = dC/dz1 * a0
9. dC/db1 = dC/dz1 * dz1/db1 = dC/dz1 * 1 = dC/dz1
"""
# 10. learn_once_mse function
import matplotlib.pyplot as plt
import numpy as np
from read_cifar import read_cifar, read_cifar_batch, split_dataset
path = "data/cifar-10-batches-py/"
def learn_once_mse(w1, w2, b1, b2, data, targets, learning_rate=0.00001):
"""
perform one gradient descent step
:param
w1: weight of the network
w2: weight of the network
b1: biases of the network
b2: biases of the network
data: the data input of the network
targets: the desired output
learning_rate: the factor that changes the weights
:return:
w1: updated weight of the network
w2: updated weight of the network
b1: updated biases of the network
b2: updated biases of the network
loss: the loss that separates us from the desired output
"""
# Forward pass
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
    # layer 2 (output layer)
    # we have dC/dw2 = dz2/dw2 * da2/dz2 * dC/da2
    # we have dC/db2 = dz2/db2 * da2/dz2 * dC/da2
var_z2_w2 = a1 # dz2/dw2
var_z2_b2 = np.ones((1, data.shape[0]), dtype=float) # dz2/db2
var_a2_z2 = a2 * (1 - a2) # da2/dz2 sigmoid(z2) * (1 - sigmoid(z2))
var_loss_a2 = 2 * (a2 - targets) # dC/da2
var_loss_w2 = np.dot(var_z2_w2.T, var_a2_z2 * var_loss_a2) # dC/dw2
var_loss_b2 = np.dot(var_z2_b2, var_a2_z2 * var_loss_a2) # dC/db2
    # layer 1 (hidden layer)
    # we have dC/dw1 = dz1/dw1 * da1/dz1 * dC/da1
    # we have dC/db1 = dz1/db1 * da1/dz1 * dC/da1
var_z1_w1 = a0 # dz1/dw1
var_z1_b1 = np.ones((1, data.shape[0]), dtype=float) # dz1/db1
var_a1_z1 = a1 * (1 - a1) # da1/dz1 = sigmoid(z1) * (1 - sigmoid(z1)) = a1*(1-a1)
var_z2_a1 = w2 # dz2/da1
var_loss_a1 = np.dot(
var_loss_a2 * var_a2_z2, var_z2_a1.T
) # dC/da1 = dC/da2 * da2/dz2 * dz2/da1
var_loss_w1 = np.dot(var_z1_w1.T, var_a1_z1 * var_loss_a1)
var_loss_b1 = np.dot(var_z1_b1, var_a1_z1 * var_loss_a1)
# updated weights of the network
w1 = w1 - learning_rate * var_loss_w1
w2 = w2 - learning_rate * var_loss_w2
    # updated biases of the network
b1 = b1 - learning_rate * var_loss_b1
b2 = b2 - learning_rate * var_loss_b2
    # Forward pass with the updated parameters, to compute the new loss
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
# Compute loss (MSE)
loss = np.mean(np.square(predictions - targets))
print(loss)
return w1, w2, b1, b2, loss
# 11. one_hot function
def one_hot(array):
"""
compute the one_hot_encoding of a 1D array
:param
array: a 1D array
    :return:
one_hot_encoded_array: the corresponding one_hot matrix
"""
one_hot_encoded_array = np.zeros((array.size, array.max() + 1))
one_hot_encoded_array[np.arange(array.size), array] = 1
return one_hot_encoded_array
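# Quick illustration of one_hot (not part of the assignment):
# one_hot(np.array([1, 0, 2])) returns
# [[0., 1., 0.],
#  [1., 0., 0.],
#  [0., 0., 1.]]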
# 12. learn_once_cross_entropy
def learn_once_cross_entropy(
w1, w2, b1, b2, data_train, labels_train, learning_rate=0.00001
):
"""
perform one gradient descent step using a binary cross_entropy loss
:param
w1: weight of the network
w2: weight of the network
b1: biases of the network
b2: biases of the network
data_train: the train set data which is the input of the network
labels_train: the desired output will play the rule of target
learning_rate: the factor that changes the weights
:return:
w1: updated weight of the network
w2: updated weight of the network
b1: updated biases of the network
b2: updated biases of the network
loss: the loss that separates us from the desired output
"""
    # we admit that dC/dz2 = a2 - Y, where Y = one_hot(labels_train)
    # (the usual simplified gradient of the cross-entropy loss with respect to z2)
# Forward pass
a0 = data_train # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
    # layer 2 (output layer)
    # we have dC/dw2 = dz2/dw2 * dC/dz2
    # we have dC/db2 = dz2/db2 * dC/dz2
var_z2_w2 = a1 # dz2/dw2
var_z2_b2 = np.ones((1, data_train.shape[0]), dtype=float) # dz2/db2
var_loss_z2 = a2 - one_hot(labels_train) # dC/dz2
var_loss_w2 = np.dot(var_z2_w2.T, var_loss_z2) # dC/dw2
var_loss_b2 = np.dot(var_z2_b2, var_loss_z2) # dC/db2
    # layer 1 (hidden layer)
    # we have dC/dw1 = dz1/dw1 * da1/dz1 * dC/da1 = dz1/dw1 * da1/dz1 * dC/dz2 * dz2/da1
    # we have dC/db1 = dz1/db1 * da1/dz1 * dC/da1 = dz1/db1 * da1/dz1 * dC/dz2 * dz2/da1
var_z1_w1 = a0 # dz1/dw1
var_z1_b1 = np.ones((1, data_train.shape[0]), dtype=float) # dz1/db1
var_a1_z1 = a1 * (1 - a1) # da1/dz1 = sigmoid(z1) * (1 - sigmoid(z1)) = a1*(1-a1)
var_z2_a1 = w2 # dz2/da1
var_loss_a1 = np.dot(var_loss_z2, var_z2_a1.T) # dC/da1 = dC/dz2 * dz2/da1
var_loss_w1 = np.dot(var_z1_w1.T, var_a1_z1 * var_loss_a1)
var_loss_b1 = np.dot(var_z1_b1, var_a1_z1 * var_loss_a1)
# updated weights of the network
w1 = w1 - learning_rate * var_loss_w1
w2 = w2 - learning_rate * var_loss_w2
    # updated biases of the network
b1 = b1 - learning_rate * var_loss_b1
b2 = b2 - learning_rate * var_loss_b2
    # Forward pass with the updated parameters, to compute the new loss
a0 = data_train # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
    # Compute the monitored loss (MSE between the predictions and the one-hot targets)
loss = np.mean(np.square(predictions - one_hot(labels_train)))
print(loss)
return w1, w2, b1, b2, loss
# 13. train_mlp function
# first we write a function that will compute the accuracy
def compute_accuracy(target, prediction):
    """
    Compute the classification accuracy: the share of samples for which the predicted
    class (argmax of prediction) matches the target class (argmax of target).
    """
    correct = 0
    for i in range(target.shape[0]):
        if np.argmax(target[i]) == np.argmax(prediction[i]):
            correct += 1
    precision = correct / target.shape[0]
    return precision
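# For reference, an equivalent vectorized accuracy without an explicit Python loop
# (a sketch; compute_accuracy_vectorized is a hypothetical helper, not used below):
def compute_accuracy_vectorized(target, prediction):
    """Same result as compute_accuracy, computed with a single NumPy comparison."""
    return np.mean(np.argmax(target, axis=1) == np.argmax(prediction, axis=1))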
def train_mlp(
w1, w2, b1, b2, data_train, labels_train, learning_rate=0.00001, num_epoch=3
):
"""
perform gradient descent step using a binary cross_entropy loss looping on the num_epoch
this function will train our model so that the weights and biases are well updated
:param
w1: weight of the network
w2: weight of the network
b1: biases of the network
b2: biases of the network
data_train: the train set data which is the input of the network
labels_train: the desired output will play the rule of target
learning_rate: the factor that changes the weights
num_epoch: number of time to reapeat the gradient descent
:return:
w1: updated weight of the network
w2: updated weight of the network
b1: updated biases of the network
b2: updated biases of the network
train_accuracies: the list of training accuracy across num_epoch
losses: the list of loss that separates us from the desired output across the num_epoch
"""
    # Initial forward pass (its activations are used by the backward pass of the first epoch)
a0 = data_train # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
train_accuracies = []
losses = []
for i in range(num_epoch):
        # layer 2 (output layer)
        # we have dC/dw2 = dz2/dw2 * dC/dz2
        # we have dC/db2 = dz2/db2 * dC/dz2
var_z2_w2 = a1 # dz2/dw2
var_z2_b2 = np.ones((1, data_train.shape[0]), dtype=float) # dz2/db2
var_loss_z2 = a2 - one_hot(labels_train) # dC/dz2
var_loss_w2 = np.dot(var_z2_w2.T, var_loss_z2) # dC/dw2
var_loss_b2 = np.dot(var_z2_b2, var_loss_z2) # dC/db2
        # layer 1 (hidden layer)
        # we have dC/dw1 = dz1/dw1 * da1/dz1 * dC/da1 = dz1/dw1 * da1/dz1 * dC/dz2 * dz2/da1
        # we have dC/db1 = dz1/db1 * da1/dz1 * dC/da1 = dz1/db1 * da1/dz1 * dC/dz2 * dz2/da1
var_z1_w1 = a0 # dz1/dw1
var_z1_b1 = np.ones((1, data_train.shape[0]), dtype=float) # dz1/db1
var_a1_z1 = a1 * (
1 - a1
) # da1/dz1 = sigmoid(z1) * (1 - sigmoid(z1)) = a1*(1-a1)
var_z2_a1 = w2 # dz2/da1
var_loss_a1 = np.dot(var_loss_z2, var_z2_a1.T) # dC/da1 = dC/dz2 * dz2/da1
var_loss_w1 = np.dot(var_z1_w1.T, var_a1_z1 * var_loss_a1)
var_loss_b1 = np.dot(var_z1_b1, var_a1_z1 * var_loss_a1)
# updated weights of the network
w1 = w1 - learning_rate * var_loss_w1
w2 = w2 - learning_rate * var_loss_w2
        # updated biases of the network
b1 = b1 - learning_rate * var_loss_b1
b2 = b2 - learning_rate * var_loss_b2
        # Forward pass with the updated parameters
a0 = data_train # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
        # Compute the monitored loss (MSE between the predictions and the one-hot targets)
loss = np.mean(np.square(predictions - one_hot(labels_train)))
losses.append(loss)
# Compute accuracy
acc = compute_accuracy(one_hot(labels_train), predictions)
train_accuracies.append(acc)
return w1, w2, b1, b2, train_accuracies, losses
# 14. test_mlp function
def test_mlp(w1, w2, b1, b2, data_test, labels_test):
"""
the function perform a forward pass to test our trained model accuracy
:param
w1: weight of the network
w2: weight of the network
b1: biases of the network
b2: biases of the network
data_test: the test set data
labels_test: the corresponding labels for the data_test with whom we'll compare the predictions
:return:
test_accuracy: the testing accuracy
"""
# Forward pass
a0 = data_test # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
# Compute accuracy
test_accuracy = compute_accuracy(one_hot(labels_test), predictions)
return test_accuracy
# 15. run_mlp_training function
def run_mlp_training(
data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch
):
"""
train an MLP classifier compute taining accuracies across num_epoch and the testing_accuracy
:param
data_train: the training data
labels_train: the corresponding labels of the training data
data_test: the test data
labels_test: the corresponding labels of the test data
d_h: number of neurons in the hidden layer
learning_rate: the factor that changes the weights
num_epoch: number of time to reapeat the gradient descent
:return:
test_accuracy: the testing accuracy
train_accuracies: the list of training accuracy across num_epoch
losses: the list of loss that separates us from the desired output across the num_epoch
"""
d_in = data_train.shape[1]
d_out = 10
    # Random initialization of the network weights and biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases
w1, w2, b1, b2, train_accuracies, losses = train_mlp(
w1, w2, b1, b2, data_train, labels_train, learning_rate, num_epoch
)
test_accuracy = test_mlp(w1, w2, b1, b2, data_test, labels_test)
return train_accuracies, test_accuracy, losses
if __name__ == "__main__":
N = 30 # number of input data
d_in = 3 # input dimension
d_h = 3 # number of neurons in the hidden layer
d_out = 2 # output dimension (number of neurons of the output layer)
    # Random initialization of the network weights and biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases
    data = np.random.rand(N, d_in)  # create random input data
    targets = np.random.rand(N, d_out)  # create random targets
# test of learn_once_mse
w1, w2, b1, b2, loss = learn_once_mse(
w1, w2, b1, b2, data, targets, learning_rate=0.00001
)
# test of one_hot
print(one_hot(np.array([1, 2, 5])))
# test of learn_once_cross_entropy
data, labels = read_cifar_batch(path + "data_batch_1")
data_train, data_test, labels_train, labels_test = split_dataset(data, labels, 0.2)
d_in = data_train.shape[1] # input dimension
d_h = 10 # number of neurons in the hidden layer
d_out = 10 # output dimension (number of neurons of the output layer)
    # Random initialization of the network weights and biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases
w1, w2, b1, b2, loss = learn_once_cross_entropy(
w1, w2, b1, b2, data_train, labels_train, learning_rate=0.00001
)
# test of train_mlp
w1, w2, b1, b2, train_accuracies, losses = train_mlp(
w1, w2, b1, b2, data_train, labels_train, learning_rate=0.00001, num_epoch=100
)
# test of test_mlp
test_accuracy = test_mlp(w1, w2, b1, b2, data_test, labels_test)
print(test_accuracy)
# test of run_mlp_training
data, labels = read_cifar(path)
data_train, data_test, labels_train, labels_test = split_dataset(data, labels, 0.9)
train_accuracies, test_accuracy, losses = run_mlp_training(
data_train,
labels_train,
data_test,
labels_test,
d_h=64,
learning_rate=0.00001,
num_epoch=100,
)
    print(test_accuracy)
# plt.plot(losses)
# plt.title("losses")
# plt.savefig("mlp_loss.png", bbox_inches="tight")
# plt.show()
plt.plot(train_accuracies)
plt.title("Accuracies")
plt.savefig("mlp.png", bbox_inches="tight")
plt.show()
results/knn.png (15.5 KiB)
results/mlp.png (23.4 KiB)
results/mlp_loss.png (15.6 KiB)
from .knn_test import *
from .mlp_test import *
from .read_cifar_test import *
import unittest
import numpy as np
from knn import *
from read_cifar import *
# testing the correct execution of equal_shape_distance_matrix
def test_equal_shape_distance_matrix():
X = np.array([[1, 2], [0, 3]])
V = np.array([[3, 2], [1, 4]])
expected_dist = np.array([[4, -1], [5, 2]]) # X*X.T + V*V.T - 2*X*V.T
dist = equal_shape_distance_matrix(X, V)
assert np.all(dist == expected_dist)
# testing the correct execution of distance_matrix
def test_distance_matrix():
data_train = np.array([[1, 2], [0, 3], [3, 2], [1, 4]])
data_test = np.array([[0, 2], [1, 3]])
dist = distance_matrix(data_train, data_test)
first_dist = equal_shape_distance_matrix(
data_train[: data_test.shape[0]], data_test
)
second_dist = equal_shape_distance_matrix(
data_train[data_test.shape[0] : data_test.shape[0] * 2], data_test
)
expected_dist = np.concatenate((first_dist, second_dist), axis=1)
assert np.all(dist == expected_dist)
# testing the correct execution of knn_predict
def test_knn_predict():
data, labels = read_cifar(path)
data_train, data_test, labels_train, labels_test = split_dataset(
data, labels, split=0.2
)
assert data_train.shape[0] == 48000
num_test = 2000
mask = list(range(num_test))
data_test = data_test[mask]
labels_test = labels_test[mask]
num_train = 2000
mask = list(range(num_train))
data_train = data_train[mask]
labels_train = labels_train[mask]
dists = distance_matrix(data_train, data_test)
predicted_label = knn_predict(labels_train, dists, k=3)
assert predicted_label.shape[0] == labels_test.shape[0]
# testing the correct execution of evaluate_knn
def test_evaluate_knn():
data, labels = read_cifar(path)
data_train, data_test, labels_train, labels_test = split_dataset(
data, labels, split=0.2
)
assert data_train.shape[0] == 48000
num_test = 2000
mask = list(range(num_test))
data_test = data_test[mask]
labels_test = labels_test[mask]
num_train = 2000
mask = list(range(num_train))
data_train = data_train[mask]
labels_train = labels_train[mask]
dists = distance_matrix(data_train, data_test)
y_test_pred = knn_predict(labels_train, dists, k=3)
# total number of predictions
num_test = dists.shape[0]
# number of correct predictions
correct = np.sum(y_test_pred == labels_test)
# accuracy
predicted_accuracy = float(correct) / num_test
accuracy = evaluate_knn(data_train, labels_train, data_test, labels_test, k=3)
assert predicted_accuracy == accuracy
import numpy as np
from mlp import one_hot
# testing the correct execution of one_hot
def test_one_hot():
X = np.array([1, 2, 5])
expected_one_hot_encode = np.array(
[[0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, 1]]
)
one_hot_encode = one_hot(X)
assert np.all(one_hot_encode == expected_one_hot_encode)
from read_cifar import *
# testing the correct execution of read_cifar_batch()
def test_read_cifar_batch():
data, labels = read_cifar_batch(path + "data_batch_1")
assert data.shape[0] == 10000
# testing the correct execution of read_cifar()
def test_read_cifar():
data, labels = read_cifar(path)
assert data.shape[0] == 60000
# testing the correct execution of split_dataset
def test_split_dataset():
data, labels = read_cifar(path)
assert data.shape[0] == 60000
data_train, data_test, labels_train, labels_test = split_dataset(
data, labels, split=0.2
)
assert data_train.shape[0] == 48000
assert data_test.shape[0] == 12000