Unverified Commit 79448985 authored by Jangberry (Nomad-Debian)

Finally done

parent a95e44f9
import numpy as np
import read_cifar
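# read_cifar is the project's local module; it provides read_cifar() and
# split_dataset(), which are used in the __main__ block below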

def learn_once_mse(w1: np.array, b1: np.array, w2: np.array, b2: np.array, data: np.array, target: np.array, learning_rate: float):
    """
@@ -76,7 +78,7 @@ def one_hot(labels: np.array):
    return one_hot_matrix

def learn_once_cross_entropy(w1: np.array, b1: np.array, w2: np.array, b2: np.array, data: np.array, labels_train: np.array, learning_rate: float):
    """
    Performs one learning step of the MLP with cross-entropy loss
@@ -96,24 +98,42 @@ def learn_once_cross_entropy(w1: np.array, b1: np.array, w2: np.array, b2: np.ar
    loss -- loss of the forward pass
    """
    # Forward pass
    # the data are the input of the first layer
    a0 = data
    # input of the hidden layer
    z1 = np.matmul(a0, w1) + b1
    # output of the hidden layer (sigmoid activation function)
    a1 = 1 / (1 + np.exp(-z1))
    # input of the output layer
    z2 = np.matmul(a1, w2) + b2
    # output of the output layer (softmax activation function)
    a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)
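    # (softmax turns each row of logits into a probability distribution:
    # entries are positive and each row of a2 sums to 1)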
    # the predicted values are the outputs of the output layer
    predictions = a2

    one_hot_targets = one_hot(labels_train)

    # Compute loss (categorical cross-entropy, consistent with the softmax
    # output and with the gradient used in the backward pass below)
    loss = -np.mean(np.sum(one_hot_targets * np.log(predictions), axis=1))
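    # e.g. for one sample with one-hot target [0, 1, 0] and predictions
    # [0.2, 0.7, 0.1], the cross-entropy is -log(0.7) ≈ 0.357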
    # Backward pass
    # derivative of the loss with respect to the input of the output layer
    dC_dZ2 = predictions - one_hot_targets
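    # (for a softmax output trained with cross-entropy, the chain rule
    # collapses to this simple difference; no explicit softmax Jacobian
    # is needed)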
    # derivative of the loss with respect to the weights of the output layer
    dC_dW2 = np.matmul(a1.T, dC_dZ2)
    # derivative of the loss with respect to the biases of the output layer
    dC_dB2 = np.sum(dC_dZ2, axis=0, keepdims=True)
    # derivative of the loss with respect to the output of the hidden layer
    dC_dA1 = np.matmul(dC_dZ2, w2.T)
    # derivative of the loss with respect to the input of the hidden layer
    dC_dZ1 = dC_dA1 * (1 - a1) * a1
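    # (uses the sigmoid derivative: d(a1)/d(z1) = a1 * (1 - a1))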
    # derivative of the loss with respect to the weights of the hidden layer
    dC_dW1 = np.matmul(a0.T, dC_dZ1)
    # derivative of the loss with respect to the biases of the hidden layer
    dC_dB1 = np.sum(dC_dZ1, axis=0, keepdims=True)
    # Update weights and biases
    w1 -= learning_rate * dC_dW1
@@ -124,6 +144,103 @@ def learn_once_cross_entropy(w1: np.array, b1: np.array, w2: np.array, b2: np.ar
    return w1, b1, w2, b2, loss

def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs):
    """
    Trains the MLP
    Arguments:
    w1 -- weights of the hidden layer
    b1 -- biases of the hidden layer
    w2 -- weights of the output layer
    b2 -- biases of the output layer
    data_train -- training data
    labels_train -- labels of the training data
    learning_rate -- learning rate
    num_epochs -- number of epochs
    Returns:
    w1 -- updated weights of the hidden layer
    b1 -- updated biases of the hidden layer
    w2 -- updated weights of the output layer
    b2 -- updated biases of the output layer
    acc -- list of training accuracies across epochs
    """
    acc = []
    for i in range(num_epochs):
        w1, b1, w2, b2, loss = learn_once_cross_entropy(
            w1, b1, w2, b2, data_train, labels_train, learning_rate)
        acc.append(test_mlp(w1, b1, w2, b2, data_train, labels_train))
    return w1, b1, w2, b2, acc

def test_mlp(w1, b1, w2, b2, data_test, labels_test):
    """
    Tests the MLP
    Arguments:
    w1 -- weights of the hidden layer
    b1 -- biases of the hidden layer
    w2 -- weights of the output layer
    b2 -- biases of the output layer
    data_test -- test data
    labels_test -- labels of the test data
    Returns:
    acc -- accuracy
    """
    # Forward pass
    # the data are the input of the first layer
    a0 = data_test
    # input of the hidden layer
    z1 = np.matmul(a0, w1) + b1
    # output of the hidden layer (sigmoid activation function)
    a1 = 1 / (1 + np.exp(-z1))
    # input of the output layer
    z2 = np.matmul(a1, w2) + b2
    # output of the output layer (softmax activation function)
    a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)
    # the predicted values are the outputs of the output layer
    predictions = a2
    # Compute accuracy
    acc = np.mean(np.argmax(predictions, axis=1) == labels_test)
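    # e.g. predictions [[0.1, 0.9], [0.8, 0.2]] with labels [1, 0] give
    # acc = 1.0, since both argmax predictions match their labels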
    return acc

def run_mlp_training(data_train, labels_train, data_test, labels_test, learning_rate, num_epochs):
    """
    Runs the MLP training
    Arguments:
    data_train -- training data
    labels_train -- labels of the training data
    data_test -- test data
    labels_test -- labels of the test data
    learning_rate -- learning rate
    num_epochs -- number of epochs
    Returns:
    w1 -- weights of the hidden layer
    b1 -- biases of the hidden layer
    w2 -- weights of the output layer
    b2 -- biases of the output layer
    acc -- list of training accuracies across epochs
    """
    N = data_train.shape[0]  # number of training samples
    d_in = data_train.shape[1]  # input dimension
    d_h = 3  # number of neurons in the hidden layer
    # output dimension (number of neurons of the output layer)
    d_out = np.max(labels_train) + 1
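    # (for CIFAR-10, labels run from 0 to 9, so d_out = 10)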
    # Random initialization of the network weights and biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases
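    # (2 * rand - 1 rescales the uniform draw from [0, 1) onto [-1, 1);
    # biases start at zero, a common initialization choice)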
    w1, b1, w2, b2, acc = train_mlp(
        w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs)
    return w1, b1, w2, b2, acc
if __name__ == "__main__": if __name__ == "__main__":
N = 30 # number of input data N = 30 # number of input data
d_in = 3 # input dimension d_in = 3 # input dimension
...@@ -145,3 +262,28 @@ if __name__ == "__main__": ...@@ -145,3 +262,28 @@ if __name__ == "__main__":
print(loss) print(loss)
print(one_hot(np.array([9, 1, 3, 0, 6, 5, 2, 7, 8, 4]))) print(one_hot(np.array([9, 1, 3, 0, 6, 5, 2, 7, 8, 4])))
    N = 30  # number of input samples
    d_in = 3  # input dimension
    d_h = 3  # number of neurons in the hidden layer
    d_out = 5  # output dimension (number of neurons of the output layer)
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases
    data = np.random.rand(N, d_in)  # create random input data
    targets = np.random.randint(1, d_out, N)  # create random targets in [1, d_out)
    for i in range(100):
        w1, b1, w2, b2, loss = learn_once_cross_entropy(
            w1, b1, w2, b2, data, targets, 0.1)
        print(loss)
    data, labels = read_cifar.read_cifar("data/cifar-10-batches-py/")
    data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(
        data, labels, 0.8)
    w1, b1, w2, b2, acc = run_mlp_training(
        data_train, labels_train, data_test, labels_test, 0.1, 100)
    print(acc)