Commit cd1f2e19 authored by Cart Milan

com

parent f6c99d4c
@@ -8,31 +8,21 @@ def sigmoid(x):
 def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
-    N_out = len(data)  # number of training examples
+    N_out = len(data)
-    # Forward pass
-    a0 = data  # the data are the input of the first layer
+    a0 = data
-    z1 = np.dot(a0, w1) + b1  # input of the hidden layer
+    z1 = np.dot(a0, w1) + b1
-    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    a1 = sigmoid(z1)
-    z2 = np.dot(a1, w2) + b2  # input of the output layer
+    z2 = np.dot(a1, w2) + b2
-    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
+    a2 = sigmoid(z2)
-    predictions = a2  # the predicted values are the outputs of the output layer
+    predictions = a2
     # Compute loss (MSE)
     loss = np.mean(np.square(predictions - targets))
     print(f'loss: {loss}')
-    # print('shape a1', a1.shape)
-    # print('shape w1', w1.shape)
-    # print('shape b1', b1.shape)
-    # print('shape a2', a2.shape)
-    # print('shape w2', w2.shape)
-    # print('shape b2', b2.shape)
     # Backpropagation
     delta_a2 = 2 / N_out * (a2 - targets)
-    delta_z2 = delta_a2 * (a2 * (1 - a2))  # we divide by the sample size to have an average of the error and avoid big gradient jumps
+    delta_z2 = delta_a2 * (a2 * (1 - a2))
     delta_w2 = np.dot(a1.T, delta_z2)
     delta_b2 = np.sum(delta_z2, axis=0, keepdims=True)
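(The delta lines at the end of this hunk are the chain rule written out. With the batch-averaged MSE loss and the sigmoid derivative \sigma'(z) = \sigma(z)(1 - \sigma(z)), the gradients are

\frac{\partial L}{\partial a_2} = \frac{2}{N}(a_2 - t), \qquad
\frac{\partial L}{\partial z_2} = \frac{\partial L}{\partial a_2} \odot a_2(1 - a_2), \qquad
\frac{\partial L}{\partial w_2} = a_1^\top \frac{\partial L}{\partial z_2},

where N is N_out, t the targets, and \odot the elementwise product; the 1/N factor in delta_a2 is what the removed comment about averaging the error was getting at.)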
@@ -44,12 +34,12 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
     return w1, b1, w2, b2, loss

 def one_hot(labels):
-    num_classes = int(np.max(labels) + 1)  # num_classes = 10
+    num_classes = int(np.max(labels) + 1)
     one_hot_matrix = np.eye(num_classes)[labels]
     return one_hot_matrix

 def softmax_stable(x):
-    # We use this function to avoid computing big numbers
     return(np.exp(x - np.max(x, axis=1, keepdims=True)) / np.exp(x - np.max(x, axis=1, keepdims=True)).sum())

 def cross_entropy_loss(y_pred, y_true_one_hot):
...
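One caveat in softmax_stable as committed (the line is unchanged by this commit): the final .sum() has no axis argument, so it normalizes by the sum over the entire batch rather than per row, and individual rows will not sum to 1 whenever x has more than one row. A minimal sketch of the row-wise variant that the max-shift suggests was intended:

import numpy as np

def softmax_stable(x):
    # Shift by the row-wise max so np.exp never sees large positives,
    # then normalize each row independently (note axis=1, keepdims=True).
    shifted = np.exp(x - np.max(x, axis=1, keepdims=True))
    return shifted / shifted.sum(axis=1, keepdims=True)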
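For context, a minimal sketch of how learn_once_mse and one_hot fit together for one training step. The sizes, seed, and learning rate here are illustrative assumptions, not values from this repository, and it assumes the elided tail of learn_once_mse performs the gradient update and returns the values in its return statement:

import numpy as np

rng = np.random.default_rng(0)
n, d_in, d_h, d_out = 32, 3072, 64, 10       # assumed sizes, e.g. CIFAR-10-like inputs

w1 = 2 * rng.random((d_in, d_h)) - 1         # hidden-layer weights in [-1, 1)
b1 = np.zeros((1, d_h))
w2 = 2 * rng.random((d_h, d_out)) - 1        # output-layer weights
b2 = np.zeros((1, d_out))

data = rng.random((n, d_in))
labels = rng.integers(0, d_out, size=n)
targets = one_hot(labels)                    # e.g. one_hot([0, 2]) -> rows of np.eye(3) indexed by label

w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate=0.1)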