import numpy as np


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
    """Perform one gradient descent step on the MSE loss and return the updated parameters and the loss."""
    N = len(targets)  # number of training examples

    # Forward pass
    a0 = data  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
    predictions = a2  # the predicted values are the outputs of the output layer

    # Compute loss (MSE)
    loss = np.mean(np.square(predictions - targets))

    # Backward pass, according to the formulas established by theory
    d_a2 = 2 / N * (a2 - targets)          # dL/dA2 for the MSE loss
    d_z2 = d_a2 * a2 * (1 - a2)            # dL/dZ2 (sigmoid derivative)
    d_w2 = np.matmul(a1.T, d_z2)           # dL/dW2
    d_b2 = np.sum(d_z2, axis=0)            # dL/dB2 (summed over the batch)
    d_a1 = np.matmul(d_z2, w2.T)           # dL/dA1
    d_z1 = d_a1 * a1 * (1 - a1)            # dL/dZ1 (sigmoid derivative)
    d_w1 = np.matmul(a0.T, d_z1)           # dL/dW1
    d_b1 = np.sum(d_z1, axis=0)            # dL/dB1 (summed over the batch)

    # Update the weights and biases of the network with the gradient descent method
    w1 -= learning_rate * d_w1
    b1 -= learning_rate * d_b1
    w2 -= learning_rate * d_w2
    b2 -= learning_rate * d_b2

    return w1, b1, w2, b2, loss


def one_hot(labels):
    """Convert a vector of integer labels into a one-hot encoded matrix."""
    # Total number of classes
    num_classes = np.max(labels) + 1
    # Build the one-hot matrix by indexing the identity matrix with the labels
    one_hot_matrix = np.eye(num_classes)[labels]
    return one_hot_matrix


def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
    """Perform one gradient descent step on the cross-entropy loss and return the updated parameters and the loss."""
    N = len(labels_train)  # number of training examples

    # Forward pass
    a0 = data  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
    predictions = a2  # the predicted values are the outputs of the output layer

    targets_one_hot = one_hot(labels_train)  # targets as a one-hot encoding of the desired labels

    # Cross-entropy loss
    loss = -np.sum(targets_one_hot * np.log(predictions)) / N

    # Backpropagation
    # We use the admitted simplified gradient dL/dZ2 = A2 - Y
    # (exact when the output layer is a softmax combined with cross-entropy)
    d_z2 = a2 - targets_one_hot
    d_w2 = np.dot(a1.T, d_z2) / N
    d_b2 = np.sum(d_z2, axis=0) / N
    d_a1 = np.dot(d_z2, w2.T)
    d_z1 = d_a1 * a1 * (1 - a1)  # sigmoid derivative of the hidden layer
    d_w1 = np.dot(a0.T, d_z1) / N
    d_b1 = np.sum(d_z1, axis=0) / N

    # Update the weights and biases of the network with the gradient descent method
    w1 -= learning_rate * d_w1
    b1 -= learning_rate * d_b1
    w2 -= learning_rate * d_w2
    b2 -= learning_rate * d_b2

    return w1, b1, w2, b2, loss
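

# --- Minimal usage sketch (illustrative, not part of the original module) ---
# A hedged example of how the two learning-step functions above might be called
# on toy data. The layer sizes (d_in, d_h, d_out), the batch size, the random
# initialisation and the learning rate below are arbitrary assumptions made for
# this sketch, not values prescribed by the module.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    N, d_in, d_h, d_out = 8, 4, 5, 3  # assumed toy dimensions

    data = rng.random((N, d_in))
    targets = rng.random((N, d_out))  # continuous targets for the MSE step
    # Integer class labels covering all d_out classes, for the cross-entropy step
    labels_train = np.array([0, 1, 2, 0, 1, 2, 0, 1])

    # Small random weights and zero biases (row-vector biases broadcast over the batch)
    w1 = 2 * rng.random((d_in, d_h)) - 1
    b1 = np.zeros((1, d_h))
    w2 = 2 * rng.random((d_h, d_out)) - 1
    b2 = np.zeros((1, d_out))

    w1, b1, w2, b2, mse_loss = learn_once_mse(w1, b1, w2, b2, data, targets, 0.1)
    print("MSE loss after one step:", mse_loss)

    w1, b1, w2, b2, ce_loss = learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, 0.1)
    print("Cross-entropy loss after one step:", ce_loss)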