import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
    N = len(targets) # number of training examples
    
    # Forward pass
    a0 = data # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
    predictions = a2  # the predicted values are the outputs of the output layer
    
    # Compute loss (MSE)
    loss = np.mean(np.square(predictions - targets))
    
    # Backward pass: gradients of the MSE loss, following the chain rule
    d_a2 = 2 / N * (a2 - targets)
    d_z2 = d_a2 * a2 * (1 - a2)  # derivative of the sigmoid
    d_w2 = np.matmul(a1.T, d_z2)
    d_b2 = np.sum(d_z2, axis=0, keepdims=True)  # sum over the batch
    d_a1 = np.matmul(d_z2, w2.T)
    d_z1 = d_a1 * a1 * (1 - a1)  # derivative of the sigmoid
    d_w1 = np.matmul(a0.T, d_z1)
    d_b1 = np.sum(d_z1, axis=0, keepdims=True)  # sum over the batch
    
    # Update the weights and biases with one gradient descent step
    w1 -= learning_rate * d_w1
    b1 -= learning_rate * d_b1
    w2 -= learning_rate * d_w2
    b2 -= learning_rate * d_b2
    
    return w1, b1, w2, b2, loss
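

# A minimal sketch of how learn_once_mse could be called. The shapes below
# (8 samples, 4 input features, 16 hidden units, 3 outputs) and the random
# initialization are illustrative assumptions, not part of the original file.
def _demo_learn_once_mse():
    rng = np.random.default_rng(0)
    data = rng.random((8, 4))         # 8 samples, 4 input features
    targets = rng.random((8, 3))      # 8 samples, 3 target values in [0, 1)
    w1 = 2 * rng.random((4, 16)) - 1  # hidden layer weights
    b1 = np.zeros((1, 16))            # hidden layer biases
    w2 = 2 * rng.random((16, 3)) - 1  # output layer weights
    b2 = np.zeros((1, 3))             # output layer biases
    w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, 0.1)
    print("MSE loss after one step:", loss)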


def one_hot(labels):
    # Total number of classes
    num_classes = np.max(labels) + 1
    # Each label selects the corresponding row of the identity matrix
    one_hot_matrix = np.eye(num_classes)[labels]
    return one_hot_matrix
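

# Quick illustration of one_hot on a hypothetical label vector; the labels
# below are made up for the example.
def _demo_one_hot():
    print(one_hot(np.array([0, 2, 1])))
    # -> [[1. 0. 0.]
    #     [0. 0. 1.]
    #     [0. 1. 0.]]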


def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
    N = len(labels_train) # number of training examples
    
    # Forward pass
    a0 = data # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
    predictions = a2  # the predicted values are the outputs of the output layer
    
    targets_one_hot = one_hot(labels_train)  # one-hot encoding of the desired labels
    
    # Cross-entropy loss (a small epsilon guards against log(0))
    eps = 1e-12
    loss = -np.mean(np.sum(targets_one_hot * np.log(predictions + eps), axis=1))
    
    # Backward pass. With the cross-entropy loss, we admit that the gradient of
    # the loss with respect to Z2 simplifies to (A2 - Y) / N, so the output
    # layer no longer needs the sigmoid derivative.
    d_z2 = (a2 - targets_one_hot) / N
    d_w2 = np.matmul(a1.T, d_z2)
    d_b2 = np.sum(d_z2, axis=0, keepdims=True)  # sum over the batch
    d_a1 = np.matmul(d_z2, w2.T)
    d_z1 = d_a1 * a1 * (1 - a1)  # derivative of the sigmoid
    d_w1 = np.matmul(a0.T, d_z1)
    d_b1 = np.sum(d_z1, axis=0, keepdims=True)  # sum over the batch
    
    # Update the weights and biases with one gradient descent step
    w1 -= learning_rate * d_w1
    b1 -= learning_rate * d_b1
    w2 -= learning_rate * d_w2
    b2 -= learning_rate * d_b2
    
    return w1, b1, w2, b2, loss
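

# A minimal sketch of a full training loop built on learn_once_cross_entropy.
# The CIFAR-like shapes (3072 input features, 64 hidden units, 10 classes),
# the epoch count, and the synthetic data are illustrative assumptions.
def _demo_training_loop():
    rng = np.random.default_rng(0)
    data = rng.random((32, 3072))  # 32 fake flattened 32x32x3 images
    labels = np.arange(32) % 10    # fake labels covering all 10 classes
    # Weights scaled by 1/sqrt(fan_in) to avoid saturating the sigmoid
    w1 = (2 * rng.random((3072, 64)) - 1) / np.sqrt(3072)
    b1 = np.zeros((1, 64))
    w2 = (2 * rng.random((64, 10)) - 1) / np.sqrt(64)
    b2 = np.zeros((1, 10))
    for epoch in range(10):
        w1, b1, w2, b2, loss = learn_once_cross_entropy(
            w1, b1, w2, b2, data, labels, learning_rate=0.1
        )
        print(f"epoch {epoch}: cross-entropy loss = {loss:.4f}")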