# nn_regression-completed.py
import matplotlib.pyplot as plt
import numpy as np
def read_data(file_name, delimiter=','):
    """ Reads the file containing the data and returns the corresponding matrices

    Parameters
    ----------
    file_name : name of the file containing the data
    delimiter : character separating columns in the file ("," by default)

    Returns
    -------
    x : data matrix of size [N, num_vars]
    d : matrix containing the target variable values of size [N, num_targets]
    N : number of elements
    num_vars : number of predictor variables
    num_targets : number of target variables
    """
    data = np.loadtxt(file_name, delimiter=delimiter)
    num_targets = 1
    num_vars = data.shape[1] - num_targets
    N = data.shape[0]
    x = data[:, :num_vars]
    d = data[:, num_vars:].reshape(N, 1)
    return x, d, N, num_vars, num_targets
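# Illustrative input layout assumed by read_data (hypothetical values, not taken from the
# provided data files): one comma-separated row per example, predictor columns first and
# the single target value last, e.g. with two predictors:
#   1.0,2.0,3.5
#   0.5,1.5,2.0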
def normalization(x):
    """ Normalizes the data by centering and scaling the predictor variables

    Parameters
    ----------
    x : data matrix of size [N, num_vars]
        with N : number of elements and num_vars : number of predictor variables

    Returns
    -------
    x_norm : centered-scaled data matrix of size [N, num_vars]
    mu : mean of the variables, vector of size [num_vars]
    sigma : standard deviation of the variables, vector of size [num_vars]
    """
    mu = np.mean(x, 0)
    sigma = np.std(x, 0)
    x_norm = (x - mu) / sigma
    return x_norm, mu, sigma
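# A minimal sketch (not part of the original exercise): the statistics returned by
# normalization can be reused to put a new, unseen example on the same scale before
# feeding it to the trained network. `x_new` is a hypothetical input of size [1, num_vars].
def apply_normalization(x_new, mu, sigma):
    # Center and scale with the training-set statistics
    return (x_new - mu) / sigma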
def split_data(x, d, val_prop=0.2, test_prop=0.2):
    """ Splits the initial data into three distinct subsets for training, validation, and testing

    Parameters
    ----------
    x : data matrix of size [N, num_vars]
    d : matrix of target values of size [N, num_targets]
    val_prop : proportion of validation data over the entire dataset (between 0 and 1)
    test_prop : proportion of test data over the entire dataset (between 0 and 1)
        with N : number of elements, num_vars : number of predictor variables, num_targets : number of target variables

    Returns
    -------
    x_train : training data matrix
    d_train : training target values matrix
    x_val : validation data matrix
    d_val : validation target values matrix
    x_test : test data matrix
    d_test : test target values matrix
    """
    assert val_prop + test_prop < 1.0
    N = x.shape[0]
    indices = np.arange(N)
    np.random.shuffle(indices)
    num_val = int(N * val_prop)
    num_test = int(N * test_prop)
    num_train = N - num_val - num_test
    x = x[indices, :]
    d = d[indices, :]
    x_train = x[:num_train, :]
    d_train = d[:num_train, :]
    x_val = x[num_train:num_train + num_val, :]
    d_val = d[num_train:num_train + num_val, :]
    x_test = x[N - num_test:, :]
    d_test = d[N - num_test:, :]
    return x_train, d_train, x_val, d_val, x_test, d_test
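# Worked example (hypothetical sizes, not from the provided data): with N = 100 examples and
# the default proportions val_prop=0.2 and test_prop=0.2, split_data returns 60 training,
# 20 validation and 20 test examples, drawn in a random order.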
def calculate_mse_cost(y, d):
    """ Calculates the value of the MSE (mean squared error) cost function

    Parameters
    ----------
    y : matrix of predicted values, of size [num_targets, N]
    d : matrix of actual target values, of size [num_targets, N]
        with N : number of examples (columns)

    Returns
    -------
    cost : value corresponding to the MSE cost function (mean squared error)
    """
    N = y.shape[1]
    cost = np.square(y - d).sum() / 2 / N
    return cost
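# Worked example (hypothetical values): with y = np.array([[1., 2.]]) and d = np.array([[0., 0.]]),
# the cost is (1 + 4) / (2 * 2) = 1.25.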
def forward_pass(x, W, b, activation):
    """ Performs a forward pass in the neural network

    Parameters
    ----------
    x : input matrix, of size [num_vars, N]
    W : list containing the weight matrices of the network
    b : list containing the bias matrices of the network
    activation : list containing the activation functions of the network layers
        with N : number of elements, num_vars : number of predictor variables

    Returns
    -------
    a : list containing the input potentials of the network layers
    h : list containing the outputs of the network layers
    """
    h = [x]
    a = []
    for i in range(len(b)):
        a.append(W[i].dot(h[i]) + b[i])
        h.append(activation[i](a[i]))
    return a, h
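# Shape sketch (using the architecture defined later in this script): with
# D_c = [num_vars, 5, 10, num_targets] and an input x of size [num_vars, N],
# W[0] is [5, num_vars], so a[0] and h[1] are [5, N]; a[1] and h[2] are [10, N];
# and the network output h[-1] is [num_targets, N].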
def backward_pass(delta_h, a, h, W, activation):
    """ Performs a backward pass in the neural network (backpropagation)

    Parameters
    ----------
    delta_h : matrix containing the gradient of the cost with respect to the output of the network
    a : list containing the input potentials of the network layers
    h : list containing the outputs of the network layers
    W : list containing the weight matrices of the network
    activation : list containing the activation functions of the network layers

    Returns
    -------
    delta_W : list containing the gradient matrices of the network layer weights
    delta_b : list containing the gradient matrices of the network layer biases
    """
    delta_b = []
    delta_W = []
    N = h[0].shape[1]  # number of examples in the batch
    for i in range(len(W) - 1, -1, -1):
        delta_a = delta_h * activation[i](a[i], True)
        delta_b.append(delta_a.mean(1).reshape(-1, 1))
        # Average over the N examples, consistently with the averaging used for delta_b
        delta_W.append(delta_a.dot(h[i].T) / N)
        delta_h = (W[i].T).dot(delta_a)
    delta_b = delta_b[::-1]
    delta_W = delta_W[::-1]
    return delta_W, delta_b
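# A minimal sketch (not part of the original exercise) of a finite-difference check:
# it perturbs a single weight W[layer][i, j] by +/- eps and returns a numerical estimate
# of the corresponding cost gradient, to be compared against the matching entry of the
# delta_W list returned by backward_pass. Arguments follow the column-vector layout used
# below (x of size [num_vars, N], d of size [num_targets, N]).
def numerical_gradient_check(x, d, W, b, activation, layer=0, i=0, j=0, eps=1e-5):
    W_plus = [w.copy() for w in W]
    W_minus = [w.copy() for w in W]
    W_plus[layer][i, j] += eps
    W_minus[layer][i, j] -= eps
    _, h_plus = forward_pass(x, W_plus, b, activation)
    _, h_minus = forward_pass(x, W_minus, b, activation)
    cost_plus = calculate_mse_cost(h_plus[-1], d)
    cost_minus = calculate_mse_cost(h_minus[-1], d)
    # Centered finite-difference estimate of dJ/dW[layer][i, j]
    return (cost_plus - cost_minus) / (2 * eps)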
def sigmoid(z, deriv=False):
    """ Calculates the value of the sigmoid function or its derivative applied to z

    Parameters
    ----------
    z : can be a scalar or a matrix
    deriv : boolean. If False returns the value of the sigmoid function, if True returns its derivative

    Returns
    -------
    s : value of the sigmoid function applied to z or its derivative. Same dimension as z
    """
    s = 1 / (1 + np.exp(-z))
    if deriv:
        return s * (1 - s)
    else:
        return s
def linear(z, deriv=False):
    """ Calculates the value of the linear function or its derivative applied to z

    Parameters
    ----------
    z : can be a scalar or a matrix
    deriv : boolean. If False returns the value of the linear function, if True returns its derivative

    Returns
    -------
    s : value of the linear function applied to z or its derivative. Same dimension as z
    """
    if deriv:
        return 1
    else:
        return z
def relu(z, deriv=False):
    """ Calculates the value of the relu function or its derivative applied to z

    Parameters
    ----------
    z : can be a scalar or a matrix
    deriv : boolean. If False returns the value of the relu function, if True returns its derivative

    Returns
    -------
    s : value of the relu function applied to z or its derivative. Same dimension as z
    """
    r = np.zeros(z.shape)
    if deriv:
        pos = np.where(z >= 0)
        r[pos] = 1.0
        return r
    else:
        return np.maximum(r, z)
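# Example (hypothetical values): relu(np.array([[-2., 3.]])) gives [[0., 3.]] and
# relu(np.array([[-2., 3.]]), deriv=True) gives [[0., 1.]].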
# ===================== Part 1: Data Reading and Normalization =====================
print("Reading data ...")
x, d, N, num_vars, num_targets = read_data("food_truck.txt")
# x, d, N, num_vars, num_targets = read_data("houses.txt")
# Displaying the first 10 examples from the dataset
print("Displaying the first 10 examples from the dataset: ")
for i in range(0, 10):
    print(f"x = {x[i,:]}, d = {d[i]}")
# Normalizing the variables (centering and scaling)
print("Normalizing the variables ...")
x, mu, sigma = normalization(x)
dmax = d.max()
d = d / dmax
# Splitting the data into training, validation, and test subsets
x_train, d_train, x_val, d_val, x_test, d_test = split_data(x, d)
# ===================== Part 2: Training =====================
# Choosing the learning rate and number of iterations
alpha = 0.001
num_iters = 500
train_costs = np.zeros(num_iters)
val_costs = np.zeros(num_iters)
# Network dimensions
D_c = [num_vars, 5, 10, num_targets] # list containing the number of neurons for each layer
activation = [relu, sigmoid, linear] # list containing the activation functions for the hidden layers and the output layer
# Random initialization of the network weights
W = []
b = []
for i in range(len(D_c)-1):
    W.append(2 * np.random.random((D_c[i+1], D_c[i])) - 1)
    b.append(np.zeros((D_c[i+1], 1)))
x_train = x_train.T # Data is presented as column vectors at the input of the network
d_train = d_train.T
x_val = x_val.T # Data is presented as column vectors at the input of the network
d_val = d_val.T
x_test = x_test.T # Data is presented as column vectors at the input of the network
d_test = d_test.T
for t in range(num_iters):

    #############################################################################
    # Forward pass: calculating predicted output y on validation data #
    #############################################################################
    a, h = forward_pass(x_val, W, b, activation)
    y_val = h[-1]  # Predicted output

    ###############################################################################
    # Forward pass: calculating predicted output y on training data #
    ###############################################################################
    a, h = forward_pass(x_train, W, b, activation)
    y_train = h[-1]  # Predicted output

    ###########################################
    # Calculating the MSE loss function #
    ###########################################
    train_costs[t] = calculate_mse_cost(y_train, d_train)
    val_costs[t] = calculate_mse_cost(y_val, d_val)

    ####################################
    # Backward pass: backpropagation #
    ####################################
    delta_h = (y_train - d_train)  # For the last layer
    delta_W, delta_b = backward_pass(delta_h, a, h, W, activation)

    #############################################
    # Updating weights and biases #
    #############################################
    for i in range(len(b)-1, -1, -1):
        b[i] -= alpha * delta_b[i]
        W[i] -= alpha * delta_W[i]
print("Final cost on the training set: ", train_costs[-1])
print("Final cost on the validation set: ", val_costs[-1])
# Plotting the evolution of the cost function during backpropagation
plt.figure(0)
plt.title("Evolution of the cost function during backpropagation")
plt.plot(np.arange(train_costs.size), train_costs, label="Training")
plt.plot(np.arange(val_costs.size), val_costs, label="Validation")
plt.legend(loc="upper left")
plt.xlabel("Number of iterations")
plt.ylabel("Cost")
plt.show()
# ===================== Part 3: Evaluation on the test set =====================
#######################################################################
# Forward pass: calculating predicted output y on test data #
#######################################################################
a, h = forward_pass(x_test, W, b, activation)
y_test = h[-1] # Predicted output
cost = calculate_mse_cost(y_test, d_test)
print("Test set cost: ", cost)