From 4461aff372a159fdf8f39deb31a7e3883579a02c Mon Sep 17 00:00:00 2001 From: Emmanuel Dellandrea <emmanuel.dellandrea@ec-lyon.fr> Date: Tue, 21 Jan 2025 09:31:51 +0100 Subject: [PATCH] Create nn_regression-completed.py --- .../Session_2/nn_regression-completed.py | 346 ++++++++++++++++++ 1 file changed, 346 insertions(+) create mode 100644 Practical_sessions/Session_2/nn_regression-completed.py diff --git a/Practical_sessions/Session_2/nn_regression-completed.py b/Practical_sessions/Session_2/nn_regression-completed.py new file mode 100644 index 0000000..6f2a079 --- /dev/null +++ b/Practical_sessions/Session_2/nn_regression-completed.py @@ -0,0 +1,346 @@ + +import matplotlib.pyplot as plt +import numpy as np + +def read_data(file_name, delimiter=','): + """ Reads the file containing the data and returns the corresponding matrices + + Parameters + ---------- + file_name : name of the file containing the data + delimiter : character separating columns in the file ("," by default) + + Returns + ------- + x : data matrix of size [N, num_vars] + d : matrix containing the target variable values of size [N, num_targets] + N : number of elements + num_vars : number of predictor variables + num_targets : number of target variables + + """ + + data = np.loadtxt(file_name, delimiter=delimiter) + + num_targets = 1 + num_vars = data.shape[1] - num_targets + N = data.shape[0] + + x = data[:, :num_vars] + d = data[:, num_vars:].reshape(N,1) + + return x, d, N, num_vars, num_targets + +def normalization(x): + """ Normalizes the data by centering and scaling the predictor variables + + Parameters + ---------- + X : data matrix of size [N, num_vars] + + with N : number of elements and num_vars : number of predictor variables + + Returns + ------- + X_norm : centered-scaled data matrix of size [N, num_vars] + mu : mean of the variables of size [1, num_vars] + sigma : standard deviation of the variables of size [1, num_vars] + + """ + + mu = np.mean(x, 0) + sigma = np.std(x, 0) + x_norm = (x - mu) / sigma + + return x_norm, mu, sigma + +def split_data(x, d, val_prop=0.2, test_prop=0.2): + """ Splits the initial data into three distinct subsets for training, validation, and testing + + Parameters + ---------- + x : data matrix of size [N, num_vars] + d : matrix of target values [N, num_targets] + val_prop : proportion of validation data over the entire dataset (between 0 and 1) + test_prop : proportion of test data over the entire dataset (between 0 and 1) + + with N : number of elements, num_vars : number of predictor variables, num_targets : number of target variables + + Returns + ------- + x_train : training data matrix + d_train : training target values matrix + x_val : validation data matrix + d_val : validation target values matrix + x_test : test data matrix + d_test : test target values matrix + + """ + assert val_prop + test_prop < 1.0 + + N = x.shape[0] + indices = np.arange(N) + np.random.shuffle(indices) + num_val = int(N*val_prop) + num_test = int(N*test_prop) + num_train = N - num_val - num_test + + x = x[indices,:] + d = d[indices,:] + + x_train = x[:num_train,:] + d_train = d[:num_train,:] + + x_val = x[num_train:num_train+num_val,:] + d_val = d[num_train:num_train+num_val,:] + + x_test = x[N-num_test:,:] + d_test = d[N-num_test:,:] + + return x_train, d_train, x_val, d_val, x_test, d_test + +def calculate_mse_cost(y, d): + """ Calculates the value of the MSE (mean squared error) cost function + + Parameters + ---------- + y : matrix of predicted data + d : matrix of actual data + + Returns + 
------- + cost : value corresponding to the MSE cost function (mean squared error) + + """ + + N = y.shape[1] + cost = np.square(y - d).sum() / 2 / N + + return cost + +def forward_pass(x, W, b, activation): + """ Performs a forward pass in the neural network + + Parameters + ---------- + x : input matrix, of size num_vars x N + W : list containing the weight matrices of the network + b : list containing the bias matrices of the network + activation : list containing the activation functions of the network layers + + with N : number of elements, num_vars : number of predictor variables + + Returns + ------- + a : list containing the input potentials of the network layers + h : list containing the outputs of the network layers + + """ + h = [x] + a = [] + for i in range(len(b)): + a.append( W[i].dot(h[i]) + b[i] ) + h.append( activation[i](a[i]) ) + + return a, h + +def backward_pass(delta_h, a, h, W, activation): + """ Performs a backward pass in the neural network (backpropagation) + + Parameters + ---------- + delta_h : matrix containing the gradient of the cost with respect to the output of the network + a : list containing the input potentials of the network layers + h : list containing the outputs of the network layers + W : list containing the weight matrices of the network + activation : list containing the activation functions of the network layers + + Returns + ------- + delta_W : list containing the gradient matrices of the network layer weights + delta_b : list containing the gradient matrices of the network layer biases + + """ + + delta_b = [] + delta_W = [] + + for i in range(len(W)-1,-1,-1): + + delta_a = delta_h * activation[i](a[i], True) + + delta_b.append( delta_a.mean(1).reshape(-1,1) ) + delta_W.append( delta_a.dot(h[i].T) ) + + delta_h = (W[i].T).dot(delta_a) + + delta_b = delta_b[::-1] + delta_W = delta_W[::-1] + + return delta_W, delta_b + +def sigmoid(z, deriv=False): + """ Calculates the value of the sigmoid function or its derivative applied to z + + Parameters + ---------- + z : can be a scalar or a matrix + deriv : boolean. If False returns the value of the sigmoid function, if True returns its derivative + + Returns + ------- + s : value of the sigmoid function applied to z or its derivative. Same dimension as z + + """ + + s = 1 / (1 + np.exp(-z)) + if deriv: + return s * (1 - s) + else : + return s + +def linear(z, deriv=False): + """ Calculates the value of the linear function or its derivative applied to z + + Parameters + ---------- + z : can be a scalar or a matrix + deriv : boolean. If False returns the value of the linear function, if True returns its derivative + + Returns + ------- + s : value of the linear function applied to z or its derivative. Same dimension as z + + """ + if deriv: + return 1 + else : + return z + +def relu(z, deriv=False): + """ Calculates the value of the relu function or its derivative applied to z + + Parameters + ---------- + z : can be a scalar or a matrix + deriv : boolean. If False returns the value of the relu function, if True returns its derivative + + Returns + ------- + s : value of the relu function applied to z or its derivative. 
Same dimension as z + + """ + + r = np.zeros(z.shape) + if deriv: + pos = np.where(z>=0) + r[pos] = 1.0 + return r + else : + return np.maximum(r,z) + + +# ===================== Part 1: Data Reading and Normalization ===================== +print("Reading data ...") + +x, d, N, num_vars, num_targets = read_data("food_truck.txt") +# x, d, N, num_vars, num_targets = read_data("houses.txt") + +# Displaying the first 10 examples from the dataset +print("Displaying the first 10 examples from the dataset: ") +for i in range(0, 10): + print(f"x = {x[i,:]}, d = {d[i]}") + +# Normalizing the variables (centering and scaling) +print("Normalizing the variables ...") +x, mu, sigma = normalization(x) +dmax = d.max() +d = d / dmax + +# Splitting the data into training, validation, and test subsets +x_train, d_train, x_val, d_val, x_test, d_test = split_data(x, d) + +# ===================== Part 2: Training ===================== + +# Choosing the learning rate and number of iterations +alpha = 0.001 +num_iters = 500 +train_costs = np.zeros(num_iters) +val_costs = np.zeros(num_iters) + +# Network dimensions +D_c = [num_vars, 5, 10, num_targets] # list containing the number of neurons for each layer +activation = [relu, sigmoid, linear] # list containing the activation functions for the hidden layers and the output layer + +# Random initialization of the network weights +W = [] +b = [] +for i in range(len(D_c)-1): + W.append(2 * np.random.random((D_c[i+1], D_c[i])) - 1) + b.append(np.zeros((D_c[i+1],1))) + +x_train = x_train.T # Data is presented as column vectors at the input of the network +d_train = d_train.T + +x_val = x_val.T # Data is presented as column vectors at the input of the network +d_val = d_val.T + +x_test = x_test.T # Data is presented as column vectors at the input of the network +d_test = d_test.T + +for t in range(num_iters): + + ############################################################################# + # Forward pass: calculating predicted output y on validation data # + ############################################################################# + a, h = forward_pass(x_val, W, b, activation) + y_val = h[-1] # Predicted output + + ############################################################################### + # Forward pass: calculating predicted output y on training data # + ############################################################################### + a, h = forward_pass(x_train, W, b, activation) + y_train = h[-1] # Predicted output + + ########################################### + # Calculating the MSE loss function # + ########################################### + train_costs[t] = calculate_mse_cost(y_train, d_train) + val_costs[t] = calculate_mse_cost(y_val, d_val) + + #################################### + # Backward pass: backpropagation # + #################################### + delta_h = (y_train-d_train) # For the last layer + delta_W, delta_b = backward_pass(delta_h, a, h, W, activation) + + ############################################# + # Updating weights and biases # + ############################################# + for i in range(len(b)-1,-1,-1): + b[i] -= alpha * delta_b[i] + W[i] -= alpha * delta_W[i] + +print("Final cost on the training set: ", train_costs[-1]) +print("Final cost on the validation set: ", val_costs[-1]) + +# Plotting the evolution of the cost function during backpropagation +plt.figure(0) +plt.title("Evolution of the cost function during backpropagation") +plt.plot(np.arange(train_costs.size), train_costs, label="Training") 
+plt.plot(np.arange(val_costs.size), val_costs, label="Validation") +plt.legend(loc="upper left") +plt.xlabel("Number of iterations") +plt.ylabel("Cost") +plt.show() + +# ===================== Part 3: Evaluation on the test set ===================== + +####################################################################### +# Forward pass: calculating predicted output y on test data # +####################################################################### +a, h = forward_pass(x_test, W, b, activation) +y_test = h[-1] # Predicted output + +cost = calculate_mse_cost(y_test, d_test) +print("Test set cost: ", cost) -- GitLab
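A small optional check, sketched below and not part of the patch above, could be appended at the end of nn_regression-completed.py. Because the script scales the targets with d / dmax before training, the printed test cost is on that scaled range; the snippet reuses the variables y_test, d_test and dmax defined in the script (an assumption: it only works if pasted after Part 3) to report the error back in the original units of the target, which is easier to interpret.

# Optional check (sketch, not part of the patch): report the test error in the
# original units of the target variable. Assumes these lines are appended at the
# end of nn_regression-completed.py, so y_test, d_test and dmax are in scope.
y_test_orig = y_test * dmax          # undo the d / dmax scaling applied before training
d_test_orig = d_test * dmax
rmse = np.sqrt(np.mean((y_test_orig - d_test_orig) ** 2))
print("Test RMSE in original units: ", rmse)
# Show a few predictions next to their targets (data is stored column-wise)
for i in range(min(5, y_test_orig.shape[1])):
    print(f"y = {y_test_orig[0, i]:.2f}, d = {d_test_orig[0, i]:.2f}")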