import matplotlib.pyplot as plt
import numpy as np


def load_data(file_name, delimiter=','):
    """ Reads the file containing the data and returns the corresponding matrices

    Parameters
    ----------
    file_name : name of the file containing the data
    delimiter : character that separates the columns in the file (default is ",")

    Returns
    -------
    x : data matrix of dimension [N, nb_var]
    d : matrix containing the target variable values of dimension [N, nb_target]
    N : number of elements
    nb_var : number of predictor variables
    nb_target : number of target variables

    """

    data = np.loadtxt(file_name, delimiter=delimiter)
    nb_target = 1
    nb_var = data.shape[1] - nb_target
    N = data.shape[0]

    x = data[:, :nb_var]
    d = data[:, nb_var:].reshape(N, 1)

    return x, d, N, nb_var, nb_target


def normalization(x):
    """ Normalizes the data by centering and scaling the predictor variables

    Parameters
    ----------
    x : data matrix of dimension [N, nb_var]

    with N : number of elements and nb_var : number of predictor variables

    Returns
    -------
    x_norm : normalized data matrix of dimension [N, nb_var]
    mu : mean of the variables of dimension [1, nb_var]
    sigma : standard deviation of the variables of dimension [1, nb_var]

    """

    mu = np.mean(x, 0)
    sigma = np.std(x, 0)
    x_norm = (x - mu) / sigma

    return x_norm, mu, sigma


def split_data(x, d, prop_val=0.2, prop_test=0.2):
    """ Splits the original data into three distinct subsets: training, validation, and test

    Parameters
    ----------
    x : data matrix of dimension [N, nb_var]
    d : target values matrix of dimension [N, nb_target]
    prop_val : proportion of validation data in the total data (between 0 and 1)
    prop_test : proportion of test data in the total data (between 0 and 1)

    with N : number of elements, nb_var : number of predictor variables,
    nb_target : number of target variables

    Returns
    -------
    x_train : training data matrix
    d_train : target values matrix for training
    x_val : validation data matrix
    d_val : target values matrix for validation
    x_test : test data matrix
    d_test : target values matrix for test

    """

    assert prop_val + prop_test < 1.0

    N = x.shape[0]
    indices = np.arange(N)
    np.random.shuffle(indices)

    nb_val = int(N * prop_val)
    nb_test = int(N * prop_test)
    nb_train = N - nb_val - nb_test

    x = x[indices, :]
    d = d[indices, :]

    x_train = x[:nb_train, :]
    d_train = d[:nb_train, :]

    x_val = x[nb_train:nb_train + nb_val, :]
    d_val = d[nb_train:nb_train + nb_val, :]

    x_test = x[N - nb_test:, :]
    d_test = d[N - nb_test:, :]

    return x_train, d_train, x_val, d_val, x_test, d_test


def compute_cross_entropy_cost(y, d):
    """ Computes the value of the cross-entropy cost function

    Parameters
    ----------
    y : predicted data matrix (softmax outputs)
    d : ground-truth data matrix (one-hot encoded)

    Returns
    -------
    cost : value corresponding to the cost function

    """

    N = y.shape[1]
    cost = - np.sum(d * np.log(y)) / N

    return cost


def forward_pass(x, W, b, activation):
    """ Performs a forward pass in the neural network

    Parameters
    ----------
    x : input matrix, of dimension nb_var x N
    W : list containing the weight matrices of the network
    b : list containing the bias matrices of the network
    activation : list containing the activation functions of the network layers

    with N : number of elements, nb_var : number of predictor variables

    Returns
    -------
    a : list containing the input potentials of the network layers
    h : list containing the outputs of the network layers

    """

    h = [x]
    a = []
    for i in range(len(b)):
        a.append(W[i].dot(h[i]) + b[i])
        h.append(activation[i](a[i]))

    return a, h
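# --- Illustrative sanity check of the forward_pass shape convention ---
# (not part of the original script; the tiny network below is hypothetical).
# Inputs are presented as column vectors, so x has shape [nb_var, N] and the
# network output h[-1] has shape [nb_output_neurons, N]. A throwaway identity
# activation is used because the real activation functions are defined below.
def _identity(z, deriv=False):
    return z

_x_demo = np.random.randn(2, 5)                          # 2 variables, 5 examples
_W_demo = [np.random.randn(3, 2), np.random.randn(4, 3)]
_b_demo = [np.zeros((3, 1)), np.zeros((4, 1))]
_a_demo, _h_demo = forward_pass(_x_demo, _W_demo, _b_demo, [_identity, _identity])
assert _h_demo[-1].shape == (4, 5)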
def backward_pass(delta_h, a, h, W, activation):
    """ Performs a backward pass in the neural network (backpropagation)

    Parameters
    ----------
    delta_h : matrix containing the gradient of the cost with respect to the network output
    a : list containing the input potentials of the network layers
    h : list containing the outputs of the network layers
    W : list containing the weight matrices of the network
    activation : list containing the activation functions of the network layers

    Returns
    -------
    delta_W : list containing the gradient matrices of the network's weight layers
    delta_b : list containing the gradient matrices of the network's bias layers

    """

    delta_b = []
    delta_W = []

    for i in range(len(W) - 1, -1, -1):
        delta_a = delta_h * activation[i](a[i], True)    # gradient w.r.t. the layer's input potentials
        delta_b.append(delta_a.mean(1).reshape(-1, 1))   # bias gradient, averaged over the examples
        delta_W.append(delta_a.dot(h[i].T))              # weight gradient, summed over the examples
        delta_h = (W[i].T).dot(delta_a)                  # gradient propagated to the previous layer

    delta_b = delta_b[::-1]
    delta_W = delta_W[::-1]

    return delta_W, delta_b


def sigmoid(z, deriv=False):
    """ Computes the value of the sigmoid function or its derivative applied to z

    Parameters
    ----------
    z : can be a scalar or a matrix
    deriv : boolean. If False returns the value of the sigmoid function, if True returns its derivative

    Returns
    -------
    s : value of the sigmoid function applied to z or its derivative. Same dimension as z

    """

    s = 1 / (1 + np.exp(-z))
    if deriv:
        return s * (1 - s)
    else:
        return s


def linear(z, deriv=False):
    """ Computes the value of the linear function or its derivative applied to z

    Parameters
    ----------
    z : can be a scalar or a matrix
    deriv : boolean. If False returns the value of the linear function, if True returns its derivative

    Returns
    -------
    s : value of the linear function applied to z or its derivative. Same dimension as z

    """

    if deriv:
        return 1
    else:
        return z


def relu(z, deriv=False):
    """ Computes the value of the ReLU function or its derivative applied to z

    Parameters
    ----------
    z : can be a scalar or a matrix
    deriv : boolean. If False returns the value of the ReLU function, if True returns its derivative

    Returns
    -------
    s : value of the ReLU function applied to z or its derivative. Same dimension as z

    """

    r = np.zeros(z.shape)
    if deriv:
        pos = np.where(z >= 0)
        r[pos] = 1.0
        return r
    else:
        return np.maximum(r, z)
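# --- Illustrative example (not part of the original script) ---
# All activation helpers share one calling convention: f(z) returns the value,
# f(z, deriv=True) returns the element-wise derivative, both with the shape of z.
_z_demo = np.array([[-1.0, 0.0, 2.0]])
assert np.allclose(sigmoid(_z_demo, deriv=True), sigmoid(_z_demo) * (1 - sigmoid(_z_demo)))
assert np.allclose(relu(_z_demo), np.array([[0.0, 0.0, 2.0]]))
assert np.allclose(relu(_z_demo, deriv=True), np.array([[0.0, 1.0, 1.0]]))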
def softmax(z, deriv=False):
    """ Computes the value of the softmax function or its derivative applied to z

    Parameters
    ----------
    z : data matrix
    deriv : boolean. If False returns the value of the softmax function, if True returns its derivative

    Returns
    -------
    s : value of the softmax function applied to z or its derivative. Same dimension as z

    """

    if deriv:
        # The derivative of the softmax/cross-entropy output layer is folded into
        # the (y - d) term passed to backward_pass, so 1 is returned here.
        return 1
    else:
        return np.exp(z) / np.sum(np.exp(z), axis=0)


def one_hot_encoding(d):
    """ Performs a one-hot encoding: among the output neurons of the network, only one
    will have the value 1, all others will be 0

    Parameters
    ----------
    d : matrix containing the values of the target variable (class of the elements)
        of dimension [N, 1]

    with N : number of elements

    Returns
    -------
    e : encoded data matrix of dimension [N, nb_classes]

    with N : number of elements and nb_classes the number of classes (maximum + 1)
    of the values in d

    """

    d = d.astype(int).flatten()
    N = d.shape[0]
    nb_classes = d.max() + 1
    e = np.zeros((N, nb_classes))
    e[range(N), d] = 1

    return e


def classification_accuracy(y, d):
    """ Computes the classification accuracy (proportion of correctly classified elements)

    Parameters
    ----------
    y : network outputs matrix of dimension [nb_output_neurons x N]
    d : true values matrix of dimension [nb_output_neurons x N]

    with N : number of elements and nb_output_neurons : number of neurons in the output layer

    Returns
    -------
    t : classification accuracy

    """

    ind_y = np.argmax(y, axis=0)
    ind_d = np.argmax(d, axis=0)
    t = np.mean(ind_y == ind_d)

    return t


# ===================== Part 1: Reading and Normalizing the Data =====================

print("Reading the data ...")

x, d, N, nb_var, nb_target = load_data("iris.txt")
# x, d, N, nb_var, nb_target = load_data("scores.txt")

# Display the first 10 examples of the dataset
print("Displaying the first 10 examples of the dataset: ")
for i in range(0, 10):
    print(f"x = {x[i,:]}, d = {d[i]}")

# Normalization of the variables (centering and scaling)
print("Normalizing the variables ...")
x, mu, sigma = normalization(x)
d = one_hot_encoding(d)

# Split the data into training, validation, and test subsets
x_train, d_train, x_val, d_val, x_test, d_test = split_data(x, d)

# ===================== Part 2: Training =====================

# Learning rate and number of iterations
alpha = 0.0001
nb_iters = 10000
training_costs = np.zeros(nb_iters)
validation_costs = np.zeros(nb_iters)

# Network dimensions
D_c = [nb_var, 15, 15, d_train.shape[1]]  # list containing the number of neurons for each layer
activation = [relu, relu, softmax]  # list containing the activation functions for the hidden layers and the output layer

# Random initialization of the network weights
W = []
b = []
for i in range(len(D_c) - 1):
    W.append(2 * np.random.random((D_c[i + 1], D_c[i])) - 1)
    b.append(np.zeros((D_c[i + 1], 1)))

x_train = x_train.T  # Data is presented as column vectors at the network input
d_train = d_train.T

x_val = x_val.T  # Data is presented as column vectors at the network input
d_val = d_val.T

x_test = x_test.T  # Data is presented as column vectors at the network input
d_test = d_test.T
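# --- Illustrative check of the output-layer gradient (not part of the original script) ---
# With a softmax output trained with the cross-entropy cost, the gradient of the
# cost with respect to the output pre-activations is simply (y - d). This is why
# the training loop below feeds delta_h = y_train - d_train to backward_pass.
# The finite-difference check on a single hypothetical example illustrates this.
_a_out = np.array([[0.2], [1.5], [-0.3]])
_d_out = np.array([[0.0], [1.0], [0.0]])
_eps = 1e-6
_grad_num = np.zeros_like(_a_out)
for _k in range(_a_out.shape[0]):
    _a_plus, _a_minus = _a_out.copy(), _a_out.copy()
    _a_plus[_k] += _eps
    _a_minus[_k] -= _eps
    _grad_num[_k] = (compute_cross_entropy_cost(softmax(_a_plus), _d_out)
                     - compute_cross_entropy_cost(softmax(_a_minus), _d_out)) / (2 * _eps)
assert np.allclose(_grad_num, softmax(_a_out) - _d_out, atol=1e-5)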
for t in range(nb_iters):

    #############################################################################
    # Forward pass: compute the predicted output y on the validation data       #
    #############################################################################
    a, h = forward_pass(x_val, W, b, activation)
    y_val = h[-1]  # Predicted output

    #############################################################################
    # Forward pass: compute the predicted output y on the training data         #
    #############################################################################
    a, h = forward_pass(x_train, W, b, activation)
    y_train = h[-1]  # Predicted output

    ###########################################
    # Compute the cross-entropy loss function #
    ###########################################
    training_costs[t] = compute_cross_entropy_cost(y_train, d_train)
    validation_costs[t] = compute_cross_entropy_cost(y_val, d_val)

    ####################################
    # Backward pass: backpropagation   #
    ####################################
    delta_h = (y_train - d_train)  # For the last layer
    delta_W, delta_b = backward_pass(delta_h, a, h, W, activation)

    ####################################
    # Update weights and biases        #
    ####################################
    for i in range(len(b) - 1, -1, -1):
        b[i] -= alpha * delta_b[i]
        W[i] -= alpha * delta_W[i]

print("Final cost on the training set: ", training_costs[-1])
print("Classification accuracy on the training set: ", classification_accuracy(y_train, d_train))
print("Final cost on the validation set: ", validation_costs[-1])
print("Classification accuracy on the validation set: ", classification_accuracy(y_val, d_val))

# Display the evolution of the cost function during backpropagation
plt.figure(0)
plt.title("Cost function evolution during backpropagation")
plt.plot(np.arange(training_costs.size), training_costs, label="Training")
plt.plot(np.arange(validation_costs.size), validation_costs, label="Validation")
plt.legend(loc="upper left")
plt.xlabel("Number of iterations")
plt.ylabel("Cost")
plt.show()

# ===================== Part 3: Evaluation on the test set =====================

#######################################################################
# Forward pass: compute the predicted output y on the test data       #
#######################################################################
a, h = forward_pass(x_test, W, b, activation)
y_test = h[-1]  # Predicted output

cost = compute_cross_entropy_cost(y_test, d_test)
print("Cost on the test set: ", cost)
print("Classification accuracy on the test set: ", classification_accuracy(y_test, d_test))
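# --- Illustrative use of the trained network on a single sample ---
# (not part of the original script; the workflow below is an assumed sketch).
# A genuinely new raw sample would first be normalized with the training
# statistics, e.g. x_new = ((x_raw - mu) / sigma).reshape(-1, 1), before being
# fed to forward_pass; here an already-normalized test column is reused.
_x_new = x_test[:, :1]
_, _h_new = forward_pass(_x_new, W, b, activation)
print("Predicted class for the first test example: ", np.argmax(_h_new[-1], axis=0)[0])
print("True class for the first test example: ", np.argmax(d_test[:, :1], axis=0)[0])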