Image classification
Corentin MASSALA
Prepare the CIFAR dataset
All the code can be found in the Python file read_cifar.py.
2-
import pickle
import numpy as np

def read_cifar_batch(file):
    # Load one CIFAR-10 batch and return the images and labels as NumPy arrays
    with open(file, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')
    return np.array(batch[b'data']).astype('float32'), np.array(batch[b'labels']).astype('int64')
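For example, loading a single batch should return 10 000 images of 3 × 32 × 32 = 3072 values each (the path below is only illustrative):
data, labels = read_cifar_batch('data/cifar-10-batches-py/data_batch_1')
print(data.shape)    # (10000, 3072)
print(labels.shape)  # (10000,)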
3-
def read_cifar(path):
    data = []
    labels = []
    # Add the 5 training batches
    for i in range(1, 6):
        data_temp, labels_temp = read_cifar_batch(f'{path}/data_batch_{i}')
        data.append(data_temp)
        labels.append(labels_temp)
    # Add the test batch
    data_temp, labels_temp = read_cifar_batch(f'{path}/test_batch')
    data.append(data_temp)
    labels.append(labels_temp)
    # Concatenate all the batches into a single big one
    data = np.concatenate(data, axis=0)
    labels = np.concatenate(labels, axis=0)
    return data, labels
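Loading the six batches this way yields the full CIFAR-10 dataset of 60 000 images, 6000 per class (a quick sanity check, assuming the data directory used below):
data, labels = read_cifar('data/cifar-10-batches-py')
print(data.shape)           # (60000, 3072)
print(np.bincount(labels))  # 6000 images in each of the 10 classes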
4-
To split the dataset, we use the train_test_split function from the scikit-learn library:
from sklearn.model_selection import train_test_split

def split_dataset(data, labels, split):
    X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=(1 - split), random_state=0)
    return X_train, X_test, y_train, y_test
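With split = 0.9, 90 % of the images go to the training set and 10 % to the test set. Reusing the data loaded above:
X_train, X_test, y_train, y_test = split_dataset(data, labels, 0.9)
print(X_train.shape, X_test.shape)  # (54000, 3072) (6000, 3072)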
K-nearest neighbors
All the code can be found in the Python file knn.py.
1-
def distance_matrix(matrix1, matrix2):
    # X_test then X_train, in this order
    sum_of_squares_matrix1 = np.sum(np.square(matrix1), axis=1, keepdims=True)  # A^2
    sum_of_squares_matrix2 = np.sum(np.square(matrix2), axis=1, keepdims=True)  # B^2
    dot_product = np.dot(matrix1, matrix2.T)  # A * B^T (matrix multiplication)
    # Euclidean distances from the expansion (a - b)^2 = a^2 + b^2 - 2ab
    dists = np.sqrt(sum_of_squares_matrix1 + sum_of_squares_matrix2.T - 2 * dot_product)
    return dists
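This vectorized formula can be checked against a naive double loop on small random matrices (a minimal sketch, not part of knn.py):
a = np.random.rand(3, 5)
b = np.random.rand(4, 5)
dists = distance_matrix(a, b)
naive = np.array([[np.sqrt(np.sum((a[i] - b[j]) ** 2)) for j in range(4)] for i in range(3)])
print(np.allclose(dists, naive))  # True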
2-
def knn_predict(dists, labels_train, k):
    output = []
    # Loop over all the test images
    for i in range(len(dists)):
        # Initialize a table to count the votes of the neighbors (one entry per class)
        res = [0] * 10
        # Get the k closest neighbors
        labels_close = np.argsort(dists[i])[:k]
        for label in labels_close:
            # Add a vote for the class of this neighbor
            res[labels_train[label]] += 1
        # Get the class with the most votes
        # Careful with the logic here: if two or more classes are tied, np.argmax returns the first maximum encountered
        label_temp = np.argmax(res)
        output.append(label_temp)
    return np.array(output)
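A tiny example with hand-made distances (hypothetical values, just to illustrate the voting):
dists = np.array([[0.1, 0.5, 0.2, 0.9]])      # one test image, four training images
labels_train = np.array([3, 1, 3, 7])
print(knn_predict(dists, labels_train, k=3))  # [3]: two of the three closest neighbors have label 3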
3-
def evaluate_knn(data_train, labels_train, data_test, labels_tests, k):
    dist = distance_matrix(data_test, data_train)
    result_test = knn_predict(dist, labels_train, k)
    # Accuracy
    N = labels_tests.shape[0]
    accuracy = (labels_tests == result_test).sum() / N
    return accuracy
4-
import matplotlib.pyplot as plt
import read_cifar

def bench_knn():
    k_indices = [i for i in range(20) if i % 2 != 0]  # odd values of k from 1 to 19
    accuracies = []
    # Load the data
    data, labels = read_cifar.read_cifar('image-classification/data/cifar-10-batches-py')
    X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.9)
    # Loop over the k_indices to get all the accuracies
    for k in k_indices:
        accuracy = evaluate_knn(X_train, y_train, X_test, y_test, k)
        accuracies.append(accuracy)
    # Save and show the graph of accuracies (save before show, otherwise the saved figure is blank)
    fig = plt.figure()
    plt.plot(k_indices, accuracies)
    plt.title("Accuracy as a function of k")
    plt.savefig('image-classification/results/knn_batch_1.png')
    plt.show()
    plt.close(fig)
Here is the graph of the accuracy as a function of k for the whole CIFAR-10 dataset with a split factor of 0.9:
From it we can conclude that the best k is 9 (if we exclude k = 1), with an accuracy of about 35%.
Artificial Neural Network
Math Theory
Here are all the answers to the theory questions on backpropagation.
1-
2-
3-
4-
5-
6-
7-
8-
9-
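As a summary, here is a condensed sketch of the backpropagation formulas for the two-layer network used below, with sigmoid activations and MSE loss (notation: $A^{(0)}$ is the input, $Z^{(i)}$ and $A^{(i)}$ the pre- and post-activations of layer $i$, $Y$ the targets, $N$ the batch size); it mirrors the variables of learn_once_mse:
$$
\begin{aligned}
\frac{\partial C}{\partial A^{(2)}} &= \frac{2}{N}\,(A^{(2)} - Y), \qquad
\frac{\partial C}{\partial Z^{(2)}} = \frac{\partial C}{\partial A^{(2)}} \odot A^{(2)} \odot (1 - A^{(2)}) \\
\frac{\partial C}{\partial W^{(2)}} &= {A^{(1)}}^{\top} \frac{\partial C}{\partial Z^{(2)}}, \qquad
\frac{\partial C}{\partial B^{(2)}} = \sum_{\text{batch}} \frac{\partial C}{\partial Z^{(2)}} \\
\frac{\partial C}{\partial A^{(1)}} &= \frac{\partial C}{\partial Z^{(2)}}\,{W^{(2)}}^{\top}, \qquad
\frac{\partial C}{\partial Z^{(1)}} = \frac{\partial C}{\partial A^{(1)}} \odot A^{(1)} \odot (1 - A^{(1)}) \\
\frac{\partial C}{\partial W^{(1)}} &= {A^{(0)}}^{\top} \frac{\partial C}{\partial Z^{(1)}}, \qquad
\frac{\partial C}{\partial B^{(1)}} = \sum_{\text{batch}} \frac{\partial C}{\partial Z^{(1)}}
\end{aligned}
$$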
Coding part
All the code can be found in the Python file mlp.py.
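The functions below rely on two activation helpers, sigmoid and softmax_stable, defined in mlp.py. Here is a minimal sketch of what they are assumed to compute:
def sigmoid(x):
    # Logistic function, applied element-wise
    return 1 / (1 + np.exp(-x))

def softmax_stable(x):
    # Softmax with the row maximum subtracted for numerical stability
    e = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e / np.sum(e, axis=1, keepdims=True)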
10-
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
    N_out = len(targets)  # number of training examples
    # Forward pass
    a0 = data  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
    predictions = a2  # the predicted values are the outputs of the output layer
    # Compute loss (MSE)
    loss = np.mean(np.square(predictions - targets))
    print(f'loss: {loss}')
    # Backpropagation
    delta_a2 = 2 / N_out * (a2 - targets)
    delta_z2 = delta_a2 * (a2 * (1 - a2))
    delta_w2 = np.dot(a1.T, delta_z2)
    delta_b2 = delta_z2
    delta_a1 = np.dot(delta_z2, w2.T)
    delta_z1 = delta_a1 * (a1 * (1 - a1))
    delta_w1 = np.dot(a0.T, delta_z1)
    delta_b1 = delta_z1
    # Update weights and biases
    w2 -= learning_rate * delta_w2
    b2 -= learning_rate * np.sum(delta_b2, axis=0, keepdims=True)
    w1 -= learning_rate * delta_w1
    b1 -= learning_rate * np.sum(delta_b1, axis=0, keepdims=True)
    return w1, b1, w2, b2, loss
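A minimal smoke test with random data (the dimensions below are hypothetical, just to check that the shapes are consistent):
N, d_in, d_h, d_out = 30, 3, 4, 2
data = np.random.rand(N, d_in)
targets = np.random.rand(N, d_out)
w1, b1 = 2 * np.random.rand(d_in, d_h) - 1, np.zeros((1, d_h))
w2, b2 = 2 * np.random.rand(d_h, d_out) - 1, np.zeros((1, d_out))
w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, 0.1)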
11-
def one_hot(labels):
    # num_classes = np.max(labels) + 1 would also work; we hard-code it here
    num_classes = 10
    one_hot_matrix = np.eye(num_classes)[labels]
    return one_hot_matrix
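For example, with labels assumed to be integers between 0 and 9:
print(one_hot(np.array([0, 2])))
# [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
#  [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]]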
12-
The cross_entropy_loss function is:
def cross_entropy_loss(y_pred, y_true):
    loss = -np.sum(y_true * np.log(y_pred)) / len(y_pred)
    return loss
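A quick sanity check (hypothetical values): a uniform prediction over the 10 classes should give a loss of −log(0.1) ≈ 2.30.
y_true = one_hot(np.array([2]))
print(cross_entropy_loss(np.full((1, 10), 0.1), y_true))  # ~2.302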
The new learning function is:
def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
    N_out = len(labels_train)  # number of training examples
    # Forward pass
    a0 = data  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = softmax_stable(z2)  # output of the output layer (softmax activation function)
    predictions = a2  # the predicted values are the outputs of the output layer
    # Compute loss (cross-entropy loss)
    y_true_one_hot = one_hot(labels_train)
    loss = cross_entropy_loss(predictions, y_true_one_hot)
    # Backpropagation
    delta_z2 = (a2 - y_true_one_hot)
    delta_w2 = np.dot(a1.T, delta_z2) / N_out  # divide by the sample size to average the error and avoid big gradient jumps
    delta_b2 = delta_z2 / N_out
    delta_a1 = np.dot(delta_z2, w2.T)
    delta_z1 = delta_a1 * (a1 * (1 - a1))
    delta_w1 = np.dot(a0.T, delta_z1) / N_out
    delta_b1 = delta_z1 / N_out
    # Update weights and biases
    w2 -= learning_rate * delta_w2
    b2 -= learning_rate * np.sum(delta_b2, axis=0, keepdims=True)
    w1 -= learning_rate * delta_w1
    b1 -= learning_rate * np.sum(delta_b1, axis=0, keepdims=True)
    return w1, b1, w2, b2, loss
13-
def forward(w1, b1, w2, b2, data):
    # Forward pass
    a0 = data  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = softmax_stable(z2)  # output of the output layer (softmax activation function)
    predictions = a2  # the predicted values are the outputs of the output layer
    return predictions
def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
    train_accuracies = []
    for epoch in range(num_epoch):
        w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
        # Compute the training accuracy
        predictions = forward(w1, b1, w2, b2, data_train)
        predicted_labels = np.argmax(predictions, axis=1)
        accuracy = np.mean(predicted_labels == labels_train)
        train_accuracies.append(accuracy)
        print(f'Epoch {epoch + 1}/{num_epoch}, Loss: {loss:.3f}, Train Accuracy: {accuracy:.2f}')
    return w1, b1, w2, b2, train_accuracies
14-
def test_mlp(w1, b1, w2, b2, data_test, labels_test):
    # Compute the accuracy on the test set
    predictions = forward(w1, b1, w2, b2, data_test)
    predicted_labels = np.argmax(predictions, axis=1)
    test_accuracy = np.mean(predicted_labels == labels_test)
    print(f'Test Accuracy: {test_accuracy:.2f}')
    return test_accuracy
15-
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
    d_in = data_train.shape[1]
    d_out = 10  # we can hard-code it here, or use len(np.unique(labels_train))
    # Random initialisation of the weights and biases
    w1 = np.random.randn(d_in, d_h)
    b1 = np.random.randn(1, d_h)
    w2 = np.random.randn(d_h, d_out)
    b2 = np.random.randn(1, d_out)
    # Train the MLP
    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
    # Test the MLP
    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
    return train_accuracies, test_accuracy
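Finally, the whole pipeline can be run on CIFAR-10 as follows (assuming read_cifar and split_dataset are imported from read_cifar.py; the hidden size, learning rate and number of epochs below are only example values):
data, labels = read_cifar('image-classification/data/cifar-10-batches-py')
data_train, data_test, labels_train, labels_test = split_dataset(data, labels, 0.9)
train_accuracies, test_accuracy = run_mlp_training(
    data_train, labels_train, data_test, labels_test,
    d_h=64, learning_rate=0.1, num_epoch=100)
print(f'Final test accuracy: {test_accuracy:.2f}')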