Image classification
Corentin MASSALA
Prepare the CIFAR dataset
All the code can be found in the Python file read_cifar.py.
2-
import pickle

import numpy as np
from sklearn.model_selection import train_test_split

def read_cifar_batch(file):
    with open(file, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')
    return (np.array(batch[b'data']).astype('float32'), np.array(batch[b'labels']).astype('int64'))
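As a quick sanity check (the path below is an assumption about the local layout, matching the one used later in bench_knn), each batch should yield 10000 images of 32x32x3 = 3072 values:
data, labels = read_cifar_batch('image-classification/data/cifar-10-batches-py/data_batch_1')
print(data.shape, data.dtype)      # expected: (10000, 3072) float32
print(labels.shape, labels.dtype)  # expected: (10000,) int64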
3-
def read_cifar(path):
    data = []
    labels = []
    # Add the 5 training batches
    for i in range(1, 6):
        data_temp, labels_temp = read_cifar_batch(f'{path}/data_batch_{i}')
        data.append(data_temp)
        labels.append(labels_temp)
    # Add the test batch
    data_temp, labels_temp = read_cifar_batch(f'{path}/test_batch')
    data.append(data_temp)
    labels.append(labels_temp)
    # Concatenate all the batches into a single big one
    data = np.concatenate(data, axis=0)
    labels = np.concatenate(labels, axis=0)
    return (data, labels)
4-
To split the dataset, we use the train_test_split function from the scikit-learn library:
def split_dataset(data, labels, split):
    X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=(1 - split), random_state=0)
    return (X_train, X_test, y_train, y_test)
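For example (a small sketch; the 0.9 value matches the split factor used in the experiments below), split = 0.9 keeps 90% of the images for training and 10% for testing:
data, labels = read_cifar('image-classification/data/cifar-10-batches-py')
X_train, X_test, y_train, y_test = split_dataset(data, labels, 0.9)
print(X_train.shape[0], X_test.shape[0])  # expected: 54000 and 6000 out of the 60000 CIFAR-10 images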
K-nearest neighbors
All the code can be found in the Python file knn.py.
1-
import matplotlib.pyplot as plt
import numpy as np

import read_cifar

def distance_matrix(matrix1, matrix2):
    # X_test first, then X_train, in this order
    sum_of_squares_matrix1 = np.sum(np.square(matrix1), axis=1, keepdims=True)  # A^2
    sum_of_squares_matrix2 = np.sum(np.square(matrix2), axis=1, keepdims=True)  # B^2
    dot_product = np.dot(matrix1, matrix2.T)  # A * B^T (matrix multiplication)
    dists = np.sqrt(sum_of_squares_matrix1 + sum_of_squares_matrix2.T - 2 * dot_product)  # compute the Euclidean distances
    return dists
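This vectorised computation relies on the identity ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b. A small sanity check against a naive double loop (purely illustrative, not part of knn.py) could look like this:
# Illustrative check on small random matrices.
A = np.random.rand(5, 3)
B = np.random.rand(4, 3)
naive = np.array([[np.sqrt(np.sum((a - b) ** 2)) for b in B] for a in A])
assert np.allclose(distance_matrix(A, B), naive)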
2-
def knn_predict(dists, labels_train, k):
    output = []
    # Loop over all the test images
    for i in range(len(dists)):
        # Initialize a table to store the votes of the neighbors
        res = [0] * 10
        # Get the k closest neighbors
        labels_close = np.argsort(dists[i])[:k]
        for label in labels_close:
            # Add a vote for the label of this neighbor
            res[labels_train[label]] += 1
        # Get the class with the maximum number of neighbors
        label_temp = np.argmax(res)  # Careful with the logic here: if there are two or more maxima, np.argmax returns the first one encountered
        output.append(label_temp)
    return np.array(output)
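On a toy example (hypothetical values, shown only to illustrate the majority vote), the prediction for a test point is the most frequent label among its k nearest training points:
# Toy illustration: 1 test point, 3 training points with labels 2, 5 and 2.
toy_dists = np.array([[0.1, 0.3, 0.2]])
toy_labels_train = np.array([2, 5, 2])
print(knn_predict(toy_dists, toy_labels_train, k=3))  # expected: [2]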
3-
def evaluate_knn(data_train, labels_train, data_test, labels_tests, k):
    dists = distance_matrix(data_test, data_train)
    result_test = knn_predict(dists, labels_train, k)
    # Accuracy
    N = labels_tests.shape[0]
    accuracy = (labels_tests == result_test).sum() / N
    return accuracy
4-
def bench_knn():
    k_indices = [i for i in range(20) if i % 2 != 0]
    accuracies = []
    # Load data
    data, labels = read_cifar.read_cifar('image-classification/data/cifar-10-batches-py')
    X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.9)
    # Loop over the k_indices to get all the accuracies
    for k in k_indices:
        accuracy = evaluate_knn(X_train, y_train, X_test, y_test, k)
        accuracies.append(accuracy)
    # Save and show the graph of accuracies
    fig = plt.figure()
    plt.plot(k_indices, accuracies)
    plt.title("Accuracy as a function of k")
    plt.savefig('image-classification/results/knn_batch_1.png')  # save before show, otherwise the saved figure may be blank
    plt.show()
    plt.close(fig)
Here is the graph of the accuracy versus k for the whole CIFAR dataset with a split factor of 0.9:
From it we can conclude that the best k is 9 (if we exclude k = 1), with an accuracy of about 35%.
Artificial Neural Network
Math Theory
Here are all the answers for the theory of backpropagation.
1-
2-
3-
4-
5-
6-
7-
8-
9-
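The handwritten derivations themselves are not reproduced in this report. As a summary of what learn_once_mse below implements (for an MSE cost C averaged over the N training examples, with sigmoid activations), the backpropagated gradients are, in LaTeX notation:
\frac{\partial C}{\partial A^{(2)}} = \frac{2}{N}\bigl(A^{(2)} - Y\bigr),
\qquad
\frac{\partial C}{\partial Z^{(2)}} = \frac{\partial C}{\partial A^{(2)}} \odot A^{(2)} \odot \bigl(1 - A^{(2)}\bigr),
\qquad
\frac{\partial C}{\partial W^{(2)}} = {A^{(1)}}^{\top} \frac{\partial C}{\partial Z^{(2)}},
\qquad
\frac{\partial C}{\partial B^{(2)}} = \sum_{i=1}^{N} \left(\frac{\partial C}{\partial Z^{(2)}}\right)_{i,:},
\qquad
\frac{\partial C}{\partial A^{(1)}} = \frac{\partial C}{\partial Z^{(2)}}\, {W^{(2)}}^{\top},
\qquad
\frac{\partial C}{\partial Z^{(1)}} = \frac{\partial C}{\partial A^{(1)}} \odot A^{(1)} \odot \bigl(1 - A^{(1)}\bigr),
\qquad
\frac{\partial C}{\partial W^{(1)}} = {A^{(0)}}^{\top} \frac{\partial C}{\partial Z^{(1)}},
\qquad
\frac{\partial C}{\partial B^{(1)}} = \sum_{i=1}^{N} \left(\frac{\partial C}{\partial Z^{(1)}}\right)_{i,:},
where \odot is the element-wise product. These correspond one-to-one to delta_a2, delta_z2, delta_w2, delta_b2, delta_a1, delta_z1, delta_w1 and delta_b1 in the code below.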
Coding part
All the code can be found in the Python file mlp.py.
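The activation helpers sigmoid and softmax_stable used in the snippets below are defined in mlp.py but not reproduced in this report; a plausible implementation (an assumption, included only so the code below is self-contained) is:
import matplotlib.pyplot as plt
import numpy as np

def sigmoid(z):
    # Element-wise logistic function.
    return 1 / (1 + np.exp(-z))

def softmax_stable(z):
    # Row-wise softmax; subtracting the row maximum avoids overflow in the exponential.
    e = np.exp(z - np.max(z, axis=1, keepdims=True))
    return e / np.sum(e, axis=1, keepdims=True)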
10-
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
    N_out = len(targets)  # number of training examples
    # Forward pass
    a0 = data  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
    predictions = a2  # the predicted values are the outputs of the output layer
    # Compute loss (MSE)
    loss = np.mean(np.square(predictions - targets))
    print(f'loss: {loss}')
    # Backpropagation
    delta_a2 = 2 / N_out * (a2 - targets)
    delta_z2 = delta_a2 * (a2 * (1 - a2))
    delta_w2 = np.dot(a1.T, delta_z2)
    delta_b2 = delta_z2
    delta_a1 = np.dot(delta_z2, w2.T)
    delta_z1 = delta_a1 * (a1 * (1 - a1))
    delta_w1 = np.dot(a0.T, delta_z1)
    delta_b1 = delta_z1
    # Update weights and biases
    w2 -= learning_rate * delta_w2
    b2 -= learning_rate * np.sum(delta_b2, axis=0, keepdims=True)
    w1 -= learning_rate * delta_w1
    b1 -= learning_rate * np.sum(delta_b1, axis=0, keepdims=True)
    return w1, b1, w2, b2, loss
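A single call on random toy data (the dimensions below are arbitrary, chosen only to check that the shapes are consistent; this smoke test is not part of mlp.py) should print a loss and return updated parameters:
N, d_in, d_h, d_out = 30, 3, 3, 2
w1, b1 = np.random.randn(d_in, d_h), np.zeros((1, d_h))
w2, b2 = np.random.randn(d_h, d_out), np.zeros((1, d_out))
data, targets = np.random.rand(N, d_in), np.random.rand(N, d_out)
w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate=0.1)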
11-
def one_hot(labels):
    # num_classes = np.max(labels) + 1; we hard-code it to 10 here instead
    num_classes = 10
    one_hot_matrix = np.eye(num_classes)[labels]
    return one_hot_matrix
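For instance (a tiny illustrative input), each label is turned into a row of length 10 with a single 1 at the label's index:
print(one_hot(np.array([1, 3])))  # row 0 has a 1 in column 1, row 1 has a 1 in column 3, all other entries are 0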
12-
The cross_entropy_loss function is:
def cross_entropy_loss(y_pred, y_true):
    loss = -np.sum(y_true * np.log(y_pred)) / len(y_pred)
    return loss
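If a predicted probability underflows to exactly 0, np.log returns -inf; a clipped variant (an optional alternative, not the one used in mlp.py) guards against this:
def cross_entropy_loss_clipped(y_pred, y_true, eps=1e-12):
    # Same loss, but with predictions clipped away from 0 for numerical safety.
    y_pred = np.clip(y_pred, eps, 1.0)
    return -np.sum(y_true * np.log(y_pred)) / len(y_pred)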
The new learning function is:
def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
    N_out = len(labels_train)  # number of training examples
    # Forward pass
    a0 = data  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = softmax_stable(z2)  # output of the output layer (softmax activation function)
    predictions = a2  # the predicted values are the outputs of the output layer
    # Compute loss (cross-entropy loss)
    y_true_one_hot = one_hot(labels_train)
    loss = cross_entropy_loss(predictions, y_true_one_hot)
    # Backpropagation
    delta_z2 = (a2 - y_true_one_hot)
    delta_w2 = np.dot(a1.T, delta_z2) / N_out  # we divide by the sample size to average the error and avoid big gradient jumps
    delta_b2 = delta_z2 / N_out
    delta_a1 = np.dot(delta_z2, w2.T)
    delta_z1 = delta_a1 * (a1 * (1 - a1))
    delta_w1 = np.dot(a0.T, delta_z1) / N_out
    delta_b1 = delta_z1 / N_out
    # Update weights and biases
    w2 -= learning_rate * delta_w2
    b2 -= learning_rate * np.sum(delta_b2, axis=0, keepdims=True)
    w1 -= learning_rate * delta_w1
    b1 -= learning_rate * np.sum(delta_b1, axis=0, keepdims=True)
    return w1, b1, w2, b2, loss
13-
def forward(w1, b1, w2, b2, data):
    # Forward pass
    a0 = data  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = softmax_stable(z2)  # output of the output layer (softmax activation function)
    predictions = a2  # the predicted values are the outputs of the output layer
    return predictions

def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
    train_accuracies = []
    for epoch in range(num_epoch):
        w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
        # Compute accuracy
        predictions = forward(w1, b1, w2, b2, data_train)
        predicted_labels = np.argmax(predictions, axis=1)
        accuracy = np.mean(predicted_labels == labels_train)
        train_accuracies.append(accuracy)
        print(f'Epoch {epoch + 1}/{num_epoch}, Loss: {loss:.3f}, Train Accuracy: {accuracy:.2f}')
    return w1, b1, w2, b2, train_accuracies
14-
def test_mlp(w1, b1, w2, b2, data_test, labels_test):
    # Compute accuracy
    predictions = forward(w1, b1, w2, b2, data_test)
    predicted_labels = np.argmax(predictions, axis=1)
    test_accuracy = np.mean(predicted_labels == labels_test)
    print(f'Test Accuracy: {test_accuracy:.2f}')
    return test_accuracy
15-
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
    d_in = data_train.shape[1]
    d_out = 10  # we can hard-code it here, or use len(np.unique(labels_train))
    # Random initialisation of the weights (Xavier initialisation)
    w1 = np.random.randn(d_in, d_h) / np.sqrt(d_in)
    b1 = np.zeros((1, d_h))
    w2 = np.random.randn(d_h, d_out) / np.sqrt(d_h)
    b2 = np.zeros((1, d_out))
    # Train MLP
    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
    # Test MLP
    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
    return train_accuracies, test_accuracy
16-
def plot_graph(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
    # Run MLP training
    train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch)
    # Plot and save the learning accuracy graph
    plt.figure(figsize=(8, 6))
    epochs = np.arange(1, num_epoch + 1)
    plt.plot(epochs, train_accuracies, marker='x', color='b', label='Train Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.title('MLP Train Accuracy')
    plt.legend()
    plt.grid(True)
    plt.savefig('image-classification/results/mlp.png')
    plt.show()
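The training curve discussed below was produced by calling this function; a typical call (d_h = 64 is an assumption, while the learning rate and number of epochs match the experiments reported below) would be:
import read_cifar  # assumes read_cifar.py is on the path
data, labels = read_cifar.read_cifar('image-classification/data/cifar-10-batches-py')
data_train, data_test, labels_train, labels_test = read_cifar.split_dataset(data, labels, 0.9)
plot_graph(data_train, labels_train, data_test, labels_test, d_h=64, learning_rate=0.1, num_epoch=100)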
The accuracy increases with each epoch without converging; we could increase the learning rate to speed up the training and increase the number of epochs to see what our maximum accuracy would be. For 100 epochs and a learning rate of 0.1, we got a test accuracy of 0.13. For 300 epochs and a learning rate of 0.1, the training accuracy increased to 0.15991 and we got a test accuracy of 0.155.