" #This function takes as parameter the path of a single batch as a string, and returns a matrix data of size (batch_size x data_size) and a a vector labels of size batch_size.\n",
" \n",
" data_dict = unpickle(batch_path)\n",
" data = data_dict[b'data']\n",
" labels = data_dict[b'labels']\n",
" data = data.reshape(len(data),len(data[0]))\n",
" data = data.astype('f') #data must be np.float32 array.\n",
" labels = np.array(labels, dtype='int64') #labels must be np.int64 array.\n",
" #This function takes as parameter the path of the directory containing the six batches and returns a matrix data a vector lables of size batch_size\n",
"/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:7: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" #This function splits the dataset into a training set and a test set\n",
" #It takes as parameter data and labels, two arrays that have the same size in the first dimension. And a split, a float between 0 and 1 which determines the split factor of the training set with respect to the test set.\n",
" #split -- the split factor\n",
" #data -- the whole data (all the batches including the test batch)\n",
" #labels -- the labels associated to the data\n",
"/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:7: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" # This function trains an MLP classifier and return the training accuracies across epochs as a list of floats and the final testing accuracy as a float.\n",
#This function takes as parameter the path of a single batch as a string, and returns a matrix data of size (batch_size x data_size) and a vector labels of size batch_size.
data_dict = unpickle(batch_path)
data = data_dict[b'data']
labels = data_dict[b'labels']
data = data.reshape(len(data),len(data[0]))
data = data.astype('f') #data must be np.float32 array.
labels = np.array(labels, dtype='int64') #labels must be np.int64 array.
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:7: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
import sys
(array([[ 59., 43., 50., ..., 140., 84., 72.],
[154., 126., 105., ..., 139., 142., 144.],
[255., 253., 253., ..., 83., 83., 84.],
...,
[ 20., 19., 15., ..., 50., 53., 47.],
[ 25., 15., 23., ..., 80., 81., 80.],
[ 73., 98., 99., ..., 94., 58., 26.]]),
array([6, 9, 9, ..., 5, 1, 7]))
%% Cell type:code id: tags:
```
def split_dataset(data,labels,split):
#This function splits the dataset into a training set and a test set
#It takes as parameters data and labels, two arrays that have the same size in the first dimension, and split, a float between 0 and 1 which determines the split factor of the training set with respect to the test set.
#split -- the split factor
#data -- the whole data (all the batches including the test batch)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:7: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
import sys
(array([[ 39., 28., 55., ..., 136., 108., 101.],
[122., 126., 130., ..., 88., 85., 82.],
[ 55., 53., 53., ..., 171., 170., 173.],
...,
[159., 158., 159., ..., 192., 193., 205.],
[198., 195., 185., ..., 75., 74., 77.],
[182., 195., 194., ..., 79., 80., 77.]]),
array([1., 7., 3., ..., 8., 9., 3.]),
array([[166., 165., 162., ..., 140., 144., 149.],
[155., 157., 159., ..., 125., 127., 128.],
[144., 139., 139., ..., 119., 123., 122.],
...,
[138., 240., 241., ..., 182., 191., 131.],
[245., 241., 240., ..., 115., 127., 129.],
[224., 222., 222., ..., 111., 110., 112.]]),
array([0., 5., 1., ..., 3., 3., 8.]))
%% Cell type:code id: tags:
```
# Sanity check: compare the shape of data_train with the expected (54000, 3072).
data_train.shape == (54000, 3072)
```
%% Output
True
%% Cell type:markdown id: tags:
# **KNN**
%% Cell type:code id: tags:
```
import numpy as np
import matplotlib.pyplot as plt
import math
import random
```
%% Cell type:code id: tags:
```
def distance_matrix(Y, X):
    """Return the pairwise Euclidean distances between the rows of Y and X.

    Uses the expansion ||y - x||^2 = ||y||^2 + ||x||^2 - 2 * y.x so the whole
    matrix is computed with a single matrix product instead of a Python loop.

    Parameters:
        Y -- 2-D numpy array with one sample per row.
        X -- 2-D numpy array with one sample per row and the same number of
             columns as Y.

    Returns:
        dist -- 2-D numpy array where dist[i, j] is the Euclidean distance
                between Y[i] and X[j]; its shape is (len(Y), len(X)).
    """
    # Squared norms, shaped as a column (for Y) and a row (for X) so that the
    # sum below broadcasts to a (len(Y), len(X)) matrix.
    y_sq_norms = np.sum(Y ** 2, axis=1).reshape(-1, 1)
    x_sq_norms = np.sum(X ** 2, axis=1).reshape(1, -1)

    sq_dists = y_sq_norms + x_sq_norms - 2 * Y.dot(X.T)

    # Floating-point rounding can make entries that should be exactly 0
    # (e.g. the distance of a sample to itself) slightly negative, which
    # would turn into NaN under the square root. Clamp them at 0 first.
    sq_dists = np.maximum(sq_dists, 0)

    return np.sqrt(sq_dists)
```
%% Cell type:code id: tags:
```
def knn_predict(dists, labels_train, k):
#This function takes as parameters: dists (from above), labels_train, and k the number of neighbors
# This function trains an MLP classifier and returns the training accuracies across epochs as a list of floats and the final testing accuracy as a float.