Skip to content
Snippets Groups Projects
Commit 57a2287f authored by Saidi Aya's avatar Saidi Aya
Browse files

Create py_test.ipynb

parent a099fa3b
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
# **Read_cifar**
%% Cell type:code id: tags:
```
import numpy as np
from six.moves import cPickle as pickle
import platform
import os
import random
import math
```
%% Cell type:code id: tags:
```
def unpickle(file):
    """Load and return the object stored in a pickle file.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object. The file is loaded with ``encoding='bytes'``,
        so 8-bit strings pickled by Python 2 come back as ``bytes`` (this is
        why callers look up keys such as ``b'data'``).
    """
    # SECURITY: unpickling can execute arbitrary code. Only call this on
    # files that come from a trusted source.
    with open(file, 'rb') as fo:
        # Return directly instead of binding a local named `dict`, which
        # would shadow the builtin.
        return pickle.load(fo, encoding='bytes')
```
%% Cell type:code id: tags:
```
def read_cifar_batch(batch_path):
    """Read a single batch file and return its data and labels.

    Args:
        batch_path: Path of one batch file, as a string.

    Returns:
        A tuple ``(data, labels)`` where ``data`` is the ``b'data'`` entry
        of the batch converted to ``np.float32`` (expected shape:
        batch_size x data_size) and ``labels`` is the ``b'labels'`` entry
        converted to a ``np.int64`` vector of length batch_size.
    """
    data_dict = unpickle(batch_path)
    # Convert straight to the required dtypes. No reshape is needed: the
    # stored matrix is already 2-D, and indexing data[0] to recover the row
    # length would raise IndexError on an empty batch.
    data = np.asarray(data_dict[b'data'], dtype=np.float32)
    labels = np.asarray(data_dict[b'labels'], dtype=np.int64)
    return data, labels
```
%% Cell type:code id: tags:
```
# Smoke test for read_cifar_batch: load the first training batch and display
# the returned (data, labels) tuple.
read_cifar_batch ('/content/drive/MyDrive/cifar10/data_batch_1')
```
%% Output
(array([[ 59., 43., 50., ..., 140., 84., 72.],
[154., 126., 105., ..., 139., 142., 144.],
[255., 253., 253., ..., 83., 83., 84.],
...,
[ 71., 60., 74., ..., 68., 69., 68.],
[250., 254., 211., ..., 215., 255., 254.],
[ 62., 61., 60., ..., 130., 130., 131.]], dtype=float32),
array([6, 9, 9, ..., 1, 1, 5]))
%% Cell type:code id: tags:
```
def read_cifar(fo):
    """Read every batch of the dataset and concatenate them.

    Args:
        fo: Path of the directory containing the six batch files
            (``data_batch_1`` ... ``data_batch_5`` and ``test_batch``).

    Returns:
        A tuple ``(data, labels)`` with the batches concatenated in the
        order listed below: ``data`` is a ``np.float64`` matrix with one row
        per sample and ``labels`` is a ``np.int64`` vector with one entry
        per sample.
    """
    files = ['/data_batch_1', '/data_batch_2', '/data_batch_3',
             '/data_batch_4', '/data_batch_5', '/test_batch']
    data_parts = []
    label_parts = []
    for name in files:
        data_dict = unpickle(fo + name)
        data_parts.append(np.asarray(data_dict[b'data']))
        label_parts.append(np.asarray(data_dict[b'labels'], dtype=np.int64))
    # Concatenate once at the end: stacking inside the loop re-copies all
    # previously read rows on every iteration, and pre-allocating a dummy
    # block to slice off afterwards hard-codes the dataset size.
    # np.float64 replaces the `np.float` alias, which was removed from NumPy.
    X = np.concatenate(data_parts, axis=0).astype(np.float64)
    Y = np.concatenate(label_parts)
    return X, Y
```
%% Cell type:code id: tags:
```
# Smoke test for read_cifar: load all six batches from the dataset directory
# and display the returned (data, labels) tuple.
read_cifar("/content/drive/MyDrive/cifar10")
```
%% Output
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:7: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
import sys
(array([[ 59., 43., 50., ..., 140., 84., 72.],
[154., 126., 105., ..., 139., 142., 144.],
[255., 253., 253., ..., 83., 83., 84.],
...,
[ 20., 19., 15., ..., 50., 53., 47.],
[ 25., 15., 23., ..., 80., 81., 80.],
[ 73., 98., 99., ..., 94., 58., 26.]]),
array([6, 9, 9, ..., 5, 1, 7]))
%% Cell type:code id: tags:
```
def split_dataset(data, labels, split):
    """Shuffle a dataset and split it into a training set and a test set.

    Args:
        data: Array whose first dimension indexes the samples (the whole
            data, i.e. all the batches including the test batch).
        labels: Labels associated with ``data``; must hold exactly one
            label per row of ``data``.
        split: Float between 0 and 1; fraction of the samples assigned to
            the training set.

    Returns:
        A tuple ``(data_train, labels_train, data_test, labels_test)``. The
        training part holds ``int(split * n_samples)`` samples. Each output
        keeps the dtype of the corresponding input.

    Raises:
        ValueError: If ``split`` is not in [0, 1], or if ``labels`` does not
            contain one entry per row of ``data``.
    """
    if not 0 <= split <= 1:
        raise ValueError("split must be between 0 and 1, got %r" % (split,))
    data = np.asarray(data)
    n_samples = data.shape[0]
    labels = np.asarray(labels).reshape(n_samples)
    n_train = int(split * n_samples)
    # Shuffle indices rather than a stacked (data | labels) matrix: stacking
    # duplicates the whole dataset in memory and silently converts the
    # integer labels to the data's float dtype.
    order = np.random.permutation(n_samples)
    train_idx = order[:n_train]
    test_idx = order[n_train:]
    return data[train_idx], labels[train_idx], data[test_idx], labels[test_idx]
```
%% Cell type:code id: tags:
```
# Smoke test for split_dataset: load the full dataset and split it 80/20.
# NOTE(review): the four returned arrays are only displayed, not assigned to
# variables, so this cell does not define data_train / labels_train / etc.
data,labels=read_cifar("/content/drive/MyDrive/cifar10/")
split_dataset(data,labels,0.8)
```
%% Output
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:7: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
import sys
(array([[ 39., 28., 55., ..., 136., 108., 101.],
[122., 126., 130., ..., 88., 85., 82.],
[ 55., 53., 53., ..., 171., 170., 173.],
...,
[159., 158., 159., ..., 192., 193., 205.],
[198., 195., 185., ..., 75., 74., 77.],
[182., 195., 194., ..., 79., 80., 77.]]),
array([1., 7., 3., ..., 8., 9., 3.]),
array([[166., 165., 162., ..., 140., 144., 149.],
[155., 157., 159., ..., 125., 127., 128.],
[144., 139., 139., ..., 119., 123., 122.],
...,
[138., 240., 241., ..., 182., 191., 131.],
[245., 241., 240., ..., 115., 127., 129.],
[224., 222., 222., ..., 111., 110., 112.]]),
array([0., 5., 1., ..., 3., 3., 8.]))
%% Cell type:code id: tags:
```
# Shape check on the training split.
# NOTE(review): no visible cell assigns data_train, and 54000 rows is 90% of
# 60000 whereas the cell above uses split=0.8 -- presumably this relies on an
# assignment made in a cell that is not shown; confirm.
data_train.shape == (54000, 3072)
```
%% Output
True
%% Cell type:markdown id: tags:
# **KNN**
%% Cell type:code id: tags:
```
import numpy as np
import matplotlib.pyplot as plt
import math
import random
```
%% Cell type:code id: tags:
```
def distance_matrix(Y, X):
    """Compute the pairwise Euclidean distances between two sets of rows.

    Uses the expansion ``||y - x||^2 = ||y||^2 + ||x||^2 - 2 y.x`` so the
    whole matrix is obtained from a single matrix product.

    Args:
        Y: Matrix of shape (n_y, d).
        X: Matrix of shape (n_x, d).

    Returns:
        Matrix ``dist`` of shape (n_y, n_x) where ``dist[i, j]`` is the
        Euclidean distance between ``Y[i]`` and ``X[j]``.
    """
    Y = np.asarray(Y)
    X = np.asarray(X)
    # Squaring small integer types (e.g. uint8 pixels) would wrap around,
    # so non-floating inputs are promoted first.
    if not np.issubdtype(Y.dtype, np.floating):
        Y = Y.astype(np.float64)
    if not np.issubdtype(X.dtype, np.floating):
        X = X.astype(np.float64)
    y_sq = (Y ** 2).sum(axis=1).reshape(-1, 1)
    x_sq = (X ** 2).sum(axis=1).reshape(1, -1)
    sq_dists = y_sq + x_sq - 2 * Y.dot(X.T)
    # Rounding can leave tiny negative values for (near-)identical rows;
    # clamp them to zero so the square root never yields NaN.
    np.maximum(sq_dists, 0, out=sq_dists)
    return np.sqrt(sq_dists)
```
%% Cell type:code id: tags:
```
def knn_predict(dists, labels_train, k):
    """Predict a label for each query point by k-nearest-neighbour vote.

    Args:
        dists: Distance matrix of shape (n_query, n_train); ``dists[i, j]``
            is the distance between query point ``i`` and training point
            ``j``.
        labels_train: Labels of the training points (length n_train).
        k: Number of neighbours taking part in the vote (at least 1).

    Returns:
        Vector of length n_query with the predicted labels, using the dtype
        of ``labels_train``. When several labels are tied, the one whose
        first occurrence is closest to the query wins.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))
    dists = np.asarray(dists)
    labels_train = np.asarray(labels_train).ravel()
    # One prediction per row of `dists` (i.e. per query point), not per
    # training label.
    labels_pred = np.empty(dists.shape[0], dtype=labels_train.dtype)
    for row, row_dists in enumerate(dists):
        # Indices of the k smallest distances (exactly k, not k + 1).
        nearest = np.argsort(row_dists)[:k]
        neighbour_labels = labels_train[nearest].tolist()
        # Most frequent label; max() returns the first maximal element, so
        # ties go to the label met first when walking outwards.
        labels_pred[row] = max(neighbour_labels, key=neighbour_labels.count)
    return labels_pred
```
%% Cell type:code id: tags:
```
# Load the k-NN training set and test set, one batch file each.
(data,labels)=read_cifar_batch('/content/drive/MyDrive/cifar10/data_batch_2') # train on the second batch only, to limit memory use
(data_test,labels_test)=read_cifar_batch('/content/drive/MyDrive/cifar10/test_batch')
```
%% Cell type:code id: tags:
```
# distance_matrix(A, B) must have one row per row of A and one column per row of B.
assert distance_matrix(data,data_test).shape == (data.shape[0],data_test.shape[0])
```
%% Cell type:code id: tags:
```
# Shape check on the knn_predict output.
# NOTE(review): the first argument here is the raw `data` matrix, not a
# distance matrix, so only the output shape is exercised -- the predicted
# values are not meaningful.
assert knn_predict(data,labels,2).shape == labels.shape
```
%% Cell type:code id: tags:
```
def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    """Evaluate the k-NN classifier on a test set.

    Args:
        data_train: Training samples, one per row.
        labels_train: Labels of the training samples.
        data_test: Test samples, one per row.
        labels_test: True labels of the test samples.
        k: Number of neighbours used for the vote.

    Returns:
        The accuracy, i.e. the percentage (0-100) of test samples whose
        predicted label equals the true label.
    """
    # knn_predict walks the rows of the distance matrix as query points and
    # indexes labels_train with the column indices, so the matrix must be
    # (n_test, n_train): the test set goes first.
    dists = distance_matrix(data_test, data_train)
    labels_test_pred = knn_predict(dists, labels_train, k)
    num_samples = data_test.shape[0]
    num_correct = (labels_test == labels_test_pred).sum().item()
    accuracy = 100 * (num_correct / num_samples)
    return accuracy
```
%% Cell type:code id: tags:
```
# Sanity check: with k=5 the accuracy must be a percentage strictly between 0 and 100.
assert 0 < evaluate_knn(data,labels,data_test,labels_test,5) < 100
```
%% Cell type:code id: tags:
```
def accuracy_graph(k, dirname, num_batch):
    """Plot the k-NN accuracy as a function of the number of neighbours.

    The classifier is trained on one training batch and evaluated on the
    test batch for every neighbour count from 1 to ``k``. The curve is
    saved to ``<dirname>/results/accuracy_knn`` and then displayed.

    Args:
        k: The maximum number of neighbours.
        dirname: Directory containing the batch files; the ``results``
            sub-directory is created inside it if needed.
        num_batch: Number of the training batch to use
            (``data_batch_<num_batch>``).
    """
    import os  # local import keeps this block self-contained

    dir_batch = str(dirname) + "/data_batch_" + str(num_batch)
    dir_test = str(dirname) + "/test_batch"
    (data_test, labels_test) = read_cifar_batch(dir_test)
    (data_train, labels_train) = read_cifar_batch(dir_batch)

    x = list(range(1, k + 1))  # x axis: number of neighbours
    y = [  # y axis: accuracy
        evaluate_knn(data_train, labels_train, data_test, labels_test, i)
        for i in x
    ]
    plt.plot(x, y)

    results_dir = str(dirname) + "/results"
    os.makedirs(results_dir, exist_ok=True)
    # Save before show(): once the figure has been shown, the current figure
    # may be a new empty one and savefig() would write a blank image.
    plt.savefig(results_dir + "/accuracy_knn")
    plt.show()
```
%% Cell type:code id: tags:
```
# Plot the accuracy for k = 1..20, training on data_batch_4.
accuracy_graph(20,'/content/drive/MyDrive/cifar10',4)
```
%% Output
%% Cell type:markdown id: tags:
# **mlp**
%% Cell type:code id: tags:
```
def segmoid(x):
    """Sigmoid activation ``1 / (1 + exp(-x))``, applied elementwise.

    Args:
        x: Scalar or array.

    Returns:
        The sigmoid of ``x``, with the same shape as ``x`` (a NumPy scalar
        for scalar input).
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow, unlike
    # exp(-x) for large negative x.
    z = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + e^-x).  For x < 0: e^x / (1 + e^x).
    out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # arrays of higher rank unchanged.
    return out[()]
```
%% Cell type:code id: tags:
```
def derivation(x):
    """Derivative of the sigmoid at ``x``: ``s(x) * (1 - s(x))``.

    Args:
        x: Scalar or array.

    Returns:
        The derivative of ``segmoid`` evaluated elementwise at ``x``.
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    s = segmoid(x)
    deriv_segmoid = s * (1 - s)
    return deriv_segmoid
```
%% Cell type:code id: tags:
```
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
    """Perform one gradient-descent step on a 2-layer sigmoid MLP (MSE loss).

    The network is ``A1 = sigmoid(data @ w1 + b1)``,
    ``A2 = sigmoid(A1 @ w2 + b2)`` and the loss is the mean of
    ``(A2 - targets) ** 2`` over every entry of the batch.

    Args:
        w1: Weights of the first layer, shape (d_in, d_h).
        b1: Biases of the first layer, shape (1, d_h).
        w2: Weights of the second layer, shape (d_h, d_out).
        b2: Biases of the second layer, shape (1, d_out).
        data: Input matrix of shape (batch_size, d_in).
        targets: Target matrix of shape (batch_size, d_out).
        learning_rate: Step size of the gradient descent.

    Returns:
        A tuple ``(w1, b1, w2, b2, loss)`` with the updated parameters
        (same shapes as the inputs) and the MSE loss computed with the
        updated parameters.
    """
    # Forward pass with the current parameters.
    A0 = data
    A1 = segmoid(np.matmul(A0, w1) + b1)
    A2 = segmoid(np.matmul(A1, w2) + b2)

    # Backward pass. The loss is averaged over all batch_size * d_out
    # entries, hence the division by A2.size.
    # Layer 2
    D_A2 = 2 * (A2 - targets) / A2.size
    # The sigmoid derivative A * (1 - A) is applied elementwise (a matrix
    # product here would mix unrelated samples together).
    D_Z2 = D_A2 * A2 * (1 - A2)
    D_W2 = np.matmul(A1.T, D_Z2)
    # A bias is shared by every sample: sum its gradient over the batch so
    # the bias keeps its original shape.
    D_B2 = D_Z2.sum(axis=0).reshape(np.shape(b2))
    # Layer 1
    D_A1 = np.matmul(D_Z2, w2.T)
    D_Z1 = D_A1 * A1 * (1 - A1)
    D_W1 = np.matmul(A0.T, D_Z1)
    D_B1 = D_Z1.sum(axis=0).reshape(np.shape(b1))

    # Gradient-descent update.
    w1 = w1 - learning_rate * D_W1
    w2 = w2 - learning_rate * D_W2
    b1 = b1 - learning_rate * D_B1
    b2 = b2 - learning_rate * D_B2

    # Forward pass with the updated parameters to report the loss.
    C1 = segmoid(np.matmul(A0, w1) + b1)
    predictions = segmoid(np.matmul(C1, w2) + b2)
    loss = np.mean(np.square(predictions - targets))
    return (w1, b1, w2, b2, loss)
```
%% Cell type:code id: tags:
```
def one_hot(D_array, num_classes=None):
    """Transform a vector of class indices into its one-hot encoding.

    Args:
        D_array: Array of n class indices (values are truncated to
            integers).
        num_classes: Number of columns of the result. When ``None`` (the
            default) it is inferred as ``max(D_array) + 1``; pass it
            explicitly when a batch may not contain the highest class.

    Returns:
        A float matrix of shape (n, num_classes) in which row ``i`` holds a
        single 1, in column ``D_array[i]``, and 0 elsewhere. An empty input
        with no ``num_classes`` yields a (0, 0) matrix.
    """
    indices = np.asarray(D_array).ravel().astype(np.int64)
    n = indices.shape[0]
    if num_classes is None:
        num_classes = int(indices.max()) + 1 if n else 0
    o_h_matrix = np.zeros((n, num_classes))
    if n:
        # One vectorised assignment: row i, column indices[i].
        o_h_matrix[np.arange(n), indices] = 1
    return o_h_matrix
```
%% Cell type:code id: tags:
```
def softmax(x):
    """Row-wise softmax activation function.

    Args:
        x: Matrix of scores, one row per sample.

    Returns:
        Matrix of the same shape whose rows are non-negative and sum to 1.
    """
    # Softmax is unchanged when a constant is subtracted from a row, so
    # shift each row by its maximum: the largest exponent becomes 0 and
    # exp() can no longer overflow to inf (which produced inf/inf = NaN).
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp_x = np.exp(shifted)
    func = exp_x / exp_x.sum(axis=1, keepdims=True)
    return func
```
%% Cell type:code id: tags:
```
def learn_once_cross_entropy(w1, b1, w2, b2, data, targets, learning_rate):
    """Perform one gradient-descent step using a cross-entropy loss.

    The network is ``A1 = sigmoid(data @ w1 + b1)``,
    ``A2 = softmax(A1 @ w2 + b2)`` and the loss is the categorical
    cross-entropy between ``A2`` and the one-hot encoded targets, averaged
    over the batch.

    Args:
        w1: Weights of the first layer, shape (d_in, d_h).
        b1: Biases of the first layer, shape (1, d_h).
        w2: Weights of the second layer, shape (d_h, d_out).
        b2: Biases of the second layer, shape (1, d_out).
        data: Input matrix of shape (batch_size, d_in).
        targets: Vector of batch_size class indices.
        learning_rate: Step size of the gradient descent.

    Returns:
        A tuple ``(w1, b1, w2, b2, loss)`` with the updated parameters
        (same shapes as the inputs) and the loss computed with the updated
        parameters.
    """
    A0 = data
    n_samples = A0.shape[0]

    Targets = one_hot(targets)
    n_classes = w2.shape[1]
    if Targets.shape[1] < n_classes:
        # one_hot infers its width from the largest label present; widen
        # with zero columns when the batch lacks the highest classes so the
        # encoding lines up with the network output.
        widened = np.zeros((Targets.shape[0], n_classes))
        widened[:, :Targets.shape[1]] = Targets
        Targets = widened

    # Forward pass with the current parameters.
    A1 = segmoid(np.matmul(A0, w1) + b1)
    A2 = softmax(np.matmul(A1, w2) + b2)

    # Backward pass.
    # Layer 2: for softmax + cross-entropy the gradient w.r.t. the
    # pre-activation is (A2 - Targets); divide by the batch size because
    # the loss is a batch average.
    D_Z2 = (A2 - Targets) / n_samples
    D_W2 = np.matmul(A1.T, D_Z2)
    # A bias is shared by every sample: sum its gradient over the batch so
    # the bias keeps its original shape.
    D_B2 = D_Z2.sum(axis=0).reshape(np.shape(b2))
    # Layer 1: the sigmoid derivative A1 * (1 - A1) is applied elementwise.
    D_A1 = np.matmul(D_Z2, w2.T)
    D_Z1 = D_A1 * A1 * (1 - A1)
    D_W1 = np.matmul(A0.T, D_Z1)
    D_B1 = D_Z1.sum(axis=0).reshape(np.shape(b1))

    # Gradient-descent update.
    w1 = w1 - learning_rate * D_W1
    w2 = w2 - learning_rate * D_W2
    b1 = b1 - learning_rate * D_B1
    b2 = b2 - learning_rate * D_B2

    # Forward pass with the updated parameters to report the loss.
    C1 = segmoid(np.matmul(A0, w1) + b1)
    C2 = softmax(np.matmul(C1, w2) + b2)
    # Keep probabilities strictly positive: log(0) = -inf and 0 * -inf = NaN
    # would otherwise poison the loss.
    safe_C2 = np.maximum(C2, np.finfo(C2.dtype).tiny)
    loss = -np.sum(np.multiply(Targets, np.log(safe_C2))) / float(C2.shape[0])
    return (w1, b1, w2, b2, loss)
```
%% Cell type:code id: tags:
```
def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
    """Train the MLP for ``num_epoch`` cross-entropy gradient steps.

    After every step the training accuracy (in percent) is measured with a
    forward pass over ``data_train``.

    Args:
        w1, b1, w2, b2: Initial weights and biases of the network.
        data_train: Training samples, one per row.
        labels_train: Class index of every training sample.
        learning_rate: Step size passed to each gradient step.
        num_epoch: Number of gradient steps to perform.

    Returns:
        A tuple ``(w1, w2, b1, b2, train_accuracies)``. Note the order: both
        weight matrices come first, then both biases, then the list holding
        one accuracy per epoch.
    """
    history = []
    for _ in range(num_epoch):
        w1, b1, w2, b2, _loss = learn_once_cross_entropy(
            w1, b1, w2, b2, data_train, labels_train, learning_rate
        )
        # Forward pass with the freshly updated parameters.
        hidden = segmoid(np.matmul(data_train, w1) + b1)
        probabilities = softmax(np.matmul(hidden, w2) + b2)
        predicted = np.argmax(probabilities, axis=1)
        n_correct = np.sum(predicted == labels_train)
        history.append((n_correct / predicted.shape[0]) * 100)
    return (w1, w2, b1, b2, history)
```
%% Cell type:code id: tags:
```
def test_mlp(w1, b1, w2, b2, data_test, labels_test):
    """Measure the accuracy of a trained MLP on a test set.

    Args:
        w1, b1, w2, b2: Weights and biases of the network.
        data_test: Test samples, one per row.
        labels_test: True class index of every test sample.

    Returns:
        The percentage (0-100) of test samples whose predicted class, i.e.
        the arg-max of the softmax output, equals the true label.
    """
    # Forward pass: sigmoid hidden layer followed by a softmax output layer.
    hidden = segmoid(np.matmul(data_test, w1) + b1)
    probabilities = softmax(np.matmul(hidden, w2) + b2)
    predicted = np.argmax(probabilities, axis=1)
    n_correct = np.sum(predicted == labels_test)
    return (n_correct / predicted.shape[0]) * 100
```
%% Cell type:code id: tags:
```
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
    """Initialise, train and evaluate an MLP classifier with 10 outputs.

    Args:
        data_train: Training samples, one per row.
        labels_train: Class index of every training sample.
        data_test: Test samples, one per row.
        labels_test: Class index of every test sample.
        d_h: Number of units in the hidden layer.
        learning_rate: Step size of the gradient descent.
        num_epoch: Number of training epochs.

    Returns:
        A tuple ``(train_accuracies, final_accuracy)``: the list of training
        accuracies across epochs and the accuracy on the test set.
    """
    n_inputs = data_train.shape[1]
    n_outputs = 10
    # Weights are drawn uniformly from [-1, 1) (w1 first, then w2); biases
    # start at zero.
    w1 = 2 * np.random.rand(n_inputs, d_h) - 1
    w2 = 2 * np.random.rand(d_h, n_outputs) - 1
    b1 = np.zeros((1, d_h))
    b2 = np.zeros((1, n_outputs))

    # train_mlp returns the weights before the biases.
    trained = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
    w1, w2, b1, b2, train_accuracies = trained

    final_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
    return train_accuracies, final_accuracy
```
%% Cell type:code id: tags:
```
# Smoke test for learn_once_mse on a tiny random problem:
# 30 samples, 5 inputs, 3 hidden units, 2 outputs.
N = 30
d_in = 5
d_h = 3
d_out = 2
# Weights drawn uniformly from [-1, 1); biases start at zero.
w1 = 2 * np.random.rand(d_in, d_h) - 1
b1 = np.zeros((1, d_h))
w2 = 2 * np.random.rand(d_h, d_out) - 1
b2 = np.zeros((1, d_out))
# Random inputs and targets in [0, 1).
data = np.random.rand(N, d_in)
targets = np.random.rand(N, d_out)
learning_rate=0.5
# One gradient step; the updated parameters get an "n" suffix.
(w1n,b1n,w2n,b2n,loss)=learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate)
```
%% Output
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:2: RuntimeWarning: overflow encountered in exp
%% Cell type:code id: tags:
```
# Sanity check: the MSE loss returned by the step above lies strictly between 0 and 1.
0 < loss < 1
```
%% Output
True
%% Cell type:code id: tags:
```
# The gradient step must not change the shape of the first weight matrix.
w1n.shape==w1.shape
```
%% Output
True
%% Cell type:code id: tags:
```
# one_hot check: labels [1, 2, 0] must give rows with the 1 in columns 1, 2 and 0.
assert ((one_hot(np.array([1,2,0])) == [[0, 1, 0],[0, 0, 1],[1, 0, 0]]).all())==True
```
%% Cell type:code id: tags:
```
# Load the first training batch for the MLP experiments (overwrites `data` and `labels`).
(data,labels)=read_cifar_batch('/content/drive/MyDrive/cifar10/data_batch_1')
```
%% Cell type:code id: tags:
```
# Network dimensions and hyper-parameters for the MLP run on the loaded batch:
# d_in inputs (one per data column), 64 hidden units, 10 outputs.
N = data.shape[0]
d_in = data.shape[1]
d_h = 64
d_out = 10
# Weights drawn uniformly from [-1, 1); biases start at zero.
w1 = 2 * np.random.rand(d_in, d_h) - 1
b1 = np.zeros((1, d_h))
w2 = 2 * np.random.rand(d_h, d_out) - 1
b2 = np.zeros((1, d_out))
learning_rate=0.1
num_epoch=100
```
%% Cell type:code id: tags:
```
# Checks whether the loss (element 4 of the returned tuple) after one
# cross-entropy step lies strictly between 0 and 1.
# NOTE(review): a cross-entropy loss is not bounded above by 1, so this can
# be False even for a valid loss; the recorded output is False and comes with
# overflow / invalid-value warnings.
0< learn_once_cross_entropy(w1,b1,w2,b2,data,labels,learning_rate)[4] <1
```
%% Output
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:2: RuntimeWarning: overflow encountered in exp
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:3: RuntimeWarning: overflow encountered in exp
This is separate from the ipykernel package so we can avoid doing imports until
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:4: RuntimeWarning: invalid value encountered in true_divide
after removing the cwd from sys.path.
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:28: RuntimeWarning: divide by zero encountered in log
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:28: RuntimeWarning: invalid value encountered in multiply
False
%% Cell type:code id: tags:
```
# Train for num_epoch epochs and print the per-epoch training accuracies
# (element 4 of the tuple returned by train_mlp).
print(train_mlp(w1,b1,w2,b2,data,labels,learning_rate,num_epoch)[4])
```
%% Output
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:2: RuntimeWarning: overflow encountered in exp
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:3: RuntimeWarning: overflow encountered in exp
This is separate from the ipykernel package so we can avoid doing imports until
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:4: RuntimeWarning: invalid value encountered in true_divide
after removing the cwd from sys.path.
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:28: RuntimeWarning: divide by zero encountered in log
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:28: RuntimeWarning: invalid value encountered in multiply
[10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05, 10.05]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment