Skip to content
Snippets Groups Projects
Commit 460f2847 authored by hajer627's avatar hajer627
Browse files

Image Classification

parent 26fce385
Branches
No related tags found
No related merge requests found
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
data
\ No newline at end of file
knn.py 0 → 100644
import numpy as np
from read_cifar import *
from matplotlib import pyplot as plt
def distance_matrix(mat1, mat2):
    """Return the pairwise Euclidean distances between the rows of two matrices.

    The squared distance is expanded as ||a||^2 - 2 a.b + ||b||^2 so the whole
    matrix is obtained from one matrix product instead of a double loop.

    Args:
        mat1: 2-D array with one sample per row.
        mat2: 2-D array with one sample per row and the same number of
            columns as ``mat1``.

    Returns:
        2-D array whose entry (i, j) is the distance between row i of
        ``mat1`` and row j of ``mat2``.
    """
    norm1 = np.sum(mat1**2, axis=1, keepdims=True)
    norm2 = np.sum(mat2**2, axis=1, keepdims=True)
    dot_products = np.dot(mat1, mat2.T)
    squared = norm1 - 2 * dot_products + norm2.T
    # Floating-point rounding can leave a tiny negative value where the true
    # squared distance is (close to) zero; clamp so sqrt never yields NaN.
    dists = np.sqrt(np.maximum(squared, 0))
    return dists
def knn_predict(dists, labels_train, k):
    """Predict a label for every row of a distance matrix by k-NN majority vote.

    Args:
        dists: 2-D array; row i holds the distances from query i to every
            training sample (column j refers to ``labels_train[j]``).
        labels_train: labels of the training samples.
        k: number of nearest neighbours that vote.

    Returns:
        1-D integer array with one predicted label per row of ``dists``.
        When several labels tie, the one that appears first among the
        neighbours (ordered by increasing distance) wins.
    """
    n_queries = dists.shape[0]
    predicted_labels = np.zeros(n_queries, dtype=int)
    for row, row_dists in enumerate(dists):
        neighbours = np.argsort(row_dists)[:k]
        votes = [labels_train[j] for j in neighbours]
        # max() returns the first element reaching the highest count.
        predicted_labels[row] = max(votes, key=votes.count)
    return predicted_labels
def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    """Classify the test set with k-NN and return the classification accuracy.

    Args:
        data_train: 2-D array of training samples, one per row.
        labels_train: labels of the training samples.
        data_test: 2-D array of test samples, one per row.
        labels_test: true labels of the test samples.
        k: number of nearest neighbours that vote.

    Returns:
        Fraction of test samples whose predicted label equals the true label.
    """
    # knn_predict produces one prediction per ROW and uses the COLUMN indices
    # to look up labels_train, so rows must be test samples and columns
    # training samples.
    dists = distance_matrix(data_test, data_train)
    predicted_labels = knn_predict(dists, labels_train, k)
    accuracy = np.sum(predicted_labels == labels_test) / len(labels_test)
    return accuracy
def acc_graph():
    """Plot the k-NN accuracy on a 90/10 CIFAR split for k = 1..19.

    The dataset is read from a hard-coded local path, the accuracy is
    evaluated for every k, and the resulting curve is saved to disk and then
    displayed.
    """
    data_path = r"C:\Intel\Desktop\DeepLearning\image-classification\data\cifar-10-batches-py"
    allData, allLabels = read_cifar(data_path)
    data_train, labels_train, data_test, labels_test = split_dataset(allData, allLabels, split=0.9)

    # The distances do not depend on k, so compute them once instead of once
    # per k. Rows are test samples, columns are training samples.
    dists = distance_matrix(data_test, data_train)

    axis = list(range(1, 20))
    result = []
    for k in axis:
        predicted_labels = knn_predict(dists, labels_train, k)
        result.append(np.sum(predicted_labels == labels_test) / len(labels_test))

    plt.plot(axis, result)
    plt.title("the variation of the accuracy as a function of k")
    plt.xlabel("Number of neighbors")
    plt.ylabel("Accuracy")
    # Save before showing: once the window opened by show() is closed the
    # current figure may be gone and savefig would write an empty image.
    plt.savefig(r"C:\Intel\Desktop\DeepLearning\image-classification\results")
    plt.show()
# Produce the accuracy-versus-k plot only when this file is run as a script,
# not when its functions are imported.
if __name__ == "__main__":
    acc_graph()
mlp.py 0 → 100644
from read_cifar import *
from matplotlib import pyplot as plt
def segmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)) of ``x``, element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def deriv_segmoid(x):
    """Return the derivative of the sigmoid at ``x``: s(x) * (1 - s(x))."""
    activation = segmoid(x)
    return activation * (1 - activation)
def softmax(x):
    """Return the row-wise softmax of a 2-D array.

    Args:
        x: 2-D array of scores, one sample per row.

    Returns:
        Array of the same shape in which every row is non-negative and sums
        to 1.
    """
    # Softmax is invariant to adding a constant per row; removing the row
    # maximum keeps exp() from overflowing to inf (which would give NaN).
    shifted = x - np.max(x, axis=1, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=1, keepdims=True)
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
    """Perform one gradient-descent step on a two-layer sigmoid MLP (MSE loss).

    Args:
        w1: weights of the first layer, shape (d_in, d_h).
        b1: biases of the first layer.
        w2: weights of the second layer, shape (d_h, d_out).
        b2: biases of the second layer.
        data: input batch, one sample per row.
        targets: desired outputs, same shape as the network output.
        learning_rate: step size of the update.

    Returns:
        Tuple ``(w1, b1, w2, b2, loss)`` with the updated parameters (same
        shapes as the inputs) and the mean squared error measured after the
        update.
    """
    # Forward pass with the current parameters.
    a0 = data
    a1 = segmoid(np.matmul(a0, w1) + b1)
    a2 = segmoid(np.matmul(a1, w2) + b2)  # The predicted classes

    # Backward pass. The loss is the mean over every output element, so its
    # gradient carries a 1 / (number of elements) factor.
    dc_da2 = 2 * (a2 - targets) / np.size(targets)
    dc_dz2 = a2 * (1 - a2) * dc_da2
    dc_dw2 = np.matmul(a1.T, dc_dz2)
    # A bias is shared by all samples: its gradient is the sum over the batch
    # axis, which also keeps the bias from growing to one row per sample.
    dc_db2 = np.sum(dc_dz2, axis=0)
    dc_da1 = np.matmul(dc_dz2, w2.T)
    dc_dz1 = a1 * (1 - a1) * dc_da1
    dc_dw1 = np.matmul(a0.T, dc_dz1)
    dc_db1 = np.sum(dc_dz1, axis=0)

    # Gradient-descent update.
    w1 = w1 - learning_rate * dc_dw1
    w2 = w2 - learning_rate * dc_dw2
    b1 = b1 - learning_rate * dc_db1
    b2 = b2 - learning_rate * dc_db2

    # Forward pass with the updated parameters to report the new loss.
    a1 = segmoid(np.matmul(a0, w1) + b1)
    predictions = segmoid(np.matmul(a1, w2) + b2)
    loss = np.mean(np.square(predictions - targets))
    return (w1, b1, w2, b2, loss)
def one_hot(NDarray, num_classes=None):
    """Return the one-hot encoding of a vector of integer class labels.

    Args:
        NDarray: 1-D array-like of class indices.
        num_classes: number of columns of the result. When omitted it is
            inferred as ``max(label) + 1``, which is too narrow if the
            highest class happens to be absent from ``NDarray``.

    Returns:
        Float array of shape ``(len(NDarray), num_classes)`` with a single 1
        per row, in the column given by the label. An empty input with no
        ``num_classes`` gives an array of shape ``(0, 0)``.
    """
    labels = np.asarray(NDarray).astype(int).ravel()
    if num_classes is None:
        # np.max fails on an empty array, so handle that case explicitly.
        num_classes = int(labels.max()) + 1 if labels.size else 0
    result = np.zeros((labels.shape[0], num_classes))
    result[np.arange(labels.shape[0]), labels] = 1
    return result
def cross_entropy(classes, prob):
    """Return the mean cross-entropy between one-hot targets and probabilities.

    Args:
        classes: one-hot encoded targets, one sample per row.
        prob: predicted probabilities, same shape as ``classes``.

    Returns:
        The sum of ``-classes * log(prob)`` divided by the number of rows of
        ``prob``.
    """
    # log(0) is -inf and 0 * -inf is NaN, which would poison the whole sum;
    # a tiny floor keeps the loss finite for exactly-zero probabilities.
    eps = 1e-12
    safe_prob = np.maximum(prob, eps)
    loss = -np.sum(np.multiply(classes, np.log(safe_prob)))
    return loss / float(prob.shape[0])
def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
    """Perform one gradient-descent step on a sigmoid/softmax MLP (cross-entropy).

    Args:
        w1: weights of the first layer, shape (d_in, d_h).
        b1: biases of the first layer.
        w2: weights of the second layer, shape (d_h, d_out).
        b2: biases of the second layer.
        data: input batch, one sample per row.
        labels_train: integer class label of every row of ``data``.
        learning_rate: step size of the update.

    Returns:
        Tuple ``(w1, b1, w2, b2, loss)`` with the updated parameters and the
        cross-entropy loss measured after the update.
    """
    a0 = data
    nb_rows = data.shape[0]

    targets = one_hot(labels_train)
    # one_hot sizes its result from the largest label present, so a batch
    # that lacks the highest classes comes back too narrow. Pad with zero
    # columns up to the network's output width to avoid a shape mismatch.
    missing_cols = w2.shape[1] - targets.shape[1]
    if missing_cols > 0:
        targets = np.pad(targets, ((0, 0), (0, missing_cols)), mode="constant")

    # Forward pass with the current parameters.
    a1 = segmoid(np.matmul(a0, w1) + b1)
    a2 = softmax(np.matmul(a1, w2) + b2)

    # Backward pass: for softmax + cross-entropy the gradient with respect to
    # the pre-activation is (prediction - target), averaged over the batch.
    dc_dz2 = (a2 - targets) / nb_rows
    dc_dw2 = np.matmul(a1.T, dc_dz2)
    dc_db2 = np.sum(dc_dz2, axis=0)
    dc_da1 = np.matmul(dc_dz2, w2.T)
    dc_dz1 = a1 * (1 - a1) * dc_da1
    dc_dw1 = np.matmul(a0.T, dc_dz1)
    dc_db1 = np.sum(dc_dz1, axis=0)

    # Gradient-descent update.
    w1 = w1 - learning_rate * dc_dw1
    w2 = w2 - learning_rate * dc_dw2
    b1 = b1 - learning_rate * dc_db1
    b2 = b2 - learning_rate * dc_db2

    # Forward pass with the updated parameters to report the new loss.
    a1 = segmoid(np.matmul(a0, w1) + b1)
    a2 = softmax(np.matmul(a1, w2) + b2)
    loss = cross_entropy(targets, a2)
    return (w1, b1, w2, b2, loss)
def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
    """Train the MLP for ``num_epoch`` cross-entropy steps on the full training set.

    After every step the training accuracy (in percent) is recorded.

    Returns:
        Tuple ``(w1, w2, b1, b2, train_accuracies)``. Note that the two
        weight matrices come first, then the two biases, which differs from
        the parameter order of this function.
    """
    train_accuracies = []
    n_samples_seen = None
    for _ in range(num_epoch):
        step = learn_once_cross_entropy(
            w1, b1, w2, b2, data_train, labels_train, learning_rate
        )
        w1, b1, w2, b2 = step[0], step[1], step[2], step[3]

        # Re-run the forward pass with the fresh parameters to score the epoch.
        hidden = segmoid(np.matmul(data_train, w1) + b1)
        probabilities = softmax(np.matmul(hidden, w2) + b2)
        predict = np.argmax(probabilities, axis=1)

        n_samples_seen = predict.shape[0]
        n_correct = np.sum(predict == labels_train)
        train_accuracies.append((n_correct / n_samples_seen) * 100)
    return (w1, w2, b1, b2, train_accuracies)
def test_mlp(w1, b1, w2, b2, data_test, labels_test):
    """Return the accuracy, in percent, of the MLP on the given test set.

    The network is a sigmoid hidden layer followed by a softmax output; the
    predicted class of a sample is the index of its largest output.
    """
    hidden = segmoid(np.matmul(data_test, w1) + b1)
    probabilities = softmax(np.matmul(hidden, w2) + b2)
    predicted = np.argmax(probabilities, axis=1)
    n_correct = np.sum(predicted == labels_test)
    return (n_correct / predicted.shape[0]) * 100
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
    """Initialise, train and evaluate an MLP with ``d_h`` hidden units.

    Weights are drawn uniformly from [-1, 1), biases start at zero and the
    output layer has 10 units.

    Returns:
        Tuple ``(train_accuracies, test_acc)``: the per-epoch training
        accuracies and the final accuracy on the test set, both in percent.
    """
    n_features = data_train.shape[1]
    n_classes = 10

    # Draw w1 before w2 so the random stream is consumed in the same order.
    w1 = 2 * np.random.rand(n_features, d_h) - 1
    w2 = 2 * np.random.rand(d_h, n_classes) - 1
    b1 = np.zeros((1, d_h))
    b2 = np.zeros((1, n_classes))

    # train_mlp returns the weights first, then the biases.
    trained = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
    w1, w2, b1, b2, train_accuracies = trained

    test_acc = test_mlp(w1, b1, w2, b2, data_test, labels_test)
    return (train_accuracies, test_acc)
# Script entry point: train a 64-unit MLP for 100 epochs on a 90/10 split and
# plot the training accuracy per epoch.
if __name__ == "__main__":
    dir_batches = r'C:\Intel\Desktop\DeepLearning\image-classification\data\cifar-10-batches-py'
    (data, labels) = read_cifar(dir_batches)
    (data_train, labels_train, data_test, labels_test) = split_dataset(data, labels, 0.9)
    d_in = data_train.shape[1]
    d_out = 10
    w1 = 2 * np.random.rand(d_in, 64) - 1
    b1 = np.zeros((1, 64))
    w2 = 2 * np.random.rand(64, d_out) - 1
    b2 = np.zeros((1, d_out))
    # Index 4 of train_mlp's result is the list of per-epoch accuracies.
    acc = train_mlp(w1, b1, w2, b2, data_train, labels_train, 0.1, 100)[4]
    epochs = [i + 1 for i in range(0, 100, 1)]
    plt.plot(epochs, acc)
    plt.title("the evolution of learning accuracy across learning epochs")
    plt.xlabel("epochs")
    plt.ylabel("Accuracy")
    # Save before showing: once the window opened by show() is closed the
    # current figure may be gone and savefig would write an empty image.
    plt.savefig(r"C:\Intel\Desktop\DeepLearning\image-classification\results")
    plt.show()
\ No newline at end of file
import os
import numpy as np
import pickle
def unpickle(path):
    """Load and return the object pickled in the file at ``path``.

    The file is decoded with ``encoding='bytes'``, so string keys written by
    Python 2 come back as ``bytes`` objects.
    """
    # NOTE(security): pickle can execute arbitrary code while loading; only
    # use this on dataset files obtained from a trusted source.
    with open(path, 'rb') as fo:
        # Avoid naming the result "dict", which would shadow the builtin.
        content = pickle.load(fo, encoding='bytes')
    return content
def read_cifar_batch(path):
    """Read one pickled CIFAR batch file.

    Returns:
        Tuple ``(data, labels)``: the ``b'data'`` entry of the batch as a
        float32 array and the ``b'labels'`` entry as an int64 array.
    """
    batch = unpickle(path)
    pixels = np.array(batch[b'data'], dtype=np.float32)
    classes = np.array(batch[b'labels'], dtype=np.int64)
    return pixels, classes
def read_cifar(path):
    """Read every batch file of a CIFAR directory and stack them.

    Every regular file in ``path`` except ``batches.meta`` and
    ``readme.html`` is treated as a batch and read with
    ``read_cifar_batch``. Files are processed in sorted name order so the
    row order of the result does not depend on the directory listing order.

    Args:
        path: directory that contains the batch files.

    Returns:
        Tuple ``(Matrix_Data, allLabels)`` with the data and the labels of
        all batches concatenated along the first axis.

    Raises:
        FileNotFoundError: if the directory contains no batch file.
    """
    non_batch_files = {'batches.meta', 'readme.html'}
    batch_names = sorted(
        name for name in os.listdir(path)
        if name not in non_batch_files
        # Sub-directories cannot be unpickled; skip them instead of crashing.
        and os.path.isfile(os.path.join(path, name))
    )
    if not batch_names:
        raise FileNotFoundError("no CIFAR batch files found in {!r}".format(path))

    # Collect first and concatenate once rather than re-copying the growing
    # array after every batch.
    batches = [read_cifar_batch(os.path.join(path, name)) for name in batch_names]
    Matrix_Data = np.concatenate([data for data, _ in batches], axis=0)
    allLabels = np.concatenate([labels for _, labels in batches], axis=0)
    return (Matrix_Data, allLabels)
def split_dataset(data, labels, split):
    """Shuffle a dataset and cut it into a training part and a test part.

    Args:
        data: array of samples, indexed along its first axis.
        labels: array of labels aligned with ``data``.
        split: fraction of the samples that goes to the training part.

    Returns:
        Tuple ``(data_train, labels_train, data_test, labels_test)``; the
        first ``int(len(data) * split)`` shuffled samples form the training
        part and the rest the test part.

    Raises:
        ValueError: if ``split`` is below 0 or above 1.
    """
    if split < 0 or split > 1:
        raise ValueError("The 'split' factor is a float between 0 and 1")

    n_samples = len(data)
    cut = int(n_samples * split)

    # One permutation drives both arrays so samples stay paired with labels.
    order = np.random.permutation(n_samples)
    train_idx, test_idx = order[:cut], order[cut:]

    return data[train_idx], labels[train_idx], data[test_idx], labels[test_idx]
# Manual smoke run: load one batch and the whole dataset from the local
# CIFAR directory, then print the shapes and the result of a 90/10 split.
if __name__ == "__main__":
    batch_path = r"C:\Intel\Desktop\DeepLearning\image-classification\data\cifar-10-batches-py\data_batch_1"
    data_path = r"C:\Intel\Desktop\DeepLearning\image-classification\data\cifar-10-batches-py"

    data, labels = read_cifar_batch(batch_path)
    allData, allLabels = read_cifar(data_path)
    print("Data shape:", data.shape)
    print("Labels shape:", labels.shape)

    data_train, labels_train, data_test, labels_test = split_dataset(allData, allLabels, split=0.9)
    report = (
        ("Data_train:", data_train),
        ("Labels_train:", labels_train),
        ("Data_test:", data_test),
        ("Labels_test:", labels_test),
    )
    for caption, values in report:
        print(caption, values)
\ No newline at end of file
results.png

2.34 KiB

results/Accuracy.png

32.4 KiB

results/mlp.png

17.2 KiB

import numpy as np
from knn import distance_matrix
from knn import knn_predict
from knn import evaluate_knn
from read_cifar import read_cifar_batch
(data, labels) = read_cifar_batch(r"C:\Intel\Desktop\DeepLearning\image-classification\data\cifar-10-batches-py\data_batch_1")
(data_test, labels_test) = read_cifar_batch(r"C:\Intel\Desktop\DeepLearning\image-classification\data\cifar-10-batches-py\test_batch")
#Unittest
# knn_predict expects a distance matrix (one row per query sample, one column
# per training sample), not raw pixel data, so build it first and reuse it.
dists = distance_matrix(data_test, data)
assert dists.shape == (data_test.shape[0], data.shape[0])
# One prediction per query (test) sample.
assert knn_predict(dists, labels, 2).shape == labels_test.shape
assert 0 < evaluate_knn(data, labels, data_test, labels_test, 5) < 1
import numpy as np
from mlp import *
# Test one MSE gradient-descent step on a tiny random problem
# (30 samples, 3 inputs, 3 hidden units, 2 outputs).
N = 30
d_in = 3
d_h = 3
d_out = 2
w1 = 2 * np.random.rand(d_in, d_h) - 1
b1 = np.zeros((1, d_h))
w2 = 2 * np.random.rand(d_h, d_out) - 1
b2 = np.zeros((1, d_out))
data = np.random.rand(N, d_in)
targets = np.random.rand(N, d_out)
learning_rate=0.5
(w1n,b1n,w2n,b2n,loss)=learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate)
# Outputs and targets both lie in [0, 1], so the mean squared error is below 1.
assert 0 < loss < 1
# The update must not change the shape of the first weight matrix.
assert w1n.shape==w1.shape
# Test the one-hot encoding function.
assert ((one_hot(np.array([1,2,0])) == [[0, 1, 0],[0, 0, 1],[1, 0, 0]]).all())==True
# Test the cross-entropy gradient-descent step and the training loop on the
# first CIFAR batch (requires the dataset at the hard-coded path below).
(data,labels)=read_cifar_batch(r"C:\Intel\Desktop\DeepLearning\image-classification\data\cifar-10-batches-py\data_batch_1")
N = data.shape[0]
d_in = data.shape[1]
d_h = 64
d_out = 10
w1 = 2 * np.random.rand(d_in, d_h) - 1
b1 = np.zeros((1, d_h))
w2 = 2 * np.random.rand(d_h, d_out) - 1
b2 = np.zeros((1, d_out))
learning_rate=0.1
num_epoch=100
# Index 4 of the returned tuple is the loss after the step.
assert 0< learn_once_cross_entropy(w1,b1,w2,b2,data,labels,learning_rate)[4]
# Index 4 of train_mlp's result is the list of per-epoch training accuracies.
print(train_mlp(w1,b1,w2,b2,data,labels,learning_rate,num_epoch)[4])
# Test the model evaluation function.
# NOTE(review): w1, b1, w2, b2 are still the random initial values here; the
# parameters returned by train_mlp above are printed but never stored.
dir_test =r"C:\Intel\Desktop\DeepLearning\image-classification\data\cifar-10-batches-py\test_batch"
(data_test,labels_test)=read_cifar_batch(dir_test)
assert 0< test_mlp(w1,b1,w2,b2,data_test,labels_test) < 100
import numpy as np
from read_cifar import read_cifar_batch
from read_cifar import read_cifar
from read_cifar import split_dataset
batch_path = r"C:\Intel\Desktop\DeepLearning\image-classification\data\cifar-10-batches-py\data_batch_1"
data_path = r"C:\Intel\Desktop\DeepLearning\image-classification\data\cifar-10-batches-py"
(data, labels) = read_cifar_batch(batch_path)
(alldata, alllabels) = read_cifar(data_path)
#Unittest
assert data.shape == (10000, 3072)
assert labels.shape == (10000,)
assert alldata.shape == (60000, 3072)
assert alllabels.shape == (60000,)
first_split = split_dataset(alldata, alllabels, 0.5)[0]
second_split = split_dataset(alldata, alllabels, 0.5)[0]
assert first_split.shape == (30000, 3072)
# Two independent shuffles must give different training sets. Compare the
# whole arrays: a single randomly chosen pixel (picked with the non-existent
# np.random.randrange in the earlier version) can be equal purely by chance.
assert not np.array_equal(first_split, second_split)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment