Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Target project: mbabay/image-classification
Commits on Source (2)
  • Create .gitignore · a61e90cf
    Khalil authored
  • commit du BE · 68e10453
    Khalil authored
    This is a first version:
    no errors, but it does not work;
    to be reviewed in detail.
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
<meta HTTP-EQUIV="REFRESH" content="0; url=http://www.cs.toronto.edu/~kriz/cifar.html">
import numpy as np

#Question1
#X1: train set, X2: test set
def distance_matrix(X1, X2):
    # Pairwise Euclidean distances via the expansion
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b
    x1 = np.sum(X1 * X1, axis=1)[:, None]   # squared norms as a column vector
    x2 = np.sum(X2 * X2, axis=1)[None, :]   # squared norms as a row vector
    dists = np.sqrt(x1 + x2 - 2 * (X1 @ X2.T))
    return dists
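# Hedged sanity check (not in the original code): the vectorized computation
# above should match a naive double loop on small random inputs.
# rng = np.random.default_rng(0)
# A, B = rng.random((4, 3)), rng.random((5, 3))
# naive = np.array([[np.sqrt(np.sum((a - b) ** 2)) for b in B] for a in A])
# assert np.allclose(distance_matrix(A, B), naive)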
#Question2
# dists is the distance matrix of shape [n_train, n_test]
def knn_predict(dists, labels_train, k):
    n_test = np.shape(dists)[1]
    labels_test = np.zeros((n_test, 1))
    for i in range(n_test):
        nearest_indices = np.argsort(dists[:, i])[:k]      # k nearest training points
        nearest_labels = labels_train[nearest_indices]
        uni_labels, frequence = np.unique(nearest_labels, return_counts=True)
        labels_test[i] = uni_labels[np.argmax(frequence)]  # majority vote
    return labels_test
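# Quick hedged example (toy data, not from the lab): with k=3 the majority
# vote should label the single test point as class 0.
# train = np.array([[0., 0.], [0.1, 0.], [0., 0.1], [5., 5.]])
# y = np.array([0, 0, 0, 1])
# test = np.array([[0.05, 0.05]])
# print(knn_predict(distance_matrix(train, test), y, 3))  # -> [[0.]]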
#Question3
def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    dists = distance_matrix(data_train, data_test)
    labels = knn_predict(dists, labels_train, k)
    # fraction of test points whose prediction matches the true test label
    accuracy = np.mean(labels.flatten() == labels_test)
    return accuracy
if __name__ == '__main__':
    #Question4
    from read_cifar import *
    import matplotlib.pyplot as plt
    split = 0.9
    path = 'C:/Users/LENOVO/Desktop/deeplearning/BE1 - Image Classification/image-classification/data/cifar-10-batches-py'
    data, labels = read_cifar(path)
    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split)
    K = list(range(1, 21))
    A = []
    for k in K:
        accuracy = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
        A.append(accuracy)
        print(accuracy)
    plt.plot(K, A)
    plt.title("Accuracy = f(k)")
    plt.xlabel("k")
    plt.ylabel("Accuracy")
    plt.savefig('results/knn.png')  # save before show(), otherwise the saved figure is blank
    plt.show()
    # Conclusion:
    # The accuracy stays around 10% for all values of k tried, which confirms
    # that k-NN on raw pixels is not well suited to image classification.
import numpy as np

#Q10
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
    # Forward pass
    a0 = data                    # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = 1 / (1 + np.exp(-z1))   # output of the hidden layer (sigmoid activation)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = 1 / (1 + np.exp(-z2))   # output of the output layer (sigmoid, consistent with the derivative below)
    predictions = a2
    # Compute loss (MSE)
    loss = np.mean(np.square(predictions - targets))
    # Backward pass
    da2 = 2 * (a2 - targets) / targets.size   # gradient of the mean over all N*d_out entries
    dz2 = da2 * (a2 * (1 - a2))               # sigmoid derivative
    dw2 = np.dot(np.transpose(a1), dz2)
    db2 = np.sum(dz2, axis=0, keepdims=True)  # sum over the batch, shape (1, d_out)
    da1 = np.dot(dz2, np.transpose(w2))
    dz1 = da1 * a1 * (1 - a1)
    dw1 = np.dot(np.transpose(a0), dz1)
    db1 = np.sum(dz1, axis=0, keepdims=True)
    # Gradient step
    w1 -= learning_rate * dw1
    b1 -= learning_rate * db1
    w2 -= learning_rate * dw2
    b2 -= learning_rate * db2
    return w1, b1, w2, b2, loss
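# Hedged check (not part of the lab code): compare the analytic dw2 with a
# finite difference of the MSE loss on toy data; names and shapes are illustrative.
# def _mse_loss(w1, b1, w2, b2, X, Y):
#     a1 = 1 / (1 + np.exp(-(X @ w1 + b1)))
#     a2 = 1 / (1 + np.exp(-(a1 @ w2 + b2)))
#     return np.mean(np.square(a2 - Y))
# rng = np.random.default_rng(0)
# X, Y = rng.random((5, 4)), rng.random((5, 3))
# w1, b1 = rng.random((4, 6)) - 0.5, np.zeros((1, 6))
# w2, b2 = rng.random((6, 3)) - 0.5, np.zeros((1, 3))
# eps = 1e-6
# w2p = w2.copy(); w2p[0, 0] += eps
# print((_mse_loss(w1, b1, w2p, b2, X, Y) - _mse_loss(w1, b1, w2, b2, X, Y)) / eps)
# # this number should be close to dw2[0, 0] computed inside learn_once_mse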
#Q11
def one_hot(A):
    # A is a 1-D array of integer class labels
    s = np.size(A)
    m = int(np.max(A))
    R = np.zeros((s, m + 1))
    for i in range(s):
        R[i, A[i]] = 1
    return R
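# Quick hedged example: one_hot on three labels drawn from four classes.
# print(one_hot(np.array([0, 2, 3])))
# -> [[1. 0. 0. 0.]
#     [0. 0. 1. 0.]
#     [0. 0. 0. 1.]]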
#Q12
def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
    N, _ = np.shape(data)
    # one-hot encoding of the labels
    one_hot_labels = one_hot(labels_train)
    # Forward pass
    a0 = data                    # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = 1 / (1 + np.exp(-z1))   # output of the hidden layer (sigmoid activation)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)  # softmax, normalized per row
    predictions = a2
    # Compute loss (categorical cross-entropy)
    loss = -np.sum(one_hot_labels * np.log(predictions)) / N
    # Backward pass: softmax + cross-entropy gives (a2 - y), averaged over the batch
    dz2 = (a2 - one_hot_labels) / N
    dw2 = np.dot(np.transpose(a1), dz2)
    db2 = np.sum(dz2, axis=0, keepdims=True)
    da1 = np.dot(dz2, np.transpose(w2))
    dz1 = da1 * a1 * (1 - a1)
    dw1 = np.dot(np.transpose(a0), dz1)
    db1 = np.sum(dz1, axis=0, keepdims=True)
    w1 -= learning_rate * dw1
    b1 -= learning_rate * db1
    w2 -= learning_rate * dw2
    b2 -= learning_rate * db2
    # predictions (a2) are returned for the accuracy computation in Question 13
    return w1, b1, w2, b2, loss, a2
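# Hedged smoke test (toy shapes, not the CIFAR data): one training step should
# run without shape errors and return a finite loss and (8, 3) probabilities.
# rng = np.random.default_rng(0)
# X = rng.random((8, 5)); y = np.array([0, 1, 2, 0, 1, 2, 0, 1])
# w1, b1 = rng.random((5, 4)) - 0.5, np.zeros((1, 4))
# w2, b2 = rng.random((4, 3)) - 0.5, np.zeros((1, 3))
# w1, b1, w2, b2, loss, probs = learn_once_cross_entropy(w1, b1, w2, b2, X, y, 0.1)
# print(loss, probs.shape)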
#Q13
def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
    train_accuracies = []
    for i in range(num_epoch):
        w1, b1, w2, b2, loss, predictions = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
        # predictions is a matrix of probabilities; the predicted class of each
        # sample is the index of its largest probability
        predicted_classes = np.argmax(predictions, axis=1)
        train_accuracies.append(np.mean(predicted_classes == labels_train))
    return w1, b1, w2, b2, train_accuracies
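# Quick hedged example of the argmax accuracy rule on a toy probability matrix:
# probs = np.array([[0.7, 0.2, 0.1],
#                   [0.1, 0.1, 0.8]])
# print(np.mean(np.argmax(probs, axis=1) == np.array([0, 2])))  # -> 1.0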
#Q14
def test_mlp(w1, b1, w2, b2, data_test, labels_test):
    # Forward pass only
    a0 = data_test               # the test data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = 1 / (1 + np.exp(-z1))   # output of the hidden layer (sigmoid activation)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)  # softmax, normalized per row
    predicted_classes = np.argmax(a2, axis=1)
    test_accuracy = np.mean(predicted_classes == labels_test)
    return test_accuracy
#Q15
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
    _, d_in = np.shape(data_train)
    d_out = 1 + int(np.max(labels_train))
    w1 = 2 * np.random.rand(d_in, d_h) - 1   # first layer weights
    b1 = np.zeros((1, d_h))                  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
    b2 = np.zeros((1, d_out))                # second layer biases
    # keep the trained parameters so the test uses the learned network
    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
    return train_accuracies, test_accuracy
#Q16
if __name__ == '__main__':
    from read_cifar import *
    import matplotlib.pyplot as plt
    split = 0.9
    d_h = 64
    learning_rate = 0.1
    num_epoch = 100
    path = 'C:/Users/LENOVO/Desktop/deeplearning/BE1 - Image Classification/image-classification/data/cifar-10-batches-py'
    data, labels = read_cifar(path)
    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split)
    train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch)
    print(train_accuracies)
    print("test accuracy:", test_accuracy)
    epochs = list(range(1, num_epoch + 1))
    plt.plot(epochs, train_accuracies)
    plt.title("Training accuracy across epochs")
    plt.xlabel("epoch")
    plt.ylabel("Accuracy")
    plt.savefig('results/mlp.png')  # save before show(), otherwise the saved figure is blank
    plt.show()
import numpy as np
import pickle

#Question 2
def read_cifar_batch(path):
    # one CIFAR-10 batch file: 10000 images of 32x32x3 = 3072 values each
    with open(path, 'rb') as fo:
        dic = pickle.load(fo, encoding='bytes')
    data = np.array(dic[b'data'], np.float32)
    labels = np.array(dic[b'labels'], np.int64)
    return data, labels

# test1
# path = 'C:/Users/LENOVO/Desktop/deeplearning/BE1 - Image Classification/image-classification/data/cifar-10-batches-py/data_batch_1'
# print(read_cifar_batch(path))
#Question 3
def read_cifar(path):
    # concatenate the five training batches and the test batch
    L = ['data_batch_' + str(i) for i in range(1, 6)] + ['test_batch']
    data, labels = read_cifar_batch(path + '/' + L[0])
    for i in range(1, len(L)):
        data_i, labels_i = read_cifar_batch(path + '/' + L[i])
        data = np.concatenate((data, data_i), axis=0)
        labels = np.concatenate((labels, labels_i), axis=0)
    return data, labels
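# Hedged check: with the full CIFAR-10 folder (five training batches plus the
# test batch), data should have shape (60000, 3072) and labels shape (60000,).
# data, labels = read_cifar('<path-to-cifar-10-batches-py>')
# assert data.shape == (60000, 3072) and labels.shape == (60000,)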
#Question4
def split_dataset(data, labels, split):
    # shuffle data and labels together, then cut at the split fraction
    x = np.size(labels)
    rand = np.arange(x)
    np.random.shuffle(rand)
    data = data[rand]
    labels = labels[rand]
    cut = int(split * x)
    return data[:cut], labels[:cut], data[cut:], labels[cut:]

if __name__ == '__main__':
    # test2 and test3: run only when this file is executed directly, so that
    # importing read_cifar from knn.py or mlp.py does not trigger them
    path = 'C:/Users/LENOVO/Desktop/deeplearning/BE1 - Image Classification/image-classification/data/cifar-10-batches-py'
    data, labels = read_cifar(path)
    print(data, labels)
    print(split_dataset(data, labels, 0.5))