"""Helpers for reading CIFAR batch files and splitting them into train/test sets.

Reconstructed from the patch "fichier read_cifar.py" (commit 4e10c5f), which
creates ``read_cifar.py`` and deletes the ``classifi1.ipynb`` notebook.
"""

import os
import pickle

import numpy as np

# Batch files expected inside the dataset directory, in loading order.
_BATCH_FILES = (
    "data_batch_1",
    "data_batch_2",
    "data_batch_3",
    "data_batch_4",
    "data_batch_5",
    "test_batch",
)


def read_cifar_batch(file):
    """Read a single pickled batch file.

    Args:
        file: Path of a single batch file.

    Returns:
        A ``(data, labels)`` tuple where ``data`` is the batch's ``b'data'``
        entry converted to ``np.float32`` (documented by the original author
        as a ``batch_size x data_size`` matrix) and ``labels`` is the
        ``b'labels'`` entry converted to an ``np.int64`` array.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the batch has no ``b'data'`` or ``b'labels'`` entry.
    """
    # NOTE(security): unpickling can execute arbitrary code. Only load batch
    # files that come from a trusted source.
    with open(file, "rb") as stream:
        batch = pickle.load(stream, encoding="bytes")
    data = np.asarray(batch[b"data"], dtype=np.float32)
    labels = np.asarray(batch[b"labels"], dtype=np.int64)
    return data, labels


def read_cifar(folder):
    """Read every batch of the dataset directory, test batch included.

    Args:
        folder: Path of the directory containing the six batch files
            (``data_batch_1`` ... ``data_batch_5`` and ``test_batch``).

    Returns:
        A ``(data, labels)`` tuple: the ``np.float32`` data of all batches
        stacked row-wise in the order listed above, and the matching
        ``np.int64`` labels joined into a single vector.
    """
    data_parts = []
    label_parts = []
    for name in _BATCH_FILES:
        batch_data, batch_labels = read_cifar_batch(os.path.join(folder, name))
        data_parts.append(batch_data)
        label_parts.append(batch_labels)
    # Concatenate once at the end: stacking inside the loop would re-copy
    # everything loaded so far on each iteration.
    return np.concatenate(data_parts, axis=0), np.concatenate(label_parts)


def split_dataset(data, labels, split):
    """Shuffle a dataset and split it into a training set and a test set.

    Args:
        data: Array of samples; the first dimension indexes the samples.
        labels: Array of labels with one entry per sample.
        split: Float between 0 and 1, the fraction of samples assigned to
            the training set.

    Returns:
        A ``(data_train, labels_train, data_test, labels_test)`` tuple. The
        data keeps the dtype of ``data``; the labels are 1-D ``np.int64``
        arrays.

    Raises:
        ValueError: If ``split`` is outside ``[0, 1]`` or ``labels`` does not
            hold exactly one entry per sample.
    """
    if not 0 <= split <= 1:
        raise ValueError(f"split must be between 0 and 1, got {split!r}")

    data = np.asarray(data)
    n_samples = data.shape[0]
    # reshape raises ValueError when the label count does not match.
    labels = np.asarray(labels).reshape(n_samples)
    n_train = int(split * n_samples)

    # Shuffle indices rather than a stacked [data | labels] matrix: stacking
    # float32 data with int64 labels promotes everything to float64, which
    # changes the data dtype and can corrupt large label values. The order
    # is still driven by NumPy's global random state.
    order = np.random.permutation(n_samples)
    train_idx = order[:n_train]
    test_idx = order[n_train:]

    return (
        data[train_idx],
        labels[train_idx].astype(np.int64),
        data[test_idx],
        labels[test_idx].astype(np.int64),
    )