fichier read_cifar.py

4e10c5f9 · widad174 · 1562f7cc · 1562f7cc · 4e10c5f9
Commit 4e10c5f9 authored 1 year ago by widad174
--- a/classifi1.ipynb
+++ b/classifi1.ipynb
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np "
-   ]
-  }
- ],
- "metadata": {
-  "language_info": {
-   "name": "python"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
-%% Cell type:code id: tags:
-
-``` 
-import numpy as np
-```
--- a/read_cifar.py
+++ b/read_cifar.py
+import pickle
+import numpy as np
+
+# read_cifar_batch
+def read_cifar_batch(file):
+    """Read a single CIFAR batch file.
+
+    Args:
+        file: Path of a single pickled batch file.
+
+    Returns:
+        A tuple ``(data, labels)`` where ``data`` is the batch's ``b'data'``
+        entry converted to an ``np.float32`` array (a matrix of size
+        ``batch_size x data_size``) and ``labels`` is its ``b'labels'``
+        entry converted to an ``np.int64`` vector of size ``batch_size``.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        KeyError: If the pickled dictionary has no ``b'data'`` or
+            ``b'labels'`` entry.
+    """
+    # SECURITY: unpickling can execute arbitrary code; only load batch files
+    # that come from a trusted source.
+    with open(file, 'rb') as fo:
+        # encoding='bytes' keeps the dictionary keys as bytes, hence the
+        # b'...' lookups below.
+        batch = pickle.load(fo, encoding='bytes')
+    # The dictionary is fully in memory at this point, so the conversions are
+    # done after the file has been closed.
+    data = np.asarray(batch[b'data']).astype(np.float32)
+    labels = np.array(batch[b'labels'], dtype=np.int64)
+    return data, labels
+
+
+
+
+# read_cifar
+
+def read_cifar(folder):
+    """Read every CIFAR batch of a directory into one dataset.
+
+    Args:
+        folder: Path of the directory containing the six batch files
+            (``data_batch_1`` to ``data_batch_5`` and ``test_batch``).
+
+    Returns:
+        A tuple ``(data, labels)`` holding the rows and labels of all six
+        batches, stacked in the order the files are listed below (the test
+        batch comes last). ``data`` is the matrix returned by
+        ``read_cifar_batch`` for each file stacked vertically, and ``labels``
+        is the matching concatenated label vector.
+    """
+    # Local import: the module header only brings in pickle and numpy.
+    import os
+
+    batch_files = (
+        "data_batch_1",
+        "data_batch_2",
+        "data_batch_3",
+        "data_batch_4",
+        "data_batch_5",
+        "test_batch",
+    )
+    batches = [
+        read_cifar_batch(os.path.join(folder, name)) for name in batch_files
+    ]
+    # Stack once at the end: growing the arrays inside the loop would copy
+    # everything read so far again for each new batch.
+    data = np.vstack([x for x, _ in batches])
+    labels = np.hstack([y for _, y in batches])
+    return data, labels
+
+
+# split_dataset
+
+def split_dataset(data, labels, split):
+    """Shuffle a dataset and split it into a training set and a test set.
+
+    Args:
+        data: Array whose first dimension indexes the samples.
+        labels: Array holding one label per sample, i.e. as many elements
+            as ``data`` has rows.
+        split: Float between 0 and 1 giving the fraction of the samples that
+            goes to the training set; the rest goes to the test set.
+
+    Returns:
+        A tuple ``(data_train, labels_train, data_test, labels_test)``.
+        The data parts keep the dtype of ``data``; the label parts are
+        one-dimensional ``np.int64`` arrays.
+
+    Raises:
+        ValueError: If ``split`` is outside ``[0, 1]`` or the number of
+            labels differs from the number of rows of ``data``.
+    """
+    data = np.asarray(data)
+    labels = np.asarray(labels).reshape(-1)
+    n_samples = data.shape[0]
+    if labels.shape[0] != n_samples:
+        raise ValueError(
+            "data and labels must have the same size in the first dimension"
+        )
+    if not 0 <= split <= 1:
+        raise ValueError("split must be a float between 0 and 1")
+
+    k = int(split * n_samples)
+    # Shuffle row indices instead of a stacked (data | labels) matrix:
+    # stacking float32 data with int64 labels promotes everything to float64,
+    # which changes the dtype of the returned data and needs a full extra
+    # copy of the dataset. Indexing with one permutation keeps each row
+    # paired with its label.
+    order = np.random.permutation(n_samples)
+    train_idx, test_idx = order[:k], order[k:]
+
+    X_train = data[train_idx]
+    y_train = labels[train_idx].astype(np.int64)
+    X_test = data[test_idx]
+    y_test = labels[test_idx].astype(np.int64)
+    return X_train, y_train, X_test, y_test
+