From 319c8de1ecc13fa2bd68ad4c3c34c51719af8530 Mon Sep 17 00:00:00 2001
From: Milan <milan.cart@ecl20.ec-lyon.fr>
Date: Thu, 9 Nov 2023 21:24:45 +0100
Subject: [PATCH] Part 1 : Prepare the CIFAR dataset

---
 __pycache__/read_cifar.cpython-311.pyc | Bin 0 -> 2090 bytes
 read_cifar.py                          |  39 ++++++++++++++++++++++++-
 test.py                                |   2 ++
 3 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 __pycache__/read_cifar.cpython-311.pyc
 create mode 100644 test.py

diff --git a/__pycache__/read_cifar.cpython-311.pyc b/__pycache__/read_cifar.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3c4534351e5e03b53d76e9087a1ffc193f08cd06
GIT binary patch
literal 2090
zcmZ3^%ge>Uz`zh><(qn+nStRkhy%l{P{!vr1_p-d3@HpLj5!QZj9{86iYbMug&~SL
zg}H?xiY1jbg(aH_q`pXpi6NCOg>@Mt1H)=YkO&AfGJr&x7*d&2*idCqc@T9e?C7E?
zjKK^btjX~bWSn0z+++p@1~vu;24)5ZhR<@0Aa|A^$<;8VFfL<aU|0=y8%V1uLoH(o
zNHvJVz`&3Nm#JY)VOqnCs=Agb1>|oAR8dBTJZ@zMLxy6Oa;6IANQQDoMutcRMg~TP
z5_YJuAd^{O>>8#N7D*U~p#<b!C@%{Z#tds%QSGQ@#%UHC4zu9qVwlBJ!(79X#uUt;
z$?kWHsVM0dYf@!NYOyBcEsoT@<ouM(y!2v_Yf}<S5;Ylbv8I$%7Niz~%*#nkO3f+O
zWWU9dUyz!2i>)9tIXfry7E4ZkV#+P1yn<V-iA6<;mABZ_a`F>PjE!!wX6BWcnP{@!
z;z~*^NzRBbNG!>?#h7-B70kWG0<z*38^qXRkna^h;FpVjXmM&$v3_o5PGVkiVo`~{
zOMY@`ZfaghvA%m|iAQOYer9fBda7=6PGWI!W?E))Vo7Fxo_<kkVoH2+W?EvAUP0w8
z0R%T5Y+sQm0|Nudmg1cZ3=9nnPuaOUyk@XmWS70dF8hIjky&?v;9VY}9`6~93*0a8
z=v?H{xx%B<;QE1?ky#fk4<<f>RDJ;wU@lJL$B!S_<i4^oaPjm=Oi-P{K1FXW(;BV|
zoW>V9jjwPTH>lj@=IybV;5tL|iipZur!^iIxJ@r|n_l5IZBTo_$=gxZA#;IAw3vs1
zfnlP!3G)^f3-;ZN7VOy5{{(RQPiLrQOlPQNLe2*%j44d0>C1&7*147$IVaRG%w|Yo
zu3?_dFqauM|HUvdFx0Ztveqyzfaexy0tQnxEU4-k8ETktsKh82dR#Evz%rK!)jV3-
zk0X~crm(JILk+Q7MjUFHu&8CKVXR?-=D3%j0_i0vH&yZIgVH}FZN^t|>zAY!mq2($
zppeyMyv34Oky)(CdW*FvF)uw8oPKYyB^DH<=A{&IGcYjR;!e)bOHM3F%}Xpv)nvQH
z0?J@TLJSNHMZzFy#>`usU^_}ua|>>9Lo!-Chy%{gpv<PAP$U9U%7w@+MT!gz3<3-c
z48;$@`Q!#SZ-dJXUj7ERuWSr_!V?T91a?OCM15djW>p3g9gKGs4L5{bF!H)!<aMAB
zLSIz$zM|-TkwdP7bAt5^4xWDQF764Q7dhmvaL8TYkh{Ub*U#I<J0bfbhtd@er3+y6
zm5o78Z-dAMJtqjdDC>Mh*7+ifRD)+n^$iw|cJC(d37i*Mq^__?U0{*A!NS$<+vGbz
z`yz|n6&AS*EOKAj7<l-4>}IGeD7?U}d68T53b$s1%Uxdnp12uqD-192>R#m4y~3;8
z;P!x<e}Y(t-USwk&!A*`iz7ZhH!(9WKK>RfxY(&ubxABq)CZR)x`qb2;1VRYSht`O
zIo6DB@fT+mW@Kljq-AF%rQ~KMX|ms9%`43<s02q)5ho}vK;;fNP(iljCFZ7rY$+(p
z%qv;RP^1J>0g8fO95%W6DWy57c12nY3=E)jQrySD!0>^Yk&*ERgU|&SdceTj0EQn}
zI2gq~Fu(~hVMfUh3~)k&frY0d>k_lX1!jpG%p47-9pa64Ep`od4?vs?oRT0U<#>_P
o=?bUQMP}zK%+42>ouQIa7n!B6FiT&6p%2^)ERq+PCBW_l0C85)zW@LL

literal 0
HcmV?d00001

diff --git a/read_cifar.py b/read_cifar.py
index c8cedb0..ffd06f3 100644
--- a/read_cifar.py
+++ b/read_cifar.py
@@ -1,4 +1,8 @@
 import numpy as np
+import pickle
+from sklearn.model_selection import train_test_split 
+import pandas as pd
+
 
 import pickle
 
@@ -12,4 +16,37 @@ def read_cifar_batch(batch_path):
   
   return data, labels
 
-print(read_cifar_batch('Data/cifar-10-batches-py/data_batch_2'))
\ No newline at end of file
+
+def read_cifar(path):
+    """Load every CIFAR batch under *path* into one dataset.
+
+    Reads data_batch_1 .. data_batch_5, then test_batch, through
+    read_cifar_batch and joins the results along axis 0.
+
+    Args:
+        path: prefix (directory) that holds the batch files.
+
+    Returns:
+        (data, labels): the concatenated arrays of all six batches.
+    """
+    # Training batches first, test batch last, so rows keep file order.
+    batch_files = [f'{path}/data_batch_{i}' for i in range(1,6)]
+    batch_files.append(f'{path}/test_batch')
+    loaded = [read_cifar_batch(batch_file) for batch_file in batch_files]
+
+    data = np.concatenate([batch[0] for batch in loaded], axis=0)
+    labels = np.concatenate([batch[1] for batch in loaded], axis=0)
+    return data, labels
+
+def split_dataset(data, labels, split):
+    """Split data/labels with train_test_split; `split` is the train fraction."""
+    parts = train_test_split(data, labels, test_size=1 - split, random_state=0)
+    return tuple(parts)  # (X_train, X_test, y_train, y_test)
+
+
+if __name__ == '__main__':
+    # Smoke test on the repo-relative data dir (no machine-specific path).
+    cifar_dir = 'Data/cifar-10-batches-py'
+    data, labels = read_cifar(cifar_dir)
+    X_train, X_test, y_train, y_test = split_dataset(data, labels, 0.8)
+    print(X_train, X_test, y_train, y_test)
\ No newline at end of file
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..c6e72d1
--- /dev/null
+++ b/test.py
@@ -0,0 +1,2 @@
+import read_cifar
+
-- 
GitLab