Upload New File

80d13b17 · Elkhadri Doha · aea5093d · 80d13b17
Commit 80d13b17 authored 1 year ago by Elkhadri Doha
--- a/read_cifar.py
+++ b/read_cifar.py
+import numpy as np
+import pickle
+import os
+
+def read_cifar_batch(batch_path):
+    with open(batch_path, 'rb') as file:
+        #Read the binary data from the file and load it as a Python object
+        batch = pickle.load(file, encoding='bytes') 
+
+    #Convert extracted data and labels from the batch to NumPy arrays
+    data = np.array(batch[b'data'], dtype=np.float32)
+    labels = np.array(batch[b'labels'], dtype=np.int64)
+
+    return data, labels
+    
+
+def read_cifar(directory_path):
+    #directory_path contains five data_batch and one test_batch
+    data_list=[]
+    labels_list=[]
+
+    # Load data and labels from each batch file
+    for filename in os.listdir(directory_path):
+        if filename.startswith('data_batch') or filename == 'test_batch':
+            batch_path = os.path.join(directory_path, filename)
+            with open(batch_path, 'rb') as file:
+                batch = pickle.load(file, encoding='bytes')
+                data_list.append(batch[b'data'])
+                labels_list.extend(batch[b'labels'])
+                
+    # Combine data and labels from all batches
+    data = np.concatenate(data_list, axis=0)
+    labels = np.array(labels_list, dtype=np.int64)
+
+    # Convert data to np.float32
+    data = data.astype(np.float32)
+
+    return data, labels
+
+
+def split_dataset(data, labels, split):
+    if split < 0.0 or split > 1.0:
+        raise ValueError("Split value must be between 0 and 1 ")
+
+    #Number of training samples
+    num_samples = data.shape[0]
+    num_train_samples = int(num_samples * split)
+
+    # Create a random permutation of indices for shuffling
+    shuffled_indices = np.random.permutation(num_samples)
+
+    #Split the indices into training and test indices
+    train_indices = shuffled_indices[ :num_train_samples]
+    test_indices = shuffled_indices[num_train_samples:]
+
+    # Use the shuffled indices to split the data and labels
+    data_train = data[train_indices]
+    labels_train = labels[train_indices]
+    data_test = data[test_indices]
+    labels_test = labels[test_indices]
+
+    return data_train, labels_train, data_test, labels_test