Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
I
Image classification
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Durget Colin
Image classification
Commits
011bd547
Commit
011bd547
authored
1 year ago
by
Durget Colin
Browse files
Options
Downloads
Patches
Plain Diff
Mis à jour des fichiers
parent
ad05fcc6
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
knn.py
+8
-63
8 additions, 63 deletions
knn.py
mlp.py
+29
-32
29 additions, 32 deletions
mlp.py
read_cifar.py
+1
-14
1 addition, 14 deletions
read_cifar.py
results/knn.png
+0
-0
0 additions, 0 deletions
results/knn.png
results/mlp.png
+0
-0
0 additions, 0 deletions
results/mlp.png
with
38 additions
and
109 deletions
knn.py
+
8
−
63
View file @
011bd547
...
@@ -10,64 +10,26 @@ import os
...
@@ -10,64 +10,26 @@ import os
#Cette méthode n'est pas la plus efficace aujourd'hui, mais permet d'avoir une
#Cette méthode n'est pas la plus efficace aujourd'hui, mais permet d'avoir une
#première idée
#première idée
def distance_matrix(matrix1, matrix2):
    """Return the pairwise Euclidean distances between rows of two matrices.

    The computation uses the expansion
    ``||a - b||^2 = ||a||^2 - 2 * a.b + ||b||^2`` so that the whole matrix is
    obtained from one matrix product instead of an explicit double loop.

    Args:
        matrix1: 2-D array-like with one sample per row, shape ``(n1, d)``.
        matrix2: 2-D array-like with one sample per row, shape ``(n2, d)``.

    Returns:
        Array of shape ``(n1, n2)`` whose entry ``[i, j]`` is the Euclidean
        distance between ``matrix1[i]`` and ``matrix2[j]``.
    """
    matrix1 = np.asarray(matrix1)
    matrix2 = np.asarray(matrix2)
    # Promote integer inputs (e.g. uint8 pixel values) to floating point so
    # that squaring cannot overflow; floating-point inputs keep their dtype.
    matrix1 = matrix1.astype(np.result_type(matrix1, np.float32), copy=False)
    matrix2 = matrix2.astype(np.result_type(matrix2, np.float32), copy=False)

    sq_norms1 = np.sum(matrix1 ** 2, axis=1, keepdims=True)
    sq_norms2 = np.sum(matrix2 ** 2, axis=1, keepdims=True)
    cross_terms = np.dot(matrix1, matrix2.T)

    sq_dists = sq_norms1 - 2 * cross_terms + sq_norms2.T
    # Rounding can make the squared distance of (nearly) identical rows
    # slightly negative, which would turn into NaN under the square root.
    sq_dists = np.maximum(sq_dists, 0)
    return np.sqrt(sq_dists)
#Test
###Test sur 2 matrices
# Create two example matrices
##matrix1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
matrix1
=
np
.
array
([[
1
,
2
,
3
],
[
4
,
5
,
6
],
[
7
,
8
,
9
]])
##matrix2 = np.array([[10, 11, 12], [13, 14, 15], [16, 17, 18]])
matrix2
=
np
.
array
([[
10
,
11
,
12
],
[
13
,
14
,
15
],
[
16
,
17
,
18
]])
##dists = distance_matrix(matrix1, matrix2)
# Compute the Euclidean distance matrix
dists
=
distance_matrix
(
matrix1
,
matrix2
)
##print(dists)
##print(dists)
#La fonction knn_predicts est assez simple :
#La fonction knn_predicts est assez simple :
#On regarde la matrice de distance pour une image, on la trie dans l'ordre croissant
#On regarde la matrice de distance pour une image, on la trie dans l'ordre croissant
#(donc avec les images les plus "proches" d'abord), puis on regarde les labels
#(donc avec les images les plus "proches" d'abord), puis on regarde les labels
#des k premières images : on prend ensuite le label qui revient le plus
#des k premières images : on prend ensuite le label qui revient le plus
##def knn_predict(dists, labels_train, k):
## # Initialize an empty array to store the predicted labels
## predicted_labels = []
## # Loop through each row in the distance matrix (each test example)
## for i in range(dists.shape[0]):
## # Get the distances for the current test example
## distances = dists[i]
## # Get the indices of the k nearest neighbors
## nearest_indices = np.argsort(distances)[:k]
##
## # Get the labels of the k nearest neighbors
## nearest_labels = [labels_train[idx] for idx in nearest_indices]
##
## # Use a voting mechanism to determine the predicted label
## predicted_label = max(set(nearest_labels), key=nearest_labels.count)
##
## # Append the predicted label to the result array
## predicted_labels.append(predicted_label)
## return predicted_labels
def knn_predict(dists, labels_train, k):
    """Predict a label for every test sample by majority vote of its k nearest
    training samples.

    Args:
        dists: 2-D array where row ``i`` holds the distances from test sample
            ``i`` to every training sample.
        labels_train: Non-negative integer labels of the training samples
            (required by ``np.bincount``), indexed like the columns of
            ``dists``.
        k: Number of neighbours that vote; must satisfy
            ``1 <= k <= dists.shape[1]``.

    Returns:
        1-D array with one predicted label per row of ``dists``. Ties are
        resolved in favour of the smallest label (``np.argmax`` returns the
        first maximum of the vote counts).

    Raises:
        ValueError: If ``k`` is outside ``[1, dists.shape[1]]``.
    """
    dists = np.asarray(dists)
    labels_train = np.asarray(labels_train)
    num_train = dists.shape[1]
    if not 1 <= k <= num_train:
        raise ValueError(
            "k must be between 1 and {}, got {}".format(num_train, k)
        )

    # Partitioning around position k - 1 puts the k smallest distances in the
    # first k columns and, unlike kth=k, stays valid when k == num_train.
    nearest_indices = np.argpartition(dists, k - 1, axis=1)[:, :k]
    nearest_labels = labels_train[nearest_indices]

    # Majority vote per test sample.
    return np.array([np.argmax(np.bincount(votes)) for votes in nearest_labels])
#Dans cette fonction on calcule le taux de classification,
#Dans cette fonction on calcule le taux de classification,
...
@@ -75,50 +37,33 @@ def knn_predict(dists, labels_train, k):
...
@@ -75,50 +37,33 @@ def knn_predict(dists, labels_train, k):
#d'observations. Pour cela, on va d'abord entrainer l'algorithme avec
#d'observations. Pour cela, on va d'abord entrainer l'algorithme avec
#la base d'entraînement, puis on va vérifier avec la base de test
#la base d'entraînement, puis on va vérifier avec la base de test
def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    """Return the k-NN classification accuracy on the test set, in percent.

    The distance matrix between the test and training samples is computed
    with ``distance_matrix``, labels are predicted with ``knn_predict``, and
    the share of predictions equal to ``labels_test`` is returned.

    Args:
        data_train: Training samples, one per row.
        labels_train: Labels of the training samples.
        data_test: Test samples, one per row.
        labels_test: True labels of the test samples.
        k: Number of neighbours used for the vote.

    Returns:
        Percentage (0-100) of test samples whose predicted label matches.
    """
    test_to_train = distance_matrix(data_test, data_train)
    guesses = knn_predict(test_to_train, labels_train, k)

    # Count the matching (prediction, truth) pairs.
    hits = sum(1 for guess, truth in zip(guesses, labels_test) if guess == truth)

    return hits / len(labels_test) * 100
if __name__ == "__main__":
    # Load one CIFAR-10 batch and keep 90% of it for training.
    data_folder = 'data/cifar-10-batches-py'
    batch_filename = 'data_batch_1'
    batch_path = os.path.join(data_folder, batch_filename)
    data, labels = read_cifar.read_cifar_batch(batch_path)
    data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(
        data, labels, 0.9
    )

    # Accuracy for every k from 1 to 20.
    k_values = list(range(1, 21))
    accuracies = [
        evaluate_knn(data_train, labels_train, data_test, labels_test, k)
        for k in k_values
    ]

    # Plot accuracy against k.
    plt.figure(figsize=(10, 6))
    plt.plot(k_values, accuracies, marker='o', linestyle='-', color='b')
    plt.title('Accuracy as a function of k, for k-Nearest Neighbors')
    plt.xlabel('k (Number of Neighbors)')
    plt.ylabel('Accuracy (%)')
    plt.grid(True)

    # Save the figure before showing it.
    plt.savefig('results/knn.png')
    plt.show()
This diff is collapsed.
Click to expand it.
mlp.py
+
29
−
32
View file @
011bd547
...
@@ -18,18 +18,18 @@ b2 = np.zeros((1, d_out)) # second layer biaises
...
@@ -18,18 +18,18 @@ b2 = np.zeros((1, d_out)) # second layer biaises
data
=
np
.
random
.
rand
(
N
,
d_in
)
# create a random data
data
=
np
.
random
.
rand
(
N
,
d_in
)
# create a random data
targets
=
np
.
random
.
rand
(
N
,
d_out
)
# create a random targets
targets
=
np
.
random
.
rand
(
N
,
d_out
)
# create a random targets
#
Sigmoid function
#
Fonction sigmoide, utilisée par la suite pour le calcul des matrices a1 et a2
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``.

    The input is clipped to ``[-30, 30]`` before the exponential so that
    ``np.exp`` cannot overflow; values beyond that range therefore produce
    the same output as the nearest bound.
    """
    bounded = np.clip(z, -30, 30)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator
# Forward pass
#
Fonction
Forward pass
pour créer les premières matrices a0,z1,a1,z2,a2, ainsi que les prédictions
def forward_pass(data, w1, b1, w2, b2):
    """Run the two-layer network forward with sigmoid activations on both layers.

    Args:
        data: Input matrix, one sample per row.
        w1: Weights of the first (hidden) layer.
        b1: Biases of the first (hidden) layer.
        w2: Weights of the second (output) layer.
        b2: Biases of the second (output) layer.

    Returns:
        Tuple ``(a0, z1, a1, z2, a2, predictions)`` where ``a0`` is the input,
        ``z1``/``z2`` are the pre-activations, ``a1``/``a2`` the sigmoid
        activations, and ``predictions`` is the same object as ``a2``.
    """
    a0 = data

    # Hidden layer: affine map followed by the sigmoid.
    z1 = np.matmul(a0, w1) + b1
    a1 = sigmoid(z1)

    # Output layer: affine map followed by the sigmoid.
    z2 = np.matmul(a1, w2) + b2
    a2 = sigmoid(z2)

    # The network output is the activation of the output layer.
    predictions = a2
    return (a0, z1, a1, z2, a2, predictions)
# Compute loss (MSE)
# Compute loss (MSE)
...
@@ -50,7 +50,7 @@ def learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate = 0.01):
...
@@ -50,7 +50,7 @@ def learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate = 0.01):
grad_w1
=
np
.
matmul
(
data
.
T
,
grad_z1
)
grad_w1
=
np
.
matmul
(
data
.
T
,
grad_z1
)
grad_b1
=
np
.
sum
(
grad_z1
,
axis
=
0
,
keepdims
=
True
)
grad_b1
=
np
.
sum
(
grad_z1
,
axis
=
0
,
keepdims
=
True
)
#
Update
weights
and
biases
using
gradient descent
#
Mis à jour des
weights
et
biases
en utilisant le
gradient descen
dan
t
w1
-=
learning_rate
*
grad_w1
w1
-=
learning_rate
*
grad_w1
b1
-=
learning_rate
*
grad_b1
b1
-=
learning_rate
*
grad_b1
w2
-=
learning_rate
*
grad_w2
w2
-=
learning_rate
*
grad_w2
...
@@ -58,36 +58,33 @@ def learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate = 0.01):
...
@@ -58,36 +58,33 @@ def learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate = 0.01):
return
w1
,
b1
,
w2
,
b2
,
loss
return
w1
,
b1
,
w2
,
b2
,
loss
# This function shifts the inputs before exponentiating to avoid overly large numbers
def softmax(x):
    """Return the softmax of ``x`` computed along its last axis.

    Each sample is shifted by its own maximum before exponentiation so that
    ``np.exp`` cannot overflow, and is normalised by its own sum. For a batch
    of shape ``(N, d_out)`` every row therefore sums to 1, which the
    ``a2 - one_hot(labels)`` cross-entropy gradient relies on. For 1-D input
    the result is the same as normalising over the whole array.

    Args:
        x: Array-like of scores; samples are laid out along the last axis.

    Returns:
        Array of the same shape as ``x`` with non-negative entries that sum
        to 1 along the last axis.
    """
    x = np.asarray(x)
    # Subtracting the per-sample maximum leaves the result unchanged
    # mathematically but keeps every exponent <= 0.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)
# New forward pass that uses the softmax function on the output layer
def forward(data, w1, b1, w2, b2):
    """Run the two-layer network forward with a softmax output layer.

    Args:
        data: Input matrix, one sample per row.
        w1: Weights of the first (hidden) layer.
        b1: Biases of the first (hidden) layer.
        w2: Weights of the second (output) layer.
        b2: Biases of the second (output) layer.

    Returns:
        Tuple ``(a0, z1, a1, z2, a2, predictions)`` where ``a0`` is the input,
        ``z1``/``z2`` are the pre-activations, ``a1`` is the sigmoid hidden
        activation, ``a2`` is the softmax output, and ``predictions`` is the
        same object as ``a2``.
    """
    a0 = data

    # Hidden layer: affine map followed by the sigmoid.
    z1 = np.matmul(a0, w1) + b1
    a1 = sigmoid(z1)

    # Output layer: affine map followed by the softmax.
    z2 = np.matmul(a1, w2) + b2
    a2 = softmax(z2)

    # The network output is the activation of the output layer.
    predictions = a2
    return (a0, z1, a1, z2, a2, predictions)
# Fonction transformant chaque label de classe en un vecteur de la taille de la classe.
def one_hot(labels, num_classes=None):
    """Encode integer class labels as one-hot row vectors.

    Args:
        labels: Array-like of non-negative integer class indices.
        num_classes: Width of the encoding. When ``None`` (the default, and
            the original behaviour) it is inferred as ``max(labels) + 1``.
            Pass it explicitly when a batch may not contain the highest
            class, otherwise the result would be narrower than the network
            output it is compared with.

    Returns:
        Float array with one row per label, in which only the column matching
        the label is 1.
    """
    labels = np.asarray(labels)
    if num_classes is None:
        num_classes = np.max(labels) + 1
    # Row i of the identity matrix is exactly the one-hot vector of class i.
    return np.eye(num_classes)[labels]
def softmax_stable(x):
    """Return the softmax of ``x`` computed along its last axis.

    Each sample is shifted by its own maximum before exponentiation so that
    ``np.exp`` cannot overflow, and is normalised by its own sum, so every
    row of a ``(N, d_out)`` batch sums to 1. For 1-D input the result is the
    same as normalising over the whole array.

    Args:
        x: Array-like of scores; samples are laid out along the last axis.

    Returns:
        Array of the same shape as ``x`` with non-negative entries that sum
        to 1 along the last axis.
    """
    x = np.asarray(x)
    # Keep every exponent <= 0 by removing the per-sample maximum.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)
def
learn_once_cross_entropy
(
w1
,
b1
,
w2
,
b2
,
data
,
labels_train
,
learning_rate
):
def
learn_once_cross_entropy
(
w1
,
b1
,
w2
,
b2
,
data
,
labels_train
,
learning_rate
):
a0
,
z1
,
a1
,
z2
,
a2
,
predictions
=
forward
(
data
,
w1
,
b1
,
w2
,
b2
)
a0
,
z1
,
a1
,
z2
,
a2
,
predictions
=
forward
(
data
,
w1
,
b1
,
w2
,
b2
)
N
=
len
(
labels_train
)
N
=
len
(
labels_train
)
labels_train
=
one_hot
(
labels_train
)
labels_train
=
one_hot
(
labels_train
)
# Compute the gradient of the loss with respect to the predictions (a2)
grad_z2
=
a2
-
labels_train
grad_z2
=
a2
-
labels_train
# Backpropagation
# Backpropagation
...
@@ -98,7 +95,7 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
...
@@ -98,7 +95,7 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
grad_w1
=
np
.
matmul
(
data
.
T
,
grad_z1
)
grad_w1
=
np
.
matmul
(
data
.
T
,
grad_z1
)
grad_b1
=
np
.
sum
(
grad_z1
,
axis
=
0
,
keepdims
=
True
)
grad_b1
=
np
.
sum
(
grad_z1
,
axis
=
0
,
keepdims
=
True
)
#
Update
weights
and
biases
using
gradient descent
#
Mis à jour des
weights
et
biases
en utilisant le
gradient descen
dan
t
w1
-=
learning_rate
*
grad_w1
w1
-=
learning_rate
*
grad_w1
b1
-=
learning_rate
*
grad_b1
b1
-=
learning_rate
*
grad_b1
w2
-=
learning_rate
*
grad_w2
w2
-=
learning_rate
*
grad_w2
...
@@ -110,11 +107,11 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
...
@@ -110,11 +107,11 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
return
w1
,
b1
,
w2
,
b2
,
loss
return
w1
,
b1
,
w2
,
b2
,
loss
#Fonction de prédiction qui pour un vecteur donné renvoie la classe prédite
(cad l'indice de l'élément le plus élevé)
#Fonction de prédiction qui pour un vecteur donné renvoie la classe prédite
def predict_class(predictions):
    """Return, for each row of ``predictions``, the index of its largest entry.

    Args:
        predictions: 2-D array of per-class scores, one sample per row.

    Returns:
        1-D integer array of predicted class indices (first index on ties).
    """
    best_columns = np.argmax(predictions, axis=1)
    return best_columns
#Fonction taux de réussite qui compare une liste de prédictions à la liste des résultats et renvoie la proportion de
vraies
prédictions
#Fonction taux de réussite qui compare une liste de prédictions à la liste des résultats et renvoie la proportion de prédictions
correctes
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y_true: Array-like of true labels.
        y_pred: Array-like of predicted labels, same length as ``y_true``.

    Returns:
        Proportion (between 0 and 1) of positions where both agree.
    """
    # Convert first: comparing two plain lists with == yields a single bool
    # (whole-list equality) instead of an element-wise comparison.
    matches = np.asarray(y_true) == np.asarray(y_pred)
    return np.mean(matches)
...
@@ -134,15 +131,15 @@ def test_mlp(w1,b1,w2,b2, data_test,labels_test):
...
@@ -134,15 +131,15 @@ def test_mlp(w1,b1,w2,b2, data_test,labels_test):
return
test_accuracy
return
test_accuracy
def
run_mlp_training
(
data_train
,
labels_train
,
data_test
,
labels_test
,
d_h
,
learning_rate
,
num_epoch
):
def
run_mlp_training
(
data_train
,
labels_train
,
data_test
,
labels_test
,
d_h
,
learning_rate
,
num_epoch
):
N
=
data_train
.
shape
[
0
]
# number of input data
N
=
data_train
.
shape
[
0
]
d_in
=
data_train
.
shape
[
1
]
# input dimension
d_in
=
data_train
.
shape
[
1
]
d_out
=
np
.
max
(
labels_train
)
+
1
# output dimension (number of neurons of the output layer)
d_out
=
np
.
max
(
labels_train
)
+
1
#
Random i
nitiali
z
ation
of the network weights and biaises
#
I
nitiali
s
ation
du réseau
w1
=
2
*
np
.
random
.
rand
(
d_in
,
d_h
)
-
1
# first layer weights
w1
=
2
*
np
.
random
.
rand
(
d_in
,
d_h
)
-
1
b1
=
np
.
zeros
((
1
,
d_h
))
# first layer biaises
b1
=
np
.
zeros
((
1
,
d_h
))
w2
=
2
*
np
.
random
.
rand
(
d_h
,
d_out
)
-
1
# second layer weights
w2
=
2
*
np
.
random
.
rand
(
d_h
,
d_out
)
-
1
b2
=
np
.
zeros
((
1
,
d_out
))
# second layer biaises
b2
=
np
.
zeros
((
1
,
d_out
))
w1
,
b1
,
w2
,
b2
,
train_accuracies
=
train_mlp
(
w1
,
b1
,
w2
,
b2
,
data_train
,
labels_train
,
learning_rate
,
num_epoch
)
w1
,
b1
,
w2
,
b2
,
train_accuracies
=
train_mlp
(
w1
,
b1
,
w2
,
b2
,
data_train
,
labels_train
,
learning_rate
,
num_epoch
)
test_accuracy
=
test_mlp
(
w1
,
b1
,
w2
,
b2
,
data_test
,
labels_test
)
test_accuracy
=
test_mlp
(
w1
,
b1
,
w2
,
b2
,
data_test
,
labels_test
)
...
@@ -151,7 +148,7 @@ def run_mlp_training(data_train,labels_train,data_test,labels_test,d_h,learning_
...
@@ -151,7 +148,7 @@ def run_mlp_training(data_train,labels_train,data_test,labels_test,d_h,learning_
if
__name__
==
"
__main__
"
:
if
__name__
==
"
__main__
"
:
data_folder
=
'
data/cifar-10-batches-py
'
data_folder
=
'
data/cifar-10-batches-py
'
batch_filename
=
'
data_batch_1
'
# Adjust this to the specific batch file you want to read
batch_filename
=
'
data_batch_1
'
batch_path
=
os
.
path
.
join
(
data_folder
,
batch_filename
)
batch_path
=
os
.
path
.
join
(
data_folder
,
batch_filename
)
data
,
labels
=
read_cifar
.
read_cifar_batch
(
batch_path
)
data
,
labels
=
read_cifar
.
read_cifar_batch
(
batch_path
)
data_train
,
labels_train
,
data_test
,
labels_test
=
read_cifar
.
split_dataset
(
data
,
labels
,
0.9
)
data_train
,
labels_train
,
data_test
,
labels_test
=
read_cifar
.
split_dataset
(
data
,
labels
,
0.9
)
...
...
This diff is collapsed.
Click to expand it.
read_cifar.py
+
1
−
14
View file @
011bd547
...
@@ -29,20 +29,15 @@ def split_dataset(data, labels, split):
...
@@ -29,20 +29,15 @@ def split_dataset(data, labels, split):
if
split
<
0
or
split
>
1
:
if
split
<
0
or
split
>
1
:
raise
ValueError
(
"
The split parameter must be a float between 0 and 1.
"
)
raise
ValueError
(
"
The split parameter must be a float between 0 and 1.
"
)
# Get the number of samples in the dataset
num_samples
=
len
(
data
)
num_samples
=
len
(
data
)
# Calculate the number of samples for the training set
num_train_samples
=
int
(
num_samples
*
split
)
num_train_samples
=
int
(
num_samples
*
split
)
# Create a random permutation of indices for shuffling
indices
=
np
.
random
.
permutation
(
num_samples
)
indices
=
np
.
random
.
permutation
(
num_samples
)
# Split the indices into training and test sets
train_indices
=
indices
[:
num_train_samples
]
train_indices
=
indices
[:
num_train_samples
]
test_indices
=
indices
[
num_train_samples
:]
test_indices
=
indices
[
num_train_samples
:]
# Split the data and labels based on the shuffled indices
data_train
=
data
[
train_indices
]
data_train
=
data
[
train_indices
]
labels_train
=
labels
[
train_indices
]
labels_train
=
labels
[
train_indices
]
data_test
=
data
[
test_indices
]
data_test
=
data
[
test_indices
]
...
@@ -53,22 +48,14 @@ def split_dataset(data, labels, split):
...
@@ -53,22 +48,14 @@ def split_dataset(data, labels, split):
if __name__ == "__main__":
    # Read a single batch file.
    data_folder = 'data/cifar-10-batches-py'
    batch_filename = 'data_batch_1'
    batch_path = os.path.join(data_folder, batch_filename)
    data, labels = read_cifar_batch(batch_path)

    # Example: read every batch in the folder and print the data and labels.
    data1, labels1 = read_cifar(data_folder)
    print("Data :", data1)
    print("Labels :", labels1)
This diff is collapsed.
Click to expand it.
results/knn.png
+
0
−
0
View replaced file @
ad05fcc6
View file @
011bd547
37.8 KiB
|
W:
|
H:
43.3 KiB
|
W:
|
H:
2-up
Swipe
Onion skin
This diff is collapsed.
Click to expand it.
results/mlp.png
+
0
−
0
View replaced file @
ad05fcc6
View file @
011bd547
20.2 KiB
|
W:
|
H:
20.7 KiB
|
W:
|
H:
2-up
Swipe
Onion skin
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment