Image Classification
Commit f4d3c48d, authored 1 year ago by oscarchaufour
Parent: 1c03774b

knn modifications and mlp learn_once_mse
Showing 5 changed files with 163 additions and 29 deletions:

  knn.py           +48  -23
  mlp.py           +40   -0
  read_cifar.py    +23   -6
  results/knn.png   +0   -0
  test1.py         +52   -0
knn.py  (+48, -23)  View file @ f4d3c48d

@@ -8,39 +8,45 @@ import read_cifar
import numpy as np
import statistics
from statistics import mode
import time
import matplotlib.pyplot as plt

def distance_matrix(A, B) :
    # sum_of_squaresA = np.sum(A ** 2, axis=1)
    print("test0")
    # sum_of_squaresB = np.sum(B ** 2, axis=1)
    sum_of_squaresA = np.sum(np.square(A), axis=1)
    sum_of_squaresB = np.sum(np.square(B) ** 2, axis=1)
    print("test1")
    # sum_of_squaresA = np.tile(sum_of_squaresAVect, (np.shape(B)[0], 1))
    # sum_of_squaresB = np.tile(sum_of_squaresBVect, (np.shape(A)[0], 1))
    # Calculate the dot product between the two matrices
    # dot_product = np.matmul(A, B.T)
    dot_product = np.einsum('ij,jk', A, B.T)
    print("test2")
    # Calculate the Euclidean distance matrix using the hint provided
    dists = np.sqrt(sum_of_squaresA + sum_of_squaresB - 2 * dot_product)
    print("test3")
    return dists

def knn_predict(dists, labels_train, k) :
    number_train, number_test = dists.shape
    # initialze the predicted labels to zeros
    labels_predicted = np.zeros(number_test)
    for j in range(number_test) :
        sorted_indices = np.argsort(dists[:, j])
        print(len(dists[:, j]))
        break
        knn_indices = sorted_indices[:k]
        knn_labels = labels_train[knn_indices]
        label_predicted = mode(knn_labels)
        labels_predicted[j] = label_predicted
    return labels_predicted

def evaluate_knn(data_train, labels_train, data_test, labels_test, k) :
    dists = distance_matrix(data_train, data_test)
    labels_predicted = knn_predict(dists, labels_train, k)
    number_true_prediction = np.sum(labels_test == labels_predicted)
    number_total_prediction = labels_test.shape[0]

@@ -48,9 +54,19 @@ def evaluate_knn(data_train, labels_train, data_test, labels_test, k) :
    return classification_rate

def plot_accuracy(data_train, labels_train, data_test, labels_test, k_max) :
    Y = []
    for k in range(1, k_max + 1) :
        Y += [evaluate_knn(data_train, labels_train, data_test, labels_test, k)]
    plt.plot(list(range(1, k_max + 1)), Y)
    plt.xlabel('k (Number of Neighbors)')
    plt.ylabel('Accuracy')
    plt.savefig('results/knn.png')

if __name__ == "__main__" :
    t1 = time.time()
    # # Example distance matrix, training labels, and k value
    # dists = np.array([[1000, 2, 3],
    #                   [4, 0.1, 6],

@@ -62,14 +78,23 @@ if __name__ == "__main__" :
    # predicted_labels = knn_predict(dists, labels_train, k)
    # classification_rate = evaluate_knn(np.array([[1, 27], [100, 300]]), np.array([0.002, 9000]), np.array([[25, 350]]), np.array([9000]), 1)
    # print("Classification rate:")
    # print(classification_rate)
    file = "./data/cifar-10-python/"
    data, labels = read_cifar.read_cifar(file)
    data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, 0.9)
    k = 10
    print(len(data_train))
    print(len(data_test))
    print(len(data_train[0]))
    print(len(data_test[0]))
    # dists = distance_matrix(data_train, data_test)
    # k = 2
    # knn_predict(dists, labels_train, k)
    classification_rate = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
    print("classification rate : ", classification_rate)
    # plot_accuracy(data_train, labels_train, data_test, labels_test, 4)
    t2 = time.time()
    print('run time (second):')
    print(t2 - t1)
\ No newline at end of file
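For context: distance_matrix in knn.py computes pairwise Euclidean distances through the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b rather than an explicit double loop. A minimal standalone sketch of that trick, checked against a naive reference; the array names and sizes below are illustrative, not taken from this repository:

import numpy as np

rng = np.random.default_rng(0)
A = rng.normal(size=(5, 3))   # 5 "train" rows
B = rng.normal(size=(4, 3))   # 4 "test" rows

# Vectorized form: keepdims=True makes the squared norms broadcast to shape (5, 4).
sq_A = np.sum(A ** 2, axis=1, keepdims=True)      # shape (5, 1)
sq_B = np.sum(B ** 2, axis=1)                     # shape (4,)
dists_fast = np.sqrt(sq_A + sq_B - 2 * A @ B.T)   # shape (5, 4)

# Naive reference, one pair at a time.
dists_slow = np.array([[np.linalg.norm(a - b) for b in B] for a in A])

print(np.allclose(dists_fast, dists_slow))  # expected: True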
mlp.py  (new file, +40, -0)  View file @ f4d3c48d

# -*- coding: utf-8 -*-
"""
Created on Fri Oct 27 16:48:16 2023

@author: oscar
"""

import numpy as np

def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate) :
    a0 = data  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = 1 / (1 + np.exp(-z2))  # output of the output layer (sigmoid activation function)
    predictions = a2  # the predicted values are the outputs of the output layer

    N = targets.shape[0]

    # calculation of partial derivates of C
    dCdA2 = 2 / N * (a2 - targets)
    dCdZ2 = dCdA2 * (a2 - a2 ** 2)
    dCdW2 = np.matmul(a1.T, dCdZ2)
    dCdB2 = (1 / N) * np.sum(dCdZ2, axis=0, keepdims=True)
    dCdA1 = np.matmul(dCdZ2, w2.T)
    dCdZ1 = dCdA1 * (a1 - a1 ** 2)
    dCdW1 = np.matmul(a0.T, dCdZ1)
    dCdB1 = (1 / N) * np.sum(dCdZ1, axis=0, keepdims=True)

    # one gradient descent step
    w1 -= dCdW1 * learning_rate
    b1 -= dCdB1 * learning_rate
    w2 -= dCdW2 * learning_rate
    b2 -= dCdB2 * learning_rate

    loss = np.mean(np.square(predictions - targets))
    return w1, b1, w2, b2, loss
\ No newline at end of file
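learn_once_mse performs one forward pass and one gradient-descent step on a one-hidden-layer network with sigmoid activations and MSE loss, then returns the updated parameters and the loss. A minimal usage sketch with randomly initialized parameters; the batch size, layer widths, and learning rate below are illustrative assumptions, not values from this repository:

import numpy as np
from mlp import learn_once_mse  # assumes mlp.py is on the import path

rng = np.random.default_rng(0)
N, d_in, d_h, d_out = 32, 3072, 64, 10   # batch size and layer widths (illustrative)

data = rng.normal(size=(N, d_in))
targets = rng.uniform(size=(N, d_out))

# Small random initialization; biases are row vectors so they broadcast over the batch.
w1 = rng.normal(scale=0.1, size=(d_in, d_h))
b1 = np.zeros((1, d_h))
w2 = rng.normal(scale=0.1, size=(d_h, d_out))
b2 = np.zeros((1, d_out))

for step in range(10):
    w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate=0.1)
    # loss is computed from the pre-update weights, so it lags the update by one step.
    print(step, loss)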
read_cifar.py  (+23, -6)  View file @ f4d3c48d

@@ -24,24 +24,41 @@ def read_cifar (batch_dir) :
    data_batches = []
    label_batches = []
    for i in range(1, 4) :
        batch_filename = f'data_batch_{i}'
        batch_path = os.path.join(batch_dir, batch_filename)
        data, labels = read_cifar_batch(batch_path)
        data_batches.append(data)
        label_batches.append(labels)

    # test_batch_filename = 'test_batch'
    # test_batch_path = os.path.join(batch_dir, test_batch_filename)
    # data_test, labels_test = read_cifar_batch(test_batch_path)
    # data_batches.append(data_test)
    # label_batches.append(labels_test)

    data = np.concatenate(data_batches, axis=0)
    labels = np.concatenate(label_batches, axis=0)
    return data, labels

# def read_cifar(directory_path):
#     batches = os.listdir(directory_path)
#     data=None
#     labels=None
#     for batch in batches:
#         batch_path = os.path.join(directory_path, batch)
#         if not batch_path.endswith(".meta"):
#             data_batch,labels_batch=read_cifar_batch(batch_path)
#             if data is None:
#                 data=data_batch
#                 labels=labels_batch
#             else:
#                 data=np.concatenate((data,data_batch))
#                 labels=np.concatenate((labels,labels_batch))
#     return(data, labels)

def split_dataset(data, labels, split) :
    number_total = data.shape[0]
    ...
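read_cifar_batch is called here but sits outside the hunks shown in this diff. For orientation only, a sketch of what a CIFAR-10 batch loader typically looks like; the pickle format and the b'data' / b'labels' keys follow the official CIFAR-10 python distribution, while the exact dtypes returned by this repository's function are an assumption:

import pickle
import numpy as np

def read_cifar_batch(batch_path):
    # Each CIFAR-10 python batch is a pickled dict with b'data' (uint8, shape (10000, 3072))
    # and b'labels' (a list of 10000 ints).
    with open(batch_path, 'rb') as f:
        batch = pickle.load(f, encoding='bytes')
    data = np.asarray(batch[b'data'], dtype=np.float32)
    labels = np.asarray(batch[b'labels'], dtype=np.int64)
    return data, labels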
results/knn.png  (new file, +0, -0, binary, 12.9 KiB)  View file @ f4d3c48d
test1.py  (new file, +52, -0)  View file @ f4d3c48d

# -*- coding: utf-8 -*-
"""
Created on Mon Oct 23 19:43:47 2023

@author: oscar
"""

import numpy as np
from collections import Counter
import read_cifar

def distance_matrix(M1, M2):
    # dists(i, j) = distance between the i-th row of M1 and the j-th row of M2,
    # i.e. sqrt(sum_p (M1[i, p] - M2[j, p])^2),
    # which can be rewritten as sum_p M1[i, p]^2 + sum_p M2[j, p]^2 - 2 * sum_p M1[i, p] * M2[j, p]
    l1 = np.shape(M1)[0]
    l2 = np.shape(M2)[0]
    Vect1 = np.sum(M1 ** 2, 1)
    Vect2 = np.sum(M2 ** 2, 1)
    Mat1 = np.tile(Vect1, (l2, 1))
    Mat2 = np.tile(Vect2, (l1, 1))
    Mat3 = 2 * np.dot(M1, M2.T)
    dists = np.sqrt(Mat1.T + Mat2 - Mat3)
    return dists

def knn_predict(dists, labels_train, k):
    labels_predict = np.array([])
    size_test = np.shape(dists)[1]
    for j in range(size_test):
        list_arg_min = np.argsort(dists[:, j])
        labels_sorted = [labels_train[i] for i in list_arg_min]
        k_labels = labels_sorted[:k]
        count = Counter(k_labels)
        labels_predict = np.append(labels_predict, count.most_common(1)[0][0])
    return labels_predict

def evaluate_knn(data_train, data_test, labels_train, labels_test, k):
    dists = distance_matrix(data_train, data_test)
    labels_predict = knn_predict(dists, labels_train, k)
    count = np.sum(labels_predict == labels_test)
    return count / np.shape(labels_predict)

if __name__ == "__main__" :
    file = "./data/cifar-10-python/"
    data, labels = read_cifar.read_cifar(file)
    data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, 0.9)
    print(evaluate_knn(data_train, data_test, labels_train, labels_test, 20))
\ No newline at end of file
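knn_predict in test1.py picks the majority label among the k nearest training samples with collections.Counter. A tiny standalone illustration of that voting step; the label values are made up:

from collections import Counter

k_labels = [3, 1, 3, 3, 7]              # labels of the k = 5 nearest neighbours (illustrative)
count = Counter(k_labels)
predicted_label, votes = count.most_common(1)[0]
print(predicted_label, votes)           # -> 3 3 (label 3 wins with three votes)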