Commit 04d57a04 authored by Sucio

correction code

parent 59861d42
Source diff could not be displayed: it is too large. The file contents are shown below.
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
from read_cifar import read_cifar_batch, split_dataset
import matplotlib.pyplot as plt

# Load CIFAR-10 from the local data files
X, y = read_cifar_batch("data/cifar-10-batches-py/data_batch_1")

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

# Data preprocessing
# The images still need to be reshaped, normalized, etc.

# Define the network architecture
input_size = 32 * 32 * 3  # 32x32 pixels, 3 channels (RGB)
hidden_size = 64          # number of units in the hidden layer
output_size = 10          # 10 classes in CIFAR-10

# Initialize the weights and biases
np.random.seed(0)
weights_input_hidden = np.random.randn(input_size, hidden_size)
bias_input_hidden = np.zeros((1, hidden_size))
weights_hidden_output = np.random.randn(hidden_size, output_size)
bias_hidden_output = np.zeros((1, output_size))

# Hyperparameters
learning_rate = 0.1
num_epochs = 100

y_print, x_print, y2_print = [], [], []

# Train the model
for epoch in range(num_epochs):
    # Forward pass
    hidden_input = np.dot(X_train, weights_input_hidden) + bias_input_hidden
    hidden_output = 1 / (1 + np.exp(-hidden_input))  # sigmoid activation
    output_layer = np.dot(hidden_output, weights_hidden_output) + bias_hidden_output

    # Softmax (shifted by the row maximum for numerical stability)
    exp_scores = np.exp(output_layer - np.max(output_layer, axis=1, keepdims=True))
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    # Cross-entropy loss
    num_examples = len(X_train)
    correct_logprobs = -np.log(probs[range(num_examples), y_train])
    data_loss = np.sum(correct_logprobs) / num_examples

    # Backward pass: gradient of the loss
    dprobs = probs.copy()
    dprobs[range(num_examples), y_train] -= 1
    dprobs /= num_examples
    dweights_hidden_output = np.dot(hidden_output.T, dprobs)
    dbias_hidden_output = np.sum(dprobs, axis=0, keepdims=True)
    dhidden = np.dot(dprobs, weights_hidden_output.T)
    dhidden_hidden = dhidden * (1 - hidden_output) * hidden_output
    dweights_input_hidden = np.dot(X_train.T, dhidden_hidden)
    dbias_input_hidden = np.sum(dhidden_hidden, axis=0)

    # Update the weights and biases
    weights_input_hidden -= learning_rate * dweights_input_hidden
    bias_input_hidden -= learning_rate * dbias_input_hidden
    weights_hidden_output -= learning_rate * dweights_hidden_output
    bias_hidden_output -= learning_rate * dbias_hidden_output

    x_print.append(epoch)
    y_print.append(data_loss)
    predicted_class = np.argmax(output_layer, axis=1)
    y2_print.append(accuracy_score(y_train, predicted_class))

    # Print the loss every 100 epochs to monitor training
    if (epoch + 1) % 100 == 0:
        print(f'Epoch {epoch + 1}: Loss = {data_loss:.4f}')

# Evaluate the model on the test set
hidden_input = np.dot(X_test, weights_input_hidden) + bias_input_hidden
hidden_output = 1 / (1 + np.exp(-hidden_input))
output_layer = np.dot(hidden_output, weights_hidden_output) + bias_hidden_output
predicted_class = np.argmax(output_layer, axis=1)
accuracy = accuracy_score(y_test, predicted_class)
print(f'Test set accuracy: {accuracy:.4f}')

# Plot the loss and accuracy curves
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(x_print, y_print, label='training loss')
ax1.set_xlabel('epoch')
ax1.set_ylabel('loss')
ax1.set_title('Loss per epoch')
ax1.legend()
ax2.plot(x_print, y2_print, label='training accuracy')
ax2.set_xlabel('epoch')
ax2.set_ylabel('accuracy')
ax2.set_title('Training accuracy per epoch')
ax2.legend()
plt.tight_layout()
plt.show()
\ No newline at end of file
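The preprocessing step in the script above is left as a TODO. Below is a minimal sketch of one common choice, scaling raw pixel values to [0, 1] and zero-centering with statistics from the training split only; it assumes read_cifar_batch returns one flattened row of 3072 pixel values per image (as input_size = 32 * 32 * 3 suggests), and the helper name normalize_images is illustrative rather than part of the repository.

import numpy as np

def normalize_images(X_train, X_test):
    # Scale assumed 0-255 pixel values to [0, 1].
    X_train = X_train.astype(np.float32) / 255.0
    X_test = X_test.astype(np.float32) / 255.0
    # Zero-center with the training-set mean so that no test-set
    # information leaks into training.
    mean = X_train.mean(axis=0, keepdims=True)
    return X_train - mean, X_test - mean

# Example: call right after train_test_split, before the training loop.
# X_train, X_test = normalize_images(X_train, X_test)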
@@ -5,38 +5,18 @@ import matplotlib.pyplot as plt
 def learning_methode(k,dk,learning_rate):
     k=k-learning_rate*dk
-    # normalize k to [-1, 1]
-    # max_k=np.max(k)
-    # min_k=np.min(k)
-    # k=(k*2)/(max_k-min_k)-min_k-1
-    print(np.max(dk))
     return(k)
-def softmax(y):
-    y=np.exp(y)
-    v=np.sum(y,axis=1)
-    return(y / v[:, np.newaxis])
-# def reugalisation(W)
 def learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate):
-    # Forward pass
-    a0 = data # the data are the input of the first layer
-    z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
-    a1 = 1 / (1 + np.exp(-z1)) # output of the hidden layer (sigmoid activation function)
-    z2 = np.matmul(a1, w2) + b2 # input of the output layer
-    a2 = 1 / (1 + np.exp(-z2)) # output of the output layer (sigmoid activation function)
-    # s=np.sum(a2,axis=1)
-    # a2=a2/s[:, np.newaxis]
-    # print(np.max(a2,axis=1))
-    #a2=softmax(a2)
-    predictions = a2 # the predicted values are the outputs of the output layer
-    dc_da2=(2/data.shape[0])*(a2-targets)
-    # dc_da2=(1/data.shape[0])*((-targets/a2)-(1-targets)/(1-a2))
-    # dc_da2=((np.ones(targets.shape)-2*targets)/(data.shape[0]*a2))
-    # dc_da2=(-targets)/(data.shape[0]*a2)
+    a0 = data
+    z1 = np.matmul(a0, w1) + b1
+    a1 = 1 / (1 + np.exp(-z1))
+    z2 = np.matmul(a1, w2) + b2
+    a2 = 1 / (1 + np.exp(-z2))
+    predictions = a2
+    dc_da2=(2/data.shape[0])*(a2-targets)
     dc_dz2=dc_da2*(a2*(1-a2))
     dc_dw2=np.matmul(np.transpose(a1), dc_dz2)
     dc_db2=np.matmul(np.ones((1,dc_dz2.shape[0])),dc_dz2)
@@ -50,18 +30,8 @@ def learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate):
     w2=learning_methode(w2,dc_dw2,learning_rate)
     b2=learning_methode(b2,dc_db2,learning_rate)
-    # prediction_2 = np.zeros(predictions.shape, dtype=int)
-    # for i, ligne in enumerate(predictions):
-    # prediction_2[i][np.argmin(ligne)] = 1
-    # indices_egalite = np.where(prediction_2 == targets)[0]
-    # nombre_indices = len(indices_egalite)
     # Compute loss (MSE)
-    # loss = np.mean(np.square(predictions - targets))
+    loss = np.mean(np.square(predictions - targets))
-    # binary cross-entropy loss
-    # loss = np.mean(targets*np.log(predictions)-(1-targets)*np.log(1-predictions))
-    # loss=np.mean(-np.log(np.max(targets*predictions,axis=1)))
-    # loss=np.mean((np.ones(targets.shape)-2*targets)*np.log(predictions))
     return(w1,b1,w2,b2,loss)
 def one_hot(label):
@@ -71,28 +41,74 @@ def one_hot(label):
     mat[label_indexe,label_im-1]=1
     return(mat)
+def softmax(y):
+    y=np.exp(y)
+    v=np.sum(y,axis=1)
+    return(y / v[:, np.newaxis])
 def learn_once_cross_entropy(w1,b1,w2,b2,data,labels_train,learning_rate):
-    Y=one_hot(labels_train)
-    w1,b1,w2,b2,loss=learn_once_mse(w1,b1,w2,b2,data,Y,learning_rate)
+    targets = one_hot(labels_train)
+    targets=targets+1e-15
+    a0 = data
+    z1 = np.matmul(a0, w1) + b1
+    a1 = 1 / (1 + np.exp(-z1))
+    z2 = np.matmul(a1, w2) + b2
+    a2 = 1 / (1 + np.exp(-z2))
+    softa2=softmax(a2)
+    # predictions = softa2
+    predictions=softa2
+    # dc_softmax=-(targets/softa2)+((1-targets)/(1-softa2))
+    # dc_a2=dc_softmax*(softa2*(1-softa2))
+    # dc_dz2=dc_a2*(a2*(1-a2))
+    dc_dz2=predictions-targets
+    dc_dw2=np.matmul(np.transpose(a1), dc_dz2)
+    dc_db2=np.matmul(np.ones((1,dc_dz2.shape[0])),dc_dz2)
+    dc_da1=np.matmul(dc_dz2,np.transpose(w2))
+    dc_dz1=dc_da1*(a1*(1-a1))
+    dc_dw1=np.matmul(np.transpose(a0), dc_dz1)
+    dc_db1=np.matmul(np.ones((1,dc_dz1.shape[0])),dc_dz1)
+    w1=learning_methode(w1,dc_dw1,learning_rate)
+    b1=learning_methode(b1,dc_db1,learning_rate)
+    w2=learning_methode(w2,dc_dw2,learning_rate)
+    b2=learning_methode(b2,dc_db2,learning_rate)
+    # binary cross-entropy loss
+    loss = np.mean(targets*np.log(predictions)-(1-targets)*np.log(1-predictions))
     return(w1,b1,w2,b2,loss)
+def accuracy(w1,b1,w2,b2,data,labels):
+    a0 = data
+    z1 = np.matmul(a0, w1) + b1
+    a1 = 1 / (1 + np.exp(-z1))
+    z2 = np.matmul(a1, w2) + b2
+    a2 = 1 / (1 + np.exp(-z2))
+    softa2=softmax(a2)
+    predictions = softa2
+    prediction_2 = np.empty(predictions.shape[0], dtype=int)
+    for i, ligne in enumerate(predictions):
+        prediction_2[i] = np.argmax(ligne)+1
+    indices_egalite = np.where(prediction_2 == labels)[0]
+    nombre_indices = len(indices_egalite)
+    return(nombre_indices/len(labels))
 def train_mlp(w1,b1,w2,b2,d_train,labels_train,learning_rate,num_epoch):
     train_accuracies=[]
-    pas=len(labels_train)//num_epoch
     for k in range(num_epoch):
-        partial_data=d_train[k*pas:(k+1)*pas,:]
-        patial_label=l_train[k*pas:(k+1)*pas]
-        w1,b1,w2,b2,loss=learn_once_cross_entropy(w1,b1,w2,b2,partial_data,patial_label,learning_rate)
-        train_accuracies.append(loss)
+        w1,b1,w2,b2,loss=learn_once_mse(w1,b1,w2,b2,d_train,labels_train,learning_rate)
+        train_accuracies.append(accuracy(w1,b1,w2,b2,d_train,labels_train))
     return (w1,b1,w2,b2,train_accuracies)
 def test_mlp(w1,b1,w2,b2,d_test,labels_test):
-    a0 = d_test # the data are the input of the first layer
-    z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
-    a1 = 1 / (1 + np.exp(-z1)) # output of the hidden layer (sigmoid activation function)
-    z2 = np.matmul(a1, w2) + b2 # input of the output layer
-    a2 = 1 / (1 + np.exp(-z2)) # output of the output layer (sigmoid activation function)
-    predictions = a2 # the predicted values are the outputs of the output layer
+    a0 = d_test
+    z1 = np.matmul(a0, w1) + b1
+    a1 = 1 / (1 + np.exp(-z1))
+    z2 = np.matmul(a1, w2) + b2
+    a2 = 1 / (1 + np.exp(-z2))
+    predictions = a2
     prediction_2 = np.empty(predictions.shape[0], dtype=int)
     for i, ligne in enumerate(predictions):
         prediction_2[i] = np.argmax(ligne)+1
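For a softmax output with one-hot targets, the gradient of the summed cross-entropy (the sum over the batch of -log of the probability assigned to the true class) with respect to the softmax inputs is exactly predictions - targets, which is the expression used for dc_dz2 in the hunk above. A minimal sketch of the matching loss, averaged over the batch as in the script at the top of this commit (the helper name cross_entropy_loss is illustrative, not part of the commit):

import numpy as np

def cross_entropy_loss(probs, targets_one_hot, eps=1e-15):
    # Categorical cross-entropy for softmax outputs and one-hot targets:
    # the mean over the batch of -log(probability assigned to the true class).
    probs = np.clip(probs, eps, 1.0)
    return -np.mean(np.sum(targets_one_hot * np.log(probs), axis=1))

With one-hot rows, the inner sum simply picks out the log-probability of the true class for each example.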
@@ -101,13 +117,9 @@ def test_mlp(w1,b1,w2,b2,d_test,labels_test):
     return(nombre_indices/len(labels_test))
 def run_mlp_training(data_train, labels_train, data_test, labels_test,d_h,learning_rate,num_epoch):
-    d_in = data_train.shape[1] # input dimension
-    d_out = max(labels_train) # output dimension (number of neurons of the output layer)
-    # w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights
-    # b1 = np.zeros((1, d_h)) # first layer biaises
-    # w2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights
-    # b2 = np.zeros((1, d_out)) # second layer biaises
+    d_in = data_train.shape[1]
+    d_out = max(labels_train)
     w1 = (2*np.random.rand(d_in, d_h)-1) # first layer weights
     b1 = 2*np.random.rand(1, d_h)-1 # first layer biaises
     w2 = 2*np.random.rand(d_h, d_out)-1 # second layer weights
...
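A finite-difference check is a quick way to validate backpropagation code such as learn_once_mse from the diff above. The sketch below is illustrative only: it assumes learn_once_mse is importable with the signature shown here and that it also updates w1 and b1 through learning_methode (those update lines fall in the collapsed part of the diff). Because the backward pass scales the output error by 2/data.shape[0], the reference loss used here is the per-sample sum of squared errors averaged over the batch rather than np.mean.

import numpy as np
# from mlp import learn_once_mse  # assumed import; the module name is illustrative

def reference_loss(w1, b1, w2, b2, data, targets):
    # Same forward pass as learn_once_mse, with the normalization that the
    # backward pass actually differentiates.
    a1 = 1 / (1 + np.exp(-(data @ w1 + b1)))
    a2 = 1 / (1 + np.exp(-(a1 @ w2 + b2)))
    return np.sum(np.square(a2 - targets)) / data.shape[0]

rng = np.random.default_rng(0)
data, targets = rng.random((5, 4)), rng.random((5, 3))
w1, b1 = rng.standard_normal((4, 6)), np.zeros((1, 6))
w2, b2 = rng.standard_normal((6, 3)), np.zeros((1, 3))
lr = 1e-2

# Recover the analytic dL/dw1[0, 0] from the size of the update step.
new_w1, _, _, _, _ = learn_once_mse(w1, b1, w2, b2, data, targets, lr)
analytic = (w1[0, 0] - new_w1[0, 0]) / lr

# Central finite-difference estimate of the same partial derivative.
eps = 1e-5
w1_plus, w1_minus = w1.copy(), w1.copy()
w1_plus[0, 0] += eps
w1_minus[0, 0] -= eps
numeric = (reference_loss(w1_plus, b1, w2, b2, data, targets)
           - reference_loss(w1_minus, b1, w2, b2, data, targets)) / (2 * eps)
print(analytic, numeric)  # the two estimates should agree to several decimals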
@@ -19,14 +19,21 @@ import numpy as np
 # if len(dico) > 1:
 # filtered_dict = sorted(dico, key=lambda item: item[1][1])
 # print(dico[0][0])
+def one_hot(label):
+    nbr_classe=9
+    mat=np.zeros((len(label),nbr_classe))
+    for label_indexe,label_im, in enumerate(label):
+        mat[label_indexe,label_im-1]=1
+    return(mat)
-mat=np.array([[1,2,3,4],[6,6,4,4],[3,2,4,85]])
-mat_exp=np.exp(mat)
-v=np.sum(mat_exp,axis=1)
-print(v)
-mat_exp_norm=mat_exp/v[:, np.newaxis]
-vrai=np.array([[0,0,0,1],[1,0,0,0],[0,0,1,0]])
-print(-np.log(np.max(mat_exp_norm*vrai,axis=1)))
-L=np.mean(-np.log(np.max(vrai*mat_exp_norm,axis=1)))
-print(L)
+mat=np.array([1,8,6,4,7,8,5,2,4,6,4])
+print(one_hot(mat))
+# mat_exp=np.exp(mat)
+# v=np.sum(mat_exp,axis=1)
+# print(v)
+# mat_exp_norm=mat_exp/v[:, np.newaxis]
+# vrai=np.array([[0,0,0,1],[1,0,0,0],[0,0,1,0]])
+# print(-np.log(np.max(mat_exp_norm*vrai,axis=1)))
+# L=np.mean(-np.log(np.max(vrai*mat_exp_norm,axis=1)))
+# print(L)
\ No newline at end of file