Commit 04d57a04 authored by Sucio

correction code

parent 59861d42
Source diff could not be displayed: it is too large. The file contents are shown below.
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
from read_cifar import read_cifar_batch, split_dataset
import matplotlib.pyplot as plt

# Load CIFAR-10 from the local data files
X, y = read_cifar_batch("data/cifar-10-batches-py/data_batch_1")

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

# Data preprocessing
# The images still need to be reshaped, normalized, etc.

# Define the network architecture
input_size = 32 * 32 * 3  # 32x32 pixels, 3 channels (RGB)
hidden_size = 64          # number of units in the hidden layer
output_size = 10          # 10 classes in CIFAR-10

# Initialize the weights and biases
np.random.seed(0)
weights_input_hidden = np.random.randn(input_size, hidden_size)
bias_input_hidden = np.zeros((1, hidden_size))
weights_hidden_output = np.random.randn(hidden_size, output_size)
bias_hidden_output = np.zeros((1, output_size))

# Hyperparameters
learning_rate = 0.1
num_epochs = 100

y_print, x_print, y2_print = [], [], []

# Train the model
for epoch in range(num_epochs):
    # Forward pass
    hidden_input = np.dot(X_train, weights_input_hidden) + bias_input_hidden
    hidden_output = 1 / (1 + np.exp(-hidden_input))  # sigmoid activation
    output_layer = np.dot(hidden_output, weights_hidden_output) + bias_hidden_output

    # Softmax (shifted by the row maximum for numerical stability)
    exp_scores = np.exp(output_layer - np.max(output_layer, axis=1, keepdims=True))
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    # Cross-entropy loss
    num_examples = len(X_train)
    correct_logprobs = -np.log(probs[range(num_examples), y_train])
    data_loss = np.sum(correct_logprobs) / num_examples

    # Backward pass: gradient of the loss
    dprobs = probs.copy()
    dprobs[range(num_examples), y_train] -= 1
    dprobs /= num_examples
    dweights_hidden_output = np.dot(hidden_output.T, dprobs)
    dbias_hidden_output = np.sum(dprobs, axis=0, keepdims=True)
    dhidden = np.dot(dprobs, weights_hidden_output.T)
    dhidden_hidden = dhidden * (1 - hidden_output) * hidden_output
    dweights_input_hidden = np.dot(X_train.T, dhidden_hidden)
    dbias_input_hidden = np.sum(dhidden_hidden, axis=0)

    # Update the weights and biases
    weights_input_hidden -= learning_rate * dweights_input_hidden
    bias_input_hidden -= learning_rate * dbias_input_hidden
    weights_hidden_output -= learning_rate * dweights_hidden_output
    bias_hidden_output -= learning_rate * dbias_hidden_output

    x_print.append(epoch)
    y_print.append(data_loss)
    predicted_class = np.argmax(output_layer, axis=1)
    y2_print.append(accuracy_score(y_train, predicted_class))

    # Print the loss every 100 epochs to monitor training
    if (epoch + 1) % 100 == 0:
        print(f'Epoch {epoch + 1}: Loss = {data_loss:.4f}')

# Evaluate the model on the test set
hidden_input = np.dot(X_test, weights_input_hidden) + bias_input_hidden
hidden_output = 1 / (1 + np.exp(-hidden_input))
output_layer = np.dot(hidden_output, weights_hidden_output) + bias_hidden_output
predicted_class = np.argmax(output_layer, axis=1)
accuracy = accuracy_score(y_test, predicted_class)
print(f'Test set accuracy: {accuracy:.4f}')

# Plot the loss and accuracy curves
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(x_print, y_print, label='training loss')
ax1.set_xlabel('epoch')
ax1.set_ylabel('loss')
ax1.set_title('Loss per epoch')
ax1.legend()
ax2.plot(x_print, y2_print, label='training accuracy')
ax2.set_xlabel('epoch')
ax2.set_ylabel('accuracy')
ax2.set_title('Training accuracy per epoch')
ax2.legend()
plt.tight_layout()
plt.show()
\ No newline at end of file
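The preprocessing step in the script above is left as a TODO. Below is a minimal sketch of one common choice, scaling raw pixel values to [0, 1] and zero-centering with statistics from the training split only; it assumes read_cifar_batch returns one flattened row of 3072 pixel values per image (as input_size = 32 * 32 * 3 suggests), and the helper name normalize_images is illustrative rather than part of the repository.

import numpy as np

def normalize_images(X_train, X_test):
    # Scale assumed 0-255 pixel values to [0, 1].
    X_train = X_train.astype(np.float32) / 255.0
    X_test = X_test.astype(np.float32) / 255.0
    # Zero-center with the training-set mean so that no test-set
    # information leaks into training.
    mean = X_train.mean(axis=0, keepdims=True)
    return X_train - mean, X_test - mean

# Example: call right after train_test_split, before the training loop.
# X_train, X_test = normalize_images(X_train, X_test)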
@@ -5,38 +5,18 @@ import matplotlib.pyplot as plt
 def learning_methode(k,dk,learning_rate):
     k=k-learning_rate*dk
-    # normalize k to [-1, 1]
-    # max_k=np.max(k)
-    # min_k=np.min(k)
-    # k=(k*2)/(max_k-min_k)-min_k-1
-    print(np.max(dk))
     return(k)
-def softmax(y):
-    y=np.exp(y)
-    v=np.sum(y,axis=1)
-    return(y / v[:, np.newaxis])
-# def reugalisation(W)
 def learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate):
-    # Forward pass
-    a0 = data # the data are the input of the first layer
-    z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
-    a1 = 1 / (1 + np.exp(-z1)) # output of the hidden layer (sigmoid activation function)
-    z2 = np.matmul(a1, w2) + b2 # input of the output layer
-    a2 = 1 / (1 + np.exp(-z2)) # output of the output layer (sigmoid activation function)
-    # s=np.sum(a2,axis=1)
-    # a2=a2/s[:, np.newaxis]
-    # print(np.max(a2,axis=1))
-    #a2=softmax(a2)
-    predictions = a2 # the predicted values are the outputs of the output layer
-    dc_da2=(2/data.shape[0])*(a2-targets)
-    # dc_da2=(1/data.shape[0])*((-targets/a2)-(1-targets)/(1-a2))
-    # dc_da2=((np.ones(targets.shape)-2*targets)/(data.shape[0]*a2))
-    # dc_da2=(-targets)/(data.shape[0]*a2)
+    a0 = data
+    z1 = np.matmul(a0, w1) + b1
+    a1 = 1 / (1 + np.exp(-z1))
+    z2 = np.matmul(a1, w2) + b2
+    a2 = 1 / (1 + np.exp(-z2))
+    predictions = a2
+    dc_da2=(2/data.shape[0])*(a2-targets)
     dc_dz2=dc_da2*(a2*(1-a2))
     dc_dw2=np.matmul(np.transpose(a1), dc_dz2)
     dc_db2=np.matmul(np.ones((1,dc_dz2.shape[0])),dc_dz2)
@@ -50,18 +30,8 @@ def learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate):
     w2=learning_methode(w2,dc_dw2,learning_rate)
     b2=learning_methode(b2,dc_db2,learning_rate)
-    # prediction_2 = np.zeros(predictions.shape, dtype=int)
-    # for i, ligne in enumerate(predictions):
-    # prediction_2[i][np.argmin(ligne)] = 1
-    # indices_egalite = np.where(prediction_2 == targets)[0]
-    # nombre_indices = len(indices_egalite)
     # Compute loss (MSE)
-    # loss = np.mean(np.square(predictions - targets))
+    loss = np.mean(np.square(predictions - targets))
-    # binary cross-entropy loss
-    # loss = np.mean(targets*np.log(predictions)-(1-targets)*np.log(1-predictions))
-    # loss=np.mean(-np.log(np.max(targets*predictions,axis=1)))
-    # loss=np.mean((np.ones(targets.shape)-2*targets)*np.log(predictions))
     return(w1,b1,w2,b2,loss)
 def one_hot(label):
@@ -71,28 +41,74 @@ def one_hot(label):
     mat[label_indexe,label_im-1]=1
     return(mat)
+def softmax(y):
+    y=np.exp(y)
+    v=np.sum(y,axis=1)
+    return(y / v[:, np.newaxis])
 def learn_once_cross_entropy(w1,b1,w2,b2,data,labels_train,learning_rate):
-    Y=one_hot(labels_train)
-    w1,b1,w2,b2,loss=learn_once_mse(w1,b1,w2,b2,data,Y,learning_rate)
+    targets = one_hot(labels_train)
+    targets=targets+1e-15
+    a0 = data
+    z1 = np.matmul(a0, w1) + b1
+    a1 = 1 / (1 + np.exp(-z1))
+    z2 = np.matmul(a1, w2) + b2
+    a2 = 1 / (1 + np.exp(-z2))
+    softa2=softmax(a2)
+    # predictions = softa2
+    predictions=softa2
+    # dc_softmax=-(targets/softa2)+((1-targets)/(1-softa2))
+    # dc_a2=dc_softmax*(softa2*(1-softa2))
+    # dc_dz2=dc_a2*(a2*(1-a2))
+    dc_dz2=predictions-targets
+    dc_dw2=np.matmul(np.transpose(a1), dc_dz2)
+    dc_db2=np.matmul(np.ones((1,dc_dz2.shape[0])),dc_dz2)
+    dc_da1=np.matmul(dc_dz2,np.transpose(w2))
+    dc_dz1=dc_da1*(a1*(1-a1))
+    dc_dw1=np.matmul(np.transpose(a0), dc_dz1)
+    dc_db1=np.matmul(np.ones((1,dc_dz1.shape[0])),dc_dz1)
+    w1=learning_methode(w1,dc_dw1,learning_rate)
+    b1=learning_methode(b1,dc_db1,learning_rate)
+    w2=learning_methode(w2,dc_dw2,learning_rate)
+    b2=learning_methode(b2,dc_db2,learning_rate)
+    # binary cross-entropy loss
+    loss = np.mean(targets*np.log(predictions)-(1-targets)*np.log(1-predictions))
     return(w1,b1,w2,b2,loss)
+def accuracy(w1,b1,w2,b2,data,labels):
+    a0 = data
+    z1 = np.matmul(a0, w1) + b1
+    a1 = 1 / (1 + np.exp(-z1))
+    z2 = np.matmul(a1, w2) + b2
+    a2 = 1 / (1 + np.exp(-z2))
+    softa2=softmax(a2)
+    predictions = softa2
+    prediction_2 = np.empty(predictions.shape[0], dtype=int)
+    for i, ligne in enumerate(predictions):
+        prediction_2[i] = np.argmax(ligne)+1
+    indices_egalite = np.where(prediction_2 == labels)[0]
+    nombre_indices = len(indices_egalite)
+    return(nombre_indices/len(labels))
 def train_mlp(w1,b1,w2,b2,d_train,labels_train,learning_rate,num_epoch):
     train_accuracies=[]
-    pas=len(labels_train)//num_epoch
     for k in range(num_epoch):
-        partial_data=d_train[k*pas:(k+1)*pas,:]
-        patial_label=l_train[k*pas:(k+1)*pas]
-        w1,b1,w2,b2,loss=learn_once_cross_entropy(w1,b1,w2,b2,partial_data,patial_label,learning_rate)
-        train_accuracies.append(loss)
+        w1,b1,w2,b2,loss=learn_once_mse(w1,b1,w2,b2,d_train,labels_train,learning_rate)
+        train_accuracies.append(accuracy(w1,b1,w2,b2,d_train,labels_train))
     return (w1,b1,w2,b2,train_accuracies)
 def test_mlp(w1,b1,w2,b2,d_test,labels_test):
-    a0 = d_test # the data are the input of the first layer
-    z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
-    a1 = 1 / (1 + np.exp(-z1)) # output of the hidden layer (sigmoid activation function)
-    z2 = np.matmul(a1, w2) + b2 # input of the output layer
-    a2 = 1 / (1 + np.exp(-z2)) # output of the output layer (sigmoid activation function)
-    predictions = a2 # the predicted values are the outputs of the output layer
+    a0 = d_test
+    z1 = np.matmul(a0, w1) + b1
+    a1 = 1 / (1 + np.exp(-z1))
+    z2 = np.matmul(a1, w2) + b2
+    a2 = 1 / (1 + np.exp(-z2))
+    predictions = a2
     prediction_2 = np.empty(predictions.shape[0], dtype=int)
     for i, ligne in enumerate(predictions):
         prediction_2[i] = np.argmax(ligne)+1
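For a softmax output with one-hot targets, the gradient of the summed cross-entropy (the sum over the batch of -log of the probability assigned to the true class) with respect to the softmax inputs is exactly predictions - targets, which is the expression used for dc_dz2 in the hunk above. A minimal sketch of the matching loss, averaged over the batch as in the script at the top of this commit (the helper name cross_entropy_loss is illustrative, not part of the commit):

import numpy as np

def cross_entropy_loss(probs, targets_one_hot, eps=1e-15):
    # Categorical cross-entropy for softmax outputs and one-hot targets:
    # the mean over the batch of -log(probability assigned to the true class).
    probs = np.clip(probs, eps, 1.0)
    return -np.mean(np.sum(targets_one_hot * np.log(probs), axis=1))

With one-hot rows, the inner sum simply picks out the log-probability of the true class for each example.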
@@ -101,13 +117,9 @@ def test_mlp(w1,b1,w2,b2,d_test,labels_test):
     return(nombre_indices/len(labels_test))
 def run_mlp_training(data_train, labels_train, data_test, labels_test,d_h,learning_rate,num_epoch):
-    d_in = data_train.shape[1] # input dimension
-    d_out = max(labels_train) # output dimension (number of neurons of the output layer)
-    # w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights
-    # b1 = np.zeros((1, d_h)) # first layer biaises
-    # w2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights
-    # b2 = np.zeros((1, d_out)) # second layer biaises
+    d_in = data_train.shape[1]
+    d_out = max(labels_train)
     w1 = (2*np.random.rand(d_in, d_h)-1) # first layer weights
     b1 = 2*np.random.rand(1, d_h)-1 # first layer biaises
     w2 = 2*np.random.rand(d_h, d_out)-1 # second layer weights
...
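A finite-difference check is a quick way to validate backpropagation code such as learn_once_mse from the diff above. The sketch below is illustrative only: it assumes learn_once_mse is importable with the signature shown here and that it also updates w1 and b1 through learning_methode (those update lines fall in the collapsed part of the diff). Because the backward pass scales the output error by 2/data.shape[0], the reference loss used here is the per-sample sum of squared errors averaged over the batch rather than np.mean.

import numpy as np
# from mlp import learn_once_mse  # assumed import; the module name is illustrative

def reference_loss(w1, b1, w2, b2, data, targets):
    # Same forward pass as learn_once_mse, with the normalization that the
    # backward pass actually differentiates.
    a1 = 1 / (1 + np.exp(-(data @ w1 + b1)))
    a2 = 1 / (1 + np.exp(-(a1 @ w2 + b2)))
    return np.sum(np.square(a2 - targets)) / data.shape[0]

rng = np.random.default_rng(0)
data, targets = rng.random((5, 4)), rng.random((5, 3))
w1, b1 = rng.standard_normal((4, 6)), np.zeros((1, 6))
w2, b2 = rng.standard_normal((6, 3)), np.zeros((1, 3))
lr = 1e-2

# Recover the analytic dL/dw1[0, 0] from the size of the update step.
new_w1, _, _, _, _ = learn_once_mse(w1, b1, w2, b2, data, targets, lr)
analytic = (w1[0, 0] - new_w1[0, 0]) / lr

# Central finite-difference estimate of the same partial derivative.
eps = 1e-5
w1_plus, w1_minus = w1.copy(), w1.copy()
w1_plus[0, 0] += eps
w1_minus[0, 0] -= eps
numeric = (reference_loss(w1_plus, b1, w2, b2, data, targets)
           - reference_loss(w1_minus, b1, w2, b2, data, targets)) / (2 * eps)
print(analytic, numeric)  # the two estimates should agree to several decimals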
@@ -19,14 +19,21 @@ import numpy as np
 # if len(dico) > 1:
 # filtered_dict = sorted(dico, key=lambda item: item[1][1])
 # print(dico[0][0])
+def one_hot(label):
+    nbr_classe=9
+    mat=np.zeros((len(label),nbr_classe))
+    for label_indexe,label_im, in enumerate(label):
+        mat[label_indexe,label_im-1]=1
+    return(mat)
-mat=np.array([[1,2,3,4],[6,6,4,4],[3,2,4,85]])
-mat_exp=np.exp(mat)
-v=np.sum(mat_exp,axis=1)
-print(v)
-mat_exp_norm=mat_exp/v[:, np.newaxis]
-vrai=np.array([[0,0,0,1],[1,0,0,0],[0,0,1,0]])
-print(-np.log(np.max(mat_exp_norm*vrai,axis=1)))
-L=np.mean(-np.log(np.max(vrai*mat_exp_norm,axis=1)))
-print(L)
+mat=np.array([1,8,6,4,7,8,5,2,4,6,4])
+print(one_hot(mat))
+# mat_exp=np.exp(mat)
+# v=np.sum(mat_exp,axis=1)
+# print(v)
+# mat_exp_norm=mat_exp/v[:, np.newaxis]
+# vrai=np.array([[0,0,0,1],[1,0,0,0],[0,0,1,0]])
+# print(-np.log(np.max(mat_exp_norm*vrai,axis=1)))
+# L=np.mean(-np.log(np.max(vrai*mat_exp_norm,axis=1)))
+# print(L)
\ No newline at end of file