From 6e2e532b6dc24ed62b9acad4bbb1d3d3585184ac Mon Sep 17 00:00:00 2001
From: "Jangberry (Nomad-Debian)" <matt2001@hotmail.fr>
Date: Thu, 9 Nov 2023 15:52:18 +0100
Subject: [PATCH] Pep8

---
 .vscode/settings.json |  4 ++++
 README.md             | 14 +++++++++++++-
 knn.py                | 41 +++++++++++++++++++++++++----------------
 requirements.txt      |  3 ++-
 4 files changed, 44 insertions(+), 18 deletions(-)
 create mode 100644 .vscode/settings.json

diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..56c56a0
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,4 @@
+{
+    "python.analysis.autoImportCompletions": true,
+    "python.analysis.typeCheckingMode": "off"
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index fbaab16..a0ac53b 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,22 @@
 ## Setup
 
 1. Download the [CIFAR dataset](https://www.cs.toronto.edu/~kriz/cifar.html) `cifar-10-batches-py` in the [data](./data/) folder.
-1. You might want to create a venv with `python -m venv .venv` and activate it with `source .venv/bin/activate`.
+1. (*optional*) You might want to create a venv with `python -m venv .venv` and activate it with `source .venv/bin/activate`.
 1. Install the requirements using `pip install -r requirements.txt`.
 
 ## Usage
 
 To test knn, simply run [knn.py](./knn.py) using `python knn.py`.  
 Otherwise here is a test result: ![knn test result](./results/knn.png)
+
+## Some proofs
+
+### *1. Prove that $`\sigma' = \sigma \times (1-\sigma)`$*
+
+To prove that, let's first differentiate the sigmoid function:  
+$\sigma(x) = \frac{1}{1+e^{-x}}$  
+so $\sigma'(x)=\frac{e^{-x}}{(1+e^{-x})^2}$  
+$\sigma'(x)=\frac{1}{1+e^{-x}}\times\frac{e^{-x}}{1+e^{-x}}$  
+Here we can identify $\frac{1}{1+e^{-x}} = \sigma(x)$  
+And $\frac{e^{-x}}{1+e^{-x}} = 1 - \frac{1}{1+e^{-x}} = 1 - \sigma(x)$  
+So $\sigma'(x) = \sigma(x) \times (1 - \sigma(x))$
\ No newline at end of file
diff --git a/knn.py b/knn.py
index 9d67d29..9ba4dbf 100644
--- a/knn.py
+++ b/knn.py
@@ -1,6 +1,8 @@
-import read_cifar
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
+import read_cifar
+
 
 def distance_matrix(X: np.array, Y: np.array):
     """Compute the L2 distance between two matricies
@@ -12,10 +14,13 @@ def distance_matrix(X: np.array, Y: np.array):
         dist -- distance matrix (shape: (X.shape[0], Y.shape[0])) => dist[i, j] = L2(X[i], Y[j])
     """
     return np.sqrt(
-        np.sum(np.square(X), axis=1).reshape((-1, 1))@np.ones((1, Y.shape[0])) + # X²
-        np.ones((X.shape[0], 1))@np.sum(np.square(Y), axis=1).reshape((1, -1))   # Y²
-        - 2*X@Y.T)                                                       # -2XY
-    
+        # X² + Y² - 2XY
+        np.sum(np.square(X), axis=1).reshape((-1, 1))@np.ones((1, Y.shape[0])) +
+        np.ones((X.shape[0], 1))@np.sum(np.square(Y),
+                                        axis=1).reshape((1, -1))
+        - 2*X@Y.T)
+
+
 def knn_predict(dist: np.array, labels_train: np.array, k: int):
     """Predict the labels of the test set using the k-nearest neighbors algorithm
 
@@ -31,11 +36,13 @@ def knn_predict(dist: np.array, labels_train: np.array, k: int):
     # Get the labels of the k nearest neighbors
     labels = labels_train[indices]
     # Get the most frequent label
-    labels = np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=labels)
-    
+    labels = np.apply_along_axis(
+        lambda x: np.bincount(x).argmax(), axis=1, arr=labels)
+
     return labels
 
-def evaluate_knn(data_train: np.array, labels_train: np.array, data_test: np.array, labels_test: np.array, k: int, dist = None):
+
+def evaluate_knn(data_train: np.array, labels_train: np.array, data_test: np.array, labels_test: np.array, k: int, dist=None):
     """Evaluate the k-nearest neighbors algorithm
 
     Arguments:
@@ -59,24 +66,26 @@ if __name__ == "__main__":
     print("Reading data")
     images, labels = read_cifar.read_cifar("data/cifar-10-batches-py/")
     split_factor = 0.9
-    
+
     # Split the data into training and testing sets
     print("Splitting sets")
-    images_train, labels_train, images_test, labels_test = read_cifar.split_dataset(images, labels, split_factor)
+    images_train, labels_train, images_test, labels_test = read_cifar.split_dataset(
+        images, labels, split_factor)
 
     # Compute the distance matrix
     print("Computing the distance matrix...")
     dist = distance_matrix(images_test, images_train)
-    
+
     # Evaluate the k-nearest neighbors algorithm
-    accuracies = [] # List of the accuracy
-    ks = [] # List to make sure the plot starts at one and not 0
+    accuracies = []  # List of the accuracy
+    ks = []  # List to make sure the plot starts at one and not 0
     for k in range(1, 21):
-        accuracy = evaluate_knn(images_train, labels_train, images_test, labels_test, k)
+        accuracy = evaluate_knn(
+            images_train, labels_train, images_test, labels_test, k)
         print(f"Accuracy for k = {k}: {accuracy}")
         accuracies.append(accuracy)
         ks.append(k)
     fig = plt.figure()
     plt.plot(ks, accuracies)
     fig.savefig(fname="results/knn.png")
-    plt.show()
\ No newline at end of file
+    plt.show()
diff --git a/requirements.txt b/requirements.txt
index 296d654..806f221 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
-numpy
\ No newline at end of file
+numpy
+matplotlib
\ No newline at end of file
-- 
GitLab