From 6e2e532b6dc24ed62b9acad4bbb1d3d3585184ac Mon Sep 17 00:00:00 2001 From: "Jangberry (Nomad-Debian)" <matt2001@hotmail.fr> Date: Thu, 9 Nov 2023 15:52:18 +0100 Subject: [PATCH] Pep8 --- .vscode/settings.json | 4 ++++ README.md | 14 +++++++++++++- knn.py | 41 +++++++++++++++++++++++++---------------- requirements.txt | 3 ++- 4 files changed, 44 insertions(+), 18 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..56c56a0 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,4 @@ +{ + "python.analysis.autoImportCompletions": true, + "python.analysis.typeCheckingMode": "off" +} \ No newline at end of file diff --git a/README.md b/README.md index fbaab16..a0ac53b 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,22 @@ ## Setup 1. Download the [CIFAR dataset](https://www.cs.toronto.edu/~kriz/cifar.html) `cifar-10-batches-py` in the [data](./data/) folder. -1. You might want to create a venv with `python -m venv .venv` and activate it with `source .venv/bin/activate`. +1. (*optional*) You might want to create a venv with `python -m venv .venv` and activate it with `source .venv/bin/activate`. 1. Install the requirements using `pip install -r requirements.txt`. ## Usage To test knn, simply run [knn.py](./knn.py) using `python knn.py`. Otherwise here is a test result:  + +## Some proofs + +### *1. 
Prove that $`\sigma' = \sigma \times (1-\sigma)`$* + +To prove that, let's first derive the sigmoid function: +$\sigma(x) = \frac{1}{1+e^{-x}}$ +so $\sigma'(x)=\frac{e^{-x}}{(1+e^{-x})^2}$ +$\sigma'(x)=\frac{1}{1+e^{-x}}\times\frac{e^{-x}}{1+e^{-x}}$ +Here we can identify $\frac{1}{1+e^{-x}} = \sigma(x)$ +And $\frac{e^{-x}}{1+e^{-x}} = 1 - \frac{1}{1+e^{-x}} = 1 - \sigma(x)$ +So $\sigma'(x) = \sigma(x) \times (1 - \sigma(x))$ \ No newline at end of file diff --git a/knn.py b/knn.py index 9d67d29..9ba4dbf 100644 --- a/knn.py +++ b/knn.py @@ -1,6 +1,8 @@ -import read_cifar -import numpy as np import matplotlib.pyplot as plt +import numpy as np + +import read_cifar + def distance_matrix(X: np.array, Y: np.array): """Compute the L2 distance between two matricies @@ -12,10 +14,13 @@ def distance_matrix(X: np.array, Y: np.array): dist -- distance matrix (shape: (X.shape[0], Y.shape[0])) => dist[i, j] = L2(X[i], Y[j]) """ return np.sqrt( - np.sum(np.square(X), axis=1).reshape((-1, 1))@np.ones((1, Y.shape[0])) + # X² - np.ones((X.shape[0], 1))@np.sum(np.square(Y), axis=1).reshape((1, -1)) # Y² - - 2*X@Y.T) # -2XY - + # X² + Y² - 2XY + np.sum(np.square(X), axis=1).reshape((-1, 1))@np.ones((1, Y.shape[0])) + + np.ones((X.shape[0], 1))@np.sum(np.square(Y), + axis=1).reshape((1, -1)) + - 2*X@Y.T) + + def knn_predict(dist: np.array, labels_train: np.array, k: int): """Predict the labels of the test set using the k-nearest neighbors algorithm @@ -31,11 +36,13 @@ def knn_predict(dist: np.array, labels_train: np.array, k: int): # Get the labels of the k nearest neighbors labels = labels_train[indices] # Get the most frequent label - labels = np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=labels) - + labels = np.apply_along_axis( + lambda x: np.bincount(x).argmax(), axis=1, arr=labels) + return labels -def evaluate_knn(data_train: np.array, labels_train: np.array, data_test: np.array, labels_test: np.array, k: int, dist = None): + +def evaluate_knn(data_train: np.array, 
labels_train: np.array, data_test: np.array, labels_test: np.array, k: int, dist=None): """Evaluate the k-nearest neighbors algorithm Arguments: @@ -59,24 +66,26 @@ if __name__ == "__main__": print("Reading data") images, labels = read_cifar.read_cifar("data/cifar-10-batches-py/") split_factor = 0.9 - + # Split the data into training and testing sets print("Splitting sets") - images_train, labels_train, images_test, labels_test = read_cifar.split_dataset(images, labels, split_factor) + images_train, labels_train, images_test, labels_test = read_cifar.split_dataset( + images, labels, split_factor) # Compute the distance matrix print("Computing the distance matrix...") dist = distance_matrix(images_test, images_train) - + # Evaluate the k-nearest neighbors algorithm - accuracies = [] # List of the accuracy - ks = [] # List to make sure the plot starts at one and not 0 + accuracies = [] # List of the accuracy + ks = [] # List to make sure the plot starts at one and not 0 for k in range(1, 21): - accuracy = evaluate_knn(images_train, labels_train, images_test, labels_test, k) + accuracy = evaluate_knn( + images_train, labels_train, images_test, labels_test, k) print(f"Accuracy for k = {k}: {accuracy}") accuracies.append(accuracy) ks.append(k) fig = plt.figure() plt.plot(ks, accuracies) fig.savefig(fname="results/knn.png") - plt.show() \ No newline at end of file + plt.show() diff --git a/requirements.txt b/requirements.txt index 296d654..806f221 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -numpy \ No newline at end of file +numpy +matplotlib \ No newline at end of file -- GitLab