diff --git a/.gitignore b/.gitignore index 63649e64c6c6e62adfd72e7b568f1300ebbc4740..45755846413dd3cfa8c5f8237cc10e65c801b9ab 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ image/* _pycache_/* plotting.py image-classification/ +cifar.PNG diff --git a/README.md b/README.md index 3ec3bb7af0298dcd9745890dbd9db3afd6b8bcc7..77c08a88a1e81fab607f233d3710d3b7037883cd 100644 --- a/README.md +++ b/README.md @@ -1,92 +1,99 @@ -# Image Classification +# Image Classification Project +This project aims to implement an image classification program using two successive models: k-nearest neighbors (KNN) and artificial neural networks (ANN). +## CIFAR-10 Dataset -## Getting started +The CIFAR-10 dataset is a commonly used database in computer vision for image classification. It consists of 60,000 color images of 32x32 pixels, distributed across 10 distinct classes, representing different objects or animals. -To make it easy for you to get started with GitLab, here's a list of recommended next steps. + +### CIFAR-10 Dataset Classes: -Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)! +1. airplane +2. automobile +3. bird +4. cat +5. deer +6. dog +7. frog +8. horse +9. ship +10. 
truck -## Add your files +## Accomplished Steps -- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files -- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command: +### CIFAR Database Preparation -``` -cd existing_repo -git remote add origin https://gitlab.ec-lyon.fr/selalimi/image-classification.git -git branch -M main -git push -uf origin main -``` +#### Database Download: +The CIFAR-10 database was downloaded from [Dataset_Cifar-10](https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz). -## Integrate with your tools +#### Creating the data Folder: +A folder named data was created to store the CIFAR-10 database files. -- [ ] [Set up project integrations](https://gitlab.ec-lyon.fr/selalimi/image-classification/-/settings/integrations) +#### Writing the read_cifar.py Script: +A Python file named read_cifar.py was created, including the following functions: +- `read_cifar_batch` +- `read_cifar` +- `split_dataset` -## Collaborate with your team +## Implementation of k-nearest neighbors (KNN) -- [ ] [Invite team members and collaborators](https://docs.gitlab.com/ee/user/project/members/) -- [ ] [Create a new merge request](https://docs.gitlab.com/ee/user/project/merge_requests/creating_merge_requests.html) -- [ ] [Automatically close issues from merge requests](https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#closing-issues-automatically) -- [ ] [Enable merge request approvals](https://docs.gitlab.com/ee/user/project/merge_requests/approvals/) -- [ ] [Set auto-merge](https://docs.gitlab.com/ee/user/project/merge_requests/merge_when_pipeline_succeeds.html) +### Writing the knn.py Script: +A Python file named knn.py was created, including the following functions: +- 
`distance_matrix` +- `knn_predict` +- `evaluate_knn` +- `plot_KNN` -## Test and Deploy +### Performance Study +The effectiveness of the KNN algorithm was evaluated as a function of the number of neighbors (k), using `split=0.9` for the train/test split. -Use the built-in continuous integration in GitLab. +### Running the KNN Code -- [ ] [Get started with GitLab CI/CD](https://docs.gitlab.com/ee/ci/quick_start/index.html) -- [ ] [Analyze your code for known vulnerabilities with Static Application Security Testing(SAST)](https://docs.gitlab.com/ee/user/application_security/sast/) -- [ ] [Deploy to Kubernetes, Amazon EC2, or Amazon ECS using Auto Deploy](https://docs.gitlab.com/ee/topics/autodevops/requirements.html) -- [ ] [Use pull-based deployments for improved Kubernetes management](https://docs.gitlab.com/ee/user/clusters/agent/) -- [ ] [Set up protected environments](https://docs.gitlab.com/ee/ci/environments/protected_environments.html) - -*** +1. Run the following Python code to load and split the data: +```python +import read_cifar as rc +X, y = rc.read_cifar('data') +# Split the Dataset +X_train, y_train, X_test, y_test = rc.split_dataset(X, y, split=0.9) +``` -# Editing this README +## Results +### Generating the Graph +1. Results using KNN: -When you're ready to make this README your own, just edit this file and use the handy template below (or feel free to structure it however you want - this is just a starting point!). Thank you to [makeareadme.com](https://www.makeareadme.com/) for this template. +A graph showing the accuracy variation with k was generated using matplotlib and saved as "knn.png" in the "Results" folder. -## Suggestions for a good README -Every project is different, so consider which of these sections apply to yours. The sections used in the template are suggestions for most open source projects. Also keep in mind that while a README can be too long and detailed, too long is better than too short. 
If you think your README is too long, consider utilizing another form of documentation rather than cutting out information. -## Name -Choose a self-explaining name for your project. + -## Description -Let people know what your project can do specifically. Provide context and add a link to any reference visitors might be unfamiliar with. A list of Features or a Background subsection can also be added here. If there are alternatives to your project, this is a good place to list differentiating factors. +2. Results using ANN : -## Badges -On some READMEs, you may see small images that convey metadata, such as whether or not all the tests are passing for the project. You can use Shields to add some to your README. Many services also have instructions for adding a badge. +A graph showing the accuracy variation with the number of epochs was generated using matplotlib and saved as "mlp.png" in the "Results" folder. -## Visuals -Depending on what you are making, it can be a good idea to include screenshots or even a video (you'll frequently see GIFs rather than actual videos). Tools like ttygif can help, but check out Asciinema for a more sophisticated method. + -## Installation -Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people to using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection. +## Analysis of KNN Results +Unfortunately, the performance of the KNN algorithm was disappointing, with accuracy ranging between 0.33 and 0.34 for different values of k (up to k=20). 
Several reasons may explain these mixed results: -## Usage -Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README. +1. **High Dimensionality of Data**: CIFAR-10 dataset images are 32x32 color pixels (3,072 values per image), resulting in high-dimensional data. This can make Euclidean distance less discriminative, affecting KNN's performance. -## Support -Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc. +2. **Scale Sensitivity**: KNN is sensitive to different feature scales. Pixels in an image can have different values, and KNN may be influenced by these disparities. -## Roadmap -If you have ideas for releases in the future, it is a good idea to list them in the README. +3. **Choice of k**: The choice of the number of neighbors (k) can significantly influence results. An inappropriate k value can lead to underfitting (k too large) or overfitting (k too small). -## Contributing -State if you are open to contributions and what your requirements are for accepting them. +4. **Lack of Feature Abstraction**: KNN directly uses pixels as features. More advanced feature extraction techniques could improve performance. -For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self. +## Analysis of ANN Results +The artificial neural network (ANN) trained on our dataset performs poorly: test set accuracy plateaus around 0.098 after 100 epochs, which is no better than random guessing among 10 classes. -You can also document commands to lint the code or run tests. 
These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser. +These results suggest that adjustments to certain aspects of the model, such as complexity, hyperparameters, or weight initialization, may be necessary to improve its ability to generalize to new data. Further exploration of these aspects could be beneficial in optimizing model performance. -## Authors and acknowledgment -Show your appreciation to those who have contributed to the project. +## Conclusion +The best accuracy is achieved with the KNN model, reaching 36%. However, classification performance could be further improved by using Convolutional Neural Networks (CNN) instead of Artificial Neural Networks (ANN). CNNs are particularly recognized for their effectiveness in image recognition, analysis, and classification of images and videos. -## License -For open source projects, say how it is licensed. +## Author +Sara EL ALIMI -## Project status -If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers. +## License +This project is licensed under the MIT License. 
diff --git a/Results/knn.png b/Results/knn.png new file mode 100644 index 0000000000000000000000000000000000000000..93ce23e5896f4bd9604d96a8349e8c56a1e0e0ec Binary files /dev/null and b/Results/knn.png differ diff --git a/Results/mlp.png b/Results/mlp.png new file mode 100644 index 0000000000000000000000000000000000000000..b68ffa5438c8c26397c7be686d998e7166bc5f5c Binary files /dev/null and b/Results/mlp.png differ diff --git a/main.ipynb b/main.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..04aa675cec429e135d1349890fb63c031028a691 --- /dev/null +++ b/main.ipynb @@ -0,0 +1,623 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare the Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import read_cifar as rc\n", + "X,y=rc.read_cifar('data') \n", + "# Split the Dataset\n", + "X_train,y_train,X_test,y_test=rc.split_dataset(X,y,split=0.9) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## k-nearest neighbors" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "import read_cifar as rc\n", + "import knn\n", + "X,y=rc.read_cifar('data') \n", + "# Split the Dataset\n", + "X_train,y_train,X_test,y_test=rc.split_dataset(X,y,split=0.9) \n", + "# Plot the accuracy of the model KNN\n", + "knn.plot_KNN(X_train,y_train,X_test,y_test) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Artificial Neural Network" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\HP\\Desktop\\Deep_learning_BE\\image-classification\\mlp.py:29: RuntimeWarning: overflow 
encountered in exp\n", + " return 1 / (1 + np.exp(-x))\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/100\n", + "Train Accuracy: 0.093370 Test Accuracy: 0.069000\n", + "Epoch 2/100\n", + "Train Accuracy: 0.073167 Test Accuracy: 0.073833\n", + "Epoch 3/100\n", + "Train Accuracy: 0.074981 Test Accuracy: 0.078000\n", + "Epoch 4/100\n", + "Train Accuracy: 0.076926 Test Accuracy: 0.076667\n", + "Epoch 5/100\n", + "Train Accuracy: 0.078741 Test Accuracy: 0.076000\n", + "Epoch 6/100\n", + "Train Accuracy: 0.079537 Test Accuracy: 0.076000\n", + "Epoch 7/100\n", + "Train Accuracy: 0.078704 Test Accuracy: 0.076500\n", + "Epoch 8/100\n", + "Train Accuracy: 0.079852 Test Accuracy: 0.077000\n", + "Epoch 9/100\n", + "Train Accuracy: 0.079944 Test Accuracy: 0.078000\n", + "Epoch 10/100\n", + "Train Accuracy: 0.079722 Test Accuracy: 0.078667\n", + "Epoch 11/100\n", + "Train Accuracy: 0.080241 Test Accuracy: 0.077667\n", + "Epoch 12/100\n", + "Train Accuracy: 0.080463 Test Accuracy: 0.077500\n", + "Epoch 13/100\n", + "Train Accuracy: 0.080500 Test Accuracy: 0.076167\n", + "Epoch 14/100\n", + "Train Accuracy: 0.080870 Test Accuracy: 0.076333\n", + "Epoch 15/100\n", + "Train Accuracy: 0.081204 Test Accuracy: 0.075333\n", + "Epoch 16/100\n", + "Train Accuracy: 0.081352 Test Accuracy: 0.076167\n", + "Epoch 17/100\n", + "Train Accuracy: 0.081389 Test Accuracy: 0.075667\n", + "Epoch 18/100\n", + "Train Accuracy: 0.082185 Test Accuracy: 0.075333\n", + "Epoch 19/100\n", + "Train Accuracy: 0.081981 Test Accuracy: 0.076167\n", + "Epoch 20/100\n", + "Train Accuracy: 0.081741 Test Accuracy: 0.075667\n", + "Epoch 21/100\n", + "Train Accuracy: 0.081741 Test Accuracy: 0.076833\n", + "Epoch 22/100\n", + "Train Accuracy: 0.082111 Test Accuracy: 0.075667\n", + "Epoch 23/100\n", + "Train Accuracy: 0.082259 Test Accuracy: 0.075333\n", + "Epoch 24/100\n", + "Train Accuracy: 0.082315 Test Accuracy: 0.074667\n", + "Epoch 25/100\n", + "Train Accuracy: 0.082370 Test 
Accuracy: 0.076833\n", + "Epoch 26/100\n", + "Train Accuracy: 0.082852 Test Accuracy: 0.074833\n", + "Epoch 27/100\n", + "Train Accuracy: 0.082926 Test Accuracy: 0.075833\n", + "Epoch 28/100\n", + "Train Accuracy: 0.083185 Test Accuracy: 0.075833\n", + "Epoch 29/100\n", + "Train Accuracy: 0.083370 Test Accuracy: 0.075833\n", + "Epoch 30/100\n", + "Train Accuracy: 0.083667 Test Accuracy: 0.076500\n", + "Epoch 31/100\n", + "Train Accuracy: 0.083741 Test Accuracy: 0.076833\n", + "Epoch 32/100\n", + "Train Accuracy: 0.083778 Test Accuracy: 0.078500\n", + "Epoch 33/100\n", + "Train Accuracy: 0.084185 Test Accuracy: 0.079167\n", + "Epoch 34/100\n", + "Train Accuracy: 0.084148 Test Accuracy: 0.079000\n", + "Epoch 35/100\n", + "Train Accuracy: 0.084407 Test Accuracy: 0.079500\n", + "Epoch 36/100\n", + "Train Accuracy: 0.085037 Test Accuracy: 0.079500\n", + "Epoch 37/100\n", + "Train Accuracy: 0.085037 Test Accuracy: 0.080000\n", + "Epoch 38/100\n", + "Train Accuracy: 0.084778 Test Accuracy: 0.079667\n", + "Epoch 39/100\n", + "Train Accuracy: 0.084593 Test Accuracy: 0.079833\n", + "Epoch 40/100\n", + "Train Accuracy: 0.084519 Test Accuracy: 0.080167\n", + "Epoch 41/100\n", + "Train Accuracy: 0.084852 Test Accuracy: 0.079500\n", + "Epoch 42/100\n", + "Train Accuracy: 0.084815 Test Accuracy: 0.079167\n", + "Epoch 43/100\n", + "Train Accuracy: 0.084574 Test Accuracy: 0.079000\n", + "Epoch 44/100\n", + "Train Accuracy: 0.084685 Test Accuracy: 0.078500\n", + "Epoch 45/100\n", + "Train Accuracy: 0.084481 Test Accuracy: 0.079000\n", + "Epoch 46/100\n", + "Train Accuracy: 0.084833 Test Accuracy: 0.079333\n", + "Epoch 47/100\n", + "Train Accuracy: 0.085130 Test Accuracy: 0.079500\n", + "Epoch 48/100\n", + "Train Accuracy: 0.085185 Test Accuracy: 0.080667\n", + "Epoch 49/100\n", + "Train Accuracy: 0.085389 Test Accuracy: 0.080833\n", + "Epoch 50/100\n", + "Train Accuracy: 0.085944 Test Accuracy: 0.081667\n", + "Epoch 51/100\n", + "Train Accuracy: 0.086111 Test Accuracy: 0.079000\n", 
+ "Epoch 52/100\n", + "Train Accuracy: 0.086648 Test Accuracy: 0.079667\n", + "Epoch 53/100\n", + "Train Accuracy: 0.086963 Test Accuracy: 0.080833\n", + "Epoch 54/100\n", + "Train Accuracy: 0.087278 Test Accuracy: 0.081833\n", + "Epoch 55/100\n", + "Train Accuracy: 0.087648 Test Accuracy: 0.082500\n", + "Epoch 56/100\n", + "Train Accuracy: 0.088352 Test Accuracy: 0.084167\n", + "Epoch 57/100\n", + "Train Accuracy: 0.088519 Test Accuracy: 0.083500\n", + "Epoch 58/100\n", + "Train Accuracy: 0.089019 Test Accuracy: 0.085167\n", + "Epoch 59/100\n", + "Train Accuracy: 0.089000 Test Accuracy: 0.086333\n", + "Epoch 60/100\n", + "Train Accuracy: 0.089500 Test Accuracy: 0.086000\n", + "Epoch 61/100\n", + "Train Accuracy: 0.089833 Test Accuracy: 0.086333\n", + "Epoch 62/100\n", + "Train Accuracy: 0.090056 Test Accuracy: 0.088000\n", + "Epoch 63/100\n", + "Train Accuracy: 0.090370 Test Accuracy: 0.087833\n", + "Epoch 64/100\n", + "Train Accuracy: 0.090185 Test Accuracy: 0.087667\n", + "Epoch 65/100\n", + "Train Accuracy: 0.090741 Test Accuracy: 0.087167\n", + "Epoch 66/100\n", + "Train Accuracy: 0.091444 Test Accuracy: 0.087833\n", + "Epoch 67/100\n", + "Train Accuracy: 0.091667 Test Accuracy: 0.088500\n", + "Epoch 68/100\n", + "Train Accuracy: 0.092315 Test Accuracy: 0.090167\n", + "Epoch 69/100\n", + "Train Accuracy: 0.091963 Test Accuracy: 0.090667\n", + "Epoch 70/100\n", + "Train Accuracy: 0.092093 Test Accuracy: 0.091167\n", + "Epoch 71/100\n", + "Train Accuracy: 0.091833 Test Accuracy: 0.091833\n", + "Epoch 72/100\n", + "Train Accuracy: 0.091870 Test Accuracy: 0.090333\n", + "Epoch 73/100\n", + "Train Accuracy: 0.092148 Test Accuracy: 0.091833\n", + "Epoch 74/100\n", + "Train Accuracy: 0.092481 Test Accuracy: 0.091667\n", + "Epoch 75/100\n", + "Train Accuracy: 0.092315 Test Accuracy: 0.093000\n", + "Epoch 76/100\n", + "Train Accuracy: 0.092852 Test Accuracy: 0.093333\n", + "Epoch 77/100\n", + "Train Accuracy: 0.093389 Test Accuracy: 0.093500\n", + "Epoch 78/100\n", + 
"Train Accuracy: 0.093852 Test Accuracy: 0.095500\n", + "Epoch 79/100\n", + "Train Accuracy: 0.094167 Test Accuracy: 0.095500\n", + "Epoch 80/100\n", + "Train Accuracy: 0.094685 Test Accuracy: 0.096333\n", + "Epoch 81/100\n", + "Train Accuracy: 0.095111 Test Accuracy: 0.097000\n", + "Epoch 82/100\n", + "Train Accuracy: 0.095537 Test Accuracy: 0.097667\n", + "Epoch 83/100\n", + "Train Accuracy: 0.095685 Test Accuracy: 0.096833\n", + "Epoch 84/100\n", + "Train Accuracy: 0.096167 Test Accuracy: 0.098000\n", + "Epoch 85/100\n", + "Train Accuracy: 0.095685 Test Accuracy: 0.097833\n", + "Epoch 86/100\n", + "Train Accuracy: 0.096000 Test Accuracy: 0.097333\n", + "Epoch 87/100\n", + "Train Accuracy: 0.096481 Test Accuracy: 0.097667\n", + "Epoch 88/100\n", + "Train Accuracy: 0.096611 Test Accuracy: 0.098167\n", + "Epoch 89/100\n", + "Train Accuracy: 0.096519 Test Accuracy: 0.098667\n", + "Epoch 90/100\n", + "Train Accuracy: 0.096519 Test Accuracy: 0.098500\n", + "Epoch 91/100\n", + "Train Accuracy: 0.096519 Test Accuracy: 0.096500\n", + "Epoch 92/100\n", + "Train Accuracy: 0.096704 Test Accuracy: 0.097667\n", + "Epoch 93/100\n", + "Train Accuracy: 0.096815 Test Accuracy: 0.098167\n", + "Epoch 94/100\n", + "Train Accuracy: 0.096685 Test Accuracy: 0.099000\n", + "Epoch 95/100\n", + "Train Accuracy: 0.096796 Test Accuracy: 0.098833\n", + "Epoch 96/100\n", + "Train Accuracy: 0.096815 Test Accuracy: 0.099167\n", + "Epoch 97/100\n", + "Train Accuracy: 0.097241 Test Accuracy: 0.098833\n", + "Epoch 98/100\n", + "Train Accuracy: 0.097333 Test Accuracy: 0.099500\n", + "Epoch 99/100\n", + "Train Accuracy: 0.097389 Test Accuracy: 0.098833\n", + "Epoch 100/100\n", + "Train Accuracy: 0.097556 Test Accuracy: 0.098167\n", + "Test Set Accuracy: 0.09816666666666667\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import mlp\n", + 
"mlp.plot_ANN(X_train,y_train,X_test,y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test de la fonction read_cifar " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import read_cifar as rc\n", + "from read_cifar import read_cifar_batch\n", + "from read_cifar import read_cifar\n", + "\n", + "def test_read_cifar_batch():\n", + " # Test read_cifar_batch function\n", + " batch_path = \"data\\data_batch_1\"\n", + " data, labels = read_cifar_batch(batch_path)\n", + "\n", + " # Check that data has the right shape and type\n", + " assert data.shape == (10000, 3072)\n", + " assert data.dtype == np.float32\n", + "\n", + " # Check that labels has the right shape and type\n", + " assert labels.shape == (10000,)\n", + " assert labels.dtype == np.int64\n", + " print(\"All tests passed successfully.\")\n", + " \n", + "def test_read_cifar():\n", + " # Test read_cifar function\n", + " data, labels = read_cifar('data')\n", + "\n", + " # Check that data has the right shape and type\n", + " assert data.shape == (60000, 3072)\n", + " assert data.dtype == np.float32\n", + "\n", + " # Check that labels has the right shape and type\n", + " assert labels.shape == (60000,)\n", + " assert labels.dtype == np.int64\n", + " print(\"All tests passed successfully.\")\n", + "\n", + "def test_split_dataset():\n", + " data = np.random.randn(150, 4)\n", + " labels = np.random.randn(150)\n", + " split = 0.8\n", + " data_train, labels_train, data_test, labels_test = rc.split_dataset(data, labels, split)\n", + "\n", + " total_size = data_train.shape[0] + data_test.shape[0]\n", + "\n", + " assert total_size == len(data)\n", + " assert len(labels_train) == len(data_train)\n", + " assert len(labels_test) == len(data_test)\n", + " \n", + " print(\"All tests passed successfully.\")" + ] + }, + { 
+ "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "All tests passed successfully.\n" + ] + } + ], + "source": [ + "test_read_cifar_batch()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "All tests passed successfully.\n" + ] + } + ], + "source": [ + "test_read_cifar()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "All tests passed successfully.\n" + ] + } + ], + "source": [ + "test_split_dataset()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test de la fonction knn" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import knn \n", + "\n", + "# Test de la fonction distance_matrix :\n", + "def test_distance_matrix():\n", + " X = np.array([[1, 2], [3, 4]])\n", + " Y = np.array([[2, 2], [1, 1]])\n", + " dists = knn.distance_matrix(X, Y)\n", + " assert dists.shape == (2, 2)\n", + " print(\"Test for distance_matrix passed.\")\n", + " \n", + "\n", + "# Test de la fonction knn_predict :\n", + "def test_knn_predict():\n", + " dists = np.array([[2, 5], [10, 1]])\n", + " labels_train = np.array([0, 1])\n", + " k = 1\n", + " y_pred = knn.knn_predict(dists, labels_train, k)\n", + " assert y_pred.shape == (2,)\n", + " assert np.array_equal(y_pred, np.array([0, 1]))\n", + " print(\"Test for knn_predict passed.\")\n", + "\n", + "# Test de la fonction evaluate_knn :\n", + "def test_evaluate_knn_accuracy():\n", + " data_train = np.array([[1, 2], [3, 4], [5, 6], [1, 1], [2, 2]])\n", + " labels_train = np.array([0, 1, 2, 0, 1])\n", + " data_test = np.array([[2, 2], [1, 1], [3, 3]])\n", + " labels_test = np.array([1, 0, 1])\n", + " k = 2\n", + " 
accuracy = knn.evaluate_knn(data_train, labels_train, data_test, labels_test, k)\n", + " assert 0 <= accuracy <= 1\n", + " print(\"Test for evaluate_knn accuracy passed.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test for distance_matrix passed.\n" + ] + } + ], + "source": [ + "test_distance_matrix()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test for knn_predict passed.\n" + ] + } + ], + "source": [ + "test_knn_predict()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test for evaluate_knn accuracy passed.\n" + ] + } + ], + "source": [ + "test_evaluate_knn_accuracy()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test de la fonction mlp" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "from mlp import initialization, train_mlp, calculate_accuracy, run_mlp_training\n", + "\n", + "def test_mlp_training():\n", + " #Paramètres du test\n", + " num_samples = 200\n", + " num_features = 3\n", + " num_classes = 2\n", + " num_hidden_units = 3\n", + " learning_rate = 0.1\n", + " num_epochs = 10\n", + "\n", + " # Générez des données factices pour le test\n", + " X_train = np.random.randn(num_samples, num_features)\n", + " y_train = np.random.randint(0, num_classes, num_samples)\n", + " X_test = np.random.randn(num_samples, num_features)\n", + " y_test = np.random.randint(0, num_classes, num_samples)\n", + "\n", + " # Initialisez les poids et les biais\n", + " W1, b1, W2, b2 = initialization(num_features, num_hidden_units, num_classes)\n", + "\n", + " # Entraînez le modèle\n", + " train_accuracies, test_accuracy = 
run_mlp_training(X_train, y_train, X_test, y_test, num_hidden_units, learning_rate, num_epochs)\n", + "\n", + " # Vérifiez si l'accuracy sur l'ensemble de test est un nombre entre 0 et 1\n", + " assert 0 <= test_accuracy <= 1\n", + "\n", + " # Vérifiez si la longueur de la liste des accuracies d'entraînement correspond au nombre d'époques\n", + " assert len(train_accuracies) == num_epochs\n", + "\n", + " # Vérifiez si les accuracies d'entraînement sont des nombres entre 0 et 1\n", + " for accuracy in train_accuracies:\n", + " assert 0 <= accuracy <= 1\n", + "\n", + " print(\"Tous les tests ont réussi avec succès.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "Train Accuracy: 0.490000 Test Accuracy: 0.555000\n", + "Epoch 2/10\n", + "Train Accuracy: 0.490000 Test Accuracy: 0.535000\n", + "Epoch 3/10\n", + "Train Accuracy: 0.505000 Test Accuracy: 0.540000\n", + "Epoch 4/10\n", + "Train Accuracy: 0.505000 Test Accuracy: 0.540000\n", + "Epoch 5/10\n", + "Train Accuracy: 0.500000 Test Accuracy: 0.545000\n", + "Epoch 6/10\n", + "Train Accuracy: 0.500000 Test Accuracy: 0.545000\n", + "Epoch 7/10\n", + "Train Accuracy: 0.500000 Test Accuracy: 0.545000\n", + "Epoch 8/10\n", + "Train Accuracy: 0.500000 Test Accuracy: 0.545000\n", + "Epoch 9/10\n", + "Train Accuracy: 0.500000 Test Accuracy: 0.545000\n", + "Epoch 10/10\n", + "Train Accuracy: 0.500000 Test Accuracy: 0.545000\n", + "Tous les tests ont réussi avec succès.\n" + ] + } + ], + "source": [ + "test_mlp_training()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + 
} + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/mlp.py b/mlp.py index 7812729f50be812fcc4370346eca45aabe63b553..7fa9b88cedc561c0a344be830268fc7b3d0e97a8 100644 --- a/mlp.py +++ b/mlp.py @@ -32,7 +32,7 @@ def sigmoid(x,derivate): # Define the softmax activation function def softmax(x,derivate): if derivate == False : - return np.exp(x) / np.exp(np.array(x)).sum(axis=-1, keepdims=True) + return np.exp(x) / np.exp(np.array(x)).sum(axis=1, keepdims=True) else : return x*(1-x) @@ -94,7 +94,7 @@ def learn_once_mse(W1, b1, W2, b2, data, targets, learning_rate): # Update weights and biases of the output layer W2 = W2 - learning_rate * np.dot(hidden_layer_output.T, output_layer_gradients) / data.shape[0] - b2 = b2 - learning_rate * (1 / hidden_layer_output.shape[1]) * output_layer_gradients.sum(axis=0, keepdims=True) + b2 = b2 - learning_rate * (1 / hidden_layer_output.shape[1]) * output_layer_gradients.sum(axis=0) # Calculate the error at the hidden layer hidden_layer_error = np.dot(output_layer_gradients, W2.T) @@ -104,7 +104,7 @@ def learn_once_mse(W1, b1, W2, b2, data, targets, learning_rate): # Update weights and biases of the hidden layer W1 = W1 - learning_rate * np.dot(data.T, hidden_layer_gradients) / data.shape[0] - b1 = b1 - learning_rate * (1 / data.shape[1]) * hidden_layer_gradients.sum(axis=0, keepdims=True) + b1 = b1 - learning_rate * (1 / data.shape[1]) * hidden_layer_gradients.sum(axis=0) # Calculate the loss using the specified metric loss = loss_metrics(output_layer_output, targets,metric="MSE",status="forward") diff --git a/test/__pycache__/read_cifar.cpython-312.pyc b/test/__pycache__/read_cifar.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..998b9339c4bd361ea0c214803f363c7d9a563bc2 Binary files /dev/null and b/test/__pycache__/read_cifar.cpython-312.pyc differ diff --git a/test/__pycache__/test_read_cifar.cpython-312.pyc b/test/__pycache__/test_read_cifar.cpython-312.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..0e59a03bb7d0607acc6662e7dc4edcd909da2b9f Binary files /dev/null and b/test/__pycache__/test_read_cifar.cpython-312.pyc differ diff --git a/test/test_knn.py b/test/test_knn.py new file mode 100644 index 0000000000000000000000000000000000000000..9368c502d898a364c9954e895d462c1da3f454e0 --- /dev/null +++ b/test/test_knn.py @@ -0,0 +1,33 @@ +import numpy as np +import knn + +# Test de la fonction distance_matrix : +def test_distance_matrix(): + X = np.array([[1, 2], [3, 4]]) + Y = np.array([[2, 2], [1, 1]]) + dists = knn.distance_matrix(X, Y) + assert dists.shape == (2, 2) + assert np.allclose(dists, np.array([[2, 5], [10, 1]])) + print("Test for distance_matrix passed.") + + +# Test de la fonction knn_predict : +def test_knn_predict(): + dists = np.array([[2, 5], [10, 1]]) + labels_train = np.array([0, 1]) + k = 1 + y_pred = knn.knn_predict(dists, labels_train, k) + assert y_pred.shape == (2,) + assert np.array_equal(y_pred, np.array([0, 1])) + print("Test for knn_predict passed.") + +# Test de la fonction evaluate_knn : +def test_evaluate_knn_accuracy(): + data_train = np.array([[1, 2], [3, 4], [5, 6], [1, 1], [2, 2]]) + labels_train = np.array([0, 1, 2, 0, 1]) + data_test = np.array([[2, 2], [1, 1], [3, 3]]) + labels_test = np.array([1, 0, 1]) + k = 2 + accuracy = knn.evaluate_knn(data_train, labels_train, data_test, labels_test, k) + assert 0 <= accuracy <= 1 + print("Test for evaluate_knn accuracy passed.") diff --git a/test/test_mlp.py b/test/test_mlp.py new file mode 100644 index 0000000000000000000000000000000000000000..546d9f3f1959693a52f8de88f4f53f961af72415 --- /dev/null +++ b/test/test_mlp.py @@ -0,0 +1,39 @@ +import numpy as np + +# Importez les fonctions +from mlp import initialization, train_mlp, calculate_accuracy + +def test_mlp_training(): + # Paramètres du test + num_samples = 200 + num_features = 10 + num_classes = 3 + num_hidden_units = 5 + learning_rate = 0.1 + num_epochs = 10 + + # Générez des données 
factices pour le test + X_train = np.random.randn(num_samples, num_features) + y_train = np.random.randint(0, num_classes, num_samples) + X_test = np.random.randn(num_samples, num_features) + y_test = np.random.randint(0, num_classes, num_samples) + + # Initialisez les poids et les biais + W1, b1, W2, b2 = initialization(num_features, num_hidden_units, num_classes) + + # Entraînez le modèle + train_accuracies, test_accuracy = train_mlp(W1, b1, W2, b2, X_train, y_train, learning_rate, num_epochs) + + # Vérifiez si l'accuracy est un nombre entre 0 et 1 + assert 0 <= test_accuracy <= 1 + + # Vérifiez si la longueur de la liste des accuracies d'entraînement correspond au nombre d'époques + assert len(train_accuracies) == num_epochs + + # Vérifiez si les accuracies d'entraînement sont des nombres entre 0 et 1 + for accuracy in train_accuracies: + assert 0 <= accuracy <= 1 + + print("Tous les tests ont réussi avec succès.") + + diff --git a/test/test_read_cifar.py b/test/test_read_cifar.py new file mode 100644 index 0000000000000000000000000000000000000000..13080e1c90fb8307fb16d9c4776301c0995efded --- /dev/null +++ b/test/test_read_cifar.py @@ -0,0 +1,46 @@ +import numpy as np +import read_cifar as rc +from read_cifar import read_cifar_batch +from read_cifar import read_cifar +def test_read_cifar_batch(): + # Test read_cifar_batch function + batch_path = "data\data_batch_1" + data, labels = read_cifar_batch(batch_path) + + # Check that data has the right shape and type + assert data.shape == (10000, 3072) + assert data.dtype == np.float32 + + # Check that labels has the right shape and type + assert labels.shape == (10000,) + assert labels.dtype == np.int64 + print("All tests passed successfully.") + + + +def test_read_cifar(): + # Test read_cifar function + data, labels = read_cifar('data') + + # Check that data has the right shape and type + assert data.shape == (60000, 3072) + assert data.dtype == np.float32 + + # Check that labels has the right shape and type + 
assert labels.shape == (60000,) + assert labels.dtype == np.int64 + print("All tests passed successfully.") + +def test_split_dataset(): + data = np.random.randn(150, 4) + labels = np.random.randn(150) + split = 0.8 + data_train, labels_train, data_test, labels_test = rc.split_dataset(data, labels, split) + + total_size = data_train.shape[0] + data_test.shape[0] + + assert total_size == len(data) + assert len(labels_train) == len(data_train) + assert len(labels_test) == len(data_test) + + print("All tests passed successfully.")