diff --git a/Practical_sessions/Project/chromadb_example.ipynb b/Practical_sessions/Project/chromadb_example.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..6360b0484c3b0f4d1e56809e6dc9828893b5af6d
--- /dev/null
+++ b/Practical_sessions/Project/chromadb_example.ipynb
@@ -0,0 +1,150 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Chroma database\n",
+    "\n",
+    "Chroma is an open-source vector database that is similar to Milvus and can be used with Windows systems. Here is an example of code illustrating its use."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Installing the required packages (%pip targets the environment of the running kernel)\n",
+    "%pip install chromadb sentence-transformers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Importing the necessary modules\n",
+    "from chromadb import PersistentClient\n",
+    "from sentence_transformers import SentenceTransformer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Creating a database client stored in the \"ragdb\" folder, or loading it if it already exists\n",
+    "client = PersistentClient(path=\"./ragdb\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Creating or loading a collection in ChromaDB\n",
+    "collection_name = \"my_rag_collection\"\n",
+    "# get_or_create_collection returns the existing collection, or creates it on first run\n",
+    "collection = client.get_or_create_collection(name=collection_name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load an embedding model\n",
+    "embedding_model = SentenceTransformer(\"BAAI/bge-small-en-v1.5\")\n",
+    "\n",
+    "# Define an embedding function\n",
+    "def text_embedding(text):\n",
+    "    \"\"\"Return the embedding of `text` as a plain Python list.\"\"\"\n",
+    "    return embedding_model.encode(text).tolist()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Adding documents with their embeddings and unique identifiers\n",
+    "documents = [\n",
+    "    \"The sun rises in the east and sets in the west.\",\n",
+    "    \"Raindrops create soothing sounds as they hit the ground.\",\n",
+    "    \"Stars twinkle brightly in the clear night sky.\",\n",
+    "    \"The ocean waves crash gently against the shore.\",\n",
+    "    \"Mountains stand tall and majestic, covered in snow.\",\n",
+    "    \"Birds chirp melodiously during the early morning hours.\",\n",
+    "    \"The forest is alive with the sounds of rustling leaves and wildlife.\",\n",
+    "    \"A gentle breeze flows through the meadow, carrying the scent of flowers.\"\n",
+    "]\n",
+    "embeddings = [text_embedding(document) for document in documents]\n",
+    "ids = [str(i) for i in range(len(documents))]\n",
+    "\n",
+    "# upsert (instead of add) keeps this cell re-runnable: the ids already exist in \"ragdb\" after a first run\n",
+    "collection.upsert(\n",
+    "    documents=documents,\n",
+    "    embeddings=embeddings,\n",
+    "    ids=ids\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Querying to find the documents most similar to a given phrase\n",
+    "query = \"What happens in the forest during the day?\"\n",
+    "# query = \"Describe how stars appear in a clear night sky.\"\n",
+    "\n",
+    "query_embedding = text_embedding(query)\n",
+    "\n",
+    "results = collection.query(\n",
+    "    query_embeddings=[query_embedding],\n",
+    "    n_results=2  # Number of desired similar results\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Displaying the results\n",
+    "# results['documents'] contains one list of matches per query embedding; a single query was sent\n",
+    "for document in results['documents'][0]:\n",
+    "    print(\"Similar document:\", document)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "td_llm",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}