Create chromadb_example.ipynb

97ab6e75 · Dellandrea Emmanuel · 136605a9 · 97ab6e75
Commit 97ab6e75 authored 1 month ago by Dellandrea Emmanuel
--- a/Practical_sessions/Project/chromadb_example.ipynb
+++ b/Practical_sessions/Project/chromadb_example.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Chroma database\n",
+    "\n",
+    "Chroma is an open-source vector database that is similar to Milvus and can be used with Windows systems. Here is an example of code illustrating its use."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Installing the chromadb package\n",
+    "!pip install chromadb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Importing the necessary module\n",
+    "from chromadb import PersistentClient"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Creating a database client stored in the \"ragdb\" folder, or loading it if it already exists\n",
+    "client = PersistentClient(path=\"./ragdb\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Creating or loading a collection in ChromaDB\n",
+    "collection_name = \"my_rag_collection\"\n",
+    "try:\n",
+    "    collection = client.get_collection(name=collection_name)\n",
+    "except:\n",
+    "    collection = client.create_collection(name=collection_name) \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sentence_transformers import SentenceTransformer\n",
+    "\n",
+    "# Load an embedding model\n",
+    "embedding_model = SentenceTransformer(\"BAAI/bge-small-en-v1.5\")\n",
+    "\n",
+    "# Define an embedding function\n",
+    "def text_embedding(text):\n",
+    "    return embedding_model.encode(text).tolist()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Adding documents with their metadata and unique identifiers\n",
+    "documents = [\n",
+    "    \"The sun rises in the east and sets in the west.\",\n",
+    "    \"Raindrops create soothing sounds as they hit the ground.\",\n",
+    "    \"Stars twinkle brightly in the clear night sky.\",\n",
+    "    \"The ocean waves crash gently against the shore.\",\n",
+    "    \"Mountains stand tall and majestic, covered in snow.\",\n",
+    "    \"Birds chirp melodiously during the early morning hours.\",\n",
+    "    \"The forest is alive with the sounds of rustling leaves and wildlife.\",\n",
+    "    \"A gentle breeze flows through the meadow, carrying the scent of flowers.\"\n",
+    "]\n",
+    "embeddings = [text_embedding(document) for document in documents]\n",
+    "ids = [f\"{i}\" for i in range(len(documents))]\n",
+    "\n",
+    "collection.add(\n",
+    "    documents=documents,\n",
+    "    embeddings=embeddings,\n",
+    "    ids=ids\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Querying to find the documents most similar to a given phrase\n",
+    "query = \"What happens in the forest during the day?\"\n",
+    "# query = \"Describe how stars appear in a clear night sky.\"\n",
+    "\n",
+    "query_embedding = text_embedding(query)\n",
+    "\n",
+    "results = collection.query(\n",
+    "    query_embeddings=[query_embedding],\n",
+    "    n_results=2  # Number of desired similar results\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Displaying the results\n",
+    "for result in results['documents']:\n",
+    "    print(\"Similar document:\", result)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "td_llm",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
+%% Cell type:markdown id: tags:
+
+# Chroma database
+
+Chroma is an open-source vector database that is similar to Milvus and can be used with Windows systems. Here is an example of code illustrating its use.
+
+%% Cell type:code id: tags:
+
+``` python
+# Installing the chromadb package
+!pip install chromadb
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Importing the necessary module
+from chromadb import PersistentClient
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Creating a database client stored in the "ragdb" folder, or loading it if it already exists
+client = PersistentClient(path="./ragdb")
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Creating or loading a collection in ChromaDB
+collection_name = "my_rag_collection"
+try:
+    collection = client.get_collection(name=collection_name)
+except:
+    collection = client.create_collection(name=collection_name)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+from sentence_transformers import SentenceTransformer
+
+# Load an embedding model
+embedding_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
+
+# Define an embedding function
+def text_embedding(text):
+    return embedding_model.encode(text).tolist()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Adding documents with their metadata and unique identifiers
+documents = [
+    "The sun rises in the east and sets in the west.",
+    "Raindrops create soothing sounds as they hit the ground.",
+    "Stars twinkle brightly in the clear night sky.",
+    "The ocean waves crash gently against the shore.",
+    "Mountains stand tall and majestic, covered in snow.",
+    "Birds chirp melodiously during the early morning hours.",
+    "The forest is alive with the sounds of rustling leaves and wildlife.",
+    "A gentle breeze flows through the meadow, carrying the scent of flowers."
+]
+embeddings = [text_embedding(document) for document in documents]
+ids = [f"{i}" for i in range(len(documents))]
+
+collection.add(
+    documents=documents,
+    embeddings=embeddings,
+    ids=ids
+)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Querying to find the documents most similar to a given phrase
+query = "What happens in the forest during the day?"
+# query = "Describe how stars appear in a clear night sky."
+
+query_embedding = text_embedding(query)
+
+results = collection.query(
+    query_embeddings=[query_embedding],
+    n_results=2  # Number of desired similar results
+)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# Displaying the results
+for result in results['documents']:
+    print("Similar document:", result)
+```