Commit 6c3ed44

Llamaindex RAG guide (#985)

* add terraform files
* add k8s templates and cloudbuild file for image
* add demo app
* add README
* fix typos in README
* add description comment for terraform state bucket
* remove duplicate output from the infrastructure module

1 parent eb7c6bf, commit 6c3ed44

16 files changed: +1078 -0 lines

tutorials-and-examples/llamaindex/rag/README.md

+481 lines (large diff not rendered by default)

Dockerfile (+11 lines)

FROM python:3.12-slim

WORKDIR /app/
COPY requirements.txt /app/

RUN pip install --no-cache-dir -r requirements.txt
COPY . /app/

EXPOSE 8000

CMD ["uvicorn", "rag_demo.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]

cmd/ingest_data.py (+72 lines; the path comes from the ingestion Job's command below)

import os
import sys
import pathlib


from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.ingestion import (
    DocstoreStrategy,
    IngestionPipeline,
    IngestionCache,
)
from llama_index.storage.kvstore.redis import RedisKVStore as RedisCache
from llama_index.storage.docstore.redis import RedisDocumentStore
from llama_index.core.node_parser import SentenceSplitter
from llama_index.vector_stores.redis import RedisVectorStore

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

# Add the rag_demo package to PYTHONPATH so this script can access it.
sys.path.append(str(pathlib.Path(__file__).parent.parent.absolute()))
from rag_demo import custom_schema, getenv_or_exit


EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME", "BAAI/bge-small-en-v1.5")
REDIS_HOST = getenv_or_exit("REDIS_HOST")
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
INPUT_DIR = getenv_or_exit("INPUT_DIR")

embed_model = HuggingFaceEmbedding(model_name=EMBEDDING_MODEL_NAME)
vector_store = RedisVectorStore(
    schema=custom_schema,
    redis_url=f"redis://{REDIS_HOST}:{REDIS_PORT}",
)

# Set up the ingestion cache layer.
cache = IngestionCache(
    cache=RedisCache.from_host_and_port(REDIS_HOST, REDIS_PORT),
    collection="redis_cache",
)

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
        embed_model,
    ],
    docstore=RedisDocumentStore.from_host_and_port(
        REDIS_HOST, REDIS_PORT, namespace="document_store"
    ),
    vector_store=vector_store,
    cache=cache,
    docstore_strategy=DocstoreStrategy.UPSERTS,
)

index = VectorStoreIndex.from_vector_store(
    pipeline.vector_store,
    embed_model=embed_model,
)

reader = SimpleDirectoryReader(input_dir=INPUT_DIR)


def load_data(reader: SimpleDirectoryReader):
    docs = reader.load_data()
    # Use the file path as a stable document ID so that re-ingesting the
    # same file upserts it instead of creating a duplicate.
    for doc in docs:
        doc.id_ = doc.metadata["file_path"]
    return docs


docs = load_data(reader)
print(f"Loaded {len(docs)} docs")

nodes = pipeline.run(documents=docs, show_progress=True)
print(f"Ingested {len(nodes)} Nodes")
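
The load_data helper pins each document's id_ to its source file path. That stable identity is what lets DocstoreStrategy.UPSERTS skip unchanged files and replace edited ones instead of duplicating them on re-ingestion. A minimal sketch of the idea (not part of the commit), using in-memory Document objects:

from llama_index.core import Document

# Two versions of the "same" file: identical file_path, different text.
doc_v1 = Document(text="first version", metadata={"file_path": "/datalake/a.txt"})
doc_v1.id_ = doc_v1.metadata["file_path"]

doc_v2 = Document(text="edited version", metadata={"file_path": "/datalake/a.txt"})
doc_v2.id_ = doc_v2.metadata["file_path"]

# Same identity with new content, so the docstore treats the second
# ingestion as an upsert rather than a new document.
assert doc_v1.id_ == doc_v2.id_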

rag_demo package module (+37 lines; both names are imported via "from rag_demo import custom_schema, getenv_or_exit", so this is likely rag_demo/__init__.py)

import os
import sys
import logging

from redisvl.schema import IndexSchema

logger = logging.getLogger()

# Redis vector index schema shared by the ingestion script and the app.
custom_schema = IndexSchema.from_dict(
    {
        "index": {"name": "bucket", "prefix": "doc"},
        # customize fields that are indexed
        "fields": [
            # required fields for llamaindex
            {"type": "tag", "name": "id"},
            {"type": "tag", "name": "doc_id"},
            {"type": "text", "name": "text"},
            # custom vector field for bge-small-en-v1.5 embeddings
            {
                "type": "vector",
                "name": "vector",
                "attrs": {
                    "dims": 384,
                    "algorithm": "hnsw",
                    "distance_metric": "cosine",
                },
            },
        ],
    }
)


def getenv_or_exit(name: str) -> str:
    value = os.getenv(name)
    if value is None:
        logger.critical(f"The environment variable '{name}' is not specified")
        sys.exit(1)

    return value
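
The vector field's dims: 384 must match the embedding model's output size; BAAI/bge-small-en-v1.5 emits 384-dimensional vectors, so swapping EMBEDDING_MODEL_NAME for another model means updating the schema as well. A quick sanity check, not part of the commit:

from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Embed a throwaway string and confirm the dimensionality matches the schema.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
vector = embed_model.get_text_embedding("hello world")
assert len(vector) == 384, f"schema dims must match the model, got {len(vector)}"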

rag_demo/main.py (+51 lines; the module path comes from the Dockerfile's uvicorn CMD)

import os
import logging

from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.redis import RedisVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama

from fastapi import FastAPI, Depends
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse

from rag_demo import custom_schema, getenv_or_exit

logger = logging.getLogger()

MODEL_NAME = getenv_or_exit("MODEL_NAME")
EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME", "BAAI/bge-small-en-v1.5")
REDIS_HOST = getenv_or_exit("REDIS_HOST")
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
OLLAMA_SERVER_URL = getenv_or_exit("OLLAMA_SERVER_URL")

embed_model = HuggingFaceEmbedding(model_name=EMBEDDING_MODEL_NAME)

# Connect to the vector store that already holds the ingested data.
vector_store = RedisVectorStore(
    schema=custom_schema,
    redis_url=f"redis://{REDIS_HOST}:{REDIS_PORT}",
)
# Create an index from the vector store.
index = VectorStoreIndex.from_vector_store(
    vector_store, embed_model=embed_model
)
# Connect to the LLM served by Ollama.
llm = Ollama(
    model=MODEL_NAME,
    base_url=OLLAMA_SERVER_URL,
)
# Create a query engine that is ready to query our RAG.
query_engine = index.as_query_engine(llm=llm)


def get_query_engine():
    return query_engine


app = FastAPI()


@app.get("/invoke")
async def root(message: str, query_engine=Depends(get_query_engine)):
    response = query_engine.query(message)
    json_compatible_item_data = jsonable_encoder({"message": f"{response}"})
    return JSONResponse(content=json_compatible_item_data)
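
A hypothetical client-side smoke test for the /invoke endpoint, assuming the service has been made reachable on localhost:8000 (for example with kubectl port-forward service/llamaindex-rag-service 8000:8000, using the Service defined later in this commit) and that the requests package is installed locally; the query string is only an illustration:

import requests

# GET /invoke?message=... returns {"message": "<generated answer>"}.
resp = requests.get(
    "http://localhost:8000/invoke",
    params={"message": "What do the ingested documents cover?"},
)
resp.raise_for_status()
print(resp.json()["message"])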

requirements.txt (+8 lines)

llama-index-storage-docstore-redis==0.3.0
llama-index-vector-stores-redis==0.4.0
llama-index-embeddings-huggingface==0.5.0
llama-index-readers-file==0.4.3
llama-index-llms-ollama==0.5.0
fastapi==0.115.6
uvicorn==0.34.0

Cloud Build config (+5 lines; builds and pushes the demo image)

steps:
  - name: 'gcr.io/cloud-builders/docker'
    args: ['build', '-t', 'us-docker.pkg.dev/$PROJECT_ID/${_IMAGE_REPO_NAME}/llamaindex-rag-demo:latest', '.']
images:
  - 'us-docker.pkg.dev/$PROJECT_ID/${_IMAGE_REPO_NAME}/llamaindex-rag-demo:latest'

Kubernetes manifest: Redis Stack Deployment and Service (+33 lines)

apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis-stack
  namespace: default
  labels:
    app: redis-stack
spec:
  selector:
    matchLabels:
      app: redis-stack
  template:
    metadata:
      labels:
        app: redis-stack
    spec:
      containers:
        - name: redis-stack
          image: redis/redis-stack:latest
          ports:
            - containerPort: 6379
---
apiVersion: v1
kind: Service
metadata:
  name: redis-stack-service
spec:
  selector:
    app: redis-stack
  ports:
    - protocol: TCP
      port: 6379
      targetPort: 6379

Kubernetes manifest: data ingestion Job (+37 lines)

apiVersion: batch/v1
kind: Job
metadata:
  name: llamaindex-ingest-data
spec:
  template:
    metadata:
      labels:
        name: ingest-data
      annotations:
        gke-gcsfuse/volumes: 'true'
    spec:
      serviceAccountName: ${SERVICE_ACCOUNT_NAME}
      containers:
        - name: ingest-data
          image: ${IMAGE_NAME}
          imagePullPolicy: Always
          command: ["python3", "cmd/ingest_data.py"]
          env:
            - name: REDIS_HOST
              value: redis-stack-service
            - name: REDIS_PORT
              value: "6379"
            - name: INPUT_DIR
              value: /datalake
          volumeMounts:
            - name: datalake
              mountPath: /datalake
      volumes:
        - name: datalake
          csi:
            driver: gcsfuse.csi.storage.gke.io
            volumeAttributes:
              bucketName: ${GCSFUSE_BUCKET_NAME}
              mountOptions: implicit-dirs,only-dir=datalake
      restartPolicy: Never
  backoffLimit: 4
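
Once the Job completes, one hypothetical way to confirm that ingestion created the Redis index (assuming kubectl port-forward service/redis-stack-service 6379:6379 and a locally installed redis package, which the Redis integrations above depend on):

from redis import Redis

r = Redis(host="localhost", port=6379)
# FT._LIST is a RediSearch command that lists all search indexes; the
# output should include the "bucket" index defined in custom_schema.
print(r.execute_command("FT._LIST"))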

Kubernetes manifest: Ollama Deployment and Service (+48 lines)

apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
spec:
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
      annotations:
        gke-gcsfuse/volumes: 'true'
    spec:
      serviceAccountName: ${SERVICE_ACCOUNT_NAME}
      nodeSelector:
        cloud.google.com/gke-accelerator: nvidia-l4
      containers:
        - name: ollama
          image: ollama/ollama:latest
          ports:
            - containerPort: 11434
          volumeMounts:
            - name: ollama-data
              mountPath: /root/.ollama/
          resources:
            limits:
              nvidia.com/gpu: 1
      volumes:
        - name: ollama-data
          csi:
            driver: gcsfuse.csi.storage.gke.io
            volumeAttributes:
              bucketName: ${GCSFUSE_BUCKET_NAME}
              mountOptions: implicit-dirs,only-dir=ollama
---
apiVersion: v1
kind: Service
metadata:
  name: ollama-service
spec:
  selector:
    app: ollama
  ports:
    - protocol: TCP
      port: 11434
      targetPort: 11434
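
The Deployment persists /root/.ollama on the GCS bucket (only-dir=ollama), so the model referenced by ${MODEL_NAME} must already be pulled into that store before the app can query it. A hypothetical readiness check against Ollama's standard /api/tags endpoint, assuming kubectl port-forward service/ollama-service 11434:11434:

import requests

# /api/tags returns {"models": [...]}; each entry has a "name" field.
tags = requests.get("http://localhost:11434/api/tags").json()
print([model["name"] for model in tags.get("models", [])])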

Kubernetes manifest: RAG app Deployment and Service (+42 lines)

apiVersion: apps/v1
kind: Deployment
metadata:
  name: llamaindex-rag
  labels:
    app: llamaindex-rag
spec:
  selector:
    matchLabels:
      app: llamaindex-rag
  template:
    metadata:
      labels:
        app: llamaindex-rag
    spec:
      containers:
        - name: llamaindex-rag
          image: ${IMAGE_NAME}
          imagePullPolicy: Always
          env:
            - name: MODEL_NAME
              value: ${MODEL_NAME}
            - name: REDIS_HOST
              value: redis-stack-service
            - name: REDIS_PORT
              value: "6379"
            - name: OLLAMA_SERVER_URL
              value: http://ollama-service:11434
          ports:
            - containerPort: 8000
---
apiVersion: v1
kind: Service
metadata:
  name: llamaindex-rag-service
spec:
  selector:
    app: llamaindex-rag
  ports:
    - protocol: TCP
      port: 8000
      targetPort: 8000

Terraform backend configuration (+7 lines; optional GCS state bucket, commented out by default)

## Optional gcs bucket configuration
#terraform {
#  backend "gcs" {
#    bucket = "<bucket_name>"
#    prefix = "terraform/state/llamaindex"
#  }
#}

Terraform variables (+11 lines)

project_id            = "<project_id>"
default_resource_name = "llamaindex-rag-demo-tf"

cluster_name      = "" # Leave empty to use the default name (default_resource_name)
cluster_location  = "us-central1"
private_cluster   = false
autopilot_cluster = true

service_account_name  = "" # Leave empty to use the default name
bucket_name           = "" # Leave empty to use the default name
image_repository_name = "" # Leave empty to use the default name
