Commit 6c3ed44

Llamaindex RAG guide (#985)

* add terraform files
* add k8s templates and cloudbuild file for image
* add demo app
* add README
* fix typos in README
* add description comment for terraform state bucket
* remove duplicate output from the infrastructure module

1 parent eb7c6bf, commit 6c3ed44

16 files changed: +1078 -0 lines

tutorials-and-examples/llamaindex/rag/README.md

+481 lines (large diff not rendered by default)

Dockerfile (+11 lines)

FROM python:3.12-slim

WORKDIR /app/
COPY requirements.txt /app/

RUN pip install --no-cache-dir -r requirements.txt
COPY . /app/

EXPOSE 8000

CMD ["uvicorn", "rag_demo.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]

cmd/ingest_data.py (+72 lines; the path comes from the ingestion Job's command below)

import os
import sys
import pathlib


from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.ingestion import (
    DocstoreStrategy,
    IngestionPipeline,
    IngestionCache,
)
from llama_index.storage.kvstore.redis import RedisKVStore as RedisCache
from llama_index.storage.docstore.redis import RedisDocumentStore
from llama_index.core.node_parser import SentenceSplitter
from llama_index.vector_stores.redis import RedisVectorStore

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

# Add the rag_demo package to PYTHONPATH so this script can access it.
sys.path.append(str(pathlib.Path(__file__).parent.parent.absolute()))
from rag_demo import custom_schema, getenv_or_exit


EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME", "BAAI/bge-small-en-v1.5")
REDIS_HOST = getenv_or_exit("REDIS_HOST")
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
INPUT_DIR = getenv_or_exit("INPUT_DIR")

embed_model = HuggingFaceEmbedding(model_name=EMBEDDING_MODEL_NAME)
vector_store = RedisVectorStore(
    schema=custom_schema,
    redis_url=f"redis://{REDIS_HOST}:{REDIS_PORT}",
)

# Set up the ingestion cache layer.
cache = IngestionCache(
    cache=RedisCache.from_host_and_port(REDIS_HOST, REDIS_PORT),
    collection="redis_cache",
)

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
        embed_model,
    ],
    docstore=RedisDocumentStore.from_host_and_port(
        REDIS_HOST, REDIS_PORT, namespace="document_store"
    ),
    vector_store=vector_store,
    cache=cache,
    docstore_strategy=DocstoreStrategy.UPSERTS,
)

index = VectorStoreIndex.from_vector_store(
    pipeline.vector_store,
    embed_model=embed_model,
)

reader = SimpleDirectoryReader(input_dir=INPUT_DIR)


def load_data(reader: SimpleDirectoryReader):
    docs = reader.load_data()
    # Use the file path as a stable document ID so that re-ingesting the
    # same file upserts it instead of creating a duplicate.
    for doc in docs:
        doc.id_ = doc.metadata["file_path"]
    return docs


docs = load_data(reader)
print(f"Loaded {len(docs)} docs")

nodes = pipeline.run(documents=docs, show_progress=True)
print(f"Ingested {len(nodes)} Nodes")
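
The load_data helper pins each document's id_ to its source file path. That stable identity is what lets DocstoreStrategy.UPSERTS skip unchanged files and replace edited ones instead of duplicating them on re-ingestion. A minimal sketch of the idea (not part of the commit), using in-memory Document objects:

from llama_index.core import Document

# Two versions of the "same" file: identical file_path, different text.
doc_v1 = Document(text="first version", metadata={"file_path": "/datalake/a.txt"})
doc_v1.id_ = doc_v1.metadata["file_path"]

doc_v2 = Document(text="edited version", metadata={"file_path": "/datalake/a.txt"})
doc_v2.id_ = doc_v2.metadata["file_path"]

# Same identity with new content, so the docstore treats the second
# ingestion as an upsert rather than a new document.
assert doc_v1.id_ == doc_v2.id_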

rag_demo package module (+37 lines; both names are imported via "from rag_demo import custom_schema, getenv_or_exit", so this is likely rag_demo/__init__.py)

import os
import sys
import logging

from redisvl.schema import IndexSchema

logger = logging.getLogger()

# Redis vector index schema shared by the ingestion script and the app.
custom_schema = IndexSchema.from_dict(
    {
        "index": {"name": "bucket", "prefix": "doc"},
        # customize fields that are indexed
        "fields": [
            # required fields for llamaindex
            {"type": "tag", "name": "id"},
            {"type": "tag", "name": "doc_id"},
            {"type": "text", "name": "text"},
            # custom vector field for bge-small-en-v1.5 embeddings
            {
                "type": "vector",
                "name": "vector",
                "attrs": {
                    "dims": 384,
                    "algorithm": "hnsw",
                    "distance_metric": "cosine",
                },
            },
        ],
    }
)


def getenv_or_exit(name: str) -> str:
    value = os.getenv(name)
    if value is None:
        logger.critical(f"The environment variable '{name}' is not specified")
        sys.exit(1)

    return value
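
The vector field's dims: 384 must match the embedding model's output size; BAAI/bge-small-en-v1.5 emits 384-dimensional vectors, so swapping EMBEDDING_MODEL_NAME for another model means updating the schema as well. A quick sanity check, not part of the commit:

from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Embed a throwaway string and confirm the dimensionality matches the schema.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
vector = embed_model.get_text_embedding("hello world")
assert len(vector) == 384, f"schema dims must match the model, got {len(vector)}"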

rag_demo/main.py (+51 lines; the module path comes from the Dockerfile's uvicorn CMD)

import os
import logging

from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.redis import RedisVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama

from fastapi import FastAPI, Depends
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse

from rag_demo import custom_schema, getenv_or_exit

logger = logging.getLogger()

MODEL_NAME = getenv_or_exit("MODEL_NAME")
EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME", "BAAI/bge-small-en-v1.5")
REDIS_HOST = getenv_or_exit("REDIS_HOST")
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
OLLAMA_SERVER_URL = getenv_or_exit("OLLAMA_SERVER_URL")

embed_model = HuggingFaceEmbedding(model_name=EMBEDDING_MODEL_NAME)

# Connect to the vector store that already holds the ingested data.
vector_store = RedisVectorStore(
    schema=custom_schema,
    redis_url=f"redis://{REDIS_HOST}:{REDIS_PORT}",
)
# Create an index from the vector store.
index = VectorStoreIndex.from_vector_store(
    vector_store, embed_model=embed_model
)
# Connect to the LLM served by Ollama.
llm = Ollama(
    model=MODEL_NAME,
    base_url=OLLAMA_SERVER_URL,
)
# Create a query engine that is ready to query our RAG.
query_engine = index.as_query_engine(llm=llm)


def get_query_engine():
    return query_engine


app = FastAPI()


@app.get("/invoke")
async def root(message: str, query_engine=Depends(get_query_engine)):
    response = query_engine.query(message)
    json_compatible_item_data = jsonable_encoder({"message": f"{response}"})
    return JSONResponse(content=json_compatible_item_data)
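
A hypothetical client-side smoke test for the /invoke endpoint, assuming the service has been made reachable on localhost:8000 (for example with kubectl port-forward service/llamaindex-rag-service 8000:8000, using the Service defined later in this commit) and that the requests package is installed locally; the query string is only an illustration:

import requests

# GET /invoke?message=... returns {"message": "<generated answer>"}.
resp = requests.get(
    "http://localhost:8000/invoke",
    params={"message": "What do the ingested documents cover?"},
)
resp.raise_for_status()
print(resp.json()["message"])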

requirements.txt (+8 lines)

llama-index-storage-docstore-redis==0.3.0
llama-index-vector-stores-redis==0.4.0
llama-index-embeddings-huggingface==0.5.0
llama-index-readers-file==0.4.3
llama-index-llms-ollama==0.5.0
fastapi==0.115.6
uvicorn==0.34.0

Cloud Build config (+5 lines; builds and pushes the demo image)

steps:
  - name: 'gcr.io/cloud-builders/docker'
    args: ['build', '-t', 'us-docker.pkg.dev/$PROJECT_ID/${_IMAGE_REPO_NAME}/llamaindex-rag-demo:latest', '.']
images:
  - 'us-docker.pkg.dev/$PROJECT_ID/${_IMAGE_REPO_NAME}/llamaindex-rag-demo:latest'

Kubernetes manifest: Redis Stack Deployment and Service (+33 lines)

apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis-stack
  namespace: default
  labels:
    app: redis-stack
spec:
  selector:
    matchLabels:
      app: redis-stack
  template:
    metadata:
      labels:
        app: redis-stack
    spec:
      containers:
        - name: redis-stack
          image: redis/redis-stack:latest
          ports:
            - containerPort: 6379
---
apiVersion: v1
kind: Service
metadata:
  name: redis-stack-service
spec:
  selector:
    app: redis-stack
  ports:
    - protocol: TCP
      port: 6379
      targetPort: 6379

Kubernetes manifest: data ingestion Job (+37 lines)

apiVersion: batch/v1
kind: Job
metadata:
  name: llamaindex-ingest-data
spec:
  template:
    metadata:
      labels:
        name: ingest-data
      annotations:
        gke-gcsfuse/volumes: 'true'
    spec:
      serviceAccountName: ${SERVICE_ACCOUNT_NAME}
      containers:
        - name: ingest-data
          image: ${IMAGE_NAME}
          imagePullPolicy: Always
          command: ["python3", "cmd/ingest_data.py"]
          env:
            - name: REDIS_HOST
              value: redis-stack-service
            - name: REDIS_PORT
              value: "6379"
            - name: INPUT_DIR
              value: /datalake
          volumeMounts:
            - name: datalake
              mountPath: /datalake
      volumes:
        - name: datalake
          csi:
            driver: gcsfuse.csi.storage.gke.io
            volumeAttributes:
              bucketName: ${GCSFUSE_BUCKET_NAME}
              mountOptions: implicit-dirs,only-dir=datalake
      restartPolicy: Never
  backoffLimit: 4
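
Once the Job completes, one hypothetical way to confirm that ingestion created the Redis index (assuming kubectl port-forward service/redis-stack-service 6379:6379 and a locally installed redis package, which the Redis integrations above depend on):

from redis import Redis

r = Redis(host="localhost", port=6379)
# FT._LIST is a RediSearch command that lists all search indexes; the
# output should include the "bucket" index defined in custom_schema.
print(r.execute_command("FT._LIST"))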

Kubernetes manifest: Ollama Deployment and Service (+48 lines)

apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
spec:
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
      annotations:
        gke-gcsfuse/volumes: 'true'
    spec:
      serviceAccountName: ${SERVICE_ACCOUNT_NAME}
      nodeSelector:
        cloud.google.com/gke-accelerator: nvidia-l4
      containers:
        - name: ollama
          image: ollama/ollama:latest
          ports:
            - containerPort: 11434
          volumeMounts:
            - name: ollama-data
              mountPath: /root/.ollama/
          resources:
            limits:
              nvidia.com/gpu: 1
      volumes:
        - name: ollama-data
          csi:
            driver: gcsfuse.csi.storage.gke.io
            volumeAttributes:
              bucketName: ${GCSFUSE_BUCKET_NAME}
              mountOptions: implicit-dirs,only-dir=ollama
---
apiVersion: v1
kind: Service
metadata:
  name: ollama-service
spec:
  selector:
    app: ollama
  ports:
    - protocol: TCP
      port: 11434
      targetPort: 11434
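
The Deployment persists /root/.ollama on the GCS bucket (only-dir=ollama), so the model referenced by ${MODEL_NAME} must already be pulled into that store before the app can query it. A hypothetical readiness check against Ollama's standard /api/tags endpoint, assuming kubectl port-forward service/ollama-service 11434:11434:

import requests

# /api/tags returns {"models": [...]}; each entry has a "name" field.
tags = requests.get("http://localhost:11434/api/tags").json()
print([model["name"] for model in tags.get("models", [])])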

Kubernetes manifest: RAG app Deployment and Service (+42 lines)

apiVersion: apps/v1
kind: Deployment
metadata:
  name: llamaindex-rag
  labels:
    app: llamaindex-rag
spec:
  selector:
    matchLabels:
      app: llamaindex-rag
  template:
    metadata:
      labels:
        app: llamaindex-rag
    spec:
      containers:
        - name: llamaindex-rag
          image: ${IMAGE_NAME}
          imagePullPolicy: Always
          env:
            - name: MODEL_NAME
              value: ${MODEL_NAME}
            - name: REDIS_HOST
              value: redis-stack-service
            - name: REDIS_PORT
              value: "6379"
            - name: OLLAMA_SERVER_URL
              value: http://ollama-service:11434
          ports:
            - containerPort: 8000
---
apiVersion: v1
kind: Service
metadata:
  name: llamaindex-rag-service
spec:
  selector:
    app: llamaindex-rag
  ports:
    - protocol: TCP
      port: 8000
      targetPort: 8000

Terraform backend configuration (+7 lines; optional GCS state bucket, commented out by default)

## Optional gcs bucket configuration
#terraform {
#  backend "gcs" {
#    bucket = "<bucket_name>"
#    prefix = "terraform/state/llamaindex"
#  }
#}

Terraform variables (+11 lines)

project_id            = "<project_id>"
default_resource_name = "llamaindex-rag-demo-tf"

cluster_name      = "" # Leave empty to use the default name (default_resource_name)
cluster_location  = "us-central1"
private_cluster   = false
autopilot_cluster = true

service_account_name  = "" # Leave empty to use the default name
bucket_name           = "" # Leave empty to use the default name
image_repository_name = "" # Leave empty to use the default name
