Skip to content

Commit 64330ed

Browse files
Added Pebblo Cloud Sample App. (#292)
1 parent 6e6640c commit 64330ed

File tree

2 files changed

+81
-0
lines changed

2 files changed

+81
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
credentials
2+
google_token.json
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import os
2+
from typing import List
3+
4+
from dotenv import load_dotenv
5+
from langchain.schema import Document
6+
from langchain_community.document_loaders import (
7+
GoogleDriveLoader,
8+
UnstructuredFileIOLoader,
9+
)
10+
from langchain_community.document_loaders.pebblo import PebbloSafeLoader
11+
from langchain_community.vectorstores.qdrant import Qdrant
12+
from langchain_openai.embeddings import OpenAIEmbeddings
13+
14+
# Load environment variables via python-dotenv before anything reads them;
# load_documents() below reads PEBBLO_API_KEY from os.environ.
load_dotenv()
15+
16+
# Filesystem path handed to Qdrant.from_documents(path=...) for the vector DB.
17+
QDRANT_PATH = "qdrant.db"
18+
# Default Qdrant collection name; used as DataLoader's collection_name default.
19+
COLLECTION_NAME = "identity-enabled-rag"
20+
21+
22+
class DataLoader:
    """Load documents from a Google Drive folder via Pebblo and index them in Qdrant.

    The loader wraps ``GoogleDriveLoader`` in ``PebbloSafeLoader`` and then
    embeds the resulting documents with ``OpenAIEmbeddings`` into a Qdrant
    collection stored at ``QDRANT_PATH``.
    """

    def __init__(self, folder_id: str, collection_name: str = COLLECTION_NAME):
        """Store the ingestion settings.

        Args:
            folder_id: ID of the Google Drive folder to load documents from.
            collection_name: Name of the Qdrant collection to write into.
        """
        self.app_name = "pebblo-cloud-sample-app"
        self.folder_id = folder_id
        self.qdrant_collection_name = collection_name

    def load_documents(self) -> List[Document]:
        """Load every document under the configured Drive folder.

        Prints the distinct identities found in the documents'
        ``authorized_identities`` metadata and the number of documents loaded.

        Returns:
            The documents produced by the Pebblo-wrapped loader.
        """
        print("\nLoading RAG documents ...")
        loader = PebbloSafeLoader(
            GoogleDriveLoader(
                folder_id=self.folder_id,
                credentials_path="credentials/credentials.json",
                token_path="./google_token.json",
                recursive=True,
                file_loader_cls=UnstructuredFileIOLoader,
                file_loader_kwargs={"mode": "elements"},
                # NOTE(review): presumably what populates the
                # "authorized_identities" metadata read below -- confirm.
                load_auth=True,
            ),
            name=self.app_name,  # App name (Mandatory)
            owner="Joe Smith",  # Owner (Optional)
            description="SafeLoader and SafeRetrival app using Pebblo",  # Description (Optional)
            api_key=os.environ.get("PEBBLO_API_KEY"),  # Pebblo cloud API key
        )
        documents = loader.load()

        unique_identities = set()
        for doc in documents:
            # A document may lack the key (or carry None); set.update(None)
            # would raise TypeError, so fall back to an empty iterable.
            unique_identities.update(doc.metadata.get("authorized_identities") or ())

        print(f"Authorized Identities: {list(unique_identities)}")
        print(f"Loaded {len(documents)} documents ...\n")
        return documents

    def add_docs_to_qdrant(self, documents: List[Document]) -> Qdrant:
        """Embed ``documents`` and store them in the configured Qdrant collection.

        Args:
            documents: Documents to embed and index.

        Returns:
            The ``Qdrant`` vector store returned by ``Qdrant.from_documents``.
        """
        print("\nAdding documents to Qdrant ...")
        embeddings = OpenAIEmbeddings()
        vectordb = Qdrant.from_documents(
            documents,
            embeddings,
            path=QDRANT_PATH,
            collection_name=self.qdrant_collection_name,
        )
        print(f"Added {len(documents)} documents to Qdrant ...\n")
        return vectordb
68+
69+
70+
if __name__ == "__main__":
    # Script entry point: load the Drive folder's documents, then index them.
    print("Loading documents to Qdrant ...")

    # Placeholder: replace with the ID of the Google Drive folder to ingest.
    drive_folder = "<google_drive_folder_id>"
    pipeline = DataLoader(
        folder_id=drive_folder,
        collection_name="identity-enabled-rag",
    )

    # Load first, then embed and store what was loaded.
    loaded_docs = pipeline.load_documents()
    vector_store = pipeline.add_docs_to_qdrant(documents=loaded_docs)

0 commit comments

Comments
 (0)