Skip to content

Commit 54bc7b4

Browse files
committed
Added Pebblo Cloud Sample App.
1 parent 6e6640c commit 54bc7b4

File tree

2 files changed

+79
-0
lines changed

2 files changed

+79
-0
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
credentials
2+
google_token.json
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,77 @@
1+
from typing import List
2+
3+
from dotenv import load_dotenv
4+
from langchain.schema import Document
5+
from langchain_community.document_loaders import (
6+
GoogleDriveLoader,
7+
UnstructuredFileIOLoader,
8+
)
9+
from langchain_community.document_loaders.pebblo import PebbloSafeLoader
10+
from langchain_community.vectorstores.qdrant import Qdrant
11+
from langchain_openai.embeddings import OpenAIEmbeddings
12+
13+
# Load settings from a local .env file via python-dotenv before anything
# below runs (presumably this is where the OpenAI credentials used by
# OpenAIEmbeddings come from -- confirm against the deployment docs).
load_dotenv()

# Value passed as ``path=`` to Qdrant.from_documents.
QDRANT_PATH = "qdrant.db"

# Default Qdrant collection name used by DataLoader.
COLLECTION_NAME = "identity-enabled-rag"
19+
20+
21+
class DataLoader:
    """Load documents from a Google Drive folder and index them in Qdrant.

    Documents are read with ``GoogleDriveLoader`` wrapped in
    ``PebbloSafeLoader`` and then embedded with ``OpenAIEmbeddings`` into a
    Qdrant collection stored at ``QDRANT_PATH``.
    """

    def __init__(self, folder_id: str, collection_name: str = COLLECTION_NAME):
        """Store the loader configuration.

        Args:
            folder_id: Google Drive folder id handed to ``GoogleDriveLoader``.
            collection_name: Qdrant collection the documents are written to.
        """
        self.app_name = "pebblo-cloud-sample-app"
        self.folder_id = folder_id
        self.qdrant_collection_name = collection_name

    def load_documents(self) -> List[Document]:
        """Load every document in the configured Drive folder.

        Prints the distinct values found under each document's
        ``authorized_identities`` metadata key, followed by the number of
        documents loaded.

        Returns:
            The documents returned by the Pebblo-wrapped loader.
        """
        print("\nLoading RAG documents ...")
        loader = PebbloSafeLoader(
            GoogleDriveLoader(
                folder_id=self.folder_id,
                credentials_path="credentials/credentials.json",
                token_path="./google_token.json",
                recursive=True,
                file_loader_cls=UnstructuredFileIOLoader,
                file_loader_kwargs={"mode": "elements"},
                # NOTE(review): presumably this is what populates the
                # "authorized_identities" metadata read below -- confirm
                # against the GoogleDriveLoader documentation.
                load_auth=True,
            ),
            name=self.app_name,  # App name (Mandatory)
            owner="Joe Smith",  # Owner (Optional)
            description="SafeLoader and SafeRetrival app using Pebblo",  # Description (Optional)
        )
        documents = loader.load()

        unique_identities = set()
        for doc in documents:
            # A document may lack the key (or carry None); set.update(None)
            # would raise TypeError, so fall back to an empty list.
            unique_identities.update(doc.metadata.get("authorized_identities") or [])

        print(f"Authorized Identities: {list(unique_identities)}")
        print(f"Loaded {len(documents)} documents ...\n")
        return documents

    def add_docs_to_qdrant(self, documents: List[Document]) -> Qdrant:
        """Embed ``documents`` and store them in the configured collection.

        Args:
            documents: Documents to embed and index.

        Returns:
            The ``Qdrant`` vector store created by ``Qdrant.from_documents``.
        """
        print("\nAdding documents to Qdrant ...")
        embeddings = OpenAIEmbeddings()
        vectordb = Qdrant.from_documents(
            documents,
            embeddings,
            path=QDRANT_PATH,
            collection_name=self.qdrant_collection_name,
        )
        print(f"Added {len(documents)} documents to Qdrant ...\n")
        return vectordb
66+
67+
68+
if __name__ == "__main__":
    # Script entry point: load the Drive folder's documents, then index
    # them into the Qdrant collection.
    print("Loading documents to Qdrant ...")

    # Placeholder value -- replace with a real Google Drive folder id.
    drive_folder_id = "<google_drive_folder_id>"
    target_collection = "identity-enabled-rag"

    data_loader = DataLoader(
        folder_id=drive_folder_id,
        collection_name=target_collection,
    )
    loaded_docs = data_loader.load_documents()
    qdrant_store = data_loader.add_docs_to_qdrant(loaded_docs)

0 commit comments

Comments
 (0)