Skip to content

Added Pebblo Cloud Sample App #292

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pebblo_cloud/langchain/rag-sample/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
credentials
google_token.json
79 changes: 79 additions & 0 deletions pebblo_cloud/langchain/rag-sample/rag_sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import os
from typing import List

from dotenv import load_dotenv
from langchain.schema import Document
from langchain_community.document_loaders import (
GoogleDriveLoader,
UnstructuredFileIOLoader,
)
from langchain_community.document_loaders.pebblo import PebbloSafeLoader
from langchain_community.vectorstores.qdrant import Qdrant
from langchain_openai.embeddings import OpenAIEmbeddings

# Runs at import time, before any configuration is read. Presumably this pulls
# settings from a local ``.env`` file into the process environment
# (python-dotenv) — confirm. ``PEBBLO_API_KEY`` is read from ``os.environ``
# later in this file.
load_dotenv()

# Passed as ``path=`` to ``Qdrant.from_documents`` when documents are indexed
# (presumably a local, on-disk Qdrant store — confirm against Qdrant docs).
QDRANT_PATH = "qdrant.db"
# Default Qdrant collection name used by ``DataLoader`` when the caller does
# not supply one.
COLLECTION_NAME = "identity-enabled-rag"


class DataLoader:
    """Load Google Drive documents through Pebblo and index them in Qdrant.

    The loader wraps a ``GoogleDriveLoader`` in a ``PebbloSafeLoader`` and
    then writes the resulting documents into a Qdrant collection using
    OpenAI embeddings.
    """

    def __init__(self, folder_id: str, collection_name: str = COLLECTION_NAME):
        """Store the loader configuration.

        Args:
            folder_id: Google Drive folder ID to load documents from.
            collection_name: Qdrant collection the documents are written to.
        """
        self.app_name = "pebblo-cloud-sample-app"
        self.folder_id = folder_id
        self.qdrant_collection_name = collection_name

    def load_documents(self):
        """Load every document under the configured Drive folder.

        Prints the distinct ``authorized_identities`` values found in the
        documents' metadata and the number of documents loaded.

        Returns:
            The documents returned by ``PebbloSafeLoader.load()``.
        """
        print("\nLoading RAG documents ...")
        loader = PebbloSafeLoader(
            GoogleDriveLoader(
                folder_id=self.folder_id,
                credentials_path="credentials/credentials.json",
                token_path="./google_token.json",
                recursive=True,
                file_loader_cls=UnstructuredFileIOLoader,
                file_loader_kwargs={"mode": "elements"},
                # NOTE(review): presumably this is what populates the
                # "authorized_identities" metadata read below — confirm
                # against the GoogleDriveLoader documentation.
                load_auth=True,
            ),
            name=self.app_name,  # App name (Mandatory)
            owner="Joe Smith",  # Owner (Optional)
            description="SafeLoader and SafeRetrival app using Pebblo",  # Description (Optional)
            api_key=os.environ.get("PEBBLO_API_KEY"),  # Pebblo cloud API key
        )
        documents = loader.load()
        unique_identities = set()
        for doc in documents:
            # A document may carry no "authorized_identities" entry; ``.get``
            # then returns None and ``set.update(None)`` raises TypeError, so
            # fall back to an empty list.
            unique_identities.update(doc.metadata.get("authorized_identities") or [])

        print(f"Authorized Identities: {list(unique_identities)}")
        print(f"Loaded {len(documents)} documents ...\n")
        return documents

    def add_docs_to_qdrant(self, documents: List[Document]):
        """Embed ``documents`` with OpenAI and store them in Qdrant.

        Args:
            documents: Documents to index.

        Returns:
            The ``Qdrant`` vector store created by ``Qdrant.from_documents``,
            stored at ``QDRANT_PATH`` in the configured collection.
        """
        print("\nAdding documents to Qdrant ...")
        embeddings = OpenAIEmbeddings()
        vectordb = Qdrant.from_documents(
            documents,
            embeddings,
            path=QDRANT_PATH,
            collection_name=self.qdrant_collection_name,
        )
        print(f"Added {len(documents)} documents to Qdrant ...\n")
        return vectordb


if __name__ == "__main__":
    # Script entry point: load the Drive folder through Pebblo, then index
    # the loaded documents into Qdrant.
    print("Loading documents to Qdrant ...")

    # Placeholder value — replace with a real Google Drive folder ID.
    drive_folder = "<google_drive_folder_id>"
    target_collection = "identity-enabled-rag"

    ingestor = DataLoader(
        folder_id=drive_folder,
        collection_name=target_collection,
    )

    # Load first, then hand the loaded documents straight to the indexer.
    vector_store = ingestor.add_docs_to_qdrant(ingestor.load_documents())