From 81a89251400860aa54852a206c345af7194f7241 Mon Sep 17 00:00:00 2001 From: Yograj Shisode Date: Fri, 9 Aug 2024 14:23:07 +0530 Subject: [PATCH 1/3] Added documentation for LlamaIndex Pebblo Safe DataReader --- docs/gh_pages/docs/llama_index_safe_reader.md | 61 +++++++++++++++++++ docs/gh_pages/sidebars.ts | 7 +++ 2 files changed, 68 insertions(+) create mode 100644 docs/gh_pages/docs/llama_index_safe_reader.md diff --git a/docs/gh_pages/docs/llama_index_safe_reader.md b/docs/gh_pages/docs/llama_index_safe_reader.md new file mode 100644 index 00000000..0ce8cb45 --- /dev/null +++ b/docs/gh_pages/docs/llama_index_safe_reader.md @@ -0,0 +1,61 @@ +# Pebblo Safe DataReader for LlamaIndex + +This document describes how to augment your existing Llama DocumentReader with Pebblo Safe DocumentReader to get deep data visibility on the types of Topics and Entities ingested into the Gen-AI Llama application. For details on `Pebblo Daemon` see this [pebblo server](daemon.md) document. + +Pebblo Safe DocumentReader enables safe data ingestion for Llama `DocumentReader`. This is done by wrapping the document reader call with `Pebblo Safe DocumentReader` + +## How to Pebblo enable Document Reading? + +Assume a Llama RAG application snippet using `CSVReader` to read a CSV document for inference. + +Here is the snippet of Document loading using `CSVReader` + +``` +from pathlib import Path +from llama_index.readers.file import CSVReader +reader = CSVReader() +documents = reader.load_data(file=Path('data/corp_sens_data.csv')) +print(documents) +``` + +The Pebblo SafeReader can be installed and enabled with few lines of code change to the above snippet. 
+ +### Install PebbloSafeReader + +``` +pip install llama-index-readers-pebblo +``` + +### Use PebbloSafeReader + +``` +from pathlib import Path +from llama_index.readers.pebblo import PebbloSafeReader +from llama_index.readers.file import CSVReader +reader = CSVReader() +pebblo_reader = PebbloSafeReader(reader, + name="acme-corp-rag-1", # App name (Mandatory) + owner="Joe Smith", # Owner (Optional) + description="Support productivity RAG application", +) +documents = pebblo_reader.load_data(file=Path('data/corp_sens_data.csv')) +``` + +A data report with all the findings, both Topics and Entities, will be generated and available for inspection in the `Pebblo Server`. See the [pebblo server](daemon.md) document for further details. + +Note: By default, Pebblo Server runs at localhost:8000. If your Pebblo Server is running at some other location, e.g. in a Docker container, put the correct URL in the `PEBBLO_CLASSIFIER_URL` environment variable. Ref: [server-configurations](config.md#server) + +```bash +export PEBBLO_CLASSIFIER_URL="http://<pebblo-server-host>:<pebblo-server-port>" +``` + +## Supported Document Readers + +The following LlamaIndex DocumentReaders are currently supported. + +1. PDFReader +1. DocxReader +1. CSVReader + + +> Note : _Most other LlamaIndex document readers that implement load_data() method should work. The above list indicates the ones that are explicitly tested. If you have successfully tested a particular DocumentReader other than this list above, please consider raising a PR. 
\ No newline at end of file diff --git a/docs/gh_pages/sidebars.ts b/docs/gh_pages/sidebars.ts index 5db5bc4b..2be0079e 100644 --- a/docs/gh_pages/sidebars.ts +++ b/docs/gh_pages/sidebars.ts @@ -61,6 +61,13 @@ const sidebars: SidebarsConfig = { id: "retrieval_chain", // document ID label: "Safe Retriever for Langchain", // sidebar label }, + { + type: "category", + label: "LlamaIndex", // sidebar label + items: [ + { type: "doc", label: "Safe DataReader", id: "llama_index_safe_reader" }, + ], + }, { type: "doc", id: "reports", // document ID From 1de4f410cae9b2009818836a75643bf9852b06b1 Mon Sep 17 00:00:00 2001 From: Yograj Shisode Date: Wed, 21 Aug 2024 10:27:25 +0530 Subject: [PATCH 2/3] Changed Llama application name --- docs/gh_pages/docs/llama_index_safe_reader.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/gh_pages/docs/llama_index_safe_reader.md b/docs/gh_pages/docs/llama_index_safe_reader.md index 0ce8cb45..ff533ebd 100644 --- a/docs/gh_pages/docs/llama_index_safe_reader.md +++ b/docs/gh_pages/docs/llama_index_safe_reader.md @@ -1,12 +1,12 @@ # Pebblo Safe DataReader for LlamaIndex -This document describes how to augment your existing Llama DocumentReader with Pebblo Safe DocumentReader to get deep data visibility on the types of Topics and Entities ingested into the Gen-AI Llama application. For details on `Pebblo Daemon` see this [pebblo server](daemon.md) document. +This document describes how to augment your existing Llama DocumentReader with Pebblo Safe DocumentReader to get deep data visibility on the types of Topics and Entities ingested into the Gen-AI LlamaIndex application. For details on `Pebblo Daemon` see this [pebblo server](daemon.md) document. Pebblo Safe DocumentReader enables safe data ingestion for Llama `DocumentReader`. This is done by wrapping the document reader call with `Pebblo Safe DocumentReader` ## How to Pebblo enable Document Reading? 
-Assume a Llama RAG application snippet using `CSVReader` to read a CSV document for inference. +Assume a LlamaIndex RAG application snippet using `CSVReader` to read a CSV document for inference. Here is the snippet of Document loading using `CSVReader` From 05a93373bf45b4d435f1807be587e77d71bf1af3 Mon Sep 17 00:00:00 2001 From: Rutuja Chaudhari <36963955+rutujaac@users.noreply.github.com> Date: Thu, 22 Aug 2024 10:09:36 +0530 Subject: [PATCH 3/3] Update llama_index_safe_reader.md change Llama to LlamaIndex --- docs/gh_pages/docs/llama_index_safe_reader.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/gh_pages/docs/llama_index_safe_reader.md b/docs/gh_pages/docs/llama_index_safe_reader.md index ff533ebd..7f43979d 100644 --- a/docs/gh_pages/docs/llama_index_safe_reader.md +++ b/docs/gh_pages/docs/llama_index_safe_reader.md @@ -1,8 +1,8 @@ # Pebblo Safe DataReader for LlamaIndex -This document describes how to augment your existing Llama DocumentReader with Pebblo Safe DocumentReader to get deep data visibility on the types of Topics and Entities ingested into the Gen-AI LlamaIndex application. For details on `Pebblo Daemon` see this [pebblo server](daemon.md) document. +This document describes how to augment your existing LlamaIndex DocumentReader with Pebblo Safe DocumentReader to get deep data visibility on the types of Topics and Entities ingested into the Gen-AI LlamaIndex application. For details on `Pebblo Daemon` see this [pebblo server](daemon.md) document. -Pebblo Safe DocumentReader enables safe data ingestion for Llama `DocumentReader`. This is done by wrapping the document reader call with `Pebblo Safe DocumentReader` +Pebblo Safe DocumentReader enables safe data ingestion for LlamaIndex `DocumentReader`. This is done by wrapping the document reader call with `Pebblo Safe DocumentReader` ## How to Pebblo enable Document Reading? @@ -58,4 +58,4 @@ The following LlamaIndex DocumentReaders are currently supported. 1. 
CSVReader -> Note : _Most other LlamaIndex document readers that implement load_data() method should work. The above list indicates the ones that are explicitly tested. If you have successfully tested a particular DocumentReader other than this list above, please consider raising a PR. \ No newline at end of file +> Note: _Most other LlamaIndex document readers that implement the `load_data()` method should work. The above list indicates the ones that are explicitly tested. If you have successfully tested a DocumentReader that is not in the list above, please consider raising a PR._