Skip to content

Commit c0589e3

Browse files
authored
Merge branch 'main' into main
2 parents e01125c + 65add33 commit c0589e3

File tree

6 files changed

+83
-13
lines changed

6 files changed

+83
-13
lines changed

pebblo/app/config/config.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,14 @@ def get_default_config_values():
2222
# set default config value
2323
conf_obj = Config(
2424
daemon=DaemonConfig(host="localhost", port=8000),
25-
reports=ReportConfig(format="pdf", renderer="xhtml2pdf", cacheDir="~/.pebblo"),
26-
logging=LoggingConfig(),
27-
classifier=ClassifierConfig(
28-
mode=ClassificationMode.ALL.value, anonymizeSnippets=False
25+
reports=ReportConfig(
26+
format="pdf",
27+
renderer="xhtml2pdf",
28+
cacheDir="~/.pebblo",
29+
anonymizeSnippets=False,
2930
),
31+
logging=LoggingConfig(),
32+
classifier=ClassifierConfig(mode=ClassificationMode.ALL.value),
3033
storage=StorageConfig(type="file", db=None),
3134
# For now the default storage type is FILE; in the next release DB will become the default storage type.
3235
)
@@ -37,7 +40,7 @@ def load_config(path: Optional[str]) -> Tuple[dict, Config]:
3740
try:
3841
if not path:
3942
# If Path does not exist in command, set default config value
40-
get_default_config_values()
43+
return get_default_config_values()
4144

4245
# If Path exist, set config value
4346
try:
Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# OpenAI credentials
22
OPENAI_API_KEY=<YOUR OPENAI API KEY>
33

4-
# Pebblo configuration
5-
PEBBLO_CLOUD_URL=<PEBBLO CLOUD URL>
6-
PEBBLO_API_KEY=<YOUR PEBBLO API KEY>
7-
PEBBLO_CLASSIFIER_URL="http://localhost:8000/"
8-
94
# Postgres configuration
105
PG_CONNECTION_STRING = "postgresql://<USERNAME>:<PASSWORD>@<HOST>:<PORT>/<DATABASE-NAME>"
116

7+
# Pebblo configuration
8+
PEBBLO_CLASSIFIER_URL="http://localhost:8000/"
9+
# Optional (only if you are using Pebblo Cloud)
10+
PEBBLO_CLOUD_URL=<PEBBLO CLOUD URL>
11+
PEBBLO_API_KEY=<YOUR PEBBLO API KEY>
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Pebblo Text Loader
2+
3+
This is a sample application that demonstrates how to use the `Pebblo Text Loader` to load the text data
4+
with the `Pebblo Safe Loader` into a `Postgres` vector database.
5+
6+
\* This solution uses predefined text data and metadata from the utility functions to demonstrate the loading of
7+
in-memory text data using Pebblo Safe Loader. Real-world applications can use this solution to load text data from
8+
various sources.
9+
10+
**PebbloTextLoader**: PebbloTextLoader is a loader for text data. Since PebbloSafeLoader is a wrapper around document
11+
loaders, this loader is used to load text data directly into Documents.
12+
13+
**This solution uses:**
14+
15+
- PostgreSQL 15.7
16+
- langchain-community from the daxa-ai/langchain branch (pebblo-0.1.19)
17+
18+
### Instructions
19+
20+
1. Create Python virtual-env
21+
22+
```console
23+
$ python3 -m venv .venv
24+
$ source .venv/bin/activate
25+
```
26+
27+
2. Install dependencies
28+
29+
```console
30+
$ pip3 install -r requirements.txt
31+
```
32+
33+
3. Install langchain-community from the branch `pebblo-0.1.19`
34+
35+
```console
36+
$ git clone https://github.com/daxa-ai/langchain.git
37+
$ cd langchain
38+
$ git fetch && git checkout pebblo-0.1.19
39+
$ cd libs/community
40+
$ pip3 install .
41+
```
42+
43+
4. Copy the `.env.sample` file to `.env` and populate the necessary environment variables. The `.env` file should look
44+
like this:
45+
46+
```console
47+
$ cat .env
48+
# OpenAI credentials
49+
OPENAI_API_KEY=<YOUR OPENAI API KEY>
50+
51+
# Postgres configuration
52+
PG_CONNECTION_STRING = "postgresql://<USERNAME>:<PASSWORD>@<HOST>:<PORT>/<DATABASE-NAME>"
53+
```
54+
55+
5. Run Pebblo Safe Loader sample app
56+
57+
```console
58+
$ python3 pebblo_safeload.py
59+
```
60+
61+
6. Retrieve the Pebblo PDF report from `$HOME/.pebblo/pebblo-safe-loader-text-loader/pebblo_report.pdf` on the
62+
system where `Pebblo Server` is running.

pebblo_safeloader/langchain/textloader_postgress/pebblo_safeload.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def __init__(self, collection_name: str):
4848
description="Identity & Semantic enabled SafeLoader app using Pebblo", # Description (Optional)
4949
load_semantic=True,
5050
api_key=PEBBLO_API_KEY,
51+
anonymize_snippets=True,
5152
)
5253
self.documents = self.loader.load()
5354
unique_identities = set()

pebblo_safeloader/langchain/textloader_postgress/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ python-dotenv==1.0.0
22
tiktoken # OpenAI tokenizer
33

44
langchain-openai>=0.1.7 # For OpenAI LLM and OpenAIEmbeddings
5-
langchain-community>=0.2.16,<0.3 # for PebbloSafeLoader, PebbloRetrievalQA
5+
#langchain-community>=0.2.16,<0.3 # for PebbloSafeLoader, PebbloRetrievalQA
66

77
psycopg2-binary # For Postgres VectorStore
88
langchain-postgres # For Postgres VectorStore

pebblo_safeloader/langchain/textloader_postgress/util.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,12 @@ def get_data(
4040
if metadatas:
4141
# Metadata(source: fake news web url) for each text
4242
_metadata_list = [
43-
{"source": f"https://www.acme.org/news/{i}"}
44-
for i in range(1, len(texts) + 1)
43+
{
44+
"source": f"https://www.acme.org/news/{i + 1}",
45+
"owner": "Joe Smith",
46+
"size": f"{len(texts[i])}",
47+
}
48+
for i in range(len(texts))
4549
]
4650
else:
4751
_metadata_list = None

0 commit comments

Comments
 (0)