File tree Expand file tree Collapse file tree 5 files changed +75
-8
lines changed
pebblo_safeloader/langchain/textloader_postgress Expand file tree Collapse file tree 5 files changed +75
-8
lines changed Original file line number Diff line number Diff line change 1
1
# OpenAI credentials
2
2
OPENAI_API_KEY = <YOUR OPENAI API KEY>
3
3
4
- # Pebblo configuration
5
- PEBBLO_CLOUD_URL = <PEBBLO CLOUD URL>
6
- PEBBLO_API_KEY = <YOUR PEBBLO API KEY>
7
- PEBBLO_CLASSIFIER_URL = " http://localhost:8000/"
8
-
9
4
# Postgres configuration
10
5
PG_CONNECTION_STRING = " postgresql://<USERNAME>:<PASSWORD>@<HOST>:<PORT>/<DATABASE-NAME>"
11
6
7
+ # Pebblo configuration
8
+ PEBBLO_CLASSIFIER_URL = " http://localhost:8000/"
9
+ # Optional (only if you are using Pebblo Cloud)
10
+ PEBBLO_CLOUD_URL = <PEBBLO CLOUD URL>
11
+ PEBBLO_API_KEY = <YOUR PEBBLO API KEY>
Original file line number Diff line number Diff line change
1
+ # Pebblo Text Loader
2
+
3
+ This is a sample application that demonstrates how to use the ` Pebblo Text Loader ` to load the text data
4
+ with the ` Pebblo Safe Loader ` into a ` Postgres ` vector database.
5
+
6
+ \* This solution uses predefined text data and metadata from the utility functions to demonstrate the loading of
7
+ in-memory text data using Pebblo Safe Loader. Real-world applications can use this solution to load text data from
8
+ various sources.
9
+
10
+ ** PebbloTextLoader** : PebbloTextLoader is a loader for text data. Since PebbloSafeLoader is a wrapper around document
11
+ loaders, this loader is used to load text data directly into Documents.
12
+
13
+ ** This solution uses:**
14
+
15
+ - PostgreSQL 15.7
16
+ - langchain-community from the daxa-ai/langchain branch (pebblo-0.1.19)
17
+
18
+ ### Instructions
19
+
20
+ 1 . Create Python virtual-env
21
+
22
+ ``` console
23
+ $ python3 -m venv .venv
24
+ $ source .venv/bin/activate
25
+ ```
26
+
27
+ 2 . Install dependencies
28
+
29
+ ``` console
30
+ $ pip3 install -r requirements.txt
31
+ ```
32
+
33
+ 3 . Install langchain-community from the branch ` pebblo-0.1.19 `
34
+
35
+ ``` console
36
+ $ git clone https://github.com/daxa-ai/langchain.git
37
+ $ cd langchain
38
+ $ git fetch && git checkout pebblo-0.1.19
39
+ $ cd libs/community
40
+ $ pip3 install .
41
+ ```
42
+
43
+ 4 . Copy the ` .env.sample ` file to ` .env ` and populate the necessary environment variables. The ` .env ` file should look
44
+ like this:
45
+
46
+ ``` console
47
+ $ cat .env
48
+ # OpenAI credentials
49
+ OPENAI_API_KEY=<YOUR OPENAI API KEY>
50
+
51
+ # Postgres configuration
52
+ PG_CONNECTION_STRING = "postgresql://<USERNAME>:<PASSWORD>@<HOST>:<PORT>/<DATABASE-NAME>"
53
+ ```
54
+
55
+ 5 . Run Pebblo Safe Loader sample app
56
+
57
+ ``` console
58
+ $ python3 pebblo_safeload.py
59
+ ```
60
+
61
+ 6 . Retrieve the Pebblo PDF report from the ` $HOME/.pebblo/pebblo-safe-loader-text-loader/pebblo_report.pdf ` file path on the
62
+ system where ` Pebblo Server ` is running.
Original file line number Diff line number Diff line change @@ -48,6 +48,7 @@ def __init__(self, collection_name: str):
48
48
description = "Identity & Semantic enabled SafeLoader app using Pebblo" , # Description (Optional)
49
49
load_semantic = True ,
50
50
api_key = PEBBLO_API_KEY ,
51
+ anonymize_snippets = True ,
51
52
)
52
53
self .documents = self .loader .load ()
53
54
unique_identities = set ()
Original file line number Diff line number Diff line change @@ -2,7 +2,7 @@ python-dotenv==1.0.0
2
2
tiktoken # OpenAI tokenizer
3
3
4
4
langchain-openai >= 0.1.7 # For OpenAI LLM and OpenAIEmbeddings
5
- langchain-community >= 0.2.16 ,< 0.3 # for PebbloSafeLoader, PebbloRetrievalQA
5
+ # langchain-community>=0.2.16,<0.3 # for PebbloSafeLoader, PebbloRetrievalQA
6
6
7
7
psycopg2-binary # For Postgres VectorStore
8
8
langchain-postgres # For Postgres VectorStore
Original file line number Diff line number Diff line change @@ -40,8 +40,12 @@ def get_data(
40
40
if metadatas :
41
41
# Metadata(source: fake news web url) for each text
42
42
_metadata_list = [
43
- {"source" : f"https://www.acme.org/news/{ i } " }
44
- for i in range (1 , len (texts ) + 1 )
43
+ {
44
+ "source" : f"https://www.acme.org/news/{ i + 1 } " ,
45
+ "owner" : "Joe Smith" ,
46
+ "size" : f"{ len (texts [i ])} " ,
47
+ }
48
+ for i in range (len (texts ))
45
49
]
46
50
else :
47
51
_metadata_list = None
You can’t perform that action at this time.
0 commit comments