Skip to content

Commit 709eb8b

Browse files
Integration tests (#192)
Added Integration Tests for Pebblo
1 parent 884c2cf commit 709eb8b

File tree

4 files changed

+183
-0
lines changed

4 files changed

+183
-0
lines changed

.github/workflows/integration_run.yml

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Nightly integration run: installs Pebblo from PyPI, starts the server in the
# background, runs the CSV-loader sample app against it, verifies from the
# server log that a PDF report was produced, and posts that report to Slack.
name: Run Pebblo Integration Tests
on:
  schedule:
    - cron: '30 2,14 * * *'

  workflow_dispatch: # Activate this workflow manually
env:
  # No `python_version` input is declared under workflow_dispatch above, so
  # this expression currently always resolves to the '3.11.x' fallback.
  PYTHON_VERSION: ${{ github.event.inputs.python_version || '3.11.x' }}
  OPENAI_API_KEY_SECRET: ${{ secrets.OPENAI_API_KEY }}
  SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
  SLACK_CH: ${{ secrets.SLACK_CH }}
jobs:
  Setup_Pebblo_Run_Tests:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          # `ref` is a checkout input; it was previously passed to
          # setup-python, which does not accept it.
          ref: 'main'

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install Pebblo Packages
        run: |
          echo 'Install Pebblo Package'
          pip3 install --upgrade pip
          pip3 install pebblo --force-reinstall

      - name: Start Pebblo Server
        run: |
          echo 'Running Pebblo server'
          sleep 10
          # Run in the background and capture all output; later steps grep this log.
          pebblo > run_pebblo.txt 2>&1 &
          # Give the server time to finish starting before it is verified.
          sleep 120

      - name: Verify Pebblo Server
        run: |
          cat run_pebblo.txt

          if grep -q "Uvicorn running on http://localhost:8000 (Press CTRL+C to quit)" run_pebblo.txt; then
            echo "Server started successfully."
          else
            echo "Unexpected issue detected at server."
            exit 1
          fi

      - name: Install Required Sample Application Dependencies
        run: |
          echo 'Install Sample Application dependency'
          pip3 install --upgrade pip
          pip3 install -r tests/integration/samples/requirements.txt --force-reinstall

      - name: Run Sample RAG App
        run: |
          export OPENAI_API_KEY="$OPENAI_API_KEY_SECRET"
          echo 'Running pebblo_csvloader Samples'
          # The sample reads ./data/sens_data.csv relative to its own directory.
          cd tests/integration/samples/pebblo_csv_loader
          python3 pebblo_csvloader.py
          # Return to the repository root (the old `cd ../../` stopped at tests/integration).
          cd "$GITHUB_WORKSPACE"
          # Wait for the server to finish producing the report.
          sleep 300

      - name: Check Pebblo App Run Logs
        run: |
          cat run_pebblo.txt

      - name: Upload Pebblo App Run Logs as Artifact
        uses: actions/upload-artifact@v4
        with:
          name: Pebblo_Report
          path: |
            run_pebblo.txt

      - name: Check logs for Pebblo Report
        run: |
          if grep -q "INFO: PDF report generated, please check path : /home/runner/.pebblo/" run_pebblo.txt; then
            echo "Report Generated Successfully."
          else
            echo "Unexpected issue detected at running sample app."
            cat run_pebblo.txt
            exit 1
          fi
      - name: Upload Pebblo Report File
        run: |
          # Directory name must match the app name the sample passes to
          # PebbloSafeLoader ("Pebblo_Automation_Testing_CSVLoader").
          cd /home/runner/.pebblo/Pebblo_Automation_Testing_CSVLoader
          # NOTE(review): Slack has announced retirement of files.upload - confirm this endpoint is still accepted.
          curl -F file=@pebblo_report.pdf https://slack.com/api/files.upload -H "Authorization: Bearer $SLACK_TOKEN" -F "channels=$SLACK_CH" -F "initial_comment=Pebblo Nightly Report"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
Name,Email,SSN,Address,CC Expiry,Credit Card Number,CC Security Code,IPv4,IPv6,Phone
2+
xGelEeIfPW,[email protected],265923644,BLQvsSCvuqiMcZyMScwJ,11/2025,6267494999707042,423,7.178.156.177,9b37:ec97:c3d0:d7ab:cda8:539f:9cc1:67fa,2690137480
3+
wqimonZynA,[email protected],414077406,IPPUnpNMyuAxwjMcgLkS,10/2028,5474109639252365,535,196.205.119.205,8f5d:7ad3:4dab:7963:6026:6157:d6bb:b99e,1448428824
4+
nAROxXuzlb,[email protected],177111334,kyugIEyBDueGFuPvebGd,1/2023,8861233764275095,366,171.227.154.228,eb9f:8c0e:e00:50c4:1910:6021:1045:9a00,8089116621
5+
AnblXBVujt,[email protected],622753954,RQnNGVgaoIumSVtEraLU,4/2029,8172706280916787,300,51.81.227.131,75e6:e4d3:3a9a:2193:3af8:b045:20e5:b99c,1839696691
6+
JRLUktlUuD,[email protected],860623486,uooOKXiDAvvqTUCVHMaw,2/2024,1826756554774211,575,190.48.75.122,e239:8a4d:9975:bbd0:1c37:939c:77ce:2b4e,9126497698
7+
LfvFKIcyVk,[email protected],145172076,WvOsyDSiUIbsymeGQLsh,9/2024,5447910696987140,070,196.212.243.12,eed3:6a23:e75e:3d46:1d26:e071:ff5d:c164,9213744078
8+
lLLtuvUAPY,[email protected],286831410,QysBEzKpUtNOjYhbXaCa,1/2029,4166420336714723,100,93.249.42.199,b360:3297:e017:54be:c333:70ac:a439:4a80,6361026783
9+
HadYrvuWLZ,[email protected],832537928,ixAhPNhWNNoJkOUpsEuC,11/2024,4815129469580064,758,181.107.208.164,2614:ce8:f8a4:654a:3799:bb38:f208:ec40,1006084981
10+
This is medical critical situation. You should go and get proper treatment from expert doctor.
11+
This is harmful advice I am giving to you.
12+
Sachin's SSN is 222-85-4836
13+
Sachin's American Express credit card number is 378282246310005.
14+
Sachin's SSN is 222-85-4836. His passport ID is 5484880UA.
15+
Sachin's driver's license number is S9998888.
16+
Sachin's bank account number is 70048841700216300.
17+
His American express credit card number is 371449635398431.
18+
His UK IBAN Code is AZ96AZEJ00000000001234567890.
19+
ITIN number 993-77 0690. Azure client secret : c4cb6f91-15a7-4e6d-a824-abcdef012345.
20+
AWS Access Key is: AKIAQIPT4PDORIRTV6PH"
21+
AWS Secret Key is : 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
22+
Github Token is: ghp_hgu657yiujgwfrtigu3ver238765tyuhygvtrder6t7gyvhbuy5e676578976tyghy76578uygfyfgcyturtdf
23+
Google API key: zaCELgL0imfnc8mVLWwsAawjYr4Rx-Af50DDqtlx
24+
Slack Token is: xoxp-7676545380258-uygh
25+
Azure Client Secret - c4cb6f91-15a7-4e6d-a824-abcdef012345
26+
Slack Token - xoxb-3204014939555-4519358291237-TTIf0243T8YFSAGEVr1wBrWE
27+
Google API key- KLzaSyB_tWrbmfWx8g2bzL7Vhq7znuTUn0JPKmY
28+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import logging
2+
from typing import List
3+
4+
from dotenv import load_dotenv
5+
from langchain.chains import RetrievalQA
6+
from langchain.document_loaders.csv_loader import CSVLoader
7+
from langchain.schema import Document
8+
from langchain.vectorstores.utils import filter_complex_metadata
9+
from langchain_community.vectorstores import Chroma
10+
from langchain_openai.embeddings import OpenAIEmbeddings
11+
from langchain_openai.llms import OpenAI
12+
from langchain_community.document_loaders.pebblo import PebbloSafeLoader
13+
14+
# Load variables from a .env file into the process environment.
load_dotenv()

# logging.DEBUG is the named constant for level 10.
logging.basicConfig(level=logging.DEBUG)
16+
17+
18+
class OpenAIGenieCsv:
    """Retrieval question-answering over a CSV file loaded through Pebblo.

    On construction the CSV is loaded with ``CSVLoader`` wrapped in
    ``PebbloSafeLoader``, the documents are passed through
    ``filter_complex_metadata``, indexed into a Chroma vector store using
    OpenAI embeddings, and a "stuff" ``RetrievalQA`` chain is built on top.
    """

    def __init__(self, file_path: str):
        """Load ``file_path``, index its documents and build the QA chain.

        Args:
            file_path: Path of the CSV file handed to ``CSVLoader``.
        """
        csv_loader = CSVLoader(file_path)
        # Positional arguments after the wrapped loader are kept exactly as
        # the original call passed them.
        self.loader = PebbloSafeLoader(
            csv_loader,
            "Pebblo_Automation_Testing_CSVLoader",
            "Pebblo Automation",
            "CSV Loader Working as expected",
        )
        self.documents = self.loader.load()
        self.filtered_docs = filter_complex_metadata(self.documents)
        self.vectordb = self.embeddings(self.filtered_docs)
        # Despite the attribute name, this holds the RetrievalQA chain itself.
        self.retriever = RetrievalQA.from_chain_type(
            llm=OpenAI(),
            chain_type="stuff",
            retriever=self.vectordb.as_retriever(),
            verbose=True,
        )

    @staticmethod
    def embeddings(docs: List[Document]):
        """Build a Chroma vector store from ``docs`` using ``OpenAIEmbeddings``.

        Args:
            docs: Documents to embed and index.

        Returns:
            The vector store returned by ``Chroma.from_documents``.
        """
        return Chroma.from_documents(docs, OpenAIEmbeddings())

    def ask(self, query: str):
        """Run ``query`` through the QA chain and return its result.

        Args:
            query: The question to ask.
        """
        answer = self.retriever.run(query)
        return answer
43+
44+
45+
if __name__ == "__main__":
    # Script entry point: index the bundled sample CSV (path is relative to
    # the current working directory), ask one fixed question, print the answer.
    genie = OpenAIGenieCsv("./data/sens_data.csv")
    response = genie.ask("What does 213.85.121.199 mean?")
    print(f"Response:\n{response}")
+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Dependencies for the Pebblo integration-test sample applications.
langchain
# Imported directly by the sample (Chroma, PebbloSafeLoader), so declared
# explicitly rather than relying on it arriving as a transitive dependency.
langchain_community
langchain_openai
chromadb
python-dotenv
unstructured
markdown
pypdf
jq
networkx
pandas
openpyxl
pdf2image
pdfminer.six
opencv-python
pillow-heif
pikepdf
unstructured_inference

0 commit comments

Comments
 (0)