# Integration run for Pebblo: installs the published package, starts the
# server in the background, runs the CSV-loader sample against it, checks the
# server log for the generated PDF report and posts that report to Slack.
name: Run Pebblo Integration Tests

on:
  schedule:
    - cron: '30 2,14 * * *'  # 02:30 and 14:30 UTC, every day

  workflow_dispatch:  # Activate this workflow manually
    inputs:
      # Declared so the value read below actually exists on manual runs.
      python_version:
        description: 'Python version used to run the tests'
        required: false
        default: '3.11.x'

env:
  # Scheduled runs carry no inputs, hence the fallback.
  PYTHON_VERSION: ${{ github.event.inputs.python_version || '3.11.x' }}

jobs:
  Setup_Pebblo_Run_Tests:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          # `ref` is a checkout input; it was previously given to
          # actions/setup-python, which does not define it.
          ref: 'main'

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install Pebblo Packages
        run: |
          echo 'Install Pebblo Package'
          # Upgrade pip first so the install below already uses the new resolver.
          pip3 install --upgrade pip
          pip3 install pebblo --force-reinstall

      - name: Start Pebblo Server
        run: |
          echo 'Running Pebblo server'
          sleep 10
          # Runs in the background; later steps read its log from run_pebblo.txt.
          pebblo > run_pebblo.txt 2>&1 &
          sleep 120

      - name: Verify Pebblo Server
        run: |
          cat run_pebblo.txt

          # -F: match the banner literally instead of as a regular expression.
          if grep -qF "Uvicorn running on http://localhost:8000 (Press CTRL+C to quit)" run_pebblo.txt; then
            echo "Server started successfully."
          else
            echo "Unexpected issue detected at server."
            exit 1
          fi

      - name: Install Required Sample Application Dependencies
        run: |
          echo 'Install Sample Application dependency'
          pip3 install -r tests/integration/samples/requirements.txt --force-reinstall

      - name: Run Sample RAG App
        # Every `run` step starts in the workspace root, so the directory is set
        # here rather than with cd; the old trailing `cd ../../` had no effect.
        working-directory: tests/integration/samples/pebblo_csv_loader
        env:
          # Only this step needs the key, so it is not exported workflow-wide.
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          echo 'Running pebblo_csvloader Samples'
          python3 pebblo_csvloader.py
          # Leave the server time to finish writing the report.
          sleep 300

      - name: Check Pebblo App Run Logs
        run: |
          cat run_pebblo.txt

      - name: Upload Pebblo App Run Logs as Artifact
        # Keep the server log even when an earlier step failed.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: Pebblo_Report
          path: |
            run_pebblo.txt

      - name: Check logs for Pebblo Report
        run: |
          if grep -qF "INFO: PDF report generated, please check path : /home/runner/.pebblo/" run_pebblo.txt; then
            echo "Report Generated Successfully."
          else
            echo "Unexpected issue detected at running sample app."
            cat run_pebblo.txt
            exit 1
          fi

      - name: Upload Pebblo Report File
        env:
          SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
          SLACK_CH: ${{ secrets.SLACK_CH }}
        run: |
          # Must equal the app name the sample passes to PebbloSafeLoader
          # ("Pebblo_Automation_Testing_CSVLoader"); the previous path dropped
          # the first underscore. NOTE(review): assumes Pebblo writes per-app
          # output under ~/.pebblo/<app name> - confirm against the server log.
          cd /home/runner/.pebblo/Pebblo_Automation_Testing_CSVLoader
          # NOTE(review): Slack has announced the retirement of files.upload;
          # plan a move to the external-upload flow.
          curl -F file=@pebblo_report.pdf https://slack.com/api/files.upload -H "Authorization: Bearer $SLACK_TOKEN" -F "channels=$SLACK_CH" -F "initial_comment=Pebblo Nightly Report"
+nAROxXuzlb,ZZNxqhHhMd@MrJdW.com,177111334,kyugIEyBDueGFuPvebGd,1/2023,8861233764275095,366,171.227.154.228,eb9f:8c0e:e00:50c4:1910:6021:1045:9a00,8089116621 +AnblXBVujt,ZqjKxssYQA@SGNGR.com,622753954,RQnNGVgaoIumSVtEraLU,4/2029,8172706280916787,300,51.81.227.131,75e6:e4d3:3a9a:2193:3af8:b045:20e5:b99c,1839696691 +JRLUktlUuD,DeYESDxPMQ@wpgUa.com,860623486,uooOKXiDAvvqTUCVHMaw,2/2024,1826756554774211,575,190.48.75.122,e239:8a4d:9975:bbd0:1c37:939c:77ce:2b4e,9126497698 +LfvFKIcyVk,UyVrlFSCgz@YemiV.com,145172076,WvOsyDSiUIbsymeGQLsh,9/2024,5447910696987140,070,196.212.243.12,eed3:6a23:e75e:3d46:1d26:e071:ff5d:c164,9213744078 +lLLtuvUAPY,MSgGFNpOej@HyHsx.com,286831410,QysBEzKpUtNOjYhbXaCa,1/2029,4166420336714723,100,93.249.42.199,b360:3297:e017:54be:c333:70ac:a439:4a80,6361026783 +HadYrvuWLZ,ZvivkFFsei@ECORT.com,832537928,ixAhPNhWNNoJkOUpsEuC,11/2024,4815129469580064,758,181.107.208.164,2614:ce8:f8a4:654a:3799:bb38:f208:ec40,1006084981 +This is medical critical situation. You should go and get proper treatment from expert doctor. +This is harmful advice I am giving to you. +Sachin's SSN is 222-85-4836 +Sachin's American Express credit card number is 378282246310005. +Sachin's SSN is 222-85-4836. His passport ID is 5484880UA. +Sachin's driver's license number is S9998888. +Sachin's bank account number is 70048841700216300. +His American express credit card number is 371449635398431. +His UK IBAN Code is AZ96AZEJ00000000001234567890. +ITIN number 993-77 0690. Azure client secret : c4cb6f91-15a7-4e6d-a824-abcdef012345. 
"""Sample RAG application used by the Pebblo integration run.

Loads a CSV file through ``PebbloSafeLoader``, indexes the documents in a
Chroma vector store and answers one question with a ``RetrievalQA`` chain.
"""
import logging
from pathlib import Path
from typing import List

from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.schema import Document
from langchain.vectorstores.utils import filter_complex_metadata
from langchain_community.vectorstores import Chroma
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_openai.llms import OpenAI
from langchain_community.document_loaders.pebblo import PebbloSafeLoader

load_dotenv()
# Same numeric level as before (10), spelled with its name.
logging.basicConfig(level=logging.DEBUG)


class OpenAIGenieCsv:
    """Question answering over a CSV file loaded via ``PebbloSafeLoader``."""

    def __init__(self, file_path: str):
        """Load the CSV, build the vector store and the QA chain.

        Args:
            file_path: Path of the CSV file handed to ``CSVLoader``.
        """
        # Keyword arguments make it obvious which string is the app name,
        # which the owner and which the description.
        self.loader = PebbloSafeLoader(
            CSVLoader(file_path),
            name="Pebblo_Automation_Testing_CSVLoader",
            owner="Pebblo Automation",
            description="CSV Loader Working as expected",
        )
        self.documents = self.loader.load()
        self.filtered_docs = filter_complex_metadata(self.documents)
        self.vectordb = self.embeddings(self.filtered_docs)
        llm = OpenAI()
        # Despite the attribute name this is the RetrievalQA chain itself,
        # not a bare retriever; the name is kept for existing callers.
        self.retriever = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=self.vectordb.as_retriever(),
            verbose=True,
        )

    @staticmethod
    def embeddings(docs: List[Document]):
        """Embed *docs* with ``OpenAIEmbeddings`` and return the Chroma store.

        Args:
            docs: Documents to index.

        Returns:
            The ``Chroma`` vector store built from *docs*.
        """
        return Chroma.from_documents(docs, OpenAIEmbeddings())

    def ask(self, query: str):
        """Run *query* through the QA chain and return the answer text.

        Args:
            query: Question to ask about the loaded documents.

        Returns:
            The value stored under the chain's ``"result"`` output key.
        """
        # Chain.run is deprecated; invoke takes the input mapping and returns
        # the output mapping, from which the answer is picked.
        return self.retriever.invoke({"query": query})["result"]


def main() -> None:
    """Build the sample app on the bundled CSV and print one answer."""
    # Resolve the data file next to this script so the sample also works when
    # it is not launched from its own directory.
    data_file = Path(__file__).resolve().parent / "data" / "sens_data.csv"
    genie = OpenAIGenieCsv(str(data_file))
    prompt = "What does 213.85.121.199 mean?"
    response = genie.ask(prompt)
    print(f"Response:\n{response}")


if __name__ == "__main__":
    main()