From 19fdcf37be3a096300dac145b4b54c5a14e9742e Mon Sep 17 00:00:00 2001
From: Heng Qian <qianheng@amazon.com>
Date: Mon, 21 Apr 2025 14:06:47 +0800
Subject: [PATCH 1/8] Add sanity test script

Signed-off-by: Heng Qian <qianheng@amazon.com>
---
 scripts/SanityTestScript/SanityTest.py    | 222 ++++++++++++++++++++++
 scripts/SanityTestScript/test_queries.csv |   3 +
 2 files changed, 225 insertions(+)
 create mode 100644 scripts/SanityTestScript/SanityTest.py
 create mode 100644 scripts/SanityTestScript/test_queries.csv

diff --git a/scripts/SanityTestScript/SanityTest.py b/scripts/SanityTestScript/SanityTest.py
new file mode 100644
index 0000000000..3274efb4b6
--- /dev/null
+++ b/scripts/SanityTestScript/SanityTest.py
@@ -0,0 +1,222 @@
+#  Copyright OpenSearch Contributors
+#  SPDX-License-Identifier: Apache-2.0
+
+import signal
+import sys
+import requests
+import json
+import csv
+import time
+import logging
+from datetime import datetime
+import pandas as pd
+import argparse
+from requests.auth import HTTPBasicAuth
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import threading
+
+"""
+Environment: python3
+
+Example to use this script:
+
+python SanityTest.py --base-url ${URL_ADDRESS} --username *** --password *** --input-csv test_queries.csv --output-file test_report --max-workers 2 --check-interval 10 --timeout 600
+
+The input file test_queries.csv should contain column: `query`
+
+For more details, please use command:
+
+python SanityTest.py --help
+
+"""
+
+class PPLTester:
+  def __init__(self, base_url, username, password, max_workers, check_interval, timeout, output_file, start_row, end_row, log_level):
+    self.base_url = base_url
+    self.auth = HTTPBasicAuth(username, password)
+    self.headers = { 'Content-Type': 'application/json' }
+    self.max_workers = max_workers
+    self.check_interval = check_interval
+    self.timeout = timeout
+    self.output_file = output_file
+    self.start = start_row - 1 if start_row else None
+    self.end = end_row - 1 if end_row else None
+    self.log_level = log_level
+    self.max_attempts = (int)(timeout / check_interval)
+    self.logger = self._setup_logger()
+    self.executor = ThreadPoolExecutor(max_workers=self.max_workers)
+    self.thread_local = threading.local()
+    self.test_results = []
+
+  def _setup_logger(self):
+    logger = logging.getLogger('PPLTester')
+    logger.setLevel(self.log_level)
+
+    fh = logging.FileHandler('flint_test.log')
+    fh.setLevel(self.log_level)
+
+    ch = logging.StreamHandler()
+    ch.setLevel(self.log_level)
+
+    formatter = logging.Formatter(
+      '%(asctime)s - %(threadName)s - %(levelname)s - %(message)s'
+    )
+    fh.setFormatter(formatter)
+    ch.setFormatter(formatter)
+
+    logger.addHandler(fh)
+    logger.addHandler(ch)
+
+    return logger
+
+
+  # Call submit API to submit the query
+  def submit_query(self, query):
+    url = f"{self.base_url}/_plugins/_ppl"
+    response_json = None
+    payload = {
+      "query": query,
+    }
+    try:
+      response = requests.post(url, auth=self.auth, headers=self.headers, json=payload)
+      print(response)
+      response_json = response.json()
+      response.raise_for_status()
+      return response_json
+    except Exception as e:
+      return {"error": str(e), "response": response_json}
+
+  # Run the test and return the result
+  def run_test(self, query, seq_id, expected_status):
+    self.logger.info(f"Starting test: {seq_id}, {query}")
+    start_time = datetime.now()
+    submit_result = self.submit_query(query)
+    if "error" in submit_result:
+      self.logger.warning(f"Submit error: {submit_result}")
+      return {
+        "query_name": seq_id,
+        "query": query,
+        "expected_status": expected_status,
+        "status": "SUBMIT_FAILED",
+        "check_status": "SUBMIT_FAILED" == expected_status if expected_status else None,
+        "error": submit_result["response"]["error"],
+        "duration": 0,
+        "start_time": start_time,
+        "end_time": datetime.now()
+      }
+
+    self.logger.debug(f"Submit return: {submit_result}")
+
+    # TODO: need to check results
+    end_time = datetime.now()
+    duration = (end_time - start_time).total_seconds()
+
+    return {
+      "query_name": seq_id,
+      "query": query,
+      "expected_status": expected_status,
+      "status": "SUCCESS",
+      "check_status": expected_status == "SUCCESS",
+      "error": None,
+      "result": submit_result,
+      "duration": duration,
+      "start_time": start_time,
+      "end_time": end_time
+    }
+
+
+  def run_tests_from_csv(self, csv_file):
+    with open(csv_file, 'r') as f:
+      reader = csv.DictReader(f)
+      queries = [(row['query'], i, row.get('expected_status', None)) for i, row in enumerate(reader, start=1) if row['query'].strip()]
+
+    # Filtering queries based on start and end
+    queries = queries[self.start:self.end]
+
+    # Parallel execution
+    futures = [self.executor.submit(self.run_test, query, seq_id, expected_status) for query, seq_id, expected_status in queries]
+    for future in as_completed(futures):
+      result = future.result()
+      self.test_results.append(result)
+
+  def generate_report(self):
+    self.logger.info("Generating report...")
+    total_queries = len(self.test_results)
+    successful_queries = sum(1 for r in self.test_results if r['status'] == 'SUCCESS')
+    failed_queries = sum(1 for r in self.test_results if r['status'] == 'FAILED')
+    submit_failed_queries = sum(1 for r in self.test_results if r['status'] == 'SUBMIT_FAILED')
+    timeout_queries = sum(1 for r in self.test_results if r['status'] == 'TIMEOUT')
+
+    # Create report
+    report = {
+      "summary": {
+        "total_queries": total_queries,
+        "successful_queries": successful_queries,
+        "failed_queries": failed_queries,
+        "submit_failed_queries": submit_failed_queries,
+        "timeout_queries": timeout_queries,
+        "execution_time": sum(r['duration'] for r in self.test_results)
+      },
+      "detailed_results": self.test_results
+    }
+
+    # Save report to JSON file
+    with open(f"{self.output_file}.json", 'w') as f:
+      json.dump(report, f, indent=2, default=str)
+
+    # Save reults to Excel file
+    df = pd.DataFrame(self.test_results)
+    df.to_excel(f"{self.output_file}.xlsx", index=False)
+
+    self.logger.info(f"Generated report in {self.output_file}.xlsx and {self.output_file}.json")
+
+def signal_handler(sig, frame, tester):
+  print(f"Signal {sig} received, generating report...")
+  try:
+    tester.executor.shutdown(wait=False, cancel_futures=True)
+    tester.generate_report()
+  finally:
+    sys.exit(0)
+
+def main():
+  # Parse command line arguments
+  parser = argparse.ArgumentParser(description="Run tests from a CSV file and generate a report.")
+  parser.add_argument("--base-url", required=True, help="Base URL of the service")
+  parser.add_argument("--username", required=True, help="Username for authentication")
+  parser.add_argument("--password", required=True, help="Password for authentication")
+  parser.add_argument("--input-csv", required=True, help="Path to the CSV file containing test queries")
+  parser.add_argument("--output-file", required=True, help="Path to the output report file")
+  parser.add_argument("--max-workers", type=int, default=2, help="optional, Maximum number of worker threads (default: 2)")
+  parser.add_argument("--check-interval", type=int, default=5, help="optional, Check interval in seconds (default: 5)")
+  parser.add_argument("--timeout", type=int, default=600, help="optional, Timeout in seconds (default: 600)")
+  parser.add_argument("--start-row", type=int, default=None, help="optional, The start row of the query to run, start from 1")
+  parser.add_argument("--end-row", type=int, default=None, help="optional, The end row of the query to run, not included")
+  parser.add_argument("--log-level", default="INFO", help="optional, Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL, default: INFO)")
+
+  args = parser.parse_args()
+
+  tester = PPLTester(
+    base_url=args.base_url,
+    username=args.username,
+    password=args.password,
+    max_workers=args.max_workers,
+    check_interval=args.check_interval,
+    timeout=args.timeout,
+    output_file=args.output_file,
+    start_row=args.start_row,
+    end_row=args.end_row,
+    log_level=args.log_level,
+  )
+
+  # Register signal handlers to generate report on interrupt
+  signal.signal(signal.SIGINT, lambda sig, frame: signal_handler(sig, frame, tester))
+  signal.signal(signal.SIGTERM, lambda sig, frame: signal_handler(sig, frame, tester))
+
+  # Running tests
+  tester.run_tests_from_csv(args.input_csv)
+
+  # Gnerate report
+  tester.generate_report()
+
+if __name__ == "__main__":
+  main()
diff --git a/scripts/SanityTestScript/test_queries.csv b/scripts/SanityTestScript/test_queries.csv
new file mode 100644
index 0000000000..6fb55d0bf1
--- /dev/null
+++ b/scripts/SanityTestScript/test_queries.csv
@@ -0,0 +1,3 @@
+,query,expected_status,,,,
+,"source=opensearch_dashboards_sample_data_flights | patterns FlightDelayType","SUCCESS",,,
+,"source=opensearch_dashboards_sample_data_flights_2 | patterns FlightDelayType","FAILED",,,

From 807540286530b09609f4dfe2b6a0c7585d498511 Mon Sep 17 00:00:00 2001
From: Heng Qian <qianheng@amazon.com>
Date: Mon, 21 Apr 2025 14:23:51 +0800
Subject: [PATCH 2/8] Refine code

Signed-off-by: Heng Qian <qianheng@amazon.com>
---
 scripts/SanityTestScript/SanityTest.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/scripts/SanityTestScript/SanityTest.py b/scripts/SanityTestScript/SanityTest.py
index 3274efb4b6..a7dfa14a37 100644
--- a/scripts/SanityTestScript/SanityTest.py
+++ b/scripts/SanityTestScript/SanityTest.py
@@ -6,7 +6,6 @@
 import requests
 import json
 import csv
-import time
 import logging
 from datetime import datetime
 import pandas as pd
@@ -20,7 +19,7 @@
 
 Example to use this script:
 
-python SanityTest.py --base-url ${URL_ADDRESS} --username *** --password *** --input-csv test_queries.csv --output-file test_report --max-workers 2 --check-interval 10 --timeout 600
+python SanityTest.py --base-url ${URL_ADDRESS} --username *** --password *** --input-csv test_queries.csv --output-file test_report --max-workers 2
 
 The input file test_queries.csv should contain column: `query`
 
@@ -31,18 +30,16 @@
 """
 
 class PPLTester:
-  def __init__(self, base_url, username, password, max_workers, check_interval, timeout, output_file, start_row, end_row, log_level):
+  def __init__(self, base_url, username, password, max_workers, timeout, output_file, start_row, end_row, log_level):
     self.base_url = base_url
     self.auth = HTTPBasicAuth(username, password)
     self.headers = { 'Content-Type': 'application/json' }
     self.max_workers = max_workers
-    self.check_interval = check_interval
     self.timeout = timeout
     self.output_file = output_file
     self.start = start_row - 1 if start_row else None
     self.end = end_row - 1 if end_row else None
     self.log_level = log_level
-    self.max_attempts = (int)(timeout / check_interval)
     self.logger = self._setup_logger()
     self.executor = ThreadPoolExecutor(max_workers=self.max_workers)
     self.thread_local = threading.local()
@@ -78,8 +75,7 @@ def submit_query(self, query):
       "query": query,
     }
     try:
-      response = requests.post(url, auth=self.auth, headers=self.headers, json=payload)
-      print(response)
+      response = requests.post(url, auth=self.auth, headers=self.headers, json=payload, timeout=self.timeout)
       response_json = response.json()
       response.raise_for_status()
       return response_json
@@ -99,7 +95,7 @@ def run_test(self, query, seq_id, expected_status):
         "expected_status": expected_status,
         "status": "SUBMIT_FAILED",
         "check_status": "SUBMIT_FAILED" == expected_status if expected_status else None,
-        "error": submit_result["response"]["error"],
+        "error": submit_result["error"] if "error" in submit_result else submit_result["response"]["error"],
         "duration": 0,
         "start_time": start_time,
         "end_time": datetime.now()
@@ -187,7 +183,6 @@ def main():
   parser.add_argument("--input-csv", required=True, help="Path to the CSV file containing test queries")
   parser.add_argument("--output-file", required=True, help="Path to the output report file")
   parser.add_argument("--max-workers", type=int, default=2, help="optional, Maximum number of worker threads (default: 2)")
-  parser.add_argument("--check-interval", type=int, default=5, help="optional, Check interval in seconds (default: 5)")
   parser.add_argument("--timeout", type=int, default=600, help="optional, Timeout in seconds (default: 600)")
   parser.add_argument("--start-row", type=int, default=None, help="optional, The start row of the query to run, start from 1")
   parser.add_argument("--end-row", type=int, default=None, help="optional, The end row of the query to run, not included")
@@ -200,7 +195,6 @@ def main():
     username=args.username,
     password=args.password,
     max_workers=args.max_workers,
-    check_interval=args.check_interval,
     timeout=args.timeout,
     output_file=args.output_file,
     start_row=args.start_row,

From 93764025acb02244318f8a121d7e184ad883dbbf Mon Sep 17 00:00:00 2001
From: Heng Qian <qianheng@amazon.com>
Date: Mon, 21 Apr 2025 15:20:13 +0800
Subject: [PATCH 3/8] Refine code

Signed-off-by: Heng Qian <qianheng@amazon.com>
---
 scripts/SanityTestScript/SanityTest.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/SanityTestScript/SanityTest.py b/scripts/SanityTestScript/SanityTest.py
index a7dfa14a37..550cbb7119 100644
--- a/scripts/SanityTestScript/SanityTest.py
+++ b/scripts/SanityTestScript/SanityTest.py
@@ -93,8 +93,8 @@ def run_test(self, query, seq_id, expected_status):
         "query_name": seq_id,
         "query": query,
         "expected_status": expected_status,
-        "status": "SUBMIT_FAILED",
-        "check_status": "SUBMIT_FAILED" == expected_status if expected_status else None,
+        "status": "FAILED",
+        "check_status": "FAILED" == expected_status if expected_status else None,
         "error": submit_result["error"] if "error" in submit_result else submit_result["response"]["error"],
         "duration": 0,
         "start_time": start_time,
@@ -141,12 +141,14 @@ def generate_report(self):
     successful_queries = sum(1 for r in self.test_results if r['status'] == 'SUCCESS')
     failed_queries = sum(1 for r in self.test_results if r['status'] == 'FAILED')
     submit_failed_queries = sum(1 for r in self.test_results if r['status'] == 'SUBMIT_FAILED')
+    check_failed_queries = sum(1 for r in self.test_results if r['check_status'] == False)
     timeout_queries = sum(1 for r in self.test_results if r['status'] == 'TIMEOUT')
 
     # Create report
     report = {
       "summary": {
         "total_queries": total_queries,
+        "check_failed": check_failed_queries,
         "successful_queries": successful_queries,
         "failed_queries": failed_queries,
         "submit_failed_queries": submit_failed_queries,

From 83a68e0bfc3745310131700749ea46eae08fdfc5 Mon Sep 17 00:00:00 2001
From: Heng Qian <qianheng@amazon.com>
Date: Mon, 21 Apr 2025 15:25:50 +0800
Subject: [PATCH 4/8] Refine code

Signed-off-by: Heng Qian <qianheng@amazon.com>
---
 scripts/SanityTestScript/SanityTest.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/SanityTestScript/SanityTest.py b/scripts/SanityTestScript/SanityTest.py
index 550cbb7119..e0c260c859 100644
--- a/scripts/SanityTestScript/SanityTest.py
+++ b/scripts/SanityTestScript/SanityTest.py
@@ -95,7 +95,11 @@ def run_test(self, query, seq_id, expected_status):
         "expected_status": expected_status,
         "status": "FAILED",
         "check_status": "FAILED" == expected_status if expected_status else None,
-        "error": submit_result["error"] if "error" in submit_result else submit_result["response"]["error"],
+        "error": {
+          "error_message": submit_result.get("error"),
+          "response_error": submit_result.get("response", {}).get("error")
+        },
+        # "error": submit_result["error"] if "error" in submit_result else submit_result["response"]["error"],
         "duration": 0,
         "start_time": start_time,
         "end_time": datetime.now()

From 3f8299159cbaf2af8c7901ac4a162416eccef9ed Mon Sep 17 00:00:00 2001
From: Heng Qian <qianheng@amazon.com>
Date: Mon, 21 Apr 2025 15:41:13 +0800
Subject: [PATCH 5/8] Refine code

Signed-off-by: Heng Qian <qianheng@amazon.com>
---
 scripts/SanityTestScript/SanityTest.py    | 3 ++-
 scripts/SanityTestScript/test_queries.csv | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/SanityTestScript/SanityTest.py b/scripts/SanityTestScript/SanityTest.py
index e0c260c859..f568cff294 100644
--- a/scripts/SanityTestScript/SanityTest.py
+++ b/scripts/SanityTestScript/SanityTest.py
@@ -116,7 +116,8 @@ def run_test(self, query, seq_id, expected_status):
       "query": query,
       "expected_status": expected_status,
       "status": "SUCCESS",
-      "check_status": expected_status == "SUCCESS",
+      "check_status": expected_status == "SUCCESS" if expected_status else True,
+      # "check_status": expected_status == "SUCCESS",
       "error": None,
       "result": submit_result,
       "duration": duration,
diff --git a/scripts/SanityTestScript/test_queries.csv b/scripts/SanityTestScript/test_queries.csv
index 6fb55d0bf1..b74720b985 100644
--- a/scripts/SanityTestScript/test_queries.csv
+++ b/scripts/SanityTestScript/test_queries.csv
@@ -1,3 +1,4 @@
 ,query,expected_status,,,,
-,"source=opensearch_dashboards_sample_data_flights | patterns FlightDelayType","SUCCESS",,,
+,"source = http_logs | dedup 1 status | fields @timestamp, clientip, status, size | head 10","SUCCESS",,,
+,"source = http_logs | dedup status, size | head 10	",,,,
 ,"source=opensearch_dashboards_sample_data_flights_2 | patterns FlightDelayType","FAILED",,,

From b7419229e54dd2d3147401bfdb4d6f7971f38b88 Mon Sep 17 00:00:00 2001
From: Heng Qian <qianheng@amazon.com>
Date: Tue, 22 Apr 2025 16:32:07 +0800
Subject: [PATCH 6/8] Refine code

Signed-off-by: Heng Qian <qianheng@amazon.com>
---
 scripts/SanityTestScript/README.md        | 141 ++++++++++++++++++++++
 scripts/SanityTestScript/SanityTest.py    |  58 ++++-----
 scripts/SanityTestScript/test_queries.csv |   7 +-
 3 files changed, 167 insertions(+), 39 deletions(-)
 create mode 100644 scripts/SanityTestScript/README.md

diff --git a/scripts/SanityTestScript/README.md b/scripts/SanityTestScript/README.md
new file mode 100644
index 0000000000..17a0cdeec3
--- /dev/null
+++ b/scripts/SanityTestScript/README.md
@@ -0,0 +1,141 @@
+# Sanity Test Script
+
+### Description
+This Python script executes test queries from a CSV file using an asynchronous query API and generates comprehensive test reports.
+
+The script produces two report types:
+1. An Excel report with detailed test information for each query
+2. A JSON report containing both test result overview and query-specific details
+
+Apart from the basic feature, it also has some advanced functionality includes:
+1. Concurrent query execution (note: the async query service has session limits, so use thread workers moderately despite it already supports session ID reuse)
+2. Configurable query timeout for the requests
+3. Flexible row selection from the input CSV file, by specifying start row and end row of the input CSV file.
+4. Expected status validation when expected_status is present in the CSV
+5. Ability to generate partial reports if testing is interrupted
+
+### Usage
+To use this script, you need to have Python **3.6** or higher installed. It also requires the following Python libraries:
+```shell
+pip install requests pandas openpyxl pyspark setuptools pyarrow grpcio grpcio-status protobuf
+```
+
+After getting the requisite libraries, you can run the script with the following command line parameters in your shell:
+```shell
+python SanityTest.py --base-url ${BASE_URL} --username *** --password *** --input-csv test_queries.csv --output-file test_report
+```
+You need to replace the placeholders with your actual values of BASE_URL, and USERNAME, PASSWORD for authentication to your endpoint.
+
+Running against the localhost cluster, `BASE_URL` should be set to `http://localhost:9200`. You can launch an OpenSearch cluster with SQL plugin locally in opensearch-sql repo directory with command:
+```shell
+./gradlew run
+```
+
+For more details of the command line parameters, you can see the help manual via command:
+```shell
+python SanityTest.py --help   
+
+usage: SanityTest.py [-h] --spark-url BASE_URL --username USERNAME --password PASSWORD --input-csv INPUT_CSV --output-file OUTPUT_FILE
+                                      [--max-workers MAX_WORKERS] [--timeout TIMEOUT]
+                                      [--start-row START_ROW] [--end-row END_ROW]
+                                      [--log-level LOG_LEVEL]
+
+Run tests from a CSV file and generate a report.
+
+options:
+  -h, --help            show this help message and exit
+  --base-url BASE_URL  OpenSearch Cluster Connect URL of the service
+  --username USERNAME   Username for authentication
+  --password PASSWORD   Password for authentication
+  --input-csv INPUT_CSV
+                        Path to the CSV file containing test queries
+  --output-file OUTPUT_FILE
+                        Path to the output report file
+  --max-workers MAX_WORKERS
+                        optional, Maximum number of worker threads (default: 2)
+  --timeout TIMEOUT     optional, Timeout in seconds (default: 600)
+  --start-row START_ROW
+                        optional, The start row of the query to run, start from 1
+  --end-row END_ROW     optional, The end row of the query to run, not included
+  --log-level LOG_LEVEL
+                        optional, Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL, default: INFO)
+```
+
+### Input CSV File
+As claimed in the description, the input CSV file should at least have the column of `query` to run the tests. It also supports an optional column of `expected_status` whose value could be `SUCCESS`, `FAILED` or `TIMEOUT`. The script will check the actual status against the expected status and generate a new column of `check_status` for the check result -- TRUE means the status check passed; FALSE means the status check failed.
+
+#### JSON Report
+The JSON report provides the same information as the Excel report. But in JSON format, additionally, it includes a statistical summary of the test results at the beginning of the report.
+
+An example of JSON report:
+```json
+{
+  "summary": {
+    "total_queries": 2,
+    "check_failed": 0,
+    "successful_queries": 1,
+    "failed_queries": 1,
+    "timeout_queries": 0,
+    "execution_time": 1.245655
+  },
+  "detailed_results": [
+    {
+      "seq_id": 1,
+      "query_name": "Successful Demo",
+      "query": "source=opensearch_dashboards_sample_data_flights | patterns FlightDelayType | stats count() by patterns_field",
+      "expected_status": "SUCCESS",
+      "status": "SUCCESS",
+      "duration": 0.843509,
+      "start_time": "2025-04-22 16:30:22.461069",
+      "end_time": "2025-04-22 16:30:23.304578",
+      "result": {
+        "schema": [
+          {
+            "name": "count()",
+            "type": "int"
+          },
+          {
+            "name": "patterns_field",
+            "type": "string"
+          }
+        ],
+        "datarows": [
+          [
+            9311,
+            " "
+          ],
+          [
+            689,
+            "  "
+          ]
+        ],
+        "total": 2,
+        "size": 2
+      },
+      "check_status": true
+    },
+    {
+      "seq_id": 2,
+      "query_name": "Failed Demo",
+      "query": "source=opensearch_dashboards_sample_data_flights_2 | patterns FlightDelayType | stats count() by patterns_field",
+      "expected_status": "FAILED",
+      "status": "FAILED",
+      "duration": 0.402146,
+      "start_time": "2025-04-22 16:30:22.461505",
+      "end_time": "2025-04-22 16:30:22.863651",
+      "error": {
+        "error": "404 Client Error: Not Found for url: http://k8s-calcitep-opensear-8312a971dd-1309739395.us-west-1.elb.amazonaws.com/_plugins/_ppl",
+        "response": {
+          "error": {
+            "reason": "Error occurred in OpenSearch engine: no such index [opensearch_dashboards_sample_data_flights_2]",
+            "details": "[opensearch_dashboards_sample_data_flights_2] IndexNotFoundException[no such index [opensearch_dashboards_sample_data_flights_2]]\nFor more details, please send request for Json format to see the raw response from OpenSearch engine.",
+            "type": "IndexNotFoundException"
+          },
+          "status": 404
+        }
+      },
+      "check_status": true
+    }
+  ]
+}
+```
diff --git a/scripts/SanityTestScript/SanityTest.py b/scripts/SanityTestScript/SanityTest.py
index f568cff294..bace3e7731 100644
--- a/scripts/SanityTestScript/SanityTest.py
+++ b/scripts/SanityTestScript/SanityTest.py
@@ -83,59 +83,49 @@ def submit_query(self, query):
       return {"error": str(e), "response": response_json}
 
   # Run the test and return the result
-  def run_test(self, query, seq_id, expected_status):
+  def run_test(self, query, seq_id, query_name, expected_status):
     self.logger.info(f"Starting test: {seq_id}, {query}")
     start_time = datetime.now()
     submit_result = self.submit_query(query)
-    if "error" in submit_result:
-      self.logger.warning(f"Submit error: {submit_result}")
-      return {
-        "query_name": seq_id,
-        "query": query,
-        "expected_status": expected_status,
-        "status": "FAILED",
-        "check_status": "FAILED" == expected_status if expected_status else None,
-        "error": {
-          "error_message": submit_result.get("error"),
-          "response_error": submit_result.get("response", {}).get("error")
-        },
-        # "error": submit_result["error"] if "error" in submit_result else submit_result["response"]["error"],
-        "duration": 0,
-        "start_time": start_time,
-        "end_time": datetime.now()
-      }
-
-    self.logger.debug(f"Submit return: {submit_result}")
-
-    # TODO: need to check results
     end_time = datetime.now()
     duration = (end_time - start_time).total_seconds()
 
-    return {
-      "query_name": seq_id,
+    status = "SUCCESS"
+
+    result = {
+      "seq_id": seq_id,
+      "query_name": query_name,
       "query": query,
       "expected_status": expected_status,
-      "status": "SUCCESS",
-      "check_status": expected_status == "SUCCESS" if expected_status else True,
-      # "check_status": expected_status == "SUCCESS",
-      "error": None,
-      "result": submit_result,
+      "status": status,
       "duration": duration,
       "start_time": start_time,
       "end_time": end_time
     }
 
+    if "error" in submit_result:
+      self.logger.warning(f"Submit query with error: {submit_result}")
+      status = "TIMEOUT" if "read timed out" in submit_result.get("error", "").lower() else "FAILED"
+      result["status"] = status
+      result["error"] = submit_result
+    else:
+      self.logger.debug(f"Submit return: {submit_result}")
+      result["result"] = submit_result
+
+    check_status = status.lower() == expected_status.lower() if expected_status else False
+    result["check_status"] = check_status
+    return result
 
   def run_tests_from_csv(self, csv_file):
     with open(csv_file, 'r') as f:
       reader = csv.DictReader(f)
-      queries = [(row['query'], i, row.get('expected_status', None)) for i, row in enumerate(reader, start=1) if row['query'].strip()]
+      queries = [(row['query'], i, row.get('query_name', f'Q{i}'), row.get('expected_status', None)) for i, row in enumerate(reader, start=1) if row['query'].strip()]
 
     # Filtering queries based on start and end
     queries = queries[self.start:self.end]
 
     # Parallel execution
-    futures = [self.executor.submit(self.run_test, query, seq_id, expected_status) for query, seq_id, expected_status in queries]
+    futures = [self.executor.submit(self.run_test, query, seq_id, query_name, expected_status) for query, seq_id, query_name, expected_status in queries]
     for future in as_completed(futures):
       result = future.result()
       self.test_results.append(result)
@@ -143,10 +133,9 @@ def run_tests_from_csv(self, csv_file):
   def generate_report(self):
     self.logger.info("Generating report...")
     total_queries = len(self.test_results)
+    check_failed_queries = sum(1 for r in self.test_results if r['check_status'] == False)
     successful_queries = sum(1 for r in self.test_results if r['status'] == 'SUCCESS')
     failed_queries = sum(1 for r in self.test_results if r['status'] == 'FAILED')
-    submit_failed_queries = sum(1 for r in self.test_results if r['status'] == 'SUBMIT_FAILED')
-    check_failed_queries = sum(1 for r in self.test_results if r['check_status'] == False)
     timeout_queries = sum(1 for r in self.test_results if r['status'] == 'TIMEOUT')
 
     # Create report
@@ -156,11 +145,10 @@ def generate_report(self):
         "check_failed": check_failed_queries,
         "successful_queries": successful_queries,
         "failed_queries": failed_queries,
-        "submit_failed_queries": submit_failed_queries,
         "timeout_queries": timeout_queries,
         "execution_time": sum(r['duration'] for r in self.test_results)
       },
-      "detailed_results": self.test_results
+      "detailed_results": sorted(self.test_results, key=lambda r: r['seq_id'])
     }
 
     # Save report to JSON file
diff --git a/scripts/SanityTestScript/test_queries.csv b/scripts/SanityTestScript/test_queries.csv
index b74720b985..201367ac1a 100644
--- a/scripts/SanityTestScript/test_queries.csv
+++ b/scripts/SanityTestScript/test_queries.csv
@@ -1,4 +1,3 @@
-,query,expected_status,,,,
-,"source = http_logs | dedup 1 status | fields @timestamp, clientip, status, size | head 10","SUCCESS",,,
-,"source = http_logs | dedup status, size | head 10	",,,,
-,"source=opensearch_dashboards_sample_data_flights_2 | patterns FlightDelayType","FAILED",,,
+query_name,query,expected_status
+Successful Demo,"source=opensearch_dashboards_sample_data_flights | patterns FlightDelayType | stats count() by patterns_field",SUCCESS
+Failed Demo,"source=opensearch_dashboards_sample_data_flights_2 | patterns FlightDelayType | stats count() by patterns_field",FAILED

From d5bab719a2d503f560ddf7ef3436155c8f0293ac Mon Sep 17 00:00:00 2001
From: Heng Qian <qianheng@amazon.com>
Date: Tue, 22 Apr 2025 16:36:34 +0800
Subject: [PATCH 7/8] Refine code

Signed-off-by: Heng Qian <qianheng@amazon.com>
---
 scripts/SanityTestScript/README.md     | 4 ++--
 scripts/SanityTestScript/SanityTest.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/SanityTestScript/README.md b/scripts/SanityTestScript/README.md
index 17a0cdeec3..68f0f198b4 100644
--- a/scripts/SanityTestScript/README.md
+++ b/scripts/SanityTestScript/README.md
@@ -17,7 +17,7 @@ Apart from the basic feature, it also has some advanced functionality includes:
 ### Usage
 To use this script, you need to have Python **3.6** or higher installed. It also requires the following Python libraries:
 ```shell
-pip install requests pandas openpyxl pyspark setuptools pyarrow grpcio grpcio-status protobuf
+pip install requests pandas openpyxl
 ```
 
 After getting the requisite libraries, you can run the script with the following command line parameters in your shell:
@@ -35,7 +35,7 @@ For more details of the command line parameters, you can see the help manual via
 ```shell
 python SanityTest.py --help   
 
-usage: SanityTest.py [-h] --spark-url BASE_URL --username USERNAME --password PASSWORD --input-csv INPUT_CSV --output-file OUTPUT_FILE
+usage: SanityTest.py [-h] --base-url BASE_URL --username USERNAME --password PASSWORD --input-csv INPUT_CSV --output-file OUTPUT_FILE
                                       [--max-workers MAX_WORKERS] [--timeout TIMEOUT]
                                       [--start-row START_ROW] [--end-row END_ROW]
                                       [--log-level LOG_LEVEL]
diff --git a/scripts/SanityTestScript/SanityTest.py b/scripts/SanityTestScript/SanityTest.py
index bace3e7731..6e3dfc50f2 100644
--- a/scripts/SanityTestScript/SanityTest.py
+++ b/scripts/SanityTestScript/SanityTest.py
@@ -49,7 +49,7 @@ def _setup_logger(self):
     logger = logging.getLogger('PPLTester')
     logger.setLevel(self.log_level)
 
-    fh = logging.FileHandler('flint_test.log')
+    fh = logging.FileHandler('PPL_test.log')
     fh.setLevel(self.log_level)
 
     ch = logging.StreamHandler()

From 5f434977adb791fd3b506d23ac40ccab1a8560d4 Mon Sep 17 00:00:00 2001
From: Heng Qian <qianheng@amazon.com>
Date: Tue, 22 Apr 2025 16:49:59 +0800
Subject: [PATCH 8/8] Address comments

Signed-off-by: Heng Qian <qianheng@amazon.com>
---
 scripts/SanityTestScript/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/SanityTestScript/README.md b/scripts/SanityTestScript/README.md
index 68f0f198b4..dcd65b6061 100644
--- a/scripts/SanityTestScript/README.md
+++ b/scripts/SanityTestScript/README.md
@@ -124,7 +124,7 @@ An example of JSON report:
       "start_time": "2025-04-22 16:30:22.461505",
       "end_time": "2025-04-22 16:30:22.863651",
       "error": {
-        "error": "404 Client Error: Not Found for url: http://k8s-calcitep-opensear-8312a971dd-1309739395.us-west-1.elb.amazonaws.com/_plugins/_ppl",
+        "error": "404 Client Error: Not Found for url: http:localhost:9200/_plugins/_ppl",
         "response": {
           "error": {
             "reason": "Error occurred in OpenSearch engine: no such index [opensearch_dashboards_sample_data_flights_2]",