From 19fdcf37be3a096300dac145b4b54c5a14e9742e Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Mon, 21 Apr 2025 14:06:47 +0800 Subject: [PATCH 1/8] Add sanity test script Signed-off-by: Heng Qian --- scripts/SanityTestScript/SanityTest.py | 222 ++++++++++++++++++++++ scripts/SanityTestScript/test_queries.csv | 3 + 2 files changed, 225 insertions(+) create mode 100644 scripts/SanityTestScript/SanityTest.py create mode 100644 scripts/SanityTestScript/test_queries.csv diff --git a/scripts/SanityTestScript/SanityTest.py b/scripts/SanityTestScript/SanityTest.py new file mode 100644 index 0000000000..3274efb4b6 --- /dev/null +++ b/scripts/SanityTestScript/SanityTest.py @@ -0,0 +1,222 @@ +# Copyright OpenSearch Contributors +# SPDX-License-Identifier: Apache-2.0 + +import signal +import sys +import requests +import json +import csv +import time +import logging +from datetime import datetime +import pandas as pd +import argparse +from requests.auth import HTTPBasicAuth +from concurrent.futures import ThreadPoolExecutor, as_completed +import threading + +""" +Environment: python3 + +Example to use this script: + +python SanityTest.py --base-url ${URL_ADDRESS} --username *** --password *** --input-csv test_queries.csv --output-file test_report --max-workers 2 --check-interval 10 --timeout 600 + +The input file test_queries.csv should contain column: `query` + +For more details, please use command: + +python SanityTest.py --help + +""" + +class PPLTester: + def __init__(self, base_url, username, password, max_workers, check_interval, timeout, output_file, start_row, end_row, log_level): + self.base_url = base_url + self.auth = HTTPBasicAuth(username, password) + self.headers = { 'Content-Type': 'application/json' } + self.max_workers = max_workers + self.check_interval = check_interval + self.timeout = timeout + self.output_file = output_file + self.start = start_row - 1 if start_row else None + self.end = end_row - 1 if end_row else None + self.log_level = log_level + self.max_attempts = (int)(timeout / check_interval) + self.logger = self._setup_logger() + self.executor = ThreadPoolExecutor(max_workers=self.max_workers) + self.thread_local = threading.local() + self.test_results = [] + + def _setup_logger(self): + logger = logging.getLogger('PPLTester') + logger.setLevel(self.log_level) + + fh = logging.FileHandler('flint_test.log') + fh.setLevel(self.log_level) + + ch = logging.StreamHandler() + ch.setLevel(self.log_level) + + formatter = logging.Formatter( + '%(asctime)s - %(threadName)s - %(levelname)s - %(message)s' + ) + fh.setFormatter(formatter) + ch.setFormatter(formatter) + + logger.addHandler(fh) + logger.addHandler(ch) + + return logger + + + # Call submit API to submit the query + def submit_query(self, query): + url = f"{self.base_url}/_plugins/_ppl" + response_json = None + payload = { + "query": query, + } + try: + response = requests.post(url, auth=self.auth, headers=self.headers, json=payload) + print(response) + response_json = response.json() + response.raise_for_status() + return response_json + except Exception as e: + return {"error": str(e), "response": response_json} + + # Run the test and return the result + def run_test(self, query, seq_id, expected_status): + self.logger.info(f"Starting test: {seq_id}, {query}") + start_time = datetime.now() + submit_result = self.submit_query(query) + if "error" in submit_result: + self.logger.warning(f"Submit error: {submit_result}") + return { + "query_name": seq_id, + "query": query, + "expected_status": expected_status, + "status": "SUBMIT_FAILED", + "check_status": "SUBMIT_FAILED" == expected_status if expected_status else None, + "error": submit_result["response"]["error"], + "duration": 0, + "start_time": start_time, + "end_time": datetime.now() + } + + self.logger.debug(f"Submit return: {submit_result}") + + # TODO: need to check results + end_time = datetime.now() + duration = (end_time - start_time).total_seconds() + + return { + "query_name": seq_id, + "query": query, + "expected_status": expected_status, + "status": "SUCCESS", + "check_status": expected_status == "SUCCESS", + "error": None, + "result": submit_result, + "duration": duration, + "start_time": start_time, + "end_time": end_time + } + + + def run_tests_from_csv(self, csv_file): + with open(csv_file, 'r') as f: + reader = csv.DictReader(f) + queries = [(row['query'], i, row.get('expected_status', None)) for i, row in enumerate(reader, start=1) if row['query'].strip()] + + # Filtering queries based on start and end + queries = queries[self.start:self.end] + + # Parallel execution + futures = [self.executor.submit(self.run_test, query, seq_id, expected_status) for query, seq_id, expected_status in queries] + for future in as_completed(futures): + result = future.result() + self.test_results.append(result) + + def generate_report(self): + self.logger.info("Generating report...") + total_queries = len(self.test_results) + successful_queries = sum(1 for r in self.test_results if r['status'] == 'SUCCESS') + failed_queries = sum(1 for r in self.test_results if r['status'] == 'FAILED') + submit_failed_queries = sum(1 for r in self.test_results if r['status'] == 'SUBMIT_FAILED') + timeout_queries = sum(1 for r in self.test_results if r['status'] == 'TIMEOUT') + + # Create report + report = { + "summary": { + "total_queries": total_queries, + "successful_queries": successful_queries, + "failed_queries": failed_queries, + "submit_failed_queries": submit_failed_queries, + "timeout_queries": timeout_queries, + "execution_time": sum(r['duration'] for r in self.test_results) + }, + "detailed_results": self.test_results + } + + # Save report to JSON file + with open(f"{self.output_file}.json", 'w') as f: + json.dump(report, f, indent=2, default=str) + + # Save reults to Excel file + df = pd.DataFrame(self.test_results) + df.to_excel(f"{self.output_file}.xlsx", index=False) + + self.logger.info(f"Generated report in {self.output_file}.xlsx and {self.output_file}.json") + +def signal_handler(sig, frame, tester): + print(f"Signal {sig} received, generating report...") + try: + tester.executor.shutdown(wait=False, cancel_futures=True) + tester.generate_report() + finally: + sys.exit(0) + +def main(): + # Parse command line arguments + parser = argparse.ArgumentParser(description="Run tests from a CSV file and generate a report.") + parser.add_argument("--base-url", required=True, help="Base URL of the service") + parser.add_argument("--username", required=True, help="Username for authentication") + parser.add_argument("--password", required=True, help="Password for authentication") + parser.add_argument("--input-csv", required=True, help="Path to the CSV file containing test queries") + parser.add_argument("--output-file", required=True, help="Path to the output report file") + parser.add_argument("--max-workers", type=int, default=2, help="optional, Maximum number of worker threads (default: 2)") + parser.add_argument("--check-interval", type=int, default=5, help="optional, Check interval in seconds (default: 5)") + parser.add_argument("--timeout", type=int, default=600, help="optional, Timeout in seconds (default: 600)") + parser.add_argument("--start-row", type=int, default=None, help="optional, The start row of the query to run, start from 1") + parser.add_argument("--end-row", type=int, default=None, help="optional, The end row of the query to run, not included") + parser.add_argument("--log-level", default="INFO", help="optional, Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL, default: INFO)") + + args = parser.parse_args() + + tester = PPLTester( + base_url=args.base_url, + username=args.username, + password=args.password, + max_workers=args.max_workers, + check_interval=args.check_interval, + timeout=args.timeout, + output_file=args.output_file, + start_row=args.start_row, + end_row=args.end_row, + log_level=args.log_level, + ) + + # Register signal handlers to generate report on interrupt + signal.signal(signal.SIGINT, lambda sig, frame: signal_handler(sig, frame, tester)) + signal.signal(signal.SIGTERM, lambda sig, frame: signal_handler(sig, frame, tester)) + + # Running tests + tester.run_tests_from_csv(args.input_csv) + + # Gnerate report + tester.generate_report() + +if __name__ == "__main__": + main() diff --git a/scripts/SanityTestScript/test_queries.csv b/scripts/SanityTestScript/test_queries.csv new file mode 100644 index 0000000000..6fb55d0bf1 --- /dev/null +++ b/scripts/SanityTestScript/test_queries.csv @@ -0,0 +1,3 @@ +,query,expected_status,,,, +,"source=opensearch_dashboards_sample_data_flights | patterns FlightDelayType","SUCCESS",,, +,"source=opensearch_dashboards_sample_data_flights_2 | patterns FlightDelayType","FAILED",,, From 807540286530b09609f4dfe2b6a0c7585d498511 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Mon, 21 Apr 2025 14:23:51 +0800 Subject: [PATCH 2/8] Refine code Signed-off-by: Heng Qian --- scripts/SanityTestScript/SanityTest.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/scripts/SanityTestScript/SanityTest.py b/scripts/SanityTestScript/SanityTest.py index 3274efb4b6..a7dfa14a37 100644 --- a/scripts/SanityTestScript/SanityTest.py +++ b/scripts/SanityTestScript/SanityTest.py @@ -6,7 +6,6 @@ import requests import json import csv -import time import logging from datetime import datetime import pandas as pd @@ -20,7 +19,7 @@ Example to use this script: -python SanityTest.py --base-url ${URL_ADDRESS} --username *** --password *** --input-csv test_queries.csv --output-file test_report --max-workers 2 --check-interval 10 --timeout 600 +python SanityTest.py --base-url ${URL_ADDRESS} --username *** --password *** --input-csv test_queries.csv --output-file test_report --max-workers 2 The input file test_queries.csv should contain column: `query` @@ -31,18 +30,16 @@ """ class PPLTester: - def __init__(self, base_url, username, password, max_workers, check_interval, timeout, output_file, start_row, end_row, log_level): + def __init__(self, base_url, username, password, max_workers, timeout, output_file, start_row, end_row, log_level): self.base_url = base_url self.auth = HTTPBasicAuth(username, password) self.headers = { 'Content-Type': 'application/json' } self.max_workers = max_workers - self.check_interval = check_interval self.timeout = timeout self.output_file = output_file self.start = start_row - 1 if start_row else None self.end = end_row - 1 if end_row else None self.log_level = log_level - self.max_attempts = (int)(timeout / check_interval) self.logger = self._setup_logger() self.executor = ThreadPoolExecutor(max_workers=self.max_workers) self.thread_local = threading.local() @@ -78,8 +75,7 @@ def submit_query(self, query): "query": query, } try: - response = requests.post(url, auth=self.auth, headers=self.headers, json=payload) - print(response) + response = requests.post(url, auth=self.auth, headers=self.headers, json=payload, timeout=self.timeout) response_json = response.json() response.raise_for_status() return response_json @@ -99,7 +95,7 @@ def run_test(self, query, seq_id, expected_status): "expected_status": expected_status, "status": "SUBMIT_FAILED", "check_status": "SUBMIT_FAILED" == expected_status if expected_status else None, - "error": submit_result["response"]["error"], + "error": submit_result["error"] if "error" in submit_result else submit_result["response"]["error"], "duration": 0, "start_time": start_time, "end_time": datetime.now() @@ -187,7 +183,6 @@ def main(): parser.add_argument("--input-csv", required=True, help="Path to the CSV file containing test queries") parser.add_argument("--output-file", required=True, help="Path to the output report file") parser.add_argument("--max-workers", type=int, default=2, help="optional, Maximum number of worker threads (default: 2)") - parser.add_argument("--check-interval", type=int, default=5, help="optional, Check interval in seconds (default: 5)") parser.add_argument("--timeout", type=int, default=600, help="optional, Timeout in seconds (default: 600)") parser.add_argument("--start-row", type=int, default=None, help="optional, The start row of the query to run, start from 1") parser.add_argument("--end-row", type=int, default=None, help="optional, The end row of the query to run, not included") @@ -200,7 +195,6 @@ def main(): username=args.username, password=args.password, max_workers=args.max_workers, - check_interval=args.check_interval, timeout=args.timeout, output_file=args.output_file, start_row=args.start_row, From 93764025acb02244318f8a121d7e184ad883dbbf Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Mon, 21 Apr 2025 15:20:13 +0800 Subject: [PATCH 3/8] Refine code Signed-off-by: Heng Qian --- scripts/SanityTestScript/SanityTest.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/SanityTestScript/SanityTest.py b/scripts/SanityTestScript/SanityTest.py index a7dfa14a37..550cbb7119 100644 --- a/scripts/SanityTestScript/SanityTest.py +++ b/scripts/SanityTestScript/SanityTest.py @@ -93,8 +93,8 @@ def run_test(self, query, seq_id, expected_status): "query_name": seq_id, "query": query, "expected_status": expected_status, - "status": "SUBMIT_FAILED", - "check_status": "SUBMIT_FAILED" == expected_status if expected_status else None, + "status": "FAILED", + "check_status": "FAILED" == expected_status if expected_status else None, "error": submit_result["error"] if "error" in submit_result else submit_result["response"]["error"], "duration": 0, "start_time": start_time, @@ -141,12 +141,14 @@ def generate_report(self): successful_queries = sum(1 for r in self.test_results if r['status'] == 'SUCCESS') failed_queries = sum(1 for r in self.test_results if r['status'] == 'FAILED') submit_failed_queries = sum(1 for r in self.test_results if r['status'] == 'SUBMIT_FAILED') + check_failed_queries = sum(1 for r in self.test_results if r['check_status'] == False) timeout_queries = sum(1 for r in self.test_results if r['status'] == 'TIMEOUT') # Create report report = { "summary": { "total_queries": total_queries, + "check_failed": check_failed_queries, "successful_queries": successful_queries, "failed_queries": failed_queries, "submit_failed_queries": submit_failed_queries, From 83a68e0bfc3745310131700749ea46eae08fdfc5 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Mon, 21 Apr 2025 15:25:50 +0800 Subject: [PATCH 4/8] Refine code Signed-off-by: Heng Qian --- scripts/SanityTestScript/SanityTest.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/SanityTestScript/SanityTest.py b/scripts/SanityTestScript/SanityTest.py index 550cbb7119..e0c260c859 100644 --- a/scripts/SanityTestScript/SanityTest.py +++ b/scripts/SanityTestScript/SanityTest.py @@ -95,7 +95,11 @@ def run_test(self, query, seq_id, expected_status): "expected_status": expected_status, "status": "FAILED", "check_status": "FAILED" == expected_status if expected_status else None, - "error": submit_result["error"] if "error" in submit_result else submit_result["response"]["error"], + "error": { + "error_message": submit_result.get("error"), + "response_error": submit_result.get("response", {}).get("error") + }, + # "error": submit_result["error"] if "error" in submit_result else submit_result["response"]["error"], "duration": 0, "start_time": start_time, "end_time": datetime.now() From 3f8299159cbaf2af8c7901ac4a162416eccef9ed Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Mon, 21 Apr 2025 15:41:13 +0800 Subject: [PATCH 5/8] Refine code Signed-off-by: Heng Qian --- scripts/SanityTestScript/SanityTest.py | 3 ++- scripts/SanityTestScript/test_queries.csv | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/SanityTestScript/SanityTest.py b/scripts/SanityTestScript/SanityTest.py index e0c260c859..f568cff294 100644 --- a/scripts/SanityTestScript/SanityTest.py +++ b/scripts/SanityTestScript/SanityTest.py @@ -116,7 +116,8 @@ def run_test(self, query, seq_id, expected_status): "query": query, "expected_status": expected_status, "status": "SUCCESS", - "check_status": expected_status == "SUCCESS", + "check_status": expected_status == "SUCCESS" if expected_status else True, + # "check_status": expected_status == "SUCCESS", "error": None, "result": submit_result, "duration": duration, diff --git a/scripts/SanityTestScript/test_queries.csv b/scripts/SanityTestScript/test_queries.csv index 6fb55d0bf1..b74720b985 100644 --- a/scripts/SanityTestScript/test_queries.csv +++ b/scripts/SanityTestScript/test_queries.csv @@ -1,3 +1,4 @@ ,query,expected_status,,,, -,"source=opensearch_dashboards_sample_data_flights | patterns FlightDelayType","SUCCESS",,, +,"source = http_logs | dedup 1 status | fields @timestamp, clientip, status, size | head 10","SUCCESS",,, +,"source = http_logs | dedup status, size | head 10 ",,,, ,"source=opensearch_dashboards_sample_data_flights_2 | patterns FlightDelayType","FAILED",,, From b7419229e54dd2d3147401bfdb4d6f7971f38b88 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Tue, 22 Apr 2025 16:32:07 +0800 Subject: [PATCH 6/8] Refine code Signed-off-by: Heng Qian --- scripts/SanityTestScript/README.md | 141 ++++++++++++++++++++++ scripts/SanityTestScript/SanityTest.py | 58 ++++----- scripts/SanityTestScript/test_queries.csv | 7 +- 3 files changed, 167 insertions(+), 39 deletions(-) create mode 100644 scripts/SanityTestScript/README.md diff --git a/scripts/SanityTestScript/README.md b/scripts/SanityTestScript/README.md new file mode 100644 index 0000000000..17a0cdeec3 --- /dev/null +++ b/scripts/SanityTestScript/README.md @@ -0,0 +1,141 @@ +# Sanity Test Script + +### Description +This Python script executes test queries from a CSV file using an asynchronous query API and generates comprehensive test reports. + +The script produces two report types: +1. An Excel report with detailed test information for each query +2. A JSON report containing both test result overview and query-specific details + +Apart from the basic feature, it also has some advanced functionality includes: +1. Concurrent query execution (note: the async query service has session limits, so use thread workers moderately despite it already supports session ID reuse) +2. Configurable query timeout for the requests +3. Flexible row selection from the input CSV file, by specifying start row and end row of the input CSV file. +4. Expected status validation when expected_status is present in the CSV +5. Ability to generate partial reports if testing is interrupted + +### Usage +To use this script, you need to have Python **3.6** or higher installed. It also requires the following Python libraries: +```shell +pip install requests pandas openpyxl pyspark setuptools pyarrow grpcio grpcio-status protobuf +``` + +After getting the requisite libraries, you can run the script with the following command line parameters in your shell: +```shell +python SanityTest.py --base-url ${BASE_URL} --username *** --password *** --input-csv test_queries.csv --output-file test_report +``` +You need to replace the placeholders with your actual values of BASE_URL, and USERNAME, PASSWORD for authentication to your endpoint. + +Running against the localhost cluster, `BASE_URL` should be set to `http://localhost:9200`. You can launch an OpenSearch cluster with SQL plugin locally in opensearch-sql repo directory with command: +```shell +./gradlew run +``` + +For more details of the command line parameters, you can see the help manual via command: +```shell +python SanityTest.py --help + +usage: SanityTest.py [-h] --spark-url BASE_URL --username USERNAME --password PASSWORD --input-csv INPUT_CSV --output-file OUTPUT_FILE + [--max-workers MAX_WORKERS] [--timeout TIMEOUT] + [--start-row START_ROW] [--end-row END_ROW] + [--log-level LOG_LEVEL] + +Run tests from a CSV file and generate a report. + +options: + -h, --help show this help message and exit + --base-url BASE_URL OpenSearch Cluster Connect URL of the service + --username USERNAME Username for authentication + --password PASSWORD Password for authentication + --input-csv INPUT_CSV + Path to the CSV file containing test queries + --output-file OUTPUT_FILE + Path to the output report file + --max-workers MAX_WORKERS + optional, Maximum number of worker threads (default: 2) + --timeout TIMEOUT optional, Timeout in seconds (default: 600) + --start-row START_ROW + optional, The start row of the query to run, start from 1 + --end-row END_ROW optional, The end row of the query to run, not included + --log-level LOG_LEVEL + optional, Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL, default: INFO) +``` + +### Input CSV File +As claimed in the description, the input CSV file should at least have the column of `query` to run the tests. It also supports an optional column of `expected_status` whose value could be `SUCCESS`, `FAILED` or `TIMEOUT`. The script will check the actual status against the expected status and generate a new column of `check_status` for the check result -- TRUE means the status check passed; FALSE means the status check failed. + +#### JSON Report +The JSON report provides the same information as the Excel report. But in JSON format, additionally, it includes a statistical summary of the test results at the beginning of the report. + +An example of JSON report: +```json +{ + "summary": { + "total_queries": 2, + "check_failed": 0, + "successful_queries": 1, + "failed_queries": 1, + "timeout_queries": 0, + "execution_time": 1.245655 + }, + "detailed_results": [ + { + "seq_id": 1, + "query_name": "Successful Demo", + "query": "source=opensearch_dashboards_sample_data_flights | patterns FlightDelayType | stats count() by patterns_field", + "expected_status": "SUCCESS", + "status": "SUCCESS", + "duration": 0.843509, + "start_time": "2025-04-22 16:30:22.461069", + "end_time": "2025-04-22 16:30:23.304578", + "result": { + "schema": [ + { + "name": "count()", + "type": "int" + }, + { + "name": "patterns_field", + "type": "string" + } + ], + "datarows": [ + [ + 9311, + " " + ], + [ + 689, + " " + ] + ], + "total": 2, + "size": 2 + }, + "check_status": true + }, + { + "seq_id": 2, + "query_name": "Failed Demo", + "query": "source=opensearch_dashboards_sample_data_flights_2 | patterns FlightDelayType | stats count() by patterns_field", + "expected_status": "FAILED", + "status": "FAILED", + "duration": 0.402146, + "start_time": "2025-04-22 16:30:22.461505", + "end_time": "2025-04-22 16:30:22.863651", + "error": { + "error": "404 Client Error: Not Found for url: http://k8s-calcitep-opensear-8312a971dd-1309739395.us-west-1.elb.amazonaws.com/_plugins/_ppl", + "response": { + "error": { + "reason": "Error occurred in OpenSearch engine: no such index [opensearch_dashboards_sample_data_flights_2]", + "details": "[opensearch_dashboards_sample_data_flights_2] IndexNotFoundException[no such index [opensearch_dashboards_sample_data_flights_2]]\nFor more details, please send request for Json format to see the raw response from OpenSearch engine.", + "type": "IndexNotFoundException" + }, + "status": 404 + } + }, + "check_status": true + } + ] +} +``` diff --git a/scripts/SanityTestScript/SanityTest.py b/scripts/SanityTestScript/SanityTest.py index f568cff294..bace3e7731 100644 --- a/scripts/SanityTestScript/SanityTest.py +++ b/scripts/SanityTestScript/SanityTest.py @@ -83,59 +83,49 @@ def submit_query(self, query): return {"error": str(e), "response": response_json} # Run the test and return the result - def run_test(self, query, seq_id, expected_status): + def run_test(self, query, seq_id, query_name, expected_status): self.logger.info(f"Starting test: {seq_id}, {query}") start_time = datetime.now() submit_result = self.submit_query(query) - if "error" in submit_result: - self.logger.warning(f"Submit error: {submit_result}") - return { - "query_name": seq_id, - "query": query, - "expected_status": expected_status, - "status": "FAILED", - "check_status": "FAILED" == expected_status if expected_status else None, - "error": { - "error_message": submit_result.get("error"), - "response_error": submit_result.get("response", {}).get("error") - }, - # "error": submit_result["error"] if "error" in submit_result else submit_result["response"]["error"], - "duration": 0, - "start_time": start_time, - "end_time": datetime.now() - } - - self.logger.debug(f"Submit return: {submit_result}") - - # TODO: need to check results end_time = datetime.now() duration = (end_time - start_time).total_seconds() - return { - "query_name": seq_id, + status = "SUCCESS" + + result = { + "seq_id": seq_id, + "query_name": query_name, "query": query, "expected_status": expected_status, - "status": "SUCCESS", - "check_status": expected_status == "SUCCESS" if expected_status else True, - # "check_status": expected_status == "SUCCESS", - "error": None, - "result": submit_result, + "status": status, "duration": duration, "start_time": start_time, "end_time": end_time } + if "error" in submit_result: + self.logger.warning(f"Submit query with error: {submit_result}") + status = "TIMEOUT" if "read timed out" in submit_result.get("error", "").lower() else "FAILED" + result["status"] = status + result["error"] = submit_result + else: + self.logger.debug(f"Submit return: {submit_result}") + result["result"] = submit_result + + check_status = status.lower() == expected_status.lower() if expected_status else False + result["check_status"] = check_status + return result def run_tests_from_csv(self, csv_file): with open(csv_file, 'r') as f: reader = csv.DictReader(f) - queries = [(row['query'], i, row.get('expected_status', None)) for i, row in enumerate(reader, start=1) if row['query'].strip()] + queries = [(row['query'], i, row.get('query_name', f'Q{i}'), row.get('expected_status', None)) for i, row in enumerate(reader, start=1) if row['query'].strip()] # Filtering queries based on start and end queries = queries[self.start:self.end] # Parallel execution - futures = [self.executor.submit(self.run_test, query, seq_id, expected_status) for query, seq_id, expected_status in queries] + futures = [self.executor.submit(self.run_test, query, seq_id, query_name, expected_status) for query, seq_id, query_name, expected_status in queries] for future in as_completed(futures): result = future.result() self.test_results.append(result) @@ -143,10 +133,9 @@ def run_tests_from_csv(self, csv_file): def generate_report(self): self.logger.info("Generating report...") total_queries = len(self.test_results) + check_failed_queries = sum(1 for r in self.test_results if r['check_status'] == False) successful_queries = sum(1 for r in self.test_results if r['status'] == 'SUCCESS') failed_queries = sum(1 for r in self.test_results if r['status'] == 'FAILED') - submit_failed_queries = sum(1 for r in self.test_results if r['status'] == 'SUBMIT_FAILED') - check_failed_queries = sum(1 for r in self.test_results if r['check_status'] == False) timeout_queries = sum(1 for r in self.test_results if r['status'] == 'TIMEOUT') # Create report @@ -156,11 +145,10 @@ def generate_report(self): "check_failed": check_failed_queries, "successful_queries": successful_queries, "failed_queries": failed_queries, - "submit_failed_queries": submit_failed_queries, "timeout_queries": timeout_queries, "execution_time": sum(r['duration'] for r in self.test_results) }, - "detailed_results": self.test_results + "detailed_results": sorted(self.test_results, key=lambda r: r['seq_id']) } # Save report to JSON file diff --git a/scripts/SanityTestScript/test_queries.csv b/scripts/SanityTestScript/test_queries.csv index b74720b985..201367ac1a 100644 --- a/scripts/SanityTestScript/test_queries.csv +++ b/scripts/SanityTestScript/test_queries.csv @@ -1,4 +1,3 @@ -,query,expected_status,,,, -,"source = http_logs | dedup 1 status | fields @timestamp, clientip, status, size | head 10","SUCCESS",,, -,"source = http_logs | dedup status, size | head 10 ",,,, -,"source=opensearch_dashboards_sample_data_flights_2 | patterns FlightDelayType","FAILED",,, +query_name,query,expected_status +Successful Demo,"source=opensearch_dashboards_sample_data_flights | patterns FlightDelayType | stats count() by patterns_field",SUCCESS +Failed Demo,"source=opensearch_dashboards_sample_data_flights_2 | patterns FlightDelayType | stats count() by patterns_field",FAILED From d5bab719a2d503f560ddf7ef3436155c8f0293ac Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Tue, 22 Apr 2025 16:36:34 +0800 Subject: [PATCH 7/8] Refine code Signed-off-by: Heng Qian --- scripts/SanityTestScript/README.md | 4 ++-- scripts/SanityTestScript/SanityTest.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/SanityTestScript/README.md b/scripts/SanityTestScript/README.md index 17a0cdeec3..68f0f198b4 100644 --- a/scripts/SanityTestScript/README.md +++ b/scripts/SanityTestScript/README.md @@ -17,7 +17,7 @@ Apart from the basic feature, it also has some advanced functionality includes: ### Usage To use this script, you need to have Python **3.6** or higher installed. It also requires the following Python libraries: ```shell -pip install requests pandas openpyxl pyspark setuptools pyarrow grpcio grpcio-status protobuf +pip install requests pandas openpyxl ``` After getting the requisite libraries, you can run the script with the following command line parameters in your shell: @@ -35,7 +35,7 @@ For more details of the command line parameters, you can see the help manual via ```shell python SanityTest.py --help -usage: SanityTest.py [-h] --spark-url BASE_URL --username USERNAME --password PASSWORD --input-csv INPUT_CSV --output-file OUTPUT_FILE +usage: SanityTest.py [-h] --base-url BASE_URL --username USERNAME --password PASSWORD --input-csv INPUT_CSV --output-file OUTPUT_FILE [--max-workers MAX_WORKERS] [--timeout TIMEOUT] [--start-row START_ROW] [--end-row END_ROW] [--log-level LOG_LEVEL] diff --git a/scripts/SanityTestScript/SanityTest.py b/scripts/SanityTestScript/SanityTest.py index bace3e7731..6e3dfc50f2 100644 --- a/scripts/SanityTestScript/SanityTest.py +++ b/scripts/SanityTestScript/SanityTest.py @@ -49,7 +49,7 @@ def _setup_logger(self): logger = logging.getLogger('PPLTester') logger.setLevel(self.log_level) - fh = logging.FileHandler('flint_test.log') + fh = logging.FileHandler('PPL_test.log') fh.setLevel(self.log_level) ch = logging.StreamHandler() From 5f434977adb791fd3b506d23ac40ccab1a8560d4 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Tue, 22 Apr 2025 16:49:59 +0800 Subject: [PATCH 8/8] Address comments Signed-off-by: Heng Qian --- scripts/SanityTestScript/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/SanityTestScript/README.md b/scripts/SanityTestScript/README.md index 68f0f198b4..dcd65b6061 100644 --- a/scripts/SanityTestScript/README.md +++ b/scripts/SanityTestScript/README.md @@ -124,7 +124,7 @@ An example of JSON report: "start_time": "2025-04-22 16:30:22.461505", "end_time": "2025-04-22 16:30:22.863651", "error": { - "error": "404 Client Error: Not Found for url: http://k8s-calcitep-opensear-8312a971dd-1309739395.us-west-1.elb.amazonaws.com/_plugins/_ppl", + "error": "404 Client Error: Not Found for url: http:localhost:9200/_plugins/_ppl", "response": { "error": { "reason": "Error occurred in OpenSearch engine: no such index [opensearch_dashboards_sample_data_flights_2]",