Commit 411c973

SNOW-2021009: test optimisation (#2388)
1 parent 456cb39 commit 411c973

14 files changed: +371 -95 lines

.github/workflows/build_test.yml

Lines changed: 1 addition & 1 deletion
@@ -173,7 +173,7 @@ jobs:
       - name: Run tests
         # To run a single test on GHA use the below command:
         # run: python -m tox run -e `echo py${PYTHON_VERSION/\./}-single-ci | sed 's/ /,/g'`
-        run: python -m tox run -e `echo py${PYTHON_VERSION/\./}-{extras,unit,integ,pandas,sso}-ci | sed 's/ /,/g'`
+        run: python -m tox run -e `echo py${PYTHON_VERSION/\./}-{extras,unit-parallel,integ-parallel,pandas-parallel,sso}-ci | sed 's/ /,/g'`
 
         env:
           PYTHON_VERSION: ${{ matrix.python-version }}
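
What the backticked expression now evaluates to, sketched in Python for an assumed PYTHON_VERSION of 3.9 (the shell does the same thing with brace expansion plus sed):

suites = ["extras", "unit-parallel", "integ-parallel", "pandas-parallel", "sso"]
py = "3.9".replace(".", "")  # version illustrative
print(",".join(f"py{py}-{s}-ci" for s in suites))
# py39-extras-ci,py39-unit-parallel-ci,py39-integ-parallel-ci,py39-pandas-parallel-ci,py39-sso-ci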

ci/test_fips.sh

Lines changed: 7 additions & 1 deletion
@@ -14,6 +14,10 @@ curl https://repo1.maven.org/maven2/org/wiremock/wiremock-standalone/3.11.0/wire
 python3 -m venv fips_env
 source fips_env/bin/activate
 pip install -U setuptools pip
+
+# Install pytest-xdist for parallel execution
+pip install pytest-xdist
+
 pip install "${CONNECTOR_WHL}[pandas,secure-local-storage,development]"
 
 echo "!!! Environment description !!!"
@@ -24,6 +28,8 @@ python -c "from cryptography.hazmat.backends.openssl import backend;print('Cryp
 pip freeze
 
 cd $CONNECTOR_DIR
-pytest -vvv --cov=snowflake.connector --cov-report=xml:coverage.xml test
+
+# Run tests in parallel using pytest-xdist
+pytest -n auto -vvv --cov=snowflake.connector --cov-report=xml:coverage.xml test
 
 deactivate
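
On the new flag: with pytest-xdist, -n auto sizes the worker pool to the machine's available CPUs (the exact probing logic varies by xdist version). A rough Python equivalent:

import os

print(f"pytest -n auto  is roughly  pytest -n {os.cpu_count() or 1}")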

ci/test_linux.sh

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ else
     echo "[Info] Testing with ${PYTHON_VERSION}"
     SHORT_VERSION=$(python3.10 -c "print('${PYTHON_VERSION}'.replace('.', ''))")
     CONNECTOR_WHL=$(ls $CONNECTOR_DIR/dist/snowflake_connector_python*cp${SHORT_VERSION}*manylinux2014*.whl | sort -r | head -n 1)
-    TEST_LIST=`echo py${PYTHON_VERSION/\./}-{unit,integ,pandas,sso}-ci | sed 's/ /,/g'`
+    TEST_LIST=`echo py${PYTHON_VERSION/\./}-{unit-parallel,integ,pandas-parallel,sso}-ci | sed 's/ /,/g'`
     TEST_ENVLIST=fix_lint,$TEST_LIST,py${PYTHON_VERSION/\./}-coverage
     echo "[Info] Running tox for ${TEST_ENVLIST}"

src/snowflake/connector/ocsp_snowflake.py

Lines changed: 2 additions & 2 deletions
@@ -572,7 +572,7 @@ def _download_ocsp_response_cache(ocsp, url, do_retry: bool = True) -> bool:
                     response.status_code,
                     sleep_time,
                 )
-            time.sleep(sleep_time)
+                time.sleep(sleep_time)
         else:
             logger.error(
                 "Failed to get OCSP response after %s attempt.", max_retry
@@ -1645,7 +1645,7 @@ def _fetch_ocsp_response(
                         response.status_code,
                         sleep_time,
                     )
-                time.sleep(sleep_time)
+                    time.sleep(sleep_time)
             except Exception as ex:
                 if max_retry > 1:
                     sleep_time = next(backoff)
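
Both hunks are indentation fixes: time.sleep(sleep_time) moves one level deeper so it runs only on the retry branch rather than unconditionally. A simplified sketch of the assumed loop shape (fetch_once and the backoff stand-in are illustrative, not the connector's API):

import time

def fetch_with_retry(fetch_once, max_retry=3):
    backoff = iter(2**i for i in range(max_retry))  # stand-in backoff generator
    for _ in range(max_retry):
        response = fetch_once()
        if response is not None:
            return response
        if max_retry > 1:
            sleep_time = next(backoff)
            time.sleep(sleep_time)  # sleep only when another attempt may follow
    raise RuntimeError(f"Failed to get OCSP response after {max_retry} attempts")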

test/conftest.py

Lines changed: 11 additions & 0 deletions
@@ -148,3 +148,14 @@ def pytest_runtest_setup(item) -> None:
     if "auth" in test_tags:
         if os.getenv("RUN_AUTH_TESTS") != "true":
             pytest.skip("Skipping auth test in current environment")
+
+
+def get_server_parameter_value(connection, parameter_name: str) -> str | None:
+    """Get server parameter value, returns None if parameter doesn't exist."""
+    try:
+        with connection.cursor() as cur:
+            cur.execute(f"show parameters like '{parameter_name}'")
+            ret = cur.fetchone()
+            return ret[1] if ret else None
+    except Exception:
+        return None
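
A sketch of how a test can use the new helper (the conn_cnx fixture and the parameter name are illustrative; test_connection.py below imports it the same way):

def test_some_server_default(conn_cnx):
    from ..conftest import get_server_parameter_value

    with conn_cnx() as cnx:
        value = get_server_parameter_value(cnx, "CLIENT_SESSION_KEEP_ALIVE")
        # None means the parameter is missing or the lookup failed
        assert value is None or isinstance(value, str)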

test/helpers.py

Lines changed: 28 additions & 1 deletion
@@ -145,7 +145,34 @@ def _arrow_error_stream_chunk_remove_single_byte_test(use_table_iterator):
     decode_bytes = base64.b64decode(b64data)
     exception_result = []
     result_array = []
-    for i in range(len(decode_bytes)):
+
+    # Test strategic positions instead of every byte for performance
+    # Test header (first 50), middle section, end (last 50), and some random positions
+    data_len = len(decode_bytes)
+    test_positions = set()
+
+    # Critical positions: beginning (headers/metadata)
+    test_positions.update(range(min(50, data_len)))
+
+    # Middle section positions
+    mid_start = data_len // 2 - 25
+    mid_end = data_len // 2 + 25
+    test_positions.update(range(max(0, mid_start), min(data_len, mid_end)))
+
+    # End positions
+    test_positions.update(range(max(0, data_len - 50), data_len))
+
+    # Some random positions throughout the data (for broader coverage)
+    import random
+
+    random.seed(42)  # Deterministic for reproducible tests
+    random_positions = random.sample(range(data_len), min(50, data_len))
+    test_positions.update(random_positions)
+
+    # Convert to sorted list for consistent execution
+    test_positions = sorted(test_positions)
+
+    for i in test_positions:
         try:
             # removing the i-th char in the bytes
             iterator = create_nanoarrow_pyarrow_iterator(
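
Back-of-envelope on the saving (chunk size assumed): the old loop mutated every byte once; the sampled loop tops out around 200 positions regardless of size.

import random

data_len = 4096  # assumed chunk size
positions = set(range(min(50, data_len)))
positions |= set(range(max(0, data_len // 2 - 25), min(data_len, data_len // 2 + 25)))
positions |= set(range(max(0, data_len - 50), data_len))
random.seed(42)
positions |= set(random.sample(range(data_len), min(50, data_len)))
print(f"{len(positions)} of {data_len} byte positions exercised")  # about 200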

test/integ/conftest.py

Lines changed: 33 additions & 6 deletions
@@ -41,10 +41,30 @@
 
 logger = getLogger(__name__)
 
-if RUNNING_ON_GH:
-    TEST_SCHEMA = "GH_JOB_{}".format(str(uuid.uuid4()).replace("-", "_"))
-else:
-    TEST_SCHEMA = "python_connector_tests_" + str(uuid.uuid4()).replace("-", "_")
+
+def _get_worker_specific_schema():
+    """Generate worker-specific schema name for parallel test execution."""
+    base_uuid = str(uuid.uuid4()).replace("-", "_")
+
+    # Check if running in pytest-xdist parallel mode
+    worker_id = os.getenv("PYTEST_XDIST_WORKER")
+    if worker_id:
+        # Use worker ID to ensure unique schema per worker
+        worker_suffix = worker_id.replace("-", "_")
+        if RUNNING_ON_GH:
+            return f"GH_JOB_{worker_suffix}_{base_uuid}"
+        else:
+            return f"python_connector_tests_{worker_suffix}_{base_uuid}"
+    else:
+        # Single worker mode (original behavior)
+        if RUNNING_ON_GH:
+            return f"GH_JOB_{base_uuid}"
+        else:
+            return f"python_connector_tests_{base_uuid}"
+
+
+TEST_SCHEMA = _get_worker_specific_schema()
+
 
 if TEST_USING_VENDORED_ARROW:
     snowflake.connector.cursor.NANOARR_USAGE = (
@@ -136,8 +156,15 @@ def get_db_parameters(connection_name: str = "default") -> dict[str, Any]:
         print_help()
         sys.exit(2)
 
-    # a unique table name
-    ret["name"] = "python_tests_" + str(uuid.uuid4()).replace("-", "_")
+    # a unique table name (worker-specific for parallel execution)
+    base_uuid = str(uuid.uuid4()).replace("-", "_")
+    worker_id = os.getenv("PYTEST_XDIST_WORKER")
+    if worker_id:
+        # Include worker ID to prevent conflicts between parallel workers
+        worker_suffix = worker_id.replace("-", "_")
+        ret["name"] = f"python_tests_{worker_suffix}_{base_uuid}"
+    else:
+        ret["name"] = f"python_tests_{base_uuid}"
     ret["name_wh"] = ret["name"] + "wh"
 
     ret["schema"] = TEST_SCHEMA
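
pytest-xdist publishes each worker's id (gw0, gw1, ...) in the PYTEST_XDIST_WORKER environment variable and leaves it unset in single-process runs, which is what both branches above key off. Illustrative outcomes (uuid shortened):

# PYTEST_XDIST_WORKER=gw3, on GitHub Actions -> schema GH_JOB_gw3_<uuid>
# PYTEST_XDIST_WORKER=gw3, local run         -> schema python_connector_tests_gw3_<uuid>
# variable unset (no xdist), GitHub Actions  -> schema GH_JOB_<uuid>
# table name under worker gw3                -> python_tests_gw3_<uuid>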

test/integ/test_arrow_result.py

Lines changed: 39 additions & 28 deletions
@@ -300,7 +300,7 @@ def pandas_verify(cur, data, deserialize):
         ), f"Result value {value} should match input example {datum}."
 
 
-@pytest.mark.parametrize("datatype", ICEBERG_UNSUPPORTED_TYPES)
+@pytest.mark.parametrize("datatype", sorted(ICEBERG_UNSUPPORTED_TYPES))
 def test_iceberg_negative(datatype, conn_cnx, iceberg_support, structured_type_support):
     if not iceberg_support:
         pytest.skip("Test requires iceberg support.")
@@ -999,35 +999,46 @@ def test_select_vector(conn_cnx, is_public_test):
 
 
 def test_select_time(conn_cnx):
-    for scale in range(10):
-        select_time_with_scale(conn_cnx, scale)
-
-
-def select_time_with_scale(conn_cnx, scale):
+    # Test key scales and meaningful cases in a single table operation
+    # Cover: no fractional seconds, milliseconds, microseconds, nanoseconds
+    scales = [0, 3, 6, 9]  # Key precision levels
     cases = [
-        "00:01:23",
-        "00:01:23.1",
-        "00:01:23.12",
-        "00:01:23.123",
-        "00:01:23.1234",
-        "00:01:23.12345",
-        "00:01:23.123456",
-        "00:01:23.1234567",
-        "00:01:23.12345678",
-        "00:01:23.123456789",
+        "00:01:23",  # Basic time
+        "00:01:23.123456789",  # Max precision
+        "23:59:59.999999999",  # Edge case - max time value at max precision
+        "00:00:00.000000001",  # Edge case - min nonzero time value at max precision
     ]
-    table = "test_arrow_time"
-    column = f"(a time({scale}))"
-    values = (
-        "(-1, NULL), ("
-        + "),(".join([f"{i}, '{c}'" for i, c in enumerate(cases)])
-        + f"), ({len(cases)}, NULL)"
-    )
-    init(conn_cnx, table, column, values)
-    sql_text = f"select a from {table} order by s"
-    row_count = len(cases) + 2
-    col_count = 1
-    iterate_over_test_chunk("time", conn_cnx, sql_text, row_count, col_count)
+
+    table = "test_arrow_time_scales"
+
+    # Create columns for selected scales only (init function will add 's number' automatically)
+    columns = ", ".join([f"a{i} time({i})" for i in scales])
+    column_def = f"({columns})"
+
+    # Create values for selected scales - each case tests all scales simultaneously
+    value_rows = []
+    for i, case in enumerate(cases):
+        # Each row has the same time value for all scale columns
+        time_values = ", ".join([f"'{case}'" for _ in scales])
+        value_rows.append(f"({i}, {time_values})")
+
+    # Add NULL rows
+    null_values = ", ".join(["NULL" for _ in scales])
+    value_rows.append(f"(-1, {null_values})")
+    value_rows.append(f"({len(cases)}, {null_values})")
+
+    values = ", ".join(value_rows)
+
+    # Single table creation and test
+    init(conn_cnx, table, column_def, values)
+
+    # Test each scale column
+    for scale in scales:
+        sql_text = f"select a{scale} from {table} order by s"
+        row_count = len(cases) + 2
+        col_count = 1
+        iterate_over_test_chunk("time", conn_cnx, sql_text, row_count, col_count)
+
     finish(conn_cnx, table)
 
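For reference, the DDL the rewritten test generates, reproduced with a standalone sketch of the same string building (assuming, per the comment in the diff, that init() prepends the 's number' ordering column):

scales = [0, 3, 6, 9]
columns = ", ".join(f"a{i} time({i})" for i in scales)
print(f"create table test_arrow_time_scales (s number, {columns})")
# create table test_arrow_time_scales (s number, a0 time(0), a3 time(3), a6 time(6), a9 time(9))
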
test/integ/test_connection.py

Lines changed: 18 additions & 1 deletion
@@ -111,6 +111,8 @@ def test_connection_without_database2(db_parameters):
 
 def test_with_config(db_parameters):
     """Creates a connection with the config parameter."""
+    from ..conftest import get_server_parameter_value
+
     config = {
         "user": db_parameters["user"],
         "password": db_parameters["password"],
@@ -125,7 +127,22 @@ def test_with_config(db_parameters):
     cnx = snowflake.connector.connect(**config)
     try:
         assert cnx, "invalid cnx"
-        assert not cnx.client_session_keep_alive  # default is False
+
+        # Check what the server default is to make test environment-aware
+        server_default_str = get_server_parameter_value(
+            cnx, "CLIENT_SESSION_KEEP_ALIVE"
+        )
+        if server_default_str:
+            server_default = server_default_str.lower() == "true"
+            # Test that connection respects server default when not explicitly set
+            assert (
+                cnx.client_session_keep_alive == server_default
+            ), f"Expected client_session_keep_alive={server_default} (server default), got {cnx.client_session_keep_alive}"
+        else:
+            # Fallback: if we can't determine server default, expect False
+            assert (
+                not cnx.client_session_keep_alive
+            ), "Expected client_session_keep_alive=False when server default unknown"
     finally:
         cnx.close()

test/integ/test_dbapi.py

Lines changed: 59 additions & 9 deletions
@@ -724,15 +724,65 @@ def test_escape(conn_local):
     with conn_local() as con:
         cur = con.cursor()
         executeDDL1(cur)
-        for i in teststrings:
-            args = {"dbapi_ddl2": i}
-            cur.execute("insert into %s values (%%(dbapi_ddl2)s)" % TABLE1, args)
-            cur.execute("select * from %s" % TABLE1)
-            row = cur.fetchone()
-            cur.execute("delete from %s where name=%%s" % TABLE1, i)
-            assert (
-                i == row[0]
-            ), f"newline not properly converted, got {row[0]}, should be {i}"
+
+        # Test 1: Batch INSERT with dictionary parameters (executemany)
+        # This tests the same dictionary parameter binding as the original
+        batch_args = [{"dbapi_ddl2": test_string} for test_string in teststrings]
+        cur.executemany("insert into %s values (%%(dbapi_ddl2)s)" % TABLE1, batch_args)
+
+        # Test 2: Batch SELECT with no parameters
+        # This tests the same SELECT functionality as the original
+        cur.execute("select name from %s" % TABLE1)
+        rows = cur.fetchall()
+
+        # Verify each test string was properly escaped/handled
+        assert len(rows) == len(
+            teststrings
+        ), f"Expected {len(teststrings)} rows, got {len(rows)}"
+
+        # Extract actual strings from result set
+        actual_strings = {row[0] for row in rows}  # Use set to ignore order
+        expected_strings = set(teststrings)
+
+        # Verify all expected strings are present
+        missing_strings = expected_strings - actual_strings
+        extra_strings = actual_strings - expected_strings
+
+        assert len(missing_strings) == 0, f"Missing strings: {missing_strings}"
+        assert len(extra_strings) == 0, f"Extra strings: {extra_strings}"
+        assert actual_strings == expected_strings, "String sets don't match"
+
+        # Test 3: DELETE with positional parameters (batched for efficiency)
+        # This maintains the same DELETE parameter binding test as the original
+        # We test a representative subset to maintain coverage while being efficient
+        critical_test_strings = [
+            teststrings[0],  # Basic newline: "abc\ndef"
+            teststrings[5],  # Double quote: 'abc"def'
+            teststrings[7],  # Single quote: "abc'def"
+            teststrings[13],  # Tab: "abc\tdef"
+            teststrings[16],  # Backslash-x: "\\x"
+        ]
+
+        # Batch DELETE with positional parameters using executemany
+        # This tests the same positional parameter binding as the original individual DELETEs
+        cur.executemany(
+            "delete from %s where name=%%s" % TABLE1,
+            [(test_string,) for test_string in critical_test_strings],
+        )
+
+        # Batch verification: check that all critical strings were deleted
+        cur.execute(
+            "select name from %s where name in (%s)"
+            % (TABLE1, ",".join(["%s"] * len(critical_test_strings))),
+            critical_test_strings,
+        )
+        remaining_critical = cur.fetchall()
+        assert (
+            len(remaining_critical) == 0
+        ), f"Failed to delete strings: {[row[0] for row in remaining_critical]}"
+
+        # Clean up remaining rows
+        cur.execute("delete from %s" % TABLE1)
 
 
 @pytest.mark.skipolddriver
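
The rewrite keeps both parameter-binding styles the original loop exercised, pyformat dictionaries for INSERT and positional tuples for DELETE, while collapsing per-string round trips into two executemany calls. In miniature (table name and values illustrative):

def escape_roundtrip(cur, values):
    # One INSERT round trip, dict (pyformat) binding per row
    cur.executemany("insert into t (name) values (%(v)s)", [{"v": v} for v in values])
    # One DELETE round trip, positional binding per row
    cur.executemany("delete from t where name = %s", [(v,) for v in values])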
