Commit 411c973

SNOW-2021009: test optimisation (#2388)
1 parent 456cb39 commit 411c973

14 files changed: +371 -95 lines

.github/workflows/build_test.yml

Lines changed: 1 addition & 1 deletion
@@ -173,7 +173,7 @@ jobs:
       - name: Run tests
         # To run a single test on GHA use the below command:
         # run: python -m tox run -e `echo py${PYTHON_VERSION/\./}-single-ci | sed 's/ /,/g'`
-        run: python -m tox run -e `echo py${PYTHON_VERSION/\./}-{extras,unit,integ,pandas,sso}-ci | sed 's/ /,/g'`
+        run: python -m tox run -e `echo py${PYTHON_VERSION/\./}-{extras,unit-parallel,integ-parallel,pandas-parallel,sso}-ci | sed 's/ /,/g'`
 
         env:
           PYTHON_VERSION: ${{ matrix.python-version }}
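
What the backticked expression now evaluates to, sketched in Python for an assumed PYTHON_VERSION of 3.9 (the shell does the same thing with brace expansion plus sed):

suites = ["extras", "unit-parallel", "integ-parallel", "pandas-parallel", "sso"]
py = "3.9".replace(".", "")  # version illustrative
print(",".join(f"py{py}-{s}-ci" for s in suites))
# py39-extras-ci,py39-unit-parallel-ci,py39-integ-parallel-ci,py39-pandas-parallel-ci,py39-sso-ci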

ci/test_fips.sh

Lines changed: 7 additions & 1 deletion
@@ -14,6 +14,10 @@ curl https://repo1.maven.org/maven2/org/wiremock/wiremock-standalone/3.11.0/wire
 python3 -m venv fips_env
 source fips_env/bin/activate
 pip install -U setuptools pip
+
+# Install pytest-xdist for parallel execution
+pip install pytest-xdist
+
 pip install "${CONNECTOR_WHL}[pandas,secure-local-storage,development]"
 
 echo "!!! Environment description !!!"
@@ -24,6 +28,8 @@ python -c "from cryptography.hazmat.backends.openssl import backend;print('Cryp
 pip freeze
 
 cd $CONNECTOR_DIR
-pytest -vvv --cov=snowflake.connector --cov-report=xml:coverage.xml test
+
+# Run tests in parallel using pytest-xdist
+pytest -n auto -vvv --cov=snowflake.connector --cov-report=xml:coverage.xml test
 
 deactivate
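
On the new flag: with pytest-xdist, -n auto sizes the worker pool to the machine's available CPUs (the exact probing logic varies by xdist version). A rough Python equivalent:

import os

print(f"pytest -n auto  is roughly  pytest -n {os.cpu_count() or 1}")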

ci/test_linux.sh

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ else
     echo "[Info] Testing with ${PYTHON_VERSION}"
     SHORT_VERSION=$(python3.10 -c "print('${PYTHON_VERSION}'.replace('.', ''))")
     CONNECTOR_WHL=$(ls $CONNECTOR_DIR/dist/snowflake_connector_python*cp${SHORT_VERSION}*manylinux2014*.whl | sort -r | head -n 1)
-    TEST_LIST=`echo py${PYTHON_VERSION/\./}-{unit,integ,pandas,sso}-ci | sed 's/ /,/g'`
+    TEST_LIST=`echo py${PYTHON_VERSION/\./}-{unit-parallel,integ,pandas-parallel,sso}-ci | sed 's/ /,/g'`
     TEST_ENVLIST=fix_lint,$TEST_LIST,py${PYTHON_VERSION/\./}-coverage
     echo "[Info] Running tox for ${TEST_ENVLIST}"

src/snowflake/connector/ocsp_snowflake.py

Lines changed: 2 additions & 2 deletions
@@ -572,7 +572,7 @@ def _download_ocsp_response_cache(ocsp, url, do_retry: bool = True) -> bool:
                     response.status_code,
                     sleep_time,
                 )
-            time.sleep(sleep_time)
+                time.sleep(sleep_time)
         else:
             logger.error(
                 "Failed to get OCSP response after %s attempt.", max_retry
@@ -1645,7 +1645,7 @@ def _fetch_ocsp_response(
                         response.status_code,
                         sleep_time,
                     )
-                time.sleep(sleep_time)
+                    time.sleep(sleep_time)
             except Exception as ex:
                 if max_retry > 1:
                     sleep_time = next(backoff)
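
Both hunks are indentation fixes: time.sleep(sleep_time) moves one level deeper so it runs only on the retry branch rather than unconditionally. A simplified sketch of the assumed loop shape (fetch_once and the backoff stand-in are illustrative, not the connector's API):

import time

def fetch_with_retry(fetch_once, max_retry=3):
    backoff = iter(2**i for i in range(max_retry))  # stand-in backoff generator
    for _ in range(max_retry):
        response = fetch_once()
        if response is not None:
            return response
        if max_retry > 1:
            sleep_time = next(backoff)
            time.sleep(sleep_time)  # sleep only when another attempt may follow
    raise RuntimeError(f"Failed to get OCSP response after {max_retry} attempts")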

test/conftest.py

Lines changed: 11 additions & 0 deletions
@@ -148,3 +148,14 @@ def pytest_runtest_setup(item) -> None:
     if "auth" in test_tags:
         if os.getenv("RUN_AUTH_TESTS") != "true":
             pytest.skip("Skipping auth test in current environment")
+
+
+def get_server_parameter_value(connection, parameter_name: str) -> str | None:
+    """Get server parameter value, returns None if parameter doesn't exist."""
+    try:
+        with connection.cursor() as cur:
+            cur.execute(f"show parameters like '{parameter_name}'")
+            ret = cur.fetchone()
+            return ret[1] if ret else None
+    except Exception:
+        return None
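
A sketch of how a test can use the new helper (the conn_cnx fixture and the parameter name are illustrative; test_connection.py below imports it the same way):

def test_some_server_default(conn_cnx):
    from ..conftest import get_server_parameter_value

    with conn_cnx() as cnx:
        value = get_server_parameter_value(cnx, "CLIENT_SESSION_KEEP_ALIVE")
        # None means the parameter is missing or the lookup failed
        assert value is None or isinstance(value, str)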

test/helpers.py

Lines changed: 28 additions & 1 deletion
@@ -145,7 +145,34 @@ def _arrow_error_stream_chunk_remove_single_byte_test(use_table_iterator):
     decode_bytes = base64.b64decode(b64data)
     exception_result = []
     result_array = []
-    for i in range(len(decode_bytes)):
+
+    # Test strategic positions instead of every byte for performance
+    # Test header (first 50), middle section, end (last 50), and some random positions
+    data_len = len(decode_bytes)
+    test_positions = set()
+
+    # Critical positions: beginning (headers/metadata)
+    test_positions.update(range(min(50, data_len)))
+
+    # Middle section positions
+    mid_start = data_len // 2 - 25
+    mid_end = data_len // 2 + 25
+    test_positions.update(range(max(0, mid_start), min(data_len, mid_end)))
+
+    # End positions
+    test_positions.update(range(max(0, data_len - 50), data_len))
+
+    # Some random positions throughout the data (for broader coverage)
+    import random
+
+    random.seed(42)  # Deterministic for reproducible tests
+    random_positions = random.sample(range(data_len), min(50, data_len))
+    test_positions.update(random_positions)
+
+    # Convert to sorted list for consistent execution
+    test_positions = sorted(test_positions)
+
+    for i in test_positions:
         try:
             # removing the i-th char in the bytes
             iterator = create_nanoarrow_pyarrow_iterator(
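
Back-of-envelope on the saving (chunk size assumed): the old loop mutated every byte once; the sampled loop tops out around 200 positions regardless of size.

import random

data_len = 4096  # assumed chunk size
positions = set(range(min(50, data_len)))
positions |= set(range(max(0, data_len // 2 - 25), min(data_len, data_len // 2 + 25)))
positions |= set(range(max(0, data_len - 50), data_len))
random.seed(42)
positions |= set(random.sample(range(data_len), min(50, data_len)))
print(f"{len(positions)} of {data_len} byte positions exercised")  # about 200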

test/integ/conftest.py

Lines changed: 33 additions & 6 deletions
@@ -41,10 +41,30 @@
 
 logger = getLogger(__name__)
 
-if RUNNING_ON_GH:
-    TEST_SCHEMA = "GH_JOB_{}".format(str(uuid.uuid4()).replace("-", "_"))
-else:
-    TEST_SCHEMA = "python_connector_tests_" + str(uuid.uuid4()).replace("-", "_")
+
+def _get_worker_specific_schema():
+    """Generate worker-specific schema name for parallel test execution."""
+    base_uuid = str(uuid.uuid4()).replace("-", "_")
+
+    # Check if running in pytest-xdist parallel mode
+    worker_id = os.getenv("PYTEST_XDIST_WORKER")
+    if worker_id:
+        # Use worker ID to ensure unique schema per worker
+        worker_suffix = worker_id.replace("-", "_")
+        if RUNNING_ON_GH:
+            return f"GH_JOB_{worker_suffix}_{base_uuid}"
+        else:
+            return f"python_connector_tests_{worker_suffix}_{base_uuid}"
+    else:
+        # Single worker mode (original behavior)
+        if RUNNING_ON_GH:
+            return f"GH_JOB_{base_uuid}"
+        else:
+            return f"python_connector_tests_{base_uuid}"
+
+
+TEST_SCHEMA = _get_worker_specific_schema()
+
 
 if TEST_USING_VENDORED_ARROW:
     snowflake.connector.cursor.NANOARR_USAGE = (
@@ -136,8 +156,15 @@ def get_db_parameters(connection_name: str = "default") -> dict[str, Any]:
         print_help()
         sys.exit(2)
 
-    # a unique table name
-    ret["name"] = "python_tests_" + str(uuid.uuid4()).replace("-", "_")
+    # a unique table name (worker-specific for parallel execution)
+    base_uuid = str(uuid.uuid4()).replace("-", "_")
+    worker_id = os.getenv("PYTEST_XDIST_WORKER")
+    if worker_id:
+        # Include worker ID to prevent conflicts between parallel workers
+        worker_suffix = worker_id.replace("-", "_")
+        ret["name"] = f"python_tests_{worker_suffix}_{base_uuid}"
+    else:
+        ret["name"] = f"python_tests_{base_uuid}"
     ret["name_wh"] = ret["name"] + "wh"
 
     ret["schema"] = TEST_SCHEMA
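
pytest-xdist publishes each worker's id (gw0, gw1, ...) in the PYTEST_XDIST_WORKER environment variable and leaves it unset in single-process runs, which is what both branches above key off. Illustrative outcomes (uuid shortened):

# PYTEST_XDIST_WORKER=gw3, on GitHub Actions -> schema GH_JOB_gw3_<uuid>
# PYTEST_XDIST_WORKER=gw3, local run         -> schema python_connector_tests_gw3_<uuid>
# variable unset (no xdist), GitHub Actions  -> schema GH_JOB_<uuid>
# table name under worker gw3                -> python_tests_gw3_<uuid>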

test/integ/test_arrow_result.py

Lines changed: 39 additions & 28 deletions
@@ -300,7 +300,7 @@ def pandas_verify(cur, data, deserialize):
         ), f"Result value {value} should match input example {datum}."
 
 
-@pytest.mark.parametrize("datatype", ICEBERG_UNSUPPORTED_TYPES)
+@pytest.mark.parametrize("datatype", sorted(ICEBERG_UNSUPPORTED_TYPES))
 def test_iceberg_negative(datatype, conn_cnx, iceberg_support, structured_type_support):
     if not iceberg_support:
         pytest.skip("Test requires iceberg support.")
@@ -999,35 +999,46 @@ def test_select_vector(conn_cnx, is_public_test):
 
 
 def test_select_time(conn_cnx):
-    for scale in range(10):
-        select_time_with_scale(conn_cnx, scale)
-
-
-def select_time_with_scale(conn_cnx, scale):
+    # Test key scales and meaningful cases in a single table operation
+    # Cover: no fractional seconds, milliseconds, microseconds, nanoseconds
+    scales = [0, 3, 6, 9]  # Key precision levels
     cases = [
-        "00:01:23",
-        "00:01:23.1",
-        "00:01:23.12",
-        "00:01:23.123",
-        "00:01:23.1234",
-        "00:01:23.12345",
-        "00:01:23.123456",
-        "00:01:23.1234567",
-        "00:01:23.12345678",
-        "00:01:23.123456789",
+        "00:01:23",  # Basic time
+        "00:01:23.123456789",  # Max precision
+        "23:59:59.999999999",  # Edge case - max time value at max precision
+        "00:00:00.000000001",  # Edge case - min nonzero time value at max precision
     ]
-    table = "test_arrow_time"
-    column = f"(a time({scale}))"
-    values = (
-        "(-1, NULL), ("
-        + "),(".join([f"{i}, '{c}'" for i, c in enumerate(cases)])
-        + f"), ({len(cases)}, NULL)"
-    )
-    init(conn_cnx, table, column, values)
-    sql_text = f"select a from {table} order by s"
-    row_count = len(cases) + 2
-    col_count = 1
-    iterate_over_test_chunk("time", conn_cnx, sql_text, row_count, col_count)
+
+    table = "test_arrow_time_scales"
+
+    # Create columns for selected scales only (init function will add 's number' automatically)
+    columns = ", ".join([f"a{i} time({i})" for i in scales])
+    column_def = f"({columns})"
+
+    # Create values for selected scales - each case tests all scales simultaneously
+    value_rows = []
+    for i, case in enumerate(cases):
+        # Each row has the same time value for all scale columns
+        time_values = ", ".join([f"'{case}'" for _ in scales])
+        value_rows.append(f"({i}, {time_values})")
+
+    # Add NULL rows
+    null_values = ", ".join(["NULL" for _ in scales])
+    value_rows.append(f"(-1, {null_values})")
+    value_rows.append(f"({len(cases)}, {null_values})")
+
+    values = ", ".join(value_rows)
+
+    # Single table creation and test
+    init(conn_cnx, table, column_def, values)
+
+    # Test each scale column
+    for scale in scales:
+        sql_text = f"select a{scale} from {table} order by s"
+        row_count = len(cases) + 2
+        col_count = 1
+        iterate_over_test_chunk("time", conn_cnx, sql_text, row_count, col_count)
+
     finish(conn_cnx, table)
 
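For reference, the DDL the rewritten test generates, reproduced with a standalone sketch of the same string building (assuming, per the comment in the diff, that init() prepends the 's number' ordering column):

scales = [0, 3, 6, 9]
columns = ", ".join(f"a{i} time({i})" for i in scales)
print(f"create table test_arrow_time_scales (s number, {columns})")
# create table test_arrow_time_scales (s number, a0 time(0), a3 time(3), a6 time(6), a9 time(9))
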
test/integ/test_connection.py

Lines changed: 18 additions & 1 deletion
@@ -111,6 +111,8 @@ def test_connection_without_database2(db_parameters):
 
 def test_with_config(db_parameters):
     """Creates a connection with the config parameter."""
+    from ..conftest import get_server_parameter_value
+
     config = {
         "user": db_parameters["user"],
         "password": db_parameters["password"],
@@ -125,7 +127,22 @@ def test_with_config(db_parameters):
     cnx = snowflake.connector.connect(**config)
     try:
         assert cnx, "invalid cnx"
-        assert not cnx.client_session_keep_alive  # default is False
+
+        # Check what the server default is to make test environment-aware
+        server_default_str = get_server_parameter_value(
+            cnx, "CLIENT_SESSION_KEEP_ALIVE"
+        )
+        if server_default_str:
+            server_default = server_default_str.lower() == "true"
+            # Test that connection respects server default when not explicitly set
+            assert (
+                cnx.client_session_keep_alive == server_default
+            ), f"Expected client_session_keep_alive={server_default} (server default), got {cnx.client_session_keep_alive}"
+        else:
+            # Fallback: if we can't determine server default, expect False
+            assert (
+                not cnx.client_session_keep_alive
+            ), "Expected client_session_keep_alive=False when server default unknown"
     finally:
         cnx.close()

test/integ/test_dbapi.py

Lines changed: 59 additions & 9 deletions
@@ -724,15 +724,65 @@ def test_escape(conn_local):
     with conn_local() as con:
         cur = con.cursor()
         executeDDL1(cur)
-        for i in teststrings:
-            args = {"dbapi_ddl2": i}
-            cur.execute("insert into %s values (%%(dbapi_ddl2)s)" % TABLE1, args)
-            cur.execute("select * from %s" % TABLE1)
-            row = cur.fetchone()
-            cur.execute("delete from %s where name=%%s" % TABLE1, i)
-            assert (
-                i == row[0]
-            ), f"newline not properly converted, got {row[0]}, should be {i}"
+
+        # Test 1: Batch INSERT with dictionary parameters (executemany)
+        # This tests the same dictionary parameter binding as the original
+        batch_args = [{"dbapi_ddl2": test_string} for test_string in teststrings]
+        cur.executemany("insert into %s values (%%(dbapi_ddl2)s)" % TABLE1, batch_args)
+
+        # Test 2: Batch SELECT with no parameters
+        # This tests the same SELECT functionality as the original
+        cur.execute("select name from %s" % TABLE1)
+        rows = cur.fetchall()
+
+        # Verify each test string was properly escaped/handled
+        assert len(rows) == len(
+            teststrings
+        ), f"Expected {len(teststrings)} rows, got {len(rows)}"
+
+        # Extract actual strings from result set
+        actual_strings = {row[0] for row in rows}  # Use set to ignore order
+        expected_strings = set(teststrings)
+
+        # Verify all expected strings are present
+        missing_strings = expected_strings - actual_strings
+        extra_strings = actual_strings - expected_strings
+
+        assert len(missing_strings) == 0, f"Missing strings: {missing_strings}"
+        assert len(extra_strings) == 0, f"Extra strings: {extra_strings}"
+        assert actual_strings == expected_strings, "String sets don't match"
+
+        # Test 3: DELETE with positional parameters (batched for efficiency)
+        # This maintains the same DELETE parameter binding test as the original
+        # We test a representative subset to maintain coverage while being efficient
+        critical_test_strings = [
+            teststrings[0],  # Basic newline: "abc\ndef"
+            teststrings[5],  # Double quote: 'abc"def'
+            teststrings[7],  # Single quote: "abc'def"
+            teststrings[13],  # Tab: "abc\tdef"
+            teststrings[16],  # Backslash-x: "\\x"
+        ]
+
+        # Batch DELETE with positional parameters using executemany
+        # This tests the same positional parameter binding as the original individual DELETEs
+        cur.executemany(
+            "delete from %s where name=%%s" % TABLE1,
+            [(test_string,) for test_string in critical_test_strings],
+        )
+
+        # Batch verification: check that all critical strings were deleted
+        cur.execute(
+            "select name from %s where name in (%s)"
+            % (TABLE1, ",".join(["%s"] * len(critical_test_strings))),
+            critical_test_strings,
+        )
+        remaining_critical = cur.fetchall()
+        assert (
+            len(remaining_critical) == 0
+        ), f"Failed to delete strings: {[row[0] for row in remaining_critical]}"
+
+        # Clean up remaining rows
+        cur.execute("delete from %s" % TABLE1)
 
 
 @pytest.mark.skipolddriver
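
The rewrite keeps both parameter-binding styles the original loop exercised, pyformat dictionaries for INSERT and positional tuples for DELETE, while collapsing per-string round trips into two executemany calls. In miniature (table name and values illustrative):

def escape_roundtrip(cur, values):
    # One INSERT round trip, dict (pyformat) binding per row
    cur.executemany("insert into t (name) values (%(v)s)", [{"v": v} for v in values])
    # One DELETE round trip, positional binding per row
    cur.executemany("delete from t where name = %s", [(v,) for v in values])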
