Skip to content

Commit d5fcedf

Browse files
CopilotAsafMah
andauthored
Cache CloudInfo / CloudSettings by authority (#583)
* Initial plan for issue * Update CloudSettings to cache by authority Co-authored-by: AsafMah <[email protected]> * Apply Black formatting Co-authored-by: AsafMah <[email protected]> * Enhance test_cloud_info to better verify authority-based caching Co-authored-by: AsafMah <[email protected]> * Update CHANGELOG.md to place changes under Unreleased section Co-authored-by: AsafMah <[email protected]> * Remove duplicate top-level test files, use proper tests in azure-kusto-data/tests Co-authored-by: AsafMah <[email protected]> * Run black 23.3.0 on modified files Co-authored-by: AsafMah <[email protected]> * Convert test_cloud_settings.py from unittest to pytest Co-authored-by: AsafMah <[email protected]> * Remove temporary test file Co-authored-by: AsafMah <[email protected]> * Undo changes to test_e2e_data.py Co-authored-by: AsafMah <[email protected]> --------- Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: AsafMah <[email protected]>
1 parent 7f76fb8 commit d5fcedf

File tree

3 files changed

+150
-38
lines changed

3 files changed

+150
-38
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## Unreleased
9+
10+
### Fixed
11+
12+
- CloudInfo / CloudSettings now cached by authority (schema, host and port) instead of full URL
13+
814
## [5.0.3] - 2025-05-04
915

1016
### Fixed

azure-kusto-data/azure/kusto/data/_cloud_settings.py

Lines changed: 39 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -54,22 +54,22 @@ class CloudSettings:
5454
first_party_authority_url=DEFAULT_FIRST_PARTY_AUTHORITY_URL,
5555
)
5656

57-
@classmethod
58-
@distributed_trace(name_of_span="CloudSettings.get_cloud_info", kind=SpanKind.CLIENT)
59-
def get_cloud_info_for_cluster(cls, kusto_uri: str, proxies: Optional[Dict[str, str]] = None) -> CloudInfo:
60-
kusto_uri = cls._normalize_uri(kusto_uri)
61-
62-
# tracing attributes for cloud info
63-
Span.set_cloud_info_attributes(kusto_uri)
64-
65-
if kusto_uri in cls._cloud_cache: # Double-checked locking to avoid unnecessary lock access
66-
return cls._cloud_cache[kusto_uri]
67-
68-
with cls._cloud_cache_lock:
69-
if kusto_uri in cls._cloud_cache:
70-
return cls._cloud_cache[kusto_uri]
71-
72-
url_parts = urlparse(kusto_uri)
57+
@classmethod
58+
@distributed_trace(name_of_span="CloudSettings.get_cloud_info", kind=SpanKind.CLIENT)
59+
def get_cloud_info_for_cluster(cls, kusto_uri: str, proxies: Optional[Dict[str, str]] = None) -> CloudInfo:
60+
normalized_authority = cls._normalize_uri(kusto_uri)
61+
62+
# tracing attributes for cloud info
63+
Span.set_cloud_info_attributes(kusto_uri)
64+
65+
if normalized_authority in cls._cloud_cache: # Double-checked locking to avoid unnecessary lock access
66+
return cls._cloud_cache[normalized_authority]
67+
68+
with cls._cloud_cache_lock:
69+
if normalized_authority in cls._cloud_cache:
70+
return cls._cloud_cache[normalized_authority]
71+
72+
url_parts = urlparse(kusto_uri)
7373
url = f"{url_parts.scheme}://{url_parts.netloc}/{METADATA_ENDPOINT}"
7474

7575
try:
@@ -87,31 +87,32 @@ def get_cloud_info_for_cluster(cls, kusto_uri: str, proxies: Optional[Dict[str,
8787
if content is None or content == {}:
8888
raise KustoServiceError("Kusto returned an invalid cloud metadata response", result)
8989
root = content["AzureAD"]
90-
if root is not None:
91-
cls._cloud_cache[kusto_uri] = CloudInfo(
92-
login_endpoint=root["LoginEndpoint"],
93-
login_mfa_required=root["LoginMfaRequired"],
94-
kusto_client_app_id=root["KustoClientAppId"],
95-
kusto_client_redirect_uri=root["KustoClientRedirectUri"],
96-
kusto_service_resource_id=root["KustoServiceResourceId"],
97-
first_party_authority_url=root["FirstPartyAuthorityUrl"],
98-
)
99-
else:
100-
cls._cloud_cache[kusto_uri] = cls.DEFAULT_CLOUD
101-
elif result.status_code == 404:
102-
# For now as long not all proxies implement the metadata endpoint, if no endpoint exists return public cloud data
103-
cls._cloud_cache[kusto_uri] = cls.DEFAULT_CLOUD
104-
else:
105-
raise KustoServiceError("Kusto returned an invalid cloud metadata response", result)
106-
return cls._cloud_cache[kusto_uri]
90+
if root is not None:
91+
cls._cloud_cache[normalized_authority] = CloudInfo(
92+
login_endpoint=root["LoginEndpoint"],
93+
login_mfa_required=root["LoginMfaRequired"],
94+
kusto_client_app_id=root["KustoClientAppId"],
95+
kusto_client_redirect_uri=root["KustoClientRedirectUri"],
96+
kusto_service_resource_id=root["KustoServiceResourceId"],
97+
first_party_authority_url=root["FirstPartyAuthorityUrl"],
98+
)
99+
else:
100+
cls._cloud_cache[normalized_authority] = cls.DEFAULT_CLOUD
101+
elif result.status_code == 404:
102+
# For now as long not all proxies implement the metadata endpoint, if no endpoint exists return public cloud data
103+
cls._cloud_cache[normalized_authority] = cls.DEFAULT_CLOUD
104+
else:
105+
raise KustoServiceError("Kusto returned an invalid cloud metadata response", result)
106+
return cls._cloud_cache[normalized_authority]
107107

108108
@classmethod
109109
def add_to_cache(cls, url: str, cloud_info: CloudInfo):
110110
with cls._cloud_cache_lock:
111111
cls._cloud_cache[cls._normalize_uri(url)] = cloud_info
112112

113-
@classmethod
114-
def _normalize_uri(cls, kusto_uri):
115-
if not kusto_uri.endswith("/"):
116-
kusto_uri += "/"
117-
return kusto_uri
113+
@classmethod
114+
def _normalize_uri(cls, kusto_uri):
115+
"""Extracts and returns the authority part of the URI (schema, host, port)"""
116+
url_parts = urlparse(kusto_uri)
117+
# Return only the scheme and netloc (which contains host and port if present)
118+
return f"{url_parts.scheme}://{url_parts.netloc}"
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License
3+
import pytest
4+
5+
from azure.kusto.data._cloud_settings import CloudSettings, CloudInfo
6+
7+
8+
@pytest.fixture
9+
def clear_cache():
10+
"""Fixture to clear the CloudSettings cache before each test"""
11+
with CloudSettings._cloud_cache_lock:
12+
CloudSettings._cloud_cache.clear()
13+
yield
14+
# Clean up after test if needed
15+
with CloudSettings._cloud_cache_lock:
16+
CloudSettings._cloud_cache.clear()
17+
18+
19+
def test_normalize_uri_extracts_authority():
20+
"""Test that _normalize_uri extracts only the authority part (schema, host, port) from a URI."""
21+
# Test with various URI formats
22+
test_cases = [
23+
("https://cluster.kusto.windows.net", "https://cluster.kusto.windows.net"),
24+
("https://cluster.kusto.windows.net/", "https://cluster.kusto.windows.net"),
25+
("https://cluster.kusto.windows.net/v1/rest", "https://cluster.kusto.windows.net"),
26+
("https://cluster.kusto.windows.net:443/v1/rest", "https://cluster.kusto.windows.net:443"),
27+
("http://localhost:8080/v1/rest/query", "http://localhost:8080"),
28+
("https://cluster.kusto.windows.net/database", "https://cluster.kusto.windows.net"),
29+
]
30+
31+
for input_uri, expected_authority in test_cases:
32+
assert CloudSettings._normalize_uri(input_uri) == expected_authority
33+
34+
35+
def test_cloud_info_cached_by_authority(clear_cache):
36+
"""Test that CloudInfo is cached by authority part of the URI (schema, host, port)."""
37+
# Create a test CloudInfo object
38+
test_cloud_info = CloudInfo(
39+
login_endpoint="https://login.test.com",
40+
login_mfa_required=False,
41+
kusto_client_app_id="test-app-id",
42+
kusto_client_redirect_uri="http://localhost/redirect",
43+
kusto_service_resource_id="https://test.kusto.windows.net",
44+
first_party_authority_url="https://login.test.com/tenant-id",
45+
)
46+
47+
# Add to cache with a specific URL
48+
base_url = "https://cluster.kusto.windows.net"
49+
CloudSettings.add_to_cache(base_url, test_cloud_info)
50+
51+
# Test that it can be retrieved with different path variations but same authority
52+
variations = [
53+
base_url + "/",
54+
base_url + "/database",
55+
base_url + "/v1/rest/query",
56+
base_url + "/some/other/path",
57+
]
58+
59+
for url in variations:
60+
# Use the internal _normalize_uri to get the cache key
61+
normalized_url = CloudSettings._normalize_uri(url)
62+
assert normalized_url == "https://cluster.kusto.windows.net"
63+
assert normalized_url in CloudSettings._cloud_cache
64+
65+
# Verify the retrieved CloudInfo is the same instance
66+
retrieved_info = CloudSettings._cloud_cache[normalized_url]
67+
assert retrieved_info is test_cloud_info
68+
69+
70+
def test_cloud_info_cached_with_port(clear_cache):
71+
"""Test that URIs with ports are cached separately from those without."""
72+
# Create two different CloudInfo objects
73+
cloud_info_default = CloudInfo(
74+
login_endpoint="https://login.default.com",
75+
login_mfa_required=False,
76+
kusto_client_app_id="default-app-id",
77+
kusto_client_redirect_uri="http://localhost/redirect",
78+
kusto_service_resource_id="https://default.kusto.windows.net",
79+
first_party_authority_url="https://login.default.com/tenant-id",
80+
)
81+
82+
cloud_info_with_port = CloudInfo(
83+
login_endpoint="https://login.withport.com",
84+
login_mfa_required=True,
85+
kusto_client_app_id="port-app-id",
86+
kusto_client_redirect_uri="http://localhost/redirect",
87+
kusto_service_resource_id="https://port.kusto.windows.net",
88+
first_party_authority_url="https://login.withport.com/tenant-id",
89+
)
90+
91+
# Add both to cache with different authorities
92+
CloudSettings.add_to_cache("https://cluster.kusto.windows.net", cloud_info_default)
93+
CloudSettings.add_to_cache("https://cluster.kusto.windows.net:443", cloud_info_with_port)
94+
95+
# Verify they are cached separately
96+
assert "https://cluster.kusto.windows.net" in CloudSettings._cloud_cache
97+
assert "https://cluster.kusto.windows.net:443" in CloudSettings._cloud_cache
98+
99+
# Verify each URI gets the correct CloudInfo
100+
assert CloudSettings._cloud_cache["https://cluster.kusto.windows.net"] is cloud_info_default
101+
assert CloudSettings._cloud_cache["https://cluster.kusto.windows.net:443"] is cloud_info_with_port
102+
103+
# Additional verification with variations
104+
assert CloudSettings._cloud_cache[CloudSettings._normalize_uri("https://cluster.kusto.windows.net/database")] is cloud_info_default
105+
assert CloudSettings._cloud_cache[CloudSettings._normalize_uri("https://cluster.kusto.windows.net:443/database")] is cloud_info_with_port

0 commit comments

Comments
 (0)