Skip to content

Commit a3c580a

Browse files
committed
Fix updating account url for WasbHook
Users supply the hostname (account URL) for Azure in different ways: sometimes the host has no urlparse.scheme but does have a urlparse.path, e.g. name.blob.windows.net, and other times it is just the Azure ID, e.g. aldhjf9dads. While working on apache#32980, I assumed that a hostname without a scheme was not valid; that is incorrect, since a DNS name can serve as the host. The fix is to check whether the parsed URL has no netloc and its urlparse.path contains no dot; if both conditions hold, the login/account_name is used to construct the account_url.
1 parent e90febc commit a3c580a

File tree

2 files changed

+42
-10
lines changed
  • airflow/providers/microsoft/azure/hooks
  • tests/providers/microsoft/azure/hooks

2 files changed

+42
-10
lines changed

airflow/providers/microsoft/azure/hooks/wasb.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import logging
2929
import os
3030
from typing import Any, Union
31+
from urllib.parse import urlparse
3132

3233
from asgiref.sync import sync_to_async
3334
from azure.core.exceptions import HttpResponseError, ResourceExistsError, ResourceNotFoundError
@@ -152,11 +153,13 @@ def get_conn(self) -> BlobServiceClient:
152153
# connection_string auth takes priority
153154
return BlobServiceClient.from_connection_string(connection_string, **extra)
154155

155-
account_url = (
156-
conn.host
157-
if conn.host and conn.host.startswith("https://")
158-
else f"https://{conn.login}.blob.core.windows.net/"
159-
)
156+
account_url = conn.host if conn.host else f"https://{conn.login}.blob.core.windows.net/"
157+
parsed_url = urlparse(account_url)
158+
159+
if not parsed_url.netloc and "." not in parsed_url.path:
160+
# if there's no netloc and no dots in the path, then user only
161+
# provided the host ID, not the full URL or DNS name
162+
account_url = f"https://{conn.login}.blob.core.windows.net/"
160163

161164
tenant = self._get_field(extra, "tenant_id")
162165
if tenant:
@@ -555,11 +558,13 @@ async def get_async_conn(self) -> AsyncBlobServiceClient:
555558
)
556559
return self.blob_service_client
557560

558-
account_url = (
559-
conn.host
560-
if conn.host and conn.host.startswith("https://")
561-
else f"https://{conn.login}.blob.core.windows.net/"
562-
)
561+
account_url = conn.host if conn.host else f"https://{conn.login}.blob.core.windows.net/"
562+
parsed_url = urlparse(account_url)
563+
564+
if not parsed_url.netloc and "." not in parsed_url.path:
565+
# if there's no netloc and no dots in the path, then user only
566+
# provided the host ID, not the full URL or DNS name
567+
account_url = f"https://{conn.login}.blob.core.windows.net/"
563568

564569
tenant = self._get_field(extra, "tenant_id")
565570
if tenant:

tests/providers/microsoft/azure/hooks/test_wasb.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,33 @@ def test_extra_client_secret_auth_config_ad_connection(self, mock_get_conn):
359359
conn = hook.get_conn()
360360
assert conn.credential._authority == self.authority
361361

362+
@pytest.mark.parametrize(
363+
"provided_host, expected_host",
364+
[
365+
(
366+
"https://testaccountname.blob.core.windows.net",
367+
"https://testaccountname.blob.core.windows.net",
368+
),
369+
("testhost", "https://accountlogin.blob.core.windows.net/"),
370+
("testhost.dns", "testhost.dns"),
371+
("testhost.blob.net", "testhost.blob.net"),
372+
],
373+
)
374+
@mock.patch("airflow.providers.microsoft.azure.hooks.wasb.BlobServiceClient")
375+
@mock.patch("airflow.providers.microsoft.azure.hooks.wasb.WasbHook.get_connection")
376+
def test_proper_account_url_update(
377+
self, mock_get_conn, mock_blob_service_client, provided_host, expected_host
378+
):
379+
mock_get_conn.return_value = Connection(
380+
conn_id="test_conn",
381+
conn_type=self.connection_type,
382+
password="testpass",
383+
login="accountlogin",
384+
host=provided_host,
385+
)
386+
WasbHook(wasb_conn_id=self.shared_key_conn_id)
387+
mock_blob_service_client.assert_called_once_with(account_url=expected_host, credential="testpass")
388+
362389
@mock.patch("airflow.providers.microsoft.azure.hooks.wasb.BlobServiceClient")
363390
@mock.patch("airflow.providers.microsoft.azure.hooks.wasb.WasbHook.get_connection")
364391
def test_check_for_blob(self, mock_get_conn, mock_service):

0 commit comments

Comments
 (0)