
Commit 991246c

Implement AzureDataFactoryPipelineRunStatusSensorAsync (#253)
1 parent 2915a67 commit 991246c

File tree

21 files changed: +810 -0 lines changed
Lines changed: 203 additions & 0 deletions
@@ -0,0 +1,203 @@
import logging
import os
import time
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.providers.microsoft.azure.operators.data_factory import (
    AzureDataFactoryRunPipelineOperator,
)
from azure.identity import ClientSecretCredential
from azure.mgmt.datafactory import DataFactoryManagementClient
from azure.mgmt.datafactory.models import (
    AzureBlobDataset,
    AzureStorageLinkedService,
    BlobSink,
    BlobSource,
    CopyActivity,
    DatasetReference,
    DatasetResource,
    Factory,
    LinkedServiceReference,
    LinkedServiceResource,
    PipelineResource,
    SecureString,
)
from azure.mgmt.resource import ResourceManagementClient

from astronomer.providers.microsoft.azure.sensors.data_factory import (
    AzureDataFactoryPipelineRunStatusSensorAsync,
)

default_args = {
    "execution_timeout": timedelta(minutes=30),
    "azure_data_factory_conn_id": "azure_data_factory_default",
    "factory_name": "ADFProvidersTeamDataFactory",  # This can also be specified in the ADF connection.
    "resource_group_name": "team_provider_resource_group_test",  # This can also be specified in the ADF connection.
}

CLIENT_ID = os.getenv("CLIENT_ID", "")
CLIENT_SECRET = os.getenv("CLIENT_SECRET", "")
TENANT_ID = os.getenv("TENANT_ID", "")
SUBSCRIPTION_ID = os.getenv("SUBSCRIPTION_ID", "")
RESOURCE_GROUP_NAME = os.getenv("RESOURCE_GROUP_NAME", "")
DATAFACTORY_NAME = os.getenv("DATAFACTORY_NAME", "")
LOCATION = os.getenv("LOCATION", "eastus")
CONNECTION_STRING = os.getenv("CONNECTION_STRING", "")
PIPELINE_NAME = os.getenv("PIPELINE_NAME", "pipeline1")
ACTIVITY_NAME = os.getenv("ACTIVITY_NAME", "copyBlobtoBlob")
DATASET_INPUT_NAME = os.getenv("DATASET_INPUT_NAME", "ds_in")
DATASET_OUTPUT_NAME = os.getenv("DATASET_OUTPUT_NAME", "ds_out")
BLOB_FILE_NAME = os.getenv("BLOB_FILE_NAME", "test.txt")
OUTPUT_BLOB_PATH = os.getenv("OUTPUT_BLOB_PATH", "container1/output")
BLOB_PATH = os.getenv("BLOB_PATH", "container1/input")
STORAGE_LINKED_SERVICE_NAME = os.getenv("STORAGE_LINKED_SERVICE_NAME", "storageLinkedService001")
rg_params = {"location": LOCATION}
df_params = {"location": LOCATION}


def create_adf_storage_pipeline() -> None:
    """
    Create the Azure resource group if it is not present, then the Azure Data Factory,
    the Azure Storage linked service, the input and output Azure Blob datasets,
    and the Data Factory pipeline.
    """
    credentials = ClientSecretCredential(
        client_id=CLIENT_ID, client_secret=CLIENT_SECRET, tenant_id=TENANT_ID
    )
    resource_client = ResourceManagementClient(credentials, SUBSCRIPTION_ID)
    resource_group_exist = None
    try:
        resource_group_exist = resource_client.resource_groups.get(RESOURCE_GROUP_NAME)
    except Exception:
        logging.info("Resource group not found, so creating one")
    if not resource_group_exist:
        resource_client.resource_groups.create_or_update(RESOURCE_GROUP_NAME, rg_params)

    # Create a data factory
    adf_client = DataFactoryManagementClient(credentials, SUBSCRIPTION_ID)
    df_resource = Factory(location=LOCATION)
    df = adf_client.factories.create_or_update(RESOURCE_GROUP_NAME, DATAFACTORY_NAME, df_resource)
    while df.provisioning_state != "Succeeded":
        df = adf_client.factories.get(RESOURCE_GROUP_NAME, DATAFACTORY_NAME)
        time.sleep(1)

    # Create an Azure Storage linked service

    # IMPORTANT: specify the name and key of your Azure Storage account.
    storage_string = SecureString(value=CONNECTION_STRING)

    ls_azure_storage = LinkedServiceResource(
        properties=AzureStorageLinkedService(connection_string=storage_string)
    )
    adf_client.linked_services.create_or_update(
        RESOURCE_GROUP_NAME, DATAFACTORY_NAME, STORAGE_LINKED_SERVICE_NAME, ls_azure_storage
    )

    # Create an Azure blob dataset (input)
    ds_ls = LinkedServiceReference(reference_name=STORAGE_LINKED_SERVICE_NAME)
    ds_azure_blob = DatasetResource(
        properties=AzureBlobDataset(
            linked_service_name=ds_ls, folder_path=BLOB_PATH, file_name=BLOB_FILE_NAME
        )
    )
    adf_client.datasets.create_or_update(
        RESOURCE_GROUP_NAME, DATAFACTORY_NAME, DATASET_INPUT_NAME, ds_azure_blob
    )

    # Create an Azure blob dataset (output)
    ds_out_azure_blob = DatasetResource(
        properties=AzureBlobDataset(linked_service_name=ds_ls, folder_path=OUTPUT_BLOB_PATH)
    )
    adf_client.datasets.create_or_update(
        RESOURCE_GROUP_NAME, DATAFACTORY_NAME, DATASET_OUTPUT_NAME, ds_out_azure_blob
    )

    # Create a copy activity
    blob_source = BlobSource()
    blob_sink = BlobSink()
    ds_in_ref = DatasetReference(reference_name=DATASET_INPUT_NAME)
    ds_out_ref = DatasetReference(reference_name=DATASET_OUTPUT_NAME)
    copy_activity = CopyActivity(
        name=ACTIVITY_NAME, inputs=[ds_in_ref], outputs=[ds_out_ref], source=blob_source, sink=blob_sink
    )

    # Create a pipeline with the copy activity
    p_obj = PipelineResource(activities=[copy_activity], parameters={})
    adf_client.pipelines.create_or_update(RESOURCE_GROUP_NAME, DATAFACTORY_NAME, PIPELINE_NAME, p_obj)


def delete_azure_data_factory_storage_pipeline() -> None:
    """Delete the data factory, storage linked service, pipeline, and datasets."""
    credentials = ClientSecretCredential(
        client_id=CLIENT_ID, client_secret=CLIENT_SECRET, tenant_id=TENANT_ID
    )
    # Create resource client
    resource_client = ResourceManagementClient(credentials, SUBSCRIPTION_ID)

    # Create Data Factory client
    adf_client = DataFactoryManagementClient(credentials, SUBSCRIPTION_ID)

    # Delete pipeline
    adf_client.pipelines.delete(RESOURCE_GROUP_NAME, DATAFACTORY_NAME, PIPELINE_NAME)

    # Delete input dataset
    adf_client.datasets.delete(RESOURCE_GROUP_NAME, DATAFACTORY_NAME, DATASET_INPUT_NAME)

    # Delete output dataset
    adf_client.datasets.delete(RESOURCE_GROUP_NAME, DATAFACTORY_NAME, DATASET_OUTPUT_NAME)

    # Delete linked service
    adf_client.linked_services.delete(
        RESOURCE_GROUP_NAME, DATAFACTORY_NAME, linked_service_name=STORAGE_LINKED_SERVICE_NAME
    )

    # Delete data factory
    adf_client.factories.delete(RESOURCE_GROUP_NAME, DATAFACTORY_NAME)

    # Delete resource group
    resource_client.resource_groups.begin_delete(RESOURCE_GROUP_NAME)


with DAG(
    dag_id="example_adf_run_pipeline",
    start_date=datetime(2021, 8, 13),
    schedule_interval=None,
    catchup=False,
    default_args=default_args,
    tags=["example", "async", "Azure Pipeline"],
) as dag:
    # [START howto_create_resource_group]
    create_azure_data_factory_storage_pipeline = PythonOperator(
        task_id="create_azure_data_factory_storage_pipeline",
        python_callable=create_adf_storage_pipeline,
    )
    # [END howto_create_resource_group]

    # [START howto_operator_adf_run_pipeline]
    run_pipeline = AzureDataFactoryRunPipelineOperator(
        task_id="run_pipeline",
        pipeline_name=PIPELINE_NAME,
        wait_for_termination=False,
    )
    # [END howto_operator_adf_run_pipeline]

    # [START howto_sensor_pipeline_run_sensor_async]
    pipeline_run_sensor_async = AzureDataFactoryPipelineRunStatusSensorAsync(
        task_id="pipeline_run_sensor_async",
        run_id=run_pipeline.output["run_id"],
    )
    # [END howto_sensor_pipeline_run_sensor_async]

    remove_azure_data_factory_storage_pipeline = PythonOperator(
        task_id="remove_azure_data_factory_storage_pipeline",
        python_callable=delete_azure_data_factory_storage_pipeline,
        trigger_rule="all_done",
    )

    (
        create_azure_data_factory_storage_pipeline
        >> run_pipeline
        >> pipeline_run_sensor_async
        >> remove_azure_data_factory_storage_pipeline
    )
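The default_args above point every task at an Airflow connection named azure_data_factory_default, and the async hook added in this commit reads the tenant and subscription from that connection's extras. The snippet below is a hypothetical way to register such a connection from the same environment variables, purely for illustration: the extras key names mirror the ones the hook reads further down, while the AIRFLOW_CONN_* URI scheme is an assumption about your deployment and provider version and should be verified.

# Hypothetical helper: expose an "azure_data_factory_default" connection through an
# environment variable before Airflow starts. The extras key names match what
# AzureDataFactoryHookAsync reads; the "azure-data-factory" URI scheme is an assumption
# about the provider's connection type and should be checked against your version.
import os
from urllib.parse import quote_plus

os.environ["AIRFLOW_CONN_AZURE_DATA_FACTORY_DEFAULT"] = (
    "azure-data-factory://"
    f"{quote_plus(os.getenv('CLIENT_ID', ''))}:{quote_plus(os.getenv('CLIENT_SECRET', ''))}@"
    f"?extra__azure_data_factory__tenantId={quote_plus(os.getenv('TENANT_ID', ''))}"
    f"&extra__azure_data_factory__subscriptionId={quote_plus(os.getenv('SUBSCRIPTION_ID', ''))}"
)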

astronomer/providers/microsoft/azure/hooks/__init__.py

Whitespace-only changes.
Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
from typing import Any, Optional, Union

from airflow import AirflowException
from airflow.providers.microsoft.azure.hooks.data_factory import AzureDataFactoryHook
from asgiref.sync import sync_to_async
from azure.identity.aio import ClientSecretCredential, DefaultAzureCredential
from azure.mgmt.datafactory.aio import DataFactoryManagementClient
from azure.mgmt.datafactory.models import PipelineRun

Credentials = Union[ClientSecretCredential, DefaultAzureCredential]


class AzureDataFactoryHookAsync(AzureDataFactoryHook):
    """
    An async hook that connects to Azure Data Factory to perform pipeline operations.

    :param azure_data_factory_conn_id: The :ref:`Azure Data Factory connection id<howto/connection:adf>`.
    """

    def __init__(self, azure_data_factory_conn_id: str):
        self._async_conn: DataFactoryManagementClient = None
        self.conn_id = azure_data_factory_conn_id
        super().__init__(azure_data_factory_conn_id=azure_data_factory_conn_id)

    async def get_async_conn(self) -> DataFactoryManagementClient:
        """Get an async connection and connect to Azure Data Factory."""
        if self._conn is not None:
            return self._conn

        conn = await sync_to_async(self.get_connection)(self.conn_id)
        tenant = conn.extra_dejson.get("extra__azure_data_factory__tenantId")

        try:
            subscription_id = conn.extra_dejson["extra__azure_data_factory__subscriptionId"]
        except KeyError:
            raise ValueError("A Subscription ID is required to connect to Azure Data Factory.")

        credential: Credentials
        if conn.login is not None and conn.password is not None:
            if not tenant:
                raise ValueError("A Tenant ID is required when authenticating with Client ID and Secret.")

            credential = ClientSecretCredential(
                client_id=conn.login, client_secret=conn.password, tenant_id=tenant
            )
        else:
            credential = DefaultAzureCredential()

        return DataFactoryManagementClient(
            credential=credential,
            subscription_id=subscription_id,
        )

    async def get_pipeline_run(
        self,
        run_id: str,
        resource_group_name: Optional[str] = None,
        factory_name: Optional[str] = None,
        **config: Any,
    ) -> PipelineRun:
        """
        Connect to Azure Data Factory asynchronously and get the pipeline run details by run id.

        :param run_id: The pipeline run identifier.
        :param resource_group_name: The resource group name.
        :param factory_name: The factory name.
        """
        async with await self.get_async_conn() as client:
            try:
                pipeline_run = await client.pipeline_runs.get(resource_group_name, factory_name, run_id)
                return pipeline_run
            except Exception as e:
                raise AirflowException(e)

    async def get_adf_pipeline_run_status(
        self, run_id: str, resource_group_name: Optional[str] = None, factory_name: Optional[str] = None
    ) -> str:
        """
        Connect to Azure Data Factory asynchronously and get the pipeline status by run_id.

        :param run_id: The pipeline run identifier.
        :param resource_group_name: The resource group name.
        :param factory_name: The factory name.
        """
        try:
            pipeline_run = await self.get_pipeline_run(
                run_id=run_id,
                factory_name=factory_name,
                resource_group_name=resource_group_name,
            )
            status: str = pipeline_run.status
            return status
        except Exception as e:
            raise AirflowException(e)
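As a quick way to exercise the hook outside of an Airflow trigger, a minimal asyncio sketch follows. It assumes the hook module lives at astronomer.providers.microsoft.azure.hooks.data_factory (consistent with the hooks/__init__.py added in this commit) and that the azure_data_factory_default connection is configured; the run id, resource group, and factory names are placeholders.

# Minimal standalone sketch (not part of the commit): poll a pipeline run's status once
# by driving the async hook directly with asyncio. Module path and connection id are
# assumptions; replace the placeholder run id with a real one.
import asyncio

from astronomer.providers.microsoft.azure.hooks.data_factory import AzureDataFactoryHookAsync


async def check_run(run_id: str) -> str:
    hook = AzureDataFactoryHookAsync(azure_data_factory_conn_id="azure_data_factory_default")
    return await hook.get_adf_pipeline_run_status(
        run_id=run_id,
        resource_group_name="team_provider_resource_group_test",
        factory_name="ADFProvidersTeamDataFactory",
    )


print(asyncio.run(check_run("<pipeline-run-id>")))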

astronomer/providers/microsoft/azure/operators/__init__.py

Whitespace-only changes.

astronomer/providers/microsoft/azure/operators/data_factory.py

Whitespace-only changes.

astronomer/providers/microsoft/azure/sensors/__init__.py

Whitespace-only changes.
Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
from typing import Any, Dict

from airflow import AirflowException
from airflow.providers.microsoft.azure.sensors.data_factory import (
    AzureDataFactoryPipelineRunStatusSensor,
)

from astronomer.providers.microsoft.azure.triggers.data_factory import (
    ADFPipelineRunStatusSensorTrigger,
)


class AzureDataFactoryPipelineRunStatusSensorAsync(AzureDataFactoryPipelineRunStatusSensor):
    """
    Checks the status of a pipeline run, deferring the polling to a trigger.

    :param azure_data_factory_conn_id: The connection identifier for connecting to Azure Data Factory.
    :param run_id: The pipeline run identifier.
    :param resource_group_name: The resource group name.
    :param factory_name: The data factory name.
    """

    def __init__(
        self,
        *,
        poll_interval: float = 5,
        **kwargs: Any,
    ):
        self.poll_interval = poll_interval
        super().__init__(**kwargs)

    def execute(self, context: Dict[Any, Any]) -> None:
        """Defer to the trigger, which polls the state of the pipeline run until it reaches a failure or success state."""
        self.defer(
            timeout=self.execution_timeout,
            trigger=ADFPipelineRunStatusSensorTrigger(
                run_id=self.run_id,
                azure_data_factory_conn_id=self.azure_data_factory_conn_id,
                resource_group_name=self.resource_group_name,
                factory_name=self.factory_name,
                poll_interval=self.poll_interval,
            ),
            method_name="execute_complete",
        )

    def execute_complete(self, context: Dict[Any, Any], event: Dict[str, str]) -> None:
        """
        Callback for when the trigger fires - returns immediately.
        Relies on the trigger to throw an exception; otherwise it assumes execution was
        successful.
        """
        if event:
            if event["status"] == "error":
                raise AirflowException(event["message"])
            self.log.info(event["message"])
        return None
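To make the deferral contract concrete, here is an illustration of how execute_complete reacts to the event dict the trigger hands back: a success-style payload is only logged, while an error-style payload is re-raised as an AirflowException and fails the task. The payload shape shown is an assumption consistent with the handling above, not taken from the trigger's source.

# Illustration only: exercising execute_complete with sample event payloads. The exact
# payload keys/values produced by ADFPipelineRunStatusSensorTrigger are assumed here.
from airflow import AirflowException

from astronomer.providers.microsoft.azure.sensors.data_factory import (
    AzureDataFactoryPipelineRunStatusSensorAsync,
)

sensor = AzureDataFactoryPipelineRunStatusSensorAsync(
    task_id="pipeline_run_sensor_async_demo",
    run_id="<pipeline-run-id>",
)

# Success path: the message is logged and the task finishes normally.
sensor.execute_complete(context={}, event={"status": "success", "message": "Pipeline run Succeeded."})

# Error path: the sensor surfaces the trigger's failure as an AirflowException.
try:
    sensor.execute_complete(context={}, event={"status": "error", "message": "Pipeline run Failed."})
except AirflowException as err:
    print(f"Sensor raised: {err}")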

astronomer/providers/microsoft/azure/triggers/__init__.py

Whitespace-only changes.
