Commit acf2921

Fix Google Provider Link Deprecations
The Dataproc links had been deprecated, but the deprecation was implemented incorrectly: the deprecation warnings were raised whenever the dataproc module was imported, even though only a few classes in it were deprecated. This was because the warnings were added at class level rather than in the constructors. Moreover, the deprecated classes were still being used in operators, so the warnings would keep being raised even after moving them from the class body to its constructor. This PR fixes that:

* moves the deprecation warnings from class bodies to constructors
* replaces usages of the deprecated links with the links that superseded them

Found while implementing apache#33640
1 parent 911cf46 commit acf2921
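
The distinction the commit message draws is easy to reproduce in plain Python: a warnings.warn() placed directly in a class body executes when the module defining the class is imported, while the same call inside __init__ fires only when the class is instantiated. A minimal sketch with illustrative class names (not the provider's actual code):

import warnings


class EagerDeprecatedLink:
    # Executes at class-definition time, i.e. as soon as the defining
    # module is imported -- even if the class is never used.
    warnings.warn("EagerDeprecatedLink is deprecated.", DeprecationWarning)


class LazyDeprecatedLink:
    def __init__(self, *args, **kwargs):
        # Executes only when the deprecated class is actually instantiated.
        warnings.warn("LazyDeprecatedLink is deprecated.", DeprecationWarning)
        super().__init__(*args, **kwargs)


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    LazyDeprecatedLink()  # the warning is emitted here, not at import
    assert any("LazyDeprecatedLink" in str(w.message) for w in caught)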

4 files changed: +48 −43 lines changed


airflow/providers/google/cloud/links/dataproc.py

Lines changed: 14 additions & 5 deletions

@@ -76,10 +76,13 @@ class DataprocLink(BaseOperatorLink):
     This link is deprecated.
     """
 
-    warnings.warn(
-        "This DataprocLink is deprecated.",
-        AirflowProviderDeprecationWarning,
-    )
+    def __init__(self, *args, **kwargs):
+        warnings.warn(
+            "DataprocLink is deprecated. Please use Dataproc*Link classes",
+            AirflowProviderDeprecationWarning,
+        )
+        super().__init__(*args, **kwargs)
+
     name = "Dataproc resource"
     key = "conf"

@@ -125,7 +129,13 @@ class DataprocListLink(BaseOperatorLink):
     This link is deprecated.
     """
 
-    warnings.warn("This DataprocListLink is deprecated.", AirflowProviderDeprecationWarning)
+    def __init__(self, *args, **kwargs):
+        warnings.warn(
+            "DataprocListLink is deprecated. Please use Dataproc*ListLink classes",
+            AirflowProviderDeprecationWarning,
+        )
+        super().__init__(*args, **kwargs)
+
     name = "Dataproc resources"
     key = "list_conf"
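
With the warnings moved into the constructors, importing airflow.providers.google.cloud.links.dataproc is silent and only constructing the deprecated links warns. A sketch of how that could be exercised in a test, assuming an environment with Airflow and the Google provider installed:

import pytest

from airflow.exceptions import AirflowProviderDeprecationWarning
# Importing the links module alone no longer emits any warning.
from airflow.providers.google.cloud.links.dataproc import DataprocLink


def test_dataproc_link_warns_on_instantiation():
    # Only constructing the deprecated link raises the warning now.
    with pytest.warns(AirflowProviderDeprecationWarning, match="DataprocLink is deprecated"):
        DataprocLink()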

airflow/providers/google/cloud/operators/dataproc.py

Lines changed: 14 additions & 13 deletions

@@ -43,13 +43,10 @@
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 from airflow.providers.google.cloud.links.dataproc import (
     DATAPROC_BATCH_LINK,
-    DATAPROC_CLUSTER_LINK_DEPRECATED,
-    DATAPROC_JOB_LINK_DEPRECATED,
     DataprocBatchesListLink,
     DataprocBatchLink,
     DataprocClusterLink,
     DataprocJobLink,
-    DataprocLink,
     DataprocWorkflowLink,
     DataprocWorkflowTemplateLink,
 )

@@ -742,7 +739,7 @@ class DataprocScaleClusterOperator(GoogleCloudBaseOperator):
 
     template_fields: Sequence[str] = ("cluster_name", "project_id", "region", "impersonation_chain")
 
-    operator_extra_links = (DataprocLink(),)
+    operator_extra_links = (DataprocClusterLink(),)
 
     def __init__(
         self,

@@ -821,12 +818,15 @@ def execute(self, context: Context) -> None:
         update_mask = ["config.worker_config.num_instances", "config.secondary_worker_config.num_instances"]
 
         hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
+        # Hook always has a project_id as fallback so we can ignore assignment
+        project_id: str = self.project_id if self.project_id else hook.project_id  # type: ignore[assignment]
         # Save data required to display extra link no matter what the cluster status will be
-        DataprocLink.persist(
+        DataprocClusterLink.persist(
             context=context,
-            task_instance=self,
-            url=DATAPROC_CLUSTER_LINK_DEPRECATED,
-            resource=self.cluster_name,
+            operator=self,
+            cluster_id=self.cluster_name,
+            project_id=project_id,
+            region=self.region,
         )
         operation = hook.update_cluster(
             project_id=self.project_id,

@@ -1000,7 +1000,7 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
 
     job_type = ""
 
-    operator_extra_links = (DataprocLink(),)
+    operator_extra_links = (DataprocJobLink(),)
 
     def __init__(
         self,

@@ -1034,7 +1034,8 @@ def __init__(
         self.job_error_states = job_error_states if job_error_states is not None else {"ERROR"}
         self.impersonation_chain = impersonation_chain
         self.hook = DataprocHook(gcp_conn_id=gcp_conn_id, impersonation_chain=impersonation_chain)
-        self.project_id = self.hook.project_id if project_id is None else project_id
+        # Hook project id is used as fallback so we can ignore assignment
+        self.project_id: str = project_id if project_id else self.hook.project_id  # type: ignore[assignment]
         self.job_template: DataProcJobBuilder | None = None
         self.job: dict | None = None
         self.dataproc_job_id = None

@@ -1081,8 +1082,8 @@ def execute(self, context: Context):
         job_id = job_object.reference.job_id
         self.log.info("Job %s submitted successfully.", job_id)
         # Save data required for extra links no matter what the job status will be
-        DataprocLink.persist(
-            context=context, task_instance=self, url=DATAPROC_JOB_LINK_DEPRECATED, resource=job_id
+        DataprocJobLink.persist(
+            context=context, operator=self, job_id=job_id, project_id=self.project_id, region=self.region
         )
 
         if self.deferrable:

@@ -1184,7 +1185,7 @@ class DataprocSubmitPigJobOperator(DataprocJobBaseOperator):
     ui_color = "#0273d4"
     job_type = "pig_job"
 
-    operator_extra_links = (DataprocLink(),)
+    operator_extra_links = (DataprocJobLink(),)
 
     def __init__(
         self,
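
All of the rewritten persist calls follow the same contract: the link class pushes a small conf dict to XCom under its key, and its get_link later interpolates the stored fields into a console URL. A rough sketch of that contract with a hypothetical stand-in class (the URL template only approximates the provider's DATAPROC_CLUSTER_LINK; this is not the provider's exact implementation):

from typing import Any

# Hypothetical template, approximating the provider's DATAPROC_CLUSTER_LINK.
CLUSTER_URL = (
    "https://console.cloud.google.com/dataproc/clusters/"
    "{cluster_id}/monitoring?region={region}&project={project_id}"
)


class ClusterLinkSketch:
    """Stand-in illustrating the persist/get_link contract of Dataproc*Link."""

    key = "dataproc_cluster"

    @classmethod
    def persist(cls, context: dict, operator: Any, **conf: str) -> None:
        # Store the link configuration as an XCom under the link's key;
        # the updated tests assert exactly this key/value pair.
        operator.xcom_push(context, key=cls.key, value=conf)

    @staticmethod
    def format_url(conf: dict) -> str:
        return CLUSTER_URL.format(**conf)


print(ClusterLinkSketch.format_url(
    {"cluster_id": "my-cluster", "region": "us-central1", "project_id": "my-project"}
))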

airflow/providers/google/provider.yaml

Lines changed: 0 additions & 2 deletions

@@ -1089,8 +1089,6 @@ extra-links:
   - airflow.providers.google.cloud.links.datacatalog.DataCatalogEntryGroupLink
   - airflow.providers.google.cloud.links.datacatalog.DataCatalogEntryLink
   - airflow.providers.google.cloud.links.datacatalog.DataCatalogTagTemplateLink
-  - airflow.providers.google.cloud.links.dataproc.DataprocLink
-  - airflow.providers.google.cloud.links.dataproc.DataprocListLink
   - airflow.providers.google.cloud.links.dataproc.DataprocClusterLink
   - airflow.providers.google.cloud.links.dataproc.DataprocJobLink
   - airflow.providers.google.cloud.links.dataproc.DataprocWorkflowLink
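
Dropping DataprocLink and DataprocListLink from extra-links stops the provider from advertising them to Airflow's link-registration and DAG-serialization machinery. A quick sanity check one might run after such an edit, assuming Airflow and the Google provider are installed (import_string is Airflow's dotted-path importer):

from airflow.utils.module_loading import import_string

# The Dataproc entries that remain registered after this change.
REMAINING_EXTRA_LINKS = [
    "airflow.providers.google.cloud.links.dataproc.DataprocClusterLink",
    "airflow.providers.google.cloud.links.dataproc.DataprocJobLink",
    "airflow.providers.google.cloud.links.dataproc.DataprocWorkflowLink",
]

for path in REMAINING_EXTRA_LINKS:
    link_cls = import_string(path)  # fails loudly if an entry is stale
    print(f"{path} -> {link_cls.name!r}")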

tests/providers/google/cloud/operators/test_dataproc.py

Lines changed: 20 additions & 23 deletions

@@ -33,8 +33,6 @@
 )
 from airflow.models import DAG, DagBag
 from airflow.providers.google.cloud.links.dataproc import (
-    DATAPROC_CLUSTER_LINK_DEPRECATED,
-    DATAPROC_JOB_LINK_DEPRECATED,
     DataprocClusterLink,
     DataprocJobLink,
     DataprocWorkflowLink,

@@ -49,7 +47,6 @@
     DataprocGetBatchOperator,
     DataprocInstantiateInlineWorkflowTemplateOperator,
     DataprocInstantiateWorkflowTemplateOperator,
-    DataprocLink,
     DataprocListBatchesOperator,
     DataprocScaleClusterOperator,
     DataprocSubmitHadoopJobOperator,

@@ -242,21 +239,19 @@
     f"project={GCP_PROJECT}"
 )
 DATAPROC_JOB_CONF_EXPECTED = {
-    "resource": TEST_JOB_ID,
+    "job_id": TEST_JOB_ID,
     "region": GCP_REGION,
     "project_id": GCP_PROJECT,
-    "url": DATAPROC_JOB_LINK_DEPRECATED,
 }
 DATAPROC_JOB_EXPECTED = {
     "job_id": TEST_JOB_ID,
     "region": GCP_REGION,
     "project_id": GCP_PROJECT,
 }
 DATAPROC_CLUSTER_CONF_EXPECTED = {
-    "resource": CLUSTER_NAME,
+    "cluster_id": CLUSTER_NAME,
     "region": GCP_REGION,
     "project_id": GCP_PROJECT,
-    "url": DATAPROC_CLUSTER_LINK_DEPRECATED,
 }
 DATAPROC_CLUSTER_EXPECTED = {
     "cluster_id": CLUSTER_NAME,

@@ -781,7 +776,9 @@ class TestDataprocClusterScaleOperator(DataprocClusterTestBase):
     def setup_class(cls):
         super().setup_class()
         cls.extra_links_expected_calls_base = [
-            call.ti.xcom_push(execution_date=None, key="conf", value=DATAPROC_CLUSTER_CONF_EXPECTED)
+            call.ti.xcom_push(
+                execution_date=None, key="dataproc_cluster", value=DATAPROC_CLUSTER_CONF_EXPECTED
+            )
         ]
 
     def test_deprecation_warning(self):

@@ -827,7 +824,7 @@ def test_execute(self, mock_hook):
         self.extra_links_manager_mock.assert_has_calls(expected_calls, any_order=False)
 
         self.mock_ti.xcom_push.assert_called_once_with(
-            key="conf",
+            key="dataproc_cluster",
             value=DATAPROC_CLUSTER_CONF_EXPECTED,
             execution_date=None,
         )

@@ -855,28 +852,28 @@ def test_scale_cluster_operator_extra_links(dag_maker, create_task_instance_of_o
 
     # Assert operator links for serialized DAG
     assert serialized_dag["dag"]["tasks"][0]["_operator_extra_links"] == [
-        {"airflow.providers.google.cloud.links.dataproc.DataprocLink": {}}
+        {"airflow.providers.google.cloud.links.dataproc.DataprocClusterLink": {}}
     ]
 
     # Assert operator link types are preserved during deserialization
-    assert isinstance(deserialized_task.operator_extra_links[0], DataprocLink)
+    assert isinstance(deserialized_task.operator_extra_links[0], DataprocClusterLink)
 
     # Assert operator link is empty when no XCom push occurred
-    assert ti.task.get_extra_links(ti, DataprocLink.name) == ""
+    assert ti.task.get_extra_links(ti, DataprocClusterLink.name) == ""
 
     # Assert operator link is empty for deserialized task when no XCom push occurred
-    assert deserialized_task.get_extra_links(ti, DataprocLink.name) == ""
+    assert deserialized_task.get_extra_links(ti, DataprocClusterLink.name) == ""
 
     ti.xcom_push(
         key="conf",
         value=DATAPROC_CLUSTER_CONF_EXPECTED,
     )
 
     # Assert operator links are preserved in deserialized tasks after execution
-    assert deserialized_task.get_extra_links(ti, DataprocLink.name) == DATAPROC_CLUSTER_LINK_EXPECTED
+    assert deserialized_task.get_extra_links(ti, DataprocClusterLink.name) == DATAPROC_CLUSTER_LINK_EXPECTED
 
     # Assert operator links after execution
-    assert ti.task.get_extra_links(ti, DataprocLink.name) == DATAPROC_CLUSTER_LINK_EXPECTED
+    assert ti.task.get_extra_links(ti, DataprocClusterLink.name) == DATAPROC_CLUSTER_LINK_EXPECTED
 
 
 class TestDataprocClusterDeleteOperator:

@@ -1817,7 +1814,7 @@ class TestDataProcSparkOperator(DataprocJobTestBase):
     @classmethod
     def setup_class(cls):
         cls.extra_links_expected_calls = [
-            call.ti.xcom_push(execution_date=None, key="conf", value=DATAPROC_JOB_CONF_EXPECTED),
+            call.ti.xcom_push(execution_date=None, key="dataproc_job", value=DATAPROC_JOB_CONF_EXPECTED),
             call.hook().wait_for_job(job_id=TEST_JOB_ID, region=GCP_REGION, project_id=GCP_PROJECT),
         ]

@@ -1864,7 +1861,7 @@ def test_execute(self, mock_hook, mock_uuid):
 
         op.execute(context=self.mock_context)
         self.mock_ti.xcom_push.assert_called_once_with(
-            key="conf", value=DATAPROC_JOB_CONF_EXPECTED, execution_date=None
+            key="dataproc_job", value=DATAPROC_JOB_CONF_EXPECTED, execution_date=None
        )
 
         # Test whether xcom push occurs before polling for job

@@ -1893,25 +1890,25 @@ def test_submit_spark_job_operator_extra_links(mock_hook, dag_maker, create_task
 
     # Assert operator links for serialized DAG
     assert serialized_dag["dag"]["tasks"][0]["_operator_extra_links"] == [
-        {"airflow.providers.google.cloud.links.dataproc.DataprocLink": {}}
+        {"airflow.providers.google.cloud.links.dataproc.DataprocJobLink": {}}
     ]
 
     # Assert operator link types are preserved during deserialization
-    assert isinstance(deserialized_task.operator_extra_links[0], DataprocLink)
+    assert isinstance(deserialized_task.operator_extra_links[0], DataprocJobLink)
 
     # Assert operator link is empty when no XCom push occurred
-    assert ti.task.get_extra_links(ti, DataprocLink.name) == ""
+    assert ti.task.get_extra_links(ti, DataprocJobLink.name) == ""
 
     # Assert operator link is empty for deserialized task when no XCom push occurred
-    assert deserialized_task.get_extra_links(ti, DataprocLink.name) == ""
+    assert deserialized_task.get_extra_links(ti, DataprocJobLink.name) == ""
 
     ti.xcom_push(key="conf", value=DATAPROC_JOB_CONF_EXPECTED)
 
     # Assert operator links after task execution
-    assert ti.task.get_extra_links(ti, DataprocLink.name) == DATAPROC_JOB_LINK_EXPECTED
+    assert ti.task.get_extra_links(ti, DataprocJobLink.name) == DATAPROC_JOB_LINK_EXPECTED
 
     # Assert operator links are preserved in deserialized tasks
-    link = deserialized_task.get_extra_links(ti, DataprocLink.name)
+    link = deserialized_task.get_extra_links(ti, DataprocJobLink.name)
     assert link == DATAPROC_JOB_LINK_EXPECTED
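The renamed XCom keys and conf dicts in these tests mirror the link classes' behaviour: the value pushed under "dataproc_job" (or "dataproc_cluster") is exactly what the link interpolates into a console URL. A hedged reconstruction of that relationship, with stand-in fixture values and a URL template that only approximate the real DATAPROC_JOB_LINK and test constants:

# Stand-in fixture values; the real test module defines its own.
TEST_JOB_ID = "test-job"
GCP_REGION = "test-region"
GCP_PROJECT = "test-project"

# Approximation of the provider's DATAPROC_JOB_LINK template.
DATAPROC_JOB_LINK = (
    "https://console.cloud.google.com/dataproc/jobs/"
    "{job_id}?region={region}&project={project_id}"
)

# The dict the operator persists to XCom under key "dataproc_job" ...
DATAPROC_JOB_CONF_EXPECTED = {
    "job_id": TEST_JOB_ID,
    "region": GCP_REGION,
    "project_id": GCP_PROJECT,
}

# ... which DataprocJobLink.get_link would roughly render as:
print(DATAPROC_JOB_LINK.format(**DATAPROC_JOB_CONF_EXPECTED))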