|
| 1 | +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. |
| 2 | + |
| 3 | +from unittest import mock |
| 4 | + |
| 5 | +from dagster import build_op_context |
| 6 | +from google.cloud.storage import Blob |
| 7 | +from orchestrator.assets import registry_entry |
| 8 | +from orchestrator.jobs.registry import add_new_metadata_partitions_op, remove_stale_metadata_partitions_op |
| 9 | + |
| 10 | + |
def test_basic_partition():
    """Sanity-check adding dynamic partitions through the instance of a built op context."""
    partitions_def_name = "test_partition_key"
    # Keep the context in a local so the instance it exposes stays referenced for the whole test.
    context = build_op_context()
    instance = context.instance

    # Nothing is expected to be registered under this name before we add anything.
    assert len(instance.get_dynamic_partitions(partitions_def_name)) == 0

    instance.add_dynamic_partitions(partitions_def_name, ["partition_1", "partition_2"])
    assert len(instance.get_dynamic_partitions(partitions_def_name)) == 2
| 20 | + |
| 21 | + |
def test_metadata_partition_remove():
    """Check that remove_stale_metadata_partitions_op leaves one partition and drops the stale etag."""

    def make_blob(etag, name):
        # Autospec'd Blob instance carrying only the attributes this test sets.
        blob = mock.create_autospec(Blob, instance=True)
        blob.etag = etag
        blob.name = name
        return blob

    fresh_blob_a = make_blob("fresh_etag_1", "fresh_metadata")
    fresh_blob_b = make_blob("fresh_etag_2", "fresh_metadata")
    stale_blob = make_blob("stale_etag", "stale_metadata")

    # Only the fresh blobs are exposed as the resource; the stale blob is used
    # solely to seed a partition that has no matching blob.
    context = build_op_context(resources={"all_metadata_file_blobs": [fresh_blob_a, fresh_blob_b]})
    instance = context.instance
    partitions_def_name = registry_entry.metadata_partitions_def.name

    assert len(instance.get_dynamic_partitions(partitions_def_name)) == 0

    # Seed one partition that matches a provided blob and one that does not.
    instance.add_dynamic_partitions(partitions_def_name, [fresh_blob_a.etag, stale_blob.etag])
    assert len(instance.get_dynamic_partitions(partitions_def_name)) == 2

    remove_stale_metadata_partitions_op(context)

    remaining_partitions = instance.get_dynamic_partitions(partitions_def_name)
    assert len(remaining_partitions) == 1
    assert stale_blob.etag not in remaining_partitions
| 55 | + |
| 56 | + |
def test_metadata_partition_add():
    """Check that add_new_metadata_partitions_op adds the provided blobs' etags without touching seeded ones.

    The instance is seeded with two partitions ("stale_etag" and "existing_etag")
    that have no matching blob in the ``all_metadata_file_blobs`` resource. After
    the op runs, the partition set must be exactly the two seeded etags plus the
    two fresh blob etags -- nothing missing and nothing extra.
    """
    mock_fresh_blob_1 = mock.create_autospec(Blob, instance=True)
    mock_fresh_blob_1.etag = "fresh_etag_1"
    mock_fresh_blob_1.name = "fresh_metadata"

    mock_fresh_blob_2 = mock.create_autospec(Blob, instance=True)
    mock_fresh_blob_2.etag = "fresh_etag_2"
    mock_fresh_blob_2.name = "fresh_metadata"

    mock_existing_blob = mock.create_autospec(Blob, instance=True)
    mock_existing_blob.etag = "existing_etag"
    mock_existing_blob.name = "existing_metadata"

    mock_stale_blob = mock.create_autospec(Blob, instance=True)
    mock_stale_blob.etag = "stale_etag"
    mock_stale_blob.name = "stale_metadata"

    # Only the fresh blobs are exposed to the op; the other two exist solely as seeded partitions.
    mock_metadata_file_blobs = [mock_fresh_blob_1, mock_fresh_blob_2]

    # Stub slack resource whose get_client() returns a mock client.
    # NOTE(review): presumably the op sends a notification through this resource -- confirm against the op.
    mock_slack = mock.MagicMock()
    mock_slack.get_client = mock.MagicMock()
    mock_slack_client = mock.MagicMock()
    mock_slack.get_client.return_value = mock_slack_client

    resources = {"slack": mock_slack, "all_metadata_file_blobs": mock_metadata_file_blobs}

    context = build_op_context(resources=resources)

    partition_key = registry_entry.metadata_partitions_def.name

    existing_partitions = context.instance.get_dynamic_partitions(partition_key)
    assert len(existing_partitions) == 0

    context.instance.add_dynamic_partitions(partition_key, [mock_stale_blob.etag, mock_existing_blob.etag])
    existing_partitions = context.instance.get_dynamic_partitions(partition_key)
    assert len(existing_partitions) == 2

    add_new_metadata_partitions_op(context)

    existing_partitions = context.instance.get_dynamic_partitions(partition_key)
    expected_partitions = [mock_fresh_blob_1.etag, mock_fresh_blob_2.etag, mock_existing_blob.etag, mock_stale_blob.etag]

    # Order does not matter, but the contents must match exactly. A plain
    # containment check would let unexpected extra partitions slip through.
    assert sorted(existing_partitions) == sorted(expected_partitions)
0 commit comments