Skip to content

fix: #1572 - make where_clause_suffix an actual suffix #1573

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import logging
from typing import ( # noqa: F401
Any, Dict, Iterator, Union,
)
Expand All @@ -9,11 +10,14 @@

from databuilder.extractor.base_postgres_metadata_extractor import BasePostgresMetadataExtractor

LOGGER = logging.getLogger(__name__)


class RedshiftMetadataExtractor(BasePostgresMetadataExtractor):
"""
Extracts Redshift table and column metadata from underlying meta store database using SQLAlchemyExtractor


This differs from the PostgresMetadataExtractor because in order to support Redshift's late binding views,
we need to join the INFORMATION_SCHEMA data against the function PG_GET_LATE_BINDING_VIEW_COLS().
"""
Expand All @@ -24,6 +28,15 @@ def get_sql_statement(self, use_catalog_as_cluster_name: bool, where_clause_suff
else:
cluster_source = f"'{self._cluster}'"

if where_clause_suffix:
if where_clause_suffix.lower().startswith("where"):
LOGGER.warning("you no longer need to begin with 'where' in your suffix")
where_clause = where_clause_suffix
else:
where_clause = f"where {where_clause_suffix}"
else:
where_clause = ""

return """
SELECT
*
Expand Down Expand Up @@ -74,11 +87,11 @@ def get_sql_statement(self, use_catalog_as_cluster_name: bool, where_clause_suff
FROM svv_external_columns
)

{where_clause_suffix}
{where_clause}
ORDER by cluster, schema, name, col_sort_order ;
""".format(
cluster_source=cluster_source,
where_clause_suffix=where_clause_suffix,
where_clause=where_clause,
)

def get_scope(self) -> str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,32 @@ def _union(self,


class TestRedshiftMetadataExtractorWithWhereClause(unittest.TestCase):
def setUp(self) -> None:
logging.basicConfig(level=logging.INFO)
self.where_clause_suffix = """
table_schema in ('public') and table_name = 'movies'
"""

config_dict = {
RedshiftMetadataExtractor.WHERE_CLAUSE_SUFFIX_KEY: self.where_clause_suffix,
f'extractor.sqlalchemy.{SQLAlchemyExtractor.CONN_STRING}':
'TEST_CONNECTION'
}
self.conf = ConfigFactory.from_dict(config_dict)

def test_sql_statement(self) -> None:
"""
Test extraction sql properly includes where suffix
"""
with patch.object(SQLAlchemyExtractor, '_get_connection'):
extractor = RedshiftMetadataExtractor()
extractor.init(self.conf)
expected_where_clause = f'where {self.where_clause_suffix}'

self.assertTrue(expected_where_clause in extractor.sql_stmt)


class TestRedshiftMetadataExtractorWithLegacyWhereClause(unittest.TestCase):
def setUp(self) -> None:
logging.basicConfig(level=logging.INFO)
self.where_clause_suffix = """
Expand All @@ -119,11 +145,12 @@ def setUp(self) -> None:

def test_sql_statement(self) -> None:
"""
Test Extraction with empty result from query
Test extraction sql properly includes where suffix using legacy specification
"""
with patch.object(SQLAlchemyExtractor, '_get_connection'):
extractor = RedshiftMetadataExtractor()
extractor.init(self.conf)

self.assertTrue(self.where_clause_suffix in extractor.sql_stmt)


Expand Down