Skip to content

feat: Neo4j 4.x support #1942

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Jul 27, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 10 additions & 13 deletions databuilder/databuilder/extractor/neo4j_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,13 @@ class Neo4jExtractor(Extractor):
MODEL_CLASS_CONFIG_KEY = 'model_class'
NEO4J_AUTH_USER = 'neo4j_auth_user'
NEO4J_AUTH_PW = 'neo4j_auth_pw'
NEO4J_DATABASE_NAME = 'neo4j_database'
NEO4J_MAX_CONN_LIFE_TIME_SEC = 'neo4j_max_conn_life_time_sec'
NEO4J_ENCRYPTED = 'neo4j_encrypted'
"""NEO4J_ENCRYPTED is a boolean indicating whether to use SSL/TLS when connecting."""
NEO4J_VALIDATE_SSL = 'neo4j_validate_ssl'
"""NEO4J_VALIDATE_SSL is a boolean indicating whether to validate the server's SSL/TLS cert against system CAs."""

DEFAULT_CONFIG = ConfigFactory.from_dict({NEO4J_MAX_CONN_LIFE_TIME_SEC: 50,
NEO4J_ENCRYPTED: True,
NEO4J_VALIDATE_SSL: False})
DEFAULT_CONFIG = ConfigFactory.from_dict({
NEO4J_MAX_CONN_LIFE_TIME_SEC: 50,
NEO4J_DATABASE_NAME: neo4j.DEFAULT_DATABASE
})

def init(self, conf: ConfigTree) -> None:
"""
Expand Down Expand Up @@ -65,16 +63,13 @@ def close(self) -> None:
def _get_driver(self) -> Any:
"""
Create a Neo4j connection to Database
https://github.com/neo4j/neo4j-python-driver#connection-settings-breaking-change
"""
trust = neo4j.TRUST_SYSTEM_CA_SIGNED_CERTIFICATES if self.conf.get_bool(Neo4jExtractor.NEO4J_VALIDATE_SSL) \
else neo4j.TRUST_ALL_CERTIFICATES
return GraphDatabase.driver(uri=self.graph_url,
max_connection_lifetime=self.conf.get_int(
Neo4jExtractor.NEO4J_MAX_CONN_LIFE_TIME_SEC),
auth=(self.conf.get_string(Neo4jExtractor.NEO4J_AUTH_USER),
self.conf.get_string(Neo4jExtractor.NEO4J_AUTH_PW)),
encrypted=self.conf.get_bool(Neo4jExtractor.NEO4J_ENCRYPTED),
trust=trust)
self.conf.get_string(Neo4jExtractor.NEO4J_AUTH_PW)))

def _execute_query(self, tx: Any) -> Any:
"""
Expand All @@ -88,7 +83,9 @@ def _get_extract_iter(self) -> Iterator[Any]:
"""
Execute {cypher_query} and yield result one at a time
"""
with self.driver.session() as session:
with self.driver.session(
database=self.conf.get(Neo4jExtractor.NEO4J_DATABASE_NAME)
) as session:
if not hasattr(self, 'results'):
self.results = session.read_transaction(self._execute_query)

Expand Down
22 changes: 8 additions & 14 deletions databuilder/databuilder/publisher/neo4j_csv_publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,7 @@

NEO4J_USER = 'neo4j_user'
NEO4J_PASSWORD = 'neo4j_password'
# NEO4J_ENCRYPTED is a boolean indicating whether to use SSL/TLS when connecting
NEO4J_ENCRYPTED = 'neo4j_encrypted'
# NEO4J_VALIDATE_SSL is a boolean indicating whether to validate the server's SSL/TLS
# cert against system CAs
NEO4J_VALIDATE_SSL = 'neo4j_validate_ssl'
NEO4J_DATABASE_NAME = 'neo4j_database'


# This will be used to provide unique tag to the node and relationship
Expand Down Expand Up @@ -109,8 +105,7 @@
NEO4J_PROGRESS_REPORT_FREQUENCY: 500,
NEO4J_RELATIONSHIP_CREATION_CONFIRM: False,
NEO4J_MAX_CONN_LIFE_TIME_SEC: 50,
NEO4J_ENCRYPTED: True,
NEO4J_VALIDATE_SSL: False,
NEO4J_DATABASE_NAME: neo4j.DEFAULT_DATABASE,
ADDITIONAL_FIELDS: {},
ADD_PUBLISHER_METADATA: True,
RELATION_PREPROCESSOR: NoopRelationPreprocessor()})
Expand Down Expand Up @@ -148,16 +143,15 @@ def init(self, conf: ConfigTree) -> None:
self._relation_files = self._list_files(conf, RELATION_FILES_DIR)
self._relation_files_iter = iter(self._relation_files)

trust = neo4j.TRUST_SYSTEM_CA_SIGNED_CERTIFICATES if conf.get_bool(NEO4J_VALIDATE_SSL) \
else neo4j.TRUST_ALL_CERTIFICATES
# https://github.com/neo4j/neo4j-python-driver#connection-settings-breaking-change
self._driver = \
GraphDatabase.driver(uri=conf.get_string(NEO4J_END_POINT_KEY),
max_connection_lifetime=conf.get_int(NEO4J_MAX_CONN_LIFE_TIME_SEC),
auth=(conf.get_string(NEO4J_USER), conf.get_string(NEO4J_PASSWORD)),
encrypted=conf.get_bool(NEO4J_ENCRYPTED),
trust=trust)
auth=(conf.get_string(NEO4J_USER), conf.get_string(NEO4J_PASSWORD)))
self._db_name = conf.get_string(NEO4J_DATABASE_NAME)
self._session = self._driver.session(database=self._db_name)

self._transaction_size = conf.get_int(NEO4J_TRANSACTION_SIZE)
self._session = self._driver.session()
self._confirm_rel_created = conf.get_bool(NEO4J_RELATIONSHIP_CREATION_CONFIRM)

# config is list of node label.
Expand Down Expand Up @@ -488,7 +482,7 @@ def _try_create_index(self, label: str) -> None:
""").render(LABEL=label)

LOGGER.info(f'Trying to create index for label {label} if not exist: {stmt}')
with self._driver.session() as session:
with self._driver.session(self._db_name) as session:
try:
session.run(stmt)
except Neo4jError as e:
Expand Down
19 changes: 6 additions & 13 deletions databuilder/databuilder/task/neo4j_staleness_removal_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,7 @@
NEO4J_MAX_CONN_LIFE_TIME_SEC = 'neo4j_max_conn_life_time_sec'
NEO4J_USER = 'neo4j_user'
NEO4J_PASSWORD = 'neo4j_password'
NEO4J_ENCRYPTED = 'neo4j_encrypted'
"""NEO4J_ENCRYPTED is a boolean indicating whether to use SSL/TLS when connecting."""
NEO4J_VALIDATE_SSL = 'neo4j_validate_ssl'
"""NEO4J_VALIDATE_SSL is a boolean indicating whether to validate the server's SSL/TLS cert against system CAs."""

NEO4J_DATABASE_NAME = 'neo4j_database'
TARGET_NODES = "target_nodes"
TARGET_RELATIONS = "target_relations"
BATCH_SIZE = "batch_size"
Expand All @@ -41,8 +37,7 @@

DEFAULT_CONFIG = ConfigFactory.from_dict({BATCH_SIZE: 100,
NEO4J_MAX_CONN_LIFE_TIME_SEC: 50,
NEO4J_ENCRYPTED: True,
NEO4J_VALIDATE_SSL: False,
NEO4J_DATABASE_NAME: neo4j.DEFAULT_DATABASE,
STALENESS_MAX_PCT: 5,
TARGET_NODES: [],
TARGET_RELATIONS: [],
Expand Down Expand Up @@ -127,14 +122,12 @@ def init(self, conf: ConfigTree) -> None:
else:
self.marker = conf.get_string(JOB_PUBLISH_TAG)

trust = neo4j.TRUST_SYSTEM_CA_SIGNED_CERTIFICATES if conf.get_bool(NEO4J_VALIDATE_SSL) \
else neo4j.TRUST_ALL_CERTIFICATES
self._driver = \
GraphDatabase.driver(uri=conf.get_string(NEO4J_END_POINT_KEY),
max_connection_lifetime=conf.get_int(NEO4J_MAX_CONN_LIFE_TIME_SEC),
auth=(conf.get_string(NEO4J_USER), conf.get_string(NEO4J_PASSWORD)),
encrypted=conf.get_bool(NEO4J_ENCRYPTED),
trust=trust)
auth=(conf.get_string(NEO4J_USER), conf.get_string(NEO4J_PASSWORD)))

self.db_name = conf.get(NEO4J_DATABASE_NAME)

def run(self) -> None:
"""
Expand Down Expand Up @@ -304,7 +297,7 @@ def _execute_cypher_query(self,

start = time.time()
try:
with self._driver.session() as session:
with self._driver.session(database=self.db_name) as session:
result = session.run(statement, **param_dict)
return [record for record in result]

Expand Down
2 changes: 1 addition & 1 deletion databuilder/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from setuptools import find_packages, setup

__version__ = '7.0.0'
__version__ = '7.1.0'

requirements_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
'requirements.txt')
Expand Down