Skip to content

feat: Neo4j 4.x support #1942

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Jul 27, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 39 additions & 19 deletions databuilder/databuilder/extractor/neo4j_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class Neo4jExtractor(Extractor):
MODEL_CLASS_CONFIG_KEY = 'model_class'
NEO4J_AUTH_USER = 'neo4j_auth_user'
NEO4J_AUTH_PW = 'neo4j_auth_pw'
NEO4J_DATABASE_NAME = 'neo4j_database'
NEO4J_MAX_CONN_LIFE_TIME_SEC = 'neo4j_max_conn_life_time_sec'
NEO4J_ENCRYPTED = 'neo4j_encrypted'
"""NEO4J_ENCRYPTED is a boolean indicating whether to use SSL/TLS when connecting."""
Expand Down Expand Up @@ -66,15 +67,29 @@ def _get_driver(self) -> Any:
"""
Create a Neo4j connection to Database
"""
trust = neo4j.TRUST_SYSTEM_CA_SIGNED_CERTIFICATES if self.conf.get_bool(Neo4jExtractor.NEO4J_VALIDATE_SSL) \
else neo4j.TRUST_ALL_CERTIFICATES
return GraphDatabase.driver(uri=self.graph_url,
max_connection_lifetime=self.conf.get_int(
Neo4jExtractor.NEO4J_MAX_CONN_LIFE_TIME_SEC),
auth=(self.conf.get_string(Neo4jExtractor.NEO4J_AUTH_USER),
self.conf.get_string(Neo4jExtractor.NEO4J_AUTH_PW)),
encrypted=self.conf.get_bool(Neo4jExtractor.NEO4J_ENCRYPTED),
trust=trust)
# The config settings 'encrypted' and 'trust' can only be used with the URI
# schemes ['bolt', 'neo4j'].
uri = self.graph_url
if uri.startswith('bolt:') or uri.startswith('neo4j:'):
trust = neo4j.TRUST_SYSTEM_CA_SIGNED_CERTIFICATES if self.conf.get_bool(Neo4jExtractor.NEO4J_VALIDATE_SSL) \
else neo4j.TRUST_ALL_CERTIFICATES
driver = \
GraphDatabase.driver(uri=uri,
max_connection_lifetime=self.conf.get_int(
Neo4jExtractor.NEO4J_MAX_CONN_LIFE_TIME_SEC),
auth=(self.conf.get_string(Neo4jExtractor.NEO4J_AUTH_USER),
self.conf.get_string(Neo4jExtractor.NEO4J_AUTH_PW)),
encrypted=self.conf.get_bool(Neo4jExtractor.NEO4J_ENCRYPTED),
trust=trust)
else:
driver = \
GraphDatabase.driver(uri=uri,
max_connection_lifetime=self.conf.get_int(
Neo4jExtractor.NEO4J_MAX_CONN_LIFE_TIME_SEC),
auth=(self.conf.get_string(Neo4jExtractor.NEO4J_AUTH_USER),
self.conf.get_string(Neo4jExtractor.NEO4J_AUTH_PW)))

return driver

def _execute_query(self, tx: Any) -> Any:
"""
Expand All @@ -88,16 +103,21 @@ def _get_extract_iter(self) -> Iterator[Any]:
"""
Execute {cypher_query} and yield result one at a time
"""
with self.driver.session() as session:
if not hasattr(self, 'results'):
self.results = session.read_transaction(self._execute_query)

for result in self.results:
if hasattr(self, 'model_class'):
obj = self.model_class(**result)
yield obj
else:
yield result
db_name = self.conf.get(Neo4jExtractor.NEO4J_DATABASE_NAME, None)
if db_name:
session = self.driver.session(database=db_name)
else:
session = self.driver.session()

if not hasattr(self, 'results'):
self.results = session.read_transaction(self._execute_query)

for result in self.results:
if hasattr(self, 'model_class'):
obj = self.model_class(**result)
yield obj
else:
yield result

def extract(self) -> Any:
"""
Expand Down
34 changes: 25 additions & 9 deletions databuilder/databuilder/publisher/neo4j_csv_publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@

NEO4J_USER = 'neo4j_user'
NEO4J_PASSWORD = 'neo4j_password'
NEO4J_DATABASE_NAME = 'neo4j_database'
# NEO4J_ENCRYPTED is a boolean indicating whether to use SSL/TLS when connecting
NEO4J_ENCRYPTED = 'neo4j_encrypted'
# NEO4J_VALIDATE_SSL is a boolean indicating whether to validate the server's SSL/TLS
Expand Down Expand Up @@ -148,16 +149,31 @@ def init(self, conf: ConfigTree) -> None:
self._relation_files = self._list_files(conf, RELATION_FILES_DIR)
self._relation_files_iter = iter(self._relation_files)

trust = neo4j.TRUST_SYSTEM_CA_SIGNED_CERTIFICATES if conf.get_bool(NEO4J_VALIDATE_SSL) \
else neo4j.TRUST_ALL_CERTIFICATES
self._driver = \
GraphDatabase.driver(uri=conf.get_string(NEO4J_END_POINT_KEY),
max_connection_lifetime=conf.get_int(NEO4J_MAX_CONN_LIFE_TIME_SEC),
auth=(conf.get_string(NEO4J_USER), conf.get_string(NEO4J_PASSWORD)),
encrypted=conf.get_bool(NEO4J_ENCRYPTED),
trust=trust)
# The config settings 'encrypted' and 'trust' can only be used with the URI
# schemes ['bolt', 'neo4j'].
uri = conf.get_string(NEO4J_END_POINT_KEY)
if uri.startswith('bolt:') or uri.startswith('neo4j:'):
trust = neo4j.TRUST_SYSTEM_CA_SIGNED_CERTIFICATES if conf.get_bool(NEO4J_VALIDATE_SSL) \
else neo4j.TRUST_ALL_CERTIFICATES
self._driver = \
GraphDatabase.driver(uri=uri,
max_connection_lifetime=conf.get_int(NEO4J_MAX_CONN_LIFE_TIME_SEC),
auth=(conf.get_string(NEO4J_USER), conf.get_string(NEO4J_PASSWORD)),
encrypted=conf.get_bool(NEO4J_ENCRYPTED),
trust=trust)
else:
self._driver = \
GraphDatabase.driver(uri=uri,
max_connection_lifetime=conf.get_int(NEO4J_MAX_CONN_LIFE_TIME_SEC),
auth=(conf.get_string(NEO4J_USER), conf.get_string(NEO4J_PASSWORD)))

db_name = conf.get_string(NEO4J_DATABASE_NAME, None)
if db_name:
self._session = self._driver.session(database=db_name)
else:
self._session = self._driver.session()

self._transaction_size = conf.get_int(NEO4J_TRANSACTION_SIZE)
self._session = self._driver.session()
self._confirm_rel_created = conf.get_bool(NEO4J_RELATIONSHIP_CREATION_CONFIRM)

# config is list of node label.
Expand Down
38 changes: 27 additions & 11 deletions databuilder/databuilder/task/neo4j_staleness_removal_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
NEO4J_USER = 'neo4j_user'
NEO4J_PASSWORD = 'neo4j_password'
NEO4J_ENCRYPTED = 'neo4j_encrypted'
NEO4J_DATABASE_NAME = 'neo4j_database'
"""NEO4J_ENCRYPTED is a boolean indicating whether to use SSL/TLS when connecting."""
NEO4J_VALIDATE_SSL = 'neo4j_validate_ssl'
"""NEO4J_VALIDATE_SSL is a boolean indicating whether to validate the server's SSL/TLS cert against system CAs."""
Expand Down Expand Up @@ -127,14 +128,25 @@ def init(self, conf: ConfigTree) -> None:
else:
self.marker = conf.get_string(JOB_PUBLISH_TAG)

trust = neo4j.TRUST_SYSTEM_CA_SIGNED_CERTIFICATES if conf.get_bool(NEO4J_VALIDATE_SSL) \
else neo4j.TRUST_ALL_CERTIFICATES
self._driver = \
GraphDatabase.driver(uri=conf.get_string(NEO4J_END_POINT_KEY),
max_connection_lifetime=conf.get_int(NEO4J_MAX_CONN_LIFE_TIME_SEC),
auth=(conf.get_string(NEO4J_USER), conf.get_string(NEO4J_PASSWORD)),
encrypted=conf.get_bool(NEO4J_ENCRYPTED),
trust=trust)
# The config settings 'encrypted' and 'trust' can only be used with the URI
# schemes ['bolt', 'neo4j'].
uri = conf.get_string(NEO4J_END_POINT_KEY)
if uri.startswith('bolt:') or uri.startswith('neo4j:'):
trust = neo4j.TRUST_SYSTEM_CA_SIGNED_CERTIFICATES if conf.get_bool(NEO4J_VALIDATE_SSL) \
else neo4j.TRUST_ALL_CERTIFICATES
self._driver = \
GraphDatabase.driver(uri=uri,
max_connection_lifetime=conf.get_int(NEO4J_MAX_CONN_LIFE_TIME_SEC),
auth=(conf.get_string(NEO4J_USER), conf.get_string(NEO4J_PASSWORD)),
encrypted=conf.get_bool(NEO4J_ENCRYPTED),
trust=trust)
else:
self._driver = \
GraphDatabase.driver(uri=uri,
max_connection_lifetime=conf.get_int(NEO4J_MAX_CONN_LIFE_TIME_SEC),
auth=(conf.get_string(NEO4J_USER), conf.get_string(NEO4J_PASSWORD)))

self.db_name = conf.get(NEO4J_DATABASE_NAME, None)

def run(self) -> None:
"""
Expand Down Expand Up @@ -304,9 +316,13 @@ def _execute_cypher_query(self,

start = time.time()
try:
with self._driver.session() as session:
result = session.run(statement, **param_dict)
return [record for record in result]
if self.db_name:
session = self._driver.session(database=self.db_name)
else:
session = self._driver.session()

result = session.run(statement, **param_dict)
return [record for record in result]

finally:
LOGGER.debug('Cypher query execution elapsed for %i seconds', time.time() - start)
2 changes: 1 addition & 1 deletion databuilder/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from setuptools import find_packages, setup

__version__ = '7.0.0'
__version__ = '7.1.0'

requirements_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
'requirements.txt')
Expand Down