Skip to content

feat: join v_ahbesser with newly introduced ahb_expressions table #125

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
Apr 15, 2025
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
3ef80b3
expressions WIP
Apr 15, 2025
d15ce3b
chore: include AHB PK in materialized AHB view
Apr 15, 2025
d8d0779
Merge branch 'ahb-pk' into expressions
Apr 15, 2025
fbbc081
wip
Apr 15, 2025
dab2f6a
Merge remote-tracking branch 'origin/main' into expressions
Apr 15, 2025
7e1d594
wip
Apr 15, 2025
cc2097f
wip
Apr 15, 2025
acce381
wip
Apr 15, 2025
2d68c15
args
Apr 15, 2025
661e862
linter pain
Apr 15, 2025
bdc0a86
Delete src/fundamend/sqlmodels/create_ahb_expressions_table.sql
hf-kklein Apr 15, 2025
1474fd2
isort
Apr 15, 2025
bb10d22
spell check
Apr 15, 2025
c9d9c4a
Merge remote-tracking branch 'origin/expressions' into expressions
Apr 15, 2025
1ec3624
formating linter conflict
Apr 15, 2025
8ccc0e3
Update src/fundamend/sqlmodels/expression_view.py
hf-kklein Apr 15, 2025
a48e75e
enable type checking again
Apr 15, 2025
3f0682d
Merge remote-tracking branch 'origin/expressions' into expressions
Apr 15, 2025
aecfbb2
use order by in snapshot test for reproducible, stable results
Apr 15, 2025
448d068
linters
Apr 15, 2025
4eb020f
wip
Apr 15, 2025
501b4c0
wip
Apr 15, 2025
fe1955e
don't include pruefi, drop empty rows
Apr 15, 2025
e655163
arg. this turned out to be more complicated than i thought ;)
Apr 15, 2025
97c0340
feat: join `v_ahbesser` with newly introduced `ahb_expressions` table
Apr 15, 2025
11668b0
linter
Apr 15, 2025
2b514ef
Merge branch 'expressions' into join
Apr 15, 2025
fb19e42
ooooooooooooooook
Apr 15, 2025
3223c93
Update src/fundamend/sqlmodels/create_ahbtabellen_view.sql
hf-kklein Apr 15, 2025
974c167
Merge branch 'main' into join
hf-kklein Apr 15, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions domain-specific-terms.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ sie
rekursion
rekursive
finde
contrl
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ sqlmodels = [
"sqlmodel>=0.0.22",
"sqlalchemy[mypy]>=2.0.37"
]
ahbicht = [
"ahbicht>=0.13.2"
]
coverage = [
"coverage==7.8.0"
]
Expand Down
5 changes: 4 additions & 1 deletion src/fundamend/sqlmodels/ahbtabellen_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pathlib import Path
from uuid import UUID

from efoli import EdifactFormatVersion
from efoli import EdifactFormat, EdifactFormatVersion
from sqlalchemy.sql.functions import func
from sqlmodel import Field, Session, SQLModel, select

Expand Down Expand Up @@ -42,6 +42,7 @@ class AhbTabellenLine(SQLModel, table=True):
__tablename__ = "v_ahbtabellen"
id: UUID = Field(primary_key=True)
format_version: EdifactFormatVersion = Field()
format: EdifactFormat = Field()
pruefidentifikator: str = Field()
path: str = Field()
id_path: str = Field()
Expand All @@ -53,6 +54,8 @@ class AhbTabellenLine(SQLModel, table=True):
qualifier: str | None = Field()
line_ahb_status: str | None = Field()
line_name: str | None = Field()
bedingung: str | None = Field()
bedingungsfehler: str | None = Field()
sort_path: str = Field()


Expand Down
66 changes: 46 additions & 20 deletions src/fundamend/sqlmodels/create_ahbtabellen_view.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,53 @@

DROP TABLE IF EXISTS v_ahbtabellen; -- this is because sqlmodel tries to create a table first... it doesn't know that this is just a view. bit dirty but ok.
DROP VIEW IF EXISTS v_ahbtabellen;
CREATE VIEW v_ahbtabellen as
SELECT id as id,
edifact_format_version as format_version,
pruefidentifikator as pruefidentifikator,
path,
id_path,
kommunikation_von as direction,
beschreibung as description,
'SG' || segmentgroup_id as segmentgroup_key, -- eg 'SG6'
segment_id as segment_code, -- e.g 'NAD'
dataelement_id as data_element, -- e.g 'D_3035'

CREATE VIEW v_ahbtabellen AS
WITH consolidated_ahm AS (SELECT id,
edifact_format_version,
format,
pruefidentifikator,
path,
id_path,
kommunikation_von,
beschreibung,
segmentgroup_id,
segment_id,
dataelement_id,
code_value,
sort_path,
trim(coalesce(code_ahb_status, coalesce(dataelement_ahb_status,
coalesce(segment_ahb_status, segmentgroup_ahb_status)))) AS line_ahb_status,
coalesce(code_name, coalesce(dataelement_name, coalesce(dataelementgroup_name,
coalesce(segment_name, segmentgroup_name)))) AS line_name
FROM ahb_hierarchy_materialized ahm
WHERE ahm.TYPE != 'dataelementgroup'
AND (ahm.TYPE != 'dataelement' OR ahm.dataelement_ahb_status IS NOT NULL))

SELECT c.id as id,
c.edifact_format_version as format_version,
c.format as format,
c.pruefidentifikator as pruefidentifikator,
c.path,
c.id_path,
c.kommunikation_von as direction,
c.beschreibung as description,
'SG' || c.segmentgroup_id as segmentgroup_key, -- eg 'SG6'
c.segment_id as segment_code, -- e.g 'NAD'
c.dataelement_id as data_element, -- e.g 'D_3035'
--CASE
-- WHEN dataelement_id IS NOT NULL THEN SUBSTR(dataelement_id, 3)
-- END AS dataelement_without_leading_d_, -- e.g '3035'
code_value as qualifier,
c.code_value as qualifier,
c.line_ahb_status as line_ahb_status, -- e.g. 'Muss [28] ∧ [64]'
c.line_name as line_name, -- e.g. 'Datums- oder Uhrzeit- oder Zeitspannen-Format, Code' or 'Produkt-Daten für Lieferant relevant'
c.sort_path as sort_path,
NULLIF(ahe.node_texts, '') as bedingung,
NULLIF(ahe.ahbicht_error_message, '') as bedingungsfehler
FROM consolidated_ahm as c
LEFT JOIN ahb_expressions as ahe
ON ahe.edifact_format_version = c.edifact_format_version
AND ahe.format = c.format
AND ahe.expression = c.line_ahb_status;


coalesce(code_ahb_status, coalesce(dataelement_ahb_status,
coalesce(segment_ahb_status, segmentgroup_ahb_status))) as line_ahb_status, -- e.g. 'Muss [28] ∧ [64]'
coalesce(code_name, coalesce(dataelement_name, coalesce(dataelementgroup_name,
coalesce(segment_name, segmentgroup_name)))) as line_name, -- e.g. 'Datums- oder Uhrzeit- oder Zeitspannen-Format, Code' or 'Produkt-Daten für Lieferant relevant'
sort_path as sort_path
-- the bedingung column is still missing, but we'll solve this one separately
FROM ahb_hierarchy_materialized
WHERE TYPE != 'dataelementgroup' AND (TYPE != 'dataelement' OR dataelement_ahb_status IS NOT Null) ;
250 changes: 250 additions & 0 deletions src/fundamend/sqlmodels/expression_view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
"""
helper module to create a table with a "Bedingung" column like the one in the PDF/docx AHBs
"""

import asyncio
import logging
import uuid
from contextvars import ContextVar
from typing import Optional

from efoli import EdifactFormat, EdifactFormatVersion

from fundamend.sqlmodels import AhbHierarchyMaterialized, Bedingung
from fundamend.sqlmodels.anwendungshandbuch import Paket, UbBedingung

try:
from sqlalchemy.sql.functions import func
from sqlmodel import Field, Session, SQLModel, UniqueConstraint, col, select

except ImportError as import_error:
import_error.msg += "; Did you install fundamend[sqlmodels] or did you try to import from fundamend.models instead?"
# sqlmodel is only an optional dependency when fundamend is used to fill a database
raise


try:
import inject
from ahbicht.content_evaluation.evaluationdatatypes import EvaluatableData, EvaluatableDataProvider
from ahbicht.content_evaluation.evaluator_factory import create_content_evaluation_result_based_evaluators
from ahbicht.content_evaluation.expression_check import is_valid_expression
from ahbicht.content_evaluation.token_logic_provider import SingletonTokenLogicProvider, TokenLogicProvider
from ahbicht.expressions.condition_expression_parser import extract_categorized_keys
from ahbicht.models.content_evaluation_result import ContentEvaluationResult, ContentEvaluationResultSchema
from lark.exceptions import VisitError
except ImportError as import_error:
import_error.msg += "; Did you install fundamend[sqlmodels,ahbicht]?"
# sqlmodel and ahbicht are only optional dependencies when fundamend is used to fill a database
raise

_logger = logging.getLogger(__name__)

# Context-local slot for the ContentEvaluationResult that is currently under inspection.
# Its `.set` method is handed to ahbicht's `is_valid_expression` as a callback (presumably invoked once per
# possible outcome - TODO confirm against ahbicht) and `_get_evaluatable_data` reads the value back.
# Defaults to None as long as nothing has been set.
_content_evaluation_result: ContextVar[Optional[ContentEvaluationResult]] = ContextVar(
    "_content_evaluation_result", default=None
)


def _get_evaluatable_data() -> EvaluatableData[ContentEvaluationResult]:
    """
    Wraps the current value of the _content_evaluation_result context var in an EvaluatableData container.
    That container is the input the ContentEvaluationResultBased RC/FC Evaluators, the HintsProvider and the
    Package Resolver work on.
    :return: EvaluatableData whose body is the serialized (dumped) content evaluation result
    """
    serialized_result = ContentEvaluationResultSchema().dump(_content_evaluation_result.get())
    # format and format version are mandatory arguments, but their actual values are not important here
    placeholder_format = EdifactFormat.UTILMD
    placeholder_format_version = EdifactFormatVersion.FV2504
    return EvaluatableData(
        body=serialized_result,
        edifact_format=placeholder_format,
        edifact_format_version=placeholder_format_version,
    )


def _setup_weird_ahbicht_dependency_injection() -> None:
    """
    Registers the bindings ahbicht resolves via `inject`:
    - TokenLogicProvider -> a SingletonTokenLogicProvider built from the content-evaluation-result based evaluators
    - EvaluatableDataProvider -> `_get_evaluatable_data` (called as a provider, i.e. evaluated on demand)
    The registration goes through `inject.configure_once`.
    """

    def _bind_ahbicht_providers(binder: inject.Binder) -> None:
        # UTILMD/FV2504 are the same placeholder values as used in _get_evaluatable_data
        evaluators = create_content_evaluation_result_based_evaluators(
            EdifactFormat.UTILMD, EdifactFormatVersion.FV2504
        )
        token_logic_provider = SingletonTokenLogicProvider(list(evaluators))
        binder.bind(TokenLogicProvider, token_logic_provider)
        binder.bind_to_provider(EvaluatableDataProvider, _get_evaluatable_data)

    inject.configure_once(_bind_ahbicht_providers)


def _generate_node_texts(session: Session, expression: str, ahb_pk: uuid.UUID) -> str:
    """
    Builds the human-readable explanation of all nodes that occur in the given expression.
    The keys are extracted from the expression with ahbicht; their texts are looked up in the Bedingung, Paket and
    UbBedingung tables, restricted to the Anwendungshandbuch with the given primary key.
    :param session: database session used for the lookups
    :param expression: the AHB expression, e.g. 'Muss [1] U [2]'
    :param ahb_pk: primary key of the Anwendungshandbuch the texts are taken from
    :return: one line "[<key>] <text>" per found node, joined with newlines
    """
    key_extract = asyncio.run(extract_categorized_keys(expression))
    # format constraints, requirement constraints and hints all live in the Bedingung table
    bedingung_keys = (
        key_extract.format_constraint_keys + key_extract.requirement_constraint_keys + key_extract.hint_keys
    )
    # probably, we'd be faster if we just loaded all pakete and all bedingungen once instead of selecting over and
    # over again for each expression
    bedingung_stmt = select(Bedingung).where(
        col(Bedingung.nummer).in_(bedingung_keys),  # pylint:disable=no-member
        Bedingung.anwendungshandbuch_primary_key == ahb_pk,
    )
    paket_stmt = select(Paket).where(
        col(Paket.nummer).in_(key_extract.package_keys),  # pylint:disable=no-member
        Paket.anwendungshandbuch_primary_key == ahb_pk,
    )
    ub_bedingung_stmt = select(UbBedingung).where(
        col(UbBedingung.nummer).in_(key_extract.time_condition_keys),  # pylint:disable=no-member
        UbBedingung.anwendungshandbuch_primary_key == ahb_pk,
    )
    # Fill one dict in the order Bedingungen -> Pakete -> UB-Bedingungen; on a key clash the later text wins while
    # the key keeps the position of its first occurrence.
    text_by_key = {}
    for bedingung in session.exec(bedingung_stmt).all():
        text_by_key[bedingung.nummer] = bedingung.text
    for paket in session.exec(paket_stmt).all():
        text_by_key[paket.nummer] = paket.text
    for ub_bedingung in session.exec(ub_bedingung_stmt).all():
        text_by_key[ub_bedingung.nummer] = ub_bedingung.text
    lines = []
    for key, text in text_by_key.items():
        lines.append(f"[{key}] {text}")
    return "\n".join(lines)


def _get_validity_node_texts_and_error_message_cpu_intensive(
    expression: str, session: Session, anwendungshandbuch_pk: uuid.UUID
) -> tuple[bool, str, str | None]:
    """
    Checks the expression with ahbicht's `is_valid_expression` and, if it is valid, resolves its node texts.
    :param expression: the AHB expression to check, e.g. 'Muss [1] U [2]'
    :param session: database session used to look up the node texts
    :param anwendungshandbuch_pk: primary key of the Anwendungshandbuch the node texts are taken from
    :return: tuple (is_valid, node_texts, error_message); node_texts is "" for invalid expressions
    """
    try:
        is_valid, error_message = asyncio.run(is_valid_expression(expression, _content_evaluation_result.set))
    except NotImplementedError:
        # ahbicht fault/missing feature -> act like it's valid.
        # is_valid has to be assigned here, otherwise the return statement below raises an UnboundLocalError.
        is_valid, error_message = True, None
    if is_valid:
        node_texts = _generate_node_texts(session, expression, anwendungshandbuch_pk)
    else:
        # we might actually get a meaningful node_texts even for invalid expressions, but I don't like it
        node_texts = ""
    return is_valid, node_texts, error_message


def _get_validity_node_texts_and_error_message_fast(
    expression: str, session: Session, anwendungshandbuch_pk: uuid.UUID
) -> tuple[bool, str, str | None]:
    """
    Treats an expression as valid if its node texts can be generated without a SyntaxError or VisitError.
    :param expression: the AHB expression to check, e.g. 'Muss [1] U [2]'
    :param session: database session used to look up the node texts
    :param anwendungshandbuch_pk: primary key of the Anwendungshandbuch the node texts are taken from
    :return: (True, node_texts, None) on success; (False, "", <stringified exception>) if parsing failed
    """
    try:
        node_texts = _generate_node_texts(session, expression, anwendungshandbuch_pk)
    except (SyntaxError, VisitError) as parse_error:
        # both error kinds are handled the same way: log it and hand the message back to the caller
        _logger.info("The expression '%s' could not be parsed: %s", expression, parse_error)
        return False, "", str(parse_error)
    else:
        return True, node_texts, None


def create_and_fill_ahb_expression_table(session: Session, use_cpu_intensive_validity_check: bool = False) -> None:
    """
    Fills the ahb_expressions table: all ahb_status values (segment group, segment, data element and code level)
    are read from the ahb_hierarchy_materialized table, de-duplicated per (format version, format, expression) and
    then parsed with ahbicht. The parsing has to happen in Python.
    With the CPU intensive validity check enabled, not only the expression itself is checked but also all its
    possible outcomes. This marks only few additional expressions as invalid but is very slow.
    :param session: session on a database in which ahb_hierarchy_materialized is already populated
    :param use_cpu_intensive_validity_check: set to True to use the slow but more thorough check
    :raises ValueError: if ahb_hierarchy_materialized yields no rows
    """
    _setup_weird_ahbicht_dependency_injection()
    status_columns = (
        AhbHierarchyMaterialized.segmentgroup_ahb_status,
        AhbHierarchyMaterialized.segment_ahb_status,
        AhbHierarchyMaterialized.dataelement_ahb_status,
        AhbHierarchyMaterialized.code_ahb_status,
    )
    rows: list[tuple[EdifactFormatVersion | None, str, str | None, uuid.UUID]] = []
    for status_column in status_columns:
        statement = select(
            AhbHierarchyMaterialized.edifact_format_version,
            AhbHierarchyMaterialized.format,
            status_column,
            AhbHierarchyMaterialized.anwendungshandbuch_primary_key,
        )
        rows.extend(session.exec(statement))  # type:ignore[arg-type]
    if not any(rows):
        raise ValueError(
            "No rows found in ahb_hierarchy_materialized table; Run `create_db_and_populate_with_ahb_view` before."
        )
    # drop rows without format version or with a missing/blank expression, then bring them into a stable order
    non_empty_rows: list[tuple[EdifactFormatVersion, str, str, uuid.UUID]] = sorted(
        [  # type:ignore[misc]
            row for row in rows if row[0] is not None and row[2] is not None and row[2].strip()
        ],
        key=lambda row: (row[0], row[1], row[2]),
    )
    # keep only the first row per (format version, format, stripped expression); dicts preserve insertion order
    first_row_per_expression: dict[tuple[str, str, str], tuple[EdifactFormatVersion, str, str, uuid.UUID]] = {}
    for row in non_empty_rows:
        first_row_per_expression.setdefault((row[0], row[1], row[2].strip()), row)
    # as of 2025-04-15 I have no clue how long the CPU intensive check actually takes for all expressions
    if use_cpu_intensive_validity_check:
        check_expression = _get_validity_node_texts_and_error_message_cpu_intensive
    else:
        check_expression = _get_validity_node_texts_and_error_message_fast
    # there are ~3600 unique rows for FV2410+FV2504 as of 2025-04-15
    for (format_version, edifact_format, expression), row in first_row_per_expression.items():
        ahb_pk = row[3]
        _, node_texts, error_message = check_expression(expression, session, ahb_pk)
        ahb_expression_row = AhbExpression(
            edifact_format_version=format_version,
            format=edifact_format,
            expression=expression,
            node_texts=node_texts,
            anwendungshandbuch_primary_key=ahb_pk,
            ahbicht_error_message=error_message,
        )
        session.add(ahb_expression_row)
        _logger.debug(
            "Added row (%s, %s, %s) to the ahb_expressions_table",
            ahb_expression_row.edifact_format_version,
            ahb_expression_row.format,
            ahb_expression_row.expression,
        )
    count_statement = select(func.count(AhbExpression.id))  # type:ignore[arg-type]# pylint:disable=not-callable
    number_of_inserted_rows = session.scalar(count_statement)
    _logger.info(
        "Inserted %d rows into the table %s",
        number_of_inserted_rows,
        AhbExpression.__tablename__,
    )
    session.commit()


class AhbExpression(SQLModel, table=True):
    """
    A table that contains all expressions that are used in any AHB, each with its edifact format and format_version
    (there is no prüfidentifikator column).
    It's filled by `create_and_fill_ahb_expression_table`, which collects the four '*_ahb_status' columns of the
    ahb_hierarchy_materialized table.
    Additionally, this table has a column that resolves the expression to a human-readable text.
    """

    __tablename__ = "ahb_expressions"
    # every expression occurs at most once per format version and format
    __table_args__ = (
        UniqueConstraint(
            "edifact_format_version",
            "format",
            "expression",
            name="idx_ahb_expressions_metadata_expression",
        ),
    )
    id: uuid.UUID = Field(primary_key=True, default_factory=uuid.uuid4)
    edifact_format_version: EdifactFormatVersion = Field(index=True)
    format: str = Field(index=True)  # the edifact format, e.g. 'UTILMD'
    # expressions and conditions are always interpreted on a per-format basis (no pruefidentifikator required)
    expression: str = Field(index=True)  #: e.g 'Muss [1] U [2]'
    node_texts: str = Field()
    """
    this contains the typical "[1] Foo Text\n[2] Bar Text" which explains the meaning of the nodes from inside the
    respective Expression (e.g. for expression "Muss [1] U [2]")
    """
    # error message from parsing/validating the expression with ahbicht; None if there was no error
    ahbicht_error_message: str | None = Field(default=None)
    # primary key of the Anwendungshandbuch whose Bedingungen/Pakete/UB-Bedingungen were used to build node_texts
    anwendungshandbuch_primary_key: uuid.UUID = Field()
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ deps =
-r requirements.txt
.[tests]
.[sqlmodels]
.[ahbicht]
setenv = PYTHONPATH = {toxinidir}/src
commands = python -m pytest --basetemp={envtmpdir} {posargs} -vv

Expand All @@ -24,6 +25,7 @@ deps =
-r requirements.txt
.[tests]
.[sqlmodels]
.[ahbicht]
setenv = PYTHONPATH = {toxinidir}/src
commands = python -m pytest -m snapshot --basetemp={envtmpdir} {posargs} --snapshot-update

Expand Down
Loading