Hochfrequenz · hf-kklein · Apr 15, 2025 · Apr 15, 2025 · Apr 15, 2025 · Apr 15, 2025
diff --git a/domain-specific-terms.txt b/domain-specific-terms.txt
@@ -16,3 +16,4 @@ sie
 rekursion
 rekursive
 finde
+contrl
diff --git a/pyproject.toml b/pyproject.toml
@@ -28,6 +28,9 @@ sqlmodels = [
   "sqlmodel>=0.0.22",
   "sqlalchemy[mypy]>=2.0.37"
 ]
+ahbicht = [
+    "ahbicht>=0.13.2"
+]
 coverage = [
     "coverage==7.8.0"
 ]

diff --git a/src/fundamend/sqlmodels/ahbtabellen_view.py b/src/fundamend/sqlmodels/ahbtabellen_view.py
@@ -8,7 +8,7 @@
 from pathlib import Path
 from uuid import UUID
 
-from efoli import EdifactFormatVersion
+from efoli import EdifactFormat, EdifactFormatVersion
 from sqlalchemy.sql.functions import func
 from sqlmodel import Field, Session, SQLModel, select
 
@@ -42,6 +42,7 @@ class AhbTabellenLine(SQLModel, table=True):
     __tablename__ = "v_ahbtabellen"
     id: UUID = Field(primary_key=True)
     format_version: EdifactFormatVersion = Field()
+    format: EdifactFormat = Field()
     pruefidentifikator: str = Field()
     path: str = Field()
     id_path: str = Field()
@@ -53,6 +54,8 @@ class AhbTabellenLine(SQLModel, table=True):
     qualifier: str | None = Field()
     line_ahb_status: str | None = Field()
     line_name: str | None = Field()
+    bedingung: str | None = Field()
+    bedingungsfehler: str | None = Field()
     sort_path: str = Field()
 
 

diff --git a/src/fundamend/sqlmodels/create_ahbtabellen_view.sql b/src/fundamend/sqlmodels/create_ahbtabellen_view.sql
@@ -4,27 +4,53 @@
 
 DROP TABLE IF EXISTS v_ahbtabellen; -- this is because sqlmodel tries to create a table first... it doesn't know that this is just a view. bit dirty but ok.
 DROP VIEW IF EXISTS v_ahbtabellen;
-CREATE VIEW v_ahbtabellen as
-SELECT id                                                                                                   as id,
-       edifact_format_version                                                                               as format_version,
-       pruefidentifikator                                                                                   as pruefidentifikator,
-       path,
-       id_path,
-       kommunikation_von                                                                                    as direction,
-       beschreibung                                                                                         as description,
-       'SG' || segmentgroup_id                                                                              as segmentgroup_key, -- eg 'SG6'
-       segment_id                                                                                           as segment_code,     -- e.g 'NAD'
-       dataelement_id                                                                                       as data_element,     -- e.g 'D_3035'
+
+-- v_ahbtabellen: the lines of consolidated_ahm (see CTE below), each enriched with the resolved condition texts
+-- ("bedingung") and a possible parser error message ("bedingungsfehler") from the ahb_expressions table.
+CREATE VIEW v_ahbtabellen AS
+WITH consolidated_ahm AS (SELECT id,
+                                 edifact_format_version,
+                                 format,
+                                 pruefidentifikator,
+                                 path,
+                                 id_path,
+                                 kommunikation_von,
+                                 beschreibung,
+                                 segmentgroup_id,
+                                 segment_id,
+                                 dataelement_id,
+                                 code_value,
+                                 sort_path,
+                                 -- status/name of the line: the first non-NULL value wins, checked in the order
+                                 -- code, dataelement, (dataelementgroup, name only), segment, segmentgroup
+                                 trim(coalesce(code_ahb_status, coalesce(dataelement_ahb_status,
+                                                                         coalesce(segment_ahb_status, segmentgroup_ahb_status))))     AS line_ahb_status,
+                                 coalesce(code_name, coalesce(dataelement_name, coalesce(dataelementgroup_name,
+                                                                                         coalesce(segment_name, segmentgroup_name)))) AS line_name
+                          FROM ahb_hierarchy_materialized ahm
+                          WHERE ahm.TYPE != 'dataelementgroup'
+                            AND (ahm.TYPE != 'dataelement' OR ahm.dataelement_ahb_status IS NOT NULL))
+
+SELECT c.id                                  as id,
+       c.edifact_format_version              as format_version,
+       c.format                              as format,
+       c.pruefidentifikator                  as pruefidentifikator,
+       c.path,
+       c.id_path,
+       c.kommunikation_von                   as direction,
+       c.beschreibung                        as description,
+       'SG' || c.segmentgroup_id             as segmentgroup_key, -- eg 'SG6'
+       c.segment_id                          as segment_code,     -- e.g 'NAD'
+       c.dataelement_id                      as data_element,     -- e.g 'D_3035'
        --CASE
        --    WHEN dataelement_id IS NOT NULL THEN SUBSTR(dataelement_id, 3)
        --    END                                                                                              AS dataelement_without_leading_d_, -- e.g '3035'
-       code_value                                                                                           as qualifier,
+       c.code_value                          as qualifier,
+       c.line_ahb_status                     as line_ahb_status,  -- e.g. 'Muss [28] ∧ [64]'
+       c.line_name                           as line_name,        -- e.g. 'Datums- oder Uhrzeit- oder Zeitspannen-Format, Code' or 'Produkt-Daten für Lieferant relevant'
+       c.sort_path                           as sort_path,
+       -- NULLIF maps empty strings to NULL, so "no text"/"no error" is always NULL in the view
+       NULLIF(ahe.node_texts, '')            as bedingung,
+       NULLIF(ahe.ahbicht_error_message, '') as bedingungsfehler
+FROM consolidated_ahm as c
+         -- LEFT JOIN: lines without a matching expression row are kept (bedingung/bedingungsfehler are NULL then)
+         -- NOTE(review): the join compares the SQL-trimmed status with the 'expression' column, which is filled from
+         -- Python with str.strip() - confirm that both trimmings agree for whitespace other than plain spaces
+         LEFT JOIN ahb_expressions as ahe
+                   ON ahe.edifact_format_version = c.edifact_format_version
+                       AND ahe.format = c.format
+                       AND ahe.expression = c.line_ahb_status;
+
 
-       coalesce(code_ahb_status, coalesce(dataelement_ahb_status,
-                                          coalesce(segment_ahb_status, segmentgroup_ahb_status)))           as line_ahb_status,  -- e.g. 'Muss [28] ∧ [64]'
-       coalesce(code_name, coalesce(dataelement_name, coalesce(dataelementgroup_name,
-                                                               coalesce(segment_name, segmentgroup_name)))) as line_name,        -- e.g. 'Datums- oder Uhrzeit- oder Zeitspannen-Format, Code' or 'Produkt-Daten für Lieferant relevant'
-       sort_path                                                                                            as sort_path
--- the bedingung column is still missing, but we'll solve this one separately
-FROM ahb_hierarchy_materialized
-WHERE TYPE != 'dataelementgroup' AND (TYPE != 'dataelement' OR dataelement_ahb_status IS NOT Null) ;
diff --git a/src/fundamend/sqlmodels/expression_view.py b/src/fundamend/sqlmodels/expression_view.py
@@ -0,0 +1,250 @@
+"""
+helper module to create a table with a "Bedingung" column like the one in the PDF/docx AHBs
+"""
+
+import asyncio
+import logging
+import uuid
+from contextvars import ContextVar
+from typing import Optional
+
+from efoli import EdifactFormat, EdifactFormatVersion
+
+from fundamend.sqlmodels import AhbHierarchyMaterialized, Bedingung
+from fundamend.sqlmodels.anwendungshandbuch import Paket, UbBedingung
+
+try:
+    from sqlalchemy.sql.functions import func
+    from sqlmodel import Field, Session, SQLModel, UniqueConstraint, col, select
+
+except ImportError as import_error:
+    import_error.msg += "; Did you install fundamend[sqlmodels] or did you try to import from fundamend.models instead?"
+    # sqlmodel is only an optional dependency when fundamend is used to fill a database
+    raise
+
+
+try:
+    import inject
+    from ahbicht.content_evaluation.evaluationdatatypes import EvaluatableData, EvaluatableDataProvider
+    from ahbicht.content_evaluation.evaluator_factory import create_content_evaluation_result_based_evaluators
+    from ahbicht.content_evaluation.expression_check import is_valid_expression
+    from ahbicht.content_evaluation.token_logic_provider import SingletonTokenLogicProvider, TokenLogicProvider
+    from ahbicht.expressions.condition_expression_parser import extract_categorized_keys
+    from ahbicht.models.content_evaluation_result import ContentEvaluationResult, ContentEvaluationResultSchema
+    from lark.exceptions import VisitError
+except ImportError as import_error:
+    import_error.msg += "; Did you install fundamend[sqlmodels,ahbicht]?"
+    # sqlmodel and ahbicht are only optional dependencies when fundamend is used to fill a database
+    raise
+
+_logger = logging.getLogger(__name__)
+
+# Holds the ContentEvaluationResult that is currently being evaluated. Its `set` method is handed to ahbicht's
+# is_valid_expression as a callback; _get_evaluatable_data reads the value back via `get`. Defaults to None.
+_content_evaluation_result: ContextVar[Optional[ContentEvaluationResult]] = ContextVar(
+    "_content_evaluation_result", default=None
+)
+
+
+def _get_evaluatable_data() -> EvaluatableData[ContentEvaluationResult]:
+    """
+    Wrap the current value of the _content_evaluation_result context var in an EvaluatableData container.
+    This is the kind of data that the ContentEvaluationResultBased RC/FC Evaluators, HintsProvider and Package Resolver
+    require.
+    :return: EvaluatableData whose body is the schema-dumped content evaluation result
+    """
+    current_result = _content_evaluation_result.get()
+    serialized_result = ContentEvaluationResultSchema().dump(current_result)
+    # format and format version are placeholders: something has to be here, but the concrete values are not important
+    return EvaluatableData(
+        body=serialized_result,
+        edifact_format=EdifactFormat.UTILMD,
+        edifact_format_version=EdifactFormatVersion.FV2504,
+    )
+
+
+def _setup_weird_ahbicht_dependency_injection() -> None:
+    """
+    Register the bindings that ahbicht resolves via the `inject` package:
+    a TokenLogicProvider built from the ContentEvaluationResult based evaluators and an EvaluatableDataProvider that
+    delivers the data from _get_evaluatable_data.
+    The configuration is passed to inject.configure_once.
+    """
+
+    def _bind_ahbicht_dependencies(binder: inject.Binder) -> None:
+        # format and format version are the same placeholders as in _get_evaluatable_data
+        evaluators = create_content_evaluation_result_based_evaluators(
+            EdifactFormat.UTILMD, EdifactFormatVersion.FV2504
+        )
+        token_logic_provider = SingletonTokenLogicProvider([*evaluators])
+        binder.bind(TokenLogicProvider, token_logic_provider)
+        binder.bind_to_provider(EvaluatableDataProvider, _get_evaluatable_data)
+
+    inject.configure_once(_bind_ahbicht_dependencies)
+
+
+def _generate_node_texts(session: Session, expression: str, ahb_pk: uuid.UUID) -> str:
+    """
+    Build the "[key] text" lines that explain the nodes used inside the expression.
+    The keys are extracted from the expression with ahbicht; their texts are looked up in the Bedingung, Paket and
+    UbBedingung tables of the Anwendungshandbuch with the given primary key.
+    :param session: database session used for the lookups
+    :param expression: the expression, e.g. 'Muss [1] U [2]'
+    :param ahb_pk: primary key of the Anwendungshandbuch whose texts are used
+    :return: one "[key] text" line per key found in the database, joined with newlines
+    """
+    # asyncio.run starts a fresh event loop, so this function must not be called from inside a running loop
+    key_extract = asyncio.run(extract_categorized_keys(expression))
+    bedingung_keys = (
+        key_extract.format_constraint_keys + key_extract.requirement_constraint_keys + key_extract.hint_keys
+    )
+    paket_keys = key_extract.package_keys
+    ubbedingung_keys = key_extract.time_condition_keys
+    # probably, we'd be faster if we just loaded all pakete and all bedingungen once instead of selecting over and over
+    # again for each expression
+    bedingung_stmt = select(Bedingung).where(
+        col(Bedingung.nummer).in_(bedingung_keys),  # pylint:disable=no-member
+        Bedingung.anwendungshandbuch_primary_key == ahb_pk,
+    )
+    paket_stmt = select(Paket).where(
+        col(Paket.nummer).in_(paket_keys),  # pylint:disable=no-member
+        Paket.anwendungshandbuch_primary_key == ahb_pk,
+    )
+    ubbedingung_stmt = select(UbBedingung).where(
+        col(UbBedingung.nummer).in_(ubbedingung_keys),  # pylint:disable=no-member
+        UbBedingung.anwendungshandbuch_primary_key == ahb_pk,
+    )
+    # lookup order matters: on equal keys a later table overrides the text of an earlier one
+    text_by_key = {entry.nummer: entry.text for entry in session.exec(bedingung_stmt).all()}
+    text_by_key.update({entry.nummer: entry.text for entry in session.exec(paket_stmt).all()})
+    text_by_key.update({entry.nummer: entry.text for entry in session.exec(ubbedingung_stmt).all()})
+    return "\n".join(f"[{key}] {text}" for key, text in text_by_key.items())
+
+
+def _get_validity_node_texts_and_error_message_cpu_intensive(
+    expression: str, session: Session, anwendungshandbuch_pk: uuid.UUID
+) -> tuple[bool, str, str | None]:
+    """
+    Check the expression with ahbicht's is_valid_expression and resolve its node texts.
+    :param expression: the expression to check, e.g. 'Muss [1] U [2]'
+    :param session: database session used to look up the node texts
+    :param anwendungshandbuch_pk: primary key of the Anwendungshandbuch whose texts are used
+    :return: (is_valid, node_texts, error_message); node_texts is "" for invalid expressions
+    """
+    try:
+        is_valid, error_message = asyncio.run(is_valid_expression(expression, _content_evaluation_result.set))
+    except NotImplementedError:  # ahbicht fault/missing feature -> act like it's valid
+        # is_valid has to be bound here as well, otherwise the return statement raises an UnboundLocalError
+        is_valid, error_message = True, None
+    if is_valid:  # we might actually get a meaningful node_texts even for invalid expressions, but I don't like it
+        node_texts = _generate_node_texts(session, expression, anwendungshandbuch_pk)
+    else:
+        node_texts = ""
+    return is_valid, node_texts, error_message
+
+
+def _get_validity_node_texts_and_error_message_fast(
+    expression: str, session: Session, anwendungshandbuch_pk: uuid.UUID
+) -> tuple[bool, str, str | None]:
+    """
+    Resolve the node texts of the expression; the expression counts as valid if this works without a parser error.
+    :param expression: the expression to check, e.g. 'Muss [1] U [2]'
+    :param session: database session used to look up the node texts
+    :param anwendungshandbuch_pk: primary key of the Anwendungshandbuch whose texts are used
+    :return: (True, node_texts, None) on success, (False, "", <error message>) if the expression cannot be parsed
+    """
+    try:
+        resolved_texts = _generate_node_texts(session, expression, anwendungshandbuch_pk)
+    except (SyntaxError, VisitError) as parse_error:
+        _logger.info("The expression '%s' could not be parsed: %s", expression, parse_error)
+        # the stringified parser error is handed to the caller as error message
+        return False, "", str(parse_error)
+    return True, resolved_texts, None
+
+
+def create_and_fill_ahb_expression_table(session: Session, use_cpu_intensive_validity_check: bool = False) -> None:
+    """
+    Fill the ahb_expressions table with all expressions found in the ahb_hierarchy_materialized table.
+    All four '*_ahb_status' columns are read, blank values are dropped and the remaining expressions are de-duplicated
+    per (format version, format, stripped expression). Each unique expression is then parsed with ahbicht, which has
+    to be done in Python, and added to the session together with its node texts and a possible error message.
+    If the CPU intensive validity check is enabled, not only expression alone is checked but also all its possible
+    outcomes. This leads to only few additional expressions marked as invalid but is very slow.
+    :param session: session on a database in which ahb_hierarchy_materialized has already been populated
+    :param use_cpu_intensive_validity_check: True to use the slow but more thorough validity check
+    :raises ValueError: if no rows could be read from ahb_hierarchy_materialized
+    """
+    _setup_weird_ahbicht_dependency_injection()
+    status_columns = (
+        AhbHierarchyMaterialized.segmentgroup_ahb_status,
+        AhbHierarchyMaterialized.segment_ahb_status,
+        AhbHierarchyMaterialized.dataelement_ahb_status,
+        AhbHierarchyMaterialized.code_ahb_status,
+    )
+    rows: list[tuple[EdifactFormatVersion | None, str, str | None, uuid.UUID]] = []
+    for ahb_status_col in status_columns:
+        stmt = select(
+            AhbHierarchyMaterialized.edifact_format_version,
+            AhbHierarchyMaterialized.format,
+            ahb_status_col,
+            AhbHierarchyMaterialized.anwendungshandbuch_primary_key,
+        )
+        rows.extend(session.exec(stmt))  # type:ignore[arg-type]
+    if not any(rows):
+        raise ValueError(
+            "No rows found in ahb_hierarchy_materialized table; Run `create_db_and_populate_with_ahb_view` before."
+        )
+    # only rows with a format version and a non-blank status are relevant; sorting makes the result deterministic
+    candidates: list[tuple[EdifactFormatVersion, str, str, uuid.UUID]] = sorted(
+        (r for r in rows if r[0] is not None and r[2] is not None and r[2].strip()),  # type:ignore[misc]
+        key=lambda r: (r[0], r[1], r[2]),
+    )
+    # the first row (in sort order) of each (format version, format, stripped expression) key wins
+    first_row_per_key: dict[tuple[str, str, str], tuple[EdifactFormatVersion, str, str, uuid.UUID]] = {}
+    for candidate in candidates:
+        first_row_per_key.setdefault((candidate[0], candidate[1], candidate[2].strip()), candidate)
+    if use_cpu_intensive_validity_check:
+        # as of 2025-04-15 I have no clue how long this actually takes for all expressions
+        resolve = _get_validity_node_texts_and_error_message_cpu_intensive
+    else:
+        resolve = _get_validity_node_texts_and_error_message_fast
+    # there are ~3600 unique rows for FV2410+FV2504 as of 2025-04-15
+    for (_, _, expression), unique_row in first_row_per_key.items():
+        format_version, edifact_format, _, anwendungshandbuch_pk = unique_row
+        _, node_texts, error_message = resolve(expression, session, anwendungshandbuch_pk)
+        ahb_expression_row = AhbExpression(
+            edifact_format_version=format_version,
+            format=edifact_format,
+            expression=expression,
+            node_texts=node_texts,
+            anwendungshandbuch_primary_key=anwendungshandbuch_pk,
+            ahbicht_error_message=error_message,
+        )
+        session.add(ahb_expression_row)
+        _logger.debug(
+            "Added row (%s, %s, %s) to the ahb_expressions_table",
+            ahb_expression_row.edifact_format_version,
+            ahb_expression_row.format,
+            ahb_expression_row.expression,
+        )
+    # counts all rows of the table (the pending ones included), not only those added by this call
+    number_of_inserted_rows = session.scalar(
+        select(func.count(AhbExpression.id))  # type:ignore[arg-type]# pylint:disable=not-callable
+    )
+    _logger.info(
+        "Inserted %d rows into the table %s",
+        number_of_inserted_rows,
+        AhbExpression.__tablename__,
+    )
+    session.commit()
+
+
+class AhbExpression(SQLModel, table=True):
+    """
+    A table that contains all expressions that are used in any AHB, each with format and format_version
+    (there is no prüfidentifikator column).
+    It's filled by create_and_fill_ahb_expression_table, which collects the values of all 'ahb_status' columns of the
+    ahb_hierarchy_materialized table and de-duplicates them in Python.
+    Additionally, this table has a column that resolves the expression to a human-readable text.
+    """
+
+    __tablename__ = "ahb_expressions"
+    # each expression is stored only once per format version and format
+    __table_args__ = (
+        UniqueConstraint(
+            "edifact_format_version",
+            "format",
+            "expression",
+            name="idx_ahb_expressions_metadata_expression",
+        ),
+    )
+    # random UUID, generated on instantiation
+    id: uuid.UUID = Field(primary_key=True, default_factory=uuid.uuid4)
+    edifact_format_version: EdifactFormatVersion = Field(index=True)
+    format: str = Field(index=True)  # the edifact format, e.g. 'UTILMD'
+    # expressions and conditions are always interpreted on a per-format basis (no pruefidentifikator required)
+    expression: str = Field(index=True)  #: e.g 'Muss [1] U [2]'
+    node_texts: str = Field()
+    """
+    this contains the typical "[1] Foo Text\n[2] Bar Text" which explains the meaning of the nodes from inside the
+    respective Expression (e.g. for expression "Muss [1] U [2]")
+    """
+    # error message from parsing/validating the expression; None if there was no error
+    ahbicht_error_message: str | None = Field(default=None)
+    # primary key of the Anwendungshandbuch from which the node texts were resolved
+    anwendungshandbuch_primary_key: uuid.UUID = Field()
diff --git a/tox.ini b/tox.ini
@@ -16,6 +16,7 @@ deps =
     -r requirements.txt
     .[tests]
     .[sqlmodels]
+    .[ahbicht]
 setenv = PYTHONPATH = {toxinidir}/src
 commands = python -m pytest --basetemp={envtmpdir} {posargs} -vv
 
@@ -24,6 +25,7 @@ deps =
     -r requirements.txt
     .[tests]
     .[sqlmodels]
+    .[ahbicht]
 setenv = PYTHONPATH = {toxinidir}/src
 commands = python -m pytest -m snapshot --basetemp={envtmpdir} {posargs} --snapshot-update
-Original file line number
+Diff line change
@@ Expand Up / @@ -16,3 +16,4 @@ sie @@
     rekursion
     rekursive
     finde
+    contrl