Skip to content

chore: Update output data validations for frames and ctf #484

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
May 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@ def helper_angles_injection_errors(
codomain_angles: list[float],
domain_name: str,
codomain_name: str,
angle_tolerance: float = ANGLE_TOLERANCE,
) -> list[str]:
"""Helper function to check if all angles in the domain are in the codomain."""
errors = []
remaining_angles = codomain_angles.copy()
for domain_angle in domain_angles:
found_match = False
for codomain_angle in remaining_angles:
if abs(domain_angle - codomain_angle) < ANGLE_TOLERANCE:
if abs(domain_angle - codomain_angle) < angle_tolerance:
found_match = True
remaining_angles.remove(codomain_angle)
break
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,14 @@ def test_frames_mdoc_range(self, mdoc_data: pd.DataFrame):
assert mdoc_data["TiltAngle"].min() >= -90, "Minimum tilt angle is less than -90"
assert mdoc_data["TiltAngle"].max() <= 90, "Maximum tilt angle is greater than 90"

@allure.title("Mdoc: number of mdoc sections equal number of frames files.")
def test_mdoc_frames(self, mdoc_data: pd.DataFrame, frames_files: list[str]):
@allure.title("Mdoc: number of mdoc sections, equal number of frames files, equals number of items in frames metadata.")
def test_mdoc_frames(self, mdoc_data: pd.DataFrame, frames_files: list[str], frame_metadata: dict[str, dict]):
frames_len = len(frames_files)
if frames_len == 0:
pytest.skip("No frame files to compare")
frames_metadata_len = len(frame_metadata["frames"])
mdoc_len = len(mdoc_data)
assert mdoc_len == frames_len, f"Number of mdoc sections {mdoc_len} mismatches number of frames: {frames_len}"
assert mdoc_len == frames_len == frames_metadata_len, f"Number of mdoc sections {mdoc_len} mismatches number of frames: {frames_len} or frames metadata: {frames_metadata_len}"

@allure.title("Mdoc: Every mdoc filename has an entry for SubFramePath.")
def test_mdoc_sub_frame_paths(self, mdoc_data: pd.DataFrame):
Expand Down Expand Up @@ -70,7 +71,8 @@ def test_mdoc_frame_paths(
if frames_with_missing_mdoc:
errors.append(f"Frames files do not have mdoc entries: {frames_with_missing_mdoc}")

assert len(errors) == 0, "\n".join(errors)
if len(errors) > 0:
raise AssertionError("\n".join(errors))

@allure.title("Mdoc: number of subframes in mdoc matches the number of subframes in the frame file.")
def test_mdoc_numsubframes(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,5 @@ def test_raw_tilt_mdoc(self, raw_tilt_data: pd.DataFrame, mdoc_data: pd.DataFram
"raw tilt file",
"mdoc file",
)
assert len(errors) == 0, "\n".join(errors)
if len(errors) > 0:
raise AssertionError("\n".join(errors))
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
from typing import Any

import allure
import numpy as np
Expand All @@ -9,6 +10,17 @@
TILT_AXIS_ANGLE_REGEX = re.compile(r".*tilt\s*axis\s*angle\s*=\s*([-+]?(?:\d*\.*\d+))")



@pytest.fixture
def mdoc_tilt_axis_angle(mdoc_data: pd.DataFrame) -> float:
    """Return the tilt axis angle parsed from the mdoc titles.

    The first title (lower-cased) that matches ``TILT_AXIS_ANGLE_REGEX`` wins and
    its first capture group is returned as a float. If no title matches, the
    fixture fails via ``pytest.fail``.
    """
    # To convert the data from the mdoc into a data frame, all the global records are
    # added to each section's data, so reading the entry labelled 0 is sufficient.
    global_titles = mdoc_data["titles"][0]
    # Lazy generator: titles after the first hit are never touched.
    candidates = (TILT_AXIS_ANGLE_REGEX.match(entry.lower()) for entry in global_titles)
    first_hit = next((hit for hit in candidates if hit), None)
    if first_hit is None:
        pytest.fail("No Tilt axis angle found")
    return float(first_hit[1])


class TiltSeriesHelper(HelperTestMRCZarrHeader):

@pytest.fixture(autouse=True)
Expand All @@ -24,18 +36,9 @@ def tiltseries_metadata_range(self, tiltseries_metadata: dict) -> list[float]:
tiltseries_metadata["tilt_step"],
).tolist()

@pytest.fixture
def mdoc_tilt_axis_angle(self, mdoc_data: pd.DataFrame) -> float:
    """Return the tilt axis angle found in the mdoc titles.

    Scans the titles in order and returns the first capture group of the first
    lower-cased title matching ``TILT_AXIS_ANGLE_REGEX``; fails the test via
    ``pytest.fail`` when none matches.
    """
    # To convert the data from the mdoc into a data frame, all the global records are
    # added to each section's data, hence only the entry labelled 0 is inspected.
    for raw_title in mdoc_data["titles"][0]:
        found = TILT_AXIS_ANGLE_REGEX.match(raw_title.lower())
        if found:
            return float(found[1])
    pytest.fail("No Tilt axis angle found")

@allure.title("Tiltseries: tilt axis angle is consistent with mdoc file.")
def test_tilt_axis_angle(self, mdoc_tilt_axis_angle: float, tiltseries_metadata: dict):
metadata_tilt_axis = tiltseries_metadata.get("tilt_axis")
assert (
metadata_tilt_axis - 10 <= mdoc_tilt_axis_angle <= metadata_tilt_axis + 10
@allure.title("Tiltseries: tilt axis angle in mdoc file matches that in tilt series metadata (+/- 10 deg).")
def test_tilt_axis_angle(self, mdoc_tilt_axis_angle: float, tiltseries_metadata: dict[str, Any]):
metadata_tilt_axis = tiltseries_metadata["tilt_axis"]
assert (abs(
metadata_tilt_axis - mdoc_tilt_axis_angle) <= 10
), f"Tilt axis angle mismatch: MDOC: {mdoc_tilt_axis_angle} vs Metadata: {metadata_tilt_axis}"
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@ def frames_headers(
"""Get the headers for a list of frame files."""
return helper_util.get_tiff_mrc_headers(frames_files, filesystem)


@pytest.fixture(scope="session")
def frame_metadata(frames_meta_file: str, filesystem: FileSystemApi) -> Dict:
    """Load the frame metadata.

    Opens ``frames_meta_file`` for reading through ``filesystem`` and returns the
    parsed JSON content.
    """
    with filesystem.open(frames_meta_file, "r") as handle:
        parsed = json.load(handle)
    return parsed
# ==================================================================================================
# Gain fixtures
# ==================================================================================================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,16 @@ def frames_files(frames_dir: str, filesystem: FileSystemApi) -> List[str]:
return refined_files


@pytest.fixture(scope="session")
def frames_meta_file(frames_dir: str, filesystem: FileSystemApi) -> str:
    """Return the path [Dataset]/[ExperimentRun]/Frames/frames_metadata.json.

    The path is built from ``frames_dir``; when ``filesystem`` reports that it does
    not exist, the fixture fails via ``pytest.fail`` instead of returning.
    """
    dst = f"{frames_dir}/frames_metadata.json"
    # Guard clause: pytest.fail raises, so the return below is only reached when the file exists.
    if not filesystem.exists(dst):
        pytest.fail(f"The frames directory exists, but frames_metadata.json is not found: {dst}")
    return dst


@pytest.fixture(scope="session")
def mdoc_file(frames_dir: str, filesystem: FileSystemApi) -> str:
"""[Dataset]/[ExperimentRun]/Frames/*.mdoc"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,29 @@
from data_validation.shared.helper.angles_helper import helper_angles_injection_errors


def matrix_to_angle(matrix: list[list[float]]) -> float:
    """
    Converts a 2x2 rotation matrix into an angle in degrees.

    The angle is computed from the first column of the matrix as
    ``arctan2(matrix[1][0], matrix[0][0])`` and then converted to degrees.

    Args:
        matrix: A 2x2 list of floats representing the rotation matrix.

    Returns:
        The rotation angle in degrees, as a built-in ``float``.

    Raises:
        ValueError: If one of the first two rows does not unpack into exactly two values.
    """
    # NOTE(review): a reviewer of the originating change asked for a sanity check that this
    # is the formula that should be used for this use case -- confirm before relying on it.

    # Only the first column is needed; unpacking each row into two names keeps the
    # implicit "two elements per row" check of the matrix shape.
    r00, _ = matrix[0]
    r10, _ = matrix[1]

    # Calculate the angle in radians using atan2, then convert it to degrees
    angle_radians = np.arctan2(r10, r00)

    # Cast so the returned value is a plain float, matching the annotation
    return float(np.rad2deg(angle_radians))


@pytest.mark.alignment
@pytest.mark.parametrize("dataset, run_name, aln_dir", pytest.cryoet.dataset_run_alignment_combinations, scope="session")
class TestAlignments:
Expand Down Expand Up @@ -52,7 +75,8 @@ def test_tilt_raw_tilt(self, alignment_tilt: pd.DataFrame, alignment_tiltseries_
"tilt file",
"raw tilt file",
)
assert len(errors) == 0, "\n".join(errors)
if len(errors) > 0:
raise AssertionError("\n".join(errors))

@allure.title("Alignment: every tilt angle maps to a mdoc tilt angle.")
def test_tilt_mdoc(self, alignment_tilt: pd.DataFrame, mdoc_data: pd.DataFrame):
Expand All @@ -62,7 +86,8 @@ def test_tilt_mdoc(self, alignment_tilt: pd.DataFrame, mdoc_data: pd.DataFrame):
"tilt file",
"mdoc file",
)
assert len(errors) == 0, "\n".join(errors)
if len(errors) > 0:
raise AssertionError("\n".join(errors))

@allure.title(
"Raw tilt: number of raw tilt angles are must be equal to tiltseries size['z'] (implied to be the number of frames files).",
Expand All @@ -89,4 +114,18 @@ def test_tilt_tiltseries_range(
+ f"\nRange: {alignment_tiltseries_metadata['tilt_range']['min']} to {alignment_tiltseries_metadata['tilt_range']['max']}, with step {alignment_tiltseries_metadata['tilt_step']}"
)

@allure.title("Alignment: tilt angle in mdoc file matches that in the alignment metadata [per_section_alignment_parameters.in_plane_rotation] (+/- 10 deg)")
def test_mdoc_tilt_axis_angle_in_alignment_per_section_alignment_parameters(self, mdoc_tilt_axis_angle: float, alignment_metadata: dict[str, dict]):
    """Check the mdoc tilt axis angle against the alignment's in_plane_rotation (+/- 10 deg).

    Skips when the alignment metadata has no ``per_section_alignment_parameters``.
    Every ``in_plane_rotation`` matrix is converted to an angle in degrees; all
    sections must yield the same angle, and that angle must be within an absolute
    tolerance of 10 degrees of ``mdoc_tilt_axis_angle``.
    """
    per_section_alignment_parameters = alignment_metadata.get("per_section_alignment_parameters")
    if not per_section_alignment_parameters:
        pytest.skip("Alignment metadata missing per_section_alignment_parameters.")
    # convert all in_plane_rotation matrices to angles in degrees (section order is kept)
    in_plane_rotations = [matrix_to_angle(psap["in_plane_rotation"]) for psap in per_section_alignment_parameters]
    # check that all in_plane_rotation angles are equal
    assert len(set(in_plane_rotations)) == 1, "in_plane_rotation angles are not all equal."
    # check in_plane_rotation against mdoc_tilt_axis_angle.
    # The tolerance must be absolute: rel=10 would accept anything within 1000% of the expected value.
    in_plane_rotation = in_plane_rotations[0]
    assert in_plane_rotation == pytest.approx(mdoc_tilt_axis_angle, abs=10), f"Mdoc tilt axis angle {mdoc_tilt_axis_angle} does not match alignment metadata['per_section_alignment_parameters'][*]['in_plane_rotation']: {in_plane_rotation}"


### END Tiltseries consistency tests ###
Original file line number Diff line number Diff line change
@@ -1,9 +1,31 @@
from typing import Dict, List, Union

import allure
import pytest
import tifffile
from data_validation.shared.helper.twodee_helper import FrameTestHelper
from mrcfile.mrcinterpreter import MrcInterpreter


@pytest.mark.frame
@pytest.mark.parametrize("dataset, run_name", pytest.cryoet.dataset_run_combinations, scope="session")
class TestFrame(FrameTestHelper):
pass
@allure.title("Frames: When isGainCorrected == False, a Gains entity exists for the run")
def test_is_gain_corrected_false(
    self,
    frame_metadata: Dict,
    gain_headers: Dict[str, Union[List[tifffile.TiffPage], MrcInterpreter]],  # this is skipped if it is not found
):
    """Require at least one gain header when the frames are not gain corrected.

    Nothing is checked when ``frame_metadata["is_gain_corrected"]`` is truthy; a
    missing or falsy value triggers the check.
    """
    if frame_metadata.get("is_gain_corrected"):
        return
    assert len(gain_headers) > 0

@allure.title("Frames: max(acquisitionOrder) <= number of frames -1")
def test_max_acquisition_order(self, frame_metadata: Dict):
acquisition_order_max = max(f.get("acquisition_order", 0) for f in frame_metadata["frames"])
assert acquisition_order_max <= len(frame_metadata["frames"]) - 1

@allure.title("Frames: Sorting acquisitionOrder low-to-high and accumulatedDose low-to-high results in the same order")
def test_sorting_acquisition_order_and_accumulated_dose(self, frame_metadata: Dict):
frames = frame_metadata["frames"]
frames_sorted_by_acquisition_order = sorted(frames, key=lambda f: (f["acquisition_order"]))
frames_sorted_by_accumulated_dose = sorted(frames, key=lambda f: (f["accumulated_dose"]))
assert frames_sorted_by_acquisition_order == frames_sorted_by_accumulated_dose
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import math
import os
from typing import Dict
from typing import Any, Dict

import allure
import numpy as np
import pandas as pd
import pytest
from data_validation.shared.helper.angles_helper import helper_angles_injection_errors
Expand Down Expand Up @@ -135,3 +136,79 @@ def test_mdoc_tiltseries_range(
)

### END metadata-mdoc consistency tests ###

@allure.title("Tiltseries: sum of exposureDose of all frames associated with a tilt series == totalFlux +-1 of tilt series")
def test_exposure_dose(self, frame_metadata: Dict, tiltseries_metadata: Dict):
    """Check that the summed exposure_dose of all frames equals total_flux within +/- 1.

    Frames without an ``exposure_dose`` key contribute 0 to the sum.
    """
    summed_dose = sum(entry.get("exposure_dose", 0) for entry in frame_metadata["frames"])
    expected_flux = tiltseries_metadata["total_flux"]
    assert summed_dose == pytest.approx(expected_flux, abs=1)

@allure.title("PerSectionParameters: number of frames >= # of per section parameters.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The label of what it is testing against like "PerSectionParameters" is very neat! 😄

def test_per_section_parameter_with_num_frames(self, tiltseries_metadata: dict[str, Any], frame_metadata: dict[str, dict]):
num_frames = len(frame_metadata["frames"])
num_per_section_parameters = len(tiltseries_metadata["per_section_parameter"])
assert num_frames >= num_per_section_parameters, f"Number of frames {num_frames} is less than number of per section parameters {num_per_section_parameters}."

@allure.title("PerSectionParameters: -180 <= astigmatic_angle <= 180.")
def test_astigmatic_angle(self, tiltseries_metadata: dict[str, Any]):
errors = []
for i, per_section_parameter in enumerate(tiltseries_metadata["per_section_parameter"]):
astigmatic_angle = per_section_parameter["astigmatic_angle"]
if astigmatic_angle is None:
continue
try:
assert -180 <= astigmatic_angle <= 180
except AssertionError:
errors.append(f"per_section_parameter[{i}].astigmatic_angle= {astigmatic_angle} is out of range [-180, 180].")
if len(errors) > 0:
raise AssertionError("\n".join(errors))

@allure.title("PerSectionParameters: 0 <= phaseShift <= 2*pi.")
def test_phase_shift(self, tiltseries_metadata: dict[str, Any]):
errors = []
for i, per_section_parameter in enumerate(tiltseries_metadata["per_section_parameter"]):
phase_shift = per_section_parameter["phase_shift"]
if phase_shift is None:
continue
try:
assert 0 <= phase_shift <= 2 * np.pi
except AssertionError:
errors.append(f"per_section_parameter[{i}].phase_shift= {phase_shift} is out of range [0, 2*pi].")
if len(errors) > 0:
raise AssertionError("\n".join(errors))

@allure.title("PerSectionParameters: maxResolution > 0.")
def test_max_resolution(self, tiltseries_metadata: dict[str, Any]):
errors = []
for i, per_section_parameter in enumerate(tiltseries_metadata["per_section_parameter"]):
max_resolution = per_section_parameter["max_resolution"]
if max_resolution is None:
continue
try:
assert max_resolution > 0
except AssertionError:
errors.append(f"per_section_parameter[{i}].max_resolution= {max_resolution} is not greater than 0.")
if len(errors) > 0:
raise AssertionError("\n".join(errors))

@allure.title("PerSectionParameters: rawAngle matches mdoc TiltAngle (+-10^-3 deg).")
def test_raw_angle(self, tiltseries_metadata: dict[str, Any], mdoc_data: pd.DataFrame):
    """Check that every mdoc TiltAngle has a matching per-section raw_angle.

    Matching is delegated to ``helper_angles_injection_errors`` with an angle
    tolerance of 10^-3; any error strings it returns are joined into one failure.
    """
    mdoc_angles = mdoc_data["TiltAngle"].to_list()
    raw_angles = [psp["raw_angle"] for psp in tiltseries_metadata["per_section_parameter"]]
    errors = helper_angles_injection_errors(
        mdoc_angles,
        raw_angles,
        "mdoc file",
        "tiltseries metadata per_section_parameter raw_angle",
        angle_tolerance=10 ** -3,
    )
    if not errors:
        return
    raise AssertionError("\n".join(errors))

@allure.title("PerSectionParameters: 0 <= zIndex <= (z-Dimension of tilt series - 1).")
def test_z_index(self, tiltseries_metadata: dict[str, Any]):
errors = []
for i, per_section_parameter in enumerate(tiltseries_metadata["per_section_parameter"]):
z_index = per_section_parameter["z_index"]
try:
assert 0 <= z_index <= (tiltseries_metadata["size"]["z"] - 1)
except AssertionError:
errors.append(f"per_section_parameter[{i}].z_index= {z_index} is out of range [0, {tiltseries_metadata['size']['z'] - 1}].")
if len(errors) > 0:
raise AssertionError("\n".join(errors))
Loading