Skip to content

Commit 7d7461a

Browse files
committed
Enable 2011 transect interval filter file ingestion
1 parent 2695e16 commit 7d7461a

File tree

5 files changed

+79
-2
lines changed

5 files changed

+79
-2
lines changed

config_files/survey_year_2011_config.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ NASC:
6767
# file that includes all ages
6868
filename: Exports/US_CAN_NASC_2011_table_all_ages.xlsx
6969
sheetname: Sheet1
70+
transect_filter:
71+
# Transect interval filtering
72+
filename: C:/Users/Brandyn/Documents/GitHub/EchoPro_data/echopop_2011/Kriging_files/Kriging_grid_files/Transect Bounds to 2011.xlsx
73+
sheetname: "1995-2011"
7074
export_regions:
7175
filename: Stratification/US&CAN_T_reg_haul_final.csv
7276
kriging:

echopop/compatibility_parameters_test.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,9 @@
157157
"ks"
158158
],
159159
"write_transect_region_file": false,
160-
"transect_replicates": 10
160+
"transect_replicates": 10,
161+
"transect_filter": {
162+
"subset_filter": "survey == 201103"
163+
}
161164
}
162165
}

echopop/spatial/transect.py

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
from typing import List, Union
1+
from typing import List, Optional, TYPE_CHECKING, Union
2+
import re
23

34
import geopandas as gpd
45
import geopy.distance
@@ -7,6 +8,8 @@
78
from shapely.geometry import Point, Polygon
89
from shapely.ops import unary_union
910

11+
if TYPE_CHECKING: from ..survey import Survey
12+
1013
from ..spatial.projection import wgs84_to_utm
1114

1215

@@ -131,6 +134,64 @@ def edit_transect_columns(transect_dict: dict, settings_dict: dict):
131134
# Return the output
132135
return transect_info.reset_index()
133136

137+
def filter_transect_intervals(
    survey_obj: "Survey",
    subset_filter: Optional[str] = None,
) -> pd.DataFrame:
    """
    Restrict the acoustic NASC data to the transect intervals listed in a filter file.

    The filter file location is read from ``survey_obj.config["transect_filter"]`` (keys
    ``filename`` and ``sheetname``). Its column names are lowercased and ``transect`` is renamed
    to ``transect_num``; after that it must provide ``transect_num``, ``log_start`` and
    ``log_end``. A row of ``survey_obj.input["acoustics"]["nasc_df"]`` is retained when its
    ``[distance_s, distance_e]`` span overlaps at least one ``[log_start, log_end]`` interval
    listed for the same ``transect_num``. Rows whose transect has no interval in the (optionally
    subset) filter file are dropped.

    Parameters
    ----------
    survey_obj: Survey
        Object exposing the ``config`` and ``input`` dictionaries described above. The filtered
        data replace ``survey_obj.input["acoustics"]["nasc_df"]``.
    subset_filter: Optional[str]
        Expression passed to ``pandas.DataFrame.query`` to subset the filter file before it is
        applied (e.g. ``"survey == 201103"``). Column names must be written in lowercase since
        the filter file columns are lowercased before the query is evaluated.

    Returns
    -------
    pd.DataFrame
        The filtered NASC data (the same object that is stored back on ``survey_obj``), with the
        original columns and a fresh ``RangeIndex``.

    Raises
    ------
    ValueError
        If ``subset_filter`` references a name that is not a column of the filter file.
    """

    # Get configuration settings
    transect_filter_settings = survey_obj.config["transect_filter"]

    # Read in file
    transect_filter_df = pd.read_excel(
        transect_filter_settings["filename"],
        sheet_name=transect_filter_settings["sheetname"],
    )

    # Lowercase column names
    transect_filter_df.columns = transect_filter_df.columns.str.lower()
    # ---- Apply hard-coded renaming
    transect_filter_df = transect_filter_df.rename(columns={"transect": "transect_num"})

    # Apply a filter, if needed
    if subset_filter is not None:
        # ---- Blank out quoted string literals so that their contents (e.g. the `US` in
        # ---- `region == "US"`) are not mistaken for column names
        unquoted_filter = re.sub(r"""(["']).*?\1""", " ", subset_filter)
        # ---- Extract identifier-like tokens from string
        tokens = re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", unquoted_filter)
        # ---- Provide typical Python operator keywords
        keywords = {"and", "or", "not", "in", "notin", "True", "False"}
        # ---- Collect referenced names that are not columns (order-preserving, no repeats)
        missing = list(
            dict.fromkeys(
                t for t in tokens if t not in keywords and t not in transect_filter_df.columns
            )
        )
        # ---- Raise error, if needed
        if missing:
            raise ValueError(f"Invalid column(s): {', '.join(missing)}")
        # ---- Apply filter
        transect_filter_df = transect_filter_df.query(subset_filter)

    # Get the validated input data
    nasc_df = survey_obj.input["acoustics"]["nasc_df"]
    # ---- Retain original columns
    df_columns = nasc_df.columns.tolist()
    # ---- Tag every row so that it can be identified again after the merge
    row_id = "_nasc_row_id"
    df = nasc_df.reset_index(drop=True)
    df = df.assign(**{row_id: range(len(df))})

    # Pair each NASC row with every filter interval defined for the same transect. This is a
    # keyed left merge: a transect with several intervals yields several rows per NASC row, and
    # a transect without any interval yields a single row with missing bounds
    df_expanded = df.merge(
        transect_filter_df[["transect_num", "log_start", "log_end"]],
        on="transect_num",
        how="left",
    )

    # Check for overlap (comparisons against missing bounds evaluate to False)
    mask = (df_expanded["distance_e"] >= df_expanded["log_start"]) & (
        df_expanded["distance_s"] <= df_expanded["log_end"]
    )

    # Keep each overlapping NASC row exactly once, even when it overlaps several intervals;
    # otherwise the same interval would be counted more than once downstream
    filtered_df = (
        df_expanded[mask]
        .drop_duplicates(subset=row_id)
        .filter(df_columns)
        .reset_index(drop=True)
    )

    # Apply to original dataset
    survey_obj.input["acoustics"]["nasc_df"] = filtered_df

    # Return the filtered data, as declared by the return annotation
    return filtered_df
194+
134195

135196
def transect_spatial_features(transect_data: pd.DataFrame):
136197
"""

echopop/test_survey.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import glob
1818
import json
1919
import os
20+
import sys
2021

2122
####################################################################################################
2223
# CURRENT SURVEY YEAR BEING TESTED: 2019
@@ -54,6 +55,13 @@
5455
survey.load_acoustic_data()
5556
else:
5657
Survey(init_config_path, survey_year_config_path).load_acoustic_data()
58+
# ---- Test whether transect data must be subset
59+
if "transect_filter" in parameters:
60+
# ---- Conditional import
61+
if "filter_transect_intervals" not in dir():
62+
from echopop.spatial.transect import filter_transect_intervals
63+
# ---- Filter transect intervals
64+
filter_transect_intervals(survey, parameters["transect_filter"]["subset_filter"])
5765
# ---- Load survey data
5866
survey.load_survey_data()
5967
# ---- Initial transect analysis test

echopop/utils/validate_dict.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,7 @@ class CONFIG_DATA_MODEL(InputModel):
565565
data_root_dir: Optional[str] = None
566566
CAN_haul_offset: Optional[int] = None
567567
ship_id: Optional[Union[int, str, float]] = None
568+
transect_filter: Optional[XLSXFile] = None
568569
export_regions: Optional[Union[Union[CSVFile, XLSXFile],
569570
Dict[str, Union[CSVFile, XLSXFile]]]] = None
570571

0 commit comments

Comments
 (0)