Skip to content

Commit 7bd02c0

Browse files
committed
Change to imported stratum-haul mapping for INPFC
1 parent 2b3fa08 commit 7bd02c0

File tree

6 files changed

+114
-106
lines changed

6 files changed

+114
-106
lines changed

docs/example_notebooks/example_echopop_workflow.ipynb

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,8 +271,9 @@
271271
" * `specimen_df`: aged length and weight measurements\n",
272272
"* `spatial`\n",
273273
" * `strata_df`: the `KS` stratum definitions and fraction of hake for each haul\n",
274-
" * `geo_strata_df`: latitudinal limits of the `KS` strata\n",
275-
" * `inpfc_strata_df`: the `INPFC` stratum definitions and their respective latitudinal limits\n",
274+
" * `inpfc_strata_df`: the `INPFC` stratum definitions and fraction of hake for each haul\n",
275+
" * `geo_strata_df`: the `KS` stratum definitions and their respective latitudinal limits\n",
276+
" * `inpfc_geo_strata_df`: the `INPFC` stratum definitions and their respective latitudinal limits\n",
276277
"* `statistics`\n",
277278
" * `kriging`\n",
278279
" * `mesh_df`: kriging mesh\n",

docs/implementation/preprocessing_data.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,16 +111,17 @@ A color-coded schematic that provides a visual overview of how data are loaded a
111111
- `specimen` ➡️ `Survey.input["biology"]["specimen_df"]`
112112
- `Configuration` ➡️ `Survey.input["biology"]`
113113
- `bio_hake_len_bin`/`bio_hake_age_bin` ➡️ `Survey.input["biology"]["distributions"]`
114-
- `Kriging` ➡️ `Survey.input["statistics"]`
114+
- `Kriging` ➡️ `Survey.input["statistics"]`
115115
- `mesh` ➡️ `Survey.input["statistics"]["kriging"]["mesh_df"]`
116116
- `isobath_200m` ➡️ `Survey.input["statistics"]["kriging"]["isobath_200m_df"]`
117117
- `vario_krig_para` ➡️ `Survey.input["statistics"]["kriging"]["vario_krig_para"]`
118118
- `NASC` ➡️ `Survey.input["acoustics"]`
119119
- `{GROUP}` (all) ➡️ `Survey.input["acoustics"]["nasc_df"]`
120120
- `Stratification` ➡️ `Survey.input["spatial"]`
121121
- `strata` ➡️: `Survey.input["spatial"]["strata_df"]`
122+
- `strata` ➡️: `Survey.input["spatial"]["inpfc_strata_df"]`
122123
- `geo_strata` ➡️ `Survey.input["spatial"]["geo_strata_df"]`
123-
- `geo_strata` ➡️ `Survey.input["spatial"]["inpfc_strata_df"]`
124+
- `geo_strata` ➡️ `Survey.input["spatial"]["inpfc_geo_strata_df"]`
124125

125126
Echoview exports can be **<span style="color:#6666FF">alternatively processed and loaded</span>** into `Echopop` by incorporating the `nasc_exports` parameters within `initialization_config.yml`. These files can also be processed outside of the same `Echopop` workflow whereby the processed exports can then be saved and used to parameterize the `NASC` dataset definitions within the `survey_year_{YEAR}_config.yml` configuration file.
126127

echopop/core.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,9 @@
120120
},
121121
"spatial": {
122122
"strata_df": pd.DataFrame(),
123-
"geo_strata_df": pd.DataFrame(),
124123
"inpfc_strata_df": pd.DataFrame(),
124+
"geo_strata_df": pd.DataFrame(),
125+
"inpfc_geo_strata_df": pd.DataFrame(),
125126
},
126127
"statistics": {
127128
"kriging": {
@@ -182,7 +183,7 @@
182183
},
183184
"stratification": {
184185
"name": "spatial",
185-
"data": ["strata", "geo_strata", "inpfc_strata"],
186+
"data": ["strata", "inpfc_strata", "geo_strata", "inpfc_geo_strata"],
186187
"data_label": [
187188
"strata:KS strata",
188189
"geo_strata:Georeferenced KS strata",

echopop/survey.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,9 @@ def transect_analysis(
260260
"unique_strata": (
261261
np.unique(self.input["spatial"]["strata_df"]["stratum_num"])
262262
if stratum == "ks"
263-
else np.unique(self.input["spatial"]["inpfc_strata_df"]["stratum_inpfc"])
263+
else np.unique(
264+
self.input["spatial"]["inpfc_geo_strata_df"]["stratum_inpfc"]
265+
)
264266
),
265267
"exclude_age1": exclude_age1,
266268
}

echopop/utils/load.py

Lines changed: 53 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,6 @@ def load_configuration(init_config_path: Path, survey_year_config_path: Path):
3131
the Survey class object. This initializes the `config` attribute that
3232
becomes available for future reference and functions.
3333
"""
34-
# Validate configuration files
35-
# Retrieve the module directory to begin mapping the configuration file location
36-
# current_directory = os.path.dirname(os.path.abspath(__file__))
3734

3835
# Build the full configuration file paths and verify they exist
3936
config_files = [init_config_path, survey_year_config_path]
@@ -297,21 +294,29 @@ def read_validated_data(
297294

298295
# A single dataframe per entry is expected, so no other fancy operations are needed
299296
if sheet_name.lower() == "inpfc":
300-
df_list = [input_dict[sub_attribute]["inpfc_strata_df"], df]
301-
input_dict[sub_attribute]["inpfc_strata_df"] = pd.concat(df_list)
297+
# ---- Create the full key name
298+
keyname = "inpfc_" + config_map[-1] + "_df"
299+
# ---- Create DataFrame list
300+
df_list = [input_dict[sub_attribute][keyname], df]
301+
# ---- Concatenate/update
302+
input_dict[sub_attribute][keyname] = pd.concat(df_list, ignore_index=True)
302303
else:
303304
if config_map[0] == "kriging" and config_map[1] == "vario_krig_para":
304305
df_list = [input_dict[sub_attribute]["kriging"][config_map[1] + "_df"], df]
305306
input_dict[sub_attribute]["kriging"][config_map[1] + "_df"] = pd.concat(
306-
df_list
307+
df_list,
308+
ignore_index=True
307309
).tail(1)
308310
elif config_map[0] == "kriging":
309311
df_list = [input_dict[sub_attribute]["kriging"][config_map[1] + "_df"], df]
310-
input_dict[sub_attribute]["kriging"][config_map[1] + "_df"] = pd.concat(df_list)
312+
input_dict[sub_attribute]["kriging"][config_map[1] + "_df"] = pd.concat(
313+
df_list,
314+
ignore_index=True
315+
)
311316
else:
312317
df_list = [input_dict[sub_attribute][config_map[1] + "_df"], df]
313-
input_dict[sub_attribute][config_map[1] + "_df"] = pd.concat(df_list)
314-
# TODO: This can be refactored out
318+
input_dict[sub_attribute][config_map[1] + "_df"] = pd.concat(df_list,
319+
ignore_index=True)
315320
elif sub_attribute == "acoustics":
316321

317322
# Toggle through including and excluding age-1
@@ -539,30 +544,36 @@ def preprocess_spatial(input_dict: dict) -> None:
539544
"""
540545

541546
# Update column names
542-
# ---- `geo_strata`
543-
input_dict["spatial"]["geo_strata_df"].columns = input_dict["spatial"][
544-
"geo_strata_df"
545-
].columns.str.replace(" ", "_")
546-
# ---- `inpfc_strata`
547-
input_dict["spatial"]["inpfc_strata_df"].columns = input_dict["spatial"][
548-
"inpfc_strata_df"
549-
].columns.str.replace(" ", "_")
550-
# ---- `inpfc_strata`: rename stratum column name to avoid conflicts
547+
# ---- INPFC entries
548+
# -------- `inpfc_strata`: rename stratum column name to avoid conflicts
551549
input_dict["spatial"]["inpfc_strata_df"].rename(
552550
columns={"stratum_num": "stratum_inpfc"}, inplace=True
553551
)
552+
# -------- `inpfc_geo_strata`: rename stratum column name to avoid conflicts
553+
input_dict["spatial"]["inpfc_geo_strata_df"].rename(
554+
columns={"stratum_num": "stratum_inpfc"}, inplace=True
555+
)
554556

555-
# Bin data
556-
# ---- Create latitude intervals to bin the strata
557-
latitude_bins = np.concatenate(
558-
[[-90],
559-
input_dict["spatial"]["inpfc_strata_df"]["northlimit_latitude"].unique(),
560-
[90]]
557+
558+
# Bin the geo-strata latitudes
559+
# ---- INPFC
560+
# -------- Latitude bins
561+
latitude_bins_inpfc = np.concatenate(
562+
[[-90], input_dict["spatial"]["inpfc_geo_strata_df"]["northlimit_latitude"].unique(), [90]]
561563
)
562-
# ---- Add categorical intervals
563-
input_dict["spatial"]["inpfc_strata_df"]["latitude_interval"] = pd.cut(
564-
input_dict["spatial"]["inpfc_strata_df"]["northlimit_latitude"] * 0.99,
565-
latitude_bins,
564+
# -------- Add categorical intervals
565+
input_dict["spatial"]["inpfc_geo_strata_df"]["latitude_interval"] = pd.cut(
566+
input_dict["spatial"]["inpfc_geo_strata_df"]["northlimit_latitude"],
567+
latitude_bins_inpfc,
568+
)
569+
# ---- KS
570+
latitude_bins_ks = np.concatenate(
571+
[[-90], input_dict["spatial"]["geo_strata_df"]["northlimit_latitude"].unique(), [90]]
572+
)
573+
# -------- Add categorical intervals
574+
input_dict["spatial"]["geo_strata_df"]["latitude_interval"] = pd.cut(
575+
input_dict["spatial"]["geo_strata_df"]["northlimit_latitude"],
576+
latitude_bins_ks,
566577
)
567578

568579

@@ -580,7 +591,7 @@ def preprocess_acoustic_spatial(input_dict: dict) -> None:
580591
# ---- Create latitude intervals to bin the strata
581592
latitude_bins = np.concatenate(
582593
[[-90],
583-
input_dict["spatial"]["inpfc_strata_df"]["northlimit_latitude"].unique(),
594+
input_dict["spatial"]["inpfc_geo_strata_df"]["northlimit_latitude"].unique(),
584595
[90]]
585596
)
586597
# ---- Bin NASC transects into appropriate INPFC strata
@@ -628,41 +639,13 @@ def preprocess_biology_spatial(input_dict: dict) -> None:
628639
Dictionary corresponding to the `input` attribute belonging to `Survey`-class
629640
"""
630641

631-
# Merge haul numbers and spatial information across biological variables
632-
# ---- Create interval key for haul numbers to assign INPFC stratum
633-
haul_bins = np.sort(
634-
np.unique(
635-
np.concatenate(
636-
[
637-
input_dict["spatial"]["inpfc_strata_df"]["haul_start"] - int(1),
638-
input_dict["spatial"]["inpfc_strata_df"]["haul_end"],
639-
]
640-
)
641-
)
642-
)
643-
# ---- Quantize the INPFC dataframe hauls based on strata
644-
input_dict["spatial"]["inpfc_strata_df"]["haul_bin"] = pd.cut(
645-
(
646-
input_dict["spatial"]["inpfc_strata_df"]["haul_start"]
647-
+ input_dict["spatial"]["inpfc_strata_df"]["haul_end"]
648-
)
649-
/ 2,
650-
haul_bins,
651-
)
652-
# ---- Rename `stratum_num` column
653-
input_dict["spatial"]["inpfc_strata_df"].rename(
654-
columns={"stratum_num": "stratum_inpfc"}, inplace=True
655-
)
656-
# ---- Set the index to `haul_bins`
657-
inpfc_df = (
658-
input_dict["spatial"]["inpfc_strata_df"].copy()
659-
.drop_duplicates("haul_bin")
660-
.set_index(["haul_bin"])
661-
)
662-
663-
# Get the KS-strata
642+
# Get the KS-strata (indexed by haul)
664643
strata_df = input_dict["spatial"]["strata_df"].copy().set_index(["haul_num"])
665-
644+
645+
# Get the INPFC strata (indexed by haul)
646+
inpfc_strata_df = input_dict["spatial"]["inpfc_strata_df"].copy().set_index(["haul_num"])
647+
648+
666649
# Loop through the KS-strata to map the correct strata values
667650
for keys, values in input_dict["biology"].items():
668651
if isinstance(values, pd.DataFrame) and "haul_num" in values.columns:
@@ -674,26 +657,16 @@ def preprocess_biology_spatial(input_dict: dict) -> None:
674657
input_dict["biology"][keys]["stratum_num"] = (
675658
input_dict["biology"][keys]["stratum_num"].fillna(0.0).astype(int)
676659
)
677-
# ---- Reset the index
678-
input_dict["biology"][keys].reset_index(inplace=True)
679-
# ---- Bin for `stratum_inpfc`
680-
input_dict["biology"][keys]["haul_bin"] = pd.cut(
681-
input_dict["biology"][keys]["haul_num"], haul_bins
682-
)
660+
# ---- Map the correct `stratum_inpfc` value
661+
input_dict["biology"][keys]["stratum_inpfc"] = inpfc_strata_df["stratum_inpfc"]
683662
# ---- NaN mask
684-
nan_mask = input_dict["biology"][keys]['haul_bin'].isna()
685-
# # ---- Valid haul bins
663+
nan_mask = input_dict["biology"][keys]["stratum_inpfc"].isna()
664+
# ---- Valid haul bins
686665
valid_haul_bins = input_dict["biology"][keys].copy().loc[~nan_mask]
687-
# ---- Set index to `haul_bins`
688-
valid_haul_bins.set_index(["haul_bin"], inplace=True)
689-
# ---- Merge
690-
valid_haul_bins["stratum_inpfc"] = inpfc_df["stratum_inpfc"]
691-
# ---- Reset indices
692-
valid_haul_bins.reset_index(inplace=True)
693-
# ---- Drop `haul_bin`
694-
valid_haul_bins.drop(columns=["haul_bin"], inplace=True)
666+
# ---- Change to integer
667+
valid_haul_bins["stratum_inpfc"] = valid_haul_bins["stratum_inpfc"].astype(int)
695668
# ---- Set
696-
input_dict["biology"][keys] = valid_haul_bins
669+
input_dict["biology"][keys] = valid_haul_bins.reset_index()
697670

698671

699672
def preprocess_acoustic_biology_spatial(input_dict: dict, configuration_dict: dict) -> None:

0 commit comments

Comments
 (0)