@@ -31,9 +31,6 @@ def load_configuration(init_config_path: Path, survey_year_config_path: Path):
     the Survey class object. This initializes the `config` attribute that
     becomes available for future reference and functions.
     """
-    # Validate configuration files
-    # Retrieve the module directory to begin mapping the configuration file location
-    # current_directory = os.path.dirname(os.path.abspath(__file__))
 
     # Build the full configuration file paths and verify they exist
     config_files = [init_config_path, survey_year_config_path]
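The lines removed above were leftover scaffolding; the function now goes straight to building the two paths and checking that they exist. As a hedged illustration only (a hypothetical helper, not the function's actual body), that kind of existence check could look like:

```python
from pathlib import Path


def check_config_files(init_config_path: Path, survey_year_config_path: Path) -> list:
    """Hypothetical helper: confirm both configuration files exist before parsing."""
    config_files = [Path(init_config_path), Path(survey_year_config_path)]
    # Collect any paths that do not resolve to an existing file
    missing = [str(path) for path in config_files if not path.exists()]
    if missing:
        raise FileNotFoundError(f"Missing configuration file(s): {', '.join(missing)}")
    return config_files
```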
@@ -297,21 +294,29 @@ def read_validated_data(
 
         # A single dataframe per entry is expected, so no other fancy operations are needed
         if sheet_name.lower() == "inpfc":
-            df_list = [input_dict[sub_attribute]["inpfc_strata_df"], df]
-            input_dict[sub_attribute]["inpfc_strata_df"] = pd.concat(df_list)
+            # ---- Create the full key name
+            keyname = "inpfc_" + config_map[-1] + "_df"
+            # ---- Create DataFrame list
+            df_list = [input_dict[sub_attribute][keyname], df]
+            # ---- Concatenate/update
+            input_dict[sub_attribute][keyname] = pd.concat(df_list, ignore_index=True)
         else:
             if config_map[0] == "kriging" and config_map[1] == "vario_krig_para":
                 df_list = [input_dict[sub_attribute]["kriging"][config_map[1] + "_df"], df]
                 input_dict[sub_attribute]["kriging"][config_map[1] + "_df"] = pd.concat(
-                    df_list
+                    df_list,
+                    ignore_index=True
                 ).tail(1)
             elif config_map[0] == "kriging":
                 df_list = [input_dict[sub_attribute]["kriging"][config_map[1] + "_df"], df]
-                input_dict[sub_attribute]["kriging"][config_map[1] + "_df"] = pd.concat(df_list)
+                input_dict[sub_attribute]["kriging"][config_map[1] + "_df"] = pd.concat(
+                    df_list,
+                    ignore_index=True
+                )
             else:
                 df_list = [input_dict[sub_attribute][config_map[1] + "_df"], df]
-                input_dict[sub_attribute][config_map[1] + "_df"] = pd.concat(df_list)
-                # TODO: This can be refactored out
+                input_dict[sub_attribute][config_map[1] + "_df"] = pd.concat(df_list,
+                                                                             ignore_index=True)
     elif sub_attribute == "acoustics":
 
         # Toggle through including and excluding age-1
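The reworked INPFC branch accumulates each sheet under a key built as `"inpfc_" + config_map[-1] + "_df"` and now passes `ignore_index=True`, so the stacked rows are renumbered rather than carrying over each sheet's own index. A minimal sketch of that behaviour, using hypothetical frames and a hypothetical key rather than the real survey data, is:

```python
import pandas as pd

# Hypothetical accumulator mirroring input_dict[sub_attribute]
input_dict = {"spatial": {"inpfc_strata_df": pd.DataFrame({"haul_num": [1, 2]})}}

# A newly read sheet to fold into the accumulator
df = pd.DataFrame({"haul_num": [3, 4]})

# Build the key the same way the diff does: "inpfc_" + <sheet label> + "_df"
keyname = "inpfc_" + "strata" + "_df"

# ignore_index=True renumbers the result 0..n-1 instead of repeating 0, 1, 0, 1
# from the two source frames
input_dict["spatial"][keyname] = pd.concat(
    [input_dict["spatial"][keyname], df], ignore_index=True
)
print(input_dict["spatial"][keyname].index.tolist())  # [0, 1, 2, 3]
```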
@@ -539,30 +544,36 @@ def preprocess_spatial(input_dict: dict) -> None:
     """
 
     # Update column names
-    # ---- `geo_strata`
-    input_dict["spatial"]["geo_strata_df"].columns = input_dict["spatial"][
-        "geo_strata_df"
-    ].columns.str.replace(" ", "_")
-    # ---- `inpfc_strata`
-    input_dict["spatial"]["inpfc_strata_df"].columns = input_dict["spatial"][
-        "inpfc_strata_df"
-    ].columns.str.replace(" ", "_")
-    # ---- `inpfc_strata`: rename stratum column name to avoid conflicts
+    # ---- INPFC entries
+    # -------- `inpfc_strata`: rename stratum column name to avoid conflicts
     input_dict["spatial"]["inpfc_strata_df"].rename(
         columns={"stratum_num": "stratum_inpfc"}, inplace=True
     )
+    # -------- `inpfc_geo_strata`: rename stratum column name to avoid conflicts
+    input_dict["spatial"]["inpfc_geo_strata_df"].rename(
+        columns={"stratum_num": "stratum_inpfc"}, inplace=True
+    )
 
-    # Bin data
-    # ---- Create latitude intervals to bin the strata
-    latitude_bins = np.concatenate(
-        [[-90],
-         input_dict["spatial"]["inpfc_strata_df"]["northlimit_latitude"].unique(),
-         [90]]
+
+    # Bin the geo-strata latitudes
+    # ---- INPFC
+    # -------- Latitude bins
+    latitude_bins_inpfc = np.concatenate(
+        [[-90], input_dict["spatial"]["inpfc_geo_strata_df"]["northlimit_latitude"].unique(), [90]]
     )
-    # ---- Add categorical intervals
-    input_dict["spatial"]["inpfc_strata_df"]["latitude_interval"] = pd.cut(
-        input_dict["spatial"]["inpfc_strata_df"]["northlimit_latitude"] * 0.99,
-        latitude_bins,
+    # -------- Add categorical intervals
+    input_dict["spatial"]["inpfc_geo_strata_df"]["latitude_interval"] = pd.cut(
+        input_dict["spatial"]["inpfc_geo_strata_df"]["northlimit_latitude"],
+        latitude_bins_inpfc,
+    )
+    # ---- KS
+    latitude_bins_ks = np.concatenate(
+        [[-90], input_dict["spatial"]["geo_strata_df"]["northlimit_latitude"].unique(), [90]]
+    )
+    # -------- Add categorical intervals
+    input_dict["spatial"]["geo_strata_df"]["latitude_interval"] = pd.cut(
+        input_dict["spatial"]["geo_strata_df"]["northlimit_latitude"],
+        latitude_bins_ks,
     )
 
 
@@ -580,7 +591,7 @@ def preprocess_acoustic_spatial(input_dict: dict) -> None:
     # ---- Create latitude intervals to bin the strata
     latitude_bins = np.concatenate(
         [[-90],
-         input_dict["spatial"]["inpfc_strata_df"]["northlimit_latitude"].unique(),
+         input_dict["spatial"]["inpfc_geo_strata_df"]["northlimit_latitude"].unique(),
          [90]]
     )
     # ---- Bin NASC transects into appropriate INPFC strata
@@ -628,41 +639,13 @@ def preprocess_biology_spatial(input_dict: dict) -> None:
         Dictionary corresponding to the `input` attribute belonging to `Survey`-class
     """
 
-    # Merge haul numbers and spatial information across biological variables
-    # ---- Create interval key for haul numbers to assign INPFC stratum
-    haul_bins = np.sort(
-        np.unique(
-            np.concatenate(
-                [
-                    input_dict["spatial"]["inpfc_strata_df"]["haul_start"] - int(1),
-                    input_dict["spatial"]["inpfc_strata_df"]["haul_end"],
-                ]
-            )
-        )
-    )
-    # ---- Quantize the INPFC dataframe hauls based on strata
-    input_dict["spatial"]["inpfc_strata_df"]["haul_bin"] = pd.cut(
-        (
-            input_dict["spatial"]["inpfc_strata_df"]["haul_start"]
-            + input_dict["spatial"]["inpfc_strata_df"]["haul_end"]
-        )
-        / 2,
-        haul_bins,
-    )
-    # ---- Rename `stratum_num` column
-    input_dict["spatial"]["inpfc_strata_df"].rename(
-        columns={"stratum_num": "stratum_inpfc"}, inplace=True
-    )
-    # ---- Set the index to `haul_bins`
-    inpfc_df = (
-        input_dict["spatial"]["inpfc_strata_df"].copy()
-        .drop_duplicates("haul_bin")
-        .set_index(["haul_bin"])
-    )
-
-    # Get the KS-strata
+    # Get the KS-strata (indexed by haul)
     strata_df = input_dict["spatial"]["strata_df"].copy().set_index(["haul_num"])
-
+
+    # Get the INPFC strata (indexed by haul)
+    inpfc_strata_df = input_dict["spatial"]["inpfc_strata_df"].copy().set_index(["haul_num"])
+
+
     # Loop through the KS-strata to map the correct strata values
     for keys, values in input_dict["biology"].items():
         if isinstance(values, pd.DataFrame) and "haul_num" in values.columns:
@@ -674,26 +657,16 @@ def preprocess_biology_spatial(input_dict: dict) -> None:
             input_dict["biology"][keys]["stratum_num"] = (
                 input_dict["biology"][keys]["stratum_num"].fillna(0.0).astype(int)
             )
-            # ---- Reset the index
-            input_dict["biology"][keys].reset_index(inplace=True)
-            # ---- Bin for `stratum_inpfc`
-            input_dict["biology"][keys]["haul_bin"] = pd.cut(
-                input_dict["biology"][keys]["haul_num"], haul_bins
-            )
+            # ---- Map the correct `stratum_inpfc` value
+            input_dict["biology"][keys]["stratum_inpfc"] = inpfc_strata_df["stratum_inpfc"]
             # ---- NaN mask
-            nan_mask = input_dict["biology"][keys]['haul_bin'].isna()
-            # # ---- Valid haul bins
+            nan_mask = input_dict["biology"][keys]["stratum_inpfc"].isna()
+            # ---- Valid haul bins
             valid_haul_bins = input_dict["biology"][keys].copy().loc[~nan_mask]
-            # ---- Set index to `haul_bins`
-            valid_haul_bins.set_index(["haul_bin"], inplace=True)
-            # ---- Merge
-            valid_haul_bins["stratum_inpfc"] = inpfc_df["stratum_inpfc"]
-            # ---- Reset indices
-            valid_haul_bins.reset_index(inplace=True)
-            # ---- Drop `haul_bin`
-            valid_haul_bins.drop(columns=["haul_bin"], inplace=True)
+            # ---- Change to integer
+            valid_haul_bins["stratum_inpfc"] = valid_haul_bins["stratum_inpfc"].astype(int)
             # ---- Set
-            input_dict["biology"][keys] = valid_haul_bins
+            input_dict["biology"][keys] = valid_haul_bins.reset_index()
 
 
 def preprocess_acoustic_biology_spatial(input_dict: dict, configuration_dict: dict) -> None:
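These two hunks replace the old haul-interval binning with plain index alignment: both strata tables are indexed by `haul_num`, so assigning `inpfc_strata_df["stratum_inpfc"]` to a biology frame that is also indexed by `haul_num` matches rows haul-by-haul, and hauls without a stratum simply come back as NaN and are dropped. A minimal sketch of the pattern, using hypothetical hauls and assuming the biology frame is already haul-indexed at this point (as the final `reset_index()` suggests), is:

```python
import pandas as pd

# Hypothetical INPFC strata table keyed by haul, as in the refactored function
inpfc_strata_df = pd.DataFrame(
    {"haul_num": [101, 102, 103], "stratum_inpfc": [1, 1, 2]}
).set_index("haul_num")

# Hypothetical biology table, assumed to be indexed by haul_num at this stage
specimen_df = pd.DataFrame(
    {"haul_num": [101, 103, 999], "length": [24.0, 31.5, 28.0]}
).set_index("haul_num")

# Index alignment matches rows on haul_num; haul 999 has no stratum and becomes NaN
specimen_df["stratum_inpfc"] = inpfc_strata_df["stratum_inpfc"]

# Drop unmatched hauls, cast to int, and restore haul_num as a column
specimen_df = specimen_df.loc[~specimen_df["stratum_inpfc"].isna()]
specimen_df["stratum_inpfc"] = specimen_df["stratum_inpfc"].astype(int)
specimen_df = specimen_df.reset_index()
print(specimen_df)  # hauls 101 and 103 keep their strata; haul 999 is dropped
```

Relative to the removed interval approach, this only works when the INPFC table carries one row per haul, which is what the renamed `inpfc_strata_df` input now provides.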