Changes to address odd 2012 virtual transect issue

brandynlucca · brandynlucca · commit 3dec49037c68 · 2024-12-13T14:27:53.000-08:00
diff --git a/config_files/initialization_config_2012.yml b/config_files/initialization_config_2012.yml
@@ -45,8 +45,7 @@
     file_columns: [transect_num, region_id, vessel_log_start, vessel_log_end, latitude, longitude,
                     stratum_num, transect_spacing, layer_mean_depth, layer_height, bottom_depth,
                     NASC, haul_num]
-  transect_region_mapping:
-    pattern: "{REGION_CLASS}{HAUL_NUM}{COUNTRY}"
+  transect_region_mapping:    
     parts:
       REGION_CLASS:
         - pattern: ^[hH](?![a-zA-Z]|1a)
@@ -69,6 +68,13 @@
           label: CAN
         - pattern: ^[uU]
           label: US
+    pattern: "{REGION_CLASS}{HAUL_NUM}{COUNTRY}"
+    inpfc_strata_region:
+      CAN: [6]
+      US: [1, 2, 3, 4, 5]
+    save_file_template: "{COUNTRY}_{YEAR}_transect_region_haul_{GROUP}.xlsx"      
+    save_file_directory: /Stratification
+    save_file_sheetname: Sheet1
 
 
   #####################################################################################################################
diff --git a/config_files/survey_year_2012_config.yml b/config_files/survey_year_2012_config.yml
@@ -67,6 +67,13 @@ NASC:
     # file that includes all ages
     filename: Exports/US_CAN_NASC_2012_table_all_ages.xlsx
     sheetname: Sheet1
+export_regions:
+  all_ages:
+    filename: Stratification/US&CAN_2012_transect_region_haul_age1+ auto final_new.xlsx
+    sheetname: Sheet1
+  no_age1:
+    filename: Stratification/US&CAN_2012_transect_region_haul_age2+ auto final_new.xlsx
+    sheetname: Sheet1
 kriging:
   mesh:
     filename: Kriging_files/Kriging_grid_files/krig_grid2_5nm_cut_centroids_2013.xlsx
diff --git a/echopop/statistics.py b/echopop/statistics.py
@@ -46,31 +46,46 @@ def stratified_transect_statistic(
     stratum_col = settings_dict["stratum_name"]
     # ---- Get the variable name
     var_name = settings_dict["variable"]
+    # ---- Create copy of transect data
+    transect_copy = transect_data.copy()
+    # ---- Create transect summary copy
+    summary_copy = transect_summary.copy()
+    # ---- Create strata summary copy
+    strata_copy = strata_summary.copy()
 
     # Get indexed transect distance
-    transect_distances = transect_summary.set_index([stratum_col, 
-                                                     "transect_num"])["transect_distance"]
+    transect_distances = summary_copy.set_index([stratum_col, 
+                                                 "transect_num"])["transect_distance"]
     # ---- Drop any transects where distance is 0.0 (i.e. from a single mesh node)
     if np.any(transect_distances == 0.0):
         # ---- Pick out transects where distance = 0.0 nmi
-        zero_distances = transect_distances[transect_distances == 0.0].index.to_numpy()
+        zero_distances = transect_distances[transect_distances == 0.0]
         # ---- Update `transect_distances`
         transect_distances = transect_distances[transect_distances > 0.0]
+        # ---- Index by [stratum, transect_num] so labels match `transect_distances`
+        transect_copy.set_index([stratum_col, "transect_num"], inplace=True)
         # ---- Update `transect_data`
-        transect_data = transect_data[~transect_data["transect_num"].isin(zero_distances)]
+        transect_copy.drop(zero_distances.index, inplace=True)
+        # ---- Reset index: stratum and transect number become columns again
+        transect_copy.reset_index(inplace=True)
+        # ---- Index by [stratum, transect_num] so labels match `zero_distances`
+        summary_copy.set_index([stratum_col, "transect_num"], inplace=True)
         # ---- Get the 'poor' transect strata
         zero_distances_strata = (
-            transect_summary.loc[zero_distances].groupby([stratum_col], observed=False).size()
+            summary_copy.loc[zero_distances.index]
+            .reset_index().groupby([stratum_col], observed=False).size()
         )
         # ---- Update `transect_summary`
-        transect_summary = transect_summary[~transect_summary["transect_num"].isin(zero_distances)]
+        summary_copy.drop(zero_distances.index, inplace=True)
+        # ---- Reset index: stratum and transect number become columns again
+        summary_copy.reset_index(inplace=True)
         # ---- Update `strata_summary`
         # -------- Set index
-        strata_summary.set_index([stratum_col], inplace=True)
+        strata_copy.set_index([stratum_col], inplace=True) 
         # -------- Subtract the 'poor' transects from the total transect counts
-        strata_summary["transect_count"] = strata_summary["transect_count"] - zero_distances_strata
+        strata_copy["transect_count"] = strata_copy["transect_count"] - zero_distances_strata
         # -------- Reset index
-        strata_summary.reset_index(inplace=True)
+        strata_copy.reset_index(inplace=True)
 
         if settings_dict["verbose"]:
             if settings_dict["dataset"] == "kriging":
@@ -92,7 +107,7 @@ def stratified_transect_statistic(
 
     # Calculate the number of transects per stratum
     num_transects_to_sample = np.round(
-        strata_summary.set_index(stratum_col)["transect_count"] * transect_sample
+        strata_copy.set_index(stratum_col)["transect_count"] * transect_sample
     ).astype(int)
 
     # Offset term used for later variance calculation
@@ -102,25 +117,25 @@ def stratified_transect_statistic(
     sample_dof = num_transects_to_sample * (num_transects_to_sample - sample_offset)
 
     # Transect areas
-    transect_areas = transect_summary.groupby([stratum_col, "transect_num"], 
-                                              observed=False)["transect_area"].sum()
+    transect_areas = summary_copy.groupby([stratum_col, "transect_num"], 
+                                          observed=False)["transect_area"].sum()
 
     # Get indexed total transect area
-    total_transect_area = strata_summary.set_index(stratum_col)["transect_area_total"]
+    total_transect_area = strata_copy.set_index(stratum_col)["transect_area_total"]
 
     # Get indexed biological value
-    biological_values = transect_data.groupby([stratum_col, "transect_num"],
+    biological_values = transect_copy.groupby([stratum_col, "transect_num"],
                                               observed=False)[var_name].sum()
 
     # Get indexed transect numbers
-    transect_numbers = transect_summary.set_index(stratum_col)["transect_num"]
+    transect_numbers = summary_copy.set_index(stratum_col)["transect_num"]
 
     # Calculate the summed/mean density per transect
     # ---- Set temporary index
-    transect_summary.set_index([stratum_col, "transect_num"], inplace=True)
+    summary_copy.set_index([stratum_col, "transect_num"], inplace=True)
     # ---- Compute summed/mean density
-    transect_summary["density"] = transect_data.groupby([stratum_col, "transect_num"],
-                                                        observed=False)[
+    summary_copy["density"] = transect_copy.groupby([stratum_col, "transect_num"],
+                                                    observed=False)[
         settings_dict["variable"]
     ].sum()
 
@@ -204,17 +219,17 @@ def stratified_transect_statistic(
     total_area = area_array.sum()
     
     # Reset index for `transect_summary`
-    transect_summary.reset_index(inplace=True)
+    summary_copy.reset_index(inplace=True)
 
     # Compute the "population" (i.e. original data) statistics
     # This is necessary for constructing the bootstrapped confidence intervals
     # ---- Mean density
     if settings_dict["variable"] == "nasc":
         # ---- Compute sum per transect line first
-        line_density = transect_data.groupby([stratum_col, 
+        line_density = transect_copy.groupby([stratum_col, 
                                               "transect_num"])[var_name].sum().to_frame()
         # ---- Create copy of `transect_summary` and set index
-        line_length = transect_summary.copy().set_index([stratum_col, "transect_num"])
+        line_length = summary_copy.copy().set_index([stratum_col, "transect_num"])
         # ---- Add stratum
         line_density[stratum_col] = line_length[stratum_col]
         # ---- Convert to the density
@@ -229,10 +244,10 @@ def stratified_transect_statistic(
         survey_density_mean = stratum_density_means.mean()
     else:
         # ---- Get density column name
-        density_name = [col for col in transect_data.columns if "_density" in col]
+        density_name = [col for col in transect_copy.columns if "_density" in col]
         # ---- Calculate mean per stratum
         stratum_density_means = (
-            transect_data.groupby([stratum_col], observed=False)[density_name]
+            transect_copy.groupby([stratum_col], observed=False)[density_name]
             .mean()
             .to_numpy()
             .flatten()
@@ -241,7 +256,7 @@ def stratified_transect_statistic(
         survey_density_mean = stratum_density_means.mean()
     # ---- Total
     # -------- By stratum
-    stratum_total = transect_data.groupby([stratum_col], observed=False)[var_name].sum().to_numpy()
+    stratum_total = transect_copy.groupby([stratum_col], observed=False)[var_name].sum().to_numpy()
     # -------- By survey
     survey_total = stratum_total.sum()
     # ---- Compute the stratum total proportions relative to survey sum
@@ -305,7 +320,7 @@ def stratified_transect_statistic(
     stratified_results = {
         "variable": settings_dict["variable"],
         "ci_percentile": 0.95,
-        "num_transects": strata_summary["transect_count"].sum(),
+        "num_transects": strata_copy["transect_count"].sum(),
         "stratum_area": area_array,
         "total_area": total_area,
         "estimate": {
diff --git a/echopop/test_survey.py b/echopop/test_survey.py
@@ -22,7 +22,7 @@
 # CURRENT SURVEY YEAR BEING TESTED: 2019
 ####################################################################################################
 # Define current survey year
-SURVEY_YEAR = 2013
+SURVEY_YEAR = 2012
 
 # Initialization configuration
 init_config_path = f"C:/Users/Brandyn/Documents/GitHub/echopop/config_files/initialization_\