Skip to content

Commit 3dec490

Browse files
committed
Changes to address odd 2012 virtual transect issue
1 parent 189b886 commit 3dec490

File tree

4 files changed

+56
-28
lines changed

4 files changed

+56
-28
lines changed

config_files/initialization_config_2012.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,7 @@
4545
file_columns: [transect_num, region_id, vessel_log_start, vessel_log_end, latitude, longitude,
4646
stratum_num, transect_spacing, layer_mean_depth, layer_height, bottom_depth,
4747
NASC, haul_num]
48-
transect_region_mapping:
49-
pattern: "{REGION_CLASS}{HAUL_NUM}{COUNTRY}"
48+
transect_region_mapping:
5049
parts:
5150
REGION_CLASS:
5251
- pattern: ^[hH](?![a-zA-Z]|1a)
@@ -69,6 +68,13 @@
6968
label: CAN
7069
- pattern: ^[uU]
7170
label: US
71+
pattern: "{REGION_CLASS}{HAUL_NUM}{COUNTRY}"
72+
inpfc_strata_region:
73+
CAN: [6]
74+
US: [1, 2, 3, 4, 5]
75+
save_file_template: "{COUNTRY}_{YEAR}_transect_region_haul_{GROUP}.xlsx"
76+
save_file_directory: /Stratification
77+
save_file_sheetname: Sheet1
7278

7379

7480
#####################################################################################################################

config_files/survey_year_2012_config.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,13 @@ NASC:
6767
# file that includes all ages
6868
filename: Exports/US_CAN_NASC_2012_table_all_ages.xlsx
6969
sheetname: Sheet1
70+
export_regions:
71+
all_ages:
72+
filename: Stratification/US&CAN_2012_transect_region_haul_age1+ auto final_new.xlsx
73+
sheetname: Sheet1
74+
no_age1:
75+
filename: Stratification/US&CAN_2012_transect_region_haul_age2+ auto final_new.xlsx
76+
sheetname: Sheet1
7077
kriging:
7178
mesh:
7279
filename: Kriging_files/Kriging_grid_files/krig_grid2_5nm_cut_centroids_2013.xlsx

echopop/statistics.py

Lines changed: 40 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -46,31 +46,46 @@ def stratified_transect_statistic(
4646
stratum_col = settings_dict["stratum_name"]
4747
# ---- Get the variable name
4848
var_name = settings_dict["variable"]
49+
# ---- Create copy of transect data
50+
transect_copy = transect_data.copy()
51+
# ---- Create transect summary copy
52+
summary_copy = transect_summary.copy()
53+
# ---- Create strata summary copy
54+
strata_copy = strata_summary.copy()
4955

5056
# Get indexed transect distance
51-
transect_distances = transect_summary.set_index([stratum_col,
52-
"transect_num"])["transect_distance"]
57+
transect_distances = summary_copy.set_index([stratum_col,
58+
"transect_num"])["transect_distance"]
5359
# ---- Drop any transects where distance is 0.0 (i.e. from a single mesh node)
5460
if np.any(transect_distances == 0.0):
5561
# ---- Pick out transects where distance = 0.0 nmi
56-
zero_distances = transect_distances[transect_distances == 0.0].index.to_numpy()
62+
zero_distances = transect_distances[transect_distances == 0.0]
5763
# ---- Update `transect_distances`
5864
transect_distances = transect_distances[transect_distances > 0.0]
65+
# ---- Set identical index
66+
transect_copy.set_index([stratum_col, "transect_num"], inplace=True)
5967
# ---- Update the working copy of `transect_data` (drop the zero-distance transects)
60-
transect_data = transect_data[~transect_data["transect_num"].isin(zero_distances)]
68+
transect_copy.drop(zero_distances.index, inplace=True)
69+
# ---- Reset
70+
transect_copy.reset_index(inplace=True)
71+
# ---- Set identical index
72+
summary_copy.set_index([stratum_col, "transect_num"], inplace=True)
6173
# ---- Get the 'poor' transect strata
6274
zero_distances_strata = (
63-
transect_summary.loc[zero_distances].groupby([stratum_col], observed=False).size()
75+
summary_copy.loc[zero_distances.index]
76+
.reset_index().groupby([stratum_col], observed=False).size()
6477
)
6578
# ---- Update the working copy of `transect_summary` (drop the zero-distance transects)
66-
transect_summary = transect_summary[~transect_summary["transect_num"].isin(zero_distances)]
79+
summary_copy.drop(zero_distances.index, inplace=True)
80+
# ---- Reset
81+
summary_copy.reset_index(inplace=True)
6782
# ---- Update the working copy of `strata_summary`
6883
# -------- Set index
69-
strata_summary.set_index([stratum_col], inplace=True)
84+
strata_copy.set_index([stratum_col], inplace=True)
7085
# -------- Subtract the 'poor' transects from the total transect counts
71-
strata_summary["transect_count"] = strata_summary["transect_count"] - zero_distances_strata
86+
strata_copy["transect_count"] = strata_copy["transect_count"] - zero_distances_strata
7287
# -------- Reset index
73-
strata_summary.reset_index(inplace=True)
88+
strata_copy.reset_index(inplace=True)
7489

7590
if settings_dict["verbose"]:
7691
if settings_dict["dataset"] == "kriging":
@@ -92,7 +107,7 @@ def stratified_transect_statistic(
92107

93108
# Calculate the number of transects per stratum
94109
num_transects_to_sample = np.round(
95-
strata_summary.set_index(stratum_col)["transect_count"] * transect_sample
110+
strata_copy.set_index(stratum_col)["transect_count"] * transect_sample
96111
).astype(int)
97112

98113
# Offset term used for later variance calculation
@@ -102,25 +117,25 @@ def stratified_transect_statistic(
102117
sample_dof = num_transects_to_sample * (num_transects_to_sample - sample_offset)
103118

104119
# Transect areas
105-
transect_areas = transect_summary.groupby([stratum_col, "transect_num"],
106-
observed=False)["transect_area"].sum()
120+
transect_areas = summary_copy.groupby([stratum_col, "transect_num"],
121+
observed=False)["transect_area"].sum()
107122

108123
# Get indexed total transect area
109-
total_transect_area = strata_summary.set_index(stratum_col)["transect_area_total"]
124+
total_transect_area = strata_copy.set_index(stratum_col)["transect_area_total"]
110125

111126
# Get indexed biological value
112-
biological_values = transect_data.groupby([stratum_col, "transect_num"],
127+
biological_values = transect_copy.groupby([stratum_col, "transect_num"],
113128
observed=False)[var_name].sum()
114129

115130
# Get indexed transect numbers
116-
transect_numbers = transect_summary.set_index(stratum_col)["transect_num"]
131+
transect_numbers = summary_copy.set_index(stratum_col)["transect_num"]
117132

118133
# Calculate the summed/mean density per transect
119134
# ---- Set temporary index
120-
transect_summary.set_index([stratum_col, "transect_num"], inplace=True)
135+
summary_copy.set_index([stratum_col, "transect_num"], inplace=True)
121136
# ---- Compute summed/mean density
122-
transect_summary["density"] = transect_data.groupby([stratum_col, "transect_num"],
123-
observed=False)[
137+
summary_copy["density"] = transect_copy.groupby([stratum_col, "transect_num"],
138+
observed=False)[
124139
settings_dict["variable"]
125140
].sum()
126141

@@ -204,17 +219,17 @@ def stratified_transect_statistic(
204219
total_area = area_array.sum()
205220

206221
# Reset index for the working copy of `transect_summary`
207-
transect_summary.reset_index(inplace=True)
222+
summary_copy.reset_index(inplace=True)
208223

209224
# Compute the "population" (i.e. original data) statistics
210225
# This is necessary for constructing the bootstrapped confidence intervals
211226
# ---- Mean density
212227
if settings_dict["variable"] == "nasc":
213228
# ---- Compute sum per transect line first
214-
line_density = transect_data.groupby([stratum_col,
229+
line_density = transect_copy.groupby([stratum_col,
215230
"transect_num"])[var_name].sum().to_frame()
216231
# ---- Create copy of `transect_summary` and set index
217-
line_length = transect_summary.copy().set_index([stratum_col, "transect_num"])
232+
line_length = summary_copy.copy().set_index([stratum_col, "transect_num"])
218233
# ---- Add stratum
219234
line_density[stratum_col] = line_length[stratum_col]
220235
# ---- Convert to the density
@@ -229,10 +244,10 @@ def stratified_transect_statistic(
229244
survey_density_mean = stratum_density_means.mean()
230245
else:
231246
# ---- Get density column name
232-
density_name = [col for col in transect_data.columns if "_density" in col]
247+
density_name = [col for col in transect_copy.columns if "_density" in col]
233248
# ---- Calculate mean per stratum
234249
stratum_density_means = (
235-
transect_data.groupby([stratum_col], observed=False)[density_name]
250+
transect_copy.groupby([stratum_col], observed=False)[density_name]
236251
.mean()
237252
.to_numpy()
238253
.flatten()
@@ -241,7 +256,7 @@ def stratified_transect_statistic(
241256
survey_density_mean = stratum_density_means.mean()
242257
# ---- Total
243258
# -------- By stratum
244-
stratum_total = transect_data.groupby([stratum_col], observed=False)[var_name].sum().to_numpy()
259+
stratum_total = transect_copy.groupby([stratum_col], observed=False)[var_name].sum().to_numpy()
245260
# -------- By survey
246261
survey_total = stratum_total.sum()
247262
# ---- Compute the stratum total proportions relative to survey sum
@@ -305,7 +320,7 @@ def stratified_transect_statistic(
305320
stratified_results = {
306321
"variable": settings_dict["variable"],
307322
"ci_percentile": 0.95,
308-
"num_transects": strata_summary["transect_count"].sum(),
323+
"num_transects": strata_copy["transect_count"].sum(),
309324
"stratum_area": area_array,
310325
"total_area": total_area,
311326
"estimate": {

echopop/test_survey.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
# CURRENT SURVEY YEAR BEING TESTED: 2012
2323
####################################################################################################
2424
# Define current survey year
25-
SURVEY_YEAR = 2013
25+
SURVEY_YEAR = 2012
2626

2727
# Initialization configuration
2828
init_config_path = f"C:/Users/Brandyn/Documents/GitHub/echopop/config_files/initialization_\

0 commit comments

Comments
 (0)