Skip to content

Commit b802e8c

Browse files
emiliom, pre-commit-ci[bot], leewujung
authored
Move preprocess functions and tests to new clean and commongrid subpackages (#993)
* Move noise functions and tests from preprocess to new filter subpackage
* Fix docstring for new filter/api.py module
* Add missing estimate_noise to filter __init__
* Move remaining preprocess modules and test to commongrid; update all references to preprocess
* Update provenance attribute strings to replace preprocess with commongrid or filter
* Create stub preprocess subpackage so function calls still work, with a deprecation warning
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci
* Rename new 'filter' subpackage to 'clean', to avoid Python reserved word
* Update echopype/clean/api.py

  Co-authored-by: Wu-Jung Lee <[email protected]>
* Update .ci_helpers/run-test.py
* Update echopype/commongrid/api.py

  Co-authored-by: Wu-Jung Lee <[email protected]>
* Update echopype/__init__.py

  Sorting __all__ strings in alphabetical order

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Wu-Jung Lee <[email protected]>
1 parent 087e1bb commit b802e8c

File tree

15 files changed

+447
-388
lines changed

15 files changed

+447
-388
lines changed

.ci_helpers/run-test.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,17 @@
2525

2626
MODULES_TO_TEST = {
2727
"root": {}, # This is to test the root folder.
28-
"convert": {},
2928
"calibrate": {},
29+
"clean": {},
30+
"commongrid": {},
31+
"consolidate": {},
32+
"convert": {},
3033
"echodata": {},
34+
"mask": {},
35+
"metrics": {},
3136
"preprocess": {},
3237
"utils": {},
3338
"visualize": {},
34-
"metrics": {},
35-
"mask": {},
36-
"consolidate": {},
3739
}
3840

3941
if __name__ == "__main__":

echopype/__init__.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from _echopype_version import version as __version__ # noqa
44

5-
from . import calibrate, consolidate, mask, preprocess, utils
5+
from . import calibrate, clean, commongrid, consolidate, mask, preprocess, utils
66
from .convert.api import open_raw
77
from .echodata.api import open_converted
88
from .echodata.combine import combine_echodata
@@ -15,12 +15,14 @@
1515
init_ep_dir()
1616

1717
__all__ = [
18-
"open_raw",
19-
"open_converted",
20-
"combine_echodata",
2118
"calibrate",
19+
"clean",
20+
"combine_echodata",
21+
"commongrid",
2222
"consolidate",
2323
"mask",
24+
"open_converted",
25+
"open_raw",
2426
"preprocess",
2527
"utils",
2628
"verbose",

echopype/clean/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from .api import estimate_noise, remove_noise
2+
3+
__all__ = [
4+
"estimate_noise",
5+
"remove_noise",
6+
]

echopype/clean/api.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""
2+
Functions for reducing variabilities in backscatter data.
3+
"""
4+
5+
from ..utils.prov import echopype_prov_attrs
6+
from .noise_est import NoiseEst
7+
8+
9+
def estimate_noise(ds_Sv, ping_num, range_sample_num, noise_max=None):
    """
    Estimate background noise from the mean calibrated power of a collection of pings.

    See ``remove_noise`` for reference.

    Parameters
    ----------
    ds_Sv : xr.Dataset
        dataset containing ``Sv`` and ``echo_range`` [m]
    ping_num : int
        number of pings to obtain noise estimates
    range_sample_num : int
        number of samples along the ``range_sample`` dimension to obtain noise estimates
    noise_max : float, optional
        the upper limit for background noise expected under the operating conditions

    Returns
    -------
    A DataArray containing noise estimated from the input ``ds_Sv``
    """
    # The estimator is given a copy of the dataset rather than the caller's object
    estimator = NoiseEst(
        ds_Sv=ds_Sv.copy(),
        ping_num=ping_num,
        range_sample_num=range_sample_num,
    )
    estimator.estimate_noise(noise_max=noise_max)
    return estimator.Sv_noise
33+
34+
35+
def remove_noise(ds_Sv, ping_num, range_sample_num, noise_max=None, SNR_threshold=3):
    """
    Remove noise using background-noise estimates derived from
    the mean calibrated power of a collection of pings.

    Reference: De Robertis & Higginbottom. 2007.
    A post-processing technique to estimate the signal-to-noise ratio
    and remove echosounder background noise.
    ICES Journal of Marine Sciences 64(6): 1282–1291.

    Parameters
    ----------
    ds_Sv : xr.Dataset
        dataset containing ``Sv`` and ``echo_range`` [m]
    ping_num : int
        number of pings to obtain noise estimates
    range_sample_num : int
        number of samples along the ``range_sample`` dimension to obtain noise estimates
    noise_max : float, optional
        the upper limit for background noise expected under the operating conditions
    SNR_threshold : float
        acceptable signal-to-noise ratio, default to 3 dB

    Returns
    -------
    The dataset with additional variables, including
    the corrected Sv (``Sv_corrected``) and the noise estimates (``Sv_noise``),
    and with echopype provenance attributes attached
    """
    # The noise estimator is given a copy of the dataset rather than the caller's object
    denoiser = NoiseEst(
        ds_Sv=ds_Sv.copy(),
        ping_num=ping_num,
        range_sample_num=range_sample_num,
    )
    denoiser.remove_noise(noise_max=noise_max, SNR_threshold=SNR_threshold)

    # Record which echopype function produced the returned dataset
    provenance = echopype_prov_attrs(process_type="processing")
    provenance["processing_function"] = "clean.remove_noise"

    return denoiser.ds_Sv.assign_attrs(provenance)
File renamed without changes.

echopype/commongrid/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from .api import compute_MVBS, compute_MVBS_index_binning
2+
3+
__all__ = [
4+
"compute_MVBS",
5+
"compute_MVBS_index_binning",
6+
]

echopype/commongrid/api.py

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
"""
2+
Functions for enhancing the spatial and temporal coherence of data.
3+
"""
4+
5+
import numpy as np
6+
import pandas as pd
7+
import xarray as xr
8+
9+
from ..utils.prov import echopype_prov_attrs
10+
from .mvbs import get_MVBS_along_channels
11+
12+
13+
def _set_MVBS_attrs(ds):
    """
    Attach the attributes common to MVBS datasets to ``ping_time`` and ``Sv``.

    The attributes of ``ds["ping_time"]`` and ``ds["Sv"]`` are replaced in place;
    nothing is returned.

    Parameters
    ----------
    ds : xr.Dataset
        dataset containing MVBS
    """
    # Value range of Sv, rounded to 2 decimals for the ``actual_range`` attribute
    sv_low = round(float(ds["Sv"].min().values), 2)
    sv_high = round(float(ds["Sv"].max().values), 2)

    ds["ping_time"].attrs = dict(
        long_name="Ping time",
        standard_name="time",
        axis="T",
    )

    ds["Sv"].attrs = dict(
        long_name="Mean volume backscattering strength (MVBS, mean Sv re 1 m-1)",
        units="dB",
        actual_range=[sv_low, sv_high],
    )
36+
37+
38+
def compute_MVBS(ds_Sv, range_meter_bin=20, ping_time_bin="20S"):
    """
    Compute Mean Volume Backscattering Strength (MVBS)
    based on intervals of range (``echo_range``) and ``ping_time`` specified in physical units.

    Output of this function differs from that of ``compute_MVBS_index_binning``, which computes
    bin-averaged Sv according to intervals of ``echo_range`` and ``ping_time`` specified as
    index number.

    Parameters
    ----------
    ds_Sv : xr.Dataset
        dataset containing Sv and ``echo_range`` [m]
    range_meter_bin : Union[int, float]
        bin size along ``echo_range`` in meters, default to ``20``
    ping_time_bin : str
        bin size along ``ping_time``, default to ``20S``

    Returns
    -------
    A dataset containing bin-averaged Sv
    """

    # create bin information for echo_range
    range_interval = np.arange(0, ds_Sv["echo_range"].max() + range_meter_bin, range_meter_bin)

    # create bin information needed for ping_time
    ping_interval = (
        ds_Sv.ping_time.resample(ping_time=ping_time_bin, skipna=True).asfreq().ping_time.values
    )

    # calculate the MVBS along each channel
    MVBS_values = get_MVBS_along_channels(ds_Sv, range_interval, ping_interval)

    # create MVBS dataset
    ds_MVBS = xr.Dataset(
        data_vars={"Sv": (["channel", "ping_time", "echo_range"], MVBS_values)},
        coords={
            "ping_time": ping_interval,
            "channel": ds_Sv.channel,
            # each bin is labelled by its lower edge, so the last edge is dropped
            "echo_range": range_interval[:-1],
        },
    )

    # TODO: look into why 'filenames' exist here as a variable
    # Added this check to support the test in test_process.py::test_compute_MVBS
    if "filenames" in ds_MVBS.variables:
        ds_MVBS = ds_MVBS.drop_vars("filenames")

    # ping_time_bin parsing and conversions
    # Need to convert between pd.Timedelta and np.timedelta64 offsets/frequency strings
    # https://xarray.pydata.org/en/stable/generated/xarray.Dataset.resample.html
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.resample.html
    # https://pandas.pydata.org/docs/reference/api/pandas.Timedelta.html
    # https://pandas.pydata.org/docs/reference/api/pandas.Timedelta.resolution_string.html
    # https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
    # https://numpy.org/devdocs/reference/arrays.datetime.html#datetime-units
    #
    # Keys are lower-cased ``pd.Timedelta.resolution_string`` values. Both the
    # single-letter codes (t, l, u, n) and the multi-letter codes (min, ms, us, ns)
    # are listed, and each pair must map to the same numpy unit and label.
    timedelta_units = {
        "d": {"nptd64": "D", "unitstr": "day"},
        "h": {"nptd64": "h", "unitstr": "hour"},
        "t": {"nptd64": "m", "unitstr": "minute"},
        "min": {"nptd64": "m", "unitstr": "minute"},
        "s": {"nptd64": "s", "unitstr": "second"},
        "l": {"nptd64": "ms", "unitstr": "millisecond"},
        "ms": {"nptd64": "ms", "unitstr": "millisecond"},
        "u": {"nptd64": "us", "unitstr": "microsecond"},
        "us": {"nptd64": "us", "unitstr": "microsecond"},
        "n": {"nptd64": "ns", "unitstr": "nanosecond"},
        "ns": {"nptd64": "ns", "unitstr": "nanosecond"},
    }
    ping_time_bin_td = pd.Timedelta(ping_time_bin)
    # res = resolution (most granular time unit)
    ping_time_bin_resunit = ping_time_bin_td.resolution_string.lower()
    # express the bin size as an integer count of its most granular unit
    ping_time_bin_resvalue = int(
        ping_time_bin_td / np.timedelta64(1, timedelta_units[ping_time_bin_resunit]["nptd64"])
    )
    ping_time_bin_resunit_label = timedelta_units[ping_time_bin_resunit]["unitstr"]

    # Attach attributes
    _set_MVBS_attrs(ds_MVBS)
    ds_MVBS["echo_range"].attrs = {"long_name": "Range distance", "units": "m"}
    ds_MVBS["Sv"] = ds_MVBS["Sv"].assign_attrs(
        {
            "cell_methods": (
                f"ping_time: mean (interval: {ping_time_bin_resvalue} {ping_time_bin_resunit_label} "  # noqa
                "comment: ping_time is the interval start) "
                f"echo_range: mean (interval: {range_meter_bin} meter "
                "comment: echo_range is the interval start)"
            ),
            "binning_mode": "physical units",
            "range_meter_interval": str(range_meter_bin) + "m",
            "ping_time_interval": ping_time_bin,
            "actual_range": [
                round(float(ds_MVBS["Sv"].min().values), 2),
                round(float(ds_MVBS["Sv"].max().values), 2),
            ],
        }
    )

    # Record which echopype function produced this dataset
    prov_dict = echopype_prov_attrs(process_type="processing")
    prov_dict["processing_function"] = "commongrid.compute_MVBS"
    ds_MVBS = ds_MVBS.assign_attrs(prov_dict)
    ds_MVBS["frequency_nominal"] = ds_Sv["frequency_nominal"]  # re-attach frequency_nominal

    return ds_MVBS
143+
144+
145+
def compute_MVBS_index_binning(ds_Sv, range_sample_num=100, ping_num=100):
    """
    Compute Mean Volume Backscattering Strength (MVBS)
    based on intervals of ``range_sample`` and ping number (``ping_num``) specified in index number.

    Output of this function differs from that of ``compute_MVBS``, which computes
    bin-averaged Sv according to intervals of range (``echo_range``) and ``ping_time`` specified
    in physical units.

    Parameters
    ----------
    ds_Sv : xr.Dataset
        dataset containing ``Sv`` and ``echo_range`` [m]
    range_sample_num : int
        number of samples to average along the ``range_sample`` dimension, default to 100
    ping_num : int
        number of pings to average, default to 100

    Returns
    -------
    A dataset containing bin-averaged Sv
    """
    # The same coarsening windows are applied to both Sv and echo_range
    windows = {"ping_time": ping_num, "range_sample": range_sample_num}

    # average should be done in linear domain
    linear_sv = 10 ** (ds_Sv["Sv"] / 10)
    mean_linear_sv = linear_sv.coarsen(boundary="pad", **windows).mean(skipna=True)
    mvbs = 10 * np.log10(mean_linear_sv)

    # Attach attributes and coarsened echo_range
    mvbs.name = "Sv"
    ds_MVBS = mvbs.to_dataset()

    # reset range_sample to start from 0
    new_range_sample = np.arange(ds_MVBS["range_sample"].size)
    ds_MVBS.coords["range_sample"] = (
        "range_sample",
        new_range_sample,
        {"long_name": "Along-range sample number, base 0"},
    )

    # binned echo_range (use first value in each average bin)
    coarse_echo_range = ds_Sv["echo_range"].coarsen(boundary="pad", **windows)
    ds_MVBS["echo_range"] = coarse_echo_range.min(skipna=True)

    _set_MVBS_attrs(ds_MVBS)

    sv_low = round(float(ds_MVBS["Sv"].min().values), 2)
    sv_high = round(float(ds_MVBS["Sv"].max().values), 2)
    cell_methods = (
        f"ping_time: mean (interval: {ping_num} pings "
        "comment: ping_time is the interval start) "
        f"range_sample: mean (interval: {range_sample_num} samples along range "
        "comment: range_sample is the interval start)"
    )
    sv_attrs = {
        "cell_methods": cell_methods,
        "comment": "MVBS binned on the basis of range_sample and ping number specified as index numbers",  # noqa
        "binning_mode": "sample number",
        "range_sample_interval": f"{range_sample_num} samples along range",
        "ping_interval": f"{ping_num} pings",
        "actual_range": [sv_low, sv_high],
    }
    ds_MVBS["Sv"] = ds_MVBS["Sv"].assign_attrs(sv_attrs)

    # Record which echopype function produced this dataset
    provenance = echopype_prov_attrs(process_type="processing")
    provenance["processing_function"] = "commongrid.compute_MVBS_index_binning"
    ds_MVBS = ds_MVBS.assign_attrs(provenance)
    ds_MVBS["frequency_nominal"] = ds_Sv["frequency_nominal"]  # re-attach frequency_nominal

    return ds_MVBS
215+
216+
217+
def regrid():
    """
    Placeholder function; takes no arguments and currently always returns 1.
    """
    return 1
File renamed without changes.

echopype/consolidate/api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def add_splitbeam_angle(
296296
source_Sv = xr.open_dataset(source_Sv, engine=file_type, chunks={}, **storage_options)
297297

298298
# raise not implemented error if source_Sv corresponds to MVBS
299-
if source_Sv.attrs["processing_function"] == "preprocess.compute_MVBS":
299+
if source_Sv.attrs["processing_function"] == "commongrid.compute_MVBS":
300300
raise NotImplementedError("Adding split-beam data to MVBS has not been implemented!")
301301

302302
# check that the appropriate waveform and encode mode have been given

echopype/preprocess/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
from .api import compute_MVBS, compute_MVBS_index_binning, remove_noise
1+
from .api import compute_MVBS, compute_MVBS_index_binning, estimate_noise, remove_noise
22

33
__all__ = [
4+
"estimate_noise",
5+
"remove_noise",
46
"compute_MVBS",
57
"compute_MVBS_index_binning",
6-
"remove_noise",
78
]

0 commit comments

Comments
 (0)