18
18
19
19
- identify and select all intervals in which a specified precipitation threshold is exceeded
- count the number of threshold exceedances
- calculate duration sums
21
22
22
23
.. autosummary::
   :nosignatures:
   :toctree: generated/

   find_heavy_rainfalls
   count_heavy_rainfall_intervals
   duration_sum
28
30
29
31
30
32
.. module:: radproc.heavyrain
39
41
import numpy as np
40
42
import radproc .core as _core
41
43
import gc
44
+ import warnings , tables
42
45
43
46
44
47
def _exceeding (rowseries , thresholdValue , minArea ):
@@ -197,4 +200,88 @@ def count_heavy_rainfall_intervals(HDFFile, year_start, year_end, thresholdValue
197
200
interval_count = hr_intervals .resample (freq , closed = 'right' , label = 'right' ).sum ().dropna ()
198
201
elif pd_version < 19 :
199
202
interval_count = hr_intervals .resample (freq , how = "sum" , closed = 'right' , label = 'right' ).dropna ()
200
- return interval_count
203
+ return interval_count
204
+
205
+
206
def duration_sum(inHDFFile, D, year_start, year_end, outHDFFile, complevel=9):
    """
    Calculate duration sum (Dauerstufe) of a defined time window D.

    The output time series will have the same frequency as the input data,
    but will contain the rolling sum of the defined time window with the label on the right,
    e.g. for D = 15 the time step at 10:15 contains the precipitation sum from 10:00 until 10:15 o'clock.
    Calculation can only be carried out for entire years since time windows between consecutive
    months are considered and included in calculations.
    Output data will be saved in a new HDF5 file with the same monthly structure as the input data.
    Consequently, the duration sum data can be loaded and processed with the same functions
    as the other precipitation data stored in HDF5.

    :Parameters:
    ------------

    inHDFFile : string
        Path and name of the input HDF5 file containing precipitation data with a temporal resolution of 5 minutes.
    D : integer
        Duration (length of time window) in minutes. Value must be divisible by 5.
    year_start : integer
        First year for which duration sums are to be calculated.
    year_end : integer
        Last year for which duration sums are to be calculated.
    outHDFFile : string
        Path and name of the output HDF5 file.
        If the specified HDF5 file already exists, the new dataset will be appended;
        if the HDF5 file doesn't exist, it will be created.
    complevel : integer (optional, default: 9)
        Defines the level of compression for the output HDF5 file.
        complevel may range from 0 to 9, where 9 is the highest compression possible.
        Using a high compression level reduces data size significantly,
        but writing data to HDF5 takes more time and data import from HDF5 is slightly slower.

    :Returns:
    ---------

    No return value

    :Raises:
    --------

    ValueError
        If D is not a positive multiple of 5 minutes.
    """
    # Dataset keys like "2016/7" trigger NaturalNameWarning; they are intentional.
    warnings.filterwarnings('ignore', category=tables.NaturalNameWarning)

    freqYW = 5  # temporal resolution of the input data in minutes
    # The carry-over logic below only works for durations divisible by the input resolution.
    if D <= 0 or D % freqYW != 0:
        raise ValueError("D must be a positive multiple of %s minutes, got %r" % (freqYW, D))

    # Number of intervals at the end of each month that must be carried over into the
    # following month so rolling windows spanning the month boundary are complete.
    # NOTE: integer division is required here -- the result is used as a slice bound.
    nIntervalsAtEndOfMonth = D // freqYW - 1
    duration = '%smin' % D

    # Trailing intervals of the previously processed month; None on the very first month.
    tail = None

    for year in range(year_start, year_end + 1):
        for month in range(1, 13):
            df = _core.load_month(HDFFile=inHDFFile, month=month, year=year)
            if tail is not None:
                # Prepend the end of the previous month so windows crossing the
                # month boundary are computed on complete data.
                # NOTE(review): DataFrame.append was removed in pandas >= 2.0;
                # kept here for compatibility with the old pandas versions this
                # module targets (see the pd_version checks elsewhere in the file).
                df = tail.append(df).asfreq('5min')

            # Rolling sum over the duration window; shift the index 5 min forward so the
            # label marks the END of the window (YW labels mark the interval start).
            durDF = df.rolling(duration).sum().shift(periods=1, freq='5min')
            if tail is not None:
                # Drop the carried-over leading intervals: their rolling sums were
                # already (correctly) written with the previous month.
                durDF = durDF.iloc[nIntervalsAtEndOfMonth:, ]

            # Keep the end of the current month for the next iteration.
            # Slicing from len(df) - n (instead of iloc[-n:]) is deliberate:
            # for D == 5, n == 0 and iloc[-0:] would wrongly return the WHOLE month.
            tail = df.iloc[len(df) - nIntervalsAtEndOfMonth:, ]

            HDFDataset = "%s/%s" % (year, month)
            durDF.to_hdf(path_or_buf=outHDFFile, key=HDFDataset, mode="a", format="fixed",
                         data_columns=True, index=True, complevel=complevel, complib="zlib")
            # Free the month's result before loading the next one to keep peak memory low.
            del durDF
            gc.collect()
            print("%s-%s done!" % (year, month))
284
+
285
+
286
+
287
+
0 commit comments