Skip to content

Commit 71b26ed

Browse files
committed
Edits and new docs for version 0.1.3
New function duration_sum included in heavyrain module Bugfix in radproc.raw.radolan_binaries_to_dataframe Revision of radproc.arcgis.join_df_columns_to_attribute_table version number increased to 0.1.3 edited release notes built new docs
1 parent 944b7d8 commit 71b26ed

File tree

85 files changed

+471
-198
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+471
-198
lines changed

build/lib/radproc/arcgis.py

Lines changed: 50 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -700,44 +700,64 @@ def zonalstatistics(inZoneData, zoneField, inRaster, outTable, outFC):
700700
pass
701701

702702

703-
def join_df_columns_to_attribute_table(df, joinField, columns, fc):
703+
def join_df_columns_to_attribute_table(df, columns, fc, fcJoinField):
704704
"""
705-
Join DataFrame columns to attribute table of a feature class or shapefile.
705+
Join DataFrame columns to attribute table of a feature class or Shapefile.
706+
The fields are added to the existing feature class, which will not be copied.
706707
707708
:Parameters:
708709
------------
709710
710711
df : pandas DataFrame
711712
with data columns to be joined to attribute table.
712-
joinField : String
713-
Name of join field. This field name must be containined in DataFrame and attribute table.
714-
columns : List of Strings
715-
containing names of columns which are to be joined to Feature Class. Must NOT contain the name of joinField.
713+
The join will be based on the DataFrame index by default.
714+
But if the DataFrame contains a column with exactly the same name as fcJoinField,
715+
the join will be based on this column instead of the index.
716+
columns : List of Strings or pandas column Index object
717+
containing names of columns which are to be joined to feature class.
718+
The column list may also be a pandas column index generated by calling df.columns
716719
fc : String
717-
Path and Name of Feature Class.
720+
Path and Name of feature class or shapefile.
721+
fcJoinField : String
722+
Name of the join field in the feature class.
723+
If a column with exactly the same name as fcJoinField is contained in the DataFrame,
724+
the join will be based on this column instead of the index.
718725
719726
:Returns:
720727
---------
721728
722729
None
723-
724-
:Note:
725-
------
726730
727-
If the join is supposed to be done based on the DataFrame index, you have to
728-
create a new column with the same name as the join field of the feature class.
729-
So, if you have a DataFrame with gauge station numbers as index and your feature class
730-
already contains a field called "Statnr" with the corresponding station numbers,
731-
you need to new to create a new column with
731+
"""
732732

733-
>>> df['Statnr'] = df.index
733+
# convert column index object to list if necessary
734+
if type(columns) == pd.core.indexes.base.Index:
735+
columns = list(df.columns)
734736

735-
"""
737+
# if fcJoinField is already contained in column list, delete it
738+
if fcJoinField in columns:
739+
del columns[columns.index(fcJoinField)]
736740

737-
columns.insert(0, joinField)
741+
# set name of join field to first position in column list
742+
columns.insert(0, fcJoinField)
743+
744+
# if the DataFrame does not contain a column name equal to fcJoinField, this column is generated from the index
745+
# else, the existing column will be used
746+
if not fcJoinField in df.columns:
747+
df[fcJoinField] = df.index
748+
749+
# select columns from column list
738750
df = df[columns]
751+
752+
# check if column names contain integers and replace them by Strings to avoid errors in AddField
753+
df.columns = ["F%s" % col if type(col) in [np.int64, np.int32, int] else col for col in df.columns]
754+
columns = ["F%s" % col if type(col) in [np.int64, np.int32, int] else col for col in columns]
755+
739756
n = len(columns)
757+
# [1:] to exclude column fcJoinField which was inserted at first position
740758
for column in columns[1:]:
759+
# check data type of each column and add a field of same type in fc
760+
#
741761
column_dtype = str(type(df[column].values[1]))
742762
if "float" in column_dtype:
743763
fieldType = "DOUBLE"
@@ -756,12 +776,18 @@ def join_df_columns_to_attribute_table(df, joinField, columns, fc):
756776

757777
with arcpy.da.UpdateCursor(fc, columns) as cursor:
758778
for fcrow in cursor:
759-
if fcrow[0] == dfrow[joinField]:
779+
# check index/join field values and insert corresponding row values
780+
if fcrow[0] == dfrow[fcJoinField]:
760781
for i in range(1,n):
761-
if np.isnan(dfrow[columns[i]]) == False:
782+
# try to check if value is not nan --> insert, else set to 0 to avoid nan in attribute table
783+
# will only work for numeric data. strings will be inserted "as is" in case of exception
784+
try:
785+
if np.isnan(dfrow[columns[i]]) == False:
786+
fcrow[i] = dfrow[columns[i]]
787+
else:
788+
fcrow[i] = 0
789+
except:
762790
fcrow[i] = dfrow[columns[i]]
763-
else:
764-
fcrow[i] = 0
765-
766791
cursor.updateRow(fcrow)
767-
break
792+
break
793+

build/lib/radproc/heavyrain.py

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@
1818
1919
- identify and select all intervals in which a specified precipitation threshold is exceeded
2020
- count the number of threshold exceedances
21+
- calculate duration sums
2122
2223
.. autosummary::
2324
:nosignatures:
2425
:toctree: generated/
2526
2627
find_heavy_rainfalls
2728
count_heavy_rainfall_intervals
29+
duration_sum
2830
2931
3032
.. module:: radproc.heavyrain
@@ -39,6 +41,7 @@
3941
import numpy as np
4042
import radproc.core as _core
4143
import gc
44+
import warnings, tables
4245

4346

4447
def _exceeding(rowseries, thresholdValue, minArea):
@@ -197,4 +200,88 @@ def count_heavy_rainfall_intervals(HDFFile, year_start, year_end, thresholdValue
197200
interval_count = hr_intervals.resample(freq, closed = 'right', label = 'right').sum().dropna()
198201
elif pd_version < 19:
199202
interval_count = hr_intervals.resample(freq, how = "sum", closed = 'right', label = 'right').dropna()
200-
return interval_count
203+
return interval_count
204+
205+
206+
def duration_sum(inHDFFile, D, year_start, year_end, outHDFFile, complevel=9):
207+
"""
208+
Calculate duration sum (Dauerstufe) of a defined time window D.
209+
The output time series will have the same frequency as the input data,
210+
but will contain the rolling sum of the defined time window with the label on the right,
211+
e.g. for D = 15 the time step at 10:15 contains the precipitation sum from 10:00 until 10:15 o'clock.
212+
Calculation can only be carried out for entire years since time windows between consecutive months are considered and included in calculations.
213+
Output data will be saved in a new HDF5 file with the same monthly structure as the input data.
214+
Consequently, the duration sum data can be loaded and processed with the same functions as the other precipitation data stored in HDF5.
215+
216+
:Parameters:
217+
------------
218+
219+
inHDFFile : string
220+
Path and name of the input HDF5 file containing precipitation data with a temporal resolution of 5 minutes.
221+
D : integer
222+
Duration (length of time window) in minutes. Value must be divisible by 5.
223+
year_start : integer
224+
First year for which duration sums are to be calculated.
225+
year_end : integer
226+
Last year for which duration sums are to be calculated.
227+
outHDFFile : string
228+
Path and name of the output HDF5 file.
229+
If the specified HDF5 file already exists, the new dataset will be appended; if the HDF5 file doesn't exist, it will be created.
230+
complevel : integer (optional, default: 9)
231+
defines the level of compression for the output HDF5 file.
232+
complevel may range from 0 to 9, where 9 is the highest compression possible.
233+
Using a high compression level reduces data size significantly,
234+
but writing data to HDF5 takes more time and data import from HDF5 is slightly slower.
235+
236+
:Returns:
237+
---------
238+
239+
No return value
240+
241+
"""
242+
243+
warnings.filterwarnings('ignore', category=tables.NaturalNameWarning)
244+
245+
246+
months = [m for m in range(1,13)]
247+
years = [y for y in range(year_start, year_end+1)]
248+
freqYW = 5
249+
duration = '%smin' % D
250+
251+
for year in years:
252+
for month in months:
253+
#---only for first month in first year: initiate process----------------
254+
if year == years[0] and month == months[0]:
255+
# open outHDF, only month for which the previous month shall not be considered
256+
# calculate number of intervals at end of month, which need to be passed to following month
257+
# this only works for durations that can be divided by 5!
258+
nIntervalsAtEndOfMonth = D/freqYW - 1
259+
df = _core.load_month(HDFFile=inHDFFile, month=month, year=year)
260+
# to be able to perform calculations on other than 5 min data in future: freq = df.index.freq
261+
# set up rolling window of size=duration and calculate the sum of every window
262+
# shift index 5 min forwards (to label = right). needed because index label is at beginning of 5 min interval in YW
263+
# consequently, without shifting, the label describes the end of the duration interval - 5 minutes
264+
durDF = df.rolling(duration).sum().shift(periods=1, freq = '5min')
265+
HDFDataset = "%s/%s" %(year, month)
266+
durDF.to_hdf(path_or_buf=outHDFFile, key=HDFDataset, mode="a", format="fixed", data_columns = True, index = True, complevel=complevel, complib="zlib")
267+
del durDF
268+
gc.collect()
269+
print("%s-%s done!" %(year, month))
270+
# continue in next loop iteration (next month) and skip remaining statements
271+
continue
272+
#-----------------------------------------------------------------------
273+
# Only keep end of month (e.g. last two intervals for D=15 min) and append next month to it
274+
df = df.iloc[-nIntervalsAtEndOfMonth: , ]
275+
df = df.append(_core.load_month(HDFFile=inHDFFile, month=month, year=year)).asfreq('5min')
276+
# rolling window of specified duration. sum is calculated for each window with label on the right (+5 minutes / shift(1), see above)
277+
# remove first intervals (number equal to the intervals taken from previous month) with incorrect results due to missing data (intervals contained in previous month)
278+
durDF = df.rolling(duration).sum().shift(periods=1, freq = '5min').iloc[nIntervalsAtEndOfMonth: , ]
279+
HDFDataset = "%s/%s" %(year, month)
280+
durDF.to_hdf(path_or_buf=outHDFFile, key=HDFDataset, mode="a", format="fixed", data_columns = True, index = True, complevel=complevel, complib="zlib")
281+
del durDF
282+
gc.collect()
283+
print("%s-%s done!" %(year, month))
284+
285+
286+
287+

build/lib/radproc/raw.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -289,11 +289,16 @@ def radolan_binaries_to_dataframe(inFolder, idArr=None):
289289
# check for RADOLAN product type and set frequency of DataFrame index
290290
# lists can be extended for other products...
291291
if metadata['producttype'] in ["RW"]:
292-
#df = df.asfreq('H')
293-
df.index.freq = pd.tseries.offsets.Hour()
292+
try:
293+
# try to prevent dataframe copying by .asfreq(). this does not seem to work in all pandas versions --> try - except
294+
df.index.freq = pd.tseries.offsets.Hour()
295+
except:
296+
df = df.asfreq('H')
294297
elif metadata['producttype'] in ["RY", "RZ", "YW"]:
295-
#df = df.asfreq('5min')
296-
df.index.freq = 5 * pd.tseries.offsets.Minute()
298+
try:
299+
df.index.freq = 5 * pd.tseries.offsets.Minute()
300+
except:
301+
df = df.asfreq('5min')
297302

298303
return df, metadata
299304

@@ -320,7 +325,7 @@ def radolan_binaries_to_hdf5(inFolder, HDFFile, idArr=None, complevel=9):
320325
containing ID values to select RADOLAN data of the cells located in the investigation area.
321326
If no idArr is specified, the ID array is automatically generated from RADOLAN metadata
322327
and RADOLAN precipitation data are not clipped to any investigation area.
323-
complevel : interger (optional, default: 9)
328+
complevel : integer (optional, default: 9)
324329
defines the level of compression for the output HDF5 file.
325330
complevel may range from 0 to 9, where 9 is the highest compression possible.
326331
Using a high compression level reduces data size significantly,

build/lib/radproc/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
# THIS FILE IS GENERATED FROM RADPROC SETUP.PY
2-
version = '0.1.2'
2+
version = '0.1.3'

dist/radproc-0.1.3-py2-none-any.whl

41.5 KB
Binary file not shown.

dist/radproc-0.1.3.tar.gz

39.5 KB
Binary file not shown.

docs/_build/.doctrees/arcgis.doctree

0 Bytes
Binary file not shown.
2.84 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)