Skip to content

Commit 71b26ed

Browse files
committed
Edits and new docs for version 0.1.3
New function duration_sum included in heavyrain module Bugfix in radproc.raw.radolan_binaries_to_dataframe Revision of radproc.arcgis.join_df_columns_to_attribute_table version number increased to 0.1.3 edited release notes built new docs
1 parent 944b7d8 commit 71b26ed

File tree

85 files changed

+471
-198
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+471
-198
lines changed

build/lib/radproc/arcgis.py

Lines changed: 50 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -700,44 +700,64 @@ def zonalstatistics(inZoneData, zoneField, inRaster, outTable, outFC):
700700
pass
701701

702702

703-
def join_df_columns_to_attribute_table(df, joinField, columns, fc):
703+
def join_df_columns_to_attribute_table(df, columns, fc, fcJoinField):
704704
"""
705-
Join DataFrame columns to attribute table of a feature class or shapefile.
705+
Join DataFrame columns to attribute table of a feature class or Shapefile.
706+
The fields are added to the existing feature class, which will not be copied.
706707
707708
:Parameters:
708709
------------
709710
710711
df : pandas DataFrame
711712
with data columns to be joined to attribute table.
712-
joinField : String
713-
Name of join field. This field name must be containined in DataFrame and attribute table.
714-
columns : List of Strings
715-
containing names of columns which are to be joined to Feature Class. Must NOT contain the name of joinField.
713+
The join will be based on the DataFrame index by default.
714+
But if the DataFrame contains a column with exactly the same name as fcJoinField,
715+
the join will be based on this column instead of the index.
716+
columns : List of Strings or pandas column Index object
717+
containing names of columns which are to be joined to feature class.
718+
The column list may also be a pandas column index generated by calling df.columns
716719
fc : String
717-
Path and Name of Feature Class.
720+
Path and Name of feature class or shapefile.
721+
fcJoinField : String
722+
Name of the join field in the feature class.
723+
If a column with exactly the same name as fcJoinField is contained in the DataFrame,
724+
the join will be based on this column instead of the index.
718725
719726
:Returns:
720727
---------
721728
722729
None
723-
724-
:Note:
725-
------
726730
727-
If the join is supposed to be done based on the DataFrame index, you have to
728-
create a new column with the same name as the join field of the feature class.
729-
So, if you have a DataFrame with gauge station numbers as index and your feature class
730-
already contains a field called "Statnr" with the corresponding station numbers,
731-
you need to new to create a new column with
731+
"""
732732

733-
>>> df['Statnr'] = df.index
733+
# convert column index object to list if necessary
734+
if type(columns) == pd.core.indexes.base.Index:
735+
columns = list(df.columns)
734736

735-
"""
737+
# if fcJoinField is already contained in column list, delete it
738+
if fcJoinField in columns:
739+
del columns[columns.index(fcJoinField)]
736740

737-
columns.insert(0, joinField)
741+
# set name of join field to first position in column list
742+
columns.insert(0, fcJoinField)
743+
744+
# if the DataFrame does not contain a column name equal to fcJoinField, this column is generated from the index
745+
# else, the existing column will be used
746+
if not fcJoinField in df.columns:
747+
df[fcJoinField] = df.index
748+
749+
# select columns from column list
738750
df = df[columns]
751+
752+
# check if column names contain integers and replace them by Strings to avoid errors in AddField
753+
df.columns = ["F%s" % col if type(col) in [np.int64, np.int32, int] else col for col in df.columns]
754+
columns = ["F%s" % col if type(col) in [np.int64, np.int32, int] else col for col in columns]
755+
739756
n = len(columns)
757+
# [1:] to exclude column fcJoinField which was inserted at first position
740758
for column in columns[1:]:
759+
# check data type of each column and add a field of same type in fc
760+
#
741761
column_dtype = str(type(df[column].values[1]))
742762
if "float" in column_dtype:
743763
fieldType = "DOUBLE"
@@ -756,12 +776,18 @@ def join_df_columns_to_attribute_table(df, joinField, columns, fc):
756776

757777
with arcpy.da.UpdateCursor(fc, columns) as cursor:
758778
for fcrow in cursor:
759-
if fcrow[0] == dfrow[joinField]:
779+
# check index/join field values and insert corresponding row values
780+
if fcrow[0] == dfrow[fcJoinField]:
760781
for i in range(1,n):
761-
if np.isnan(dfrow[columns[i]]) == False:
782+
# try to check if value is not nan --> insert, else set to 0 to avoid nan in attribute table
783+
# will only work for numeric data. strings will be inserted "as is" in case of exception
784+
try:
785+
if np.isnan(dfrow[columns[i]]) == False:
786+
fcrow[i] = dfrow[columns[i]]
787+
else:
788+
fcrow[i] = 0
789+
except:
762790
fcrow[i] = dfrow[columns[i]]
763-
else:
764-
fcrow[i] = 0
765-
766791
cursor.updateRow(fcrow)
767-
break
792+
break
793+

build/lib/radproc/heavyrain.py

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@
1818
1919
- identify and select all intervals in which a specified precipitation threshold is exceeded
2020
- count the number of threshold exceedances
21+
- calculate duration sums
2122
2223
.. autosummary::
2324
:nosignatures:
2425
:toctree: generated/
2526
2627
find_heavy_rainfalls
2728
count_heavy_rainfall_intervals
29+
duration_sum
2830
2931
3032
.. module:: radproc.heavyrain
@@ -39,6 +41,7 @@
3941
import numpy as np
4042
import radproc.core as _core
4143
import gc
44+
import warnings, tables
4245

4346

4447
def _exceeding(rowseries, thresholdValue, minArea):
@@ -197,4 +200,88 @@ def count_heavy_rainfall_intervals(HDFFile, year_start, year_end, thresholdValue
197200
interval_count = hr_intervals.resample(freq, closed = 'right', label = 'right').sum().dropna()
198201
elif pd_version < 19:
199202
interval_count = hr_intervals.resample(freq, how = "sum", closed = 'right', label = 'right').dropna()
200-
return interval_count
203+
return interval_count
204+
205+
206+
def duration_sum(inHDFFile, D, year_start, year_end, outHDFFile, complevel=9):
207+
"""
208+
Calculate duration sum (Dauerstufe) of a defined time window D.
209+
The output time series will have the same frequency as the input data,
210+
but will contain the rolling sum of the defined time window with the label on the right,
211+
e.g. for D = 15 the time step at 10:15 contains the precipitation sum from 10:00 until 10:15 o'clock.
212+
Calculation can only be carried out for entire years since time windows between consecutive months are considered and included in calculations.
213+
Output data will be saved in a new HDF5 file with the same monthly structure as the input data.
214+
Consequently, the duration sum data can be loaded and processed with the same functions as the other precipitation data stored in HDF5.
215+
216+
:Parameters:
217+
------------
218+
219+
inHDFFile : string
220+
Path and name of the input HDF5 file containing precipitation data with a temporal resolution of 5 minutes.
221+
D : integer
222+
Duration (length of time window) in minutes. Value must be divisible by 5.
223+
year_start : integer
224+
First year for which duration sums are to be calculated.
225+
year_end : integer
226+
Last year for which duration sums are to be calculated.
227+
outHDFFile : string
228+
Path and name of the output HDF5 file.
229+
If the specified HDF5 file already exists, the new dataset will be appended; if the HDF5 file doesn't exist, it will be created.
230+
complevel : integer (optional, default: 9)
231+
defines the level of compression for the output HDF5 file.
232+
complevel may range from 0 to 9, where 9 is the highest compression possible.
233+
Using a high compression level reduces data size significantly,
234+
but writing data to HDF5 takes more time and data import from HDF5 is slightly slower.
235+
236+
:Returns:
237+
---------
238+
239+
No return value
240+
241+
"""
242+
243+
warnings.filterwarnings('ignore', category=tables.NaturalNameWarning)
244+
245+
246+
months = [m for m in range(1,13)]
247+
years = [y for y in range(year_start, year_end+1)]
248+
freqYW = 5
249+
duration = '%smin' % D
250+
251+
for year in years:
252+
for month in months:
253+
#---only for first month in first year: initiate process----------------
254+
if year == years[0] and month == months[0]:
255+
# open outHDF, only month for which the previous month shall not be considered
256+
# calculate number of intervals at end of month, which need to be passed to following month
257+
# this only works for durations that can be divided by 5!
258+
nIntervalsAtEndOfMonth = D/freqYW - 1
259+
df = _core.load_month(HDFFile=inHDFFile, month=month, year=year)
260+
# to be able to perform calculations on other than 5 min data in future: freq = df.index.freq
261+
# set up rolling window of size=duration and calculate the sum of every window
262+
# shift index 5 min forwards (to label = right). needed because index label is at beginning of 5 min interval in YW
263+
# consequently, without shifting, the label describes the end of the duration interval - 5 minutes
264+
durDF = df.rolling(duration).sum().shift(periods=1, freq = '5min')
265+
HDFDataset = "%s/%s" %(year, month)
266+
durDF.to_hdf(path_or_buf=outHDFFile, key=HDFDataset, mode="a", format="fixed", data_columns = True, index = True, complevel=complevel, complib="zlib")
267+
del durDF
268+
gc.collect()
269+
print("%s-%s done!" %(year, month))
270+
# continue in next loop iteration (next month) and skip remaining statements
271+
continue
272+
#-----------------------------------------------------------------------
273+
# Only keep end of month (e.g. last two intervals for D=15 min) and append next month to it
274+
df = df.iloc[-nIntervalsAtEndOfMonth: , ]
275+
df = df.append(_core.load_month(HDFFile=inHDFFile, month=month, year=year)).asfreq('5min')
276+
# rolling window of specified duration. sum is calculated for each window with label on the right (+5 minutes / shift(1), see above)
277+
# remove first intervals (number equal to the intervals taken from previous month) with incorrect results due to missing data (intervals contained in previous month)
278+
durDF = df.rolling(duration).sum().shift(periods=1, freq = '5min').iloc[nIntervalsAtEndOfMonth: , ]
279+
HDFDataset = "%s/%s" %(year, month)
280+
durDF.to_hdf(path_or_buf=outHDFFile, key=HDFDataset, mode="a", format="fixed", data_columns = True, index = True, complevel=complevel, complib="zlib")
281+
del durDF
282+
gc.collect()
283+
print("%s-%s done!" %(year, month))
284+
285+
286+
287+

build/lib/radproc/raw.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -289,11 +289,16 @@ def radolan_binaries_to_dataframe(inFolder, idArr=None):
289289
# check for RADOLAN product type and set frequency of DataFrame index
290290
# lists can be extended for other products...
291291
if metadata['producttype'] in ["RW"]:
292-
#df = df.asfreq('H')
293-
df.index.freq = pd.tseries.offsets.Hour()
292+
try:
293+
# try to prevent dataframe copying by .asfreq(). this does not seem to work in all pandas versions --> try - except
294+
df.index.freq = pd.tseries.offsets.Hour()
295+
except:
296+
df = df.asfreq('H')
294297
elif metadata['producttype'] in ["RY", "RZ", "YW"]:
295-
#df = df.asfreq('5min')
296-
df.index.freq = 5 * pd.tseries.offsets.Minute()
298+
try:
299+
df.index.freq = 5 * pd.tseries.offsets.Minute()
300+
except:
301+
df = df.asfreq('5min')
297302

298303
return df, metadata
299304

@@ -320,7 +325,7 @@ def radolan_binaries_to_hdf5(inFolder, HDFFile, idArr=None, complevel=9):
320325
containing ID values to select RADOLAN data of the cells located in the investigation area.
321326
If no idArr is specified, the ID array is automatically generated from RADOLAN metadata
322327
and RADOLAN precipitation data are not clipped to any investigation area.
323-
complevel : interger (optional, default: 9)
328+
complevel : integer (optional, default: 9)
324329
defines the level of compression for the output HDF5 file.
325330
complevel may range from 0 to 9, where 9 is the highest compression possible.
326331
Using a high compression level reduces data size significantly,

build/lib/radproc/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
# THIS FILE IS GENERATED FROM RADPROC SETUP.PY
2-
version = '0.1.2'
2+
version = '0.1.3'

dist/radproc-0.1.3-py2-none-any.whl

41.5 KB
Binary file not shown.

dist/radproc-0.1.3.tar.gz

39.5 KB
Binary file not shown.

docs/_build/.doctrees/arcgis.doctree

0 Bytes
Binary file not shown.
2.84 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)