Correct calculation of metrics with masking (keras-team#2260)

Bojan Karlas · Bojan Karlas · commit 5b84c45e6266 · 2017-02-05T19:22:59.000+01:00
* Reshape y_pred and y_true from (samples, timesteps, ... ) to (samples * timesteps, ... )

* Filter out masked timesteps from y_pred and y_true

* Added K.where() and extended functionality of K.flatten()

* Added/changed corresponding tests
diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py
@@ -1326,6 +1326,26 @@ def lesser_equal(x, y):
     return tf.less_equal(x, y)
 
 
+def where(x):
+    """Returns locations of true values in a boolean tensor.
+
+    This operation returns the coordinates of true elements in `x`. The coordinates are
+    returned in a 2-D tensor where the first dimension (rows) represents the number of
+    true elements, and the second dimension (columns) represents the coordinates of the
+    true elements. Keep in mind, the shape of the output tensor can vary depending on
+    how many true values there are in `x`.
+
+    # Arguments
+        x: input tensor; it is cast to bool before the lookup.
+
+    # Returns
+        An integer tensor of indices, one row per true element.
+
+    """
+    x = tf.cast(x, tf.bool)
+    return tf.where(x)
+
+
 def maximum(x, y):
     """Element-wise maximum of two tensors.
 
@@ -1587,13 +1607,27 @@ def tile(x, n):
     return tf.tile(x, n)
 
 
-def flatten(x):
-    """Flatten a tensor.
+def flatten(x, outdim=1):
+    """Reshapes `x` to `outdim` dimensions. The first `outdim-1` dimensions
+    keep the same sizes as in `x`, and the last dimension is expanded
+    to hold all the remaining data from `x`. With the default `outdim=1`
+    the result is 1-D.
+
+    # Arguments
+        x: input tensor.
+        outdim: number of dimensions in the output tensor.
 
     # Returns
-        A tensor, reshaped into 1-D
+        A tensor, reshaped to `outdim` dimensions.
+
     """
-    return tf.reshape(x, [-1])
+    # Target shape: leading `outdim - 1` dims of `x`, then a constant -1 (no graph variable).
+    if outdim > 1:
+        shape = concatenate([tf.shape(x)[:outdim - 1], tf.constant([-1], dtype=tf.int32)])
+    else:
+        shape = [-1]
+
+    return tf.reshape(x, shape)
 
 
 def batch_flatten(x):
@@ -2023,7 +2057,10 @@ def rnn(step_function, inputs, initial_states,
 
     # TODO: remove later.
     if hasattr(tf, 'select'):
-        tf.where = tf.select
+        where_op = tf.select
+    else:
+        where_op = tf.where
+
     if hasattr(tf, 'stack'):
         stack = tf.stack
         unstack = tf.unstack
@@ -2069,14 +2106,14 @@ def rnn(step_function, inputs, initial_states,
                 else:
                     prev_output = successive_outputs[-1]
 
-                output = tf.where(tiled_mask_t, output, prev_output)
+                output = where_op(tiled_mask_t, output, prev_output)
 
                 return_states = []
                 for state, new_state in zip(states, new_states):
                     # (see earlier comment for tile explanation)
                     tiled_mask_t = tf.tile(mask_t,
                                            stack([1, tf.shape(new_state)[1]]))
-                    return_states.append(tf.where(tiled_mask_t,
+                    return_states.append(where_op(tiled_mask_t,
                                                   new_state,
                                                   state))
                 states = return_states
@@ -2145,8 +2182,8 @@ def _step(time, output_ta_t, *states):
                     new_state.set_shape(state.get_shape())
                 tiled_mask_t = tf.tile(mask_t,
                                        stack([1, tf.shape(output)[1]]))
-                output = tf.where(tiled_mask_t, output, states[0])
-                new_states = [tf.where(tiled_mask_t, new_states[i], states[i]) for i in range(len(states))]
+                output = where_op(tiled_mask_t, output, states[0])
+                new_states = [where_op(tiled_mask_t, new_states[i], states[i]) for i in range(len(states))]
                 output_ta_t = output_ta_t.write(time, output)
                 return (time + 1, output_ta_t) + tuple(new_states)
         else:
diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py
@@ -432,6 +432,25 @@ def lesser_equal(x, y):
     return T.le(x, y)
 
 
+def where(x):
+    """Returns locations of true values in a boolean tensor.
+
+    This operation returns the coordinates of true elements in `x`. The coordinates are
+    returned in a 2-D tensor where the first dimension (rows) represents the number of
+    true elements, and the second dimension (columns) represents the coordinates of the
+    true elements. Keep in mind, the shape of the output tensor can vary depending on
+    how many true values there are in `x`.
+
+    # Arguments
+        x: input tensor; the coordinates of its nonzero elements are returned.
+
+    # Returns
+        An integer tensor of indices, one row per true element.
+
+    """
+    return transpose(x.nonzero(return_matrix=True))
+
+
 def maximum(x, y):
     return T.maximum(x, y)
 
@@ -687,9 +706,23 @@ def tile(x, n):
     return T.tile(x, n)
 
 
-def flatten(x):
+def flatten(x, outdim=1):
+    """Returns a view of this tensor with `outdim` dimensions, whose shape
+    for the first `outdim-1` dimensions will be the same as `x`, and
+    shape in the remaining dimension will be expanded to fit in
+    all the data from `x`.
+
+    # Arguments
+        x: input tensor.
+        outdim: number of dimensions in the output tensor.
+
+    # Returns
+        A tensor, reshaped to `outdim` dimensions.
+
+    """
+
     # TODO: `keras_shape` inference.
-    return T.flatten(x)
+    return T.flatten(x, outdim)
 
 
 def batch_flatten(x):
diff --git a/keras/engine/training.py b/keras/engine/training.py
@@ -486,6 +486,39 @@ def stop(self, timeout=None):
         self.queue = None
 
 
+def masked_tensor(x, mask):
+    """Applies a mask to an input tensor.
+
+    # Arguments
+        x: a tensor of shape `(samples, timesteps, ... )`
+        mask: a mask boolean tensor of shape `(samples, timesteps)` where each
+            value represents whether the given timestep in a given sample
+            should be kept (true) or masked out (false).
+
+    # Returns
+        A tensor of shape `(n_kept, ... )` holding only the timesteps (from all
+        samples) whose mask value is true; `n_kept <= samples * timesteps`.
+
+    """
+
+    # Flatten first two dimensions of input tensor. We do it by shifting the first two
+    # dimensions to the end, flattening them and shifting them back to the front.
+    ndim = K.ndim(x)
+    shift_end_pattern = tuple(list(range(2, ndim)) + [0, 1])
+    shift_front_pattern = tuple([ndim - 2] + list(range(0, ndim - 2)))
+    x = K.permute_dimensions(x, shift_end_pattern)
+    x = K.flatten(x, ndim - 1)
+    x = K.permute_dimensions(x, shift_front_pattern)
+
+    # Also flatten the 2D mask tensor so its entries line up with the rows of x.
+    mask = K.flatten(mask)
+
+    # Extract indices of flattened mask tensor to keep.
+    indices = K.flatten(K.where(mask))
+
+    return K.gather(x, indices)
+
+
 class Model(Container):
 
     def compile(self, optimizer, loss, metrics=None, loss_weights=None,
@@ -694,6 +727,11 @@ def append_metric(layer_num, metric_name, metric_tensor):
             y_true = self.targets[i]
             y_pred = self.outputs[i]
             output_metrics = nested_metrics[i]
+            mask = masks[i]
+
+            if mask is not None:
+                y_true = masked_tensor(y_true, mask)
+                y_pred = masked_tensor(y_pred, mask)
 
             for metric in output_metrics:
                 if metric == 'accuracy' or metric == 'acc':
diff --git a/tests/keras/backend/test_backends.py b/tests/keras/backend/test_backends.py
@@ -105,6 +105,7 @@ def test_shape_operations(self):
                                       pattern=(2, 0, 1))
         check_single_tensor_operation('repeat', (4, 1), n=3)
         check_single_tensor_operation('flatten', (4, 1))
+        check_single_tensor_operation('flatten', (4, 4, 4), outdim=2)
         check_single_tensor_operation('expand_dims', (4, 3), dim=-1)
         check_single_tensor_operation('expand_dims', (4, 3, 2), dim=1)
         check_single_tensor_operation('squeeze', (4, 3, 1), axis=2)
@@ -839,6 +840,13 @@ def test_one_hot(self):
             koh = K.eval(K.one_hot(K.variable(indices, dtype='int32'), nb_classes))
             assert np.all(koh == oh)
 
+    def test_where(self):
+        x = np.random.randint(0, 2, size=(10, 10))
+        exp_out = np.stack(np.nonzero(x), axis=1)  # one (row, col) pair per nonzero entry
+        for K in [KTH, KTF]:
+            k_out = K.eval(K.where(K.variable(x, dtype='int32')))
+            assert np.all(k_out == exp_out)
+
     def test_sparse_dot(self):
         x_d = np.array([0, 7, 2, 3], dtype=np.float32)
         x_r = np.array([0, 2, 2, 3], dtype=np.int64)
diff --git a/tests/test_loss_masking.py b/tests/test_loss_masking.py
@@ -2,8 +2,9 @@
 import pytest
 
 from keras.models import Sequential
-from keras.engine.training import weighted_objective
-from keras.layers.core import TimeDistributedDense, Masking
+from keras.engine.training import weighted_objective, masked_tensor
+from keras.layers.core import Dense, Masking
+from keras.layers.wrappers import TimeDistributed
 from keras.utils.test_utils import keras_test
 from keras import objectives
 from keras import backend as K
@@ -16,12 +17,14 @@ def test_masking():
                   [[0], [0]]])
     model = Sequential()
     model.add(Masking(mask_value=0, input_shape=(2, 1)))
-    model.add(TimeDistributedDense(1, init='one'))
-    model.compile(loss='mse', optimizer='sgd')
+    model.add(TimeDistributed(Dense(1, init='one')))
+    model.compile(loss='mse', optimizer='sgd', metrics=['accuracy'])
     y = np.array([[[1], [1]],
                   [[1], [1]]])
-    loss = model.train_on_batch(X, y)
+    (loss, acc) = model.train_on_batch(X, y)
+
     assert loss == 0
+    assert acc == 1
 
 
 @keras_test
@@ -42,5 +45,17 @@ def test_loss_masking():
                                K.variable(mask)))
 
 
+@keras_test
+def test_masked_tensor():
+    x = np.random.randint(0, 10, size=(5, 10, 5))
+    mask = np.random.randint(0, 2, size=(5, 10))
+    i = np.nonzero(mask)
+    exp_out = x[i[0], i[1], :]  # rows of x at the nonzero mask positions
+
+    k_out = K.eval(masked_tensor(K.variable(x, dtype='int32'), K.variable(mask, dtype='int32')))
+
+    assert np.all(k_out == exp_out)
+
+
 if __name__ == '__main__':
     pytest.main([__file__])