Skip to content

feat: Change model prediction output from tuple to dict #85

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: master
Choose a base branch
from
File renamed without changes.
515 changes: 208 additions & 307 deletions experiments/Train.ipynb

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions lib/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import List

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.data.experimental import dense_to_ragged_batch
Expand Down Expand Up @@ -197,3 +198,18 @@ def _has_labels(x, y):
return tf.math.reduce_max(y, 0) > 0

return ds.filter(_has_labels)


def as_dataframe(ds: tf.data.Dataset) -> pd.DataFrame:
    """
    Return the dataset as a pandas dataframe.

    Same as `tfds.as_dataframe`, but with properly decoded string tensors.

    Parameters
    ----------
    ds: tf.data.Dataset
        Dataset to convert.

    Returns
    -------
    pd.DataFrame
        The frame built by `tfds.as_dataframe`, where every cell that can be
        decoded with `.decode()` has been replaced by the decoded value.
    """
    def _maybe_decode(x):
        # Cells without a `decode` method, or whose bytes are not valid text,
        # are returned untouched.
        try:
            return x.decode()
        except (UnicodeDecodeError, AttributeError):
            return x

    df = tfds.as_dataframe(ds)

    # `DataFrame.applymap` is deprecated since pandas 2.1 in favour of
    # `DataFrame.map`; check the class (not the instance) so a column that
    # happens to be named "map" cannot be mistaken for the method.
    if hasattr(pd.DataFrame, 'map'):
        return df.map(_maybe_decode)
    return df.applymap(_maybe_decode)
53 changes: 0 additions & 53 deletions lib/eval.py

This file was deleted.

114 changes: 67 additions & 47 deletions lib/io.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,73 @@
import json
import pathlib
import shutil
from typing import Dict
import tempfile
from typing import List

import tensorflow as tf

from lib import settings
from lib.model import to_serving_model


TRAINING_MODEL_SUBDIR = 'training_model'
SERVING_MODEL_SUBDIR = 'serving_model'


def save_model_bundle(
model_dir: pathlib.Path,
def save_model(
path: pathlib.Path,
model: tf.keras.Model,
categories_vocab: Dict[str, int]):
save_category_vocabulary(categories_vocab, model_dir)
model.save(model_dir/TRAINING_MODEL_SUBDIR)
to_serving_model(model, categories_vocab).save(model_dir/SERVING_MODEL_SUBDIR)


def load_training_model(model_dir: pathlib.Path) -> tf.keras.Model:
    """Load the Keras model stored in the training sub-directory of `model_dir`."""
    training_path = model_dir / TRAINING_MODEL_SUBDIR
    return tf.keras.models.load_model(training_path)


def load_serving_model(model_dir: pathlib.Path) -> tf.keras.Model:
    """Load the Keras model stored in the serving sub-directory of `model_dir`."""
    serving_path = model_dir / SERVING_MODEL_SUBDIR
    return tf.keras.models.load_model(serving_path)


def save_category_vocabulary(category_vocab: Dict[str, int], model_dir: pathlib.Path):
    """
    Write the category vocabulary as a JSON name -> index mapping.

    The entries of `category_vocab` are iterated in order and numbered from 0;
    the resulting mapping is saved under `settings.CATEGORY_VOC_NAME` inside
    `model_dir`.

    NOTE(review): the argument is annotated as a dict but is only iterated, so
    any indices it carries are replaced by the iteration position - confirm
    this is intended.
    """
    category_to_ind = {}
    for position, category in enumerate(category_vocab):
        category_to_ind[category] = position

    target = model_dir / settings.CATEGORY_VOC_NAME
    return save_json(category_to_ind, target)


def load_category_vocabulary(model_dir: pathlib.Path):
    """Read back the JSON vocabulary stored under `settings.CATEGORY_VOC_NAME` in `model_dir`."""
    vocabulary_path = model_dir / settings.CATEGORY_VOC_NAME
    return load_json(vocabulary_path)


def copy_category_taxonomy(taxonomy_path: pathlib.Path, model_dir: pathlib.Path):
    """Copy the taxonomy file into `model_dir` under `settings.CATEGORY_TAXONOMY_NAME`."""
    source = str(taxonomy_path)
    destination = str(model_dir / settings.CATEGORY_TAXONOMY_NAME)
    shutil.copy(source, destination)


def save_json(obj: object, path: pathlib.Path):
    """
    Serialize `obj` as JSON into the file at `path`, overwriting it.

    Returns whatever `json.dump` returns (i.e. `None`).
    """
    with path.open("w") as stream:
        outcome = json.dump(obj, stream)
    return outcome


def load_json(path: pathlib.Path):
    """Parse the JSON document stored at `path` and return the decoded object."""
    with path.open("r") as stream:
        parsed = json.load(stream)
    return parsed
labels_vocab: List[str],
serving_func: tf.function = None,
**kwargs):
"""
Save the model and labels, with an optional custom serving function.

Parameters
----------
path: pathlib.Path
Path where the model will be saved.

model: tf.keras.Model
Keras model instance to be saved.

labels_vocab: List[str]
Label vocabulary.

serving_func: tf.function, optional
Custom serving function.
If passed, `serving_func` will be the default endpoint in tensorflow serving.

**kwargs: dict, optional
Additional keyword arguments passed to `tf.keras.Model.save`.
"""
# Stage the label vocabulary in a scratch directory. The context manager
# guarantees the directory is removed even when tracing the serving function
# or saving the model raises.
with tempfile.TemporaryDirectory() as tmp_dir:
    labels_path = pathlib.Path(tmp_dir).joinpath('labels_vocab.txt')
    with labels_path.open('w') as w:
        # One label per line.
        w.writelines(f"{label}\n" for label in labels_vocab)
    # Attach the file as an asset so it is saved alongside the model.
    model.labels_file = tf.saved_model.Asset(str(labels_path))

    signatures = None
    if serving_func:
        # Trace the custom serving function against the model's own input
        # spec and expose it as the default serving signature.
        arg_specs, kwarg_specs = model.save_spec()
        concrete_func = serving_func.get_concrete_function(*arg_specs, **kwarg_specs)
        signatures = {'serving_default': concrete_func}

    # Must run inside the `with` block: the Asset source file has to still
    # exist while the model is being saved.
    model.save(str(path), signatures=signatures, **kwargs)


def load_model(path: pathlib.Path, **kwargs):
    """
    Load the model and labels.

    Parameters
    ----------
    path: pathlib.Path
        Path to the saved model.

    **kwargs: dict, optional
        Additional keyword arguments passed to `tf.keras.models.load_model`.

    Returns
    -------
    (tf.keras.Model, List[str])
        Model and labels.
    """
    # Forward the extra keyword arguments, as documented above.
    model = tf.keras.models.load_model(str(path), **kwargs)

    # The labels are read from the `labels_file` asset attached to the model,
    # one label per line.
    labels_file = model.labels_file.asset_path.numpy()
    with open(labels_file) as f:
        labels = f.read().splitlines()
    return model, labels
77 changes: 48 additions & 29 deletions lib/model.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,64 @@
from typing import List
from typing import List, Tuple, Union

import numpy as np
import pandas as pd
import tensorflow as tf


@tf.keras.utils.register_keras_serializable()
class OutputMapperLayer(tf.keras.layers.Layer):
"""
The OutputMapperLayer converts the label indices produced by the model to
the taxonomy category ids and limits them to top N labels.
@tf.function
def top_labeled_predictions(
predictions: Union[tf.Tensor, np.array],
labels: List[str],
k: int = 10):
"""
Top labeled predictions.

This `@tf.function` can be used as a custom serving function.

def __init__(self, labels: List[str], top_n: int, **kwargs):
self.labels = labels
self.top_n = top_n
Parameters
----------
predictions: tf.Tensor or np.array
Predictions, as returned by `model.predict` or equivalent.

super(OutputMapperLayer, self).__init__(**kwargs)
labels: List[str]
Label vocabulary.

def call(self, x):
batch_size = tf.shape(x)[0]
k: int, optional
Number of top predictions to return.

tf_labels = tf.constant([self.labels], dtype="string")
tf_labels = tf.tile(tf_labels, [batch_size, 1])
Returns
-------
{'labels': tf.Tensor, 'scores': tf.Tensor}
Top predicted labels with their scores.
Returned tensors will have shape `(predictions.shape[0], k)`.
"""
tf_labels = tf.constant([labels], dtype='string')

top_n = tf.nn.top_k(x, k=self.top_n, sorted=True, name="top_k").indices
top_indices = tf.nn.top_k(predictions, k=k, sorted=True, name='top_k').indices

top_conf = tf.gather(x, top_n, batch_dims=1)
top_labels = tf.gather(tf_labels, top_n, batch_dims=1)
top_labels = tf.experimental.numpy.take(tf_labels, top_indices)
top_scores = tf.gather(predictions, top_indices, batch_dims=1)

return (top_conf, top_labels)
return {'labels': top_labels, 'scores': top_scores}

def compute_output_shape(self, input_shape):
    """
    Return the shapes of the two outputs.

    Both outputs share the shape `(input_shape[0], self.top_n)`.
    """
    per_output = (input_shape[0], self.top_n)
    return [per_output] * 2

def get_config(self):
    """
    Return the layer configuration.

    The parent configuration is extended with this layer's `labels` and
    `top_n`; on a key clash the values from this layer win.
    """
    own_config = {"labels": self.labels, "top_n": self.top_n}
    parent_config = super(OutputMapperLayer, self).get_config()
    return {**parent_config, **own_config}
def top_predictions_table(labeled_predictions) -> pd.DataFrame:
    """
    Format the top labeled predictions into a pretty table.

    Parameters
    ----------
    labeled_predictions: {'labels': tf.Tensor, 'scores': tf.Tensor}
        Labeled predictions, as returned by `top_labeled_predictions`.
        Both entries must expose `.numpy()` and yield 2-D arrays of the same
        shape; labels are byte strings.

    Returns
    -------
    pd.DataFrame
        One row per prediction and one column per rank
        ("top prediction 1", "top prediction 2", ...); each cell reads
        "<label>: <score as a percentage with two decimals>".
    """
    labels = labeled_predictions['labels'].numpy()
    scores = labeled_predictions['scores'].numpy()

    # Plain nested comprehension instead of `np.vectorize`: without `otypes`,
    # `np.vectorize` raises on size-0 input, so an empty batch would crash.
    cells = [
        [f"{label.decode()}: {score:.2%}" for label, score in zip(label_row, score_row)]
        for label_row, score_row in zip(labels, scores)
    ]
    columns = [f"top prediction {i+1}" for i in range(labels.shape[1])]

    return pd.DataFrame(cells, columns=columns)


def to_serving_model(base_model: tf.keras.Model, categories: List[str]) -> tf.keras.Model:
    """
    Wrap `base_model` so that its output goes through an `OutputMapperLayer`.

    The mapper layer is built with `categories` as labels and 50 as its
    second argument; the returned model maps `base_model.input` to the mapper
    output.
    """
    mapper_layer = OutputMapperLayer(categories, 50)(base_model.output)
    return tf.keras.Model(base_model.input, mapper_layer)