This repository was archived by the owner on Dec 16, 2022. It is now read-only.

Transformer toolkit updates #5270

Merged (13 commits) on Jun 21, 2021
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -12,6 +12,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added `on_backward` training callback which allows for control over backpropagation and gradient manipulation.
- Added `AdversarialBiasMitigator`, a Model wrapper to adversarially mitigate biases in predictions produced by a pretrained model for a downstream task.
- Added `which_loss` parameter to `ensure_model_can_train_save_and_load` in `ModelTestCase` to specify which loss to test.
- The activation layer in the transformer toolkit can now be queried for its output dimension.
- `TransformerEmbeddings` now takes, but ignores, an attention mask parameter. This is needed for compatibility with other modules that are called with the same arguments and do use the mask.
- `TransformerPooler` can now be instantiated from a pretrained transformer module, just like the other modules in the transformer toolkit.

### Fixed

3 changes: 3 additions & 0 deletions allennlp/modules/transformer/activation_layer.py
@@ -24,6 +24,9 @@ def __init__(
self.act_fn = activation
self.pool = pool

def get_output_dim(self) -> int:
return self.dense.out_features

def forward(self, hidden_states):
if self.pool:
hidden_states = hidden_states[:, 0]
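For reference, a minimal sketch of how the new accessor might be used. The positional constructor arguments (hidden size, intermediate size, activation, pool) are inferred from the `TransformerPooler` code later in this diff, so treat the construction as an assumption rather than the documented API.

```python
import torch
from allennlp.modules.transformer.activation_layer import ActivationLayer

# Assumed constructor: ActivationLayer(hidden_size, intermediate_size, activation, pool),
# inferred from the TransformerPooler.__init__ call further down in this PR.
layer = ActivationLayer(768, 3072, torch.nn.ReLU(), pool=False)

# The new accessor exposes the out_features of the internal dense projection,
# so callers no longer need to reach into `layer.dense` themselves.
assert layer.get_output_dim() == 3072

hidden_states = torch.randn(2, 16, 768)  # (batch_size, seq_len, hidden_size)
projected = layer(hidden_states)
assert projected.shape[-1] == layer.get_output_dim()
```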
4 changes: 3 additions & 1 deletion allennlp/modules/transformer/transformer_embeddings.py
@@ -113,7 +113,6 @@ class TransformerEmbeddings(Embeddings):
# Albert is a special case. A linear projection is applied to the embeddings,
# but that linear transformation lives in the encoder.
"albert.embeddings.LayerNorm": "layer_norm",
"albert.embeddings.LayerNorm": "layer_norm",
"albert.embeddings.word_embeddings": "embeddings.word_embeddings",
"albert.embeddings.position_embeddings": "embeddings.position_embeddings",
"albert.embeddings.token_type_embeddings": "embeddings.token_type_embeddings",
@@ -161,6 +160,7 @@ def __init__(
def forward( # type: ignore
self,
input_ids: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
token_type_ids: Optional[torch.Tensor] = None,
position_ids: Optional[torch.Tensor] = None,
) -> torch.Tensor:
@@ -169,6 +169,8 @@ def forward( # type: ignore
# Parameters
input_ids : `torch.Tensor`
Shape `batch_size x seq_len`
attention_mask : `torch.Tensor`, optional
Shape `batch_size x seq_len`. This parameter is ignored, but it is here for compatibility.
token_type_ids : `torch.Tensor`, optional
Shape `batch_size x seq_len`
position_ids : `torch.Tensor`, optional
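A hedged sketch of the widened `forward` signature. Loading the module with `from_pretrained_module("bert-base-uncased")` is an assumption about the toolkit's usual loading helper and may differ between versions; the point is that the mask can now be passed and is silently ignored.

```python
import torch
from allennlp.modules.transformer.transformer_embeddings import TransformerEmbeddings

# Assumption: from_pretrained_module accepts a Hugging Face model name here;
# the exact loading arguments may differ by toolkit version.
embeddings = TransformerEmbeddings.from_pretrained_module("bert-base-uncased")

input_ids = torch.tensor([[101, 7592, 2088, 102]])  # (batch_size, seq_len)
attention_mask = torch.ones_like(input_ids)
token_type_ids = torch.zeros_like(input_ids)

# Because the mask is accepted (and ignored), the same keyword arguments used
# for mask-consuming modules can be passed here without special-casing the caller.
output = embeddings(
    input_ids,
    attention_mask=attention_mask,
    token_type_ids=token_type_ids,
)
print(output.shape)  # (1, 4, hidden_size)
```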
35 changes: 34 additions & 1 deletion allennlp/modules/transformer/transformer_pooler.py
@@ -1,11 +1,44 @@
from typing import Dict, Optional, Any, Union, TYPE_CHECKING

import torch

from allennlp.common import FromParams
from allennlp.modules.transformer.activation_layer import ActivationLayer

if TYPE_CHECKING:
from transformers.configuration_utils import PretrainedConfig


class TransformerPooler(ActivationLayer, FromParams):

_pretrained_relevant_module = ["pooler", "bert.pooler"]

def __init__(
self,
hidden_size: int,
intermediate_size: int,
activation: Union[str, torch.nn.Module] = "relu",
):
super().__init__(hidden_size, intermediate_size, "relu", pool=True)
super().__init__(hidden_size, intermediate_size, activation, pool=True)

@classmethod
def _get_input_arguments(
Contributor: We don't require this method any longer. from_config takes care of what we need.

Member Author: Removed!

cls,
pretrained_module: torch.nn.Module,
source: str = "huggingface",
mapping: Optional[Dict[str, str]] = None,
**kwargs,
) -> Dict[str, Any]:
final_kwargs = {}

final_kwargs["hidden_size"] = pretrained_module.dense.in_features
final_kwargs["intermediate_size"] = pretrained_module.dense.out_features
final_kwargs["activation"] = pretrained_module.activation

final_kwargs.update(kwargs)

return final_kwargs

@classmethod
def _from_config(cls, config: "PretrainedConfig", **kwargs):
return cls(config.hidden_size, config.hidden_size, "tanh") # BERT has this hardcoded
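To illustrate the new pooler API, a short sketch using the `__init__` signature shown above. The commented line about building the pooler from a pretrained transformer follows the changelog entry, but the exact classmethod and its arguments depend on the toolkit version, so treat it as an assumption.

```python
import torch
from allennlp.modules.transformer.transformer_pooler import TransformerPooler

# Direct construction, using the __init__ signature from this diff.
pooler = TransformerPooler(hidden_size=768, intermediate_size=768, activation="tanh")

hidden_states = torch.randn(2, 16, 768)  # (batch_size, seq_len, hidden_size)
pooled = pooler(hidden_states)           # pool=True: only the first ([CLS]) token is projected
print(pooled.shape)                      # expected: (2, 768)

# Per the changelog, the pooler can now also be instantiated from a pretrained
# transformer module like the other toolkit modules. The exact helper and
# arguments (e.g. from_pretrained_module / from_config) vary by version, so the
# line below is illustrative only:
#
#   pooler = TransformerPooler.from_pretrained_module("bert-base-uncased")
```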