From e6820200ba9090dafe7c45675d4052950c81d6aa Mon Sep 17 00:00:00 2001 From: regisss Date: Mon, 20 Jun 2022 20:46:35 +0200 Subject: [PATCH 1/6] Add support for image classification --- docs/source/onnxruntime/modeling_ort.mdx | 14 +-- docs/source/pipelines.mdx | 31 +++---- optimum/onnxruntime/__init__.py | 1 + optimum/onnxruntime/configuration.py | 1 - optimum/onnxruntime/modeling_ort.py | 95 +++++++++++++++++++- optimum/onnxruntime/optimization.py | 20 +++-- optimum/onnxruntime/quantization.py | 29 +++--- optimum/onnxruntime/runs/utils.py | 4 +- optimum/pipelines.py | 47 +++++++--- tests/onnxruntime/test_modeling_ort.py | 110 ++++++++++++++++++----- 10 files changed, 266 insertions(+), 86 deletions(-) diff --git a/docs/source/onnxruntime/modeling_ort.mdx b/docs/source/onnxruntime/modeling_ort.mdx index 5bbafe4388..09f9a2a8fa 100644 --- a/docs/source/onnxruntime/modeling_ort.mdx +++ b/docs/source/onnxruntime/modeling_ort.mdx @@ -12,13 +12,13 @@ specific language governing permissions and limitations under the License. # Optimum Inference with ONNX Runtime -Optimum is a utility package for building and running inference with accelerated runtime like ONNX Runtime. -Optimum can be used to load optimized models from the [Hugging Face Hub](hf.co/models) and create pipelines +Optimum is a utility package for building and running inference with accelerated runtime like ONNX Runtime. +Optimum can be used to load optimized models from the [Hugging Face Hub](hf.co/models) and create pipelines to run accelerated inference without rewriting your APIs. ## Switching from Transformers to Optimum Inference -The Optimum Inference models are API compatible with Hugging Face Transformers models. This means you can just replace your `AutoModelForXxx` class with the corresponding `ORTModelForXxx` class in `optimum`. For example, this is how you can use a question answering model in `optimum`: +The Optimum Inference models are API compatible with Hugging Face Transformers models. This means you can just replace your `AutoModelForXxx` class with the corresponding `ORTModelForXxx` class in `optimum`. For example, this is how you can use a question answering model in `optimum`: ```diff from transformers import AutoTokenizer, pipeline @@ -57,8 +57,8 @@ You can find a complete walkhrough Optimum Inference for ONNX Runtime in this [n ### Working with the Hugging Face Model Hub -The Optimum model classes like [`~onnxruntime.ORTModelForSequenceClassification`] are integrated with the [Hugging Face Model Hub](https://hf.co/models), which means you can not only -load model from the Hub, but also push your models to the Hub with `push_to_hub()` method. Below is an example which downloads a vanilla Transformers model +The Optimum model classes like [`~onnxruntime.ORTModelForSequenceClassification`] are integrated with the [Hugging Face Model Hub](https://hf.co/models), which means you can not only +load model from the Hub, but also push your models to the Hub with `push_to_hub()` method. Below is an example which downloads a vanilla Transformers model from the Hub and converts it to an optimum onnxruntime model and pushes it back into a new repository. 
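For orientation while reviewing this docs hunk, here is a minimal sketch of the download–convert–save flow the paragraph above describes, using the `from_transformers=True` export path exercised throughout this patch. The checkpoint name and output directory are illustrative placeholders, not values taken from the diff:

```python
# Sketch of the "vanilla Transformers -> ONNX Runtime" conversion flow described above.
# The checkpoint and output directory are placeholders chosen for illustration only.
from optimum.onnxruntime import ORTModelForSequenceClassification

# Download a vanilla Transformers checkpoint and export it to ONNX on the fly
model = ORTModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english", from_transformers=True
)

# Save the exported ONNX model and its config locally; from here the
# `push_to_hub()` method mentioned above can upload it to a new Hub repository
model.save_pretrained("local_onnx_model")
```

The `ORTModelForImageClassification` class documented in the next hunk plugs into this same loading and saving API.
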
@@ -105,3 +105,7 @@ from the Hub and converts it to an optimum onnxruntime model and pushes it back [[autodoc]] onnxruntime.modeling_ort.ORTModelForCausalLM +## ORTModelForImageClassification + +[[autodoc]] onnxruntime.modeling_ort.ORTModelForImageClassification + diff --git a/docs/source/pipelines.mdx b/docs/source/pipelines.mdx index 14ea3b2fb6..3cf4ec16c0 100644 --- a/docs/source/pipelines.mdx +++ b/docs/source/pipelines.mdx @@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License. # Optimum pipelines for inference -The [`~pipelines.pipeline`] function makes it simple to use models from the [Model Hub](https://huggingface.co/models) for accelerated inference on a variety of tasks such as text classification. +The [`~pipelines.pipeline`] function makes it simple to use models from the [Model Hub](https://huggingface.co/models) for accelerated inference on a variety of tasks such as question answering or image classification. Even if you don't have experience with a specific modality or understand the code powering the models, you can still use them with the [`~pipelines.pipeline`] function! @@ -31,11 +31,12 @@ Currenlty supported tasks are: * `question-answering` * `zero-shot-classification` * `text-generation` +* `image-classification` ## Optimum pipeline usage -While each task has an associated pipeline class, it is simpler to use the general [`~pipelines.pipeline`] function which wraps all the task-specific pipelines in one object. -The [`~pipelines.pipeline`] function automatically loads a default model and tokenizer capable of inference for your task. +While each task has an associated pipeline class, it is simpler to use the general [`~pipelines.pipeline`] function which wraps all the task-specific pipelines in one object. +The [`~pipelines.pipeline`] function automatically loads a default model and tokenizer/feature-extractor capable of inference for your task. 1. Start by creating a pipeline by specifying an inference task: @@ -46,7 +47,7 @@ The [`~pipelines.pipeline`] function automatically loads a default model and tok ``` -2. Pass your input text to the [`~pipelines.pipeline`] function: +2. Pass your input text/image to the [`~pipelines.pipeline`] function: ```python >>> classifier("I like you. I love you.") @@ -57,9 +58,9 @@ _Note: The default models used in the [`~pipelines.pipeline`] function are not o ### Using vanilla Transformers model and converting to ONNX -The [`~pipelines.pipeline`] function accepts any supported model from the [Model Hub](https://huggingface.co/models). -There are tags on the Model Hub that allow you to filter for a model you'd like to use for your task. -Once you've picked an appropriate model, load it with the `from_pretrained("{model_id}",from_transformers=True)` method associated with the `ORTModelFor*` +The [`~pipelines.pipeline`] function accepts any supported model from the [Model Hub](https://huggingface.co/models). +There are tags on the Model Hub that allow you to filter for a model you'd like to use for your task. +Once you've picked an appropriate model, load it with the `from_pretrained("{model_id}",from_transformers=True)` method associated with the `ORTModelFor*` `AutoTokenizer' class. 
For example, here's how you can load the [`~onnxruntime.ORTModelForQuestionAnswering`] class for question answering: ```python @@ -80,10 +81,10 @@ Once you've picked an appropriate model, load it with the `from_pretrained("{mod ### Using Optimum models -The [`~pipelines.pipeline`] function is tightly integrated with [Model Hub](https://huggingface.co/models) and can load optimized models directly, e.g. those created with ONNX Runtime. -There are tags on the Model Hub that allow you to filter for a model you'd like to use for your task. +The [`~pipelines.pipeline`] function is tightly integrated with [Model Hub](https://huggingface.co/models) and can load optimized models directly, e.g. those created with ONNX Runtime. +There are tags on the Model Hub that allow you to filter for a model you'd like to use for your task. Once you've picked an appropriate model, load it with the `from_pretrained()` method associated with the corresponding `ORTModelFor*` -and `AutoTokenizer' class. For example, here's how you can load an optimized model for question answering: +and `AutoTokenizer'/`AutoFeatureExtractor` class. For example, here's how you can load an optimized model for question answering: ```python >>> from transformers import AutoTokenizer @@ -132,7 +133,7 @@ Below you can find two examples on how you could [`~onnxruntime.ORTOptimizer`] a onnx_quantized_model_output_path=save_path / "model-quantized.onnx", quantization_config=qconfig, ) ->>> quantizer.model.config.save_pretrained(save_path) # saves config.json +>>> quantizer.model.config.save_pretrained(save_path) # saves config.json # load optimized model from local path or repository >>> model = ORTModelForSequenceClassification.from_pretrained(save_path,file_name="model-quantized.onnx") @@ -176,7 +177,7 @@ Below you can find two examples on how you could [`~onnxruntime.ORTOptimizer`] a onnx_optimized_model_output_path=save_path / "model-optimized.onnx", optimization_config=optimization_config, ) ->>> optimizer.model.config.save_pretrained(save_path) # saves config.json +>>> optimizer.model.config.save_pretrained(save_path) # saves config.json # load optimized model from local path or repository >>> model = ORTModelForSequenceClassification.from_pretrained(save_path,file_name="model-optimized.onnx") @@ -198,8 +199,8 @@ Below you can find two examples on how you could [`~onnxruntime.ORTOptimizer`] a ## Transformers pipeline usage The [`~pipelines.pipeline`] function is just a light wrapper around the `transformers.pipeline` function to enable checks for supported tasks and additional features -, like quantization and optimization. This being said you can use the `transformers.pipeline` and just replace your `AutoFor*` with the optimum - `ORTModelFor*` class. +, like quantization and optimization. This being said you can use the `transformers.pipeline` and just replace your `AutoFor*` with the optimum + `ORTModelFor*` class. 
```diff from transformers import AutoTokenizer, pipeline @@ -207,7 +208,7 @@ from transformers import AutoTokenizer, pipeline +from optimum.onnxruntime import ORTModelForQuestionAnswering -model = AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2") -+model = ORTModelForQuestionAnswering.from_transformers("optimum/roberta-base-squad2") ++model = ORTModelForQuestionAnswering.from_pretrained("optimum/roberta-base-squad2") tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2") onnx_qa = pipeline("question-answering",model=model,tokenizer=tokenizer) diff --git a/optimum/onnxruntime/__init__.py b/optimum/onnxruntime/__init__.py index 20ae1283e5..ecc71e9fd5 100644 --- a/optimum/onnxruntime/__init__.py +++ b/optimum/onnxruntime/__init__.py @@ -53,6 +53,7 @@ class ORTQuantizableOperator(Enum): from .modeling_ort import ( ORTModelForCausalLM, ORTModelForFeatureExtraction, + ORTModelForImageClassification, ORTModelForQuestionAnswering, ORTModelForSequenceClassification, ORTModelForTokenClassification, diff --git a/optimum/onnxruntime/configuration.py b/optimum/onnxruntime/configuration.py index a33135aa15..f56fb7726f 100644 --- a/optimum/onnxruntime/configuration.py +++ b/optimum/onnxruntime/configuration.py @@ -20,7 +20,6 @@ from datasets import Dataset from packaging.version import Version, parse -from onnxruntime import GraphOptimizationLevel from onnxruntime import __version__ as ort_version from onnxruntime.quantization import CalibraterBase, CalibrationMethod, QuantFormat, QuantizationMode, QuantType from onnxruntime.quantization.calibrate import create_calibrator diff --git a/optimum/onnxruntime/modeling_ort.py b/optimum/onnxruntime/modeling_ort.py index e153954948..04be5713c0 100644 --- a/optimum/onnxruntime/modeling_ort.py +++ b/optimum/onnxruntime/modeling_ort.py @@ -9,10 +9,10 @@ AutoConfig, AutoModel, AutoModelForCausalLM, + AutoModelForImageClassification, AutoModelForQuestionAnswering, AutoModelForSequenceClassification, AutoModelForTokenClassification, - AutoTokenizer, PretrainedConfig, ) from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward, default_cache_path @@ -20,11 +20,13 @@ from transformers.modeling_outputs import ( BaseModelOutput, CausalLMOutputWithCrossAttentions, + ImageClassifierOutput, QuestionAnsweringModelOutput, SequenceClassifierOutput, TokenClassifierOutput, ) from transformers.onnx import FeaturesManager, export +from transformers.onnx.utils import get_preprocessor import onnxruntime as ort from huggingface_hub import HfApi, hf_hub_download @@ -37,6 +39,7 @@ _TOKENIZER_FOR_DOC = "AutoTokenizer" +_FEATURE_EXTRACTOR_FOR_DOC = "AutoFeatureExtractor" ONNX_MODEL_START_DOCSTRING = r""" This model inherits from [~`onnxruntime.modeling_ort.ORTModel`]. Check the superclass documentation for the generic methods the @@ -52,7 +55,7 @@ Args: input_ids (`torch.Tensor` of shape `({0})`): Indices of input sequence tokens in the vocabulary. - Indices can be obtained using [`AutoTokenizer`](https://huggingface.co/docs/transformers/autoclass_tutorial#autotokenizer). + Indices can be obtained using [`AutoTokenizer`](https://huggingface.co/docs/transformers/autoclass_tutorial#autotokenizer). See [`PreTrainedTokenizer.encode`](https://huggingface.co/docs/transformers/main_classes/tokenizer#transformers.PreTrainedTokenizerBase.encode) and [`PreTrainedTokenizer.__call__`](https://huggingface.co/docs/transformers/main_classes/tokenizer#transformers.PreTrainedTokenizerBase.__call__) for details. 
[What are input IDs?](https://huggingface.co/docs/transformers/glossary#input-ids) @@ -234,14 +237,14 @@ def _from_transformers( task = "default" # 2. convert to temp dir # FIXME: transformers.onnx conversion doesn't support private models - tokenizer = AutoTokenizer.from_pretrained(model_id) + preprocessor = get_preprocessor(model_id) model = FeaturesManager.get_model_from_feature(task, model_id) _, model_onnx_config = FeaturesManager.check_supported_model_or_raise(model, feature=task) onnx_config = model_onnx_config(model.config) # export model export( - preprocessor=tokenizer, + preprocessor=preprocessor, model=model, config=onnx_config, opset=onnx_config.default_onnx_opset, @@ -730,3 +733,87 @@ def _prepare_attention_mask_for_generation( else: # Ensure attention mask is on the same device as the input IDs return torch.ones(inputs.shape[:2], dtype=torch.long, device=inputs.device) + + +IMAGE_CLASSIFICATION_SAMPLE = r""" + Example of image classification: + + ```python + >>> import requests + >>> from PIL import Image + >>> from optimum.onnxruntime import {model_class} + >>> from transformers import {processor_class} + + >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" + >>> image = Image.open(requests.get(url, stream=True).raw) + + >>> preprocessor = {processor_class}.from_pretrained("{checkpoint}") + >>> model = {model_class}.from_pretrained("{checkpoint}") + + >>> inputs = preprocessor(images=image, return_tensors="pt") + + >>> outputs = model(**inputs) + >>> logits = outputs.logits + ``` + + Example using `transformers.pipeline`: + + ```python + >>> import requests + >>> from PIL import Image + >>> from transformers import {processor_class}, pipeline + >>> from optimum.onnxruntime import {model_class} + + >>> preprocessor = {processor_class}.from_pretrained("{checkpoint}") + >>> model = {model_class}.from_pretrained("{checkpoint}") + >>> onnx_image_classifier = pipeline("image-classification", model=model, feature_extractor=preprocessor) + + >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" + >>> pred = onnx_image_classifier(url) + ``` +""" + + +@add_start_docstrings( + """ + Onnx Model for image-classification tasks. + """, + ONNX_MODEL_START_DOCSTRING, +) +class ORTModelForImageClassification(ORTModel): + """ + Image Classification model for ONNX. 
+ """ + + # used in from_transformers to export model to onnx + pipeline_task = "image-classification" + auto_model_class = AutoModelForImageClassification + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # create {name:idx} dict for model outputs + self.model_outputs = {output_key.name: idx for idx, output_key in enumerate(self.model.get_outputs())} + + @add_start_docstrings_to_model_forward( + ONNX_INPUTS_DOCSTRING.format("batch_size, sequence_length") + + FEAUTRE_EXTRACTION_SAMPLE.format( + processor_class=_FEATURE_EXTRACTOR_FOR_DOC, + model_class="ORTModelForImageClassification", + checkpoint="optimum/vit-base-patch16-224", + ) + ) + def forward( + self, + pixel_values: torch.Tensor, + **kwargs, + ): + # converts pytorch inputs into numpy inputs for onnx + onnx_inputs = { + "pixel_values": pixel_values.cpu().detach().numpy(), + } + # run inference + outputs = self.model.run(None, onnx_inputs) + # converts output to namedtuple for pipelines post-processing + return ImageClassifierOutput( + logits=torch.from_numpy(outputs[self.model_outputs["logits"]]), + ) diff --git a/optimum/onnxruntime/optimization.py b/optimum/onnxruntime/optimization.py index d1e4bacee8..83e7cbd982 100644 --- a/optimum/onnxruntime/optimization.py +++ b/optimum/onnxruntime/optimization.py @@ -16,9 +16,10 @@ from pathlib import Path from typing import Callable, Dict, List, Optional, Tuple, Union -from transformers import AutoTokenizer, PreTrainedModel, PreTrainedTokenizer +from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PreTrainedModel from transformers.onnx import export from transformers.onnx.features import FeaturesManager +from transformers.onnx.utils import get_preprocessor from onnx import load_model from onnxruntime.transformers.fusion_options import FusionOptions @@ -41,7 +42,7 @@ def from_pretrained( model_name_or_path: Union[str, os.PathLike], feature: str, opset: Optional[int] = None ) -> "ORTOptimizer": """ - Instantiate a `ORTOptimizer` from a pretrained pytorch model and tokenizer. + Instantiate a `ORTOptimizer` from a pretrained pytorch model and preprocessor. Args: model_name_or_path (`Union[str, os.PathLike]`): @@ -54,22 +55,23 @@ def from_pretrained( Returns: An instance of `ORTOptimizer`. """ - tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) + preprocessor = get_preprocessor(model_name_or_path) model_class = FeaturesManager.get_model_class_for_feature(feature) model = model_class.from_pretrained(model_name_or_path) - return ORTOptimizer(tokenizer, model, feature, opset) + + return ORTOptimizer(preprocessor, model, feature, opset) def __init__( self, - tokenizer: PreTrainedTokenizer, + preprocessor: Union[AutoFeatureExtractor, AutoProcessor, AutoTokenizer], model: PreTrainedModel, feature: str = "default", opset: Optional[int] = None, ): """ Args: - tokenizer (`PreTrainedTokenizer`): - The tokenizer used to preprocess the data. + preprocessor (`Union[AutoFeatureExtractor, AutoProcessor, AutoTokenizer]`): + The preprocessor used to preprocess the data. model (`PreTrainedModel`): The model to optimize. 
feature (`str`, defaults to `"default"`): @@ -79,7 +81,7 @@ def __init__( """ super().__init__() - self.tokenizer = tokenizer + self.preprocessor = preprocessor self.model = model self.feature = feature self._model_type, onnx_config_factory = FeaturesManager.check_supported_model_or_raise(model, feature=feature) @@ -117,7 +119,7 @@ def export( # Export the model if it has not already been exported to ONNX IR if not onnx_model_path.exists(): - export(self.tokenizer, self.model, self._onnx_config, self.opset, onnx_model_path) + export(self.preprocessor, self.model, self._onnx_config, self.opset, onnx_model_path) ORTConfigManager.check_supported_model_or_raise(self._model_type) num_heads = getattr(self.model.config, ORTConfigManager.get_num_heads_name(self._model_type)) diff --git a/optimum/onnxruntime/quantization.py b/optimum/onnxruntime/quantization.py index 346cab5d68..3a4fe063e4 100644 --- a/optimum/onnxruntime/quantization.py +++ b/optimum/onnxruntime/quantization.py @@ -20,9 +20,10 @@ from typing import Callable, Dict, List, Optional, Tuple, Union from datasets import Dataset, load_dataset -from transformers import AutoTokenizer, PreTrainedModel, PreTrainedTokenizer +from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PreTrainedModel from transformers.onnx import export from transformers.onnx.features import FeaturesManager +from transformers.onnx.utils import get_preprocessor import onnx from onnxruntime.quantization import CalibrationDataReader, QuantFormat, QuantizationMode, QuantType @@ -85,7 +86,7 @@ def from_pretrained( model_name_or_path: Union[str, os.PathLike], feature: str, opset: Optional[int] = None ) -> "ORTQuantizer": """ - Instantiate a `ORTQuantizer` from a pretrained pytorch model and tokenizer. + Instantiate a `ORTQuantizer` from a pretrained pytorch model and preprocessor. Args: model_name_or_path (`Union[str, os.PathLike]`): @@ -98,23 +99,23 @@ def from_pretrained( Returns: An instance of `ORTQuantizer`. """ - tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) + preprocessor = get_preprocessor(model_name_or_path) model_class = FeaturesManager.get_model_class_for_feature(feature) model = model_class.from_pretrained(model_name_or_path) - return ORTQuantizer(tokenizer, model, feature, opset) + return ORTQuantizer(preprocessor, model, feature, opset) def __init__( self, - tokenizer: PreTrainedTokenizer, + preprocessor: Union[AutoTokenizer, AutoFeatureExtractor, AutoProcessor], model: PreTrainedModel, feature: str = "default", opset: Optional[int] = None, ): """ Args: - tokenizer (`PreTrainedTokenizer`): - The tokenizer used to preprocess the data. + preprocessor (`Union[AutoTokenizer, AutoFeatureExtractor, AutoProcessor]`): + The preprocessor used to preprocess the data. model (`PreTrainedModel`): The model to optimize. 
feature (`str`, defaults to `"default"`): @@ -124,7 +125,7 @@ def __init__( """ super().__init__() - self.tokenizer = tokenizer + self.preprocessor = preprocessor self.model = model self.feature = feature @@ -236,7 +237,7 @@ def partial_fit( # Export the model to ONNX IR if not onnx_model_path.exists(): - export(self.tokenizer, self.model, self._onnx_config, self.opset, onnx_model_path) + export(self.preprocessor, self.model, self._onnx_config, self.opset, onnx_model_path) LOGGER.info(f"Exported model to ONNX at: {onnx_model_path.as_posix()}") @@ -278,7 +279,7 @@ def export( quantization_config: QuantizationConfig, calibration_tensors_range: Optional[Dict[NodeName, Tuple[float, float]]] = None, use_external_data_format: bool = False, - preprocessor: Optional[QuantizationPreprocessor] = None, + quantization_preprocessor: Optional[QuantizationPreprocessor] = None, ) -> Path: """ Quantize a model given the optimization specifications defined in `quantization_config`. @@ -295,7 +296,7 @@ def export( static quantization. use_external_data_format (`bool`, defaults to `False`): Whether to use external data format to store model which size is >= 2Gb. - preprocessor (`QuantizationPreprocessor`, *optional*): + quantization_preprocessor (`QuantizationPreprocessor`, *optional*): The preprocessor to use to collect the nodes to include or exclude from quantization. Returns: @@ -306,7 +307,7 @@ def export( # Export the model if it has not already been exported to ONNX IR (useful for dynamic quantization) if not onnx_model_path.exists(): - export(self.tokenizer, self.model, self._onnx_config, self.opset, onnx_model_path) + export(self.preprocessor, self.model, self._onnx_config, self.opset, onnx_model_path) use_qdq = quantization_config.is_static and quantization_config.format == QuantFormat.QDQ @@ -326,9 +327,9 @@ def export( f"Creating {'static' if quantization_config.is_static else 'dynamic'} quantizer: {quantization_config}" ) - if preprocessor is not None: + if quantization_preprocessor is not None: LOGGER.info("Preprocessor detected, collecting nodes to include/exclude") - nodes_to_quantize, nodes_to_exclude = preprocessor.collect(onnx_model_path) + nodes_to_quantize, nodes_to_exclude = quantization_preprocessor.collect(onnx_model_path) nodes_to_quantize.update(quantization_config.nodes_to_quantize) nodes_to_exclude.update(quantization_config.nodes_to_exclude) diff --git a/optimum/onnxruntime/runs/utils.py b/optimum/onnxruntime/runs/utils.py index 2d9d20296a..fb5fb36988 100644 --- a/optimum/onnxruntime/runs/utils.py +++ b/optimum/onnxruntime/runs/utils.py @@ -1,6 +1,7 @@ from optimum.onnxruntime.modeling_ort import ( ORTModelForCausalLM, ORTModelForFeatureExtraction, + ORTModelForImageClassification, ORTModelForQuestionAnswering, ORTModelForSequenceClassification, ORTModelForTokenClassification, @@ -8,9 +9,10 @@ task_ortmodel_map = { + "causal-lm": ORTModelForCausalLM, "feature-extraction": ORTModelForFeatureExtraction, + "image-classification": ORTModelForImageClassification, "question-answering": ORTModelForQuestionAnswering, "text-classification": ORTModelForSequenceClassification, "token-classification": ORTModelForTokenClassification, - "causal-lm": ORTModelForCausalLM, } diff --git a/optimum/pipelines.py b/optimum/pipelines.py index d68cabacff..837fa5c560 100644 --- a/optimum/pipelines.py +++ b/optimum/pipelines.py @@ -1,8 +1,8 @@ from typing import Any, Optional, Union from transformers import ( - AutoTokenizer, FeatureExtractionPipeline, + ImageClassificationPipeline, Pipeline, 
PreTrainedTokenizer, QuestionAnsweringPipeline, @@ -13,6 +13,9 @@ ) from transformers import pipeline as transformers_pipeline from transformers.feature_extraction_utils import PreTrainedFeatureExtractor +from transformers.models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING +from transformers.models.auto.tokenization_auto import TOKENIZER_MAPPING +from transformers.onnx.utils import get_preprocessor from optimum.utils import is_onnxruntime_available @@ -23,6 +26,7 @@ from optimum.onnxruntime import ( ORTModelForCausalLM, ORTModelForFeatureExtraction, + ORTModelForImageClassification, ORTModelForQuestionAnswering, ORTModelForSequenceClassification, ORTModelForTokenClassification, @@ -35,31 +39,36 @@ "class": (ORTModelForFeatureExtraction,) if is_onnxruntime_available() else (), "default": "distilbert-base-cased", }, + "image-classification": { + "impl": ImageClassificationPipeline, + "class": (ORTModelForImageClassification,) if is_onnxruntime_available() else (), + "default": "google/vit-base-patch16-224", + }, + "question-answering": { + "impl": QuestionAnsweringPipeline, + "class": (ORTModelForQuestionAnswering,) if is_onnxruntime_available() else (), + "default": "distilbert-base-cased-distilled-squad", + }, "text-classification": { "impl": TextClassificationPipeline, "class": (ORTModelForSequenceClassification,) if is_onnxruntime_available() else (), "default": "distilbert-base-uncased-finetuned-sst-2-english", }, + "text-generation": { + "impl": TextGenerationPipeline, + "class": (ORTModelForCausalLM,) if is_onnxruntime_available() else (), + "default": "distilgpt2", + }, "token-classification": { "impl": TokenClassificationPipeline, "class": (ORTModelForTokenClassification,) if is_onnxruntime_available() else (), "default": "dbmdz/bert-large-cased-finetuned-conll03-english", }, - "question-answering": { - "impl": QuestionAnsweringPipeline, - "class": (ORTModelForQuestionAnswering,) if is_onnxruntime_available() else (), - "default": "distilbert-base-cased-distilled-squad", - }, "zero-shot-classification": { "impl": ZeroShotClassificationPipeline, "class": (ORTModelForSequenceClassification,) if is_onnxruntime_available() else (), "default": "facebook/bart-large-mnli", }, - "text-generation": { - "impl": TextGenerationPipeline, - "class": (ORTModelForCausalLM,) if is_onnxruntime_available() else (), - "default": "distilgpt2", - }, } @@ -80,6 +89,9 @@ def pipeline( if accelerator != "ort": raise ValueError(f"Accelerator {accelerator} is not supported. Supported accelerators are ort") + load_tokenizer = type(model.config) in TOKENIZER_MAPPING or model.config.tokenizer_class is not None + load_feature_extractor = type(model.config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None + if model is None: model_id = SUPPORTED_TASKS[task]["default"] model = SUPPORTED_TASKS[task]["class"][0].from_pretrained(model_id, from_transformers=True) @@ -87,20 +99,27 @@ def pipeline( model_id = model model = SUPPORTED_TASKS[task]["class"][0].from_pretrained(model, from_transformers=True) elif isinstance(model, ORTModel): - if tokenizer is None: + if tokenizer is None and load_tokenizer: raise ValueError("If you pass a model as a ORTModel, you must pass a tokenizer as well") + if feature_extractor is None and load_feature_extractor: + raise ValueError("If you pass a model as a ORTModel, you must pass a feature extractor as well") else: raise ValueError( f"""Model {model} is not supported. Please provide a valid model either as string or ORTModel. 
You can also provide non model then a default one will be used""" ) - if tokenizer is None: - tokenizer = AutoTokenizer.from_pretrained(model_id) + + if tokenizer is None and load_tokenizer: + tokenizer = get_preprocessor(model_id) + if feature_extractor is None and load_feature_extractor: + feature_extractor = get_preprocessor(model_id) return transformers_pipeline( task, model=model, tokenizer=tokenizer, + feature_extractor=feature_extractor, use_fast=use_fast, + use_auth_token=use_auth_token, **kwargs, ) diff --git a/tests/onnxruntime/test_modeling_ort.py b/tests/onnxruntime/test_modeling_ort.py index 1496b4acbe..a1ff61b60f 100644 --- a/tests/onnxruntime/test_modeling_ort.py +++ b/tests/onnxruntime/test_modeling_ort.py @@ -5,22 +5,26 @@ from pathlib import Path import torch +from PIL import Image from transformers import ( AutoModel, AutoModelForCausalLM, + AutoModelForImageClassification, AutoModelForQuestionAnswering, AutoModelForSequenceClassification, AutoModelForTokenClassification, - AutoTokenizer, PretrainedConfig, pipeline, ) +from transformers.onnx.utils import get_preprocessor import onnxruntime +import requests from optimum.onnxruntime import ( ONNX_WEIGHTS_NAME, ORTModelForCausalLM, ORTModelForFeatureExtraction, + ORTModelForImageClassification, ORTModelForQuestionAnswering, ORTModelForSequenceClassification, ORTModelForTokenClassification, @@ -128,7 +132,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self): def test_model_call(self, *args, **kwargs): model_arch, model_id = args model = ORTModelForQuestionAnswering.from_pretrained(model_id, from_transformers=True) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) tokens = tokenizer( "This is a sample output", return_tensors="pt", @@ -145,7 +149,7 @@ def test_compare_to_transformers(self, *args, **kwargs): model_arch, model_id = args onnx_model = ORTModelForQuestionAnswering.from_pretrained(model_id, from_transformers=True) trfs_model = AutoModelForQuestionAnswering.from_pretrained(model_id) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) tokens = tokenizer( "This is a sample output", return_tensors="pt", @@ -162,7 +166,7 @@ def test_compare_to_transformers(self, *args, **kwargs): def test_pipeline(self, *args, **kwargs): model_arch, model_id = args onnx_model = ORTModelForQuestionAnswering.from_pretrained(model_id, from_transformers=True) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) pp = pipeline("question-answering", model=onnx_model, tokenizer=tokenizer) question = "Whats my name?" context = "My Name is Philipp and I live in Nuremberg." 
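As a reading aid for the test changes in this file, the following is a self-contained sketch of the image-classification path that the new integration test class exercises further down. It reuses the tiny checkpoint and COCO image URL from the test suite and is meant as an illustration of the API added in this patch, not as an additional test case:

```python
# Illustrative sketch mirroring the new image-classification tests below.
# Checkpoint and image URL are the same ones used in the test suite.
import requests
from PIL import Image
from transformers import pipeline
from transformers.onnx.utils import get_preprocessor

from optimum.onnxruntime import ORTModelForImageClassification

model_id = "hf-internal-testing/tiny-random-vit"
onnx_model = ORTModelForImageClassification.from_pretrained(model_id, from_transformers=True)
preprocessor = get_preprocessor(model_id)

# Direct forward call with a preprocessed image
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = preprocessor(images=image, return_tensors="pt")
logits = onnx_model(**inputs).logits

# Or go through the transformers pipeline, passing the feature extractor explicitly
classifier = pipeline("image-classification", model=onnx_model, feature_extractor=preprocessor)
predictions = classifier(url)
```
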
@@ -199,7 +203,7 @@ def test_supported_transformers_architectures(self, *args, **kwargs): def test_load_vanilla_transformers_which_is_not_supported(self): with self.assertRaises(Exception) as context: - model = ORTModelForSequenceClassification.from_pretrained("t5-small", from_transformers=Tru) + model = ORTModelForSequenceClassification.from_pretrained("t5-small") self.assertTrue("Unrecognized configuration class", context.exception) @@ -207,7 +211,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self): def test_model_forward_call(self, *args, **kwargs): model_arch, model_id = args model = ORTModelForSequenceClassification.from_pretrained(model_id, from_transformers=True) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) tokens = tokenizer( "This is a sample output", return_tensors="pt", @@ -221,7 +225,7 @@ def test_compare_to_transformers(self, *args, **kwargs): model_arch, model_id = args onnx_model = ORTModelForSequenceClassification.from_pretrained(model_id, from_transformers=True) trfs_model = AutoModelForSequenceClassification.from_pretrained(model_id) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) tokens = tokenizer( "This is a sample output", return_tensors="pt", @@ -237,7 +241,7 @@ def test_compare_to_transformers(self, *args, **kwargs): def test_pipeline(self, *args, **kwargs): model_arch, model_id = args onnx_model = ORTModelForSequenceClassification.from_pretrained(model_id, from_transformers=True) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) pp = pipeline("text-classification", model=onnx_model, tokenizer=tokenizer) text = "My Name is Philipp and i live in Germany." outputs = pp(text) @@ -250,7 +254,7 @@ def test_pipeline_zero_shot_classification(self): onnx_model = ORTModelForSequenceClassification.from_pretrained( "typeform/distilbert-base-uncased-mnli", from_transformers=True ) - tokenizer = AutoTokenizer.from_pretrained("typeform/distilbert-base-uncased-mnli") + tokenizer = get_preprocessor("typeform/distilbert-base-uncased-mnli") pp = pipeline("zero-shot-classification", model=onnx_model, tokenizer=tokenizer) sequence_to_classify = "Who are you voting for in 2020?" 
candidate_labels = ["Europe", "public health", "politics", "elections"] @@ -286,7 +290,7 @@ def test_supported_transformers_architectures(self, *args, **kwargs): def test_load_vanilla_transformers_which_is_not_supported(self): with self.assertRaises(Exception) as context: - model = ORTModelForTokenClassification.from_pretrained("t5-small", from_transformers=Tru) + model = ORTModelForTokenClassification.from_pretrained("t5-small") self.assertTrue("Unrecognized configuration class", context.exception) @@ -294,7 +298,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self): def test_model_call(self, *args, **kwargs): model_arch, model_id = args model = ORTModelForTokenClassification.from_pretrained(model_id, from_transformers=True) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) tokens = tokenizer( "This is a sample output", return_tensors="pt", @@ -308,7 +312,7 @@ def test_compare_to_transformers(self, *args, **kwargs): model_arch, model_id = args onnx_model = ORTModelForTokenClassification.from_pretrained(model_id, from_transformers=True) trfs_model = AutoModelForTokenClassification.from_pretrained(model_id) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) tokens = tokenizer( "This is a sample output", return_tensors="pt", @@ -324,7 +328,7 @@ def test_compare_to_transformers(self, *args, **kwargs): def test_pipeline(self, *args, **kwargs): model_arch, model_id = args onnx_model = ORTModelForTokenClassification.from_pretrained(model_id, from_transformers=True) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) pp = pipeline("token-classification", model=onnx_model, tokenizer=tokenizer) text = "My Name is Philipp and i live in Germany." 
outputs = pp(text) @@ -357,7 +361,7 @@ def test_supported_transformers_architectures(self, *args, **kwargs): def test_load_vanilla_transformers_which_is_not_supported(self): with self.assertRaises(Exception) as context: - model = ORTModelForFeatureExtraction.from_pretrained("google/vit-base-patch16-224", from_transformers=Tru) + model = ORTModelForFeatureExtraction.from_pretrained("google/vit-base-patch16-224") self.assertTrue("Unrecognized configuration class", context.exception) @@ -365,7 +369,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self): def test_model_call(self, *args, **kwargs): model_arch, model_id = args model = ORTModelForFeatureExtraction.from_pretrained(model_id, from_transformers=True) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) tokens = tokenizer( "This is a sample output", return_tensors="pt", @@ -379,7 +383,7 @@ def test_compare_to_transformers(self, *args, **kwargs): model_arch, model_id = args onnx_model = ORTModelForFeatureExtraction.from_pretrained(model_id, from_transformers=True) trfs_model = AutoModel.from_pretrained(model_id) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) tokens = tokenizer( "This is a sample output", return_tensors="pt", @@ -395,7 +399,7 @@ def test_compare_to_transformers(self, *args, **kwargs): def test_pipeline(self, *args, **kwargs): model_arch, model_id = args onnx_model = ORTModelForFeatureExtraction.from_pretrained(model_id, from_transformers=True) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) pp = pipeline("feature-extraction", model=onnx_model, tokenizer=tokenizer) text = "My Name is Philipp and i live in Germany." outputs = pp(text) @@ -419,7 +423,7 @@ def test_supported_transformers_architectures(self, *args, **kwargs): def test_load_vanilla_transformers_which_is_not_supported(self): with self.assertRaises(Exception) as context: - model = ORTModelForCausalLM.from_pretrained("google/vit-base-patch16-224", from_transformers=True) + model = ORTModelForCausalLM.from_pretrained("google/vit-base-patch16-224") self.assertTrue("Unrecognized configuration class", context.exception) @@ -427,7 +431,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self): def test_model_call(self, *args, **kwargs): model_arch, model_id = args model = ORTModelForCausalLM.from_pretrained(model_id, from_transformers=True) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) tokens = tokenizer( "This is a sample output", return_tensors="pt", @@ -440,7 +444,7 @@ def test_model_call(self, *args, **kwargs): def test_generate_utils(self, *args, **kwargs): model_arch, model_id = args model = ORTModelForCausalLM.from_pretrained(model_id, from_transformers=True) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) text = "This is a sample output" tokens = tokenizer( text, @@ -455,7 +459,7 @@ def test_generate_utils(self, *args, **kwargs): def test_generate_utils_with_input_ids(self, *args, **kwargs): model_arch, model_id = args model = ORTModelForCausalLM.from_pretrained(model_id, from_transformers=True) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) text = "This is a sample output" tokens = tokenizer( text, @@ -471,7 +475,7 @@ def test_compare_to_transformers(self, *args, **kwargs): model_arch, model_id = args onnx_model = 
ORTModelForCausalLM.from_pretrained(model_id, from_transformers=True) trfs_model = AutoModelForCausalLM.from_pretrained(model_id) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) tokens = tokenizer( "This is a sample output", return_tensors="pt", @@ -487,7 +491,7 @@ def test_compare_to_transformers(self, *args, **kwargs): def test_pipeline(self, *args, **kwargs): model_arch, model_id = args onnx_model = ORTModelForCausalLM.from_pretrained(model_id, from_transformers=True) - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = get_preprocessor(model_id) pp = pipeline("text-generation", model=onnx_model, tokenizer=tokenizer) text = "My Name is Philipp and i live" outputs = pp(text) @@ -495,3 +499,63 @@ def test_pipeline(self, *args, **kwargs): # compare model output class self.assertTrue(isinstance(outputs[0]["generated_text"], str)) self.assertTrue(len(outputs[0]["generated_text"]) > len(text)) + + +class ORTModelForImageClassificationIntergrationTest(unittest.TestCase): + SUPPORTED_ARCHITECTURES_WITH_MODEL_ID = { + "vit": "hf-internal-testing/tiny-random-vit", + } + + @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items()) + def test_supported_transformers_architectures(self, *args, **kwargs): + model_arch, model_id = args + model = ORTModelForImageClassification.from_pretrained(model_id, from_transformers=True) + self.assertIsInstance(model.model, onnxruntime.capi.onnxruntime_inference_collection.InferenceSession) + self.assertIsInstance(model.config, PretrainedConfig) + + def test_load_vanilla_transformers_which_is_not_supported(self): + with self.assertRaises(Exception) as context: + model = ORTModelForImageClassification.from_pretrained("facebook/convnext-tiny-224") + + self.assertTrue("Unrecognized configuration class", context.exception) + + @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items()) + def test_model_forward_call(self, *args, **kwargs): + model_arch, model_id = args + model = ORTModelForImageClassification.from_pretrained(model_id, from_transformers=True) + preprocessor = get_preprocessor(model_id) + url = "http://images.cocodataset.org/val2017/000000039769.jpg" + image = Image.open(requests.get(url, stream=True).raw) + inputs = preprocessor(images=image, return_tensors="pt") + outputs = model(**inputs) + self.assertTrue("logits" in outputs) + self.assertTrue(isinstance(outputs.logits, torch.Tensor)) + + @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items()) + def test_compare_to_transformers(self, *args, **kwargs): + model_arch, model_id = args + onnx_model = ORTModelForImageClassification.from_pretrained(model_id, from_transformers=True) + trfs_model = AutoModelForImageClassification.from_pretrained(model_id) + preprocessor = get_preprocessor(model_id) + url = "http://images.cocodataset.org/val2017/000000039769.jpg" + image = Image.open(requests.get(url, stream=True).raw) + inputs = preprocessor(images=image, return_tensors="pt") + with torch.no_grad(): + trtfs_outputs = trfs_model(**inputs) + onnx_outputs = onnx_model(**inputs) + + # compare tensor outputs + self.assertTrue(torch.allclose(onnx_outputs.logits, trtfs_outputs.logits, atol=1e-4)) + + @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items()) + def test_pipeline(self, *args, **kwargs): + model_arch, model_id = args + onnx_model = ORTModelForImageClassification.from_pretrained(model_id, from_transformers=True) + preprocessor = get_preprocessor(model_id) + pp = pipeline("image-classification", 
model=onnx_model, feature_extractor=preprocessor) + url = "http://images.cocodataset.org/val2017/000000039769.jpg" + outputs = pp(url) + + # compare model output class + self.assertGreaterEqual(outputs[0]["score"], 0.0) + self.assertTrue(isinstance(outputs[0]["label"], str)) From 1865e2d6b19b0b9f8e260ef05dd941ada4408c32 Mon Sep 17 00:00:00 2001 From: regisss Date: Mon, 20 Jun 2022 21:49:14 +0200 Subject: [PATCH 2/6] Fix tests and backwards compatibility --- optimum/onnxruntime/quantization.py | 14 +++++++------- tests/onnxruntime/test_onnxruntime.py | 12 +++++------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/optimum/onnxruntime/quantization.py b/optimum/onnxruntime/quantization.py index 3a4fe063e4..31b325a42b 100644 --- a/optimum/onnxruntime/quantization.py +++ b/optimum/onnxruntime/quantization.py @@ -20,7 +20,7 @@ from typing import Callable, Dict, List, Optional, Tuple, Union from datasets import Dataset, load_dataset -from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PreTrainedModel +from transformers import AutoFeatureExtractor, AutoTokenizer, PreTrainedModel from transformers.onnx import export from transformers.onnx.features import FeaturesManager from transformers.onnx.utils import get_preprocessor @@ -107,14 +107,14 @@ def from_pretrained( def __init__( self, - preprocessor: Union[AutoTokenizer, AutoFeatureExtractor, AutoProcessor], + preprocessor: Union[AutoTokenizer, AutoFeatureExtractor], model: PreTrainedModel, feature: str = "default", opset: Optional[int] = None, ): """ Args: - preprocessor (`Union[AutoTokenizer, AutoFeatureExtractor, AutoProcessor]`): + preprocessor (`Union[AutoTokenizer, AutoFeatureExtractor]`): The preprocessor used to preprocess the data. model (`PreTrainedModel`): The model to optimize. @@ -279,7 +279,7 @@ def export( quantization_config: QuantizationConfig, calibration_tensors_range: Optional[Dict[NodeName, Tuple[float, float]]] = None, use_external_data_format: bool = False, - quantization_preprocessor: Optional[QuantizationPreprocessor] = None, + preprocessor: Optional[QuantizationPreprocessor] = None, ) -> Path: """ Quantize a model given the optimization specifications defined in `quantization_config`. @@ -296,7 +296,7 @@ def export( static quantization. use_external_data_format (`bool`, defaults to `False`): Whether to use external data format to store model which size is >= 2Gb. - quantization_preprocessor (`QuantizationPreprocessor`, *optional*): + preprocessor (`QuantizationPreprocessor`, *optional*): The preprocessor to use to collect the nodes to include or exclude from quantization. Returns: @@ -327,9 +327,9 @@ def export( f"Creating {'static' if quantization_config.is_static else 'dynamic'} quantizer: {quantization_config}" ) - if quantization_preprocessor is not None: + if preprocessor is not None: LOGGER.info("Preprocessor detected, collecting nodes to include/exclude") - nodes_to_quantize, nodes_to_exclude = quantization_preprocessor.collect(onnx_model_path) + nodes_to_quantize, nodes_to_exclude = preprocessor.collect(onnx_model_path) nodes_to_quantize.update(quantization_config.nodes_to_quantize) nodes_to_exclude.update(quantization_config.nodes_to_exclude) diff --git a/tests/onnxruntime/test_onnxruntime.py b/tests/onnxruntime/test_onnxruntime.py index 245be15e17..70c534ee1f 100644 --- a/tests/onnxruntime/test_onnxruntime.py +++ b/tests/onnxruntime/test_onnxruntime.py @@ -13,17 +13,15 @@ # limitations under the License. 
import gc -import os import tempfile import unittest from functools import partial from pathlib import Path -from transformers import AutoTokenizer from transformers.onnx import validate_model_outputs from onnxruntime.quantization import QuantFormat, QuantizationMode, QuantType -from optimum.onnxruntime import ORTConfig, ORTOptimizer, ORTQuantizableOperator, ORTQuantizer +from optimum.onnxruntime import ORTConfig, ORTOptimizer, ORTQuantizer from optimum.onnxruntime.configuration import ( AutoCalibrationConfig, AutoQuantizationConfig, @@ -68,7 +66,7 @@ def test_optimize(self): ) validate_model_outputs( optimizer._onnx_config, - optimizer.tokenizer, + optimizer.preprocessor, optimizer.model, optimized_model_path, list(optimizer._onnx_config.outputs.keys()), @@ -128,7 +126,7 @@ def test_dynamic_quantization(self): ) validate_model_outputs( quantizer._onnx_config, - quantizer.tokenizer, + quantizer.preprocessor, quantizer.model, q8_model_path, list(quantizer._onnx_config.outputs.keys()), @@ -163,7 +161,7 @@ def preprocess_function(examples, tokenizer): calibration_dataset = quantizer.get_calibration_dataset( "glue", dataset_config_name="sst2", - preprocess_function=partial(preprocess_function, tokenizer=quantizer.tokenizer), + preprocess_function=partial(preprocess_function, tokenizer=quantizer.preprocessor), num_samples=40, dataset_split="train", ) @@ -181,7 +179,7 @@ def preprocess_function(examples, tokenizer): ) validate_model_outputs( quantizer._onnx_config, - quantizer.tokenizer, + quantizer.preprocessor, quantizer.model, q8_model_path, list(quantizer._onnx_config.outputs.keys()), From 0d4616b356e55782d01f7bc7118d1f510931c460 Mon Sep 17 00:00:00 2001 From: regisss Date: Tue, 21 Jun 2022 15:27:16 +0200 Subject: [PATCH 3/6] Various small improvements --- optimum/onnxruntime/modeling_ort.py | 48 +++++++++++++------------- setup.py | 2 +- tests/onnxruntime/test_modeling_ort.py | 14 ++++---- 3 files changed, 33 insertions(+), 31 deletions(-) diff --git a/optimum/onnxruntime/modeling_ort.py b/optimum/onnxruntime/modeling_ort.py index 04be5713c0..1a0a66c3e8 100644 --- a/optimum/onnxruntime/modeling_ort.py +++ b/optimum/onnxruntime/modeling_ort.py @@ -255,7 +255,7 @@ def _from_transformers( return cls._from_pretrained(save_dir.as_posix(), **kwargs) -FEAUTRE_EXTRACTION_SAMPLE = r""" +FEAUTRE_EXTRACTION_EXAMPLE = r""" Example of feature extraction: ```python @@ -304,14 +304,14 @@ class ORTModelForFeatureExtraction(ORTModel): pipeline_task = "default" auto_model_class = AutoModel - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, model=None, config=None, **kwargs): + super().__init__(model, config, **kwargs) # create {name:idx} dict for model outputs self.model_outputs = {output_key.name: idx for idx, output_key in enumerate(self.model.get_outputs())} @add_start_docstrings_to_model_forward( ONNX_INPUTS_DOCSTRING.format("batch_size, sequence_length") - + FEAUTRE_EXTRACTION_SAMPLE.format( + + FEAUTRE_EXTRACTION_EXAMPLE.format( processor_class=_TOKENIZER_FOR_DOC, model_class="ORTModelForFeatureExtraction", checkpoint="optimum/all-MiniLM-L6-v2", @@ -339,7 +339,7 @@ def forward( ) -QUESTION_ANSWERING_SAMPLE = r""" +QUESTION_ANSWERING_EXAMPLE = r""" Example of question answering: ```python @@ -390,14 +390,14 @@ class ORTModelForQuestionAnswering(ORTModel): pipeline_task = "question-answering" auto_model_class = AutoModelForQuestionAnswering - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, model=None, config=None, 
**kwargs): + super().__init__(model, config, **kwargs) # create {name:idx} dict for model outputs self.model_outputs = {output_key.name: idx for idx, output_key in enumerate(self.model.get_outputs())} @add_start_docstrings_to_model_forward( ONNX_INPUTS_DOCSTRING.format("batch_size, sequence_length") - + QUESTION_ANSWERING_SAMPLE.format( + + QUESTION_ANSWERING_EXAMPLE.format( processor_class=_TOKENIZER_FOR_DOC, model_class="ORTModelForQuestionAnswering", checkpoint="optimum/roberta-base-squad2", @@ -426,7 +426,7 @@ def forward( ) -SEQUENCE_CLASSIFICATION_SAMPLE = r""" +SEQUENCE_CLASSIFICATION_EXAMPLE = r""" Example of single-label classification: ```python @@ -491,15 +491,15 @@ class ORTModelForSequenceClassification(ORTModel): pipeline_task = "sequence-classification" auto_model_class = AutoModelForSequenceClassification - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, model=None, config=None, **kwargs): + super().__init__(model, config, **kwargs) # create {name:idx} dict for model outputs self.model_outputs = {output_key.name: idx for idx, output_key in enumerate(self.model.get_outputs())} self.model_inputs = {output_key.name: idx for idx, output_key in enumerate(self.model.get_inputs())} @add_start_docstrings_to_model_forward( ONNX_INPUTS_DOCSTRING.format("batch_size, sequence_length") - + SEQUENCE_CLASSIFICATION_SAMPLE.format( + + SEQUENCE_CLASSIFICATION_EXAMPLE.format( processor_class=_TOKENIZER_FOR_DOC, model_class="ORTModelForSequenceClassification", checkpoint="optimum/distilbert-base-uncased-finetuned-sst-2-english", @@ -528,7 +528,7 @@ def forward( ) -TOKEN_CLASSIFICATION_SAMPLE = r""" +TOKEN_CLASSIFICATION_EXAMPLE = r""" Example of token classification: ```python @@ -578,14 +578,14 @@ class ORTModelForTokenClassification(ORTModel): pipeline_task = "token-classification" auto_model_class = AutoModelForTokenClassification - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, model=None, config=None, **kwargs): + super().__init__(model, config, **kwargs) # create {name:idx} dict for model outputs self.model_outputs = {output_key.name: idx for idx, output_key in enumerate(self.model.get_outputs())} @add_start_docstrings_to_model_forward( ONNX_INPUTS_DOCSTRING.format("batch_size, sequence_length") - + TOKEN_CLASSIFICATION_SAMPLE.format( + + TOKEN_CLASSIFICATION_EXAMPLE.format( processor_class=_TOKENIZER_FOR_DOC, model_class="ORTModelForTokenClassification", checkpoint="optimum/bert-base-NER", @@ -613,7 +613,7 @@ def forward( ) -TEXT_GENERATION_SAMPLE = r""" +TEXT_GENERATION_EXAMPLE = r""" Example of text generation: ```python @@ -662,8 +662,8 @@ class ORTModelForCausalLM(ORTModel, GenerationMixin): pipeline_task = "causal-lm" auto_model_class = AutoModelForCausalLM - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, model=None, config=None, **kwargs): + super().__init__(model, config, **kwargs) # create {name:idx} dict for model outputs self.main_input_name = "input_ids" self.model_outputs = {output_key.name: idx for idx, output_key in enumerate(self.model.get_outputs())} @@ -687,7 +687,7 @@ def prepare_inputs_for_generation(self, input_ids: torch.LongTensor, **kwargs) - @add_start_docstrings_to_model_forward( ONNX_INPUTS_DOCSTRING.format("batch_size, sequence_length") - + TEXT_GENERATION_SAMPLE.format( + + TEXT_GENERATION_EXAMPLE.format( processor_class=_TOKENIZER_FOR_DOC, model_class="ORTModelForCausalLM", checkpoint="optimum/gpt2", @@ -735,7 +735,7 @@ 
def _prepare_attention_mask_for_generation(
         return torch.ones(inputs.shape[:2], dtype=torch.long, device=inputs.device)
 
-IMAGE_CLASSIFICATION_SAMPLE = r"""
+IMAGE_CLASSIFICATION_EXAMPLE = r"""
     Example of image classification:
 
     ```python
@@ -789,14 +789,14 @@ class ORTModelForImageClassification(ORTModel):
     pipeline_task = "image-classification"
     auto_model_class = AutoModelForImageClassification
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, model=None, config=None, **kwargs):
+        super().__init__(model, config, **kwargs)
         # create {name:idx} dict for model outputs
         self.model_outputs = {output_key.name: idx for idx, output_key in enumerate(self.model.get_outputs())}
 
     @add_start_docstrings_to_model_forward(
         ONNX_INPUTS_DOCSTRING.format("batch_size, sequence_length")
-        + FEAUTRE_EXTRACTION_SAMPLE.format(
+        + FEAUTRE_EXTRACTION_EXAMPLE.format(
             processor_class=_FEATURE_EXTRACTOR_FOR_DOC,
             model_class="ORTModelForImageClassification",
             checkpoint="optimum/vit-base-patch16-224",
diff --git a/setup.py b/setup.py
index 7a45388bb7..54f4eeee1e 100644
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,7 @@
     "huggingface_hub>=0.4.0",
 ]
 
-TESTS_REQUIRE = ["pytest", "requests", "parameterized", "pytest-xdist"]
+TESTS_REQUIRE = ["pytest", "requests", "parameterized", "pytest-xdist", "Pillow"]
 
 QUALITY_REQUIRE = ["black~=22.0", "flake8>=3.8.3", "isort>=5.5.4"]
 
diff --git a/tests/onnxruntime/test_modeling_ort.py b/tests/onnxruntime/test_modeling_ort.py
index a1ff61b60f..ef1d77f3bb 100644
--- a/tests/onnxruntime/test_modeling_ort.py
+++ b/tests/onnxruntime/test_modeling_ort.py
@@ -124,7 +124,7 @@ def test_supported_transformers_architectures(self, *args, **kwargs):
 
     def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
-            model = ORTModelForQuestionAnswering.from_pretrained("t5-small")
+            model = ORTModelForQuestionAnswering.from_pretrained("t5-small", from_transformers=True)
 
         self.assertTrue("Unrecognized configuration class", context.exception)
 
@@ -203,7 +203,7 @@ def test_supported_transformers_architectures(self, *args, **kwargs):
 
     def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
-            model = ORTModelForSequenceClassification.from_pretrained("t5-small")
+            model = ORTModelForSequenceClassification.from_pretrained("t5-small", from_transformers=True)
 
         self.assertTrue("Unrecognized configuration class", context.exception)
 
@@ -290,7 +290,7 @@ def test_supported_transformers_architectures(self, *args, **kwargs):
 
     def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
-            model = ORTModelForTokenClassification.from_pretrained("t5-small")
+            model = ORTModelForTokenClassification.from_pretrained("t5-small", from_transformers=True)
 
         self.assertTrue("Unrecognized configuration class", context.exception)
 
@@ -361,7 +361,7 @@ def test_supported_transformers_architectures(self, *args, **kwargs):
 
     def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
-            model = ORTModelForFeatureExtraction.from_pretrained("google/vit-base-patch16-224")
+            model = ORTModelForFeatureExtraction.from_pretrained("google/vit-base-patch16-224", from_transformers=True)
 
         self.assertTrue("Unrecognized configuration class", context.exception)
 
@@ -423,7 +423,7 @@ def test_supported_transformers_architectures(self, *args, **kwargs):
 
     def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
-            model = ORTModelForCausalLM.from_pretrained("google/vit-base-patch16-224")
+            model = ORTModelForCausalLM.from_pretrained("google/vit-base-patch16-224", from_transformers=True)
 
         self.assertTrue("Unrecognized configuration class", context.exception)
 
@@ -515,7 +515,9 @@ def test_supported_transformers_architectures(self, *args, **kwargs):
 
     def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
-            model = ORTModelForImageClassification.from_pretrained("facebook/convnext-tiny-224")
+            model = ORTModelForImageClassification.from_pretrained(
+                "facebook/convnext-tiny-224", from_transformers=True
+            )
 
         self.assertTrue("Unrecognized configuration class", context.exception)
 

From e0337ac0a4d62b626ac1b61dd002b51cb581f85b Mon Sep 17 00:00:00 2001
From: regisss
Date: Tue, 21 Jun 2022 18:19:51 +0200
Subject: [PATCH 4/6] Fix tests

---
 docs/source/pipelines.mdx              |  5 ++---
 tests/onnxruntime/test_modeling_ort.py | 20 ++++++--------------
 2 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/docs/source/pipelines.mdx b/docs/source/pipelines.mdx
index 3cf4ec16c0..f4b135cb4a 100644
--- a/docs/source/pipelines.mdx
+++ b/docs/source/pipelines.mdx
@@ -12,8 +12,7 @@ specific language governing permissions and limitations under the License.
 
 # Optimum pipelines for inference
 
-The [`~pipelines.pipeline`] function makes it simple to use models from the [Model Hub](https://huggingface.co/models) for accelerated inference on a variety of tasks such as question answering or image classification.
-Even if you don't have experience with a specific modality or understand the code powering the models, you can still use them with the [`~pipelines.pipeline`] function!
+The [`~pipelines.pipeline`] function makes it simple to use models from the [Model Hub](https://huggingface.co/models) for accelerated inference on a variety of tasks such as text classification, question answering and image classification.
 
@@ -199,7 +198,7 @@ Below you can find two examples on how you could [`~onnxruntime.ORTOptimizer`] a
 ## Transformers pipeline usage
 
 The [`~pipelines.pipeline`] function is just a light wrapper around the `transformers.pipeline` function to enable checks for supported tasks and additional features
-, like quantization and optimization. This being said you can use the `transformers.pipeline` and just replace your `AutoFor*` with the optimum
+, like quantization and optimization. This being said you can use the `transformers.pipeline` and just replace your `AutoModelFor*` with the optimum
 `ORTModelFor*` class.
 
 ```diff
diff --git a/tests/onnxruntime/test_modeling_ort.py b/tests/onnxruntime/test_modeling_ort.py
index ef1d77f3bb..a73c87aa72 100644
--- a/tests/onnxruntime/test_modeling_ort.py
+++ b/tests/onnxruntime/test_modeling_ort.py
@@ -126,7 +126,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
             model = ORTModelForQuestionAnswering.from_pretrained("t5-small", from_transformers=True)
 
-        self.assertTrue("Unrecognized configuration class", context.exception)
+        self.assertIn("Unrecognized configuration class", context.exception)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items())
     def test_model_call(self, *args, **kwargs):
@@ -205,7 +205,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
             model = ORTModelForSequenceClassification.from_pretrained("t5-small", from_transformers=True)
 
-        self.assertTrue("Unrecognized configuration class", context.exception)
+        self.assertIn("Unrecognized configuration class", context.exception)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items())
     def test_model_forward_call(self, *args, **kwargs):
@@ -292,7 +292,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
             model = ORTModelForTokenClassification.from_pretrained("t5-small", from_transformers=True)
 
-        self.assertTrue("Unrecognized configuration class", context.exception)
+        self.assertIn("Unrecognized configuration class", context.exception)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items())
     def test_model_call(self, *args, **kwargs):
@@ -359,12 +359,6 @@ def test_supported_transformers_architectures(self, *args, **kwargs):
         self.assertIsInstance(model.model, onnxruntime.capi.onnxruntime_inference_collection.InferenceSession)
         self.assertIsInstance(model.config, PretrainedConfig)
 
-    def test_load_vanilla_transformers_which_is_not_supported(self):
-        with self.assertRaises(Exception) as context:
-            model = ORTModelForFeatureExtraction.from_pretrained("google/vit-base-patch16-224", from_transformers=True)
-
-        self.assertTrue("Unrecognized configuration class", context.exception)
-
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items())
     def test_model_call(self, *args, **kwargs):
         model_arch, model_id = args
@@ -425,7 +419,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
             model = ORTModelForCausalLM.from_pretrained("google/vit-base-patch16-224", from_transformers=True)
 
-        self.assertTrue("Unrecognized configuration class", context.exception)
+        self.assertIn("Unrecognized configuration class", context.exception)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items())
     def test_model_call(self, *args, **kwargs):
@@ -515,11 +509,9 @@ def test_supported_transformers_architectures(self, *args, **kwargs):
 
     def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
-            model = ORTModelForImageClassification.from_pretrained(
-                "facebook/convnext-tiny-224", from_transformers=True
-            )
+            model = ORTModelForImageClassification.from_pretrained("t5-small", from_transformers=True)
 
-        self.assertTrue("Unrecognized configuration class", context.exception)
+        self.assertIn("Unrecognized configuration class", context.exception)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items())
     def test_model_forward_call(self, *args, **kwargs):

From 2cc24bfd11ce09ea1326f31723ed92d554466f09 Mon Sep 17 00:00:00 2001
From: regisss
Date: Tue, 21 Jun 2022 19:04:12 +0200
Subject: [PATCH 5/6] Fix tests

---
 tests/onnxruntime/test_modeling_ort.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/onnxruntime/test_modeling_ort.py b/tests/onnxruntime/test_modeling_ort.py
index a73c87aa72..2e5e110bb8 100644
--- a/tests/onnxruntime/test_modeling_ort.py
+++ b/tests/onnxruntime/test_modeling_ort.py
@@ -126,7 +126,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
             model = ORTModelForQuestionAnswering.from_pretrained("t5-small", from_transformers=True)
 
-        self.assertIn("Unrecognized configuration class", context.exception)
+        self.assertIn("Unrecognized configuration class", str(context.exception))
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items())
     def test_model_call(self, *args, **kwargs):
@@ -205,7 +205,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
             model = ORTModelForSequenceClassification.from_pretrained("t5-small", from_transformers=True)
 
-        self.assertIn("Unrecognized configuration class", context.exception)
+        self.assertIn("Unrecognized configuration class", str(context.exception))
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items())
     def test_model_forward_call(self, *args, **kwargs):
@@ -292,7 +292,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
             model = ORTModelForTokenClassification.from_pretrained("t5-small", from_transformers=True)
 
-        self.assertIn("Unrecognized configuration class", context.exception)
+        self.assertIn("Unrecognized configuration class", str(context.exception))
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items())
     def test_model_call(self, *args, **kwargs):
@@ -419,7 +419,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
             model = ORTModelForCausalLM.from_pretrained("google/vit-base-patch16-224", from_transformers=True)
 
-        self.assertIn("Unrecognized configuration class", context.exception)
+        self.assertIn("Unrecognized configuration class", str(context.exception))
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items())
     def test_model_call(self, *args, **kwargs):
@@ -511,7 +511,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
             model = ORTModelForImageClassification.from_pretrained("t5-small", from_transformers=True)
 
-        self.assertIn("Unrecognized configuration class", context.exception)
+        self.assertIn("Unrecognized configuration class", str(context.exception))
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items())
     def test_model_forward_call(self, *args, **kwargs):

From 7e387cf07de1966e606656f29943edc8f9432a72 Mon Sep 17 00:00:00 2001
From: regisss
Date: Wed, 22 Jun 2022 18:06:42 +0200
Subject: [PATCH 6/6] Make style

---
 tests/onnxruntime/test_modeling_ort.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/onnxruntime/test_modeling_ort.py b/tests/onnxruntime/test_modeling_ort.py
index 9fff9c9732..4fab52d4c3 100644
--- a/tests/onnxruntime/test_modeling_ort.py
+++ b/tests/onnxruntime/test_modeling_ort.py
@@ -16,8 +16,8 @@
     PretrainedConfig,
     pipeline,
 )
-from transformers.testing_utils import require_torch_gpu
 from transformers.onnx.utils import get_preprocessor
+from transformers.testing_utils import require_torch_gpu
 
 import onnxruntime
 import requests
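
Editor's note: to make the intent of this series concrete, the snippet below sketches how the image-classification support added by these patches is expected to be used end to end, following the docs and tests touched above. It is not part of the patches themselves; the checkpoint id reuses the one from the tests, and the sample image URL is an illustrative assumption.

```python
# Sketch only: exercise ORTModelForImageClassification through a Transformers pipeline.
import requests
from PIL import Image
from transformers import AutoFeatureExtractor, pipeline

from optimum.onnxruntime import ORTModelForImageClassification

# Export a vanilla Transformers checkpoint to ONNX on the fly (as in the tests above).
model = ORTModelForImageClassification.from_pretrained("google/vit-base-patch16-224", from_transformers=True)
preprocessor = AutoFeatureExtractor.from_pretrained("google/vit-base-patch16-224")

# Replace AutoModelForImageClassification with the ORT model inside the pipeline.
onnx_classifier = pipeline("image-classification", model=model, feature_extractor=preprocessor)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"  # assumed sample image
image = Image.open(requests.get(url, stream=True).raw)
print(onnx_classifier(image))
```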
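The assertion rewritten in patches 4 and 5 is also worth spelling out: `assertTrue("...", context.exception)` always passes, because the second argument is only the failure message, and `assertIn` needs the exception rendered as a string before it can search it. A minimal, self-contained test illustrating the final pattern (again an editorial sketch, not part of the patches):

```python
# Sketch only: why the assertion ended up as assertIn(..., str(context.exception)).
import unittest


class AssertionPatternExample(unittest.TestCase):
    def test_error_message_is_checked(self):
        with self.assertRaises(Exception) as context:
            raise ValueError("Unrecognized configuration class for this kind of AutoModel")

        # assertTrue("msg", context.exception) would always pass: the second argument
        # is just the failure message, so nothing about the exception gets verified.
        # assertIn against str(context.exception) actually checks the error text.
        self.assertIn("Unrecognized configuration class", str(context.exception))


if __name__ == "__main__":
    unittest.main()
```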