"""FUVAS: Few-shot Unsupervised Video Anomaly Segmentation via Low-Rank Factorization of Spatio-Temporal Features.

This module provides a PyTorch Lightning implementation of the FUVAS model for
video anomaly detection and segmentation. The model extracts deep features from
video clips using a pre-trained 3D CNN/transformer backbone and fits a PCA-based
reconstruction model to detect anomalies.

Paper: https://ieeexplore.ieee.org/abstract/document/10887597

Example:
    >>> from anomalib.models.video import Fuvas
    >>> model = Fuvas(
    ...     backbone="x3d_s",
    ...     layer="blocks.4",
    ...     pre_trained=True,
    ... )
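
    A fuller training sketch, assuming the ``Engine`` and ``Avenue`` APIs from
    the surrounding library (both names are assumptions here, not defined in
    this module):

    >>> from anomalib.data import Avenue  # assumed video datamodule
    >>> from anomalib.engine import Engine  # assumed trainer wrapper
    >>> engine = Engine()
    >>> engine.fit(model=model, datamodule=Avenue())  # doctest: +SKIP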

Notes:
    The model uses a pre-trained backbone to extract features and fits a PCA
    transformation during training. No gradient updates are performed on the
    backbone. Anomaly detection is based on feature reconstruction error.
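
    As a rough illustration of the scoring idea (a sketch, not the exact
    implementation in ``FUVASModel``): given an orthonormal PCA basis ``u``
    and a flattened feature vector ``f``, the anomaly score is the feature
    reconstruction error:

    >>> import torch
    >>> u = torch.linalg.qr(torch.randn(64, 16)).Q  # stand-in PCA basis
    >>> f = torch.randn(64)  # stand-in feature vector
    >>> score = torch.sum((f - u @ (u.T @ f)) ** 2)  # reconstruction error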

See Also:
    :class:`anomalib.models.video.fuvas.torch_model.FUVASModel`:
        PyTorch implementation of the FUVAS model.
"""

# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import logging
from typing import Any

import torch
from lightning.pytorch.utilities.types import STEP_OUTPUT

from anomalib import LearningType
from anomalib.data import Batch
from anomalib.metrics import Evaluator
from anomalib.models.components import AnomalibModule, MemoryBankMixin
from anomalib.post_processing import PostProcessor
from anomalib.pre_processing import PreProcessor
from anomalib.visualization import Visualizer

from .torch_model import FUVASModel

logger = logging.getLogger(__name__)


class Fuvas(MemoryBankMixin, AnomalibModule):
    """FUVAS Lightning Module.

    Args:
        backbone (str): Name of the 3D CNN/transformer backbone network.
            Defaults to ``"x3d_s"``.
        layer (str): Name of the backbone layer from which to extract features.
            Defaults to ``"blocks.4"``.
        pre_trained (bool, optional): Whether to use a pre-trained backbone.
            Defaults to ``True``.
        spatial_pool (bool, optional): Whether to apply spatial pooling to the
            features. Defaults to ``True``.
        pooling_kernel_size (int, optional): Kernel size for pooling features.
            Defaults to ``1``.
        pca_level (float, optional): Ratio of variance to preserve in PCA.
            Must be between 0 and 1.
            Defaults to ``0.98``.
        pre_processor (PreProcessor | bool, optional): Pre-processor to use.
            If ``True``, uses the default pre-processor.
            If ``False``, no pre-processing is performed.
            Defaults to ``True``.
        post_processor (PostProcessor | bool, optional): Post-processor to use.
            If ``True``, uses the default post-processor.
            If ``False``, no post-processing is performed.
            Defaults to ``True``.
        evaluator (Evaluator | bool, optional): Evaluator to use.
            If ``True``, uses the default evaluator.
            If ``False``, no evaluation is performed.
            Defaults to ``True``.
        visualizer (Visualizer | bool, optional): Visualizer to use.
            If ``True``, uses the default visualizer.
            If ``False``, no visualization is performed.
            Defaults to ``True``.
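
    Example:
        A minimal construction sketch; ``pca_level=0.9`` is an illustrative
        value, not a recommendation from the paper:

        >>> model = Fuvas(backbone="x3d_s", layer="blocks.4", pca_level=0.9)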
    """

    def __init__(
        self,
        backbone: str = "x3d_s",
        layer: str = "blocks.4",
        pre_trained: bool = True,
        spatial_pool: bool = True,
        pooling_kernel_size: int = 1,
        pca_level: float = 0.98,
        pre_processor: PreProcessor | bool = True,
        post_processor: PostProcessor | bool = True,
        evaluator: Evaluator | bool = True,
        visualizer: Visualizer | bool = True,
    ) -> None:
        super().__init__(
            pre_processor=pre_processor,
            post_processor=post_processor,
            evaluator=evaluator,
            visualizer=visualizer,
        )

        self.model: FUVASModel = FUVASModel(
            backbone=backbone,
            pre_trained=pre_trained,
            layer=layer,
            pooling_kernel_size=pooling_kernel_size,
            n_comps=pca_level,
            spatial_pool=spatial_pool,
        )
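        # Memory bank of training features, filled in ``training_step`` and
        # consumed in ``fit``.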
        self.embeddings: list[torch.Tensor] = []

    @staticmethod
    def configure_optimizers() -> None:  # pylint: disable=arguments-differ
        """Configure optimizers for training.

        Returns:
            None: FUVAS doesn't require optimization.
        """
        return

    def training_step(self, batch: Batch, *args, **kwargs) -> torch.Tensor:
        """Extract features from the input batch during training.

        Args:
            batch (Batch): Input batch containing video clips.
            *args: Additional positional arguments (unused).
            **kwargs: Additional keyword arguments (unused).

        Returns:
            torch.Tensor: Dummy loss tensor for compatibility.
        """
        del args, kwargs  # These variables are not used.

        # Ensure batch.image is a tensor
        if batch.image is None or not isinstance(batch.image, torch.Tensor):
            msg = "Expected batch.image to be a tensor, but got None or non-tensor type"
            raise ValueError(msg)

        embedding = self.model.get_features(batch.image)[0].squeeze()
        self.embeddings.append(embedding)

        # Return a dummy loss tensor so Lightning's training loop proceeds;
        # the backbone itself is never updated.
        return torch.tensor(0.0, requires_grad=True, device=self.device)

    def fit(self) -> None:
        """Fit the PCA transformation to the embeddings.

        The method aggregates the embeddings collected during training and
        fits the PCA transformation used for anomaly scoring.
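
        A shape sketch (sizes are illustrative only): ``self.embeddings``
        holds one tensor per training batch, and ``torch.vstack`` stacks them
        along the first dimension before PCA fitting:

        >>> import torch
        >>> chunks = [torch.randn(8, 192), torch.randn(8, 192)]
        >>> torch.vstack(chunks).shape
        torch.Size([16, 192])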
        """
        logger.info("Aggregating the embeddings extracted from the training set.")
        embeddings = torch.vstack(self.embeddings)

        logger.info("Fitting PCA to the training dataset.")
        self.model.fit(embeddings)

    def validation_step(self, batch: Batch, *args, **kwargs) -> STEP_OUTPUT:
        """Compute predictions for the input batch during validation.

        Args:
            batch (Batch): Input batch containing video clips.
            *args: Additional positional arguments (unused).
            **kwargs: Additional keyword arguments (unused).

        Returns:
            STEP_OUTPUT: Batch updated with anomaly scores and maps.
        """
        del args, kwargs  # These variables are not used.

        predictions = self.model(batch.image)
        return batch.update(pred_score=predictions.pred_score, anomaly_map=predictions.anomaly_map)

    @property
    def trainer_arguments(self) -> dict[str, Any]:
        """Get FUVAS-specific trainer arguments.

        Returns:
            dict[str, Any]: Dictionary of trainer arguments:
                - ``gradient_clip_val`` (int): Set to ``0`` to disable
                  gradient clipping
                - ``max_epochs`` (int): Train for one epoch only
                - ``num_sanity_val_steps`` (int): Skip validation sanity checks
        """
        return {"gradient_clip_val": 0, "max_epochs": 1, "num_sanity_val_steps": 0}

    @property
    def learning_type(self) -> LearningType:
        """Get the learning type of the model.

        Returns:
            LearningType: The model uses one-class learning.
        """
        return LearningType.ONE_CLASS