
Commit c99725e

Merge branch 'main' into fix/instance-segmentation-post-processing
2 parents: 4657396 + 3153b3c

15 files changed: +190 -169 lines


docker/dockerfiles/Dockerfile.onnx.jetson.4.6.1

Lines changed: 29 additions & 29 deletions
@@ -85,34 +85,34 @@ COPY Makefile Makefile
 RUN make create_inference_cli_whl PYTHON=python3.9
 RUN python3.9 -m pip install dist/inference_cli*.whl
 
-ENV VERSION_CHECK_MODE=continuous
-ENV PROJECT=roboflow-platform
-ENV ORT_TENSORRT_FP16_ENABLE=1
-ENV ORT_TENSORRT_ENGINE_CACHE_ENABLE=1
-ENV PROJECT=roboflow-platform
-ENV NUM_WORKERS=1
-ENV HOST=0.0.0.0
-ENV PORT=9001
-ENV OPENBLAS_CORETYPE=ARMV8
-ENV WORKFLOWS_STEP_EXECUTION_MODE=local
-ENV WORKFLOWS_MAX_CONCURRENT_STEPS=2
-ENV API_LOGGING_ENABLED=True
-ENV RUNS_ON_JETSON=True
-ENV ENABLE_PROMETHEUS=True
-ENV ENABLE_STREAM_API=True
-ENV STREAM_API_PRELOADED_PROCESSES=2
-
-ENV CORE_MODEL_GAZE_ENABLED=False
-ENV CORE_MODEL_OWLV2_ENABLED=False
-ENV CORE_MODEL_PE_ENABLED=False
-ENV CORE_MODEL_SAM_ENABLED=False
-ENV CORE_MODEL_SAM2_ENABLED=False
-ENV CORE_MODEL_TROCR_ENABLED=False
-ENV DEPTH_ESTIMATION_ENABLED=False
-ENV FLORENCE2_ENABLED=False
-ENV MOONDREAM2_ENABLED=False
-ENV PALIGEMMA_ENABLED=False
-ENV QWEN_2_5_ENABLED=False
-ENV SMOLVLM2_ENABLED=False
+ENV VERSION_CHECK_MODE=continuous \
+    PROJECT=roboflow-platform \
+    ORT_TENSORRT_FP16_ENABLE=1 \
+    ORT_TENSORRT_ENGINE_CACHE_ENABLE=1 \
+    PROJECT=roboflow-platform \
+    NUM_WORKERS=1 \
+    HOST=0.0.0.0 \
+    PORT=9001 \
+    OPENBLAS_CORETYPE=ARMV8 \
+    WORKFLOWS_STEP_EXECUTION_MODE=local \
+    WORKFLOWS_MAX_CONCURRENT_STEPS=2 \
+    API_LOGGING_ENABLED=True \
+    RUNS_ON_JETSON=True \
+    ENABLE_PROMETHEUS=True \
+    ENABLE_STREAM_API=True \
+    STREAM_API_PRELOADED_PROCESSES=2 \
+    CORE_MODEL_GAZE_ENABLED=False \
+    CORE_MODEL_OWLV2_ENABLED=False \
+    CORE_MODEL_PE_ENABLED=False \
+    CORE_MODEL_SAM_ENABLED=False \
+    CORE_MODEL_SAM2_ENABLED=False \
+    CORE_MODEL_TROCR_ENABLED=False \
+    DEPTH_ESTIMATION_ENABLED=False \
+    FLORENCE2_ENABLED=False \
+    MOONDREAM2_ENABLED=False \
+    PALIGEMMA_ENABLED=False \
+    QWEN_2_5_ENABLED=False \
+    SMOLVLM2_ENABLED=False \
+    PYTHONPATH=/app:$PYTHONPATH
 
 ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
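Note: folding the per-variable ENV instructions into a single ENV with backslash continuations collapses ~29 layers' worth of image metadata into one instruction, and the appended PYTHONPATH=/app:$PYTHONPATH makes the application package importable regardless of the working directory. The resulting runtime environment is unchanged; the duplicated PROJECT assignment is carried over verbatim, and since both values are identical it is harmless in either form. A minimal sanity check one could run inside the built image (a sketch; the script and the variables it samples are illustrative, only the names and values come from the Dockerfile above):

import os

# Spot-check a few of the variables set by the consolidated ENV instruction.
EXPECTED = {
    "PROJECT": "roboflow-platform",
    "PORT": "9001",
    "RUNS_ON_JETSON": "True",
}

for name, value in EXPECTED.items():
    actual = os.environ.get(name)
    assert actual == value, f"{name}={actual!r}, expected {value!r}"

print("PYTHONPATH:", os.environ.get("PYTHONPATH"))  # expected to start with /app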

docker/dockerfiles/Dockerfile.onnx.jetson.5.1.1

Lines changed: 20 additions & 19 deletions
@@ -76,24 +76,25 @@ COPY Makefile Makefile
 RUN make create_inference_cli_whl PYTHON=python3.9
 RUN python3.9 -m pip install dist/inference_cli*.whl
 
-ENV VERSION_CHECK_MODE=continuous
-ENV PROJECT=roboflow-platform
-ENV ORT_TENSORRT_FP16_ENABLE=1
-ENV ORT_TENSORRT_ENGINE_CACHE_ENABLE=1
-ENV CORE_MODEL_SAM_ENABLED=False
-ENV PROJECT=roboflow-platform
-ENV NUM_WORKERS=1
-ENV HOST=0.0.0.0
-ENV PORT=9001
-ENV OPENBLAS_CORETYPE=ARMV8
-ENV LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1
-ENV WORKFLOWS_STEP_EXECUTION_MODE=local
-ENV WORKFLOWS_MAX_CONCURRENT_STEPS=2
-ENV API_LOGGING_ENABLED=True
-ENV CORE_MODEL_TROCR_ENABLED=false
-ENV RUNS_ON_JETSON=True
-ENV ENABLE_PROMETHEUS=True
-ENV ENABLE_STREAM_API=True
-ENV STREAM_API_PRELOADED_PROCESSES=2
+ENV VERSION_CHECK_MODE=continuous \
+    PROJECT=roboflow-platform \
+    ORT_TENSORRT_FP16_ENABLE=1 \
+    ORT_TENSORRT_ENGINE_CACHE_ENABLE=1 \
+    CORE_MODEL_SAM_ENABLED=False \
+    PROJECT=roboflow-platform \
+    NUM_WORKERS=1 \
+    HOST=0.0.0.0 \
+    PORT=9001 \
+    OPENBLAS_CORETYPE=ARMV8 \
+    LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1 \
+    WORKFLOWS_STEP_EXECUTION_MODE=local \
+    WORKFLOWS_MAX_CONCURRENT_STEPS=2 \
+    API_LOGGING_ENABLED=True \
+    CORE_MODEL_TROCR_ENABLED=false \
+    RUNS_ON_JETSON=True \
+    ENABLE_PROMETHEUS=True \
+    ENABLE_STREAM_API=True \
+    STREAM_API_PRELOADED_PROCESSES=2 \
+    PYTHONPATH=/app:$PYTHONPATH
 
 ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT

docker/dockerfiles/Dockerfile.onnx.jetson.5.1.1.stream_manager

Lines changed: 13 additions & 12 deletions
@@ -65,17 +65,18 @@ COPY Makefile Makefile
 RUN make create_inference_cli_whl PYTHON=python3.9
 RUN python3.9 -m pip install dist/inference_cli*.whl
 
-ENV ORT_TENSORRT_FP16_ENABLE=1
-ENV ORT_TENSORRT_ENGINE_CACHE_ENABLE=1
-ENV CORE_MODEL_SAM_ENABLED=False
-ENV OPENBLAS_CORETYPE=ARMV8
-ENV LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1:/usr/local/lib/python3.8/dist-packages/torch.libs/libgomp-d22c30c5.so.1.0.0
-ENV VERSION_CHECK_MODE=continuous
-ENV PROJECT=roboflow-platform
-ENV HOST=0.0.0.0
-ENV PORT=7070
-ENV WORKFLOWS_STEP_EXECUTION_MODE=local
-ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
-ENV SUPERVISON_DEPRECATION_WARNING=0
+ENV ORT_TENSORRT_FP16_ENABLE=1 \
+    ORT_TENSORRT_ENGINE_CACHE_ENABLE=1 \
+    CORE_MODEL_SAM_ENABLED=False \
+    OPENBLAS_CORETYPE=ARMV8 \
+    LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1:/usr/local/lib/python3.8/dist-packages/torch.libs/libgomp-d22c30c5.so.1.0.0 \
+    VERSION_CHECK_MODE=continuous \
+    PROJECT=roboflow-platform \
+    HOST=0.0.0.0 \
+    PORT=7070 \
+    WORKFLOWS_STEP_EXECUTION_MODE=local \
+    WORKFLOWS_MAX_CONCURRENT_STEPS=1 \
+    SUPERVISON_DEPRECATION_WARNING=0 \
+    PYTHONPATH=/app:$PYTHONPATH
 
 ENTRYPOINT ["python3.9", "-m", "inference.enterprise.stream_management.manager.app"]

docker/dockerfiles/Dockerfile.onnx.jetson.6.0.0

Lines changed: 2 additions & 1 deletion
@@ -73,7 +73,8 @@ ENV VERSION_CHECK_MODE=continuous \
     RUNS_ON_JETSON=True \
     ENABLE_PROMETHEUS=True \
     ENABLE_STREAM_API=True \
-    STREAM_API_PRELOADED_PROCESSES=2
+    STREAM_API_PRELOADED_PROCESSES=2 \
+    PYTHONPATH=/app:$PYTHONPATH
 
 # Expose the application port
 EXPOSE 9001

docker/dockerfiles/Dockerfile.onnx.jetson.6.2.0

Lines changed: 2 additions & 1 deletion
@@ -78,7 +78,8 @@ ENV VERSION_CHECK_MODE=continuous \
     RUNS_ON_JETSON=True \
     ENABLE_PROMETHEUS=True \
     ENABLE_STREAM_API=True \
-    STREAM_API_PRELOADED_PROCESSES=2
+    STREAM_API_PRELOADED_PROCESSES=2 \
+    PYTHONPATH=/app:$PYTHONPATH
 
 # Expose the application port
 EXPOSE 9001

inference/core/constants.py

Lines changed: 1 addition & 0 deletions
@@ -2,3 +2,4 @@
 OBJECT_DETECTION_TASK = "object-detection"
 INSTANCE_SEGMENTATION_TASK = "instance-segmentation"
 KEYPOINTS_DETECTION_TASK = "keypoint-detection"
+PROCESSING_TIME_HEADER = "X-Processing-Time"

inference/core/interfaces/camera/video_source.py

Lines changed: 1 addition & 10 deletions
@@ -550,21 +550,12 @@ def read_frame(self, timeout: Optional[float] = None) -> Optional[VideoFrame]:
             self._fps = source_metadata.source_properties.fps
             if not self._fps or self._fps <= 0 or self._fps > 1000:
                 self._fps = 30  # sane default
-        if not self._is_file:
-            current_timestamp = time.time_ns()
-            if (current_timestamp - self._last_frame_timestamp) / 1e9 < 1 / self._fps:
-                time.sleep(
-                    (1 / self._fps)
-                    - (current_timestamp - self._last_frame_timestamp) / 1e9
-                )
         video_frame: Optional[Union[VideoFrame, str]] = get_from_queue(
             queue=self._frames_buffer,
            on_successful_read=self._video_consumer.notify_frame_consumed,
            timeout=timeout,
            purge=self._buffer_consumption_strategy is BufferConsumptionStrategy.EAGER,
        )
-        if not self._is_file:
-            self._last_frame_timestamp = time.time_ns()
         if video_frame == POISON_PILL:
             raise EndOfStreamError(
                 "Attempted to retrieve frame from stream that already ended."
@@ -926,7 +917,7 @@ def _set_file_mode_buffering_strategies(self) -> None:
 
     def _set_stream_mode_buffering_strategies(self) -> None:
         if self._buffer_filling_strategy is None:
-            self._buffer_filling_strategy = BufferFillingStrategy.DROP_OLDEST
+            self._buffer_filling_strategy = BufferFillingStrategy.ADAPTIVE_DROP_OLDEST
 
     def _video_fps_should_be_sub_sampled(self) -> bool:
         if self._desired_fps is None:
inference/core/interfaces/http/http_api.py

Lines changed: 3 additions & 1 deletion
@@ -15,6 +15,7 @@
 from starlette.middleware.base import BaseHTTPMiddleware
 
 from inference.core import logger
+from inference.core.constants import PROCESSING_TIME_HEADER
 from inference.core.devices.utils import GLOBAL_INFERENCE_SERVER_ID
 from inference.core.entities.requests.clip import (
     ClipCompareRequest,
@@ -536,7 +537,7 @@ async def dispatch(self, request, call_next):
         t1 = time.time()
         response = await call_next(request)
         t2 = time.time()
-        response.headers["X-Processing-Time"] = str(t2 - t1)
+        response.headers[PROCESSING_TIME_HEADER] = str(t2 - t1)
         return response
 
 
@@ -625,6 +626,7 @@ async def on_shutdown():
         allow_credentials=True,
         allow_methods=["*"],
         allow_headers=["*"],
+        expose_headers=[PROCESSING_TIME_HEADER],
     )
 
 # Optionally add middleware for profiling the FastAPI server and underlying inference API code
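Note: the middleware already stamped every response with its processing time; naming the header once in constants.py removes the string literal, and listing it in expose_headers makes it readable from cross-origin browser code, which CORS otherwise restricts to a small safelist of response headers. A sketch of a client reading it (the endpoint path and payload are placeholders; only the header name comes from this diff):

import requests

response = requests.post("http://localhost:9001/infer", json={})
elapsed = response.headers.get("X-Processing-Time")
print(f"server-side processing time: {elapsed}s")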

inference/core/interfaces/stream_manager/manager_app/inference_pipeline_manager.py

Lines changed: 0 additions & 2 deletions
@@ -280,8 +280,6 @@ def start_loop(loop: asyncio.AbstractEventLoop):
         from_inference_queue=from_inference_queue,
         asyncio_loop=loop,
         webcam_fps=webcam_fps,
-        max_consecutive_timeouts=parsed_payload.max_consecutive_timeouts,
-        min_consecutive_on_time=parsed_payload.min_consecutive_on_time,
         processing_timeout=parsed_payload.processing_timeout,
         fps_probe_frames=parsed_payload.fps_probe_frames,
         data_output=data_output,

inference/core/interfaces/stream_manager/manager_app/webrtc.py

Lines changed: 23 additions & 48 deletions
@@ -1,5 +1,6 @@
 import asyncio
 import concurrent.futures
+import datetime
 import json
 import time
 from typing import Any, Dict, List, Optional, Tuple, Union
@@ -62,10 +63,27 @@ def get_frame_from_workflow_output(
 ) -> Optional[np.ndarray]:
     step_output = workflow_output.get(frame_output_key)
     if isinstance(step_output, WorkflowImageData):
+        if (
+            step_output.video_metadata
+            and step_output.video_metadata.frame_timestamp is not None
+        ):
+            latency = (
+                datetime.datetime.now() - step_output.video_metadata.frame_timestamp
+            )
+            logger.info("Processing latency: %ss", latency.total_seconds())
         return step_output.numpy_image
     elif isinstance(step_output, dict):
         for frame_output in step_output.values():
             if isinstance(frame_output, WorkflowImageData):
+                if (
+                    frame_output.video_metadata
+                    and frame_output.video_metadata.frame_timestamp is not None
+                ):
+                    latency = (
+                        datetime.datetime.now()
+                        - frame_output.video_metadata.frame_timestamp
+                    )
+                    logger.info("Processing latency: %ss", latency.total_seconds())
                 return frame_output.numpy_image
 
 
@@ -77,8 +95,6 @@ def __init__(
         asyncio_loop: asyncio.AbstractEventLoop,
         processing_timeout: float,
         fps_probe_frames: int,
-        min_consecutive_on_time: int,
-        max_consecutive_timeouts: Optional[int] = None,
         webcam_fps: Optional[float] = None,
         *args,
         **kwargs,
@@ -104,10 +120,6 @@
         self.incoming_stream_fps: Optional[float] = webcam_fps
 
         self._last_frame: Optional[VideoFrame] = None
-        self._consecutive_timeouts: int = 0
-        self._consecutive_on_time: int = 0
-        self._max_consecutive_timeouts: Optional[int] = max_consecutive_timeouts
-        self._min_consecutive_on_time: int = min_consecutive_on_time
 
         self._av_logging_set: bool = False
 
@@ -146,7 +158,7 @@
 
         if not await self.to_inference_queue.async_full():
             await self.to_inference_queue.async_put(frame)
-        elif not self._last_frame:
+        else:
             await self.to_inference_queue.async_get_nowait()
             await self.to_inference_queue.async_put_nowait(frame)
 
@@ -157,55 +169,22 @@
             )
             new_frame = VideoFrame.from_ndarray(np_frame, format="bgr24")
             self._last_frame = new_frame
-
-            if self._max_consecutive_timeouts:
-                self._consecutive_on_time += 1
-                if self._consecutive_on_time >= self._min_consecutive_on_time:
-                    self._consecutive_timeouts = 0
         except asyncio.TimeoutError:
-            while not await self.to_inference_queue.async_empty():
-                await self.to_inference_queue.async_get_nowait()
-            if self._last_frame:
-                if self._max_consecutive_timeouts:
-                    self._consecutive_timeouts += 1
-                    if self._consecutive_timeouts >= self._max_consecutive_timeouts:
-                        self._consecutive_on_time = 0
-
-        workflow_too_slow_message = [
-            "Workflow is too heavy to process all frames on time..."
-        ]
+            pass
+
         if np_frame is None:
             if not self._last_frame:
                 np_frame = overlay_text_on_np_frame(
                     frame.to_ndarray(format="bgr24"),
                     ["Inference pipeline is starting..."],
                 )
                 new_frame = VideoFrame.from_ndarray(np_frame, format="bgr24")
-            elif (
-                self._max_consecutive_timeouts
-                and self._consecutive_timeouts >= self._max_consecutive_timeouts
-            ):
-                np_frame = overlay_text_on_np_frame(
-                    self._last_frame.to_ndarray(format="bgr24"),
-                    workflow_too_slow_message,
-                )
-                new_frame = VideoFrame.from_ndarray(np_frame, format="bgr24")
             else:
                 new_frame = self._last_frame
         else:
-            if (
-                self._max_consecutive_timeouts
-                and self._consecutive_timeouts >= self._max_consecutive_timeouts
-            ):
-                np_frame = overlay_text_on_np_frame(
-                    self._last_frame.to_ndarray(format="bgr24"),
-                    workflow_too_slow_message,
-                )
-                new_frame = VideoFrame.from_ndarray(np_frame, format="bgr24")
-            else:
-                new_frame = VideoFrame.from_ndarray(np_frame, format="bgr24")
+            new_frame = VideoFrame.from_ndarray(np_frame, format="bgr24")
 
-        new_frame.pts = self._processed
+        new_frame.pts = frame.pts
         new_frame.time_base = frame.time_base
 
         return new_frame
@@ -312,8 +291,6 @@ async def init_rtc_peer_connection(
     asyncio_loop: asyncio.AbstractEventLoop,
     processing_timeout: float,
    fps_probe_frames: int,
-    max_consecutive_timeouts: int,
-    min_consecutive_on_time: int,
     webrtc_turn_config: Optional[WebRTCTURNConfig] = None,
     webcam_fps: Optional[float] = None,
     stream_output: Optional[str] = None,
@@ -326,8 +303,6 @@ async def init_rtc_peer_connection(
         webcam_fps=webcam_fps,
         processing_timeout=processing_timeout,
         fps_probe_frames=fps_probe_frames,
-        max_consecutive_timeouts=max_consecutive_timeouts,
-        min_consecutive_on_time=min_consecutive_on_time,
     )
 
     if webrtc_turn_config:
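Note: the recv() rewrite drops the consecutive-timeout bookkeeping (max_consecutive_timeouts / min_consecutive_on_time and the "Workflow is too heavy..." overlay) in favor of a simpler policy: on asyncio.TimeoutError keep showing the last rendered frame, and when the inference queue is full always swap in the newest frame rather than only before the first frame has been produced. Reusing frame.pts instead of a processed-frame counter also keeps output timestamps aligned with the source track. A sketch of the revised enqueue policy (the async_* queue methods appear in the diff; the standalone function is illustrative):

async def enqueue_latest(to_inference_queue, frame) -> None:
    # If there is room, enqueue normally; otherwise evict the queued
    # frame and enqueue the newest one, so inference always works on
    # the freshest available frame.
    if not await to_inference_queue.async_full():
        await to_inference_queue.async_put(frame)
    else:
        await to_inference_queue.async_get_nowait()
        await to_inference_queue.async_put_nowait(frame)

The latency logging added to get_frame_from_workflow_output() (datetime.datetime.now() minus video_metadata.frame_timestamp) gives a rough end-to-end measure of how far rendering lags behind capture.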
