Commit fff7b02

Update all Python examples that used BGR->RGB conversion to use BGR directly.
(Tested all scripts touched here.)

1 parent 47edbf6 commit fff7b02

File tree

10 files changed (+21 -37 lines)


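The pattern applied in every file below is the same: instead of copying each frame through cv2.cvtColor just so the viewer sees the right colors, the OpenCV buffer is logged as-is and its channel order is declared. A minimal before/after sketch (`bgr` stands in for any frame from cv2.imread or cv2.VideoCapture; the file path is a placeholder):

import cv2
import rerun as rr

bgr = cv2.imread("frame.png")  # OpenCV decodes to BGR channel order

# Before: an extra full-frame copy on every log call.
rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
rr.log("camera/image", rr.Image(rgb))

# After: no copy; Rerun is told the buffer is BGR.
rr.log("camera/image", rr.Image(bgr, color_model="BGR"))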
docs/snippets/all/archetypes/image_advanced.py

+2 -3

@@ -33,6 +33,5 @@
 # Read with OpenCV
 image = cv2.imread(file_path)

-# OpenCV uses BGR ordering, so we need to convert to RGB.
-image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-rr.log("from_opencv", rr.Image(image))
+# OpenCV uses BGR ordering; we need to make this known to Rerun.
+rr.log("from_opencv", rr.Image(image, color_model="BGR"))

examples/python/arkit_scenes/arkit_scenes/__main__.py

+4 -7

@@ -225,7 +225,6 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
         rr.set_time_seconds("time", float(frame_timestamp))
         # load the lowres image and depth
         bgr = cv2.imread(f"{lowres_image_dir}/{video_id}_{frame_timestamp}.png")
-        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
         depth = cv2.imread(f"{lowres_depth_dir}/{video_id}_{frame_timestamp}.png", cv2.IMREAD_ANYDEPTH)

         high_res_exists: bool = (image_dir / f"{video_id}_{frame_timestamp}.png").exists() and include_highres
@@ -240,7 +239,7 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
             LOWRES_POSED_ENTITY_PATH,
         )

-        rr.log(f"{LOWRES_POSED_ENTITY_PATH}/rgb", rr.Image(rgb).compress(jpeg_quality=95))
+        rr.log(f"{LOWRES_POSED_ENTITY_PATH}/bgr", rr.Image(bgr, color_model="BGR").compress(jpeg_quality=95))
         rr.log(f"{LOWRES_POSED_ENTITY_PATH}/depth", rr.DepthImage(depth, meter=1000))

         # log the high res camera
@@ -260,9 +259,7 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
         highres_bgr = cv2.imread(f"{image_dir}/{video_id}_{frame_timestamp}.png")
         highres_depth = cv2.imread(f"{depth_dir}/{video_id}_{frame_timestamp}.png", cv2.IMREAD_ANYDEPTH)

-        highres_rgb = cv2.cvtColor(highres_bgr, cv2.COLOR_BGR2RGB)
-
-        rr.log(f"{HIGHRES_ENTITY_PATH}/rgb", rr.Image(highres_rgb).compress(jpeg_quality=75))
+        rr.log(f"{HIGHRES_ENTITY_PATH}/bgr", rr.Image(highres_bgr, color_model="BGR").compress(jpeg_quality=75))
         rr.log(f"{HIGHRES_ENTITY_PATH}/depth", rr.DepthImage(highres_depth, meter=1000))

@@ -293,9 +290,9 @@ def main() -> None:
             # For this to work, the origin of the 2D views has to be a pinhole camera,
             # this way the viewer knows how to project the 3D annotations into the 2D views.
             rrb.Spatial2DView(
-                name="RGB",
+                name="BGR",
                 origin=primary_camera_entity,
-                contents=["$origin/rgb", "/world/annotations/**"],
+                contents=["$origin/bgr", "/world/annotations/**"],
             ),
             rrb.Spatial2DView(
                 name="Depth",

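Renaming a logged entity path means any blueprint query that references it must be updated in lockstep, which is what the last hunk does. A sketch of the pairing, using the entity names from the diff above (not a complete blueprint):

rr.log(f"{LOWRES_POSED_ENTITY_PATH}/bgr", rr.Image(bgr, color_model="BGR").compress(jpeg_quality=95))

# The 2D view only shows entities matched by its `contents` query,
# so "$origin/bgr" must agree with the path passed to rr.log above.
rrb.Spatial2DView(
    name="BGR",
    origin=primary_camera_entity,
    contents=["$origin/bgr", "/world/annotations/**"],
)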
examples/python/face_tracking/face_tracking.py

+2 -6

@@ -357,15 +357,12 @@ def run_from_video_capture(vid: int | str, max_dim: int | None, max_frame_count:
             # On some platforms it always returns zero, so we compute from the frame counter and fps
             frame_time_nano = int(frame_idx * 1000 / fps * 1e6)

-            # convert to rgb
-            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
             # log data
             rr.set_time_sequence("frame_nr", frame_idx)
             rr.set_time_nanos("frame_time", frame_time_nano)
             detector.detect_and_log(frame, frame_time_nano)
             landmarker.detect_and_log(frame, frame_time_nano)
-            rr.log("video/image", rr.Image(frame))
+            rr.log("video/image", rr.Image(frame, color_model="BGR"))

     except KeyboardInterrupt:
         pass
@@ -379,12 +376,11 @@ def run_from_sample_image(path: Path, max_dim: int | None, num_faces: int) -> No
     """Run the face detector on a single image."""
     image = cv2.imread(str(path))
     image = resize_image(image, max_dim)
-    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
     logger = FaceDetectorLogger(video_mode=False)
     landmarker = FaceLandmarkerLogger(video_mode=False, num_faces=num_faces)
     logger.detect_and_log(image, 0)
     landmarker.detect_and_log(image, 0)
-    rr.log("video/image", rr.Image(image))
+    rr.log("video/image", rr.Image(image, color_model="BGR"))


 def main() -> None:

examples/python/gesture_detection/gesture_detection.py

+2 -6

@@ -192,8 +192,7 @@ def run_from_sample_image(path: Path | str) -> None:
     """Run the gesture recognition on a single image."""
     image = cv2.imread(str(path))
     # image = resize_image(image, max_dim)
-    show_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-    rr.log("media/image", rr.Image(show_image))
+    rr.log("media/image", rr.Image(image, color_model="BGR"))
     logger = GestureDetectorLogger(video_mode=False)
     logger.detect_and_log(image, 0)

@@ -236,14 +235,11 @@ def run_from_video_capture(vid: int | str, max_frame_count: int | None) -> None:
             # On some platforms it always returns zero, so we compute from the frame counter and fps
             frame_time_nano = int(frame_idx * 1000 / fps * 1e6)

-            # convert to rgb
-            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
             # log data
             rr.set_time_sequence("frame_nr", frame_idx)
             rr.set_time_nanos("frame_time", frame_time_nano)
             detector.detect_and_log(frame, frame_time_nano)
-            rr.log("media/video", rr.Image(frame).compress(jpeg_quality=75))
+            rr.log("media/video", rr.Image(frame, color_model="BGR").compress(jpeg_quality=75))

     except KeyboardInterrupt:
         pass

examples/python/human_pose_tracking/human_pose_tracking.py

+3 -4

@@ -77,15 +77,14 @@ def track_pose(video_path: str, model_path: str, *, segment: bool, max_frame_cou
                 break

             mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=bgr_frame.data)
-            rgb = cv2.cvtColor(bgr_frame.data, cv2.COLOR_BGR2RGB)
             rr.set_time_seconds("time", bgr_frame.time)
             rr.set_time_sequence("frame_idx", bgr_frame.idx)

             results = pose_landmarker.detect_for_video(mp_image, int(bgr_frame.time * 1000))
-            h, w, _ = rgb.shape
+            h, w, _ = bgr_frame.data.shape
             landmark_positions_2d = read_landmark_positions_2d(results, w, h)

-            rr.log("video/rgb", rr.Image(rgb).compress(jpeg_quality=75))
+            rr.log("video/bgr", rr.Image(bgr_frame.data, color_model="BGR").compress(jpeg_quality=75))
             if landmark_positions_2d is not None:
                 rr.log(
                     "video/pose/points",
@@ -237,7 +236,7 @@ def main() -> None:
             rrb.Spatial3DView(origin="person", name="3D pose"),
         ),
         rrb.Vertical(
-            rrb.Spatial2DView(origin="video/rgb", name="Raw video"),
+            rrb.Spatial2DView(origin="video/bgr", name="Raw video"),
             rrb.TextDocumentView(origin="description", name="Description"),
             row_shares=[2, 3],
         ),

examples/python/live_camera_edge_detection/live_camera_edge_detection.py

+1 -2

@@ -42,8 +42,7 @@ def run_canny(num_frames: int | None) -> None:
         frame_nr += 1

         # Log the original image
-        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-        rr.log("image/rgb", rr.Image(rgb))
+        rr.log("image/rgb", rr.Image(img, color_model="BGR"))

         # Convert to grayscale
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

examples/python/ocr/ocr.py

+1 -1

@@ -365,7 +365,7 @@ def detect_and_log_layouts(file_path: str) -> None:
     else:
         # read image
         img = cv2.imread(file_path)
-        image_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        image_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Rerun can handle BGR as well, but `ocr_model_pp` expects RGB
         images.append(image_rgb.astype(np.uint8))

     # Extract the layout from each image

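The OCR example is the exception that proves the rule: the conversion stays because the downstream model, not Rerun, needs RGB. A sketch of that general split (`run_model` is a hypothetical stand-in for the `ocr_model_pp` call; ocr.py itself keeps a single RGB copy for both uses):

img = cv2.imread(file_path)                 # BGR, as OpenCV delivers it
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # only because the model expects RGB
results = run_model(rgb)                    # hypothetical model invocation
rr.log("image", rr.Image(img, color_model="BGR"))  # the viewer needs no copy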
examples/python/rgbd/rgbd.py

+4 -6

@@ -44,13 +44,11 @@ def parse_timestamp(filename: str) -> datetime:
     return datetime.fromtimestamp(float(time))


-def read_image_rgb(buf: bytes) -> npt.NDArray[np.uint8]:
+def read_image_bgr(buf: bytes) -> npt.NDArray[np.uint8]:
     """Decode an image provided in `buf`, and interpret it as BGR data."""
     np_buf: npt.NDArray[np.uint8] = np.ndarray(shape=(1, len(buf)), dtype=np.uint8, buffer=buf)
-    # OpenCV reads images in BGR rather than RGB format
     img_bgr = cv2.imdecode(np_buf, cv2.IMREAD_COLOR)
-    img_rgb: npt.NDArray[Any] = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
-    return img_rgb
+    return img_bgr


 def read_depth_image(buf: bytes) -> npt.NDArray[Any]:
@@ -85,8 +83,8 @@ def log_nyud_data(recording_path: Path, subset_idx: int, frames: int) -> None:

         if f.filename.endswith(".ppm"):
             buf = archive.read(f)
-            img_rgb = read_image_rgb(buf)
-            rr.log("world/camera/image/rgb", rr.Image(img_rgb).compress(jpeg_quality=95))
+            img_bgr = read_image_bgr(buf)
+            rr.log("world/camera/image/rgb", rr.Image(img_bgr, color_model="BGR").compress(jpeg_quality=95))

         elif f.filename.endswith(".pgm"):
             buf = archive.read(f)

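For reference, cv2.imdecode follows the same BGR convention as cv2.imread, so the decoded buffer can be logged without conversion. A standalone sketch of the decode step; np.frombuffer is an equivalent, arguably more idiomatic, way to build the byte view than the np.ndarray(...) construction above:

import cv2
import numpy as np
import numpy.typing as npt

def read_image_bgr(buf: bytes) -> npt.NDArray[np.uint8]:
    """Decode the image bytes in `buf`; OpenCV returns BGR channel order."""
    np_buf = np.frombuffer(buf, dtype=np.uint8)
    return cv2.imdecode(np_buf, cv2.IMREAD_COLOR)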
examples/python/segment_anything_model/segment_anything_model.py

+1

@@ -138,6 +138,7 @@ def load_image(image_uri: str) -> cv2.typing.MatLike:
     else:
         image = cv2.imread(image_uri, cv2.IMREAD_COLOR)

+    # Rerun can handle BGR as well, but SAM requires RGB.
     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
     return image
examples/python/structure_from_motion/structure_from_motion/__main__.py

+1 -2

@@ -162,8 +162,7 @@ def read_and_log_sparse_reconstruction(dataset_path: Path, filter_output: bool,
         if resize:
             bgr = cv2.imread(str(image_file))
             bgr = cv2.resize(bgr, resize)
-            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
-            rr.log("camera/image", rr.Image(rgb).compress(jpeg_quality=75))
+            rr.log("camera/image", rr.Image(bgr, color_model="BGR").compress(jpeg_quality=75))
         else:
             rr.log("camera/image", rr.EncodedImage(path=dataset_path / "images" / image.name))

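The else branch above shows the complementary shortcut: when the pixels are never touched, the compressed file can be handed to the viewer directly, skipping the decode/re-encode round-trip entirely. The two paths side by side, with names as in the example:

if resize:
    bgr = cv2.resize(cv2.imread(str(image_file)), resize)
    rr.log("camera/image", rr.Image(bgr, color_model="BGR").compress(jpeg_quality=75))
else:
    # No pixel access needed: log the encoded JPEG/PNG bytes as-is.
    rr.log("camera/image", rr.EncodedImage(path=dataset_path / "images" / image.name))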