update all python examples that used to use BGR->RGB conversion to use BGR directly

(tested all scripts touched here)
Wumpf committed Aug 16, 2024
1 parent 47edbf6 commit fff7b02
Showing 10 changed files with 21 additions and 37 deletions.
5 changes: 2 additions & 3 deletions docs/snippets/all/archetypes/image_advanced.py
@@ -33,6 +33,5 @@
# Read with OpenCV
image = cv2.imread(file_path)

- # OpenCV uses BGR ordering, so we need to convert to RGB.
- image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
- rr.log("from_opencv", rr.Image(image))
+ # OpenCV uses BGR ordering, we need to make this known to Rerun.
+ rr.log("from_opencv", rr.Image(image, color_model="BGR"))
11 changes: 4 additions & 7 deletions examples/python/arkit_scenes/arkit_scenes/__main__.py
@@ -225,7 +225,6 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
rr.set_time_seconds("time", float(frame_timestamp))
# load the lowres image and depth
bgr = cv2.imread(f"{lowres_image_dir}/{video_id}_{frame_timestamp}.png")
- rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
depth = cv2.imread(f"{lowres_depth_dir}/{video_id}_{frame_timestamp}.png", cv2.IMREAD_ANYDEPTH)

high_res_exists: bool = (image_dir / f"{video_id}_{frame_timestamp}.png").exists() and include_highres
@@ -240,7 +239,7 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
LOWRES_POSED_ENTITY_PATH,
)

rr.log(f"{LOWRES_POSED_ENTITY_PATH}/rgb", rr.Image(rgb).compress(jpeg_quality=95))
rr.log(f"{LOWRES_POSED_ENTITY_PATH}/bgr", rr.Image(bgr, color_model="BGR").compress(jpeg_quality=95))
rr.log(f"{LOWRES_POSED_ENTITY_PATH}/depth", rr.DepthImage(depth, meter=1000))

# log the high res camera
@@ -260,9 +259,7 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
highres_bgr = cv2.imread(f"{image_dir}/{video_id}_{frame_timestamp}.png")
highres_depth = cv2.imread(f"{depth_dir}/{video_id}_{frame_timestamp}.png", cv2.IMREAD_ANYDEPTH)

- highres_rgb = cv2.cvtColor(highres_bgr, cv2.COLOR_BGR2RGB)
-
- rr.log(f"{HIGHRES_ENTITY_PATH}/rgb", rr.Image(highres_rgb).compress(jpeg_quality=75))
+ rr.log(f"{HIGHRES_ENTITY_PATH}/bgr", rr.Image(highres_bgr, color_model="BGR").compress(jpeg_quality=75))
rr.log(f"{HIGHRES_ENTITY_PATH}/depth", rr.DepthImage(highres_depth, meter=1000))


@@ -293,9 +290,9 @@ def main() -> None:
# For this to work, the origin of the 2D views has to be a pinhole camera,
# this way the viewer knows how to project the 3D annotations into the 2D views.
rrb.Spatial2DView(
name="RGB",
name="BGR",
origin=primary_camera_entity,
contents=["$origin/rgb", "/world/annotations/**"],
contents=["$origin/bgr", "/world/annotations/**"],
),
rrb.Spatial2DView(
name="Depth",
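The blueprint changes above go hand in hand with the renamed entity paths: Spatial2DView selects entities by path, so a view still pointing at $origin/rgb would come up empty once frames are logged under .../bgr. A sketch of the pairing, with placeholder paths standing in for the example's real ones:

import cv2
import rerun as rr
import rerun.blueprint as rrb

rr.init("blueprint_example", spawn=True)

bgr = cv2.imread("frame.png")  # placeholder frame
rr.log("world/camera/bgr", rr.Image(bgr, color_model="BGR"))

# The view's contents must match the path the image was logged under.
rr.send_blueprint(
    rrb.Spatial2DView(
        name="BGR",
        origin="world/camera",
        contents=["$origin/bgr"],
    )
)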
8 changes: 2 additions & 6 deletions examples/python/face_tracking/face_tracking.py
@@ -357,15 +357,12 @@ def run_from_video_capture(vid: int | str, max_dim: int | None, max_frame_count:
# On some platforms it always returns zero, so we compute from the frame counter and fps
frame_time_nano = int(frame_idx * 1000 / fps * 1e6)

- # convert to rgb
- frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
# log data
rr.set_time_sequence("frame_nr", frame_idx)
rr.set_time_nanos("frame_time", frame_time_nano)
detector.detect_and_log(frame, frame_time_nano)
landmarker.detect_and_log(frame, frame_time_nano)
rr.log("video/image", rr.Image(frame))
rr.log("video/image", rr.Image(frame, color_model="BGR"))

except KeyboardInterrupt:
pass
@@ -379,12 +376,11 @@ def run_from_sample_image(path: Path, max_dim: int | None, num_faces: int) -> None:
"""Run the face detector on a single image."""
image = cv2.imread(str(path))
image = resize_image(image, max_dim)
- image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
logger = FaceDetectorLogger(video_mode=False)
landmarker = FaceLandmarkerLogger(video_mode=False, num_faces=num_faces)
logger.detect_and_log(image, 0)
landmarker.detect_and_log(image, 0)
rr.log("video/image", rr.Image(image))
rr.log("video/image", rr.Image(image, color_model="BGR"))


def main() -> None:
8 changes: 2 additions & 6 deletions examples/python/gesture_detection/gesture_detection.py
@@ -192,8 +192,7 @@ def run_from_sample_image(path: Path | str) -> None:
"""Run the gesture recognition on a single image."""
image = cv2.imread(str(path))
# image = resize_image(image, max_dim)
- show_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
- rr.log("media/image", rr.Image(show_image))
+ rr.log("media/image", rr.Image(image, color_model="BGR"))
logger = GestureDetectorLogger(video_mode=False)
- logger.detect_and_log(show_image, 0)
+ logger.detect_and_log(image, 0)

@@ -236,14 +235,11 @@ def run_from_video_capture(vid: int | str, max_frame_count: int | None) -> None:
# On some platforms it always returns zero, so we compute from the frame counter and fps
frame_time_nano = int(frame_idx * 1000 / fps * 1e6)

- # convert to rgb
- frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
# log data
rr.set_time_sequence("frame_nr", frame_idx)
rr.set_time_nanos("frame_time", frame_time_nano)
detector.detect_and_log(frame, frame_time_nano)
rr.log("media/video", rr.Image(frame).compress(jpeg_quality=75))
rr.log("media/video", rr.Image(frame, color_model="BGR").compress(jpeg_quality=75))

except KeyboardInterrupt:
pass
7 changes: 3 additions & 4 deletions examples/python/human_pose_tracking/human_pose_tracking.py
@@ -77,15 +77,14 @@ def track_pose(video_path: str, model_path: str, *, segment: bool, max_frame_count: int | None) -> None:
break

mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=bgr_frame.data)
- rgb = cv2.cvtColor(bgr_frame.data, cv2.COLOR_BGR2RGB)
rr.set_time_seconds("time", bgr_frame.time)
rr.set_time_sequence("frame_idx", bgr_frame.idx)

results = pose_landmarker.detect_for_video(mp_image, int(bgr_frame.time * 1000))
- h, w, _ = rgb.shape
+ h, w, _ = bgr_frame.data.shape
landmark_positions_2d = read_landmark_positions_2d(results, w, h)

rr.log("video/rgb", rr.Image(rgb).compress(jpeg_quality=75))
rr.log("video/bgr", rr.Image(bgr_frame.data, color_model="BGR").compress(jpeg_quality=75))
if landmark_positions_2d is not None:
rr.log(
"video/pose/points",
@@ -237,7 +236,7 @@ def main() -> None:
rrb.Spatial3DView(origin="person", name="3D pose"),
),
rrb.Vertical(
rrb.Spatial2DView(origin="video/rgb", name="Raw video"),
rrb.Spatial2DView(origin="video/bgr", name="Raw video"),
rrb.TextDocumentView(origin="description", name="Description"),
row_shares=[2, 3],
),
3 changes: 1 addition & 2 deletions examples/python/live_camera_edge_detection/live_camera_edge_detection.py
@@ -42,8 +42,7 @@ def run_canny(num_frames: int | None) -> None:
frame_nr += 1

# Log the original image
- rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
- rr.log("image/rgb", rr.Image(rgb))
+ rr.log("image/rgb", rr.Image(img, color_model="BGR"))

# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
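Note that the BGR->GRAY conversion stays: Canny needs a single-channel input, so only the conversion that existed purely for display is gone. A compact sketch of the resulting pipeline (the capture source, the gray/canny entity paths, and the thresholds are assumptions, not taken from the example):

import cv2
import rerun as rr

rr.init("canny_sketch", spawn=True)

cap = cv2.VideoCapture(0)  # assumed source: default webcam
ret, img = cap.read()
if ret:
    # Log the original frame with no color conversion at all.
    rr.log("image/rgb", rr.Image(img, color_model="BGR"))

    # Grayscale is still computed, but for Canny, not for display.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    rr.log("image/gray", rr.Image(gray))

    rr.log("image/canny", rr.Image(cv2.Canny(gray, 50, 200)))
cap.release()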
2 changes: 1 addition & 1 deletion examples/python/ocr/ocr.py
@@ -365,7 +365,7 @@ def detect_and_log_layouts(file_path: str) -> None:
else:
# read image
img = cv2.imread(file_path)
- image_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+ image_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Rerun can handle BGR as well, but `ocr_model_pp` expects RGB
images.append(image_rgb.astype(np.uint8))

# Extract the layout from each image
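The OCR example is the one script where a conversion survives, because here RGB is demanded by the model rather than by the viewer. The rule the commit follows is to convert only for the consumer that needs it. A sketch (the image path is a placeholder; the model call is hypothetical and only illustrates where the RGB buffer goes):

import cv2
import rerun as rr

rr.init("ocr_sketch", spawn=True)

img = cv2.imread("document.png")  # placeholder path; decoded as BGR

# Rerun can take the BGR buffer directly...
rr.log("image", rr.Image(img, color_model="BGR"))

# ...but the OCR model (ocr_model_pp in the example) expects RGB,
# so the conversion is kept for the model's sake only.
image_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# layouts = ocr_model_pp(image_rgb)  # hypothetical call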
10 changes: 4 additions & 6 deletions examples/python/rgbd/rgbd.py
@@ -44,13 +44,11 @@ def parse_timestamp(filename: str) -> datetime:
return datetime.fromtimestamp(float(time))


- def read_image_rgb(buf: bytes) -> npt.NDArray[np.uint8]:
- """Decode an image provided in `buf`, and interpret it as RGB data."""
+ def read_image_bgr(buf: bytes) -> npt.NDArray[np.uint8]:
+ """Decode an image provided in `buf`, and interpret it as BGR data."""
np_buf: npt.NDArray[np.uint8] = np.ndarray(shape=(1, len(buf)), dtype=np.uint8, buffer=buf)
# OpenCV reads images in BGR rather than RGB format
img_bgr = cv2.imdecode(np_buf, cv2.IMREAD_COLOR)
- img_rgb: npt.NDArray[Any] = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
- return img_rgb
+ return img_bgr


def read_depth_image(buf: bytes) -> npt.NDArray[Any]:
@@ -85,8 +83,8 @@ def log_nyud_data(recording_path: Path, subset_idx: int, frames: int) -> None:

if f.filename.endswith(".ppm"):
buf = archive.read(f)
- img_rgb = read_image_rgb(buf)
- rr.log("world/camera/image/rgb", rr.Image(img_rgb).compress(jpeg_quality=95))
+ img_bgr = read_image_bgr(buf)
+ rr.log("world/camera/image/rgb", rr.Image(img_bgr, color_model="BGR").compress(jpeg_quality=95))

elif f.filename.endswith(".pgm"):
buf = archive.read(f)
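What makes the direct BGR log safe here is that cv2.imdecode with cv2.IMREAD_COLOR always returns BGR, regardless of the container format (.ppm in this dataset). A sketch of the same decode path outside the zip archive (the file name is a placeholder; np.frombuffer is an equivalent way to build the byte array):

import cv2
import numpy as np
import rerun as rr

rr.init("rgbd_sketch", spawn=True)

with open("frame.ppm", "rb") as f:  # placeholder file
    buf = f.read()

# imdecode with IMREAD_COLOR yields BGR no matter the source format.
img_bgr = cv2.imdecode(np.frombuffer(buf, dtype=np.uint8), cv2.IMREAD_COLOR)

rr.log("world/camera/image/rgb", rr.Image(img_bgr, color_model="BGR").compress(jpeg_quality=95))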
1 change: 1 addition & 0 deletions examples/python/segment_anything_model/segment_anything_model.py
@@ -138,6 +138,7 @@ def load_image(image_uri: str) -> cv2.typing.MatLike:
else:
image = cv2.imread(image_uri, cv2.IMREAD_COLOR)

+ # Rerun can handle BGR as well, but SAM requires RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
return image

3 changes: 1 addition & 2 deletions examples/python/structure_from_motion/structure_from_motion.py
@@ -162,8 +162,7 @@ def read_and_log_sparse_reconstruction(dataset_path: Path, filter_output: bool,
if resize:
bgr = cv2.imread(str(image_file))
bgr = cv2.resize(bgr, resize)
- rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
- rr.log("camera/image", rr.Image(rgb).compress(jpeg_quality=75))
+ rr.log("camera/image", rr.Image(bgr, color_model="BGR").compress(jpeg_quality=75))
else:
rr.log("camera/image", rr.EncodedImage(path=dataset_path / "images" / image.name))

