Return Option Of Not Including Image

ziqinyeow · Jan 10, 2024 · 1e9a956
1 parent ef396c6
commit 1e9a956
Show file tree

Hide file tree

Showing 6 changed files with 59 additions and 19 deletions.
diff --git a/demo.py b/demo.py
@@ -1,28 +1,51 @@
 # from juxtapose import RTM
-# import sys
+import sys
 
-# sys.path.insert(0, "src")
+sys.path.insert(0, "src")
 
 # import cv2
 
 
-from juxtapose import RTM
+from juxtapose import RTM, RTMDet
 
 
-model = RTM(det="rtmdet-m", tracker="bytetrack", pose="rtmpose-l")
-model("asset/bike2.mp4", show=True)
+# model = RTM(det="rtmdet-s", tracker="bytetrack", pose="rtmpose-l")
+# model("./asset/run.png", show=False)
+# model("asset/bike2.mp4")
 # p = RTMPose()
 
 
-# with pro[0]:
-#     m = RTMDet()
-# print("Loading rtmdet-m: ", pro[0].dt * 1e3 / 1)
+import supervision as sv
+import cv2
+import numpy as np
+
+bboxes = [
+    [178.18336181640626, 118.73443603515625, 405.0209228515625, 526.40869140625],
+    [555.400634765625, 76.04942321777344, 816.806787109375, 498.3839111328125],
+    [416.07158203125, 149.1138458251953, 721.727490234375, 495.57513427734375],
+]
+
+box = sv.BoxAnnotator()
+detection = sv.Detections(xyxy=np.array(bboxes))
+
+m = RTMDet("s")
 # with pro[1]:
-#     im = cv2.imread("./asset/football.jpeg")
-#     bboxes = m(im)
-#     kpts, scores = p(im, bboxes.xyxy)
-#     print(kpts.shape, scores.shape)
-#     # print(m(im))
+im = cv2.imread("./asset/football.jpeg")
+im = cv2.resize(im, (1024, 700))
+p = m(im)
+box.annotate(im, p)
+print(p.xyxy)
+# im, e = m.preprocess(im)
+# print(im.shape)
+# im = cv2.imread("./asset/run.png")
+# print(im.shape)
+# print(m(im).xyxy)
+cv2.imshow("p", im)
+cv2.waitKey(0)
+cv2.destroyAllWindows()
+# bboxes = m(im)
+# kpts, scores = p(im, bboxes.xyxy)
+# print(kpts.shape, scores.shape)
 # print("First Inference: ", pro[1].dt * 1e3 / 1)
 # with pro[2]:
 #     im = cv2.imread("./asset/run.png")

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "juxtapose"
-version = "0.0.12"
+version = "0.0.13"
 description = ""
 authors = ["Zi Qin <ziqinyeow@gmail.com>"]
 license = "MIT"

diff --git a/src/juxtapose/__init__.py b/src/juxtapose/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.0.12"
+__version__ = "0.0.13"
 
 from .rtm import RTM
 from .detectors import GroundingDino, RTMDet, YOLOv8

diff --git a/src/juxtapose/detectors/rtmdet/__init__.py b/src/juxtapose/detectors/rtmdet/__init__.py
@@ -58,6 +58,7 @@ def inference(self, im: np.ndarray):
             sess_output.append(out.name)
 
         outputs = self.session.run(sess_output, sess_input)
+        # print(outputs[0].shape, outputs[1].shape)
         return outputs
 
     def __call__(self, im):
@@ -87,6 +88,7 @@ def preprocess(self, img: np.ndarray):
             - center (np.ndarray): Center of image.
             - scale (np.ndarray): Scale of image.
         """
+        # print("pre", img.shape)
         if len(img.shape) == 3:
             padded_img = (
                 np.ones(
@@ -109,6 +111,7 @@ def preprocess(self, img: np.ndarray):
         ).astype(np.uint8)
         padded_shape = (int(img.shape[0] * ratio), int(img.shape[1] * ratio))
         padded_img[: padded_shape[0], : padded_shape[1]] = resized_img
+        print(padded_shape, resized_img.shape, padded_img.shape)
 
         # normalize image
         if self.mean is not None:
@@ -183,6 +186,7 @@ def postprocess(
             # onnx contains nms module
 
             pack_dets = (outputs[0, :, :4], outputs[0, :, 4])
+            print(outputs.shape, pack_dets[0].shape, pack_dets[1].shape)
             final_boxes, final_scores = pack_dets
             final_boxes /= ratio
             isscore = final_scores > self.conf_thres

diff --git a/src/juxtapose/rtm.py b/src/juxtapose/rtm.py
@@ -41,7 +41,7 @@
 
 @dataclass
 class Result:
-    im: np.ndarray  # shape -> (h, w, c)
+    im: np.ndarray or None  # shape -> (h, w, c)
     kpts: List  # shape -> (number of humans, 17, 2)
     bboxes: List  # shape -> (number of humans, 4)
     speed: dict  # {'bboxes': ... ms, 'kpts': ... ms} -> used to record the milliseconds of the inference time
@@ -166,6 +166,7 @@ def stream_inference(
         save=False,
         save_dirs="",
         verbose=True,
+        return_im=False,
     ) -> Result:
         if show:
             check_imshow(warn=True)
@@ -261,9 +262,9 @@ def stream_inference(
                     self.annotator.draw_skeletons(im, kpts)
 
             result = Result(
-                im=im,
-                kpts=kpts,
-                bboxes=detections.xyxy,  # detections.xyxy,
+                im=im if return_im else None,
+                kpts=kpts.tolist(),
+                bboxes=detections.xyxy.tolist(),  # detections.xyxy,
                 speed={
                     "bboxes": profilers[0].dt * 1e3 / 1,
                     "track": profilers[1].dt * 1e3 / 1,
@@ -332,6 +333,7 @@ def __call__(
         save=False,
         save_dirs="",
         verbose=True,
+        return_im=False,
     ) -> Union[List[Result], Generator[Result, None, None]]:
         if stream:
             return self.stream_inference(
@@ -345,6 +347,7 @@ def __call__(
                 save=save,
                 save_dirs=save_dirs,
                 verbose=verbose,
+                return_im=return_im,
             )
         else:
             return list(
@@ -359,5 +362,6 @@ def __call__(
                     save=save,
                     save_dirs=save_dirs,
                     verbose=verbose,
+                    return_im=return_im,
                 )
             )
diff --git a/test.py b/test.py
@@ -0,0 +1,9 @@
+import sys  # script: runs the RTM pipeline on one image and prints what it returns
+
+sys.path.insert(0, "src")  # put the in-repo "src" directory first on the import path
+
+from juxtapose import RTM, RTMDet  # NOTE(review): RTMDet is imported but never used in this script
+
+
+model = RTM(det="rtmdet-m", tracker="botsort", pose="rtmpose-l")
+print(model("./asset/football.jpeg", show=False, return_im=False))  # return_im=False: stream_inference builds each Result with im=None