support overlapped labels

x4Cx58x54 · Oct 13, 2022 · 910d00d · 910d00d
1 parent cf37c2a
commit 910d00d
Show file tree

Hide file tree

Showing 4 changed files with 85 additions and 32 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,4 @@ dist/
 *.egg-info
 
 *.ass
+temp*
diff --git a/README.md b/README.md
@@ -22,7 +22,7 @@ Import the library
 from vistal import vistal, ColourScheme, Colour
 ```
 
-By now this tool supports non-overlapping localization results, i.e. action segmentation results. Pack these temporal labels into a list of `tuple(start, end, label_id)`, for example:
+Pack the temporal labels into a list of `tuple(start, end, label_id)`, for example:
 
 ```python
 prediction = [
@@ -34,7 +34,7 @@ prediction = [
 ]
 ```
 
-`start` and `end` are integers or floats in seconds, and `label_id` are integer IDs for each action.
+`start` and `end` are integers or floats in seconds, and `label_id` is an integer ID for each action. Ideally, the `(start, end)` sections should cover the whole video duration.
 
 And the actual temporal label, for example, is
 
@@ -112,7 +112,7 @@ Save to an `.ass` file:
 sub.save('tutorial.ass')
 ```
 
-Finally, play the video and load the subtitle to the player. Make sure your video player supports `.ass` subtitle, for example VLC media player and PotPlayer. Here is how it looks like on a blank video:
+Finally, play the video and load the subtitle into the player. Make sure your video player supports `.ass` subtitles, for example PotPlayer. (Note that VLC media player sometimes does not display the timeline.) Here is what it looks like on a blank video:
 
 <p align="center" width="100%">
     <img width="60%" src="./img/tutorial_result.gif">

diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name = 'vistal',
-    version = '0.0.2',
+    version = '0.1.0',
     author = 'x4Cx58x54',
     description = 'A visualization tool for temporal action localization',
     long_description = long_description,
@@ -32,6 +32,7 @@
     packages = setuptools.find_packages(),
     python_requires = '>=3.7',
     install_requires = [
+        'numpy',
         'distinctipy>=1.2.2',
     ],
 )
diff --git a/vistal/timeline.py b/vistal/timeline.py
@@ -7,7 +7,7 @@
 from .subtitle.sections import EventItem
 
 __all__ = [
-    'get_fit_rectangle',
+    'get_inline_rectangle',
     'ColourScheme',
     'ColourSchemeLegend',
     'TimelinePosition',
@@ -16,7 +16,7 @@
 ]
 
 
-def get_fit_rectangle(font_size):
+def get_inline_rectangle(font_size):
     return Rectangle(0, font_size/10, font_size/1.3, font_size/1.3)
 
 
@@ -135,6 +135,39 @@ def __iter__(self):
         return iter(self.event_items)
 
 
+import numpy as np
+
+def temporal_repartition(temporal_list):
+    '''
+    Repartition possibly overlapping sections into disjoint parts.
+
+    temporal_list is a list of (start, end, label_id). The time axis is cut
+    at every distinct start/end; each part between two consecutive cuts
+    gets the sorted unique IDs of the sections covering it. A part that no
+    section covers is kept with an empty list. Returns plain Python values:
+    temporal_repartition([
+        (1, 3, 0),
+        (2, 4, 1),
+    ])
+    =>
+    [
+        (1, 2, [0]), (2, 3, [0, 1]), (3, 4, [1]),
+    ]
+    '''
+    boundaries = np.unique(
+        [t for start, end, _ in temporal_list for t in (start, end)]
+    ).tolist()
+    new_temporal_list = []
+    for start, end in zip(boundaries[:-1], boundaries[1:]):
+        # The midpoint lies strictly inside the part, so it decides membership.
+        middle = (start + end) / 2
+        label_ids = np.unique(
+            [i for s, e, i in temporal_list if s <= middle < e]
+        ).tolist()
+        new_temporal_list.append((start, end, label_ids))
+    return new_temporal_list
+
+
 class Timeline(EventItemContainer):
     def __init__(
         self, name: str,
@@ -155,38 +188,56 @@ def __init__(
         else:
             raise ValueError('Unsupported label_names type.')
 
+        temporal_list_rep = temporal_repartition(temporal_list)
+
         tl_pos = tl_pos_cal(idx)
-        for i in temporal_list:
-            start, end, label_id = i
-
-            # Text
-            text = Position(tl_pos.text_x, tl_pos.text_y)
-            label_id_str = str(label_id).rjust(max_label_len)
-            text += f'{self.name}: {label_id_str} '
-            text += colour_scheme[label_id].tag() # set colour for the square
-            text += '{\\bord1\\shad0}' # no border and shadow for the square
-            text += get_fit_rectangle(tl_pos_cal.font_size)
-            text += '{\\r}' # reset style
-            text += f' {label_names[label_id]}'
+
+        name_text = Position(tl_pos.text_x, tl_pos.text_y)
+        name_text += f'{self.name}:  '
+
+        # Label texts
+        for start, end, label_ids in temporal_list_rep:
+            label_texts = []
+            for label_i, label_id in enumerate(label_ids):
+                label_text = str(label_id).rjust(max_label_len)
+                label_text += colour_scheme[label_id].tag() # set colour for the square
+                label_text += ' {\\bord1\\shad0}' # no border and shadow for the square
+                label_text += get_inline_rectangle(tl_pos_cal.font_size)
+                label_text += '{\\r}' # reset style
+                label_text += f' {label_names[label_id]}'
+                label_texts.append(label_text)
             self.event_items.append(
                 EventItem(
                     name='Dialogue', Start=Time(start), End=Time(end),
-                    Style='TimelineText', Text=text
+                    Style='TimelineText', Text=name_text+', '.join(label_texts)
                 )
             )
-            if colour_scheme[label_id].alpha != 255:
-                rect_x = start / video_duration * tl_pos_cal.display_width
-                rect_y = tl_pos.timeline_y
-                rect_w = (end-start)/video_duration*tl_pos_cal.display_width
-                rect_h = tl_pos_cal.timeline_height
-                rect = Rectangle(rect_x, rect_y, rect_w, rect_h)
-                rect = Position(0, 0) + colour_scheme[label_id].tag() + rect
-                self.event_items.append(
-                    EventItem(
-                        'Dialogue', Start=Time(0), End=Time(video_duration),
-                        Style='TimelineRect', Text=rect
+
+        # Colour rectangles
+        for start, end, label_ids in temporal_list_rep:
+            rect_x = start / video_duration * tl_pos_cal.display_width
+            rect_y = tl_pos.timeline_y
+            rect_w = (end-start) / video_duration * tl_pos_cal.display_width
+            rect_h = tl_pos_cal.timeline_height
+            rect_l_h = rect_h / len(label_ids)
+
+            for label_i, label_id in enumerate(label_ids):
+                if colour_scheme[label_id].alpha != 255:
+                    rect = Rectangle(
+                        rect_x,
+                        rect_y + label_i*rect_l_h,
+                        rect_w,
+                        rect_l_h
                     )
-                )
+                    rect = colour_scheme[label_id].tag() + rect
+                    self.event_items.append(
+                        EventItem(
+                            'Dialogue', Start=Time(0), End=Time(video_duration),
+                            Style='TimelineRect', Text=Position(0, 0)+rect
+                        )
+                    )
+
+        # Moving cursor
         rect_cursor = Colour().tag()
         rect_cursor += Move(0, tl_pos.timeline_y,
         tl_pos_cal.display_width, tl_pos.timeline_y)
@@ -210,7 +261,7 @@ def __init__(
         text = [
             '{\\bord1\\shad0}'
             + colour_scheme[i].tag()
-            + get_fit_rectangle(tl_pos_cal.font_size)
+            + get_inline_rectangle(tl_pos_cal.font_size)
             + f'{{\\bord0\\shad0\\fs{max(1, tl_pos_cal.font_size//4)}}}\\h'
             # a small hard space
             + '{\\r}' # reset style