Skip to content

Commit

Permalink
support overlapped labels
Browse files Browse the repository at this point in the history
  • Loading branch information
x4Cx58x54 committed Oct 13, 2022
1 parent cf37c2a commit 910d00d
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 32 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ dist/
*.egg-info

*.ass
temp*
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Import the library
from vistal import vistal, ColourScheme, Colour
```

By now this tool supports non-overlapping localization results, i.e. action segmentation results. Pack these temporal labels into a list of `tuple(start, end, label_id)`, for example:
Pack the temporal labels into a list of `tuple(start, end, label_id)`, for example:

```python
prediction = [
Expand All @@ -34,7 +34,7 @@ prediction = [
]
```

`start` and `end` are integers or floats in seconds, and `label_id` are integer IDs for each action.
`start` and `end` are integers or floats in seconds, and `label_id` is an integer ID for each action. Ideally, the `(start, end)` sections should cover the whole video duration.

And the actual temporal label, for example, is

Expand Down Expand Up @@ -112,7 +112,7 @@ Save to an `.ass` file:
sub.save('tutorial.ass')
```

Finally, play the video and load the subtitle to the player. Make sure your video player supports `.ass` subtitle, for example VLC media player and PotPlayer. Here is how it looks like on a blank video:
Finally, play the video and load the subtitle file in the player. Make sure your video player supports `.ass` subtitles, for example PotPlayer. (Note that VLC media player sometimes does not display the timeline.) Here is what it looks like on a blank video:

<p align="center" width="100%">
<img width="60%" src="./img/tutorial_result.gif">
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name = 'vistal',
version = '0.0.2',
version = '0.1.0',
author = 'x4Cx58x54',
description = 'A visualization tool for temporal action localization',
long_description = long_description,
Expand All @@ -32,6 +32,7 @@
packages = setuptools.find_packages(),
python_requires = '>=3.7',
install_requires = [
'numpy',
'distinctipy>=1.2.2',
],
)
107 changes: 79 additions & 28 deletions vistal/timeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from .subtitle.sections import EventItem

__all__ = [
'get_fit_rectangle',
'get_inline_rectangle',
'ColourScheme',
'ColourSchemeLegend',
'TimelinePosition',
Expand All @@ -16,7 +16,7 @@
]


def get_fit_rectangle(font_size):
def get_inline_rectangle(font_size):
return Rectangle(0, font_size/10, font_size/1.3, font_size/1.3)


Expand Down Expand Up @@ -135,6 +135,39 @@ def __iter__(self):
return iter(self.event_items)


import numpy as np

def temporal_repartition(temporal_list):
    '''
    Handle overlaps between sections by repartitioning the time dimension.

    Every distinct start/end value found in ``temporal_list`` becomes a
    boundary. Each pair of consecutive boundaries forms one disjoint part,
    which is associated with the sorted, de-duplicated label IDs of all
    sections covering it. For example:

        temporal_repartition([
            (1, 3, 0),
            (2, 4, 1),
        ])
        =>
        [
            (1, 2, [0]),
            (2, 3, [0, 1]),
            (3, 4, [1]),
        ]

    Args:
        temporal_list: iterable of ``(start, end, label_id)`` tuples.

    Returns:
        A list of ``(start, end, label_ids)`` tuples in ascending time
        order. ``start`` and ``end`` keep the values passed in, and
        ``label_ids`` is a plain ``list``. A part that no section covers
        (a gap between sections) is still returned, with an empty
        ``label_ids`` list. Empty input, or input with a single distinct
        timestamp, yields ``[]``.
    '''
    # Materialise once: the sections are scanned again for every part, so a
    # one-shot iterable (e.g. a generator) must not be exhausted early.
    sections = list(temporal_list)

    # Sorted, de-duplicated boundaries in native Python types.
    timestamps = sorted(
        {t for start, end, _ in sections for t in (start, end)}
    )

    new_temporal_list = []
    for start, end in zip(timestamps[:-1], timestamps[1:]):
        # No boundary lies strictly inside (start, end), so a section covers
        # the whole part exactly when it covers the part's midpoint.
        middle = (start + end) / 2
        middle_label_ids = sorted({
            label_id_0
            for start_0, end_0, label_id_0 in sections
            if start_0 <= middle < end_0
        })
        new_temporal_list.append((start, end, middle_label_ids))
    return new_temporal_list


class Timeline(EventItemContainer):
def __init__(
self, name: str,
Expand All @@ -155,38 +188,56 @@ def __init__(
else:
raise ValueError('Unsupported label_names type.')

temporal_list_rep = temporal_repartition(temporal_list)

tl_pos = tl_pos_cal(idx)
for i in temporal_list:
start, end, label_id = i

# Text
text = Position(tl_pos.text_x, tl_pos.text_y)
label_id_str = str(label_id).rjust(max_label_len)
text += f'{self.name}: {label_id_str} '
text += colour_scheme[label_id].tag() # set colour for the square
text += '{\\bord1\\shad0}' # no border and shadow for the square
text += get_fit_rectangle(tl_pos_cal.font_size)
text += '{\\r}' # reset style
text += f' {label_names[label_id]}'

name_text = Position(tl_pos.text_x, tl_pos.text_y)
name_text += f'{self.name}: '

# Label texts
for start, end, label_ids in temporal_list_rep:
label_texts = []
for label_i, label_id in enumerate(label_ids):
label_text = str(label_id).rjust(max_label_len)
label_text += colour_scheme[label_id].tag() # set colour for the square
label_text += ' {\\bord1\\shad0}' # no border and shadow for the square
label_text += get_inline_rectangle(tl_pos_cal.font_size)
label_text += '{\\r}' # reset style
label_text += f' {label_names[label_id]}'
label_texts.append(label_text)
self.event_items.append(
EventItem(
name='Dialogue', Start=Time(start), End=Time(end),
Style='TimelineText', Text=text
Style='TimelineText', Text=name_text+', '.join(label_texts)
)
)
if colour_scheme[label_id].alpha != 255:
rect_x = start / video_duration * tl_pos_cal.display_width
rect_y = tl_pos.timeline_y
rect_w = (end-start)/video_duration*tl_pos_cal.display_width
rect_h = tl_pos_cal.timeline_height
rect = Rectangle(rect_x, rect_y, rect_w, rect_h)
rect = Position(0, 0) + colour_scheme[label_id].tag() + rect
self.event_items.append(
EventItem(
'Dialogue', Start=Time(0), End=Time(video_duration),
Style='TimelineRect', Text=rect

# Colour rectangles
for start, end, label_ids in temporal_list_rep:
rect_x = start / video_duration * tl_pos_cal.display_width
rect_y = tl_pos.timeline_y
rect_w = (end-start) / video_duration * tl_pos_cal.display_width
rect_h = tl_pos_cal.timeline_height
rect_l_h = rect_h / len(label_ids)

for label_i, label_id in enumerate(label_ids):
if colour_scheme[label_id].alpha != 255:
rect = Rectangle(
rect_x,
rect_y + label_i*rect_l_h,
rect_w,
rect_l_h
)
)
rect = colour_scheme[label_id].tag() + rect
self.event_items.append(
EventItem(
'Dialogue', Start=Time(0), End=Time(video_duration),
Style='TimelineRect', Text=Position(0, 0)+rect
)
)

# Moving cursor
rect_cursor = Colour().tag()
rect_cursor += Move(0, tl_pos.timeline_y,
tl_pos_cal.display_width, tl_pos.timeline_y)
Expand All @@ -210,7 +261,7 @@ def __init__(
text = [
'{\\bord1\\shad0}'
+ colour_scheme[i].tag()
+ get_fit_rectangle(tl_pos_cal.font_size)
+ get_inline_rectangle(tl_pos_cal.font_size)
+ f'{{\\bord0\\shad0\\fs{max(1, tl_pos_cal.font_size//4)}}}\\h'
# a small hard space
+ '{\\r}' # reset style
Expand Down

0 comments on commit 910d00d

Please sign in to comment.