Skip to content

Commit

Permalink
Implement different options for audio overlay
Browse files Browse the repository at this point in the history
Change-Id: If1109f459cf846269d867b100980ec4c01e53d8e
  • Loading branch information
mohabfekry committed Nov 19, 2024
1 parent 62854c5 commit 422d0f7
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 14 deletions.
14 changes: 13 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ limitations under the License.

Update to the latest version by running `npm run update-app` after pulling the latest changes from the repository via `git pull --rebase --autostash`; you would need to redeploy the *UI* for features marked as `frontend`, and *GCP components* for features marked as `backend`.

* [November 2024]: General bug fixes and performance improvements.
* [November 2024]
* `frontend` + `backend`: General bug fixes and performance improvements.
* `frontend` + `backend`: Added the ability to select the timing for audio and music overlays. Read more [here](#42-user-controls-for-video-rendering).
* [October 2024]
* `frontend` + `backend`: Added functionality to "fade out" audio at the end of generated videos. Read more [here](#42-user-controls-for-video-rendering).
* `frontend`: Added functionality to regenerate Demand Gen text assets. Read more [here](#6-output-videos).
Expand Down Expand Up @@ -246,6 +248,16 @@ Users are now ready for combination. They can view the A/V segments and generate
* Vigenair supports different rendering settings for the audio of the generated videos. The image below describes the supported options and how they differ:

<center><img src='./img/audio.png' width="350px" alt="Vigenair's audio rendering options" /></center>

Furthermore, if *Music* or *All audio* overlay is selected, the user can additionally decide how the overlay should be done via one of the following options:

<center><img src='./img/render-settings-overlay.png' alt="Vigenair UI: Variants render settings for audio overlay" /></center>

* **Variant start** (default): Audio will start from the beginning of the first segment in the variant.
* **Video start**: Audio will start from the beginning of the original video, regardless of when the variant starts.
* **Video end**: Audio will end at the end of the original video, regardless of when the variant ends.
* **Variant end**: Audio will end at the end of the last segment in the variant.

* Whether to fade out audio at the end of generated videos. When selected, videos will be faded out for `1s` (configured by the `CONFIG_DEFAULT_FADE_OUT_DURATION` environment variable for the Combiner service).
* Whether to generate [Demand Gen](https://support.google.com/google-ads/answer/13695777) campaign text and image assets alongside the variant or not. Defaults to generating Demand Gen assets using *multimodal* models on Vertex AI, which offers the highest quality of output assets.
* Which formats (horizontal, vertical and square) assets to render. Defaults to rendering horizontal assets only.
Expand Down
Binary file added img/render-settings-overlay.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
37 changes: 29 additions & 8 deletions service/combiner/combiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class VideoVariantRenderSettings:
audio track for the video variant instead of the individual segments'
audio track portions.
fade_out: Whether to fade out the end of the video variant.
overlay_type: How to overlay music / audio for the variant.
"""

generate_image_assets: bool = False
Expand All @@ -61,6 +62,7 @@ class VideoVariantRenderSettings:
use_music_overlay: bool = False
use_continuous_audio: bool = False
fade_out: bool = False
overlay_type: Utils.RenderOverlayType = None

def __init__(self, **kwargs):
field_names = set([f.name for f in dataclasses.fields(self)])
Expand All @@ -76,7 +78,8 @@ def __str__(self):
f'formats={self.formats}, '
f'use_music_overlay={self.use_music_overlay}, '
f'use_continuous_audio={self.use_continuous_audio}, '
f'fade_out={self.fade_out})'
f'fade_out={self.fade_out}, '
f'overlay_type={self.overlay_type})'
)


Expand Down Expand Up @@ -687,14 +690,16 @@ def _render_video_variant(
shot_groups,
)
)
video_duration = Utils.get_media_duration(video_file_path)
(
full_av_select_filter,
music_overlay_select_filter,
continuous_audio_select_filter,
) = _build_ffmpeg_filters(
shot_timestamps,
has_audio,
video_variant.render_settings.fade_out,
video_variant.render_settings,
video_duration,
)

ffmpeg_cmds = _get_variant_ffmpeg_commands(
Expand Down Expand Up @@ -1192,13 +1197,17 @@ def _group_consecutive_segments(
def _build_ffmpeg_filters(
shot_timestamps: Sequence[Tuple[float, float]],
has_audio: bool,
fade_out: bool,
render_settings: VideoVariantRenderSettings,
video_duration: float,
) -> Tuple[str, str, str]:
"""Builds the ffmpeg filters.
Args:
shot_timestamps: A sequence of tuples, where each tuple contains the start
and end timestamps of a shot.
has_audio: Whether the video has audio.
render_settings: The render settings to use.
video_duration: The duration of the video.
Returns:
A tuple containing the full audio/video, music overlay and continuous audio
Expand All @@ -1209,7 +1218,8 @@ def _build_ffmpeg_filters(
select_filter_concat = []
idx = 0
duration = 0
all_start = sys.maxsize
variant_first_segment_start = sys.maxsize
variant_last_segment_end = 0
for start, end in shot_timestamps:
selection_filter = f'between(t,{start},{end})'
video_select_filter.append(
Expand All @@ -1222,17 +1232,28 @@ def _build_ffmpeg_filters(
)
select_filter_concat.append(f'[a{idx}]')
duration += end - start
all_start = min(all_start, start)
variant_first_segment_start = min(variant_first_segment_start, start)
variant_last_segment_end = max(variant_last_segment_end, end)
idx += 1

fade_out_duration = float(ConfigService.CONFIG_DEFAULT_FADE_OUT_DURATION)
fade_out_buffer = float(ConfigService.CONFIG_DEFAULT_FADE_OUT_BUFFER)
fade_out_start = duration - fade_out_duration - fade_out_buffer
fade_out_filter = (
f';[outa]afade=t=out:st={fade_out_start}:d={fade_out_duration}[outa]'
if fade_out else ''
if render_settings.fade_out else ''
)

match render_settings.overlay_type:
case Utils.RenderOverlayType.VIDEO_START.value:
overlay_start = 0
case Utils.RenderOverlayType.VIDEO_END.value:
overlay_start = video_duration - duration
case Utils.RenderOverlayType.VARIANT_END.value:
overlay_start = variant_last_segment_end - duration
case Utils.RenderOverlayType.VARIANT_START.value | _:
overlay_start = variant_first_segment_start

full_av_select_filter = ''.join(
video_select_filter + audio_select_filter + select_filter_concat
+ [f'concat=n={idx}:v=1:a=1[outv][outa]', fade_out_filter]
Expand All @@ -1243,7 +1264,7 @@ def _build_ffmpeg_filters(
music_overlay_select_filter = ''.join(
video_select_filter
+ [entry.replace('0:a', '1:a') for entry in audio_select_filter] + [
f"[2:a]aselect='between(t,{all_start},{all_start+duration})'"
f"[2:a]aselect='between(t,{overlay_start},{overlay_start+duration})'"
',asetpts=N/SR/TB[music];'
] + select_filter_concat + [
f'concat=n={idx}:v=1:a=1[outv][tempa];',
Expand All @@ -1253,7 +1274,7 @@ def _build_ffmpeg_filters(
) if has_audio else ''
continuous_audio_select_filter = ''.join(
video_select_filter + [
f"[0:a]aselect='between(t,{all_start},{all_start+duration})'"
f"[0:a]aselect='between(t,{overlay_start},{overlay_start+duration})'"
',asetpts=N/SR/TB[outa];'
] + [entry for entry in select_filter_concat if entry.startswith('[v')]
+ [f'concat=n={idx}:v=1[outv]', fade_out_filter]
Expand Down
9 changes: 9 additions & 0 deletions service/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@ class RenderFormatType(enum.Enum):
SQUARE = 'square'


class RenderOverlayType(enum.Enum):
"""Enum of possible render overlays."""

# Each value is the string carried in the `overlay_type` render setting; it
# selects which window of the overlay audio track is used for a variant.
# Overlay audio begins at the start of the variant's first segment.
VARIANT_START = 'variant_start'
# Overlay audio ends at the end of the variant's last segment.
VARIANT_END = 'variant_end'
# Overlay audio begins at the start of the original video.
VIDEO_START = 'video_start'
# Overlay audio ends at the end of the original video.
VIDEO_END = 'video_end'


class VideoExtension(enum.Enum):
"""Enum of supported video file extensions."""

Expand Down
6 changes: 6 additions & 0 deletions ui/src/ui/src/app/api-calls/api-calls.service.interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
import { Observable } from 'rxjs';

export type FormatType = 'horizontal' | 'vertical' | 'square';
/**
 * Timing options for audio / music overlays. The literals match the values
 * of the backend `RenderOverlayType` enum.
 */
export type OverlayType =
| 'variant_start'
| 'variant_end'
| 'video_start'
| 'video_end';

export interface GenerationSettings {
prompt: string;
Expand Down Expand Up @@ -61,6 +66,7 @@ export interface RenderSettings {
use_music_overlay: boolean;
use_continuous_audio: boolean;
fade_out: boolean;
overlay_type: OverlayType;
}

export interface RenderQueue {
Expand Down
35 changes: 32 additions & 3 deletions ui/src/ui/src/app/app.component.html
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,27 @@
</mat-select>
</mat-form-field>
</div>
<div style="margin-top: 8px" *ngIf="audioSettings !== 'segment'">
<mat-form-field subscriptSizing="dynamic">
<mat-label>Overlay settings:</mat-label>
<mat-select [(ngModel)]="overlaySettings">
<span matTooltip="Start from the first segment of your variant">
<mat-option value="variant_start">Variant start</mat-option>
</span>
<span
matTooltip="Start from the beginning of the original video"
>
<mat-option value="video_start">Video start</mat-option>
</span>
<span matTooltip="End with the ending of the original video">
<mat-option value="video_end">Video end</mat-option>
</span>
<span matTooltip="End with the last segment of your variant">
<mat-option value="variant_end">Variant end</mat-option>
</span>
</mat-select>
</mat-form-field>
</div>
<div class="user-settings" style="margin-top: 8px">
<span matTooltip="Fades out audio at the end of the video">
<mat-checkbox color="primary" [(ngModel)]="fadeOut">
Expand Down Expand Up @@ -459,7 +480,11 @@
: 'Click to select/unselect each format'
}}"
>
<mat-button-toggle [checked]="!nonLandscapeInputVideo" disabled value="horizontal">
<mat-button-toggle
[checked]="!nonLandscapeInputVideo"
disabled
value="horizontal"
>
<mat-icon>computer</mat-icon>
</mat-button-toggle>
<mat-button-toggle value="vertical">
Expand Down Expand Up @@ -641,9 +666,13 @@
<b>Audio:</b>
{{
(variant.render_settings.use_continuous_audio
? 'Overlay all'
? 'Overlay all (' +
variant.render_settings.overlay_type.replace('_', ' ') +
')'
: variant.render_settings.use_music_overlay
? 'Overlay music'
? 'Overlay music (' +
variant.render_settings.overlay_type.replace('_', ' ') +
')'
: 'Individual segments') +
(variant.render_settings.fade_out ? ' + Fade out' : '')
}}
Expand Down
10 changes: 10 additions & 0 deletions ui/src/ui/src/app/app.component.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ import {
AvSegment,
FormatType,
GenerateVariantsResponse,
OverlayType,
RenderedVariant,
RenderQueueVariant,
RenderSettings,
Expand Down Expand Up @@ -144,6 +145,7 @@ export class AppComponent {
duration = 0;
step = 0;
audioSettings = 'segment';
overlaySettings: OverlayType = 'variant_start';
fadeOut = false;
demandGenAssets = true;
analyseAudio = true;
Expand Down Expand Up @@ -530,6 +532,11 @@ export class AppComponent {
this.subtitlesTrack = '';
this.cropAreaRect = undefined;
this.nonLandscapeInputVideo = false;
this.audioSettings = 'segment';
this.overlaySettings = 'variant_start';
this.fadeOut = false;
this.demandGenAssets = true;
this.analyseAudio = true;
this.previewVideoElem.nativeElement.pause();
this.VideoComboComponent?.videoElem.nativeElement.pause();
this.videoMagicPanel.close();
Expand Down Expand Up @@ -998,6 +1005,7 @@ export class AppComponent {
use_music_overlay: this.audioSettings === 'music',
use_continuous_audio: this.audioSettings === 'continuous',
fade_out: this.fadeOut,
overlay_type: this.overlaySettings,
};
const selectedScenes = selectedSegments.map(
(segment: AvSegment) => segment.av_segment_id
Expand Down Expand Up @@ -1067,6 +1075,8 @@ export class AppComponent {
: variant.render_settings.use_continuous_audio
? 'continuous'
: 'segment';
this.fadeOut = variant.render_settings.fade_out;
this.overlaySettings = variant.render_settings.overlay_type;
this.closeRenderQueueSidenav();
setTimeout(() => {
this.loadingVariant = false;
Expand Down
8 changes: 6 additions & 2 deletions ui/src/ui/src/app/video-combo/video-combo.component.html
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,13 @@
Audio:
{{
(combo.render_settings.use_continuous_audio
? 'Overlay all'
? 'Overlay all (' +
combo.render_settings.overlay_type.replace('_', ' ') +
')'
: combo.render_settings.use_music_overlay
? 'Overlay music'
? 'Overlay music (' +
combo.render_settings.overlay_type.replace('_', ' ') +
')'
: 'Individual segments') +
(combo.render_settings.fade_out ? ' + Fade out' : '')
}}
Expand Down

0 comments on commit 422d0f7

Please sign in to comment.