Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] spokenwoz recipe #1174

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lhotse/bin/modes/recipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
from .rir_noise import *
from .speechcommands import *
from .spgispeech import *
from .spokenwoz import *
from .stcmds import *
from .switchboard import *
from .tal_asr import *
Expand Down
62 changes: 62 additions & 0 deletions lhotse/bin/modes/recipes/spokenwoz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from typing import Sequence

import click

from lhotse.bin.modes import download, prepare
from lhotse.recipes import download_spokenwoz, prepare_spokenwoz
from lhotse.utils import Pathlike

__all__ = ["spokenwoz"]


# Registers `spokenwoz` as a sub-command of the `prepare` click group.
# NOTE: click uses the function docstring as the command's `--help` text,
# so the docstring is intentionally kept to a single short line.
@prepare.command(context_settings=dict(show_default=True))
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
@click.option(
    "-j",
    "--num-jobs",
    type=int,
    default=1,
    help="How many jobs to use (can give good speed-ups with slow disks).",
)
@click.option(
    "-p",
    "--dataset-splits",
    type=str,
    default=["all"],
    multiple=True,
    help="List of dataset parts to prepare. To prepare multiple parts, pass each with `-p` "
    "Example: `-p train -p dev -p test`",
)
def spokenwoz(
    corpus_dir: Pathlike,
    output_dir: Pathlike,
    dataset_splits: Sequence[str],
    num_jobs: int,
) -> None:
    """SpokenWOZ data preparation."""
    # Thin CLI wrapper: the parsed command-line values are forwarded unchanged
    # to the recipe function. `corpus_dir` must already exist (enforced by the
    # `click.Path(exists=True)` argument type above); `dataset_splits` holds
    # one entry per `-p` flag because the option is declared `multiple=True`.
    prepare_spokenwoz(
        corpus_dir,
        output_dir=output_dir,
        num_jobs=num_jobs,
        dataset_splits=dataset_splits,
    )


# Registers `spokenwoz` as a sub-command of the `download` click group.
# The function name deliberately matches the `prepare` command defined
# earlier in this module: each decorator registers its own command object
# with its own group, so the CLI exposes both even though the module-level
# name ends up bound to this later definition.
# NOTE: click uses the function docstring as the command's `--help` text,
# so the docstring is intentionally kept to a single short line.
@download.command(context_settings=dict(show_default=True))
@click.argument("target_dir", type=click.Path())
@click.option(
    "-p",
    "--dataset-parts",
    type=str,
    default=["all"],
    multiple=True,
    help="List of dataset parts to download. To download multiple parts, pass each with `-p` "
    "Example: `-p train_dev -p test`",
)
def spokenwoz(
    target_dir: Pathlike,
    dataset_parts: Sequence[str],
) -> None:
    """SpokenWOZ data download."""
    # Thin CLI wrapper: forward the parsed values unchanged to the recipe.
    # `dataset_parts` holds one entry per `-p` flag (`multiple=True`).
    download_spokenwoz(target_dir, dataset_parts=dataset_parts)

Check warning on line 62 in lhotse/bin/modes/recipes/spokenwoz.py

View check run for this annotation

Codecov / codecov/patch

lhotse/bin/modes/recipes/spokenwoz.py#L62

Added line #L62 was not covered by tests
1 change: 1 addition & 0 deletions lhotse/recipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
from .rir_noise import download_rir_noise, prepare_rir_noise
from .speechcommands import download_speechcommands, prepare_speechcommands
from .spgispeech import download_spgispeech, prepare_spgispeech
from .spokenwoz import download_spokenwoz, prepare_spokenwoz
from .stcmds import download_stcmds, prepare_stcmds
from .switchboard import prepare_switchboard
from .tedlium import download_tedlium, prepare_tedlium
Expand Down
Loading
Loading