From 332cb08ac02ba2a88751c1cfb87d252413753720 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Liisa=20R=C3=A4tsep?= Date: Tue, 25 May 2021 15:59:17 +0300 Subject: [PATCH] environment specification, estonian test sentences --- config/session_paths.yaml | 2 +- config/tts_config_est.yaml | 42 ++++++++++++++++++++++++++++++++++++++ environment.yml | 21 +++++++++++++++++++ test_sentences_est.txt | 6 ++++++ 4 files changed, 70 insertions(+), 1 deletion(-) create mode 100755 config/tts_config_est.yaml create mode 100755 environment.yml create mode 100755 test_sentences_est.txt diff --git a/config/session_paths.yaml b/config/session_paths.yaml index caf249d..993f323 100644 --- a/config/session_paths.yaml +++ b/config/session_paths.yaml @@ -3,7 +3,7 @@ wav_directory: '/path/to/wav_directory' # path to directory cointaining the wavs metadata_path: '/path/to/metadata.csv' # name of metadata file under wav_directory log_directory: '/path/to/logs_directory' # weights and logs are stored here train_data_directory: 'transformer_tts_data' # training data is stored here -data_config: 'config/data_config_est.yaml' +data_config: 'config/data_config.yaml' aligner_config: 'config/aligner_config.yaml' tts_config: 'config/tts_config.yaml' diff --git a/config/tts_config_est.yaml b/config/tts_config_est.yaml new file mode 100755 index 0000000..925e5ed --- /dev/null +++ b/config/tts_config_est.yaml @@ -0,0 +1,42 @@ +# ARCHITECTURE +decoder_model_dimension: 384 +encoder_model_dimension: 384 +decoder_num_heads: [2, 2, 2, 2, 2, 2] # the length of this defines the number of layers +encoder_num_heads: [2, 2, 2, 2, 2, 2] # the length of this defines the number of layers +encoder_feed_forward_dimension: null +decoder_feed_forward_dimension: null +decoder_prenet_dimension: 384 +encoder_prenet_dimension: 384 +encoder_attention_conv_filters: [1536, 384] +decoder_attention_conv_filters: [1536, 384] +encoder_attention_conv_kernel: 3 +decoder_attention_conv_kernel: 3 +encoder_max_position_encoding: 2000 +decoder_max_position_encoding: 10000 +encoder_dense_blocks: 0 +decoder_dense_blocks: 0 +# STATS PREDICTORS ARCHITECTURE +duration_conv_filters: [256, 226] +pitch_conv_filters: [256, 226] +duration_kernel_size: 3 +pitch_kernel_size: 3 + +# TRAINING +predictors_dropout: 0.1 +dropout_rate: 0.1 +learning_rate_schedule: + - [0, 1.0e-4] +max_steps: 260_000 +debug: False + +# LOGGING +validation_frequency: 5_000 +prediction_frequency: 5_000 +weights_save_frequency: 5_000 +train_images_plotting_frequency: 1_000 +keep_n_weights: 5 +keep_checkpoint_every_n_hours: 12 +n_steps_avg_losses: [100, 500, 1_000, 5_000] # command line display of average loss values for the last n steps +prediction_start_step: 4_000 +text_prediction: + - test_sentences_est.txt \ No newline at end of file diff --git a/environment.yml b/environment.yml new file mode 100755 index 0000000..5e155cb --- /dev/null +++ b/environment.yml @@ -0,0 +1,21 @@ +channels: + - conda-forge + - anaconda +dependencies: + - python==3.8 + - matplotlib==3.2.2 + - librosa==0.7.1 + - numba==0.48 + - numpy==1.17.4 + - ruamel.yaml==0.16.6 + - cudnn + - tqdm==4.42.1 + - pysoundfile + - scipy + - pip + - pip: + - tensorflow-gpu==2.5.0 + - webrtcvad + - p_tqdm + - pyworld + - phonemizer==2.2.1 \ No newline at end of file diff --git a/test_sentences_est.txt b/test_sentences_est.txt new file mode 100755 index 0000000..70d238d --- /dev/null +++ b/test_sentences_est.txt @@ -0,0 +1,6 @@ +külma on üks kuni viis kraadi ja saartel on õhutemperatuur miinus ühe ja pluss ühe kraadi vahel. +kanepi läbis austraalias kvalifikatsiooni edukalt ja pääses kolmekümne kahe parema hulka. +võõra viipekaardi leidnud alaealised lõid laiaks suure summa. +las vegases lasi mees maha kaks hotelli turvatöötajat. +muudatuste eesmärk on vähendada haigestumisel töötajate omavastutust ning langetada inimeste haigena tööl käimise riski ning koroonaviiruse levikut. +"teie räägite hirmsaid asju!" hüüdis perenaine, nagu hakkaks tal õudne, ja villul oli tundmus, et perenaine nihkub talle pisut lähemale. \ No newline at end of file