From 332cb08ac02ba2a88751c1cfb87d252413753720 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Liisa=20R=C3=A4tsep?= <liisa.ratsep@ut.ee>
Date: Tue, 25 May 2021 15:59:17 +0300
Subject: [PATCH] environment specification, estonian test sentences

---
 config/session_paths.yaml  |  2 +-
 config/tts_config_est.yaml | 42 ++++++++++++++++++++++++++++++++++++++
 environment.yml            | 21 +++++++++++++++++++
 test_sentences_est.txt     |  6 ++++++
 4 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100755 config/tts_config_est.yaml
 create mode 100755 environment.yml
 create mode 100755 test_sentences_est.txt

diff --git a/config/session_paths.yaml b/config/session_paths.yaml
index caf249d..993f323 100644
--- a/config/session_paths.yaml
+++ b/config/session_paths.yaml
@@ -3,7 +3,7 @@ wav_directory: '/path/to/wav_directory' # path to directory cointaining the wavs
 metadata_path: '/path/to/metadata.csv'  # name of metadata file under wav_directory
 log_directory: '/path/to/logs_directory'   # weights and logs are stored here
 train_data_directory: 'transformer_tts_data'   # training data is stored here
-data_config: 'config/data_config_est.yaml'
+data_config: 'config/data_config.yaml'  # YAML file holding the data configuration
 aligner_config: 'config/aligner_config.yaml'
 tts_config: 'config/tts_config.yaml'
 
diff --git a/config/tts_config_est.yaml b/config/tts_config_est.yaml
new file mode 100755
index 0000000..925e5ed
--- /dev/null
+++ b/config/tts_config_est.yaml
@@ -0,0 +1,42 @@
+# ARCHITECTURE
+decoder_model_dimension: 384
+encoder_model_dimension: 384
+decoder_num_heads: [2, 2, 2, 2, 2, 2]  # the length of this defines the number of layers
+encoder_num_heads: [2, 2, 2, 2, 2, 2]  # the length of this defines the number of layers
+encoder_feed_forward_dimension: null
+decoder_feed_forward_dimension: null
+decoder_prenet_dimension: 384
+encoder_prenet_dimension: 384
+encoder_attention_conv_filters: [1536, 384]
+decoder_attention_conv_filters: [1536, 384]
+encoder_attention_conv_kernel: 3
+decoder_attention_conv_kernel: 3
+encoder_max_position_encoding: 2000
+decoder_max_position_encoding: 10000
+encoder_dense_blocks: 0
+decoder_dense_blocks: 0
+# STATS PREDICTORS ARCHITECTURE
+duration_conv_filters: [256, 226]
+pitch_conv_filters: [256, 226]
+duration_kernel_size: 3
+pitch_kernel_size: 3
+
+# TRAINING
+predictors_dropout: 0.1
+dropout_rate: 0.1
+learning_rate_schedule:  # presumably [step, learning_rate] pairs; confirm against the trainer
+  - [0, 1.0e-4]
+max_steps: 260_000
+debug: false
+
+# LOGGING
+validation_frequency: 5_000
+prediction_frequency: 5_000
+weights_save_frequency: 5_000
+train_images_plotting_frequency: 1_000
+keep_n_weights: 5
+keep_checkpoint_every_n_hours: 12
+n_steps_avg_losses: [100, 500, 1_000, 5_000]  # command line display of average loss values for the last n steps
+prediction_start_step: 4_000
+text_prediction:  # sentence file(s); presumably resolved relative to the working directory, confirm
+  - test_sentences_est.txt
diff --git a/environment.yml b/environment.yml
new file mode 100755
index 0000000..5e155cb
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,21 @@
+channels:
+  - conda-forge
+  - anaconda
+dependencies:
+  - python=3.8  # conda fuzzy pin: accepts any 3.8.x patch release ('==3.8' is an exact match)
+  - matplotlib==3.2.2
+  - librosa==0.7.1
+  - numba==0.48
+  - numpy==1.17.4  # NOTE(review): tensorflow 2.5.0 reportedly requires numpy~=1.19.2, so pip may replace this build; confirm
+  - ruamel.yaml==0.16.6
+  - cudnn
+  - tqdm==4.42.1
+  - pysoundfile
+  - scipy
+  - pip
+  - pip:
+      - tensorflow-gpu==2.5.0
+      - webrtcvad
+      - p_tqdm
+      - pyworld
+      - phonemizer==2.2.1
diff --git a/test_sentences_est.txt b/test_sentences_est.txt
new file mode 100755
index 0000000..70d238d
--- /dev/null
+++ b/test_sentences_est.txt
@@ -0,0 +1,6 @@
+külma on üks kuni viis kraadi ja saartel on õhutemperatuur miinus ühe ja pluss ühe kraadi vahel.
+kanepi läbis austraalias kvalifikatsiooni edukalt ja pääses kolmekümne kahe parema hulka.
+võõra viipekaardi leidnud alaealised lõid laiaks suure summa.
+las vegases lasi mees maha kaks hotelli turvatöötajat.
+muudatuste eesmärk on vähendada haigestumisel töötajate omavastutust ning langetada inimeste haigena tööl käimise riski ning koroonaviiruse levikut.
+"teie räägite hirmsaid asju!" hüüdis perenaine, nagu hakkaks tal õudne, ja villul oli tundmus, et perenaine nihkub talle pisut lähemale.
\ No newline at end of file