
Merge pull request #1 from david-ryan-snyder/xvector
Xvector
pegahgh committed Feb 22, 2016
2 parents c5e78c7 + fbfc27b commit 33206a0
Showing 94 changed files with 3,337 additions and 1,390 deletions.
49 changes: 25 additions & 24 deletions egs/ami/s5/cmd.sh
@@ -1,37 +1,38 @@
# "queue.pl" uses qsub. The options to it are
# options to qsub. If you have GridEngine installed,
# change this to a queue you have access to.
# Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of cpus on your machine.
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances 'queue.pl' to run.pl (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export train_cmd="queue.pl --mem 1G"
export decode_cmd="queue.pl --mem 2G"
# the use of cuda_cmd is deprecated but it is sometimes still used in nnet1
# scripts.
export cuda_cmd="queue.pl --gpu 1 --mem 20G"

# the rest of this file is present for historical reasons.
# In general it's best to rely on conf/queue.conf for cluster-specific
# configuration.

# On Eddie use:
#export train_cmd="queue.pl -P inf_hcrc_cstr_nst -l h_rt=08:00:00"
#export decode_cmd="queue.pl -P inf_hcrc_cstr_nst -l h_rt=05:00:00 -pe memory-2G 4"
#export highmem_cmd="queue.pl -P inf_hcrc_cstr_nst -l h_rt=05:00:00 -pe memory-2G 4"
#export scoring_cmd="queue.pl -P inf_hcrc_cstr_nst -l h_rt=00:20:00"

# JSALT2015 workshop, cluster AWS-EC2, (setup from Vijay)
export train_cmd="queue.pl -l arch=*64* --mem 1G"
export decode_cmd="queue.pl -l arch=*64* --mem 2G"
export highmem_cmd="queue.pl -l arch=*64* --mem 4G"
export scoring_cmd="queue.pl -l arch=*64*"
export cuda_cmd="queue.pl --gpu 1 -l mem_free=20G,ram_free=20G"
export cntk_decode_cmd="queue.pl -l arch=*64* --mem 1G -pe smp 2"

# To run locally, use:
#export train_cmd=run.pl
#export decode_cmd=run.pl
#export highmem_cmd=run.pl
#export cuda_cmd=run.pl

if [ "$(hostname -d)" == "fit.vutbr.cz" ]; then
# BUT cluster:
queue="all.q@@blade,all.q@@speech"
gpu_queue="long.q@supergpu*,long.q@dellgpu*,long.q@pcspeech-gpu,long.q@pcgpu*"
gpu_queue="long.q@@gpu"
storage="matylda5"
export train_cmd="queue.pl -q $queue -l ram_free=1500M,mem_free=1500M,${storage}=1"
export decode_cmd="queue.pl -q $queue -l ram_free=2500M,mem_free=2500M,${storage}=0.5"
export train_cmd="queue.pl -q $queue -l ram_free=1.5G,mem_free=1.5G,${storage}=1"
export decode_cmd="queue.pl -q $queue -l ram_free=2.5G,mem_free=2.5G,${storage}=0.5"
export cuda_cmd="queue.pl -q $gpu_queue -l gpu=1"
fi
fi
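
The new cmd.sh header above points to conf/queue.conf for cluster-specific settings. As a rough sketch only, not part of this commit and with site-specific resource and queue names assumed, such a file maps the generic options onto scheduler flags along these lines:

command qsub -v PATH -cwd -S /bin/bash -j y
option mem=* -l mem_free=$0,ram_free=$0
option num_threads=* -pe smp $0
default gpu=0
option gpu=* -l gpu=$0 -q g.q

With a config like this, "--mem 2G" passed to queue.pl would become "-l mem_free=2G,ram_free=2G" on the qsub command line, and "--gpu 1" would request a GPU in the (assumed) g.q queue.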

26 changes: 13 additions & 13 deletions egs/ami/s5/run_ihm.sh
@@ -10,13 +10,13 @@ mic=ihm
stage=0
. utils/parse_options.sh

# Set bash to 'debug' mode, it prints the commands (option '-x') and exits on :
# Set bash to 'debug' mode, it prints the commands (option '-x') and exits on :
# -e 'error', -u 'undefined variable', -o pipefail 'error in pipeline',
set -euxo pipefail

# Path where AMI gets downloaded (or where locally available):
AMI_DIR=$PWD/wav_db # Default,
case $(hostname -d) in
AMI_DIR=$PWD/wav_db # Default,
case $(hostname -d) in
fit.vutbr.cz) AMI_DIR=/mnt/scratch05/iveselyk/KALDI_AMI_WAV ;; # BUT,
clsp.jhu.edu) AMI_DIR=/export/corpora4/ami/amicorpus ;; # JHU,
cstr.ed.ac.uk) AMI_DIR= ;; # Edinburgh,
@@ -86,7 +86,7 @@ if [ $stage -le 5 ]; then
data/$mic/train data/lang exp/$mic/tri2a exp/$mic/tri2_ali
# Decode,
graph_dir=exp/$mic/tri2a/graph_${LM}
$highmem_cmd $graph_dir/mkgraph.log \
$cmd --mem 4G $graph_dir/mkgraph.log \
utils/mkgraph.sh data/lang_${LM} exp/$mic/tri2a $graph_dir
steps/decode.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
$graph_dir data/$mic/dev exp/$mic/tri2a/decode_dev_${LM}
@@ -104,26 +104,26 @@ if [ $stage -le 6 ]; then
data/$mic/train data/lang exp/$mic/tri3a exp/$mic/tri3a_ali
# Decode,
graph_dir=exp/$mic/tri3a/graph_${LM}
$highmem_cmd $graph_dir/mkgraph.log \
$cmd --mem 4G $graph_dir/mkgraph.log \
utils/mkgraph.sh data/lang_${LM} exp/$mic/tri3a $graph_dir
steps/decode.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
$graph_dir data/$mic/dev exp/$mic/tri3a/decode_dev_${LM}
$graph_dir data/$mic/dev exp/$mic/tri3a/decode_dev_${LM}
steps/decode.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
$graph_dir data/$mic/eval exp/$mic/tri3a/decode_eval_${LM}
fi
fi

if [ $stage -le 7 ]; then
# Train tri4a, which is LDA+MLLT+SAT,
steps/train_sat.sh --cmd "$train_cmd" \
5000 80000 data/$mic/train data/lang exp/$mic/tri3a_ali exp/$mic/tri4a
# Decode,
# Decode,
graph_dir=exp/$mic/tri4a/graph_${LM}
$highmem_cmd $graph_dir/mkgraph.log \
utils/mkgraph.sh data/lang_${LM} exp/$mic/tri4a $graph_dir
steps/decode_fmllr.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
$graph_dir data/$mic/dev exp/$mic/tri4a/decode_dev_${LM}
$graph_dir data/$mic/dev exp/$mic/tri4a/decode_dev_${LM}
steps/decode_fmllr.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
$graph_dir data/$mic/eval exp/$mic/tri4a/decode_eval_${LM}
$graph_dir data/$mic/eval exp/$mic/tri4a/decode_eval_${LM}
fi

nj_mmi=80
@@ -160,11 +160,11 @@ if [ $stage -le 11 ]; then
decode_dir=exp/$mic/tri4a_mmi_b0.1/decode_dev_${i}.mdl_${LM}
steps/decode.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
--transform-dir exp/$mic/tri4a/decode_dev_${LM} --iter $i \
$graph_dir data/$mic/dev $decode_dir
$graph_dir data/$mic/dev $decode_dir
decode_dir=exp/$mic/tri4a_mmi_b0.1/decode_eval_${i}.mdl_${LM}
steps/decode.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
--transform-dir exp/$mic/tri4a/decode_eval_${LM} --iter $i \
$graph_dir data/$mic/eval $decode_dir
$graph_dir data/$mic/eval $decode_dir
done
fi

@@ -181,7 +181,7 @@ if [ $stage -le 13 ]; then
--hidden-dim 950 \
--splice-indexes "layer0/-2:-1:0:1:2 layer1/-1:2 layer2/-3:3 layer3/-7:2 layer4/-3:3" \
--use-sat-alignments true

local/online/run_nnet2_ms_sp_disc.sh \
--mic $mic \
--gmm-dir exp/$mic/tri4a \
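For context on the $highmem_cmd to "$cmd --mem 4G" change above: all of these *_cmd variables name a launcher (queue.pl, run.pl or slurm.pl) that is called with per-job options, an optional JOB=1:N range, a log path, and then the command to run, so generic options like --mem travel with the variable rather than being baked into separate highmem variants. A hypothetical invocation, with made-up paths and job count, looks like this:

$train_cmd JOB=1:4 exp/make_mfcc/log/make_mfcc.JOB.log \
  compute-mfcc-feats scp:data/train/split4/JOB/wav.scp \
    ark:mfcc/raw_mfcc_train.JOB.ark
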
43 changes: 16 additions & 27 deletions egs/aurora4/s5/cmd.sh
@@ -1,29 +1,18 @@
# "queue.pl" uses qsub. The options to it are
# options to qsub. If you have GridEngine installed,
# change this to a queue you have access to.
# Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of cpus on your machine.
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances 'queue.pl' to run.pl (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

#a) JHU cluster options
export train_cmd="queue.pl -l arch=*64"
export decode_cmd="queue.pl -l arch=*64 --mem 2G"
export mkgraph_cmd="queue.pl -l arch=*64 --mem 4G"
export big_memory_cmd="queue.pl -l arch=*64 --mem 8G"
export train_cmd="queue.pl --mem 4G"
export decode_cmd="queue.pl --mem 4G"
export mkgraph_cmd="queue.pl --mem 8G"
# the use of cuda_cmd is deprecated but it's still used in some example scripts
# here.
export cuda_cmd="queue.pl --gpu 1"


#b) BUT cluster options
#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M"
#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G"

#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G"

#c) run it locally...
#export train_cmd=run.pl
#export decode_cmd=run.pl
#export cuda_cmd=run.pl
#export mkgraph_cmd=run.pl
44 changes: 15 additions & 29 deletions egs/babel/s5/cmd.sh
@@ -1,29 +1,15 @@
# "queue.pl" uses qsub. The options to it are
# options to qsub. If you have GridEngine installed,
# change this to a queue you have access to.
# Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of cpus on your machine.

#a) JHU cluster options
export train_cmd="queue.pl -l arch=*64"
export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"

#export cuda_cmd="..."


#b) BUT cluster options
#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M"
#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G"

#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G"

#c) run it locally...
#export train_cmd=run.pl
#export decode_cmd=run.pl
#export cuda_cmd=run.pl
#export mkgraph_cmd=run.pl
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances 'queue.pl' to run.pl (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export train_cmd="queue.pl --mem 2G"
export decode_cmd="queue.pl --mem 4G"
export mkgraph_cmd="queue.pl --mem 8G"
44 changes: 15 additions & 29 deletions egs/babel/s5b/cmd.sh
@@ -1,29 +1,15 @@
# "queue.pl" uses qsub. The options to it are
# options to qsub. If you have GridEngine installed,
# change this to a queue you have access to.
# Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of cpus on your machine.

#a) JHU cluster options
export train_cmd="queue.pl -l arch=*64"
export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"

#export cuda_cmd="..."


#b) BUT cluster options
#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M"
#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G"

#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G"

#c) run it locally...
#export train_cmd=run.pl
#export decode_cmd=run.pl
#export cuda_cmd=run.pl
#export mkgraph_cmd=run.pl
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances 'queue.pl' to run.pl (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export train_cmd="queue.pl --mem 4G"
export decode_cmd="queue.pl --mem 4G"
export mkgraph_cmd="queue.pl --mem 8G"
44 changes: 15 additions & 29 deletions egs/babel/s5c/cmd.sh
@@ -1,29 +1,15 @@
# "queue.pl" uses qsub. The options to it are
# options to qsub. If you have GridEngine installed,
# change this to a queue you have access to.
# Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of cpus on your machine.

#a) JHU cluster options
export train_cmd="queue.pl -l arch=*64"
export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"

#export cuda_cmd="..."


#b) BUT cluster options
#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M"
#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G"

#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G"

#c) run it locally...
#export train_cmd=run.pl
#export decode_cmd=run.pl
#export cuda_cmd=run.pl
#export mkgraph_cmd=run.pl
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances 'queue.pl' to run.pl (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export train_cmd="queue.pl --mem 2G"
export decode_cmd="queue.pl --mem 4G"
export mkgraph_cmd="queue.pl --mem 8G"
28 changes: 13 additions & 15 deletions egs/bn_music_speech/v1/cmd.sh
@@ -1,17 +1,15 @@
# "queue.pl" uses qsub. The options to it are
# options to qsub. If you have GridEngine installed,
# change this to a queue you have access to.
# Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of cpus on your machine.

#a) JHU cluster options
export train_cmd="queue.pl -l arch=*64*"

#b) BUT cluster options
#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"

#c) run it locally...
#export train_cmd=run.pl
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances 'queue.pl' to run.pl (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export train_cmd="queue.pl --mem 4G"


33 changes: 15 additions & 18 deletions egs/callhome_egyptian/s5/cmd.sh
@@ -1,18 +1,15 @@
# "queue.pl" uses qsub. The options to it are
# options to qsub. If you have GridEngine installed,
# change this to a queue you have access to.
# Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of cpus on your machine.

#train_cmd='queue.pl -q all.q@a03.clsp.jhu.edu,all.q@a06.clsp.jhu.edu,all.q@a05.clsp.jhu.edu,all.q@v01.clsp.jhu.edu,all.q@a10.clsp.jhu.edu,all.q@a04.clsp.jhu.edu,all.q@a13.clsp.jhu.edu,all.q@a11.clsp.jhu.edu -l arch=*64'
#decode_cmd='queue.pl -q all.q@a03.clsp.jhu.edu,all.q@a06.clsp.jhu.edu,all.q@a05.clsp.jhu.edu,all.q@v01.clsp.jhu.edu,all.q@a10.clsp.jhu.edu,all.q@a04.clsp.jhu.edu,all.q@a13.clsp.jhu.edu,all.q@a11.clsp.jhu.edu -l arch=*64'
train_cmd="queue.pl -l arch=*64"
decode_cmd="queue.pl -l arch=*64"
#train_cmd="run.pl"
# Do training locally. Note: for jobs on smallish subsets,
# it's way faster to run on a single machine with a handful of CPUs, as
# you avoid the latency of starting GridEngine jobs.



# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances 'queue.pl' to run.pl (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export train_cmd="queue.pl --mem 2G"
export decode_cmd="queue.pl --mem 4G"
export mkgraph_cmd="queue.pl --mem 8G"
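
The remaining cmd.sh files in this commit follow the same pattern. On a machine with no queueing system, the comments above suggest replacing queue.pl with run.pl; a minimal local cmd.sh, sketched here for illustration rather than taken from this commit, would be:

export train_cmd="run.pl"
export decode_cmd="run.pl"
export mkgraph_cmd="run.pl"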