diff --git a/.gitignore b/.gitignore index cdcd13ec8b5..7317468dba0 100644 --- a/.gitignore +++ b/.gitignore @@ -73,10 +73,10 @@ GSYMS /src/kaldi.mk.bak # /egs/ -/egs/*/s*/mfcc -/egs/*/s*/plp -/egs/*/s*/exp -/egs/*/s*/data +/egs/*/*/mfcc +/egs/*/*/plp +/egs/*/*/exp +/egs/*/*/data # /tools/ /tools/pocolm/ diff --git a/egs/bentham/README.txt b/egs/bentham/README.txt new file mode 100644 index 00000000000..02870c265f6 --- /dev/null +++ b/egs/bentham/README.txt @@ -0,0 +1,5 @@ +This directory contains example scripts for handwriting recognition on +the Bentham dataset: +http://www.transcriptorium.eu/~htrcontest/contestICFHR2014/public_html/ +In the ICFHR 2014 contest, the best performing system in the unrestricted +track obtained a WER of 8.6%. diff --git a/egs/bentham/v1/cmd.sh b/egs/bentham/v1/cmd.sh new file mode 100755 index 00000000000..3c8eb9f93a5 --- /dev/null +++ b/egs/bentham/v1/cmd.sh @@ -0,0 +1,13 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export cmd="queue.pl" diff --git a/egs/bentham/v1/image b/egs/bentham/v1/image new file mode 120000 index 00000000000..6a4b3afeb09 --- /dev/null +++ b/egs/bentham/v1/image @@ -0,0 +1 @@ +../../cifar/v1/image \ No newline at end of file diff --git a/egs/bentham/v1/local/chain/compare_wer.sh b/egs/bentham/v1/local/chain/compare_wer.sh new file mode 100755 index 00000000000..2ce14e13694 --- /dev/null +++ b/egs/bentham/v1/local/chain/compare_wer.sh @@ -0,0 +1,120 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b} + +# Copyright 2017 Chun Chieh Chang +# 2017 Ashish Arora + +if [ $# == 0 ]; then + echo "Usage: $0: [ ... ]" + echo "e.g.: $0 exp/chain/cnn{1a,1b}" + exit 1 +fi +. 
./path.sh + +echo "# $0 $*" +used_epochs=false + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +echo -n "# WER " +for x in $*; do + wer=$(cat $x/decode_test/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# WER (rescored) " +for x in $*; do + wer="--" + [ -d $x/decode_test_rescored ] && wer=$(cat $x/decode_test_rescored/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# CER " +for x in $*; do + cer=$(cat $x/decode_test/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + +echo -n "# CER (rescored) " +for x in $*; do + cer="--" + [ -d $x/decode_test_rescored ] && cer=$(cat $x/decode_test_rescored/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + +echo -n "# WER val " +for x in $*; do + wer=$(cat $x/decode_val/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# WER (rescored) val " +for x in $*; do + wer="--" + [ -d $x/decode_val_rescored ] && wer=$(cat $x/decode_val_rescored/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# CER val " +for x in $*; do + cer=$(cat $x/decode_val/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + +echo -n "# CER (rescored) val " +for x in $*; do + cer="--" + [ -d $x/decode_val_rescored ] && cer=$(cat $x/decode_val_rescored/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. +fi + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train prob (xent) " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob (xent) " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Parameters " +for x in $*; do + params=$(nnet3-info $x/final.mdl 2>/dev/null | grep num-parameters | cut -d' ' -f2 | awk '{printf "%0.2fM\n",$1/1000000}') + printf "% 10s" $params +done +echo diff --git a/egs/bentham/v1/local/chain/run_cnn_e2eali.sh b/egs/bentham/v1/local/chain/run_cnn_e2eali.sh new file mode 120000 index 00000000000..e2545b0186e --- /dev/null +++ b/egs/bentham/v1/local/chain/run_cnn_e2eali.sh @@ -0,0 +1 @@ +tuning/run_cnn_e2eali_1a.sh \ No newline at end of file diff --git a/egs/bentham/v1/local/chain/run_e2e_cnn.sh b/egs/bentham/v1/local/chain/run_e2e_cnn.sh new file mode 120000 index 00000000000..d26ba0182ce --- /dev/null +++ b/egs/bentham/v1/local/chain/run_e2e_cnn.sh @@ -0,0 +1 @@ +tuning/run_e2e_cnn_1a.sh \ No newline at end of file diff --git a/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh new file mode 100755 index 00000000000..6bac5a22398 --- /dev/null +++ b/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -0,0 +1,261 @@ +#!/bin/bash + +# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ 
exp/chain/cnn_e2eali_1a +# System e2e_cnn_1a cnn_e2eali_1a +# WER 13.72 8.14 +# WER (rescored) 13.40 8.00 +# CER 6.56 2.82 +# CER (rescored) 6.33 2.73 +# WER val 13.51 8.19 +# WER (rescored) val 13.38 7.97 +# CER val 6.40 2.93 +# CER (rescored) val 6.29 2.90 +# Final train prob 0.1037 -0.0613 +# Final valid prob 0.0720 -0.0988 +# Final train prob (xent) -0.3706 +# Final valid prob (xent) -0.4669 +# Parameters 11.54M 4.29M + +# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1a +# exp/chain/cnn_e2eali_1a: num-iters=20 nj=3..5 num-params=4.3M dim=40->336 combine=-0.066->-0.066 (over 1) xent:train/valid[12,19,final]=(-0.822,-0.437,-0.371/-0.859,-0.514,-0.467) logprob:train/valid[12,19,final]=(-0.188,-0.078,-0.061/-0.204,-0.114,-0.099) + +set -e -o pipefail + +stage=0 + +nj=30 +train_set=train +decode_val=true +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. +affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +e2echain_model_dir=exp/chain/e2e_cnn_1a +common_egs_dir= +reporting_email= + +# chain options +train_stage=-10 +xent_regularize=0.1 +frame_subsampling_factor=4 +# training chunk-options +chunk_width=340,300,200,100 +num_leaves=500 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 +tdnn_dim=550 +# training options +srand=0 +remove_egs=true +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g +if $decode_val; then maybe_val=val; else maybe_val= ; fi +dropout_schedule='0,0@0.20,0.2@0.50,0' +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ + --acoustic-scale 1.0 \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + ${train_data_dir} data/lang $e2echain_model_dir $lat_dir + echo "" >$lat_dir/splice_opts +fi + +if [ $stage -le 3 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
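+    # (If the intention really is to rebuild the tree, one option is to remove
+    # the old directory first, e.g. `rm -r $tree_dir`, and re-run this stage.)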
+ exit 1; + fi + + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 1 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$cmd" $num_leaves ${train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 4 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" + tdnn_opts="l2-regularize=0.03" + output_opts="l2-regularize=0.04" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + + conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common3 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' mod?els... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 5 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=true \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 3 \ + --chain.right-tolerance 3 \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=5 \ + --trainer.frames-per-iter=1500000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=5 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=32,16 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 6 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 7 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 + done +fi + + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/bentham/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/bentham/v1/local/chain/tuning/run_e2e_cnn_1a.sh new file mode 100755 index 00000000000..716bdce3729 --- /dev/null +++ b/egs/bentham/v1/local/chain/tuning/run_e2e_cnn_1a.sh @@ -0,0 +1,166 @@ +#!/bin/bash +# Copyright 2017 Hossein Hadian + +# This script does end2end chain training (i.e. 
from scratch) +# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ +# System e2e_cnn_1b +# WER 13.72 +# WER (rescored) 13.40 +# CER 6.56 +# CER (rescored) 6.33 +# WER val 13.51 +# WER (rescored) val 13.38 +# CER val 6.40 +# CER (rescored) val 6.29 +# Final train prob 0.1037 +# Final valid prob 0.0720 +# Final train prob (xent) +# Final valid prob (xent) +# Parameters 11.54M +# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a +# exp/chain/e2e_cnn_1a: num-iters=26 nj=2..4 num-params=11.5M dim=40->17112 combine=0.054->0.054 (over 1) logprob:train/valid[16,25,final]=(0.078,0.102,0.104/0.051,0.069,0.072) +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +affix=1a +nj=30 + +# training options +tdnn_dim=450 +minibatch_size=150=100,64/300=50,32/600=25,16/1200=16,8 +common_egs_dir= +train_set=train +decode_val=true +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g +if $decode_val; then maybe_val=val; else maybe_val= ; fi +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 1 ]; then + steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$cmd" \ + --shared-phones true \ + --type biphone \ + data/$train_set $lang $treedir + $cmd $treedir/log/make_phone_lm.log \ + cat data/$train_set/text \| \ + steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \ + utils/sym2int.pl -f 2- data/lang/phones.txt \| \ + chain-est-phone-lm --num-extra-lm-states=500 \ + ark:- $treedir/phone_lm.fst +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + + conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn4 input=Append(-4,0,4) dim=$tdnn_dim + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi + +if [ $stage -le 3 ]; then + # no need to store the egs in a shared storage because we always + # remove them. Anyway, it takes only 5 minutes to generate them. 
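+  # If you nevertheless want to spread the egs over several disks, the same
+  # utils/create_split_dir.pl pattern used in run_cnn_e2eali_1a.sh could be
+  # applied here; the paths below are CLSP-grid specific and only illustrative:
+  #   utils/create_split_dir.pl \
+  #     /export/b0{3,4,5,6}/$USER/kaldi-data/egs/bentham-$(date +'%m_%d_%H_%M')/v1/$dir/egs/storage \
+  #     $dir/egs/storage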
+ + steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ + --chain.frame-subsampling-factor 4 \ + --chain.alignment-subsampling-factor 4 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1000000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 4 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 4 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 5 ]; then + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 + done +fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/bentham/v1/local/check_tools.sh b/egs/bentham/v1/local/check_tools.sh new file mode 100755 index 00000000000..5b4d3107d3b --- /dev/null +++ b/egs/bentham/v1/local/check_tools.sh @@ -0,0 +1,43 @@ +#!/bin/bash -u + +# Copyright 2015 (c) Johns Hopkins University (Jan Trmal ) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +[ -f ./path.sh ] && . ./path.sh +set +e + +command -v python3 >&/dev/null \ + || { echo >&2 "python3 not found on PATH. You will have to install Python3, preferably >= 3.6"; exit 1; } + +python3 -c "import numpy" +if [ $? -ne 0 ] ; then + echo >&2 "This recipe needs numpy installed." + exit 1 +fi + +python3 -c "import scipy" +if [ $? -ne 0 ] ; then + echo >&2 "This recipe needs scipy installed." + exit 1 +fi + +python3 -c "import scipy.misc; scipy.misc.__dict__['imread']" +if [ $? -ne 0 ] ; then + echo >&2 "This recipe needs scipy-image and Pillow installed." 
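+  # One possible way to install these (exact commands depend on your Python
+  # setup; scipy.misc.imread is absent from recent scipy releases, so an older
+  # scipy such as 1.0.x may be needed), e.g.:
+  #   pip3 install numpy pillow 'scipy==1.0.0'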
+ exit 1 +fi + + +exit 0 diff --git a/egs/bentham/v1/local/create_splits.sh b/egs/bentham/v1/local/create_splits.sh new file mode 100755 index 00000000000..93e8bf1b12e --- /dev/null +++ b/egs/bentham/v1/local/create_splits.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# Copyright 2018 Desh Raj (Johns Hopkins University) + +# This script reads the extracted Bentham database files and creates +# the following files (for all the data subsets): +# text, utt2spk, images.scp. + +download_dir=$1 +save_dir=$2 +mkdir -p $save_dir/{train,val,test} +touch $save_dir/{train,val,test}/{text,images.scp,utt2spk,spk2utt} + +partition_dir=$download_dir"/gt/Partitions/" +lines_dir=$download_dir"/gt/Images/Lines/" +text_dir=$download_dir"/gt/Transcriptions/" + +function split { + echo "Creating $1 split" + split_dir=$save_dir/$1 + line_file=$partition_dir/$2 + + while read -r line; do + name="$line" + spkid=${name:0:11} + echo -n $name" " | cat - $text_dir/$name* >> $split_dir/text + echo >> $split_dir/text + echo $name $lines_dir"/"$name".png" >> $split_dir/images.scp + echo $name $spkid >> $split_dir/utt2spk + done < "$line_file" + + sed -i '/^\s*$/d' $split_dir/images.scp + sed -i '/^\s*$/d' $split_dir/text + sed -i '/^\s*$/d' $split_dir/utt2spk + utils/utt2spk_to_spk2utt.pl $split_dir/utt2spk > $split_dir/spk2utt +} + +split train TrainLines.lst +split val ValidationLines.lst +split test TestLines.lst diff --git a/egs/bentham/v1/local/download_bentham_text.sh b/egs/bentham/v1/local/download_bentham_text.sh new file mode 100755 index 00000000000..e09403718a1 --- /dev/null +++ b/egs/bentham/v1/local/download_bentham_text.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright 2018 Desh Raj +# Apache 2.0 + +## Download all written works of Jeremy Bentham for the Bentham HWR task LM training + +baseurl='http://oll.libertyfund.org/titles/' +savedir=$1 + +mkdir -p $savedir + +declare -a texts=("bentham-the-works-of-jeremy-bentham-vol-1/simple" + "bentham-the-works-of-jeremy-bentham-vol-2/simple" + "bentham-the-works-of-jeremy-bentham-vol-3/simple" + "bentham-the-works-of-jeremy-bentham-vol-5-scotch-reform-real-property-codification-petitions/simple" + "bentham-the-works-of-jeremy-bentham-vol-6/simple" + "bentham-the-works-of-jeremy-bentham-vol-7-rationale-of-judicial-evidence-part-2/simple" + "bentham-the-works-of-jeremy-bentham-vol-8/simple" + "bentham-the-works-of-jeremy-bentham-vol-9-constitutional-code" + "bentham-the-works-of-jeremy-bentham-vol-10-memoirs-part-i-and-correspondence/simple" + "bentham-the-works-of-jeremy-bentham-vol-11-memoirs-of-bentham-part-ii-and-analytical-index") + +counter=1 +for i in "${texts[@]}" +do + echo "Downloading $baseurl$i" + curl -s -N {$baseurl}{$i} | sed -e 's/<[^>]*>//g' > $savedir"/bentham"$counter".txt" + ((counter++)) +done + +cat $savedir"/*.txt" > $savedir"/complete.txt" +rm $savedir"/bentham*.txt" diff --git a/egs/bentham/v1/local/extract_features.sh b/egs/bentham/v1/local/extract_features.sh new file mode 100755 index 00000000000..460e467e99c --- /dev/null +++ b/egs/bentham/v1/local/extract_features.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +# Copyright 2017 Yiwen Shao +# 2018 Ashish Arora + +# Apache 2.0 +# This script runs the make features script in parallel. + +nj=4 +cmd=run.pl +feat_dim=40 +augment='no_aug' +fliplr=false +echo "$0 $@" + +. ./cmd.sh +. ./path.sh +. 
./utils/parse_options.sh || exit 1; + +data=$1 +featdir=$data/data +scp=$data/images.scp +logdir=$data/log + +mkdir -p $logdir +mkdir -p $featdir + +# make $featdir an absolute pathname +featdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $featdir ${PWD}` + +for n in $(seq $nj); do + split_scps="$split_scps $logdir/images.$n.scp" +done + +# split images.scp +utils/split_scp.pl $scp $split_scps || exit 1; + +$cmd JOB=1:$nj $logdir/extract_features.JOB.log \ + image/ocr/make_features.py $logdir/images.JOB.scp \ + --allowed_len_file_path $data/allowed_lengths.txt \ + --num-channels 4 \ + --feat-dim $feat_dim --fliplr $fliplr --augment_type $augment \| \ + copy-feats --compress=true --compression-method=7 \ + ark:- ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp + +## aggregates the output scp's to get feats.scp +for n in $(seq $nj); do + cat $featdir/images.$n.scp || exit 1; +done > $data/feats.scp || exit 1 diff --git a/egs/bentham/v1/local/gen_topo.py b/egs/bentham/v1/local/gen_topo.py new file mode 100755 index 00000000000..540bfbcf270 --- /dev/null +++ b/egs/bentham/v1/local/gen_topo.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python + +# Copyright 2017 (author: Chun-Chieh Chang) + +# Generate a topology file. This allows control of the number of states in the +# non-silence HMMs, and in the silence HMMs. This is a modified version of +# 'utils/gen_topo.pl'. The difference is that this creates two topologies for +# the non-silence HMMs. The number of states for punctuations is different than +# the number of states for other characters. + +from __future__ import print_function +import argparse +import string + +parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo.py " + " " + "e.g.: steps/nnet3/chain/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n", + epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage."); +parser.add_argument("num_nonsil_states", type=int, help="number of states for nonsilence phones"); +parser.add_argument("num_sil_states", type=int, help="number of states for silence phones"); +parser.add_argument("num_punctuation_states", type=int, help="number of states for punctuation"); +parser.add_argument("nonsilence_phones", type=str, + help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9"); +parser.add_argument("silence_phones", type=str, + help="List of silence phones as integers, separated by colons, e.g. 
1:2:3"); +parser.add_argument("phone_list", type=str, help="file containing all phones and their corresponding number."); + +args = parser.parse_args() + +silence_phones = [ int(x) for x in args.silence_phones.split(":") ] +nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ] +all_phones = silence_phones + nonsilence_phones + +punctuation_phones = [] +exclude = set("!(),.?;:'-\"") +with open(args.phone_list) as f: + for line in f: + line = line.strip() + phone = line.split(' ')[0] + if len(phone) == 1 and phone in exclude: + punctuation_phones.append(int(line.split(' ')[1])) +# For nonsilence phones that are not punctuations +print("") +print("") +print("") +print(" ".join([str(x) for x in nonsilence_phones if x not in punctuation_phones])) +print("") +for x in range(0, args.num_nonsil_states): + xp1 = x + 1 + print(" " + str(x) + " " + str(x) + " " + str(x) + " 0.75 " + str(xp1) + " 0.25 ") +print(" " + str(args.num_nonsil_states) + " ") +print("") + +# For nonsilence phones that ar punctuations +print("") +print("") +print(" ".join([str(x) for x in nonsilence_phones if x in punctuation_phones])) +print("") +for x in range(0, args.num_punctuation_states): + xp1 = x + 1 + print(" " + str(x) + " " + str(x) + " " + str(x) + " 0.75 " + str(xp1) + " 0.25 ") +print(" " + str(args.num_punctuation_states) + " ") +print("") + +# For silence phones +print("") +print("") +print(" ".join([str(x) for x in silence_phones])) +print("") +if(args.num_sil_states > 1): + transp = 1.0 / (args.num_sil_states - 1) + + state_str = " 0 0 " + for x in range(0, (args.num_sil_states - 1)): + state_str = state_str + " " + str(x) + " " + str(transp) + " " + state_str = state_str + "" + print(state_str) + + for x in range(1, (args.num_sil_states - 1)): + state_str = " " + str(x) + " " + str(x) + " " + for y in range(1, args.num_sil_states): + state_str = state_str + " " + str(y) + " " + str(transp) + " " + state_str = state_str + "" + print(state_str) + second_last = args.num_sil_states - 1 + print(" " + str(second_last) + " " + str(second_last) + " " + str(second_last) + " 0.75 " + str(args.num_sil_states) + " 0.25 ") + print(" " + str(args.num_sil_states) + " ") +else: + print(" 0 0 0 0.75 1 0.25 ") + print(" " + str(args.num_sil_states) + " ") +print("") +print("") diff --git a/egs/bentham/v1/local/prepare_data.sh b/egs/bentham/v1/local/prepare_data.sh new file mode 100755 index 00000000000..bbcc9863611 --- /dev/null +++ b/egs/bentham/v1/local/prepare_data.sh @@ -0,0 +1,69 @@ +#!/bin/bash + +# Copyright 2018 Desh Raj (Johns Hopkins University) + +# Apache 2.0 + +# This script downloads the Bentham handwriting database and prepares the training +# and test data (i.e text, images.scp, utt2spk and spk2utt) by calling create_splits.sh. + +# In addition, it downloads data for all texts of Bentham for LM training purpose. + +stage=0 +download_dir=data/local/download/ +database_dir="" +text_corpus_dir="" + +mkdir -p $download_dir + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh || exit 1; + +BENTHAM_IMAGES_URL='http://transcriptorium.eu/~tsdata/BenthamR0/BenthamDatasetR0-Images.zip' +BENTHAM_GT_URL='http://transcriptorium.eu/~tsdata/BenthamR0/BenthamDatasetR0-GT.zip' +bentham_images=$database_dir"/images.zip" +bentham_gt=$database_dir"/gt.zip" +bentham_text=$download_dir"/text" + +# download and extract images and transcriptions +if [ ! 
-f $bentham_images ]; then + echo "Downloading images and transcriptions to $database_dir" + mkdir -p $database_dir + wget $BENTHAM_IMAGES_URL -O $bentham_images + wget $BENTHAM_GT_URL -O $bentham_gt +else + echo "Not downloading since corpus already exists" +fi + +if [ ! -d $download_dir/"gt" ]; then + unzip $bentham_gt -d $download_dir + mv $download_dir"/BenthamDatasetR0-GT" $download_dir"/gt" +else + echo "Local extracted corpus already exists" +fi + +# Download extra Bentham text for LM training +if [ -d $text_corpus_dir ]; then + echo "$0: Not downloading Bentham text corpus as it is already there." +else + local/download_bentham_text.sh $text_corpus_dir +fi + +# Copy extra Bentham text to local +if [ -d $bentham_text ]; then + echo "$0: Not copying as local Bentham already present." +else + mkdir -p $bentham_text + cp $text_corpus_dir/Bentham-Text/* $bentham_text + echo "$0: Done copying extra Bentham text to local." +fi + +# Creating train, val, and test splits for all directories +if [ -d data/train ]; then + echo "Data splits and files already exist. Not creating again." +else + echo "Creating train, val, and test splits and corresponding files.." + local/create_splits.sh $download_dir "data/" +fi + diff --git a/egs/bentham/v1/local/prepare_dict.sh b/egs/bentham/v1/local/prepare_dict.sh new file mode 100755 index 00000000000..22db5ae834d --- /dev/null +++ b/egs/bentham/v1/local/prepare_dict.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +# Copyright 2017 Hossein Hadian +# 2017 Babak Rekabdar +# 2017 Chun Chieh Chang +# 2017 Ashish Arora + +# This script prepares the dictionary. + +set -e +dir=data/local/dict +. ./utils/parse_options.sh || exit 1; + +mkdir -p $dir + +local/prepare_lexicon.py $dir + +cut -d' ' -f2- $dir/lexicon.txt | sed 's/SIL//g' | tr ' ' '\n' | sort -u | sed '/^$/d' >$dir/nonsilence_phones.txt || exit 1; + +echo ' SIL' >> $dir/lexicon.txt + +echo SIL > $dir/silence_phones.txt + +echo SIL >$dir/optional_silence.txt + +echo -n "" >$dir/extra_questions.txt diff --git a/egs/bentham/v1/local/prepare_lexicon.py b/egs/bentham/v1/local/prepare_lexicon.py new file mode 100755 index 00000000000..3de96056c2a --- /dev/null +++ b/egs/bentham/v1/local/prepare_lexicon.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Babak Rekabdar +# 2017 Hossein Hadian +# 2017 Chun Chieh Chang +# 2017 Ashish Arora +# Apache 2.0 + +# This script prepares lexicon for BPE. It gets the set of all words that occur in data/train/text. +# Since this lexicon is based on BPE, it replaces '|' with silence. 
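+# For example (illustrative token, not necessarily present in the data), the
+# BPE unit '|the' would get the lexicon entry
+#   |the SIL t h e
+# i.e. the leading word-boundary marker '|' maps to SIL and the remaining
+# characters become the "phones".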
+ +import argparse +import os + +parser = argparse.ArgumentParser(description="""Creates the list of characters and words in lexicon""") +parser.add_argument('dir', type=str, help='output path') +args = parser.parse_args() + +### main ### +lex = {} +text_path = os.path.join('data', 'train', 'text') +with open(text_path, 'r', encoding='utf-8') as f: + for line in f: + line_vect = line.strip().split(' ') + for i in range(1, len(line_vect)): + characters = list(line_vect[i]) + characters = " ".join([ 'SIL' if char == '|' else char for char in characters]) + characters = list(characters) + characters = "".join([ '' if char == '#' else char for char in characters]) + lex[line_vect[i]] = characters + +with open(os.path.join(args.dir, 'lexicon.txt'), 'w', encoding='utf-8') as fp: + for key in sorted(lex): + fp.write(key + " " + lex[key] + "\n") diff --git a/egs/bentham/v1/local/score.sh b/egs/bentham/v1/local/score.sh new file mode 100755 index 00000000000..1d84815fc69 --- /dev/null +++ b/egs/bentham/v1/local/score.sh @@ -0,0 +1,6 @@ + +#!/bin/bash + + +steps/scoring/score_kaldi_wer.sh "$@" +steps/scoring/score_kaldi_cer.sh --stage 2 "$@" diff --git a/egs/bentham/v1/local/train_lm.sh b/egs/bentham/v1/local/train_lm.sh new file mode 100755 index 00000000000..48632a90769 --- /dev/null +++ b/egs/bentham/v1/local/train_lm.sh @@ -0,0 +1,141 @@ +#!/bin/bash + +# Copyright 2016 Vincent Nguyen +# 2016 Johns Hopkins University (author: Daniel Povey) +# 2017 Ashish Arora +# 2017 Hossein Hadian +# 2018 Desh Raj +# Apache 2.0 +# +# This script trains an LM on the Bentham text corpus and training transcriptions. +# It is based on the example scripts distributed with PocoLM + +# It will check if pocolm is installed and if not will proceed with installation + +set -e +stage=0 +vocab_size=50000 + +echo "$0 $@" # Print the command line for logging +. ./utils/parse_options.sh || exit 1; + +dir=data/local/local_lm +lm_dir=${dir}/data +bentham_text_dir=data/local/download/text/ + +mkdir -p $dir +. ./path.sh || exit 1; # for KALDI_ROOT +export PATH=$KALDI_ROOT/tools/pocolm/scripts:$PATH +( # First make sure the pocolm toolkit is installed. + cd $KALDI_ROOT/tools || exit 1; + if [ -d pocolm ]; then + echo Not installing the pocolm toolkit since it is already there. + else + echo "$0: Please install the PocoLM toolkit with: " + echo " cd ../../../tools; extras/install_pocolm.sh; cd -" + exit 1; + fi +) || exit 1; + +bypass_metaparam_optim_opt= +# If you want to bypass the metaparameter optimization steps with specific metaparameters +# un-comment the following line, and change the numbers to some appropriate values. +# You can find the values from output log of train_lm.py. +# These example numbers of metaparameters is for 4-gram model (with min-counts) +# running with train_lm.py. +# The dev perplexity should be close to the non-bypassed model. 
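+# For reference, the option would look something like the line below (the flag
+# is pocolm's --bypass-metaparameter-optimization; the numbers here are only
+# placeholders and must be taken from a real train_lm.py log for this setup):
+#bypass_metaparam_optim_opt="--bypass-metaparameter-optimization=0.091,0.867,0.753,0.275,0.100,0.018"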
+#bypass_metaparam_optim_opt= +# Note: to use these example parameters, you may need to remove the .done files +# to make sure the make_lm_dir.py be called and tain only 3-gram model +#for order in 3; do +#rm -f ${lm_dir}/${num_word}_${order}.pocolm/.done + +if [ $stage -le 0 ]; then + mkdir -p ${dir}/data + mkdir -p ${dir}/data/text + + echo "$0: Getting the Data sources" + + rm ${dir}/data/text/* 2>/dev/null || true + + # Using Bentham text with last 5000 lines for dev + + cat $bentham_text_dir/complete.txt | \ + sed '/^\s*$/d' | \ + utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + | sed 's/@@//g' > ${dir}/bentham.txt + tail -n +5000 ${dir}/bentham.txt > ${dir}/data/text/bentham.txt + + # use the validation data as the dev set. + # Note: the name 'dev' is treated specially by pocolm, it automatically + # becomes the dev set. + head -5000 ${dir}/bentham.txt > ${dir}/data/text/dev.txt + + # use the training data as an additional data source. + # we can later fold the dev data into this. + cat data/train/text | cut -d " " -f 2- > ${dir}/data/text/hwr.txt + + # for reporting perplexities, we'll use the "real" dev set. + # (the validation data is used as ${dir}/data/text/dev.txt to work + # out interpolation weights.) + # note, we can't put it in ${dir}/data/text/, because then pocolm would use + # it as one of the data sources. + cut -d " " -f 2- < data/val/text > ${dir}/data/real_dev_set.txt + + # get the wordlist from Bentham text + cat ${dir}/data/text/{bentham,hwr}.txt | tr '[:space:]' '[\n*]' | grep -v "^\s*$" | sort | uniq -c | sort -bnr > ${dir}/data/word_count + head -n $vocab_size ${dir}/data/word_count | awk '{print $2}' > ${dir}/data/wordlist +fi + +order=6 + +if [ $stage -le 1 ]; then + # decide on the vocabulary. + # Note: you'd use --wordlist if you had a previously determined word-list + # that you wanted to use. + # Note: if you have more than one order, use a certain amount of words as the + # vocab and want to restrict max memory for 'sort', + echo "$0: training the unpruned LM" + min_counts='bentham=1 hwr=1' + wordlist=${dir}/data/wordlist + + lm_name="`basename ${wordlist}`_${order}" + if [ -n "${min_counts}" ]; then + lm_name+="_`echo ${min_counts} | tr -s "[:blank:]" "_" | tr "=" "-"`" + fi + unpruned_lm_dir=${lm_dir}/${lm_name}.pocolm + + train_lm.py --wordlist=${wordlist} --num-splits=10 --warm-start-ratio=20 \ + --limit-unk-history=true \ + ${bypass_metaparam_optim_opt} \ + ${dir}/data/text ${order} ${lm_dir}/work ${unpruned_lm_dir} + + mkdir -p ${dir}/data/arpa + format_arpa_lm.py ${unpruned_lm_dir} | gzip -c > ${dir}/data/arpa/${order}gram_unpruned.arpa.gz + + get_data_prob.py ${dir}/data/real_dev_set.txt ${unpruned_lm_dir} 2>&1 | grep -F '[perplexity' +fi + +if [ $stage -le 2 ]; then + echo "$0: pruning the LM (to larger size)" + # Using 1 million n-grams for a big LM for rescoring purposes. + size=1000000 + prune_lm_dir.py --target-num-ngrams=$size --initial-threshold=0.02 ${unpruned_lm_dir} ${dir}/data/lm_${order}_prune_big + + get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_big 2>&1 | grep -F '[perplexity' + + mkdir -p ${dir}/data/arpa + format_arpa_lm.py ${dir}/data/lm_${order}_prune_big | gzip -c > ${dir}/data/arpa/${order}gram_big.arpa.gz +fi + +if [ $stage -le 3 ]; then + echo "$0: pruning the LM (to smaller size)" + # Using 500,000 n-grams for a smaller LM for graph building. Prune from the + # bigger-pruned LM, it'll be faster. 
+ size=500000 + prune_lm_dir.py --target-num-ngrams=$size ${dir}/data/lm_${order}_prune_big ${dir}/data/lm_${order}_prune_small + + get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_small 2>&1 | grep -F '[perplexity' + + format_arpa_lm.py ${dir}/data/lm_${order}_prune_small | gzip -c > ${dir}/data/arpa/${order}gram_small.arpa.gz +fi diff --git a/egs/bentham/v1/local/wer_output_filter b/egs/bentham/v1/local/wer_output_filter new file mode 100755 index 00000000000..24691a160a9 --- /dev/null +++ b/egs/bentham/v1/local/wer_output_filter @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +# Copyright 2017 Hossein Hadian + +# This is a filter used in scoring. It separates all +# punctuations from words. For e.g. this sentence: + +# "They have come!" he said reverently, gripping his +# hands. "Isn't it a glorious thing! Long awaited." + +# is converted to this: + +# " They have come ! " he said reverently , gripping his +# hands . " Isn ' t it a glorious thing ! Long awaited . " + +# Sample BPE-based output: +# |He |ro se |from |his |b re ak f as t - s ch oo l |b en ch + +import sys +import re + +punctuations = "!(),.?;:'-\"" +escaped_punctuations = re.escape(punctuations) + +for line in sys.stdin: + words = line.strip().split() + uttid = words[0] + transcript = ''.join(words[1:]) + transcript = transcript.replace('|', ' ') + split_transcript = " ".join(re.split("([{}])".format(escaped_punctuations), + transcript)).strip() + print("{} {}".format(uttid, split_transcript)) diff --git a/egs/bentham/v1/path.sh b/egs/bentham/v1/path.sh new file mode 100755 index 00000000000..2d17b17a84a --- /dev/null +++ b/egs/bentham/v1/path.sh @@ -0,0 +1,6 @@ +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C diff --git a/egs/bentham/v1/run_end2end.sh b/egs/bentham/v1/run_end2end.sh new file mode 100755 index 00000000000..63c034e41f6 --- /dev/null +++ b/egs/bentham/v1/run_end2end.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# Copyright 2018 Ashish Arora (Johns Hopkins University) +# 2018 Desh Raj (Johns Hopkins University) + +set -e +stage=0 +nj=20 +# bentham_hwr_database points to the official database path on the JHU grid. If you have not +# already downloaded the data, you will have to first download it and then name the Images +# and Ground Truth zipped files as images.zip and gt.zip. Then, point the path below to the +# location where your zipped files are present on the grid. +bentham_hwr_database=/export/corpora5/handwriting_ocr/hwr1/ICDAR-HTR-Competition-2015 +# bentham_text_database points to the database path on the JHU grid. +# It contains all of the written works of Bentham, and can be used to train +# an LM for the HWR task. We have provided a script which downloads the data +# and saves it to the location provided below. +bentham_text_corpus=/export/corpora5/handwriting_ocr/hwr1/ICDAR-HTR-Competition-2015/Bentham-Text + +. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. +. ./path.sh +. ./utils/parse_options.sh # e.g. this parses the above options + # if supplied. + + +./local/check_tools.sh + +if [ $stage -le 0 ]; then + echo "$0: Preparing data..." 
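+  # The two corpus paths above default to JHU-grid locations; on other systems
+  # they can be overridden on the command line (picked up by
+  # utils/parse_options.sh), e.g. with hypothetical local paths:
+  #   ./run_end2end.sh --bentham-hwr-database /path/to/bentham/zips \
+  #                    --bentham-text-corpus /path/to/Bentham-Text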
+ local/prepare_data.sh --database-dir $bentham_hwr_database \ + --text-corpus-dir $bentham_text_corpus +fi + +if [ $stage -le 1 ]; then + image/get_image2num_frames.py data/train # This will be needed for the next command + # The next command creates a "allowed_lengths.txt" file in data/train + # which will be used by local/make_features.py to enforce the images to + # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. + image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train + echo "$(date) Extracting features, creating feats.scp file" + for dataset in train val test; do + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/$dataset + steps/compute_cmvn_stats.sh data/$dataset + done + utils/fix_data_dir.sh data/train +fi + +if [ $stage -le 2 ]; then + echo "$0: Preparing BPE..." + # getting non-silence phones. + cut -d' ' -f2- data/train/text | \ +python3 <( +cat << "END" +import os, sys, io; +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8'); +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8'); +phone_dict = dict(); +for line in infile: + line_vect = line.strip().split(); + for word in line_vect: + for phone in word: + phone_dict[phone] = phone; +for phone in phone_dict.keys(): + output.write(phone+ '\n'); +END + ) > data/local/phones.txt + + cut -d' ' -f2- data/train/text > data/local/train_data.txt + cat data/local/phones.txt data/local/train_data.txt | \ + utils/lang/bpe/prepend_words.py | \ + utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt + for set in test train val; do + cut -d' ' -f1 data/$set/text > data/$set/ids + cut -d' ' -f2- data/$set/text | \ + utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + | sed 's/@@//g' > data/$set/bpe_text + mv data/$set/text data/$set/text.old + paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text + done +fi + +if [ $stage -le 3 ]; then + echo "$0: Estimating a language model for decoding..." + local/train_lm.sh +fi + +if [ $stage -le 4 ]; then + echo "$0: Preparing dictionary and lang..." + local/prepare_dict.sh + # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. + # So we set --sil-prob to 0.0 + utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ + data/local/dict "" data/lang/temp data/lang + silphonelist=`cat data/lang/phones/silence.csl` + nonsilphonelist=`cat data/lang/phones/nonsilence.csl` + local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang/phones.txt >data/lang/topo + utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang + + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_big.arpa.gz \ + data/local/dict/lexicon.txt data/lang + utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ + data/lang data/lang_rescore_6g +fi + +if [ $stage -le 5 ]; then + echo "$0: Calling the flat-start chain recipe..." + local/chain/run_e2e_cnn.sh +fi + +if [ $stage -le 6 ]; then + echo "$0: Aligning the training data using the e2e chain model..." + steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ + --use-gpu false \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ + data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train +fi + +if [ $stage -le 7 ]; then + echo "$0: Building a tree and training a regular chain model using the e2e alignments..." 
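+  # Once this finishes, the flat-start and regular chain systems can be
+  # compared with, e.g.:
+  #   local/chain/compare_wer.sh exp/chain/e2e_cnn_1a exp/chain/cnn_e2eali_1a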
+ local/chain/run_cnn_e2eali.sh +fi diff --git a/egs/bentham/v1/steps b/egs/bentham/v1/steps new file mode 120000 index 00000000000..1b186770dd1 --- /dev/null +++ b/egs/bentham/v1/steps @@ -0,0 +1 @@ +../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/bentham/v1/utils b/egs/bentham/v1/utils new file mode 120000 index 00000000000..a3279dc8679 --- /dev/null +++ b/egs/bentham/v1/utils @@ -0,0 +1 @@ +../../wsj/s5/utils/ \ No newline at end of file diff --git a/egs/cifar/v1/image/get_allowed_lengths.py b/egs/cifar/v1/image/get_allowed_lengths.py index 02321fdd2df..44e17028695 100755 --- a/egs/cifar/v1/image/get_allowed_lengths.py +++ b/egs/cifar/v1/image/get_allowed_lengths.py @@ -117,7 +117,7 @@ def find_allowed_durations(start_len, end_len, args): (length // args.frame_subsampling_factor)) allowed_lengths.append(length) fp.write("{}\n".format(int(length))) - length *= args.factor + length = max(length * args.factor, length + args.frame_subsampling_factor) return allowed_lengths diff --git a/egs/cifar/v1/image/ocr/make_features.py b/egs/cifar/v1/image/ocr/make_features.py index a11cbcc7a82..aa909f596c9 100755 --- a/egs/cifar/v1/image/ocr/make_features.py +++ b/egs/cifar/v1/image/ocr/make_features.py @@ -4,6 +4,7 @@ # 2017 Ashish Arora # 2017 Yiwen Shao # 2018 Hossein Hadian +# 2018 Desh Raj """ This script converts images to Kaldi-format feature matrices. The input to this script is the path to a data directory, e.g. "data/train". This script @@ -88,10 +89,16 @@ def horizontal_pad(im, allowed_lengths = None): left_padding = int(padding // 2) right_padding = padding - left_padding dim_y = im.shape[0] # height - im_pad = np.concatenate((255 * np.ones((dim_y, left_padding, args.num_channels), - dtype=int), im), axis=1) - im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding, args.num_channels), - dtype=int)), axis=1) + if args.num_channels in [1,4]: + im_pad = np.concatenate((255 * np.ones((dim_y, left_padding), + dtype=int), im), axis=1) + im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding), + dtype=int)), axis=1) + else: + im_pad = np.concatenate((255 * np.ones((dim_y, left_padding, args.num_channels), + dtype=int), im), axis=1) + im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding, args.num_channels), + dtype=int)), axis=1) return im_pad1 def get_scaled_image_aug(im, mode='normal'): @@ -169,7 +176,10 @@ def vertical_shift(im, mode='normal'): line_vect = line.split(' ') image_id = line_vect[0] image_path = line_vect[1] - im = misc.imread(image_path) + if args.num_channels == 4: + im = misc.imread(image_path, mode='L') + else: + im = misc.imread(image_path) if args.fliplr: im = np.fliplr(im) if args.augment_type == 'no_aug' or 'random_shift': @@ -184,7 +194,7 @@ def vertical_shift(im, mode='normal'): im = vertical_shift(im, 'normal') elif args.augment_type == 'random_shift': im = vertical_shift(im, 'notmid') - if args.num_channels == 1: + if args.num_channels in [1,4]: data = np.transpose(im, (1, 0)) elif args.num_channels == 3: H = im.shape[0]