diff --git a/egs/multi_en/s5/local/g2p/apply_g2p.sh b/egs/multi_en/s5/local/g2p/apply_g2p.sh
deleted file mode 100755
index 8484155800d..00000000000
--- a/egs/multi_en/s5/local/g2p/apply_g2p.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-# Copyright 2016  Allen Guo
-#           2017  Xiaohui Zhang
-# Apache License 2.0
-
-# This script applies a trained Phonetisarus G2P model to
-# synthesize pronunciations for missing words (i.e., words in
-# transcripts but not the lexicon), and output the expanded lexicon.
-
-var_counts=1
-
-. ./path.sh || exit 1
-. parse_options.sh || exit 1;
-
-if [ $# -ne "4" ]; then
-  echo "Usage: $0 <g2p-model> <g2p-tmp-dir> <current-lexicon> <output-lexicon>"
-  exit 1
-fi
-
-model=$1
-workdir=$2
-lexicon=$3
-outlexicon=$4
-
-mkdir -p $workdir
-
-# awk command from http://stackoverflow.com/questions/2626274/print-all-but-the-first-three-columns
-echo 'Gathering missing words...'
-cat data/*/train/text | \
-  local/count_oovs.pl $lexicon | \
-  awk '{if (NF > 3 ) {for(i=4; i<NF; i++) printf "%s ",$i; print $NF;}}' | \
-  perl -ape 's/\s/\n/g;' | \
-  sort | uniq > $workdir/missing.txt
-cat $workdir/missing.txt | \
-  grep "^[a-z]*$"  > $workdir/missing_onlywords.txt
-
-echo 'Synthesizing pronunciations for missing words...'
-phonetisaurus-apply --nbest $var_counts --model $model --thresh 5 --accumulate --word_list $workdir/missing_onlywords.txt > $workdir/missing_g2p_${var_counts}.txt 
-
-echo "Adding new pronunciations to $lexicon"
-cat "$lexicon" $workdir/missing_g2p_${var_counts}.txt | sort | uniq > $outlexicon
diff --git a/egs/multi_en/s5/local/g2p/train_g2p.sh b/egs/multi_en/s5/local/g2p/train_g2p.sh
deleted file mode 100755
index 43e75f6608d..00000000000
--- a/egs/multi_en/s5/local/g2p/train_g2p.sh
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/bin/bash
-
-# Copyright 2017  Intellisist, Inc. (Author: Navneeth K)
-#           2017  Xiaohui Zhang
-# Apache License 2.0
-
-# This script trains a g2p model using Phonetisaurus and SRILM.
-
-stage=0
-silence_phones=
-
-echo "$0 $@"  # Print the command line for logging
-
-[ -f ./path.sh ] && . ./path.sh; # source the path.
-. utils/parse_options.sh || exit 1;
-
-
-if [ $# -ne 2 ]; then
-  echo "Usage: $0 <dictdir> <outdir>"
-  exit 1;
-fi
-
-lexicondir=$1
-outdir=$2
-
-[ ! -f $lexicondir/lexicon.txt ] && echo "Cannot find $lexicondir/lexicon.txt" && exit
-
-isuconv=`which uconv`
-if [ -z $isuconv ]; then
-  echo "uconv was not found. You must install the icu4c package."
-  exit 1;
-fi
-
-mkdir -p $outdir
-
-
-# For input lexicon, remove pronunciations containing non-utf-8-encodable characters,
-# and optionally remove words that are mapped to a single silence phone from the lexicon.
-if [ $stage -le 0 ]; then
-  lexicon=$lexicondir/lexicon.txt
-  if [ ! -z "$silence_phones" ]; then
-    awk 'NR==FNR{a[$1] = 1; next} {s=$2;for(i=3;i<=NF;i++) s=s" "$i; if(!(s in a)) print $1" "s}' \
-      $silence_phones $lexicon | \
-      awk '{printf("%s\t",$1); for (i=2;i<NF;i++){printf("%s ",$i);} printf("%s\n",$NF);}' | \
-      uconv -f utf-8  -t utf-8 -x Any-NFC - | awk 'NF > 0'> $outdir/lexicon_tab_separated.txt
-  else
-    awk '{printf("%s\t",$1); for (i=2;i<NF;i++){printf("%s ",$i);} printf("%s\n",$NF);}' $lexicon | \
-      uconv -f utf-8  -t utf-8 -x Any-NFC - | awk 'NF > 0'> $outdir/lexicon_tab_separated.txt
-  fi
-fi
-
-if [ $stage -le 1 ]; then
-  # Align lexicon stage. Lexicon is assumed to have first column tab separated
-  phonetisaurus-align --input=$outdir/lexicon_tab_separated.txt --ofile=${outdir}/aligned_lexicon.corpus || exit 1;
-fi
-
-if [ $stage -le 2 ]; then
-  # Convert aligned lexicon to arpa using srilm.
-  ngram-count -order 7 -kn-modify-counts-at-end -gt1min 0 -gt2min 0 \
-    -gt3min 0 -gt4min 0 -gt5min 0 -gt6min 0 -gt7min 0 -ukndiscount \
-    -text ${outdir}/aligned_lexicon.corpus -lm ${outdir}/aligned_lexicon.arpa
-fi
-
-if [ $stage -le 3 ]; then
-  # Convert the arpa file to FST.
-  phonetisaurus-arpa2wfst --lm=${outdir}/aligned_lexicon.arpa --ofile=${outdir}/model.fst
-fi
diff --git a/egs/multi_en/s5/run.sh b/egs/multi_en/s5/run.sh
index c3eb3503237..f2d29e8de32 100755
--- a/egs/multi_en/s5/run.sh
+++ b/egs/multi_en/s5/run.sh
@@ -137,8 +137,6 @@ if [ $stage -le 4 ]; then
   cat "$lexicon" $g2p_tmp_dir/missing_lexicon.txt | sort | uniq > $expanded_lexicon
 fi
 
-exit 0
-
 # We'll do multiple iterations of pron/sil-prob estimation. So the structure of
 # the dict/lang dirs are designed as ${dict/lang_root}_${dict_affix}, where dict_affix
 # is "nosp" or the name of the acoustic model we use to estimate pron/sil-probs.
diff --git a/egs/wsj/s5/steps/dict/train_g2p_phonetisaurus.sh b/egs/wsj/s5/steps/dict/train_g2p_phonetisaurus.sh
index 55eab2748f6..4a0bcb88024 100755
--- a/egs/wsj/s5/steps/dict/train_g2p_phonetisaurus.sh
+++ b/egs/wsj/s5/steps/dict/train_g2p_phonetisaurus.sh
@@ -72,7 +72,7 @@ fi
 
 if [ $stage -le 2 ]; then
   # Convert aligned lexicon to arpa using make_kn_lm.py, a re-implementation of srilm's ngram-count functionality.
-  ./steps/dict/make_kn_lm.py -ngram-order 7 -text ${wdir}/aligned_lexicon.corpus -lm ${wdir}/aligned_lexicon.arpa
+  ./utils/lang/make_kn_lm.py -ngram-order 7 -text ${wdir}/aligned_lexicon.corpus -lm ${wdir}/aligned_lexicon.arpa
 fi
 
 if [ $stage -le 3 ]; then
diff --git a/egs/wsj/s5/steps/dict/make_kn_lm.py b/egs/wsj/s5/utils/lang/make_kn_lm.py
similarity index 100%
rename from egs/wsj/s5/steps/dict/make_kn_lm.py
rename to egs/wsj/s5/utils/lang/make_kn_lm.py