Add optimized decoder for the deployment of DS2 #139

Merged: 48 commits, Sep 18, 2017
Changes from 39 commits
724b0fb
add initial files for deployment
Jun 29, 2017
348d6bb
add deploy.py
Jun 29, 2017
59b4b87
Merge branch 'develop' into ctc_decoder_deploy
Jun 29, 2017
a506198
fix bugs
Jul 4, 2017
903c300
Merge branch 'develop' into ctc_decoder_deploy
Jul 5, 2017
1ca3814
code cleanup for the deployment decoder
Jul 6, 2017
34f98e0
add setup and README for deployment
Jul 6, 2017
ae05535
enable loading language model in multiple format
Jul 10, 2017
4e5b345
change probs' computation into log scale & add best path decoder
Jul 27, 2017
908932f
refine the interface of decoders in swig
Aug 3, 2017
ac3a49c
Delete swig_decoder.py
kuke Aug 3, 2017
9ff48b0
reorganize cpp files
Aug 22, 2017
32047c7
refine wrapper for swig and simplify setup
Aug 22, 2017
f41375b
add the support of parallel beam search decoding in deployment
Aug 23, 2017
a96c650
Refactor scorer and move utility functions to decoder_util.h
pkuyym Aug 23, 2017
3441148
Merge branch 'ctc_decoder_deploy' of https://github.com/kuke/models i…
pkuyym Aug 23, 2017
89c4a96
Make setup.py to support parallel processing.
pkuyym Aug 23, 2017
bbbc988
adapt to the last three commits
Aug 23, 2017
d68732b
convert data structure for prefix from map to trie tree
Aug 24, 2017
955d293
enable finite-state transducer in beam search decoding
Aug 29, 2017
20d13a4
streamline source code
Aug 29, 2017
09f4c6e
remove unused functions in Scorer
Aug 29, 2017
b5c4d83
add min cutoff & top n cutoff
Aug 30, 2017
202a06a
Merge branch 'develop' of https://github.com/PaddlePaddle/models into…
Aug 30, 2017
beb0c07
clean up code & update README for decoder in deployment
Aug 30, 2017
103a6ac
format C++ source code
Sep 6, 2017
efc5d9b
Merge branch 'develop' of https://github.com/PaddlePaddle/models into…
Sep 7, 2017
5a318e9
adapt to the new folder structure of DS2
Sep 8, 2017
f8c7d46
format header includes & update setup info
Sep 8, 2017
e49f505
resolve conflicts in model.py
Sep 8, 2017
d75f27d
Merge branch 'develop' of https://github.com/PaddlePaddle/models into…
Sep 8, 2017
41e9e59
append some comments
Sep 8, 2017
c4bc822
adapt to the new structure
Sep 13, 2017
52a862d
add __init__.py in decoders/swig
Sep 13, 2017
552dd52
move deprecated decoders
Sep 14, 2017
0bda37c
refine by following review comments
Sep 15, 2017
902c35b
append some changes
Sep 15, 2017
bb35363
Merge branch 'develop' of upstream into ctc_decoder_deploy
Sep 16, 2017
15728d0
expose param cutoff_top_n
Sep 16, 2017
e6740af
adjust scorer's init & add logging for scorer & separate long functions
Sep 17, 2017
8c5576d
format variables' name & add more comments
Sep 17, 2017
bcc236e
Merge branch 'develop' of upstream into ctc_decoder_deploy
Sep 18, 2017
98d35b9
adjust to pass ci
Sep 18, 2017
d7a9752
specify clang_format to ver3.9
Sep 18, 2017
cc2f91f
disable the make output of libsndfile in setup
Sep 18, 2017
f1cd672
use cd instead of pushd in setup.sh
Sep 18, 2017
cfecaa8
Merge branch 'develop' of upstream into ctc_decoder_deploy
Sep 18, 2017
9db0d25
pass unittest for deprecated decoders
Sep 18, 2017
1 change: 1 addition & 0 deletions deep_speech_2/README.md
@@ -176,6 +176,7 @@ Data augmentation has often been a highly effective technique to boost the deep

Six optional augmentation components are provided to be selected, configured and inserted into the processing pipeline.

### Inference
Contributor: Why add L191-192?

kuke (Collaborator, Author), Sep 15, 2017: This is a mistake. Removed.

Contributor: Remove L179.

kuke (Collaborator, Author): Done.

- Volume Perturbation
- Speed Perturbation
- Shifting Perturbation
Empty file.
@@ -42,8 +42,8 @@ def ctc_greedy_decoder(probs_seq, vocabulary):
def ctc_beam_search_decoder(probs_seq,
beam_size,
vocabulary,
blank_id,
cutoff_prob=1.0,
cutoff_top_n=40,
Contributor: Why add cutoff_top_n?

kuke (Collaborator, Author): It's a param used for Mandarin vocabulary cutoff.
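The new parameter caps how many candidate characters each time step contributes to beam expansion, which matters for a Mandarin-sized vocabulary (thousands of characters) where the cumulative-probability cutoff alone can still leave many candidates. A minimal sketch of the pruning step under discussion, assuming a plain list of per-character probabilities (`prune_candidates` is a hypothetical helper name, not part of the PR):

```python
# Hypothetical helper mirroring the PR's per-time-step pruning:
# keep candidates until their cumulative probability reaches
# cutoff_prob, then cap the list at cutoff_top_n entries.
def prune_candidates(prob_row, cutoff_prob=1.0, cutoff_top_n=40):
    # Pair each vocabulary index with its probability, best first.
    prob_idx = sorted(enumerate(prob_row), key=lambda x: x[1], reverse=True)
    cutoff_len, cum_prob = 0, 0.0
    for _, prob in prob_idx:
        cum_prob += prob
        cutoff_len += 1
        if cum_prob >= cutoff_prob:
            break
    return prob_idx[:min(cutoff_len, cutoff_top_n)]

# For an English character vocabulary (~29 symbols) cutoff_top_n=40
# never triggers; for a Mandarin one it bounds the number of prefixes
# each beam expands into per time step.
print(prune_candidates([0.5, 0.2, 0.1, 0.1, 0.05, 0.05],
                       cutoff_prob=0.9, cutoff_top_n=2))
# -> [(0, 0.5), (1, 0.2)]
```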

ext_scoring_func=None,
nproc=False):
"""CTC Beam search decoder.
@@ -66,8 +66,6 @@ def ctc_beam_search_decoder(probs_seq,
:type beam_size: int
:param vocabulary: Vocabulary list.
:type vocabulary: list
:param blank_id: ID of blank.
:type blank_id: int
:param cutoff_prob: Cutoff probability in pruning,
default 1.0, no pruning.
:type cutoff_prob: float
@@ -87,9 +85,8 @@ def ctc_beam_search_decoder(probs_seq,
raise ValueError("The shape of prob_seq does not match with the "
"shape of the vocabulary.")

# blank_id check
if not blank_id < len(probs_seq[0]):
raise ValueError("blank_id shouldn't be greater than probs dimension")
# blank_id assign
blank_id = len(vocabulary)

# If the decoder called in the multiprocesses, then use the global scorer
# instantiated in ctc_beam_search_decoder_batch().
@@ -114,14 +111,15 @@ def ctc_beam_search_decoder(probs_seq,
prob_idx = list(enumerate(probs_seq[time_step]))
cutoff_len = len(prob_idx)
#If pruning is enabled
if cutoff_prob < 1.0:
if cutoff_prob < 1.0 or cutoff_top_n < cutoff_len:
prob_idx = sorted(prob_idx, key=lambda asd: asd[1], reverse=True)
cutoff_len, cum_prob = 0, 0.0
for i in xrange(len(prob_idx)):
cum_prob += prob_idx[i][1]
cutoff_len += 1
if cum_prob >= cutoff_prob:
break
cutoff_len = min(cutoff_top_n, cutoff_top_n)
Contributor: cutoff_len could move into the for loop as a stop condition, like if (cum_prob >= cutoff_prob or cutoff_len >= threshold) break. Also, min(cutoff_top_n, cutoff_top_n) looks like a typo.

kuke (Collaborator, Author):
1. It is not necessary.
2. Corrected.
prob_idx = prob_idx[0:cutoff_len]

for l in prefix_set_prev:
@@ -191,9 +189,9 @@ def ctc_beam_search_decoder(probs_seq,
def ctc_beam_search_decoder_batch(probs_split,
beam_size,
vocabulary,
blank_id,
num_processes,
cutoff_prob=1.0,
cutoff_top_n=40,
ext_scoring_func=None):
"""CTC beam search decoder using multiple processes.

@@ -204,8 +202,6 @@ def ctc_beam_search_decoder_batch(probs_split,
:type beam_size: int
:param vocabulary: Vocabulary list.
:type vocabulary: list
:param blank_id: ID of blank.
:type blank_id: int
:param num_processes: Number of parallel processes.
:type num_processes: int
:param cutoff_prob: Cutoff probability in pruning,
@@ -232,8 +228,8 @@ def ctc_beam_search_decoder_batch(probs_split,
pool = multiprocessing.Pool(processes=num_processes)
results = []
for i, probs_list in enumerate(probs_split):
args = (probs_list, beam_size, vocabulary, blank_id, cutoff_prob, None,
nproc)
args = (probs_list, beam_size, vocabulary, blank_id, cutoff_prob,
Contributor: We could add a comment clarifying why the global ext_nproc_scorer is used instead of passing ext_nproc_scorer to ctc_beam_search_decoder.

kuke (Collaborator, Author): Will append the comment in a later PR.
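A plausible reason for the global (not stated in the thread): the scorer wraps an n-gram language model through SWIG, and such wrapped objects generally cannot be pickled into pool.apply_async task arguments, whereas a module-level global set before the pool is created is inherited by fork-based workers. A simplified sketch of that pattern; init_scorer and decode_one are hypothetical stand-ins, not the PR's functions:

```python
import multiprocessing

# Hypothetical stand-in for the PR's module-level ext_nproc_scorer.
# The real scorer wraps an n-gram LM behind SWIG, which generally
# cannot be pickled and so cannot travel in pool task arguments.
ext_nproc_scorer = None

def init_scorer(weight):
    # Set the global BEFORE the pool is created, so forked workers
    # inherit it through the parent's memory image.
    global ext_nproc_scorer
    ext_nproc_scorer = lambda sentence: weight * len(sentence.split())

def decode_one(probs):
    # Worker reads the inherited global instead of a pickled argument.
    return max(probs) + ext_nproc_scorer("a b")

if __name__ == "__main__":
    init_scorer(0.5)
    ctx = multiprocessing.get_context("fork")  # pattern relies on fork semantics
    pool = ctx.Pool(processes=2)
    results = [pool.apply_async(decode_one, ([0.5, 0.25],)) for _ in range(2)]
    pool.close()
    pool.join()
    print([r.get() for r in results])  # each worker saw the inherited scorer
```

This explains the `None` placeholder in args: the scorer slot is filled inside the worker from the global, not from the pickled tuple.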

cutoff_top_n, None, nproc)
results.append(pool.apply_async(ctc_beam_search_decoder, args))

pool.close()
@@ -8,7 +8,7 @@
import numpy as np


class LmScorer(object):
class Scorer(object):
"""External scorer to evaluate a prefix or whole sentence in
beam search decoding, including the score from n-gram language
model and word count.
Empty file.