-
Notifications
You must be signed in to change notification settings - Fork 2.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add optimized decoder for the deployment of DS2 #139
Changes from 39 commits
724b0fb
348d6bb
59b4b87
a506198
903c300
1ca3814
34f98e0
ae05535
4e5b345
908932f
ac3a49c
9ff48b0
32047c7
f41375b
a96c650
3441148
89c4a96
bbbc988
d68732b
955d293
20d13a4
09f4c6e
b5c4d83
202a06a
beb0c07
103a6ac
efc5d9b
5a318e9
f8c7d46
e49f505
d75f27d
41e9e59
c4bc822
52a862d
552dd52
0bda37c
902c35b
bb35363
15728d0
e6740af
8c5576d
bcc236e
98d35b9
d7a9752
cc2f91f
f1cd672
cfecaa8
9db0d25
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -176,6 +176,7 @@ Data augmentation has often been a highly effective technique to boost the deep | |
|
||
Six optional augmentation components are provided to be selected, configured and inserted into the processing pipeline. | ||
|
||
### Inference | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove L179. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
- Volume Perturbation | ||
- Speed Perturbation | ||
- Shifting Perturbation | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,8 +42,8 @@ def ctc_greedy_decoder(probs_seq, vocabulary): | |
def ctc_beam_search_decoder(probs_seq, | ||
beam_size, | ||
vocabulary, | ||
blank_id, | ||
cutoff_prob=1.0, | ||
cutoff_top_n=40, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why add `cutoff_top_n`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's a parameter used for the Mandarin vocabulary cutoff. |
||
ext_scoring_func=None, | ||
nproc=False): | ||
"""CTC Beam search decoder. | ||
|
@@ -66,8 +66,6 @@ def ctc_beam_search_decoder(probs_seq, | |
:type beam_size: int | ||
:param vocabulary: Vocabulary list. | ||
:type vocabulary: list | ||
:param blank_id: ID of blank. | ||
:type blank_id: int | ||
:param cutoff_prob: Cutoff probability in pruning, | ||
default 1.0, no pruning. | ||
:type cutoff_prob: float | ||
|
@@ -87,9 +85,8 @@ def ctc_beam_search_decoder(probs_seq, | |
raise ValueError("The shape of prob_seq does not match with the " | ||
"shape of the vocabulary.") | ||
|
||
# blank_id check | ||
if not blank_id < len(probs_seq[0]): | ||
raise ValueError("blank_id shouldn't be greater than probs dimension") | ||
# blank_id assign | ||
blank_id = len(vocabulary) | ||
|
||
# If the decoder called in the multiprocesses, then use the global scorer | ||
# instantiated in ctc_beam_search_decoder_batch(). | ||
|
@@ -114,14 +111,15 @@ def ctc_beam_search_decoder(probs_seq, | |
prob_idx = list(enumerate(probs_seq[time_step])) | ||
cutoff_len = len(prob_idx) | ||
#If pruning is enabled | ||
if cutoff_prob < 1.0: | ||
if cutoff_prob < 1.0 or cutoff_top_n < cutoff_len: | ||
prob_idx = sorted(prob_idx, key=lambda asd: asd[1], reverse=True) | ||
cutoff_len, cum_prob = 0, 0.0 | ||
for i in xrange(len(prob_idx)): | ||
cum_prob += prob_idx[i][1] | ||
cutoff_len += 1 | ||
if cum_prob >= cutoff_prob: | ||
break | ||
cutoff_len = min(cutoff_top_n, cutoff_top_n) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could move There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
prob_idx = prob_idx[0:cutoff_len] | ||
|
||
for l in prefix_set_prev: | ||
|
@@ -191,9 +189,9 @@ def ctc_beam_search_decoder(probs_seq, | |
def ctc_beam_search_decoder_batch(probs_split, | ||
beam_size, | ||
vocabulary, | ||
blank_id, | ||
num_processes, | ||
cutoff_prob=1.0, | ||
cutoff_top_n=40, | ||
ext_scoring_func=None): | ||
"""CTC beam search decoder using multiple processes. | ||
|
||
|
@@ -204,8 +202,6 @@ def ctc_beam_search_decoder_batch(probs_split, | |
:type beam_size: int | ||
:param vocabulary: Vocabulary list. | ||
:type vocabulary: list | ||
:param blank_id: ID of blank. | ||
:type blank_id: int | ||
:param num_processes: Number of parallel processes. | ||
:type num_processes: int | ||
:param cutoff_prob: Cutoff probability in pruning, | ||
|
@@ -232,8 +228,8 @@ def ctc_beam_search_decoder_batch(probs_split, | |
pool = multiprocessing.Pool(processes=num_processes) | ||
results = [] | ||
for i, probs_list in enumerate(probs_split): | ||
args = (probs_list, beam_size, vocabulary, blank_id, cutoff_prob, None, | ||
nproc) | ||
args = (probs_list, beam_size, vocabulary, blank_id, cutoff_prob, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can comment more to clarify why using There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Will add the comment in a later PR. |
||
cutoff_top_n, None, nproc) | ||
results.append(pool.apply_async(ctc_beam_search_decoder, args)) | ||
|
||
pool.close() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why add L191-192?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a mistake. Removed