Skip to content

Commit

Permalink
[egs] Add example for Yomdle Farsi OCR (kaldi-asr#2702)
Browse files Browse the repository at this point in the history
  • Loading branch information
ChunChiehChang authored and danpovey committed Oct 5, 2018
1 parent ad2bb82 commit c0e34f8
Show file tree
Hide file tree
Showing 49 changed files with 4,521 additions and 3 deletions.
14 changes: 11 additions & 3 deletions egs/cifar/v1/image/ocr/make_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
# NOTE: adjacent string literals are concatenated verbatim, so the first
# fragment of a split help text must end with a space.
parser.add_argument('--padding', type=int, default=5,
                    help='Number of white pixels to pad on the left '
                    'and right side of the image.')
parser.add_argument('--num-channels', type=int, default=1,
                    help='Number of color channels')
# Boolean flags are passed as text: any capitalisation of "true" enables the
# option, every other value disables it.
parser.add_argument('--fliplr', type=lambda x: (str(x).lower()=='true'), default=False,
                    help="Flip the image left-right for right to left languages")
parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False,
Expand Down Expand Up @@ -84,9 +86,9 @@ def horizontal_pad(im, allowed_lengths = None):
left_padding = int(padding // 2)
right_padding = padding - left_padding
dim_y = im.shape[0] # height
im_pad = np.concatenate((255 * np.ones((dim_y, left_padding),
im_pad = np.concatenate((255 * np.ones((dim_y, left_padding, args.num_channels),
dtype=int), im), axis=1)
im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding),
im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding, args.num_channels),
dtype=int)), axis=1)
return im_pad1

Expand Down Expand Up @@ -150,7 +152,13 @@ def get_scaled_image_aug(im, mode='normal'):
if im_horizontal_padded is None:
num_fail += 1
continue
data = np.transpose(im_horizontal_padded, (1, 0))
if args.num_channels == 1:
data = np.transpose(im_horizontal_padded, (1, 0))
elif args.num_channels == 3:
H = im_horizontal_padded.shape[0]
W = im_horizontal_padded.shape[1]
C = im_horizontal_padded.shape[2]
data = np.reshape(np.transpose(im_horizontal_padded, (1, 0, 2)), (W, H * C))
data = np.divide(data, 255.0)
num_ok += 1
write_kaldi_matrix(out_fh, data, image_id)
Expand Down
3 changes: 3 additions & 0 deletions egs/yomdle_fa/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
This directory contains example scripts for OCR on the Yomdle and Slam datasets.
Training is done on the Yomdle dataset and testing is done on Slam.
LM rescoring is also done using extra corpus data obtained from various newswires (e.g. Hamshahri).
13 changes: 13 additions & 0 deletions egs/yomdle_fa/v1/cmd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export cmd="queue.pl"
1 change: 1 addition & 0 deletions egs/yomdle_fa/v1/image
35 changes: 35 additions & 0 deletions egs/yomdle_fa/v1/local/augment_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
# Copyright 2018 Hossein Hadian
# 2018 Ashish Arora

# Apache 2.0
# This script performs data augmentation.

# Default option values; they must be assigned before parse_options.sh is
# sourced so that --nj, --cmd, --feat-dim and --fliplr can override them.
nj=4
cmd=run.pl
feat_dim=40
fliplr=false
echo "$0 $@"

# NOTE(review): cmd.sh is sourced after the defaults above, so if it sets
# $cmd that value replaces run.pl unless --cmd is given -- confirm intended.
. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh || exit 1;

# Refuse to run without the three positional arguments: with an empty
# <data-dir> the mkdir below would otherwise operate on /augmentations.
if [ $# -lt 3 ]; then
  echo "Usage: $0 [options] <src-dir> <out-dir> <data-dir>"
  echo "  <src-dir>:  data directory to augment (must contain allowed_lengths.txt)"
  echo "  <out-dir>:  receives the original data combined with the augmented copy"
  echo "  <data-dir>: augmented copies are written to <data-dir>/augmentations"
  echo "Options: --nj <n>, --cmd <cmd>, --feat-dim <dim>, --fliplr <true|false>"
  exit 1
fi

srcdir=$1
outdir=$2
datadir=$3

mkdir -p "$datadir/augmentations"
echo "copying $srcdir to $datadir/augmentations/aug1, allowed length, creating feats.scp"

# Each augmentation set is a prefixed copy of the source data directory whose
# features are re-extracted with augmentation switched on.
for set in aug1; do
  image/copy_data_dir.sh --spk-prefix "$set-" --utt-prefix "$set-" \
    "$srcdir" "$datadir/augmentations/$set"
  cat "$srcdir/allowed_lengths.txt" > "$datadir/augmentations/$set/allowed_lengths.txt"
  local/extract_features.sh --nj "$nj" --cmd "$cmd" --feat-dim "$feat_dim" \
    --fliplr "$fliplr" --augment true "$datadir/augmentations/$set"
done

echo " combine original data and data from different augmentations"
utils/combine_data.sh --extra-files images.scp "$outdir" "$srcdir" "$datadir/augmentations/aug1"
cat "$srcdir/allowed_lengths.txt" > "$outdir/allowed_lengths.txt"
58 changes: 58 additions & 0 deletions egs/yomdle_fa/v1/local/bidi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env python3
# Copyright 2018 Chun-Chieh Chang

# This script is largely written by Stephen Rawls
# and uses the python package https://pypi.org/project/PyICU_BiDi/
# The code leaves right to left text alone and reverses left to right text.

import icu_bidi
import io
import sys
import unicodedata
# Unicode bidirectional classes that mark a character as strongly
# right-to-left: R=strong right-to-left; AL=strong arabic right-to-left
_RTL_BIDI_CLASSES = frozenset(('R', 'AL'))


def determine_text_direction(text):
    """Return the paragraph direction to use for ``text``.

    The text counts as right-to-left as soon as it contains one character
    whose Unicode bidirectional class is 'R' or 'AL'. Otherwise, including
    for an empty string, it counts as left-to-right.

    Args:
        text: the string to inspect.

    Returns:
        icu_bidi.UBiDiLevel.UBIDI_RTL or icu_bidi.UBiDiLevel.UBIDI_LTR.
    """
    # Classify characters on demand rather than building a table of every
    # Unicode code point when the script starts; the answer is the same but
    # start-up no longer walks the whole code point range.
    for char in text:
        if unicodedata.bidirectional(char) in _RTL_BIDI_CLASSES:
            return icu_bidi.UBiDiLevel.UBIDI_RTL
    # If we made it here we did not encounter any strongly rtl char
    return icu_bidi.UBiDiLevel.UBIDI_LTR

def utf8_visual_to_logical(text):
    """Reorder ``text`` with the ICU BiDi engine running in inverse mode.

    The paragraph direction comes from determine_text_direction(). The
    result is written with mirroring enabled and with combining marks kept
    after their base characters.

    Args:
        text: the string to reorder (as the name suggests, in visual order).

    Returns:
        The reordered string produced by icu_bidi.
    """
    direction = determine_text_direction(text)

    engine = icu_bidi.Bidi()
    # Keep this assignment order: the inverse flag is set first, then the
    # explicit reordering mode and options.
    engine.inverse = True
    engine.reordering_mode = icu_bidi.UBiDiReorderingMode.UBIDI_REORDER_INVERSE_LIKE_DIRECT
    # Alternative option not used here:
    # icu_bidi.UBiDiReorderingOption.UBIDI_OPTION_INSERT_MARKS
    engine.reordering_options = icu_bidi.UBiDiReorderingOption.UBIDI_OPTION_DEFAULT

    engine.set_para(text, direction, None)

    write_flags = (0
                   | icu_bidi.UBidiWriteReorderedOpt.UBIDI_DO_MIRRORING
                   | icu_bidi.UBidiWriteReorderedOpt.UBIDI_KEEP_BASE_COMBINING)
    return engine.get_reordered(write_flags)

def utf8_logical_to_visual(text):
    """Reorder ``text`` with the ICU BiDi engine in its default mode.

    The paragraph direction comes from determine_text_direction(). The
    result is written with mirroring enabled and with combining marks kept
    after their base characters.

    Args:
        text: the string to reorder (as the name suggests, in logical order).

    Returns:
        The reordered string produced by icu_bidi.
    """
    direction = determine_text_direction(text)

    engine = icu_bidi.Bidi()

    engine.reordering_mode = icu_bidi.UBiDiReorderingMode.UBIDI_REORDER_DEFAULT
    # Alternative option not used here:
    # icu_bidi.UBiDiReorderingOption.UBIDI_OPTION_INSERT_MARKS
    engine.reordering_options = icu_bidi.UBiDiReorderingOption.UBIDI_OPTION_DEFAULT

    engine.set_para(text, direction, None)

    write_flags = (0
                   | icu_bidi.UBidiWriteReorderedOpt.UBIDI_DO_MIRRORING
                   | icu_bidi.UBidiWriteReorderedOpt.UBIDI_KEEP_BASE_COMBINING)
    return engine.get_reordered(write_flags)


##main##
# Re-wrap the standard streams so text is always read and written as UTF-8.
sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding="utf8")
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf8")
# Filter stdin to stdout one line at a time: trim surrounding whitespace,
# reorder the line, then emit it character-reversed.
for raw_line in sys.stdin:
    reordered = utf8_logical_to_visual(raw_line.strip())
    sys.stdout.write(reordered[::-1] + '\n')
67 changes: 67 additions & 0 deletions egs/yomdle_fa/v1/local/chain/compare_wer.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/bin/bash

# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}

# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora

if [ $# -eq 0 ]; then
  echo "Usage: $0: <dir1> [<dir2> ... ]"
  echo "e.g.: $0 exp/chain/cnn{1a,1b}"
  exit 1
fi

echo "# $0 $*"
# Never set to true in this script; kept as a switch that suppresses the
# train/valid probability rows below.
used_epochs=false

# Print one row of decoding scores.
# Usage: print_score_row <label> <score-file> <dir1> [<dir2> ...]
# The score is the second field of <dir>/decode_test/scoring_kaldi/<score-file>.
print_score_row() {
  local label=$1 score_file=$2
  shift 2
  local dir score
  echo -n "$label"
  for dir in "$@"; do
    score=$(cat "$dir/decode_test/scoring_kaldi/$score_file" | awk '{print $2}')
    printf "% 10s" "$score"
  done
  echo
}

# Print one row of final objective values.
# Usage: print_prob_row <label> <log-name> <grep-flag> <dir1> [<dir2> ...]
# <grep-flag> is -v to exclude the "xent" lines or -w to select only them;
# the value is the eighth field of the matching "Overall" line of
# <dir>/log/<log-name>.
print_prob_row() {
  local label=$1 log_name=$2 xent_flag=$3
  shift 3
  local dir prob
  echo -n "$label"
  for dir in "$@"; do
    prob=$(grep Overall "$dir/log/$log_name" | grep "$xent_flag" xent | awk '{printf("%.4f", $8)}')
    printf "% 10s" "$prob"
  done
  echo
}

# "$@" (rather than an unquoted $*) keeps every directory argument intact
# even if it contains whitespace or glob characters.
echo -n "# System "
for x in "$@"; do printf "% 10s" " $(basename "$x")"; done
echo

print_score_row "# WER " best_wer "$@"
print_score_row "# CER " best_cer "$@"


if $used_epochs; then
  exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems.
fi

print_prob_row "# Final train prob " compute_prob_train.final.log -v "$@"
print_prob_row "# Final valid prob " compute_prob_valid.final.log -v "$@"
print_prob_row "# Final train prob (xent) " compute_prob_train.final.log -w "$@"
print_prob_row "# Final valid prob (xent) " compute_prob_valid.final.log -w "$@"
Loading

0 comments on commit c0e34f8

Please sign in to comment.