-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a298ac7
commit 9536efe
Showing
7 changed files
with
358 additions
and
234 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
# MAKEFILE_JUSTNAME: this makefile as make was asked to read it (the first
# entry of MAKEFILE_LIST).
# MAKEFILE_COMPLETE: the same file as an absolute path, for use with
# `$(MAKE) --makefile=...` in recipes. $(abspath) resolves a relative name
# against the current directory and leaves an already-absolute name alone, so
# invoking make with `-f /absolute/path` no longer produces a doubled path.
MAKEFILE_JUSTNAME := $(firstword $(MAKEFILE_LIST))
MAKEFILE_COMPLETE := $(abspath $(MAKEFILE_JUSTNAME))
|
||
# The prerequisites of `all` must run strictly left to right: fst-dir prepares
# the inputs, the .hfstol is built from them, and clean-tmp then deletes the
# scratch files. Under `make -j` clean-tmp could otherwise run while the build
# is still using those files, so parallel execution is disabled for this file.
.NOTPARALLEL:

# fst-dir names an action, not a file, so it is declared phony as well.
.PHONY: all clean-tmp fst-dir prereqs
all: fst-dir analyser-gt-desc-L2.hfstol clean-tmp
|
||
# Delete every file in the current directory whose name contains "tmp".
# `-f` keeps rm quiet when nothing matches; the leading `-` is kept so that
# any remaining rm failure still does not abort the build.
clean-tmp:
	-rm -f *tmp*
|
||
# Run a nested make inside fst/, reading Makefile_L2 there and building its
# default goal.
fst-dir:
	$(MAKE) --directory=fst --file=Makefile_L2
|
||
# Tags of the L2 error models. The analyser-gt-desc-L2.hfst recipe loops over
# this list, and each tag is the stem of the *-err-L2_<tag> pattern rules.
L2_ERRS := ii FV NoFV
L2_ERRS += Pal SRo SRy
|
||
# Convert the combined L2 analyser with hfst-fst2fst -w into the .hfstol file.
analyser-gt-desc-L2.hfstol: analyser-gt-desc-L2.hfst
	hfst-fst2fst -w -i $< -o $@
|
||
# Built by a nested make that is started without --makefile, so it reads
# whatever makefile make finds by default in this directory.
# NOTE(review): this presumes the present file is not that default makefile;
# otherwise the rule would call itself. Confirm.
# With no prerequisites, the recipe only runs when the file is missing.
generator-raw-gt-desc.tmp1.hfst:
	$(MAKE) $@
|
||
# Raw generator for one L2 error model ($* is the error tag).
#
# Pipeline: determinize and minimize the lexicon, compose-intersect it with
# the tag's error phonology, minimize again and write the target.
#
# The prerequisites are the two files the recipe actually reads. The previous
# prerequisite was the directory `fst`, whose timestamp says nothing about
# whether either input changed. Both files are expected to have been produced
# beforehand (the fst-dir target runs the makefile in fst/); this makefile has
# no rule for them.
generator-raw-gt-desc-err-L2_%.tmp1.hfst: fst/lexicon.hfst fst/phonology-err-L2_%.hfst
	echo "##### compiling the raw $* generator FST..."
	hfst-determinize -v $< \
	| hfst-minimize -v \
	| hfst-compose-intersect \
	-v \
	-2 $(word 2,$^) \
	| hfst-minimize -v \
	-o $@
|
||
# Subtract the standard generator from the error-model generator for tag $*,
# writing the difference to the target.
# NOTE(review): generator-raw-gt-desc.tmp1.hfst is read by the recipe but is
# not listed as a prerequisite, so it must already exist when this rule runs.
generator-raw-gt-desc-err-L2_%.tmp2.hfst: generator-raw-gt-desc-err-L2_%.tmp1.hfst
	echo "##### removing entries in $* that overlap with the std FST..."
	hfst-subtract $< generator-raw-gt-desc.tmp1.hfst > $@
|
||
# Feed hfst-xfst a short script that composes three tag filters
# (reorder-subpos, reorder-semantic, remove-mwe) with the .tmp2 transducer for
# tag $* and saves the resulting stack as the target.
# $< is the .tmp2 prerequisite and $@ is the .tmp target.
generator-raw-gt-desc-err-L2_%.tmp.hfst: generator-raw-gt-desc-err-L2_%.tmp2.hfst
	echo "##### composing $* with some filters for the generator..."
	printf "read regex \
	@\"filters/reorder-subpos-tags.hfst\" \
	.o. @\"filters/reorder-semantic-tags.hfst\" \
	.o. @\"filters/remove-mwe-tags.hfst\" \
	.o. @\"$<\" \
	;\n\
	save stack $@\n\
	quit\n" | hfst-xfst -p -v --format=foma
|
||
# The final raw generator for tag $* is a plain copy of the filtered .tmp file
# (cp -f overwrites an existing target).
generator-raw-gt-desc-err-L2_%.hfst: generator-raw-gt-desc-err-L2_%.tmp.hfst
	cp -f $< $@
|
||
# The raw analyser for tag $* starts out as a copy of the raw generator.
analyser-raw-gt-desc-err-L2_%.hfst: generator-raw-gt-desc-err-L2_%.hfst
	cp $< $@
|
||
# Build the filtered analyser for tag $* with an hfst-xfst script:
#   1. compose the tag-removal filters, the raw analyser ($<), the
#      downcase-derived_proper transducer and the border/boundary filters;
#   2. name that network `fst` and switch flag-is-epsilon ON;
#   3. compose `fst` with the inituppercase and spellrelax transducers;
#   4. save the stack as the target ($@).
analyser-gt-desc-err-L2_%.tmp.hfst: analyser-raw-gt-desc-err-L2_%.hfst
	printf "read regex \
	@\"filters/remove-area-tags.hfst\" \
	.o. @\"filters/remove-dialect-tags.hfst\" \
	.o. @\"filters/remove-number-string-tags.hfst\" \
	.o. @\"filters/remove-usage-tags.hfst\" \
	.o. @\"filters/remove-semantic-tags.hfst\" \
	.o. @\"filters/remove-orig_lang-tags.hfst\" \
	.o. @\"filters/remove-orthography-tags.hfst\" \
	.o. @\"filters/remove-Orth_IPA-strings.hfst\" \
	.o. @\"$<\" \
	.o. @\"orthography/downcase-derived_proper-strings.compose.hfst\" \
	.o. @\"filters/remove-hyphenation-marks.hfst\" \
	.o. @\"filters/remove-infl_deriv-borders.hfst\" \
	.o. @\"filters/remove-word-boundary.hfst\" \
	; \n\
	define fst \n\
	set flag-is-epsilon ON\n\
	read regex fst \
	.o. @\"orthography/inituppercase.compose.hfst\" \
	.o. @\"orthography/spellrelax.compose.hfst\" \
	;\n\
	save stack $@\n\
	quit\n" | hfst-xfst -p -v --format=foma
|
||
# Compose the filtered analyser for tag $* with the destressOptional
# transducer, invert the network, and save it as the target.
# $< is the .tmp prerequisite and $@ is the .tmp1 target.
analyser-gt-desc-err-L2_%.tmp1.hfst: analyser-gt-desc-err-L2_%.tmp.hfst
	echo "##### making stress optional for $*..."
	printf "read regex @\"$<\" \
	.o. @\"orthography/destressOptional.compose.hfst\" \
	;\n \
	invert net\n \
	save stack $@\n \
	quit\n" | hfst-xfst -p -v --format=foma
|
||
# Compose-intersect the .tmp1 analyser for tag $* (first prerequisite) with
# fst/add-tag-err-L2_$*.hfst (second prerequisite) and write the target.
analyser-gt-desc-err-L2_%.tmp2.hfst: analyser-gt-desc-err-L2_%.tmp1.hfst fst/add-tag-err-L2_%.hfst
	echo "##### adding +Err tags to the $* transducer..."
	hfst-compose-intersect -v -1 $< -2 $(word 2,$^) -o $@
|
||
# Combined analyser: the standard analyser plus every L2 error model.
#
# Steps:
#   1. let a nested make (default makefile) bring analyser-gt-desc.hfst up to
#      date, then copy it to a scratch accumulator;
#   2. for each tag in L2_ERRS, build that tag's .tmp1 and .tmp2 analysers
#      with this makefile and disjunct the .tmp2 result into the accumulator
#      (written to err.tmp.hfst first, then moved into place);
#   3. minimize the accumulator into fst.tmp and move it onto the target.
#
# The loop steps are chained with `&&` and end in `|| exit 1`. Previously they
# were joined with `;`, so a failure for any tag but the last was ignored and
# the target was built without that error model.
analyser-gt-desc-L2.hfst: analyser-gt-desc.hfst
	$(MAKE) analyser-gt-desc.hfst
	cp analyser-gt-desc.hfst analyser-gt-desc-L2.tmp.hfst
	for tag in $(L2_ERRS) ; \
	do \
		$(MAKE) --makefile=$(MAKEFILE_COMPLETE) analyser-gt-desc-err-L2_$${tag}.tmp1.hfst \
		&& $(MAKE) --makefile=$(MAKEFILE_COMPLETE) analyser-gt-desc-err-L2_$${tag}.tmp2.hfst \
		&& hfst-disjunct -1 analyser-gt-desc-L2.tmp.hfst \
			-2 analyser-gt-desc-err-L2_$${tag}.tmp2.hfst \
			> err.tmp.hfst \
		&& mv err.tmp.hfst analyser-gt-desc-L2.tmp.hfst \
		|| exit 1 ; \
	done
	hfst-minimize analyser-gt-desc-L2.tmp.hfst > fst.tmp
	mv fst.tmp $@
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,190 @@ | ||
# Default goal: for every L2 error tag, the tag-adding transducer, the
# concatenated .twolc grammar and its compiled .hfst; then the lexicon and the
# standard phonology.
.PHONY: all
all: $(foreach tag,ii FV NoFV Pal SRo SRy,\
       add-tag-err-L2_$(tag).hfst phonology-err-L2_$(tag).twolc phonology-err-L2_$(tag).hfst) \
     lexicon.hfst phonology.hfst
|
||
# Fragments of the standard phonology grammar, in concatenation order: the
# twolc header followed by rule files 000 through 049, all under
# phonology-rules/.
RULES_MAIN := $(addprefix phonology-rules/,twolc_header \
    000 001 002 003 004 005 006 007 008 009 \
    010 011 012 013 014 015 016 017 018 019 \
    020 021 022 023 024 025 026 027 028 029 \
    030 031 032 033 034 035 036 037 038 039 \
    040 041 042 043 044 045 046 047 048 049)
|
||
# Fragments for the FV error model: the standard list (RULES_MAIN) with rule
# files 002, 003 and 004 swapped for their "b" variants. Deriving the list
# keeps it in step with RULES_MAIN instead of duplicating all 51 entries.
RULES_L2_FV := $(foreach rule,$(RULES_MAIN),$(if $(filter \
    $(addprefix phonology-rules/,002 003 004),$(rule)),$(rule)b,$(rule)))
|
||
# Fragments for the NoFV error model: the standard list (RULES_MAIN) with rule
# files 002, 003, 004, 008, 009 and 010 swapped for their "a" variants.
# Deriving the list keeps it in step with RULES_MAIN instead of duplicating
# all 51 entries.
RULES_L2_NoFV := $(foreach rule,$(RULES_MAIN),$(if $(filter \
    $(addprefix phonology-rules/,002 003 004 008 009 010),$(rule)),$(rule)a,$(rule)))
|
||
# Fragments for the ii error model: the standard list (RULES_MAIN) with rule
# file 019 swapped for 019a. Deriving the list keeps it in step with
# RULES_MAIN instead of duplicating all 51 entries.
RULES_L2_ii := $(patsubst phonology-rules/019,phonology-rules/019a,$(RULES_MAIN))
|
||
# Fragments for the Pal error model: the standard list (RULES_MAIN) with rule
# files 012, 014, 015, 017, 018 and 023 swapped for their "a" variants.
# Deriving the list keeps it in step with RULES_MAIN instead of duplicating
# all 51 entries.
RULES_L2_Pal := $(foreach rule,$(RULES_MAIN),$(if $(filter \
    $(addprefix phonology-rules/,012 014 015 017 018 023),$(rule)),$(rule)a,$(rule)))
|
||
# Fragments for the SRo error model: the standard list (RULES_MAIN) with rule
# file 014 swapped for 014b. Deriving the list keeps it in step with
# RULES_MAIN instead of duplicating all 51 entries.
RULES_L2_SRo := $(patsubst phonology-rules/014,phonology-rules/014b,$(RULES_MAIN))
|
||
# Fragments for the SRy error model: the standard list (RULES_MAIN) with rule
# file 015 swapped for 015b. Deriving the list keeps it in step with
# RULES_MAIN instead of duplicating all 51 entries.
RULES_L2_SRy := $(patsubst phonology-rules/015,phonology-rules/015b,$(RULES_MAIN))
|
||
# Compile the tag-adding regex for one error model into a transducer in foma
# format, with Xerox-style composition enabled.
add-tag-err-L2_%.hfst: add-tag-err-L2_%.regex
	hfst-regexp2fst --format=foma --xerox-composition=ON -v -S $< -o $@
|
||
# Delegated to a nested make that reads this directory's default makefile.
# With no prerequisites, the recipe only runs when the file is missing.
# NOTE(review): presumes the present file is not that default makefile. Confirm.
lexicon.hfst:
	$(MAKE) $@
|
||
# Delegated to a nested make that reads this directory's default makefile.
# With no prerequisites, the recipe only runs when the file is missing.
# NOTE(review): presumes the present file is not that default makefile. Confirm.
phonology.hfst:
	$(MAKE) $@
|
||
# Concatenate the standard rule fragments into one grammar file.
# `tail -n +1` is used rather than `cat` because, given several files, it
# prints a "==> name <==" heading before each one; sed then rewrites the start
# of those headings to "!! ==> sourced from".
phonology.twolc: $(RULES_MAIN)
	tail -n +1 $^ | sed "s/^==>/!! ==> sourced from/g" > $@
|
||
# Rule fragments that make up each L2 error-model grammar.
phonology-err-L2_ii.twolc:   $(RULES_L2_ii)
phonology-err-L2_FV.twolc:   $(RULES_L2_FV)
phonology-err-L2_NoFV.twolc: $(RULES_L2_NoFV)
phonology-err-L2_Pal.twolc:  $(RULES_L2_Pal)
phonology-err-L2_SRo.twolc:  $(RULES_L2_SRo)
phonology-err-L2_SRy.twolc:  $(RULES_L2_SRy)

# One shared recipe for all six grammars: concatenate the target's fragments
# ($^) with per-file headings from `tail -n +1`, rewrite the start of each
# heading to "!! ==> sourced from", and write the result to the target.
phonology-err-L2_ii.twolc phonology-err-L2_FV.twolc phonology-err-L2_NoFV.twolc \
phonology-err-L2_Pal.twolc phonology-err-L2_SRo.twolc phonology-err-L2_SRy.twolc:
	tail -n +1 $^ \
	| sed "s/^==>/!! ==> sourced from/g" \
	> $@
|
||
# Compile an error-model two-level grammar ($<) with hfst-twolc into a
# transducer ($@) in foma format.
phonology-err-L2_%.hfst: phonology-err-L2_%.twolc
	hfst-twolc --format=foma -v -i $< -o $@
Oops, something went wrong.