Skip to content

Commit

Permalink
add phonetic transcriptions term meta type (#434)
Browse files Browse the repository at this point in the history
* move dictionary files to dictionary folder

* wip

* move dictionary files to dictionary folder

* add ipa term meta

* wip

* fixing comments wip

* fixing comments wip

* fixing comments wip

* fixing comments wip

* fixing comments wip

* fixing comments wip

* fix comments

* fix comments

* update test data

* fix gitignore

* engines

* add tests

* update database test

* fix test
  • Loading branch information
StefanVukovic99 authored Dec 28, 2023
1 parent 60cd218 commit fc2123a
Show file tree
Hide file tree
Showing 25 changed files with 1,715 additions and 119 deletions.
2 changes: 1 addition & 1 deletion .eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,7 @@
"ext/js/general/text-source-map.js",
"ext/js/language/deinflector.js",
"ext/js/dictionary/dictionary-database.js",
"ext/js/language/sandbox/dictionary-data-util.js",
"ext/js/dictionary/dictionary-data-util.js",
"ext/js/language/sandbox/japanese-util.js",
"ext/js/language/translator.js",
"ext/js/media/audio-downloader.js",
Expand Down
2 changes: 1 addition & 1 deletion dev/jsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
"../ext/js/language/deinflector.js",
"../ext/js/dictionary/dictionary-importer.js",
"../ext/js/dictionary/dictionary-database.js",
"../ext/js/language/sandbox/dictionary-data-util.js",
"../ext/js/dictionary/dictionary-data-util.js",
"../ext/js/language/sandbox/japanese-util.js",
"../ext/js/language/translator.js",
"../ext/js/media/media-util.js",
Expand Down
52 changes: 50 additions & 2 deletions ext/data/schemas/dictionary-term-meta-bank-v3-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
},
{
"type": "string",
"enum": ["freq", "pitch"],
"description": "Type of data. \"freq\" corresponds to frequency information; \"pitch\" corresponds to pitch information."
"enum": ["freq", "pitch", "ipa"],
"description": "Type of data. \"freq\" corresponds to frequency information; \"pitch\" corresponds to pitch information. \"ipa\" corresponds to IPA transcription."
},
{
"description": "Data for the term."
Expand Down Expand Up @@ -164,6 +164,54 @@
}
}
]
},
{
"minItems": 3,
"maxItems": 3,
"items": [
{},
{"const": "ipa"},
{
"type": ["object"],
"description": "IPA transcription information for the term.",
"required": [
"reading",
"transcriptions"
],
"additionalProperties": false,
"properties": {
"reading": {
"type": "string",
"description": "Reading for the term."
},
"transcriptions": {
"type": "array",
"description": "List of different IPA transcription information for the term and reading combination.",
"items": {
"type": "object",
"required": [
"ipa"
],
"additionalProperties": false,
"properties": {
"ipa": {
"type": "string",
"description": "IPA transcription for the term."
},
"tags": {
"type": "array",
"description": "List of tags for this IPA transcription.",
"items": {
"type": "string",
"description": "Tag for this IPA transcription."
}
}
}
}
}
}
}
]
}
]
}
Expand Down
21 changes: 21 additions & 0 deletions ext/data/templates/default-anki-field-templates.handlebars
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,27 @@
{{/inline}}
{{! End Pitch Accents }}

{{! Phonetic Transcriptions }}
{{! Inline partial: walks every group in definition.phoneticTranscriptions and every transcription inside each group, emitting one list item per transcription. Tag names, when present, are printed first in italic parentheses separated by commas, followed by the IPA text. The "any" variable records whether a tag has already been printed for the current item. The whole list is wrapped in a length check on definition.phoneticTranscriptions. }}
{{#*inline "phonetic-transcriptions"}}
{{~#if (op ">" definition.phoneticTranscriptions.length 0)~}}
<ul>
{{~#each definition.phoneticTranscriptions~}}
{{~#each phoneticTranscriptions~}}
<li>
{{~set "any" false~}}
{{~#each tags~}}
{{~#if (get "any")}}, {{else}}<i>({{/if~}}
{{name}}
{{~set "any" true~}}
{{~/each~}}
{{~#if (get "any")}})</i> {{/if~}}
{{ipa~}}
</li>
{{~/each~}}
{{~/each~}}
</ul>
{{~/if~}}
{{/inline}}
{{! End Phonetic Transcriptions }}

{{#*inline "clipboard-image"}}
{{~#if (hasMedia "clipboardImage")~}}
<img src="{{getMedia "clipboardImage"}}" />
Expand Down
102 changes: 94 additions & 8 deletions ext/js/data/sandbox/anki-note-data-creator.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ export class AnkiNoteDataCreator {
const context2 = this.createCachedValue(this._getPublicContext.bind(this, context));
const pitches = this.createCachedValue(this._getPitches.bind(this, dictionaryEntry));
const pitchCount = this.createCachedValue(this._getPitchCount.bind(this, pitches));
const phoneticTranscriptions = this.createCachedValue(this._getPhoneticTranscriptions.bind(this, dictionaryEntry));

if (typeof media !== 'object' || media === null || Array.isArray(media)) {
media = {
audio: void 0,
Expand Down Expand Up @@ -82,6 +84,7 @@ export class AnkiNoteDataCreator {
get uniqueReadings() { return self.getCachedValue(uniqueReadings); },
get pitches() { return self.getCachedValue(pitches); },
get pitchCount() { return self.getCachedValue(pitchCount); },
get phoneticTranscriptions() { return self.getCachedValue(phoneticTranscriptions); },
get context() { return self.getCachedValue(context2); },
media,
dictionaryEntry
Expand Down Expand Up @@ -193,7 +196,11 @@ export class AnkiNoteDataCreator {
for (const {dictionary, pronunciations} of DictionaryDataUtil.getGroupedPronunciations(dictionaryEntry)) {
/** @type {import('anki-templates').Pitch[]} */
const pitches = [];
for (const {terms, reading, position, nasalPositions, devoicePositions, tags, exclusiveTerms, exclusiveReadings} of pronunciations) {
for (const groupedPronunciation of pronunciations) {
const {pronunciation} = groupedPronunciation;
if (pronunciation.type !== 'pitch-accent') { continue; }
const {position, nasalPositions, devoicePositions, tags} = pronunciation;
const {terms, reading, exclusiveTerms, exclusiveReadings} = groupedPronunciation;
pitches.push({
expressions: terms,
reading,
Expand All @@ -211,6 +218,35 @@ export class AnkiNoteDataCreator {
return results;
}

/**
* @param {import('dictionary').DictionaryEntry} dictionaryEntry
* @returns {import('anki-templates').TranscriptionGroup[]}
*/
_getPhoneticTranscriptions(dictionaryEntry) {
const results = [];
if (dictionaryEntry.type === 'term') {
for (const {dictionary, pronunciations} of DictionaryDataUtil.getGroupedPronunciations(dictionaryEntry)) {
const phoneticTranscriptions = [];
for (const groupedPronunciation of pronunciations) {
const {pronunciation} = groupedPronunciation;
if (pronunciation.type !== 'phonetic-transcription') { continue; }
const {ipa, tags} = pronunciation;
const {terms, reading, exclusiveTerms, exclusiveReadings} = groupedPronunciation;
phoneticTranscriptions.push({
expressions: terms,
reading,
ipa,
tags,
exclusiveExpressions: exclusiveTerms,
exclusiveReadings
});
}
results.push({dictionary, phoneticTranscriptions});
}
}
return results;
}

/**
* @param {import('anki-templates-internal').CachedValue<import('anki-templates').PitchGroup[]>} cachedPitches
* @returns {number}
Expand Down Expand Up @@ -353,6 +389,7 @@ export class AnkiNoteDataCreator {
const expressions = this.createCachedValue(this._getTermExpressions.bind(this, dictionaryEntry));
const frequencies = this.createCachedValue(this._getTermFrequencies.bind(this, dictionaryEntry));
const pitches = this.createCachedValue(this._getTermPitches.bind(this, dictionaryEntry));
const phoneticTranscriptions = this.createCachedValue(this._getTermPhoneticTranscriptions.bind(this, dictionaryEntry));
const glossary = this.createCachedValue(this._getTermGlossaryArray.bind(this, dictionaryEntry, type));
const cloze = this.createCachedValue(this._getCloze.bind(this, dictionaryEntry, context));
const furiganaSegments = this.createCachedValue(this._getTermFuriganaSegments.bind(this, dictionaryEntry, type));
Expand Down Expand Up @@ -389,6 +426,7 @@ export class AnkiNoteDataCreator {
get definitions() { return self.getCachedValue(commonInfo).definitions; },
get frequencies() { return self.getCachedValue(frequencies); },
get pitches() { return self.getCachedValue(pitches); },
get phoneticTranscriptions() { return self.getCachedValue(phoneticTranscriptions); },
sourceTermExactMatchCount,
url,
get cloze() { return self.getCachedValue(cloze); },
Expand Down Expand Up @@ -485,15 +523,16 @@ export class AnkiNoteDataCreator {

/**
* @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
* @returns {import('anki-templates').TermPronunciation[]}
* @returns {import('anki-templates').TermPitchAccent[]}
*/
_getTermPitches(dictionaryEntry) {
// eslint-disable-next-line @typescript-eslint/no-this-alias
const self = this;
const results = [];
const {headwords} = dictionaryEntry;
for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches} of dictionaryEntry.pronunciations) {
for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pronunciations} of dictionaryEntry.pronunciations) {
const {term, reading} = headwords[headwordIndex];
const pitches = DictionaryDataUtil.getPronunciationsOfType(pronunciations, 'pitch-accent');
const cachedPitches = this.createCachedValue(this._getTermPitchesInner.bind(this, pitches));
results.push({
index: results.length,
Expand All @@ -512,8 +551,8 @@ export class AnkiNoteDataCreator {
}

/**
* @param {import('dictionary').TermPitch[]} pitches
* @returns {import('anki-templates').TermPitch[]}
* @param {import('dictionary').PitchAccent[]} pitches
* @returns {import('anki-templates').PitchAccent[]}
*/
_getTermPitchesInner(pitches) {
// eslint-disable-next-line @typescript-eslint/no-this-alias
Expand All @@ -529,6 +568,52 @@ export class AnkiNoteDataCreator {
return results;
}

/**
* @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
* @returns {import('anki-templates').TermPhoneticTranscription[]}
*/
_getTermPhoneticTranscriptions(dictionaryEntry) {
const results = [];
const {headwords} = dictionaryEntry;
for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pronunciations} of dictionaryEntry.pronunciations) {
const {term, reading} = headwords[headwordIndex];
const phoneticTranscriptions = DictionaryDataUtil.getPronunciationsOfType(pronunciations, 'phonetic-transcription');
const termPhoneticTranscriptions = this._getTermPhoneticTranscriptionsInner(phoneticTranscriptions);
results.push({
index: results.length,
expressionIndex: headwordIndex,
dictionary,
dictionaryOrder: {
index: dictionaryIndex,
priority: dictionaryPriority
},
expression: term,
reading,
get phoneticTranscriptions() { return termPhoneticTranscriptions; }
});
}

return results;
}

/**
* @param {import('dictionary').PhoneticTranscription[]} phoneticTranscriptions
* @returns {import('anki-templates').PhoneticTranscription[]}
*/
_getTermPhoneticTranscriptionsInner(phoneticTranscriptions) {
// eslint-disable-next-line @typescript-eslint/no-this-alias
const self = this;
const results = [];
for (const {ipa, tags} of phoneticTranscriptions) {
const cachedTags = this.createCachedValue(this._convertTags.bind(this, tags));
results.push({
ipa,
get tags() { return self.getCachedValue(cachedTags); }
});
}
return results;
}

/**
* @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
* @returns {import('anki-templates').TermHeadword[]}
Expand Down Expand Up @@ -592,16 +677,17 @@ export class AnkiNoteDataCreator {
/**
* @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
* @param {number} i
* @returns {import('anki-templates').TermPronunciation[]}
* @returns {import('anki-templates').TermPitchAccent[]}
*/
_getTermExpressionPitches(dictionaryEntry, i) {
// eslint-disable-next-line @typescript-eslint/no-this-alias
const self = this;
const results = [];
const {headwords, pronunciations} = dictionaryEntry;
for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches} of pronunciations) {
const {headwords, pronunciations: termPronunciations} = dictionaryEntry;
for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pronunciations} of termPronunciations) {
if (headwordIndex !== i) { continue; }
const {term, reading} = headwords[headwordIndex];
const pitches = DictionaryDataUtil.getPronunciationsOfType(pronunciations, 'pitch-accent');
const cachedPitches = this.createCachedValue(this._getTermPitchesInner.bind(this, pitches));
results.push({
index: results.length,
Expand Down
Loading

0 comments on commit fc2123a

Please sign in to comment.