Skip to content

Commit

Permalink
Add auto syllabification for other languages (#30)
Browse files Browse the repository at this point in the history
Add auto-syllabification for English, Polish and Spanish, with the possibility of easily adding other languages if necessary.
  • Loading branch information
Nianna authored Jul 14, 2023
1 parent cbbaf73 commit e9cea10
Show file tree
Hide file tree
Showing 29 changed files with 17,977 additions and 26 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
build/
doc/
/target/
.idea
8 changes: 8 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
<junit.version>5.9.2</junit.version>
<mp3agic.version>0.8.1</mp3agic.version>
<richtextfx.version>0.11.0</richtextfx.version>
<hyphenator.version>1.0.0</hyphenator.version>
</properties>

<build>
Expand Down Expand Up @@ -282,6 +283,7 @@
<modulePath>${settings.localRepository}\org\openjfx\javafx-base\${javafx.version}</modulePath>
<modulePath>${settings.localRepository}\org\openjfx\javafx-graphics\${javafx.version}</modulePath>
<modulePath>${settings.localRepository}\org\controlsfx\controlsfx\${controlsfx.version}</modulePath>
<modulePath>${settings.localRepository}\io\github\nianna\hyphenator\${hyphenator.version}</modulePath>
<modulePath>${project.jars.outputDirectory}</modulePath>
</modulePaths>
<javaOptions>
Expand Down Expand Up @@ -330,6 +332,12 @@

<dependencies>

<dependency>
<groupId>io.github.nianna</groupId>
<artifactId>hyphenator</artifactId>
<version>${hyphenator.version}</version>
</dependency>

<dependency>
<groupId>com.googlecode.soundlibs</groupId>
<artifactId>jlayer</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,32 +1,44 @@
package com.github.nianna.karedi.context;

import com.github.nianna.karedi.I18N;
import com.github.nianna.karedi.song.Song;
import com.github.nianna.karedi.song.tag.TagKey;
import com.github.nianna.karedi.syllabizer.Syllabizer;
import com.github.nianna.karedi.syllabizer.SyllabizerFactory;
import com.github.nianna.karedi.syllabizer.SyllabizerInitializationFailedException;
import com.github.nianna.karedi.util.Language;
import javafx.beans.property.ReadOnlyObjectProperty;

import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.logging.Logger;

public class SyllabizerContext {

private static final Map<Language, Syllabizer> SYLLABIZERS = new HashMap<>();
private static final Logger LOGGER = Logger.getLogger(SyllabizerContext.class.getName());

private static final ConcurrentMap<Language, Syllabizer> SYLLABIZERS = new ConcurrentHashMap<>();

private final ReadOnlyObjectProperty<Song> activeSongProperty;

SyllabizerContext(ActiveSongContext activeSongContext) {
this.activeSongProperty = activeSongContext.activeSongProperty();
SyllabizerFactory.supportedLanguages()
.forEach(language -> SYLLABIZERS.put(language, SyllabizerFactory.createFor(language)));
}

public Optional<Syllabizer> findSyllabizer() {
return Optional.of(activeSongProperty.get())
.flatMap(song -> song.getTagValue(TagKey.LANGUAGE))
.flatMap(Language::parse)
.map(SYLLABIZERS::get);
.map(language -> SYLLABIZERS.computeIfAbsent(language, this::createSyllabizer));
}

private Syllabizer createSyllabizer(Language language) {
try {
return SyllabizerFactory.createFor(language);
} catch (SyllabizerInitializationFailedException exception) {
LOGGER.severe(I18N.get("syllabizer.init_failed"));
return SyllabizerFactory.createNoopSyllabizer();
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.github.nianna.karedi.syllabizer;

/**
 * {@link PatternSyllabizer} configured with the English dictionary and syllable pattern.
 */
class EnglishSyllabizer extends PatternSyllabizer {

    /** Dictionary name passed to {@link PatternSyllabizer}. */
    private static final String DICTIONARY_NAME = "hyph_en_US.dic";

    /**
     * Syllable regex passed to {@link PatternSyllabizer}: matches text that contains one of
     * {@code a, e, i, o, u}, or that ends with {@code y} preceded by at least one character.
     */
    private static final String SYLLABLE_PATTERN = ".*[aeiou].*|.+y";

    EnglishSyllabizer() {
        super(DICTIONARY_NAME, SYLLABLE_PATTERN);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import java.util.List;
import java.util.Set;

class JapaneseSyllabizer implements Syllabizer {
class JapaneseSyllabizer extends Syllabizer {

private static final Set<Character> CONSONANTS = Set.of(
'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'y', 'z'
Expand All @@ -23,12 +23,7 @@ class JapaneseSyllabizer implements Syllabizer {
* </p>
*/
@Override
public List<String> syllabize(String input) {
String normalized = input.replaceAll("\\R", " ").stripTrailing();
return syllabizeNormalized(normalized);
}

private List<String> syllabizeNormalized(String input) {
protected List<String> syllabizeNormalized(String input) {
List<String> results = new LinkedList<>();
int nextSyllableStartIndex = 0;
boolean canNextSyllableBeFinished = false;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.github.nianna.karedi.syllabizer;

import java.util.List;

/**
 * Syllabizer that performs no splitting: the whole normalized text is returned as one chunk.
 */
public class NoopSyllabizer extends Syllabizer {

    /**
     * @param input normalized text
     * @return a single-element list holding {@code input} prefixed with one space
     */
    @Override
    protected List<String> syllabizeNormalized(String input) {
        String wholeText = " " + input;
        return List.of(wholeText);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package com.github.nianna.karedi.syllabizer;

import com.github.nianna.karedi.util.ResourceUtils;
import io.github.nianna.api.HyphenatedText;
import io.github.nianna.api.Hyphenator;
import io.github.nianna.api.HyphenatorProperties;

import java.util.List;

/**
 * Base class for syllabizers backed by a hyphenation dictionary loaded from the
 * {@code /syllabizer/} resource directory.
 *
 * <p>The hyphenator output is post-processed by a {@link SyllablesSanitizer} built from the
 * language-specific syllable pattern.
 */
abstract class PatternSyllabizer extends Syllabizer {

    /** Marker placed between chunks of the hyphenated text and used to split them apart. */
    private static final String CHUNK_SEPARATOR = "\t";

    private final Hyphenator hyphenator;

    private final SyllablesSanitizer sanitizer;

    /**
     * @param dictionaryName   file name of the dictionary resource under {@code /syllabizer/}
     * @param syllablesPattern regex handed to the {@link SyllablesSanitizer}
     * @throws SyllabizerInitializationFailedException when loading the dictionary or building
     *         the hyphenator or sanitizer fails; the original failure is kept as the cause
     */
    PatternSyllabizer(String dictionaryName, String syllablesPattern) {
        try {
            List<String> patterns = ResourceUtils.readLines("/syllabizer/%s".formatted(dictionaryName));
            // NOTE(review): (1, 1) presumably are the minimal leading/trailing lengths - confirm
            // against the hyphenator library documentation.
            HyphenatorProperties properties = new HyphenatorProperties(1, 1);
            hyphenator = new Hyphenator(patterns, properties);
            sanitizer = new SyllablesSanitizer(syllablesPattern);
        } catch (Exception e) {
            // The cause travels with the exception; reporting it is left to the caller.
            throw new SyllabizerInitializationFailedException(e);
        }
    }

    /**
     * Hyphenates the input and returns the sanitized chunks.
     *
     * @param input normalized text to split
     * @return chunks produced by the hyphenator after sanitizing
     */
    @Override
    public List<String> syllabizeNormalized(String input) {
        HyphenatedText text = hyphenator.hyphenateText(input);
        // NOTE(review): read(...) presumably takes the token separator first and the syllable
        // separator second, so every chunk starting a new word begins with a space - confirm.
        String textToSplit = " " + text.read(CHUNK_SEPARATOR + " ", CHUNK_SEPARATOR);
        return sanitizer.sanitize(textToSplit.split(CHUNK_SEPARATOR));
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.github.nianna.karedi.syllabizer;

/**
 * {@link PatternSyllabizer} configured with the Polish dictionary and syllable pattern.
 */
class PolishSyllabizer extends PatternSyllabizer {

    /** Dictionary name passed to {@link PatternSyllabizer}. */
    private static final String DICTIONARY_NAME = "hyph_pl_PL.dic";

    /** Syllable regex passed to {@link PatternSyllabizer}: text containing a listed vowel. */
    private static final String SYLLABLE_PATTERN = ".*[aąeęioóuy].*";

    PolishSyllabizer() {
        super(DICTIONARY_NAME, SYLLABLE_PATTERN);
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.github.nianna.karedi.syllabizer;

/**
 * {@link PatternSyllabizer} configured with the Spanish dictionary and syllable pattern.
 */
class SpanishSyllabizer extends PatternSyllabizer {

    /** Dictionary name passed to {@link PatternSyllabizer}. */
    private static final String DICTIONARY_NAME = "hyph_es_ES.dic";

    /** Syllable regex passed to {@link PatternSyllabizer}: text containing a listed vowel. */
    private static final String SYLLABLE_PATTERN = ".*[aeiouáéíóú].*";

    SpanishSyllabizer() {
        super(DICTIONARY_NAME, SYLLABLE_PATTERN);
    }

}
12 changes: 10 additions & 2 deletions src/main/java/com/github/nianna/karedi/syllabizer/Syllabizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@

import java.util.List;

/**
 * Splits text into chunks. Input normalization is shared here; the language-specific
 * splitting is supplied by subclasses through {@link #syllabizeNormalized(String)}.
 */
public abstract class Syllabizer {

    /**
     * Normalizes the input and delegates the splitting to the subclass.
     *
     * <p>Every run of whitespace (line breaks included) is collapsed into a single space and
     * the result is stripped on both ends.
     *
     * @param input text to split; may be {@code null}
     * @return the chunks produced by {@link #syllabizeNormalized(String)}, or an empty list when
     *         the input is {@code null} or contains nothing but whitespace
     */
    public final List<String> syllabize(String input) {
        if (input == null) {
            // Treat a missing text like a blank one instead of failing with an NPE.
            return List.of();
        }
        String normalized = input.replaceAll("\\s+", " ").strip();
        if (normalized.isEmpty()) {
            return List.of();
        }
        return syllabizeNormalized(normalized);
    }

    /**
     * Splits already normalized text.
     *
     * @param input non-empty text without leading or trailing whitespace, with whitespace
     *              runs collapsed to single spaces
     * @return the resulting chunks
     */
    protected abstract List<String> syllabizeNormalized(String input);
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,23 @@

import com.github.nianna.karedi.util.Language;

import java.util.Set;

import static com.github.nianna.karedi.util.Language.JAPANESE;

public class SyllabizerFactory {

private SyllabizerFactory() {

}

public static Syllabizer createFor(Language language) {
if (language == JAPANESE) {
return new JapaneseSyllabizer();
}
return null;
return switch (language) {
case ENGLISH -> new EnglishSyllabizer();
case JAPANESE -> new JapaneseSyllabizer();
case POLISH -> new PolishSyllabizer();
case SPANISH, ESPANOL -> new SpanishSyllabizer();
default -> null;
};
}

public static Set<Language> supportedLanguages() {
return Set.of(JAPANESE);
public static Syllabizer createNoopSyllabizer() {
return new NoopSyllabizer();
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.github.nianna.karedi.syllabizer;

/**
 * Unchecked exception signalling that a syllabizer could not be initialized.
 */
public class SyllabizerInitializationFailedException extends IllegalStateException {

    /**
     * Wraps the failure that prevented the initialization.
     *
     * @param cause the underlying failure, available later through {@link #getCause()}
     */
    public SyllabizerInitializationFailedException(final Throwable cause) {
        super(cause);
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package com.github.nianna.karedi.syllabizer;

import java.util.ArrayList;
import java.util.List;

/**
 * Merges chunks that do not form a syllable on their own into neighbouring chunks.
 *
 * <p>A chunk counts as a syllable when its lower-cased, stripped text fully matches the
 * configured regex.
 */
class SyllablesSanitizer {

    /** Compiled once here instead of on every {@code String.matches} call. */
    private final java.util.regex.Pattern syllablesPattern;

    /**
     * @param syllablesPattern regex that a lower-cased, stripped chunk must fully match to be
     *                         kept as a separate syllable
     */
    SyllablesSanitizer(String syllablesPattern) {
        this.syllablesPattern = java.util.regex.Pattern.compile(syllablesPattern);
    }

    /**
     * Produces the list of syllables from raw chunks.
     *
     * <p>A chunk that is not a syllable is appended to the previous result entry. When there is
     * no previous entry, or the chunk starts with a space, it is instead prepended to the next
     * chunk, or kept as is if it is the last one. The input array is left untouched.
     *
     * @param syllables raw chunks, in order
     * @return the sanitized chunks
     */
    List<String> sanitize(String[] syllables) {
        List<String> result = new ArrayList<>();
        // Text waiting to be prepended to the next chunk; null when nothing is pending.
        String carried = null;
        for (int i = 0; i < syllables.length; i++) {
            String syllable = carried == null ? syllables[i] : carried + syllables[i];
            carried = null;
            if (isSyllable(syllable)) {
                result.add(syllable);
            } else if (!result.isEmpty() && !syllable.startsWith(" ")) {
                // Word-internal leftover: glue it to the preceding syllable.
                int last = result.size() - 1;
                result.set(last, result.get(last) + syllable);
            } else if (i < syllables.length - 1) {
                // Nothing to attach to on the left: push it onto the next chunk.
                carried = syllable;
            } else {
                result.add(syllable);
            }
        }
        return result;
    }

    /** Checks whether the chunk matches the syllable pattern, independently of the JVM locale. */
    private boolean isSyllable(String chunk) {
        String normalized = chunk.toLowerCase(java.util.Locale.ROOT).strip();
        return syllablesPattern.matcher(normalized).matches();
    }

}
23 changes: 23 additions & 0 deletions src/main/java/com/github/nianna/karedi/util/ResourceUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package com.github.nianna.karedi.util;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;

/**
 * Helpers for reading resources bundled with the application.
 */
public class ResourceUtils {

    private ResourceUtils() {

    }

    /**
     * Reads all lines of a UTF-8 encoded resource.
     *
     * @param path resource path, resolved with {@link Class#getResourceAsStream(String)} against
     *             this class
     * @return a mutable list with the lines of the resource, in order
     * @throws NullPointerException when no resource exists under {@code path}
     * @throws java.io.UncheckedIOException when reading the resource fails part-way
     */
    public static List<String> readLines(String path) {
        var resourceStream = ResourceUtils.class.getResourceAsStream(path);
        if (resourceStream == null) {
            // Scanner would fail with a bare "source" message; name the missing resource.
            throw new NullPointerException("Resource not found on the classpath: " + path);
        }
        List<String> lines = new ArrayList<>();
        try (Scanner scan = new Scanner(resourceStream, StandardCharsets.UTF_8)) {
            while (scan.hasNextLine()) {
                lines.add(scan.nextLine());
            }
            // Scanner swallows read errors; surface them rather than return truncated content.
            if (scan.ioException() != null) {
                throw new java.io.UncheckedIOException("Failed to read resource: " + path, scan.ioException());
            }
        }
        return lines;
    }
}
1 change: 1 addition & 0 deletions src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
requires org.fxmisc.richtext;
requires org.fxmisc.flowless;
requires wellbehavedfx;
requires nianna.hyphenator;

opens com.github.nianna.karedi to javafx.graphics;
opens com.github.nianna.karedi.controller to javafx.fxml;
Expand Down
2 changes: 2 additions & 0 deletions src/main/resources/messages.properties
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,8 @@ songsaver.export.fail=Export failed - file not found
songsaver.save.success=Save finished
songsaver.save.fail=Save failed
syllabizer.init_failed=Failed to initialize auto-syllabizer
player.midi.unavailable=Midi sequencer unavailable
player.midi.invalid_data=Invalid midi sequence requested
player.mp3.fail=Playback failed
Expand Down
2 changes: 2 additions & 0 deletions src/main/resources/messages_en_GB.properties
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,8 @@ songsaver.export.fail=Export failed - file not found
songsaver.save.success=Save finished
songsaver.save.fail=Save failed
syllabizer.init_failed=Failed to initialize auto-syllabizer
player.midi.unavailable=Midi sequencer unavailable
player.midi.invalid_data=Invalid midi sequence requested
player.mp3.fail=Playback failed
Expand Down
2 changes: 2 additions & 0 deletions src/main/resources/messages_pl_PL.properties
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,8 @@ songsaver.export.fail=Eksportowanie nie powiod\u0142o si\u0119
songsaver.save.success=Zapisano zmiany
songsaver.save.fail=Zapisanie zmian nie powiod\u0142o si\u0119

syllabizer.init_failed=Inicjalizacja sylabizatora nie powiod\u0142a si\u0119

player.midi.unavailable=Sekwencera midi niedost\u0119pny
player.midi.invalid_data=B\u0142\u0119dna sekwencja midi
player.mp3.fail=B\u0142\u0105d odtwarzania
Expand Down
Loading

0 comments on commit e9cea10

Please sign in to comment.