Skip to content

Commit

Permalink
Update dictionary source
Browse files Browse the repository at this point in the history
Changes:
- Entries can now have multiple parts of speech instead of one: `part_of_speech` was previously a `str` and is now a list of `str`s.
- Added an "information" field, which may contain extra grammatical information.
- Quotes in definitions now use a different encoding, for easier front-end (FE) use.
  • Loading branch information
stscoundrel committed Dec 25, 2022
1 parent ec074b7 commit 394c012
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 14 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ from old_swedish_dictionary.dictionary import DictionaryEntry

{
headword: str
part_of_speech: str
part_of_speech: list[str]
grammatical_aspect: str
information: str
definitions: list[str]
alternative_forms: list[str]
}
Expand Down
8 changes: 5 additions & 3 deletions src/old_swedish_dictionary/dictionary.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from typing import NamedTuple, Tuple
from typing import Final, NamedTuple, Tuple

from . import reader

DICTIONARY_PATH = "old-swedish-dictionary.json"
DICTIONARY_PATH: Final[str] = "old-swedish-dictionary.json"


class DictionaryEntry(NamedTuple):
headword: str
part_of_speech: str
part_of_speech: list[str]
grammatical_aspect: str
information: str
definitions: list[str]
alternative_forms: list[str]

Expand All @@ -23,6 +24,7 @@ def get_dictionary() -> Tuple[DictionaryEntry, ...]:
raw_entry["c"],
raw_entry["d"],
raw_entry["e"],
raw_entry["f"],
)
for raw_entry in raw_data
)

Large diffs are not rendered by default.

24 changes: 15 additions & 9 deletions tests/test_dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,31 @@ def test_dictionary_has_expected_content() -> None:

expected_100 = DictionaryEntry(
headword="af bränna",
part_of_speech="vb",
part_of_speech=["vb"],
grammatical_aspect="v.",
information="",
definitions=[
"afbränna, genom eld förstöra. hans trähws the af brendhe RK 2: 2757 . ib 1511. halff stadhen är affbrändh BSH 5: 132 ( 1506) . Jfr bränna af."
],
alternative_forms=[],
)
expected_1000 = DictionaryEntry(
headword="annat thera",
part_of_speech="kn",
part_of_speech=["kn"],
grammatical_aspect="konj.",
information="",
definitions=[
" (eg. n. af annar i förening med gen. pl. af pron. thän) antingen. annat thera . . . älla, antingen . . . eller. annat thera skal jach felle thegh dödhan eller tu mik Di 218 . " maa han i noghraa mattho wara annath thera gwdh eller man Lg 3: 108. ""
" (eg. n. af annar i förening med gen. pl. af pron. thän) antingen. annat thera . . . älla, antingen . . . eller. annat thera skal jach felle thegh dödhan eller tu mik Di 218 . \" maa han i noghraa mattho wara annath thera gwdh eller man Lg 3: 108. \""
],
alternative_forms=[],
)
expected_5000 = DictionaryEntry(
headword="diost",
part_of_speech="nn",
part_of_speech=["nn"],
grammatical_aspect="",
information="",
definitions=[
"ridderlig tvekamp till häst. vid hvilken de stridande med lansarne angrepo hvarandra. Jfr Diez, Etym. Wörterb. 1: 216; Viollet-le-Duc, Dictionnarie du nobilier francais 2: 366 f.; Schultz. Höf. Leb. 2: 107, 110 f., samt Niedner, Das deutsche Turnier s. 38 f tornäy ok dyost Iv 1560 . ib 1847, 1962, 4213, (Cod. B. C) 948. ij torney ij diwst äller ok ij striidh ib 3514 . " sik manleka brukande i dyysth ällar thorney " Su 176 . ther mz thenna dost for gik ok torney burdhis riddirlik Fr 1829 . Va 52 . " ther war dust ok behordh " RK 1: 3518 . riddara oc swena the giordho ther gaman mz diost oc bobordh Iv 46 . " mz hoff oc danz oc leek oc diost " MD 190 . " viisto riddara thera leek . . . mz dust at stangana gingo sönder " RK 1: 1100 . ib 1104 . " diwst at ridha " Al 454 . Fr 1671, 1719 . RK 2: 196 . Lg 3: 64 . rida i döst Di 17 . ränna diost Al 3865 . ib 4751 . öfde ther dyst RK 2: 5177 . " bruka . . . dwst ällar spärbräkningh " Lg 3: 66 . - envig? som androm biwdher diwsth SGGK 106 ."
"ridderlig tvekamp till häst. vid hvilken de stridande med lansarne angrepo hvarandra. Jfr Diez, Etym. Wörterb. 1: 216; Viollet-le-Duc, Dictionnarie du nobilier francais 2: 366 f.; Schultz. Höf. Leb. 2: 107, 110 f., samt Niedner, Das deutsche Turnier s. 38 f tornäy ok dyost Iv 1560 . ib 1847, 1962, 4213, (Cod. B. C) 948. ij torney ij diwst äller ok ij striidh ib 3514 . \" sik manleka brukande i dyysth ällar thorney \" Su 176 . ther mz thenna dost for gik ok torney burdhis riddirlik Fr 1829 . Va 52 . \" ther war dust ok behordh \" RK 1: 3518 . riddara oc swena the giordho ther gaman mz diost oc bobordh Iv 46 . \" mz hoff oc danz oc leek oc diost \" MD 190 . \" viisto riddara thera leek . . . mz dust at stangana gingo sönder \" RK 1: 1100 . ib 1104 . \" diwst at ridha \" Al 454 . Fr 1671, 1719 . RK 2: 196 . Lg 3: 64 . rida i döst Di 17 . ränna diost Al 3865 . ib 4751 . öfde ther dyst RK 2: 5177 . \" bruka . . . dwst ällar spärbräkningh \" Lg 3: 66 . - envig? som androm biwdher diwsth SGGK 106 ."
],
alternative_forms=[
"dyost Iv 1560 . ",
Expand All @@ -54,19 +57,21 @@ def test_dictionary_has_expected_content() -> None:
)
expected_10000 = DictionaryEntry(
headword="gangilse",
part_of_speech="nn",
part_of_speech=["nn"],
grammatical_aspect="",
information="",
definitions=[
" ? Se Sdw 2: 1226. - Jfr fore-, fram-, ivir-, mote-, um-, vidher-gangilse."
],
alternative_forms=[],
)
expected_20000 = DictionaryEntry(
headword="löna",
part_of_speech="vb",
part_of_speech=["vb"],
grammatical_aspect="v.",
information="",
definitions=[
"löna, vedergälla. " med personens dat. och ack. betecknande det för hvilket lön gifves. minom och ack. betecknande det för hvilket lön gifves. minom winom lönir iak lydhno oc älskogha mz miskund " MB 1: 332 . hånom hans gif löna KS 70 (112, 77) . gudh löne henne sin stora kärlek Lg 807 . KL 296 . han honom thz ille lönte RK 2: 3459 . - pass. thz kan thöm aldrigh vardha lönt Iv 4959 . - med personens dat. och ack. betecknande det som gifves ss lön. honum löna synda giäl Al 2329 . - med personens dat. them löne RK 2: 3216 . " löna wäl jlum " Bil 104 . " iak lönir hwariom enom äptir sinne forskullan " Bir 1: 381 . MB 2: 369 . " om honum skwlde saa wordhe lönth for hans langlige . . . trotieniste " BSH 4: 220 ( 1497) . - löna, aflöna. them som ey löna sino ärfuodhis folke rättelica MP 2: 111 . " löne . . . af thy sama sinom piltom oc tiänarom " Bir 5: 115 . ib 116 . " sinom soldenärom skulu the ey vara pliktoghe antiggia harnisk. kost äller fordenskap til land ällir watn. ey oc gull äller päninga til them at löna " MB 2: 242 . ib 243 . - betala. badh löna sik Bo 14 . - belöna. war gudh ther badhe lönar wälgerninga ok plicta synde Bil 458 . "
"löna, vedergälla. \" med personens dat. och ack. betecknande det för hvilket lön gifves. minom och ack. betecknande det för hvilket lön gifves. minom winom lönir iak lydhno oc älskogha mz miskund \" MB 1: 332 . hånom hans gif löna KS 70 (112, 77) . gudh löne henne sin stora kärlek Lg 807 . KL 296 . han honom thz ille lönte RK 2: 3459 . - pass. thz kan thöm aldrigh vardha lönt Iv 4959 . - med personens dat. och ack. betecknande det som gifves ss lön. honum löna synda giäl Al 2329 . - med personens dat. them löne RK 2: 3216 . \" löna wäl jlum \" Bil 104 . \" iak lönir hwariom enom äptir sinne forskullan \" Bir 1: 381 . MB 2: 369 . \" om honum skwlde saa wordhe lönth for hans langlige . . . trotieniste \" BSH 4: 220 ( 1497) . - löna, aflöna. them som ey löna sino ärfuodhis folke rättelica MP 2: 111 . \" löne . . . af thy sama sinom piltom oc tiänarom \" Bir 5: 115 . ib 116 . \" sinom soldenärom skulu the ey vara pliktoghe antiggia harnisk. kost äller fordenskap til land ällir watn. ey oc gull äller päninga til them at löna \" MB 2: 242 . ib 243 . - betala. badh löna sik Bo 14 . - belöna. war gudh ther badhe lönar wälgerninga ok plicta synde Bil 458 . "
],
alternative_forms=[
"lönar Bil 458 . ",
Expand All @@ -79,8 +84,9 @@ def test_dictionary_has_expected_content() -> None:
)
expected_40000 = DictionaryEntry(
headword="välrotadher",
part_of_speech="",
part_of_speech=[],
grammatical_aspect="",
information="p. adj. ",
definitions=["väl rotad. välrodat trä Bir 3: 61 ."],
alternative_forms=["-rod- )"],
)
Expand Down

0 comments on commit 394c012

Please sign in to comment.