Skip to content

Commit

Permalink
Merge pull request #104 from stscoundrel/feature/old-danish-dictionary
Browse files Browse the repository at this point in the history
Feature/old danish dictionary
  • Loading branch information
stscoundrel authored Feb 24, 2024
2 parents 5020ec5 + 8d4a6fd commit 880a582
Show file tree
Hide file tree
Showing 10 changed files with 52 additions and 22 deletions.
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,30 @@ Parses sources from following dictionary projects:
- [Old Icelandic Dictionary](https://old-icelandic.vercel.app/)
- [Old Norwegian Dictionary](https://old-norwegian-dictionary.vercel.app/)
- [Old Swedish Dictionary](https://old-swedish-dictionary.vercel.app/)
- [Old Danish Dictionary](https://old-danish-dictionary.vercel.app/)


The parser finds over 1 000 entries that are present in all four dictionaries. There are also over 20 000 entries that appear in at least two different dictionaries, making them worth a crosslink.

### Install

`yarn add scandinavian-dictionary-crosslinker`


### Download sitemaps.

Run `cargo run` in `downloader` folder. Downloads latest XML sitemaps to `resources` folder.


### Generate crosslinks

Run `go run *.go` in the `crosslinker` folder. Generates the crosslinks JSON in the `resources` folder.


### Minify output

Run `nimble build` and `./minifier` in `minifier` folder. Generates minified & gzipped json outputs.

### Update data to NPM module.

Run `go run main.go` in the root folder to update the JSON and README in the NPM module.
1 change: 1 addition & 0 deletions crosslinker/internal/sitemaps/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ var sitemapPaths = map[string]string{
"old-icelandic": "../resources/old-icelandic.xml",
"old-norwegian": "../resources/old-norwegian.xml",
"old-swedish": "../resources/old-swedish.xml",
"old-danish": "../resources/old-danish.xml",
}

func readXmlSitemaps() (map[string][]byte, error) {
Expand Down
1 change: 1 addition & 0 deletions downloader/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ async fn main() {
urls.insert("old-icelandic", "https://old-icelandic.vercel.app/sitemap.xml");
urls.insert("old-norwegian", "https://old-norwegian-dictionary.vercel.app/sitemap.xml");
urls.insert("old-swedish", "https://old-swedish-dictionary.vercel.app/sitemap.xml");
urls.insert("old-danish", "https://old-danish-dictionary.vercel.app/sitemap.xml");

let client = Client::new();

Expand Down
2 changes: 2 additions & 0 deletions minifier/src/minifier.nim
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ proc minify_link(link: string): string =
.replace("https://old-norwegian-dictionary.vercel.app/word/", "")
.replace("https://old-swedish-dictionary.vercel.app/word/", "")
.replace("https://cleasby-vigfusson-dictionary.vercel.app/word/", "")
.replace("https://old-danish-dictionary.vercel.app/word/", "")


proc minify_source(link: string): string =
Expand All @@ -21,6 +22,7 @@ proc minify_source(link: string): string =
.replace("old-swedish", "os")
.replace("old-norse", "on")
.replace("old-icelandic", "oi")
.replace("old-danish", "od")


for slug, links in jsonContent:
Expand Down
3 changes: 2 additions & 1 deletion npm-module/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ Parses sources from following dictionary projects:
- [Old Icelandic Dictionary](https://old-icelandic.vercel.app/)
- [Old Norwegian Dictionary](https://old-norwegian-dictionary.vercel.app/)
- [Old Swedish Dictionary](https://old-swedish-dictionary.vercel.app/)
- [Old Danish Dictionary](https://old-danish-dictionary.vercel.app/)


The parser finds over 1 000 entries that are present in all four dictionaries. There are also over 20 000 entries that appear in at least two different dictionaries, making them worth a crosslink.

### Install

`yarn add scandinavian-dictionary-crosslinker`
`yarn add scandinavian-dictionary-crosslinker`
2 changes: 1 addition & 1 deletion npm-module/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "scandinavian-dictionary-crosslinker",
"version": "0.5.1",
"version": "0.6.0",
"description": "Finds shared entries in dictionary sitemaps, allowing crosslinking",
"repository": "https://github.com/stscoundrel/scandinavian-dictionary-crosslinker.git",
"author": "stscoundrel <silvennoinen.sampo@gmail.com>",
Expand Down
Binary file modified npm-module/resources/crosslinks.json.gz
Binary file not shown.
5 changes: 5 additions & 0 deletions npm-module/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,9 @@ export const getOldSwedishCrosslinks = (slug: string): Crosslink[] => filterCros
DictionarySource.OldSwedish,
);

export const getOldDanishCrosslinks = (slug: string): Crosslink[] => filterCrosslinksByLanguage(
slug,
DictionarySource.OldDanish,
);

export { Crosslink, DictionarySource } from './models';
3 changes: 2 additions & 1 deletion npm-module/src/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ export enum DictionarySource {
OldNorse = 'old-norse',
OldIcelandic = 'old-icelandic',
OldNorwegian = 'old-norwegian',
OldSwedish = 'old-swedish'
OldSwedish = 'old-swedish',
OldDanish = 'old-danish'
}

export interface Crosslink{
Expand Down
37 changes: 18 additions & 19 deletions npm-module/tests/crosslinks.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ describe('Crosslinks tests', () => {
test('Crosslinks contain correct amount of slug entries', () => {
const result = getCrosslinks();

expect(Object.keys(result).length).toBe(44545);
expect(Object.keys(result).length).toBe(48294);
});

test('Crosslinks contain correct amount of summed individual links', () => {
Expand All @@ -22,7 +22,7 @@ describe('Crosslinks tests', () => {
sum += result[key].length;
});

expect(sum).toEqual(120313);
expect(sum).toEqual(128440);
});

test('Crosslink entries are returned in correct object format', () => {
Expand All @@ -41,23 +41,19 @@ describe('Crosslinks tests', () => {

expect(result.abyrgdarhlutr).toEqual(
[
{
url: 'https://old-norwegian-dictionary.vercel.app/word/abyrgdarhlutr',
source: DictionarySource.OldNorwegian,
},
{
url: 'https://old-icelandic.vercel.app/word/abyrgdarhlutr',
source: DictionarySource.OldIcelandic,
},
{
url: 'https://old-norwegian-dictionary.vercel.app/word/abyrgdarhlutr',
source: DictionarySource.OldNorwegian,
},
],
);

expect(result.hneyking).toEqual(
[
{
url: 'https://old-norwegian-dictionary.vercel.app/word/hneyking',
source: DictionarySource.OldNorwegian,
},
{
url: 'https://cleasby-vigfusson-dictionary.vercel.app/word/hneyking',
source: DictionarySource.OldNorse,
Expand All @@ -66,19 +62,23 @@ describe('Crosslinks tests', () => {
url: 'https://old-icelandic.vercel.app/word/hneyking',
source: DictionarySource.OldIcelandic,
},
{
url: 'https://old-norwegian-dictionary.vercel.app/word/hneyking',
source: DictionarySource.OldNorwegian,
},
],
);

expect(result.skurfir).toEqual(
[
{
url: 'https://old-norwegian-dictionary.vercel.app/word/skurfir',
source: DictionarySource.OldNorwegian,
},
{
url: 'https://cleasby-vigfusson-dictionary.vercel.app/word/skurfir',
source: DictionarySource.OldNorse,
},
{
url: 'https://old-norwegian-dictionary.vercel.app/word/skurfir',
source: DictionarySource.OldNorwegian,
},
],
);

Expand All @@ -94,10 +94,6 @@ describe('Crosslinks tests', () => {
]);

expect(result['otta-lauss']).toEqual([
{
url: 'https://old-norwegian-dictionary.vercel.app/word/ottalauss',
source: DictionarySource.OldNorwegian,
},
{
source: DictionarySource.OldNorse,
url: 'https://cleasby-vigfusson-dictionary.vercel.app/word/otta-lauss',
Expand All @@ -106,7 +102,10 @@ describe('Crosslinks tests', () => {
source: DictionarySource.OldIcelandic,
url: 'https://old-icelandic.vercel.app/word/ottalauss',
},

{
url: 'https://old-norwegian-dictionary.vercel.app/word/ottalauss',
source: DictionarySource.OldNorwegian,
},
]);
});
});
Expand Down

0 comments on commit 880a582

Please sign in to comment.