Skip to content

Commit

Permalink
Merge pull request #170 from uga-rosa/database
Browse files Browse the repository at this point in the history
feat: Use Deno.Kv to create a database of dictionaries
  • Loading branch information
uga-rosa authored Dec 28, 2023
2 parents 2518fbf + fd72326 commit 6021153
Show file tree
Hide file tree
Showing 10 changed files with 381 additions and 65 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
# - macos-latest
- ubuntu-latest
version:
- "1.32.0"
- "1.38.0"
- "1.x"
host_version:
- vim: "v9.0.1499"
Expand Down
6 changes: 6 additions & 0 deletions autoload/skkeleton.vim
Original file line number Diff line number Diff line change
Expand Up @@ -328,3 +328,9 @@ endfunction
" Sends the 'initialize' notification (with no arguments) through
" skkeleton#notify_async().
function! skkeleton#initialize() abort
  let l:no_args = []
  call skkeleton#notify_async('initialize', l:no_args)
endfunction

" Sends the 'updateDatabase' notification through skkeleton#notify_async().
"
" Arguments:
"   a:path      dictionary path forwarded as the first notification argument
"   a:1 (opt.)  encoding name; defaults to '' when omitted
"   a:2 (opt.)  force flag; defaults to v:false when omitted
function! skkeleton#update_database(path, ...) abort
  " get() on the variadic list falls back to the default when the optional
  " argument was not supplied.
  let l:encoding = get(a:000, 0, '')
  let l:force = get(a:000, 1, v:false)
  let l:payload = [a:path, l:encoding, l:force]
  call skkeleton#notify_async('updateDatabase', l:payload)
endfunction
1 change: 1 addition & 0 deletions deno.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"lock": false,
"unstable": ["kv"],
"tasks": {
"fmt-check": "deno fmt --check denops",
"lint": "deno lint",
Expand Down
8 changes: 4 additions & 4 deletions denops/skkeleton/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ export const config: ConfigOptions = {
usePopup: true,
useSkkServer: false,
userJisyo: "~/.skkeleton",
databasePath: "",
};

type Validators = {
Expand All @@ -54,8 +55,7 @@ const validators: Validators = {
globalDictionaries: (x): (string | [string, string])[] => {
if (
!is.ArrayOf(
(x): x is string | [string, string] =>
is.String(x) || is.ArrayOf(is.String)(x) && x.length === 2,
is.OneOf([is.String, is.TupleOf([is.String, is.String])] as const),
)(x)
) {
throw TypeError("'globalDictionaries' must be array of two string tuple");
Expand All @@ -67,8 +67,7 @@ const validators: Validators = {
globalKanaTableFiles: (x): (string | [string, string])[] => {
if (
!is.ArrayOf(
(x): x is string | [string, string] =>
is.String(x) || is.ArrayOf(is.String)(x) && x.length === 2,
is.OneOf([is.String, is.TupleOf([is.String, is.String])] as const),
)(x)
) {
throw TypeError(
Expand Down Expand Up @@ -111,6 +110,7 @@ const validators: Validators = {
useGoogleJapaneseInput: (x) => ensure(x, is.Boolean),
useSkkServer: (x) => ensure(x, is.Boolean),
userJisyo: (x) => ensure(x, is.String),
databasePath: (x) => ensure(x, is.String),
};

export async function setConfig(
Expand Down
14 changes: 11 additions & 3 deletions denops/skkeleton/jisyo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { RomanNum } from "./deps/roman.ts";
import { zip } from "./deps/std/collections.ts";
import type { CompletionData, RankData } from "./types.ts";
import { SkkDictionary } from "./jisyo/skk_dictionary.ts";
import { DenoKvDictionary } from "./jisyo/deno_kv.ts";
import { UserDictionary, UserDictionaryPath } from "./jisyo/user_dictionary.ts";
import { SkkServer } from "./jisyo/skk_server.ts";
import { GoogleJapaneseInput } from "./jisyo/google_japanese_input.ts";
Expand Down Expand Up @@ -263,17 +264,24 @@ export async function load(
): Promise<Library> {
const globalDictionaries = await Promise.all(
globalDictionaryConfig.map(async ([path, encodingName]) => {
const dict = new SkkDictionary();
try {
await dict.load(path, encodingName);
if (config.databasePath) {
const dict = await DenoKvDictionary.create(path, encodingName);
await dict.load();
return dict;
} else {
const dict = new SkkDictionary();
await dict.load(path, encodingName);
return dict;
}
} catch (e) {
console.error("globalDictionary loading failed");
console.error(`at ${path}`);
if (config.debug) {
console.error(e);
}
return new SkkDictionary();
}
return dict;
}),
);

Expand Down
236 changes: 236 additions & 0 deletions denops/skkeleton/jisyo/deno_kv.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
import { config } from "../config.ts";
import { getKanaTable } from "../kana.ts";
import { readFileWithEncoding } from "../util.ts";
import type { CompletionData } from "../types.ts";
import {
Dictionary,
HenkanType,
okuriAriMarker,
okuriNasiMarker,
} from "../jisyo.ts";
import { jisyoschema, jsonschema, msgpack, yaml } from "../deps/jisyo.ts";

interface Jisyo {
okuri_ari: Record<string, string[]>;
okuri_nasi: Record<string, string[]>;
}

// Single TextEncoder instance reused by every encode() call.
const Encoder = new TextEncoder();

/**
 * Converts a string into its UTF-8 byte representation.
 *
 * @param str Text to encode.
 * @returns The UTF-8 bytes of `str`.
 */
function encode(str: string): Uint8Array {
  const bytes = Encoder.encode(str);
  return bytes;
}

/**
 * Computes a byte size for a list of string key parts.
 *
 * Every part contributes a fixed 2 bytes plus one byte per UTF-8 byte of its
 * text, except that each NUL (0x00) byte is counted as two bytes.
 * NOTE(review): presumably this mirrors how Deno KV serializes string key
 * parts (type tag + terminator, escaped NULs) - confirm against the Deno KV
 * key encoding.
 *
 * @param keys Key parts to measure.
 * @returns The summed size of all parts.
 */
function calcKeySize(keys: string[]): number {
  let total = 0;
  for (const part of keys) {
    // Fixed per-part overhead.
    total += 2;
    for (const byte of encode(part)) {
      total += byte === 0x00 ? 2 : 1;
    }
  }
  return total;
}

// Batch limits applied before an atomic operation is committed.
// Deno KV documents a maximum of 1000 mutations and 80 KiB of total key size
// per atomic operation.
const MAX_MUTATIONS_PER_COMMIT = 1000;
const MAX_KEY_BYTES_PER_COMMIT = 81920;

/**
 * Dictionary whose entries are stored in a Deno KV database.
 *
 * Entries are written under keys of the form
 * `[path, henkanType, ...characters of the reading]`, and the modification
 * time of the source file is kept under `[path, "mtime"]` so that an
 * unchanged file is not imported again.
 */
export class DenoKvDictionary implements Dictionary {
  #db: Deno.Kv;
  // Atomic operation collecting the writes of the batch currently being built.
  #atm: Deno.AtomicOperation;
  #path: string;
  #encoding: string;
  // Number of mutations queued in `#atm`.
  #mutationCount = 0;
  // Summed key size (see calcKeySize) of the mutations queued in `#atm`.
  #totalKeySize = 0;

  /**
   * @param database Opened KV database used for all reads and writes.
   * @param path Path of the dictionary file; also the first part of every key.
   * @param encoding Encoding name passed to `readFileWithEncoding` when the
   *   file is loaded as a plain-text dictionary.
   */
  constructor(
    database: Deno.Kv,
    path: string,
    encoding: string,
  ) {
    this.#db = database;
    this.#atm = database.atomic();
    this.#path = path;
    this.#encoding = encoding;
  }

  /**
   * Opens the KV database and wraps it in a dictionary instance.
   *
   * @param path Path of the dictionary file.
   * @param encoding Encoding name of the dictionary file.
   * @param databasePath Database location; falls back to
   *   `config.databasePath` when omitted.
   */
  static async create(
    path: string,
    encoding: string,
    databasePath?: string,
  ): Promise<DenoKvDictionary> {
    return new DenoKvDictionary(
      await Deno.openKv(databasePath ?? config.databasePath),
      path,
      encoding,
    );
  }

  /** Closes the underlying KV database. */
  cleanup() {
    this.#db.close();
  }

  /**
   * Looks up the candidates stored for `word`.
   *
   * @param type Which section of the dictionary to search.
   * @param word Reading to look up.
   * @returns The stored candidates, or an empty array when there is no entry.
   */
  async getHenkanResult(
    type: HenkanType,
    word: string,
  ): Promise<string[]> {
    const result = await this.#db.get<string[]>([this.#path, type, ...word]);
    return result.value ?? [];
  }

  /**
   * Collects "okurinasi" entries whose reading starts with `prefix`.
   *
   * When `feed` is not empty, every kana table entry whose key starts with
   * `feed` and that has more than one element contributes its first element,
   * appended to `prefix`, as a search prefix.
   *
   * @param prefix Reading typed so far.
   * @param feed Pending input that has not been converted to kana yet.
   * @returns Matching `[reading, candidates]` pairs sorted by reading.
   */
  async getCompletionResult(
    prefix: string,
    feed: string,
  ): Promise<CompletionData> {
    const candidates: CompletionData = [];

    if (feed !== "") {
      const table = getKanaTable();
      for (const [key, kanas] of table) {
        if (key.startsWith(feed) && kanas.length > 1) {
          const feedPrefix = prefix + (kanas as string[])[0];
          await this.collectCompletions(feedPrefix, candidates);
        }
      }
    } else {
      await this.collectCompletions(prefix, candidates);
    }

    candidates.sort((a, b) => a[0].localeCompare(b[0]));
    return candidates;
  }

  /** Appends every "okurinasi" entry whose reading starts with `reading` to `out`. */
  private async collectCompletions(
    reading: string,
    out: CompletionData,
  ): Promise<void> {
    const key = [this.#path, "okurinasi", ...reading];
    // `start` is needed to get the exactly matching entry as well.
    // https://github.com/denoland/deno/issues/21711
    for await (
      const entry of this.#db.list<string[]>({ prefix: key, start: key })
    ) {
      // Drop the `[path, type]` parts; the rest are the reading's characters.
      out.push([entry.key.slice(2).join(""), entry.value]);
    }
  }

  /**
   * Imports the dictionary file into the database.
   *
   * The import is skipped when the file's modification time equals the one
   * recorded in the database, unless `force` is true. The file format is
   * chosen from the file extension: `.json`, `.yaml`/`.yml`, `.mpk`, and
   * plain-text for everything else.
   *
   * @param force Import even when the recorded modification time matches.
   * @returns This instance.
   * @throws When the file cannot be read or a structured dictionary does not
   *   match the dictionary schema.
   */
  async load(force = false): Promise<this> {
    const stat = await Deno.stat(this.#path);
    const mtime = stat.mtime?.getTime();
    if (!force && mtime !== undefined) {
      const recorded = await this.#db.get([this.#path, "mtime"]);
      if (recorded.value === mtime) {
        return this;
      }
    }

    // Start from an empty batch so that nothing left over from an earlier
    // (possibly failed) load is written again.
    this.discardPending();

    if (this.#path.endsWith(".json")) {
      await this.loadJson();
    } else if (this.#path.endsWith(".yaml") || this.#path.endsWith(".yml")) {
      await this.loadYaml();
    } else if (this.#path.endsWith(".mpk")) {
      await this.loadMsgpack();
    } else {
      await this.loadString();
    }
    await this.commitPending();
    await this.#db.set([this.#path, "mtime"], mtime);

    return this;
  }

  /** Replaces the current batch with a fresh, empty atomic operation. */
  private discardPending(): void {
    this.#atm = this.#db.atomic();
    this.#mutationCount = 0;
    this.#totalKeySize = 0;
  }

  /** Commits the queued mutations (if any) and starts a new batch. */
  private async commitPending(): Promise<void> {
    if (this.#mutationCount > 0) {
      await this.#atm.commit();
    }
    this.discardPending();
  }

  /**
   * Queues one dictionary entry for writing, committing the current batch
   * first when adding the entry would exceed a batch limit.
   */
  private async setDatabase(
    type: HenkanType,
    k: string,
    v: string[],
  ) {
    const key = [this.#path, type, ...k];
    const keySize = calcKeySize(key);
    // `>=` keeps a batch at no more than MAX_MUTATIONS_PER_COMMIT mutations.
    if (
      this.#mutationCount >= MAX_MUTATIONS_PER_COMMIT ||
      this.#totalKeySize + keySize > MAX_KEY_BYTES_PER_COMMIT
    ) {
      await this.commitPending();
    }
    this.#atm = this.#atm.set(key, v);
    this.#mutationCount++;
    this.#totalKeySize += keySize;
  }

  /**
   * Validates a parsed dictionary against `jisyoschema` and queues all of its
   * entries for writing.
   *
   * @throws With the message of the first schema violation when invalid.
   */
  private async storeJisyo(jisyo: Jisyo): Promise<void> {
    const validator = new jsonschema.Validator();
    const result = validator.validate(jisyo, jisyoschema);
    if (!result.valid) {
      throw Error(result.errors[0]?.message ?? "invalid dictionary format");
    }
    for (const [k, v] of Object.entries(jisyo.okuri_ari)) {
      await this.setDatabase("okuriari", k, v);
    }
    for (const [k, v] of Object.entries(jisyo.okuri_nasi)) {
      await this.setDatabase("okurinasi", k, v);
    }
  }

  /** Imports a JSON dictionary. */
  private async loadJson() {
    const data = await Deno.readTextFile(this.#path);
    await this.storeJisyo(JSON.parse(data) as Jisyo);
  }

  /** Imports a YAML dictionary. */
  private async loadYaml() {
    const data = await Deno.readTextFile(this.#path);
    await this.storeJisyo(yaml.parse(data) as Jisyo);
  }

  /** Imports a MessagePack dictionary. */
  private async loadMsgpack() {
    const data = await Deno.readFile(this.#path);
    await this.storeJisyo(msgpack.decode(data) as Jisyo);
  }

  /**
   * Imports a plain-text dictionary.
   *
   * Lines before the first section marker are ignored. Every later line that
   * contains a space is split into the reading (text before the first space)
   * and its `/`-separated candidates.
   */
  private async loadString() {
    const data = await readFileWithEncoding(this.#path, this.#encoding);
    let mode: HenkanType | "" = "";
    for (const line of data.split("\n")) {
      if (line === okuriAriMarker) {
        mode = "okuriari";
        continue;
      }

      if (line === okuriNasiMarker) {
        mode = "okurinasi";
        continue;
      }

      if (mode === "") continue;

      const pos = line.indexOf(" ");
      if (pos !== -1) {
        await this.setDatabase(
          mode,
          line.substring(0, pos),
          // Skip the space and the leading "/", and drop the trailing "/".
          line.slice(pos + 2, -1).split("/"),
        );
      }
    }
  }
}
Loading

0 comments on commit 6021153

Please sign in to comment.