
Commit

fix: Use tokenize function instead of getTokenizer to use cache and prevent memory leak caused by infinite cache. (#19)
k-tahiro authored Dec 10, 2021
1 parent 60ca07f commit e363b7e
Showing 1 changed file with 2 additions and 6 deletions.
8 changes: 2 additions & 6 deletions src/analyze.js
@@ -1,6 +1,6 @@
 // LICENSE : MIT
 "use strict";
-const getTokenizer = require("kuromojin").getTokenizer;
+const tokenize = require("kuromojin").tokenize;
 /**
  * token object
  * @typedef {{word_id: number, word_type: string, word_position: number, surface_form: string, pos: string, pos_detail_1: string, pos_detail_2: string, pos_detail_3: string, conjugated_type: string, conjugated_form: string, basic_form: string, reading: string, pronunciation: string}} AnalyzedToken
@@ -12,8 +12,6 @@ const getTokenizer = require("kuromojin").getTokenizer;
  * @typedef {{type:string, value:string, surface: string, token:AnalyzedToken, index: number}} AnalyzedResultObject
  */

-// Cache tokens
-const _tokensCacheMap = {};
 /**
  * デフォルトのオプション値
  * @type {{ignoreConjunction: boolean}}
@@ -144,9 +142,7 @@ const mapToAnalyzedResult = (tokens) => {
 export function analyze(text, options = defaultOptions) {
     const ignoreConjunction =
         options.ignoreConjunction !== undefined ? options.ignoreConjunction : defaultOptions.ignoreConjunction;
-    return getTokenizer().then((tokenizer) => {
-        const tokens = _tokensCacheMap[text] ? _tokensCacheMap[text] : tokenizer.tokenizeForSentence(text);
-        _tokensCacheMap[text] = tokens;
+    return tokenize(text).then((tokens) => {
         const filterByType = tokens.filter((token, index) => {
             const nextToken = tokens[index + 1];
             // token[特殊・ダ] + nextToken[アル] なら 常体(である調) として認識する
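
For reference, a minimal sketch (not part of the commit, assuming kuromojin's documented getTokenizer()/tokenize() API) contrasting the two call patterns. Per the commit message, only the expensive dictionary-backed tokenizer is cached inside kuromojin; token arrays are recomputed per call, which bounds memory use.

const { getTokenizer, tokenize } = require("kuromojin");

// Before: tokens were memoized per input text in a module-level map.
// Nothing ever evicted entries, so every distinct text analyzed during the
// process lifetime stayed in memory (the "infinite cache" memory leak).
const _tokensCacheMap = {};
function analyzeBefore(text) {
    return getTokenizer().then((tokenizer) => {
        const tokens = _tokensCacheMap[text] ? _tokensCacheMap[text] : tokenizer.tokenizeForSentence(text);
        _tokensCacheMap[text] = tokens;
        return tokens;
    });
}

// After: kuromojin's tokenize() reuses its internally cached tokenizer
// instance, so the rule no longer needs its own per-text result cache.
function analyzeAfter(text) {
    return tokenize(text).then((tokens) => tokens);
}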
