Skip to content

Commit

Permalink
feat(loadmodule): support environment override
Browse files Browse the repository at this point in the history
  • Loading branch information
kwonoj committed Jan 31, 2019
1 parent 430f850 commit 10cffa8
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 53 deletions.
137 changes: 107 additions & 30 deletions src/cldLoader.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,12 @@
import { CldAsmModule, ResultVector } from './cldAsmModule';
import { ENVIRONMENT } from 'emscripten-wasm-loader';
import { CldAsmModule, LanguageResult } from './cldAsmModule';
import { CldFactory } from './cldFactory';
import { LanguageCode } from './languageCode';
import { log } from './util/logger';
import { wrapCldInterface } from './wrapCldInterface';

/**
 * @internal
 * Copies every element of a vector object returned by cld3 into a plain array, in index order.
 *
 * @param vector Vector-like object; only its `size()` and `get(index)` methods are used.
 * @returns Array holding `vector.get(0)` through `vector.get(size - 1)`.
 */
const munge_vector = (vector: ResultVector) => {
  const total = vector.size();
  const collected = [];
  let cursor = 0;
  // walk the vector front to back, reading one element per step
  while (cursor < total) {
    collected.push(vector.get(cursor));
    cursor += 1;
  }
  return collected;
};
// size of pointer to calculate pointer position.
// Offsets in this file are computed as `base + PTR_SIZE * index`.
// NOTE(review): presumably the pointer width in bytes on the 32-bit wasm heap — confirm against the cld3 build.
const PTR_SIZE = 4;

/**
* @internal
Expand All @@ -25,20 +17,105 @@ const munge_vector = (vector: ResultVector) => {
*
* @returns {CldFactory} Factory function manages lifecycle of cld3 language identifier.
*/
export const cldLoader = (asmModule: CldAsmModule): CldFactory => ({
create: (
minBytes: number = asmModule.NNetLanguageIdentifier.kMinNumBytesToConsider,
maxBytes: number = asmModule.NNetLanguageIdentifier.kMaxNumBytesToConsider
) => {
const identifier = new asmModule.NNetLanguageIdentifier(minBytes, maxBytes);
return {
findLanguage: (text: string) => identifier.FindLanguage(text),
findMostFrequentLanguages: (text: string, numLangs: number) => {
const resultVector = identifier.FindTopNMostFreqLangs(text, numLangs);
const resultArray = munge_vector(resultVector);
return resultArray.filter(x => !!x && !!x.language && x.language !== LanguageCode.UNKNOWN);
},
dispose: () => identifier.delete()
export const cldLoader = (asmModule: CldAsmModule, _environment: ENVIRONMENT): CldFactory => {
const { cwrap, _free, allocateUTF8, _malloc, getValue, Pointer_stringify, setValue } = asmModule;
const cldInterface = wrapCldInterface(cwrap);

/**
 * Naive auto-dispose interface to call cld interface with string params.
 *
 * Every leading string argument is copied into the wasm heap with `allocateUTF8`; the trailing
 * callback then receives the resulting pointers in the same order. All pointers allocated so far
 * are released with `_free` once the callback returns or throws, so a failing call does not leak them.
 *
 * @param args String parameters followed by the callback to invoke with their pointers.
 * @returns Whatever the callback returns.
 * @throws {TypeError} When the last argument is not a function.
 */
const usingParamPtr = <T = void>(...args: Array<string | ((...args: Array<number>) => T)>): T => {
  const params = [...args];
  const fn = params.pop();
  // validate before touching the heap so a bad call cannot allocate anything
  if (typeof fn !== 'function') {
    throw new TypeError('usingParamPtr: last argument must be a callback function');
  }

  const paramsPtr: Array<number> = [];
  try {
    // allocate inside the `try` so that a failure on the n-th string still frees the first n-1
    for (const param of params) {
      paramsPtr.push(allocateUTF8(param as string));
    }
    return fn(...paramsPtr);
  } finally {
    paramsPtr.forEach(paramPtr => _free(paramPtr));
  }
};

// grab constant values from cld3 library
// Each value is read once here and captured by the closures defined further down.
// The unknown-language identifier comes back as a pointer and is converted with `Pointer_stringify`;
// results whose `language` equals it are filtered out of `findMostFrequentLanguages`.
const unknownIdentifier = Pointer_stringify(cldInterface.getUnknownIdentifier());
// defaults for the `minBytes` / `maxBytes` parameters of `create`
const minBytesDefault = cldInterface.getMinNumBytesDefault();
const maxBytesDefault = cldInterface.getMaxNumBytesDefault();
// only logged below in the code visible here
const maxBytesInput = cldInterface.getMaxNumBytesInput();
// passed to `_malloc` whenever a LanguageResult struct is allocated for a return value
const languageResultStructSize = cldInterface.sizeLanguageResult();

// record the values the loader was initialized with
log(`cldLoader: cld3 wasm initialized with default values`, {
unknownIdentifier,
minBytesDefault,
maxBytesDefault,
maxBytesInput,
languageResultStructSize
});

/**
* Wrapper function to read LanguageResult struct from pointer.
* After interop, pointer will be freed.
*
* Reads four fields at consecutive `PTR_SIZE` offsets from `structPtr`, then passes both the
* language string pointer and `structPtr` itself to `_free`. Neither pointer may be used
* (or freed again) by the caller after this function returns.
*
* @param structPtr Pointer to the struct to read; it is freed before returning.
* @returns LanguageResult populated from the values read at `structPtr`.
*/
const volatileReadResultStruct = (structPtr: number) => {
// get value of first field of LanguageResult struct (char*)
const languageStringPtr = getValue(structPtr + PTR_SIZE * 0, '*');

// be careful to match order of properties to match pointer to struct field.
const ret: LanguageResult = {
// the string is copied out here, before its backing pointer is freed below
language: Pointer_stringify(languageStringPtr) as LanguageCode,
probability: getValue(structPtr + PTR_SIZE * 1, 'float'),
// read as a single byte ('i8') and coerced to boolean
is_reliable: !!getValue(structPtr + PTR_SIZE * 2, 'i8'),
proportion: getValue(structPtr + PTR_SIZE * 3, 'float')
};
// NOTE(review): the next two lines look like leftover closing tokens of the removed
// implementation shown in this diff rather than part of this function — confirm.
}
});

//free char* for language string
_free(languageStringPtr);
//free struct
_free(structPtr);

return ret;
};

return {
  /**
   * Creates a cld3 language identifier and returns the object that drives it.
   *
   * @param minBytes Forwarded to `cldInterface.create`; defaults to the value read from the library.
   * @param maxBytes Forwarded to `cldInterface.create`; defaults to the value read from the library.
   * @returns Object exposing `unknownIdentifier`, `findLanguage`, `findMostFrequentLanguages` and
   * `dispose`. `dispose` passes the identifier pointer to `cldInterface.destroy`.
   */
  create: (minBytes: number = minBytesDefault, maxBytes: number = maxBytesDefault) => {
    const cldPtr = cldInterface.create(minBytes, maxBytes);

    return {
      unknownIdentifier,
      /**
       * Detects the language of `text`.
       *
       * @param text Text to inspect.
       * @returns LanguageResult read back from the struct filled in by the interop call.
       */
      findLanguage: (text: string) => {
        // `findLanguage` requires caller must allocate memory for return value.
        const resultPtr = _malloc(languageResultStructSize);
        try {
          usingParamPtr(text, textPtr => cldInterface.findLanguage(cldPtr, textPtr, resultPtr));
        } catch (error) {
          // the struct was never handed to `volatileReadResultStruct`, so release it here
          _free(resultPtr);
          throw error;
        }
        // reads the struct and frees it together with its language string
        return volatileReadResultStruct(resultPtr);
      },
      /**
       * Detects up to `numLangs` languages in `text`.
       *
       * @param text Text to inspect.
       * @param numLangs Maximum number of results requested. Fractional values are truncated;
       * non-finite or non-positive values yield an empty array without touching the wasm heap.
       * @returns Results whose language differs from `unknownIdentifier`, or an empty array when
       * the interop call reports no languages.
       */
      findMostFrequentLanguages: (text: string, numLangs: number) => {
        // Work with an integer count: a fractional value would make the loop below run more
        // times than `count * PTR_SIZE` bytes can hold and write past the allocated array.
        const requested = Number.isFinite(numLangs) ? Math.trunc(numLangs) : 0;
        if (requested <= 0) {
          return [];
        }

        // `findMostFrequentLanguages` requires caller must allocate memory for return value.
        const languageListPtr = _malloc(requested * PTR_SIZE);

        // For convenience, we'll store allocated pointer to each empty LanguageResult for return value
        const resultStructsPtr: Array<number> = [];

        let languageCount: number;
        try {
          //allocate memory in js. `findTopNMostFreqLangs` always returns vector with given num_langs, allows predictable memory allocation.
          for (let idx = 0; idx < requested; idx++) {
            const resultPtr = _malloc(languageResultStructSize);
            resultStructsPtr.push(resultPtr);
            // fill in array with allocated struct ptr
            setValue(languageListPtr + idx * PTR_SIZE, resultPtr, '*');
          }

          languageCount = usingParamPtr(text, textPtr =>
            cldInterface.findTopNMostFreqLangs(cldPtr, textPtr, requested, languageListPtr)
          );
        } catch (error) {
          // nothing has been read yet: release every struct allocated so far plus the array itself
          resultStructsPtr.forEach(ptr => _free(ptr));
          _free(languageListPtr);
          throw error;
        }

        // if `numLangs` exceeds number of languages detected rest of array will be filled with default result with unknown language identifier
        const ret = resultStructsPtr
          .map(ptr => volatileReadResultStruct(ptr))
          .filter(x => x.language !== unknownIdentifier);

        // each LanguageResult struct is freed via `volatileReadResultStruct` already. delete allocated memory for array itself.
        _free(languageListPtr);

        return languageCount > 0 ? ret : [];
      },
      dispose: () => cldInterface.destroy(cldPtr)
    };
  }
};
};
44 changes: 21 additions & 23 deletions src/loadModule.ts
Original file line number Diff line number Diff line change
@@ -1,45 +1,43 @@
import { getModuleLoader, isNode } from 'emscripten-wasm-loader';
import { ENVIRONMENT, getModuleLoader, isNode } from 'emscripten-wasm-loader';
import { CldAsmModule } from './cldAsmModule';
import { CldFactory } from './cldFactory';
import { cldLoader } from './cldLoader';
import { log } from './util/logger';

/**
* Load, initialize wasm / asm.js binary to use actual cld wasm instances.
* Load, initialize wasm binary to use actual cld wasm instances.
*
* @param [InitOptions] Options to initialize cld3 wasm binary.
* @param {number} [InitOptions.timeout] - timeout to wait wasm binary compilation & load.
* @param {string | object} [InitOptions.locateBinary] - custom resolution logic for wasm binary.
* @param {string | object} [InitOptions.locateBinary] - custom resolution logic for wasm binary. (not supported)
* @param {ENVIRONMENT} [InitOptions.environment] For overriding running environment
* It could be either remote endpoint url, or loader-returned object for bundler. Check examples/browser_* for references.
*
* @returns {() => Promise<CldFactory>} Function to load module
*/
const loadModule = async ({
timeout,
locateBinary
}: Partial<{ timeout: number; locateBinary: (filePath: string) => string | object }> = {}) => {
log(`loadModule: loading cld3 module`);

const loadModule = async (
initOptions: Partial<{
timeout: number;
environment?: ENVIRONMENT;
}> = {}
) => {
//imports MODULARIZED emscripten preamble
const runtimeModule = isNode() ? require(`./lib/cld3_node`) : require(`./lib/cld3_web`); //tslint:disable-line:no-require-imports no-var-requires

//tslint:disable-next-line:no-require-imports no-var-requires
const lookupBinary = locateBinary || ((_filePath: string) => require('./lib/cld3_web.wasm'));
const runtime = require(`./lib/cld3`);

const { environment, timeout } = initOptions;
const env = environment ? environment : isNode() ? ENVIRONMENT.NODE : ENVIRONMENT.WEB;

log(`loadModule: loading cld3 wasm binary`, { initOptions });

//https://github.com/kwonoj/docker-hunspell-wasm/issues/63
//Build module object to construct wasm binary module via emscripten preamble.
//This allows to override default wasm binary resolution in preamble.
//By default, cld3-asm overrides to direct require to binary on *browser* environment to allow bundler like webpack resolves it.
//On node, it relies on default resolution logic.
const overriddenModule =
isNode() && !locateBinary
? undefined
: {
locateFile: (filePath: string) => (filePath.endsWith('.wasm') ? lookupBinary(filePath) : filePath)
};
//apply overridden environment values to custom patched cld3 preamble.
const overriddenModule = { ENVIRONMENT: env };

const moduleLoader = await getModuleLoader<CldFactory, CldAsmModule>(
(runtime: CldAsmModule) => cldLoader(runtime),
runtimeModule,
(runtime: CldAsmModule) => cldLoader(runtime, env),
runtime,
overriddenModule,
{ timeout }
);
Expand Down

0 comments on commit 10cffa8

Please sign in to comment.