Skip to content

Commit

Permalink
feat: add dense example creation func to wasm (#4615)
Browse files Browse the repository at this point in the history
* feat: add dense example creation func to wasm

* formatting
  • Loading branch information
jackgerrits authored Jun 14, 2023
1 parent b8c4ee3 commit 677750e
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 36 deletions.
1 change: 1 addition & 0 deletions wasm/developer_readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Make sure Emscripten is activated.
```sh
emcmake cmake --preset wasm -DCMAKE_BUILD_TYPE=MinSizeRel -DCMAKE_TOOLCHAIN_FILE=$(pwd)/ext_libs/vcpkg/scripts/buildsystems/vcpkg.cmake
cmake --build build --target vw-wasm
npm run build
```

### Test
Expand Down
85 changes: 50 additions & 35 deletions wasm/src/vw.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,18 +94,18 @@ export default new Promise((resolve) => {

/**
* The current total sum of the progressive validation loss.
* The value is fetched from the underlying wasm model instance on every call.
*
* @returns {number} the sum of all losses accumulated by the model
*/
sumLoss(): number {
// Thin wrapper: delegate directly to the wasm-side model object.
return this._instance.sumLoss();
}

/**
*
*
* Takes a file location and stores the VW model in binary format in the file.
*
* @param {string} model_file the path to the file where the model will be saved
*
* @param {string} model_file the path to the file where the model will be saved
*/
saveModelToFile(model_file: string) {
let char_vector = this._instance.getModel();
Expand All @@ -124,9 +124,9 @@ export default new Promise((resolve) => {

/**
* Gets the VW model in binary format as a Uint8Array that can be saved to a file.
* There is no need to delete or free the array returned by this function.
* There is no need to delete or free the array returned by this function.
* However, if the same array is later used to re-load the model into VW, the array must first be stored in wasm memory (see loadModelFromArray)
*
*
* @returns {Uint8Array} the VW model in binary format
*/
getModelAsArray(): Uint8Array {
Expand All @@ -142,9 +142,9 @@ export default new Promise((resolve) => {
}

/**
*
*
* Takes a file location and loads the VW model from the file.
*
*
* @param {string} model_file the path to the file where the model will be loaded from
*/
loadModelFromFile(model_file: string) {
Expand All @@ -159,9 +159,9 @@ export default new Promise((resolve) => {
/**
* Takes a model in an array binary format and loads it into the VW instance.
* The memory must be allocated via the WebAssembly module's _malloc function and should later be freed via the _free function.
*
*
* @param {number} model_array_ptr the pre-loaded model's array pointer
* The memory must be allocated via the WebAssembly module's _malloc function and should later be freed via the _free function.
* The memory must be allocated via the WebAssembly module's _malloc function and should later be freed via the _free function.
* @param {number} model_array_len the pre-loaded model's array length
*/
loadModelFromArray(model_array_ptr: number, model_array_len: number) {
Expand All @@ -186,7 +186,7 @@ export default new Promise((resolve) => {
/**
* Creates a new Vowpal Wabbit workspace.
* Can accept either or both string arguments and a model file.
*
*
* @constructor
* @param {Function} readSync - A function that reads a file synchronously and returns a buffer
* @param {Function} writeSync - A function that writes a buffer to a file synchronously
Expand All @@ -206,20 +206,35 @@ export default new Promise((resolve) => {
}

/**
* Parse a line of text into a VW example.
* The example can then be used for prediction or learning.
* finishExample() must be called and then delete() on the example, when it is no longer needed.
*
* @param {string} line the line of text to parse into an example
* @returns a parsed vw example that can be used for prediction or learning
*/
parse(line: string): object {
// Parsing happens on the wasm side; the returned object is opaque to JS callers.
return this._instance.parse(line);
}

/**
* Creates a new example from dense arrays of feature values, keyed by namespace name.
* Every position of an array becomes one anonymous feature inside that namespace.
* finishExample() must be called and then delete() on the example, when it is no longer needed.
*
* @example
* let example = model.createExampleFromDense({
*   my_namespace: [0.3, 0.2, 0.1, 0.3, 0.5, 0.9]
* });
* @param {Object<string, number[]>|Map<string, number[]>} features namespace name mapped to its dense feature values;
* either a plain object or a Map is accepted
* @param {string} label the label as text; empty label by default
* @returns a parsed vw example that can be used for prediction or learning
*/
createExampleFromDense(
    features: Record<string, number[]> | Map<string, number[]>,
    label: string = ""
): object {
    // The wasm binding enumerates namespaces with Object.keys(), which returns nothing for a Map.
    // Copy Map entries into a plain object so a Map argument does not silently yield an empty example.
    let dense: Record<string, number[]>;
    if (features instanceof Map) {
        dense = {};
        features.forEach((values, namespace) => {
            dense[namespace] = values;
        });
    } else {
        dense = features;
    }
    return this._instance.createExampleFromDense(dense, label);
}

/**
* Calls vw predict on the example and returns the prediction.
*
*
* @param {object} example returned from parse()
* @returns the prediction with a type corresponding to the reduction that was used
* @throws {VWError} Throws an error if the example is not well defined
Expand All @@ -234,7 +249,7 @@ export default new Promise((resolve) => {

/**
* Calls vw learn on the example and updates the model
*
*
* @param {object} example returned from parse()
* @throws {VWError} Throws an error if the example is not well defined
*/
Expand All @@ -248,7 +263,7 @@ export default new Promise((resolve) => {

/**
* Cleans the example and returns it to the pool of available examples. delete() must also be called on the example object
*
*
* @param {object} example returned from parse()
*/
finishExample(example: object) {
Expand All @@ -266,7 +281,7 @@ export default new Promise((resolve) => {
/**
* Creates a new Vowpal Wabbit workspace for Contextual Bandit exploration algorithms.
* Can accept either or both string arguments and a model file.
*
*
* @constructor
* @param {Function} readSync - A function that reads a file synchronously and returns a buffer
* @param {Function} writeSync - A function that writes a buffer to a file synchronously
Expand All @@ -291,10 +306,10 @@ export default new Promise((resolve) => {
/**
* Takes a CB example and returns an array of (action, score) pairs, representing the probability mass function over the available actions
* The returned pmf can be used with samplePmf to sample an action
*
*
* Example must have the following properties:
* - text_context: a string representing the context
*
*
* @param {object} example the example object that will be used for prediction
* @returns {array} probability mass function, an array of action,score pairs that was returned by predict
* @throws {VWError} Throws an error if the example text_context is missing from the example
Expand All @@ -309,17 +324,17 @@ export default new Promise((resolve) => {

/**
* Takes a CB example and uses it to update the model
*
*
* Example must have the following properties:
* - text_context: a string representing the context
* - labels: an array of label objects (usually one), each label object must have the following properties:
* - action: the action index
* - cost: the cost of the action
* - probability: the probability of the action
*
*
* The labels array should contain more than one label object only if a reduction that accepts multiple labels was used (e.g. graph_feedback)
*
*
*
*
* @param {object} example the example object that will be used for prediction
* @throws {VWError} Throws an error if the example does not have the required properties to learn
*/
Expand All @@ -335,7 +350,7 @@ export default new Promise((resolve) => {
/**
* Accepts a CB example (in text format) line by line. Once a full CB example is passed in it will call learnFromString.
* This is intended to be used with files that have CB examples, that were logged using logCBExampleToStream and are being read line by line.
*
*
* @param {string} line a string representing a line from a CB example in text Vowpal Wabbit format
*/
addLine(line: string) {
Expand All @@ -351,7 +366,7 @@ export default new Promise((resolve) => {

/**
* Takes a full multiline CB example in text format and uses it to update the model. This is intended to be used with examples that are logged to a file using logCBExampleToStream.
*
*
* @param {string} example a string representing the CB example in text Vowpal Wabbit format
* @throws {Error} Throws an error if the example is an object with a label and/or a text_context
*/
Expand All @@ -368,10 +383,10 @@ export default new Promise((resolve) => {
}

/**
*
*
* Takes an exploration prediction (array of action, score pairs) and returns a single action and score,
* along with a unique id that was used to seed the sampling and that can be used to track and reproduce the sampling.
*
*
* @param {array} pmf probability mass function, an array of action,score pairs that was returned by predict
* @returns {object} an object with the following properties:
* - action: the action index that was sampled
Expand All @@ -392,10 +407,10 @@ export default new Promise((resolve) => {
}

/**
*
*
* Takes an exploration prediction (array of action, score pairs) and a unique id that is used to seed the sampling,
* and returns a single action index and the corresponding score.
*
*
* @param {array} pmf probability mass function, an array of action,score pairs that was returned by predict
* @param {string} uuid a unique id that can be used to seed the prediction
* @returns {object} an object with the following properties:
Expand All @@ -415,11 +430,11 @@ export default new Promise((resolve) => {
}

/**
*
*
* Takes an example with a text_context field and calls predict. The prediction (a probability mass function over the available actions)
* will then be sampled from, and only the chosen action index and the corresponding score will be returned,
* along with a unique id that was used to seed the sampling and that can be used to track and reproduce the sampling.
*
*
* @param {object} example an example object containing the context to be used during prediction
* @returns {object} an object with the following properties:
* - action: the action index that was sampled
Expand All @@ -439,11 +454,11 @@ export default new Promise((resolve) => {
}

/**
*
*
* Takes an example with a text_context field and calls predict, and a unique id that is used to seed the sampling.
* The prediction (a probability mass function over the available actions) will then be sampled from, and only the chosen action index
* and the corresponding score will be returned, along with a unique id that was used to seed the sampling and that can be used to track and reproduce the sampling.
*
*
* @param {object} example an example object containing the context to be used during prediction
* @returns {object} an object with the following properties:
* - action: the action index that was sampled
Expand Down
47 changes: 46 additions & 1 deletion wasm/src/wasm_wrapper.cc
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#include "vw/common/text_utils.h"
#include "vw/config/options.h"
#include "vw/core/example.h"
#include "vw/core/learner.h"
#include "vw/core/parse_example.h"
#include "vw/core/parse_primitives.h"
#include "vw/core/parse_regressor.h"
#include "vw/core/parser.h"
#include "vw/core/prediction_type.h"
#include "vw/core/shared_data.h"
#include "vw/core/vw.h"
Expand Down Expand Up @@ -199,6 +201,48 @@ struct vw_model_basic

// Returns the output prediction type reported by this workspace's learner (vw_ptr->l).
prediction_type_t get_prediction_type() const { return vw_ptr->l->get_output_prediction_type(); }

// Builds one example from a JS object that maps a namespace name to an array of numbers.
//
// features: JS object; each own key is a namespace name and each value is an array of
//           numeric feature values (converted to float).
// label:    label text; it is tokenized on spaces and handed to the workspace's label parser.
//           An empty string leaves the default label in place.
//
// Returns a one-element vector holding the pooled example wrapped by example_ptr.
//
// The feature group is chosen by the first character of the namespace name (' ' when the name
// is empty). Feature indices start at the namespace hash and increase by one per value, so
// equally positioned values in different namespaces do not land on the same index.
std::vector<std::shared_ptr<example_ptr>> create_example_from_dense_features(
    const emscripten::val& features, const std::string& label)
{
  auto* ex = &VW::get_unused_example(this->vw_ptr.get());

  emscripten::val keys = emscripten::val::global("Object").call<emscripten::val>("keys", features);
  const int length = keys["length"].as<int>();

  for (int i = 0; i < length; ++i)
  {
    const auto key = keys[i].as<std::string>();
    if (!features.hasOwnProperty(key.c_str())) { continue; }

    const auto values = emscripten::convertJSArrayToNumberVector<float>(features[key]);

    // Cast through unsigned char: a plain (possibly signed) char with the high bit set would
    // otherwise turn into a negative / out-of-range slot when used as an array index.
    const unsigned char namespace_slot = key.empty() ? static_cast<unsigned char>(' ') : static_cast<unsigned char>(key[0]);

    auto& feature_group = ex->feature_space[namespace_slot];
    if (std::find(ex->indices.begin(), ex->indices.end(), namespace_slot) == ex->indices.end())
    {
      ex->indices.push_back(namespace_slot);
    }

    feature_group.indices.reserve(feature_group.indices.size() + values.size());
    feature_group.values.reserve(feature_group.values.size() + values.size());

    // Anonymous features are numbered consecutively starting from the namespace hash.
    auto next_index = VW::hash_space(*this->vw_ptr, key);
    for (auto v : values)
    {
      feature_group.indices.push_back(next_index++);
      feature_group.values.push_back(v);
    }
  }

  auto& example_parser = *this->vw_ptr->parser_runtime.example_parser;
  example_parser.lbl_parser.default_label(ex->l);
  example_parser.words.clear();
  VW::tokenize(' ', label, example_parser.words);
  example_parser.lbl_parser.parse_label(ex->l, ex->ex_reduction_features, example_parser.parser_memory_to_reuse,
      this->vw_ptr->sd->ldict.get(), example_parser.words, this->vw_ptr->logger);
  VW::setup_example(*this->vw_ptr, ex);
  return {example_ptr::wrap_pooled_example(ex, this->vw_ptr)};
}

std::shared_ptr<vw> vw_ptr;
std::string args;
};
Expand Down Expand Up @@ -491,7 +535,8 @@ EMSCRIPTEN_BINDINGS(vwwasm)
.function("getModel", &vw_model_basic::get_model)
.function("sumLoss", &vw_model_basic::sum_loss)
.function("weightedLabeledExamples", &vw_model_basic::weighted_labeled_examples)
.function("predictionType", &vw_model_basic::get_prediction_type);
.function("predictionType", &vw_model_basic::get_prediction_type)
.function("createExampleFromDense", &vw_model_basic::create_example_from_dense_features);

// Currently this is structured such that parse returns a vector of example but to JS that is opaque.
// All the caller can do is pass this opaque object to the other functions. Is it possible to convert this to a JS
Expand Down
36 changes: 36 additions & 0 deletions wasm/test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -687,4 +687,40 @@ describe('Call WASM VWModule', () => {
example.delete();
model.delete();
});

it("create dense example", () => {
  // Three namespaces, one of them the empty-string namespace; no label is supplied.
  const denseFeatures = {
    myns: [1, 2, 2],
    ns2: [3, 4, 5],
    "": [34, 1]
  };
  const workspace = new vw.Workspace({ args_str: "" });
  const denseExample = workspace.createExampleFromDense(denseFeatures);
  const predicted = workspace.predict(denseExample);

  assert.equal(workspace.predictionType(), vw.Prediction.Type.Scalar);
  assert.equal(typeof predicted, "number");

  // Return the example to the pool, then release the JS-side handles.
  workspace.finishExample(denseExample);
  denseExample.delete();
  workspace.delete();
});

it("create dense example with label", () => {
  // Same namespaces as the unlabeled case, this time with the label "1" so learn() can update the model.
  const denseFeatures = {
    myns: [1, 2, 2],
    ns2: [3, 4, 5],
    "": [34, 1]
  };
  const workspace = new vw.Workspace({ args_str: "" });
  const labeledExample = workspace.createExampleFromDense(denseFeatures, "1");

  const beforeLearning = workspace.predict(labeledExample);
  workspace.learn(labeledExample);
  const afterLearning = workspace.predict(labeledExample);

  assert.equal(workspace.predictionType(), vw.Prediction.Type.Scalar);
  assert.equal(typeof beforeLearning, "number");
  // Learning on the labeled example must move the prediction.
  assert.notEqual(beforeLearning, afterLearning);

  // Return the example to the pool, then release the JS-side handles.
  workspace.finishExample(labeledExample);
  labeledExample.delete();
  workspace.delete();
});
});

0 comments on commit 677750e

Please sign in to comment.