feat: add dense example creation func to wasm (#4615)

* feat: add dense example creation func to wasm
* formatting
VowpalWabbit · Jun 14, 2023 · 677750e · 677750e
1 parent b8c4ee3
commit 677750e
Show file tree

Hide file tree

Showing 4 changed files with 133 additions and 36 deletions.
diff --git a/wasm/developer_readme.md b/wasm/developer_readme.md
@@ -38,6 +38,7 @@ Make sure Emscripten is activated.
 ```sh
 emcmake cmake --preset wasm -DCMAKE_BUILD_TYPE=MinSizeRel -DCMAKE_TOOLCHAIN_FILE=$(pwd)/ext_libs/vcpkg/scripts/buildsystems/vcpkg.cmake
 cmake --build build --target vw-wasm
+npm run build
 ```
 
 ### Test

diff --git a/wasm/src/vw.ts b/wasm/src/vw.ts
@@ -94,18 +94,18 @@ export default new Promise((resolve) => {
 
             /**
              * The current total sum of the progressive validation loss
-             * 
+             *
              * @returns {number} the sum of all losses accumulated by the model
              */
             sumLoss(): number {
                 return this._instance.sumLoss();
             }
 
             /**
-             * 
+             *
              * Takes a file location and stores the VW model in binary format in the file.
-             * 
-             * @param {string} model_file the path to the file where the model will be saved 
+             *
+             * @param {string} model_file the path to the file where the model will be saved
              */
             saveModelToFile(model_file: string) {
                 let char_vector = this._instance.getModel();
@@ -124,9 +124,9 @@ export default new Promise((resolve) => {
 
             /**
              * Gets the VW model in binary format as a Uint8Array that can be saved to a file.
-             * There is no need to delete or free the array returned by this function. 
+             * There is no need to delete or free the array returned by this function.
              * If the same array is however used to re-load the model into VW, then the array needs to be stored in wasm memory (see loadModelFromArray)
-             * 
+             *
              * @returns {Uint8Array} the VW model in binary format
              */
             getModelAsArray(): Uint8Array {
@@ -142,9 +142,9 @@ export default new Promise((resolve) => {
             }
 
             /**
-             * 
+             *
              * Takes a file location and loads the VW model from the file.
-             * 
+             *
              * @param {string} model_file the path to the file where the model will be loaded from
              */
             loadModelFromFile(model_file: string) {
@@ -159,9 +159,9 @@ export default new Promise((resolve) => {
             /**
              * Takes a model in an array binary format and loads it into the VW instance.
              * The memory must be allocated via the WebAssembly module's _malloc function and should later be freed via the _free function.
-             * 
+             *
              * @param {number} model_array_ptr the pre-loaded model's array pointer
-             *  The memory must be allocated via the WebAssembly module's _malloc function and should later be freed via the _free function. 
+             *  The memory must be allocated via the WebAssembly module's _malloc function and should later be freed via the _free function.
              * @param {number} model_array_len the pre-loaded model's array length
              */
             loadModelFromArray(model_array_ptr: number, model_array_len: number) {
@@ -186,7 +186,7 @@ export default new Promise((resolve) => {
             /**
              * Creates a new Vowpal Wabbit workspace.
              * Can accept either or both string arguments and a model file.
-             * 
+             *
              * @constructor
              * @param {Function} readSync - A function that reads a file synchronously and returns a buffer
              * @param {Function} writeSync - A function that writes a buffer to a file synchronously
@@ -206,20 +206,35 @@ export default new Promise((resolve) => {
             }
 
             /**
-             * Parse a line of text into a VW example. 
-             * The example can then be used for prediction or learning. 
+             * Parse a line of text into a VW example.
+             * The example can then be used for prediction or learning.
              * finishExample() must be called and then delete() on the example, when it is no longer needed.
-             * 
-             * @param {string} line 
+             *
+             * @param {string} line
              * @returns a parsed vw example that can be used for prediction or learning
              */
             parse(line: string): object {
                 return this._instance.parse(line);
             }
 
+            /**
+             * Creates a new example from dense arrays of feature values, keyed by namespace name.
+             * Each array position becomes one feature of that namespace.
+             * As with parse(), finishExample() must be called and then delete() on the example, when it is no longer needed.
+             *
+             * @example
+             * let example = model.createExampleFromDense({
+             *     my_namespace: [0.3, 0.2, 0.1, 0.3, 0.5, 0.9]
+             * });
+             * @param {Map<string, number[]> | Record<string, number[]>} features feature values per namespace,
+             *  given either as a plain object or as a Map
+             * @param {string} label label text for the example, empty label by default
+             * @returns a vw example that can be used for prediction or learning
+             */
+            createExampleFromDense(features: Map<string, number[]> | Record<string, number[]>, label: string = ""): object {
+                let dense: Record<string, number[]>;
+                if (features instanceof Map) {
+                    // The native binding lists namespaces with Object.keys(), which reports no keys for a Map,
+                    // so Map entries are copied into a plain object before crossing into wasm.
+                    dense = {};
+                    features.forEach((values, ns) => {
+                        dense[ns] = values;
+                    });
+                } else {
+                    dense = features;
+                }
+                return this._instance.createExampleFromDense(dense, label);
+            }
+
             /**
              * Calls vw predict on the example and returns the prediction.
-             * 
+             *
              * @param {object} example returned from parse()
              * @returns the prediction with a type corresponding to the reduction that was used
              * @throws {VWError} Throws an error if the example is not well defined
@@ -234,7 +249,7 @@ export default new Promise((resolve) => {
 
             /**
              * Calls vw learn on the example and updates the model
-             * 
+             *
              * @param {object} example returned from parse()
              * @throws {VWError} Throws an error if the example is not well defined
              */
@@ -248,7 +263,7 @@ export default new Promise((resolve) => {
 
             /**
              * Cleans the example and returns it to the pool of available examples. delete() must also be called on the example object
-             * 
+             *
              * @param {object} example returned from parse()
              */
             finishExample(example: object) {
@@ -266,7 +281,7 @@ export default new Promise((resolve) => {
             /**
              * Creates a new Vowpal Wabbit workspace for Contextual Bandit exploration algorithms.
              * Can accept either or both string arguments and a model file.
-             * 
+             *
              * @constructor
              * @param {Function} readSync - A function that reads a file synchronously and returns a buffer
              * @param {Function} writeSync - A function that writes a buffer to a file synchronously
@@ -291,10 +306,10 @@ export default new Promise((resolve) => {
             /**
              * Takes a CB example and returns an array of (action, score) pairs, representing the probability mass function over the available actions
              * The returned pmf can be used with samplePmf to sample an action
-             * 
+             *
              * Example must have the following properties:
              * - text_context: a string representing the context
-             * 
+             *
              * @param {object} example the example object that will be used for prediction
              * @returns {array} probability mass function, an array of action,score pairs that was returned by predict
              * @throws {VWError} Throws an error if the example text_context is missing from the example
@@ -309,17 +324,17 @@ export default new Promise((resolve) => {
 
             /**
              * Takes a CB example and uses it to update the model
-             * 
+             *
              * Example must have the following properties:
              * - text_context: a string representing the context
              * - labels: an array of label objects (usually one), each label object must have the following properties:
              *  - action: the action index
              *  - cost: the cost of the action
              *  - probability: the probability of the action
-             * 
+             *
              * A label object should have more than one labels only if a reduction that accepts multiple labels was used (e.g. graph_feedback)
-             * 
-             * 
+             *
+             *
              * @param {object} example the example object that will be used for prediction
              * @throws {VWError} Throws an error if the example does not have the required properties to learn
              */
@@ -335,7 +350,7 @@ export default new Promise((resolve) => {
             /**
              * Accepts a CB example (in text format) line by line. Once a full CB example is passed in it will call learnFromString.
              * This is intended to be used with files that have CB examples, that were logged using logCBExampleToStream and are being read line by line.
-             * 
+             *
              * @param {string} line a string representing a line from a CB example in text Vowpal Wabbit format
              */
             addLine(line: string) {
@@ -351,7 +366,7 @@ export default new Promise((resolve) => {
 
             /**
              * Takes a full multiline CB example in text format and uses it to update the model. This is intended to be used with examples that are logged to a file using logCBExampleToStream.
-             * 
+             *
              * @param {string} example a string representing the CB example in text Vowpal Wabbit format
              * @throws {Error} Throws an error if the example is an object with a label and/or a text_context
              */
@@ -368,10 +383,10 @@ export default new Promise((resolve) => {
             }
 
             /**
-             * 
+             *
              * Takes an exploration prediction (array of action, score pairs) and returns a single action and score,
              * along with a unique id that was used to seed the sampling and that can be used to track and reproduce the sampling.
-             * 
+             *
              * @param {array} pmf probability mass function, an array of action,score pairs that was returned by predict
              * @returns {object} an object with the following properties:
              * - action: the action index that was sampled
@@ -392,10 +407,10 @@ export default new Promise((resolve) => {
             }
 
             /**
-             * 
+             *
              * Takes an exploration prediction (array of action, score pairs) and a unique id that is used to seed the sampling,
              * and returns a single action index and the corresponding score.
-             * 
+             *
              * @param {array} pmf probability mass function, an array of action,score pairs that was returned by predict
              * @param {string} uuid a unique id that can be used to seed the prediction
              * @returns {object} an object with the following properties:
@@ -415,11 +430,11 @@ export default new Promise((resolve) => {
             }
 
             /**
-             * 
+             *
              * Takes an example with a text_context field and calls predict. The prediction (a probability mass function over the available actions)
              * will then be sampled from, and only the chosen action index and the corresponding score will be returned,
              * along with a unique id that was used to seed the sampling and that can be used to track and reproduce the sampling.
-             * 
+             *
              * @param {object} example an example object containing the context to be used during prediction
              * @returns {object} an object with the following properties:
              * - action: the action index that was sampled
@@ -439,11 +454,11 @@ export default new Promise((resolve) => {
             }
 
             /**
-             * 
+             *
              * Takes an example with a text_context field and calls predict, and a unique id that is used to seed the sampling.
              * The prediction (a probability mass function over the available actions) will then be sampled from, and only the chosen action index
              * and the corresponding score will be returned, along with a unique id that was used to seed the sampling and that can be used to track and reproduce the sampling.
-             * 
+             *
              * @param {object} example an example object containing the context to be used during prediction
              * @returns {object} an object with the following properties:
              * - action: the action index that was sampled

diff --git a/wasm/src/wasm_wrapper.cc b/wasm/src/wasm_wrapper.cc
@@ -1,9 +1,11 @@
+#include "vw/common/text_utils.h"
 #include "vw/config/options.h"
 #include "vw/core/example.h"
 #include "vw/core/learner.h"
 #include "vw/core/parse_example.h"
 #include "vw/core/parse_primitives.h"
 #include "vw/core/parse_regressor.h"
+#include "vw/core/parser.h"
 #include "vw/core/prediction_type.h"
 #include "vw/core/shared_data.h"
 #include "vw/core/vw.h"
@@ -199,6 +201,48 @@ struct vw_model_basic
 
   prediction_type_t get_prediction_type() const { return vw_ptr->l->get_output_prediction_type(); }
 
+  // Builds one pooled example from dense per-namespace feature arrays passed in from JS.
+  //
+  // features: JS object whose own enumerable keys are namespace names and whose values are arrays
+  //           of numbers (converted to float). The first character of the key selects the namespace
+  //           slot; an empty key uses ' '.
+  // label:    label text, tokenized on spaces and handed to the workspace's label parser.
+  //
+  // Returns a one-element vector holding the wrapped pooled example.
+  std::vector<std::shared_ptr<example_ptr>> create_example_from_dense_features(
+      const emscripten::val& features, const std::string& label)
+  {
+    auto* ex = &VW::get_unused_example(this->vw_ptr.get());
+
+    emscripten::val keys = emscripten::val::global("Object").call<emscripten::val>("keys", features);
+    const int length = keys["length"].as<int>();
+
+    for (int i = 0; i < length; ++i)
+    {
+      const auto key = keys[i].as<std::string>();
+      if (!features.hasOwnProperty(key.c_str())) { continue; }
+
+      const auto values = emscripten::convertJSArrayToNumberVector<float>(features[key]);
+      const auto namespace_hash = VW::hash_space(*this->vw_ptr, key);
+      // Go through unsigned char so a non-ASCII first byte cannot become a negative array index
+      // on targets where plain char is signed.
+      const auto namespace_slot = static_cast<unsigned char>(key.empty() ? ' ' : key[0]);
+
+      auto& feature_group = ex->feature_space[namespace_slot];
+      if (std::find(ex->indices.begin(), ex->indices.end(), namespace_slot) == ex->indices.end())
+      {
+        ex->indices.push_back(namespace_slot);
+      }
+
+      feature_group.indices.reserve(feature_group.indices.size() + values.size());
+      feature_group.values.reserve(feature_group.values.size() + values.size());
+
+      // Offset each position by the namespace hash so that equal positions in different namespaces
+      // get distinct feature indices instead of all starting at 0.
+      // NOTE(review): intended to mirror how the text parser numbers unnamed features - confirm
+      // against parse_example.cc.
+      uint64_t anon_index = 0;
+      for (const auto v : values)
+      {
+        // features::push_back appends to values and indices together.
+        // NOTE(review): expected to also maintain the group's sum of squared values - confirm
+        // against feature_group.h.
+        feature_group.push_back(v, namespace_hash + anon_index);
+        ++anon_index;
+      }
+    }
+
+    // Reset the label to its default, then parse the supplied label text over it.
+    auto& example_parser = *this->vw_ptr->parser_runtime.example_parser;
+    example_parser.lbl_parser.default_label(ex->l);
+    example_parser.words.clear();
+    VW::tokenize(' ', label, example_parser.words);
+    example_parser.lbl_parser.parse_label(ex->l, ex->ex_reduction_features, example_parser.parser_memory_to_reuse,
+        this->vw_ptr->sd->ldict.get(), example_parser.words, this->vw_ptr->logger);
+
+    VW::setup_example(*this->vw_ptr, ex);
+    return {example_ptr::wrap_pooled_example(ex, this->vw_ptr)};
+  }
+
   std::shared_ptr<vw> vw_ptr;
   std::string args;
 };
@@ -491,7 +535,8 @@ EMSCRIPTEN_BINDINGS(vwwasm)
       .function("getModel", &vw_model_basic::get_model)
       .function("sumLoss", &vw_model_basic::sum_loss)
       .function("weightedLabeledExamples", &vw_model_basic::weighted_labeled_examples)
-      .function("predictionType", &vw_model_basic::get_prediction_type);
+      .function("predictionType", &vw_model_basic::get_prediction_type)
+      .function("createExampleFromDense", &vw_model_basic::create_example_from_dense_features);
 
   // Currently this is structured such that parse returns a vector of example but to JS that is opaque.
   // All the caller can do is pass this opaque object to the other functions. Is it possible to convert this to a JS

diff --git a/wasm/test/test.js b/wasm/test/test.js
@@ -687,4 +687,40 @@ describe('Call WASM VWModule', () => {
         example.delete();
         model.delete();
     });
+
+    it("create dense example", () => {
+      let model = new vw.Workspace({ args_str: "" });
+      let example = model.createExampleFromDense({
+        myns: [1, 2, 2],
+        ns2: [3, 4, 5],
+        "": [34, 1]
+      });
+      let prediction = model.predict(example);
+
+      assert.equal(model.predictionType(), vw.Prediction.Type.Scalar);
+      assert.equal(typeof prediction, "number");
+      model.finishExample(example);
+      example.delete();
+      model.delete();
+    });
+
+    it("create dense example with label", () => {
+        let model = new vw.Workspace({ args_str: "" });
+        let example = model.createExampleFromDense({
+          myns: [1, 2, 2],
+          ns2: [3, 4, 5],
+          "": [34, 1]
+        }, "1");
+        let prediction = model.predict(example);
+        model.learn(example);
+        let prediction2 = model.predict(example);
+
+
+        assert.equal(model.predictionType(), vw.Prediction.Type.Scalar);
+        assert.equal(typeof prediction, "number");
+        assert.notEqual(prediction, prediction2);
+        model.finishExample(example);
+        example.delete();
+        model.delete();
+      });
 });