From 2004e7205b751b976f2a19b09f8c723e8c62412f Mon Sep 17 00:00:00 2001
From: Jose Orlando <joseorlando182@hotmail.com>
Date: Thu, 29 Feb 2024 18:54:18 +0000
Subject: [PATCH 1/3] docs: Fix typo in Command Line Basics tutorial (#4679)

* docs: Fix typo in "Command Line Basics" tutorial

* Update cmd_first_steps.md

---------

Co-authored-by: Griffin Bassman <griffinbassman@gmail.com>
Co-authored-by: Jack Gerrits <jackgerrits@users.noreply.github.com>
---
 python/docs/source/tutorials/cmd_first_steps.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/docs/source/tutorials/cmd_first_steps.md b/python/docs/source/tutorials/cmd_first_steps.md
index 9bb0c502dbd..b37d28ea012 100644
--- a/python/docs/source/tutorials/cmd_first_steps.md
+++ b/python/docs/source/tutorials/cmd_first_steps.md
@@ -116,6 +116,6 @@ The model predicted a value of **0**. This result means our house will not need
 ## More to explore
 
 - See [Python tutorial](python_first_steps.ipynb) for a quick introduction to the basics of training and testing your model.
-- To learn more about how to approach a contextual bandits problem using  tVowpal Wabbit — including how to  work with different contextual bandits approaches, how to format data, and understand the results — see the [Contextual Bandit Reinforcement Learning Tutorial](python_Contextual_bandits_and_Vowpal_Wabbit.ipynb).
+- To learn more about how to approach a contextual bandits problem using Vowpal Wabbit — including how to work with different contextual bandits approaches, how to format data, and understand the results — see the [Contextual Bandit Reinforcement Learning Tutorial](python_Contextual_bandits_and_Vowpal_Wabbit.ipynb).
 - For more on the contextual bandits approach to reinforcement learning, including a content personalization scenario, see the [Contextual Bandit Simulation Tutorial](python_Simulating_a_news_personalization_scenario_using_Contextual_Bandits.ipynb).
 - See the [Linear Regression Tutorial](cmd_linear_regression.md) for a different look at the roof replacement problem and learn more about Vowpal Wabbit's format and understanding the results.

From 80e832fb3ab30bf092fae43ec898fd307b8a50c0 Mon Sep 17 00:00:00 2001
From: beygel <beygel@users.noreply.github.com>
Date: Thu, 7 Mar 2024 08:55:02 -0500
Subject: [PATCH 2/3] feat: direct interface for active.cc and variable rename
 for understandability (#4671)

* updated active.cc

* updates to active.cc

* updates to tests

* revert accidental help change in diff

* removed diagnostic print statements

---------

Co-authored-by: Alina Beygelzimer <beygel@yahooinc.com>
Co-authored-by: Alexey Taymanov <41013086+ataymano@users.noreply.github.com>
---
 .../ref/active-simulation.t24.stderr          | 19 ++---
 test/train-sets/ref/help.stdout               |  8 +-
 vowpalwabbit/core/src/reductions/active.cc    | 83 +++++++++++++++----
 3 files changed, 78 insertions(+), 32 deletions(-)

diff --git a/test/train-sets/ref/active-simulation.t24.stderr b/test/train-sets/ref/active-simulation.t24.stderr
index 8394160e69a..29f2786913e 100644
--- a/test/train-sets/ref/active-simulation.t24.stderr
+++ b/test/train-sets/ref/active-simulation.t24.stderr
@@ -11,20 +11,13 @@ Output pred = SCALAR
 average  since         example        example        current        current  current
 loss     last          counter         weight          label        predict features
 1.000000 1.000000            1            1.0        -1.0000         0.0000      128
-0.791125 0.755288            2            6.8        -1.0000        -0.1309       44
-1.274829 1.444750            8           26.3         1.0000        -0.2020       34
-1.083985 0.895011           73           52.8         1.0000         0.0214       21
-0.887295 0.693362          130          106.3        -1.0000        -0.3071      146
-0.788245 0.690009          233          213.6        -1.0000         0.0421       47
-0.664628 0.541195          398          427.4        -1.0000        -0.1863       68
-0.634406 0.604328          835          856.9        -1.0000        -0.4327       40
 
 finished run
 number of examples = 1000
-weighted example sum = 1014.004519
-weighted label sum = -68.618036
-average loss = 0.630964
-best constant = -0.067670
-best constant's loss = 0.995421
+weighted example sum = 1.000000
+weighted label sum = -1.000000
+average loss = 1.000000
+best constant = -1.000000
+best constant's loss = 0.000000
 total feature number = 78739
-total queries = 474
+total queries = 1
diff --git a/test/train-sets/ref/help.stdout b/test/train-sets/ref/help.stdout
index b9d4fca2f7b..96601833d2e 100644
--- a/test/train-sets/ref/help.stdout
+++ b/test/train-sets/ref/help.stdout
@@ -221,8 +221,12 @@ Weight Options:
 [Reduction] Active Learning Options:
     --active                                Enable active learning (type: bool, keep, necessary)
     --simulation                            Active learning simulation mode (type: bool)
-    --mellowness arg                        Active learning mellowness parameter c_0. Default 8 (type: float,
-                                            default: 8, keep)
+    --direct                                Active learning via the tag and predictions interface. Tag should
+                                            start with "query?" to get query decision. Returned prediction
+                                            is either -1 for no or the importance weight for yes. (type:
+                                            bool)
+    --mellowness arg                        Active learning mellowness parameter c_0. Default 1. (type: float,
+                                            default: 1, keep)
 [Reduction] Active Learning with Cover Options:
     --active_cover                          Enable active learning with cover (type: bool, keep, necessary)
     --mellowness arg                        Active learning mellowness parameter c_0 (type: float, default:
diff --git a/vowpalwabbit/core/src/reductions/active.cc b/vowpalwabbit/core/src/reductions/active.cc
index ea8b66c40e4..a7449affde2 100644
--- a/vowpalwabbit/core/src/reductions/active.cc
+++ b/vowpalwabbit/core/src/reductions/active.cc
@@ -31,31 +31,41 @@ using namespace VW::config;
 using namespace VW::reductions;
 namespace
 {
-float get_active_coin_bias(float k, float avg_loss, float g, float c0)
-{
-  const float b = c0 * (std::log(k + 1.f) + 0.0001f) / (k + 0.0001f);
-  const float sb = std::sqrt(b);
+float get_active_coin_bias(float example_count, float avg_loss, float alt_label_error_rate_diff, float mellowness)
+{//implementation follows https://web.archive.org/web/20120525164352/http://books.nips.cc/papers/files/nips23/NIPS2010_0363.pdf
+  const float mellow_log_e_count_over_e_count = mellowness * (std::log(example_count + 1.f) + 0.0001f) / (example_count + 0.0001f);
+  const float sqrt_mellow_lecoec = std::sqrt(mellow_log_e_count_over_e_count);
   // loss should be in [0,1]
   avg_loss = VW::math::clamp(avg_loss, 0.f, 1.f);
 
-  const float sl = std::sqrt(avg_loss) + std::sqrt(avg_loss + g);
-  if (g <= sb * sl + b) { return 1; }
-  const float rs = (sl + std::sqrt(sl * sl + 4 * g)) / (2 * g);
-  return b * rs * rs;
+  const float sqrt_avg_loss_plus_sqrt_alt_loss = std::min(1.f, //std::sqrt(avg_loss) + // commented out because two square roots appears to conservative.
+							  std::sqrt(avg_loss + alt_label_error_rate_diff));//emperical variance deflater.
+  //std::cout << "example_count = " << example_count << " avg_loss = " << avg_loss << " alt_label_error_rate_diff = " << alt_label_error_rate_diff << " mellowness = " << mellowness << " mlecoc = " << mellow_log_e_count_over_e_count
+  //	    << " sqrt_mellow_lecoec = " << sqrt_mellow_lecoec << " double sqrt = " << sqrt_avg_loss_plus_sqrt_alt_loss << std::endl;
+
+  if (alt_label_error_rate_diff <= sqrt_mellow_lecoec * sqrt_avg_loss_plus_sqrt_alt_loss//deflater in use.
+      + mellow_log_e_count_over_e_count) { return 1; }
+  //old equation
+  //  const float rs = (sqrt_avg_loss_plus_sqrt_alt_loss + std::sqrt(sqrt_avg_loss_plus_sqrt_alt_loss * sqrt_avg_loss_plus_sqrt_alt_loss + 4 * alt_label_error_rate_diff)) / (2 * alt_label_error_rate_diff);
+  //  return mellow_log_e_count_over_e_count * rs * rs;
+  const float sqrt_s = (sqrt_mellow_lecoec + std::sqrt(mellow_log_e_count_over_e_count+4*alt_label_error_rate_diff*mellow_log_e_count_over_e_count)) / 2*alt_label_error_rate_diff;
+  //  std::cout << "sqrt_s = " << sqrt_s << std::endl;
+  return sqrt_s*sqrt_s;
 }
 
-float query_decision(const active& a, float ec_revert_weight, float k)
+float query_decision(const active& a, float updates_to_change_prediction, float example_count)
 {
   float bias;
-  if (k <= 1.f) { bias = 1.f; }
+  if (example_count <= 1.f) { bias = 1.f; }
   else
   {
-    const auto weighted_queries = static_cast<float>(a._shared_data->weighted_labeled_examples);
-    const float avg_loss = (static_cast<float>(a._shared_data->sum_loss) / k) +
-        std::sqrt((1.f + 0.5f * std::log(k)) / (weighted_queries + 0.0001f));
-    bias = get_active_coin_bias(k, avg_loss, ec_revert_weight / k, a.active_c0);
+    //    const auto weighted_queries = static_cast<float>(a._shared_data->weighted_labeled_examples);
+    const float avg_loss = (static_cast<float>(a._shared_data->sum_loss) / example_count);
+      //+ std::sqrt((1.f + 0.5f * std::log(example_count)) / (weighted_queries + 0.0001f));  Commented this out, not following why we need it from the theory.
+    //    std::cout << "avg_loss = " << avg_loss << " weighted_queries = " << weighted_queries << " sum_loss = " << a._shared_data->sum_loss << " example_count = " << example_count << std::endl;
+    bias = get_active_coin_bias(example_count, avg_loss, updates_to_change_prediction / example_count, a.active_c0);
   }
-
+  //  std::cout << "bias = " << bias << std::endl;
   return (a._random_state->get_and_update_random() < bias) ? 1.f / bias : -1.f;
 }
 
@@ -110,6 +120,34 @@ void predict_or_learn_active(active& a, learner& base, VW::example& ec)
   }
 }
 
+template <bool is_learn>
+void predict_or_learn_active_direct(active& a, learner& base, VW::example& ec)
+{ 
+  if (is_learn) { base.learn(ec); }
+  else { base.predict(ec); }
+    
+  if (ec.l.simple.label == FLT_MAX)
+  {
+    if (std::string(ec.tag.begin(), ec.tag.begin()+6) == "query?")
+    { 
+      const float threshold = (a._shared_data->max_label + a._shared_data->min_label) * 0.5f;
+      // We want to understand the change in prediction if the label were to be
+      // the opposite of what was predicted. 0 and 1 are used for the expected min
+      // and max labels to be coming in from the active interactor.
+      ec.l.simple.label = (ec.pred.scalar >= threshold) ? a._min_seen_label : a._max_seen_label;
+      ec.confidence = std::abs(ec.pred.scalar - threshold) / base.sensitivity(ec);
+      ec.l.simple.label = FLT_MAX;
+      ec.pred.scalar = query_decision(a, ec.confidence, static_cast<float>(a._shared_data->weighted_unlabeled_examples));
+    }
+  }
+  else
+  { 
+    // Update seen labels based on the current example's label.
+    a._min_seen_label = std::min(ec.l.simple.label, a._min_seen_label);
+    a._max_seen_label = std::max(ec.l.simple.label, a._max_seen_label);
+  } 
+}   
+
 void active_print_result(
     VW::io::writer* f, float res, float weight, const VW::v_array<char>& tag, VW::io::logger& logger)
 {
@@ -189,14 +227,16 @@ std::shared_ptr<VW::LEARNER::learner> VW::reductions::active_setup(VW::setup_bas
 
   bool active_option = false;
   bool simulation = false;
+  bool direct = false;
   float active_c0;
   option_group_definition new_options("[Reduction] Active Learning");
   new_options.add(make_option("active", active_option).keep().necessary().help("Enable active learning"))
       .add(make_option("simulation", simulation).help("Active learning simulation mode"))
+      .add(make_option("direct", direct).help("Active learning via the tag and predictions interface.  Tag should start with \"query?\" to get query decision.  Returned prediction is either -1 for no or the importance weight for yes."))
       .add(make_option("mellowness", active_c0)
                .keep()
-               .default_value(8.f)
-               .help("Active learning mellowness parameter c_0. Default 8"));
+               .default_value(1.f)
+               .help("Active learning mellowness parameter c_0. Default 1."));
 
   if (!options.add_parse_and_check_necessary(new_options)) { return nullptr; }
 
@@ -223,6 +263,15 @@ std::shared_ptr<VW::LEARNER::learner> VW::reductions::active_setup(VW::setup_bas
     print_update_func = VW::details::print_update_simple_label<active>;
     reduction_name.append("-simulation");
   }
+  else if (direct)
+  {
+    learn_func = predict_or_learn_active_direct<true>;
+    pred_func = predict_or_learn_active_direct<false>;
+    update_stats_func = update_stats_active;
+    output_example_prediction_func = VW::details::output_example_prediction_simple_label<active>;
+    print_update_func = VW::details::print_update_simple_label<active>;
+    learn_returns_prediction = base->learn_returns_prediction;
+  }
   else
   {
     all.reduction_state.active = true;

From 9837a0e937fca2758383dc96f028124251079b50 Mon Sep 17 00:00:00 2001
From: Alexey Taymanov <41013086+ataymano@users.noreply.github.com>
Date: Fri, 8 Mar 2024 16:20:46 -0500
Subject: [PATCH 3/3] style: forgotten lint fix (#4688)

* lint

* undo fake commit

* undo fake commit

* forgotten space
---
 vowpalwabbit/core/src/reductions/active.cc | 67 ++++++++++++++--------
 1 file changed, 42 insertions(+), 25 deletions(-)

diff --git a/vowpalwabbit/core/src/reductions/active.cc b/vowpalwabbit/core/src/reductions/active.cc
index a7449affde2..b41717a60df 100644
--- a/vowpalwabbit/core/src/reductions/active.cc
+++ b/vowpalwabbit/core/src/reductions/active.cc
@@ -32,25 +32,37 @@ using namespace VW::reductions;
 namespace
 {
 float get_active_coin_bias(float example_count, float avg_loss, float alt_label_error_rate_diff, float mellowness)
-{//implementation follows https://web.archive.org/web/20120525164352/http://books.nips.cc/papers/files/nips23/NIPS2010_0363.pdf
-  const float mellow_log_e_count_over_e_count = mellowness * (std::log(example_count + 1.f) + 0.0001f) / (example_count + 0.0001f);
+{  // implementation follows
+   // https://web.archive.org/web/20120525164352/http://books.nips.cc/papers/files/nips23/NIPS2010_0363.pdf
+  const float mellow_log_e_count_over_e_count =
+      mellowness * (std::log(example_count + 1.f) + 0.0001f) / (example_count + 0.0001f);
   const float sqrt_mellow_lecoec = std::sqrt(mellow_log_e_count_over_e_count);
   // loss should be in [0,1]
   avg_loss = VW::math::clamp(avg_loss, 0.f, 1.f);
 
-  const float sqrt_avg_loss_plus_sqrt_alt_loss = std::min(1.f, //std::sqrt(avg_loss) + // commented out because two square roots appears to conservative.
-							  std::sqrt(avg_loss + alt_label_error_rate_diff));//emperical variance deflater.
-  //std::cout << "example_count = " << example_count << " avg_loss = " << avg_loss << " alt_label_error_rate_diff = " << alt_label_error_rate_diff << " mellowness = " << mellowness << " mlecoc = " << mellow_log_e_count_over_e_count
-  //	    << " sqrt_mellow_lecoec = " << sqrt_mellow_lecoec << " double sqrt = " << sqrt_avg_loss_plus_sqrt_alt_loss << std::endl;
-
-  if (alt_label_error_rate_diff <= sqrt_mellow_lecoec * sqrt_avg_loss_plus_sqrt_alt_loss//deflater in use.
-      + mellow_log_e_count_over_e_count) { return 1; }
-  //old equation
-  //  const float rs = (sqrt_avg_loss_plus_sqrt_alt_loss + std::sqrt(sqrt_avg_loss_plus_sqrt_alt_loss * sqrt_avg_loss_plus_sqrt_alt_loss + 4 * alt_label_error_rate_diff)) / (2 * alt_label_error_rate_diff);
-  //  return mellow_log_e_count_over_e_count * rs * rs;
-  const float sqrt_s = (sqrt_mellow_lecoec + std::sqrt(mellow_log_e_count_over_e_count+4*alt_label_error_rate_diff*mellow_log_e_count_over_e_count)) / 2*alt_label_error_rate_diff;
+  const float sqrt_avg_loss_plus_sqrt_alt_loss =
+      std::min(1.f,  // std::sqrt(avg_loss) + // commented out because two square roots appears too conservative.
+          std::sqrt(avg_loss + alt_label_error_rate_diff));  // empirical variance deflater.
+  // std::cout << "example_count = " << example_count << " avg_loss = " << avg_loss << " alt_label_error_rate_diff = "
+  // << alt_label_error_rate_diff << " mellowness = " << mellowness << " mlecoc = " << mellow_log_e_count_over_e_count
+  //	    << " sqrt_mellow_lecoec = " << sqrt_mellow_lecoec << " double sqrt = " << sqrt_avg_loss_plus_sqrt_alt_loss
+  //<< std::endl;
+
+  if (alt_label_error_rate_diff <= sqrt_mellow_lecoec * sqrt_avg_loss_plus_sqrt_alt_loss  // deflater in use.
+          + mellow_log_e_count_over_e_count)
+  {
+    return 1;
+  }
+  // old equation
+  //   const float rs = (sqrt_avg_loss_plus_sqrt_alt_loss + std::sqrt(sqrt_avg_loss_plus_sqrt_alt_loss *
+  //   sqrt_avg_loss_plus_sqrt_alt_loss + 4 * alt_label_error_rate_diff)) / (2 * alt_label_error_rate_diff); return
+  //   mellow_log_e_count_over_e_count * rs * rs;
+  const float sqrt_s = (sqrt_mellow_lecoec +
+                           std::sqrt(mellow_log_e_count_over_e_count +
+                               4 * alt_label_error_rate_diff * mellow_log_e_count_over_e_count)) /
+      (2 * alt_label_error_rate_diff);  // divide by the whole product 2 * G, not by 2 and then multiply by G
   //  std::cout << "sqrt_s = " << sqrt_s << std::endl;
-  return sqrt_s*sqrt_s;
+  return std::min(1.f, sqrt_s * sqrt_s);  // a coin bias is a probability, so cap it at 1
 }
 
 float query_decision(const active& a, float updates_to_change_prediction, float example_count)
@@ -61,8 +73,10 @@ float query_decision(const active& a, float updates_to_change_prediction, float
   {
     //    const auto weighted_queries = static_cast<float>(a._shared_data->weighted_labeled_examples);
     const float avg_loss = (static_cast<float>(a._shared_data->sum_loss) / example_count);
-      //+ std::sqrt((1.f + 0.5f * std::log(example_count)) / (weighted_queries + 0.0001f));  Commented this out, not following why we need it from the theory.
-    //    std::cout << "avg_loss = " << avg_loss << " weighted_queries = " << weighted_queries << " sum_loss = " << a._shared_data->sum_loss << " example_count = " << example_count << std::endl;
+    //+ std::sqrt((1.f + 0.5f * std::log(example_count)) / (weighted_queries + 0.0001f));  Commented this out, not
+    // following why we need it from the theory.
+    //    std::cout << "avg_loss = " << avg_loss << " weighted_queries = " << weighted_queries << " sum_loss = " <<
+    //    a._shared_data->sum_loss << " example_count = " << example_count << std::endl;
     bias = get_active_coin_bias(example_count, avg_loss, updates_to_change_prediction / example_count, a.active_c0);
   }
   //  std::cout << "bias = " << bias << std::endl;
@@ -122,14 +136,14 @@ void predict_or_learn_active(active& a, learner& base, VW::example& ec)
 
 template <bool is_learn>
 void predict_or_learn_active_direct(active& a, learner& base, VW::example& ec)
-{ 
+{
   if (is_learn) { base.learn(ec); }
   else { base.predict(ec); }
-    
+
   if (ec.l.simple.label == FLT_MAX)
   {
-    if (std::string(ec.tag.begin(), ec.tag.begin()+6) == "query?")
-    { 
+    if (ec.tag.size() >= 6 && std::string(ec.tag.begin(), ec.tag.begin() + 6) == "query?")  // guard short tags
+    {
       const float threshold = (a._shared_data->max_label + a._shared_data->min_label) * 0.5f;
       // We want to understand the change in prediction if the label were to be
       // the opposite of what was predicted. 0 and 1 are used for the expected min
@@ -137,16 +151,17 @@ void predict_or_learn_active_direct(active& a, learner& base, VW::example& ec)
       ec.l.simple.label = (ec.pred.scalar >= threshold) ? a._min_seen_label : a._max_seen_label;
       ec.confidence = std::abs(ec.pred.scalar - threshold) / base.sensitivity(ec);
       ec.l.simple.label = FLT_MAX;
-      ec.pred.scalar = query_decision(a, ec.confidence, static_cast<float>(a._shared_data->weighted_unlabeled_examples));
+      ec.pred.scalar =
+          query_decision(a, ec.confidence, static_cast<float>(a._shared_data->weighted_unlabeled_examples));
     }
   }
   else
-  { 
+  {
     // Update seen labels based on the current example's label.
     a._min_seen_label = std::min(ec.l.simple.label, a._min_seen_label);
     a._max_seen_label = std::max(ec.l.simple.label, a._max_seen_label);
-  } 
-}   
+  }
+}
 
 void active_print_result(
     VW::io::writer* f, float res, float weight, const VW::v_array<char>& tag, VW::io::logger& logger)
@@ -232,7 +247,9 @@ std::shared_ptr<VW::LEARNER::learner> VW::reductions::active_setup(VW::setup_bas
   option_group_definition new_options("[Reduction] Active Learning");
   new_options.add(make_option("active", active_option).keep().necessary().help("Enable active learning"))
       .add(make_option("simulation", simulation).help("Active learning simulation mode"))
-      .add(make_option("direct", direct).help("Active learning via the tag and predictions interface.  Tag should start with \"query?\" to get query decision.  Returned prediction is either -1 for no or the importance weight for yes."))
+      .add(make_option("direct", direct)
+               .help("Active learning via the tag and predictions interface.  Tag should start with \"query?\" to get "
+                     "query decision.  Returned prediction is either -1 for no or the importance weight for yes."))
       .add(make_option("mellowness", active_c0)
                .keep()
                .default_value(1.f)