From 0182155324e21d874cdde0b0bf12132d1100b6f2 Mon Sep 17 00:00:00 2001 From: Antonio Velazquez Date: Wed, 30 Sep 2020 01:09:43 -0700 Subject: [PATCH 1/2] Change the _maxCalibrationExamples default --- src/Microsoft.ML.Data/Prediction/Calibrator.cs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Data/Prediction/Calibrator.cs b/src/Microsoft.ML.Data/Prediction/Calibrator.cs index 375e2a8fb1..8e15da1ec7 100644 --- a/src/Microsoft.ML.Data/Prediction/Calibrator.cs +++ b/src/Microsoft.ML.Data/Prediction/Calibrator.cs @@ -838,7 +838,9 @@ internal static object Create(IHostEnvironment env, ModelLoadContext ctx, object internal static class CalibratorUtils { // maximum number of rows passed to the calibrator. - private const int _maxCalibrationExamples = 1000000; + // if 0, we'll actually look through the whole dataset to + // when training the calibrator + private const int _maxCalibrationExamples = 0; private static bool NeedCalibration(IHostEnvironment env, IChannel ch, ICalibratorTrainer calibrator, ITrainer trainer, IPredictor predictor, RoleMappedSchema schema) @@ -988,6 +990,10 @@ public static ICalibrator TrainCalibrator(IHostEnvironment env, IChannel ch, ICa caliTrainer.ProcessTrainingExample(score, label > 0, weight); if (maxRows > 0 && ++num >= maxRows) + // If maxRows was 0, we'll process all of the rows in the dataset + // Notice that depending of the calibrator, "processing" means + // only using N random rows of the ones that where processed + // to actually train the calibrator. break; } } From d08a3d81d236f32a0a321c6aa3011330a4457541 Mon Sep 17 00:00:00 2001 From: Antonio Velazquez Date: Wed, 30 Sep 2020 11:19:31 -0700 Subject: [PATCH 2/2] Improving comments --- src/Microsoft.ML.Data/Prediction/Calibrator.cs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Microsoft.ML.Data/Prediction/Calibrator.cs b/src/Microsoft.ML.Data/Prediction/Calibrator.cs index 8e15da1ec7..becf8312df 100644 --- a/src/Microsoft.ML.Data/Prediction/Calibrator.cs +++ b/src/Microsoft.ML.Data/Prediction/Calibrator.cs @@ -837,9 +837,8 @@ internal static object Create(IHostEnvironment env, ModelLoadContext ctx, object [BestFriend] internal static class CalibratorUtils { - // maximum number of rows passed to the calibrator. - // if 0, we'll actually look through the whole dataset to - // when training the calibrator + // Maximum number of rows to process when training the Calibrator. + // If 0, we'll actually process the whole dataset. private const int _maxCalibrationExamples = 0; private static bool NeedCalibration(IHostEnvironment env, IChannel ch, ICalibratorTrainer calibrator, @@ -991,8 +990,8 @@ public static ICalibrator TrainCalibrator(IHostEnvironment env, IChannel ch, ICa if (maxRows > 0 && ++num >= maxRows) // If maxRows was 0, we'll process all of the rows in the dataset - // Notice that depending of the calibrator, "processing" means - // only using N random rows of the ones that where processed + // Notice that depending on the calibrator, "processing" might mean + // randomly choosing some of the "processed" rows // to actually train the calibrator. break; }