
Commit

TrainTestSplit should be inside MLContext.Data (#2907)
* TrainTestSplit should be inside MLContext.Data

* fix md files
ganik authored Mar 11, 2019
1 parent 005fe05 commit cd333c5
Showing 47 changed files with 155 additions and 157 deletions.
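
The change itself is mechanical: every call of the form mlContext.<Task>.TrainTestSplit(...) becomes mlContext.Data.TrainTestSplit(...), with the same arguments and the same TrainSet/TestSet result. Below is a minimal sketch of the relocated call against the ML.NET 1.x API surface; the Row type, the in-memory data, and the use of LoadFromEnumerable are illustrative assumptions for this sketch, not part of the changeset.

using System.Collections.Generic;
using Microsoft.ML;

// Illustrative row type; the name and fields are assumptions for this sketch.
public class Row
{
    public float Feature { get; set; }
    public bool Label { get; set; }
}

public static class TrainTestSplitSketch
{
    public static void Example()
    {
        var mlContext = new MLContext(seed: 1);

        // Tiny in-memory dataset, just enough rows to demonstrate the call.
        IDataView data = mlContext.Data.LoadFromEnumerable(new List<Row>
        {
            new Row { Feature = 0f, Label = false },
            new Row { Feature = 1f, Label = true },
            new Row { Feature = 2f, Label = false },
            new Row { Feature = 3f, Label = true },
        });

        // Before this commit: mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.25);
        // After this commit the same split hangs off the data operations catalog:
        var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.25);

        // The result exposes both partitions as IDataViews, exactly as the samples below use them.
        IDataView trainSet = split.TrainSet;
        IDataView testSet = split.TestSet;
    }
}
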
2 changes: 1 addition & 1 deletion docs/code/MlNetCookBook.md
@@ -825,7 +825,7 @@ var pipeline =
.Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent());

// Split the data 90:10 into train and test sets, train and evaluate.
-var split = mlContext.MulticlassClassification.TrainTestSplit(data, testFraction: 0.1);
+var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Train the model.
var model = pipeline.Fit(split.TrainSet);
2 changes: 1 addition & 1 deletion docs/code/experimental/MlNetCookBookStaticApi.md
@@ -907,7 +907,7 @@ var learningPipeline = loader.MakeNewEstimator()
Predictions: mlContext.MulticlassClassification.Trainers.Sdca(r.Label, r.Features)));

// Split the data 90:10 into train and test sets, train and evaluate.
-var (trainData, testData) = mlContext.MulticlassClassification.TrainTestSplit(data, testFraction: 0.1);
+var (trainData, testData) = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Train the model.
var model = learningPipeline.Fit(trainData);
@@ -57,7 +57,7 @@ public static void Example()

IDataView data = loader.Load(dataFilePath);

-var split = ml.BinaryClassification.TrainTestSplit(data, testFraction: 0.2);
+var split = ml.Data.TrainTestSplit(data, testFraction: 0.2);

var pipeline = ml.Transforms.Concatenate("Text", "workclass", "education", "marital-status",
"relationship", "ethnicity", "sex", "native-country")
@@ -16,7 +16,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
-var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Create data training pipeline.
var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(numIterations: 10);
@@ -18,7 +18,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
-var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Define the trainer options.
var options = new AveragedPerceptronTrainer.Options()
@@ -15,7 +15,7 @@ public static void Example()
// Download and featurize the dataset.
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
// Leave out 10% of data for testing.
-var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.3);
+var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3);

// Create data training pipeline for non calibrated trainer and train Naive calibrator on top of it.
var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
@@ -15,7 +15,7 @@ public static void Example()
// Download and featurize the dataset.
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
// Leave out 10% of data for testing.
-var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.3);
+var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3);

// Create data training pipeline for non calibrated trainer and train Naive calibrator on top of it.
var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
@@ -15,7 +15,7 @@ public static void Example()
// Download and featurize the dataset.
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
// Leave out 10% of data for testing.
-var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.3);
+var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3);

// Create data training pipeline for non calibrated trainer and train Naive calibrator on top of it.
var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
@@ -15,7 +15,7 @@ public static void Example()
// Download and featurize the dataset.
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
// Leave out 10% of data for testing.
-var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.3);
+var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3);

// Create data training pipeline for non calibrated trainer and train Naive calibrator on top of it.
var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
@@ -12,7 +12,7 @@ public static void Example()
var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
-var split = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1);
+var split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1);

// Create the Estimator.
var pipeline = mlContext.BinaryClassification.Trainers.LightGbm();
@@ -15,7 +15,7 @@ public static void Example()
var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
-var split = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1);
+var split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1);

// Create the pipeline with LightGbm Estimator using advanced options.
var pipeline = mlContext.BinaryClassification.Trainers.LightGbm(
@@ -19,7 +19,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
-var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Define the trainer options.
var options = new SdcaBinaryTrainer.Options()
@@ -16,7 +16,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
-var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Create data training pipeline.
var pipeline = mlContext.BinaryClassification.Trainers.StochasticGradientDescent();
@@ -16,7 +16,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
-var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Create data training pipeline.
var pipeline = mlContext.BinaryClassification.Trainers.StochasticGradientDescentNonCalibrated();
@@ -18,7 +18,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
-var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Create data training pipeline.
var pipeline = mlContext.BinaryClassification
@@ -18,7 +18,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
-var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Define the trainer options.
var options = new SgdBinaryTrainer.Options()
@@ -17,7 +17,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
-var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);
// Create data training pipeline.
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(labelColumnName: "IsOver50K", numberOfIterations: 25);
var model = pipeline.Fit(split.TrainSet);
@@ -17,7 +17,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
-var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);
// Create data training pipeline
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(
new ML.Trainers.SymbolicStochasticGradientDescentClassificationTrainer.Options()
@@ -37,7 +37,7 @@ public static void Example()

// Split the static-typed data into training and test sets. Only training set is used in fitting
// the created pipeline. Metrics are computed on the test.
-var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5);
+var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.5);

// Train the model.
var model = pipeline.Fit(split.TrainSet);
@@ -48,7 +48,7 @@ public static void Example()

// Split the static-typed data into training and test sets. Only training set is used in fitting
// the created pipeline. Metrics are computed on the test.
-var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5);
+var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.5);

// Train the model.
var model = pipeline.Fit(split.TrainSet);
@@ -34,7 +34,7 @@ public static void Example()

// Split the data into training and test sets. Only training set is used in fitting
// the created pipeline. Metrics are computed on the test.
-var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.1);
+var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.1);

// Train the model.
var model = pipeline.Fit(split.TrainSet);
@@ -45,7 +45,7 @@ public static void Example()

// Split the data into training and test sets. Only training set is used in fitting
// the created pipeline. Metrics are computed on the test.
-var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.1);
+var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.1);

// Train the model.
var model = pipeline.Fit(split.TrainSet);
@@ -13,8 +13,8 @@ public static void Example()

// Leave out 10% of the dataset for testing. Since this is a ranking problem, we must ensure that the split
// respects the GroupId column, i.e. rows with the same GroupId are either all in the train split or all in
-// the test split. The samplingKeyColumn parameter in Ranking.TrainTestSplit is used for this purpose.
-var split = mlContext.Ranking.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumn: "GroupId");
+// the test split. The samplingKeyColumn parameter in Data.TrainTestSplit is used for this purpose.
+var split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumn: "GroupId");

// Create the Estimator pipeline. For simplicity, we will train a small tree with 4 leaves and 2 boosting iterations.
var pipeline = mlContext.Ranking.Trainers.LightGbm(
@@ -16,8 +16,8 @@ public static void Example()

// Leave out 10% of the dataset for testing. Since this is a ranking problem, we must ensure that the split
// respects the GroupId column, i.e. rows with the same GroupId are either all in the train split or all in
-// the test split. The samplingKeyColumn parameter in Ranking.TrainTestSplit is used for this purpose.
-var split = mlContext.Ranking.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumn: "GroupId");
+// the test split. The samplingKeyColumn parameter in Data.TrainTestSplit is used for this purpose.
+var split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumn: "GroupId");

// Create the Estimator pipeline. For simplicity, we will train a small tree with 4 leaves and 2 boosting iterations.
var pipeline = mlContext.Ranking.Trainers.LightGbm(
@@ -23,7 +23,7 @@ public static void Example()
// 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90 ...
// 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10 ...

-var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.1);
+var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.1);

// Create the estimator, here we only need LightGbm trainer
// as data is already processed in a form consumable by the trainer.
@@ -25,7 +25,7 @@ public static void Example()
// 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90 ...
// 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10 ...

-var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.1);
+var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.1);

// Create a pipeline with LightGbm estimator with advanced options.
// Here we only need LightGbm trainer as data is already processed
@@ -39,7 +39,7 @@ public static void Example()
// 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90
// 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10

-var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.2);
+var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);

// Create the estimator, here we only need OrdinaryLeastSquares trainer
// as data is already processed in a form consumable by the trainer
@@ -40,7 +40,7 @@ public static void Example()
// 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90
// 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10

-var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.2);
+var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);

// Create the estimator, here we only need OrdinaryLeastSquares trainer
// as data is already processed in a form consumable by the trainer
@@ -19,7 +19,7 @@ public static void Example()

// Split the data into training and test sets. Only training set is used in fitting
// the created pipeline. Metrics are computed on the test.
-var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.1);
+var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.1);

// Train the model.
var pipeline = mlContext.Regression.Trainers.StochasticDualCoordinateAscent();
@@ -18,7 +18,7 @@ public static void Example()

// Split the data into training and test sets. Only training set is used in fitting
// the created pipeline. Metrics are computed on the test.
-var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.1);
+var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.1);

// Create trainer options.
var options = new SdcaRegressionTrainer.Options
@@ -55,7 +55,7 @@ public static void AveragedPerceptronBinaryClassification()

// Load the data, and leave 10% out, so we can use them for testing
var data = loader.Load(dataFilePath);
-var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+var (trainData, testData) = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Create the Estimator
var learningPipeline = loader.MakeNewEstimator()
@@ -56,7 +56,7 @@ public static void FastTreeBinaryClassification()

// Loader the data, and leave 10% out, so we can use them for testing
var data = loader.Load(dataFilePath);
-var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+var (trainData, testData) = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Create the Estimator
var learningPipeline = loader.MakeNewEstimator()
@@ -56,7 +56,7 @@ public static void LightGbmBinaryClassification()

// Load the data, and leave 10% out, so we can use them for testing
var data = loader.Load(dataFilePath);
-var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+var (trainData, testData) = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Create the Estimator
var learningPipeline = loader.MakeNewEstimator()
@@ -52,7 +52,7 @@ public void MultiClassLightGbmStaticPipelineWithInMemoryData()

// Split the static-typed data into training and test sets. Only training set is used in fitting
// the created pipeline. Metrics are computed on the test.
-var (trainingData, testingData) = mlContext.MulticlassClassification.TrainTestSplit(staticDataView, testFraction: 0.5);
+var (trainingData, testingData) = mlContext.Data.TrainTestSplit(staticDataView, testFraction: 0.5);

// Train the model.
var model = pipe.Fit(trainingData);
@@ -28,7 +28,7 @@ public static void LightGbmRegression()

// Load the data, and leave 10% out, so we can use them for testing
var data = loader.Load(new MultiFileSource(dataFile));
-var (trainData, testData) = mlContext.Regression.TrainTestSplit(data, testFraction: 0.1);
+var (trainData, testData) = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// The predictor that gets produced out of training
LightGbmRegressionModelParameters pred = null;

0 comments on commit cd333c5
