diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index be86b60b5b03..1774d4e3874e 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -833,6 +833,27 @@ lgb.dump <- function(booster, num_iteration = NULL){ #' #' @return vector of evaluation result #' +#' @examples +#' \dontrun{ +#' library(lightgbm) +#' data(agaricus.train, package = "lightgbm") +#' train <- agaricus.train +#' dtrain <- lgb.Dataset(train$data, label = train$label) +#' data(agaricus.test, package = "lightgbm") +#' test <- agaricus.test +#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) +#' params <- list(objective = "regression", metric = "l2") +#' valids <- list(test = dtest) +#' model <- lgb.train(params, +#' dtrain, +#' 100, +#' valids, +#' min_data = 1, +#' learning_rate = 1, +#' early_stopping_rounds = 10) +#' lgb.get.eval.result(model, "test", "l2") +#' } +#' #' @rdname lgb.get.eval.result #' @export lgb.get.eval.result <- function(booster, data_name, eval_name, iters = NULL, is_err = FALSE) { diff --git a/R-package/R/lgb.prepare.R b/R-package/R/lgb.prepare.R index 450ef2b72130..ef2372540990 100644 --- a/R-package/R/lgb.prepare.R +++ b/R-package/R/lgb.prepare.R @@ -8,35 +8,35 @@ #' #' @examples #' \dontrun{ -#' library(lightgbm) -#' data(iris) -#' -#' str(iris) -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... -#' -#' str(lgb.prepare(data = iris)) # Convert all factors/chars to numeric -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ... -#' -#' # When lightgbm package is installed, and you do not want to load it -#' # You can still use the function! -#' lgb.unloader() -#' str(lightgbm::lgb.prepare(data = iris)) -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ... +#' library(lightgbm) +#' data(iris) +#' +#' str(iris) +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... +#' +#' str(lgb.prepare(data = iris)) # Convert all factors/chars to numeric +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ... +#' +#' # When lightgbm package is installed, and you do not want to load it +#' # You can still use the function! +#' lgb.unloader() +#' str(lightgbm::lgb.prepare(data = iris)) +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ... #' } #' #' @export diff --git a/R-package/R/lgb.prepare2.R b/R-package/R/lgb.prepare2.R index f6d4d50b7757..571bbf19b7bd 100644 --- a/R-package/R/lgb.prepare2.R +++ b/R-package/R/lgb.prepare2.R @@ -8,36 +8,36 @@ #' #' @examples #' \dontrun{ -#' library(lightgbm) -#' data(iris) -#' -#' str(iris) -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... -#' -#' str(lgb.prepare2(data = iris)) # Convert all factors/chars to integer -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ... -#' -#' # When lightgbm package is installed, and you do not want to load it -#' # You can still use the function! -#' lgb.unloader() -#' str(lightgbm::lgb.prepare2(data = iris)) -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ... -#' +#' library(lightgbm) +#' data(iris) +#' +#' str(iris) +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... +#' +#' str(lgb.prepare2(data = iris)) # Convert all factors/chars to integer +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ... +#' +#' # When lightgbm package is installed, and you do not want to load it +#' # You can still use the function! +#' lgb.unloader() +#' str(lightgbm::lgb.prepare2(data = iris)) +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ... +#' #' } #' #' @export diff --git a/R-package/R/lgb.prepare_rules.R b/R-package/R/lgb.prepare_rules.R index 0011ba2b3b74..8e436eee3446 100644 --- a/R-package/R/lgb.prepare_rules.R +++ b/R-package/R/lgb.prepare_rules.R @@ -9,63 +9,63 @@ #' #' @examples #' \dontrun{ -#' library(lightgbm) -#' data(iris) -#' -#' str(iris) -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... -#' -#' new_iris <- lgb.prepare_rules(data = iris) # Autoconverter -#' str(new_iris$data) -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ... -#' -#' data(iris) # Erase iris dataset -#' iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) -#' # Warning message: -#' In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, : -#' invalid factor level, NA generated -#' -#' # Use conversion using known rules -#' # Unknown factors become 0, excellent for sparse datasets -#' newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules) -#' -#' # Unknown factor is now zero, perfect for sparse datasets -#' newer_iris$data[1, ] # Species became 0 as it is an unknown factor -#' # Sepal.Length Sepal.Width Petal.Length Petal.Width Species -#' # 1 5.1 3.5 1.4 0.2 0 -#' -#' newer_iris$data[1, 5] <- 1 # Put back real initial value -#' -#' # Is the newly created dataset equal? YES! -#' all.equal(new_iris$data, newer_iris$data) -#' # [1] TRUE -#' -#' # Can we test our own rules? -#' data(iris) # Erase iris dataset -#' -#' # We remapped values differently -#' personal_rules <- list(Species = c("setosa" = 3, -#' "versicolor" = 2, -#' "virginica" = 1)) -#' newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules) -#' str(newest_iris$data) # SUCCESS! -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : num 3 3 3 3 3 3 3 3 3 3 ... -#' +#' library(lightgbm) +#' data(iris) +#' +#' str(iris) +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... +#' +#' new_iris <- lgb.prepare_rules(data = iris) # Autoconverter +#' str(new_iris$data) +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ... +#' +#' data(iris) # Erase iris dataset +#' iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) +#' # Warning message: +#' # In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, : +#' # invalid factor level, NA generated +#' +#' # Use conversion using known rules +#' # Unknown factors become 0, excellent for sparse datasets +#' newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules) +#' +#' # Unknown factor is now zero, perfect for sparse datasets +#' newer_iris$data[1, ] # Species became 0 as it is an unknown factor +#' # Sepal.Length Sepal.Width Petal.Length Petal.Width Species +#' # 1 5.1 3.5 1.4 0.2 0 +#' +#' newer_iris$data[1, 5] <- 1 # Put back real initial value +#' +#' # Is the newly created dataset equal? YES! +#' all.equal(new_iris$data, newer_iris$data) +#' # [1] TRUE +#' +#' # Can we test our own rules? +#' data(iris) # Erase iris dataset +#' +#' # We remapped values differently +#' personal_rules <- list(Species = c("setosa" = 3, +#' "versicolor" = 2, +#' "virginica" = 1)) +#' newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules) +#' str(newest_iris$data) # SUCCESS! +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : num 3 3 3 3 3 3 3 3 3 3 ... +#' #' } #' #' @export diff --git a/R-package/R/lgb.prepare_rules2.R b/R-package/R/lgb.prepare_rules2.R index bd4d08a1d141..6be9a5800599 100644 --- a/R-package/R/lgb.prepare_rules2.R +++ b/R-package/R/lgb.prepare_rules2.R @@ -9,63 +9,63 @@ #' #' @examples #' \dontrun{ -#' library(lightgbm) -#' data(iris) -#' -#' str(iris) -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... -#' -#' new_iris <- lgb.prepare_rules2(data = iris) # Autoconverter -#' str(new_iris$data) -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ... -#' -#' data(iris) # Erase iris dataset -#' iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) -#' # Warning message: -#' In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, : -#' invalid factor level, NA generated -#' -#' # Use conversion using known rules -#' # Unknown factors become 0, excellent for sparse datasets -#' newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules) -#' -#' # Unknown factor is now zero, perfect for sparse datasets -#' newer_iris$data[1, ] # Species became 0 as it is an unknown factor -#' # Sepal.Length Sepal.Width Petal.Length Petal.Width Species -#' # 1 5.1 3.5 1.4 0.2 0 -#' -#' newer_iris$data[1, 5] <- 1 # Put back real initial value -#' -#' # Is the newly created dataset equal? YES! -#' all.equal(new_iris$data, newer_iris$data) -#' # [1] TRUE -#' -#' # Can we test our own rules? -#' data(iris) # Erase iris dataset -#' -#' # We remapped values differently -#' personal_rules <- list(Species = c("setosa" = 3L, -#' "versicolor" = 2L, -#' "virginica" = 1L)) -#' newest_iris <- lgb.prepare_rules2(data = iris, rules = personal_rules) -#' str(newest_iris$data) # SUCCESS! -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : int 3 3 3 3 3 3 3 3 3 3 ... -#' +#' library(lightgbm) +#' data(iris) +#' +#' str(iris) +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... +#' +#' new_iris <- lgb.prepare_rules2(data = iris) # Autoconverter +#' str(new_iris$data) +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ... +#' +#' data(iris) # Erase iris dataset +#' iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) +#' # Warning message: +#' # In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, : +#' # invalid factor level, NA generated +#' +#' # Use conversion using known rules +#' # Unknown factors become 0, excellent for sparse datasets +#' newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules) +#' +#' # Unknown factor is now zero, perfect for sparse datasets +#' newer_iris$data[1, ] # Species became 0 as it is an unknown factor +#' # Sepal.Length Sepal.Width Petal.Length Petal.Width Species +#' # 1 5.1 3.5 1.4 0.2 0 +#' +#' newer_iris$data[1, 5] <- 1 # Put back real initial value +#' +#' # Is the newly created dataset equal? YES! +#' all.equal(new_iris$data, newer_iris$data) +#' # [1] TRUE +#' +#' # Can we test our own rules? +#' data(iris) # Erase iris dataset +#' +#' # We remapped values differently +#' personal_rules <- list(Species = c("setosa" = 3L, +#' "versicolor" = 2L, +#' "virginica" = 1L)) +#' newest_iris <- lgb.prepare_rules2(data = iris, rules = personal_rules) +#' str(newest_iris$data) # SUCCESS! +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : int 3 3 3 3 3 3 3 3 3 3 ... +#' #' } #' #' @export diff --git a/R-package/R/readRDS.lgb.Booster.R b/R-package/R/readRDS.lgb.Booster.R index 1e376fb2d419..e001800dea97 100644 --- a/R-package/R/readRDS.lgb.Booster.R +++ b/R-package/R/readRDS.lgb.Booster.R @@ -9,24 +9,24 @@ #' #' @examples #' \dontrun{ -#' library(lightgbm) -#' data(agaricus.train, package = "lightgbm") -#' train <- agaricus.train -#' dtrain <- lgb.Dataset(train$data, label = train$label) -#' data(agaricus.test, package = "lightgbm") -#' test <- agaricus.test -#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) -#' params <- list(objective = "regression", metric = "l2") -#' valids <- list(test = dtest) -#' model <- lgb.train(params, -#' dtrain, -#' 100, -#' valids, -#' min_data = 1, -#' learning_rate = 1, -#' early_stopping_rounds = 10) -#' saveRDS.lgb.Booster(model, "model.rds") -#' new_model <- readRDS.lgb.Booster("model.rds") +#' library(lightgbm) +#' data(agaricus.train, package = "lightgbm") +#' train <- agaricus.train +#' dtrain <- lgb.Dataset(train$data, label = train$label) +#' data(agaricus.test, package = "lightgbm") +#' test <- agaricus.test +#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) +#' params <- list(objective = "regression", metric = "l2") +#' valids <- list(test = dtest) +#' model <- lgb.train(params, +#' dtrain, +#' 100, +#' valids, +#' min_data = 1, +#' learning_rate = 1, +#' early_stopping_rounds = 10) +#' saveRDS.lgb.Booster(model, "model.rds") +#' new_model <- readRDS.lgb.Booster("model.rds") #' } #' #' @export diff --git a/R-package/R/saveRDS.lgb.Booster.R b/R-package/R/saveRDS.lgb.Booster.R index e92fc51fc001..97083763bff8 100644 --- a/R-package/R/saveRDS.lgb.Booster.R +++ b/R-package/R/saveRDS.lgb.Booster.R @@ -14,23 +14,23 @@ #' #' @examples #' \dontrun{ -#' library(lightgbm) -#' data(agaricus.train, package = "lightgbm") -#' train <- agaricus.train -#' dtrain <- lgb.Dataset(train$data, label = train$label) -#' data(agaricus.test, package = "lightgbm") -#' test <- agaricus.test -#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) -#' params <- list(objective = "regression", metric = "l2") -#' valids <- list(test = dtest) -#' model <- lgb.train(params, -#' dtrain, -#' 100, -#' valids, -#' min_data = 1, -#' learning_rate = 1, -#' early_stopping_rounds = 10) -#' saveRDS.lgb.Booster(model, "model.rds") +#' library(lightgbm) +#' data(agaricus.train, package = "lightgbm") +#' train <- agaricus.train +#' dtrain <- lgb.Dataset(train$data, label = train$label) +#' data(agaricus.test, package = "lightgbm") +#' test <- agaricus.test +#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) +#' params <- list(objective = "regression", metric = "l2") +#' valids <- list(test = dtest) +#' model <- lgb.train(params, +#' dtrain, +#' 100, +#' valids, +#' min_data = 1, +#' learning_rate = 1, +#' early_stopping_rounds = 10) +#' saveRDS.lgb.Booster(model, "model.rds") #' } #' #' @export diff --git a/R-package/demo/boost_from_prediction.R b/R-package/demo/boost_from_prediction.R index 5c4682f26f5c..bd2b30e892c4 100644 --- a/R-package/demo/boost_from_prediction.R +++ b/R-package/demo/boost_from_prediction.R @@ -16,7 +16,6 @@ print("Start running example to start from a initial prediction") param <- list(num_leaves = 4, learning_rate = 1, nthread = 2, - silent = 1, objective = "binary") bst <- lgb.train(param, dtrain, 1, valids = valids) diff --git a/R-package/man/lgb.get.eval.result.Rd b/R-package/man/lgb.get.eval.result.Rd index 2124a2678a39..ba04ebfef888 100644 --- a/R-package/man/lgb.get.eval.result.Rd +++ b/R-package/man/lgb.get.eval.result.Rd @@ -24,3 +24,25 @@ vector of evaluation result \description{ Get record evaluation result from booster } +\examples{ +\dontrun{ +library(lightgbm) +data(agaricus.train, package = "lightgbm") +train <- agaricus.train +dtrain <- lgb.Dataset(train$data, label = train$label) +data(agaricus.test, package = "lightgbm") +test <- agaricus.test +dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) +params <- list(objective = "regression", metric = "l2") +valids <- list(test = dtest) +model <- lgb.train(params, + dtrain, + 100, + valids, + min_data = 1, + learning_rate = 1, + early_stopping_rounds = 10) +lgb.get.eval.result(model, "test", "l2") +} + +} diff --git a/R-package/man/lgb.prepare.Rd b/R-package/man/lgb.prepare.Rd index 254f5dfd2c95..8b309c4e5a8d 100644 --- a/R-package/man/lgb.prepare.Rd +++ b/R-package/man/lgb.prepare.Rd @@ -17,35 +17,35 @@ Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors } \examples{ \dontrun{ - library(lightgbm) - data(iris) - - str(iris) - # 'data.frame': 150 obs. of 5 variables: - # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... - # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... - # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... - # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... - # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... - - str(lgb.prepare(data = iris)) # Convert all factors/chars to numeric - # 'data.frame': 150 obs. of 5 variables: - # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... - # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... - # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... - # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... - # $ Species : num 1 1 1 1 1 1 1 1 1 1 ... - - # When lightgbm package is installed, and you do not want to load it - # You can still use the function! - lgb.unloader() - str(lightgbm::lgb.prepare(data = iris)) - # 'data.frame': 150 obs. of 5 variables: - # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... - # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... - # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... - # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... - # $ Species : num 1 1 1 1 1 1 1 1 1 1 ... +library(lightgbm) +data(iris) + +str(iris) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... + +str(lgb.prepare(data = iris)) # Convert all factors/chars to numeric +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : num 1 1 1 1 1 1 1 1 1 1 ... + +# When lightgbm package is installed, and you do not want to load it +# You can still use the function! +lgb.unloader() +str(lightgbm::lgb.prepare(data = iris)) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : num 1 1 1 1 1 1 1 1 1 1 ... } } diff --git a/R-package/man/lgb.prepare2.Rd b/R-package/man/lgb.prepare2.Rd index 18d18576c954..c042d19c1d75 100644 --- a/R-package/man/lgb.prepare2.Rd +++ b/R-package/man/lgb.prepare2.Rd @@ -17,36 +17,36 @@ Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors } \examples{ \dontrun{ - library(lightgbm) - data(iris) - - str(iris) - # 'data.frame': 150 obs. of 5 variables: - # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... - # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... - # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... - # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... - # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... - - str(lgb.prepare2(data = iris)) # Convert all factors/chars to integer - # 'data.frame': 150 obs. of 5 variables: - # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... - # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... - # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... - # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... - # $ Species : int 1 1 1 1 1 1 1 1 1 1 ... - - # When lightgbm package is installed, and you do not want to load it - # You can still use the function! - lgb.unloader() - str(lightgbm::lgb.prepare2(data = iris)) - # 'data.frame': 150 obs. of 5 variables: - # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... - # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... - # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... - # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... - # $ Species : int 1 1 1 1 1 1 1 1 1 1 ... - +library(lightgbm) +data(iris) + +str(iris) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... + +str(lgb.prepare2(data = iris)) # Convert all factors/chars to integer +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : int 1 1 1 1 1 1 1 1 1 1 ... + +# When lightgbm package is installed, and you do not want to load it +# You can still use the function! +lgb.unloader() +str(lightgbm::lgb.prepare2(data = iris)) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : int 1 1 1 1 1 1 1 1 1 1 ... + } } diff --git a/R-package/man/lgb.prepare_rules.Rd b/R-package/man/lgb.prepare_rules.Rd index 66332f171d5b..b3705da58075 100644 --- a/R-package/man/lgb.prepare_rules.Rd +++ b/R-package/man/lgb.prepare_rules.Rd @@ -19,63 +19,63 @@ Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors } \examples{ \dontrun{ - library(lightgbm) - data(iris) - - str(iris) - # 'data.frame': 150 obs. of 5 variables: - # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... - # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... - # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... - # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... - # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... - - new_iris <- lgb.prepare_rules(data = iris) # Autoconverter - str(new_iris$data) - # 'data.frame': 150 obs. of 5 variables: - # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... - # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... - # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... - # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... - # $ Species : num 1 1 1 1 1 1 1 1 1 1 ... - - data(iris) # Erase iris dataset - iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) - # Warning message: - In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, : - invalid factor level, NA generated - - # Use conversion using known rules - # Unknown factors become 0, excellent for sparse datasets - newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules) - - # Unknown factor is now zero, perfect for sparse datasets - newer_iris$data[1, ] # Species became 0 as it is an unknown factor - # Sepal.Length Sepal.Width Petal.Length Petal.Width Species - # 1 5.1 3.5 1.4 0.2 0 - - newer_iris$data[1, 5] <- 1 # Put back real initial value - - # Is the newly created dataset equal? YES! - all.equal(new_iris$data, newer_iris$data) - # [1] TRUE - - # Can we test our own rules? - data(iris) # Erase iris dataset - - # We remapped values differently - personal_rules <- list(Species = c("setosa" = 3, - "versicolor" = 2, - "virginica" = 1)) - newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules) - str(newest_iris$data) # SUCCESS! - # 'data.frame': 150 obs. of 5 variables: - # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... - # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... - # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... - # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... - # $ Species : num 3 3 3 3 3 3 3 3 3 3 ... - +library(lightgbm) +data(iris) + +str(iris) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... + +new_iris <- lgb.prepare_rules(data = iris) # Autoconverter +str(new_iris$data) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : num 1 1 1 1 1 1 1 1 1 1 ... + +data(iris) # Erase iris dataset +iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) +# Warning message: +# In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, : +# invalid factor level, NA generated + +# Use conversion using known rules +# Unknown factors become 0, excellent for sparse datasets +newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules) + +# Unknown factor is now zero, perfect for sparse datasets +newer_iris$data[1, ] # Species became 0 as it is an unknown factor +# Sepal.Length Sepal.Width Petal.Length Petal.Width Species +# 1 5.1 3.5 1.4 0.2 0 + +newer_iris$data[1, 5] <- 1 # Put back real initial value + +# Is the newly created dataset equal? YES! +all.equal(new_iris$data, newer_iris$data) +# [1] TRUE + +# Can we test our own rules? +data(iris) # Erase iris dataset + +# We remapped values differently +personal_rules <- list(Species = c("setosa" = 3, + "versicolor" = 2, + "virginica" = 1)) +newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules) +str(newest_iris$data) # SUCCESS! +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : num 3 3 3 3 3 3 3 3 3 3 ... + } } diff --git a/R-package/man/lgb.prepare_rules2.Rd b/R-package/man/lgb.prepare_rules2.Rd index ce8a2b564495..0b0e799cc7d8 100644 --- a/R-package/man/lgb.prepare_rules2.Rd +++ b/R-package/man/lgb.prepare_rules2.Rd @@ -19,63 +19,63 @@ Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors } \examples{ \dontrun{ - library(lightgbm) - data(iris) - - str(iris) - # 'data.frame': 150 obs. of 5 variables: - # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... - # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... - # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... - # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... - # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... - - new_iris <- lgb.prepare_rules2(data = iris) # Autoconverter - str(new_iris$data) - # 'data.frame': 150 obs. of 5 variables: - # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... - # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... - # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... - # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... - # $ Species : int 1 1 1 1 1 1 1 1 1 1 ... - - data(iris) # Erase iris dataset - iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) - # Warning message: - In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, : - invalid factor level, NA generated - - # Use conversion using known rules - # Unknown factors become 0, excellent for sparse datasets - newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules) - - # Unknown factor is now zero, perfect for sparse datasets - newer_iris$data[1, ] # Species became 0 as it is an unknown factor - # Sepal.Length Sepal.Width Petal.Length Petal.Width Species - # 1 5.1 3.5 1.4 0.2 0 - - newer_iris$data[1, 5] <- 1 # Put back real initial value - - # Is the newly created dataset equal? YES! - all.equal(new_iris$data, newer_iris$data) - # [1] TRUE - - # Can we test our own rules? - data(iris) # Erase iris dataset - - # We remapped values differently - personal_rules <- list(Species = c("setosa" = 3L, - "versicolor" = 2L, - "virginica" = 1L)) - newest_iris <- lgb.prepare_rules2(data = iris, rules = personal_rules) - str(newest_iris$data) # SUCCESS! - # 'data.frame': 150 obs. of 5 variables: - # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... - # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... - # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... - # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... - # $ Species : int 3 3 3 3 3 3 3 3 3 3 ... - +library(lightgbm) +data(iris) + +str(iris) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... + +new_iris <- lgb.prepare_rules2(data = iris) # Autoconverter +str(new_iris$data) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : int 1 1 1 1 1 1 1 1 1 1 ... + +data(iris) # Erase iris dataset +iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) +# Warning message: +# In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, : +# invalid factor level, NA generated + +# Use conversion using known rules +# Unknown factors become 0, excellent for sparse datasets +newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules) + +# Unknown factor is now zero, perfect for sparse datasets +newer_iris$data[1, ] # Species became 0 as it is an unknown factor +# Sepal.Length Sepal.Width Petal.Length Petal.Width Species +# 1 5.1 3.5 1.4 0.2 0 + +newer_iris$data[1, 5] <- 1 # Put back real initial value + +# Is the newly created dataset equal? YES! +all.equal(new_iris$data, newer_iris$data) +# [1] TRUE + +# Can we test our own rules? +data(iris) # Erase iris dataset + +# We remapped values differently +personal_rules <- list(Species = c("setosa" = 3L, + "versicolor" = 2L, + "virginica" = 1L)) +newest_iris <- lgb.prepare_rules2(data = iris, rules = personal_rules) +str(newest_iris$data) # SUCCESS! +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : int 3 3 3 3 3 3 3 3 3 3 ... + } } diff --git a/R-package/man/readRDS.lgb.Booster.Rd b/R-package/man/readRDS.lgb.Booster.Rd index 6db7baa1786a..8158ad2a1d83 100644 --- a/R-package/man/readRDS.lgb.Booster.Rd +++ b/R-package/man/readRDS.lgb.Booster.Rd @@ -19,24 +19,24 @@ Attemps to load a model using RDS. } \examples{ \dontrun{ - library(lightgbm) - data(agaricus.train, package = "lightgbm") - train <- agaricus.train - dtrain <- lgb.Dataset(train$data, label = train$label) - data(agaricus.test, package = "lightgbm") - test <- agaricus.test - dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) - params <- list(objective = "regression", metric = "l2") - valids <- list(test = dtest) - model <- lgb.train(params, - dtrain, - 100, - valids, - min_data = 1, - learning_rate = 1, - early_stopping_rounds = 10) - saveRDS.lgb.Booster(model, "model.rds") - new_model <- readRDS.lgb.Booster("model.rds") +library(lightgbm) +data(agaricus.train, package = "lightgbm") +train <- agaricus.train +dtrain <- lgb.Dataset(train$data, label = train$label) +data(agaricus.test, package = "lightgbm") +test <- agaricus.test +dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) +params <- list(objective = "regression", metric = "l2") +valids <- list(test = dtest) +model <- lgb.train(params, + dtrain, + 100, + valids, + min_data = 1, + learning_rate = 1, + early_stopping_rounds = 10) +saveRDS.lgb.Booster(model, "model.rds") +new_model <- readRDS.lgb.Booster("model.rds") } } diff --git a/R-package/man/saveRDS.lgb.Booster.Rd b/R-package/man/saveRDS.lgb.Booster.Rd index 7b846b1f04de..eac224ebe77a 100644 --- a/R-package/man/saveRDS.lgb.Booster.Rd +++ b/R-package/man/saveRDS.lgb.Booster.Rd @@ -30,23 +30,23 @@ Attemps to save a model using RDS. Has an additional parameter (\code{raw}) whic } \examples{ \dontrun{ - library(lightgbm) - data(agaricus.train, package = "lightgbm") - train <- agaricus.train - dtrain <- lgb.Dataset(train$data, label = train$label) - data(agaricus.test, package = "lightgbm") - test <- agaricus.test - dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) - params <- list(objective = "regression", metric = "l2") - valids <- list(test = dtest) - model <- lgb.train(params, - dtrain, - 100, - valids, - min_data = 1, - learning_rate = 1, - early_stopping_rounds = 10) - saveRDS.lgb.Booster(model, "model.rds") +library(lightgbm) +data(agaricus.train, package = "lightgbm") +train <- agaricus.train +dtrain <- lgb.Dataset(train$data, label = train$label) +data(agaricus.test, package = "lightgbm") +test <- agaricus.test +dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) +params <- list(objective = "regression", metric = "l2") +valids <- list(test = dtest) +model <- lgb.train(params, + dtrain, + 100, + valids, + min_data = 1, + learning_rate = 1, + early_stopping_rounds = 10) +saveRDS.lgb.Booster(model, "model.rds") } }