diff --git a/DESCRIPTION b/DESCRIPTION index a788cd89..c2aa0a51 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,6 +29,7 @@ Imports: rlang (>= 1.1.0), sfd, tibble, + tidyr, utils, vctrs (>= 0.3.8), withr diff --git a/NAMESPACE b/NAMESPACE index 9cf05e13..ae39bdd9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,10 @@ S3method(finalize,list) S3method(finalize,logical) S3method(finalize,param) S3method(finalize,parameters) +S3method(grid_hybrid,default) +S3method(grid_hybrid,list) +S3method(grid_hybrid,param) +S3method(grid_hybrid,parameters) S3method(grid_latin_hypercube,list) S3method(grid_latin_hypercube,param) S3method(grid_latin_hypercube,parameters) @@ -87,6 +91,7 @@ export(get_n_frac) export(get_n_frac_range) export(get_p) export(get_rbf_range) +export(grid_hybrid) export(grid_latin_hypercube) export(grid_max_entropy) export(grid_random) diff --git a/NEWS.md b/NEWS.md index 603b2bc7..67f484a5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,10 @@ # dials (development version) +* _Hybrid_ grids that cross space-filling and regular grids were added (`grid_hybrid()`). + * `splitting_rule()` gains support for survival forests via a new `ranger_survival_rules` vector (`"logrank"`, `"extratrees"`, `"C"`, `"maxstat"`), which is now included in `ranger_split_rules`. + # dials 1.4.3 ## New parameters diff --git a/R/hybrid.R b/R/hybrid.R new file mode 100644 index 00000000..0b82f644 --- /dev/null +++ b/R/hybrid.R @@ -0,0 +1,248 @@ +#' Hybrid space-filling/regular parameter grids +#' +#' A grid can be created that is efficient for some parameter(s) but +#' fine-grained for others. This can be especially helpful when a model has +#' a "submodel" parameter that can be tuned with virtually no cost. +#' +#' +#' @inheritParams grid_regular +#' @inheritParams grid_space_filling +#' @param size An integer for the maximum size of the space-filling portion +#' of the design. +#' @param levels The number of values for _each_ regular grid parameter. 
+#' @param parameters A character string that matches the _names_ of the +#' parameter(s) that are used to make the regular portion of the grid. If no +#' value is given, a space-filling design with `size` candidates is created. If +#' all parameters are selected, a regular grid with `levels^p` candidates is +#' created where `p` is the length of `parameters`. +#' @details +#' +#' This function first creates a space-filling design for the parameters that +#' do not match `parameters` (with `size` total candidates). Then a regular grid +#' is created with `levels^p` candidates where `p` is the length of +#' `parameters`. These two grids are crossed to produce the end result. + +#' @examples +#' if (rlang::is_installed("ggplot2")) { +#' +#' library(dplyr) +#' library(ggplot2) +#' +#' # Most boosting methods can make many predictions across a number of trees +#' # from a single model fit object. Those are nearly free (computationally) so +#' # we would do many tree values for each of the other parameters. +#' +#' # To illustrate, we'll only show four tree values: +#' +#' boost_example <- +#' parameters(list(trees = trees(), learn_rate = learn_rate(), min_n = min_n())) +#' +#' boost_example |> +#' grid_hybrid(parameters = "trees", size = 20, levels = 4) |> +#' ggplot(aes(learn_rate, min_n)) + +#' geom_point() + +#' facet_wrap(~ trees, labeller = "label_both") + +#' scale_x_log10() +#' +#' # In other cases, we have 1+ parameters with very few values. 
We can make a +#' # small regular grid over these and a much larger space-filling design +#' +#' nnet_example <- +#' parameters( +#' list( +#' learn_rate = learn_rate(), +#' dropout = dropout(), +#' # Only a few values: +#' activation = activation(c("relu", "tanh", "elu")), +#' schedule = rate_schedule(c("none", "cyclic", "step")) # note different name +#' ) +#' ) +#' +#' nnet_example |> +#' grid_hybrid(parameters = c("activation", "schedule"), size = 20, levels = 3) |> +#' ggplot(aes(learn_rate, dropout)) + +#' geom_point() + +#' facet_grid(activation ~ schedule, labeller = "label_both") + +#' scale_x_log10() +#' } +#' +#' @export +grid_hybrid <- function( + x, + ..., + parameters = NULL, + size = 10, + levels = 20, + original = TRUE, + type = "any" +) { + UseMethod("grid_hybrid") +} + +#' @export +#' @rdname grid_hybrid +grid_hybrid.default <- function( + x, + ..., + parameters = NULL, + size = 10, + levels = 20, + original = TRUE, + type = "any" +) { + if (missing(x)) { + cli::cli_abort("At least one parameter object is required.") + } + cli::cli_abort( + "{.arg x} must be a {.cls param} object, list, or {.cls parameters} object, + not {.obj_type_friendly {x}}." 
+ ) +} + +#' @export +#' @rdname grid_hybrid +grid_hybrid.parameters <- function( + x, + ..., + parameters = NULL, + size = 10, + levels = 20, + original = TRUE, + type = "any" +) { + check_dots_empty() + + if (nrow(x) == 0) { + cli::cli_abort("At least one parameter object is required.") + } + for (i in seq_along(x$object)) { + check_param( + x$object[[i]], + allow_na = FALSE, + allow_unknown = FALSE, + arg = x$id[i] + ) + } + + grd <- make_hybrid( + x, + parameters = parameters, + size = size, + levels = levels, + original = original, + type = type + ) + grd +} + +#' @export +#' @rdname grid_hybrid +grid_hybrid.list <- function( + x, + ..., + parameters = NULL, + size = 10, + levels = 20, + original = TRUE, + type = "any" +) { + check_dots_empty() + + if (length(x) == 0) { + cli::cli_abort("At least one parameter object is required.") + } + param_names <- names(x) + for (i in seq_along(x)) { + check_param( + x[[i]], + allow_na = FALSE, + allow_unknown = FALSE, + arg = param_arg_name(param_names[i], x[[i]], i) + ) + } + + params <- parameters(x) + grd <- make_hybrid( + params, + parameters = parameters, + size = size, + levels = levels, + original = original, + type = type + ) + grd +} + + +#' @export +#' @rdname grid_hybrid +grid_hybrid.param <- function( + x, + ..., + parameters = NULL, + size = 10, + levels = 20, + original = TRUE, + type = "any" +) { + param_list <- list(x, ...) 
+ param_names <- names(param_list) + for (i in seq_along(param_list)) { + check_param( + param_list[[i]], + allow_na = FALSE, + allow_unknown = FALSE, + arg = param_arg_name(param_names[i], param_list[[i]], i) + ) + } + + params <- parameters(param_list) + grd <- make_hybrid( + params, + parameters = parameters, + size = size, + levels = levels, + original = original, + type = type + ) + grd +} + + +make_hybrid <- function( + x, + ..., + parameters = NULL, + size = 10, + levels = 20, + original = TRUE, + type = "any" +) { + if (is.null(parameters)) { + res <- grid_space_filling(x, size = size, original = original, type = type) + return(res) + } else { + reg_param <- x$id %in% parameters + if (!any(reg_param)) { + cli::cli_abort( + "The {.arg parameters} argument value {.val {parameters}} did not select + any of the parameter identifiers: {.val {x$id}}" + ) + } else if (all(reg_param)) { + res <- grid_regular(x, levels = levels, original = original) + return(res) + } + } + + sfd_param <- x[!reg_param, ] + reg_param <- x[reg_param, ] + + sfd <- grid_space_filling( + sfd_param, + size = size, + original = original, + type = type + ) + reg <- grid_regular(reg_param, levels = levels, original = original) + tidyr::crossing(sfd, reg) +} diff --git a/man/grid_hybrid.Rd b/man/grid_hybrid.Rd new file mode 100644 index 00000000..4e4f7680 --- /dev/null +++ b/man/grid_hybrid.Rd @@ -0,0 +1,143 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/hybrid.R +\name{grid_hybrid} +\alias{grid_hybrid} +\alias{grid_hybrid.default} +\alias{grid_hybrid.parameters} +\alias{grid_hybrid.list} +\alias{grid_hybrid.param} +\title{Hybrid space-filling/regular parameter grids} +\usage{ +grid_hybrid( + x, + ..., + parameters = NULL, + size = 10, + levels = 20, + original = TRUE, + type = "any" +) + +\method{grid_hybrid}{default}( + x, + ..., + parameters = NULL, + size = 10, + levels = 20, + original = TRUE, + type = "any" +) + +\method{grid_hybrid}{parameters}( + x, 
+ ..., + parameters = NULL, + size = 10, + levels = 20, + original = TRUE, + type = "any" +) + +\method{grid_hybrid}{list}( + x, + ..., + parameters = NULL, + size = 10, + levels = 20, + original = TRUE, + type = "any" +) + +\method{grid_hybrid}{param}( + x, + ..., + parameters = NULL, + size = 10, + levels = 20, + original = TRUE, + type = "any" +) +} +\arguments{ +\item{x}{A \code{param} object, list, or \code{parameters}.} + +\item{...}{One or more \code{param} objects (such as \code{\link[=mtry]{mtry()}} or +\code{\link[=penalty]{penalty()}}). None of the objects can have \code{unknown()} values in +the parameter ranges or values.} + +\item{parameters}{A character string that matches the \emph{names} of the +parameter(s) that are used to make the regular portion of the grid. If no +value is given, a space-filling design with \code{size} candidates is created. If +all parameters are selected, a regular grid with \code{levels^p} candidates is +created where \code{p} is the length of \code{parameters}.} + +\item{size}{An integer for the maximum size of the space-filling portion +of the design.} + +\item{levels}{The number of values for \emph{each} regular grid parameter.} + +\item{original}{A logical: should the parameters be in the original units or +in the transformed space (if any)?} + +\item{type}{A character string with possible values: \code{"any"}, +\code{"audze_eglais"}, \code{"max_min_l1"}, \code{"max_min_l2"}, \code{"uniform"}, +\code{"max_entropy"}, or \code{"latin_hypercube"}. A value of \code{"any"} will choose the +first design available (in the order listed above, excluding +\code{"latin_hypercube"}). For a single-point design, a random grid is created.} +} +\description{ +A grid can be created that is efficient for some parameter(s) but +fine-grained for others. This can be especially helpful when a model has +a "submodel" parameter that can be tuned with virtually no cost. 
+} +\details{ +This function first creates a space-filling design for the parameters that +do not match \code{parameters} (with \code{size} total candidates). Then a regular grid +is created with \code{levels^p} candidates where \code{p} is the length of +\code{parameters}. These two grids are crossed to produce the end result. +} +\examples{ +if (rlang::is_installed("ggplot2")) { + + library(dplyr) + library(ggplot2) + + # Most boosting methods can make many predictions across a number of trees + # from a single model fit object. Those are nearly free (computationally) so + # we would do many tree values for each of the other parameters. + + # To illustrate, we'll only show four tree values: + + boost_example <- + parameters(list(trees = trees(), learn_rate = learn_rate(), min_n = min_n())) + + boost_example |> + grid_hybrid(parameters = "trees", size = 20, levels = 4) |> + ggplot(aes(learn_rate, min_n)) + + geom_point() + + facet_wrap(~ trees, labeller = "label_both") + + scale_x_log10() + + # In other cases, we have 1+ parameters with very few values. We can make a + # small regular grid over these and a much larger space-filling design + + nnet_example <- + parameters( + list( + learn_rate = learn_rate(), + dropout = dropout(), + # Only a few values: + activation = activation(c("relu", "tanh", "elu")), + schedule = rate_schedule(c("none", "cyclic", "step")) # note different name + ) + ) + + nnet_example |> + grid_hybrid(parameters = c("activation", "schedule"), size = 20, levels = 3) |> + ggplot(aes(learn_rate, dropout)) + + geom_point() + + facet_grid(activation ~ schedule, labeller = "label_both") + + scale_x_log10() +} + +} diff --git a/tests/testthat/_snaps/hybrid.md b/tests/testthat/_snaps/hybrid.md new file mode 100644 index 00000000..0411ece3 --- /dev/null +++ b/tests/testthat/_snaps/hybrid.md @@ -0,0 +1,162 @@ +# basic hybrid designs + + Code + grid_hybrid(params_3, parameters = character(0)) + Condition + Error in `make_hybrid()`: + ! 
The `parameters` argument value did not select any of the parameter identifiers: "n_min", "hidden_units", and "neighbors" + +--- + + Code + grid_hybrid(params_3, parameters = "potato") + Condition + Error in `make_hybrid()`: + ! The `parameters` argument value "potato" did not select any of the parameter identifiers: "n_min", "hidden_units", and "neighbors" + +--- + + Code + grid_hybrid(parameters(), parameters = "potato") + Condition + Error in `parameters()`: + ! No input provided. Please supply at least one parameter object. + +# grid_hybrid() errors with non-param inputs + + Code + grid_hybrid() + Condition + Error in `grid_hybrid()`: + ! At least one parameter object is required. + +--- + + Code + grid_hybrid("not a param") + Condition + Error in `grid_hybrid()`: + ! `x` must be a <param> object, list, or <parameters> object, not a string. + +--- + + Code + grid_hybrid(penalty(), "min_n") + Condition + Error in `grid_hybrid()`: + ! `Argument 2` must be a <param> object without unknowns, not the string "min_n". + +--- + + Code + grid_hybrid(mtry(), "min_n") + Condition + Error in `grid_hybrid()`: + x `mtry` must be a <param> object without unknowns. + i See the `dials::finalize()` function. + +--- + + Code + grid_hybrid(list()) + Condition + Error in `grid_hybrid()`: + ! At least one parameter object is required. + +--- + + Code + grid_hybrid(list(penalty(), "min_n")) + Condition + Error in `grid_hybrid()`: + ! `Argument 2` must be a <param> object without unknowns, not the string "min_n". + +--- + + Code + grid_hybrid(list(mtry(), "min_n")) + Condition + Error in `grid_hybrid()`: + x `mtry` must be a <param> object without unknowns. + i See the `dials::finalize()` function. + +# grid_hybrid.parameters() checks for NA + + Code + grid_hybrid(p) + Condition + Error in `grid_hybrid()`: + ! `penalty` must be a <param> object without unknowns, not `NA`. 
+ +# grid_hybrid() errors with params containing unknowns + + Code + grid_hybrid(parameters(mtry())) + Condition + Error in `grid_hybrid()`: + x `mtry` must be a <param> object without unknowns. + i See the `dials::finalize()` function. + +--- + + Code + grid_hybrid(mtry()) + Condition + Error in `grid_hybrid()`: + x `mtry` must be a <param> object without unknowns. + i See the `dials::finalize()` function. + +--- + + Code + grid_hybrid(mtry(), sample_size()) + Condition + Error in `grid_hybrid()`: + x `mtry` must be a <param> object without unknowns. + i See the `dials::finalize()` function. + +--- + + Code + grid_hybrid(list(mtry())) + Condition + Error in `grid_hybrid()`: + x `mtry` must be a <param> object without unknowns. + i See the `dials::finalize()` function. + +--- + + Code + grid_hybrid(list(mtry_custom_name = mtry())) + Condition + Error in `grid_hybrid()`: + x `mtry_custom_name` must be a <param> object without unknowns. + i See the `dials::finalize()` function. + +--- + + Code + grid_hybrid(list(mtry(), sample_size())) + Condition + Error in `grid_hybrid()`: + x `mtry` must be a <param> object without unknowns. + i See the `dials::finalize()` function. + +# grid_hybrid() errors with duplicate parameter ids + + Code + grid_hybrid(penalty(), penalty()) + Condition + Error in `parameters()`: + x `id` must have unique values. + i Duplicates: "penalty" + +--- + + Code + grid_hybrid(list(a = penalty(), a = mtry())) + Condition + Error in `grid_hybrid()`: + x `a` must be a <param> object without unknowns. + i See the `dials::finalize()` function. 
+ diff --git a/tests/testthat/test-hybrid.R b/tests/testthat/test-hybrid.R new file mode 100644 index 00000000..7c1f2ada --- /dev/null +++ b/tests/testthat/test-hybrid.R @@ -0,0 +1,208 @@ +test_that("basic hybrid designs", { + params_3 <- parameters(list(n_min = min_n(), hidden_units(), neighbors())) + grid_1 <- grid_hybrid( + params_3, + parameters = "n_min", + size = 5, + levels = 6 + ) + + expect_equal(vctrs::vec_unique_count(grid_1$n_min), 6L) + expect_equal(vctrs::vec_unique_count(grid_1$hidden_units), 5L) + expect_equal(vctrs::vec_unique_count(grid_1$neighbors), 5L) + expect_equal(nrow(grid_1), 5 * 6) + + grid_2 <- grid_hybrid( + params_3, + parameters = c("n_min", "neighbors"), + size = 5, + levels = 6 + ) + + expect_equal(vctrs::vec_unique_count(grid_2$n_min), 6L) + expect_equal(vctrs::vec_unique_count(grid_2$hidden_units), 5L) + expect_equal(vctrs::vec_unique_count(grid_2$neighbors), 6L) + expect_equal(nrow(grid_2), 5 * 6 * 6) + + grid_3 <- grid_hybrid( + params_3, + parameters = c("n_min", "neighbors", "hidden_units"), + size = 5, + levels = 6 + ) + + expect_equal(vctrs::vec_unique_count(grid_3$n_min), 6L) + expect_equal(vctrs::vec_unique_count(grid_3$hidden_units), 6L) + expect_equal(vctrs::vec_unique_count(grid_3$neighbors), 6L) + expect_equal(nrow(grid_3), 6^3) + + grid_4 <- grid_hybrid( + params_3, + size = 5, + levels = 6 + ) + + expect_equal(vctrs::vec_unique_count(grid_4$n_min), 5L) + expect_equal(vctrs::vec_unique_count(grid_4$hidden_units), 5L) + expect_equal(vctrs::vec_unique_count(grid_4$neighbors), 5L) + expect_equal(nrow(grid_4), 5) + + expect_snapshot( + grid_hybrid(params_3, parameters = character(0)), + error = TRUE + ) + + expect_snapshot( + grid_hybrid(params_3, parameters = "potato"), + error = TRUE + ) + + expect_snapshot( + grid_hybrid(parameters(), parameters = "potato"), + error = TRUE + ) +}) + +test_that("other arguments work", { + params_3 <- parameters(list(n_min = min_n(), penalty(), learn_rate())) + grid_1 <- grid_hybrid( + 
params_3, + parameters = "n_min", + size = 5, + levels = 6, + type = "audze_eglais" + ) + grid_2 <- grid_hybrid( + params_3, + parameters = "n_min", + size = 5, + levels = 6, + type = "uniform" + ) + grid_3 <- grid_hybrid( + params_3, + parameters = "n_min", + size = 5, + levels = 6, + type = "uniform" + ) + + expect_false(isTRUE(all.equal(grid_1, grid_2))) + expect_true(isTRUE(all.equal(grid_2, grid_3))) + + grid_4 <- grid_hybrid( + params_3, + parameters = "n_min", + size = 5, + levels = 6, + type = "uniform", + original = FALSE + ) + expect_false(isTRUE(all.equal(grid_3, grid_4))) +}) + + +test_that("S3 methods for hybrid designs", { + size <- 12 + lvls <- 4 + prm <- parameters(mixture(), mom = momentum(), activation(c("relu", "tanh"))) + + design_paramset <- grid_hybrid( + prm, + parameters = "mixture", + size = size, + levels = lvls, + type = "uniform" + ) + design_dots <- + grid_hybrid( + mixture(), + mom = momentum(), + activation(c("relu", "tanh")), + parameters = "mixture", + size = size, + levels = lvls, + type = "uniform" + ) + expect_equal(design_paramset, design_dots) + + ### + + design_list <- + grid_hybrid( + list( + mixture(), + mom = momentum(), + activation(c("relu", "tanh")) + ), + parameters = "mixture", + size = size, + levels = lvls, + type = "uniform" + ) + expect_equal(design_paramset, design_list) +}) + +test_that("1-point grid", { + size <- 12 + lvls <- 4 + prm <- parameters(mixture(), mom = momentum(), activation(c("relu", "tanh"))) + + grid_1 <- grid_hybrid(prm, parameters = "mixture", size = 1, levels = 1) + expect_equal(nrow(grid_1), 1L) + + grid_2 <- grid_hybrid(prm, parameters = "mixture", size = 4, levels = 1) + expect_equal(nrow(grid_2), 4L) + + grid_3 <- grid_hybrid(prm, parameters = "mixture", size = 1, levels = 4) + expect_equal(nrow(grid_3), 4L) +}) + +test_that("grid_hybrid() errors with non-param inputs", { + # default method + expect_snapshot(error = TRUE, grid_hybrid()) + expect_snapshot(error = TRUE, grid_hybrid("not a 
param")) + + # param method + expect_snapshot(error = TRUE, grid_hybrid(penalty(), "min_n")) + expect_snapshot(error = TRUE, grid_hybrid(mtry(), "min_n")) + + # list method + expect_snapshot(error = TRUE, grid_hybrid(list())) + expect_snapshot(error = TRUE, grid_hybrid(list(penalty(), "min_n"))) + expect_snapshot(error = TRUE, grid_hybrid(list(mtry(), "min_n"))) +}) + +test_that("grid_hybrid.parameters() checks for NA", { + p <- parameters(penalty()) + p <- update(p, penalty = NA) + expect_snapshot(error = TRUE, grid_hybrid(p)) +}) + +test_that("grid_hybrid() errors with params containing unknowns", { + # parameters method + expect_snapshot(error = TRUE, grid_hybrid(parameters(mtry()))) + + # param method + expect_snapshot(error = TRUE, grid_hybrid(mtry())) + expect_snapshot(error = TRUE, grid_hybrid(mtry(), sample_size())) + + # list method + expect_snapshot(error = TRUE, grid_hybrid(list(mtry()))) + expect_snapshot( + error = TRUE, + grid_hybrid(list(mtry_custom_name = mtry())) + ) + expect_snapshot(error = TRUE, grid_hybrid(list(mtry(), sample_size()))) +}) + +test_that("grid_hybrid() errors with duplicate parameter ids", { + # param method + expect_snapshot(error = TRUE, grid_hybrid(penalty(), penalty())) + + # list method + expect_snapshot( + error = TRUE, + grid_hybrid(list(a = penalty(), a = mtry())) + ) +})