% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/make_spatial_folds.R
\name{make_spatial_folds}
\alias{make_spatial_folds}
\title{Create multiple spatially independent training and testing folds}
\usage{
make_spatial_folds(
  data = NULL,
  dependent.variable.name = NULL,
  xy.selected = NULL,
  xy = NULL,
  distance.step.x = NULL,
  distance.step.y = NULL,
  training.fraction = 0.75,
  n.cores = parallel::detectCores() - 1,
  cluster = NULL
)
}
\arguments{
\item{data}{Data frame containing the response variable and the predictors. Required only for binary response variables.}

\item{dependent.variable.name}{Character string with the name of the response variable. Must be a column name in \code{data}. Required only for binary response variables.}

\item{xy.selected}{Data frame with columns "x" (longitude), "y" (latitude), and "id" (record identifier). Defines the focal points for fold creation. Typically a spatially thinned subset of \code{xy} created with \code{\link[=thinning]{thinning()}} or \code{\link[=thinning_til_n]{thinning_til_n()}}.}

\item{xy}{Data frame with columns "x" (longitude), "y" (latitude), and "id" (record identifier). Contains all spatial coordinates for the dataset.}

\item{distance.step.x}{Numeric value specifying the buffer growth increment along the x-axis. Default: \code{NULL} (automatically set to 1/1000th of the x-coordinate range).}

\item{distance.step.y}{Numeric value specifying the buffer growth increment along the y-axis. Default: \code{NULL} (automatically set to 1/1000th of the y-coordinate range).}

\item{training.fraction}{Numeric value between 0.1 and 0.9 specifying the fraction of records to include in the training fold. Default: \code{0.75}.}

\item{n.cores}{Integer specifying the number of CPU cores for parallel execution. Default: \code{parallel::detectCores() - 1}.}

\item{cluster}{Optional cluster object created with \code{\link[parallel:makeCluster]{parallel::makeCluster()}}. If provided, it overrides \code{n.cores}. The user is responsible for stopping the cluster with \code{\link[parallel:makeCluster]{parallel::stopCluster()}}. Default: \code{NULL}.}
}
\value{
A list with one element per row of \code{xy.selected}. Each element is itself a list that contains:
\itemize{
\item \code{training}: Integer vector of record IDs (from \code{xy$id}) in the training fold.
\item \code{testing}: Integer vector of record IDs (from \code{xy$id}) in the testing fold.
}
}
\description{
Applies \code{\link[=make_spatial_fold]{make_spatial_fold()}} to every row in \code{xy.selected}, generating one spatially independent fold centered on each focal point. Used for spatial cross-validation in \code{\link[=rf_evaluate]{rf_evaluate()}}.
}
\details{
This function creates multiple spatially independent folds for spatial cross-validation by calling \code{\link[=make_spatial_fold]{make_spatial_fold()}} once for each row in \code{xy.selected}. Each fold is created by growing a rectangular buffer from the corresponding focal point until the desired \code{training.fraction} is achieved.

\strong{Parallel execution:}

The function uses parallel processing to speed up fold creation. You can control parallelization with \code{n.cores} or provide a pre-configured cluster object.

\strong{Typical workflow:}
\enumerate{
\item Thin spatial points with \code{\link[=thinning]{thinning()}} or \code{\link[=thinning_til_n]{thinning_til_n()}} to create \code{xy.selected}
\item Create spatial folds with this function
\item Use the folds for spatial cross-validation in \code{\link[=rf_evaluate]{rf_evaluate()}}
}
}
\examples{
# Load the example datasets used below
data(plants_df, plants_xy)

# Thin to 10 focal points to speed up example
xy.thin <- thinning_til_n(
  xy = plants_xy,
  n = 10
)

# Create spatial folds centered on the 10 thinned points
folds <- make_spatial_folds(
  xy.selected = xy.thin,
  xy = plants_xy,
  distance.step.x = 0.05,
  training.fraction = 0.6,
  n.cores = 1
)

# Each element is a fold with training and testing record IDs
length(folds)  # 10 folds
names(folds[[1]])  # "training" and "testing"

# Visualize first fold (training = red, testing = blue, center = black)
if (interactive()) {
  first.fold <- folds[[1]]

  # Folds store record IDs (values of plants_xy$id), so select rows by
  # matching on the id column rather than by row position
  in.training <- plants_xy$id \%in\% first.fold$training
  in.testing <- plants_xy$id \%in\% first.fold$testing

  plot(plants_xy[c("x", "y")], type = "n", xlab = "", ylab = "")
  points(plants_xy[in.training, c("x", "y")], col = "red4", pch = 15)
  points(plants_xy[in.testing, c("x", "y")], col = "blue4", pch = 15)

  # Fold i is centered on row i of xy.selected, so the center of the
  # first fold is the first row of xy.thin
  points(
    xy.thin[1, c("x", "y")],
    col = "black",
    pch = 15,
    cex = 2
  )
}

}
\seealso{
\code{\link[=make_spatial_fold]{make_spatial_fold()}}, \code{\link[=rf_evaluate]{rf_evaluate()}}, \code{\link[=thinning]{thinning()}}, \code{\link[=thinning_til_n]{thinning_til_n()}}

Other preprocessing: 
\code{\link{auto_cor}()},
\code{\link{auto_vif}()},
\code{\link{case_weights}()},
\code{\link{default_distance_thresholds}()},
\code{\link{double_center_distance_matrix}()},
\code{\link{is_binary}()},
\code{\link{make_spatial_fold}()},
\code{\link{the_feature_engineer}()},
\code{\link{weights_from_distance_matrix}()}
}
\concept{preprocessing}
