% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PipeOpDecode.R
\name{mlr_pipeops_decode}
\alias{mlr_pipeops_decode}
\alias{PipeOpDecode}
\title{Reverse Factor Encoding}
\format{
\code{\link[R6:R6Class]{R6Class}} object inheriting from \code{\link{PipeOpTaskPreprocSimple}}/\code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}.
}
\description{
Reverses one-hot or treatment encoding of columns. It collapses multiple \code{numeric} or \code{integer} columns into one \code{factor}
column based on a pre-specified grouping pattern of column names.

May be applied to multiple groups of columns, grouped by matching a common naming pattern. The grouping pattern is
extracted to form the name of the newly derived \code{factor} column, and levels are constructed from the previous column
names, with parts matching the grouping pattern removed (see examples). The level per row of the new factor column is generally
determined as the name of the column with the maximum value in the group.
}
\section{Construction}{


\if{html}{\out{<div class="sourceCode">}}\preformatted{PipeOpDecode$new(id = "decode", param_vals = list())
}\if{html}{\out{</div>}}
\itemize{
\item \code{id} :: \code{character(1)}\cr
Identifier of resulting object, default \code{"decode"}.
\item \code{param_vals} :: named \code{list}\cr
List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default \code{list()}.
}
}

\section{Input and Output Channels}{

Input and output channels are inherited from \code{\link{PipeOpTaskPreproc}}.

The output is the input \code{\link[mlr3:Task]{Task}} with encoding columns collapsed into new decoded columns.
}

\section{State}{

The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}, as well as:
\itemize{
\item \code{colmaps} :: named \code{list}\cr
Named list of named character vectors. Each element is named according to the new column name extracted by
\code{group_pattern}. Each vector contains the level names for the new factor column that should be created, named by
the corresponding old column name. If \code{treatment_encoding} is \code{TRUE}, then each vector also contains \code{ref_name} as the
reference class with an empty string as name.
\item \code{treatment_encoding} :: \code{logical(1)}\cr
Value of \code{treatment_encoding} hyperparameter.
\item \code{cutoff} :: \code{numeric(1)}\cr
Value of \code{treatment_cutoff} hyperparameter, or \code{0} if that is not given.
\item \code{ties_method} :: \code{character(1)}\cr
Value of \code{ties_method} hyperparameter.
}
}

\section{Parameters}{

The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}, as well as:
\itemize{
\item \code{group_pattern} :: \code{character(1)}\cr
A regular expression to be applied to column names. Should contain a capturing group for the new
column name, and match everything that should not be interpreted as the new factor levels (which are constructed as
the difference between column names and what \code{group_pattern} matches).
If set to \code{""}, all columns matching the \code{group_pattern} are collapsed into one factor column called
\code{pipeop.decoded}. Use \code{\link{PipeOpRenameColumns}} to rename this column.
Initialized to \code{"^([^.]+)\\\\."}, which would extract everything up to the first dot as the new column name and
construct new levels as everything after the first dot.
\item \code{treatment_encoding} :: \code{logical(1)}\cr
If \code{TRUE}, treatment encoding is assumed instead of one-hot encoding. Initialized to \code{FALSE}.
\item \code{treatment_cutoff} :: \code{numeric(1)}\cr
If \code{treatment_encoding} is \code{TRUE}, specifies a cutoff value for identifying the reference level. The reference level
is set to \code{ref_name} in rows where the value is less than or equal to a specified cutoff value (e.g., \code{0}) in all
columns in that group. Default is \code{0}.
\item \code{ref_name} :: \code{character(1)}\cr
If \code{treatment_encoding} is \code{TRUE}, specifies the name for reference levels. Default is \code{"ref"}.
\item \code{ties_method} :: \code{character(1)}\cr
Method for resolving ties if multiple columns in a group share the same maximum value. Determines which of the
tied columns is picked. Options are \code{"first"}, \code{"last"}, or \code{"random"}. Initialized to \code{"random"}.
}
}

\section{Fields}{

Only fields inherited from \code{\link{PipeOp}}.
}

\section{Methods}{

Only methods inherited from \code{\link{PipeOpTaskPreprocSimple}}/\code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}.
}

\examples{
library("mlr3")

# Reverse one-hot encoding
df = data.frame(
  target = runif(4),
  x.1 = rep(c(1, 0), 2),
  x.2 = rep(c(0, 1), 2),
  y.1 = rep(c(1, 0), 2),
  y.2 = rep(c(0, 1), 2),
  a = runif(4)
)
task_one_hot = TaskRegr$new(id = "example", backend = df, target = "target")

# Default group_pattern: everything up to the first dot is the new column name.
pop = po("decode")

train_out = pop$train(list(task_one_hot))[[1]]
# x.1 and x.2 are collapsed into x, same for y; a is ignored.
train_out$data()

# Reverse treatment encoding from PipeOpEncode
df = data.frame(
  target = runif(6),
  fct = factor(rep(c("a", "b", "c"), 2))
)
task = TaskRegr$new(id = "example", backend = df, target = "target")

po_enc = po("encode", method = "treatment")
task_encoded = po_enc$train(list(task))[[1]]
task_encoded$data()

# Decode the encoded task with the treatment-aware PipeOp created here.
po_dec = po("decode", treatment_encoding = TRUE)
task_decoded = po_dec$train(list(task_encoded))[[1]]
# The fct.* columns created by the encoding are collapsed into fct. In all rows
# where all values are smaller or equal to 0, the level is set to the reference
# level (named by ref_name, "ref" by default).
task_decoded$data()

# Different group_pattern
df = data.frame(
  target = runif(4),
  x_1 = rep(c(1, 0), 2),
  x_2 = rep(c(0, 1), 2),
  y_1 = rep(c(2, 0), 2),
  y_2 = rep(c(0, 1), 2)
)
task = TaskRegr$new(id = "example", backend = df, target = "target")

# Grouped by first underscore
pop = po("decode", group_pattern = "^([^_]+)\\\\_")
train_out = pop$train(list(task))[[1]]
# x_1 and x_2 are collapsed into x, same for y
train_out$data()

# Empty string to collapse all matches into one factor column.
pop$param_set$set_values(group_pattern = "")
train_out = pop$train(list(task))[[1]]
# All columns are combined into a single column.
# The level for each row is determined by the column with the largest value in that row.
# By default, ties are resolved randomly.
train_out$data()

}
\seealso{
\url{https://mlr-org.com/pipeops.html}

Other PipeOps: 
\code{\link{PipeOp}},
\code{\link{PipeOpEncodePL}},
\code{\link{PipeOpEnsemble}},
\code{\link{PipeOpImpute}},
\code{\link{PipeOpTargetTrafo}},
\code{\link{PipeOpTaskPreproc}},
\code{\link{PipeOpTaskPreprocSimple}},
\code{\link{mlr_pipeops}},
\code{\link{mlr_pipeops_adas}},
\code{\link{mlr_pipeops_blsmote}},
\code{\link{mlr_pipeops_boxcox}},
\code{\link{mlr_pipeops_branch}},
\code{\link{mlr_pipeops_chunk}},
\code{\link{mlr_pipeops_classbalancing}},
\code{\link{mlr_pipeops_classifavg}},
\code{\link{mlr_pipeops_classweights}},
\code{\link{mlr_pipeops_colapply}},
\code{\link{mlr_pipeops_collapsefactors}},
\code{\link{mlr_pipeops_colroles}},
\code{\link{mlr_pipeops_copy}},
\code{\link{mlr_pipeops_datefeatures}},
\code{\link{mlr_pipeops_encode}},
\code{\link{mlr_pipeops_encodeimpact}},
\code{\link{mlr_pipeops_encodelmer}},
\code{\link{mlr_pipeops_encodeplquantiles}},
\code{\link{mlr_pipeops_encodepltree}},
\code{\link{mlr_pipeops_featureunion}},
\code{\link{mlr_pipeops_filter}},
\code{\link{mlr_pipeops_fixfactors}},
\code{\link{mlr_pipeops_histbin}},
\code{\link{mlr_pipeops_ica}},
\code{\link{mlr_pipeops_imputeconstant}},
\code{\link{mlr_pipeops_imputehist}},
\code{\link{mlr_pipeops_imputelearner}},
\code{\link{mlr_pipeops_imputemean}},
\code{\link{mlr_pipeops_imputemedian}},
\code{\link{mlr_pipeops_imputemode}},
\code{\link{mlr_pipeops_imputeoor}},
\code{\link{mlr_pipeops_imputesample}},
\code{\link{mlr_pipeops_info}},
\code{\link{mlr_pipeops_isomap}},
\code{\link{mlr_pipeops_kernelpca}},
\code{\link{mlr_pipeops_learner}},
\code{\link{mlr_pipeops_learner_pi_cvplus}},
\code{\link{mlr_pipeops_learner_quantiles}},
\code{\link{mlr_pipeops_missind}},
\code{\link{mlr_pipeops_modelmatrix}},
\code{\link{mlr_pipeops_multiplicityexply}},
\code{\link{mlr_pipeops_multiplicityimply}},
\code{\link{mlr_pipeops_mutate}},
\code{\link{mlr_pipeops_nearmiss}},
\code{\link{mlr_pipeops_nmf}},
\code{\link{mlr_pipeops_nop}},
\code{\link{mlr_pipeops_ovrsplit}},
\code{\link{mlr_pipeops_ovrunite}},
\code{\link{mlr_pipeops_pca}},
\code{\link{mlr_pipeops_proxy}},
\code{\link{mlr_pipeops_quantilebin}},
\code{\link{mlr_pipeops_randomprojection}},
\code{\link{mlr_pipeops_randomresponse}},
\code{\link{mlr_pipeops_regravg}},
\code{\link{mlr_pipeops_removeconstants}},
\code{\link{mlr_pipeops_renamecolumns}},
\code{\link{mlr_pipeops_replicate}},
\code{\link{mlr_pipeops_rowapply}},
\code{\link{mlr_pipeops_scale}},
\code{\link{mlr_pipeops_scalemaxabs}},
\code{\link{mlr_pipeops_scalerange}},
\code{\link{mlr_pipeops_select}},
\code{\link{mlr_pipeops_smote}},
\code{\link{mlr_pipeops_smotenc}},
\code{\link{mlr_pipeops_spatialsign}},
\code{\link{mlr_pipeops_subsample}},
\code{\link{mlr_pipeops_targetinvert}},
\code{\link{mlr_pipeops_targetmutate}},
\code{\link{mlr_pipeops_targettrafoscalerange}},
\code{\link{mlr_pipeops_textvectorizer}},
\code{\link{mlr_pipeops_threshold}},
\code{\link{mlr_pipeops_tomek}},
\code{\link{mlr_pipeops_tunethreshold}},
\code{\link{mlr_pipeops_unbranch}},
\code{\link{mlr_pipeops_updatetarget}},
\code{\link{mlr_pipeops_vtreat}},
\code{\link{mlr_pipeops_yeojohnson}}
}
\concept{PipeOps}
