% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/kija_covariate_balance.R
\name{CovariateBalance}
\alias{CovariateBalance}
\title{Plots for checking covariate balance in causal forest}
\usage{
CovariateBalance(
  cf,
  plots = c("all", "Love", "density", "ecdf"),
  balance_table = TRUE,
  covariates = NULL,
  names = NULL,
  factor = NULL,
  treatment_name = "W",
  love_breaks = NULL,
  love_xlim = NULL,
  love_scale_color = NULL,
  cd_nrow = NULL,
  cd_ncol = NULL,
  cd_x_scale_width = NULL,
  cd_bar_width = NULL,
  cd_scale_fill = NULL,
  ec_nrow = NULL,
  ec_ncol = NULL,
  ec_x_scale_width = NULL,
  ec_scale_color = NULL
)
}
\arguments{
\item{cf}{An object of class causal_forest (and inheriting from class grf).}

\item{plots}{Character, \code{"all"} returns Love plots, density plots and
empirical CDF plots, \code{"Love"} returns only Love plots, \code{"density"}
returns only density plots, \code{"ecdf"} returns only empirical CDF plots.}

\item{balance_table}{Boolean, TRUE to return a table with balance statistics.}

\item{covariates}{A vector to select covariates to show in balance plots. If
\code{cf$X.orig} is an unnamed matrix, use a numeric vector to select variables.
Otherwise use a character vector. Names provided in the \code{names} argument
take priority over existing names in \code{cf$X.orig}. If discrete covariates
have been one-hot encoded using \link[EpiForsk]{DiscreteCovariatesToOneHot},
the names of these discrete covariates can be provided in \code{covariates} to
select them and to collect all levels of each into a bar plot showing its
distribution.}

\item{names}{A named character vector. The vector itself should contain
covariate names from the causal_forest object, while the names attribute
should contain the names to use when plotting. If discrete covariates have
been one-hot encoded using \link[EpiForsk]{DiscreteCovariatesToOneHot},
providing just the name of a discrete covariate will modify the name of all
levels for plotting. If the vector is unnamed, the provided vector will act
as the new covariate names, given in the order of \code{cf$X.orig}. If \code{NULL}
(the default), the original names are used.}

\item{factor}{A named list with covariates to be converted to factor. Note
that one-hot encoded covariates are automatically converted, so need not be
specified in the factor argument. Each component of the list must contain
the factor levels, using a named vector to supply custom labels.}

\item{treatment_name}{Character, name of treatment.}

\item{love_breaks}{Numeric, breaks used in the plot of absolute standardized
mean differences.}

\item{love_xlim}{Numeric, \code{x}-limits used in the plot of absolute
standardized mean differences.}

\item{love_scale_color}{Function, \code{scale_color_.} function to use in the plot
of absolute standardized mean differences.}

\item{cd_nrow, cd_ncol}{Numeric, the dimensions of the grid to create in
covariate distribution plots. If both are \code{NULL} it will use the same logic
as \link[ggplot2]{facet_wrap} to set the dimensions.}

\item{cd_x_scale_width}{Numeric, the distance between major \code{x}-axis ticks in
the covariate distribution plots. If \code{NULL}, a width is chosen to display
approximately six major ticks. If length 1, the same width is used for all
covariate plots. If the same length as the number of covariates included,
each number is used as the width for different covariates, in the order of
the covariates after selection with the tidy-select expression in
\code{covariates}.}

\item{cd_bar_width}{Numeric, the width of the bars in the covariate
distribution plots (barplots for categorical variables, histograms for
continuous variables). If \code{NULL}, a width is chosen to display
approximately 50 bars in histograms, while 0.9 times the resolution of the
data is used in bar plots. If length 1, the same width is used for all
covariate plots. This is not recommended if there are both categorical and
continuous covariates. If the same length as the number of covariates
included, each number is used as the bar width for different covariates, in
the order of the covariates after selection with the tidy-select expression
in \code{covariates}.}

\item{cd_scale_fill}{Function, \code{scale_fill_.} function to use in covariate
distribution plots.}

\item{ec_nrow, ec_ncol}{Numeric, the dimensions of the grid to create in
empirical CDF plots. If both are \code{NULL} it will use the same logic
as \link[ggplot2]{facet_wrap} to set the dimensions.}

\item{ec_x_scale_width}{Numeric, the distance between major \code{x}-axis ticks in
the empirical CDF plots. If \code{NULL}, a width is chosen to display
approximately six major ticks. If length 1, the same width is used for all
plots. If the same length as the number of covariates included, each number
is used as the width for different covariates, in the order of the
covariates after selection with the tidy-select expression in \code{covariates}.}

\item{ec_scale_color}{Function, \code{scale_color_.} function to use in empirical
CDF plots.}
}
\value{
A list with up to five elements:
\itemize{
\item love_data: data used to plot the absolute standardized mean differences.
\item love: plot object for absolute standardized mean differences.
\item cd_data: data used to plot covariate distributions.
\item cd_unadjusted: plot of unadjusted covariate distributions in the exposure
groups.
\item cd_adjusted: plot of adjusted covariate distributions in the exposure
groups.
}
}
\description{
Generate plots showing balance in the covariates before and after propensity
score weighting with a causal forest object.
}
\details{
If an unnamed character vector is provided in \code{names}, it must have length
\code{ncol(cf$X.orig)}. Names of covariates not selected by \code{covariates} can be set
to \code{NA}. If a named character vector is provided in \code{names}, all renamed
covariates will be kept regardless of whether they are selected in \code{covariates}.
Thus to select only renamed covariates, \code{character(0)} can be used in
\code{covariates}. The plot theme can be adjusted using ggplot2 active theme
modifiers, see \link[ggplot2]{theme_get}.
}
\examples{
\donttest{
# Simulation size: n observations of p continuous covariates.
n <- 1000
p <- 5

# Continuous covariates; as.data.frame() names the columns V1, ..., V5.
continuous_part <- as.data.frame(matrix(rnorm(n * p), nrow = n, ncol = p))

# Two discrete covariates with unequal level probabilities.
discrete_part <- dplyr::tibble(
  D1 = factor(
    sample(1:3, size = n, replace = TRUE, prob = c(0.2, 0.3, 0.5)),
    labels = c("first", "second", "third")
  ),
  D2 = factor(
    sample(1:2, size = n, replace = TRUE, prob = c(0.2, 0.8)),
    labels = c("a", "b")
  )
)

# One-hot encode the discrete covariates, append them to the continuous ones
# and interleave the columns.
X <- dplyr::select(
  dplyr::bind_cols(
    continuous_part,
    DiscreteCovariatesToOneHot(discrete_part)
  ),
  V1,
  V2,
  dplyr::starts_with("D1"),
  V3,
  V4,
  dplyr::starts_with("D2"),
  V5
)

# Exposure W: Bernoulli draw with probability 1 / (1 + exp(linear predictor)).
expo_lp <- 0.4 * X[, 1] + 0.2 * X[, 2] - 0.6 * X[, 3] +
  0.4 * X[, 6] + 0.6 * X[, 8] - 0.2 * X[, 9]
expo_prob <- 1 / (1 + exp(expo_lp))
W <- rbinom(n, 1, expo_prob)

# Outcome Y: Bernoulli draw whose probability depends on W and the covariates.
event_lp <- 2 * (pmax(2 * X[, 1], 0) * W - X[, 2] + X[, 6] + 3 * X[, 9])
event_prob <- 1 / (1 + exp(event_lp))
Y <- rbinom(n, 1, event_prob)

# Fit the causal forest whose covariate balance is inspected below.
cf <- grf::causal_forest(X, Y, W)

# Default call: all covariates with their original names.
cb1 <- CovariateBalance(cf)

# Plot labels (names attribute) for the continuous covariates (values).
continuous_labels <- c(
  "medium imbalance" = "V1",
  "low imbalance" = "V2",
  "high imbalance" = "V3",
  "no imbalance" = "V4"
)

# Keep only renamed covariates, including both one-hot encoded covariates.
cb2 <- CovariateBalance(
  cf,
  covariates = character(0),
  names = c(
    continuous_labels,
    "discrete 1" = "D1",
    "discrete 2" = "D2"
  )
)

# Keep only the renamed continuous covariates and customise the plots.
cb3 <- CovariateBalance(
  cf,
  covariates = character(0),
  names = continuous_labels,
  treatment_name = "Treatment",
  love_breaks = seq(0, 0.5, by = 0.1),
  love_xlim = c(0, 0.5),
  cd_nrow = 2,
  cd_x_scale_width = 1,
  cd_bar_width = 0.3
)
}

}
\author{
KIJA
}
