% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/residuals.R
\name{partial_residuals}
\alias{partial_residuals}
\title{Augment a model fit with partial residuals for all terms}
\usage{
partial_residuals(fit, predictors = everything())
}
\arguments{
\item{fit}{The model to obtain residuals for. This can be a model fit with
\code{lm()} or \code{glm()}, or any model with a \code{predict()} method that accepts a
\code{newdata} argument.}

\item{predictors}{Predictors to calculate partial residuals for. Defaults to
all predictors, skipping factors. Predictors can be specified using
tidyselect syntax; see \code{help("language", package = "tidyselect")} and the
examples below.}
}
\value{
Data frame (tibble) containing the model data and residuals in tidy
form. There is one row \emph{per selected predictor} per observation. All
predictors are included as columns, plus the following additional columns:

\item{.obs}{Row number of this observation in the original model data frame.}
\item{.predictor_name}{Name of the predictor this row gives the partial
residual for.}
\item{.predictor_value}{Value of the predictor this row gives the partial
residual for.}
\item{.partial_resid}{Partial residual for this predictor for this
observation.}
\item{.predictor_effect}{Predictor effect \eqn{\hat \mu(x_{if},
0)}{muhat(x_if, 0)} for this observation.}
}
\description{
Construct a data frame containing the model data, partial residuals for all
quantitative predictors, and predictor effects, for use in residual
diagnostic plots and other analyses. The result is in tidy form (one row per
predictor per observation), allowing it to be easily manipulated for plots
and simulations.
}
\section{Predictors and regressors}{
To define partial residuals, we must distinguish between the \emph{predictors},
the measured variables we are using to fit our model, and the \emph{regressors},
which are calculated from them. In a simple linear model, the regressors are
equal to the predictors. But in a model with polynomials, splines, or other
nonlinear terms, the regressors may be functions of the predictors.

For example, in a regression with a single predictor \eqn{X}, the regression
model \eqn{Y = \beta_0 + \beta_1 X + e} has one regressor, \eqn{X}. But if we
choose a polynomial of degree 3, the model is \eqn{Y = \beta_0 + \beta_1 X +
\beta_2 X^2 + \beta_3 X^3 + e}, and the regressors are \eqn{\{X, X^2, X^3\}}{{X,
X^2, X^3}}.

Similarly, if we have predictors \eqn{X_1} and \eqn{X_2} and form a model
with main effects and an interaction, the regressors are \eqn{\{X_1, X_2, X_1
X_2\}}{{X_1, X_2, X_1 X_2}}.

Partial residuals are defined in terms of the predictors, not the regressors,
and are intended to allow us to see the shape of the relationship between a
particular predictor and the response, and to compare it to how we have
chosen to model it with regressors. Partial residuals are not useful for
categorical (factor) predictors, and so these are omitted.

Besides regressors, a model may have offset terms, which enter the model with
a fixed coefficient of 1. These are fixed to their mean value for partial
residual calculations.
}

\section{Linear models}{
Consider a linear model where \eqn{\mathbb{E}[Y \mid X = x] = \mu(x)}{E[Y | X
= x] = mu(x)}. The mean function \eqn{\mu(x)} is a linear combination of
regressors. Let \eqn{\hat \mu}{muhat} be the fitted model and \eqn{\hat
\beta_0}{beta0hat} be its intercept.

Choose a predictor \eqn{X_f}, the \emph{focal} predictor, to calculate partial
residuals for. Write the mean function as \eqn{\mu(X_f, X_o)}, where
\eqn{X_f} is the value of the focal predictor, and \eqn{X_o} represents all
other predictors.

If \eqn{e_i} is the residual for observation \eqn{i}, the partial residual is

\deqn{r_{if} = e_i + (\hat \mu(x_{if}, 0) - \hat \beta_0).}{
r_if = e_i + (muhat(x_if, 0) - beta0hat).}

Setting \eqn{X_o = 0} means setting all other numeric predictors to 0; factor
predictors are set to their first (baseline) level.
}

\section{Generalized linear models}{
Consider a generalized linear model where \eqn{g(\mathbb{E}[Y \mid X = x]) =
\eta(x)}{g(E[Y | X = x]) = eta(x)}, where \eqn{g} is a link function and
\eqn{\eta(x)} is the linear predictor. Let \eqn{\hat \eta}{etahat} be the
fitted linear predictor and \eqn{\hat \beta_0}{beta0hat} be its intercept.
Let \eqn{\hat \mu(x) = g^{-1}(\hat \eta(x))}{muhat(x) = g^{-1}(etahat(x))} be
the fitted mean function.

The \emph{working residual} \eqn{e_i} for observation \eqn{i} is

\deqn{e_i = (y_i - \hat \mu(x_i)) g'(\hat \mu(x_i)).}{e_i = (y_i -
muhat(x_i)) g'(muhat(x_i)).}

Choose a predictor \eqn{X_f}, the \emph{focal} predictor, to calculate partial
residuals for. Write \eqn{\eta} as \eqn{\eta(X_f, X_o)}, where \eqn{X_f} is the
value of the focal predictor, and \eqn{X_o} represents all other predictors.
Hence \eqn{\eta(X_f, X_o)} gives the model's prediction on the link scale.

The partial residual is again

\deqn{r_{if} = e_i + (\hat \eta(x_{if}, 0) - \hat \beta_0).}{
r_if = e_i + (etahat(x_if, 0) - beta0hat).}
}

\section{Interpretation}{
In linear regression, because the residuals \eqn{e_i} should have mean zero
in a well-specified model, plotting the partial residuals against \eqn{x_f}
should produce a shape matching the modeled relationship \eqn{\mu}. If the
model is wrong, the partial residuals will appear to deviate from the fitted
relationship. Provided the regressors are uncorrelated or approximately
linearly related to each other, the plotted trend should approximate the true
relationship between \eqn{x_f} and the response.

In generalized linear models, this is approximately true if the link function
\eqn{g} is approximately linear over the range of observed \eqn{x} values.

Additionally, the function \eqn{\mu(X_f, 0)} (in linear models) or
\eqn{\eta(X_f, 0)} (in generalized linear models) can be used to show the
relationship between the focal predictor and the response. In a linear model,
the function is linear; with polynomial or spline regressors, it is
nonlinear. This function is the \emph{predictor effect function}, and the
estimated predictor effects are included in this function's output.
}

\section{Limitations}{
Factor predictors (as factors, logical, or character vectors) are detected
automatically and omitted. However, if a numeric variable is converted to
factor in the model formula, such as with \code{y ~ factor(x)}, the function
cannot determine the appropriate type and will raise an error. Create factors
as needed in the source data frame \emph{before} fitting the model to avoid this
issue.
}

\examples{
fit <- lm(mpg ~ cyl + disp + hp, data = mtcars)
partial_residuals(fit)

# You can select predictors with tidyselect syntax:
partial_residuals(fit, c(disp, hp))

# Predictors with multiple regressors are supported:
fit2 <- lm(mpg ~ poly(disp, 2), data = mtcars)
partial_residuals(fit2)

# Allowing an interaction by number of cylinders is fine, but partial
# residuals are not generated for the factor. Notice the factor must be
# created first, not in the model formula:
mtcars$cylinders <- factor(mtcars$cyl)
fit3 <- lm(mpg ~ cylinders * disp + hp, data = mtcars)
partial_residuals(fit3)
}
\references{
R. Dennis Cook (1993). "Exploring Partial Residual Plots",
\emph{Technometrics}, 35:4, 351-362. \doi{10.1080/00401706.1993.10485350}

Cook, R. Dennis, and Croos-Dabrera, R. (1998).
"Partial Residual Plots in Generalized Linear Models." \emph{Journal of the
American Statistical Association} 93, no. 442: 730-739. \doi{10.2307/2670123}

Fox, J., & Weisberg, S. (2018).
"Visualizing Fit and Lack of Fit in Complex Regression Models with Predictor
Effect Plots and Partial Residuals." \emph{Journal of Statistical Software},
87(9). \doi{10.18637/jss.v087.i09}
}
\seealso{
\code{\link[=binned_residuals]{binned_residuals()}} for the related binned residuals;
\code{\link[=augment_longer]{augment_longer()}} for a similarly formatted data frame of ordinary
residuals; \code{vignette("linear-regression-diagnostics")},
\code{vignette("logistic-regression-diagnostics")}, and
\code{vignette("other-glm-diagnostics")} for examples of plotting and
interpreting partial residuals.
}
