# R Apply Family

# Index of the apply family, grouped by what each function iterates over.
# Evaluating a bare function name at the console displays that function.

# Over the margins of an array
apply
sweep
scale

# Over the elements of a list or vector
lapply
sapply
vapply
replicate
rapply

# Over several list or vector arguments in parallel
mapply
Vectorize

# Over a ragged array
tapply

# Over a data frame split by factors
by
aggregate
split

# Over the values stored in an environment
eapply

# apply sweep scale ----

# apply(): call a function on the chosen margins of a matrix or array and
#   collect the results in a vector, array, or list.
m <- matrix(1:6, nrow = 2)
m
apply(m, MARGIN = 1, FUN = sum)     # one sum per row
apply(m, MARGIN = 1:2, FUN = sqrt)  # both margins: sqrt of every cell

# sweep(): remove a summary statistic from an array along a margin
#   (by default the statistic is subtracted). Here each slice along the
#   first dimension is centred on its own mean.
X <- array(1:24, dim = 4:2)
X
sweep(X, MARGIN = 1, STATS = apply(X, MARGIN = 1, FUN = mean))

# scale(): generic whose default method centres and/or scales the columns
#   of a numeric matrix.
mat <- matrix(round(10 * rnorm(10)), nrow = 5)
mat
scale(mat)
# Column means and standard deviations of the raw data, for comparison
#   with the centring and scaling that scale() applied.
apply(mat, MARGIN = 2, FUN = mean)
apply(mat, MARGIN = 2, FUN = sd)
# Covariance of the standardized columns.
cov(scale(mat))

# lapply sapply vapply replicate rapply ----

# "lapply" returns a list of the same length as X, each element of which is
#   the result of applying FUN to the corresponding element of X.
# Each element of the split is a two-column data frame, so colMeans() is
#   used: mean() does not accept a data frame (it warns and returns NA).
lapply(split(Orange[2:3], Orange[1]), colMeans)
lapply(1:5, sqrt)

# "sapply" is a user-friendly version of lapply by default returning
#   a vector or matrix if appropriate.
sapply(split(Orange[2:3], Orange[1]), colMeans)
sapply(1:5, sqrt)

# "vapply" is similar to sapply, but has a pre-specified type of return value,
#   so it can be safer (and sometimes faster) to use.
# The template numeric(1) states that every call yields a single double.
#   A complex template such as 1i would also be accepted, but would promote
#   the results to complex.
vapply(1:5, sqrt, numeric(1))

# "replicate" is a wrapper for the common use of sapply for repeated evaluation
#   of an expression (which will usually involve random number generation).
replicate(3, rnorm(5))

# "rapply" is a recursive version of lapply.
(x <- list(A = list(a = pi, b = list(b1 = 1)), B = "a character string"))
# Without `classes`, sqrt is also applied to the character leaf and fails.
#   try() reports the error but lets the rest of the script run.
try(rapply(x, sqrt))
# Restrict f to numeric leaves. `how` chooses the shape of the result.
rapply(x, sqrt, classes = "numeric", how = "unlist")
rapply(x, sqrt, classes = "numeric", how = "replace")
rapply(x, sqrt, classes = "numeric", how = "list")
# `deflt` is the value given to non-matching leaves for how = "unlist" and
#   how = "list"; it is not used with how = "replace".
rapply(x, sqrt, classes = "numeric", deflt = NA, how = "unlist")
rapply(x, sqrt, classes = "numeric", deflt = NA, how = "replace")
rapply(x, sqrt, classes = "numeric", deflt = NA, how = "list")
# Extra arguments (here digits = 2) are passed on to f.
rapply(x, round, classes = "numeric", how = "replace", digits = 2)

# mapply Vectorize ----

# mapply(): the multivariate counterpart of sapply. FUN receives the first
#   element of every ... argument, then the second elements, and so on;
#   shorter arguments are recycled as needed.
mapply(FUN = rep, LETTERS[1:3], 1:3)

# Vectorize(): build a wrapper that behaves as if mapply() had been called
#   on its arguments.
# rep is a primitive and exposes no formal arguments, so Vectorize() hands it
#   back unchanged: this call is just rep(LETTERS[1:3], 1:3).
vrep <- Vectorize(FUN = rep)
vrep(LETTERS[1:3], 1:3)
# rep.int has formal arguments, so the wrapper is applied element by
#   element, like the mapply() call above.
vrep <- Vectorize(FUN = rep.int)
vrep(LETTERS[1:3], 1:3)

# tapply ----

# tapply(): apply a function to every cell of a ragged array, i.e. to each
#   non-empty group of values defined by a unique combination of factor
#   levels.
m <- matrix(1:6, nrow = 2, ncol = 3)
m
# Group labels, one per cell of m.
fac <- matrix(c(1, 3, 1, 2, 2, 2), nrow = 2, ncol = 3)
fac
tapply(m, INDEX = fac, FUN = sum)

# by aggregate split ----

# "by" is an object-oriented wrapper for tapply applied to data frames.
# FUN receives each group as a data frame, so colMeans() is used: mean()
#   does not accept a data frame (it warns and returns NA).
by(Orange[2:3], Orange[1], colMeans)
# Stack the per-group mean vectors into one matrix, one row per group.
do.call("rbind", by(Orange[2:3], Orange[1], colMeans))

# "aggregate" splits the data into subsets, computes summary statistics
#   for each, and returns the result in a convenient form.
# Here FUN is applied to each column separately, so mean() is fine.
aggregate(Orange[2:3], Orange[1], mean)
# See help for usage of NA, formula, dot notation, xtabs, nfrequency.

# "split" divides the data in the vector x into the groups defined by f.
#   The replacement forms replace values corresponding to such a division.
split(Orange[2:3], Orange[1])

# eapply ----

# eapply(): apply FUN to the named values held in an environment and return
#   the results as a list. Names starting with a dot are skipped unless all
#   named objects are requested. The result is not sorted, and parent
#   environments are not searched.
env <- new.env()
assign("x", 1:3, envir = env)
assign("y", 4:6, envir = env)
eapply(env, FUN = sum)