#
# Prices of digital cameras
#
# The latest version of R changed the default behavior of the read.csv function so that variables that
# have observations with nonnumeric characters in them are treated as character variables, rather than
# factor variables. This often doesn't make a difference, but occasionally it does, so it's better to
# put in the setting telling R to treat them as factor variables.
#
cameras <- read.csv("c:/class/ascii/cameras.csv", stringsAsFactors = TRUE)
#
# The default coding for categorical (factor) variables in R is indicator variable coding (what R
# calls treatment contrasts), with the first category being the reference category. You can tell R
# to use effect codings by assigning what R calls sum contrasts to the factor variable with the
# command
# contrasts(cameras$Category) = contr.sum(6)
# (the "6" is because there are 6 levels to the Category variable). Of course, none of this affects
# the actual fit of the model, F-tests, etc.
#
attach(cameras)
#
# This applied functions like mean and sd to subgroups of the data
#
sapply(split(Price,Category),summary)
sapply(split(Price,Category),sd)
#
# This is the "old-fashioned" way of getting side-by-side boxplots using that same split function
#
boxplot(split(Price,Category))
#
# Note that since Category includes text, it is automatically a factor
# variable. If X defines the groups using only numbers, it has to be
# entered into the model as factor(X).
#
cameraa <- lm(Price ~ Category)
anova(cameraa)
summary(cameraa)
#
# Function for multiple comparisons in R. It does not give the Tukey adjustment, but
# does give the Bonferroni adjustment, which is usually similar
#
pairwise.t.test(Price,Category,p.adj="bonf")
#
# A way of getting the Tukey comparisons is to fit the model using the aov() command rather than the lm()
# command and then use the TukeyHSD() function. In case you're wondering, the fourinone function works
# on aov objects as well as lm objects.
#
camera.aov = aov(Price ~ Category)
TukeyHSD(camera.aov)
#
# A better alternative is the multcomp package, which gives various comparisons and other things.
# This would have failed if I hadn't told R to read in the Category variable as a factor, by the way.
#
library(multcomp)
cameraa <- lm(Price ~ Category)
camera.tukey = glht(cameraa, linfct=mcp(Category="Tukey"))
summary(camera.tukey)
#
# This gives simultaneous confidence intervals (and a plot of them) for the pairwise differences. Note that
# the group difference labels on the vertical axis might not fit in the plot if the group names are too
# long; this can be addressed by setting the left margin to be larger. This is done below using the
# par(mar=c(5,15,4,4)) command.
#
print(confint(camera.tukey))
par(mar=c(5,15,4,4))
plot(print(confint(camera.tukey)))
#
# The following functions can of course be used for any regression fit, and automatically handle ANOVA models.
#
sresa = rstandard(cameraa)
hata = hatvalues(cameraa)
cooka = cooks.distance(cameraa)
cbind(sresa,hata,cooka)
fourinone(cameraa, stdres=TRUE)
#
# Levene's test
#
absres <- abs(sresa)
anova(lm(absres ~ Category))
#
# Analysis for logged price
#
log.Price <- log10(Price)
boxplot(log.Price ~ Category)
camerab <- lm(log.Price ~ Category)
anova(camerab)
summary(glht(camerab, linfct=mcp(Category="Tukey")))
sresb = rstandard(camerab)
hatb = hatvalues(camerab)
cookb = cooks.distance(camerab)
cbind(sresb,hatb,cookb)
fourinone(camerab, stdres=TRUE)
absres <- abs(sresb)
anova(lm(absres ~ Category))
data.frame(Brand,Model,sresb,hatb,cookb)
boxplot(sresb ~ CR.Best.Buy)
#
# Getting weights
#
groupsd <- sapply(split(sresa,Category),sd)
#
# Setting up weights for point-and-shoot cameras to be 1
#
groupsd[["4 (Point-and-shoot)"]] <- 1
#
# The ordered() function orders the categories based on alphabetical order, and the as.numeric() function
# then turns those into the integers from 1 to 6. The wt variable takes on the appropriate weight based on
# which Category value each camera has.
#
wt <- 1/(groupsd[as.numeric(ordered(Category))]^2)
camerac <- lm(Price ~ Category, weight=wt)
anova(camerac)
summary(camerac)
summary(glht(camerac, linfct=mcp(Category="Tukey")))
sresc = rstandard(camerac)
fourinone(camerac, stdres=TRUE)
absres <- abs(sresc)
anova(lm(absres ~ Category))
camerad <- lm(log.Price ~ as.numeric(ordered(Category)))
summary(camerad)
#
# Partial F-test
#
anova(camerad,camerab)
#
# As is true for any regression (lm) model, predictions are made using the predict() command. Put the predictor values (here camera categories)
# you are interested in in a new data frame with the same variable name, put associated weights in a weight variable if doing WLS,
# and then (for predictions based on camerac, for example)
#
# predict(camerac, data.new, interval="prediction", weights=data.new$wt)
#