#
# Modeling Lowe's sales redux
#
lowes = read.csv("c:/class/ascii/lowes.csv")
attach(lowes)
lowest1 = lm(Log.Sales ~ Mortgage + Housing.starts)
summary(lowest1)
library(car)
vif(lowest1)
fourinone(lowest1, stdres=TRUE)
stdres1 = rstandard(lowest1)
n = length(Log.Sales)
#
# The lowes file already has the Time index variable included, but this is how you would create it
Time = 1:n
#
# The type="b" setting constructs a plot where the dots are connected in order by lines
#
plot(Time, stdres1, ylab="Standardized residuals", type="b")
#
# You can superimpose a smooth curve on the plot by first determining its coordinates, and then
# adding it to the plot using the lines() command
#
smcurve = loess.smooth(Time, stdres1, span=.2)
lines(smcurve)
#
# Durbin-Watson test
# First, load the lmtest library (it can be obtained from the CRAN library).
# Note that this function provides a p-value for the test as well. By default the test one-sided,
# looking only for positive autocorrelation; if the Durbin-Watson statistic is greater than 2 it
# would indicate negative autocorrelation, so you would need to add the 'alternative="less"' setting.
# A more conservative (and probably better) approach would be to always do a two-sided test, as in
# dwtest(y ~ x, alternative="two.sided")
#
library(lmtest)
dwtest(Log.Sales ~ Mortgage + Housing.starts)
#
# Note that R has the annoying property that the ACF plot includes
# the correlation for lag zero, which by definition must equal one. This can be avoided by
# omitting the first displayed lag in the plot. The plot below plots the first 20 lags; the
# use of "xlim=c(1,xxx)" suppresses the printing of the meaningless lag 0 value.
#
acf(stdres1, xlim=c(1,20), ylim=c(-.2, 1))
#
# The approximate standard error of the estimated autocorrelations is 1/sqrt(n), and
# these are the ones used in the plot to give the .05 significance limits, which is why they
# are straight instead of the more correct curved ones in Minitab.
#
# The randtests package (available on CRAN) gives the runs test.
#
library(randtests)
runs.test(stdres1, threshold=0)
#
# The Time.sq variable is already in the file, but it is of course created using the
# command Time.sq = Time*Time
#
lowest2 = lm(Log.Sales ~ Mortgage + Housing.starts + Time + Time.sq)
summary(lowest2)
vif(lowest2)
fourinone(lowest2, stdres=TRUE)
stdres2 = rstandard(lowest2)
plot(Time, stdres2, ylab="Standardized residuals", type="b")
dwtest(Log.Sales ~ Mortgage + Housing.starts + Time + Time.sq)
acf(stdres2, xlim=c(1,20), ylim=c(-.4, .6))
runs.test(stdres2, threshold=0)
lowest3 = lm(Log.Sales ~ Mortgage + Housing.starts + Time + Time.sq + Q1 + Q2 + Q3)
summary(lowest3)
vif(lowest3)
fourinone(lowest3, stdres=TRUE)
stdres3 = rstandard(lowest3)
plot(Time, stdres3, ylab="Standardized residuals", type="b")
dwtest(Log.Sales ~ Mortgage + Housing.starts + Time + Time.sq + Q1 + Q2 + Q3)
acf(stdres3, xlim=c(1,20), ylim=c(-.4, .5))
runs.test(stdres3, threshold=0)
library(leaps)
leaps(cbind(Mortgage,Housing.starts,Time,Time.sq,Q1,Q2,Q3),Log.Sales,nbest=2)
leaps(cbind(Mortgage,Housing.starts,Time,Time.sq,Q1,Q2,Q3),Log.Sales,nbest=2,method="adjr2")
leaps(cbind(Mortgage,Housing.starts,Time,Time.sq,Q1,Q2,Q3),Log.Sales,nbest=2,method="r2")
#
# The lag() command applies to time series objects, so you can create the lagged variable directly, putting
# in the missing value (NA) yourself. Unfortunately, the leaps command does not allow missing values, so
# you have to tell R to ignore the first observation.
#
Lagged.Log.Sales = c(NA,Log.Sales[1:(n-1)])
leaps(cbind(Lagged.Log.Sales,Mortgage,Housing.starts,Time,Time.sq,Q1,Q2,Q3)[2:n,],Log.Sales[2:n],nbest=2)
leaps(cbind(Lagged.Log.Sales,Mortgage,Housing.starts,Time,Time.sq,Q1,Q2,Q3)[2:n,],Log.Sales[2:n],nbest=2,method="adjr2")
leaps(cbind(Lagged.Log.Sales,Mortgage,Housing.starts,Time,Time.sq,Q1,Q2,Q3)[2:n,],Log.Sales[2:n],nbest=2,method="r2")
lowest4 = lm(Log.Sales ~ Lagged.Log.Sales + Housing.starts + Time + Time.sq + Q1 + Q2 + Q3)
summary(lowest4)
vif(lowest4)
fourinone(lowest4, stdres=TRUE)
stdres4 = rstandard(lowest4)
plot(Time[2:n], stdres4, ylab="Standardized residuals", type="b")
acf(stdres4, xlim=c(1,20), ylim=c(-.4, .5))
runs.test(stdres4, threshold=0)
#
# If we had decided to difference the logged sales response we would just create the variable
# diff.Log.Sales = Log.Sales - Lagged.Log.Sales
#