data(faithful) attach(faithful) # plot the data to see if we have a linear relationship plot(waiting~eruptions,main="Raw Data Scatterplot for Old Faithful") # insert regression line on graph faith.fit=lm(waiting~eruptions,data=faithful) abline(faith.fit) # correlation cor(waiting,eruptions) # Regression output table summary(faith.fit) # get residuals and predicted values # standardized residuals res=rstudent(faith.fit) pred=fitted(faith.fit) # checking assumptions: # mean of residuals = 0 hist(res) # homogenous (constant) variance plot(pred,res,main="Predicted vs. Residuals") abline(0,0) # independence of residuals order=rep(1:length(res)) plot(order,res,type='l') abline(0,0) # QQplot (normal probability plot) qqnorm(res) qqline(res) # another way to check independence of residuals # install car package if needed # want the dw to be between 1.5 and 2.5 library(car) dwt(faith.fit) # influencial points and outliers (see website for R bubble plot that will explain where to look # Influence plot in car-package combines the studentized residuals, hat values and Cook's distances # area of the circles correspond to Cook's distances which(res>abs(2)) avghat=mean(hatvalues(faith.fit)) # values with hat>avghat*2 have leverage cd=4/(nrow(faithful)-length(coefficients(faith.fit))-1)# values with cd>cdcutoff are influential influencePlot(faith.fit, xlim=c(0,avghat*3.5),ylim=c(-5,5)) rbind(avghat*2,cd) with(faithful,plot(eruptions,waiting,xlab="eruptions",ylab="waiting",main="Regression with Old Faithful")) with(faithful,points(eruptions,predict(faith.fit),pch="r")) with(faithful,points(eruptions,predict(faith.fit, interval="confidence")[,2],pch="c")) with(faithful,points(eruptions,predict(faith.fit, interval="confidence")[,3],pch="c")) abline(faith.fit) # here we print out the confidence and/or prediction interval values View(predict(faith.fit, interval="confidence")) View(predict(faith.fit, interval="prediction")) # can make specific estimations: newtime=data.frame(eruptions=c(3,4,5)) predict(faith.fit,newdata=newtime,interval='confidence') predict(faith.fit,newdata=newtime,interval='prediction')