data(faithful)
attach(faithful)

# plot the data to see if we have a linear relationship

plot(waiting~eruptions,main="Raw Data Scatterplot for Old Faithful")
# insert regression line on graph
faith.fit=lm(waiting~eruptions,data=faithful)
abline(faith.fit)

# correlation
cor(waiting,eruptions)

# Regression output table
summary(faith.fit)

# get residuals and predicted values
# standardized residuals
res=rstudent(faith.fit)
pred=fitted(faith.fit)

# checking assumptions:
# mean of residuals = 0
hist(res)
# homogenous (constant) variance
plot(pred,res,main="Predicted vs. Residuals")
abline(0,0)
# independence of residuals
order=rep(1:length(res))
plot(order,res,type='l')
abline(0,0)
# QQplot (normal probability plot)
qqnorm(res)
qqline(res)

# another way to check independence of residuals
# install car package if needed
# want the dw to be between 1.5 and 2.5
library(car)
dwt(faith.fit)

# influencial points and outliers (see website for R bubble plot that will explain where to look
# Influence plot in car-package combines the studentized residuals, hat values and Cook's distances
# area of the circles correspond to Cook's distances
which(res>abs(2))
avghat=mean(hatvalues(faith.fit)) # values with hat>avghat*2 have leverage
cd=4/(nrow(faithful)-length(coefficients(faith.fit))-1)# values with cd>cdcutoff are influential
influencePlot(faith.fit, xlim=c(0,avghat*3.5),ylim=c(-5,5))
rbind(avghat*2,cd)

with(faithful,plot(eruptions,waiting,xlab="eruptions",ylab="waiting",main="Regression with Old Faithful"))
with(faithful,points(eruptions,predict(faith.fit),pch="r"))
with(faithful,points(eruptions,predict(faith.fit, interval="confidence")[,2],pch="c"))
with(faithful,points(eruptions,predict(faith.fit, interval="confidence")[,3],pch="c"))

abline(faith.fit) 

# here we print out the confidence and/or prediction interval values
View(predict(faith.fit, interval="confidence"))
View(predict(faith.fit, interval="prediction"))

# can make specific estimations:
newtime=data.frame(eruptions=c(3,4,5))
predict(faith.fit,newdata=newtime,interval='confidence')
predict(faith.fit,newdata=newtime,interval='prediction')