# One-way ANOVA of soil response by mixture type, followed by residual
# diagnostics and influence checks.
#
# Input : "soil.csv" in the working directory, with columns `soil` (response)
#         and `mix` (mixture type, stored as a number in the file).
# Needs : the `car` package (Anova, durbinWatsonTest, influencePlot).

library(car)

x <- read.csv("soil.csv", header = TRUE)

boxplot(soil ~ mix, data = x, main = "Boxplot of Soils by Mixture type")

# ---- Naive fit: mix treated as numeric ----
soil.fit <- lm(soil ~ mix, data = x)
anova(soil.fit)
# or
soil.fit <- aov(soil ~ mix, data = x)
summary(soil.fit)

# Notice the df for treatment is wrong! Why?
# Because the mix variable is numeric and is not being treated as a factor,
# so it is fitted as a single slope (1 df) instead of (levels - 1) df.
# Change the mix variable type to factor and try again.

# ---- Correct fit: mix treated as a factor ----
soil.fit <- lm(soil ~ factor(mix), data = x)
anova(soil.fit)
# or
soil.fit <- aov(soil ~ factor(mix), data = x)
summary(soil.fit)
# Note the df differences now (this 2nd analysis set is the correct one).

# ---- Different types of Sums of Squares ----
# Type I   is rarely used -- best for textbook examples only.
# Type II  is for when you have no significant interaction.
# Type III is the best to use -- especially with an unbalanced design.
# SAS gives I and III automatically; R gives only type I unless you ask for
# others (car::Anova).
summary(soil.fit)  # soil.fit is the aov() model at this point (type I)
Anova(soil.fit, type = "II")
Anova(soil.fit, type = "III")

# ---- Treatment means ----
tapply(x$soil, x$mix, mean, na.rm = TRUE)

# ---- Assumption checks ----
# rstudent() returns (externally) studentized residuals; the plot titles below
# are kept as originally written.
res <- rstudent(soil.fit)
pred <- fitted.values(soil.fit)

par(mfrow = c(2, 2))
hist(res, main = "Histogram of Residuals", xlab = "Residuals")
plot(pred, res,
     main = "Predicted vs. Standardized Residuals",
     xlab = "Predicted Values", ylab = "St. Residuals")
abline(h = 0)
# Observation index in file order; named obs_order so base::order() is not
# shadowed. Axis labels are set explicitly to match the original formula plot.
obs_order <- seq_along(res)
plot(obs_order, res, type = "l",
     main = " Residuals vs. Order", xlab = "order", ylab = "res")
abline(h = 0)
qqnorm(res, xlim = c(-3, 3), ylim = c(-3, 3))
qqline(res)
par(mfrow = c(1, 1))

durbinWatsonTest(soil.fit)  # or dwt(soil.fit)

# ---- Influence ----
# The influence plot in the car package combines the studentized residuals,
# hat values and Cook's distances; the area of the circles corresponds to
# Cook's distance.

# Rule-of-thumb Cook's distance cutoff: 4 / (n - k - 1), where n is the number
# of residuals and k the number of fitted coefficients. The earlier version
# had n and k swapped, which made the cutoff negative.
cd <- 4 / (length(soil.fit$residuals) - length(soil.fit$coefficients) - 1)

# Average leverage, with the usual 2x and 3x reference values.
avghat <- mean(hatvalues(soil.fit))
avghat
avghat * 2
avghat * 3

influencePlot(soil.fit, xlim = c(0, avghat * 3.5), ylim = c(-5, 5))