# Two-sample comparison of weekly study hours by post-school plans.
#
# Workflow: load the data, inspect it graphically, test for equal variances,
# run the matching two-sample t-test, then check normality within each group.
#
# Columns used from the CSV:
#   Study.Hours - quantitative response
#   Plans       - grouping variable; this script uses the codes "GS" and "NS"

# lattice provides histogram() with conditioning on a grouping variable.
# install.packages("lattice")
library(lattice)

# Load the data ----
studying <- read.csv(
  "https://webpages.uidaho.edu/~renaes/Data/Studying.csv",
  header = TRUE
)
head(studying)

# Columns are reached through `data = studying` (or `studying$...`) rather than
# attach(), so nothing is placed on the search path where it could mask or be
# masked by other objects.

# Always look at your data before testing ----
boxplot(Study.Hours ~ Plans, data = studying, main = "Hours studied")

# Can also do a histogram by the grouping variable:
histogram(
  ~ Study.Hours | Plans,
  data = studying,
  main = "Weekly Hours studied \nby post-school plans",
  col = "blueviolet"
)

# Test variances ----
# H0: sigma^2_1 = sigma^2_2  (equal variances)
# Ha: sigma^2_1 != sigma^2_2 (unequal variances)
var.test(Study.Hours ~ Plans, data = studying)

# The var.test(y ~ x) formula syntax is used because the dataset has one
# quantitative variable and one grouping variable. If we had 2 quantitative
# variables (one for each group), we would use the var.test(x, y) syntax.
#
# Decision rule:
#   if pvalue <= alpha: reject H0, variances are unequal (use unpooled)
#   if pvalue >  alpha: do not reject H0, variances are equal (use pooled)
#
# Per the original analysis, H0 is rejected here because pvalue <= alpha (0.05),
# so the unpooled test (Welch test in R) is used -- confirm against the
# var.test() output above.
# Unpooled means using the var.equal = FALSE option in t.test().

# Test means ----
# Is there a significant difference in the study hours of those planning
# graduate school (GS) vs. those not planning any more school (NS)?
# H0: mu1-mu2=0  (mu1=mu2)
# Ha: mu1-mu2!=0 (mu1!=mu2)
t.test(Study.Hours ~ Plans, data = studying, var.equal = FALSE)

# Normality plots ----
# Subset data into school and no school.
school <- subset(studying, Plans == "GS")
noschool <- subset(studying, Plans == "NS")

qqnorm(
  school$Study.Hours,
  xlab = "Hours",
  main = "Students going to grad school"
)
qqline(school$Study.Hours)

qqnorm(
  noschool$Study.Hours,
  xlab = "Hours",
  main = "Students not going to grad school"
)
qqline(noschool$Study.Hours)