# Two-sample comparison of weekly study hours by post-school plans.
# Packages are loaded up front so a missing dependency fails immediately.
# install.packages("lattice")
library(lattice)

# Read the data; columns used below are Study.Hours (quantitative) and
# Plans (grouping variable).
studying <- read.csv(
  "https://webpages.uidaho.edu/~renaes/Data/Studying.csv",
  header = TRUE
)
# No attach(): every call below receives the data frame through `data =`,
# so nothing depends on (or is masked by) objects on the search path.
head(studying)

# always look at your data before testing

boxplot(Study.Hours ~ Plans, data = studying, main = "Hours studied")

# can also do histogram by the grouping variable:
histogram(
  ~ Study.Hours | Plans,
  data = studying,
  main = "Weekly Hours studied \nby post-school plans",
  col = "blueviolet"
)


# test variances:
# H0: sigma^2_1 = sigma^2_2 (equal variances)
# Ha: sigma^2_1 != sigma^2_2 (unequal variances)

var.test(Study.Hours ~ Plans, data = studying)
# the var.test(y ~ x) formula syntax is used because the dataset has one
# quantitative variable and one grouping variable. If we had 2 quantitative
# variables (one for each group) we would use the var.test(x, y) syntax instead

# if pvalue <= alpha: reject H0 and variances are unequal (use unpooled)
# if pvalue > alpha: do not reject H0 and variances are equal (use pooled)

# H0 is rejected because pvalue<=alpha(0.05), use unpooled (Welch test in R)
# unpooled means use var.equal=FALSE option in t.test()

# is there a significant difference in the study hours of those
# planning graduate school (GS) vs. those not planning any more school (NS)

# H0: mu1-mu2=0 (mu1=mu2)
# Ha: mu1-mu2!=0 (mu1!=mu2)

t.test(Study.Hours ~ Plans, data = studying, var.equal = FALSE)

# Normal Q-Q plots to check normality within each group.
# Subset the data into grad-school (GS) and no-more-school (NS) students.
school <- subset(studying, Plans == "GS")
noschool <- subset(studying, Plans == "NS")


# qqnorm() draws theoretical quantiles on the x axis and the sample values on
# the y axis (datax = FALSE by default), so the "Hours" label belongs on y.
qqnorm(school$Study.Hours, ylab = "Hours",
       main = "Students going to grad school")
qqline(school$Study.Hours)
qqnorm(noschool$Study.Hours, ylab = "Hours",
       main = "Students not going to grad school")
qqline(noschool$Study.Hours)