# The examples I am working with are from an introductory text for R called
# "Verzani-SimpleR", found at the R CRAN website:
# https://cran.r-project.org/doc/contrib/Verzani-SimpleR.pdf

# Keeping track of typos of various drafts.
# Each element is the number of typos found on one page of the draft.
typos.draft1 <- c(2, 3, 0, 3, 1, 0, 0, 1)
typos.draft2 <- c(0, 3, 0, 3, 1, 0, 0, 1)

# We can apply commands (functions) to these vectors:
mean(typos.draft1)
mean(typos.draft2)
median(typos.draft1)
median(typos.draft2)
var(typos.draft1)
var(typos.draft2)
sd(typos.draft1)
sd(typos.draft2)

# Build the second draft again, this time by copying and editing the first.
typos.draft2 <- typos.draft1  # make a copy with a new name
typos.draft2[1] <- 0          # assigns the first page 0 typos
typos.draft2

# Indexing: positive indices select pages, negative indices drop them.
typos.draft2[2]             # prints the 2nd page's value
typos.draft2[4]             # prints the 4th page's value
typos.draft2[-4]            # prints all but the 4th page
typos.draft2[c(1, 3, 5)]    # prints only the 1st, 3rd and 5th pages

max(typos.draft2)           # largest number of typos on a page
min(typos.draft2)           # smallest number of typos on a page

typos.draft2 == 3           # which pages have 3 typos?
# The double equals sign (==) tests every value to see if it is equal to 3.
# The 2nd and 4th answer yes (TRUE) and the others no (FALSE).
# How can we get the page numbers rather than TRUE or FALSE?
which(typos.draft2 == 3)
# Now the answer tells us which page(s) have 3 typos.
sum(typos.draft2)      # total number of typos
sum(typos.draft2 > 0)  # how many pages with typos

# Difference between the two drafts, page by page.
# (Subtracting typos.draft2 from itself would always give zeros.)
typos.draft1 - typos.draft2

# ---- Reading in data with scan(), read.table() and read.csv() ----

# scan(): one column, input by hand.
# At the console you would type
#   x <- scan()
# then hit enter to start scan(), hit enter after every entry, and hit enter
# twice after the last value to stop scan().
# Here the same values are supplied through the `text` argument so that the
# script can be run as a whole without waiting for keyboard input.
x <- scan(text = "1
2
3
4")

# scan(): pasting in one column.
# At the console: type x <- scan(), paste the column, then hit enter twice
# after the last value to stop scan().
x <- scan(text = "1
2
3
4")

# scan() with matrix() to read more than 1 column of data.
# byrow = TRUE fills the matrix row by row, so each input line becomes one
# row with the 3 columns: meat, salt, y.
data <- matrix(
  scan(text = "1 1 7.75
1 1 8.25
1 2 8.25
1 2 8.75
1 3 7.75
1 3 8.25
2 1 6.75
2 1 7.25
2 2 7.25
2 2 7.75
2 3 6.75
2 3 7.25
3 1 6.25
3 1 6.75
3 2 8.25
3 2 7.75
3 3 8.75
3 3 9.25"),
  ncol = 3,
  byrow = TRUE
)
data

HotDogs1 <- data.frame(data)
colnames(HotDogs1) <- c("meat", "salt", "y")

y <- HotDogs1$y
# to make sure that meat is a factor, not numeric
# (this creates a separate variable; the column inside HotDogs1 is unchanged)
meat <- as.factor(HotDogs1$meat)
# to make sure that salt is a factor, not numeric
salt <- as.factor(HotDogs1$salt)
# names for the 3 levels of meat
levels(meat) <- c("beef", "pork", "chic")

# this removes the original dataset called data and leaves us with the new
# one with row and column labels
rm(data)
HotDogs1

# Location of the CSV copy of the data, used by both readers below.
hotdogs_path <- "S:/Courses/stat-renaes/Data/hotdogs.csv"

# read.table(): the separator has to be given explicitly for a CSV file.
x <- read.table(hotdogs_path, header = TRUE, sep = ",")
x

# read.csv(): comma is already the default separator.
# The result gets its own name so that it does not overwrite the rating
# vector `y` extracted from HotDogs1 above.
hotdogs_csv <- read.csv(hotdogs_path, header = TRUE)
hotdogs_csv

# Accessing variables in a dataset:
# all variable names have a 2-level name.
# An example with the Hot Dogs dataset:
# the variable names are meat, salt and y (y is the response -- taste rating).
# The variable names (with the 2-level names) have to include the dataset name.
# The default way to "call" a variable of a dataset is:
#   datasetname$variablename
# The $ is necessary to extract "objects" from a dataset,
# so the names from the Hot Dog dataset are:
HotDogs1$meat
HotDogs1$salt
HotDogs1$y
# But who wants to always type that much? attach() will help!
# General form: attach(datasetname)
# attach() will "attach" the variable names to the variables and removes the
# need for a 2-level name.
# Caveat: variables with the same names that already exist in the workspace
# (meat, salt and y were created earlier in this script) take precedence over
# the attached columns; R reports them as masked by .GlobalEnv when attaching.
attach(HotDogs1)
meat
salt
y

# When you are finished with your R session, use detach().
# General form: detach(datasetname)
detach(HotDogs1)

# After detach() the short names no longer refer to the columns of HotDogs1;
# they only resolve if workspace variables with those names exist.
meat
salt
y

# The 2-level names always work, attached or not.
HotDogs1$meat
HotDogs1$salt
HotDogs1$y