# Converting text columns into dates ----
#
# Reads nonsales.csv, converts the Birth_Date and Hire_Date columns (entered
# as dd/mm/YYYY text) into Date columns named Birthdate and Hiredate, then
# lists the rows whose hire date falls before the birth date.

# NOTE(review): setwd() is kept because read.csv() below uses a relative path
# and later code may also rely on the working directory.
setwd("S:/Courses/stat-renaes/Stat404")
nonsales <- read.csv("nonsales.csv", header = TRUE)

# Look at Birth_Date and Hire_Date as they were read in.
nonsales$Hire_Date
nonsales$Birth_Date
# If the end of each printed vector says
# "Levels: 01/01/1969 01/02/1969 01/02/1978 01/03/1984 01/05/1964 ... 31/12/1978"
# then the variable is being treated as a factor (like levels of a treatment)
# rather than as a number or a date. (Since R 4.0 read.csv() returns character
# vectors by default instead; as.Date() accepts either.)

# Create new variables by converting the original columns to dates.
# as.Date() needs a format string, and the quotes are required:
# "%m/%d/%Y" means mm/dd/YYYY, etc. (web search for "R as.Date format").
# The dates were entered as dd/mm/YYYY.
# The source columns are Birth_Date / Hire_Date; Birthdate / Hiredate do not
# exist until these two assignments create them.
nonsales$Birthdate <- as.Date(nonsales$Birth_Date, format = "%d/%m/%Y")
nonsales$Hiredate <- as.Date(nonsales$Hire_Date, format = "%d/%m/%Y")

# NOTE(review): attach() is kept only in case later code refers to the bare
# column names; everything below uses explicit nonsales$ references.
attach(nonsales)

# The new columns are Dates, no longer factors.
nonsales$Birthdate
nonsales$Hiredate

# Now we can calculate differences between the (new) dates and tell whether
# any hire dates are invalid: a negative difference means the hire date is
# earlier than the birth date.
nonsales$datediff <- difftime(
  nonsales$Hiredate,
  nonsales$Birthdate,
  units = "days"
)
k <- which(nonsales$datediff < 0)
nonsales[k, ]