# Demonstration of combining data frames with merge(), rbind(), cbind()
# and c(). Each bare object name below is there so the value is printed
# when the script is run at top level.

# Data creation ----

# Denmark employees
First <- c("Lars", "Kari", "Jonas")
Gender <- c("M", "F", "M")
# rep() saves typing the same value repeatedly; this is equivalent to
# Country <- c("Denmark", "Denmark", "Denmark")
Country <- rep("Denmark", 3)

# Create the data frame with the Denmark employees
empsDK <- data.frame(First, Gender, Country)
empsDK

# France employees (the First/Gender/Country vectors are deliberately reused)
First <- c("Pierre", "Sophie")
Gender <- c("M", "F")
Country <- rep("France", 2)

empsFR <- data.frame(First, Gender, Country)
empsFR

# Merging like structures ----

# Merge on Country only, sorted by the key.
# The two frames share no Country value, so with all = TRUE every row is
# kept, but the non-key columns are not combined: they come back as
# First.x / Gender.x and First.y / Gender.y, i.e. "redundant" variables and
# more columns than we wanted.
empsAll <- merge(empsDK, empsFR, by = "Country", all = TRUE, sort = TRUE)
empsAll

# Use every shared column as a BY variable instead.
# Columns listed in `by` are matched and returned once; any column that is
# NOT listed in `by` is kept separately for each input (with .x/.y suffixes).
empsAll <- merge(
  empsDK, empsFR,
  by = c("First", "Gender", "Country"),
  all = TRUE, sort = TRUE
)
empsAll

# Same merge without sorting on the BY columns.
# NOTE: with sort = FALSE the rows are simply not sorted; merge() documents
# the resulting row order as unspecified, so do not rely on it matching the
# input order.
empsAll <- merge(
  empsDK, empsFR,
  by = c("First", "Gender", "Country"),
  all = TRUE, sort = FALSE
)
empsAll

# Comparing rbind(), cbind(), c() and merge() ----

df1 <- data.frame(name = c("tim", "tim", "tim", "ron"), val = 1:4)
df2 <- data.frame(name = c("tim", "tim", "ron", "ron"), val = 1:4)
df1
df2

# Stack the rows of both frames (duplicates are kept)
rbind(df1, df2)

# Put the frames side by side; the column names name/val appear twice
cbind(df1, df2)

# Concatenate the frames as lists: the result is a plain list of the four
# column vectors, not a data frame
c(df1, df2)

# Full outer join on both columns: rows present in both frames appear once,
# rows present in only one frame are kept as well
merge(df1, df2, by = c("name", "val"), all = TRUE)