## *7. R Data Structures
##
## Walk-through of vectors, missing values, data frames, factors, lists,
## matrices and arrays. Most statements are bare expressions meant to be run
## interactively so that their printed value can be inspected.
##
## NOTE(review): `barley`, `islandcities` and `elasticband` are not created in
## this script. Presumably they are data sets from attached packages (barley
## looks like the lattice data set; the other two look like DAAG data sets) --
## confirm and attach the right packages before running.

## 7.1 Vectors

## 7.1.1 Subsets of Vectors
# Elements of a named vector can be selected by name.
c(Andreas = 178, John = 185, Jeff = 183)[c("John", "Jeff")]

## 7.1.2 Patterned Data
rep(c(2, 3, 5), 4)           # repeat the whole vector four times
rep(c(2, 3, 5), c(4, 4, 4))  # An alternative is rep(c(2, 3, 5), each = 4)

## 7.2 Missing Values
x <- c(1, 6, 2, NA)
is.na(x)  # TRUE where an NA appears, and otherwise FALSE
x == NA   # All elements are set to NA
NA == NA  # A comparison with NA is itself NA, never TRUE

# The naive form `y[x > 2] <- x[x > 2]` cannot run here: `x > 2` contains an
# NA, and R does not allow NAs in a subscripted assignment whose replacement
# has more than one element. `y` also has to exist before it can be assigned
# into, so it is created first (zeros are only a placeholder), and the
# selection is restricted to the non-missing elements of x.
y <- rep(0, length(x))
keep <- !is.na(x) & x > 2
y[keep] <- x[keep]

## 7.3 Data frames

## 7.3.1 Extraction of Component Parts of Data frames
names(barley)
levels(barley$site)
# Rows for year 1932 at site Duluth; keep only the variety and yield columns.
Duluth1932 <- barley[
  barley$year == "1932" & barley$site == "Duluth",
  c("variety", "yield")
]

## 7.3.2 Data Sets that Accompany R Packages
data(package = "datasets")

## 7.4 Data Entry Issues

## 7.4.1 Idiosyncrasies

## 7.4.2 Missing values when using read.table()

## 7.4.3 Separators when using read.table()

## 7.5 Factors and Ordered Factors
as.character(islandcities$country)
unclass(islandcities$country)
table(islandcities$country)
lev <- levels(islandcities$country)
lev[c(7, 4, 6, 2, 5, 3, 1)]
# Rebuild the factor with its levels in the permuted order shown above.
country <- factor(islandcities$country, levels = lev[c(7, 4, 6, 2, 5, 3, 1)])
table(country)

## 7.6 Ordered Factors
stress.level <- rep(c("low", "medium", "high"), 2)
ordf.stress <- ordered(stress.level, levels = c("low", "medium", "high"))
ordf.stress
# Ordered factors support comparisons against a level name.
ordf.stress < "medium"
ordf.stress >= "medium"
class(ordf.stress)

## 7.7 Lists
elastic.lm <- lm(distance ~ stretch, data = elasticband)
names(elastic.lm)
# Three ways to extract the same component ...
elastic.lm$coefficients
elastic.lm[["coefficients"]]
elastic.lm[[1]]
# ... whereas single brackets return a sub-list that still wraps it.
elastic.lm[1]
options(digits = 3)
elastic.lm$residuals
elastic.lm$call
mode(elastic.lm$call)

## *7.8 Matrices and Arrays
xx <- matrix(1:6, ncol = 3)  # Equivalently, enter matrix(1:6, nrow = 2)
xx
x <- as.vector(xx)
dim(xx)  # prints: [1] 2 3
x34 <- matrix(1:12, ncol = 4)
x34
x34[2:3, c(1, 4)]  # Extract rows 2 & 3 & columns 1 & 4
x34[2, ]           # Extract the second row
x34[-2, ]          # Extract all rows except the second
x34[-2, -3]        # Extract the matrix obtained by omitting row 2 & column 3

## 7.8.1 Arrays
x <- 1:24
dim(x) <- c(2, 12)    # reshape the 24 values as a 2 x 12 matrix
x
dim(x) <- c(3, 4, 2)  # reshape the same values as a 3 x 4 x 2 array
x

## 7.8.2 Conversion of Numeric Data frames into
## Matrices
## (the line above is the tail of the section heading that ends the previous
## line; it is kept as a comment so that it is not evaluated as a symbol)

## 7.9 Exercises
## Each exercise starts again from the same vector and overwrites `answer`
## in place, so only the result of the last exercise is left at the end.

## a) Every element is replaced by the larger of itself and its (already
##    updated) predecessor, giving a running maximum -- the same values as
##    cummax() on the starting vector.
answer <- c(2, 7, 1, 5, 12, 3, 4)
# seq_along(answer)[-1] visits positions 2, 3, ... and, unlike
# 2:length(answer), is empty rather than counting backwards on short vectors.
for (j in seq_along(answer)[-1]) {
  answer[j] <- max(answer[j], answer[j - 1])
}

## b) Every element is replaced by the sum of itself and its (already
##    updated) predecessor, giving a running total -- the same values as
##    cumsum() on the starting vector.
answer <- c(2, 7, 1, 5, 12, 3, 4)
for (j in seq_along(answer)[-1]) {
  answer[j] <- sum(answer[j], answer[j - 1])
}