## 1. Starting Up

## 1.1 Getting started under Windows

## 1.2 Use of an Editor Script Window

## 1.3 A Short R Session
# Read the population table; header = TRUE takes column names from line one.
austpop <- read.table("d:/austpop.txt", header = TRUE)
austpop
names(austpop)

## 1.3.1 Entry of Data at the Command Line
elasticband <- data.frame(
  stretch = c(46, 54, 48, 50, 44, 42, 52),
  distance = c(148, 182, 173, 166, 109, 141, 166)
)

## 1.3.2 Entry and/or editing of data in an editor window
# edit() opens an editor on the data frame; the result is assigned back.
elasticband <- edit(elasticband)

## 1.3.3 Options for read.table()

## 1.3.4 Options for plot() and allied functions

## 1.4 Further Notational Details

## 1.5 On-line Help
help()
help(plot)
help.search("matrix")
apropos("matrix")

## 1.6 The Loading or Attaching of Datasets
attach("usingR.RData")

## 1.7 Exercises

## 2. An Overview of R

## 2.1 The Uses of R

## 2.1.1 R may be used as a calculator.
2 + 2
sqrt(10)
2 * 3 * 4 * 5
1000 * (1 + 0.075)^5 - 1000  # Interest on $1000, compounded annually
                             # at 7.5% p.a. for five years
pi                           # R knows about pi
2 * pi * 6378                # Circumference of Earth at Equator, in km;
                             # radius is 6378 km
sin(c(30, 60, 90) * pi / 180)  # Convert angles to radians, then take sin()

## 2.1.2 R will provide numerical or graphical summaries of data
load("hills.Rdata")  # Assumes hills.Rdata is in the working directory
summary(hills)

## 2.1.3 R has extensive graphical abilities
pairs(hills)

## 2.1.4 R will handle a variety of specific analyses
options(digits = 3)
cor(hills)
cor(log(hills))
plot(distance ~ stretch, data = elasticband, pch = 16)
elastic.lm <- lm(distance ~ stretch, data = elasticband)
lm(distance ~ stretch, data = elasticband)
summary(lm(distance ~ stretch, data = elasticband))

## 2.1.5 R is an Interactive Programming Language
celsius <- 25:30
fahrenheit <- 9 / 5 * celsius + 32
conversion <- data.frame(Celsius = celsius, Fahrenheit = fahrenheit)
print(conversion)

## 2.2 R Objects
save.image()  # Save contents of workspace, into the file .RData
save.image(file = "archive.RData")  # Save into the file archive.RData
save(celsius, fahrenheit, file = "tempscales.RData")
attach("tempscales.RData")
ls(pos = 2)  # Check the contents of the file that has been attached

## *2.3 Looping
for (i in 1:10) print(i)

# Celsius to Fahrenheit
# Note: the loop variable reuses the name `celsius` assigned in 2.1.5.
for (celsius in 25:30) {
  print(c(celsius, 9 / 5 * celsius + 32))
}

## 2.3.1 More on looping
answer <- 0
for (j in c(31, 51, 91)) {
  answer <- j + answer
}
answer
sum(c(31, 51, 91))  # Same total, without an explicit loop

## 2.4 Vectors
c(2, 3, 5, 2, 7, 1)
3:10  # The numbers 3, 4, .., 10
c(TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE)
c("Canberra", "Sydney", "Newcastle", "Darwin")

## 2.4.1 Joining (concatenating) vectors
x <- c(2, 3, 5, 2, 7, 1)
x
y <- c(10, 15, 12)
y
z <- c(x, y)
z

## 2.4.2 Subsets of Vectors
x <- c(3, 11, 8, 15, 12)  # Assign to x the values 3, 11, 8, 15, 12
x[c(2, 4)]                # Extract elements (rows) 2 and 4
x <- c(3, 11, 8, 15, 12)
x[-c(2, 3)]               # Negative subscripts omit elements 2 and 3
x > 10                    # This generates a vector of logical (T or F)
x[x > 10]

## 2.4.3 The Use of NA in Vector Subscripts
y <- c(1, NA, 3, 0, NA)
y[is.na(y)] <- 0

## 2.4.4 Factors
gender <- c(rep("female", 691), rep("male", 692))
gender <- factor(gender)
levels(gender)  # Assumes gender is a factor, created as above
gender <- relevel(gender, ref = "male")
gender <- factor(gender, levels = c("male", "female"))
gender <- factor(c(rep("female", 691), rep("male", 692)))
table(gender)
gender <- factor(gender, levels = c("male", "female"))
table(gender)
# Erroneous - "male" rows now hold missing values
gender <- factor(gender, levels = c("Male", "female"))
table(gender)
rm(gender)  # Remove gender

## 2.5 Data Frames
Cars93.summary
# Four ways of extracting the same column (named abbrev, in position 4).
type <- Cars93.summary$abbrev
type <- Cars93.summary[, 4]
type <- Cars93.summary[, "abbrev"]
type <- Cars93.summary[[4]]  # Take the object that is stored
                             # in the fourth list element.
## 2.5.1 Data frames as lists

## 2.5.2 Inclusion of character string vectors in data frames

## 2.5.3 Built-in data sets
summary(trees)

## 2.6 Common Useful Functions
# Reference list only. Most of these functions need at least one argument,
# so the bare calls are kept as comments rather than executable statements.
#   print()    # Prints a single R object
#   cat()      # Prints multiple objects, one after the other
#   length()   # Number of elements in a vector or of a list
#   mean()
#   median()
#   range()
#   unique()   # Gives the vector of distinct values
#   diff()     # Replace a vector by the vector of first differences
#              # N. B. diff(x) has one less element than x
#   sort()     # Sort elements into order, but omitting NAs
#   order()    # x[order(x)] orders elements of x, with NAs last
#   cumsum()
#   cumprod()
#   rev()      # reverse the order of vector elements
x <- c(1, 20, 2, NA, 22)
order(x)
x[order(x)]
sort(x)

## 2.6.1 Applying a function to all columns of a data frame
sapply(rainforest, is.factor)
sapply(rainforest[, -7], range)  # The final column (7) is a factor
range(rainforest$branch, na.rm = TRUE)  # Omit NAs, then determine the range
sapply(rainforest[, -7], range, na.rm = TRUE)

## 2.7 Making Tables
library(lattice)  # The data frame barley accompanies lattice
table(barley$year, barley$site)
x <- c(1, 5, NA, 8)
x <- factor(x)
x
factor(x, exclude = NULL)  # exclude = NULL keeps NA as a level

## 2.7.1 Numbers of NAs in subgroups of the data
table(rainforest$species, !is.na(rainforest$branch))

## 2.8 The Search List
search()
library(MASS)
search()
names(primates)
# Presumably Bodywt is a column of primates and is not visible until the
# data frame is attached; try() lets the script continue if the lookup fails.
try(Bodywt)
attach(primates)  # R will now know where to find Bodywt
Bodywt
av <- with(primates, mean(Bodywt))

## 2.9 Functions in R

## 2.9.1 An Approximate Miles to Kilometers Conversion
# Convert miles to kilometres using the approximation 1 mile = 8/5 km.
# `miles` may be a single number or a numeric vector.
miles.to.km <- function(miles) miles * 8 / 5
miles.to.km(175)  # Approximate distance to Sydney, in miles
miles.to.km(c(100, 200, 300))

## 2.9.2 A Plotting function
attach(florida)
plot(BUSH, BUCHANAN, xlab = "Bush", ylab = "Buchanan")
detach(florida)  # In S-PLUS, specify detach("florida")

# Scatterplot of one column of `florida` against another.
#   xvar: name of the column plotted on the x-axis (default "BUSH")
#   yvar: name of the column plotted on the y-axis (default "BUCHANAN")
# `florida` is looked up outside the function, so it must exist when called.
# NOTE(review): the name has the form of an S3 plot method for class
# "florida"; it is only ever called directly here.
plot.florida <- function(xvar = "BUSH", yvar = "BUCHANAN") {
  x <- florida[, xvar]
  y <- florida[, yvar]
  plot(x, y, xlab = xvar, ylab = yvar)
  mtext(
    side = 3, line = 1.75,
    "Votes in Florida, by county, in \nthe 2000 US Presidential election"
  )
}
plot.florida(yvar = "NADER")  # yvar="NADER" over-rides the default
plot.florida(xvar = "GORE", yvar = "NADER")

## 2.10 More Detailed Information

## 2.11 Exercises
## ex1.
## a)
answer <- 0
for (j in 3:5) {
  answer <- j + answer
}

## b)
answer <- 10
for (j in 3:5) {
  answer <- j + answer
}

## c)
answer <- 10
for (j in 3:5) {
  answer <- j * answer
}