## 8. Functions

## 8.1 Functions for Confidence Intervals and Tests

## 8.1.1 The t-test and associated confidence interval

## 8.1.2 Chi-Square tests for two-way tables

## 8.2 Matching and Ordering

# Usage templates. These are NOT runnable as written: the placeholder
# arguments were lost from the original text, and the <...> names below
# are reconstructed stand-ins (NOTE(review): confirm against the source
# document).
#
#   match(<vec1>, <vec2>)  # For each element of <vec1>, returns the
#                          # position of the first occurrence in <vec2>
#   order(<vector>)        # Returns the vector of subscripts giving
#                          # the order in which elements must be taken
#                          # so that <vector> will be sorted.
#   rank(<vector>)         # Returns the ranks of the successive elements.

x <- rep(1:5, rep(3, 5))
x
two4 <- x %in% c(2, 4)
two4
# Now pick out the 2s and the 4s
x[two4]

## 8.3 String Functions

# Usage templates (placeholders reconstructed, see the note in 8.2):
#
#   substring(<text>, <first>, <last>)
#   nchar(<text>)  # Returns vector of number of characters in each element.

## *8.3.1 Operations with Vectors of Text Strings - A Further Example

# Keep only the first blank-separated word of each make, i.e. the brand.
# NOTE(review): Cars93 is not created in this script; presumably it is the
# data set shipped in the MASS package and must be loaded beforehand.
car.brandnames <- sapply(
  strsplit(as.character(Cars93$Make), " ", fixed = TRUE),
  function(x) x[1]
)
car.brandnames[1:5]

## 8.4 Application of a Function to the Columns of an Array or Data Frame

# Usage templates (placeholders reconstructed, see the note in 8.2):
#
#   apply(<array>, <dimension>, <function>)
#   lapply(<list>, <function>)  # N. B. A dataframe is a list.
#                               # Output is a list.
#   sapply(<list>, <function>)  # As lapply(), but simplify (e.g. to a
#                               # vector or matrix), if possible.

## 8.4.1 apply()

apply(airquality, 2, mean)  # All elements must be numeric!
apply(airquality, 2, mean, na.rm = TRUE)

## 8.4.2 sapply()

# Count the missing values in each column.
sapply(airquality, function(x) sum(is.na(x)))

# NOTE(review): moths is not created in this script; it must already be
# available (e.g. from an attached data package) before this line runs.
sapply(moths, is.factor)  # Determine which columns are factors

# How many levels does each factor have?
# Number of levels for each factor column of moths; 0 for non-factors.
sapply(moths, function(x) if (is.factor(x)) length(levels(x)) else 0)

## *8.5 aggregate() and tapply()

str(cabbages)
# with() evaluates the call inside the data frame without leaving cabbages
# attached to the search path afterwards (the original attach() was never
# detached).
with(
  cabbages,
  aggregate(HeadWt, by = list(Cult = Cult, Date = Date), FUN = mean)
)

## *8.6 Merging Data Frames

# NOTE(review): Cars93.summary is not created in this script; it must exist
# already, with row names that match the values of Cars93$Type.
Cars93.summary
# Add column 4 of the summary table to every row of Cars93, matching
# Cars93$Type against the summary's row names. drop = FALSE keeps the single
# column as a data frame so that its row names survive for the merge.
new.Cars93 <- merge(
  x = Cars93,
  y = Cars93.summary[, 4, drop = FALSE],
  by.x = "Type",
  by.y = "row.names"
)

## 8.7 Dates

# Electricity Billing Dates
dd <- as.Date(c("2003/08/24", "2003/11/23", "2004/02/22", "2004/05/23"))
diff(dd)

as.Date("1/1/1960", format = "%d/%m/%Y")
as.Date("1:12:1960", format = "%d:%m:%Y")
as.Date("1960-12-1") - as.Date("1960-1-1")
as.Date("31/12/1960", format = "%d/%m/%Y")
# Dates are stored as a day count; as.integer() exposes that count.
as.integer(as.Date("1/1/1970", format = "%d/%m/%Y"))
as.integer(as.Date("1/1/2000", format = "%d/%m/%Y"))

dec1 <- as.Date("2004-12-1")
format(dec1, format = "%b %d %Y")
format(dec1, format = "%a %b %d %Y")

## 8.8 Writing Functions and other Code

# Convert temperatures from degrees Fahrenheit to degrees Celsius.
#   fahrenheit: numeric vector of temperatures (default 32:40).
# Returns a numeric vector of the same length as the input.
fahrenheit2celsius <- function(fahrenheit = 32:40) {
  (fahrenheit - 32) * 5 / 9
}
# Now invoke the function
fahrenheit2celsius(c(40, 50, 60))

# Mean and standard deviation of a numeric vector.
#   x: numeric vector (default 1:10).
# Returns a named numeric vector with elements "mean" and "SD".
mean.and.sd <- function(x = 1:10) {
  av <- mean(x)
  # Named std_dev rather than sd so that stats::sd() is not masked locally.
  std_dev <- sqrt(var(x))
  c(mean = av, SD = std_dev)
}
# Now invoke the function
mean.and.sd()
# NOTE(review): hills is not created in this script; it must already exist.
mean.and.sd(hills$climb)

## 8.8.1 Syntax and Semantics

## 8.8.2 A Function that gives Data Frame Details

# Number of levels of x if x is a factor, otherwise 0.
faclev <- function(x) {
  if (is.factor(x)) length(levels(x)) else 0
}
sapply(moths, faclev)
# The same computation with an anonymous function.
sapply(moths, function(x) if (is.factor(x)) length(levels(x)) else 0)

# For each column of df: the number of factor levels, or 0 if the column is
# not a factor.
#   df: a data frame (default moths).
check.df <- function(df = moths) {
  sapply(df, function(x) if (is.factor(x)) length(levels(x)) else 0)
}

## 8.8.3 Compare Working Directory Data Sets with a Reference Set

# Snapshot of the object names that exist at this point.
dsetnames <- objects()

# Names of objects at position 1 of the search path that are not listed in
# objnames.
#   objnames: character vector of reference names (default dsetnames).
# Returns a character vector of the names that have been added.
additions <- function(objnames = dsetnames) {
  newnames <- objects(pos = 1)
  newnames[!(newnames %in% objnames)]
}
additions(dsetnames)

## 8.8.4 Issues for the Writing and Use of Functions

## 8.8.5 Functions as aids to Data Management

# Attach a descriptive title to the data frame as an attribute.
attributes(elasticband)$title <-
  "Extent of stretch of band, and Resulting Distance"

## 8.8.6 Graphs

## 8.8.7 A Simulation Example

guesses <- runif(100)
# 1 where the uniform draw falls below 0.2, otherwise 0.
correct.answers <- 1 * (guesses < 0.2)
correct.answers

## 8.8.8 Poisson Random Numbers

## 8.9 Exercises

## ex4.

# Plot column `one` of the milk data frame against column `four` on common
# axis limits, and add the line y = x.
# Takes no arguments; reads the data frame milk from the calling environment.
# Side effect: par(pin = ...) is left set on the current graphics device, as
# in the original (restoring it would shift the coordinate system for any
# points or lines added after the call).
plot.one <- function() {
  xyrange <- range(milk)     # Range of all values in the data frame
  par(pin = c(6.75, 6.75))   # Set plotting area = 6.75 in. by 6.75 in.
  # The formula interface is needed for `data = milk` to take effect; the
  # default plot method ignores `data`, so `four` and `one` would not be
  # found. `one ~ four` puts four on the x-axis and one on the y-axis.
  plot(one ~ four, data = milk, xlim = xyrange, ylim = xyrange, pch = 16)
  abline(0, 1)               # Line where four = one
}