## 6. Multivariate and Tree-based Methods
#
# This script uses the `possum` data frame, which is not created here.
# NOTE(review): presumably `possum` comes from the DAAG package and is loaded
# earlier in the file -- confirm before running this section on its own.

## 6.1 Multivariate EDA, and Principal Components Analysis
library(lattice)  # Provides xyplot(); harmless if already attached.

# Scatterplot matrices of columns 6 to 14, coloured first by sex, then by site.
pairs(possum[, 6:14], col = palette()[as.integer(possum$sex)])
pairs(possum[, 6:14], col = palette()[as.integer(possum$site)])

# We need to exclude missing values: keep rows where footlgth is present.
here <- !is.na(possum$footlgth)
print(sum(!here))  # Check how many values are missing

# Principal components of the log-transformed measurements.
possum.prc <- princomp(log(possum[here, 6:14]))

# Plot scores on the second pc versus scores on the first pc,
# by population and sex, identified by site.
# Every vector is subset with `here` so that it lines up row-for-row with the
# scores; an unsubsetted `groups` vector would be longer than the scores and
# would attach the wrong site to each point.
xyplot(possum.prc$scores[, 2] ~ possum.prc$scores[, 1] |
         possum$Pop[here] + possum$sex[here],
       groups = possum$site[here],
       auto.key = list(columns = 3))

## 6.2 Cluster Analysis

## 6.3 Discriminant Analysis
library(MASS)  # Only if not already attached.

# Recomputed so this section can be run independently of section 6.1.
here <- !is.na(possum$footlgth)

# Linear discriminant analysis of site on the nine body measurements,
# restricted to rows with a recorded footlgth.
possum.lda <- lda(
  site ~ hdlngth + skullw + totlngth + taillgth + footlgth +
    earconch + eye + chest + belly,
  data = possum,
  subset = here
)
options(digits = 4)  # Note: changes the printing precision for the session.
possum.lda$svd       # Examine the singular values

# Scatterplot matrix for scores on the first 3 canonical variates,
# as in Figure 22.
plot(possum.lda, dimen = 3)

## 6.4 Decision Tree models (Tree-based models)
library(rpart)

# Use fgl: Forensic glass fragment data; from MASS package
glass.tree <- rpart(type ~ RI + Na + Mg + Al + Si + K + Ca + Ba + Fe,
                    data = fgl)
plot(glass.tree)   # Draw the tree skeleton ...
text(glass.tree)   # ... then label it.
summary(glass.tree)

## 6.5 Exercises

## 6.6 References