RDX2 X  acc.train.test sourceB 6function(dset=Golub.BM, cl=cancer.BM, traintest=gp.id, 8 levels=1:2, nfeatures=NULL, print.acc=FALSE){ " traintest <- factor(traintest) , train <- traintest==levels(traintest)[1] . testset <- traintest==levels(traintest)[2]  cl1 <- cl[train]  cl2 <- cl[testset]  ng1 <- length(cl1)  ng2 <- length(cl2) , maxg <- max(c(ng1-length(unique(cl1))-2, - ng2-length(unique(cl2))-2))  if(is.null(nfeatures)){  max.features <- maxg ! nfeatures <- 1:max.features } else  { D if(max(nfeatures)>maxg)nfeatures <- nfeatures[nfeatures<=maxg] $ max.features <- max(nfeatures)  } B ord1 <- order.features(dset, cl, subset=train)[1:max.features] D ord2 <- order.features(dset, cl, subset=testset)[1:max.features] ord <- unique(c(ord1, ord2))  sub1 <- match(ord1, ord)  sub2 <- match(ord2, ord) * df1 <- data.frame(t(dset[ord, train])) , df2 <- data.frame(t(dset[ord, testset])) + acc1 <- acc2 <- numeric(max(nfeatures))  for(i in nfeatures){ cat(paste(i, ":", sep="")) 7 df1.lda <- lda(df1[, sub1[1:i], drop=FALSE], cl1) J hat2 <- predict(df1.lda, newdata=df2[, sub1[1:i], drop=FALSE])$class  tab <- table(hat2, cl2) 6 acc1[i] <- sum(tab[row(tab)==col(tab)])/sum(tab) 7 df2.lda <- lda(df2[, sub2[1:i], drop=FALSE], cl2) J hat1 <- predict(df2.lda, newdata=df1[, sub2[1:i], drop=FALSE])$class  tab <- table(hat1, cl1) 6 acc2[i] <- sum(tab[row(tab)==col(tab)])/sum(tab)  } cat("\n")  if(print.acc){  print(round(acc1,2))  print(round(acc2,2))  }  maxacc1 <- max(acc1)  maxacc2 <- max(acc2) sub1 <- match(maxacc1, acc1) sub2 <- match(maxacc2, acc2) ! nextacc1 <- max(acc1[acc1<1]) ! nextacc2 <- max(acc1[acc1<2]) 5 lower1 <- maxacc1-sqrt(nextacc1*(1-nextacc1)/ng1) 5 lower2 <- maxacc2-sqrt(nextacc2*(1-nextacc2)/ng2) & lsub1 <- min((1:ng1)[acc1>lower1]) & lsub2 <- min((1:ng2)[acc2>lower2]) + lower <- c("Best accuracy, less 1SD ", G paste(paste(round(c(lower1, lower2),2), c(lsub1, lsub2), ? sep=" ("), " features) ", sep=""))  best <- c("Best accuracy", F paste(paste(round(c(maxacc1, maxacc2),2), c(sub1, sub2), ; sep=" ("), " features)", sep="")) acc.df <- cbind(lower, best) 5 dimnames(acc.df) <- list(c("Training/test split", > "I (training) / II (test) ", H "II (training) / I (test) "),c("",""))  print(acc.df, quote=FALSE) G invisible(list(sub1.2=ord1, acc1.2=acc1, sub2.1=ord2, acc2.1=acc2))  }$ dset Golub.BM$ cl cancer.BM$ traintest gp.id levels :?@ nfeatures print.acc  { <- factor train == [ ? testset @ cl1 cl2 ng1 length ng2 maxg max c - unique@@ if is.null   max.features  ?   >    <=    ord1 order.features subset ?  ord2$% ?  ord#& sub1 match#' sub2)&' df1 data.frame t' df2,-' acc1 acc2 numeric  for i   cat paste3 : sep  df1.lda lda+( ?3 drop  hat2 $ predict7 newdata.( ?39  class tab table:/3 / sum> row> col>A> df2.lda8.* ?39  hat1;<D=+* ?39  class>?E03@A>B>C>A>4     print round/@FG0@ maxacc1/ maxacc20()H/*)I0 nextacc1/ </? nextacc2/K/@ lower1H sqrt@ *J (?J lower2IN@OLP?L lsub1 minP ?!/M lsub2SP ?!0Q lower Best accuracy, less 1SD 55GMQ@RT6  ( features) 6  best Best accuracy55GHI@(*6  ( features)6  acc.df cbindUV dimnamesW list Training/test split I (training) / II (test)  II (training) / I (test)   FW quote  invisibleZ sub1.2# acc1.2/ sub2.1& acc2.10 balanced.sample function(cl, nset=2, seed=NULL){ $ if(!is.null(seed))set.seed(seed)  gp <- numeric(length(cl))  ord <- order(cl)  tab <- table(cl) M subs <- unlist(lapply(tab, function(x)sample(rep(1:nset, length.out=x))))  gp[ord] <- subs  gp  }$ nset@$ seed  !c set.seedc gp1' order>? subs unlist lapply> function x sample rep ?b length.outl ,function(x)sample(rep(1:nset, length.out=x))f'hf cvdiscw :function(dset=GolubB, cl=tissue.mfB, nfold=NULL, test="f", @ nfeatures=2, seed=31, funda=lda, print.progress=TRUE, 3 selectonce=FALSE, cv=TRUE, subset=NULL){ 6 ## If nfold is not specified, use leave-one-out CV . if(is.null(nfold))nfold <- sum(!is.na(cl)) ( ## Option to omit one or more points 6 if(!is.null(subset)){cl[!is.na(cl)][!subset] <- NA C nfold[1] <- min(nfold[1], sum(!is.na(cl)))  } 0 if(any(is.na(cl))){dset <- dset[,!is.na(cl)] + cl <- cl[!is.na(cl)]  } + if(length(nfold)==1)nfold <- c(nfold,1)  cl <- factor(cl)  ngp <- length(levels(cl))  genes <- rownames(dset)  nobs <- dim(dset)[2]  if(is.null(genes)){ $ genes <- paste(1:dim(dset)[1]) 9 print("Input rows (features) are not named. Names") E print(paste(1,":", dim(dset)[1], " will be assigned.", sep=""))  rownames(dset) <- genes  }  require(MASS) $ if(!is.null(seed))set.seed(seed)  Fcut <- NULL  maxgenes <- max(nfeatures)  if(selectonce){ . stat <- calc.matrixrows.f(dset=dset, cl) < Fcut <- list(F=sort(stat, decreasing=TRUE)[nfeatures], ) df=c(ngp-1, nobs-ngp)) * ord <- order(-abs(stat))[1:maxgenes]  genes.ord <- genes[ord] 9 selectonce.df <- data.frame(t(dset[ord, , drop=F])) ; acc.resub <- acc.sel1 <- as.numeric(rep(NA,maxgenes))  for(ng in nfeatures){ B resub.xda <- funda(cl~., data=selectonce.df[,1:ng,drop=F]) + hat.rsb <- predict(resub.xda)$class P hat.sel1 <- funda(cl~., data=selectonce.df[,1:ng,drop=F], CV=TRUE)$class % tab.rsb <- table(hat.rsb, cl) & tab.one <- table(hat.sel1, cl) N acc.resub[ng] <- sum(tab.rsb[row(tab.rsb)==col(tab.rsb)])/sum(tab.rsb) M acc.sel1[ng] <- sum(tab.one[row(tab.one)==col(tab.one)])/sum(tab.one)  }  } else {  Fcut <- NULL # acc.resub <- acc.sel1 <- NULL  genes.ord <- NULL  } P if(!cv)return(list(acc.resub=acc.resub, acc.sel1=acc.sel1, genes=genes.ord)) H######################################################################## $ ## Cross-validation calculations 7 if(nfold[1]==nobs)foldid <- sample(1:nfold[1]) else , foldid <- sapply(1:nfold[2], function(x) ? balanced.sample(source.mf, nset=nfold[1])) 8 genelist <- matrix("", nrow=maxgenes, ncol=nfold[1]) ! ufold <- sort(unique(foldid))  testscores <- NULL * acc.cv <- as.numeric(rep(NA,maxgenes))  if(print.progress) 9 cat("\n", "Preliminary per fold calculations","\n")  for(i in ufold){ 1 if(print.progress) cat(paste(i,":",sep="")) % trainset <- (1:nobs)[foldid!=i] ! cli <- factor(cl[trainset]) > stat <- calc.matrixrows.f(dset=dset[, trainset], cl=cli) + ordi <- order(-abs(stat))[1:maxgenes] ! genelist[,i] <- genes[ordi]  } ( ulist <- unique(as.vector(genelist)) . df <- data.frame(t(dset[ulist, , drop=F]))  names(df) <- ulist G####################################################################### O if(print.progress)cat("\n", "Show each choice of number of features:","\n")  for(ng in nfeatures){  hat <- cl 1 if(print.progress)cat(paste(ng,":",sep=""))  for(i in ufold){ & testset <- (1:nobs)[foldid==i] ' trainset <- (1:nobs)[foldid!=i] ntest <- length(testset)  ntrain <- nobs-ntest $ genes.i <- genelist[1:ng, i] , dfi <- df[-testset, genes.i, drop=F] . newdfi <- df[testset, genes.i, drop=F]  cli <- cl[-testset] ( xy.xda <- funda(cli~., data=dfi) 2 subs <- match(colnames(dfi), rownames(df)) I newpred.xda <- predict(xy.xda, newdata=newdfi, method="debiased") ) hat[testset] <- newpred.xda$class  }  tab <- table(hat,cl) 9 acc.cv[ng] <- sum(tab[row(tab)==col(tab)])/sum(tab)  } cat("\n") 4 if(length(nfeatures)>1&all(diff(nfeatures)==1)){  nobs <- length(cl)  ng1 <- length(acc.cv)  maxacc1 <- max(acc.cv) $ sub1 <- match(maxacc1, acc.cv) ' nextacc1 <- max(acc.cv[acc.cv<1]) 8 lower1 <- maxacc1-sqrt(nextacc1*(1-nextacc1)/nobs) * lsub1 <- min((1:ng1)[acc.cv>lower1]) - lower <- c("Best accuracy, less 1SD ", : paste(paste(round(c(lower1),2), c(lsub1), A sep=" ("), " features) ", sep="")) best <- c("Best accuracy", 9 paste(paste(round(c(maxacc1),2), c(sub1), = sep=" ("), " features)", sep="")) " acc.df <- cbind(lower, best) , dimnames(acc.df) <- list(c("Accuracy", @ "(Cross-validation)"),c("","")) print(acc.df, quote=FALSE)  } ? invisible(list(folds=foldid, data=df, cl=cl, acc.cv=acc.cv, : acc.sel1=acc.sel1, acc.resub=acc.resub, H genelist=genelist, genes.ord = genes.ord, Fcut=Fcut))  } GolubB tissue.mfB nfold test f @c@? funda8 print.progress  selectonce  cv % ssAd is.nad% dyd% s?Ss?Ady anyy dydys?ss? ngp  genes rownames nobs dim@| |5 ??F *Input rows (features) are not named. NamesF5? :?  will be assigned.6 }| require MASSdcec Fcut maxgenes w  stat calc.matrixrows.fZ F sort decreasing   df{?~{'g abs ? genes.ord|' selectonce.df,-'9 acc.resub acc.sel1 as.numericn 2 ng   resub.xdau ~ . data ?9 hat.rsb;< class hat.sel1;u ?9 CV  tab.rsb? tab.one?@ABCA@ABCA dx returnZ|s?~ foldidm ?s? sapply ?s@kla source.mfbs? function(x) > balanced.sample(source.mf, nset=nfold[1]) genelist matrix  nrow ncols? ufold testscores acc.cvn v4   !Preliminary per fold calculations  23 v453 :6  trainsetP ?~ !=3 cli ordig ?3| ulist as.vector,-9 namesv4   'Show each choice of number of features:  2   hatv45 :6 23 P ?~3P ?~3 ntest ntrain~ genes.i ?3 dfi9 newdfi9 xy.xdauh) colnames} newpred.xda<= method debiased;>?@A>B>C>A>4   &! ? all diff ? ~H()HJK?MHN@OJP?J~RSP ?!MU Best accuracy, less 1SD 55GM@R6  ( features) 6 V Best accuracy55GH@(6  ( features)6 WXUVYWZ Accuracy (Cross-validation)  FW[ \Z folds cvplot @function(scorelist=source.mf1.scores, ndisc=NULL, plot.disc=1:2, xlab=NULL, ylab=NULL, F params=list(points=list(cex=1, lwd=1.25, pch=1:8, col=1:8), > other=list(cex=0.65, lwd=1.25, pch=1:8, col=1:8), ? circle=list(cex=2, lwd=1, pch=1.75, col="gray40"), K legend.points=list(cex=1, cex.other=0.75, lwd=1, lwd.other=1), J legend.text=list(cex=1, cex.other=0.75, lwd=1, lwd.other=1)), H circle=NULL, cl.circle=NULL, circle.pos=c(1,1), adj.circle=1, G adj.title=0.5, join.legends=T, prefix.title="Golub data - ", 2 cex.title=1.0, ratio=1, plot.folds=F ){  library(MASS)  cl <- scorelist$cl " cl.other <- scorelist$cl.other $ nfeatures <- scorelist$nfeatures  if(length(plot.disc)==2){  n1 <- plot.disc[1]  n2 <- plot.disc[2] A if(is.null(xlab))xlab <- paste("Discriminant function", n1) A if(is.null(ylab))ylab <- paste("Discriminant function", n2) 9 } else stop("plot.disc must be a vector of length 2") & if(!is.factor(cl))cl <- factor(cl)  levnames <- levels(cl) 1 if(is.null(ndisc))ndisc <- length(levnames)-1 ! fitscores <- scorelist$scores # other.scores <- scorelist$other  ngp <- length(levnames) " n1lim <- range(fitscores[,n1]) " n2lim <- range(fitscores[,n2])  if(!is.null(cl.other)){ 1 n1lim <- range(c(n1lim, other.scores[,n1])) 1 n2lim <- range(c(n2lim, other.scores[,n2])) ! levnum <- unclass(cl.other) ( levnames.other <- levels(cl.other) ' intlev.other <- unclass(cl.other) 1 ngp.other <- length(levels(cl.other))  } * n1 <- plot.disc[1]; n2 <- plot.disc[2]  intlev <- unclass(cl)  oldpar <- par(lwd=1)  on.exit(par(oldpar)) $ eqscplot(n1lim, n2lim, type="n", / xlab=xlab, ylab=ylab, ratio=ratio)  with(params$points, < points(fitscores[,n1], fitscores[,n2], col=colr[intlev], . pch=pch[intlev], cex=cex, lwd=lwd))  if(!is.null(cl.other))  with(params$other, 2 points(other.scores[,n1], other.scores[,n2], # pch=pch[intlev.other], # col=col[intlev.other],  cex=cex, lwd=lwd))  if(!is.null(cl.circle)){ , cl.circle <- factor(cl.circle[circle]) % lev.circle <- levels(cl.circle)  wirh(params$circle, B points(fitscores[circle, n1], fitscores[circle,n2], pch=pch, < cex=cex, col=col[unclass(cl.circle)], lwd=lwd))  }  par(xpd=TRUE)  chw <- par()$cxy[1]  chh <- par()$cxy[2]  par(lwd=1.5)  ypos <- par()$usr[4] xmid <- mean(par()$usr[1:2])  top.pos <- 0 7 mtext(side=3, line=(top.pos+1), paste(prefix.title, A nfeatures, "features"), cex=cex.title, adj=adj.title) . ypos.legend <- ypos+(top.pos-0.45)*chh*0.8  ( if(join.legends&!is.null(cl.other)){ K leg.info <- legend(xmid, ypos.legend, xjust=0.5, yjust=0, plot=FALSE, B x.intersp=0.5, ncol=ngp, legend=levnames, 9 pt.lwd=params$legend.points$lwd, 9 pt.cex=params$legend.points$cex, 4 cex=params$legend.text$cex, / pch=params$points$pch) D legother.info <- legend(xmid, ypos.legend, xjust=0.5, yjust=0, 8 plot=FALSE, x.intersp=0.5, D ncol=ngp.other, legend=levnames.other, ? pt.lwd=params$legend.points$lwd.other, ? pt.cex=params$legend.points$cex.other, : cex=params$legend.text$cex.other, . pch=params$other$pch) 1 leftoff <- 0.5*legother.info$rect$w-0.5*chw - rightoff <- 0.5*leg.info$rect$w+0.5*chw  ypos.other <- ypos.legend  } else {  leftoff <- 0  rightoff <- 0 . ypos.other <- ypos+(top.pos-1.5)*chh*0.8  } 9 legend(xmid-leftoff, ypos.legend, xjust=0.5, yjust=0, * bty="n", pch=params$points$pch, - x.intersp=0.5, col=colr, ncol=ngp,  legend=levnames, + pt.lwd=params$legend.points$lwd, + pt.cex=params$legend.points$cex, & cex=params$legend.text$cex)  par(lwd=1)  if(!is.null(cl.other)) H lego.info <- legend(xmid+rightoff, ypos.other, xjust=0.5, yjust=0, > pch=params$other$pch, x.intersp=0.5, ? col=params$other$col, ncol=ngp.other, ? pt.lwd=params$legend.points$lwd.other, ? pt.cex=params$legend.points$cex.other, 0 legend=levnames.other, C cex=params$legend.text$cex.other, bty="n") ' if(!is.null(cl.other)&join.legends) D text(lego.info$rect$left+c(0.4*chw,lego.info$rect$w-0.25*chw), 8 rep(ypos.other,2)+0.8*chh, labels=c("(",")"), ' cex=params$legend$cex.other, 0 lwd=params$legend$lwd.other, bty="n")  par(lwd=params$circle$lwd) 1 if(!is.null(cl.circle))if(lev.circle[1]!=""){ % pch.circle <- params$circle$pch ( xy <- par()$usr[circle.pos+c(1,3)]  legend(xy[1], xy[2], N xjust=adj.circle[1], yjust=circle.pos[2], bty="n", x.intersp=0.5, K pch=rep(pch.circle,length(lev.circle)), col=params$circle$col, = ncol=1, legend=lev.circle, cex=0.85, pt.cex=1.5)  }  par(lwd=1, xpd=F)  if(plot.folds){ B mtext(side=1, line=1.25, "Discriminant function 1", outer=T) B mtext(side=2, line=1.25, "Discriminant function 2", outer=T)  }  } scorelist source.mf1.scores ndisc plot.disc ?@ xlab ylab$ paramsZ pointsZ cex? lwd? pch ?@ C ?@  otherZ?? ?@ C ?@  circleZ@??C gray40 legend.pointsZ? cex.other?? lwd.other? legend.textZ???? cl.circle circle.pos?? adj.circle? adj.title?$ join.legends T$ prefix.title Golub data -  cex.title? ratio? plot.folds  library; cl cl.other; cl.other ; nfeatures@  n1? n2@5 Discriminant function5 Discriminant function stop &plot.disc must be a vector of length 2d is.factor levnames ? fitscores; scores other.scores; other{ n1lim range n2limd  levnum unclass levnames.other  intlev.other ngp.other ?@ intlev oldpar par? on.exit eqscplot type n with; pointsC colrd; otherCCd  lev.circle  wirh;CC xpd  chw; cxy? chh; cxy@? ypos; usr@ xmid mean; usr ?@ top.pos mtext side@ lineP +?5  features adj ypos.legendOOP??陙d  leg.info legend xjust? yjust plot  x.intersp?{ pt.lwd;; legend.points lwd pt.cex;; legend.points cex;; legend.text cex;; points pch legother.info? ?;; legend.points lwd.other;; legend.points cex.other;; legend.text cex.other;; other pch leftoffO?;; rect wO? rightoffO?;; rect wO? ypos.other   OOP??陙? bty n;; points pch?C{;; legend.points lwd;; legend.points cex;; legend.text cex?d lego.info  ?;; other pch?C;; other col;; legend.points lwd.other;; legend.points cex.other;; legend.text cex.other  nd text;;  rect leftO?ٙ;;  rect wO?n @O?陙 labels ( );; legend cex.other;; legend lwd.other  n;; circle lwdd?   pch.circle;; xy; usr?@?@?@  n?nC;;C??333333?? ?? Discriminant function 1 outer@? Discriminant function 2 cvscores[ 7function(cvlist=source.mf1.cv, nfeatures=3, ndisc=NULL, 9 cl.other=factor("PB:f"), dset.other=Golub.PBf,  keepcols=NULL ){  library(MASS)  folds <- cvlist$folds > ugenes <- unique(as.vector(cvlist$genelist[1:nfeatures,]))  df <- cvlist$data[, ugenes]  cl <- cvlist$cl  if(!length(cl)==dim(df)[1]) = stop(paste("length(cl) =", length(cl),"does not equal", 4 "dim(cvlist$df)[1] =", dim(df)[1]))  levnames <- levels(cl) 1 if(is.null(ndisc))ndisc <- length(levnames)-1  ngp <- length(levnames)  nobs <- dim(df)[1] ufold <- sort(unique(folds))  nfold <- length(ufold) 7 allscores <- matrix(0, nrow=nobs, ncol=ndisc*nfold)  if(!is.null(cl.other)){ " cl.other <- factor(cl.other) J if(is.null(dim(dset.other)))stop("dset.other must have dimension 2") / if(!length(cl.other)==dim(dset.other)[2]) K stop(paste("length(cl.other) =", length(cl.other),"does not equal", ? "dim(dset.other)[2] =", dim(dset.other)[2])) @ df.other <- data.frame(t(dset.other[ugenes, ,drop=FALSE])) " colnames(df.other) <- ugenes  }  else other.scores <- NULL j <- 0  for(i in ufold){  j <- j+1 / cat(paste(if(j>1) ":" else "", i,sep="")) ! testi <- (1:nobs)[folds==i] " traini <- (1:nobs)[folds!=i]  ntest <- length(testi)  ntrain <- nobs-ntest 0 genes.i <- cvlist$genelist[1:nfeatures, i] 7 dfi <- as.data.frame(df[-testi, genes.i, drop=F]) 9 newdfi <- as.data.frame(df[testi, genes.i, drop=F])  cli <- cl[-testi] $ xy.xda <- lda(cli~., data=dfi) - allscores[, ((i-1)*ndisc)+(1:ndisc)] <- 2 predict(xy.xda, newdata=df, dimen=ndisc)$x  } cat("\n") F if(is.null(keepcols))keepcols <- min(nfeatures, dim(allscores)[2]) P allscores.pcp <- data.frame(pcp(allscores, varscores=FALSE)$g[, 1:keepcols]) C globals <- predict(lda(cl ~ ., data=allscores.pcp))$x[,1:ndisc] ! ntimes <- table(folds)[folds] 8 ntimes.genes <- table(cvlist$genelist[1:nfeatures,])  av <- colMeans(df) 1 fitscores <- matrix(0, nrow=nobs, ncol=ndisc) j <- 0  for(i in ufold){  j <- j+1 0 cat(paste(if (j>1) ":" else "", i,sep="")) ! testi <- (1:nobs)[folds==i] " traini <- (1:nobs)[folds!=i] 0 genes.i <- cvlist$genelist[1:nfeatures, i] 4 dfi <- data.frame(df[-testi, genes.i, drop=F]) 6 newdfi <- data.frame(df[testi, genes.i, drop=F])  cli <- cl[-testi] ( traini.xda <- lda(cli~., data=dfi) / scorei <- predict(traini.xda)$x[,1:ndisc] 8 newpred.xda <- predict(traini.xda, newdata=newdfi) 8 scorei.out <- newpred.xda$x[, 1:ndisc, drop=FALSE] , scorei.all <- globals[-testi, 1:ndisc] # avcol <- colMeans(scorei.all) 3 scorei.all <- sweep(scorei.all, 2, avcol,"-")  avi <- colMeans(scorei) ) scorei <- sweep(scorei, 2, avi,"-") + trans <- qr.solve(scorei, scorei.all) 2 scorei.out <- sweep(scorei.out, 2, avi, "-") C fitscores[testi,] <- sweep(scorei.out%*%trans, 2, avcol, "+")  }  if(!is.null(cl.other)){ " xy.xda <- lda(cl~., data=df) 4 train.scores <- predict(xy.xda, dimen=ndisc)$x 7 other.scores <- predict(xy.xda, newdata=df.other, , dimen=ndisc)$x avcol <- colMeans(globals) 0 all.scores <- sweep(globals, 2, avcol,"-") ( av.train <- colMeans(train.scores) ; train.scores <- sweep(train.scores, 2, av.train, "-") 1 trans <- qr.solve(train.scores, all.scores) @ other.scores <- sweep(other.scores%*%trans, 2, avcol, "+")  } ? invisible(list(scores=fitscores, cl=cl, other=other.scores, ; cl.other=cl.other, nfeatures=nfeatures))  }$ cvlist source.mf1.cv$ @$$ PB:f dset.other Golub.PBf keepcols ; folds ugenes; genelist ? ; data; cld?5 length(cl) = does not equal dim(cvlist$df)[1] =? ?{~?s allscores~Osd  dset.other must have dimension 2d@5 length(cl.other) = does not equal dim(dset.other)[2] =@ df.other,-9  j23 ?45!? : 36  testiP ?~3 trainiP ?~3~; genelist ? 3 as.data.frame998POP3?P ?;<= dimen x4  S @ allscores.pcp,; pcp varscores  g ? globals;<8! x ? ntimes? ntimes.genes?; genelist ?  av colMeans~23 ?45!? : 36 P ?~3P ?~3; genelist ? 3,9,9 traini.xda8 scorei;<) x ?<)= scorei.out; x ?9  scorei.all$ ? avcol(,, sweep,@- - avi(**.*@/ - trans qr.solve*,+.+@/ -. %*%+0@- +d 8 train.scores;< l;<= l-($ all.scores.$@- - av.train(33.3@5 -0134.20@- +\Z scores $ 8function(dset, cl, subset=NULL, test="f", values=FALSE){ D if(dim(dset)[2]!=length(cl))stop(paste("Dimension 2 of dset is", C dim(dset)[2], "differs from the length of cl (=",  length(cl))) ; ## Ensure that cl is a factor & has no redundant levels  if(is.null(subset))  cl <- factor(cl)  else  cl <- factor(cl[subset])  intlev <- unclass(cl) - 1  if(is.null(subset)) ) stat <- calc.matrixrows.f(dset, cl)  else 3 stat <- calc.matrixrows.f(dset[, subset], cl)  ord <- order(-abs(stat)) 6 if(!values)ord else(list(ord=ord, stat=stat[ord]))  }$%t f values  @5 Dimension 2 of dset is@ differs from the length of cl (=%%?%%'gd7'PZ'''". function(x=USArrests,  varscores=TRUE,  cases="rows",  center="vars",  standardize=FALSE,  scale.cases=1,  log=FALSE,  sc=1,  reflect=c(1,1), ...) {  x <- as.matrix(x) avv <- 0 sdv <- 1 ( casedim <- 2-as.logical(cases=="rows")  vardim <- 3-casedim 5 ## casedim=1 if rows are cases; otherwise casedim=2 9 ## scale.cases=0 gives a pure rotation of the variables 2 ## scale.cases=1 weights a/c the singular values  ncases <- dim(x)[casedim]  nvar <- dim(x)[vardim] ( if(is.null(sc))sc <- dim(x)[casedim]-1  if(log)x <- log(x, base=2)  if(standardize){ ! avv <- apply(x, vardim, mean) ' sdv <- apply(x, vardim, sd) " x <- sweep(x, vardim, avv,"-") " x <- sweep(x, vardim, sdv,"/")  } 8 else if(as.logical(match("vars", center, nomatch=0))){ avv <- apply(x,vardim, mean) # x <- sweep(x, vardim, avv,"-")} ) svdx <- La.svd(x, method = c("dgesdd")) h <- NULL  if(cases=="rows"){ ; g <- sweep(svdx$u, 2, svdx$d^scale.cases, "*")*sqrt(sc)  if(varscores) 9 h <- t((svdx$d^(1-scale.cases)* svdx$vt ))/sqrt(sc)  }  else if(cases=="columns"){ ? g <- sweep(t(svdx$vt), 2, svdx$d^scale.cases, "*")*sqrt(sc)  if(varscores) @ h <- sweep(svdx$u, 2, svdx$d^(1-scale.cases),"*")/sqrt(sc)  } F invisible(list(g=g, rotation=h, av=avv, sdev=svdx$d/sqrt(ncases-1))) }l USArrests$#  cases rows center vars standardize  scale.cases? log  sc? reflect?? ... l as.matrixl avv sdv? casedim@ as.logical9 rows vardim@D ncaseslD nvarlF>>lD?=l=l base@; B applylFCJlF sdl.lFB -l.lFC /E) vars: nomatch BJlFl.lFB - svdx La.svdl dgesdd h9 rows  gO.;M u@ ^;M d< *N>#O@-POQ;M dP?<;M vtN>9 columns PO.-;M vt@Q;M d< *N>#O@.;M u@Q;MSP?< *N>\ZPP rotationO'B sdev@;M dNG? plot.train.test Dfunction(dset=Golub, nfeatures=c(11,11), cl=cancer, traintest=gp.id, @ titles=c("A: I/II (train with I, scores are for II)", ; "B: II/I (train with II, scores are for I)")){ ( oldpar <- par(mfrow=c(1,2), pty="s")  on.exit(par(oldpar)) 9 if(length(nfeatures)==1)nfeatures <- rep(nfeatures,2) " traintest <- factor(traintest) , train <- traintest==levels(traintest)[1] . testset <- traintest==levels(traintest)[2]  cl1 <- cl[train]  cl2 <- cl[testset]  nf1 <- nfeatures[1] 2 ord1 <- order.features(dset, cl, subset=train) 2 df1 <- data.frame(t(dset[ord1[1:nf1], train])) 4 df2 <- data.frame(t(dset[ord1[1:nf1], testset])) df1.lda <- lda(df1, cl1) - scores <- predict(df1.lda, newdata=df2)$x M scoreplot(scores, cl2, scores.other=NULL, cl.other=NULL, title=titles[1])  nf2 <- nfeatures[2] 4 ord2 <- order.features(dset, cl, subset=testset) 4 df2 <- data.frame(t(dset[ord2[1:nf2], testset])) 2 df1 <- data.frame(t(dset[ord2[1:nf2], train])) df2.lda <- lda(df2, cl2) - scores <- predict(df2.lda, newdata=df1)$x < scoreplot(scores, cl1, scores.other=NULL, cl.other=NULL, ' ylab="", title=titles[2])  }$ Golub$ @&@&$ cancer$ titles )A: I/II (train with I, scores are for II) )B: II/I (train with II, scores are for I)  mfrow?@ pty s ? n @ ? @ nf1 ?#$%+,-# ?].,-# ?]78+6;<7=. x scoreplot6 scores.other titleZ? nf2 @&$%.,-& ?a+,-& ?aD8.6;<D=+ x^6_ `Z@ qqthin/ :function(x, y, ends=c(.01,.99), eps=0.001, plot.it = TRUE, 8 xlab = deparse(substitute(x)), adj.xlab=NULL, 9 ylab = deparse(substitute(y)), show.line=TRUE, ! centerline=TRUE, ...){ 8 ## qqthin() is a substitute for qqplot(), that thins 8 ## out plotted points from the region where they are 7 ## dense. Apart from the overlaid curve that shows 8 ## the region where points have been thinned, it may 4 ## be hard to distinguish the result of qqthin()  ## from that of qqplot()  xlab <- xlab  ylab <- ylab  x <- sort(x)  y <- sort(y)  dx<-diff(x) : epsdist <- sqrt(diff(range(x))^2+diff(range(y))^2)*eps . dx<-0.5*(c(dx[1],dx)+c(dx,dx[length(dx)]))  dy<-diff(y) . dy<-0.5*(c(dy[1],dy)+c(dy,dy[length(dy)])) & dpoints <- epsdist/sqrt(dx^2+dy^2) 9 ## dpoints is a local measure of the number of points : ## per unit distance along the diagonal, with the unit 4 ## set to approximately eps*(length of diagonal)  dig<-floor(dpoints)+1 ? ## dig is, roughly, the number of points per unit distance. ? ## We wish to retain one point per unit distance. For this > ## retain points where cdig rounds to an integer. For such > ## points, cdig has increased by approx 1, relative to the ' ## previous point that is retained.  cdig<-round(cumsum(1/dig)) # subs<-match(unique(cdig), cdig)  if(is.null(adj.xlab)) 0 plot(x[subs], y[subs], xlab=xlab, ylab=ylab) else { 0 plot(x[subs], y[subs], xlab="", ylab=ylab) : mtext(side=1, xlab, adj=adj.xlab, line=par()$mgp[1])  } & n1 <- min(subs[c(diff(subs),0)>1]) & n2 <- max(subs[c(0,diff(subs))>1])  ns1 <- match(n1, subs)  ns2 <- match(n2, subs) : print(paste("Graph retains", length(subs), "points."))  if(centerline) > lines(smooth.spline(x[subs[ns1:ns2]], y[subs[ns1:ns2]]),  col="grey", lwd=2) ( if(show.line)abline(0, 1, col="red")  }l y ends?zG{?zG eps?PbM plot.it $ deparse substitutel$ adj.xlab$ghc show.line  centerline @ llcc dxl epsdistONQl@Qc@elO?Pl?llll dycnO?Pn?nnnn dpoints@mNQl@Qn@ dig flooro? cdigG cumsum@?ph)rrilhch lhch ?i; mgp?Sh!h?h!h? ns1)h ns2)hF5 Graph retainsh points.k lines smooth.splinelh tuch tuC grey@j abline?C red^] ;function(scores, cl, scores.other, cl.other, plot.disc=1:2, G xlab=NULL, ylab=NULL, plotch=1:3, colr= palette()[c(2,4,6)], I plotch.other=4, colr.other="gray40", cex.other=1, lwd.other=1, H circle=NULL, cl.circle=NULL, circle.pos=c(1,1), adj.circle=1, ? circle.colr="gray40", lwd.circle=2, join.legends=T, ? cex.legend=0.85, ratio=1, title=NULL, cex.title=1.0) {  library(MASS)  oldpar <- par(lwd=1)  on.exit(par(oldpar))  if(length(plot.disc)==2){  n1 <- plot.disc[1]  n2 <- plot.disc[2] A if(is.null(xlab))xlab <- paste("Discriminant function", n1) A if(is.null(ylab))ylab <- paste("Discriminant function", n2) 9 } else stop("plot.disc must be a vector of length 2") & if(!is.factor(cl))cl <- factor(cl)  levnames <- levels(cl)  fitscores <- scores  ngp <- length(levnames) " n1lim <- range(fitscores[,n1]) " n2lim <- range(fitscores[,n2])  if(!is.null(cl.other)){ " cl.other <- factor(cl.other) 1 n1lim <- range(c(n1lim, scores.other[,n1])) 1 n2lim <- range(c(n2lim, scores.other[,n2])) ' intlev.other <- unclass(cl.other) ( levnames.other <- levels(cl.other) 1 ngp.other <- length(levels(cl.other))  }  intlev <- unclass(cl) : eqscplot(n1lim, n2lim, type="n", xlab=xlab, ylab=ylab) < points(fitscores[,n1], fitscores[,n2], col=colr[intlev],  pch=plotch[intlev])  if(!is.null(cl.other)) 2 points(scores.other[,n1], scores.other[,n2], , pch=plotch.other[intlev.other], * col=colr.other[intlev.other],  cex=cex.other)  if(!is.null(cl.circle)){ , cl.circle <- factor(cl.circle[circle]) % lev.circle <- levels(cl.circle) ? points(fitscores[circle,n1], fitscores[circle,n2], pch=1, K cex=1.75, col=circle.colr[unclass(cl.circle)], lwd=lwd.circle)  }  par(xpd=TRUE)  chw <- par()$cxy[1]  chh <- par()$cxy[2]  par(lwd=1.5)  ypos <- par()$usr[4] xmid <- mean(par()$usr[1:2])  top.pos <- 0 . ypos.legend <- ypos+(top.pos-0.45)*chh*0.8 4 line.title <- (ypos.legend-par()$usr[4])/chh+1.5 5 if(!is.null(title))mtext(side=3, line=line.title, > adj=0, text=title, cex=cex.title) ( if(join.legends&!is.null(cl.other)){ K leg.info <- legend(xmid, ypos.legend, xjust=0.5, yjust=0, plot=FALSE, L x.intersp=0.5, ncol=ngp, legend=levnames, cex=0.85) D legother.info <- legend(xmid, ypos.legend, xjust=0.5, yjust=0, J plot=FALSE, pch=plotch.other, x.intersp=0.5, N ncol=ngp.other, legend=levnames.other, cex=0.85) 1 leftoff <- 0.5*legother.info$rect$w-0.5*chw - rightoff <- 0.5*leg.info$rect$w+0.5*chw  ypos.other <- ypos.legend  } else {  leftoff <- 0  rightoff <- 0 . ypos.other <- ypos+(top.pos-1.5)*chh*0.8  } 9 legend(xmid-leftoff, ypos.legend, xjust=0.5, yjust=0,  bty="n", pch=plotch, - x.intersp=0.5, col=colr, ncol=ngp, + legend=levnames, cex=cex.legend)  if(!is.null(cl.other)) F lego.info <- legend(xmid+rightoff, ypos.other, xjust=0.5, yjust=0, H pch=plotch.other, x.intersp=0.5, col=colr.other, ' ncol=ngp.other, A legend=levnames.other, cex=0.85, bty="n") ' if(!is.null(cl.other)&join.legends) D text(lego.info$rect$left+c(0.4*chw,lego.info$rect$w-0.25*chw), 8 rep(ypos.other,2)+0.8*chh, labels=c("(",")"))  par(lwd=lwd.circle) 1 if(!is.null(cl.circle))if(lev.circle[1]!=""){ ( xy <- par()$usr[circle.pos+c(1,3)]  legend(xy[1], xy[2], N xjust=adj.circle[1], yjust=circle.pos[2], bty="n", x.intersp=0.5, < pch=rep(1,length(lev.circle)), col=circle.colr, C ncol=1, legend=lev.circle, cex=cex.legend, pt.cex=1.5)  }  par(lwd=1, xpd=F)  }6$_$ ?@$ plotch ?@ palette@@@ plotch.other@ colr.other gray40????? circle.colr gray40 lwd.circle@ cex.legend?333333?$`? ?@ ?@5 Discriminant function5 Discriminant function &plot.disc must be a vector of length 2d 6{d __   nCyd__{C|d  ??C}~ ; cxy?; cxy@?; usr@; usr ?@OOP??陙 line.title@P; usr@?d`@ `d ? ?{?333333? {??333333O?;; rect wO? O?;; rect wO?    OOP??陙?  ny?C{d   ?{?C|?333333  nd ;;  rect leftO?ٙ;;  rect wO?n @O?陙 ( )~d?  ;?@?@?@  n?n?C}??? simulate.scores =function(nfeatures=7129, cl=rep(1:3, c(19,10,2)), dset=NULL, > cl.other=4, dset.other=NULL, nf.use=15, seed=NULL){ $ if(!is.null(seed))set.seed(seed)  m <- length(cl)  m.other <- length(cl.other)  if(is.null(dset)){ 8 dset <- matrix(rnorm(nfeatures*m), nrow=nfeatures) * rownames(dset) <- paste(1:nfeatures)  } " else nfeatures <- dim(dset)[1]  if(is.null(dset.other)){ D dset.other <- matrix(rnorm(nfeatures*m.other), nrow=nfeatures) 0 rownames(dset.other) <- paste(1:nfeatures)  } 3 if(is.numeric(cl))cl <- paste("Gp", cl, sep="") E if(is.numeric(cl.other))cl.other <- paste("Gp", cl.other, sep="")  cl <- factor(cl) cl.other <- factor(cl.other) ; xx.random <- matrix(rnorm(nfeatures*m), nrow=nfeatures) = ordfeatures <- order.features(xx.random, cl=cl, values=T) & stat <- ordfeatures$stat[1:nf.use] ( ord.use <- ordfeatures$ord[1:nf.use] / dfUse.ord <- data.frame(t(dset[ord.use, ])) : dfUseOther.ord <- data.frame(t(dset.other[ord.use, ])) . ordUse.lda <- lda(dfUse.ord, grouping=cl) , scores <- predict(ordUse.lda, dimen=2)$x J scores.other <- predict(ordUse.lda, newdata=dfUseOther.ord, dimen=2)$x D invisible(list(scores=scores, cl=cl, scores.other=scores.other, & cl.other=cl.other))  }$ @$n ?@@3@$@$@$ nf.use@.$c dcec m m.other  rnormO  }5 ?  ? O  }5 ?  is.numeric5 Gp6 5 Gp6  xx.randomO   ordfeatures$7; stat ? ord.use; ord ? dfUse.ord,- dfUseOther.ord,- ordUse.lda8 grouping6;< @ x_;<= @ x\Z66__ +function(dset=Golub, cl=golub.info$cancer){  y <- t(dset) # qr.obj <- qr(model.matrix(~cl))  qty.obj <- qr.qty(qr.obj,y)  tab <- table(factor(cl))  dfb <- length(tab)-1  dfw <- sum(tab)-dfb-1 C ms.between <- apply(qty.obj[2:(dfb+1), , drop=F]^2, 2, sum)/dfb E ms.within <- apply(qty.obj[-(1:(dfb+1)), , drop=F]^2, 2, sum)/dfw ! Fstat <- ms.between/ms.within  }X; golub.infoY c- qr.obj qr model.matrix qty.obj qr.qtyc>? dfb>? dfwA>? ms.between@JQ @P?9@@A ms.within@JQP ?P?9@@A Fstat@