Bioconductor Code: MiPP

Browse code

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/MiPP@22433 bc3139a8-67e5-0310-9ffc-ced21a209358

Sukwoo Kim authored on 05/02/2007 17:22:42
Showing 6 changed files

DESCRIPTION index 155c52a..8f01b23 100644
R/MiPP.R index 2834592..cbdd2e0 100644
R/MiPP.seq.R index 3028a3e..46d5463 100644
R/lda.R index d97c2d1..c0e5242 100644
R/svmlin.R index 3e556db..92b14a0 100644
R/svmrbf.R index 159d6c1..6421237 100644

History View file @ c716b94

@@ -1,14 +1,14 @@
                      Package: MiPP
                     -Version: 1.5.0
                     -Date: 2007-01-01
                     +Version: 1.6.0
                     +Date: 2007-01-31
                      Title: Misclassification Penalized Posterior Classification
                      Author: HyungJun Cho <[email protected]>,
                              Sukwoo Kim <[email protected]>,
                              Mat Soukup <[email protected]>, and
                              Jae K. Lee <[email protected]>
                      Maintainer: Sukwoo Kim <[email protected]>
                     -Depends: R (>= 2.4), Biobase, e1071
                     -Description: This package finds optimal sets of genes that seperate samples into multiple classes.
                     +Depends: R (>= 2.4), Biobase, e1071,MASS
                     +Description: This package finds optimal sets of genes that seperate samples into two or more classes.
                      License: GPL version 2 or newer
                      URL:http://www.healthsystem.virginia.edu/internet/hes/biostat/bioinformatics/
                     -biocViews: Microarray, DifferentialExpression
                     +biocViews: Microarray, Classification

R/MiPP.R

History View file @ c716b94

@@ -69,8 +69,8 @@ mipp <- function(x, y, x.test=NULL, y.test=NULL, probe.ID=NULL, rule="lda",
+                             }
                              if(length(ii) < 2) stop("There are too small number of candidate genes.")
                     -        x.tr <- x[,ii]; y.tr <- y
                     -        x.te <- x.test[,ii]; y.te <- y.test
                     +        x.tr <- x[,ii,drop=FALSE]; y.tr <- y
                     +        x.te <- x.test[,ii,drop=FALSE]; y.te <- y.test
                              out <- mipp.rule(x.train=x.tr, y.train=y.tr, x.test=x.te, y.test=y.te,
                                               nfold=nfold, min.sMiPP=min.sMiPP, n.drops=n.drops, rule=rule)
                              out[,2] <- probe.ID[ii[out[,2]]]
@@ -113,7 +113,7 @@ mipp <- function(x, y, x.test=NULL, y.test=NULL, probe.ID=NULL, rule="lda",
+                             }
                              if(length(ii) < 2) stop("There are too small number of candidate genes.")
                     -        x.tr <- x[,ii]
                     +        x.tr <- x[,ii,drop=FALSE]
                              y.tr <- y
                              out <- cv.mipp.rule(x=x.tr, y=y.tr, nfold=nfold, p.test=p.test, n.split=n.split, n.split.eval=n.split.eval,
                                                     model.sMiPP.margin=model.sMiPP.margin, min.sMiPP=min.sMiPP, n.drops=n.drops, rule=rule)
@@ -123,7 +123,7 @@ mipp <- function(x, y, x.test=NULL, y.test=NULL, probe.ID=NULL, rule="lda",
                              for(i in 1:n.split) {
                                  k <- ncol(out$CVCV.out)-9 ###note
                                  k <- max(which(!is.na(out$CVCV.out[i,1:k])))
                     -            kk <- as.numeric(out$CVCV.out[i,2:k])
                     +            kk <- as.numeric(out$CVCV.out[i, 2:k, drop=FALSE])
                                  out$CVCV.out[i,2:k] <- probe.ID[ii[kk]]
+                             }
@@ -185,8 +185,8 @@ cv.mipp.rule <- function(x, y, nfold, p.test, n.split, n.split.eval,
                              y.train <- y[-i.test]
                              y.test  <- y[ i.test]
                     -        x.train <- x[-i.test,]
                     -        x.test  <- x[ i.test,]
                     +        x.train <- x[-i.test,,drop=FALSE]
                     +        x.test  <- x[ i.test,,drop=FALSE]
                              if(is.data.frame(x.train)==FALSE) x.train <- data.frame(x.train)
                              if(is.data.frame(x.test)==FALSE) x.test <- data.frame(x.test)
@@ -205,8 +205,8 @@ cv.mipp.rule <- function(x, y, nfold, p.test, n.split, n.split.eval,
                           tmp <- apply(gene.list, 2, is.na)
                           i <- which(apply(tmp, 2, sum) >= n.split)
                     -     gene.list <- gene.list[,-i] #fixed on 01/17/2007
                     -     CV.out <- CV.out[-c(1:n.split),]
                     +     gene.list <- gene.list[,-i,drop=FALSE] #fixed on 01/17/2007
                     +     CV.out <- CV.out[-c(1:n.split),,drop=FALSE]
                           ###################################
@@ -229,17 +229,17 @@ cv.mipp.rule <- function(x, y, nfold, p.test, n.split, n.split.eval,
                              y.train <- y[-i.test]
                              y.test  <- y[ i.test]
                     -        x.train <- x[-i.test,]
                     -        x.test  <- x[ i.test,]
                     +        x.train <- x[-i.test,,drop=FALSE]
                     +        x.test  <- x[ i.test,,drop=FALSE]
                              if(is.data.frame(x.train)==FALSE) x.train <- data.frame(x.train)
                              if(is.data.frame(x.test)==FALSE) x.test <- data.frame(x.test)
                              for(jj in 1:n.split) { #Split
                     -            k <- max(which(!is.na(gene.list[jj,])==TRUE))
                     -            kk <- as.numeric(gene.list[jj,1:k])
                     -            xx.train <- x.train[,kk]
                     -            xx.test  <- x.test[,kk]
                     +            k <- max(which(!is.na(gene.list[jj,,drop=FALSE])==TRUE))
                     +            kk <- as.numeric(gene.list[jj,1:k,drop=FALSE])
                     +            xx.train <- x.train[,kk,drop=FALSE]
                     +            xx.test  <- x.test[,kk,drop=FALSE]
                                  if(is.data.frame(xx.train)==FALSE) xx.train <- data.frame(xx.train)
                                  if(is.data.frame(xx.test)==FALSE) xx.test <- data.frame(xx.test)
@@ -300,8 +300,8 @@ mipp.rule <- function(x.train, y.train, x.test=NULL, y.test=NULL, nfold=5, min.s
                              y.tr <- y.train[id!=i]
                              y.te <- y.train[id==i]
                              for(j in 1:n.gene) {
                     -             x.tr <- data.frame(x.train[id!=i,j])
                     -             x.te <- data.frame(x.train[id==i,j])
                     +             x.tr <- data.frame(x.train[id!=i,j,drop=FALSE])
                     +             x.te <- data.frame(x.train[id==i,j,drop=FALSE])
                                   out[i,j] <- get.mipp(x.tr, y.tr, x.te, y.te, rule=rule)$MiPP
+                             }
+                          }
@@ -311,13 +311,13 @@ mipp.rule <- function(x.train, y.train, x.test=NULL, y.test=NULL, nfold=5, min.s
                           pick.gene <- as.numeric(colnames(x.train)[pick.gene])
                           opt.genes <- c(opt.genes, pick.gene)
                     -     x.train.cand <- x.train[,-opt.genes]
                     -     x.train.opt  <- data.frame(x.train[,opt.genes])
                     +     x.train.cand <- x.train[,-opt.genes,drop=FALSE]
                     +     x.train.opt  <- data.frame(x.train[,opt.genes,drop=FALSE])
                           colnames(x.train.opt) <- opt.genes
                           #Evaluate by test set
                     -     xx.train <- data.frame(x.train[,opt.genes])
                     -     xx.test  <- data.frame(x.test[,opt.genes])
                     +     xx.train <- data.frame(x.train[,opt.genes,drop=FALSE])
                     +     xx.test  <- data.frame(x.test[,opt.genes,drop=FALSE])
                           tmp <- get.mipp(xx.train, y.train, xx.test, y.test, rule=rule)
                           opt.Er    <-c(opt.Er, tmp$ErrorRate)
                           opt.MiPP  <-c(opt.MiPP, tmp$MiPP)
@@ -341,8 +341,8 @@ mipp.rule <- function(x.train, y.train, x.test=NULL, y.test=NULL, nfold=5, min.s
                                  y.tr <- y.train[id!=i]
                                  y.te <- y.train[id==i]
                                  for(j in 1:n.gene.cand) {
                     -                x.tr <- data.frame(x.train.opt[id!=i,], x.train.cand[id!=i,j])
                     -                x.te <- data.frame(x.train.opt[id==i,], x.train.cand[id==i,j])
                     +                x.tr <- data.frame(x.train.opt[id!=i,,drop=FALSE], x.train.cand[id!=i,j,drop=FALSE])
                     +                x.te <- data.frame(x.train.opt[id==i,,drop=FALSE], x.train.cand[id==i,j,drop=FALSE])
                                      out[i,j] <- get.mipp(x.tr,y.tr, x.te, y.te, rule=rule)$MiPP
+                                 }
+                             }
@@ -351,13 +351,13 @@ mipp.rule <- function(x.train, y.train, x.test=NULL, y.test=NULL, nfold=5, min.s
                              pick.gene <- min(which(out.sum >= max(out.sum)))
                              pick.gene <- as.numeric(colnames(x.train.cand)[pick.gene])
                              opt.genes <- c(opt.genes, pick.gene)
                     -        x.train.opt  <- x.train[, opt.genes]
                     -        x.train.cand <- x.train[,-opt.genes]
                     +        x.train.opt  <- x.train[, opt.genes,drop=FALSE]
                     +        x.train.cand <- x.train[,-opt.genes,drop=FALSE]
                              #Evaluate by test set
                     -        xx.train <- x.train[,opt.genes]
                     -        xx.test  <- x.test[,opt.genes]
                     +        xx.train <- x.train[,opt.genes,drop=FALSE]
                     +        xx.test  <- x.test[,opt.genes,drop=FALSE]
                              tmp <- get.mipp(xx.train, y.train, xx.test,  y.test, rule=rule)
                              opt.Er    <-c(opt.Er, tmp$ErrorRate)
                              opt.MiPP  <-c(opt.MiPP, tmp$MiPP)

R/MiPP.seq.R

History View file @ c716b94

@@ -45,8 +45,8 @@ mipp.seq <- function(x, y, x.test=NULL, y.test=NULL, probe.ID=NULL, rule="lda",
                                 nc <- ifelse(remove.gene.each.model=="first", 1, k)
                                 best.genes <- sort(unique(c(best.genes, out$model$Gene[1:nc])))
                                 if(length(best.genes) < nrow(x)) {
                     -               x.sub <- x[-best.genes,]
                     -               x.test.sub <- x.test[-best.genes,]
                     +               x.sub <- x[-best.genes,,drop=FALSE]
                     +               x.test.sub <- x.test[-best.genes,,drop=FALSE]
                                     p.ID.sub <- p.ID[-best.genes]
+                                }
@@ -59,7 +59,7 @@ mipp.seq <- function(x, y, x.test=NULL, y.test=NULL, probe.ID=NULL, rule="lda",
+                            }
                             ###GENE ID
                     -       out2 <- out2[-1,]
                     +       out2 <- out2[-1,,drop=FALSE]
                             out2$Gene <- probe.ID[out2$Gene]
                             out2 <- cbind(Seq, out2)
                             rownames(out2) <- 1:nrow(out2)
@@ -108,9 +108,9 @@ mipp.seq <- function(x, y, x.test=NULL, y.test=NULL, probe.ID=NULL, rule="lda",
                                 nc <- ifelse(remove.gene.each.model=="first", 2, (n.sample+1))
                                 k <- which(CVCV.out2[,(1+n.sample+7)] >= cutoff.sMiPP)
                                 if(length(k) > 0) {
                     -              best.genes <- sort(unique(as.numeric(na.omit(as.vector(as.matrix(CVCV.out2[k,2:nc]))))))
                     +              best.genes <- sort(unique(as.numeric(na.omit(as.vector(as.matrix(CVCV.out2[k,2:nc,drop=FALSE]))))))
                                    if(length(best.genes) < nrow(x)) {
                     -                 x.sub <- x[-best.genes,]
                     +                 x.sub <- x[-best.genes,,drop=FALSE]
                                       p.ID.sub <- p.ID[-best.genes]
+                                   }
+                                }
@@ -126,7 +126,7 @@ mipp.seq <- function(x, y, x.test=NULL, y.test=NULL, probe.ID=NULL, rule="lda",
                             ###GENE ID
                             CV.out2$Gene <- probe.ID[CV.out2$Gene]
                     -       kk <- as.numeric(as.vector(as.matrix(CVCV.out2[,2:(n.sample+1)])))
                     +       kk <- as.numeric(as.vector(as.matrix(CVCV.out2[,2:(n.sample+1),drop=FALSE])))
                             CVCV.out2[,2:(n.sample+1)] <- probe.ID[kk]
                             #Remove missing columns and add seq

R/lda.R

History View file @ c716b94

@@ -23,7 +23,7 @@ get.mipp.lda <- function(x.train, y.train, x.test, y.test){
                           post.prob <-0
                           for(j in 1:n.class) {
                               i <- which(True.class == u.class[j])
                     -         post.prob <- post.prob + sum(out$post[i,j])
                     +         post.prob <- post.prob + sum(out$post[i,j,drop=FALSE])
+                          }
                           N <- length(True.class)

R/svmlin.R

History View file @ c716b94

@@ -12,7 +12,7 @@ get.mipp.svm.linear <- function(x.train, y.train, x.test, y.test){
                      	fofx <- numeric(length(y.test))
                      	for(i in 1:length(y.test)){
                     -		xin <- x.test[i,]
                     +		xin <- x.test[i,,drop=FALSE]
                      		fofx[i] <- linearkernel.decision.function(xin, x.train, fit)
+                     	}
@@ -41,10 +41,10 @@ linearkernel.decision.function <-function(newx, oldx, svmobj) {
                          # Extract b:
                          	svconstant <- -1*svmobj$rho
                          # Get the support vectors
                     -    	svdata <- oldx[svmobj$index,]
                     +    	svdata <- oldx[svmobj$index,,drop=FALSE]
                          # Reformat the new x
                          	xt <- newx
                     -    	nrowxt <- length(oldx[1,])
                     +    	nrowxt <- length(oldx[1,,drop=FALSE])
                          	dim(xt) <- c(nrowxt,1)
                          # linear kernel:
                          	prods <- svdata %*% xt

R/svmrbf.R

History View file @ c716b94

@@ -13,7 +13,7 @@ get.mipp.svm.rbf <- function(x.train, y.train, x.test, y.test){
                      	fofx <- numeric(length(y.test))
                      	for(i in 1:length(y.test)){
                     -		xin <- x.test[i,]
                     +		xin <- x.test[i,,drop=FALSE]
                      		fofx[i] <- rbfkernel.decision.function(xin, x.train, fit)
+                     	}
@@ -43,11 +43,11 @@ rbfkernel.decision.function <- function(newx, oldx, svmobj) {
                          # Extract gamma:
                          	svgamma <- svmobj$gamma
                          # Get the support vectors
                     -    	svdata <- oldx[svmobj$index,]
                     +    	svdata <- oldx[svmobj$index,,drop=FALSE]
                          # How many support vectors?
                          	numsv <- length(svmobj$index)
                          # reformat newx
                     -    	p <- length(oldx[1,])
                     +    	p <- length(oldx[1,,drop=FALSE])
                          	xt <- matrix(0, nrow=numsv, ncol=p)
                          	for(i in 1:p){
                              	xt[,i] <- rep(newx[i], numsv)