... | ... |
@@ -1,7 +1,8 @@ |
1 | 1 |
Package: TDbasedUFEadv |
2 | 2 |
Type: Package |
3 | 3 |
Title: TDbasedUFEadv |
4 |
-Version: 0.99.8 |
|
4 |
+Version: 0.99.9 |
|
5 |
+Language: en-US |
|
5 | 6 |
Authors@R: |
6 | 7 |
person("Taguchi", "Y-h.", , |
7 | 8 |
"[email protected]", role = c("aut", "cre"), |
... | ... |
@@ -10,7 +11,7 @@ Description: |
10 | 11 |
This is a comprehensive package to perform |
11 | 12 |
Tensor decomposition based unsupervised feature extraction. |
12 | 13 |
It can perform unsupervised feature extraction. |
13 |
- It uses tensor decompission. |
|
14 |
+ It uses tensor decomposition. |
|
14 | 15 |
It is applicable to gene expression, DNA methylation, and |
15 | 16 |
histone modification etc. |
16 | 17 |
It can perform multiomics analysis. |
... | ... |
@@ -19,7 +20,8 @@ biocViews: |
19 | 20 |
GeneExpression, |
20 | 21 |
FeatureExtraction, |
21 | 22 |
MethylationArray, |
22 |
- SingleCell |
|
23 |
+ SingleCell, |
|
24 |
+ Software |
|
23 | 25 |
License: GPL-3 |
24 | 26 |
Encoding: UTF-8 |
25 | 27 |
Imports: |
... | ... |
@@ -43,6 +45,10 @@ Imports: |
43 | 45 |
hash, |
44 | 46 |
shiny |
45 | 47 |
RoxygenNote: 7.2.3 |
48 |
+BugReports: |
|
49 |
+ https://github.com/tagtag/TDbasedUFEadv/issues |
|
50 |
+URL: |
|
51 |
+ https://github.com/tagtag/TDbasedUFEadv |
|
46 | 52 |
Suggests: |
47 | 53 |
knitr, |
48 | 54 |
rmarkdown, |
... | ... |
@@ -1,9 +1,9 @@ |
1 | 1 |
# Generated by roxygen2: do not edit by hand |
2 | 2 |
|
3 |
-export(PrepareSummarizedExperimentTensorRect) |
|
4 | 3 |
export(computeSVD) |
5 | 4 |
export(prepareCondDrugandDisease) |
6 | 5 |
export(prepareCondTCGA) |
6 |
+export(prepareSummarizedExperimentTensorRect) |
|
7 | 7 |
export(prepareTensorfromList) |
8 | 8 |
export(prepareTensorfromMatrix) |
9 | 9 |
export(prepareexpDrugandDisease) |
... | ... |
@@ -31,6 +31,7 @@ importFrom(graphics,boxplot) |
31 | 31 |
importFrom(graphics,hist) |
32 | 32 |
importFrom(graphics,par) |
33 | 33 |
importFrom(hash,keys) |
34 |
+importFrom(methods,is) |
|
34 | 35 |
importFrom(methods,new) |
35 | 36 |
importFrom(rTensor,as.tensor) |
36 | 37 |
importFrom(rTensor,hosvd) |
... | ... |
@@ -49,4 +50,5 @@ importFrom(stats,filter) |
49 | 50 |
importFrom(stats,optim) |
50 | 51 |
importFrom(stats,p.adjust) |
51 | 52 |
importFrom(stats,pchisq) |
53 |
+importFrom(stats,var) |
|
52 | 54 |
importFrom(utils,read.csv) |
5 | 5 |
deleted file mode 100644 |
... | ... |
@@ -1,24 +0,0 @@ |
1 |
-#' Title Prepare tensor generated from two matricies that share samples |
|
2 |
-#' |
|
3 |
-#' @param sample Chracter vecor of sample names |
|
4 |
-#' @param feature list of features from two matrices |
|
5 |
-#' @param value array, contents of |
|
6 |
-#' @param featureRange Genomic Ranges to be associated with features |
|
7 |
-#' @param sampleData List of conditional labeling associated with samples |
|
8 |
-#' |
|
9 |
-#' @return Tensor generated from two matricies that share samples |
|
10 |
-#' @export |
|
11 |
-#' |
|
12 |
-#' @examples |
|
13 |
-#' matrix1 <- matrix(runif(10000),200) #row features, column samples |
|
14 |
-#' matrix2 <- matrix(runif(20000),400) #row features, column samples |
|
15 |
-#' Z <- prepareTensorfromMatrix(t(matrix1),t(matrix2)) |
|
16 |
-#' Z <- PrepareSummarizedExperimentTensorRect(sample=as.character(seq_len(50)), |
|
17 |
-#' feature=list(as.character(seq_len(200)),as.character(seq_len(400))), |
|
18 |
-#' sampleData=list(rep(seq_len(2),each=25)),value=Z) |
|
19 |
-PrepareSummarizedExperimentTensorRect <- function(sample,feature,value, |
|
20 |
- featureRange=GRanges(NULL),sampleData=list(NULL)){ |
|
21 |
- new("SummarizedExperimentTensorRect",sample=sample, |
|
22 |
- feature=feature,value=value, |
|
23 |
- featureRange=featureRange,sampleData=sampleData) |
|
24 |
-} |
|
25 | 0 |
\ No newline at end of file |
... | ... |
@@ -7,20 +7,30 @@ |
7 | 7 |
#' @param scale If matrix should be scaled or not |
8 | 8 |
#' |
9 | 9 |
#' @return Singular value vectors attributed to two sets of objects associated |
10 |
-#' with eingular value vectors attriuted to features, by multiplying |
|
10 |
+#' with singular value vectors attributed to features, by multiplying |
|
11 | 11 |
#' @export |
12 | 12 |
#' |
13 | 13 |
#' @examples |
14 | 14 |
#' matrix1 <- matrix(runif(200),20) |
15 | 15 |
#' matrix2 <- matrix(runif(400),20) |
16 | 16 |
#' SVD <- computeSVD(matrix1,matrix2) |
17 |
-computeSVD <- function(matrix1,matrix2,dim=10,scale=TRUE){ |
|
18 |
- matrix1[is.na(matrix1)] <-0 |
|
19 |
- matrix2[is.na(matrix2)] <-0 |
|
20 |
- Z <- t(matrix1) %*% matrix2 |
|
21 |
- if (scale) Z <- Z/mean(Z) |
|
22 |
- SVD <- svd(Z,dim,dim) |
|
23 |
- u<- matrix1 %*% SVD$u |
|
24 |
- v<- matrix2 %*% SVD$v |
|
25 |
- return(list(SVD=SVD,u=u,v=v)) |
|
17 |
+computeSVD <- function(matrix1, matrix2, dim = as.integer(10), scale = TRUE) { |
|
18 |
+ # Argument check |
|
19 |
+ stopifnot("`matrix1` must be a matrix." = is.matrix(matrix1)) |
|
20 |
+ stopifnot("`matrix2` must be a matrix." = is.matrix(matrix2)) |
|
21 |
+ stopifnot("`dim` must be a integer." = is.integer(dim)) |
|
22 |
+ stopifnot("`scale` must be a logical." = is.logical(scale)) |
|
23 |
+ stopifnot( |
|
24 |
+ "# of rows must be common between `matrix1` and `matrix2`" |
|
25 |
+ = nrow(matrix1) == nrow(matrix2) |
|
26 |
+ ) |
|
27 |
+ # |
|
28 |
+ matrix1[is.na(matrix1)] <- 0 |
|
29 |
+ matrix2[is.na(matrix2)] <- 0 |
|
30 |
+ Z <- t(matrix1) %*% matrix2 |
|
31 |
+ if (scale) Z <- Z / mean(Z) |
|
32 |
+ SVD <- svd(Z, dim, dim) |
|
33 |
+ u <- matrix1 %*% SVD$u |
|
34 |
+ v <- matrix2 %*% SVD$v |
|
35 |
+ return(list(SVD = SVD, u = u, v = v)) |
|
26 | 36 |
} |
27 | 37 |
\ No newline at end of file |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-#' Title Prepare condition matrix for expDrug |
|
1 |
+#' @title Prepare condition matrix for expDrug |
|
2 | 2 |
#' |
3 | 3 |
#' @param expDrug input gene expression profile |
4 | 4 |
#' |
... | ... |
@@ -8,20 +8,22 @@ |
8 | 8 |
#' @examples |
9 | 9 |
#' \donttest{ |
10 | 10 |
#' require(RTCGA.rnaseq) |
11 |
-#'LIST <- list(ACC.rnaseq, |
|
12 |
-#' BLCA.rnaseq, |
|
13 |
-#' BRCA.rnaseq) |
|
14 |
-#' dummy <- prepareexpDrugandDisease(LIST) |
|
15 |
-#' expDrug <- dummy[[1]] |
|
16 |
-#' Cond <- prepareCondDrugandDisease(expDrug) |
|
11 |
+#' Cancer_cell_lines <- list(ACC.rnaseq,BLCA.rnaseq,BRCA.rnaseq) |
|
12 |
+#' Drug_and_Disease <- prepareexpDrugandDisease(Cancer_cell_lines) |
|
13 |
+#' Cond <- prepareCondDrugandDisease(Drug_and_Disease$expDrug) |
|
17 | 14 |
#' } |
18 |
-prepareCondDrugandDisease <- function(expDrug){ |
|
19 |
- dir <- system.file("extdata", package="TDbasedUFEadv") |
|
20 |
- TCGA <- read.csv(file.path(dir,"drug_response.txt"), sep="\t") |
|
21 |
- Cond <- table(TCGA[,2],TCGA[,3]) |
|
22 |
- ID <- lapply(strsplit(as.character(colnames(expDrug)),"-"),"[",seq_len(3)) |
|
23 |
- ID <- t(data.frame(ID)) |
|
24 |
- ID <- paste(ID[,1],ID[,2],ID[,3],sep="-") |
|
25 |
- Cond <- Cond[match(ID,rownames(Cond)),] |
|
26 |
- return(Cond) |
|
15 |
+prepareCondDrugandDisease <- function(expDrug) { |
|
16 |
+ # Arugument Check |
|
17 |
+ stopifnot( |
|
18 |
+ "`expDrug` must be an ExpressionSet." = is(expDrug, "ExpressionSet") |
|
19 |
+ ) |
|
20 |
+ # |
|
21 |
+ dir <- system.file("extdata", package = "TDbasedUFEadv") |
|
22 |
+ TCGA <- read.csv(file.path(dir, "drug_response.txt"), sep = "\t") |
|
23 |
+ Cond <- table(TCGA$patient.arr, TCGA$drug.name) |
|
24 |
+ ID <- lapply(strsplit(as.character(colnames(expDrug)), "-"), "[", seq_len(3)) |
|
25 |
+ ID <- t(data.frame(ID)) |
|
26 |
+ ID <- paste(ID[, 1], ID[, 2], ID[, 3], sep = "-") |
|
27 |
+ Cond <- Cond[match(ID, rownames(Cond)), ] |
|
28 |
+ return(Cond) |
|
27 | 29 |
} |
28 | 30 |
\ No newline at end of file |
... | ... |
@@ -1,10 +1,10 @@ |
1 |
-#' Title Prepare Sample label for TCGA data |
|
1 |
+#' @title Prepare Sample label for TCGA data |
|
2 | 2 |
#' |
3 | 3 |
#' @param Multi_sample list of sample ids |
4 | 4 |
#' @param Clinical List of clinical data matrix from RTCGA.clinical |
5 |
-#' @param k Column numbers used for conditions |
|
6 |
-#' @param j Column numbers that include corresponding sample ids |
|
7 |
-#' in clinical data |
|
5 |
+#' @param ID_column_of_Multi_sample Column numbers used for conditions |
|
6 |
+#' @param ID_column_of_Clinical Column numbers that include corresponding |
|
7 |
+#' sample ids in clinical data |
|
8 | 8 |
#' |
9 | 9 |
#' @return list of sample labels |
10 | 10 |
#' @export |
... | ... |
@@ -12,22 +12,47 @@ |
12 | 12 |
#' @examples |
13 | 13 |
#' require(RTCGA.clinical) |
14 | 14 |
#' require(RTCGA.rnaseq) |
15 |
-#' Clinical <- list(BLCA.clinical,BRCA.clinical,CESC.clinical,COAD.clinical) |
|
16 |
-#' Multi_sample <- list(BLCA.rnaseq[seq_len(100),1,drop=FALSE], |
|
17 |
-#' BRCA.rnaseq[seq_len(100),1,drop=FALSE], |
|
18 |
-#' CESC.rnaseq[seq_len(100),1,drop=FALSE], |
|
19 |
-#' COAD.rnaseq[seq_len(100),1,drop=FALSE]) |
|
20 |
-#' k <- c(770,1482,773,791) |
|
21 |
-#' j <- c(20,20,12,14) |
|
22 |
-#' cond <- prepareCondTCGA(Multi_sample,Clinical,k,j) |
|
23 |
-prepareCondTCGA <- function(Multi_sample,Clinical,k,j) |
|
24 |
-{ |
|
25 |
- Cond <- rep(list(NA),length(Multi_sample)) |
|
26 |
- for (i in seq_len(length(Multi_sample))) |
|
27 |
- { |
|
28 |
- index <- match(tolower(substring(Multi_sample[[i]][,1],1,12)), |
|
29 |
- Clinical[[i]][,j[i]]) |
|
30 |
- Cond[[i]]<- Clinical[[i]][index,k[i]] |
|
31 |
- } |
|
32 |
- return(Cond) |
|
33 |
-} |
|
34 | 15 |
\ No newline at end of file |
16 |
+#' Clinical <- list(BLCA.clinical, BRCA.clinical, CESC.clinical, COAD.clinical) |
|
17 |
+#' Multi_sample <- list( |
|
18 |
+#' BLCA.rnaseq[seq_len(100), 1, drop = FALSE], |
|
19 |
+#' BRCA.rnaseq[seq_len(100), 1, drop = FALSE], |
|
20 |
+#' CESC.rnaseq[seq_len(100), 1, drop = FALSE], |
|
21 |
+#' COAD.rnaseq[seq_len(100), 1, drop = FALSE] |
|
22 |
+#' ) |
|
23 |
+#' ID_column_of_Multi_sample <- c(770, 1482, 773, 791) |
|
24 |
+#' ID_column_of_Clinical <- c(20, 20, 12, 14) |
|
25 |
+#' cond <- prepareCondTCGA( |
|
26 |
+#' Multi_sample, Clinical, |
|
27 |
+#' ID_column_of_Multi_sample, ID_column_of_Clinical |
|
28 |
+#' ) |
|
29 |
+prepareCondTCGA <- function(Multi_sample, Clinical, |
|
30 |
+ ID_column_of_Multi_sample, |
|
31 |
+ ID_column_of_Clinical) { |
|
32 |
+ # Argument check |
|
33 |
+ stopifnot("`Multi_sample` must be an list." = is.list(Multi_sample)) |
|
34 |
+ stopifnot("`Clinical` must be an list." = is.list(Multi_sample)) |
|
35 |
+ stopifnot( |
|
36 |
+ "`ID_column_of_Clinical` must be a vector" = is.vector(ID_column_of_Clinical) |
|
37 |
+ ) |
|
38 |
+ stopifnot( |
|
39 |
+ "`ID_column_of_Multi_sample` must be a vector" = is.vector(ID_column_of_Multi_sample) |
|
40 |
+ ) |
|
41 |
+ stopifnot( |
|
42 |
+ "Length must be common among `Multi_sample` " = var(unlist(lapply(list( |
|
43 |
+ Multi_sample, Clinical, |
|
44 |
+ ID_column_of_Multi_sample, |
|
45 |
+ ID_column_of_Clinical |
|
46 |
+ ), length))) == 0 |
|
47 |
+ ) |
|
48 |
+ # |
|
49 |
+ Cond <- rep(list(NA), length(Multi_sample)) |
|
50 |
+ for (i in seq_len(length(Multi_sample))) |
|
51 |
+ { |
|
52 |
+ index <- match( |
|
53 |
+ tolower(substring(Multi_sample[[i]][, 1], 1, 12)), |
|
54 |
+ Clinical[[i]][, ID_column_of_Clinical[i]] |
|
55 |
+ ) |
|
56 |
+ Cond[[i]] <- Clinical[[i]][index, ID_column_of_Multi_sample[i]] |
|
57 |
+ } |
|
58 |
+ return(Cond) |
|
59 |
+} |
35 | 60 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,34 @@ |
1 |
+#' @title Prepare tensor generated from two matrices that share samples |
|
2 |
+#' |
|
3 |
+#' @param sample Character vector of sample names |
|
4 |
+#' @param feature list of features from two matrices |
|
5 |
+#' @param value array, contents of |
|
6 |
+#' @param featureRange Genomic Ranges to be associated with features |
|
7 |
+#' @param sampleData List of conditional labeling associated with samples |
|
8 |
+#' |
|
9 |
+#' @return Tensor generated from two matrices that share samples |
|
10 |
+#' @export |
|
11 |
+#' |
|
12 |
+#' @examples |
|
13 |
+#' matrix1 <- matrix(runif(10000),200) #row features, column samples |
|
14 |
+#' matrix2 <- matrix(runif(20000),400) #row features, column samples |
|
15 |
+#' Z <- prepareTensorfromMatrix(t(matrix1),t(matrix2)) |
|
16 |
+#' Z <- prepareSummarizedExperimentTensorRect(sample=as.character(seq_len(50)), |
|
17 |
+#' feature=list(as.character(seq_len(200)),as.character(seq_len(400))), |
|
18 |
+#' sampleData=list(rep(seq_len(2),each=25)),value=Z) |
|
19 |
+prepareSummarizedExperimentTensorRect <- function( |
|
20 |
+ sample, feature, value, |
|
21 |
+ featureRange = GRanges(NULL), sampleData = list(NULL)) { |
|
22 |
+ # Argument check |
|
23 |
+ stopifnot("`sample` must be a character." = is.character(sample)) |
|
24 |
+ stopifnot("`feature` must be a list." = is.list(feature)) |
|
25 |
+ stopifnot("`value` must be a array." = is.array(value)) |
|
26 |
+ stopifnot("`featureRange` must be an GRanges." = is(featureRange, "GRanges")) |
|
27 |
+ stopifnot("`sampleData` must be a list." = is.list(sampleData)) |
|
28 |
+ # |
|
29 |
+ new("SummarizedExperimentTensorRect", |
|
30 |
+ sample = sample, |
|
31 |
+ feature = feature, value = value, |
|
32 |
+ featureRange = featureRange, sampleData = sampleData |
|
33 |
+ ) |
|
34 |
+} |
|
0 | 35 |
\ No newline at end of file |
... | ... |
@@ -1,7 +1,7 @@ |
1 |
-#' Title Prepare tensor from a list that includes multiple profiles |
|
1 |
+#' @title Prepare tensor from a list that includes multiple profiles |
|
2 | 2 |
#' |
3 | 3 |
#' @param Multi a list that includes multiple profiles |
4 |
-#' @param L the number of projection dimensions |
|
4 |
+#' @param proj_dim the number of projection dimensions |
|
5 | 5 |
#' |
6 | 6 |
#' @return a tensor as a bundle of singular value vectors obtained by |
7 | 7 |
#' applying SVD to individual omics |
... | ... |
@@ -11,11 +11,23 @@ |
11 | 11 |
#' require(MOFAdata) |
12 | 12 |
#' data("CLL_data") |
13 | 13 |
#' data("CLL_covariates") |
14 |
-#' Z <- prepareTensorfromList(CLL_data,10) |
|
15 |
-prepareTensorfromList <- function(Multi,L) |
|
16 |
-{ |
|
17 |
- LIST <- lapply(Multi,function(x){x[is.na(x)]<-0;return(x)}) |
|
18 |
- SVD_lst <- lapply(LIST,function(x){SVD <- svd(x,L);return(t(SVD$v[,seq_len(L)]))}) |
|
19 |
- Z <- array(unlist(SVD_lst),c(dim(SVD_lst[[1]]),length(Multi))) |
|
20 |
- return(Z) |
|
14 |
+#' Z <- prepareTensorfromList(CLL_data,as.integer(10)) |
|
15 |
+prepareTensorfromList <- function(Multi, proj_dim) { |
|
16 |
+ # Argument check |
|
17 |
+ stopifnot("`Multi` must be a list." = is.list(Multi)) |
|
18 |
+ stopifnot("`Proj_dim` must be an integer." = is.integer(proj_dim)) |
|
19 |
+ # |
|
20 |
+ Multi_list <- lapply(Multi, function(x) { |
|
21 |
+ x[is.na(x)] <- 0 |
|
22 |
+ return(x) |
|
23 |
+ }) |
|
24 |
+ SVD_lst <- lapply( |
|
25 |
+ Multi_list, |
|
26 |
+ function(x) { |
|
27 |
+ SVD <- svd(x, proj_dim) |
|
28 |
+ return(t(SVD$v[, seq_len(proj_dim)])) |
|
29 |
+ } |
|
30 |
+ ) |
|
31 |
+ Z <- array(unlist(SVD_lst), c(dim(SVD_lst[[1]]), length(Multi))) |
|
32 |
+ return(Z) |
|
21 | 33 |
} |
22 | 34 |
\ No newline at end of file |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-#' Title Generate tensor from two matricies |
|
1 |
+#' @title Generate tensor from two matrices |
|
2 | 2 |
#' |
3 | 3 |
#' @param matrix1 the first input matrix |
4 | 4 |
#' @param matrix2 the second input matrix |
... | ... |
@@ -8,14 +8,25 @@ |
8 | 8 |
#' |
9 | 9 |
#' @examples |
10 | 10 |
#' Z <- prepareTensorfromMatrix(matrix(runif(100),10),matrix(runif(100),10)) |
11 |
-prepareTensorfromMatrix <-function(matrix1,matrix2){ |
|
12 |
- L1 <- dim(matrix1)[2] |
|
13 |
- L2 <- dim(matrix2)[2] |
|
14 |
- matrix1[is.na(matrix1)] <-0 |
|
15 |
- matrix2[is.na(matrix2)] <-0 |
|
16 |
- Z <- apply(cbind(matrix1,matrix2),1, |
|
17 |
- function(x){outer(x[seq_len(L1)],x[L1+seq_len(L2)])}) |
|
18 |
- dim(Z) <- c(L1,L2,dim(Z)[2]) |
|
19 |
- Z <- aperm(Z,c(3,1,2)) |
|
20 |
- return(Z) |
|
11 |
+prepareTensorfromMatrix <- function(matrix1, matrix2) { |
|
12 |
+ # Argument Check |
|
13 |
+ stopifnot("`matrix1` must be a matrix." = is.matrix(matrix1)) |
|
14 |
+ stopifnot("`matrix2` must be a matrix." = is.matrix(matrix2)) |
|
15 |
+ # |
|
16 |
+ col_num1 <- dim(matrix1)[2] |
|
17 |
+ col_num2 <- dim(matrix2)[2] |
|
18 |
+ matrix1[is.na(matrix1)] <- 0 |
|
19 |
+ matrix2[is.na(matrix2)] <- 0 |
|
20 |
+ Z <- apply( |
|
21 |
+ cbind(matrix1, matrix2), 1, |
|
22 |
+ function(x) { |
|
23 |
+ outer( |
|
24 |
+ x[seq_len(col_num1)], |
|
25 |
+ x[col_num1 + seq_len(col_num2)] |
|
26 |
+ ) |
|
27 |
+ } |
|
28 |
+ ) |
|
29 |
+ dim(Z) <- c(col_num1, col_num2, dim(Z)[2]) |
|
30 |
+ Z <- aperm(Z, c(3, 1, 2)) |
|
31 |
+ return(Z) |
|
21 | 32 |
} |
22 | 33 |
\ No newline at end of file |
... | ... |
@@ -1,7 +1,8 @@ |
1 |
-#' Title Generating gene expression of drug treated cell lines and a disease |
|
1 |
+#' @title Generating gene expression of drug treated cell lines and a disease |
|
2 | 2 |
#' cell line |
3 | 3 |
#' |
4 |
-#' @param LIST list that includes indivisual data set from RTCGA.rnaseq |
|
4 |
+#' @param Cancer_cell_lines <- list(ACC.rnaseq,BLCA.rnaseq,BRCA.rnaseq) |
|
5 |
+#' list that includes individual data set from RTCGA.rnaseq |
|
5 | 6 |
#' |
6 | 7 |
#' @return list of expDrug and expDisease |
7 | 8 |
#' @export |
... | ... |
@@ -9,35 +10,44 @@ |
9 | 10 |
#' @examples |
10 | 11 |
#' \donttest{ |
11 | 12 |
#' require(RTCGA.rnaseq) |
12 |
-#' LIST <- list(ACC.rnaseq, |
|
13 |
-#' BLCA.rnaseq, |
|
14 |
-#' BRCA.rnaseq) |
|
15 |
-#' dummy <- prepareexpDrugandDisease(LIST) |
|
13 |
+#' Cancer_cell_lines <- list(ACC.rnaseq,BLCA.rnaseq,BRCA.rnaseq) |
|
14 |
+#' Drug_and_Disease <- prepareexpDrugandDisease(Cancer_cell_lines) |
|
16 | 15 |
#' } |
17 |
-prepareexpDrugandDisease <- function(LIST){ |
|
18 |
- dir <- system.file("extdata", package="TDbasedUFEadv") |
|
19 |
- TCGA <- read.csv(file.path(dir,"drug_response.txt"), sep="\t") |
|
20 |
- TCGAdb <- unique(TCGA[,1]) |
|
21 |
- if (length(TCGAdb)>16) TCGAdb <- TCGAdb[-16] |
|
22 |
- RTCGA_all <- matrix(NA, nrow=dim(TCGA)[1],ncol=dim(LIST[[1]])[2]) |
|
23 |
- LISTp <- rep(list(NA),length(LIST)) |
|
24 |
- toTCGA <- function(x){ |
|
25 |
- ID <- lapply(strsplit(as.character(x[,1]),"-"),"[",seq_len(3)) |
|
26 |
- ID <- t(data.frame(ID)) |
|
27 |
- ID <- paste(ID[,1],ID[,2],ID[,3],sep="-") |
|
28 |
- return( x[ID %in% TCGA[,2],]) |
|
29 |
- } |
|
30 |
- LISTp <- lapply(LIST,toTCGA) |
|
31 |
- expDrug <- NULL |
|
32 |
- for (i in seq_len(length(LISTp))) |
|
33 |
- { |
|
34 |
- expDrug <- rbind(expDrug,data.frame(LISTp[[i]])) |
|
35 |
- } |
|
36 |
- expDrug <- convertTCGA(expDrug) |
|
37 |
- ID <- lapply(strsplit(as.character(LIST[[3]][,1]),"-"),"[",seq_len(3)) |
|
16 |
+prepareexpDrugandDisease <- function(Cancer_cell_lines) { |
|
17 |
+ # Argument check |
|
18 |
+ stopifnot( |
|
19 |
+ "`Cancer_cell_lines` must be an list." = is.list(Cancer_cell_lines) |
|
20 |
+ ) |
|
21 |
+ # |
|
22 |
+ dir <- system.file("extdata", package = "TDbasedUFEadv") |
|
23 |
+ TCGA <- read.csv(file.path(dir, "drug_response.txt"), sep = "\t") |
|
24 |
+ TCGAdb <- unique(TCGA$cancers) |
|
25 |
+ if (length(TCGAdb) > 16) TCGAdb <- TCGAdb[-16] |
|
26 |
+ RTCGA_all <- matrix(NA, |
|
27 |
+ nrow = dim(TCGA)[1], |
|
28 |
+ ncol = dim(Cancer_cell_lines[[1]])[2] |
|
29 |
+ ) |
|
30 |
+ Cancer_cell_lines_p <- rep(list(NA), length(Cancer_cell_lines)) |
|
31 |
+ toTCGA <- function(x) { |
|
32 |
+ ID <- lapply(strsplit(as.character(x[, 1]), "-"), "[", seq_len(3)) |
|
38 | 33 |
ID <- t(data.frame(ID)) |
39 |
- ID <- paste(ID[,1],ID[,2],ID[,3],sep="-") |
|
40 |
- expDisease <- LIST[[3]][!(ID %in% TCGA[,2]),] |
|
41 |
- expDisease <- convertTCGA(expDisease) |
|
42 |
- return(list(expDrug=expDrug,expDisease=expDisease)) |
|
34 |
+ ID <- paste(ID[, 1], ID[, 2], ID[, 3], sep = "-") |
|
35 |
+ return(x[ID %in% TCGA$patient.arr, ]) |
|
36 |
+ } |
|
37 |
+ Cancer_cell_lines_p <- lapply(Cancer_cell_lines, toTCGA) |
|
38 |
+ expDrug <- NULL |
|
39 |
+ for (i in seq_len(length(Cancer_cell_lines_p))) |
|
40 |
+ { |
|
41 |
+ expDrug <- rbind(expDrug, data.frame(Cancer_cell_lines_p[[i]])) |
|
42 |
+ } |
|
43 |
+ expDrug <- convertTCGA(expDrug) |
|
44 |
+ ID <- lapply( |
|
45 |
+ strsplit(as.character(Cancer_cell_lines[[3]][, 1]), "-"), |
|
46 |
+ "[", seq_len(3) |
|
47 |
+ ) |
|
48 |
+ ID <- t(data.frame(ID)) |
|
49 |
+ ID <- paste(ID[, 1], ID[, 2], ID[, 3], sep = "-") |
|
50 |
+ expDisease <- Cancer_cell_lines[[3]][!(ID %in% TCGA$patient.arr), ] |
|
51 |
+ expDisease <- convertTCGA(expDisease) |
|
52 |
+ return(list(expDrug = expDrug, expDisease = expDisease)) |
|
43 | 53 |
} |
44 | 54 |
\ No newline at end of file |
... | ... |
@@ -1,13 +1,13 @@ |
1 |
-#' Title Select feature when projection strategy is employed for the |
|
1 |
+#' @title Select feature when projection strategy is employed for the |
|
2 | 2 |
#' case where features are shared with multiple omics profiles |
3 | 3 |
#' |
4 | 4 |
#' @param HOSVD HOSVD |
5 | 5 |
#' @param Multi list of omics profiles, row: sample, column: feature |
6 | 6 |
#' @param cond list of conditions for individual omics profiles |
7 |
-#' @param de initial value for optimization of statdard deviation |
|
7 |
+#' @param de initial value for optimization of standard deviation |
|
8 | 8 |
#' @param p0 Threshold P-value |
9 | 9 |
#' @param breaks The number of bins of histogram of P-values |
10 |
-#' @param input_all The number of selected feature. if null, intearactive mode |
|
10 |
+#' @param input_all The number of selected feature. if null, interactive mode |
|
11 | 11 |
#' is activated |
12 | 12 |
#' |
13 | 13 |
#' @return list composed of logical vector that represent which features are selected and p-values |
... | ... |
@@ -17,7 +17,7 @@ |
17 | 17 |
#' require(TDbasedUFE) |
18 | 18 |
#' Multi <- list(matrix(runif(1000),10),matrix(runif(1000),10), |
19 | 19 |
#' matrix(runif(1000),10),matrix(runif(1000),10)) |
20 |
-#' Z <- prepareTensorfromList(Multi,10) |
|
20 |
+#' Z <- prepareTensorfromList(Multi,as.integer(10)) |
|
21 | 21 |
#' Z <- aperm(Z,c(2,1,3)) |
22 | 22 |
#' Z <- PrepareSummarizedExperimentTensor(feature =as.character(1:10), |
23 | 23 |
#' sample=array("",1),value=Z) |
... | ... |
@@ -25,90 +25,135 @@ |
25 | 25 |
#' cond <- rep(list(rep(1:2,each=5)),4) |
26 | 26 |
#' index <- selectFeatureProj(HOSVD,Multi,cond,de=0.1,input_all=2) |
27 | 27 |
selectFeatureProj <- |
28 |
- function(HOSVD,Multi,cond,de=1e-4,p0=0.01,breaks=100,input_all=NULL){ |
|
28 |
+ function(HOSVD, Multi, cond, de = 1e-4, p0 = 0.01, breaks = as.integer(100), |
|
29 |
+ input_all = NULL) { |
|
30 |
+ # Augument check |
|
31 |
+ stopifnot("`HOSVD` must be a list." = is.list(HOSVD)) |
|
32 |
+ stopifnot("`Multi` must be a list." = is.list(Multi)) |
|
33 |
+ stopifnot("`de` must be a numeric." = is.numeric(de)) |
|
34 |
+ stopifnot("`p0` must be a numeric." = is.numeric(p0)) |
|
35 |
+ stopifnot("`breaks` must be a integer." = is.integer(breaks)) |
|
36 |
+ stopifnot("`input_all` must be a vector." = is.vector(input_all) | |
|
37 |
+ is.null(input_all)) |
|
38 |
+ # |
|
29 | 39 |
interact <- FALSE |
30 |
- if (is.null(input_all)) |
|
31 |
- { |
|
32 |
- interact <- TRUE |
|
33 |
- LIST1 <- lapply(Multi,function(x){data.matrix(x)%*%data.matrix(HOSVD$U[[1]])}) |
|
34 |
- j<-1 |
|
35 |
- ui <- fluidPage( |
|
40 |
+ Multi_list <- lapply( |
|
41 |
+ Multi, |
|
42 |
+ function(x) { |
|
43 |
+ data.matrix(x) %*% data.matrix(HOSVD$U[[1]]) |
|
44 |
+ } |
|
45 |
+ ) |
|
46 |
+ if (is.null(input_all)) { |
|
47 |
+ interact <- TRUE |
|
48 |
+ j <- 1 |
|
49 |
+ ui <- fluidPage( |
|
36 | 50 |
sidebarLayout( |
37 |
- sidebarPanel( |
|
38 |
- actionButton(inputId="action", label="Next"), |
|
39 |
- actionButton(inputId="prev", label="Prev"), |
|
40 |
- actionButton(inputId="select", label="Select")), |
|
41 |
- mainPanel( |
|
42 |
- plotOutput("plot") |
|
43 |
- ) |
|
51 |
+ sidebarPanel( |
|
52 |
+ actionButton(inputId = "action", label = "Next"), |
|
53 |
+ actionButton(inputId = "prev", label = "Prev"), |
|
54 |
+ actionButton(inputId = "select", label = "Select") |
|
55 |
+ ), |
|
56 |
+ mainPanel( |
|
57 |
+ plotOutput("plot") |
|
58 |
+ ) |
|
44 | 59 |
) |
45 |
- ) |
|
46 |
- server <- function(input, output){ |
|
60 |
+ ) |
|
61 |
+ server <- function(input, output) { |
|
47 | 62 |
observeEvent(input$action, { |
48 |
- if (j<dim(HOSVD$U[[1]])[2]) j<<-j+1 |
|
63 |
+ if (j < dim(HOSVD$U[[1]])[2]) j <<- j + 1 |
|
49 | 64 |
}) |
50 | 65 |
observeEvent(input$prev, { |
51 |
- if (j!=1){j<<-j-1} |
|
52 |
- }) |
|
66 |
+ if (j != 1) { |
|
67 |
+ j <<- j - 1 |
|
68 |
+ } |
|
69 |
+ }) |
|
53 | 70 |
observeEvent(input$select, { |
54 |
- input_all <<-j ; stopApp() |
|
55 |
- }) |
|
71 |
+ input_all <<- j |
|
72 |
+ stopApp() |
|
73 |
+ }) |
|
56 | 74 |
output$plot <- renderPlot({ |
57 |
- input$action |
|
58 |
- input$prev |
|
59 |
- par(mfrow=c(length(cond),1)) |
|
60 |
- par(mai=c(0.3,0.2,0.2,0.2)) |
|
61 |
- for (i in seq_len(length(cond))) |
|
62 |
- { |
|
63 |
- boxplot(LIST1[[i]][,j]~cond[[i]],main=paste(j,i,sep="-")) |
|
64 |
- abline(0,0,col=2,lty=2) |
|
65 |
- } |
|
66 |
- par(mfrow=c(1,1)) |
|
75 |
+ input$action |
|
76 |
+ input$prev |
|
77 |
+ par(mfrow = c(length(cond), 1)) |
|
78 |
+ par(mai = c(0.3, 0.2, 0.2, 0.2)) |
|
79 |
+ for (i in seq_len(length(cond))) |
|
80 |
+ { |
|
81 |
+ boxplot(Multi_list[[i]][, j] ~ cond[[i]], |
|
82 |
+ main = paste(j, i, sep = "-") |
|
83 |
+ ) |
|
84 |
+ abline(0, 0, col = 2, lty = 2) |
|
85 |
+ } |
|
86 |
+ par(mfrow = c(1, 1)) |
|
67 | 87 |
}) |
88 |
+ } |
|
89 |
+ app <- shinyApp(ui, server) |
|
90 |
+ runApp(app) |
|
91 |
+ input_all <- j |
|
92 |
+ } else { |
|
93 |
+ par(mfrow = c(length(cond), 1)) |
|
94 |
+ par(mai = c(0.3, 0.2, 0.2, 0.2)) |
|
95 |
+ for (i in seq_len(length(cond))) |
|
96 |
+ { |
|
97 |
+ boxplot(Multi_list[[i]][, input_all] ~ cond[[i]], |
|
98 |
+ main = paste(input_all, i, sep = "-") |
|
99 |
+ ) |
|
100 |
+ abline(0, 0, col = 2, lty = 2) |
|
101 |
+ } |
|
102 |
+ par(mfrow = c(1, 1)) |
|
68 | 103 |
} |
69 |
- app<- shinyApp(ui, server) |
|
70 |
- runApp(app) |
|
71 |
- input_all <- j |
|
72 |
- } |
|
73 |
- th <- function(sd,breaks,p0){ |
|
74 |
- P2 <- pchisq((u/sd)^2,1,lower.tail=FALSE) |
|
75 |
- hc<- hist(1-P2,breaks=breaks,plot=FALSE) |
|
76 |
- return(sd(hc$count[seq_len(sum(hc$breaks |
|
77 |
- <1-min(P2[p.adjust(P2,"BH")>p0])))])) |
|
104 |
+ th <- function(sd, breaks, p0) { |
|
105 |
+ P2 <- pchisq((u / sd)^2, 1, lower.tail = FALSE) |
|
106 |
+ hc <- hist(1 - P2, breaks = breaks, plot = FALSE) |
|
107 |
+ return(sd(hc$count[seq_len(sum(hc$breaks |
|
108 |
+ < 1 - min(P2[p.adjust(P2, "BH") > p0])))])) |
|
78 | 109 |
} |
79 |
- u<- HOSVD$U[[1]][,input_all] |
|
80 |
- sd <- optim(de,function(x){th(x,breaks,p0)}, |
|
81 |
- control=list(warn.1d.NelderMead=FALSE))$par |
|
82 |
- sd1 <- seq(0.1*sd,2*sd,by=0.1*sd) |
|
83 |
- th0 <- apply(matrix(sd1,ncol=1),1,function(x){th(x,breaks,p0)}) |
|
84 |
- P2 <- pchisq((u/sd)^2,1,lower.tail=FALSE) |
|
85 |
- ui <- fluidPage( |
|
86 |
- sidebarLayout( |
|
87 |
- sidebarPanel( |
|
88 |
- actionButton(inputId="action", label="Next")), |
|
89 |
- mainPanel( |
|
90 |
- plotOutput("plot") |
|
91 |
- ) |
|
92 |
- ) |
|
110 |
+ u <- HOSVD$U[[1]][, input_all] |
|
111 |
+ sd <- optim(de, function(x) { |
|
112 |
+ th(x, breaks, p0) |
|
113 |
+ }, |
|
114 |
+ control = list(warn.1d.NelderMead = FALSE) |
|
115 |
+ )$par |
|
116 |
+ sd1 <- seq(0.1 * sd, 2 * sd, by = 0.1 * sd) |
|
117 |
+ th0 <- apply(matrix(sd1, ncol = 1), 1, function(x) { |
|
118 |
+ th(x, breaks, p0) |
|
119 |
+ }) |
|
120 |
+ P2 <- pchisq((u / sd)^2, 1, lower.tail = FALSE) |
|
121 |
+ ui <- fluidPage( |
|
122 |
+ sidebarLayout( |
|
123 |
+ sidebarPanel( |
|
124 |
+ actionButton(inputId = "action", label = "Next") |
|
125 |
+ ), |
|
126 |
+ mainPanel( |
|
127 |
+ plotOutput("plot") |
|
93 | 128 |
) |
94 |
- server <- function(input, output){ |
|
95 |
- observeEvent(input$action, { |
|
96 |
- stopApp() |
|
97 |
- }) |
|
98 |
- output$plot <- renderPlot({ |
|
99 |
- input$action |
|
100 |
- par(mfrow=c(1,2)) |
|
101 |
- plot(sd1,th0,type="o") |
|
102 |
- arrows(sd,max(th0),sd,min(th0),col=2) |
|
103 |
- hist(1-P2,breaks=breaks) |
|
104 |
- par(mfrow=c(1,1)) |
|
105 |
- }) |
|
106 |
- } |
|
107 |
- app<- shinyApp(ui, server) |
|
108 |
- if (interact) runApp(app) |
|
109 |
- index <- p.adjust(P2,"BH")<p0 |
|
110 |
- index_all <- list(index=index,p.value=P2) |
|
111 |
- return(index_all) |
|
112 |
-} |
|
129 |
+ ) |
|
130 |
+ ) |
|
131 |
+ server <- function(input, output) { |
|
132 |
+ observeEvent(input$action, { |
|
133 |
+ stopApp() |
|
134 |
+ }) |
|
135 |
+ output$plot <- renderPlot({ |
|
136 |
+ input$action |
|
137 |
+ par(mfrow = c(1, 2)) |
|
138 |
+ plot(sd1, th0, type = "o") |
|
139 |
+ arrows(sd, max(th0), sd, min(th0), col = 2) |
|
140 |
+ hist(1 - P2, breaks = breaks) |
|
141 |
+ par(mfrow = c(1, 1)) |
|
142 |
+ }) |
|
143 |
+ } |
|
144 |
+ app <- shinyApp(ui, server) |
|
145 |
+ if (interact) { |
|
146 |
+ runApp(app) |
|
147 |
+ } else { |
|
148 |
+ par(mfrow = c(1, 2)) |
|
149 |
+ plot(sd1, th0, type = "o") |
|
150 |
+ arrows(sd, max(th0), sd, min(th0), col = 2) |
|
151 |
+ hist(1 - P2, breaks = breaks) |
|
152 |
+ par(mfrow = c(1, 1)) |
|
153 |
+ } |
|
154 |
+ index <- p.adjust(P2, "BH") < p0 |
|
155 |
+ index_all <- list(index = index, p.value = P2) |
|
156 |
+ return(index_all) |
|
157 |
+ } |
|
113 | 158 |
|
114 | 159 |
|
... | ... |
@@ -1,14 +1,14 @@ |
1 |
-#' Title Select features through the selection of singular value vectors |
|
1 |
+#' @title Select features through the selection of singular value vectors |
|
2 | 2 |
#' |
3 |
-#' @param SVD SVD compued from matrix generated by partial summation of a tensor |
|
3 |
+#' @param SVD SVD computed from matrix generated by partial summation of a tensor |
|
4 | 4 |
#' @param cond Condition to select singular value vectors |
5 | 5 |
#' @param de Initial values to be used for optimization of standard deviation |
6 | 6 |
#' @param p0 Threshold value for the significance |
7 |
-#' @param breaks Number of bins of hitogram of P-values |
|
7 |
+#' @param breaks Number of bins of histogram of P-values |
|
8 | 8 |
#' @param input_all The ID of selected singular value vectors. If it is null, |
9 | 9 |
#' interactive mode is activated. |
10 | 10 |
#' |
11 |
-#' @return List of lists that inlcudes P-vales as well as if individual |
|
11 |
+#' @return List of lists that includes P-vales as well as if individual |
|
12 | 12 |
#' features selected. |
13 | 13 |
#' @export |
14 | 14 |
#' |
... | ... |
@@ -20,96 +20,124 @@ |
20 | 20 |
#' index_all <- selectFeatureRect(SVD, |
21 | 21 |
#' list(NULL,rep(seq_len(2),each=5),rep(seq_len(2),each=10)),de=rep(0.5,2), |
22 | 22 |
#' input_all=1) |
23 |
-selectFeatureRect <- function(SVD,cond,de=rep(1e-4,2),p0=0.01, |
|
24 |
- breaks=100,input_all=NULL) |
|
25 |
- { |
|
26 |
- interact<-FALSE |
|
27 |
- if(is.null(input_all)) |
|
28 |
- { |
|
29 |
- interact<-TRUE |
|
30 |
- j<-1 |
|
31 |
- ui <- fluidPage( |
|
32 |
- sidebarLayout( |
|
33 |
- sidebarPanel( |
|
34 |
- actionButton(inputId="action", label="Next"), |
|
35 |
- actionButton(inputId="prev", label="Prev"), |
|
36 |
- actionButton(inputId="select", label="Select")), |
|
37 |
- mainPanel( |
|
38 |
- plotOutput("plot") |
|
39 |
- ) |
|
40 |
- ) |
|
41 |
- ) |
|
42 |
- |
|
43 |
- server <- function(input, output){ |
|
44 |
- observeEvent(input$action, { |
|
45 |
- if (j<dim(SVD$SVD$u)[2]) j<<-j+1 |
|
46 |
- }) |
|
47 |
- observeEvent(input$prev, { |
|
48 |
- if (j!=1){j<<-j-1} |
|
49 |
- }) |
|
50 |
- observeEvent(input$select, { |
|
51 |
- input_all <<-j ; stopApp() |
|
52 |
- }) |
|
53 |
- output$plot <- renderPlot({ |
|
54 |
- input$action |
|
55 |
- input$prev |
|
56 |
- par(mfrow=c(1,2)) |
|
57 |
- boxplot(SVD$SVD$u[,j]~cond[[2]],main=j) |
|
58 |
- abline(0,0,col=2,lty=2) |
|
59 |
- boxplot(SVD$SVD$v[,j]~cond[[3]],main=j) |
|
60 |
- abline(0,0,col=2,lty=2) |
|
61 |
- par(mfrow=c(1,1)) |
|
62 |
- }) |
|
63 |
- } |
|
64 |
- |
|
65 |
- app<- shinyApp(ui, server) |
|
66 |
- if(interact) runApp(app) |
|
67 |
- } |
|
68 |
- th <- function(sd,breaks,p0){ |
|
69 |
- P2 <- pchisq((u/sd)^2,1,lower.tail=FALSE) |
|
70 |
- hc<- hist(1-P2,breaks=breaks,plot=FALSE) |
|
71 |
- return(sd(hc$count[seq_len(sum(hc$breaks |
|
72 |
- <1-min(P2[p.adjust(P2,"BH")>p0])))])) |
|
23 |
+selectFeatureRect <- function(SVD, cond, de = rep(1e-4, 2), p0 = 0.01, |
|
24 |
+ breaks = as.integer(100), input_all = NULL) { |
|
25 |
+ # Augument check |
|
26 |
+ stopifnot("`SVD` must be a list." = is.list(SVD)) |
|
27 |
+ stopifnot("`cond` must be a list." = is.list(cond)) |
|
28 |
+ stopifnot("`de` must be a numeric." = is.numeric(de)) |
|
29 |
+ stopifnot("`p0` must be a numeric." = is.numeric(p0)) |
|
30 |
+ stopifnot("`breaks` must be a integer." = is.integer(breaks)) |
|
31 |
+ stopifnot("`input_all` must be a vector." = is.vector(input_all) | |
|
32 |
+ is.null(input_all)) |
|
33 |
+ # |
|
34 |
+ interact <- FALSE |
|
35 |
+ if (is.null(input_all)) { |
|
36 |
+ interact <- TRUE |
|
37 |
+ j <- 1 |
|
38 |
+ ui <- fluidPage( |
|
39 |
+ sidebarLayout( |
|
40 |
+ sidebarPanel( |
|
41 |
+ actionButton(inputId = "action", label = "Next"), |
|
42 |
+ actionButton(inputId = "prev", label = "Prev"), |
|
43 |
+ actionButton(inputId = "select", label = "Select") |
|
44 |
+ ), |
|
45 |
+ mainPanel( |
|
46 |
+ plotOutput("plot") |
|
47 |
+ ) |
|
48 |
+ ) |
|
49 |
+ ) |
|
50 |
+ |
|
51 |
+ server <- function(input, output) { |
|
52 |
+ observeEvent(input$action, { |
|
53 |
+ if (j < dim(SVD$SVD$u)[2]) j <<- j + 1 |
|
54 |
+ }) |
|
55 |
+ observeEvent(input$prev, { |
|
56 |
+ if (j != 1) { |
|
57 |
+ j <<- j - 1 |
|
73 | 58 |
} |
74 |
- index_all <- rep(list(NA)) |
|
75 |
- for (i in seq_len(2)) |
|
76 |
- { |
|
77 |
- u<- scale(SVD[[i+1]][,input_all]) |
|
78 |
- sd <- optim(de[i],function(x){th(x,breaks,p0)}, |
|
79 |
- control=list(warn.1d.NelderMead=FALSE))$par |
|
80 |
- sd1 <- seq(0.1*sd,2*sd,by=0.1*sd) |
|
81 |
- th0 <- apply(matrix(sd1,ncol=1),1,function(x){th(x,breaks,p0)}) |
|
82 |
- P2 <- pchisq((u/sd)^2,1,lower.tail=FALSE) |
|
83 |
- if (interact) |
|
84 |
- { |
|
85 |
- ui <- fluidPage( |
|
86 |
- sidebarLayout( |
|
87 |
- sidebarPanel( |
|
88 |
- actionButton(inputId="action", label="Next")), |
|
89 |
- mainPanel( |
|
90 |
- plotOutput("plot") |
|
91 |
- ) |
|
92 |
- ) |
|
93 |
- ) |
|
94 |
- server <- function(input, output){ |
|
95 |
- observeEvent(input$action, { |
|
96 |
- stopApp() |
|
97 |
- }) |
|
98 |
- output$plot <- renderPlot({ |
|
99 |
- input$action |
|
100 |
- par(mfrow=c(1,2)) |
|
101 |
- plot(sd1,th0,type="o") |
|
102 |
- arrows(sd,max(th0),sd,min(th0),col=2) |
|
103 |
- hist(1-P2,breaks=breaks) |
|
104 |
- par(mfrow=c(1,1)) |
|
105 |
- }) |
|
106 |
- } |
|
107 |
- app<- shinyApp(ui, server) |
|
108 |
- runApp(app) |
|
109 |
- } |
|
110 |
- index <- p.adjust(P2,"BH")<p0 |
|
111 |
- index_all[[i]] <- list(index=index,p.value=P2) |
|
112 |
- } |
|
113 |
- return(index_all) |
|
59 |
+ }) |
|
60 |
+ observeEvent(input$select, { |
|
61 |
+ input_all <<- j |
|
62 |
+ stopApp() |
|
63 |
+ }) |
|
64 |
+ output$plot <- renderPlot({ |
|
65 |
+ input$action |
|
66 |
+ input$prev |
|
67 |
+ par(mfrow = c(1, 2)) |
|
68 |
+ boxplot(SVD$SVD$u[, j] ~ cond[[2]], main = j) |
|
69 |
+ abline(0, 0, col = 2, lty = 2) |
|
70 |
+ boxplot(SVD$SVD$v[, j] ~ cond[[3]], main = j) |
|
71 |
+ abline(0, 0, col = 2, lty = 2) |
|
72 |
+ par(mfrow = c(1, 1)) |
|
73 |
+ }) |
|
74 |
+ } |
|
75 |
+ app <- shinyApp(ui, server) |
|
76 |
+ runApp(app) |
|
77 |
+ } else { |
|
78 |
+ par(mfrow = c(1, 2)) |
|
79 |
+ boxplot(SVD$SVD$u[, input_all] ~ cond[[2]], main = input_all) |
|
80 |
+ abline(0, 0, col = 2, lty = 2) |
|
81 |
+ boxplot(SVD$SVD$v[, input_all] ~ cond[[3]], main = input_all) |
|
82 |
+ abline(0, 0, col = 2, lty = 2) |
|
83 |
+ par(mfrow = c(1, 1)) |
|
84 |
+ } |
|
85 |
+ th <- function(sd, breaks, p0) { |
|
86 |
+ P2 <- pchisq((u / sd)^2, 1, lower.tail = FALSE) |
|
87 |
+ hc <- hist(1 - P2, breaks = breaks, plot = FALSE) |
|
88 |
+ return(sd(hc$count[seq_len(sum(hc$breaks |
|
89 |
+ < 1 - min(P2[p.adjust(P2, "BH") > p0])))])) |
|
90 |
+ } |
|
91 |
+ index_all <- rep(list(NA)) |
|
92 |
+ for (i in seq_len(2)) |
|
93 |
+ { |
|
94 |
+ u <- scale(SVD[[i + 1]][, input_all]) |
|
95 |
+ sd <- optim(de[i], function(x) { |
|
96 |
+ th(x, breaks, p0) |
|
97 |
+ }, |
|
98 |
+ control = list(warn.1d.NelderMead = FALSE) |
|
99 |
+ )$par |
|
100 |
+ sd1 <- seq(0.1 * sd, 2 * sd, by = 0.1 * sd) |
|
101 |
+ th0 <- apply(matrix(sd1, ncol = 1), 1, function(x) { |
|
102 |
+ th(x, breaks, p0) |
|
103 |
+ }) |
|
104 |
+ P2 <- pchisq((u / sd)^2, 1, lower.tail = FALSE) |
|
105 |
+ if (interact) { |
|
106 |
+ ui <- fluidPage( |
|
107 |
+ sidebarLayout( |
|
108 |
+ sidebarPanel( |
|
109 |
+ actionButton(inputId = "action", label = "Next") |
|
110 |
+ ), |
|
111 |
+ mainPanel( |
|
112 |
+ plotOutput("plot") |
|
113 |
+ ) |
|
114 |
+ ) |
|
115 |
+ ) |
|
116 |
+ server <- function(input, output) { |
|
117 |
+ observeEvent(input$action, { |
|
118 |
+ stopApp() |
|
119 |
+ }) |
|
120 |
+ output$plot <- renderPlot({ |
|
121 |
+ input$action |
|
122 |
+ par(mfrow = c(1, 2)) |
|
123 |
+ plot(sd1, th0, type = "o") |
|
124 |
+ arrows(sd, max(th0), sd, min(th0), col = 2) |
|
125 |
+ hist(1 - P2, breaks = breaks) |
|
126 |
+ par(mfrow = c(1, 1)) |
|
127 |
+ }) |
|
128 |
+ } |
|
129 |
+ app <- shinyApp(ui, server) |
|
130 |
+ runApp(app) |
|
131 |
+ } else { |
|
132 |
+ par(mfrow = c(1, 2)) |
|
133 |
+ plot(sd1, th0, type = "o") |
|
134 |
+ arrows(sd, max(th0), sd, min(th0), col = 2) |
|
135 |
+ hist(1 - P2, breaks = breaks) |
|
136 |
+ par(mfrow = c(1, 1)) |
|
114 | 137 |
} |
138 |
+ index <- p.adjust(P2, "BH") < p0 |
|
139 |
+ index_all[[i]] <- list(index = index, p.value = P2) |
|
140 |
+ } |
|
141 |
+ return(index_all) |
|
142 |
+} |
|
115 | 143 |
|
116 | 144 |
\ No newline at end of file |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-#' Title Select features for a tensor generated from two matrices that |
|
1 |
+#' @title Select features for a tensor generated from two matrices that |
|
2 | 2 |
#' share samples. |
3 | 3 |
#' |
4 | 4 |
#' @param HOSVD HOSVD |
... | ... |
@@ -18,97 +18,128 @@ |
18 | 18 |
#' matrix1 <- matrix(runif(10000),200) #row features, column samples |
19 | 19 |
#' matrix2 <- matrix(runif(20000),400) #row features, column samples |
20 | 20 |
#' Z <- prepareTensorfromMatrix(t(matrix1),t(matrix2)) |
21 |
-#' Z <- PrepareSummarizedExperimentTensorRect(sample=as.character(seq_len(50)), |
|
21 |
+#' Z <- prepareSummarizedExperimentTensorRect(sample=as.character(seq_len(50)), |
|
22 | 22 |
#' feature=list(as.character(seq_len(200)),as.character(seq_len(400))), |
23 | 23 |
#' sampleData=list(rep(seq_len(2),each=25)),value=Z) |
24 | 24 |
#' HOSVD <- computeHosvd(Z) |
25 | 25 |
#' cond <- list(attr(Z,"sampleData")[[1]],NULL,NULL) |
26 | 26 |
#' index_all <- selectFeatureTransRect(HOSVD,cond,de=c(0.01,0.01),input_all=2) |
27 |
-selectFeatureTransRect <- function(HOSVD,cond,de=rep(1e-4,2),p0=0.01, |
|
28 |
- breaks=100,input_all=NULL){ |
|
29 |
- { |
|
30 |
- interact<-FALSE |
|
31 |
- if(is.null(input_all)) |
|
32 |
- { |
|
33 |
- interact<-TRUE |
|
34 |
- j<-1 |
|
35 |
- ui <- fluidPage( |
|
36 |
- sidebarLayout( |
|
37 |
- sidebarPanel( |
|
38 |
- actionButton(inputId="action", label="Next"), |
|
39 |
- actionButton(inputId="prev", label="Prev"), |
|
40 |
- actionButton(inputId="select", label="Select")), |
|
41 |
- mainPanel( |
|
42 |
- plotOutput("plot") |
|
43 |
- ) |
|
44 |
- ) |
|
45 |
- ) |
|
46 |
- server <- function(input, output){ |
|
47 |
- observeEvent(input$action, { |
|
48 |
- if (j<dim(HOSVD$U[[1]])[2]) j<<-j+1 |
|
49 |
- }) |
|
50 |
- observeEvent(input$prev, { |
|
51 |
- if (j!=1){j<<-j-1} |
|
52 |
- }) |
|
53 |
- observeEvent(input$select, { |
|
54 |
- input_all <<-j ; stopApp() |
|
55 |
- }) |
|
56 |
- output$plot <- renderPlot({ |
|
57 |
- input$action |
|
58 |
- input$prev |
|
59 |
- for (i in seq_len(length(cond))) |
|
60 |
- { |
|
61 |
- boxplot(HOSVD$U[[1]][,j]~cond[[1]],main=j) |
|
62 |
- abline(0,0,col=2,lty=2) |
|
63 |
- } |
|
64 |
- }) |
|
65 |
- } |
|
66 |
- app<- shinyApp(ui, server) |
|
67 |
- runApp(app) |
|
68 |
- input_all <- j |
|
69 |
- } |
|
70 |
- th <- function(sd,breaks,p0){ |
|
71 |
- P2 <- pchisq((u/sd)^2,1,lower.tail=FALSE) |
|
72 |
- hc<- hist(1-P2,breaks=breaks,plot=FALSE) |
|
73 |
- return(sd(hc$count[seq_len(sum(hc$breaks |
|
74 |
- <1-min(P2[p.adjust(P2,"BH")>p0])))])) |
|
27 |
+selectFeatureTransRect <- function(HOSVD, cond, de = rep(1e-4, 2), p0 = 0.01, |
|
28 |
+ breaks = as.integer(100), input_all = NULL) {{ |
|
29 |
+ # Augument check |
|
30 |
+ stopifnot("`HOSVD` must be a list." = is.list(HOSVD)) |
|
31 |
+ stopifnot("`cond` must be a list." = is.list(cond)) |
|
32 |
+ stopifnot("`de` must be a numeric." = is.numeric(de)) |
|
33 |
+ stopifnot("`p0` must be a numeric." = is.numeric(p0)) |
|
34 |
+ stopifnot("`breaks` must be a integer." = is.integer(breaks)) |
|
35 |
+ stopifnot("`input_all` must be a vector." = is.vector(input_all) | |
|
36 |
+ is.null(input_all)) |
|
37 |
+ # |
|
38 |
+ interact <- FALSE |
|
39 |
+ if (is.null(input_all)) { |
|
40 |
+ interact <- TRUE |
|
41 |
+ j <- 1 |
|
42 |
+ ui <- fluidPage( |
|
43 |
+ sidebarLayout( |
|
44 |
+ sidebarPanel( |
|
45 |
+ actionButton(inputId = "action", label = "Next"), |
|
46 |
+ actionButton(inputId = "prev", label = "Prev"), |
|
47 |
+ actionButton(inputId = "select", label = "Select") |
|
48 |
+ ), |
|
49 |
+ mainPanel( |
|
50 |
+ plotOutput("plot") |
|
51 |
+ ) |
|
52 |
+ ) |
|
53 |
+ ) |
|
54 |
+ server <- function(input, output) { |
|
55 |
+ observeEvent(input$action, { |
|
56 |
+ if (j < dim(HOSVD$U[[1]])[2]) j <<- j + 1 |
|
57 |
+ }) |
|
58 |
+ observeEvent(input$prev, { |
|
59 |
+ if (j != 1) { |
|
60 |
+ j <<- j - 1 |
|
75 | 61 |
} |
76 |
- index_all <- rep(list(NA)) |
|
77 |
- for (i in seq_len(2)) |
|
62 |
+ }) |
|
63 |
+ observeEvent(input$select, { |
|
64 |
+ input_all <<- j |
|
65 |
+ stopApp() |
|
66 |
+ }) |
|
67 |
+ output$plot <- renderPlot({ |
|
68 |
+ input$action |
|
69 |
+ input$prev |
|
70 |
+ for (i in seq_len(length(cond))) |
|
78 | 71 |
{ |
79 |
- u<- HOSVD$U[[i+1]][,input_all] |
|
80 |
- sd <- optim(de[i],function(x){th(x,breaks,p0)}, |
|
81 |
- control=list(warn.1d.NelderMead=FALSE))$par |
|
82 |
- sd1 <- seq(0.1*sd,2*sd,by=0.1*sd) |
|
83 |
- th0 <- apply(matrix(sd1,ncol=1),1,function(x){th(x,breaks,p0)}) |
|
84 |
- P2 <- pchisq((u/sd)^2,1,lower.tail=FALSE) |
|
85 |
- ui <- fluidPage( |
|
86 |
- sidebarLayout( |
|
87 |
- sidebarPanel( |
|
88 |
- actionButton(inputId="action", label="Next")), |
|
89 |
- mainPanel( |
|
90 |
- plotOutput("plot") |
|
91 |
- ) |
|
92 |
- ) |
|
93 |
- ) |
|
94 |
- server <- function(input, output){ |
|
95 |
- observeEvent(input$action, { |
|
96 |
- stopApp() |
|
97 |
- }) |
|
98 |
- output$plot <- renderPlot({ |
|
99 |
- input$action |
|
100 |
- par(mfrow=c(1,2)) |
|
101 |
- plot(sd1,th0,type="o") |
|
102 |
- arrows(sd,max(th0),sd,min(th0),col=2) |
|
103 |
- hist(1-P2,breaks=breaks) |
|
104 |
- par(mfrow=c(1,1)) |
|
105 |
- }) |
|
106 |
- } |
|
107 |
- app<- shinyApp(ui, server) |
|
108 |
- if (interact) runApp(app) |
|
109 |
- index <- p.adjust(P2,"BH")<p0 |
|
110 |
- index_all[[i]] <- list(index=index,p.value=P2) |
|
72 |
+ boxplot(HOSVD$U[[1]][, j] ~ cond[[1]], main = j) |
|
73 |
+ abline(0, 0, col = 2, lty = 2) |
|
111 | 74 |
} |
112 |
- return(index_all) |
|
75 |
+ }) |
|
76 |
+ } |
|
77 |
+ app <- shinyApp(ui, server) |
|
78 |
+ runApp(app) |
|
79 |
+ input_all <- j |
|
80 |
+ } else { |
|
81 |
+ for (i in seq_len(length(cond))) |
|
82 |
+ { |
|
83 |
+ boxplot(HOSVD$U[[1]][, input_all] ~ cond[[1]], main = input_all) |
|
84 |
+ abline(0, 0, col = 2, lty = 2) |
|
85 |
+ } |
|
86 |
+ } |
|
87 |
+ th <- function(sd, breaks, p0) { |
|
88 |
+ P2 <- pchisq((u / sd)^2, 1, lower.tail = FALSE) |
|
89 |
+ hc <- hist(1 - P2, breaks = breaks, plot = FALSE) |
|
90 |
+ return(sd(hc$count[seq_len(sum(hc$breaks |
|
91 |
+ < 1 - min(P2[p.adjust(P2, "BH") > p0])))])) |
|
92 |
+ } |
|
93 |
+ index_all <- rep(list(NA)) |
|
94 |
+ for (i in seq_len(2)) |
|
95 |
+ { |
|
96 |
+ u <- HOSVD$U[[i + 1]][, input_all] |
|
97 |
+ sd <- optim(de[i], function(x) { |
|
98 |
+ th(x, breaks, p0) |
|
99 |
+ }, |
|
100 |
+ control = list(warn.1d.NelderMead = FALSE) |
|
101 |
+ )$par |
|
102 |
+ sd1 <- seq(0.1 * sd, 2 * sd, by = 0.1 * sd) |
|
103 |
+ th0 <- apply(matrix(sd1, ncol = 1), 1, function(x) { |
|
104 |
+ th(x, breaks, p0) |
|
105 |
+ }) |
|
106 |
+ P2 <- pchisq((u / sd)^2, 1, lower.tail = FALSE) |
|
107 |
+ ui <- fluidPage( |
|
108 |
+ sidebarLayout( |
|
109 |
+ sidebarPanel( |
|
110 |
+ actionButton(inputId = "action", label = "Next") |
|
111 |
+ ), |
|
112 |
+ mainPanel( |
|
113 |
+ plotOutput("plot") |
|
114 |
+ ) |
|
115 |
+ ) |
|
116 |
+ ) |
|
117 |
+ server <- function(input, output) { |
|
118 |
+ observeEvent(input$action, { |
|
119 |
+ stopApp() |
|
120 |
+ }) |
|
121 |
+ output$plot <- renderPlot({ |
|
122 |
+ input$action |
|
123 |
+ par(mfrow = c(1, 2)) |
|
124 |
+ plot(sd1, th0, type = "o") |
|
125 |
+ arrows(sd, max(th0), sd, min(th0), col = 2) |
|
126 |
+ hist(1 - P2, breaks = breaks) |
|
127 |
+ par(mfrow = c(1, 1)) |
|
128 |
+ }) |
|
129 |
+ } |
|
130 |
+ app <- shinyApp(ui, server) |
|
131 |
+ if (interact) { |
|
132 |
+ runApp(app) |
|
133 |
+ } else { |
|
134 |
+ par(mfrow = c(1, 2)) |
|
135 |
+ plot(sd1, th0, type = "o") |
|
136 |
+ arrows(sd, max(th0), sd, min(th0), col = 2) |
|
137 |
+ hist(1 - P2, breaks = breaks) |
|
138 |
+ par(mfrow = c(1, 1)) |
|
113 | 139 |
} |
114 |
-} |
|
115 | 140 |
\ No newline at end of file |
141 |
+ |
|
142 |
+ index <- p.adjust(P2, "BH") < p0 |
|
143 |
+ index_all[[i]] <- list(index = index, p.value = P2) |
|
144 |
+ } |
|
145 |
+ return(index_all) |
|
146 |
+}} |
|
116 | 147 |
\ No newline at end of file |
... | ... |
@@ -1,7 +1,7 @@ |
1 |
-#' Title Convert SVD to that for the case where |
|
1 |
+#' @title Convert SVD to that for the case where |
|
2 | 2 |
#' samples are shared between two matrices |
3 | 3 |
#' |
4 |
-#' @param SVD inout SVD object genetated from computeSVD function |
|
4 |
+#' @param SVD input SVD object generated from computeSVD function |
|
5 | 5 |
#' |
6 | 6 |
#' @return converted SVD objects |
7 | 7 |
#' @export |
... | ... |
@@ -11,12 +11,15 @@ |
11 | 11 |
#' matrix2 <- matrix(runif(400),20) |
12 | 12 |
#' SVD <- computeSVD(matrix1,matrix2) |
13 | 13 |
#' SVD <- transSVD(SVD) |
14 |
-transSVD <- function(SVD){ |
|
15 |
- u0 <- SVD$SVD$u |
|
16 |
- v0 <- SVD$SVD$v |
|
17 |
- SVD$SVD$u <- scale(SVD$u) |
|
18 |
- SVD$SVD$v <- scale(SVD$v) |
|
19 |
- SVD$u <- scale(u0) |
|
20 |
- SVD$v <- scale(v0) |
|
21 |
- return(SVD) |
|
14 |
+transSVD <- function(SVD) { |
|
15 |
+ # Augument check |
|
16 |
+ stopifnot("`SVD` must be a list." = is.list(SVD)) |
|
17 |
+ # |
|
18 |
+ u0 <- SVD$SVD$u |
|
19 |
+ v0 <- SVD$SVD$v |
|
20 |
+ SVD$SVD$u <- scale(SVD$u) |
|
21 |
+ SVD$SVD$v <- scale(SVD$v) |
|
22 |
+ SVD$u <- scale(u0) |
|
23 |
+ SVD$v <- scale(v0) |
|
24 |
+ return(SVD) |
|
22 | 25 |
} |
23 | 26 |
\ No newline at end of file |
24 | 27 |
deleted file mode 100644 |
... | ... |
@@ -1,71 +0,0 @@ |
1 |
-output: github_document |
|
2 |
- |
|
3 |
-<!-- README.md is generated from README.Rmd. Please edit that file --> |
|
4 |
- |
|
5 |
-```{r, include = FALSE} |
|
6 |
-knitr::opts_chunk$set( |
|
7 |
- collapse = TRUE, |
|
8 |
- comment = "#>", |
|
9 |
- fig.path = "man/figures/README-", |
|
10 |
- out.width = "100%" |
|
11 |
-) |
|
12 |
-``` |
|
13 |
- |
|
14 |
-# TDbasedUFEadv |
|
15 |
- |
|
16 |
-<!-- badges: start --> |
|
17 |
-<!-- badges: end --> |
|
18 |
- |
|
19 |
-The goal of TDbasedUFEadv is to ... |
|
20 |
- |
|
21 |
-## Installation |
|
22 |
- |
|
23 |
-## Installation |
|
24 |
- |
|
25 |
-``` {r, eval = FALSE} |
|
26 |
-if (!require("BiocManager", quietly = TRUE)) |
|
27 |
- install.packages("BiocManager") |
|
28 |
-BiocManager::install("TDbasedUFEadv") |
|
29 |
-``` |
|
30 |
- |
|
31 |
-You can install the latest release of TDbasedUFEadv from [GitHub](https://github.com/) with: |
|
32 |
- |
|
33 |
-``` r |
|
34 |
-# install.packages("devtools") |
|
35 |
-devtools::install_github("tagtag/[email protected]") |
|
36 |
-``` |
|
37 |
- |
|
38 |
-You can install the development version of TDbasedUFEadv from [GitHub](https://github.com/) with: |
|
39 |
- |
|
40 |
-``` r |
|
41 |
-# install.packages("devtools") |
|
42 |
-devtools::install_github("tagtag/TDbasedUFEadv",build_vignettes = TRUE) |
|
43 |
-``` |
|
44 |
- |
|
45 |
-or if it does not work, try |
|
46 |
- |
|
47 |
-``` r |
|
48 |
-# install.packages("devtools") |
|
49 |
-devtools::install_github("tagtag/TDbasedUFEadv") |
|
50 |
-``` |
|
51 |
- |
|
52 |
-## Introduction |
|
53 |
- |
|
54 |
-It is an advanced version of [TDbasedUFE](https://github.com/tagtag/TDbasedUFE/releases/tag/v0.1.0). Thus people who would like to use TDbasedUFEadv, please first install and check TDbasedUFE. |
|
55 |
- |
|
56 |
- |
|
57 |
-## Vignettes |
|
58 |
- |
|
59 |
-How to use it |
|
60 |
- |
|
61 |
-vignette("QuickStart") |
|
62 |
- |
|
63 |
-vignette("QuickStart2") |
|
64 |
- |
|
65 |
-vignette("Enrichment") |
|
66 |
- |
|
67 |
-For more theoretical background |
|
68 |
- |
|
69 |
-vignette("TDbasedUFEadv") |
... | ... |
@@ -10,7 +10,7 @@ The goal of TDbasedUFEadv is to … |
10 | 10 |
|
11 | 11 |
## Installation |
12 | 12 |
|
13 |
-## Installation |
|
13 |
+ |
|
14 | 14 |
|
15 | 15 |
``` r |
16 | 16 |
if (!require("BiocManager", quietly = TRUE)) |
... | ... |
@@ -44,13 +44,13 @@ devtools::install_github("tagtag/TDbasedUFEadv") |
44 | 44 |
## Introduction |
45 | 45 |
|
46 | 46 |
It is an advanced version of |
47 |
-[TDbasedUFE](https://github.com/tagtag/TDbasedUFE/releases/tag/v0.1.0). |
|
47 |
+[TDbasedUFE](https://bioconductor.org/packages/devel/bioc/html/TDbasedUFE.html). |
|
48 | 48 |
Thus people who would like to use TDbasedUFEadv, please first install |
49 | 49 |
and check TDbasedUFE. |
50 | 50 |
|
51 | 51 |
## Vignettes |
52 | 52 |
|
53 |
-How to use it |
|
53 |
+How to use it. |
|
54 | 54 |
|
55 | 55 |
vignette(“QuickStart”) |
56 | 56 |
|
57 | 57 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,5 @@ |
1 |
+drug_response.txt is included in |
|
2 |
+http://lifeome.net/supp/drug_response/ |
|
3 |
+First, download |
|
4 |
+http://lifeome.net/supp/drug_response/response.zip |
|
5 |
+and unzip. Then you can get drug_response.txt. |
|
0 | 6 |
\ No newline at end of file |
... | ... |
@@ -1,11 +1,11 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 |
-% Please edit documentation in R/init.R |
|
2 |
+% Please edit documentation in R/AllClasses.R |
|
3 | 3 |
\docType{class} |
4 | 4 |
\name{SummarizedExperimentTensorRect-class} |
5 | 5 |
\alias{SummarizedExperimentTensorRect-class} |
6 |
-\title{Title} |
|
6 |
+\title{Class definitions} |
|
7 | 7 |
\description{ |
8 |
-Title |
|
8 |
+Class definitions |
|
9 | 9 |
} |
10 | 10 |
\section{Slots}{ |
11 | 11 |
|
... | ... |
@@ -6,7 +6,15 @@ |
6 | 6 |
\alias{TDbasedUFEadv-package} |
7 | 7 |
\title{TDbasedUFEadv: TDbasedUFEadv} |
8 | 8 |
\description{ |
9 |
-This is a comprehensive package to perform Tensor decomposition based unsupervised feature extraction. It can perform unsupervised feature extraction. It uses tensor decompission. It is applicable to gene expression, DNA methylation, and histone modification etc. It can perform multiomics analysis. It is also applicable to single cell omics data sets. |
|
9 |
+This is a comprehensive package to perform Tensor decomposition based unsupervised feature extraction. It can perform unsupervised feature extraction. It uses tensor decomposition. It is applicable to gene expression, DNA methylation, and histone modification etc. It can perform multiomics analysis. It is also applicable to single cell omics data sets. |
|
10 |
+} |
|
11 |
+\seealso{ |
|
12 |
+Useful links: |
|
13 |
+\itemize{ |
|
14 |
+ \item \url{https://github.com/tagtag/TDbasedUFEadv} |
|
15 |
+ \item Report bugs at \url{https://github.com/tagtag/TDbasedUFEadv/issues} |
|
16 |
+} |
|
17 |
+ |
|
10 | 18 |
} |
11 | 19 |
\author{ |
12 | 20 |
\strong{Maintainer}: Taguchi Y-h. \email{[email protected]} (\href{https://orcid.org/0000-0003-0867-8986}{ORCID}) |
... | ... |
@@ -5,7 +5,7 @@ |
5 | 5 |
\title{Title Perform SVD toward reduced matrix generated from a tensor with |
6 | 6 |
partial summation} |
7 | 7 |
\usage{ |
8 |
-computeSVD(matrix1, matrix2, dim = 10, scale = TRUE) |
|
8 |
+computeSVD(matrix1, matrix2, dim = as.integer(10), scale = TRUE) |
|
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 | 11 |
\item{matrix1}{The first original matrix that generates a tensor} |
... | ... |
@@ -18,7 +18,7 @@ computeSVD(matrix1, matrix2, dim = 10, scale = TRUE) |
18 | 18 |
} |
19 | 19 |
\value{ |
20 | 20 |
Singular value vectors attributed to two sets of objects associated |
21 |
-with eingular value vectors attriuted to features, by multiplying |
|
21 |
+with singular value vectors attributed to features, by multiplying |
|
22 | 22 |
} |
23 | 23 |
\description{ |
24 | 24 |
Title Perform SVD toward reduced matrix generated from a tensor with |
... | ... |
@@ -2,7 +2,7 @@ |
2 | 2 |
% Please edit documentation in R/prepareCondDrugandDisease.R |
3 | 3 |
\name{prepareCondDrugandDisease} |
4 | 4 |
\alias{prepareCondDrugandDisease} |
5 |
-\title{Title Prepare condition matrix for expDrug} |
|
5 |
+\title{Prepare condition matrix for expDrug} |
|
6 | 6 |
\usage{ |
7 | 7 |
prepareCondDrugandDisease(expDrug) |
8 | 8 |
} |
... | ... |
@@ -13,16 +13,13 @@ prepareCondDrugandDisease(expDrug) |
13 | 13 |
Condition matrix for expDrug |
14 | 14 |
} |
15 | 15 |
\description{ |
16 |
-Title Prepare condition matrix for expDrug |
|
16 |
+Prepare condition matrix for expDrug |
|
17 | 17 |
} |
18 | 18 |
\examples{ |
19 | 19 |
\donttest{ |
20 | 20 |
require(RTCGA.rnaseq) |
21 |
-LIST <- list(ACC.rnaseq, |
|
22 |
- BLCA.rnaseq, |
|
23 |
- BRCA.rnaseq) |
|
24 |
-dummy <- prepareexpDrugandDisease(LIST) |
|
25 |
-expDrug <- dummy[[1]] |
|
26 |
-Cond <- prepareCondDrugandDisease(expDrug) |
|
21 |
+Cancer_cell_lines <- list(ACC.rnaseq,BLCA.rnaseq,BRCA.rnaseq) |
|
22 |
+Drug_and_Disease <- prepareexpDrugandDisease(Cancer_cell_lines) |
|
23 |
+Cond <- prepareCondDrugandDisease(Drug_and_Disease$expDrug) |
|
27 | 24 |
} |
28 | 25 |
} |
... | ... |
@@ -2,35 +2,45 @@ |
2 | 2 |
% Please edit documentation in R/prepareCondTCGA.R |
3 | 3 |
\name{prepareCondTCGA} |
4 | 4 |
\alias{prepareCondTCGA} |
5 |
-\title{Title Prepare Sample label for TCGA data} |
|
5 |
+\title{Prepare Sample label for TCGA data} |
|
6 | 6 |
\usage{ |
7 |
-prepareCondTCGA(Multi_sample, Clinical, k, j) |
|
7 |
+prepareCondTCGA( |
|
8 |
+ Multi_sample, |
|
9 |
+ Clinical, |
|
10 |
+ ID_column_of_Multi_sample, |
|
11 |
+ ID_column_of_Clinical |
|
12 |
+) |
|
8 | 13 |
} |
9 | 14 |
\arguments{ |
10 | 15 |
\item{Multi_sample}{list of sample ids} |
11 | 16 |
|
12 | 17 |
\item{Clinical}{List of clinical data matrix from RTCGA.clinical} |
13 | 18 |
|
14 |
-\item{k}{Column numbers used for conditions} |
|
19 |
+\item{ID_column_of_Multi_sample}{Column numbers used for conditions} |
|
15 | 20 |
|
16 |
-\item{j}{Column numbers that include corresponding sample ids |
|
17 |
-in clinical data} |
|
21 |
+\item{ID_column_of_Clinical}{Column numbers that include corresponding |
|
22 |
+sample ids in clinical data} |
|
18 | 23 |
} |
19 | 24 |
\value{ |
20 | 25 |
list of sample labels |
21 | 26 |
} |
22 | 27 |
\description{ |
23 |
-Title Prepare Sample label for TCGA data |
|
28 |
+Prepare Sample label for TCGA data |
|
24 | 29 |
} |
25 | 30 |
\examples{ |
26 | 31 |
require(RTCGA.clinical) |
27 | 32 |
require(RTCGA.rnaseq) |
28 |
-Clinical <- list(BLCA.clinical,BRCA.clinical,CESC.clinical,COAD.clinical) |
|
29 |
-Multi_sample <- list(BLCA.rnaseq[seq_len(100),1,drop=FALSE], |
|
30 |
- BRCA.rnaseq[seq_len(100),1,drop=FALSE], |
|
31 |
- CESC.rnaseq[seq_len(100),1,drop=FALSE], |
|
32 |
- COAD.rnaseq[seq_len(100),1,drop=FALSE]) |
|
33 |
-k <- c(770,1482,773,791) |
|
34 |
-j <- c(20,20,12,14) |
|
35 |
-cond <- prepareCondTCGA(Multi_sample,Clinical,k,j) |
|
33 |
+Clinical <- list(BLCA.clinical, BRCA.clinical, CESC.clinical, COAD.clinical) |
|
34 |
+Multi_sample <- list( |
|
35 |
+ BLCA.rnaseq[seq_len(100), 1, drop = FALSE], |
|
36 |
+ BRCA.rnaseq[seq_len(100), 1, drop = FALSE], |
|
37 |
+ CESC.rnaseq[seq_len(100), 1, drop = FALSE], |
|
38 |
+ COAD.rnaseq[seq_len(100), 1, drop = FALSE] |
|
39 |
+) |
|
40 |
+ID_column_of_Multi_sample <- c(770, 1482, 773, 791) |
|
41 |
+ID_column_of_Clinical <- c(20, 20, 12, 14) |
|
42 |
+cond <- prepareCondTCGA( |
|
43 |
+ Multi_sample, Clinical, |
|
44 |
+ ID_column_of_Multi_sample, ID_column_of_Clinical |
|
45 |
+) |
|
36 | 46 |
} |
37 | 47 |
similarity index 59% |
38 | 48 |
rename from man/PrepareSummarizedExperimentTensorRect.Rd |
39 | 49 |
rename to man/prepareSummarizedExperimentTensorRect.Rd |
... | ... |
@@ -1,10 +1,10 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 |
-% Please edit documentation in R/PrepareSummarizedExperimentTensorRect.R |
|
3 |
-\name{PrepareSummarizedExperimentTensorRect} |
|
4 |
-\alias{PrepareSummarizedExperimentTensorRect} |
|
5 |
-\title{Title Prepare tensor generated from two matricies that share samples} |
|
2 |
+% Please edit documentation in R/prepareSummarizedExperimentTensorRect.R |
|
3 |
+\name{prepareSummarizedExperimentTensorRect} |
|
4 |
+\alias{prepareSummarizedExperimentTensorRect} |
|
5 |
+\title{Prepare tensor generated from two matrices that share samples} |
|
6 | 6 |
\usage{ |
7 |
-PrepareSummarizedExperimentTensorRect( |
|
7 |
+prepareSummarizedExperimentTensorRect( |
|
8 | 8 |
sample, |
9 | 9 |
feature, |
10 | 10 |
value, |
... | ... |
@@ -13,7 +13,7 @@ PrepareSummarizedExperimentTensorRect( |
13 | 13 |
) |
14 | 14 |
} |
15 | 15 |
\arguments{ |
16 |
-\item{sample}{Chracter vecor of sample names} |
|
16 |
+\item{sample}{Character vector of sample names} |
|
17 | 17 |
|
18 | 18 |
\item{feature}{list of features from two matrices} |
19 | 19 |
|
... | ... |
@@ -24,16 +24,16 @@ PrepareSummarizedExperimentTensorRect( |
24 | 24 |
\item{sampleData}{List of conditional labeling associated with samples} |
25 | 25 |
} |
26 | 26 |
\value{ |
27 |
-Tensor generated from two matricies that share samples |
|
27 |
+Tensor generated from two matrices that share samples |
|
28 | 28 |
} |
29 | 29 |
\description{ |
30 |
-Title Prepare tensor generated from two matricies that share samples |
|
30 |
+Prepare tensor generated from two matrices that share samples |
|
31 | 31 |
} |
32 | 32 |
\examples{ |
33 | 33 |
matrix1 <- matrix(runif(10000),200) #row features, column samples |
34 | 34 |
matrix2 <- matrix(runif(20000),400) #row features, column samples |
35 | 35 |
Z <- prepareTensorfromMatrix(t(matrix1),t(matrix2)) |
36 |
-Z <- PrepareSummarizedExperimentTensorRect(sample=as.character(seq_len(50)), |
|
36 |
+Z <- prepareSummarizedExperimentTensorRect(sample=as.character(seq_len(50)), |
|
37 | 37 |
feature=list(as.character(seq_len(200)),as.character(seq_len(400))), |
38 | 38 |
sampleData=list(rep(seq_len(2),each=25)),value=Z) |
39 | 39 |
} |
... | ... |
@@ -2,25 +2,25 @@ |
2 | 2 |
% Please edit documentation in R/prepareTensorfromList.R |
3 | 3 |
\name{prepareTensorfromList} |
4 | 4 |
\alias{prepareTensorfromList} |
5 |
-\title{Title Prepare tensor from a list that includes multiple profiles} |
|
5 |
+\title{Prepare tensor from a list that includes multiple profiles} |
|
6 | 6 |
\usage{ |
7 |
-prepareTensorfromList(Multi, L) |
|
7 |
+prepareTensorfromList(Multi, proj_dim) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{Multi}{a list that includes multiple profiles} |
11 | 11 |
|
12 |
-\item{L}{the number of projection dimensions} |
|
12 |
+\item{proj_dim}{the number of projection dimensions} |
|
13 | 13 |
} |
14 | 14 |
\value{ |
15 | 15 |
a tensor as a bundle of singular value vectors obtained by |
16 | 16 |
applying SVD to individual omics |
17 | 17 |
} |
18 | 18 |
\description{ |
19 |
-Title Prepare tensor from a list that includes multiple profiles |
|
19 |
+Prepare tensor from a list that includes multiple profiles |
|
20 | 20 |
} |
21 | 21 |
\examples{ |
22 | 22 |
require(MOFAdata) |
23 | 23 |
data("CLL_data") |
24 | 24 |
data("CLL_covariates") |
25 |
-Z <- prepareTensorfromList(CLL_data,10) |
|
25 |
+Z <- prepareTensorfromList(CLL_data,as.integer(10)) |
|
26 | 26 |
} |
... | ... |
@@ -2,7 +2,7 @@ |
2 | 2 |
% Please edit documentation in R/prepareTensorfromMatrix.R |
3 | 3 |
\name{prepareTensorfromMatrix} |
4 | 4 |
\alias{prepareTensorfromMatrix} |
5 |
-\title{Title Generate tensor from two matricies} |
|
5 |
+\title{Generate tensor from two matrices} |
|
6 | 6 |
\usage{ |
7 | 7 |
prepareTensorfromMatrix(matrix1, matrix2) |
8 | 8 |
} |
... | ... |
@@ -15,7 +15,7 @@ prepareTensorfromMatrix(matrix1, matrix2) |
15 | 15 |
A tensor generated from the first and second matricies |
16 | 16 |
} |
17 | 17 |
\description{ |
18 |
-Title Generate tensor from two matricies |
|
18 |
+Generate tensor from two matrices |
|
19 | 19 |
} |
20 | 20 |
\examples{ |
21 | 21 |
Z <- prepareTensorfromMatrix(matrix(runif(100),10),matrix(runif(100),10)) |
... | ... |
@@ -2,27 +2,26 @@ |
2 | 2 |
% Please edit documentation in R/prepareexpDrugandDisease.R |
3 | 3 |
\name{prepareexpDrugandDisease} |
4 | 4 |
\alias{prepareexpDrugandDisease} |
5 |
-\title{Title Generating gene expression of drug treated cell lines and a disease |
|
5 |
+\title{Generating gene expression of drug treated cell lines and a disease |
|
6 | 6 |
cell line} |
7 | 7 |
\usage{ |
8 |
-prepareexpDrugandDisease(LIST) |
|
8 |
+prepareexpDrugandDisease(Cancer_cell_lines) |
|
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 |
-\item{LIST}{list that includes indivisual data set from RTCGA.rnaseq} |
|
11 |
+\item{Cancer_cell_lines}{<- list(ACC.rnaseq,BLCA.rnaseq,BRCA.rnaseq) |
|
12 |
+list that includes individual data set from RTCGA.rnaseq} |
|
12 | 13 |
} |
13 | 14 |
\value{ |
14 | 15 |
list of expDrug and expDisease |
15 | 16 |
} |
16 | 17 |
\description{ |
17 |
-Title Generating gene expression of drug treated cell lines and a disease |
|
18 |
+Generating gene expression of drug treated cell lines and a disease |
|
18 | 19 |
cell line |
19 | 20 |
} |
20 | 21 |
\examples{ |
21 | 22 |
\donttest{ |
22 | 23 |
require(RTCGA.rnaseq) |
23 |
-LIST <- list(ACC.rnaseq, |
|
24 |
- BLCA.rnaseq, |
|
25 |
- BRCA.rnaseq) |
|
26 |
-dummy <- prepareexpDrugandDisease(LIST) |
|
24 |
+Cancer_cell_lines <- list(ACC.rnaseq,BLCA.rnaseq,BRCA.rnaseq) |
|
25 |
+Drug_and_Disease <- prepareexpDrugandDisease(Cancer_cell_lines) |
|
27 | 26 |
} |
28 | 27 |
} |
... | ... |
@@ -2,7 +2,7 @@ |
2 | 2 |
% Please edit documentation in R/selectFeatureProj.R |
3 | 3 |
\name{selectFeatureProj} |
4 | 4 |
\alias{selectFeatureProj} |
5 |
-\title{Title Select feature when projection strategy is employed for the |
|
5 |
+\title{Select feature when projection strategy is employed for the |
|
6 | 6 |
case where features are shared with multiple omics profiles} |
7 | 7 |
\usage{ |
8 | 8 |
selectFeatureProj( |
... | ... |
@@ -11,7 +11,7 @@ selectFeatureProj( |
11 | 11 |
cond, |
12 | 12 |
de = 1e-04, |
13 | 13 |
p0 = 0.01, |
14 |
- breaks = 100, |
|
14 |
+ breaks = as.integer(100), |
|
15 | 15 |
input_all = NULL |
16 | 16 |
) |
17 | 17 |
} |
... | ... |
@@ -22,27 +22,27 @@ selectFeatureProj( |
22 | 22 |
|
23 | 23 |
\item{cond}{list of conditions for individual omics profiles} |
24 | 24 |
|
25 |
-\item{de}{initial value for optimization of statdard deviation} |
|
25 |
+\item{de}{initial value for optimization of standard deviation} |
|
26 | 26 |
|
27 | 27 |
\item{p0}{Threshold P-value} |
28 | 28 |
|
29 | 29 |
\item{breaks}{The number of bins of histogram of P-values} |
30 | 30 |
|
31 |
-\item{input_all}{The number of selected feature. if null, intearactive mode |
|
31 |
+\item{input_all}{The number of selected feature. if null, interactive mode |
|
32 | 32 |
is activated} |
33 | 33 |
} |
34 | 34 |
\value{ |
35 | 35 |
list composed of logical vector that represent which features are selected and p-values |
36 | 36 |
} |
37 | 37 |
\description{ |
38 |
-Title Select feature when projection strategy is employed for the |
|
38 |
+Select feature when projection strategy is employed for the |
|
39 | 39 |
case where features are shared with multiple omics profiles |
40 | 40 |
} |
41 | 41 |
\examples{ |
42 | 42 |
require(TDbasedUFE) |
43 | 43 |
Multi <- list(matrix(runif(1000),10),matrix(runif(1000),10), |
44 | 44 |
matrix(runif(1000),10),matrix(runif(1000),10)) |
45 |
-Z <- prepareTensorfromList(Multi,10) |
|
45 |
+Z <- prepareTensorfromList(Multi,as.integer(10)) |
|
46 | 46 |
Z <- aperm(Z,c(2,1,3)) |
47 | 47 |
Z <- PrepareSummarizedExperimentTensor(feature =as.character(1:10), |
48 | 48 |
sample=array("",1),value=Z) |
... | ... |
@@ -2,19 +2,19 @@ |
2 | 2 |
% Please edit documentation in R/selectFeatureRect.R |
3 | 3 |
\name{selectFeatureRect} |
4 | 4 |
\alias{selectFeatureRect} |
5 |
-\title{Title Select features through the selection of singular value vectors} |
|
5 |
+\title{Select features through the selection of singular value vectors} |
|
6 | 6 |
\usage{ |
7 | 7 |
selectFeatureRect( |
8 | 8 |
SVD, |
9 | 9 |
cond, |
10 | 10 |
de = rep(1e-04, 2), |
11 | 11 |
p0 = 0.01, |
12 |
- breaks = 100, |
|
12 |
+ breaks = as.integer(100), |
|
13 | 13 |
input_all = NULL |
14 | 14 |
) |
15 | 15 |
} |
16 | 16 |
\arguments{ |
17 |
-\item{SVD}{SVD compued from matrix generated by partial summation of a tensor} |
|
17 |
+\item{SVD}{SVD computed from matrix generated by partial summation of a tensor} |
|
18 | 18 |
|
19 | 19 |
\item{cond}{Condition to select singular value vectors} |
20 | 20 |
|
... | ... |
@@ -22,17 +22,17 @@ selectFeatureRect( |
22 | 22 |
|
23 | 23 |
\item{p0}{Threshold value for the significance} |
24 | 24 |
|
25 |
-\item{breaks}{Number of bins of hitogram of P-values} |
|
25 |
+\item{breaks}{Number of bins of histogram of P-values} |
|
26 | 26 |
|
27 | 27 |
\item{input_all}{The ID of selected singular value vectors. If it is null, |
28 | 28 |
interactive mode is activated.} |
29 | 29 |
} |
30 | 30 |
\value{ |
31 |
-List of lists that inlcudes P-vales as well as if individual |
|
31 |
+List of lists that includes P-vales as well as if individual |
|
32 | 32 |
features selected. |
33 | 33 |
} |
34 | 34 |
\description{ |
35 |
-Title Select features through the selection of singular value vectors |
|
35 |
+Select features through the selection of singular value vectors |
|
36 | 36 |
} |
37 | 37 |
\examples{ |
38 | 38 |
set.seed(0) |
... | ... |
@@ -2,7 +2,7 @@ |
2 | 2 |
% Please edit documentation in R/selectFeatureTransRect.R |
3 | 3 |
\name{selectFeatureTransRect} |
4 | 4 |
\alias{selectFeatureTransRect} |
5 |
-\title{Title Select features for a tensor generated from two matrices that |
|
5 |
+\title{Select features for a tensor generated from two matrices that |
|
6 | 6 |
share samples.} |
7 | 7 |
\usage{ |
8 | 8 |
selectFeatureTransRect( |
... | ... |
@@ -10,7 +10,7 @@ selectFeatureTransRect( |
10 | 10 |
cond, |
11 | 11 |
de = rep(1e-04, 2), |
12 | 12 |
p0 = 0.01, |
13 |
- breaks = 100, |
|
13 |
+ breaks = as.integer(100), |
|
14 | 14 |
input_all = NULL |
15 | 15 |
) |
16 | 16 |
} |
... | ... |
@@ -33,7 +33,7 @@ list of logical vector that represent if the individual features |
33 | 33 |
are selected and P-values. |
34 | 34 |
} |
35 | 35 |
\description{ |
36 |
-Title Select features for a tensor generated from two matrices that |
|
36 |
+Select features for a tensor generated from two matrices that |
|
37 | 37 |
share samples. |
38 | 38 |
} |
39 | 39 |
\examples{ |
... | ... |
@@ -41,7 +41,7 @@ require(TDbasedUFE) |
41 | 41 |
matrix1 <- matrix(runif(10000),200) #row features, column samples |
42 | 42 |
matrix2 <- matrix(runif(20000),400) #row features, column samples |
43 | 43 |
Z <- prepareTensorfromMatrix(t(matrix1),t(matrix2)) |
44 |
-Z <- PrepareSummarizedExperimentTensorRect(sample=as.character(seq_len(50)), |
|
44 |
+Z <- prepareSummarizedExperimentTensorRect(sample=as.character(seq_len(50)), |
|
45 | 45 |
feature=list(as.character(seq_len(200)),as.character(seq_len(400))), |
46 | 46 |
sampleData=list(rep(seq_len(2),each=25)),value=Z) |
47 | 47 |
HOSVD <- computeHosvd(Z) |
... | ... |
@@ -2,19 +2,19 @@ |
2 | 2 |
% Please edit documentation in R/transSVD.R |
3 | 3 |
\name{transSVD} |
4 | 4 |
\alias{transSVD} |
5 |
-\title{Title Convert SVD to that for the case where |
|
5 |
+\title{Convert SVD to that for the case where |
|
6 | 6 |
samples are shared between two matrices} |
7 | 7 |
\usage{ |
8 | 8 |
transSVD(SVD) |
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 |
-\item{SVD}{inout SVD object genetated from computeSVD function} |
|
11 |
+\item{SVD}{input SVD object generated from computeSVD function} |
|
12 | 12 |
} |
13 | 13 |
\value{ |
14 | 14 |
converted SVD objects |
15 | 15 |
} |
16 | 16 |
\description{ |
17 |
-Title Convert SVD to that for the case where |
|
17 |
+Convert SVD to that for the case where |
|
18 | 18 |
samples are shared between two matrices |
19 | 19 |
} |
20 | 20 |
\examples{ |
... | ... |
@@ -1,14 +1,8 @@ |
1 |
-test_that("multiplication works", { |
|
2 |
- expect_equal(2 * 2, 4) |
|
3 |
-}) |
|
4 | 1 |
require(RTCGA.rnaseq) |
5 |
-LIST <- list(ACC.rnaseq, |
|
6 |
- BLCA.rnaseq, |
|
7 |
- BRCA.rnaseq, |
|
8 |
- CESC.rnaseq) |
|
9 |
-dummy <- prepareexpDrugandDisease(LIST) |
|
10 |
-expDrug <- dummy[[1]] |
|
11 |
-expDisease <- dummy[[2]] |
|
2 |
+Cancer_cell_lines <- list(ACC.rnaseq,BLCA.rnaseq,BRCA.rnaseq) |
|
3 |
+Drug_and_Disease <- prepareexpDrugandDisease(Cancer_cell_lines) |
|
4 |
+expDrug <- Drug_and_Disease$expDrug |
|
5 |
+expDisease <- Drug_and_Disease$expDisease |
|
12 | 6 |
require(Biobase) |
13 | 7 |
Z <- prepareTensorfromMatrix(exprs(expDrug[seq_len(20),seq_len(10)]), |
14 | 8 |
exprs(expDisease[seq_len(20),seq_len(10)])) |
... | ... |
@@ -25,7 +19,7 @@ data("CLL_data") |
25 | 19 |
data("CLL_covariates") |
26 | 20 |
Z <- prepareTensorfromMatrix(t(CLL_data$Drugs[seq_len(200),seq_len(50)]), |
27 | 21 |
t(CLL_data$Methylation[seq_len(200),seq_len(50)])) |
28 |
-Z <- PrepareSummarizedExperimentTensorRect( |
|
22 |
+Z <- prepareSummarizedExperimentTensorRect( |
|
29 | 23 |
sample=colnames(CLL_data$Drugs)[seq_len(50)], |
30 | 24 |
feature=list(Drugs=rownames(CLL_data$Drugs)[seq_len(200)], |
31 | 25 |
Methylatiion=rownames(CLL_data$Methylation)[seq_len(200)]), |
... | ... |
@@ -37,24 +31,24 @@ index_all <- selectFeatureTransRect(HOSVD,cond,de=c(0.01,0.01), |
37 | 31 |
input_all=8) #batch mode |
38 | 32 |
|
39 | 33 |
|
40 |
-SVD <- computeSVD(exprs(expDrug),exprs(expDisease)) |
|
41 |
-Z <- t(exprs(expDrug)) %*% exprs(expDisease) |
|
42 |
-sample<- outer(colnames(expDrug),colnames(expDisease), |
|
43 |
- function(x,y){paste(x,y)}) |
|
44 |
-Z <- PrepareSummarizedExperimentTensor(sample=sample, |
|
45 |
- feature=rownames(expDrug),value=Z) |
|
34 |
+SVD <- computeSVD(t(CLL_data$Drugs),t(CLL_data$Methylation)) |
|
35 |
+Z <-CLL_data$Drugs %*%t(CLL_data$Methylation) |
|
36 |
+sample<- colnames(CLL_data$Methylation) |
|
37 |
+Z <- prepareSummarizedExperimentTensorRect(sample=sample, |
|
38 |
+ feature=list(rownames(CLL_data$Drugs),rownames(CLL_data$Methylation)), |
|
39 |
+ value=array(NA,dim(Z)),sampleData=list(CLL_covariates[,1])) |
|
46 | 40 |
cond <- list(NULL,attr(Z,"sampleData")[[1]],attr(Z,"sampleData")[[1]]) |
47 | 41 |
SVD <- transSVD(SVD) |
48 | 42 |
index_all <- selectFeatureRect(SVD,cond,de=c(0.5,0.5),input_all=6) #batch mode |
49 | 43 |
|
50 |
-Z <- prepareTensorfromList(CLL_data,10) |
|
44 |
+Z <- prepareTensorfromList(CLL_data,as.integer(10)) |
|
51 | 45 |
|
52 | 46 |
Multi <- list(BLCA.rnaseq[seq_len(100),1+seq_len(1000)], |
53 | 47 |
BRCA.rnaseq[seq_len(100),1+seq_len(1000)], |
54 | 48 |
CESC.rnaseq[seq_len(100),1+seq_len(1000)], |
55 | 49 |
COAD.rnaseq[seq_len(100),1+seq_len(1000)]) |
56 | 50 |
|
57 |
-Z <- prepareTensorfromList(Multi,10) |
|
51 |
+Z <- prepareTensorfromList(Multi,as.integer(10)) |
|
58 | 52 |
Z <- aperm(Z,c(2,1,3)) |
59 | 53 |
|
60 | 54 |
require(RTCGA.clinical) |
... | ... |
@@ -64,13 +58,15 @@ Multi_sample <- list(BLCA.rnaseq[seq_len(100),1,drop=FALSE], |
64 | 58 |
CESC.rnaseq[seq_len(100),1,drop=FALSE], |
65 | 59 |
COAD.rnaseq[seq_len(100),1,drop=FALSE]) |
66 | 60 |
#patient.stage_event.tnm_categories.pathologic_categories.pathologic_m |
67 |
-k <- c(770,1482,773,791) |
|
61 |
+ID_column_of_Multi_sample <- c(770,1482,773,791) |
|
68 | 62 |
#patient.bcr_patient_barcode |
69 |
-j <- c(20,20,12,14) |
|
63 |
+ID_column_of_Clinical <- c(20,20,12,14) |
|
70 | 64 |
Z <- PrepareSummarizedExperimentTensor( |
71 | 65 |
feature =colnames(ACC.rnaseq)[1+seq_len(1000)], |
72 | 66 |
sample=array("",1),value=Z, |
73 |
- sampleData=prepareCondTCGA(Multi_sample,Clinical,k,j)) |
|
67 |
+ sampleData=prepareCondTCGA(Multi_sample, |
|
68 |
+ Clinical,ID_column_of_Multi_sample, |
|
69 |
+ ID_column_of_Clinical)) |
|
74 | 70 |
HOSVD <- computeHosvd(Z) |
75 | 71 |
cond<- attr(Z,"sampleData") |
76 | 72 |
index <- selectFeatureProj(HOSVD,Multi,cond,de=1e-3,input_all=3) #Batch mode |
... | ... |
@@ -21,6 +21,7 @@ BiocStyle::markdown() |
21 | 21 |
```{r, include = FALSE} |
22 | 22 |
knitr::opts_chunk$set( |
23 | 23 |
collapse = TRUE, |
24 |
+ crop = NULL, |
|
24 | 25 |
comment = "#>" |
25 | 26 |
) |
26 | 27 |
``` |
... | ... |
@@ -30,6 +31,12 @@ knitr::opts_chunk$set( |
30 | 31 |
```{r setup} |
31 | 32 |
library(TDbasedUFE) |
32 | 33 |
library(TDbasedUFEadv) |
34 |
+library(DOSE) |
|
35 |
+library(enrichplot) |
|
36 |
+library(RTCGA.rnaseq) |
|
37 |
+library(RTCGA.clinical) |
|
38 |
+library(enrichR) |
|
39 |
+library(STRINGdb) |
|
33 | 40 |
``` |
34 | 41 |
|
35 | 42 |
# Introduction |
... | ... |
@@ -37,36 +44,44 @@ library(TDbasedUFEadv) |
37 | 44 |
It might be helpful to demonstrate how to evaluate selected genes by enrichment analysis. Here, we show some of useful tools applied to the output from TDbasedUFEadv |
38 | 45 |
In order foe this, we reproduce one example in QuickStart2 as follows. |
39 | 46 |
|
40 |
-``` {r, fig.keep="none"} |
|
41 |
-require(RTCGA.rnaseq) |
|
42 |
-Multi <- list(BLCA.rnaseq[seq_len(100),1+seq_len(1000)], |
|
43 |
- BRCA.rnaseq[seq_len(100),1+seq_len(1000)], |
|
44 |
- CESC.rnaseq[seq_len(100),1+seq_len(1000)], |
|
45 |
- COAD.rnaseq[seq_len(100),1+seq_len(1000)]) |
|
46 |
-Z <- prepareTensorfromList(Multi,10) |
|
47 |
-Z <- aperm(Z,c(2,1,3)) |
|
48 |
-require(RTCGA.clinical) |
|
49 |
-Clinical <- list(BLCA.clinical,BRCA.clinical,CESC.clinical,COAD.clinical) |
|
50 |
-Multi_sample <- list(BLCA.rnaseq[seq_len(100),1,drop=FALSE], |
|
51 |
- BRCA.rnaseq[seq_len(100),1,drop=FALSE], |
|
52 |
- CESC.rnaseq[seq_len(100),1,drop=FALSE], |
|
53 |
- COAD.rnaseq[seq_len(100),1,drop=FALSE]) |
|
54 |
-#patient.stage_event.tnm_categories.pathologic_categories.pathologic_m |
|
55 |
-k <- c(770,1482,773,791) |
|
56 |
-#patient.bcr_patient_barcode |
|
57 |
-j <- c(20,20,12,14) |
|
47 |
+``` {r} |
|
48 |
+Multi <- list( |
|
49 |
+ BLCA.rnaseq[seq_len(100), 1 + seq_len(1000)], |
|
50 |
+ BRCA.rnaseq[seq_len(100), 1 + seq_len(1000)], |
|
51 |
+ CESC.rnaseq[seq_len(100), 1 + seq_len(1000)], |
|
52 |
+ COAD.rnaseq[seq_len(100), 1 + seq_len(1000)] |
|
53 |
+) |
|
54 |
+Z <- prepareTensorfromList(Multi, as.integer(10)) |
|
55 |
+Z <- aperm(Z, c(2, 1, 3)) |
|
56 |
+Clinical <- list(BLCA.clinical, BRCA.clinical, CESC.clinical, COAD.clinical) |
|
57 |
+Multi_sample <- list( |
|
58 |
+ BLCA.rnaseq[seq_len(100), 1, drop = FALSE], |
|
59 |
+ BRCA.rnaseq[seq_len(100), 1, drop = FALSE], |
|
60 |
+ CESC.rnaseq[seq_len(100), 1, drop = FALSE], |
|
61 |
+ COAD.rnaseq[seq_len(100), 1, drop = FALSE] |
|
62 |
+) |
|
63 |
+# patient.stage_event.tnm_categories.pathologic_categories.pathologic_m |
|
64 |
+ID_column_of_Multi_sample <- c(770, 1482, 773, 791) |
|
65 |
+# patient.bcr_patient_barcode |
|
66 |
+ID_column_of_Clinical <- c(20, 20, 12, 14) |
|
58 | 67 |
Z <- PrepareSummarizedExperimentTensor( |
59 |
- feature =colnames(ACC.rnaseq)[1+seq_len(1000)], |
|
60 |
- sample=array("",1),value=Z, |
|
61 |
- sampleData=prepareCondTCGA(Multi_sample,Clinical,k,j)) |
|
68 |
+ feature = colnames(ACC.rnaseq)[1 + seq_len(1000)], |
|
69 |
+ sample = array("", 1), value = Z, |
|
70 |
+ sampleData = prepareCondTCGA( |
|
71 |
+ Multi_sample, Clinical, |
|
72 |
+ ID_column_of_Multi_sample, ID_column_of_Clinical |
|
73 |
+ ) |
|
74 |
+) |
|
62 | 75 |
HOSVD <- computeHosvd(Z) |
63 |
-cond<- attr(Z,"sampleData") |
|
64 |
-index <- selectFeatureProj(HOSVD,Multi,cond,de=1e-3,input_all=3) #Batch mode |
|
65 |
-head(tableFeatures(Z,index)) |
|
66 |
-genes <-unlist(lapply(strsplit(tableFeatures(Z,index)[,1],"|", |
|
67 |
- fixed=TRUE),"[",1)) |
|
68 |
-entrez <- unlist(lapply(strsplit(tableFeatures(Z,index)[,1],"|", |
|
69 |
- fixed=TRUE),"[",2)) |
|
76 |
+cond <- attr(Z, "sampleData") |
|
77 |
+index <- selectFeatureProj(HOSVD, Multi, cond, de = 1e-3, input_all = 3) # Batch mode |
|
78 |
+head(tableFeatures(Z, index)) |
|
79 |
+genes <- unlist(lapply(strsplit(tableFeatures(Z, index)[, 1], "|", |
|
80 |
+ fixed = TRUE |
|
81 |
+), "[", 1)) |
|
82 |
+entrez <- unlist(lapply(strsplit(tableFeatures(Z, index)[, 1], "|", |
|
83 |
+ fixed = TRUE |
|
84 |
+), "[", 2)) |
|
70 | 85 |
``` |
71 | 86 |
|
72 | 87 |
# Enrichr |
... | ... |
@@ -75,12 +90,19 @@ Enrichr[@Enrichr] is one of tools that often provides us significant results |
75 | 90 |
toward genes selected by TDbasedUFE and TDbasedUFEadv. |
76 | 91 |
|
77 | 92 |
``` {r} |
78 |
-library(enrichR) |
|
79 |
-setEnrichrSite("Enrichr") |
|
93 |
+setEnrichrSite("Enrichr") |
|
80 | 94 |
websiteLive <- TRUE |
81 |
-dbs <- c("GO_Molecular_Function_2015", "GO_Cellular_Component_2015", "GO_Biological_Process_2015") |
|
82 |
-enriched <- enrichr(genes,dbs) |
|
83 |
-if (websiteLive) plotEnrich(enriched[[3]], showTerms = 20, numChar = 40, y = "Count", orderBy = "P.value") |
|
95 |
+dbs <- c( |
|
96 |
+ "GO_Molecular_Function_2015", "GO_Cellular_Component_2015", |
|
97 |
+ "GO_Biological_Process_2015" |
|
98 |
+) |
|
99 |
+enriched <- enrichr(genes, dbs) |
|
100 |
+if (websiteLive) { |
|
101 |
+ plotEnrich(enriched$GO_Biological_Process_2015, |
|
102 |
+ showTerms = 20, numChar = 40, y = "Count", |
|
103 |
+ orderBy = "P.value" |
|
104 |
+ ) |
|
105 |
+} |
|
84 | 106 |
``` |
85 | 107 |
|
86 | 108 |
Enrichr can provide you huge number of enrichment analyses, |
... | ... |
@@ -96,15 +118,18 @@ which is known to provide often significant results toward genes selected by |
96 | 118 |
TDbasedUFE as well as TDbasedUFEadv. |
97 | 119 |
|
98 | 120 |
```{r} |
99 |
-library(STRINGdb) |
|
100 |
-options(timeout=200) |
|
101 |
-string_db <- STRINGdb$new( version="11.5", |
|
102 |
- species=9606,score_threshold=200, |
|
103 |
- network_type="full", input_directory="") |
|
104 |
-example1_mapped <- string_db$map(data.frame(genes=genes), |
|
105 |
- "genes",removeUnmappedRows = TRUE ) |
|
121 |
+options(timeout = 200) |
|
122 |
+string_db <- STRINGdb$new( |
|
123 |
+ version = "11.5", |
|
124 |
+ species = 9606, score_threshold = 200, |
|
125 |
+ network_type = "full", input_directory = "" |
|
126 |
+) |
|
127 |
+example1_mapped <- string_db$map(data.frame(genes = genes), |
|
128 |
+ "genes", |
|
129 |
+ removeUnmappedRows = TRUE |
|
130 |
+) |
|
106 | 131 |
hits <- example1_mapped$STRING_id |
107 |
-string_db$plot_network( hits ) |
|
132 |
+string_db$plot_network(hits) |
|
108 | 133 |
``` |
109 | 134 |
|
110 | 135 |
# enrichplot |
... | ... |
@@ -120,8 +145,6 @@ an alternative. It can list significant ones among multiple categories. |
120 | 145 |
|
121 | 146 |
|
122 | 147 |
```{r} |
123 |
-require(DOSE) |
|
124 |
-require(enrichplot) |
|
125 | 148 |
edo <- enrichDGN(entrez) |
126 | 149 |
dotplot(edo, showCategory=30) + ggtitle("dotplot for ORA") |
127 | 150 |
``` |
128 | 151 |
similarity index 66% |
129 | 152 |
rename from vignettes/TDbasedUFEadv.Rmd |
130 | 153 |
rename to vignettes/Explanation_of_TDbasedUFEadv.Rmd |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
--- |
2 |
-title: "TDbasedUFEadv" |
|
2 |
+title: "Explanation of TDbasedUFEadv" |
|
3 | 3 |
author: |
4 | 4 |
- name: Y-h. Taguchi |
5 | 5 |
affiliation: Department of Physics, Chuo University, Tokyo 112-8551, Japan |
... | ... |
@@ -7,6 +7,7 @@ author: |
7 | 7 |
output: |
8 | 8 |
BiocStyle::html_document: |
9 | 9 |
toc: true |
10 |
+bibliography: references.bib |
|
10 | 11 |
vignette: > |
11 | 12 |
%\VignetteIndexEntry{TDbasedUFEadv} |
12 | 13 |
%\VignetteEngine{knitr::rmarkdown} |
... | ... |
@@ -30,9 +31,36 @@ knitr::opts_chunk$set( |
30 | 31 |
|
31 | 32 |
# Introduction |
32 | 33 |
|
33 |
-Since TDbasedUFEadv is an advanced package from TDbasedUFE, please master the |
|
34 |
+Since TDbasedUFEadv is an advanced package from TDbasedUFE [@TDbasedUFE], please master the |
|
34 | 35 |
contents in TDbasedUFE prior to the trial of this package. |
35 | 36 |
|
37 |
+## Motivations |
|
38 |
+ |
|
39 |
+Since the publication of the book [@Taguchibook] describing the methodology, |
|
40 |
+I have published numerous papers using this method. In spite of that, very |
|
41 |
+limited number of researcher used this method, possibly because of unfamiliarity |
|
42 |
+with the mathematical concepts used in this methodology, tensors. |
|
43 |
+Thus I decided to develop the packages by which users can use the methods |
|
44 |
+without detailed knowledge about the tensor. |
|
45 |
+ |
|
46 |
+## What differs from related packages. |
|
47 |
+ |
|
48 |
+The main purpose of this package is to select features (typically genes) based |
|
49 |
+upon provided omics data sets. In this sense, apparently the functionality of this |
|
50 |
+package is similar to [DESeq2](https://doi.org/doi:10.18129/B9.bioc.DESeq2) or [limma](https://doi.org/doi:10.18129/B9.bioc.limma), which have functionality that can |
|
51 |
+identify differentially expressed genes. In contrast to those supervised methods, |
|
52 |
+the present method is unsupervised one, which provides users what kind of |
|
53 |
+profiles are observed over samples, and users are advised to select one of |
|
54 |
+fivarite features by which features are selected. In addition to this, the |
|
55 |
+present method is suitable to small number of samples associated with large |
|
56 |
+number of features. Since this situation is very common in genomics, the |
|
57 |
+present method is supposed to be suitable to be applied to genomics, although |
|
58 |
+it does not look liked the methods very specific to genomics science. |
|
59 |
+Actually, we have published the number of papers using the metods inplemented |
|
60 |
+in the present package. I hope that one can make uss of this package for his/her |
|
61 |
+own reseraches. |
|
62 |
+ |
|
63 |
+ |
|
36 | 64 |
# Integrated analysis of two omics data sets |
37 | 65 |
|
38 | 66 |
|
... | ... |
@@ -40,14 +68,14 @@ contents in TDbasedUFE prior to the trial of this package. |
40 | 68 |
|
41 | 69 |
### Full tensor |
42 | 70 |
|
43 |
- |
|
71 |
+ |
|
44 | 72 |
|
45 | 73 |
Suppose we have two omics profiles |
46 | 74 |
$$ |
47 | 75 |
x_{ij} \in \mathbb{R}^{N \times M} \\ |
48 | 76 |
x_{ik} \in \mathbb{R}^{N \times K} |
49 | 77 |
$$ |
50 |
-that repesent values of $i$th feature of $j$th and $k$th objects, respectively |
|
78 |
+that represent values of $i$th feature of $j$th and $k$th objects, respectively |
|
51 | 79 |
(i.r., these two profiles share the features). |
52 | 80 |
In this case, we generate a tensor, $x_{ijk}$, by the product of two profiles as |
53 | 81 |
$$ |
... | ... |
@@ -60,17 +88,17 @@ x_{ijk} = \sum_{\ell_1} \sum_{\ell_2} \sum_{ell_3} G(\ell_1 \ell_2 \ell_3) |
60 | 88 |
u_{\ell_1 i} u_{\ell_2 j} u_{\ell_3 k} |
61 | 89 |
$$ |
62 | 90 |
After that we can follow the standard procedure to select features $i$s |
63 |
-associated with the desired properties represented by the selected signular |
|
64 |
-value vectors, $u_{\ell_2 j}$ and $u_{\ell_3 k}$, arrtibuted to objects, |
|
91 |
+associated with the desired properties represented by the selected singular |
|
92 |
+value vectors, $u_{\ell_2 j}$ and $u_{\ell_3 k}$, attributed to objects, |
|
65 | 93 |
$j$s and $k$s. |
66 | 94 |
|
67 |
-### Matrix generated by patial summation |
|
95 |
+### Matrix generated by partial summation |
|
68 | 96 |
|
69 | 97 |
 |
70 | 98 |
|
71 | 99 |
In the above, we dealt with full tensor. It is often difficult to treat full |
72 | 100 |
tensor, since it is as large as $N \times M times K$. In this case, we can take |
73 |
-the alternative approach. In order that we define reduced matix with taking |
|
101 |
+the alternative approach. In order that we define reduced matrix with taking |
|
74 | 102 |
partial summation |
75 | 103 |
$$ |
76 | 104 |
x_{jk} = \sum_i x_{ijk} |
... | ... |
@@ -79,22 +107,22 @@ and apply SVD to $x_{jk}$ as |
79 | 107 |
$$ |
80 | 108 |
x_{jk} = \sum_\ell \lambda_\ell u_{\ell j} v_{\ell k} |
81 | 109 |
$$ |
82 |
-and sigular value vectors attributed to samples as |
|
110 |
+and singular value vectors attributed to samples as |
|
83 | 111 |
$$ |
84 | 112 |
u^{(j)}_{\ell i} = \sum_j u_{\ell j} x_{ij} \\ |
85 | 113 |
u^{(k)}_{\ell i} = \sum_k v_{\ell k} x_{ik} |
86 | 114 |
$$ |
87 |
-In this case, signular value vectors are attributed separately to features |
|
115 |
+In this case, singular value vectors are attributed separately to features |
|
88 | 116 |
associated with objects $j$ and $k$, respectively. |
89 | 117 |
|
90 | 118 |
The feature selection can be done using these singular value vectors associated |
91 |
-with selected sigular value vectors attributed to samples, $j$ and $k$. |
|
119 |
+with selected singular value vectors attributed to samples, $j$ and $k$. |
|
92 | 120 |
|
93 | 121 |
## When samples are shared. |
94 | 122 |
|
95 | 123 |
### Full tensor |
96 | 124 |
|
97 |
- |
|
125 |
+ |
|
98 | 126 |
|
99 | 127 |
In the case where not features but samples are shared between two omics data, |
100 | 128 |
we can do something similar. |
... | ... |
@@ -116,7 +144,7 @@ u_{\ell_1 i} u_{\ell_2 j} u_{\ell_3 k} |
116 | 144 |
$$ |
117 | 145 |
After that we can follow the standard procedure to select features $i$s and $k$s |
118 | 146 |
associated with the desired properties represented by the selected singular |
119 |
-value vectors, $u_{\ell_2 j}$, arrtibuted to objects, $j$s. |
|
147 |
+value vectors, $u_{\ell_2 j}$, attributed to objects, $j$s. |
|
120 | 148 |
|
121 | 149 |
### Matrix generated from partial summation |
122 | 150 |
|
... | ... |
@@ -124,7 +152,7 @@ value vectors, $u_{\ell_2 j}$, arrtibuted to objects, $j$s. |
124 | 152 |
|
125 | 153 |
In the above, we dealt with full tensor. It is often difficult to treat full |
126 | 154 |
tensor, since it is as large as $N \times M times K$. In this case, we can take |
127 |
-the alternative approach. In order that we define reduced matix with taking |
|
155 |
+the alternative approach. In order that we define reduced matrix with taking |
|
128 | 156 |
partial summation |
129 | 157 |
$$ |
130 | 158 |
x_{ik} = \sum_j x_{ijk} |
... | ... |
@@ -173,7 +201,7 @@ Then $i_k$ can be selected as usual. |
173 | 201 |
|
174 | 202 |
## When features are shared. |
175 | 203 |
|
176 |
- |
|
204 |
+ |
|
177 | 205 |
|
178 | 206 |
Suppose we have multiple sets of samples as |
179 | 207 |
$$ |
180 | 208 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,510 @@ |
1 |
+--- |
|
2 |
+title: "QuickStart" |
|
3 |
+author: |
|
4 |
+- name: Y-h. Taguchi |
|
5 |
+ affiliation: Department of Physics, Chuo University, Tokyo 112-8551, Japan |
|
6 |
+ email: [email protected] |
|
7 |
+output: |
|
8 |
+ BiocStyle::html_document: |
|
9 |
+ toc: true |
|
10 |
+bibliography: references.bib |
|
11 |
+vignette: > |
|
12 |
+ %\VignetteIndexEntry{QuickStart} |
|
13 |
+ %\VignetteEngine{knitr::rmarkdown} |
|
14 |
+ %\VignetteEncoding{UTF-8} |
|
15 |
+--- |
|
16 |
+ |
|
17 |
+```{r style, echo = FALSE, results = 'asis'} |
|
18 |
+BiocStyle::markdown() |
|
19 |
+``` |
|
20 |
+ |
|
21 |
+```{r, include = FALSE} |
|
22 |
+knitr::opts_chunk$set( |
|
23 |
+ collapse = TRUE, |
|
24 |
+ crop = NULL, |
|
25 |
+ comment = "#>" |
|
26 |
+) |
|
27 |
+``` |
|
28 |
+ |
|
29 |
+```{r setup} |
|
30 |
+library(TDbasedUFEadv) |
|
31 |
+library(Biobase) |
|
32 |
+library(RTCGA.rnaseq) |
|
33 |
+library(TDbasedUFE) |
|
34 |
+library(MOFAdata) |
|
35 |
+library(TDbasedUFE) |
|
36 |
+library(RTCGA.clinical) |
|
37 |
+``` |
|
38 |
+# Installation |
|
39 |
+ |
|
40 |
+``` {r, eval = FALSE} |
|
41 |
+if (!require("BiocManager", quietly = TRUE)) |
|
42 |
+ install.packages("BiocManager") |
|
43 |
+BiocManager::install("TDbasedUFEadv") |
|
44 |
+``` |
|
45 |
+ |
|
46 |
+ |
|
47 |
+ |
|
48 |
+# Integrated analyses of two omics profiles |
|
49 |
+ |
|
50 |
+ |
|
51 |
+Here is a flowchart how we can make use of individual functions in |
|
52 |
+TDbasedUFE and TDbasedUFEadv. |
|
53 |
+ |
|
54 |
+ |
|
55 |
+ |
|
56 |
+ |
|
57 |
+ |
|
58 |
+## When features are shared. |
|
59 |
+ |
|
60 |
+In order to make use of TDbasedUFE for the drug repositioning, we previously |
|
61 |
+proposed[@Taguchi2017] the integrated analysis of two gene expression profiles, |
|
62 |
+each of which is composed of gene expression of drug treated one and disease |
|
63 |
+one. At first, we try to prepare two omics profiles, expDrug and expDisease, |
|
64 |
+that represent gene expression profiles of cell lines treated by various drugs |
|
65 |
+and a cell line of diseases by |
|
66 |
+``` {r} |
|
67 |
+Cancer_cell_lines <- list(ACC.rnaseq, BLCA.rnaseq, BRCA.rnaseq, CESC.rnaseq) |
|
68 |
+Drug_and_Disease <- prepareexpDrugandDisease(Cancer_cell_lines) |
|
69 |
+expDrug <- Drug_and_Disease$expDrug |
|
70 |
+expDisease <- Drug_and_Disease$expDisease |
|
71 |
+rm(Cancer_cell_lines) |
|
72 |
+``` |
|
73 |
+expDrug is taken from RTCGA package and those associated with Drugs based upon |
|
74 |
+[@Ding2016]. Those files are listed in drug_response.txt included in Clinical |
|
75 |
+drug responses at http://lifeome.net/supp/drug_response/. |
|
76 |
+expDisease is composed of files in BRCA.rnaseq, but not included in expDrug |
|
77 |
+(For more details, see source code of prepareexpDrugandDisease). |
|
78 |
+Then prepare a tensor as |
|
79 |
+```{r} |
|
80 |
+Z <- prepareTensorfromMatrix( |
|
81 |
+ exprs(expDrug[seq_len(200), seq_len(100)]), |
|
82 |
+ exprs(expDisease[seq_len(200), seq_len(100)]) |
|
83 |
+) |
|
84 |
+sample <- outer( |
|
85 |
+ colnames(expDrug)[seq_len(100)], |
|
86 |
+ colnames(expDisease)[seq_len(100)], function(x, y) { |
|
87 |
+ paste(x, y) |
|
88 |
+ } |
|
89 |
+) |
|
90 |
+Z <- PrepareSummarizedExperimentTensor( |
|
91 |
+ sample = sample, feature = rownames(expDrug)[seq_len(200)], value = Z |
|
92 |
+) |
|
93 |
+``` |
|
94 |
+In the above, sample are pairs of file IDs taken from expDrug and expDisease. |
|
95 |
+Since full data cannot be treated because of memory restriction, we restricted |
|
96 |
+the first two hundred features and the first one hundred samples, respectively |
|
97 |
+(In the below, we will introduce how to deal with the full data sets). |
|
98 |
+ |
|
99 |
+Then HOSVD is applied to a tensor as |
|
100 |
+``` {r} |
|
101 |
+HOSVD <- computeHosvd(Z) |
|
102 |
+``` |
|
103 |
+Here we tries to find if Cisplatin causes distinct expression (0: cell lines |
|
104 |
+treated with drugs other than Cisplatin, 1: cell lines treated with Cisplatin) |
|
105 |
+and those between two classes (1 vs 2) of BRCA (in this case, there are no |
|
106 |
+meaning of two classes) within top one hundred samples. |
|
107 |
+``` {r} |
|
108 |
+Cond <- prepareCondDrugandDisease(expDrug) |
|
109 |
+cond <- list(NULL, Cond[, colnames = "Cisplatin"][seq_len(100)], rep(1:2, each = 50)) |
|
110 |
+``` |
|
111 |
+Then try to select singular value vectors attributed to objects. |
|
112 |
+When you try this vignettes, although you can do it in the interactive |
|
113 |
+mode (see below), here we assume that you have already finished the selection. |
|
114 |
+ |
|
115 |
+```{r} |
|
116 |
+input_all <- selectSingularValueVectorLarge(HOSVD,cond,input_all=c(2,9)) #Batch mode |
|
117 |
+``` |
|
118 |
+ |
|
119 |
+In the case you prefer to select by yourself you can execute intearctive mode. |
|
120 |
+``` |
|
121 |
+input_all <- selectSingularValueVectorLarge(HOSVD,cond) |
|
122 |
+``` |
|
123 |
+When you can see ``Next'', ``Prev'', and ``Select'' radio buttons by which you |
|
124 |
+can performs selection as well as histogram and standard deviation optimiztion |
|
125 |
+by which you can verify the success of selection interactively. |
|
126 |
+ |
|
127 |
+ |
|
128 |
+Next we select which genes' expression is altered by Cisplatin. |
|
129 |
+```{r } |
|
130 |
+index <- selectFeature(HOSVD,input_all,de=0.05) |
|
131 |
+``` |
|
132 |
+ |
|
133 |
+You might need to specify suitable value for de which is initial value of |
|
134 |
+standard deviation. |
|
135 |
+ |
|
136 |
+Then we get the following plot. |
|
137 |
+ |
|
138 |
+ |
|
139 |
+ |
|
140 |
+Finally, list the genes selected as those associated with distinct expression. |
|
141 |
+```{r} |
|
142 |
+head(tableFeatures(Z,index)) |
|
143 |
+``` |
|
144 |
+```{r} |
|
145 |
+rm(Z) |
|
146 |
+rm(HOSVD) |
|
147 |
+detach("package:RTCGA.rnaseq") |
|
148 |
+rm(SVD) |
|
149 |
+``` |
|
150 |
+The described methods were frequently used |
|
151 |
+in the studies[@Taguchi2017a] [@Taguchi2018] [@Taguchi2020] by maintainers. |
|
152 |
+ |
|
153 |
+### Reduction of required memory using partial summation. |
|
154 |
+ |
|
155 |
+In the case that there are large number of features, it is impossible to apply |
|
156 |
+HOSVD to a full tensor (Then we have reduced the size of tensor). |
|
157 |
+In this case, we apply SVD instead of HOSVD to matrix |
|
158 |
+generated from a tensor as follows. |
|
159 |
+In contrast to the above where only top two hundred features and top one hundred |
|
160 |
+samples are included, the following one includes all features and all samples since |
|
161 |
+it can save required memory because partial summation of features. |
|
162 |
+``` {r} |
|
163 |
+SVD <- computeSVD(exprs(expDrug), exprs(expDisease)) |
|
164 |
+Z <- t(exprs(expDrug)) %*% exprs(expDisease) |
|
165 |
+sample <- outer( |
|
166 |
+ colnames(expDrug), colnames(expDisease), |
|
167 |
+ function(x, y) { |
|
168 |
+ paste(x, y) |
|
169 |
+ } |
|
170 |
+) |
|
171 |
+Z <- PrepareSummarizedExperimentTensor( |
|
172 |
+ sample = sample, |
|
173 |
+ feature = rownames(expDrug), value = Z |
|
174 |
+) |
|
175 |
+``` |
|
176 |
+ |
|
177 |
+Nest select singular value vectors attributed to drugs and cell lines then |
|
178 |
+identify features associated with altered expression by treatment of |
|
179 |
+Cisplatin as well as distinction between two classes. Again, it included |
|
180 |
+all samples for expDrug and expDisease. |
|
181 |
+``` {r} |
|
182 |
+cond <- list(NULL,Cond[,colnames="Cisplatin"],rep(1:2,each=dim(SVD$SVD$v)[1]/2)) |
|
183 |
+``` |
|
184 |
+ |
|
185 |
+Next we select singular value vectors and optimize standard deviation |
|
186 |
+as batch mode |
|
187 |
+```{r} |
|
188 |
+index_all <- selectFeatureRect(SVD,cond,de=c(0.01,0.01), |
|
189 |
+ input_all=3) #batch mode |
|
190 |
+``` |
|
191 |
+Again you need to select suitable de by trials and errors. |
|
192 |
+ |
|
193 |
+For interactive mode, one should do |
|
194 |
+``` |
|
195 |
+index_all <- selectFeatureRect(SVD,cond,de=c(0.01,0.01)) |
|
196 |
+``` |
|
197 |
+but it is not possible in vignettes that does not allow interactive mode. |
|
198 |
+ |
|
199 |
+ |
|
200 |
+Then you can see selected features as |
|
201 |
+```{r} |
|
202 |
+head(tableFeatures(Z,index_all[[1]])) |
|
203 |
+head(tableFeatures(Z,index_all[[2]])) |
|
204 |
+``` |
|
205 |
+The upper one is for distinct expression between cell lines treated with |
|
206 |
+Cisplatin and other cell lines and the lower one is for distinct expression |
|
207 |
+between two classes of BRCA cell lines. |
|
208 |
+ |
|
209 |
+Although they are highly coincident, not fully same ones (Row: expDrug, |
|
210 |
+column:expDisease). |
|
211 |
+```{r} |
|
212 |
+table(index_all[[1]]$index,index_all[[2]]$index) |
|
213 |
+``` |
|
214 |
+ |
|
215 |
+Confusion matrix of features selected between expDrug and expDisease. |
|
216 |
+ |
|
217 |
+The described methods were frequently used in the studies[@Taguchi2019a] by maintainers. |
|
218 |
+ |
|
219 |
+```{r} |
|
220 |
+rm(Z) |
|
221 |
+rm(SVD) |
|
222 |
+``` |
|
223 |
+ |
|
224 |
+## When samples are shared |
|
225 |
+ |
|
226 |
+The above procedure can be used when two omics data that shares samples must be integrated. |
|
227 |
+Prepare data set as |
|
228 |
+```{r} |
|
229 |
+data("CLL_data") |
|
230 |
+data("CLL_covariates") |
|
231 |
+``` |
|
232 |
+ |
|
233 |
+(see vignettes QuickStart in TDbasedUFE for more details about this |
|
234 |
+data set). |
|
235 |
+Generate tensor from matrix as in the above, but since not features but |
|
236 |
+samples are shared between two matrices, |
|
237 |
+the resulting Z has samples as features and features as samples, respectively. |
|
238 |
+```{r} |
|
239 |
+Z <- prepareTensorfromMatrix( |
|
240 |
+ t(CLL_data$Drugs[seq_len(200), seq_len(50)]), |
|
241 |
+ t(CLL_data$Methylation[seq_len(200), seq_len(50)]) |
|
242 |
+) |
|
243 |
+Z <- prepareSummarizedExperimentTensorRect( |
|
244 |
+ sample = colnames(CLL_data$Drugs)[seq_len(50)], |
|
245 |
+ feature = list( |
|
246 |
+ Drugs = rownames(CLL_data$Drugs)[seq_len(200)], |
|
247 |
+ Methylatiion = rownames(CLL_data$Methylation)[seq_len(200)] |
|
248 |
+ ), |
|
249 |
+ sampleData = list(CLL_covariates$Gender[seq_len(50)]), |
|
250 |
+ value = Z |
|
251 |
+) |
|
252 |
+``` |
|
253 |
+ |
|
254 |
+HOSVD was applied to Z as |
|
255 |
+```{r} |
|
256 |
+HOSVD <- computeHosvd(Z) |
|
257 |
+``` |
|
258 |
+ |
|
259 |
+```{r} |
|
260 |
+cond <- list(attr(Z,"sampleData")[[1]],NULL,NULL) |
|
261 |
+``` |
|
262 |
+Condition is distinction between male and female |
|
263 |
+(see QucikStart in TDbasedUFE package). |
|
264 |
+Then try to find singular value vectors distinct between male and female |
|
265 |
+in interactive mode. |
|
266 |
+```{r} |
|
267 |
+index_all <- selectFeatureTransRect(HOSVD,cond,de=c(0.01,0.01), |
|
268 |
+ input_all=8) #batch mode |
|
269 |
+``` |
|
270 |
+In the above, selection was supposed to be performed before executing the |
|
271 |
+above, since vignettes does not allow interactive mode. |
|
272 |
+ In actual, you need to execute it in interactive mode |
|
273 |
+``` |
|
274 |
+index_all <- selectFeatureTransRect(HOSVD,cond,de=c(0.01,0.01)) |
|
275 |
+``` |
|
276 |
+and try to select ireratively. Selected features can be shown in the below. |
|
277 |
+ |
|
278 |
+```{r} |
|
279 |
+head(tableFeaturesSquare(Z,index_all,1)) |
|
280 |
+head(tableFeaturesSquare(Z,index_all,2)) |
|
281 |
+``` |
|
282 |
+ |
|
283 |
+This method was used in the studies[@Taguchi2019] by the maintainer. |
|
284 |
+ |
|
285 |
+### Reduction of required memory using partial summation. |
|
286 |
+ |
|
287 |
+As in the case where two omics profiles share features, in the case where two |
|
288 |
+omics data share the samples, we can also take an alternative approach where |
|
289 |
+SVD is applied to an matrix generated from a tensor by taking partial summation. |
|
290 |
+```{r} |
|
291 |
+SVD <- computeSVD(t(CLL_data$Drugs), t(CLL_data$Methylation)) |
|
292 |
+Z <- CLL_data$Drugs %*% t(CLL_data$Methylation) |
|
293 |
+sample <- colnames(CLL_data$Methylation) |
|
294 |
+Z <- prepareSummarizedExperimentTensorRect( |
|
295 |
+ sample = sample, |
|
296 |
+ feature = list(rownames(CLL_data$Drugs), rownames(CLL_data$Methylation)), |
|
297 |
+ value = array(NA, dim(Z)), sampleData = list(CLL_covariates[, 1]) |
|
298 |
+) |
|
299 |
+``` |
|
300 |
+Condition is also distinction between male (m) and female (f). |
|
301 |
+```{r} |
|
302 |
+cond <- list(NULL,attr(Z,"sampleData")[[1]],attr(Z,"sampleData")[[1]]) |
|
303 |
+``` |
|
304 |
+In order to apply the previous function to SVD, we exchange feature singular |
|
305 |
+value vectors with sample singular value vectors. |
|
306 |
+```{r} |
|
307 |
+SVD <- transSVD(SVD) |
|
308 |
+``` |
|
309 |
+Then try to find which sample singular value vectors should be selected and |
|
310 |
+which features are selected based upon feature singular value vectors |
|
311 |
+corresponding to selected sample feature vectors. |
|
312 |
+Although I do not intend to repeat whole process, we decided to select the |
|
313 |
+sixth singular value vectors which are some what distinct between male |
|
314 |
+and female. Since package does not allow us interactive mode, we place here batch mode. |
|
315 |
+ |
|
316 |
+```{r} |
|
317 |
+index_all <- selectFeatureRect(SVD,cond,de=c(0.5,0.5),input_all=6) #batch mode |
|
318 |
+``` |
|
319 |
+ |
|
320 |
+In the real usage, we can activate |
|
321 |
+selectFeatureRect in interactive mode as well. |
|
322 |
+``` |
|
323 |
+index_all <- selectFeatureRect(SVD,cond,de=c(0.5,0.5)) |
|
324 |
+``` |
|
325 |
+ |
|
326 |
+ |
|
327 |
+ |
|
328 |
+ |
|
329 |
+ |
|
330 |
+ |
|
331 |
+ |
|
332 |
+ |
|
333 |
+ |
|
334 |
+ |
|
335 |
+ |
|
336 |
+ |
|
337 |
+ |
|
338 |
+ |
|
339 |
+Then we can list the Drugs and Methylation sites selected as being distinct |
|
340 |
+between male and female. |
|
341 |
+ |
|
342 |
+```{r} |
|
343 |
+head(tableFeaturesSquare(Z,index_all,1)) |
|
344 |
+head(tableFeaturesSquare(Z,index_all,2)) |
|
345 |
+``` |
|
346 |
+ |
|
347 |
+ |
|
348 |
+ |
|
349 |
+This method was used in many studies[@Taguchi2018a] [@Taguchi2020a] by maintainer. |
|
350 |
+ |
|
351 |
+ |
|
352 |
+# Integrated analysis of multiple omics data |
|
353 |
+ |
|
354 |
+Here is a flowchart how we can make use of individual functions in TDbasedUFE and TDbasedUFEadv. |
|
355 |
+ |
|
356 |
+ |
|
357 |
+ |
|
358 |
+ |
|
359 |
+ |
|
360 |
+## When samples are shared |
|
361 |
+ |
|
362 |
+As an alternative approach that can integrate multiple omics that share sample, |
|
363 |
+we propose the method that makes use of projection provided by SVD. |
|
364 |
+ |
|
365 |
+We prepare a tensor that is a bundle of the first ten singular value vectors |
|
366 |
+generated by applying SVD to individual omics profiles. |
|
367 |
+ |
|
368 |
+```{r} |
|
369 |
+data("CLL_data") |
|
370 |
+data("CLL_covariates") |
|
371 |
+Z <- prepareTensorfromList(CLL_data, as.integer(10)) |
|
372 |
+Z <- PrepareSummarizedExperimentTensor( |
|
373 |
+ feature = character("1"), |
|
374 |
+ sample = array(colnames(CLL_data$Drugs), 1), value = Z, |
|
375 |
+ sampleData = list(CLL_covariates[, 1]) |
|
376 |
+) |
|
377 |
+``` |
|
378 |
+Then HOSVD was applied to a tensor |
|
379 |
+```{r} |
|
380 |
+HOSVD <- computeHosvd(Z,scale=FALSE) |
|
381 |
+``` |
|
382 |
+Next we select singular value vectors attributed to samples. |
|
383 |
+In order to select those distinct between male (m) and female (f), |
|
384 |
+we set conditions as |
|
385 |
+```{r} |
|
386 |
+cond <- list(NULL,attr(Z,"sampleData")[[1]],seq_len(4)) |
|
387 |
+``` |
|
388 |
+But here in order to include TDbasedUFEadv into package, we are forced to |
|
389 |
+execute function as batch mode as |
|
390 |
+```{r} |
|
391 |
+input_all <- selectSingularValueVectorLarge(HOSVD, |
|
392 |
+ cond, |
|
393 |
+ input_all = c(12, 1) |
|
394 |
+) # batch mode |
|
395 |
+``` |
|
396 |
+Interactive more can be activated as |
|
397 |
+``` |
|
398 |
+input_all <- selectSingularValueVectorLarge(HOSVD,cond) |
|
399 |
+``` |
|
400 |
+Although we do not intend to repeat how to use menu in interactive mode, please |
|
401 |
+select the 12th one and the third one. |
|
402 |
+ |
|
403 |
+Finally, we perform the following function to select features in individual |
|
404 |
+omics profiles in an batch mode, |
|
405 |
+since packaging does not allow interactive mode. |
|
406 |
+ |
|
407 |
+``` {r} |
|
408 |
+HOSVD$U[[1]] <- HOSVD$U[[2]] |
|
409 |
+index_all <- selectFeatureSquare(HOSVD, input_all, CLL_data, |
|
410 |
+ de = c(0.5, 0.1, 0.1, 1), interact = FALSE |
|
411 |
+) # Batch mode |
|
412 |
+``` |
|
413 |
+ |
|
414 |
+In actual usage, you can activate interactive mode as |
|
415 |
+``` |
|
416 |
+HOSVD$U[[1]] <- HOSVD$U[[2]] |
|
417 |
+index_all <- selectFeatureSquare(HOSVD, input_all, CLL_data, |
|
418 |
+ de = c(0.5, 0.1, 0.1, 1) |
|
419 |
+) |
|
420 |
+``` |
|
421 |
+ |
|
422 |
+Finally, we list the selected features for four omics profiles that share samples. |
|
423 |
+ |
|
424 |
+```{r} |
|
425 |
+for (id in c(1:4)) |
|
426 |
+{ |
|
427 |
+ attr(Z, "feature") <- rownames(CLL_data[[id]]) |
|
428 |
+ print(tableFeatures(Z, index_all[[id]])) |
|
429 |
+} |
|
430 |
+``` |
|
431 |
+ |
|
432 |
+This method was used in many studies[@Taguchi2022] by maintainer. |
|
433 |
+ |
|
434 |
+ |
|
435 |
+## When features are shared |
|
436 |
+ |
|
437 |
+Now we discuss what to do when multiple omics data share not samples but |
|
438 |
+features. We prepare data set from RTCGA.rnaseq as follows, with retrieving |
|
439 |
+reduced partial sets from four ones. One should notice that RTCGA is an old |
|
440 |
+package from TCGA (as for 2015). I used it only for demonstration purpose. |
|
441 |
+If you would like to use TCGA for your research, I recommend you to use |
|
442 |
+more modern packages, e.g., curatedTCGAData in Bioconductor. |
|
443 |
+```{r} |
|
444 |
+library(RTCGA.rnaseq) #it must be here, not in the first chunk |
|
445 |
+Multi <- list( |
|
446 |
+ BLCA.rnaseq[seq_len(100), 1 + seq_len(1000)], |
|
447 |
+ BRCA.rnaseq[seq_len(100), 1 + seq_len(1000)], |
|
448 |
+ CESC.rnaseq[seq_len(100), 1 + seq_len(1000)], |
|
449 |
+ COAD.rnaseq[seq_len(100), 1 + seq_len(1000)] |
|
450 |
+) |
|
451 |
+``` |
|
452 |
+Multi includes four objects, each of which is matrix that represent 100 samples (rows) and 1000 (features). Please note it is different from usual cases where columns and rows are features and samples, respectively. They are marge into tensor as follows |
|
453 |
+```{r} |
|
454 |
+Z <- prepareTensorfromList(Multi,as.integer(10)) |
|
455 |
+Z <- aperm(Z,c(2,1,3)) |
|
456 |
+``` |
|
457 |
+The function, prepareTeansorfromList which was used in the previous subsection |
|
458 |
+where samples are shared, can be used as it is. However, the first and second |
|
459 |
+modes of a tensor must be exchanged by aperm function for the latter analyses, |
|
460 |
+because of the difference as mentioned in the above. Then tensor object |
|
461 |
+associated with various information is generated as usual as follows and |
|
462 |
+HOSVD was applied to it. |
|
463 |
+``` {r} |
|
464 |
+Clinical <- list(BLCA.clinical, BRCA.clinical, CESC.clinical, COAD.clinical) |
|
465 |
+Multi_sample <- list( |
|
466 |
+ BLCA.rnaseq[seq_len(100), 1, drop = FALSE], |
|
467 |
+ BRCA.rnaseq[seq_len(100), 1, drop = FALSE], |
|
468 |
+ CESC.rnaseq[seq_len(100), 1, drop = FALSE], |
|
469 |
+ COAD.rnaseq[seq_len(100), 1, drop = FALSE] |
|
470 |
+) |
|
471 |
+# patient.stage_event.tnm_categories.pathologic_categories.pathologic_m |
|
472 |
+ID_column_of_Multi_sample <- c(770, 1482, 773, 791) |
|
473 |
+# patient.bcr_patient_barcode |
|
474 |
+ID_column_of_Clinical <- c(20, 20, 12, 14) |
|
475 |
+Z <- PrepareSummarizedExperimentTensor( |
|
476 |
+ feature = colnames(ACC.rnaseq)[1 + seq_len(1000)], |
|
477 |
+ sample = array("", 1), value = Z, |
|
478 |
+ sampleData = prepareCondTCGA( |
|
479 |
+ Multi_sample, Clinical, |
|
480 |
+ ID_column_of_Multi_sample, ID_column_of_Clinical |
|
481 |
+ ) |
|
482 |
+) |
|
483 |
+HOSVD <- computeHosvd(Z) |
|
484 |
+``` |
|
485 |
+In order to see which singular value vectors attributed to samples are used for the selection of singular value vectors attributed to features, we need to assign sample conditions. |
|
486 |
+```{r} |
|
487 |
+cond<- attr(Z,"sampleData") |
|
488 |
+``` |
|
489 |
+ |
|
490 |
+Since package does not allow us to include interactive mode, we place here batch mode as follows. |
|
491 |
+Finally, selected feature are listed as follows. |
|
492 |
+``` {r} |
|
493 |
+index <- selectFeatureProj(HOSVD,Multi,cond,de=1e-3,input_all=3) #Batch mode |
|
494 |
+head(tableFeatures(Z,index)) |
|
495 |
+``` |
|
496 |
+In actual, you can activate interactive mode as |
|
497 |
+ |
|
498 |
+``` |
|
499 |
+par(mai=c(0.3,0.2,0.2,0.2)) |
|
500 |
+index <- selectFeatureProj(HOSVD,Multi,cond,de=1e-3) |
|
501 |
+``` |
|
502 |
+Although we do not intend to explain how to use menu interactively, |
|
503 |
+we select the third singular value vectors as shown in above. |
|
504 |
+ |
|
505 |
+This method was used in many studies[@Taguchi2021] by maintainer. |
|
506 |
+ |
|
507 |
+ |
|
508 |
+```{r} |
|
509 |
+sessionInfo() |
|
510 |
+``` |
|
0 | 511 |
\ No newline at end of file |
1 | 512 |
deleted file mode 100644 |
... | ... |
@@ -1,435 +0,0 @@ |
1 |
-title: "QuickStart" |
|
2 |
-author: |
|
3 |
-- name: Y-h. Taguchi |
|
4 |
- affiliation: Department of Physics, Chuo University, Tokyo 112-8551, Japan |
|
5 |
- email: [email protected] |
|
6 |
-output: |
|
7 |
- BiocStyle::html_document: |
|
8 |
- toc: true |
|
9 |
-bibliography: references.bib |
|
10 |
-vignette: > |
|
11 |
- %\VignetteIndexEntry{QuickStart} |
|
12 |
- %\VignetteEngine{knitr::rmarkdown} |
|
13 |
- %\VignetteEncoding{UTF-8} |
|
14 |
- |
|
15 |
-```{r style, echo = FALSE, results = 'asis'} |
|
16 |
-BiocStyle::markdown() |
|
17 |
-``` |
|
18 |
- |
|
19 |
-```{r, include = FALSE} |
|
20 |
-knitr::opts_chunk$set( |
|
21 |
- collapse = TRUE, |
|
22 |
- comment = "#>" |
|
23 |
-) |
|
24 |
-``` |
|
25 |
- |
|
26 |
-```{r setup} |
|
27 |
-library(TDbasedUFEadv) |
|
28 |
-``` |
|
29 |
-# Introduction |
|
30 |
-Since TDbasedUFEadv is an advanced package from TDbasedUFE, please master the |
|
31 |
-contents in TDbasedUFE prior to the trial of this package. Here is a flowchart |
|
32 |
-how we can make use of individual functions in TDbasedUFE and TDbasedUFEadv. |
|
33 |
- |
|
34 |
- |
|
35 |
- |
|
36 |
-# Integrated anayses of two omics profiles |
|
37 |
- |
|
38 |
-## When features are shared. |
|
39 |
- |
|
40 |
-In order to make use of TDbasedUFE for the drug repositioning, we previously |
|
41 |
-proposed[@Taguchi2017] the integrated analysis of two gene expression profiles, |
|
42 |
-each of which is composed of gene expression of drug treated one and disease |
|
43 |
-one. At first, we try to prepare two omics profiles, expDrug and expDisease, |
|
44 |
-that represent gene expression profiles of cell lines treated by various drugs |
|
45 |
-and a cell line of diseases by |
|
46 |
-```{r, eval=FALSE} |
|
47 |
-require(RTCGA.rnaseq) |
|
48 |
- LIST <- list(ACC.rnaseq, |
|
49 |
- BLCA.rnaseq, |
|
50 |
- BRCA.rnaseq, |
|
51 |
- CESC.rnaseq, |
|
52 |
- COAD.rnaseq, |
|
53 |
- ESCA.rnaseq, |
|
54 |
- GBM.rnaseq, |
|
55 |
- HNSC.rnaseq, |
|
56 |
- KICH.rnaseq, |
|
57 |
- KIRC.rnaseq, |
|
58 |
- KIRP.rnaseq, |
|
59 |
- LGG.rnaseq, |
|
60 |
- LIHC.rnaseq, |
|
61 |
- LUAD.rnaseq, |
|
62 |
- LUSC.rnaseq, |
|
63 |
- OV.rnaseq, |
|
64 |
- PAAD.rnaseq, |
|
65 |
- PCPG.rnaseq, |
|
66 |
- PRAD.rnaseq, |
|
67 |
- READ.rnaseq, |
|
68 |
- SARC.rnaseq, |
|
69 |
- SKCM.rnaseq, |
|
70 |
- STAD.rnaseq, |
|
71 |
- TGCT.rnaseq, |
|
72 |
- THCA.rnaseq, |
|
73 |
- UCEC.rnaseq, |
|
74 |
- UCS.rnaseq) |
|
75 |
-dummy <- prepareexpDrugandDisease(LIST) |
|
76 |
-expDrug <- dummy[[1]] |
|
77 |
-expDisease <- dummy[[2]] |
|
78 |
-rm(LIST) |
|
79 |
-``` |
|
80 |
-Although you should execute above, |
|
81 |
-in order to suppress execute time we instead perform here |
|
82 |
-during installation. |
|
83 |
-``` {r} |
|
84 |
-require(RTCGA.rnaseq) |
|
85 |
-LIST <- list(ACC.rnaseq, |
|
86 |
- BLCA.rnaseq, |
|
87 |
- BRCA.rnaseq, |
|
88 |
- CESC.rnaseq) |
|
89 |
-dummy <- prepareexpDrugandDisease(LIST) |
|
90 |
-expDrug <- dummy[[1]] |
|
91 |
-expDisease <- dummy[[2]] |
|
92 |
-rm(LIST) |
|
93 |
-``` |
|
94 |
-expDrug is taken from RTCGA package and those associated with Drugs based upon |
|
95 |
-[@Ding2016]. Those files are listed in drug_response.txt included in Clinical |
|
96 |
-drug responses at http://lifeome.net/supp/drug_response/. |
|
97 |
-expDisease is composed of files in BRCA.rnaseq, but not included in expDrug |
|
98 |
-(For more details, see source code of prepareexpDrugandDisease). |
|
99 |
-Then prepare a tensor as |
|
100 |
-```{r} |
|
101 |
-require(Biobase) |
|
102 |
-Z <- prepareTensorfromMatrix(exprs(expDrug[seq_len(200),seq_len(100)]), |
|
103 |
- exprs(expDisease[seq_len(200),seq_len(100)])) |
|
104 |
-sample<- outer(colnames(expDrug)[seq_len(100)], |
|
105 |
- colnames(expDisease)[seq_len(100)],function(x,y){paste(x,y)}) |
|
106 |
-require(TDbasedUFE) |
|
107 |
-Z <- PrepareSummarizedExperimentTensor( |
|
108 |
- sample=sample,feature=rownames(expDrug)[seq_len(200)],value=Z) |
|
109 |
-``` |
|
110 |
-In the above, sample are pairs of file IDs taken from expDrug and expDisease. |
|
111 |
-Since full data cannot be treated because of memory restriction, we restricted |
|
112 |
-the first two hundred features and the first one hundred samples, respectively |
|
113 |
-(In the below, we will introduce how to deal with the full data sets). |
|
114 |
- |
|
115 |
-Then HOSVD is applied to a tensor as |
|
116 |
-``` {r} |
|
117 |
-HOSVD <- computeHosvd(Z) |
|
118 |
-``` |
|
119 |
-Here we tries to find if Cisplatin causes distinct expression (0: cell lines |
|
120 |
-treated with drugs other than Cisplatin, 1: cell lines treated with Cisplatin) |
|
121 |
-and those between two classes (1 vs 2) of BRCA (in this case, there are no |
|
122 |
-meaning of two classes) within top one hundred samples. |
|
123 |
-``` {r} |
|
124 |
-Cond <- prepareCondDrugandDisease(expDrug) |
|
125 |
-cond <- list(NULL,Cond[,colnames="Cisplatin"][seq_len(100)],rep(1:2,each=50)) |
|
126 |
-``` |
|
127 |
-Then try to select singular value vectors attributed to objects. |
|
128 |
-When you try this vignettes, you can do it interactive mode as |
|
129 |
-``` |
|
130 |
-input_all <- selectSingularValueVectorLarge(HOSVD,cond) |
|
131 |
-``` |
|
132 |
-Then we can see the following plot |
|
133 |
- |
|
134 |
- |
|
135 |
- |
|
136 |
- |
|
137 |
-and a menu |
|
138 |
-``` |
|
139 |
-1: NEXT |
|
140 |
-2: PREV |
|
141 |
-3: SELCT |
|
142 |
-``` |
|
143 |
-It represents a distinction between cell lines treated with Cisplatin (1) and |
|
144 |
-others (0). Since these are not associated with clear distinction, we need to |
|
145 |
-see next singular value vector. Then type 1 and press enter. Then we get |
|
146 |
-this plot |
|
147 |
- |
|
148 |
- |
|
149 |
- |
|
150 |
-and menu |
|
151 |
-``` |
|
152 |
-1: NEXT |
|
153 |
-2: PREV |
|
154 |
-3: SELCT |
|
155 |
-``` |
|
156 |
-Since this one is somewhat distinct between 0 (cell lines treated with drugs |
|
157 |
-other than Cisplatin) and 1 (those treated with Cisplatin), we decide to |
|
158 |
-select this one. Then type 3 and press enter. |
|
159 |
- |
|
160 |
-Then we can see the following plot |
|
161 |
- |
|
162 |
- |
|
163 |
- |
|
164 |
-and a menu |
|
165 |
- |
|
166 |
-``` |
|
167 |
-1: NEXT |
|
168 |
-2: PREV |
|
169 |
-3: SELCT |
|
170 |
-``` |
|
171 |
- |
|
172 |
-This represent a distinction between two classes of BRCA cell lines (1 vs 2). |
|
173 |
-Since this one does not represent distinction we continue to type 1 and press |
|
174 |
-enter until we can see the ninth one as |
|
175 |
- |
|
176 |
- |
|
177 |
- |
|
178 |
-Since this one represents the distinction, we decided to select this one and |
|
179 |
-type 3 and press enter in the following menu. |
|
180 |
- |
|
181 |
-``` |
|
182 |
- |
|
183 |
-1: NEXT |
|
184 |
-2: PREV |
|
185 |
-3: SELCT |
|
186 |
-``` |
|
187 |
- |
|
188 |
-In order to place this vignettes in Bioconductor that does not allow |
|
189 |
-interactive input we write this function as |
|
190 |
-```{r, fig.keep = "none"} |
|
191 |
-input_all <- selectSingularValueVectorLarge(HOSVD,cond,input_all=c(2,9)) #Batch mode |
|
192 |
-``` |
|
193 |
-that works in batch mode. |
|
194 |
- |
|
195 |
-Next we select which genes' expression is altered by Cisplatin. |
|
196 |
-```{r, fig.keep = "none"} |
|
197 |
-index <- selectFeature(HOSVD,input_all,de=0.05) |
|
198 |
-``` |
|
199 |
- |
|
200 |
-You might need to specify suitable value for de which is initial value of |
|
201 |
-standard deviation. |
|
202 |
- |
|
203 |
-Then we get the following plot. |
|
204 |
- |
|
205 |
- |
|
206 |
- |
|
207 |
-Finally, list the genes selected as those associated with distinct expression. |
|
208 |
-```{r} |
|
209 |
-head(tableFeatures(Z,index)) |
|
210 |
-``` |
|
211 |
-```{r} |
|
212 |
-rm(Z) |
|
213 |
-rm(HOSVD) |
|
214 |
-detach("package:RTCGA.rnaseq") |
|
215 |
-rm(SVD) |
|
216 |
-``` |
|
217 |
-The described methods were frequently used in the studies[@Taguchi2017a] [@Taguchi2018] [@Taguchi2020] by maintainers. |
|
218 |
- |
|
219 |
-### Reduction of required memory using partial summation. |
|
220 |
- |
|
221 |
-In the case that there are large number of features, it is impossible to apply |
|
222 |
-HOSVD to a full tensor (Then we have reduced the size of tensor). |
|
223 |
-In this case, we apply SVD instead of HOSVD to matrix |
|
224 |
-generated from a tensor as follows. |
|
225 |
-In contrast to the above where only top two hundred features and top one hundred |
|
226 |
-samples are included, the following one includes all features and all samples since |
|
227 |
-it can save required memory because partial summation of features. |
|
228 |
-``` {r} |
|
229 |
-library(Biobase) |
|
230 |
-SVD <- computeSVD(exprs(expDrug),exprs(expDisease)) |
|
231 |
-Z <- t(exprs(expDrug)) %*% exprs(expDisease) |
|
232 |
-sample<- outer(colnames(expDrug),colnames(expDisease), |
|
233 |
- function(x,y){paste(x,y)}) |
|
234 |
-Z <- PrepareSummarizedExperimentTensor(sample=sample, |
|
235 |
- feature=rownames(expDrug),value=Z) |
|
236 |
-``` |
|
237 |
- |
|
238 |
-Nest select singular value vectors attributed to drugs and cell lines then |
|
239 |
-identify features associated with altered expression by treatment of |
|
240 |
-Cisplatin as well as distinction between two classes. Again, it included |
|
241 |
-all samples for expDrug and expDisease. |
|
242 |
-``` {r} |
|
243 |
-cond <- list(NULL,Cond[,colnames="Cisplatin"],rep(1:2,each=dim(SVD$SVD$v)[1]/2)) |
|
244 |
-``` |
|
245 |
-For interactive mode, one should do |
|
246 |
-``` |
|
247 |
-index_all <- selectFeatureRect(SVD,cond,de=c(0.01,0.01)) |
|
248 |
-``` |
|
249 |
-Again you need to select suitable de by trials and errors. |
|
250 |
- |
|
251 |
-Here we don't repeat whole processes, but please show the third one |
|
252 |
- |
|
253 |
- |
|
254 |
- |
|
255 |
-and type 3 to select this one and press enter. |
|
256 |
- |
|
257 |
-Then we can see the plot |
|
258 |
- |
|
259 |
- |
|
260 |
- |
|
261 |
-Press enter again to forward and we can see |
|
262 |
- |
|
263 |
- |
|
264 |
- |
|
265 |
- |
|
266 |
-then press enter to finalize the feature selection. |
|
267 |
- |
|
268 |
-Since Bioconductor does not allow interactive mode, in this vignettes, |
|
269 |
-we place the following. |
|
270 |
-```{r, fig.keep="none"} |
|
271 |
-index_all <- selectFeatureRect(SVD,cond,de=c(0.01,0.01), |
|
272 |
- input_all=3) #batch mode |
|
273 |
-``` |
|
274 |
-Then you can see selected features as |
|
275 |
-```{r} |
|
276 |
-head(tableFeatures(Z,index_all[[1]])) |
|
277 |
-head(tableFeatures(Z,index_all[[2]])) |
|
278 |
-``` |
|
279 |
-The upper one is for distinct expression between cell lines treated with |
|
280 |
-Cisplatin and other cell lines and the lower one is for distinct expression |
|
281 |
-between two classes of BRCA cell lines. |
|
282 |
- |
|
283 |
-Although they are highly coincident, not fully same ones (Row: expDrug, |
|
284 |
-column:expDisease). |
|
285 |
-```{r} |
|
286 |
-table(index_all[[1]]$index,index_all[[2]]$index) |
|
287 |
-``` |
|
288 |
- |
|
289 |
-Confusion matrix of features selected between expDrug and expDisease. |
|
290 |
- |
|
291 |
-The described methods were frequently used in the studies[@Taguchi2019a] by maintainers. |
|
292 |
- |
|
293 |
-```{r} |
|
294 |
-rm(Z) |
|
295 |
-rm(SVD) |
|
296 |
-``` |
|
297 |
- |
|
298 |
-## When samples are shared |
|
299 |
- |
|
300 |
-The above procedure can be used when two omics data that shares samples must be integrated. |
|
301 |
-Prepare data set as |
|
302 |
-```{r} |
|
303 |
-require(MOFAdata) |
|
304 |
-data("CLL_data") |
|
305 |
-data("CLL_covariates") |
|
306 |
-``` |
|
307 |
- |
|
308 |
-(see vignettes QuickStart in TDbasedUFE for more details about this |
|
309 |
-data set). |
|
310 |
-Generate tensor from matrix as in the above, but since not features but |
|
311 |
-samples are shared between two matrices, |
|
312 |
-the resulting Z has samples as features and features as samples, respectively. |
|
313 |
-```{r} |
|
314 |
-Z <- prepareTensorfromMatrix(t(CLL_data$Drugs[seq_len(200),seq_len(50)]), |
|
315 |
-t(CLL_data$Methylation[seq_len(200),seq_len(50)])) |
|
316 |
-require(TDbasedUFE) |
|
317 |
-Z <- PrepareSummarizedExperimentTensorRect( |
|
318 |
- sample=colnames(CLL_data$Drugs)[seq_len(50)], |
|
319 |
- feature=list(Drugs=rownames(CLL_data$Drugs)[seq_len(200)], |
|
320 |
- Methylatiion=rownames(CLL_data$Methylation)[seq_len(200)]), |
|
321 |
- sampleData=list(CLL_covariates[,1][seq_len(50)]), |
|
322 |
- value=Z) |
|
323 |
-``` |
|
324 |
- |
|
325 |
-HOSVD was applied to Z as |
|
326 |
-```{r} |
|
327 |
-HOSVD <- computeHosvd(Z) |
|
328 |
-``` |
|
329 |
- |
|
330 |
-```{r} |
|
331 |
-cond <- list(attr(Z,"sampleData")[[1]],NULL,NULL) |
|
332 |
-``` |
|
333 |
-Condition is distinction between male and female |
|
334 |
-(see QucikStart in TDbasedUFE package). |
|
335 |
-Then try to find singular value vectors distinct between male and female |
|
336 |
-in interactive mode. |
|
337 |
-``` |
|
338 |
-index_all <- selectFeatureTransRect(HOSVD,cond,de=c(0.01,0.01)) |
|
339 |
-``` |
|
340 |
-Although we do not repeat the process, please select the eighth one as shown below |
|
341 |
- |
|
342 |
- |
|
343 |
- |
|
344 |
- |
|
345 |
- |
|
346 |
- |
|
347 |
- |
|
348 |
- |
|
349 |
- |
|
350 |
- |
|
351 |
- |
|
352 |
- |
|
353 |
- |
|
354 |
- |
|
355 |
-Since package does not allow interactive mode, we place here batch |
|
356 |
-mode as follows. |
|
357 |
-```{r, fig.keep="none"} |
|
358 |
-index_all <- selectFeatureTransRect(HOSVD,cond,de=c(0.01,0.01), |
|
359 |
- input_all=8) #batch mode |
|
360 |
-``` |
|
361 |
-```{r} |
|
362 |
-head(tableFeaturesSquare(Z,index_all,1)) |
|
363 |
-head(tableFeaturesSquare(Z,index_all,2)) |
|
364 |
-``` |
|
365 |
- |
|
366 |
-This method was used in the studies[@Taguchi2019] by the maintainer. |
|
367 |
- |
|
368 |
-### Reduction of required memory using partial summation. |
|
369 |
- |
|
370 |
-As in the case where two omics profiles share features, in the case where two |
|
371 |
-omics data share the samples, we can also take an alternative approach where |
|
372 |
-SVD is applied to an matrix generated from a tensor by taking partial summation. |
|
373 |
-```{r} |
|
374 |
-SVD <- computeSVD(t(CLL_data$Drugs),t(CLL_data$Methylation)) |
|
375 |
-Z <-CLL_data$Drugs %*%t(CLL_data$Methylation) |
|
376 |
-sample<- colnames(CLL_data$Methylation) |
|
377 |
-Z <- PrepareSummarizedExperimentTensorRect(sample=sample, |
|
378 |
- feature=list(rownames(CLL_data$Drugs),rownames(CLL_data$Methylation)), |
|
379 |
- value=array(NA,dim(Z)),sampleData=list(CLL_covariates[,1])) |
|
380 |
-``` |
|
381 |
-Condition is also distinction between male (m) and female (f). |
|
382 |
-```{r} |
|
383 |
-cond <- list(NULL,attr(Z,"sampleData")[[1]],attr(Z,"sampleData")[[1]]) |
|
384 |
-``` |
|
385 |
-In order to apply the previous function to SVD, we exchange feature singular |
|
386 |
-value vectors with sample singular value vectors. |
|
387 |
-```{r} |
|
388 |
-SVD <- transSVD(SVD) |
|
389 |
-``` |
|
390 |
-Then try to find which sample singular value vectors should be selected and |
|
391 |
-which features are selected based upon feature singular value vectors |
|
392 |
-corresponding to selected sample feature vectors. We can activate |
|
393 |
-selectFeatureRect in interactive mode as well. |
|
394 |
-``` |
|
395 |
-index_all <- selectFeatureRect(SVD,cond,de=c(0.5,0.5)) |
|
396 |
-``` |
|
397 |
-Although I do not intend to repeat whole process, we decided to select the |
|
398 |
-sixth singular value vectors which are some what distinct between male |
|
399 |
-and female. |
|
400 |
- |
|
401 |
- |
|
402 |
- |
|
403 |
- |
|
404 |
- |
|
405 |
-Pressing enter we can get these two plots as well. |
|
406 |
- |
|
407 |
- |
|
408 |
- |
|
409 |
- |
|
410 |
- |
|
411 |
- |
|
412 |
- |
|
413 |
- |
|
414 |
- |
|
415 |
- |
|
416 |
- |
|
417 |
-Since package does not allow us interactive mode, we place here batch mode. |
|
418 |
-```{r, fig.keep='none'} |
|
419 |
-index_all <- selectFeatureRect(SVD,cond,de=c(0.5,0.5),input_all=6) #batch mode |
|
420 |
-``` |
|
421 |
-Then we can list the Drugs and Methylation sites selected as being distinct |
|
422 |
-between male and female. |
|
423 |
- |
|
424 |
-```{r} |
|
425 |
-head(tableFeaturesSquare(Z,index_all,1)) |
|
426 |
-head(tableFeaturesSquare(Z,index_all,2)) |
|
427 |
-``` |
|
428 |
- |
|
429 |
-This method was used in many studies[@Taguchi2018a] [@Taguchi2020a] by maintainer. |
|
430 |
- |
|
431 |
-```{r} |
|
432 |
-sessionInfo() |
|
433 |
-``` |
|
434 | 0 |
\ No newline at end of file |
435 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,184 +0,0 @@ |
1 |
-title: "QuickStart2" |
|
2 |
-author: |
|
3 |
-- name: Y-h. Taguchi |
|
4 |
- affiliation: Department of Physics, Chuo University, Tokyo 112-8551, Japan |
|
5 |
- email: [email protected] |
|
6 |
-output: |
|
7 |
- BiocStyle::html_document: |
|
8 |
- toc: true |
|
9 |
-bibliography: references.bib |
|
10 |
-vignette: > |
|
11 |
- %\VignetteIndexEntry{QuickStart2} |
|
12 |
- %\VignetteEngine{knitr::rmarkdown} |
|
13 |
- %\VignetteEncoding{UTF-8} |
|
14 |
- |
|
15 |
-```{r style, echo = FALSE, results = 'asis'} |
|
16 |
-BiocStyle::markdown() |
|
17 |
-``` |
|
18 |
- |
|
19 |
-```{r, include = FALSE} |
|
20 |
-knitr::opts_chunk$set( |
|
21 |
- collapse = TRUE, |
|
22 |
- comment = "#>" |
|
23 |
-) |
|
24 |
-``` |
|
25 |
- |
|
26 |
-```{r setup} |
|
27 |
-library(TDbasedUFEadv) |
|
28 |
-library(TDbasedUFE) |
|
29 |
-``` |
|
30 |
-# Introduction |
|
31 |
- |
|
32 |
-Since TDbasedUFEadv is an advanced package from TDbasedUFE, please master the |
|
33 |
-contents in TDbasedUFE prior to the trial of this package. And this is the second QuickStart. |
|
34 |
-Please master Quickstart before trying this one, QuickStart2. Here is a flowchart |
|
35 |
-how we can make use of individual functions in TDbasedUFE and TDbasedUFEadv. |
|
36 |
- |
|
37 |
- |
|
38 |
- |
|
39 |
-# Integrated analysis of multiple omics data |
|
40 |
- |
|
41 |
-## When samples are shared |
|
42 |
- |
|
43 |
-As an alternative approach that can integrate multiple omics that share sample, |
|
44 |
-we propose the method that makes use of projection provided by SVD. |
|
45 |
- |
|
46 |
-We prepare a tensor that is a bundle of the first ten singular value vectors |
|
47 |
-generated by applying SVD to individual omics profiles. |
|
48 |
- |
|
49 |
-```{r} |
|
50 |
-require(MOFAdata) |
|
51 |
-data("CLL_data") |
|
52 |
-data("CLL_covariates") |
|
53 |
-Z <- prepareTensorfromList(CLL_data,10) |
|
54 |
-Z <- PrepareSummarizedExperimentTensor(feature = character("1"), |
|
55 |
- sample=array(colnames(CLL_data$Drugs),1),value=Z, |
|
56 |
- sampleData=list(CLL_covariates[,1])) |
|
57 |
-``` |
|
58 |
-Then HOSVD was applied to a tensor |
|
59 |
-```{r} |
|
60 |
-HOSVD <- computeHosvd(Z,scale=FALSE) |
|
61 |
-``` |
|
62 |
-Next we select singular value vectors attributed to samples. |
|
63 |
-In order to select those distinct between male (m) and female (f), |
|
64 |
-we set conditions as |
|
65 |
-```{r} |
|
66 |
-cond <- list(NULL,attr(Z,"sampleData")[[1]],seq_len(4)) |
|
67 |
-``` |
|
68 |
-Interactive more can be activated as |
|
69 |
-``` |
|
70 |
-input_all <- selectSingularValueVectorLarge(HOSVD,cond) |
|
71 |
-``` |
|
72 |
- |
|
73 |
-Although we do not intend to repeat how to use menu in interactive mode, please |
|
74 |
-select the 12th one and the third one shown in below. |
|
75 |
- |
|
76 |
- |
|
77 |
- |
|
78 |
- |
|
79 |
-But here in order to include TDbasedUFEadv into package, we are forced to |
|
80 |
-execute function as batch mode as |
|
81 |
-```{r} |
|
82 |
-input_all <- selectSingularValueVectorLarge(HOSVD,cond,input_all=c(12,1)) |
|
83 |
-``` |
|
84 |
-Finally, we perform the following function to select features in individual |
|
85 |
-omics profiles in an iterative mode |
|
86 |
-``` |
|
87 |
- |
|
88 |
-index_all <- selectFeatureSquare(HOSVD,input_all,CLL_data, |
|
89 |
- de=c(0.5,0.1,0.1,1)) |
|
90 |
-``` |
|
91 |
-and we can see the following four plots (to proceed in an interactive mode, simply press enter). |
|
92 |
- |
|
93 |
- |
|
94 |
- |
|
95 |
- |
|
96 |
- |
|
97 |
-But packaging does not allow interactive mode, we place batch mode in this vignettes. |
|
98 |
- |
|
99 |
-``` {r, fig.keep="none"} |
|
100 |
-HOSVD$U[[1]] <- HOSVD$U[[2]] #selectFeatureSquareのHOSVD$U[[1]]を[[2]]にすればこれは不要 |
|
101 |
-index_all <- selectFeatureSquare(HOSVD,input_all,CLL_data, |
|
102 |
- de=c(0.5,0.1,0.1,1),interact=FALSE) #Batch mode |
|
103 |
-``` |
|
104 |
-Finally, we list the selected features for four omics profiles that share samples. |
|
105 |
- |
|
106 |
-```{r} |
|
107 |
-for (id in c(1:4)) |
|
108 |
-{ |
|
109 |
- attr(Z,"feature") <- rownames(CLL_data[[id]]) |
|
110 |
- print(tableFeatures(Z,index_all[[id]])) |
|
111 |
-} |
|
112 |
-``` |
|
113 |
- |
|
114 |
-This method was used in many studies[@Taguchi2022] by maintainer. |
|
115 |
- |
|
116 |
- |
|
117 |
-## When features are shared |
|
118 |
- |
|
119 |
-Now we discuss what to do when multiple omics data share not samples but |
|
120 |
-features. We prepare data set from RTCGA.rnaseq as follows, with retrieving |
|
121 |
-reduced partial sets from four ones. One should notice that RTCGA is an old |
|
122 |
-package from TCGA (as for 2015). I used it only for demonstration purpose. |
|
123 |
-If you would like to use TCGA for your research, I recommend you to use |
|
124 |
-more modern packages, e.g., curatedTCGAData in Bioconductor. |
|
125 |
-```{r} |
|
126 |
-require(RTCGA.rnaseq) |
|
127 |
-Multi <- list(BLCA.rnaseq[seq_len(100),1+seq_len(1000)], |
|
128 |
- BRCA.rnaseq[seq_len(100),1+seq_len(1000)], |
|
129 |
- CESC.rnaseq[seq_len(100),1+seq_len(1000)], |
|
130 |
- COAD.rnaseq[seq_len(100),1+seq_len(1000)]) |
|
131 |
-``` |
|
132 |
-Multi includes four objects, each of which is matrix that represent 100 samples (rows) and 1000 (features). Please note it is different from usual cases where columns and rows are features and samples, respectively. They are marge into tensor as follows |
|
133 |
-```{r} |
|
134 |
-Z <- prepareTensorfromList(Multi,10) |
|
135 |
-Z <- aperm(Z,c(2,1,3)) |
|
136 |
-``` |
|
137 |
-The function, prepareTeansorfromList which was used in the previous subsection where samples are shared, can be used as it is. However, the first and second modes of a tensor must be exchanged by aperm function for the latter analyses, because of the difference as mentioned in the above. Then tensor object associated with various information is generated as usual as follows and HOSVD was applied to it. |
|
138 |
-``` {r} |
|
139 |
-require(RTCGA.clinical) |
|
140 |
-Clinical <- list(BLCA.clinical,BRCA.clinical,CESC.clinical,COAD.clinical) |
|
141 |
-Multi_sample <- list(BLCA.rnaseq[seq_len(100),1,drop=FALSE], |
|
142 |
- BRCA.rnaseq[seq_len(100),1,drop=FALSE], |
|
143 |
- CESC.rnaseq[seq_len(100),1,drop=FALSE], |
|
144 |
- COAD.rnaseq[seq_len(100),1,drop=FALSE]) |
|
145 |
-#patient.stage_event.tnm_categories.pathologic_categories.pathologic_m |
|
146 |
-k <- c(770,1482,773,791) |
|
147 |
-#patient.bcr_patient_barcode |
|
148 |
-j <- c(20,20,12,14) |
|
149 |
-Z <- PrepareSummarizedExperimentTensor( |
|
150 |
- feature =colnames(ACC.rnaseq)[1+seq_len(1000)], |
|
151 |
- sample=array("",1),value=Z, |
|
152 |
- sampleData=prepareCondTCGA(Multi_sample,Clinical,k,j)) |
|
153 |
-HOSVD <- computeHosvd(Z) |
|
154 |
-``` |
|
155 |
-In order to see which singular value vectors attributed to samples are used for the selection of singular value vectors attributed to features, we need to assign sample conditions. |
|
156 |
-```{r} |
|
157 |
-cond<- attr(Z,"sampleData") |
|
158 |
-``` |
|
159 |
-Then perform this |
|
160 |
-``` |
|
161 |
-par(mai=c(0.3,0.2,0.2,0.2)) |
|
162 |
-index <- selectFeatureProj(HOSVD,Multi,cond,de=1e-3) |
|
163 |
-``` |
|
164 |
-Although we do not intend to explain how to use menu interactively, |
|
165 |
-we select the third singular value vectors as shown in below |
|
166 |
- |
|
167 |
- |
|
168 |
-and we get the following plot, too. |
|
169 |
- |
|
170 |
- |
|
171 |
-Since package does not allow us to include interactive mode, we place here batch mode as follows. |
|
172 |
-Finally, selected feature are listed as follows. |
|
173 |
-``` {r, fig.keep="none"} |
|
174 |
-index <- selectFeatureProj(HOSVD,Multi,cond,de=1e-3,input_all=3) #Batch mode |
|
175 |
-head(tableFeatures(Z,index)) |
|
176 |
-``` |
|
177 |
- |
|
178 |
-This method was used in many studies[@Taguchi2021] by maintainer. |
|
179 |
- |
|
180 |
-```{r} |
|
181 |
-sessionInfo() |
|
182 |
-``` |
... | ... |
@@ -1,5 +1,19 @@ |
1 |
- |
|
2 |
- |
|
1 |
+@Manual{TDbasedUFE, |
|
2 |
+ title = {TDbasedUFE: Tensor Decomposition Bassed Unsupervised Feature Extraction}, |
|
3 |
+ author = {Taguchi, Y-H.}, |
|
4 |
+ year = {2023}, |
|
5 |
+ note = {R package version 0.99.0}, |
|
6 |
+ url = {https://github.com/tagtag/TDbasedUFE}, |
|
7 |
+ } |
|
8 |
+ |
|
9 |
+@book{Taguchibook, |
|
10 |
+ doi = {10.1007/978-3-030-22456-1}, |
|
11 |
+ url = {https://doi.org/10.1007/978-3-030-22456-1}, |
|
12 |
+ year = {2020}, |
|
13 |
+ publisher = {Springer International Publishing}, |
|
14 |
+ author = {Y-H. Taguchi}, |
|
15 |
+ title = {Unsupervised Feature Extraction Applied to Bioinformatics} |
|
16 |
+} |
|
3 | 17 |
@article{Metascape, |
4 | 18 |
year = {2019}, |
5 | 19 |
title = {{Metascape provides a biologist-oriented resource for the analysis of systems-level datasets}}, |
... | ... |
@@ -48,7 +62,7 @@ volume = {10} |
48 | 62 |
@article{Taguchi2022, |
49 | 63 |
year = {2022}, |
50 | 64 |
title = {{A tensor decomposition-based integrated analysis applicable to multiple gene expression profiles without sample matching}}, |
51 |
-author = {Taguchi, Y-h and Turki, Turki}, |
|
65 |
+author = {Taguchi, Y-H. and Turki, Turki}, |
|
52 | 66 |
journal = {Scientific Reports}, |
53 | 67 |
doi = {10.1038/s41598-022-25524-4}, |
54 | 68 |
abstract = {{The integrated analysis of multiple gene expression profiles previously measured in distinct studies is problematic since missing both sample matches and common labels prevent their integration in fully data-driven, unsupervised training. In this study, we propose a strategy to enable the integration of multiple gene expression profiles among multiple independent studies with neither labeling nor sample matching using tensor decomposition unsupervised feature extraction. We apply this strategy to Alzheimer窶冱 disease (AD)-related gene expression profiles that lack precise correspondence among samples, including AD single-cell RNA sequence (scRNA-seq) data. We were able to select biologically reasonable genes using the integrated analysis. Overall, integrated gene expression profiles can function analogously to prior- and/or transfer-learning strategies in other machine-learning applications. For scRNA-seq, the proposed approach significantly reduces the required computational memory.}}, |
... | ... |
@@ -57,7 +71,7 @@ number = {1}, |
57 | 71 |
volume = {12} |
58 | 72 |
} |
59 | 73 |
@Article{Taguchi2021, |
60 |
-AUTHOR = {Taguchi, Y-h. and Turki, Turki}, |
|
74 |
+AUTHOR = {Taguchi, Y-H. and Turki, Turki}, |
|
61 | 75 |
TITLE = {Tensor-Decomposition-Based Unsupervised Feature Extraction in Single-Cell Multiomics Data Analysis}, |
62 | 76 |
JOURNAL = {Genes}, |
63 | 77 |
VOLUME = {12}, |
... | ... |
@@ -86,7 +100,7 @@ volume = {10} |
86 | 100 |
} |
87 | 101 |
@ARTICLE{Taguchi2020, |
88 | 102 |
|
89 |
-AUTHOR={Taguchi, Yh. and Turki, Turki}, |
|
103 |
+AUTHOR={Taguchi, Y-H. and Turki, Turki}, |
|
90 | 104 |
|
91 | 105 |
TITLE={Universal Nature of Drug Treatment Responses in Drug-Tissue-Wide Model-Animal Experiments Using Tensor Decomposition-Based Unsupervised Feature Extraction}, |
92 | 106 |
|
... | ... |
@@ -106,7 +120,7 @@ ABSTRACT={Gene expression profiles of tissues treated with drugs have recently b |
106 | 120 |
} |
107 | 121 |
@ARTICLE{Taguchi2019a, |
108 | 122 |
|
109 |
-AUTHOR={Taguchi, Y-h. and Turki, Turki}, |
|
123 |
+AUTHOR={Taguchi, Y-H. and Turki, Turki}, |
|
110 | 124 |
|
111 | 125 |
TITLE={Tensor Decomposition-Based Unsupervised Feature Extraction Applied to Single-Cell Gene Expression Analysis}, |
112 | 126 |
|