9 | 10 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,33 @@ |
1 |
#' Prepare sample labels for TCGA data
#'
#' For every data set in \code{Multi_sample}, the sample ids found in its
#' first column are looked up in the corresponding clinical table and the
#' requested clinical column is returned as the sample labels (conditions).
#'
#' The first 12 characters of each sample id are converted to lower case and
#' matched against column \code{j[i]} of \code{Clinical[[i]]}. Samples that
#' have no match in the clinical table receive \code{NA}.
#'
#' @param Multi_sample list of sample ids; the ids are read from the first
#' column of each list element
#' @param Clinical List of clinical data matrix from RTCGA.clinical, one per
#' element of \code{Multi_sample}
#' @param k Column numbers used for conditions, one per element of
#' \code{Multi_sample}
#' @param j Column numbers that include corresponding sample ids
#' in clinical data, one per element of \code{Multi_sample}
#'
#' @return list of sample labels, with one element per element of
#' \code{Multi_sample}
#' @export
#'
#' @examples
#' library(RTCGA.clinical)
#' library(RTCGA.rnaseq)
#' Clinical <- list(BLCA.clinical,BRCA.clinical,CESC.clinical,COAD.clinical)
#' Multi_sample <- list(BLCA.rnaseq[seq_len(100),1,drop=FALSE],
#'                         BRCA.rnaseq[seq_len(100),1,drop=FALSE],
#'                         CESC.rnaseq[seq_len(100),1,drop=FALSE],
#'                         COAD.rnaseq[seq_len(100),1,drop=FALSE])
#' k <- c(770,1482,773,791)
#' j <- c(20,20,12,14)
#' cond <- prepareCondTCGA(Multi_sample,Clinical,k,j)
prepareCondTCGA <- function(Multi_sample, Clinical, k, j) {
  n_set <- length(Multi_sample)
  # A missing entry would otherwise surface as an NA column index and either
  # a cryptic subsetting error or silently all-NA labels.
  if (length(Clinical) < n_set || length(k) < n_set || length(j) < n_set) {
    stop(
      "Clinical, k and j must each supply one entry per element of ",
      "Multi_sample",
      call. = FALSE
    )
  }
  lapply(seq_len(n_set), function(i) {
    # Sample ids are compared on their lower-cased first 12 characters.
    barcode <- tolower(substring(Multi_sample[[i]][, 1], 1, 12))
    index <- match(barcode, Clinical[[i]][, j[i]])
    # Unmatched samples have index NA and therefore yield NA labels.
    Clinical[[i]][index, k[i]]
  })
}
|
0 | 34 |
\ No newline at end of file |
1 | 35 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,36 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/prepareCondTCGA.R |
|
3 |
+\name{prepareCondTCGA} |
|
4 |
+\alias{prepareCondTCGA} |
|
5 |
+\title{Prepare sample labels for TCGA data} |
|
6 |
+\usage{ |
|
7 |
+prepareCondTCGA(Multi_sample, Clinical, k, j) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{Multi_sample}{list of sample ids} |
|
11 |
+ |
|
12 |
+\item{Clinical}{List of clinical data matrix from RTCGA.clinical} |
|
13 |
+ |
|
14 |
+\item{k}{Column numbers used for conditions} |
|
15 |
+ |
|
16 |
+\item{j}{Column numbers that include corresponding sample ids |
|
17 |
+in clinical data} |
|
18 |
+} |
|
19 |
+\value{ |
|
20 |
+list of sample labels |
|
21 |
+} |
|
22 |
+\description{ |
|
23 |
+Prepare sample labels for TCGA data |
|
24 |
+} |
|
25 |
+\examples{ |
|
26 |
+require(RTCGA.clinical) |
|
27 |
+require(RTCGA.rnaseq) |
|
28 |
+Clinical <- list(BLCA.clinical,BRCA.clinical,CESC.clinical,COAD.clinical) |
|
29 |
+Multi_sample <- list(BLCA.rnaseq[seq_len(100),1,drop=F], |
|
30 |
+ BRCA.rnaseq[seq_len(100),1,drop=F], |
|
31 |
+ CESC.rnaseq[seq_len(100),1,drop=F], |
|
32 |
+ COAD.rnaseq[seq_len(100),1,drop=F]) |
|
33 |
+k <- c(770,1482,773,791) |
|
34 |
+j <- c(20,20,12,14) |
|
35 |
+cond <- prepareCondTCGA(Multi_sample,Clinical,k,j) |
|
36 |
+} |
... | ... |
@@ -39,17 +39,28 @@ In order foe this, we reproduce one exmaple in QuickStart2 as follows. |
39 | 39 |
|
40 | 40 |
``` {r} |
41 | 41 |
require(RTCGA.rnaseq) |
42 |
-Multi <- list(ACC.rnaseq[seq_len(100),1+seq_len(1000)], |
|
43 |
- BLCA.rnaseq[seq_len(100),1+seq_len(1000)], |
|
44 |
- BRCA.rnaseq[seq_len(100),1+seq_len(1000)], |
|
45 |
- CESC.rnaseq[seq_len(100),1+seq_len(1000)]) |
|
42 |
+Multi <- list(BLCA.rnaseq[seq_len(100),1+seq_len(1000)], |
|
43 |
+ BRCA.rnaseq[seq_len(100),1+seq_len(1000)], |
|
44 |
+ CESC.rnaseq[seq_len(100),1+seq_len(1000)], |
|
45 |
+ COAD.rnaseq[seq_len(100),1+seq_len(1000)]) |
|
46 | 46 |
Z <- prepareTensorfromList(Multi,10) |
47 | 47 |
Z <- aperm(Z,c(2,1,3)) |
48 |
+require(RTCGA.clinical) |
|
49 |
+Clinical <- list(BLCA.clinical,BRCA.clinical,CESC.clinical,COAD.clinical) |
|
50 |
+Multi_sample <- list(BLCA.rnaseq[seq_len(100),1,drop=F], |
|
51 |
+ BRCA.rnaseq[seq_len(100),1,drop=F], |
|
52 |
+ CESC.rnaseq[seq_len(100),1,drop=F], |
|
53 |
+ COAD.rnaseq[seq_len(100),1,drop=F]) |
|
54 |
+#patient.stage_event.tnm_categories.pathologic_categories.pathologic_m |
|
55 |
+k <- c(770,1482,773,791) |
|
56 |
+#patient.bcr_patient_barcode |
|
57 |
+j <- c(20,20,12,14) |
|
48 | 58 |
Z <- PrepareSummarizedExperimentTensor( |
49 | 59 |
feature =colnames(ACC.rnaseq)[1+seq_len(1000)], |
50 |
- sample=array("",1),value=Z) |
|
60 |
+ sample=array("",1),value=Z, |
|
61 |
+ sampleData=prepareCondTCGA(Multi_sample,Clinical,k,j)) |
|
51 | 62 |
HOSVD <- computeHosvd(Z) |
52 |
-cond<- rep(list(rep(seq_len(2),each=50)),4) |
|
63 |
+cond<- attr(Z,"sampleData") |
|
53 | 64 |
index <- selectFeatureProj(HOSVD,Multi,cond,de=1e-3,input_all=2) #Batch mode |
54 | 65 |
head(tableFeatures(Z,index)) |
55 | 66 |
genes <-unlist(lapply(strsplit(tableFeatures(Z,index)[,1],"|",fixed=T),"[",1)) |
... | ... |
@@ -394,7 +394,7 @@ Pressing enter we can get these two plots as well. |
394 | 394 |
|
395 | 395 |
Since the package does not allow interactive mode here, we use batch mode instead. |
396 | 396 |
```{r, fig.keep='none'} |
397 |
-index_all <- selectFeatureRect(SVD,cond,de=c(0.5,0.5),input_all=6) |
|
397 |
+index_all <- selectFeatureRect(SVD,cond,de=c(0.5,0.5),input_all=6) #batch mode |
|
398 | 398 |
``` |
399 | 399 |
Then we can list the Drugs and Methylation sites selected as being distinct |
400 | 400 |
between male and female. |
... | ... |
@@ -122,8 +122,10 @@ Now we discuss what to do when multiple omics data share not samples but feature |
122 | 122 |
We prepare a data set from RTCGA.rnaseq as follows, retrieving reduced partial sets from four of its data sets. |
123 | 123 |
```{r} |
124 | 124 |
require(RTCGA.rnaseq) |
125 |
-Multi <- list(ACC.rnaseq[seq_len(100),1+seq_len(1000)],BLCA.rnaseq[seq_len(100),1+seq_len(1000)], |
|
126 |
- BRCA.rnaseq[seq_len(100),1+seq_len(1000)],CESC.rnaseq[seq_len(100),1+seq_len(1000)]) |
|
125 |
+Multi <- list(BLCA.rnaseq[seq_len(100),1+seq_len(1000)], |
|
126 |
+ BRCA.rnaseq[seq_len(100),1+seq_len(1000)], |
|
127 |
+ CESC.rnaseq[seq_len(100),1+seq_len(1000)], |
|
128 |
+ COAD.rnaseq[seq_len(100),1+seq_len(1000)]) |
|
127 | 129 |
``` |
128 | 130 |
Multi includes four objects, each of which is a matrix that represents 100 samples (rows) and 1000 features (columns). Please note that this is different from the usual case, where rows and columns are features and samples, respectively. They are merged into a tensor as follows |
129 | 131 |
```{r} |
... | ... |
@@ -132,22 +134,33 @@ Z <- aperm(Z,c(2,1,3)) |
132 | 134 |
``` |
133 | 135 |
The function prepareTensorfromList, which was used in the previous subsection where samples are shared, can be used as it is. However, the first and second modes of the tensor must be exchanged by the aperm function for the later analyses, because of the difference mentioned above. Then a tensor object associated with various information is generated as usual as follows, and HOSVD is applied to it. |
134 | 136 |
``` {r} |
135 |
-Z <- PrepareSummarizedExperimentTensor(feature =colnames(ACC.rnaseq)[1+seq_len(1000)], |
|
136 |
- sample=array("",1),value=Z) |
|
137 |
+require(RTCGA.clinical) |
|
138 |
+Clinical <- list(BLCA.clinical,BRCA.clinical,CESC.clinical,COAD.clinical) |
|
139 |
+Multi_sample <- list(BLCA.rnaseq[seq_len(100),1,drop=F], |
|
140 |
+ BRCA.rnaseq[seq_len(100),1,drop=F], |
|
141 |
+ CESC.rnaseq[seq_len(100),1,drop=F], |
|
142 |
+ COAD.rnaseq[seq_len(100),1,drop=F]) |
|
143 |
+#patient.stage_event.tnm_categories.pathologic_categories.pathologic_m |
|
144 |
+k <- c(770,1482,773,791) |
|
145 |
+#patient.bcr_patient_barcode |
|
146 |
+j <- c(20,20,12,14) |
|
147 |
+Z <- PrepareSummarizedExperimentTensor( |
|
148 |
+ feature =colnames(ACC.rnaseq)[1+seq_len(1000)], |
|
149 |
+ sample=array("",1),value=Z, |
|
150 |
+ sampleData=prepareCondTCGA(Multi_sample,Clinical,k,j)) |
|
137 | 151 |
HOSVD <- computeHosvd(Z) |
138 | 152 |
``` |
139 |
-In order to see which sibgular value vectors attributed to samples are used for the selection of signular value vectors attributed to features, we need to assign sample conditions. |
|
140 |
-Since we do not have any information about samples, we simply assume that they are devided into half and half as follows. |
|
153 |
+In order to see which singular value vectors attributed to samples are used for the selection of singular value vectors attributed to features, we need to assign sample conditions. |
|
141 | 154 |
```{r} |
142 |
-cond<- rep(list(rep(seq_len(2),each=50)),4) |
|
155 |
+cond<- attr(Z,"sampleData") |
|
143 | 156 |
``` |
144 | 157 |
Then perform this |
145 | 158 |
``` |
146 |
-par(mai=c(0.1,0.2,0.2,0.2)) |
|
159 |
+par(mai=c(0.3,0.2,0.2,0.2)) |
|
147 | 160 |
index <- selectFeatureProj(HOSVD,Multi,cond,de=1e-3) |
148 | 161 |
``` |
149 | 162 |
Although we do not intend to explain how to use menu interactively, |
150 |
-we select the second singular value vectors as shown in below |
|
163 |
+we select the third singular value vectors as shown below |
|
151 | 164 |
|
152 | 165 |
 |
153 | 166 |
and we get the following plot, too. |
... | ... |
@@ -156,7 +169,7 @@ and we get the following plot, too. |
156 | 169 |
Since the package does not allow us to include interactive mode, we place batch mode here as follows. |
157 | 170 |
Finally, the selected features are listed as follows. |
158 | 171 |
``` {r, fig.keep="none"} |
159 |
-index <- selectFeatureProj(HOSVD,Multi,cond,de=1e-3,input_all=2) #Batch mode |
|
172 |
+index <- selectFeatureProj(HOSVD,Multi,cond,de=1e-3,input_all=3) #Batch mode |
|
160 | 173 |
head(tableFeatures(Z,index)) |
161 | 174 |
``` |
162 | 175 |
|