Browse code

Modification due to the usage of RTCGA.clinical

tagtag authored on 16/02/2023 06:54:26
Showing 9 changed files

... ...
@@ -38,7 +38,7 @@ Imports:
38 38
     RITAN,
39 39
     STRINGdb,
40 40
     enrichR
41
-RoxygenNote: 7.2.0
41
+RoxygenNote: 7.2.3
42 42
 Suggests: 
43 43
     knitr,
44 44
     rmarkdown,
... ...
@@ -3,6 +3,7 @@
3 3
 export(PrepareSummarizedExperimentTensorRect)
4 4
 export(computeSVD)
5 5
 export(prepareCondDrugandDisease)
6
+export(prepareCondTCGA)
6 7
 export(prepareTensorfromList)
7 8
 export(prepareTensorfromMatrix)
8 9
 export(prepareexpDrugandDisease)
9 10
new file mode 100644
... ...
@@ -0,0 +1,33 @@
1
+#' Title Prepare Sample label for TCGA data
2
+#'
3
+#' @param Multi_sample list of sample ids
4
+#' @param Clinical List of clinical data matrix from RTCGA.clinical
5
+#' @param k Column numbers used for conditions
6
+#' @param j Column numbers that include corresponding sample ids 
7
+#' in clinical data
8
+#'
9
+#' @return list of sample labels
10
+#' @export
11
+#'
12
+#' @examples
13
+#' require(RTCGA.clinical)
14
+#' require(RTCGA.rnaseq)
15
+#' Clinical <- list(BLCA.clinical,BRCA.clinical,CESC.clinical,COAD.clinical)
16
+#' Multi_sample <- list(BLCA.rnaseq[seq_len(100),1,drop=F],
17
+#'                     BRCA.rnaseq[seq_len(100),1,drop=F],
18
+#'                     CESC.rnaseq[seq_len(100),1,drop=F],
19
+#'                     COAD.rnaseq[seq_len(100),1,drop=F])
20
+#' k <- c(770,1482,773,791)
21
+#' j <- c(20,20,12,14)
22
+#' cond <- prepareCondTCGA(Multi_sample,Clinical,k,j)
23
+prepareCondTCGA <- function(Multi_sample,Clinical,k,j)
24
+{
25
+    Cond <- rep(list(NA),length(Multi_sample))
26
+    for (i in seq_len(length(Multi_sample)))
27
+    {
28
+        index <- match(tolower(substring(Multi_sample[[i]][,1],1,12)),
29
+                       Clinical[[i]][,j[i]])
30
+        Cond[[i]]<- Clinical[[i]][index,k[i]]
31
+    }
32
+    return(Cond)
33
+}
0 34
\ No newline at end of file
1 35
new file mode 100644
... ...
@@ -0,0 +1,36 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/prepareCondTCGA.R
3
+\name{prepareCondTCGA}
4
+\alias{prepareCondTCGA}
5
+\title{Title Prepare Sample label for TCGA data}
6
+\usage{
7
+prepareCondTCGA(Multi_sample, Clinical, k, j)
8
+}
9
+\arguments{
10
+\item{Multi_sample}{list of sample ids}
11
+
12
+\item{Clinical}{List of clinical data matrix from RTCGA.clinical}
13
+
14
+\item{k}{Column numbers used for conditions}
15
+
16
+\item{j}{Column numbers that include corresponding sample ids 
17
+in clinical data}
18
+}
19
+\value{
20
+list of sample labels
21
+}
22
+\description{
23
+Title Prepare Sample label for TCGA data
24
+}
25
+\examples{
26
+require(RTCGA.clinical)
27
+require(RTCGA.rnaseq)
28
+Clinical <- list(BLCA.clinical,BRCA.clinical,CESC.clinical,COAD.clinical)
29
+Multi_sample <- list(BLCA.rnaseq[seq_len(100),1,drop=F],
30
+                    BRCA.rnaseq[seq_len(100),1,drop=F],
31
+                    CESC.rnaseq[seq_len(100),1,drop=F],
32
+                    COAD.rnaseq[seq_len(100),1,drop=F])
33
+k <- c(770,1482,773,791)
34
+j <- c(20,20,12,14)
35
+cond <- prepareCondTCGA(Multi_sample,Clinical,k,j)
36
+}
... ...
@@ -39,17 +39,28 @@ In order to do this, we reproduce one example in QuickStart2 as follows.
39 39
 
40 40
 ``` {r}
41 41
 require(RTCGA.rnaseq)
42
-Multi <- list(ACC.rnaseq[seq_len(100),1+seq_len(1000)],
43
-             BLCA.rnaseq[seq_len(100),1+seq_len(1000)],
44
-             BRCA.rnaseq[seq_len(100),1+seq_len(1000)],
45
-             CESC.rnaseq[seq_len(100),1+seq_len(1000)])
42
+Multi <- list(BLCA.rnaseq[seq_len(100),1+seq_len(1000)],
43
+              BRCA.rnaseq[seq_len(100),1+seq_len(1000)],
44
+              CESC.rnaseq[seq_len(100),1+seq_len(1000)],
45
+              COAD.rnaseq[seq_len(100),1+seq_len(1000)])
46 46
 Z <- prepareTensorfromList(Multi,10)
47 47
 Z <- aperm(Z,c(2,1,3))
48
+require(RTCGA.clinical)
49
+Clinical <- list(BLCA.clinical,BRCA.clinical,CESC.clinical,COAD.clinical)
50
+Multi_sample <- list(BLCA.rnaseq[seq_len(100),1,drop=F],
51
+              BRCA.rnaseq[seq_len(100),1,drop=F],
52
+              CESC.rnaseq[seq_len(100),1,drop=F],
53
+              COAD.rnaseq[seq_len(100),1,drop=F])
54
+#patient.stage_event.tnm_categories.pathologic_categories.pathologic_m
55
+k <- c(770,1482,773,791)
56
+#patient.bcr_patient_barcode
57
+j <- c(20,20,12,14)
48 58
 Z <- PrepareSummarizedExperimentTensor(
49 59
     feature =colnames(ACC.rnaseq)[1+seq_len(1000)],
50
-                                       sample=array("",1),value=Z)
60
+    sample=array("",1),value=Z,
61
+    sampleData=prepareCondTCGA(Multi_sample,Clinical,k,j))
51 62
 HOSVD <- computeHosvd(Z)
52
-cond<- rep(list(rep(seq_len(2),each=50)),4)
63
+cond<- attr(Z,"sampleData")
53 64
 index <- selectFeatureProj(HOSVD,Multi,cond,de=1e-3,input_all=2) #Batch mode
54 65
 head(tableFeatures(Z,index))
55 66
 genes <-unlist(lapply(strsplit(tableFeatures(Z,index)[,1],"|",fixed=T),"[",1))
... ...
@@ -394,7 +394,7 @@ Pressing enter we can get these two plots as well.
394 394
 
395 395
 Since the package does not allow interactive mode, we use batch mode here.
396 396
 ```{r, fig.keep='none'}
397
-index_all <- selectFeatureRect(SVD,cond,de=c(0.5,0.5),input_all=6)
397
+index_all <- selectFeatureRect(SVD,cond,de=c(0.5,0.5),input_all=6) #batch mode
398 398
 ```
399 399
 Then we can list the Drugs and Methylation sites selected as being distinct 
400 400
 between male and female.
... ...
@@ -122,8 +122,10 @@ Now we discuss what to do when multiple omics data share not samples but feature
122 122
 We prepare a data set from RTCGA.rnaseq as follows, retrieving reduced partial sets from four of its data sets.
123 123
 ```{r}
124 124
 require(RTCGA.rnaseq)
125
-Multi <- list(ACC.rnaseq[seq_len(100),1+seq_len(1000)],BLCA.rnaseq[seq_len(100),1+seq_len(1000)],
126
-             BRCA.rnaseq[seq_len(100),1+seq_len(1000)],CESC.rnaseq[seq_len(100),1+seq_len(1000)])
125
+Multi <- list(BLCA.rnaseq[seq_len(100),1+seq_len(1000)],
126
+              BRCA.rnaseq[seq_len(100),1+seq_len(1000)],
127
+              CESC.rnaseq[seq_len(100),1+seq_len(1000)],
128
+              COAD.rnaseq[seq_len(100),1+seq_len(1000)])
127 129
 ```
128 130
 Multi includes four objects, each of which is a matrix that represents 100 samples (rows) and 1000 features (columns). Please note that this differs from the usual case, where rows and columns are features and samples, respectively. They are merged into a tensor as follows
129 131
 ```{r}
... ...
@@ -132,22 +134,33 @@ Z <- aperm(Z,c(2,1,3))
132 134
 ```
133 135
 The function prepareTensorfromList, which was used in the previous subsection where samples are shared, can be used as is. However, the first and second modes of the tensor must be exchanged with the aperm function for the later analyses, because of the difference mentioned above. Then a tensor object associated with various information is generated as usual, as follows, and HOSVD is applied to it. 
134 136
 ``` {r}
135
-Z <- PrepareSummarizedExperimentTensor(feature =colnames(ACC.rnaseq)[1+seq_len(1000)],
136
-                                       sample=array("",1),value=Z)
137
+require(RTCGA.clinical)
138
+Clinical <- list(BLCA.clinical,BRCA.clinical,CESC.clinical,COAD.clinical)
139
+Multi_sample <- list(BLCA.rnaseq[seq_len(100),1,drop=F],
140
+              BRCA.rnaseq[seq_len(100),1,drop=F],
141
+              CESC.rnaseq[seq_len(100),1,drop=F],
142
+              COAD.rnaseq[seq_len(100),1,drop=F])
143
+#patient.stage_event.tnm_categories.pathologic_categories.pathologic_m
144
+k <- c(770,1482,773,791)
145
+#patient.bcr_patient_barcode
146
+j <- c(20,20,12,14)
147
+Z <- PrepareSummarizedExperimentTensor(
148
+    feature =colnames(ACC.rnaseq)[1+seq_len(1000)],
149
+    sample=array("",1),value=Z,
150
+    sampleData=prepareCondTCGA(Multi_sample,Clinical,k,j))
137 151
 HOSVD <- computeHosvd(Z)
138 152
 ```
139
-In order to see which sibgular value vectors attributed to samples are used for the selection of signular value vectors attributed to features, we need to assign sample conditions.
140
-Since we do not have any information about samples, we simply assume that they are devided into half and half as follows.
153
+In order to see which singular value vectors attributed to samples are used for the selection of singular value vectors attributed to features, we need to assign sample conditions.
141 154
 ```{r}
142
-cond<- rep(list(rep(seq_len(2),each=50)),4)
155
+cond<- attr(Z,"sampleData")
143 156
 ```
144 157
 Then perform this 
145 158
 ```
146
-par(mai=c(0.1,0.2,0.2,0.2))
159
+par(mai=c(0.3,0.2,0.2,0.2))
147 160
 index <- selectFeatureProj(HOSVD,Multi,cond,de=1e-3)
148 161
 ```
149 162
 Although we do not intend to explain how to use menu interactively, 
150
-we select the second singular value vectors as shown in below
163
+we select the third singular value vectors as shown below
151 164
 
152 165
 ![The third singular value vectors](./fig21.jpg)
153 166
 and we get the following plot, too.
... ...
@@ -156,7 +169,7 @@ and we get the following plot, too.
156 169
 Since the package does not allow us to include interactive mode, we use batch mode here as follows.
157 170
 Finally, the selected features are listed as follows.
158 171
 ``` {r, fig.keep="none"}
159
-index <- selectFeatureProj(HOSVD,Multi,cond,de=1e-3,input_all=2) #Batch mode
172
+index <- selectFeatureProj(HOSVD,Multi,cond,de=1e-3,input_all=3) #Batch mode
160 173
 head(tableFeatures(Z,index))
161 174
 ```
162 175
 
163 176
Binary files a/vignettes/fig21.jpg and b/vignettes/fig21.jpg differ
164 177
Binary files a/vignettes/fig22.jpg and b/vignettes/fig22.jpg differ