Browse code

fix feature_boxplot when NA present

Gavin Rhys Lloyd authored on 17/09/2021 08:28:52
Showing 1 changed files

... ...
@@ -5,124 +5,177 @@
5 5
 #' chart_plot(C,D)
6 6
 #' @export feature_boxplot
7 7
 feature_boxplot = function(label_outliers=TRUE,feature_to_plot,
8
-    factor_name,show_counts=TRUE,...) {
9
-    out=struct::new_struct('feature_boxplot',
10
-        label_outliers=label_outliers,
11
-        feature_to_plot=feature_to_plot,
12
-        factor_name=factor_name,
13
-        show_counts=show_counts,
14
-        ...)
15
-    return(out)
8
+                           factor_name,show_counts=TRUE,style='boxplot',jitter=FALSE,fill=FALSE,...) {
9
+  out=struct::new_struct('feature_boxplot',
10
+                         label_outliers=label_outliers,
11
+                         feature_to_plot=feature_to_plot,
12
+                         factor_name=factor_name,
13
+                         show_counts=show_counts,
14
+                         style=style,
15
+                         jitter=jitter,
16
+                         fill=fill,
17
+                         ...)
18
+  return(out)
16 19
 }
17 20
 
18 21
 
19 22
 .feature_boxplot<-setClass(
20
-    "feature_boxplot",
21
-    contains=c('chart','stato'),
22
-    slots=c(
23
-        # INPUTS
24
-        label_outliers='entity',
25
-        feature_to_plot='entity',
26
-        factor_name='entity',
27
-        show_counts='entity'
28
-    ),
29
-    prototype = list(name='Feature boxplot',
30
-        description='A boxplot to visualise the distribution of values within a feature.',
31
-        type="boxlot",
32
-        stato_id='STATO:0000243',
33
-        .params=c('label_outliers','feature_to_plot','factor_name','show_counts'),
34
-        
35
-        label_outliers=entity(name='Label outliers',
36
-            value=TRUE,
37
-            type='logical',
38
-            description=c(
39
-                'TRUE' = 'The index for outlier samples is included on the plot.',
40
-                'FALSE' = 'No labels are displayed.'
41
-            )
42
-        ),
43
-        feature_to_plot=entity(name='Feature to plot',
44
-            value='V1',
45
-            type=c('character','numeric','integer'),
46
-            description='The column name of the plotted feature.'
47
-        ),
48
-        factor_name=ents$factor_name,
49
-        show_counts=ents$show_counts
50
-    )
23
+  "feature_boxplot",
24
+  contains=c('chart'),
25
+  slots=c(
26
+    # INPUTS
27
+    label_outliers='entity',
28
+    feature_to_plot='entity',
29
+    factor_name='entity',
30
+    show_counts='entity',
31
+    style='enum',
32
+    jitter='entity',
33
+    fill='entity'
34
+  ),
35
+  prototype = list(name='Feature boxplot',
36
+                   description='A boxplot to visualise the distribution of values within a feature.',
37
+                   type="boxlot",
38
+                   .params=c('label_outliers','feature_to_plot','factor_name','show_counts','style','jitter','fill'),
39
+                   
40
+                   label_outliers=entity(name='Label outliers',
41
+                                         value=TRUE,
42
+                                         type='logical',
43
+                                         description=c(
44
+                                           'TRUE' = 'The index for outlier samples is included on the plot.',
45
+                                           'FALSE' = 'No labels are displayed.'
46
+                                         )
47
+                   ),
48
+                   feature_to_plot=entity(name='Feature to plot',
49
+                                          value='V1',
50
+                                          type=c('character','numeric','integer'),
51
+                                          description='The column name of the plotted feature.'
52
+                   ),
53
+                   factor_name=ents$factor_name,
54
+                   show_counts=ents$show_counts,
55
+                   style=enum(
56
+                     name='Plot style',
57
+                     description=c(
58
+                       'boxplot' = 'boxplot style',
59
+                       'violin' = 'violon plot style'
60
+                     ),
61
+                     allowed=c('boxplot','violin'),
62
+                     value='boxplot',
63
+                     max_length=1,
64
+                     type='character'
65
+                   ),
66
+                   jitter=entity(
67
+                     name = 'Jitter',
68
+                     description = 'Include points plotted with added jitter.',
69
+                     value=FALSE,
70
+                     type='logical',
71
+                     max_length=1
72
+                   ),
73
+                   fill=entity(
74
+                     name = 'Fill',
75
+                     description = 'Block fill the boxes or violins with the group colour.',
76
+                     value=FALSE,
77
+                     type='logical',
78
+                     max_length=1
79
+                   )
80
+  )
51 81
 )
52 82
 
53 83
 #' @export
54 84
 #' @template chart_plot
55 85
 setMethod(f="chart_plot",
56
-    signature=c("feature_boxplot",'DatasetExperiment'),
57
-    definition=function(obj,dobj)
58
-    {
59
-        # get options
60
-        opt=param_list(obj)
61
-        # get data
62
-        Xt=dobj$data
63
-        # column name
64
-        if (is.numeric(opt$feature_to_plot)) {
65
-            varn=colnames(Xt)[opt$feature_to_plot]
66
-        } else {
67
-            varn=opt$feature_to_plot
68
-        }
69
-        # get requested column
70
-        Xt=Xt[[opt$feature_to_plot]]
71
-        # meta data
72
-        SM=dobj$sample_meta
73
-        SM=SM[[obj$factor_name]]
74
-        
75
-        # remove NA
76
-        SM=SM[!is.na(Xt)]
77
-        Xt=Xt[!is.na(Xt)]
78
-        
79
-        # count number of values
80
-        L=levels(SM)
81
-        count=numeric(length(L))
82
-        for (i in 1:length(L)) {
83
-            count[i]=sum(SM==L[i])
84
-        }
85
-        
86
-        # get color pallete using pmp
87
-        clrs= createClassAndColors(class = SM)
88
-        SM=clrs$class
89
-        
90
-        #prep the plot
91
-        temp=data.frame(x=SM,y=Xt)
92
-        p<-ggplot(temp, aes_(x=~x,y=~y,color=~x)) +
93
-            geom_boxplot() +
94
-            xlab(opt$factor) +
95
-            ylab('') +
96
-            ggtitle(varn) +
97
-            scale_colour_manual(values=clrs$manual_colors,name=opt$factor_name) +
98
-            theme_Publication(base_size = 12) +
99
-            theme(legend.position="none")
100
-        
101
-        if (opt$show_counts) {
102
-            newlabels=as.character(count)
103
-            newlabels=paste0(as.character(L),'\n(n = ',newlabels,')')
104
-            p=p+scale_x_discrete(labels=newlabels)
105
-        }
106
-        
107
-        if (opt$label_outliers) {
108
-            outliers=numeric()
109
-            for (l in L) {
86
+          signature=c("feature_boxplot",'DatasetExperiment'),
87
+          definition=function(obj,dobj)
88
+          {
89
+            # get options
90
+            opt=param_list(obj)
91
+            # get data
92
+            Xt=dobj$data
93
+            # column name
94
+            if (is.numeric(opt$feature_to_plot)) {
95
+              varn=colnames(Xt)[opt$feature_to_plot]
96
+            } else {
97
+              varn=opt$feature_to_plot
98
+            }
99
+            # get requested column
100
+            Xt=Xt[[opt$feature_to_plot]]
101
+            # meta data
102
+            SM=dobj$sample_meta
103
+            SM=SM[[obj$factor_name]]
104
+            names(SM)=rownames(dobj)
105
+            
106
+            # remove NA
107
+            SM=SM[!is.na(Xt)]
108
+            Xt=Xt[!is.na(Xt)]
109
+            
110
+            # get color pallete using pmp
111
+            clrs= createClassAndColors(class = SM)
112
+            SM=clrs$class
113
+            
114
+            # count number of values
115
+            L=levels(SM)
116
+            count=numeric(length(L))
117
+            for (i in 1:length(L)) {
118
+              count[i]=sum(SM==L[i])
119
+            }
120
+            
121
+            #prep the plot
122
+            temp=data.frame(x=SM,y=Xt,row.names=names(SM))
123
+            
124
+            if (obj$fill) {
125
+              A = aes_string(x='x',y='y',color='x',fill='x')
126
+            } else {
127
+              A = aes_string(x='x',y='y',color='x')
128
+            }
129
+            
130
+            
131
+            p<-ggplot(temp, A) 
132
+            
133
+            if (obj$style=='boxplot') {
134
+              p=p+geom_boxplot(alpha=0.3)
135
+            } else {
136
+              p=p+geom_violin(alpha=0.3, draw_quantiles = c(0.25, 0.5, 0.75), 
137
+                              scale = "width", adjust = .5)
138
+            }
139
+            
140
+            if (obj$jitter) {
141
+              p=p+geom_jitter(show.legend = F, width = 0.1)
142
+            }
143
+            p=p+xlab(opt$factor) +
144
+              ylab('') +
145
+              ggtitle(varn) +
146
+              scale_colour_manual(values=clrs$manual_colors,name=opt$factor_name)
147
+            
148
+            p=p+scale_fill_manual(values=clrs$manual_colors,name=opt$factor_name)
149
+            
150
+            p=p+theme_Publication(base_size = 12) +
151
+              theme(legend.position="none")
152
+            
153
+            if (opt$show_counts) {
154
+              newlabels=as.character(count)
155
+              newlabels=paste0(as.character(L),'\n(n = ',newlabels,')')
156
+              p=p+scale_x_discrete(labels=newlabels)
157
+            }
158
+            
159
+            if (opt$label_outliers) {
160
+              outliers=numeric()
161
+              for (l in L) {
110 162
                 IN=which(SM==l)
111 163
                 outliers=c(outliers,IN[which( Xt[IN]>(quantile(Xt[IN], 0.75) + 1.5*IQR(Xt[IN]))) ] )
112 164
                 outliers=c(outliers,IN[which( Xt[IN]<(quantile(Xt[IN], 0.25) - 1.5*IQR(Xt[IN]))) ] )
165
+              }
166
+              outlier_df=temp[outliers,]
167
+              outlier_df$out_label=paste0('  ',rownames(temp))[outliers]
168
+              p=p+geom_text(data=outlier_df,aes_(group=~x,color=~x,label=~out_label),hjust='left')
113 169
             }
114
-            outlier_df=temp[outliers,]
115
-            outlier_df$out_label=paste0('  ',rownames(dobj$data))[outliers]
116
-            p=p+geom_text(data=outlier_df,aes_(group=~x,color=~x,label=~out_label),hjust='left')
117
-        }
118
-        
119
-        return(p)
120
-        
121
-    }
170
+            
171
+            return(p)
172
+            
173
+          }
122 174
 )
123 175
 
124 176
 
125 177
 
178
+
126 179
 ######################################
127 180
 ######################################
128 181
 
... ...
@@ -138,64 +191,64 @@ setMethod(f="chart_plot",
138 191
 #' @return struct object
139 192
 #' @export mv_histogram
140 193
 mv_histogram = function(label_outliers=TRUE,by_sample=TRUE,...) {
141
-    out=struct::new_struct('mv_histogram',
142
-        label_outliers=label_outliers,
143
-        by_sample=by_sample,
144
-        ...)
145
-    return(out)
194
+  out=struct::new_struct('mv_histogram',
195
+                         label_outliers=label_outliers,
196
+                         by_sample=by_sample,
197
+                         ...)
198
+  return(out)
146 199
 }
147 200
 
148 201
 
149 202
 .mv_histogram<-setClass(
150
-    "mv_histogram",
151
-    contains='chart',
152
-    slots=c(
153
-        # INPUTS
154
-        label_outliers='entity',
155
-        by_sample='entity'
156
-    ),
157
-    prototype = list(name='Missing value histogram',
158
-        description='A histogram of the numbers of missing values per sample/feature',
159
-        type="histogram",
160
-        .params=c('label_outliers','by_sample'),
161
-        
162
-        by_sample=ents$by_sample,
163
-        label_outliers=ents$label_outliers
164
-    )
203
+  "mv_histogram",
204
+  contains='chart',
205
+  slots=c(
206
+    # INPUTS
207
+    label_outliers='entity',
208
+    by_sample='entity'
209
+  ),
210
+  prototype = list(name='Missing value histogram',
211
+                   description='A histogram of the numbers of missing values per sample/feature',
212
+                   type="histogram",
213
+                   .params=c('label_outliers','by_sample'),
214
+                   
215
+                   by_sample=ents$by_sample,
216
+                   label_outliers=ents$label_outliers
217
+  )
165 218
 )
166 219
 
167 220
 #' @export
168 221
 #' @template chart_plot
169 222
 setMethod(f="chart_plot",
170
-    signature=c("mv_histogram",'DatasetExperiment'),
171
-    definition=function(obj,dobj)
172
-    {
173
-        # get options
174
-        opt=param_list(obj)
175
-        # get data
176
-        Xt=dobj$data
177
-        # meta data
178
-        SM=dobj$sample_meta[ ,1]
179
-        
180
-        if (opt$by_sample) {
181
-            # count NS per sample
182
-            count=apply(Xt,1,function(x) {sum(is.na(x))/length(x)*100})
183
-            txt='Missing values per sample'
184
-        } else {
185
-            # count NS per feature
186
-            count=apply(Xt,2,function(x) {sum(is.na(x))/length(x)*100})
187
-            txt='Missing values per feature'
188
-        }
189
-        
190
-        A=data.frame(x=count)
191
-        p=ggplot (data=A, aes_(x=~x)) + geom_histogram()+
192
-            xlab ("missing values, %")+ ggtitle(txt)+
193
-            xlim (0,100)+
194
-            scale_colour_Publication()+ theme_Publication(base_size = 12)
195
-        
196
-        return(p)
197
-        
198
-    }
223
+          signature=c("mv_histogram",'DatasetExperiment'),
224
+          definition=function(obj,dobj)
225
+          {
226
+            # get options
227
+            opt=param_list(obj)
228
+            # get data
229
+            Xt=dobj$data
230
+            # meta data
231
+            SM=dobj$sample_meta[ ,1]
232
+            
233
+            if (opt$by_sample) {
234
+              # count NS per sample
235
+              count=apply(Xt,1,function(x) {sum(is.na(x))/length(x)*100})
236
+              txt='Missing values per sample'
237
+            } else {
238
+              # count NS per feature
239
+              count=apply(Xt,2,function(x) {sum(is.na(x))/length(x)*100})
240
+              txt='Missing values per feature'
241
+            }
242
+            
243
+            A=data.frame(x=count)
244
+            p=ggplot (data=A, aes_(x=~x)) + geom_histogram()+
245
+              xlab ("missing values, %")+ ggtitle(txt)+
246
+              xlim (0,100)+
247
+              scale_colour_Publication()+ theme_Publication(base_size = 12)
248
+            
249
+            return(p)
250
+            
251
+          }
199 252
 )
200 253
 
201 254
 
... ...
@@ -212,131 +265,131 @@ setMethod(f="chart_plot",
212 265
 #' @import struct
213 266
 #' @export mv_boxplot
214 267
 mv_boxplot = function(label_outliers=TRUE,by_sample=TRUE,factor_name,show_counts=TRUE,...) {
215
-    out=struct::new_struct('mv_boxplot',
216
-        label_outliers=label_outliers,
217
-        by_sample=by_sample,
218
-        factor_name=factor_name,
219
-        show_counts=show_counts,
220
-        ...)
221
-    return(out)
268
+  out=struct::new_struct('mv_boxplot',
269
+                         label_outliers=label_outliers,
270
+                         by_sample=by_sample,
271
+                         factor_name=factor_name,
272
+                         show_counts=show_counts,
273
+                         ...)
274
+  return(out)
222 275
 }
223 276
 
224 277
 
225 278
 .mv_boxplot<-setClass(
226
-    "mv_boxplot",
227
-    contains='chart',
228
-    slots=c(
229
-        # INPUTS
230
-        label_outliers='entity',
231
-        by_sample='entity',
232
-        factor_name='entity',
233
-        show_counts='entity'
234
-    ),
235
-    prototype = list(name='Missing value boxplots',
236
-        description='Boxplots of the number of missing values per sample/feature.',
237
-        type="boxplot",
238
-        .params=c('label_outliers','by_sample','factor_name','show_counts'),
239
-        
240
-        by_sample=ents$by_sample,
241
-        label_outliers=ents$label_outliers,
242
-        factor_name=ents$factor_name,
243
-        show_counts=ents$show_counts
244
-    )
279
+  "mv_boxplot",
280
+  contains='chart',
281
+  slots=c(
282
+    # INPUTS
283
+    label_outliers='entity',
284
+    by_sample='entity',
285
+    factor_name='entity',
286
+    show_counts='entity'
287
+  ),
288
+  prototype = list(name='Missing value boxplots',
289
+                   description='Boxplots of the number of missing values per sample/feature.',
290
+                   type="boxplot",
291
+                   .params=c('label_outliers','by_sample','factor_name','show_counts'),
292
+                   
293
+                   by_sample=ents$by_sample,
294
+                   label_outliers=ents$label_outliers,
295
+                   factor_name=ents$factor_name,
296
+                   show_counts=ents$show_counts
297
+  )
245 298
 )
246 299
 
247 300
 #' @export
248 301
 #' @template chart_plot
249 302
 setMethod(f="chart_plot",
250
-    signature=c("mv_boxplot",'DatasetExperiment'),
251
-    definition=function(obj,dobj) {
252
-        # get options
253
-        opt=param_list(obj)
254
-        # get data
255
-        Xt=dobj$data
256
-        # meta data
257
-        SM=dobj$sample_meta[ ,obj$factor_name]
258
-        
259
-        L=levels(SM)
260
-        
261
-        if (opt$by_sample) {
262
-            # count NS per sample
263
-            count=apply(Xt,1,function(x) {sum(is.na(x))/length(x)*100})
264
-            result=matrix(0,nrow=nrow(Xt),ncol=2)
265
-            result[,1]=count
266
-            result[,2]=SM
267
-            if (sum(result[,1])==0) {
303
+          signature=c("mv_boxplot",'DatasetExperiment'),
304
+          definition=function(obj,dobj) {
305
+            # get options
306
+            opt=param_list(obj)
307
+            # get data
308
+            Xt=dobj$data
309
+            # meta data
310
+            SM=dobj$sample_meta[ ,obj$factor_name]
311
+            
312
+            L=levels(SM)
313
+            
314
+            if (opt$by_sample) {
315
+              # count NS per sample
316
+              count=apply(Xt,1,function(x) {sum(is.na(x))/length(x)*100})
317
+              result=matrix(0,nrow=nrow(Xt),ncol=2)
318
+              result[,1]=count
319
+              result[,2]=SM
320
+              if (sum(result[,1])==0) {
268 321
                 warning('No missing values were detected')
269
-            }
270
-            txt='Missing values per sample'
271
-            # get color pallete using pmp
272
-            clrs= createClassAndColors(class = SM)
273
-            A=data.frame(x=clrs$class,y=result[,1])
274
-        } else {
275
-            for (i in 1:length(L)) {
322
+              }
323
+              txt='Missing values per sample'
324
+              # get color pallete using pmp
325
+              clrs= createClassAndColors(class = SM)
326
+              A=data.frame(x=clrs$class,y=result[,1])
327
+            } else {
328
+              for (i in 1:length(L)) {
276 329
                 # count NS per feature per group
277 330
                 count=apply(Xt[SM==L[i],,drop=FALSE],2,function(x) {sum(is.na(x))/sum(SM==L[i])*100})
278 331
                 temp=data.frame(y=count,x=L[i])
279 332
                 if (i==1) {
280
-                    result=temp
333
+                  result=temp
281 334
                 } else {
282
-                    result=rbind(result,temp)
335
+                  result=rbind(result,temp)
283 336
                 }
284
-            }
285
-            if (sum(result$y)==0) {
337
+              }
338
+              if (sum(result$y)==0) {
286 339
                 warning('No missing values were detected')
340
+              }
341
+              txt='Missing values per feature'
342
+              # get color pallete using pmp
343
+              clrs= createClassAndColors(class = as.factor(result$x))
344
+              A=data.frame(x=clrs$class,y=result$y)
287 345
             }
288
-            txt='Missing values per feature'
289
-            # get color pallete using pmp
290
-            clrs= createClassAndColors(class = as.factor(result$x))
291
-            A=data.frame(x=clrs$class,y=result$y)
292
-        }
293
-        
294
-        
295
-        p=ggplot (data=A, aes_(x=~x,y=~y,color=~x)) +
296
-            geom_boxplot() +
297
-            ggtitle(txt) +
298
-            xlab(opt$factor) +
299
-            ylim (0,100)+
300
-            scale_colour_manual(values=clrs$manual_colors,name=opt$factor_name) +
301
-            theme_Publication(base_size = 12) +
302
-            ylab ("missing values, %") +
303
-            coord_flip()+
304
-            theme(legend.position="none")
305
-        
306
-        if (opt$show_counts) {
307
-            L=levels(A$x)
308
-            num=numeric(length(L))
309
-            for (i in 1:length(L)) {
346
+            
347
+            
348
+            p=ggplot (data=A, aes_(x=~x,y=~y,color=~x)) +
349
+              geom_boxplot() +
350
+              ggtitle(txt) +
351
+              xlab(opt$factor) +
352
+              ylim (0,100)+
353
+              scale_colour_manual(values=clrs$manual_colors,name=opt$factor_name) +
354
+              theme_Publication(base_size = 12) +
355
+              ylab ("missing values, %") +
356
+              coord_flip()+
357
+              theme(legend.position="none")
358
+            
359
+            if (opt$show_counts) {
360
+              L=levels(A$x)
361
+              num=numeric(length(L))
362
+              for (i in 1:length(L)) {
310 363
                 num[i]=sum(A$x==L[i])
364
+              }
365
+              newlabels=as.character(num)
366
+              newlabels=paste0(as.character(L),'\n(n = ',newlabels,')')
367
+              p=p+scale_x_discrete(labels=newlabels)
311 368
             }
312
-            newlabels=as.character(num)
313
-            newlabels=paste0(as.character(L),'\n(n = ',newlabels,')')
314
-            p=p+scale_x_discrete(labels=newlabels)
315
-        }
316
-        
317
-        if (opt$label_outliers) {
318
-            outliers=numeric()
319
-            for (l in L) {
369
+            
370
+            if (opt$label_outliers) {
371
+              outliers=numeric()
372
+              for (l in L) {
320 373
                 IN=which(A$x==l)
321 374
                 outliers=c(outliers,IN[which( A$y[IN]>(quantile(A$y[IN], 0.75) + 1.5*IQR(A$y[IN]))) ] )
322 375
                 outliers=c(outliers,IN[which( A$y[IN]<(quantile(A$y[IN], 0.25) - 1.5*IQR(A$y[IN]))) ] )
323
-            }
324
-            outlier_df=A[outliers,]
325
-            
326
-            if (length(outliers)>0){
376
+              }
377
+              outlier_df=A[outliers,]
378
+              
379
+              if (length(outliers)>0){
327 380
                 if (opt$by_sample) {
328
-                    outlier_df$out_label=paste0('  ',rownames(dobj$data))[outliers]
381
+                  outlier_df$out_label=paste0('  ',rownames(dobj$data))[outliers]
329 382
                 } else
330 383
                 {
331
-                    outlier_df$out_label=paste0('  ',rep(colnames(dobj$data),length(L))[outliers])
384
+                  outlier_df$out_label=paste0('  ',rep(colnames(dobj$data),length(L))[outliers])
332 385
                 }
333 386
                 p=p+geom_text(data=outlier_df,aes_(group=~x,color=~x,label=~out_label,angle =~ 90),hjust='left')
387
+              }
388
+              
334 389
             }
335 390
             
336
-        }
337
-        
338
-        return(p)
339
-    }
391
+            return(p)
392
+          }
340 393
 )
341 394
 
342 395
 #' @eval get_description('DatasetExperiment_dist')
... ...
@@ -347,74 +400,74 @@ setMethod(f="chart_plot",
347 400
 #' @import struct
348 401
 #' @export DatasetExperiment_dist
349 402
 DatasetExperiment_dist = function(factor_name,per_class=TRUE,...) {
350
-    out=struct::new_struct('DatasetExperiment_dist',
351
-        factor_name=factor_name,
352
-        per_class=per_class,
353
-        ...)
354
-    return(out)
403
+  out=struct::new_struct('DatasetExperiment_dist',
404
+                         factor_name=factor_name,
405
+                         per_class=per_class,
406
+                         ...)
407
+  return(out)
355 408
 }
356 409
 
357 410
 .DatasetExperiment_dist<-setClass(
358
-    "DatasetExperiment_dist",
359
-    contains='chart',
360
-    slots=c(
361
-        # INPUTS
362
-        factor_name='entity',
363
-        per_class='entity'
364
-    ),
365
-    prototype = list(name='Feature distribution histogram',
366
-        description=paste0('A histogram to visualise the distribution of ',
367
-            'values within features.'),
368
-        type="histogram",
369
-        .params=c('factor_name','per_class'),
370
-        
371
-        factor_name=ents$factor_name,
372
-        per_class=entity(name='Plot per class',
373
-            value=TRUE,
374
-            type='logical',
375
-            description=c(
376
-                "TRUE" = 'The distributions are plotted for each class.',
377
-                "FALSE" = 'The distribution is plotted for all samples')
378
-        )
379
-    )
411
+  "DatasetExperiment_dist",
412
+  contains='chart',
413
+  slots=c(
414
+    # INPUTS
415
+    factor_name='entity',
416
+    per_class='entity'
417
+  ),
418
+  prototype = list(name='Feature distribution histogram',
419
+                   description=paste0('A histogram to visualise the distribution of ',
420
+                                      'values within features.'),
421
+                   type="histogram",
422
+                   .params=c('factor_name','per_class'),
423
+                   
424
+                   factor_name=ents$factor_name,
425
+                   per_class=entity(name='Plot per class',
426
+                                    value=TRUE,
427
+                                    type='logical',
428
+                                    description=c(
429
+                                      "TRUE" = 'The distributions are plotted for each class.',
430
+                                      "FALSE" = 'The distribution is plotted for all samples')
431
+                   )
432
+  )
380 433
 )
381 434
 
382 435
 #' @export
383 436
 #' @template chart_plot
384 437
 setMethod(f="chart_plot",
385
-    signature=c("DatasetExperiment_dist",'DatasetExperiment'),
386
-    definition=function(obj,dobj)
387
-    {
388
-        opt=param_list(obj)
389
-        X=as.matrix(dobj$data)
390
-        S=dobj$sample_meta[[opt$factor_name]]
391
-        
392
-        if (opt$per_class) {
393
-            L=levels(S)
394
-            for (k in L) {
438
+          signature=c("DatasetExperiment_dist",'DatasetExperiment'),
439
+          definition=function(obj,dobj)
440
+          {
441
+            opt=param_list(obj)
442
+            X=as.matrix(dobj$data)
443
+            S=dobj$sample_meta[[opt$factor_name]]
444
+            
445
+            if (opt$per_class) {
446
+              L=levels(S)
447
+              for (k in L) {
395 448
                 M=X[S==k,,drop=FALSE]
396 449
                 if (k==L[1]) {
397
-                    temp=data.frame(values=as.vector(M),group=k)
450
+                  temp=data.frame(values=as.vector(M),group=k)
398 451
                 } else {
399
-                    temp=rbind(temp,data.frame(values=as.vector(M),group=k))
452
+                  temp=rbind(temp,data.frame(values=as.vector(M),group=k))
400 453
                 }
401
-            }
402
-            out=ggplot(data=temp, aes_(x=~values,color=~group))+
454
+              }
455
+              out=ggplot(data=temp, aes_(x=~values,color=~group))+
403 456
                 geom_freqpoly(bins=100)
404
-        } else {
405
-            temp=data.frame(values=as.vector(X),group='Sample')
406
-            out=ggplot(data=temp, aes_(x=~values,color=~group)) +
457
+            } else {
458
+              temp=data.frame(values=as.vector(X),group='Sample')
459
+              out=ggplot(data=temp, aes_(x=~values,color=~group)) +
407 460
                 geom_freqpoly(bins=100,color='black')
408
-        }
409
-        out = out  +
410
-            xlab('Values') +
411
-            ylab('Density') +
412
-            scale_colour_Publication(name=opt$factor_name)+
413
-            theme_Publication(base_size = 12)
414
-        
415
-        return(out)
416
-        
417
-    }
461
+            }
462
+            out = out  +
463
+              xlab('Values') +
464
+              ylab('Density') +
465
+              scale_colour_Publication(name=opt$factor_name)+
466
+              theme_Publication(base_size = 12)
467
+            
468
+            return(out)
469
+            
470
+          }
418 471
 )
419 472
 
420 473
 #' @eval get_description('DatasetExperiment_boxplot')
... ...
@@ -425,107 +478,107 @@ setMethod(f="chart_plot",
425 478
 #' @return struct object
426 479
 #' @export DatasetExperiment_boxplot
427 480
 DatasetExperiment_boxplot = function(factor_name,by_sample=TRUE,per_class=TRUE,number=50,...) {
428
-    out=struct::new_struct('DatasetExperiment_boxplot',
429
-        factor_name=factor_name,
430
-        by_sample=by_sample,
431
-        per_class=per_class,
432
-        number=number,
433
-        ...)
434
-    return(out)
481
+  out=struct::new_struct('DatasetExperiment_boxplot',
482
+                         factor_name=factor_name,
483
+                         by_sample=by_sample,
484
+                         per_class=per_class,
485
+                         number=number,
486
+                         ...)
487
+  return(out)
435 488
 }
436 489
 
437 490
 
438 491
 .DatasetExperiment_boxplot<-setClass(
439
-    "DatasetExperiment_boxplot",
440
-    contains='chart',
441
-    slots=c(
442
-        # INPUTS
443
-        factor_name='entity',
444
-        by_sample='entity',
445
-        per_class='entity',
446
-        number='entity'
447
-    ),
448
-    prototype = list(name='Feature distribution histogram',
449
-        description=paste0('A boxplot to visualise the distribution of ',
450
-            'values within a subset of features.'),
451
-        type="boxplot",
452
-        .params=c('factor_name','by_sample','per_class','number'),
453
-        
454
-        factor_name=ents$factor_name,
455
-        by_sample=entity(name='Plot by sample',
456
-            value=TRUE,
457
-            type='logical',
458
-            description=c(
459
-                'TRUE' = 'The data is plotted across features for a subset of samples.',
460
-                'FALSE' = 'The data is plotted across samples for a subset of features.')
461
-        ),
462
-        per_class=entity(name='Plot per class',
463
-            value=TRUE,
464
-            type='logical',
465
-            description=c(
466
-                "TRUE" = 'The data is plotted for each class.',
467
-                "FALSE" = 'The data is plotted for all samples')
468
-        ),
469
-        number=entity(name='Number of features/samples',
470
-            value=50,
471
-            type=c('numeric','integer'),
472
-            description='The number of features/samples plotted.',
473
-            max_length=1
474
-        )
475
-    )
492
+  "DatasetExperiment_boxplot",
493
+  contains='chart',
494
+  slots=c(
495
+    # INPUTS
496
+    factor_name='entity',
497
+    by_sample='entity',
498
+    per_class='entity',
499
+    number='entity'
500
+  ),
501
+  prototype = list(name='Feature distribution histogram',
502
+                   description=paste0('A boxplot to visualise the distribution of ',
503
+                                      'values within a subset of features.'),
504
+                   type="boxplot",
505
+                   .params=c('factor_name','by_sample','per_class','number'),
506
+                   
507
+                   factor_name=ents$factor_name,
508
+                   by_sample=entity(name='Plot by sample',
509
+                                    value=TRUE,
510
+                                    type='logical',
511
+                                    description=c(
512
+                                      'TRUE' = 'The data is plotted across features for a subset of samples.',
513
+                                      'FALSE' = 'The data is plotted across samples for a subset of features.')
514
+                   ),
515
+                   per_class=entity(name='Plot per class',
516
+                                    value=TRUE,
517
+                                    type='logical',
518
+                                    description=c(
519
+                                      "TRUE" = 'The data is plotted for each class.',
520
+                                      "FALSE" = 'The data is plotted for all samples')
521
+                   ),
522
+                   number=entity(name='Number of features/samples',
523
+                                 value=50,
524
+                                 type=c('numeric','integer'),
525
+                                 description='The number of features/samples plotted.',
526
+                                 max_length=1
527
+                   )
528
+  )
476 529
 )
477 530
 
478 531
 #' @export
479 532
 #' @template chart_plot
480 533
 setMethod(f="chart_plot",
481
-    signature=c("DatasetExperiment_boxplot",'DatasetExperiment'),
482
-    definition=function(obj,dobj)
483
-    {
484
-        opt=param_list(obj)
485
-        X=dobj$data
486
-        SM=dobj$sample_meta
487
-        if (!opt$by_sample) {
488
-            #s=sample(ncol(X),min(c(ncol(X),opt$number)),replace=FALSE)
489
-            s=seq(from=1,to=ncol(X),length.out = min(c(opt$number,ncol(X))))
490
-            ylabel='Features'
491
-        } else {
492
-            #s=sample(nrow(X),min(c(nrow(X),opt$number)),replace=FALSE)
493
-            s=seq(from=1,to=nrow(X),length.out = min(c(opt$number,nrow(X))))
494
-            X=as.data.frame(t(X))
495
-            colnames(X)=rownames(dobj$data)
496
-            ylabel='Samples'
497
-        }
498
-        s=unique(floor(s))
499
-        
500
-        for (i in s) {
501
-            if (!opt$by_sample){
534
+          signature=c("DatasetExperiment_boxplot",'DatasetExperiment'),
535
+          definition=function(obj,dobj)
536
+          {
537
+            opt=param_list(obj)
538
+            X=dobj$data
539
+            SM=dobj$sample_meta
540
+            if (!opt$by_sample) {
541
+              #s=sample(ncol(X),min(c(ncol(X),opt$number)),replace=FALSE)
542
+              s=seq(from=1,to=ncol(X),length.out = min(c(opt$number,ncol(X))))
543
+              ylabel='Features'
544
+            } else {
545
+              #s=sample(nrow(X),min(c(nrow(X),opt$number)),replace=FALSE)
546
+              s=seq(from=1,to=nrow(X),length.out = min(c(opt$number,nrow(X))))
547
+              X=as.data.frame(t(X))
548
+              colnames(X)=rownames(dobj$data)
549
+              ylabel='Samples'
550
+            }
551
+            s=unique(floor(s))
552
+            
553
+            for (i in s) {
554
+              if (!opt$by_sample){
502 555
                 sm=SM[[obj$factor_name]]
503 556
                 fn=row
504
-            } else {
557
+              } else {
505 558
                 sm=SM[[obj$factor_name]][i]
506
-            }
507
-            if (!opt$per_class) {
559
+              }
560
+              if (!opt$per_class) {
508 561
                 sm='Sample'
509
-            }
510
-            if (i==s[1]) {
562
+              }
563
+              if (i==s[1]) {
511 564
                 temp=data.frame(value=X[,i],feature=colnames(X)[i],group=sm)
512
-            } else {
565
+              } else {
513 566
                 temp=rbind(temp,data.frame(value=X[,i],feature=colnames(X)[i],group=sm))
567
+              }
514 568
             }
515
-        }
516
-        out=ggplot(data=temp, aes_(x=~feature,y=~value,color=~group)) +
517
-            geom_boxplot()+
518
-            xlab(ylabel) +
519
-            ylab('Values') +
520
-            scale_colour_Publication(name=opt$factor_name)+
521
-            theme_Publication(base_size = 12)+
522
-            coord_flip() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
523
-        if  (!opt$per_class) {
524
-            out=out+theme(legend.position="none")
525
-        }
526
-        return(out)
527
-        
528
-    }
569
+            out=ggplot(data=temp, aes_(x=~feature,y=~value,color=~group)) +
570
+              geom_boxplot()+
571
+              xlab(ylabel) +
572
+              ylab('Values') +
573
+              scale_colour_Publication(name=opt$factor_name)+
574
+              theme_Publication(base_size = 12)+
575
+              coord_flip() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
576
+            if  (!opt$per_class) {
577
+              out=out+theme(legend.position="none")
578
+            }
579
+            return(out)
580
+            
581
+          }
529 582
 )
530 583
 
531 584
 
... ...
@@ -538,25 +591,25 @@ setMethod(f="chart_plot",
538 591
 #' @import struct
539 592
 #' @export compare_dist
540 593
 compare_dist = function(factor_name,...) {
541
-    out=struct::new_struct('compare_dist',
542
-        factor_name=factor_name,
543
-        ...)
544
-    return(out)
594
+  out=struct::new_struct('compare_dist',
595
+                         factor_name=factor_name,
596
+                         ...)
597
+  return(out)
545 598
 }
546 599
 
547 600
 .compare_dist<-setClass(
548
-    "compare_dist",
549
-    contains=c('chart','stato'),
550
-    slots=c(factor_name='entity'),
551
-    prototype = list(name='Compare distributions',
552
-        description=paste0('Histograms and boxplots computed across samples ',
553
-            'and features are used to visually compare two datasets e.g. before ',
554
-            'and after filtering and/or normalisation.'),
555
-        type="mixed",
556
-        stato_id='STATO:0000161',
557
-        .params=c('factor_name'),
558
-        factor_name=ents$factor_name
559
-    )
601
+  "compare_dist",
602
+  contains=c('chart','stato'),
603
+  slots=c(factor_name='entity'),
604
+  prototype = list(name='Compare distributions',
605
+                   description=paste0('Histograms and boxplots computed across samples ',
606
+                                      'and features are used to visually compare two datasets e.g. before ',
607
+                                      'and after filtering and/or normalisation.'),
608
+                   type="mixed",
609
+                   stato_id='STATO:0000161',
610
+                   .params=c('factor_name'),
611
+                   factor_name=ents$factor_name
612
+  )
560 613
 )
561 614
 
562 615
 #' @export
... ...
@@ -564,79 +617,79 @@ compare_dist = function(factor_name,...) {
564 617
 #' @template chart_plot
565 618
 #' @param eobj a second DatasetExperiment object to compare with the first
566 619
 setMethod(f="chart_plot",
567
-    signature=c("compare_dist",'DatasetExperiment'),
568
-    definition=function(obj,dobj,eobj)
569
-    {
570
-        
571
-        # match features across datasets
572
-        inboth=intersect(colnames(dobj),colnames(eobj))
573
-        dobj=dobj[,inboth]
574
-        eobj=eobj[,inboth]
575
-        
576
-        C=DatasetExperiment_boxplot(by_sample=FALSE,per_class=FALSE,number=30,factor_name=obj$factor_name)
577
-        
578
-        C1=chart_plot(C,dobj)+
579
-            labs(tag='c)')+
580
-            theme(axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'Before processing')
581
-        
582
-        C2=chart_plot(C,eobj)+
583
-            labs(tag='d)')+
584
-            theme(axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'After processing')
585
-        
586
-        C=DatasetExperiment_dist(factor_name=obj$factor_name,per_class=TRUE)
587
-        
588
-        C3=chart_plot(C,dobj)+
589
-            labs(tag='a)')+
590
-            theme(legend.position="none",axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'Before processing')
591
-        
592
-        C4=chart_plot(C,eobj)+
593
-            labs(tag='b)')+
594
-            theme(legend.position="none",axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'After processing')
595
-        
596
-        rC1=ggplot_build(C1)$layout$panel_scales_y[[1]]$range$range
597
-        rC2=ggplot_build(C2)$layout$panel_scales_y[[1]]$range$range
598
-        rC3=ggplot_build(C3)$layout$panel_scales_x[[1]]$range$range
599
-        rC4=ggplot_build(C4)$layout$panel_scales_x[[1]]$range$range
600
-        rC1=max(abs(rC1))
601
-        rC2=max(abs(rC2))
602
-        rC3=max(abs(rC3))
603
-        rC4=max(abs(rC4))
604
-        
605
-        C1=C1+ylim(c(-rC1,rC1))
606
-        C3=C3+xlim(c(-rC1,rC1))
607
-        C2=C2+ylim(c(-rC2,rC2))
608
-        C4=C4+xlim(c(-rC2,rC2))
609
-        
610
-        
611
-        gA <- ggplotGrob(C3)
612
-        gB <- ggplotGrob(C4)
613
-        gC <- ggplotGrob(C1)
614
-        gD <- ggplotGrob(C2)
615
-        
616
-        L=matrix(nrow=4,ncol=1)
617
-        L=as.matrix(rbind(c(1,2),c(3,4),c(3,4),c(3,4)))
618
-        
619
-        temp=gtable_cbind(gA,gB)
620
-        heights1=temp$heights
621
-        gA$heights=heights1
622
-        gB$heights=heights1
623
-        
624
-        temp=gtable_cbind(gC,gD)
625
-        heights2=temp$heights
626
-        gC$heights=heights2
627
-        gD$heights=heights2
628
-        
629
-        maxWidth = grid::unit.pmax(gA$widths[2:5], gB$widths[2:5],gC$widths[2:5],gD$widths[2:5])
630
-        gA$widths[2:5] <- as.list(maxWidth)
631
-        gB$widths[2:5] <- as.list(maxWidth)
632
-        gC$widths[2:5] <- as.list(maxWidth)
633
-        gD$widths[2:5] <- as.list(maxWidth)
634
-        
635
-        p=grid.arrange(grobs=list(gA,gB,gC,gD),layout_matrix=L)
636
-        
637
-        return(p)
638
-        
639
-    }
620
+          signature=c("compare_dist",'DatasetExperiment'),
621
+          definition=function(obj,dobj,eobj)
622
+          {
623
+            
624
+            # match features across datasets
625
+            inboth=intersect(colnames(dobj),colnames(eobj))
626
+            dobj=dobj[,inboth]
627
+            eobj=eobj[,inboth]
628
+            
629
+            C=DatasetExperiment_boxplot(by_sample=FALSE,per_class=FALSE,number=30,factor_name=obj$factor_name)
630
+            
631
+            C1=chart_plot(C,dobj)+
632
+              labs(tag='c)')+
633
+              theme(axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'Before processing')
634
+            
635
+            C2=chart_plot(C,eobj)+
636
+              labs(tag='d)')+
637
+              theme(axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'After processing')
638
+            
639
+            C=DatasetExperiment_dist(factor_name=obj$factor_name,per_class=TRUE)
640
+            
641
+            C3=chart_plot(C,dobj)+
642
+              labs(tag='a)')+
643
+              theme(legend.position="none",axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'Before processing')
644
+            
645
+            C4=chart_plot(C,eobj)+
646
+              labs(tag='b)')+
647
+              theme(legend.position="none",axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'After processing')
648
+            
649
+            rC1=ggplot_build(C1)$layout$panel_scales_y[[1]]$range$range
650
+            rC2=ggplot_build(C2)$layout$panel_scales_y[[1]]$range$range
651
+            rC3=ggplot_build(C3)$layout$panel_scales_x[[1]]$range$range
652
+            rC4=ggplot_build(C4)$layout$panel_scales_x[[1]]$range$range
653
+            rC1=max(abs(rC1))
654
+            rC2=max(abs(rC2))
655
+            rC3=max(abs(rC3))
656
+            rC4=max(abs(rC4))
657
+            
658
+            C1=C1+ylim(c(-rC1,rC1))
659
+            C3=C3+xlim(c(-rC1,rC1))
660
+            C2=C2+ylim(c(-rC2,rC2))
661
+            C4=C4+xlim(c(-rC2,rC2))
662
+            
663
+            
664
+            gA <- ggplotGrob(C3)
665
+            gB <- ggplotGrob(C4)
666
+            gC <- ggplotGrob(C1)
667
+            gD <- ggplotGrob(C2)
668
+            
669
+            L=matrix(nrow=4,ncol=1)
670
+            L=as.matrix(rbind(c(1,2),c(3,4),c(3,4),c(3,4)))
671
+            
672
+            temp=gtable_cbind(gA,gB)
673
+            heights1=temp$heights
674
+            gA$heights=heights1
675
+            gB$heights=heights1
676
+            
677
+            temp=gtable_cbind(gC,gD)
678
+            heights2=temp$heights
679
+            gC$heights=heights2
680
+            gD$heights=heights2
681
+            
682
+            maxWidth = grid::unit.pmax(gA$widths[2:5], gB$widths[2:5],gC$widths[2:5],gD$widths[2:5])
683
+            gA$widths[2:5] <- as.list(maxWidth)
684
+            gB$widths[2:5] <- as.list(maxWidth)
685
+            gC$widths[2:5] <- as.list(maxWidth)
686
+            gD$widths[2:5] <- as.list(maxWidth)
687
+            
688
+            p=grid.arrange(grobs=list(gA,gB,gC,gD),layout_matrix=L)
689
+            
690
+            return(p)
691
+            
692
+          }
640 693
 )
641 694
 
642 695
 
... ...
@@ -650,54 +703,54 @@ setMethod(f="chart_plot",
650 703
 #' C = DatasetExperiment_heatmap()
651 704
 #' chart_plot(C,D)
652 705
 DatasetExperiment_heatmap = function(na_colour='#FF00E4',...) {
653
-    out=struct::new_struct('DatasetExperiment_heatmap',
654
-        na_colour=na_colour,...)
655
-    return(out)
706
+  out=struct::new_struct('DatasetExperiment_heatmap',
707
+                         na_colour=na_colour,...)
708
+  return(out)
656 709
 }
657 710
 
658 711
 
659 712
 .DatasetExperiment_heatmap<-setClass(
660
-    "DatasetExperiment_heatmap",
661
-    contains=c('chart'),
662
-    slots=c(
663
-        # INPUTS
664
-        na_colour='entity'
665
-    ),
666
-    prototype = list(name='DatasetExperiment heatmap',
667
-        description='A heatmap to visualise the measured values in a data matrix.',
668
-        type="scatter",
669
-        libraries='reshape2',
670
-        .params=c('na_colour'),
671
-        
672
-        na_colour=entity(name='Missing value colour',
673
-            value='#FF00E4',
674
-            type='character',
675
-            description='The hex colour code used to plot missing values.',
676
-            max_length=1
677
-        )
678
-    )
713
+  "DatasetExperiment_heatmap",
714
+  contains=c('chart'),
715
+  slots=c(
716
+    # INPUTS
717
+    na_colour='entity'
718
+  ),
719
+  prototype = list(name='DatasetExperiment heatmap',
720
+                   description='A heatmap to visualise the measured values in a data matrix.',
721
+                   type="scatter",
722
+                   libraries='reshape2',
723
+                   .params=c('na_colour'),
724
+                   
725
+                   na_colour=entity(name='Missing value colour',
726
+                                    value='#FF00E4',
727
+                                    type='character',
728
+                                    description='The hex colour code used to plot missing values.',
729
+                                    max_length=1
730
+                   )
731
+  )
679 732
 )
680 733
 
681 734
 #' @export
682 735
 #' @template chart_plot
683 736
 setMethod(f="chart_plot",
684
-    signature=c("DatasetExperiment_heatmap",'DatasetExperiment'),
685
-    definition=function(obj,dobj)
686
-    {
687
-        X=reshape2::melt(as.matrix(dobj$data))
688
-        colnames(X)=c('Sample','Feature','Peak area')
689
-        p=ggplot(data=X,aes(x=`Feature`,y=`Sample`,fill=`Peak area`)) + geom_raster() +
690
-            scale_colour_Publication()+
691
-            theme_Publication(base_size = 12)+
692
-            scale_fill_viridis_c(na.value=obj$na_colour)+
693
-            theme(axis.text.x=element_blank(),
694
-                axis.ticks.x=element_blank(),
695
-                axis.text.y=element_blank(),
696
-                axis.ticks.y=element_blank()
697
-            )
698
-        
699
-        
700
-        return(p)
701
-        
702
-    }
737
+          signature=c("DatasetExperiment_heatmap",'DatasetExperiment'),
738
+          definition=function(obj,dobj)
739
+          {
740
+            X=reshape2::melt(as.matrix(dobj$data))
741
+            colnames(X)=c('Sample','Feature','Peak area')
742
+            p=ggplot(data=X,aes(x=`Feature`,y=`Sample`,fill=`Peak area`)) + geom_raster() +
743
+              scale_colour_Publication()+
744
+              theme_Publication(base_size = 12)+
745
+              scale_fill_viridis_c(na.value=obj$na_colour)+
746
+              theme(axis.text.x=element_blank(),
747
+                    axis.ticks.x=element_blank(),
748
+                    axis.text.y=element_blank(),
749
+                    axis.ticks.y=element_blank()
750
+              )
751
+            
752
+            
753
+            return(p)
754
+            
755
+          }
703 756
 )