Browse code

fix plotting of multiple factors

Gavin Rhys Lloyd authored on 06/02/2023 13:15:07
Showing 1 changed files
... ...
@@ -202,7 +202,7 @@ setMethod(f="chart_plot",
202 202
         }
203 203
         
204 204
         # build the plot
205
-        A <- data.frame (group=groups,x=x, y=y,slabels=slabels)
205
+        A <- data.frame (group=groups,x=x, y=y,slabels=slabels,shape=shapes)
206 206
         
207 207
         out = ggplot()
208 208
         
... ...
@@ -210,10 +210,11 @@ setMethod(f="chart_plot",
210 210
         out = out+geom_point(data=A,aes_string(x='x',y='y'),alpha=0,show.legend=FALSE)
211 211
         
212 212
         if (length(shapes)>1) {
213
-            out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group,shape=~shapes))
213
+            out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group,shape=~shape)) 
214 214
         } else {
215
-            out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group),shape=shapes)
215
+            out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group),shape=shapes) 
216 216
         }
217
+
217 218
         out=out+
218 219
             
219 220
             geom_point(na.rm=TRUE) +
... ...
@@ -227,16 +228,23 @@ setMethod(f="chart_plot",
227 228
         }
228 229
         
229 230
         if (obj$ellipse %in% c('all','group')) {
230
-            out = out +stat_ellipse(data=A, aes_(x=~x,y=~y,colour=~group),type=obj$ellipse_type,
231
+            if (is.factor(groups)) {
232
+            out = out +stat_ellipse(data=A, aes_(x=~x,y=~y,colour=~group,group=~group),type=obj$ellipse_type,
231 233
                 level=obj$ellipse_confidence) # ellipse for individual groups
234
+            } else {
235
+                if (is.factor(shapes)) {
236
+                    out = out +stat_ellipse(data=A, aes_(x=~x,y=~y,group=~shape),color="#C0C0C0",type=obj$ellipse_type,
237
+                        level=obj$ellipse_confidence) # ellipse for individual groups
238
+                }
239
+            }
232 240
         }
233 241
         
234 242
         if (is(groups,'factor')) { # if a factor then plot by group using the colours from pmp package
235 243
             out=out+scale_colour_manual(values=plotClass$manual_colors,
236 244
                 name=obj$factor_name[[1]])
237
-        }else {# assume continuous and use the default colour gradient
245
+        } else {# assume continuous and use the default colour gradient
238 246
             out=out+scale_colour_viridis_c(limits=quantile(groups,
239
-                c(0.05,0.95),na.rm = TRUE),oob=squish,name=obj$factor_name[[1]])
247
+                c(0.05,0.95),na.rm = TRUE),oob=scales::squish,name=obj$factor_name[[1]])
240 248
         }
241 249
         out=out+theme_Publication(base_size = 12)
242 250
         # add ellipse for all samples (ignoring group)
Browse code

fix plotting of shapes for second factor

Gavin Rhys Lloyd authored on 06/02/2023 11:19:56
Showing 1 changed files
... ...
@@ -176,15 +176,14 @@ setMethod(f="chart_plot",
176 176
         } else {
177 177
             slabels = dobj$sample_meta[[obj$label_factor]]
178 178
         }
179
-        obj$factor_name=obj$factor_name[[1]] # only use the first factor from now on
180
-        
179
+
181 180
         x=dobj$data[,obj$xcol]
182 181
         y=dobj$data[,obj$ycol]
183 182
         xlabel=obj$xcol
184 183
         ylabel=obj$ycol
185 184
         
186 185
         # get the factor from meta data
187
-        groups=dobj$sample_meta[[obj$factor_name]]
186
+        groups=dobj$sample_meta[[obj$factor_name[[1]]]]
188 187
         
189 188
         # add a space to the front of the labels to offset them from the points, because nudge_x is in data units
190 189
         for (i in 1:length(slabels)) {
... ...
@@ -210,24 +209,21 @@ setMethod(f="chart_plot",
210 209
         # add invisible sample points for ellipse
211 210
         out = out+geom_point(data=A,aes_string(x='x',y='y'),alpha=0,show.legend=FALSE)
212 211
         
213
-        
214
-        if (length(obj$factor_name)==2) {
212
+        if (length(shapes)>1) {
215 213
             out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group,shape=~shapes))
216
-        }   else {
217
-            out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group))
214
+        } else {
215
+            out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group),shape=shapes)
218 216
         }
219
-        
220
-        
221 217
         out=out+
222 218
             
223 219
             geom_point(na.rm=TRUE) +
224 220
             xlab(xlabel) +
225 221
             ylab(ylabel) 
226 222
         
227
-        if (length(obj$factor_name)==2) {
223
+        if (length(shapes)>1) {
228 224
             out=out+labs(shape=obj$factor_name[[2]],colour=obj$factor_name[[1]])
229 225
         } else {
230
-            out=out+labs(shape=obj$factor_name[[1]])
226
+            out=out+labs(color=obj$factor_name[[1]])
231 227
         }
232 228
         
233 229
         if (obj$ellipse %in% c('all','group')) {
... ...
@@ -237,10 +233,10 @@ setMethod(f="chart_plot",
237 233
         
238 234
         if (is(groups,'factor')) { # if a factor then plot by group using the colours from pmp package
239 235
             out=out+scale_colour_manual(values=plotClass$manual_colors,
240
-                name=obj$factor_name)
236
+                name=obj$factor_name[[1]])
241 237
         }else {# assume continuous and use the default colour gradient
242 238
             out=out+scale_colour_viridis_c(limits=quantile(groups,
243
-                c(0.05,0.95),na.rm = TRUE),oob=squish,name=obj$factor_name)
239
+                c(0.05,0.95),na.rm = TRUE),oob=squish,name=obj$factor_name[[1]])
244 240
         }
245 241
         out=out+theme_Publication(base_size = 12)
246 242
         # add ellipse for all samples (ignoring group)
Browse code

PLS updates (#64)

* add selectivity ratio

* replace vip_summary with feature_importance
renamed and now allows vip, sr and sr_pvalues to be plotted

* add equal_split model
random subsets so generate training sets with equal group numbers

* plot 1 - p-value
to conform with the "best" feature being a maximum value

* add resample iterator
subsample at random over a number of iterations. Option to use
different kinds of splitting methods. Corresponding chart.

* allow use of list() for factor_name

* force apply not to simplify output to guarantee returning a list

* update example

* add correct parameter
collect will collect the requested model output over all iterations in a list WORK IN PROGRESS

* add collection of multiple outputs of model sequence

* plot reg coeff on rhs

* match outputs of xval for use with grid search etc

* specify levels when converting predictions to factor

* change PLSDA to inherit from PLSR
rename some charts to be compatible with both PLSR and PLSDA

* allow y-block column selection

* re-assign y output after PLSR with factor

* update vignettes wrt PLS changes

* update documentation

* update R version to 4.1

* update documentation

* update documentation

* update scatter plot

- new scatter chart object
- used by PCA scores, PLSR/PLSDA scores
- other charts updated to reflect changes in scores plots where necessary
- added ycol param to plots for when y-block is a matrix

* add url to github

* add plsda scores alias

- plsda_scores_plot and pls_scores_plot do that same thing
Included for backwards compatability
- added components back as parameter for scores plots for backwards compatibility

* fix broken example

* fix broken tests

- scores is now returned as a DatasetExperiment object not a data.frame

* Update data_analysis_omics_using_the_structtoolbox.Rmd

- wrt changes in scores plots

* update documentation

* fix colnames for Y matrix

Gavin Rhys Lloyd authored on 28/02/2022 12:38:08 • GitHub committed on 28/02/2022 12:38:08
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,284 @@
1
+#' @eval get_description('scatter_chart')
2
+#' @import struct
3
+#' @export scatter_chart
4
+#' @examples
5
+#' D = iris_DatasetExperiment()
6
+#' C = scatter_chart(
7
+#'         xcol = 'Petal.Width',
8
+#'         ycol = 'Sepal.Width',
9
+#'         factor_name = 'Species'
10
+#'     )
11
+#' chart_plot(C,D)
12
+#'
13
+scatter_chart = function(
14
+    xcol=1,
15
+    ycol=2,
16
+    points_to_label='none',
17
+    factor_name='none',
18
+    ellipse='all',
19
+    ellipse_type='norm',
20
+    ellipse_confidence=0.95,
21
+    label_filter=character(0),
22
+    label_factor='rownames',
23
+    label_size=3.88,
24
+    ...) {
25
+    out=struct::new_struct('scatter_chart',
26
+        xcol=xcol,
27
+        ycol=ycol,
28
+        points_to_label=points_to_label,
29
+        factor_name=factor_name,
30
+        ellipse=ellipse,
31
+        label_filter=label_filter,
32
+        label_factor=label_factor,
33
+        label_size=label_size,
34
+        ellipse_type=ellipse_type,
35
+        ellipse_confidence=ellipse_confidence,
36
+        ...)
37
+    return(out)
38
+}
39
+
40
+
41
+.scatter_chart<-setClass(
42
+    "scatter_chart",
43
+    contains='chart',
44
+    slots=c(
45
+        # INPUTS
46
+        xcol='entity',
47
+        ycol='entity',
48
+        points_to_label='enum',
49
+        factor_name='entity',
50
+        ellipse='enum',
51
+        ellipse_type='enum',
52
+        ellipse_confidence='entity',
53
+        label_filter='entity',
54
+        label_factor='entity',
55
+        label_size='entity'
56
+    ),
57
+    
58
+    prototype = list(name='Group scatter chart',
59
+        description='Plots a 2d scatter plot of the input data.',
60
+        type="scatter",
61
+        .params=c('xcol','ycol','points_to_label','factor_name','ellipse',
62
+            'label_filter','label_factor','label_size','ellipse_type',
63
+            'ellipse_confidence'),
64
+        
65
+        xcol=entity(
66
+            name='x-axis column name',
67
+            value=1,
68
+            type=c('numeric','integer','character'),
69
+            description=paste0('The column name, or index, of data to plot on the x-axis'),
70
+            max_length=1
71
+        ),
72
+        
73
+        ycol=entity(
74
+            name='y-axis column name',
75
+            value=2,
76
+            type=c('numeric','integer','character'),
77
+            description=paste0('The column name, or index, of data to plot on the y-axis'),
78
+            max_length=1
79
+        ),
80
+        
81
+        points_to_label=enum(name='Points to label',
82
+            value='none',
83
+            type='character',
84
+            description=c(
85
+                'none' = 'No samples labels are displayed.', 
86
+                "all" = 'The labels for all samples are displayed.', 
87
+                "outliers" = 'Labels for for potential outlier samples are displayed.'
88
+            ),
89
+            allowed=c('none','all','outliers')
90
+        ),
91
+        factor_name=ents$factor_name,
92
+        ellipse=enum(
93
+            name = 'Plot ellipses',
94
+            description=c(
95
+                "all" = paste0('Ellipses are plotted for all groups and all samples.'),
96
+                "group" = 'Ellipses are plotted for all groups.',
97
+                "none" = 'Ellipses are not included on the plot.',
98
+                "sample" = 'An ellipse is plotted for all samples (ignoring group)'),
99
+            allowed=c('all','group','none','sample'),
100
+            value='all'
101
+        ),
102
+        
103
+        ellipse_type=enum(
104
+            name='Type of ellipse',
105
+            description=c(
106
+                'norm' = paste0('Multivariate normal (p = 0.95)'),
107
+                't' = paste0('Multivariate t (p = 0.95)')
108
+            ),
109
+            value='norm',
110
+            type='character',
111
+            max_length = 1,
112
+            allowed=c('norm','t')
113
+        ),
114
+        
115
+        ellipse_confidence=entity(
116
+            name='Ellipse confidence level',
117
+            description='The confidence level for plotting ellipses.',
118
+            value=0.95,
119
+            type='numeric',
120
+            max_length = 1
121
+        ),
122
+        
123
+        label_filter=entity(
124
+            name='Label filter',
125
+            value=character(0),
126
+            type='character',
127
+            description=paste0(
128
+                'Labels are only plotted for the named groups. If ',
129
+                'zero-length then all groups are included.'
130
+            )
131
+        ),
132
+        label_factor=entity(name='Factor for labels',
133
+            description=paste0('The column name of sample_meta to use for ',
134
+                'labelling samples on the plot. "rownames" will use the row ',
135
+                'names from sample_meta.'),
136
+            type='character',
137
+            value='rownames',
138
+            max_length=1),
139
+        label_size=entity(name='Text size of labels',
140
+            description='The text size of labels. Note this is not in Font Units.',
141
+            type='numeric',
142
+            value=3.88,
143
+            max_length=1)
144
+    )
145
+)
146
+
147
+#' @importFrom sp point.in.polygon
148
+#' @import ggplot2
149
+#' @importFrom scales squish
150
+#' @export
151
+#' @template chart_plot
152
+setMethod(f="chart_plot",
153
+    signature=c("scatter_chart",'DatasetExperiment'),
154
+    definition=function(obj,dobj)
155
+    {
156
+        # if provided convert index to names
157
+        if (is.numeric(obj$xcol)) {
158
+            obj$xcol=colnames(dobj)[obj$xcol]
159
+        }
160
+        if (is.numeric(obj$ycol)) {
161
+            obj$ycol=colnames(dobj)[obj$ycol]
162
+        }
163
+        
164
+        if (obj$points_to_label=='outliers' & !(obj$ellipse %in% c('all','sample'))) {
165
+            warning('Outliers are only labelled when plotting the sample ellipse')
166
+        }
167
+        
168
+        if (length(obj$factor_name)==1) {
169
+            shapes = 19 # filled circles for all samples
170
+        } else {
171
+            shapes = factor(dobj$sample_meta[[obj$factor_name[2]]])
172
+        }
173
+        
174
+        if (obj$label_factor=='rownames') {
175
+            slabels = rownames(dobj$sample_meta)
176
+        } else {
177
+            slabels = dobj$sample_meta[[obj$label_factor]]
178
+        }
179
+        obj$factor_name=obj$factor_name[[1]] # only use the first factor from now on
180
+        
181
+        x=dobj$data[,obj$xcol]
182
+        y=dobj$data[,obj$ycol]
183
+        xlabel=obj$xcol
184
+        ylabel=obj$ycol
185
+        
186
+        # get the factor from meta data
187
+        groups=dobj$sample_meta[[obj$factor_name]]
188
+        
189
+        # add a space to the front of the labels to offset them from the points, because nudge_x is in data units
190
+        for (i in 1:length(slabels)) {
191
+            slabels[i]=paste0('  ',slabels[i], '  ')
192
+        }
193
+        
194
+        # filter by label_filter list if provided
195
+        if (length(obj$label_filter)>0) {
196
+            out=!(as.character(groups) %in% obj$label_filter)
197
+            slabels[out]=''
198
+        }
199
+        
200
+        if (is(groups,'factor') | is(groups,'character')) {
201
+            plotClass= createClassAndColors(groups)
202
+            groups=plotClass$class
203
+        }
204
+        
205
+        # build the plot
206
+        A <- data.frame (group=groups,x=x, y=y,slabels=slabels)
207
+        
208
+        out = ggplot()
209
+        
210
+        # add invisible sample points for ellipse
211
+        out = out+geom_point(data=A,aes_string(x='x',y='y'),alpha=0,show.legend=FALSE)
212
+        
213
+        
214
+        if (length(obj$factor_name)==2) {
215
+            out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group,shape=~shapes))
216
+        }   else {
217
+            out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group))
218
+        }
219
+        
220
+        
221
+        out=out+
222
+            
223
+            geom_point(na.rm=TRUE) +
224
+            xlab(xlabel) +
225
+            ylab(ylabel) 
226
+        
227
+        if (length(obj$factor_name)==2) {
228
+            out=out+labs(shape=obj$factor_name[[2]],colour=obj$factor_name[[1]])
229
+        } else {
230
+            out=out+labs(shape=obj$factor_name[[1]])
231
+        }
232
+        
233
+        if (obj$ellipse %in% c('all','group')) {
234
+            out = out +stat_ellipse(data=A, aes_(x=~x,y=~y,colour=~group),type=obj$ellipse_type,
235
+                level=obj$ellipse_confidence) # ellipse for individual groups
236
+        }
237
+        
238
+        if (is(groups,'factor')) { # if a factor then plot by group using the colours from pmp package
239
+            out=out+scale_colour_manual(values=plotClass$manual_colors,
240
+                name=obj$factor_name)
241
+        }else {# assume continuous and use the default colour gradient
242
+            out=out+scale_colour_viridis_c(limits=quantile(groups,
243
+                c(0.05,0.95),na.rm = TRUE),oob=squish,name=obj$factor_name)
244
+        }
245
+        out=out+theme_Publication(base_size = 12)
246
+        # add ellipse for all samples (ignoring group)
247
+        if (obj$ellipse %in% c('all','sample')) {
248
+            out=out+stat_ellipse(type=obj$ellipse_type,mapping=aes(x=x,y=y),
249
+                colour="#C0C0C0",linetype='dashed',data=A,
250
+                level=obj$ellipse_confidence)
251
+        }
252
+        
253
+        if (obj$ellipse %in% c('all','sample')) { # only do this if we plotted the sample ellipse
254
+            # identify samples outside the ellipse
255
+            build=ggplot_build(out)$data
256
+            points=build[[1]]
257
+            ell=build[[length(build)]]
258
+            # outlier for DatasetExperiment ellipse
259
+            points$in.ell=as.logical(sp::point.in.polygon(points$x,points$y,ell$x,ell$y))
260
+            
261
+            # label outliers if
262
+            if (obj$points_to_label=='outliers')
263
+            {
264
+                if (!all(points$in.ell))
265
+                {
266
+                    temp=subset(points,!points$in.ell)
267
+                    temp$group=groups[!points$in.ell]
268
+                    temp$label=slabels[!points$in.ell]
269
+                    out=out+geom_text(data=temp,aes_(x=~x,y=~y,label=~label,colour=~group),size=obj$label_size,vjust="inward",hjust="inward",show.legend=FALSE)
270
+                    
271
+                }
272
+            }
273
+            # add a list of outliers to the plot object
274
+            out$outliers=trimws(slabels[!points$in.ell])
275
+        }
276
+        
277
+        # label all points if requested
278
+        if (obj$points_to_label=='all') {
279
+            out=out+geom_text(data=A,aes_string(x='x',y='y',colour='group',label='slabels'),vjust="inward",hjust="inward",show.legend=FALSE)
280
+        }
281
+        
282
+        return(out)
283
+    }
284
+)
0 285
\ No newline at end of file