... | ... |
@@ -202,7 +202,7 @@ setMethod(f="chart_plot", |
202 | 202 |
} |
203 | 203 |
|
204 | 204 |
# build the plot |
205 |
- A <- data.frame (group=groups,x=x, y=y,slabels=slabels) |
|
205 |
+ A <- data.frame (group=groups,x=x, y=y,slabels=slabels,shape=shapes) |
|
206 | 206 |
|
207 | 207 |
out = ggplot() |
208 | 208 |
|
... | ... |
@@ -210,10 +210,11 @@ setMethod(f="chart_plot", |
210 | 210 |
out = out+geom_point(data=A,aes_string(x='x',y='y'),alpha=0,show.legend=FALSE) |
211 | 211 |
|
212 | 212 |
if (length(shapes)>1) { |
213 |
- out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group,shape=~shapes)) |
|
213 |
+ out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group,shape=~shape)) |
|
214 | 214 |
} else { |
215 |
- out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group),shape=shapes) |
|
215 |
+ out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group),shape=shapes) |
|
216 | 216 |
} |
217 |
+ |
|
217 | 218 |
out=out+ |
218 | 219 |
|
219 | 220 |
geom_point(na.rm=TRUE) + |
... | ... |
@@ -227,16 +228,23 @@ setMethod(f="chart_plot", |
227 | 228 |
} |
228 | 229 |
|
229 | 230 |
if (obj$ellipse %in% c('all','group')) { |
230 |
- out = out +stat_ellipse(data=A, aes_(x=~x,y=~y,colour=~group),type=obj$ellipse_type, |
|
231 |
+ if (is.factor(groups)) { |
|
232 |
+ out = out +stat_ellipse(data=A, aes_(x=~x,y=~y,colour=~group,group=~group),type=obj$ellipse_type, |
|
231 | 233 |
level=obj$ellipse_confidence) # ellipse for individual groups |
234 |
+ } else { |
|
235 |
+ if (is.factor(shapes)) { |
|
236 |
+ out = out +stat_ellipse(data=A, aes_(x=~x,y=~y,group=~shape),color="#C0C0C0",type=obj$ellipse_type, |
|
237 |
+ level=obj$ellipse_confidence) # ellipse for individual groups |
|
238 |
+ } |
|
239 |
+ } |
|
232 | 240 |
} |
233 | 241 |
|
234 | 242 |
if (is(groups,'factor')) { # if a factor then plot by group using the colours from pmp package |
235 | 243 |
out=out+scale_colour_manual(values=plotClass$manual_colors, |
236 | 244 |
name=obj$factor_name[[1]]) |
237 |
- }else {# assume continuous and use the default colour gradient |
|
245 |
+ } else {# assume continuous and use the default colour gradient |
|
238 | 246 |
out=out+scale_colour_viridis_c(limits=quantile(groups, |
239 |
- c(0.05,0.95),na.rm = TRUE),oob=squish,name=obj$factor_name[[1]]) |
|
247 |
+ c(0.05,0.95),na.rm = TRUE),oob=scales::squish,name=obj$factor_name[[1]]) |
|
240 | 248 |
} |
241 | 249 |
out=out+theme_Publication(base_size = 12) |
242 | 250 |
# add ellipse for all samples (ignoring group) |
... | ... |
@@ -176,15 +176,14 @@ setMethod(f="chart_plot", |
176 | 176 |
} else { |
177 | 177 |
slabels = dobj$sample_meta[[obj$label_factor]] |
178 | 178 |
} |
179 |
- obj$factor_name=obj$factor_name[[1]] # only use the first factor from now on |
|
180 |
- |
|
179 |
+ |
|
181 | 180 |
x=dobj$data[,obj$xcol] |
182 | 181 |
y=dobj$data[,obj$ycol] |
183 | 182 |
xlabel=obj$xcol |
184 | 183 |
ylabel=obj$ycol |
185 | 184 |
|
186 | 185 |
# get the factor from meta data |
187 |
- groups=dobj$sample_meta[[obj$factor_name]] |
|
186 |
+ groups=dobj$sample_meta[[obj$factor_name[[1]]]] |
|
188 | 187 |
|
189 | 188 |
# add a space to the front of the labels to offset them from the points, because nudge_x is in data units |
190 | 189 |
for (i in 1:length(slabels)) { |
... | ... |
@@ -210,24 +209,21 @@ setMethod(f="chart_plot", |
210 | 209 |
# add invisible sample points for ellipse |
211 | 210 |
out = out+geom_point(data=A,aes_string(x='x',y='y'),alpha=0,show.legend=FALSE) |
212 | 211 |
|
213 |
- |
|
214 |
- if (length(obj$factor_name)==2) { |
|
212 |
+ if (length(shapes)>1) { |
|
215 | 213 |
out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group,shape=~shapes)) |
216 |
- } else { |
|
217 |
- out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group)) |
|
214 |
+ } else { |
|
215 |
+ out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group),shape=shapes) |
|
218 | 216 |
} |
219 |
- |
|
220 |
- |
|
221 | 217 |
out=out+ |
222 | 218 |
|
223 | 219 |
geom_point(na.rm=TRUE) + |
224 | 220 |
xlab(xlabel) + |
225 | 221 |
ylab(ylabel) |
226 | 222 |
|
227 |
- if (length(obj$factor_name)==2) { |
|
223 |
+ if (length(shapes)>1) { |
|
228 | 224 |
out=out+labs(shape=obj$factor_name[[2]],colour=obj$factor_name[[1]]) |
229 | 225 |
} else { |
230 |
- out=out+labs(shape=obj$factor_name[[1]]) |
|
226 |
+ out=out+labs(color=obj$factor_name[[1]]) |
|
231 | 227 |
} |
232 | 228 |
|
233 | 229 |
if (obj$ellipse %in% c('all','group')) { |
... | ... |
@@ -237,10 +233,10 @@ setMethod(f="chart_plot", |
237 | 233 |
|
238 | 234 |
if (is(groups,'factor')) { # if a factor then plot by group using the colours from pmp package |
239 | 235 |
out=out+scale_colour_manual(values=plotClass$manual_colors, |
240 |
- name=obj$factor_name) |
|
236 |
+ name=obj$factor_name[[1]]) |
|
241 | 237 |
}else {# assume continuous and use the default colour gradient |
242 | 238 |
out=out+scale_colour_viridis_c(limits=quantile(groups, |
243 |
- c(0.05,0.95),na.rm = TRUE),oob=squish,name=obj$factor_name) |
|
239 |
+ c(0.05,0.95),na.rm = TRUE),oob=squish,name=obj$factor_name[[1]]) |
|
244 | 240 |
} |
245 | 241 |
out=out+theme_Publication(base_size = 12) |
246 | 242 |
# add ellipse for all samples (ignoring group) |
* add selectivity ratio
* replace vip_summary with feature_importance
renamed and now allows vip, sr and sr_pvalues to be plotted
* add equal_split model
random subsets so generate training sets with equal group numbers
* plot 1 - p-value
to conform with the "best" feature being a maximum value
* add resample iterator
subsample at random over a number of iterations. Option to use
different kinds of splitting methods. Corresponding chart.
* allow use of list() for factor_name
* force apply not to simplify output to guarantee returning a list
* update example
* add correct parameter
collect will collect the requested model output over all iterations in a list WORK IN PROGRESS
* add collection of multiple outputs of model sequence
* plot reg coeff on rhs
* match outputs of xval for use with grid search etc
* specify levels when converting predictions to factor
* change PLSDA to inherit from PLSR
rename some charts to be compatible with both PLSR and PLSDA
* allow y-block column selection
* re-assign y output after PLSR with factor
* update vignettes wrt PLS changes
* update documentation
* update R version to 4.1
* update documentation
* update documentation
* update scatter plot
- new scatter chart object
- used by PCA scores, PLSR/PLSDA scores
- other charts updated to reflect changes in scores plots where necessary
- added ycol param to plots for when y-block is a matrix
* add url to github
* add plsda scores alias
- plsda_scores_plot and pls_scores_plot do that same thing
Included for backwards compatability
- added components back as parameter for scores plots for backwards compatibility
* fix broken example
* fix broken tests
- scores is now returned as a DatasetExperiment object not a data.frame
* Update data_analysis_omics_using_the_structtoolbox.Rmd
- wrt changes in scores plots
* update documentation
* fix colnames for Y matrix
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,284 @@ |
1 |
+#' @eval get_description('scatter_chart') |
|
2 |
+#' @import struct |
|
3 |
+#' @export scatter_chart |
|
4 |
+#' @examples |
|
5 |
+#' D = iris_DatasetExperiment() |
|
6 |
+#' C = scatter_chart( |
|
7 |
+#' xcol = 'Petal.Width', |
|
8 |
+#' ycol = 'Sepal.Width', |
|
9 |
+#' factor_name = 'Species' |
|
10 |
+#' ) |
|
11 |
+#' chart_plot(C,D) |
|
12 |
+#' |
|
13 |
+scatter_chart = function( |
|
14 |
+ xcol=1, |
|
15 |
+ ycol=2, |
|
16 |
+ points_to_label='none', |
|
17 |
+ factor_name='none', |
|
18 |
+ ellipse='all', |
|
19 |
+ ellipse_type='norm', |
|
20 |
+ ellipse_confidence=0.95, |
|
21 |
+ label_filter=character(0), |
|
22 |
+ label_factor='rownames', |
|
23 |
+ label_size=3.88, |
|
24 |
+ ...) { |
|
25 |
+ out=struct::new_struct('scatter_chart', |
|
26 |
+ xcol=xcol, |
|
27 |
+ ycol=ycol, |
|
28 |
+ points_to_label=points_to_label, |
|
29 |
+ factor_name=factor_name, |
|
30 |
+ ellipse=ellipse, |
|
31 |
+ label_filter=label_filter, |
|
32 |
+ label_factor=label_factor, |
|
33 |
+ label_size=label_size, |
|
34 |
+ ellipse_type=ellipse_type, |
|
35 |
+ ellipse_confidence=ellipse_confidence, |
|
36 |
+ ...) |
|
37 |
+ return(out) |
|
38 |
+} |
|
39 |
+ |
|
40 |
+ |
|
41 |
+.scatter_chart<-setClass( |
|
42 |
+ "scatter_chart", |
|
43 |
+ contains='chart', |
|
44 |
+ slots=c( |
|
45 |
+ # INPUTS |
|
46 |
+ xcol='entity', |
|
47 |
+ ycol='entity', |
|
48 |
+ points_to_label='enum', |
|
49 |
+ factor_name='entity', |
|
50 |
+ ellipse='enum', |
|
51 |
+ ellipse_type='enum', |
|
52 |
+ ellipse_confidence='entity', |
|
53 |
+ label_filter='entity', |
|
54 |
+ label_factor='entity', |
|
55 |
+ label_size='entity' |
|
56 |
+ ), |
|
57 |
+ |
|
58 |
+ prototype = list(name='Group scatter chart', |
|
59 |
+ description='Plots a 2d scatter plot of the input data.', |
|
60 |
+ type="scatter", |
|
61 |
+ .params=c('xcol','ycol','points_to_label','factor_name','ellipse', |
|
62 |
+ 'label_filter','label_factor','label_size','ellipse_type', |
|
63 |
+ 'ellipse_confidence'), |
|
64 |
+ |
|
65 |
+ xcol=entity( |
|
66 |
+ name='x-axis column name', |
|
67 |
+ value=1, |
|
68 |
+ type=c('numeric','integer','character'), |
|
69 |
+ description=paste0('The column name, or index, of data to plot on the x-axis'), |
|
70 |
+ max_length=1 |
|
71 |
+ ), |
|
72 |
+ |
|
73 |
+ ycol=entity( |
|
74 |
+ name='y-axis column name', |
|
75 |
+ value=2, |
|
76 |
+ type=c('numeric','integer','character'), |
|
77 |
+ description=paste0('The column name, or index, of data to plot on the y-axis'), |
|
78 |
+ max_length=1 |
|
79 |
+ ), |
|
80 |
+ |
|
81 |
+ points_to_label=enum(name='Points to label', |
|
82 |
+ value='none', |
|
83 |
+ type='character', |
|
84 |
+ description=c( |
|
85 |
+ 'none' = 'No samples labels are displayed.', |
|
86 |
+ "all" = 'The labels for all samples are displayed.', |
|
87 |
+ "outliers" = 'Labels for for potential outlier samples are displayed.' |
|
88 |
+ ), |
|
89 |
+ allowed=c('none','all','outliers') |
|
90 |
+ ), |
|
91 |
+ factor_name=ents$factor_name, |
|
92 |
+ ellipse=enum( |
|
93 |
+ name = 'Plot ellipses', |
|
94 |
+ description=c( |
|
95 |
+ "all" = paste0('Ellipses are plotted for all groups and all samples.'), |
|
96 |
+ "group" = 'Ellipses are plotted for all groups.', |
|
97 |
+ "none" = 'Ellipses are not included on the plot.', |
|
98 |
+ "sample" = 'An ellipse is plotted for all samples (ignoring group)'), |
|
99 |
+ allowed=c('all','group','none','sample'), |
|
100 |
+ value='all' |
|
101 |
+ ), |
|
102 |
+ |
|
103 |
+ ellipse_type=enum( |
|
104 |
+ name='Type of ellipse', |
|
105 |
+ description=c( |
|
106 |
+ 'norm' = paste0('Multivariate normal (p = 0.95)'), |
|
107 |
+ 't' = paste0('Multivariate t (p = 0.95)') |
|
108 |
+ ), |
|
109 |
+ value='norm', |
|
110 |
+ type='character', |
|
111 |
+ max_length = 1, |
|
112 |
+ allowed=c('norm','t') |
|
113 |
+ ), |
|
114 |
+ |
|
115 |
+ ellipse_confidence=entity( |
|
116 |
+ name='Ellipse confidence level', |
|
117 |
+ description='The confidence level for plotting ellipses.', |
|
118 |
+ value=0.95, |
|
119 |
+ type='numeric', |
|
120 |
+ max_length = 1 |
|
121 |
+ ), |
|
122 |
+ |
|
123 |
+ label_filter=entity( |
|
124 |
+ name='Label filter', |
|
125 |
+ value=character(0), |
|
126 |
+ type='character', |
|
127 |
+ description=paste0( |
|
128 |
+ 'Labels are only plotted for the named groups. If ', |
|
129 |
+ 'zero-length then all groups are included.' |
|
130 |
+ ) |
|
131 |
+ ), |
|
132 |
+ label_factor=entity(name='Factor for labels', |
|
133 |
+ description=paste0('The column name of sample_meta to use for ', |
|
134 |
+ 'labelling samples on the plot. "rownames" will use the row ', |
|
135 |
+ 'names from sample_meta.'), |
|
136 |
+ type='character', |
|
137 |
+ value='rownames', |
|
138 |
+ max_length=1), |
|
139 |
+ label_size=entity(name='Text size of labels', |
|
140 |
+ description='The text size of labels. Note this is not in Font Units.', |
|
141 |
+ type='numeric', |
|
142 |
+ value=3.88, |
|
143 |
+ max_length=1) |
|
144 |
+ ) |
|
145 |
+) |
|
146 |
+ |
|
147 |
+#' @importFrom sp point.in.polygon |
|
148 |
+#' @import ggplot2 |
|
149 |
+#' @importFrom scales squish |
|
150 |
+#' @export |
|
151 |
+#' @template chart_plot |
|
152 |
+setMethod(f="chart_plot", |
|
153 |
+ signature=c("scatter_chart",'DatasetExperiment'), |
|
154 |
+ definition=function(obj,dobj) |
|
155 |
+ { |
|
156 |
+ # if provided convert index to names |
|
157 |
+ if (is.numeric(obj$xcol)) { |
|
158 |
+ obj$xcol=colnames(dobj)[obj$xcol] |
|
159 |
+ } |
|
160 |
+ if (is.numeric(obj$ycol)) { |
|
161 |
+ obj$ycol=colnames(dobj)[obj$ycol] |
|
162 |
+ } |
|
163 |
+ |
|
164 |
+ if (obj$points_to_label=='outliers' & !(obj$ellipse %in% c('all','sample'))) { |
|
165 |
+ warning('Outliers are only labelled when plotting the sample ellipse') |
|
166 |
+ } |
|
167 |
+ |
|
168 |
+ if (length(obj$factor_name)==1) { |
|
169 |
+ shapes = 19 # filled circles for all samples |
|
170 |
+ } else { |
|
171 |
+ shapes = factor(dobj$sample_meta[[obj$factor_name[2]]]) |
|
172 |
+ } |
|
173 |
+ |
|
174 |
+ if (obj$label_factor=='rownames') { |
|
175 |
+ slabels = rownames(dobj$sample_meta) |
|
176 |
+ } else { |
|
177 |
+ slabels = dobj$sample_meta[[obj$label_factor]] |
|
178 |
+ } |
|
179 |
+ obj$factor_name=obj$factor_name[[1]] # only use the first factor from now on |
|
180 |
+ |
|
181 |
+ x=dobj$data[,obj$xcol] |
|
182 |
+ y=dobj$data[,obj$ycol] |
|
183 |
+ xlabel=obj$xcol |
|
184 |
+ ylabel=obj$ycol |
|
185 |
+ |
|
186 |
+ # get the factor from meta data |
|
187 |
+ groups=dobj$sample_meta[[obj$factor_name]] |
|
188 |
+ |
|
189 |
+ # add a space to the front of the labels to offset them from the points, because nudge_x is in data units |
|
190 |
+ for (i in 1:length(slabels)) { |
|
191 |
+ slabels[i]=paste0(' ',slabels[i], ' ') |
|
192 |
+ } |
|
193 |
+ |
|
194 |
+ # filter by label_filter list if provided |
|
195 |
+ if (length(obj$label_filter)>0) { |
|
196 |
+ out=!(as.character(groups) %in% obj$label_filter) |
|
197 |
+ slabels[out]='' |
|
198 |
+ } |
|
199 |
+ |
|
200 |
+ if (is(groups,'factor') | is(groups,'character')) { |
|
201 |
+ plotClass= createClassAndColors(groups) |
|
202 |
+ groups=plotClass$class |
|
203 |
+ } |
|
204 |
+ |
|
205 |
+ # build the plot |
|
206 |
+ A <- data.frame (group=groups,x=x, y=y,slabels=slabels) |
|
207 |
+ |
|
208 |
+ out = ggplot() |
|
209 |
+ |
|
210 |
+ # add invisible sample points for ellipse |
|
211 |
+ out = out+geom_point(data=A,aes_string(x='x',y='y'),alpha=0,show.legend=FALSE) |
|
212 |
+ |
|
213 |
+ |
|
214 |
+ if (length(obj$factor_name)==2) { |
|
215 |
+ out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group,shape=~shapes)) |
|
216 |
+ } else { |
|
217 |
+ out=out+geom_point(data=A, aes_(x=~x,y=~y,colour=~group)) |
|
218 |
+ } |
|
219 |
+ |
|
220 |
+ |
|
221 |
+ out=out+ |
|
222 |
+ |
|
223 |
+ geom_point(na.rm=TRUE) + |
|
224 |
+ xlab(xlabel) + |
|
225 |
+ ylab(ylabel) |
|
226 |
+ |
|
227 |
+ if (length(obj$factor_name)==2) { |
|
228 |
+ out=out+labs(shape=obj$factor_name[[2]],colour=obj$factor_name[[1]]) |
|
229 |
+ } else { |
|
230 |
+ out=out+labs(shape=obj$factor_name[[1]]) |
|
231 |
+ } |
|
232 |
+ |
|
233 |
+ if (obj$ellipse %in% c('all','group')) { |
|
234 |
+ out = out +stat_ellipse(data=A, aes_(x=~x,y=~y,colour=~group),type=obj$ellipse_type, |
|
235 |
+ level=obj$ellipse_confidence) # ellipse for individual groups |
|
236 |
+ } |
|
237 |
+ |
|
238 |
+ if (is(groups,'factor')) { # if a factor then plot by group using the colours from pmp package |
|
239 |
+ out=out+scale_colour_manual(values=plotClass$manual_colors, |
|
240 |
+ name=obj$factor_name) |
|
241 |
+ }else {# assume continuous and use the default colour gradient |
|
242 |
+ out=out+scale_colour_viridis_c(limits=quantile(groups, |
|
243 |
+ c(0.05,0.95),na.rm = TRUE),oob=squish,name=obj$factor_name) |
|
244 |
+ } |
|
245 |
+ out=out+theme_Publication(base_size = 12) |
|
246 |
+ # add ellipse for all samples (ignoring group) |
|
247 |
+ if (obj$ellipse %in% c('all','sample')) { |
|
248 |
+ out=out+stat_ellipse(type=obj$ellipse_type,mapping=aes(x=x,y=y), |
|
249 |
+ colour="#C0C0C0",linetype='dashed',data=A, |
|
250 |
+ level=obj$ellipse_confidence) |
|
251 |
+ } |
|
252 |
+ |
|
253 |
+ if (obj$ellipse %in% c('all','sample')) { # only do this if we plotted the sample ellipse |
|
254 |
+ # identify samples outside the ellipse |
|
255 |
+ build=ggplot_build(out)$data |
|
256 |
+ points=build[[1]] |
|
257 |
+ ell=build[[length(build)]] |
|
258 |
+ # outlier for DatasetExperiment ellipse |
|
259 |
+ points$in.ell=as.logical(sp::point.in.polygon(points$x,points$y,ell$x,ell$y)) |
|
260 |
+ |
|
261 |
+ # label outliers if |
|
262 |
+ if (obj$points_to_label=='outliers') |
|
263 |
+ { |
|
264 |
+ if (!all(points$in.ell)) |
|
265 |
+ { |
|
266 |
+ temp=subset(points,!points$in.ell) |
|
267 |
+ temp$group=groups[!points$in.ell] |
|
268 |
+ temp$label=slabels[!points$in.ell] |
|
269 |
+ out=out+geom_text(data=temp,aes_(x=~x,y=~y,label=~label,colour=~group),size=obj$label_size,vjust="inward",hjust="inward",show.legend=FALSE) |
|
270 |
+ |
|
271 |
+ } |
|
272 |
+ } |
|
273 |
+ # add a list of outliers to the plot object |
|
274 |
+ out$outliers=trimws(slabels[!points$in.ell]) |
|
275 |
+ } |
|
276 |
+ |
|
277 |
+ # label all points if requested |
|
278 |
+ if (obj$points_to_label=='all') { |
|
279 |
+ out=out+geom_text(data=A,aes_string(x='x',y='y',colour='group',label='slabels'),vjust="inward",hjust="inward",show.legend=FALSE) |
|
280 |
+ } |
|
281 |
+ |
|
282 |
+ return(out) |
|
283 |
+ } |
|
284 |
+) |
|
0 | 285 |
\ No newline at end of file |