... | ... |
@@ -1,7 +1,10 @@ |
1 | 1 |
# Generated by roxygen2: do not edit by hand |
2 | 2 |
|
3 | 3 |
export(aggregateSignal) |
4 |
+export(convertToFromNullDist) |
|
5 |
+export(getGammaPVal) |
|
4 | 6 |
export(getMetaRegionProfile) |
7 |
+export(getPermStat) |
|
5 | 8 |
export(regionQuantileByPC) |
6 | 9 |
export(rsRankingIndex) |
7 | 10 |
export(rsScoreHeatmap) |
... | ... |
@@ -58,6 +61,7 @@ importFrom(grid,unit) |
58 | 61 |
importFrom(grid,viewport) |
59 | 62 |
importFrom(methods,hasArg) |
60 | 63 |
importFrom(methods,is) |
64 |
+importFrom(ppcor,pcor) |
|
61 | 65 |
importFrom(simpleCache,simpleCache) |
62 | 66 |
importFrom(stats,coefficients) |
63 | 67 |
importFrom(stats,ecdf) |
... | ... |
@@ -50,6 +50,10 @@ |
50 | 50 |
#' @importFrom fitdistrplus fitdist |
51 | 51 |
#' @importFrom simpleCache simpleCache |
52 | 52 |
#' @importFrom ppcor pcor |
53 |
+#' @eval addParamDocs <- function(signal=FALSE, signalCoord=FALSE, |
|
54 |
+#' signalCoordType=FALSE, GRList=FALSE, |
|
55 |
+#' regionSet=FALSE, signalCol=FALSE, |
|
56 |
+#' scoringMetric=FALSE, verbose=FALSE, absVal=FALSE ) { x +100} |
|
53 | 57 |
NULL |
54 | 58 |
|
55 | 59 |
# now package lists GenomicRanges in "Depends" instead of "Imports" in |
... | ... |
@@ -80,18 +84,12 @@ if (getRversion() >= "2.15.1") { |
80 | 84 |
#' Then the loadings are used to score the region set |
81 | 85 |
#' according to the `scoringMetric` parameter. |
82 | 86 |
#' |
87 |
+#' @eval print("@param testP my test param ") |
|
83 | 88 |
#' @param signal matrix of loadings (the coefficients of |
84 | 89 |
#' the linear combination that defines each PC). One named column for each PC. |
85 | 90 |
#' One row for each original dimension/variable (should be same order |
86 | 91 |
#' as original data/signalCoord). The x$rotation output of prcomp(). |
87 |
-#' @param signalCoord a GRanges object or data frame with coordinates |
|
88 |
-#' for the genomic signal/original data (e.g. DNA methylation) |
|
89 |
-#' included in the PCA. Coordinates should be in the |
|
90 |
-#' same order as the original data and the loadings |
|
91 |
-#' (each item/row in signalCoord |
|
92 |
-#' corresponds to a row in `signal`). If a data.frame, |
|
93 |
-#' must have chr and start columns. If end is included, start |
|
94 |
-#' and end should be the same. Start coordinate will be used for calculations. |
|
92 |
+#' @template signalCoord |
|
95 | 93 |
#' @param signalCoordType character. Can be "default", "singleBase", or |
96 | 94 |
#' "multiBase". This describes whether the coordinates for `signal` |
97 | 95 |
#' (`signalCoord`) are each a single base (e.g. as for DNA methylation) |
... | ... |
@@ -518,7 +516,7 @@ aggregateSignal <- function(signal, |
518 | 516 |
#' the linear combination that defines each PC). One named column for each PC. |
519 | 517 |
#' One row for each original dimension/variable (should be same order |
520 | 518 |
#' as original data/signalCoord). The x$rotation output of prcomp(). |
521 |
-#' @param signalCoord a GRanges object or data frame with coordinates |
|
519 |
+#' @param signalCoord a GRanges object or data.frame with coordinates |
|
522 | 520 |
#' for the genomic signal/original data (eg DNA methylation) |
523 | 521 |
#' included in the PCA. Coordinates should be in the |
524 | 522 |
#' same order as the original data and the loadings |
... | ... |
@@ -535,8 +533,8 @@ aggregateSignal <- function(signal, |
535 | 533 |
#' will be used. Otherwise, "multiBase" will be used. |
536 | 534 |
#' @param GRList GRangesList object. Each list item is |
537 | 535 |
#' a distinct region set to test (region set: regions that correspond to |
538 |
-#' the same biological annotation). The region set database. |
|
539 |
-#' Must be from the same reference genome |
|
536 |
+#' the same biological annotation). The region set database |
|
537 |
+#' must be from the same reference genome |
|
540 | 538 |
#' as the coordinates for the actual data/samples (signalCoord). |
541 | 539 |
#' @param signalCol A character vector with principal components to |
542 | 540 |
#' include. eg c("PC1", "PC2") These should be column names of signal. |
... | ... |
@@ -788,8 +786,7 @@ createCorFeatureMat <- function(dataMat, featureMat, |
788 | 786 |
featureMat = as.matrix(featureMat) |
789 | 787 |
} |
790 | 788 |
|
791 |
- #remove this line |
|
792 |
- # dataMat = data.table::copy(as.data.frame(t(dataMat))) |
|
789 |
+ # avoid this copy and/or delay transpose until after calculating correlation? |
|
793 | 790 |
dataMat = as.data.frame(t(dataMat)) |
794 | 791 |
|
795 | 792 |
|
... | ... |
@@ -862,7 +859,7 @@ createCorFeatureMat <- function(dataMat, featureMat, |
862 | 859 |
#' the linear combination that defines each PC). One named column for each PC. |
863 | 860 |
#' One row for each original dimension/variable (should be same order |
864 | 861 |
#' as original data/signalCoord). Given by prcomp(x)$rotation. |
865 |
-#' @param signalCoord a GRanges object or data frame with coordinates |
|
862 |
+#' @param signalCoord a GRanges object or data.frame with coordinates |
|
866 | 863 |
#' for the genomic signal/original data (eg DNA methylation) |
867 | 864 |
#' included in the PCA. Coordinates should be in the |
868 | 865 |
#' same order as the original data and the loadings |
... | ... |
@@ -1203,7 +1200,7 @@ BSBinAggregate <- function(BSDT, rangeDT, binCount, |
1203 | 1200 |
# the linear combination that defines each PC). One named column for each PC. |
1204 | 1201 |
# One row for each original dimension/variable (should be same order |
1205 | 1202 |
# as original data/signalCoord). The x$rotation output of prcomp(). |
1206 |
-# @param signalCoord a GRanges object or data frame with coordinates |
|
1203 |
+# @param signalCoord a GRanges object or data.frame with coordinates |
|
1207 | 1204 |
# for the genomic signal/original data (eg DNA methylation) |
1208 | 1205 |
# included in the PCA. Coordinates should be in the |
1209 | 1206 |
# same order as the original data and the loadings |
... | ... |
@@ -1389,42 +1386,43 @@ weightedAvePerRegion <- function(signalDT, |
1389 | 1386 |
|
1390 | 1387 |
|
1391 | 1388 |
|
1392 |
-# Get regions that are most associated with PCs of interest |
|
1393 |
-# |
|
1394 |
-# Get a GRanges with top regions from the region set based on average |
|
1395 |
-# loadings for the regions or the quantile of the region's loading. |
|
1396 |
-# Returns average loading or quantile as GRanges metadata. |
|
1397 |
-# |
|
1398 |
-# @param signal matrix of loadings (the coefficients of |
|
1399 |
-# the linear combination that defines each PC). One named column for each PC. |
|
1400 |
-# One row for each original dimension/variable (should be same order |
|
1401 |
-# as original data/signalCoord). The x$rotation output of prcomp(). |
|
1402 |
-# @param signalCoord a GRanges object or data frame with coordinates |
|
1403 |
-# for the genomic signal/original data (eg DNA methylation) |
|
1404 |
-# included in the PCA. Coordinates should be in the |
|
1405 |
-# same order as the original data and the loadings |
|
1406 |
-# (each item/row in signalCoord |
|
1407 |
-# corresponds to a row in `signal`). If a data.frame, |
|
1408 |
-# must have chr and start columns. If end is included, start |
|
1409 |
-# and end should be the same. Start coordinate will be used for calculations. |
|
1410 |
-# @param regionSet A GRanges object with regions corresponding |
|
1411 |
-# to the same biological annotation. |
|
1412 |
-# @param signalCol A character vector with principal components to |
|
1413 |
-# include. eg c("PC1", "PC2") These should be column names of signal. |
|
1414 |
-# @param returnQuantile "logical" object. If FALSE, return region averages. If TRUE, |
|
1415 |
-# for each region, return the quantile of that region's average value |
|
1416 |
-# based on the distribution of individual genomic signal/feature values |
|
1417 |
-# @return a GRanges object with region coordinates for regions with |
|
1418 |
-# scores/quantiles above "cutoff" for any PC in signalCol. The scores/quantiles |
|
1419 |
-# for signalCol are given as metadata in the GRanges. |
|
1389 |
+#' Get regions that are most associated with PCs of interest |
|
1390 |
+#' |
|
1391 |
+#' Get a GRanges with top regions from the region set based on average |
|
1392 |
+#' loadings for the regions or the quantile of the region's loading. |
|
1393 |
+#' Returns average loading or quantile as GRanges metadata. |
|
1394 |
+#' |
|
1395 |
+#' @param signal matrix of loadings (the coefficients of |
|
1396 |
+#' the linear combination that defines each PC). One named column for each PC. |
|
1397 |
+#' One row for each original dimension/variable (should be same order |
|
1398 |
+#' as original data/signalCoord). The x$rotation output of prcomp(). |
|
1399 |
+#' @param signalCoord a GRanges object or data.frame with coordinates |
|
1400 |
+#' for the genomic signal/original data (eg DNA methylation) |
|
1401 |
+#' included in the PCA. Coordinates should be in the |
|
1402 |
+#' same order as the original data and the loadings |
|
1403 |
+#' (each item/row in signalCoord |
|
1404 |
+#' corresponds to a row in `signal`). If a data.frame, |
|
1405 |
+#' must have chr and start columns. If end is included, start |
|
1406 |
+#' and end should be the same. Start coordinate will be used for calculations. |
|
1407 |
+#' @param regionSet A GRanges object with regions corresponding |
|
1408 |
+#' to the same biological annotation. |
|
1409 |
+#' @param signalCol A character vector with principal components to |
|
1410 |
+#' include. eg c("PC1", "PC2") These should be column names of signal. |
|
1411 |
+#' @param returnQuantile "logical" object. If FALSE, return region averages. If TRUE, |
|
1412 |
+#' for each region, return the quantile of that region's average value |
|
1413 |
+#' based on the distribution of individual genomic signal/feature values |
|
1414 |
+#' @return a GRanges object with region coordinates for regions with |
|
1415 |
+#' scores/quantiles above "cutoff" for any PC in signalCol. The scores/quantiles |
|
1416 |
+#' for signalCol are given as metadata in the GRanges. |
|
1420 | 1417 |
|
1421 | 1418 |
# Are regions in order along the rows of the data.table? |
1422 | 1419 |
# |
1423 |
-# @examples data("brcaLoadings1") |
|
1424 |
-# data("brcaMCoord1") |
|
1425 |
-# data("esr1_chr1") |
|
1426 |
-# COCOA:::getTopRegions(signal=brcaLoadings1, |
|
1427 |
-# signalCoord=brcaMCoord1, regionSet=esr1_chr1, returnQuantile = TRUE) |
|
1420 |
+#' @examples |
|
1421 |
+#' data("brcaLoadings1") |
|
1422 |
+#' data("brcaMCoord1") |
|
1423 |
+#' data("esr1_chr1") |
|
1424 |
+#' getTopRegions(signal=brcaLoadings1, |
|
1425 |
+#' signalCoord=brcaMCoord1, regionSet=esr1_chr1, returnQuantile = TRUE) |
|
1428 | 1426 |
|
1429 | 1427 |
getTopRegions <- function(signal, |
1430 | 1428 |
signalCoord, |
... | ... |
@@ -344,8 +344,7 @@ corPerm <- function(randomInd, genomicSignal, |
344 | 344 |
#' with one row for each region set (e.g. a data.frame with results for |
345 | 345 |
#' a single COCOA permutation). |
346 | 346 |
#' |
347 |
-#' @example |
|
348 |
-#' |
|
347 |
+#' @examples |
|
349 | 348 |
#' # six region sets (rows), 2 signals (columns) |
350 | 349 |
#' fakePermScores = data.frame(abs(rnorm(6)), abs(rnorm(6))) |
351 | 350 |
#' fakePermScores2 = data.frame(abs(rnorm(6)), abs(rnorm(6))) |
352 | 351 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,8 @@ |
1 |
+#' @param signalCoord a GRanges object or data.frame with coordinates |
|
2 |
+#' for the genomic signal/original data (e.g. DNA methylation) |
|
3 |
+#' included in the PCA. Coordinates should be in the |
|
4 |
+#' same order as the original data and the loadings |
|
5 |
+#' (each item/row in signalCoord |
|
6 |
+#' corresponds to a row in `signal`). If a data.frame, |
|
7 |
+#' must have chr and start columns. If end is included, start |
|
8 |
+#' and end should be the same. Start coordinate will be used for calculations. |
|
0 | 9 |
\ No newline at end of file |
... | ... |
@@ -14,7 +14,7 @@ the linear combination that defines each PC). One named column for each PC. |
14 | 14 |
One row for each original dimension/variable (should be same order |
15 | 15 |
as original data/signalCoord). The x$rotation output of prcomp().} |
16 | 16 |
|
17 |
-\item{signalCoord}{a GRanges object or data frame with coordinates |
|
17 |
+\item{signalCoord}{a GRanges object or data.frame with coordinates |
|
18 | 18 |
for the genomic signal/original data (e.g. DNA methylation) |
19 | 19 |
included in the PCA. Coordinates should be in the |
20 | 20 |
same order as the original data and the loadings |
... | ... |
@@ -93,6 +93,8 @@ will decrease (if there may be anticorrelation between |
93 | 93 |
regions in a region set). Choose FALSE if you expect regions in a |
94 | 94 |
given region set to all change in the same direction (all be positively |
95 | 95 |
correlated with each other).} |
96 |
+ |
|
97 |
+\item{testP}{my test param} |
|
96 | 98 |
} |
97 | 99 |
\value{ |
98 | 100 |
a data.frame with one row and the following |
99 | 101 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,42 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/permutation.R |
|
3 |
+\name{convertToFromNullDist} |
|
4 |
+\alias{convertToFromNullDist} |
|
5 |
+\title{Converts COCOA permutation results to null distributions and vice versa} |
|
6 |
+\usage{ |
|
7 |
+convertToFromNullDist(resultsList) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{resultsList}{each item in the list is a data.frame, one item for |
|
11 |
+each permutation with the results of that permutation. Each row in the |
|
12 |
+data.frame is a region set. All data.frames should be the same size and |
|
13 |
+each data.frame's rows should be in the same order} |
|
14 |
+} |
|
15 |
+\value{ |
|
16 |
+a list of data.frames. If given a list where each item is |
|
17 |
+a data.frame with results from one COCOA permutation, this function |
|
18 |
+will return a list of data.frames where each data.frame contains the |
|
19 |
+null distributions for a single region set. The output data.frames will |
|
20 |
+have the same columns as the input data.frames. If given a list where each |
|
21 |
+item is a data.frame with the null distribution/s for a single region |
|
22 |
+set, this function will return a list where each item is a data.frame |
|
23 |
+with one row for each region set (e.g. a data.frame with results for |
|
24 |
+a single COCOA permutation). |
|
25 |
+} |
|
26 |
+\description{ |
|
27 |
+This function will take a list of results of permutation tests that included |
|
28 |
+many region sets and return a list of data.frames where each data.frame |
|
29 |
+contains the null distribution for a single region set. |
|
30 |
+The function can |
|
31 |
+also convert in the reverse order from a list of null distributions to a |
|
32 |
+list of COCOA results. |
|
33 |
+} |
|
34 |
+\examples{ |
|
35 |
+# six region sets (rows), 2 signals (columns) |
|
36 |
+fakePermScores = data.frame(abs(rnorm(6)), abs(rnorm(6))) |
|
37 |
+fakePermScores2 = data.frame(abs(rnorm(6)), abs(rnorm(6))) |
|
38 |
+# 2 fake COCOA results (i.e. nPerm=2) |
|
39 |
+permRSScores = list(fakePermScores, fakePermScores2) |
|
40 |
+convertToFromNullDist(permRSScores) |
|
41 |
+ |
|
42 |
+} |
0 | 43 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,17 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/permutation.R |
|
3 |
+\name{fitGammaNullDist} |
|
4 |
+\alias{fitGammaNullDist} |
|
5 |
+\title{signalCoord=brcaMCoord1, |
|
6 |
+ GRList=GRangesList(esr1_chr1), |
|
7 |
+ signalCol=c("PC1", "PC2"), |
|
8 |
+ scoringMetric="regionMean")} |
|
9 |
+\usage{ |
|
10 |
+fitGammaNullDist(nullDistDF, method = "mme", force = FALSE) |
|
11 |
+} |
|
12 |
+\description{ |
|
13 |
+signalCoord=brcaMCoord1, |
|
14 |
+ GRList=GRangesList(esr1_chr1), |
|
15 |
+ signalCol=c("PC1", "PC2"), |
|
16 |
+ scoringMetric="regionMean") |
|
17 |
+} |
... | ... |
@@ -4,22 +4,31 @@ |
4 | 4 |
\alias{getGammaPVal} |
5 | 5 |
\title{Get p value after fitting a gamma distribution to the null distribution} |
6 | 6 |
\usage{ |
7 |
-getGammaPVal(scores, nullDistList, method = "mme", |
|
8 |
- realScoreInDist = FALSE, force = FALSE) |
|
7 |
+getGammaPVal(scores, nullDistList, calcCols, method = "mme", |
|
8 |
+ realScoreInDist = TRUE, force = FALSE) |
|
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 | 11 |
\item{scores}{a data.frame. Has same columns as nullDistDF. One row per |
12 | 12 |
region set (should be in same order as nullDistDF) The scores |
13 | 13 |
that will be used to get p values.} |
14 | 14 |
|
15 |
-\item{nullDistList}{list of a data.frame. Each list item |
|
15 |
+\item{nullDistList}{list of data.frames. Each list item |
|
16 | 16 |
has null distributions for a single |
17 | 17 |
region set. Each column corresponds to a null distribution for that |
18 | 18 |
region set for a given variable/sample attribute.} |
19 | 19 |
|
20 |
+\item{calcCols}{character.} |
|
21 |
+ |
|
22 |
+\item{method}{character. Has the method to use to fit the gamma |
|
23 |
+distribution to the null distribution. See ?fitdistrplus::fitdist() for |
|
24 |
+available options and meaning. The default method "mme" is the |
|
25 |
+"moment matching estimation"} |
|
26 |
+ |
|
20 | 27 |
\item{realScoreInDist}{logical. Should the actual score (from |
21 | 28 |
test with no permutations) be included in the null distribution |
22 | 29 |
when fitting the gamma distribution} |
30 |
+ |
|
31 |
+\item{force}{logical.} |
|
23 | 32 |
} |
24 | 33 |
\value{ |
25 | 34 |
Returns a data.frame with p values, one column for each col in |
... | ... |
@@ -28,3 +37,13 @@ scores and nullDistDF |
28 | 37 |
\description{ |
29 | 38 |
Get p value after fitting a gamma distribution to the null distribution |
30 | 39 |
} |
40 |
+\examples{ |
|
41 |
+fakeOriginalScores = data.frame(PC1=abs(rnorm(6)), PC2=abs(rnorm(6))) |
|
42 |
+fakePermScores = data.frame(PC1=abs(rnorm(6)), PC2=abs(rnorm(6))) |
|
43 |
+fakePermScores2 = data.frame(PC1=abs(rnorm(6)), PC2=abs(rnorm(6))) |
|
44 |
+fakePermScores3 = data.frame(PC1=abs(rnorm(6)), PC2=abs(rnorm(6))) |
|
45 |
+permRSScores = list(fakePermScores, fakePermScores2, fakePermScores3) |
|
46 |
+nullDistList = convertToFromNullDist(permRSScores) |
|
47 |
+getGammaPVal(scores=fakeOriginalScores, nullDistList=nullDistList, calcCols=c("PC1", "PC2")) |
|
48 |
+ |
|
49 |
+} |
... | ... |
@@ -14,7 +14,7 @@ the linear combination that defines each PC). One named column for each PC. |
14 | 14 |
One row for each original dimension/variable (should be same order |
15 | 15 |
as original data/signalCoord). Given by prcomp(x)$rotation.} |
16 | 16 |
|
17 |
-\item{signalCoord}{a GRanges object or data frame with coordinates |
|
17 |
+\item{signalCoord}{a GRanges object or data.frame with coordinates |
|
18 | 18 |
for the genomic signal/original data (eg DNA methylation) |
19 | 19 |
included in the PCA. Coordinates should be in the |
20 | 20 |
same order as the original data and the loadings |
21 | 21 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,51 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/COCOA.R |
|
3 |
+\name{getTopRegions} |
|
4 |
+\alias{getTopRegions} |
|
5 |
+\title{Get regions that are most associated with PCs of interest} |
|
6 |
+\usage{ |
|
7 |
+getTopRegions(signal, signalCoord, regionSet, signalCol = c("PC1", |
|
8 |
+ "PC2"), cutoff = 0.8, returnQuantile = TRUE) |
|
9 |
+} |
|
10 |
+\arguments{ |
|
11 |
+\item{signal}{matrix of loadings (the coefficients of |
|
12 |
+the linear combination that defines each PC). One named column for each PC. |
|
13 |
+One row for each original dimension/variable (should be same order |
|
14 |
+as original data/signalCoord). The x$rotation output of prcomp().} |
|
15 |
+ |
|
16 |
+\item{signalCoord}{a GRanges object or data.frame with coordinates |
|
17 |
+for the genomic signal/original data (eg DNA methylation) |
|
18 |
+included in the PCA. Coordinates should be in the |
|
19 |
+same order as the original data and the loadings |
|
20 |
+(each item/row in signalCoord |
|
21 |
+corresponds to a row in `signal`). If a data.frame, |
|
22 |
+must have chr and start columns. If end is included, start |
|
23 |
+and end should be the same. Start coordinate will be used for calculations.} |
|
24 |
+ |
|
25 |
+\item{regionSet}{A GRanges object with regions corresponding |
|
26 |
+to the same biological annotation.} |
|
27 |
+ |
|
28 |
+\item{signalCol}{A character vector with principal components to |
|
29 |
+include. eg c("PC1", "PC2") These should be column names of signal.} |
|
30 |
+ |
|
31 |
+\item{returnQuantile}{"logical" object. If FALSE, return region averages. If TRUE, |
|
32 |
+for each region, return the quantile of that region's average value |
|
33 |
+based on the distribution of individual genomic signal/feature values} |
|
34 |
+} |
|
35 |
+\value{ |
|
36 |
+a GRanges object with region coordinates for regions with |
|
37 |
+scores/quantiles above "cutoff" for any PC in signalCol. The scores/quantiles |
|
38 |
+for signalCol are given as metadata in the GRanges. |
|
39 |
+} |
|
40 |
+\description{ |
|
41 |
+Get a GRanges with top regions from the region set based on average |
|
42 |
+loadings for the regions or the quantile of the region's loading. |
|
43 |
+Returns average loading or quantile as GRanges metadata. |
|
44 |
+} |
|
45 |
+\examples{ |
|
46 |
+data("brcaLoadings1") |
|
47 |
+data("brcaMCoord1") |
|
48 |
+data("esr1_chr1") |
|
49 |
+getTopRegions(signal=brcaLoadings1, |
|
50 |
+signalCoord=brcaMCoord1, regionSet=esr1_chr1, returnQuantile = TRUE) |
|
51 |
+} |
... | ... |
@@ -14,7 +14,7 @@ the linear combination that defines each PC). One named column for each PC. |
14 | 14 |
One row for each original dimension/variable (should be same order |
15 | 15 |
as original data/signalCoord). The x$rotation output of prcomp().} |
16 | 16 |
|
17 |
-\item{signalCoord}{a GRanges object or data frame with coordinates |
|
17 |
+\item{signalCoord}{a GRanges object or data.frame with coordinates |
|
18 | 18 |
for the genomic signal/original data (eg DNA methylation) |
19 | 19 |
included in the PCA. Coordinates should be in the |
20 | 20 |
same order as the original data and the loadings |
... | ... |
@@ -24,8 +24,8 @@ If end is not included, start coordinate will be used for calculations.} |
24 | 24 |
|
25 | 25 |
\item{GRList}{GRangesList object. Each list item is |
26 | 26 |
a distinct region set to test (region set: regions that correspond to |
27 |
-the same biological annotation). The region set database. |
|
28 |
-Must be from the same reference genome |
|
27 |
+the same biological annotation). The region set database |
|
28 |
+must be from the same reference genome |
|
29 | 29 |
as the coordinates for the actual data/samples (signalCoord).} |
30 | 30 |
|
31 | 31 |
\item{signalCol}{A character vector with principal components to |
... | ... |
@@ -2,7 +2,7 @@ |
2 | 2 |
% Please edit documentation in R/permutation.R |
3 | 3 |
\name{runCOCOAPerm} |
4 | 4 |
\alias{runCOCOAPerm} |
5 |
-\title{permutation test by shuffling sample labels} |
|
5 |
+\title{Run COCOA permutations to get p-values} |
|
6 | 6 |
\usage{ |
7 | 7 |
runCOCOAPerm(genomicSignal, signalCoord, GRList, realRSScores, |
8 | 8 |
sampleLabels, signalCol = c("PC1", "PC2"), |
... | ... |
@@ -11,22 +11,31 @@ runCOCOAPerm(genomicSignal, signalCoord, GRList, realRSScores, |
11 | 11 |
cacheDir = getwd(), dataID = "tmp", correctionMethod = "BH", ...) |
12 | 12 |
} |
13 | 13 |
\arguments{ |
14 |
+\item{realRSScores}{data.frame. A data.frame with region set |
|
15 |
+scores. The output of the 'runCOCOA' function. |
|
16 |
+Rows should be in the same order as the region sets in GRList. |
|
17 |
+Must include columns with names given by 'colsToAnnotate'.} |
|
18 |
+ |
|
14 | 19 |
\item{sampleLabels}{data.frame/matrix. Sample labels/values that |
15 | 20 |
you are running COCOA to find region sets associated with. These |
16 | 21 |
values will be shuffled for the permutation test. Rows are samples. |
17 | 22 |
Each column is a sample label.} |
18 | 23 |
|
19 |
-\item{variationMetric}{character. Either "cor" (correlation), "pcor" (partial |
|
20 |
-correlation), or "cov" (covariation)} |
|
24 |
+\item{variationMetric}{character. Either "cor" (Pearson correlation), |
|
25 |
+"pcor" (partial correlation), "spearmanCor (Spearman correlation) |
|
26 |
+or "cov" (covariation).} |
|
21 | 27 |
|
22 | 28 |
\item{nPerm}{numeric. The number of permutations to do.} |
23 | 29 |
|
24 |
-\item{useSimpleCache}{logical.} |
|
30 |
+\item{useSimpleCache}{logical. Whether to use save caches. Caches |
|
31 |
+will be created for each permutation so that if the function is disrupted |
|
32 |
+it can restart where it left off. The final results are also saved |
|
33 |
+as a cache.} |
|
25 | 34 |
|
26 | 35 |
\item{cacheDir}{character.} |
27 | 36 |
|
28 | 37 |
\item{dataID}{character. A unique identifier for this dataset |
29 |
-(for saving results)} |
|
38 |
+(for saving results with simpleCache)} |
|
30 | 39 |
|
31 | 40 |
\item{correctionMethod}{character. P value correction method. Default |
32 | 41 |
is "BH" for Benjamini and Hochberg false discovery rate. For acceptable |
... | ... |
@@ -35,7 +44,7 @@ arguments and more info see ?stats::p.adjust() (method parameter)} |
35 | 44 |
\item{...}{character. Optional additional arguments for simpleCache} |
36 | 45 |
|
37 | 46 |
\item{colsToAnnotate}{character. The column names of `sampleLabels` that |
38 |
-you want to test.} |
|
47 |
+you want to test. These must also be columns in realRSScores.} |
|
39 | 48 |
|
40 | 49 |
\item{resultType}{character. "pval" or "zscore"} |
41 | 50 |
} |
... | ... |
@@ -44,5 +53,40 @@ Returns a list where each item is a data.frame of COCOA results |
44 | 53 |
from a separate permutation |
45 | 54 |
} |
46 | 55 |
\description{ |
56 |
+This is a convenience function that runs multiple steps of the |
|
57 |
+permutation process together: it runs COCOA permutations, converts these |
|
58 |
+to null distributions, gets the empirical p value (which is limited by the |
|
59 |
+number of permutations), gets z scores, and fits a gamma distribution |
|
60 |
+to each null distribution to estimate p values (not limited by the |
|
61 |
+number of permutations), |
|
62 |
+Requires that the user has previously calculated the real COCOA scores. |
|
63 |
+See these individual functions for more info on each step: corPerm, |
|
64 |
+convertToFromNullDist, getPermStat, and getGammaPVal. |
|
65 |
+} |
|
66 |
+\details{ |
|
47 | 67 |
For reproducibility, set seed with 'set.seed()' function before running. |
48 | 68 |
} |
69 |
+\examples{ |
|
70 |
+data("brcaMCoord1") |
|
71 |
+data("brcaLoadings1") |
|
72 |
+data("esr1_chr1") |
|
73 |
+data("nrf1_chr1") |
|
74 |
+data("brcaMethylData1") |
|
75 |
+data("brcaPCScores657") |
|
76 |
+pcCor = corFeature |
|
77 |
+sampleLabels <- brcaPCScores657[colnames(brcaMethylData1), ] |
|
78 |
+sampleLabels$ER_Status <- scale(as.numeric(sampleLabels$ER_Status), |
|
79 |
+ center=TRUE, scale=FALSE) |
|
80 |
+# give the actual order of samples to randomInd to get the real scores |
|
81 |
+realRSScores <- corPerm(randomInd=1:4, genomicSignal=brcaMethylData1, |
|
82 |
+ signalCoord=brcaMCoord1, GRList=GRangesList(esr1_chr1, nrf1_chr1), |
|
83 |
+ calcCols=c("PC1", "PC2"), sampleLabels=sampleLabels, |
|
84 |
+ variationMetric="cor") |
|
85 |
+ |
|
86 |
+a=runCOCOAPerm(genomicSignal=brcaMethylData1, |
|
87 |
+ signalCoord=brcaMCoord1, GRList=GRangesList(esr1_chr1, nrf1_chr1), |
|
88 |
+ realRSScores=realRSScores, |
|
89 |
+ sampleLabels=sampleLabels, signalCol=c("PC1", "PC2"), |
|
90 |
+ variationMetric="cor", nPerm = 10, useSimpleCache=FALSE) |
|
91 |
+ |
|
92 |
+} |