git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/PureCN@119126 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,48 +1,48 @@ |
1 |
-export(runAbsoluteCN) |
|
2 |
-export(filterVcfBasic) |
|
3 |
-export(bootstrapResults) |
|
4 |
-export(filterVcfMuTect) |
|
5 |
-export(setPriorVcf) |
|
6 |
-export(getDiploid) |
|
7 | 1 |
export(autoCurateResults) |
8 |
-export(segmentationCBS) |
|
9 |
-export(segmentationPSCBS) |
|
10 |
-export(poolCoverage) |
|
11 |
-export(readCoverageGatk) |
|
12 |
-export(correctCoverageBias) |
|
13 |
-export(callAlterations) |
|
14 |
-export(findFocal) |
|
15 |
-export(readCurationFile) |
|
16 |
-export(calculatePowerDetectSomatic) |
|
2 |
+export(bootstrapResults) |
|
17 | 3 |
export(calculateBamCoverageByInterval) |
18 | 4 |
export(calculateGCContentByInterval) |
5 |
+export(calculatePowerDetectSomatic) |
|
6 |
+export(callAlterations) |
|
7 |
+export(callLOH) |
|
8 |
+export(correctCoverageBias) |
|
19 | 9 |
export(createCurationFile) |
10 |
+export(createExonWeightFile) |
|
20 | 11 |
export(createNormalDatabase) |
21 | 12 |
export(createSNPBlacklist) |
22 |
-export(createExonWeightFile) |
|
13 |
+export(filterVcfBasic) |
|
14 |
+export(filterVcfMuTect) |
|
23 | 15 |
export(findBestNormal) |
24 |
-export(plotBestNormal) |
|
25 |
-export(predictSomatic) |
|
26 |
-export(plotAbs) |
|
16 |
+export(findFocal) |
|
17 |
+export(getDiploid) |
|
27 | 18 |
export(getSexFromCoverage) |
28 | 19 |
export(getSexFromVcf) |
29 |
-import(VariantAnnotation) |
|
20 |
+export(plotAbs) |
|
21 |
+export(plotBestNormal) |
|
22 |
+export(poolCoverage) |
|
23 |
+export(predictSomatic) |
|
24 |
+export(readCoverageGatk) |
|
25 |
+export(readCurationFile) |
|
26 |
+export(runAbsoluteCN) |
|
27 |
+export(segmentationCBS) |
|
28 |
+export(segmentationPSCBS) |
|
29 |
+export(setPriorVcf) |
|
30 | 30 |
import(DNAcopy) |
31 |
-importFrom("data.table", "data.table") |
|
32 |
-importFrom("RColorBrewer", "brewer.pal") |
|
31 |
+import(IRanges) |
|
32 |
+import(VariantAnnotation) |
|
33 |
+importFrom("Biostrings", "letterFrequency") |
|
33 | 34 |
importFrom("GenomeInfoDb", "seqnames") |
34 | 35 |
importFrom("GenomicRanges", "GRanges") |
35 |
-importFrom("SummarizedExperiment", "rowRanges") |
|
36 |
+importFrom("RColorBrewer", "brewer.pal") |
|
36 | 37 |
importFrom("Rsamtools", "ScanBamParam", "scanBamFlag", "scanBam", "scanFa") |
37 |
-import(IRanges) |
|
38 | 38 |
importFrom("S4Vectors", "queryHits", "subjectHits") |
39 |
+importFrom("SummarizedExperiment", "rowRanges") |
|
40 |
+importFrom("data.table", "data.table") |
|
39 | 41 |
importFrom("grDevices", "colorRampPalette", "dev.off", "png", "adjustcolor") |
40 |
-importFrom("graphics", "abline", "axis", "boxplot", "contour", "hist", |
|
41 |
- "image", "legend", "lines", "par", "plot", "text", "mtext", "polygon", |
|
42 |
- "points") |
|
43 |
-importFrom("stats", "C", "complete.cases", "dbeta", "dist", "dnorm", |
|
44 |
- "dunif", "loess", "pbeta", "prcomp", "predict", "runif", "t.test", |
|
45 |
- "weighted.mean", "dbinom", "fisher.test", "hclust", "cutree") |
|
46 |
-importFrom("utils", "data", "head", "read.csv", "read.delim", |
|
47 |
- "read.table", "write.csv", "write.table", "tail", "packageVersion") |
|
48 |
-importFrom("Biostrings", "letterFrequency") |
|
42 |
+importFrom("graphics", "abline", "axis", "boxplot", "contour", "hist","image", |
|
43 |
+ "legend", "lines", "par", "plot", "text", "mtext", "polygon","points") |
|
44 |
+importFrom("stats", "C", "complete.cases", "dbeta", "dist", "dnorm", "dunif", |
|
45 |
+ "loess", "pbeta", "prcomp", "predict", "runif", "t.test", "weighted.mean", |
|
46 |
+ "dbinom", "fisher.test", "hclust", "cutree") |
|
47 |
+importFrom("utils", "data", "head", "read.csv", "read.delim", "read.table", |
|
48 |
+ "write.csv", "write.table", "tail", "packageVersion") |
49 | 49 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,91 @@ |
1 |
+callLOH <- structure(function(# Get regions of LOH |
|
2 |
+### This function provides detailed LOH information by region. |
|
3 |
+res, |
|
4 |
+### Return object of the runAbsoluteCN() function. |
|
5 |
+id=1, |
|
6 |
+### Candidate solution to extract LOH from. id=1 will |
|
7 |
+### use the maximum likelihood solution. |
|
8 |
+arm.cutoff=0.9 |
|
9 |
+### Min fraction LOH on a chromosome arm to call |
|
10 |
+### whole arm events. |
|
11 |
+) { |
|
12 |
+ if (is.null(res$input$vcf)) { |
|
13 |
+ .stopUserError("runAbsoluteCN was run without a VCF file.") |
|
14 |
+ } |
|
15 |
+ chr.hash <- res$input$chr.hash |
|
16 |
+ armLocations <- .getArmLocations(res, 1) |
|
17 |
+ loh <- res$results[[id]]$SNV.posterior$beta.model$loh$output |
|
18 |
+ if (!nrow(armLocations)) { |
|
19 |
+ warning("Centromere positions not available or matching.") |
|
20 |
+ return(loh) |
|
21 |
+ } |
|
22 |
+ armLocationsGR <- GRanges(seqnames=armLocations$chrom, |
|
23 |
+IRanges(start=armLocations$start, end=armLocations$end)) |
|
24 |
+ seg <- res$results[[id]]$seg |
|
25 |
+ |
|
26 |
+ minorChrNumber <- cbind( |
|
27 |
+ res$results[[id]]$SNV.posterior$beta.model$segment.ids, |
|
28 |
+ res$results[[id]]$SNV.posterior$beta.model$posteriors$ML.M.Segment |
|
29 |
+ ) |
|
30 |
+ minorChrNumber <- minorChrNumber[!duplicated(minorChrNumber[,1]),] |
|
31 |
+ seg$M[minorChrNumber[,1]] <- minorChrNumber[,2] |
|
32 |
+ seg <- seg[complete.cases(seg),] |
|
33 |
+ seg$chrom <- .add.chr.name(seg$chrom, chr.hash) |
|
34 |
+ segGR <- GRanges(seqnames=seg$chrom, IRanges(start=seg$loc.start, |
|
35 |
+ end=seg$loc.end)) |
|
36 |
+ |
|
37 |
+ ov <- findOverlaps(armLocationsGR, segGR) |
|
38 |
+ segLOH <- cbind(seg[subjectHits(ov),], armLocations[queryHits(ov),]) |
|
39 |
+ segLOH$loc.start <- apply(segLOH[,c("loc.start", "start")],1,max) |
|
40 |
+ segLOH$loc.end <- apply(segLOH[,c("loc.end", "end")],1,min) |
|
41 |
+ segLOH$chrom <- paste(segLOH$chrom, segLOH$arm, sep="") |
|
42 |
+ segLOH$size <- segLOH$loc.end-segLOH$loc.start+1 |
|
43 |
+ segLOH$fraction.arm <- round(segLOH$size/armLocations$size[match(segLOH$chrom, |
|
44 |
+ paste(armLocations$chrom, armLocations$arm, sep=""))], digits=2) |
|
45 |
+ segLOH$type <- "" |
|
46 |
+ segLOH$type[segLOH$C %% 2 == 0 & segLOH$C > 1 & |
|
47 |
+ segLOH$M == 0] <- "COPY-NEUTRAL LOH" |
|
48 |
+ segLOH$type[segLOH$type=="" & segLOH$M == 0] <- "LOH" |
|
49 |
+ idx <- segLOH$fraction.arm > arm.cutoff & segLOH$type != "" |
|
50 |
+ segLOH$type[idx] <- paste("WHOLE ARM", |
|
51 |
+ segLOH$type)[idx] |
|
52 |
+ rownames(segLOH) <- NULL |
|
53 |
+ segLOH <- segLOH[, c("chrom", "loc.start", "loc.end", "C", "M", "type")] |
|
54 |
+ # standardize colnames |
|
55 |
+ colnames(segLOH)[1:3] <- c("chr", "start", "end") |
|
56 |
+ segLOH |
|
57 |
+### Returns data.frame with LOH regions. |
|
58 |
+}, ex=function() { |
|
59 |
+data(purecn.example.output) |
|
60 |
+head(callLOH(purecn.example.output)) |
|
61 |
+}) |
|
62 |
+ |
|
63 |
+.getArmLocations <- function(res, id) { |
|
64 |
+ chr.hash <- res$input$chr.hash |
|
65 |
+ centromeres <- res$input$centromeres |
|
66 |
+ |
|
67 |
+ chromCoords <- t(vapply(split( |
|
68 |
+ res$results[[id]]$SNV.posterior$beta.model$loh$data$maploc, |
|
69 |
+ res$results[[id]]$SNV.posterior$beta.model$loh$data$chrom), function(x) |
|
70 |
+ c(min(x), max(x)), c(min=double(1), max=double(1)))) |
|
71 |
+ |
|
72 |
+ centromeres <- cbind(centromeres, chromCoords[match(centromeres$chrom, |
|
73 |
+ rownames(chromCoords)),]) |
|
74 |
+ centromeres <- centromeres[complete.cases(centromeres),] |
|
75 |
+ |
|
76 |
+ pArms <- centromeres[centromeres$min < centromeres$chromStart, |
|
77 |
+ c("chrom", "min", "chromStart")] |
|
78 |
+ qArms <- centromeres[centromeres$max > centromeres$chromEnd, |
|
79 |
+ c("chrom", "chromEnd", "max")] |
|
80 |
+ |
|
81 |
+ colnames(pArms) <- c("chrom", "start", "end") |
|
82 |
+ colnames(qArms) <- colnames(pArms) |
|
83 |
+ pArms$arm <- "p" |
|
84 |
+ qArms$arm <- "q" |
|
85 |
+ armLocations <- rbind(pArms, qArms) |
|
86 |
+ armLocations <- armLocations[order(match(armLocations$chrom, |
|
87 |
+ chr.hash[,1])),] |
|
88 |
+ armLocations$size <- armLocations$end-armLocations$start+1 |
|
89 |
+ armLocations |
|
90 |
+} |
|
91 |
+ |
0 | 7 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,24 @@ |
1 |
+\name{callLOH} |
|
2 |
+\alias{callLOH} |
|
3 |
+\title{Get regions of LOH} |
|
4 |
+\description{This function provides detailed LOH information by region.} |
|
5 |
+\usage{callLOH(res, id = 1, arm.cutoff = 0.9)} |
|
6 |
+\arguments{ |
|
7 |
+ \item{res}{Return object of the runAbsoluteCN() function.} |
|
8 |
+ \item{id}{Candidate solution to extract LOH from. id=1 will |
|
9 |
+use the maximum likelihood solution.} |
|
10 |
+ \item{arm.cutoff}{Min fraction LOH on a chromosome arm to call |
|
11 |
+whole arm events.} |
|
12 |
+} |
|
13 |
+ |
|
14 |
+\value{Returns data.frame with LOH regions. } |
|
15 |
+ |
|
16 |
+\author{Markus Riester} |
|
17 |
+ |
|
18 |
+ |
|
19 |
+ |
|
20 |
+ |
|
21 |
+\examples{ |
|
22 |
+data(purecn.example.output) |
|
23 |
+head(callLOH(purecn.example.output)) |
|
24 |
+} |
... | ... |
@@ -615,6 +615,17 @@ gene.calls <- callAlterations(ret) |
615 | 615 |
head(gene.calls) |
616 | 616 |
@ |
617 | 617 |
|
618 |
+\section{Find genomic regions in LOH} |
|
619 |
+ |
|
620 |
+The \Rcode{gene.calls} data.frame described above provides gene-level LOH |
|
621 |
+information. To find the corresponding regions in LOH, the \Rfunction{getLOH} |
|
622 |
+provides a convenient way: |
|
623 |
+ |
|
624 |
+<<loh>>= |
|
625 |
+loh <- callLOH(ret) |
|
626 |
+head(loh) |
|
627 |
+@ |
|
628 |
+ |
|
618 | 629 |
\section{Power to detect somatic mutations} |
619 | 630 |
|
620 | 631 |
As final quality control step, we can test if coverage and tumor purity are |