Browse code

Added more convenient LOH function.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/PureCN@119126 bc3139a8-67e5-0310-9ffc-ced21a209358

Markus Riester authored on 05/07/2016 00:17:55
Showing 6 changed files

... ...
@@ -5,6 +5,7 @@
5 5
     - added calculateGCContentByInterval
6 6
     - added calculatePowerDetectSomatic
7 7
     - added callAlterations
8
+    - added callLOH
8 9
     - added getDiploid
9 10
     - added getSexFromVcf
10 11
 
... ...
@@ -1,48 +1,48 @@
1
-export(runAbsoluteCN)
2
-export(filterVcfBasic)
3
-export(bootstrapResults)
4
-export(filterVcfMuTect)
5
-export(setPriorVcf)
6
-export(getDiploid)
7 1
 export(autoCurateResults)
8
-export(segmentationCBS)
9
-export(segmentationPSCBS)
10
-export(poolCoverage)
11
-export(readCoverageGatk)
12
-export(correctCoverageBias)
13
-export(callAlterations)
14
-export(findFocal)
15
-export(readCurationFile)
16
-export(calculatePowerDetectSomatic)
2
+export(bootstrapResults)
17 3
 export(calculateBamCoverageByInterval)
18 4
 export(calculateGCContentByInterval)
5
+export(calculatePowerDetectSomatic)
6
+export(callAlterations)
7
+export(callLOH)
8
+export(correctCoverageBias)
19 9
 export(createCurationFile)
10
+export(createExonWeightFile)
20 11
 export(createNormalDatabase)
21 12
 export(createSNPBlacklist)
22
-export(createExonWeightFile)
13
+export(filterVcfBasic)
14
+export(filterVcfMuTect)
23 15
 export(findBestNormal)
24
-export(plotBestNormal)
25
-export(predictSomatic)
26
-export(plotAbs)
16
+export(findFocal)
17
+export(getDiploid)
27 18
 export(getSexFromCoverage)
28 19
 export(getSexFromVcf)
29
-import(VariantAnnotation)
20
+export(plotAbs)
21
+export(plotBestNormal)
22
+export(poolCoverage)
23
+export(predictSomatic)
24
+export(readCoverageGatk)
25
+export(readCurationFile)
26
+export(runAbsoluteCN)
27
+export(segmentationCBS)
28
+export(segmentationPSCBS)
29
+export(setPriorVcf)
30 30
 import(DNAcopy)
31
-importFrom("data.table", "data.table")
32
-importFrom("RColorBrewer", "brewer.pal")
31
+import(IRanges)
32
+import(VariantAnnotation)
33
+importFrom("Biostrings", "letterFrequency")
33 34
 importFrom("GenomeInfoDb", "seqnames")
34 35
 importFrom("GenomicRanges", "GRanges")
35
-importFrom("SummarizedExperiment", "rowRanges")
36
+importFrom("RColorBrewer", "brewer.pal")
36 37
 importFrom("Rsamtools", "ScanBamParam", "scanBamFlag", "scanBam", "scanFa")
37
-import(IRanges)
38 38
 importFrom("S4Vectors", "queryHits", "subjectHits")
39
+importFrom("SummarizedExperiment", "rowRanges")
40
+importFrom("data.table", "data.table")
39 41
 importFrom("grDevices", "colorRampPalette", "dev.off", "png", "adjustcolor")
40
-importFrom("graphics", "abline", "axis", "boxplot", "contour", "hist",
41
-    "image", "legend", "lines", "par", "plot", "text", "mtext", "polygon",
42
-    "points")
43
-importFrom("stats", "C", "complete.cases", "dbeta", "dist", "dnorm",
44
-    "dunif", "loess", "pbeta", "prcomp", "predict", "runif", "t.test",
45
-    "weighted.mean", "dbinom", "fisher.test", "hclust", "cutree")
46
-importFrom("utils", "data", "head", "read.csv", "read.delim",
47
-    "read.table", "write.csv", "write.table", "tail", "packageVersion")
48
-importFrom("Biostrings", "letterFrequency")
42
+importFrom("graphics", "abline", "axis", "boxplot", "contour", "hist","image", 
43
+    "legend", "lines", "par", "plot", "text", "mtext", "polygon","points")
44
+importFrom("stats", "C", "complete.cases", "dbeta", "dist", "dnorm", "dunif", 
45
+    "loess", "pbeta", "prcomp", "predict", "runif", "t.test", "weighted.mean", 
46
+    "dbinom", "fisher.test", "hclust", "cutree")
47
+importFrom("utils", "data", "head", "read.csv", "read.delim", "read.table", 
48
+    "write.csv", "write.table", "tail", "packageVersion")
49 49
new file mode 100644
... ...
@@ -0,0 +1,91 @@
1
+callLOH <- structure(function(# Get regions of LOH
2
+### This function provides detailed LOH information by region.
3
+res, 
4
+### Return object of the runAbsoluteCN() function.
5
+id=1, 
6
+### Candidate solution to extract LOH from. id=1 will  
7
+### use the maximum likelihood solution.
8
+arm.cutoff=0.9
9
+### Min fraction LOH on a chromosome arm to call 
10
+### whole arm events.
11
+) {
12
+    if (is.null(res$input$vcf)) {
13
+        .stopUserError("runAbsoluteCN was run without a VCF file.")
14
+    }
15
+    chr.hash <- res$input$chr.hash
16
+    armLocations <- .getArmLocations(res, 1)
17
+    loh <- res$results[[id]]$SNV.posterior$beta.model$loh$output
18
+    if (!nrow(armLocations)) {
19
+        warning("Centromere positions not available or matching.")
20
+        return(loh)
21
+    }
22
+    armLocationsGR <- GRanges(seqnames=armLocations$chrom,
23
+IRanges(start=armLocations$start, end=armLocations$end))
24
+    seg <- res$results[[id]]$seg
25
+
26
+    minorChrNumber <- cbind(
27
+        res$results[[id]]$SNV.posterior$beta.model$segment.ids,
28
+        res$results[[id]]$SNV.posterior$beta.model$posteriors$ML.M.Segment
29
+    )
30
+    minorChrNumber <- minorChrNumber[!duplicated(minorChrNumber[,1]),]
31
+    seg$M[minorChrNumber[,1]] <- minorChrNumber[,2]
32
+    seg <- seg[complete.cases(seg),]
33
+    seg$chrom <- .add.chr.name(seg$chrom, chr.hash)
34
+    segGR <- GRanges(seqnames=seg$chrom, IRanges(start=seg$loc.start,
35
+    end=seg$loc.end))
36
+
37
+    ov <- findOverlaps(armLocationsGR, segGR)
38
+    segLOH <-  cbind(seg[subjectHits(ov),], armLocations[queryHits(ov),])
39
+    segLOH$loc.start <- apply(segLOH[,c("loc.start", "start")],1,max)
40
+    segLOH$loc.end <- apply(segLOH[,c("loc.end", "end")],1,min)
41
+    segLOH$chrom <- paste(segLOH$chrom, segLOH$arm, sep="")
42
+    segLOH$size <- segLOH$loc.end-segLOH$loc.start+1
43
+    segLOH$fraction.arm <- round(segLOH$size/armLocations$size[match(segLOH$chrom,
44
+        paste(armLocations$chrom, armLocations$arm, sep=""))], digits=2)
45
+    segLOH$type <- ""
46
+    segLOH$type[segLOH$C %% 2 == 0 & segLOH$C > 1 & 
47
+        segLOH$M == 0] <- "COPY-NEUTRAL LOH"
48
+    segLOH$type[segLOH$type=="" & segLOH$M == 0] <- "LOH"
49
+    idx <- segLOH$fraction.arm > arm.cutoff & segLOH$type != ""
50
+    segLOH$type[idx] <- paste("WHOLE ARM",
51
+        segLOH$type)[idx]
52
+    rownames(segLOH) <- NULL
53
+    segLOH <- segLOH[, c("chrom", "loc.start", "loc.end", "C", "M", "type")]
54
+    # standardize colnames
55
+    colnames(segLOH)[1:3] <- c("chr", "start", "end")
56
+    segLOH
57
+### Returns data.frame with LOH regions.    
58
+}, ex=function() {
59
+data(purecn.example.output)
60
+head(callLOH(purecn.example.output))
61
+})
62
+
63
+.getArmLocations <- function(res, id) {
64
+    chr.hash <- res$input$chr.hash
65
+    centromeres <- res$input$centromeres
66
+
67
+    chromCoords <- t(vapply(split(
68
+        res$results[[id]]$SNV.posterior$beta.model$loh$data$maploc,
69
+        res$results[[id]]$SNV.posterior$beta.model$loh$data$chrom), function(x)
70
+        c(min(x), max(x)), c(min=double(1), max=double(1))))
71
+
72
+    centromeres <- cbind(centromeres, chromCoords[match(centromeres$chrom,
73
+        rownames(chromCoords)),])
74
+    centromeres <- centromeres[complete.cases(centromeres),]
75
+
76
+    pArms <- centromeres[centromeres$min < centromeres$chromStart,
77
+        c("chrom", "min", "chromStart")]
78
+    qArms <- centromeres[centromeres$max > centromeres$chromEnd,
79
+        c("chrom", "chromEnd", "max")]
80
+    
81
+    colnames(pArms) <- c("chrom", "start", "end")
82
+    colnames(qArms) <- colnames(pArms)
83
+    pArms$arm <- "p"
84
+    qArms$arm <- "q"
85
+    armLocations <- rbind(pArms, qArms)
86
+    armLocations <- armLocations[order(match(armLocations$chrom,
87
+        chr.hash[,1])),]
88
+    armLocations$size <- armLocations$end-armLocations$start+1
89
+    armLocations
90
+}
91
+
0 92
new file mode 100755
... ...
@@ -0,0 +1,6 @@
1
+test_callLOH <- function() {
2
+    data(purecn.example.output)
3
+    ret <- callLOH(purecn.example.output)
4
+    checkEquals("data.frame", class(ret))
5
+    checkEqualsNumeric(6, ncol(ret))
6
+}    
0 7
new file mode 100644
... ...
@@ -0,0 +1,24 @@
1
+\name{callLOH}
2
+\alias{callLOH}
3
+\title{Get regions of LOH}
4
+\description{This function provides detailed LOH information by region.}
5
+\usage{callLOH(res, id = 1, arm.cutoff = 0.9)}
6
+\arguments{
7
+  \item{res}{Return object of the runAbsoluteCN() function.}
8
+  \item{id}{Candidate solution to extract LOH from. id=1 will  
9
+use the maximum likelihood solution.}
10
+  \item{arm.cutoff}{Min fraction LOH on a chromosome arm to call 
11
+whole arm events.}
12
+}
13
+
14
+\value{Returns data.frame with LOH regions.    }
15
+
16
+\author{Markus Riester}
17
+
18
+
19
+
20
+
21
+\examples{
22
+data(purecn.example.output)
23
+head(callLOH(purecn.example.output))
24
+}
... ...
@@ -615,6 +615,17 @@ gene.calls <- callAlterations(ret)
615 615
 head(gene.calls)
616 616
 @
617 617
 
618
+\section{Find genomic regions in LOH}
619
+
620
+The \Rcode{gene.calls} data.frame described above provides gene-level LOH
621
+information. To find the corresponding regions in LOH, the \Rfunction{getLOH}
622
+provides a convenient way:
623
+
624
+<<loh>>=
625
+loh <- callLOH(ret)
626
+head(loh)
627
+@
628
+  
618 629
 \section{Power to detect somatic mutations}
619 630
 
620 631
 As final quality control step, we can test if coverage and tumor purity are