... | ... |
@@ -23,13 +23,6 @@ |
23 | 23 |
|
24 | 24 |
S4Vectors::setValidity2("TENxFile", .validTENxFile) |
25 | 25 |
|
26 |
-#' @exportClass TENxH5 |
|
27 |
-.TENxH5 <- setClass( |
|
28 |
- Class = "TENxH5", |
|
29 |
- contains = "TENxFile", |
|
30 |
- slots = c(version = "character", group = "character") |
|
31 |
-) |
|
32 |
- |
|
33 | 26 |
.TENxMTX <- setClass( |
34 | 27 |
Class = "TENxMTX", |
35 | 28 |
contains = "TENxFile" |
... | ... |
@@ -70,36 +63,3 @@ TENxFile <- function(resource, ...) { |
70 | 63 |
TENxFUN(resource = resource, extension = ext, ...) |
71 | 64 |
} |
72 | 65 |
|
73 |
-.get_h5_group <- function(fpath) { |
|
74 |
- if (!requireNamespace("rhdf5", quietly = TRUE)) |
|
75 |
- stop("Install 'rhdf5' to work with TENxH5") |
|
76 |
- l1 <- rhdf5::h5ls(fpath, recursive = FALSE) |
|
77 |
- l1[l1$otype == "H5I_GROUP", "name"] |
|
78 |
-} |
|
79 |
- |
|
80 |
-.KNOWN_H5_GROUPS <- c("matrix", "outs") |
|
81 |
- |
|
82 |
-.check_group_ok <- function(fpath) { |
|
83 |
- gname <- .get_h5_group(fpath) |
|
84 |
- if (!gname %in% .KNOWN_H5_GROUPS) |
|
85 |
- stop("'group' not recognized") |
|
86 |
- gname |
|
87 |
-} |
|
88 |
- |
|
89 |
-#' @rdname TENxH5 |
|
90 |
-#' @title Import H5 files from 10X |
|
91 |
-#' |
|
92 |
-#' @examples |
|
93 |
-#' |
|
94 |
-#' h5f <- "~/data/10x/pbmc_3k/pbmc_granulocyte_sorted_3k_filtered_feature_bc_matrix.h5" |
|
95 |
-#' con <- TENxFile(h5f) |
|
96 |
-#' import(con) |
|
97 |
-#' |
|
98 |
-#' @export |
|
99 |
-TENxH5 <- |
|
100 |
- function(resource, version = c("3", "2"), ...) |
|
101 |
-{ |
|
102 |
- version <- match.arg(version) |
|
103 |
- group <- .check_group_ok(resource) |
|
104 |
- .TENxH5(resource = resource, group = group, version = version, ...) |
|
105 |
-} |
... | ... |
@@ -1,215 +1,6 @@ |
1 |
-#' @importFrom MatrixGenerics rowRanges |
|
2 | 1 |
#' @include TENxFileList-class.R |
3 | 2 |
NULL |
4 | 3 |
|
5 |
-gene.meta <- data.frame( |
|
6 |
- version = c("3", "2"), |
|
7 |
- ID = c("/features/id", "/genes"), |
|
8 |
- Symbol = c("/features/name", "/gene_names"), |
|
9 |
- Type = c("/features/feature_type", NA_character_) |
|
10 |
-) |
|
11 |
- |
|
12 |
-.getRowDat <- function(con) { |
|
13 |
- gm <- gene.meta[ |
|
14 |
- con@version == gene.meta[["version"]], |
|
15 |
- !names(gene.meta) %in% c("group", "version") |
|
16 |
- ] |
|
17 |
- gm[] <- Filter(Negate(is.na), gm) |
|
18 |
- res <- lapply(gm, function(colval) { |
|
19 |
- readname <- paste0(con@group, colval) |
|
20 |
- rhdf5::h5read(path(con), readname) |
|
21 |
- }) |
|
22 |
- as.data.frame(res) |
|
23 |
-} |
|
24 |
- |
|
25 |
-.getGenome <- function(con) { |
|
26 |
- gloc <- "matrix/features/genome" |
|
27 |
- gen <- unique(rhdf5::h5read(path(con), gloc)) |
|
28 |
- if (length(gen) != 1L) |
|
29 |
- stop("The genome build in ", gloc, " is not consistent") |
|
30 |
- gen |
|
31 |
-} |
|
32 |
- |
|
33 |
-#' @importFrom GenomeInfoDb genome genome<- |
|
34 |
-#' @importFrom S4Vectors mcols<- |
|
35 |
-.getRowRanges <- function(con) { |
|
36 |
- interval <- rhdf5::h5read(path(con), "matrix/features/interval") |
|
37 |
- interval[interval == "NA"] <- "NA_character_:0" |
|
38 |
- gr <- as(as.character(interval), "GRanges") |
|
39 |
- mcols(gr) <- .getRowDat(con) |
|
40 |
- genbuild <- rep(.getGenome(con), length(genome(gr))) |
|
41 |
- genome(gr) <- genbuild |
|
42 |
- gr |
|
43 |
-} |
|
44 |
- |
|
45 |
-#' @import SingleCellExperiment |
|
46 |
-#' @export |
|
47 |
-setMethod("import", "TENxH5", function(con, format, text, ...) { |
|
48 |
- if (!requireNamespace("HDF5Array", quietly = TRUE)) |
|
49 |
- stop("Install 'HDF5Array' to import TENxH5 files") |
|
50 |
- matrixdata <- HDF5Array::TENxMatrix(path(con), con@group) |
|
51 |
- if (identical(con@version, "3")) { |
|
52 |
- sce <- SingleCellExperiment::SingleCellExperiment( |
|
53 |
- assays = list(counts = matrixdata), rowRanges = .getRowRanges(con) |
|
54 |
- ) |
|
55 |
- rownames(sce) <- mcols(sce)[["ID"]] |
|
56 |
- splitAltExps(sce, rowData(sce)[["Type"]], ref = "Gene Expression") |
|
57 |
- } else { |
|
58 |
- stop("Version 2 not supported yet.") |
|
59 |
- } |
|
60 |
-}) |
|
61 |
- |
|
62 |
-#' @import SummarizedExperiment |
|
63 |
-#' @export |
|
64 |
-setMethod("import", "TENxMTX", function(con, format, text, ...) { |
|
65 |
- mtxf <- SingleCellMultiModal:::.read_mtx(path(con)) |
|
66 |
- ## TODO: make use of other files |
|
67 |
- SummarizedExperiment::SummarizedExperiment( |
|
68 |
- assays = SimpleList(counts = mtxf) |
|
69 |
- ) |
|
70 |
-}) |
|
71 |
- |
|
72 |
- |
|
73 |
-#' @importFrom utils untar tail |
|
74 |
-.TENxUntar <- function(con) { |
|
75 |
- dir.create(tempdir <- tempfile()) |
|
76 |
- untar(path(con), exdir = tempdir) |
|
77 |
- tempdir |
|
78 |
-} |
|
79 |
- |
|
80 |
-.readInFuns <- function(files) { |
|
81 |
- file_exts <- .get_ext(files) |
|
82 |
- lapply(.setNames(file_exts, basename(files)), function(ext) { |
|
83 |
- switch( |
|
84 |
- ext, |
|
85 |
- mtx.gz = Matrix::readMM, |
|
86 |
- tsv.gz = function(...) |
|
87 |
- readr::read_tsv(col_names = FALSE, show_col_types = FALSE, ...) |
|
88 |
- ) |
|
89 |
- }) |
|
90 |
-} |
|
91 |
- |
|
92 |
-.cleanUpFuns <- function(datalist) { |
|
93 |
- if (is.null(names(datalist))) |
|
94 |
- stop("'datalist' names must correspond to originating file names") |
|
95 |
- lapply(.setNames(nm = names(datalist)), function(fname) { |
|
96 |
- switch( |
|
97 |
- fname, |
|
98 |
- features.tsv.gz = function(df) { |
|
99 |
- names(df) <- c("ID", "Symbol", "Type", "Chr", "Start", "End") |
|
100 |
- df |
|
101 |
- }, |
|
102 |
- barcodes.tsv.gz = function(df) { |
|
103 |
- names(df) <- "barcode" |
|
104 |
- df |
|
105 |
- }, |
|
106 |
- matrix.mtx.gz = function(mat) { |
|
107 |
- as(mat, "dgCMatrix") |
|
108 |
- }, |
|
109 |
- ) |
|
110 |
- }) |
|
111 |
-} |
|
112 |
- |
|
113 |
-.TENxDecompress <- function(con) { |
|
114 |
- res_ext <- .get_ext(path(con)) |
|
115 |
- if (identical(res_ext, "tar.gz")) { |
|
116 |
- tenfolder <- .TENxUntar(con) |
|
117 |
- gfolder <- list.files(tenfolder, full.names = TRUE) |
|
118 |
- if (file.info(gfolder)$isdir) |
|
119 |
- gfiles <- list.files(gfolder, recursive = TRUE, full.names = TRUE) |
|
120 |
- else |
|
121 |
- gfiles <- gfolder |
|
122 |
- gdata <- Map( |
|
123 |
- f = function(reader, x) { |
|
124 |
- reader(x) |
|
125 |
- }, reader = .readInFuns(gfiles), x = gfiles |
|
126 |
- ) |
|
127 |
- Map(f = function(cleaner, x) { |
|
128 |
- cleaner(x) |
|
129 |
- }, cleaner = .cleanUpFuns(gdata), x = gdata |
|
130 |
- ) |
|
131 |
- } else { |
|
132 |
- stop("Extension type: ", res_ext, " not supported") |
|
133 |
- } |
|
134 |
-} |
|
135 |
- |
|
136 |
-#' @export |
|
137 |
-setMethod("import", "TENxFileList", function(con, format, text, ...) { |
|
138 |
- if (con@compressed) |
|
139 |
- fdata <- .TENxDecompress(con) |
|
140 |
- else |
|
141 |
- fdata <- con@listData |
|
142 |
- mat <- fdata[["matrix.mtx.gz"]] |
|
143 |
- colnames(mat) <- unlist(fdata[["barcodes.tsv.gz"]]) |
|
144 |
- warning("Matrix of mixed types; see in rowData(x)") |
|
145 |
- SingleCellExperiment::SingleCellExperiment( |
|
146 |
- SimpleList(counts = mat), |
|
147 |
- rowData = fdata[["features.tsv.gz"]] |
|
148 |
- ) |
|
149 |
-}) |
|
150 |
- |
|
151 |
-#' @importFrom MatrixGenerics rowRanges |
|
152 |
-#' @include TENxFileList-class.R |
|
153 |
-NULL |
|
154 |
- |
|
155 |
-gene.meta <- data.frame( |
|
156 |
- version = c("3", "2"), |
|
157 |
- ID = c("/features/id", "/genes"), |
|
158 |
- Symbol = c("/features/name", "/gene_names"), |
|
159 |
- Type = c("/features/feature_type", NA_character_) |
|
160 |
-) |
|
161 |
- |
|
162 |
-.getRowDat <- function(con) { |
|
163 |
- gm <- gene.meta[ |
|
164 |
- con@version == gene.meta[["version"]], |
|
165 |
- !names(gene.meta) %in% c("group", "version") |
|
166 |
- ] |
|
167 |
- gm[] <- Filter(Negate(is.na), gm) |
|
168 |
- res <- lapply(gm, function(colval) { |
|
169 |
- readname <- paste0(con@group, colval) |
|
170 |
- rhdf5::h5read(path(con), readname) |
|
171 |
- }) |
|
172 |
- as.data.frame(res) |
|
173 |
-} |
|
174 |
- |
|
175 |
-.getGenome <- function(con) { |
|
176 |
- gloc <- "matrix/features/genome" |
|
177 |
- gen <- unique(rhdf5::h5read(path(con), gloc)) |
|
178 |
- if (length(gen) != 1L) |
|
179 |
- stop("The genome build in ", gloc, " is not consistent") |
|
180 |
- gen |
|
181 |
-} |
|
182 |
- |
|
183 |
-#' @importFrom GenomeInfoDb genome genome<- |
|
184 |
-#' @importFrom S4Vectors mcols<- |
|
185 |
-.getRowRanges <- function(con) { |
|
186 |
- interval <- rhdf5::h5read(path(con), "matrix/features/interval") |
|
187 |
- interval[interval == "NA"] <- "NA_character_:0" |
|
188 |
- gr <- as(as.character(interval), "GRanges") |
|
189 |
- mcols(gr) <- .getRowDat(con) |
|
190 |
- genbuild <- rep(.getGenome(con), length(genome(gr))) |
|
191 |
- genome(gr) <- genbuild |
|
192 |
- gr |
|
193 |
-} |
|
194 |
- |
|
195 |
-#' @import SingleCellExperiment |
|
196 |
-#' @export |
|
197 |
-setMethod("import", "TENxH5", function(con, format, text, ...) { |
|
198 |
- if (!requireNamespace("HDF5Array", quietly = TRUE)) |
|
199 |
- stop("Install 'HDF5Array' to import TENxH5 files") |
|
200 |
- matrixdata <- HDF5Array::TENxMatrix(path(con), con@group) |
|
201 |
- if (identical(con@version, "3")) { |
|
202 |
- sce <- SingleCellExperiment::SingleCellExperiment( |
|
203 |
- assays = list(counts = matrixdata), rowRanges = .getRowRanges(con) |
|
204 |
- ) |
|
205 |
- rownames(sce) <- mcols(sce)[["ID"]] |
|
206 |
- sce <- sce[seqnames(rowRanges(sce)) != "NA_character_", ] |
|
207 |
- splitAltExps(sce, rowData(sce)[["Type"]], ref = "Gene Expression") |
|
208 |
- } else { |
|
209 |
- stop("Version 2 not supported yet.") |
|
210 |
- } |
|
211 |
-}) |
|
212 |
- |
|
213 | 4 |
#' @import SummarizedExperiment |
214 | 5 |
#' @export |
215 | 6 |
setMethod("import", "TENxMTX", function(con, format, text, ...) { |
... | ... |
@@ -299,20 +90,32 @@ setMethod("import", "TENxFileList", function(con, format, text, ...) { |
299 | 90 |
) |
300 | 91 |
}) |
301 | 92 |
|
93 |
+#' Importing peak-annotation files |
|
94 |
+#' |
|
95 |
+#' @keywords internal |
|
96 |
+#' |
|
302 | 97 |
#' @examples |
98 |
+#' |
|
303 | 99 |
#' fi <- "~/data/10x/pbmc_3k/pbmc_granulocyte_sorted_3k_atac_peak_annotation.tsv" |
304 | 100 |
#' pa <- .import_peak_anno(fi) |
101 |
+#' |
|
305 | 102 |
.import_peak_anno <- function(file) { |
306 | 103 |
stopifnot(endsWith(file, "peak_annotation.tsv")) |
307 | 104 |
panno <- readr::read_tsv(file, col_types = c("c", "n", "n", "c", "n", "c")) |
308 | 105 |
makeGRangesFromDataFrame(panno, keep.extra.columns = TRUE) |
309 | 106 |
} |
310 | 107 |
|
108 |
+#' Importing fragment files |
|
109 |
+#' |
|
110 |
+#' @keywords internal |
|
111 |
+#' |
|
311 | 112 |
#' @examples |
113 |
+#' |
|
312 | 114 |
#' con <- TENxFile("~/data/10x/pbmc_3k/pbmc_granulocyte_sorted_3k_filtered_feature_bc_matrix.h5") |
313 | 115 |
#' sce <- import(con) |
314 | 116 |
#' fr <- "~/data/10x/pbmc_3k/pbmc_granulocyte_sorted_3k_atac_fragments.tsv.gz" |
315 | 117 |
#' fra <- .import_fragments(fr) |
118 |
+#' |
|
316 | 119 |
.import_fragments <- function(file, withIndex = TRUE) { |
317 | 120 |
## read tabix format with Rsamtools |
318 | 121 |
stopifnot(endsWith(file, "atac_fragments.tsv.gz")) |
... | ... |
@@ -328,7 +131,9 @@ setMethod("import", "TENxFileList", function(con, format, text, ...) { |
328 | 131 |
## names from table in the link below |
329 | 132 |
## https://support.10xgenomics.com/single-cell-atac/software/pipelines/latest/output/fragments |
330 | 133 |
names(ex) <- c("chrom", "chromStart", "chromEnd", "barcode", "readSupport") |
331 |
- ggr <- makeGRangesFromDataFrame(ex, keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE) |
|
134 |
+ ggr <- makeGRangesFromDataFrame( |
|
135 |
+ ex, keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE |
|
136 |
+ ) |
|
332 | 137 |
ggrl <- as(splitAsList(ggr, mcols(ggr)$barcode), "GRangesList") |
333 | 138 |
RaggedExperiment(ggrl) |
334 | 139 |
# compactAssay(erg, i = "readSupport", sparse = TRUE) |
... | ... |
@@ -32,3 +32,81 @@ TENxFileList <- function(..., compressed = FALSE) { |
32 | 32 |
asl <- SimpleList(dots[[1]]) |
33 | 33 |
.TENxFileList(asl, compressed = compressed) |
34 | 34 |
} |
35 |
+ |
|
36 |
+#' @importFrom utils untar tail |
|
37 |
+.TENxUntar <- function(con) { |
|
38 |
+ dir.create(tempdir <- tempfile()) |
|
39 |
+ untar(path(con), exdir = tempdir) |
|
40 |
+ tempdir |
|
41 |
+} |
|
42 |
+ |
|
43 |
+.readInFuns <- function(files) { |
|
44 |
+ file_exts <- .get_ext(files) |
|
45 |
+ lapply(.setNames(file_exts, basename(files)), function(ext) { |
|
46 |
+ switch( |
|
47 |
+ ext, |
|
48 |
+ mtx.gz = Matrix::readMM, |
|
49 |
+ tsv.gz = function(...) |
|
50 |
+ readr::read_tsv(col_names = FALSE, show_col_types = FALSE, ...) |
|
51 |
+ ) |
|
52 |
+ }) |
|
53 |
+} |
|
54 |
+ |
|
55 |
+.cleanUpFuns <- function(datalist) { |
|
56 |
+ if (is.null(names(datalist))) |
|
57 |
+ stop("'datalist' names must correspond to originating file names") |
|
58 |
+ lapply(.setNames(nm = names(datalist)), function(fname) { |
|
59 |
+ switch( |
|
60 |
+ fname, |
|
61 |
+ features.tsv.gz = function(df) { |
|
62 |
+ names(df) <- c("ID", "Symbol", "Type", "Chr", "Start", "End") |
|
63 |
+ df |
|
64 |
+ }, |
|
65 |
+ barcodes.tsv.gz = function(df) { |
|
66 |
+ names(df) <- "barcode" |
|
67 |
+ df |
|
68 |
+ }, |
|
69 |
+ matrix.mtx.gz = function(mat) { |
|
70 |
+ as(mat, "dgCMatrix") |
|
71 |
+ }, |
|
72 |
+ ) |
|
73 |
+ }) |
|
74 |
+} |
|
75 |
+ |
|
76 |
+.TENxDecompress <- function(con) { |
|
77 |
+ res_ext <- .get_ext(path(con)) |
|
78 |
+ if (identical(res_ext, "tar.gz")) { |
|
79 |
+ tenfolder <- .TENxUntar(con) |
|
80 |
+ gfolder <- list.files(tenfolder, full.names = TRUE) |
|
81 |
+ if (file.info(gfolder)$isdir) |
|
82 |
+ gfiles <- list.files(gfolder, recursive = TRUE, full.names = TRUE) |
|
83 |
+ else |
|
84 |
+ gfiles <- gfolder |
|
85 |
+ gdata <- Map( |
|
86 |
+ f = function(reader, x) { |
|
87 |
+ reader(x) |
|
88 |
+ }, reader = .readInFuns(gfiles), x = gfiles |
|
89 |
+ ) |
|
90 |
+ Map(f = function(cleaner, x) { |
|
91 |
+ cleaner(x) |
|
92 |
+ }, cleaner = .cleanUpFuns(gdata), x = gdata |
|
93 |
+ ) |
|
94 |
+ } else { |
|
95 |
+ stop("Extension type: ", res_ext, " not supported") |
|
96 |
+ } |
|
97 |
+} |
|
98 |
+ |
|
99 |
+#' @export |
|
100 |
+setMethod("import", "TENxFileList", function(con, format, text, ...) { |
|
101 |
+ if (con@compressed) |
|
102 |
+ fdata <- .TENxDecompress(con) |
|
103 |
+ else |
|
104 |
+ fdata <- con@listData |
|
105 |
+ mat <- fdata[["matrix.mtx.gz"]] |
|
106 |
+ colnames(mat) <- unlist(fdata[["barcodes.tsv.gz"]]) |
|
107 |
+ warning("Matrix of mixed types; see in rowData(x)") |
|
108 |
+ SingleCellExperiment::SingleCellExperiment( |
|
109 |
+ SimpleList(counts = mat), |
|
110 |
+ rowData = fdata[["features.tsv.gz"]] |
|
111 |
+ ) |
|
112 |
+}) |
35 | 113 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,100 @@ |
1 |
+#' @importFrom MatrixGenerics rowRanges |
|
2 |
+NULL |
|
3 |
+ |
|
4 |
+#' @exportClass TENxH5 |
|
5 |
+.TENxH5 <- setClass( |
|
6 |
+ Class = "TENxH5", |
|
7 |
+ contains = "TENxFile", |
|
8 |
+ slots = c(version = "character", group = "character") |
|
9 |
+) |
|
10 |
+ |
|
11 |
+.get_h5_group <- function(fpath) { |
|
12 |
+ if (!requireNamespace("rhdf5", quietly = TRUE)) |
|
13 |
+ stop("Install 'rhdf5' to work with TENxH5") |
|
14 |
+ l1 <- rhdf5::h5ls(fpath, recursive = FALSE) |
|
15 |
+ l1[l1$otype == "H5I_GROUP", "name"] |
|
16 |
+} |
|
17 |
+ |
|
18 |
+.KNOWN_H5_GROUPS <- c("matrix", "outs") |
|
19 |
+ |
|
20 |
+.check_group_ok <- function(fpath) { |
|
21 |
+ gname <- .get_h5_group(fpath) |
|
22 |
+ if (!gname %in% .KNOWN_H5_GROUPS) |
|
23 |
+ stop("'group' not recognized") |
|
24 |
+ gname |
|
25 |
+} |
|
26 |
+ |
|
27 |
+#' @rdname TENxH5 |
|
28 |
+#' @title Import H5 files from 10X |
|
29 |
+#' |
|
30 |
+#' @examples |
|
31 |
+#' |
|
32 |
+#' h5f <- "~/data/10x/pbmc_3k/pbmc_granulocyte_sorted_3k_filtered_feature_bc_matrix.h5" |
|
33 |
+#' con <- TENxFile(h5f) |
|
34 |
+#' import(con) |
|
35 |
+#' |
|
36 |
+#' @export |
|
37 |
+TENxH5 <- |
|
38 |
+ function(resource, version = c("3", "2"), ...) |
|
39 |
+{ |
|
40 |
+ version <- match.arg(version) |
|
41 |
+ group <- .check_group_ok(resource) |
|
42 |
+ .TENxH5(resource = resource, group = group, version = version, ...) |
|
43 |
+} |
|
44 |
+ |
|
45 |
+gene.meta <- data.frame( |
|
46 |
+ version = c("3", "2"), |
|
47 |
+ ID = c("/features/id", "/genes"), |
|
48 |
+ Symbol = c("/features/name", "/gene_names"), |
|
49 |
+ Type = c("/features/feature_type", NA_character_) |
|
50 |
+) |
|
51 |
+ |
|
52 |
+.getRowDat <- function(con) { |
|
53 |
+ gm <- gene.meta[ |
|
54 |
+ con@version == gene.meta[["version"]], |
|
55 |
+ !names(gene.meta) %in% c("group", "version") |
|
56 |
+ ] |
|
57 |
+ gm[] <- Filter(Negate(is.na), gm) |
|
58 |
+ res <- lapply(gm, function(colval) { |
|
59 |
+ readname <- paste0(con@group, colval) |
|
60 |
+ rhdf5::h5read(path(con), readname) |
|
61 |
+ }) |
|
62 |
+ as.data.frame(res) |
|
63 |
+} |
|
64 |
+ |
|
65 |
+.getGenome <- function(con) { |
|
66 |
+ gloc <- "matrix/features/genome" |
|
67 |
+ gen <- unique(rhdf5::h5read(path(con), gloc)) |
|
68 |
+ if (length(gen) != 1L) |
|
69 |
+ stop("The genome build in ", gloc, " is not consistent") |
|
70 |
+ gen |
|
71 |
+} |
|
72 |
+ |
|
73 |
+#' @importFrom GenomeInfoDb genome genome<- |
|
74 |
+#' @importFrom S4Vectors mcols<- |
|
75 |
+.getRowRanges <- function(con) { |
|
76 |
+ interval <- rhdf5::h5read(path(con), "matrix/features/interval") |
|
77 |
+ interval[interval == "NA"] <- "NA_character_:0" |
|
78 |
+ gr <- as(as.character(interval), "GRanges") |
|
79 |
+ mcols(gr) <- .getRowDat(con) |
|
80 |
+ genbuild <- rep(.getGenome(con), length(genome(gr))) |
|
81 |
+ genome(gr) <- genbuild |
|
82 |
+ gr |
|
83 |
+} |
|
84 |
+ |
|
85 |
+#' @import SingleCellExperiment |
|
86 |
+#' @export |
|
87 |
+setMethod("import", "TENxH5", function(con, format, text, ...) { |
|
88 |
+ if (!requireNamespace("HDF5Array", quietly = TRUE)) |
|
89 |
+ stop("Install 'HDF5Array' to import TENxH5 files") |
|
90 |
+ matrixdata <- HDF5Array::TENxMatrix(path(con), con@group) |
|
91 |
+ if (identical(con@version, "3")) { |
|
92 |
+ sce <- SingleCellExperiment::SingleCellExperiment( |
|
93 |
+ assays = list(counts = matrixdata), rowRanges = .getRowRanges(con) |
|
94 |
+ ) |
|
95 |
+ rownames(sce) <- mcols(sce)[["ID"]] |
|
96 |
+ splitAltExps(sce, rowData(sce)[["Type"]], ref = "Gene Expression") |
|
97 |
+ } else { |
|
98 |
+ stop("Version 2 not supported yet.") |
|
99 |
+ } |
|
100 |
+}) |
6 | 6 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,20 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/TENxFile-methods.R |
|
3 |
+\name{.import_fragments} |
|
4 |
+\alias{.import_fragments} |
|
5 |
+\title{Importing fragment files} |
|
6 |
+\usage{ |
|
7 |
+.import_fragments(file, withIndex = TRUE) |
|
8 |
+} |
|
9 |
+\description{ |
|
10 |
+Importing fragment files |
|
11 |
+} |
|
12 |
+\examples{ |
|
13 |
+ |
|
14 |
+con <- TENxFile("~/data/10x/pbmc_3k/pbmc_granulocyte_sorted_3k_filtered_feature_bc_matrix.h5") |
|
15 |
+sce <- import(con) |
|
16 |
+fr <- "~/data/10x/pbmc_3k/pbmc_granulocyte_sorted_3k_atac_fragments.tsv.gz" |
|
17 |
+fra <- .import_fragments(fr) |
|
18 |
+ |
|
19 |
+} |
|
20 |
+\keyword{internal} |
0 | 21 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,18 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/TENxFile-methods.R |
|
3 |
+\name{.import_peak_anno} |
|
4 |
+\alias{.import_peak_anno} |
|
5 |
+\title{Importing peak-annotation files} |
|
6 |
+\usage{ |
|
7 |
+.import_peak_anno(file) |
|
8 |
+} |
|
9 |
+\description{ |
|
10 |
+Importing peak-annotation files |
|
11 |
+} |
|
12 |
+\examples{ |
|
13 |
+ |
|
14 |
+fi <- "~/data/10x/pbmc_3k/pbmc_granulocyte_sorted_3k_atac_peak_annotation.tsv" |
|
15 |
+pa <- .import_peak_anno(fi) |
|
16 |
+ |
|
17 |
+} |
|
18 |
+\keyword{internal} |