... | ... |
@@ -109,10 +109,14 @@ compute.gene.cdf <- function(expr, sample.idxs, Gaussk=TRUE, kernel=TRUE, |
109 | 109 |
else |
110 | 110 |
gene.cdf <- .ecdfvals_sparse_to_dense(expr[, sample.idxs, drop=FALSE], |
111 | 111 |
verbose) |
112 |
- } else if (is.matrix(expr)) |
|
113 |
- gene.cdf <- .ecdfvals_dense_to_dense(expr[, sample.idxs, drop=FALSE], |
|
112 |
+ } else if (is.matrix(expr)) { |
|
113 |
+ if (any_na) |
|
114 |
+ gene.cdf <- .ecdfvals_dense_to_dense_nas(expr[, sample.idxs, drop=FALSE], |
|
115 |
+ verbose) |
|
116 |
+ else |
|
117 |
+ gene.cdf <- .ecdfvals_dense_to_dense(expr[, sample.idxs, drop=FALSE], |
|
114 | 118 |
verbose) |
115 |
- else |
|
119 |
+ } else |
|
116 | 120 |
stop(sprintf("Matrix class %s cannot be handled yet.", class(expr))) |
117 | 121 |
} |
118 | 122 |
|
... | ... |
@@ -973,22 +977,20 @@ setMethod("gsvaEnrichment", signature(param="gsvaRanksParam"), |
973 | 977 |
walkStat <- .gsvaRndWalk_nas(gSetIdx, decOrdStat, symRnkStat, |
974 | 978 |
tau, na_use, minSize, wna_env) |
975 | 979 |
maxDev <- c(NA_real_, NA_real_) |
976 |
- if (na_use == "na.rm") |
|
977 |
- maxDev <- c(max(c(0, max(walkStat, na.rm=TRUE))), |
|
978 |
- min(c(0, min(walkStat, na.rm=TRUE)))) |
|
979 |
- else |
|
980 |
- maxDev <- c(max(c(0, max(walkStat))), |
|
981 |
- min(c(0, min(walkStat)))) |
|
980 |
+ if (any(!is.na(walkStat))) { |
|
981 |
+ if (na_use == "na.rm") |
|
982 |
+ maxDev <- c(max(c(0, max(walkStat, na.rm=TRUE))), |
|
983 |
+ min(c(0, min(walkStat, na.rm=TRUE)))) |
|
984 |
+ else |
|
985 |
+ maxDev <- c(max(c(0, max(walkStat))), |
|
986 |
+ min(c(0, min(walkStat)))) |
|
987 |
+ } |
|
982 | 988 |
maxDev |
983 | 989 |
}, rnkstats$dos, rnkstats$srs, na_use) |
984 | 990 |
md <- do.call("rbind", md) |
985 | 991 |
if (maxDiff && absRanking) |
986 | 992 |
md[, 2] <- -1 * md[, 2] |
987 |
- sco <- rep(NA_real_, length(geneSetsIdx)) |
|
988 |
- if (na_use == "na.rm") |
|
989 |
- sco <- rowSums(md, na.rm=TRUE) |
|
990 |
- else |
|
991 |
- sco <- rowSums(md) |
|
993 |
+ sco <- rowSums(md) |
|
992 | 994 |
if (!maxDiff) { |
993 | 995 |
mask <- is.na(sco) |
994 | 996 |
sco[!mask] <- md[cbind(1:sum(!mask), ifelse(sco[!mask] > 0, |
... | ... |
@@ -1026,22 +1028,20 @@ setMethod("gsvaEnrichment", signature(param="gsvaRanksParam"), |
1026 | 1028 |
walkStat <- .gsvaRndWalk_nas(gSetIdx, decOrdStat, symRnkStat, |
1027 | 1029 |
tau, na_use, minSize, wna_env) |
1028 | 1030 |
maxDev <- c(NA_real_, NA_real_) |
1029 |
- if (na_use == "na.rm") |
|
1030 |
- maxDev <- c(max(c(0, max(walkStat, na.rm=TRUE))), |
|
1031 |
- min(c(0, min(walkStat, na.rm=TRUE)))) |
|
1032 |
- else |
|
1033 |
- maxDev <- c(max(c(0, max(walkStat))), |
|
1034 |
- min(c(0, min(walkStat)))) |
|
1031 |
+ if (any(!is.na(walkStat))) { |
|
1032 |
+ if (na_use == "na.rm") |
|
1033 |
+ maxDev <- c(max(c(0, max(walkStat, na.rm=TRUE))), |
|
1034 |
+ min(c(0, min(walkStat, na.rm=TRUE)))) |
|
1035 |
+ else |
|
1036 |
+ maxDev <- c(max(c(0, max(walkStat))), |
|
1037 |
+ min(c(0, min(walkStat)))) |
|
1038 |
+ } |
|
1035 | 1039 |
maxDev |
1036 | 1040 |
}, rnkstats$dos, rnkstats$srs, na_use) |
1037 | 1041 |
md <- do.call("rbind", md) |
1038 | 1042 |
if (maxDiff && absRanking) |
1039 | 1043 |
md[, 2] <- -1 * md[, 2] |
1040 |
- sco <- rep(NA_real_, length(geneSetsIdx)) |
|
1041 |
- if (na_use == "na.rm") |
|
1042 |
- sco <- rowSums(md, na.rm=TRUE) |
|
1043 |
- else |
|
1044 |
- sco <- rowSums(md) |
|
1044 |
+ sco <- rowSums(md) |
|
1045 | 1045 |
if (!maxDiff) { |
1046 | 1046 |
mask <- is.na(sco) |
1047 | 1047 |
sco[!mask] <- md[cbind(1:sum(!mask), ifelse(sco[!mask] > 0, |
... | ... |
@@ -1225,6 +1225,12 @@ setMethod("gsvaEnrichment", signature(param="gsvaRanksParam"), |
1225 | 1225 |
.Call("ecdfvals_dense_to_dense_R", X, verbose) |
1226 | 1226 |
} |
1227 | 1227 |
|
1228 |
+.ecdfvals_dense_to_dense_nas <- function(X, verbose) { |
|
1229 |
+ stopifnot(is.matrix(X)) ## QC |
|
1230 |
+ stopifnot(is.logical(verbose)) ## QC |
|
1231 |
+ .Call("ecdfvals_dense_to_dense_nas_R", X, verbose) |
|
1232 |
+} |
|
1233 |
+ |
|
1228 | 1234 |
.kcdfvals_sparse_to_sparse <- function(X, Gaussk, verbose) { |
1229 | 1235 |
stopifnot(is(X, "CsparseMatrix")) ## QC |
1230 | 1236 |
Xrsp <- as(X, "RsparseMatrix") |
... | ... |
@@ -32,6 +32,9 @@ ecdfvals_sparse_to_dense_R(SEXP XCspR, SEXP XRspR, SEXP verboseR); |
32 | 32 |
SEXP |
33 | 33 |
ecdfvals_dense_to_dense_R(SEXP XR, SEXP verboseR); |
34 | 34 |
|
35 |
+SEXP |
|
36 |
+ecdfvals_dense_to_dense_nas_R(SEXP XR, SEXP verboseR); |
|
37 |
+ |
|
35 | 38 |
SEXP |
36 | 39 |
match_int(SEXP x, SEXP table); |
37 | 40 |
|
... | ... |
@@ -513,3 +516,134 @@ ecdfvals_dense_to_dense_R(SEXP XR, SEXP verboseR) { |
513 | 516 |
|
514 | 517 |
return(ecdfRobj); |
515 | 518 |
} |
519 |
+ |
|
520 |
+/* calculate empirical cumulative distribution function values |
|
521 |
+ * on the zero and nonzero entries from the input dense matrix, |
|
522 |
+ * when missing (NA) values are present, the returned value is |
|
523 |
+ * a dense matrix. |
|
524 |
+ */ |
|
525 |
+SEXP |
|
526 |
+ecdfvals_dense_to_dense_nas_R(SEXP XR, SEXP verboseR) { |
|
527 |
+ double* X; |
|
528 |
+ Rboolean verbose=asLogical(verboseR); |
|
529 |
+ int nr, nc; |
|
530 |
+ SEXP ecdfRobj; |
|
531 |
+ double* ecdf_vals; |
|
532 |
+ SEXP pb = R_NilValue; |
|
533 |
+ int nunprotect=0; |
|
534 |
+ |
|
535 |
+ PROTECT(XR); nunprotect++; |
|
536 |
+ |
|
537 |
+ nr = INTEGER(getAttrib(XR, R_DimSymbol))[0]; /* number of rows */ |
|
538 |
+ nc = INTEGER(getAttrib(XR, R_DimSymbol))[1]; /* number of columns */ |
|
539 |
+ X = REAL(XR); |
|
540 |
+ |
|
541 |
+ /* create a new dense matrix object to store the result, |
|
542 |
+ * if nr * nc > INT_MAX and LONG_VECTOR_SUPPORT is not |
|
543 |
+ * available, the function allocMatrix() will prompt an error */ |
|
544 |
+ ecdfRobj = PROTECT(allocMatrix(REALSXP, nr, nc)); nunprotect++; |
|
545 |
+ |
|
546 |
+ if (verbose) { |
|
547 |
+ pb = PROTECT(cli_progress_bar(nr, NULL)); |
|
548 |
+ cli_progress_set_name(pb, "Estimating ECDFs"); |
|
549 |
+ nunprotect++; |
|
550 |
+ } |
|
551 |
+ |
|
552 |
+ for (int i=0; i < nr; i++) { |
|
553 |
+ SEXP xR, uniqvR; |
|
554 |
+ int nuniqv; |
|
555 |
+ double* x; |
|
556 |
+ double* uniqv; |
|
557 |
+ double* ecdfuniqv; |
|
558 |
+ int sum; |
|
559 |
+ double* e1_p; |
|
560 |
+ const double* e2_p; |
|
561 |
+ int* mt; |
|
562 |
+ int* tab; |
|
563 |
+ int nnas; |
|
564 |
+ |
|
565 |
+ if (verbose) { /* show progress */ |
|
566 |
+ if (i % 100 == 0 && CLI_SHOULD_TICK) |
|
567 |
+ cli_progress_set(pb, i); |
|
568 |
+ } |
|
569 |
+ |
|
570 |
+ /* remove consecutive repeated elements */ |
|
571 |
+ PROTECT(uniqvR = allocVector(REALSXP, nc)); |
|
572 |
+ PROTECT(xR = allocVector(REALSXP, nc)); |
|
573 |
+ uniqv = REAL(uniqvR); |
|
574 |
+ x = REAL(xR); |
|
575 |
+ nnas = 0; /* non-NA values */ |
|
576 |
+ for (int j=0; j < nc; j++) { |
|
577 |
+#ifdef LONG_VECTOR_SUPPORT |
|
578 |
+ R_xlen_t idx = nr * j + i; |
|
579 |
+#else |
|
580 |
+ int idx = nr * j + i; |
|
581 |
+#endif |
|
582 |
+ x[j] = X[idx]; |
|
583 |
+ if (!ISNA(x[j])) { |
|
584 |
+ uniqv[nnas] = x[j]; |
|
585 |
+ nnas++; |
|
586 |
+ } |
|
587 |
+ } |
|
588 |
+ |
|
589 |
+ qsort(uniqv, nnas, sizeof(double), dbl_cmp); |
|
590 |
+ e1_p = uniqv; |
|
591 |
+ e2_p = e1_p + 1; |
|
592 |
+ nuniqv = 0; |
|
593 |
+ for (int j=0; j < nnas; j++) { |
|
594 |
+ if (*e2_p != *e1_p) { |
|
595 |
+ *(++e1_p) = *e2_p; |
|
596 |
+ nuniqv++; |
|
597 |
+ } |
|
598 |
+ e2_p++; |
|
599 |
+ } |
|
600 |
+ |
|
601 |
+ /* match original values to sorted unique values */ |
|
602 |
+ /* consider adding LONG_VECTOR_SUPPORT */ |
|
603 |
+ mt = INTEGER(match_int(xR, uniqvR)); /* 1-based! */ |
|
604 |
+ |
|
605 |
+ /* tabulate matches */ |
|
606 |
+ /* consider adding LONG_VECTOR_SUPPORT */ |
|
607 |
+ tab = R_Calloc(nuniqv, int); /* assuming zeroes are set */ |
|
608 |
+ for (int j=0; j < nc; j++) |
|
609 |
+ if (!ISNA(mt[j]) && mt[j] > 0 && mt[j] <= nuniqv) |
|
610 |
+ tab[mt[j] - 1]++; |
|
611 |
+ |
|
612 |
+ /* cumulative sum to calculate ecdf values */ |
|
613 |
+ /* consider adding LONG_VECTOR_SUPPORT */ |
|
614 |
+ ecdfuniqv = R_Calloc(nuniqv, double); /* assuming zeroes are set */ |
|
615 |
+ sum = 0; |
|
616 |
+ for (int j=0; j < nuniqv; j++) { |
|
617 |
+ sum = sum + tab[j]; |
|
618 |
+ ecdfuniqv[j] = ((double) sum) / ((double) nc); |
|
619 |
+ } |
|
620 |
+ |
|
621 |
+ /* set ecdf values on the corresponding positions |
|
622 |
+ * of the output dense matrix */ |
|
623 |
+ ecdf_vals = REAL(ecdfRobj); |
|
624 |
+ |
|
625 |
+ for (int j=0; j < nc; j++) { |
|
626 |
+#ifdef LONG_VECTOR_SUPPORT |
|
627 |
+ R_xlen_t idx = nr * j + i; |
|
628 |
+#else |
|
629 |
+ int idx = nr * j + i; |
|
630 |
+#endif |
|
631 |
+ if (!ISNA(X[idx])) |
|
632 |
+ ecdf_vals[idx] = ecdfuniqv[mt[j]-1]; |
|
633 |
+ else |
|
634 |
+ ecdf_vals[idx] = NA_REAL; |
|
635 |
+ } |
|
636 |
+ |
|
637 |
+ R_Free(ecdfuniqv); |
|
638 |
+ R_Free(tab); |
|
639 |
+ |
|
640 |
+ UNPROTECT(2); /* xR uniqvR */ |
|
641 |
+ } |
|
642 |
+ |
|
643 |
+ if (verbose) |
|
644 |
+ cli_progress_done(pb); |
|
645 |
+ |
|
646 |
+ UNPROTECT(nunprotect); /* XR ecdfRobj pb */ |
|
647 |
+ |
|
648 |
+ return(ecdfRobj); |
|
649 |
+} |
... | ... |
@@ -29,6 +29,8 @@ ecdfvals_sparse_to_dense_R(SEXP XCspR, SEXP XRspR, SEXP verboseR); |
29 | 29 |
SEXP |
30 | 30 |
ecdfvals_dense_to_dense_R(SEXP XR, SEXP verboseR); |
31 | 31 |
|
32 |
+SEXP |
|
33 |
+ecdfvals_dense_to_dense_nas_R(SEXP XR, SEXP verboseR); |
|
32 | 34 |
|
33 | 35 |
SEXP |
34 | 36 |
order_rankstat_R(SEXP xR); |
... | ... |
@@ -56,6 +58,7 @@ static R_CallMethodDef callMethods[] = { |
56 | 58 |
{"ecdfvals_sparse_to_sparse_R", (DL_FUNC) &ecdfvals_sparse_to_sparse_R, 3}, |
57 | 59 |
{"ecdfvals_sparse_to_dense_R", (DL_FUNC) &ecdfvals_sparse_to_dense_R, 3}, |
58 | 60 |
{"ecdfvals_dense_to_dense_R", (DL_FUNC) &ecdfvals_dense_to_dense_R, 2}, |
61 |
+ {"ecdfvals_dense_to_dense_nas_R", (DL_FUNC) &ecdfvals_dense_to_dense_nas_R, 2}, |
|
59 | 62 |
{"gsva_rnd_walk_R", (DL_FUNC) &gsva_rnd_walk_R, 3}, |
60 | 63 |
{"gsva_scores_genesets_R", (DL_FUNC) &gsva_score_genesets_R, 5}, |
61 | 64 |
{"order_rankstat_sparse_to_dense_R", (DL_FUNC) &order_rankstat_sparse_to_dense_R, 2}, |