9726003e |
#include <R.h>
#include <Rdefines.h>
#include <Rinternals.h>
#include <Rmath.h>
#include <R_ext/Rdynload.h>
|
ac4a7b9e |
#include <cli/progress.h>
|
9726003e |
/* from Mutils.h */
static R_INLINE
SEXP ALLOC_SLOT(SEXP obj, SEXP nm, SEXPTYPE type, int length)
{
SEXP val = allocVector(type, length);
SET_SLOT(obj, nm, val);
return val;
}
/* global variables */
extern SEXP Matrix_DimNamesSym,
Matrix_DimSym,
Matrix_xSym,
Matrix_iSym,
Matrix_jSym,
Matrix_pSym;
SEXP
|
f6e9f529 |
ecdfvals_sparse_to_sparse_R(SEXP XCspR, SEXP XRspR, SEXP verboseR);
|
9726003e |
SEXP
|
f6e9f529 |
ecdfvals_sparse_to_dense_R(SEXP XCspR, SEXP XRspR, SEXP verboseR);
|
9726003e |
SEXP
|
f6e9f529 |
ecdfvals_dense_to_dense_R(SEXP XR, SEXP verboseR);
|
9726003e |
|
5d3c2348 |
SEXP
ecdfvals_dense_to_dense_nas_R(SEXP XR, SEXP verboseR);
|
9726003e |
SEXP
match_int(SEXP x, SEXP table);
|
f6e9f529 |
int
pending_interrupt(void);
|
9726003e |
SEXP
allocMatrix(SEXPTYPE mode, int nrow, int ncol);
|
a815a208 |
/* numerical (double) comparison for qsort() - not needed with R_qsort() */
|
9726003e |
int
dbl_cmp(const void* a, const void* b) {
const double *ad = (double*) a;
const double *bd = (double*) b;
if (*ad > *bd)
return 1;
if (*ad < *bd)
return -1;
return 0;
}
/* calculate empirical cumulative distribution function values
* on the nonzero entries (only) from the input sparse matrix,
* which should be provided in both, the compressed sparse column
* (CSC) and the compressed sparse row (CSR) formats, where the latter
* is used for speeding up the scanning of the rows of the input matrix.
* the returned value is a sparse (CSC) matrix.
*/
SEXP
|
ac4a7b9e |
ecdfvals_sparse_to_sparse_R(SEXP XCspR, SEXP XRspR, SEXP verboseR) {
|
9726003e |
SEXP ecdfRobj;
int* XCsp_dim;
int* XCsp_i;
int* XCsp_p;
double* XCsp_x;
int* XRsp_j;
int* XRsp_p;
double* XRsp_x;
int nnz = length(GET_SLOT(XCspR, Matrix_xSym));
|
ac4a7b9e |
Rboolean verbose=asLogical(verboseR);
|
9726003e |
int* ecdfRobj_dim;
int* ecdfRobj_i;
int* ecdfRobj_p;
double* ecdfRobj_x;
int nr, nc;
|
ac4a7b9e |
SEXP pb = R_NilValue;
int nunprotect=0;
|
9726003e |
|
ac4a7b9e |
PROTECT(XCspR); nunprotect++;
PROTECT(XRspR); nunprotect++;
|
9726003e |
XCsp_dim = INTEGER(GET_SLOT(XCspR, Matrix_DimSym));
nr = XCsp_dim[0]; /* number of rows */
nc = XCsp_dim[1]; /* number of columns */
XCsp_i = INTEGER(GET_SLOT(XCspR, Matrix_iSym));
XCsp_p = INTEGER(GET_SLOT(XCspR, Matrix_pSym));
XCsp_x = REAL(GET_SLOT(XCspR, Matrix_xSym));
XRsp_j = INTEGER(GET_SLOT(XRspR, Matrix_jSym));
XRsp_p = INTEGER(GET_SLOT(XRspR, Matrix_pSym));
XRsp_x = REAL(GET_SLOT(XRspR, Matrix_xSym));
/* create a new dgCMatrix object (CSC) to store the result,
* copying the i and p slots from the input CsparseMatrix,
* and allocating memory for the x slot */
|
ac4a7b9e |
ecdfRobj = PROTECT(NEW_OBJECT(MAKE_CLASS("dgCMatrix"))); nunprotect++;
|
9726003e |
ecdfRobj_dim = INTEGER(ALLOC_SLOT(ecdfRobj, Matrix_DimSym, INTSXP, 2));
ecdfRobj_dim[0] = nr;
ecdfRobj_dim[1] = nc;
ecdfRobj_i = INTEGER(ALLOC_SLOT(ecdfRobj, Matrix_iSym, INTSXP, nnz));
ecdfRobj_p = INTEGER(ALLOC_SLOT(ecdfRobj, Matrix_pSym, INTSXP, nc+1));
ecdfRobj_x = REAL(ALLOC_SLOT(ecdfRobj, Matrix_xSym, REALSXP, nnz));
Memcpy(ecdfRobj_i, XCsp_i, (size_t) nnz);
Memcpy(ecdfRobj_p, XCsp_p, (size_t) (nc+1));
Memcpy(ecdfRobj_x, XCsp_x, (size_t) nnz);
|
ac4a7b9e |
if (verbose) {
pb = PROTECT(cli_progress_bar(nr, NULL));
cli_progress_set_name(pb, "Estimating ECDFs");
nunprotect++;
}
|
9726003e |
for (int i=0; i < nr; i++) {
SEXP xR, uniqvR;
int nv, nuniqv;
double* x;
double* uniqv;
double* ecdfuniqv;
int sum;
double* e1_p;
const double* e2_p;
int* mt;
int* tab;
|
ac4a7b9e |
if (verbose) { /* show progress */
if (i % 100 == 0 && CLI_SHOULD_TICK)
cli_progress_set(pb, i);
}
|
9726003e |
/* number of nonzero values in the i-th row */
nv = XRsp_p[i+1]-XRsp_p[i];
/* remove consecutive repeated elements */
/* consider adding LONG_VECTOR_SUPPORT */
PROTECT(uniqvR = allocVector(REALSXP, nv));
PROTECT(xR = allocVector(REALSXP, nv));
uniqv = REAL(uniqvR);
x = REAL(xR);
for (int j=XRsp_p[i]; j < XRsp_p[i+1]; j++) {
int k = j - XRsp_p[i];
uniqv[k] = x[k] = XRsp_x[j];
}
|
a815a208 |
/* qsort(uniqv, nv, sizeof(double), dbl_cmp); */
R_qsort(uniqv, (size_t) 1, (size_t) nv);
|
9726003e |
e1_p = uniqv;
e2_p = e1_p + 1;
nuniqv = 0;
|
a815a208 |
if (nv > 0)
nuniqv = 1;
for (int j=0; j < nv-1; j++) { /* -1 for e2_p = e1_p + 1 */
|
9726003e |
if (*e2_p != *e1_p) {
*(++e1_p) = *e2_p;
nuniqv++;
}
e2_p++;
}
/* match original values to sorted unique values */
/* consider adding LONG_VECTOR_SUPPORT */
mt = INTEGER(match_int(xR, uniqvR)); /* 1-based! */
/* tabulate matches */
/* consider adding LONG_VECTOR_SUPPORT */
|
a7c603bf |
tab = R_Calloc(nuniqv, int); /* assuming zeroes are set */
|
9726003e |
for (int j=XRsp_p[i]; j < XRsp_p[i+1]; j++) {
int k = j - XRsp_p[i];
if (mt[k] > 0 && mt[k] <= nuniqv)
tab[mt[k] - 1]++;
}
/* cumulative sum to calculate ecdf values */
/* consider adding LONG_VECTOR_SUPPORT */
|
a7c603bf |
ecdfuniqv = R_Calloc(nuniqv, double); /* assuming zeroes are set */
|
9726003e |
sum = 0;
for (int j=0; j < nuniqv; j++) {
sum = sum + tab[j];
ecdfuniqv[j] = ((double) sum) / ((double) nv);
}
/* set ecdf values on the corresponding positions
* of the output CSC matrix */
for (int j=XRsp_p[i]; j < XRsp_p[i+1]; j++) {
int k = j - XRsp_p[i]; /* index value at ecdf */
int col = XRsp_j[j]; /* zero-based col index */
int nnz2col = XCsp_p[col]; /* nonzero values to col */
int s; /* shift to row in the col */
int XCsp_idx; /* index value at CSC */
s = 0;
while (XCsp_i[nnz2col+s] != i && nnz2col+s < XCsp_p[col+1])
s++;
XCsp_idx = nnz2col + s;
ecdfRobj_x[XCsp_idx] = ecdfuniqv[mt[k]-1];
}
|
a7c603bf |
R_Free(ecdfuniqv);
R_Free(tab);
|
9726003e |
UNPROTECT(2); /* xR uniqvR */
}
|
ac4a7b9e |
if (verbose)
cli_progress_done(pb);
UNPROTECT(nunprotect); /* XCspR XRspR ecdfRobj pb */
|
9726003e |
return(ecdfRobj);
}
/* calculate empirical cumulative distribution function values
* on the zero and nonzero entries from the input sparse matrix,
* which should be provided in both, the compressed sparse column
* (CSC) and the compressed sparse row (CSR) formats, where the latter
* is used for speeding up the scanning of the rows of the input matrix.
* the returned value is a dense matrix.
*/
SEXP
|
f6e9f529 |
ecdfvals_sparse_to_dense_R(SEXP XCspR, SEXP XRspR, SEXP verboseR) {
|
9726003e |
SEXP ecdfRobj;
double* ecdf_vals;
int* XCsp_dim;
int* XRsp_j;
int* XRsp_p;
|
f6e9f529 |
Rboolean verbose=asLogical(verboseR);
|
9726003e |
double* XRsp_x;
int nr, nc;
|
f6e9f529 |
SEXP pb = R_NilValue;
int nunprotect=0;
|
9726003e |
|
f6e9f529 |
PROTECT(XCspR); nunprotect++;
PROTECT(XRspR); nunprotect++;
|
9726003e |
XCsp_dim = INTEGER(GET_SLOT(XCspR, Matrix_DimSym));
nr = XCsp_dim[0]; /* number of rows */
nc = XCsp_dim[1]; /* number of columns */
XRsp_j = INTEGER(GET_SLOT(XRspR, Matrix_jSym));
XRsp_p = INTEGER(GET_SLOT(XRspR, Matrix_pSym));
XRsp_x = REAL(GET_SLOT(XRspR, Matrix_xSym));
/* create a new dense matrix object to store the result,
* if nr * nc > INT_MAX and LONG_VECTOR_SUPPORT is not
* available, the function allocMatrix() will prompt an error */
|
f6e9f529 |
ecdfRobj = PROTECT(allocMatrix(REALSXP, nr, nc)); nunprotect++;
if (verbose) {
pb = PROTECT(cli_progress_bar(nr, NULL));
cli_progress_set_name(pb, "Estimating ECDFs");
nunprotect++;
}
|
9726003e |
for (int i=0; i < nr; i++) {
SEXP xR, uniqvR;
int nv, nuniqv;
double* x;
double* uniqv;
double* ecdfuniqv;
int sum;
double* e1_p;
const double* e2_p;
int* mt;
int* tab;
Rboolean zeroes=FALSE;
int whz, icz;
|
f6e9f529 |
if (verbose) { /* show progress */
if (i % 100 == 0 && CLI_SHOULD_TICK)
cli_progress_set(pb, i);
}
|
9726003e |
/* number of nonzero values in the i-th row */
nv = XRsp_p[i+1]-XRsp_p[i];
if (nv < nc) { /* if there is at least one zero in the row */
nv++; /* add that zero as an extra possible value */
zeroes=TRUE;
}
/* remove consecutive repeated elements */
/* consider adding LONG_VECTOR_SUPPORT */
PROTECT(uniqvR = allocVector(REALSXP, nv));
PROTECT(xR = allocVector(REALSXP, zeroes ? nv-1 : nv));
uniqv = REAL(uniqvR);
x = REAL(xR);
if (zeroes) { /* if there is at least one zero in the row */
uniqv[0] = 0; /* add that zero as an extra possible value */
for (int j=XRsp_p[i]; j < XRsp_p[i+1]; j++) {
int k = j - XRsp_p[i];
uniqv[k+1] = XRsp_x[j];
x[k] = XRsp_x[j];
}
} else {
for (int j=XRsp_p[i]; j < XRsp_p[i+1]; j++) {
int k = j - XRsp_p[i];
uniqv[k] = x[k] = XRsp_x[j];
}
}
|
a815a208 |
/* qsort(uniqv, nv, sizeof(double), dbl_cmp); */
R_qsort(uniqv, (size_t) 1, (size_t) nv);
|
9726003e |
e1_p = uniqv;
e2_p = e1_p + 1;
nuniqv = 0;
|
a815a208 |
if (nv > 0)
nuniqv = 1;
for (int j=0; j < nv-1; j++) { /* -1 for e2_p = e1_p + 1 */
|
9726003e |
if (*e2_p != *e1_p) {
*(++e1_p) = *e2_p;
nuniqv++;
}
e2_p++;
}
/* match original values to sorted unique values */
/* consider adding LONG_VECTOR_SUPPORT */
mt = INTEGER(match_int(xR, uniqvR)); /* 1-based! */
/* tabulate matches */
/* consider adding LONG_VECTOR_SUPPORT */
|
a7c603bf |
tab = R_Calloc(nuniqv, int); /* assuming zeroes are set */
|
9726003e |
for (int j=XRsp_p[i]; j < XRsp_p[i+1]; j++) {
int k = j - XRsp_p[i];
if (mt[k] > 0 && mt[k] <= nuniqv)
tab[mt[k] - 1]++;
}
whz = -1;
if (zeroes) { /* if there is at least one zero in the row */
int j = 0;
while (j < nuniqv && tab[j] != 0)
j++;
if (j < nuniqv && tab[j] == 0) { /* add the number of zeroes */
tab[j] = nc - nv + 1; /* +1 b/c one zero is in nv */
whz = j;
}
}
/* cumulative sum to calculate ecdf values */
/* consider adding LONG_VECTOR_SUPPORT */
|
a7c603bf |
ecdfuniqv = R_Calloc(nuniqv, double); /* assuming zeroes are set */
|
9726003e |
sum = 0;
for (int j=0; j < nuniqv; j++) {
sum = sum + tab[j];
ecdfuniqv[j] = ((double) sum) / ((double) nc);
}
/* set ecdf values on the corresponding positions
* of the output dense matrix */
ecdf_vals = REAL(ecdfRobj);
icz = 0; /* zero-based index of the columns at zeroes */
for (int j=XRsp_p[i]; j < XRsp_p[i+1]; j++) {
int k = j - XRsp_p[i]; /* index value at ecdf */
int col = XRsp_j[j]; /* zero-based col index */
#ifdef LONG_VECTOR_SUPPORT
R_xlen_t idx = nr * col + i;
#else
int idx = nr * col + i;
#endif
while (icz < col) { /* fill up the zero columns */
#ifdef LONG_VECTOR_SUPPORT
R_xlen_t idxz = nr * icz + i;
#else
int idxz = nr * icz + i;
#endif
ecdf_vals[idxz] = ecdfuniqv[whz];
icz++;
}
icz = col+1;
ecdf_vals[idx] = ecdfuniqv[mt[k]-1];
}
for (int j=icz; j < nc; j++) { /* fill up remaining zero columns */
#ifdef LONG_VECTOR_SUPPORT
R_xlen_t idxz = nr * j + i;
#else
int idxz = nr * j + i;
#endif
ecdf_vals[idxz] = ecdfuniqv[whz];
}
|
a7c603bf |
R_Free(ecdfuniqv);
R_Free(tab);
|
9726003e |
UNPROTECT(2); /* xR uniqvR */
}
|
f6e9f529 |
if (verbose)
cli_progress_done(pb);
UNPROTECT(nunprotect); /* XCspR XRspR ecdfRobj pb */
|
9726003e |
return(ecdfRobj);
}
/* calculate empirical cumulative distribution function values
* on the zero and nonzero entries from the input dense matrix,
* the returned value is a dense matrix.
*/
SEXP
|
f6e9f529 |
ecdfvals_dense_to_dense_R(SEXP XR, SEXP verboseR) {
|
9726003e |
double* X;
|
f6e9f529 |
Rboolean verbose=asLogical(verboseR);
|
9726003e |
int nr, nc;
SEXP ecdfRobj;
double* ecdf_vals;
|
f6e9f529 |
SEXP pb = R_NilValue;
int nunprotect=0;
|
9726003e |
|
f6e9f529 |
PROTECT(XR); nunprotect++;
|
9726003e |
nr = INTEGER(getAttrib(XR, R_DimSymbol))[0]; /* number of rows */
nc = INTEGER(getAttrib(XR, R_DimSymbol))[1]; /* number of columns */
X = REAL(XR);
/* create a new dense matrix object to store the result,
* if nr * nc > INT_MAX and LONG_VECTOR_SUPPORT is not
* available, the function allocMatrix() will prompt an error */
|
f6e9f529 |
ecdfRobj = PROTECT(allocMatrix(REALSXP, nr, nc)); nunprotect++;
if (verbose) {
pb = PROTECT(cli_progress_bar(nr, NULL));
cli_progress_set_name(pb, "Estimating ECDFs");
nunprotect++;
}
|
9726003e |
for (int i=0; i < nr; i++) {
SEXP xR, uniqvR;
int nuniqv;
double* x;
double* uniqv;
double* ecdfuniqv;
int sum;
double* e1_p;
const double* e2_p;
int* mt;
int* tab;
|
f6e9f529 |
if (verbose) { /* show progress */
if (i % 100 == 0 && CLI_SHOULD_TICK)
cli_progress_set(pb, i);
}
|
9726003e |
/* remove consecutive repeated elements */
PROTECT(uniqvR = allocVector(REALSXP, nc));
PROTECT(xR = allocVector(REALSXP, nc));
uniqv = REAL(uniqvR);
x = REAL(xR);
for (int j=0; j < nc; j++) {
#ifdef LONG_VECTOR_SUPPORT
R_xlen_t idx = nr * j + i;
#else
int idx = nr * j + i;
#endif
uniqv[j] = x[j] = X[idx];
}
|
a815a208 |
/* qsort(uniqv, nc, sizeof(double), dbl_cmp); */
R_qsort(uniqv, (size_t) 1, (size_t) nc);
|
9726003e |
e1_p = uniqv;
e2_p = e1_p + 1;
nuniqv = 0;
|
a815a208 |
if (nc > 0)
nuniqv = 1;
for (int j=0; j < nc-1; j++) { /* -1 for e2_p = e1_p + 1 */
|
9726003e |
if (*e2_p != *e1_p) {
*(++e1_p) = *e2_p;
nuniqv++;
}
e2_p++;
}
/* match original values to sorted unique values */
/* consider adding LONG_VECTOR_SUPPORT */
mt = INTEGER(match_int(xR, uniqvR)); /* 1-based! */
/* tabulate matches */
/* consider adding LONG_VECTOR_SUPPORT */
|
a7c603bf |
tab = R_Calloc(nuniqv, int); /* assuming zeroes are set */
|
9726003e |
for (int j=0; j < nc; j++)
if (mt[j] > 0 && mt[j] <= nuniqv)
tab[mt[j] - 1]++;
/* cumulative sum to calculate ecdf values */
/* consider adding LONG_VECTOR_SUPPORT */
|
a7c603bf |
ecdfuniqv = R_Calloc(nuniqv, double); /* assuming zeroes are set */
|
9726003e |
sum = 0;
for (int j=0; j < nuniqv; j++) {
sum = sum + tab[j];
ecdfuniqv[j] = ((double) sum) / ((double) nc);
}
/* set ecdf values on the corresponding positions
* of the output dense matrix */
ecdf_vals = REAL(ecdfRobj);
for (int j=0; j < nc; j++) {
#ifdef LONG_VECTOR_SUPPORT
R_xlen_t idx = nr * j + i;
#else
int idx = nr * j + i;
#endif
ecdf_vals[idx] = ecdfuniqv[mt[j]-1];
}
|
a7c603bf |
R_Free(ecdfuniqv);
R_Free(tab);
|
9726003e |
UNPROTECT(2); /* xR uniqvR */
}
|
f6e9f529 |
if (verbose)
cli_progress_done(pb);
UNPROTECT(nunprotect); /* XR ecdfRobj pb */
|
9726003e |
return(ecdfRobj);
}
|
5d3c2348 |
/* calculate empirical cumulative distribution function values
* on the zero and nonzero entries from the input dense matrix,
* when missing (NA) values are present, the returned value is
* a dense matrix.
*/
SEXP
ecdfvals_dense_to_dense_nas_R(SEXP XR, SEXP verboseR) {
double* X;
Rboolean verbose=asLogical(verboseR);
int nr, nc;
SEXP ecdfRobj;
double* ecdf_vals;
SEXP pb = R_NilValue;
int nunprotect=0;
PROTECT(XR); nunprotect++;
nr = INTEGER(getAttrib(XR, R_DimSymbol))[0]; /* number of rows */
nc = INTEGER(getAttrib(XR, R_DimSymbol))[1]; /* number of columns */
X = REAL(XR);
/* create a new dense matrix object to store the result,
* if nr * nc > INT_MAX and LONG_VECTOR_SUPPORT is not
* available, the function allocMatrix() will prompt an error */
ecdfRobj = PROTECT(allocMatrix(REALSXP, nr, nc)); nunprotect++;
if (verbose) {
pb = PROTECT(cli_progress_bar(nr, NULL));
cli_progress_set_name(pb, "Estimating ECDFs");
nunprotect++;
}
for (int i=0; i < nr; i++) {
SEXP xR, uniqvR;
int nuniqv;
double* x;
double* uniqv;
double* ecdfuniqv;
int sum;
double* e1_p;
const double* e2_p;
int* mt;
int* tab;
int nnas;
if (verbose) { /* show progress */
if (i % 100 == 0 && CLI_SHOULD_TICK)
cli_progress_set(pb, i);
}
/* remove consecutive repeated elements */
PROTECT(uniqvR = allocVector(REALSXP, nc));
PROTECT(xR = allocVector(REALSXP, nc));
uniqv = REAL(uniqvR);
x = REAL(xR);
nnas = 0; /* non-NA values */
for (int j=0; j < nc; j++) {
#ifdef LONG_VECTOR_SUPPORT
R_xlen_t idx = nr * j + i;
#else
int idx = nr * j + i;
#endif
x[j] = X[idx];
if (!ISNA(x[j])) {
uniqv[nnas] = x[j];
nnas++;
}
}
|
a815a208 |
/* qsort(uniqv, nnas, sizeof(double), dbl_cmp); */
R_qsort(uniqv, (size_t) 1, (size_t) nnas);
|
5d3c2348 |
e1_p = uniqv;
e2_p = e1_p + 1;
nuniqv = 0;
|
a815a208 |
if (nnas > 0)
nuniqv = 1;
for (int j=0; j < nnas-1; j++) { /* -1 for e2_p = e1_p + 1 */
|
5d3c2348 |
if (*e2_p != *e1_p) {
*(++e1_p) = *e2_p;
nuniqv++;
}
e2_p++;
}
/* match original values to sorted unique values */
/* consider adding LONG_VECTOR_SUPPORT */
mt = INTEGER(match_int(xR, uniqvR)); /* 1-based! */
/* tabulate matches */
/* consider adding LONG_VECTOR_SUPPORT */
tab = R_Calloc(nuniqv, int); /* assuming zeroes are set */
for (int j=0; j < nc; j++)
|
52fcbfd6 |
if (mt[j] != NA_INTEGER && mt[j] > 0 && mt[j] <= nuniqv)
|
5d3c2348 |
tab[mt[j] - 1]++;
/* cumulative sum to calculate ecdf values */
/* consider adding LONG_VECTOR_SUPPORT */
ecdfuniqv = R_Calloc(nuniqv, double); /* assuming zeroes are set */
sum = 0;
for (int j=0; j < nuniqv; j++) {
sum = sum + tab[j];
ecdfuniqv[j] = ((double) sum) / ((double) nc);
}
/* set ecdf values on the corresponding positions
* of the output dense matrix */
ecdf_vals = REAL(ecdfRobj);
for (int j=0; j < nc; j++) {
#ifdef LONG_VECTOR_SUPPORT
R_xlen_t idx = nr * j + i;
#else
int idx = nr * j + i;
#endif
if (!ISNA(X[idx]))
ecdf_vals[idx] = ecdfuniqv[mt[j]-1];
else
ecdf_vals[idx] = NA_REAL;
}
R_Free(ecdfuniqv);
R_Free(tab);
UNPROTECT(2); /* xR uniqvR */
}
if (verbose)
cli_progress_done(pb);
UNPROTECT(nunprotect); /* XR ecdfRobj pb */
return(ecdfRobj);
}
|