#' @title Run PARADIGM on multi-omic data #' #' @param real_se A SummarizedExperiment object of PARADIGM CNA and RNA states. #' It is the same matrix as the output from `ppRealInp()`. #' #' @param fpth Name of a pathway file for PARADIGM. #' #' @param outdir Output folder to save all results. #' #' @param PARADIGM_bin PARADIGM binary, which can be downloaded from #' https://github.com/sng87/paradigm-scripts/tree/master/public/exe. Note that #' the binary is only available for Linux or MacOS. Default: NULL #' #' @param nohup_bin nohup binary, which is used for long running PARADIGM jobs. #' Default: NULL #' #' @param sampleids A vector of sample IDs to run PARADIGM on. If not provided, #' all the samples that exist in both copy-number alteration #' and RNA files will be ran. Default: NULL #' #' @param file_tag A string of output file name tag. Default: NULL #' #' @inheritParams ppRnaInp #' #' @usage runPrd(real_se, fpth, outdir, PARADIGM_bin=NULL, nohup_bin=NULL, #' sampleids=NULL, file_tag=NULL, threads=1) #' #' @return None #' #' @export #' #' @examples #' #' freal = system.file('extdata/TcgaInp/inp_real.rds', package='MPAC') #' real_se = readRDS(freal) #' #' fpth = system.file('extdata/Pth/tiny_pth.txt', package='MPAC') #' outdir = tempdir() #' paradigm_bin = '/path/to/PARADIGM' ## change to binary location #' #' # depends on external PARADIGM binary #' runPrd(real_se, fpth, outdir, paradigm_bin, sampleids=c('TCGA-CV-7100')) #' #' @importFrom BiocParallel bplapply #' @importFrom SummarizedExperiment assays #' runPrd <- function(real_se, fpth, outdir, PARADIGM_bin=NULL, nohup_bin=NULL, sampleids=NULL, file_tag=NULL, threads=1) { if ( is.null(sampleids) ) { sampleids <- colnames(real_se) } prots <- rownames(real_se) |> sort() cn_state_mat <- assays(real_se)$CN_state |> _[prots, sampleids, drop=FALSE] |> t() rna_state_mat <- assays(real_se)$RNA_state |> _[prots, sampleids, drop=FALSE] |> t() # jntl <- findJntPatProt(cn_state_mat, rna_state_mat, fpth, sampleids) if ( ! file.exists(outdir)) dir.create(outdir, recursive=TRUE) pats <- rownames(cn_state_mat) dummy <- bplapply(pats, runPrdByPat, cn_state_mat, rna_state_mat, fpth, outdir, PARADIGM_bin, nohup_bin, file_tag, BPPARAM=getBPPARAM(threads)) } #' @title Run PARADIGM on permuted data #' #' @param perml A list of SummarizedExperiment objects of permuted CNA and #' RNA states generated by `ppPermInp()`. #' #' @inheritParams runPrd #' #' @usage runPermPrd(perml, fpth, outdir, #' PARADIGM_bin=NULL, nohup_bin=NULL, sampleids=NULL, threads=1) #' #' @return None #' #' @export #' #' @examples #' #' fperm = system.file('extdata/TcgaInp/inp_perm.rds', package='MPAC') #' perml = readRDS(fperm) #' fpth = system.file('extdata/Pth/tiny_pth.txt', package='MPAC') #' outdir = tempdir() #' paradigm_bin = '/path/to/PARADIGM' ## change to binary location #' pat = 'TCGA-CV-7100' #' #' # depends on external PARADIGM binary, do not run #' runPermPrd(perml, fpth, outdir, paradigm_bin, sampleids=c(pat)) #' #' @importFrom BiocParallel bplapply #' @importFrom S4Vectors metadata #' runPermPrd <- function(perml, fpth, outdir, PARADIGM_bin=NULL, nohup_bin=NULL, sampleids=NULL, threads=1) { dummy <- lapply(perml, function(perm_se) { iperm_tag <- paste0('p', metadata(perm_se)$i) iperm_outdir <- paste0(outdir, '/', iperm_tag, '/') if ( is.null(sampleids) ) { sampleids <- colnames(perm_se) } runPrd(perm_se, fpth, iperm_outdir, PARADIGM_bin, nohup_bin, sampleids, threads, file_tag=metadata(perm_se)$i) }) } runPrdByPat <- function(pat, cnmat, rnamat, fpth, outdir, PARADIGM_bin, nohup_bin, file_tag=NULL) { outl <- ppRunPrd(pat, cnmat, rnamat, outdir, file_tag) fcfg <- outl$fcfg fcpt <- outl$fcpt fipl <- outl$fipl out_prefix <- outl$out_prefix runPARADIGM(fcfg, fcpt, fipl, out_prefix, fpth, PARADIGM_bin, nohup_bin) } #' @title Prepare required files to run PARADIGM #' #' @param pat Sample ID #' #' @param cnmat CN matrix #' #' @param rnamat RNA matrix #' #' @inheritParams runPrd #' #' @usage ppRunPrd(pat, cnmat, rnamat, outdir, file_tag=NULL) #' #' @return None #' #' @export #' ppRunPrd <- function(pat, cnmat, rnamat, outdir, file_tag=NULL) { fone_samp <- tag <- NULL if ( is.null(file_tag) ) { file_tag <- pat } else { file_tag <- paste0(pat, '_', file_tag) } out_prefix <- paste0(outdir, '/', file_tag, '_') omicdt <- data.table( rbind( c( 'genome', 'inp_focal' ), c( 'mRNA', 'inp_log10fpkmP1' ) )) |> setnames( c('type', 'tag') ) |> _[, fone_samp := paste0(out_prefix, tag, '.tsv')] type2mat <- list( 'genome' = cnmat[ pat, , drop=FALSE], 'mRNA' = rnamat[pat, , drop=FALSE] ) fcfg <- paste0(out_prefix, 'cfg.txt') fcpt <- paste0(out_prefix, 'cpt.txt') fipl <- paste0(out_prefix, 'ipl.txt') Map(prepOmic, omicdt$type, omicdt$fone_samp, MoreArgs=list(type2mat)) prepConfig(fcfg, omicdt) list(fcfg=fcfg, fcpt=fcpt, fipl=fipl, out_prefix=out_prefix) } prepOmic <- function(type, fone_samp, type2mat) { type2mat[[type]] |> as.data.table(keep.rownames=TRUE) |> setnames('rn', 'pat') |> fwrite(fone_samp, quote=FALSE, sep="\t") } runPARADIGM <- function(fcfg, fcpt, fipl, out_prefix, fpth, PARADIGM_bin, nohup_bin){ args <- c( '-p', fpth, '-c', fcfg, '-b', dirname(out_prefix) |> paste0('/'), '-e', fcpt, '-o', fipl, '-m 1' ## max_mem_in_G ) fout <- paste0(out_prefix, 'run.out') ferr <- paste0(out_prefix, 'run.err') if ( is.null(PARADIGM_bin) ) { paste0("Please provide PARADIGM binary. See vignette for details\n") |> cat() } else if ( ! file.exists(PARADIGM_bin) ) { paste0('PARADIGM binary not found:', PARADIGM_bin, "\n") |> cat() } else { ## cat(PARADIGM_bin, args, "\n") if ( is.null(nohup_bin) ) { system2(PARADIGM_bin, args=args, stdout=fout, stderr=ferr) } else { args <- c(PARADIGM_bin, args) system2(nohup_bin, args=args, stdout=fout, stderr=ferr) } } } prepConfig <- function(fcfg, omicdt) { s_obss <- c() s_evis <- c() options(scipen=10) ## for cnv_cutoff == 0.0001 for ( i in seq_len(nrow(omicdt)) ) { fname_one_samp <- basename(omicdt[i]$fone_samp) s_obss <- c(s_obss, paste0(fname_one_samp, '=-obs>')) s_evis <- c(s_evis, paste0('evidence [suffix=', fname_one_samp, ',node=', omicdt[i]$type, ',disc=-0.5;0.5', ',epsilon=0.001,epsilon0=0.2]')) } paste0( "pathway [max_in_degree=5]\n", "inference [method=BP,updates=SEQFIX,tol=1e-9,maxiter=10000,", "logdomain=0]\n", 'em_step [', paste(s_obss, collapse=','), "]\n", "em [max_iters=10000,log_z_tol=1e-10]\n", paste(s_evis, collapse="\n") ) |> write(file=fcfg) }