Browse code

Added needed functions for survival

Jean-Philippe Fortin authored on 10/02/2025 18:06:55
Showing 1 changed files

... ...
@@ -289,8 +289,355 @@ iplot <- function(x,
289 289
 }
290 290
 
291 291
 
292
+getZPrimeFactor <- function(x,y){
293
+    top <- sd(x)+sd(y)
294
+    bottom <- abs(mean(x)-mean(y))
295
+    factor <- 1-3*top/bottom
296
+    factor
297
+}
298
+
299
+
300
+#' @export
301
+#' @importFrom plotly plot_ly
302
+#' @importFrom plotly layout
303
+#' @importFrom magrittr %>%
304
+iplot <- function(x,y,labels,
305
+ xlab="", ylab="",
306
+ title="", col=NULL,
307
+ xlim=c(-10,10),
308
+ ylim=c(0,40)
309
+ ){
310
+  temp <- data.frame(xx=x, yy=y, label=labels)
311
+  zeroline <- TRUE
312
+  type = "scatter"
313
+  mode="markers"
314
+  pal <- c("grey75","firebrick2")
315
+
316
+  p <- plot_ly(data = temp, x = ~xx, y = ~yy, 
317
+             text=temp$label,
318
+             type=type,
319
+             mode=mode,
320
+             color=col, colors=pal,
321
+             source="volcano",
322
+             selected=list(marker=list(color='blue'))
323
+             #marker=list(color~col)
324
+  )
325
+  
326
+  p <- p %>%
327
+    plotly::layout(title = title,
328
+           xaxis = list(zeroline = zeroline, title=xlab, range=xlim),
329
+           yaxis = list(zeroline = zeroline, title=ylab, range=ylim)
330
+           )
331
+  p
332
+}
333
+
334
+getOverlap <- function(ranks1, ranks2){
335
+    kk <- 1:length(ranks1)
336
+    r <- factor(pmax(ranks1, ranks2), levels=kk)
337
+    tab <- table(r)
338
+    cs <- cumsum(tab)
339
+    cs/kk
340
+}
341
+
342
+
292 343
 
293 344
 
294 345
 
346
+#' Quick function to extract a subset of reads from NGS fastq files
347
+#' 
348
+#' Quick function to extract a subset of reads from NGS fastq files.
349
+#'
350
+#' @param ngs String specifying NGS project number (eg. "ngs3096").
351
+#'     Reads from the sample specified by \code{sample_index} will be 
352
+#'     read into R. If NULL, \code{file} must be provided instead. 
353
+#' @param file String specifying path to a FASTQ file from which reads will
354
+#'     be read from. Must be specified if \code{ngs} is NULL. 
355
+#' @param samid String specifying the SAMID of the sample to be considered.
356
+#'     Must be part of the NGS project specified by \code{ngs}.
357
+#' @param n_reads Integer specifying number of reads that should be returned.
358
+#'     1000 by default.
359
+#' @param sample_index Integer specifying which sample should be considered
360
+#'     from the list of files available for a given project specified by the 
361
+#'     \code{ngs}. 
362
+#' @param read_mate String specifying if reads from R1 or R2 files should
363
+#'     be returned. Must be either "R1" or "R2". R1" by default. 
364
+#'
365
+#' @return Character vector of sequencing reads.
366
+#'
367
+#' @author Jean-Philippe Fortin
368
+#' @examples
369
+#' \dontrun{
370
+#'     reads <- extractReads(ngs="ngs3565")
371
+#' }
372
+#' @export
373
+#' @importFrom gneDB ngsprojFastq
374
+#' @importFrom readr read_lines
375
+extractReads <- function(ngs=NULL,
376
+                         file=NULL,
377
+                         samid=NULL, 
378
+                         n_reads=1000, 
379
+                         sample_index=1,
380
+                         read_mate=c("R1", "R2")
381
+){
382
+
383
+    read_mate <- match.arg(read_mate)
384
+   
385
+
386
+    .readsFromFilePartial <- function(file,
387
+                                      n_reads=1000){
388
+        tempFile  <- paste0(tempfile(), ".gz")
389
+        tempFile2 <- gsub(".gz","", tempFile)
390
+        tempFile3 <- gsub(".gz",".txt", tempFile)
391
+        file.copy(file, tempFile, overwrite=TRUE)
392
+        #system(paste0("gunzip -f ", tempFile))
393
+        cmd <- paste0("zcat ",
394
+                      tempFile,
395
+                      " | head -",
396
+                      .makeLongInteger(n_reads*4),
397
+                      " > ",tempFile3)
398
+        system(cmd)
399
+        reads  <- readr::read_lines(tempFile3,
400
+                                    n_max = .makeLongInteger(n_reads*4))
401
+        wh <- seq(2,length(reads),4)
402
+        wh <- wh[wh<length(reads)]
403
+        reads <- reads[wh]
404
+        return(reads)
405
+    }
406
+    
407
+    .readsFromFileComplete <- function(file){
408
+        tempFile <- paste0(tempfile(), ".gz")
409
+        tempFile2 <- gsub(".gz","", tempFile)
410
+        tempFile3 <- gsub(".gz",".txt", tempFile)
411
+        file.copy(file, tempFile, overwrite=TRUE)
412
+        #system(paste0("gunzip -f ", tempFile))
413
+        cmd <- paste0("gunzip -f ", tempFile2)
414
+        system(cmd)
415
+        reads  <- readr::read_lines(tempFile2)
416
+        wh <- seq(2,length(reads),4)
417
+        wh <- wh[wh<=length(reads)]
418
+        reads <- reads[wh]
419
+        return(reads)
420
+    }
421
+    if (is.null(file) & !is.null(ngs)){
422
+        fastq.df = ngsprojFastq(ngs,collapse=FALSE)
423
+        if (read_mate=="R1"){
424
+            fastqs <- fastq.df$READ1_FILE
425
+        } else {
426
+            fastqs <- fastq.df$READ2_FILE
427
+        }
428
+        if (!is.null(samid)){
429
+            fastqs <- fastqs[grepl(samid, fastqs)]
430
+            file <- fastqs[1]
431
+        }  else {
432
+            file <- fastqs[sample_index]  
433
+        }
434
+    } else if (is.null(file) & is.null(ngs)){
435
+        stop("file or ngs must be provided. ")
436
+    }
437
+    if (!file.exists(file)){
438
+        stop("File does not exist.")
439
+    }
440
+    if (!is.null(n_reads)){
441
+        reads <- .readsFromFilePartial(file,
442
+                                       n_reads=n_reads)
443
+    } else {
444
+        reads <- .readsFromFileComplete(file)
445
+    }
446
+    return(reads)
447
+}
448
+
449
+
450
+#' Quick function to extract barcodes from a list of reads
451
+#' 
452
+#' Quick function to extract barcodes from a list of reads for testing 
453
+#'     purposes. 
454
+#'
455
+#' @param reads Character vector of sequencing reads.
456
+#' @param flank5 String containing the constant sequence on the 5'
457
+#'     flank of the barcode region.
458
+#' @param flank3 String containing the constant sequence on the 3'
459
+#'     flank of the barcode region.
460
+#' @param leftOnly Should only flank5 be considered? 
461
+#'     FALSE by default.
462
+#' @param barcode_len Length of the barcodes to be retrieved.
463
+#' @param ignore_barcode_len Should any barcode length be returned?
464
+#'     FALSE by default. 
465
+#'
466
+#' @return Character vector of barcodes.
467
+#'
468
+#' @author Jean-Philippe Fortin
469
+#' @examples
470
+#' \dontrun{
471
+#'     reads <- extractReads(ngs="ngs3565")
472
+#'     barcodes <- extractBarcodes(reads, flank5="TACCG", flank3="")
473
+#' }
474
+#' @export
475
+#' @importFrom stringr str_extract
476
+extractBarcodes <- function(reads, 
477
+                            flank5="AGTTCG", 
478
+                            flank3="TTCGGACTGT",
479
+                            leftOnly=FALSE, 
480
+                            barcode_len=19,
481
+                            ignore_barcode_len=FALSE
482
+){
483
+    # Only keeping reads that have flanking sequences:
484
+    good1 <- grepl(flank5, reads)
485
+    good2 <- grepl(flank3, reads)
486
+    if (leftOnly){
487
+        good <- which(good1)
488
+    } else {
489
+        good <- which(good1 & good2)
490
+    }
491
+    reads <- reads[good]
492
+    nleft <- nchar(flank5)
493
+    if (!ignore_barcode_len){
494
+        barcodes <- str_extract(reads,
495
+                                paste0(flank5, "[A-Z]+"))
496
+        barcodes <- gsub(flank5,"",barcodes)
497
+        barcodes <- substr(barcodes, 1, barcode_len)
498
+    } else {
499
+        barcodes <- str_extract(reads,
500
+                                paste0(flank5, "[A-Z]+", flank3))
501
+        barcodes <- gsub(flank5,"",barcodes)
502
+        barcodes <- gsub(flank3,"",barcodes)
503
+    }
504
+    return(barcodes)
505
+}
506
+
507
+
508
+
509
+
510
+
511
+#' @export 
512
+#' @importFrom biomaRt useMart getBM
513
+getEnsemblOrthologs <- function(ids, 
514
+    species.from="mouse", 
515
+    species.to="rat"
516
+){
517
+    species.latin <- list()
518
+    species.latin[["mouse"]] <- "mmusculus"
519
+    species.latin[["human"]] <- "hsapiens"
520
+    species.latin[["rat"]]   <- "rnorvegicus"
521
+    symbols <- list()
522
+    symbols[["mouse"]] <- "mgi_symbol"
523
+    symbols[["human"]] <- "hgnc_symbol"
524
+    symbols[["rat"]]   <- "rgd_symbol"
525
+
526
+    marts <- lapply(species.latin, function(x){
527
+        useMart("ensembl", dataset=paste0(x, "_gene_ensembl"))
528
+    })
529
+    names(marts) <- names(species.latin)
530
+    filters <- "ensembl_gene_id"
531
+    # Making attributes:
532
+    x1 <- paste0(species.latin[[species.to]], "_homolog_orthology_type")
533
+    x2 <- paste0(species.latin[[species.to]], "_homolog_ensembl_gene")
534
+    x3 <- "ensembl_gene_id"
535
+    attributes <- c(x1,x2,x3)
536
+
537
+    #Getting orthologs:
538
+    df <- getBM(attributes=attributes,
539
+        filters=filters,
540
+        values=ids,
541
+        mart=marts[[species.from]]
542
+    )
543
+    df <- df[df[[x1]]!="",,drop=FALSE]
544
+    col.from <- paste0(species.latin[[species.from]], "_ensembl_gene_id")
545
+    col.to <- paste0(species.latin[[species.to]], "_ensembl_gene_id")
546
+    colnames(df) <- c("type", col.to, col.from)
547
+    df <- df[,c(3,2,1)]
548
+    # Let's add gene symbol:
549
+    df.from <- getBM(attributes=c(symbols[[species.from]], "ensembl_gene_id"),
550
+        filters="ensembl_gene_id",
551
+        values=df[[col.from]],
552
+        mart=marts[[species.from]]
553
+    )
554
+    df.to <- getBM(attributes=c(symbols[[species.to]], "ensembl_gene_id"),
555
+        filters="ensembl_gene_id",
556
+        values=df[[col.to]],
557
+        mart=marts[[species.to]]
558
+    )
559
+    wh.from <- match(df[[col.from]], df.from$ensembl_gene_id)
560
+    wh.to   <- match(df[[col.to]], df.to$ensembl_gene_id)
561
+    df[[paste0(species.latin[[species.from]], "_gene")]] <- df.from[wh.from,1]
562
+    df[[paste0(species.latin[[species.to]], "_gene")]]   <- df.to[wh.to,1]
563
+    return(df)
564
+}
565
+
566
+
567
+#' @export 
568
+#' @importFrom biomaRt useMart getBM
569
+getSymbolFromEnsembl <- function(ids, 
570
+    species=c("human", "mouse")
571
+){
572
+    species <- match.arg(species)
573
+    species.latin <- list()
574
+    species.latin[["mouse"]] <- "mmusculus"
575
+    species.latin[["human"]] <- "hsapiens"
576
+    species.latin[["rat"]]   <- "rnorvegicus"
577
+    symbols <- list()
578
+    symbols[["mouse"]] <- "mgi_symbol"
579
+    symbols[["human"]] <- "hgnc_symbol"
580
+    symbols[["rat"]]   <- "rgd_symbol"
581
+
582
+    marts <- lapply(species.latin, function(x){
583
+        useMart("ensembl", dataset=paste0(x, "_gene_ensembl"))
584
+    })
585
+    names(marts) <- names(species.latin)
586
+    
587
+    df <- getBM(attributes=c(symbols[[species]], "ensembl_gene_id"),
588
+        filters="ensembl_gene_id",
589
+        values=ids,
590
+        mart=marts[[species]]
591
+    )
592
+    return(df)
593
+}
594
+
595
+
596
+#' @export 
597
+#' @importFrom biomaRt useMart getBM
598
+getEnsemblFromSymbol <- function(ids, 
599
+    species=c("human", "mouse")
600
+){
601
+    species <- match.arg(species)
602
+    species.latin <- list()
603
+    species.latin[["mouse"]] <- "mmusculus"
604
+    species.latin[["human"]] <- "hsapiens"
605
+    species.latin[["rat"]]   <- "rnorvegicus"
606
+    symbols <- list()
607
+    symbols[["mouse"]] <- "mgi_symbol"
608
+    symbols[["human"]] <- "hgnc_symbol"
609
+    symbols[["rat"]]   <- "rgd_symbol"
610
+
611
+    marts <- lapply(species.latin, function(x){
612
+        useMart("ensembl", dataset=paste0(x, "_gene_ensembl"))
613
+    })
614
+    names(marts) <- names(species.latin)
615
+    
616
+    df <- getBM(attributes=c(symbols[[species]], "ensembl_gene_id"),
617
+        filters=symbols[[species]],
618
+        values=ids,
619
+        mart=marts[[species]]
620
+    )
621
+    df
622
+}
623
+
624
+
625
+#' @export
626
+getCellSurvival <- function(r, d){
627
+    .getCellSurvival <- function(r,d){
628
+        if (d!=0){
629
+            result <- ((1+d)-sqrt((1+d)^2-4*d*r))/(2*d)
630
+        } else {
631
+            result <- r
632
+        }
633
+        result
634
+    }
635
+    survivals <- sapply(1:length(r), function(i){
636
+        .getCellSurvival(r[i], d[i])
637
+    })
638
+    survivals
639
+}
640
+
641
+
295 642
 
296 643