... | ... |
@@ -289,8 +289,355 @@ iplot <- function(x, |
289 | 289 |
} |
290 | 290 |
|
291 | 291 |
|
292 |
+getZPrimeFactor <- function(x,y){ |
|
293 |
+ top <- sd(x)+sd(y) |
|
294 |
+ bottom <- abs(mean(x)-mean(y)) |
|
295 |
+ factor <- 1-3*top/bottom |
|
296 |
+ factor |
|
297 |
+} |
|
298 |
+ |
|
299 |
+ |
|
300 |
+#' @export |
|
301 |
+#' @importFrom plotly plot_ly |
|
302 |
+#' @importFrom plotly layout |
|
303 |
+#' @importFrom magrittr %>% |
|
304 |
+iplot <- function(x,y,labels, |
|
305 |
+ xlab="", ylab="", |
|
306 |
+ title="", col=NULL, |
|
307 |
+ xlim=c(-10,10), |
|
308 |
+ ylim=c(0,40) |
|
309 |
+ ){ |
|
310 |
+ temp <- data.frame(xx=x, yy=y, label=labels) |
|
311 |
+ zeroline <- TRUE |
|
312 |
+ type = "scatter" |
|
313 |
+ mode="markers" |
|
314 |
+ pal <- c("grey75","firebrick2") |
|
315 |
+ |
|
316 |
+ p <- plot_ly(data = temp, x = ~xx, y = ~yy, |
|
317 |
+ text=temp$label, |
|
318 |
+ type=type, |
|
319 |
+ mode=mode, |
|
320 |
+ color=col, colors=pal, |
|
321 |
+ source="volcano", |
|
322 |
+ selected=list(marker=list(color='blue')) |
|
323 |
+ #marker=list(color~col) |
|
324 |
+ ) |
|
325 |
+ |
|
326 |
+ p <- p %>% |
|
327 |
+ plotly::layout(title = title, |
|
328 |
+ xaxis = list(zeroline = zeroline, title=xlab, range=xlim), |
|
329 |
+ yaxis = list(zeroline = zeroline, title=ylab, range=ylim) |
|
330 |
+ ) |
|
331 |
+ p |
|
332 |
+} |
|
333 |
+ |
|
334 |
+getOverlap <- function(ranks1, ranks2){ |
|
335 |
+ kk <- 1:length(ranks1) |
|
336 |
+ r <- factor(pmax(ranks1, ranks2), levels=kk) |
|
337 |
+ tab <- table(r) |
|
338 |
+ cs <- cumsum(tab) |
|
339 |
+ cs/kk |
|
340 |
+} |
|
341 |
+ |
|
342 |
+ |
|
292 | 343 |
|
293 | 344 |
|
294 | 345 |
|
346 |
+#' Quick function to extract a subset of reads from NGS fastq files |
|
347 |
+#' |
|
348 |
+#' Quick function to extract a subset of reads from NGS fastq files. |
|
349 |
+#' |
|
350 |
+#' @param ngs String specifying NGS project number (eg. "ngs3096"). |
|
351 |
+#' Reads from the sample specified by \code{sample_index} will be |
|
352 |
+#' read into R. If NULL, \code{file} must be provided instead. |
|
353 |
+#' @param file String specifying path to a FASTQ file from which reads will |
|
354 |
+#' be read from. Must be specified if \code{ngs} is NULL. |
|
355 |
+#' @param samid String specifying the SAMID of the sample to be considered. |
|
356 |
+#' Must be part of the NGS project specified by \code{ngs}. |
|
357 |
+#' @param n_reads Integer specifying number of reads that should be returned. |
|
358 |
+#' 1000 by default. |
|
359 |
+#' @param sample_index Integer specifying which sample should be considered |
|
360 |
+#' from the list of files available for a given project specified by the |
|
361 |
+#' \code{ngs}. |
|
362 |
+#' @param read_mate String specifying if reads from R1 or R2 files should |
|
363 |
+#' be returned. Must be either "R1" or "R2". R1" by default. |
|
364 |
+#' |
|
365 |
+#' @return Character vector of sequencing reads. |
|
366 |
+#' |
|
367 |
+#' @author Jean-Philippe Fortin |
|
368 |
+#' @examples |
|
369 |
+#' \dontrun{ |
|
370 |
+#' reads <- extractReads(ngs="ngs3565") |
|
371 |
+#' } |
|
372 |
+#' @export |
|
373 |
+#' @importFrom gneDB ngsprojFastq |
|
374 |
+#' @importFrom readr read_lines |
|
375 |
+extractReads <- function(ngs=NULL, |
|
376 |
+ file=NULL, |
|
377 |
+ samid=NULL, |
|
378 |
+ n_reads=1000, |
|
379 |
+ sample_index=1, |
|
380 |
+ read_mate=c("R1", "R2") |
|
381 |
+){ |
|
382 |
+ |
|
383 |
+ read_mate <- match.arg(read_mate) |
|
384 |
+ |
|
385 |
+ |
|
386 |
+ .readsFromFilePartial <- function(file, |
|
387 |
+ n_reads=1000){ |
|
388 |
+ tempFile <- paste0(tempfile(), ".gz") |
|
389 |
+ tempFile2 <- gsub(".gz","", tempFile) |
|
390 |
+ tempFile3 <- gsub(".gz",".txt", tempFile) |
|
391 |
+ file.copy(file, tempFile, overwrite=TRUE) |
|
392 |
+ #system(paste0("gunzip -f ", tempFile)) |
|
393 |
+ cmd <- paste0("zcat ", |
|
394 |
+ tempFile, |
|
395 |
+ " | head -", |
|
396 |
+ .makeLongInteger(n_reads*4), |
|
397 |
+ " > ",tempFile3) |
|
398 |
+ system(cmd) |
|
399 |
+ reads <- readr::read_lines(tempFile3, |
|
400 |
+ n_max = .makeLongInteger(n_reads*4)) |
|
401 |
+ wh <- seq(2,length(reads),4) |
|
402 |
+ wh <- wh[wh<length(reads)] |
|
403 |
+ reads <- reads[wh] |
|
404 |
+ return(reads) |
|
405 |
+ } |
|
406 |
+ |
|
407 |
+ .readsFromFileComplete <- function(file){ |
|
408 |
+ tempFile <- paste0(tempfile(), ".gz") |
|
409 |
+ tempFile2 <- gsub(".gz","", tempFile) |
|
410 |
+ tempFile3 <- gsub(".gz",".txt", tempFile) |
|
411 |
+ file.copy(file, tempFile, overwrite=TRUE) |
|
412 |
+ #system(paste0("gunzip -f ", tempFile)) |
|
413 |
+ cmd <- paste0("gunzip -f ", tempFile2) |
|
414 |
+ system(cmd) |
|
415 |
+ reads <- readr::read_lines(tempFile2) |
|
416 |
+ wh <- seq(2,length(reads),4) |
|
417 |
+ wh <- wh[wh<=length(reads)] |
|
418 |
+ reads <- reads[wh] |
|
419 |
+ return(reads) |
|
420 |
+ } |
|
421 |
+ if (is.null(file) & !is.null(ngs)){ |
|
422 |
+ fastq.df = ngsprojFastq(ngs,collapse=FALSE) |
|
423 |
+ if (read_mate=="R1"){ |
|
424 |
+ fastqs <- fastq.df$READ1_FILE |
|
425 |
+ } else { |
|
426 |
+ fastqs <- fastq.df$READ2_FILE |
|
427 |
+ } |
|
428 |
+ if (!is.null(samid)){ |
|
429 |
+ fastqs <- fastqs[grepl(samid, fastqs)] |
|
430 |
+ file <- fastqs[1] |
|
431 |
+ } else { |
|
432 |
+ file <- fastqs[sample_index] |
|
433 |
+ } |
|
434 |
+ } else if (is.null(file) & is.null(ngs)){ |
|
435 |
+ stop("file or ngs must be provided. ") |
|
436 |
+ } |
|
437 |
+ if (!file.exists(file)){ |
|
438 |
+ stop("File does not exist.") |
|
439 |
+ } |
|
440 |
+ if (!is.null(n_reads)){ |
|
441 |
+ reads <- .readsFromFilePartial(file, |
|
442 |
+ n_reads=n_reads) |
|
443 |
+ } else { |
|
444 |
+ reads <- .readsFromFileComplete(file) |
|
445 |
+ } |
|
446 |
+ return(reads) |
|
447 |
+} |
|
448 |
+ |
|
449 |
+ |
|
450 |
+#' Quick function to extract barcodes from a list of reads |
|
451 |
+#' |
|
452 |
+#' Quick function to extract barcodes from a list of reads for testing |
|
453 |
+#' purposes. |
|
454 |
+#' |
|
455 |
+#' @param reads Character vector of sequencing reads. |
|
456 |
+#' @param flank5 String containing the constant sequence on the 5' |
|
457 |
+#' flank of the barcode region. |
|
458 |
+#' @param flank3 String containing the constant sequence on the 3' |
|
459 |
+#' flank of the barcode region. |
|
460 |
+#' @param leftOnly Should only flank5 be considered? |
|
461 |
+#' FALSE by default. |
|
462 |
+#' @param barcode_len Length of the barcodes to be retrieved. |
|
463 |
+#' @param ignore_barcode_len Should any barcode length be returned? |
|
464 |
+#' FALSE by default. |
|
465 |
+#' |
|
466 |
+#' @return Character vector of barcodes. |
|
467 |
+#' |
|
468 |
+#' @author Jean-Philippe Fortin |
|
469 |
+#' @examples |
|
470 |
+#' \dontrun{ |
|
471 |
+#' reads <- extractReads(ngs="ngs3565") |
|
472 |
+#' barcodes <- extractBarcodes(reads, flank5="TACCG", flank3="") |
|
473 |
+#' } |
|
474 |
+#' @export |
|
475 |
+#' @importFrom stringr str_extract |
|
476 |
+extractBarcodes <- function(reads, |
|
477 |
+ flank5="AGTTCG", |
|
478 |
+ flank3="TTCGGACTGT", |
|
479 |
+ leftOnly=FALSE, |
|
480 |
+ barcode_len=19, |
|
481 |
+ ignore_barcode_len=FALSE |
|
482 |
+){ |
|
483 |
+ # Only keeping reads that have flanking sequences: |
|
484 |
+ good1 <- grepl(flank5, reads) |
|
485 |
+ good2 <- grepl(flank3, reads) |
|
486 |
+ if (leftOnly){ |
|
487 |
+ good <- which(good1) |
|
488 |
+ } else { |
|
489 |
+ good <- which(good1 & good2) |
|
490 |
+ } |
|
491 |
+ reads <- reads[good] |
|
492 |
+ nleft <- nchar(flank5) |
|
493 |
+ if (!ignore_barcode_len){ |
|
494 |
+ barcodes <- str_extract(reads, |
|
495 |
+ paste0(flank5, "[A-Z]+")) |
|
496 |
+ barcodes <- gsub(flank5,"",barcodes) |
|
497 |
+ barcodes <- substr(barcodes, 1, barcode_len) |
|
498 |
+ } else { |
|
499 |
+ barcodes <- str_extract(reads, |
|
500 |
+ paste0(flank5, "[A-Z]+", flank3)) |
|
501 |
+ barcodes <- gsub(flank5,"",barcodes) |
|
502 |
+ barcodes <- gsub(flank3,"",barcodes) |
|
503 |
+ } |
|
504 |
+ return(barcodes) |
|
505 |
+} |
|
506 |
+ |
|
507 |
+ |
|
508 |
+ |
|
509 |
+ |
|
510 |
+ |
|
511 |
+#' @export |
|
512 |
+#' @importFrom biomaRt useMart getBM |
|
513 |
+getEnsemblOrthologs <- function(ids, |
|
514 |
+ species.from="mouse", |
|
515 |
+ species.to="rat" |
|
516 |
+){ |
|
517 |
+ species.latin <- list() |
|
518 |
+ species.latin[["mouse"]] <- "mmusculus" |
|
519 |
+ species.latin[["human"]] <- "hsapiens" |
|
520 |
+ species.latin[["rat"]] <- "rnorvegicus" |
|
521 |
+ symbols <- list() |
|
522 |
+ symbols[["mouse"]] <- "mgi_symbol" |
|
523 |
+ symbols[["human"]] <- "hgnc_symbol" |
|
524 |
+ symbols[["rat"]] <- "rgd_symbol" |
|
525 |
+ |
|
526 |
+ marts <- lapply(species.latin, function(x){ |
|
527 |
+ useMart("ensembl", dataset=paste0(x, "_gene_ensembl")) |
|
528 |
+ }) |
|
529 |
+ names(marts) <- names(species.latin) |
|
530 |
+ filters <- "ensembl_gene_id" |
|
531 |
+ # Making attributes: |
|
532 |
+ x1 <- paste0(species.latin[[species.to]], "_homolog_orthology_type") |
|
533 |
+ x2 <- paste0(species.latin[[species.to]], "_homolog_ensembl_gene") |
|
534 |
+ x3 <- "ensembl_gene_id" |
|
535 |
+ attributes <- c(x1,x2,x3) |
|
536 |
+ |
|
537 |
+ #Getting orthologs: |
|
538 |
+ df <- getBM(attributes=attributes, |
|
539 |
+ filters=filters, |
|
540 |
+ values=ids, |
|
541 |
+ mart=marts[[species.from]] |
|
542 |
+ ) |
|
543 |
+ df <- df[df[[x1]]!="",,drop=FALSE] |
|
544 |
+ col.from <- paste0(species.latin[[species.from]], "_ensembl_gene_id") |
|
545 |
+ col.to <- paste0(species.latin[[species.to]], "_ensembl_gene_id") |
|
546 |
+ colnames(df) <- c("type", col.to, col.from) |
|
547 |
+ df <- df[,c(3,2,1)] |
|
548 |
+ # Let's add gene symbol: |
|
549 |
+ df.from <- getBM(attributes=c(symbols[[species.from]], "ensembl_gene_id"), |
|
550 |
+ filters="ensembl_gene_id", |
|
551 |
+ values=df[[col.from]], |
|
552 |
+ mart=marts[[species.from]] |
|
553 |
+ ) |
|
554 |
+ df.to <- getBM(attributes=c(symbols[[species.to]], "ensembl_gene_id"), |
|
555 |
+ filters="ensembl_gene_id", |
|
556 |
+ values=df[[col.to]], |
|
557 |
+ mart=marts[[species.to]] |
|
558 |
+ ) |
|
559 |
+ wh.from <- match(df[[col.from]], df.from$ensembl_gene_id) |
|
560 |
+ wh.to <- match(df[[col.to]], df.to$ensembl_gene_id) |
|
561 |
+ df[[paste0(species.latin[[species.from]], "_gene")]] <- df.from[wh.from,1] |
|
562 |
+ df[[paste0(species.latin[[species.to]], "_gene")]] <- df.to[wh.to,1] |
|
563 |
+ return(df) |
|
564 |
+} |
|
565 |
+ |
|
566 |
+ |
|
567 |
+#' @export |
|
568 |
+#' @importFrom biomaRt useMart getBM |
|
569 |
+getSymbolFromEnsembl <- function(ids, |
|
570 |
+ species=c("human", "mouse") |
|
571 |
+){ |
|
572 |
+ species <- match.arg(species) |
|
573 |
+ species.latin <- list() |
|
574 |
+ species.latin[["mouse"]] <- "mmusculus" |
|
575 |
+ species.latin[["human"]] <- "hsapiens" |
|
576 |
+ species.latin[["rat"]] <- "rnorvegicus" |
|
577 |
+ symbols <- list() |
|
578 |
+ symbols[["mouse"]] <- "mgi_symbol" |
|
579 |
+ symbols[["human"]] <- "hgnc_symbol" |
|
580 |
+ symbols[["rat"]] <- "rgd_symbol" |
|
581 |
+ |
|
582 |
+ marts <- lapply(species.latin, function(x){ |
|
583 |
+ useMart("ensembl", dataset=paste0(x, "_gene_ensembl")) |
|
584 |
+ }) |
|
585 |
+ names(marts) <- names(species.latin) |
|
586 |
+ |
|
587 |
+ df <- getBM(attributes=c(symbols[[species]], "ensembl_gene_id"), |
|
588 |
+ filters="ensembl_gene_id", |
|
589 |
+ values=ids, |
|
590 |
+ mart=marts[[species]] |
|
591 |
+ ) |
|
592 |
+ return(df) |
|
593 |
+} |
|
594 |
+ |
|
595 |
+ |
|
596 |
+#' @export |
|
597 |
+#' @importFrom biomaRt useMart getBM |
|
598 |
+getEnsemblFromSymbol <- function(ids, |
|
599 |
+ species=c("human", "mouse") |
|
600 |
+){ |
|
601 |
+ species <- match.arg(species) |
|
602 |
+ species.latin <- list() |
|
603 |
+ species.latin[["mouse"]] <- "mmusculus" |
|
604 |
+ species.latin[["human"]] <- "hsapiens" |
|
605 |
+ species.latin[["rat"]] <- "rnorvegicus" |
|
606 |
+ symbols <- list() |
|
607 |
+ symbols[["mouse"]] <- "mgi_symbol" |
|
608 |
+ symbols[["human"]] <- "hgnc_symbol" |
|
609 |
+ symbols[["rat"]] <- "rgd_symbol" |
|
610 |
+ |
|
611 |
+ marts <- lapply(species.latin, function(x){ |
|
612 |
+ useMart("ensembl", dataset=paste0(x, "_gene_ensembl")) |
|
613 |
+ }) |
|
614 |
+ names(marts) <- names(species.latin) |
|
615 |
+ |
|
616 |
+ df <- getBM(attributes=c(symbols[[species]], "ensembl_gene_id"), |
|
617 |
+ filters=symbols[[species]], |
|
618 |
+ values=ids, |
|
619 |
+ mart=marts[[species]] |
|
620 |
+ ) |
|
621 |
+ df |
|
622 |
+} |
|
623 |
+ |
|
624 |
+ |
|
625 |
+#' @export |
|
626 |
+getCellSurvival <- function(r, d){ |
|
627 |
+ .getCellSurvival <- function(r,d){ |
|
628 |
+ if (d!=0){ |
|
629 |
+ result <- ((1+d)-sqrt((1+d)^2-4*d*r))/(2*d) |
|
630 |
+ } else { |
|
631 |
+ result <- r |
|
632 |
+ } |
|
633 |
+ result |
|
634 |
+ } |
|
635 |
+ survivals <- sapply(1:length(r), function(i){ |
|
636 |
+ .getCellSurvival(r[i], d[i]) |
|
637 |
+ }) |
|
638 |
+ survivals |
|
639 |
+} |
|
640 |
+ |
|
641 |
+ |
|
295 | 642 |
|
296 | 643 |
|