#!/usr/bin/env bash
set -o pipefail -o nounset -o errexit
#checks output of snapR/snaptronR package against directly run Snaptron web service queries
#at the end runs diffs, so ls -ltr *.diff

#first get the data from Snaptron directly
cat all.urls | perl -ne 'chomp; ($fn,$u)=split(/\t/,$_); `curl "$u" | tail -n +2 > $fn`;'

#now get it through the R module
Rscript ./make_test_data.R

for f in gene gene2 exon jx; do
    c=`perl -e '$c="'${f}'"; if($c eq "jx" || $c eq "gene") { print "srav2"; } else { print "gtex";}'`
    /bin/bash -x diff_counts.sh ${f}.tsv $c cd99.${f}.count.r.tsv > ${f}.count.diff
    f1=${f}.tsv
    paste <(cut -f 3-7 $f1) <(cut -f 1-2 $f1) <(cut -f 8-12,14- $f1) | perl -ne 'chomp; $f=$_; $f=~s/\t\t\t\t/\tNA\tNA\tNA\t/g; $f=~s/\t\t/\tNA\t/; $f=~s/\.0\t/\t/g; print "$f\n";' > ${f1}.cut
    diff ${f1}.cut cd99.${f}.row.r.tsv > ${f}.row.diff
    cat /data/snaptron_data/${c}/samples.tsv | tail -n +2 | perl -ne 'chomp; $f=$_; $f=~s/\t(\d+)\.0\t/\t$1\t/g; $f=~s/\t(\d+)\.(\d+)0\t/\t$1\.$2\t/g; $f=~s/\t\.(\d+)\t/\t0\.$1\t/g; $f=~s/\ttrue\t/\tTRUE\t/g; $f=~s/\tfalse\t/\tFALSE\t/g; $f=~s/\t0\.0\t/\t0\t/g; $f=~s/\t0\.0$/\t0/g; while($f=~/\t\t/) { $f=~s/\t\t/\tNA\t/; } print "$f\n";' > ${c}.samples 
    cat cd99.${f}.col.r.tsv | perl -ne 'chomp; $f=$_; $f=~s/\t(\d+)\.(\d+)0\t/\t$1\.$2\t/g; while($f=~/\t\t/) { $f=~s/\t\t/\tNA\t/; } print "$f\n";' > cd99.${f}.col.r.tsv.samples
    diff ${c}.samples cd99.${f}.col.r.tsv.samples > ${f}.col.diff
done
c='gtex'
for f in base base_sids; do
    cut -f 5- ${f}.tsv > ${f}.tsv.counts
    diff ${f}.tsv.counts cd99.${f}.count.r.tsv > ${f}.count.diff
    cut -f 2-4 ${f}.tsv | perl -ne 'chomp; print "$_\t2\t*\n";' > ${f}.tsv.cut
    diff ${f}.tsv.cut cd99.${f}.row.r.tsv > ${f}.row.diff
    cat cd99.${f}.col.r.tsv | perl -ne 'chomp; $f=$_; $f=~s/\t(\d+)\.(\d+)0\t/\t$1\.$2\t/g; while($f=~/\t\t/) { $f=~s/\t\t/\tNA\t/; } print "$f\n";' > cd99.${f}.col.r.tsv.samples
    diff ${c}.samples cd99.${f}.col.r.tsv.samples > ${f}.col.diff
done