# Variables expected to be set before this script runs:
#   $BATCH, $REF_CHR, $REF_START, $REF_END, $REF_WRAP_BP
# Bait regions are passed as the positional parameters ($@).

# Strict mode: abort on any failing command, unset variable, or failing
# pipeline stage.
set -euo pipefail

# Fail fast with a clear message if a required input is missing or empty,
# before spending time on package installation.
: "${BATCH:?BATCH must be set}"
: "${REF_CHR:?REF_CHR must be set}"
: "${REF_START:?REF_START must be set}"
: "${REF_END:?REF_END must be set}"
: "${REF_WRAP_BP:?REF_WRAP_BP must be set}"

# parallel is used further down to process the CRAM files concurrently.
sudo apt-get --yes install parallel

# htsbox and bwa are expected in the current directory; make them executable.
chmod +x htsbox bwa

# Working directory; its out/ subdirectory is what gets archived at the end.
# mkdir -p creates $TMP_DIR itself along with out/.
TMP_DIR="$HOME/tmp"
mkdir -p -- "$TMP_DIR/out"

# Write a two-record BED file: the full reference interval, followed by the
# first REF_WRAP_BP bases of that same interval.
# NOTE(review): presumably the second record lets reads that span the
# end/start junction of a circular or repeated reference align -- confirm.
{
    printf '%s\t%s\t%s\n' "$REF_CHR" "$REF_START" "$REF_END"
    printf '%s\t%s\t%s\n' "$REF_CHR" "$REF_START" "$((REF_START + REF_WRAP_BP))"
} > "$TMP_DIR/ref.bed"

# Extract the sequence for both BED records from the genome FASTA.
bedtools getfasta -fi GRCh38_full_analysis_set_plus_decoy_hla.fa \
    -bed "$TMP_DIR/ref.bed" > "$TMP_DIR/ref.fasta.tmp"

# Merge the two extracted records into a single FASTA record.
# Assumes getfasta emits one header line and one sequence line per BED record
# (so lines 1-2 are the first record and lines 3-4 the second) -- TODO confirm.
# The second record's header (line 3) is dropped.
awk '
    NR == 1                              # first header, printed unchanged
    NR == 2 { printf("%s", $1) }         # first sequence, no trailing newline
    NR == 4                              # second sequence completes that line
    END { if (NR == 2) printf("\n") }    # only one record: terminate the line
' "$TMP_DIR/ref.fasta.tmp" > "$TMP_DIR/ref.fasta"

# Index the merged reference for bwa and samtools.
./bwa index "$TMP_DIR/ref.fasta"
samtools faidx "$TMP_DIR/ref.fasta"

CRAM_DIR="/mnt/project/Bulk/Whole genome sequences/Whole genome CRAM files/$BATCH"

# Collect the batch's CRAM files with a glob instead of parsing `ls` output.
# nullglob makes an unmatched pattern expand to nothing rather than to the
# literal pattern; it is switched off again right after use.
shopt -s nullglob
cram_files=("$CRAM_DIR"/*.cram)
shopt -u nullglob

if (( ${#cram_files[@]} == 0 )); then
    echo "warning: no .cram files found in $CRAM_DIR" >&2
else
    # Run one run_bwa_htsbox_1.sh job per CRAM file, 4 at a time. Each job
    # receives: the CRAM path, the reference length (REF_END - REF_START),
    # the working directory, and the bait regions from the positional
    # parameters.
    # OK if some jobs fail; status will appear in the job log, which is
    # written to stderr. The trailing `||` keeps `set -e` from aborting the
    # script in that case.
    printf '%s\n' "${cram_files[@]}" \
        | parallel --joblog /dev/stderr -j 4 \
            bash run_bwa_htsbox_1.sh {} "$((REF_END - REF_START))" "$TMP_DIR" "$@" \
        || echo "warning: one or more jobs failed; see job log" >&2
fi

### create tar.gz of htsbox output directory
# -C switches into the working directory first, so archive members are stored
# under the relative path out/.
tar -czf "htsbox_batch${BATCH}.tar.gz" -C "$TMP_DIR" out
