#!/bin/bash

# Runs ./bin/call_CNVs_prelim once per autosome (chr1..chr22) to produce
# preliminary CNV calls.
#
# Usage: <this script> [ANALYSIS_DIR]
#   ANALYSIS_DIR  optional directory containing input/ and output/; when
#                 omitted or empty, input/ and output/ are resolved relative
#                 to the current working directory.
#
# Reads:  $INPUT_DIR/chr<N>.bim
#         $OUTPUT_DIR/array_data/LRR_denoised.std_scale.txt
#         $OUTPUT_DIR/array_data/lrr_theta_confgeno.chr<N>.bin
# Writes: $OUTPUT_DIR/prelim_CNV_calls/prelim_CNV_calls.chr<N>.* (incl. .log)

set -euo pipefail

# ${1:-} keeps the "no argument" case working under `set -u`.
if [[ -n "${1:-}" ]]; then
    ANALYSIS_DIR="$1/"
else
    ANALYSIS_DIR=""
fi

INPUT_DIR="${ANALYSIS_DIR}input"   # directory containing inputs
OUTPUT_DIR="${ANALYSIS_DIR}output" # directory in which to store outputs and temporary files

ARRAY_DATA_DIR="$OUTPUT_DIR/array_data"            # subdirectory in which processed SNP-array data is stored
PRELIM_CNV_CALL_DIR="$OUTPUT_DIR/prelim_CNV_calls" # subdirectory in which prelim CNV calls are stored
mkdir -p -- "$PRELIM_CNV_CALL_DIR"

# Settings that do not depend on the chromosome.
LRR_STD_SCALE_FILE="$ARRAY_DATA_DIR/LRR_denoised.std_scale.txt" # previously generated by denoise_lrr
LRR_DEL=-0.5       # initial guess of expected LRR for DELs
LRR_DUP=0.27       # initial guess of expected LRR for DUPs
LRR_REFINE_ITERS=1 # number of refinement iterations for re-estimating mean DEL/DUP LRR

for CHR in {1..22}; do # NOTE: parallelize this loop across jobs if using a cluster/cloud
    BIM_FILE="$INPUT_DIR/chr$CHR.bim"                                          # input plink .bim file
    LRR_THETA_GENO_FILE="$ARRAY_DATA_DIR/lrr_theta_confgeno.chr$CHR.bin"       # previously generated by merge_lrr_theta_geno
    PRELIM_CNV_CALL_PREFIX="$PRELIM_CNV_CALL_DIR/prelim_CNV_calls.chr$CHR"     # output prefix: .batch*.txt.gz

    # All expansions are quoted so paths containing spaces or glob characters
    # reach the program as single arguments. stdout is echoed to the terminal
    # and copied to the per-chromosome .log file; with `pipefail` + `-e`, a
    # non-zero exit from call_CNVs_prelim aborts the script.
    ./bin/call_CNVs_prelim \
        "$LRR_STD_SCALE_FILE" \
        "$BIM_FILE" \
        "$LRR_THETA_GENO_FILE" \
        "$LRR_DEL" \
        "$LRR_DUP" \
        "$LRR_REFINE_ITERS" \
        "$PRELIM_CNV_CALL_PREFIX" \
        | tee -- "$PRELIM_CNV_CALL_PREFIX.log"
done
