#!/bin/bash
#
# Runs ./bin/compute_ref_clusters once per autosome (chr1..chr22), writing
# each run's stdout both to the terminal and to a per-chromosome .log file.
#
# Usage: <this script> [ANALYSIS_DIR]
#   ANALYSIS_DIR  optional directory that holds the input/ and output/
#                 subdirectories; when omitted or empty, input/ and output/
#                 are resolved relative to the current working directory.
#
# The ./bin/compute_ref_clusters path is relative, so the script must be
# started from the directory that contains bin/.

set -euo pipefail

# ${1:-} keeps "no argument" from tripping `set -u` while still treating an
# empty argument the same as a missing one.
if [[ -n "${1:-}" ]]; then
    ANALYSIS_DIR="$1/"
else
    ANALYSIS_DIR=""
fi

INPUT_DIR="${ANALYSIS_DIR}input"   # directory containing inputs
OUTPUT_DIR="${ANALYSIS_DIR}output" # directory in which to store outputs and temporary files

ARRAY_DATA_DIR="$OUTPUT_DIR/array_data"            # subdirectory in which processed SNP-array data is stored
PRELIM_CNV_CALL_DIR="$OUTPUT_DIR/prelim_CNV_calls" # subdirectory in which prelim CNV calls are stored
SNP_CLUSTER_DIR="$OUTPUT_DIR/snp_clusters"         # subdirectory in which genotype cluster data is stored
mkdir -p -- "$SNP_CLUSTER_DIR"

# Same file for every chromosome, so it is set once outside the loop.
LRR_STD_SCALE_FILE="$ARRAY_DATA_DIR/LRR_denoised.std_scale.txt" # previously generated by denoise_lrr

for CHR in {1..22}; do # NOTE: parallelize this loop across jobs if using a cluster/cloud
    BIM_FILE="$INPUT_DIR/chr$CHR.bim"                                       # input PLINK .bim file
    LRR_THETA_GENO_FILE="$ARRAY_DATA_DIR/lrr_theta_confgeno.chr$CHR.bin"    # previously generated by merge_lrr_theta_geno
    PRELIM_CNV_CALL_PREFIX="$PRELIM_CNV_CALL_DIR/prelim_CNV_calls.chr$CHR"  # previously generated by call_CNVs_prelim
    REF_CLUSTER_PREFIX="$SNP_CLUSTER_DIR/ref_clusters.chr$CHR"              # output prefix: .batch*.txt.gz

    # Every path is quoted so that an ANALYSIS_DIR containing spaces or glob
    # characters is passed through as a single argument.
    # With `pipefail` + `-e`, a non-zero exit from compute_ref_clusters stops
    # the script even though `tee` is the last stage of the pipeline.
    ./bin/compute_ref_clusters \
        "$LRR_STD_SCALE_FILE" \
        "$BIM_FILE" \
        "$LRR_THETA_GENO_FILE" \
        "$PRELIM_CNV_CALL_PREFIX" \
        "$REF_CLUSTER_PREFIX" \
        | tee "$REF_CLUSTER_PREFIX.log"
done
