#!/bin/bash

# Optional first argument: a root directory for the analysis. When it is
# non-empty, every path below is placed under it; otherwise the paths are
# relative to the current working directory.
ANALYSIS_DIR="${1:+$1/}"

# Strict mode: abort on command failure, unset variables, or a failing pipeline stage.
set -euo pipefail

# Value passed as the final argument to predict_clusters.
THREADS=6

INPUT_DIR="${ANALYSIS_DIR}input"   # directory containing inputs
OUTPUT_DIR="${ANALYSIS_DIR}output" # directory in which to store outputs and temporary files

ARRAY_DATA_DIR="${OUTPUT_DIR}/array_data"    # subdirectory in which processed SNP-array data is stored
SNP_CLUSTER_DIR="${OUTPUT_DIR}/snp_clusters" # subdirectory in which genotype cluster data is stored

# Run predict_clusters once for each chromosome 1 through 22.
# NOTE: parallelize this loop across jobs if using a cluster/cloud

# These two paths do not depend on the chromosome, so they are set once.
LRR_STD_SCALE_FILE="$ARRAY_DATA_DIR/LRR_denoised.std_scale.txt" # previously generated by denoise_lrr
REF_CLUSTER_PREFIX_NO_CHR="$SNP_CLUSTER_DIR/ref_clusters" # previously generated by compute_ref_clusters

for CHR in {1..22}
do
    PRED_CLUSTER_PREFIX="$SNP_CLUSTER_DIR/pred_clusters.chr$CHR" # output prefix: .batch*.txt.gz

    # Every expansion is quoted so that a directory name containing whitespace
    # or glob characters reaches the program (and tee) as a single argument.
    # The program's stdout is echoed to the terminal and also written to a
    # per-chromosome log file next to the outputs.
    ./bin/predict_clusters \
        "$LRR_STD_SCALE_FILE" \
        "$CHR" \
        "$REF_CLUSTER_PREFIX_NO_CHR" \
        "$PRED_CLUSTER_PREFIX" \
        "$THREADS" \
        | tee "$PRED_CLUSTER_PREFIX.log"
done
