#!/bin/bash
#
# Runs ./bin/check_inputs once for each of chr1..chr22 and tees the tool's
# stdout to a per-chromosome log file.
#
# Usage: <this script> [ANALYSIS_DIR]
#   ANALYSIS_DIR  Optional directory holding the "input" subdirectory; the
#                 "output" subdirectory is created beneath it as needed.
#                 When omitted or empty, "input" and "output" are resolved
#                 relative to the current working directory.
#
# NOTE: ./bin/check_inputs is resolved relative to the current working
# directory, not relative to ANALYSIS_DIR or to this script's location.

set -euo pipefail

# ${1:+...} yields "$1/" only when $1 is set and non-empty, and (unlike a bare
# "$1") does not trip `set -u` when the script is run without arguments.
ANALYSIS_DIR="${1:+$1/}"

INPUT_DIR="${ANALYSIS_DIR}input"   # directory containing inputs
OUTPUT_DIR="${ANALYSIS_DIR}output" # directory in which to store outputs and temporary files

LOG_FILE_DIR="$OUTPUT_DIR/check_inputs" # subdirectory in which to store logs of check_inputs
mkdir -p -- "$LOG_FILE_DIR"

for CHR in {1..22}; do # NOTE: parallelize this loop across jobs if using a cluster/cloud
    OUTPUT_PREFIX="$LOG_FILE_DIR/chr$CHR" # .{snp,sample,phased,imputed}_stats.txt.gz
    SAMPLE_INFO_FILE="$INPUT_DIR/sample_info.txt"
    BED_BIM_FAM_PREFIX="$INPUT_DIR/chr$CHR" # .bed + .bim + .fam
    LRR_FILE="$INPUT_DIR/chr$CHR.LRR.txt.gz"
    THETA_FILE="$INPUT_DIR/chr$CHR.theta.txt.gz"
    HAPS_SAMPLE_PREFIX="$INPUT_DIR/chr$CHR.phased" # .haps.gz + .sample

    # Collect the .bgen files with a glob into an array (instead of parsing
    # `ls`) so each path stays a single argument even if it contains
    # whitespace. Note that .sample files must also exist.
    BGEN_FILES=( "$INPUT_DIR/chr$CHR."*bgen )

    # Without nullglob an unmatched pattern is left as the literal pattern
    # string, so test the first element to detect "no files found" and abort.
    if [[ ! -e "${BGEN_FILES[0]}" && ! -L "${BGEN_FILES[0]}" ]]; then
        printf 'error: no files matching %s\n' "$INPUT_DIR/chr$CHR.*bgen" >&2
        exit 1
    fi

    # With pipefail set, a failing check_inputs aborts the script even though
    # tee is the last stage of the pipeline.
    ./bin/check_inputs \
        "$OUTPUT_PREFIX" \
        "$SAMPLE_INFO_FILE" \
        "$BED_BIM_FAM_PREFIX" \
        "$LRR_FILE" \
        "$THETA_FILE" \
        "$HAPS_SAMPLE_PREFIX" \
        "${BGEN_FILES[@]}" \
        | tee "$OUTPUT_PREFIX.log"
done
