#!/bin/bash

# Optional first argument: the analysis directory. When it is given and
# non-empty, ANALYSIS_DIR becomes "<arg>/" so the input/ and output/ paths
# built from it live under that directory; otherwise ANALYSIS_DIR is empty and
# those paths are relative to the current working directory.
ANALYSIS_DIR="${1:+$1/}"

# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Directory layout, all rooted at ANALYSIS_DIR (which is either empty or ends
# with a slash).
INPUT_DIR="${ANALYSIS_DIR}input" # directory containing inputs
OUTPUT_DIR="${ANALYSIS_DIR}output" # directory in which to store outputs and temporary files

IBD_DIR="$OUTPUT_DIR/IBD" # subdirectory in which haplotype-sharing (IBD) data is stored
NEARBY_IMP_MASK_DIR="$OUTPUT_DIR/near_imputed_masks" # subdirectory in which genotype masks are stored
# Quote the path (and end option parsing with --) so an analysis directory
# containing whitespace or glob characters, or starting with '-', still
# results in exactly one directory being created.
mkdir -p -- "$NEARBY_IMP_MASK_DIR"

MAX_BP_DIST=30 # distance threshold to use for masking genotype probe data near imputed variants
DOSAGE_THRESH=0.1 # minimum minor-allele dosage of nearby genotypes required to perform masking

# For each chromosome number 1-22, run ./bin/make_imp_masks on that
# chromosome's inputs. The tool's stdout is shown on the terminal and also
# saved to a per-chromosome .log file next to the .bin output.
for CHR in {1..22} # NOTE: parallelize this loop across jobs if using a cluster/cloud
do
    NEARBY_IMP_MASK_PREFIX="$NEARBY_IMP_MASK_DIR/imp_masks.chr$CHR" # output to be generated
    SAMPLE_INFO_FILE="$INPUT_DIR/sample_info.txt" # input sample info file
    BIM_FILE="$INPUT_DIR/chr$CHR.bim" # input plink .bim file
    IBD_FILE="$IBD_DIR/chr$CHR.ibd.bin" # previously generated by find_IBD

    # input imputed .bgen files; corresponding .sample files must also exist.
    # The glob is expanded straight into an array (instead of parsing `ls`
    # output) so that each file stays a single argument even if its path
    # contains whitespace.
    BGEN_FILES=( "$INPUT_DIR"/chr"$CHR".*bgen )
    # An unmatched glob is left as the literal pattern (or, under nullglob,
    # yields an empty array); either way the first element is not an existing
    # file, so stop with a clear message rather than passing a bogus path on.
    if [[ ! -e "${BGEN_FILES[0]:-}" ]]; then
        echo "ERROR: no .bgen files found matching $INPUT_DIR/chr$CHR.*bgen" >&2
        exit 1
    fi

    ./bin/make_imp_masks \
	"$NEARBY_IMP_MASK_PREFIX.bin" \
	"$SAMPLE_INFO_FILE" \
	"$BIM_FILE" \
	"$IBD_FILE" \
	"$MAX_BP_DIST" \
	"$DOSAGE_THRESH" \
	"${BGEN_FILES[@]}" \
	| tee "$NEARBY_IMP_MASK_PREFIX.log"
done
