# Required variables, to be set by the caller before this script runs:
#   $CORES, $CHR, $BAYES_FACTOR_CLIP_THRESH, $OUT_PREFIX

### example
# CHR=21
# CORES=8
# BAYES_FACTOR_CLIP_THRESH=1e-3
# OUT_PREFIX=test.chr$CHR

# Strict mode: stop on the first failing command, on use of an unset
# variable, and when any stage of a pipeline fails.
set -euo pipefail

# Fail fast on missing or empty caller-supplied inputs. `set -u` alone only
# reports an unset variable at its first use, and $OUT_PREFIX is first used in
# the final cnvCallWES loop, i.e. after the computeLogBFs step has already run.
: "${CORES:?CORES must be set by the caller}"
: "${CHR:?CHR must be set by the caller}"
: "${BAYES_FACTOR_CLIP_THRESH:?BAYES_FACTOR_CLIP_THRESH must be set by the caller}"
: "${OUT_PREFIX:?OUT_PREFIX must be set by the caller}"

### Secondary tuning parameters (less important than the caller-supplied inputs)

# Upper bound on the WES noise std scale: drops the small fraction of WES
# samples whose noise is aberrantly high.
MAX_WES_NOISE_STD_SCALE=2

# Coefficient-of-variation cutoff for baselineRC: excludes the small fraction
# of noCommonSV bins whose baselineRC varies strongly in most sample subsets.
COEFF_VAR_BASELINE_THRESH=0.2

# Largest tolerated relative difference between the mu_dip estimates obtained
# from 1000 high-coverage samples in N=50K vs. N=150K: excludes the small
# fraction of CommonSV bins where the two estimates are discordant.
MAX_50K_VS_150K_MU_DIP_REL_DIFF=0.05

# Bins with too many nearby SNPs get their read counts masked, because they
# may suffer from capture bias.
MAX_NEARBY_SNPS=3


# The two executables are invoked below as ./computeLogBFs and ./cnvCallWES,
# so they are looked up in the current working directory; make sure both
# carry the execute bit.
chmod +x computeLogBFs cnvCallWES

# Root of the shared resource files used by the cnvCallWES step below.
RESOURCES_DIR=/mnt/project/lohdata/resources

# Scratch directory for the intermediate manifests and computeLogBFs output.
# Created through the variable (quoted) so the definition and the mkdir can
# never drift apart and a $HOME containing whitespace is handled correctly.
TMP_DIR="$HOME/tmp"
mkdir -p "$TMP_DIR"

# Build the manifests / tables consumed by computeLogBFs and cnvCallWES.
wes_results_dir=/mnt/project/lohdata/ploh/WES_CNVs/WES_read_counts/results
norm_results_dir=/mnt/project/lohdata/ploh/WES_CNVs/normalized_read_counts/results

# Manifest of the per-batch 100bp read-count files for this chromosome.
# `ls` is kept on purpose: it merges the two brace-expanded groups into one
# sorted list and exits non-zero if either pattern matches nothing, which
# stops the script under errexit.
ls "$wes_results_dir"/WES_{454K,15K}_batch*/*.100bp.chr"$CHR".bin.gz \
    > "$TMP_DIR/WES_read_counts_bin_gz_list.txt"

# Per-set baseline-scale files and per-set stdScale files. They are expanded
# into arrays first because a failure inside <( ... ) is invisible to
# errexit/pipefail: a missing file set would otherwise silently produce an
# empty or misaligned pair list.
baseline_scale_files=("$norm_results_dir"/baselineScales.chr"$CHR".set*.bin.gz)
std_scale_files=("$norm_results_dir"/ID_40709.stdScale.cInvs.set*.txt.gz)

# Without nullglob an unmatched pattern stays as the literal pattern string,
# so testing the first element for existence detects "no match".
if [[ ! -e "${baseline_scale_files[0]}" ]]; then
    printf 'error: no baselineScales files match %s\n' "${baseline_scale_files[0]}" >&2
    exit 1
fi
if [[ ! -e "${std_scale_files[0]}" ]]; then
    printf 'error: no stdScale files match %s\n' "${std_scale_files[0]}" >&2
    exit 1
fi

# The two lists are paired line by line, so they must have the same length.
if (( ${#baseline_scale_files[@]} != ${#std_scale_files[@]} )); then
    printf 'error: %d baselineScales files vs. %d stdScale files for chr%s\n' \
        "${#baseline_scale_files[@]}" "${#std_scale_files[@]}" "$CHR" >&2
    exit 1
fi

# Two-column (tab-separated) list: baselineScales file, stdScale file.
paste \
    <(printf '%s\n' "${baseline_scale_files[@]}") \
    <(printf '%s\n' "${std_scale_files[@]}") \
    > "$TMP_DIR/baselineStdScales_list.txt"

# First two columns of every stdScale table, skipping header lines
# ($1 == "ID") and rows whose first column is not > 0.
zcat "${std_scale_files[@]}" \
    | awk '$1!="ID" && $1>0 {print $1,$2}' > "$TMP_DIR/IDstdScale.txt"


# Run computeLogBFs for chromosome $CHR. The tool takes purely positional
# arguments, so every expansion is quoted: an empty or whitespace-containing
# value would otherwise silently shift all following arguments.
# NOTE(review): the per-argument notes below are derived from the paths and
# variable names only; confirm against the computeLogBFs usage message.
compute_logbfs_args=(
    # .sample file from the UKB imputation release (always the c1 file,
    # independent of $CHR)
    "/mnt/project/Bulk/Imputation/UKB imputation from genotype/ukb22828_c1_b0_v3.sample"
    # chromosome label
    "chr${CHR}"
    # noCommonSV-region and commonSV parameter inputs
    /mnt/project/lohdata/ploh/WES_CNVs/normalized_read_counts/inputs/noCommonSVregions.highCov1000_sdNormRCs_50K_150K.txt.gz
    /mnt/project/lohdata/ploh/WES_CNVs/HI-CNV_WES/inputs/commonSVparams_50K_150K.txt.gz
    # manifests written earlier in this script
    "${TMP_DIR}/WES_read_counts_bin_gz_list.txt"
    "${TMP_DIR}/baselineStdScales_list.txt"
    # per-chromosome exome BGEN path prefix (passed without a file extension)
    "/mnt/project/Bulk/Exome sequences/Population level exome OQFE variants, BGEN format - final release/ukb23159_c${CHR}_b0_v1"
    # ID list
    /mnt/project/lohdata/ploh/WES_CNVs/HI-CNV_WES/inputs/IDs_40709.50K.txt
    # NOTE(review): two literal values whose meaning is not visible in this
    # script (presumably a range wide enough to cover everything) - confirm
    0
    1e9
    # filtering thresholds and thread count
    "$COEFF_VAR_BASELINE_THRESH"
    "$MAX_50K_VS_150K_MU_DIP_REL_DIFF"
    "$BAYES_FACTOR_CLIP_THRESH"
    "$MAX_NEARBY_SNPS"
    "$CORES"
    # literal keyword; NOTE(review): presumably requests the
    # *.RC_expectRCdip.bin files listed right after this call - confirm
    dump_RCs
    # output prefix: later steps read <prefix>.file*.RC_expectRCdip.bin and
    # <prefix>.logBFs.bin
    "${TMP_DIR}/chr${CHR}.487K"
)
./computeLogBFs "${compute_logbfs_args[@]}"

# Manifest of the per-file RC_expectRCdip outputs. `ls` exits non-zero when
# the pattern matches nothing, which stops the script under errexit.
ls "${TMP_DIR}/chr${CHR}.487K.file"*.RC_expectRCdip.bin > "${TMP_DIR}/RCexpectRCdip_list.txt"


# Run cnvCallWES once per IBD parameter value; each run writes its own output
# file, $OUT_PREFIX.ibd<value>.txt.gz. The tool takes purely positional
# arguments, so every expansion is quoted: an empty or whitespace-containing
# value would otherwise silently shift all following arguments.
# NOTE(review): the per-argument notes below are derived from the paths and
# variable names only; confirm against the cnvCallWES usage message.
for IBD_PARAM in 0 5 10 25 50 100; do
    cnv_call_args=(
        # chromosome label
        "chr${CHR}"
        # outputs of the computeLogBFs step and the manifest built from them
        "${TMP_DIR}/chr${CHR}.487K.logBFs.bin"
        "${TMP_DIR}/RCexpectRCdip_list.txt"
        # per-chromosome resource files
        "${RESOURCES_DIR}/bim_with_cM/chr${CHR}.bim"
        "/mnt/project/lohdata/ploh/WES_CNVs/region_phasing/inputs/chr${CHR}.UKBsites.hg38.ucsc_bed"
        /mnt/project/lohdata/ploh/WES_CNVs/HI-CNV_WES/inputs/commonSVparams_50K_150K.txt.gz
        # two-column table written earlier in this script, and the cutoff
        # applied to it
        "${TMP_DIR}/IDstdScale.txt"
        "$MAX_WES_NOISE_STD_SCALE"
        # IBD input and the parameter value for this iteration
        "${RESOURCES_DIR}/IBD/chr${CHR}.bin"
        "$IBD_PARAM"
        # thread count
        "$CORES"
        # output file for this parameter value
        "${OUT_PREFIX}.ibd${IBD_PARAM}.txt.gz"
    )
    ./cnvCallWES "${cnv_call_args[@]}"
done
