// g++ -O3 -fopenmp -Wall -static-libgcc -static-libstdc++ computeLogBFs.cpp -o computeLogBFs -I/n/groups/price/poru/HSPH_SVN/src/EAGLE -I/home/pl88/boost_1_58_0/install/include -I/n/data1/bwh/medicine/loh/ploh/external_software/zstd-1.5.2/lib -L/n/groups/price/poru/external_software/libstdc++/usr/lib/gcc/x86_64-redhat-linux/4.8.5/ -L/n/groups/price/poru/external_software/zlib/zlib-1.2.11 -L/n/data1/bwh/medicine/loh/ploh/external_software/zstd-1.5.2/lib -L/home/pl88/boost_1_58_0/install/lib -Wl,-Bstatic -lzstd -lboost_iostreams -lz

#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <set>
#include <map>
#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <cmath>

#include <boost/math/distributions/negative_binomial.hpp>

#include "omp.h"
#include "zstd.h"

#include "Types.hpp"
#include "FileUtils.cpp"
#include "StringUtils.cpp"
#include "Timer.cpp"

using namespace std;

// Size of the sample-ID -> keep-index lookup table allocated in main(); sample IDs are used
// directly as indices into that table.
#define MAX_ID 6100000
// Number of mean-read-count quantiles; each sample row of a stdScales file supplies one theta0
// multiplier per quantile.
#define RC_QUANTILES 10
// Sentinel byte stored in place of a scaled log Bayes factor when a bin is masked or when a
// sample has no baselineScales data.
const char NAN_CHAR = -128;

// One bin as stored in the binary file headers read by this program.  Instances are filled by
// raw read() calls of sizeof(Region) bytes, so the member order and types must stay as they are.
struct Region {
  int start38;
  int end38;
  int minMAPQ;

  // Formats the region as "<start38>_<end38>.Q<minMAPQ>".
  string toString(void) const {
    string label = StringUtils::itos(start38);
    label += "_";
    label += StringUtils::itos(end38);
    label += ".Q";
    label += StringUtils::itos(minMAPQ);
    return label;
  }
};

// Per-sample, per-bin record written verbatim (fwrite) to the RCdata dump file; member order and
// types define the on-disk layout.
struct RCdata {
  uchar nearbyBps[2][3];   // [REFmajor|REFminor][SNP|DEL|INS]
  ushort RC;               // read count of the bin
  float coeffVarBaseline;  // coeffVar(baseline) of the bin in the sample's set
  float baselineScale;     // per-sample, per-bin baseline scale
  float expectRC;          // coverageScale * baselineScale * mean baseline read count
  float theta0adj;         // theta0 after the per-sample quantile adjustment
  float muDip;             // relative diploid mean (1 for noCommonSV bins)
  float sigmaDipAdj;       // sigmaDip after scaling by the sample's stdScale
  float BFs[2];            // Bayes factors vs. CN=2 for CN=1 and CN=3
};

// Returns the square of x.
inline float sqf(float x) {
  const float squared = x * x;
  return squared;
}

void processNoCommonSVbins(int &binSize, set <int> &binStartsToExtract,
			   vector <string> &baselineScalesFiles, vector <string> &stdScalesFiles,
			   map <int, float> noCommonSVsdHighCov[2],
			   const char *baselineStdScalesListFile, int chr,
			   float coeffVarBaselineRCthresh, const char *noCommonSVregionsFile,
			   double bpStart38, double bpEnd38) {

  FileUtils::AutoGzIfstream finList; finList.openOrExit(baselineStdScalesListFile);
  string baselineScalesFile, stdScalesFile;
  map <int, int> numCVpassSets;
  cout << "chr" << chr << " bins per sample set with coeffVar(baseline) > "
       << coeffVarBaselineRCthresh << ":" << endl;
  while (finList >> baselineScalesFile >> stdScalesFile) {
    baselineScalesFiles.push_back(baselineScalesFile);
    stdScalesFiles.push_back(stdScalesFile);
    FileUtils::AutoGzIfstream finBaselineScales; finBaselineScales.openOrExit(baselineScalesFile);
    uint64 R; finBaselineScales.read((char *) &R, sizeof(R));
    vector <Region> regions(R);
    vector <float> coeffVarBaselineRCs(R);
    finBaselineScales.read((char *) &regions[0], R*sizeof(regions[0]));
    for (int k = 0; k < 4; k++) // skip 3 rows; coeffVarBaselineRCs is 4th
      finBaselineScales.read((char *) &coeffVarBaselineRCs[0], R*sizeof(coeffVarBaselineRCs[0]));
    int ctrCVfail = 0;
    for (uint r = 0; r < R; r++) {
      numCVpassSets[regions[r].start38] += coeffVarBaselineRCs[r] <= coeffVarBaselineRCthresh;
      ctrCVfail += coeffVarBaselineRCs[r] > coeffVarBaselineRCthresh;
    }
    cout << " " << ctrCVfail << flush;
    finBaselineScales.close();
  }
  finList.close();
  cout << endl << endl;

  int ctrNoCommonSVbinsFiltered = 0;
  FileUtils::AutoGzIfstream finRegions; finRegions.openOrExit(noCommonSVregionsFile);
  int chrBin, start38, end38; float sd_highCov_50K, sd_highCov_150K;
  while (finRegions >> chrBin >> start38 >> end38 >> sd_highCov_50K >> sd_highCov_150K) {
    if (binSize == 0) binSize = end38-start38;
    assert(end38 == start38+binSize);
    if (chrBin == chr && bpStart38 <= start38 && start38 < bpEnd38) {
      if (numCVpassSets[start38] > (int) baselineScalesFiles.size()/2)
	binStartsToExtract.insert(start38);
      else
	ctrNoCommonSVbinsFiltered++;
    }
    if (chrBin == chr) {
      noCommonSVsdHighCov[0][start38] = sd_highCov_50K;
      noCommonSVsdHighCov[1][start38] = sd_highCov_150K;
    }
  }
  finRegions.close();
  cout << "Using " << binStartsToExtract.size() << " noCommonSV chr" << chr << " bins" << endl;
  cout << "Filtered " << ctrNoCommonSVbinsFiltered << " bins with coeffVar(baseline) > "
       << coeffVarBaselineRCthresh << " in most sample sets" << endl << endl;
}

// Relative difference between x and y, computed as (larger / smaller) - 1.
float relDiff(float x, float y) {
  if (x < y)
    return y/x-1;
  return x/y-1;
}

void processCommonSVbins(set <int> &binStartsToExtract, const char *CommonSVparamsFile,
			 float max_50K_vs_150K_diff, int binSize, int chr, double bpStart38,
			 double bpEnd38) {

  int ctrCommonSVbins = 0, ctrCommonSVbinsFiltered = 0;
  FileUtils::AutoGzIfstream finCommonSVparams; finCommonSVparams.openOrExit(CommonSVparamsFile);
  string line; getline(finCommonSVparams, line); // ignore header
  int chrBin, start38, end38;
  float mu_dip_50K, sd_dip_50K, theta0_MOM_50K, mu_dip_150K, sd_dip_150K, theta0_MOM_150K;
  while (finCommonSVparams >> chrBin >> start38 >> end38 >> mu_dip_50K >> sd_dip_50K
	 >> theta0_MOM_50K >> mu_dip_150K >> sd_dip_150K >> theta0_MOM_150K) {
    assert(end38 == start38+binSize);
    if (chrBin == chr && bpStart38 <= start38 && start38 < bpEnd38) {
      assert(!binStartsToExtract.count(start38));
      if (mu_dip_50K > 0 && mu_dip_150K > 0 // disagreeing fit params in N=50K vs. N=150K
	  && relDiff(mu_dip_50K, mu_dip_150K) > max_50K_vs_150K_diff)
	ctrCommonSVbinsFiltered++;
      else {
	binStartsToExtract.insert(start38);
	ctrCommonSVbins++;
      }
    }
  }
  finCommonSVparams.close();
  cout << "Added " << ctrCommonSVbins << " CommonSV chr" << chr << " bins" << endl;
  cout << "Filtered " << ctrCommonSVbinsFiltered << " CommonSV bins with mu_dip 50K vs. 150K >"
       << " " << max_50K_vs_150K_diff << endl << endl;
}

void readCommonSVnegBinomParams(vector < vector <float > > &commonSVmuDips,
				vector < vector <float > > &commonSVsigmaDips,
				vector < vector <float > > &commonSVtheta0s,
				const char *CommonSVparamsFile, const vector <int> &binStarts,
				int chr) {

  const float thetaMin = 0.0001f;
  map <int, int> binStartToInd;
  for (uint m = 0; m < binStarts.size(); m++) binStartToInd[binStarts[m]] = m;
  FileUtils::AutoGzIfstream finCommonSVparams; finCommonSVparams.openOrExit(CommonSVparamsFile);
  string line; getline(finCommonSVparams, line); // ignore header
  int chrBin, start38, end38;
  float mu_dip_50K, sd_dip_50K, theta0_MOM_50K, mu_dip_150K, sd_dip_150K, theta0_MOM_150K;
  while (finCommonSVparams >> chrBin >> start38 >> end38 >> mu_dip_50K >> sd_dip_50K
	 >> theta0_MOM_50K >> mu_dip_150K >> sd_dip_150K >> theta0_MOM_150K)
    if (chrBin == chr && binStartToInd.find(start38) != binStartToInd.end()) {
      int m = binStartToInd[start38];
      commonSVmuDips[0][m] = mu_dip_50K;
      commonSVsigmaDips[0][m] = sd_dip_50K;
      commonSVtheta0s[0][m] = theta0_MOM_50K==-1 ? -1 : max(thetaMin, theta0_MOM_50K);
      commonSVmuDips[1][m] = mu_dip_150K;
      commonSVsigmaDips[1][m] = sd_dip_150K;
      commonSVtheta0s[1][m] = theta0_MOM_150K==-1 ? -1 : max(thetaMin, theta0_MOM_150K);
      //cout << binStarts[m] << " " << commonSVmuDips[0][m] << " " << commonSVtheta0s[0][m] << " " << commonSVmuDips[1][m] << " " << commonSVtheta0s[1][m] << " " << endl;
    }
  finCommonSVparams.close();
}

// Reads per-sample read counts from every file listed in regionCountsBinGzListFile and stores
// the counts of the bins in binStartsToExtract into readCounts (Nkeep rows of M values, M being
// the number of bins to extract).  Entries of readCounts that already hold (ushort) -1 are left
// untouched (masked bins).  Files are processed in parallel with OpenMP.
//
// Each file holds: int R, R Region records (strictly increasing start38), then repeated
// records of (int ID, R ushort counts).  Samples whose ID is outside IDtoKeepInd or maps to -1
// are skipped.
//
// Returns a vector <char> indicating whether read counts were input for each sample.
// Exits with an error message on a malformed header or truncated sample record.
vector <char> unzipReadCounts(ushort *readCounts, int Nkeep, const vector <int> &IDtoKeepInd,
                              const set <int> &binStartsToExtract,
                              const char *regionCountsBinGzListFile) {

  vector <char> hasRCs(Nkeep);

  uint64 M = binStartsToExtract.size();

  // read file list
  vector <string> files;
  {
    FileUtils::AutoGzIfstream finBinGzList; finBinGzList.openOrExit(regionCountsBinGzListFile);
    string file;
    while (getline(finBinGzList, file))
      files.push_back(file);
    finBinGzList.close();
    cout << "Read " << files.size() << " read count file paths" << endl;
  }

  // unzip read counts in parallel
#pragma omp parallel for schedule(dynamic)
  for (uint f = 0; f < files.size(); f++) {
    // read regions from header
    FileUtils::AutoGzIfstream finBinGz; finBinGz.openOrExit(files[f], std::ios_base::binary);
    int R = 0;
    finBinGz.read((char *) &R, sizeof(int));
    if (!finBinGz || R < 0) {
      cerr << "ERROR: Unable to read region count from " << files[f] << endl;
      exit(1);
    }
    vector <bool> extract(R);
    int ctrExtract = 0;
    int prevStart38 = -1;
    for (int r = 0; r < R; r++) {
      Region region;
      finBinGz.read((char *) &region, sizeof(Region));
      if (!finBinGz) {
        cerr << "ERROR: Truncated region header in " << files[f] << endl;
        exit(1);
      }
      assert(region.start38 > prevStart38); prevStart38 = region.start38;
      extract[r] = binStartsToExtract.count(region.start38);
      ctrExtract += extract[r];
    }
    assert(ctrExtract == (int) M);

    // read WES read counts per sample
    int ID;
    vector <ushort> readCountsBuf(R); // released automatically on every exit path
    while (finBinGz.read((char *) &ID, sizeof(int))) {
      if (R > 0)
        finBinGz.read((char *) &readCountsBuf[0], R*sizeof(readCountsBuf[0]));
      if (!finBinGz) { // ID was read but its counts were not: do not store a partial row
        cerr << "ERROR: Truncated read count record for ID " << ID << " in " << files[f]
             << endl;
        exit(1);
      }
      // IDs come from the file, so validate before using them as an index
      if (ID < 0 || ID >= (int) IDtoKeepInd.size() || IDtoKeepInd[ID] == -1)
        continue; // don't keep this sample
      int iKeep = IDtoKeepInd[ID];
      assert(!hasRCs[iKeep]); // each sample is expected in exactly one file
      hasRCs[iKeep] = 1;
      ushort *readCountsRow = readCounts + iKeep*M;
      int m = 0;
      for (int r = 0; r < R; r++)
        if (extract[r]) {
          if (readCountsRow[m] != (ushort) -1) // fill read count if not masked
            readCountsRow[m] = readCountsBuf[r];
          m++;
        }
      assert(m == (int) M);
    }
    finBinGz.close();
  }

  int ctr = 0; for (int iKeep = 0; iKeep < Nkeep; iKeep++) ctr += hasRCs[iKeep];
  cout << "Stored WES read counts for " << ctr << " samples" << endl;

  return hasRCs;
}

// Decompresses one Zstd-compressed genotype probability block and converts it to hard calls.
//
// - buf/bufLen: output buffer; bufLen must equal the decompressed size of the block
// - zBuf/zBufLen: compressed block
// - Nbgen: number of samples expected in the block
//
// The block must describe Nbgen samples, 2 alleles, ploidy 2 for every sample, unphased data
// and 8 bits per probability; anything else (including a block too short to hold those fields)
// terminates the program with an error message.
//
// On return the first Nbgen bytes of buf hold 0/1/2/9 genotypes (9 = missing).  The return value
// is genoSum / (2 * number of non-missing samples), which is NaN if every sample is missing.
float decompressSnp(uchar *buf, uint bufLen, const uchar *zBuf, uint zBufLen, uint Nbgen) {

  // decompress and check genotype probability block
  if (ZSTD_decompress(buf, bufLen, zBuf, zBufLen) != bufLen) {
    cerr << "ERROR: ZSTD_decompress() failed" << endl;
    exit(1);
  }
  // fixed-size fields ahead of the per-sample bytes: N (4) + K (2) + Pmin (1) + Pmax (1)
  const unsigned long long fixedLen = 8;
  if (bufLen < fixedLen) {
    cerr << "ERROR: genotype probability block too short (" << bufLen << " bytes)" << endl;
    exit(1);
  }
  uchar *bufAt = buf;
  // assemble little-endian N in unsigned arithmetic (avoids shifting into the sign bit of int)
  uint N = (uint) bufAt[0] | ((uint) bufAt[1]<<8) | ((uint) bufAt[2]<<16)
    | ((uint) bufAt[3]<<24);
  bufAt += 4;
  if (N != Nbgen) {
    cerr << "ERROR: N = " << N << " (mismatch with header block)" << endl;
    exit(1);
  }
  uint K = bufAt[0]|(bufAt[1]<<8); bufAt += 2;
  if (K != 2U) {
    cerr << "ERROR: K = " << K << " (non-bi-allelic)" << endl;
    exit(1);
  }
  uint Pmin = *bufAt; bufAt++;
  if (Pmin != 2U) {
    cerr << "ERROR: minimum ploidy = " << Pmin << " (not 2)" << endl;
    exit(1);
  }
  uint Pmax = *bufAt; bufAt++;
  if (Pmax != 2U) {
    cerr << "ERROR: maximum ploidy = " << Pmax << " (not 2)" << endl;
    exit(1);
  }
  // N ploidy/missingness bytes followed by Phased (1) and B (1) must fit in the block
  const unsigned long long lenThroughB = fixedLen + N + 2;
  if (bufLen < lenThroughB) {
    cerr << "ERROR: genotype probability block too short (" << bufLen << " bytes for N = "
         << N << ")" << endl;
    exit(1);
  }
  const uchar *ploidyMissBytes = bufAt;
  for (uint i = 0; i < N; i++) {
    uint ploidyMiss = *bufAt; bufAt++;
    if (ploidyMiss != 2U && ploidyMiss != 130U) {
      cerr << "ERROR: ploidy/missingness byte = " << ploidyMiss
           << " (not 2 or 130)" << endl;
      exit(1);
    }
  }
  uint Phased = *bufAt; bufAt++;
  if (Phased != 0U) {
    cerr << "ERROR: Phased = " << Phased << " (not 0)" << endl;
    exit(1);
  }
  uint B = *bufAt; bufAt++;
  if (B != 8U) {
    cerr << "ERROR: B = " << B << " (not 8)" << endl;
    exit(1);
  }
  // two probability bytes per sample are consumed below
  if (bufLen < lenThroughB + 2 * (unsigned long long) N) {
    cerr << "ERROR: genotype probability block too short (" << bufLen << " bytes for N = "
         << N << ")" << endl;
    exit(1);
  }

  // compute allele frequency; overwrite first N bytes of buf with 0129 genotypes
  // (safe in place: byte i is written only after the ploidy byte i has been consulted, and the
  // probability bytes being read always lie beyond index N)
  int Nnonmiss = 0;
  int genoSum = 0;
  for (uint i = 0; i < N; i++) {
    if (ploidyMissBytes[i] == 130U) {
      buf[i] = 9;
      bufAt += 2;
      continue;
    }
    Nnonmiss++;
    uchar p11 = *bufAt; bufAt++;
    uchar p10 = *bufAt; bufAt++;
    // hard call: 0 if the first probability byte is 255, 1 if the second is 255, otherwise 2
    uchar geno = 2 - 2*(p11==255U) - (p10==255U);
    buf[i] = geno;
    genoSum += geno;
  }
  float alleleFreq = genoSum / (2.0f * Nnonmiss);

  return alleleFreq;
}

// Finds which of the bins mCur-1, mCur, mCur+1 (restricted to valid indices 0..M-1) have bp
// within their span extended by 100 bp on each side, i.e.
//   binStarts[m]-100 <= bp < binStarts[m]+binSize+100.
// On return mAffectedStart/mAffectedEnd hold the lowest/highest matching index, or 1<<30 and -1
// when no bin matches.  Returns true iff at least one bin matches.
bool checkNearby(int &mAffectedStart, int &mAffectedEnd, const vector <int> &binStarts,
                 int binSize, int mCur, int M, int bp) {
  const int flankBp = 100;
  int firstHit = 1<<30;
  int lastHit = -1;

  // clamp the three-bin window to the valid index range
  const int lo = mCur-1 < 0 ? 0 : mCur-1;
  const int hi = mCur+1 >= M ? M-1 : mCur+1;
  for (int idx = lo; idx <= hi; idx++) {
    const int spanBegin = binStarts[idx] - flankBp;
    const int spanEnd = binStarts[idx] + binSize + flankBp;
    if (bp < spanBegin || bp >= spanEnd)
      continue;
    if (idx < firstHit) firstHit = idx;
    lastHit = idx;
  }

  mAffectedStart = firstHit;
  mAffectedEnd = lastHit;
  return firstHit <= lastHit;
}

void readNearbySNPs(vector < vector <RCdata> > &RCdataDump, ushort *readCounts, int Nkeep,
		    const vector <int> &IDtoKeepInd, const vector <int> &binStarts, int binSize,
		    const string &bgenSamplePrefix, const char *chrStr, int maxNearbySNPs,
		    int threads) {

  uint64 M = binStarts.size();

  if (readCounts != NULL) // use readCounts matrix to store nearbyREF[major|minor]SNPs
    memset(readCounts, 0, Nkeep * M * sizeof(readCounts[0]));

  // read IDs from .sample file; set up keepInd: Nbgen -> keep index
  vector <int> keepInd;
  {
    int ctrKeep = 0;
    FileUtils::AutoGzIfstream finSample; finSample.openOrExit(bgenSamplePrefix+".sample");
    string line; getline(finSample, line); getline(finSample, line); // throw away headers
    int ID;
    while (finSample >> ID) {
      int ind = ID > 0 ? IDtoKeepInd[ID] : -1;
      keepInd.push_back(ind);
      ctrKeep += ind != -1;
      getline(finSample, line);
    }
    finSample.close();
    cout << "Read " << keepInd.size() << " IDs from .sample file; recording data for " << ctrKeep
	 << " samples" << endl << endl;
  }
  uint Nsample = keepInd.size();

  /********** READ HEADER **********/

  FILE *fin = fopen((bgenSamplePrefix+".bgen").c_str(), "rb"); assert(fin != NULL);
  uint offset; fread(&offset, 4, 1, fin); //cout << "offset: " << offset << endl;
  uint L_H; fread(&L_H, 4, 1, fin); //cout << "L_H: " << L_H << endl;
  uint Mbgen; fread(&Mbgen, 4, 1, fin); cout << "snpBlocks (Mbgen): " << Mbgen << endl;
  assert(Mbgen != 0);
  uint Nbgen; fread(&Nbgen, 4, 1, fin); cout << "samples (Nbgen): " << Nbgen << endl;
  if (Nbgen != Nsample) {
    cerr << "ERROR: Number of samples in BGEN header does not match sample file" << endl;
    exit(1);
  }
  char magic[5]; fread(magic, 1, 4, fin); magic[4] = '\0'; //cout << "magic bytes: " << string(magic) << endl;
  fseek(fin, L_H-20, SEEK_CUR); //cout << "skipping L_H-20 = " << L_H-20 << " bytes (free data area)" << endl;
  uint flags; fread(&flags, 4, 1, fin); //cout << "flags: " << flags << endl;
  uint CompressedSNPBlocks = flags&3; cout << "CompressedSNPBlocks: " << CompressedSNPBlocks << endl;
  assert(CompressedSNPBlocks==2); // REQUIRE CompressedSNPBlocks==2 (Zstd)
  uint Layout = (flags>>2)&0xf; cout << "Layout: " << Layout << endl;
  assert(Layout==1 || Layout==2); // REQUIRE Layout==1 or Layout==2

  //uint SampleIdentifiers = flags>>31; //cout << "SampleIdentifiers: " << SampleIdentifiers << endl;
  fseek(fin, offset+4, SEEK_SET);

  /********** READ SNP BLOCKS IN BATCHES **********/

  const int B_MAX = 1000; // number of SNPs to process in one batch (for multi-threading)

  char snpID[65536], rsID[65536], chrStrBgen[65536];
  char *allele1, *allele2;
  uint maxLA = 65536, maxLB = 65536;
  allele1 = (char *) malloc(maxLA+1);
  allele2 = (char *) malloc(maxLB+1);

  // during single-threaded reading of block, store SNP data for later multi-threaded processing
  vector <int> bps(B_MAX);
  vector <int> bpDeltas(B_MAX);
  vector < vector <uchar> > zBufs(B_MAX), bufs(B_MAX);
  vector <uint> zBufLens(B_MAX), bufLens(B_MAX);
  vector <float> alleleFreqs(B_MAX);
  vector <int> mAffectedStarts(B_MAX), mAffectedEnds(B_MAX);
    
  Timer timer;
  int B = 0; // current block size
  int mCur = 0; // current position in bins to extract
  for (uint mbgen = 0; mbgen < Mbgen; mbgen++) {
    ushort LS; fread(&LS, 2, 1, fin); // cout << "LS: " << LS << " " << std::flush;
    fread(snpID, 1, LS, fin); snpID[LS] = '\0'; // cout << "snpID: " << string(snpID) << " " << std::flush;
    ushort LR; fread(&LR, 2, 1, fin); // cout << "LR: " << LR << " " << std::flush;
    fread(rsID, 1, LR, fin); rsID[LR] = '\0'; // cout << "rsID: " << string(rsID) << " " << std::flush;
    ushort LC; fread(&LC, 2, 1, fin); // cout << "LC: " << LC << " " << std::flush;
    fread(chrStrBgen, 1, LC, fin); chrStrBgen[LC] = '\0';
    assert(strcmp(chrStrBgen, chrStr) == 0 || strcmp(chrStrBgen, chrStr+3) == 0);
    uint bp; fread(&bp, 4, 1, fin); // cout << "bp: " << bp << " " << std::flush;
    bps[B] = bp;

    ushort K; fread(&K, 2, 1, fin); //cout << "K: " << K << endl;
    if (K != 2) {
      cerr << "ERROR: Non-bi-allelic variant found: " << K << " alleles" << endl;
      exit(1);
    }

    uint LA; fread(&LA, 4, 1, fin); // cout << "LA: " << LA << " " << std::flush;
    if (LA > maxLA) {
      maxLA = 2*LA;
      free(allele1);
      allele1 = (char *) malloc(maxLA+1);
    }
    fread(allele1, 1, LA, fin); allele1[LA] = '\0';
    uint LB; fread(&LB, 4, 1, fin); // cout << "LB: " << LB << " " << std::flush;
    if (LB > maxLB) {
      maxLB = 2*LB;
      free(allele2);
      allele2 = (char *) malloc(maxLB+1);
    }
    fread(allele2, 1, LB, fin); allele2[LB] = '\0';

    bpDeltas[B] = LB - LA;

    uint C; fread(&C, 4, 1, fin); //cout << "C: " << C << endl;
    if (C > zBufs[B].size()) zBufs[B].resize(C-4);
    uint D; fread(&D, 4, 1, fin); //cout << "D: " << D << endl;
    zBufLens[B] = C-4; bufLens[B] = D;
    fread(&zBufs[B][0], 1, C-4, fin);

    while (mCur < (int) M && (int) bp >= binStarts[mCur]+100)
      mCur++; // move bin ptr to at or after bp
    if (checkNearby(mAffectedStarts[B], mAffectedEnds[B], binStarts, binSize, mCur, M, bp)) {
      B++;
    }

    if (B == B_MAX || (mbgen+1 == Mbgen && B>0)) { // process SNP block using multi-threading
#pragma omp parallel for schedule(dynamic)
      for (int b = 0; b < B; b++) {
	if (bufLens[b] > bufs[b].size()) bufs[b].resize(bufLens[b]);
	alleleFreqs[b] = decompressSnp(&bufs[b][0], bufLens[b], &zBufs[b][0], zBufLens[b], Nbgen);
      }

#pragma omp parallel for
      for (int t = 0; t < threads; t++) {
	int iStart = t*Nbgen/threads, iEnd = (t+1)*Nbgen/threads;
	for (int b = 0; b < B; b++) {
	  bool isREFminor = alleleFreqs[b] > 0.5;
	  uchar genoHomMinor = isREFminor ? 0 : 2;
	  uchar bpsAltered = max(1, abs(bpDeltas[b]));
	  int variantType = bpDeltas[b]==0 ? 0 : (bpDeltas[b]<0 ? 1 : 2); // SNP, DEL, INS
	  uchar *genos = &bufs[b][0];
	  for (int i = iStart; i < iEnd; i++) {
	    if (keepInd[i] != -1 && (genos[i]==1U || genos[i]==genoHomMinor)) {
	      uchar incr = (genos[i]==1U ? 1 : 2) * bpsAltered;
	      for (int m = mAffectedStarts[b]; m <= mAffectedEnds[b]; m++) {
		//cout << "updating " << bps[b] << ":" << bpDeltas[b] << " " << i << "," << m << "." << (int) isREFminor << "," << variantType << " += " << (int) incr << endl;
		if (readCounts != NULL) {
		  if (variantType == 0) { // SNP
		    uchar *nearbySNPs = (uchar *) &readCounts[keepInd[i]*M + m];
		    nearbySNPs[isREFminor] += incr;
		  }
		}
		else if (!RCdataDump[keepInd[i]].empty())
		  RCdataDump[keepInd[i]][m].nearbyBps[isREFminor][variantType] += incr;
              }
	    }
	  }
	}
      }

      B = 0; // reset current block size

      if ((int) bp > binStarts.back() + binSize + 1000) // early exit if >1kb beyond range
	break;
    }
    if (mbgen % 100000 == 99999)  {
      cout << "At SNP " << mbgen+1 << "; time for block: " << timer.update_time() << endl;
    }
  }

  free(allele1);
  free(allele2);

  fclose(fin);

  if (readCounts != NULL) // post-process nearbyREF[major|minor]SNPs -> -1 (masked) or 0
    for (uint64 ij = 0; ij < Nkeep * M; ij++) {
      ushort &RC = readCounts[ij];
      int nearbySNPs[2] = {RC & 0xFF, RC>>8}; // REFmajor and REFminor counts
      RC = (nearbySNPs[0] <= maxNearbySNPs && nearbySNPs[1] <= maxNearbySNPs) ? 0 : -1;
    }
}


// Reports an invalid command-line argument on stderr and terminates the program.
static void exitBadArg(const char *argName, const char *value) {
  cerr << "ERROR: Invalid value for " << argName << ": " << value << endl;
  exit(1);
}

// Opens a binary output file for writing; terminates with an error message on failure.
static FILE *fopenWriteOrExit(const string &path) {
  FILE *fout = fopen(path.c_str(), "wb");
  if (fout == NULL) {
    cerr << "ERROR: Unable to open " << path << " for writing" << endl;
    exit(1);
  }
  return fout;
}

// Program entry point.  Pipeline:
//  1. select the bins to analyze (noCommonSV bins passing the coeffVar filter + CommonSV bins
//     with consistent fit parameters),
//  2. mask bins per sample where too many nearby SNPs are carried,
//  3. load the 100bp-bin read counts,
//  4. per sample set, compute Bayes factors for CN=1 and CN=3 vs. CN=2 in every unmasked bin and
//     store them as scaled log values (one signed byte each) in place of the read counts,
//  5. write either the logBFs matrix (dump_RCs) or the full per-bin RCdata records (dump_all).
// All arguments are validated explicitly so that the checks survive builds with NDEBUG.
int main(int argc, char *argv[]) {

  if (argc != 18) {
    cerr << "Usage:" << endl;
    cerr << "- arg1 = ID list (.sample file)" << endl;
    cerr << "- arg2 = chrStr (chr##)" << endl;
    cerr << "- arg3 = file listing noCommonSV regions" << endl;
    cerr << "- arg4 = file providing mu_dip and theta0 (N=50K,150K) for CommonSV regions" << endl;
    cerr << "- arg5 = 100bp-bin read count .bin.gz list file" << endl;
    cerr << "- arg6 = (baselineScales .bin.gz + stdScale .txt.gz) list file" << endl;
    cerr << "- arg7 = .{bgen,sample} file prefix for WES SNP/indel calls" << endl;
    cerr << "- arg8 = N=50K WES ID list" << endl;
    cerr << "- arg9 = bpStart38 (0 for full chr)" << endl;
    cerr << "- arg10 = bpEnd38 (1e9 for full chr)" << endl;
    cerr << "- arg11 = coeffVarBaselineRCthresh (e.g., 0.2)" << endl;
    cerr << "- arg12 = max_50K_vs_150K_mu_dip_rel_diff (e.g., 0.05)" << endl;
    cerr << "- arg13 = Bayes factor clipping threshold (e.g., 1e-3)" << endl;
    cerr << "- arg14 = maximum number of nearby SNPs allowed (e.g., 3)" << endl;
    cerr << "- arg15 = threads" << endl;
    cerr << "- arg16 = dump_RCs | dump_all" << endl;
    cerr << "- arg17 = out prefix (.logBFs.bin, .file#.RC_expectRCdip.bin, .RCdata.bin)" << endl;
    exit(1);
  }
  const char *sampleFile = argv[1];
  const char *chrStr = argv[2];
  int chr;
  if (sscanf(chrStr, "chr%d", &chr) != 1) exitBadArg("arg2 (chrStr, expected chr##)", chrStr);
  const char *noCommonSVregionsFile = argv[3];
  const char *CommonSVparamsFile = argv[4];
  const char *regionCountsBinGzListFile = argv[5];
  const char *baselineStdScalesListFile = argv[6];
  const char *bgenSamplePrefix = argv[7];
  const char *IDs50Kfile = argv[8];
  double bpStart38;
  if (sscanf(argv[9], "%lf", &bpStart38) != 1 || !(bpStart38 >= 0))
    exitBadArg("arg9 (bpStart38, expected >= 0)", argv[9]);
  double bpEnd38;
  if (sscanf(argv[10], "%lf", &bpEnd38) != 1 || !(bpEnd38 > bpStart38))
    exitBadArg("arg10 (bpEnd38, expected > bpStart38)", argv[10]);
  float coeffVarBaselineRCthresh;
  if (sscanf(argv[11], "%f", &coeffVarBaselineRCthresh) != 1)
    exitBadArg("arg11 (coeffVarBaselineRCthresh)", argv[11]);
  float max_50K_vs_150K_diff;
  if (sscanf(argv[12], "%f", &max_50K_vs_150K_diff) != 1)
    exitBadArg("arg12 (max_50K_vs_150K_mu_dip_rel_diff)", argv[12]);
  float BFclipThresh;
  // must be in (0,1): the log of the threshold defines the scale of the stored log BFs
  if (sscanf(argv[13], "%f", &BFclipThresh) != 1 || !(BFclipThresh > 0 && BFclipThresh < 1))
    exitBadArg("arg13 (Bayes factor clipping threshold, expected in (0,1))", argv[13]);
  const float logBFscale = -127 / logf(BFclipThresh);
  int maxNearbySNPs;
  if (sscanf(argv[14], "%d", &maxNearbySNPs) != 1 || maxNearbySNPs < 2)
    exitBadArg("arg14 (maximum number of nearby SNPs, expected >= 2)", argv[14]);
  int threads;
  if (sscanf(argv[15], "%d", &threads) != 1 || threads < 1)
    exitBadArg("arg15 (threads, expected >= 1)", argv[15]);
  const string dumpType = argv[16];
  if (dumpType != "dump_all" && dumpType != "dump_RCs")
    exitBadArg("arg16 (expected dump_RCs or dump_all)", argv[16]);
  bool dumpAll = dumpType=="dump_all";
  const char *outPrefix = argv[17];

  Timer timer;

  cout << "Analyzing " << chrStr << ", start=" << bpStart38 << ", end=" << bpEnd38 << endl;
  cout << "Including noCommonSV bins with coeffVar(baseline) <= " << coeffVarBaselineRCthresh
       << " in most sample sets" << endl;
  cout << "Including CommonSV bins with mu_dip 50K vs. 150K diff <= " << max_50K_vs_150K_diff
       << "x" << endl;
  cout << "Using Bayes factor clip thresh = " << BFclipThresh << endl;
  cout << "Setting number of threads to " << threads << endl << endl;
  omp_set_num_threads(threads);

  // read IDs from .sample file; set up indexing
  vector <int> keepIDs;
  vector <int> IDtoKeepInd(MAX_ID, -1);
  {
    FileUtils::AutoGzIfstream finSample; finSample.openOrExit(sampleFile);
    string line; getline(finSample, line); getline(finSample, line); // throw away headers
    int ID;
    while (finSample >> ID) {
      if (ID >= MAX_ID) { // IDs index the lookup table directly
        cerr << "ERROR: Sample ID " << ID << " is not below MAX_ID = " << MAX_ID << endl;
        exit(1);
      }
      if (ID > 0) IDtoKeepInd[ID] = keepIDs.size();
      keepIDs.push_back(ID);
      getline(finSample, line);
    }
    finSample.close();
  }
  int Nkeep = keepIDs.size();
  cout << "Read " << Nkeep << " IDs from .sample file" << endl << endl;

  // create list of bin starts to extract: (1) noCommonSV + (2) CommonSV with previously-fit params
  int binSize = 0;
  set <int> binStartsToExtract;
  vector <string> baselineScalesFiles, stdScalesFiles;
  map <int, float> noCommonSVsdHighCov[2]; // N=50K, N=150K (for calibrating CommonSV sigmaDip)
  // read list of noCommonSV bins
  processNoCommonSVbins(binSize, binStartsToExtract, baselineScalesFiles, stdScalesFiles,
                        noCommonSVsdHighCov, baselineStdScalesListFile, chr,
                        coeffVarBaselineRCthresh, noCommonSVregionsFile, bpStart38, bpEnd38);
  // read list of CommonSV bins
  processCommonSVbins(binStartsToExtract, CommonSVparamsFile, max_50K_vs_150K_diff, binSize, chr,
                      bpStart38, bpEnd38);

  uint64 M = binStartsToExtract.size();
  cout << "Total 100bp-bins to extract in chr" << chr << ": " << M << endl;

  // store negative binomial parameters (previously estimated from high-coverage samples)
  vector <int> binStarts(binStartsToExtract.begin(), binStartsToExtract.end());
  vector < vector <float> > commonSVmuDips(2, vector <float> (M, NAN)); // N=50K, N=150K
  vector < vector <float> > commonSVsigmaDips(2, vector <float> (M, NAN)); // N=50K, N=150K
  vector < vector <float> > commonSVtheta0s(2, vector <float> (M, NAN)); // N=50K, N=150K
  // reread CommonSV negative binomial params
  readCommonSVnegBinomParams(commonSVmuDips, commonSVsigmaDips, commonSVtheta0s,
                             CommonSVparamsFile, binStarts, chr);

  cout << "\nTime for filtering bins: " << timer.update_time() << " sec\n" << endl;


  // allocate storage for read counts
  // (used initially for nearbySNPs, then read counts, and finally overwritten with logBFscaled)
  ushort *readCounts = new ushort[Nkeep * M];

  vector < vector <RCdata> > RCdataDump; // will be ignored in call below
  // stream WES SNP/indel calls; set entries of readCounts to -1 if a bin is to be masked, 0 o/w
  readNearbySNPs(RCdataDump, readCounts, Nkeep, IDtoKeepInd, binStarts, binSize, bgenSamplePrefix,
                 chrStr, maxNearbySNPs, threads);

  cout << "\nTime for setting nearbySNP masks: " << timer.update_time() << " sec\n" << endl;


  // read 100bp-bin read counts
  vector <char> hasRCs = unzipReadCounts(readCounts, Nkeep, IDtoKeepInd, binStartsToExtract,
                                         regionCountsBinGzListFile);

  cout << "\nTime for unzipping read counts: " << timer.update_time() << " sec\n" << endl;


  // read list of IDs in 50K (to decide which CommonSV negative binomial parameters to use)
  set <int> IDs50K;
  {
    FileUtils::AutoGzIfstream finIDs50K; finIDs50K.openOrExit(IDs50Kfile);
    int ID; while (finIDs50K >> ID) IDs50K.insert(ID);
    finIDs50K.close();
  }

  // compute logBFscaled for DEL and DUP vs. CN=2
  const float minBaselineScale = 0.8f, maxBaselineScale = 1.333333f; // mask lo/hi baselineScales
  vector <float> stdScales(Nkeep, INFINITY); // INFINITY marks samples without baselineScales data
  // if dumping all RC-related data, allocate storage
  if (dumpAll) RCdataDump.resize(Nkeep);
#pragma omp parallel for schedule(dynamic)
  for (uint f = 0; f < baselineScalesFiles.size(); f++) {
    // read RC quantile boundaries from header of stdScales file
    FileUtils::AutoGzIfstream finStdScales;
    finStdScales.openOrExit(stdScalesFiles[f]);
    string line; getline(finStdScales, line); // ignore text header (line 1)
    int ID; float stdScale;
    vector <float> meanRCquantilesLo(RC_QUANTILES+1);
    finStdScales >> ID >> stdScale; // ignore first two columns of line 2
    for (int q = 1; q <= RC_QUANTILES; q++)
      finStdScales >> meanRCquantilesLo[q]; // read RC quantile boundaries
    // store per-sample data for later: ID, stdScale, cInvs
    vector <int> setIDs; vector <float> setStdScales; vector < vector <float> > set_cInvs;
    // determine whether samples came from N=50K oligo lot
    int ctr50K = 0, ctrNot50K = 0;
    while (finStdScales >> ID >> stdScale) { // read ID and stdScale
      vector <float> cInvs(RC_QUANTILES);
      for (int q = 0; q < RC_QUANTILES; q++)
        finStdScales >> cInvs[q]; // read per-RC quantile theta0 multipliers for negative binomial
      setIDs.push_back(ID);
      setStdScales.push_back(stdScale);
      set_cInvs.push_back(cInvs);

      if (IDs50K.count(ID))
        ctr50K++;
      else
        ctrNot50K++;
    }
    assert(ctr50K==0 || ctrNot50K==0); // a sample set must not mix oligo lots
    int oligoLot = (ctr50K > ctrNot50K ? 0 : 1);
    finStdScales.close();

    // read regions + bin-level summary parameters from header of baselineScales file
    FileUtils::AutoGzIfstream finBaselineScales;
    finBaselineScales.openOrExit(baselineScalesFiles[f]);
    uint64 R; finBaselineScales.read((char *) &R, sizeof(R));
    vector <bool> extract(R);
    vector <int> start38s(R);
    {
      int ctrExtract = 0;
      int prevStart38 = -1;
      for (uint r = 0; r < R; r++) {
        Region region;
        finBaselineScales.read((char *) &region, sizeof(Region));
        start38s[r] = region.start38;
        assert(region.start38 > prevStart38); prevStart38 = region.start38;
        extract[r] = binStartsToExtract.count(region.start38);
        ctrExtract += extract[r];
      }
      assert(ctrExtract == (int) M);
    }
    vector <float> meanRCs(R), sdNormRCs(R), meanBaselineRCs(R), coeffVarBaselineRCs(R), theta0s(R);
    finBaselineScales.read((char *) &meanRCs[0], R*sizeof(meanRCs[0]));
    finBaselineScales.read((char *) &sdNormRCs[0], R*sizeof(sdNormRCs[0]));
    finBaselineScales.read((char *) &meanBaselineRCs[0], R*sizeof(meanBaselineRCs[0]));
    finBaselineScales.read((char *) &coeffVarBaselineRCs[0], R*sizeof(coeffVarBaselineRCs[0]));
    finBaselineScales.read((char *) &theta0s[0], R*sizeof(theta0s[0]));

    // assign bins to mean read count quantiles
    // (bins not above any boundary keep the default quantile 0)
    vector <char> quantileAssign(R);
    for (uint r = 0; r < R; r++)
      for (int q = RC_QUANTILES-1; q >= 0; q--)
        if (meanRCs[r] > meanRCquantilesLo[q]) {
          quantileAssign[r] = q;
          break;
        }

    // estimate CommonSV sigmaDip calibration factor by comparing sdNormRCs vs. sdHighCov
    float commonSVsigmaMult;
    {
      // The map is shared between threads: read it through a const reference and find() only.
      // operator[] is a non-const member and must not be called concurrently.
      const map <int, float> &sdHighCov = noCommonSVsdHighCov[oligoLot];
      vector <float> sdRatios;
      for (uint r = 0; r < R; r++) {
        map <int, float>::const_iterator sdIt = sdHighCov.find(start38s[r]);
        if (sdIt != sdHighCov.end())
          sdRatios.push_back(sdNormRCs[r] / sdIt->second);
      }
      if (sdRatios.empty()) { // the median below needs at least one ratio
        cerr << "ERROR: No noCommonSV bins in " << baselineScalesFiles[f]
             << " for calibrating CommonSV sigmaDip" << endl;
        exit(1);
      }
      sort(sdRatios.begin(), sdRatios.end());
      commonSVsigmaMult = sdRatios[sdRatios.size()/2];
      cout << "Calibrated CommonSV sigmaDip for file " << f << ": " << commonSVsigmaMult << endl;
    }

    // process each sample in turn; stream RC, expectRCdip to bin file
    // (file name buffer is sized from the prefix: suffix is at most 35 bytes incl. terminator)
    vector <char> outName(string(outPrefix).size() + 64);
    snprintf(&outName[0], outName.size(), "%s.file%u.RC_expectRCdip.bin", outPrefix, f);
    FILE *fout = fopenWriteOrExit(string(&outName[0]));
    int ctrKeepSamples = 0;
    vector <uchar> baselineScales(R);
    for (uint iSet = 0; iSet < setIDs.size(); iSet++) {
      ID = setIDs[iSet]; stdScale = setStdScales[iSet];
      const vector <float> &cInvs = set_cInvs[iSet];

      // read ID, coverageScale, and baselineScales vector from baselineScales file
      int IDbaselineScales; float coverageScale;
      finBaselineScales.read((char *) &IDbaselineScales, sizeof(IDbaselineScales));
      finBaselineScales.read((char *) &coverageScale, sizeof(coverageScale));
      finBaselineScales.read((char *) &baselineScales[0], R*sizeof(baselineScales[0]));
      if (!finBaselineScales || ID != IDbaselineScales) {
        cerr << "ERROR: Sample " << ID << " in " << stdScalesFiles[f]
             << " is missing or out of order in " << baselineScalesFiles[f] << endl;
        exit(1);
      }

      // IDs come from the input file: validate before using them as an index
      int iKeep = (ID >= 0 && ID < MAX_ID) ? IDtoKeepInd[ID] : -1;
      if (iKeep != -1) {
        fwrite(&ID, sizeof(ID), 1, fout);
        assert(hasRCs[iKeep]);
        stdScales[iKeep] = stdScale;
        if (dumpAll) RCdataDump[iKeep].resize(M);
        // compute logBFscaled
        ushort *readCountsRow = readCounts + iKeep*M;
        int m = 0; // position in filtered bin set
        for (uint r = 0; r < R; r++)
          if (extract[r]) { // restrict to filtered bin set
            ushort RC = readCountsRow[m];
            char *logBFscaled = (char *) &readCountsRow[m]; // will overwrite with logBFscaled!
            float baselineScale = baselineScales[r] * (1.0f/128);
            bool isMasked = RC == (ushort) -1 // mask indicator (or overflow)
              || baselineScale < minBaselineScale || baselineScale > maxBaselineScale;
            float expectRC = coverageScale * baselineScale * meanBaselineRCs[r];
            float muDip = 1, sigmaDip = sdNormRCs[r], theta0 = theta0s[r]; // default: noCommonSV
            if (!(isnan(commonSVmuDips[oligoLot][m]))) { // CommonSV bin => replace muDip, theta0
              muDip = commonSVmuDips[oligoLot][m];
              sigmaDip = commonSVsigmaDips[oligoLot][m] * commonSVsigmaMult;
              theta0 = commonSVtheta0s[oligoLot][m];
              if (muDip == -1)
                isMasked = true;
            }
            theta0 *= cInvs[quantileAssign[r]]; // apply per-sample per-RC-decile shape adjustment
            if (theta0<0) theta0 = -1;
            sigmaDip *= stdScale; if (sigmaDip<0) sigmaDip = -1;

            float BFs[2] = {1.0f, 1.0f};
            float expectRCdip = 0; // stays 0 for masked bins
            if (!isMasked) { // bin count isn't masked => compute logBFscaled
              expectRCdip = expectRC * muDip;
              // negative binomial with size 1/theta0 and mean expectRCdip*CN/2
              float nbSize = 1 / theta0;
              float NBprobs[4];
              for (int CN = 1; CN <= 3; CN++) {
                float theta = expectRCdip*0.5f*CN * theta0;
                float p = 1 / (1+theta);
                NBprobs[CN] =
                  boost::math::pdf(boost::math::negative_binomial_distribution <float> (nbSize, p),
                                   RC);
              }
              for (int CN = 1; CN <= 3; CN += 2) { // CN=1 -> BFs[0], CN=3 -> BFs[1]
                if (NBprobs[CN] * NBprobs[2] == 0) // NB probability of 0 => use Gaussian instead
                  BFs[CN/2] = sqrtf(2.0f/CN) *
                    expf(-0.5f * (sqf(RC - expectRCdip*(0.5f*CN))*2.0f/CN - sqf(RC - expectRCdip))
                         / sqf(sigmaDip*expectRC));
                else
                  BFs[CN/2] = NBprobs[CN] / NBprobs[2];
                logBFscaled[CN/2] = (int) min(127.0f, max(-127.0f, logf(BFs[CN/2]) * logBFscale));
              }
            }

            if (dumpAll) {
              RCdata &d = RCdataDump[iKeep][m];
              d.RC = RC;
              d.coeffVarBaseline = coeffVarBaselineRCs[r];
              d.baselineScale = baselineScale;
              d.expectRC = expectRC;
              d.theta0adj = theta0;
              d.muDip = muDip;
              d.sigmaDipAdj = sigmaDip;
              d.BFs[0] = BFs[0];
              d.BFs[1] = BFs[1];
            }

            if (isMasked) { // bin count is masked => mask data
              RC = 0;
              expectRCdip = 0;
              logBFscaled[0] = logBFscaled[1] = NAN_CHAR;
            }
            // write RC, expectRCdip to bin file
            fwrite(&RC, sizeof(RC), 1, fout);
            fwrite(&expectRCdip, sizeof(expectRCdip), 1, fout);

            m++;
          }
        assert(m == (int) M);
        ctrKeepSamples++;
      }
    }
    // the baselineScales file must end after the last sample listed in the stdScales file
    assert(!(finBaselineScales.read((char *) &baselineScales[0], 1)));
    finBaselineScales.close();
    if (ferror(fout)) {
      cerr << "ERROR: Failed writing " << &outName[0] << endl;
      exit(1);
    }
    fclose(fout);

    cout << "Finished file " << f << ": processed samples in keep set = "
         << ctrKeepSamples << " / " << setIDs.size() << endl;
  }

  cout << "\nTime for computing logBFs: " << timer.update_time() << " sec\n" << endl;

  if (dumpAll) {

    // read WES variant calls (bgen+sample) and nearby variant info (-> capture bias)
    readNearbySNPs(RCdataDump, NULL, 0, IDtoKeepInd, binStarts, binSize, bgenSamplePrefix, chrStr,
                   maxNearbySNPs, threads);

    cout << "\nTime for extracting nearby SNP/indel data: " << timer.update_time() << " sec\n"
         << endl;

    // NOTE(review): the usage text lists ".RCdata.bin" as the suffix of this file, but the dump
    // is written to the bare out prefix -- confirm which is intended before changing either.
    FILE *foutRCdataDump = fopenWriteOrExit(string(outPrefix));
    // write header
    fwrite(&M, sizeof(M), 1, foutRCdataDump);
    fwrite(&binSize, sizeof(binSize), 1, foutRCdataDump);
    fwrite(&binStarts[0], sizeof(binStarts[0]), M, foutRCdataDump);
    // write RCdata for each selected sample
    for (int iKeep = 0; iKeep < Nkeep; iKeep++)
      if (!RCdataDump[iKeep].empty()) {
        fwrite(&keepIDs[iKeep], sizeof(keepIDs[0]), 1, foutRCdataDump);
        fwrite(&RCdataDump[iKeep][0], sizeof(RCdataDump[iKeep][0]), M, foutRCdataDump);
      }
    if (ferror(foutRCdataDump)) {
      cerr << "ERROR: Failed writing " << outPrefix << endl;
      exit(1);
    }
    fclose(foutRCdataDump);
  }
  else {
    // mask logBFscaled (stored in readCounts matrix) for samples without baselineScales data
    int ctrMissingBaselineScales = 0;
    for (int iKeep = 0; iKeep < Nkeep; iKeep++)
      if (isinf(stdScales[iKeep])) {
        ctrMissingBaselineScales++;
        memset(readCounts + iKeep*M, NAN_CHAR, 2*M); // 2 bytes (DEL, DUP) per bin
      }
    cout << "Set logBFs to missing for " << ctrMissingBaselineScales << " missing keep samples"
         << endl;

    // write logBFscaled (stored in readCounts matrix)
    const string logBFsPath = string(outPrefix) + ".logBFs.bin";
    FILE *foutLogBFs = fopenWriteOrExit(logBFsPath);
    // write header
    fwrite(&M, sizeof(M), 1, foutLogBFs);
    fwrite(&binSize, sizeof(binSize), 1, foutLogBFs);
    fwrite(&binStarts[0], sizeof(binStarts[0]), M, foutLogBFs);
    fwrite(&Nkeep, sizeof(Nkeep), 1, foutLogBFs);
    fwrite(&keepIDs[0], sizeof(keepIDs[0]), Nkeep, foutLogBFs);
    fwrite(&logBFscale, sizeof(logBFscale), 1, foutLogBFs);
    // write logBFscaled matrix
    fwrite(readCounts, sizeof(readCounts[0]), Nkeep * M, foutLogBFs);
    if (ferror(foutLogBFs)) {
      cerr << "ERROR: Failed writing " << logBFsPath << endl;
      exit(1);
    }
    fclose(foutLogBFs);
  }

  cout << "\nTime for writing output: " << timer.update_time() << " sec\n" << endl;

  delete[] readCounts;

  return 0;
}
