// g++ -O3 -fopenmp -Wall -static-libgcc -static-libstdc++ cnvCallWES.cpp -o cnvCallWES -I/n/groups/price/poru/HSPH_SVN/src/EAGLE -I/home/pl88/boost_1_58_0/install/include -L/n/groups/price/poru/external_software/libstdc++/usr/lib/gcc/x86_64-redhat-linux/4.8.5/ -L/n/groups/price/poru/external_software/zlib/zlib-1.2.11 -L/home/pl88/boost_1_58_0/install/lib -Wl,-Bstatic -lboost_iostreams -lz

#include <iostream>
#include <iomanip>
#include <sstream>
#include <vector>
#include <string>
#include <set>
#include <map>
#include <queue>
#include <utility>
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <cmath>

#include "omp.h"

using namespace std;

#include "FileUtils.cpp"
#include "StringUtils.cpp"
#include "NumericUtils.cpp"
#include "Timer.cpp"

// Sentinel byte stored in the scaled logBF array for a missing/masked entry
// (compared against in runHMM; written by the high-noise masking step in main).
const char NAN_CHAR = -128;
const int maxNumLong = 10; // maximum number of IBD neighbors
// Per-stage time totals accumulated by runHMM (under "omp atomic") from
// Timer::update_time(): IBD span extraction, Bayes-factor combination, HMM decoding.
double tIBD, tBF, tHMM;

typedef unsigned short uint16; // type of the Match::mStart / Match::mEnd probe indices
typedef unsigned long long uint64; // type of the bin count and IBD block offsets read from file

// Returns x squared.
inline double sq(double x) {
  const double squared = x * x;
  return squared;
}

// One IBD match record: the matching haplotype and the probe span it covers.
// Records are fread() directly into arrays of this struct in main, so the
// member order and types must stay exactly as they are.
struct Match {
  int hap;        // index of the matching haplotype (hap/2 is used as the individual index)
  uint16 mStart;  // first probe index covered by the match
  uint16 mEnd;    // end probe index; used as exclusive (code reads cMvec[mEnd-1])

  Match(int _hap=0, uint16 _mStart=0, uint16 _mEnd=0)
    : hap(_hap), mStart(_mStart), mEnd(_mEnd) {}

  // endOpp_hap_start sort order for the active-set priority queue: the
  // comparison on mEnd is reversed so that top() is the match ending earliest.
  bool operator < (const Match &other) const {
    if (mEnd != other.mEnd)
      return other.mEnd < mEnd; // reversed on purpose (priority_queue is a max-heap)
    if (hap != other.hap)
      return hap < other.hap;
    return mStart < other.mStart;
  }
};
// Sort order for a haplotype's match list: ascending start, then descending
// end, then ascending haplotype index.
bool comp_start_endOpp_hap(const Match &match1, const Match &match2) {
  if (match1.mStart < match2.mStart) return true;
  if (match2.mStart < match1.mStart) return false;
  // same start: the match reaching further comes first
  if (match1.mEnd != match2.mEnd) return match2.mEnd < match1.mEnd;
  return match1.hap < match2.hap;
}

// A Match together with its length measured on cMvec, ordered longest-first so
// that iterating a set<Match_cM> visits the longest active matches first.
struct Match_cM {
  Match match;   // the underlying match record
  double cMlen;  // cMvec[mEnd-1] - cMvec[mStart] for that match

  Match_cM(const Match &_match, const vector <double> &cMvec)
    : match(_match),
      cMlen(cMvec[_match.mEnd-1] - cMvec[_match.mStart]) {}

  // sort order for active set: longest first, ties broken by Match ordering
  bool operator < (const Match_cM &other) const {
    return (cMlen == other.cMlen) ? (match < other.match)
                                  : (cMlen > other.cMlen);
  }
};

// A neighbor haplotype selected for a probe span, with the weight applied to
// its Bayes factors when IBDparam > 0 (value produced by prob_lt_T_gen()).
struct MatchWeight {
  int hap;               // neighbor haplotype index
  double prob_lt_T_gen;  // weight for this neighbor

  MatchWeight(int _hap=0, double _prob_lt_T_gen=0)
    : hap(_hap),
      prob_lt_T_gen(_prob_lt_T_gen) {}
};

// The neighbors selected for one contiguous probe span [mStart, mEnd).
struct MatchArray {
  int mStart;                   // first probe index of the span
  int mEnd;                     // one past the last probe index of the span
  vector <MatchWeight> mList;   // selected neighbors, in the order they were chosen
};

// Weight for a match spanning probes [mStart, mEnd), given IBDparam.
//
// For IBDparam <= 0 the weight is not defined and NAN is returned.
// Otherwise, with len = 0.01 * (cMvec[mEnd-1] - cMvec[mStart]) and
// x = 2 * len * IBDparam, the value 1 - exp(-x) * (1 + x + x^2/2) is computed
// and then rounded to the nearest integer (so the result is 0 or 1).
double prob_lt_T_gen(int mStart, int mEnd, const vector <double> &cMvec, int IBDparam) {
  if (IBDparam <= 0)
    return NAN;

  const int genT = IBDparam;
  const double len = 0.01 * (cMvec[mEnd-1] - cMvec[mStart]);
  const double x = 2*len*genT;
  const double prob = 1 - exp(-x) * (1 + x + 0.5*(x*x));
  return floor(prob + 0.5); // hard threshold at 0.5
}

// Partitions the probe axis into consecutive spans and, for each span, selects
// up to maxNumLong IBD neighbors of haplotype h.
//
// note: matchData for haplotype h is sorted by this function
//
// Parameters:
//   cMvec          - per-probe genetic positions; used for match lengths
//   blockStarts    - blockStarts[h] .. blockStarts[h+1] delimit h's records in matchData
//   matchData      - all match records; the block belonging to h is sorted in place
//   h              - haplotype index (h/2 is the individual and is never its own neighbor)
//   isAllowableNbr - per-individual flag; matches to disallowed individuals are skipped
//   IBDparam       - forwarded to prob_lt_T_gen() for the neighbor weights
//
// Returns the spans in increasing probe order. Within a span the neighbors are
// the longest active matches (by cM length), at most one per individual. The
// result is never empty: when no span could be formed, a single zero-length
// span [0,0) with no neighbors is returned so callers may use front()/back().
vector <MatchArray> extractMatchArrays(const vector <double> &cMvec,
                                       const vector <uint64> &blockStarts, Match *matchData,
                                       int h, const vector <bool> &isAllowableNbr, int IBDparam) {

  const int NO_EVENT = 1<<30; // larger than any uint16 probe index
  vector <MatchArray> matchArrays;

  // locate the matches of haplotype h
  int matchesN = blockStarts[h+1] - blockStarts[h];
  Match *matchesMerged = matchData + blockStarts[h];

  // sweep over match starts/ends, pruning to the top maxNumLong matches per span
  priority_queue <Match> activeMatchPQ; // top() = active match ending earliest
  set <Match_cM> activeMatches;         // same matches, ordered longest first
  sort(matchesMerged, matchesMerged + matchesN, comp_start_endOpp_hap); // sort by start
  int jMerged = 0; // position in matchesMerged list
  int mPrev = 0;   // start of the span currently being accumulated
  while (jMerged < matchesN || !activeMatchPQ.empty()) {
    int earliestActiveEnd = activeMatchPQ.empty() ? NO_EVENT : (int) activeMatchPQ.top().mEnd;
    int incomingStart = (jMerged < matchesN) ? (int) matchesMerged[jMerged].mStart : NO_EVENT;

    // next probe at which the active set changes; emit the span before it
    int mFirst = min(earliestActiveEnd, incomingStart);
    if (mFirst > mPrev) {
      MatchArray matchArray;
      matchArray.mStart = mPrev;
      matchArray.mEnd = mFirst;
      int topCtr = 0;
      set <int> usedInds; usedInds.insert(h/2); // don't use HBD matches (note: set is a bit slow)
      for (set <Match_cM>::iterator it = activeMatches.begin();
           it != activeMatches.end() && topCtr < maxNumLong; ++it)
        if (!usedInds.count(it->match.hap/2)) { // don't use an individual twice
          usedInds.insert(it->match.hap/2);
          topCtr++;
          matchArray.mList.push_back(MatchWeight(it->match.hap,
                                                 prob_lt_T_gen(it->match.mStart, it->match.mEnd,
                                                               cMvec, IBDparam)));
        }
      matchArrays.push_back(matchArray);
    }

    // pop all matches with earliestActiveEnd; delete them from activeMatches set
    if (earliestActiveEnd <= incomingStart) {
      while (!activeMatchPQ.empty() && activeMatchPQ.top().mEnd == earliestActiveEnd) {
        activeMatches.erase(Match_cM(activeMatchPQ.top(), cMvec));
        activeMatchPQ.pop();
      }
    }
    // add all matches with incomingStart to active set
    if (incomingStart <= earliestActiveEnd) {
      while (jMerged < matchesN && matchesMerged[jMerged].mStart==incomingStart) {
        if (isAllowableNbr[matchesMerged[jMerged].hap/2]) {
          activeMatchPQ.push(matchesMerged[jMerged]);
          activeMatches.insert(Match_cM(matchesMerged[jMerged], cMvec));
        }
        jMerged++;
      }
    }
    mPrev = mFirst;
  }

  // No span is produced when h has no matches at all, or when every match
  // starts at probe 0 and belongs to a disallowed neighbor. Return an explicit
  // empty span instead of an empty vector, which would otherwise be indexed below.
  if (matchArrays.empty()) {
    MatchArray noCoverage;
    noCoverage.mStart = 0;
    noCoverage.mEnd = 0;
    matchArrays.push_back(noCoverage);
  }
  assert(matchArrays[0].mStart==0);

  return matchArrays;
}

// Fills logPtrans[r][from][to] for r = 0..R with log transition weights between
// the three states (index 0: CN=1, index 1: CN=2, index 2: CN=3).
//
// The weight depends on the distance di between the starts of bins r-1 and r;
// for the boundary entries r = 0 and r = R a distance of 1e9 is used.
void computeTransitionProbs(double (*logPtrans)[3][3], const vector <int> &binStarts, int R) {

  const double d_gap = 1e5; // impose penalty for staying in CN=1 or CN=3 across gap

  for (int r = 0; r <= R; r++) {
    double di;
    if (r>0 && r<R)
      di = binStarts[r] - binStarts[r-1];
    else
      di = 1e9;

    const double f_exp = 1e-3 * (1 - exp(-di * 1e-3));
    const double f_exp2 = f_exp * f_exp;

    const double logStep = log(f_exp);     // move by one copy-number state
    const double logDouble = log(f_exp2);  // move directly between CN=1 and CN=3
    const double logStayCNV = log(di<d_gap ? 1 : f_exp); // remain in CN=1 or CN=3

    double (*T)[3] = logPtrans[r];
    // from CN=1
    T[0][0] = logStayCNV;
    T[0][1] = logStep;
    T[0][2] = logDouble;
    // from CN=2 (staying is free)
    T[1][0] = logStep;
    T[1][1] = log(1.0);
    T[1][2] = logStep;
    // from CN=3
    T[2][0] = logDouble;
    T[2][1] = logStep;
    T[2][2] = logStayCNV;
  }
}

// One CNV call on a haplotype: the inclusive bin range, two annotations that
// are filled in after calling, and the preformatted output columns.
struct CNVcall {
  int rStart, rEnd;        // first and last bin index of the call (inclusive)
  float CNest;             // copy-number estimate; NAN until annotated
  float fracCommonSVbins;  // fraction of bins flagged as CommonSV; 0 until annotated
  std::string outStr;      // formatted output text for this call

  // CNest and fracCommonSVbins are given defined values here so that a call
  // object can be copied (e.g. by vector::push_back) before it is annotated.
  CNVcall(int _rStart=0, int _rEnd=0, const char *_outStr="")
    : rStart(_rStart), rEnd(_rEnd), CNest(NAN), fracCommonSVbins(0), outStr(_outStr) {}
};

// Calls CNVs on haplotype h with a 3-state Viterbi decoder (state 0 = del,
// 1 = CN=2, 2 = dup) and returns the calls in increasing bin order.
//
// Steps, each timed into the globals tIBD / tBF / tHMM:
//   1. extract the IBD neighbor spans of h and map every probe to its span;
//   2. per bin, combine the individual's log Bayes factors with those of the
//      neighbors at the bin's probe: the first -IBDparam neighbors unweighted
//      when IBDparam < 0, all listed neighbors weighted by prob_lt_T_gen when
//      IBDparam > 0, and no neighbors when IBDparam == 0;
//   3. run Viterbi over the R bins using logPtrans (R+1 entries; entry R is the
//      closing penalty for ending in del/dup) and backtrack;
//   4. trim masked bins from both ends of every del/dup segment and format it.
//
// logBFs holds two chars per (individual, bin) - [0]=del, [1]=dup - scaled by
// 1/logBFscaleInv, with NAN_CHAR marking missing data. The block of matchData
// belonging to h is sorted as a side effect. H is accepted for interface
// compatibility and is not used.
vector <CNVcall> runHMM(const double (*logPtrans)[3][3], const vector <int> &binStarts,
                        int binSize, const vector <int> &bin2probe, const vector <double> &cMvec,
                        const vector <uint64> &blockStarts, Match *matchData, const char *logBFs,
                        double logBFscaleInv, int H, int R, int IBDparam, int h, int ID,
                        const vector <bool> &isAllowableNbr, int chr) {

  (void) H;
  vector <CNVcall> calls_h;
  if (R <= 0) return calls_h; // no bins: nothing to decode (the code below indexes bin 0)

  Timer timer;

  vector <MatchArray> matchArrays = extractMatchArrays(cMvec, blockStarts, matchData, h,
                                                       isAllowableNbr, IBDparam);
  // probes at or beyond mEndLastMatch have no IBD information; an empty span
  // list is treated as "no probe has neighbors"
  const int mEndLastMatch = matchArrays.empty() ? 0 : matchArrays.back().mEnd;
  vector <ushort> probe2array(mEndLastMatch, -1); // SNP-array bim index -> spanning matchArray ind
  for (size_t a = 0; a < matchArrays.size(); a++)
    for (int m = matchArrays[a].mStart; m < matchArrays[a].mEnd; m++)
      probe2array[m] = a;

#pragma omp atomic
  tIBD += timer.update_time();

  // compute and store combined, rescaled Bayes factors for CN=1 vs. 2 and CN=3 vs. 2
  double (*logBFsIBD)[2] = new double[R][2]; // [r][0=del,1=dup]
  double (*logBFsIndiv)[2] = new double[R][2]; // [r][0=del,1=dup]
  double *preMaskIndivNbrs = new double[R], *unmaskedIndivNbrs = new double[R];
  const char *logBFsSelf = logBFs + (h/2)*2LL*R; // this individual's row of logBFs
  for (int r = 0; r < R; r++) {
    // set logBFs for current individual; undo scaling to char [-127,127]
    unmaskedIndivNbrs[r] = 0;
    if (logBFsSelf[2*r] != NAN_CHAR) {
      for (int c = 0; c < 2; c++)
        logBFsIndiv[r][c] = logBFsSelf[2*r + c] * logBFscaleInv;
      unmaskedIndivNbrs[r]++;
    }
    else
      logBFsIndiv[r][0] = logBFsIndiv[r][1] = 0;
    preMaskIndivNbrs[r] = 1;

    // initialize combined logBFs (indiv + IBD neighbors) to logBFsIndiv
    for (int c = 0; c < 2; c++)
      logBFsIBD[r][c] = logBFsIndiv[r][c];

    // add in logBFs of neighbors, weighting according to nbrCount/genThresh
    if (bin2probe[r] < mEndLastMatch) { // haven't overshot last IBD match
      const MatchArray &matchArray = matchArrays[probe2array[bin2probe[r]]];
      const int numListed = (int) matchArray.mList.size();
      if (IBDparam < 0) { // fixed number of (unweighted) neighbors
        for (int k = 0; k < -IBDparam && k < numListed; k++) {
          const char *nbr = logBFs + (matchArray.mList[k].hap/2)*2LL*R + 2*r;
          if (nbr[0] != NAN_CHAR) {
            for (int c = 0; c < 2; c++)
              logBFsIBD[r][c] += nbr[c] * logBFscaleInv;
            unmaskedIndivNbrs[r]++;
          }
          preMaskIndivNbrs[r]++;
        }
      }
      else if (IBDparam > 0) { // P(IBD>generation) weighting
        for (int k = 0; k < numListed; k++) {
          const char *nbr = logBFs + (matchArray.mList[k].hap/2)*2LL*R + 2*r;
          const double weight = matchArray.mList[k].prob_lt_T_gen;
          if (nbr[0] != NAN_CHAR) {
            for (int c = 0; c < 2; c++)
              logBFsIBD[r][c] += nbr[c] * logBFscaleInv * weight;
            unmaskedIndivNbrs[r] += weight;
          }
          preMaskIndivNbrs[r] += weight;
        }
      }
    }
  }

#pragma omp atomic
  tBF += timer.update_time();

  double (*cumLogP)[3] = new double[R][3]; // best log score of a path ending in [r][state]
  char (*prev)[3] = new char[R][3];        // predecessor state on that path (unused for r=0)

  // initialize: start from CN=2 (state 1) before the first bin
  int r = 0;
  cumLogP[r][0] = logPtrans[r][1][0] + logBFsIBD[r][0]; // del
  cumLogP[r][1] = logPtrans[r][1][1]; // CN=2
  cumLogP[r][2] = logPtrans[r][1][2] + logBFsIBD[r][1]; // dup
  // iterate
  for (r = 1; r < R; r++) {
    // transition; set prev
    for (int s = 0; s <= 2; s++) // cur state
      for (int t = 0; t <= 2; t++) // prev state
        if (t==0 || (cumLogP[r][s] < cumLogP[r-1][t] + logPtrans[r][t][s])) {
          cumLogP[r][s] = cumLogP[r-1][t] + logPtrans[r][t][s];
          prev[r][s] = t;
        }
    // emission
    cumLogP[r][0] += logBFsIBD[r][0]; // del
    cumLogP[r][2] += logBFsIBD[r][1]; // dup
  }
  // finalize: penalty for ending in del/dup state = transition back to CN=2 (state=1)
  r = R-1;
  cumLogP[r][0] += logPtrans[R][0][1];
  cumLogP[r][2] += logPtrans[R][2][1];

  // backtrack
  int s = 0;
  if (cumLogP[r][1] > cumLogP[r][s]) s = 1;
  if (cumLogP[r][2] > cumLogP[r][s]) s = 2;
  int rSegEnd = r;
  vector < pair <int, int> > CNVsegs; // inclusive
  vector <bool> states; // 0=del, 1=dup
  while (r > 0) {
    if (prev[r][s] != s) {
      if (s != 1) { CNVsegs.push_back(make_pair(r, rSegEnd)); states.push_back(s/2); }
      rSegEnd = r-1;
      s = prev[r][s];
    }
    r--;
  }
  if (s != 1) { CNVsegs.push_back(make_pair(r, rSegEnd)); states.push_back(s/2); }

#pragma omp atomic
  tHMM += timer.update_time();

  // record calls (segments were collected right-to-left, so walk them backwards)
  for (int k = (int) CNVsegs.size()-1; k >= 0; k--) {
    int rStart = CNVsegs[k].first, rEnd = CNVsegs[k].second, state = states[k];
    // trim bins without any unmasked data from both ends; the bounds keep the
    // scan inside the segment even if every bin of it is masked
    while (rStart < rEnd && unmaskedIndivNbrs[rStart]==0) rStart++;
    while (rEnd > rStart && unmaskedIndivNbrs[rEnd]==0) rEnd--;
    double logBF = 0, logBFindiv = 0, meanUnmaskedNbrs = 0, sumUnmaskedFrac = 0;
    for (int rr = rStart; rr <= rEnd; rr++) {
      logBF += logBFsIBD[rr][state];
      logBFindiv += logBFsIndiv[rr][state];
      meanUnmaskedNbrs += unmaskedIndivNbrs[rr]
        - (logBFsSelf[2*rr] != NAN_CHAR); // neighbors-only; exclude indiv
      sumUnmaskedFrac += unmaskedIndivNbrs[rr] / preMaskIndivNbrs[rr];
    }
    meanUnmaskedNbrs /= (rEnd-rStart+1);
    const char types[2][4] = {"DEL", "DUP"};
    char buf[200];
    snprintf(buf, sizeof(buf), "%7d %d %3s %7.2f %4.1f %6.1f %2d %9d %9d %5.1f %7.2f %5d %5d",
             ID, (h&1)+1, types[state], logBF/log(10), meanUnmaskedNbrs,
             (binStarts[rEnd]-binStarts[rStart]+binSize)*1e-3,
             chr, binStarts[rStart], binStarts[rEnd]+binSize,
             sumUnmaskedFrac, (logBF-logBFindiv)/log(10), rStart, rEnd);
    calls_h.push_back(CNVcall(rStart, rEnd, buf));
  }

  delete[] logBFsIBD;
  delete[] logBFsIndiv;
  delete[] preMaskIndivNbrs;
  delete[] unmaskedIndivNbrs;
  delete[] cumLogP;
  delete[] prev;

  return calls_h;
}

// Loads the SNP-array probe information and annotates every bin.
//
// Outputs (overwritten):
//   cMvec         - per probe, 100 * the genetic-position column of the bim file
//   bin2probe     - per bin, index of the probe whose lifted hg38 position is
//                   closest to the bin midpoint binStarts[r] + binSize/2; ties
//                   go to the lowest probe index; 0 if there are no probes
//   isCommonSVbin - per bin, whether its start appears in CommonSVbinsFile for
//                   this chromosome
//
// Inputs:
//   chrStr           - chromosome as "chr<number>"
//   bimFile          - whitespace-separated: chr, SNP id, genetic pos, bp (hg19), ...
//   lift38File       - whitespace-separated: chr string, two hg38 columns, hg19 bp;
//                      only rows whose chr equals chrStr are used, and each must
//                      refer to a position present in the bim file
//   CommonSVbinsFile - header line, then rows of chr number, start, end, ...
//
// Probes without a lifted position keep the placeholder position 1<<30.
// Terminates the program on malformed chrStr or an unknown lift-file position.
void processSNPs(vector <int> &bin2probe, vector <bool> &isCommonSVbin, vector <double> &cMvec,
                 const char *chrStr, const char *bimFile, const char *lift38File,
                 const char *CommonSVbinsFile, const vector <int> &binStarts, int binSize) {

  // read cM coordinates from bim file
  int Mbim = 0;
  cMvec.clear();
  map <int, int> bp19ToBimInd;
  {
    FileUtils::AutoGzIfstream finBim; finBim.openOrExit(bimFile);
    int c; string snpStr; double genpos; int bp19; string lineStr;
    while (finBim >> c >> snpStr >> genpos >> bp19) {
      cMvec.push_back(100*genpos);
      bp19ToBimInd[bp19] = Mbim++;
      getline(finBim, lineStr); // ignore rest of line
    }
    finBim.close();
    cout << "Read " << Mbim << " SNPs in bim file" << endl;
  }

  // read lifted hg38 coordinates
  vector <int> bp38s(Mbim, 1<<30);
  {
    FileUtils::AutoGzIfstream finLift38; finLift38.openOrExit(lift38File);
    string cStr; int bp38_1, bp38, bp19;
    int ctr38 = 0;
    while (finLift38 >> cStr >> bp38_1 >> bp38 >> bp19)
      if (cStr == chrStr) {
        map <int, int>::const_iterator probe = bp19ToBimInd.find(bp19);
        if (probe == bp19ToBimInd.end()) {
          cerr << "ERROR: hg19 position " << bp19 << " in " << lift38File
               << " is not present in " << bimFile << endl;
          exit(1);
        }
        bp38s[probe->second] = bp38;
        ctr38++;
      }
    finLift38.close();
    cout << "Read hg38 coordinates for " << ctr38 << " SNPs" << endl;
  }

  set <int> commonSVbinStarts;
  {
    FileUtils::AutoGzIfstream finCommonSVbins; finCommonSVbins.openOrExit(CommonSVbinsFile);
    string line; getline(finCommonSVbins, line); // ignore header
    // parse outside of assert() so the value is set even when asserts are compiled out
    int chr = 0;
    if (sscanf(chrStr, "chr%d", &chr) != 1) {
      cerr << "ERROR: chromosome must be given as chr##; got " << chrStr << endl;
      exit(1);
    }
    int chrBin, start38, end38;
    while (finCommonSVbins >> chrBin >> start38 >> end38) {
      assert(end38 == start38+binSize);
      if (chrBin == chr)
        commonSVbinStarts.insert(start38);
      getline(finCommonSVbins, line); // ignore rest of line
    }
    finCommonSVbins.close();
  }

  // Ordered index of the distinct lifted positions. insert() does not overwrite
  // an existing key, so each position keeps the lowest probe index located there.
  map <int, int> firstProbeAt;
  for (int m = 0; m < Mbim; m++)
    firstProbeAt.insert(make_pair(bp38s[m], m));

  // find probe with closest hg38 lift to each of binMidpoint38s; also annotate CommonSV bins
  bin2probe.resize(binStarts.size());
  isCommonSVbin.resize(binStarts.size());
  for (size_t r = 0; r < binStarts.size(); r++) {
    int bp38mid = binStarts[r] + binSize/2;
    int mBest = 0;
    if (!firstProbeAt.empty()) {
      // only the nearest position on either side of the midpoint can be closest
      map <int, int>::const_iterator above = firstProbeAt.lower_bound(bp38mid); // first pos >= mid
      if (above == firstProbeAt.begin())
        mBest = above->second;
      else {
        map <int, int>::const_iterator below = above; --below; // last pos < mid
        if (above == firstProbeAt.end())
          mBest = below->second;
        else {
          int distBelow = bp38mid - below->first;
          int distAbove = above->first - bp38mid;
          if (distBelow < distAbove) mBest = below->second;
          else if (distAbove < distBelow) mBest = above->second;
          else mBest = min(below->second, above->second); // tie: lowest probe index
        }
      }
    }
    bin2probe[r] = mBest;
    isCommonSVbin[r] = commonSVbinStarts.count(binStarts[r]);
  }
}

int main(int argc, char *argv[]) {
  const int OPT_ARG_NUM = 13;
  if (argc != OPT_ARG_NUM && argc != OPT_ARG_NUM+1) {
    cerr << "ERROR: need 6 or 7 args:" << endl;
    cerr << "- arg1 = chrStr (chr##)" << endl;
    cerr << "- arg2 = 2-byte logBF binary file" << endl;
    cerr << "- arg3 = .file#.RC_expectRCdip.bin file list (for annotating calls with CNs)" << endl;
    cerr << "- arg4 = bim file" << endl;
    cerr << "- arg5 = hg38 lift file" << endl;
    cerr << "- arg6 = file listing CommonSV regions" << endl;
    cerr << "- arg7 = ID stdScale file (collated from WES read count normalization)" << endl;
    cerr << "- arg8 = maxStdScale (mask WES data from higher-noise samples)" << endl;
    cerr << "- arg9 = IBD binary file" << endl;
    cerr << "- arg10 = IBD param (>=0 for fixed # of neighbors; <0 for P(IBD>generations)" << endl;
    cerr << "- arg11 = threads" << endl;
    cerr << "- arg12 = output file" << endl;
    cerr << "- (optional) arg13 = file containing IDs to include in analysis (1) / exclude as IBD neighbors (-1)" << endl;
    exit(1);
  }

  const char *chrStr = argv[1]; int chr; assert(sscanf(chrStr, "chr%d", &chr)==1);
  const char *logBFfile = argv[2];
  const char *RCexpectRCdipFileList = argv[3];
  const char *bimFile = argv[4];
  const char *lift38File = argv[5];
  const char *CommonSVbinsFile = argv[6];
  const char *IDstdScaleFile = argv[7];
  double maxStdScale; assert(sscanf(argv[8], "%lf", &maxStdScale)==1); assert(maxStdScale>1.5);
  const char *IBDfile = argv[9];
  int IBDparam; assert(sscanf(argv[10], "%d", &IBDparam)==1);
  int threads; assert(sscanf(argv[11], "%d", &threads)==1);
  const char *outFile = argv[12];

  assert(IBDparam >= -maxNumLong && IBDparam <= 1000);

  Timer timer;

  cout << "Setting number of threads to " << threads << endl;
  omp_set_num_threads(threads);

  /***** read 100bp-bin list, phased sample list, and logBF data *****/
  // [0]: scaled log p(lrr|del) - log p(lrr|CN=2)
  // [1]: scaled log p(lrr|dup) - log p(lrr|CN=2)
  uint64 R;
  int binSize;
  vector <int> binStarts;
  int N;
  vector <int> IDs;
  map <int, int> IDtoInd;
  double logBFscaleInv;
  char *logBFs;
  {
    FILE *finLogBFs = fopen(logBFfile, "rb");
    fread(&R, sizeof(R), 1, finLogBFs);
    fread(&binSize, sizeof(binSize), 1, finLogBFs);
    binStarts.resize(R);
    fread(&binStarts[0], sizeof(binStarts[0]), R, finLogBFs);
    cout << "Read locations of " << R << " " << binSize << "bp bins on " << chrStr << endl;
    fread(&N, sizeof(N), 1, finLogBFs);
    IDs.resize(N);
    fread(&IDs[0], sizeof(IDs[0]), N, finLogBFs);
    for (int i = 0; i < N; i++) IDtoInd[IDs[i]] = i;
    cout << "Read " << N << " sample IDs" << endl;
    float logBFscale;
    fread(&logBFscale, sizeof(logBFscale), 1, finLogBFs);
    logBFscaleInv = 1.0 / logBFscale;
    logBFs = new char[N*2LL*R];
    fread(logBFs, sizeof(logBFs[0]), N*2LL*R, finLogBFs);
    assert(!ferror(finLogBFs));
    fclose(finLogBFs);
  }
  cout << "Read logBF data (time = " << timer.update_time() << " sec)" << endl;

  /***** read SNP-array coordinates; find index of probe with closest hg38 lift to each bin *****/
  vector <int> bin2probe; // dim = R; maps 100bp-bin indices to SNP-array probe indices used in IBD
  vector <bool> isCommonSVbin;
  vector <double> cMvec;
  processSNPs(bin2probe, isCommonSVbin, cMvec, chrStr, bimFile, lift38File, CommonSVbinsFile,
	      binStarts, binSize);

  /***** read ID stdScale file (collated from WES read count norm); mask high-noise samples *****/
  vector <bool> isAllowableNbr(N);
  {
    FileUtils::AutoGzIfstream finIDstdScale; finIDstdScale.openOrExit(IDstdScaleFile);
    int ID; float stdScale;
    int ctrGood = 0, ctrBad = 0, ctrNotPhased = 0;
    while (finIDstdScale >> ID >> stdScale) {
      if (IDtoInd.find(ID) == IDtoInd.end())
	ctrNotPhased++;
      else if (stdScale > maxStdScale) {
	ctrBad++;
	memset(logBFs + IDtoInd[ID]*2LL*R, NAN_CHAR, 2*R); // mask logBFs
      }
      else {
	ctrGood++;
	isAllowableNbr[IDtoInd[ID]] = true;
      }
    }
    finIDstdScale.close();
    cout << "Read samples with normalized WES read counts:" << endl;
    printf("%8d allowable haplotype neighbors\n", ctrGood);
    printf("%8d too noisy (stdScale>%g) => set logBFs to missing\n", ctrBad, maxStdScale);
    printf("%8d not in phasing\n", ctrNotPhased);
    cout << endl;
  }

  /***** read IDs of individuals to include in analysis (1) / exclude as IBD neighbors (-1) *****/
  set <int> inclIDs;
  vector <bool> excludeAsNbr(N);
  if (argc == OPT_ARG_NUM+1) {
    const char *excludeIDfile = argv[OPT_ARG_NUM];
    FileUtils::AutoGzIfstream finExcludeIDs; finExcludeIDs.openOrExit(excludeIDfile);
    int ctrExclude = 0;
    int ID, action; // 1 for include in analysis, -1 for exclude as IBD neighbors
    while (finExcludeIDs >> ID >> action) {
      if (action==1)
	inclIDs.insert(ID);
      else if (IDtoInd.find(ID) != IDtoInd.end()) {
	isAllowableNbr[IDtoInd[ID]] = false;
	ctrExclude++;
      }
    }
    finExcludeIDs.close();
    cout << "Read " << inclIDs.size() << " IDs of individuals to include in analysis" << endl;
    cout << "Read " << ctrExclude << " IDs of individuals to exclude as IBD neighbors" << endl;
  }

  /***** read IBD data *****/
  int H = 2*N;
  vector <uint64> blockStarts(H+1); // array indices in matchData (converted from bin file blocks)
  Match *matchData;
  {
    FILE *finIBD = fopen(IBDfile, "rb");
    int Nibd; fread(&Nibd, sizeof(int), 1, finIBD); assert(Nibd == N);
    fseek(finIBD, Nibd*sizeof(int), SEEK_CUR); // skip ID block (14048)
    fread(&blockStarts[0], sizeof(uint64), H+1, finIBD); // H+1 uint64: match block starts (hdr2)
    for (int h = H; h >= 0; h--)
      blockStarts[h] = (blockStarts[h] - blockStarts[0]) / sizeof(Match); // change to array inds
    matchData = new Match[blockStarts[H]];
    fread(matchData, sizeof(Match), blockStarts[H], finIBD);
    fclose(finIBD);
    cout << "Read IBD data (time = " << timer.update_time() << " sec)" << endl;
  }

  /***** set up transition matrix *****/
  double (*logPtrans)[3][3] = new double[R+1][3][3];
  computeTransitionProbs(logPtrans, binStarts, R);

  /***** run HMM *****/
  vector < vector <CNVcall> > calls(H);
#pragma omp parallel for
  for (int h = 0; h < H; h++)
    if (inclIDs.empty() || inclIDs.count(IDs[h/2]))
      calls[h] = runHMM(logPtrans, binStarts, binSize, bin2probe, cMvec, blockStarts, matchData,
			logBFs, logBFscaleInv, H, R, IBDparam, h, IDs[h/2], isAllowableNbr, chr);

  cout << "Finished HMM computation (time = " << timer.update_time() << " sec)" << endl;

  cout << "tIBD: " << tIBD << endl;
  cout << "tBF:  " << tBF << endl;
  cout << "tHMM: " << tHMM << endl;
  cout << endl;

  delete[] matchData;
  delete[] logBFs;
  delete[] logPtrans;

  /***** annotate calls with CN estimated using RC + expectRCdip *****/
  {
    FileUtils::AutoGzIfstream finRCfileList; finRCfileList.openOrExit(RCexpectRCdipFileList);
    vector <string> RCexpectRCdipFiles;
    string file;
    while (finRCfileList >> file)
      RCexpectRCdipFiles.push_back(file);
    finRCfileList.close();
    vector <bool> foundRCdata(N);
#pragma omp parallel for schedule(dynamic)
    for (uint f = 0; f < RCexpectRCdipFiles.size(); f++) {
      FILE *fin = fopen(RCexpectRCdipFiles[f].c_str(), "rb"); assert(fin != NULL);
      int ID;
      ushort RC;
      float expectRCdip;
      size_t bytesCell = sizeof(RC)+sizeof(expectRCdip);
      while (fread(&ID, sizeof(ID), 1, fin)) {
	size_t byteOffset = ftell(fin);
	if (IDtoInd.find(ID) != IDtoInd.end()) {
	  int iPhased = IDtoInd[ID];
	  foundRCdata[iPhased] = true;
	  for (int h = 2*iPhased; h < 2*iPhased+2; h++)
	    for (uint c = 0; c < calls[h].size(); c++) {
	      int sumRC = 0;
	      float sumExpectRCdip = 0;
	      int numCommonSVbins = 0;
	      fseek(fin, byteOffset + calls[h][c].rStart * bytesCell, SEEK_SET);
	      for (int r = calls[h][c].rStart; r <= calls[h][c].rEnd; r++) {
		fread(&RC, sizeof(RC), 1, fin);
		fread(&expectRCdip, sizeof(expectRCdip), 1, fin);
		sumRC += RC;
		sumExpectRCdip += expectRCdip;
		numCommonSVbins += isCommonSVbin[r];
	      }
	      calls[h][c].CNest = 2 * sumRC / sumExpectRCdip;
	      calls[h][c].fracCommonSVbins =
		numCommonSVbins / (float) (calls[h][c].rEnd - calls[h][c].rStart + 1);
	    }
	}
	fseek(fin, byteOffset + R*bytesCell, SEEK_SET);
      }
    }
    // set CNest to missing and compute fracCommonSVbins for individuals without WES data
#pragma omp parallel for
    for (int h = 0; h < H; h++)
      if (!foundRCdata[h/2])
	for (uint c = 0; c < calls[h].size(); c++) {
	  int numCommonSVbins = 0;
	  for (int r = calls[h][c].rStart; r <= calls[h][c].rEnd; r++)
	    numCommonSVbins += isCommonSVbin[r];
	  calls[h][c].CNest = NAN;
	  calls[h][c].fracCommonSVbins =
	    numCommonSVbins / (float) (calls[h][c].rEnd - calls[h][c].rStart + 1);
	}
  }
  cout << "Annotated calls with CN estimates from RC + expectRC data (time = "
       << timer.update_time() << " sec)" << endl;

  /***** write output *****/

  FileUtils::AutoGzOfstream fout; fout.openOrExit(outFile);
  fout << "ID    HAP TYPE    LOD NBRS LEN_KB CHR  START38     END38  BINS LOD_NBR STARTB ENDB    CN FRAC_COMMON_BINS" << endl;
  fout << fixed << setprecision(2);
  for (int h = 0; h < H; h++)
    if (inclIDs.empty() || inclIDs.count(IDs[h/2]))
      for (uint c = 0; c < calls[h].size(); c++) {
	char buf[10]; sprintf(buf, "%.1f", calls[h][c].fracCommonSVbins);
	if (buf[2]=='0') buf[1]='\0';
	fout << calls[h][c].outStr << "  " << calls[h][c].CNest << " " << buf << endl;;
      }
  fout.close();
  
  cout << "Successfully completed cnvCallWES run for chr" << chr << " IBDparam " << IBDparam
       << endl;

  return 0;
}
