#include <iostream>
#include <fstream>
#include <string>
#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <cmath>

#include "VersionHeader.hpp"
#include "FileUtils.cpp"
#include "StringUtils.cpp"
#include "Timer.cpp"

// Abort unless `test` holds: prints "ERROR: <errorStr>" to stdout and exits with status 1.
// Unlike assert(), this check is not compiled out when NDEBUG is defined.
// The do { ... } while (0) wrapper makes the expansion a single statement, so that
// `if (c) require(...); else ...` parses as intended; the call site supplies the trailing ';'.
// errorStr is only evaluated when the test fails.
#define require(test, errorStr) \
  do { \
    if (!(test)) { \
      std::cout << "ERROR: " << (errorStr) << std::endl; \
      std::exit(1); \
    } \
  } while (0)

using namespace std;

// Sentinel stored in GenoInfo::theta when the theta value is missing (NaN).  -128 lies outside
// the [-127, 127] range that charCrop() clamps to, so it cannot collide with a real value.
const char NAN_CHAR = -128;
// Lookup table from the 2-bit genotype code read from a plink .bed file (the index) to the
// genotype value stored in GenoInfo::geno.  main() treats the value 3 (bed code 1) as a
// missing genotype.
// NOTE(review): presumably values 0-2 are allele counts (bed codes 0/2/3 are the two
// homozygotes and the heterozygote in the PLINK format) -- confirm which allele is counted.
const int bedToGeno[4] = {2, 3, 1, 0};

// Round x to the nearest integer (computed as floor(x + 0.5), so halves round up) and clamp
// the result to [-127, 127] so that it fits in a signed byte and never equals -128 (NAN_CHAR).
// The clamp is applied in floating point, before the integer conversion: converting an
// out-of-range floating-point value to int is undefined behavior (a huge positive x would
// typically come out as -127 on x86 if the conversion were done first).
// x must not be NaN; main() filters NaN values before calling this function.
char charCrop(float x) {
  const double rounded = std::floor(x + 0.5);
  if (rounded >= 127.0) return (char) 127;
  if (rounded <= -127.0) return (char) -127;
  return (char) (int) rounded;
}

// Read the packed genotypes of variant m (0-based) from an open plink .bed file.
//   bedLineIn: destination buffer; must hold at least (N+3)/4 bytes
//   finBed:    .bed stream opened for binary reading
//   M:         number of variants (not used here; kept for interface compatibility)
//   N:         number of individuals (each byte packs 4 two-bit genotypes)
//   m:         index of the variant to read
// Prints an error and exits with status 1 if the seek or the read fails, so the caller never
// sees a partially filled buffer.
void readBedLine(uchar *bedLineIn, FILE *finBed, uint64 M, uint64 N, uint64 m) {
  const uint64 bytesPerVariant = (N+3)>>2; // ceil(N/4)
  // skip the 3-byte file header, then m complete variant records
  if (fseek(finBed, 3 + m*bytesPerVariant, SEEK_SET) != 0) {
    std::cout << "ERROR: unable to seek to variant " << m << " in bed file" << std::endl;
    exit(1);
  }
  if (fread(bedLineIn, 1, bytesPerVariant, finBed) != bytesPerVariant) {
    std::cout << "ERROR: unable to read variant " << m << " from bed file" << std::endl;
    exit(1);
  }
}

// One merged output record per (variant, individual) pair.  main() writes each record with
// fwrite(&gi, sizeof(GenoInfo), 1, fout), so the in-memory layout of this struct is the
// on-disk format of the output file.
// NOTE(review): presumably 3 bytes, with the two bit-fields sharing one byte; bit-field layout
// is implementation-defined -- confirm against the readers of the output file.
struct GenoInfo {
  char lrr;   // denoised LRR value, copied as one raw byte from the LRR input file
  char theta; // theta in [0,1] rescaled via charCrop(127*(2*theta-1)); NAN_CHAR if missing
  unsigned char geno: 2; // genotype value looked up in bedToGeno (3 = missing)
  unsigned char conf: 6; // genotype confidence: 0 if geno is missing, otherwise 63
};

FILE *openCheckBed(const string &bedFile, uint64 M, uint64 N) {
  FILE *finBed = fopen(bedFile.c_str(), "rb");
  require(finBed != NULL, "unable to open " + bedFile);
  uint64 bedBytesExpected = M * ((N+3)>>2) + 3;
  fseek(finBed, 0, SEEK_END);
  require((uint64) ftell(finBed) == bedBytesExpected,
	  "incorrect number of bytes in bed file: expected " + StringUtils::itos(M) + "*"
	  + StringUtils::itos((N+3)>>2) + "+3");
  fseek(finBed, 0, SEEK_SET);
  uchar header[3];
  fread(header, 1, 3, finBed);
  require(header[0]==0x6c && header[1]==0x1b && header[2]==0x01,
	  "incorrect first three bytes of bed file");
  return finBed;
}

int main(int argc, char *argv[]) {

  printVersion();

  cout << "merge_lrr_theta_geno:" << endl;
  cout << "- arg1 = $BED_BIM_FAM_PREFIX" << endl;
  cout << "- arg2 = $LRR_DENOISED_FILE" << endl;
  cout << "- arg3 = $THETA_FILE" << endl;
  cout << "- arg4 = $LRR_THETA_GENO_PREFIX.bin (output file)" << endl;
  cout << endl;

  printCmd(argc, argv);

  if (argc != 5) {
    cout << "ERROR: 4 arguments required" << endl;
    exit(1);
  }

  const char *plinkPrefix = argv[1];
  const char *lrrBinFile = checkInputFileExt(argv, 2, ".bin");
  const char *thetaFile = argv[3];
  const char *outFile = argv[4];
  
  FileUtils::requireReadable(plinkPrefix + string(".bed"));
  FileUtils::requireReadable(plinkPrefix + string(".bim"));
  FileUtils::requireReadable(plinkPrefix + string(".fam"));
  FileUtils::requireReadable(lrrBinFile);
  FileUtils::requireReadable(thetaFile);
  FileUtils::requireWriteable(outFile);

  Timer timer; double t0 = timer.get_time();

  cout << "Merging LRR (PC-denoised) + theta + genotype data" << endl;
  uint64 N = FileUtils::AutoGzIfstream::lineCount(plinkPrefix + string(".fam"));
  cout << "Number of individuals: " << N << endl;
  uint64 M = FileUtils::AutoGzIfstream::lineCount(plinkPrefix + string(".bim"));
  cout << "Number of variants: " << M << endl;

  // open bed file
  cout << "Opening plink bed file: " << plinkPrefix + string(".bed") << endl;
  FILE *finBed = openCheckBed(plinkPrefix + string(".bed"), M, N);

  // open LRR file (PC-denoised; 1-byte binary format)
  cout << "Opening denoised LRR file: " << lrrBinFile << endl;
  FILE *finLRR = fopen(lrrBinFile, "rb");
  require(finLRR != NULL, "unable to open " + string(lrrBinFile));
  fseek(finLRR, 0, SEEK_END);
  require((uint64) ftell(finLRR) == M*N,
	  "incorrect number of bytes in LRR file: expected " + StringUtils::itos(M) + "*"
	  + StringUtils::itos(N));
  fseek(finLRR, 0, SEEK_SET);
  
  // open theta file and output file
  cout << "Opening theta file: " << thetaFile << endl;
  FileUtils::AutoGzIfstream finTheta; finTheta.openOrExit(thetaFile);
  FILE *fout = fopen(outFile, "wb");
  require(fout != NULL, "unable to write to " + string(outFile));

  // stream all three input files and write merged output
  uchar *bedLineIn = (uchar *) malloc((N+3)>>2); assert(bedLineIn != NULL);
  string thetaStr;
  for (uint64 m = 0; m < M; m++) {
    if ((m+1)%100 == 0) cout << "." << flush;
    readBedLine(bedLineIn, finBed, M, N, m);
    for (uint64 n = 0; n < N; n++) {
      GenoInfo gi;
      // set LRR (PC-denoised)
      fread(&gi.lrr, 1, 1, finLRR);

      // set theta
      assert(finTheta >> thetaStr);
      float theta = NAN;
      assert((sscanf(thetaStr.c_str(), "%f", &theta) && theta >= 0 && theta <= 1)
	     || thetaStr=="NA" || thetaStr=="nan");
      gi.theta = isnan(theta) ? NAN_CHAR : charCrop(127*(2*theta-1));

      // set genotype + confidence
      gi.geno = bedToGeno[(bedLineIn[n>>2]>>((n&3)<<1))&3];
      gi.conf = gi.geno == 3 ? 0 : 63; // set genotyping confidence to 0/1 according to missingness

      // write merged GenoInfo
      assert(fwrite(&gi, sizeof(GenoInfo), 1, fout) == 1U);
    }
  }
  assert(!(finTheta >> thetaStr));
  cout << endl;

  free(bedLineIn);
  fclose(finBed);
  fclose(finLRR);
  finTheta.close();

  cout << "Finished merge_lrr_theta_geno; total time = " << timer.get_time()-t0 << " sec" << endl;

  return 0;
}
