#include <vector>
#include <string>
#include <iostream>
#include <iomanip>

#include <boost/program_options.hpp>

#include "FileUtils.cpp"
#include "StringUtils.cpp"
#include "MapInterpolater.cpp"

using namespace std;

// One variant record: the six columns that readBimFile extracts from a bim-file line.
// Field order matches the column order of the file.
struct SnpInfo {
  int chrom;            // chromosome code, stored as an integer
  std::string ID;       // variant identifier
  double genpos;        // genetic position, in Morgans
  int physpos;          // physical position
  std::string allele1;  // first allele column
  std::string allele2;  // second allele column
};

// Converts a chromosome label to its integer code.
//
// An optional leading "chr" is stripped first.  Then:
//   - an all-digit label is accepted if its value lies in [1, Nauto+1];
//   - "X", "XY", "PAR1" and "PAR2" all map to Nauto+1;
//   - anything else (including an empty label) yields -1.
//
// chrom: chromosome label (taken by value because it is edited locally)
// Nauto: number of autosomes; Nauto+1 is the code used for the X chromosome
// Returns the chromosome code, or -1 if the label is not recognised.
//
// Labels that start with a digit but contain any non-digit character
// (e.g. "1_random") are rejected rather than truncated to their numeric prefix,
// and arbitrarily long digit strings are rejected without overflowing.
int chrStrToInt(std::string chrom, int Nauto) {
  if (chrom.compare(0, 3, "chr") == 0)
    chrom.erase(0, 3);

  const int chrX = Nauto + 1;

  // Plain range comparisons are used instead of isdigit(), which is undefined
  // for negative char values.
  if (!chrom.empty() && chrom[0] >= '0' && chrom[0] <= '9') {
    int chr = 0;
    for (std::string::size_type i = 0; i < chrom.size(); i++) {
      const char c = chrom[i];
      if (c < '0' || c > '9') return -1; // trailing garbage after the number
      chr = 10 * chr + (c - '0');
      if (chr > chrX) return -1;         // bail out early: keeps chr bounded, so no overflow
    }
    return chr >= 1 ? chr : -1;
  }

  if (chrom == "X" || chrom == "XY" || chrom == "PAR1" || chrom == "PAR2") return chrX;
  return -1;
}

vector <SnpInfo> readBimFile(const string &bimFile, int Nauto) {
  vector <SnpInfo> ret;
  string line;
  FileUtils::AutoGzIfstream fin; fin.openOrExit(bimFile);
  int numOutOfOrder = 0;
  while (getline(fin, line)) {
    std::istringstream iss(line);
    SnpInfo snp; string chrom_str;
    if (!(iss >> chrom_str >> snp.ID >> snp.genpos >> snp.physpos >> snp.allele1 >> snp.allele2))
      {
	cerr << "ERROR: Incorrectly formatted bim file: " << bimFile << endl;
	cerr << "Line " << ret.size()+1 << ":" << endl;
	cerr << line << endl;
	cerr << "Unable to input 6 values (2 string, 1 double, 1 int, 2 string)" << endl;
	exit(1);
      }
    snp.chrom = chrStrToInt(chrom_str, Nauto);
    if (snp.chrom == -1) {
      cerr << "ERROR: Unknown chromosome code in bim file: " << bimFile << endl;
      cerr << "Line " << ret.size()+1 << ":" << endl;
      cerr << line << endl;
      exit(1);
    }
    if (!ret.empty() &&
	(snp.chrom < ret.back().chrom ||
	 (snp.chrom == ret.back().chrom && (snp.physpos <= ret.back().physpos ||
					    snp.genpos < ret.back().genpos)))) {
      if (numOutOfOrder < 5) {
	cerr << "WARNING: Out-of-order snp in bim file: " << bimFile << endl;
	cerr << "Line " << ret.size()+1 << ":" << endl;
	cerr << line << endl;
      }
      numOutOfOrder++;
      //exit(1);
    }
    ret.push_back(snp);
  }
  if (numOutOfOrder)
    cerr << "WARNING: Total number of out-of-order snps in bim file: " << numOutOfOrder << endl;
  fin.close();
  return ret;
}


class InterpolateBimParams {
  
public:
  string bimFile;  
  string outFile;
  string geneticMapFile;

  // populates members; error-checks
  bool processCommandLineArgs(int argc, char *argv[]) {

    namespace po = boost::program_options;
    po::options_description desc("Usage");
    desc.add_options()
      ("bim", po::value<string>(&bimFile)->required(), "input bim file")
      ("out", po::value<string>(&outFile)->required(), "output bim file")
      ("geneticMapFile", po::value<string>(&geneticMapFile)->required(),
       "map file for filling in genetic map coordinates: chr pos rate(cM/Mb) map(cM)")
      ("help,h", "print help message")
      ;
    po::options_description all("All");
    all.add(desc);
    all.add_options()
      ("bad-args", po::value< vector <string> >(), "bad args")
      ;
    po::positional_options_description positional_desc;
    positional_desc.add("bad-args", -1); // for error-checking command line
    
    po::variables_map vm;
    po::command_line_parser cmd_line(argc, argv);
    cmd_line.options(all);
    cmd_line.positional(positional_desc);
    try {
      po::store(cmd_line.run(), vm);

      if (vm.count("help")) {
	cout << desc << endl;
	exit(0);
      }
      
      po::notify(vm); // throws an error if there are any problems

      if (vm.count("bad-args")) {
	cerr << "ERROR: Unknown options:";
	vector <string> bad_args = vm["bad-args"].as< vector <string> >();
	for (uint i = 0; i < bad_args.size(); i++) cerr << " " << bad_args[i];
	cerr << endl;
	return false;
      }

    }
    catch (po::error &e) {
      cerr << "ERROR: " << e.what() << endl << endl;
      cerr << desc << endl;
      return false;
    }

    FileUtils::requireReadable(bimFile);
    FileUtils::requireReadable(geneticMapFile);
    FileUtils::requireWriteable(outFile);

    return true;
  }
};

int main(int argc, char *argv[]) {

  InterpolateBimParams params;
  if (!params.processCommandLineArgs(argc, argv)) {
    cerr << "Aborting due to error processing command line arguments" << endl;
    cerr << "For list of arguments, run with -h (--help) option" << endl;
    exit(1);
  }

  cout << "Using map: " << params.geneticMapFile << endl;
  MapInterpolater mapInterpolater(params.geneticMapFile);

  cout << "Reading bim file: " << params.bimFile << endl;
  int Nautosomes = 22;
  vector <SnpInfo> snps = readBimFile(params.bimFile, Nautosomes);
  for (uint m = 0; m < snps.size(); m++)
    snps[m].genpos = mapInterpolater.interp(snps[m].chrom, snps[m].physpos);
  
  cout << "Writing bim file: " << params.outFile << endl;
  FileUtils::AutoGzOfstream fout; fout.openOrExit(params.outFile);
  fout << std::fixed << std::setprecision(8);
  for (uint m = 0; m < snps.size(); m++)
    fout << snps[m].chrom << "\t" << snps[m].ID << "\t" << snps[m].genpos << "\t"
	 << snps[m].physpos << "\t" << snps[m].allele1 << "\t" << snps[m].allele2 << endl;
  fout.close();
  
}
