/*
 * Written by Bastien Chevreux (BaCh)
 *
 * Copyright (C) 1997-2000 by the German Cancer Research Center (Deutsches
 *   Krebsforschungszentrum, DKFZ Heidelberg) and Bastien Chevreux
 * Copyright (C) 2000 and later by Bastien Chevreux
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the
 * Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 *
 */

#include "modules/mod_mer.H"
#include "util/fmttext.H"
#include "util/fileanddisk.H"

#include <getopt.h>
#include <boost/algorithm/string.hpp>
#include <boost/filesystem.hpp>

#include "version.H"


using namespace std;




/*
 * Prints the short program banner (name, library version, author)
 * to stdout.
 */
void MiraMer::usage()
{
  // legacy note kept from the original source, looks like an old
  // getopt option string: hdiIpPrvb:f:t:o:a:k:L:n:
  cout << "miramer\t(MIRALIB version " << MIRALIBVERSION << ")\n"
       << "Author: Bastien Chevreux\t(bach@chevreux.org)\n\n"
       << "...\n";
}


/*
void MiraMer::merCreateHashStats(int argc, char ** argv)
{
  if(argc-optind < 1) {
    cerr << argv[0] << ": " << "Missing name of input.\n";
    exit(1);
  }

  string loadfn(argv[optind++]);

  vector<MIRAParameters> Pv;
  MIRAParameters::setupStdMIRAParameters(Pv);

  auto rgid = ReadGroupLib::getReadGroupID(0);

  NHashStatistics nhs;
  nhs.setupNewAnalysis(32,4,MER_basesperhash,MER_numlearnsteps);
  {
    uint8 ziptype=0;
    string ft,pathto,stem;
    guessFileAndZipType(loadfn,pathto,stem,ft,ziptype);

    ReadPool rp1;
    {
      ReadPoolIO rpio(rp1);
      rpio.registerFile(
	"fastq",
	loadfn,
	"",
	rgid,
	false);
      rpio.loadNextSeqs(-1);
    }

    nhs.analyseReadPool(rp1);
    nhs.dumpHealth(cout);
    nhs.deleteBloomFilter();
    nhs.saveHashStatistics(stem+".mhs",true);
  }

}
*/

void MiraMer::merCreateHashStats(int argc, char ** argv)
{
  FUNCSTART("void MiraMer::merCreateHashStats(int argc, char ** argv)");

  if(argc-optind < 1) {
    cerr << argv[0] << ": " << "Missing name of input.\n";
    exit(1);
  }

  list<string> loadfn;
  for(;optind<argc;++optind){
    loadfn.push_back(argv[optind]);
  }

  vector<MIRAParameters> Pv;
  MIRAParameters::setupStdMIRAParameters(Pv);

  auto rgid=ReadGroupLib::newReadGroup();
  rgid.setSequencingType(ReadGroupLib::SEQTYPE_TEXT);

  cout << "Loading data into memory ...";
  ReadPool loadrp;
  ReadPoolIO rpio(loadrp);
  rpio.setAttributeFASTAQualFileWanted(false); // in case we load FASTAs

  for(auto & dfn : loadfn){
    uint8 ziptype=0;
    string ft;
    string dummyfromstem;
    string dummypathto;
    guessFileAndZipType(dfn,dummypathto,dummyfromstem,ft,ziptype);

    rpio.registerFile(ft,dfn,"",rgid,false);
    rpio.loadNextSeqs(-1,-1);
  }

  string resultfn("miramer.mhs.gz");
  bool fwdandrev=true;
  cout << "MER_basesperhash " << MER_basesperhash << endl;
  auto bytes=HashStatistics<vhash64_t>::byteSizeOfHash(MER_basesperhash);
  if(bytes==8){
    HashStatistics<vhash64_t> hs;
    hs.prepareHashStatistics(loadrp,false,false,fwdandrev,1,MER_rarekmerearlykill,MER_basesperhash,
			     Pv[0].getHashStatisticsParams().hs_million_hashes_per_buffer,
			     resultfn,".");
  }else if(bytes==16){
    HashStatistics<vhash128_t> hs;
    hs.prepareHashStatistics(loadrp,false,false,fwdandrev,1,MER_rarekmerearlykill,MER_basesperhash,
			     Pv[0].getHashStatisticsParams().hs_million_hashes_per_buffer,
			     resultfn,".");
  }else if(bytes==32){
    HashStatistics<vhash256_t> hs;
    hs.prepareHashStatistics(loadrp,false,false,fwdandrev,1,MER_rarekmerearlykill,MER_basesperhash,
			     Pv[0].getHashStatisticsParams().hs_million_hashes_per_buffer,
			     resultfn,".");
  }else if(bytes==64){
    HashStatistics<vhash512_t> hs;
    hs.prepareHashStatistics(loadrp,false,false,fwdandrev,1,MER_rarekmerearlykill,MER_basesperhash,
			     Pv[0].getHashStatisticsParams().hs_million_hashes_per_buffer,
			     resultfn,".");
  }else{
    MIRANOTIFY(true,"Kmer size " << MER_basesperhash << " with " << bytes << " bytes are not expected here.\n");
  }
}


void MiraMer::merInfoHashStats(int argc, char ** argv)
{
  FUNCSTART("void MiraMer::merInfoHashStats(int argc, char ** argv)");


  if(argc-optind < 1) {
    cerr << argv[0] << ": " << "Missing name of input.\n";
    exit(1);
  }

  list<string> loadfn;
  for(;optind<argc;++optind){
    loadfn.push_back(argv[optind]);

    string hfn(argv[optind]);
    cout << "File " << hfn << ":";
    try {
      auto mhs=HashStatistics<vhash64_t>::loadHashStatisticsFileHeader(hfn);
      cout << "\n  File format version:\t" << static_cast<uint16>(mhs.version)
	   << "\n  Kmer length:\t" << mhs.basesperhash
	   << "\n  Kmer bytes:\t" << mhs.sizeofhash
	   << "\n  Num. kmers:\t" << mhs.numelem
	   << "\n  Sort status:\t" << static_cast<uint16>(mhs.sortstatus)
	   << endl;
    }
    catch(Notify n){
      cout << " not readable or not a mhs file.\n";
    }
  }

}




/*
 * Job "sort": currently a stub. Only the number of remaining command
 * line arguments is checked (exit with status 1 if fewer than two),
 * no file is read or written.
 */
void MiraMer::merSortHashStats(int argc, char ** argv)
{
  const int numfilesgiven=argc-optind;
  if(numfilesgiven < 2) {
    cerr << argv[0] << ": Missing name of at least one file.\n";
    exit(1);
  }

  // NOTE(review): the disabled implementation below works on a single
  //  file, the check above asks for two. Confirm which one is intended
  //  before re-activating.
  //
  //  string loadfn(argv[optind]);
  //  NHashStatistics nhs;
  //  nhs.loadHashStatistics(loadfn);
  //  nhs.sortLow24Bit();
  //  nhs.saveHashStatistics(loadfn+".sorted",true);
}

void MiraMer::merDiffHashStats(int argc, char ** argv)
{
  if(argc-optind < 2) {
    cerr << argv[0] << ": " << "Missing name of at least one file.\n";
    exit(1);
  }

  string fn1(argv[optind++]);
  string fn2(argv[optind++]);

  boost::filesystem::path fp(fn1);
  string nameseta(fp.stem().string());
  fp=fn2;
  string namesetb(fp.stem().string());

  cout << nameseta << " " << namesetb << endl;

  int32 trimfr=4;
  int32 trimtot=10;

  HashStatistics<vhash512_t> hs1;
  cout << "load " << fn1 << endl;
  dateStamp(cout);
  hs1.loadHashStatistics(fn1);
  dateStamp(cout);
  cout << "trim " << nameseta << endl;
  hs1.trimHashStatsByFrequencyANDOR(trimfr,trimfr,trimtot);

  HashStatistics<vhash512_t> hs2;
  cout << "load " << fn2 << endl;
  dateStamp(cout);
  hs2.loadHashStatistics(fn2);
  dateStamp(cout);
  cout << "trim " << namesetb << endl;
  hs2.trimHashStatsByFrequencyANDOR(trimfr,trimfr,trimtot);
  dateStamp(cout);

  cout << "creating subhs" << endl;
  HashStatistics<vhash512_t> in_a_not_b;
  in_a_not_b.inANotB(hs1,hs2);
  in_a_not_b.sortByCountDown();
  dateStamp(cout);
  {
    string outname("in_"+nameseta+"_notin_"+namesetb+".fasta");
    cout << "Saving hashes to FASTA file " << outname << endl;
    ofstream fout(outname);
    in_a_not_b.dumpAsFASTA(fout);
  }
  {
    string outname("in_"+nameseta+"_notin_"+namesetb+".txt");
    cout << "Saving hashes to text file " << outname << endl;
    ofstream fout(outname);
    in_a_not_b.dump(fout);
  }
  dateStamp(cout);

  HashStatistics<vhash512_t> in_b_not_a;
  in_b_not_a.inANotB(hs2,hs1);
  in_b_not_a.sortByCountDown();
  {
    string outname("in_"+namesetb+"_notin_"+nameseta+".fasta");
    cout << "Saving hashes to FASTA file " << outname << endl;
    ofstream fout(outname);
    in_b_not_a.dumpAsFASTA(fout);
  }
  {
    string outname("in_"+namesetb+"_notin_"+nameseta+".txt");
    cout << "Saving hashes to text file " << outname << endl;
    ofstream fout(outname);
    in_b_not_a.dump(fout);
  }
  dateStamp(cout);

}

/*
 * Job "dumpcounts": loads the hash statistics file named by the next
 * command line argument and dumps it to stdout via dump().
 *
 * The HashStatistics instantiation is chosen from the kmer byte size
 * stored in the file header (8, 16, 32 or 64); any other size raises
 * a MIRANOTIFY.
 *
 * Exits with status 1 if no file name was given.
 */
void MiraMer::merDumpHashStats(int argc, char ** argv)
{
  FUNCSTART("void MiraMer::merDumpHashStats(int argc, char ** argv)");

  if(argc-optind < 1) {
    cerr << argv[0] << ": " << "Missing name of input.\n";
    exit(1);
  }

  string loadfn(argv[optind]);
  // header is kept so that the error message below can report what is
  //  really stored in the file
  auto mhsheader=HashStatistics<vhash64_t>::loadHashStatisticsFileHeader(loadfn);
  auto bytes=mhsheader.sizeofhash;
  if(bytes==8){
    HashStatistics<vhash64_t> hs;
    hs.loadHashStatistics(loadfn);
    hs.dump(cout);
  }else if(bytes==16){
    HashStatistics<vhash128_t> hs;
    hs.loadHashStatistics(loadfn);
    hs.dump(cout);
  }else if(bytes==32){
    HashStatistics<vhash256_t> hs;
    hs.loadHashStatistics(loadfn);
    hs.dump(cout);
  }else if(bytes==64){
    HashStatistics<vhash512_t> hs;
    hs.loadHashStatistics(loadfn);
    hs.dump(cout);
  }else{
    // report the kmer size of the file, not MER_basesperhash: the
    //  latter is the command line setting and unrelated to the file
    MIRANOTIFY(true,"File " << loadfn << ": kmer size " << mhsheader.basesperhash << " with " << bytes << " bytes are not expected here.\n");
  }
}

/*
 * Job "debug": loads the hash statistics file named by the next
 * command line argument and writes it to stdout via dumpHSDebug().
 *
 * The HashStatistics instantiation is chosen from the kmer byte size
 * stored in the file header (8, 16, 32 or 64); any other size raises
 * a MIRANOTIFY.
 *
 * Exits with status 1 if no file name was given.
 */
void MiraMer::merDumpDebug(int argc, char ** argv)
{
  FUNCSTART("void MiraMer::merDumpDebug(int argc, char ** argv)");

  if(argc-optind < 1) {
    cerr << argv[0] << ": " << "Missing name of input.\n";
    exit(1);
  }

  string loadfn(argv[optind]);
  // header is kept so that the error message below can report what is
  //  really stored in the file
  auto mhsheader=HashStatistics<vhash64_t>::loadHashStatisticsFileHeader(loadfn);
  auto bytes=mhsheader.sizeofhash;
  if(bytes==8){
    HashStatistics<vhash64_t> hs;
    hs.loadHashStatistics(loadfn);
    hs.dumpHSDebug(cout);
  }else if(bytes==16){
    HashStatistics<vhash128_t> hs;
    hs.loadHashStatistics(loadfn);
    hs.dumpHSDebug(cout);
  }else if(bytes==32){
    HashStatistics<vhash256_t> hs;
    hs.loadHashStatistics(loadfn);
    hs.dumpHSDebug(cout);
  }else if(bytes==64){
    HashStatistics<vhash512_t> hs;
    hs.loadHashStatistics(loadfn);
    hs.dumpHSDebug(cout);
  }else{
    // report the kmer size of the file, not MER_basesperhash: the
    //  latter is the command line setting and unrelated to the file
    MIRANOTIFY(true,"File " << loadfn << ": kmer size " << mhsheader.basesperhash << " with " << bytes << " bytes are not expected here.\n");
  }
}

/*
 * Job "dumpdistrib": loads the hash statistics file named by the next
 * command line argument and calls showHashStatisticsInfo() on it.
 *
 * The HashStatistics instantiation is chosen from the kmer byte size
 * stored in the file header (8, 16, 32 or 64); any other size raises
 * a MIRANOTIFY.
 *
 * Exits with status 1 if no file name was given.
 */
void MiraMer::merDumpHashDistrib(int argc, char ** argv)
{
  FUNCSTART("void MiraMer::merDumpHashDistrib(int argc, char ** argv)");
  if(argc-optind < 1) {
    cerr << argv[0] << ": " << "Missing name of input.\n";
    exit(1);
  }

  string loadfn(argv[optind]);
  // header is kept so that the error message below can report what is
  //  really stored in the file
  auto mhsheader=HashStatistics<vhash64_t>::loadHashStatisticsFileHeader(loadfn);
  auto bytes=mhsheader.sizeofhash;
  if(bytes==8){
    HashStatistics<vhash64_t> hs;
    hs.loadHashStatistics(loadfn);
    hs.showHashStatisticsInfo();
  }else if(bytes==16){
    HashStatistics<vhash128_t> hs;
    hs.loadHashStatistics(loadfn);
    hs.showHashStatisticsInfo();
  }else if(bytes==32){
    HashStatistics<vhash256_t> hs;
    hs.loadHashStatistics(loadfn);
    hs.showHashStatisticsInfo();
  }else if(bytes==64){
    HashStatistics<vhash512_t> hs;
    hs.loadHashStatistics(loadfn);
    hs.showHashStatisticsInfo();
  }else{
    // report the kmer size of the file, not MER_basesperhash: the
    //  latter is the command line setting and unrelated to the file
    MIRANOTIFY(true,"File " << loadfn << ": kmer size " << mhsheader.basesperhash << " with " << bytes << " bytes are not expected here.\n");
  }
}

/*
 * Job "dtest": currently de-activated. Always terminates the program
 * with exit status 1, either because no input name was given or with
 * the message that the job is de-activated.
 */
void MiraMer::merDeltaTest(int argc, char ** argv)
{
  const char * progname=argv[0];

  if(optind >= argc) {
    cerr << progname << ": Missing name of input.\n";
    exit(1);
  }

  cerr << progname << ": merDeltaTest() Currently de-activated.\n";
  exit(1);

/*
  TODO
  De-activated until KMer gets a - operator

  Former implementation, kept for reference:

  string loadfn(argv[optind]);
  HashStatistics<vhash512_t> hs;
  hs.loadHashStatistics(loadfn);
  hs.sortLexicographically();
  auto & hsd=hs.getHashStats();
  if(!hsd.empty()){
    uint64 oldvh=0;
    for(auto & hsde : hsd){
      //cout << oldvh << " " << hsde.vhash << "\t->\t";
      uint64 newvh=hsde.vhash-oldvh;
      oldvh=hsde.vhash;
      hsde.vhash=newvh;
      //cout << oldvh << " " << newvh << endl;
    }
    hs.sortLexicographically();
    hs.saveHashStatistics(loadfn+".delta.sorted",true);
  }
*/
}


int MiraMer::mainMiraMer(int argc, char ** argv)
{
  // that loop is straight from the GNU getopt_long example
  // http://www.gnu.org/s/hello/manual/libc/Getopt-Long-Option-Example.html
  while (1){
    static struct option mlong_options[] =
      {
	{"help",  no_argument,           0, 'h'},
	{"job", required_argument,         0, 'j'},
	{"kmersize", required_argument,         0, 'k'},
	{"version", no_argument,         0, 'v'},
	{0, 0, 0, 0}
      };
    /* getopt_long stores the option index here. */
    int option_index = 0;

    int c = getopt_long (argc, argv, "hc:j:k:v",
		     mlong_options, &option_index);

    if (c == -1) break;

    switch (c) {
    case 'h':
      cout << "mira\t\tMIRALIB version " << MIRAVERSION << "\n"
	"Author:\t\tBastien Chevreux (bach@chevreux.org)\n"
	"Purpose:\thandle k-mer statistics of a data set\n\n";

      cout << "Usage:\n"
	"miramer ...\n";
      cout << "\nOptions:\n";
      cout <<
	"  -j / --job\t\t\t\tJob type. Currently:\n"
	"            \t\t\t\tcreate (default)\n"
	"            \t\t\t\tinfo\n"
	"            \t\t\t\tsort\n"
	"            \t\t\t\tdiff\n"
	"            \t\t\t\tdumpcounts\n"
	"            \t\t\t\tdebug\n"
	"            \t\t\t\tdtest\n"
	"            \t\t\t\tdumpdistrib\n"
	"  -h / --help\t\t\t\tPrint short help and exit\n"
	"  -v / --version\t\t\tPrint version and exit\n"
	;
      exit(0);
    case 'j': {
      MER_job=optarg;
      boost::to_lower(MER_job);
      break;
    }
    case 'k': {
      uint64 bla=atoi(optarg);
      //if(bla>32) bla=32;
      MER_basesperhash=bla;
      break;
    }
    case 'c': {
      uint64 bla=atoi(optarg);
      //if(bla>32) bla=32;
      MER_rarekmerearlykill=bla;
      break;
    }
    case 'v':
      cout << MIRAVERSION << endl;
      exit(0);
    default:
      abort();
    }
  }

  if(MER_basesperhash==0) MER_basesperhash=32;
  if(MER_basesperhash>256){
    cout << "Sorry, -k for kmer size must be <= 256 for the time being.\n";
    exit(100);
  }

  try {
    if(MER_job=="create"){
      merCreateHashStats(argc,argv);
    }else if(MER_job=="info"){
      merInfoHashStats(argc,argv);
    }else if(MER_job=="sort"){
      merSortHashStats(argc,argv);
    }else if(MER_job=="diff"){
      merDiffHashStats(argc,argv);
    }else if(MER_job=="dumpcounts"){
      merDumpHashStats(argc,argv);
    }else if(MER_job=="debug"){
      merDumpDebug(argc,argv);
    }else if(MER_job=="dtest"){
      merDeltaTest(argc,argv);
    }else if(MER_job=="dumpdistrib"){
      merDumpHashDistrib(argc,argv);
    }else{
      cout << argv[0] << ": unknown job '" << MER_job << "'???" << endl;
      exit(1);
    }
  }
  catch(Notify n){
    n.handleError("main");
  }
  catch(Flow f){
    cerr << "Unexpected exception: Flow()\n";
  }
  catch(...){
    cerr << "Unknown exception caught, aborting the process.\n\nPlease contact: bach@chevreux.org\n\n";
    abort();
  }

  FUNCEND();
  return 0;
}
