/**
 * Set up the residuals output file.
 *
 * When @p filename is empty nothing happens. Otherwise a new TsvFile is
 * allocated into m_residuals_tsv, given the same column names as @p tsv1
 * on every level, tagged with identifying headers, and handed to
 * writeTsv_v1() (single level) or writeTsv_v2() (multiple levels).
 *
 * @param filename Path of the residuals file; empty disables residuals.
 * @param tsv1 First input; supplies the column layout and "file1" header.
 * @param tsv2 Second input; only its file name is recorded ("file2" header).
 */
void
affx::TsvFileDiff::open_residuals(const std::string& filename,
                                  affx::TsvFile& tsv1,
                                  affx::TsvFile& tsv2)
{
  // Nothing to do when no residuals file was requested.
  if (filename.empty()) {
    return;
  }

  m_residuals_tsv=new affx::TsvFile();

  // Copy the column names of the first input, level by level.
  for (int level=0; level<tsv1.getLevelCount(); ++level) {
    for (int col=0; col<tsv1.getColumnCount(level); ++col) {
      std::string columnName=tsv1.getColumnName(level,col);
      m_residuals_tsv->defineColumn(level,col,columnName);
    }
  }

  // Headers identifying the file type, both inputs, and a fresh guid.
  m_residuals_tsv->addHeader("apt-file-type","tsv-diff-residuals");
  m_residuals_tsv->addHeader("tsv-diff-input-file1",tsv1.getFileName());
  m_residuals_tsv->addHeader("tsv-diff-input-file2",tsv2.getFileName());
  m_residuals_tsv->addHeader("file-guid",affxutil::Guid::GenerateNewGuid());

  // Pick the writer by level count: v1 for exactly one level, v2 otherwise.
  if (m_residuals_tsv->getLevelCount()!=1) {
    m_residuals_tsv->writeTsv_v2(filename);
  } else {
    m_residuals_tsv->writeTsv_v1(filename);
  }
}
/**
 * Write the report preamble and the column-title row to @p str.
 *
 * The preamble consists of "#"-prefixed lines (disclaimer, guid, program
 * name/version, timestamp, input file options, phenotype call descriptions
 * and user information). The title row holds eight fixed column titles
 * followed by the names of the level-0 columns of @p tsv from index
 * @p idx onward.
 *
 * @param eng Engine queried for options, call descriptions and user info.
 * @param tsv File whose trailing column names are appended to the title row.
 * @param idx Index of the first level-0 column of @p tsv to append.
 * @param str Output stream receiving the header.
 */
static void WriteHeader(MetabolizerPhenotypingEngine *eng, affx::TsvFile &tsv, int idx, std::ofstream &str)
{
	// Fixed preamble lines.
	str << "# For research use only. Not for diagnostic purposes." << std::endl;
	str << "#%report-guid=" << affxutil::Guid::GenerateNewGuid() << std::endl;
	str << "#%Program=" << MET_PROGRAM_NAME << std::endl;
	str << "#%Version=" << MET_PROGRAM_VERSION << std::endl;
	str << "#%Date=" << Util::getTimeStamp() << std::endl;
	str << "#%MetabolizerFile=" << eng->getOpt("metabolizer-file") << std::endl;
	str << "#%AlleleFile=" << eng->getOpt("allele-file") << std::endl;

	// One line per phenotype call description.
	const int descCount = (int)eng->PhenotypeCallDescs().size();
	for (int d = 0; d < descCount; ++d)
	{
		str << "#%PhenotypeCallDesc=" << eng->PhenotypeCallDescs()[d] << std::endl;
	}

	// One line per user information entry.
	const int infoCount = (int)eng->UserInformation().size();
	for (int u = 0; u < infoCount; ++u)
	{
		str << "#%Info=" << eng->UserInformation()[u] << std::endl;
	}

	// Fixed column titles, tab separated.
	str << "Index\tCHP File\tGene\tPhenotype Call\tGene Activity\tKnown Call\tUnknown Call\tInterpretation Code";

	// Append the remaining column names taken from the tsv file.
	const int columnCount = tsv.getColumnCount(0);
	for (int col = idx; col < columnCount; ++col)
	{
		std::string columnName = tsv.getColumnName(0, col);
		str << "\t" << columnName;
	}
	str << std::endl;
}
/**
 * Print the current lines of two files as one combined "changed" line.
 *
 * Output is the line numbers, a '!' marker, and then every column of the
 * current level of @p tsv1: the plain value when both files agree, or
 * 'value1'/'value2' when they differ.
 *
 * @param tsv1 First file; its current line level and column count are used.
 * @param tsv2 Second file; read at the same level and column indexes.
 */
void
affx::TsvFileDiff::p_diff_1line(affx::TsvFile& tsv1, affx::TsvFile& tsv2)
{
  const int level=tsv1.lineLevel();
  const int columnCount=tsv1.getColumnCount(level);
  std::string left;
  std::string right;

  p_linenums(tsv1,tsv2);
  printf("!");

  for (int col=0; col<columnCount; ++col) {
    // One tab before the first column, two tabs before each later column.
    printf("%s",(col==0) ? "\t" : "\t\t");

    tsv1.get(level,col,left);
    tsv2.get(level,col,right);

    if (left!=right) {
      printf("'%s'/'%s'",left.c_str(),right.c_str());
    }
    else {
      printf("%s",left.c_str());
    }
  }
  printf("\n");
}
/**
 * Determine from its headers whether a clf file is sequential.
 *
 * The file counts as sequential only when it has non-empty "rows" and
 * "cols" header values and a "sequential" header equal to "1". In that
 * case the row and column counts are stored in m_Rows and m_Cols.
 *
 * @param clfTsv Clf file tsv object whose headers are searched.
 * @return bool True if the file is sequential, false otherwise.
 */
bool SequentialClfFile::isSequential(affx::TsvFile& clfTsv)
{
  std::string rowsText;
  std::string colsText;
  std::string sequentialText;

  // Look up the three headers, in this order.
  clfTsv.headersFindNext("rows", rowsText);
  clfTsv.headersFindNext("cols", colsText);
  clfTsv.headersFindNext("sequential", sequentialText);

  // Both dimensions must be present and the flag must be exactly "1".
  const bool haveDimensions = !rowsText.empty() && !colsText.empty();
  if (!haveDimensions || sequentialText != "1") {
    return false;
  }

  // Remember the dimensions.
  m_Rows = Convert::toInt(rowsText);
  m_Cols = Convert::toInt(colsText);
  return true;
}
/**
 * Locate the first column after the "Override Comment" column.
 *
 * @param tsv File whose level-0 column names are scanned.
 * @return Index just past the first level-0 column named
 *         "Override Comment", or the level-0 column count when no such
 *         column exists.
 */
static int FindStartingAttributeColumnIndex(affx::TsvFile &tsv)
{
	const int columnCount = tsv.getColumnCount(0);
	for (int col = 0; col < columnCount; ++col)
	{
		std::string columnName = tsv.getColumnName(0, col);
		if (columnName == "Override Comment")
		{
			// Attribute columns start right after this one.
			return col + 1;
		}
	}
	// Marker column not found.
	return columnCount;
}
/**
 * Compare the headers of two tsv files and print the differences.
 *
 * Runs three passes: keys present in both files (changed or, optionally,
 * unchanged values), keys only in @p tsv1 (printed with '-'), and keys
 * only in @p tsv2 (printed with '+'). Each changed or missing header adds
 * one to m_diff_cnt; printing is gated by p_inc().
 *
 * @param tsv1 First file.
 * @param tsv2 Second file.
 * @return Number of header differences counted by this call.
 */
int
affx::TsvFileDiff::diffHeaders(affx::TsvFile& tsv1, affx::TsvFile& tsv2)
{
  const int countOnEntry=m_diff_cnt;
  std::string keyA;
  std::string valA;
  std::string keyB;
  std::string valB;

  // Pass 1: headers of tsv1 that tsv2 also has.
  tsv1.headersBegin();
  while (tsv1.headersNext(keyA,valA)==affx::TSV_OK) {
    if (tsv2.getHeader(keyA,valB)!=affx::TSV_OK) {
      continue; // missing from tsv2 -- reported in pass 2
    }
    if (valA!=valB) {
      m_diff_cnt++;
      if (p_inc()) {
        printf("-#%%%s=%s\n",keyA.c_str(),valA.c_str());
        printf("+#%%%s=%s\n",keyA.c_str(),valB.c_str());
      }
    }
    else if (m_opt_print_same && p_inc()) {
      printf(" #%%%s=%s\n",keyA.c_str(),valA.c_str());
    }
  }

  // Pass 2: headers of tsv1 that tsv2 lacks.
  tsv1.headersBegin();
  while (tsv1.headersNext(keyA,valA)==affx::TSV_OK) {
    if (tsv2.getHeader(keyA,valB)==affx::TSV_OK) {
      continue;
    }
    m_diff_cnt++;
    if (p_inc()) {
      printf("-#%%%s=%s\n",keyA.c_str(),valA.c_str());
    }
  }

  // Pass 3: headers of tsv2 that tsv1 lacks.
  tsv2.headersBegin();
  while (tsv2.headersNext(keyB,valB)==affx::TSV_OK) {
    if (tsv1.getHeader(keyB,valA)==affx::TSV_OK) {
      continue;
    }
    m_diff_cnt++;
    if (p_inc()) {
      printf("+#%%%s=%s\n",keyB.c_str(),valB.c_str());
    }
  }

  // Report only what this call added.
  return m_diff_cnt-countOnEntry;
}
/**
 * Print the current line of a tsv file to stdout.
 *
 * The line is indented with one tab per level, then every column value of
 * the current level is printed, and the line ends with a newline.
 *
 * @param tsv File whose current line is printed.
 */
void affx::TsvFileDiff::p_line(affx::TsvFile& tsv) {
  const int level=tsv.lineLevel();
  const int columnCount=tsv.getColumnCount(level);
  std::string cell;

  // Indentation: one tab for each level above zero.
  for (int depth=level; depth>0; --depth) {
    printf("\t");
  }

  for (int col=0; col<columnCount; ++col) {
    // One tab before the first column, two tabs before each later column.
    printf("%s",(col==0) ? "\t" : "\t\t");
    tsv.get(level,col,cell);
    printf("%s",cell.c_str());
  }
  printf("\n");
}
/**
 * Open the allele file and bind its columns to the result record.
 *
 * The file path is taken from the engine's "allele-file" option. Quote
 * characters are disabled on @p tsv before opening. The six fixed columns
 * are bound (as required) to fields of @p result, and every level-0 column
 * from the index returned by FindStartingAttributeColumnIndex() onward is
 * bound (as required) to an element of @p sampleCols.
 *
 * @param eng        Engine providing the "allele-file" option.
 * @param tsv        Tsv object to open and bind.
 * @param result     Record receiving the fixed column values.
 * @param sampleIdx  Out: index of the first trailing column.
 * @param ncols      Out: number of level-0 columns in the file.
 * @param sampleCols Out: resized to hold one string per trailing column.
 *                   It must not be resized afterwards because @p tsv keeps
 *                   pointers to its elements.
 */
static void OpenAlleleFile(MetabolizerPhenotypingEngine *eng, affx::TsvFile &tsv, MetabolizerResultType &result, int &sampleIdx, int &ncols, std::vector<std::string> &sampleCols)
{
	std::string translationTableFile = eng->getOpt("allele-file");
	// Turn off quote handling before the file is read.
	tsv.m_optQuoteChar1 = 0;
	tsv.m_optQuoteChar2 = 0;
	if (tsv.open(translationTableFile) != affx::TSV_OK)
	{
		// Name the file that actually failed (the allele file) and its path.
		std::string err = "Unable to open the allele file: " + translationTableFile;
		Err::errAbort(err);
	}
	// Fixed columns.
	tsv.bind(0, "Index", &result.index, affx::TSV_BIND_REQUIRED);
	tsv.bind(0, "CHP Filename", &result.chpFile, affx::TSV_BIND_REQUIRED);
	tsv.bind(0, "Gene", &result.gene, affx::TSV_BIND_REQUIRED);
	tsv.bind(0, "Known Call", &result.knownCall, affx::TSV_BIND_REQUIRED);
	tsv.bind(0, "Unknown Call", &result.unknownCall, affx::TSV_BIND_REQUIRED);
	tsv.bind(0, "Interpretation Code", &result.code, affx::TSV_BIND_REQUIRED);
	// Trailing columns: everything from sampleIdx to the last column.
	sampleIdx = FindStartingAttributeColumnIndex(tsv);
	ncols = tsv.getColumnCount(0);
	// sampleIdx never exceeds ncols, so the size cannot be negative.
	sampleCols.resize(ncols-sampleIdx);
	for (int i=sampleIdx; i<ncols; i++)
		tsv.bind(0, i, &sampleCols[i-sampleIdx], affx::TSV_BIND_REQUIRED);
}
/**
 * Compare the data lines of two tsv files and print the differences.
 *
 * Both files are rewound and walked in parallel. Lines present in only
 * one file (deeper level, or the other file is at eof) are printed with
 * '-' (tsv1) or '+' (tsv2) and add their column count to m_diff_cnt.
 * Lines present in both are compared cell by cell:
 *   - equal strings are not a difference;
 *   - cells that both parse as numbers are a difference unless
 *     m_opt_max_diff is positive and |d1-d2| is within it;
 *   - any other mismatch is a text difference and is always counted.
 * When a residuals file is open, one residual line is written per
 * compared line ("0" or the value for equal cells, d1-d2 for numeric
 * cells, 'v1'/'v2' for text differences).
 *
 * @param tsv1 First file.
 * @param tsv2 Second file.
 * @return Number of differences counted by this call.
 */
int
affx::TsvFileDiff::diffData(affx::TsvFile& tsv1, affx::TsvFile& tsv2)
{
  int start_diff_cnt=m_diff_cnt;
  std::string val1,val2;

  // Sets up m_residuals_tsv when a residuals filename was configured.
  open_residuals(m_residuals_filename,tsv1,tsv2);

  // Start both files from the top.
  tsv1.rewind();
  tsv2.rewind();

  // Prime the first line of each.
  tsv1.nextLine();
  tsv2.nextLine();

  // Keep going until both files are exhausted.
  while ((!tsv1.eof())||(!tsv2.eof())) {
    //
    int tsv1_clvl=tsv1.lineLevel();
    int tsv2_clvl=tsv2.lineLevel();

    // Lines only in tsv1: it is at a deeper level, or tsv2 has ended.
    while (((tsv1_clvl>tsv2_clvl)||tsv2.eof())&&(!tsv1.eof())) {
      if (p_inc()) {
        p_linenums(tsv1,tsv2);
        printf("-");
        p_line(tsv1);
      }
      // every column of an unmatched line counts as a diff
      m_diff_cnt+=tsv1.getColumnCount(tsv1_clvl);
      tsv1.nextLine();
      tsv1_clvl=tsv1.lineLevel();
    }

    // Lines only in tsv2: it is at a deeper level, or tsv1 has ended.
    while (((tsv2_clvl>tsv1_clvl)||tsv1.eof())&&(!tsv2.eof())) {
      if (p_inc()) {
        p_linenums(tsv1,tsv2);
        printf("+");
        p_line(tsv2);
      }
      // every column of an unmatched line counts as a diff
      m_diff_cnt+=tsv2.getColumnCount(tsv2_clvl);
      tsv2.nextLine();
      tsv2_clvl=tsv2.lineLevel();
    }

    // Both files have a line to compare.
    if ((tsv1_clvl>=0)&&(tsv2_clvl>=0)) {
      int tsv1_cidx_max=tsv1.getColumnCount(tsv1_clvl);
      int tsv2_cidx_max=tsv2.getColumnCount(tsv2_clvl);
      // compare across the wider of the two lines
      int cidx_max;
      if (tsv1_cidx_max<tsv2_cidx_max) {
        cidx_max=tsv2_cidx_max;
      } else {
        cidx_max=tsv1_cidx_max;
      }

      // count diffs on the line
      int line_diff_cnt=0;
      double d1,d2;
      for (int cidx=0;cidx<cidx_max;cidx++) {
        tsv1.get(tsv1_clvl,cidx,val1);
        tsv2.get(tsv2_clvl,cidx,val2);
        // the same?
        if (val1==val2) {
          // record the base value
          if (m_residuals_tsv!=NULL) {
            // numeric?
            if (tsv1.get(tsv1_clvl,cidx,d1)==affx::TSV_OK) {
              // put a zero
              m_residuals_tsv->set(tsv1_clvl,cidx,"0");
            } else {
              // put the value
              m_residuals_tsv->set(tsv1_clvl,cidx,val1);
            }
          }
        }
        // Not string equal -- check for numeric diff
        else if ((tsv1.get(tsv1_clvl,cidx,d1)==affx::TSV_OK) &&
                 (tsv2.get(tsv2_clvl,cidx,d2)==affx::TSV_OK)) {
          double d_diff=d1-d2;
          // output the result...
          if (m_residuals_tsv!=NULL) {
            m_residuals_tsv->set(tsv1_clvl,cidx,d_diff);
          }
          // if small, it isnt a diff
          if (!((m_opt_max_diff>0.0) && (fabs(d_diff)<=m_opt_max_diff))) {
            // too big -- mark it down as a diff.
            m_diff_cnt++;
            line_diff_cnt++;
          }
        }
        // normal text diff
        else {
          // A textual mismatch is always a difference: count it so the
          // line is reported as changed and the return value reflects it.
          m_diff_cnt++;
          line_diff_cnt++;
          if (m_residuals_tsv!=NULL) {
            m_residuals_tsv->set(tsv1_clvl,cidx,"'"+val1+"'/'"+val2+"'");
          }
        }
      }

      // output the result line
      if (m_residuals_tsv!=NULL) {
        m_residuals_tsv->writeLevel(tsv1_clvl);
      }

      // Print changed lines always; unchanged lines only when requested.
      if ((m_opt_print_same || (line_diff_cnt!=0)) &&
          (p_inc())) {
        //
        if (line_diff_cnt==0) {
          p_linenums(tsv1,tsv2);
          printf(" ");
          p_line(tsv1);
        }
        else if (m_opt_print_format==TsvFileDiff::FMT_1) {
          p_diff_1line(tsv1,tsv2);
        }
        else if (m_opt_print_format==TsvFileDiff::FMT_2) {
          p_diff_2line(tsv1,tsv2);
        }
        else {
          Err::errAbort("Bad format");
        }
      }
    }

    // Advance both files together.
    tsv1.nextLine();
    tsv2.nextLine();
  }

  close_residuals();

  // Report only what this call added.
  return m_diff_cnt-start_diff_cnt;
}
/**
 * Convenience overload: forwards the current line numbers of both files
 * to the p_linenums overload that takes the two line numbers directly.
 *
 * @param tsv1 First file; its lineNumber() is passed as the first argument.
 * @param tsv2 Second file; its lineNumber() is passed as the second argument.
 */
void
affx::TsvFileDiff::p_linenums(affx::TsvFile& tsv1,affx::TsvFile& tsv2)
{
  p_linenums(tsv1.lineNumber(),tsv2.lineNumber());
}