void affx::TsvFileDiff::open_residuals(const std::string& filename, affx::TsvFile& tsv1, affx::TsvFile& tsv2) { // no file if (filename=="") { return; } // m_residuals_tsv=new affx::TsvFile(); // for (int clvl=0;clvl<tsv1.getLevelCount();clvl++) { for (int cidx=0;cidx<tsv1.getColumnCount(clvl);cidx++) { std::string cname=tsv1.getColumnName(clvl,cidx); // +"_res"; m_residuals_tsv->defineColumn(clvl,cidx,cname); } } // m_residuals_tsv->addHeader("apt-file-type","tsv-diff-residuals"); m_residuals_tsv->addHeader("tsv-diff-input-file1",tsv1.getFileName()); m_residuals_tsv->addHeader("tsv-diff-input-file2",tsv2.getFileName()); m_residuals_tsv->addHeader("file-guid",affxutil::Guid::GenerateNewGuid()); // if (m_residuals_tsv->getLevelCount()==1) { m_residuals_tsv->writeTsv_v1(filename); } else { m_residuals_tsv->writeTsv_v2(filename); } }
static void WriteHeader(MetabolizerPhenotypingEngine *eng, affx::TsvFile &tsv, int idx, std::ofstream &str) { // Output the header str << "# For research use only. Not for diagnostic purposes." << std::endl; str << "#%report-guid=" << affxutil::Guid::GenerateNewGuid() << std::endl; str << "#%Program=" << MET_PROGRAM_NAME << std::endl; str << "#%Version=" << MET_PROGRAM_VERSION << std::endl; str << "#%Date=" << Util::getTimeStamp() << std::endl; str << "#%MetabolizerFile=" << eng->getOpt("metabolizer-file") << std::endl; str << "#%AlleleFile=" << eng->getOpt("allele-file") << std::endl; for (int i=0; i<(int)eng->PhenotypeCallDescs().size(); i++) str << "#%PhenotypeCallDesc=" << eng->PhenotypeCallDescs()[i] << std::endl; for (int i=0; i<(int)eng->UserInformation().size(); i++) str << "#%Info=" << eng->UserInformation()[i] << std::endl; str << "Index" << "\t" << "CHP File" << "\t" << "Gene" << "\t" << "Phenotype Call" << "\t" << "Gene Activity" << "\t" << "Known Call" << "\t" << "Unknown Call" << "\t" << "Interpretation Code"; int n = tsv.getColumnCount(0); for (int i=idx; i<n; i++) { std::string name = tsv.getColumnName(0, i); str << "\t" << name; } str << std::endl; }
/**
 * Print a changed line in the one-line format.
 *
 * Output is the line numbers, a "!" marker, then every column of tsv1's
 * current level: the value itself when both files agree, or
 * 'value1'/'value2' when they differ.  The first column is preceded by one
 * tab, every later column by two.
 *
 * @param tsv1 First file, positioned on the line to print.
 * @param tsv2 Second file, positioned on the matching line.
 */
void affx::TsvFileDiff::p_diff_1line(affx::TsvFile& tsv1, affx::TsvFile& tsv2)
{
  const int level = tsv1.lineLevel();
  const int column_cnt = tsv1.getColumnCount(level);
  std::string left;
  std::string right;

  p_linenums(tsv1, tsv2);
  printf("!");

  for (int col = 0; col < column_cnt; ++col) {
    // One tab before the first column, two before each later one.
    printf((col != 0) ? "\t\t" : "\t");

    // Both files are read at tsv1's level.
    tsv1.get(level, col, left);
    tsv2.get(level, col, right);

    if (left != right) {
      printf("'%s'/'%s'", left.c_str(), right.c_str());
    }
    else {
      printf("%s", left.c_str());
    }
  }
  printf("\n");
}
/**
 * Decide from its headers whether a clf file is sequential.
 *
 * The file qualifies when it has non-empty "rows" and "cols" headers and a
 * "sequential" header equal to "1".  On success the row and column counts
 * are stored in m_Rows and m_Cols; on failure they are left untouched.
 *
 * @param clfTsv Clf file tsv object.
 * @return bool Returns true if the file is sequential, else false.
 */
bool SequentialClfFile::isSequential(affx::TsvFile& clfTsv)
{
    std::string rowsText;
    std::string colsText;
    std::string sequentialText;

    // Look up all three headers before judging any of them.
    clfTsv.headersFindNext("rows", rowsText);
    clfTsv.headersFindNext("cols", colsText);
    clfTsv.headersFindNext("sequential", sequentialText);

    const bool hasDimensions = !rowsText.empty() && !colsText.empty();
    const bool flaggedSequential = (sequentialText == "1");
    if (!(hasDimensions && flaggedSequential))
    {
        return false;
    }

    // Remember the layout dimensions.
    m_Rows = Convert::toInt(rowsText);
    m_Cols = Convert::toInt(colsText);
    return true;
}
/**
 * Locate the first level-0 column after the "Override Comment" column.
 *
 * @param tsv Tsv file whose level-0 column names are scanned in order.
 * @return int Index just past the first column named "Override Comment",
 *             or the level-0 column count when no such column exists.
 */
static int FindStartingAttributeColumnIndex(affx::TsvFile &tsv)
{
    const int columnCount = tsv.getColumnCount(0);
    for (int col = 0; col < columnCount; col++)
    {
        if (tsv.getColumnName(0, col) == "Override Comment")
        {
            // Attribute columns begin right after this one.
            return col + 1;
        }
    }
    // Marker column absent: report "one past the end".
    return columnCount;
}
/**
 * Compare the headers of two tsv files and print the differences.
 *
 * Works in three passes: keys present in both files (reported with "-"/"+"
 * lines when the values differ, or with a leading space when they match and
 * m_opt_print_same is set), keys only in tsv1 ("-"), and keys only in tsv2
 * ("+").  Every difference increments m_diff_cnt; printing is additionally
 * gated by p_inc().
 *
 * @param tsv1 First file.
 * @param tsv2 Second file.
 * @return int Number of differences added to m_diff_cnt by this call.
 */
int affx::TsvFileDiff::diffHeaders(affx::TsvFile& tsv1, affx::TsvFile& tsv2)
{
  const int diff_cnt_on_entry = m_diff_cnt;
  std::string key1, val1, key2, val2;

  // Pass 1: keys found in both files.
  tsv1.headersBegin();
  while (tsv1.headersNext(key1, val1) == affx::TSV_OK) {
    if (tsv2.getHeader(key1, val2) != affx::TSV_OK) {
      // Missing from tsv2; pass 2 reports it.
      continue;
    }
    if (val1 != val2) {
      m_diff_cnt++;
      if (p_inc()) {
        printf("-#%%%s=%s\n", key1.c_str(), val1.c_str());
        printf("+#%%%s=%s\n", key1.c_str(), val2.c_str());
      }
    }
    else if (m_opt_print_same && p_inc()) {
      printf(" #%%%s=%s\n", key1.c_str(), val1.c_str());
    }
  }

  // Pass 2: keys in tsv1 that tsv2 lacks.
  tsv1.headersBegin();
  while (tsv1.headersNext(key1, val1) == affx::TSV_OK) {
    if (tsv2.getHeader(key1, val2) == affx::TSV_OK) {
      continue;
    }
    m_diff_cnt++;
    if (p_inc()) {
      printf("-#%%%s=%s\n", key1.c_str(), val1.c_str());
    }
  }

  // Pass 3: keys in tsv2 that tsv1 lacks.
  tsv2.headersBegin();
  while (tsv2.headersNext(key2, val2) == affx::TSV_OK) {
    if (tsv1.getHeader(key2, val1) == affx::TSV_OK) {
      continue;
    }
    m_diff_cnt++;
    if (p_inc()) {
      printf("+#%%%s=%s\n", key2.c_str(), val2.c_str());
    }
  }

  return m_diff_cnt - diff_cnt_on_entry;
}
/**
 * Print the current line of a tsv file to stdout.
 *
 * The line is indented with one tab per level, then each column value is
 * printed; the first column is preceded by one tab and every later column
 * by two.  A newline ends the line.
 *
 * @param tsv File positioned on the line to print.
 */
void affx::TsvFileDiff::p_line(affx::TsvFile& tsv)
{
  const int level = tsv.lineLevel();
  const int column_cnt = tsv.getColumnCount(level);
  std::string cell;

  // Indent by the line's level.
  for (int depth = 0; depth < level; ++depth) {
    printf("\t");
  }

  for (int col = 0; col < column_cnt; ++col) {
    // One tab before the first column, two before each later one.
    printf((col != 0) ? "\t\t" : "\t");
    tsv.get(level, col, cell);
    printf("%s", cell.c_str());
  }
  printf("\n");
}
static void OpenAlleleFile(MetabolizerPhenotypingEngine *eng, affx::TsvFile &tsv, MetabolizerResultType &result, int &sampleIdx, int &ncols, std::vector<std::string> &sampleCols) { std::string translationTableFile = eng->getOpt("allele-file"); tsv.m_optQuoteChar1 = 0; tsv.m_optQuoteChar2 = 0; if (tsv.open(translationTableFile) != affx::TSV_OK) { std::string err = "Unable to open the metabolizer bin file."; Err::errAbort(err); } tsv.bind(0, "Index", &result.index, affx::TSV_BIND_REQUIRED); tsv.bind(0, "CHP Filename", &result.chpFile, affx::TSV_BIND_REQUIRED); tsv.bind(0, "Gene", &result.gene, affx::TSV_BIND_REQUIRED); tsv.bind(0, "Known Call", &result.knownCall, affx::TSV_BIND_REQUIRED); tsv.bind(0, "Unknown Call", &result.unknownCall, affx::TSV_BIND_REQUIRED); tsv.bind(0, "Interpretation Code", &result.code, affx::TSV_BIND_REQUIRED); sampleIdx = FindStartingAttributeColumnIndex(tsv); ncols = tsv.getColumnCount(0); sampleCols.resize(ncols-sampleIdx); for (int i=sampleIdx; i<ncols; i++) tsv.bind(0, i, &sampleCols[i-sampleIdx], affx::TSV_BIND_REQUIRED); }
int affx::TsvFileDiff::diffData(affx::TsvFile& tsv1, affx::TsvFile& tsv2) { int start_diff_cnt=m_diff_cnt; std::string val1,val2; // open_residuals(m_residuals_filename,tsv1,tsv2); // tsv1.rewind(); tsv2.rewind(); // tsv1.nextLine(); tsv2.nextLine(); // while ((!tsv1.eof())||(!tsv2.eof())) { // int tsv1_clvl=tsv1.lineLevel(); int tsv2_clvl=tsv2.lineLevel(); // while (((tsv1_clvl>tsv2_clvl)||tsv2.eof())&&(!tsv1.eof())) { if (p_inc()) { p_linenums(tsv1,tsv2); printf("-"); p_line(tsv1); } m_diff_cnt+=tsv1.getColumnCount(tsv1_clvl); tsv1.nextLine(); tsv1_clvl=tsv1.lineLevel(); } // while (((tsv2_clvl>tsv1_clvl)||tsv1.eof())&&(!tsv2.eof())) { if (p_inc()) { p_linenums(tsv1,tsv2); printf("+"); p_line(tsv2); } m_diff_cnt+=tsv2.getColumnCount(tsv2_clvl); tsv2.nextLine(); tsv2_clvl=tsv2.lineLevel(); } // if ((tsv1_clvl>=0)&&(tsv2_clvl>=0)) { int tsv1_cidx_max=tsv1.getColumnCount(tsv1_clvl); int tsv2_cidx_max=tsv2.getColumnCount(tsv2_clvl); // int cidx_max; if (tsv1_cidx_max<tsv2_cidx_max) { cidx_max=tsv2_cidx_max; } else { cidx_max=tsv1_cidx_max; } // count diffs on the line int line_diff_cnt=0; double d1,d2; for (int cidx=0;cidx<cidx_max;cidx++) { tsv1.get(tsv1_clvl,cidx,val1); tsv2.get(tsv2_clvl,cidx,val2); // the same? if (val1==val2) { // record the base value if (m_residuals_tsv!=NULL) { // numeric? if (tsv1.get(tsv1_clvl,cidx,d1)==affx::TSV_OK) { // put a zero m_residuals_tsv->set(tsv1_clvl,cidx,"0"); } else { // put the value m_residuals_tsv->set(tsv1_clvl,cidx,val1); } } } // Not string equal -- check for numeric diff else if ((tsv1.get(tsv1_clvl,cidx,d1)==affx::TSV_OK) && (tsv2.get(tsv2_clvl,cidx,d2)==affx::TSV_OK)) { double d_diff=d1-d2; // output the result... if (m_residuals_tsv!=NULL) { m_residuals_tsv->set(tsv1_clvl,cidx,d_diff); } // if small, it isnt a diff if (!((m_opt_max_diff>0.0) && (fabs(d_diff)<=m_opt_max_diff))) { // too big -- mark it down as a diff. 
m_diff_cnt++; line_diff_cnt++; // } } // normal text diff else { if (m_residuals_tsv!=NULL) { m_residuals_tsv->set(tsv1_clvl,cidx,"'"+val1+"'/'"+val2+"'"); } } } // output the result line if (m_residuals_tsv!=NULL) { m_residuals_tsv->writeLevel(tsv1_clvl); } // print unchanged lines? if ((m_opt_print_same || (line_diff_cnt!=0)) && (p_inc())) { // if (line_diff_cnt==0) { p_linenums(tsv1,tsv2); printf(" "); p_line(tsv1); } else if (m_opt_print_format==TsvFileDiff::FMT_1) { p_diff_1line(tsv1,tsv2); } else if (m_opt_print_format==TsvFileDiff::FMT_2) { p_diff_2line(tsv1,tsv2); } else { Err::errAbort("Bad format"); } } } // tsv1.nextLine(); tsv2.nextLine(); } close_residuals(); return m_diff_cnt-start_diff_cnt; }
/**
 * Print the current line numbers of both files.
 *
 * Convenience overload: reads lineNumber() from each file and forwards the
 * pair to the two-number p_linenums overload.
 *
 * @param tsv1 First file.
 * @param tsv2 Second file.
 */
void affx::TsvFileDiff::p_linenums(affx::TsvFile& tsv1,
                                   affx::TsvFile& tsv2)
{
  this->p_linenums(tsv1.lineNumber(),
                   tsv2.lineNumber());
}