unsigned int CAlignmentRefiner::GetBlocksToAlign(unsigned int nBlocks, vector<unsigned int>& blocks, string& msg, bool useExtras) { bool skip = false; unsigned int first = 0, last = nBlocks - 1; blocks.clear(); if (nBlocks == 0) return 0; CArgs args = GetArgs(); unsigned int nExtra = (useExtras) ? (unsigned int) args.GetNExtra() : 0; // If specify realignment of all blocks, the default settings are OK. // Otherwise, use the range specified in the -f and -l flags. // Recall that the command line takes in a one-based integer; blocks is zero-based. if (!args["ab"]) { if (args["f"]) { first = (unsigned) args["f"].AsInteger() - 1; } if (args["l"]) { last = (unsigned) args["l"].AsInteger() - 1; } if (first >= nBlocks) { first = 0; } if (last < first || last >= nBlocks) { last = nBlocks - 1; } } // If there are any extra arguments provided, they refer to block numbers to freeze. msg = "\nAligning blocks: "; for (unsigned int i = first; i <= last; ++i) { if (nExtra > 0) { skip = false; for (size_t extra = 1; extra <= nExtra; ++extra) { if (args[extra].AsInteger() - 1 == (int) i) { skip = true; break; } } if (skip) continue; } blocks.push_back(i); msg.append(NStr::UIntToString(i+1) + " "); if ((last-first+1)%15 == 0 && first != 0) msg.append("\n"); } //TERSE_INFO_MESSAGE_CL("message in GetBlocksToAlign:\n" << msg); return blocks.size(); }
void CAgpValidateApplication::x_ValidateUsingFiles(const CArgs& args, CNcbiOstream* out) { if(m_reader.m_is_chr) { if(m_reader.m_explicit_scaf) { if(!m_use_xml) { cout << "===== Reading Chromosome from scaffold AGP =====" << endl; // second header - for details that are printed below the summary and stats if(out) *out << "===== Chromosome from scaffold AGP =====" << endl; } } // else: cout << "===== Reading Chromosome from component AGP =====" << endl; } else if(m_reader.m_explicit_scaf) { if(!m_use_xml) { cout << "===== Reading Scaffold from component AGP =====" << endl; if(out) *out << "===== Scaffold from component AGP =====" << endl; // header for details that are printed below } } if( 0==(m_ValidationType&VT_Acc) && args["out"].HasValue()) { CAgpCompSpanSplitter *comp_splitter = new CAgpCompSpanSplitter(&(args["out"].AsOutputFile())); m_reader.SetRowOutput(comp_splitter); } if (args.GetNExtra() == 0) { x_ValidateFile(cin); } else { SIZE_TYPE num_fasta_files=0; bool allowFasta = !m_reader.m_explicit_scaf; for (unsigned int i = 1; i <= args.GetNExtra(); i++) { m_CurrentFileName = args['#' + NStr::IntToString(i)].AsString(); if(m_CurrentFileName=="-chr") { if(m_reader.m_is_chr) { cerr << "Error -- second -chr is not supported.\n"; exit(1); } if(!m_reader.m_explicit_scaf) { cerr << "Error -- -chr after a file, but no preceding -scaf. 
Expecting:\n" << " -scaf Scaffold_AGP_file(s) -chr Chromosome_AGP_file(s)\n"; exit(1); } m_reader.PrintTotals(cout, m_use_xml); m_reader.Reset(true); pAgpErr->ResetTotals(); if(!m_use_xml) { cout << "\n===== Reading Chromosome from scaffold AGP =====" << endl; if(out) *out << "\n===== Chromosome from scaffold AGP =====" << endl;// header for details that are printed below } continue; } //CNcbiIstream& istr = args['#' + NStr::IntToString(i)].AsInputFile(); CNcbiIfstream istr(m_CurrentFileName.c_str()); if (!istr) { cerr << "Error -- unable to open file : " << m_CurrentFileName << "\n"; exit (1); } char ch=0; if(allowFasta) { istr.get(ch); istr.putback(ch); } if(ch=='>') { x_LoadLenFa(istr, m_CurrentFileName); num_fasta_files++; } else { if(allowFasta && num_fasta_files) x_ReportFastaSeqCount(); if( args.GetNExtra()-num_fasta_files>1 ) pAgpErr->StartFile(m_CurrentFileName); x_ValidateFile(istr); allowFasta=false; } } if(num_fasta_files==args.GetNExtra()) { //cerr << "No AGP files."; exit (1); if(allowFasta && num_fasta_files) x_ReportFastaSeqCount(); x_ValidateFile(cin); } } }
/// Write the current refinement settings to a stream in human-readable form.
///
/// Up to three sections are printed, in this order:
///   - global parameters: when echoLOO and echoBE are both true or both false;
///   - leave-one-out parameters: when echoLOO is true;
///   - block-editing parameters: when echoBE is true.
///
/// @param echoStream  destination stream.
/// @param echoLOO     print the leave-one-out section.
/// @param echoBE      print the block-editing section.
void CAlignmentRefiner::EchoSettings(ostream& echoStream, bool echoLOO, bool echoBE)
{
    const char* const kYes = "Yes";
    const char* const kNo = "No";
    const char* const kRule = "=================================";

    CArgs args = GetArgs();

    // ---- Global section: shown when both flags agree. ----
    if (echoLOO == echoBE) {
        echoStream << "Global Refinement Parameters:" << endl;
        echoStream << kRule << endl;
        echoStream << "Number of trials = " << m_nTrials << endl;
        echoStream << "Number of cycles per trial = " << m_nCycles << endl;
        echoStream << "Alignment score deviation threshold = " << m_scoreDeviationThreshold << endl;

        const unsigned int extraCount = (unsigned int) args.GetNExtra();
        if (extraCount == 0) {
            echoStream << "No extra arguments that exclude specific rows/blocks from refinement." << endl;
        }
        else {
            echoStream << "Extra argument(s) freeze ";
            if (m_loo.extrasAreRows) {
                echoStream << "Row:\n ";
            }
            else {
                echoStream << "Block:\n ";
            }
            for (size_t idx = 1; idx <= extraCount; ++idx) {
                echoStream << args[idx].AsInteger() << " ";
            }
            echoStream << endl;
        }

        echoStream << "Quiet mode? " << (m_quietMode ? "ON" : "OFF") << endl;
        echoStream << endl;
    }

    // ---- Leave-one-out section. ----
    if (echoLOO) {
        echoStream << "Leave-One_Out parameters:" << endl;
        echoStream << kRule << endl;
        echoStream << "LOO on? " << (m_loo.doLOO ? kYes : kNo) << endl;

        // The detailed settings are only listed while LOO is switched on.
        if (m_loo.doLOO) {
            echoStream << "Row selection order: " << RefinerRowSelectorCodeToStr(m_loo.selectorCode) << endl;
            echoStream << "Number left out between PSSM recomputation = " << m_loo.lno << endl;
            echoStream << "Freeze alignment of rows with structure? " << (m_loo.fixStructures ? kYes : kNo) << endl;
            echoStream << "Use full sequence or aligned footprint? " << (m_loo.fullSequence ? "Full" : "Aligned") << endl;
            echoStream << "N-terminal extension allowed = " << m_loo.nExt << endl;
            echoStream << "C-terminal extension allowed = " << m_loo.cExt << endl;
            echoStream << "Converged after fraction of rows left out do not change score = " << m_loo.sameScoreThreshold << endl;
            echoStream << "Random number generator seed = " << m_loo.seed << endl;
            echoStream << "LOO loop percentile: longest loop allowed = max initial loop * " << m_loo.percentile << endl;
            echoStream << "LOO extension to longest loop allowed = " << m_loo.extension << endl;
            echoStream << "LOO absolute maximum longest loop (zero == no max) = " << m_loo.cutoff << endl;
        }
        echoStream << endl;
    }

    // ---- Block-editing section. ----
    if (echoBE) {
        // Both labels keep this placeholder when the enum value is not recognised.
        string editLabel = "Invalid Method";
        string scorerLabel = editLabel;

        switch (m_blockEdit.algMethod) {
        case eSimpleExtendAndShrink: editLabel = "Extend and Shrink";  break;
        case eSimpleExtend:          editLabel = "Extend Only";        break;
        case eSimpleShrink:          editLabel = "Shrink Only";        break;
        case eGreedyExtend:          editLabel = "Greedy Extend Only"; break;
        default:                     break;
        }

        switch (m_blockEdit.columnMethod) {
        case ePercentAtOrOverThreshold:
            scorerLabel = "% Rows at or Over Threshold";
            break;
        case eSumOfScores:
            scorerLabel = "Sum of Scores";
            break;
        case eMedianScore:
            scorerLabel = "Median Score";
            break;
        case ePercentOfWeightOverThreshold:
            scorerLabel = "% Score Weight at or Over Threshold";
            break;
        case eCompoundScorer:
            // The compound scorer is reported as "3.3.3" only when that is the
            // value of the "be_score" argument.
            if (GetArgs()["be_score"].AsString() != "3.3.3") {
                scorerLabel = "Compound Scoring";
            }
            else {
                scorerLabel = "3.3.3";
            }
            break;
        default:
            break;
        }

        echoStream << "Block editing parameters:" << endl;
        echoStream << kRule << endl;
        echoStream << "block editing on? " << (m_blockEdit.editBlocks ? kYes : kNo) << endl;

        if (m_blockEdit.editBlocks) {
            echoStream << "block shrinking on? " << (m_blockEdit.canShrink ? kYes : kNo) << endl;
            echoStream << "extend first? " << (m_blockEdit.extendFirst ? kYes : kNo) << endl;
            echoStream << endl;
            echoStream << "block editing method = " << editLabel << endl;
            echoStream << "column scoring method = " << scorerLabel << endl;
            echoStream << endl;

            if (GetArgs()["be_score"].AsString() == "3.3.3") {
                echoStream << "(used for 3.3.3 scoring only):" << endl;
                echoStream << " median threshold = " << m_blockEdit.median << endl;
                echoStream << " negative score fraction = " << m_blockEdit.negScoreFraction << endl;
                echoStream << " negative row fraction = " << m_blockEdit.negRowsFraction << endl;
            }
            else {
                echoStream << "minimum block size = " << m_blockEdit.minBlockSize << endl;
                echoStream << "column-scorer threshold = " << m_blockEdit.columnScorerThreshold << endl;
                echoStream << "extension threshold = " << m_blockEdit.extensionThreshold << endl;
                echoStream << "shrinkage threshold = " << m_blockEdit.shrinkageThreshold << endl;
            }
        }
        echoStream << endl;
    }
}
/// Read the leave-one-out (LOO) settings from the command line into m_loo.
///
/// The doLOO / fixStructures / extrasAreRows switches are always refreshed.  The
/// remaining LOO parameters, the excluded-row list and the list of blocks to align
/// are only filled in when LOO is enabled (the "no_LOO" flag is absent).
///
/// @param nAlignedBlocks  number of aligned blocks, forwarded to GetBlocksToAlign().
/// @param msg             output: erased on entry; when LOO is enabled it ends up
///                        holding a one-line note with the number of frozen blocks.
/// @return                always eRefinerResultOK.
RefinerResultCode CAlignmentRefiner::ExtractLOOArgs(unsigned int nAlignedBlocks, string& msg)
{
    CArgs args = GetArgs();

    msg.erase();

    m_loo.doLOO = (!args["no_LOO"]);
    m_loo.fixStructures = (args["fix_structs"]);
    m_loo.extrasAreRows = (!args["extras_are_blocks"]);

    //  Original author's note: "selection_order" is mandatory (unless -no_LOO is
    //  present) and constrained to {0, 1, 2}; the number of trials is only relevant
    //  for a random selection order.  Any other value leaves the selector untouched.
    const int orderCode = (m_loo.doLOO) ? args["selection_order"].AsInteger() : 0;
    if (orderCode == 0) {
        m_loo.selectorCode = eRandomSelectionOrder;
    }
    else if (orderCode == 1) {
        m_nTrials = 1;
        m_loo.selectorCode = eWorstScoreFirst;
    }
    else if (orderCode == 2) {
        m_nTrials = 1;
        m_loo.selectorCode = eBestScoreFirst;
    }

    // Nothing else to read when LOO is switched off.
    if (!m_loo.doLOO) {
        return eRefinerResultOK;
    }

    m_loo.fullSequence = (args["fs"]);

    // The N-/C-terminal extensions use their own flag when given, else the shared "ex".
    const string nExtKey = (args["nex"]) ? "nex" : "ex";
    const string cExtKey = (args["cex"]) ? "cex" : "ex";
    m_loo.nExt = args[nExtKey].AsInteger();
    m_loo.cExt = args[cExtKey].AsInteger();

    m_loo.seed = (args["seed"]) ? args["seed"].AsInteger() : 0;
    m_loo.lno = (unsigned int) args["lno"].AsInteger();
    m_loo.sameScoreThreshold = args["convSameScore"].AsDouble();
    m_loo.percentile = args["p"].AsDouble();
    m_loo.extension = (unsigned) args["x"].AsInteger();
    m_loo.cutoff = (unsigned) args["c"].AsInteger();

    // Extra arguments name one-based rows to exclude; they are stored zero-based.
    if (m_loo.extrasAreRows) {
        const unsigned int extraCount = (unsigned int) args.GetNExtra();
        for (unsigned int i = 1; i <= extraCount; ++i) {
            const unsigned int rowIndex = (unsigned int) args[i].AsInteger() - 1;
            m_loo.rowsToExclude.push_back(rowIndex);
        }
    }

    // The extra arguments are treated as blocks to freeze only when they are not rows.
    // The text GetBlocksToAlign() writes into msg is replaced by the summary below.
    const unsigned int alignedCount = GetBlocksToAlign(nAlignedBlocks, m_loo.blocks, msg, !m_loo.extrasAreRows);
    msg = "Freeze " + NStr::UIntToString(nAlignedBlocks - alignedCount) + " blocks in ExtractLOOArgs.\n";

    return eRefinerResultOK;
}
int COMSSAMerge::Run() { try { CArgs args = GetArgs(); CRef <COMSSASearch> MySearch(new COMSSASearch); ESerialDataFormat InFileType(eSerial_Xml), OutFileType(eSerial_Xml); bool obz2(false); // output bzip2 compressed? bool ibz2(false); // input bzip2 compressed? if(args["ox"]) OutFileType = eSerial_Xml; else if(args["ob"]) OutFileType = eSerial_AsnBinary; else if(args["ot"]) OutFileType = eSerial_AsnText; else if(args["obz2"]) { OutFileType = eSerial_Xml; obz2 = true; } else ERR_POST(Fatal << "output file type not given"); if(args["ix"]) InFileType = eSerial_Xml; else if(args["ib"]) InFileType = eSerial_AsnBinary; else if(args["it"]) InFileType = eSerial_AsnText; else if(args["ibz2"]) { InFileType = eSerial_Xml; ibz2 = true; } else ERR_POST(Fatal << "input file type not given"); // loop thru input files if ( args["i"].AsString() != "") { ifstream is(args["i"].AsString().c_str()); bool Begin(true); if(!is) ERR_POST(Fatal << "unable to open input file list " << args["i"].AsString()); while(!is.eof()) { string iFileName; NcbiGetline(is, iFileName, "\x0d\x0a"); if(iFileName == "" || is.eof()) continue; try { CRef <COMSSASearch> InSearch(new COMSSASearch); CSearchHelper::ReadCompleteSearch(iFileName, InFileType, ibz2, *InSearch); // InSearch->ReadCompleteSearch(iFileName, InFileType, ibz2); if(Begin) { Begin = false; MySearch->CopyCMSSearch(InSearch); } else { // add MySearch->AppendSearch(InSearch); } } catch(CException& e) { ERR_POST(Fatal << "exception: " << e.what()); return 1; } } } else if ( args.GetNExtra() ) { for (size_t extra = 1; extra <= args.GetNExtra(); extra++) { CRef <COMSSASearch> InSearch(new COMSSASearch); CSearchHelper::ReadCompleteSearch(args[extra].AsString(), InFileType, ibz2, *InSearch); //InSearch->ReadCompleteSearch(args[extra].AsString(), InFileType, ibz2); try { if(extra == 1) { // copy MySearch->CopyCMSSearch(InSearch); } else { // add MySearch->AppendSearch(InSearch); } } catch(CException& e) { ERR_POST(Fatal << "exception: " << e.what()); 
return 1; } } } // write out the new search auto_ptr <CNcbiOfstream> raw_out; auto_ptr <CCompressionOStream> compress_out; auto_ptr <CObjectOStream> txt_out; if( obz2 ) { raw_out.reset(new CNcbiOfstream(args["o"].AsString().c_str())); compress_out.reset( new CCompressionOStream (*raw_out, new CBZip2StreamCompressor(), CCompressionStream::fOwnProcessor)); txt_out.reset(CObjectOStream::Open(OutFileType, *compress_out)); } else { txt_out.reset(CObjectOStream::Open(args["o"].AsString().c_str(), OutFileType)); } // auto_ptr <CObjectOStream> txt_out( // CObjectOStream::Open(args["o"].AsString(), OutFileType)); if(txt_out.get()) { SetUpOutputFile(txt_out.get(), OutFileType); if (args["sw"]) { txt_out->Write(ObjectInfo(*(*MySearch->SetResponse().begin()))); } else { txt_out->Write(ObjectInfo(*MySearch)); } txt_out->Flush(); txt_out->Close(); } } catch (NCBI_NS_STD::exception& e) { ERR_POST(Fatal << "Exception in COMSSAMerge::Run: " << e.what()); } return 0; }