Exemplo n.º 1
0
unsigned int  CAlignmentRefiner::GetBlocksToAlign(unsigned int nBlocks, vector<unsigned int>& blocks, string& msg, bool useExtras) {

    bool skip  = false;
    unsigned int first = 0, last = nBlocks - 1;
    blocks.clear();
    if (nBlocks == 0) return 0;

    CArgs args = GetArgs();
    unsigned int nExtra = (useExtras) ? (unsigned int) args.GetNExtra() : 0;

    //  If specify realignment of all blocks, the default settings are OK.
    //  Otherwise, use the range specified in the -f and -l flags.
    //  Recall that the command line takes in a one-based integer; blocks is zero-based.
    if (!args["ab"]) {

        if (args["f"]) {
            first = (unsigned) args["f"].AsInteger() - 1;
        }
        if (args["l"]) {
            last  = (unsigned) args["l"].AsInteger() - 1;
        }
        if (first >= nBlocks) {
            first = 0;
        }
        if (last < first || last >= nBlocks) {
            last = nBlocks - 1;
        }
    }

    //  If there are any extra arguments provided, they refer to block numbers to freeze.
    msg = "\nAligning blocks: ";
    for (unsigned int i = first; i <= last; ++i) {
        if (nExtra > 0) {
            skip = false;
            for (size_t extra = 1; extra <= nExtra; ++extra) {
                if (args[extra].AsInteger() - 1 == (int) i) {
                    skip = true;
                    break;
                }
            }
            if (skip) continue;
        }
        blocks.push_back(i);
        msg.append(NStr::UIntToString(i+1) + " ");
        if ((last-first+1)%15 == 0 && first != 0) msg.append("\n");
    }
    //TERSE_INFO_MESSAGE_CL("message in GetBlocksToAlign:\n" << msg);
    return blocks.size();
}
Exemplo n.º 2
0
void CAgpValidateApplication::x_ValidateUsingFiles(const CArgs& args, CNcbiOstream* out)
{
  if(m_reader.m_is_chr) {
    if(m_reader.m_explicit_scaf) {
      if(!m_use_xml) {
        cout << "===== Reading Chromosome from scaffold AGP =====" << endl;
        // second header - for details that are printed below the summary and stats
        if(out) *out << "===== Chromosome from scaffold AGP =====" << endl;
      }
    }
    // else: cout << "===== Reading Chromosome from component AGP =====" << endl;
  }
  else if(m_reader.m_explicit_scaf) {
    if(!m_use_xml) {
      cout << "===== Reading Scaffold from component AGP =====" << endl;
      if(out) *out << "===== Scaffold from component AGP =====" << endl; // header for details that are printed below
    }
  }

  if( 0==(m_ValidationType&VT_Acc) && args["out"].HasValue()) {
    CAgpCompSpanSplitter *comp_splitter = new CAgpCompSpanSplitter(&(args["out"].AsOutputFile()));
    m_reader.SetRowOutput(comp_splitter);
  }

  if (args.GetNExtra() == 0) {
    x_ValidateFile(cin);
  }
  else {
    SIZE_TYPE num_fasta_files=0;
    bool allowFasta = !m_reader.m_explicit_scaf;
    for (unsigned int i = 1; i <= args.GetNExtra(); i++) {

      m_CurrentFileName = args['#' + NStr::IntToString(i)].AsString();
      if(m_CurrentFileName=="-chr") {
        if(m_reader.m_is_chr) {
          cerr << "Error -- second -chr is not supported.\n";
          exit(1);
        }
        if(!m_reader.m_explicit_scaf) {
          cerr << "Error -- -chr after a file, but no preceding -scaf. Expecting:\n"
               << "    -scaf Scaffold_AGP_file(s) -chr Chromosome_AGP_file(s)\n";
          exit(1);
        }

        m_reader.PrintTotals(cout, m_use_xml);
        m_reader.Reset(true);
        pAgpErr->ResetTotals();

        if(!m_use_xml) {
          cout << "\n===== Reading Chromosome from scaffold AGP =====" << endl;
          if(out) *out << "\n===== Chromosome from scaffold AGP =====" << endl;// header for details that are printed below
        }
        continue;
      }

      //CNcbiIstream& istr = args['#' + NStr::IntToString(i)].AsInputFile();
      CNcbiIfstream istr(m_CurrentFileName.c_str());
      if (!istr) {
          cerr << "Error -- unable to open file : " << m_CurrentFileName << "\n";
          exit (1);
      }

      char ch=0;
      if(allowFasta) {
        istr.get(ch); istr.putback(ch);
      }
      if(ch=='>') {
        x_LoadLenFa(istr, m_CurrentFileName);
        num_fasta_files++;
      }
      else {
        if(allowFasta && num_fasta_files) x_ReportFastaSeqCount();
        if( args.GetNExtra()-num_fasta_files>1 ) pAgpErr->StartFile(m_CurrentFileName);
        x_ValidateFile(istr);
        allowFasta=false;
      }

    }
    if(num_fasta_files==args.GetNExtra()) {
      //cerr << "No AGP files."; exit (1);
      if(allowFasta && num_fasta_files) x_ReportFastaSeqCount();
      x_ValidateFile(cin);
    }
  }

}
Exemplo n.º 3
0
//  Write a human-readable summary of the current refinement settings to 'echoStream'.
//
//  echoStream - destination stream.
//  echoLOO    - when true, print the leave-one-out parameter section.
//  echoBE     - when true, print the block-editing parameter section.
//
//  The global parameter section is printed when 'echoLOO' and 'echoBE' are equal,
//  i.e. both set or both clear.
void CAlignmentRefiner::EchoSettings(ostream& echoStream, bool echoLOO, bool echoBE) {

    static string yesText = "Yes", noText = "No";

    CArgs cmdArgs = GetArgs();
    unsigned int extraCount = (unsigned int) cmdArgs.GetNExtra();

    // ---- global section ------------------------------------------------------------
    if (echoLOO == echoBE) {
        echoStream << "Global Refinement Parameters:" << endl;
        echoStream << "=================================" << endl;
        echoStream << "Number of trials = " << m_nTrials << endl;
        echoStream << "Number of cycles per trial = " << m_nCycles << endl;
        echoStream << "Alignment score deviation threshold = " << m_scoreDeviationThreshold << endl;

        if (extraCount == 0) {
            echoStream << "No extra arguments that exclude specific rows/blocks from refinement." << endl;
        } else {
            echoStream << "Extra argument(s) freeze ";
            if (m_loo.extrasAreRows) {
                echoStream << "Row:\n    ";
            } else {
                echoStream << "Block:\n    ";
            }
            for (size_t idx = 1; idx <= extraCount; ++idx) {
                echoStream << cmdArgs[idx].AsInteger() << "  ";
            }
            echoStream << endl;
        }
        echoStream << "Quiet mode? " << ((m_quietMode) ? "ON" : "OFF") << endl;
        echoStream << endl;
    }

    // ---- leave-one-out section -----------------------------------------------------
    if (echoLOO) {
        const bool looEnabled = m_loo.doLOO;

        echoStream << "Leave-One_Out parameters:" << endl;
        echoStream << "=================================" << endl;
        echoStream << "LOO on?  " << (looEnabled ? yesText : noText) << endl;
        if (looEnabled) {
            echoStream << "Row selection order:  " << RefinerRowSelectorCodeToStr(m_loo.selectorCode) << endl;
            echoStream << "Number left out between PSSM recomputation = " << m_loo.lno << endl;

            echoStream << "Freeze alignment of rows with structure?  " << ((m_loo.fixStructures) ? yesText : noText) << endl;
            echoStream << "Use full sequence or aligned footprint?  " << ((m_loo.fullSequence) ? "Full" : "Aligned") << endl;
            echoStream << "N-terminal extension allowed = " << m_loo.nExt << endl;
            echoStream << "C-terminal extension allowed = " << m_loo.cExt << endl;

            echoStream << "Converged after fraction of rows left out do not change score = " << m_loo.sameScoreThreshold << endl;
            echoStream << "Random number generator seed = " << m_loo.seed << endl;

            echoStream << "LOO loop percentile:  longest loop allowed = max initial loop * " << m_loo.percentile << endl;
            echoStream << "LOO extension to longest loop allowed = " << m_loo.extension << endl;
            echoStream << "LOO absolute maximum longest loop (zero == no max) = " << m_loo.cutoff << endl;
        }
        echoStream << endl;
    }

    // ---- block-editing section -----------------------------------------------------
    if (!echoBE) {
        return;
    }

    //  Both names default to "Invalid Method" for enum values not listed below.
    string editMethodName = "Invalid Method";
    string scorerName = editMethodName;

    if (m_blockEdit.algMethod == eSimpleExtendAndShrink) {
        editMethodName = "Extend and Shrink";
    } else if (m_blockEdit.algMethod == eSimpleExtend) {
        editMethodName = "Extend Only";
    } else if (m_blockEdit.algMethod == eSimpleShrink) {
        editMethodName = "Shrink Only";
    } else if (m_blockEdit.algMethod == eGreedyExtend) {
        editMethodName = "Greedy Extend Only";
    }

    if (m_blockEdit.columnMethod == ePercentAtOrOverThreshold) {
        scorerName = "% Rows at or Over Threshold";
    } else if (m_blockEdit.columnMethod == eSumOfScores) {
        scorerName = "Sum of Scores";
    } else if (m_blockEdit.columnMethod == eMedianScore) {
        scorerName = "Median Score";
    } else if (m_blockEdit.columnMethod == ePercentOfWeightOverThreshold) {
        scorerName = "% Score Weight at or Over Threshold";
    } else if (m_blockEdit.columnMethod == eCompoundScorer) {
        //  The compound scorer is reported as "3.3.3" only when -be_score says so.
        scorerName = (GetArgs()["be_score"].AsString() == "3.3.3") ? "3.3.3" : "Compound Scoring";
    }

    echoStream << "Block editing parameters:" << endl;
    echoStream << "=================================" << endl;
    echoStream << "block editing on?         " << ((m_blockEdit.editBlocks) ? yesText : noText) << endl;
    if (m_blockEdit.editBlocks) {
        echoStream << "block shrinking on?       " << ((m_blockEdit.canShrink) ? yesText : noText) << endl;
        echoStream << "extend first?             " << ((m_blockEdit.extendFirst) ? yesText : noText) << endl;
        echoStream << endl;
        echoStream << "block editing method    = " << editMethodName << endl;
        echoStream << "column scoring method   = " << scorerName << endl;
        echoStream << endl;

        const bool scoring333 = (GetArgs()["be_score"].AsString() == "3.3.3");
        if (!scoring333) {
            echoStream << "minimum block size      = " << m_blockEdit.minBlockSize << endl;
            echoStream << "column-scorer threshold = " << m_blockEdit.columnScorerThreshold << endl;
            echoStream << "extension threshold     = " << m_blockEdit.extensionThreshold << endl;
            echoStream << "shrinkage threshold     = " << m_blockEdit.shrinkageThreshold << endl;
        } else {
            echoStream << "(used for 3.3.3 scoring only):" << endl;
            echoStream << "    median threshold        = " << m_blockEdit.median << endl;
            echoStream << "    negative score fraction = " << m_blockEdit.negScoreFraction << endl;
            echoStream << "    negative row   fraction = " << m_blockEdit.negRowsFraction << endl;
        }
    }
    echoStream << endl;
}
Exemplo n.º 4
0
//  Populate the leave-one-out settings (m_loo) from the command-line arguments.
//
//  nAlignedBlocks - number of aligned blocks, forwarded to GetBlocksToAlign().
//  msg            - output: erased on entry; when LOO is enabled it ends up holding a
//                   one-line note on how many blocks are frozen.
//
//  Always returns eRefinerResultOK.
RefinerResultCode CAlignmentRefiner::ExtractLOOArgs(unsigned int nAlignedBlocks, string& msg) {

    CArgs cmdArgs = GetArgs();

    msg.erase();

    m_loo.doLOO      = (!cmdArgs["no_LOO"]);
    m_loo.fixStructures = (cmdArgs["fix_structs"]);
    m_loo.extrasAreRows = (!cmdArgs["extras_are_blocks"]);

    //  "selection_order" is mandatory (unless -no_LOO is present) and constrained to {0, 1, 2}.
    //  The number of trials is only relevant for a random selection order, so the two
    //  deterministic orders force a single trial.
    const int order = (m_loo.doLOO) ? cmdArgs["selection_order"].AsInteger() : 0;
    if (order == 0) {
        m_loo.selectorCode = eRandomSelectionOrder;
    } else if (order == 1) {
        m_nTrials = 1;
        m_loo.selectorCode = eWorstScoreFirst;
    } else if (order == 2) {
        m_nTrials = 1;
        m_loo.selectorCode = eBestScoreFirst;
    }

    //  Nothing else to read when LOO is switched off.
    if (!m_loo.doLOO) {
        return eRefinerResultOK;
    }

    m_loo.fullSequence = (cmdArgs["fs"]);

    //  -nex / -cex override the common -ex extension on their respective terminus.
    if (cmdArgs["nex"]) {
        m_loo.nExt = cmdArgs["nex"].AsInteger();
    } else {
        m_loo.nExt = cmdArgs["ex"].AsInteger();
    }
    if (cmdArgs["cex"]) {
        m_loo.cExt = cmdArgs["cex"].AsInteger();
    } else {
        m_loo.cExt = cmdArgs["ex"].AsInteger();
    }

    if (cmdArgs["seed"]) {
        m_loo.seed = cmdArgs["seed"].AsInteger();
    } else {
        m_loo.seed = 0;
    }
    m_loo.lno  = (unsigned int) cmdArgs["lno"].AsInteger();
    m_loo.sameScoreThreshold      = cmdArgs["convSameScore"].AsDouble();

    m_loo.percentile = cmdArgs["p"].AsDouble();
    m_loo.extension  = (unsigned) cmdArgs["x"].AsInteger();
    m_loo.cutoff     = (unsigned) cmdArgs["c"].AsInteger();

    //  Extra arguments name either rows or blocks to freeze.  Row numbers are
    //  one-based on the command line and stored zero-based.
    if (m_loo.extrasAreRows) {
        const unsigned int extraCount = (unsigned int) cmdArgs.GetNExtra();
        for (unsigned int pos = 1; pos <= extraCount; ++pos) {
            const unsigned int row = (unsigned int) cmdArgs[pos].AsInteger() - 1;
            m_loo.rowsToExclude.push_back(row);
        }
    }

    //  'false' == don't exclude any blocks using extra cmd line arguments;
    //  the message produced by GetBlocksToAlign is replaced by the summary below.
    const unsigned int alignedCount = GetBlocksToAlign(nAlignedBlocks, m_loo.blocks, msg, !m_loo.extrasAreRows);
    msg = "Freeze " + NStr::UIntToString(nAlignedBlocks - alignedCount) + " blocks in ExtractLOOArgs.\n";

    return eRefinerResultOK;
}
Exemplo n.º 5
0
int COMSSAMerge::Run()
{    

    try {

	CArgs args = GetArgs();


    CRef <COMSSASearch> MySearch(new COMSSASearch);

    ESerialDataFormat InFileType(eSerial_Xml), OutFileType(eSerial_Xml);

    bool obz2(false);  // output bzip2 compressed?
    bool ibz2(false);  // input bzip2 compressed?

    if(args["ox"]) OutFileType = eSerial_Xml;
    else if(args["ob"]) OutFileType = eSerial_AsnBinary;
    else if(args["ot"]) OutFileType = eSerial_AsnText;
    else if(args["obz2"]) {
        OutFileType = eSerial_Xml;
        obz2 = true;
    }
    else ERR_POST(Fatal << "output file type not given");

    if(args["ix"]) InFileType = eSerial_Xml;
    else if(args["ib"]) InFileType = eSerial_AsnBinary;
    else if(args["it"]) InFileType = eSerial_AsnText;
    else if(args["ibz2"]) {
        InFileType = eSerial_Xml;
        ibz2 = true;
    }
    else ERR_POST(Fatal << "input file type not given");


    // loop thru input files
    if ( args["i"].AsString() != "") {
        ifstream is(args["i"].AsString().c_str());
        bool Begin(true);
        if(!is)
            ERR_POST(Fatal << "unable to open input file list " << args["i"].AsString());
        while(!is.eof()) {
            string iFileName;
            NcbiGetline(is, iFileName, "\x0d\x0a");
            if(iFileName == "" || is.eof()) continue;
            try {
                CRef <COMSSASearch> InSearch(new COMSSASearch);
                CSearchHelper::ReadCompleteSearch(iFileName, InFileType, ibz2, *InSearch);
//                InSearch->ReadCompleteSearch(iFileName, InFileType, ibz2);
                if(Begin) {
                    Begin = false;
                    MySearch->CopyCMSSearch(InSearch);
                }
                else {
                    // add
                    MySearch->AppendSearch(InSearch);
                }
            }
            catch(CException& e) {
                ERR_POST(Fatal << "exception: " << e.what());
                return 1;
            }
        }
    }
    else if ( args.GetNExtra() ) {
        for (size_t extra = 1;  extra <= args.GetNExtra();  extra++) {
            CRef <COMSSASearch> InSearch(new COMSSASearch);
            CSearchHelper::ReadCompleteSearch(args[extra].AsString(), InFileType, ibz2, *InSearch);
            //InSearch->ReadCompleteSearch(args[extra].AsString(), InFileType, ibz2);
            try {
                if(extra == 1) {
                    // copy
                    MySearch->CopyCMSSearch(InSearch);
                }
                else {
                    // add
                    MySearch->AppendSearch(InSearch);
                }
            }
            catch(CException& e) {
                ERR_POST(Fatal << "exception: " << e.what());
                return 1;
            }
        }
    }
 
    // write out the new search

    auto_ptr <CNcbiOfstream> raw_out;
    auto_ptr <CCompressionOStream> compress_out;
    auto_ptr <CObjectOStream> txt_out;
    
    if( obz2 ) {
        raw_out.reset(new CNcbiOfstream(args["o"].AsString().c_str()));
        compress_out.reset( new CCompressionOStream (*raw_out, 
                                                     new CBZip2StreamCompressor(), 
                                                     CCompressionStream::fOwnProcessor)); 
        txt_out.reset(CObjectOStream::Open(OutFileType, *compress_out)); 
    }
    else {
        txt_out.reset(CObjectOStream::Open(args["o"].AsString().c_str(), OutFileType));
    }


//    auto_ptr <CObjectOStream> txt_out(
//         CObjectOStream::Open(args["o"].AsString(), OutFileType));

    if(txt_out.get()) {
        SetUpOutputFile(txt_out.get(), OutFileType);
        if (args["sw"]) {
            txt_out->Write(ObjectInfo(*(*MySearch->SetResponse().begin())));
	}
        else {
            txt_out->Write(ObjectInfo(*MySearch));
        }
        txt_out->Flush();
        txt_out->Close();
    }


    } catch (NCBI_NS_STD::exception& e) {
	ERR_POST(Fatal << "Exception in COMSSAMerge::Run: " << e.what());
    }

    return 0;
}