コード例 #1
0
ファイル: psiblast.cpp プロジェクト: jackgopack4/pico-blast
/// Compute a PSSM for @p query from an existing set of alignments.
///
/// The PSSM engine options (pseudo count, inclusion threshold) as well as the
/// matrix name and gap costs are all read from @p opts_handle. The query is
/// encoded as a protein sequence with sentinel bytes, which are stripped
/// before the data is handed to the PSSM input strategy.
///
/// @param query               Query sequence; the text of all of its title
///                            descriptors is concatenated and passed along
///                            as the query description.
/// @param alignment           Alignments the PSSM is built from.
/// @param database_scope      Scope passed to the PSSM input strategy
///                            together with @p alignment.
/// @param opts_handle         Source of the PSI-BLAST options used here.
/// @param ancillary_data      Forwarded to
///                            CPssmEngine::SetUngappedStatisticalParams().
/// @param diagnostics_request Forwarded unchanged to the PSSM input strategy.
/// @return The PSSM produced by CPssmEngine::Run(), after
///         PsiBlastAddAncillaryPssmData() has been applied to it with the
///         gap opening/extension costs.
CRef<objects::CPssmWithParameters> 
PsiBlastComputePssmFromAlignment(const objects::CBioseq& query,
                                 CConstRef<objects::CSeq_align_set> alignment,
                                 CRef<objects::CScope> database_scope,
                                 const CPSIBlastOptionsHandle& opts_handle,
                                 CConstRef<CBlastAncillaryData> ancillary_data,
                                 PSIDiagnosticsRequest* diagnostics_request)
{
    // Start from freshly allocated engine options and override the two
    // settings that the options handle controls.
    CPSIBlastOptions psi_opts;
    PSIBlastOptionsNew(&psi_opts);
    psi_opts->pseudo_count = opts_handle.GetPseudoCount();
    psi_opts->inclusion_ethresh = opts_handle.GetInclusionThreshold();

    // Every title descriptor contributes to the query description, in the
    // order in which the descriptors are stored.
    string query_title;
    if (query.IsSetDescr()) {
        const CBioseq::TDescr::Tdata& descriptors = query.GetDescr().Get();
        ITERATE(CBioseq::TDescr::Tdata, desc_it, descriptors) {
            if ( !(*desc_it)->IsTitle() ) {
                continue;
            }
            query_title += (*desc_it)->GetTitle();
        }
    }

    // Encode the query as protein, bracketed by sentinel bytes.
    CBlastQuerySourceBioseqSet seq_source(query, true);
    string encoding_warnings;
    const SBlastSequence encoded_query =
        seq_source.GetBlastSequence(0,
                                    eBlastEncodingProtein,
                                    eNa_strand_unknown,
                                    eSentinels,
                                    &encoding_warnings);
    _ASSERT(encoding_warnings.empty());

    // The input strategy wants the bare residues: step over the leading
    // sentinel and leave both sentinels out of the length.
    CPsiBlastInputData pssm_input(encoded_query.data.get() + 1,
                                  encoded_query.length - 2,
                                  alignment,
                                  database_scope,
                                  *psi_opts.Get(),
                                  opts_handle.GetMatrixName(),
                                  opts_handle.GetGapOpeningCost(),
                                  opts_handle.GetGapExtensionCost(),
                                  diagnostics_request,
                                  query_title);

    CPssmEngine pssm_engine(&pssm_input);
    pssm_engine.SetUngappedStatisticalParams(ancillary_data);
    CRef<CPssmWithParameters> pssm(pssm_engine.Run());

    // Attach the gap costs to the finished PSSM before returning it.
    PsiBlastAddAncillaryPssmData(*pssm,
                                 opts_handle.GetGapOpeningCost(),
                                 opts_handle.GetGapExtensionCost());
    return pssm;
}
コード例 #2
0
ファイル: blast_options.c プロジェクト: fast-project/mpifast
/* Allocate every BLAST options structure through its *New() function.
 *
 * The structures are created in a fixed order; the scoring options are
 * created before the extension and hit saving options because both of those
 * are given (*score_options)->gapped_calculation.
 *
 * Returns 0 when every *New() call returned 0. Otherwise returns the first
 * non-zero status and makes no further calls; nothing is released here on
 * that path.
 */
Int2 BLAST_InitDefaultOptions(EBlastProgramType program_number,
   LookupTableOptions** lookup_options,
   QuerySetUpOptions** query_setup_options, 
   BlastInitialWordOptions** word_options,
   BlastExtensionOptions** ext_options,
   BlastHitSavingOptions** hit_options,
   BlastScoringOptions** score_options,
   BlastEffectiveLengthsOptions** eff_len_options,
   PSIBlastOptions** psi_options,
   BlastDatabaseOptions** db_options)
{
   Int2 status = LookupTableOptionsNew(program_number, lookup_options);
   if (status != 0)
      return status;

   status = BlastQuerySetUpOptionsNew(query_setup_options);
   if (status != 0)
      return status;

   status = BlastInitialWordOptionsNew(program_number, word_options);
   if (status != 0)
      return status;

   /* Must precede the two calls below, which read gapped_calculation. */
   status = BlastScoringOptionsNew(program_number, score_options);
   if (status != 0)
      return status;

   status = BlastExtensionOptionsNew(program_number, ext_options,
                                     (*score_options)->gapped_calculation);
   if (status != 0)
      return status;

   status = BlastHitSavingOptionsNew(program_number, hit_options,
                                     (*score_options)->gapped_calculation);
   if (status != 0)
      return status;

   status = BlastEffectiveLengthsOptionsNew(eff_len_options);
   if (status != 0)
      return status;

   status = PSIBlastOptionsNew(psi_options);
   if (status != 0)
      return status;

   /* Last step: its status (0 on success) is the overall result. */
   status = BlastDatabaseOptionsNew(db_options);
   return status;
}
コード例 #3
0
ファイル: su_pssm.cpp プロジェクト: DmitrySigaev/ncbi
// Prepare everything the PSSM engine is asked for from alignment 'b':
// the encoded master sequence, the PSIMsa alignment data, the diagnostics
// request and the PSI-BLAST options. The pseudo count is picked from the
// information content computed over the alignment data. When DEBUG_PSSM is
// defined, a dump of all of these is appended to "psimsa.txt".
SU_PSSMInput::SU_PSSMInput(const BlockMultipleAlignment *b) : bma(b)
{
//    TRACE_MESSAGE("Creating SU_PSSMInput structure");

    // convert each character of the master sequence to its NCBIStdaa number
    masterLength = bma->GetMaster()->Length();
    masterNCBIStdaa = new unsigned char[masterLength];
    for (unsigned int pos = 0; pos < masterLength; ++pos) {
        masterNCBIStdaa[pos] =
            LookupNCBIStdaaNumberFromCharacter(bma->GetMaster()->m_sequenceString[pos]);
    }

    // allocate the PSIMsa (the master is not counted in num_seqs) and fill it
    PSIMsaDimensions msaDims;
    msaDims.query_length = bma->GetMaster()->Length();
    msaDims.num_seqs = bma->NRows() - 1;
    data = PSIMsaNew(&msaDims);
    FillInAlignmentData(bma, data);

    // diagnostics: frequency ratios only (true to match cdtree)
    diag.frequency_ratios = true;
    diag.information_content = false;
    diag.residue_frequencies = false;
    diag.weighted_residue_frequencies = false;
    diag.gapless_column_weights = false;

    // PSIBlastOptions; no nsg compatibility for now, since no consensus is used
    PSIBlastOptionsNew(&options);
    options->nsg_compatibility_mode = false;

    // The first (highest) threshold that the information content strictly
    // exceeds decides the pseudo count; below all of them it is 1.
    static const struct {
        double threshold;
        int pseudoCount;
    } pseudoCountTable[] = {
        { 84.0, 10 },
        { 55.0,  7 },
        { 43.0,  5 },
        { 41.5,  4 },
        { 40.0,  3 },
        { 39.0,  2 }
    };
    const unsigned int nTableEntries = sizeof(pseudoCountTable) / sizeof(pseudoCountTable[0]);

    double infoContent = CalculateInformationContent(data, false);
    int chosenPseudoCount = 1;
    for (unsigned int t = 0; t < nTableEntries; ++t) {
        if (infoContent > pseudoCountTable[t].threshold) {
            chosenPseudoCount = pseudoCountTable[t].pseudoCount;
            break;
        }
    }
    options->pseudo_count = chosenPseudoCount;

#ifdef DEBUG_PSSM
    // append a dump of the complete engine input to psimsa.txt
    CNcbiOfstream ofs("psimsa.txt", IOS_BASE::out | IOS_BASE::app);
    if (ofs) {
//        diag.residue_frequencies = true;
        ofs << "information content: " << setprecision(6) << infoContent << '\n';
        ofs << "pseudocount: " << options->pseudo_count << '\n';
        ofs << "query length: " << GetQueryLength() << '\n';
        ofs << "query: ";
        for (unsigned int q = 0; q < GetQueryLength(); ++q) {
            ofs << LookupCharacterFromNCBIStdaaNumber(GetQuery()[q]);
        }
        ofs << "\nmatrix name: " << GetMatrixName() << '\n';
        ofs << "options->pseudo_count: " << options->pseudo_count << '\n';
        ofs << "options->inclusion_ethresh: " << options->inclusion_ethresh << '\n';
        ofs << "options->use_best_alignment: " << static_cast<int>(options->use_best_alignment) << '\n';
        ofs << "options->nsg_compatibility_mode: " << static_cast<int>(options->nsg_compatibility_mode) << '\n';
        ofs << "options->impala_scaling_factor: " << options->impala_scaling_factor << '\n';
        ofs << "diag->information_content: " << static_cast<int>(GetDiagnosticsRequest()->information_content) << '\n';
        ofs << "diag->residue_frequencies: " << static_cast<int>(GetDiagnosticsRequest()->residue_frequencies) << '\n';
        ofs << "diag->weighted_residue_frequencies: " << static_cast<int>(GetDiagnosticsRequest()->weighted_residue_frequencies) << '\n';
        ofs << "diag->frequency_ratios: " << static_cast<int>(GetDiagnosticsRequest()->frequency_ratios) << '\n';
        ofs << "diag->gapless_column_weights: " << static_cast<int>(GetDiagnosticsRequest()->gapless_column_weights) << '\n';
        ofs << "num_seqs: " << data->dimensions->num_seqs << ", query_length: " << data->dimensions->query_length << '\n';

        // residue letters, one line per row (rows 0..num_seqs inclusive)
        for (unsigned int r = 0; r <= data->dimensions->num_seqs; ++r) {
            for (unsigned int c = 0; c < data->dimensions->query_length; ++c) {
                ofs << LookupCharacterFromNCBIStdaaNumber(data->data[r][c].letter);
            }
            ofs << '\n';
        }

        // aligned ('A') / unaligned ('U') flags, same layout as above
        for (unsigned int r = 0; r <= data->dimensions->num_seqs; ++r) {
            for (unsigned int c = 0; c < data->dimensions->query_length; ++c) {
                ofs << (data->data[r][c].is_aligned ? 'A' : 'U');
            }
            ofs << '\n';
        }
    }
#endif
}