/// Computes a PSSM for a query Bioseq from an existing set of alignments.
///
/// The pseudo count and inclusion e-value threshold handed to the PSSM engine,
/// as well as the matrix name and gap costs, are all read from @p opts_handle.
/// Every title descriptor found on @p query is concatenated (in iteration
/// order, with no separator) and passed along as the query description.
///
/// @param query               Query sequence; fetched as a protein-encoded
///                            BLAST sequence with sentinel bytes.
/// @param alignment           Alignments forwarded to CPsiBlastInputData.
/// @param database_scope      Scope forwarded to CPsiBlastInputData.
/// @param opts_handle         Source of engine options, matrix name and gap costs.
/// @param ancillary_data      Passed to CPssmEngine::SetUngappedStatisticalParams.
/// @param diagnostics_request Forwarded unchanged to CPsiBlastInputData.
/// @return The result of CPssmEngine::Run() after
///         PsiBlastAddAncillaryPssmData has been applied to it with the gap
///         opening/extension costs.
CRef<objects::CPssmWithParameters>
PsiBlastComputePssmFromAlignment(const objects::CBioseq& query,
                                 CConstRef<objects::CSeq_align_set> alignment,
                                 CRef<objects::CScope> database_scope,
                                 const CPSIBlastOptionsHandle& opts_handle,
                                 CConstRef<CBlastAncillaryData> ancillary_data,
                                 PSIDiagnosticsRequest* diagnostics_request)
{
    // Start from freshly created PSI-BLAST options and override only the two
    // fields that the options handle supplies.
    CPSIBlastOptions engine_opts;
    PSIBlastOptionsNew(&engine_opts);
    engine_opts->pseudo_count = opts_handle.GetPseudoCount();
    engine_opts->inclusion_ethresh = opts_handle.GetInclusionThreshold();

    // Gather the query description from its title descriptor(s), if any.
    string query_title;
    if (query.IsSetDescr()) {
        const CBioseq::TDescr::Tdata& descriptors = query.GetDescr().Get();
        ITERATE(CBioseq::TDescr::Tdata, desc, descriptors) {
            if ( !(*desc)->IsTitle() ) {
                continue;
            }
            query_title += (*desc)->GetTitle();
        }
    }

    // Retrieve the query residues; the sequence comes back with sentinels.
    CBlastQuerySourceBioseqSet bioseq_source(query, true);
    string seq_warnings;
    const SBlastSequence encoded_query =
        bioseq_source.GetBlastSequence(0,
                                       eBlastEncodingProtein,
                                       eNa_strand_unknown,
                                       eSentinels,
                                       &seq_warnings);
    _ASSERT(seq_warnings.empty());

    // The "+1" / "-2" below step past the leading sentinel and leave both
    // sentinels out of the reported length.
    CPsiBlastInputData pssm_input(encoded_query.data.get() + 1,
                                  encoded_query.length - 2,
                                  alignment,
                                  database_scope,
                                  *engine_opts.Get(),
                                  opts_handle.GetMatrixName(),
                                  opts_handle.GetGapOpeningCost(),
                                  opts_handle.GetGapExtensionCost(),
                                  diagnostics_request,
                                  query_title);

    CPssmEngine pssm_engine(&pssm_input);
    pssm_engine.SetUngappedStatisticalParams(ancillary_data);
    CRef<CPssmWithParameters> pssm(pssm_engine.Run());

    PsiBlastAddAncillaryPssmData(*pssm,
                                 opts_handle.GetGapOpeningCost(),
                                 opts_handle.GetGapExtensionCost());
    return pssm;
}
/** Creates each of the BLAST option structures by calling its *New function.
 *
 * The structures are created in a fixed order. The scoring options are
 * created before the extension and hit saving options because both of those
 * are passed (*score_options)->gapped_calculation.
 *
 * The first non-zero status from any step is returned immediately; this
 * function does not release anything created by the earlier steps.
 *
 * @param program_number       Program type passed to the program-specific constructors [in]
 * @param lookup_options       Lookup table options [out]
 * @param query_setup_options  Query setup options [out]
 * @param word_options         Initial word options [out]
 * @param ext_options          Extension options [out]
 * @param hit_options          Hit saving options [out]
 * @param score_options        Scoring options [out]
 * @param eff_len_options      Effective lengths options [out]
 * @param psi_options          PSI-BLAST options [out]
 * @param db_options           Database options [out]
 * @return 0 if every step returned 0, otherwise the first non-zero status.
 */
Int2 BLAST_InitDefaultOptions(EBlastProgramType program_number,
                              LookupTableOptions** lookup_options,
                              QuerySetUpOptions** query_setup_options,
                              BlastInitialWordOptions** word_options,
                              BlastExtensionOptions** ext_options,
                              BlastHitSavingOptions** hit_options,
                              BlastScoringOptions** score_options,
                              BlastEffectiveLengthsOptions** eff_len_options,
                              PSIBlastOptions** psi_options,
                              BlastDatabaseOptions** db_options)
{
    Int2 status = LookupTableOptionsNew(program_number, lookup_options);
    if (status != 0) {
        return status;
    }

    status = BlastQuerySetUpOptionsNew(query_setup_options);
    if (status != 0) {
        return status;
    }

    status = BlastInitialWordOptionsNew(program_number, word_options);
    if (status != 0) {
        return status;
    }

    /* Must precede the two calls below, which read gapped_calculation. */
    status = BlastScoringOptionsNew(program_number, score_options);
    if (status != 0) {
        return status;
    }

    status = BlastExtensionOptionsNew(program_number, ext_options,
                                      (*score_options)->gapped_calculation);
    if (status != 0) {
        return status;
    }

    status = BlastHitSavingOptionsNew(program_number, hit_options,
                                      (*score_options)->gapped_calculation);
    if (status != 0) {
        return status;
    }

    status = BlastEffectiveLengthsOptionsNew(eff_len_options);
    if (status != 0) {
        return status;
    }

    status = PSIBlastOptionsNew(psi_options);
    if (status != 0) {
        return status;
    }

    status = BlastDatabaseOptionsNew(db_options);
    if (status != 0) {
        return status;
    }

    return 0;
}
// Sets up the PSSM input for a block multiple alignment: encodes the master
// sequence as NCBIStdaa numbers, creates and fills a PSIMsa from the alignment,
// fixes the diagnostics request, and creates PSIBlastOptions whose pseudo-count
// is picked from the information content of the alignment data.
// NOTE(review): masterNCBIStdaa is allocated here with new[]; its release is not
// visible in this block (presumably the destructor) - confirm.
SU_PSSMInput::SU_PSSMInput(const BlockMultipleAlignment *b) : bma(b)
{
//    TRACE_MESSAGE("Creating SU_PSSMInput structure");

    // convert the master sequence, residue by residue, to NCBIStdaa numbers
    masterLength = bma->GetMaster()->Length();
    masterNCBIStdaa = new unsigned char[masterLength];
    for (unsigned int pos=0; pos<masterLength; ++pos)
        masterNCBIStdaa[pos] =
            LookupNCBIStdaaNumberFromCharacter(bma->GetMaster()->m_sequenceString[pos]);

    // allocate the PSIMsa and populate it from the alignment
    PSIMsaDimensions msaDimensions;
    msaDimensions.query_length = bma->GetMaster()->Length();
    msaDimensions.num_seqs = bma->NRows() - 1;    // master row is not counted
    data = PSIMsaNew(&msaDimensions);
    FillInAlignmentData(bma, data);

    // diagnostics request: frequency ratios only
    diag.frequency_ratios = true;     // true to match cdtree
    diag.information_content = false;
    diag.residue_frequencies = false;
    diag.weighted_residue_frequencies = false;
    diag.gapless_column_weights = false;

    // options for the PSSM engine
    PSIBlastOptionsNew(&options);
    options->nsg_compatibility_mode = false;    // false for now, since we're not using a consensus

    // The pseudo-count is stepped according to information content. Thresholds are
    // listed from highest to lowest and the first one strictly exceeded wins;
    // if none is exceeded the pseudo-count is 1.
    static const struct {
        double exceeds;
        int pseudoCount;
    } pseudoCountSteps[] = {
        { 84.0, 10 },
        { 55.0,  7 },
        { 43.0,  5 },
        { 41.5,  4 },
        { 40.0,  3 },
        { 39.0,  2 }
    };
    const double infoContent = CalculateInformationContent(data, false);
    options->pseudo_count = 1;
    for (unsigned int step=0; step<sizeof(pseudoCountSteps)/sizeof(pseudoCountSteps[0]); ++step) {
        if (infoContent > pseudoCountSteps[step].exceeds) {
            options->pseudo_count = pseudoCountSteps[step].pseudoCount;
            break;
        }
    }

#ifdef DEBUG_PSSM
    // append a dump of everything set up above to psimsa.txt
    CNcbiOfstream ofs("psimsa.txt", IOS_BASE::out | IOS_BASE::app);
    if (ofs) {
//        diag.residue_frequencies = true;

        ofs << "information content: " << setprecision(6) << infoContent << '\n';
        ofs << "pseudocount: " << options->pseudo_count << '\n';
        ofs << "query length: " << GetQueryLength() << '\n';

        ofs << "query: ";
        for (unsigned int pos=0; pos<GetQueryLength(); ++pos)
            ofs << LookupCharacterFromNCBIStdaaNumber(GetQuery()[pos]);

        ofs << "\nmatrix name: " << GetMatrixName() << '\n';
        ofs << "options->pseudo_count: " << options->pseudo_count << '\n';
        ofs << "options->inclusion_ethresh: " << options->inclusion_ethresh << '\n';
        ofs << "options->use_best_alignment: "
            << static_cast<int>(options->use_best_alignment) << '\n';
        ofs << "options->nsg_compatibility_mode: "
            << static_cast<int>(options->nsg_compatibility_mode) << '\n';
        ofs << "options->impala_scaling_factor: " << options->impala_scaling_factor << '\n';
        ofs << "diag->information_content: "
            << static_cast<int>(GetDiagnosticsRequest()->information_content) << '\n';
        ofs << "diag->residue_frequencies: "
            << static_cast<int>(GetDiagnosticsRequest()->residue_frequencies) << '\n';
        ofs << "diag->weighted_residue_frequencies: "
            << static_cast<int>(GetDiagnosticsRequest()->weighted_residue_frequencies) << '\n';
        ofs << "diag->frequency_ratios: "
            << static_cast<int>(GetDiagnosticsRequest()->frequency_ratios) << '\n';
        ofs << "diag->gapless_column_weights: "
            << static_cast<int>(GetDiagnosticsRequest()->gapless_column_weights) << '\n';
        ofs << "num_seqs: " << data->dimensions->num_seqs
            << ", query_length: " << data->dimensions->query_length << '\n';

        // residue letters, one line per row; "<=" so that rows 0..num_seqs are all written
        for (unsigned int r=0; r<=data->dimensions->num_seqs; ++r) {
            for (unsigned int c=0; c<data->dimensions->query_length; ++c)
                ofs << LookupCharacterFromNCBIStdaaNumber(data->data[r][c].letter);
            ofs << '\n';
        }

        // aligned ('A') / unaligned ('U') flags, same row/column layout as above
        for (unsigned int r=0; r<=data->dimensions->num_seqs; ++r) {
            for (unsigned int c=0; c<data->dimensions->query_length; ++c) {
                if (data->data[r][c].is_aligned)
                    ofs << 'A';
                else
                    ofs << 'U';
            }
            ofs << '\n';
        }
    }
#endif
}