/** Allocates a BlastExtensionOptions structure and fills in its defaults.
 * @param program Program type; blastn and phiblastn get the nucleotide
 *                X-dropoff defaults, every other program the protein ones [in]
 * @param options Receives the newly allocated structure, or NULL if the
 *                allocation failed [out]
 * @param gapped  Is this a gapped search? Only consulted when deciding
 *                whether to enable composition based statistics [in]
 * @return 0 on success, BLASTERR_MEMORY if the allocation failed.
 */
Int2
BlastExtensionOptionsNew(EBlastProgramType program,
                         BlastExtensionOptions* *options, Boolean gapped)
{
    const Boolean kNucleotideSearch =
        (program == eBlastTypeBlastn || program == eBlastTypePhiBlastn);
    BlastExtensionOptions* ext =
        (BlastExtensionOptions*) calloc(1, sizeof(BlastExtensionOptions));

    *options = ext;
    if (!ext)
        return BLASTERR_MEMORY;

    /* X-dropoff defaults differ between nucleotide and protein-protein
       searches. */
    ext->gap_x_dropoff = kNucleotideSearch ? BLAST_GAP_X_DROPOFF_NUCL
                                           : BLAST_GAP_X_DROPOFF_PROT;
    ext->gap_x_dropoff_final = kNucleotideSearch
                                   ? BLAST_GAP_X_DROPOFF_FINAL_NUCL
                                   : BLAST_GAP_X_DROPOFF_FINAL_PROT;

    /* Dynamic programming is the default for both the preliminary
       (score-only) and the traceback stages. */
    ext->ePrelimGapExt = eDynProgScoreOnly;
    ext->eTbackExt = eDynProgTbck;

    /** @todo how to determine this for PSI-BLAST bootstrap run (i.e. when
     * program is blastp? */
    /* Composition based statistics are switched on only for gapped searches
       with a PSSM query against a non-translated subject. */
    ext->compositionBasedStats =
        (gapped && Blast_QueryIsPssm(program) &&
         !Blast_SubjectIsTranslated(program))
            ? eCompositionBasedStats
            : eNoCompositionBasedStats;

    ext->program_number = program;

    return 0;
}
/** Allocates a BlastHitSavingOptions structure and fills in its defaults.
 *
 * The hit list size and expect value are set to BLAST_HITLIST_SIZE and
 * BLAST_EXPECT_VALUE. Sum statistics are enabled by default for all
 * translated searches (except RPS tblastn) and for all ungapped searches.
 *
 * @param program_number     Type of BLAST program [in]
 * @param options            Receives the newly allocated structure, or NULL
 *                           if the allocation failed [out]
 * @param gapped_calculation Is this a gapped search? [in]
 * @return 0 on success, BLASTERR_INVALIDPARAM if options is NULL,
 *         BLASTERR_MEMORY if the allocation failed.
 */
Int2
BlastHitSavingOptionsNew(EBlastProgramType program_number,
                         BlastHitSavingOptions** options,
                         Boolean gapped_calculation)
{
    if (options == NULL)
        return BLASTERR_INVALIDPARAM;

    *options =
        (BlastHitSavingOptions*) calloc(1, sizeof(BlastHitSavingOptions));

    /* An allocation failure is an out-of-memory condition, not a bad
       argument; report it the same way BlastExtensionOptionsNew does. */
    if (*options == NULL)
        return BLASTERR_MEMORY;

    (*options)->hitlist_size = BLAST_HITLIST_SIZE;
    (*options)->expect_value = BLAST_EXPECT_VALUE;
    (*options)->program_number = program_number;

    /* By default, sum statistics is used for all translated searches
     * (except RPS BLAST), and for all ungapped searches.
     */
    if (program_number == eBlastTypeRpsTblastn) {
        (*options)->do_sum_stats = FALSE;
    } else if (!gapped_calculation ||
               Blast_QueryIsTranslated(program_number) ||
               Blast_SubjectIsTranslated(program_number)) {
        (*options)->do_sum_stats = TRUE;
    } else {
        (*options)->do_sum_stats = FALSE;
    }

    return 0;
}
/** Suggests a neighboring-word threshold for a program / matrix combination.
 *
 * For blastn the function returns immediately and leaves *threshold
 * untouched. Otherwise the matrix name is matched case-insensitively against
 * a table of known matrices; unknown names fall back to the BLOSUM62 value.
 * The value is then raised by 2 when the subject is translated, or by 1 when
 * only the query is translated.
 *
 * @param program_number Type of BLAST program [in]
 * @param matrixName     Name of the scoring matrix [in]
 * @param threshold      Receives the suggested threshold [out]
 * @return 0 on success, BLASTERR_INVALIDPARAM if matrixName is NULL
 *         (for programs other than blastn).
 */
Int2
BLAST_GetSuggestedThreshold(EBlastProgramType program_number,
                            const char* matrixName, double* threshold)
{
    static const struct {
        const char* name;
        double      value;
    } kMatrixThresholds[] = {
        { "BLOSUM62",     11 },
        { "BLOSUM45",     14 },
        { "BLOSUM62_20", 100 },
        { "BLOSUM80",     12 },
        { "PAM30",        16 },
        { "PAM70",        14 }
    };
    const unsigned int kNumMatrices =
        (unsigned int) (sizeof(kMatrixThresholds) /
                        sizeof(kMatrixThresholds[0]));
    /* Matrices that are not in the table get the BLOSUM62 value. */
    double base_value = 11;
    unsigned int i;

    if (program_number == eBlastTypeBlastn)
        return 0;

    if (matrixName == NULL)
        return BLASTERR_INVALIDPARAM;

    for (i = 0; i < kNumMatrices; ++i) {
        if (strcasecmp(matrixName, kMatrixThresholds[i].name) == 0) {
            base_value = kMatrixThresholds[i].value;
            break;
        }
    }
    *threshold = base_value;

    if (Blast_SubjectIsTranslated(program_number) == TRUE) {
        /* Covers tblastn, tblastx, psi-tblastn rpstblastn. */
        *threshold += 2;
    } else if (Blast_QueryIsTranslated(program_number) == TRUE) {
        *threshold += 1;
    }

    return 0;
}
int CCmdLineBlastXML2ReportData::GetDbGeneticCode() const { if(Blast_SubjectIsTranslated(m_Options->GetProgramType())) return m_Options->GetDbGeneticCode(); return 0; }
/** Fills an already allocated BlastExtensionOptions structure.
 *
 * For blastn / phiblastn the X-dropoff defaults and the extension algorithms
 * are reset according to the greedy flag; other programs keep whatever the
 * structure already holds. Non-zero x_dropoff / x_dropoff_final arguments
 * then override the corresponding fields.
 *
 * @param options         Structure to fill [in|out]
 * @param program         Type of BLAST program [in]
 * @param greedy          Non-zero selects greedy extension (nucleotide
 *                        programs only) [in]
 * @param x_dropoff       Preliminary gapped X-dropoff; 0 keeps the current
 *                        value [in]
 * @param x_dropoff_final Final gapped X-dropoff; 0 keeps the current value,
 *                        raised to x_dropoff if that is larger [in]
 * @return 0 on success, BLASTERR_INVALIDPARAM if options is NULL.
 */
Int2
BLAST_FillExtensionOptions(BlastExtensionOptions* options,
                           EBlastProgramType program, Int4 greedy,
                           double x_dropoff, double x_dropoff_final)
{
    Boolean nucleotide_search;

    if (options == NULL)
        return BLASTERR_INVALIDPARAM;

    nucleotide_search =
        (program == eBlastTypeBlastn || program == eBlastTypePhiBlastn);

    if (nucleotide_search) {
        /* The final X-dropoff default is shared by both nucleotide modes. */
        options->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;

        if (greedy != 0) {
            options->gap_x_dropoff = BLAST_GAP_X_DROPOFF_GREEDY;
            options->ePrelimGapExt = eGreedyScoreOnly;
            options->eTbackExt = eGreedyTbck;
        } else {
            options->gap_x_dropoff = BLAST_GAP_X_DROPOFF_NUCL;
            options->ePrelimGapExt = eDynProgScoreOnly;
            options->eTbackExt = eDynProgTbck;
        }
    }

    /* PSSM query against a non-translated subject: use composition based
       statistics. */
    if (Blast_QueryIsPssm(program) && !Blast_SubjectIsTranslated(program))
        options->compositionBasedStats = eCompositionBasedStats;

    if (x_dropoff != 0)
        options->gap_x_dropoff = x_dropoff;

    if (x_dropoff_final != 0) {
        options->gap_x_dropoff_final = x_dropoff_final;
    } else {
        /* Final X-dropoff can't be smaller than preliminary X-dropoff */
        options->gap_x_dropoff_final =
            MAX(options->gap_x_dropoff_final, x_dropoff);
    }

    return 0;
}
/** Formats the search results for a batch of queries, one query at a time,
 * in the view selected by format_info->format_options->align_view.
 *
 * The query list (query_slp) and the Seq-align array are walked in parallel.
 * For each query that has alignments, the matching run of masking locations
 * is temporarily unlinked from the chain, the query's Bioseq is locked, the
 * results are written in the requested view, and the chain is relinked.
 * Queries without alignments get a view-specific "no results" record.
 *
 * @param seqalign_arr  Per-query alignments; an entry may be NULL when the
 *                      query has no results [in]
 * @param num_queries   Amount added to format_info->num_formatted on
 *                      return [in]
 * @param query_slp     Linked list of query locations [in]
 * @param mask_loc_head Head of the chain of masking locations; the chain is
 *                      temporarily modified and relinked [in]
 * @param format_info   Formatting state: options, output streams, database
 *                      and program names, running counters [in|out]
 * @param sum_returns   Search parameters and statistics used for the XML
 *                      output and for loading the scoring matrix [in]
 * @return Always 0.
 *
 * NOTE(review): the loop is bounded by seqalign_arr->num_queries, while the
 * formatted-query counter is advanced by the num_queries argument;
 * presumably the two are always equal - confirm against callers.
 */
Int2 BLAST_FormatResults(SBlastSeqalignArray* seqalign_arr, Int4 num_queries,
                         SeqLoc* query_slp, SeqLoc* mask_loc_head,
                         BlastFormattingInfo* format_info,
                         Blast_SummaryReturn* sum_returns)
{
    SeqLoc* mask_loc;
    SeqLoc* next_mask_loc = NULL;
    SeqLoc* tmp_loc = NULL;
    Uint1 align_type;
    Boolean db_is_na;
    Int4 query_index;
    SeqLoc* slp;
    SeqLoc* mask_slp;
    AsnIo* aip = NULL;
    MBXml* xmlp = NULL;
    FILE *outfp = NULL;
    BlastFormattingOptions* format_options;
    EAlignView align_view;
    Boolean ungapped;

    ASSERT(format_info && format_info->format_options &&
           format_info->search_options && query_slp);

    format_options = format_info->format_options;
    align_view = format_options->align_view;
    ungapped = !format_info->search_options->score_options->gapped_calculation;

    /* Pick the output channel for the requested view: exactly one of xmlp,
       aip and outfp is set here, the other two stay NULL. */
    if (align_view == eAlignViewXml) {
        const Int4 kXmlFlag = 0; /* Change to BXML_INCLUDE_QUERY if inclusion
                                    of query sequence is desired in the XML
                                    output header. */
        xmlp = format_info->xmlp;
        /* Initialize the XML output lazily and cache it in format_info so
           that later calls reuse it. */
        if (!xmlp) {
            xmlp = format_info->xmlp =
                s_MBXmlInit(format_info->aip, format_info->program_name,
                            format_info->db_name, query_slp, kXmlFlag,
                            sum_returns->search_params);
        }
    } else if (align_view == eAlignViewAsnText ||
               align_view == eAlignViewAsnBinary)
        aip = format_info->aip;
    else
        outfp = format_info->outfp;

    align_type =
        GetOldAlignType(format_info->search_options->program, &db_is_na);

    if (format_info->db_name) {
        /* Enable fetching from the BLAST database. */
        ReadDBBioseqFetchEnable ("blast", format_info->db_name, db_is_na, TRUE);

        /* If database is translated, set the genetic code for translation. */
        if (Blast_SubjectIsTranslated(format_info->search_options->program)) {
            ReadDBBioseqSetDbGeneticCode(format_info->search_options->
                                         db_options->genetic_code);
        }
    }

    if(format_info->search_options->score_options->is_ooframe) {
        ErrPostEx(SEV_WARNING, 0, 0, "Out-of-frame option selected, Expect values are only approximate and calculated not assuming out-of-frame alignments");
    }

    slp = query_slp;
    mask_loc = mask_loc_head;

    for (query_index=0; query_index<seqalign_arr->num_queries && slp;
         query_index++, slp=slp->next) {
        Bioseq* bsp = NULL;
        SeqAlignPtr seqalign = seqalign_arr->array[query_index];
        /* Find which query the current SeqAlign is for */
        SeqId* query_id = TxGetQueryIdFromSeqAlign(seqalign);

        if (seqalign == NULL) {
            /* NOTE(review): views below eAlignViewXml are presumably the
               text alignment views - confirm against EAlignView. */
            if (align_view < eAlignViewXml)
                s_AcknowledgeEmptyResults(slp, format_options, format_info,
                                          outfp); /* this query has no
                                                     results. */
            else if (align_view == eAlignViewXml) {
                /* Retrieve this query's Bioseq */
                Iteration* iterp;
                /* Call to TxGetQueryIdFromSeqAlign returned NULL. */
                query_id = SeqLocId(slp);
                bsp = BioseqLockById(query_id);
                iterp = s_XMLBuildOneQueryIteration(NULL, sum_returns, FALSE,
                            ungapped,
                            query_index+1+format_info->num_formatted,
                            "No hits found", bsp, NULL);
                IterationAsnWrite(iterp, xmlp->aip, xmlp->atp);
                AsnIoFlush(xmlp->aip);
                IterationFree(iterp);
                BioseqUnlock(bsp);
            } else if (align_view == eAlignViewTabularWithComments) {
                /* Only the header is printed for a query without hits. */
                query_id = SeqLocId(slp);
                bsp = BioseqLockById(query_id);
                PrintTabularOutputHeader(format_info->db_name, bsp, NULL,
                                         format_info->program_name, 0,
                                         format_options->believe_query, outfp);
                BioseqUnlock(bsp);
            }
            continue;
        }

        format_info->is_seqalign_null = FALSE; /* reset flag, at least one
                                                  query has seqalign */

        /* Find the masking location for this query. Initialize next_mask_loc
           to the current start of the chain, in case nothing for this query
           will be found. */
        next_mask_loc = mask_loc;
        for ( ; mask_loc; mask_loc = mask_loc->next) {
            mask_slp = (SeqLoc*) mask_loc->data.ptrvalue;
            if (SeqIdComp(query_id, SeqLocId(mask_slp)) == SIC_YES) {
                break;
            }
        }
        /* Unlink the masking location for this query and save the next one */
        if (mask_loc) {
            /* Advance to the last consecutive node that still belongs to
               this query. */
            for (next_mask_loc = mask_loc; next_mask_loc->next;
                 next_mask_loc = next_mask_loc->next) {
                mask_slp = (SeqLoc*) next_mask_loc->next->data.ptrvalue;
                if (SeqIdComp(query_id, SeqLocId(mask_slp)) != SIC_YES) {
                    break;
                }
            }
            /* Cut the chain after that node; tmp_loc remembers where to
               relink it once this query has been formatted. */
            tmp_loc = next_mask_loc;
            next_mask_loc = next_mask_loc->next;
            tmp_loc->next = NULL;
        }

        /* On the next iteration we can start from the next query */

        /* Retrieve this query's Bioseq */
        bsp = BioseqLockById(query_id);

        if (align_view < eAlignViewXml) {
            if (format_info->head_on_every_query == TRUE)
                BLAST_PrintOutputHeader(format_info);
            init_buff_ex(70);
            AcknowledgeBlastQuery(bsp, 70, outfp,
                                  format_options->believe_query,
                                  format_options->html);
            free_buff();
            if (format_info->head_on_every_query == TRUE) {
                s_BLAST_PrintDatabaseInfo(format_info);
                fprintf(format_info->outfp, "%s", "Searching..................................................done\n\n");
            }
        }

        if (align_view == eAlignViewTabular ||
            align_view == eAlignViewTabularWithComments) {
            if (align_view == eAlignViewTabularWithComments)
                PrintTabularOutputHeader(format_info->db_name, bsp, NULL,
                                         format_info->program_name, 0,
                                         format_options->believe_query, outfp);

            BlastPrintTabulatedResults(seqalign, bsp, NULL,
                format_options->number_of_alignments,
                format_info->program_name, ungapped,
                format_options->believe_query, 0, 0, outfp,
                (Boolean)(align_view == eAlignViewTabularWithComments));
        } else if(align_view == eAlignViewXml) {
            Iteration* iterp;

            ASSERT(xmlp && xmlp->aip);
            /* The index of this "query iteration" is the query_index in the
               current formatting round, plus the number of previously
               formatted queries. */
            iterp = s_XMLBuildOneQueryIteration(seqalign, sum_returns, FALSE,
                        ungapped, query_index+1+format_info->num_formatted,
                        NULL, bsp, mask_loc);
            IterationAsnWrite(iterp, xmlp->aip, xmlp->atp);
            AsnIoFlush(xmlp->aip);
            IterationFree(iterp);
        } else {
            /* Remaining views: wrap the Seq-align in a temporary Seq-annot
               and write it to the ASN.1 stream and/or the text stream,
               whichever was selected above. */
            SeqAnnot* seqannot = SeqAnnotNew();
            seqannot->type = 2;
            AddAlignInfoToSeqAnnot(seqannot, align_type);
            seqannot->data = seqalign;
            if (aip) {
                SeqAnnotAsnWrite((SeqAnnot*) seqannot, aip, NULL);
                AsnIoReset(aip);
            }
            if (outfp) {
                BlastPruneSapStruct* prune;
                Int4** matrix =
                    s_LoadMatrix(sum_returns->search_params->matrix);
                ObjMgrSetHold();
                init_buff_ex(85);
                PrintDefLinesFromSeqAlignEx2(seqalign, 80, outfp,
                    format_options->print_options, FIRST_PASS, NULL,
                    format_options->number_of_descriptions, NULL, NULL);
                free_buff();

                /** @todo FIXME: note that by calling BlastPruneHitsFromSeqAlign
                 * we're making a COPY of the seqalign to print it out! Clearly
                 * this could use a better design */
                prune = BlastPruneHitsFromSeqAlign(seqalign,
                            format_options->number_of_alignments, NULL);
                /* Temporarily point the annotation at the pruned copy for
                   display. */
                seqannot->data = prune->sap;

                if(format_info->search_options->score_options->is_ooframe) {
                    OOFShowBlastAlignment(prune->sap, mask_loc, outfp,
                                          format_options->align_options, NULL);
                } else if (align_view != eAlignViewPairwise) {
                    ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL,
                        format_options->align_options, matrix, mask_loc, NULL);
                } else {
                    /* Only the pairwise view passes FormatScoreFunc. */
                    ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL,
                        format_options->align_options, matrix, mask_loc,
                        FormatScoreFunc);
                }
                s_DeleteMatrix(matrix);
                /* Restore the original Seq-align before destroying the
                   pruned copy. */
                seqannot->data = seqalign;
                prune = BlastPruneSapStructDestruct(prune);
                ObjMgrClearHold();
            }
            /* Set data to NULL, because we do not free Seq-align here. */
            seqannot->data = NULL;
            seqannot = SeqAnnotFree(seqannot);
        }
        BioseqUnlock(bsp);
        /* Relink the mask locations so chain can be freed in the end.
           The 'tmp_loc' variable points to the location that was unlinked. */
        /* NOTE(review): tmp_loc is not reset between iterations, so when no
           mask is found for a query this reuses the previous query's node.
           That looks like a harmless re-assignment of the same link -
           confirm. */
        if (tmp_loc)
            tmp_loc->next = next_mask_loc;

        mask_loc = next_mask_loc;
        ObjMgrFreeCache(0);

    } /* End loop on seqaligns for different queries */

    /* close BlastOutput_iterations opened in s_MBXmlInit; Rt ticket # 15135151 */
    if((format_info->is_seqalign_null==TRUE) && (align_view == eAlignViewXml)) {
        /* extra output only if no hits at all, otherwise "for loop" logic
           should take care*/
        Iteration* iterp;
        iterp = IterationNew();
        iterp->iter_num = 1;
        iterp->stat = s_XMLBuildStatistics(sum_returns, ungapped);
        ASSERT(xmlp && xmlp->aip);
        IterationAsnWrite(iterp, xmlp->aip, xmlp->atp);
        AsnIoFlush(xmlp->aip);
        IterationFree(iterp);
    }

    if (format_info->db_name) {
        /* Free the database translation tables, if applicable. */
        TransTableFreeAll();
        ReadDBBioseqFetchDisable();
    }

    /* Update the count of the formatted queries. */
    format_info->num_formatted += num_queries;

    return 0;
}
/** Creates an SBlastOptions wrapper filled with the default options for the
 * named program.
 *
 * On success the wrapper takes over the option structures produced by
 * BLAST_InitDefaultOptions, num_cpus is set to 1, believe_query to FALSE,
 * and the filter string to "T" (low complexity filtering). On every failure
 * after the argument check, *options_out is set to NULL and an error message
 * is written to extra_returns->error.
 *
 * @param program_name  Name of the BLAST program; a NULL or unrecognized
 *                      name is reported as unsupported [in]
 * @param options_out   Receives the new options wrapper [out]
 * @param extra_returns Structure whose error field receives any error
 *                      message [in|out]
 * @return 0 on success; -1 if a required argument is NULL, the program is
 *         not supported, or the wrapper cannot be allocated; otherwise the
 *         non-zero status returned by BLAST_InitDefaultOptions.
 */
Int2 SBlastOptionsNew(const char* program_name, SBlastOptions** options_out,
                      Blast_SummaryReturn* extra_returns)
{
    QuerySetUpOptions* query_options=NULL;
    LookupTableOptions* lookup_options=NULL;
    BlastInitialWordOptions* word_options=NULL;
    BlastScoringOptions* score_options=NULL;
    BlastExtensionOptions* ext_options=NULL;
    BlastHitSavingOptions* hit_options=NULL;
    BlastEffectiveLengthsOptions* eff_len_options=NULL;
    PSIBlastOptions* psi_options = NULL;
    BlastDatabaseOptions* db_options = NULL;
    SBlastOptions* options;
    EBlastProgramType program = eBlastTypeUndefined;
    Int2 status = 0;

    if (!options_out || !extra_returns)
        return -1;

    /* Make the output well defined on every failure path below. */
    *options_out = NULL;

    /* A NULL name leaves program undefined and is reported as unsupported. */
    if (program_name)
        BlastProgram2Number(program_name, &program);

    if (program == eBlastTypeUndefined) {
        char message[256];
        /* The name comes from the caller and may be arbitrarily long, so
           the formatting must be bounded by the buffer size. */
        snprintf(message, sizeof(message),
                 "Program name %s is not supported. The supported programs "
                 "are blastn, blastp, blastx, tblastn, tblastx, rpsblast, "
                 "rpstblastn\n",
                 program_name ? program_name : "(null)");
        SBlastMessageWrite(&extra_returns->error, SEV_ERROR, message, NULL,
                           FALSE);
        return -1;
    }

    /* Allocate the wrapper first, so that an allocation failure cannot
       strand the default option structures created below. */
    options = (SBlastOptions*) calloc(1, sizeof(SBlastOptions));
    if (!options) {
        SBlastMessageWrite(&extra_returns->error, SEV_ERROR,
                           "Failed to allocate memory for search options\n",
                           NULL, FALSE);
        return -1;
    }

    status = BLAST_InitDefaultOptions(program, &lookup_options,
                 &query_options, &word_options, &ext_options, &hit_options,
                 &score_options, &eff_len_options, &psi_options, &db_options);

    if (status) {
        free(options);
        SBlastMessageWrite(&extra_returns->error, SEV_ERROR,
                           "Failed to initialize default options\n", NULL,
                           FALSE);
        return status;
    }

    /* Register the database genetic code for searches with a translated
       subject. */
    if (Blast_SubjectIsTranslated(program) ||
        program == eBlastTypeRpsTblastn) {
        Uint1* gc = NULL;
        BLAST_GeneticCodeFind(db_options->genetic_code, &gc);
        GenCodeSingletonAdd(db_options->genetic_code, gc);
        free(gc);
    }

    options->program = program;
    options->query_options = query_options;
    options->lookup_options = lookup_options;
    options->word_options = word_options;
    options->ext_options = ext_options;
    options->score_options = score_options;
    options->hit_options = hit_options;
    options->eff_len_options = eff_len_options;
    options->psi_options = psi_options;
    options->db_options = db_options;
    options->num_cpus = 1;
    options->believe_query = FALSE;

    /* Set default filter string to low complexity filtering. */
    SBlastOptionsSetFilterString(options, "T");

    *options_out = options;

    return status;
}