//------------------------------------------------------------------------------ CSegMasker::TMaskList* CSegMasker::operator()(const objects::CSeqVector & data) { if ( !data.IsProtein() ) { throw logic_error("SEG can only filter protein sequences"); } if (data.GetCoding() != CSeq_data::e_Ncbistdaa ) { throw logic_error("SEG expects protein sequences in ncbistdaa format"); } string sequence; BlastSeqLoc* seq_locs = NULL; data.GetSeqData(data.begin(), data.end(), sequence); Int2 status = SeqBufferSeg((Uint1*)(sequence.data()), static_cast<Int4>(sequence.size()), 0, m_SegParameters, &seq_locs); sequence.erase(); if (status != 0) { seq_locs = BlastSeqLocFree(seq_locs); throw runtime_error("SEG internal error (check that input is protein) " + NStr::IntToString(status)); } auto_ptr<TMaskList> retval(new TMaskList); for (BlastSeqLoc* itr = seq_locs; itr; itr = itr->next) { retval->push_back (TMaskList::value_type(itr->ssr->left, itr->ssr->right)); } seq_locs = BlastSeqLocFree(seq_locs); return retval.release(); }
/* Release a BlastMaskLoc together with every per-context location list it
 * holds.
 *
 * mask_loc  Structure to free; NULL is accepted and is a no-op.
 *
 * Always returns NULL.
 */
BlastMaskLoc* BlastMaskLocFree(BlastMaskLoc* mask_loc)
{
    Int4 ctx;

    if (!mask_loc) {
        return NULL;
    }

    /* Free each context's list only when the array itself exists. */
    if (mask_loc->seqloc_array != NULL) {
        for (ctx = 0; ctx < mask_loc->total_size; ++ctx) {
            BlastSeqLocFree(mask_loc->seqloc_array[ctx]);
        }
    }

    sfree(mask_loc->seqloc_array);
    sfree(mask_loc);

    return NULL;
}
/* Replace the nucleotide mask locations stored in a BlastMaskLoc with their
 * translations into protein coordinates, one list per translation frame.
 *
 * mask_loc    Mask locations, modified in place. NULL is accepted, in which
 *             case nothing is done.
 * query_info  Query information; supplies the number of queries, the
 *             nucleotide length of each query and the length of each
 *             translated context.
 *
 * Always returns 0.
 */
Int2 BlastMaskLocDNAToProtein(BlastMaskLoc* mask_loc,
                              const BlastQueryInfo* query_info)
{
    BlastSeqLoc* saved_dna_locs[NUM_FRAMES];
    Uint4 query_idx;

    if (mask_loc == NULL) {
        return 0;
    }

    /* The array size in BlastMaskLoc must correspond to the number of
       contexts in BlastQueryInfo. */
    ASSERT(mask_loc->total_size == query_info->last_context + 1);

    /* One pass per nucleotide query. */
    for (query_idx = 0; query_idx < (Uint4)query_info->num_queries;
         ++query_idx) {

        const Uint4 first_ctx = NUM_FRAMES * query_idx;
        const Int4 nucl_len =
            BlastQueryInfoGetQueryLength(query_info, eBlastTypeBlastx,
                                         query_idx);
        Int4 frame_idx;

        /* Move the nucleotide lists out of the mask structure: the slots are
         * about to be refilled with the translated locations, and the saved
         * lists are freed once all frames have been processed. */
        for (frame_idx = 0; frame_idx < NUM_FRAMES; ++frame_idx) {
            saved_dna_locs[frame_idx] =
                mask_loc->seqloc_array[first_ctx + frame_idx];
            mask_loc->seqloc_array[first_ctx + frame_idx] = NULL;
        }

        /* Rebuild the mask for every frame using translated coordinates. */
        for (frame_idx = 0; frame_idx < NUM_FRAMES; ++frame_idx) {
            const Int2 frame =
                BLAST_ContextToFrame(eBlastTypeBlastx, frame_idx);
            const Uint4 prot_ctx = first_ctx + frame_idx;
            BlastSeqLoc* source_list = saved_dna_locs[frame_idx];
            BlastSeqLoc* tail = NULL;
            BlastSeqLoc* node = NULL;

            /* A frame without masks of its own borrows those of the first
             * frame (which may itself be empty). */
            if (source_list == NULL) {
                source_list = saved_dna_locs[0];
            }

            for (node = source_list; node != NULL; node = node->next) {
                SSeqRange* range = node->ssr;
                Int4 from;
                Int4 to;

                /* Masks are expected to be 0-offset. */
                ASSERT(range->right < nucl_len);
                ASSERT(range->left >= 0);

                if (frame >= 0) {
                    from = (range->left - frame + 1)/CODON_LENGTH;
                    to = (range->right - frame + 1)/CODON_LENGTH;
                } else {
                    from = (nucl_len + frame - range->right)/CODON_LENGTH;
                    to = (nucl_len + frame - range->left)/CODON_LENGTH;
                }

                /* Clamp below first, then above, to the translated length. */
                if (from < 0) {
                    from = 0;
                }
                if (to < 0) {
                    to = 0;
                }
                if (from >= query_info->contexts[prot_ctx].query_length) {
                    from = query_info->contexts[prot_ctx].query_length - 1;
                }
                if (to >= query_info->contexts[prot_ctx].query_length) {
                    to = query_info->contexts[prot_ctx].query_length - 1;
                }

                ASSERT(from >= 0);
                ASSERT(to >= 0);
                ASSERT(from < query_info->contexts[prot_ctx].query_length);
                ASSERT(to < query_info->contexts[prot_ctx].query_length);

                /* Append through the cached tail when there is one, so the
                 * list does not have to be traversed on every insertion;
                 * the very first node goes into the context's slot. */
                if (tail != NULL) {
                    tail = BlastSeqLocNew(&tail, from, to);
                } else {
                    tail = BlastSeqLocNew(&mask_loc->seqloc_array[prot_ctx],
                                          from, to);
                }
            }
        }

        /* The saved nucleotide lists are no longer needed. */
        for (frame_idx = 0; frame_idx < NUM_FRAMES; ++frame_idx) {
            BlastSeqLocFree(saved_dna_locs[frame_idx]);
        }
    }

    return 0;
}
Int2 Blast_RunSearch(SeqLoc* query_seqloc, Blast_PsiCheckpointLoc * psi_checkpoint, const BlastSeqSrc* seq_src, SeqLoc* masking_locs, const SBlastOptions* options, BlastTabularFormatData* tf_data, BlastHSPResults **results, SeqLoc** filter_out, Blast_SummaryReturn* extra_returns) { Int2 status = 0; BLAST_SequenceBlk *query = NULL; BlastQueryInfo* query_info = NULL; double scale_factor = 1.0; BlastSeqLoc* lookup_segments = NULL; BlastScoreBlk* sbp = NULL; LookupTableWrap* lookup_wrap = NULL; BlastMaskLoc* mask_loc = NULL; BlastHSPStream* hsp_stream = NULL; const EBlastProgramType kProgram = options->program; const Boolean kRpsBlast = (kProgram == eBlastTypeRpsBlast || kProgram == eBlastTypeRpsTblastn); BlastRPSInfo* rps_info = NULL; Nlm_MemMapPtr rps_mmap = NULL; Nlm_MemMapPtr rps_pssm_mmap = NULL; const QuerySetUpOptions* query_options = options->query_options; const LookupTableOptions* lookup_options = options->lookup_options; const BlastScoringOptions* score_options = options->score_options; const BlastHitSavingOptions* hit_options = options->hit_options; SBlastOptions* rps_options = NULL; const Boolean kPhiBlast = Blast_ProgramIsPhiBlast(kProgram); const Uint1 kDeallocateMe = 253; Blast_Message *core_msg = NULL; if (!query_seqloc || !seq_src || !options || !extra_returns) return -1; if ((status = BLAST_ValidateOptions(kProgram, options->ext_options, score_options, lookup_options, options->word_options, hit_options, &core_msg)) != 0) { extra_returns->error = Blast_MessageToSBlastMessage(core_msg, NULL, NULL, options->believe_query); core_msg = Blast_MessageFree(core_msg); return status; } if (options->program == eBlastTypeBlastn) { SeqLoc* dust_mask = NULL; /* Dust mask locations */ Blast_FindDustSeqLoc(query_seqloc, options, &dust_mask); /* Combine dust mask with lower case mask The dust mask will be deallocated by the end of this function though as it's copied in BLAST_MainSetUp Not deallocating it will result in a memory leak if masking_locs was NULL at the 
start of this function */ if (dust_mask) { SeqLoc* dust_mask_var = dust_mask; while (dust_mask_var) { dust_mask_var->choice = kDeallocateMe; dust_mask_var = dust_mask_var->next; } ValNodeLink(&masking_locs, dust_mask); } } if (kRpsBlast) { if ((status = s_RPSExtraStructsSetUp(seq_src, options, &rps_options, &rps_info, &rps_mmap, &rps_pssm_mmap, &scale_factor, extra_returns))) return status; score_options = rps_options->score_options; hit_options = rps_options->hit_options; options = rps_options; /* This will not change the caller's pointer. */ } if ((status = BLAST_SetUpQuery(kProgram, query_seqloc, query_options, masking_locs, &query_info, &query))) { SBlastMessageWrite(&extra_returns->error, SEV_ERROR, "BLAST_SetUpQuery returned non-zero status\n", NULL, FALSE); return status; } status = BLAST_MainSetUp(kProgram, query_options, score_options, query, query_info, scale_factor, &lookup_segments, &mask_loc, &sbp, &core_msg, s_BlastFindMatrixPath); if (core_msg) { extra_returns->error = Blast_MessageToSBlastMessage(core_msg, query_seqloc, query_info, options->believe_query); core_msg = Blast_MessageFree(core_msg); } if (status) return status; if (psi_checkpoint) { core_msg = NULL; status = s_SetupScoreBlkPssmFromChkpt(sbp, query, psi_checkpoint, &core_msg); if (core_msg) { extra_returns->error = Blast_MessageToSBlastMessage(core_msg, query_seqloc, query_info, options->believe_query); core_msg = Blast_MessageFree(core_msg); } if (status) return status; } if (filter_out) { *filter_out = BlastMaskLocToSeqLoc(kProgram, mask_loc, query_seqloc); } /* Mask locations in BlastMaskLoc form are no longer needed. 
*/ BlastMaskLocFree(mask_loc); if (masking_locs) { SeqLocPtr slp_var = masking_locs; SeqLocPtr last = NULL; while (slp_var) { if (slp_var->choice == kDeallocateMe) { if (last == NULL) { masking_locs = slp_var->next; slp_var->next = NULL; Blast_ValNodeMaskListFree(slp_var); slp_var = masking_locs; } else { last->next = slp_var->next; slp_var->next = NULL; Blast_ValNodeMaskListFree(slp_var); slp_var = last->next; } } else { last = slp_var; slp_var = slp_var->next; } } } status = LookupTableWrapInit(query, lookup_options, query_options, lookup_segments, sbp, &lookup_wrap, rps_info, &core_msg); if (core_msg) { extra_returns->error = Blast_MessageToSBlastMessage(core_msg, query_seqloc, query_info, options->believe_query); core_msg = Blast_MessageFree(core_msg); } if (status) return status; /* For PHI BLAST, save information about pattern occurrences in query in the BlastQueryInfo structure. */ if (kPhiBlast) { SPHIPatternSearchBlk* pattern_blk = (SPHIPatternSearchBlk*) lookup_wrap->lut; Blast_SetPHIPatternInfo(kProgram, pattern_blk, query, lookup_segments, query_info, &core_msg); if (core_msg) { extra_returns->error = Blast_MessageToSBlastMessage(core_msg, query_seqloc, query_info, options->believe_query); core_msg = Blast_MessageFree(core_msg); } } /* Only need for the setup of lookup table. */ lookup_segments = BlastSeqLocFree(lookup_segments); if ((status = s_BlastHSPStreamSetUp(query, query_info, seq_src, options, sbp, tf_data, &hsp_stream, extra_returns))) return status; if ((status = s_BlastThreadManager(query, query_info, seq_src, options, lookup_wrap, sbp, hsp_stream, rps_info, tf_data, results, extra_returns))) return status; lookup_wrap = LookupTableWrapFree(lookup_wrap); query = BlastSequenceBlkFree(query); query_info = BlastQueryInfoFree(query_info); BlastScoreBlkFree(sbp); if (kRpsBlast) s_RPSExtraStructsFree(rps_info, rps_mmap, rps_pssm_mmap, rps_options); return status; }
/* -- SSH -- Create lookup table for the large sequence, that represented by all collection of PSSM matrixes and dump this table to disk Used by RPS Blast. */ Boolean RPSCreateLookupFile(ScoreRow *combinedMatrix, Int4 numProfiles, Int4Ptr seqlens, CharPtr filename, Nlm_FloatHi scalingFactor) { BlastScoreBlk *sbp; FILE *fd; Int4 **posMatrix; Int4 start, i, header_size, all_length, magicNumber; Int4Ptr offsets; Int4 num_lookups; BlastSeqLoc *lookup_segment=NULL; BlastAaLookupTable *lookup; LookupTableWrap* lookup_wrap_ptr=NULL; LookupTableOptions* lookup_options; if((fd = FileOpen(filename, "wb")) == NULL) return FALSE; num_lookups = 1; /* Single lookup table for all set */ all_length = seqlens[numProfiles] - seqlens[0]; posMatrix = MemNew((all_length + 1) * sizeof(Int4 *)); for (i = 0; i < all_length; i++) { posMatrix[i] = (Int4 *) &(combinedMatrix[i][0]); } /* Last row is necessary */ posMatrix[all_length] = MemNew(sizeof(Int4) * PRO_ALPHABET_SIZE); for(i = 0; i < PRO_ALPHABET_SIZE; i++) { posMatrix[all_length][i] = -INT2_MAX; } sbp = BlastScoreBlkNew(BLASTAA_SEQ_CODE, 1); RPSPsiMatrixAttach(sbp, posMatrix); LookupTableOptionsNew(eBlastTypeBlastp, &lookup_options); BLAST_FillLookupTableOptions(lookup_options, eBlastTypePsiBlast, FALSE, (Int4) (myargs[3].floatvalue*scalingFactor), myargs[4].intvalue); BlastSeqLocNew(&lookup_segment, 0, all_length); /* Need query for psi-blast?? where to put the PSSM? 
*/ LookupTableWrapInit(NULL, lookup_options, NULL, lookup_segment, sbp, &lookup_wrap_ptr, NULL, NULL); RPSPsiMatrixDetach(sbp); sbp = BlastScoreBlkFree(sbp); lookup_options = LookupTableOptionsFree(lookup_options); lookup_segment = BlastSeqLocFree(lookup_segment); lookup = (BlastAaLookupTable*) lookup_wrap_ptr->lut; /* Only Uint4 maximum length for lookup file allowed in current implementation */ header_size = (numProfiles+1)*sizeof(Int4) + 8*sizeof(Int4); /* Beginning of file will be allocated for lookup offsets */ fseek(fd, header_size, SEEK_SET); offsets = MemNew(sizeof(Int4) * (num_lookups + 1)); offsets[0] = ftell(fd); start = seqlens[0]; /* 0 */ RPSDumpLookupTable(lookup, fd); i = 1; offsets[i] = ftell(fd); /* Last offset also recorded */ fseek(fd, 0, SEEK_SET); magicNumber = RPS_MAGIC_NUMBER; FileWrite(&magicNumber, sizeof(Int4), 1, fd); /* header[0] */ FileWrite(&num_lookups, sizeof(Int4), 1, fd); /* header[1] */ FileWrite(&lookup->neighbor_matches, sizeof(Int4), 1, fd); /* header[2] */ FileWrite(&lookup->neighbor_matches, sizeof(Int4), 1, fd); /* header[3] */ FileWrite(&lookup->overflow_size, sizeof(Int4), 1, fd); /* header[4] */ /* Now writing recorded offsets in the beginning of the file */ fseek(fd, 8*sizeof(Int4), SEEK_SET); FileWrite(offsets, sizeof(Int4), num_lookups + 1, fd); FileClose(fd); /* Final memory cleenup */ MemFree(posMatrix[all_length]); MemFree(posMatrix); return TRUE; }