Exemplo n.º 1
0
/* Stores eff_searchsp in every context slot belonging to the query at
 * position query_index.
 *
 * The slots of one query are the BLAST_GetNumberOfContexts(program)
 * consecutive entries of qinfo->contexts that start at
 * query_index * (contexts per query).
 */
void
BlastQueryInfoSetEffSearchSpace(BlastQueryInfo* qinfo,
                                EBlastProgramType program,
                                Int4 query_index,
                                Int8 eff_searchsp)
{
    const Int4 kNumContexts = (Int4)BLAST_GetNumberOfContexts(program);
    const Int4 kFirstContext = query_index * kNumContexts;
    Int4 offset;
    ASSERT(query_index < qinfo->num_queries);

    /* Walk the query's slots by offset from its first context. */
    for (offset = 0; offset < kNumContexts; ++offset) {
        BlastContextInfo* slot = &qinfo->contexts[kFirstContext + offset];
        slot->eff_searchsp = eff_searchsp;
    }
}
Exemplo n.º 2
0
/* FIXME: should the EBlastProgramType be added as a member of the
 * BlastQueryInfo structure? Without it, there are many operations that
 * can't be done, so it doesn't make sense to have them separate... */

/* Looks up the effective search space recorded for one query.
 *
 * Scans the BLAST_GetNumberOfContexts(program) consecutive context slots
 * of the query at position query_index and returns the first non-zero
 * eff_searchsp found; returns 0 when every slot holds 0 (or when there are
 * no slots to scan).
 */
Int8
BlastQueryInfoGetEffSearchSpace(const BlastQueryInfo* qinfo,
                                EBlastProgramType program,
                                Int4 query_index)
{
    const Int4 kNumContexts = (Int4)BLAST_GetNumberOfContexts(program);
    const Int4 kFirstContext = query_index * kNumContexts;
    Int4 offset;
    ASSERT(query_index < qinfo->num_queries);

    for (offset = 0; offset < kNumContexts; ++offset) {
        const Int8 candidate =
            qinfo->contexts[kFirstContext + offset].eff_searchsp;
        if (candidate != 0) {
            return candidate;
        }
    }
    return 0;
}
Exemplo n.º 3
0
// Builds the ancillary data for one query from the core BLAST structures.
//
// All members start out as 0. The search space and length adjustment are
// taken from the first context of the query that is flagged is_valid; if
// the query has no such context, nothing else is filled in. Otherwise each
// non-null Karlin block array in sbp is consulted at that context's index,
// and the Gumbel block is processed when sbp->gbp is set.
CBlastAncillaryData::CBlastAncillaryData(EBlastProgramType program_type,
                    int query_number,
                    const BlastScoreBlk *sbp,
                    const BlastQueryInfo *query_info)
: m_GumbelBlk(0), m_UngappedKarlinBlk(0), m_GappedKarlinBlk(0), m_PsiUngappedKarlinBlk(0),
  m_PsiGappedKarlinBlk(0), m_SearchSpace(0), m_LengthAdjustment(0)
{
    const int kContextsPerQuery = BLAST_GetNumberOfContexts(program_type);
    const int kFirstContext = query_number * kContextsPerQuery;

    // Locate the first usable context of this query and remember where it is.
    bool found_valid = false;
    int valid_offset = 0;
    for (int offset = 0; offset < kContextsPerQuery; ++offset) {
        const BlastContextInfo& info =
            query_info->contexts[kFirstContext + offset];
        if ( !info.is_valid ) {
            continue;
        }
        m_SearchSpace = info.eff_searchsp;
        m_LengthAdjustment = info.length_adjustment;
        valid_offset = offset;
        found_valid = true;
        break;
    }
    if ( !found_valid ) {
        return; // no valid context for this query; leave the blocks unset
    }

    // Statistical parameter blocks are looked up at the valid context's
    // absolute index; arrays that are absent in sbp are skipped.
    const int kValidContext = kFirstContext + valid_offset;
    if (sbp->kbp_std) {
        s_InitializeKarlinBlk(sbp->kbp_std[kValidContext],
                              &m_UngappedKarlinBlk);
    }
    if (sbp->kbp_gap) {
        s_InitializeKarlinBlk(sbp->kbp_gap[kValidContext],
                              &m_GappedKarlinBlk);
    }
    if (sbp->kbp_psi) {
        s_InitializeKarlinBlk(sbp->kbp_psi[kValidContext],
                              &m_PsiUngappedKarlinBlk);
    }
    if (sbp->kbp_gap_psi) {
        s_InitializeKarlinBlk(sbp->kbp_gap_psi[kValidContext],
                              &m_PsiGappedKarlinBlk);
    }
    if (sbp->gbp) {
        s_InitializeGumbelBlk(sbp->gbp, &m_GumbelBlk);
    }
}
Exemplo n.º 4
0
/** Computes the filtering (masking) locations for every context of the
 * query set and stores them in a newly created BlastMaskLoc.
 *
 * @param query_blk Query sequence block, forwarded to the per-context
 *        filtering routine [in]
 * @param query_info Query information; supplies first_context,
 *        last_context and num_queries [in]
 * @param program_number Type of BLAST program [in]
 * @param filter_options Filtering options, forwarded to the per-context
 *        filtering routine [in]
 * @param filter_maskloc Receives the BlastMaskLoc created here, with one
 *        seqloc_array entry filled in per processed context. It is left
 *        set (partially filled) if a context fails [out]
 * @param blast_message Receives an error message on failure [in][out]
 * @return 0 on success; the non-zero status of the per-context filtering
 *         routine if it fails; -1 if the BlastMaskLoc could not be created.
 */
Int2
BlastSetUp_GetFilteringLocations(BLAST_SequenceBlk* query_blk, 
                                 const BlastQueryInfo* query_info, 
                                 EBlastProgramType program_number, 
                                 const SBlastFilterOptions* filter_options, 
                                 BlastMaskLoc** filter_maskloc, 
                                 Blast_Message** blast_message)
{
    Int2 status = 0;
    Int4 context = 0; /* loop variable. */
    int num_contexts = 0;

    /* Check the pointers BEFORE any of them is dereferenced; reading
       query_info->last_context in an initializer would defeat the check. */
    ASSERT(query_info && query_blk && filter_maskloc);
    ASSERT(blast_message);

    num_contexts = query_info->last_context + 1;
    ASSERT(num_contexts == 
           query_info->num_queries*BLAST_GetNumberOfContexts(program_number));

    *filter_maskloc = BlastMaskLocNew(num_contexts);
    if (*filter_maskloc == NULL) {
        /* Nothing to store results in; bail out instead of dereferencing
           a NULL pointer in the loop below. */
        return -1;
    }

    for (context = query_info->first_context;
         context <= query_info->last_context; ++context) {

        BlastSeqLoc *filter_per_context = NULL;
        status = s_GetFilteringLocationsForOneContext(query_blk, 
                                                      query_info, 
                                                      context, 
                                                      program_number, 
                                                      filter_options, 
                                                      &filter_per_context, 
                                                      blast_message);
        if (status) {
            Blast_MessageWrite(blast_message, eBlastSevError, context,
                                   "Failure at filtering");
            return status;
        }

        /* NB: for translated searches filter locations are returned in 
           protein coordinates, because the DNA lengths of sequences are 
           not available here. The caller must take care of converting 
           them back to nucleotide coordinates. */
        (*filter_maskloc)->seqloc_array[context] = filter_per_context;
    }
    return 0;
}
Exemplo n.º 5
0
/* Returns the length of the query at position query_index.
 *
 * - Translated-query programs: delegated to s_GetTranslatedQueryDNALength.
 * - eBlastTypeBlastn: the query_length of the query's first context, or of
 *   its second context when the first one is not positive.
 * - Everything else: the query_length of the query's first context.
 */
Int4 BlastQueryInfoGetQueryLength(const BlastQueryInfo* qinfo,
                                  EBlastProgramType program,
                                  Int4 query_index)
{
    const Uint4 kNumContexts = BLAST_GetNumberOfContexts(program);
    Uint4 first_context;
    Int4 length;
    ASSERT(query_index < qinfo->num_queries);

    if (Blast_QueryIsTranslated(program)) {
        return s_GetTranslatedQueryDNALength(qinfo, query_index);
    }

    first_context = query_index * kNumContexts;
    length = qinfo->contexts[first_context].query_length;

    /* Only blastn falls back to the next context when the first one
       carries no usable length. */
    if (program == eBlastTypeBlastn && length <= 0) {
        length = qinfo->contexts[first_context + 1].query_length;
    }
    return length;
}
Exemplo n.º 6
0
/* Allocates a zero-initialized BlastQueryInfo for num_queries queries of
 * the given program.
 *
 * The context array gets num_queries * BLAST_GetNumberOfContexts(program)
 * entries; first_context is 0 and last_context is the index of the final
 * entry. Each entry is given its query index and frame and is marked
 * valid.
 *
 * Returns NULL when num_queries is not positive; on an allocation failure
 * returns whatever BlastQueryInfoFree returns for the partially built
 * structure.
 */
BlastQueryInfo* BlastQueryInfoNew(EBlastProgramType program, int num_queries)
{
    const unsigned int kNumContexts = BLAST_GetNumberOfContexts(program);
    BlastQueryInfo* qinfo = NULL;
    int total_contexts = 0;
    int ctx = 0;

    if (num_queries <= 0) {
        return NULL;
    }
    ASSERT(kNumContexts != 0);

    qinfo = (BlastQueryInfo*) calloc(1, sizeof(BlastQueryInfo));
    if (qinfo == NULL) {
        return BlastQueryInfoFree(qinfo);
    }

    qinfo->num_queries = num_queries;
    qinfo->first_context = 0;
    qinfo->last_context = qinfo->num_queries * kNumContexts - 1;
    total_contexts = qinfo->last_context + 1;

    qinfo->contexts = (BlastContextInfo*) calloc(total_contexts,
                                                 sizeof(BlastContextInfo));
    if (qinfo->contexts == NULL) {
        return BlastQueryInfoFree(qinfo);
    }

    /* Fill in the per-context bookkeeping. */
    for (ctx = 0; ctx < total_contexts; ctx++) {
        BlastContextInfo* info = &qinfo->contexts[ctx];

        info->query_index = Blast_GetQueryIndexFromContext(ctx, program);
        ASSERT(info->query_index != -1);

        info->frame = BLAST_ContextToFrame(program, ctx);
        ASSERT(info->frame != INT1_MAX);

        info->is_valid = TRUE;
    }

    return qinfo;
}
Exemplo n.º 7
0
/** Merge two HSPStreams. The HSPs from the first stream are
 *  moved to the second stream.
 *
 *  Each HSP of the first stream has its context and query coordinates
 *  remapped from the chunk-local values to those of the chunk's entry in
 *  the context/offset lists obtained from squery_blk, after which the hit
 *  lists are merged per query and the HSP lists of the second stream are
 *  re-sorted by score.
 *
 * @param squery_blk Structure controlling the merge process [in]
 * @param chunk_num Unique integer assigned to hsp_stream [in]
 * @param stream1 The stream to merge [in][out]
 * @param stream2 The stream that will contain the
 *         HSPLists of the first stream [in][out]
 * @return kBlastHSPStream_Success on success; kBlastHSPStream_Error if
 *         either stream is NULL or if the query, context or offset list
 *         for the chunk could not be obtained.
 */
int BlastHSPStreamMerge(SSplitQueryBlk *squery_blk,
                             Uint4 chunk_num,
                             BlastHSPStream* stream1,
                             BlastHSPStream* stream2)
{
   Int4 i, j, k;
   BlastHSPResults *results1 = NULL;
   BlastHSPResults *results2 = NULL;
   Int4 contexts_per_query = 0;
#if defined(_DEBUG) || defined(_DEBUG_VERBOSE)
   /* Sizes of the chunk lists; needed by the sanity checks (_DEBUG) and
      by the diagnostic output (_DEBUG_VERBOSE). */
   Int4 num_queries = 0, num_ctx = 0, num_ctx_offsets = 0;
   Int4 max_ctx = INT4_MIN;
#endif
   
   Uint4 *query_list = NULL, *offset_list = NULL, num_contexts = 0;
   Int4 *context_list = NULL;


   if (!stream1 || !stream2) 
       return kBlastHSPStream_Error;

   s_FinalizeWriter(stream1);
   s_FinalizeWriter(stream2);

   results1 = stream1->results;
   results2 = stream2->results;

   contexts_per_query = BLAST_GetNumberOfContexts(stream2->program);
#ifdef _DEBUG
   /* split_points below is a fixed-size array indexed by context. */
   ASSERT(contexts_per_query <= NUM_FRAMES);
#endif

   SplitQueryBlk_GetQueryIndicesForChunk(squery_blk, chunk_num, &query_list);
   SplitQueryBlk_GetQueryContextsForChunk(squery_blk, chunk_num, 
                                          &context_list, &num_contexts);
   SplitQueryBlk_GetContextOffsetsForChunk(squery_blk, chunk_num, &offset_list);

   /* All three lists are indexed unconditionally below. If any of them
      was not produced (the pointers start out NULL), report an error
      rather than dereferencing a NULL pointer. */
   if (!query_list || !context_list || !offset_list) {
       sfree(query_list);
       sfree(context_list);
       sfree(offset_list);
       return kBlastHSPStream_Error;
   }

#if defined(_DEBUG) || defined(_DEBUG_VERBOSE)
   /* query_list and offset_list are scanned up to a UINT4_MAX terminator;
      context_list has num_contexts entries. max_ctx is computed here for
      both build flavours so that the assertions further down never read
      an uninitialized value. */
   for (num_queries = 0; query_list[num_queries] != UINT4_MAX; num_queries++) ;
   for (num_ctx = 0; num_ctx < (Int4)num_contexts; num_ctx++) 
       max_ctx = MAX(max_ctx, context_list[num_ctx]);
   for (num_ctx_offsets = 0; offset_list[num_ctx_offsets] != UINT4_MAX;
        num_ctx_offsets++) ;
#endif

#if defined(_DEBUG_VERBOSE)
   fprintf(stderr, "Chunk %d\n", chunk_num);
   fprintf(stderr, "Queries : ");
   for (j = 0; j < num_queries; j++)
       fprintf(stderr, "%d ", query_list[j]);
   fprintf(stderr, "\n");
   fprintf(stderr, "Contexts : ");
   for (j = 0; j < num_ctx; j++)
       fprintf(stderr, "%d ", context_list[j]);
   fprintf(stderr, "\n");
   fprintf(stderr, "Context starting offsets : ");
   for (j = 0; j < num_ctx_offsets; j++)
       fprintf(stderr, "%d ", offset_list[j]);
   fprintf(stderr, "\n");
#endif

   for (i = 0; i < results1->num_queries; i++) {
       BlastHitList *hitlist = results1->hitlist_array[i];
       Int4 global_query = query_list[i];
       Int4 split_points[NUM_FRAMES];
#ifdef _DEBUG
       ASSERT(i < num_queries);
#endif

       if (hitlist == NULL) {
#if defined(_DEBUG_VERBOSE)
           fprintf(stderr, "No hits to query %d\n", global_query);
#endif
           continue;
       }

       /* we will be mapping HSPs from the local context to
          their place on the unsplit concatenated query. Once
          that's done, overlapping HSPs need to get merged, and
          to do that we must know the offset within each context
          where the last chunk ended and the current chunk begins */
       for (j = 0; j < contexts_per_query; j++) {
           split_points[j] = -1;
       }

       for (j = 0; j < contexts_per_query; j++) {
           Int4 local_context = i * contexts_per_query + j;
           /* negative entries in context_list are skipped and keep the
              -1 split point set above */
           if (context_list[local_context] >= 0) {
               split_points[context_list[local_context] % contexts_per_query] = 
                                offset_list[local_context];
           }
       }

#if defined(_DEBUG_VERBOSE)
       fprintf(stderr, "query %d split points: ", i);
       for (j = 0; j < contexts_per_query; j++) {
           fprintf(stderr, "%d ", split_points[j]);
       }
       fprintf(stderr, "\n");
#endif

       for (j = 0; j < hitlist->hsplist_count; j++) {
           BlastHSPList *hsplist = hitlist->hsplist_array[j];

           for (k = 0; k < hsplist->hspcnt; k++) {
               BlastHSP *hsp = hsplist->hsp_array[k];
               Int4 local_context = hsp->context;
#ifdef _DEBUG
               ASSERT(local_context <= max_ctx);
               ASSERT(local_context < num_ctx);
               ASSERT(local_context < num_ctx_offsets);
#endif

               /* remap the HSP from chunk-local context/coordinates to
                  the values listed for this chunk */
               hsp->context = context_list[local_context];
               hsp->query.offset += offset_list[local_context];
               hsp->query.end += offset_list[local_context];
               hsp->query.gapped_start += offset_list[local_context];
               hsp->query.frame = BLAST_ContextToFrame(stream2->program,
                                                       hsp->context);
           }

           hsplist->query_index = global_query;
       }

       Blast_HitListMerge(results1->hitlist_array + i,
                          results2->hitlist_array + global_query,
                          contexts_per_query, split_points,
                          SplitQueryBlk_GetChunkOverlapSize(squery_blk),
                          SplitQueryBlk_AllowGap(squery_blk));
   }

   /* Sort to the canonical order, which the merge may not have done. */
   for (i = 0; i < results2->num_queries; i++) {
       BlastHitList *hitlist = results2->hitlist_array[i];
       if (hitlist == NULL)
           continue;

       for (j = 0; j < hitlist->hsplist_count; j++)
           Blast_HSPListSortByScore(hitlist->hsplist_array[j]);
   }

   stream2->results_sorted = FALSE;

#if defined(_DEBUG_VERBOSE)
   fprintf(stderr, "new results: %d queries\n", results2->num_queries);
   for (i = 0; i < results2->num_queries; i++) {
       BlastHitList *hitlist = results2->hitlist_array[i];
       if (hitlist == NULL)
           continue;

       for (j = 0; j < hitlist->hsplist_count; j++) {
           BlastHSPList *hsplist = hitlist->hsplist_array[j];
           fprintf(stderr, 
                   "query %d OID %d\n", hsplist->query_index, hsplist->oid);

           for (k = 0; k < hsplist->hspcnt; k++) {
               BlastHSP *hsp = hsplist->hsp_array[k];
               fprintf(stderr, "c %d q %d-%d s %d-%d score %d\n", hsp->context,
                      hsp->query.offset, hsp->query.end,
                      hsp->subject.offset, hsp->subject.end,
                      hsp->score);
           }
       }
   }
#endif

   sfree(query_list);
   sfree(context_list);
   sfree(offset_list);

   return kBlastHSPStream_Success;
}