Пример #1
0
/** Populate the per-context offset, length, frame and query index of a
 *  BlastQueryInfo from an array of context start offsets.
 *
 *  For every context i in [0, info->last_context] the context's query_offset
 *  is set to new_offsets[i] and its query_length is derived from the distance
 *  to new_offsets[i+1]: a zero distance yields length 0, otherwise the length
 *  is distance - 1.
 *  NOTE(review): the "- 1" presumably accounts for a separator byte between
 *  concatenated contexts - confirm against the producer of new_offsets.
 *
 *  If info->contexts has not been allocated yet, it is allocated here
 *  (zero-initialised). If that allocation fails the function returns without
 *  modifying anything, leaving info->contexts NULL.
 *
 * @param info Query information structure to fill in; must be non-NULL
 *        [in][out]
 * @param new_offsets Array of context start offsets; must hold at least
 *        info->last_context + 2 entries because entry i+1 is read for
 *        every context i [in]
 * @param prog BLAST program type, used to derive the frame and the query
 *        index of every context [in]
 */
void
OffsetArrayToContextOffsets(BlastQueryInfo    * info,
                            Int4              * new_offsets,
                            EBlastProgramType   prog)
{
    Uint4 count = 0;
    Uint4 i     = 0;
    
    ASSERT(info);
    ASSERT(new_offsets);
    
    /* Only dereference info after it has been validated above. */
    count = (info->last_context + 1);
    
    if (! info->contexts) {
        info->contexts = calloc(count, sizeof(BlastContextInfo));
        if (! info->contexts) {
            /* Out of memory: nothing can be filled in, and the void return
               type leaves no way to report it other than the NULL array. */
            return;
        }
    }
    
    for(i = 0; i < count; i++) {
        Int4 distance = 0;
        
        info->contexts[i].query_offset = new_offsets[i];
        
        distance = new_offsets[i+1] - new_offsets[i];
        info->contexts[i].query_length = distance ? distance-1 : 0;
        
        /* Set the frame and query index */
        
        info->contexts[i].frame =
            BLAST_ContextToFrame(prog, i);
        
        info->contexts[i].query_index =
            Blast_GetQueryIndexFromContext(i, prog);
    }
}
Пример #2
0
/** Allocate a BlastQueryInfo sized for a given program and number of queries.
 *
 *  The context array gets one zero-initialised entry per (query, context)
 *  pair; every entry then has its query index and frame filled in and is
 *  marked valid.
 *
 * @param program BLAST program type; determines the number of contexts
 *        per query [in]
 * @param num_queries Number of queries; must be positive [in]
 * @return The newly allocated structure; NULL if num_queries is not
 *         positive; on allocation failure, whatever BlastQueryInfoFree
 *         returns.
 */
BlastQueryInfo* BlastQueryInfoNew(EBlastProgramType program, int num_queries)
{
    const unsigned int contexts_per_query = BLAST_GetNumberOfContexts(program);
    BlastQueryInfo* query_info = NULL;
    int ctx;

    if (num_queries <= 0) {
        return NULL;
    }
    ASSERT(contexts_per_query != 0);

    query_info = (BlastQueryInfo*) calloc(1, sizeof(BlastQueryInfo));
    if (query_info == NULL) {
        return BlastQueryInfoFree(query_info);
    }

    query_info->num_queries   = num_queries;
    query_info->first_context = 0;
    query_info->last_context  =
        query_info->num_queries * contexts_per_query - 1;

    query_info->contexts =
        (BlastContextInfo*) calloc(query_info->last_context + 1,
                                   sizeof(BlastContextInfo));
    if (query_info->contexts == NULL) {
        return BlastQueryInfoFree(query_info);
    }

    /* Initialise every context slot. */
    for (ctx = 0; ctx < query_info->last_context + 1; ctx++) {
        BlastContextInfo* entry = &query_info->contexts[ctx];

        entry->query_index = Blast_GetQueryIndexFromContext(ctx, program);
        ASSERT(entry->query_index != -1);

        entry->frame = BLAST_ContextToFrame(program, ctx);
        ASSERT(entry->frame != INT1_MAX);

        entry->is_valid = TRUE;
    }

    return query_info;
}
Пример #3
0
/** Replace the nucleotide-coordinate masks of every query with masks in
 *  translated (protein) coordinates, one list per reading frame.
 *
 *  For each query the NUM_FRAMES mask lists are detached from mask_loc,
 *  translated frame by frame into new lists stored back in mask_loc, and
 *  the detached originals are freed. A frame without a mask list of its
 *  own is translated from the list of the query's first frame.
 *
 * @param mask_loc Mask locations to convert; may be NULL, in which case
 *        nothing is done [in][out]
 * @param query_info Query information providing the number of queries and
 *        the length of every translated context [in]
 * @return Always 0.
 */
Int2 BlastMaskLocDNAToProtein(BlastMaskLoc* mask_loc, 
                              const BlastQueryInfo* query_info)
{
    Uint4 query_idx;

    if (mask_loc == NULL) {
        return 0;
    }

    /* The mask array must have exactly one slot per query context. */
    ASSERT(mask_loc->total_size == query_info->last_context + 1);

    for (query_idx = 0; query_idx < (Uint4)query_info->num_queries;
         ++query_idx) {
        const Uint4 first_ctx = NUM_FRAMES * query_idx;
        const Int4 nucl_len = BlastQueryInfoGetQueryLength(query_info,
                                                           eBlastTypeBlastx,
                                                           query_idx);
        BlastSeqLoc* saved_masks[NUM_FRAMES];
        Int4 frame_no;

        /* Detach the nucleotide masks: their slots are about to receive
         * the translated lists, and the originals are freed afterwards. */
        for (frame_no = 0; frame_no < NUM_FRAMES; ++frame_no) {
            saved_masks[frame_no] = mask_loc->seqloc_array[first_ctx + frame_no];
            mask_loc->seqloc_array[first_ctx + frame_no] = NULL;
        }

        /* Build the translated mask list of each frame. */
        for (frame_no = 0; frame_no < NUM_FRAMES; ++frame_no) {
            const Int2 frame = BLAST_ContextToFrame(eBlastTypeBlastx, frame_no);
            BlastSeqLoc* tail = NULL;
            /* Fall back to the first frame's masks when this frame has
             * none of its own. */
            BlastSeqLoc* node = saved_masks[frame_no] ? saved_masks[frame_no]
                                                      : saved_masks[0];

            while (node != NULL) {
                const SSeqRange* range = node->ssr;
                const Int4 prot_len =
                    query_info->contexts[first_ctx + frame_no].query_length;
                BlastSeqLoc** append_at;
                Int4 prot_from, prot_to;

                /* Input masks are expected to be 0-offset and inside the
                 * nucleotide sequence. */
                ASSERT(range->right < nucl_len);
                ASSERT(range->left  >= 0);

                if (frame >= 0) {
                    prot_from = (range->left - frame + 1)/CODON_LENGTH;
                    prot_to   = (range->right - frame + 1)/CODON_LENGTH;
                } else {
                    prot_from = (nucl_len + frame - range->right)/CODON_LENGTH;
                    prot_to   = (nucl_len + frame - range->left)/CODON_LENGTH;
                }

                /* Clamp both ends into the translated context: lower bound
                 * first, then upper bound. */
                prot_from = (prot_from < 0) ? 0 : prot_from;
                prot_from = (prot_from >= prot_len) ? prot_len - 1 : prot_from;
                prot_to   = (prot_to < 0) ? 0 : prot_to;
                prot_to   = (prot_to >= prot_len) ? prot_len - 1 : prot_to;

                ASSERT(prot_from >= 0);
                ASSERT(prot_to   >= 0);
                ASSERT(prot_from < prot_len);
                ASSERT(prot_to   < prot_len);

                /* Append through the cached tail once one exists, so the
                 * list does not have to be walked from its head each time;
                 * the very first node goes into the frame's own slot. */
                append_at = tail ? &tail
                                 : &mask_loc->seqloc_array[first_ctx + frame_no];
                tail = BlastSeqLocNew(append_at, prot_from, prot_to);

                node = node->next;
            }
        }

        /* The nucleotide masks are no longer needed. */
        for (frame_no = 0; frame_no < NUM_FRAMES; ++frame_no) {
            BlastSeqLocFree(saved_masks[frame_no]);
        }
    }

    return 0;
}
Пример #4
0
/** Convert, in place, the mask locations of every translated frame from
 *  protein coordinates back to nucleotide coordinates.
 *
 *  Each range's left/right is rewritten according to the frame of its
 *  context and then clamped to [0, nucleotide length - 1].
 *
 * @param mask_loc Mask locations to convert; may be NULL, in which case
 *        nothing is done [in][out]
 * @param query_info Query information providing the number of queries and
 *        their nucleotide lengths [in]
 * @return Always 0.
 */
Int2 BlastMaskLocProteinToDNA(BlastMaskLoc* mask_loc, 
                              const BlastQueryInfo* query_info)
{
   Int4 query_idx;

   /* Without masks there is nothing to convert. */
   if (mask_loc == NULL) {
      return 0;
   }

   /* The mask array must have exactly one slot per query context. */
   ASSERT(mask_loc->total_size == query_info->last_context + 1);

   for (query_idx = 0; query_idx < query_info->num_queries; ++query_idx) {
       const Int4 first_ctx = query_idx*NUM_FRAMES;
       const Int4 nucl_len = BlastQueryInfoGetQueryLength(query_info,
                                                          eBlastTypeBlastx,
                                                          query_idx);
       Int4 frame_no;

       /* Visit each of the frames belonging to this query. */
       for (frame_no = 0; frame_no < NUM_FRAMES; ++frame_no) {
           const Int2 frame = BLAST_ContextToFrame(eBlastTypeBlastx, frame_no);
           BlastSeqLoc* node = mask_loc->seqloc_array[first_ctx + frame_no];

           /* Rewrite every range of this frame's mask list. */
           while (node != NULL) {
               SSeqRange* range = node->ssr;
               Int4 nucl_from = 0;
               Int4 nucl_to = 0;

               if (frame >= 0) {
                   nucl_from = CODON_LENGTH*range->left + frame - 1;
                   nucl_to   = CODON_LENGTH*range->right + frame - 1;
               } else {
                   /* On negative frames the protein's left end maps to the
                      nucleotide range's right end, and vice versa. */
                   nucl_to   = nucl_len - CODON_LENGTH*range->left + frame;
                   nucl_from = nucl_len - CODON_LENGTH*range->right + frame + 1;
               }

               /* Clamp into the nucleotide sequence: lower bound first,
                  then upper bound. */
               if (nucl_from < 0) {
                   nucl_from = 0;
               }
               if (nucl_from >= nucl_len) {
                   nucl_from = nucl_len - 1;
               }
               if (nucl_to < 0) {
                   nucl_to = 0;
               }
               if (nucl_to >= nucl_len) {
                   nucl_to = nucl_len - 1;
               }

               ASSERT(nucl_from >= 0);
               ASSERT(nucl_to   >= 0);
               ASSERT(nucl_from < nucl_len);
               ASSERT(nucl_to   < nucl_len);

               range->left  = nucl_from;
               range->right = nucl_to;

               node = node->next;
           }
       }
   }

   return 0;
}
Пример #5
0
/** Merge two HSPStreams. The HSPs from the first stream are
 *  moved to the second stream.
 *
 *  Both streams are finalized for writing first. Every HSP of the first
 *  stream is then remapped from the chunk-local context and query offset
 *  to the corresponding context and offset given by the split-query block,
 *  and each hit list is merged into the hit list of its global query in
 *  the second stream. Finally all HSP lists of the second stream are
 *  sorted by score and the stream is flagged as not sorted.
 *
 * @param squery_blk Structure controlling the merge process [in]
 * @param chunk_num Unique integer assigned to hsp_stream [in]
 * @param stream1 The stream to merge [in][out]
 * @param stream2 The stream that will contain the
 *         HSPLists of the first stream [in][out]
 * @return kBlastHSPStream_Error if either stream is NULL,
 *         kBlastHSPStream_Success otherwise.
 */
int BlastHSPStreamMerge(SSplitQueryBlk *squery_blk,
                             Uint4 chunk_num,
                             BlastHSPStream* stream1,
                             BlastHSPStream* stream2)
{
   Int4 i, j, k;
   BlastHSPResults *results1 = NULL;
   BlastHSPResults *results2 = NULL;
   Int4 contexts_per_query = 0;
   /* The element counts are needed both by the debug assertions and by
      the verbose dump, so they exist whenever either mode is enabled. */
#if defined(_DEBUG) || defined(_DEBUG_VERBOSE)
   Int4 num_queries = 0, num_ctx = 0, num_ctx_offsets = 0;
#endif
#ifdef _DEBUG
   Int4 max_ctx = INT4_MIN;
#endif
   
   Uint4 *query_list = NULL, *offset_list = NULL, num_contexts = 0;
   Int4 *context_list = NULL;


   if (!stream1 || !stream2) 
       return kBlastHSPStream_Error;

   s_FinalizeWriter(stream1);
   s_FinalizeWriter(stream2);

   results1 = stream1->results;
   results2 = stream2->results;

   contexts_per_query = BLAST_GetNumberOfContexts(stream2->program);

   /* NOTE(review): the results of these three calls are not checked; the
      code below assumes all three lists were filled in - confirm whether
      the callees can fail and how they report it. */
   SplitQueryBlk_GetQueryIndicesForChunk(squery_blk, chunk_num, &query_list);
   SplitQueryBlk_GetQueryContextsForChunk(squery_blk, chunk_num, 
                                          &context_list, &num_contexts);
   SplitQueryBlk_GetContextOffsetsForChunk(squery_blk, chunk_num, &offset_list);

#if defined(_DEBUG) || defined(_DEBUG_VERBOSE)
   /* query_list and offset_list are terminated by UINT4_MAX; count their
      entries so that indices can be range-checked below. */
   while (query_list[num_queries] != UINT4_MAX)
       num_queries++;
   num_ctx = (Int4) num_contexts;
   while (offset_list[num_ctx_offsets] != UINT4_MAX)
       num_ctx_offsets++;
#endif

#ifdef _DEBUG
   /* Computed in every debug build (verbose or not), because the
      assertions in the HSP loop below read it. */
   for (j = 0; j < num_ctx; j++)
       max_ctx = MAX(max_ctx, context_list[j]);
#endif

#if defined(_DEBUG_VERBOSE)
   fprintf(stderr, "Chunk %d\n", chunk_num);
   fprintf(stderr, "Queries : ");
   for (j = 0; j < num_queries; j++)
       fprintf(stderr, "%d ", query_list[j]);
   fprintf(stderr, "\n");
   fprintf(stderr, "Contexts : ");
   for (j = 0; j < num_ctx; j++)
       fprintf(stderr, "%d ", context_list[j]);
   fprintf(stderr, "\n");
   fprintf(stderr, "Context starting offsets : ");
   for (j = 0; j < num_ctx_offsets; j++)
       fprintf(stderr, "%d ", offset_list[j]);
   fprintf(stderr, "\n");
#endif

   for (i = 0; i < results1->num_queries; i++) {
       BlastHitList *hitlist = results1->hitlist_array[i];
       Int4 global_query = query_list[i];
       Int4 split_points[NUM_FRAMES];
#ifdef _DEBUG
       ASSERT(i < num_queries);
#endif

       if (hitlist == NULL) {
#if defined(_DEBUG_VERBOSE)
           fprintf(stderr, "No hits to query %d\n", global_query);
#endif
           continue;
       }

       /* we will be mapping HSPs from the local context to
          their place on the unsplit concatenated query. Once
          that's done, overlapping HSPs need to get merged, and
          to do that we must know the offset within each context
          where the last chunk ended and the current chunk begins */
       for (j = 0; j < contexts_per_query; j++) {
           split_points[j] = -1;
       }

       /* A negative entry in context_list contributes no split point;
          its slot keeps the -1 set above. */
       for (j = 0; j < contexts_per_query; j++) {
           Int4 local_context = i * contexts_per_query + j;
           if (context_list[local_context] >= 0) {
               split_points[context_list[local_context] % contexts_per_query] = 
                                offset_list[local_context];
           }
       }

#if defined(_DEBUG_VERBOSE)
       fprintf(stderr, "query %d split points: ", i);
       for (j = 0; j < contexts_per_query; j++) {
           fprintf(stderr, "%d ", split_points[j]);
       }
       fprintf(stderr, "\n");
#endif

       for (j = 0; j < hitlist->hsplist_count; j++) {
           BlastHSPList *hsplist = hitlist->hsplist_array[j];

           for (k = 0; k < hsplist->hspcnt; k++) {
               BlastHSP *hsp = hsplist->hsp_array[k];
               Int4 local_context = hsp->context;
#ifdef _DEBUG
               ASSERT(local_context <= max_ctx);
               ASSERT(local_context < num_ctx);
               ASSERT(local_context < num_ctx_offsets);
#endif

               /* Remap the HSP from chunk-local to global coordinates. */
               hsp->context = context_list[local_context];
               hsp->query.offset += offset_list[local_context];
               hsp->query.end += offset_list[local_context];
               hsp->query.gapped_start += offset_list[local_context];
               hsp->query.frame = BLAST_ContextToFrame(stream2->program,
                                                       hsp->context);
           }

           hsplist->query_index = global_query;
       }

       Blast_HitListMerge(results1->hitlist_array + i,
                          results2->hitlist_array + global_query,
                          contexts_per_query, split_points,
                          SplitQueryBlk_GetChunkOverlapSize(squery_blk),
                          SplitQueryBlk_AllowGap(squery_blk));
   }

   /* Sort to the canonical order, which the merge may not have done. */
   for (i = 0; i < results2->num_queries; i++) {
       BlastHitList *hitlist = results2->hitlist_array[i];
       if (hitlist == NULL)
           continue;

       for (j = 0; j < hitlist->hsplist_count; j++)
           Blast_HSPListSortByScore(hitlist->hsplist_array[j]);
   }

   stream2->results_sorted = FALSE;

#if defined(_DEBUG_VERBOSE)
   fprintf(stderr, "new results: %d queries\n", results2->num_queries);
   for (i = 0; i < results2->num_queries; i++) {
       BlastHitList *hitlist = results2->hitlist_array[i];
       if (hitlist == NULL)
           continue;

       for (j = 0; j < hitlist->hsplist_count; j++) {
           BlastHSPList *hsplist = hitlist->hsplist_array[j];
           fprintf(stderr, 
                   "query %d OID %d\n", hsplist->query_index, hsplist->oid);

           for (k = 0; k < hsplist->hspcnt; k++) {
               BlastHSP *hsp = hsplist->hsp_array[k];
               fprintf(stderr, "c %d q %d-%d s %d-%d score %d\n", hsp->context,
                      hsp->query.offset, hsp->query.end,
                      hsp->subject.offset, hsp->subject.end,
                      hsp->score);
           }
       }
   }
#endif

   /* The three lists obtained from the split-query block are released
      here. */
   sfree(query_list);
   sfree(context_list);
   sfree(offset_list);

   return kBlastHSPStream_Success;
}