Uint4 
SplitQuery_CalculateNumChunks(EBlastProgramType program,
                              size_t *chunk_size, 
                              size_t concatenated_query_length,
                              size_t num_queries)
{
    if ( !SplitQuery_ShouldSplit(program, *chunk_size, 
                                 concatenated_query_length, num_queries)) {
        _TRACE("Not splitting queries");
        return 1;
    }

    size_t overlap_size = SplitQuery_GetOverlapChunkSize(program);
    Uint4 num_chunks = 0;

    _DEBUG_ARG(size_t target_chunk_size = *chunk_size);

    // For translated queries the chunk size should be divisible by CODON_LENGTH
    if (Blast_QueryIsTranslated(program)) {
        size_t chunk_size_delta = ((*chunk_size) % CODON_LENGTH);
        *chunk_size -= chunk_size_delta;
        _ASSERT((*chunk_size % CODON_LENGTH) == 0);
    }

    // Fix for small query size
    if ((*chunk_size) > overlap_size) {
       num_chunks = concatenated_query_length / ((*chunk_size) - overlap_size);
    }

    // Only one chunk, just return;
    if (num_chunks <= 1) {
       *chunk_size = concatenated_query_length;
       return 1;
    }

    // Re-adjust the chunk_size to make load even
    if (!Blast_QueryIsTranslated(program)) {
       *chunk_size = (concatenated_query_length + (num_chunks - 1) * overlap_size) / num_chunks;
       // Round up only if this will not decrease the number of chunks
       if (num_chunks < (*chunk_size) - overlap_size ) (*chunk_size)++;
    }

    _TRACE("Number of chunks: " << num_chunks << "; "
           "Target chunk size: " << target_chunk_size << "; "
           "Returned chunk size: " << *chunk_size);

    return num_chunks;
}
void
CQuerySplitter::x_ComputeChunkRanges()
{
    _ASSERT(m_SplitBlk.NotEmpty());

    // Note that this information might not need to be stored in the
    // SSplitQueryBlk structure, as these ranges can be calculated as follows:
    // chunk_start = (chunk_num*chunk_size) - (chunk_num*overlap_size);
    // chunk_end = chunk_start + chunk_size > query_size ? query_size :
    // chunk_start + chunk_size;

    size_t chunk_start = 0;
    const size_t kOverlapSize =
        SplitQuery_GetOverlapChunkSize(m_Options->GetProgramType());
    for (size_t chunk_num = 0; chunk_num < m_NumChunks; chunk_num++) {
        size_t chunk_end = chunk_start + m_ChunkSize;

        // if the chunk end is larger than the sequence ...
        if (chunk_end >= m_TotalQueryLength ||
            // ... or this is the last chunk and it didn't make it to the end
            // of the sequence
            (chunk_end < m_TotalQueryLength && (chunk_num + 1) == m_NumChunks))
        {
            // ... assign this chunk's end to the end of the sequence
            chunk_end = m_TotalQueryLength;
        }

        m_SplitBlk->SetChunkBounds(chunk_num, 
                                   TChunkRange(chunk_start, chunk_end));
        _TRACE("Chunk " << chunk_num << ": ranges from " << chunk_start 
               << " to " << chunk_end);

        chunk_start += (m_ChunkSize - kOverlapSize);
        if (chunk_start > m_TotalQueryLength || 
            chunk_end == m_TotalQueryLength) {
            break;
        }
    }

    // For purposes of having an accurate overlap size when stitching back
    // HSPs, save the overlap size
    const size_t kOverlap = 
        Blast_QueryIsTranslated(m_Options->GetProgramType()) 
        ? kOverlapSize / CODON_LENGTH : kOverlapSize;
    m_SplitBlk->SetChunkOverlapSize(kOverlap);
}
void
CQuerySplitter::x_ComputeContextOffsets_TranslatedQueries()
{
    _ASSERT( !m_QueryChunkFactories.empty() );

    const EBlastProgramType kProgram = m_Options->GetProgramType();
    _ASSERT(Blast_QueryIsTranslated(kProgram));
    const BlastQueryInfo* global_qinfo = m_LocalQueryData->GetQueryInfo();
#ifdef DEBUG_COMPARE_SEQUENCES
    const BLAST_SequenceBlk* global_seq = m_LocalQueryData->GetSequenceBlk();
#endif
    const size_t kOverlap = 
        SplitQuery_GetOverlapChunkSize(kProgram) / CODON_LENGTH;
    CContextTranslator ctx_translator(*m_SplitBlk, &m_QueryChunkFactories,
                                      m_Options);
    CQueryDataPerChunk qdpc(*m_SplitBlk, kProgram, m_LocalQueryData);
    vector<const BlastQueryInfo*> chunk_qinfo(m_NumChunks, 0);

    for (size_t chunk_num = 0; chunk_num < m_NumChunks; chunk_num++) {
        CRef<IQueryFactory> chunk_qf(m_QueryChunkFactories[chunk_num]);
        CRef<ILocalQueryData> chunk_qd(chunk_qf->MakeLocalQueryData(m_Options));
#ifdef DEBUG_COMPARE_SEQUENCES
        const BLAST_SequenceBlk* chunk_seq = chunk_qd->GetSequenceBlk();
#endif

        // BlastQueryInfo structure corresponding to chunk number chunk_num
        chunk_qinfo[chunk_num] = chunk_qd->GetQueryInfo();
        _ASSERT(chunk_qinfo[chunk_num]);

        // In case the first context differs from 0, for consistency with the
        // other data returned by this class...
        for (Int4 ctx = 0; ctx < chunk_qinfo[chunk_num]->first_context; ctx++) {
            m_SplitBlk->AddContextOffsetToChunk(chunk_num, INT4_MAX);
        }

        for (Int4 ctx = chunk_qinfo[chunk_num]->first_context; 
             ctx <= chunk_qinfo[chunk_num]->last_context; 
             ctx++) {

            size_t correction = 0;
            const int starting_chunk =
                ctx_translator.GetStartingChunk(chunk_num, ctx);
            const int absolute_context =
                ctx_translator.GetAbsoluteContext(chunk_num, ctx);
            const int last_query_chunk = qdpc.GetLastChunk(chunk_num, ctx);

            if (absolute_context == kInvalidContext || 
                starting_chunk == kInvalidContext) {
                _ASSERT( !chunk_qinfo[chunk_num]->contexts[ctx].is_valid );
                // INT4_MAX is the sentinel value for invalid contexts
                m_SplitBlk->AddContextOffsetToChunk(chunk_num, INT4_MAX);
                continue;
            }

            // The corrections for the contexts corresponding to the negative
            // strand in the last chunk of a query sequence are all 0
            if (!s_IsPlusStrand(chunk_qinfo[chunk_num], ctx) &&
                (chunk_num == (size_t)last_query_chunk) && 
                (ctx % NUM_FRAMES >= 3)) {
                correction = 0;
                goto error_check;
            }

            // The corrections for the contexts corresponding to the plus
            // strand are always the same, so only calculate the first one
            if (s_IsPlusStrand(chunk_qinfo[chunk_num], ctx) && 
                (ctx % NUM_FRAMES == 1 || ctx % NUM_FRAMES == 2)) {
                correction = m_SplitBlk->GetContextOffsets(chunk_num).back();
                goto error_check;
            }

            // If the query length is divisible by CODON_LENGTH, the
            // corrections for all contexts corresponding to a given strand are
            // the same, so only calculate the first one
            if ((qdpc.GetQueryLength(chunk_num, ctx) % CODON_LENGTH == 0) &&
                (ctx % NUM_FRAMES != 0) && (ctx % NUM_FRAMES != 3)) {
                correction = m_SplitBlk->GetContextOffsets(chunk_num).back();
                goto error_check;
            }

            // If the query length % CODON_LENGTH == 1, the corrections for the
            // first two contexts of the negative strand are the same, and the
            // correction for the last context is one more than that.
            if ((qdpc.GetQueryLength(chunk_num, ctx) % CODON_LENGTH == 1) &&
                !s_IsPlusStrand(chunk_qinfo[chunk_num], ctx)) {

                if (ctx % NUM_FRAMES == 4) {
                    correction = 
                        m_SplitBlk->GetContextOffsets(chunk_num).back();
                    goto error_check;
                } else if (ctx % NUM_FRAMES == 5) {
                    correction = 
                        m_SplitBlk->GetContextOffsets(chunk_num).back() + 1;
                    goto error_check;
                }
            }
                
            // If the query length % CODON_LENGTH == 2, the corrections for the
            // last two contexts of the negative strand are the same, which is
            // one more that the first context on the negative strand.
            if ((qdpc.GetQueryLength(chunk_num, ctx) % CODON_LENGTH == 2) &&
                !s_IsPlusStrand(chunk_qinfo[chunk_num], ctx)) {

                if (ctx % NUM_FRAMES == 4) {
                    correction = 
                        m_SplitBlk->GetContextOffsets(chunk_num).back() + 1;
                    goto error_check;
                } else if (ctx % NUM_FRAMES == 5) {
                    correction = 
                        m_SplitBlk->GetContextOffsets(chunk_num).back();
                    goto error_check;
                }
            }

            if (s_IsPlusStrand(chunk_qinfo[chunk_num], ctx)) {

                for (int c = chunk_num; c != starting_chunk; c--) {
                    size_t prev_len = s_GetAbsoluteContextLength(chunk_qinfo, 
                                                         c - 1,
                                                         ctx_translator,
                                                         absolute_context);
                    size_t curr_len = s_GetAbsoluteContextLength(chunk_qinfo, c,
                                                         ctx_translator,
                                                         absolute_context);
                    size_t overlap = min(kOverlap, curr_len);
                    correction += prev_len - min(overlap, prev_len);
                }

            } else {

                size_t subtrahend = 0;

                for (int c = chunk_num; c >= starting_chunk && c >= 0; c--) {
                    size_t prev_len = s_GetAbsoluteContextLength(chunk_qinfo, 
                                                         c - 1,
                                                         ctx_translator,
                                                         absolute_context);
                    size_t curr_len = s_GetAbsoluteContextLength(chunk_qinfo,
                                                         c,
                                                         ctx_translator,
                                                         absolute_context);
                    size_t overlap = min(kOverlap, curr_len);
                    subtrahend += (curr_len - min(overlap, prev_len));
                }
                correction =
                    global_qinfo->contexts[absolute_context].query_length -
                    subtrahend;
            }

error_check:
            _ASSERT((chunk_qinfo[chunk_num]->contexts[ctx].is_valid));
            m_SplitBlk->AddContextOffsetToChunk(chunk_num, correction);
#ifdef DEBUG_COMPARE_SEQUENCES
{
    int global_offset = global_qinfo->contexts[absolute_context].query_offset +
        correction;
    int chunk_offset = chunk_qinfo[chunk_num]->contexts[ctx].query_offset;
    int num_bases2compare = 
        min(10, chunk_qinfo[chunk_num]->contexts[ctx].query_length);
    if (!cmp_sequence(&global_seq->sequence[global_offset], 
                      &chunk_seq->sequence[chunk_offset], 
                      num_bases2compare, Blast_QueryIsProtein(kProgram))) {
        cerr << "Failed to compare sequence data for chunk " << chunk_num
             << ", context " << ctx << endl;
    }
}
#endif
        }
    }
    _TRACE("CContextTranslator contents: " << ctx_translator);
}
// Record the correction needed to synchronize with the complete query all the
// query offsets within HSPs that fall within this context
void
CQuerySplitter::x_ComputeContextOffsets_NonTranslatedQueries()
{
    _ASSERT( !m_QueryChunkFactories.empty() );

    const EBlastProgramType kProgram = m_Options->GetProgramType();
    _ASSERT( !Blast_QueryIsTranslated(kProgram) );
    const BlastQueryInfo* global_qinfo = m_LocalQueryData->GetQueryInfo();
#ifdef DEBUG_COMPARE_SEQUENCES
    const BLAST_SequenceBlk* global_seq = m_LocalQueryData->GetSequenceBlk();
#endif
    const size_t kOverlap = SplitQuery_GetOverlapChunkSize(kProgram);
    CContextTranslator ctx_translator(*m_SplitBlk, &m_QueryChunkFactories,
                                      m_Options);
    vector<const BlastQueryInfo*> chunk_qinfo(m_NumChunks, 0);

    for (size_t chunk_num = 0; chunk_num < m_NumChunks; chunk_num++) {
        CRef<IQueryFactory> chunk_qf(m_QueryChunkFactories[chunk_num]);
        CRef<ILocalQueryData> chunk_qd(chunk_qf->MakeLocalQueryData(m_Options));
#ifdef DEBUG_COMPARE_SEQUENCES
        const BLAST_SequenceBlk* chunk_seq = chunk_qd->GetSequenceBlk();
#endif

        // BlastQueryInfo structure corresponding to chunk number chunk_num
        chunk_qinfo[chunk_num] = chunk_qd->GetQueryInfo();
        _ASSERT(chunk_qinfo[chunk_num]);

        // In case the first context differs from 0, for consistency with the
        // other data returned by this class...
        for (Int4 ctx = 0; ctx < chunk_qinfo[chunk_num]->first_context; ctx++) {
            m_SplitBlk->AddContextOffsetToChunk(chunk_num, INT4_MAX);
        }

        for (Int4 ctx = chunk_qinfo[chunk_num]->first_context; 
             ctx <= chunk_qinfo[chunk_num]->last_context; 
             ctx++) {

            size_t correction = 0;
            const int starting_chunk =
                ctx_translator.GetStartingChunk(chunk_num, ctx);
            const int absolute_context =
                ctx_translator.GetAbsoluteContext(chunk_num, ctx);

            if (absolute_context == kInvalidContext || 
                starting_chunk == kInvalidContext) {
                _ASSERT( !chunk_qinfo[chunk_num]->contexts[ctx].is_valid );
                // INT4_MAX is the sentinel value for invalid contexts
                m_SplitBlk->AddContextOffsetToChunk(chunk_num, INT4_MAX);
                continue;
            }

            if (s_IsPlusStrand(chunk_qinfo[chunk_num], ctx)) {

                for (int c = chunk_num; c != starting_chunk; c--) {
                    size_t prev_len = s_GetAbsoluteContextLength(chunk_qinfo, 
                                                         c - 1,
                                                         ctx_translator,
                                                         absolute_context);
                    size_t curr_len = s_GetAbsoluteContextLength(chunk_qinfo, c,
                                                         ctx_translator,
                                                         absolute_context);
                    size_t overlap = min(kOverlap, curr_len);
                    correction += prev_len - min(overlap, prev_len);
                }

            } else {

                size_t subtrahend = 0;

                for (int c = chunk_num; c >= starting_chunk && c >= 0; c--) {
                    size_t prev_len = s_GetAbsoluteContextLength(chunk_qinfo, 
                                                         c - 1,
                                                         ctx_translator,
                                                         absolute_context);
                    size_t curr_len = s_GetAbsoluteContextLength(chunk_qinfo,
                                                         c,
                                                         ctx_translator,
                                                         absolute_context);
                    size_t overlap = min(kOverlap, curr_len);
                    subtrahend += (curr_len - min(overlap, prev_len));
                }
                correction =
                    global_qinfo->contexts[absolute_context].query_length -
                    subtrahend;

            }
            _ASSERT((chunk_qinfo[chunk_num]->contexts[ctx].is_valid));
            m_SplitBlk->AddContextOffsetToChunk(chunk_num, correction);
#ifdef DEBUG_COMPARE_SEQUENCES
{
    int global_offset = global_qinfo->contexts[absolute_context].query_offset +
        correction;
    int chunk_offset = chunk_qinfo[chunk_num]->contexts[ctx].query_offset;
    if (!cmp_sequence(&global_seq->sequence[global_offset], 
                      &chunk_seq->sequence[chunk_offset], 10,
                      Blast_QueryIsProtein(kProgram))) {
        cerr << "Failed to compare sequence data!" << endl;
    }
}
#endif

        }
    }
    _TRACE("CContextTranslator contents: " << ctx_translator);
}