Пример #1
0
/*
 * Feed every base of one input row into the statistics of its spot group.
 *
 * self : statistic container; its `spotgroups` tree is searched and, when the
 *        row's spot group is not found, extended with a new entry.
 * data : the row to process (read, quality, spot group name, position offset).
 *
 * Returns 0 on success. Returns an RC for a NULL `data` or `self`, for a
 * failed allocation of a new spot group, or whatever error code
 * validate_row_data(), BSTreeInsert() or spotgroup_enter_values() reports.
 *
 * Bases are handled as small integers: values above 3 are treated as
 * "not a regular base" (dimer bin 16, run length reset). Values 1 and 2 are
 * the ones counted into the sliding window counter (presumably C and G in a
 * 0..3 encoding -- confirm against the producer of row_input).
 */
rc_t extract_statistic_from_row(statistic *self, 
                                row_input const *data)
{
    rc_t status;
    spotgrp *group;
    char const *name;
    uint32_t name_len;
    unsigned pos;
    uint8_t prev_base = 4;      /* anything > 3 means "no usable previous base" */
    unsigned run_len = 0;       /* how often the current base repeated so far */
    unsigned window_count = 0;  /* bases with value 1 or 2 inside the window */

    if (data == NULL)
        return RC(rcXF, rcFunction, rcExecuting, rcParam, rcNull);
    if (self == NULL)
        return RC(rcXF, rcFunction, rcExecuting, rcSelf, rcNull);

    status = validate_row_data(self, data);
    if (status != 0)
        return status;

    /* a missing or empty spot group name is mapped onto the empty string */
    if (data->spotgroup != NULL && data->spotgroup_len != 0) {
        name = data->spotgroup;
        name_len = data->spotgroup_len;
    }
    else {
        name = "";
        name_len = 0;
    }

    /* look the spot group up, creating and registering it on first use */
    group = find_spotgroup(self, name, name_len);
    if (group == NULL) {
        group = make_spotgrp(name, name_len);
        if (group == NULL)
            return RC(rcXF, rcFunction, rcExecuting, rcMemory, rcExhausted);

        /* NOTE(review): when the insert fails the new spot group is not
           released here -- confirm whether that is intended. */
        status = BSTreeInsert(&self->spotgroups, (BSTNode *)group, spotgroup_sort);
        if (status != 0)
            return status;
    }

    for (pos = 0; pos < data->read_len; ++pos) {
        unsigned const base = data->read[pos];
        unsigned dimer = 16;    /* bin used when either base of the pair is > 3 */

        if (base <= 3) {
            if (prev_base <= 3)
                dimer = ((unsigned)prev_base << 2) | base;
            run_len = (prev_base == base) ? run_len + 1 : 0;
        }
        else {
            run_len = 0;
        }

        /* the very first base is never entered; it only primes the state */
        if (pos > 0) {
            status = spotgroup_enter_values(group, data->quality[pos], dimer,
                                            window_count, run_len,
                                            data->base_pos_offset + pos,
                                            CASE_MATCH);
            if (status != 0)
                return status;
        }

        /* the current base joins the window only after it was entered ... */
        if (base == 1 || base == 2)
            ++window_count;

        /* ... and the base gc_window positions back drops out of it */
        if (pos >= self->gc_window) {
            unsigned const leaving = data->read[pos - self->gc_window];

            if (leaving == 1 || leaving == 2)
                --window_count;
        }

        prev_base = base;
    }
    return status;
}
Пример #2
0
/*
 * Walk over the bases of one read and enter one record per base into the
 * given spot group.
 *
 * For every position the following values are handed to
 * spotgroup_enter_values(): the quality at that position, the dimer bin of
 * (previous base, current base), the number of 'G'/'C' characters seen in the
 * window before the current base, the current homopolymer run counter, the
 * maximum quality of the whole read, the read number, the position plus
 * base_pos_offset, the case value and the row id.
 *
 * read_ptr is always walked forward, while qual_ptr and case_ptr are stepped
 * by `ofs` (the original comment says this is "because of going from forward
 * or reverse").
 *
 * When case_ptr is NULL every position is entered with CASE_MATCH. Otherwise
 * a position is skipped if case_ptr[0] or case_ptr[ofs] equals CASE_IGNORE,
 * and the case value passed on is case_ptr[1].
 * NOTE(review): case_ptr[ofs] and case_ptr[1] are read relative to the current
 * position; whether they stay inside the caller's buffer at the ends of the
 * read cannot be seen from here -- confirm with the caller.
 * NOTE(review): the run counter and the G/C counter are 8 bit wide; confirm
 * the expected ranges fit.
 *
 * Returns 0, or the first non-zero code from spotgroup_enter_values(), after
 * which no further positions are entered.
 */
static rc_t loop_through_base_calls( spotgrp *sg,
                        uint64_t *entries,
                        uint32_t gc_window,
                        char * read_ptr,    /* points at begin of array */
                        uint32_t n_bases,
                        uint8_t * qual_ptr, /* points at begin of array */
                        uint8_t * case_ptr, /* points at begin of array */
                        uint32_t base_pos_offset,
                        uint8_t n_read,     /* the number of the read (0/1) */
                        const int64_t row_id,
                        const int32_t ofs )
{
    rc_t rc = 0;
    uint32_t base_pos;
    uint32_t window_tail = 0;   /* index of the oldest base still in the window */
    char prev_char = 'N';
    uint8_t gc_content = 0;
    uint8_t hp_run = 0;
    uint8_t max_qual_value = 0;
    const uint8_t *scan = qual_ptr;

    /* first pass: find the maximum quality value, stepping like the main loop */
    for ( base_pos = 0; base_pos < n_bases; ++base_pos )
    {
        if ( *scan > max_qual_value )
            max_qual_value = *scan;
        scan += ofs;
    }

    /* second pass: one record per base */
    for ( base_pos = 0; base_pos < n_bases; ++base_pos )
    {
        const char cur_char = read_ptr[ base_pos ];
        bool enter_value = true;
        uint8_t case_value = CASE_MATCH;

        /* homopolymer run: counts repeats of the previous character */
        if ( cur_char == prev_char )
        {
            ++hp_run;
            assert( hp_run <= n_bases );
        }
        else
        {
            hp_run = 0;
        }

        /* slide the window: drop its oldest base once it is full */
        if ( base_pos >= ( gc_window + 1 ) )
        {
            const char leaving = read_ptr[ window_tail ];
            if ( leaving == 'G' || leaving == 'C' )
            {
                assert( gc_content > 0 );
                gc_content--;
            }
            window_tail++;
        }

        /* decide from the case flags whether this position is entered at all */
        if ( case_ptr != NULL )
        {
            enter_value = ( case_ptr[0] != CASE_IGNORE && case_ptr[ofs] != CASE_IGNORE );
            if ( enter_value )
                case_value = case_ptr[ 1 ];
        }

        if ( enter_value )
        {
            rc = spotgroup_enter_values( sg, entries,
                                         *qual_ptr,
                                         dimer_2_bin( prev_char, cur_char ),
                                         gc_content,
                                         hp_run,
                                         max_qual_value,
                                         n_read,
                                         base_pos + base_pos_offset,
                                         case_value,
                                         row_id );
            if ( rc != 0 )
                break;
        }

        /* the current base is counted only after its record was entered,
           so it is never part of its own gc-content */
        if ( cur_char == 'G' || cur_char == 'C' )
            gc_content++;

        prev_char = cur_char;
        qual_ptr += ofs;
        if ( case_ptr != NULL )
            case_ptr += ofs;
    }
    return rc;
}