/*
 * Feeds one row of base calls into the statistic.
 *
 * The row's spotgroup is looked up by name (a NULL or zero-length name is
 * treated as the empty name ""); if it does not exist yet it is created and
 * inserted into self->spotgroups.  Then, for every base of the read except
 * the first one, a record is handed to spotgroup_enter_values().
 *
 * Base codes 0..3 are treated as valid, anything above 3 as invalid.
 * Codes 1 and 2 are the ones counted in the sliding window (presumably
 * C and G - confirm against the producer of data->read).
 *
 * Returns an RC for a NULL argument or a failed spotgroup allocation,
 * otherwise the first non-zero rc from validate_row_data(), BSTreeInsert()
 * or spotgroup_enter_values(), or 0 when everything went through.
 *
 * NOTE(review): when BSTreeInsert() fails, the spotgroup that was just made
 * is not released in this function - confirm whether that is intended.
 */
rc_t extract_statistic_from_row(statistic *self, row_input const *data)
{
    rc_t rc;
    spotgrp *group;
    char const *name;
    uint32_t name_len;
    uint8_t prev_base = 4;      /* > 3, i.e. "no valid previous base" */
    unsigned run_len = 0;       /* length of the current same-base run */
    unsigned window_count = 0;  /* number of code-1/code-2 bases in the window */
    unsigned pos = 0;

    if (data == NULL)
        return RC(rcXF, rcFunction, rcExecuting, rcParam, rcNull);
    if (self == NULL)
        return RC(rcXF, rcFunction, rcExecuting, rcSelf, rcNull);

    rc = validate_row_data(self, data);
    if (rc != 0)
        return rc;

    /* normalise a missing or empty spotgroup name to "" */
    name = data->spotgroup;
    name_len = data->spotgroup_len;
    if (name == NULL || name_len == 0) {
        name = "";
        name_len = 0;
    }

    /* find the spotgroup, creating and registering it on first sight */
    group = find_spotgroup(self, name, name_len);
    if (group == NULL) {
        group = make_spotgrp(name, name_len);
        if (group == NULL)
            return RC(rcXF, rcFunction, rcExecuting, rcMemory, rcExhausted);

        rc = BSTreeInsert(&self->spotgroups, (BSTNode *)group, spotgroup_sort);
        if (rc != 0)
            return rc;
    }

    while (pos < data->read_len && rc == 0) {
        unsigned const base = data->read[pos];
        unsigned dimer = 16;    /* code used when either base is outside 0..3 */

        if (base <= 3) {
            if (prev_base <= 3)
                dimer = (prev_base << 2) | base;
            run_len = (prev_base == base) ? run_len + 1 : 0;
        } else {
            run_len = 0;
        }

        /* the first base of the read is never entered */
        if (pos != 0)
            rc = spotgroup_enter_values(group, data->quality[pos], dimer,
                                        window_count, run_len,
                                        data->base_pos_offset + pos, CASE_MATCH);

        /* the current base joins the window only after it was reported */
        switch (base) {
        case 1:
        case 2:
            ++window_count;
            break;
        default:
            break;
        }

        /* drop the base that is self->gc_window positions behind */
        if (pos >= self->gc_window) {
            unsigned const leaving = data->read[pos - self->gc_window];
            if (leaving == 1 || leaving == 2)
                --window_count;
        }

        prev_base = base;
        ++pos;
    }

    return rc;
}
/*
 * Walks the n_bases bases of one read and hands one record per base to
 * spotgroup_enter_values().
 *
 *  sg, entries      passed through unchanged to spotgroup_enter_values()
 *  gc_window        the G/C count covers at most this many bases directly
 *                   preceding the current base
 *  read_ptr         first base; always walked forward (read_ptr++)
 *  n_bases          number of bases / qualities to process
 *  qual_ptr         quality of the first base to process; stepped by ofs
 *  case_ptr         optional per-base case codes, may be NULL; stepped by ofs
 *  base_pos_offset  added to the loop index to form the reported position
 *  n_read, row_id   passed through unchanged
 *  ofs              stride applied to qual_ptr and case_ptr (presumably +1
 *                   for forward and -1 for reverse - confirm with the caller)
 *
 * Returns 0, or the first non-zero rc from spotgroup_enter_values(); a
 * non-zero rc ends the loop.
 *
 * The homopolymer run length and the G/C count are tracked in 32 bits and
 * clamped to 255 only when they are passed on, so long runs or wide windows
 * no longer wrap around to small values.
 */
static rc_t loop_through_base_calls( spotgrp *sg, uint64_t *entries, uint32_t gc_window,
                                     char * read_ptr,       /* first base of the read */
                                     uint32_t n_bases,
                                     uint8_t * qual_ptr,    /* first quality to process */
                                     uint8_t * case_ptr,    /* first case code to process, or NULL */
                                     uint32_t base_pos_offset,
                                     uint8_t n_read,        /* the number of the read (0/1) */
                                     const int64_t row_id,
                                     const int32_t ofs )
{
    /* largest value the 8-bit counters of the original code could carry */
    const uint32_t u8_limit = 0xFF;

    rc_t rc = 0;
    uint32_t base_pos;
    char prev_char = 'N';
    char * gc_ptr = read_ptr;       /* oldest base still counted in gc_content */
    uint32_t gc_content = 0;
    uint32_t hp_run = 0;
    uint8_t max_qual_value = 0;
    const uint8_t * scan_ptr = qual_ptr;

    /* calculate the max. quality value, before we loop through the bases a 2nd time */
    for ( base_pos = 0; base_pos < n_bases; ++base_pos )
    {
        if ( max_qual_value < *scan_ptr )
        {
            max_qual_value = *scan_ptr;
        }
        scan_ptr += ofs; /* because of going from forward or reverse */
    }

    for ( base_pos = 0; base_pos < n_bases && rc == 0; ++base_pos )
    {
        bool enter_value = true;

        /* calculate the hp-run-count */
        if ( prev_char == *read_ptr )
        {
            hp_run++;
            assert( hp_run <= n_bases );
        }
        else
        {
            hp_run = 0;
        }

        /* advance the "window"; written as '>' so that gc_window == UINT32_MAX
           cannot overflow the way 'gc_window + 1' would */
        if ( base_pos > gc_window )
        {
            if ( *gc_ptr == 'G' || *gc_ptr == 'C' )
            {
                assert( gc_content > 0 );
                gc_content--;
            }
            gc_ptr++;
        }

        /* NOTE(review): case_ptr[ ofs ] addresses the element one step beyond
           the current one, also on the final iteration - confirm that the
           caller's buffer covers it */
        if ( case_ptr != NULL )
        {
            enter_value = ( case_ptr[ 0 ] != CASE_IGNORE && case_ptr[ ofs ] != CASE_IGNORE );
        }

        if ( enter_value )
        {
            uint8_t case_value = CASE_MATCH;
            if ( case_ptr != NULL )
            {
                /* NOTE(review): fixed index 1 here, but index ofs in the
                   CASE_IGNORE test above - confirm which one is intended
                   when ofs is not 1 */
                case_value = case_ptr[ 1 ];
            }
            rc = spotgroup_enter_values( sg, entries, *qual_ptr,
                                         dimer_2_bin( prev_char, *read_ptr ),
                                         ( uint8_t )( gc_content < u8_limit ? gc_content : u8_limit ),
                                         ( uint8_t )( hp_run < u8_limit ? hp_run : u8_limit ),
                                         max_qual_value, n_read,
                                         base_pos + base_pos_offset,
                                         case_value, row_id );
        }

        /* handle the current base-position after the record was entered
           because we do not include the current base into the gc-content */
        if ( *read_ptr == 'G' || *read_ptr == 'C' )
        {
            gc_content++;
        }

        qual_ptr += ofs;
        prev_char = *read_ptr;
        read_ptr++;
        if ( case_ptr != NULL )
        {
            case_ptr += ofs;
        }
    }
    return rc;
}