/*
 * Register one occurrence of an insert/delete fragment in 'fragments'.
 *
 * The 'len' 4na-encoded bases are translated to ASCII into a temporary buffer
 * that acts as the lookup key. If the tree already holds a matching fragment
 * its counter is incremented; otherwise a new fragment is created with
 * make_indel_fragment() and inserted. A fragment that cannot be inserted is
 * released again. The temporary buffer is always freed before returning
 * (make_indel_fragment() presumably keeps its own copy - confirm there).
 *
 * Nothing is recorded if the temporary buffer cannot be allocated.
 */
static void count_indel_fragment( BSTree * fragments, const INSDC_4na_bin *bases, uint32_t len )
{
    find_fragment_ctx fctx;
    indel_fragment * entry;
    char * ascii;
    uint32_t k;

    fctx.bases = malloc( len );
    if ( fctx.bases == NULL )
        return;

    /* build the ASCII key used for the tree lookup */
    ascii = ( char * )fctx.bases;
    fctx.len = len;
    for ( k = 0; k < len; ++k )
        ascii[ k ] = _4na_to_ascii( bases[ k ], false );

    entry = ( indel_fragment * ) BSTreeFind ( fragments, &fctx, cmp_fragment_vs_find_ctx );
    if ( entry != NULL )
    {
        /* seen before: just bump the counter */
        entry->count++;
    }
    else
    {
        entry = make_indel_fragment( fctx.bases, len );
        if ( entry != NULL )
        {
            rc_t rc = BSTreeInsert ( fragments, ( BSTNode * )entry, cmp_fragment_vs_fragment );
            if ( rc != 0 )
                free_indel_fragment( ( BSTNode * )entry, NULL );
        }
    }

    free( ( void * ) fctx.bases );
}
/*
 * Fold the iterator state of one alignment at the current reference position
 * into 'counters'.
 *
 * ref_iter : iterator queried for inserted / deleted bases
 * state    : state word of the alignment (align_iter_* flags plus base bits)
 * reverse  : selects whether the reverse or the forward counter is bumped
 * counters : accumulator that is updated in place
 *
 * An invalid state is ignored completely. Otherwise:
 *  - unless the position is a skip, either the match counter or one of the
 *    four mismatch counters (A, C, G, T) is incremented; any other base
 *    letter is not counted,
 *  - the forward / reverse counter is incremented,
 *  - inserted and deleted bases are added to the totals and recorded as
 *    fragments; the deleted-bases buffer is freed here,
 *  - the starting / ending counters follow the first / last flags.
 */
static void walk_counter_state( ReferenceIterator *ref_iter, int32_t state, bool reverse, pileup_counters * counters )
{
    const bool is_invalid = ( ( state & align_iter_invalid ) == align_iter_invalid );
    const bool is_skip    = ( ( state & align_iter_skip ) == align_iter_skip );
    const bool is_match   = ( ( state & align_iter_match ) == align_iter_match );
    const bool has_insert = ( ( state & align_iter_insert ) == align_iter_insert );
    const bool has_delete = ( ( state & align_iter_delete ) == align_iter_delete );
    const bool is_first   = ( ( state & align_iter_first ) == align_iter_first );
    const bool is_last    = ( ( state & align_iter_last ) == align_iter_last );

    if ( is_invalid )
        return;

    if ( !is_skip )
    {
        if ( is_match )
        {
            counters->matches++;
        }
        else
        {
            /* slot k of mismatches[] belongs to letters[ k ] */
            static const char letters[ 4 ] = { 'A', 'C', 'G', 'T' };
            const char base_char = _4na_to_ascii( state, false );
            uint32_t k;
            for ( k = 0; k < 4; ++k )
            {
                if ( base_char == letters[ k ] )
                {
                    counters->mismatches[ k ]++;
                    break;
                }
            }
        }
    }

    if ( reverse )
        counters->reverse++;
    else
        counters->forward++;

    if ( has_insert )
    {
        const INSDC_4na_bin *inserted;
        uint32_t n_inserted = ReferenceIteratorBasesInserted ( ref_iter, &inserted );
        counters->inserts += n_inserted;
        count_indel_fragment( &( counters->insert_fragments ), inserted, n_inserted );
    }

    if ( has_delete )
    {
        const INSDC_4na_bin *deleted;
        INSDC_coord_zero ref_pos;
        uint32_t n_deleted = ReferenceIteratorBasesDeleted ( ref_iter, &ref_pos, &deleted );
        if ( deleted != NULL )
        {
            counters->deletes += n_deleted;
            count_indel_fragment( &( counters->delete_fragments ), deleted, n_deleted );
            free( ( void * ) deleted );
        }
    }

    if ( is_first )
        counters->starting++;
    if ( is_last )
        counters->ending++;
}
/*
 * Translate 'len' 4na-encoded bases into a newly allocated, NUL-terminated
 * ASCII string.
 *
 * b       : bases to translate; 'len' entries are read
 * len     : number of bases
 * reverse : passed through unchanged to _4na_to_ascii() for every base
 *
 * Returns the string, which the caller releases with free(), or NULL if the
 * buffer cannot be allocated (including the case where len + 1 would wrap).
 */
char * dup_2_ascii( const INSDC_4na_bin * b, size_t len, bool reverse )
{
    char * res;
    size_t i;   /* same width as 'len', so the index cannot wrap before reaching it */

    /* len + 1 below would wrap to 0 and under-allocate */
    if ( len == ( size_t )-1 )
        return NULL;

    res = malloc( len + 1 );
    if ( res != NULL )
    {
        for ( i = 0; i < len; ++i )
            res[ i ] = _4na_to_ascii( b[ i ], reverse );
        res[ len ] = 0;
    }
    return res;
}
/*
 * Print one output line for the current position of 'ref_iter'.
 *
 * The line starts with the reference name (ReferenceObj_SeqId), the position,
 * the reference base as ASCII and the depth. If the depth is non-zero and at
 * least one placement is available, a tab, the per-placement output of
 * handle_base_pos(), another tab and the collected qualities (each printed
 * as quality + 33) follow.
 *
 * ref_iter   : iterator positioned on the reference position to print
 * refobj     : reference object used to look up the name
 * skip_empty : not used by this function
 * nodebug    : forwarded to handle_base_pos()
 *
 * Returns the failing rc of ReferenceIteratorPosition() or
 * ReferenceObj_SeqId(), otherwise the rc that ended the placement loop, with
 * an rcDone state mapped to 0.
 *
 * At most sizeof( qualities ) quality values are kept; placements beyond that
 * are still printed by handle_base_pos(), but their quality is dropped
 * instead of being written past the end of the buffer.
 */
static rc_t walk_ref_iter_position( ReferenceIterator *ref_iter, struct ReferenceObj const * refobj, bool skip_empty, bool nodebug )
{
    INSDC_coord_zero pos;
    uint32_t depth;
    INSDC_4na_bin base;
    rc_t rc = ReferenceIteratorPosition ( ref_iter, &pos, &depth, &base );
    if ( rc != 0 )
    {
        if ( GetRCState( rc ) != rcDone )
            LOGERR( klogInt, rc, "ReferenceIteratorPosition() failed" );
    }
    else
    {
        rc_t rc1 = 0;
        const char * reference_name = NULL;
        char c = _4na_to_ascii( base, false );

        rc = ReferenceObj_SeqId( refobj, &reference_name );
        if ( rc == 0 )
        {
            OUTMSG(( "%s\t%u\t%c\t%u", reference_name, pos, c, depth ));
            if ( depth > 0 )
            {
                const PlacementRecord *rec;
                rc1 = ReferenceIteratorNextPlacement ( ref_iter, &rec );
                if ( rc1 == 0 )
                {
                    uint8_t qualities[ 4096 ];
                    const uint32_t capacity = ( uint32_t )( sizeof qualities / sizeof qualities[ 0 ] );
                    uint8_t discarded;      /* receives qualities that no longer fit */
                    uint32_t stored = 0;    /* number of valid entries in qualities[] */
                    uint32_t limit, i;

                    OUTMSG(( "\t" ));
                    while ( rc1 == 0 )
                    {
                        if ( stored < capacity )
                            handle_base_pos( ref_iter, rec, &( qualities[ stored++ ] ), nodebug );
                        else
                            handle_base_pos( ref_iter, rec, &discarded, nodebug );
                        rc1 = ReferenceIteratorNextPlacement ( ref_iter, &rec );
                    }
                    OUTMSG(( "\t" ));

                    /* never print entries that were not filled in above */
                    limit = ( depth < stored ) ? depth : stored;
                    for ( i = 0; i < limit; ++i )
                    {
                        char q = ( qualities[ i ] + 33 );
                        OUTMSG(( "%c", q ));
                    }
                }
            }
            OUTMSG(( "\n" ));
        }

        /* running out of placements is the normal end of the loop */
        if ( GetRCState( rc1 ) == rcDone )
            rc1 = 0;

        /* keep a ReferenceObj_SeqId() failure instead of overwriting it */
        if ( rc == 0 )
            rc = rc1;
    }
    return rc;
}
/*
 * Print the counter line for one reference position and release the
 * insert / delete fragment lists held in 'counters'.
 *
 * Output, tab separated: reference name, ref_pos + 1, reference base as ASCII,
 * depth, the match count (only if > 0), the four mismatch counts tagged
 * -A / -C / -G / -T (always printed), "I:" plus the insert fragments, "D:"
 * plus the delete fragments, the value of percent( forward, reverse )
 * followed by '%', then "S<n>" / "E<n>" if the starting / ending counters are
 * non-zero, and a newline.
 *
 * Printing stops at the first failing output call and that rc is returned;
 * the fragment lists are freed in every case.
 */
static rc_t print_counter_line( const char * ref_name, INSDC_coord_zero ref_pos, INSDC_4na_bin ref_base, uint32_t depth, pileup_counters * counters )
{
    char base_char = _4na_to_ascii( ref_base, false );
    rc_t rc;

    rc = KOutMsg( "%s\t%u\t%c\t%u\t", ref_name, ref_pos + 1, base_char, depth );
    if ( rc != 0 ) goto cleanup;

    if ( counters->matches > 0 )
    {
        rc = KOutMsg( "%u", counters->matches );
        if ( rc != 0 ) goto cleanup;
    }

    rc = KOutMsg( "\t%u-A", counters->mismatches[ 0 ] );
    if ( rc != 0 ) goto cleanup;
    rc = KOutMsg( "\t%u-C", counters->mismatches[ 1 ] );
    if ( rc != 0 ) goto cleanup;
    rc = KOutMsg( "\t%u-G", counters->mismatches[ 2 ] );
    if ( rc != 0 ) goto cleanup;
    rc = KOutMsg( "\t%u-T", counters->mismatches[ 3 ] );
    if ( rc != 0 ) goto cleanup;

    rc = KOutMsg( "\tI:" );
    if ( rc != 0 ) goto cleanup;
    rc = print_fragments( &( counters->insert_fragments ) );
    if ( rc != 0 ) goto cleanup;

    rc = KOutMsg( "\tD:" );
    if ( rc != 0 ) goto cleanup;
    rc = print_fragments( &( counters->delete_fragments ) );
    if ( rc != 0 ) goto cleanup;

    rc = KOutMsg( "\t%u%%", percent( counters->forward, counters->reverse ) );
    if ( rc != 0 ) goto cleanup;

    if ( counters->starting > 0 )
    {
        rc = KOutMsg( "\tS%u", counters->starting );
        if ( rc != 0 ) goto cleanup;
    }

    if ( counters->ending > 0 )
    {
        rc = KOutMsg( "\tE%u", counters->ending );
        if ( rc != 0 ) goto cleanup;
    }

    rc = KOutMsg( "\n" );

cleanup:
    /* the fragment lists are per-position data and are dropped on every path */
    free_fragments( &( counters->insert_fragments ) );
    free_fragments( &( counters->delete_fragments ) );
    return rc;
}
/*
 * Fill 'rwd' with the data of one alignment at the current reference position
 * and hand it to the on_alignment callback.
 *
 * self     : walker; supplies omit_quality and the on_alignment callback
 * ref_iter : iterator queried for the alignment state and sequence position
 * rec      : placement record of the alignment (source of mapq and of the
 *            extension record carrying 'reverse' and 'quality')
 * rwd      : receives state, seq_pos, the decoded flags, the alignment base
 *            (lower 4 bits of the state and its ASCII form), optionally the
 *            quality at seq_pos, and mapq
 *
 * Returns whatever on_alignment returns. The callback is invoked without a
 * NULL check, so the caller must make sure it is set.
 */
static rc_t ref_walker_walk_alignment( struct ref_walker * self, ReferenceIterator * ref_iter, const PlacementRecord * rec, ref_walker_data * rwd )
{
    walker_rec * xrec = PlacementRecordCast ( rec, placementRecordExtension1 );

    rwd->state = ReferenceIteratorState ( ref_iter, &rwd->seq_pos );

    /* data taken straight from the records */
    rwd->reverse = xrec->reverse;
    rwd->mapq = rec->mapq;

    /* flags decoded from the state word */
    rwd->valid = ( ( rwd->state & align_iter_invalid ) == 0 );
    rwd->first = ( ( rwd->state & align_iter_first ) == align_iter_first );
    rwd->last  = ( ( rwd->state & align_iter_last ) == align_iter_last );
    rwd->skip  = ( ( rwd->state & align_iter_skip ) == align_iter_skip );
    rwd->match = ( ( rwd->state & align_iter_match ) == align_iter_match );

    /* alignment base: binary form and ASCII form */
    rwd->bin_alignment_base = ( rwd->state & 0x0F );
    rwd->ascii_alignment_base = _4na_to_ascii( rwd->state, rwd->reverse );

    if ( !self->omit_quality )
    {
        rwd->quality = xrec->quality[ rwd->seq_pos ];
    }

    return self->on_alignment( rwd );
}
/*
 * Walk one reference range: build a reference iterator over all sources of
 * the walker, then visit every position and fire the walker's callbacks.
 *
 * Phase 1 - for every name in self->sources the database is opened, the
 * reference rwd->ref_name is looked up and, depending on the walker's
 * primary / secondary / evidence switches, alignment iterators for
 * [ rwd->ref_start, rwd->ref_end ] are added to the reference iterator.
 * A ref_start of 0 is replaced by 1; a ref_end of 0 or beyond len + 1 is
 * replaced by ( len - ref_start ) + 1.
 *
 * Phase 2 - the first reference of the iterator is fetched, rwd->ref_name is
 * set from it, and for every window / position with depth > 0 the callbacks
 * on_enter_ref_pos, on_enter_spot_group, on_alignment (through
 * ref_walker_walk_alignment), on_exit_spot_group and on_exit_ref_pos are
 * invoked, each only if it is set.
 *
 * Error handling: the first non-zero rc of a callback or of
 * ReferenceIteratorPosition() stops the walk and is returned. The exit
 * callbacks are still invoked after a failure but cannot hide it. Quitting()
 * is consulted after every position and its rc ends the walk as well.
 * A non-zero rc of ReferenceIteratorNextPos() advances to the next window;
 * a non-zero rc of ReferenceIteratorNextWindow() ends the walk. Neither is
 * reported to the caller.
 */
static rc_t ref_walker_walk_ref_range( struct ref_walker * self, ref_walker_data * rwd )
{
    ReferenceIterator * ref_iter;
    rc_t rc = AlignMgrMakeReferenceIterator ( self->amgr, &ref_iter, &self->cb_block, self->min_mapq ); /* align/iterator.h */
    if ( rc == 0 )
    {
        /* construct the reference iterator */
        uint32_t idx;
        uint32_t count = 0; /* stays 0 if VNameListCount() fails, so the loop condition never reads garbage */
        uint32_t reflist_options = ref_walker_make_reflist_options( self ); /* above */
        Vector cur_id_vector;
        VectorInit ( &cur_id_vector, 0, 12 );

        rc = VNameListCount ( self->sources, &count );
        for ( idx = 0; idx < count && rc == 0; ++idx )
        {
            const char * src_name = NULL;
            rc = VNameListGet ( self->sources, idx, &src_name );
            if ( rc == 0 && src_name != NULL )
            {
                const VDatabase *db;
                rc = VDBManagerOpenDBRead ( self->vmgr, &db, self->vschema, "%s", src_name );
                if ( rc == 0 )
                {
                    const ReferenceList * ref_list;
                    rc = ReferenceList_MakeDatabase( &ref_list, db, reflist_options, 0, NULL, 0 );
                    if ( rc == 0 )
                    {
                        const ReferenceObj * ref_obj;
                        rc = ReferenceList_Find( ref_list, &ref_obj, rwd->ref_name, string_size( rwd->ref_name ) );
                        if ( rc == 0 )
                        {
                            INSDC_coord_len len;
                            rc = ReferenceObj_SeqLength( ref_obj, &len );
                            if ( rc == 0 )
                            {
                                if ( rwd->ref_start == 0 )
                                    rwd->ref_start = 1;
                                /* NOTE(review): the fallback stores ( len - ref_start ) + 1, which reads like
                                   a span length rather than an end coordinate - confirm against
                                   ref_walker_add_iterator() */
                                if ( ( rwd->ref_end == 0 )||( rwd->ref_end > len + 1 ) )
                                    rwd->ref_end = ( len - rwd->ref_start ) + 1;

                                if ( self->primary_alignments )
                                    rc = ref_walker_add_iterator( self, rwd->ref_name, rwd->ref_start, rwd->ref_end, src_name,
                                                                  &cur_id_vector, db, ref_obj, ref_iter, TBL_PRIM, primary_align_ids );
                                if ( rc == 0 && self->secondary_alignments )
                                    rc = ref_walker_add_iterator( self, rwd->ref_name, rwd->ref_start, rwd->ref_end, src_name,
                                                                  &cur_id_vector, db, ref_obj, ref_iter, TBL_SEC, secondary_align_ids );
                                if ( rc == 0 && self->evidence_alignments )
                                    rc = ref_walker_add_iterator( self, rwd->ref_name, rwd->ref_start, rwd->ref_end, src_name,
                                                                  &cur_id_vector, db, ref_obj, ref_iter, TBL_EV, evidence_align_ids );
                            }
                            ReferenceObj_Release( ref_obj );
                        }
                        ReferenceList_Release( ref_list );
                    }
                    VDatabaseRelease( db );
                }
            }
        }

        if ( rc == 0 )
        {
            /* walk the reference iterator */
            struct ReferenceObj const * ref_obj;
            rc = ReferenceIteratorNextReference( ref_iter, NULL, NULL, &ref_obj );
            if ( rc == 0 && ref_obj != NULL )
            {
                if ( self->use_seq_name )
                    rc = ReferenceObj_Name( ref_obj, &rwd->ref_name );
                else
                    rc = ReferenceObj_SeqId( ref_obj, &rwd->ref_name );

                if ( rc == 0 )
                {
                    INSDC_coord_zero first_pos;
                    INSDC_coord_len len;
                    rc_t rc_w = ReferenceIteratorNextWindow ( ref_iter, &first_pos, &len );
                    while ( rc == 0 && rc_w == 0 )
                    {
                        rc_t rc_p = ReferenceIteratorNextPos ( ref_iter, !self->no_skip );
                        if ( rc_p != 0 )
                        {
                            /* no further position in this window: advance to the next one.
                               Without this neither rc nor rc_w would ever change and the
                               loop could not terminate. */
                            rc_w = ReferenceIteratorNextWindow ( ref_iter, &first_pos, &len );
                            continue;
                        }

                        rc = ReferenceIteratorPosition ( ref_iter, &rwd->pos, &rwd->depth, &rwd->bin_ref_base );
                        if ( rc == 0 && rwd->depth > 0 )
                        {
                            rc_t rc_sg = 0;
                            rwd->ascii_ref_base = _4na_to_ascii( rwd->bin_ref_base, false );
                            if ( self->on_enter_ref_pos != NULL )
                                rc = self->on_enter_ref_pos( rwd );

                            while ( rc_sg == 0 && rc == 0 )
                            {
                                rc_sg = ReferenceIteratorNextSpotGroup ( ref_iter, &rwd->spot_group, &rwd->spot_group_len );
                                if ( rc_sg == 0 )
                                {
                                    rc_t rc_pr = 0;
                                    if ( self->on_enter_spot_group != NULL )
                                        rc = self->on_enter_spot_group( rwd );

                                    while ( rc == 0 && rc_pr == 0 )
                                    {
                                        const PlacementRecord * rec;
                                        rc_pr = ReferenceIteratorNextPlacement ( ref_iter, &rec );
                                        if ( rc_pr == 0 && self->on_alignment != NULL )
                                            rc = ref_walker_walk_alignment( self, ref_iter, rec, rwd );
                                    }

                                    if ( self->on_exit_spot_group != NULL )
                                    {
                                        /* always give the callback its exit notification,
                                           but keep an earlier error */
                                        rc_t rc_exit = self->on_exit_spot_group( rwd );
                                        if ( rc == 0 )
                                            rc = rc_exit;
                                    }
                                }
                            }

                            if ( self->on_exit_ref_pos != NULL )
                            {
                                rc_t rc_exit = self->on_exit_ref_pos( rwd );
                                if ( rc == 0 )
                                    rc = rc_exit;
                            }
                        }

                        /* only ask for a pending quit if nothing failed, so that the
                           error of this position is not overwritten */
                        if ( rc == 0 )
                            rc = Quitting();
                    }
                }
            }
        }

        /* NOTE(review): cur_id_vector is initialized above but never released in this
           function; what ref_walker_add_iterator() stores in it is not visible here,
           so the matching cleanup still has to be decided */
        ReferenceIteratorRelease ( ref_iter );
    }
    return rc;
}
/*
 * Print the pileup-style text for one placement at the current reference
 * position and hand its quality value back to the caller.
 *
 * ref_iter : iterator queried for the alignment state, the sequence position
 *            and the inserted / deleted bases
 * rec      : placement record (source of id, mapq and the extension record
 *            carrying 'reverse' and 'quality')
 * qual     : receives the quality value at the sequence position
 * nodebug  : if false, the output is wrapped in "[<id>.<seq_pos> " ... "]"
 *
 * Output, in this order: "^" plus the mapping-quality character on the first
 * position of an alignment, "$" on the last one, "+<n><bases>" for an insert,
 * "-<n><bases>" for a delete, then "*" for a skip, otherwise ',' / '.' for a
 * match on the reverse / forward strand or the base letter for a mismatch.
 *
 * The mapping-quality character is mapq + 33 limited to the printable range
 * '!' .. '~', the same rule pileup_v2_alignment() applies. If the bases of an
 * insert or delete cannot be rendered, '?' is printed in their place. The
 * deleted-bases buffer is freed here.
 */
static void handle_base_pos( ReferenceIterator *ref_iter, const PlacementRecord *rec, uint8_t * qual, bool nodebug )
{
    INSDC_coord_zero seq_pos;
    int32_t state = ReferenceIteratorState ( ref_iter, &seq_pos );
    ext_rec *xrec = ( ext_rec * ) PlacementRecordCast ( rec, placementRecordExtension1 );
    bool reverse = xrec->reverse;

    if ( !nodebug )
        OUTMSG(( "[%lu.%u ", rec->id, seq_pos ));

    *qual = xrec->quality[ seq_pos ];

    if ( ( state & align_iter_first ) == align_iter_first )
    {
        /* keep the marker printable instead of letting it leave the ASCII range */
        int32_t shifted = rec->mapq + 33;
        char mapq;
        if ( shifted > '~' ) { shifted = '~'; }
        if ( shifted < 33 ) { shifted = 33; }
        mapq = ( char )shifted;
        OUTMSG(( "^%c", mapq ));
    }

    if ( ( state & align_iter_last ) == align_iter_last )
        OUTMSG(( "$" ));

    if ( ( state & align_iter_insert ) == align_iter_insert )
    {
        const INSDC_4na_bin *bases;
        uint32_t n = ReferenceIteratorBasesInserted ( ref_iter, &bases );
        char * s = dup_2_ascii( bases, n, reverse );
        if ( s != NULL )
        {
            OUTMSG(( "+%u%s", n, s ));
            free( s );
        }
        else
        {
            /* no buffer for the bases: do not hand NULL to the %s conversion */
            OUTMSG(( "+%u?", n ));
        }
    }

    if ( ( state & align_iter_delete ) == align_iter_delete )
    {
        const INSDC_4na_bin *bases;
        INSDC_coord_zero ref_pos;
        uint32_t n = ReferenceIteratorBasesDeleted ( ref_iter, &ref_pos, &bases );
        if ( bases != NULL )
        {
            char * s = dup_2_ascii( bases, n, reverse );
            if ( s != NULL )
            {
                OUTMSG(( "-%u%s", n, s ));
                free( s );
            }
            else
            {
                OUTMSG(( "-%u?", n ));
            }
            free( (void *) bases );
        }
        else
        {
            OUTMSG(( "-%u?", n ));
        }
    }

    if ( ( state & align_iter_skip ) == align_iter_skip )
        OUTMSG(( "*" ));
    else
    {
        if ( ( state & align_iter_match ) == align_iter_match )
            OUTMSG(( "%c", reverse ? ',' : '.' ));
        else
            OUTMSG(( "%c", _4na_to_ascii( state & 0x0F, reverse ) ));
    }

    if ( !nodebug )
        OUTMSG(( "]" ));
}
/*
 * Alignment callback: append the pileup text of one alignment to the dynamic
 * strings held in the context found in rwd->data.
 *
 * An invalid alignment contributes '?' to the base string and, if qualities
 * are printed, '?' to the quality string. For a valid alignment the base
 * string receives, in this order:
 *   - "^" plus a character for mapq + 33, limited to the range 33 .. '~',
 *     on the first position of the alignment,
 *   - '<' / '>' for a skip on the reverse / forward strand, ',' / '.' for a
 *     match on the reverse / forward strand, or the ASCII alignment base,
 *   - "+<n>" plus the inserted bases if rwd->ins is set,
 *   - "-<n>" plus the deleted bases if rwd->del is set, the count is non-zero
 *     and the bases are available,
 *   - '$' on the last position of the alignment.
 * If qualities are printed, rwd->quality is appended to the quality string.
 *
 * Returns 0 or the rc of the first string operation that failed; nothing
 * further is appended after a failure.
 */
static rc_t CC pileup_v2_alignment( ref_walker_data * rwd )
{
    rc_t rc = 0;
    pileup_v2_ctx * ctx = rwd->data;
    uint32_t k;
    char symbol;

    if ( !rwd->valid )
    {
        rc = add_char_2_dyn_string( ctx->bases, '?' );
        if ( rc == 0 && ctx->print_qual )
            rc = add_char_2_dyn_string( ctx->qual, '?' );
        return rc;
    }

    if ( rwd->first )
    {
        /* start-of-alignment marker followed by the clamped mapping quality */
        char marker[ 3 ];
        int32_t shifted = rwd->mapq + 33;
        if ( shifted > '~' ) { shifted = '~'; }
        if ( shifted < 33 ) { shifted = 33; }
        marker[ 0 ] = '^';
        marker[ 1 ] = shifted;
        marker[ 2 ] = 0;
        rc = add_string_2_dyn_string( ctx->bases, marker );
        if ( rc != 0 )
            return rc;
    }

    /* exactly one character describes the base itself */
    if ( rwd->skip )
        symbol = rwd->reverse ? '<' : '>';
    else if ( rwd->match )
        symbol = rwd->reverse ? ',' : '.';
    else
        symbol = rwd->ascii_alignment_base;
    rc = add_char_2_dyn_string( ctx->bases, symbol );
    if ( rc != 0 )
        return rc;

    if ( rwd->ins )
    {
        uint32_t n_ins = rwd->ins_bases_count;
        rc = print_2_dyn_string( ctx->bases, "+%u", rwd->ins_bases_count );
        for ( k = 0; k < n_ins && rc == 0; ++k )
            rc = add_char_2_dyn_string( ctx->bases, _4na_to_ascii( rwd->ins_bases[ k ], rwd->reverse ) );
        if ( rc != 0 )
            return rc;
    }

    if ( rwd->del && rwd->del_bases_count > 0 && rwd->del_bases != NULL )
    {
        uint32_t n_del = rwd->del_bases_count;
        rc = print_2_dyn_string( ctx->bases, "-%u", n_del );
        for ( k = 0; k < n_del && rc == 0; ++k )
            rc = add_char_2_dyn_string( ctx->bases, _4na_to_ascii( rwd->del_bases[ k ], rwd->reverse ) );
        if ( rc != 0 )
            return rc;
    }

    if ( rwd->last )
    {
        rc = add_char_2_dyn_string( ctx->bases, '$' );
        if ( rc != 0 )
            return rc;
    }

    if ( ctx->print_qual )
        rc = add_char_2_dyn_string( ctx->qual, rwd->quality );

    return rc;
}