Пример #1
0
/*
 * Records one occurrence of an insertion/deletion fragment in `fragments`.
 *
 * The `len` entries of `bases` are converted with _4na_to_ascii() (reverse
 * flag off) into a temporary buffer that serves as the lookup key.
 * If the tree already contains a matching node its `count` is incremented;
 * otherwise a node is built with make_indel_fragment() and inserted.
 * A node that cannot be inserted is destroyed again.
 *
 * Failures (no memory for the key or the node, failed insert) are silent:
 * the function returns without counting the fragment.
 *
 * NOTE(review): the key buffer is always freed before returning, so
 * make_indel_fragment() is assumed to copy the text it is given - confirm.
 */
static void count_indel_fragment( BSTree * fragments, const INSDC_4na_bin *bases, uint32_t len )
{
    find_fragment_ctx key;
    indel_fragment * hit;
    uint32_t idx;

    key.bases = malloc( len );
    if ( key.bases == NULL )
        return;

    key.len = len;

    /* translate the binary bases into the textual lookup key */
    idx = 0;
    while ( idx < len )
    {
        ( ( char * )key.bases )[ idx ] = _4na_to_ascii( bases[ idx ], false );
        idx++;
    }

    hit = ( indel_fragment * ) BSTreeFind ( fragments, &key, cmp_fragment_vs_find_ctx );
    if ( hit != NULL )
    {
        /* seen before: just bump the counter */
        hit->count++;
    }
    else
    {
        /* first occurrence: create a node and hand it to the tree */
        indel_fragment * created = make_indel_fragment( key.bases, len );
        if ( created != NULL )
        {
            rc_t insert_rc = BSTreeInsert ( fragments, ( BSTNode * )created, cmp_fragment_vs_fragment );
            if ( insert_rc != 0 )
                free_indel_fragment( ( BSTNode * )created, NULL );
        }
    }

    free( ( void * ) key.bases );
}
Пример #2
0
/*
 * Updates the pileup counters for one alignment at the current position.
 *
 * ref_iter ... iterator queried for inserted / deleted bases
 * state ...... flag word of the alignment (align_iter_* bits); it is also
 *              handed to _4na_to_ascii() to obtain the called base
 * reverse .... selects which strand counter is incremented
 * counters ... accumulator that is updated in place
 *
 * Nothing is counted when the align_iter_invalid bit is set.
 */
static void walk_counter_state( ReferenceIterator *ref_iter, int32_t state, bool reverse,
                                pileup_counters * counters )
{
    if ( ( state & align_iter_invalid ) == align_iter_invalid )
        return;

    /* match / mismatch tally, not applicable to skipped positions */
    if ( ( state & align_iter_skip ) != align_iter_skip )
    {
        if ( ( state & align_iter_match ) != align_iter_match )
        {
            /* mismatch: pick the slot of the called base, other symbols are not counted */
            int slot = -1;
            switch( _4na_to_ascii( state, false ) )
            {
                case 'A' : slot = 0; break;
                case 'C' : slot = 1; break;
                case 'G' : slot = 2; break;
                case 'T' : slot = 3; break;
            }
            if ( slot >= 0 )
                counters->mismatches[ slot ] += 1;
        }
        else
        {
            counters->matches += 1;
        }
    }

    /* strand tally */
    if ( !reverse )
        counters->forward += 1;
    else
        counters->reverse += 1;

    /* insertion following this position */
    if ( ( state & align_iter_insert ) == align_iter_insert )
    {
        const INSDC_4na_bin *ins_bases;
        uint32_t ins_count = ReferenceIteratorBasesInserted ( ref_iter, &ins_bases );
        counters->inserts += ins_count;
        count_indel_fragment( &(counters->insert_fragments), ins_bases, ins_count );
    }

    /* deletion following this position; the returned buffer is released here */
    if ( ( state & align_iter_delete ) == align_iter_delete )
    {
        const INSDC_4na_bin *del_bases;
        INSDC_coord_zero del_pos;
        uint32_t del_count = ReferenceIteratorBasesDeleted ( ref_iter, &del_pos, &del_bases );
        if ( del_bases != NULL )
        {
            counters->deletes += del_count;
            count_indel_fragment( &(counters->delete_fragments), del_bases, del_count );
            free( (void *) del_bases );
        }
    }

    /* alignment start / end markers */
    if ( ( state & align_iter_first ) == align_iter_first )
        counters->starting += 1;

    if ( ( state & align_iter_last ) == align_iter_last )
        counters->ending += 1;
}
Пример #3
0
/*
 * Returns a newly allocated, NUL-terminated string holding the result of
 * _4na_to_ascii() for each of the `len` entries of `b`.
 *
 * b ......... source bases; not dereferenced when len == 0
 * len ....... number of bases to convert
 * reverse ... passed through to _4na_to_ascii() for every base
 *
 * Returns NULL if the buffer cannot be allocated or if `len + 1` does not
 * fit into a size_t. The caller owns the result and releases it with free().
 */
char * dup_2_ascii( const INSDC_4na_bin * b, size_t len, bool reverse )
{
    char * res;
    size_t i;   /* same width as len, so the index cannot wrap before reaching it */

    /* len + 1 wraps to 0 only for the largest size_t value: refuse instead of under-allocating */
    if ( len + 1 == 0 )
        return NULL;

    res = malloc( len + 1 );
    if ( res != NULL )
    {
        for ( i = 0; i < len; ++i )
            res[ i ] = _4na_to_ascii( b[ i ], reverse );
        res[ len ] = 0;
    }
    return res;
}
Пример #4
0
/*
 * Prints one pileup line for the position the reference iterator currently
 * points at: "<seq-id> TAB <pos> TAB <ref-base> TAB <depth>", and, when the
 * depth is non-zero and placements are available, a TAB, one token per
 * placement (written by handle_base_pos()), another TAB and the collected
 * qualities as characters (value + 33).
 *
 * ref_iter ..... iterator positioned by the caller
 * refobj ....... reference object used to look up the sequence id
 * skip_empty ... not used by this function
 * nodebug ...... passed through to handle_base_pos()
 *
 * Returns the rc of ReferenceIteratorPosition() if that call fails (it is
 * only logged when its state is not rcDone), the rc of ReferenceObj_SeqId()
 * if that fails, otherwise the rc that ended the placement loop, with the
 * rcDone state mapped to 0.
 */
static rc_t walk_ref_iter_position( ReferenceIterator *ref_iter,
                                    struct ReferenceObj const * refobj,
                                    bool skip_empty,
                                    bool nodebug )
{
    INSDC_coord_zero pos;
    uint32_t depth;
    INSDC_4na_bin base;

    rc_t rc = ReferenceIteratorPosition ( ref_iter, &pos, &depth, &base );
    if ( rc != 0 )
    {
        if ( GetRCState( rc ) != rcDone )
            LOGERR( klogInt, rc, "ReferenceIteratorPosition() failed" );
    }
    else
    {
        rc_t rc1 = 0;
        const char * reference_name = NULL;
        char c = _4na_to_ascii( base, false );
        rc = ReferenceObj_SeqId( refobj, &reference_name );
        if ( rc == 0 )
        {
            OUTMSG(( "%s\t%u\t%c\t%u", reference_name, pos, c, depth ));
            if ( depth > 0 )
            {
                const PlacementRecord *rec;
                rc1 = ReferenceIteratorNextPlacement ( ref_iter, &rec );
                if ( rc1 == 0 )
                {
                    enum { STACK_QUAL_COUNT = 4096 };
                    uint8_t stack_qualities[ STACK_QUAL_COUNT ];
                    uint8_t * heap_qualities = NULL;
                    uint8_t * qualities = stack_qualities;
                    uint32_t capacity = depth;  /* at most one quality per unit of depth */
                    uint32_t stored = 0;
                    uint32_t i;

                    /* deep positions do not fit on the stack: use the heap, and if
                       that fails keep only what fits into the stack buffer */
                    if ( depth > STACK_QUAL_COUNT )
                    {
                        heap_qualities = malloc( depth );
                        if ( heap_qualities != NULL )
                            qualities = heap_qualities;
                        else
                            capacity = STACK_QUAL_COUNT;
                    }

                    OUTMSG(( "\t" ));
                    while ( rc1 == 0 )
                    {
                        uint8_t q = 0;
                        handle_base_pos( ref_iter, rec, &q, nodebug );
                        /* never write past the end of the quality buffer */
                        if ( stored < capacity )
                            qualities[ stored++ ] = q;
                        rc1 = ReferenceIteratorNextPlacement ( ref_iter, &rec );
                    }
                    OUTMSG(( "\t" ));
                    /* print only the values that were actually collected */
                    for ( i = 0; i < stored; ++i )
                    {
                        char qc = ( qualities[ i ] + 33 );
                        OUTMSG(( "%c", qc ));
                    }

                    free( heap_qualities );
                }
            }
            OUTMSG(( "\n" ));
        }
        /* running out of placements is the regular way to leave the loop */
        if ( GetRCState( rc1 ) == rcDone ) rc1 = 0;
        /* keep a ReferenceObj_SeqId() failure instead of overwriting it */
        if ( rc == 0 )
            rc = rc1;
    }
    return rc;
}
Пример #5
0
/*
 * Writes one tab-separated counter line via KOutMsg() and then releases both
 * fragment collections of `counters`.
 *
 * Columns, in order: reference name, ref_pos + 1, reference base, depth,
 * number of matches (only if > 0), the four mismatch counters tagged
 * -A / -C / -G / -T, "I:" followed by the insert fragments, "D:" followed by
 * the delete fragments, percent( forward, reverse ) followed by '%',
 * "S<n>" (only if starting > 0), "E<n>" (only if ending > 0), newline.
 *
 * Output stops at the first failing write; that rc is returned.
 * The fragment collections are freed on every path.
 */
static rc_t print_counter_line( const char * ref_name,
                                INSDC_coord_zero ref_pos,
                                INSDC_4na_bin ref_base,
                                uint32_t depth,
                                pileup_counters * counters )
{
    char ref_char = _4na_to_ascii( ref_base, false );
    rc_t rc;

    rc = KOutMsg( "%s\t%u\t%c\t%u\t", ref_name, ref_pos + 1, ref_char, depth );
    if ( rc != 0 ) goto cleanup;

    if ( counters->matches > 0 )
    {
        rc = KOutMsg( "%u", counters->matches );
        if ( rc != 0 ) goto cleanup;
    }

    /* the mismatch columns are always written, even when zero */
    rc = KOutMsg( "\t%u-A", counters->mismatches[ 0 ] );
    if ( rc != 0 ) goto cleanup;

    rc = KOutMsg( "\t%u-C", counters->mismatches[ 1 ] );
    if ( rc != 0 ) goto cleanup;

    rc = KOutMsg( "\t%u-G", counters->mismatches[ 2 ] );
    if ( rc != 0 ) goto cleanup;

    rc = KOutMsg( "\t%u-T", counters->mismatches[ 3 ] );
    if ( rc != 0 ) goto cleanup;

    rc = KOutMsg( "\tI:" );
    if ( rc != 0 ) goto cleanup;

    rc = print_fragments( &(counters->insert_fragments) );
    if ( rc != 0 ) goto cleanup;

    rc = KOutMsg( "\tD:" );
    if ( rc != 0 ) goto cleanup;

    rc = print_fragments( &(counters->delete_fragments) );
    if ( rc != 0 ) goto cleanup;

    rc = KOutMsg( "\t%u%%", percent( counters->forward, counters->reverse ) );
    if ( rc != 0 ) goto cleanup;

    if ( counters->starting > 0 )
    {
        rc = KOutMsg( "\tS%u", counters->starting );
        if ( rc != 0 ) goto cleanup;
    }

    if ( counters->ending > 0 )
    {
        rc = KOutMsg( "\tE%u", counters->ending );
        if ( rc != 0 ) goto cleanup;
    }

    rc = KOutMsg( "\n" );

cleanup:
    free_fragments( &(counters->insert_fragments) );
    free_fragments( &(counters->delete_fragments) );

    return rc;
}
Пример #6
0
/*
 * Fills the per-alignment fields of `rwd` for the placement `rec` at the
 * iterator's current position and forwards `rwd` to self->on_alignment.
 *
 * The state word comes from ReferenceIteratorState(), which also supplies
 * rwd->seq_pos. The boolean fields are decoded from the align_iter_* bits,
 * the binary base is the low nibble of the state. Strand and quality come
 * from the placement's extension record; the quality is left untouched when
 * self->omit_quality is set.
 *
 * The callback is invoked unconditionally, so the caller has to make sure
 * self->on_alignment is not NULL. Returns the callback's rc.
 */
static rc_t ref_walker_walk_alignment( struct ref_walker * self,
                                       ReferenceIterator * ref_iter,
                                       const PlacementRecord * rec,
                                       ref_walker_data * rwd )
{
    walker_rec * ext = PlacementRecordCast ( rec, placementRecordExtension1 );

    /* everything below is derived from the iterator state */
    rwd->state = ReferenceIteratorState ( ref_iter, &rwd->seq_pos );

    /* values taken from the placement itself */
    rwd->reverse = ext->reverse;
    rwd->mapq = rec->mapq;
    if ( !self->omit_quality )
        rwd->quality = ext->quality[ rwd->seq_pos ];

    /* decoded state flags */
    rwd->valid = ( ( rwd->state & align_iter_invalid ) == 0 );
    rwd->match = ( ( rwd->state & align_iter_match ) == align_iter_match );
    rwd->skip  = ( ( rwd->state & align_iter_skip ) == align_iter_skip );
    rwd->first = ( ( rwd->state & align_iter_first ) == align_iter_first );
    rwd->last  = ( ( rwd->state & align_iter_last ) == align_iter_last );

    /* called base, binary and as text (text depends on the strand) */
    rwd->bin_alignment_base = ( rwd->state & 0x0F );
    rwd->ascii_alignment_base = _4na_to_ascii( rwd->state, rwd->reverse );

    return self->on_alignment( rwd );
}
Пример #7
0
/*
 * Walks one reference range (rwd->ref_name, rwd->ref_start, rwd->ref_end)
 * across all sources of `self` and fires the walker callbacks.
 *
 * Phase 1 - build: for every entry of self->sources the database is opened,
 * the reference named rwd->ref_name is looked up and, depending on the
 * primary / secondary / evidence switches of `self`, iterators are added to
 * the reference iterator via ref_walker_add_iterator(). A ref_start of 0 is
 * replaced by 1; a ref_end of 0 or beyond len + 1 is replaced by
 * ( len - ref_start ) + 1.
 *
 * Phase 2 - walk: the first reference delivered by the iterator is walked
 * window by window and position by position. For positions with depth > 0
 * the callbacks are invoked in this order, each only if set:
 * on_enter_ref_pos, then per spot group on_enter_spot_group, on_alignment
 * per placement, on_exit_spot_group, and finally on_exit_ref_pos.
 *
 * Error handling: the first non-zero rc stops the walk and is returned.
 * Exit callbacks are still invoked after a failure, but cannot hide the
 * earlier error. A non-zero rc from the NextWindow / NextPos /
 * NextSpotGroup / NextPlacement calls is treated as "nothing more at this
 * level" and is not returned. Quitting() is polled after every position.
 *
 * NOTE(review): cur_id_vector is initialized here but never released in
 * this function; who owns its contents is not visible from here - confirm
 * and release it if nothing else does.
 */
static rc_t ref_walker_walk_ref_range( struct ref_walker * self, ref_walker_data * rwd )
{
    ReferenceIterator * ref_iter;
    rc_t rc = AlignMgrMakeReferenceIterator ( self->amgr, &ref_iter, &self->cb_block, self->min_mapq ); /* align/iterator.h */
    if ( rc == 0 )
    {
        /* construct the reference iterator */

        uint32_t idx, count = 0;
        uint32_t reflist_options = ref_walker_make_reflist_options( self ); /* above */
        Vector cur_id_vector;
        VectorInit ( &cur_id_vector, 0, 12 );

        rc = VNameListCount ( self->sources, &count );
        /* rc is tested first, so count is only trusted after a successful call */
        for ( idx = 0; rc == 0 && idx < count; ++idx )
        {
            const char * src_name = NULL;
            rc = VNameListGet ( self->sources, idx, &src_name );
            if ( rc == 0 && src_name != NULL )
            {
                const VDatabase *db;
                rc = VDBManagerOpenDBRead ( self->vmgr, &db, self->vschema, "%s", src_name );
                if ( rc == 0 )
                {
                    const ReferenceList * ref_list;
                    rc = ReferenceList_MakeDatabase( &ref_list, db, reflist_options, 0, NULL, 0 );
                    if ( rc == 0 )
                    {
                        const ReferenceObj * ref_obj;
                        rc = ReferenceList_Find( ref_list, &ref_obj, rwd->ref_name, string_size( rwd->ref_name ) );
                        if ( rc == 0 )
                        {
                            INSDC_coord_len len;
                            rc = ReferenceObj_SeqLength( ref_obj, &len );
                            if ( rc == 0 )
                            {
                                /* fill in / clip the requested range */
                                if ( rwd->ref_start == 0 )
                                    rwd->ref_start = 1;
                                if ( ( rwd->ref_end == 0 )||( rwd->ref_end > len + 1 ) )
                                    rwd->ref_end = ( len - rwd->ref_start ) + 1;

                                if ( self->primary_alignments )
                                    rc = ref_walker_add_iterator( self, rwd->ref_name, rwd->ref_start, rwd->ref_end, src_name, 
                                            &cur_id_vector, db, ref_obj, ref_iter, TBL_PRIM, primary_align_ids );

                                if ( rc == 0 && self->secondary_alignments )
                                    rc = ref_walker_add_iterator( self, rwd->ref_name, rwd->ref_start, rwd->ref_end, src_name, 
                                            &cur_id_vector, db, ref_obj, ref_iter, TBL_SEC, secondary_align_ids );

                                if ( rc == 0 && self->evidence_alignments )
                                    rc = ref_walker_add_iterator( self, rwd->ref_name, rwd->ref_start, rwd->ref_end, src_name, 
                                            &cur_id_vector, db, ref_obj, ref_iter, TBL_EV, evidence_align_ids );

                            }
                            ReferenceObj_Release( ref_obj );
                        }
                        ReferenceList_Release( ref_list );
                    }
                    VDatabaseRelease( db );
                }
            }
        }

        if ( rc == 0 )
        {
            /* walk the reference iterator */
            struct ReferenceObj const * ref_obj;
            rc = ReferenceIteratorNextReference( ref_iter, NULL, NULL, &ref_obj );
            if ( rc == 0 && ref_obj != NULL )
            {
                if ( self->use_seq_name )
                    rc = ReferenceObj_Name( ref_obj, &rwd->ref_name );
                else
                    rc = ReferenceObj_SeqId( ref_obj, &rwd->ref_name );
                if ( rc == 0 )
                {
                    INSDC_coord_zero first_pos;
                    INSDC_coord_len len;
                    rc_t rc_w = ReferenceIteratorNextWindow ( ref_iter, &first_pos, &len );
                    while ( rc == 0 && rc_w == 0 )
                    {
                        rc_t rc_p = ReferenceIteratorNextPos ( ref_iter, !self->no_skip );
                        if ( rc_p != 0 )
                        {
                            /* no further position in this window: move on to the next
                               window, otherwise this loop could never terminate */
                            rc_w = ReferenceIteratorNextWindow ( ref_iter, &first_pos, &len );
                            continue;
                        }

                        rc = ReferenceIteratorPosition ( ref_iter, &rwd->pos, &rwd->depth, &rwd->bin_ref_base );
                        if ( rc == 0 && rwd->depth > 0 )
                        {
                            rc_t rc_sg = 0;
                            rwd->ascii_ref_base = _4na_to_ascii( rwd->bin_ref_base, false );
                            if ( self->on_enter_ref_pos != NULL )
                                rc = self->on_enter_ref_pos( rwd );

                            while ( rc_sg == 0 && rc == 0 )
                            {
                                rc_sg = ReferenceIteratorNextSpotGroup ( ref_iter, &rwd->spot_group, &rwd->spot_group_len );
                                if ( rc_sg == 0 )
                                {
                                    rc_t rc_pr = 0;
                                    if ( self->on_enter_spot_group != NULL )
                                        rc = self->on_enter_spot_group( rwd );

                                    while ( rc == 0 && rc_pr == 0 )
                                    {
                                        const PlacementRecord * rec;
                                        rc_pr = ReferenceIteratorNextPlacement ( ref_iter, &rec );
                                        if ( rc_pr == 0 && self->on_alignment != NULL )
                                            rc = ref_walker_walk_alignment( self, ref_iter, rec, rwd );
                                    }

                                    if ( self->on_exit_spot_group != NULL )
                                    {
                                        /* always give the client its exit call, but keep the first error */
                                        rc_t rc_exit = self->on_exit_spot_group( rwd );
                                        if ( rc == 0 )
                                            rc = rc_exit;
                                    }
                                }
                            }
                            if ( self->on_exit_ref_pos != NULL )
                            {
                                /* same policy as for the spot group exit */
                                rc_t rc_exit = self->on_exit_ref_pos( rwd );
                                if ( rc == 0 )
                                    rc = rc_exit;
                            }
                        }

                        /* poll for a quit request without discarding an error found above */
                        if ( rc == 0 )
                            rc = Quitting();
                    }
                }
            }
        }

        /* free cur_id_vector */

        ReferenceIteratorRelease ( ref_iter );
    }
    return rc;
}
Пример #8
0
/*
 * Writes the pileup token of one placement at the iterator's current
 * position via OUTMSG and stores the placement's quality value at the
 * current sequence position in *qual.
 *
 * Token layout, in order:
 *   "[<id>.<seq_pos> "   only if nodebug is false
 *   "^<c>"               if this is the first position (c = mapq + 33)
 *   "$"                  if this is the last position
 *   "+<n><bases>"        if bases are inserted; "+<n>?" if their text
 *                        could not be produced
 *   "-<n><bases>"        if bases are deleted; "-<n>?" if the bases or
 *                        their text are not available
 *   "*"                  for a skip, otherwise '.' / ',' for a match on the
 *                        forward / reverse strand, or the called base
 *   "]"                  only if nodebug is false
 *
 * The buffer returned by ReferenceIteratorBasesDeleted() is released here.
 */
static void handle_base_pos( ReferenceIterator *ref_iter,
                             const PlacementRecord *rec,
                             uint8_t * qual,
                             bool nodebug )
{
    INSDC_coord_zero seq_pos;
    int32_t state = ReferenceIteratorState ( ref_iter, &seq_pos );
    ext_rec *xrec = ( ext_rec * ) PlacementRecordCast ( rec, placementRecordExtension1 );
    bool reverse = xrec->reverse;

    if ( !nodebug )
        OUTMSG(( "[%lu.%u ", rec->id, seq_pos ));

    *qual = xrec->quality[ seq_pos ];
    if ( ( state & align_iter_first ) == align_iter_first )
    {
        char mapq = ( rec->mapq + 33 );
        OUTMSG(( "^%c", mapq ));
    }

    if ( ( state & align_iter_last ) == align_iter_last )
        OUTMSG(( "$" ));

    if ( ( state & align_iter_insert ) == align_iter_insert )
    {
        const INSDC_4na_bin *bases;
        uint32_t n = ReferenceIteratorBasesInserted ( ref_iter, &bases );
        char * s = dup_2_ascii( bases, n, reverse );
        /* dup_2_ascii() returns NULL when it cannot allocate: never hand that to %s */
        if ( s != NULL )
        {
            OUTMSG(( "+%u%s", n, s ));
            free( s );
        }
        else
        {
            OUTMSG(( "+%u?", n ));
        }
    }

    if ( ( state & align_iter_delete ) == align_iter_delete )
    {
        const INSDC_4na_bin *bases;
        INSDC_coord_zero ref_pos;
        uint32_t n = ReferenceIteratorBasesDeleted ( ref_iter, &ref_pos, &bases );
        if ( bases != NULL )
        {
            char * s = dup_2_ascii( bases, n, reverse );
            /* same guard as for insertions */
            if ( s != NULL )
            {
                OUTMSG(( "-%u%s", n, s ));
                free( s );
            }
            else
            {
                OUTMSG(( "-%u?", n ));
            }
            free( (void *) bases );
        }
        else
        {
            OUTMSG(( "-%u?", n ));
        }
    }

    if ( ( state & align_iter_skip ) == align_iter_skip )
        OUTMSG(( "*" ));
    else
    {
        if ( ( state & align_iter_match ) == align_iter_match )
            OUTMSG(( "%c", reverse ? ',' : '.' ));
        else
            OUTMSG(( "%c", _4na_to_ascii( state & 0x0F, reverse ) ));
    }

    if ( !nodebug )
        OUTMSG(( "]" ));
}
Пример #9
0
/*
 * Alignment callback: appends the pileup token of one alignment to
 * ctx->bases and, if ctx->print_qual is set, one character to ctx->qual.
 * `ctx` is the pileup_v2_ctx found in rwd->data.
 *
 * An invalid alignment contributes '?' to both strings. Otherwise the token
 * consists of, in order:
 *   "^<c>"        at the first position; c = mapq + 33 limited to 33 .. '~'
 *   '>' / '<'     for a skip on the forward / reverse strand, else
 *   '.' / ','     for a match on the forward / reverse strand, else
 *                 rwd->ascii_alignment_base
 *   "+<n><bases>" if rwd->ins is set
 *   "-<n><bases>" if rwd->del is set, the count is > 0 and bases are present
 *   '$'           at the last position
 * and rwd->quality is appended to ctx->qual.
 *
 * Stops at the first failing append and returns its rc.
 */
static rc_t CC pileup_v2_alignment( ref_walker_data * rwd )
{
    pileup_v2_ctx * ctx = rwd->data;
    rc_t rc = 0;

    if ( !rwd->valid )
    {
        rc = add_char_2_dyn_string( ctx->bases, '?' );
        if ( rc == 0 && ctx->print_qual )
            rc = add_char_2_dyn_string( ctx->qual, '?' );
        return rc;
    }

    /* start-of-alignment marker with the clamped mapping quality */
    if ( rwd->first )
    {
        char marker[ 3 ];
        int32_t mq = rwd->mapq + 33;
        if ( mq > '~' )
            mq = '~';
        if ( mq < 33 )
            mq = 33;
        marker[ 0 ] = '^';
        marker[ 1 ] = mq;
        marker[ 2 ] = 0;
        rc = add_string_2_dyn_string( ctx->bases, marker );
    }

    /* the base symbol itself */
    if ( rc == 0 )
    {
        if ( rwd->skip )
            rc = add_char_2_dyn_string( ctx->bases, ( rwd->reverse ? '<' : '>' ) );
        else if ( rwd->match )
            rc = add_char_2_dyn_string( ctx->bases, ( rwd->reverse ? ',' : '.' ) );
        else
            rc = add_char_2_dyn_string( ctx->bases, rwd->ascii_alignment_base );
    }

    /* inserted bases */
    if ( rc == 0 && rwd->ins )
    {
        uint32_t ins_count = rwd->ins_bases_count;
        uint32_t k = 0;

        rc = print_2_dyn_string( ctx->bases, "+%u", rwd->ins_bases_count );
        while ( k < ins_count && rc == 0 )
        {
            rc = add_char_2_dyn_string( ctx->bases, _4na_to_ascii( rwd->ins_bases[ k ], rwd->reverse ) );
            ++k;
        }
    }

    /* deleted bases */
    if ( rc == 0 && rwd->del && rwd->del_bases_count > 0 && rwd->del_bases != NULL )
    {
        uint32_t del_count = rwd->del_bases_count;
        uint32_t k = 0;

        rc = print_2_dyn_string( ctx->bases, "-%u", del_count );
        while ( k < del_count && rc == 0 )
        {
            rc = add_char_2_dyn_string( ctx->bases, _4na_to_ascii( rwd->del_bases[ k ], rwd->reverse ) );
            ++k;
        }
    }

    /* end-of-alignment marker */
    if ( rc == 0 && rwd->last )
        rc = add_char_2_dyn_string( ctx->bases, '$' );

    /* quality column */
    if ( rc == 0 && ctx->print_qual )
        rc = add_char_2_dyn_string( ctx->qual, rwd->quality );

    return rc;
}