Ejemplo n.º 1
0
/* vdb_fasta_loop_with_name
 *  Walks every row id delivered by ctx->row_generator and writes it through
 *  KOutMsg as a FASTA record:
 *      >run_name.row_id NAME length=N
 *  followed by the READ cell, broken into lines of at most
 *  ctx->max_line_len characters.
 *
 *  ctx  - supplies the row generator and the maximum line length
 *  fctx - supplies the cursor, the run name and the NAME / READ column indexes
 *
 *  Returns 0 on success, otherwise the first non-zero rc coming from
 *  Quitting(), VCursorCellDataDirect() or KOutMsg(); a non-zero rc also
 *  ends the iteration.
 */
static rc_t vdb_fasta_loop_with_name( const p_dump_context ctx, const fastq_ctx * fctx )
{
    rc_t rc = 0;
    int64_t row_id;

    vdn_start( ctx->row_generator );
    while ( vdn_next( ctx->row_generator, (uint64_t*)&row_id ) && rc == 0 )
    {
        rc = Quitting();
        if ( rc == 0 )
        {
            uint32_t elem_bits, boff, row_len, name_len;
            const char * data;
            const char * name;

            rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_name, &elem_bits,
                                        (const void**)&name, &boff, &name_len );
            if ( rc != 0 )
                vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), NAME ) failed", rc, row_id );
            else
            {
                rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_read, &elem_bits,
                                            (const void**)&data, &boff, &row_len );
                if ( rc != 0 )
                    vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), READ ) failed", rc, row_id );
                else
                {
                    uint32_t idx = 0;           /* offset of the next chunk inside data */
                    int32_t to_print = row_len; /* length of the next chunk */

                    /* defline first, then the bases in chunks */
                    rc = KOutMsg( ">%s.%li %.*s length=%u\n",
                                  fctx->run_name, row_id, name_len, name, row_len );
                    if ( to_print > ctx->max_line_len )
                        to_print = ctx->max_line_len;
                    while ( rc == 0 && to_print > 0 )
                    {
                        rc = KOutMsg( "%.*s\n", to_print, &data[ idx ] );
                        if ( rc == 0 )
                        {
                            idx += ctx->max_line_len;
                            /* compute what is left without letting the unsigned
                               subtraction wrap once idx has passed row_len */
                            to_print = ( idx < row_len ) ? ( int32_t )( row_len - idx ) : 0;
                            if ( to_print > ctx->max_line_len )
                                to_print = ctx->max_line_len;
                        }
                    }
                }
            }
        }
    }
    return rc;
}
Ejemplo n.º 2
0
/* runChecks
 *  For every row in the id range of the name column: reads the name cell,
 *  sets it as the "QUERY_NAME" cursor parameter, reads the name-range cell
 *  and compares its start_id / stop_id with the entry stored under that
 *  name in test_case.key_ranges.
 *
 *  Returns the failing rc of a cursor call, 1 if a name is not present in
 *  test_case.key_ranges or its range differs, otherwise the rc of the last
 *  successful call.
 */
rc_t runChecks(const TestCase& test_case, const VCursor * cursor, uint32_t name_idx, uint32_t name_range_idx)
{
    int64_t first_id;
    uint64_t count_id;

    rc_t rc = VCursorIdRange( cursor, name_idx, &first_id, &count_id );
    if (rc != 0)
    {
        LOGERR( klogInt, rc, "VCursorIdRange() failed" );
        return rc;
    }

    /* one past the last row id to visit */
    const uint64_t end_id = first_id + count_id;
    uint64_t row_id = first_id;

    while (row_id < end_id)
    {
        const char * cell_name = NULL;
        uint32_t cell_name_len;
        RowRange *actual;

        rc = VCursorCellDataDirect( cursor, row_id, name_idx, NULL, (void const **)&cell_name, NULL, &cell_name_len );
        if ( rc != 0 )
            return rc;

        /* the name just read becomes the query parameter for the range lookup */
        rc = VCursorParamsSet( ( struct VCursorParams const * )cursor, "QUERY_NAME", "%.*s", cell_name_len, cell_name );
        if ( rc != 0 )
            return rc;

        rc = VCursorCellDataDirect( cursor, row_id, name_range_idx, NULL, (void const **)&actual, NULL, NULL );
        if ( rc != 0 )
            return rc;

        std::string key(cell_name, cell_name_len);

        if (test_case.key_ranges.find(key) == test_case.key_ranges.end())
        {
            PLOGMSG( klogInt, (klogErr, "Unexpected name '$(NAME)' in test case '$(TC_NAME)'", "TC_NAME=%s,NAME=%s", test_case_name, key.c_str()) );
            return 1;
        }

        RowRange expected = test_case.key_ranges.find(key)->second;
        const bool same_start = ( actual->start_id == expected.start_id );
        const bool same_stop = ( actual->stop_id == expected.stop_id );
        if ( !( same_start && same_stop ) )
        {
            PLOGMSG( klogInt, (klogErr, "Row range for name '$(NAME)' in test case '$(TC_NAME)' does not match. Expected: $(EXP_S)-$(EXP_F), actual: $(ACT_S)-$(ACT_F)",
                               "TC_NAME=%s,NAME=%s,EXP_S=%ld,EXP_F=%ld,ACT_S=%ld,ACT_F=%ld",
                               test_case_name, key.c_str(), expected.start_id, expected.stop_id, actual->start_id, actual->stop_id) );
            return 1;
        }

        ++row_id;
    }

    return rc;
}
Ejemplo n.º 3
0
/* read_INSDC_dna_text_ptr
 *  Fetches a pointer to the INSDC_dna_text cell at ( row_id, idx ).
 *
 *  res  - receives the cell pointer, but only when the cell is not empty;
 *         it is left untouched for an empty cell
 *  len  - optional, receives the number of elements in the cell
 *  hint - free text that is added to the log message on failure
 *
 *  Returns an rcInvalid error if idx is INVALID_COLUMN, otherwise the rc of
 *  VCursorCellDataDirect(). Both failures are logged.
 */
rc_t read_INSDC_dna_text_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const INSDC_dna_text **res, uint32_t *len, const char * hint )
{
    const INSDC_dna_text * cell;
    uint32_t bits, bit_ofs, cell_len;
    rc_t rc;

    if ( idx == INVALID_COLUMN )
    {
        rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
        (void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) INSDC_dna_text (ptr)",
            "tr=%li,hi=%s", row_id, hint ) );
        return rc;
    }

    rc = VCursorCellDataDirect( cursor, row_id, idx, &bits, (const void**)&cell, &bit_ofs, &cell_len );
    if ( rc != 0 )
    {
        (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_dna_text (ptr) failed", 
            "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
        return rc;
    }

    if ( cell_len > 0 )
        *res = cell;
    if ( len != NULL )
        *len = cell_len;
    return rc;
}
Ejemplo n.º 4
0
/* read_uint8
 *  Reads the first element of the cell at ( row_id, idx ) as a uint8_t.
 *
 *  res  - receives the first element of the cell, or dflt if the cell is empty
 *  dflt - value stored in *res for an empty cell
 *  hint - free text that is added to the log message on failure
 *
 *  Returns an rcInvalid error if idx is INVALID_COLUMN, otherwise the rc of
 *  VCursorCellDataDirect(). Both failures are logged and leave *res untouched.
 */
rc_t read_uint8( int64_t row_id, const VCursor * cursor, uint32_t idx, uint8_t *res, uint8_t dflt, const char * hint )
{
    rc_t rc;
    if ( idx == INVALID_COLUMN )
    {
        rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
        (void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) uint8",
            "tr=%li,hi=%s", row_id, hint ) );
    }
    else
    {
        const uint8_t * value;
        uint32_t elem_bits, boff, row_len;
        rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
        if ( rc != 0 )
        {
            /* name the type this reader handles, matching the message above */
            (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) uint8 failed",
                "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
        }
        else
        {
            *res = ( row_len > 0 ) ? *value : dflt;
        }
    }
    return rc;
}
Ejemplo n.º 5
0
/* get_platform
 *  Opens a temporary read cursor on tab, reads the PLATFORM column of row 1
 *  and translates its first element with vdcd_get_platform_txt().
 *
 *  Returns PT_NONE if any step fails or if the cell holds no element;
 *  the cursor is released before returning.
 */
static const char * get_platform( const VTable * tab )
{
    const char * res = PT_NONE;
    const VCursor * cur;
    rc_t rc = VTableCreateCursorRead( tab, &cur );
    if ( rc == 0 )
    {
        uint32_t idx;
        rc = VCursorAddColumn( cur, &idx, "PLATFORM" );
        if ( rc == 0 )
        {
            rc = VCursorOpen( cur );
            if ( rc == 0 )
            {
                const uint8_t * pf = NULL;
                uint32_t row_len = 0;
                rc = VCursorCellDataDirect( cur, 1, idx, NULL, (const void**)&pf, NULL, &row_len );
                /* only dereference the cell if it really holds an element */
                if ( rc == 0 && pf != NULL && row_len > 0 )
                {
                    res = vdcd_get_platform_txt( *pf );
                }
            }
        }
        VCursorRelease( cur );
    }
    return res;
}
Ejemplo n.º 6
0
/* get_idx_and_read
 *  Looks up the index of column 'name' on the cursor and reads the cell
 *  of that column at row_id.
 *
 *  base - optional, receives the pointer to the cell data
 *  len  - optional, receives the number of elements in the cell
 *
 *  Returns the rc of VCursorGetColumnIdx() or VCursorCellDataDirect();
 *  a failure is logged and leaves base / len untouched.
 */
static rc_t get_idx_and_read( struct VCursor const *curs,
                              const char * name,
                              int64_t row_id,
                              const void ** base,
                              uint32_t * len )
{
    uint32_t col, bits, bit_ofs, n_elem;
    const void * cell;
    rc_t rc;

    rc = VCursorGetColumnIdx ( curs, &col, name );
    if ( rc != 0 )
    {
        LOGERR( klogInt, rc, "VCursorGetColumnIdx() failed" );
        return rc;
    }

    rc = VCursorCellDataDirect ( curs, row_id, col, &bits, &cell, &bit_ofs, &n_elem );
    if ( rc != 0 )
    {
        LOGERR( klogInt, rc, "VCursorCellDataDirect() failed" );
        return rc;
    }

    /* the cell is expected to start at bit offset zero */
    assert( bit_ofs == 0 );
    if ( len != NULL )
        *len = n_elem;
    if ( base != NULL )
        *base = cell;
    return rc;
}
Ejemplo n.º 7
0
/* get_string_cell
 *  If the table has the given column, opens a temporary read cursor on it,
 *  reads the cell at 'row' and prints its text into buffer with
 *  string_printf(). The cursor is released afterwards.
 *
 *  buffer is left untouched if the column does not exist or any cursor
 *  call fails.
 */
static void get_string_cell( char * buffer, size_t buffer_size, const VTable * tab, int64_t row, const char * column )
{
    const VCursor * cursor;
    uint32_t col_idx;
    rc_t rc;

    if ( !has_col( tab, column ) )
        return;

    rc = VTableCreateCursorRead( tab, &cursor );
    if ( rc != 0 )
        return;

    rc = VCursorAddColumn( cursor, &col_idx, column );
    if ( rc == 0 )
        rc = VCursorOpen( cursor );
    if ( rc == 0 )
    {
        const char * text;
        uint32_t text_len;

        rc = VCursorCellDataDirect( cursor, row, col_idx, NULL, (const void**)&text, NULL, &text_len );
        if ( rc == 0 )
        {
            size_t written;
            string_printf( buffer, buffer_size, &written, "%.*s", text_len, text );
        }
    }

    VCursorRelease( cursor );
}
Ejemplo n.º 8
0
/* cigar_loop
 *  Visits the rows first .. first + count - 1, reads the cell of column
 *  cigar_idx and hands it to discover_rna_splicing_candidates(). Rows for
 *  which at least one candidate is counted are reported through KOutMsg.
 *
 *  The loop ends early on the first non-zero rc or when Quitting() returns
 *  non-zero. Returns the last rc.
 */
static rc_t cigar_loop( const VCursor *cur,
                        uint32_t cigar_idx,
                        int64_t first,
                        uint64_t count,
                        uint32_t min_len )
{
    rc_t rc = 0;
    const int64_t end_row = ( first + count );
    int64_t row_id = first;
    rna_splice_candidates candidates;

    while ( ( row_id < end_row ) && ( rc == 0 ) && ( Quitting() == 0 ) )
    {
        const char * cigar;
        uint32_t cigar_len;

        rc = VCursorCellDataDirect ( cur, row_id, cigar_idx, NULL, ( const void ** )&cigar, NULL, &cigar_len );
        if ( rc == 0 )
        {
            /* start every row with cleared counters */
            candidates.count = 0;
            candidates.fwd_matched = 0;
            candidates.rev_matched = 0;

            rc = discover_rna_splicing_candidates( cigar_len, cigar, min_len, &candidates );
            if ( rc == 0 && candidates.count > 0 )
                rc = KOutMsg( "%d rna-splice-candidates at row #%ld : %.*s\n", candidates.count, row_id, cigar_len, cigar );
        }
        row_id++;
    }
    return rc;
}
Ejemplo n.º 9
0
/* vdb_fastq_loop_with_name
 *  Walks every row id delivered by ctx->row_generator and writes it through
 *  KOutMsg as a FASTQ record:
 *      @run_name.row_id NAME length=N
 *      READ
 *      +run_name.row_id NAME length=M
 *      QUALITY
 *
 *  Returns 0 on success, otherwise the first non-zero rc coming from
 *  Quitting(), VCursorCellDataDirect() or KOutMsg(); a non-zero rc also
 *  ends the iteration.
 */
static rc_t vdb_fastq_loop_with_name( const p_dump_context ctx, const fastq_ctx * fctx )
{
    rc_t rc = 0;
    int64_t row_id;

    vdn_start( ctx->row_generator );
    while ( vdn_next( ctx->row_generator, (uint64_t*)&row_id ) && rc == 0 )
    {
        uint32_t elem_bits, boff, name_len, read_len, qual_len;
        const char * name;
        const char * bases;
        const char * quality;

        rc = Quitting();
        if ( rc != 0 )
            continue;   /* the loop condition sees rc != 0 and stops */

        rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_name, &elem_bits,
                                    (const void**)&name, &boff, &name_len );
        if ( rc != 0 )
        {
            vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), NAME ) failed", rc, row_id );
            continue;
        }

        rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_read, &elem_bits,
                                    (const void**)&bases, &boff, &read_len );
        if ( rc != 0 )
        {
            vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), READ ) failed", rc, row_id );
            continue;
        }

        /* defline and bases */
        rc = KOutMsg( "@%s.%li %.*s length=%u\n%.*s\n",
                      fctx->run_name, row_id, name_len, name, read_len, read_len, bases );
        if ( rc != 0 )
            continue;

        rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_qual, &elem_bits,
                                    (const void**)&quality, &boff, &qual_len );
        if ( rc != 0 )
        {
            vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), QUALITY ) failed", rc, row_id );
            continue;
        }

        /* quality defline and quality string */
        rc = KOutMsg( "+%s.%li %.*s length=%u\n%.*s\n",
                      fctx->run_name, row_id, name_len, name, qual_len, qual_len, quality );
    }
    return rc;
}
Ejemplo n.º 10
0
/* RefPosMake
 *  Allocates a RefPos object and prepares its reference-table cursor.
 *
 *  The cursor is obtained from AlignRefTableCursor(); MAX_SEQ_LEN is read
 *  from row 1 into obj->max_seq_len. If that column cannot be found the
 *  cursor is released, all cursor related members are zeroed and the call
 *  still succeeds. Otherwise the NAME and NAME_RANGE columns are added to
 *  the cursor (an "already exists" result counts as success).
 *
 *  On success *objp receives the new object and 0 is returned.
 *  On failure the object is freed, *objp is left untouched and the rc is
 *  returned.
 */
static
rc_t RefPosMake ( RefPos **objp, const VTable *tbl, const VCursor *native_curs )
{
    rc_t rc;
    RefPos *self = malloc ( sizeof * self );

    if ( self == NULL )
    {
        rc = RC ( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );
        return rc;
    }

    self->curs = NULL;
    BSTreeInit ( &self->tr_range );

    /* open the reference table cursor */
    rc = AlignRefTableCursor ( tbl, native_curs, &self->curs, NULL );
    if ( rc == 0 )
    {
        uint32_t max_seq_len_idx;

        rc = VCursorAddColumn ( self->curs, &max_seq_len_idx, "(U32)MAX_SEQ_LEN" );
        if ( rc == 0 || GetRCState ( rc ) == rcExists )
        {
            const void *cell;
            uint32_t cell_len;

            rc = VCursorCellDataDirect ( self->curs, 1, max_seq_len_idx, NULL, &cell, NULL, &cell_len );
            if ( rc == 0 )
            {
                assert ( cell_len == 1 );
                memcpy ( &self->max_seq_len, cell, 4 );
            }
        }

        if ( GetRCObject ( rc ) == rcColumn && GetRCState ( rc ) == rcNotFound )
        {
            /* no MAX_SEQ_LEN means that REF_POS == REF_START */
            VCursorRelease ( self->curs );
            self->curs = NULL;
            self->max_seq_len = 0;
            self->name_range_idx = 0;
            self->name_idx = 0;
            rc = 0;
        }
        else if ( rc == 0 )
        {
            /* add the lookup columns to the cursor */
            rc = VCursorAddColumn ( self->curs, &self->name_idx, "(utf8)NAME" );
            if ( rc == 0 || GetRCState ( rc ) == rcExists )
                rc = VCursorAddColumn ( self->curs, &self->name_range_idx, "NAME_RANGE" );
            if ( GetRCState ( rc ) == rcExists )
                rc = 0;
        }
    }

    if ( rc != 0 )
    {
        VCursorRelease ( self->curs );
        free ( self );
        return rc;
    }

    *objp = self;
    return rc;
}
Ejemplo n.º 11
0
/* read_cell
 *  Reads the cell at ( row_id, column->idx ) and stores the element size,
 *  data pointer, bit offset and row length in the matching members of
 *  'column'.
 *
 *  name - column name, only used for the log message on failure
 *
 *  Returns the rc of VCursorCellDataDirect(); a failure is logged.
 */
rc_t read_cell( const VCursor *my_cursor,
                int64_t row_id,
                col *column,
                const char * name )
{
    rc_t rc = VCursorCellDataDirect ( my_cursor, row_id,
            column->idx, &column->elem_bits, &column->base,
            &column->bit_offset, &column->row_len );
    if ( rc != 0 )
        /* row_id is a signed 64 bit value: print it with a signed conversion */
        PLOGERR( klogInt, ( klogInt, rc, 
                            "VCursorCellDataDirect($(name),$(rowid)) failed", 
                            "name=%s,rowid=%ld", name, row_id ) );
    return rc;
}
Ejemplo n.º 12
0
/* report_ref_row
 *  Reads the cells of the two columns row_ctx->prim_idx and
 *  row_ctx->sec_idx at row_ctx->row_id and, if at least one of them is not
 *  empty, prints both element counts through KOutMsg.
 *
 *  Returns the rc of the failing VCursorCellDataDirect() call ( logged ),
 *  otherwise the rc of KOutMsg() or 0 if nothing had to be printed.
 */
static rc_t report_ref_row( const VCursor *cur, report_row_ctx * row_ctx )
{
    uint32_t elem_bits, boff, n_prim, n_sec;
    const void *cell;
    rc_t rc;

    rc = VCursorCellDataDirect ( cur, row_ctx->row_id, row_ctx->prim_idx, &elem_bits, &cell, &boff, &n_prim );
    if ( rc != 0 )
    {
        (void)LOGERR( klogErr, rc, "cannot read colum >PRIMARY_ALIGNMENT_IDS<" );
        return rc;
    }

    rc = VCursorCellDataDirect ( cur, row_ctx->row_id, row_ctx->sec_idx, &elem_bits, &cell, &boff, &n_sec );
    if ( rc != 0 )
    {
        (void)LOGERR( klogErr, rc, "cannot read colum >SECONDARY_ALIGNMENT_IDS<" );
        return rc;
    }

    /* rows without any alignment id are not reported */
    if ( n_prim > 0 || n_sec > 0 )
        rc = KOutMsg( "ROW[ %,lu ]: PRIM:%,u SEC:%,u\n", row_ctx->row_id, n_prim, n_sec );

    return rc;
}
Ejemplo n.º 13
0
/* LocalRefIDMake
 *  Allocates a LocalRefID object and fills obj->max_seq_len from row 1 of
 *  the MAX_SEQ_LEN column of the reference table cursor obtained through
 *  AlignRefTableCursor(). If that column cannot be found, max_seq_len is
 *  set to 0 and the call still succeeds. The cursor is only used locally
 *  and released again.
 *
 *  On success *objp receives the new object and 0 is returned.
 *  On failure the object is freed, *objp is left untouched and the rc is
 *  returned.
 */
static
rc_t LocalRefIDMake ( LocalRefID **objp, const VTable *tbl, const VCursor *native_curs)
{
    const VCursor *ref_curs = NULL;
    uint32_t col_idx;
    rc_t rc;
    LocalRefID *self = malloc ( sizeof * self );

    if ( self == NULL )
    {
        rc = RC( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );
        return rc;
    }

    /* open the reference table cursor */
    rc = AlignRefTableCursor( tbl, native_curs, &ref_curs, NULL );
    if ( rc != 0 )
    {
        free ( self );
        return rc;
    }

    rc = VCursorAddColumn( ref_curs, &col_idx, "(U32)MAX_SEQ_LEN" );
    if ( rc == 0 || GetRCState( rc ) == rcExists )
    {
        const void *cell;
        uint32_t cell_len;

        rc = VCursorCellDataDirect( ref_curs, 1, col_idx, NULL, &cell, NULL, &cell_len );
        if ( rc == 0 )
        {
            assert( cell_len == 1 );
            memmove( &self->max_seq_len, cell, 4 );
        }
    }

    /* a missing column is not an error, it just means "no limit known" */
    if ( ( GetRCObject( rc ) == ( enum RCObject )rcColumn ) && ( GetRCState( rc ) == rcNotFound ) )
    {
        self->max_seq_len = 0;
        rc = 0;
    }

    VCursorRelease( ref_curs );

    if ( rc != 0 )
    {
        free ( self );
        return rc;
    }

    *objp = self;
    return 0;
}
Ejemplo n.º 14
0
/*
 function ascii NCBI:align:ref_seq_id ( I64 ref_id );

 Row function: takes the first REF_ID element of the input, reads the cell
 of column self->seqID_idx at that row from self->curs and copies its text
 into the result buffer.

 An empty result ( elem_count 0 ) is produced when there is no cursor or
 no input element. A "row not found" result from the cursor yields an
 empty string instead of an error; every other failure is returned.
*/
static
rc_t CC align_ref_seq_id ( void *data, const VXformInfo *info,
    int64_t row_id, VRowResult *rslt, uint32_t argc, const VRowData argv[] )
{
    RefSeqID const *self = ( void const * )data;
    int64_t const *ref_id = argv[REF_ID].u.data.base;
    char const *text = NULL;
    uint32_t text_len;
    rc_t rc;

    assert( argv[ REF_ID ].u.data.elem_bits == sizeof( *ref_id ) * 8) ;

    if ( self->curs == NULL || argv[ REF_ID ].u.data.elem_count == 0 )
    {
        rslt->elem_count = 0;
        return 0;
    }

    /* step to the first element that belongs to this row */
    ref_id += argv[ REF_ID] .u.data.first_elem;

    SUB_DEBUG( ( "SUB.Rd in 'align_ref_seq_id.c' at #%lu\n", ref_id[ 0 ] ) );

    rc = VCursorCellDataDirect( self->curs, ref_id[ 0 ], self->seqID_idx, NULL, (void const **)&text, NULL, &text_len );
    if ( GetRCState( rc ) == rcNotFound && GetRCObject( rc ) == rcRow )
    {
        /* unknown row: answer with an empty string */
        text = "";
        text_len = 0;
    }
    else if ( rc != 0 )
    {
        return rc;
    }

    rc = KDataBufferCast( rslt->data, rslt->data, sizeof( text[ 0 ] ) * 8, true );
    if ( rc == 0 )
        rc = KDataBufferResize( rslt->data, text_len );
    if ( rc == 0 )
    {
        memmove( rslt->data->base, text, sizeof( text[ 0 ] ) * text_len );
        rslt->elem_count = text_len;
        rslt->elem_bits = sizeof( text[ 0 ] ) * 8;
    }
    return rc;
}
Ejemplo n.º 15
0
/* Read - PRIVATE
 *  column message sent via table
 *
 *  Opens the table's cursor on first use, then reads the cell at
 *  ( id, idx ).
 *
 *  base   - receives the pointer to the cell data
 *  offset - receives the offset of the cell from base, in bits
 *  size   - receives the size of the cell, in bits
 *
 *  Returns 0 on success. On any failure the non-NULL output parameters are
 *  reset to NULL / 0 and the rc is returned: rcNull for a NULL parameter
 *  or a NULL self, otherwise the rc of VCursorOpen() or
 *  VCursorCellDataDirect().
 */
rc_t SRATableRead ( const SRATable *self, spotid_t id, uint32_t idx,
    const void **base, bitsz_t *offset, bitsz_t *size )
{
    rc_t rc;

    if ( base == NULL || offset == NULL || size == NULL )
        rc = RC ( rcSRA, rcColumn, rcReading, rcParam, rcNull );
    else if ( self == NULL )
        rc = RC ( rcSRA, rcTable, rcAccessing, rcSelf, rcNull );
    else
    {
        rc = 0;

        /* open cursor */
        if ( ! self -> curs_open )
        {
            rc = VCursorOpen(self->curs);
            if ( rc == 0 )
                ((SRATable *)self)->curs_open = true;
        }

        if ( rc == 0 )
        {
            uint32_t elem_bits, elem_off, elem_cnt;
            rc = VCursorCellDataDirect ( self -> curs, id, idx,
                & elem_bits, base, & elem_off, & elem_cnt );
            if ( rc == 0 )
            {
                /* widen before multiplying: the product of two 32 bit
                   values does not have to fit into 32 bits */
                * offset = ( bitsz_t ) elem_off * elem_bits;
                * size   = ( bitsz_t ) elem_cnt * elem_bits;
                return 0;
            } else if( UIError(rc, NULL, self->vtbl) ) {
                UITableLOGError(rc, self->vtbl, true);
            }
        }
    }

    /* failure: hand back well defined "nothing" values */
    if ( base != NULL )
        * base = NULL;
    if ( offset != NULL )
        * offset = 0;
    if ( size != NULL )
        * size = 0;

    return rc;
}
Ejemplo n.º 16
0
/* fetch_all_rows
 *  Determines the union of the row id ranges of the given columns and
 *  reads every cell of every column within that range. The data itself is
 *  discarded; only the rc of the reads is looked at.
 *
 *  curs - cursor to read from
 *  ncol - number of entries in cid
 *  cid  - column indexes to read
 *
 *  Returns 0 on success ( also when ncol is 0 ), otherwise the first
 *  non-zero rc of VCursorIdRange() or VCursorCellDataDirect().
 */
static
rc_t fetch_all_rows(const VCursor *curs, unsigned ncol, const uint32_t cid[/* ncol */])
{
    /* start with an empty range so that ncol == 0 reads nothing instead of
       looping over uninitialised bounds */
    int64_t start = 0;
    int64_t stop = 0;
    int64_t row;
    unsigned i;
    rc_t rc;
    
    for (i = 0; i != ncol; ++i) {
        int64_t cstart;
        uint64_t ccount;
        int64_t cstop;
        
        rc = VCursorIdRange(curs, cid[i], &cstart, &ccount);
        if (rc)
            return rc;

        /* keep the end of the range signed, so that the comparison below
           is done on signed row ids */
        cstop = cstart + ccount;
        if (i == 0) {
            start = cstart;
            stop = cstop;
        }
        else {
            if (start > cstart)
                start = cstart;
            if (stop < cstop)
                stop = cstop;
        }
    }
    for (row = start; row != stop; ++row) {
        for (i = 0; i != ncol; ++i) {
            uint32_t elem_bits;
            const void *base;
            uint32_t offset;
            uint32_t length;
            
            rc = VCursorCellDataDirect(curs, row, cid[i], &elem_bits,
                                       &base, &offset, &length);
            if (rc)
                return rc;
        }
    }
    return 0;
}
Ejemplo n.º 17
0
/* rr_store_alignment
 *  Reads the cell of column read_idx at row align_id, wraps it into a new
 *  rr_entry and stores that entry in rr->v under the key align_id.
 *
 *  Returns true if the entry was stored. Returns false if the cell could
 *  not be read, the entry could not be made, or the vector refused it; in
 *  the last case the entry is released again.
 */
static bool rr_store_alignment( rr_store * rr, int64_t align_id, const VCursor * curs, uint32_t read_idx )
{
    const INSDC_4na_bin * bases = NULL;
    uint32_t base_count;
    rr_entry * entry;
    uint64_t key;

    if ( VCursorCellDataDirect( curs, align_id, read_idx, NULL, ( const void** ) &bases, NULL, &base_count ) != 0 )
        return false;

    if ( !rr_entry_make ( &entry, bases, base_count ) )
        return false;

    key = ( uint64_t ) align_id;
    if ( KVectorSetPtr ( rr -> v, key, entry ) == 0 )
        return true;

    /* the vector did not take ownership: give the entry back */
    rr_entry_release( key, entry, NULL );
    return false;
}
Ejemplo n.º 18
0
/* read_base_and_len
 *  Reads the cell at ( row_id, column_idx ).
 *
 *  base - optional, receives the pointer to the cell data
 *  len  - optional, receives the number of elements in the cell
 *
 *  Returns the rc of VCursorCellDataDirect(); a failure is logged and
 *  leaves base / len untouched.
 */
static rc_t read_base_and_len( struct VCursor const *curs,
                               uint32_t column_idx,
                               int64_t row_id,
                               const void ** base,
                               uint32_t * len )
{
    uint32_t bits, bit_ofs, n_elem;
    const void * cell;
    rc_t rc;

    rc = VCursorCellDataDirect ( curs, row_id, column_idx, &bits, &cell, &bit_ofs, &n_elem );
    if ( rc != 0 )
    {
        LOGERR( klogInt, rc, "VCursorCellDataDirect() failed" );
        return rc;
    }

    if ( len != NULL )
        *len = n_elem;
    if ( base != NULL )
        *base = cell;
    return rc;
}
Ejemplo n.º 19
0
/* cg_dump_row
 *  Reads the spot-group cell of row_id, looks up the lane registered for
 *  that spot-group in cg_ctx->lanes ( creating and inserting a new lane if
 *  none exists yet ) and passes the row to cg_dump_write_spot() together
 *  with that lane.
 *
 *  Returns the rc of the failing step ( cell read, make_lane() or
 *  BSTreeInsert() ); in that case nothing is written for this row.
 *  A lane that could not be inserted is destroyed again.
 */
static rc_t cg_dump_row( cg_dump_opts * opts, cg_dump_ctx * cg_ctx, uint64_t row_id )
{
    uint32_t elem_bits, boff, name_len;
    const char * name_ptr;
    String spot_group;
    lane * target;
    rc_t rc;

    rc = VCursorCellDataDirect( cg_ctx->seq_cur, row_id, cg_ctx->seq_sg_idx, &elem_bits, (const void**)&name_ptr, &boff, &name_len );
    if ( rc != 0 )
    {
        (void)PLOGERR( klogErr, ( klogErr, rc, "cannot read spot-group in row #$(row_id)", "row_id=%lu", row_id ) );
        return rc;
    }

    StringInit( &spot_group, name_ptr, name_len, name_len );
    target = ( lane * )BSTreeFind ( &cg_ctx->lanes, &spot_group, String_lane_cmp );
    if ( target == NULL )
    {
        /* first row of this spot-group: create its lane and register it */
        rc = make_lane( opts, cg_ctx->lookup, cg_ctx->out_dir, &spot_group, &target );
        if ( rc != 0 )
            return rc;

        rc = BSTreeInsert ( &cg_ctx->lanes, ( BSTNode * )target, lane_lane_cmp );
        if ( rc != 0 )
        {
            (void)LOGERR( klogErr, rc, "cannot insert new lane" );
            whack_lane( target );
            return rc;
        }
    }

    cg_dump_write_spot( opts, cg_ctx, row_id, target );
    return rc;
}
Ejemplo n.º 20
0
/*
 function ascii NCBI:align:ref_name ( I64 ref_id );

 Row function: takes the first REF_ID element of the input, reads the cell
 of column self->name_idx at that row from self->curs and copies its text
 into the result buffer.

 An input without elements is treated like a "row not found" result from
 the cursor: both yield an empty string instead of an error. Every other
 failure is returned.
*/
static
rc_t CC align_ref_name ( void *data, const VXformInfo *info,
    int64_t row_id, VRowResult *rslt, uint32_t argc, const VRowData argv[] )
{
    RefName const *self = (void const *)data;
    int64_t const *ref_id = argv[REF_ID].u.data.base;
    char const *text = NULL;
    uint32_t text_len;
    rc_t rc;

    if (argv[REF_ID].u.data.elem_count != 0) {
        assert(argv[REF_ID].u.data.elem_bits == sizeof(*ref_id) * 8);

        /* step to the first element that belongs to this row */
        ref_id += argv[REF_ID].u.data.first_elem;

        rc = VCursorCellDataDirect(self->curs, ref_id[0], self->name_idx, NULL, (void const **)&text, NULL, &text_len);
    }
    else {
        rc = RC(rcAlign, rcFunction, rcExecuting, rcRow, rcNotFound);
    }

    if (GetRCState(rc) == rcNotFound && GetRCObject(rc) == rcRow) {
        /* unknown row: answer with an empty string */
        text = "";
        text_len = 0;
    }
    else if (rc) {
        return rc;
    }

    rc = KDataBufferCast(rslt->data, rslt->data, sizeof(text[0]) * 8, true);
    if (rc == 0)
        rc = KDataBufferResize(rslt->data, text_len);
    if (rc == 0) {
        memcpy(rslt->data->base, text, sizeof(text[0]) * text_len);
        rslt->elem_count = text_len;
        rslt->elem_bits = sizeof(text[0]) * 8;
    }
    return rc;
}
Ejemplo n.º 21
0
/* read_from_ref_node
 *  Copies ref_len bytes, starting at position ref_offset, from the cells of
 *  column node->hits_idx into exclude_vector. The position is translated
 *  into a 1-based row id and an offset inside that row using
 *  node->read_len; following rows are read until ref_len bytes have been
 *  copied. node->bytes_requested is increased by the number of bytes copied.
 *
 *  active - receives the number of non-zero bytes in exclude_vector on
 *           success, 0 on failure
 *
 *  Returns the rc of VCursorCellDataDirect(), or an rcInvalid error if the
 *  offset inside a row lies beyond the data of that row. Both are logged.
 */
static rc_t read_from_ref_node( ref_node * node, 
                                int32_t ref_offset, uint32_t ref_len,
                                uint8_t *exclude_vector,
                                uint32_t *active )
{
    rc_t rc = 0;
    uint64_t row_id = ( ref_offset / node->read_len ) + 1;
    uint32_t src_ofs = ref_offset % node->read_len;
    uint8_t *dst = exclude_vector;
    uint32_t remaining = ref_len;

    while ( remaining > 0 && rc == 0 )
    {
        uint32_t elem_bits, boff, rlen, chunk;
        const uint8_t *src;

        rc = VCursorCellDataDirect ( node->cur, row_id, node->hits_idx,
                                     &elem_bits, (const void**)&src, &boff, &rlen );
        if ( rc != 0 )
        {
            PLOGERR( klogInt, ( klogInt, rc, 
                 "error to read $(col_name) from 1st row in table $(db_name).$(tab_name)",
                 "col_name=%s,db_name=%S,tab_name=%s",
                 HITS_COLUMN, node->name, HITMAP_TAB ) );
            continue;   /* the loop condition sees rc != 0 and stops */
        }

        if ( src_ofs >= rlen )
        {
            rc = RC( rcApp, rcNoTarg, rcReading, rcParam, rcInvalid );
            PLOGERR( klogInt, ( klogInt, rc, 
                 "error: try to read more data than are in var-loc $(tab_name)",
                 "tab_name=%S", node->name ) );
            continue;
        }

        /* take what this row offers, but not more than still needed */
        chunk = ( rlen - src_ofs );
        if ( chunk > remaining )
            chunk = remaining;

        memmove( dst, src + src_ofs, chunk );
        dst += chunk;
        remaining -= chunk;
        node->bytes_requested += chunk;

        /* every following row is read from its beginning */
        src_ofs = 0;
        row_id ++;
    }

    *active = 0;
    if ( rc == 0 )
    {
        uint32_t i;
        for ( i = 0; i < ref_len; ++i )
        {
            if ( exclude_vector[ i ] > 0 )
                ( *active )++;
        }
    }

    return rc;
}
Ejemplo n.º 22
0
bool nextPileup (
    PileupIteratorState* pileup_state,
    VCursor const* cursor_ref, VCursor const* cursor_pa,
    char const* const* column_names_ref, uint32_t* column_index_ref, size_t column_count_ref,
    char const* const* column_names_pa, uint32_t* column_index_pa, size_t column_count_pa,
    char* error_buf,
    size_t error_buf_size
    )
{
    int64_t ref_row_id; /* current row_id */
    int64_t prev_ref_row_id;
    uint64_t ref_pos = pileup_state->ref_pos;
    rc_t rc;

    /* TODO: check the case when slice_end is beyond the reference end*/
    if ( pileup_state->slice_length && pileup_state->ref_pos == pileup_state->slice_start + pileup_state->slice_length )
    {
        error_buf[0] = '\0'; /* indicating that no error has occured */
        return false;
    }

    /* drop cached alignments that we will not need anymore */
    remove_unneeded_alignments ( pileup_state, ref_pos, error_buf, error_buf_size ); /* it's not an issue but this action is not rolled backed in the case of error below */

    /* Check if we moved to the next reference row_id,
       if yes - read it and add appropriate alignments to cache
    */

    prev_ref_row_id = pileup_state->reference_start_id + ref_pos / pileup_state->max_seq_len;
    ++ ref_pos;
    ref_row_id = pileup_state->reference_start_id + ref_pos / pileup_state->max_seq_len;

    if ( ref_row_id != prev_ref_row_id ) /* moved to the next row_id */
    {
        uint32_t dummy;
        uint32_t row_len;
        uint32_t seq_start;
#if USE_SINGLE_BLOB_FOR_ALIGNMENT_IDS != 1
        int64_t const* alignment_ids;
#endif

        char ref_name[ countof (pileup_state->ref_name) ];

        /* TODO: consider storing this in pileup_state (don't need to calculate every time)*/
        /*slice_start_id = pileup_state->reference_start_id + pileup_state->slice_start/pileup_state->max_seq_len;
        slice_end_id = pileup_state->slice_length != 0 ?
            pileup_state->reference_start_id + (pileup_state->slice_start + (int64_t)pileup_state->slice_length)/pileup_state->max_seq_len :
            (int64_t)pileup_state->total_row_count;*/
        
        if ( ref_row_id < pileup_state->slice_start_id || ref_row_id > pileup_state->slice_end_id )
        {
            error_buf[0] = '\0'; /* indicating that no error has occured */
            return false;
        }

        rc = VCursorReadDirect ( cursor_ref, ref_row_id, column_index_ref [COL_NAME],
            sizeof (ref_name[0]) * 8, ref_name, countof(ref_name), & row_len );
        if ( rc != 0 )
        {
            rc_t res = string_printf ( error_buf, error_buf_size, NULL,
                "ERROR: VCursorReadDirect(ref) failed with error: 0x%08x (%u) [%R]", rc, rc, rc);
            if (res == rcBuffer || res == rcInsufficient)
                error_buf [ error_buf_size - 1 ] = '\0';

            return false;
        }
        ref_name[ min ( countof(ref_name) - 1, row_len) ] = '\0';
        if ( strcmp (ref_name, pileup_state->ref_name) )
        {
            /*Alignment_Init ( & pileup_state->cache_alignment);
            strncpy ( pileup_state->ref_name, ref_name, countof (pileup_state->ref_name) - 1 );
            pileup_state->reference_start_id = ref_row_id;*/

            error_buf[0] = '\0'; /* indicating that no error has occured */
            return false;
        }

#if USE_SINGLE_BLOB_FOR_ALIGNMENT_IDS == 1
        rc = open_blob_for_current_id ( ref_row_id,
            cursor_ref, & pileup_state->blob_alignment_ids,
            column_index_ref [COL_PRIMARY_ALIGNMENT_IDS],
            error_buf, error_buf_size );
        if (rc != 0)
            return false;
#endif

        /* Read new SEQ_START */
        rc = VCursorReadDirect ( cursor_ref, ref_row_id,
                                 column_index_ref [COL_SEQ_START],
                                 sizeof (seq_start) * 8, & seq_start, 1, & row_len );
        if ( rc != 0 )
        {
            rc_t res = string_printf ( error_buf, error_buf_size, NULL,
                "ERROR: VCursorReadDirect(ref-seq_start) failed with error: 0x%08x (%u) [%R]",
                rc, rc, rc);
            if (res == rcBuffer || res == rcInsufficient)
                error_buf [ error_buf_size - 1 ] = '\0';

            return false;
        }
        pileup_state->current_seq_start = seq_start;

        /* Read REFERENCE row's PRIMARY_ALIGNMENT_IDS column to iterate through them */
        /* elem_bits = sizeof (*pileup_state->alignment_ids) * 8; */
#if USE_SINGLE_BLOB_FOR_ALIGNMENT_IDS == 1
        rc = VBlobCellData ( pileup_state->blob_alignment_ids, ref_row_id,
            & dummy, & pileup_state->alignment_ids, NULL, & row_len );
        if ( rc != 0 )
        {
            rc_t res = string_printf ( error_buf, error_buf_size, NULL,
                "ERROR: VBlobCellData(ref-pa_ids) failed with error: 0x%08x (%u) [%R], row_len=%u",
                rc, rc, rc, row_len);
            if (res == rcBuffer || res == rcInsufficient)
                error_buf [ error_buf_size - 1 ] = '\0';

            return false;
        }
        pileup_state -> size_alignment_ids = row_len;
#else
        rc = VCursorCellDataDirect ( cursor_ref, ref_row_id,
                    column_index_ref [COL_PRIMARY_ALIGNMENT_IDS],
                    NULL,
                    (void const**)(& alignment_ids), 0, & row_len );

        /*rc = VCursorReadDirect ( cursor_ref, ref_row_id,
                                 column_index_ref [COL_PRIMARY_ALIGNMENT_IDS],
                                 sizeof (*pileup_state->alignment_ids) * 8,
                                 pileup_state->alignment_ids,
                                 countof (pileup_state->alignment_ids),
                                 & row_len );*/
        if ( rc != 0 )
        {
            rc_t res = string_printf ( error_buf, error_buf_size, NULL,
                "ERROR: VCursorCellDataDirect(ref-pa_ids) failed with error: 0x%08x (%u) [%R], row_len=%u",
                rc, rc, rc, row_len);
            if (res == rcBuffer || res == rcInsufficient)
                error_buf [ error_buf_size - 1 ] = '\0';

            return false;
        }
        rc = PileupIteratorState_SetAlignmentIds ( pileup_state, alignment_ids, row_len );
        if ( rc != 0 )
        {
            rc_t res = string_printf ( error_buf, error_buf_size, NULL,
                "ERROR: PileupIteratorState_SetAlignmentIds failed with error: 0x%08x (%u), row_len=%u",
                rc, rc, row_len);
            if (res == rcBuffer || res == rcInsufficient)
                error_buf [ error_buf_size - 1 ] = '\0';

            return rc;
        }
#endif
        pileup_state->next_alignment_idx = 0;
        /*pileup_state->size_alignment_ids = row_len;*/

        printf ("Read %lu PRIMARY_ALIGNMENT_IDS for REFERENCE row_id=%lld\n", row_len, ref_row_id);

        /* For each PRIMARY_ALIGNMENT_ID in pa_ids: read its start, length and
           cache it if it intersects the slice
        */
        rc = add_ref_row_to_cache ( pileup_state, cursor_pa, seq_start, ref_pos,
                pileup_state->alignment_ids, row_len,
                column_names_pa, column_index_pa, column_count_pa,
                error_buf, error_buf_size );
        if ( rc != 0 )
            return false;

        /*pileup_state -> seq_start = seq_start;*/
    }
    else
    {
        /* read remaining alignment_ids and check if they must be cached */
        size_t count = pileup_state->size_alignment_ids - pileup_state->next_alignment_idx;
        if (count > 0)
        {
            rc_t rc = add_ref_row_to_cache ( pileup_state, cursor_pa,
                pileup_state->current_seq_start, ref_pos,
                & pileup_state->alignment_ids[ pileup_state->next_alignment_idx ],
                (uint32_t)count,
                column_names_pa, column_index_pa, column_count_pa,
                error_buf, error_buf_size );
            if ( rc != 0 )
                return false;
        }
    }

    ++ pileup_state->ref_pos;
    return true;
}
Ejemplo n.º 23
0
/* build_scaffold_read_impl
 *  Assemble the 4na bases of a scaffold into Dst, one component at a time.
 *
 *  For each of the `components` entries, Length[i] bytes are written at the
 *  running output offset:
 *    - Props[i] < 0 (gap):        filled with READ_GAP_VALUE
 *    - self->curs == NULL:        filled with 15
 *    - otherwise:                 row join[id] of column self->col_idx is read
 *                                 and Length[i] elements starting at the
 *                                 one-based position Start[i] are copied;
 *                                 when strand == 2 they are written in
 *                                 reverse order and complemented.
 *  `id` only advances for components that are actually read from the cursor,
 *  so join[] is indexed by "n-th sequence component", not by i.
 *
 *  Returns 0 on success, otherwise:
 *    - rcType/rcInvalid       when type != 0 or strand == 3
 *    - rcData/rcInvalid       when Start[i] < 1
 *    - rcData/rcInsufficient  when the row holds fewer than
 *                             (Start[i] - 1) + Length[i] elements
 *    - whatever VCursorCellDataDirect returned
 */
static
rc_t build_scaffold_read_impl(self_t const *const self, void *const Dst,
                              unsigned const components,
                              INSDC_coord_one const Start[/* components */],
                              INSDC_coord_len const Length[/* components */],
                              NCBI_WGS_component_props const Props[/* components */],
                              int64_t const join[/* components */])
{
    INSDC_4na_bin *const dst = Dst;
    unsigned i;     /* current component */
    unsigned j;     /* write offset into dst */
    unsigned id;    /* next unused entry of join[] */
    rc_t rc;
    
    for (rc = 0, id = j = i = 0; rc == 0 && i != components; ++i) {
        INSDC_coord_len const length = Length[i];
        int const props = Props[i];

        if (props < 0) {
            /* gap */
            memset(dst + j, READ_GAP_VALUE, length);
        }
        else if (self->curs == NULL) {
            /* nothing to read from: emit 15 for every position */
            memset(dst + j, 15, length);
        }
        else {
            int const type = props & 0x0F;
            /* NOTE(review): this masks the strand bits OUT before shifting;
               if the intent is to extract the strand, the '~' looks wrong.
               Left unchanged - confirm against the NCBI_WGS_strand_* values. */
            int const strand = (props & ~(NCBI_WGS_strand_plus | NCBI_WGS_strand_minus)) >> 4;
            
            if (type != 0 || strand == 3)
                rc = RC(rcXF, rcFunction, rcExecuting, rcType, rcInvalid);
            else {
                int64_t const row = join[id++];
                uint32_t elem_bits;
                uint32_t bit_offset;
                uint32_t elem_count;
                void const *base;
                
                rc = VCursorCellDataDirect(self->curs, row, self->col_idx,
                                           &elem_bits, &base, &bit_offset,
                                           &elem_count);
                if (rc == 0) {
                    INSDC_coord_one const start = Start[i] - 1;

                    /* the out-parameters are only meaningful after success */
                    assert(bit_offset == 0);

                    if (start < 0) {
                        /* a start before the first element would index
                           in front of the row data */
                        rc = RC(rcXF, rcFunction, rcExecuting, rcData, rcInvalid);
                    }
                    else if ((uint32_t)start > elem_count ||
                             elem_count - (uint32_t)start < length) {
                        /* written as a subtraction so start + length
                           cannot wrap around */
                        rc = RC(rcXF, rcFunction, rcExecuting, rcData, rcInsufficient);
                    }
                    else {
                        INSDC_4na_bin const *const src = base;
                        
                        if (strand == 2) {
                            /* 4na complement: the four bits of each code
                               in reverse order */
                            static INSDC_4na_bin const complement[] = {
                                /* 0  0000 - 0000*/  0,
                                /* 1  0001 - 1000*/  8,
                                /* 2  0010 - 0100*/  4,
                                /* 3  0011 - 1100*/ 12,
                                /* 4  0100 - 0010*/  2,
                                /* 5  0101 - 1010*/ 10,
                                /* 6  0110 - 0110*/  6,
                                /* 7  0111 - 1110*/ 14,
                                /* 8  1000 - 0001*/  1,
                                /* 9  1001 - 1001*/  9,
                                /*10  1010 - 0101*/  5,
                                /*11  1011 - 1101*/ 13,
                                /*12  1100 - 0011*/  3,
                                /*13  1101 - 1011*/ 11,
                                /*14  1110 - 0111*/  7,
                                /*15  1111 - 1111*/ 15
                            };
                            unsigned k;
                            unsigned jj;
                            
                            /* read forward, write backward from the end
                               of this component's output range */
                            for (jj = j + length, k = 0; k != length; ++k) {
                                INSDC_4na_bin const elem = src[start + k];
                                
                                assert(/* 0 <= elem && */ elem <= 15);
                                --jj;
                                dst[jj] = complement[elem];
                            }
                        }
                        else
                            memcpy(&dst[j], &src[start], length);
                    }
                }
            }
        }

        j += length;
    }
    return rc;
}
Ejemplo n.º 24
0
/* seq_restore_read_impl1
 *  Rebuild the full READ of a spot from its locally stored bases and from
 *  the reads kept in the alignment table.
 *
 *  Inputs as used by the code below:
 *    argv[ 0 ]  locally stored bases (8-bit elements)
 *    argv[ 1 ]  one 64-bit alignment id per read
 *    argv[ 2 ]  one length per read
 *    argv[ 3 ]  one read-type byte per read
 *
 *  The output row is resized to the sum of all read lengths. A read whose
 *  alignment id is > 0 is fetched through self->curs / self->read_idx and
 *  copied (forward) or written reversed through map[] (reverse); any other
 *  read is taken from the remaining local bases.
 *
 *  After more than 100 consecutive row ids the function asks the cursor to
 *  prefetch the alignment ids of the rest of the blob.
 *
 *  Returns 0 on success, rcData/rcInconsistent when lengths or read types
 *  do not match, or the rc of KDataBufferResize / VCursorCellDataDirect.
 */
static
rc_t CC seq_restore_read_impl1 ( void *data, const VXformInfo *info, int64_t row_id,
                                 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    Read_Restorer *const restorer       = data;
    const uint32_t num_reads            = (uint32_t)argv[ 1 ].u.data.elem_count;
    const int read_count                = (int)num_reads;
    uint32_t local_left                 = (uint32_t)argv[ 0 ].u.data.elem_count;
    const INSDC_4na_bin *local          = argv[ 0 ].u.data.base;
    const int64_t *ids                  = argv[ 1 ].u.data.base;
    const INSDC_coord_len *lengths      = argv[ 2 ].u.data.base;
    const uint8_t *types                = argv[ 3 ].u.data.base;
    INSDC_coord_len total               = 0;
    INSDC_4na_bin *out;
    bool sequential                     = false;
    int n;
    rc_t rc;

    assert( argv[ 0 ].u.data.elem_bits == 8 );
    assert( argv[ 1 ].u.data.elem_bits == 64 );
    assert( argv[ 2 ].u.data.elem_bits == sizeof( INSDC_coord_len ) * 8 );
    assert( argv[ 2 ].u.data.elem_count == num_reads );
    assert( argv[ 3 ].u.data.elem_count == num_reads );

    /* move every input pointer to the first element of this row */
    local   += argv[ 0 ].u.data.first_elem;
    ids     += argv[ 1 ].u.data.first_elem;
    lengths += argv[ 2 ].u.data.first_elem;
    types   += argv[ 3 ].u.data.first_elem;

    /* track runs of consecutive row ids; a jump restarts the run */
    if ( row_id == restorer->last_row_id || row_id == restorer->last_row_id + 1 )
    {
        if ( row_id > restorer->first_sequential_row_id + 100 )
            sequential = true;
    }
    else
    {
        restorer->first_sequential_row_id = row_id;
    }
    restorer->last_row_id = row_id;

    /* total number of bases in the restored row */
    for ( n = 0; n < read_count; ++n )
        total += lengths[ n ];

    /* size the output row */
    rslt->data->elem_bits = 8;
    rc = KDataBufferResize( rslt->data, total );
    rslt->elem_count = total;
    out = rslt->data->base;

    if ( rc != 0 || total == 0 )
        return rc;

    if ( total == local_left )
    {
        /* every base is stored locally: one copy does it */
        memmove( out, local, total );
        return rc;
    }

    if ( sequential &&
         ( row_id < restorer->prefetch_start_id || row_id > restorer->prefetch_stop_id ) )
    {
        /* ids from this row up to the end of the alignment-id blob */
        uint32_t num_rows = ( argv[ 1 ].u.data.base_elem_count - argv[ 1 ].u.data.first_elem );

        VCursorDataPrefetch( restorer->curs, ids, restorer->read_idx,
                             num_rows, 1, INT64_MAX, true );

        restorer->prefetch_start_id = row_id;
        restorer->prefetch_stop_id  = argv[ 1 ].blob_stop_id;
    }

    /* assemble the row read by read */
    for ( n = 0; n < read_count && rc == 0; ++n )
    {
        const INSDC_coord_len want = lengths[ n ];

        if ( ids[ n ] > 0 )
        {
            /* aligned read: bases come from the alignment table */
            const INSDC_4na_bin *remote;
            uint32_t remote_len;

            SUB_DEBUG( ( "SUB.Rd in 'seq-restore-read.c' at #%lu\n", ids[ n ] ) );

            rc = VCursorCellDataDirect( restorer->curs, ids[ n ], restorer->read_idx,
                                        NULL, ( const void** ) &remote, NULL, &remote_len );
            if ( rc == 0 )
            {
                if ( remote_len != want )
                {
                    rc = RC( rcXF, rcFunction, rcExecuting, rcData, rcInconsistent );
                }
                else if ( types[ n ] & SRA_READ_TYPE_FORWARD )
                {
                    memmove( out, remote, want );
                }
                else if ( types[ n ] & SRA_READ_TYPE_REVERSE )
                {
                    /* write back to front, translating each base via map[]
                       (defined elsewhere; presumably the 4na complement) */
                    int from = want - 1;
                    int to;
                    for ( to = 0; to < (int)want; ++to, --from )
                        out[ to ] = map [ remote[ from ] & 15 ];
                }
                else
                {
                    rc = RC( rcXF, rcFunction, rcExecuting, rcData, rcInconsistent );
                }
            }
        }
        else
        {
            /* unaligned read: consume the next bases of the local data */
            if ( local_left < want )
                return RC( rcXF, rcFunction, rcExecuting, rcData, rcInconsistent );

            memmove( out, local, want );
            local_left -= want;
            local      += want;
        }

        out += want;
    }

    return rc;
}
Ejemplo n.º 25
0
/* --------------------------------------------------------------------------------------
    seq_restore_read_impl2
        Rebuild the full READ of a spot; reads with an alignment id > 0 are taken
        from self->read_store when present there, otherwise from the alignment
        table through self->curs. All other reads come from the local data.

    argv[ 0 ]   ... CMP_READ
    argv[ 1 ]   ... PRIM_ALIG_ID
    argv[ 2 ]   ... READ_LEN
    argv[ 3 ]   ... READ_TYPE

    Returns 0 on success, rcData/rcInconsistent when lengths or read types do not
    match, or the rc of KDataBufferResize / VCursorCellDataDirect.
-------------------------------------------------------------------------------------- */
static rc_t CC seq_restore_read_impl2 ( void *data, const VXformInfo *info, int64_t row_id,
                                 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    Read_Restorer *const restorer   = data;
    const uint32_t num_reads        = (uint32_t)argv[ 1 ].u.data.elem_count;
    uint32_t local_left             = (uint32_t)argv[ 0 ].u.data.elem_count;
    const INSDC_4na_bin *local      = argv[ 0 ].u.data.base;
    const INSDC_coord_len *lengths  = argv[ 2 ].u.data.base;
    const uint8_t *types            = argv[ 3 ].u.data.base;
    const int64_t blob_last_row     = argv[ 1 ].blob_stop_id;
    INSDC_coord_len total           = 0;
    INSDC_4na_bin *out;
    id_list ids;
    uint32_t n;
    rc_t rc;

    assert( argv[ 0 ].u.data.elem_bits == 8 );
    assert( argv[ 1 ].u.data.elem_bits == 64 );
    assert( argv[ 2 ].u.data.elem_bits == sizeof( INSDC_coord_len ) * 8 );
    assert( argv[ 2 ].u.data.elem_count == num_reads );
    assert( argv[ 3 ].u.data.elem_count == num_reads );

    /* move every input pointer to the first element of this row */
    local   += argv[ 0 ].u.data.first_elem;
    lengths += argv[ 2 ].u.data.first_elem;
    types   += argv[ 3 ].u.data.first_elem;

    /* alignment ids from this row up to the end of the blob */
    ids.count = ( uint32_t )( argv[ 1 ].u.data.base_elem_count - argv[ 1 ].u.data.first_elem );
    ids.list  = ( int64_t * )argv[ 1 ].u.data.base;
    ids.list += argv[ 1 ].u.data.first_elem;

    handle_caching( restorer, &ids, row_id, blob_last_row );

    /* total number of bases in the restored row */
    for ( n = 0; n < num_reads; ++n )
        total += lengths[ n ];

    /* size the output row */
    rslt->data->elem_bits = 8;
    rc = KDataBufferResize( rslt->data, total );
    rslt->elem_count = total;
    out = rslt->data->base;

    if ( rc != 0 || total == 0 )
        return rc;

    if ( total == local_left )
    {
        /* every base is stored locally: one copy does it */
        memmove( out, local, total );
        return rc;
    }

    /* assemble the row read by read */
    for ( n = 0; n < num_reads && rc == 0; ++n )
    {
        const INSDC_coord_len want = lengths[ n ];
        const int64_t id = ids.list[ n ];

        if ( id > 0 )
        {
            const INSDC_4na_bin *bases;
            uint32_t bases_len;
            rr_entry *hit;
            bool cached = false;

            if ( restorer->read_store != NULL )
                cached = rr_get_read ( restorer->read_store, id, &hit );

            if ( cached )
            {
                /* served from the read store */
                bases     = &( hit->read[ 0 ] );
                bases_len = hit->read_len;
            }
            else
            {
                /* not in the store: read it from the alignment table */
                rc = VCursorCellDataDirect( restorer->curs, id, restorer->read_idx,
                                            NULL, ( const void** ) &bases, NULL, &bases_len );
            }

            if ( rc == 0 )
            {
                if ( bases_len != want )
                {
                    rc = RC( rcXF, rcFunction, rcExecuting, rcData, rcInconsistent );
                }
                else if ( types[ n ] & SRA_READ_TYPE_FORWARD )
                {
                    memmove( out, bases, want );
                }
                else if ( types[ n ] & SRA_READ_TYPE_REVERSE )
                {
                    /* write back to front, translating each base via map[]
                       (defined elsewhere; presumably the 4na complement) */
                    int from = want - 1;
                    int to;
                    for ( to = 0; to < (int)want; ++to, --from )
                        out[ to ] = map [ bases[ from ] & 15 ];
                }
                else
                {
                    rc = RC( rcXF, rcFunction, rcExecuting, rcData, rcInconsistent );
                }
            }
        }
        else
        {
            /* unaligned read: consume the next bases of the local data */
            if ( local_left < want )
                return RC( rcXF, rcFunction, rcExecuting, rcData, rcInconsistent );

            memmove( out, local, want );
            local_left -= want;
            local      += want;
        }

        out += want;
    }

    return rc;
}
Ejemplo n.º 26
0
/* build_scaffold_qual_impl
 *  Assemble the quality values of a scaffold into Dst, one component at a
 *  time.
 *
 *  For each of the `components` entries, Length[i] bytes are written at the
 *  running output offset:
 *    - Props[i] < 0 (gap):        filled with QUAL_GAP_VALUE
 *    - self->curs == NULL:        filled with 30
 *    - otherwise:                 row join[id] of column self->col_idx is read
 *                                 and Length[i] elements starting at the
 *                                 one-based position Start[i] are copied;
 *                                 when strand == 2 they are written in
 *                                 reverse order.
 *  `id` only advances for components that are actually read from the cursor,
 *  so join[] is indexed by "n-th sequence component", not by i.
 *
 *  Returns 0 on success, otherwise:
 *    - rcType/rcInvalid       when type != 0 or strand == 3
 *    - rcData/rcInvalid       when Start[i] < 1
 *    - rcData/rcInsufficient  when the row holds fewer than
 *                             (Start[i] - 1) + Length[i] elements
 *    - whatever VCursorCellDataDirect returned
 */
static
rc_t build_scaffold_qual_impl(self_t const *const self, void *const Dst,
                              unsigned const components,
                              INSDC_coord_one const Start[/* components */],
                              INSDC_coord_len const Length[/* components */],
                              NCBI_WGS_component_props const Props[/* components */],
                              int64_t const join[/* components */])
{
    INSDC_quality_phred *const dst = Dst;
    unsigned i;     /* current component */
    unsigned j;     /* write offset into dst */
    unsigned id;    /* next unused entry of join[] */
    rc_t rc;
    
    for (rc = 0, id = j = i = 0; rc == 0 && i != components; ++i) {
        INSDC_coord_len const length = Length[i];
        int const props = Props[i];
        
        if (props < 0) {
            /* gap */
            memset(dst + j, QUAL_GAP_VALUE, length);
        }
        else if (self->curs == NULL) {
            /* nothing to read from: emit 30 for every position */
            memset(dst + j, 30, length);
        }
        else {
            int const type = props & 0x0F;
            /* NOTE(review): this masks the strand bits OUT before shifting;
               if the intent is to extract the strand, the '~' looks wrong.
               Left unchanged - confirm against the NCBI_WGS_strand_* values. */
            int const strand = (props & ~(NCBI_WGS_strand_plus | NCBI_WGS_strand_minus)) >> 4;
            
            if (type != 0 || strand == 3)
                rc = RC(rcXF, rcFunction, rcExecuting, rcType, rcInvalid);
            else {
                int64_t const row = join[id++];
                uint32_t elem_bits;
                uint32_t bit_offset;
                uint32_t elem_count;
                void const *base;
                
                rc = VCursorCellDataDirect(self->curs, row, self->col_idx,
                                           &elem_bits, &base, &bit_offset,
                                           &elem_count);
                if (rc == 0) {
                    /* start is a coordinate, so it uses the coordinate type
                       of Start[] rather than the quality element type, which
                       could not hold a full coordinate if it is narrower */
                    INSDC_coord_one const start = Start[i] - 1;

                    /* the out-parameters are only meaningful after success */
                    assert(bit_offset == 0);

                    if (start < 0) {
                        /* a start before the first element would index
                           in front of the row data */
                        rc = RC(rcXF, rcFunction, rcExecuting, rcData, rcInvalid);
                    }
                    else if ((uint32_t)start > elem_count ||
                             elem_count - (uint32_t)start < length) {
                        /* written as a subtraction so start + length
                           cannot wrap around */
                        rc = RC(rcXF, rcFunction, rcExecuting, rcData, rcInsufficient);
                    }
                    else {
                        INSDC_quality_phred const *const src = base;
                        
                        if (strand == 2) {
                            unsigned k;
                            unsigned jj;
                            
                            /* read forward, write backward from the end
                               of this component's output range */
                            for (jj = j + length, k = 0; k != length; ++k) {
                                INSDC_quality_phred const elem = src[start + k];
                                
                                --jj;
                                dst[jj] = elem;
                            }
                        }
                        else
                            memcpy(&dst[j], &src[start], length);
                    }
                }
            }
        }
        
        j += length;
    }
    return rc;
}
Ejemplo n.º 27
0
/**
 * Runs consistency checks over the PRIMARY_ALIGNMENT, SECONDARY_ALIGNMENT and
 * SEQUENCE tables of one accession.
 *
 * The function returns nothing. Every failed check is reported by throwing:
 *  - VDB_ERROR / VDB_ROW_ERROR when a cursor call fails (or yields unusable data)
 *  - DATA_ERROR when the data itself is inconsistent
 * When all checks pass, an informational "looks good" message is logged.
 *
 * Steps:
 *  1. add the needed columns to the three cursors (TMP_MISMATCH is optional)
 *     and open the cursors
 *  2. scan SECONDARY_ALIGNMENT rows, limited by the sa_cutoff_* settings
 *  3. scan SEQUENCE rows, limited by the seq_cutoff_* settings
 *
 * @param accession   name used in log messages only
 * @param config      cutoff and threshold settings for the scans
 * @param pa_cursor   cursor on PRIMARY_ALIGNMENT; columns are added and it is opened here
 * @param sa_cursor   cursor on SECONDARY_ALIGNMENT; columns are added and it is opened here
 * @param seq_cursor  cursor on SEQUENCE; columns are added and it is opened here
 */
void runChecks ( const char * accession, const CheckCorruptConfig * config, const VCursor * pa_cursor, const VCursor * sa_cursor, const VCursor * seq_cursor )
{
    rc_t rc;
    uint32_t pa_has_ref_offset_idx;
    uint32_t sa_has_ref_offset_idx;
    uint32_t sa_seq_spot_id_idx;
    uint32_t sa_seq_read_id_idx;
    uint32_t sa_pa_id_idx;
    uint32_t sa_tmp_mismatch_idx;
    uint32_t seq_pa_id_idx;
    uint32_t seq_read_len_idx;
    uint32_t seq_cmp_read_idx;
    bool has_tmp_mismatch;

    /* add columns to cursor */
    /* add_column expands to two statements: the VCursorAddColumn call and a
       throw on failure; tbl_name and col_spec must be string literals because
       they are pasted into the error message */
#define add_column(tbl_name, cursor, idx, col_spec) \
    rc = VCursorAddColumn( cursor, &idx, col_spec ); \
    if ( rc != 0 ) \
        throw VDB_ERROR("VCursorAddColumn() failed for " tbl_name " table, " col_spec " column", rc);

    add_column( "PRIMARY_ALIGNMENT", pa_cursor, pa_has_ref_offset_idx, "(bool)HAS_REF_OFFSET" );
    add_column( "SECONDARY_ALIGNMENT", sa_cursor, sa_has_ref_offset_idx, "(bool)HAS_REF_OFFSET" );
    add_column( "SECONDARY_ALIGNMENT", sa_cursor, sa_seq_spot_id_idx, "SEQ_SPOT_ID" );
    add_column( "SECONDARY_ALIGNMENT", sa_cursor, sa_seq_read_id_idx, "SEQ_READ_ID" );
    add_column( "SECONDARY_ALIGNMENT", sa_cursor, sa_pa_id_idx, "PRIMARY_ALIGNMENT_ID" );
    add_column( "SEQUENCE", seq_cursor, seq_pa_id_idx, "PRIMARY_ALIGNMENT_ID" );
    add_column( "SEQUENCE", seq_cursor, seq_read_len_idx, "READ_LEN" );
    add_column( "SEQUENCE", seq_cursor, seq_cmp_read_idx, "CMP_READ" );

    // optional columns: failing to add TMP_MISMATCH only disables its check
    rc = VCursorAddColumn( sa_cursor, &sa_tmp_mismatch_idx, "TMP_MISMATCH" );
    if ( rc == 0 )
        has_tmp_mismatch = true;
    else
    {
        has_tmp_mismatch = false;
        rc = 0;
    }


#undef add_column

    rc = VCursorOpen( pa_cursor );
    if (rc != 0)
        throw VDB_ERROR("VCursorOpen() failed for PRIMARY_ALIGNMENT table", rc);
    rc = VCursorOpen( sa_cursor );
    if (rc != 0)
        throw VDB_ERROR("VCursorOpen() failed for SECONDARY_ALIGNMENT table", rc);
    rc = VCursorOpen( seq_cursor );
    if (rc != 0)
        throw VDB_ERROR("VCursorOpen() failed for SEQUENCE table", rc);

    int64_t sa_id_first;
    uint64_t sa_row_count;

    rc = VCursorIdRange( sa_cursor, sa_pa_id_idx, &sa_id_first, &sa_row_count );
    if (rc != 0)
        throw VDB_ERROR("VCursorIdRange() failed for SECONDARY_ALIGNMENT table, PRIMARY_ALIGNMENT_ID column", rc);

    bool reported_about_no_pa = false;
    // number of SA rows seen so far whose PA HAS_REF_OFFSET is longer than the SA one
    uint64_t pa_longer_sa_rows = 0;
    // how many such rows are tolerated before a DATA_ERROR is thrown:
    // a positive *_percent setting is multiplied by the row count and rounded up;
    // otherwise *_number is used, where 0 or a value above the row count means "all rows"
    uint64_t pa_longer_sa_limit;
    if (config->pa_len_threshold_percent > 0)
        pa_longer_sa_limit = ceil( config->pa_len_threshold_percent * sa_row_count );
    else if (config->pa_len_threshold_number == 0 || config->pa_len_threshold_number > sa_row_count)
        pa_longer_sa_limit = sa_row_count;
    else
        pa_longer_sa_limit = config->pa_len_threshold_number;

    // number of SECONDARY_ALIGNMENT rows to scan, derived the same way
    uint64_t sa_row_limit;
    if (config->sa_cutoff_percent > 0)
        sa_row_limit = ceil( config->sa_cutoff_percent * sa_row_count );
    else if (config->sa_cutoff_number == 0 || config->sa_cutoff_number > sa_row_count)
        sa_row_limit = sa_row_count;
    else
        sa_row_limit = config->sa_cutoff_number;

    // ---- pass over SECONDARY_ALIGNMENT ----
    for ( uint64_t i = 0; i < sa_row_count && i < sa_row_limit; ++i )
    {
        int64_t sa_row_id = i + sa_id_first;
        const void * data_ptr = NULL;
        uint32_t data_len;
        uint32_t pa_row_len;
        uint32_t sa_row_len;
        uint32_t seq_read_len_len;

        // SA:HAS_REF_OFFSET (only its length, sa_row_len, is used)
        rc = VCursorCellDataDirect ( sa_cursor, sa_row_id, sa_has_ref_offset_idx, NULL, (const void**)&data_ptr, NULL, &sa_row_len );
        if ( rc != 0 )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, HAS_REF_OFFSET column", sa_row_id, rc);

        const int64_t * p_seq_spot_id;
        uint32_t seq_spot_id_len;
        // SA:SEQ_SPOT_ID (must be exactly one value)
        rc = VCursorCellDataDirect ( sa_cursor, sa_row_id, sa_seq_spot_id_idx, NULL, (const void**)&p_seq_spot_id, NULL, &seq_spot_id_len );
        if ( rc != 0 || p_seq_spot_id == NULL || seq_spot_id_len != 1 )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, SEQ_SPOT_ID column", sa_row_id, rc);

        // a spot id of 0 is rejected
        int64_t seq_spot_id = *p_seq_spot_id;
        if (seq_spot_id == 0)
        {
            std::stringstream ss;
            ss << "SECONDARY_ALIGNMENT:" << sa_row_id << " has SEQ_SPOT_ID = " << seq_spot_id;

            throw DATA_ERROR(ss.str());
        }

        if ( has_tmp_mismatch )
        {
            const char * p_sa_tmp_mismatch;
            // SA:TMP_MISMATCH (must not contain '=')
            rc = VCursorCellDataDirect ( sa_cursor, sa_row_id, sa_tmp_mismatch_idx, NULL, (const void**)&p_sa_tmp_mismatch, NULL, &data_len );
            if ( rc != 0 || p_sa_tmp_mismatch == NULL )
                throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, TMP_MISMATCH column", sa_row_id, rc);

            for ( uint32_t j = 0; j < data_len; ++j )
            {
                if ( p_sa_tmp_mismatch[j] == '=' )
                {
                    std::stringstream ss;
                    ss << "SECONDARY_ALIGNMENT:" << sa_row_id << " TMP_MISMATCH contains '='";

                    throw DATA_ERROR(ss.str());
                }
            }
        }

        const int64_t * p_pa_row_id;
        // SA:PRIMARY_ALIGNMENT_ID (must be exactly one value)
        rc = VCursorCellDataDirect ( sa_cursor, sa_row_id, sa_pa_id_idx, NULL, (const void**)&p_pa_row_id, NULL, &data_len );
        if ( rc != 0 || p_pa_row_id == NULL || data_len != 1 )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, PRIMARY_ALIGNMENT_ID column", sa_row_id, rc);

        // a secondary alignment without a primary is logged once per run
        // and skips the remaining checks for this row
        int64_t pa_row_id = *p_pa_row_id;
        if (pa_row_id == 0)
        {
            if (!reported_about_no_pa)
            {
                PLOGMSG (klogInfo, (klogInfo, "$(ACC) has secondary alignments without primary", "ACC=%s", accession));
                reported_about_no_pa = true;
            }
            continue;
        }

        // PA:HAS_REF_OFFSET (only its length, pa_row_len, is used)
        rc = VCursorCellDataDirect ( pa_cursor, pa_row_id, pa_has_ref_offset_idx, NULL, &data_ptr, NULL, &pa_row_len );
        if ( rc != 0 )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on PRIMARY_ALIGNMENT table, HAS_REF_OFFSET column", pa_row_id, rc);

        // move on when PA.len equal to SA.len
        if (pa_row_len == sa_row_len)
            continue;

        // a primary shorter than its secondary is always an error
        if (pa_row_len < sa_row_len)
        {
            std::stringstream ss;
            ss << "PRIMARY_ALIGNMENT:" << pa_row_id << " HAS_REF_OFFSET length (" << pa_row_len << ") less than SECONDARY_ALIGNMENT:" << sa_row_id << " HAS_REF_OFFSET length (" << sa_row_len << ")";

            throw DATA_ERROR(ss.str());
        }

        // we already know that pa_row_len > sa_row_len
        ++pa_longer_sa_rows;

        const int32_t * p_seq_read_id;
        // SA:SEQ_READ_ID (must be exactly one value)
        rc = VCursorCellDataDirect ( sa_cursor, sa_row_id, sa_seq_read_id_idx, NULL, (const void**)&p_seq_read_id, NULL, &data_len );
        if ( rc != 0 || p_seq_read_id == NULL || data_len != 1 )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, SEQ_READ_ID column", sa_row_id, rc);

        // one-based read index
        int32_t seq_read_id = *p_seq_read_id;

        const uint32_t * p_seq_read_len;
        // SEQ:READ_LEN of the spot this alignment belongs to
        rc = VCursorCellDataDirect ( seq_cursor, seq_spot_id, seq_read_len_idx, NULL, (const void**)&p_seq_read_len, NULL, &seq_read_len_len );
        if ( rc != 0 || p_seq_read_len == NULL )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SEQUENCE table, READ_LEN column", seq_spot_id, rc);

        // the read index must address an existing READ_LEN entry
        if ( seq_read_id < 1 || (uint32_t)seq_read_id > seq_read_len_len )
        {
            std::stringstream ss;
            ss << "SECONDARY:" << sa_row_id << " SEQ_READ_ID value (" << seq_read_id << ") - 1 based, is out of SEQUENCE:" << seq_spot_id << " READ_LEN range (" << seq_read_len_len << ")";

            throw DATA_ERROR(ss.str());
        }

        // the longer primary must match the length of the read it aligns
        if (pa_row_len != p_seq_read_len[seq_read_id - 1])
        {
            std::stringstream ss;
            ss << "PRIMARY_ALIGNMENT:" << pa_row_id << " HAS_REF_OFFSET length (" << pa_row_len << ") does not match its SEQUENCE:" << seq_spot_id << " READ_LEN[" << seq_read_id - 1 << "] value (" << p_seq_read_len[seq_read_id - 1] << ")";

            throw DATA_ERROR(ss.str());
        }

        // evaluated last, so the per-row errors above take precedence
        if (pa_longer_sa_rows >= pa_longer_sa_limit)
        {
            std::stringstream ss;
            ss << "Limit violation (pa_longer_sa): there are at least " << pa_longer_sa_rows << " alignments where HAS_REF_OFFSET column is longer in PRIMARY_ALIGNMENT than in SECONDARY_ALIGNMENT";

            throw DATA_ERROR(ss.str());
        }
    }

    int64_t seq_id_first;
    uint64_t seq_row_count;

    rc = VCursorIdRange( seq_cursor, seq_pa_id_idx, &seq_id_first, &seq_row_count );
    if (rc != 0)
        throw VDB_ERROR("VCursorIdRange() failed for SEQUENCE table, PRIMARY_ALIGNMENT_ID column", rc);

    // number of SEQUENCE rows to scan; same percent/number rule as above
    uint64_t seq_row_limit;
    if (config->seq_cutoff_percent > 0)
        seq_row_limit = ceil( config->seq_cutoff_percent * seq_row_count );
    else if (config->seq_cutoff_number == 0 || config->seq_cutoff_number > seq_row_count)
        seq_row_limit = seq_row_count;
    else
        seq_row_limit = config->seq_cutoff_number;

    // ---- pass over SEQUENCE ----
    for ( uint64_t i = 0; i < seq_row_count && i < seq_row_limit; ++i )
    {
        int64_t seq_row_id = i + seq_id_first;
        const void * data_ptr = NULL;
        uint32_t data_len;

        const int64_t * p_seq_pa_id;
        uint32_t seq_pa_id_len;
        // SEQ:PRIMARY_ALIGNMENT_ID
        rc = VCursorCellDataDirect ( seq_cursor, seq_row_id, seq_pa_id_idx, NULL, (const void**)&p_seq_pa_id, NULL, &seq_pa_id_len );
        if ( rc != 0 || p_seq_pa_id == NULL )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SEQUENCE table, PRIMARY_ALIGNMENT_ID column", seq_row_id, rc);

        const uint32_t * p_seq_read_len;
        // SEQ:READ_LEN (must have as many entries as PRIMARY_ALIGNMENT_ID)
        rc = VCursorCellDataDirect ( seq_cursor, seq_row_id, seq_read_len_idx, NULL, (const void**)&p_seq_read_len, NULL, &data_len );
        if ( rc != 0 || p_seq_read_len == NULL )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SEQUENCE table, READ_LEN column", seq_row_id, rc);
        if ( seq_pa_id_len != data_len )
        {
            std::stringstream ss;
            ss << "SEQUENCE:" << seq_row_id << " PRIMARY_ALIGNMENT_ID length (" << seq_pa_id_len << ") does not match SEQUENCE:" << seq_row_id << " READ_LEN length (" << data_len << ")";

            throw DATA_ERROR(ss.str());
        }

        // total length of the reads that have no primary alignment (id == 0)
        uint64_t sum_unaligned_read_len = 0;
        for ( uint32_t j = 0; j < seq_pa_id_len; ++j )
        {
            if ( p_seq_pa_id[j] == 0 )
            {
                sum_unaligned_read_len += p_seq_read_len[j];
            }
        }

        // SEQ:CMP_READ (its length must equal the sum computed above)
        rc = VCursorCellDataDirect ( seq_cursor, seq_row_id, seq_cmp_read_idx, NULL, (const void**)&data_ptr, NULL, &data_len );
        if ( rc != 0 || data_ptr == NULL )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SEQUENCE table, SEQ:CMP_READ column", seq_row_id, rc);

        if ( sum_unaligned_read_len != data_len )
        {
            std::stringstream ss;
            ss << "SEQUENCE:" << seq_row_id << " CMP_READ length (" << data_len << ") does not match sum of unaligned READ_LEN values (" << sum_unaligned_read_len << ")";

            throw DATA_ERROR(ss.str());
        }
    }

    // say so when only part of the rows were examined
    if (sa_row_limit < sa_row_count || seq_row_limit < seq_row_count)
        PLOGMSG (klogInfo, (klogInfo, "$(ACC) looks good (based on first $(SA_CUTOFF) of SECONDARY_ALIGNMENT and $(SEQ_CUTOFF) SEQUENCE rows)", "ACC=%s,SA_CUTOFF=%lu,SEQ_CUTOFF=%lu", accession, sa_row_limit, seq_row_limit));
    else
        PLOGMSG (klogInfo, (klogInfo, "$(ACC) looks good", "ACC=%s", accession));
}
Ejemplo n.º 28
0
/*
function INSDC:coord:zero NCBI:align:ref_pos ( I64 ref_id, INSDC:coord:zero ref_start );
*/
/*
 * Row function behind NCBI:align:ref_pos.
 *
 * Copies every REF_START element of the current row into the result buffer.
 * When the object was set up with a cursor (self->curs != NULL), each value
 * is additionally shifted by
 *     ( ref_id[ 0 ] - start_id of the cached row range ) * self->max_seq_len
 * where the row range for ref_id[ 0 ] is looked up in self->tr_range and, on
 * a miss, read through the cursor and inserted into that tree.
 *
 * Returns 0 on success, the rc of the first failing cursor/buffer call, or an
 * rcMemory/rcExhausted rc when the cache node cannot be allocated.
 */
static
rc_t CC align_ref_pos ( void *data, const VXformInfo *info,
    int64_t row_id, VRowResult *rslt, uint32_t argc, const VRowData argv[] )
{
    RefPos const *self = ( void const * )data;
    unsigned const ploidy = ( unsigned const )argv[ REF_START ].u.data.elem_count;
    int64_t const *ref_id = 0;
    INSDC_coord_zero const *ref_start;
    INSDC_coord_zero *ref_pos;
    int64_t ref_row_id = 0;
    unsigned i;
    rc_t rc = 0;

    assert( argv[ REF_ID ].u.data.elem_bits == sizeof( *ref_id ) * 8 );
    assert( argv[ REF_START ].u.data.elem_bits == sizeof( *ref_start ) * 8 );

    /* first REF_START element belonging to this row */
    ref_start = argv[ REF_START ].u.data.base;
    ref_start += argv[ REF_START ].u.data.first_elem;

    if ( self->curs != NULL )
    {
        BSTRowRange *cached;

        ref_id = argv[ REF_ID ].u.data.base;
        ref_id += argv[ REF_ID ].u.data.first_elem;

        cached = ( BSTRowRange * )BSTreeFind( &self->tr_range, &ref_id[ 0 ], row_range_cmp );
        if ( cached == NULL )
        {
            /* cache miss: fetch the row range for this reference row */
            char const *seq_name = NULL;
            uint32_t seq_name_len;
            RowRange *fetched;

            SUB_DEBUG( ( "SUB.Rd in 'align-ref-pos.c' at #%lu\n", ref_id[ 0 ] ) );

            rc = VCursorCellDataDirect( self->curs, ref_id[ 0 ], self->name_idx, NULL, (void const **)&seq_name, NULL, &seq_name_len );
            if ( rc == 0 )
                rc = VCursorParamsSet( ( struct VCursorParams const * )self->curs, "QUERY_SEQ_NAME", "%.*s", seq_name_len, seq_name );
            if ( rc == 0 )
                rc = VCursorCellDataDirect( self->curs, ref_id[ 0 ], self->name_range_idx, NULL, (void const **)&fetched, NULL, NULL );
            if ( rc != 0 )
                return rc;

            cached = malloc( sizeof( *cached ) );
            if ( cached == NULL )
                return RC( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );

            /* the tree is a cache hanging off a const object, hence the casts */
            memcpy( &cached->rr, fetched, sizeof( *fetched ) );
            BSTreeInsert( ( BSTree* )&self->tr_range, ( BSTNode* )cached, row_range_sort );
        }
        ref_row_id = cached->rr.start_id;
    }

    rc = KDataBufferResize( rslt->data, ploidy );
    if ( rc != 0 )
        return rc;

    ref_pos = rslt->data->base;
    for ( i = 0; i < ploidy; ++i )
    {
        INSDC_coord_zero pos = ref_start[ i ];

        if ( self->curs != NULL )
            pos += ( INSDC_coord_zero )( ( ref_id[ 0 ] - ref_row_id ) * self->max_seq_len );

        ref_pos[ i ] = pos;
    }

    rslt->elem_bits = sizeof( ref_pos[ 0 ] ) * 8;
    rslt->elem_count = ploidy;

    return rc;
}
Ejemplo n.º 29
0
/*
 * Formats one spot (row) of the SEQUENCE cursor as two text lines and appends
 * them to the lane's current output file.
 *
 * The READ and QUALITY cells of the row must both hold exactly 70 elements;
 * they are split into two halves of 35 and emitted as
 *     <row_id> TAB 0 TAB <read[0..34]>  TAB <qual[0..34]>
 *     <row_id> TAB 1 TAB <read[35..69]> TAB <qual[35..69]>
 *
 * opts    : options; nothing is written when opts->comp == oc_null, and
 *           opts->cutoff is the spot count at which a new chunk file is started
 * cg_ctx  : supplies the cursor, the column indexes and the lookup/out_dir
 *           handed to make_read_file()
 * row_id  : row to dump
 * l       : lane receiving the output; its chunk, spot_count, write_pos and
 *           reads members are updated
 *
 * Returns 0 on success or the rc of the first failing step (already logged).
 */
static rc_t cg_dump_write_spot( cg_dump_opts * opts, cg_dump_ctx * cg_ctx, uint64_t row_id, lane * l )
{
    uint32_t elem_bits, boff, read_len, qual_len;
    const char * read;
    const char * qual;
    char buffer[ 1024 ];
    size_t num_writ_buf;
    size_t num_writ_file;
    rc_t rc;

    rc = VCursorCellDataDirect( cg_ctx->seq_cur, row_id, cg_ctx->seq_read_idx, &elem_bits, (const void**)&read, &boff, &read_len );
    if ( rc != 0 )
    {
        (void)PLOGERR( klogErr, ( klogErr, rc, "cannot read READ in row #$(row_id)", "row_id=%lu", row_id ) );
        return rc;
    }

    rc = VCursorCellDataDirect( cg_ctx->seq_cur, row_id, cg_ctx->seq_qual_idx, &elem_bits, (const void**)&qual, &boff, &qual_len );
    if ( rc != 0 )
    {
        (void)PLOGERR( klogErr, ( klogErr, rc, "cannot read QUALITY in row #$(row_id)", "row_id=%lu", row_id ) );
        return rc;
    }

    /* BOTH cells have to be 2 x 35 long: the formatting below reads 35
       characters starting at offset 35 of each of them, so accepting a row
       where only one of the two lengths is 70 would read past the other cell */
    if ( ( read_len != 70 ) || ( qual_len != 70 ) )
    {
        rc = RC( rcExe, rcDatabase, rcReading, rcRange, rcInvalid );
        (void)LOGERR( klogErr, rc, "len of read/quality columns do not match cg-length of 2 x 35" );
        return rc;
    }

    rc = string_printf ( buffer, sizeof buffer, &num_writ_buf,
            "%lu\t0\t%.35s\t%.35s\n%lu\t1\t%.35s\t%.35s\n",
            row_id, read, qual, row_id, &(read[35]), &(qual[35]) );
    if ( rc != 0 )
    {
        (void)PLOGERR( klogErr, ( klogErr, rc, "cannot generate output in row #$(row_id)", "row_id=%lu", row_id ) );
        return rc;
    }

    /* the null output mode formats the row but writes nothing */
    if ( opts->comp == oc_null )
        return rc;

    /* current chunk is full: close it and continue in the next chunk file */
    if ( l->spot_count >= opts->cutoff )
    {
        KFileRelease( l->reads );
        l->chunk++;
        l->spot_count = 0;
        l->write_pos = 0;
        rc = make_read_file( opts, cg_ctx->lookup, cg_ctx->out_dir, l );
        if ( rc != 0 )
            return rc;
    }

    rc = KFileWrite ( l->reads, l->write_pos, buffer, num_writ_buf, &num_writ_file );
    if ( rc != 0 )
    {
        (void)PLOGERR( klogErr, ( klogErr, rc, "cannot write output in row #$(row_id)", "row_id=%lu", row_id ) );
        return rc;
    }

    l->write_pos += num_writ_file;
    l->spot_count ++;

    return rc;
}
Ejemplo n.º 30
0
/*
 * Prepares `pileup_state` for iterating over a slice of one reference.
 *
 * Steps performed:
 *  1. query the row range of the REFERENCE cursor and store the row count in
 *     pileup_state->total_row_count;
 *  2. read NAME and MAX_SEQ_LEN of row pileup_state->reference_start_id into
 *     pileup_state->ref_name / pileup_state->max_seq_len;
 *  3. derive pileup_state->slice_start_id and pileup_state->slice_end_id from
 *     slice_start, slice_length and MAX_SEQ_LEN;
 *  4. walk the REFERENCE rows from max( reference_start_id,
 *     slice_start_id - 10 ) up to and including slice_start_id - stopping
 *     early when the NAME of a row differs from the reference name - and for
 *     each row read SEQ_START and the PRIMARY_ALIGNMENT_IDS and hand them to
 *     add_ref_row_to_cache().
 *
 * Parameters:
 *  pileup_state       state to initialise; reference_start_id, slice_start
 *                     and slice_length are read from it
 *  cursor_ref         cursor on the REFERENCE table
 *  cursor_pa          cursor passed through to add_ref_row_to_cache()
 *  column_index_ref   column indexes for cursor_ref, addressed by COL_*
 *  column_names_pa, column_index_pa, column_count_pa
 *                     passed through to add_ref_row_to_cache()
 *  column_names_ref, column_count_ref
 *                     not used by this function
 *  error_buf, error_buf_size
 *                     receive a textual description when a step fails
 *
 * Returns 0 on success, the rc of the failing call, or (rc_t)(-1) when the
 * REFERENCE table is empty or MAX_SEQ_LEN is missing or zero.
 *
 * NOTE(review): the `res == rcBuffer || res == rcInsufficient` tests compare
 * a complete rc_t with single enum values; this is kept unchanged here, but
 * it presumably was meant to inspect the object/state parts of the rc -
 * confirm against the klib rc macros.
 */
rc_t initialize_ref_pos (
    PileupIteratorState* pileup_state,
    VCursor const* cursor_ref, VCursor const* cursor_pa,
    char const* const* column_names_ref, uint32_t* column_index_ref, size_t column_count_ref,
    char const* const* column_names_pa, uint32_t* column_index_pa, size_t column_count_pa,
    char* error_buf,
    size_t error_buf_size
    )
{
    int64_t row_id;
    uint64_t row_count;

    uint32_t max_seq_len, row_len;

    rc_t rc = VCursorIdRange ( cursor_ref, 0, & row_id, & row_count );
    if ( rc != 0 )
    {
        /* row_id / row_count are not valid after a failure - do not look at them */
        rc_t res = string_printf ( error_buf, error_buf_size, NULL,
            "ERROR: VCursorIdRange(ref) failed with error: 0x%08x (%u) [%R]", rc, rc, rc);
        if (res == rcBuffer || res == rcInsufficient)
            error_buf [ error_buf_size - 1 ] = '\0';

        return rc;
    }

    if ( row_count < 1 )
    {
        rc_t res = string_printf ( error_buf, error_buf_size, NULL,
            "There is no rows in REFERENCE table");
        if (res == rcBuffer || res == rcInsufficient)
            error_buf [ error_buf_size - 1 ] = '\0';

        return (rc_t)(-1);
    }
    pileup_state->total_row_count = row_count;

    /* We don't know the reference end id; use its name to notice the moment when it changes - this will be the end */
    rc = VCursorReadDirect ( cursor_ref, pileup_state->reference_start_id, column_index_ref [COL_NAME],
        sizeof (pileup_state->ref_name[0]) * 8, pileup_state->ref_name, countof(pileup_state->ref_name), & row_len );
    if ( rc != 0 )
    {
        rc_t res = string_printf ( error_buf, error_buf_size, NULL,
            "ERROR: VCursorReadDirect(ref) failed with error: 0x%08x (%u) [%R]", rc, rc, rc);
        if (res == rcBuffer || res == rcInsufficient)
            error_buf [ error_buf_size - 1 ] = '\0';

        return rc;
    }
    pileup_state->ref_name[ min ( countof(pileup_state->ref_name) - 1, row_len) ] = '\0';

    /* Read MAX_SEQ_LEN from the start_row_id and assume that it's the same for all the rest */
    rc = VCursorReadDirect ( cursor_ref, pileup_state->reference_start_id, column_index_ref [COL_MAX_SEQ_LEN],
                             sizeof (max_seq_len) * 8, & max_seq_len, 1, & row_len );
    if ( rc != 0 )
    {
        rc_t res = string_printf ( error_buf, error_buf_size, NULL,
            "ERROR: VCursorReadDirect(ref) failed with error: 0x%08x (%u) [%R]", rc, rc, rc);
        if (res == rcBuffer || res == rcInsufficient)
            error_buf [ error_buf_size - 1 ] = '\0';

        return rc;
    }

    /* validate before using the value: with an empty cell max_seq_len was never written */
    if ( row_len < 1 )
    {
        rc_t res = string_printf ( error_buf, error_buf_size, NULL,
            "There is no MAX_SEQ_LEN column for row_id=%lld in REFERENCE table", row_id);
        if (res == rcBuffer || res == rcInsufficient)
            error_buf [ error_buf_size - 1 ] = '\0';

        return (rc_t)(-1);
    }

    /* max_seq_len is used as a divisor below */
    if ( max_seq_len == 0 )
    {
        rc_t res = string_printf ( error_buf, error_buf_size, NULL,
            "MAX_SEQ_LEN is zero in REFERENCE table");
        if (res == rcBuffer || res == rcInsufficient)
            error_buf [ error_buf_size - 1 ] = '\0';

        return (rc_t)(-1);
    }
    pileup_state->max_seq_len = max_seq_len;

    printf ("MAX_SEQ_LEN=%u\n", (unsigned int) max_seq_len);

    pileup_state->slice_start_id = pileup_state->reference_start_id + pileup_state->slice_start/max_seq_len;
    pileup_state->slice_end_id = pileup_state->slice_length != 0 ?
        pileup_state->reference_start_id + (pileup_state->slice_start + (int64_t)pileup_state->slice_length)/max_seq_len :
        (int64_t)pileup_state->total_row_count;

    /* explicit casts keep the arguments in step with the %lld / %llu conversions */
    printf ("slice position range: [%lld, %llu]\n",
        (long long) pileup_state->slice_start,
        (unsigned long long) (pileup_state->slice_start + pileup_state->slice_length));

    /* Read reference slice_start_id,
       read OVERLAP_*_POS to find out how
       many rows we need to read ahead of slice_start_id
       TODO: this is not implemented yet, instead we read just 10 rows ahead
    */

    /* Set cursor to <read_ahead_rows> rows ahead of slice_start_id
       and cache corresponding PRIMARY_ALIGNMENTS
    */
    {
        int64_t current_id = max (pileup_state->reference_start_id, pileup_state->slice_start_id - 10);
        int64_t stop_id = pileup_state->slice_start_id;
        uint32_t seq_start;
        uint32_t dummy;
#if USE_SINGLE_BLOB_FOR_ALIGNMENT_IDS != 1
        int64_t const* alignment_ids;
#endif

        /* The upper bound is tested BEFORE touching the row, so no row
           beyond stop_id is ever read (it may not exist) */
        for (; current_id <= stop_id; ++current_id)
        {
            /* We don't know the current reference end_id:
               read its name and break when it changes
            */
            char ref_name[ countof (pileup_state->ref_name) ];
            rc = VCursorReadDirect ( cursor_ref, current_id, column_index_ref [COL_NAME],
                sizeof (ref_name[0]) * 8, ref_name, countof(ref_name), & row_len );
            if ( rc != 0 )
            {
                rc_t res = string_printf ( error_buf, error_buf_size, NULL,
                    "ERROR: VCursorReadDirect(ref) failed with error: 0x%08x (%u) [%R]", rc, rc, rc);
                if (res == rcBuffer || res == rcInsufficient)
                    error_buf [ error_buf_size - 1 ] = '\0';

                return rc;
            }
            ref_name[ min ( countof(ref_name) - 1, row_len) ] = '\0';
            if ( strcmp (ref_name, pileup_state->ref_name) != 0 )
                break;

#if USE_SINGLE_BLOB_FOR_ALIGNMENT_IDS == 1
            rc = open_blob_for_current_id ( current_id,
                cursor_ref, & pileup_state->blob_alignment_ids,
                column_index_ref [COL_PRIMARY_ALIGNMENT_IDS],
                error_buf, error_buf_size );
            if (rc != 0)
                return rc;
#endif

            /* Read REFERENCE row's SEQ_START column to know the offset */
            rc = VCursorReadDirect ( cursor_ref, current_id,
                                     column_index_ref [COL_SEQ_START],
                                     sizeof (seq_start) * 8, & seq_start, 1, & row_len );
            if ( rc != 0 )
            {
                rc_t res = string_printf ( error_buf, error_buf_size, NULL,
                    "ERROR: VCursorReadDirect(ref-seq_start) failed with error: 0x%08x (%u) [%R]",
                    rc, rc, rc);
                if (res == rcBuffer || res == rcInsufficient)
                    error_buf [ error_buf_size - 1 ] = '\0';

                return rc;
            }
            pileup_state->current_seq_start = seq_start;

            /* Read REFERENCE row's PRIMARY_ALIGNMENT_IDS column to iterate through them */
#if USE_SINGLE_BLOB_FOR_ALIGNMENT_IDS == 1
            rc = VBlobCellData ( pileup_state->blob_alignment_ids, current_id,
                & dummy, & pileup_state->alignment_ids, NULL, & row_len );
            if ( rc != 0 )
            {
                rc_t res = string_printf ( error_buf, error_buf_size, NULL,
                    "ERROR: VBlobCellData(ref-pa_ids) failed with error: 0x%08x (%u) [%R], row_len=%u",
                    rc, rc, rc, row_len);
                if (res == rcBuffer || res == rcInsufficient)
                    error_buf [ error_buf_size - 1 ] = '\0';

                return rc;
            }
            pileup_state -> size_alignment_ids = row_len;
#else

            rc = VCursorCellDataDirect ( cursor_ref, current_id,
                        column_index_ref [COL_PRIMARY_ALIGNMENT_IDS],
                        NULL,
                        (void const**)(& alignment_ids), 0, & row_len );
            if ( rc != 0 )
            {
                rc_t res = string_printf ( error_buf, error_buf_size, NULL,
                    "ERROR: VCursorCellDataDirect(ref-pa_ids) failed with error: 0x%08x (%u) [%R], row_len=%u",
                    rc, rc, rc, row_len);
                if (res == rcBuffer || res == rcInsufficient)
                    error_buf [ error_buf_size - 1 ] = '\0';

                return rc;
            }
            rc = PileupIteratorState_SetAlignmentIds ( pileup_state, alignment_ids, row_len );
            if ( rc != 0 )
            {
                rc_t res = string_printf ( error_buf, error_buf_size, NULL,
                    "ERROR: PileupIteratorState_SetAlignmentIds failed with error: 0x%08x (%u), row_len=%u",
                    rc, rc, row_len);
                if (res == rcBuffer || res == rcInsufficient)
                    error_buf [ error_buf_size - 1 ] = '\0';

                return rc;
            }
#endif

            pileup_state->next_alignment_idx = 0;

            /* For each PRIMARY_ALIGNMENT_ID in alignment_ids: read its start, length and
               cache it if it intersects the starting position
            */
            rc = add_ref_row_to_cache ( pileup_state, cursor_pa, seq_start,
                        pileup_state->slice_start,
                        pileup_state->alignment_ids, row_len,
                        column_names_pa, column_index_pa, column_count_pa,
                        error_buf, error_buf_size );
            if ( rc != 0 )
                return rc;
        }
    }

    return rc;

}