示例#1
0
/* Read_Restorer_Make
 *  Allocates a zero-filled Read_Restorer and asks open_RR_cursor to set it up,
 *  passing the table name "PRIMARY_ALIGNMENT".
 *
 *  objp        [ OUT ] receives the new object on success, NULL on ANY failure
 *  tbl         [ IN ]  forwarded unchanged to open_RR_cursor
 *  native_curs [ IN ]  forwarded unchanged to open_RR_cursor
 *
 *  Returns 0 on success, an rcMemory/rcExhausted code when allocation fails,
 *  or whatever open_RR_cursor returned.
 */
static
rc_t Read_Restorer_Make( Read_Restorer **objp, const VTable *tbl, const VCursor* native_curs )
{
    rc_t rc;
    Read_Restorer *obj;

    /* never leave the out-parameter indeterminate, whichever step fails */
    *objp = NULL;

    /* create the object, all fields zeroed */
    obj = calloc( 1, sizeof * obj );
    if ( obj == NULL )
        return RC ( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );

    rc = open_RR_cursor( obj, tbl, native_curs, "PRIMARY_ALIGNMENT" );
    if ( rc != 0 )
    {
        /* NOTE(review): assumes open_RR_cursor leaves nothing to release when
           it fails, as the original code did - confirm */
        free( obj );
        return rc;
    }

#if READ_RESTORER_VERSION == 2
    /* - we have no cache to begin with ( obj->read_store is NULL because the object is zero-filled )
       - we make one if sequential access is detected */
#endif

    SUB_DEBUG( ( "SUB.Make in 'seq-restore-read.c'\n" ) );
    *objp = obj;
    return 0;
}
示例#2
0
/* RestoreReadMake
 *  Allocates a zero-filled RestoreRead and creates its reference-sequence
 *  manager ( obj->rmgr ) through RefSeqMgr_Make.
 *
 *  objp [ OUT ] receives the new object on success, NULL on ANY failure
 *  mgr  [ IN ]  database manager handed to RefSeqMgr_Make
 *
 *  Returns 0 on success, an rcMemory/rcExhausted code when allocation fails,
 *  or the code returned by RefSeqMgr_Make.
 */
static
rc_t RestoreReadMake ( RestoreRead **objp, const VDBManager *mgr )
{
    rc_t rc;
    RestoreRead *obj;

    assert( objp != NULL );
    assert( mgr != NULL );

    /* both failure paths must report NULL, not only the RefSeqMgr_Make one */
    *objp = NULL;

    obj = calloc( 1, sizeof( *obj ) );
    if ( obj == NULL )
        return RC( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );

    SUB_DEBUG( ( "SUB.Make in 'ref_restore_read.c'\n" ) );

    /* NOTE(review): the meaning of the 8 MiB and 30 arguments is defined by
       RefSeqMgr_Make, which is not visible here */
    rc = RefSeqMgr_Make( &obj->rmgr, mgr, errefseq_4NA, 8 * 1024 * 1024, 30 );
    if ( rc != 0 )
    {
        /* the object is zero-filled, so the destructor sees a consistent state */
        RestoreReadWhack( obj );
        return rc;
    }

    *objp = obj;
    return 0;
}
示例#3
0
/* RestoreReadMake
 *  Creates a RestoreRead whose cursor reads "( INSDC:4na:bin ) READ" from the
 *  "PRIMARY_ALIGNMENT" table.
 *
 *  A cursor already linked to native_curs under that name is reused ( with an
 *  added reference ); otherwise the table is opened through the parent
 *  database of tbl, a cached cursor is created and opened, and it is linked to
 *  native_curs.
 *
 *  objp        [ OUT ] receives the new object on success, NULL on failure
 *  tbl         [ IN ]  table whose parent database holds PRIMARY_ALIGNMENT
 *  native_curs [ IN ]  cursor that carries the linked-cursor registry
 *
 *  Returns 0 on success, otherwise the first failing rc.
 */
static
rc_t RestoreReadMake ( RestoreRead **objp, const VTable *tbl, const VCursor* native_curs )
{
    rc_t rc;
    char name[] = "PRIMARY_ALIGNMENT";
    RestoreRead *obj;

    *objp = NULL;

    /* create the object; the NULL check must come before the memory is touched */
    obj = calloc ( 1, sizeof * obj );
    if ( obj == NULL )
        return RC ( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );

    rc = VCursorLinkedCursorGet( native_curs, name, &obj->curs );
    if ( rc == 0 )
    {
        /* share the existing cursor and resolve the READ column on it, so the
           object is actually handed back instead of being leaked */
        VCursorAddRef( obj->curs );

        /* NOTE(review): presumably VCursorAddColumn reports rcExists when the
           shared cursor already has this column - confirm */
        rc = VCursorAddColumn ( obj -> curs, & obj -> read_idx, "( INSDC:4na:bin ) READ" );
        if ( GetRCState( rc ) == rcExists )
            rc = 0;

        if ( rc != 0 )
            VCursorRelease ( obj -> curs );
    }
    else
    {
        /* get at the parent database */
        const VDatabase *db;
        rc = VTableOpenParentRead ( tbl, & db );
        if ( rc == 0 )
        {
            const VTable *patbl;

            /* open the primary alignment table */
            rc = VDatabaseOpenTableRead ( db, & patbl, name );
            VDatabaseRelease ( db );
            if ( rc == 0 )
            {
                /* create a cursor */
                rc = VTableCreateCachedCursorRead( patbl, &obj->curs, 32*1024*1024UL );
                VTableRelease ( patbl );
                if ( rc == 0 )
                {
                    /* add columns to cursor, then open it */
                    rc = VCursorAddColumn ( obj -> curs, & obj -> read_idx, "( INSDC:4na:bin ) READ" );
                    if ( rc == 0 )
                        rc = VCursorOpen ( obj -> curs );

                    if ( rc == 0 )
                    {
                        /* register the cursor so later callers can reuse it;
                           the result is ignored, as in the original code */
                        VCursorLinkedCursorSet( native_curs, name, obj->curs );
                    }
                    else
                    {
                        VCursorRelease ( obj -> curs );
                    }
                }
            }
        }
    }

    if ( rc != 0 )
    {
        free ( obj );
        return rc;
    }

    SUB_DEBUG( ( "SUB.Make in 'seq-restore-read.c'\n" ) );
    * objp = obj;
    return 0;
}
示例#4
0
/*
 function ascii NCBI:align:ref_seq_id ( I64 ref_id );

 Row function: reads the cell of column self->seqID_idx at row ref_id[ 0 ]
 of self->curs and copies its bytes into the output row.

 The output row is empty ( elem_count == 0 ) when the object has no cursor or
 the REF_ID input is empty, and also when the cursor reports "row not found".
 Any other cursor or buffer error is returned unchanged.
*/
static
rc_t CC align_ref_seq_id ( void *data, const VXformInfo *info,
    int64_t row_id, VRowResult *rslt, uint32_t argc, const VRowData argv[] )
{
    RefSeqID const *self = ( void const * )data;
    int64_t const *ids = argv[ REF_ID ].u.data.base;
    char const *text = NULL;
    uint32_t text_len;
    rc_t rc;

    assert( argv[ REF_ID ].u.data.elem_bits == sizeof( *ids ) * 8 );

    /* nothing to look up: produce an empty row */
    if ( self->curs == NULL || argv[ REF_ID ].u.data.elem_count == 0 )
    {
        rslt->elem_count = 0;
        return 0;
    }

    /* step to the first element that belongs to this row */
    ids += argv[ REF_ID ].u.data.first_elem;

    SUB_DEBUG( ( "SUB.Rd in 'align_ref_seq_id.c' at #%lu\n", ids[ 0 ] ) );

    rc = VCursorCellDataDirect( self->curs, ids[ 0 ], self->seqID_idx,
                                NULL, ( void const ** )&text, NULL, &text_len );
    if ( GetRCState( rc ) == rcNotFound && GetRCObject( rc ) == rcRow )
    {
        /* a missing row is not an error: it yields an empty string */
        text = "";
        text_len = 0;
    }
    else if ( rc != 0 )
    {
        return rc;
    }

    /* make the output buffer byte-sized, then large enough for the text */
    rc = KDataBufferCast( rslt->data, rslt->data, sizeof( *text ) * 8, true );
    if ( rc != 0 )
    {
        return rc;
    }

    rc = KDataBufferResize( rslt->data, text_len );
    if ( rc != 0 )
    {
        return rc;
    }

    memmove( rslt->data->base, text, sizeof( *text ) * text_len );
    rslt->elem_bits = sizeof( *text ) * 8;
    rslt->elem_count = text_len;
    return 0;
}
示例#5
0
/* RefSeqIDMake
 *  Builds a RefSeqID: obtains a cursor from AlignRefTableCursor and resolves
 *  the index of the "SEQ_ID" column, falling back to "REF_SEQ_ID" when the
 *  first name is reported as a column that was not found.  An rcExists state
 *  from the column lookup counts as success.
 *
 *  objp is written only on success; on failure the cursor ( if one was
 *  obtained ) is released, the object is freed and the failing rc is returned.
 */
static
rc_t RefSeqIDMake ( RefSeqID **objp, const VTable *tbl, const VCursor *native_curs )
{
    rc_t rc;
    RefSeqID *self = malloc ( sizeof * self );

    if ( self == NULL )
    {
        return RC( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );
    }

    self->curs = NULL;

    SUB_DEBUG( ( "SUB.Make in 'align_ref_seq_id.c'\n" ) );

    /* open the reference cursor */
    rc = AlignRefTableCursor( tbl, native_curs, &self->curs, NULL );
    if ( rc != 0 )
    {
        free ( self );
        return rc;
    }

    /* preferred column name first, alternative name second */
    rc = VCursorAddColumn( self->curs, &self->seqID_idx, "SEQ_ID" );
    if ( GetRCObject( rc ) == ( enum RCObject ) rcColumn && GetRCState( rc ) == rcNotFound )
    {
        rc = VCursorAddColumn( self->curs, &self->seqID_idx, "REF_SEQ_ID" );
    }

    /* a column that is already on the cursor is fine */
    if ( GetRCState( rc ) == rcExists )
    {
        rc = 0;
    }

    if ( rc != 0 )
    {
        VCursorRelease ( self -> curs );
        free ( self );
        return rc;
    }

    *objp = self;
    return 0;
}
示例#6
0
/* seq_restore_read_impl1
 *  Row function that assembles the full base sequence of one row from its
 *  individual reads.
 *
 *  Inputs, as used by the code below:
 *    argv[ 0 ]  locally stored bases ( 8 bit elements )
 *    argv[ 1 ]  one 64 bit alignment id per read
 *    argv[ 2 ]  one INSDC_coord_len length per read
 *    argv[ 3 ]  one read-type byte per read
 *
 *  The output row holds sum( read_len ) bytes.  For a read with
 *  align_id > 0 the bases are fetched from column self->read_idx of
 *  self->curs at row align_id and copied either as-is ( forward ) or in
 *  reverse order through the `map` table ( reverse ); every other read is
 *  taken from the local bases, consumed front to back.
 *
 *  The function also keeps track of whether rows are being requested
 *  sequentially and, if so, issues a prefetch on the cursor.
 *
 *  Returns 0 on success; rcData/rcInconsistent when lengths or read types do
 *  not match; otherwise the rc of the failing buffer or cursor call.
 */
static
rc_t CC seq_restore_read_impl1 ( void *data, const VXformInfo *info, int64_t row_id,
                                 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    int i;
    Read_Restorer   *self = data;
    INSDC_4na_bin   *dst;
    INSDC_coord_len     len;
    uint32_t        src_len     = (uint32_t)argv[ 0 ].u.data.elem_count;
    const INSDC_4na_bin *src    = argv[ 0 ].u.data.base;
    const uint32_t  num_reads   = (uint32_t)argv[ 1 ].u.data.elem_count;
    const int64_t   *align_id   = argv[ 1 ].u.data.base;
    const INSDC_coord_len *read_len = argv[ 2 ].u.data.base;
    const uint8_t   *read_type  = argv[ 3 ].u.data.base;
    bool is_sequential = false;
    
    assert( argv[ 0 ].u.data.elem_bits == 8 );
    assert( argv[ 1 ].u.data.elem_bits == 64 );
    assert( argv[ 2 ].u.data.elem_bits == sizeof( INSDC_coord_len ) * 8 );
    assert( argv[ 2 ].u.data.elem_count == num_reads );
    assert( argv[ 3 ].u.data.elem_count == num_reads );
    
    
    /* step every input to the first element that belongs to this row */
    src   += argv [ 0 ] . u . data . first_elem;
    align_id  += argv [ 1 ] . u . data . first_elem;
    read_len  += argv [ 2 ] . u . data . first_elem;
    read_type += argv [ 3 ] . u . data . first_elem;

    /* sequential-access detection: a request for neither the previous row nor
       its successor restarts the run; once the run is more than 100 rows past
       its first row the access counts as sequential */
    if ( row_id != self->last_row_id  && row_id != self->last_row_id + 1 )
    {
        self->first_sequential_row_id = row_id;
        is_sequential = false;
    }
    else if ( row_id > self->first_sequential_row_id + 100 )
    {
        is_sequential = true;
    }
    self->last_row_id = row_id;
    
    
    /* is_sequential = false; forcing it to false ... Sept. 16th 2015 to analyze prefetching */
    
    /* total output length = sum of all read lengths */
    for ( i = 0, len = 0; i < (int)num_reads; i++ )
    {
        len += read_len[ i ];
    }

    /* resize output row */    
    rslt->data->elem_bits = 8;
    rc = KDataBufferResize( rslt->data, len );
    /* note: elem_count and dst are set before rc is examined; dst is only
       dereferenced below when rc == 0 */
    rslt->elem_count = len;
    dst = rslt->data->base;
    if ( rc == 0 && len > 0 )
    {
        if ( len == src_len ) /*** shortcut - all data is local ***/
        {
            memmove( dst, src, len );
        }
        else
        {
            /* prefetch only in sequential mode and only when this row lies
               outside the window recorded by the previous prefetch */
            if ( is_sequential &&
                 ( row_id < self->prefetch_start_id || row_id > self->prefetch_stop_id ) )
            { /* do prefetch */
                /* number of alignment ids from this row's first element to the
                   end of the input's base buffer */
                uint32_t num_rows = ( argv[ 1 ].u.data.base_elem_count - argv[ 1 ].u.data.first_elem );
                
                /*
                KTimeMs_t ts = KTimeMsStamp();
                fprintf( stderr, "\nprefetch row_id #%lu ( start_id #%lu, stop_id #%lu ) num_rows = %d\n", row_id, self->prefetch_start_id, self->prefetch_stop_id, num_rows );
                */
                
                /* the return value of the prefetch is not checked */
                VCursorDataPrefetch( self->curs,
                                     align_id,
                                     self->read_idx,
                                     num_rows,
                                     1,
                                     INT64_MAX,
                                     true );
                                     
                /*
                ts = KTimeMsStamp() - ts;
                fprintf( stderr, "prefetch done in %lu ms\n", ts );
                */
                
                /* remember the window covered by this prefetch */
                self->prefetch_start_id=row_id;
                self->prefetch_stop_id =argv[1].blob_stop_id;
            }
            for( i = 0; i < (int)num_reads && rc == 0; i++ ) /*** checking read by read ***/
            {
                if ( align_id[ i ] > 0 )
                {
                    /* aligned read: bases come from the cursor */
                    const INSDC_4na_bin *r_src;
                    uint32_t             r_src_len;

                    SUB_DEBUG( ( "SUB.Rd in 'seq-restore-read.c' at #%lu\n", align_id[ i ] ) );

                    rc = VCursorCellDataDirect( self -> curs, align_id[ i ], self -> read_idx,
                                                NULL, ( const void** ) &r_src, NULL, &r_src_len );
                    if ( rc == 0 )
                    {
                        /* the fetched cell must have exactly the expected length */
                        if ( r_src_len == read_len[ i ] )
                        {
                            if ( read_type[ i ] & SRA_READ_TYPE_FORWARD )
                            {
                                memmove( dst, r_src, read_len[ i ] );
                            }
                            else if ( read_type[ i ] & SRA_READ_TYPE_REVERSE )
                            {
                                /* copy back to front, translating the low 4 bits
                                   of each base through `map`
                                   ( NOTE(review): `map` is defined elsewhere,
                                   presumably the 4na complement table - confirm ) */
                                int j, k;
                                for( j = 0, k = read_len[ i ] - 1; j < (int)read_len[ i ]; j++, k-- )
                                {
                                    dst[ j ] = map [ r_src[ k ] & 15 ];
                                }
                            }
                            else
                            {
                                /* neither forward nor reverse bit is set */
                                rc = RC( rcXF, rcFunction, rcExecuting, rcData, rcInconsistent );
                            }
                        }
                        else
                        {
                            rc = RC( rcXF, rcFunction, rcExecuting, rcData, rcInconsistent );
                        }
                    }
                }
                else /*** data is in READ column **/
                {
                    /* unaligned read: consume read_len[ i ] bytes of local data */
                    if ( src_len >= read_len[ i ] )
                    {
                        memmove( dst, src, read_len[ i ] );
                        src_len -= read_len[ i ];
                        src     += read_len[ i ];
                    }
                    else
                    {
                        /* not enough local data left; unlike the errors above
                           this one returns immediately */
                        return RC( rcXF, rcFunction, rcExecuting, rcData, rcInconsistent );
                    }
                }
                /* advance the output position past this read */
                dst += read_len[ i ];
            }
        }
    }

    return rc;
}
示例#7
0
/*
function INSDC:coord:zero NCBI:align:ref_pos ( I64 ref_id, INSDC:coord:zero ref_start );

Row function: emits one position per REF_START element.

Without a cursor ( self->curs == NULL ) the output equals REF_START.
With a cursor, every element is shifted by
    ( ref_id[ 0 ] - start_id ) * self->max_seq_len
where start_id is taken from the RowRange belonging to ref_id[ 0 ].  Ranges are
looked up in self->tr_range first; on a miss the range is read through the
cursor ( NAME cell -> "QUERY_SEQ_NAME" parameter -> NAME_RANGE cell ) and a
copy is inserted into the tree for later rows.
*/
static
rc_t CC align_ref_pos ( void *data, const VXformInfo *info,
    int64_t row_id, VRowResult *rslt, uint32_t argc, const VRowData argv[] )
{
    RefPos const *self = ( void const * )data;
    unsigned const ploidy = ( unsigned const )argv[ REF_START ].u.data.elem_count;
    INSDC_coord_zero const *starts;
    INSDC_coord_zero *out;
    INSDC_coord_zero shift = 0;
    int64_t const *ids = 0;
    int64_t first_row = 0;
    unsigned n;
    rc_t rc;

    assert( argv[ REF_ID ].u.data.elem_bits == sizeof( *ids ) * 8 );
    assert( argv[ REF_START ].u.data.elem_bits == sizeof( *starts ) * 8 );

    /* start positions of this row */
    starts = argv[ REF_START ].u.data.base;
    starts += argv[ REF_START ].u.data.first_elem;

    if ( self->curs != NULL )
    {
        BSTRowRange *node;

        ids = argv[ REF_ID ].u.data.base;
        ids += argv[ REF_ID ].u.data.first_elem;

        /* is the range for this reference row already cached? */
        node = ( BSTRowRange * )BSTreeFind( &self->tr_range, &ids[ 0 ], row_range_cmp );
        if ( node == NULL )
        {
            char const *ref_name = NULL;
            uint32_t ref_name_len;
            RowRange *range;

            SUB_DEBUG( ( "SUB.Rd in 'align-ref-pos.c' at #%lu\n", ids[ 0 ] ) );

            /* name of the reference at this row */
            rc = VCursorCellDataDirect( self->curs, ids[ 0 ], self->name_idx,
                                        NULL, ( void const ** )&ref_name, NULL, &ref_name_len );
            if ( rc != 0 )
            {
                return rc;
            }

            /* hand the name to the cursor as query parameter */
            rc = VCursorParamsSet( ( struct VCursorParams const * )self->curs,
                                   "QUERY_SEQ_NAME", "%.*s", ref_name_len, ref_name );
            if ( rc != 0 )
            {
                return rc;
            }

            /* row range registered for that name */
            rc = VCursorCellDataDirect( self->curs, ids[ 0 ], self->name_range_idx,
                                        NULL, ( void const ** )&range, NULL, NULL );
            if ( rc != 0 )
            {
                return rc;
            }

            node = malloc( sizeof( *node ) );
            if ( node == NULL )
            {
                return RC( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );
            }

            /* cache a private copy; the tree is modified through a cast
               because self is const here */
            memcpy( &node->rr, range, sizeof( *range ) );
            BSTreeInsert( ( BSTree* )&self->tr_range, ( BSTNode* )node, row_range_sort );
        }
        first_row = node->rr.start_id;

        /* same offset for every element of the row */
        shift = ( INSDC_coord_zero )( ( ids[ 0 ] - first_row ) * self->max_seq_len );
    }

    rc = KDataBufferResize( rslt->data, ploidy );
    if ( rc != 0 )
    {
        return rc;
    }

    out = rslt->data->base;
    for ( n = 0; n != ploidy; ++n )
    {
        out[ n ] = starts[ n ] + shift;
    }

    rslt->elem_bits = sizeof( out[ 0 ] ) * 8;
    rslt->elem_count = ploidy;
    return 0;
}
示例#8
0
/* RefPosMake
 *  Builds a RefPos: initialises its range tree, obtains a cursor from
 *  AlignRefTableCursor and reads MAX_SEQ_LEN from row 1 of that cursor.
 *
 *  - If MAX_SEQ_LEN is reported as a column that was not found, the cursor is
 *    dropped ( obj->curs == NULL ) and the object is still returned; in that
 *    mode REF_POS equals REF_START.
 *  - Otherwise the "(utf8)NAME" and "NAME_RANGE" columns are added as well;
 *    an rcExists state from any column lookup counts as success.
 *
 *  objp is written only on success.  On failure the cursor is released, the
 *  object is freed and the failing rc is returned.
 */
static
rc_t RefPosMake ( RefPos **objp, const VTable *tbl, const VCursor *native_curs )
{
    rc_t rc;
    RefPos *self = malloc ( sizeof * self );

    if ( self == NULL )
    {
        return RC( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );
    }

    self->curs = NULL;
    BSTreeInit( &self->tr_range );

    SUB_DEBUG( ( "SUB.Make in 'align-ref-pos.c'\n" ) );

    /* open the reference table cursor */
    rc = AlignRefTableCursor( tbl, native_curs, &self->curs, NULL );
    if ( rc == 0 )
    {
        uint32_t len_idx;

        rc = VCursorAddColumn( self->curs, &len_idx, "(U32)MAX_SEQ_LEN" );
        if ( ( rc == 0 ) || GetRCState( rc ) == rcExists )
        {
            const void *cell;
            uint32_t cell_len;

            /* the value is taken from row 1 */
            rc = VCursorCellDataDirect( self->curs, 1, len_idx, NULL, &cell, NULL, &cell_len );
            if ( rc == 0 )
            {
                assert( cell_len == 1 );
                memcpy( &self->max_seq_len, cell, 4 );
            }
        }

        if ( GetRCObject( rc ) == ( enum RCObject )rcColumn && GetRCState( rc ) == rcNotFound )
        {
            /*** no MAX_SEQ_LEN means that REF_POS==REF_START **/
            VCursorRelease( self->curs );
            self->curs = NULL;
            self->max_seq_len = 0;
            self->name_range_idx = 0;
            self->name_idx = 0;
            rc = 0;
        }
        else if ( rc == 0 )
        {
            /* remaining columns needed for the range lookup */
            rc = VCursorAddColumn( self->curs, &self->name_idx, "(utf8)NAME" );
            if ( rc == 0 || GetRCState( rc ) == rcExists )
            {
                rc = VCursorAddColumn( self->curs, &self->name_range_idx, "NAME_RANGE" );
            }
            if ( GetRCState( rc ) == rcExists )
            {
                rc = 0;
            }
        }
    }

    if ( rc != 0 )
    {
        VCursorRelease( self->curs );
        free( self );
        return rc;
    }

    *objp = self;
    return rc;
}
示例#9
0
/* ref_restore_read_impl
 *  Row function producing a row of seq_len 8 bit bases.
 *
 *  Inputs, as used by the code below:
 *    argv[ 0 ]  locally stored bases ( may be empty )
 *    argv[ 1 ]  sequence id text
 *    argv[ 2 ]  one INSDC_coord_one start position
 *    argv[ 3 ]  one INSDC_coord_len length
 *
 *  - local bases present : they are copied and the rest is padded with 15 ( 'N' )
 *  - no local bases, start == 0 : the whole row is filled with 15 ( 'N' )
 *  - otherwise : the bases are read through RefSeqMgr_Read at offset start - 1
 *
 *  Returns rcData/rcInvalid when there are more local bases than seq_len,
 *  rcData/rcTooShort or rcTooLong when RefSeqMgr_Read delivers a different
 *  number of bases than requested, or the rc of the failing call.
 */
static
rc_t CC ref_restore_read_impl ( void *data, const VXformInfo *info, int64_t row_id,
                                VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    RestoreRead *self = data;
    uint32_t local_len = (uint32_t)argv[ 0 ].u.data.elem_count;
    uint32_t id_len = (uint32_t)argv[ 1 ].u.data.elem_count;
    const uint8_t *local = argv[ 0 ].u.data.base;
    const char *id = argv[ 1 ].u.data.base;
    INSDC_coord_one start;
    INSDC_coord_len total;
    INSDC_coord_len fetched = 0;
    uint8_t *out;
    rc_t rc;

    assert( argv[ 0 ].u.data.elem_bits == 8 );
    assert( argv[ 1 ].u.data.elem_bits == 8 );
    assert( argv[ 2 ].u.data.elem_bits == sizeof( INSDC_coord_one ) * 8 );
    assert( argv[ 3 ].u.data.elem_bits == sizeof( INSDC_coord_len ) * 8 );
    assert( argv[ 2 ].u.data.elem_count == 1 );
    assert( argv[ 3 ].u.data.elem_count == 1 );

    /* step to this row's elements */
    local += argv [ 0 ] . u . data . first_elem;
    id    += argv [ 1 ] . u . data . first_elem;
    start = ( ( INSDC_coord_one* )argv[ 2 ].u.data.base )[ argv[ 2 ].u.data.first_elem ];
    total = ( ( INSDC_coord_len* )argv[ 3 ].u.data.base )[ argv[ 3 ].u.data.first_elem ];

    /* the local bases must fit into the requested length */
    if ( total < local_len )
    {
        return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
    }

    /* resize output row to the full sequence length */
    rslt->data->elem_bits = 8;
    rc = KDataBufferResize( rslt->data, total );
    if ( rc != 0 )
    {
        return rc;
    }

    rslt->elem_count = total;
    out = rslt->data->base;

    /* empty row: nothing to fill */
    if ( !( total > 0 ) )
    {
        return rc;
    }

    if ( local_len > 0 )
    {
        memcpy( out, local, local_len );
        if ( local_len < total )
        {
            memset( out + local_len, 15, total - local_len ); /* pad with 'N' */
        }
        return rc;
    }

    if ( start == 0 )
    {
        memset( out, 15, total ); /* fill with 'N' */
        return rc;
    }

    SUB_DEBUG( ( "SUB.Rd in 'ref_restore_read.c' at: %.*s at %u.%u\n", id_len, id, start, total ) );

    /* start is 1-based here, the reader is given start - 1 */
    rc = RefSeqMgr_Read( self->rmgr, id, id_len, start - 1, total, out, &fetched );
    if ( rc == 0 && fetched != total )
    {
        rc = RC( rcXF, rcFunction, rcExecuting, rcData, fetched < total ? rcTooShort : rcTooLong );
    }
    return rc;
}