/* Read_Restorer_Make
 *  Allocates a zero-filled Read_Restorer and lets open_RR_cursor() attach a
 *  cursor for the "PRIMARY_ALIGNMENT" table to it.
 *
 *  objp        [ OUT ] - receives the new object on success; set to NULL on
 *                        every failure path
 *  tbl         [ IN ]  - forwarded unchanged to open_RR_cursor()
 *  native_curs [ IN ]  - forwarded unchanged to open_RR_cursor()
 *
 *  Returns 0 on success, a memory-exhausted RC when the allocation fails,
 *  or whatever open_RR_cursor() reported.
 */
static rc_t Read_Restorer_Make( Read_Restorer **objp, const VTable *tbl, const VCursor* native_curs )
{
    rc_t rc;

    /* zero-filled so that every member starts out as 0 / NULL */
    Read_Restorer *obj = calloc( 1, sizeof *obj );
    if ( obj == NULL )
    {
        *objp = NULL;
        return RC ( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );
    }

    rc = open_RR_cursor( obj, tbl, native_curs, "PRIMARY_ALIGNMENT" );
    if ( rc != 0 )
    {
        /* never hand a dangling or uninitialized pointer back to the caller */
        free( obj );
        *objp = NULL;
        return rc;
    }

    /* READ_RESTORER_VERSION == 2: there is no cache to begin with
       ( obj->read_store is NULL because the object is zero-filled );
       one is made later if sequential access is detected */

    SUB_DEBUG( ( "SUB.Make in 'seq-restore-read.c'\n" ) );
    *objp = obj;
    return 0;
}
/* RestoreReadMake
 *  Allocates a zero-filled RestoreRead and creates its reference-sequence
 *  manager through RefSeqMgr_Make().
 *
 *  objp [ OUT ] - receives the new object on success; set to NULL on every
 *                 failure path
 *  mgr  [ IN ]  - database manager forwarded to RefSeqMgr_Make()
 *
 *  Returns 0 on success, a memory-exhausted RC when the allocation fails,
 *  or the RC reported by RefSeqMgr_Make().
 */
static rc_t RestoreReadMake ( RestoreRead **objp, const VDBManager *mgr )
{
    rc_t rc;
    RestoreRead *obj;

    assert( objp != NULL );
    assert( mgr != NULL );

    obj = calloc( 1, sizeof( *obj ) );
    if ( obj == NULL )
    {
        /* keep the out-parameter well defined, as on the other failure path */
        *objp = NULL;
        return RC( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );
    }

    SUB_DEBUG( ( "SUB.Make in 'ref_restore_read.c'\n" ) );

    /* NOTE(review): 8 * 1024 * 1024 and 30 are tuning values whose meaning is
       defined by RefSeqMgr_Make() - confirm against its declaration */
    rc = RefSeqMgr_Make( &obj->rmgr, mgr, errefseq_4NA, 8 * 1024 * 1024, 30 );
    if ( rc != 0 )
    {
        *objp = NULL;
        RestoreReadWhack( obj );
        return rc;
    }

    *objp = obj;
    return 0;
}
/* RestoreReadMake
 *  Creates a RestoreRead whose cursor reads the READ column of the
 *  "PRIMARY_ALIGNMENT" table.
 *
 *  A cursor already linked to native_curs under that name is reused;
 *  otherwise the table is opened through the parent database of tbl, a
 *  cached cursor is created and opened, and it is linked to native_curs.
 *
 *  objp        [ OUT ] - receives the new object on success; set to NULL on
 *                        every failure path
 *  tbl         [ IN ]  - table whose parent database holds PRIMARY_ALIGNMENT
 *  native_curs [ IN ]  - cursor used to look up / register the linked cursor
 *
 *  Returns 0 on success, otherwise the RC of the step that failed.
 */
static rc_t RestoreReadMake ( RestoreRead **objp, const VTable *tbl, const VCursor* native_curs )
{
    rc_t rc;
    char name[] = "PRIMARY_ALIGNMENT";

    /* allocate zero-filled; the NULL check must come before any access */
    RestoreRead *obj = calloc ( 1, sizeof * obj );
    if ( obj == NULL )
    {
        *objp = NULL;
        return RC ( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );
    }

    rc = VCursorLinkedCursorGet( native_curs, name, &obj->curs );
    if ( rc == 0 )
    {
        /* share the linked cursor: take our own reference ... */
        VCursorAddRef( obj->curs );

        /* ... and obtain the index of the READ column on it; the column may
           already have been added by whoever created the cursor.
           NOTE(review): assumes the linked cursor is already open and that
           VCursorAddColumn() fills in the index when it reports rcExists -
           confirm against the VCursor API */
        rc = VCursorAddColumn ( obj -> curs, & obj -> read_idx, "( INSDC:4na:bin ) READ" );
        if ( GetRCState( rc ) == rcExists )
            rc = 0;
        if ( rc != 0 )
            VCursorRelease ( obj -> curs );
    }
    else
    {
        /* get at the parent database */
        const VDatabase *db;
        rc = VTableOpenParentRead ( tbl, & db );
        if ( rc == 0 )
        {
            /* open the primary alignment table */
            const VTable *patbl;
            rc = VDatabaseOpenTableRead ( db, & patbl, name );
            VDatabaseRelease ( db );
            if ( rc == 0 )
            {
                /* create a cursor */
                rc = VTableCreateCachedCursorRead( patbl, &obj->curs, 32*1024*1024UL );
                VTableRelease ( patbl );
                if ( rc == 0 )
                {
                    /* add columns to cursor, then open it */
                    rc = VCursorAddColumn ( obj -> curs, & obj -> read_idx, "( INSDC:4na:bin ) READ" );
                    if ( rc == 0 )
                        rc = VCursorOpen ( obj -> curs );

                    if ( rc == 0 )
                        VCursorLinkedCursorSet( native_curs, name, obj->curs );
                    else
                        VCursorRelease ( obj -> curs );
                }
            }
        }
    }

    if ( rc != 0 )
    {
        free ( obj );
        *objp = NULL;
        return rc;
    }

    SUB_DEBUG( ( "SUB.Make in 'seq-restore-read.c'\n" ) );
    * objp = obj;
    return 0;
}
/* function ascii NCBI:align:ref_seq_id ( I64 ref_id ); */

/* align_ref_seq_id
 *  Row function: looks up the cell of column self->seqID_idx at the row given
 *  by the first REF_ID element and copies it into the result as 8-bit
 *  elements.
 *
 *  - no cursor, or an empty REF_ID cell -> empty result, returns 0
 *  - lookup fails with "row not found"  -> empty result
 *  - any other lookup failure           -> that RC is returned
 */
static rc_t CC align_ref_seq_id ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv[] )
{
    RefSeqID const *self = ( void const * )data;
    int64_t const *ref_id = argv[ REF_ID ].u.data.base;
    char const *name = NULL;
    uint32_t name_len;
    rc_t rc;

    assert( argv[ REF_ID ].u.data.elem_bits == sizeof( *ref_id ) * 8 );

    /* nothing to look up */
    if ( self->curs == NULL || argv[ REF_ID ].u.data.elem_count == 0 )
    {
        rslt->elem_count = 0;
        return 0;
    }

    /* step to this row's first element */
    ref_id += argv[ REF_ID ].u.data.first_elem;

    SUB_DEBUG( ( "SUB.Rd in 'align_ref_seq_id.c' at #%lu\n", ref_id[ 0 ] ) );

    rc = VCursorCellDataDirect( self->curs, ref_id[ 0 ], self->seqID_idx,
                                NULL, (void const **)&name, NULL, &name_len );
    if ( GetRCState( rc ) == rcNotFound && GetRCObject( rc ) == rcRow )
    {
        /* a missing row is reported as an empty name, not as an error */
        name = "";
        name_len = 0;
    }
    else if ( rc != 0 )
    {
        return rc;
    }

    /* shape the output buffer as name_len elements of char width */
    rc = KDataBufferCast( rslt->data, rslt->data, sizeof( name[ 0 ] ) * 8, true );
    if ( rc != 0 )
        return rc;

    rc = KDataBufferResize( rslt->data, name_len );
    if ( rc != 0 )
        return rc;

    memmove( rslt->data->base, name, sizeof( name[ 0 ] ) * name_len );
    rslt->elem_count = name_len;
    rslt->elem_bits = sizeof( name[ 0 ] ) * 8;
    return rc;
}
/* RefSeqIDMake
 *  Allocates a RefSeqID, obtains a cursor through AlignRefTableCursor() and
 *  adds the sequence-id column to it: "SEQ_ID" first, "REF_SEQ_ID" if that
 *  column is not found. A column that already exists on the cursor counts
 *  as success.
 *
 *  objp [ OUT ] - receives the new object; written only on success
 *
 *  Returns 0 on success, otherwise the RC of the failing step; on failure
 *  the cursor (if obtained) is released and the object is freed.
 */
static rc_t RefSeqIDMake ( RefSeqID **objp, const VTable *tbl, const VCursor *native_curs )
{
    rc_t rc;
    RefSeqID *made = malloc ( sizeof * made );

    if ( made == NULL )
        return RC( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );

    made->curs = NULL;
    SUB_DEBUG( ( "SUB.Make in 'align_ref_seq_id.c'\n" ) );

    /* open the reference cursor */
    rc = AlignRefTableCursor( tbl, native_curs, &made->curs, NULL );
    if ( rc == 0 )
    {
        /* preferred column name first, fallback name second */
        rc = VCursorAddColumn( made->curs, &made->seqID_idx, "SEQ_ID" );
        if ( GetRCObject( rc ) == ( enum RCObject ) rcColumn && GetRCState( rc ) == rcNotFound )
        {
            rc = VCursorAddColumn( made->curs, &made->seqID_idx, "REF_SEQ_ID" );
        }

        /* "already on the cursor" is fine */
        if ( GetRCState( rc ) == rcExists )
        {
            rc = 0;
        }

        if ( rc == 0 )
        {
            *objp = made;
            return 0;
        }

        VCursorRelease ( made -> curs );
    }

    free ( made );
    return rc;
}
/* seq_restore_read_impl1
 *  Row function that assembles the full READ of a spot.
 *
 *  argv[ 0 ] - locally stored bases ( 8-bit elements )
 *  argv[ 1 ] - one alignment id per read ( 64-bit elements )
 *  argv[ 2 ] - length of each read
 *  argv[ 3 ] - type flags of each read
 *
 *  For a read with an alignment id > 0 the bases are fetched from the cursor
 *  in self at that row: copied as-is for a forward read, written in reverse
 *  order through the lookup table `map` for a reverse read. For all other
 *  reads the bases are consumed from argv[ 0 ] in order.
 *
 *  Returns 0 on success; an rcData/rcInconsistent RC when lengths or read
 *  types do not match up; otherwise the RC of the failing buffer or cursor
 *  call.
 */
static rc_t CC seq_restore_read_impl1 ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    Read_Restorer *self = data;

    const INSDC_4na_bin *local_bases = argv[ 0 ].u.data.base;
    uint32_t local_left = (uint32_t)argv[ 0 ].u.data.elem_count;
    const int64_t *align_id = argv[ 1 ].u.data.base;
    const uint32_t num_reads = (uint32_t)argv[ 1 ].u.data.elem_count;
    const INSDC_coord_len *read_len = argv[ 2 ].u.data.base;
    const uint8_t *read_type = argv[ 3 ].u.data.base;

    INSDC_4na_bin *out;
    INSDC_coord_len total;
    bool is_sequential = false;
    rc_t rc;
    int i;

    assert( argv[ 0 ].u.data.elem_bits == 8 );
    assert( argv[ 1 ].u.data.elem_bits == 64 );
    assert( argv[ 2 ].u.data.elem_bits == sizeof( INSDC_coord_len ) * 8 );
    assert( argv[ 2 ].u.data.elem_count == num_reads );
    assert( argv[ 3 ].u.data.elem_count == num_reads );

    /* step every input to this row's first element */
    local_bases += argv[ 0 ].u.data.first_elem;
    align_id += argv[ 1 ].u.data.first_elem;
    read_len += argv[ 2 ].u.data.first_elem;
    read_type += argv[ 3 ].u.data.first_elem;

    /* sequential-access detection: a row that neither repeats nor directly
       follows the previous one starts a new run; a run counts as sequential
       once it is more than 100 rows past its start */
    if ( !( row_id == self->last_row_id || row_id == self->last_row_id + 1 ) )
    {
        self->first_sequential_row_id = row_id;
    }
    else if ( row_id > self->first_sequential_row_id + 100 )
    {
        is_sequential = true;
    }
    self->last_row_id = row_id;

    /* total output length is the sum of all read lengths */
    total = 0;
    for ( i = 0; i < (int)num_reads; i++ )
    {
        total += read_len[ i ];
    }

    /* resize output row */
    rslt->data->elem_bits = 8;
    rc = KDataBufferResize( rslt->data, total );
    rslt->elem_count = total;
    out = rslt->data->base;

    if ( rc != 0 || total == 0 )
        return rc;

    /* shortcut - all data is local */
    if ( total == local_left )
    {
        memmove( out, local_bases, total );
        return rc;
    }

    /* during sequential access, prefetch once per window of rows */
    if ( is_sequential &&
         ( row_id < self->prefetch_start_id || row_id > self->prefetch_stop_id ) )
    {
        uint32_t num_rows = ( argv[ 1 ].u.data.base_elem_count - argv[ 1 ].u.data.first_elem );

        /* best effort: the result of the prefetch is not examined */
        VCursorDataPrefetch( self->curs, align_id, self->read_idx, num_rows, 1, INT64_MAX, true );

        self->prefetch_start_id = row_id;
        self->prefetch_stop_id = argv[ 1 ].blob_stop_id;
    }

    /* checking read by read */
    for ( i = 0; i < (int)num_reads && rc == 0; i++ )
    {
        const INSDC_coord_len this_len = read_len[ i ];

        if ( align_id[ i ] <= 0 )
        {
            /* data is in READ column */
            if ( local_left < this_len )
                return RC( rcXF, rcFunction, rcExecuting, rcData, rcInconsistent );

            memmove( out, local_bases, this_len );
            local_left -= this_len;
            local_bases += this_len;
        }
        else
        {
            const INSDC_4na_bin *aligned;
            uint32_t aligned_len;

            SUB_DEBUG( ( "SUB.Rd in 'seq-restore-read.c' at #%lu\n", align_id[ i ] ) );
            rc = VCursorCellDataDirect( self -> curs, align_id[ i ], self -> read_idx,
                                        NULL, ( const void** ) &aligned, NULL, &aligned_len );
            if ( rc == 0 )
            {
                if ( aligned_len != this_len )
                {
                    rc = RC( rcXF, rcFunction, rcExecuting, rcData, rcInconsistent );
                }
                else if ( read_type[ i ] & SRA_READ_TYPE_FORWARD )
                {
                    memmove( out, aligned, this_len );
                }
                else if ( read_type[ i ] & SRA_READ_TYPE_REVERSE )
                {
                    /* walk the source backwards, translating the low nibble
                       of each base through `map` ( table defined elsewhere;
                       presumably the 4na complement - confirm ) */
                    const int count = (int)this_len;
                    int j;
                    for ( j = 0; j < count; j++ )
                    {
                        out[ j ] = map [ aligned[ count - 1 - j ] & 15 ];
                    }
                }
                else
                {
                    rc = RC( rcXF, rcFunction, rcExecuting, rcData, rcInconsistent );
                }
            }
        }

        out += this_len;
    }

    return rc;
}
/* function INSDC:coord:zero NCBI:align:ref_pos ( I64 ref_id, INSDC:coord:zero ref_start ); */

/* align_ref_pos
 *  Row function: produces one output position per REF_START element.
 *
 *  Without a cursor in self the positions are a plain copy of REF_START.
 *  With a cursor, each position is REF_START plus
 *      ( ref_id - start_id ) * max_seq_len
 *  where start_id comes from the row range associated with ref_id. Row
 *  ranges are remembered in self->tr_range; on a miss the range is read via
 *  the cursor ( after setting the "QUERY_SEQ_NAME" parameter to the row's
 *  name ) and inserted into the tree.
 *
 *  Returns 0 on success, otherwise the RC of the failing cursor, allocation
 *  or buffer call.
 */
static rc_t CC align_ref_pos ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv[] )
{
    RefPos const *self = ( void const * )data;
    unsigned const ploidy = ( unsigned const )argv[ REF_START ].u.data.elem_count;

    int64_t const *ref_id = 0;
    INSDC_coord_zero const *ref_start;
    INSDC_coord_zero *ref_pos;
    int64_t ref_row_id = 0;
    unsigned i;
    rc_t rc = 0;

    assert( argv[ REF_ID ].u.data.elem_bits == sizeof( *ref_id ) * 8 );
    assert( argv[ REF_START ].u.data.elem_bits == sizeof( *ref_start ) * 8 );

    ref_start = argv[ REF_START ].u.data.base;
    ref_start += argv[ REF_START ].u.data.first_elem;

    if ( self->curs != NULL )
    {
        BSTRowRange *cached;

        ref_id = argv[ REF_ID ].u.data.base;
        ref_id += argv[ REF_ID ].u.data.first_elem;

        cached = ( BSTRowRange * )BSTreeFind( &self->tr_range, &ref_id[ 0 ], row_range_cmp );
        if ( cached == NULL )
        {
            /* miss: read the name, query the range for it, remember it */
            char const *name = NULL;
            uint32_t name_len;
            RowRange *fetched;

            SUB_DEBUG( ( "SUB.Rd in 'align-ref-pos.c' at #%lu\n", ref_id[ 0 ] ) );

            rc = VCursorCellDataDirect( self->curs, ref_id[ 0 ], self->name_idx,
                                        NULL, (void const **)&name, NULL, &name_len );
            if ( rc != 0 )
                return rc;

            rc = VCursorParamsSet( ( struct VCursorParams const * )self->curs,
                                   "QUERY_SEQ_NAME", "%.*s", name_len, name );
            if ( rc != 0 )
                return rc;

            rc = VCursorCellDataDirect( self->curs, ref_id[ 0 ], self->name_range_idx,
                                        NULL, (void const **)&fetched, NULL, NULL );
            if ( rc != 0 )
                return rc;

            cached = malloc( sizeof( *cached ) );
            if ( cached == NULL )
                return RC( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );

            memcpy( &cached->rr, fetched, sizeof( *fetched ) );
            /* self is const here, but the tree is the object's own cache */
            BSTreeInsert( ( BSTree* )&self->tr_range, ( BSTNode* )cached, row_range_sort );
        }

        ref_row_id = cached->rr.start_id;
    }

    rc = KDataBufferResize( rslt->data, ploidy );
    if ( rc != 0 )
        return rc;

    ref_pos = rslt->data->base;
    if ( self->curs == NULL )
    {
        /* no reference cursor: positions equal the given starts */
        for ( i = 0; i != ploidy; ++i )
        {
            ref_pos[ i ] = ref_start[ i ];
        }
    }
    else
    {
        for ( i = 0; i != ploidy; ++i )
        {
            ref_pos[ i ] = ref_start[ i ];
            ref_pos[ i ] += ( INSDC_coord_zero )( ( ref_id[ 0 ] - ref_row_id ) * self->max_seq_len );
        }
    }

    rslt->elem_count = ploidy;
    rslt->elem_bits = sizeof( ref_pos[ 0 ] ) * 8;
    return rc;
}
/* RefPosMake
 *  Allocates a RefPos, obtains a cursor through AlignRefTableCursor() and
 *  reads MAX_SEQ_LEN from row 1.
 *
 *  - MAX_SEQ_LEN column not found: the cursor is dropped ( curs == NULL,
 *    max_seq_len == 0 ) and the call still succeeds.
 *  - otherwise the "(utf8)NAME" and "NAME_RANGE" columns are added; a column
 *    that already exists on the cursor counts as success.
 *
 *  objp [ OUT ] - receives the new object; written only on success
 *
 *  Returns 0 on success, otherwise the RC of the failing step; on failure
 *  the cursor is released and the object is freed.
 */
static rc_t RefPosMake ( RefPos **objp, const VTable *tbl, const VCursor *native_curs )
{
    rc_t rc;
    RefPos *made = malloc ( sizeof * made );

    if ( made == NULL )
        return RC( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );

    made->curs = NULL;
    BSTreeInit( &made->tr_range );

    /* open the reference table cursor */
    SUB_DEBUG( ( "SUB.Make in 'align-ref-pos.c'\n" ) );
    rc = AlignRefTableCursor( tbl, native_curs, &made->curs, NULL );
    if ( rc == 0 )
    {
        uint32_t max_len_idx;

        rc = VCursorAddColumn( made->curs, &max_len_idx, "(U32)MAX_SEQ_LEN" );
        if ( ( rc == 0 ) || GetRCState( rc ) == rcExists )
        {
            const void * cell;
            uint32_t cell_len;

            rc = VCursorCellDataDirect( made->curs, 1, max_len_idx, NULL, &cell, NULL, &cell_len );
            if ( rc == 0 )
            {
                assert( cell_len == 1 );
                /* one U32 element, hence 4 bytes */
                memcpy( &made->max_seq_len, cell, 4 );
            }
        }

        if ( GetRCObject( rc ) == ( enum RCObject )rcColumn && GetRCState( rc ) == rcNotFound )
        {
            /* no MAX_SEQ_LEN means that REF_POS==REF_START */
            VCursorRelease( made->curs );
            made->curs = NULL;
            made->max_seq_len = 0;
            made->name_range_idx = 0;
            made->name_idx = 0;
            rc = 0;
        }
        else if ( rc == 0 )
        {
            /* add columns to cursor */
            rc = VCursorAddColumn( made->curs, &made->name_idx, "(utf8)NAME" );
            if ( rc == 0 || GetRCState( rc ) == rcExists )
            {
                rc = VCursorAddColumn( made->curs, &made->name_range_idx, "NAME_RANGE" );
            }
            if ( GetRCState( rc ) == rcExists )
            {
                rc = 0;
            }
        }
    }

    if ( rc != 0 )
    {
        VCursorRelease( made->curs );
        free( made );
        return rc;
    }

    *objp = made;
    return rc;
}
/* ref_restore_read_impl
 *  Row function producing seq_len 8-bit bases.
 *
 *  argv[ 0 ] - locally stored read ( may be empty )
 *  argv[ 1 ] - sequence id ( 8-bit characters )
 *  argv[ 2 ] - one-based start position ( exactly one element )
 *  argv[ 3 ] - sequence length ( exactly one element )
 *
 *  - local read present: it is copied and padded with 15 ( 'N' ) up to seq_len
 *  - no local read and start == 0: the whole output is filled with 15 ( 'N' )
 *  - otherwise the bases are read through RefSeqMgr_Read() at start - 1
 *
 *  Returns rcData/rcInvalid when the local read is longer than seq_len,
 *  rcTooShort / rcTooLong when the manager delivers a different number of
 *  bases than requested, otherwise the RC of the failing call or 0.
 */
static rc_t CC ref_restore_read_impl ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    RestoreRead* self = data;

    const uint8_t *read = argv[ 0 ].u.data.base;
    uint32_t read_len = (uint32_t)argv[ 0 ].u.data.elem_count;
    const char* seqid = argv[ 1 ].u.data.base;
    uint32_t seqid_len = (uint32_t)argv[ 1 ].u.data.elem_count;
    INSDC_coord_one seq_start;
    INSDC_coord_len seq_len;
    INSDC_coord_len num_read = 0;
    uint8_t* dst;
    rc_t rc;

    assert( argv[ 0 ].u.data.elem_bits == 8 );
    assert( argv[ 1 ].u.data.elem_bits == 8 );
    assert( argv[ 2 ].u.data.elem_bits == sizeof( INSDC_coord_one ) * 8 );
    assert( argv[ 3 ].u.data.elem_bits == sizeof( INSDC_coord_len ) * 8 );
    assert( argv[ 2 ].u.data.elem_count == 1 );
    assert( argv[ 3 ].u.data.elem_count == 1 );

    /* step every input to this row's first element */
    read += argv [ 0 ] . u . data . first_elem;
    seqid += argv [ 1 ] . u . data . first_elem;
    seq_start = ( ( INSDC_coord_one* )argv[ 2 ].u.data.base )[ argv[ 2 ].u.data.first_elem ];
    seq_len = ( ( INSDC_coord_len* )argv[ 3 ].u.data.base )[ argv[ 3 ].u.data.first_elem ];

    if ( seq_len < read_len )
        return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );

    /* resize output row to the full sequence length */
    rslt->data->elem_bits = 8;
    rc = KDataBufferResize( rslt->data, seq_len );
    if ( rc != 0 )
        return rc;

    rslt->elem_count = seq_len;
    dst = rslt->data->base;

    if ( seq_len == 0 )
        return rc;

    if ( read_len > 0 )
    {
        memcpy( dst, read, read_len );
        if ( read_len < seq_len )
        {
            /* pad with 'N' */
            memset( dst + read_len, 15, seq_len - read_len );
        }
        return rc;
    }

    if ( seq_start == 0 )
    {
        /* fill with 'N' */
        memset( dst, 15, seq_len );
        return rc;
    }

    SUB_DEBUG( ( "SUB.Rd in 'ref_restore_read.c' at: %.*s at %u.%u\n", seqid_len, seqid, seq_start, seq_len ) );

    /* the manager is given the start position minus one */
    rc = RefSeqMgr_Read( self->rmgr, seqid, seqid_len, seq_start - 1, seq_len, dst, &num_read );
    if ( rc == 0 && num_read != seq_len )
    {
        rc = RC( rcXF, rcFunction, rcExecuting, rcData, num_read < seq_len ? rcTooShort : rcTooLong );
    }

    return rc;
}