/* function ascii NCBI:align:ref_seq_id ( I64 ref_id );
 *
 *  Looks up the cell in column "seqID_idx" of the cursor held by the
 *  RefSeqID object passed as "data", at the row given by the first
 *  element of the REF_ID argument, and copies that cell into the result.
 *
 *  An empty result ( 0 elements ) is produced when there is no cursor,
 *  when the REF_ID argument is empty, or when the cursor reports that
 *  the row was not found. Any other cursor or buffer error is returned.
 */
static
rc_t CC align_ref_seq_id ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv[] )
{
    RefSeqID const *self = ( void const * ) data;
    int64_t const *ids = argv [ REF_ID ] . u . data . base;
    char const *seq_id = NULL;
    uint32_t seq_id_len;
    int64_t ref_row;
    rc_t rc;

    assert ( argv [ REF_ID ] . u . data . elem_bits == sizeof ( *ids ) * 8 );

    /* nothing to look up: emit an empty cell */
    if ( self -> curs == NULL || argv [ REF_ID ] . u . data . elem_count == 0 )
    {
        rslt -> elem_count = 0;
        return 0;
    }

    /* the row to read is the first element of the argument window */
    ref_row = ids [ argv [ REF_ID ] . u . data . first_elem ];

    SUB_DEBUG( ( "SUB.Rd in 'align_ref_seq_id.c' at #%lu\n", ref_row ) );

    rc = VCursorCellDataDirect ( self -> curs, ref_row, self -> seqID_idx,
        NULL, ( void const ** ) & seq_id, NULL, & seq_id_len );

    if ( GetRCState ( rc ) == rcNotFound && GetRCObject ( rc ) == rcRow )
    {
        /* a missing row is not an error; it yields an empty string */
        seq_id = "";
        seq_id_len = 0;
    }
    else if ( rc != 0 )
    {
        return rc;
    }

    /* shape the output as an array of characters, then fill it */
    rc = KDataBufferCast ( rslt -> data, rslt -> data, sizeof ( seq_id [ 0 ] ) * 8, true );
    if ( rc != 0 )
        return rc;

    rc = KDataBufferResize ( rslt -> data, seq_id_len );
    if ( rc != 0 )
        return rc;

    memmove ( rslt -> data -> base, seq_id, sizeof ( seq_id [ 0 ] ) * seq_id_len );

    rslt -> elem_bits = sizeof ( seq_id [ 0 ] ) * 8;
    rslt -> elem_count = seq_id_len;

    return 0;
}
/* dynamic_read_desc_static
 *  Produces exactly two read descriptors for a spot:
 *    [ 0 ] technical read  - starts at 0, as long as argv [ 1 ]
 *    [ 1 ] biological read - starts where [ 0 ] ends and covers the
 *                            remainder of argv [ 0 ]
 *
 *  Returns rcCorrupt when argv [ 1 ] has more elements than argv [ 0 ],
 *  or whatever error the output buffer cast / resize reports.
 */
static
rc_t CC dynamic_read_desc_static ( void *self, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    const VRowData *spot = & argv [ 0 ];
    const VRowData *adapter = & argv [ 1 ];
    KDataBuffer *out = rslt -> data;
    dynamic_read_desc *reads;
    rc_t rc;

    /* severe error if adapter is longer than spot */
    if ( spot -> u . data . elem_count < adapter -> u . data . elem_count )
        return RC ( rcSRA, rcFunction, rcExecuting, rcData, rcCorrupt );

    /* each output element is a triple of 32-bit values */
    if ( out -> elem_bits != 32 * 3 )
    {
        rc = KDataBufferCast ( out, out, 32 * 3, true );
        if ( rc != 0 )
            return rc;
    }

    /* two reads are always generated */
    if ( out -> elem_count != 2 )
    {
        rc = KDataBufferResize ( out, 2 );
        if ( rc != 0 )
            return rc;
    }

    reads = out -> base;

    /* adapter */
    reads [ 0 ] [ dyn_read_type ] = SRA_READ_TYPE_TECHNICAL;
    reads [ 0 ] [ dyn_read_start ] = 0;
    assert ( adapter -> u . data . elem_count >> 32 == 0 );
    reads [ 0 ] [ dyn_read_len ] = ( uint32_t ) adapter -> u . data . elem_count;

    /* fragment */
    reads [ 1 ] [ dyn_read_type ] = SRA_READ_TYPE_BIOLOGICAL;
    reads [ 1 ] [ dyn_read_start ] = reads [ 0 ] [ dyn_read_len ];
    assert ( spot -> u . data . elem_count >> 32 == 0 );
    reads [ 1 ] [ dyn_read_len ] =
        ( uint32_t ) ( spot -> u . data . elem_count ) - reads [ 0 ] [ dyn_read_len ];

    rslt -> elem_count = 2;
    return 0;
}
/* function ascii NCBI:align:ref_name ( I64 ref_id );
 *
 *  Reads the cell in column "name_idx" of the cursor held by the RefName
 *  object passed as "data", at the row given by the first element of the
 *  REF_ID argument, and copies it into the result.
 *
 *  An empty REF_ID argument is treated exactly like a row that the cursor
 *  cannot find: the result is an empty string. Any other error from the
 *  cursor or from the output buffer is returned to the caller.
 */
static
rc_t CC align_ref_name ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv[] )
{
    RefName const *self = ( void const * ) data;
    int64_t const *ids = argv [ REF_ID ] . u . data . base;
    char const *ref_name = NULL;
    uint32_t ref_name_len;
    rc_t rc;

    if ( argv [ REF_ID ] . u . data . elem_count != 0 )
    {
        assert ( argv [ REF_ID ] . u . data . elem_bits == sizeof ( *ids ) * 8 );

        /* step to the first element of the argument window */
        ids += argv [ REF_ID ] . u . data . first_elem;

        rc = VCursorCellDataDirect ( self -> curs, ids [ 0 ], self -> name_idx,
            NULL, ( void const ** ) & ref_name, NULL, & ref_name_len );
    }
    else
    {
        /* no id given: report it the same way as a missing row */
        rc = RC ( rcAlign, rcFunction, rcExecuting, rcRow, rcNotFound );
    }

    if ( GetRCState ( rc ) == rcNotFound && GetRCObject ( rc ) == rcRow )
    {
        ref_name = "";
        ref_name_len = 0;
    }
    else if ( rc )
    {
        return rc;
    }

    /* shape the output as an array of characters, then fill it */
    rc = KDataBufferCast ( rslt -> data, rslt -> data, sizeof ( ref_name [ 0 ] ) * 8, true );
    if ( rc )
        return rc;

    rc = KDataBufferResize ( rslt -> data, ref_name_len );
    if ( rc )
        return rc;

    memcpy ( rslt -> data -> base, ref_name, sizeof ( ref_name [ 0 ] ) * ref_name_len );

    rslt -> elem_bits = sizeof ( ref_name [ 0 ] ) * 8;
    rslt -> elem_count = ref_name_len;

    return 0;
}
/* environment_read_func
 *  Copies the contents of the KDataBuffer passed as "Self" into the
 *  result buffer, then casts the result buffer to the element size
 *  recorded in "rslt -> elem_bits".
 *
 *  Returns 0 on success, otherwise the error from the resize or cast.
 *  "rslt -> elem_count" is only updated on success.
 */
static
rc_t CC environment_read_func ( void *Self, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv[] )
{
    const KDataBuffer *src = Self;
    KDataBuffer *out = rslt -> data;
    rc_t rc;

    /* adopt the source element size and start from an empty buffer */
    out -> elem_bits = src -> elem_bits;
    out -> elem_count = 0;

    rc = KDataBufferResize ( out, src -> elem_count );
    if ( rc != 0 )
        return rc;

    memcpy ( out -> base, src -> base, KDataBufferBytes ( src ) );

    /* re-express the copied bytes in the element size the caller asked for */
    rc = KDataBufferCast ( out, out, rslt -> elem_bits, true );
    if ( rc != 0 )
        return rc;

    rslt -> elem_count = out -> elem_count;
    return 0;
}
static rc_t CC dynamic_read_desc_with_linker ( void *xself, const VXformInfo *info, int64_t row_id, VRowResult *rslt, uint32_t argc, const VRowData argv [] ) { rc_t rc; dynamic_read_desc *p; KDataBuffer *dst = rslt -> data; const linker_agrep *self = ( const void* ) xself; AgrepMatch match; int32_t found; AgrepFlags agrepflags; Agrep *agrep; /* AgrepCallArgs args; */ const char *agreppattern; char buf[4096]; const char *text; uint32_t textlen; /* severe error if adapter is longer than spot */ if ( argv [ 0 ] . u . data . elem_count < argv [ 1 ] . u . data . elem_count ) return RC ( rcSRA, rcFunction, rcExecuting, rcData, rcCorrupt ); /* the buffer should have already been given the correct element size */ if ( dst -> elem_bits != 32 * 3 ) { rc = KDataBufferCast ( dst, dst, 32 * 3, true ); if ( rc != 0 ) return rc; } /* we always produce 4 reads for when the linker is present */ if ( dst -> elem_count != 4 ) { rc = KDataBufferResize ( dst, 4 ); if ( rc != 0 ) return rc; } /* TBD - a mechanism for detecting when this has not changed since typically it will be identical for every row in a table but not necessarily so */ agreppattern = argv[2].u.data.base; agreppattern += argv[2].u.data.first_elem; textlen = (uint32_t)string_copy(buf, sizeof buf, agreppattern, argv[2].u.data.elem_count); if ( textlen >= sizeof buf ) return RC ( rcSRA, rcFunction, rcExecuting, rcData, rcExcessive ); text = argv[0].u.data.base; text += argv[0].u.data.first_elem; assert(argv[0].u.data.elem_count >> 32 == 0); textlen = (uint32_t)argv[0].u.data.elem_count; text += argv[1].u.data.elem_count; textlen -= argv[1].u.data.elem_count; agrepflags = AGREP_TEXT_EXPANDED_2NA | AGREP_PATTERN_4NA | AGREP_EXTEND_BETTER | AGREP_LEFT_MAINTAIN_SCORE | AGREP_ANYTHING_ELSE_IS_N; /* This might fail due to size restrictions. 
*/ rc = AgrepMake(&agrep, agrepflags | AGREP_ALG_MYERS, buf); if (rc == 0) { /* fprintf(stderr, "Using myers.\n"); */ } else { rc = AgrepMake(&agrep, agrepflags | AGREP_ALG_MYERS_UNLTD, buf); /* Try one more time. */ if (rc) { rc = AgrepMake(&agrep, agrepflags | AGREP_ALG_DP, buf); } if (rc) return rc; } found = AgrepFindBest(agrep, self->edit_distance, text, textlen, &match); if (found) { p = dst -> base; /* adapter */ p [ 0 ] [ dyn_read_type ] = SRA_READ_TYPE_TECHNICAL; p [ 0 ] [ dyn_read_start ] = 0; assert(argv [ 1 ] . u . data . elem_count >> 32 == 0); p [ 0 ] [ dyn_read_len ] = (uint32_t)argv [ 1 ] . u . data . elem_count; /* fragment */ p [ 1 ] [ dyn_read_type ] = SRA_READ_TYPE_BIOLOGICAL; p [ 1 ] [ dyn_read_start ] = (uint32_t)argv [ 1 ] . u . data . elem_count; p [ 1 ] [ dyn_read_len ] = match.position; /* linker */ p [ 2 ] [ dyn_read_type ] = SRA_READ_TYPE_TECHNICAL; p [ 2 ] [ dyn_read_start ] = match.position + (uint32_t)argv[1].u.data.elem_count; p [ 2 ] [ dyn_read_len ] = match.length; /* fragment */ p [ 3 ] [ dyn_read_type ] = SRA_READ_TYPE_BIOLOGICAL; p [ 3 ] [ dyn_read_start ] = match.position + match.length + (uint32_t)argv[1].u.data.elem_count; p [ 3 ] [ dyn_read_len ] = (uint32_t)argv [ 0 ] . u . data . elem_count - match.position - match.length - (uint32_t)argv[1].u.data.elem_count; rslt -> elem_count = 4; } else {
/* format_spot_name ( ascii name_fmt, I32 X, I32 Y * ascii spot_name );
 *  given a name format string, X, and Y
 *  produce a reconstructed spot name string
 *
 *  "name_fmt" [ DATA ] - name format string ( see format explanation below )
 *
 *  "X" [ DATA ] - X coordinate for spot
 *
 *  "Y" [ DATA ] - Y coordinate for spot
 *
 *  "spot_name" [ DATA, OPTIONAL ] - potential source of unformatted names
 *
 * SYNOPSIS:
 *  "name_fmt" may have any ASCII characters
 *  the special character '$' is an escape symbol
 *  when followed by a recognized format character,
 *  both the '$' and its format character will be
 *  replaced with a numeral generated from X and/or Y.
 *  a '$' followed by any other character is copied literally.
 *
 *  when "spot_name" is present and the "name_fmt" row is empty,
 *  output is taken verbatim from "spot_name"
 *
 * FORMAT:
 *  'X' ( or 'x' ) = substitute with a decimal representation for X
 *  'Y' ( or 'y' ) = substitute with a decimal representation for Y
 *  'Q' ( or 'q' ) = calculate 454-style base-36 representation
 *                   for both X and Y ( always 5 characters )
 *  'R' ( or 'r' ) = substitute with a decimal representation of row_id
 *
 *  "$X" or "$Y" immediately followed by "%<digit>" adds 24*1024*digit
 *  to the coordinate before printing; the adjusted value is kept for
 *  any later use of the same coordinate in this format string.
 *
 * RETURNS:
 *  0 on success
 *  rcBuffer/rcInsufficient if the formatted name does not fit the
 *    1024-byte working buffer
 *  rcRange/rcTooBig if 'Q' is requested and the coordinates cannot be
 *    expressed in 5 base-36 digits
 *  rcRow/rcNull if neither a format nor a spot name is available
 *  any error reported by string_printf or by the output KDataBuffer
 */
static
rc_t CC format_spot_name ( void *self, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    char *name;
    uint32_t elem_count;
    KDataBuffer *dst = rslt -> data;

    /* output is an array of 8-bit characters */
    if ( dst -> elem_bits != 8 )
    {
        rc = KDataBufferCast ( dst, dst, 8, true );
        if ( rc != 0 )
            return rc;
    }

    /* check for NAME_FMT */
    if ( argv [ 0 ] . u . data . elem_count != 0 )
    {
        size_t num_writ;
        char sname [ 1024 ]; /** name on stack **/
        const char *name_fmt = ( ( char* ) argv [ 0 ] . u . data . base ) + argv [ 0 ] . u . data . first_elem;
        uint32_t i, j, x, y;
        const uint32_t fmt_size = argv [ 0 ] . u . data . elem_count;

        /* largest value + 1 that 5 base-36 digits can express */
        const uint64_t q_limit = ( uint64_t ) 36 * 36 * 36 * 36 * 36;

        /* the coordinates to substitute */
        x = ( ( const int32_t* ) argv [ 1 ] . u . data . base ) [ argv [ 1 ] . u . data . first_elem ];
        y = ( ( const int32_t* ) argv [ 2 ] . u . data . base ) [ argv [ 2 ] . u . data . first_elem ];

        /* the loop stops one short of the end so that name_fmt [ i + 1 ]
           is always inside the format; the last character is handled below */
        for ( i = j = 0; i < fmt_size - 1; )
        {
            if ( name_fmt [ i ] == '$' )
            {
                switch ( name_fmt [ i + 1 ] )
                {
                case 'x': case 'X':
                case 'y': case 'Y':
                {
                    int n;
                    uint32_t *coord =
                        ( name_fmt [ i + 1 ] == 'x' || name_fmt [ i + 1 ] == 'X' ) ? & x : & y;

                    if ( j > sizeof ( sname ) - 11 )
                        return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );

                    i += 2;

                    /* optional "%<digit>" modifier */
                    if ( i < fmt_size - 1 && name_fmt [ i ] == '%' &&
                         isdigit ( ( unsigned char ) name_fmt [ i + 1 ] ) )
                    {
                        * coord += 24 * 1024 * ( name_fmt [ i + 1 ] - '0' );
                        i += 2;
                    }

                    /* bounded print: a negative value needs 11 characters plus
                       the terminator, which an unchecked sprintf could write
                       one byte past the end of sname */
                    n = snprintf ( sname + j, sizeof ( sname ) - j, "%d", ( int ) * coord );
                    if ( n < 0 || ( size_t ) n >= sizeof ( sname ) - j )
                        return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );

                    j += ( uint32_t ) n;
                    break;
                }

                case 'q': case 'Q':
                    if ( j > sizeof ( sname ) - 5 )
                    {
                        return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
                    }
                    else if ( y > 0xFFF )
                    {
                        return RC ( rcXF, rcFunction, rcDecoding, rcRange, rcTooBig );
                    }
                    else
                    {
                        /* combine in 64 bits so an oversized x cannot wrap */
                        uint64_t wide = ( ( uint64_t ) x << 12 ) | y;
                        uint32_t q;

                        /* the leading digit is taken without "% 36"; reject
                           values that would index past the 36-symbol range */
                        if ( wide >= q_limit )
                            return RC ( rcXF, rcFunction, rcDecoding, rcRange, rcTooBig );

                        q = ( uint32_t ) wide;
                        sname [ j + 4 ] = Q_ALPHABET [ q % 36 ]; q /= 36;
                        sname [ j + 3 ] = Q_ALPHABET [ q % 36 ]; q /= 36;
                        sname [ j + 2 ] = Q_ALPHABET [ q % 36 ]; q /= 36;
                        sname [ j + 1 ] = Q_ALPHABET [ q % 36 ]; q /= 36;
                        sname [ j ]     = Q_ALPHABET [ q ];
                        j += 5;
                        i += 2;
                    }
                    break;

                case 'r': case 'R':
                    rc = string_printf ( & sname [ j ], sizeof sname - j, & num_writ, "%ld", row_id );
                    /* report the failure instead of relying on an assert
                       that disappears in release builds */
                    if ( rc != 0 )
                        return rc;
                    j += ( uint32_t ) num_writ;
                    i += 2;
                    break;

                default:
                    /* unrecognized escape: copy the '$' literally. j is at
                       most sizeof sname - 1 here thanks to the check at the
                       bottom of the loop */
                    sname [ j ++ ] = name_fmt [ i ++ ];
                    break;
                }
            }
            else
            {
                if ( j < sizeof ( sname ) )
                {
                    sname [ j ++ ] = name_fmt [ i ++ ];
                }
                else
                {
                    return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
                }
            }

            if ( j > sizeof ( sname ) - 1 )
            {
                return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
            }
        }

        /* copy the final character if it was not consumed by an escape */
        if ( i == fmt_size - 1 )
        {
            if ( j < sizeof ( sname ) )
            {
                sname [ j ++ ] = name_fmt [ i ++ ];
            }
            else
            {
                return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
            }
        }

        /* j is our new element count **/
        elem_count = j;

        /* resize output */
        if ( dst -> elem_count <= elem_count )
        {
            rc = KDataBufferResize ( dst, elem_count );
            if ( rc != 0 )
                return rc;
        }

        /* the output name */
        name = dst -> base;
        memcpy ( name, sname, elem_count );
        rslt -> elem_count = elem_count;
        return 0;
    }

    /* check for NAME */
    if ( argc == 4 && argv [ 3 ] . u . data . elem_count != 0 )
    {
        const char *sname = ( ( char* ) argv [ 3 ] . u . data . base ) + argv [ 3 ] . u . data . first_elem;

        /* output size */
        elem_count = argv [ 3 ] . u . data . elem_count;

        /* resize output, leaving room for the terminator written below */
        if ( dst -> elem_count <= elem_count )
        {
            rc = KDataBufferResize ( dst, elem_count + 1 );
            if ( rc != 0 )
                return rc;
        }

        name = dst -> base;
        memcpy ( dst -> base, sname, elem_count );
        rslt -> elem_count = elem_count;
        name [ elem_count ] = 0;
        return 0;
    }

    /* spot has no name */
    return RC ( rcSRA, rcColumn, rcReading, rcRow, rcNull );
}
/* format_spot_name_no_coord
 *  Builds a spot name from a format string when no coordinates are
 *  available.
 *
 *  argv [ 0 ] - name format string
 *  argv [ 1 ] - optional ( argc == 2 ) unformatted spot name, used
 *               verbatim when the format row is empty
 *
 *  The only escape recognized in the format is "$R" ( or "$r" ), which
 *  is replaced by the decimal row_id. A '$' followed by any other
 *  character is copied literally.
 *
 * RETURNS:
 *  0 on success
 *  rcBuffer/rcInsufficient if the formatted name does not fit the
 *    1024-byte working buffer
 *  rcRow/rcNull if neither a format nor a spot name is available
 *  any error reported by string_printf or by the output KDataBuffer
 */
static
rc_t CC format_spot_name_no_coord ( void *self, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    char *name;
    uint32_t elem_count;
    KDataBuffer *dst = rslt -> data;

    /* output is an array of 8-bit characters */
    if ( dst -> elem_bits != 8 )
    {
        rc = KDataBufferCast ( dst, dst, 8, true );
        if ( rc != 0 )
            return rc;
    }

    /* check for NAME_FMT */
    if ( argv [ 0 ] . u . data . elem_count != 0 )
    {
        size_t num_writ;
        char sname [ 1024 ]; /** name on stack **/
        const char *name_fmt = ( ( char* ) argv [ 0 ] . u . data . base ) + argv [ 0 ] . u . data . first_elem;
        uint32_t i, j;
        const uint32_t fmt_size = argv [ 0 ] . u . data . elem_count;

        /* the loop stops one short of the end so that name_fmt [ i + 1 ]
           is always inside the format; the last character is handled below */
        for ( i = j = 0; i < fmt_size - 1; )
        {
            if ( name_fmt [ i ] == '$' )
            {
                switch ( name_fmt [ i + 1 ] )
                {
                case 'r': case 'R':
                    rc = string_printf ( & sname [ j ], sizeof sname - j, & num_writ, "%ld", row_id );
                    /* report the failure instead of relying on an assert
                       that disappears in release builds */
                    if ( rc != 0 )
                        return rc;
                    j += ( uint32_t ) num_writ;
                    i += 2;
                    break;

                default:
                    /* unrecognized escape: copy the '$' literally. j is at
                       most sizeof sname - 1 here thanks to the check at the
                       bottom of the loop */
                    sname [ j ++ ] = name_fmt [ i ++ ];
                    break;
                }
            }
            else
            {
                if ( j < sizeof ( sname ) )
                {
                    sname [ j ++ ] = name_fmt [ i ++ ];
                }
                else
                {
                    return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
                }
            }

            if ( j > sizeof ( sname ) - 1 )
            {
                return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
            }
        }

        /* copy the final character if it was not consumed by an escape */
        if ( i == fmt_size - 1 )
        {
            if ( j < sizeof ( sname ) )
            {
                sname [ j ++ ] = name_fmt [ i ++ ];
            }
            else
            {
                return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
            }
        }

        /* j is our new element count **/
        elem_count = j;

        /* resize output */
        if ( dst -> elem_count <= elem_count )
        {
            rc = KDataBufferResize ( dst, elem_count );
            if ( rc != 0 )
                return rc;
        }

        /* the output name */
        name = dst -> base;
        memcpy ( name, sname, elem_count );
        rslt -> elem_count = elem_count;
        return 0;
    }

    /* check for NAME */
    if ( argc == 2 && argv [ 1 ] . u . data . elem_count != 0 )
    {
        const char *sname = ( ( char* ) argv [ 1 ] . u . data . base ) + argv [ 1 ] . u . data . first_elem;

        /* output size */
        elem_count = argv [ 1 ] . u . data . elem_count;

        /* resize output, leaving room for the terminator written below */
        if ( dst -> elem_count <= elem_count )
        {
            rc = KDataBufferResize ( dst, elem_count + 1 );
            if ( rc != 0 )
                return rc;
        }

        name = dst -> base;
        memcpy ( dst -> base, sname, elem_count );
        rslt -> elem_count = elem_count;
        name [ elem_count ] = 0;
        return 0;
    }

    /* spot has no name */
    return RC ( rcSRA, rcColumn, rcReading, rcRow, rcNull );
}
/* illumina_rewrite_spot_name
 *  Rebuilds a spot name of the form
 *      <prefix><leading part of key>:<a>:<b>:<c>:<d>
 *  from the key in argv [ 0 ] and an optional prefix in argv [ 1 ]
 *  ( used when argc != 1 ).
 *
 *  The four numbers are obtained in one of two ways, chosen by the
 *  length of the run of hex digits at the end of the key:
 *    - run shorter than SLX_COORD_LEN - 1: the key is scanned backward
 *      for four decimal fields separated by non-digit characters
 *    - otherwise: the numbers are read with scan_hex as 1 + 3 + 3 + 3
 *      hex digits
 *
 *  Returns 0 on success or the error from the output buffer cast /
 *  resize. "rslt -> elem_count" is set to the number of characters
 *  written, not counting the terminator.
 *
 *  NOTE(review): in the hex branch the digits are read from the START of
 *  the key while the name part kept is also the first
 *  count - SLX_COORD_LEN characters - confirm against the intended layout.
 *  NOTE(review): atoi is applied to data that is not shown to be
 *  NUL-terminated; it relies on a non-digit separator following each
 *  field. An empty key would be read out of bounds - confirm callers
 *  never pass one.
 */
static
rc_t CC illumina_rewrite_spot_name ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    char buffer [ 64 ];
    uint32_t coord_len;
    uint32_t prefix_len;
    unsigned int a, b, c, d;
    KDataBuffer *dst = rslt -> data;
    const char *prefix;
    const char *skey = argv [ 0 ] . u . data . base;
    uint64_t i, j, count = argv [ 0 ] . u . data . elem_count;

    skey += argv [ 0 ] . u . data . first_elem;

    /* find last hex portion
       ( ctype functions require a value representable as unsigned char ) */
    for ( i = count; i > 0; )
    {
        if ( ! isxdigit ( ( unsigned char ) skey [ -- i ] ) )
            break;
    }

    if ( count - i < ( SLX_COORD_LEN - 1 ) )
    {
        const char *end = skey + count;

        /* new format: walk backward over four decimal fields; after each
           loop j is the index of the first digit of the field ( or 0 ) */
        for ( d = 0, j = 0, i = count; i > 0; )
        {
            if ( ! isdigit ( ( unsigned char ) skey [ -- i ] ) )
            {
                j = i + 1;
                break;
            }
        }
        /* the last field runs to the end of the key, so it needs the
           bounded conversion */
        d = non_braindead_atoi ( & skey [ j ], end );

        for ( c = 0, j = 0; i > 0; )
        {
            if ( ! isdigit ( ( unsigned char ) skey [ -- i ] ) )
            {
                j = i + 1;
                break;
            }
        }
        c = atoi ( & skey [ j ] );

        for ( b = 0, j = 0; i > 0; )
        {
            if ( ! isdigit ( ( unsigned char ) skey [ -- i ] ) )
            {
                j = i + 1;
                break;
            }
        }
        b = atoi ( & skey [ j ] );

        for ( a = 0, j = 0; i > 0; )
        {
            if ( ! isdigit ( ( unsigned char ) skey [ -- i ] ) )
            {
                j = i + 1;
                break;
            }
        }
        a = atoi ( & skey [ j ] );

        if ( j > 0 )
        {
            if ( i > 0 )
                -- i;

            /* skip forward over letters, but never past the end of the
               key: an all-alphabetic key would otherwise be read out of
               bounds */
            while ( i < count && isalpha ( ( unsigned char ) skey [ i ] ) )
                ++ i;
        }
    }
    else
    {
        a = scan_hex ( skey, 1 );
        b = scan_hex ( & skey [ 1 ], 3 );
        c = scan_hex ( & skey [ 4 ], 3 );
        d = scan_hex ( & skey [ 7 ], 3 );

        if ( count > SLX_COORD_LEN )
        {
            i = count - SLX_COORD_LEN;
        }
    }

    /* generate coordinates */
    coord_len = sprintf ( buffer, ":%d:%d:%d:%d", a, b, c, d );

    /* get size of prefix */
    if ( argc == 1 )
    {
        prefix = "";
        prefix_len = 0;
    }
    else
    {
        prefix = argv [ 1 ] . u . data . base;
        assert ( argv [ 1 ] . u . data . elem_count >> 32 == 0 );
        prefix_len = ( uint32_t ) argv [ 1 ] . u . data . elem_count;
        prefix += argv [ 1 ] . u . data . first_elem;
    }

    /* resize output buffer for prefix, name stuff, coordinates */
    if ( dst -> elem_bits != 8 )
    {
        rc = KDataBufferCast ( dst, dst, 8, true );
        if ( rc != 0 )
            return rc;
    }

    /* + 1 leaves room for the terminator written by sprintf */
    rc = KDataBufferResize ( dst, prefix_len + i + coord_len + 1 );
    if ( rc != 0 )
        return rc;

    /* copy in prefix, name prefix, coordinates */
    rslt -> elem_count = sprintf ( dst -> base, "%.*s%.*s%s"
                                   , ( int ) prefix_len, prefix
                                   , ( int ) i, skey
                                   , buffer );

    return 0;
}