Esempio n. 1
0
/*
 function ascii NCBI:align:ref_seq_id ( I64 ref_id );

 Reads the cell in column "seqID_idx" of the cursor held by the RefSeqID
 context, at the row named by the first REF_ID element, and copies it to
 the output as 8-bit elements.

 An unset cursor or an empty REF_ID row yields an empty result.
 A "row not found" lookup error also yields an empty result; every other
 lookup error is returned to the caller unchanged.
*/
static
rc_t CC align_ref_seq_id ( void *data, const VXformInfo *info,
    int64_t row_id, VRowResult *rslt, uint32_t argc, const VRowData argv[] )
{
    rc_t rc;
    RefSeqID const *ctx = ( void const * )data;
    const VRowData *arg = & argv [ REF_ID ];
    int64_t const *row_ids = arg -> u . data . base;
    char const *seq_id = NULL;
    uint32_t seq_id_len;

    assert ( arg -> u . data . elem_bits == sizeof ( row_ids [ 0 ] ) * 8 );

    /* nothing to look up: report an empty cell */
    if ( ctx -> curs == NULL || arg -> u . data . elem_count == 0 )
    {
        rslt -> elem_count = 0;
        return 0;
    }

    /* step to the first element of this row */
    row_ids += arg -> u . data . first_elem;

    SUB_DEBUG( ( "SUB.Rd in 'align_ref_seq_id.c' at #%lu\n", row_ids[ 0 ] ) );

    rc = VCursorCellDataDirect ( ctx -> curs, row_ids [ 0 ], ctx -> seqID_idx,
        NULL, ( void const ** ) & seq_id, NULL, & seq_id_len );
    if ( GetRCState ( rc ) != rcNotFound || GetRCObject ( rc ) != rcRow )
    {
        /* success, or an error other than "row not found" */
        if ( rc != 0 )
            return rc;
    }
    else
    {
        /* a missing row is reported as an empty string */
        seq_id = "";
        seq_id_len = 0;
    }

    /* shape the output as seq_id_len elements of one char each */
    rc = KDataBufferCast ( rslt -> data, rslt -> data, sizeof ( seq_id [ 0 ] ) * 8, true );
    if ( rc == 0 )
        rc = KDataBufferResize ( rslt -> data, seq_id_len );
    if ( rc != 0 )
        return rc;

    memmove ( rslt -> data -> base, seq_id, sizeof ( seq_id [ 0 ] ) * seq_id_len );
    rslt -> elem_bits = sizeof ( seq_id [ 0 ] ) * 8;
    rslt -> elem_count = seq_id_len;
    return 0;
}
/* dynamic_read_desc_static
 *  emits exactly two read descriptors for a spot:
 *    [ 0 ] technical read covering the adapter ( length of argv [ 1 ] )
 *    [ 1 ] biological read covering the remainder of argv [ 0 ]
 *
 *  returns rcData/rcCorrupt when argv [ 1 ] is longer than argv [ 0 ],
 *  or any error from casting/resizing the output buffer.
 */
static
rc_t CC dynamic_read_desc_static ( void *self, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    uint32_t adapter_len, spot_len;
    dynamic_read_desc *reads;
    KDataBuffer *out = rslt -> data;

    /* the adapter cannot be longer than the spot that contains it */
    if ( argv [ 0 ] . u . data . elem_count < argv [ 1 ] . u . data . elem_count )
        return RC ( rcSRA, rcFunction, rcExecuting, rcData, rcCorrupt );

    /* each descriptor is three 32-bit fields */
    if ( out -> elem_bits != 32 * 3 )
    {
        rc = KDataBufferCast ( out, out, 32 * 3, true );
        if ( rc != 0 )
            return rc;
    }

    /* room for exactly two descriptors */
    if ( out -> elem_count != 2 )
    {
        rc = KDataBufferResize ( out, 2 );
        if ( rc != 0 )
            return rc;
    }

    reads = out -> base;

    /* both lengths must fit in 32 bits before they are narrowed */
    assert ( argv [ 1 ] . u . data . elem_count >> 32 == 0 );
    adapter_len = ( uint32_t ) argv [ 1 ] . u . data . elem_count;
    assert ( argv [ 0 ] . u . data . elem_count >> 32 == 0 );
    spot_len = ( uint32_t ) argv [ 0 ] . u . data . elem_count;

    /* adapter: starts the spot */
    reads [ 0 ] [ dyn_read_type ] = SRA_READ_TYPE_TECHNICAL;
    reads [ 0 ] [ dyn_read_start ] = 0;
    reads [ 0 ] [ dyn_read_len ] = adapter_len;

    /* fragment: everything after the adapter */
    reads [ 1 ] [ dyn_read_type ] = SRA_READ_TYPE_BIOLOGICAL;
    reads [ 1 ] [ dyn_read_start ] = adapter_len;
    reads [ 1 ] [ dyn_read_len ] = spot_len - adapter_len;

    rslt -> elem_count = 2;
    return 0;
}
Esempio n. 3
0
/*
 function ascii NCBI:align:ref_name ( I64 ref_id );

 Reads the cell in column "name_idx" of the cursor held by the RefName
 context, at the row named by the first REF_ID element, and copies it to
 the output as 8-bit elements.

 An empty REF_ID row is treated like a "row not found" lookup: both yield
 an empty result. Every other lookup error is returned unchanged.
*/
static
rc_t CC align_ref_name ( void *data, const VXformInfo *info,
    int64_t row_id, VRowResult *rslt, uint32_t argc, const VRowData argv[] )
{
    rc_t rc;
    RefName const *ctx = (void const *)data;
    const VRowData *arg = &argv[REF_ID];
    int64_t const *row_ids = arg->u.data.base;
    char const *ref_name = NULL;
    uint32_t ref_name_len;

    if (arg->u.data.elem_count != 0) {
        assert(arg->u.data.elem_bits == sizeof(row_ids[0]) * 8);

        /* step to the first element of this row and look it up */
        row_ids += arg->u.data.first_elem;
        rc = VCursorCellDataDirect(ctx->curs, row_ids[0], ctx->name_idx,
                                   NULL, (void const **)&ref_name, NULL, &ref_name_len);
    }
    else {
        /* no id supplied: take the same path as a missing row */
        rc = RC(rcAlign, rcFunction, rcExecuting, rcRow, rcNotFound);
    }

    if (GetRCState(rc) != rcNotFound || GetRCObject(rc) != rcRow) {
        /* success, or an error other than "row not found" */
        if (rc)
            return rc;
    }
    else {
        /* a missing row is reported as an empty string */
        ref_name = "";
        ref_name_len = 0;
    }

    /* shape the output as ref_name_len elements of one char each */
    rc = KDataBufferCast(rslt->data, rslt->data, sizeof(ref_name[0]) * 8, true);
    if (rc == 0)
        rc = KDataBufferResize(rslt->data, ref_name_len);
    if (rc)
        return rc;

    memcpy(rslt->data->base, ref_name, sizeof(ref_name[0]) * ref_name_len);
    rslt->elem_bits = sizeof(ref_name[0]) * 8;
    rslt->elem_count = ref_name_len;
    return 0;
}
Esempio n. 4
0
/* environment_read_func
 *  copies the KDataBuffer passed as "Self" into the row result.
 *
 *  the output buffer is given the source element size, resized to the
 *  source element count, filled with the source bytes, and finally cast
 *  to the element size requested in "rslt -> elem_bits".
 *
 *  returns the first error from KDataBufferResize or KDataBufferCast.
 */
static
rc_t CC environment_read_func(
                         void *Self,
                         const VXformInfo *info,
                         int64_t row_id,
                         VRowResult *rslt,
                         uint32_t argc,
                         const VRowData argv[]
) {
    const KDataBuffer *src = Self;
    KDataBuffer *out = rslt->data;
    rc_t rc;

    /* adopt the source element size before sizing the buffer */
    out->elem_bits = src->elem_bits;
    out->elem_count = 0;

    rc = KDataBufferResize(out, src->elem_count);
    if (rc != 0)
        return rc;

    memcpy(out->base, src->base, KDataBufferBytes(src));

    /* re-express the copied bytes in the element size the caller asked for */
    rc = KDataBufferCast(out, out, rslt->elem_bits, true);
    if (rc != 0)
        return rc;

    rslt->elem_count = out->elem_count;
    return 0;
}
static
rc_t CC dynamic_read_desc_with_linker ( void *xself, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    dynamic_read_desc *p;
    KDataBuffer *dst = rslt -> data;
    const linker_agrep *self = ( const void* ) xself;

    AgrepMatch match;
    int32_t found;
    AgrepFlags agrepflags;
    Agrep *agrep;
    /* AgrepCallArgs args; */
    
    const char *agreppattern;
    char buf[4096];
    const char *text;
    uint32_t textlen;

    /* severe error if adapter is longer than spot */
    if ( argv [ 0 ] . u . data . elem_count < argv [ 1 ] . u . data . elem_count )
        return RC ( rcSRA, rcFunction, rcExecuting, rcData, rcCorrupt );

    /* the buffer should have already been given the correct element size */
    if ( dst -> elem_bits != 32 * 3 )
    {
        rc = KDataBufferCast ( dst, dst, 32 * 3, true );
        if ( rc != 0 )
            return rc;
    }

    /* we always produce 4 reads for when the linker is present */
    if ( dst -> elem_count != 4 )
    {
        rc = KDataBufferResize ( dst, 4 );
        if ( rc != 0 )
            return rc;
    }

    /* TBD - a mechanism for detecting when this has not changed
       since typically it will be identical for every row in a table
       but not necessarily so */
    agreppattern = argv[2].u.data.base;
    agreppattern += argv[2].u.data.first_elem; 
    textlen = (uint32_t)string_copy(buf, sizeof buf, agreppattern, argv[2].u.data.elem_count);
    if ( textlen >= sizeof buf )
        return RC ( rcSRA, rcFunction, rcExecuting, rcData, rcExcessive );

    text = argv[0].u.data.base;
    text += argv[0].u.data.first_elem;

    assert(argv[0].u.data.elem_count >> 32 == 0);
    textlen = (uint32_t)argv[0].u.data.elem_count;

    text += argv[1].u.data.elem_count;
    textlen -= argv[1].u.data.elem_count;

    agrepflags = AGREP_TEXT_EXPANDED_2NA 
        | AGREP_PATTERN_4NA
        | AGREP_EXTEND_BETTER
        | AGREP_LEFT_MAINTAIN_SCORE
        | AGREP_ANYTHING_ELSE_IS_N;

    /* This might fail due to size restrictions. */
    rc = AgrepMake(&agrep, agrepflags | AGREP_ALG_MYERS, buf);
    if (rc == 0) {
        /* fprintf(stderr, "Using myers.\n"); */
    } else {
        rc = AgrepMake(&agrep, agrepflags | AGREP_ALG_MYERS_UNLTD, buf);
        /* Try one more time. */
        if (rc) {
            rc = AgrepMake(&agrep, agrepflags | AGREP_ALG_DP, buf);
        }
        if (rc)
            return rc;
    }

    found = AgrepFindBest(agrep, self->edit_distance, text, textlen, &match);

    if (found) {

        p = dst -> base;
        
        /* adapter */
        p [ 0 ] [ dyn_read_type ] = SRA_READ_TYPE_TECHNICAL;
        p [ 0 ] [ dyn_read_start ] = 0;
        assert(argv [ 1 ] . u . data . elem_count >> 32 == 0);
        p [ 0 ] [ dyn_read_len ] = (uint32_t)argv [ 1 ] . u . data . elem_count;
        
        /* fragment */
        p [ 1 ] [ dyn_read_type ] = SRA_READ_TYPE_BIOLOGICAL;
        p [ 1 ] [ dyn_read_start ] = (uint32_t)argv [ 1 ] . u . data . elem_count;
        p [ 1 ] [ dyn_read_len ] = match.position;

        /* linker */
        p [ 2 ] [ dyn_read_type ] = SRA_READ_TYPE_TECHNICAL;
        p [ 2 ] [ dyn_read_start ] = match.position + (uint32_t)argv[1].u.data.elem_count;
        p [ 2 ] [ dyn_read_len ] = match.length;
        
        /* fragment */
        p [ 3 ] [ dyn_read_type ] = SRA_READ_TYPE_BIOLOGICAL;
        p [ 3 ] [ dyn_read_start ] = match.position + match.length + (uint32_t)argv[1].u.data.elem_count;
        p [ 3 ] [ dyn_read_len ] = (uint32_t)argv [ 0 ] . u . data . elem_count - match.position - match.length - (uint32_t)argv[1].u.data.elem_count;
        
        rslt -> elem_count = 4;

    } else {
Esempio n. 6
0
/* format_spot_name ( ascii name_fmt, I32 X, I32 Y * ascii spot_name );
 *  given a name format string, X, and Y
 *  produce a reconstructed spot name string
 *
 *  "name_fmt" [ DATA ] - name format string ( see format explanation below )
 *
 *  "X" [ DATA ] - X coordinate for spot
 *
 *  "Y" [ DATA ] - Y coordinate for spot
 *
 *  "spot_name" [ DATA, OPTIONAL ] - potential source of unformatted names
 *
 * SYNOPSIS:
 *  "name_fmt" may have any ASCII characters
 *  the special character '$' is an escape symbol
 *  when followed by a recognized format character,
 *  both the '$' and its format character will be
 *  replaced with a numeral generated from X and/or Y.
 *
 *  when "spot_name" is present and the "name_fmt" row is empty,
 *  output is taken verbatim from "spot_name"
 *
 * FORMAT:
 *  'X' ( or 'x' ) = substitute with a decimal representation for X
 *  'Y' ( or 'y' ) = substitute with a decimal representation for Y
 *  'Q' ( or 'q' ) = calculate 454-style base-36 representation
 *                   for both X and Y
 */
static
rc_t CC format_spot_name ( void *self,
                           const VXformInfo *info, int64_t row_id, VRowResult *rslt,
                           uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    char *name;
    uint32_t elem_count;
    KDataBuffer *dst = rslt -> data;

    if ( dst -> elem_bits != 8 )
    {
        rc = KDataBufferCast ( dst, dst, 8, true );
        if ( rc != 0 )
            return rc;
    }

    /* check for NAME_FMT */
    if ( argv [ 0 ] . u. data . elem_count != 0 )
    {
        size_t num_writ;
        char sname[1024]; /** name on stack **/
        const char *name_fmt = ((char*)argv[0].u.data.base) + argv[0].u.data.first_elem;
        uint32_t i, j, x, y;
        const uint32_t fmt_size = argv [ 0 ] . u . data . elem_count;

        /* the coordinates to substitute */
        x = ( ( const int32_t* ) argv [ 1 ] . u . data . base )
            [ argv [ 1 ] . u . data . first_elem ];
        y = ( ( const int32_t* ) argv [ 2 ] . u . data . base )
            [ argv [ 2 ] . u . data . first_elem ];

        for ( i=j=0; i < fmt_size -1;) {
            if( name_fmt [ i ] == '$' ) {
                switch( name_fmt [ i+1 ]) {
                case 'x':
                case 'X':
                    if( j > sizeof(sname) - 11) {
                        return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
                    } else {
                        i+=2;
                        if( i < fmt_size -1 && name_fmt [ i ] == '%' && isdigit(name_fmt [ i+1 ])) {
                            x += 24*1024*(name_fmt [ i+1 ]-'0');
                            i+=2;
                        }
                        j+=sprintf(sname+j,"%d",x);
                    }
                    break;
                case 'y':
                case 'Y':
                    if( j > sizeof(sname) - 11) {
                        return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
                    } else {
                        i+=2;
                        if( i < fmt_size -1 && name_fmt [ i ] == '%' && isdigit(name_fmt [ i+1 ])) {
                            y += 24*1024*(name_fmt [ i+1 ]-'0');
                            i+=2;
                        }
                        j+=sprintf(sname+j,"%d",y);
                    }
                    break;
                case 'q':
                case 'Q':
                    if( j > sizeof(sname) - 5) {
                        return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
                    } else if( y > 0xFFF) {
                        return RC ( rcXF, rcFunction, rcDecoding, rcRange, rcTooBig);
                    } else {
                        uint32_t q= x << 12 | y;
                        sname[j+4]=Q_ALPHABET[q % 36];
                        q /= 36;
                        sname[j+3]=Q_ALPHABET[q % 36];
                        q /= 36;
                        sname[j+2]=Q_ALPHABET[q % 36];
                        q /= 36;
                        sname[j+1]=Q_ALPHABET[q % 36];
                        q /= 36;
                        sname[j]  =Q_ALPHABET[q];
                        j+=5;
                        i+=2;
                    }
                    break;
                case 'r':
                case 'R':
                    rc = string_printf ( & sname [ j ], sizeof sname - j, & num_writ, "%ld", row_id );
                    assert ( rc == 0 );
                    j += ( uint32_t ) num_writ;
                    i+=2;
                    break;
                default:
                    sname[j++]=name_fmt[i++];
                    break;
                }
            } else {
                if(j < sizeof(sname)) {
                    sname[j++]=name_fmt[i++];
                } else {
                    return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
                }
            }
            if( j > sizeof(sname)-1) {
                return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
            }
        }
        if(i==fmt_size -1) {
            if(j < sizeof(sname)) {
                sname[j++]=name_fmt[i++];
            } else {
                return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
            }
        }

        /* j is our new element count **/
        elem_count = j;

        /* resize output */
        if ( dst -> elem_count <= elem_count )
        {
            rc = KDataBufferResize ( dst, elem_count);
            if ( rc != 0 )
                return rc;
        }

        /* the output name */
        name = dst -> base;

        memcpy ( name, sname, elem_count );
        rslt -> elem_count = elem_count;
        return 0;
    }

    /* check for NAME */
    if( argc == 4 && argv[3].u.data.elem_count != 0 ) {
        const char *sname = ((char*)argv[3].u.data.base) + argv[3].u.data.first_elem;
        /* output size */
        elem_count = argv[3].u.data.elem_count;

        /* resize output */
        if( dst -> elem_count <= elem_count ) {
            rc = KDataBufferResize( dst, elem_count + 1 );
            if( rc != 0 )
                return rc;
        }
        name = dst->base;
        memcpy(dst->base, sname, elem_count);
        rslt->elem_count = elem_count;
        name[elem_count] = 0;
        return 0;
    }

    /* spot has no name */
    return RC ( rcSRA, rcColumn, rcReading, rcRow, rcNull );
}
Esempio n. 7
0
/* format_spot_name_no_coord
 *  variant of spot name formatting that takes no X/Y coordinates
 *
 *  argv [ 0 ] - name format string
 *  argv [ 1 ] - OPTIONAL source of unformatted names ( used when argc == 2 )
 *
 *  when the format row is not empty, it is copied to the output with
 *  every "$R" ( or "$r" ) replaced by the decimal row id; a '$' followed
 *  by any other character is copied through unchanged.
 *
 *  when the format row is empty and argv [ 1 ] has data,
 *  output is taken verbatim from argv [ 1 ]
 *
 *  returns rcBuffer/rcInsufficient when the formatted name does not fit
 *  the 1024 byte staging buffer, rcRow/rcNull when there is no name at
 *  all, or any error from KDataBufferCast, KDataBufferResize or
 *  string_printf.
 */
static
rc_t CC format_spot_name_no_coord ( void *self,
                                    const VXformInfo *info, int64_t row_id, VRowResult *rslt,
                                    uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    char *name;
    uint32_t elem_count;
    KDataBuffer *dst = rslt -> data;

    if ( dst -> elem_bits != 8 )
    {
        rc = KDataBufferCast ( dst, dst, 8, true );
        if ( rc != 0 )
            return rc;
    }

    /* check for NAME_FMT */
    if ( argv [ 0 ] . u. data . elem_count != 0 )
    {
        size_t num_writ;
        char sname[1024]; /** name on stack **/
        const char *name_fmt = ((char*)argv[0].u.data.base) + argv[0].u.data.first_elem;
        uint32_t i, j;
        const uint32_t fmt_size = argv [ 0 ] . u . data . elem_count;

        /* stop one short of the end so that name_fmt [ i+1 ] is always readable;
           the check at the bottom of the loop keeps j < sizeof sname on entry */
        for ( i=j=0; i < fmt_size -1;) {
            if( name_fmt [ i ] == '$' ) {
                switch( name_fmt [ i+1 ]) {
                case 'r':
                case 'R':
                    rc = string_printf ( & sname [ j ], sizeof sname - j, & num_writ, "%ld", row_id );
                    /* report the failure instead of trusting num_writ in NDEBUG builds */
                    if ( rc != 0 )
                        return rc;
                    j += ( uint32_t ) num_writ;
                    i+=2;
                    break;
                default:
                    /* unrecognized escape: keep the '$'; the following
                       character is handled by the next iteration */
                    sname[j++]=name_fmt[i++];
                    break;
                }
            } else {
                if(j < sizeof(sname)) {
                    sname[j++]=name_fmt[i++];
                } else {
                    return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
                }
            }
            if( j > sizeof(sname)-1) {
                return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
            }
        }
        /* copy the final character unless an escape already consumed it */
        if(i==fmt_size -1) {
            if(j < sizeof(sname)) {
                sname[j++]=name_fmt[i++];
            } else {
                return RC ( rcXF, rcFunction, rcDecoding, rcBuffer, rcInsufficient );
            }
        }

        /* j is our new element count **/
        elem_count = j;

        /* resize output */
        if ( dst -> elem_count <= elem_count )
        {
            rc = KDataBufferResize ( dst, elem_count);
            if ( rc != 0 )
                return rc;
        }

        /* the output name */
        name = dst -> base;

        memcpy ( name, sname, elem_count );
        rslt -> elem_count = elem_count;
        return 0;
    }

    /* check for NAME */
    if( argc == 2 && argv[1].u.data.elem_count != 0 ) {
        const char *sname = ((char*)argv[1].u.data.base) + argv[1].u.data.first_elem;
        /* output size */
        elem_count = argv[1].u.data.elem_count;

        /* resize output, leaving room for a trailing NUL that is
           written but not counted in the result */
        if( dst -> elem_count <= elem_count ) {
            rc = KDataBufferResize( dst, elem_count + 1 );
            if( rc != 0 )
                return rc;
        }
        name = dst->base;
        memcpy(dst->base, sname, elem_count);
        rslt->elem_count = elem_count;
        name[elem_count] = 0;
        return 0;
    }

    /* spot has no name */
    return RC ( rcSRA, rcColumn, rcReading, rcRow, rcNull );
}
Esempio n. 8
0
/* illumina_rewrite_spot_name
 *  rebuilds a spot name as
 *      <prefix><first i characters of the key>:<a>:<b>:<c>:<d>
 *  where a..d are four coordinates recovered from the key in argv [ 0 ]
 *  and <prefix> comes from argv [ 1 ] when argc != 1 ( empty otherwise ).
 *
 *  two key layouts are handled:
 *   - "new format": the key ends in four decimal numbers, each preceded
 *     by a non-digit character; they are parsed right to left
 *   - otherwise: four fixed-width hex fields of 1, 3, 3 and 3 digits
 *
 *  returns 0 on success or the error from KDataBufferCast /
 *  KDataBufferResize. on success rslt -> elem_count is the number of
 *  characters written, not counting the terminating NUL.
 */
static
rc_t CC illumina_rewrite_spot_name ( void *data, const VXformInfo *info, int64_t row_id,
                                     VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    /* holds ":a:b:c:d"; four "%d" fields plus colons fit easily,
       assuming int is no wider than 32 bits */
    char buffer [ 64];
    uint32_t coord_len;
    uint32_t prefix_len;
    unsigned int a, b, c, d;
    KDataBuffer *dst = rslt -> data;

    const char *prefix;
    const char *skey = argv [ 0 ] . u . data . base;
    /* i ends up as the number of leading key characters kept in the output */
    uint64_t i, j, count = argv [ 0 ] . u . data . elem_count;

    skey += argv [ 0 ] . u . data . first_elem;

    /* find last hex portion */
    /* scan backwards; on break, i indexes the last non-hex character,
       otherwise i is 0 */
    for ( i = count; i > 0; )
    {
        if ( ! isxdigit ( skey [ -- i ] ) )
            break;
    }

    /* too few characters after the stop position to be the hex layout */
    if ( count - i < ( SLX_COORD_LEN - 1 ) )
    {
        const char *end = skey + count;

        /* new format */
        /* each loop walks back over one run of decimal digits and leaves
           j at the first digit of that run ( 0 if no separator was found )
           and i at the separator in front of it */
        for ( d = 0, j = 0, i = count; i > 0; )
        {
            if ( ! isdigit ( skey [ -- i ] ) )
            {
                j = i + 1;
                break;
            }
        }
        /* the last field runs to the end of the key, so it is parsed
           with an explicit end pointer */
        d = non_braindead_atoi ( & skey [ j ], end );
        for ( c = 0, j = 0; i > 0; )
        {
            if ( ! isdigit ( skey [ -- i ] ) )
            {
                j = i + 1;
                break;
            }
        }
        /* NOTE(review): atoi has no end bound and the key is not shown to be
           NUL-terminated; this relies on a non-digit following the field.
           with a digits-only key ( j == 0 ) it may read past the key - confirm */
        c = atoi ( & skey [ j ] );
        for ( b = 0, j = 0; i > 0; )
        {
            if ( ! isdigit ( skey [ -- i ] ) )
            {
                j = i + 1;
                break;
            }
        }
        b = atoi ( & skey [ j ] );
        for ( a = 0, j = 0; i > 0; )
        {
            if ( ! isdigit ( skey [ -- i ] ) )
            {
                j = i + 1;
                break;
            }
        }
        a = atoi ( & skey [ j ] );
        /* a separator was found in front of the first coordinate:
           step back one more character, then forward over any letters */
        if ( j > 0 )
        {
            if ( i > 0 )
                -- i;
            /* NOTE(review): this loop is not bounded by count - confirm a
               non-alphabetic character always follows */
            while ( isalpha ( skey [ i ] ) )
                ++ i;
        }
    }
    else
    {
        /* fixed-width hex fields of 1, 3, 3 and 3 digits
           ( presumably scan_hex takes a pointer and a digit count ) */
        /* NOTE(review): the fields are read from the start of skey, not from
           the trailing portion located above - confirm this is intended */
        a = scan_hex ( skey, 1 );
        b = scan_hex ( & skey [ 1 ], 3 );
        c = scan_hex ( & skey [ 4 ], 3 );
        d = scan_hex ( & skey [ 7 ], 3 );
        /* keep everything in front of the last SLX_COORD_LEN characters;
           for shorter keys i keeps the value left by the hex scan */
        if ( count > SLX_COORD_LEN )
        {
            i = count - SLX_COORD_LEN;
        }
    }

    /* generate coordinates */
    /* NOTE(review): a..d are unsigned but printed with "%d" */
    coord_len = sprintf ( buffer, ":%d:%d:%d:%d", a, b, c, d );

    /* get size of prefix */
    if ( argc == 1 )
    {
        /* no prefix argument supplied */
        prefix = "";
        prefix_len = 0;
    }
    else
    {
        prefix = argv [ 1 ] . u . data . base;
        assert(argv [ 1 ] . u . data . elem_count >> 32 == 0);
        prefix_len = (uint32_t)argv [ 1 ] . u . data . elem_count;
        prefix += argv [ 1 ] . u . data . first_elem;
    }

    /* resize output buffer for prefix, name stuff, coordinates */
    if ( dst -> elem_bits != 8 )
    {
        rc = KDataBufferCast ( dst, dst, 8, true );
        if ( rc != 0 )
            return rc;
    }
    /* the extra element holds the NUL that sprintf appends below */
    rc = KDataBufferResize ( dst, prefix_len + i + coord_len + 1 );
    if ( rc != 0 )
        return rc;

    /* copy in prefix, name prefix, coordinates */
    /* "%.*s" limits each piece to its length because neither the prefix
       nor the key is known to be NUL-terminated */
    rslt -> elem_count = sprintf ( dst -> base, "%.*s%.*s%s"
                                   , ( int ) prefix_len, prefix
                                   , ( int ) i, skey
                                   , buffer );

    return 0;
}