Beispiel #1
0
/* SpotGroupSplitter_GetKey
 *  Derives the splitter key for one spot from the value stored in self->col.
 *
 *  cself    - splitter instance; used as a SpotGroupSplitter
 *  key      - [out] receives self->cur_key, "" or NULL (see below)
 *  spot     - row id handed to SRAColumnRead
 *  readmask - not referenced by this function
 *
 *  Outcome for *key on success:
 *    - self->col is NULL: self->cur_key, left as it was
 *    - self->spot_group is non-empty and does not list the value: NULL
 *    - self->split is false: ""
 *    - otherwise: self->cur_key, holding the value just read
 *
 *  Returns 0 on success, an rcParam/rcNull code if cself or key is NULL,
 *  the SRAColumnRead code if the read fails, or an rcBuffer/rcInsufficient
 *  code if the value does not fit into self->cur_key. In the last two cases
 *  *key still points at self->cur_key.
 */
static rc_t SpotGroupSplitter_GetKey( const SRASplitter* cself,
                                      const char** key, spotid_t spot, readmask_t* readmask )
{
    SpotGroupSplitter* self = ( SpotGroupSplitter* )cself;
    const char* value = NULL;
    bitsz_t bit_offset = 0, length = 0;
    bool listed = false;
    int idx;
    rc_t rc;

    if ( self == NULL || key == NULL )
    {
        return RC( rcSRA, rcNode, rcExecuting, rcParam, rcNull );
    }

    *key = self->cur_key;
    if ( self->col == NULL )
    {
        /* nothing to read from: hand back whatever cur_key holds */
        return 0;
    }

    rc = SRAColumnRead( self->col, spot, (const void **)&value, &bit_offset, &length );
    if ( rc != 0 )
    {
        return rc;
    }

    /* the column reports its size in bits; work in bytes from here on */
    length /= 8;

    /* drop any trailing \0 bytes */
    while ( length > 0 && value[ length - 1 ] == '\0' )
    {
        --length;
    }

    /* one byte of cur_key is reserved for the terminator */
    if ( length > sizeof( self->cur_key ) - 1 )
    {
        return RC( rcSRA, rcNode, rcExecuting, rcBuffer, rcInsufficient );
    }

    memcpy( self->cur_key, value, length );
    self->cur_key[ length ] = '\0';

    /* look the value up in the NULL-terminated spot_group list */
    for ( idx = 0; !listed && self->spot_group[ idx ] != NULL; idx++ )
    {
        listed = ( strcmp( self->cur_key, self->spot_group[ idx ] ) == 0 );
    }

    if ( self->spot_group[ 0 ] != NULL && !listed )
    {
        /* list not empty and value not in list -> skip */
        *key = NULL;
    }
    else if ( !self->split )
    {
        *key = "";
    }
    return 0;
}
Beispiel #2
0
/* MaxNReadsValidator_GetKey
 *  Checks the read count stored in self->col for one spot against nreads_max.
 *
 *  cself    - splitter instance; used as a MaxNReadsValidator
 *  key      - [out] always set to "" once the parameters are accepted
 *  spot     - row id handed to SRAColumnRead
 *  readmask - cleared via clear_readmask() when the spot has more than
 *             nreads_max reads
 *
 *  Returns 0 on success, an rcParam/rcNull code if cself or key is NULL,
 *  the SRAColumnRead code if the read fails, or an rcData/rcUnexpected code
 *  if the cell is not 8, 16, 32 or 64 bits wide.
 */
static rc_t MaxNReadsValidator_GetKey( const SRASplitter* cself,
                                       const char** key, spotid_t spot, readmask_t* readmask )
{
    rc_t rc = 0;
    MaxNReadsValidator* self = ( MaxNReadsValidator* )cself;

    if ( self == NULL || key == NULL )
    {
        rc = RC( rcSRA, rcNode, rcExecuting, rcParam, rcNull );
    }
    else
    {
        const void* nreads = NULL;
        bitsz_t o = 0, sz = 0;
        uint64_t nn = 0;

        *key = "";
        if ( self->col != NULL )
        {
            rc = SRAColumnRead( self->col, spot, &nreads, &o, &sz );
            if ( rc == 0 )
            {
                /* sz is the cell width in bits; widen the value to 64 bits */
                switch( sz )
                {
                case 8:
                    nn = *((const uint8_t*)nreads);
                    break;
                case 16:
                    nn = *((const uint16_t*)nreads);
                    break;
                case 32:
                    nn = *((const uint32_t*)nreads);
                    break;
                case 64:
                    nn = *((const uint64_t*)nreads);
                    break;
                default:
                    rc = RC( rcSRA, rcNode, rcExecuting, rcData, rcUnexpected );
                    break;
                }
            }
            /* judge the count only when it was really decoded; otherwise nn is
               still 0 and could trigger a misleading warning */
            if ( rc == 0 )
            {
                if ( nn > nreads_max )
                {
                    clear_readmask( readmask );
                    /* spot is widened explicitly so the argument matches PLOG_I64 */
                    PLOGMSG(klogWarn, (klogWarn, "too many reads $(nreads) at spot id $(row), maximum $(max) supported, skipped",
                                       PLOG_3(PLOG_U64(nreads),PLOG_I64(row),PLOG_U32(max)), nn, (int64_t)spot, nreads_max));
                }
                /* NOTE(review): this fires for nn == nreads_max - 1 and reports nn + 1;
                   kept as in the original - confirm the intended threshold */
                else if ( nn == nreads_max - 1 )
                {
                    PLOGMSG(klogWarn, (klogWarn, "too many reads $(nreads) at spot id $(row), truncated to $(max)",
                                       PLOG_3(PLOG_U64(nreads),PLOG_I64(row),PLOG_U32(max)), nn + 1, (int64_t)spot, nreads_max));
                }
            }
        }
    }
    return rc;
}
Beispiel #3
0
/// Reads the cell of column @a col at row @a id via SRAColumnRead.
///
/// On a successful read with a zero bit offset, m_Len is set to the bit
/// length rounded up to whole bytes. A non-zero bit offset is recorded in
/// m_Error as an rcOffset/rcUnsupported code.
///
/// @param col       column to read from
/// @param id        row (spot) id
/// @param check_rc  when equal to eCheckRc, any error recorded in m_Error is
///                  reported by throwing CSraException (eNotFoundValue)
CSraValue::CSraValue(const CSraColumn& col, spotid_t id, ECheckRc check_rc)
    : m_Error(0), m_Data(0), m_Bitoffset(0), m_Bitlen(0), m_Len(0)
{
    m_Error = SRAColumnRead(col, id, &m_Data, &m_Bitoffset, &m_Bitlen);

    if ( m_Error == 0 ) {
        if ( m_Bitoffset == 0 ) {
            // round the bit length up to a whole number of bytes
            m_Len = (m_Bitlen+7)>>3;
        }
        else {
            // data that does not start on a byte boundary is not handled
            m_Error = RC(rcApp, rcColumn, rcDecoding, rcOffset, rcUnsupported);
        }
    }

    const bool must_throw = (check_rc == eCheckRc) && (m_Error != 0);
    if ( must_throw ) {
        NCBI_THROW3(CSraException, eNotFoundValue, "Cannot read value",
                    m_Error, NStr::UIntToString(id));
    }
}
Beispiel #4
0
/* SRAReader_ColumnsRead
 *  Reads the cell at self->spot for every entry of self->cols, which is
 *  terminated by an entry whose name is NULL.
 *
 *  For each entry with an open column handle, base and size are filled in by
 *  SRAColumnRead. The size of every visited entry is then divided by 8
 *  (bits to bytes), including entries without a column handle.
 *
 *  Returns 0 when all entries were processed. Stops at the first failing
 *  column and returns the SRAColumnRead code, or an rcOffset/rcUnsupported
 *  code when the data does not start at bit offset 0. The latter case used
 *  to abort the loop while still reporting success.
 */
static rc_t SRAReader_ColumnsRead( SRAReader * self )
{
    rc_t rc = 0;
    int i = 0;

    while ( self->cols[ i ].name != NULL )
    {
        if ( self->cols[ i ].col != NULL )
        {
            bitsz_t bitofs = 0;
            rc = SRAColumnRead( self->cols[ i ].col, self->spot, &self->cols[ i ].base, &bitofs, &self->cols[ i ].size );
            if ( rc == 0 && bitofs != 0 )
            {
                /* unaligned data is not supported: report it rather than
                   returning success with the remaining columns unread */
                rc = RC( rcSRA, rcColumn, rcReading, rcOffset, rcUnsupported );
            }
            if ( rc != 0 )
            {
                SRADBG (( "%s: read column %s spot %u %R\n", __func__, self->cols[i].name, self->spot, rc ));
                break;
            }
        }
        /* sizes are kept in bytes from here on */
        self->cols[ i ].size /= 8;
        i++;
    }
    return rc;
}
Beispiel #5
0
/* FastqGzip_Idx
 *  Builds the index for zlib-compressed FASTQ output of a table.
 *
 *  sratbl    - table to read spots from
 *  obj       - index object; its node list (li), md5 state, file_size,
 *              buffer_sz and metadata (meta) are updated
 *  buffer    - scratch buffer receiving the text of one spot at a time
 *  buffer_sz - size of buffer in bytes
 *
 *  Steps:
 *   1. read row 1 of the PLATFORM column to decide whether color space is used
 *   2. create a FastqReader limited to obj->minSpotId..obj->maxSpotId
 *   3. collect spot text in a staging buffer, deflate it with
 *      ZLib_DeflateBlock and, once a block is closed, append an SIndexNode
 *      (start offset, compressed size, first spot id, spot count) to obj->li
 *   4. store the formatting options under the "Format/Options" metadata node
 *
 *  Returns 0 on success or the first non-zero rc encountered. All handles and
 *  buffers acquired here are released on every exit path.
 */
static
rc_t FastqGzip_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const FastqReader* reader = NULL;

    uint16_t zlib_ver = ZLIB_VERNUM;
    uint8_t colorSpace = false;
    char* colorSpaceKey = "\0";
    uint8_t origFormat = false;
    uint8_t printLabel = true;
    uint8_t printReadId = true;
    uint8_t clipQuality = true;
    uint32_t minReadLen = 0;
    uint16_t qualityOffset = 0;

    {{
        const SRAColumn* c = NULL;
        const uint8_t *platform = NULL;
        bitsz_t o, z;

        if( (rc = SRATableOpenColumnRead(sratbl, &c, "PLATFORM", sra_platform_id_t)) != 0 ) {
            return rc;
        }
        rc = SRAColumnRead(c, 1, (const void **)&platform, &o, &z);
        if( rc == 0 && platform != NULL && *platform == SRA_PLATFORM_ABSOLID ) {
            colorSpace = true;
        }
        /* release the column on the failure path as well */
        SRAColumnRelease(c);
        if( rc != 0 ) {
            return rc;
        }
    }}

    if( (rc = FastqReaderMake(&reader, sratbl, g_accession,
                        colorSpace, origFormat, false, printLabel, printReadId,
                        !clipQuality, minReadLen, qualityOffset, colorSpaceKey[0],
                        obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        size_t written = 0;
        /* blk: bytes staged in spots_buf; spots_per_block: spot count of the
           previously closed block; proj_id_qty: projected spot count needed
           to fill a block (0 = no projection yet) */
        uint32_t blk = 0, spots_per_block = 0, proj_id_qty = 0;
        SIndexNode* inode = NULL;
        size_t z_blk = 0;
        size_t spots_buf_sz = g_file_block_sz * 100;
        size_t zbuf_sz = spots_buf_sz + 100;
        char* zbuf = malloc(zbuf_sz);
        char* spots_buf = malloc(spots_buf_sz);
        bool eof = false;

        if( zbuf == NULL || spots_buf == NULL ) {
            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
        }
        while( rc == 0 ) {
            if( (rc = FastqReader_GetNextSpotSplitData(reader, buffer, buffer_sz, &written)) == 0 ) {
                if( inode == NULL ) {
                    /* first spot of a new block: open an index node for it */
                    spotid_t spotid = 0;
                    if( (rc = FastqReaderCurrentSpot(reader, &spotid)) != 0 ) {
                        break;
                    }
                    inode = malloc(sizeof(SIndexNode));
                    if( inode == NULL ) {
                        rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        break;
                    }
                    inode->key = obj->file_size;
                    inode->key_size = 0;
                    inode->id = spotid;
                    inode->id_qty = 0;
                    DEBUG_MSG(5, ("%s open key: spot %ld, offset %lu\n", obj->index, inode->id, inode->key));
                }
                if( blk + written > spots_buf_sz ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                    break;
                }
                inode->id_qty++;
                memmove(&spots_buf[blk], buffer, written);
                blk += written;
                if( g_dump ) {
                    fwrite(buffer, written, 1, stderr);
                }
            }
            /* an exhausted row range is the regular end of input, not an error */
            if( (eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted)) ) {
                rc = 0;
                if( inode == NULL ) {
                    break;
                }
            }
            /* compress when input ended, when close to the spot count of the
               previous block, or when the projected spot count is reached */
            if( rc == 0 && (eof ||
                            (proj_id_qty == 0 && inode->id_qty > (spots_per_block * 0.95)) ||
                            (proj_id_qty > 0 && inode->id_qty >= proj_id_qty) ) ) {
                rc = ZLib_DeflateBlock(spots_buf, blk, zbuf, zbuf_sz, &z_blk);
                if( rc == 0 ) {
                    /* z_blk > 0 guards the division below */
                    if( z_blk > 0 && z_blk < g_file_block_sz ) {
                        /* project needed id_qty */
                        proj_id_qty = g_file_block_sz * inode->id_qty / z_blk * 1.05;
                        DEBUG_MSG(5, ("%s: project id qty %u\n", obj->index, proj_id_qty));
                    } else {
                        DEBUG_MSG(10, ("%s: no projection %u > %u\n", obj->index, z_blk, g_file_block_sz));
                    }
                }
            }
            /* close the block once it is large enough or input has ended */
            if( rc == 0 && (eof || z_blk >= g_file_block_sz) ) {
                obj->file_size += z_blk;
                MD5StateAppend(&obj->md5, zbuf, z_blk);
                inode->key_size = z_blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("%s close key: spots %lu, size %lu, ratio %hu%%, raw %u\n",
                         obj->index, inode->id_qty, inode->key_size, (uint16_t)(((float)(blk - z_blk)/blk)*100), blk ));
                spots_per_block = inode->id_qty;
                /* the node is now referenced by obj->li */
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
                z_blk = 0;
                proj_id_qty = 0;
            }
            if( eof ) {
                break;
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            FastqReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        /* a node still held here was never pushed to obj->li (error exit) */
        free(inode);
        free(zbuf);
        free(spots_buf);
    }
    if( rc == 0 ) {
        KMDataNode* opt = NULL, *nd = NULL;

        /* on failure fall through so that the reader is still released */
        rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options");
        if( rc == 0 ) {
            if( (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "ZlibVersion")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &zlib_ver);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpace")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &colorSpace);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpaceKey")) == 0 ) {
                rc = KMDataNodeWrite(nd, colorSpaceKey, 1);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "origFormat")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &origFormat);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printLabel")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printLabel);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printReadId")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printReadId);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "clipQuality")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &clipQuality);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "minReadLen")) == 0 ) {
                rc = KMDataNodeWriteB32(nd, &minReadLen);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "qualityOffset")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &qualityOffset);
                KMDataNodeRelease(nd);
            }
            KMDataNodeRelease(opt);
        }
    }
    FastqReaderWhack(reader);
    return rc;
}
Beispiel #6
0
/* Fastq_Idx
 *  Builds the index for uncompressed FASTQ output of a table.
 *
 *  sratbl    - table to read spots from
 *  obj       - index object; its node list (li), md5 state, file_size,
 *              buffer_sz and metadata (meta) are updated
 *  buffer    - scratch buffer receiving the text of one spot at a time
 *  buffer_sz - size of buffer in bytes
 *
 *  Steps:
 *   1. read row 1 of the PLATFORM column to decide whether color space is used
 *   2. create a FastqReader limited to obj->minSpotId..obj->maxSpotId
 *   3. store the formatting options under the "Format/Options" metadata node
 *   4. read spot after spot; whenever at least g_file_block_sz bytes have
 *      accumulated, or the reader is exhausted, append an SIndexNode (start
 *      offset, size, first spot id, spot count) to obj->li
 *
 *  Returns 0 on success or the first non-zero rc encountered. An empty spot
 *  range yields an empty node list. All handles acquired here are released
 *  on every exit path.
 */
static
rc_t Fastq_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const FastqReader* reader = NULL;

    uint8_t colorSpace = false;
    char* colorSpaceKey = "\0";
    uint8_t origFormat = false;
    uint8_t printLabel = true;
    uint8_t printReadId = true;
    uint8_t clipQuality = true;
    uint32_t minReadLen = 0;
    uint16_t qualityOffset = 0;

    {{
        const SRAColumn* c = NULL;
        const uint8_t *platform = NULL;
        bitsz_t o, z;

        if( (rc = SRATableOpenColumnRead(sratbl, &c, "PLATFORM", sra_platform_id_t)) != 0 ) {
            return rc;
        }
        rc = SRAColumnRead(c, 1, (const void **)&platform, &o, &z);
        if( rc == 0 && platform != NULL && *platform == SRA_PLATFORM_ABSOLID ) {
            colorSpace = true;
        }
        /* release the column on the failure path as well */
        SRAColumnRelease(c);
        if( rc != 0 ) {
            return rc;
        }
    }}

    if( (rc = FastqReaderMake(&reader, sratbl, g_accession,
                        colorSpace, origFormat, false, printLabel, printReadId,
                        !clipQuality, minReadLen, qualityOffset, colorSpaceKey[0],
                        obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        KMDataNode* opt = NULL, *nd = NULL;

        /* on failure fall through so that the reader is still released */
        rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options");
        if( rc == 0 ) {
            if( (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpace")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &colorSpace);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpaceKey")) == 0 ) {
                rc = KMDataNodeWrite(nd, colorSpaceKey, 1);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "origFormat")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &origFormat);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printLabel")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printLabel);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printReadId")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printReadId);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "clipQuality")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &clipQuality);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "minReadLen")) == 0 ) {
                rc = KMDataNodeWriteB32(nd, &minReadLen);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "qualityOffset")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &qualityOffset);
                KMDataNodeRelease(nd);
            }
            KMDataNodeRelease(opt);
        }
    }

    if( rc == 0 ) {
        size_t written = 0;
        uint32_t blk = 0;   /* bytes accounted to the currently open node */
        SIndexNode* inode = NULL;

        while( rc == 0 ) {
            bool eof;

            rc = FastqReader_GetNextSpotSplitData(reader, buffer, buffer_sz, &written);
            /* an exhausted row range is the regular end of input, not an error */
            eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted);

            /* close the open node, if any, before accounting the spot just read;
               there is no node when the very first read is already exhausted */
            if( inode != NULL && (blk >= g_file_block_sz || eof) ) {
                inode->key_size = blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("Fastq index closed spots %lu, offset %lu, block size %lu\n",
                                                            inode->id_qty, inode->key, inode->key_size));
                /* the node is now referenced by obj->li */
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
            }
            if( eof ) {
                rc = 0;
                break;
            }
            if( rc != 0 ) {
                /* a failed read delivered no spot: do not account for it and
                   do not let the calls below overwrite the error */
                break;
            }
            if( inode == NULL ) {
                /* the spot just read starts a new node */
                spotid_t spotid = 0;
                if( (rc = FastqReaderCurrentSpot(reader, &spotid)) != 0 ) {
                    break;
                }
                inode = malloc(sizeof(SIndexNode));
                if( inode == NULL ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                    break;
                }
                inode->key = obj->file_size;
                inode->key_size = 0;
                inode->id = spotid;
                inode->id_qty = 0;
                DEBUG_MSG(5, ("Fastq index opened spot %ld, offset %lu\n", inode->id, inode->key));
            }
            inode->id_qty++;
            obj->file_size += written;
            blk += written;
            MD5StateAppend(&obj->md5, buffer, written);
            if( g_dump ) {
                fwrite(buffer, written, 1, stderr);
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            FastqReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        /* a node still held here was never pushed to obj->li (error exit) */
        free(inode);
    }
    FastqReaderWhack(reader);
    return rc;
}
Beispiel #7
0
/*******************************************************************************
 * KMain - defined for use with kapp library
 *******************************************************************************/
rc_t CC KMain ( int argc, char* argv[] )
{
    rc_t rc = 0;
    int i;
    const char* arg;
    uint64_t total_spots = 0;

    const SRAMgr* sraMGR = NULL;
    SRADumperFmt fmt;

    bool to_stdout = false, do_gzip = false, do_bzip2 = false;
    char const* outdir = NULL;
    spotid_t minSpotId = 1;
    spotid_t maxSpotId = ~0;
    bool sub_dir = false;
    bool keep_empty = false;
    const char* table_path[10240];
    int table_path_qty = 0;

    char const* D_option = NULL;
    char const* P_option = NULL;
    char P_option_buffer[4096];
    const char* accession = NULL;
    const char* table_name = NULL;

    bool spot_group_on = false;
    int spot_groups = 0;
    char* spot_group[128] = {NULL};
    bool read_filter_on = false;
    SRAReadFilter read_filter = 0xFF;

    bool failed_to_open = false;

    /* for the fasta-ouput of fastq-dump: branch out completely of 'common' code */
    if ( fasta_dump_requested( argc, argv ) )
    {
        return fasta_dump( argc, argv );
    }

    /* Prepare for the worst: report this information after disaster */
    ReportBuildDate ( __DATE__ );

    memset( &fmt, 0, sizeof( fmt ) );
    rc = SRADumper_Init( &fmt );
    if ( rc != 0 )
    {
        LOGERR(klogErr, rc, "formatter initialization");
        return 100;
    }
    else if ( fmt.get_factory == NULL )
    {
        rc = RC( rcExe, rcFormatter, rcValidating, rcInterface, rcNull );
        LOGERR( klogErr, rc, "formatter factory" );
        return 101;
    }
    else
    {
        rc = SRADumper_ArgsValidate( argv[0], &fmt );
        if ( rc != 0 )
        {
            LOGERR( klogErr, rc, "formatter args list" );
            return 102;
        }
    }

    if ( argc < 2 )
    {
        CoreUsage( argv[0], &fmt, true, EXIT_FAILURE );
        return 0;
    }

    for ( i = 1; i < argc; i++ )
    {
        arg = argv[ i ];
        if ( arg[ 0 ] != '-' )
        {
            uint32_t k;
            for ( k = 0; k < table_path_qty; k++ )
            {
                if ( strcmp( arg, table_path[ k ] ) == 0 )
                {
                    break;
                }
            }
            if ( k >= table_path_qty )
            {
                if ( ( table_path_qty + 1 ) >= ( sizeof( table_path ) / sizeof( table_path[ 0 ] ) ) )
                {
                    rc = RC( rcExe, rcArgv, rcReading, rcBuffer, rcInsufficient );
                    goto Catch;
                }
                table_path[ table_path_qty++ ] = arg;
            }
            continue;
        }
        arg = NULL;
        if ( SRADumper_GetArg( &fmt, "L", "log-level", &i, argc, argv, &arg ) )
        {
            rc = LogLevelSet( arg );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc, "log level $(lvl)", PLOG_S( lvl ), arg ) );
                goto Catch;
            }
        }
        else if ( SRADumper_GetArg( &fmt, NULL, OPTION_REPORT, &i, argc, argv, &arg ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "+", "debug", &i, argc, argv, &arg ) )
        {
#if _DEBUGGING
            rc = KDbgSetString( arg );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc, "debug level $(lvl)", PLOG_S( lvl ), arg ) );
                goto Catch;
            }
#endif
        }
        else if ( SRADumper_GetArg( &fmt, "H", "help", &i, argc, argv, NULL ) ||
                  SRADumper_GetArg( &fmt, "?", "h", &i, argc, argv, NULL ) )
        {
            CoreUsage( argv[ 0 ], &fmt, false, EXIT_SUCCESS );

        }
        else if ( SRADumper_GetArg( &fmt, "V", "version", &i, argc, argv, NULL ) )
        {
            HelpVersion ( argv[ 0 ], KAppVersion() );
            return 0;
        }
        else if ( SRADumper_GetArg( &fmt, "v", NULL, &i, argc, argv, NULL ) )
        {
            KStsLevelAdjust( 1 );

        }
        else if ( SRADumper_GetArg( &fmt, "D", "table-path", &i, argc, argv, &D_option ) )
        {
            LOGMSG( klogErr, "option -D is deprecated, see --help" );
        }
        else if ( SRADumper_GetArg( &fmt, "P", "path", &i, argc, argv, &P_option ) )
        {
            LOGMSG( klogErr, "option -P is deprecated, see --help" );

        }
        else if ( SRADumper_GetArg( &fmt, "A", "accession", &i, argc, argv, &accession ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "O", "outdir", &i, argc, argv, &outdir ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "Z", "stdout", &i, argc, argv, NULL ) )
        {
            to_stdout = true;
        }
        else if ( fmt.gzip && SRADumper_GetArg( &fmt, NULL, "gzip", &i, argc, argv, NULL ) )
        {
            do_gzip = true;
        }
        else if ( fmt.bzip2 && SRADumper_GetArg( &fmt, NULL, "bzip2", &i, argc, argv, NULL ) )
        {
            do_bzip2 = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "table", &i, argc, argv, &table_name ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "N", "minSpotId", &i, argc, argv, &arg ) )
        {
            minSpotId = AsciiToU32( arg, NULL, NULL );
        }
        else if ( SRADumper_GetArg( &fmt, "X", "maxSpotId", &i, argc, argv, &arg ) )
        {
            maxSpotId = AsciiToU32( arg, NULL, NULL );
        }
        else if ( SRADumper_GetArg( &fmt, "G", "spot-group", &i, argc, argv, NULL ) )
        {
            spot_group_on = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "spot-groups", &i, argc, argv, NULL ) )
        {
            if ( i + 1 < argc && argv[ i + 1 ][ 0 ] != '-' )
            {
                int f = 0, t = 0;
                i++;
                while ( argv[ i ][ t ] != '\0' )
                {
                    if ( argv[ i ][ t ] == ',' )
                    {
                        if ( t - f > 0 )
                        {
                            spot_group[ spot_groups++ ] = strndup( &argv[ i ][ f ], t - f );
                        }
                        f = t + 1;
                    }
                    t++;
                }
                if ( t - f > 0 )
                {
                    spot_group[ spot_groups++ ] = strndup( &argv[ i ][ f ], t - f );
                }
                if ( spot_groups < 1 )
                {
                    rc = RC( rcApp, rcArgv, rcReading, rcParam, rcEmpty );
                    PLOGERR( klogErr, ( klogErr, rc, "$(p)", PLOG_S( p ), argv[ i - 1 ] ) );
                    CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
                }
                spot_group[ spot_groups ] = NULL;
            }
        }
        else if ( SRADumper_GetArg( &fmt, "R", "read-filter", &i, argc, argv, NULL ) )
        {
            read_filter_on = true;
            if ( i + 1 < argc && argv[ i + 1 ][ 0 ] != '-' )
            {
                i++;
                if ( read_filter != 0xFF )
                {
                    rc = RC( rcApp, rcArgv, rcReading, rcParam, rcDuplicate );
                    PLOGERR( klogErr, ( klogErr, rc, "$(p): $(o)",
                                        PLOG_2( PLOG_S( p ),PLOG_S( o ) ), argv[ i - 1 ], argv[ i ] ) );
                    CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
                }
                if ( strcasecmp( argv[ i ], "pass" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_PASS;
                }
                else if ( strcasecmp( argv[ i ], "reject" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_REJECT;
                }
                else if ( strcasecmp( argv[ i ], "criteria" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_CRITERIA;
                }
                else if ( strcasecmp( argv[ i ], "redacted" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_REDACTED;
                }
                else
                {
                    /* must be accession */
                    i--;
                }
            }
        }
        else if ( SRADumper_GetArg( &fmt, "T", "group-in-dirs", &i, argc, argv, NULL ) )
        {
            sub_dir = true;
        }
        else if ( SRADumper_GetArg( &fmt, "K", "keep-empty-files", &i, argc, argv, NULL ) )
        {
            keep_empty = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "no-user-settings", &i, argc, argv, NULL ) )
        {
            KConfigDisableUserSettings ();
        }
        else if ( fmt.add_arg && fmt.add_arg( &fmt, SRADumper_GetArg, &i, argc, argv ) )
        {
        }
        else
        {
            rc = RC( rcApp, rcArgv, rcReading, rcParam, rcIncorrect );
            PLOGERR( klogErr, ( klogErr, rc, "$(p)", PLOG_S( p ), argv[ i ] ) );
            CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
        }
    }

    if ( to_stdout )
    {
        if ( outdir != NULL || sub_dir || keep_empty ||
                spot_group_on || ( read_filter_on && read_filter == 0xFF ) )
        {
            LOGMSG( klogWarn, "stdout mode is set, some options are ignored" );
            spot_group_on = false;
            if ( read_filter == 0xFF )
            {
                read_filter_on = false;
            }
        }
        KOutHandlerSetStdErr();
        KStsHandlerSetStdErr();
        KLogHandlerSetStdErr();
        ( void ) KDbgHandlerSetStdErr();
    }

    if ( do_gzip && do_bzip2 )
    {
        rc = RC( rcApp, rcArgv, rcReading, rcParam, rcAmbiguous );
        LOGERR( klogErr, rc, "output compression method" );
        CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
    }

    if ( minSpotId > maxSpotId )
    {
        spotid_t temp = maxSpotId;
        maxSpotId = minSpotId;
        minSpotId = temp;
    }

    if ( table_path_qty == 0 )
    {
        if ( D_option != NULL && D_option[ 0 ] != '\0' )
        {
            /* support deprecated '-D' option */
            table_path[ table_path_qty++ ] = D_option;
        }
        else if ( accession == NULL || accession[ 0 ] == '\0' )
        {
            /* must have accession to proceed */
            rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcEmpty );
            LOGERR( klogErr, rc, "expected accession" );
            goto Catch;
        }
        else if ( P_option != NULL && P_option[ 0 ] != '\0' )
        {
            /* support deprecated '-P' option */
            i = snprintf( P_option_buffer, sizeof( P_option_buffer ), "%s/%s", P_option, accession );
            if ( i < 0 || i >= sizeof( P_option_buffer ) )
            {
                rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcExcessive );
                LOGERR( klogErr, rc, "path too long" );
                goto Catch;
            }
            table_path[ table_path_qty++ ] = P_option_buffer;
        }
        else
        {
            table_path[ table_path_qty++ ] = accession;
        }
    }

    rc = SRAMgrMakeRead( &sraMGR );
    if ( rc != 0 )
    {
        LOGERR( klogErr, rc, "failed to open SRA manager" );
        goto Catch;
    }
    else
    {
        rc = SRASplitterFactory_FilerInit( to_stdout, do_gzip, do_bzip2, sub_dir, keep_empty, outdir );
        if ( rc != 0 )
        {
            LOGERR( klogErr, rc, "failed to initialize files" );
            goto Catch;
        }
    }

    {
        const VDBManager* vmgr = NULL;
        rc_t rc2 = SRAMgrGetVDBManagerRead( sraMGR, &vmgr );
        if ( rc2 != 0 )
        {
            LOGERR( klogErr, rc2, "while calling SRAMgrGetVDBManagerRead" );
        }
        rc2 = ReportSetVDBManager( vmgr );
        VDBManagerRelease( vmgr );
    }


    /* loop tables */
    for ( i = 0; i < table_path_qty; i++ )
    {
        const SRASplitterFactory* fact_head = NULL;
        spotid_t smax, smin;

        SRA_DUMP_DBG( 5, ( "table path '%s', name '%s'\n", table_path[ i ], table_name ) );
        if ( table_name != NULL )
        {
            rc = SRAMgrOpenAltTableRead( sraMGR, &fmt.table, table_name, table_path[ i ] );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc,
                                    "failed to open '$(path):$(table)'", "path=%s,table=%s",
                                    table_path[ i ], table_name ) );
                continue;
            }
        }

        ReportResetObject( table_path[ i ] );
        if ( fmt.table == NULL )
        {
            rc = SRAMgrOpenTableRead( sraMGR, &fmt.table, table_path[ i ] );
            if ( rc != 0 )
            {
                if ( UIError( rc, NULL, NULL ) )
                {
                    UITableLOGError( rc, NULL, true );
                }
                else
                {
                    PLOGERR( klogErr, ( klogErr, rc,
                                        "failed to open '$(path)'", "path=%s",
                                        table_path[ i ] ) );
                    if (GetRCState(rc) == rcNotFound) {
                        failed_to_open = true;
                    }
                }
                continue;
            }
        }

        /* infer accession from table_path if missing or more than one table */
        fmt.accession = table_path_qty > 1 ? NULL : accession;
        if ( fmt.accession == NULL || fmt.accession[ 0 ] == 0 )
        {
            char * basename;
            char *ext;
            size_t l;
            bool is_url = false;

            strcpy( P_option_buffer, table_path[ i ] );

            basename = strchr ( P_option_buffer, ':' );
            if ( basename )
            {
                ++basename;
                if ( basename [0] == '\0' )
                    basename = P_option_buffer;
                else
                    is_url = true;
            }
            else
                basename = P_option_buffer;

            if ( is_url )
            {
                ext = strchr ( basename, '#' );
                if ( ext )
                    ext[ 0 ] = '\0';
                ext = strchr ( basename, '?' );
                if ( ext )
                    ext[ 0 ] = '\0';
            }


            l = strlen( basename  );
            while ( strchr( "\\/", basename[ l - 1 ] ) != NULL )
            {
                basename[ --l ] = '\0';
            }
            fmt.accession = strrchr( basename, '/' );
            if ( fmt.accession++ == NULL )
            {
                fmt.accession = basename;
            }

            /* cut off [.lite].[c]sra[.nenc||.ncbi_enc] if any */
            ext = strrchr( fmt.accession, '.' );
            if ( ext != NULL )
            {
                if ( strcasecmp( ext, ".nenc" ) == 0 || strcasecmp( ext, ",ncbi_enc" ) == 0 )
                {
                    *ext = '\0';
                    ext = strrchr( fmt.accession, '.' );
                }
                if ( ext != NULL && ( strcasecmp( ext, ".sra" ) == 0 || strcasecmp( ext, ".csra" ) == 0 ) )
                {
                    *ext = '\0';
                    ext = strrchr( fmt.accession, '.' );
                    if ( ext != NULL && strcasecmp( ext, ".lite" ) == 0 )
                    {
                        *ext = '\0';
                    }
                }
            }
        }

        SRA_DUMP_DBG( 5, ( "accession: '%s'\n", fmt.accession ) );
        rc = SRASplitterFactory_FilerPrefix( accession ? accession : fmt.accession );

        while ( rc == 0 )
        {
            /* sort out the spot id range */
            if ( ( rc = SRATableMaxSpotId( fmt.table, &smax ) ) != 0 ||
                    ( rc = SRATableMinSpotId( fmt.table, &smin ) ) != 0 )
            {
                break;
            }

            {
                const struct VTable* tbl = NULL;
                rc_t rc2 = SRATableGetVTableRead( fmt.table, &tbl );
                if ( rc == 0 )
                {
                    rc = rc2;
                }
                rc2 = ReportResetTable( table_path[i], tbl );
                if ( rc == 0 )
                {
                    rc = rc2;
                }
                VTableRelease( tbl );   /* SRATableGetVTableRead adds Reference to tbl! */
            }

            /* test if we have to dump anything... */
            if ( smax < minSpotId || smin > maxSpotId )
            {
                break;
            }
            if ( smax > maxSpotId )
            {
                smax = maxSpotId;
            }
            if ( smin < minSpotId )
            {
                smin = minSpotId;
            }

            /* hack to reduce looping in AddSpot: needs redesign to pass nreads along through tree */
            if ( true ) /* ??? */
            {
                const SRAColumn* c = NULL;
                nreads_max = NREADS_MAX;
                rc = SRATableOpenColumnRead( fmt.table, &c, "PLATFORM", sra_platform_id_t );
                if ( rc == 0 )
                {
                    const INSDC_SRA_platform_id *platform;
                    bitsz_t o, z;
                    rc = SRAColumnRead( c, 1, (const void **)&platform, &o, &z );
                    if ( rc == 0 && platform != NULL )
                    {
                        if ( *platform != SRA_PLATFORM_PACBIO_SMRT )
                        {
                            nreads_max = 32;
                        }
                    }
                    SRAColumnRelease( c );
                }
                else if ( GetRCState( rc ) == rcNotFound && GetRCObject( rc ) == rcColumn )
                {
                    rc = 0;
                }
            }

            /* table dependent */
            rc = fmt.get_factory( &fmt, &fact_head );
            if ( rc != 0 )
            {
                break;
            }
            if ( fact_head == NULL )
            {
                rc = RC( rcExe, rcFormatter, rcResolving, rcInterface, rcNull );
                break;
            }

            if ( rc == 0 && ( spot_group_on || spot_groups > 0 ) )
            {
                const SRASplitterFactory* f = NULL;
                rc = SpotGroupSplitterFactory_Make( &f, fmt.table, spot_group_on, spot_group );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            if ( rc == 0 && read_filter_on )
            {
                const SRASplitterFactory* f = NULL;
                rc = ReadFilterSplitterFactory_Make( &f, fmt.table, read_filter );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            if ( rc == 0 )
            {
                /* this filter takes over head of chain to be first and kill off bad NREADS */
                const SRASplitterFactory* f = NULL;
                rc = MaxNReadsValidatorFactory_Make( &f, fmt.table );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            rc = SRASplitterFactory_Init( fact_head );
            if ( rc == 0 )
            {
                /* ********************************************************** */
                rc = SRADumper_DumpRun( fmt.table, smin, smax, fact_head );
                /* ********************************************************** */
                if ( rc == 0 )
                {
                    uint64_t total = 0, file = 0;
                    SRASplitterFactory_FilerReport( &total, &file );
                    OUTMSG(( "Written %lu spots for %s\n", total - total_spots, table_path[ i ] ));
                    if ( to_stdout && total > 0 )
                    {
                        PLOGMSG( klogInfo, ( klogInfo, "$(t) biggest file has $(n) spots",
                                             PLOG_2( PLOG_S( t ), PLOG_U64( n ) ), table_path[ i ], file ));
                    }
                    total_spots = total;
                }
            }
            break;
        }

        SRASplitterFactory_Release( fact_head );
        SRATableRelease( fmt.table );
        fmt.table = NULL;
        if ( rc == 0 )
        {
            PLOGMSG( klogInfo, ( klogInfo, "$(path)$(dot)$(table) $(spots) spots",
                                 PLOG_4(PLOG_S(path),PLOG_S(dot),PLOG_S(table),PLOG_U32(spots)),
                                 table_path[ i ], table_name ? ":" : "", table_name ? table_name : "", smax - smin + 1 ) );
        }
        else if ( !reportToUser( rc, argv [0 ] ) )
        {
            PLOGERR( klogErr, ( klogErr, rc, "failed $(path)$(dot)$(table)",
                                PLOG_3(PLOG_S(path),PLOG_S(dot),PLOG_S(table)),
                                table_path[ i ], table_name ? ":" : "", table_name ? table_name : "" ) );
        }
    }

Catch:
    if ( fmt.release )
    {
        rc_t rr = fmt.release( &fmt );
        if ( rr != 0 )
        {
            SRA_DUMP_DBG( 1, ( "formatter release error %R\n", rr ) );
        }
    }

    for ( i = 0; i < spot_groups; i++ )
    {
        free( spot_group[ i ] );
    }
    SRASplitterFiler_Release();
    SRAMgrRelease( sraMGR );
    OUTMSG(( "Written %lu spots total\n", total_spots ));


    if (failed_to_open) {
        ReportSilence();
    }
    {
        /* Report execution environment if necessary */
        rc_t rc2 = ReportFinalize( rc );
        if ( rc == 0 )
        {
            rc = rc2;
        }
    }
    return rc;
}
Beispiel #8
0
/* ReadFilterSplitter_GetKeySet
 *  Produces the key set for one spot by sorting its reads into the splitter's
 *  per-filter-value keys.
 *
 *  cself     [ IN ]  - splitter instance; cast to ReadFilterSplitter
 *  key       [ OUT ] - receives self->keys when the READ_FILTER column was read
 *  keys      [ OUT ] - receives the number of entries in *key; 0 when the
 *                      splitter has no READ_FILTER column or the read failed
 *  spot      [ IN ]  - id of the spot whose READ_FILTER row is read
 *  readmask  [ IN ]  - not used by this implementation
 *
 *  Returns 0 on success, an rcParam/rcNull RC when self, key or keys is NULL,
 *  or the RC reported by SRAColumnRead.
 */
static rc_t ReadFilterSplitter_GetKeySet( const SRASplitter* cself,
        const SRASplitter_Keys** key, uint32_t* keys, spotid_t spot, const readmask_t* readmask )
{
    rc_t rc = 0;
    ReadFilterSplitter* self = ( ReadFilterSplitter* )cself;

    /* keys is written unconditionally below, so it has to be validated too */
    if ( self == NULL || key == NULL || keys == NULL )
    {
        rc = RC( rcSRA, rcNode, rcExecuting, rcParam, rcNull );
    }
    else
    {
        *keys = 0;
        if ( self->col_rdf != NULL )
        {
            const INSDC_SRA_read_filter* rdf = NULL;
            bitsz_t o = 0, sz = 0;

            rc = SRAColumnRead( self->col_rdf, spot, (const void **)&rdf, &o, &sz );
            if ( rc == 0 )
            {
                const uint32_t key_qty = sizeof( self->keys ) / sizeof( self->keys[ 0 ] );
                uint32_t j;
                /* sz is in bits: convert to the number of filter entries in the row */
                int32_t i = sz / sizeof( INSDC_SRA_read_filter ) / 8;

                *key = self->keys;
                *keys = key_qty;

                /* start from empty masks: self->keys is reused for every spot */
                for ( j = 0; j < key_qty; j++ )
                {
                    clear_readmask( self->keys[ j ].readmask );
                }

                /* walk the reads from last to first, marking each in the key of its filter value */
                while ( i > 0 )
                {
                    i--;
                    if ( self->read_filter != 0xFF && self->read_filter != rdf[ i ] )
                    {
                        /* skip by filter value != to command line */
                        continue;
                    }

                    if ( rdf[ i ] == SRA_READ_FILTER_PASS )
                    {
                        set_readmask( self->keys[ EReadFilterSplitter_pass ].readmask, i );
                    }
                    else if ( rdf[ i ] == SRA_READ_FILTER_REJECT )
                    {
                        set_readmask( self->keys[ EReadFilterSplitter_reject ].readmask, i );
                    }
                    else if ( rdf[ i ] == SRA_READ_FILTER_CRITERIA )
                    {
                        set_readmask( self->keys[ EReadFilterSplitter_criteria ].readmask, i );
                    }
                    else if ( rdf[ i ] == SRA_READ_FILTER_REDACTED )
                    {
                        set_readmask( self->keys[ EReadFilterSplitter_redacted ].readmask, i );
                    }
                    else
                    {
                        /* unrecognized value: still routed to a key, but reported */
                        set_readmask( self->keys[ EReadFilterSplitter_unknown ].readmask, i );
                        PLOGMSG( klogWarn, ( klogWarn,
                                             "unknown READ_FILTER value $(value) at spot id $(row)",
                                             PLOG_2( PLOG_U8( value ), PLOG_I64( row ) ), rdf[ i ], spot ) );
                    }
                }
            }
        }
    }
    return rc;
}
Beispiel #9
0
/* SDataUpdate
 *  Writes the column `newColName` (typed sra_read_filter_t) into self->_wrTbl,
 *  one row per spot delivered by a SpotIterator built from self->_rdTbl and
 *  `redactFileName`.
 *
 *  For each spot the number of reads is taken from self->_NReadsCol. Spots the
 *  iterator flags for redaction get SRA_READ_FILTER_REDACTED for every read;
 *  all other spots get the bytes read from self->_origFilterCol.
 *
 *  self           - source/destination tables and columns; must not be NULL
 *  newColName     - name of the column opened for writing
 *  redactFileName - passed through to SpotIteratorInit
 *  redactedSpots  - incremented once per redacted spot; must not be NULL
 *  all            - receives the last spot id produced by the iterator;
 *                   not written when setup fails before the loop
 *
 *  Returns 0 on success or the first failing RC.
 */
static rc_t SDataUpdate(struct SData* self,
    const char* newColName, const char* redactFileName,
    spotid_t* redactedSpots, spotid_t* all)
{
    struct SBlob blob;
    /* One REDACTED value per read. nReads is a uint8_t, so a spot can carry up
       to 255 reads; the buffer must cover that or the write below would read
       past its end for spots with many reads. */
    uint8_t filter[256];
    rc_t rc = 0, rc2 = 0;
    uint32_t colIdx = 0;
    spotid_t spot = 0, last = 0;
    bool toRedact = false;
    struct SpotIterator it;

    assert(self && redactedSpots && all);

    memset(filter, SRA_READ_FILTER_REDACTED, sizeof filter);

    rc = SpotIteratorInit(&it, self->_rdTbl, redactFileName);
    if (rc != 0) {
        return rc;
    }

    rc = SRATableOpenColumnWrite
        (self->_wrTbl, &colIdx, NULL, newColName, sra_read_filter_t);
    if (rc != 0) {
        plogerr(klogErr, rc,
            "cannot open Column $(path) for Write", "path=%s", newColName);
        /* the iterator was initialized successfully: release it before bailing out */
        SpotIteratorDestroy(&it);
        return rc;
    }

    rc = SBlobInit(&blob, self, &it);
    if (rc != 0) {
        SpotIteratorDestroy(&it);
        return rc;
    }

    while (rc == 0 && SpotIteratorNext(&it, &rc, &spot, &toRedact)) {
        bitsz_t offset = 0, size = 0;
        const void *base = NULL;
        uint8_t nReads = 0;

        /* SpotIteratorNext reports failures through rc even when it returns true */
        if (rc != 0) {
            break;
        }

        plogmsg(klogDebug2, "Spot $(spot): $(action)",
            PLOG_U32(spot) ",action=%s",
            spot, toRedact ? "redact" : "original");

        /* GET NEXT BLOB RANGE */
        if (spot == 1 || spot > last) {
            rc = SBlobGetRange(&blob, spot, &last);
            if (rc != 0) {
                break;
            }
        }

        assert(spot <= last);

        /* GET NREADS */
        if ((rc = SRAColumnRead
            (self->_NReadsCol, spot, &base, &offset, &size)) != 0)
        {
            logerr(klogErr, rc, "cannot SRAColumnRead");
            break;
        }
        else if (offset != 0 || size != sizeof nReads * 8) {
            rc = RC(rcExe, rcColumn, rcReading, rcData, rcInvalid);
            plogerr(klogErr, rc,
                "Bad SRAColumnRead(\"NREADS\", $(spot)) result",
                PLOG_U32(spot), spot);
            /* stop here: continuing would overwrite rc and write a 0-read row */
            break;
        }
        else {
            nReads = *((uint8_t*) base);
            if (spot == 1) {
                if (nReads == 1) {
                    plogmsg(klogInfo, "The first spot has $(nreads) read",
                        "nreads=%d", nReads);
                }
                else {
                    plogmsg(klogInfo, "The first spot has $(nreads) reads",
                        "nreads=%d", nReads);
                }
            }
        }

        /* GET READ_FILTER */
        if (toRedact) {
            base = filter;
            ++(*redactedSpots);
        }
        else {
            if ((rc = SRAColumnRead(self->_origFilterCol,
                spot, &base, &offset, &size)) != 0)
            {
                plogerr(klogErr, rc,
                    "while calling SRAColumnRead($(name))", "name=%s",
                    "READ_FILTER");
                break;
            }
            else if (offset != 0
                  || size != sizeof (uint8_t) * 8 * nReads)
            {
                rc = RC(rcExe, rcColumn, rcReading, rcData, rcInvalid);
                plogerr(klogErr, rc, "Bad SRAColumnRead($(spot)) result",
                    PLOG_U32(spot), spot);
                /* stop here: the row does not match NREADS, do not copy it */
                break;
            }
        }

        /* WRITE THE ROW: one filter byte per read */
        if ((rc = SRATableOpenSpot(self->_wrTbl, spot)) != 0) {
            plogerr(klogErr, rc, "cannot open Spot $(id)", PLOG_U32(id), spot);
            break;
        }
        if ((rc = SRATableWriteIdxColumn(self->_wrTbl,
            colIdx, base, 0, sizeof (uint8_t) * 8 * nReads)) != 0)
        {
            logerr(klogErr, rc, "cannot SRATableWriteIdxColumn");
            break;
        }
        if ((rc = SRATableCloseSpot(self->_wrTbl)) != 0) {
            logerr(klogErr, rc, "cannot SRATableCloseSpot");
            break;
        }

        /* CUT THE BLOB: close the write cursor at the end of the range
           reported by SBlobGetRange */
        if (spot == last) {
            rc = SRATableCloseCursor(self->_wrTbl);
            if (rc != 0) {
                plogerr(klogErr, rc, "cannot SRATableCloseCursor $(id)",
                    PLOG_U32(id), spot);
                break;
            }
        }
    }

    /* always release the iterator; its error only matters if nothing failed earlier */
    rc2 = SpotIteratorDestroy(&it);
    if (rc == 0)
    {   rc = rc2; }

    *all = spot;

    return rc;
}