Example #1
0
/* SRAReaderMake
 *  Allocates a reader over "table" and restricts it to a spot id range.
 *
 *  "self" [ OUT ] - receives the new reader on success; set to NULL on
 *  failure so callers never observe an indeterminate pointer.
 *
 *  "size" [ IN ] and "accession" [ IN ] - forwarded to SRAReaderAlloc.
 *
 *  "table" [ IN ] - table to read from; must not be NULL.
 *
 *  "minSpotId", "maxSpotId" [ IN ] - requested range; a value of 0 keeps the
 *  corresponding bound reported by the table. A non-zero bound is accepted
 *  only if SRAReaderSeekSpot succeeds for it.
 *
 *  Returns 0 on success. A NULL "self" or "table" yields an
 *  rcParam/rcNull error; any other failure is the rc of the failing call.
 */
rc_t SRAReaderMake(const SRAReader** self, size_t size, const SRATable* table,
                   const char* accession, spotid_t minSpotId, spotid_t maxSpotId)
{
    rc_t rc = 0;
    SRAReader* me = NULL;

    if( self == NULL || table == NULL ) {
        rc = RC(rcSRA, rcFormatter, rcConstructing, rcParam, rcNull);
    } else if( (rc = SRAReaderAlloc(&me, size, accession)) == 0 ) {
        me->table = table;
        /* start from the table's own range, then narrow it if requested */
        if( (rc = SRATableMinSpotId(me->table, &me->minSpotId)) == 0 &&
            (rc = SRATableMaxSpotId(me->table, &me->maxSpotId)) == 0 ) {
            if( minSpotId > 0 && (rc = SRAReaderSeekSpot(me, minSpotId)) == 0 ) {
                me->minSpotId = minSpotId;
            }
            if( rc == 0 && maxSpotId > 0 && (rc = SRAReaderSeekSpot(me, maxSpotId)) == 0 ) {
                me->maxSpotId = maxSpotId;
            }
        }
        /* reset after seekspot to initial state */
        me->spot = 0;
    }

    if( rc != 0 ) {
        SRAReaderWhack(me);
        if( self != NULL ) {
            *self = NULL;
        }
    } else {
        *self = me;
    }
    return rc;
}
Example #2
0
/* SpotIteratorInit
 *  Zeroes the iterator, positions it on spot 1, records the table's maximum
 *  spot id, opens the redact file for reading and reads the first spot to
 *  redact.
 *
 *  All three arguments must be non-NULL (asserted). "redactFileName" is
 *  stored by pointer in the iterator, not copied.
 *
 *  Returns 0 on success, otherwise the rc of the first step that failed;
 *  later steps are not attempted after a failure.
 */
static rc_t SpotIteratorInit(struct SpotIterator* self,
    const SRATable* tbl, const char* redactFileName)
{
    rc_t rc;

    assert(self && tbl && redactFileName);

    memset(self, 0, sizeof *self);
    self->m_crnSpotId = 1;

    /* upper bound of the iteration comes from the table */
    rc = SRATableMaxSpotId(tbl, &self->m_maxSpotId);
    if (rc != 0) {
        logerr(klogErr, rc, "while calling SRATableMaxSpotId");
        return rc;
    }
    plogmsg(klogInfo,
        "MaxSpotId = $(spot)", PLOG_U32(spot), self->m_maxSpotId);

    rc = SpotIteratorInitDirectory();
    if (rc != 0) {
        return rc;
    }

    /* open the file listing the spots to redact */
    self->m_filename = redactFileName;
    plogmsg(klogInfo, "Opening '$(path)'", "path=%s", self->m_filename);
    rc = KDirectoryOpenFileRead(
        __SpotIteratorDirectory, &self->m_file, "%s", self->m_filename);
    if (rc != 0) {
        plogerr(klogErr, rc,
            "while opening file '$(path)'", "path=%s", self->m_filename);
        return rc;
    }

    return SpotIteratorReadSpotToRedact(self);
}
Example #3
0
/*
 * spot_group_append - store a heap copy of text[0..len) in a spot group list
 *
 * "list" has "capacity" slots, of which "*count" are in use. One slot is
 * always kept free so the caller can NULL-terminate the list.
 *
 * Returns true and increments "*count" on success; returns false and leaves
 * the list untouched when the list is full or the copy cannot be allocated.
 */
static bool spot_group_append( char* list[], size_t capacity, int* count,
                               const char* text, size_t len )
{
    char* copy;

    if ( ( size_t )*count + 1 >= capacity )
    {
        return false;
    }
    copy = strndup( text, len );
    if ( copy == NULL )
    {
        return false;
    }
    list[ ( *count )++ ] = copy;
    return true;
}

/*******************************************************************************
 * KMain - defined for use with kapp library
 *
 * Parses the command line, opens every requested table and dumps the spots
 * in the requested id range through the formatter's splitter chain.
 *
 * Arguments not starting with '-' are collected as table paths (duplicates
 * are dropped); everything else is matched against the options below or
 * handed to the formatter's add_arg hook.
 *
 * Returns 100, 101 or 102 when the formatter cannot be initialized or
 * validated, 0 after the short usage (argc < 2) or the version text, and
 * otherwise the rc of the last processed table combined with the result of
 * ReportFinalize.
 *******************************************************************************/
rc_t CC KMain ( int argc, char* argv[] )
{
    rc_t rc = 0;
    int i;
    const char* arg;
    uint64_t total_spots = 0;

    const SRAMgr* sraMGR = NULL;
    SRADumperFmt fmt;

    bool to_stdout = false, do_gzip = false, do_bzip2 = false;
    char const* outdir = NULL;
    spotid_t minSpotId = 1;
    spotid_t maxSpotId = ~0;
    bool sub_dir = false;
    bool keep_empty = false;
    const char* table_path[10240];
    int table_path_qty = 0;

    char const* D_option = NULL;
    char const* P_option = NULL;
    char P_option_buffer[4096];
    const char* accession = NULL;
    const char* table_name = NULL;

    bool spot_group_on = false;
    int spot_groups = 0;
    char* spot_group[128] = {NULL};
    bool read_filter_on = false;
    SRAReadFilter read_filter = 0xFF;   /* 0xFF marks "no filter value given" */

    bool failed_to_open = false;

    /* for the fasta-ouput of fastq-dump: branch out completely of 'common' code */
    if ( fasta_dump_requested( argc, argv ) )
    {
        return fasta_dump( argc, argv );
    }

    /* Prepare for the worst: report this information after disaster */
    ReportBuildDate ( __DATE__ );

    memset( &fmt, 0, sizeof( fmt ) );
    rc = SRADumper_Init( &fmt );
    if ( rc != 0 )
    {
        LOGERR(klogErr, rc, "formatter initialization");
        return 100;
    }
    else if ( fmt.get_factory == NULL )
    {
        rc = RC( rcExe, rcFormatter, rcValidating, rcInterface, rcNull );
        LOGERR( klogErr, rc, "formatter factory" );
        return 101;
    }
    else
    {
        rc = SRADumper_ArgsValidate( argv[0], &fmt );
        if ( rc != 0 )
        {
            LOGERR( klogErr, rc, "formatter args list" );
            return 102;
        }
    }

    if ( argc < 2 )
    {
        CoreUsage( argv[0], &fmt, true, EXIT_FAILURE );
        return 0;
    }

    for ( i = 1; i < argc; i++ )
    {
        arg = argv[ i ];
        if ( arg[ 0 ] != '-' )
        {
            /* positional argument: a table path, stored once */
            uint32_t k;
            for ( k = 0; k < table_path_qty; k++ )
            {
                if ( strcmp( arg, table_path[ k ] ) == 0 )
                {
                    break;
                }
            }
            if ( k >= table_path_qty )
            {
                if ( ( table_path_qty + 1 ) >= ( sizeof( table_path ) / sizeof( table_path[ 0 ] ) ) )
                {
                    rc = RC( rcExe, rcArgv, rcReading, rcBuffer, rcInsufficient );
                    goto Catch;
                }
                table_path[ table_path_qty++ ] = arg;
            }
            continue;
        }
        arg = NULL;
        if ( SRADumper_GetArg( &fmt, "L", "log-level", &i, argc, argv, &arg ) )
        {
            rc = LogLevelSet( arg );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc, "log level $(lvl)", PLOG_S( lvl ), arg ) );
                goto Catch;
            }
        }
        else if ( SRADumper_GetArg( &fmt, NULL, OPTION_REPORT, &i, argc, argv, &arg ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "+", "debug", &i, argc, argv, &arg ) )
        {
#if _DEBUGGING
            rc = KDbgSetString( arg );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc, "debug level $(lvl)", PLOG_S( lvl ), arg ) );
                goto Catch;
            }
#endif
        }
        else if ( SRADumper_GetArg( &fmt, "H", "help", &i, argc, argv, NULL ) ||
                  SRADumper_GetArg( &fmt, "?", "h", &i, argc, argv, NULL ) )
        {
            CoreUsage( argv[ 0 ], &fmt, false, EXIT_SUCCESS );

        }
        else if ( SRADumper_GetArg( &fmt, "V", "version", &i, argc, argv, NULL ) )
        {
            HelpVersion ( argv[ 0 ], KAppVersion() );
            return 0;
        }
        else if ( SRADumper_GetArg( &fmt, "v", NULL, &i, argc, argv, NULL ) )
        {
            KStsLevelAdjust( 1 );

        }
        else if ( SRADumper_GetArg( &fmt, "D", "table-path", &i, argc, argv, &D_option ) )
        {
            LOGMSG( klogErr, "option -D is deprecated, see --help" );
        }
        else if ( SRADumper_GetArg( &fmt, "P", "path", &i, argc, argv, &P_option ) )
        {
            LOGMSG( klogErr, "option -P is deprecated, see --help" );

        }
        else if ( SRADumper_GetArg( &fmt, "A", "accession", &i, argc, argv, &accession ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "O", "outdir", &i, argc, argv, &outdir ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "Z", "stdout", &i, argc, argv, NULL ) )
        {
            to_stdout = true;
        }
        else if ( fmt.gzip && SRADumper_GetArg( &fmt, NULL, "gzip", &i, argc, argv, NULL ) )
        {
            do_gzip = true;
        }
        else if ( fmt.bzip2 && SRADumper_GetArg( &fmt, NULL, "bzip2", &i, argc, argv, NULL ) )
        {
            do_bzip2 = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "table", &i, argc, argv, &table_name ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "N", "minSpotId", &i, argc, argv, &arg ) )
        {
            minSpotId = AsciiToU32( arg, NULL, NULL );
        }
        else if ( SRADumper_GetArg( &fmt, "X", "maxSpotId", &i, argc, argv, &arg ) )
        {
            maxSpotId = AsciiToU32( arg, NULL, NULL );
        }
        else if ( SRADumper_GetArg( &fmt, "G", "spot-group", &i, argc, argv, NULL ) )
        {
            spot_group_on = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "spot-groups", &i, argc, argv, NULL ) )
        {
            if ( i + 1 < argc && argv[ i + 1 ][ 0 ] != '-' )
            {
                /* split the comma separated list; empty items are skipped */
                int f = 0, t = 0;
                bool stored = true;
                i++;
                while ( stored && argv[ i ][ t ] != '\0' )
                {
                    if ( argv[ i ][ t ] == ',' )
                    {
                        if ( t - f > 0 )
                        {
                            stored = spot_group_append( spot_group,
                                                        sizeof( spot_group ) / sizeof( spot_group[ 0 ] ),
                                                        &spot_groups, &argv[ i ][ f ], ( size_t )( t - f ) );
                        }
                        f = t + 1;
                    }
                    t++;
                }
                if ( stored && t - f > 0 )
                {
                    stored = spot_group_append( spot_group,
                                                sizeof( spot_group ) / sizeof( spot_group[ 0 ] ),
                                                &spot_groups, &argv[ i ][ f ], ( size_t )( t - f ) );
                }
                if ( !stored )
                {
                    /* list is full or a copy could not be allocated */
                    rc = RC( rcExe, rcArgv, rcReading, rcBuffer, rcInsufficient );
                    LOGERR( klogErr, rc, "cannot store spot group" );
                    goto Catch;
                }
                if ( spot_groups < 1 )
                {
                    rc = RC( rcApp, rcArgv, rcReading, rcParam, rcEmpty );
                    PLOGERR( klogErr, ( klogErr, rc, "$(p)", PLOG_S( p ), argv[ i - 1 ] ) );
                    CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
                }
                spot_group[ spot_groups ] = NULL;
            }
        }
        else if ( SRADumper_GetArg( &fmt, "R", "read-filter", &i, argc, argv, NULL ) )
        {
            read_filter_on = true;
            if ( i + 1 < argc && argv[ i + 1 ][ 0 ] != '-' )
            {
                i++;
                if ( read_filter != 0xFF )
                {
                    rc = RC( rcApp, rcArgv, rcReading, rcParam, rcDuplicate );
                    PLOGERR( klogErr, ( klogErr, rc, "$(p): $(o)",
                                        PLOG_2( PLOG_S( p ),PLOG_S( o ) ), argv[ i - 1 ], argv[ i ] ) );
                    CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
                }
                if ( strcasecmp( argv[ i ], "pass" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_PASS;
                }
                else if ( strcasecmp( argv[ i ], "reject" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_REJECT;
                }
                else if ( strcasecmp( argv[ i ], "criteria" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_CRITERIA;
                }
                else if ( strcasecmp( argv[ i ], "redacted" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_REDACTED;
                }
                else
                {
                    /* must be accession */
                    i--;
                }
            }
        }
        else if ( SRADumper_GetArg( &fmt, "T", "group-in-dirs", &i, argc, argv, NULL ) )
        {
            sub_dir = true;
        }
        else if ( SRADumper_GetArg( &fmt, "K", "keep-empty-files", &i, argc, argv, NULL ) )
        {
            keep_empty = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "no-user-settings", &i, argc, argv, NULL ) )
        {
            KConfigDisableUserSettings ();
        }
        else if ( fmt.add_arg && fmt.add_arg( &fmt, SRADumper_GetArg, &i, argc, argv ) )
        {
        }
        else
        {
            rc = RC( rcApp, rcArgv, rcReading, rcParam, rcIncorrect );
            PLOGERR( klogErr, ( klogErr, rc, "$(p)", PLOG_S( p ), argv[ i ] ) );
            CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
        }
    }

    if ( to_stdout )
    {
        if ( outdir != NULL || sub_dir || keep_empty ||
                spot_group_on || ( read_filter_on && read_filter == 0xFF ) )
        {
            LOGMSG( klogWarn, "stdout mode is set, some options are ignored" );
            spot_group_on = false;
            if ( read_filter == 0xFF )
            {
                read_filter_on = false;
            }
        }
        /* data goes to stdout, so route every message channel to stderr */
        KOutHandlerSetStdErr();
        KStsHandlerSetStdErr();
        KLogHandlerSetStdErr();
        ( void ) KDbgHandlerSetStdErr();
    }

    if ( do_gzip && do_bzip2 )
    {
        rc = RC( rcApp, rcArgv, rcReading, rcParam, rcAmbiguous );
        LOGERR( klogErr, rc, "output compression method" );
        CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
    }

    /* accept the range in either order */
    if ( minSpotId > maxSpotId )
    {
        spotid_t temp = maxSpotId;
        maxSpotId = minSpotId;
        minSpotId = temp;
    }

    if ( table_path_qty == 0 )
    {
        if ( D_option != NULL && D_option[ 0 ] != '\0' )
        {
            /* support deprecated '-D' option */
            table_path[ table_path_qty++ ] = D_option;
        }
        else if ( accession == NULL || accession[ 0 ] == '\0' )
        {
            /* must have accession to proceed */
            rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcEmpty );
            LOGERR( klogErr, rc, "expected accession" );
            goto Catch;
        }
        else if ( P_option != NULL && P_option[ 0 ] != '\0' )
        {
            /* support deprecated '-P' option */
            i = snprintf( P_option_buffer, sizeof( P_option_buffer ), "%s/%s", P_option, accession );
            if ( i < 0 || ( size_t )i >= sizeof( P_option_buffer ) )
            {
                rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcExcessive );
                LOGERR( klogErr, rc, "path too long" );
                goto Catch;
            }
            table_path[ table_path_qty++ ] = P_option_buffer;
        }
        else
        {
            table_path[ table_path_qty++ ] = accession;
        }
    }

    rc = SRAMgrMakeRead( &sraMGR );
    if ( rc != 0 )
    {
        LOGERR( klogErr, rc, "failed to open SRA manager" );
        goto Catch;
    }
    else
    {
        rc = SRASplitterFactory_FilerInit( to_stdout, do_gzip, do_bzip2, sub_dir, keep_empty, outdir );
        if ( rc != 0 )
        {
            LOGERR( klogErr, rc, "failed to initialize files" );
            goto Catch;
        }
    }

    {
        /* hand the VDB manager to the report module; failures here are
           logged but do not stop the dump */
        const VDBManager* vmgr = NULL;
        rc_t rc2 = SRAMgrGetVDBManagerRead( sraMGR, &vmgr );
        if ( rc2 != 0 )
        {
            LOGERR( klogErr, rc2, "while calling SRAMgrGetVDBManagerRead" );
        }
        rc2 = ReportSetVDBManager( vmgr );
        VDBManagerRelease( vmgr );
    }


    /* loop tables */
    for ( i = 0; i < table_path_qty; i++ )
    {
        const SRASplitterFactory* fact_head = NULL;
        spotid_t smax, smin;

        SRA_DUMP_DBG( 5, ( "table path '%s', name '%s'\n", table_path[ i ], table_name ) );
        if ( table_name != NULL )
        {
            rc = SRAMgrOpenAltTableRead( sraMGR, &fmt.table, table_name, table_path[ i ] );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc,
                                    "failed to open '$(path):$(table)'", "path=%s,table=%s",
                                    table_path[ i ], table_name ) );
                continue;
            }
        }

        ReportResetObject( table_path[ i ] );
        if ( fmt.table == NULL )
        {
            rc = SRAMgrOpenTableRead( sraMGR, &fmt.table, table_path[ i ] );
            if ( rc != 0 )
            {
                if ( UIError( rc, NULL, NULL ) )
                {
                    UITableLOGError( rc, NULL, true );
                }
                else
                {
                    PLOGERR( klogErr, ( klogErr, rc,
                                        "failed to open '$(path)'", "path=%s",
                                        table_path[ i ] ) );
                    if (GetRCState(rc) == rcNotFound) {
                        failed_to_open = true;
                    }
                }
                continue;
            }
        }

        /* infer accession from table_path if missing or more than one table */
        fmt.accession = table_path_qty > 1 ? NULL : accession;
        if ( fmt.accession == NULL || fmt.accession[ 0 ] == 0 )
        {
            char * basename;
            char *ext;
            size_t l;
            bool is_url = false;

            /* work on a private copy of the path; never copy the buffer onto
               itself and never write past its end */
            if ( table_path[ i ] != P_option_buffer )
            {
                size_t path_len = strlen( table_path[ i ] );
                if ( path_len >= sizeof( P_option_buffer ) )
                {
                    rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcExcessive );
                    LOGERR( klogErr, rc, "path too long" );
                    SRATableRelease( fmt.table );
                    fmt.table = NULL;
                    continue;
                }
                memcpy( P_option_buffer, table_path[ i ], path_len + 1 );
            }

            /* anything after the first ':' is treated as the path part of a URL */
            basename = strchr ( P_option_buffer, ':' );
            if ( basename )
            {
                ++basename;
                if ( basename [0] == '\0' )
                    basename = P_option_buffer;
                else
                    is_url = true;
            }
            else
                basename = P_option_buffer;

            if ( is_url )
            {
                /* drop fragment and query */
                ext = strchr ( basename, '#' );
                if ( ext )
                    ext[ 0 ] = '\0';
                ext = strchr ( basename, '?' );
                if ( ext )
                    ext[ 0 ] = '\0';
            }


            /* strip trailing path separators; stop at the start of the string
               so basename[ l - 1 ] is never read with l == 0 */
            l = strlen( basename  );
            while ( l > 0 && strchr( "\\/", basename[ l - 1 ] ) != NULL )
            {
                basename[ --l ] = '\0';
            }
            fmt.accession = strrchr( basename, '/' );
            if ( fmt.accession++ == NULL )
            {
                fmt.accession = basename;
            }

            /* cut off [.lite].[c]sra[.nenc||.ncbi_enc] if any */
            ext = strrchr( fmt.accession, '.' );
            if ( ext != NULL )
            {
                if ( strcasecmp( ext, ".nenc" ) == 0 || strcasecmp( ext, ".ncbi_enc" ) == 0 )
                {
                    *ext = '\0';
                    ext = strrchr( fmt.accession, '.' );
                }
                if ( ext != NULL && ( strcasecmp( ext, ".sra" ) == 0 || strcasecmp( ext, ".csra" ) == 0 ) )
                {
                    *ext = '\0';
                    ext = strrchr( fmt.accession, '.' );
                    if ( ext != NULL && strcasecmp( ext, ".lite" ) == 0 )
                    {
                        *ext = '\0';
                    }
                }
            }
        }

        SRA_DUMP_DBG( 5, ( "accession: '%s'\n", fmt.accession ) );
        rc = SRASplitterFactory_FilerPrefix( accession ? accession : fmt.accession );

        /* single-pass loop: "break" is used to bail out of the per-table work */
        while ( rc == 0 )
        {
            /* sort out the spot id range */
            if ( ( rc = SRATableMaxSpotId( fmt.table, &smax ) ) != 0 ||
                    ( rc = SRATableMinSpotId( fmt.table, &smin ) ) != 0 )
            {
                break;
            }

            {
                const struct VTable* tbl = NULL;
                rc_t rc2 = SRATableGetVTableRead( fmt.table, &tbl );
                if ( rc == 0 )
                {
                    rc = rc2;
                }
                rc2 = ReportResetTable( table_path[i], tbl );
                if ( rc == 0 )
                {
                    rc = rc2;
                }
                VTableRelease( tbl );   /* SRATableGetVTableRead adds Reference to tbl! */
            }

            /* test if we have to dump anything... */
            if ( smax < minSpotId || smin > maxSpotId )
            {
                break;
            }
            if ( smax > maxSpotId )
            {
                smax = maxSpotId;
            }
            if ( smin < minSpotId )
            {
                smin = minSpotId;
            }

            /* hack to reduce looping in AddSpot: needs redesign to pass nreads along through tree */
            if ( true ) /* ??? */
            {
                const SRAColumn* c = NULL;
                nreads_max = NREADS_MAX;
                rc = SRATableOpenColumnRead( fmt.table, &c, "PLATFORM", sra_platform_id_t );
                if ( rc == 0 )
                {
                    const INSDC_SRA_platform_id *platform;
                    bitsz_t o, z;
                    rc = SRAColumnRead( c, 1, (const void **)&platform, &o, &z );
                    if ( rc == 0 && platform != NULL )
                    {
                        if ( *platform != SRA_PLATFORM_PACBIO_SMRT )
                        {
                            nreads_max = 32;
                        }
                    }
                    SRAColumnRelease( c );
                }
                else if ( GetRCState( rc ) == rcNotFound && GetRCObject( rc ) == rcColumn )
                {
                    rc = 0;
                }
            }

            /* table dependent */
            rc = fmt.get_factory( &fmt, &fact_head );
            if ( rc != 0 )
            {
                break;
            }
            if ( fact_head == NULL )
            {
                rc = RC( rcExe, rcFormatter, rcResolving, rcInterface, rcNull );
                break;
            }

            /* each optional splitter is pushed in front of the current head */
            if ( rc == 0 && ( spot_group_on || spot_groups > 0 ) )
            {
                const SRASplitterFactory* f = NULL;
                rc = SpotGroupSplitterFactory_Make( &f, fmt.table, spot_group_on, spot_group );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            if ( rc == 0 && read_filter_on )
            {
                const SRASplitterFactory* f = NULL;
                rc = ReadFilterSplitterFactory_Make( &f, fmt.table, read_filter );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            if ( rc == 0 )
            {
                /* this filter takes over head of chain to be first and kill off bad NREADS */
                const SRASplitterFactory* f = NULL;
                rc = MaxNReadsValidatorFactory_Make( &f, fmt.table );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            /* only initialize a completely built chain, so a failure above
               is reported instead of being overwritten here */
            if ( rc == 0 )
            {
                rc = SRASplitterFactory_Init( fact_head );
            }
            if ( rc == 0 )
            {
                /* ********************************************************** */
                rc = SRADumper_DumpRun( fmt.table, smin, smax, fact_head );
                /* ********************************************************** */
                if ( rc == 0 )
                {
                    /* "total" is cumulative across tables; report the delta */
                    uint64_t total = 0, file = 0;
                    SRASplitterFactory_FilerReport( &total, &file );
                    OUTMSG(( "Written %lu spots for %s\n", total - total_spots, table_path[ i ] ));
                    if ( to_stdout && total > 0 )
                    {
                        PLOGMSG( klogInfo, ( klogInfo, "$(t) biggest file has $(n) spots",
                                             PLOG_2( PLOG_S( t ), PLOG_U64( n ) ), table_path[ i ], file ));
                    }
                    total_spots = total;
                }
            }
            break;
        }

        SRASplitterFactory_Release( fact_head );
        SRATableRelease( fmt.table );
        fmt.table = NULL;
        if ( rc == 0 )
        {
            PLOGMSG( klogInfo, ( klogInfo, "$(path)$(dot)$(table) $(spots) spots",
                                 PLOG_4(PLOG_S(path),PLOG_S(dot),PLOG_S(table),PLOG_U32(spots)),
                                 table_path[ i ], table_name ? ":" : "", table_name ? table_name : "", smax - smin + 1 ) );
        }
        else if ( !reportToUser( rc, argv [0 ] ) )
        {
            PLOGERR( klogErr, ( klogErr, rc, "failed $(path)$(dot)$(table)",
                                PLOG_3(PLOG_S(path),PLOG_S(dot),PLOG_S(table)),
                                table_path[ i ], table_name ? ":" : "", table_name ? table_name : "" ) );
        }
    }

Catch:
    /* common cleanup, reached on success and from every "goto Catch" */
    if ( fmt.release )
    {
        rc_t rr = fmt.release( &fmt );
        if ( rr != 0 )
        {
            SRA_DUMP_DBG( 1, ( "formatter release error %R\n", rr ) );
        }
    }

    for ( i = 0; i < spot_groups; i++ )
    {
        free( spot_group[ i ] );
    }
    SRASplitterFiler_Release();
    SRAMgrRelease( sraMGR );
    OUTMSG(( "Written %lu spots total\n", total_spots ));


    if (failed_to_open) {
        ReportSilence();
    }
    {
        /* Report execution environment if necessary */
        rc_t rc2 = ReportFinalize( rc );
        if ( rc == 0 )
        {
            rc = rc2;
        }
    }
    return rc;
}