/*
 * SRAReaderMake
 *  Allocates a reader of `size` bytes via SRAReaderAlloc, binds it to `table`
 *  and establishes the spot-id range the reader will work on.
 *
 *  self       [OUT] receives the new reader on success; set to NULL on failure
 *  size       [IN]  passed through to SRAReaderAlloc
 *  table      [IN]  table to read from; must not be NULL
 *  accession  [IN]  passed through to SRAReaderAlloc
 *  minSpotId  [IN]  if > 0, replaces the table's minimum spot id, provided
 *                   SRAReaderSeekSpot accepts it
 *  maxSpotId  [IN]  if > 0, replaces the table's maximum spot id, provided
 *                   SRAReaderSeekSpot accepts it
 *
 *  Returns 0 on success. Returns rcParam/rcNull when `self` or `table` is
 *  NULL, otherwise the first non-zero rc reported by a callee.
 */
rc_t SRAReaderMake(const SRAReader** self, size_t size, const SRATable* table,
                   const char* accession, spotid_t minSpotId, spotid_t maxSpotId)
{
    rc_t rc = 0;
    SRAReader* me = NULL;

    if( self == NULL || table == NULL ) {
        rc = RC(rcSRA, rcFormatter, rcConstructing, rcParam, rcNull);
    } else if( (rc = SRAReaderAlloc(&me, size, accession)) == 0 ) {
        me->table = table;
        /* start from the table's own range, then narrow it on request */
        if( (rc = SRATableMinSpotId(me->table, &me->minSpotId)) == 0 &&
            (rc = SRATableMaxSpotId(me->table, &me->maxSpotId)) == 0 ) {
            if( minSpotId > 0 && (rc = SRAReaderSeekSpot(me, minSpotId)) == 0 ) {
                me->minSpotId = minSpotId;
            }
            if( rc == 0 && maxSpotId > 0 && (rc = SRAReaderSeekSpot(me, maxSpotId)) == 0 ) {
                me->maxSpotId = maxSpotId;
            }
        }
        /* reset after seekspot to initial state */
        me->spot = 0;
    }

    if( rc != 0 ) {
        /* nothing to destroy when allocation never happened */
        if( me != NULL ) {
            SRAReaderWhack(me);
        }
        /* never leave the caller with an indeterminate pointer */
        if( self != NULL ) {
            *self = NULL;
        }
    } else {
        *self = me;
    }
    return rc;
}
static rc_t SpotIteratorInit(struct SpotIterator* self, const SRATable* tbl, const char* redactFileName) { rc_t rc = 0; assert(self && tbl && redactFileName); memset(self, 0, sizeof *self); self->m_crnSpotId = 1; rc = SRATableMaxSpotId(tbl, &self->m_maxSpotId); if (rc != 0) { logerr(klogErr, rc, "while calling SRATableMaxSpotId"); } else { plogmsg(klogInfo, "MaxSpotId = $(spot)", PLOG_U32(spot), self->m_maxSpotId); } if (rc == 0) { rc = SpotIteratorInitDirectory(); } if (rc == 0) { self->m_filename = redactFileName; plogmsg(klogInfo, "Opening '$(path)'", "path=%s", self->m_filename); rc = KDirectoryOpenFileRead( __SpotIteratorDirectory, &self->m_file, "%s", self->m_filename); if (rc != 0) { plogerr(klogErr, rc, "while opening file '$(path)'", "path=%s", self->m_filename); } } if (rc == 0) { rc = SpotIteratorReadSpotToRedact(self); } return rc; }
/*******************************************************************************
 * KMain - defined for use with kapp library
 *
 * Program entry point. In order, it:
 *   1. hands off to fasta_dump() when fasta_dump_requested() says so;
 *   2. initializes and validates the output formatter (SRADumperFmt);
 *   3. parses the command line: every argument not starting with '-' is a
 *      table path (duplicates are dropped), everything else is an option;
 *   4. resolves the list of tables from the paths or from the deprecated
 *      -D / -P options or the accession;
 *   5. for each table: opens it, derives an accession name from the path when
 *      none applies, clamps the requested spot range to the table's range,
 *      builds the splitter-factory chain and dumps the run;
 *   6. releases everything and finalizes the report.
 *
 * Returns 100/101/102 for formatter setup failures, 0 after printing usage or
 * the version, otherwise the rc of the last operation (or of ReportFinalize
 * when everything else succeeded).
 *******************************************************************************/
rc_t CC KMain ( int argc, char* argv[] )
{
    rc_t rc = 0;
    int i;
    const char* arg;
    uint64_t total_spots = 0;
    const SRAMgr* sraMGR = NULL;
    SRADumperFmt fmt;

    bool to_stdout = false, do_gzip = false, do_bzip2 = false;
    char const* outdir = NULL;
    spotid_t minSpotId = 1;
    spotid_t maxSpotId = ~0;
    bool sub_dir = false;
    bool keep_empty = false;
    const char* table_path[10240];
    int table_path_qty = 0;

    char const* D_option = NULL;
    char const* P_option = NULL;
    char P_option_buffer[4096];
    const char* accession = NULL;
    const char* table_name = NULL;

    bool spot_group_on = false;
    int spot_groups = 0;
    char* spot_group[128] = {NULL};   /* NULL-terminated list, so 127 usable slots */
    bool read_filter_on = false;
    SRAReadFilter read_filter = 0xFF; /* 0xFF means "no filter value given" */

    bool failed_to_open = false;

    /* for the fasta-ouput of fastq-dump: branch out completely of 'common' code */
    if ( fasta_dump_requested( argc, argv ) )
    {
        return fasta_dump( argc, argv );
    }

    /* Prepare for the worst: report this information after disaster */
    ReportBuildDate ( __DATE__ );

    memset( &fmt, 0, sizeof( fmt ) );
    rc = SRADumper_Init( &fmt );
    if ( rc != 0 )
    {
        LOGERR(klogErr, rc, "formatter initialization");
        return 100;
    }
    else if ( fmt.get_factory == NULL )
    {
        rc = RC( rcExe, rcFormatter, rcValidating, rcInterface, rcNull );
        LOGERR( klogErr, rc, "formatter factory" );
        return 101;
    }
    else
    {
        rc = SRADumper_ArgsValidate( argv[0], &fmt );
        if ( rc != 0 )
        {
            LOGERR( klogErr, rc, "formatter args list" );
            return 102;
        }
    }

    if ( argc < 2 )
    {
        CoreUsage( argv[0], &fmt, true, EXIT_FAILURE );
        return 0;
    }

    /* ---------------------------------------------------------------------
     * command line parsing
     * ------------------------------------------------------------------- */
    for ( i = 1; i < argc; i++ )
    {
        arg = argv[ i ];
        if ( arg[ 0 ] != '-' )
        {
            /* positional argument: a table path, stored once */
            int k;
            for ( k = 0; k < table_path_qty; k++ )
            {
                if ( strcmp( arg, table_path[ k ] ) == 0 )
                {
                    break;
                }
            }
            if ( k >= table_path_qty )
            {
                if ( ( size_t )( table_path_qty + 1 ) >= ( sizeof( table_path ) / sizeof( table_path[ 0 ] ) ) )
                {
                    rc = RC( rcExe, rcArgv, rcReading, rcBuffer, rcInsufficient );
                    goto Catch;
                }
                table_path[ table_path_qty++ ] = arg;
            }
            continue;
        }

        arg = NULL;
        if ( SRADumper_GetArg( &fmt, "L", "log-level", &i, argc, argv, &arg ) )
        {
            rc = LogLevelSet( arg );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc, "log level $(lvl)", PLOG_S( lvl ), arg ) );
                goto Catch;
            }
        }
        else if ( SRADumper_GetArg( &fmt, NULL, OPTION_REPORT, &i, argc, argv, &arg ) )
        {
            /* accepted here, value not used by this function */
        }
        else if ( SRADumper_GetArg( &fmt, "+", "debug", &i, argc, argv, &arg ) )
        {
#if _DEBUGGING
            rc = KDbgSetString( arg );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc, "debug level $(lvl)", PLOG_S( lvl ), arg ) );
                goto Catch;
            }
#endif
        }
        else if ( SRADumper_GetArg( &fmt, "H", "help", &i, argc, argv, NULL ) ||
                  SRADumper_GetArg( &fmt, "?", "h", &i, argc, argv, NULL ) )
        {
            CoreUsage( argv[ 0 ], &fmt, false, EXIT_SUCCESS );
        }
        else if ( SRADumper_GetArg( &fmt, "V", "version", &i, argc, argv, NULL ) )
        {
            HelpVersion ( argv[ 0 ], KAppVersion() );
            return 0;
        }
        else if ( SRADumper_GetArg( &fmt, "v", NULL, &i, argc, argv, NULL ) )
        {
            KStsLevelAdjust( 1 );
        }
        else if ( SRADumper_GetArg( &fmt, "D", "table-path", &i, argc, argv, &D_option ) )
        {
            LOGMSG( klogErr, "option -D is deprecated, see --help" );
        }
        else if ( SRADumper_GetArg( &fmt, "P", "path", &i, argc, argv, &P_option ) )
        {
            LOGMSG( klogErr, "option -P is deprecated, see --help" );
        }
        else if ( SRADumper_GetArg( &fmt, "A", "accession", &i, argc, argv, &accession ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "O", "outdir", &i, argc, argv, &outdir ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "Z", "stdout", &i, argc, argv, NULL ) )
        {
            to_stdout = true;
        }
        else if ( fmt.gzip && SRADumper_GetArg( &fmt, NULL, "gzip", &i, argc, argv, NULL ) )
        {
            do_gzip = true;
        }
        else if ( fmt.bzip2 && SRADumper_GetArg( &fmt, NULL, "bzip2", &i, argc, argv, NULL ) )
        {
            do_bzip2 = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "table", &i, argc, argv, &table_name ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "N", "minSpotId", &i, argc, argv, &arg ) )
        {
            minSpotId = AsciiToU32( arg, NULL, NULL );
        }
        else if ( SRADumper_GetArg( &fmt, "X", "maxSpotId", &i, argc, argv, &arg ) )
        {
            maxSpotId = AsciiToU32( arg, NULL, NULL );
        }
        else if ( SRADumper_GetArg( &fmt, "G", "spot-group", &i, argc, argv, NULL ) )
        {
            spot_group_on = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "spot-groups", &i, argc, argv, NULL ) )
        {
            /* optional value: comma separated list of names, empty items skipped */
            if ( i + 1 < argc && argv[ i + 1 ][ 0 ] != '-' )
            {
                const int capacity = ( int )( sizeof( spot_group ) / sizeof( spot_group[ 0 ] ) );
                const char* list;
                int f = 0, t = 0;

                i++;
                list = argv[ i ];
                for ( ; ; t++ )
                {
                    const char ch = list[ t ];
                    if ( ch != ',' && ch != '\0' )
                    {
                        continue;
                    }
                    if ( t - f > 0 )
                    {
                        /* one slot must stay free for the terminating NULL */
                        if ( spot_groups + 1 >= capacity )
                        {
                            rc = RC( rcExe, rcArgv, rcReading, rcBuffer, rcInsufficient );
                            LOGERR( klogErr, rc, "too many spot groups" );
                            goto Catch;
                        }
                        spot_group[ spot_groups ] = strndup( &list[ f ], t - f );
                        if ( spot_group[ spot_groups ] == NULL )
                        {
                            rc = RC( rcExe, rcArgv, rcReading, rcBuffer, rcInsufficient );
                            LOGERR( klogErr, rc, "out of memory copying spot group name" );
                            goto Catch;
                        }
                        spot_groups++;
                    }
                    f = t + 1;
                    if ( ch == '\0' )
                    {
                        break;
                    }
                }
                if ( spot_groups < 1 )
                {
                    rc = RC( rcApp, rcArgv, rcReading, rcParam, rcEmpty );
                    PLOGERR( klogErr, ( klogErr, rc, "$(p)", PLOG_S( p ), argv[ i - 1 ] ) );
                    CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
                }
                spot_group[ spot_groups ] = NULL;
            }
        }
        else if ( SRADumper_GetArg( &fmt, "R", "read-filter", &i, argc, argv, NULL ) )
        {
            read_filter_on = true;
            if ( i + 1 < argc && argv[ i + 1 ][ 0 ] != '-' )
            {
                i++;
                if ( read_filter != 0xFF )
                {
                    rc = RC( rcApp, rcArgv, rcReading, rcParam, rcDuplicate );
                    PLOGERR( klogErr, ( klogErr, rc, "$(p): $(o)",
                             PLOG_2( PLOG_S( p ),PLOG_S( o ) ), argv[ i - 1 ], argv[ i ] ) );
                    CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
                }
                if ( strcasecmp( argv[ i ], "pass" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_PASS;
                }
                else if ( strcasecmp( argv[ i ], "reject" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_REJECT;
                }
                else if ( strcasecmp( argv[ i ], "criteria" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_CRITERIA;
                }
                else if ( strcasecmp( argv[ i ], "redacted" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_REDACTED;
                }
                else
                {
                    /* must be accession */
                    i--;
                }
            }
        }
        else if ( SRADumper_GetArg( &fmt, "T", "group-in-dirs", &i, argc, argv, NULL ) )
        {
            sub_dir = true;
        }
        else if ( SRADumper_GetArg( &fmt, "K", "keep-empty-files", &i, argc, argv, NULL ) )
        {
            keep_empty = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "no-user-settings", &i, argc, argv, NULL ) )
        {
            KConfigDisableUserSettings ();
        }
        else if ( fmt.add_arg && fmt.add_arg( &fmt, SRADumper_GetArg, &i, argc, argv ) )
        {
            /* consumed by the formatter's own option handler */
        }
        else
        {
            rc = RC( rcApp, rcArgv, rcReading, rcParam, rcIncorrect );
            PLOGERR( klogErr, ( klogErr, rc, "$(p)", PLOG_S( p ), argv[ i ] ) );
            CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
        }
    }

    /* ---------------------------------------------------------------------
     * option post-processing
     * ------------------------------------------------------------------- */
    if ( to_stdout )
    {
        if ( outdir != NULL || sub_dir || keep_empty || spot_group_on ||
             ( read_filter_on && read_filter == 0xFF ) )
        {
            LOGMSG( klogWarn, "stdout mode is set, some options are ignored" );
            spot_group_on = false;
            if ( read_filter == 0xFF )
            {
                read_filter_on = false;
            }
        }
        /* data goes to stdout, so all messages are routed to stderr */
        KOutHandlerSetStdErr();
        KStsHandlerSetStdErr();
        KLogHandlerSetStdErr();
        ( void ) KDbgHandlerSetStdErr();
    }

    if ( do_gzip && do_bzip2 )
    {
        rc = RC( rcApp, rcArgv, rcReading, rcParam, rcAmbiguous );
        LOGERR( klogErr, rc, "output compression method" );
        CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
    }

    if ( minSpotId > maxSpotId )
    {
        spotid_t temp = maxSpotId;
        maxSpotId = minSpotId;
        minSpotId = temp;
    }

    if ( table_path_qty == 0 )
    {
        if ( D_option != NULL && D_option[ 0 ] != '\0' )
        {
            /* support deprecated '-D' option */
            table_path[ table_path_qty++ ] = D_option;
        }
        else if ( accession == NULL || accession[ 0 ] == '\0' )
        {
            /* must have accession to proceed */
            rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcEmpty );
            LOGERR( klogErr, rc, "expected accession" );
            goto Catch;
        }
        else if ( P_option != NULL && P_option[ 0 ] != '\0' )
        {
            /* support deprecated '-P' option */
            i = snprintf( P_option_buffer, sizeof( P_option_buffer ), "%s/%s", P_option, accession );
            if ( i < 0 || ( size_t )i >= sizeof( P_option_buffer ) )
            {
                rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcExcessive );
                LOGERR( klogErr, rc, "path too long" );
                goto Catch;
            }
            table_path[ table_path_qty++ ] = P_option_buffer;
        }
        else
        {
            table_path[ table_path_qty++ ] = accession;
        }
    }

    rc = SRAMgrMakeRead( &sraMGR );
    if ( rc != 0 )
    {
        LOGERR( klogErr, rc, "failed to open SRA manager" );
        goto Catch;
    }
    else
    {
        rc = SRASplitterFactory_FilerInit( to_stdout, do_gzip, do_bzip2, sub_dir, keep_empty, outdir );
        if ( rc != 0 )
        {
            LOGERR( klogErr, rc, "failed to initialize files" );
            goto Catch;
        }
    }

    {
        const VDBManager* vmgr = NULL;
        rc_t rc2 = SRAMgrGetVDBManagerRead( sraMGR, &vmgr );
        if ( rc2 != 0 )
        {
            LOGERR( klogErr, rc2, "while calling SRAMgrGetVDBManagerRead" );
        }
        rc2 = ReportSetVDBManager( vmgr );
        VDBManagerRelease( vmgr );
    }

    /* loop tables */
    for ( i = 0; i < table_path_qty; i++ )
    {
        const SRASplitterFactory* fact_head = NULL;
        spotid_t smax, smin;

        SRA_DUMP_DBG( 5, ( "table path '%s', name '%s'\n", table_path[ i ], table_name ) );
        if ( table_name != NULL )
        {
            rc = SRAMgrOpenAltTableRead( sraMGR, &fmt.table, table_name, table_path[ i ] );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc, "failed to open '$(path):$(table)'",
                         "path=%s,table=%s", table_path[ i ], table_name ) );
                continue;
            }
        }

        ReportResetObject( table_path[ i ] );

        if ( fmt.table == NULL )
        {
            rc = SRAMgrOpenTableRead( sraMGR, &fmt.table, table_path[ i ] );
            if ( rc != 0 )
            {
                if ( UIError( rc, NULL, NULL ) )
                {
                    UITableLOGError( rc, NULL, true );
                }
                else
                {
                    PLOGERR( klogErr, ( klogErr, rc, "failed to open '$(path)'",
                             "path=%s", table_path[ i ] ) );
                    if (GetRCState(rc) == rcNotFound)
                    {
                        failed_to_open = true;
                    }
                }
                continue;
            }
        }

        /* infer accession from table_path if missing or more than one table */
        fmt.accession = table_path_qty > 1 ? NULL : accession;
        if ( fmt.accession == NULL || fmt.accession[ 0 ] == 0 )
        {
            char* basename;
            char* ext;
            char* slash;
            size_t l = strlen( table_path[ i ] );
            bool is_url = false;

            /* the path is user input: refuse what does not fit the scratch buffer */
            if ( l >= sizeof( P_option_buffer ) )
            {
                rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcExcessive );
                LOGERR( klogErr, rc, "path too long" );
                SRATableRelease( fmt.table );
                fmt.table = NULL;
                continue;
            }
            /* memmove: stays defined even if source and buffer were the same storage */
            memmove( P_option_buffer, table_path[ i ], l + 1 );

            /* anything after the first ':' is treated as the path part of a URL */
            basename = strchr ( P_option_buffer, ':' );
            if ( basename )
            {
                ++basename;
                if ( basename [0] == '\0' )
                    basename = P_option_buffer;
                else
                    is_url = true;
            }
            else
                basename = P_option_buffer;

            if ( is_url )
            {
                /* drop fragment and query */
                ext = strchr ( basename, '#' );
                if ( ext )
                    ext[ 0 ] = '\0';
                ext = strchr ( basename, '?' );
                if ( ext )
                    ext[ 0 ] = '\0';
            }

            /* strip trailing separators; stop at the start of the string */
            l = strlen( basename );
            while ( l > 0 && strchr( "\\/", basename[ l - 1 ] ) != NULL )
            {
                basename[ --l ] = '\0';
            }

            /* keep only the last path component */
            slash = strrchr( basename, '/' );
            fmt.accession = ( slash != NULL ) ? slash + 1 : basename;

            /* cut off [.lite].[c]sra[.nenc||.ncbi_enc] if any */
            ext = strrchr( fmt.accession, '.' );
            if ( ext != NULL )
            {
                if ( strcasecmp( ext, ".nenc" ) == 0 || strcasecmp( ext, ".ncbi_enc" ) == 0 )
                {
                    *ext = '\0';
                    ext = strrchr( fmt.accession, '.' );
                }
                if ( ext != NULL && ( strcasecmp( ext, ".sra" ) == 0 || strcasecmp( ext, ".csra" ) == 0 ) )
                {
                    *ext = '\0';
                    ext = strrchr( fmt.accession, '.' );
                    if ( ext != NULL && strcasecmp( ext, ".lite" ) == 0 )
                    {
                        *ext = '\0';
                    }
                }
            }
        }

        SRA_DUMP_DBG( 5, ( "accession: '%s'\n", fmt.accession ) );
        rc = SRASplitterFactory_FilerPrefix( accession ? accession : fmt.accession );

        /* executes at most once; 'break' is used as a structured exit */
        while ( rc == 0 )
        {
            /* sort out the spot id range */
            if ( ( rc = SRATableMaxSpotId( fmt.table, &smax ) ) != 0 ||
                 ( rc = SRATableMinSpotId( fmt.table, &smin ) ) != 0 )
            {
                break;
            }

            {
                const struct VTable* tbl = NULL;
                rc_t rc2 = SRATableGetVTableRead( fmt.table, &tbl );
                if ( rc == 0 )
                {
                    rc = rc2;
                }
                rc2 = ReportResetTable( table_path[i], tbl );
                if ( rc == 0 )
                {
                    rc = rc2;
                }
                VTableRelease( tbl ); /* SRATableGetVTableRead adds Reference to tbl! */
            }

            /* test if we have to dump anything... */
            if ( smax < minSpotId || smin > maxSpotId )
            {
                break;
            }
            if ( smax > maxSpotId )
            {
                smax = maxSpotId;
            }
            if ( smin < minSpotId )
            {
                smin = minSpotId;
            }

            /* hack to reduce looping in AddSpot: needs redesign to pass nreads along through tree */
            {
                const SRAColumn* c = NULL;
                nreads_max = NREADS_MAX;
                rc = SRATableOpenColumnRead( fmt.table, &c, "PLATFORM", sra_platform_id_t );
                if ( rc == 0 )
                {
                    const INSDC_SRA_platform_id *platform;
                    bitsz_t o, z;
                    rc = SRAColumnRead( c, 1, (const void **)&platform, &o, &z );
                    if ( rc == 0 && platform != NULL )
                    {
                        if ( *platform != SRA_PLATFORM_PACBIO_SMRT )
                        {
                            nreads_max = 32;
                        }
                    }
                    SRAColumnRelease( c );
                }
                else if ( GetRCState( rc ) == rcNotFound && GetRCObject( rc ) == rcColumn )
                {
                    rc = 0;
                }
                /* NOTE(review): any rc left here is overwritten by get_factory below,
                   as in the original code - presumably this lookup is best-effort */
            }

            /* table dependent */
            rc = fmt.get_factory( &fmt, &fact_head );
            if ( rc != 0 )
            {
                break;
            }
            if ( fact_head == NULL )
            {
                rc = RC( rcExe, rcFormatter, rcResolving, rcInterface, rcNull );
                break;
            }

            /* each optional factory is put in front of the current chain head */
            if ( rc == 0 && ( spot_group_on || spot_groups > 0 ) )
            {
                const SRASplitterFactory* f = NULL;
                rc = SpotGroupSplitterFactory_Make( &f, fmt.table, spot_group_on, spot_group );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            if ( rc == 0 && read_filter_on )
            {
                const SRASplitterFactory* f = NULL;
                rc = ReadFilterSplitterFactory_Make( &f, fmt.table, read_filter );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            if ( rc == 0 )
            {
                /* this filter takes over head of chain to be first and kill off bad NREADS */
                const SRASplitterFactory* f = NULL;
                rc = MaxNReadsValidatorFactory_Make( &f, fmt.table );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            /* only initialize a completely built chain: a failure above must not be
               overwritten, otherwise the run is dumped without the requested splitters */
            if ( rc == 0 )
            {
                rc = SRASplitterFactory_Init( fact_head );
            }
            if ( rc == 0 )
            {
                /* ********************************************************** */
                rc = SRADumper_DumpRun( fmt.table, smin, smax, fact_head );
                /* ********************************************************** */
                if ( rc == 0 )
                {
                    uint64_t total = 0, file = 0;
                    SRASplitterFactory_FilerReport( &total, &file );
                    /* 'total' is cumulative over tables, so report the difference */
                    OUTMSG(( "Written %lu spots for %s\n", total - total_spots, table_path[ i ] ));
                    if ( to_stdout && total > 0 )
                    {
                        PLOGMSG( klogInfo, ( klogInfo, "$(t) biggest file has $(n) spots",
                                 PLOG_2( PLOG_S( t ), PLOG_U64( n ) ), table_path[ i ], file ));
                    }
                    total_spots = total;
                }
            }
            break;
        }

        SRASplitterFactory_Release( fact_head );
        SRATableRelease( fmt.table );
        fmt.table = NULL;

        if ( rc == 0 )
        {
            PLOGMSG( klogInfo, ( klogInfo, "$(path)$(dot)$(table) $(spots) spots",
                     PLOG_4(PLOG_S(path),PLOG_S(dot),PLOG_S(table),PLOG_U32(spots)),
                     table_path[ i ], table_name ? ":" : "", table_name ? table_name : "", smax - smin + 1 ) );
        }
        else if ( !reportToUser( rc, argv [0 ] ) )
        {
            PLOGERR( klogErr, ( klogErr, rc, "failed $(path)$(dot)$(table)",
                     PLOG_3(PLOG_S(path),PLOG_S(dot),PLOG_S(table)),
                     table_path[ i ], table_name ? ":" : "", table_name ? table_name : "" ) );
        }
    }

Catch:
    if ( fmt.release )
    {
        rc_t rr = fmt.release( &fmt );
        if ( rr != 0 )
        {
            SRA_DUMP_DBG( 1, ( "formatter release error %R\n", rr ) );
        }
    }
    for ( i = 0; i < spot_groups; i++ )
    {
        free( spot_group[ i ] );
    }
    SRASplitterFiler_Release();
    SRAMgrRelease( sraMGR );
    OUTMSG(( "Written %lu spots total\n", total_spots ));

    if (failed_to_open)
    {
        ReportSilence();
    }

    {
        /* Report execution environment if necessary */
        rc_t rc2 = ReportFinalize( rc );
        if ( rc == 0 )
        {
            rc = rc2;
        }
    }
    return rc;
}