/*
 * copy_file
 *  Copy the content of 'fin' into 'fout', both starting at offset 0,
 *  moving the data through a 64 KiB stack buffer.
 *
 *  fin  [IN]  - source file, must not be NULL (asserted)
 *  fout [OUT] - destination file, must not be NULL (asserted)
 *
 *  Returns 0 once KFileRead reports 0 bytes read; otherwise the rc of the
 *  first failing KFileRead / KFileWrite, which is logged before returning.
 */
static
rc_t copy_file (const KFile * fin, KFile *fout)
{
    rc_t rc;
    uint8_t buff [64 * 1024];
    size_t num_read;
    uint64_t inpos;
    uint64_t outpos;

    assert (fin != NULL);
    assert (fout != NULL);

    inpos = 0;
    outpos = 0;

    do
    {
        rc = KFileRead (fin, inpos, buff, sizeof (buff), &num_read);
        if (rc != 0)
        {
            PLOGERR (klogErr, (klogErr, rc,
                     "Failed to read from directory structure in creating archive at $(P)",
                     PLOG_U64(P), inpos));
            break;
        }
        else if (num_read > 0)
        {
            size_t to_write;

            inpos += (uint64_t)num_read;

            STSMSG (2, ("Read %zu bytes to %lu", num_read, inpos));

            to_write = num_read;
            while (to_write > 0)
            {
                size_t num_writ;

                /* A write may be partial: resume at the first unwritten byte
                 * and ask only for what is still pending.  Passing the start
                 * of the buffer and the full length again would duplicate
                 * data in the output and could drive to_write below zero. */
                rc = KFileWrite (fout, outpos, buff + (num_read - to_write),
                                 to_write, &num_writ);
                if (rc != 0)
                {
                    PLOGERR (klogErr, (klogErr, rc,
                             "Failed to write to archive in creating archive at $(P)",
                             PLOG_U64(P), outpos));
                    break;
                }
                /* NOTE(review): a successful write of 0 bytes would spin here
                 * forever - confirm KFileWrite never reports that. */
                outpos += num_writ;
                to_write -= num_writ;
            }
        }
        if (rc != 0)
            break;
    } while (num_read != 0);

    return rc;
}
/*
 * copy_file
 *  Copy the content of 'fin' into 'fout', both starting at offset 0,
 *  moving the data through a 64 KiB stack buffer.
 *
 *  fin  [IN]  - source file, must not be NULL (asserted)
 *  fout [OUT] - destination file, must not be NULL (asserted)
 *
 *  Returns the rc of KFileSize if the size of 'fin' cannot be obtained
 *  (not logged), 0 once KFileRead reports 0 bytes read, or the rc of the
 *  first failing KFileRead / KFileWrite (logged).
 */
static
rc_t copy_file (const KFile * fin, KFile *fout)
{
    rc_t rc;
    uint8_t buff [64 * 1024];
    size_t num_read;
    uint64_t inpos;
    uint64_t outpos;
    uint64_t fsize;

    assert (fin != NULL);
    assert (fout != NULL);

    inpos = 0;
    outpos = 0;

    /* The size itself is not used below; the call is kept because a source
     * whose size cannot be queried is rejected up front. */
    rc = KFileSize (fin, &fsize);
    if (rc != 0)
        return rc;

    do
    {
        rc = KFileRead (fin, inpos, buff, sizeof (buff), &num_read);
        if (rc != 0)
        {
            PLOGERR (klogErr, (klogErr, rc,
                     "Failed to read from directory structure in creating archive at $(P)",
                     PLOG_U64(P), inpos));
            break;
        }
        else if (num_read > 0)
        {
            size_t to_write;

            inpos += (uint64_t)num_read;

            to_write = num_read;
            while (to_write > 0)
            {
                size_t num_writ;

                /* A write may be partial: resume at the first unwritten byte
                 * and ask only for what is still pending.  Passing the start
                 * of the buffer and the full length again would duplicate
                 * data in the output and could drive to_write below zero. */
                rc = KFileWrite (fout, outpos, buff + (num_read - to_write),
                                 to_write, &num_writ);
                if (rc != 0)
                {
                    PLOGERR (klogErr, (klogErr, rc,
                             "Failed to write to archive in creating archive at $(P)",
                             PLOG_U64(P), outpos));
                    break;
                }
                /* NOTE(review): a successful write of 0 bytes would spin here
                 * forever - confirm KFileWrite never reports that. */
                outpos += num_writ;
                to_write -= num_writ;
            }
        }
        if (rc != 0)
            break;
    } while (num_read != 0);

    return rc;
}
/*
 * MaxNReadsValidator_GetKey
 *  Splitter callback that checks the per-spot read count against nreads_max.
 *
 *  cself    [IN]     - splitter instance; NULL yields an rcParam/rcNull rc
 *  key      [OUT]    - always set to "" once the parameters are accepted;
 *                      NULL yields an rcParam/rcNull rc
 *  spot     [IN]     - row to read from self->col
 *  readmask [IN/OUT] - cleared via clear_readmask() when the count exceeds
 *                      nreads_max
 *
 *  The value read from the column is taken as an unsigned integer of 8, 16,
 *  32 or 64 bits according to the size SRAColumnRead reports; any other size
 *  produces an rcData/rcUnexpected rc (the count is then treated as 0).
 *  A warning is logged when the count is above nreads_max, or equal to
 *  nreads_max - 1.
 */
static
rc_t MaxNReadsValidator_GetKey( const SRASplitter* cself, const char** key, spotid_t spot, readmask_t* readmask )
{
    MaxNReadsValidator* self = ( MaxNReadsValidator* )cself;
    const void* cell = NULL;
    bitsz_t bit_off = 0, bit_len = 0;
    uint64_t count = 0;
    rc_t rc;

    if ( self == NULL || key == NULL )
    {
        return RC( rcSRA, rcNode, rcExecuting, rcParam, rcNull );
    }

    *key = "";

    /* without a column there is nothing to validate */
    if ( self->col == NULL )
    {
        return 0;
    }

    rc = SRAColumnRead( self->col, spot, &cell, &bit_off, &bit_len );
    if ( rc != 0 )
    {
        return rc;
    }

    /* widen the cell to 64 bits according to its reported size */
    if ( bit_len == 8 )
    {
        count = *( ( const uint8_t* )cell );
    }
    else if ( bit_len == 16 )
    {
        count = *( ( const uint16_t* )cell );
    }
    else if ( bit_len == 32 )
    {
        count = *( ( const uint32_t* )cell );
    }
    else if ( bit_len == 64 )
    {
        count = *( ( const uint64_t* )cell );
    }
    else
    {
        rc = RC( rcSRA, rcNode, rcExecuting, rcData, rcUnexpected );
    }

    if ( count > nreads_max )
    {
        clear_readmask( readmask );
        PLOGMSG(klogWarn, (klogWarn, "too many reads $(nreads) at spot id $(row), maximum $(max) supported, skipped",
            PLOG_3(PLOG_U64(nreads),PLOG_I64(row),PLOG_U32(max)), count, spot, nreads_max));
    }
    else if ( count == nreads_max - 1 )
    {
        PLOGMSG(klogWarn, (klogWarn, "too many reads $(nreads) at spot id $(row), truncated to $(max)",
            PLOG_3(PLOG_U64(nreads),PLOG_I64(row),PLOG_U32(max)), count + 1, spot, nreads_max));
    }

    return rc;
}
/*
 * copy_file_skey_md5_kludge
 *  Copy only the leading READ_SIZE bytes of 'fin' to 'fout' and, when all
 *  READ_SIZE bytes were available, follow them with the text "skey\n".
 *  If 'fin' ends earlier, only the bytes actually read are written.
 *
 *  fin  [IN]  - source file, must not be NULL (asserted)
 *  fout [OUT] - destination file, written from offset 0; must not be NULL
 *               (asserted)
 *
 *  Returns 0 on success or the rc of the first failing KFileRead /
 *  KFileWrite, which is logged before returning.
 */
static
rc_t copy_file_skey_md5_kludge (const KFile * fin, KFile *fout)
{
    /* size of HEX digest plus space plus * */
#define READ_SIZE 34
    static const uint8_t skey[] = "skey\n";
    uint8_t buff [256];
    uint64_t tot_read, tot_writ;
    size_t num_read, num_writ;
    rc_t rc = 0;

    assert (fin);
    assert (fout);

    for (tot_read = 0 ; tot_read < READ_SIZE; tot_read += num_read)
    {
        /* continue filling the buffer where the previous (possibly short)
         * read stopped; reading into the start again would overwrite it */
        rc = KFileRead (fin, tot_read, buff + tot_read,
                        (size_t)(READ_SIZE - tot_read), &num_read);
        if (rc != 0)
        {
            PLOGERR (klogErr, (klogErr, rc,
                     "Failed to read from directory structure in creating archive at $(P)",
                     PLOG_U64(P), tot_read));
            break;
        }
        if (num_read == 0)
            break;
    }

    if (rc == 0)
    {
        if (tot_read == READ_SIZE)
        {
            /* copies the terminating NUL too, but only the visible
             * characters are counted for output */
            memcpy (buff + READ_SIZE, skey, sizeof (skey));
            tot_read += sizeof (skey) - 1;
        }

        for (tot_writ = 0; tot_writ < tot_read; tot_writ += num_writ)
        {
            rc = KFileWrite (fout, tot_writ, buff + tot_writ,
                             (size_t)(tot_read - tot_writ), &num_writ);
            if (rc != 0)
            {
                /* report the offset of the failed write; num_writ is not
                 * meaningful (and may be unset) when the call fails */
                PLOGERR (klogErr, (klogErr, rc,
                         "Failed to write to archive in creating archive at $(P)",
                         PLOG_U64(P), tot_writ));
                break;
            }
        }
    }
    return rc;
}
/** Fetch the next character of the input file into buffer[0].
 *
 *  A character previously saved in self->ch (self->hasCh set) is returned
 *  first, without touching the file. Otherwise one byte is requested at
 *  self->filePos: when nothing is read self->eof is set and buffer is left
 *  untouched, else the file position advances.
 *
 *  Returns 0 on success (including EOF) or the KFileRead rc, which is logged.
 */
static rc_t SpotIteratorFileReadCharWithEof(SpotIterator* self, char* buffer)
{
    size_t got = 0;
    rc_t rc;

    assert(self);

    /* get back the saved character */
    if (self->hasCh) {
        buffer[0] = self->ch;
        self->hasCh = false;
        return 0;
    }

    rc = KFileRead(self->file, self->filePos, buffer, 1, &got);
    if (rc != 0) {
        PLOGERR(klogErr, (klogErr, rc,
            "on line $(lineno) while reading file '$(path)'",
            PLOG_U64(lineno) ",path=%s", self->line, self->filename));
        return rc;
    }

    if (got == 0) {
        self->eof = true;
    }
    else {
        self->filePos += got;
    }
    return rc;
}
/** Read a character from the input file */ static rc_t SpotIteratorFileReadWithEof(struct SpotIterator* self, void* buffer, size_t bsize) { rc_t rc = 0; size_t num_read = 0; assert(self); rc = KFileRead(self->m_file, self->m_filePos, buffer, bsize, &num_read); if (rc == 0) { if (num_read == 0) { self->m_eof = true; } else { self->m_filePos += num_read; } } else { plogerr(klogErr, rc, "on line $(lineno) while reading file '$(path)'", PLOG_U64(lineno) ",path=%s", self->m_line, self->m_filename); } return rc; }
/*
 * WriteFileMeta
 *  Store the description of an index object under obj->meta:
 *    "Format"            <- obj->format
 *      "Options"
 *        "accession"     <- g_accession
 *        "minSpotId"     <- obj->minSpotId
 *        "maxSpotId"     <- obj->maxSpotId
 *    "Size"              <- obj->file_size     (only when > 0)
 *    "Buffer"            <- obj->buffer_sz     (only when > 0)
 *    "Index"             <- obj->index         (only when not empty)
 *    "md5"               <- obj->md5_digest appended as "%02x" text per
 *                           element            (only when file_size > 0)
 *
 *  Processing stops at the first failing open/write; every node that was
 *  opened is released. Returns 0 or the rc of that first failure.
 */
rc_t WriteFileMeta(SIndexObj* obj)
{
    rc_t rc;
    KMDataNode* node = NULL;

    PLOGMSG(klogInfo, (klogInfo, "Meta $(f) on index $(i): file size $(s), buffer $(b)",
        PLOG_4(PLOG_S(f),PLOG_S(i),PLOG_U64(s),PLOG_U32(b)),
        obj->file, obj->index, obj->file_size, obj->buffer_sz));

    /* Format and its Options sub-tree */
    rc = KMDataNodeOpenNodeUpdate(obj->meta, &node, "Format");
    if( rc == 0 ) {
        KMDataNode* options = NULL;

        rc = KMDataNodeWriteCString(node, obj->format);
        if( rc == 0 ) {
            rc = KMDataNodeOpenNodeUpdate(node, &options, "Options");
            if( rc == 0 ) {
                KMDataNode* child = NULL;

                rc = KMDataNodeOpenNodeUpdate(options, &child, "accession");
                if( rc == 0 ) {
                    rc = KMDataNodeWriteCString(child, g_accession);
                    KMDataNodeRelease(child);
                }
                if( rc == 0 ) {
                    rc = KMDataNodeOpenNodeUpdate(options, &child, "minSpotId");
                    if( rc == 0 ) {
                        rc = KMDataNodeWriteB64(child, &obj->minSpotId);
                        KMDataNodeRelease(child);
                    }
                }
                if( rc == 0 ) {
                    rc = KMDataNodeOpenNodeUpdate(options, &child, "maxSpotId");
                    if( rc == 0 ) {
                        rc = KMDataNodeWriteB64(child, &obj->maxSpotId);
                        KMDataNodeRelease(child);
                    }
                }
                KMDataNodeRelease(options);
            }
        }
        KMDataNodeRelease(node);
    }

    if( rc == 0 && obj->file_size > 0 ) {
        rc = KMDataNodeOpenNodeUpdate(obj->meta, &node, "Size");
        if( rc == 0 ) {
            rc = KMDataNodeWriteB64(node, &obj->file_size);
            KMDataNodeRelease(node);
        }
    }

    if( rc == 0 && obj->buffer_sz > 0 ) {
        rc = KMDataNodeOpenNodeUpdate(obj->meta, &node, "Buffer");
        if( rc == 0 ) {
            rc = KMDataNodeWriteB32(node, &obj->buffer_sz);
            KMDataNodeRelease(node);
        }
    }

    if( rc == 0 && strlen(obj->index) > 0 ) {
        rc = KMDataNodeOpenNodeUpdate(obj->meta, &node, "Index");
        if( rc == 0 ) {
            rc = KMDataNodeWriteCString(node, obj->index);
            KMDataNodeRelease(node);
        }
    }

    if( rc == 0 && obj->file_size > 0 ) {
        rc = KMDataNodeOpenNodeUpdate(obj->meta, &node, "md5");
        if( rc == 0 ) {
            char hex[5];
            int pos = 0;

            /* one formatted chunk is appended per digest element */
            while( rc == 0 && pos < sizeof(obj->md5_digest) ) {
                int len = snprintf(hex, 4, "%02x", obj->md5_digest[pos]);
                rc = KMDataNodeAppend(node, hex, len);
                pos++;
            }
            KMDataNodeRelease(node);
        }
    }

    return rc;
}
/** Get next spot from input file.
 *
 *  Reads lines with SpotIteratorReadLine until a non-empty one is found or
 *  EOF/error is reached. The non-empty line must consist of decimal digits
 *  only and its value must be non-zero, strictly greater than the previously
 *  accepted spot (self->spotToReduct) and not greater than self->maxSpotId.
 *  On success the value is stored in self->spotToReduct and self->inBuffer
 *  is reset to 0.
 *
 *  Returns 0 on success or when EOF is reached without another spot;
 *  otherwise a logged rc describing the offending line.
 */
static rc_t SpotIteratorReadSpotToRedact(SpotIterator* self)
{
    rc_t rc = 0;

    assert(self);

    while (rc == 0 && ! self->eof) {
        rc = SpotIteratorReadLine(self);

        /* skip empty lines */
        if ((rc == 0) && (self->inBuffer > 0)) {
            spotid_t spot = 0;
            long parsed = 0;

            /* make sure the line contains digits only */
            int i = 0;
            for (i = 0; i < self->inBuffer; ++i) {
                /* <ctype.h> functions require an unsigned char value */
                if (!isdigit((unsigned char)self->buffer[i])) {
                    rc = RC(rcExe, rcFile, rcReading, rcChar, rcUnexpected);
                    PLOGERR(klogErr, (klogErr, rc,
                        "character '$(char)' on line $(lineno)"
                        " while reading file '$(path)': '$(line)'",
                        "char=%c," PLOG_U64(lineno) ",path=%s,line=%s",
                        self->buffer[i], self->line,
                        self->filename, self->buffer));
                    return rc;
                }
            }

            /* "%ld" stores a long: scan into a long and convert, instead of
             * letting sscanf write through a pointer to spotid_t, whose
             * width may differ */
            sscanf(self->buffer, "%ld", &parsed);
            spot = (spotid_t)parsed;

            if (spot == 0) {
                rc = RC(rcExe, rcFile, rcReading, rcString, rcInvalid);
                PLOGERR(klogErr, (klogErr, rc,
                    "bad spot id '0' on line $(lineno) "
                    "while reading file '$(path)': '$(line)'",
                    PLOG_U64(lineno) ",path=%s,line=%s",
                    self->line, self->filename, self->buffer));
            }
            else if (spot == self->spotToReduct) {
                rc = RC(rcExe, rcFile, rcReading, rcString, rcInvalid);
                PLOGERR(klogErr, (klogErr, rc,
                    "duplicated spot id '$(spot)' "
                    "on line $(lineno) while reading file '$(path)': '$(line)'",
                    PLOG_U32(spot) "," PLOG_U64(lineno) ",path=%s,line=%s",
                    spot, self->line, self->filename, self->buffer));
            }
            else if (spot < self->spotToReduct) {
                rc = RC(rcExe, rcFile, rcReading, rcString, rcInvalid);
                PLOGERR(klogErr, (klogErr, rc,
                    "File '$(path)' is unsorted. "
                    "$(id) < $(last). See line $(lineno): '$(line)'",
                    "path=%s," PLOG_U32(id) "," PLOG_U32(last) ","
                    PLOG_U64(lineno) ",line=%s",
                    self->filename, spot, self->spotToReduct,
                    self->line, self->buffer));
            }
            else if (spot > self->maxSpotId) {
                rc = RC(rcExe, rcFile, rcReading, rcString, rcInvalid);
                PLOGERR(klogErr, (klogErr, rc,
                    "spotId $(spot) on line $(lineno) "
                    "of file '$(path)' is bigger that the max spotId $(max): "
                    "'$(line)'",
                    PLOG_U32(spot) "," PLOG_U64(lineno) ",path=%s,"
                    PLOG_U32(max) ",line=%s",
                    spot, self->line, self->filename,
                    self->maxSpotId, self->buffer));
            }
            else {
                self->spotToReduct = spot;
                self->inBuffer = 0;
            }

            /* one non-empty line is consumed per call */
            break;
        }
    }

    return rc;
}
/** Read a line from input file.
 *
 *  Characters are pulled one at a time with SpotIteratorFileReadCharWithEof
 *  and collected with SpotIteratorBufferAdd. Leading and trailing blanks are
 *  dropped; a non-blank, non-EOL character following a blank that came after
 *  the payload is rejected with an rcChar/rcUnexpected rc. "\r", "\n", "\r\n"
 *  and "\n\r" all end the line and are stripped from the buffer; a character
 *  read past a one-character EOL is saved in self->ch for the next read.
 *
 *  self->line is incremented and self->inBuffer reset on every call that
 *  starts before EOF. Returns 0 (also when already at EOF), the rc of a
 *  failed read, or rcString/rcTooLong when SpotIteratorBufferAdd refuses a
 *  character.
 */
static rc_t SpotIteratorReadLine(SpotIterator* self)
{
    rc_t rc = 0;
    bool overflow = false; /* input buffer overflow */
    /* to skip leading/trailing spaces */
    enum ELane { eBefore, eIn, eAfter } state = eBefore;

    /* validate before the first dereference */
    assert(self);

    if (self->eof) {
        return rc;
    }

    ++self->line;
    self->inBuffer = 0;

    /* do until in-buffer overflow or EOF */
    while (!overflow && !self->eof) {
        char ch = 0;

        /* get next character */
        if ((rc = SpotIteratorFileReadCharWithEof(self, &ch)) != 0) {
            return rc;
        }

        if (!self->eof) {
            /* treat leading/trailing spaces;
               <ctype.h> functions require an unsigned char value */
            switch (state) {
                /* skip leading spaces */
                case eBefore:
                    if (isblank((unsigned char)ch)) {
                        continue;
                    }
                    else {
                        state = eIn;
                    }
                    break;
                case eIn:
                    if (isblank((unsigned char)ch)) {
                        state = eAfter;
                        continue;
                    }
                    break;
                /* skip trailing spaces */
                case eAfter:
                    if (isblank((unsigned char)ch)) {
                        continue;
                    }
                    else if (ch != '\n' && ch != '\r') {
                        rc = RC(rcExe, rcFile, rcReading, rcChar, rcUnexpected);
                        PLOGERR(klogErr, (klogErr, rc,
                            "bad symbol '$(char)' "
                            "on line $(lineno) in file '$(path)': '$(line)...'",
                            "char=%c," PLOG_U64(lineno) ",path=%s,line=%s",
                            ch, self->line, self->filename, self->buffer));
                        return rc;
                    }
                    break;
            }

            /* add next non-blank character */
            overflow = !SpotIteratorBufferAdd(self, ch);

            if (!overflow) {
                /* all combinations as "\r", "\n", "\r\n", "\n\r"
                   are considered as EOL */
                if (ch == '\n' || ch == '\r') {
                    char c1 = 0;
                    if ((rc = SpotIteratorFileReadCharWithEof(self, &c1)) != 0) {
                        return rc;
                    }
                    if (self->eof) {
                        break;
                    }
                    else if ((c1 != '\n' && c1 != '\r') || (ch == c1)) {
                        /* save the character when EOL is a single character:
                           it will be handed back by
                           SpotIteratorFileReadCharWithEof */
                        self->ch = c1;
                        self->hasCh = true;
                        break;
                    }
                    else {
                        overflow = !SpotIteratorBufferAdd(self, c1);
                        break;
                    }
                }
            }
        }
    }

    /* remove EOL */
    if (!overflow) {
        bool done = false;
        while (self->inBuffer > 0 && !done) {
            switch (self->buffer[self->inBuffer - 1]) {
                case '\n':
                case '\r':
                    self->buffer[--self->inBuffer] = '\0';
                    break;
                default:
                    done = true;
                    break;
            }
        }
    }
    else {
        rc = RC(rcExe, rcFile, rcReading, rcString, rcTooLong);
        PLOGERR(klogErr, (klogErr, rc,
            "on line $(lineno) while reading file '$(path)': '$(line)...'",
            PLOG_U64(lineno) ",path=%s,line=%s",
            self->line, self->filename, self->buffer));
    }

    return rc;
}
/*******************************************************************************
 * KMain - defined for use with kapp library
 *
 * Flow:
 *   1. hand over to fasta_dump() when fasta output was requested
 *   2. initialise the formatter (SRADumper_Init) and validate its arguments
 *   3. parse argv: anything not starting with '-' is a table path
 *      (duplicates are ignored), the rest are common or formatter options
 *   4. resolve the list of tables from positional paths, -D, -P or -A
 *   5. for every table: open it, derive the accession used as output prefix,
 *      clamp the spot range, build the splitter factory chain and dump
 *   6. release everything and report totals
 *
 * Returns 100/101/102 for formatter initialisation problems, 0 after
 * usage/version output, otherwise the last rc (possibly replaced by the rc
 * of ReportFinalize when it was 0).
 *******************************************************************************/
rc_t CC KMain ( int argc, char* argv[] )
{
    rc_t rc = 0;
    int i;
    const char* arg;
    uint64_t total_spots = 0;

    const SRAMgr* sraMGR = NULL;
    SRADumperFmt fmt;

    bool to_stdout = false, do_gzip = false, do_bzip2 = false;
    char const* outdir = NULL;
    spotid_t minSpotId = 1;
    spotid_t maxSpotId = ~0;
    bool sub_dir = false;
    bool keep_empty = false;
    const char* table_path[10240];
    int table_path_qty = 0;

    char const* D_option = NULL;
    char const* P_option = NULL;
    char P_option_buffer[4096];
    const char* accession = NULL;
    const char* table_name = NULL;

    bool spot_group_on = false;
    int spot_groups = 0;
    char* spot_group[128] = {NULL};
    bool read_filter_on = false;
    SRAReadFilter read_filter = 0xFF;   /* 0xFF: no filter value given yet */

    bool failed_to_open = false;

    /* for the fasta-ouput of fastq-dump: branch out completely of 'common' code */
    if ( fasta_dump_requested( argc, argv ) )
    {
        return fasta_dump( argc, argv );
    }

    /* Prepare for the worst: report this information after disaster */
    ReportBuildDate ( __DATE__ );

    memset( &fmt, 0, sizeof( fmt ) );
    rc = SRADumper_Init( &fmt );
    if ( rc != 0 )
    {
        LOGERR(klogErr, rc, "formatter initialization");
        return 100;
    }
    else if ( fmt.get_factory == NULL )
    {
        rc = RC( rcExe, rcFormatter, rcValidating, rcInterface, rcNull );
        LOGERR( klogErr, rc, "formatter factory" );
        return 101;
    }
    else
    {
        rc = SRADumper_ArgsValidate( argv[0], &fmt );
        if ( rc != 0 )
        {
            LOGERR( klogErr, rc, "formatter args list" );
            return 102;
        }
    }

    if ( argc < 2 )
    {
        CoreUsage( argv[0], &fmt, true, EXIT_FAILURE );
        return 0;
    }

    for ( i = 1; i < argc; i++ )
    {
        arg = argv[ i ];
        if ( arg[ 0 ] != '-' )
        {
            /* positional argument: a table path, stored once */
            uint32_t k;
            for ( k = 0; k < table_path_qty; k++ )
            {
                if ( strcmp( arg, table_path[ k ] ) == 0 )
                {
                    break;
                }
            }
            if ( k >= table_path_qty )
            {
                if ( ( table_path_qty + 1 ) >= ( sizeof( table_path ) / sizeof( table_path[ 0 ] ) ) )
                {
                    rc = RC( rcExe, rcArgv, rcReading, rcBuffer, rcInsufficient );
                    goto Catch;
                }
                table_path[ table_path_qty++ ] = arg;
            }
            continue;
        }
        arg = NULL;
        if ( SRADumper_GetArg( &fmt, "L", "log-level", &i, argc, argv, &arg ) )
        {
            rc = LogLevelSet( arg );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc, "log level $(lvl)", PLOG_S( lvl ), arg ) );
                goto Catch;
            }
        }
        else if ( SRADumper_GetArg( &fmt, NULL, OPTION_REPORT, &i, argc, argv, &arg ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "+", "debug", &i, argc, argv, &arg ) )
        {
#if _DEBUGGING
            rc = KDbgSetString( arg );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc, "debug level $(lvl)", PLOG_S( lvl ), arg ) );
                goto Catch;
            }
#endif
        }
        else if ( SRADumper_GetArg( &fmt, "H", "help", &i, argc, argv, NULL ) ||
                  SRADumper_GetArg( &fmt, "?", "h", &i, argc, argv, NULL ) )
        {
            CoreUsage( argv[ 0 ], &fmt, false, EXIT_SUCCESS );
        }
        else if ( SRADumper_GetArg( &fmt, "V", "version", &i, argc, argv, NULL ) )
        {
            HelpVersion ( argv[ 0 ], KAppVersion() );
            return 0;
        }
        else if ( SRADumper_GetArg( &fmt, "v", NULL, &i, argc, argv, NULL ) )
        {
            KStsLevelAdjust( 1 );
        }
        else if ( SRADumper_GetArg( &fmt, "D", "table-path", &i, argc, argv, &D_option ) )
        {
            LOGMSG( klogErr, "option -D is deprecated, see --help" );
        }
        else if ( SRADumper_GetArg( &fmt, "P", "path", &i, argc, argv, &P_option ) )
        {
            LOGMSG( klogErr, "option -P is deprecated, see --help" );
        }
        else if ( SRADumper_GetArg( &fmt, "A", "accession", &i, argc, argv, &accession ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "O", "outdir", &i, argc, argv, &outdir ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "Z", "stdout", &i, argc, argv, NULL ) )
        {
            to_stdout = true;
        }
        else if ( fmt.gzip && SRADumper_GetArg( &fmt, NULL, "gzip", &i, argc, argv, NULL ) )
        {
            do_gzip = true;
        }
        else if ( fmt.bzip2 && SRADumper_GetArg( &fmt, NULL, "bzip2", &i, argc, argv, NULL ) )
        {
            do_bzip2 = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "table", &i, argc, argv, &table_name ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "N", "minSpotId", &i, argc, argv, &arg ) )
        {
            minSpotId = AsciiToU32( arg, NULL, NULL );
        }
        else if ( SRADumper_GetArg( &fmt, "X", "maxSpotId", &i, argc, argv, &arg ) )
        {
            maxSpotId = AsciiToU32( arg, NULL, NULL );
        }
        else if ( SRADumper_GetArg( &fmt, "G", "spot-group", &i, argc, argv, NULL ) )
        {
            spot_group_on = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "spot-groups", &i, argc, argv, NULL ) )
        {
            if ( i + 1 < argc && argv[ i + 1 ][ 0 ] != '-' )
            {
                /* comma separated list; empty items are skipped */
                int f = 0, t = 0;
                i++;
                for ( ; ; t++ )
                {
                    const char c = argv[ i ][ t ];
                    if ( c == ',' || c == '\0' )
                    {
                        if ( t - f > 0 )
                        {
                            char* item;

                            /* keep one slot free for the terminating NULL */
                            if ( ( spot_groups + 1 ) >= ( int )( sizeof( spot_group ) / sizeof( spot_group[ 0 ] ) ) )
                            {
                                rc = RC( rcExe, rcArgv, rcReading, rcBuffer, rcInsufficient );
                                LOGERR( klogErr, rc, "too many spot groups" );
                                goto Catch;
                            }
                            item = strndup( &argv[ i ][ f ], t - f );
                            if ( item == NULL )
                            {
                                rc = RC( rcExe, rcArgv, rcReading, rcMemory, rcExhausted );
                                LOGERR( klogErr, rc, "spot group name" );
                                goto Catch;
                            }
                            spot_group[ spot_groups++ ] = item;
                        }
                        f = t + 1;
                        if ( c == '\0' )
                        {
                            break;
                        }
                    }
                }
                if ( spot_groups < 1 )
                {
                    rc = RC( rcApp, rcArgv, rcReading, rcParam, rcEmpty );
                    PLOGERR( klogErr, ( klogErr, rc, "$(p)", PLOG_S( p ), argv[ i - 1 ] ) );
                    CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
                }
                spot_group[ spot_groups ] = NULL;
            }
        }
        else if ( SRADumper_GetArg( &fmt, "R", "read-filter", &i, argc, argv, NULL ) )
        {
            read_filter_on = true;
            if ( i + 1 < argc && argv[ i + 1 ][ 0 ] != '-' )
            {
                i++;
                if ( read_filter != 0xFF )
                {
                    rc = RC( rcApp, rcArgv, rcReading, rcParam, rcDuplicate );
                    PLOGERR( klogErr, ( klogErr, rc, "$(p): $(o)",
                             PLOG_2( PLOG_S( p ),PLOG_S( o ) ), argv[ i - 1 ], argv[ i ] ) );
                    CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
                }
                if ( strcasecmp( argv[ i ], "pass" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_PASS;
                }
                else if ( strcasecmp( argv[ i ], "reject" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_REJECT;
                }
                else if ( strcasecmp( argv[ i ], "criteria" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_CRITERIA;
                }
                else if ( strcasecmp( argv[ i ], "redacted" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_REDACTED;
                }
                else
                {
                    /* must be accession */
                    i--;
                }
            }
        }
        else if ( SRADumper_GetArg( &fmt, "T", "group-in-dirs", &i, argc, argv, NULL ) )
        {
            sub_dir = true;
        }
        else if ( SRADumper_GetArg( &fmt, "K", "keep-empty-files", &i, argc, argv, NULL ) )
        {
            keep_empty = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "no-user-settings", &i, argc, argv, NULL ) )
        {
            KConfigDisableUserSettings ();
        }
        else if ( fmt.add_arg && fmt.add_arg( &fmt, SRADumper_GetArg, &i, argc, argv ) )
        {
        }
        else
        {
            rc = RC( rcApp, rcArgv, rcReading, rcParam, rcIncorrect );
            PLOGERR( klogErr, ( klogErr, rc, "$(p)", PLOG_S( p ), argv[ i ] ) );
            CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
        }
    }

    if ( to_stdout )
    {
        if ( outdir != NULL || sub_dir || keep_empty || spot_group_on ||
             ( read_filter_on && read_filter == 0xFF ) )
        {
            LOGMSG( klogWarn, "stdout mode is set, some options are ignored" );
            spot_group_on = false;
            if ( read_filter == 0xFF )
            {
                read_filter_on = false;
            }
        }
        /* data goes to stdout, so every message channel moves to stderr */
        KOutHandlerSetStdErr();
        KStsHandlerSetStdErr();
        KLogHandlerSetStdErr();
        ( void ) KDbgHandlerSetStdErr();
    }

    if ( do_gzip && do_bzip2 )
    {
        rc = RC( rcApp, rcArgv, rcReading, rcParam, rcAmbiguous );
        LOGERR( klogErr, rc, "output compression method" );
        CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
    }

    /* accept the range in either order */
    if ( minSpotId > maxSpotId )
    {
        spotid_t temp = maxSpotId;
        maxSpotId = minSpotId;
        minSpotId = temp;
    }

    if ( table_path_qty == 0 )
    {
        if ( D_option != NULL && D_option[ 0 ] != '\0' )
        {
            /* support deprecated '-D' option */
            table_path[ table_path_qty++ ] = D_option;
        }
        else if ( accession == NULL || accession[ 0 ] == '\0' )
        {
            /* must have accession to proceed */
            rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcEmpty );
            LOGERR( klogErr, rc, "expected accession" );
            goto Catch;
        }
        else if ( P_option != NULL && P_option[ 0 ] != '\0' )
        {
            /* support deprecated '-P' option */
            i = snprintf( P_option_buffer, sizeof( P_option_buffer ), "%s/%s", P_option, accession );
            if ( i < 0 || i >= sizeof( P_option_buffer ) )
            {
                rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcExcessive );
                LOGERR( klogErr, rc, "path too long" );
                goto Catch;
            }
            table_path[ table_path_qty++ ] = P_option_buffer;
        }
        else
        {
            table_path[ table_path_qty++ ] = accession;
        }
    }

    rc = SRAMgrMakeRead( &sraMGR );
    if ( rc != 0 )
    {
        LOGERR( klogErr, rc, "failed to open SRA manager" );
        goto Catch;
    }
    else
    {
        rc = SRASplitterFactory_FilerInit( to_stdout, do_gzip, do_bzip2, sub_dir, keep_empty, outdir );
        if ( rc != 0 )
        {
            LOGERR( klogErr, rc, "failed to initialize files" );
            goto Catch;
        }
    }

    {
        const VDBManager* vmgr = NULL;
        rc_t rc2 = SRAMgrGetVDBManagerRead( sraMGR, &vmgr );
        if ( rc2 != 0 )
        {
            LOGERR( klogErr, rc2, "while calling SRAMgrGetVDBManagerRead" );
        }
        rc2 = ReportSetVDBManager( vmgr );
        VDBManagerRelease( vmgr );
    }

    /* loop tables */
    for ( i = 0; i < table_path_qty; i++ )
    {
        const SRASplitterFactory* fact_head = NULL;
        spotid_t smax, smin;

        SRA_DUMP_DBG( 5, ( "table path '%s', name '%s'\n", table_path[ i ], table_name ) );
        if ( table_name != NULL )
        {
            rc = SRAMgrOpenAltTableRead( sraMGR, &fmt.table, table_name, table_path[ i ] );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc, "failed to open '$(path):$(table)'",
                         "path=%s,table=%s", table_path[ i ], table_name ) );
                continue;
            }
        }

        ReportResetObject( table_path[ i ] );

        if ( fmt.table == NULL )
        {
            rc = SRAMgrOpenTableRead( sraMGR, &fmt.table, table_path[ i ] );
            if ( rc != 0 )
            {
                if ( UIError( rc, NULL, NULL ) )
                {
                    UITableLOGError( rc, NULL, true );
                }
                else
                {
                    PLOGERR( klogErr, ( klogErr, rc, "failed to open '$(path)'",
                             "path=%s", table_path[ i ] ) );
                    if (GetRCState(rc) == rcNotFound)
                    {
                        failed_to_open = true;
                    }
                }
                continue;
            }
        }

        /* infer accession from table_path if missing or more than one table */
        fmt.accession = table_path_qty > 1 ? NULL : accession;
        if ( fmt.accession == NULL || fmt.accession[ 0 ] == 0 )
        {
            char * basename;
            char * slash;
            char *ext;
            size_t l;
            bool is_url = false;

            /* the path is edited in P_option_buffer: make sure it fits */
            l = strlen( table_path[ i ] );
            if ( l >= sizeof( P_option_buffer ) )
            {
                rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcExcessive );
                LOGERR( klogErr, rc, "path too long" );
                SRATableRelease( fmt.table );
                fmt.table = NULL;
                continue;
            }
            memmove( P_option_buffer, table_path[ i ], l + 1 );

            /* anything after the first ':' is taken as the path of a URL */
            basename = strchr ( P_option_buffer, ':' );
            if ( basename )
            {
                ++basename;
                if ( basename [0] == '\0' )
                    basename = P_option_buffer;
                else
                    is_url = true;
            }
            else
                basename = P_option_buffer;

            if ( is_url )
            {
                /* drop fragment and query */
                ext = strchr ( basename, '#' );
                if ( ext )
                    ext[ 0 ] = '\0';
                ext = strchr ( basename, '?' );
                if ( ext )
                    ext[ 0 ] = '\0';
            }

            /* strip trailing path separators, never stepping in front of
               the string */
            l = strlen( basename );
            while ( l > 0 && ( basename[ l - 1 ] == '/' || basename[ l - 1 ] == '\\' ) )
            {
                basename[ --l ] = '\0';
            }

            slash = strrchr( basename, '/' );
            fmt.accession = ( slash != NULL ) ? slash + 1 : basename;

            /* cut off [.lite].[c]sra[.nenc||.ncbi_enc] if any */
            ext = strrchr( fmt.accession, '.' );
            if ( ext != NULL )
            {
                if ( strcasecmp( ext, ".nenc" ) == 0 || strcasecmp( ext, ".ncbi_enc" ) == 0 )
                {
                    *ext = '\0';
                    ext = strrchr( fmt.accession, '.' );
                }
                if ( ext != NULL && ( strcasecmp( ext, ".sra" ) == 0 || strcasecmp( ext, ".csra" ) == 0 ) )
                {
                    *ext = '\0';
                    ext = strrchr( fmt.accession, '.' );
                    if ( ext != NULL && strcasecmp( ext, ".lite" ) == 0 )
                    {
                        *ext = '\0';
                    }
                }
            }
        }

        SRA_DUMP_DBG( 5, ( "accession: '%s'\n", fmt.accession ) );
        rc = SRASplitterFactory_FilerPrefix( accession ? accession : fmt.accession );

        /* runs at most once: the loop only provides 'break' as an exit */
        while ( rc == 0 )
        {
            /* sort out the spot id range */
            if ( ( rc = SRATableMaxSpotId( fmt.table, &smax ) ) != 0 ||
                 ( rc = SRATableMinSpotId( fmt.table, &smin ) ) != 0 )
            {
                break;
            }

            {
                const struct VTable* tbl = NULL;
                rc_t rc2 = SRATableGetVTableRead( fmt.table, &tbl );
                if ( rc == 0 )
                {
                    rc = rc2;
                }
                rc2 = ReportResetTable( table_path[i], tbl );
                if ( rc == 0 )
                {
                    rc = rc2;
                }
                VTableRelease( tbl );   /* SRATableGetVTableRead adds Reference to tbl! */
            }

            /* test if we have to dump anything... */
            if ( smax < minSpotId || smin > maxSpotId )
            {
                break;
            }
            if ( smax > maxSpotId )
            {
                smax = maxSpotId;
            }
            if ( smin < minSpotId )
            {
                smin = minSpotId;
            }

            /* hack to reduce looping in AddSpot: needs redesign to pass nreads along through tree */
            if ( true ) /* ??? */
            {
                const SRAColumn* c = NULL;
                nreads_max = NREADS_MAX;
                rc = SRATableOpenColumnRead( fmt.table, &c, "PLATFORM", sra_platform_id_t );
                if ( rc == 0 )
                {
                    const INSDC_SRA_platform_id *platform;
                    bitsz_t o, z;
                    rc = SRAColumnRead( c, 1, (const void **)&platform, &o, &z );
                    if ( rc == 0 && platform != NULL )
                    {
                        if ( *platform != SRA_PLATFORM_PACBIO_SMRT )
                        {
                            nreads_max = 32;
                        }
                    }
                    SRAColumnRelease( c );
                }
                else if ( GetRCState( rc ) == rcNotFound && GetRCObject( rc ) == rcColumn )
                {
                    rc = 0;
                }
            }

            /* table dependent */
            /* NOTE(review): any rc left by the PLATFORM lookup or the VTable
               report above is overwritten here - confirm that is intended */
            rc = fmt.get_factory( &fmt, &fact_head );
            if ( rc != 0 )
            {
                break;
            }
            if ( fact_head == NULL )
            {
                rc = RC( rcExe, rcFormatter, rcResolving, rcInterface, rcNull );
                break;
            }

            if ( rc == 0 && ( spot_group_on || spot_groups > 0 ) )
            {
                const SRASplitterFactory* f = NULL;
                rc = SpotGroupSplitterFactory_Make( &f, fmt.table, spot_group_on, spot_group );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            if ( rc == 0 && read_filter_on )
            {
                const SRASplitterFactory* f = NULL;
                rc = ReadFilterSplitterFactory_Make( &f, fmt.table, read_filter );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            if ( rc == 0 )
            {
                /* this filter takes over head of chain to be first and kill off bad NREADS */
                const SRASplitterFactory* f = NULL;
                rc = MaxNReadsValidatorFactory_Make( &f, fmt.table );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            /* NOTE(review): a failure while chaining the optional factories
               above is overwritten here, so the dump proceeds without them -
               confirm whether that best-effort behaviour is intended */
            rc = SRASplitterFactory_Init( fact_head );
            if ( rc == 0 )
            {
                /* ********************************************************** */
                rc = SRADumper_DumpRun( fmt.table, smin, smax, fact_head );
                /* ********************************************************** */
                if ( rc == 0 )
                {
                    uint64_t total = 0, file = 0;
                    SRASplitterFactory_FilerReport( &total, &file );
                    OUTMSG(( "Written %lu spots for %s\n", total - total_spots, table_path[ i ] ));
                    if ( to_stdout && total > 0 )
                    {
                        PLOGMSG( klogInfo, ( klogInfo, "$(t) biggest file has $(n) spots",
                                 PLOG_2( PLOG_S( t ), PLOG_U64( n ) ), table_path[ i ], file ));
                    }
                    total_spots = total;
                }
            }
            break;
        }

        SRASplitterFactory_Release( fact_head );
        SRATableRelease( fmt.table );
        fmt.table = NULL;
        if ( rc == 0 )
        {
            PLOGMSG( klogInfo, ( klogInfo, "$(path)$(dot)$(table) $(spots) spots",
                     PLOG_4(PLOG_S(path),PLOG_S(dot),PLOG_S(table),PLOG_U32(spots)),
                     table_path[ i ], table_name ? ":" : "", table_name ? table_name : "", smax - smin + 1 ) );
        }
        else if ( !reportToUser( rc, argv [0 ] ) )
        {
            PLOGERR( klogErr, ( klogErr, rc, "failed $(path)$(dot)$(table)",
                     PLOG_3(PLOG_S(path),PLOG_S(dot),PLOG_S(table)),
                     table_path[ i ], table_name ? ":" : "", table_name ? table_name : "" ) );
        }
    }

Catch:
    if ( fmt.release )
    {
        rc_t rr = fmt.release( &fmt );
        if ( rr != 0 )
        {
            SRA_DUMP_DBG( 1, ( "formatter release error %R\n", rr ) );
        }
    }
    for ( i = 0; i < spot_groups; i++ )
    {
        free( spot_group[ i ] );
    }
    SRASplitterFiler_Release();
    SRAMgrRelease( sraMGR );
    OUTMSG(( "Written %lu spots total\n", total_spots ));

    if (failed_to_open)
    {
        ReportSilence();
    }

    {
        /* Report execution environment if necessary */
        rc_t rc2 = ReportFinalize( rc );
        if ( rc == 0 )
        {
            rc = rc2;
        }
    }
    return rc;
}
rc_t txt2kdb_io() { rc_t rc = 0; uint64_t rowid = 1; uint64_t tix = 0; KColumnBlob * blob; bool blobopen = false; while (rc == 0) { size_t num_read; uint8_t buffer [4096]; uint8_t * limit; uint8_t * append_start = buffer; uint8_t * cursor = buffer; bool eol = true; /* quit if we are already past the end of the range */ if ( ! rowid_upper_range(rowid)) break; /* read a buffer full. It may straddle rows. */ rc = KFileRead (G.txt, tix, buffer, sizeof buffer, &num_read); if (rc) { PLOGERR (klogFatal, (klogFatal, rc, "Read failed starting $(P)", PLOG_U64(P), tix)); break; } /* break at EOF */ if (num_read == 0) break; /* scan across the buffer looking for lines */ for (limit = buffer + num_read; cursor < limit; append_start = cursor) { /* if we are at the beginning of a line (end of previous line or start of first */ if (eol) { /* if we are within the pass thru range create a blob */ if (rowid_lower_range(rowid) && rowid_upper_range(rowid)) { rc = KColumnCreateBlob (G.col, &blob); if (rc) { PLOGERR (klogFatal, (klogFatal, rc, "Failed to create Blob for row $(R) at $(P)", PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix)); continue; } blobopen = true; } /* clear the flag */ eol = false; } /* this blob append will go until end of buffer or end of line */ for ( ; cursor < limit; ++ cursor, ++tix) { /* if we hit a NewLine flag it and break for append */ if (*cursor == '\n') { eol = true; ++cursor; ++tix; break; } } /* if we are within the selected range append this to the open blob * ir might be the first append, a middle append, a last append or only append */ if (blobopen) { rc = KColumnBlobAppend (blob, append_start, cursor - append_start); if (rc) { PLOGERR (klogFatal, (klogFatal, rc, "Failed to append Blob for row $(R) at $(P)", PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix)); break; } } /* if we hit a NewLine and are within range we will close this blob */ if (eol) { if (blobopen) { /* single row blobs */ rc = KColumnBlobAssignRange (blob, rowid, 1); if (rc) { PLOGERR 
(klogFatal, (klogFatal, rc, "Failed to range assign blob for row $(R) at $(P)", PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix)); break; } rc = KColumnBlobCommit (blob); if (rc) { PLOGERR (klogFatal, (klogFatal, rc, "Failed to commit blob for row $(R) at $(P)", PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix)); break; } rc = KColumnBlobRelease (blob); if (rc) { PLOGERR (klogFatal, (klogFatal, rc, "Failed to release blob for row $(R) at $(P)", PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix)); break; } blobopen = false; } ++rowid; if ( ! rowid_upper_range (rowid)) break; } } } /* if not in an error state and the last line was unterminated close the blob */ if ((rc == 0) && blobopen) { rc = KColumnBlobAssignRange (blob, rowid, 1); if (rc) { PLOGERR (klogFatal, (klogFatal, rc, "Failed to range assign blob for row $(R) at $(P)", PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix)); } else { rc = KColumnBlobCommit (blob); if (rc) { PLOGERR (klogFatal, (klogFatal, rc, "Failed to commit blob for row $(R) at $(P)", PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix)); } } KColumnBlobRelease (blob); } return rc; }