/* IlluminaSpot_Add
 *  Merge one parsed read into the spot being assembled.
 *
 *  - an empty spot takes the read as its first read;
 *  - a read whose spot name differs from the current spot is refused with
 *    an rcIgnored return code (the spot is left untouched);
 *  - otherwise the read is appended to the first existing read with the
 *    same read_id, or stored as a new read when no such read exists.
 *
 *  Returns 0 on success, or the rc produced by IlluminaSpot_Set /
 *  IlluminaSpot_Append.
 */
rc_t IlluminaSpot_Add(IlluminaSpot* spot, const pstring* name, const pstring* barcode, const IlluminaRead* read)
{
    rc_t rc = 0;
    int32_t idx;
    int store_as_new = 0;

    if( spot->nreads == 0 ) {
        /* very first read of the spot */
        return IlluminaSpot_Set(spot, spot->nreads++, name, barcode, read);
    }
    if( pstring_cmp(spot->name, name) != 0 ) {
        /* read belongs to some other spot */
        return RC(rcSRA, rcFormatter, rcReading, rcData, rcIgnored);
    }

    /* locate the first read already carrying this read_id */
    idx = 0;
    while( idx < spot->nreads && spot->reads[idx].read_id != read->read_id ) {
        idx++;
    }

    if( idx < spot->nreads ) {
        const char* field;

        rc = IlluminaSpot_Append(spot, idx, barcode, read, &field);
        if( GetRCState(rc) == rcDuplicate && read->read_id == ILLUMINAWRITER_READID_NONE ) {
            /* read ids may simply be missing on the defline, in which case
               these are separate reads: keep this one as a new read */
            rc = 0;
            store_as_new = 1;
        } else if( rc != 0 ) {
            PLOGERR(klogErr, (klogErr, rc, "$(field) for spot '$(s)'", PLOG_2(PLOG_S(field),PLOG_S(s)), field, spot->name->data));
        }
    } else {
        /* read id not seen in this spot yet */
        store_as_new = 1;
    }

    if( rc == 0 && store_as_new ) {
        rc = IlluminaSpot_Set(spot, spot->nreads++, name, barcode, read);
    }
    return rc;
}
/* ProcessOneDo
 *  Dispatch on the type of the path held by 'self'.
 *  Regular files are handed to ProcessOneDoFile and its rc is returned.
 *  Every other path type (directories and aliases included) is only
 *  reported at info level and skipped; the function then returns 0.
 */
rc_t ProcessOneDo (ProcessOne * self)
{
    static const char F[] = PLOG_2(PLOG_S(p),PLOG_S(t));
    rc_t rc = 0;
    enum KPathType type;

    type = KDirectoryPathType (self->dir, self->path);
    if (type == kptFile)
    {
        rc = ProcessOneDoFile (self);
    }
    else
    {
        /* nothing but plain files is handled here */
        PLOGMSG (klogInfo, "+ Skipping $(p) of type $(t)", F, self->path, typeToString(type));
    }
    return rc;
}
/* SFFLoaderFmtReadDataHeader
 *  Read one SFF read header: the fixed-size portion followed by the read
 *  name, padded to a multiple of 8 bytes.
 *
 *  On success self->read_header holds the header in host byte order,
 *  self->name holds the read name and self->file_advance is set to the
 *  number of bytes occupied by the name plus padding.
 *
 *  Returns 0 on success; the rc of SFFLoaderFmt_ReadBlock / pstring_assign
 *  on failure; rcFormat/rcInvalid when the header_length stored in the
 *  read header disagrees with the length computed from name_length.
 */
static
rc_t SFFLoaderFmtReadDataHeader(SFFLoaderFmt* self, const SRALoaderFile* file)
{
    rc_t rc = 0;
    uint16_t head_sz = 0;

    /* Make sure the entire fixed portion of Read Header section is in the file buffer window */
    if( (rc = SFFLoaderFmt_ReadBlock(self, file, SFFReadHeader_size, "read header", false)) != 0 ) {
        return rc;
    }
    memcpy(&self->read_header, self->file_buf, SFFReadHeader_size);
#if __BYTE_ORDER == __LITTLE_ENDIAN
    /* header fields are byte-swapped on little-endian hosts */
    self->read_header.header_length = bswap_16(self->read_header.header_length);
    self->read_header.name_length = bswap_16(self->read_header.name_length);
    self->read_header.number_of_bases = bswap_32(self->read_header.number_of_bases);
    self->read_header.clip_quality_left = bswap_16(self->read_header.clip_quality_left);
    self->read_header.clip_quality_right = bswap_16(self->read_header.clip_quality_right);
    self->read_header.clip_adapter_left = bswap_16(self->read_header.clip_adapter_left);
    self->read_header.clip_adapter_right = bswap_16(self->read_header.clip_adapter_right);
#endif

    /* expected size: fixed part + name, rounded up to a multiple of 8 */
    head_sz = SFFReadHeader_size + self->read_header.name_length;
    head_sz += (head_sz % 8) ? (8 - (head_sz % 8)) : 0;
    if( head_sz != self->read_header.header_length ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcFormat, rcInvalid);
        /* report the READ header's length field: that is the value that was
           compared above (the common header's length is unrelated here) */
        SRALoaderFile_LOG(file, klogErr, rc, "read header length $(h) != $(s)", PLOG_2(PLOG_U16(h),PLOG_U16(s)),
                          self->read_header.header_length, head_sz);
        return rc;
    }

    /* read name */
    self->file_advance = SFFReadHeader_size;
    if( (rc = SFFLoaderFmt_ReadBlock(self, file, head_sz - SFFReadHeader_size, "read header", false)) != 0) {
        return rc;
    }
    self->file_advance = head_sz - SFFReadHeader_size;
    if( (rc = pstring_assign(&self->name, self->file_buf, self->read_header.name_length)) != 0 ) {
        SRALoaderFile_LOG(file, klogErr, rc, "copying read name", NULL);
    }
    return rc;
}
/* FGroupMAP_Validate
 *  Tree-walk callback: validates one file group.
 *
 *  n    - tree node, actually a FGroupMAP
 *  data - pointer to an rc_t accumulator; it receives the first non-zero rc
 *         found during the walk and is never reset to 0 by this callback.
 *
 *  A group is invalid when FGroupKey_Validate fails or when it has no reads
 *  ("seq") file; the mappings ("align") file is optional.
 */
static
void CC FGroupMAP_Validate( BSTNode *n, void *data )
{
    const FGroupMAP* g = (const FGroupMAP*)n;
    rc_t rc = 0, *rc_out = (rc_t*)data;
    const char* rnm = NULL, *mnm = NULL;

    rc = FGroupKey_Validate(&g->key);

    if( g->seq != NULL ) {
        const char* slash;

        CGLoaderFile_Filename(g->seq, &rnm);
        /* shorten to the last path component (leading '/' kept, as before);
           a name without any '/' is kept whole instead of being lost */
        slash = rnm ? strrchr(rnm, '/') : NULL;
        if( slash != NULL ) {
            rnm = slash;
        }
        DEBUG_MSG(5, (" READS(%s)", rnm));
    }
    if( g->align ) {
        const char* slash;

        CGLoaderFile_Filename(g->align, &mnm);
        slash = mnm ? strrchr(mnm, '/') : NULL;
        if( slash != NULL ) {
            mnm = slash;
        }
        DEBUG_MSG(5, (" MAPPINGS(%s)", mnm));
    }
    DEBUG_MSG(5, ("\n"));

    if( rc == 0 && g->seq == NULL ) {
        rc = RC(rcExe, rcQueue, rcValidating, rcItem, rcIncomplete);
    }

    /* only ever record a failure: a later valid entry must not wipe out
       an error reported for an earlier one */
    if( rc != 0) {
        PLOGERR(klogErr, (klogErr, rc, "file pair $(f1)[mandatory], $(f2)[optional]", PLOG_2(PLOG_S(f1),PLOG_S(f2)), rnm, mnm));
        if ( * rc_out == 0 )
            *rc_out = rc;
    }
}
/* SFFLoaderFmtReadCommonHeader
 *  Read and validate the SFF common header (fixed part, flow characters and
 *  key sequence), then pass flow chars / key to the active writer.
 *
 *  When a header was already loaded (self->header.magic_number != 0) the
 *  input is treated as the next file of a concatenated stream: the new
 *  header must agree with the previous one in key length, number of flows,
 *  flow characters and key sequence.  If the magic number is not found at
 *  the expected position, one retry is made after skipping the padding that
 *  may follow the previous file's index section.
 *
 *  Returns 0 on success or an rc describing the first failure.
 */
static
rc_t SFFLoaderFmtReadCommonHeader(SFFLoaderFmt* self, const SRALoaderFile *file)
{
    rc_t rc = 0;
    bool skiped_idx_pad = false;
    uint16_t head_sz;
    SFFCommonHeader prev_head;
    pstring prev_flow_chars;
    pstring prev_key_seq;

    if( (rc = SRALoaderFile_Offset(file, &self->index_correction)) != 0 ) {
        SRALoaderFile_LOG(file, klogErr, rc, "Reading initial file position", NULL);
        return rc;
    }

SkipIndexPad:
    self->index_correction += self->file_advance;
    if( (rc = SFFLoaderFmt_ReadBlock(self, file, SFFCommonHeader_size, NULL, true)) != 0) {
        SRALoaderFile_LOG(file, klogErr, rc, "common header, needed $(needed) bytes", PLOG_U32(needed), SFFCommonHeader_size);
        return rc;
    }

    /* Capture the previous header on the first pass only: on the retry after
       skipping index padding self->header already holds the rejected
       (garbage) header and must not replace the real previous one. */
    if( !skiped_idx_pad ) {
        if( self->header.magic_number != 0 ) {
            /* next file in stream, remember prev to sync to each */
            memcpy(&prev_head, &self->header, sizeof(SFFCommonHeader));
            pstring_copy(&prev_flow_chars, &self->flow_chars);
            pstring_copy(&prev_key_seq, &self->key_seq);
        } else {
            prev_head.magic_number = 0;
            prev_head.index_length = 0;
        }
    }

    memcpy(&self->header, self->file_buf, SFFCommonHeader_size);
#if __BYTE_ORDER == __LITTLE_ENDIAN
    /* header fields are byte-swapped on little-endian hosts */
    self->header.magic_number = bswap_32(self->header.magic_number);
    self->header.version = bswap_32(self->header.version);
    self->header.index_offset = bswap_64(self->header.index_offset);
    self->header.index_length = bswap_32(self->header.index_length);
    self->header.number_of_reads = bswap_32(self->header.number_of_reads);
    self->header.header_length = bswap_16(self->header.header_length);
    self->header.key_length = bswap_16(self->header.key_length);
    self->header.num_flows_per_read = bswap_16(self->header.num_flows_per_read);
#endif

    if( self->header.magic_number != (('.'<<24)|('s'<<16)|('f'<<8)|('f'<<0)) ) {
        if( !skiped_idx_pad && prev_head.magic_number != 0 ) {
            /* possible concatination of 2 files with index at EOF and padded to 8 bytes
               with header values not padded, try skipping padding and reread */
            /* bytes needed to round index_length up to a multiple of 8;
               0 when it is already aligned, in which case there is nothing to skip */
            uint32_t pad = (8 - prev_head.index_length % 8) % 8;
            if( pad != 0 ) {
                self->file_advance += pad;
                DEBUG_MSG(5, ("%s: trying to skip over %u bytes index section padding\n", self->file_name, pad));
                skiped_idx_pad = true;
                goto SkipIndexPad;
            }
        }
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnrecognized);
        SRALoaderFile_LOG(file, klogErr, rc, "magic number: $(m)", PLOG_U32(m), self->header.magic_number);
        return rc;
    }
    if( self->header.version != 1 ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcBadVersion);
        SRALoaderFile_LOG(file, klogErr, rc, "format version $(v)", PLOG_U32(v), self->header.version);
        return rc;
    }
    if( self->header.flowgram_format_code != SFFFormatCodeUI16Hundreths ) {
        /* NOTE: add a case here if flowgram coding gets new version to support different */
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnsupported);
        SRALoaderFile_LOG(file, klogErr, rc, "common header flowgram format code", NULL);
        return rc;
    }
    if( self->header.index_length % 8 != 0 ) {
        /* tolerated, only noted in the debug log */
        DEBUG_MSG(5, ("%s: index_length field value is not 8 byte padded: %u\n", self->file_name, self->header.index_length));
    }

    /* expected size: fixed part + flow chars + key, rounded up to a multiple of 8 */
    head_sz = SFFCommonHeader_size + self->header.num_flows_per_read + self->header.key_length;
    head_sz += (head_sz % 8) ? (8 - (head_sz % 8)) : 0;
    if( head_sz != self->header.header_length ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcFormat, rcInvalid);
        SRALoaderFile_LOG(file, klogErr, rc, "header length $(h) <> $(s) ", PLOG_2(PLOG_U16(h),PLOG_U16(s)),
                          self->header.header_length, head_sz);
        return rc;
    }

    /* read flow chars and key */
    self->file_advance = SFFCommonHeader_size;
    if( (rc = SFFLoaderFmt_ReadBlock(self, file, head_sz - SFFCommonHeader_size, "common header", false)) != 0) {
        return rc;
    }
    self->file_advance = head_sz - SFFCommonHeader_size;

    if( (rc = pstring_assign(&self->flow_chars, self->file_buf, self->header.num_flows_per_read)) != 0 ||
        (rc = pstring_assign(&self->key_seq, self->file_buf + self->header.num_flows_per_read, self->header.key_length)) != 0 ) {
        SRALoaderFile_LOG(file, klogErr, rc, "reading flows/key sequence", NULL);
        return rc;
    }

    if( prev_head.magic_number != 0 ) {
        /* next file's common header must match previous file's common header, partially */
        if( prev_head.key_length != self->header.key_length ||
            prev_head.num_flows_per_read != self->header.num_flows_per_read ||
            pstring_cmp(&prev_flow_chars, &self->flow_chars) != 0 ||
            pstring_cmp(&prev_key_seq, &self->key_seq) != 0 ) {
            rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcInconsistent);
            SRALoaderFile_LOG(file, klogErr, rc, "previous file common header differ in flows/key sequence", NULL);
        }
    }

    if( rc == 0 ) {
        /* exactly one writer is used: the 454 writer when set, IonTorrent otherwise */
        if( self->w454 ) {
            rc = SRAWriter454_WriteHead(self->w454, &self->flow_chars, &self->key_seq);
        } else {
            rc = SRAWriterIonTorrent_WriteHead(self->wIonTorrent, &self->flow_chars, &self->key_seq);
        }
    }
    return rc;
}
/* CGWriterAlgn_Save
 *  Compress and write the alignment held in self->data.map[mate], unless it
 *  has been saved already (map->saved).
 *
 *  self   - writer state; map / match / algn entries at index 'mate' are used
 *  rd     - reads data: sequence buffer, spot length, reads format and the
 *           cache of reverse-complemented half-spots (rd->reverse)
 *  writer - alignment table the row is written to
 *  mate   - index of the mapping to save
 *  rowid  - receives the row id from TableWriterAlgn_Write
 *
 *  Returns 0 on success (or when nothing had to be done), the rc of the
 *  failing call otherwise, and an rcUnsupported rc when the reads format
 *  yields a gap count other than 2 or 3.
 */
static
rc_t CGWriterAlgn_Save(CGWriterAlgn *const self, TReadsData *const rd, TableWriterAlgn const *const writer, uint32_t const mate, int64_t *const rowid)
{
    rc_t rc = 0;
    TMappingsData_map *const map = &self->data.map[mate];

    if( !map->saved ) {
        CGWriterAlgn_match *const match = &self->match[mate];
        TableWriterAlgnData *const algn = &self->algn[mate];
        uint32_t g = 0;
        uint32_t* cigar = NULL;
        /* 7-element cigar templates; odd slots (1, 3, 5) are the gap
           operations filled in below from map->gap[] */
        uint32_t left_cigar15 []={ 5 << 4, 0, 10 << 4, 0, 10 << 4, 0,10 << 4 };
        uint32_t right_cigar15[]={ 10 << 4, 0, 10 << 4, 0, 10 << 4, 0, 5 << 4 };
        uint32_t left_cigar25 []={ 10 << 4, 0, 10 << 4, 0, 10 << 4, 0, 0 };
        uint32_t right_cigar25[]={ 10 << 4, 0, 10 << 4, 0, 10 << 4, 0, 0 };
        uint32_t *left_cigar = NULL;
        uint32_t *right_cigar = NULL;
        uint32_t cg_reads_ngaps = 0;
        const char *read = NULL;
        uint32_t read_len = 0;

        assert(rd);
        cg_reads_ngaps = get_cg_reads_ngaps(rd->reads_format);
        read_len = rd->seq.spot_len / 2;

        if (cg_reads_ngaps == 3) {
            left_cigar  = left_cigar15;
            right_cigar = right_cigar15;
        }
        else if (cg_reads_ngaps == 2) {
            left_cigar  = left_cigar25;
            right_cigar = right_cigar25;
        }
        else {
            /* No cigar template exists for this gap count.  The assert alone
               vanishes under NDEBUG and the code below would then work with
               NULL cigar pointers, so fail explicitly as well. */
            assert(0);
            return RC(rcExe, rcFormatter, rcValidating, rcData, rcUnsupported);
        }

        /* pick the half of the spot this mapping refers to; g is reused here
           as the offset of that half inside the spot-sized buffers */
        if (match->seq_read_id == 2) {
            read = &((const char*)(rd->seq.sequence.buffer))[read_len];
            cigar = right_cigar;
            g = read_len;
        }
        else {
            read = rd->seq.sequence.buffer;
            cigar = left_cigar;
            g = 0;
        }

        if (match->ref_orientation) {
            /* reverse complement is computed once per half and cached in
               rd->reverse; a leading '\0' marks "not computed yet" */
            if( rd->reverse[g] == '\0' ) {
                if( (rc = DNAReverseCompliment(read, &rd->reverse[g], read_len)) != 0) {
                    return rc;
                }
                DEBUG_MSG(10, ("'%.*s' -> cg_eRevDnbStrand: '%.*s'\n", read_len, read, read_len, &rd->reverse[g]));
            }
            read = &rd->reverse[g];
            /* reversed read uses the opposite side's template */
            cigar = (cigar == left_cigar) ? right_cigar : left_cigar;
        }

        for(g = 0; g < cg_reads_ngaps; g++) {
            if( map->gap[g] > 0 ) {
                cigar[g * 2 + 1] = (map->gap[g] << 4) | 3; /* 'xN' */
            } else if( map->gap[g] < 0 ) {
                cigar[g * 2 + 1] = (-map->gap[g] << 4) | 9; /* 'xB' */
            } else {
                cigar[g * 2 + 1] = 0; /* '0M' */
            }
        }

        algn->ploidy = 0;
        if( (rc = ReferenceMgr_Compress(self->rmgr, ewrefmgr_cmp_Binary, map->chr, map->offset,
                                        read, read_len, cigar, 7, 0, NULL, 0, 0, NULL, 0,
                                        NCBI_align_ro_complete_genomics, algn)) != 0 ) {
            PLOGERR(klogErr, (klogErr, rc, "compression failed $(id) $(o)",
                    PLOG_2(PLOG_S(id),PLOG_I32(o)), map->chr, map->offset));
        }
        else {
            /* this is to try represent these alignments as unmated to match cgatools
             * axf uses the row length of MATE_REF_ORIENTATION as the indicator of
             * mate presence
             */
            unsigned const save = algn->mate_ref_orientation.elements;

            if (map->mate == mate)
                algn->mate_ref_orientation.elements = 0;
            rc = TableWriterAlgn_Write(writer, algn, rowid);
            if (map->mate == mate)
                algn->mate_ref_orientation.elements = save;

            /* marked as saved whatever the write result was */
            map->saved = true;
        }
    }
    return rc;
}
/*******************************************************************************
 * KMain - defined for use with kapp library
 *
 * Command-line driver of the dumper:
 *   1. hands over to fasta_dump() when fasta output was requested;
 *   2. initializes and validates the formatter (SRADumperFmt);
 *   3. parses the command line: arguments not starting with '-' are collected
 *      as table paths (duplicates dropped), the rest are options;
 *   4. for every table path: opens the table, infers the accession from the
 *      path when needed, builds the splitter factory chain and dumps the
 *      requested spot id range;
 *   5. releases all resources and reports the total number of spots written.
 *
 * Returns 0 on success, 100..102 when formatter initialization fails, or the
 * rc of the last failing operation.
 *******************************************************************************/
rc_t CC KMain ( int argc, char* argv[] )
{
    rc_t rc = 0;
    int i;
    const char* arg;
    uint64_t total_spots = 0;
    const SRAMgr* sraMGR = NULL;
    SRADumperFmt fmt;

    bool to_stdout = false, do_gzip = false, do_bzip2 = false;
    char const* outdir = NULL;
    spotid_t minSpotId = 1;
    spotid_t maxSpotId = ~0;
    bool sub_dir = false;
    bool keep_empty = false;
    const char* table_path[10240];
    int table_path_qty = 0;

    char const* D_option = NULL;
    char const* P_option = NULL;
    char P_option_buffer[4096];
    const char* accession = NULL;
    const char* table_name = NULL;

    bool spot_group_on = false;
    int spot_groups = 0;
    char* spot_group[128] = {NULL};     /* NULL-terminated list, so at most 127 names */
    bool read_filter_on = false;
    SRAReadFilter read_filter = 0xFF;   /* 0xFF: no filter value given */

    bool failed_to_open = false;

    /* for the fasta-ouput of fastq-dump: branch out completely of 'common' code */
    if ( fasta_dump_requested( argc, argv ) )
    {
        return fasta_dump( argc, argv );
    }

    /* Prepare for the worst: report this information after disaster */
    ReportBuildDate ( __DATE__ );

    memset( &fmt, 0, sizeof( fmt ) );
    rc = SRADumper_Init( &fmt );
    if ( rc != 0 )
    {
        LOGERR(klogErr, rc, "formatter initialization");
        return 100;
    }
    else if ( fmt.get_factory == NULL )
    {
        rc = RC( rcExe, rcFormatter, rcValidating, rcInterface, rcNull );
        LOGERR( klogErr, rc, "formatter factory" );
        return 101;
    }
    else
    {
        rc = SRADumper_ArgsValidate( argv[0], &fmt );
        if ( rc != 0 )
        {
            LOGERR( klogErr, rc, "formatter args list" );
            return 102;
        }
    }

    if ( argc < 2 )
    {
        CoreUsage( argv[0], &fmt, true, EXIT_FAILURE );
        return 0;
    }

    /* ---------------------------------------------------------------- */
    /* command line                                                      */
    /* ---------------------------------------------------------------- */
    for ( i = 1; i < argc; i++ )
    {
        arg = argv[ i ];
        if ( arg[ 0 ] != '-' )
        {
            /* a table path: remember it unless it was given before */
            uint32_t k;
            for ( k = 0; k < table_path_qty; k++ )
            {
                if ( strcmp( arg, table_path[ k ] ) == 0 )
                {
                    break;
                }
            }
            if ( k >= table_path_qty )
            {
                if ( ( table_path_qty + 1 ) >= ( sizeof( table_path ) / sizeof( table_path[ 0 ] ) ) )
                {
                    rc = RC( rcExe, rcArgv, rcReading, rcBuffer, rcInsufficient );
                    goto Catch;
                }
                table_path[ table_path_qty++ ] = arg;
            }
            continue;
        }
        arg = NULL;
        if ( SRADumper_GetArg( &fmt, "L", "log-level", &i, argc, argv, &arg ) )
        {
            rc = LogLevelSet( arg );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc, "log level $(lvl)", PLOG_S( lvl ), arg ) );
                goto Catch;
            }
        }
        else if ( SRADumper_GetArg( &fmt, NULL, OPTION_REPORT, &i, argc, argv, &arg ) )
        {
            /* accepted, nothing to do here */
        }
        else if ( SRADumper_GetArg( &fmt, "+", "debug", &i, argc, argv, &arg ) )
        {
#if _DEBUGGING
            rc = KDbgSetString( arg );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc, "debug level $(lvl)", PLOG_S( lvl ), arg ) );
                goto Catch;
            }
#endif
        }
        else if ( SRADumper_GetArg( &fmt, "H", "help", &i, argc, argv, NULL ) ||
                  SRADumper_GetArg( &fmt, "?", "h", &i, argc, argv, NULL ) )
        {
            CoreUsage( argv[ 0 ], &fmt, false, EXIT_SUCCESS );
        }
        else if ( SRADumper_GetArg( &fmt, "V", "version", &i, argc, argv, NULL ) )
        {
            HelpVersion ( argv[ 0 ], KAppVersion() );
            return 0;
        }
        else if ( SRADumper_GetArg( &fmt, "v", NULL, &i, argc, argv, NULL ) )
        {
            KStsLevelAdjust( 1 );
        }
        else if ( SRADumper_GetArg( &fmt, "D", "table-path", &i, argc, argv, &D_option ) )
        {
            LOGMSG( klogErr, "option -D is deprecated, see --help" );
        }
        else if ( SRADumper_GetArg( &fmt, "P", "path", &i, argc, argv, &P_option ) )
        {
            LOGMSG( klogErr, "option -P is deprecated, see --help" );
        }
        else if ( SRADumper_GetArg( &fmt, "A", "accession", &i, argc, argv, &accession ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "O", "outdir", &i, argc, argv, &outdir ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "Z", "stdout", &i, argc, argv, NULL ) )
        {
            to_stdout = true;
        }
        else if ( fmt.gzip && SRADumper_GetArg( &fmt, NULL, "gzip", &i, argc, argv, NULL ) )
        {
            do_gzip = true;
        }
        else if ( fmt.bzip2 && SRADumper_GetArg( &fmt, NULL, "bzip2", &i, argc, argv, NULL ) )
        {
            do_bzip2 = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "table", &i, argc, argv, &table_name ) )
        {
        }
        else if ( SRADumper_GetArg( &fmt, "N", "minSpotId", &i, argc, argv, &arg ) )
        {
            minSpotId = AsciiToU32( arg, NULL, NULL );
        }
        else if ( SRADumper_GetArg( &fmt, "X", "maxSpotId", &i, argc, argv, &arg ) )
        {
            maxSpotId = AsciiToU32( arg, NULL, NULL );
        }
        else if ( SRADumper_GetArg( &fmt, "G", "spot-group", &i, argc, argv, NULL ) )
        {
            spot_group_on = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "spot-groups", &i, argc, argv, NULL ) )
        {
            if ( i + 1 < argc && argv[ i + 1 ][ 0 ] != '-' )
            {
                /* comma separated list of names; empty items are dropped */
                const size_t spot_group_max = sizeof( spot_group ) / sizeof( spot_group[ 0 ] );
                int f = 0, t = 0;
                i++;
                for ( ;; t++ )
                {
                    const char c = argv[ i ][ t ];
                    if ( c != ',' && c != '\0' )
                    {
                        continue;
                    }
                    if ( t - f > 0 )
                    {
                        /* one slot has to stay free for the terminating NULL */
                        if ( ( size_t )spot_groups + 1 >= spot_group_max )
                        {
                            rc = RC( rcExe, rcArgv, rcReading, rcBuffer, rcInsufficient );
                            LOGERR( klogErr, rc, "too many spot groups" );
                            goto Catch;
                        }
                        spot_group[ spot_groups ] = strndup( &argv[ i ][ f ], t - f );
                        if ( spot_group[ spot_groups ] == NULL )
                        {
                            rc = RC( rcExe, rcArgv, rcReading, rcMemory, rcExhausted );
                            LOGERR( klogErr, rc, "copying spot group name" );
                            goto Catch;
                        }
                        spot_groups++;
                    }
                    if ( c == '\0' )
                    {
                        break;
                    }
                    f = t + 1;
                }
                if ( spot_groups < 1 )
                {
                    rc = RC( rcApp, rcArgv, rcReading, rcParam, rcEmpty );
                    PLOGERR( klogErr, ( klogErr, rc, "$(p)", PLOG_S( p ), argv[ i - 1 ] ) );
                    CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
                }
                spot_group[ spot_groups ] = NULL;
            }
        }
        else if ( SRADumper_GetArg( &fmt, "R", "read-filter", &i, argc, argv, NULL ) )
        {
            read_filter_on = true;
            if ( i + 1 < argc && argv[ i + 1 ][ 0 ] != '-' )
            {
                i++;
                if ( read_filter != 0xFF )
                {
                    rc = RC( rcApp, rcArgv, rcReading, rcParam, rcDuplicate );
                    PLOGERR( klogErr, ( klogErr, rc, "$(p): $(o)",
                             PLOG_2( PLOG_S( p ),PLOG_S( o ) ), argv[ i - 1 ], argv[ i ] ) );
                    CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
                }
                if ( strcasecmp( argv[ i ], "pass" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_PASS;
                }
                else if ( strcasecmp( argv[ i ], "reject" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_REJECT;
                }
                else if ( strcasecmp( argv[ i ], "criteria" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_CRITERIA;
                }
                else if ( strcasecmp( argv[ i ], "redacted" ) == 0 )
                {
                    read_filter = SRA_READ_FILTER_REDACTED;
                }
                else
                {
                    /* must be accession */
                    i--;
                }
            }
        }
        else if ( SRADumper_GetArg( &fmt, "T", "group-in-dirs", &i, argc, argv, NULL ) )
        {
            sub_dir = true;
        }
        else if ( SRADumper_GetArg( &fmt, "K", "keep-empty-files", &i, argc, argv, NULL ) )
        {
            keep_empty = true;
        }
        else if ( SRADumper_GetArg( &fmt, NULL, "no-user-settings", &i, argc, argv, NULL ) )
        {
            KConfigDisableUserSettings ();
        }
        else if ( fmt.add_arg && fmt.add_arg( &fmt, SRADumper_GetArg, &i, argc, argv ) )
        {
            /* consumed by the formatter */
        }
        else
        {
            rc = RC( rcApp, rcArgv, rcReading, rcParam, rcIncorrect );
            PLOGERR( klogErr, ( klogErr, rc, "$(p)", PLOG_S( p ), argv[ i ] ) );
            CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
        }
    }

    /* ---------------------------------------------------------------- */
    /* option cross-checks                                               */
    /* ---------------------------------------------------------------- */
    if ( to_stdout )
    {
        if ( outdir != NULL || sub_dir || keep_empty ||
             spot_group_on || ( read_filter_on && read_filter == 0xFF ) )
        {
            LOGMSG( klogWarn, "stdout mode is set, some options are ignored" );
            spot_group_on = false;
            if ( read_filter == 0xFF )
            {
                read_filter_on = false;
            }
        }
        /* data goes to stdout, so every message channel moves to stderr */
        KOutHandlerSetStdErr();
        KStsHandlerSetStdErr();
        KLogHandlerSetStdErr();
        ( void ) KDbgHandlerSetStdErr();
    }

    if ( do_gzip && do_bzip2 )
    {
        rc = RC( rcApp, rcArgv, rcReading, rcParam, rcAmbiguous );
        LOGERR( klogErr, rc, "output compression method" );
        CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE );
    }

    if ( minSpotId > maxSpotId )
    {
        /* a reversed range is accepted: swap the bounds */
        spotid_t temp = maxSpotId;
        maxSpotId = minSpotId;
        minSpotId = temp;
    }

    if ( table_path_qty == 0 )
    {
        if ( D_option != NULL && D_option[ 0 ] != '\0' )
        {
            /* support deprecated '-D' option */
            table_path[ table_path_qty++ ] = D_option;
        }
        else if ( accession == NULL || accession[ 0 ] == '\0' )
        {
            /* must have accession to proceed */
            rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcEmpty );
            LOGERR( klogErr, rc, "expected accession" );
            goto Catch;
        }
        else if ( P_option != NULL && P_option[ 0 ] != '\0' )
        {
            /* support deprecated '-P' option */
            i = snprintf( P_option_buffer, sizeof( P_option_buffer ), "%s/%s", P_option, accession );
            if ( i < 0 || ( size_t )i >= sizeof( P_option_buffer ) )
            {
                rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcExcessive );
                LOGERR( klogErr, rc, "path too long" );
                goto Catch;
            }
            table_path[ table_path_qty++ ] = P_option_buffer;
        }
        else
        {
            table_path[ table_path_qty++ ] = accession;
        }
    }

    rc = SRAMgrMakeRead( &sraMGR );
    if ( rc != 0 )
    {
        LOGERR( klogErr, rc, "failed to open SRA manager" );
        goto Catch;
    }
    else
    {
        rc = SRASplitterFactory_FilerInit( to_stdout, do_gzip, do_bzip2, sub_dir, keep_empty, outdir );
        if ( rc != 0 )
        {
            LOGERR( klogErr, rc, "failed to initialize files" );
            goto Catch;
        }
    }

    {
        /* hand the VDB manager to the report module; failures are not fatal */
        const VDBManager* vmgr = NULL;
        rc_t rc2 = SRAMgrGetVDBManagerRead( sraMGR, &vmgr );
        if ( rc2 != 0 )
        {
            LOGERR( klogErr, rc2, "while calling SRAMgrGetVDBManagerRead" );
        }
        rc2 = ReportSetVDBManager( vmgr );
        VDBManagerRelease( vmgr );
    }

    /* ---------------------------------------------------------------- */
    /* loop tables                                                       */
    /* ---------------------------------------------------------------- */
    for ( i = 0; i < table_path_qty; i++ )
    {
        const SRASplitterFactory* fact_head = NULL;
        spotid_t smax, smin;

        SRA_DUMP_DBG( 5, ( "table path '%s', name '%s'\n", table_path[ i ], table_name ) );
        if ( table_name != NULL )
        {
            rc = SRAMgrOpenAltTableRead( sraMGR, &fmt.table, table_name, table_path[ i ] );
            if ( rc != 0 )
            {
                PLOGERR( klogErr, ( klogErr, rc, "failed to open '$(path):$(table)'",
                         "path=%s,table=%s", table_path[ i ], table_name ) );
                continue;
            }
        }

        ReportResetObject( table_path[ i ] );

        if ( fmt.table == NULL )
        {
            rc = SRAMgrOpenTableRead( sraMGR, &fmt.table, table_path[ i ] );
            if ( rc != 0 )
            {
                if ( UIError( rc, NULL, NULL ) )
                {
                    UITableLOGError( rc, NULL, true );
                }
                else
                {
                    PLOGERR( klogErr, ( klogErr, rc, "failed to open '$(path)'", "path=%s", table_path[ i ] ) );
                    if (GetRCState(rc) == rcNotFound) {
                        failed_to_open = true;
                    }
                }
                continue;
            }
        }

        /* infer accession from table_path if missing or more than one table */
        fmt.accession = table_path_qty > 1 ? NULL : accession;
        if ( fmt.accession == NULL || fmt.accession[ 0 ] == 0 )
        {
            char * basename;
            char * sep;
            char *ext;
            size_t l;
            bool is_url = false;

            /* the path is edited in P_option_buffer: refuse anything that
               does not fit instead of overrunning the buffer */
            l = strlen( table_path[ i ] );
            if ( l >= sizeof( P_option_buffer ) )
            {
                rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcExcessive );
                LOGERR( klogErr, rc, "path too long" );
                SRATableRelease( fmt.table );
                fmt.table = NULL;
                continue;
            }
            memmove( P_option_buffer, table_path[ i ], l + 1 );

            /* "<scheme>:<something>" is treated as an URL */
            basename = strchr ( P_option_buffer, ':' );
            if ( basename )
            {
                ++basename;
                if ( basename [0] == '\0' )
                    basename = P_option_buffer;
                else
                    is_url = true;
            }
            else
                basename = P_option_buffer;

            if ( is_url )
            {
                /* drop fragment and query */
                ext = strchr ( basename, '#' );
                if ( ext )
                    ext[ 0 ] = '\0';
                ext = strchr ( basename, '?' );
                if ( ext )
                    ext[ 0 ] = '\0';
            }

            /* strip trailing path separators; stop at the start of the string */
            l = strlen( basename );
            while ( l > 0 && strchr( "\\/", basename[ l - 1 ] ) != NULL )
            {
                basename[ --l ] = '\0';
            }

            /* last path component */
            sep = strrchr( basename, '/' );
            fmt.accession = ( sep != NULL ) ? sep + 1 : basename;

            /* cut off [.lite].[c]sra[.nenc||.ncbi_enc] if any */
            ext = strrchr( fmt.accession, '.' );
            if ( ext != NULL )
            {
                if ( strcasecmp( ext, ".nenc" ) == 0 || strcasecmp( ext, ".ncbi_enc" ) == 0 )
                {
                    *ext = '\0';
                    ext = strrchr( fmt.accession, '.' );
                }
                if ( ext != NULL && ( strcasecmp( ext, ".sra" ) == 0 || strcasecmp( ext, ".csra" ) == 0 ) )
                {
                    *ext = '\0';
                    ext = strrchr( fmt.accession, '.' );
                    if ( ext != NULL && strcasecmp( ext, ".lite" ) == 0 )
                    {
                        *ext = '\0';
                    }
                }
            }
        }

        SRA_DUMP_DBG( 5, ( "accession: '%s'\n", fmt.accession ) );
        rc = SRASplitterFactory_FilerPrefix( accession ? accession : fmt.accession );

        /* executed at most once: 'while' is only used to allow 'break' */
        while ( rc == 0 )
        {
            /* sort out the spot id range */
            if ( ( rc = SRATableMaxSpotId( fmt.table, &smax ) ) != 0 ||
                 ( rc = SRATableMinSpotId( fmt.table, &smin ) ) != 0 )
            {
                break;
            }

            {
                const struct VTable* tbl = NULL;
                rc_t rc2 = SRATableGetVTableRead( fmt.table, &tbl );
                if ( rc == 0 )
                {
                    rc = rc2;
                }
                rc2 = ReportResetTable( table_path[i], tbl );
                if ( rc == 0 )
                {
                    rc = rc2;
                }
                VTableRelease( tbl );   /* SRATableGetVTableRead adds Reference to tbl! */
            }

            /* test if we have to dump anything... */
            if ( smax < minSpotId || smin > maxSpotId )
            {
                break;
            }
            if ( smax > maxSpotId )
            {
                smax = maxSpotId;
            }
            if ( smin < minSpotId )
            {
                smin = minSpotId;
            }

            /* hack to reduce looping in AddSpot: needs redesign to pass nreads along through tree */
            if ( true ) /* ??? */
            {
                const SRAColumn* c = NULL;
                nreads_max = NREADS_MAX;
                rc = SRATableOpenColumnRead( fmt.table, &c, "PLATFORM", sra_platform_id_t );
                if ( rc == 0 )
                {
                    const INSDC_SRA_platform_id *platform;
                    bitsz_t o, z;
                    rc = SRAColumnRead( c, 1, (const void **)&platform, &o, &z );
                    if ( rc == 0 && platform != NULL )
                    {
                        if ( *platform != SRA_PLATFORM_PACBIO_SMRT )
                        {
                            nreads_max = 32;
                        }
                    }
                    SRAColumnRelease( c );
                }
                else if ( GetRCState( rc ) == rcNotFound && GetRCObject( rc ) == rcColumn )
                {
                    rc = 0;
                }
            }

            /* table dependent */
            rc = fmt.get_factory( &fmt, &fact_head );
            if ( rc != 0 )
            {
                break;
            }
            if ( fact_head == NULL )
            {
                rc = RC( rcExe, rcFormatter, rcResolving, rcInterface, rcNull );
                break;
            }

            /* each optional splitter is put in front of the current chain head */
            if ( rc == 0 && ( spot_group_on || spot_groups > 0 ) )
            {
                const SRASplitterFactory* f = NULL;
                rc = SpotGroupSplitterFactory_Make( &f, fmt.table, spot_group_on, spot_group );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            if ( rc == 0 && read_filter_on )
            {
                const SRASplitterFactory* f = NULL;
                rc = ReadFilterSplitterFactory_Make( &f, fmt.table, read_filter );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            if ( rc == 0 )
            {
                /* this filter takes over head of chain to be first and kill off bad NREADS */
                const SRASplitterFactory* f = NULL;
                rc = MaxNReadsValidatorFactory_Make( &f, fmt.table );
                if ( rc == 0 )
                {
                    rc = SRASplitterFactory_AddNext( f, fact_head );
                    if ( rc == 0 )
                    {
                        fact_head = f;
                    }
                    else
                    {
                        SRASplitterFactory_Release( f );
                    }
                }
            }

            rc = SRASplitterFactory_Init( fact_head );
            if ( rc == 0 )
            {
                /* ********************************************************** */
                rc = SRADumper_DumpRun( fmt.table, smin, smax, fact_head );
                /* ********************************************************** */
                if ( rc == 0 )
                {
                    uint64_t total = 0, file = 0;
                    SRASplitterFactory_FilerReport( &total, &file );
                    /* the filer total is cumulative: report this table's share */
                    OUTMSG(( "Written %lu spots for %s\n", total - total_spots, table_path[ i ] ));
                    if ( to_stdout && total > 0 )
                    {
                        PLOGMSG( klogInfo, ( klogInfo, "$(t) biggest file has $(n) spots",
                                 PLOG_2( PLOG_S( t ), PLOG_U64( n ) ), table_path[ i ], file ));
                    }
                    total_spots = total;
                }
            }
            break;
        }

        SRASplitterFactory_Release( fact_head );
        SRATableRelease( fmt.table );
        fmt.table = NULL;

        if ( rc == 0 )
        {
            PLOGMSG( klogInfo, ( klogInfo, "$(path)$(dot)$(table) $(spots) spots",
                     PLOG_4(PLOG_S(path),PLOG_S(dot),PLOG_S(table),PLOG_U32(spots)),
                     table_path[ i ], table_name ? ":" : "", table_name ? table_name : "", smax - smin + 1 ) );
        }
        else if ( !reportToUser( rc, argv [0 ] ) )
        {
            PLOGERR( klogErr, ( klogErr, rc, "failed $(path)$(dot)$(table)",
                     PLOG_3(PLOG_S(path),PLOG_S(dot),PLOG_S(table)),
                     table_path[ i ], table_name ? ":" : "", table_name ? table_name : "" ) );
        }
    }

Catch:
    /* common cleanup for success and failure */
    if ( fmt.release )
    {
        rc_t rr = fmt.release( &fmt );
        if ( rr != 0 )
        {
            SRA_DUMP_DBG( 1, ( "formatter release error %R\n", rr ) );
        }
    }

    for ( i = 0; i < spot_groups; i++ )
    {
        free( spot_group[ i ] );
    }
    SRASplitterFiler_Release();
    SRAMgrRelease( sraMGR );
    OUTMSG(( "Written %lu spots total\n", total_spots ));

    if (failed_to_open) {
        ReportSilence();
    }

    {
        /* Report execution environment if necessary */
        rc_t rc2 = ReportFinalize( rc );
        if ( rc == 0 )
        {
            rc = rc2;
        }
    }
    return rc;
}
/* ReadFilterSplitter_GetKeySet
 *  For one spot, sorts its reads into the splitter's READ_FILTER buckets
 *  (pass / reject / criteria / redacted / unknown) by setting the read's
 *  bit in the readmask of the matching key.
 *
 *  cself    [IN]  - splitter object; cast to ReadFilterSplitter
 *  key      [OUT] - set to the splitter's own key table when the READ_FILTER
 *                   column was read successfully; otherwise left untouched
 *  keys     [OUT] - number of entries in *key; 0 when the splitter has no
 *                   READ_FILTER column or the column read fails
 *  spot     [IN]  - row id handed to SRAColumnRead
 *  readmask [IN]  - not referenced by this splitter
 *
 *  Returns 0 on success, an rcParam/rcNull RC when self, key or keys is NULL,
 *  or whatever SRAColumnRead returned on failure.
 */
static rc_t ReadFilterSplitter_GetKeySet( const SRASplitter* cself, const SRASplitter_Keys** key,
                                          uint32_t* keys, spotid_t spot, const readmask_t* readmask )
{
    ReadFilterSplitter* self = ( ReadFilterSplitter* )cself;
    const INSDC_SRA_read_filter* rdf;
    bitsz_t o = 0, sz = 0;
    uint32_t k;
    int32_t i;
    rc_t rc;

    /* keys is written unconditionally below, so it must be validated together with self and key */
    if ( self == NULL || key == NULL || keys == NULL )
    {
        return RC( rcSRA, rcNode, rcExecuting, rcParam, rcNull );
    }

    *keys = 0;
    if ( self->col_rdf == NULL )
    {
        /* no READ_FILTER column: report an empty key set */
        return 0;
    }

    rc = SRAColumnRead( self->col_rdf, spot, (const void **)&rdf, &o, &sz );
    if ( rc != 0 )
    {
        return rc;
    }

    /* sz is a size in bits; convert it to a count of filter elements */
    i = sz / sizeof( INSDC_SRA_read_filter ) / 8;

    *key = self->keys;
    *keys = sizeof( self->keys ) / sizeof( self->keys[ 0 ] );

    /* start every bucket with an empty mask */
    for ( k = 0; k < *keys; k++ )
    {
        clear_readmask( self->keys[ k ].readmask );
    }

    /* walk the reads from last to first */
    while ( i > 0 )
    {
        i--;
        if ( self->read_filter != 0xFF && self->read_filter != rdf[ i ] )
        {
            /* a specific filter value was requested (0xFF means "any") and this read does not match it */
        }
        else if ( rdf[ i ] == SRA_READ_FILTER_PASS )
        {
            set_readmask( self->keys[ EReadFilterSplitter_pass ].readmask, i );
        }
        else if ( rdf[ i ] == SRA_READ_FILTER_REJECT )
        {
            set_readmask( self->keys[ EReadFilterSplitter_reject ].readmask, i );
        }
        else if ( rdf[ i ] == SRA_READ_FILTER_CRITERIA )
        {
            set_readmask( self->keys[ EReadFilterSplitter_criteria ].readmask, i );
        }
        else if ( rdf[ i ] == SRA_READ_FILTER_REDACTED )
        {
            set_readmask( self->keys[ EReadFilterSplitter_redacted ].readmask, i );
        }
        else
        {
            /* unrecognized value: still route the read somewhere, but warn about it */
            set_readmask( self->keys[ EReadFilterSplitter_unknown ].readmask, i );
            PLOGMSG( klogWarn, ( klogWarn, "unknown READ_FILTER value $(value) at spot id $(row)",
                                 PLOG_2( PLOG_U8( value ), PLOG_I64( row ) ), rdf[ i ], spot ) );
        }
    }
    return rc;
}
rc_t txt2kdb_io() { rc_t rc = 0; uint64_t rowid = 1; uint64_t tix = 0; KColumnBlob * blob; bool blobopen = false; while (rc == 0) { size_t num_read; uint8_t buffer [4096]; uint8_t * limit; uint8_t * append_start = buffer; uint8_t * cursor = buffer; bool eol = true; /* quit if we are already past the end of the range */ if ( ! rowid_upper_range(rowid)) break; /* read a buffer full. It may straddle rows. */ rc = KFileRead (G.txt, tix, buffer, sizeof buffer, &num_read); if (rc) { PLOGERR (klogFatal, (klogFatal, rc, "Read failed starting $(P)", PLOG_U64(P), tix)); break; } /* break at EOF */ if (num_read == 0) break; /* scan across the buffer looking for lines */ for (limit = buffer + num_read; cursor < limit; append_start = cursor) { /* if we are at the beginning of a line (end of previous line or start of first */ if (eol) { /* if we are within the pass thru range create a blob */ if (rowid_lower_range(rowid) && rowid_upper_range(rowid)) { rc = KColumnCreateBlob (G.col, &blob); if (rc) { PLOGERR (klogFatal, (klogFatal, rc, "Failed to create Blob for row $(R) at $(P)", PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix)); continue; } blobopen = true; } /* clear the flag */ eol = false; } /* this blob append will go until end of buffer or end of line */ for ( ; cursor < limit; ++ cursor, ++tix) { /* if we hit a NewLine flag it and break for append */ if (*cursor == '\n') { eol = true; ++cursor; ++tix; break; } } /* if we are within the selected range append this to the open blob * ir might be the first append, a middle append, a last append or only append */ if (blobopen) { rc = KColumnBlobAppend (blob, append_start, cursor - append_start); if (rc) { PLOGERR (klogFatal, (klogFatal, rc, "Failed to append Blob for row $(R) at $(P)", PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix)); break; } } /* if we hit a NewLine and are within range we will close this blob */ if (eol) { if (blobopen) { /* single row blobs */ rc = KColumnBlobAssignRange (blob, rowid, 1); if (rc) { PLOGERR 
(klogFatal, (klogFatal, rc, "Failed to range assign blob for row $(R) at $(P)", PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix)); break; } rc = KColumnBlobCommit (blob); if (rc) { PLOGERR (klogFatal, (klogFatal, rc, "Failed to commit blob for row $(R) at $(P)", PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix)); break; } rc = KColumnBlobRelease (blob); if (rc) { PLOGERR (klogFatal, (klogFatal, rc, "Failed to release blob for row $(R) at $(P)", PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix)); break; } blobopen = false; } ++rowid; if ( ! rowid_upper_range (rowid)) break; } } } /* if not in an error state and the last line was unterminated close the blob */ if ((rc == 0) && blobopen) { rc = KColumnBlobAssignRange (blob, rowid, 1); if (rc) { PLOGERR (klogFatal, (klogFatal, rc, "Failed to range assign blob for row $(R) at $(P)", PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix)); } else { rc = KColumnBlobCommit (blob); if (rc) { PLOGERR (klogFatal, (klogFatal, rc, "Failed to commit blob for row $(R) at $(P)", PLOG_2(PLOG_U64(R),PLOG_U64(P)), rowid, tix)); } } KColumnBlobRelease (blob); } return rc; }
rc_t txt2kdb_kfs (void) { rc_t rc; G.dir = NULL; G.txt = NULL; G.mgr = NULL; G.col = NULL; /* ----- * Mount the native filesystem as root */ rc = KDirectoryNativeDir (&G.dir); if (rc != 0) { G.dir = NULL; LOGMSG (klogFatal, "Failed to open native KDirectory"); } else { rc = KDirectoryOpenFileRead (G.dir, &G.txt, "%s", G.txtpath); if (rc != 0) { G.txt = NULL; PLOGERR (klogFatal, (klogFatal, rc, "Unable to open file at $(F)", PLOG_S(F), G.txtpath)); } else { rc = KDBManagerMakeUpdate (&G.mgr, G.dir); if (rc) { G.mgr = NULL; LOGERR (klogFatal, rc, "Unable to create a KDBManager at the current directory"); } else { KCreateMode kcm; KPathType kpt; const char * err = ""; kpt = KDirectoryPathType (G.dir, "%s", G.colpath) & ~ kptAlias; kcm = kcmCreate; /* Force means replace if exists */ /* Append means open in append mode if it exists */ switch (kpt) { case kptNotFound: kcm = kcmCreate; break; default: err = "Unknown"; rc = RC (rcExe, rcNoTarg, rcAccessing, rcPath, rcInvalid); break; case kptBadPath: err = "Bad Path"; rc = RC (rcExe, rcNoTarg, rcAccessing, rcPath, rcInvalid); break; case kptFile: case kptCharDev: case kptBlockDev: case kptFIFO: err = "Must be a Directory"; rc = RC (rcExe, rcNoTarg, rcAccessing, rcPath, rcInvalid); break; case kptDir: kcm = kcmCreate; if (G.append) { kcm = kcmOpen; } else if (G.force) { kcm = kcmInit; } break; } if (rc == 0) { rc = KDBManagerCreateColumn (G.mgr, &G.col, kcm, kcsNone, 0, "%s", G.colpath); if (rc) err = "Manager can not open column"; } if (rc) { PLOGERR (klogFatal, (klogFatal, rc, "Cannot open KColumn $(P) because $(R)", PLOG_2(PLOG_S(P),PLOG_S(R)), G.colpath, err)); } } } } return rc; }
/* run_test
 *  Two-phase round trip over one table.
 *
 *  Write phase: opens a write cursor on the columns named pb->dat_name and
 *  pb->len_name, then for each of Limit rows lets pb->func fill the data
 *  buffer and report an element count, writes the data and the count,
 *  and commits and closes the row. The cursor is committed only if every
 *  row succeeded, and is always released.
 *
 *  Read phase: opens a read cursor and, for each of Limit rows, checks that
 *  the row id is ix+1 and that the values read from the pb->len_name,
 *  pb->plen_name and pb->clen_name columns agree. Mismatches are printed to
 *  stderr and recorded in the file-level flag `failed`; they do not change
 *  the returned rc. The data column itself is not read back.
 *
 *  table [IN] - table to create the cursors on
 *  pb    [IN] - test description: column names, element bit size, generator
 *
 *  Returns 0 or the first failing rc from a cursor call.
 */
rc_t run_test (VTable * table, test_params * pb)
{
    VCursor * cursor = NULL;
    const VCursor * rcursor;
    int64_t ix;
    int64_t rowid;
    uint32_t dat;
    uint32_t len;
    uint32_t clen;
    uint32_t plen;
    rc_t rc;
    rc_t orc;
    uint8_t b [BUFSIZE];

    do
    {
        /* ---------- write phase ---------- */
        if (verbose) printf ("%s call open_write_cursot\n", __func__);
        rc = open_write_cursor (table, &cursor, &dat, &len, pb->dat_name, pb->len_name);
        if (rc)
        {
            LOGERR (klogDebug1, rc, "failed to create write cursor");
            cursor = NULL;
            break;
        }

        for (ix = 0; ix < Limit; ++ix)
        {
            uint32_t c[1];      /* element count produced by the generator */

            if (verbose) printf ("%s call VCursorOpenRow\n", __func__);
            rc = VCursorOpenRow (cursor);
            if (rc)
            {
                LOGERR (klogErr, rc, "Failed to Open Cursor");
                break;
            }

            pb->func(ix, b, c);

            if (verbose) printf ("%s call VCursorWrite %" LD64 "\n", __func__, ix);
            rc = VCursorWrite (cursor, dat, pb->bits, b, 0, *c);
            if (rc)
            {
                pLOGERR (klogErr, rc, "Write fail dat row $(R)", PLOG_I64(R), ix);
                break;
            }

            if (verbose) printf ("%s call VCursorWrite %" LD64 "\n", __func__, ix);
            rc = VCursorWrite (cursor, len, 32, c, 0, 1);
            if (rc)
            {
                pLOGERR (klogErr, rc, "Write fail len row $(R)", PLOG_I64(R), ix);
                break;
            }

            if (verbose) printf ("%s call VCursorCommitRow\n", __func__);
            rc = VCursorCommitRow (cursor);
            if (rc)
            {
                pLOGERR (klogErr, rc, "Commit fail row $(R)", PLOG_I64(R), ix);
                break;
            }

            if (verbose) printf ("%s call VCursorCloseRow\n", __func__);
            rc = VCursorCloseRow (cursor);
            if (rc)
            {
                /* report the rc that actually failed; orc has no value yet at this point */
                pLOGERR (klogErr, rc, "Close fail row $(R)", PLOG_I64(R), ix);
                break;
            }
        }

        if (ix != Limit)
            fprintf (stderr, "Quit early %d\n", (int)ix);

        if (rc)
        {
            pLOGERR (klogInfo, rc, "failed in loop $(T) $(R)",
                     PLOG_2(PLOG_S(T),PLOG_I64(R)), pb->test_name, ix);
        }
        else
        {
            if (verbose) printf ("%s call VCursorCommit\n", __func__);
            orc = VCursorCommit (cursor);
            if (orc && (rc == 0))
                rc = orc;
        }

        /* release the write cursor on success and failure alike */
        if (verbose) printf ("%s call VCursorRelease\n", __func__);
        orc = VCursorRelease (cursor);
        if (orc && (rc == 0))
            rc = orc;
        if (rc)
            break;

        /* ---------- read phase ---------- */
        if (verbose) printf ("%s call open_read_cursor\n",__func__);
        rc = open_read_cursor (table, &rcursor, &len, &plen, &clen, &dat,
                               pb->len_name, pb->plen_name, pb->clen_name, pb->dat_name);
        if (rc)
        {
            LOGERR (klogErr, rc, "failed to open read cursor");
            break;
        }

        for (ix = 0; ix < Limit; ++ix)
        {
            uint32_t l;     /* value of the len column */
            uint32_t p;     /* value of the plen column */
            uint32_t c;     /* value of the clen column */
            uint32_t r;     /* read-count output of VCursorRead */

            rc = VCursorRowId (rcursor, &rowid);
            if (rc)
            {
                pLOGERR (klogErr, rc, "failed to get rowid $(R)", PLOG_I64(R), ix);
                break;
            }
            if (rowid != ix+1)
            {
                fprintf (stderr, "ROWID failure %" LD64 ":%" LD64 "\n", ix, rowid);
                failed = true;
            }

            rc = VCursorOpenRow (rcursor);
            if (rc)
            {
                pLOGERR (klogErr, rc, "failed to open row $(R)", PLOG_I64(R), ix);
                break;
            }

            rc = VCursorRead (rcursor, len, 32, &l, 1, &r);
            if (rc)
            {
                pLOGERR (klogErr, rc, "failed to read column $(N) $(R)",
                         PLOG_2(PLOG_S(N),PLOG_I64(R)), pb->len_name, ix);
                break;
            }

            rc = VCursorRead (rcursor, clen, 32, &c, 1, &r);
            if (rc)
            {
                pLOGERR (klogErr, rc, "failed to read column $(N) $(R)",
                         PLOG_2(PLOG_S(N),PLOG_I64(R)), pb->clen_name, ix);
                break;
            }

            rc = VCursorRead (rcursor, plen, 32, &p, 1, &r);
            if (rc)
            {
                pLOGERR (klogErr, rc, "failed to read column $(N) $(R)",
                         PLOG_2(PLOG_S(N),PLOG_I64(R)), pb->plen_name, ix);
                break;
            }

            /* only the three length columns are compared; the data column is not read back */
            VCursorCloseRow (rcursor);

            if (l != p)
            {
                fprintf (stderr, "error in physical column row_len() %u != %u\n", l, p);
                failed = true;
            }
            if (l != c)
            {
                fprintf (stderr, "error in physical column row_len() %u != %u\n", l, c);
                failed = true;
            }
        }

        if (verbose) printf ("%s call VCursorRelease\n",__func__);
        orc = VCursorRelease (rcursor);
        if (orc)
        {
            /* log the release failure itself, not the (possibly zero) loop rc */
            LOGERR (klogErr, orc, "release was funky");
        }
        if (orc && (rc == 0))
            rc = orc;
        if (rc)
            break;
    }
    while (0);

    return rc;
}