/* MaxNReadsValidator_GetKey
 *  Always reports an empty key.  When the validator has a column attached,
 *  the value stored for "spot" is read as an unsigned integer of 8, 16, 32 or
 *  64 bits and compared against the global nreads_max:
 *    - above nreads_max: the read mask is cleared and a warning is logged;
 *    - equal to nreads_max - 1: only a warning is logged.
 *
 *  Returns an rcParam/rcNull code when "cself" or "key" is NULL, the
 *  SRAColumnRead failure code, or rcData/rcUnexpected for any other element
 *  size.
 */
static rc_t MaxNReadsValidator_GetKey( const SRASplitter* cself, const char** key, spotid_t spot, readmask_t* readmask )
{
    MaxNReadsValidator* self = ( MaxNReadsValidator* )cself;
    const void* data = NULL;
    bitsz_t offset = 0, bits = 0;
    uint64_t count = 0;
    rc_t rc;

    if ( self == NULL || key == NULL )
    {
        return RC( rcSRA, rcNode, rcExecuting, rcParam, rcNull );
    }

    *key = "";
    if ( self->col == NULL )
    {
        /* nothing to validate against */
        return 0;
    }

    rc = SRAColumnRead( self->col, spot, &data, &offset, &bits );
    if ( rc != 0 )
    {
        return rc;
    }

    /* widen the stored value according to its element size in bits */
    if ( bits == 8 )
    {
        count = *( ( const uint8_t* )data );
    }
    else if ( bits == 16 )
    {
        count = *( ( const uint16_t* )data );
    }
    else if ( bits == 32 )
    {
        count = *( ( const uint32_t* )data );
    }
    else if ( bits == 64 )
    {
        count = *( ( const uint64_t* )data );
    }
    else
    {
        rc = RC( rcSRA, rcNode, rcExecuting, rcData, rcUnexpected );
    }

    /* the limit checks run even for an unexpected size; count is 0 then */
    if ( count > nreads_max )
    {
        clear_readmask( readmask );
        PLOGMSG(klogWarn, (klogWarn, "too many reads $(nreads) at spot id $(row), maximum $(max) supported, skipped",
                           PLOG_3(PLOG_U64(nreads),PLOG_I64(row),PLOG_U32(max)), count, spot, nreads_max));
    }
    else if ( count == nreads_max - 1 )
    {
        PLOGMSG(klogWarn, (klogWarn, "too many reads $(nreads) at spot id $(row), truncated to $(max)",
                           PLOG_3(PLOG_U64(nreads),PLOG_I64(row),PLOG_U32(max)), count + 1, spot, nreads_max));
    }

    return rc;
}
/* SFFLoaderFmt_ReadBlock
 *  Requests "size" bytes from "file", first advancing past the
 *  self->file_advance bytes consumed by the previous block.  The resulting
 *  pointer is stored in self->file_buf and self->file_advance is reset to 0.
 *
 *  self      formatter state; file_buf and file_advance are updated
 *  file      loader file to read from
 *  size      number of bytes required; 0 only probes for more data
 *  location  text placed in front of the error message
 *  silent    when true, failures are not logged
 *
 *  Returns the SRALoaderFileRead code, or rcData/rcInsufficient when a
 *  non-empty request yields no buffer or fewer than "size" bytes.
 */
static rc_t SFFLoaderFmt_ReadBlock(SFFLoaderFmt* self, const SRALoaderFile* file, size_t size, const char* location, bool silent)
{
    size_t read = 0;
    rc_t rc = SRALoaderFileRead(file, self->file_advance, size, (const void**)&self->file_buf, &read);

    self->file_advance = 0;
    if( rc == 0 && size > 0 && (self->file_buf == NULL || read < size) ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcInsufficient);
    }
    if( rc != 0 && !silent ) {
        /* PLOG_U32 parameters are formatted as 32-bit unsigned values; passing
           a size_t through the variadic call is a type mismatch where size_t
           is wider, so convert explicitly */
        SRALoaderFile_LOG(file, klogErr, rc, "$(l), needed $(needed) got $(got) bytes",
                          PLOG_3(PLOG_S(l),PLOG_U32(needed),PLOG_U32(got)),
                          location, (uint32_t)size, (uint32_t)read);
    }
    return rc;
}
/* BufferQPopBuffer
 *  Pops one entry from the queue held in self->q and hands it back through
 *  "buff".
 *
 *  tm  timeout passed to KQueuePop; when NULL a local timeout is initialised
 *      from self->timeout and used instead.
 *
 *  On success *buff receives the popped pointer.  When the pop fails *buff is
 *  set to NULL; when the timeout initialisation fails *buff is left untouched.
 *  Returns the code of the failing call, or 0.
 */
rc_t BufferQPopBuffer (BufferQ * self, const Buffer ** buff, timeout_t * tm)
{
    timeout_t local_tm;
    void * item;
    rc_t rc = 0;

    LOGMSG (klogDebug10, "BufferQPopBuffer");

    assert (self != NULL);
    assert (buff != NULL);

    if (tm == NULL)
    {
        LOGMSG (klogDebug10, "BufferQPopBuffer tm was NULL");
        tm = &local_tm;
        rc = TimeoutInit (tm, self->timeout);
    }

    if (rc == 0)
    {
        LOGMSG (klogDebug10, "BufferQPopBuffer call KQueuePop");
        rc = KQueuePop (self->q, &item, tm);
        PLOGMSG (klogDebug10, "BufferQPopBuffer back from KQueuePop $(rc)", PLOG_U32(rc), rc);

        /* item is only meaningful when the pop succeeded */
        *buff = (rc == 0) ? item : NULL;
    }

    LOGMSG (klogDebug10, "leave BufferQPopBuffer");
    return rc;
}
/* SFFLoaderFmtWriteDataFile
 *  Main loop over one input file.  Each pass:
 *    1. reads the common header when no read of the current section has been
 *       processed yet (curr_read_number == 0); an rcData/rcIgnored result from
 *       the header reader ends the loop with success;
 *    2. when the header declares reads: skips the index, reads one read header
 *       and its data, and hands the read to the 454 writer if self->w454 is
 *       set, otherwise to the IonTorrent writer;
 *    3. logs failures, except that nothing is logged when the code's object is
 *       rcTransfer or its state is rcDone;
 *    4. once the declared number of reads has been processed, resets the
 *       counter, skips a trailing index and probes for more data; the loop
 *       ends when the probe leaves self->file_buf NULL.
 *
 *  Returns 0 or the first code that terminated the loop.
 */
static rc_t SFFLoaderFmtWriteDataFile(SFFLoaderFmt* self, const SRALoaderFile* file)
{
    rc_t rc = 0;

    while( rc == 0 ) {
        if( self->curr_read_number == 0 ) {
            rc = SFFLoaderFmtReadCommonHeader(self, file);
            if( rc == 0 ) {
                DEBUG_MSG (5, ("%s: Common header ok: %u reads\n", self->file_name, self->header.number_of_reads));
                DEBUG_MSG (8, ("%s: flow_chars: [%hu] %s\n", self->file_name, self->header.num_flows_per_read, self->flow_chars.data));
                DEBUG_MSG (8, ("%s: key_seq: [%hu] %s\n", self->file_name, self->header.key_length, self->key_seq.data));
            } else if( GetRCObject(rc) == (enum RCObject)rcData && GetRCState(rc) == rcIgnored ) {
                rc = 0;
                break;
            }
        }

        if( rc == 0 && self->header.number_of_reads != 0 ) {
            /* each step runs only if the previous one succeeded */
            rc = SFFLoaderFmtSkipIndex(self, file);
            if( rc == 0 ) {
                rc = SFFLoaderFmtReadDataHeader(self, file);
            }
            if( rc == 0 ) {
                rc = SFFLoaderFmtReadData(self, file);
            }
            if( rc == 0 ) {
                if( self->w454 ) {
                    rc = SRAWriter454_WriteRead(self->w454, file, &self->name, &self->read, &self->quality,
                                                self->skip_signal ? NULL : &self->signal,
                                                self->skip_signal ? NULL : &self->position,
                                                self->read_header.clip_quality_left,
                                                self->read_header.clip_quality_right,
                                                self->read_header.clip_adapter_left,
                                                self->read_header.clip_adapter_right);
                } else {
                    rc = SRAWriterIonTorrent_WriteRead(self->wIonTorrent, file, &self->name, &self->read, &self->quality,
                                                       self->skip_signal ? NULL : &self->signal,
                                                       self->skip_signal ? NULL : &self->position,
                                                       self->read_header.clip_quality_left,
                                                       self->read_header.clip_quality_right,
                                                       self->read_header.clip_adapter_left,
                                                       self->read_header.clip_adapter_right);
                }
                if( rc == 0 ) {
                    ++self->curr_read_number;
                }
            }
        }

        if( rc != 0 && GetRCObject(rc) != rcTransfer && GetRCState(rc) != rcDone ) {
            SRALoaderFile_LOG(file, klogErr, rc, "on or about read #$(i)", PLOG_U32(i), self->curr_read_number + 1);
        } else if( self->curr_read_number == self->header.number_of_reads ) {
            DEBUG_MSG(5, ("%s: done loading declared %u reads\n", self->file_name, self->curr_read_number));
            self->curr_read_number = 0;

            /* will skip indexes if they are at eof */
            rc = SFFLoaderFmtSkipIndex(self, file);
            if( rc == 0 ) {
                /* This should be the end of file and/or beginning of next */
                rc = SFFLoaderFmt_ReadBlock(self, file, 0, "EOF", false);
            }
            if( rc == 0 && self->file_buf == NULL ) {
                DEBUG_MSG(5, ("%s: EOF detected\n", self->file_name));
                self->index_correction = 0;
                break;
            }
        }
    }
    return rc;
}
/* CopierDoOne
 *  Performs one step of the copy loop: pops one buffer from self->q and
 *  writes it to self->f at offset self->o, advancing the offset by the number
 *  of bytes written.
 *
 *  Returns
 *    - the Quitting() code when a quit was requested;
 *    - the write error, or rcTransfer/rcIncomplete when the write was short;
 *    - 0 when the pop timed out but the queue is not sealed yet;
 *    - rcNoTarg/rcDone once the pop times out on a sealed queue and both the
 *      file and the queue have been released (self->f and self->q are set to
 *      NULL);
 *    - any other failure code of the calls involved.
 *
 *  The popped buffer is released on every path, and a write failure is never
 *  masked by the result of releasing the buffer.
 */
rc_t CopierDoOne (Copier * self)
{
    rc_t rc = 0;
    const Buffer * b;

    LOGMSG (klogDebug10, "CopierDoOne");
    rc = Quitting();
    if (rc == 0)
    {
        LOGMSG (klogDebug10, "call BufferQPopBuffer");
        rc = BufferQPopBuffer (self->q, &b, NULL);
        if (rc == 0)
        {
            size_t w = 0;   /* stays 0 if the write fails before reporting a count */
            size_t z;
            rc_t rc2;

            LOGMSG (klogDebug10, "call BufferContentGetSize");
            z = BufferContentGetSize (b);
            rc = KFileWrite (self->f, self->o, b, z, &w);
            self->o += w;
            if (rc == 0 && w != z)
                rc = RC (rcExe, rcFile, rcWriting, rcTransfer, rcIncomplete);

            /* the buffer belongs to us once popped: give it back even when
               the write failed, but keep the first error */
            rc2 = BufferRelease (b);
            if (rc == 0)
                rc = rc2;
        }
        /* ow this is ugly! */
        /* is the rc a "exhausted" on a timeout? */
        else if ((GetRCObject(rc) == rcTimeout) && (GetRCState(rc) == rcExhausted))
        {
            rc = 0;
            LOGMSG (klogDebug10, "CopierDoOne timeout");
            /* if so is the queue also sealed? */
            if (BufferQSealed (self->q) == true)
            {
                LOGMSG (klogDebug10, "CopierDoOne sealed");
                /* if both then we are done and so signal */
                rc = KFileRelease (self->f);
                PLOGMSG (klogDebug10, "CopierDoOne back from KFileRelease $(rc)",PLOG_U32(rc),rc);
                if (rc == 0)
                {
                    self->f = NULL;
                    rc = BufferQRelease (self->q);
                    if (rc == 0)
                    {
                        self->q = NULL;
                        rc = RC (rcExe, rcNoTarg, rcCopying, rcNoTarg, rcDone );
                    }
                }
            }
        }
        else
            LOGMSG (klogDebug10, "CopierDoOne pop failure");
    }
    else
        LOGMSG (klogDebug10, "CopierDoOne: quitting");
    return rc;
}
/* WriteFileMeta
 *  Stores the description of one index object under obj->meta:
 *    Format                     obj->format (C string)
 *    Format/Options/accession   g_accession
 *    Format/Options/minSpotId   obj->minSpotId
 *    Format/Options/maxSpotId   obj->maxSpotId
 *    Size                       obj->file_size  (only when > 0)
 *    Buffer                     obj->buffer_sz  (only when > 0)
 *    Index                      obj->index      (only when not empty)
 *    md5                        obj->md5_digest as lowercase hex, appended
 *                               byte by byte (only when file_size > 0)
 *
 *  Stops at the first failing call and returns its code; returns 0 when every
 *  node was written.
 */
rc_t WriteFileMeta(SIndexObj* obj)
{
    rc_t rc = 0;
    KMDataNode* node = NULL;

    PLOGMSG(klogInfo, (klogInfo, "Meta $(f) on index $(i): file size $(s), buffer $(b)",
                       PLOG_4(PLOG_S(f),PLOG_S(i),PLOG_U64(s),PLOG_U32(b)),
                       obj->file, obj->index, obj->file_size, obj->buffer_sz));

    rc = KMDataNodeOpenNodeUpdate(obj->meta, &node, "Format");
    if( rc == 0 ) {
        KMDataNode* options = NULL;

        rc = KMDataNodeWriteCString(node, obj->format);
        if( rc == 0 ) {
            rc = KMDataNodeOpenNodeUpdate(node, &options, "Options");
        }
        if( rc == 0 ) {
            KMDataNode* child = NULL;

            rc = KMDataNodeOpenNodeUpdate(options, &child, "accession");
            if( rc == 0 ) {
                rc = KMDataNodeWriteCString(child, g_accession);
                KMDataNodeRelease(child);
            }
            if( rc == 0 ) {
                rc = KMDataNodeOpenNodeUpdate(options, &child, "minSpotId");
                if( rc == 0 ) {
                    rc = KMDataNodeWriteB64(child, &obj->minSpotId);
                    KMDataNodeRelease(child);
                }
            }
            if( rc == 0 ) {
                rc = KMDataNodeOpenNodeUpdate(options, &child, "maxSpotId");
                if( rc == 0 ) {
                    rc = KMDataNodeWriteB64(child, &obj->maxSpotId);
                    KMDataNodeRelease(child);
                }
            }
            KMDataNodeRelease(options);
        }
        KMDataNodeRelease(node);
    }

    if( rc == 0 && obj->file_size > 0 ) {
        rc = KMDataNodeOpenNodeUpdate(obj->meta, &node, "Size");
        if( rc == 0 ) {
            rc = KMDataNodeWriteB64(node, &obj->file_size);
            KMDataNodeRelease(node);
        }
    }

    if( rc == 0 && obj->buffer_sz > 0 ) {
        rc = KMDataNodeOpenNodeUpdate(obj->meta, &node, "Buffer");
        if( rc == 0 ) {
            rc = KMDataNodeWriteB32(node, &obj->buffer_sz);
            KMDataNodeRelease(node);
        }
    }

    if( rc == 0 && strlen(obj->index) > 0 ) {
        rc = KMDataNodeOpenNodeUpdate(obj->meta, &node, "Index");
        if( rc == 0 ) {
            rc = KMDataNodeWriteCString(node, obj->index);
            KMDataNodeRelease(node);
        }
    }

    if( rc == 0 && obj->file_size > 0 ) {
        rc = KMDataNodeOpenNodeUpdate(obj->meta, &node, "md5");
        if( rc == 0 ) {
            char hex[5];
            size_t pos;

            /* one two-digit hex group per digest byte */
            for( pos = 0; rc == 0 && pos < sizeof(obj->md5_digest); pos++ ) {
                int len = snprintf(hex, 4, "%02x", obj->md5_digest[pos]);
                rc = KMDataNodeAppend(node, hex, len);
            }
            KMDataNodeRelease(node);
        }
    }
    return rc;
}
/* Fill
 *  Tops up the read buffer: opens the file first if self->kfile is not set,
 *  moves the unread bytes to the start of the buffer, then keeps reading
 *  until the buffer is full, a read returns no bytes (self->eof is set) or a
 *  read fails.
 *
 *  Returns the code of KLoaderFile_Open or KFileRead on failure, otherwise 0.
 */
static rc_t KLoaderFile_Fill(KLoaderFile *self)
{
    rc_t rc = 0;
    size_t room;

    if (self->kfile == NULL) {
        rc = KLoaderFile_Open(self);
    }
    if( rc != 0 ) {
        return rc;
    }

    /* free space left behind the data that is still unread */
    room = self->buffer_size - self->avail;
    if( room == 0 ) {
        return rc;
    }

#if _DEBUGGING
    if( room < self->buffer_size * 0.5 ) {
        self->small_reads++;
        if( self->small_reads > 10 ) {
            PLOGMSG(klogWarn, (klogWarn, "$(filename) INEFFECTIVE READING: $(times) times, now $(bytes) bytes",
                               PLOG_3(PLOG_S(filename),PLOG_U32(times),PLOG_U32(bytes)),
                               self->filename, self->small_reads, room));
        }
    }
#endif

    /* slide the unread tail to the front of the buffer ... */
    memmove(self->buffer, self->buffer_pos, self->avail);
    /* ... account for the consumed part in the file position ... */
    self->pos += self->buffer_pos - self->buffer;
    /* ... and restart consumption at the buffer start */
    self->buffer_pos = self->buffer;

    do {
        /* append behind the data already in the buffer, up to eof */
        size_t got = 0;

        rc = KFileRead(self->file, self->pos + self->avail, &self->buffer[self->avail], room, &got);
        if( rc == 0 ) {
            self->eof = (got == 0);
            self->avail += (uint32_t) got;
            room -= got;
            DBG(("KLoaderFile read %s from %lu %u bytes%s\n",
                 self->filename, self->pos + self->avail - got, got, self->eof ? " EOF" : ""));
        }
    } while( rc == 0 && room > 0 && !self->eof );

    return rc;
}
static rc_t XMLThread( const KThread *self, void *data ) { KDirectory *dir = NULL; PLOGMSG(klogInfo, (klogInfo, "XML sync thread started with $(s) sec", PLOG_U32(s), g_xml_sync)); do { rc_t rc = 0; KTime_t dt = 0; DEBUG_MSG(8, ("XML sync thread checking %s\n", g_xml_path)); if( (rc = KDirectoryNativeDir(&dir)) == 0 ) { rc = KDirectoryDate(dir, &dt, "%s", g_xml_path); ReleaseComplain(KDirectoryRelease, dir); } if( rc == 0 ) { if( dt != g_xml_mtime ) { const FSNode* new_root = NULL; PLOGMSG(klogInfo, (klogInfo, "File $(f) changed ($(m) <> $(d)), updating...", PLOG_3(PLOG_S(f),PLOG_I64(m),PLOG_I64(d)), g_xml_path, g_xml_mtime, dt)); if( XML_Open(g_xml_path, &new_root) == 0 ) { if( (rc = XMLLock(true)) == 0 ) { const FSNode* old_root = g_root; g_root = new_root; g_xml_mtime = dt; XMLUnlock(); FSNode_Release(old_root); PLOGMSG(klogInfo, (klogInfo, "Data from $(f) updated successfully", PLOG_S(f), g_xml_path)); } } } else { DEBUG_MSG(8, ("XML sync thread up-to-date %s\n", g_xml_path)); } } else { LOGERR(klogErr, rc, g_xml_path); } SRAList_PostRefresh(); sleep(g_xml_sync); } while( g_xml_sync > 0 ); LOGMSG(klogInfo, "XML sync thread ended"); return 0; }
static rc_t SpotIteratorInit(struct SpotIterator* self, const SRATable* tbl, const char* redactFileName) { rc_t rc = 0; assert(self && tbl && redactFileName); memset(self, 0, sizeof *self); self->m_crnSpotId = 1; rc = SRATableMaxSpotId(tbl, &self->m_maxSpotId); if (rc != 0) { logerr(klogErr, rc, "while calling SRATableMaxSpotId"); } else { plogmsg(klogInfo, "MaxSpotId = $(spot)", PLOG_U32(spot), self->m_maxSpotId); } if (rc == 0) { rc = SpotIteratorInitDirectory(); } if (rc == 0) { self->m_filename = redactFileName; plogmsg(klogInfo, "Opening '$(path)'", "path=%s", self->m_filename); rc = KDirectoryOpenFileRead( __SpotIteratorDirectory, &self->m_file, "%s", self->m_filename); if (rc != 0) { plogerr(klogErr, rc, "while opening file '$(path)'", "path=%s", self->m_filename); } } if (rc == 0) { rc = SpotIteratorReadSpotToRedact(self); } return rc; }
/* SBlobGetRange
 *  Reports through "last" the final spot id of the blob that is taken to
 *  contain spot "id".
 *
 *  For a new blob (self->m_new) the range is computed locally: it starts at
 *  (id & ~0xFFFF) + 1, spans 0x10000 ids and is capped at self->m_maxSpotId.
 *  Otherwise the range is queried from the original filter column with
 *  SRAColumnGetRange.
 *
 *  NOTE(review): for an id that is an exact multiple of 0x10000 the computed
 *  range starts at id + 1 and so does not include id itself - confirm that
 *  callers only pass the first id of a blob.
 *
 *  Returns 0, or the SRAColumnGetRange failure code (which is also logged).
 */
static rc_t SBlobGetRange(const struct SBlob* self, spotid_t id, spotid_t* last)
{
    spotid_t first = 0;
    rc_t rc = 0;

    assert(self && last);

    if (!self->m_new) {
        assert(self->m_data && self->m_data->_origFilterCol);

        rc = SRAColumnGetRange(self->m_data->_origFilterCol, id, &first, last);
        if (rc == 0) {
            plogmsg(klogDebug1, "Existing blob range for spot $(id) is "
                    "$(first) - $(last) ($(xfirst) - $(xlast))",
                    PLOG_U32(id) "," PLOG_U32(first) "," PLOG_U32(last) ","
                    PLOG_X32(xfirst) "," PLOG_X32(xlast),
                    id, first, *last, first, *last);
        } else {
            plogerr(klogErr, rc, "Cannot SRAColumnGetRange $(id)", PLOG_U32(id), id);
        }
        return rc;
    }

    first = (id & ~0xFFFF) + 1;
    *last = first + 0xFFFF;
    if (*last > self->m_maxSpotId) {
        /* never run past the last spot of the table */
        *last = self->m_maxSpotId;
    }
    plogmsg(klogDebug1, "New blob range for spot $(id) is "
            "$(first) - $(last) ($(xfirst) - $(xlast))",
            PLOG_U32(id) "," PLOG_U32(first) "," PLOG_U32(last) ","
            PLOG_X32(xfirst) "," PLOG_X32(xlast),
            id, first, *last, first, *last);
    return rc;
}
/* FastqGzip_Idx
 *  Builds the index of a block-compressed FASTQ rendering of the table.
 *  Spots are formatted through "buffer" and collected in a raw block; the
 *  block is deflated and, once the deflated size reaches g_file_block_sz (or
 *  the last spot has been read), committed as one SIndexNode on obj->li.
 *  obj->file_size, obj->md5 and obj->buffer_sz are updated for every
 *  committed block.  After each undersized attempt the number of spots needed
 *  for a full block is projected from the observed compression ratio.
 *  On success the reader settings and the zlib version are stored below the
 *  "Format/Options" metadata node.
 *
 *  sratbl     table to read
 *  obj        index object that receives nodes, size, digest and metadata
 *  buffer     scratch buffer for one formatted spot
 *  buffer_sz  size of "buffer" in bytes
 *
 *  Returns 0 or the first failure code.  The column, the reader, both work
 *  buffers and any index node that was not committed are released on every
 *  exit path.
 */
static rc_t FastqGzip_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const FastqReader* reader = NULL;

    uint16_t zlib_ver = ZLIB_VERNUM;
    uint8_t colorSpace = false;
    char* colorSpaceKey = "\0";
    uint8_t origFormat = false;
    uint8_t printLabel = true;
    uint8_t printReadId = true;
    uint8_t clipQuality = true;
    uint32_t minReadLen = 0;
    uint16_t qualityOffset = 0;

    {{
        /* colour space output is selected from the platform of the first spot */
        const SRAColumn* c = NULL;
        const uint8_t *platform = NULL;
        bitsz_t o, z;

        if( (rc = SRATableOpenColumnRead(sratbl, &c, "PLATFORM", sra_platform_id_t)) != 0 ) {
            return rc;
        }
        rc = SRAColumnRead(c, 1, (const void **)&platform, &o, &z);
        if( rc == 0 && platform != NULL && *platform == SRA_PLATFORM_ABSOLID ) {
            colorSpace = true;
        }
        /* release the column on the failure path as well */
        SRAColumnRelease(c);
        if( rc != 0 ) {
            return rc;
        }
    }}

    if( (rc = FastqReaderMake(&reader, sratbl, g_accession,
                              colorSpace, origFormat, false, printLabel, printReadId,
                              !clipQuality, minReadLen, qualityOffset, colorSpaceKey[0],
                              obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        size_t written = 0;
        uint32_t blk = 0, spots_per_block = 0, proj_id_qty = 0;
        SIndexNode* inode = NULL;
        size_t z_blk = 0;
        size_t spots_buf_sz = g_file_block_sz * 100;
        size_t zbuf_sz = spots_buf_sz + 100;
        char* zbuf = malloc(zbuf_sz);
        char* spots_buf = malloc(spots_buf_sz);
        bool eof = false;

        if( zbuf == NULL || spots_buf == NULL ) {
            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
        }
        while( rc == 0 ) {
            if( (rc = FastqReader_GetNextSpotSplitData(reader, buffer, buffer_sz, &written)) == 0 ) {
                if( inode == NULL ) {
                    /* first spot of a new block: open its index node */
                    spotid_t spotid = 0;
                    if( (rc = FastqReaderCurrentSpot(reader, &spotid)) != 0 ) {
                        break;
                    }
                    inode = malloc(sizeof *inode);
                    if( inode == NULL ) {
                        rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        break;
                    }
                    inode->key = obj->file_size;
                    inode->key_size = 0;
                    inode->id = spotid;
                    inode->id_qty = 0;
                    DEBUG_MSG(5, ("%s open key: spot %ld, offset %lu\n", obj->index, inode->id, inode->key));
                }
                if( blk + written > spots_buf_sz ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                    break;
                }
                inode->id_qty++;
                memmove(&spots_buf[blk], buffer, written);
                blk += written;
                if( g_dump ) {
                    fwrite(buffer, written, 1, stderr);
                }
            }
            if( (eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted)) ) {
                rc = 0;
                if( inode == NULL ) {
                    /* nothing pending */
                    break;
                }
            }
            if( rc == 0 &&
                (eof ||
                 (proj_id_qty == 0 && inode->id_qty > (spots_per_block * 0.95)) ||
                 (proj_id_qty > 0 && inode->id_qty >= proj_id_qty) ) ) {
                rc = ZLib_DeflateBlock(spots_buf, blk, zbuf, zbuf_sz, &z_blk);
                /* only trust z_blk after a successful deflate, and never
                   divide by an empty result */
                if( rc == 0 && z_blk > 0 ) {
                    if( z_blk < g_file_block_sz ) {
                        /* project needed id_qty */
                        proj_id_qty = g_file_block_sz * inode->id_qty / z_blk * 1.05;
                        DEBUG_MSG(5, ("%s: project id qty %u\n", obj->index, proj_id_qty));
                    } else {
                        DEBUG_MSG(10, ("%s: no projection %u > %u\n", obj->index, z_blk, g_file_block_sz));
                    }
                }
            }
            if( rc == 0 && (eof || z_blk >= g_file_block_sz) ) {
                /* commit the block; the list owns the node from here on */
                obj->file_size += z_blk;
                MD5StateAppend(&obj->md5, zbuf, z_blk);
                inode->key_size = z_blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("%s close key: spots %lu, size %lu, ratio %hu%%, raw %u\n",
                              obj->index, inode->id_qty, inode->key_size, (uint16_t)(((float)(blk - z_blk)/blk)*100), blk ));
                spots_per_block = inode->id_qty;
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
                z_blk = 0;
                proj_id_qty = 0;
            }
            if( eof ) {
                break;
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            FastqReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        /* non-NULL only when the loop stopped before the node was committed */
        free(inode);
        free(zbuf);
        free(spots_buf);
    }

    if( rc == 0 ) {
        KMDataNode* opt = NULL, *nd = NULL;

        /* no early return here: the reader still has to be released below */
        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) == 0 ) {
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "ZlibVersion")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &zlib_ver);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpace")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &colorSpace);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpaceKey")) == 0 ) {
                rc = KMDataNodeWrite(nd, colorSpaceKey, 1);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "origFormat")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &origFormat);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printLabel")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printLabel);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printReadId")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printReadId);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "clipQuality")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &clipQuality);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "minReadLen")) == 0 ) {
                rc = KMDataNodeWriteB32(nd, &minReadLen);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "qualityOffset")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &qualityOffset);
                KMDataNodeRelease(nd);
            }
            KMDataNodeRelease(opt);
        }
    }
    FastqReaderWhack(reader);
    return rc;
}
/* Fastq_Idx
 *  Builds the index of an uncompressed FASTQ rendering of the table.  The
 *  reader settings are first stored below the "Format/Options" metadata node.
 *  Spots are then formatted one at a time through "buffer"; their bytes are
 *  added to obj->file_size and obj->md5, and an SIndexNode is pushed onto
 *  obj->li each time the running block has reached g_file_block_sz bytes and
 *  once more when the rows are exhausted.
 *
 *  sratbl     table to read
 *  obj        index object that receives nodes, size, digest and metadata
 *  buffer     scratch buffer for one formatted spot
 *  buffer_sz  size of "buffer" in bytes
 *
 *  Returns 0 or the first failure code.  An empty spot range produces no
 *  index node.  A reader failure stops the loop immediately, so no data is
 *  accounted for the failed spot and the code is not overwritten.  The
 *  column, the reader and an uncommitted index node are released on every
 *  exit path.
 */
static rc_t Fastq_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const FastqReader* reader = NULL;

    uint8_t colorSpace = false;
    char* colorSpaceKey = "\0";
    uint8_t origFormat = false;
    uint8_t printLabel = true;
    uint8_t printReadId = true;
    uint8_t clipQuality = true;
    uint32_t minReadLen = 0;
    uint16_t qualityOffset = 0;

    {{
        /* colour space output is selected from the platform of the first spot */
        const SRAColumn* c = NULL;
        const uint8_t *platform = NULL;
        bitsz_t o, z;

        if( (rc = SRATableOpenColumnRead(sratbl, &c, "PLATFORM", sra_platform_id_t)) != 0 ) {
            return rc;
        }
        rc = SRAColumnRead(c, 1, (const void **)&platform, &o, &z);
        if( rc == 0 && platform != NULL && *platform == SRA_PLATFORM_ABSOLID ) {
            colorSpace = true;
        }
        /* release the column on the failure path as well */
        SRAColumnRelease(c);
        if( rc != 0 ) {
            return rc;
        }
    }}

    if( (rc = FastqReaderMake(&reader, sratbl, g_accession,
                              colorSpace, origFormat, false, printLabel, printReadId,
                              !clipQuality, minReadLen, qualityOffset, colorSpaceKey[0],
                              obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        KMDataNode* opt = NULL, *nd = NULL;

        /* no early return here: the reader still has to be released below */
        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) == 0 ) {
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpace")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &colorSpace);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpaceKey")) == 0 ) {
                rc = KMDataNodeWrite(nd, colorSpaceKey, 1);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "origFormat")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &origFormat);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printLabel")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printLabel);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printReadId")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printReadId);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "clipQuality")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &clipQuality);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "minReadLen")) == 0 ) {
                rc = KMDataNodeWriteB32(nd, &minReadLen);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "qualityOffset")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &qualityOffset);
                KMDataNodeRelease(nd);
            }
            KMDataNodeRelease(opt);
        }
    }

    if( rc == 0 ) {
        size_t written = 0;
        uint32_t blk = 0;
        SIndexNode* inode = NULL;

        while( rc == 0 ) {
            bool eof;

            rc = FastqReader_GetNextSpotSplitData(reader, buffer, buffer_sz, &written);
            eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted);

            /* close the running block; there is none when the very first
               read already reports the end of the rows */
            if( inode != NULL && (blk >= g_file_block_sz || eof) ) {
                inode->key_size = blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("Fastq index closed spots %lu, offset %lu, block size %lu\n",
                              inode->id_qty, inode->key, inode->key_size));
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
            }
            if( eof ) {
                rc = 0;
                break;
            }
            if( rc != 0 ) {
                /* real reader failure: "written" is not valid, and the code
                   must not be replaced by a later call */
                break;
            }
            if( inode == NULL ) {
                spotid_t spotid = 0;
                if( (rc = FastqReaderCurrentSpot(reader, &spotid)) != 0 ) {
                    break;
                }
                inode = malloc(sizeof *inode);
                if( inode == NULL ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                    break;
                }
                inode->key = obj->file_size;
                inode->key_size = 0;
                inode->id = spotid;
                inode->id_qty = 0;
                DEBUG_MSG(5, ("Fastq index opened spot %ld, offset %lu\n", inode->id, inode->key));
            }
            inode->id_qty++;
            obj->file_size += written;
            blk += written;
            MD5StateAppend(&obj->md5, buffer, written);
            if( g_dump ) {
                fwrite(buffer, written, 1, stderr);
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            FastqReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        /* non-NULL only when the loop stopped before the node was committed */
        free(inode);
    }
    FastqReaderWhack(reader);
    return rc;
}
/* SFF_Idx
 *  Builds the index of an SFF rendering of the table.  Spots are formatted
 *  one at a time through "buffer"; their bytes are added to obj->file_size
 *  and obj->md5, and an SIndexNode is pushed onto obj->li each time the
 *  running block has reached g_file_block_sz bytes and once more when the
 *  rows are exhausted.  When a block opens at spot 1 the SFF file header is
 *  accounted in front of that spot.
 *
 *  sratbl     table to read
 *  obj        index object that receives nodes, size and digest
 *  buffer     scratch buffer for one formatted spot
 *  buffer_sz  size of "buffer" in bytes
 *
 *  Returns 0 or the first failure code.  An empty spot range produces no
 *  index node.  A reader or header failure stops the loop immediately, so no
 *  data is accounted for the failed spot and the code is not overwritten.
 *  The reader and an uncommitted index node are released on every exit path.
 */
static rc_t SFF_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const SFFReader* reader = NULL;

    if( (rc = SFFReaderMake(&reader, sratbl, g_accession, obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        size_t written = 0;
        uint32_t blk = 0;
        SIndexNode* inode = NULL;

        while( rc == 0 ) {
            bool eof;

            rc = SFFReader_GetNextSpotData(reader, buffer, buffer_sz, &written);
            eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted);

            /* close the running block; there is none when the very first
               read already reports the end of the rows */
            if( inode != NULL && (blk >= g_file_block_sz || eof) ) {
                inode->key_size = blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("SFF index closed spots %lu, offset %lu, block size %lu\n",
                              inode->id_qty, inode->key, inode->key_size));
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
            }
            if( eof ) {
                rc = 0;
                break;
            }
            if( rc != 0 ) {
                /* real reader failure: "written" is not valid, and the code
                   must not be replaced by a later call */
                break;
            }
            if( inode == NULL ) {
                spotid_t spotid = 0;
                if( (rc = SFFReaderCurrentSpot(reader, &spotid)) != 0 ) {
                    break;
                }
                inode = malloc(sizeof *inode);
                if( inode == NULL ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                    break;
                }
                inode->key = obj->file_size;
                inode->key_size = 0;
                inode->id = spotid;
                inode->id_qty = 0;
                DEBUG_MSG(5, ("SFF index opened spot %ld, offset %lu\n", inode->id, inode->key));
                if( spotid == 1 ) {
                    /* the file header precedes the first spot */
                    char hd[10240];
                    size_t hd_sz = 0;
                    if( (rc = SFFReaderHeader(reader, 0, hd, sizeof(hd), &hd_sz)) != 0 ) {
                        break;
                    }
                    obj->file_size += hd_sz;
                    blk += hd_sz;
                    MD5StateAppend(&obj->md5, hd, hd_sz);
                    if( g_dump ) {
                        fwrite(hd, hd_sz, 1, stderr);
                    }
                }
            }
            obj->file_size += written;
            blk += written;
            inode->id_qty++;
            MD5StateAppend(&obj->md5, buffer, written);
            if( g_dump ) {
                fwrite(buffer, written, 1, stderr);
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            SFFReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        /* non-NULL only when the loop stopped before the node was committed */
        free(inode);
    }
    SFFReaderWhack(reader);
    return rc;
}
/******************************************************************************* * KMain - defined for use with kapp library *******************************************************************************/ rc_t CC KMain ( int argc, char* argv[] ) { rc_t rc = 0; int i; const char* arg; uint64_t total_spots = 0; const SRAMgr* sraMGR = NULL; SRADumperFmt fmt; bool to_stdout = false, do_gzip = false, do_bzip2 = false; char const* outdir = NULL; spotid_t minSpotId = 1; spotid_t maxSpotId = ~0; bool sub_dir = false; bool keep_empty = false; const char* table_path[10240]; int table_path_qty = 0; char const* D_option = NULL; char const* P_option = NULL; char P_option_buffer[4096]; const char* accession = NULL; const char* table_name = NULL; bool spot_group_on = false; int spot_groups = 0; char* spot_group[128] = {NULL}; bool read_filter_on = false; SRAReadFilter read_filter = 0xFF; bool failed_to_open = false; /* for the fasta-ouput of fastq-dump: branch out completely of 'common' code */ if ( fasta_dump_requested( argc, argv ) ) { return fasta_dump( argc, argv ); } /* Prepare for the worst: report this information after disaster */ ReportBuildDate ( __DATE__ ); memset( &fmt, 0, sizeof( fmt ) ); rc = SRADumper_Init( &fmt ); if ( rc != 0 ) { LOGERR(klogErr, rc, "formatter initialization"); return 100; } else if ( fmt.get_factory == NULL ) { rc = RC( rcExe, rcFormatter, rcValidating, rcInterface, rcNull ); LOGERR( klogErr, rc, "formatter factory" ); return 101; } else { rc = SRADumper_ArgsValidate( argv[0], &fmt ); if ( rc != 0 ) { LOGERR( klogErr, rc, "formatter args list" ); return 102; } } if ( argc < 2 ) { CoreUsage( argv[0], &fmt, true, EXIT_FAILURE ); return 0; } for ( i = 1; i < argc; i++ ) { arg = argv[ i ]; if ( arg[ 0 ] != '-' ) { uint32_t k; for ( k = 0; k < table_path_qty; k++ ) { if ( strcmp( arg, table_path[ k ] ) == 0 ) { break; } } if ( k >= table_path_qty ) { if ( ( table_path_qty + 1 ) >= ( sizeof( table_path ) / sizeof( table_path[ 0 ] ) ) ) { rc = RC( 
rcExe, rcArgv, rcReading, rcBuffer, rcInsufficient ); goto Catch; } table_path[ table_path_qty++ ] = arg; } continue; } arg = NULL; if ( SRADumper_GetArg( &fmt, "L", "log-level", &i, argc, argv, &arg ) ) { rc = LogLevelSet( arg ); if ( rc != 0 ) { PLOGERR( klogErr, ( klogErr, rc, "log level $(lvl)", PLOG_S( lvl ), arg ) ); goto Catch; } } else if ( SRADumper_GetArg( &fmt, NULL, OPTION_REPORT, &i, argc, argv, &arg ) ) { } else if ( SRADumper_GetArg( &fmt, "+", "debug", &i, argc, argv, &arg ) ) { #if _DEBUGGING rc = KDbgSetString( arg ); if ( rc != 0 ) { PLOGERR( klogErr, ( klogErr, rc, "debug level $(lvl)", PLOG_S( lvl ), arg ) ); goto Catch; } #endif } else if ( SRADumper_GetArg( &fmt, "H", "help", &i, argc, argv, NULL ) || SRADumper_GetArg( &fmt, "?", "h", &i, argc, argv, NULL ) ) { CoreUsage( argv[ 0 ], &fmt, false, EXIT_SUCCESS ); } else if ( SRADumper_GetArg( &fmt, "V", "version", &i, argc, argv, NULL ) ) { HelpVersion ( argv[ 0 ], KAppVersion() ); return 0; } else if ( SRADumper_GetArg( &fmt, "v", NULL, &i, argc, argv, NULL ) ) { KStsLevelAdjust( 1 ); } else if ( SRADumper_GetArg( &fmt, "D", "table-path", &i, argc, argv, &D_option ) ) { LOGMSG( klogErr, "option -D is deprecated, see --help" ); } else if ( SRADumper_GetArg( &fmt, "P", "path", &i, argc, argv, &P_option ) ) { LOGMSG( klogErr, "option -P is deprecated, see --help" ); } else if ( SRADumper_GetArg( &fmt, "A", "accession", &i, argc, argv, &accession ) ) { } else if ( SRADumper_GetArg( &fmt, "O", "outdir", &i, argc, argv, &outdir ) ) { } else if ( SRADumper_GetArg( &fmt, "Z", "stdout", &i, argc, argv, NULL ) ) { to_stdout = true; } else if ( fmt.gzip && SRADumper_GetArg( &fmt, NULL, "gzip", &i, argc, argv, NULL ) ) { do_gzip = true; } else if ( fmt.bzip2 && SRADumper_GetArg( &fmt, NULL, "bzip2", &i, argc, argv, NULL ) ) { do_bzip2 = true; } else if ( SRADumper_GetArg( &fmt, NULL, "table", &i, argc, argv, &table_name ) ) { } else if ( SRADumper_GetArg( &fmt, "N", "minSpotId", &i, argc, argv, &arg ) ) { 
minSpotId = AsciiToU32( arg, NULL, NULL ); } else if ( SRADumper_GetArg( &fmt, "X", "maxSpotId", &i, argc, argv, &arg ) ) { maxSpotId = AsciiToU32( arg, NULL, NULL ); } else if ( SRADumper_GetArg( &fmt, "G", "spot-group", &i, argc, argv, NULL ) ) { spot_group_on = true; } else if ( SRADumper_GetArg( &fmt, NULL, "spot-groups", &i, argc, argv, NULL ) ) { if ( i + 1 < argc && argv[ i + 1 ][ 0 ] != '-' ) { int f = 0, t = 0; i++; while ( argv[ i ][ t ] != '\0' ) { if ( argv[ i ][ t ] == ',' ) { if ( t - f > 0 ) { spot_group[ spot_groups++ ] = strndup( &argv[ i ][ f ], t - f ); } f = t + 1; } t++; } if ( t - f > 0 ) { spot_group[ spot_groups++ ] = strndup( &argv[ i ][ f ], t - f ); } if ( spot_groups < 1 ) { rc = RC( rcApp, rcArgv, rcReading, rcParam, rcEmpty ); PLOGERR( klogErr, ( klogErr, rc, "$(p)", PLOG_S( p ), argv[ i - 1 ] ) ); CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE ); } spot_group[ spot_groups ] = NULL; } } else if ( SRADumper_GetArg( &fmt, "R", "read-filter", &i, argc, argv, NULL ) ) { read_filter_on = true; if ( i + 1 < argc && argv[ i + 1 ][ 0 ] != '-' ) { i++; if ( read_filter != 0xFF ) { rc = RC( rcApp, rcArgv, rcReading, rcParam, rcDuplicate ); PLOGERR( klogErr, ( klogErr, rc, "$(p): $(o)", PLOG_2( PLOG_S( p ),PLOG_S( o ) ), argv[ i - 1 ], argv[ i ] ) ); CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE ); } if ( strcasecmp( argv[ i ], "pass" ) == 0 ) { read_filter = SRA_READ_FILTER_PASS; } else if ( strcasecmp( argv[ i ], "reject" ) == 0 ) { read_filter = SRA_READ_FILTER_REJECT; } else if ( strcasecmp( argv[ i ], "criteria" ) == 0 ) { read_filter = SRA_READ_FILTER_CRITERIA; } else if ( strcasecmp( argv[ i ], "redacted" ) == 0 ) { read_filter = SRA_READ_FILTER_REDACTED; } else { /* must be accession */ i--; } } } else if ( SRADumper_GetArg( &fmt, "T", "group-in-dirs", &i, argc, argv, NULL ) ) { sub_dir = true; } else if ( SRADumper_GetArg( &fmt, "K", "keep-empty-files", &i, argc, argv, NULL ) ) { keep_empty = true; } else if ( SRADumper_GetArg( &fmt, 
NULL, "no-user-settings", &i, argc, argv, NULL ) ) { KConfigDisableUserSettings (); } else if ( fmt.add_arg && fmt.add_arg( &fmt, SRADumper_GetArg, &i, argc, argv ) ) { } else { rc = RC( rcApp, rcArgv, rcReading, rcParam, rcIncorrect ); PLOGERR( klogErr, ( klogErr, rc, "$(p)", PLOG_S( p ), argv[ i ] ) ); CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE ); } } if ( to_stdout ) { if ( outdir != NULL || sub_dir || keep_empty || spot_group_on || ( read_filter_on && read_filter == 0xFF ) ) { LOGMSG( klogWarn, "stdout mode is set, some options are ignored" ); spot_group_on = false; if ( read_filter == 0xFF ) { read_filter_on = false; } } KOutHandlerSetStdErr(); KStsHandlerSetStdErr(); KLogHandlerSetStdErr(); ( void ) KDbgHandlerSetStdErr(); } if ( do_gzip && do_bzip2 ) { rc = RC( rcApp, rcArgv, rcReading, rcParam, rcAmbiguous ); LOGERR( klogErr, rc, "output compression method" ); CoreUsage( argv[ 0 ], &fmt, false, EXIT_FAILURE ); } if ( minSpotId > maxSpotId ) { spotid_t temp = maxSpotId; maxSpotId = minSpotId; minSpotId = temp; } if ( table_path_qty == 0 ) { if ( D_option != NULL && D_option[ 0 ] != '\0' ) { /* support deprecated '-D' option */ table_path[ table_path_qty++ ] = D_option; } else if ( accession == NULL || accession[ 0 ] == '\0' ) { /* must have accession to proceed */ rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcEmpty ); LOGERR( klogErr, rc, "expected accession" ); goto Catch; } else if ( P_option != NULL && P_option[ 0 ] != '\0' ) { /* support deprecated '-P' option */ i = snprintf( P_option_buffer, sizeof( P_option_buffer ), "%s/%s", P_option, accession ); if ( i < 0 || i >= sizeof( P_option_buffer ) ) { rc = RC( rcExe, rcArgv, rcValidating, rcParam, rcExcessive ); LOGERR( klogErr, rc, "path too long" ); goto Catch; } table_path[ table_path_qty++ ] = P_option_buffer; } else { table_path[ table_path_qty++ ] = accession; } } rc = SRAMgrMakeRead( &sraMGR ); if ( rc != 0 ) { LOGERR( klogErr, rc, "failed to open SRA manager" ); goto Catch; } else { rc = 
SRASplitterFactory_FilerInit( to_stdout, do_gzip, do_bzip2, sub_dir, keep_empty, outdir ); if ( rc != 0 ) { LOGERR( klogErr, rc, "failed to initialize files" ); goto Catch; } } { const VDBManager* vmgr = NULL; rc_t rc2 = SRAMgrGetVDBManagerRead( sraMGR, &vmgr ); if ( rc2 != 0 ) { LOGERR( klogErr, rc2, "while calling SRAMgrGetVDBManagerRead" ); } rc2 = ReportSetVDBManager( vmgr ); VDBManagerRelease( vmgr ); } /* loop tables */ for ( i = 0; i < table_path_qty; i++ ) { const SRASplitterFactory* fact_head = NULL; spotid_t smax, smin; SRA_DUMP_DBG( 5, ( "table path '%s', name '%s'\n", table_path[ i ], table_name ) ); if ( table_name != NULL ) { rc = SRAMgrOpenAltTableRead( sraMGR, &fmt.table, table_name, table_path[ i ] ); if ( rc != 0 ) { PLOGERR( klogErr, ( klogErr, rc, "failed to open '$(path):$(table)'", "path=%s,table=%s", table_path[ i ], table_name ) ); continue; } } ReportResetObject( table_path[ i ] ); if ( fmt.table == NULL ) { rc = SRAMgrOpenTableRead( sraMGR, &fmt.table, table_path[ i ] ); if ( rc != 0 ) { if ( UIError( rc, NULL, NULL ) ) { UITableLOGError( rc, NULL, true ); } else { PLOGERR( klogErr, ( klogErr, rc, "failed to open '$(path)'", "path=%s", table_path[ i ] ) ); if (GetRCState(rc) == rcNotFound) { failed_to_open = true; } } continue; } } /* infer accession from table_path if missing or more than one table */ fmt.accession = table_path_qty > 1 ? NULL : accession; if ( fmt.accession == NULL || fmt.accession[ 0 ] == 0 ) { char * basename; char *ext; size_t l; bool is_url = false; strcpy( P_option_buffer, table_path[ i ] ); basename = strchr ( P_option_buffer, ':' ); if ( basename ) { ++basename; if ( basename [0] == '\0' ) basename = P_option_buffer; else is_url = true; } else basename = P_option_buffer; if ( is_url ) { ext = strchr ( basename, '#' ); if ( ext ) ext[ 0 ] = '\0'; ext = strchr ( basename, '?' 
); if ( ext ) ext[ 0 ] = '\0'; } l = strlen( basename ); while ( strchr( "\\/", basename[ l - 1 ] ) != NULL ) { basename[ --l ] = '\0'; } fmt.accession = strrchr( basename, '/' ); if ( fmt.accession++ == NULL ) { fmt.accession = basename; } /* cut off [.lite].[c]sra[.nenc||.ncbi_enc] if any */ ext = strrchr( fmt.accession, '.' ); if ( ext != NULL ) { if ( strcasecmp( ext, ".nenc" ) == 0 || strcasecmp( ext, ",ncbi_enc" ) == 0 ) { *ext = '\0'; ext = strrchr( fmt.accession, '.' ); } if ( ext != NULL && ( strcasecmp( ext, ".sra" ) == 0 || strcasecmp( ext, ".csra" ) == 0 ) ) { *ext = '\0'; ext = strrchr( fmt.accession, '.' ); if ( ext != NULL && strcasecmp( ext, ".lite" ) == 0 ) { *ext = '\0'; } } } } SRA_DUMP_DBG( 5, ( "accession: '%s'\n", fmt.accession ) ); rc = SRASplitterFactory_FilerPrefix( accession ? accession : fmt.accession ); while ( rc == 0 ) { /* sort out the spot id range */ if ( ( rc = SRATableMaxSpotId( fmt.table, &smax ) ) != 0 || ( rc = SRATableMinSpotId( fmt.table, &smin ) ) != 0 ) { break; } { const struct VTable* tbl = NULL; rc_t rc2 = SRATableGetVTableRead( fmt.table, &tbl ); if ( rc == 0 ) { rc = rc2; } rc2 = ReportResetTable( table_path[i], tbl ); if ( rc == 0 ) { rc = rc2; } VTableRelease( tbl ); /* SRATableGetVTableRead adds Reference to tbl! */ } /* test if we have to dump anything... */ if ( smax < minSpotId || smin > maxSpotId ) { break; } if ( smax > maxSpotId ) { smax = maxSpotId; } if ( smin < minSpotId ) { smin = minSpotId; } /* hack to reduce looping in AddSpot: needs redesign to pass nreads along through tree */ if ( true ) /* ??? 
*/ { const SRAColumn* c = NULL; nreads_max = NREADS_MAX; rc = SRATableOpenColumnRead( fmt.table, &c, "PLATFORM", sra_platform_id_t ); if ( rc == 0 ) { const INSDC_SRA_platform_id *platform; bitsz_t o, z; rc = SRAColumnRead( c, 1, (const void **)&platform, &o, &z ); if ( rc == 0 && platform != NULL ) { if ( *platform != SRA_PLATFORM_PACBIO_SMRT ) { nreads_max = 32; } } SRAColumnRelease( c ); } else if ( GetRCState( rc ) == rcNotFound && GetRCObject( rc ) == rcColumn ) { rc = 0; } } /* table dependent */ rc = fmt.get_factory( &fmt, &fact_head ); if ( rc != 0 ) { break; } if ( fact_head == NULL ) { rc = RC( rcExe, rcFormatter, rcResolving, rcInterface, rcNull ); break; } if ( rc == 0 && ( spot_group_on || spot_groups > 0 ) ) { const SRASplitterFactory* f = NULL; rc = SpotGroupSplitterFactory_Make( &f, fmt.table, spot_group_on, spot_group ); if ( rc == 0 ) { rc = SRASplitterFactory_AddNext( f, fact_head ); if ( rc == 0 ) { fact_head = f; } else { SRASplitterFactory_Release( f ); } } } if ( rc == 0 && read_filter_on ) { const SRASplitterFactory* f = NULL; rc = ReadFilterSplitterFactory_Make( &f, fmt.table, read_filter ); if ( rc == 0 ) { rc = SRASplitterFactory_AddNext( f, fact_head ); if ( rc == 0 ) { fact_head = f; } else { SRASplitterFactory_Release( f ); } } } if ( rc == 0 ) { /* this filter takes over head of chain to be first and kill off bad NREADS */ const SRASplitterFactory* f = NULL; rc = MaxNReadsValidatorFactory_Make( &f, fmt.table ); if ( rc == 0 ) { rc = SRASplitterFactory_AddNext( f, fact_head ); if ( rc == 0 ) { fact_head = f; } else { SRASplitterFactory_Release( f ); } } } rc = SRASplitterFactory_Init( fact_head ); if ( rc == 0 ) { /* ********************************************************** */ rc = SRADumper_DumpRun( fmt.table, smin, smax, fact_head ); /* ********************************************************** */ if ( rc == 0 ) { uint64_t total = 0, file = 0; SRASplitterFactory_FilerReport( &total, &file ); OUTMSG(( "Written %lu spots for %s\n", 
total - total_spots, table_path[ i ] )); if ( to_stdout && total > 0 ) { PLOGMSG( klogInfo, ( klogInfo, "$(t) biggest file has $(n) spots", PLOG_2( PLOG_S( t ), PLOG_U64( n ) ), table_path[ i ], file )); } total_spots = total; } } break; } SRASplitterFactory_Release( fact_head ); SRATableRelease( fmt.table ); fmt.table = NULL; if ( rc == 0 ) { PLOGMSG( klogInfo, ( klogInfo, "$(path)$(dot)$(table) $(spots) spots", PLOG_4(PLOG_S(path),PLOG_S(dot),PLOG_S(table),PLOG_U32(spots)), table_path[ i ], table_name ? ":" : "", table_name ? table_name : "", smax - smin + 1 ) ); } else if ( !reportToUser( rc, argv [0 ] ) ) { PLOGERR( klogErr, ( klogErr, rc, "failed $(path)$(dot)$(table)", PLOG_3(PLOG_S(path),PLOG_S(dot),PLOG_S(table)), table_path[ i ], table_name ? ":" : "", table_name ? table_name : "" ) ); } } Catch: if ( fmt.release ) { rc_t rr = fmt.release( &fmt ); if ( rr != 0 ) { SRA_DUMP_DBG( 1, ( "formatter release error %R\n", rr ) ); } } for ( i = 0; i < spot_groups; i++ ) { free( spot_group[ i ] ); } SRASplitterFiler_Release(); SRAMgrRelease( sraMGR ); OUTMSG(( "Written %lu spots total\n", total_spots )); if (failed_to_open) { ReportSilence(); } { /* Report execution environment if necessary */ rc_t rc2 = ReportFinalize( rc ); if ( rc == 0 ) { rc = rc2; } } return rc; }
/**
 * Write a new read-filter column, redacting the spots listed in a file.
 *
 * For every spot produced by the SpotIterator the function reads NREADS,
 * then writes one filter byte per read into column `newColName` of the
 * writable table: either SRA_READ_FILTER_REDACTED for spots flagged by the
 * iterator, or the original READ_FILTER values otherwise.
 *
 * @param self           holds the read/write tables and the source columns
 * @param newColName     name of the column opened for writing
 * @param redactFileName passed to SpotIteratorInit (list of spots to redact)
 * @param redactedSpots  incremented once for every spot that was redacted
 * @param all            receives the last spot id yielded by the iterator
 * @return 0 on success, otherwise the first error encountered
 */
static rc_t SDataUpdate(struct SData* self,
    const char* newColName, const char* redactFileName,
    spotid_t* redactedSpots, spotid_t* all)
{
    struct SBlob blob;
    /* NREADS is consumed as a single uint8_t below, so 256 entries are
       enough to serve as the redacted filter for any spot */
    uint8_t filter[256];
    rc_t rc = 0, rc2 = 0;
    uint32_t colIdx = 0;
    spotid_t spot = 0, last = 0;
    bool toRedact = false;
    struct SpotIterator it;

    assert(self && redactedSpots && all);

    memset(filter, SRA_READ_FILTER_REDACTED, sizeof filter);

    rc = SpotIteratorInit(&it, self->_rdTbl, redactFileName);
    if (rc != 0) {
        return rc;
    }

    rc = SRATableOpenColumnWrite
        (self->_wrTbl, &colIdx, NULL, newColName, sra_read_filter_t);
    if (rc != 0) {
        plogerr(klogErr, rc, "cannot open Column $(path) for Write", "path=%s", newColName);
        /* the iterator was initialized successfully: do not leak it */
        (void)SpotIteratorDestroy(&it);
        return rc;
    }

    rc = SBlobInit(&blob, self, &it);
    if (rc != 0) {
        (void)SpotIteratorDestroy(&it);
        return rc;
    }

    while (rc == 0 && SpotIteratorNext(&it, &rc, &spot, &toRedact)) {
        bitsz_t offset = 0, size = 0;
        const void *base = NULL;
        uint8_t nReads = 0;

        if (rc != 0) {
            break;
        }

        plogmsg(klogDebug2, "Spot $(spot): $(action)", PLOG_U32(spot) ",action=%s", spot, toRedact ? "redact" : "original");

        /* GET NEXT BLOB RANGE */
        if (spot == 1 || spot > last) {
            rc = SBlobGetRange(&blob, spot, &last);
            if (rc != 0) {
                break;
            }
        }
        assert(spot <= last);

        /* GET NREADS */
        rc = SRAColumnRead(self->_NReadsCol, spot, &base, &offset, &size);
        if (rc != 0) {
            logerr(klogErr, rc, "cannot SRAColumnRead");
            break;
        }
        if (offset != 0 || size != sizeof nReads * 8) {
            rc = RC(rcExe, rcColumn, rcReading, rcData, rcInvalid);
            plogerr(klogErr, rc, "Bad SRAColumnRead(\"NREADS\", $(spot)) result", PLOG_U32(spot), spot);
            /* stop here: continuing would overwrite rc and lose the error */
            break;
        }
        nReads = *((const uint8_t*) base);
        if (spot == 1) {
            if (nReads == 1) {
                plogmsg(klogInfo, "The first spot has $(nreads) read", "nreads=%d", nReads);
            } else {
                plogmsg(klogInfo, "The first spot has $(nreads) reads", "nreads=%d", nReads);
            }
        }

        /* GET READ_FILTER */
        if (toRedact) {
            base = filter;
            ++(*redactedSpots);
        } else {
            rc = SRAColumnRead(self->_origFilterCol, spot, &base, &offset, &size);
            if (rc != 0) {
                plogerr(klogErr, rc, "while calling SRAColumnRead($(name))", "name=%s", "READ_FILTER");
                break;
            }
            if (offset != 0 || size != sizeof (uint8_t) * 8 * nReads) {
                rc = RC(rcExe, rcColumn, rcReading, rcData, rcInvalid);
                plogerr(klogErr, rc, "Bad SRAColumnRead($(spot)) result", PLOG_U32(spot), spot);
                /* same as above: do not let the next call mask this error */
                break;
            }
        }

        /* WRITE ONE FILTER BYTE PER READ */
        rc = SRATableOpenSpot(self->_wrTbl, spot);
        if (rc != 0) {
            plogerr(klogErr, rc, "cannot open Spot $(id)", PLOG_U32(id), spot);
            break;
        }
        rc = SRATableWriteIdxColumn
            (self->_wrTbl, colIdx, base, 0, sizeof (uint8_t) * 8 * nReads);
        if (rc != 0) {
            logerr(klogErr, rc, "cannot SRATableWriteIdxColumn");
            break;
        }
        rc = SRATableCloseSpot(self->_wrTbl);
        if (rc != 0) {
            logerr(klogErr, rc, "cannot SRATableCloseSpot");
            break;
        }

        /* CUT THE BLOB */
        if (spot == last) {
            rc = SRATableCloseCursor(self->_wrTbl);
            if (rc != 0) {
                plogerr(klogErr, rc, "cannot SRATableCloseCursor $(id)", PLOG_U32(id), spot);
                break;
            }
        }
    }

    /* report the iterator's cleanup error only if nothing failed earlier */
    rc2 = SpotIteratorDestroy(&it);
    if (rc == 0) {
        rc = rc2;
    }

    *all = spot;

    return rc;
}
/**
 * Read and validate the SFF common header at the current file position.
 *
 * The fixed-size part is copied into self->header (byte-swapped on
 * little-endian hosts), then magic number, version, flowgram format code
 * and header length are checked. The flow characters and key sequence that
 * follow are stored in self->flow_chars / self->key_seq and handed to the
 * active writer (454 or IonTorrent).
 *
 * When a header was already read before (self->header.magic_number != 0,
 * i.e. another file follows in the same stream) the new flow chars and key
 * must match the previous ones. If the magic number is not found in that
 * situation, one retry is made after skipping the padding that rounds the
 * previous file's index section up to 8 bytes.
 *
 * @return 0 on success, otherwise an rc describing the first failure
 */
static rc_t SFFLoaderFmtReadCommonHeader(SFFLoaderFmt* self, const SRALoaderFile *file)
{
    rc_t rc = 0;
    bool skiped_idx_pad = false;
    /* 32-bit so that header + flows + key (each up to 16 bits) cannot wrap */
    uint32_t head_sz;
    SFFCommonHeader prev_head;
    pstring prev_flow_chars;
    pstring prev_key_seq;

    if( (rc = SRALoaderFile_Offset(file, &self->index_correction)) != 0 ) {
        SRALoaderFile_LOG(file, klogErr, rc, "Reading initial file position", NULL);
        return rc;
    }

SkipIndexPad:
    self->index_correction += self->file_advance;
    if( (rc = SFFLoaderFmt_ReadBlock(self, file, SFFCommonHeader_size, NULL, true)) != 0) {
        SRALoaderFile_LOG(file, klogErr, rc, "common header, needed $(needed) bytes", PLOG_U32(needed), SFFCommonHeader_size);
        return rc;
    }

    /* Snapshot the previous header only on the first pass: on the retry
       self->header already holds the mis-parsed bytes and must not replace
       the real previous header used for the consistency check below. */
    if( !skiped_idx_pad ) {
        if( self->header.magic_number != 0 ) {
            /* next file in stream, remember prev to sync to each */
            memcpy(&prev_head, &self->header, sizeof(SFFCommonHeader));
            pstring_copy(&prev_flow_chars, &self->flow_chars);
            pstring_copy(&prev_key_seq, &self->key_seq);
        } else {
            prev_head.magic_number = 0;
            prev_head.index_length = 0;
        }
    }

    memcpy(&self->header, self->file_buf, SFFCommonHeader_size);
#if __BYTE_ORDER == __LITTLE_ENDIAN
    self->header.magic_number = bswap_32(self->header.magic_number);
    self->header.version = bswap_32(self->header.version);
    self->header.index_offset = bswap_64(self->header.index_offset);
    self->header.index_length = bswap_32(self->header.index_length);
    self->header.number_of_reads = bswap_32(self->header.number_of_reads);
    self->header.header_length = bswap_16(self->header.header_length);
    self->header.key_length = bswap_16(self->header.key_length);
    self->header.num_flows_per_read = bswap_16(self->header.num_flows_per_read);
#endif

    if( self->header.magic_number != (('.'<<24)|('s'<<16)|('f'<<8)|('f'<<0)) ) {
        if( !skiped_idx_pad && prev_head.magic_number != 0 ) {
            /* possible concatination of 2 files with index at EOF and padded to 8 bytes
               with header values not padded, try skipping padding and reread */
            /* outer "% 8" yields 0 (nothing to skip) for an already aligned index */
            uint32_t pad = (8 - prev_head.index_length % 8) % 8;
            if( pad != 0 ) {
                self->file_advance += pad;
                DEBUG_MSG(5, ("%s: trying to skip over %u bytes index section padding\n", self->file_name, pad));
                skiped_idx_pad = true;
                goto SkipIndexPad;
            }
        }
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnrecognized);
        SRALoaderFile_LOG(file, klogErr, rc, "magic number: $(m)", PLOG_U32(m), self->header.magic_number);
        return rc;
    }
    if( self->header.version != 1 ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcBadVersion);
        SRALoaderFile_LOG(file, klogErr, rc, "format version $(v)", PLOG_U32(v), self->header.version);
        return rc;
    }
    if( self->header.flowgram_format_code != SFFFormatCodeUI16Hundreths ) {
        /* NOTE: add a case here if flowgram coding gets new version to support different */
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnsupported);
        SRALoaderFile_LOG(file, klogErr, rc, "common header flowgram format code", NULL);
        return rc;
    }
    if( self->header.index_length % 8 != 0 ) {
        DEBUG_MSG(5, ("%s: index_length field value is not 8 byte padded: %u\n",
                      self->file_name, self->header.index_length));
    }

    /* expected header length: fixed part + flow chars + key, rounded up to 8 */
    head_sz = SFFCommonHeader_size + self->header.num_flows_per_read + self->header.key_length;
    head_sz += (head_sz % 8) ? (8 - (head_sz % 8)) : 0;
    if( head_sz != self->header.header_length ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcFormat, rcInvalid);
        SRALoaderFile_LOG(file, klogErr, rc, "header length $(h) <> $(s) ", PLOG_2(PLOG_U16(h),PLOG_U32(s)),
                          self->header.header_length, head_sz);
        return rc;
    }

    /* read flow chars and key */
    self->file_advance = SFFCommonHeader_size;
    if( (rc = SFFLoaderFmt_ReadBlock(self, file, head_sz - SFFCommonHeader_size, "common header", false)) != 0) {
        return rc;
    }
    self->file_advance = head_sz - SFFCommonHeader_size;

    if( (rc = pstring_assign(&self->flow_chars, self->file_buf, self->header.num_flows_per_read)) != 0 ||
        (rc = pstring_assign(&self->key_seq, self->file_buf + self->header.num_flows_per_read, self->header.key_length)) != 0 ) {
        SRALoaderFile_LOG(file, klogErr, rc, "reading flows/key sequence", NULL);
        return rc;
    }
    if( prev_head.magic_number != 0 ) {
        /* next file's common header must match previous file's common header, partially */
        if( prev_head.key_length != self->header.key_length ||
            prev_head.num_flows_per_read != self->header.num_flows_per_read ||
            pstring_cmp(&prev_flow_chars, &self->flow_chars) != 0 ||
            pstring_cmp(&prev_key_seq, &self->key_seq) != 0 ) {
            rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcInconsistent);
            SRALoaderFile_LOG(file, klogErr, rc, "previous file common header differ in flows/key sequence", NULL);
        }
    }
    if( rc == 0 ) {
        if( self->w454 ) {
            rc = SRAWriter454_WriteHead(self->w454, &self->flow_chars, &self->key_seq);
        } else {
            rc = SRAWriterIonTorrent_WriteHead(self->wIonTorrent, &self->flow_chars, &self->key_seq);
        }
    }
    return rc;
}
/**
 * Get next spot from input file.
 *
 * Reads lines until a non-empty one is found, checks that it consists of
 * decimal digits only and stores the parsed id in self->spotToReduct.
 * The id must be non-zero, strictly greater than the previously stored id
 * (the file has to be sorted, without duplicates) and not greater than
 * self->maxSpotId. On success self->inBuffer is reset to 0.
 *
 * @return 0 on success or when end of file is reached without a new id,
 *         otherwise an rc describing the malformed line
 */
static rc_t SpotIteratorReadSpotToRedact(SpotIterator* self)
{
    rc_t rc = 0;

    assert(self);

    while (rc == 0 && ! self->eof) {
        rc = SpotIteratorReadLine(self);

        /* skip empty lines */
        if ((rc == 0) && (self->inBuffer > 0)) {
            const uint64_t limit = ~(uint64_t)0;
            uint64_t value = 0;
            spotid_t spot = 0;
            int i = 0;

            /* make sure the line contains digits only and accumulate the
               value directly: this avoids scanning through a pointer whose
               pointee type (spotid_t) need not match the scanf conversion,
               and keeps overflow well defined by saturating at `limit` */
            for (i = 0; i < self->inBuffer; ++i) {
                /* <ctype.h> functions require an unsigned char value */
                const unsigned char ch = (unsigned char)self->buffer[i];
                if (!isdigit(ch)) {
                    rc = RC(rcExe, rcFile, rcReading, rcChar, rcUnexpected);
                    PLOGERR(klogErr, (klogErr, rc,
                        "character '$(char)' on line $(lineno)"
                        " while reading file '$(path)': '$(line)'",
                        "char=%c," PLOG_U64(lineno) ",path=%s,line=%s",
                        self->buffer[i], self->line, self->filename, self->buffer));
                    return rc;
                }
                if (value > (limit - (uint64_t)(ch - '0')) / 10) {
                    value = limit;
                } else {
                    value = value * 10 + (uint64_t)(ch - '0');
                }
            }
            /* narrowed copy is used for log output and for the final store;
               all range checks below are done on the full 64-bit value */
            spot = (spotid_t)value;

            if (value == 0) {
                rc = RC(rcExe, rcFile, rcReading, rcString, rcInvalid);
                PLOGERR(klogErr, (klogErr, rc,
                    "bad spot id '0' on line $(lineno) "
                    "while reading file '$(path)': '$(line)'",
                    PLOG_U64(lineno) ",path=%s,line=%s",
                    self->line, self->filename, self->buffer));
            }
            else if (value == (uint64_t)self->spotToReduct) {
                rc = RC(rcExe, rcFile, rcReading, rcString, rcInvalid);
                PLOGERR(klogErr, (klogErr, rc,
                    "duplicated spot id '$(spot)' "
                    "on line $(lineno) while reading file '$(path)': '$(line)'",
                    PLOG_U32(spot) "," PLOG_U64(lineno) ",path=%s,line=%s",
                    spot, self->line, self->filename, self->buffer));
            }
            else if (value < (uint64_t)self->spotToReduct) {
                rc = RC(rcExe, rcFile, rcReading, rcString, rcInvalid);
                PLOGERR(klogErr, (klogErr, rc,
                    "File '$(path)' is unsorted. "
                    "$(id) < $(last). See line $(lineno): '$(line)'",
                    "path=%s," PLOG_U32(id) "," PLOG_U32(last) ","
                    PLOG_U64(lineno) ",line=%s",
                    self->filename, spot, self->spotToReduct,
                    self->line, self->buffer));
            }
            else if (value > (uint64_t)self->maxSpotId) {
                rc = RC(rcExe, rcFile, rcReading, rcString, rcInvalid);
                PLOGERR(klogErr, (klogErr, rc,
                    "spotId $(spot) on line $(lineno) "
                    "of file '$(path)' is bigger that the max spotId $(max): "
                    "'$(line)'",
                    PLOG_U32(spot) "," PLOG_U64(lineno) ",path=%s,"
                    PLOG_U32(max) ",line=%s",
                    spot, self->line, self->filename,
                    self->maxSpotId, self->buffer));
            }
            else {
                self->spotToReduct = spot;
                self->inBuffer = 0;
            }
            /* one non-empty line is consumed per call, valid or not */
            break;
        }
    }

    return rc;
}
/**
 * Build the index of independently deflated SFF blocks for a table.
 *
 * Spots are pulled from an SFFReader and accumulated in a raw buffer
 * (preceded by the SFF header when the block starts at spot 1). When the
 * accumulated spots are expected to compress to about g_file_block_sz, the
 * buffer is deflated; a block is closed once its compressed size reaches
 * g_file_block_sz or the reader is exhausted. Each closed block becomes an
 * SIndexNode appended to obj->li, and obj->file_size, obj->md5 and
 * obj->buffer_sz are updated. On success the zlib version number is stored
 * in the "Format/Options/ZlibVersion" metadata node.
 *
 * @param sratbl    table to read spots from
 * @param obj       index object being filled
 * @param buffer    scratch buffer receiving one spot at a time
 * @param buffer_sz size of `buffer` in bytes
 * @return 0 on success, otherwise the first error encountered
 */
static rc_t SFFGzip_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    uint16_t zlib_ver = ZLIB_VERNUM;
    const SFFReader* reader = NULL;

    if( (rc = SFFReaderMake(&reader, sratbl, g_accession, obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        size_t written = 0;
        uint32_t blk = 0, spots_per_block = 0, proj_id_qty = 0;
        SIndexNode* inode = NULL;
        size_t z_blk = 0;
        size_t spots_buf_sz = g_file_block_sz * 100;
        size_t zbuf_sz = spots_buf_sz + 100;
        char* zbuf = malloc(zbuf_sz);
        char* spots_buf = malloc(spots_buf_sz);
        bool eof = false;

        if( zbuf == NULL || spots_buf == NULL ) {
            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
        }
        while( rc == 0 ) {
            if( (rc = SFFReader_GetNextSpotData(reader, buffer, buffer_sz, &written)) == 0 ) {
                if( inode == NULL ) {
                    /* first spot of a new block: open a fresh index node */
                    spotid_t spotid = 0;
                    if( (rc = SFFReaderCurrentSpot(reader, &spotid)) != 0 ) {
                        break;
                    }
                    inode = malloc(sizeof(SIndexNode));
                    if( inode == NULL ) {
                        rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        break;
                    }
                    inode->key = obj->file_size;
                    inode->key_size = 0;
                    inode->id = spotid;
                    inode->id_qty = 0;
                    DEBUG_MSG(5, ("%s open key: spot %ld, offset %lu\n", obj->index, inode->id, inode->key));
                    if( spotid == 1 ) {
                        /* the very first block starts with the SFF header */
                        char hd[10240];
                        size_t hd_sz = 0;
                        if( (rc = SFFReaderHeader(reader, 0, hd, sizeof(hd), &hd_sz)) != 0 ) {
                            /* do not keep going with a block lacking its header */
                            break;
                        }
                        if( hd_sz + written > spots_buf_sz ) {
                            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                            break;
                        }
                        memmove(&spots_buf[blk], hd, hd_sz);
                        blk += hd_sz;
                        if( g_dump ) {
                            fwrite(hd, hd_sz, 1, stderr);
                        }
                    }
                }
                if( blk + written > spots_buf_sz ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                    break;
                }
                inode->id_qty++;
                memmove(&spots_buf[blk], buffer, written);
                blk += written;
                if( g_dump ) {
                    fwrite(buffer, written, 1, stderr);
                }
            }
            /* running out of rows is the normal end of input, not an error */
            if( (eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted)) ) {
                rc = 0;
                if( inode == NULL ) {
                    break;
                }
            }
            if( rc == 0 &&
                (eof ||
                 (proj_id_qty == 0 && inode->id_qty > (spots_per_block * 0.95)) ||
                 (proj_id_qty > 0 && inode->id_qty >= proj_id_qty) ) ) {
                rc = ZLib_DeflateBlock(spots_buf, blk, zbuf, zbuf_sz, &z_blk);
                if( rc != 0 ) {
                    /* z_blk is not trustworthy after a failed deflate */
                    break;
                }
                if( z_blk < g_file_block_sz ) {
                    /* project needed id_qty; guard the division against an empty result */
                    if( z_blk > 0 ) {
                        proj_id_qty = g_file_block_sz * inode->id_qty / z_blk * 1.05;
                    }
                    DEBUG_MSG(5, ("%s: project id qty %lu\n", obj->index, proj_id_qty));
                } else {
                    DEBUG_MSG(10, ("%s: no projection %lu > %lu\n", obj->index, z_blk, g_file_block_sz));
                }
            }
            if( rc == 0 && (eof || z_blk >= g_file_block_sz) ) {
                /* close the block: ownership of inode moves to obj->li */
                obj->file_size += z_blk;
                MD5StateAppend(&obj->md5, zbuf, z_blk);
                inode->key_size = z_blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("%s close key: spots %lu, size %lu, ratio %hu%%, raw %lu\n",
                         obj->index, inode->id_qty, inode->key_size, (uint16_t)(((float)(blk - z_blk)/blk)*100), blk));
                spots_per_block = inode->id_qty;
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
                z_blk = 0;
                proj_id_qty = 0;
            }
            if( eof ) {
                break;
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            SFFReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        /* a node that never reached obj->li (error exit) is still ours */
        free(inode);
        free(zbuf);
        free(spots_buf);
    }
    if( rc == 0 ) {
        KMDataNode* opt = NULL, *nd = NULL;

        /* no early return here, so that the reader below is always released */
        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) == 0 ) {
            if( (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "ZlibVersion")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &zlib_ver);
                KMDataNodeRelease(nd);
            }
            KMDataNodeRelease(opt);
        }
    }
    SFFReaderWhack(reader);
    return rc;
}