static rc_t SRAFastqFile_Read(const SRAFastqFile* self, uint64_t pos, void *buffer, size_t size, size_t *num_read) { rc_t rc = 0; if( pos >= self->file_sz ) { *num_read = 0; } else if( (rc = KLockAcquire(self->lock)) == 0 ) { do { if( pos < self->from || pos >= (self->from + self->size) ) { int64_t id = 0; uint64_t id_qty = 0; DEBUG_MSG(10, ("Caching for pos %lu %lu bytes\n", pos, size - *num_read)); if( (rc = KIndexFindU64(self->kidx, pos, &((SRAFastqFile*)self)->from, &((SRAFastqFile*)self)->size, &id, &id_qty)) == 0 ) { DEBUG_MSG(10, ("Caching from %lu:%lu, %lu bytes\n", self->from, self->from + self->size - 1, self->size)); DEBUG_MSG(10, ("Caching spot %ld, %lu spots\n", id, id_qty)); if( (rc = FastqReaderSeekSpot(self->reader, id)) == 0 ) { size_t inbuf = 0, w = 0; char* b = self->buf; uint64_t left = self->buffer_sz; do { if( (rc = FastqReader_GetCurrentSpotSplitData(self->reader, b, left, &w)) != 0 ) { break; } b += w; left -= w; inbuf += w; --id_qty; } while( id_qty > 0 && (rc = FastqReaderNextSpot(self->reader)) == 0); if( GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted ) { DEBUG_MSG(10, ("No more rows\n")); rc = 0; } DEBUG_MSG(8, ("Cached %u bytes\n", inbuf)); if( self->gzipped != NULL ) { size_t compressed = 0; if( (rc = ZLib_DeflateBlock(self->buf, inbuf, self->gzipped, self->buffer_sz, &compressed)) == 0 ) { char* b = self->buf; ((SRAFastqFile*)self)->buf = self->gzipped; ((SRAFastqFile*)self)->gzipped = b; ((SRAFastqFile*)self)->size = compressed; DEBUG_MSG(10, ("gzipped %lu bytes\n", self->size)); } } } } } if( rc == 0 ) { off_t from = pos - self->from; size_t q = (self->size - from) > (size - *num_read) ? (size - *num_read) : (self->size - from); DEBUG_MSG(10, ("Copying from %lu %u bytes\n", from, q)); memcpy(&((char*)buffer)[*num_read], &self->buf[from], q); *num_read = *num_read + q; pos += q; } } while( rc == 0 && *num_read < size && pos < self->file_sz ); ReleaseComplain(KLockUnlock, self->lock); } return rc; }
/*
 * Builds the block index for the gzip-compressed FASTQ rendering of `sratbl`
 * and records the formatting options in the index metadata.
 *
 * Spots are read one at a time into `buffer` and appended to a staging
 * buffer. The staging buffer is trial-deflated at end of data, when the
 * spot count exceeds 95% of the previous block's spot count, or when a
 * projected spot count is reached. A block is closed once the deflated size
 * reaches g_file_block_sz (or at end of data): its SIndexNode is appended to
 * obj->li, obj->file_size and obj->md5 are advanced by the deflated bytes
 * and obj->buffer_sz tracks the largest uncompressed block seen.
 *
 * sratbl     table to read spots from
 * obj        index being built (uses minSpotId/maxSpotId, file_size, md5,
 *            li, buffer_sz, meta, index)
 * buffer     scratch buffer receiving the text of a single spot
 * buffer_sz  size of `buffer` in bytes
 *
 * Returns 0 on success, otherwise the rc of the first failing step. All
 * resources acquired here (column, reader, staging buffers, an index node
 * not yet pushed to obj->li, metadata nodes) are released on every path.
 */
static rc_t FastqGzip_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const FastqReader* reader = NULL;

    uint16_t zlib_ver = ZLIB_VERNUM;
    uint8_t colorSpace = false;
    const char* colorSpaceKey = "\0";
    uint8_t origFormat = false;
    uint8_t printLabel = true;
    uint8_t printReadId = true;
    uint8_t clipQuality = true;
    uint32_t minReadLen = 0;
    uint16_t qualityOffset = 0;

    {
        /* ABI SOLiD runs are rendered in color space */
        const SRAColumn* c = NULL;
        const uint8_t *platform = NULL;
        bitsz_t o, z;

        if( (rc = SRATableOpenColumnRead(sratbl, &c, "PLATFORM", sra_platform_id_t)) != 0 ) {
            return rc;
        }
        rc = SRAColumnRead(c, 1, (const void **)&platform, &o, &z);
        if( rc == 0 && platform != NULL && *platform == SRA_PLATFORM_ABSOLID ) {
            colorSpace = true;
        }
        /* release the column on the failure path as well */
        SRAColumnRelease(c);
        if( rc != 0 ) {
            return rc;
        }
    }

    if( (rc = FastqReaderMake(&reader, sratbl, g_accession, colorSpace, origFormat, false, printLabel, printReadId, !clipQuality, minReadLen, qualityOffset, colorSpaceKey[0], obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    }

    {
        size_t written = 0;
        uint32_t blk = 0, spots_per_block = 0, proj_id_qty = 0;
        SIndexNode* inode = NULL;
        size_t z_blk = 0;
        size_t spots_buf_sz = g_file_block_sz * 100;
        size_t zbuf_sz = spots_buf_sz + 100;
        char* zbuf = malloc(zbuf_sz);
        char* spots_buf = malloc(spots_buf_sz);
        bool eof = false;

        if( zbuf == NULL || spots_buf == NULL ) {
            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
        }
        while( rc == 0 ) {
            if( (rc = FastqReader_GetNextSpotSplitData(reader, buffer, buffer_sz, &written)) == 0 ) {
                if( inode == NULL ) {
                    /* first spot of a new block: open its index node */
                    spotid_t spotid = 0;
                    if( (rc = FastqReaderCurrentSpot(reader, &spotid)) != 0 ) {
                        break;
                    }
                    inode = malloc(sizeof(*inode));
                    if( inode == NULL ) {
                        rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        break;
                    }
                    inode->key = obj->file_size;
                    inode->key_size = 0;
                    inode->id = spotid;
                    inode->id_qty = 0;
                    DEBUG_MSG(5, ("%s open key: spot %ld, offset %lu\n", obj->index, inode->id, inode->key));
                }
                if( blk + written > spots_buf_sz ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                    break;
                }
                inode->id_qty++;
                memmove(&spots_buf[blk], buffer, written);
                blk += written;
                if( g_dump ) {
                    fwrite(buffer, written, 1, stderr);
                }
            }
            if( (eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted)) ) {
                rc = 0;
                if( inode == NULL ) {
                    /* nothing pending: the last block was already closed */
                    break;
                }
            }
            if( rc == 0 &&
                (eof ||
                 (proj_id_qty == 0 && inode->id_qty > (spots_per_block * 0.95)) ||
                 (proj_id_qty > 0 && inode->id_qty >= proj_id_qty) ) ) {
                rc = ZLib_DeflateBlock(spots_buf, blk, zbuf, zbuf_sz, &z_blk);
                /* z_blk is only meaningful after a successful deflate, and it
                   is a divisor below, so it must be non-zero */
                if( rc == 0 ) {
                    if( z_blk >= g_file_block_sz ) {
                        DEBUG_MSG(10, ("%s: no projection %u > %u\n", obj->index, z_blk, g_file_block_sz));
                    } else if( z_blk > 0 ) {
                        /* project needed id_qty */
                        proj_id_qty = g_file_block_sz * inode->id_qty / z_blk * 1.05;
                        DEBUG_MSG(5, ("%s: project id qty %u\n", obj->index, proj_id_qty));
                    }
                }
            }
            if( rc == 0 && (eof || z_blk >= g_file_block_sz) ) {
                /* close the block; ownership of inode moves to obj->li */
                obj->file_size += z_blk;
                MD5StateAppend(&obj->md5, zbuf, z_blk);
                inode->key_size = z_blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("%s close key: spots %lu, size %lu, ratio %hu%%, raw %u\n",
                    obj->index, inode->id_qty, inode->key_size, (uint16_t)(((float)(blk - z_blk)/blk)*100), blk ));
                spots_per_block = inode->id_qty;
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
                z_blk = 0;
                proj_id_qty = 0;
            }
            if( eof ) {
                break;
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            FastqReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        /* non-NULL only when the loop ended with a block still open */
        free(inode);
        free(zbuf);
        free(spots_buf);
    }

    if( rc == 0 ) {
        KMDataNode* opt = NULL, *nd = NULL;

        /* no early return here: the reader below must still be released */
        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) == 0 ) {
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "ZlibVersion")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &zlib_ver);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpace")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &colorSpace);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpaceKey")) == 0 ) {
                rc = KMDataNodeWrite(nd, colorSpaceKey, 1);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "origFormat")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &origFormat);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printLabel")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printLabel);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printReadId")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printReadId);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "clipQuality")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &clipQuality);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "minReadLen")) == 0 ) {
                rc = KMDataNodeWriteB32(nd, &minReadLen);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "qualityOffset")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &qualityOffset);
                KMDataNodeRelease(nd);
            }
            KMDataNodeRelease(opt);
        }
    }
    FastqReaderWhack(reader);
    return rc;
}
/*
 * Builds the block index for the gzip-compressed SFF rendering of `sratbl`
 * and records the zlib version in the index metadata.
 *
 * Spots are read one at a time into `buffer` and appended to a staging
 * buffer; when a block starts at spot 1 the SFF header is placed in front
 * of it. The staging buffer is trial-deflated at end of data, when the spot
 * count exceeds 95% of the previous block's spot count, or when a projected
 * spot count is reached. A block is closed once the deflated size reaches
 * g_file_block_sz (or at end of data): its SIndexNode is appended to
 * obj->li, obj->file_size and obj->md5 are advanced by the deflated bytes
 * and obj->buffer_sz tracks the largest uncompressed block seen.
 *
 * sratbl     table to read spots from
 * obj        index being built (uses minSpotId/maxSpotId, file_size, md5,
 *            li, buffer_sz, meta, index)
 * buffer     scratch buffer receiving the data of a single spot
 * buffer_sz  size of `buffer` in bytes
 *
 * Returns 0 on success, otherwise the rc of the first failing step. The
 * reader, the staging buffers, an index node not yet pushed to obj->li and
 * the metadata nodes are released on every path.
 */
static rc_t SFFGzip_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    uint16_t zlib_ver = ZLIB_VERNUM;
    const SFFReader* reader = NULL;

    if( (rc = SFFReaderMake(&reader, sratbl, g_accession, obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    }

    {
        size_t written = 0;
        uint32_t blk = 0, spots_per_block = 0, proj_id_qty = 0;
        SIndexNode* inode = NULL;
        size_t z_blk = 0;
        size_t spots_buf_sz = g_file_block_sz * 100;
        size_t zbuf_sz = spots_buf_sz + 100;
        char* zbuf = malloc(zbuf_sz);
        char* spots_buf = malloc(spots_buf_sz);
        bool eof = false;

        if( zbuf == NULL || spots_buf == NULL ) {
            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
        }
        while( rc == 0 ) {
            if( (rc = SFFReader_GetNextSpotData(reader, buffer, buffer_sz, &written)) == 0 ) {
                if( inode == NULL ) {
                    /* first spot of a new block: open its index node */
                    spotid_t spotid = 0;
                    if( (rc = SFFReaderCurrentSpot(reader, &spotid)) != 0 ) {
                        break;
                    }
                    inode = malloc(sizeof(*inode));
                    if( inode == NULL ) {
                        rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        break;
                    }
                    inode->key = obj->file_size;
                    inode->key_size = 0;
                    inode->id = spotid;
                    inode->id_qty = 0;
                    DEBUG_MSG(5, ("%s open key: spot %ld, offset %lu\n", obj->index, inode->id, inode->key));
                    if( spotid == 1 ) {
                        /* the block that starts at spot 1 is prefixed with the SFF header */
                        char hd[10240];
                        size_t hd_sz = 0;
                        if( (rc = SFFReaderHeader(reader, 0, hd, sizeof(hd), &hd_sz)) != 0 ) {
                            /* do not stage the spot without its header */
                            break;
                        }
                        if( blk + hd_sz + written > spots_buf_sz ) {
                            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                            break;
                        }
                        memmove(&spots_buf[blk], hd, hd_sz);
                        blk += hd_sz;
                        if( g_dump ) {
                            fwrite(hd, hd_sz, 1, stderr);
                        }
                    }
                }
                if( blk + written > spots_buf_sz ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                    break;
                }
                inode->id_qty++;
                memmove(&spots_buf[blk], buffer, written);
                blk += written;
                if( g_dump ) {
                    fwrite(buffer, written, 1, stderr);
                }
            }
            if( (eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted)) ) {
                rc = 0;
                if( inode == NULL ) {
                    /* nothing pending: the last block was already closed */
                    break;
                }
            }
            if( rc == 0 &&
                (eof ||
                 (proj_id_qty == 0 && inode->id_qty > (spots_per_block * 0.95)) ||
                 (proj_id_qty > 0 && inode->id_qty >= proj_id_qty) ) ) {
                rc = ZLib_DeflateBlock(spots_buf, blk, zbuf, zbuf_sz, &z_blk);
                /* z_blk is only meaningful after a successful deflate, and it
                   is a divisor below, so it must be non-zero */
                if( rc == 0 ) {
                    if( z_blk >= g_file_block_sz ) {
                        DEBUG_MSG(10, ("%s: no projection %lu > %lu\n", obj->index, z_blk, g_file_block_sz));
                    } else if( z_blk > 0 ) {
                        /* project needed id_qty */
                        proj_id_qty = g_file_block_sz * inode->id_qty / z_blk * 1.05;
                        DEBUG_MSG(5, ("%s: project id qty %lu\n", obj->index, proj_id_qty));
                    }
                }
            }
            if( rc == 0 && (eof || z_blk >= g_file_block_sz) ) {
                /* close the block; ownership of inode moves to obj->li */
                obj->file_size += z_blk;
                MD5StateAppend(&obj->md5, zbuf, z_blk);
                inode->key_size = z_blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("%s close key: spots %lu, size %lu, ratio %hu%%, raw %lu\n",
                    obj->index, inode->id_qty, inode->key_size, (uint16_t)(((float)(blk - z_blk)/blk)*100), blk));
                spots_per_block = inode->id_qty;
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
                z_blk = 0;
                proj_id_qty = 0;
            }
            if( eof ) {
                break;
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            SFFReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        /* non-NULL only when the loop ended with a block still open */
        free(inode);
        free(zbuf);
        free(spots_buf);
    }

    if( rc == 0 ) {
        KMDataNode* opt = NULL, *nd = NULL;

        /* no early return here: the reader below must still be released */
        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) == 0 ) {
            if( (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "ZlibVersion")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &zlib_ver);
                KMDataNodeRelease(nd);
            }
            KMDataNodeRelease(opt);
        }
    }
    SFFReaderWhack(reader);
    return rc;
}