/*
 * WriteFileMeta
 *  Stores the descriptive metadata of one generated file below obj->meta.
 *
 *  Nodes written (paths relative to obj->meta):
 *    Format                    obj->format, as a C string
 *    Format/Options/accession  g_accession, as a C string
 *    Format/Options/minSpotId  obj->minSpotId (64-bit write)
 *    Format/Options/maxSpotId  obj->maxSpotId (64-bit write)
 *    Size                      obj->file_size, only when it is > 0
 *    Buffer                    obj->buffer_sz, only when it is > 0
 *    Index                     obj->index, only when it is not empty
 *    md5                       obj->md5_digest as lowercase hex text,
 *                              only when obj->file_size is > 0
 *
 *  obj  [ IN ] - index descriptor; obj->meta must be an updatable node.
 *
 *  Returns 0 on success, otherwise the rc of the first call that failed.
 *  After a failure no further nodes are opened or written.
 */
rc_t WriteFileMeta(SIndexObj* obj)
{
    rc_t rc = 0;
    KMDataNode* nd = NULL;

    PLOGMSG(klogInfo, (klogInfo, "Meta $(f) on index $(i): file size $(s), buffer $(b)",
        PLOG_4(PLOG_S(f),PLOG_S(i),PLOG_U64(s),PLOG_U32(b)),
        obj->file, obj->index, obj->file_size, obj->buffer_sz));

    if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &nd, "Format")) == 0 ) {
        KMDataNode* opt = NULL;

        rc = KMDataNodeWriteCString(nd, obj->format);
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(nd, &opt, "Options")) == 0 ) {
            KMDataNode* ond = NULL;

            if( (rc = KMDataNodeOpenNodeUpdate(opt, &ond, "accession")) == 0 ) {
                rc = KMDataNodeWriteCString(ond, g_accession);
                KMDataNodeRelease(ond);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &ond, "minSpotId")) == 0 ) {
                rc = KMDataNodeWriteB64(ond, &obj->minSpotId);
                KMDataNodeRelease(ond);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &ond, "maxSpotId")) == 0 ) {
                rc = KMDataNodeWriteB64(ond, &obj->maxSpotId);
                KMDataNodeRelease(ond);
            }
            KMDataNodeRelease(opt);
        }
        KMDataNodeRelease(nd);
    }

    if( rc == 0 && obj->file_size > 0 &&
        (rc = KMDataNodeOpenNodeUpdate(obj->meta, &nd, "Size")) == 0 ) {
        rc = KMDataNodeWriteB64(nd, &obj->file_size);
        KMDataNodeRelease(nd);
    }

    if( rc == 0 && obj->buffer_sz > 0 &&
        (rc = KMDataNodeOpenNodeUpdate(obj->meta, &nd, "Buffer")) == 0 ) {
        rc = KMDataNodeWriteB32(nd, &obj->buffer_sz);
        KMDataNodeRelease(nd);
    }

    if( rc == 0 && obj->index[0] != '\0' &&
        (rc = KMDataNodeOpenNodeUpdate(obj->meta, &nd, "Index")) == 0 ) {
        rc = KMDataNodeWriteCString(nd, obj->index);
        KMDataNodeRelease(nd);
    }

    if( rc == 0 && obj->file_size > 0 &&
        (rc = KMDataNodeOpenNodeUpdate(obj->meta, &nd, "md5")) == 0 ) {
        char hex[3]; /* two hex digits + NUL */
        size_t i;

        for( i = 0; rc == 0 && i < sizeof(obj->md5_digest); i++ ) {
            /* Force the byte to an unsigned value so "%02x" always yields
             * exactly two digits; a sign-extended char would print eight
             * and make the reported length exceed the buffer. */
            int len = snprintf(hex, sizeof hex, "%02x",
                               (unsigned)(unsigned char)obj->md5_digest[i]);
            rc = KMDataNodeAppend(nd, hex, len);
        }
        KMDataNodeRelease(nd);
    }
    return rc;
}
/*
 * refseq_meta_stats
 *  Row callback that folds the sequence in argv[0] into the running
 *  statistics kept in `self` and rewrites three metadata nodes below
 *  data->stats on every call:
 *    TOTAL_SEQ_LEN  running element count (64-bit write)
 *    CRC32          running CRC32 over the translated text (32-bit write)
 *    MD5            digest of everything appended so far (16 raw bytes)
 *
 *  self    [ IN ] - refseq_meta_stats_data carrying buffer and state
 *  argv[0] [ IN ] - INSDC_4na_bin elements; each one is mapped through
 *                   INSDC_4na_map_CHARSET into data->buf before hashing
 *  info, row_id, rslt, argc are not used by this function.
 *
 *  Returns 0 on success; rcMemory/rcExhausted if the scratch buffer
 *  cannot be grown; rcMetadata/rcOutofrange if the total length would
 *  wrap; otherwise the rc of the failing metadata call.
 */
static rc_t CC refseq_meta_stats( void *self, const VXformInfo *info, int64_t row_id,
                                  VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    KMDataNode* node;
    refseq_meta_stats_data* data = self;
    uint64_t pos;
    uint64_t seq_len = argv[0].u.data.elem_count;
    const INSDC_4na_bin * bases = argv[0].u.data.base;

    bases += argv[0].u.data.first_elem;

    assert(data != NULL);

    /* make sure the scratch buffer can hold the translated row */
    if( data->buf_sz < seq_len ) {
        char* grown = realloc(data->buf, seq_len);
        if( grown == NULL ) {
            return RC(rcVDB, rcFunction, rcUpdating, rcMemory, rcExhausted);
        }
        data->buf = grown;
        data->buf_sz = seq_len;
    }

    /* translate binary 4na codes to their character form */
    for( pos = 0; pos < seq_len; pos++ ) {
        data->buf[pos] = INSDC_4na_map_CHARSET[bases[pos]];
    }

    /* running total length, with wrap-around detection */
    rc = KMDataNodeOpenNodeUpdate(data->stats, &node, "TOTAL_SEQ_LEN");
    if( rc != 0 ) {
        return rc;
    }
    if( data->total_seq_len + seq_len < data->total_seq_len ) {
        rc = RC(rcVDB, rcFunction, rcUpdating, rcMetadata, rcOutofrange);
    } else {
        data->total_seq_len += seq_len;
        rc = KMDataNodeWriteB64(node, &data->total_seq_len);
    }
    KMDataNodeRelease(node);
    if( rc != 0 ) {
        return rc;
    }

    /* running CRC32 */
    rc = KMDataNodeOpenNodeUpdate(data->stats, &node, "CRC32");
    if( rc != 0 ) {
        return rc;
    }
    data->crc32 = CRC32(data->crc32, data->buf, seq_len);
    rc = KMDataNodeWriteB32(node, &data->crc32);
    KMDataNodeRelease(node);
    if( rc != 0 ) {
        return rc;
    }

    /* running MD5: finish a copy so data->md5 can keep accumulating */
    rc = KMDataNodeOpenNodeUpdate(data->stats, &node, "MD5");
    if( rc == 0 ) {
        uint8_t digest[16];
        MD5State snapshot;

        MD5StateAppend(&data->md5, data->buf, seq_len);
        memcpy(&snapshot, &data->md5, sizeof(snapshot));
        MD5StateFinish(&snapshot, digest);
        rc = KMDataNodeWrite(node, digest, sizeof(digest));
        KMDataNodeRelease(node);
    }
    return rc;
}
/*
 * enter_vdbcopy_node
 *  Adds a new "EVENT_<n>" child below the "HISTORY" node of dst_meta,
 *  where <n> is the current child count of HISTORY plus one, and fills
 *  it via enter_date_name_vers().
 *
 *  dst_meta  [ IN ] - metadata opened for update
 *  show_meta [ IN ] - when true, a progress line is printed first
 *
 *  Returns 0 on success, otherwise the rc of the first failing step
 *  (each failure is also reported through DISP_RC).
 */
static rc_t enter_vdbcopy_node( KMetadata *dst_meta, const bool show_meta )
{
    rc_t rc;
    KMDataNode *history;
    char event_name[ 32 ];
    uint32_t event_nr;

    if ( show_meta )
        KOutMsg( "--- entering Copy entry...\n" );

    rc = KMetadataOpenNodeUpdate ( dst_meta, &history, "HISTORY" );
    DISP_RC( rc, "enter_vdbcopy_node:KMetadataOpenNodeUpdate('HISTORY') failed" );
    if ( rc != 0 )
        return rc;

    /* the next event number follows the existing children */
    event_nr = get_child_count( history ) + 1;
    rc = string_printf ( event_name, sizeof( event_name ), NULL, "EVENT_%u", event_nr );
    DISP_RC( rc, "enter_vdbcopy_node:string_printf(EVENT_NR) failed" );
    if ( rc == 0 )
    {
        KMDataNode *event;

        rc = KMDataNodeOpenNodeUpdate ( history, &event, event_name );
        DISP_RC( rc, "enter_vdbcopy_node:KMDataNodeOpenNodeUpdate('EVENT_NR') failed" );
        if ( rc == 0 )
        {
            rc = enter_date_name_vers( event );
            KMDataNodeRelease ( event );
        }
    }

    KMDataNodeRelease ( history );
    return rc;
}
/*
 * TableWriterSeq_Whack
 *  Destroys a sequence table writer and, when `commit` is set, first
 *  records its closing metadata:
 *    MATE_STATISTICS               via TableWriterSeq_WriteStatistics(),
 *                                  only when cself->stats is set
 *    unaligned/first-unaligned     self->firstUnaligned   (64-bit write)
 *    unaligned/first-half-aligned  self->firstHalfAligned (64-bit write)
 *
 *  cself  [ IN ]  - writer to destroy; NULL is accepted and returns 0
 *  commit [ IN ]  - request a commit; it is passed on to TableWriter_Whack
 *                   only if all metadata steps succeeded
 *  rows   [ OUT ] - forwarded unchanged to TableWriter_Whack
 *
 *  Returns the first error encountered. A metadata failure is no longer
 *  overwritten by a later successful call, so the caller is told when
 *  the requested commit was not carried out.
 *
 *  The writer's resources (tmpkey reader, base writer, stats vector,
 *  quality buffer, the object itself) are released on every path.
 */
LIB_EXPORT rc_t CC TableWriterSeq_Whack(const TableWriterSeq* cself, bool commit, uint64_t* rows)
{
    rc_t rc = 0;

    if( cself != NULL ) {
        TableWriterSeq* self = (TableWriterSeq*)cself;
        VTable *vtbl;
        rc_t whack_rc;

        TableReader_Whack(cself->tmpkey_reader);

        if( commit && (rc = TableWriter_GetVTable(cself->base, &vtbl)) == 0 ) {
            KMetadata* meta;

            if( (rc = VTableOpenMetadataUpdate(vtbl, &meta)) == 0 ) {
                KMDataNode* node = NULL;

                if( cself->stats ) {
                    if( (rc = KMetadataOpenNodeUpdate(meta, &node, "MATE_STATISTICS")) == 0 ) {
                        rc = TableWriterSeq_WriteStatistics(cself, node);
                        KMDataNodeRelease(node);
                    }
                }
                /* keep a statistics failure instead of overwriting it */
                if( rc == 0 && (rc = KMetadataOpenNodeUpdate(meta, &node, "unaligned")) == 0 ) {
                    KMDataNode *sub = NULL;

                    if( (rc = KMDataNodeOpenNodeUpdate(node, &sub, "first-unaligned")) == 0 ) {
                        rc = KMDataNodeWriteB64(sub, &self->firstUnaligned);
                        KMDataNodeRelease(sub);
                    }
                    if( rc == 0 &&
                        (rc = KMDataNodeOpenNodeUpdate(node, &sub, "first-half-aligned")) == 0 ) {
                        rc = KMDataNodeWriteB64(sub, &self->firstHalfAligned);
                        KMDataNodeRelease(sub);
                    }
                    KMDataNodeRelease(node);
                }
                KMetadataRelease(meta);
            }
        }

        /* always whack the base writer; only commit if metadata is complete */
        whack_rc = TableWriter_Whack(cself->base, commit && (rc == 0), rows);
        if( rc == 0 ) {
            rc = whack_rc;
        }
        KVectorRelease(cself->stats);
        free(self->qual_buf);
        free(self);
    }
    return rc;
}
/*
 * group_stats_write_node
 *  Opens (for update) the child `name` of `node` and stores `value`
 *  there with a 64-bit write.
 *
 *  node  [ IN ] - parent metadata node
 *  name  [ IN ] - child name; passed as a "%s" argument, so it is taken
 *                 literally
 *  value [ IN ] - pointer handed to KMDataNodeWriteB64
 *
 *  Returns 0 on success, or the rc of the failing open/write.
 */
static rc_t group_stats_write_node(KMDataNode *const node, char const name[], void const *const value)
{
    KMDataNode *child;
    rc_t rc;

    rc = KMDataNodeOpenNodeUpdate(node, &child, "%s", name);
    if (rc != 0)
        return rc;

    rc = KMDataNodeWriteB64(child, value);
    KMDataNodeRelease(child);
    return rc;
}
/*
 * sra_meta_stats_node_group_open
 *  Opens the statistics child nodes of `parent` for update and stores
 *  the handles in `g`: SPOT_COUNT, BASE_COUNT, BIO_BASE_COUNT, SPOT_MIN,
 *  SPOT_MAX and, only when `compressed` is true, CMP_BASE_COUNT.
 *
 *  Nodes are opened in that order and the first failure stops the
 *  sequence. Handles opened before the failure stay stored in `g`;
 *  this function does not release them.
 *
 *  Returns 0 on success, otherwise the rc of the failing open.
 */
static rc_t sra_meta_stats_node_group_open(KMDataNode* parent, sra_meta_stats_node_group* g, bool compressed)
{
    rc_t rc;

    assert(parent && g);

    rc = KMDataNodeOpenNodeUpdate(parent, &g->node_spot_count, "SPOT_COUNT");
    if( rc != 0 ) {
        return rc;
    }
    rc = KMDataNodeOpenNodeUpdate(parent, &g->node_base_count, "BASE_COUNT");
    if( rc != 0 ) {
        return rc;
    }
    rc = KMDataNodeOpenNodeUpdate(parent, &g->node_bio_base_count, "BIO_BASE_COUNT");
    if( rc != 0 ) {
        return rc;
    }
    rc = KMDataNodeOpenNodeUpdate(parent, &g->node_spot_min, "SPOT_MIN");
    if( rc != 0 ) {
        return rc;
    }
    rc = KMDataNodeOpenNodeUpdate(parent, &g->node_spot_max, "SPOT_MAX");
    if( rc != 0 ) {
        return rc;
    }
    if( compressed ) {
        rc = KMDataNodeOpenNodeUpdate(parent, &g->node_cmp_base_count, "CMP_BASE_COUNT");
    }
    return rc;
}
/*
 * enter_schema_update
 *  Opens "SOFTWARE/update" in dst_meta for update and fills it via
 *  enter_date_name_vers().
 *
 *  dst_meta  [ IN ] - metadata opened for update
 *  show_meta [ IN ] - when true, a progress line is printed first
 *
 *  Returns 0 on success, otherwise the rc of the first failing step
 *  (each failed open is also reported through DISP_RC).
 */
static rc_t enter_schema_update( KMetadata *dst_meta, const bool show_meta )
{
    rc_t rc;
    KMDataNode *software;
    KMDataNode *update;

    if ( show_meta )
        KOutMsg( "--- entering schema-update\n" );

    rc = KMetadataOpenNodeUpdate ( dst_meta, &software, "SOFTWARE" );
    DISP_RC( rc, "enter_schema_update:KMetadataOpenNodeUpdate('SOFTWARE') failed" );
    if ( rc != 0 )
        return rc;

    rc = KMDataNodeOpenNodeUpdate ( software, &update, "update" );
    DISP_RC( rc, "enter_schema_update:KMDataNodeOpenNodeUpdate('update') failed" );
    if ( rc == 0 )
    {
        rc = enter_date_name_vers( update );
        KMDataNodeRelease ( update );
    }

    KMDataNodeRelease ( software );
    return rc;
}
/*
 * MakeIndexes
 *  Builds (or removes) the four FUSE file indexes of a table and records
 *  their metadata below the "/FUSE" node of `meta`.
 *
 *  For every entry of the local idx[] table:
 *   - the "/FUSE" node is opened and its "root" child dropped;
 *   - if g_ungzip is set or the file name ends in ".gz", the index is
 *     built into a "<file>.tmp" child: the entry's builder is run, the
 *     MD5 finished, the index committed with CommitIndex() and the
 *     metadata written with WriteFileMeta(); on success "<file>.tmp" is
 *     renamed to "<file>";
 *   - a builder result with state rcUnsupported is logged as a warning,
 *     the "<file>" child is dropped and processing continues;
 *   - otherwise the table index and the "<file>" child are dropped;
 *   - any leftover "<file>.tmp" child is dropped.
 *
 *  stbl [ IN ] - table handed to the builders
 *  ktbl [ IN ] - table receiving / losing the indexes
 *  meta [ IN ] - metadata opened for update
 *
 *  Returns 0 on success or the first error; processing stops at the
 *  first entry that fails.
 *
 *  A failure to allocate the shared work buffer now goes through the
 *  normal cleanup path, so the open nodes are released and the
 *  temporary child is dropped.
 */
static rc_t MakeIndexes(const SRATable* stbl, KTable* ktbl, KMetadata* meta)
{
    rc_t rc = 0;
    size_t i;
    char* buffer = NULL;
    size_t buffer_sz = g_file_block_sz * 100;

    SIndexObj idx[] = {
        /* meta, file, format, index, func, file_size, buffer_sz, minSpotId, maxSpotId */
        {NULL, "fastq", "fastq", "fuse-fastq", Fastq_Idx, 0, 0, 0, 0},
        {NULL, "sff", "SFF", "fuse-sff", SFF_Idx, 0, 0, 0, 0},
        {NULL, "fastq.gz", "fastq-gzip", "fuse-fastq-gz", FastqGzip_Idx, 0, 0, 0, 0},
        {NULL, "sff.gz", "SFF-gzip", "fuse-sff-gz", SFFGzip_Idx, 0, 0, 0, 0}
    };

    for( i = 0; rc == 0 && i < sizeof(idx) / sizeof(idx[0]); i++ ) {
        KMDataNode* parent = NULL;

        if( (rc = KMetadataOpenNodeUpdate(meta, &parent, "/FUSE")) == 0 ) {
            KMDataNodeDropChild(parent, "root"); /* drop old stuff */

            /* every file name in idx[] is at least 3 characters long */
            if( g_ungzip || strcmp(&idx[i].file[strlen(idx[i].file) - 3], ".gz") == 0 ) {
                STSMSG(0, ("Preparing index %s", idx[i].index));
                MD5StateInit(&idx[i].md5);
                SLListInit(&idx[i].li);
                KMDataNodeDropChild(parent, "%s.tmp", idx[i].file);
                if( (rc = KMDataNodeOpenNodeUpdate(parent, &idx[i].meta, "%s.tmp", idx[i].file)) == 0 ) {
                    if( idx[i].func != NULL ) {
                        /* the work buffer is allocated once and shared by all builders */
                        if( buffer == NULL && (buffer = malloc(buffer_sz)) == NULL ) {
                            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        } else {
                            rc = idx[i].func(stbl, &idx[i], buffer, buffer_sz);
                            if( rc == 0 ) {
                                MD5StateFinish(&idx[i].md5, idx[i].md5_digest);
                                rc = CommitIndex(ktbl, idx[i].index, &idx[i].li);
                            }
                        }
                    }
                    if( rc == 0 ) {
                        rc = WriteFileMeta(&idx[i]);
                    }
                    KMDataNodeRelease(idx[i].meta);
                }
                if( GetRCState(rc) == rcUnsupported ) {
                    KMDataNodeDropChild(parent, "%s", idx[i].file);
                    PLOGERR(klogWarn, (klogWarn, rc, "Index $(i) is not supported for this table", PLOG_S(i), idx[i].index));
                    rc = 0;
                } else if( rc == 0 ) {
                    char f[4096];

                    strcpy(f, idx[i].file);
                    strcat(f, ".tmp");
                    KMDataNodeDropChild(parent, "%s", idx[i].file);
                    rc = KMDataNodeRenameChild(parent, f, idx[i].file);
                }
            } else if( !g_ungzip ) {
                KTableDropIndex(ktbl, idx[i].index);
                KMDataNodeDropChild(parent, "%s", idx[i].file);
            }
            KMDataNodeDropChild(parent, "%s.tmp", idx[i].file);
            KMDataNodeRelease(parent);
        }
        SLListWhack(&idx[i].li, WhackIndexData, NULL);
    }
    free(buffer);
    return rc;
}
/*
 * FastqGzip_Idx
 *  Builds the block index for the gzip-compressed FASTQ rendering of a
 *  table and records the rendering options in metadata.
 *
 *  Spots are read one at a time into `buffer`, collected in a private
 *  staging buffer and deflated with ZLib_DeflateBlock(). A block is
 *  closed (one SIndexNode appended to obj->li) once its deflated size
 *  reaches g_file_block_sz or the reader is exhausted. Closing a block
 *  adds the deflated size to obj->file_size, feeds the deflated bytes to
 *  obj->md5 and raises obj->buffer_sz to the largest raw block seen.
 *
 *  After a successful run the options used (ZlibVersion, colorSpace,
 *  colorSpaceKey, origFormat, printLabel, printReadId, clipQuality,
 *  minReadLen, qualityOffset) are written below "Format/Options" of
 *  obj->meta.
 *
 *  sratbl    [ IN ]     - source table
 *  obj       [ IN/OUT ] - index descriptor being filled
 *  buffer    [ IN ]     - caller-owned scratch for one spot
 *  buffer_sz [ IN ]     - size of `buffer`
 *
 *  Returns 0 on success, otherwise the first error. rcMemory/
 *  rcInsufficient is returned when the collected spots exceed the
 *  staging buffer.
 *
 *  Every exit path releases what it acquired: the PLATFORM column, the
 *  reader, and an index node that was allocated but never queued.
 */
static rc_t FastqGzip_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const FastqReader* reader = NULL;

    uint16_t zlib_ver = ZLIB_VERNUM;
    uint8_t colorSpace = false;
    char* colorSpaceKey = "\0";
    uint8_t origFormat = false;
    uint8_t printLabel = true;
    uint8_t printReadId = true;
    uint8_t clipQuality = true;
    uint32_t minReadLen = 0;
    uint16_t qualityOffset = 0;

    /* color space output is selected by the platform stored in row 1 */
    {
        const SRAColumn* c = NULL;
        const uint8_t *platform = SRA_PLATFORM_UNDEFINED;
        bitsz_t o, z;

        if( (rc = SRATableOpenColumnRead(sratbl, &c, "PLATFORM", sra_platform_id_t)) != 0 ) {
            return rc;
        }
        rc = SRAColumnRead(c, 1, (const void **)&platform, &o, &z);
        if( rc == 0 && *platform == SRA_PLATFORM_ABSOLID ) {
            colorSpace = true;
        }
        SRAColumnRelease(c); /* released on the failure path as well */
        if( rc != 0 ) {
            return rc;
        }
    }

    if( (rc = FastqReaderMake(&reader, sratbl, g_accession,
                              colorSpace, origFormat, false, printLabel, printReadId,
                              !clipQuality, minReadLen, qualityOffset, colorSpaceKey[0],
                              obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        size_t written = 0;
        uint32_t blk = 0, spots_per_block = 0, proj_id_qty = 0;
        SIndexNode* inode = NULL;
        size_t z_blk = 0;
        size_t spots_buf_sz = g_file_block_sz * 100;
        size_t zbuf_sz = spots_buf_sz + 100;
        char* zbuf = malloc(zbuf_sz);
        char* spots_buf = malloc(spots_buf_sz);
        bool eof = false;

        if( zbuf == NULL || spots_buf == NULL ) {
            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
        }
        while( rc == 0 ) {
            if( (rc = FastqReader_GetNextSpotSplitData(reader, buffer, buffer_sz, &written)) == 0 ) {
                if( inode == NULL ) {
                    /* first spot of a new block: open its index node */
                    spotid_t spotid = 0;
                    if( (rc = FastqReaderCurrentSpot(reader, &spotid)) != 0 ) {
                        break;
                    }
                    inode = malloc(sizeof(SIndexNode));
                    if( inode == NULL ) {
                        rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        break;
                    }
                    inode->key = obj->file_size;
                    inode->key_size = 0;
                    inode->id = spotid;
                    inode->id_qty = 0;
                    DEBUG_MSG(5, ("%s open key: spot %ld, offset %lu\n", obj->index, inode->id, inode->key));
                }
                if( blk + written > spots_buf_sz ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                    break;
                }
                inode->id_qty++;
                memmove(&spots_buf[blk], buffer, written);
                blk += written;
                if( g_dump ) {
                    fwrite(buffer, written, 1, stderr);
                }
            }
            if( (eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted)) ) {
                rc = 0;
                if( inode == NULL ) {
                    break; /* nothing pending */
                }
            }
            if( rc == 0 &&
                (eof ||
                 (proj_id_qty == 0 && inode->id_qty > (spots_per_block * 0.95)) ||
                 (proj_id_qty > 0 && inode->id_qty >= proj_id_qty) ) ) {
                rc = ZLib_DeflateBlock(spots_buf, blk, zbuf, zbuf_sz, &z_blk);
                if( rc == 0 ) {
                    if( z_blk > 0 && z_blk < g_file_block_sz ) {
                        /* project needed id_qty */
                        proj_id_qty = g_file_block_sz * inode->id_qty / z_blk * 1.05;
                        DEBUG_MSG(5, ("%s: project id qty %u\n", obj->index, proj_id_qty));
                    } else if( z_blk >= g_file_block_sz ) {
                        DEBUG_MSG(10, ("%s: no projection %u > %u\n", obj->index, z_blk, g_file_block_sz));
                    }
                }
            }
            if( rc == 0 && (eof || z_blk >= g_file_block_sz) ) {
                /* close the block and hand the node over to the list */
                obj->file_size += z_blk;
                MD5StateAppend(&obj->md5, zbuf, z_blk);
                inode->key_size = z_blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("%s close key: spots %lu, size %lu, ratio %hu%%, raw %u\n",
                              obj->index, inode->id_qty, inode->key_size,
                              (uint16_t)(((float)(blk - z_blk)/blk)*100), blk ));
                spots_per_block = inode->id_qty;
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
                z_blk = 0;
                proj_id_qty = 0;
            }
            if( eof ) {
                break;
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            FastqReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        /* a node that was opened but never queued is still owned here */
        free(inode);
        free(zbuf);
        free(spots_buf);
    }

    if( rc == 0 ) {
        KMDataNode* opt = NULL, *nd = NULL;

        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) == 0 ) {
            if( (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "ZlibVersion")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &zlib_ver);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpace")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &colorSpace);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpaceKey")) == 0 ) {
                rc = KMDataNodeWrite(nd, colorSpaceKey, 1);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "origFormat")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &origFormat);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printLabel")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printLabel);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printReadId")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printReadId);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "clipQuality")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &clipQuality);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "minReadLen")) == 0 ) {
                rc = KMDataNodeWriteB32(nd, &minReadLen);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "qualityOffset")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &qualityOffset);
                KMDataNodeRelease(nd);
            }
            KMDataNodeRelease(opt);
        }
    }
    FastqReaderWhack(reader);
    return rc;
}
/*
 * Fastq_Idx
 *  Builds the block index for the plain FASTQ rendering of a table and
 *  records the rendering options in metadata.
 *
 *  The options (colorSpace, colorSpaceKey, origFormat, printLabel,
 *  printReadId, clipQuality, minReadLen, qualityOffset) are written
 *  below "Format/Options" of obj->meta first. Then spots are read one at
 *  a time into `buffer`; each adds its byte count to obj->file_size and
 *  its bytes to obj->md5. A block is closed (one SIndexNode appended to
 *  obj->li) when the bytes collected so far reach g_file_block_sz or the
 *  reader is exhausted; obj->buffer_sz is raised to the largest block.
 *
 *  sratbl    [ IN ]     - source table
 *  obj       [ IN/OUT ] - index descriptor being filled
 *  buffer    [ IN ]     - caller-owned scratch for one spot
 *  buffer_sz [ IN ]     - size of `buffer`
 *
 *  Returns 0 on success, otherwise the first error.
 *
 *  Changes compared with the previous version:
 *   - an exhausted reader on the very first read no longer dereferences
 *     a NULL index node;
 *   - a read error stops the loop at once instead of being accounted as
 *     data (and possibly overwritten by a later successful call);
 *   - the PLATFORM column, the reader and a pending index node are
 *     released on all error paths.
 */
static rc_t Fastq_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const FastqReader* reader = NULL;

    uint8_t colorSpace = false;
    char* colorSpaceKey = "\0";
    uint8_t origFormat = false;
    uint8_t printLabel = true;
    uint8_t printReadId = true;
    uint8_t clipQuality = true;
    uint32_t minReadLen = 0;
    uint16_t qualityOffset = 0;

    /* color space output is selected by the platform stored in row 1 */
    {
        const SRAColumn* c = NULL;
        const uint8_t *platform = SRA_PLATFORM_UNDEFINED;
        bitsz_t o, z;

        if( (rc = SRATableOpenColumnRead(sratbl, &c, "PLATFORM", sra_platform_id_t)) != 0 ) {
            return rc;
        }
        rc = SRAColumnRead(c, 1, (const void **)&platform, &o, &z);
        if( rc == 0 && *platform == SRA_PLATFORM_ABSOLID ) {
            colorSpace = true;
        }
        SRAColumnRelease(c); /* released on the failure path as well */
        if( rc != 0 ) {
            return rc;
        }
    }

    if( (rc = FastqReaderMake(&reader, sratbl, g_accession,
                              colorSpace, origFormat, false, printLabel, printReadId,
                              !clipQuality, minReadLen, qualityOffset, colorSpaceKey[0],
                              obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        KMDataNode* opt = NULL, *nd = NULL;

        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) == 0 ) {
            if( (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpace")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &colorSpace);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpaceKey")) == 0 ) {
                rc = KMDataNodeWrite(nd, colorSpaceKey, 1);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "origFormat")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &origFormat);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printLabel")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printLabel);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printReadId")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printReadId);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "clipQuality")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &clipQuality);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "minReadLen")) == 0 ) {
                rc = KMDataNodeWriteB32(nd, &minReadLen);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "qualityOffset")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &qualityOffset);
                KMDataNodeRelease(nd);
            }
            KMDataNodeRelease(opt);
        }
    }

    if( rc == 0 ) {
        size_t written = 0;
        uint32_t blk = 0;
        SIndexNode* inode = NULL;

        while( rc == 0 ) {
            bool exhausted;

            rc = FastqReader_GetNextSpotSplitData(reader, buffer, buffer_sz, &written);
            exhausted = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted);
            if( rc != 0 && !exhausted ) {
                break; /* real read error: nothing valid in buffer */
            }
            /* close the current block before the spot just read is added */
            if( inode != NULL && (blk >= g_file_block_sz || exhausted) ) {
                inode->key_size = blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("Fastq index closed spots %lu, offset %lu, block size %lu\n",
                              inode->id_qty, inode->key, inode->key_size));
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
            }
            if( exhausted ) {
                rc = 0;
                break;
            }
            if( inode == NULL ) {
                /* first spot of a new block: open its index node */
                spotid_t spotid = 0;
                if( (rc = FastqReaderCurrentSpot(reader, &spotid)) != 0 ) {
                    break;
                }
                inode = malloc(sizeof(SIndexNode));
                if( inode == NULL ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                    break;
                }
                inode->key = obj->file_size;
                inode->key_size = 0;
                inode->id = spotid;
                inode->id_qty = 0;
                DEBUG_MSG(5, ("Fastq index opened spot %ld, offset %lu\n", inode->id, inode->key));
            }
            inode->id_qty++;
            obj->file_size += written;
            blk += written;
            MD5StateAppend(&obj->md5, buffer, written);
            if( g_dump ) {
                fwrite(buffer, written, 1, stderr);
            }
        }
        /* a node that was opened but never queued is still owned here */
        free(inode);

        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            FastqReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
    }
    FastqReaderWhack(reader);
    return rc;
}
/*
 * SFFGzip_Idx
 *  Builds the block index for the gzip-compressed SFF rendering of a
 *  table and records the zlib version in metadata.
 *
 *  Spots are read one at a time into `buffer`, collected in a private
 *  staging buffer and deflated with ZLib_DeflateBlock(). When the block
 *  being opened starts at spot 1, the SFF header obtained from
 *  SFFReaderHeader() is placed in front of the first spot. A block is
 *  closed (one SIndexNode appended to obj->li) once its deflated size
 *  reaches g_file_block_sz or the reader is exhausted. Closing a block
 *  adds the deflated size to obj->file_size, feeds the deflated bytes to
 *  obj->md5 and raises obj->buffer_sz to the largest raw block seen.
 *
 *  After a successful run ZLIB_VERNUM is written to
 *  "Format/Options/ZlibVersion" of obj->meta.
 *
 *  sratbl    [ IN ]     - source table
 *  obj       [ IN/OUT ] - index descriptor being filled
 *  buffer    [ IN ]     - caller-owned scratch for one spot
 *  buffer_sz [ IN ]     - size of `buffer`
 *
 *  Returns 0 on success, otherwise the first error. rcMemory/
 *  rcInsufficient is returned when the collected data exceed the staging
 *  buffer.
 *
 *  Every exit path releases the reader and an index node that was
 *  allocated but never queued.
 */
static rc_t SFFGzip_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    uint16_t zlib_ver = ZLIB_VERNUM;
    const SFFReader* reader = NULL;

    if( (rc = SFFReaderMake(&reader, sratbl, g_accession, obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        size_t written = 0;
        uint32_t blk = 0, spots_per_block = 0, proj_id_qty = 0;
        SIndexNode* inode = NULL;
        size_t z_blk = 0;
        size_t spots_buf_sz = g_file_block_sz * 100;
        size_t zbuf_sz = spots_buf_sz + 100;
        char* zbuf = malloc(zbuf_sz);
        char* spots_buf = malloc(spots_buf_sz);
        bool eof = false;

        if( zbuf == NULL || spots_buf == NULL ) {
            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
        }
        while( rc == 0 ) {
            if( (rc = SFFReader_GetNextSpotData(reader, buffer, buffer_sz, &written)) == 0 ) {
                if( inode == NULL ) {
                    /* first spot of a new block: open its index node */
                    spotid_t spotid = 0;
                    if( (rc = SFFReaderCurrentSpot(reader, &spotid)) != 0 ) {
                        break;
                    }
                    inode = malloc(sizeof(SIndexNode));
                    if( inode == NULL ) {
                        rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        break;
                    }
                    inode->key = obj->file_size;
                    inode->key_size = 0;
                    inode->id = spotid;
                    inode->id_qty = 0;
                    DEBUG_MSG(5, ("%s open key: spot %ld, offset %lu\n", obj->index, inode->id, inode->key));
                    if( spotid == 1 ) {
                        /* the file header precedes the very first spot */
                        char hd[10240];
                        size_t hd_sz = 0;
                        if( (rc = SFFReaderHeader(reader, 0, hd, sizeof(hd), &hd_sz)) != 0 ) {
                            break;
                        }
                        if( hd_sz + written > spots_buf_sz ) {
                            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                            break;
                        }
                        memmove(&spots_buf[blk], hd, hd_sz);
                        blk += hd_sz;
                        if( g_dump ) {
                            fwrite(hd, hd_sz, 1, stderr);
                        }
                    }
                }
                if( blk + written > spots_buf_sz ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                    break;
                }
                inode->id_qty++;
                memmove(&spots_buf[blk], buffer, written);
                blk += written;
                if( g_dump ) {
                    fwrite(buffer, written, 1, stderr);
                }
            }
            if( (eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted)) ) {
                rc = 0;
                if( inode == NULL ) {
                    break; /* nothing pending */
                }
            }
            if( rc == 0 &&
                (eof ||
                 (proj_id_qty == 0 && inode->id_qty > (spots_per_block * 0.95)) ||
                 (proj_id_qty > 0 && inode->id_qty >= proj_id_qty) ) ) {
                rc = ZLib_DeflateBlock(spots_buf, blk, zbuf, zbuf_sz, &z_blk);
                if( rc == 0 ) {
                    if( z_blk > 0 && z_blk < g_file_block_sz ) {
                        /* project needed id_qty */
                        proj_id_qty = g_file_block_sz * inode->id_qty / z_blk * 1.05;
                        DEBUG_MSG(5, ("%s: project id qty %lu\n", obj->index, proj_id_qty));
                    } else if( z_blk >= g_file_block_sz ) {
                        DEBUG_MSG(10, ("%s: no projection %lu > %lu\n", obj->index, z_blk, g_file_block_sz));
                    }
                }
            }
            if( rc == 0 && (eof || z_blk >= g_file_block_sz) ) {
                /* close the block and hand the node over to the list */
                obj->file_size += z_blk;
                MD5StateAppend(&obj->md5, zbuf, z_blk);
                inode->key_size = z_blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("%s close key: spots %lu, size %lu, ratio %hu%%, raw %lu\n",
                              obj->index, inode->id_qty, inode->key_size,
                              (uint16_t)(((float)(blk - z_blk)/blk)*100), blk));
                spots_per_block = inode->id_qty;
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
                z_blk = 0;
                proj_id_qty = 0;
            }
            if( eof ) {
                break;
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            SFFReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        /* a node that was opened but never queued is still owned here */
        free(inode);
        free(zbuf);
        free(spots_buf);
    }

    if( rc == 0 ) {
        KMDataNode* opt = NULL, *nd = NULL;

        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) == 0 ) {
            if( (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "ZlibVersion")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &zlib_ver);
                KMDataNodeRelease(nd);
            }
            KMDataNodeRelease(opt);
        }
    }
    SFFReaderWhack(reader);
    return rc;
}
/*
 * copy_metadata_child
 *  Recursively copies the metadata node at `node_path` (data,
 *  attributes and all descendants) from the source tree to the
 *  destination tree.
 *
 *  src_root  [ IN ] - root of the source metadata
 *  dst_root  [ IN ] - root of the destination metadata
 *  node_path [ IN ] - path of the node, relative to both roots
 *  show_meta [ IN ] - when true, each copied path is printed
 *
 *  If the destination node cannot be opened with a state of rcBusy, the
 *  node itself is skipped but its children are still visited; any other
 *  failure ends the copy of this subtree.
 *
 *  Returns 0 on success, otherwise the rc of the first failing step.
 *
 *  The path is handed to the open calls as a "%s" argument because they
 *  take a printf-style path; a '%' in a node name must not be
 *  interpreted as a conversion.
 *
 *  NOTE(review): child paths are assembled in a 1024-byte buffer; longer
 *  paths are not handled here.
 */
static rc_t copy_metadata_child ( const KMDataNode *src_root, KMDataNode *dst_root,
                                  const char *node_path, const bool show_meta )
{
    const KMDataNode *snode;
    KMDataNode *dnode;
    KNamelist *names;

    rc_t rc = KMDataNodeOpenNodeRead ( src_root, & snode, "%s", node_path );
    DISP_RC( rc, "copy_metadata_child:KMDataNodeOpenNodeRead(src) failed" );
    if ( rc != 0 )
        return rc;

    if ( show_meta )
        KOutMsg( "copy child-node: %s\n", node_path );

    rc = KMDataNodeOpenNodeUpdate ( dst_root, & dnode, "%s", node_path );
    DISP_RC( rc, "copy_metadata_child:KMDataNodeOpenNodeUpdate(dst) failed" );
    if ( rc == 0 )
    {
        rc = copy_metadata_data ( snode, dnode );
        if ( rc == 0 )
            rc = copy_metadata_attribs ( snode, dnode, node_path, show_meta );
        KMDataNodeRelease ( dnode );
    }
    else
    {
        PLOGMSG( klogInfo, ( klogInfo,
                 "cannot open child-node(dst): $(node)", "node=%s", node_path ));
    }

    /* a busy destination node does not stop the walk over its children */
    if ( rc == 0 || ( GetRCState( rc ) == rcBusy ) )
    {
        rc = KMDataNodeListChild ( snode, & names );
        DISP_RC( rc, "copy_metadata_child:KMDataNodeListChild(src) failed" );
        if ( rc == 0 )
        {
            uint32_t i, count;
            char temp_path[ 1024 ];
            size_t temp_len;

            /* temp_path = node_path + '/', children are appended after it */
            string_copy ( temp_path, ( sizeof temp_path ) - 1, node_path, string_size( node_path ) );
            temp_len = string_size( temp_path );
            temp_path[ temp_len++ ] = '/';
            temp_path[ temp_len ] = 0;

            rc = KNamelistCount ( names, & count );
            for ( i = 0; rc == 0 && i < count; ++ i )
            {
                const char *child_name;
                rc = KNamelistGet ( names, i, & child_name );
                if ( rc == 0 )
                {
                    string_copy( temp_path + temp_len, ( sizeof temp_path ) - temp_len,
                                 child_name, string_size( child_name ) );
                    rc = copy_metadata_child ( src_root, dst_root, temp_path, show_meta );
                    temp_path[ temp_len ] = 0; /* cut back to "node_path/" */
                }
            }
            KNamelistRelease ( names );
        }
    }
    KMDataNodeRelease ( snode );
    return rc;
}