/* TableWriterSeq_WriteStatistics
 *  Gathers the entries of cself->stats into a scratch buffer (via stats_cb),
 *  orders all of them with stats_cmp_count, re-orders the leading entries
 *  (at most 126 of them) with stats_cmp_distance, and writes the "distance"
 *  member of each of those leading entries to "node".
 *
 *  "cself" [ IN ] - supplies "stats" and "statsCount"
 *  "node" [ IN ] - metadata node receiving the packed distances
 *
 *  Returns the first non-zero rc produced by buffer creation, the vector
 *  visit or the node write; 0 otherwise.
 */
static rc_t TableWriterSeq_WriteStatistics(TableWriterSeq const *cself, KMDataNode *node)
{
    KDataBuffer scratch;
    pb_t ctx;
    rc_t rc;

    /* one element per statistic, element size given in bits */
    rc = KDataBufferMake(&scratch, 8 * sizeof(ctx.stats[0]), cself->statsCount);
    if (rc != 0) {
        return rc;
    }

    ctx.stats = scratch.base;
    ctx.i = 0;

    rc = KVectorVisitU64(cself->stats, 0, stats_cb, &ctx);
    if (rc == 0) {
        /* "packed" aliases the very same storage as ctx.stats */
        uint64_t *const packed = scratch.base;
        unsigned kept = 126;
        unsigned k = 0;

        if (cself->statsCount < 126) {
            kept = cself->statsCount;
        }

        ksort(ctx.stats, cself->statsCount, sizeof(ctx.stats[0]), stats_cmp_count, NULL);
        ksort(ctx.stats, kept, sizeof(ctx.stats[0]), stats_cmp_distance, NULL);

        /* compact in place, front to back; assumes sizeof(ctx.stats[0]) is
         * at least sizeof(uint64_t) so no unread entry is overwritten
         * - TODO confirm against the pb_t declaration */
        while (k != kept) {
            packed[k] = ctx.stats[k].distance;
            ++k;
        }

        rc = KMDataNodeWrite(node, packed, kept * sizeof(packed[0]));
    }

    KDataBufferWhack(&scratch);
    return rc;
}
/* refseq_meta_stats
 *  Folds one row of 4na-bin sequence into the running reference statistics
 *  kept in "self" and rewrites the corresponding metadata nodes.
 *
 *  The row (argv[0]) is translated through INSDC_4na_map_CHARSET into
 *  data->buf, which is grown with realloc when too small. Then, in order:
 *    "TOTAL_SEQ_LEN" - running length, rejected if the addition would wrap
 *    "CRC32"         - running CRC32 over the translated text
 *    "MD5"           - digest of a finished copy of the running MD5 state
 *  Processing stops at the first failure.
 *
 *  "self" [ IN, OUT ] - refseq_meta_stats_data accumulator; must not be NULL
 *  "argv" [ IN ] - argv[0] holds the sequence for this row
 *  "info", "row_id", "rslt", "argc" are not used here.
 *
 *  Returns 0 on success, otherwise the rc of the failing step.
 */
static rc_t CC refseq_meta_stats( void *self, const VXformInfo *info, int64_t row_id,
                                  VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    KMDataNode *node;
    refseq_meta_stats_data *const state = self;
    const uint64_t seq_len = argv[0].u.data.elem_count;
    const INSDC_4na_bin *const bases =
        (const INSDC_4na_bin *) argv[0].u.data.base + argv[0].u.data.first_elem;
    uint64_t pos;

    assert(state != NULL);

    /* make room for the translated row */
    if( state->buf_sz < seq_len ) {
        char *grown = realloc(state->buf, seq_len);
        if( grown == NULL ) {
            return RC(rcVDB, rcFunction, rcUpdating, rcMemory, rcExhausted);
        }
        state->buf = grown;
        state->buf_sz = seq_len;
    }

    for( pos = 0; pos < seq_len; ++pos ) {
        state->buf[pos] = INSDC_4na_map_CHARSET[bases[pos]];
    }

    /* running total length */
    rc = KMDataNodeOpenNodeUpdate(state->stats, &node, "TOTAL_SEQ_LEN");
    if( rc != 0 ) {
        return rc;
    }
    if( state->total_seq_len + seq_len < state->total_seq_len ) {
        /* the sum wrapped around */
        rc = RC(rcVDB, rcFunction, rcUpdating, rcMetadata, rcOutofrange);
    } else {
        state->total_seq_len += seq_len;
        rc = KMDataNodeWriteB64(node, &state->total_seq_len);
    }
    KMDataNodeRelease(node);
    if( rc != 0 ) {
        return rc;
    }

    /* running CRC32 */
    rc = KMDataNodeOpenNodeUpdate(state->stats, &node, "CRC32");
    if( rc != 0 ) {
        return rc;
    }
    state->crc32 = CRC32(state->crc32, state->buf, seq_len);
    rc = KMDataNodeWriteB32(node, &state->crc32);
    KMDataNodeRelease(node);
    if( rc != 0 ) {
        return rc;
    }

    /* running MD5: finish a copy so the accumulator can keep growing */
    rc = KMDataNodeOpenNodeUpdate(state->stats, &node, "MD5");
    if( rc == 0 ) {
        MD5State snapshot;
        uint8_t digest[16];

        MD5StateAppend(&state->md5, state->buf, seq_len);
        memcpy(&snapshot, &state->md5, sizeof(snapshot));
        MD5StateFinish(&snapshot, digest);
        rc = KMDataNodeWrite(node, digest, sizeof(digest));
        KMDataNodeRelease(node);
    }
    return rc;
}
/* StoreSchema
 *  Records the database schema under the "schema" metadata node:
 *  the "name" attribute receives the text produced by VSchemaToText for
 *  the database name and version, and the node value is truncated and then
 *  refilled by VSchemaDump in compact mode.
 *
 *  Failures after the node is opened are logged; the node is released on
 *  every path once it has been opened. Returns the first failing rc or 0.
 */
rc_t VDatabaseStoreSchema ( VDatabase *self )
{
    char expr [ 256 ];
    size_t num_writ;
    KMDataNode *node;

    /* open schema node */
    rc_t rc = KMetadataOpenNodeUpdate ( self -> meta, & node, "schema" );
    if ( rc != 0 )
        return rc;

    /* one byte is held back for the terminator added below */
    rc = VSchemaToText ( self -> schema, expr, sizeof expr - 1, & num_writ,
        "%N%V", self -> sdb -> name, self -> sdb -> version );
    if ( rc != 0 )
    {
        LOGERR ( klogInt, rc, "failed to determine database schema" );
        goto release_node;
    }
    expr [ num_writ ] = 0;

    rc = KMDataNodeWriteAttr ( node, "name", expr );
    if ( rc != 0 )
    {
        PLOGERR (klogInt, ( klogInt, rc, "failed to write database type '$(expr)'", "expr=%s", expr ));
        goto release_node;
    }

    /* truncate existing schema, then append the dump to it */
    rc = KMDataNodeWrite ( node, "", 0 );
    if ( rc == 0 )
    {
        rc = VSchemaDump ( self -> schema, sdmCompact, expr,
            ( rc_t ( CC * ) ( void*, const void*, size_t ) ) KMDataNodeAppend, node );
    }
    if ( rc != 0 )
    {
        PLOGERR (klogInt, ( klogInt, rc, "failed to write database schema '$(expr)'", "expr=%s", expr ));
    }

release_node:
    KMDataNodeRelease ( node );
    return rc;
}
/* StoreSchema
 *  stores schema definition in metadata
 *
 *  <schema name="">...</schema>
 *
 *  The "name" attribute normally holds the table's name and version as
 *  rendered by VSchemaToText. When the table declares a default view, that
 *  text goes to a "table" attribute instead and "name" receives the view's
 *  name and version. The node value is then truncated and refilled by
 *  VSchemaDump in compact mode.
 *
 *  Failures after the node is opened are logged; the node is released on
 *  every path once it has been opened. Returns the first failing rc or 0.
 */
LIB_EXPORT rc_t VTableStoreSchema ( VTable *self )
{
    char expr [ 256 ];
    size_t num_writ;
    KMDataNode *node;

    /* open schema node */
    rc_t rc = KMetadataOpenNodeUpdate ( self -> meta, & node, "schema" );
    if ( rc != 0 )
        return rc;

    /* one byte is held back for the terminator added below */
    rc = VSchemaToText ( self -> schema, expr, sizeof expr - 1, & num_writ,
        "%N%V", self -> stbl -> name, self -> stbl -> version );
    if ( rc != 0 )
    {
        LOGERR ( klogInt, rc, "failed to determine table schema" );
        goto release_node;
    }
    expr [ num_writ ] = 0;

    /* if table has a default view declaration,
       store the table information under a new attribute */
    if ( self -> stbl -> dflt_view != NULL )
    {
        uint32_t type;
        const SNameOverload *name;
        const STable *view = VSchemaFind ( self -> schema, & name, & type,
            self -> stbl -> dflt_view-> addr, __func__, false );

        if ( view == NULL )
        {
            rc = RC ( rcVDB, rcTable, rcUpdating, rcSchema, rcNotFound );
            PLOGERR ( klogInt, ( klogInt, rc, "failed to locate default view schema '$(expr)'", "expr=%S", self -> stbl -> dflt_view ));
            goto release_node;
        }

        rc = KMDataNodeWriteAttr ( node, "table", expr );
        if ( rc != 0 )
        {
            PLOGERR ( klogInt, ( klogInt, rc, "failed to write table type '$(expr)'", "expr=%s", expr ));
            goto release_node;
        }

        /* from here on "expr" names the default view */
        rc = VSchemaToText ( self -> schema, expr, sizeof expr - 1, & num_writ,
            "%N%V", view -> name, view -> version );
        if ( rc != 0 )
        {
            LOGERR ( klogInt, rc, "failed to determine table default view schema" );
            goto release_node;
        }
        expr [ num_writ ] = 0;
    }

    rc = KMDataNodeWriteAttr ( node, "name", expr );
    if ( rc != 0 )
    {
        PLOGERR ( klogInt, ( klogInt, rc, "failed to write table name '$(expr)'", "expr=%s", expr ));
        goto release_node;
    }

    /* truncate existing schema, then append the dump to it */
    rc = KMDataNodeWrite ( node, "", 0 );
    if ( rc == 0 )
    {
        rc = VSchemaDump ( self -> schema, sdmCompact, expr,
            ( rc_t ( CC * ) ( void*, const void*, size_t ) ) KMDataNodeAppend, node );
    }
    if ( rc != 0 )
    {
        PLOGERR ( klogInt, ( klogInt, rc, "failed to write table schema '$(expr)'", "expr=%s", expr ));
    }

release_node:
    KMDataNodeRelease ( node );
    return rc;
}
/* Write
 *  store a new value in this node (value or attribute),
 *  replacing whatever it held before
 *
 *  "buffer" [ IN ] and "size" [ IN ] - the bytes that become the new value
 *
 *  forwards to KMDataNodeWrite and returns its rc unchanged
 */
inline rc_t Write ( const void *buffer, size_t size ) throw()
{
    rc_t const status = KMDataNodeWrite ( this, buffer, size );
    return status;
}
/* FastqGzip_Idx
 *  Builds the index for a gzip-compressed FASTQ rendering of "sratbl".
 *
 *  Spots are formatted one at a time by a FastqReader into "buffer",
 *  gathered into a raw block and deflated with ZLib_DeflateBlock. Every
 *  deflated block that gets closed becomes one SIndexNode appended to
 *  obj->li; obj->file_size and obj->md5 advance by the deflated bytes and
 *  obj->buffer_sz tracks the largest raw block. On success the formatting
 *  options are stored below the "Format/Options" node of obj->meta.
 *
 *  "sratbl" [ IN ] - table providing the PLATFORM column and the spots
 *  "obj" [ IN, OUT ] - index being built (spot range, list, size, md5, meta)
 *  "buffer" [ IN ] and "buffer_sz" [ IN ] - scratch space for one formatted spot
 *
 *  Returns 0 on success, otherwise the rc of the first failing step.
 *  The PLATFORM column, the reader, the work buffers and any index node
 *  that was not handed to obj->li are released on every path.
 */
static rc_t FastqGzip_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const FastqReader* reader = NULL;

    /* formatting options: passed to the reader and recorded in metadata */
    uint16_t zlib_ver = ZLIB_VERNUM;
    uint8_t colorSpace = false;
    char* colorSpaceKey = "\0";
    uint8_t origFormat = false;
    uint8_t printLabel = true;
    uint8_t printReadId = true;
    uint8_t clipQuality = true;
    uint32_t minReadLen = 0;
    uint16_t qualityOffset = 0;

    /* row 1 of PLATFORM decides whether color space is used */
    {
        const SRAColumn* c = NULL;
        const uint8_t *platform = SRA_PLATFORM_UNDEFINED;
        bitsz_t o, z;

        if( (rc = SRATableOpenColumnRead(sratbl, &c, "PLATFORM", sra_platform_id_t)) != 0 ) {
            return rc;
        }
        rc = SRAColumnRead(c, 1, (const void **)&platform, &o, &z);
        if( rc == 0 && *platform == SRA_PLATFORM_ABSOLID ) {
            colorSpace = true;
        }
        /* the column is released whether or not the read succeeded */
        SRAColumnRelease(c);
        if( rc != 0 ) {
            return rc;
        }
    }

    if( (rc = FastqReaderMake(&reader, sratbl, g_accession,
                              colorSpace, origFormat, false, printLabel, printReadId,
                              !clipQuality, minReadLen, qualityOffset, colorSpaceKey[0],
                              obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    }

    {
        size_t written = 0;
        uint32_t blk = 0, spots_per_block = 0, proj_id_qty = 0;
        SIndexNode* inode = NULL;
        size_t z_blk = 0;
        size_t spots_buf_sz = g_file_block_sz * 100;
        size_t zbuf_sz = spots_buf_sz + 100;
        char* zbuf = malloc(zbuf_sz);
        char* spots_buf = malloc(spots_buf_sz);
        bool eof = false;

        if( zbuf == NULL || spots_buf == NULL ) {
            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
        }
        while( rc == 0 ) {
            if( (rc = FastqReader_GetNextSpotSplitData(reader, buffer, buffer_sz, &written)) == 0 ) {
                if( inode == NULL ) {
                    /* first spot of a new block: open its index node */
                    spotid_t spotid = 0;
                    if( (rc = FastqReaderCurrentSpot(reader, &spotid)) != 0 ) {
                        break;
                    }
                    inode = malloc(sizeof(SIndexNode));
                    if( inode == NULL ) {
                        rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        break;
                    }
                    inode->key = obj->file_size;
                    inode->key_size = 0;
                    inode->id = spotid;
                    inode->id_qty = 0;
                    DEBUG_MSG(5, ("%s open key: spot %ld, offset %lu\n", obj->index, inode->id, inode->key));
                }
                if( blk + written > spots_buf_sz ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                    break;
                }
                inode->id_qty++;
                memmove(&spots_buf[blk], buffer, written);
                blk += written;
                if( g_dump ) {
                    fwrite(buffer, written, 1, stderr);
                }
            }
            /* running out of rows is the normal way to finish */
            if( (eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted)) ) {
                rc = 0;
                if( inode == NULL ) {
                    break;
                }
            }
            /* deflate at end of input, or when the spot count reaches the
               projection (or, lacking one, exceeds 95% of the previous
               block's spot count) */
            if( rc == 0 &&
                (eof ||
                 (proj_id_qty == 0 && inode->id_qty > (spots_per_block * 0.95)) ||
                 (proj_id_qty > 0 && inode->id_qty >= proj_id_qty) ) ) {
                rc = ZLib_DeflateBlock(spots_buf, blk, zbuf, zbuf_sz, &z_blk);
                /* only trust z_blk after a successful deflate, and never
                   divide by an empty result */
                if( rc == 0 ) {
                    if( z_blk >= g_file_block_sz ) {
                        DEBUG_MSG(10, ("%s: no projection %u > %u\n", obj->index, z_blk, g_file_block_sz));
                    } else if( z_blk > 0 ) {
                        /* project needed id_qty */
                        proj_id_qty = g_file_block_sz * inode->id_qty / z_blk * 1.05;
                        DEBUG_MSG(5, ("%s: project id qty %u\n", obj->index, proj_id_qty));
                    }
                }
            }
            /* close the block once it is big enough, or at end of input */
            if( rc == 0 && (eof || z_blk >= g_file_block_sz) ) {
                obj->file_size += z_blk;
                MD5StateAppend(&obj->md5, zbuf, z_blk);
                inode->key_size = z_blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("%s close key: spots %lu, size %lu, ratio %hu%%, raw %u\n",
                              obj->index, inode->id_qty, inode->key_size,
                              (uint16_t)(((float)(blk - z_blk)/blk)*100), blk ));
                spots_per_block = inode->id_qty;
                inode = NULL; /* now owned by obj->li */
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
                z_blk = 0;
                proj_id_qty = 0;
            }
            if( eof ) {
                break;
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            FastqReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        /* a node still held here was never pushed onto obj->li */
        free(inode);
        free(zbuf);
        free(spots_buf);
    }

    if( rc == 0 ) {
        KMDataNode* opt = NULL, *nd = NULL;

        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) == 0 ) {
            if( (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "ZlibVersion")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &zlib_ver);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpace")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &colorSpace);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpaceKey")) == 0 ) {
                rc = KMDataNodeWrite(nd, colorSpaceKey, 1);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "origFormat")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &origFormat);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printLabel")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printLabel);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printReadId")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printReadId);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "clipQuality")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &clipQuality);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "minReadLen")) == 0 ) {
                rc = KMDataNodeWriteB32(nd, &minReadLen);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "qualityOffset")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &qualityOffset);
                KMDataNodeRelease(nd);
            }
            KMDataNodeRelease(opt);
        }
    }
    /* reached on every path after the reader was made */
    FastqReaderWhack(reader);
    return rc;
}
/* Fastq_Idx
 *  Builds the index for an uncompressed FASTQ rendering of "sratbl".
 *
 *  The formatting options are first stored below the "Format/Options" node
 *  of obj->meta. Spots are then formatted one at a time by a FastqReader
 *  into "buffer"; each one advances obj->file_size and obj->md5. Spots are
 *  grouped into blocks: a block is closed once its size has reached
 *  g_file_block_sz or the rows are exhausted, and each closed block becomes
 *  one SIndexNode appended to obj->li. obj->buffer_sz tracks the largest
 *  block.
 *
 *  "sratbl" [ IN ] - table providing the PLATFORM column and the spots
 *  "obj" [ IN, OUT ] - index being built (spot range, list, size, md5, meta)
 *  "buffer" [ IN ] and "buffer_sz" [ IN ] - scratch space for one formatted spot
 *
 *  Returns 0 on success, otherwise the rc of the first failing step.
 *  An empty spot range yields an empty index. The PLATFORM column, the
 *  reader and any index node not handed to obj->li are released on every
 *  path.
 */
static rc_t Fastq_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const FastqReader* reader = NULL;

    /* formatting options: passed to the reader and recorded in metadata */
    uint8_t colorSpace = false;
    char* colorSpaceKey = "\0";
    uint8_t origFormat = false;
    uint8_t printLabel = true;
    uint8_t printReadId = true;
    uint8_t clipQuality = true;
    uint32_t minReadLen = 0;
    uint16_t qualityOffset = 0;

    /* row 1 of PLATFORM decides whether color space is used */
    {
        const SRAColumn* c = NULL;
        const uint8_t *platform = SRA_PLATFORM_UNDEFINED;
        bitsz_t o, z;

        if( (rc = SRATableOpenColumnRead(sratbl, &c, "PLATFORM", sra_platform_id_t)) != 0 ) {
            return rc;
        }
        rc = SRAColumnRead(c, 1, (const void **)&platform, &o, &z);
        if( rc == 0 && *platform == SRA_PLATFORM_ABSOLID ) {
            colorSpace = true;
        }
        /* the column is released whether or not the read succeeded */
        SRAColumnRelease(c);
        if( rc != 0 ) {
            return rc;
        }
    }

    if( (rc = FastqReaderMake(&reader, sratbl, g_accession,
                              colorSpace, origFormat, false, printLabel, printReadId,
                              !clipQuality, minReadLen, qualityOffset, colorSpaceKey[0],
                              obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    }

    {
        KMDataNode* opt = NULL, *nd = NULL;

        /* a failure here falls through to the reader cleanup below */
        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) == 0 ) {
            if( (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpace")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &colorSpace);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpaceKey")) == 0 ) {
                rc = KMDataNodeWrite(nd, colorSpaceKey, 1);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "origFormat")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &origFormat);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printLabel")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printLabel);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printReadId")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printReadId);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "clipQuality")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &clipQuality);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "minReadLen")) == 0 ) {
                rc = KMDataNodeWriteB32(nd, &minReadLen);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "qualityOffset")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &qualityOffset);
                KMDataNodeRelease(nd);
            }
            KMDataNodeRelease(opt);
        }
    }

    if( rc == 0 ) {
        size_t written = 0;
        uint32_t blk = 0;
        SIndexNode* inode = NULL;

        while( rc == 0 ) {
            bool eof;

            rc = FastqReader_GetNextSpotSplitData(reader, buffer, buffer_sz, &written);
            eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted);
            if( rc != 0 && !eof ) {
                /* genuine read failure: "buffer" and "written" hold nothing
                   usable, so stop before they are counted */
                break;
            }
            /* close the open block when it is full or the rows ran out;
               there is no open block when the very first read hits the end */
            if( inode != NULL && (eof || blk >= g_file_block_sz) ) {
                inode->key_size = blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("Fastq index closed spots %lu, offset %lu, block size %lu\n",
                              inode->id_qty, inode->key, inode->key_size));
                inode = NULL; /* now owned by obj->li */
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
            }
            if( eof ) {
                rc = 0;
                break;
            }
            if( inode == NULL ) {
                /* first spot of a new block: open its index node */
                spotid_t spotid = 0;
                if( (rc = FastqReaderCurrentSpot(reader, &spotid)) != 0 ) {
                    break;
                }
                inode = malloc(sizeof(SIndexNode));
                if( inode == NULL ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                    break;
                }
                inode->key = obj->file_size;
                inode->key_size = 0;
                inode->id = spotid;
                inode->id_qty = 0;
                DEBUG_MSG(5, ("Fastq index opened spot %ld, offset %lu\n", inode->id, inode->key));
            }
            inode->id_qty++;
            obj->file_size += written;
            blk += written;
            MD5StateAppend(&obj->md5, buffer, written);
            if( g_dump ) {
                fwrite(buffer, written, 1, stderr);
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            FastqReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        /* a node still held here was never pushed onto obj->li */
        free(inode);
    }
    /* reached on every path after the reader was made */
    FastqReaderWhack(reader);
    return rc;
}