Ejemplo n.º 1
0
/* Write the descriptive metadata of one index object beneath obj->meta.
 *
 * Nodes are written in this order:
 *   "Format"                    <- obj->format
 *   "Format/Options/accession"  <- g_accession
 *   "Format/Options/minSpotId"  <- obj->minSpotId
 *   "Format/Options/maxSpotId"  <- obj->maxSpotId
 *   "Size"    (only when obj->file_size > 0)
 *   "Buffer"  (only when obj->buffer_sz > 0)
 *   "Index"   (only when obj->index is a non-empty string)
 *   "md5"     (only when obj->file_size > 0; digest appended as hex text)
 *
 * Returns 0 on success, otherwise the rc of the first failing call. After a
 * failure no further node is opened; nodes already opened are released.
 */
rc_t WriteFileMeta(SIndexObj* obj)
{
    rc_t rc;
    KMDataNode* node = NULL;

    PLOGMSG(klogInfo, (klogInfo, "Meta $(f) on index $(i): file size $(s), buffer $(b)",
        PLOG_4(PLOG_S(f),PLOG_S(i),PLOG_U64(s),PLOG_U32(b)), obj->file, obj->index, obj->file_size, obj->buffer_sz));

    /* "Format" carries the format name and owns the "Options" subtree */
    rc = KMDataNodeOpenNodeUpdate(obj->meta, &node, "Format");
    if( rc == 0 ) {
        KMDataNode* options = NULL;

        rc = KMDataNodeWriteCString(node, obj->format);
        if( rc == 0 ) {
            rc = KMDataNodeOpenNodeUpdate(node, &options, "Options");
            if( rc == 0 ) {
                KMDataNode* child = NULL;

                rc = KMDataNodeOpenNodeUpdate(options, &child, "accession");
                if( rc == 0 ) {
                    rc = KMDataNodeWriteCString(child, g_accession);
                    KMDataNodeRelease(child);
                }
                if( rc == 0 ) {
                    rc = KMDataNodeOpenNodeUpdate(options, &child, "minSpotId");
                    if( rc == 0 ) {
                        rc = KMDataNodeWriteB64(child, &obj->minSpotId);
                        KMDataNodeRelease(child);
                    }
                }
                if( rc == 0 ) {
                    rc = KMDataNodeOpenNodeUpdate(options, &child, "maxSpotId");
                    if( rc == 0 ) {
                        rc = KMDataNodeWriteB64(child, &obj->maxSpotId);
                        KMDataNodeRelease(child);
                    }
                }
                KMDataNodeRelease(options);
            }
        }
        KMDataNodeRelease(node);
    }

    if( rc == 0 && obj->file_size > 0 ) {
        rc = KMDataNodeOpenNodeUpdate(obj->meta, &node, "Size");
        if( rc == 0 ) {
            rc = KMDataNodeWriteB64(node, &obj->file_size);
            KMDataNodeRelease(node);
        }
    }

    if( rc == 0 && obj->buffer_sz > 0 ) {
        rc = KMDataNodeOpenNodeUpdate(obj->meta, &node, "Buffer");
        if( rc == 0 ) {
            rc = KMDataNodeWriteB32(node, &obj->buffer_sz);
            KMDataNodeRelease(node);
        }
    }

    if( rc == 0 && obj->index[0] != '\0' ) {
        rc = KMDataNodeOpenNodeUpdate(obj->meta, &node, "Index");
        if( rc == 0 ) {
            rc = KMDataNodeWriteCString(node, obj->index);
            KMDataNodeRelease(node);
        }
    }

    if( rc == 0 && obj->file_size > 0 ) {
        rc = KMDataNodeOpenNodeUpdate(obj->meta, &node, "md5");
        if( rc == 0 ) {
            char hex[5];
            size_t pos = 0;

            /* append the digest one byte at a time as two lowercase hex digits */
            while( rc == 0 && pos < sizeof(obj->md5_digest) ) {
                int len = snprintf(hex, 4, "%02x", obj->md5_digest[pos]);
                rc = KMDataNodeAppend(node, hex, len);
                pos++;
            }
            KMDataNodeRelease(node);
        }
    }
    return rc;
}
Ejemplo n.º 2
0
/* Row callback that folds one sequence into the running statistics kept in
 * `self` (a refseq_meta_stats_data) and rewrites the statistics nodes.
 *
 * argv[0] supplies the sequence as INSDC_4na_bin codes. Each code is mapped
 * through INSDC_4na_map_CHARSET into data->buf (grown with realloc when it
 * is too small), then under data->stats:
 *   "TOTAL_SEQ_LEN" <- running length (fails with rcOutofrange on wrap-around)
 *   "CRC32"         <- running CRC32 over the mapped text
 *   "MD5"           <- digest of all mapped text so far, finished on a copy
 *                      of the running state so accumulation can continue
 *
 * Returns 0 on success or the rc of the first failing step. info, row_id,
 * rslt and argc are not used here.
 */
static
rc_t CC refseq_meta_stats( void *self, const VXformInfo *info, int64_t row_id,
                             VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc = 0;
    KMDataNode* node;
    refseq_meta_stats_data* data = self;
    uint64_t pos;
    uint64_t seq_len = argv[0].u.data.elem_count;
    const INSDC_4na_bin * seq = argv[0].u.data.base;

    /* skip to the first element that belongs to this row */
    seq += argv[0].u.data.first_elem;

    assert(data != NULL);

    /* grow the scratch buffer if this sequence does not fit */
    if( data->buf_sz < seq_len ) {
        char* grown = realloc(data->buf, seq_len);
        if( grown != NULL ) {
            data->buf = grown;
            data->buf_sz = seq_len;
        } else {
            rc = RC(rcVDB, rcFunction, rcUpdating, rcMemory, rcExhausted);
        }
    }

    if( rc == 0 ) {
        for(pos = 0; pos < seq_len; pos++) {
            data->buf[pos] = INSDC_4na_map_CHARSET[seq[pos]];
        }
    }

    if( rc == 0 ) {
        rc = KMDataNodeOpenNodeUpdate(data->stats, &node, "TOTAL_SEQ_LEN");
        if( rc == 0 ) {
            /* unsigned wrap-around means the running total overflowed */
            if( data->total_seq_len + seq_len < data->total_seq_len ) {
                rc = RC(rcVDB, rcFunction, rcUpdating, rcMetadata, rcOutofrange);
            } else {
                data->total_seq_len += seq_len;
                rc = KMDataNodeWriteB64(node, &data->total_seq_len);
            }
            KMDataNodeRelease(node);
        }
    }

    if( rc == 0 ) {
        rc = KMDataNodeOpenNodeUpdate(data->stats, &node, "CRC32");
        if( rc == 0 ) {
            data->crc32 = CRC32(data->crc32, data->buf, seq_len);
            rc = KMDataNodeWriteB32(node, &data->crc32);
            KMDataNodeRelease(node);
        }
    }

    if( rc == 0 ) {
        rc = KMDataNodeOpenNodeUpdate(data->stats, &node, "MD5");
        if( rc == 0 ) {
            uint8_t digest[16];
            MD5State snapshot;

            MD5StateAppend(&data->md5, data->buf, seq_len);
            /* finish a copy so the running state stays open for later rows */
            memcpy(&snapshot, &data->md5, sizeof(snapshot));
            MD5StateFinish(&snapshot, digest);
            rc = KMDataNodeWrite(node, digest, sizeof(digest));
            KMDataNodeRelease(node);
        }
    }
    return rc;
}
Ejemplo n.º 3
0
/* Append a new "EVENT_<n>" child to the "HISTORY" node of dst_meta and fill
 * it through enter_date_name_vers().
 *
 * <n> is get_child_count() of the HISTORY node plus one. When show_meta is
 * set a progress line is printed first. Each failing step is reported via
 * DISP_RC; the function returns 0 on success or the rc of the first failure.
 */
static rc_t enter_vdbcopy_node( KMetadata *dst_meta, const bool show_meta )
{
    rc_t rc;
    KMDataNode *history;
    char event_name[ 32 ];
    uint32_t event_nr;

    if ( show_meta )
        KOutMsg( "--- entering Copy entry...\n" );

    rc = KMetadataOpenNodeUpdate ( dst_meta, &history, "HISTORY" );
    DISP_RC( rc, "enter_vdbcopy_node:KMetadataOpenNodeUpdate('HISTORY') failed" );
    if ( rc != 0 )
        return rc;

    /* the new event is numbered one past the existing children */
    event_nr = get_child_count( history ) + 1;
    rc = string_printf ( event_name, sizeof( event_name ), NULL, "EVENT_%u", event_nr );
    DISP_RC( rc, "enter_vdbcopy_node:string_printf(EVENT_NR) failed" );
    if ( rc == 0 )
    {
        KMDataNode *event;
        rc = KMDataNodeOpenNodeUpdate ( history, &event, event_name );
        DISP_RC( rc, "enter_vdbcopy_node:KMDataNodeOpenNodeUpdate('EVENT_NR') failed" );
        if ( rc == 0 )
        {
            rc = enter_date_name_vers( event );
            KMDataNodeRelease ( event );
        }
    }

    KMDataNodeRelease ( history );
    return rc;
}
Ejemplo n.º 4
0
/* Destroy a TableWriterSeq, optionally committing its data.
 *
 * cself  : writer to destroy; NULL is accepted and yields 0.
 * commit : when true, the metadata nodes "MATE_STATISTICS" (only if
 *          cself->stats is set) and "unaligned/first-unaligned",
 *          "unaligned/first-half-aligned" are written before the underlying
 *          writer is whacked.
 * rows   : passed through to TableWriter_Whack.
 *
 * Error handling: the first failure while writing metadata stops further
 * metadata writes and turns the commit into a rollback (TableWriter_Whack is
 * called with commit == false). That first error is what the function
 * returns; the rc of TableWriter_Whack is returned only when everything
 * before it succeeded. Previously the metadata rc was overwritten, so a
 * failed commit could be reported as success, and the "unaligned" sub-node
 * results were not checked at all.
 *
 * All resources owned by the writer are released regardless of errors.
 */
LIB_EXPORT rc_t CC TableWriterSeq_Whack(const TableWriterSeq* cself, bool commit, uint64_t* rows)
{
    rc_t rc = 0;

    if( cself != NULL ) {
        TableWriterSeq* self = (TableWriterSeq*)cself;
        VTable *vtbl;
        rc_t whack_rc;

        TableReader_Whack(cself->tmpkey_reader);

        if (commit && (rc = TableWriter_GetVTable(cself->base, &vtbl)) == 0 ) {
            KMetadata* meta;

            if ((rc = VTableOpenMetadataUpdate(vtbl, &meta)) == 0) {
                KMDataNode* node = NULL;

                if (cself->stats) {
                    if ((rc = KMetadataOpenNodeUpdate(meta, &node, "MATE_STATISTICS")) == 0) {
                        rc = TableWriterSeq_WriteStatistics(cself, node);
                        KMDataNodeRelease(node);
                    }
                }
                /* do not let a later success hide a statistics failure */
                if (rc == 0 && (rc = KMetadataOpenNodeUpdate(meta, &node, "unaligned")) == 0) {
                    KMDataNode *sub = NULL;

                    if ((rc = KMDataNodeOpenNodeUpdate(node, &sub, "first-unaligned")) == 0) {
                        rc = KMDataNodeWriteB64(sub, &self->firstUnaligned);
                        KMDataNodeRelease(sub);
                    }
                    if (rc == 0 && (rc = KMDataNodeOpenNodeUpdate(node, &sub, "first-half-aligned")) == 0) {
                        rc = KMDataNodeWriteB64(sub, &self->firstHalfAligned);
                        KMDataNodeRelease(sub);
                    }

                    KMDataNodeRelease(node);
                }
                KMetadataRelease(meta);
            }
        }

        /* commit only if everything above succeeded; keep the first error */
        whack_rc = TableWriter_Whack(cself->base, commit && (rc == 0), rows);
        if (rc == 0) {
            rc = whack_rc;
        }
        KVectorRelease(cself->stats);
        free(self->qual_buf);
        free(self);
    }
    return rc;
}
Ejemplo n.º 5
0
/* Open (for update) the child of `node` called `name` and store `value` in
 * it with KMDataNodeWriteB64.
 *
 * value : pointer handed unchanged to KMDataNodeWriteB64 (presumably a
 *         64-bit quantity -- confirm against callers).
 *
 * Returns 0 on success, the open error if the child cannot be opened, or the
 * write error otherwise. The child node is always released once opened.
 */
static rc_t group_stats_write_node(KMDataNode *const node,
                                   char const name[],
                                   void const *const value)
{
    KMDataNode *child;
    rc_t rc;

    /* name is passed as an argument, never as the format string itself */
    rc = KMDataNodeOpenNodeUpdate(node, &child, "%s", name);
    if (rc != 0) {
        return rc;
    }

    rc = KMDataNodeWriteB64(child, value);
    KMDataNodeRelease(child);
    return rc;
}
Ejemplo n.º 6
0
/* Open the statistics child nodes of `parent` into the handles of `g`.
 *
 * Opened in order: SPOT_COUNT, BASE_COUNT, BIO_BASE_COUNT, SPOT_MIN,
 * SPOT_MAX and, only when `compressed` is set, CMP_BASE_COUNT.
 *
 * Stops at the first failure and returns its rc; returns 0 when every
 * requested node was opened. Handles opened before a failure are NOT
 * released here (NOTE(review): presumably the caller cleans up `g`).
 */
static
rc_t sra_meta_stats_node_group_open(KMDataNode* parent, sra_meta_stats_node_group* g, bool compressed)
{
    rc_t rc;
    assert(parent && g);

    rc = KMDataNodeOpenNodeUpdate(parent, &g->node_spot_count, "SPOT_COUNT");
    if( rc == 0 ) {
        rc = KMDataNodeOpenNodeUpdate(parent, &g->node_base_count, "BASE_COUNT");
    }
    if( rc == 0 ) {
        rc = KMDataNodeOpenNodeUpdate(parent, &g->node_bio_base_count, "BIO_BASE_COUNT");
    }
    if( rc == 0 ) {
        rc = KMDataNodeOpenNodeUpdate(parent, &g->node_spot_min, "SPOT_MIN");
    }
    if( rc == 0 ) {
        rc = KMDataNodeOpenNodeUpdate(parent, &g->node_spot_max, "SPOT_MAX");
    }
    if( rc == 0 && compressed ) {
        rc = KMDataNodeOpenNodeUpdate(parent, &g->node_cmp_base_count, "CMP_BASE_COUNT");
    }
    return rc;
}
Ejemplo n.º 7
0
/* Record a schema update in the destination metadata: opens (or creates)
 * "SOFTWARE/update" under dst_meta and fills it via enter_date_name_vers().
 *
 * When show_meta is set a progress line is printed first. Failures are
 * reported through DISP_RC. Returns 0 on success or the rc of the first
 * failing step; every node opened here is released before returning.
 */
static rc_t enter_schema_update( KMetadata *dst_meta, const bool show_meta )
{
    rc_t rc;
    KMDataNode *software;
    KMDataNode *update;

    if ( show_meta )
        KOutMsg( "--- entering schema-update\n" );

    rc = KMetadataOpenNodeUpdate ( dst_meta, &software, "SOFTWARE" );
    DISP_RC( rc, "enter_schema_update:KMetadataOpenNodeUpdate('SOFTWARE') failed" );
    if ( rc != 0 )
        return rc;

    rc = KMDataNodeOpenNodeUpdate ( software, &update, "update" );
    DISP_RC( rc, "enter_schema_update:KMDataNodeOpenNodeUpdate('update') failed" );
    if ( rc == 0 )
    {
        rc = enter_date_name_vers( update );
        KMDataNodeRelease ( update );
    }

    KMDataNodeRelease ( software );
    return rc;
}
Ejemplo n.º 8
0
/* Build (or drop) the FUSE file indexes of a table and record their
 * metadata under the "/FUSE" node.
 *
 * stbl : table the index builders read from.
 * ktbl : table receiving the committed indexes (CommitIndex/KTableDropIndex).
 * meta : metadata in which "/FUSE/<file>" nodes are maintained.
 *
 * For every entry of the local idx[] table:
 *  - if g_ungzip is set or the file name ends in ".gz", the index is built
 *    into a scratch node "<file>.tmp", described with WriteFileMeta() and, on
 *    success, renamed over "<file>";
 *  - an builder result with state rcUnsupported is downgraded to a warning
 *    and the "<file>" node is dropped;
 *  - otherwise (non-gz file while !g_ungzip) the index and its node are
 *    dropped.
 * The scratch node is always dropped at the end of an iteration.
 *
 * The work buffer is allocated lazily once and shared by all builders. If
 * that allocation fails the error is now routed through the normal cleanup
 * path (previously a `break` skipped releasing idx[i].meta and the "/FUSE"
 * parent node and left the ".tmp" child behind).
 *
 * Returns 0 on success or the first error; processing stops at that entry.
 */
static
rc_t MakeIndexes(const SRATable* stbl, KTable* ktbl, KMetadata* meta)
{
    rc_t rc = 0;
    size_t i;
    char* buffer = NULL;
    size_t buffer_sz = g_file_block_sz * 100;

    SIndexObj idx[] = {
     /*  meta, file,        format,         index,          func,    file_size, buffer_sz, minSpotId, maxSpotId */
        {NULL, "fastq",    "fastq",      "fuse-fastq",    Fastq_Idx,     0, 0, 0, 0},
        {NULL, "sff",      "SFF",        "fuse-sff",      SFF_Idx,       0, 0, 0, 0},
        {NULL, "fastq.gz", "fastq-gzip", "fuse-fastq-gz", FastqGzip_Idx, 0, 0, 0, 0},
        {NULL, "sff.gz",   "SFF-gzip",   "fuse-sff-gz",   SFFGzip_Idx,   0, 0, 0, 0}
    };

    for(i = 0; rc == 0 && i < sizeof(idx) / sizeof(idx[0]); i++) {
        KMDataNode* parent = NULL;
        if( (rc = KMetadataOpenNodeUpdate(meta, &parent, "/FUSE")) == 0 ) {
            KMDataNodeDropChild(parent, "root"); /* drop old stuff */
            /* every file name in idx[] is at least 3 characters long, so the
               suffix comparison below never indexes before the string */
            if( g_ungzip || strcmp(&idx[i].file[strlen(idx[i].file) - 3], ".gz") == 0 ) {
                STSMSG(0, ("Preparing index %s", idx[i].index));
                MD5StateInit(&idx[i].md5);
                SLListInit(&idx[i].li);
                KMDataNodeDropChild(parent, "%s.tmp", idx[i].file);
                if( (rc = KMDataNodeOpenNodeUpdate(parent, &idx[i].meta, "%s.tmp", idx[i].file)) == 0 ) {
                    if( idx[i].func != NULL ) {
                        if( buffer == NULL && (buffer = malloc(buffer_sz)) == NULL ) {
                            /* fall through to the releases below instead of
                               breaking out with open nodes */
                            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        } else {
                            rc = idx[i].func(stbl, &idx[i], buffer, buffer_sz);
                            if( rc == 0 ) {
                                MD5StateFinish(&idx[i].md5, idx[i].md5_digest);
                                rc = CommitIndex(ktbl, idx[i].index, &idx[i].li);
                            }
                        }
                    }
                    if( rc == 0 ) {
                        rc = WriteFileMeta(&idx[i]);
                    }
                    KMDataNodeRelease(idx[i].meta);
                }
                if( GetRCState(rc) == rcUnsupported ) {
                    KMDataNodeDropChild(parent, "%s", idx[i].file);
                    PLOGERR(klogWarn, (klogWarn, rc, "Index $(i) is not supported for this table", PLOG_S(i), idx[i].index));
                    rc = 0;
                } else if( rc == 0 ) {
                    /* promote the freshly written scratch node to its final name */
                    char f[4096];
                    strcpy(f, idx[i].file);
                    strcat(f, ".tmp");
                    KMDataNodeDropChild(parent, "%s", idx[i].file);
                    rc = KMDataNodeRenameChild(parent, f, idx[i].file);
                }
            } else if( !g_ungzip ) {
                KTableDropIndex(ktbl, idx[i].index);
                KMDataNodeDropChild(parent, "%s", idx[i].file);
            }
            KMDataNodeDropChild(parent, "%s.tmp", idx[i].file);
            KMDataNodeRelease(parent);
        }
        SLListWhack(&idx[i].li, WhackIndexData, NULL);
    }
    free(buffer);
    return rc;
}
Ejemplo n.º 9
0
/* Build the block index for the gzip-compressed FASTQ rendering of a table.
 *
 * sratbl    : table to read spots from.
 * obj       : index object; receives file_size, buffer_sz, the MD5 state and
 *             the list of index nodes (obj->li); obj->meta receives the
 *             "Format/Options" values describing how the file was rendered.
 * buffer    : caller-provided scratch buffer for one spot's text.
 * buffer_sz : size of `buffer`.
 *
 * Spots are accumulated uncompressed in a local buffer and deflated with
 * ZLib_DeflateBlock. A block is closed (one SIndexNode queued) when its
 * compressed size reaches g_file_block_sz or at end of data. To avoid
 * deflating after every spot, the number of spots expected to fill a block
 * is projected from the last compression ratio.
 *
 * Returns 0 on success, else the first error (also logged with the current
 * spot id). Quitting() is consulted after the loop.
 *
 * Fixes relative to the earlier version:
 *  - the PLATFORM column is released when SRAColumnRead fails;
 *  - the reader is whacked when "Format/Options" cannot be opened;
 *  - an index node that was allocated but never queued is freed on error;
 *  - the projection is skipped when deflate failed or produced 0 bytes
 *    (it previously divided by z_blk unconditionally).
 */
static
rc_t FastqGzip_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const FastqReader* reader = NULL;

    uint16_t zlib_ver = ZLIB_VERNUM;
    uint8_t colorSpace = false;
    char* colorSpaceKey = "\0";
    uint8_t origFormat = false;
    uint8_t printLabel = true;
    uint8_t printReadId = true;
    uint8_t clipQuality = true;
    uint32_t minReadLen = 0;
    uint16_t qualityOffset = 0;

    {{
        /* color space output is selected from the platform of the first row */
        const SRAColumn* c = NULL;
        const uint8_t *platform = SRA_PLATFORM_UNDEFINED;
        bitsz_t o, z;

        if( (rc = SRATableOpenColumnRead(sratbl, &c, "PLATFORM", sra_platform_id_t)) != 0 ) {
            return rc;
        }
        if( (rc = SRAColumnRead(c, 1, (const void **)&platform, &o, &z)) != 0 ) {
            SRAColumnRelease(c);
            return rc;
        }
        if( *platform == SRA_PLATFORM_ABSOLID ) {
            colorSpace = true;
        }
        SRAColumnRelease(c);
    }}

    if( (rc = FastqReaderMake(&reader, sratbl, g_accession,
                        colorSpace, origFormat, false, printLabel, printReadId,
                        !clipQuality, minReadLen, qualityOffset, colorSpaceKey[0],
                        obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        size_t written = 0;
        uint32_t blk = 0, spots_per_block = 0, proj_id_qty = 0;
        SIndexNode* inode = NULL;
        size_t z_blk = 0;
        size_t spots_buf_sz = g_file_block_sz * 100;
        size_t zbuf_sz = spots_buf_sz + 100;
        char* zbuf = malloc(zbuf_sz);
        char* spots_buf = malloc(spots_buf_sz);
        bool eof = false;

        if( zbuf == NULL || spots_buf == NULL ) {
            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
        }
        while( rc == 0 ) {
            if( (rc = FastqReader_GetNextSpotSplitData(reader, buffer, buffer_sz, &written)) == 0 ) {
                if( inode == NULL ) {
                    /* first spot of a new block: open its index node */
                    spotid_t spotid = 0;
                    if( (rc = FastqReaderCurrentSpot(reader, &spotid)) != 0 ) {
                        break;
                    }
                    inode = malloc(sizeof(SIndexNode));
                    if( inode == NULL ) {
                        rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        break;
                    }
                    inode->key = obj->file_size;
                    inode->key_size = 0;
                    inode->id = spotid;
                    inode->id_qty = 0;
                    DEBUG_MSG(5, ("%s open key: spot %ld, offset %lu\n", obj->index, inode->id, inode->key));
                }
                if( blk + written > spots_buf_sz ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                    break;
                }
                inode->id_qty++;
                memmove(&spots_buf[blk], buffer, written);
                blk += written;
                if( g_dump ) {
                    fwrite(buffer, written, 1, stderr);
                }
            }
            /* "row exhausted" is the reader's end-of-data signal, not an error */
            if( (eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted)) ) {
                rc = 0;
                if( inode == NULL ) {
                    break;
                }
            }
            if( rc == 0 && (eof ||
                            (proj_id_qty == 0 && inode->id_qty > (spots_per_block * 0.95)) ||
                            (proj_id_qty > 0 && inode->id_qty >= proj_id_qty) ) ) {
                rc = ZLib_DeflateBlock(spots_buf, blk, zbuf, zbuf_sz, &z_blk);
                if( rc == 0 ) {
                    if( z_blk > 0 && z_blk < g_file_block_sz ) {
                        /* project needed id_qty */
                        proj_id_qty = g_file_block_sz * inode->id_qty / z_blk * 1.05;
                        DEBUG_MSG(5, ("%s: project id qty %u\n", obj->index, proj_id_qty));
                    } else {
                        DEBUG_MSG(10, ("%s: no projection %u > %u\n", obj->index, z_blk, g_file_block_sz));
                    }
                }
            }
            if( rc == 0 && (eof || z_blk >= g_file_block_sz) ) {
                /* close the block: account its compressed bytes and queue the node */
                obj->file_size += z_blk;
                MD5StateAppend(&obj->md5, zbuf, z_blk);
                inode->key_size = z_blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("%s close key: spots %lu, size %lu, ratio %hu%%, raw %u\n",
                         obj->index, inode->id_qty, inode->key_size, (uint16_t)(((float)(blk - z_blk)/blk)*100), blk ));
                spots_per_block = inode->id_qty;
                inode = NULL; /* ownership moved to obj->li */
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
                z_blk = 0;
                proj_id_qty = 0;
            }
            if( eof ) {
                break;
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            FastqReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        /* non-NULL only if the loop stopped before the open block was queued */
        free(inode);
        free(zbuf);
        free(spots_buf);
    }
    if( rc == 0 ) {
        KMDataNode* opt = NULL, *nd = NULL;

        /* record the rendering options so the file can be reproduced */
        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) == 0 ) {
            if( (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "ZlibVersion")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &zlib_ver);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpace")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &colorSpace);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpaceKey")) == 0 ) {
                rc = KMDataNodeWrite(nd, colorSpaceKey, 1);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "origFormat")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &origFormat);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printLabel")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printLabel);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printReadId")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printReadId);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "clipQuality")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &clipQuality);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "minReadLen")) == 0 ) {
                rc = KMDataNodeWriteB32(nd, &minReadLen);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "qualityOffset")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &qualityOffset);
                KMDataNodeRelease(nd);
            }
            KMDataNodeRelease(opt);
        }
    }
    FastqReaderWhack(reader);
    return rc;
}
Ejemplo n.º 10
0
/* Build the block index for the plain (uncompressed) FASTQ rendering of a
 * table.
 *
 * sratbl    : table to read spots from.
 * obj       : index object; receives file_size, buffer_sz, the MD5 state and
 *             the list of index nodes (obj->li); obj->meta receives the
 *             "Format/Options" values describing how the file was rendered.
 * buffer    : caller-provided scratch buffer for one spot's text.
 * buffer_sz : size of `buffer`.
 *
 * Spots are read one at a time; a block is closed (its SIndexNode queued)
 * once at least g_file_block_sz bytes were accumulated, or at end of data.
 *
 * Returns 0 on success, else the first error (also logged with the current
 * spot id). Quitting() is consulted after the loop.
 *
 * Fixes relative to the earlier version:
 *  - no NULL dereference when the very first read reports end of rows
 *    (empty table or spot range);
 *  - a read error stops the loop immediately; previously the stale buffer
 *    was still added to file_size/MD5 and the error could be overwritten by
 *    FastqReaderCurrentSpot();
 *  - the PLATFORM column is released when SRAColumnRead fails;
 *  - the reader is whacked when "Format/Options" cannot be opened;
 *  - an index node that was allocated but never queued is freed on error.
 */
static
rc_t Fastq_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const FastqReader* reader = NULL;

    uint8_t colorSpace = false;
    char* colorSpaceKey = "\0";
    uint8_t origFormat = false;
    uint8_t printLabel = true;
    uint8_t printReadId = true;
    uint8_t clipQuality = true;
    uint32_t minReadLen = 0;
    uint16_t qualityOffset = 0;

    {{
        /* color space output is selected from the platform of the first row */
        const SRAColumn* c = NULL;
        const uint8_t *platform = SRA_PLATFORM_UNDEFINED;
        bitsz_t o, z;

        if( (rc = SRATableOpenColumnRead(sratbl, &c, "PLATFORM", sra_platform_id_t)) != 0 ) {
            return rc;
        }
        if( (rc = SRAColumnRead(c, 1, (const void **)&platform, &o, &z)) != 0 ) {
            SRAColumnRelease(c);
            return rc;
        }
        if( *platform == SRA_PLATFORM_ABSOLID ) {
            colorSpace = true;
        }
        SRAColumnRelease(c);
    }}

    if( (rc = FastqReaderMake(&reader, sratbl, g_accession,
                        colorSpace, origFormat, false, printLabel, printReadId,
                        !clipQuality, minReadLen, qualityOffset, colorSpaceKey[0],
                        obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        KMDataNode* opt = NULL, *nd = NULL;

        /* record the rendering options so the file can be reproduced */
        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) == 0 ) {
            if( (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpace")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &colorSpace);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpaceKey")) == 0 ) {
                rc = KMDataNodeWrite(nd, colorSpaceKey, 1);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "origFormat")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &origFormat);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printLabel")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printLabel);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printReadId")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &printReadId);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "clipQuality")) == 0 ) {
                rc = KMDataNodeWriteB8(nd, &clipQuality);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "minReadLen")) == 0 ) {
                rc = KMDataNodeWriteB32(nd, &minReadLen);
                KMDataNodeRelease(nd);
            }
            if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "qualityOffset")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &qualityOffset);
                KMDataNodeRelease(nd);
            }
            KMDataNodeRelease(opt);
        }
    }

    if( rc == 0 ) {
        size_t written = 0;
        uint32_t blk = 0;
        SIndexNode* inode = NULL;

        while( rc == 0 ) {
            bool eof;

            rc = FastqReader_GetNextSpotSplitData(reader, buffer, buffer_sz, &written);
            /* "row exhausted" is the reader's end-of-data signal, not an error */
            eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted);
            if( rc != 0 && !eof ) {
                /* real read error: buffer/written are not valid, stop here */
                break;
            }
            /* inode is NULL only when no spot has been read yet */
            if( inode != NULL && (blk >= g_file_block_sz || eof) ) {
                inode->key_size = blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("Fastq index closed spots %lu, offset %lu, block size %lu\n",
                                                            inode->id_qty, inode->key, inode->key_size));
                inode = NULL; /* ownership moved to obj->li */
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
            }
            if( eof ) {
                rc = 0;
                break;
            }
            if( inode == NULL ) {
                /* first spot of a new block: open its index node */
                spotid_t spotid = 0;
                if( (rc = FastqReaderCurrentSpot(reader, &spotid)) != 0 ) {
                    break;
                }
                inode = malloc(sizeof(SIndexNode));
                if( inode == NULL ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                    break;
                }
                inode->key = obj->file_size;
                inode->key_size = 0;
                inode->id = spotid;
                inode->id_qty = 0;
                DEBUG_MSG(5, ("Fastq index opened spot %ld, offset %lu\n", inode->id, inode->key));
            }
            inode->id_qty++;
            obj->file_size += written;
            blk += written;
            MD5StateAppend(&obj->md5, buffer, written);
            if( g_dump ) {
                fwrite(buffer, written, 1, stderr);
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            FastqReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        /* non-NULL only if the loop stopped before the open block was queued */
        free(inode);
    }
    FastqReaderWhack(reader);
    return rc;
}
Ejemplo n.º 11
0
/* Build the block index for the gzip-compressed SFF rendering of a table.
 *
 * sratbl    : table to read spots from.
 * obj       : index object; receives file_size, buffer_sz, the MD5 state and
 *             the list of index nodes (obj->li); obj->meta receives
 *             "Format/Options/ZlibVersion".
 * buffer    : caller-provided scratch buffer for one spot's data.
 * buffer_sz : size of `buffer`.
 *
 * Spots are accumulated uncompressed in a local buffer (preceded by the SFF
 * file header when the block starts at spot 1) and deflated with
 * ZLib_DeflateBlock. A block is closed (one SIndexNode queued) when its
 * compressed size reaches g_file_block_sz or at end of data. To avoid
 * deflating after every spot, the number of spots expected to fill a block
 * is projected from the last compression ratio.
 *
 * Returns 0 on success, else the first error (also logged with the current
 * spot id). Quitting() is consulted after the loop.
 *
 * Fixes relative to the earlier version:
 *  - the reader is whacked when "Format/Options" cannot be opened;
 *  - an index node that was allocated but never queued is freed on error;
 *  - a failing SFFReaderHeader() stops the loop instead of letting spot
 *    data be appended to a block without its header;
 *  - the projection is skipped when deflate failed or produced 0 bytes
 *    (it previously divided by z_blk unconditionally).
 */
static
rc_t SFFGzip_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    uint16_t zlib_ver = ZLIB_VERNUM;
    const SFFReader* reader = NULL;

    if( (rc = SFFReaderMake(&reader, sratbl, g_accession, obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        size_t written = 0;
        uint32_t blk = 0, spots_per_block = 0, proj_id_qty = 0;
        SIndexNode* inode = NULL;
        size_t z_blk = 0;
        size_t spots_buf_sz = g_file_block_sz * 100;
        size_t zbuf_sz = spots_buf_sz + 100;

        char* zbuf = malloc(zbuf_sz);
        char* spots_buf = malloc(spots_buf_sz);
        bool eof = false;

        if( zbuf == NULL || spots_buf == NULL ) {
            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
        }
        while( rc == 0 ) {
            if( (rc = SFFReader_GetNextSpotData(reader, buffer, buffer_sz, &written)) == 0 ) {
                if( inode == NULL ) {
                    /* first spot of a new block: open its index node */
                    spotid_t spotid = 0;
                    if( (rc = SFFReaderCurrentSpot(reader, &spotid)) != 0 ) {
                        break;
                    }
                    inode = malloc(sizeof(SIndexNode));
                    if( inode == NULL ) {
                        rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        break;
                    }
                    inode->key = obj->file_size;
                    inode->key_size = 0;
                    inode->id = spotid;
                    inode->id_qty = 0;
                    DEBUG_MSG(5, ("%s open key: spot %ld, offset %lu\n", obj->index, inode->id, inode->key));
                    if( spotid == 1 ) {
                        /* the block starting at spot 1 begins with the file header */
                        char hd[10240];
                        size_t hd_sz = 0;
                        if( (rc = SFFReaderHeader(reader, 0, hd, sizeof(hd), &hd_sz)) != 0 ) {
                            break;
                        }
                        if( hd_sz + written > spots_buf_sz ) {
                            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                            break;
                        }
                        memmove(&spots_buf[blk], hd, hd_sz);
                        blk += hd_sz;
                        if( g_dump ) {
                            fwrite(hd, hd_sz, 1, stderr);
                        }
                    }

                }
                if( blk + written > spots_buf_sz ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                    break;
                }
                inode->id_qty++;
                memmove(&spots_buf[blk], buffer, written);
                blk += written;
                if( g_dump ) {
                    fwrite(buffer, written, 1, stderr);
                }
            }
            /* "row exhausted" is the reader's end-of-data signal, not an error */
            if( (eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted)) ) {
                rc = 0;
                if( inode == NULL ) {
                    break;
                }
            }
            if( rc == 0 && (eof ||
                            (proj_id_qty == 0 && inode->id_qty > (spots_per_block * 0.95)) ||
                            (proj_id_qty > 0 && inode->id_qty >= proj_id_qty) ) ) {
                rc = ZLib_DeflateBlock(spots_buf, blk, zbuf, zbuf_sz, &z_blk);
                if( rc == 0 ) {
                    if( z_blk > 0 && z_blk < g_file_block_sz ) {
                        /* project needed id_qty */
                        proj_id_qty = g_file_block_sz * inode->id_qty / z_blk * 1.05;
                        DEBUG_MSG(5, ("%s: project id qty %lu\n", obj->index, proj_id_qty));
                    } else {
                        DEBUG_MSG(10, ("%s: no projection %lu > %lu\n", obj->index, z_blk, g_file_block_sz));
                    }
                }
            }
            if( rc == 0 && (eof || z_blk >= g_file_block_sz) ) {
                /* close the block: account its compressed bytes and queue the node */
                obj->file_size += z_blk;
                MD5StateAppend(&obj->md5, zbuf, z_blk);
                inode->key_size = z_blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("%s close key: spots %lu, size %lu, ratio %hu%%, raw %lu\n",
                         obj->index, inode->id_qty, inode->key_size, (uint16_t)(((float)(blk - z_blk)/blk)*100), blk));
                spots_per_block = inode->id_qty;
                inode = NULL; /* ownership moved to obj->li */
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
                z_blk = 0;
                proj_id_qty = 0;
            }
            if( eof ) {
                break;
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            SFFReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        /* non-NULL only if the loop stopped before the open block was queued */
        free(inode);
        free(zbuf);
        free(spots_buf);
    }
    if( rc == 0 ) {
        KMDataNode* opt = NULL, *nd = NULL;

        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) == 0 ) {
            if( (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "ZlibVersion")) == 0 ) {
                rc = KMDataNodeWriteB16(nd, &zlib_ver);
                KMDataNodeRelease(nd);
            }
            KMDataNodeRelease(opt);
        }
    }
    SFFReaderWhack(reader);
    return rc;
}
Ejemplo n.º 12
0
/* Recursively copy the metadata node at node_path, and everything below it,
 * from src_root to dst_root.
 *
 * For the node itself the data and attributes are copied with
 * copy_metadata_data() / copy_metadata_attribs(). The children are then
 * visited when that copy succeeded, or when the destination node could not
 * be opened with an rc whose state is rcBusy; in the latter case the node's
 * own content is skipped but its children are still attempted.
 *
 * show_meta : print a line for every node visited.
 *
 * Returns 0 on success or the rc of the first failing step; recursion over
 * the children stops at the first child that fails.
 *
 * NOTE(review): child paths are assembled in a 1024-byte stack buffer; a
 * node_path close to that length does not appear to be guarded against --
 * confirm that callers cannot produce such paths.
 */
static rc_t copy_metadata_child ( const KMDataNode *src_root, KMDataNode *dst_root,
                                  const char *node_path, const bool show_meta )
{
    const KMDataNode *src_node;
    KMDataNode *dst_node;
    KNamelist *children;
    rc_t rc;

    rc = KMDataNodeOpenNodeRead ( src_root, & src_node, node_path );
    DISP_RC( rc, "copy_metadata_child:KMDataNodeOpenNodeRead(src) failed" );
    if ( rc != 0 )
    {
        return rc;
    }

    if ( show_meta )
        KOutMsg( "copy child-node: %s\n", node_path );

    rc = KMDataNodeOpenNodeUpdate ( dst_root, & dst_node, node_path );
    DISP_RC( rc, "copy_metadata_child:KMDataNodeOpenNodeUpdate(dst) failed" );
    if ( rc != 0 )
    {
        PLOGMSG( klogInfo, ( klogInfo, 
                 "cannot open child-node(dst): $(node)", "node=%s", node_path ));
    }
    else
    {
        rc = copy_metadata_data ( src_node, dst_node );
        if ( rc == 0 )
            rc = copy_metadata_attribs ( src_node, dst_node, node_path, show_meta );
        KMDataNodeRelease ( dst_node );
    }

    /* a busy destination node does not prevent descending into its children */
    if ( rc == 0 || ( GetRCState( rc ) == rcBusy ) )
    {
        rc = KMDataNodeListChild ( src_node, & children );
        DISP_RC( rc, "copy_metadata_child:KMDataNodeListChild(src) failed" );
        if ( rc == 0 )
        {
            char child_path[ 1024 ];
            size_t prefix_len;
            uint32_t count;
            uint32_t n = 0;

            /* child_path = node_path + '/', child names are appended after it */
            string_copy ( child_path, ( sizeof child_path ) - 1, node_path, string_size( node_path ) );
            prefix_len = string_size( child_path );
            child_path[ prefix_len++ ] = '/';
            child_path[ prefix_len ] = 0;

            rc = KNamelistCount ( children, & count );
            while ( rc == 0 && n < count )
            {
                const char *child_name;
                rc = KNamelistGet ( children, n, & child_name );
                if ( rc == 0 )
                {
                    string_copy( child_path + prefix_len, ( sizeof child_path ) - prefix_len, child_name, string_size( child_name ) );
                    rc = copy_metadata_child ( src_root, dst_root, child_path, show_meta );
                    /* cut back to the "node_path/" prefix for the next child */
                    child_path[ prefix_len ] = 0;
                }
                ++ n;
            }
            KNamelistRelease ( children );
        }
    }

    KMDataNodeRelease ( src_node );
    return rc;
}