Example #1
0
rc_t SRAFastqFile_Open(const KFile** cself, const SRAListNode* sra, const FileOptions* opt)
{
    rc_t rc = 0;
    SRAFastqFile* self;
    CALLOC( self, 1, sizeof( *self ) );
    if( self == NULL )
    {
        rc = RC( rcExe, rcFile, rcConstructing, rcMemory, rcExhausted );
    }
    else
    {
        if ( ( rc = KFileInit( &self->dad, (const KFile_vt*)&SRAFastqFile_vtbl, "SRAFastqFile", "no-name", true, false ) ) == 0 )
        {
            if ( ( rc = SRAListNode_TableOpen( sra, &self->stbl ) ) == 0 )
            {
                if ( ( rc = SRATableGetKTableRead( self->stbl, &self->ktbl ) ) == 0 )
                {
                    if ( ( rc = KTableOpenIndexRead( self->ktbl, &self->kidx, opt->index ) ) == 0 )
                    {
                        if ( ( rc = KLockMake( &self->lock ) ) == 0 )
                        {
                            MALLOC( self->buf, opt->buffer_sz * ( opt->f.fastq.gzip ? 2 : 1 ) );
                            if ( self->buf == NULL )
                            {
                                rc = RC( rcExe, rcFile, rcOpening, rcMemory, rcExhausted );
                            }
                            else
                            {
                                self->file_sz = opt->file_sz;
                                self->buffer_sz = opt->buffer_sz;
                                if ( opt->f.fastq.gzip )
                                {
                                    self->gzipped = &self->buf[ opt->buffer_sz ];
                                }
                                self->from = ~0; /* reset position beyond file end */
                                rc = FastqReaderMake( &self->reader, self->stbl,
                                                      opt->f.fastq.accession, opt->f.fastq.colorSpace,
                                                      opt->f.fastq.origFormat, false, opt->f.fastq.printLabel,
                                                      opt->f.fastq.printReadId, !opt->f.fastq.clipQuality, false,
                                                      opt->f.fastq.minReadLen, opt->f.fastq.qualityOffset,
                                                      opt->f.fastq.colorSpaceKey,
                                                      opt->f.fastq.minSpotId, opt->f.fastq.maxSpotId );
                            }
                        }
                    }
                }
            }
            if ( rc == 0 )
            {
                *cself = &self->dad;
            }
            else
            {
                KFileRelease( &self->dad );
            }
        }
    }
    return rc;
}
Example #2
0
static
rc_t FastqGzip_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const FastqReader* reader = NULL;

    uint16_t zlib_ver = ZLIB_VERNUM;
    uint8_t colorSpace = false;
    char* colorSpaceKey = "\0";
    uint8_t origFormat = false;
    uint8_t printLabel = true;
    uint8_t printReadId = true;
    uint8_t clipQuality = true;
    uint32_t minReadLen = 0;
    uint16_t qualityOffset = 0;

    {{
        const SRAColumn* c = NULL;
        const uint8_t *platform = SRA_PLATFORM_UNDEFINED;
        bitsz_t o, z;

        if( (rc = SRATableOpenColumnRead(sratbl, &c, "PLATFORM", sra_platform_id_t)) != 0 ) {
            return rc;
        }
        if( (rc = SRAColumnRead(c, 1, (const void **)&platform, &o, &z)) != 0 ) {
            return rc;
        }
        if( *platform == SRA_PLATFORM_ABSOLID ) {
            colorSpace = true;
        }
        SRAColumnRelease(c);
    }}

    if( (rc = FastqReaderMake(&reader, sratbl, g_accession,
                        colorSpace, origFormat, false, printLabel, printReadId,
                        !clipQuality, minReadLen, qualityOffset, colorSpaceKey[0],
                        obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        size_t written = 0;
        uint32_t blk = 0, spots_per_block = 0, proj_id_qty = 0;
        SIndexNode* inode = NULL;
        size_t z_blk = 0;
        size_t spots_buf_sz = g_file_block_sz * 100;
        size_t zbuf_sz = spots_buf_sz + 100;
        char* zbuf = malloc(zbuf_sz);
        char* spots_buf = malloc(spots_buf_sz);
        bool eof = false;

        if( zbuf == NULL || spots_buf == NULL ) {
            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
        }
        while( rc == 0 ) {
            if( (rc = FastqReader_GetNextSpotSplitData(reader, buffer, buffer_sz, &written)) == 0 ) {
                if( inode == NULL ) {
                    spotid_t spotid = 0;
                    if( (rc = FastqReaderCurrentSpot(reader, &spotid)) != 0 ) {
                        break;
                    }
                    inode = malloc(sizeof(SIndexNode));
                    if( inode == NULL ) {
                        rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        break;
                    }
                    inode->key = obj->file_size;
                    inode->key_size = 0;
                    inode->id = spotid;
                    inode->id_qty = 0;
                    DEBUG_MSG(5, ("%s open key: spot %ld, offset %lu\n", obj->index, inode->id, inode->key));
                }
                if( blk + written > spots_buf_sz ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                    break;
                }
                inode->id_qty++;
                memmove(&spots_buf[blk], buffer, written);
                blk += written;
                if( g_dump ) {
                    fwrite(buffer, written, 1, stderr);
                }
            }
            if( (eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted)) ) {
                rc = 0;
                if( inode == NULL ) {
                    break;
                }
            }
            if( rc == 0 && (eof || 
                            (proj_id_qty == 0 && inode->id_qty > (spots_per_block * 0.95)) || 
                            (proj_id_qty > 0 && inode->id_qty >= proj_id_qty) ) ) {
                rc = ZLib_DeflateBlock(spots_buf, blk, zbuf, zbuf_sz, &z_blk);
                if( z_blk < g_file_block_sz ) {
                    /* project needed id_qty */
                    proj_id_qty = g_file_block_sz * inode->id_qty / z_blk * 1.05;
                    DEBUG_MSG(5, ("%s: project id qty %u\n", obj->index, proj_id_qty));
                } else {
                    DEBUG_MSG(10, ("%s: no projection %u > %u\n", obj->index, z_blk, g_file_block_sz));
                }
            }
            if( rc == 0 && (eof || z_blk >= g_file_block_sz) ) {
                obj->file_size += z_blk;
                MD5StateAppend(&obj->md5, zbuf, z_blk);
                inode->key_size = z_blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("%s close key: spots %lu, size %lu, ratio %hu%%, raw %u\n",
                         obj->index, inode->id_qty, inode->key_size, (uint16_t)(((float)(blk - z_blk)/blk)*100), blk ));
                spots_per_block = inode->id_qty;
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
                z_blk = 0;
                proj_id_qty = 0;
            }
            if( eof ) {
                break;
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            FastqReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        free(zbuf);
        free(spots_buf);
    }
    if( rc == 0 ) {
        KMDataNode* opt = NULL, *nd = NULL;

        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) != 0 ) {
            return rc;
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "ZlibVersion")) == 0 ) {
            rc = KMDataNodeWriteB16(nd, &zlib_ver);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpace")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &colorSpace);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpaceKey")) == 0 ) {
            rc = KMDataNodeWrite(nd, colorSpaceKey, 1);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "origFormat")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &origFormat);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printLabel")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &printLabel);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printReadId")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &printReadId);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "clipQuality")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &clipQuality);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "minReadLen")) == 0 ) {
            rc = KMDataNodeWriteB32(nd, &minReadLen);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "qualityOffset")) == 0 ) {
            rc = KMDataNodeWriteB16(nd, &qualityOffset);
            KMDataNodeRelease(nd);
        }
        KMDataNodeRelease(opt);
    }
    FastqReaderWhack(reader);
    return rc;
}
Example #3
0
static
rc_t Fastq_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const FastqReader* reader = NULL;

    uint8_t colorSpace = false;
    char* colorSpaceKey = "\0";
    uint8_t origFormat = false;
    uint8_t printLabel = true;
    uint8_t printReadId = true;
    uint8_t clipQuality = true;
    uint32_t minReadLen = 0;
    uint16_t qualityOffset = 0;

    {{
        const SRAColumn* c = NULL;
        const uint8_t *platform = SRA_PLATFORM_UNDEFINED;
        bitsz_t o, z;

        if( (rc = SRATableOpenColumnRead(sratbl, &c, "PLATFORM", sra_platform_id_t)) != 0 ) {
            return rc;
        }
        if( (rc = SRAColumnRead(c, 1, (const void **)&platform, &o, &z)) != 0 ) {
            return rc;
        }
        if( *platform == SRA_PLATFORM_ABSOLID ) {
            colorSpace = true;
        }
        SRAColumnRelease(c);
    }}

    if( (rc = FastqReaderMake(&reader, sratbl, g_accession,
                        colorSpace, origFormat, false, printLabel, printReadId,
                        !clipQuality, minReadLen, qualityOffset, colorSpaceKey[0],
                        obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        KMDataNode* opt = NULL, *nd = NULL;

        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) != 0 ) {
            return rc;
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpace")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &colorSpace);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpaceKey")) == 0 ) {
            rc = KMDataNodeWrite(nd, colorSpaceKey, 1);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "origFormat")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &origFormat);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printLabel")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &printLabel);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printReadId")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &printReadId);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "clipQuality")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &clipQuality);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "minReadLen")) == 0 ) {
            rc = KMDataNodeWriteB32(nd, &minReadLen);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "qualityOffset")) == 0 ) {
            rc = KMDataNodeWriteB16(nd, &qualityOffset);
            KMDataNodeRelease(nd);
        }
        KMDataNodeRelease(opt);
    }

    if( rc == 0 ) {
        size_t written = 0;
        uint32_t blk = 0;
        SIndexNode* inode = NULL;

        while( rc == 0 ) {
            rc = FastqReader_GetNextSpotSplitData(reader, buffer, buffer_sz, &written);
            if( blk >= g_file_block_sz || (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted) ) {
                inode->key_size = blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("Fastq index closed spots %lu, offset %lu, block size %lu\n",
                                                            inode->id_qty, inode->key, inode->key_size));
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
            }
            if( GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted ) {
                rc = 0;
                break;
            }
            if( inode == NULL ) {
                spotid_t spotid = 0;
                if( (rc = FastqReaderCurrentSpot(reader, &spotid)) != 0 ) {
                    break;
                }
                inode = malloc(sizeof(SIndexNode));
                if( inode == NULL ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                    break;
                }
                inode->key = obj->file_size;
                inode->key_size = 0;
                inode->id = spotid;
                inode->id_qty = 0;
                DEBUG_MSG(5, ("Fastq index opened spot %ld, offset %lu\n", inode->id, inode->key));
            }
            inode->id_qty++;
            obj->file_size += written;
            blk += written;
            MD5StateAppend(&obj->md5, buffer, written);
            if( g_dump ) {
                fwrite(buffer, written, 1, stderr);
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            FastqReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
    }
    FastqReaderWhack(reader);
    return rc;
}