Ejemplo n.º 1
0
static rc_t cigar_loop( const VCursor *cur,
                        uint32_t cigar_idx,
                        int64_t first,
                        uint64_t count,
                        uint32_t min_len )
{
    rc_t rc = 0;
    int64_t row_id, last_row = ( first + count );
    rna_splice_candidates candidates;

    for ( row_id = first; ( row_id < last_row ) && ( rc == 0 ) && ( Quitting() == 0 ); row_id++ )
    {
        const char * cigar;
        uint32_t row_len;
        rc = VCursorCellDataDirect ( cur, row_id, cigar_idx, NULL, ( const void ** )&cigar, NULL, &row_len );
        if ( rc == 0 )
        {
            candidates.count = 0;
            candidates.fwd_matched = 0;
            candidates.rev_matched = 0;

            rc = discover_rna_splicing_candidates( row_len, cigar, min_len, &candidates );
            if ( rc == 0 && candidates.count > 0 )
            {
                rc = KOutMsg( "%d rna-splice-candidates at row #%ld : %.*s\n", candidates.count, row_id, row_len, cigar );
            }
        }
    }
    return rc;
}
Ejemplo n.º 2
0
static rc_t vdi_interactive_loop( ictx * ctx )
{
    char cc[ 4 ];
    uint64_t pos = 0;
    rc_t rc = KOutMsg( "%S", &( ctx->PROMPT ) );    

    while ( rc == 0 && !ctx->done && ( 0 == Quitting() ) )
    {
        size_t num_read;
        rc = KFileRead( ctx->std_in, pos, cc, 1, &num_read );
        if ( rc != 0 )
            LOGERR ( klogErr, rc, "failed to read stdin" );
        else if ( num_read > 0 )
        {
            pos += num_read;
            switch( cc[ 0 ] )
            {
                case '\n' : rc = vdi_interactive_newline( ctx ); break;
                default   : rc = vdi_on_char( ctx, cc[ 0 ] ); break;
            }
        }
    }
    if ( rc == 0 ) rc = KOutMsg( "\n" );
    return rc;
}
Ejemplo n.º 3
0
rc_t run_sorter( const sorter_params * params )
{
    sorter sorter;
    rc_t rc = init_sorter( &sorter, params );
    if ( rc == 0 )
    {
        raw_read_rec rec;
        while ( rc == 0 && get_from_raw_read_iter( sorter.params.src, &rec, &rc ) )
        {
            rc = Quitting();
            if ( rc == 0 )
            {
                rc = write_to_sorter( &sorter, rec.seq_spot_id, rec.seq_read_id, &rec.raw_read );
                if ( rc == 0 && params->sort_progress != NULL )
                    atomic_inc( params->sort_progress );
            }
        }
        
        if ( rc == 0 )
            rc = save_store( &sorter );

        if ( rc == 0 && sorter.params.mem_limit > 0 )
            rc = final_merge_sort( params, sorter.sub_file_id );
            
        release_sorter( &sorter );
    }
    return rc;
}
Ejemplo n.º 4
0
rc_t CopierDoOne (Copier * self)
{
    rc_t rc = 0;
    const Buffer * b;

    LOGMSG (klogDebug10, "CopierDoOne");
    rc = Quitting();
    if (rc == 0)
    {
	LOGMSG (klogDebug10, "call BufferQPopBuffer");
	rc = BufferQPopBuffer (self->q, &b, NULL);
	if (rc == 0)
	{
	    size_t w;
	    size_t z;
	    LOGMSG (klogDebug10, "call BufferContentGetSize");
	    z = BufferContentGetSize (b);
	    rc = KFileWrite (self->f, self->o, b, z, &w);
	    self->o += w;
	    if (w != z)
		rc = RC (rcExe, rcFile, rcWriting, rcTransfer, rcIncomplete);
	    else
		rc = BufferRelease (b);
	}
	/* ow this is ugly! */
	/* is the rc a "exhausted" on a timeout? */
	else if ((GetRCObject(rc) == rcTimeout) && (GetRCState(rc) == rcExhausted))
	{
	    rc = 0;
	    LOGMSG (klogDebug10, "CopierDoOne timeout");
	    /* if so is the queue also sealed? */
	    if (BufferQSealed (self->q) == true)
	    {
		LOGMSG (klogDebug10, "CopierDoOne sealed");
		/* if both then we are done and so signal */
		rc = KFileRelease (self->f);
		PLOGMSG (klogDebug10, "CopierDoOne back from KFileRelease $(rc)",PLOG_U32(rc),rc);
		if (rc == 0)
		{
		    self->f = NULL;
		    rc = BufferQRelease (self->q);
		    if (rc == 0)
		    {
			self->q = NULL;
			rc = RC (rcExe, rcNoTarg, rcCopying, rcNoTarg, rcDone );
		    }
		}
	    }
	}
	else
	    LOGMSG (klogDebug10, "CopierDoOne pop failure");

    }
    else
	LOGMSG (klogDebug10, "CopierDoOne: quitting");
    return rc;
}
Ejemplo n.º 5
0
static rc_t passes_load_loop( ld_context *lctx, VCursor * cursor, Passes_src *tab,
                              uint32_t *col_idx )
{
    rc_t rc = 0;
    KLogLevel tmp_lvl;
    pass_block block;
    pl_progress *progress;
    uint64_t pos = 0;
    uint64_t total_passes = tab->AdapterHitBefore.extents[0];

    pl_progress_make( &progress, total_passes );
    rc = progress_chunk( &lctx->xml_progress, total_passes );

    tmp_lvl = KLogLevelGet();
    KLogLevelSet( klogInfo );
    PLOGMSG( klogInfo, ( klogInfo,
                         "loading passes-table ( $(rows) rows ) :",
                         "rows=%lu", total_passes ));
    KLogLevelSet( tmp_lvl );

    while( pos < total_passes && rc == 0 )
    {
        rc = pass_block_read_from_src( tab, total_passes, pos, &block );
        if ( rc == 0 )
        {
            uint32_t i;
            for ( i = 0; i < block.n_read && rc == 0; ++i )
            {
                rc = Quitting();
                if ( rc == 0 )
                {
                    rc = passes_load_pass( cursor, &block, i, col_idx );
                    if ( rc == 0 )
                    {
                        rc = progress_step( lctx->xml_progress );
                        if ( lctx->with_progress )
                            pl_progress_increment( progress, 1 );
                    }
                }
                else
                    LOGERR( klogErr, rc, "...loading passes interrupted" );
            }
            pos += block.n_read;
        }
    }

    pl_progress_destroy( progress );

    if ( rc == 0 )
    {
        rc = VCursorCommit( cursor );
        if ( rc != 0 )
            LOGERR( klogErr, rc, "cannot commit cursor on PASSES-tab" );
    }
    return rc;
}
Ejemplo n.º 6
0
static rc_t on_line( const String * line, void * data )
{
    rc_t rc = Quitting();
    if ( rc == 0 )
    {
        ictx * ctx = data;
        rc = vdi_on_newline( ctx, line );
    }
    return rc;
}
Ejemplo n.º 7
0
static rc_t perform_fastq_split_3_join( cmn_params * cp,
                                      join_stats * stats,
                                      const char * tbl_name,
                                      struct join_results * results,
                                      struct bg_progress * progress,
                                      const join_options * jo )
{
    rc_t rc;
    struct fastq_sra_iter * iter;
    fastq_iter_opt opt;
    opt . with_read_len = true;
    opt . with_name = !( jo -> rowid_as_name );
    opt . with_read_type = true;

    rc = make_fastq_sra_iter( cp, opt, tbl_name, &iter ); /* fastq-iter.c */
    if ( rc == 0 )
    {
        rc_t rc_iter;
        fastq_rec rec;
        join_options local_opt =
        {
            jo -> rowid_as_name,
            true,
            jo -> print_read_nr,
            jo -> print_name,
            jo -> terminate_on_invalid,
            jo -> min_read_len,
            jo -> filter_bases
        };
        
        while ( rc == 0 && get_from_fastq_sra_iter( iter, &rec, &rc_iter ) && rc_iter == 0 ) /* fastq-iter.c */
        {
            rc = Quitting();
            if ( rc == 0 )
            {
                stats -> spots_read++;
                stats -> reads_read += rec . num_read_len;
                
                if ( rec . num_read_len == 1 )
                    rc = print_fastq_1_read( stats, results, &rec, &local_opt, 0, 1 );
                else
                    rc = print_fastq_n_reads_split_3( stats, results, &rec, &local_opt );

                bg_progress_inc( progress ); /* progress_thread.c (ignores NULL) */
            }
        }
        if ( rc == 0 && rc_iter != 0 )
            rc = rc_iter;
        destroy_fastq_sra_iter( iter );
    }
    else
        ErrMsg( "make_fastq_iter() -> %R", rc );
    return rc;
}
Ejemplo n.º 8
0
/*
 * Copy a file from a const KFile * to a KFile * with the paths for the two
 * for logging purposes
 *
 * return rc_t = 0 for success
 * return rc_t != 0 for failure
 */
rc_t CopyFile (const KFile * src, KFile * dst, const char * source, const char * dest)
{
    rc_t rc;
    uint8_t	buff	[256 * 1024];
    size_t	num_read;
    size_t      num_writ;
    uint64_t	pos;

    for (pos = 0; ; pos += num_read)
    {
        rc = Quitting ();
        if (rc)
        {
            LOGMSG (klogFatal, "Received quit");
            break;
        }

        rc = KFileReadAll (src, pos, buff, sizeof (buff), &num_read);
        if (rc)
        {
            PLOGERR (klogErr,
                     (klogErr, rc,
                      "Failed to read from file $(F) at $(P)",
                      "F=%s,P=%lu", source, pos));
            break;
        }

        if (num_read == 0)
            break;

        rc = KFileWriteAll (dst, pos, buff, num_read, &num_writ);
        if (rc)
        {
            PLOGERR (klogErr,
                     (klogErr, rc,
                      "Failed to write to file $(F) at $(P)",
                      "F=%s,P=%lu", dest, pos));
            break;
        }

        if (num_writ != num_read)
        {
            rc = RC (rcExe, rcFile, rcWriting, rcFile, rcInsufficient);
            PLOGERR (klogErr,
                     (klogErr, rc,
                      "Failed to write all to file $(F) at $(P)",
                      "F=%s,P=%lu", dest, pos));
            break;
        }
    }
    return rc;
}
Ejemplo n.º 9
0
static rc_t vdb_fasta_loop_with_name( const p_dump_context ctx, const fastq_ctx * fctx )
{
    rc_t rc = 0;
    int64_t row_id;

    vdn_start( ctx->row_generator );
    while ( vdn_next( ctx->row_generator, (uint64_t*)&row_id ) && rc == 0 )
    {
        rc = Quitting();
        if ( rc == 0 )
        {
            uint32_t elem_bits, boff, row_len, name_len;
            const char * data;
            const char * name;

            rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_name, &elem_bits,
                                        (const void**)&name, &boff, &name_len );
            if ( rc != 0 )
                vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), NAME ) failed", rc, row_id );
            else
            {
                rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_read, &elem_bits,
                                            (const void**)&data, &boff, &row_len );
                if ( rc != 0 )
                    vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), READ ) failed", rc, row_id );
                else
                {
                    uint32_t idx = 0;
                    int32_t to_print = row_len;

                    rc = KOutMsg( ">%s.%li %.*s length=%u\n",
                                  fctx->run_name, row_id, name_len, name, row_len );
                    if ( to_print > ctx->max_line_len )
                        to_print = ctx->max_line_len;
                    while ( rc == 0 && to_print > 0 )
                    {
                        rc = KOutMsg( "%.*s\n", to_print, &data[ idx ] );
                        if ( rc == 0 )
                        {
                            idx += ctx->max_line_len;
                            to_print = ( row_len - idx );
                            if ( to_print > ctx->max_line_len )
                                to_print = ctx->max_line_len;
                        }
                    }
                }
            }
        }
    }
    return rc;
}
Ejemplo n.º 10
0
static rc_t vdb_fastq_loop_with_name( const p_dump_context ctx, const fastq_ctx * fctx )
{
    rc_t rc = 0;
    int64_t row_id;

    vdn_start( ctx->row_generator );
    while ( vdn_next( ctx->row_generator, (uint64_t*)&row_id ) && rc == 0 )
    {
        rc = Quitting();
        if ( rc == 0 )
        {
            uint32_t elem_bits, boff, row_len, name_len;
            const char * data;
            const char * name;

            rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_name, &elem_bits,
                                        (const void**)&name, &boff, &name_len );
            if ( rc != 0 )
                vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), NAME ) failed", rc, row_id );
            else
            {
                rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_read, &elem_bits,
                                            (const void**)&data, &boff, &row_len );
                if ( rc != 0 )
                    vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), READ ) failed", rc, row_id );
                else
                {
                    rc = KOutMsg( "@%s.%li %.*s length=%u\n%.*s\n",
                                  fctx->run_name, row_id, name_len, name, row_len, row_len, data );
                    if ( rc == 0 )
                    {
                        rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_qual, &elem_bits,
                                                    (const void**)&data, &boff, &row_len );
                        if ( rc != 0 )
                            vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), QUALITY ) failed", rc, row_id );
                        else
                            rc = KOutMsg( "+%s.%li %.*s length=%u\n%.*s\n",
                                          fctx->run_name, row_id, name_len, name, row_len, row_len, data );
                    }
                }
            }
        }
    }
    return rc;
}
Ejemplo n.º 11
0
static rc_t walk_ref_pos( walk_data * data, walk_funcs * funcs )
{
    rc_t rc;
    do
    {
        rc = ReferenceIteratorNextPos ( data->ref_iter, !data->options->no_skip );
        if ( GetRCState( rc ) != rcDone )
        {
            if ( rc != 0 )
            {
                LOGERR( klogInt, rc, "ReferenceIteratorNextPos() failed" );
            }
            else
            {
                rc = ReferenceIteratorPosition ( data->ref_iter, &data->ref_pos, &data->depth, &data->ref_base );
                if ( rc != 0 )
                {
                    LOGERR( klogInt, rc, "ReferenceIteratorPosition() failed" );
                }
                else if ( data->depth > 0 )
                {
                    bool skip = false;

                    if ( data->options->skiplist != NULL )
                        skip = skiplist_is_skip_position( data->options->skiplist, data->ref_pos + 1 );

                    if ( !skip )
                    {
                        if ( funcs->on_enter_ref_pos != NULL )
                            rc = funcs->on_enter_ref_pos( data );
                        if ( rc == 0 )
                            rc = walk_spot_group( data, funcs );
                        if ( rc == 0 && funcs->on_exit_ref_pos != NULL )
                            rc = funcs->on_exit_ref_pos( data );
                    }
                }
            }
            if ( rc == 0 ) { rc = Quitting(); }
        }
    } while ( rc == 0 );
    if ( GetRCState( rc ) == rcDone ) { rc = 0; }
    return rc;
}
Ejemplo n.º 12
0
bool CC FGroupMAP_LoadReads( BSTNode *node, void *data )
{
    FGroupMAP* n = (FGroupMAP*)node;
    FGroupMAP_LoadData* d = (FGroupMAP_LoadData*)data;

    DEBUG_MSG(5, (" started\n", FGroupKey_Validate(&n->key)));
    while( d->rc == 0 )
    {
        if( (d->rc = CGLoaderFile_GetRead(n->seq, d->db.reads)) == 0 ) {
            if( (d->db.reads->flags & (cg_eLeftHalfDnbNoMatches | cg_eLeftHalfDnbMapOverflow)) &&
                (d->db.reads->flags & (cg_eRightHalfDnbNoMatches | cg_eRightHalfDnbMapOverflow)) ) {
                d->db.mappings->map_qty = 0;
            } else {
                d->rc = CGLoaderFile_GetMapping(n->align, d->db.mappings);
            }
            /* alignment written 1st than sequence -> primary_alignment_id must be set!! */
            if( d->rc == 0 && (d->rc = CGWriterAlgn_Write(d->db.walgn, d->db.reads)) == 0 ) {
                d->rc = CGWriterSeq_Write(d->db.wseq);
            }
        }
        if( GetRCState(d->rc) == rcDone && GetRCObject(d->rc) == rcData ) {
            bool eof = false;
            d->rc = 0;
            if( n->align == NULL || ((d->rc = CGLoaderFile_IsEof(n->align, &eof)) == 0 && eof) ) {
                /* mappings file EOF detected ok */
                DEBUG_MSG(5, (" done\n", FGroupKey_Validate(&n->key)));
                break;
            } else if( d->rc == 0 ) {
                /* not EOF */
                d->rc = RC(rcExe, rcFile, rcReading, rcData, rcUnexpected);
                CGLoaderFile_LOG(n->align, klogErr, d->rc,
                    "extra mappings, possible that corresponding reads file is truncated", NULL);
            }
        }
        d->rc = d->rc ? d->rc : Quitting();
    }
    if( d->rc != 0 ) {
        CGLoaderFile_LOG(n->seq, klogErr, d->rc, NULL, NULL);
        CGLoaderFile_LOG(n->align, klogErr, d->rc, NULL, NULL);
    }
    FGroupMAP_CloseFiles(n);
    return d->rc != 0;
}
Ejemplo n.º 13
0
static rc_t SRADumper_DumpRun( const SRATable* table,
                               spotid_t minSpotId, spotid_t maxSpotId, const SRASplitterFactory* factories )
{
    rc_t rc = 0, rcr = 0;
    spotid_t spot = 0;
    make_readmask( readmask );
    const SRASplitter* root_splitter = NULL;

    rc = SRASplitterFactory_NewObj( factories, &root_splitter );

    for( spot = minSpotId; rc == 0 && spot <= maxSpotId; spot++ )
    {
        reset_readmask( readmask );
        rc = SRASplitter_AddSpot( root_splitter, spot, readmask );
        rc = rc ? rc : Quitting();
    }
    rcr = SRASplitter_Release( root_splitter );

    return rc ? rc : rcr;
}
Ejemplo n.º 14
0
static rc_t cg_dump_loop( cg_dump_opts * opts, cg_dump_ctx * cg_ctx )
{
    const num_gen_iter * iter;
    rc_t rc = num_gen_iterator_make( cg_ctx->rows, &iter );
    if ( rc != 0 )
    {
        (void)LOGERR( klogErr, rc, "cannot make num-gen-iterator" );
    }
    else
    {
        uint64_t row_id;
        rc_t rc1 = num_gen_iterator_next( iter, &row_id );
        while ( rc == 0 && rc1 == 0 )
        {
            rc = Quitting();    /* to be able to cancel the loop by signal */
            if ( rc == 0 )
            {
                rc = cg_dump_row( opts, cg_ctx, row_id ); /* <================== */
                if ( rc == 0 )
                {
                    rc1 = num_gen_iterator_next( iter, &row_id );
                    if ( rc1 == 0 )
                    {
                        if ( opts->show_progress )
                        {
                            uint32_t percent;
                            rc = num_gen_iterator_percent( iter, cg_ctx->fract_digits, &percent );
                            if ( rc == 0 )
                                update_progressbar( cg_ctx->progress, cg_ctx->fract_digits, percent );
                        }
                    }
                }
            }
        }
        if ( opts->show_progress )
            KOutMsg( "\n" );
        num_gen_iterator_destroy( iter );
    }
    return rc;
}
Ejemplo n.º 15
0
static size_t CC on_curl_data( void *ptr, size_t size, size_t nmemb, void *data )
{
    size_t given_bytes = size * nmemb; /* calculate the size given in ptr */
    p_data_ctx dctx = ( p_data_ctx )data;
    if ( dctx != NULL )
    {
        size_t idx;

        /* to be able to cancel the loop by signal */
        dctx->rc = Quitting();
        for ( idx = 0; idx < given_bytes && dctx->rc == 0; ++idx )
        {
            handle_curl_byte( ((char *)ptr)[idx], dctx );
        }

        /* tell curl via return value to stop downloading,
           if we encountered an error or quit */
        if ( dctx->rc != 0 )
            given_bytes = 0;
    }
    return given_bytes;
}
Ejemplo n.º 16
0
static
rc_t SFFGzip_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    uint16_t zlib_ver = ZLIB_VERNUM;
    const SFFReader* reader = NULL;

    if( (rc = SFFReaderMake(&reader, sratbl, g_accession, obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        size_t written = 0;
        uint32_t blk = 0, spots_per_block = 0, proj_id_qty = 0;
        SIndexNode* inode = NULL;
        size_t z_blk = 0;
        size_t spots_buf_sz = g_file_block_sz * 100;
        size_t zbuf_sz = spots_buf_sz + 100;

        char* zbuf = malloc(zbuf_sz);
        char* spots_buf = malloc(spots_buf_sz);
        bool eof = false;

        if( zbuf == NULL || spots_buf == NULL ) {
            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
        }
        while( rc == 0 ) {
            if( (rc = SFFReader_GetNextSpotData(reader, buffer, buffer_sz, &written)) == 0 ) {
                if( inode == NULL ) {
                    spotid_t spotid = 0;
                    if( (rc = SFFReaderCurrentSpot(reader, &spotid)) != 0 ) {
                        break;
                    }
                    inode = malloc(sizeof(SIndexNode));
                    if( inode == NULL ) {
                        rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        break;
                    }
                    inode->key = obj->file_size;
                    inode->key_size = 0;
                    inode->id = spotid;
                    inode->id_qty = 0;
                    DEBUG_MSG(5, ("%s open key: spot %ld, offset %lu\n", obj->index, inode->id, inode->key));
                    if( spotid == 1 ) {
                        char hd[10240];
                        size_t hd_sz = 0;
                        if( (rc = SFFReaderHeader(reader, 0, hd, sizeof(hd), &hd_sz)) == 0 ) {
                            if( hd_sz + written > spots_buf_sz ) {
                                rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                                break;
                            }
                            memmove(&spots_buf[blk], hd, hd_sz);
                            blk += hd_sz;
                            if( g_dump ) {
                                fwrite(hd, hd_sz, 1, stderr);
                            }
                        }
                    }

                }
                if( blk + written > spots_buf_sz ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                    break;
                }
                inode->id_qty++;
                memmove(&spots_buf[blk], buffer, written);
                blk += written;
                if( g_dump ) {
                    fwrite(buffer, written, 1, stderr);
                }
            }
            if( (eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted)) ) {
                rc = 0;
                if( inode == NULL ) {
                    break;
                }
            }
            if( rc == 0 && (eof || 
                            (proj_id_qty == 0 && inode->id_qty > (spots_per_block * 0.95)) || 
                            (proj_id_qty > 0 && inode->id_qty >= proj_id_qty) ) ) {
                rc = ZLib_DeflateBlock(spots_buf, blk, zbuf, zbuf_sz, &z_blk);
                if( z_blk < g_file_block_sz ) {
                    /* project needed id_qty */
                    proj_id_qty = g_file_block_sz * inode->id_qty / z_blk * 1.05;
                    DEBUG_MSG(5, ("%s: project id qty %lu\n", obj->index, proj_id_qty));
                } else {
                    DEBUG_MSG(10, ("%s: no projection %lu > %lu\n", obj->index, z_blk, g_file_block_sz));
                }
            }
            if( rc == 0 && (eof || z_blk >= g_file_block_sz) ) {
                obj->file_size += z_blk;
                MD5StateAppend(&obj->md5, zbuf, z_blk);
                inode->key_size = z_blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("%s close key: spots %lu, size %lu, ratio %hu%%, raw %lu\n",
                         obj->index, inode->id_qty, inode->key_size, (uint16_t)(((float)(blk - z_blk)/blk)*100), blk));
                spots_per_block = inode->id_qty;
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
                z_blk = 0;
                proj_id_qty = 0;
            }
            if( eof ) {
                break;
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            SFFReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        free(zbuf);
        free(spots_buf);
    }
    if( rc == 0 ) {
        KMDataNode* opt = NULL, *nd = NULL;

        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) != 0 ) {
            return rc;
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "ZlibVersion")) == 0 ) {
            rc = KMDataNodeWriteB16(nd, &zlib_ver);
            KMDataNodeRelease(nd);
        }
        KMDataNodeRelease(opt);
    }
    SFFReaderWhack(reader);
    return rc;
}
Ejemplo n.º 17
0
static rc_t ref_walker_walk_ref_range( struct ref_walker * self, ref_walker_data * rwd )
{
    ReferenceIterator * ref_iter;
    rc_t rc = AlignMgrMakeReferenceIterator ( self->amgr, &ref_iter, &self->cb_block, self->min_mapq ); /* align/iterator.h */
    if ( rc == 0 )
    {
        /* construct the reference iterator */

        uint32_t idx, count;
        uint32_t reflist_options = ref_walker_make_reflist_options( self ); /* above */
        Vector cur_id_vector;
        VectorInit ( &cur_id_vector, 0, 12 );

        rc = VNameListCount ( self->sources, &count );
        for ( idx = 0; idx < count && rc == 0; ++idx )
        {
            const char * src_name = NULL;
            rc = VNameListGet ( self->sources, idx, &src_name );
            if ( rc == 0 && src_name != NULL )
            {
                const VDatabase *db;
                rc = VDBManagerOpenDBRead ( self->vmgr, &db, self->vschema, "%s", src_name );
                if ( rc == 0 )
                {
                    const ReferenceList * ref_list;
                    rc = ReferenceList_MakeDatabase( &ref_list, db, reflist_options, 0, NULL, 0 );
                    if ( rc == 0 )
                    {
                        const ReferenceObj * ref_obj;
                        rc = ReferenceList_Find( ref_list, &ref_obj, rwd->ref_name, string_size( rwd->ref_name ) );
                        if ( rc == 0 )
                        {
                            INSDC_coord_len len;
                            rc = ReferenceObj_SeqLength( ref_obj, &len );
                            if ( rc == 0 )
                            {
                                if ( rwd->ref_start == 0 )
                                    rwd->ref_start = 1;
                                if ( ( rwd->ref_end == 0 )||( rwd->ref_end > len + 1 ) )
                                    rwd->ref_end = ( len - rwd->ref_start ) + 1;

                                if ( self->primary_alignments )
                                    rc = ref_walker_add_iterator( self, rwd->ref_name, rwd->ref_start, rwd->ref_end, src_name, 
                                            &cur_id_vector, db, ref_obj, ref_iter, TBL_PRIM, primary_align_ids );

                                if ( rc == 0 && self->secondary_alignments )
                                    rc = ref_walker_add_iterator( self, rwd->ref_name, rwd->ref_start, rwd->ref_end, src_name, 
                                            &cur_id_vector, db, ref_obj, ref_iter, TBL_SEC, secondary_align_ids );

                                if ( rc == 0 && self->evidence_alignments )
                                    rc = ref_walker_add_iterator( self, rwd->ref_name, rwd->ref_start, rwd->ref_end, src_name, 
                                            &cur_id_vector, db, ref_obj, ref_iter, TBL_EV, evidence_align_ids );

                            }
                            ReferenceObj_Release( ref_obj );
                        }
                        ReferenceList_Release( ref_list );
                    }
                    VDatabaseRelease( db );
                }
            }
        }

        if ( rc == 0 )
        {
            /* walk the reference iterator */
            struct ReferenceObj const * ref_obj;
            rc = ReferenceIteratorNextReference( ref_iter, NULL, NULL, &ref_obj );
            if ( rc == 0 && ref_obj != NULL )
            {
                if ( self->use_seq_name )
                    rc = ReferenceObj_Name( ref_obj, &rwd->ref_name );
                else
                    rc = ReferenceObj_SeqId( ref_obj, &rwd->ref_name );
                if ( rc == 0 )
                {
                    INSDC_coord_zero first_pos;
                    INSDC_coord_len len;
                    rc_t rc_w = ReferenceIteratorNextWindow ( ref_iter, &first_pos, &len );
                    while ( rc == 0 && rc_w == 0 )
                    {
                        rc_t rc_p = ReferenceIteratorNextPos ( ref_iter, !self->no_skip );
                        if ( rc_p == 0 )
                        {
                            rc = ReferenceIteratorPosition ( ref_iter, &rwd->pos, &rwd->depth, &rwd->bin_ref_base );
                            if ( rwd->depth > 0 && rc == 0 )
                            {
                                rc_t rc_sg = 0;
                                rwd->ascii_ref_base = _4na_to_ascii( rwd->bin_ref_base, false );
                                if ( self->on_enter_ref_pos != NULL )
                                    rc = self->on_enter_ref_pos( rwd );

                                while ( rc_sg == 0 && rc == 0 )
                                {
                                    rc_sg = ReferenceIteratorNextSpotGroup ( ref_iter, &rwd->spot_group, &rwd->spot_group_len );
                                    if ( rc_sg == 0 )
                                    {
                                        rc_t rc_pr = 0;
                                        if ( self->on_enter_spot_group != NULL )
                                            rc = self->on_enter_spot_group( rwd );

                                        while ( rc == 0 && rc_pr == 0 )
                                        {
                                            const PlacementRecord * rec;
                                            rc_pr = ReferenceIteratorNextPlacement ( ref_iter, &rec );
                                            if ( rc_pr == 0 && self->on_alignment != NULL )
                                                rc = ref_walker_walk_alignment( self, ref_iter, rec, rwd );
                                        }

                                        if ( self->on_exit_spot_group != NULL )
                                            rc = self->on_exit_spot_group( rwd );
                                    }
                                }
                                if ( self->on_exit_ref_pos != NULL )
                                    rc = self->on_exit_ref_pos( rwd );
                            }
                            rc = Quitting();
                        }
                    }
                }
            }
        }

        /* free cur_id_vector */

        ReferenceIteratorRelease ( ref_iter );
    }
    return rc;
}
Ejemplo n.º 18
0
bool CC FGroupMAP_LoadEvidence( BSTNode *node, void *data )
{
    FGroupMAP* n = (FGroupMAP*)node;
    FGroupMAP_LoadData* d = (FGroupMAP_LoadData*)data;

    DEBUG_MSG(5, ("' started\n", FGroupKey_Validate(&n->key)));
     while( d->rc == 0 ) {
        if( (d->rc = CGLoaderFile_GetEvidenceIntervals(n->seq, d->db.ev_int)) == 0 ) {
            int64_t evint_rowid;
            if( n->align != NULL ) {
                d->rc = CGLoaderFile_GetEvidenceDnbs(n->align, d->db.ev_int->interval_id, d->db.ev_dnb);
            } else {
		d->db.ev_dnb->qty = 0; /***weird, but the easiest place to fix ***/
	    }
            /* interval written 1st than dnbs which uses interval as reference */
            if( d->rc == 0 ) {
                d->rc = CGWriterEvdInt_Write(d->db.wev_int, d->db.ev_dnb, &evint_rowid);
            }
            if( d->rc == 0 && n->align != NULL ) {
                /* attach dnbs to reads */
                uint16_t i;
                FGroupMAP_FindData found;

                found.key.type = cg_eFileType_READS;
                d->rc = CGLoaderFile_GetAssemblyId(n->align, &found.key.assembly_id);
                for(i = 0; d->rc == 0 && i < d->db.ev_dnb->qty; i++) {
                    found.key.u.map.slide = d->db.ev_dnb->dnbs[i].slide;
                    found.key.u.map.lane = d->db.ev_dnb->dnbs[i].lane;
                    found.key.u.map.batch_file_number = &d->db.ev_dnb->dnbs[i].file_num_in_lane;
                    if( BSTreeDoUntil(d->reads, false, FGroupMAP_FindRowId, &found) ) {
                        d->rc = CGWriterEvdDnbs_SetSEQ(d->db.wev_dnb, i, found.rowid);
                    } else {
                        d->rc = RC(rcExe, rcFile, rcWriting, rcData, rcInconsistent);
                    }
                }
            }
            if( d->rc == 0 && n->align != NULL ) {
                d->rc = CGWriterEvdDnbs_Write(d->db.wev_dnb, d->db.ev_int, evint_rowid);
            }
        }
        if( GetRCState(d->rc) == rcDone && GetRCObject(d->rc) == rcData ) {
            bool eof = false;
            d->rc = 0;
            if( n->align == NULL || ((d->rc = CGLoaderFile_IsEof(n->align, &eof)) == 0 && eof) ) {
                /* dnbs file EOF detected ok */
                DEBUG_MSG(5, ("' done\n", FGroupKey_Validate(&n->key)));
                break;
            } else if( d->rc == 0 ) {
                /* not EOF */
                d->rc = RC(rcExe, rcFile, rcReading, rcData, rcUnexpected);
                CGLoaderFile_LOG(n->align, klogErr, d->rc,
                    "extra dnbs, possible that corresponding intervals file is truncated", NULL);
            }
        }
        d->rc = d->rc ? d->rc : Quitting();
    }
    if( d->rc != 0 ) {
        CGLoaderFile_LOG(n->seq, klogErr, d->rc, NULL, NULL);
        CGLoaderFile_LOG(n->align, klogErr, d->rc, NULL, NULL);
    }
    FGroupMAP_CloseFiles(n);
    return d->rc != 0;
}
Ejemplo n.º 19
0
rc_t run (const char * table_path, uint64_t N )
{
    static const char *colInf[] = {
        "C1: Same value, same length",
        "C2: Var. value, same length",
        "C3: Var. value, var. legnth",
        "C4: Same value except I row",
        "C5: Same value except L row" };
    rc_t rc;
    uint64_t row = 0;
    VDBManager *mgr = NULL;
    VSchema *schema = NULL;
    VTable *table = NULL;
    VCursor *cursor;
    uint32_t idx[COLUMNS];
    uint64_t total[COLUMNS];
    int i = 0, j = 0, prev = 0;
    char *buffer[BUFFERS];
    



    /* Initialize arrays */
    memset(&idx, 0, sizeof idx);
    memset(&total, 0, sizeof total);
    for (i = 0; i < BUFFERS; ++i) {
        char c;
        size_t sz = ROWLEN + 1;
        if (i == (BUFFERS - 1))
            sz += ROWLEN;
        buffer[i] = malloc(sz);
        for (j = 0, c = 0; j < sz - 1; ++j, ++c) {
            if (c >= ROWLEN)
                c -= ROWLEN;
            buffer[i][j] = '0' + c;
        }
        buffer[i][j] = '\0';
    }
    /* Create manager */
    rc = VDBManagerMakeUpdate(&mgr, NULL);
    if (rc != 0) {
        LOGERR(klogInt, rc, "failed to open vdb library");
    }
    /* Initialize schema */
    if (rc == 0) {
        rc = VDBManagerMakeSchema(mgr, &schema);
        if (rc != 0)
            LOGERR(klogInt, rc, "failed to create empty schema");
    }
    if (rc == 0) {
        char text[512] = "table Table #1 {\n";
        char col [128];
        for (i = 1; i <=  COLUMNS; ++i) {
            sprintf(col,
                "  column ascii C%d = .C%d; physical ascii .C%d = C%d;\n",
                i, i, i, i);
            strcat(text, col);
        }
        strcat(text, "};");
        STSMSG(1,("Parsing schema:\n%s", text));
        rc = VSchemaParseText(schema, "Schema", text, strlen(text));
        if (rc != 0)
            LOGERR(klogInt, rc, "failed to parse schema");
    }
    /* Create table */
    if (rc == 0) {
        STSMSG(1,("Creating %s", tablePath));
        rc = VDBManagerCreateTable(mgr, &table, schema, "Table", kcmInit,
            tablePath);
        if (rc != 0)
            LOGERR(klogInt, rc, "failed to create table");
    }
    /* Initialize cursor */
    if (rc == 0) {
        rc = VTableCreateCursorWrite(table, &cursor, kcmInsert);
        if (rc != 0)
            LOGERR(klogInt, rc, "failed to create cursor");
    }
    for (i = 0; rc == 0 && i < COLUMNS; ++i) {
        char col[3];
        sprintf(col, "C%d", i + 1);
        STSMSG(2,("Adding column %s to cursor", col));
        rc = VCursorAddColumn(cursor, &idx[i], col);
        if (rc != 0)
            PLOGERR(klogInt, (klogInt, rc,
                              "failed to add $(c) to cursor", "c=%s", col));
    }
    if (rc == 0) {
        rc = VCursorOpen(cursor);
        if (rc != 0)
            LOGERR(klogInt, rc, "failed to open cursor");
    }
    /* Write data */
    for (row = 0; row < N && rc == 0; ++row) {
        int max = 2 * ROWLEN - 1;
        int sz = 0;
        if ((row % 2) == 0) {
            int min = 1;
            sz = min + (int) (max * (rand() / (RAND_MAX + 1.0)));
            prev = sz;
            buffer[1][0] = '1';
        }
        else {
            sz = max + 1 - prev;
            buffer[1][0] = '2';
        }
        rc = Quitting();
        if (rc == 0) {
            KStsLevel lvl = 2;
            if (row > 0 && ((row % ROWS) == 0)) {
                lvl = 1;
            }
            STSMSG (lvl, ("Writing row %ji / %ji",
                          row + 1, N));
            rc = VCursorOpenRow(cursor);
            if (rc != 0)
                LOGERR(klogInt, rc, "failed to open row");
        }
        for (j = 0; j < COLUMNS && rc == 0; ++j) {
            uint32_t count = 0;
            int buf = j;
            switch (j) {
                case 0:
                case 1:
                    count = strlen(buffer[j]);
                    break;
                case 2:
                    count = sz;
                    break;
                case 3:
                    buf = 0;
                    if (row == 0)
                        buf = 1;
                    count = strlen(buffer[buf]);
                    break;
                case 4:
                    buf = 0;
                    if (row == (N - 1))
                        buf = 1;
                    count = strlen(buffer[buf]);
                    break;
                default:
                    assert(0);
                    break;
            }
            STSMSG (3, ("Row %ji/Col.%d: %sd %.*s\n",
                        row + 1, j + 1, count, count, buffer[buf]));
            rc = VCursorWrite
                (cursor, idx[j], 8, buffer[buf], 0, count);
            if (rc != 0)
                PLOGERR(klogInt, (klogInt, rc, "failed to write row[$i]", "i=%d", j + 1));
            total[j] += count;
        }
        if (rc == 0) {
            rc = VCursorCommitRow(cursor);
            if (rc != 0)
                LOGERR(klogInt, rc, "failed to commit row");
        }
        if (rc == 0) {
            rc = VCursorCloseRow(cursor);
            if (rc != 0)
                LOGERR(klogInt, rc, "failed to close row");
        }
    }
    if (rc == 0) {
        STSMSG (1, ("Commiting cursor\n"));
        rc = VCursorCommit(cursor);
        if (rc != 0)
            LOGERR(klogInt, rc, "failed to commit cursor");
    }
    /* Cleanup */
    VCursorRelease(cursor);
    VTableRelease(table);
    VSchemaRelease(schema);
    VDBManagerRelease(mgr);
    for (i = 0; i < BUFFERS; ++i) {
        free(buffer[i]);
    }
    /* Log */
    if (rc == 0) {
        PLOGMSG(klogInfo, (klogInfo, "$(t)", "t=%s", tablePath));
        PLOGMSG(klogInfo,(klogInfo, 
            "$(n)($(N)) rows written - $(b) bytes per row",
                          PLOG_I64(n) "," PLOG_X64(N) ",b=%d", N, N, ROWLEN));
        for (i = 0; i < COLUMNS; ++i) {
            PLOGMSG(klogInfo,(klogInfo, 
                              "$(i): $(n)($(N)) bytes",
                              "i=%s," PLOG_I64(n) "," PLOG_X64(N),
                              colInf[i], total[i], total[i]));
        }
    }
    if (rc == 0) {
        KDirectory *dir = NULL;
        uint64_t sizes[COLUMNS];
        memset(&sizes, 0, sizeof sizes);
        rc = KDirectoryNativeDir(&dir);
        if (rc != 0)
            LOGERR(klogInt, rc, "failed to KDirectoryNativeDir");
        else {
            for (i = 1; i <= COLUMNS; ++i) {
                uint64_t size = 0;
#define  FORMAT    "%s/col/%s/data"
#define KFORMAT "$(d)/col/$(n)/data", "d=%s,n=%s"
#define  STATUS(action) (action FORMAT, tablePath, name)
                char name[3];

                sprintf(name, "C%d", i);
                STSMSG (1, STATUS("checking "));
                rc = KDirectoryFileSize(dir, &size, FORMAT, tablePath, name);
                if (rc != 0) {
                    if (GetRCState(rc) == rcNotFound) {
                        STSMSG (2, STATUS("not found "));
                        rc = 0;
                    }
                    else
                        PLOGERR(klogInt, (klogInt, rc,
                                          "failed to check " KFORMAT, tablePath, name));
                }
                else {
                    STSMSG (2, STATUS("found "));
                }
                PLOGMSG(klogInfo, (klogInfo, "Size of $(d)/col/$(n)/data = $(s)",
                                   "d=%s,n=%s," PLOG_I64(s), tablePath, name, size));
                sizes[i - 1] = size;
            }
        }
        KDirectoryRelease(dir);
        if (rc == 0) {
            puts("");
            KOutMsg("%ld rows, %d bytes per row:\n", N, ROWLEN);
            for (i = 0; i < COLUMNS; ++i) {
                puts(colInf[i]);
            }
            puts("");
            for (i = 0; i < COLUMNS; ++i) {
                int64_t over = sizes[i] - total[i];
                KOutMsg("C%d: %9ld bytes written; "
                    "%9ld in 'data'", i + 1, total[i], sizes[i]);
                if (over > 0) {
                    double p = 100.0 * over / sizes[i];
                    printf(": %7ld extra bytes (%.4f%%)\n", over, p);
                }
                else {
                    puts("");
                }
            }
        }
    }

    return rc;
}
Ejemplo n.º 20
0
static rc_t Work(Db* db, SpotIterator* it)
{
    rc_t rc = 0;
    bool toRedact = false;
    int64_t row_id = 0;
    spotid_t nSpots = 0;
    spotid_t redactedSpots = 0;

    uint8_t filter[64];
    memset(filter, SRA_READ_FILTER_REDACTED, sizeof filter);

    assert(it);

    while (rc == 0 && SpotIteratorNext(it, &rc, &row_id, &toRedact)) {
        uint8_t nreads = 0;
        char bufferIn[64];
        void* buffer = NULL;
        uint32_t row_len = 0;

        rc = Quitting();

        ++nSpots;

        if (rc == 0) {
            uint32_t elem_bits = 8;
            rc = VCursorReadDirect(db->rCursor, row_id, db->rFilterIdx,
                elem_bits, bufferIn, sizeof bufferIn, &row_len);
            DISP_RC(rc, "while reading READ_FILTER");
	    nreads = row_len;
        }
        if (toRedact) {
            buffer = filter;
            ++redactedSpots;
            DBGMSG(DBG_APP,DBG_COND_1,
                ("Redacting spot %d: %d reads\n",row_id,nreads));
        }
        else {
            buffer = bufferIn;
        }
        if (rc == 0) {
            rc = VCursorOpenRow(db->wCursor);
            DISP_RC(rc, "while opening row to write");
            if (rc == 0) {
                rc = VCursorWrite
                    (db->wCursor, db->wIdx, 8 * nreads, buffer, 0, 1);
                DISP_RC(rc, "while writing READ_FILTER");
            }
            if (rc == 0) {
                rc = VCursorCommitRow(db->wCursor);
                DISP_RC(rc, "while committing row");
            }
            if (rc == 0) {
                rc = VCursorCloseRow(db->wCursor);
                DISP_RC(rc, "while closing row");
            }
        }
    }

    db->nSpots = nSpots;
    db->redactedSpots = redactedSpots;

    if (rc == 0) {
        rc = VCursorCommit(db->wCursor);
        DISP_RC(rc, "while committing cursor");
    }

    return rc;
}
Ejemplo n.º 21
0
static rc_t read_loop( statistic * data,
                       context *ctx,
                       statistic_reader *reader,
                       const VCursor *my_cursor )
{
    int64_t first;
    uint64_t count;
    rc_t rc = query_reader_rowrange( reader, &first, &count );
    if ( rc != 0 )
        LogErr( klogInt, rc, "query_statistic_rowrange() failed\n" );
    else
    {
        if ( num_gen_empty( ctx->row_generator ) )
        {
            rc = num_gen_add( ctx->row_generator, first, count );
            if ( rc != 0 )
                LogErr( klogInt, rc, "num_gen_add() failed() failed\n" );
        }
        else
        {
            rc = num_gen_trim( ctx->row_generator, first, count );
            if ( rc != 0 )
                LogErr( klogInt, rc, "num_gen_trim() failed() failed\n" );
        }

        if ( rc == 0 )
        {
            const num_gen_iter *iter;
            rc = num_gen_iterator_make( ctx->row_generator, &iter );
            if ( rc != 0 )
                LogErr( klogInt, rc, "num_gen_iterator_make() failed\n" );
            else
            {
                uint64_t row_id;
                progressbar * progress;

                rc = make_progressbar( &progress );
                if ( rc != 0 )
                    LogErr( klogInt, rc, "make_progressbar() failed\n" );
                else
                {
                    uint8_t fract_digits = calc_fract_digits( iter );
                    uint32_t percent;
                    row_input row_data;

                    while ( ( num_gen_iterator_next( iter, &row_id ) == 0 )&&
                            ( rc == 0 ) )
                    {
                        rc = Quitting();
                        if ( rc == 0 )
                        {
                            /* ******************************************** */
                            rc = reader_get_data( reader, &row_data, row_id );
                            if ( rc == 0 )
                            {
                                rc = extract_statistic_from_row( data, &row_data, row_id );
                            }
                            /* ******************************************** */
                            if ( ctx->show_progress )
                                if ( num_gen_iterator_percent( iter, fract_digits, &percent ) == 0 )
                                    update_progressbar( progress, fract_digits, percent );
                        }
                    }
                    destroy_progressbar( progress );
                    if ( ctx->show_progress )
                        OUTMSG(( "\n" ));
                }
                num_gen_iterator_destroy( iter );
            }
        }
    }
    return rc;
}
Ejemplo n.º 22
0
static
rc_t FastqGzip_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const FastqReader* reader = NULL;

    uint16_t zlib_ver = ZLIB_VERNUM;
    uint8_t colorSpace = false;
    char* colorSpaceKey = "\0";
    uint8_t origFormat = false;
    uint8_t printLabel = true;
    uint8_t printReadId = true;
    uint8_t clipQuality = true;
    uint32_t minReadLen = 0;
    uint16_t qualityOffset = 0;

    {{
        const SRAColumn* c = NULL;
        const uint8_t *platform = SRA_PLATFORM_UNDEFINED;
        bitsz_t o, z;

        if( (rc = SRATableOpenColumnRead(sratbl, &c, "PLATFORM", sra_platform_id_t)) != 0 ) {
            return rc;
        }
        if( (rc = SRAColumnRead(c, 1, (const void **)&platform, &o, &z)) != 0 ) {
            return rc;
        }
        if( *platform == SRA_PLATFORM_ABSOLID ) {
            colorSpace = true;
        }
        SRAColumnRelease(c);
    }}

    if( (rc = FastqReaderMake(&reader, sratbl, g_accession,
                        colorSpace, origFormat, false, printLabel, printReadId,
                        !clipQuality, minReadLen, qualityOffset, colorSpaceKey[0],
                        obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        size_t written = 0;
        uint32_t blk = 0, spots_per_block = 0, proj_id_qty = 0;
        SIndexNode* inode = NULL;
        size_t z_blk = 0;
        size_t spots_buf_sz = g_file_block_sz * 100;
        size_t zbuf_sz = spots_buf_sz + 100;
        char* zbuf = malloc(zbuf_sz);
        char* spots_buf = malloc(spots_buf_sz);
        bool eof = false;

        if( zbuf == NULL || spots_buf == NULL ) {
            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
        }
        while( rc == 0 ) {
            if( (rc = FastqReader_GetNextSpotSplitData(reader, buffer, buffer_sz, &written)) == 0 ) {
                if( inode == NULL ) {
                    spotid_t spotid = 0;
                    if( (rc = FastqReaderCurrentSpot(reader, &spotid)) != 0 ) {
                        break;
                    }
                    inode = malloc(sizeof(SIndexNode));
                    if( inode == NULL ) {
                        rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        break;
                    }
                    inode->key = obj->file_size;
                    inode->key_size = 0;
                    inode->id = spotid;
                    inode->id_qty = 0;
                    DEBUG_MSG(5, ("%s open key: spot %ld, offset %lu\n", obj->index, inode->id, inode->key));
                }
                if( blk + written > spots_buf_sz ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcInsufficient);
                    break;
                }
                inode->id_qty++;
                memmove(&spots_buf[blk], buffer, written);
                blk += written;
                if( g_dump ) {
                    fwrite(buffer, written, 1, stderr);
                }
            }
            if( (eof = (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted)) ) {
                rc = 0;
                if( inode == NULL ) {
                    break;
                }
            }
            if( rc == 0 && (eof || 
                            (proj_id_qty == 0 && inode->id_qty > (spots_per_block * 0.95)) || 
                            (proj_id_qty > 0 && inode->id_qty >= proj_id_qty) ) ) {
                rc = ZLib_DeflateBlock(spots_buf, blk, zbuf, zbuf_sz, &z_blk);
                if( z_blk < g_file_block_sz ) {
                    /* project needed id_qty */
                    proj_id_qty = g_file_block_sz * inode->id_qty / z_blk * 1.05;
                    DEBUG_MSG(5, ("%s: project id qty %u\n", obj->index, proj_id_qty));
                } else {
                    DEBUG_MSG(10, ("%s: no projection %u > %u\n", obj->index, z_blk, g_file_block_sz));
                }
            }
            if( rc == 0 && (eof || z_blk >= g_file_block_sz) ) {
                obj->file_size += z_blk;
                MD5StateAppend(&obj->md5, zbuf, z_blk);
                inode->key_size = z_blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("%s close key: spots %lu, size %lu, ratio %hu%%, raw %u\n",
                         obj->index, inode->id_qty, inode->key_size, (uint16_t)(((float)(blk - z_blk)/blk)*100), blk ));
                spots_per_block = inode->id_qty;
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
                z_blk = 0;
                proj_id_qty = 0;
            }
            if( eof ) {
                break;
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            FastqReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
        free(zbuf);
        free(spots_buf);
    }
    if( rc == 0 ) {
        KMDataNode* opt = NULL, *nd = NULL;

        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) != 0 ) {
            return rc;
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "ZlibVersion")) == 0 ) {
            rc = KMDataNodeWriteB16(nd, &zlib_ver);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpace")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &colorSpace);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpaceKey")) == 0 ) {
            rc = KMDataNodeWrite(nd, colorSpaceKey, 1);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "origFormat")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &origFormat);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printLabel")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &printLabel);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printReadId")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &printReadId);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "clipQuality")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &clipQuality);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "minReadLen")) == 0 ) {
            rc = KMDataNodeWriteB32(nd, &minReadLen);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "qualityOffset")) == 0 ) {
            rc = KMDataNodeWriteB16(nd, &qualityOffset);
            KMDataNodeRelease(nd);
        }
        KMDataNodeRelease(opt);
    }
    FastqReaderWhack(reader);
    return rc;
}
Ejemplo n.º 23
0
static
rc_t SFF_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const SFFReader* reader = NULL;

    if( (rc = SFFReaderMake(&reader, sratbl, g_accession, obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        size_t written = 0;
        uint32_t blk = 0;
        SIndexNode* inode = NULL;

        while( rc == 0 ) {
            rc = SFFReader_GetNextSpotData(reader, buffer, buffer_sz, &written);
            if( blk >= g_file_block_sz || (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted) ) {
                inode->key_size = blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("SFF index closed spots %lu, offset %lu, block size %lu\n", inode->id_qty, inode->key, inode->key_size));
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
            }
            if( GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted ) {
                rc = 0;
                break;
            }
            if( inode == NULL ) {
                spotid_t spotid = 0;
                if( (rc = SFFReaderCurrentSpot(reader, &spotid)) != 0 ) {
                    break;
                }
                inode = malloc(sizeof(SIndexNode));
                if( inode == NULL ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                    break;
                }
                inode->key = obj->file_size;
                inode->key_size = 0;
                inode->id = spotid;
                inode->id_qty = 0;
                DEBUG_MSG(5, ("SFF index opened spot %ld, offset %lu\n", inode->id, inode->key));
                if( spotid == 1 ) {
                    char hd[10240];
                    size_t hd_sz = 0;
                    if( (rc = SFFReaderHeader(reader, 0, hd, sizeof(hd), &hd_sz)) == 0 ) {
                        obj->file_size += hd_sz;
                        blk += hd_sz;
                        MD5StateAppend(&obj->md5, hd, hd_sz);
                        if( g_dump ) {
                            fwrite(hd, hd_sz, 1, stderr);
                        }
                    }
                }
            }
            obj->file_size += written;
            blk += written;
            inode->id_qty++;
            MD5StateAppend(&obj->md5, buffer, written);
            if( g_dump ) {
                fwrite(buffer, written, 1, stderr);
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            SFFReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
    }
    SFFReaderWhack(reader);
    return rc;
}
Ejemplo n.º 24
0
static
rc_t Fastq_Idx(const SRATable* sratbl, SIndexObj* obj, char* buffer, const size_t buffer_sz)
{
    rc_t rc = 0;
    const FastqReader* reader = NULL;

    uint8_t colorSpace = false;
    char* colorSpaceKey = "\0";
    uint8_t origFormat = false;
    uint8_t printLabel = true;
    uint8_t printReadId = true;
    uint8_t clipQuality = true;
    uint32_t minReadLen = 0;
    uint16_t qualityOffset = 0;

    {{
        const SRAColumn* c = NULL;
        const uint8_t *platform = SRA_PLATFORM_UNDEFINED;
        bitsz_t o, z;

        if( (rc = SRATableOpenColumnRead(sratbl, &c, "PLATFORM", sra_platform_id_t)) != 0 ) {
            return rc;
        }
        if( (rc = SRAColumnRead(c, 1, (const void **)&platform, &o, &z)) != 0 ) {
            return rc;
        }
        if( *platform == SRA_PLATFORM_ABSOLID ) {
            colorSpace = true;
        }
        SRAColumnRelease(c);
    }}

    if( (rc = FastqReaderMake(&reader, sratbl, g_accession,
                        colorSpace, origFormat, false, printLabel, printReadId,
                        !clipQuality, minReadLen, qualityOffset, colorSpaceKey[0],
                        obj->minSpotId, obj->maxSpotId)) != 0 ) {
        return rc;
    } else {
        KMDataNode* opt = NULL, *nd = NULL;

        if( (rc = KMDataNodeOpenNodeUpdate(obj->meta, &opt, "Format/Options")) != 0 ) {
            return rc;
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpace")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &colorSpace);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "colorSpaceKey")) == 0 ) {
            rc = KMDataNodeWrite(nd, colorSpaceKey, 1);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "origFormat")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &origFormat);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printLabel")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &printLabel);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "printReadId")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &printReadId);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "clipQuality")) == 0 ) {
            rc = KMDataNodeWriteB8(nd, &clipQuality);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "minReadLen")) == 0 ) {
            rc = KMDataNodeWriteB32(nd, &minReadLen);
            KMDataNodeRelease(nd);
        }
        if( rc == 0 && (rc = KMDataNodeOpenNodeUpdate(opt, &nd, "qualityOffset")) == 0 ) {
            rc = KMDataNodeWriteB16(nd, &qualityOffset);
            KMDataNodeRelease(nd);
        }
        KMDataNodeRelease(opt);
    }

    if( rc == 0 ) {
        size_t written = 0;
        uint32_t blk = 0;
        SIndexNode* inode = NULL;

        while( rc == 0 ) {
            rc = FastqReader_GetNextSpotSplitData(reader, buffer, buffer_sz, &written);
            if( blk >= g_file_block_sz || (GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted) ) {
                inode->key_size = blk;
                SLListPushTail(&obj->li, &inode->n);
                DEBUG_MSG(5, ("Fastq index closed spots %lu, offset %lu, block size %lu\n",
                                                            inode->id_qty, inode->key, inode->key_size));
                inode = NULL;
                if( blk > obj->buffer_sz ) {
                    obj->buffer_sz = blk;
                }
                blk = 0;
            }
            if( GetRCObject(rc) == rcRow && GetRCState(rc) == rcExhausted ) {
                rc = 0;
                break;
            }
            if( inode == NULL ) {
                spotid_t spotid = 0;
                if( (rc = FastqReaderCurrentSpot(reader, &spotid)) != 0 ) {
                    break;
                }
                inode = malloc(sizeof(SIndexNode));
                if( inode == NULL ) {
                    rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                    break;
                }
                inode->key = obj->file_size;
                inode->key_size = 0;
                inode->id = spotid;
                inode->id_qty = 0;
                DEBUG_MSG(5, ("Fastq index opened spot %ld, offset %lu\n", inode->id, inode->key));
            }
            inode->id_qty++;
            obj->file_size += written;
            blk += written;
            MD5StateAppend(&obj->md5, buffer, written);
            if( g_dump ) {
                fwrite(buffer, written, 1, stderr);
            }
        }
        rc = rc ? rc : Quitting();
        if( rc != 0 ) {
            spotid_t spot = 0;
            FastqReaderCurrentSpot(reader, &spot);
            PLOGERR(klogErr, (klogErr, rc, "spot $(s)", PLOG_U32(s), spot));
        }
    }
    FastqReaderWhack(reader);
    return rc;
}
Ejemplo n.º 25
0
static
rc_t copy_file (const char * src, const char * dst, const KFile * fin, KFile *fout)
{
    rc_t rc;
    uint8_t	buff	[64 * 1024];
    size_t	num_read;
    uint64_t	inpos;
    uint64_t	outpos;

    assert (src);
    assert (dst);
    assert (fin);
    assert (fout);

    inpos = 0;
    outpos = 0;

#if 1
    for (inpos = 0; ; inpos += num_read)
    {
        rc = Quitting ();
        if (rc)
        {
            LOGMSG (klogFatal, "Received quit");
            break;
        }
        else
        {
            rc = KFileReadAll (fin, inpos, buff, sizeof (buff), &num_read);
            if (rc)
            {
                PLOGERR (klogErr,
                         (klogErr, rc,
                          "Failed to read from file $(F) at $(P)",
                          "F=%s,P=%lu", src, inpos));
                break;
            }
            else if (num_read)
            {
                size_t num_writ;

                rc = KFileWriteAll (fout, inpos, buff, num_read, &num_writ);
                if (rc)
                {
                    PLOGERR (klogErr,
                             (klogErr, rc,
                              "Failed to write to file $(F) at $(P)",
                              "F=%s,P=%lu", dst, outpos));
                    break;
                }
                else if (num_writ != num_read)
                {
                    rc = RC (rcExe, rcFile, rcWriting, rcFile, rcInsufficient);
                    PLOGERR (klogErr,
                             (klogErr, rc,
                              "Failed to write all to file $(F) at $(P)",
                              "F=%s,P=%lu", dst, outpos));
                    break;
                }
            }
            else 
                break;
        }
    }
#else
    do
    {
        rc = Quitting ();
        if (rc)
        {
            LOGMSG (klogFatal, "Received quit");
            break;
        }
        rc = KFileRead (fin, inpos, buff, sizeof (buff), &num_read);
        if (rc)
        {
            PLOGERR (klogErr,
                     (klogErr, rc,
                      "Failed to read from file $(F) at $(P)",
                      "F=%s,P=%lu", src, inpos));
            break;
        }
        else if (num_read > 0)
        {
            size_t to_write;

            inpos += (uint64_t)num_read;

            STSMSG (2, ("Read %zu bytes to %lu", num_read, inpos));

            to_write = num_read;
            while (to_write > 0)
            {
                size_t num_writ;
                rc = KFileWrite (fout, outpos, buff, num_read, &num_writ);
                if (rc)
                {
                    PLOGERR (klogErr,
                             (klogErr, rc,
                              "Failed to write to file $(F) at $(P)",
                              "F=%s,P=%lu", dst, outpos));
                    break;
                }
                outpos += num_writ;
                to_write -= num_writ;
            }
        }
        if (rc)
            break;
    } while (num_read != 0);
#endif
    return rc;
}