Exemple #1
0
/*
 * sra_meta_stats_update
 *  Adds one spot to the table-wide statistics and, when spot groups are
 *  tracked, to the statistics of the spot's group.
 *
 *  self          - statistics state; must not be NULL (asserted)
 *  spot_id, spot_len, bio_spot_len, cmp_spot_len
 *                - forwarded unchanged to sra_meta_stats_node_group_update
 *  has_grp       - when false, only the table-wide statistics are updated
 *  grp, grp_len  - group name and its length in bytes; only the first
 *                  grp_len bytes are read. A zero length, NULL pointer,
 *                  leading NUL or the name "default" (any case) selects
 *                  the default group.
 *
 *  The most recently used non-default group is cached in self->last_grp,
 *  keyed by self->last_grp_name / last_grp_name_len.  The cached length is
 *  non-zero only while last_grp refers to a successfully opened group, so a
 *  failure while switching groups can never leave a key that matches a
 *  released node group.
 *
 *  Once more than max_grp_qty distinct new groups have been seen, the
 *  "SPOT_GROUP" child of "STATS" is dropped and per-group accounting stops
 *  for all later calls (self->grp_qty stays above the limit).
 *
 *  Returns 0 on success, otherwise the rc of the first failing step.
 */
static
rc_t CC sra_meta_stats_update(sra_meta_stats_data* self,
    const int64_t spot_id, const uint32_t spot_len,
    const uint32_t bio_spot_len, const uint32_t cmp_spot_len,
    bool has_grp, const char* grp, uint64_t grp_len)
{
    rc_t rc = 0;
    const uint32_t max_grp_qty = 10000;

    assert(self != NULL);

    rc = sra_meta_stats_node_group_update(&self->table, spot_id, spot_len, bio_spot_len, cmp_spot_len);
    if( has_grp && self->grp_qty <= max_grp_qty && rc == 0 )
    {
        /* an empty group is considered default */
        if( grp_len == 0 || grp == NULL || grp[0] == '\0' ||
            (grp_len == 7 && strncasecmp("default", grp, grp_len) == 0 ) )
        {
            rc = sra_meta_stats_node_group_update(&self->dflt_grp, spot_id, spot_len, bio_spot_len, cmp_spot_len);
        }
        else
        {
            size_t i;
            KMDataNode* n;
            const KMDataNode *cn;
            bool new_group, unsafe;

            /* look for cached node */
            if ( self->last_grp_name != NULL &&
                 self->last_grp_name_len == grp_len &&
                 strncmp(self->last_grp_name, grp, grp_len) == 0 )
            {
                return sra_meta_stats_node_group_update(&self->last_grp, spot_id, spot_len, bio_spot_len, cmp_spot_len);
            }

            /* release cached group and invalidate its key: grp_len is
               non-zero in this branch, so a zero cached length can never
               match until it is published again after a successful open */
            sra_meta_stats_node_group_release(&self->last_grp);
            self->last_grp_name_len = 0;

            /* realloc cached name */
            if ( self->last_grp_name == NULL || grp_len >= self->last_grp_name_sz )
            {
                char *p = realloc ( self -> last_grp_name, grp_len + 1 );
                if ( p == NULL )
                    return RC ( rcXF, rcFunction, rcExecuting, rcMemory, rcExhausted );

                self -> last_grp_name = p;
                self -> last_grp_name_sz = grp_len + 1;
            }

            /* sanitize name: '/' is replaced by '\\' before the name is
               used as a path component in the node paths below */
            for ( unsafe = false, i = 0; i < grp_len; ++ i )
            {
                if ( ( self -> last_grp_name [ i ] = grp [ i ] ) == '/' )
                {
                    unsafe = true;
                    self -> last_grp_name [ i ] = '\\';
                }
            }
            self -> last_grp_name [ i ] = 0;

            /* look for new group: any failure to open the node for reading
               is treated as "group does not exist yet" */
            new_group = true;
            rc = KMetadataOpenNodeRead(self->meta, &cn, "STATS/SPOT_GROUP/%s", self->last_grp_name );
            if ( rc == 0 )
            {
                new_group = false;
                KMDataNodeRelease ( cn );
            }

            /* detect abusive quantity of nodes */
            if ( new_group && ++self->grp_qty > max_grp_qty )
            {
                rc = KMetadataOpenNodeUpdate(self->meta, &n, "STATS");
                if( rc == 0 )
                {
                    sra_meta_stats_node_group_release(&self->dflt_grp);
                    KMDataNodeDropChild(n, "SPOT_GROUP");
                    KMDataNodeRelease(n);
                    free(self->last_grp_name);
                    self->last_grp_name = NULL;
                }
                /* on failure the cached length is already zero, so the
                   stale name cannot be matched */
                return rc;
            }

            /* create new or cache existing group */
            rc = KMetadataOpenNodeUpdate(self->meta, &n, "STATS/SPOT_GROUP/%s", self->last_grp_name );
            if ( rc == 0 )
            {
                rc = sra_meta_stats_node_group_open(n, &self->last_grp, self->compressed);
                if (rc == 0 && new_group) {
                    if (unsafe)
                    {
                        /* keep the original (unsanitized) name in a "name"
                           attribute; use the stack buffer when it fits */
                        char value [ 512 ], *v = value;
                        if ( grp_len >= sizeof value )
                            v = malloc ( grp_len + 1 );
                        if ( v == NULL )
                            rc = RC ( rcXF, rcFunction, rcExecuting, rcMemory, rcExhausted );
                        else
                        {
                            rc = string_printf ( v, grp_len + 1, NULL, "%.*s", ( uint32_t ) grp_len, grp );
                            assert ( rc == 0 );
                            rc = KMDataNodeWriteAttr(n, "name", v);
                            if ( rc == 0 )
                            {
                                /* restore the raw name as cache key so the
                                   next lookup with the same grp matches.
                                   NOTE(review): this is only done for new
                                   groups; an existing group whose name
                                   contains '/' keeps the sanitized key and
                                   looks like it will miss the cache on every
                                   call - confirm whether that is intended */
                                memcpy ( self->last_grp_name, grp, grp_len );
                            }
                            if ( v != value )
                                free ( v );
                        }
                    }
                    /* first update of a freshly created group uses all-zero values */
                    if ( rc == 0 )
                        rc = sra_meta_stats_node_group_update(&self->last_grp, 0, 0, 0, 0);
                }
                KMDataNodeRelease(n);

                if( rc == 0 )
                    rc = sra_meta_stats_node_group_update(&self->last_grp, spot_id, spot_len, bio_spot_len, cmp_spot_len);
            }

            /* publish the cache key only once the group has been opened
               and updated; i still equals the copied name length */
            if ( rc == 0 )
                self -> last_grp_name_len = i;
        }
    }
    return rc;
}
Exemple #2
0
/*
 * MakeIndexes
 *  Builds (or removes) the FUSE file indexes of a table and records them
 *  under the "/FUSE" metadata node.
 *
 *  stbl  - source table handed to each index builder
 *  ktbl  - table whose indexes are committed or dropped
 *  meta  - metadata in which the "/FUSE" node is opened for update
 *
 *  For every entry of the local idx[] table:
 *   - the stale "root" child of "/FUSE" is dropped;
 *   - if g_ungzip is set or the file name ends in ".gz", the index is built
 *     into a "<file>.tmp" node and, on success, renamed over "<file>";
 *     an rc whose state is rcUnsupported is logged as a warning, the
 *     "<file>" node is dropped and processing continues;
 *   - otherwise the table index and the "<file>" node are dropped.
 *
 *  One work buffer of g_file_block_sz * 100 bytes is allocated lazily and
 *  shared by all builders.
 *
 *  Returns 0 on success, or the first rc that stopped the loop.
 */
static
rc_t MakeIndexes(const SRATable* stbl, KTable* ktbl, KMetadata* meta)
{
    rc_t rc = 0;
    size_t i;
    char* buffer = NULL;
    size_t buffer_sz = g_file_block_sz * 100;

    SIndexObj idx[] = {
     /*  meta, file,        format,         index,          func,    file_size, buffer_sz, minSpotId, maxSpotId */
        {NULL, "fastq",    "fastq",      "fuse-fastq",    Fastq_Idx,     0, 0, 0, 0},
        {NULL, "sff",      "SFF",        "fuse-sff",      SFF_Idx,       0, 0, 0, 0},
        {NULL, "fastq.gz", "fastq-gzip", "fuse-fastq-gz", FastqGzip_Idx, 0, 0, 0, 0},
        {NULL, "sff.gz",   "SFF-gzip",   "fuse-sff-gz",   SFFGzip_Idx,   0, 0, 0, 0}
    };

    for(i = 0; rc == 0 && i < sizeof(idx) / sizeof(idx[0]); i++) {
        KMDataNode* parent = NULL;
        if( (rc = KMetadataOpenNodeUpdate(meta, &parent, "/FUSE")) == 0 ) {
            KMDataNodeDropChild(parent, "root"); /* drop old stuff */
            if( g_ungzip || strcmp(&idx[i].file[strlen(idx[i].file) - 3], ".gz") == 0 ) {
                STSMSG(0, ("Preparing index %s", idx[i].index));
                MD5StateInit(&idx[i].md5);
                SLListInit(&idx[i].li);
                /* start from a clean temporary node */
                KMDataNodeDropChild(parent, "%s.tmp", idx[i].file);
                if( (rc = KMDataNodeOpenNodeUpdate(parent, &idx[i].meta, "%s.tmp", idx[i].file)) == 0 ) {
                    if( idx[i].func != NULL ) {
                        if( buffer == NULL && (buffer = malloc(buffer_sz)) == NULL ) {
                            /* do not leave the loop from here: the nodes opened
                               above and the index list are still released by
                               the regular cleanup below, and the loop condition
                               stops the iteration on this rc */
                            rc = RC(rcExe, rcIndex, rcConstructing, rcMemory, rcExhausted);
                        } else {
                            rc = idx[i].func(stbl, &idx[i], buffer, buffer_sz);
                            if( rc == 0 ) {
                                MD5StateFinish(&idx[i].md5, idx[i].md5_digest);
                                rc = CommitIndex(ktbl, idx[i].index, &idx[i].li);
                            }
                        }
                    }
                    if( rc == 0 ) {
                        rc = WriteFileMeta(&idx[i]);
                    }
                    KMDataNodeRelease(idx[i].meta);
                }
                if( GetRCState(rc) == rcUnsupported ) {
                    /* this index type cannot be built for the table: not fatal */
                    KMDataNodeDropChild(parent, "%s", idx[i].file);
                    PLOGERR(klogWarn, (klogWarn, rc, "Index $(i) is not supported for this table", PLOG_S(i), idx[i].index));
                    rc = 0;
                } else if( rc == 0 ) {
                    /* replace the previous node with the freshly built one */
                    char f[4096];
                    strcpy(f, idx[i].file);
                    strcat(f, ".tmp");
                    KMDataNodeDropChild(parent, "%s", idx[i].file);
                    rc = KMDataNodeRenameChild(parent, f, idx[i].file);
                }
            } else if( !g_ungzip ) {
                KTableDropIndex(ktbl, idx[i].index);
                KMDataNodeDropChild(parent, "%s", idx[i].file);
            }
            /* remove whatever is left of the temporary node */
            KMDataNodeDropChild(parent, "%s.tmp", idx[i].file);
            KMDataNodeRelease(parent);
        }
        SLListWhack(&idx[i].li, WhackIndexData, NULL);
    }
    free(buffer);
    return rc;
}