Exemple #1
0
/*
 * Record the read layout described by a REGION chunk in the formatter context.
 *
 * Regions are consumed in pairs: for read i, region[2*i] supplies the color
 * space key (first character of its name) and region[2*i + 1] supplies the
 * start offset, the label/type (through set_label_type) and the region type
 * that is validated here. A trailing unpaired region is ignored.
 *
 * self         - formatter context whose region table is filled in
 * region_count - number of entries in region[]
 * region       - region descriptors
 *
 * Returns 0 on success. Fails with rcUnsupported when the read count is zero
 * or larger than ABSOLID_FMT_MAX_NUM_READS, with rcUnexpected on an unknown
 * region type, with rcDuplicate when the first two reads share a type, or
 * with whatever set_label_type reports.
 *
 * The read count is validated at full size_t width before it is stored, and
 * region.nreads is left at 0 on every failure path so that later code never
 * iterates over an out-of-range or partially filled region table.
 */
static
rc_t fe_new_region(fe_context_t *self, size_t region_count, const region_t region[])
{
    rc_t rc = 0;
    const size_t nreads = region_count / 2;
    size_t i;

    /* nothing is published until the whole table has been validated */
    self->region.nreads = 0;

    DEBUG_MSG(3, ("REGION: %u -> %u reads\n", (unsigned int)region_count, (unsigned int)nreads));
    if( nreads == 0 || nreads > (size_t)ABSOLID_FMT_MAX_NUM_READS ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnsupported);
        SRALoaderFile_LOG(self->ctx.file, klogErr, rc, "read count $(c)", PLOG_U8(c), (unsigned int)nreads);
    }
    for(i = 0; rc == 0 && i < nreads; i++ ) {
        /* odd entry of the pair describes the read, even entry carries the cs key */
        const size_t j = i * 2 + 1;

        self->region.start[i] = region[j].start;
        if( (rc = set_label_type(region[j].name, &self->region.label[i], &self->region.type[i])) != 0 ) {
            break;
        }
        self->region.cs_key[i] = region[j - 1].name[0];
        DEBUG_MSG(3, ("REGION[%u]: '%s', %u, '%c', start: %u\n",
                      (unsigned int)i, self->region.label[i].data, self->region.type[i], self->region.cs_key[i], self->region.start[i]));
        switch(region[j].type) {
            case Biological:
            case Normal:
            case Paired:
            case Technical:
                break;
            default:
                /* rc != 0 terminates the loop through its condition */
                rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                SRALoaderFile_LOG(self->ctx.file, klogErr, rc, "read #$(read_id) type mismatch; expected $(expected), got $(got)",
                        "read_id=%u,expected=%s,got=%u", (unsigned int)i, "(B|N|P|T)", region[j].type);
                break;
        }
    }
    if( rc == 0 && nreads > 1 && self->region.type[0] == self->region.type[1] ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcDuplicate);
        SRALoaderFile_LOG(self->ctx.file, klogErr, rc, "both reads have same type", NULL);
    }
    if( rc == 0 ) {
        /* value is within 1..ABSOLID_FMT_MAX_NUM_READS here */
        self->region.nreads = nreads;
    }
    return rc;
}
Exemple #2
0
/*
 * Construct an Illumina writer from the loader configuration.
 *
 * cself  - receives the new writer on success; not written on failure
 * config - loader configuration; its experiment supplies the platform,
 *          the number of reads, the spot length and the read specs
 *
 * The experiment must describe an Illumina platform with at most
 * ILLUMINAWRITER_MAX_NUM_READS reads. Read specs are then walked from the
 * last read to the first to locate the (single) barcode read and, where the
 * coordinates allow it, to precompute fixed read segments. If a read spec
 * cannot be expressed as a fixed segment, fixed segmentation is switched
 * off and construction continues.
 *
 * Returns 0 on success or a non-zero rc. A failure of SRAWriter_Make is
 * propagated to the caller (the read-spec loop is skipped so the code is
 * not overwritten) and the partially built object is released through
 * SRAWriterIllumina_Whack, like every other failure after allocation.
 */
rc_t SRAWriterIllumina_Make(const SRAWriterIllumina** cself, const SRALoaderConfig* config)
{
    rc_t rc = 0;
    SRAWriterIllumina* self;
    const PlatformXML* platform;
    const ReadSpecXML_read* last_read;
    int32_t spot_len;
    uint32_t sequence_length;
    uint8_t nreads;

    if( cself == NULL || config == NULL ) {
        return RC(rcSRA, rcFormatter, rcConstructing, rcParam, rcNull);
    }
    if( (rc = Experiment_GetPlatform(config->experiment, &platform)) != 0 ||
        (rc = Experiment_GetReadNumber(config->experiment, &nreads)) != 0 ||
        (rc = Experiment_GetSpotLength(config->experiment, &sequence_length)) != 0 ||
        (rc = Experiment_GetRead(config->experiment, nreads - 1, &last_read)) != 0 ) {
        return rc;
    }
    if( platform->id != SRA_PLATFORM_ILLUMINA ) {
        rc = RC(rcSRA, rcFormatter, rcConstructing, rcData, rcInvalid);
        LOGERR(klogErr, rc, "platform type");
        return rc;
    }
    if( nreads > ILLUMINAWRITER_MAX_NUM_READS ) {
        rc = RC(rcSRA, rcFormatter, rcConstructing, rcData, rcUnsupported);
        PLOGERR(klogErr, (klogErr, rc, "more than $(max) reads", PLOG_U8(max), (uint8_t)ILLUMINAWRITER_MAX_NUM_READS));
        return rc;
    }
    self = calloc(1, sizeof(*self));
    if( self == NULL ) {
        rc = RC(rcSRA, rcFormatter, rcConstructing, rcMemory, rcExhausted);
        return rc;
    }
    if( (rc = SRAWriter_Make(&self->base, config)) != 0 ) {
        LOGERR(klogInt, rc, "failed to initialize base writer");
    }
    self->config = config;
    self->platform = platform;
    self->nreads = nreads;
    self->barcode_read_id = ILLUMINAWRITER_READID_NONE;
    self->last_read = last_read;
    self->fixed_read_seg = true;
    self->col_mask = ILLUMINAWRITER_COLMASK_NOTSET;
    self->sequence_length = sequence_length;
    spot_len = sequence_length;

    /* Walk the reads from last to first. Testing rc before the first pass
       keeps a base-writer failure from being overwritten below, and testing
       nreads before decrementing avoids wrapping the unsigned counter when
       the experiment reports zero reads. */
    while( rc == 0 && nreads > 0 ) {
        const ReadSpecXML_read* read_spec;
        int16_t len = 0;
        --nreads;
        if( (rc = Experiment_GetRead(config->experiment, nreads, &read_spec)) != 0 ) {
            break;
        }
        if( read_spec->read_type == rdsp_BarCode_rt ) {
            if( self->barcode_read_id == ILLUMINAWRITER_READID_NONE ) {
                self->barcode_read_id = nreads;
            } else {
                rc = RC(rcSRA, rcFormatter, rcConstructing, rcData, rcDuplicate);
                LOGERR(klogErr, rc, "only on BarCode READ_TYPE per spot supported");
                break;
            }
        }
        if( self->fixed_read_seg ) {
            /* spot_len is the part of the spot not yet claimed by a later read */
            switch(read_spec->coord_type) {
                case rdsp_BaseCoord_ct:
                case rdsp_CycleCoord_ct:
                    len = spot_len - read_spec->coord.start_coord + 1;
                    break;
                case rdsp_ExpectedBaseCall_ct:
                case rdsp_ExpectedBaseCallTable_ct:
                    if( read_spec->coord.expected_basecalls.default_length > 0 ) {
                        len = read_spec->coord.expected_basecalls.default_length;
                    } else {
                        rc = RC(rcSRA, rcFormatter, rcConstructing, rcData, rcUnsupported);
                    }
                    break;
                case rdsp_RelativeOrder_ct:
                    if( nreads == 0 ) {
                        len = spot_len - 1 + 1; /* as if BASE_COORD == 1 */
                        break;
                    }
                    /* relative order on any read but the first: not a fixed segment */
                    /* fallthrough */
                default:
                    rc = RC(rcSRA, rcFormatter, rcConstructing, rcData, rcUnsupported);
            }
            if( rc == 0 ) {
                spot_len -= len;
                if( spot_len < 0 || len <= 0 ) {
                    rc = RC(rcSRA, rcFormatter, rcConstructing, rcData, rcInconsistent);
                    LOGERR(klogErr, rc, "SPOT_DECODE_SPEC and SEQUENCE_LENGTH");
                } else {
                    SRASegment* seg = &self->read_seg[nreads];
                    seg->start = spot_len;
                    seg->len = len;
                    DEBUG_MSG(3, ("#%u read fixed length = %i\n", nreads, len));
                }
            } else if( GetRCState(rc) == rcUnsupported ) {
                /* not an error: fall back to non-fixed segmentation */
                self->fixed_read_seg = false;
                DEBUG_MSG(3, ("not fixed spot segmentation"));
                rc = 0;
            }
        }
    }

    if( rc == 0 ) {
        *cself = self;
    } else {
        SRAWriterIllumina_Whack(self, NULL);
    }
    return rc;
}
Exemple #3
0
/*
 * Parse one SRF read chunk and hand the decoded reads to the ABI SOLiD writer.
 *
 * ctx     - SRF context; also used as the fe_context_t formatter context
 * ztr_ctx - ZTR parser state; the chunk payload is appended to its buffer
 * data    - raw read chunk
 * size    - size of data in bytes
 *
 * The chunk header is parsed with SRF_ParseReadChunk, the remaining bytes are
 * fed to the ZTR parser and every ZTR block is dispatched by type:
 *   BASE - sequence, CNF1 - quality, SAMP - signal (unless skip_signal is set).
 * When fe_new_read reports a read type above eAbisolidReadType_SPOT the chunk
 * is stored as one read selected by that type; otherwise the data is split
 * into fe->region.nreads pieces using the start offsets from the region table.
 *
 * Returns 0 on success or the first error encountered. Block processing stops
 * at the first error so that a later, successfully parsed block cannot reset
 * rc and let an incomplete spot be written.
 *
 * NOTE(review): region start offsets are not checked against the size of the
 * decoded block before the piece length is computed; this relies on
 * pstring_assign rejecting an oversized length - confirm.
 */
static
rc_t parse_v1_read(SRF_context *ctx, ZTR_Context *ztr_ctx, const uint8_t *data, size_t size)
{
    rc_t rc = 0;
    size_t i, parsed;
    ztr_raw_t ztr_raw;
    ztr_t ztr;
    enum ztr_chunk_type type;
    fe_context_t* fe = (fe_context_t*)ctx;

    uint8_t flags;
    pstring readId;
    EAbisolidReadType read_type;
    pstring label;

    AbsolidRead read[ABSOLID_FMT_MAX_NUM_READS];

    if( fe->region.nreads == 0 ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcNotFound);
        return SRALoaderFile_LOG(ctx->file, klogErr, rc, "missing region chunk before 1st read chunk", NULL);
    }
    if( (rc = SRF_ParseReadChunk(data, size, &parsed, &flags, &readId)) != 0 ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rc);
        return SRALoaderFile_LOG(ctx->file, klogErr, rc, "SRF parsing failure", NULL);
    }
    /* everything after the chunk header is ZTR data */
    ABI_ZTR_AddToBuffer(ztr_ctx, data + parsed, size - parsed);

    /* readId will have spotname */
    if( (rc = fe_new_read(fe, &readId, &read_type, &label)) != 0 ) {
        return SRALoaderFile_LOG(ctx->file, klogErr, rc, "parsing spot name suffix", NULL);
    }
    for(i = 0; i < sizeof(read) / sizeof(read[0]); i++) {
        AbsolidRead_Init(&read[i]);
    }
    /* stop at the first error: rc must survive until the write decision below */
    while( rc == 0 && !ABI_ZTR_BufferIsEmpty(ztr_ctx) ) {
        if( (rc = ABI_ZTR_ParseBlock(ztr_ctx, &ztr_raw)) != 0 ||
            (rc = ABI_ZTR_ProcessBlock(ztr_ctx, &ztr_raw, &ztr, &type)) != 0 ) {
            SRALoaderFile_LOG(ctx->file, klogErr, rc, "ZTR parsing failure", NULL);
            break;
        }
        switch (type) {
        case BASE:
            if(ztr.sequence->datatype != i8) {
                rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                SRALoaderFile_LOG(ctx->file, klogErr, rc, "read: expected 8-bit datatype", NULL);
            } else if( read_type > eAbisolidReadType_SPOT ) {
                /* chunk carries a single read identified by the spot name suffix */
                int read_number = AbisolidReadType2ReadNumber[read_type];
                if( (rc = pstring_assign(&read[read_number].seq, ztr.sequence->data, ztr.sequence->datasize)) == 0 ) {
                    /* grab 1st, may be the only cs_key */
                    read[read_number].cs_key = fe->region.cs_key[0];
                    for(i = 1; i < fe->region.nreads; i++) {
                        if( read_type == fe->region.type[i] ) {
                            read[read_number].cs_key = fe->region.cs_key[i];
                            break;
                        }
                    }
                    SRF_set_read_filter(&read[read_number].filter, flags);
                    rc = pstring_copy(&read[read_number].label, &label);
                    DEBUG_MSG(3, ("SRF READ: '%s'\n", read[read_number].seq.data));
                }
                if( rc != 0 ) {
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying read", NULL);
                }
            } else {
                /* whole spot: cut it into reads at the region start offsets */
                for(i = 0; rc == 0 && i < fe->region.nreads; i++) {
                    int read_number = AbisolidReadType2ReadNumber[fe->region.type[i]];
                    size_t len = (i + 1 >= fe->region.nreads ? ztr.sequence->datasize : fe->region.start[i + 1]) - fe->region.start[i];
                    rc = pstring_assign(&read[read_number].seq, &ztr.sequence->data[fe->region.start[i]], len);
                    read[read_number].cs_key = fe->region.cs_key[i];
                    SRF_set_read_filter(&read[read_number].filter, flags);
                    if( fe->region.label[i].len != 0 ) {
                        rc = pstring_copy(&read[read_number].label, &fe->region.label[i]);
                    }
                    DEBUG_MSG(3, ("SRF READ[%u]: '%s'\n", i, read[read_number].seq.data));
                }
                if( rc != 0 ) {
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying reads", NULL);
                }
            }
            break;
        case CNF1:
            if(ztr.quality1->datatype != i8) {
                rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                SRALoaderFile_LOG(ctx->file, klogErr, rc, "quality: expected 8-bit datatype", NULL);
            } else if( read_type > eAbisolidReadType_SPOT ) {
                int read_number = AbisolidReadType2ReadNumber[read_type];
                if( (rc = pstring_assign(&read[read_number].qual, ztr.quality1->data, ztr.quality1->datasize)) == 0 ) {
                    DEBUG_MSG(3, ("SRF QUAL: %u bytes\n", read[read_number].qual.len));
                }
                if( rc != 0 ) {
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying quality", NULL);
                }
            } else {
                for(i = 0; rc == 0 && i < fe->region.nreads; i++) {
                    int read_number = AbisolidReadType2ReadNumber[fe->region.type[i]];
                    size_t len = (i + 1 >= fe->region.nreads ? ztr.quality1->datasize : fe->region.start[i + 1]) - fe->region.start[i];
                    rc = pstring_assign(&read[read_number].qual, &ztr.quality1->data[fe->region.start[i]], len);
                    DEBUG_MSG(3, ("SRF QUAL[%u]: %u bytes\n", i, read[read_number].qual.len));
                }
                if( rc != 0 ) {
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying qualities", NULL);
                }
            }
            break;
        case SAMP:
            if( !fe->skip_signal ) {
                int stype = ABSOLID_FMT_COLMASK_NOTSET;
                /* validate the block and map the channel name to a column mask */
                if(ztr.signal->datatype != f32) {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "signal: expected 32-bit float datatype", NULL);
                } else if( (ztr.signal->datasize % sizeof(float)) != 0 ) {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcInvalid);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "signal: size not 32-bit float aligned", NULL);
                } else if (ztr.signal->channel == NULL) {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcIncomplete);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "SIGNAL column: missing channel type", NULL);
                } else if(strcmp(ztr.signal->channel, "0FAM") == 0) {
                    stype = ABSOLID_FMT_COLMASK_FAM;
                } else if(strcmp(ztr.signal->channel, "1CY3") == 0) {
                    stype = ABSOLID_FMT_COLMASK_CY3;
                } else if(strcmp(ztr.signal->channel, "2TXR") == 0) {
                    stype = ABSOLID_FMT_COLMASK_TXR;
                } else if(strcmp(ztr.signal->channel, "3CY5") == 0) {
                    stype = ABSOLID_FMT_COLMASK_CY5;
                } else {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "SIGNAL column: unexpected channel type", NULL);
                }
#if __BYTE_ORDER == __LITTLE_ENDIAN
                /* byte-swap every 32-bit word of the signal in place */
                for(i = 0; rc == 0 && i < ztr.signal->datasize; i += 4) {
                    uint32_t* r = (uint32_t*)&ztr.signal->data[i];
                    *r = bswap_32(*r);
                }
#endif
                if( rc == 0 ) {
                    if( read_type > eAbisolidReadType_SPOT ) {
                        int read_number = AbisolidReadType2ReadNumber[read_type];
                        pstring* d = NULL;
                        switch(stype) {
                            case ABSOLID_FMT_COLMASK_FAM:
                                read[read_number].fs_type = eAbisolidFSignalType_FAM;
                                d = &read[read_number].fxx;
                                break;
                            case ABSOLID_FMT_COLMASK_CY3:
                                d = &read[read_number].cy3;
                                break;
                            case ABSOLID_FMT_COLMASK_TXR:
                                d = &read[read_number].txr;
                                break;
                            case ABSOLID_FMT_COLMASK_CY5:
                                d = &read[read_number].cy5;
                                break;
                        }
                        if( d ) {
                            rc = pstring_assign(d, ztr.signal->data, ztr.signal->datasize);
                            DEBUG_MSG(3, ("SRF SIGNAL[%s]: %u bytes\n", ztr.signal->channel, d->len));
                        } else {
                            rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnrecognized);
                        }
                        if( rc != 0 ) {
                            SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying signal", NULL);
                        }
                    } else {
                        if( fe->region.nreads <= 0 || fe->region.nreads > ABSOLID_FMT_MAX_NUM_READS ) {
                            rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnsupported);
                            SRALoaderFile_LOG(fe->ctx.file, klogErr, rc, "read count $(c)", PLOG_U8(c), fe->region.nreads);
                        }

                        /* region start offsets count values; scale to bytes for float data */
                        for(i = 0; rc == 0 && i < fe->region.nreads; i++) {
                            pstring* d = NULL;
                            int read_number = AbisolidReadType2ReadNumber[fe->region.type[i]];
                            size_t len = (i + 1 >= fe->region.nreads) ? ztr.signal->datasize : (fe->region.start[i + 1] * sizeof(float));
                            len -= fe->region.start[i] * sizeof(float);
                            switch(stype) {
                                case ABSOLID_FMT_COLMASK_FAM:
                                    read[read_number].fs_type = eAbisolidFSignalType_FAM;
                                    d = &read[read_number].fxx;
                                    break;
                                case ABSOLID_FMT_COLMASK_CY3:
                                    d = &read[read_number].cy3;
                                    break;
                                case ABSOLID_FMT_COLMASK_TXR:
                                    d = &read[read_number].txr;
                                    break;
                                case ABSOLID_FMT_COLMASK_CY5:
                                    d = &read[read_number].cy5;
                                    break;
                            }
                            if( d ) {
                                rc = pstring_assign(d, &ztr.signal->data[fe->region.start[i] * sizeof(float)], len);
                                DEBUG_MSG(3, ("SRF SIGNAL[%s]: %u bytes\n", ztr.signal->channel, d->len));
                            } else {
                                rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnrecognized);
                            }
                        }
                        if( rc != 0 ) {
                            SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying signals", NULL);
                        }
                    }
                }
            }
            break;
        default:
            break;
        }
        /* frees the pointer stored at the start of ztr, on success and on error alike;
           NOTE(review): presumably the decoded chunk allocated by ABI_ZTR_ProcessBlock - confirm */
        if(type != none && type != ignore) {
            free(*(void **)&ztr);
        }
    }
    if(rc == 0) {
        if( read_type <= eAbisolidReadType_SPOT ) {
            rc = SRAWriteAbsolid_Write(fe->writer, ctx->file, &readId, NULL, &read[0], &read[1]);
        } else {
            /* single-read chunk: pass only the slot that was filled */
            switch( AbisolidReadType2ReadNumber[read_type] ) {
                case 0:
                    rc = SRAWriteAbsolid_Write(fe->writer, ctx->file, &readId, NULL, &read[0], NULL);
                    break;
                case 1:
                    rc = SRAWriteAbsolid_Write(fe->writer, ctx->file, &readId, NULL, NULL, &read[1]);
                    break;
                default:
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnsupported);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "more than 2 reads", NULL);
                    break;
            }
        }
    }
    return rc;
}
Exemple #4
0
/*
 * Build the key set for one spot by sorting its reads into per-filter
 * read masks.
 *
 * cself    - splitter instance (a ReadFilterSplitter)
 * key      - receives a pointer to the splitter's own key array
 * keys     - receives the number of entries in *key; 0 when the splitter
 *            has no READ_FILTER column or the column read fails
 * spot     - spot id whose READ_FILTER values are read
 * readmask - not used by this splitter
 *
 * Every key's read mask is cleared first. Each read is then added to the
 * mask of the key matching its filter value (pass, reject, criteria,
 * redacted, or unknown with a warning). When self->read_filter is not 0xFF,
 * reads whose filter value differs from it are left out of every mask.
 *
 * Returns 0 on success, an rcNull parameter error when cself, key or keys
 * is NULL, or the rc from SRAColumnRead.
 */
static rc_t ReadFilterSplitter_GetKeySet( const SRASplitter* cself,
        const SRASplitter_Keys** key, uint32_t* keys, spotid_t spot, const readmask_t* readmask )
{
    rc_t rc = 0;
    ReadFilterSplitter* self = ( ReadFilterSplitter* )cself;

    /* keys is written unconditionally below, so it needs the same check as key */
    if ( self == NULL || key == NULL || keys == NULL )
    {
        rc = RC( rcSRA, rcNode, rcExecuting, rcParam, rcNull );
    }
    else
    {
        const INSDC_SRA_read_filter* rdf;
        bitsz_t o = 0, sz = 0;

        *keys = 0;
        if ( self->col_rdf != NULL )
        {
            /* NOTE(review): the bit offset returned in o is not applied to rdf;
               confirm SRAColumnRead hands back an already adjusted base */
            rc = SRAColumnRead( self->col_rdf, spot, (const void **)&rdf, &o, &sz );
            if ( rc == 0 )
            {
                /* sz is in bits: convert to a count of filter values */
                int32_t i = sz / sizeof( INSDC_SRA_read_filter ) / 8;
                uint32_t j;

                *key = self->keys;
                *keys = sizeof( self->keys ) / sizeof( self->keys[ 0 ] );
                for ( j = 0; j < *keys; j++ )
                {
                    clear_readmask( self->keys[ j ].readmask );
                }
                /* visit reads from last to first */
                while ( i > 0 )
                {
                    i--;
                    if ( self->read_filter != 0xFF && self->read_filter != rdf[i] )
                    {
                        /* skip by filter value != to command line */
                    }
                    else if ( rdf[ i ] == SRA_READ_FILTER_PASS )
                    {
                        set_readmask( self->keys[ EReadFilterSplitter_pass ].readmask, i );
                    }
                    else if ( rdf[ i ] == SRA_READ_FILTER_REJECT )
                    {
                        set_readmask( self->keys[ EReadFilterSplitter_reject ].readmask, i );
                    }
                    else if( rdf[ i ] == SRA_READ_FILTER_CRITERIA )
                    {
                        set_readmask( self->keys[ EReadFilterSplitter_criteria ].readmask, i );
                    }
                    else if( rdf[ i ] == SRA_READ_FILTER_REDACTED )
                    {
                        set_readmask( self->keys[ EReadFilterSplitter_redacted ].readmask, i );
                    }
                    else
                    {
                        set_readmask( self->keys[ EReadFilterSplitter_unknown ].readmask, i );
                        PLOGMSG( klogWarn, ( klogWarn,
                                             "unknown READ_FILTER value $(value) at spot id $(row)",
                                             PLOG_2( PLOG_U8( value ), PLOG_I64( row ) ), rdf[ i ], spot ) );
                    }
                }
            }
        }
    }
    return rc;
}