/*
 * Record the read layout announced by a region chunk.
 *
 * Regions are consumed in pairs: for read i, region[2*i + 1] supplies the
 * start offset, the name handed to set_label_type() and the type that is
 * validated below, while region[2*i] supplies the color-space key (first
 * character of its name).
 *
 * self          formatter context whose 'region' member is filled in
 * region_count  number of entries in region[]
 * region        region descriptors
 *
 * Returns 0 on success.  On any failure self->region.nreads is left at 0 so
 * that later code which indexes the region arrays by nreads never works from
 * a rejected or half-filled layout.
 */
static rc_t fe_new_region(fe_context_t *self, size_t region_count, const region_t region[])
{
    rc_t rc = 0;
    /* validate the count at full width, before it is narrowed into the context */
    const size_t nreads = region_count / 2;
    unsigned i;

    /* nothing is published until the whole chunk has been accepted */
    self->region.nreads = 0;

    DEBUG_MSG(3, ("REGION: %u -> %u reads\n", (unsigned)region_count, (unsigned)nreads));

    if( nreads == 0 || nreads > ABSOLID_FMT_MAX_NUM_READS ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnsupported);
        SRALoaderFile_LOG(self->ctx.file, klogErr, rc, "read count $(c)", PLOG_U8(c), (unsigned)nreads);
        return rc;
    }

    for(i = 0; i < nreads; i++ ) {
        const unsigned j = i * 2 + 1;

        self->region.start[i] = region[j].start;
        if( (rc = set_label_type(region[j].name, &self->region.label[i], &self->region.type[i])) != 0 ) {
            return rc;
        }
        self->region.cs_key[i] = region[j - 1].name[0];
        DEBUG_MSG(3, ("REGION[%u]: '%s', %u, '%c', start: %u\n",
                      i, self->region.label[i].data, self->region.type[i],
                      self->region.cs_key[i], self->region.start[i]));

        switch(region[j].type) {
            case Biological:
            case Normal:
            case Paired:
            case Technical:
                break;
            default:
                rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                SRALoaderFile_LOG(self->ctx.file, klogErr, rc,
                                  "read #$(read_id) type mismatch; expected $(expected), got $(got)",
                                  "read_id=%u,expected=%s,got=%u", i, "(B|N|P|T)", region[j].type);
                return rc;
        }
    }

    /* two reads of one spot must not resolve to the same read type */
    if( nreads > 1 && self->region.type[0] == self->region.type[1] ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcDuplicate);
        SRALoaderFile_LOG(self->ctx.file, klogErr, rc, "both reads have same type", NULL);
        return rc;
    }

    self->region.nreads = nreads;
    return rc;
}
rc_t SRAWriterIllumina_Make(const SRAWriterIllumina** cself, const SRALoaderConfig* config) { rc_t rc = 0; SRAWriterIllumina* self; const PlatformXML* platform; const ReadSpecXML_read* last_read; int32_t spot_len; uint32_t sequence_length; uint8_t nreads; if( cself == NULL || config == NULL ) { return RC(rcSRA, rcFormatter, rcConstructing, rcParam, rcNull); } if( (rc = Experiment_GetPlatform(config->experiment, &platform)) != 0 || (rc = Experiment_GetReadNumber(config->experiment, &nreads)) != 0 || (rc = Experiment_GetSpotLength(config->experiment, &sequence_length)) != 0 || (rc = Experiment_GetRead(config->experiment, nreads - 1, &last_read)) != 0 ) { return rc; } if( platform->id != SRA_PLATFORM_ILLUMINA ) { rc = RC(rcSRA, rcFormatter, rcConstructing, rcData, rcInvalid); LOGERR(klogErr, rc, "platform type"); return rc; } if( nreads > ILLUMINAWRITER_MAX_NUM_READS ) { rc = RC(rcSRA, rcFormatter, rcConstructing, rcData, rcUnsupported); PLOGERR(klogErr, (klogErr, rc, "more than $(max) reads", PLOG_U8(max), (uint8_t)ILLUMINAWRITER_MAX_NUM_READS)); return rc; } self = calloc(1, sizeof(*self)); if( self == NULL ) { rc = RC(rcSRA, rcFormatter, rcConstructing, rcMemory, rcExhausted); return rc; } if( (rc = SRAWriter_Make(&self->base, config)) != 0 ) { LOGERR(klogInt, rc, "failed to initialize base writer"); } self->config = config; self->platform = platform; self->nreads = nreads; self->barcode_read_id = ILLUMINAWRITER_READID_NONE; self->last_read = last_read; self->fixed_read_seg = true; self->col_mask = ILLUMINAWRITER_COLMASK_NOTSET; self->sequence_length = sequence_length; spot_len = sequence_length; do { const ReadSpecXML_read* read_spec; int16_t len = 0; --nreads; if( (rc = Experiment_GetRead(config->experiment, nreads, &read_spec)) != 0 ) { break; } if( read_spec->read_type == rdsp_BarCode_rt ) { if( self->barcode_read_id == ILLUMINAWRITER_READID_NONE ) { self->barcode_read_id = nreads; } else { rc = RC(rcSRA, rcFormatter, rcConstructing, rcData, rcDuplicate); 
LOGERR(klogErr, rc, "only on BarCode READ_TYPE per spot supported"); break; } } if( self->fixed_read_seg ) { switch(read_spec->coord_type) { case rdsp_BaseCoord_ct: case rdsp_CycleCoord_ct: len = spot_len - read_spec->coord.start_coord + 1; break; case rdsp_ExpectedBaseCall_ct: case rdsp_ExpectedBaseCallTable_ct: if( read_spec->coord.expected_basecalls.default_length > 0 ) { len = read_spec->coord.expected_basecalls.default_length; } else { rc = RC(rcSRA, rcFormatter, rcConstructing, rcData, rcUnsupported); } break; case rdsp_RelativeOrder_ct: if( nreads == 0 ) { len = spot_len - 1 + 1; /* as if BASE_COORD == 1 */ break; } default: rc = RC(rcSRA, rcFormatter, rcConstructing, rcData, rcUnsupported); } if( rc == 0 ) { spot_len -= len; if( spot_len < 0 || len <= 0 ) { rc = RC(rcSRA, rcFormatter, rcConstructing, rcData, rcInconsistent); LOGERR(klogErr, rc, "SPOT_DECODE_SPEC and SEQUENCE_LENGTH"); } else { SRASegment* seg = &self->read_seg[nreads]; seg->start = spot_len; seg->len = len; DEBUG_MSG(3, ("#%u read fixed length = %i\n", nreads, len)); } } else if( GetRCState(rc) == rcUnsupported ) { self->fixed_read_seg = false; DEBUG_MSG(3, ("not fixed spot segmentation")); rc = 0; } } } while( rc == 0 && nreads > 0 ); if( rc == 0 ) { *cself = self; } else { SRAWriterIllumina_Whack(self, NULL); } return rc; }
/*
 * Parse one SRF read chunk and write the resulting spot.
 *
 * ctx      SRF context; also used as the enclosing fe_context_t
 * ztr_ctx  ZTR parser state; the chunk payload is appended to its buffer
 * data     raw chunk bytes
 * size     number of bytes in data
 *
 * The chunk header yields the read flags and the spot name; fe_new_read()
 * derives the read type and label from that name.  The ZTR blocks that follow
 * are decoded one by one:
 *   BASE  - base calls
 *   CNF1  - quality values
 *   SAMP  - signal, one of the channels 0FAM / 1CY3 / 2TXR / 3CY5
 *           (skipped entirely when fe->skip_signal is set)
 * When read_type is greater than eAbisolidReadType_SPOT a block holds a single
 * read; otherwise it holds the whole spot and is cut up with the offsets in
 * fe->region.
 *
 * Returns 0 on success.  The first error stops block processing and is
 * returned; nothing is written in that case.
 *
 * NOTE(review): the region offsets are not checked against the block's
 * datasize before slicing - confirm whether pstring_assign() or the callers
 * guarantee they are in range.
 */
static rc_t parse_v1_read(SRF_context *ctx, ZTR_Context *ztr_ctx, const uint8_t *data, size_t size)
{
    rc_t rc = 0;
    size_t i, parsed;
    ztr_raw_t ztr_raw;
    ztr_t ztr;
    enum ztr_chunk_type type;
    fe_context_t* fe = (fe_context_t*)ctx;
    uint8_t flags;
    pstring readId;
    EAbisolidReadType read_type;
    pstring label;
    AbsolidRead read[ABSOLID_FMT_MAX_NUM_READS];

    if( fe->region.nreads == 0 ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcNotFound);
        return SRALoaderFile_LOG(ctx->file, klogErr, rc, "missing region chunk before 1st read chunk", NULL);
    }
    if( (rc = SRF_ParseReadChunk(data, size, &parsed, &flags, &readId)) != 0 ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rc);
        return SRALoaderFile_LOG(ctx->file, klogErr, rc, "SRF parsing failure", NULL);
    }
    ABI_ZTR_AddToBuffer(ztr_ctx, data + parsed, size - parsed);

    /* readId will have spotname */
    if( (rc = fe_new_read(fe, &readId, &read_type, &label)) != 0 ) {
        return SRALoaderFile_LOG(ctx->file, klogErr, rc, "parsing spot name suffix", NULL);
    }

    for(i = 0; i < sizeof(read) / sizeof(read[0]); i++) {
        AbsolidRead_Init(&read[i]);
    }

    /* rc is part of the loop condition: without it the next ParseBlock call
       would overwrite an error raised while handling the previous block */
    while( rc == 0 && !ABI_ZTR_BufferIsEmpty(ztr_ctx) ) {
        if( (rc = ABI_ZTR_ParseBlock(ztr_ctx, &ztr_raw)) != 0 ||
            (rc = ABI_ZTR_ProcessBlock(ztr_ctx, &ztr_raw, &ztr, &type)) != 0 ) {
            SRALoaderFile_LOG(ctx->file, klogErr, rc, "ZTR parsing failure", NULL);
            break;
        }

        switch (type) {
            case BASE:
                if(ztr.sequence->datatype != i8) {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "read: expected 8-bit datatype", NULL);
                } else if( read_type > eAbisolidReadType_SPOT ) {
                    /* block carries exactly one read */
                    int read_number = AbisolidReadType2ReadNumber[read_type];
                    if( (rc = pstring_assign(&read[read_number].seq, ztr.sequence->data, ztr.sequence->datasize)) == 0 ) {
                        /* grab 1st, may be the only cs_key */
                        read[read_number].cs_key = fe->region.cs_key[0];
                        for(i = 1; i < fe->region.nreads; i++) {
                            if( read_type == fe->region.type[i] ) {
                                read[read_number].cs_key = fe->region.cs_key[i];
                                break;
                            }
                        }
                        SRF_set_read_filter(&read[read_number].filter, flags);
                        rc = pstring_copy(&read[read_number].label, &label);
                        DEBUG_MSG(3, ("SRF READ: '%s'\n", read[read_number].seq.data));
                    }
                    if( rc != 0 ) {
                        SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying read", NULL);
                    }
                } else {
                    /* block carries the whole spot: slice it per region */
                    for(i = 0; rc == 0 && i < fe->region.nreads; i++) {
                        int read_number = AbisolidReadType2ReadNumber[fe->region.type[i]];
                        /* a region ends where the next one starts; the last one runs to the end of the data */
                        size_t len = (i + 1 >= fe->region.nreads ?
                                      ztr.sequence->datasize : fe->region.start[i + 1]) - fe->region.start[i];
                        rc = pstring_assign(&read[read_number].seq, &ztr.sequence->data[fe->region.start[i]], len);
                        read[read_number].cs_key = fe->region.cs_key[i];
                        SRF_set_read_filter(&read[read_number].filter, flags);
                        /* only copy the label when the sequence was stored, so its error is not overwritten */
                        if( rc == 0 && fe->region.label[i].len != 0 ) {
                            rc = pstring_copy(&read[read_number].label, &fe->region.label[i]);
                        }
                        DEBUG_MSG(3, ("SRF READ[%u]: '%s'\n", (unsigned)i, read[read_number].seq.data));
                    }
                    if( rc != 0 ) {
                        SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying reads", NULL);
                    }
                }
                break;

            case CNF1:
                if(ztr.quality1->datatype != i8) {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "quality: expected 8-bit datatype", NULL);
                } else if( read_type > eAbisolidReadType_SPOT ) {
                    int read_number = AbisolidReadType2ReadNumber[read_type];
                    if( (rc = pstring_assign(&read[read_number].qual, ztr.quality1->data, ztr.quality1->datasize)) == 0 ) {
                        DEBUG_MSG(3, ("SRF QUAL: %u bytes\n", read[read_number].qual.len));
                    }
                    if( rc != 0 ) {
                        SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying quality", NULL);
                    }
                } else {
                    for(i = 0; rc == 0 && i < fe->region.nreads; i++) {
                        int read_number = AbisolidReadType2ReadNumber[fe->region.type[i]];
                        size_t len = (i + 1 >= fe->region.nreads ?
                                      ztr.quality1->datasize : fe->region.start[i + 1]) - fe->region.start[i];
                        rc = pstring_assign(&read[read_number].qual, &ztr.quality1->data[fe->region.start[i]], len);
                        DEBUG_MSG(3, ("SRF QUAL[%u]: %u bytes\n", (unsigned)i, read[read_number].qual.len));
                    }
                    if( rc != 0 ) {
                        SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying qualities", NULL);
                    }
                }
                break;

            case SAMP:
                if( !fe->skip_signal ) {
                    size_t i;
                    int stype = ABSOLID_FMT_COLMASK_NOTSET;

                    if(ztr.signal->datatype != f32) {
                        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                        SRALoaderFile_LOG(ctx->file, klogErr, rc, "signal: expected 32-bit float datatype", NULL);
                    } else if( (ztr.signal->datasize % sizeof(float)) != 0 ) {
                        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcInvalid);
                        SRALoaderFile_LOG(ctx->file, klogErr, rc, "signal: size not 32-bit float aligned", NULL);
                    } else if (ztr.signal->channel == NULL) {
                        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcIncomplete);
                        SRALoaderFile_LOG(ctx->file, klogErr, rc, "SIGNAL column: missing channel type", NULL);
                    } else if(strcmp(ztr.signal->channel, "0FAM") == 0) {
                        stype = ABSOLID_FMT_COLMASK_FAM;
                    } else if(strcmp(ztr.signal->channel, "1CY3") == 0) {
                        stype = ABSOLID_FMT_COLMASK_CY3;
                    } else if(strcmp(ztr.signal->channel, "2TXR") == 0) {
                        stype = ABSOLID_FMT_COLMASK_TXR;
                    } else if(strcmp(ztr.signal->channel, "3CY5") == 0) {
                        stype = ABSOLID_FMT_COLMASK_CY5;
                    } else {
                        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                        SRALoaderFile_LOG(ctx->file, klogErr, rc, "SIGNAL column: unexpected channel type", NULL);
                    }
#if __BYTE_ORDER == __LITTLE_ENDIAN
                    /* byte-swap every 32-bit word of the signal payload in place */
                    for(i = 0; rc == 0 && i < ztr.signal->datasize; i += 4) {
                        uint32_t* r = (uint32_t*)&ztr.signal->data[i];
                        *r = bswap_32(*r);
                    }
#endif
                    if( rc == 0 ) {
                        if( read_type > eAbisolidReadType_SPOT ) {
                            int read_number = AbisolidReadType2ReadNumber[read_type];
                            pstring* d = NULL;

                            switch(stype) {
                                case ABSOLID_FMT_COLMASK_FAM:
                                    read[read_number].fs_type = eAbisolidFSignalType_FAM;
                                    d = &read[read_number].fxx;
                                    break;
                                case ABSOLID_FMT_COLMASK_CY3:
                                    d = &read[read_number].cy3;
                                    break;
                                case ABSOLID_FMT_COLMASK_TXR:
                                    d = &read[read_number].txr;
                                    break;
                                case ABSOLID_FMT_COLMASK_CY5:
                                    d = &read[read_number].cy5;
                                    break;
                                default:
                                    /* d stays NULL and is reported below */
                                    break;
                            }
                            if( d ) {
                                rc = pstring_assign(d, ztr.signal->data, ztr.signal->datasize);
                                DEBUG_MSG(3, ("SRF SIGNAL[%s]: %u bytes\n", ztr.signal->channel, d->len));
                            } else {
                                rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnrecognized);
                            }
                            if( rc != 0 ) {
                                SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying signal", NULL);
                            }
                        } else {
                            if( fe->region.nreads <= 0 || fe->region.nreads > ABSOLID_FMT_MAX_NUM_READS ) {
                                rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnsupported);
                                SRALoaderFile_LOG(fe->ctx.file, klogErr, rc, "read count $(c)", PLOG_U8(c), fe->region.nreads);
                            }
                            for(i = 0; rc == 0 && i < fe->region.nreads; i++) {
                                pstring* d = NULL;
                                int read_number = AbisolidReadType2ReadNumber[fe->region.type[i]];
                                /* region offsets count values; the payload is addressed in bytes */
                                size_t len = (i + 1 >= fe->region.nreads) ?
                                             ztr.signal->datasize : (fe->region.start[i + 1] * sizeof(float));
                                len -= fe->region.start[i] * sizeof(float);

                                switch(stype) {
                                    case ABSOLID_FMT_COLMASK_FAM:
                                        read[read_number].fs_type = eAbisolidFSignalType_FAM;
                                        d = &read[read_number].fxx;
                                        break;
                                    case ABSOLID_FMT_COLMASK_CY3:
                                        d = &read[read_number].cy3;
                                        break;
                                    case ABSOLID_FMT_COLMASK_TXR:
                                        d = &read[read_number].txr;
                                        break;
                                    case ABSOLID_FMT_COLMASK_CY5:
                                        d = &read[read_number].cy5;
                                        break;
                                    default:
                                        /* d stays NULL and is reported below */
                                        break;
                                }
                                if( d ) {
                                    rc = pstring_assign(d, &ztr.signal->data[fe->region.start[i] * sizeof(float)], len);
                                    DEBUG_MSG(3, ("SRF SIGNAL[%s]: %u bytes\n", ztr.signal->channel, d->len));
                                } else {
                                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnrecognized);
                                }
                            }
                            if( rc != 0 ) {
                                SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying signals", NULL);
                            }
                        }
                    }
                }
                break;

            default:
                break;
        }

        /* release the decoded block: frees the pointer stored at the start of ztr */
        if(type != none && type != ignore) {
            free(*(void **)&ztr);
        }
    }

    if(rc == 0) {
        if( read_type <= eAbisolidReadType_SPOT ) {
            rc = SRAWriteAbsolid_Write(fe->writer, ctx->file, &readId, NULL, &read[0], &read[1]);
        } else {
            switch( AbisolidReadType2ReadNumber[read_type] ) {
                case 0:
                    rc = SRAWriteAbsolid_Write(fe->writer, ctx->file, &readId, NULL, &read[0], NULL);
                    break;
                case 1:
                    rc = SRAWriteAbsolid_Write(fe->writer, ctx->file, &readId, NULL, NULL, &read[1]);
                    break;
                default:
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnsupported);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "more than 2 reads", NULL);
                    break;
            }
        }
    }
    return rc;
}
/*
 * Produce the key set for one spot, grouping its reads by READ_FILTER value.
 *
 * cself     splitter instance (a ReadFilterSplitter)
 * key       receives a pointer to the splitter's own key array
 * keys      receives the number of entries in *key; 0 when there is no
 *           read-filter column or the column read fails
 * spot      row id of the spot to classify
 * readmask  not used by this implementation
 *
 * Each read's bit is set in the readmask of the key matching its filter value
 * (pass / reject / criteria / redacted / unknown).  When self->read_filter is
 * not 0xFF, reads whose filter value differs from it are left out of every
 * mask.  Unrecognized filter values are logged as a warning and collected
 * under the "unknown" key.
 *
 * Returns 0 on success, an rcNull parameter error when cself, key or keys is
 * NULL, or the rc_t returned by SRAColumnRead().
 */
static rc_t ReadFilterSplitter_GetKeySet( const SRASplitter* cself, const SRASplitter_Keys** key,
                                          uint32_t* keys, spotid_t spot, const readmask_t* readmask )
{
    rc_t rc = 0;
    ReadFilterSplitter* self = ( ReadFilterSplitter* )cself;

    /* keys is written unconditionally below, so it needs the same guard as key */
    if ( self == NULL || key == NULL || keys == NULL ) {
        rc = RC( rcSRA, rcNode, rcExecuting, rcParam, rcNull );
    } else {
        const INSDC_SRA_read_filter* rdf;
        bitsz_t o = 0, sz = 0;

        *keys = 0;
        if ( self->col_rdf != NULL ) {
            rc = SRAColumnRead( self->col_rdf, spot, (const void **)&rdf, &o, &sz );
            if ( rc == 0 ) {
                uint32_t j;
                /* sz is divided by 8 here, i.e. treated as a size in bits */
                int32_t i = sz / sizeof( INSDC_SRA_read_filter ) / 8;

                *key = self->keys;
                *keys = sizeof( self->keys ) / sizeof( self->keys[ 0 ] );
                for ( j = 0; j < *keys; j++ ) {
                    clear_readmask( self->keys[ j ].readmask );
                }

                while ( i > 0 ) {
                    i--;
                    if ( self->read_filter != 0xFF && self->read_filter != rdf[ i ] ) {
                        /* skip by filter value != to command line */
                    } else if ( rdf[ i ] == SRA_READ_FILTER_PASS ) {
                        set_readmask( self->keys[ EReadFilterSplitter_pass ].readmask, i );
                    } else if ( rdf[ i ] == SRA_READ_FILTER_REJECT ) {
                        set_readmask( self->keys[ EReadFilterSplitter_reject ].readmask, i );
                    } else if ( rdf[ i ] == SRA_READ_FILTER_CRITERIA ) {
                        set_readmask( self->keys[ EReadFilterSplitter_criteria ].readmask, i );
                    } else if ( rdf[ i ] == SRA_READ_FILTER_REDACTED ) {
                        set_readmask( self->keys[ EReadFilterSplitter_redacted ].readmask, i );
                    } else {
                        set_readmask( self->keys[ EReadFilterSplitter_unknown ].readmask, i );
                        PLOGMSG( klogWarn, ( klogWarn, "unknown READ_FILTER value $(value) at spot id $(row)",
                                             PLOG_2( PLOG_U8( value ), PLOG_I64( row ) ), rdf[ i ], spot ) );
                    }
                }
            }
        }
    }
    return rc;
}