/*
 * fe_new_read
 *  Builds a single-read Illumina spot from the read currently held in
 *  self->read and hands it to the Illumina writer.
 *
 *  self   - context; name_prefix, read, writer and ctx.file are used here
 *  flags  - forwarded to SRF_set_read_filter() to fill self->read.filter
 *  readId - read identifier; if it contains '#', its len is cut at that
 *           character and the text after '#' is used as the spot group
 *
 *  Returns 0 on success, otherwise the rc of the first failing step.
 *
 *  The spot object has static storage and is re-initialized on every call,
 *  so its contents are shared between calls of this function.
 */
static rc_t fe_new_read(fe_context_t *self, int flags, pstring *readId )
{
    rc_t rc = 0;
    char *suffix;
    pstring readName, spotGroup;
    static IlluminaSpot spot;

    /* look for spot group: everything after the first '#' */
    suffix = strchr(readId->data, '#');
    if( suffix != NULL ) {
        /* only the length is shortened; the underlying buffer is untouched */
        readId->len = suffix - readId->data;
        ++suffix;
        if( (rc = pstring_assign(&spotGroup, suffix, strlen(suffix))) != 0 ) {
            SRALoaderFile_LOG(self->ctx.file, klogInt, rc,
                "extracting barcode from spot '$(spotname)'", "spotname=%s", readId->data);
            return rc;
        }
    } else {
        pstring_clear(&spotGroup);
    }

    /* build the read name from prefix (self->name_prefix) and read id */
    if( self->name_prefix.len > 0 ) {
        if( (rc = pstring_copy(&readName, &self->name_prefix)) == 0 ) {
            /* a prefix ending in a digit gets a ':' separator before the id;
             * the cast keeps isdigit() defined for bytes with the high bit set */
            if( isdigit((unsigned char)readName.data[readName.len - 1]) ) {
                rc = pstring_append(&readName, ":", 1);
            }
            if( rc == 0 ) {
                rc = pstring_concat(&readName, readId);
            }
        }
    } else {
        rc = pstring_copy(&readName, readId);
    }
    if( rc != 0 ) {
        SRALoaderFile_LOG(self->ctx.file, klogErr, rc,
            "preparing spot name $(spotname)", "spotname=%s", readId->data);
        return rc;
    }

    SRF_set_read_filter(&self->read.filter, flags);

    /* assemble the spot from this one read and write it out */
    IlluminaSpot_Init(&spot);
    if( (rc = IlluminaSpot_Add(&spot, &readName, &spotGroup, &self->read)) == 0 ) {
        rc = SRAWriterIllumina_Write(self->writer, self->ctx.file, &spot);
    }
    return rc;
}
/*
 * FastqLoaderFmt_WriteData
 *  Reads spots from all given input files in lock-step, merges the reads
 *  that IlluminaSpot_Add() accepts into one spot and writes that spot with
 *  the writer configured on self (Illumina, IonTorrent or 454).
 *
 *  self            - loader; writers, processing defaults and bad-spot
 *                    counters are taken from it
 *  argc / argv     - number of input files and the files themselves
 *  spots_bad_count - [OUT] receives self->spots_bad_count on return
 *
 *  Returns 0 once every file reports no further ready spot, otherwise the
 *  rc that stopped processing.
 */
static rc_t FastqLoaderFmt_WriteData(FastqLoaderFmt* self, uint32_t argc, const SRALoaderFile* const argv[], int64_t* spots_bad_count)
{
    /* static storage; re-initialized before each spot is assembled */
    static IlluminaSpot spot;

    rc_t rc = 0;
    uint32_t idx;
    uint32_t last_src = 0; /* index of the file that last contributed a read */
    bool exhausted;
    FastqFileInfo* inputs = calloc(argc, sizeof(*inputs));

    if( inputs == NULL ) {
        rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcInsufficient);
    }

    /* per-file setup: reset both read slots and resolve quality settings */
    for( idx = 0; rc == 0 && idx < argc; ++idx ) {
        ExperimentQualityType scoring;
        FastqFileInfo* cur = &inputs[idx];

        cur->file = argv[idx];
        FileReadData_init(cur->spot, false);
        FileReadData_init(&cur->spot[1], false);

        rc = SRALoaderFile_QualityScoringSystem(cur->file, &scoring);
        if( rc == 0 ) {
            rc = SRALoaderFile_QualityEncoding(cur->file, &cur->qualEnc);
        }
        if( rc == 0 ) {
            rc = SRALoaderFile_AsciiOffset(cur->file, &cur->qualOffset);
        }
        if( rc != 0 ) {
            continue; /* loop condition ends the setup */
        }

        cur->qualType = ILLUMINAWRITER_COLMASK_NOTSET;
        if( scoring == eExperimentQualityType_Undefined ) {
            /* nothing declared for the file: use the processing defaults */
            scoring = self->processing->quality_type;
            cur->qualOffset = self->processing->quality_offset;
        }

        if( scoring == eExperimentQualityType_LogOdds || scoring == eExperimentQualityType_Other ) {
            if( self->w454 != NULL || self->wIonTorrent != NULL ) {
                rc = RC(rcSRA, rcFormatter, rcConstructing, rcParam, rcInvalid);
                LOGERR(klogInt, rc, "quality type other than Phred is not supported for this PLATFORM");
            }
            cur->qualMin = -40;
            cur->qualMax = 41;
            cur->qualType = ILLUMINAWRITER_COLMASK_QUALITY_LOGODDS1;
        } else {
            /* any value other than Phred is reported and then handled as Phred */
            if( scoring != eExperimentQualityType_Phred ) {
                SRALoaderFile_LOG(cur->file, klogWarn, rc, "quality_scoring_system attribute not set for this file, using Phred as default", NULL);
            }
            cur->qualType = ILLUMINAWRITER_COLMASK_QUALITY_PHRED;
            cur->qualMin = 0;
            cur->qualMax = self->wIllumina ? 61 : 127;
        }
    }

    while( rc == 0 ) {
        /* fetch the next spot from every file */
        exhausted = true;
        for( idx = 0; rc == 0 && idx < argc; ++idx ) {
            FastqFileInfo* cur = &inputs[idx];

            rc = read_next_spot(self, cur);
            if( rc == 0 && cur->spot->ready ) {
                exhausted = false;
#if _DEBUGGING
                {
                    FileReadData* dbg = cur->spot;
                    do {
                        DEBUG_MSG(3, ("file-%u: name:'%s', bc:%s, rd:%i, flt:%hu, seq '%.*s', qual %u bytes\n",
                                      idx + 1, dbg->name.data, dbg->barcode.data, dbg->read.read_id, dbg->read.filter,
                                      dbg->read.seq.len, dbg->read.seq.data, dbg->read.qual.len));
                        if( dbg == &cur->spot[1] ) {
                            break;
                        }
                        dbg = cur->spot[1].ready ? &cur->spot[1] : NULL;
                    } while( dbg != NULL );
                }
#endif
            }
        }
        if( rc != 0 || exhausted ) {
            break;
        }

        /* collect spot reads, matching by spot name
         * spot data may be split across multiple files */
        IlluminaSpot_Init(&spot);
        for( idx = 0; rc == 0 && idx < argc; ++idx ) {
            FileReadData* const second = &inputs[idx].spot[1];
            /* the second slot is only looked at after a ready first slot */
            FileReadData* part = inputs[idx].spot[0].ready ? &inputs[idx].spot[0] : NULL;

            while( rc == 0 && part != NULL ) {
                rc = IlluminaSpot_Add(&spot, &part->name, &part->barcode, &part->read);
                if( rc == 0 ) {
                    last_src = idx;
                    part->ready = false;
                } else if( GetRCState(rc) == rcIgnored ) {
                    /* read not taken into this spot; it stays ready */
                    rc = 0;
                } else {
                    SRALoaderFile_LOG(inputs[idx].file, klogErr, rc, "$(msg)", "msg=adding data to spot");
                }
                if( part == second ) {
                    break;
                }
                part = second->ready ? second : NULL;
            }
        }
        if( rc != 0 ) {
            break;
        }

        /* write the assembled spot with the configured writer */
        if( self->wIllumina != NULL ) {
            rc = SRAWriterIllumina_Write(self->wIllumina, argv[0], &spot);
            if( rc != 0 && GetRCTarget(rc) == rcFormatter && GetRCContext(rc) == rcValidating ) {
                SRALoaderFile_LOG(inputs[last_src].file, klogWarn, rc, "$(msg) $(spot_name)", "msg=bad spot,spot_name=%.*s", spot.name->len, spot.name->data);
                self->spots_bad_count++;
                /* a negative limit means every bad spot is tolerated */
                if( self->spots_bad_allowed < 0 || self->spots_bad_count <= self->spots_bad_allowed ) {
                    rc = 0;
                }
            }
        } else if( spot.nreads != 1 ) {
            rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcUnsupported);
            SRALoaderFile_LOG(inputs[last_src].file, klogErr, rc, "$(msg)", "msg=multiple reads for this platform");
        } else if( self->wIonTorrent != NULL ) {
            rc = SRAWriterIonTorrent_WriteRead(self->wIonTorrent, argv[0], spot.name, spot.reads[0].seq, spot.reads[0].qual, NULL, NULL, 0, 0, 0, 0);
        } else {
            rc = SRAWriter454_WriteRead(self->w454, argv[0], spot.name, spot.reads[0].seq, spot.reads[0].qual, NULL, NULL, 0, 0, 0, 0);
        }
    }

    free(inputs);
    *spots_bad_count = self->spots_bad_count;
    return rc;
}
/*
 * FGroup_Parse
 *  Processes one file group: repeatedly reads the next spot from every file
 *  of the group, merges the per-file data into data->spot and writes the
 *  spot with SRAWriterIllumina_Write().
 *
 *  n - list node, used as FGroup*
 *  d - FGroup_Parse_data*; data->rc receives the final rc
 *
 *  Returns true when data->rc is non-zero, false otherwise.
 *  NOTE(review): the signature looks like a list-iteration callback where
 *  true stops the iteration - confirm against the caller.
 */
bool FGroup_Parse( SLNode *n, void *d )
{
    FGroup_Parse_data* data = (FGroup_Parse_data*)d;
    FGroup* g = (FGroup*)n;
    bool done;
    const SRALoaderFile* data_block_ref = NULL;

    data->rc = 0;
    do {
        IlluminaFileInfo* file = g->files;

        /* fetch the next spot from every file of the group */
        done = true;
        while( data->rc == 0 && file != NULL ) {
            if( (data->rc = read_next_spot(g->blk_pfx, file)) == 0 && file->ready ) {
                done = false;
            }
            file = file->next;
        }
        if( data->rc != 0 || done ) {
            break;
        }

        /* collect spot reads, matching by spot name
         * spot data may be split across multiple files */
        IlluminaSpot_Init(&data->spot);
        file = g->files;
        while( data->rc == 0 && file != NULL ) {
            if( file->ready ) {
                if( (file->type == eIlluminaNativeFileTypeNoise && data->self->skip_noise) ||
                    (file->type == eIlluminaNativeFileTypeIntensity && data->self->skip_intensity) ||
                    (file->type == eIlluminaNativeFileTypeSignal && data->self->skip_signal) ) {
                    /* this file type was asked to be skipped: drop its data */
                    file->ready = false;
                } else {
                    /* last file that contributed; passed to the writer below */
                    data_block_ref = file->file;
                    if( file->type == eIlluminaNativeFileTypeQSeq && (g->mask & eIlluminaNativeFileTypeQuality4) ) {
                        /* drop quality1 from qseq data */
                        pstring_clear(&file->read.qual);
                    } else if( file->type == eIlluminaNativeFileTypeQuality4 ) {
                        IlluminaFileInfo* neib = file->next ? file->next : file->prev;
                        /* need to fix spotname to be same cause prb do not have any name in it */
                        if( neib == NULL ) {
                            /* prb file is alone in its group: no name source,
                             * report it instead of dereferencing NULL */
                            data->rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
                            SRALoaderFile_LOG(file->file, klogErr, data->rc, "$(msg)",
                                              "msg=no neighboring file to take prb spot name from");
                        } else if( (data->rc = pstring_copy(&file->name, &neib->name)) != 0 ) {
                            SRALoaderFile_LOG(file->file, klogErr, data->rc, "$(msg) '$(n)'",
                                              "msg=syncing prb spot name,n=%s", neib->name.data);
                        }
                    }
                    if( data->rc == 0 ) {
                        data->rc = IlluminaSpot_Add(&data->spot, &file->name, &file->barcode, &file->read);
                        if( data->rc == 0 ) {
                            file->ready = false;
                        } else {
                            if( GetRCState(data->rc) == rcIgnored ) {
                                SRALoaderFile_LOG(file->file, klogWarn, data->rc, "$(msg) '$(s1)' <> '$(s2)'",
                                                  "msg=spot name mismatch,s1=%.*s,s2=%.*s",
                                                  data->spot.name->len, data->spot.name->data,
                                                  file->name.len, file->name.data);
                                data->self->spots_bad_count++;
                                /* skip spot for all files in a group */
                                file = g->files;
                                while( file != NULL ) {
                                    file->ready = false;
                                    SRALoaderFile_LOG(file->file, klogWarn, data->rc, "$(msg) '$(n)'",
                                                      "msg=skipped spot,n=%s", file->name.data);
                                    file = file->next;
                                }
                                /* a negative limit means no limit on bad spots */
                                if( data->self->spots_bad_allowed >= 0 &&
                                    data->self->spots_bad_count > data->self->spots_bad_allowed ) {
                                    data->rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
                                }
                                /* file is NULL here; leave the collecting loop */
                                break;
                            }
                        }
                    }
                }
            }
            file = file->next;
        }
        if( GetRCState(data->rc) == rcIgnored ) {
            /* tolerated mismatch: nothing is written, go on with the next spot */
            data->rc = 0;
            continue;
        }
        if( data->rc == 0 ) {
            data->rc = SRAWriterIllumina_Write(data->self->writer, data_block_ref, &data->spot);
        }
    } while( data->rc == 0 );
    return data->rc != 0;
}