/*
 * Read spots from every input file in round-robin order and hand each one
 * to the Helicos writer.
 *
 * self            - loader instance; self->writer receives the spots
 * argc, argv      - input files; argv[0] is the file reference passed to
 *                   the writer for every spot
 * spots_bad_count - not written by this function
 *
 * Returns 0 when all files stop producing spots, or the first non-zero rc
 * from allocation, read_next_spot() or SRAWriterHelicos_Write().
 */
static rc_t HelicosLoaderFmt_WriteData(HelicosLoaderFmt* self, uint32_t argc, const SRALoaderFile* const argv[], int64_t* spots_bad_count)
{
    rc_t rc = 0;
    uint32_t idx;
    bool wrote_any;
    HelicosFileInfo* infos = calloc(argc, sizeof(*infos));

    if( infos == NULL ) {
        rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcInsufficient);
    } else {
        for(idx = 0; idx < argc; idx++) {
            HelicosFileInfo_init(&infos[idx]);
            infos[idx].file = argv[idx];
        }
    }

    /* one pass visits each file once; stop after a pass that wrote nothing */
    while( rc == 0 ) {
        wrote_any = false;
        for(idx = 0; rc == 0 && idx < argc; idx++) {
            HelicosFileInfo* cur = &infos[idx];

            rc = read_next_spot(self, cur);
            if( rc != 0 || !cur->ready ) {
                continue;
            }
            wrote_any = true;
            rc = SRAWriterHelicos_Write(self->writer, argv[0], &cur->name, &cur->sequence, &cur->quality);
            /* the spot is consumed even if the write failed */
            cur->ready = false;
        }
        if( !wrote_any ) {
            break;
        }
    }

    free(infos);
    return rc;
}
/*
 * Read spots from all input files, merge the reads that belong to the same
 * spot and pass each merged spot to whichever writer is configured
 * (Illumina, IonTorrent or 454).
 *
 * self            - loader instance (writers, processing defaults, bad-spot
 *                   counters)
 * argc, argv      - input files; argv[0] is the file reference passed to
 *                   the writers
 * spots_bad_count - receives self->spots_bad_count on return
 *
 * Returns 0 once no file has a ready spot any more, otherwise the first
 * rc that is not tolerated.
 */
static rc_t FastqLoaderFmt_WriteData(FastqLoaderFmt* self, uint32_t argc, const SRALoaderFile* const argv[], int64_t* spots_bad_count)
{
    /* static storage as in the original; the function is therefore not reentrant */
    static IlluminaSpot spot;

    rc_t rc = 0;
    uint32_t idx;
    uint32_t last_src = 0; /* index of the file that last contributed a read */
    FastqFileInfo* infos = calloc(argc, sizeof(*infos));

    if( infos == NULL ) {
        rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcInsufficient);
    }

    /* per-file setup: quality scoring system, encoding and ascii offset */
    for(idx = 0; rc == 0 && idx < argc; idx++) {
        FastqFileInfo* info = &infos[idx];
        ExperimentQualityType scoring;

        info->file = argv[idx];
        FileReadData_init(info->spot, false);
        FileReadData_init(&info->spot[1], false);

        rc = SRALoaderFile_QualityScoringSystem(info->file, &scoring);
        if( rc == 0 ) {
            rc = SRALoaderFile_QualityEncoding(info->file, &info->qualEnc);
        }
        if( rc == 0 ) {
            rc = SRALoaderFile_AsciiOffset(info->file, &info->qualOffset);
        }
        if( rc != 0 ) {
            /* the loop condition ends the setup */
            continue;
        }

        info->qualType = ILLUMINAWRITER_COLMASK_NOTSET;
        if( scoring == eExperimentQualityType_Undefined ) {
            /* nothing set on the file: take the loader-wide processing values */
            scoring = self->processing->quality_type;
            info->qualOffset = self->processing->quality_offset;
        }

        if( scoring == eExperimentQualityType_LogOdds || scoring == eExperimentQualityType_Other ) {
            if( self->w454 != NULL || self->wIonTorrent != NULL ) {
                rc = RC(rcSRA, rcFormatter, rcConstructing, rcParam, rcInvalid);
                LOGERR(klogInt, rc, "quality type other than Phred is not supported for this PLATFORM");
            }
            info->qualMin = -40;
            info->qualMax = 41;
            info->qualType = ILLUMINAWRITER_COLMASK_QUALITY_LOGODDS1;
        } else {
            /* any value other than Phred is warned about and then handled as Phred */
            if( scoring != eExperimentQualityType_Phred ) {
                SRALoaderFile_LOG(info->file, klogWarn, rc, "quality_scoring_system attribute not set for this file, using Phred as default", NULL);
            }
            info->qualType = ILLUMINAWRITER_COLMASK_QUALITY_PHRED;
            info->qualMin = 0;
            info->qualMax = (self->wIllumina != NULL) ? 61 : 127;
        }
    }

    while( rc == 0 ) {
        bool pending = false;

        /* step 1: ask every file for its next spot */
        for(idx = 0; rc == 0 && idx < argc; idx++) {
            FastqFileInfo* info = &infos[idx];

            rc = read_next_spot(self, info);
            if( rc == 0 && info->spot->ready ) {
                pending = true;
#if _DEBUGGING
                {
                    FileReadData* dbg = info->spot;
                    do {
                        DEBUG_MSG(3, ("file-%u: name:'%s', bc:%s, rd:%i, flt:%hu, seq '%.*s', qual %u bytes\n",
                                      idx + 1, dbg->name.data, dbg->barcode.data, dbg->read.read_id, dbg->read.filter,
                                      dbg->read.seq.len, dbg->read.seq.data, dbg->read.qual.len));
                        if( dbg == &info->spot[1] ) {
                            break;
                        }
                        dbg = info->spot[1].ready ? &info->spot[1] : NULL;
                    } while( dbg != NULL );
                }
#endif
            }
        }
        if( rc != 0 || !pending ) {
            break;
        }

        /* step 2: collect spot reads, matching by spot name;
         * spot data may be split across multiple files */
        IlluminaSpot_Init(&spot);
        for(idx = 0; rc == 0 && idx < argc; idx++) {
            FastqFileInfo* info = &infos[idx];
            uint32_t slot;

            if( !info->spot[0].ready ) {
                /* the second slot is only looked at after the first one */
                continue;
            }
            for(slot = 0; rc == 0 && slot < 2; slot++) {
                FileReadData* part = &info->spot[slot];

                if( slot == 1 && !part->ready ) {
                    break;
                }
                rc = IlluminaSpot_Add(&spot, &part->name, &part->barcode, &part->read);
                if( rc == 0 ) {
                    last_src = idx;
                    part->ready = false;
                } else if( GetRCState(rc) == rcIgnored ) {
                    /* read not taken into this spot: it stays ready for a later round */
                    rc = 0;
                } else {
                    SRALoaderFile_LOG(info->file, klogErr, rc, "$(msg)", "msg=adding data to spot");
                }
            }
        }
        if( rc != 0 ) {
            break;
        }

        /* step 3: write the assembled spot */
        if( self->wIllumina != NULL ) {
            rc = SRAWriterIllumina_Write(self->wIllumina, argv[0], &spot);
            if( rc != 0 && GetRCTarget(rc) == rcFormatter && GetRCContext(rc) == rcValidating ) {
                SRALoaderFile_LOG(infos[last_src].file, klogWarn, rc, "$(msg) $(spot_name)", "msg=bad spot,spot_name=%.*s", spot.name->len, spot.name->data);
                self->spots_bad_count++;
                /* a negative limit means any number of bad spots is tolerated */
                if( self->spots_bad_allowed < 0 || self->spots_bad_count <= self->spots_bad_allowed ) {
                    rc = 0;
                }
            }
        } else if( spot.nreads != 1 ) {
            rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcUnsupported);
            SRALoaderFile_LOG(infos[last_src].file, klogErr, rc, "$(msg)", "msg=multiple reads for this platform");
        } else if( self->wIonTorrent != NULL ) {
            rc = SRAWriterIonTorrent_WriteRead(self->wIonTorrent, argv[0], spot.name, spot.reads[0].seq, spot.reads[0].qual, NULL, NULL, 0, 0, 0, 0);
        } else {
            rc = SRAWriter454_WriteRead(self->w454, argv[0], spot.name, spot.reads[0].seq, spot.reads[0].qual, NULL, NULL, 0, 0, 0, 0);
        }
    }

    free(infos);
    *spots_bad_count = self->spots_bad_count;
    return rc;
}
/*
 * Organise the Illumina native input files into groups, validate the groups
 * and then parse every group.
 *
 * Files are visited once per entry of file_types[], in the order of that
 * table, so that the files of one type are grouped before the next type.
 * The group lookup key is the file name prefix for _prb files (they carry
 * no spot name) and the name of the first spot for every other type.
 * A new group is inserted into the list ordered by the coords of its first
 * file.
 *
 * self            - loader instance, handed to FGroup_Parse
 * argc, argv      - input files
 * spots_bad_count - receives self->spots_bad_count on return
 *
 * Returns 0 on success or the first non-zero rc encountered.
 */
static rc_t IlluminaLoaderFmt_WriteData(IlluminaLoaderFmt* self, uint32_t argc, const SRALoaderFile* const argv[], int64_t* spots_bad_count)
{
    rc_t rc = 0;
    uint32_t t, i, k, ftype_q = sizeof(file_types) / sizeof(file_types[0]);
    SLList files;
    /* file object that is allocated but not yet owned by a group */
    IlluminaFileInfo* file = NULL;

    SLListInit(&files);

    /* group files using spotname, for _prb. file name prefix is used,
       files reviewed by type detected from name and ordered by file_type array */
    for(t = 0; rc == 0 && t < ftype_q; t++) {
        for(i = 0; rc == 0 && i < argc; i++) {
            const char* fname, *blk_pfx;
            int prefix_len = 0;
            ERunFileType ftype;
            EIlluminaNativeFileType type = eIlluminaNativeFileTypeNotSet;
            FGroup_Find_data data;

            if( (rc = SRALoaderFileName(argv[i], &fname)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading file name", NULL);
                break;
            }
            if( (rc = SRALoaderFile_FileType(argv[i], &ftype)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading file type", NULL);
                break;
            }
            if( (rc = SRALoaderFileBlockName(argv[i], &blk_pfx)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading DATA_BLOCK/@name", NULL);
                break;
            }
            if( blk_pfx == NULL ) {
                blk_pfx = "";
            }
            {
                /* skip path if present */
                const char* p = strrchr(fname, '/');
                fname = p ? p + 1 : fname;
                p = NULL;
                /* first key of file_types[] found in the name decides the type;
                   the text in front of that key is the file name prefix */
                for(k = 0; type == eIlluminaNativeFileTypeNotSet && k < ftype_q; k++) {
                    const char* const* e = file_types[k].key;
                    while( *e != NULL ) {
                        p = strstr(fname, *e++);
                        if( p != NULL ) {
                            type = file_types[k].type;
                            break;
                        }
                    }
                }
                if( p != NULL ) {
                    prefix_len = (int)(p - fname);
                }
            }
            /* an explicitly declared file type overrides the name-based guess */
            if( ftype == rft_IlluminaNativeSeq ) {
                type = eIlluminaNativeFileTypeFasta;
            } else if( ftype == rft_IlluminaNativePrb ) {
                type = eIlluminaNativeFileTypeQuality4;
            } else if( ftype == rft_IlluminaNativeInt ) {
                type = eIlluminaNativeFileTypeIntensity;
            } else if( ftype == rft_IlluminaNativeQseq ) {
                type = eIlluminaNativeFileTypeQSeq;
            }
            if( type == eIlluminaNativeFileTypeNotSet ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcUnrecognized);
                SRALoaderFile_LOG(argv[i], klogErr, rc, "detecting file type by file name", NULL);
                break;
            }
            if( type != file_types[t].type ) {
                /* one type at a time */
                continue;
            }
            DEBUG_MSG(3, ("file '%s' type set to %d\n", fname, type));

            file = calloc(1, sizeof(*file));
            if( file == NULL ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcExhausted);
                SRALoaderFile_LOG(argv[i], klogErr, rc, "allocating file object", NULL);
                break;
            }
            IlluminaFileInfo_init(file);
            file->file = argv[i];
            file->type = type;

            if( file->type == eIlluminaNativeFileTypeQuality4 ) {
                /* in _prb there is no spotname inside so use file prefix */
                rc = pstring_assign(&data.key, fname, prefix_len);
            } else {
                /* try to get 1st spot so group can be organized by spot name */
                if( (rc = read_next_spot(blk_pfx, file)) != 0 || !file->ready ) {
                    rc = rc ? rc : RC(rcSRA, rcFormatter, rcReading, rcData, rcNotFound);
                    SRALoaderFile_LOG(argv[i], klogErr, rc, "reading 1st spot", NULL);
                    break;
                }
                rc = pstring_copy(&data.key, &file->name);
            }
            if( rc != 0 ) {
                /* Without this check a failure to build the lookup key was
                   overwritten by the group-key assignment further down, and
                   the group search ran on a key that was never set.
                   The pending file object is released by the cleanup at the
                   end of the function. */
                SRALoaderFile_LOG(argv[i], klogErr, rc, "setting file group key", NULL);
                break;
            }

            data.found = NULL;
            if( SLListDoUntil(&files, FGroup_Find, &data) && data.found != NULL ) {
                /* existing group: append the file at the end of its chain,
                   rejecting a second file of the same type (qseq excepted) */
                IlluminaFileInfo* ss = data.found->files;
                while( rc == 0 && file != NULL ) {
                    if( ss->type != eIlluminaNativeFileTypeQSeq && ss->type == file->type ) {
                        rc = RC(rcSRA, rcFormatter, rcReading, rcFile, rcDuplicate);
                        SRALoaderFile_LOG(argv[i], klogErr, rc, "type of file for lane", NULL);
                    } else if( ss->next != NULL ) {
                        ss = ss->next;
                    } else {
                        ss->next = file;
                        file->prev = ss;
                        data.found->mask |= file->type;
                        /* ownership moved to the group */
                        file = NULL;
                    }
                }
            } else {
                data.found = calloc(1, sizeof(*data.found));
                if( data.found == NULL ) {
                    rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcInsufficient);
                    SRALoaderFile_LOG(argv[i], klogErr, rc, "preparing file group", NULL);
                    break;
                } else {
                    if( (rc = pstring_assign(&data.found->key, fname, prefix_len)) != 0 ) {
                        SRALoaderFile_LOG(argv[i], klogErr, rc, "setting file group key", NULL);
                        /* the group does not reference the file yet, so the
                           file is still released by the final cleanup */
                        FGroup_Whack(&data.found->dad, NULL);
                        break;
                    } else {
                        FGroup* curr = (FGroup*)SLListHead(&files), *prev = NULL;
                        data.found->blk_pfx = blk_pfx;
                        data.found->files = file;
                        data.found->mask = file->type;
                        /* group inserted into list by coords in 1st spot */
                        while( curr != NULL ) {
                            if( curr->files[0].coord[0] > file->coord[0] ||
                                (curr->files[0].coord[0] == file->coord[0] && curr->files[0].coord[1] > file->coord[1]) ) {
                                data.found->dad.next = &curr->dad;
                                if( prev == NULL ) {
                                    files.head = &data.found->dad;
                                } else {
                                    prev->dad.next = &data.found->dad;
                                }
                                break;
                            }
                            prev = curr;
                            curr = (FGroup*)curr->dad.next;
                        }
                        if( curr == NULL ) {
                            /* no later group found: append */
                            SLListPushTail(&files, &data.found->dad);
                        }
                        /* ownership moved to the group */
                        file = NULL;
                    }
                }
            }
        }
    }

    if( rc == 0 ) {
        SLListForEach(&files, FGroup_Validate, &rc);
    }
    if( rc == 0 ) {
        FGroup_Parse_data data;
        data.self = self;
        /* the traversal returns true when a group reported an error in data.rc */
        if( SLListDoUntil(&files, FGroup_Parse, &data) ) {
            rc = data.rc;
        }
    } else {
        /* release a file object that never made it into a group */
        free(file);
    }
    SLListWhack(&files, FGroup_Whack, NULL);
    *spots_bad_count = self->spots_bad_count;
    return rc;
}
/*
 * List callback: process all spots of one file group.
 *
 * n - list node, treated as FGroup
 * d - FGroup_Parse_data; d->rc receives the result, d->spot is used as the
 *     working spot and d->self supplies the writer, the skip flags and the
 *     bad-spot counters
 *
 * Each round reads the next spot from every file of the group, merges the
 * ready reads into one spot and writes it. A spot name mismatch skips the
 * round for the whole group and counts one bad spot.
 *
 * Returns true when d->rc is non-zero, false otherwise.
 */
bool FGroup_Parse( SLNode *n, void *d )
{
    FGroup_Parse_data* ctx = (FGroup_Parse_data*)d;
    FGroup* group = (FGroup*)n;
    /* file reference handed to the writer; keeps its value across rounds */
    const SRALoaderFile* data_block_ref = NULL;

    ctx->rc = 0;
    while( ctx->rc == 0 ) {
        IlluminaFileInfo* cur;
        bool have_data = false;

        /* step 1: advance every file of the group */
        for(cur = group->files; ctx->rc == 0 && cur != NULL; cur = cur->next) {
            ctx->rc = read_next_spot(group->blk_pfx, cur);
            if( ctx->rc == 0 && cur->ready ) {
                have_data = true;
            }
        }
        if( ctx->rc != 0 || !have_data ) {
            break;
        }

        /* step 2: collect spot reads, matching by spot name;
         * spot data may be split across multiple files */
        IlluminaSpot_Init(&ctx->spot);
        for(cur = group->files; ctx->rc == 0 && cur != NULL; cur = cur->next) {
            if( !cur->ready ) {
                continue;
            }
            if( (cur->type == eIlluminaNativeFileTypeNoise && ctx->self->skip_noise) ||
                (cur->type == eIlluminaNativeFileTypeIntensity && ctx->self->skip_intensity) ||
                (cur->type == eIlluminaNativeFileTypeSignal && ctx->self->skip_signal) ) {
                /* data of this type is dropped: consume it without adding */
                cur->ready = false;
                continue;
            }

            data_block_ref = cur->file;
            if( cur->type == eIlluminaNativeFileTypeQSeq && (group->mask & eIlluminaNativeFileTypeQuality4) ) {
                /* drop quality1 from qseq data */
                pstring_clear(&cur->read.qual);
            } else if( cur->type == eIlluminaNativeFileTypeQuality4 ) {
                /* NOTE(review): neib is NULL if the file has neither next nor prev -
                   presumably excluded by group validation; confirm */
                IlluminaFileInfo* neib = cur->next ? cur->next : cur->prev;
                /* need to fix spotname to be same cause prb do not have any name in it */
                ctx->rc = pstring_copy(&cur->name, &neib->name);
                if( ctx->rc != 0 ) {
                    SRALoaderFile_LOG(cur->file, klogErr, ctx->rc, "$(msg) '$(n)'", "msg=syncing prb spot name,n=%s", neib->name.data);
                    /* the loop condition ends the collection */
                    continue;
                }
            }

            ctx->rc = IlluminaSpot_Add(&ctx->spot, &cur->name, &cur->barcode, &cur->read);
            if( ctx->rc == 0 ) {
                cur->ready = false;
                continue;
            }
            if( GetRCState(ctx->rc) != rcIgnored ) {
                /* hard error: the loop condition ends the collection */
                continue;
            }

            /* the read was not accepted into the current spot */
            SRALoaderFile_LOG(cur->file, klogWarn, ctx->rc, "$(msg) '$(s1)' <> '$(s2)'", "msg=spot name mismatch,s1=%.*s,s2=%.*s",
                              ctx->spot.name->len, ctx->spot.name->data, cur->name.len, cur->name.data);
            ctx->self->spots_bad_count++;
            {
                /* skip spot for all files in a group */
                IlluminaFileInfo* sib;
                for(sib = group->files; sib != NULL; sib = sib->next) {
                    sib->ready = false;
                    SRALoaderFile_LOG(sib->file, klogWarn, ctx->rc, "$(msg) '$(n)'", "msg=skipped spot,n=%s", sib->name.data);
                }
            }
            /* a negative limit means any number of bad spots is tolerated */
            if( ctx->self->spots_bad_allowed >= 0 && ctx->self->spots_bad_count > ctx->self->spots_bad_allowed ) {
                ctx->rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
            }
            break;
        }

        if( GetRCState(ctx->rc) == rcIgnored ) {
            /* tolerated mismatch: nothing is written for this round */
            ctx->rc = 0;
            continue;
        }

        /* step 3: write the merged spot */
        /* NOTE(review): if every ready file was skipped above, data_block_ref is NULL
           or left over from an earlier round - confirm the writer accepts that */
        if( ctx->rc == 0 ) {
            ctx->rc = SRAWriterIllumina_Write(ctx->self->writer, data_block_ref, &ctx->spot);
        }
    }
    return ctx->rc != 0;
}