コード例 #1
0
ファイル: srf-illumina.c プロジェクト: ncbi/sra-tools
/*
 * fe_new_read
 *  Turns one read into a spot and passes it to the Illumina writer.
 *
 *  self   - parsing context; supplies name_prefix, the current read data
 *           (self->read), the writer, and the file used for log messages
 *  flags  - forwarded to SRF_set_read_filter to fill self->read.filter
 *  readId - read identifier; when it contains '#', readId->len is shortened
 *           in place so that it ends before the '#', and the text after the
 *           '#' becomes the spot group
 *
 *  Returns 0 on success, otherwise the rc of the first step that failed.
 */
static
rc_t fe_new_read(fe_context_t *self, int flags, pstring *readId )
{
    rc_t rc;
    char *suffix;
    pstring readName, spotGroup;
    /* static storage: the same object is reused by every call;
       IlluminaSpot_Init is called on it below before it is filled */
    static IlluminaSpot spot;

    /* look for spot group */
    suffix = strchr(readId->data, '#');
    if( suffix != NULL ) {
        /* only the length is adjusted; the '#' itself stays in readId->data */
        readId->len = suffix++ - readId->data;
        if( (rc = pstring_assign(&spotGroup, suffix, strlen(suffix))) != 0 ) {
            SRALoaderFile_LOG(self->ctx.file, klogInt, rc,
                "extracting barcode from spot '$(spotname)'", "spotname=%s", readId->data);
            return rc;
        }
    } else {
        pstring_clear(&spotGroup);
    }

    /* build the read name from prefix (self->name_prefix) and read id */
    if(self->name_prefix.len > 0 ) {
        if( (rc = pstring_copy(&readName, &self->name_prefix)) == 0 ) {
            /* a prefix ending in a digit gets a ':' separator before the read id.
               The cast is required: passing a negative char value to isdigit()
               is undefined behavior. */
            if( isdigit((unsigned char)readName.data[readName.len - 1]) ) {
                rc = pstring_append(&readName, ":", 1);
            }
            if( rc == 0 ) {
                rc = pstring_concat(&readName, readId);
            }
        }
    } else {
        rc = pstring_copy(&readName, readId);
    }
    if( rc != 0 ) {
        SRALoaderFile_LOG(self->ctx.file, klogErr, rc,
            "preparing spot name $(spotname)", "spotname=%s", readId->data);
        return rc;
    }
    SRF_set_read_filter(&self->read.filter, flags);

    IlluminaSpot_Init(&spot);
    if( (rc = IlluminaSpot_Add(&spot, &readName, &spotGroup, &self->read)) == 0 ) {
        rc = SRAWriterIllumina_Write(self->writer, self->ctx.file, &spot);
    }
    return rc;
}
コード例 #2
0
ファイル: fastq-fmt.c プロジェクト: ncbi/sra-tools
/*
 * FastqLoaderFmt_WriteData
 *  Reads spots from all input files round by round, adds every ready read
 *  to one IlluminaSpot via IlluminaSpot_Add and writes that spot with the
 *  writer selected in self (wIllumina, else wIonTorrent, else w454).
 *
 *  self            - formatter state: writers, processing defaults and
 *                    the bad-spot counter/limit
 *  argc, argv      - number of input files and the files themselves;
 *                    argv[0] is the file handed to the writers
 *  spots_bad_count - [OUT] receives self->spots_bad_count on return
 *
 *  Returns 0 when a round produced no ready spot in any file, otherwise
 *  the first non-zero rc that was not tolerated.
 */
static
rc_t FastqLoaderFmt_WriteData(FastqLoaderFmt* self, uint32_t argc, const SRALoaderFile* const argv[], int64_t* spots_bad_count)
{
    rc_t rc = 0;
    /* g: index of the last file whose read was added to the current spot;
       only used to pick the file for log messages */
    uint32_t i, g = 0;
    FastqFileInfo* files = NULL;
    bool done;
    /* static storage: one spot object shared by all rounds and all calls;
       IlluminaSpot_Init is called on it at the start of every round */
    static IlluminaSpot spot;
 
    /* on allocation failure rc is set; all loops below are guarded by
       rc == 0, so control reaches free(files) and returns rc */
    if( (files = calloc(argc, sizeof(*files))) == NULL ) {
        rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcInsufficient);
    }

    /* phase 1: per-file setup of quality type, encoding, offset and range */
    for(i = 0; rc == 0 && i < argc; i++) {
        ExperimentQualityType qType;
        FastqFileInfo* file = &files[i];

        file->file = argv[i];
        FileReadData_init(file->spot, false);
        FileReadData_init(&file->spot[1], false);
        if( (rc = SRALoaderFile_QualityScoringSystem(file->file, &qType)) == 0 &&
            (rc = SRALoaderFile_QualityEncoding(file->file, &file->qualEnc)) == 0 &&
            (rc = SRALoaderFile_AsciiOffset(file->file, &file->qualOffset)) == 0 ) {

            file->qualType = ILLUMINAWRITER_COLMASK_NOTSET;

            /* file does not declare a scoring system:
               take type and offset from the processing defaults */
            if( qType == eExperimentQualityType_Undefined ) {
                qType = self->processing->quality_type;
                file->qualOffset = self->processing->quality_offset;
            }
            switch(qType) {
                case eExperimentQualityType_LogOdds:
                case eExperimentQualityType_Other:
                    /* non-Phred quality is rejected when the 454 or IonTorrent
                       writer is in use; the fields below are still assigned,
                       but the non-zero rc ends the setup loop */
                    if( self->w454 != NULL || self->wIonTorrent != NULL ) {
                        rc = RC(rcSRA, rcFormatter, rcConstructing, rcParam, rcInvalid);
                        LOGERR(klogInt, rc, "quality type other than Phred is not supported for this PLATFORM");
                    }
                    file->qualMin = -40;
                    file->qualMax = 41;
                    file->qualType = ILLUMINAWRITER_COLMASK_QUALITY_LOGODDS1;
                    break;
                default:
                    SRALoaderFile_LOG(file->file, klogWarn, rc, 
                        "quality_scoring_system attribute not set for this file, using Phred as default", NULL);
                    /* fallthrough: after the warning the file is handled as Phred */
                case eExperimentQualityType_Phred:
                    file->qualType = ILLUMINAWRITER_COLMASK_QUALITY_PHRED;
                    file->qualMin = 0;
                    /* upper bound depends on whether the Illumina writer is in use */
                    file->qualMax = (self->wIllumina) ? 61: 127;
                    break;
            }
        }
    }
    /* phase 2: one iteration per output spot */
    do {
        /* done stays true only if no file has a ready spot after this round of reads */
        done = true;
        for(i = 0; rc == 0 && i < argc; i++) {
            FastqFileInfo* file = &files[i];
            /* on error the loop condition (rc == 0) stops the scan;
               a file without a ready spot is simply passed over */
            if( (rc = read_next_spot(self, file)) != 0 || !file->spot->ready ) {
                continue;
            }
            done = false;
#if _DEBUGGING
            /* dump spot[0] and, if ready, spot[1] of this file */
            {{
                FileReadData* ss = file->spot;
                do {
                    DEBUG_MSG(3, ("file-%u: name:'%s', bc:%s, rd:%i, flt:%hu, seq '%.*s', qual %u bytes\n",
                                  i + 1, ss->name.data, ss->barcode.data, ss->read.read_id, ss->read.filter,
                                  ss->read.seq.len, ss->read.seq.data, ss->read.qual.len));
                    if( ss == &file->spot[1]){ break; }
                    ss = file->spot[1].ready ? &file->spot[1] : NULL;
                } while( ss != NULL );
            }}
#endif
        }
        if( rc != 0 || done ) {
            break;
        }
        /* collect spot reads, matching by spot name
         * spot data may be split across multiple files
         */
        IlluminaSpot_Init(&spot);
        for(i = 0; rc == 0 && i < argc; i++) {
            /* each file holds up to two pending reads; spot[1] is visited
               only after spot[0] was ready */
            FileReadData* fspot = files[i].spot[0].ready ? &files[i].spot[0] : NULL;
            while(rc == 0 && fspot != NULL ) {
                rc = IlluminaSpot_Add(&spot, &fspot->name, &fspot->barcode, &fspot->read);
                if( rc == 0 ) {
                    /* read consumed: remember the contributing file */
                    g = i;
                    fspot->ready = false;
                } else if( GetRCState(rc) == rcIgnored ) {
                    /* not treated as an error: the read keeps its ready flag.
                       NOTE(review): presumably the read belongs to a different
                       spot - confirm in IlluminaSpot_Add */
                    rc = 0;
                } else {
                    SRALoaderFile_LOG(files[i].file, klogErr, rc, "$(msg)", "msg=adding data to spot");
                }
                if( fspot == &files[i].spot[1]) { break; }
                fspot = files[i].spot[1].ready ? &files[i].spot[1] : NULL;
            }
        }
        if( rc == 0 ) {
            if( self->wIllumina != NULL ) {
                /* a validation failure reported by the formatter counts as a bad
                   spot; it is tolerated while spots_bad_allowed is negative or
                   the running count has not exceeded it */
                if( (rc = SRAWriterIllumina_Write(self->wIllumina, argv[0], &spot)) != 0 &&
                    GetRCTarget(rc) == rcFormatter && GetRCContext(rc) == rcValidating ) {
                    SRALoaderFile_LOG(files[g].file, klogWarn, rc, "$(msg) $(spot_name)", "msg=bad spot,spot_name=%.*s",
                                                spot.name->len, spot.name->data);
                    self->spots_bad_count++;
                    if( self->spots_bad_allowed < 0 ||
                        self->spots_bad_count <= self->spots_bad_allowed ) {
                        rc = 0;
                    }
                }
            } else if( spot.nreads != 1 ) {
                /* the non-Illumina writers below take exactly one read per spot */
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcUnsupported);
                SRALoaderFile_LOG(files[g].file, klogErr, rc, "$(msg)", "msg=multiple reads for this platform");
            } else if( self->wIonTorrent != NULL ) {
                rc = SRAWriterIonTorrent_WriteRead(self->wIonTorrent, argv[0], spot.name,
                                                   spot.reads[0].seq, spot.reads[0].qual, NULL, NULL, 0, 0, 0, 0);
            } else {
                rc = SRAWriter454_WriteRead(self->w454, argv[0], spot.name,
                                            spot.reads[0].seq, spot.reads[0].qual, NULL, NULL, 0, 0, 0, 0);
            }
        }
    } while( rc == 0 );
    /* free(NULL) is a no-op, so this is safe after a failed calloc */
    free(files);
    *spots_bad_count = self->spots_bad_count;
    return rc;
}
コード例 #3
0
ファイル: illumina-fmt.c プロジェクト: Bhumi28/sra-tools
/*
 * FGroup_Parse
 *  Processes one file group: repeatedly reads the next spot from every file
 *  of the group, adds the ready reads to data->spot and writes the spot with
 *  SRAWriterIllumina_Write, until no file has a ready spot or an error occurs.
 *
 *  n - the group to process; cast to FGroup*
 *  d - FGroup_Parse_data*: carries the loader (self), the spot being built
 *      and the resulting rc
 *
 *  Returns true when data->rc is non-zero, false otherwise.
 *  NOTE(review): presumably used as a list-walk callback where returning
 *  true stops the walk - confirm against the caller.
 */
bool FGroup_Parse( SLNode *n, void *d )
{
    FGroup_Parse_data* data = (FGroup_Parse_data*)d;
    FGroup* g = (FGroup*)n;
    bool done;
    /* file passed to the writer: the last file whose read was offered to the spot */
    const SRALoaderFile* data_block_ref = NULL;

    data->rc = 0;
    do {
        IlluminaFileInfo* file = g->files;
        /* done stays true only if no file of the group has a ready spot */
        done = true;
        while( data->rc == 0 && file != NULL ) {
            if( (data->rc = read_next_spot(g->blk_pfx, file)) == 0 && file->ready ) {
                done = false;
            }
            file = file->next;
        }
        if( data->rc != 0 || done ) {
            break;
        }
        /* collect spot reads, matching by spot name
         * spot data may be split across multiple files
         */
        IlluminaSpot_Init(&data->spot);
        file = g->files;
        while( data->rc == 0 && file != NULL ) {
            if( file->ready ) {
                if( (file->type == eIlluminaNativeFileTypeNoise && data->self->skip_noise) ||
                    (file->type == eIlluminaNativeFileTypeIntensity && data->self->skip_intensity) ||
                    (file->type == eIlluminaNativeFileTypeSignal && data->self->skip_signal) ) {
                    /* file type is configured to be skipped: consume the
                       record without adding it to the spot */
                    file->ready = false;
                } else {
                    data_block_ref = file->file;
                    if( file->type == eIlluminaNativeFileTypeQSeq && (g->mask & eIlluminaNativeFileTypeQuality4) ) {
                        /* drop quality1 from qseq data */
                        pstring_clear(&file->read.qual);
                    } else if( file->type == eIlluminaNativeFileTypeQuality4 ) {
                        IlluminaFileInfo* neib = file->next ? file->next : file->prev;
                        /* need to fix spotname to be same cause prb do not have any name in it */
                        if( neib == NULL ) {
                            /* no other file in the group to take the name from:
                               report an error instead of dereferencing NULL */
                            data->rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
                            SRALoaderFile_LOG(file->file, klogErr, data->rc, "$(msg)",
                                              "msg=syncing prb spot name: no other file in group");
                        } else if( (data->rc = pstring_copy(&file->name, &neib->name)) != 0 ) {
                            SRALoaderFile_LOG(file->file, klogErr, data->rc, "$(msg) '$(n)'", "msg=syncing prb spot name,n=%s", neib->name.data);
                        }
                    }
                    if( data->rc == 0 ) {
                        data->rc = IlluminaSpot_Add(&data->spot, &file->name, &file->barcode, &file->read);
                        if( data->rc == 0 ) {
                            file->ready = false;
                        } else {
                            /* rcIgnored is handled as a spot name mismatch: the
                               whole spot is dropped and counted as bad */
                            if( GetRCState(data->rc) == rcIgnored ) {
                                SRALoaderFile_LOG(file->file, klogWarn, data->rc, "$(msg) '$(s1)' <> '$(s2)'",
                                                "msg=spot name mismatch,s1=%.*s,s2=%.*s",
                                                data->spot.name->len, data->spot.name->data, file->name.len, file->name.data);
                                data->self->spots_bad_count++;
                                /* skip spot for all files in a group */
                                file = g->files;
                                while( file != NULL ) {
                                    file->ready = false;
                                    SRALoaderFile_LOG(file->file, klogWarn, data->rc,
                                                      "$(msg) '$(n)'", "msg=skipped spot,n=%s", file->name.data);
                                    file = file->next;
                                }
                                /* too many bad spots: replace the rcIgnored rc
                                   with a real error so the outer loop stops */
                                if( data->self->spots_bad_allowed >= 0 &&
                                    data->self->spots_bad_count > data->self->spots_bad_allowed ) {
                                    data->rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
                                }
                                /* file is NULL here; leave the collecting loop
                                   before it is advanced */
                                break;
                            }
                        }
                    }
                }
            }
            file = file->next;
        }
        /* rc still rcIgnored: the spot was skipped and the bad-spot limit
           was not exceeded; reset rc and go on with the next spot */
        if( GetRCState(data->rc) == rcIgnored ) {
            data->rc = 0;
            continue;
        }
        if( data->rc == 0 ) {
            data->rc = SRAWriterIllumina_Write(data->self->writer, data_block_ref, &data->spot);
        }
    } while( data->rc == 0 );
    return data->rc != 0;
}