コード例 #1
ファイル: illumina-fmt.c プロジェクト: Bhumi28/sra-tools
/*
 * Parses the leading tab-terminated fields of a data line into
 * file->coord[] and file->name.
 *
 * file     receives the numeric coordinates and the textual name
 * data     start of the line; only data_sz bytes are scanned
 * data_sz  number of bytes available at data
 * tail     optional; reset to NULL on entry and later set to the byte
 *          following a field's terminating tab
 *
 * Each field is converted with strtol (base 10); when strtol sets errno the
 * stored coordinate is 0. The field text itself is collected in file->name,
 * with ":" placed before every field after the first.
 *
 * Returns an rcTooShort code when fewer than 4 tabs were found, otherwise
 * the code assigned by the last pstring_* call.
 *
 * NOTE(review): this listing seems to have lost the lines that held only
 * braces and `break;`. Presumably `case 1` ends with a break before
 * `case 2` - confirm against the upstream file before relying on the
 * control flow shown here.
 */
rc_t read_spot_coord(IlluminaFileInfo* file, const char* data, size_t data_sz, const char** tail)
    rc_t rc = 0;
    const char* t, *str = data, *end = data + data_sz;
    int tabs = 0;

    if( tail ) {
        *tail = NULL;
    do {
        /* t is the tab that terminates the field starting at str */
        if( (t = memchr(str, '\t', end - str)) != NULL ) {
            switch(++tabs) {
                case 1:
                    /* first field: coord[0], and it starts the name */
                    errno = 0;
                    file->coord[0] = strtol(str, NULL, 10);
                    if( errno != 0 ) {
                        file->coord[0] = 0;
                    rc = pstring_assign(&file->name, str, t - str);
                case 2:
                case 3:
                case 4:
                    /* fields 2..4: coord[1..3], appended to the name after ":" */
                    errno = 0;
                    file->coord[tabs - 1] = strtol(str, NULL, 10);
                    if( errno != 0 ) {
                        file->coord[tabs - 1] = 0;
                    if( (rc = pstring_append(&file->name, ":", 1)) == 0 ) {
                        rc = pstring_append(&file->name, str, t - str);
                    if( tail ) {
                        /* remember where the unparsed remainder begins */
                        *tail = t + 1;
            /* step over the tab to the next field */
            str = ++t;
    /* stop on error, when no tab was found, at end of data, or after 4 fields */
    } while( rc == 0 && t != NULL && str < end && tabs < 4 );

    if( tabs < 4 ) {
        /* fewer than four tab-terminated fields on the line */
        rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcTooShort);
    return rc;
コード例 #2
ファイル: fastq-fmt.c プロジェクト: ncbi/sra-tools
/*
 * read fasta or quality, which may be wrapped at the 70th column,
 * into an asciiZ buffer
 */
/*
 * Accumulates consecutive input lines into str.
 *
 * file  line source; lines are fetched with file_read_line and taken from
 *       file->line / file->line_len
 * stop  first character of a line that ends the accumulation
 * str   destination string the lines are appended to
 *
 * The first line is read with optional == false; every later line is read
 * with optional == true. Returns the first non-zero code produced by
 * file_read_line or pstring_append.
 *
 * NOTE(review): the statement that leaves the loop at the "eof or next line
 * is defline" check is not visible in this listing (presumably a `break;`
 * that was stripped together with the braces) - confirm upstream.
 */
rc_t read_multiline_seq_or_qual(FastqFileInfo* file, const char stop, pstring* str)
    rc_t rc = 0;
    bool append = false, optional = false;

    while( rc == 0 ) {
        if( (rc = file_read_line(file, optional)) == 0 ) {
            if( optional && (file->line == NULL || (file->line_len > 0 && file->line[0] == stop)) ) {
                /* eof or next line is defline -> stop, line stays in buffer */
            /* from the second line on, a " " separator is inserted only when
               the text collected so far already contains a space */
            if( append && memchr(str->data, ' ', str->len) != NULL ) {
                rc = pstring_append(str, " ", 1);
            if( rc == 0 && (rc = pstring_append(str, file->line, file->line_len)) == 0 ) {
                file->line = NULL; /* line processed */
                /* after one line has been stored, further lines are optional */
                optional = true;
            append = true;
    return rc;
コード例 #3
ファイル: srf-illumina.c プロジェクト: ncbi/sra-tools
/*
 * Builds a spot name and spot group from a read id, adds the read to the
 * spot and writes the spot.
 *
 * self    context supplying name_prefix, the read data, the writer and the
 *         loader file used for logging
 * flags   passed to SRF_set_read_filter to set self->read.filter
 * readId  read identifier; its len is shortened to exclude a '#' suffix
 *
 * Returns the first non-zero code from the pstring_* calls,
 * IlluminaSpot_Add or SRAWriterIllumina_Write, or that last call's result.
 *
 * NOTE(review): `spot` is a function-local static, so the same object is
 * reused by every call - confirm that is intended.
 */
rc_t fe_new_read(fe_context_t *self, int flags, pstring *readId )
    rc_t rc;
    char *suffix;
    pstring readName, spotGroup;
    static IlluminaSpot spot;

    /* look for spot group */
    /* strchr requires readId->data to be NUL-terminated */
    suffix = strchr(readId->data, '#');
    if( suffix != NULL ) {
        /* cut the id just before '#'; suffix then points past the '#' */
        readId->len = suffix++ - readId->data;
        if( (rc = pstring_assign(&spotGroup, suffix, strlen(suffix))) != 0 ) {
            SRALoaderFile_LOG(self->ctx.file, klogInt, rc,
                "extracting barcode from spot '$(spotname)'", "spotname=%s", readId->data);
            return rc;
    /* NOTE(review): no statement is visible in this else branch, yet
       spotGroup is passed to IlluminaSpot_Add below even when no '#' was
       found - verify upstream that spotGroup is initialized on this path */
    } else {

    /* build the read name from prefix (self->name_prefix) and read id */
    if(self->name_prefix.len > 0 ) {
        if( (rc = pstring_copy(&readName, &self->name_prefix)) == 0 ) {
            /* a prefix ending in a digit is separated from the id by ":" */
            /* NOTE(review): isdigit is given a plain char here; consider
               casting to unsigned char */
            if( isdigit(readName.data[readName.len - 1]) ) {
                rc = pstring_append(&readName, ":", 1);
            if( rc == 0 ) {
                rc = pstring_concat(&readName, readId);
    } else {
        /* no prefix: the read id alone is the name */
        rc = pstring_copy(&readName, readId);
    if( rc != 0 ) {
        SRALoaderFile_LOG(self->ctx.file, klogErr, rc,
            "preparing spot name $(spotname)", "spotname=%s", readId->data);
        return rc;
    SRF_set_read_filter(&self->read.filter, flags);

    /* the spot is written only if the read was added successfully */
    if( (rc = IlluminaSpot_Add(&spot, &readName, &spotGroup, &self->read)) == 0 ) {
        rc = SRAWriterIllumina_Write(self->writer, self->ctx.file, &spot);
    return rc;
コード例 #4
ファイル: writer-absolid.c プロジェクト: ncbi/sra-tools
/*
 * Builds a spot name as prefix followed by suffix, joined with one '_'.
 *
 * prefix  leading part of the name; must not be NULL
 * suffix  optional trailing part; NULL or empty leaves name equal to prefix
 * name    receives the result; must not be NULL
 *
 * The '_' separator is inserted only when the copied prefix is non-empty,
 * does not already end with '_', and the suffix does not start with '_'.
 *
 * Returns 0 on success, an rcParam/rcNull code when prefix or name is NULL,
 * otherwise the code of the first pstring_* call that failed. Every failure
 * is logged.
 */
rc_t SRAWriteAbsolid_MakeName(const pstring* prefix, const pstring* suffix, pstring* name)
{
    rc_t rc = 0;

    if( prefix == NULL || name == NULL ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcParam, rcNull);
    } else if( (rc = pstring_copy(name, prefix)) == 0 ) {
        if( suffix && suffix->len > 0 ) {
            if( name->len > 0 && name->data[name->len - 1] != '_' && suffix->data[0] != '_' ) {
                rc = pstring_append(name, "_", 1);
            }
            if( rc == 0 ) {
                /* keep the result: a failed concat must not be reported as success */
                rc = pstring_concat(name, suffix);
            }
        }
    }
    if( rc != 0 ) {
        LOGERR(klogErr, rc, "preparing spot name");
    }
    return rc;
}
コード例 #5
ファイル: pstring.c プロジェクト: Bhumi28/sra-tools
/*
 * Appends the contents of src (src->data, src->len bytes) to dst by
 * delegating to pstring_append, and returns that call's result.
 * src is dereferenced without a NULL check.
 */
rc_t pstring_concat(pstring* dst, const pstring* src)
    return pstring_append(dst, src->data, src->len);
コード例 #6
ファイル: illumina-fmt.c プロジェクト: Bhumi28/sra-tools
/*
 * Reads from a file the data for a single spot; the data may be partial.
 *
 * blk_pfx  optional block prefix placed in front of the spot name
 * file     per-file state: current line, parsed name, coordinates and read
 *
 * Returns 0 when previously parsed data is still pending (file->ready), at
 * end of file (file->line == NULL after reading), or after a line was
 * parsed successfully. On failure the result of SRALoaderFile_LOG is
 * returned.
 *
 * NOTE(review): this listing seems to have lost the lines holding only
 * braces, `break;` and `default:`. Presumably every case ends with a break
 * and the rcUnknown error sits under a default label - confirm upstream.
 */
rc_t read_next_spot(const char* blk_pfx, IlluminaFileInfo* file)
    rc_t rc = 0;
    /* NOTE(review): tail is captured before file_read_line runs; for file
       types that never call read_spot_coord it keeps this earlier value
       when line_len is adjusted further down - verify that is harmless */
    const char* tail = file->line;

    if( file->ready ) {
        /* data still not used */
        return 0;
    if( (rc = file_read_line(file, true)) != 0 ) {
        return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading more data");
    } else if( file->line == NULL ) {
        return 0; /* eof */
    /* first pass: spot name / coordinates, depending on the file type */
    switch( file->type ) {
        case eIlluminaNativeFileTypeQSeq:
            /* the whole line is handed to parse_qseq */
            if( (rc = parse_qseq(file, file->line, file->line_len)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading qseq");

        case eIlluminaNativeFileTypeFasta:
        case eIlluminaNativeFileTypeNoise:
        case eIlluminaNativeFileTypeIntensity:
        case eIlluminaNativeFileTypeSignal:
                /* read only common first 4 coords into name and prepend with DATA_BLOCK/@name */
                if( (rc = read_spot_coord(file, file->line, file->line_len, &tail)) == 0 ) {
                    if( blk_pfx != NULL ) {
                        /* name becomes blk_pfx ":" followed by the parsed name */
                        pstring tmp_name;
                        if( (rc = pstring_copy(&tmp_name, &file->name)) == 0 &&
                            (rc = pstring_assign(&file->name, blk_pfx, strlen(blk_pfx))) == 0 &&
                            (rc = pstring_append(&file->name, ":", 1)) == 0 ) {
                            rc = pstring_concat(&file->name, &tmp_name);
                if( rc != 0 ) {
                    return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading spot coord");

        case eIlluminaNativeFileTypeQuality4:
            /* NOTE(review): blk_pfx goes to strlen without the NULL check
               used in the branch above - confirm callers never pass NULL */
            if( (rc = read_quality(file->line, file->line_len, &file->read)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading quality");
            } else if( (rc = pstring_assign(&file->name, blk_pfx, strlen(blk_pfx))) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=name for quality 4");

            rc = RC(rcSRA, rcFormatter, rcReading, rcFileFormat, rcUnknown);
            return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=processing data line");

    /* process tail (after coords) for some file types */
    file->line_len -= tail - file->line; /* length of tail */
    switch( file->type ) {
        case eIlluminaNativeFileTypeQSeq:
        case eIlluminaNativeFileTypeQuality4:
            /* completely processed before */

        case eIlluminaNativeFileTypeFasta:
            /* the tail is the sequence; it must also pass pstring_is_fasta */
            if( (rc = pstring_assign(&file->read.seq, tail, file->line_len)) != 0 ||
                !pstring_is_fasta(&file->read.seq) ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcCorrupt);
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading fasta");

        case eIlluminaNativeFileTypeNoise:
            if( (rc = read_signal(tail, file->line_len, &file->read.noise)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=converting noise");

        case eIlluminaNativeFileTypeIntensity:
            if( (rc = read_signal(tail, file->line_len, &file->read.intensity)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=converting intensity");

        case eIlluminaNativeFileTypeSignal:
            if( (rc = read_signal(tail, file->line_len, &file->read.signal)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=converting signal");
    /* the line is consumed; mark the parsed spot as pending for the caller */
    file->line = NULL;
    file->ready = true;
    DEBUG_MSG(3, ("name:'%s' [%li:%li:%li:%li]\n", file->name.data, 
                file->coord[0], file->coord[1], file->coord[2], file->coord[3]));
    if( file->read.seq.len ) {
        DEBUG_MSG(3, ("seq:'%.*s'\n", file->read.seq.len, file->read.seq.data));
    if( file->read.qual.len ) {
        DEBUG_MSG(3, ("qual{0x%x}: %u bytes\n", file->read.qual_type, file->read.qual.len));
    return 0;
コード例 #7
ファイル: illumina-fmt.c プロジェクト: Bhumi28/sra-tools
/*
 * assumes a tab-separated file:
 * first 2 positions concatenated with "_" into the spot prefix
 * next 4 positions concatenated with ":" into the spot id: lane:tile:x:y
 * 7th (index) ignored
 * 8th is read id
 * 9th fasta
 * 10th quality
 * 11th (optional) read filter
 */
/*
 * Parses one tab-separated qseq line into file->name, file->coord[],
 * file->barcode and file->read.
 *
 * file     receives the parsed values
 * data     start of the line; only data_sz bytes are scanned
 * data_sz  number of bytes available at data
 *
 * Returns 0 on success, an rcInvalid code for a malformed read id, filter
 * or field count, an rcInconsistent code when sequence and quality lengths
 * differ, or the code of a failing pstring_* call.
 *
 * NOTE(review): this listing seems to have lost the lines holding only
 * braces and `break;`. Presumably each case group ends with a break -
 * confirm against the upstream file.
 */
rc_t parse_qseq(IlluminaFileInfo* file, const char* data, size_t data_sz)
    rc_t rc = 0;
    const char* t, *str = data, *end = data + data_sz;
    int tabs = 0;
    do {
        /* t is the tab that terminates the field starting at str */
        if( (t = memchr(str, '\t', end - str)) != NULL ) {
            switch(++tabs) {
                case 1:
                    /* field 1 starts the name */
                    rc = pstring_assign(&file->name, str, t - str);
                case 2:
                    /* field 2 is appended after "_" */
                    if( (rc = pstring_append(&file->name, "_", 1)) == 0 ) {
                        rc = pstring_append(&file->name, str, t - str);
                case 3:
                case 4:
                case 5:
                case 6:
                    /* fields 3..6: coord[0..3] (0 if strtol sets errno),
                       each appended to the name after ":" */
                    errno = 0;
                    file->coord[tabs - 3] = strtol(str, NULL, 10);
                    if( errno != 0 ) {
                        file->coord[tabs - 3] = 0;
                    if( (rc = pstring_append(&file->name, ":", 1)) == 0 ) {
                        rc = pstring_append(&file->name, str, t - str);
                case 7:
                    /* field 7 is kept as the barcode unless it is exactly "0" or "1" */
                    if( t - str != 1 || (*str != '0' && *str != '1') ) {
                        rc = pstring_assign(&file->barcode, str, t - str);
                case 8:
                    /* field 8 must be a single digit; 0 maps to ILLUMINAWRITER_READID_NONE */
                    if( t - str != 1 || !isdigit(*str) ) {
                        rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
                    } else {
                        file->read.read_id = *str - '0';
                        if( file->read.read_id == 0 ) {
                            file->read.read_id = ILLUMINAWRITER_READID_NONE;
                case 9:
                    /* field 9 is the sequence */
                    rc = pstring_assign(&file->read.seq, str, t - str);
                case 10:
                    /* field 10 is the quality when a further field follows it */
                    file->read.qual_type = ILLUMINAWRITER_COLMASK_QUALITY_PHRED;
                    rc = pstring_assign(&file->read.qual, str, t - str);
            /* step over the tab to the next field */
            str = ++t;
    } while( rc == 0 && t != NULL && str < end );

    if( rc == 0 ) {
        if( tabs == 9 ) {
            /* 9 tabs: the quality is the last field and runs to the end of the line */
            file->read.qual_type = ILLUMINAWRITER_COLMASK_QUALITY_PHRED;
            rc = pstring_assign(&file->read.qual, str, end - str);
        } else if( tabs == 10 ) {
            /* 10 tabs: the remainder is the read filter, one character '1' or '0' */
            if( end - str != 1 ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
            } else if( *str == '1' ) {
                file->read.filter = SRA_READ_FILTER_PASS;
            } else if( *str == '0' ) {
                file->read.filter = SRA_READ_FILTER_REJECT;
            } else {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
        } else {
            /* any other tab count is rejected */
            rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
        if( rc == 0 ) {
            /* sequence and quality must have the same length */
            if( file->read.seq.len != file->read.qual.len ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInconsistent);
            } else {
                /* presumably converts ASCII qualities using offset 64 and a 0..0x7F range - TODO confirm argument meaning */
                rc = pstring_quality_convert(&file->read.qual, eExperimentQualityEncoding_Ascii, 64, 0, 0x7F);
    return rc;