Пример #1
0
/*
 * Parse the first four tab-terminated fields of a data line as spot coordinates.
 *
 * Each field is converted with strtol() into file->coord[0..3] (forced to 0
 * when strtol reports an error through errno), and the field text is collected
 * into file->name with ":" between the fields.
 *
 * file     record receiving coord[] and name
 * data     start of the line; exactly data_sz bytes are scanned
 * tail     optional; reset to NULL on entry, then set to the byte following
 *          the most recent of the 2nd..4th tabs
 *
 * Returns 0 on success, the error from building the name if that failed,
 * or an rcTooShort error when fewer than four tabs were found.
 */
static
rc_t read_spot_coord(IlluminaFileInfo* file, const char* data, size_t data_sz, const char** tail)
{
    rc_t rc = 0;
    const char* t, *str = data, *end = data + data_sz;
    int tabs = 0;

    if( tail ) {
        *tail = NULL;
    }
    do {
        if( (t = memchr(str, '\t', end - str)) != NULL ) {
            switch(++tabs) {
                case 1:
                    /* first coordinate starts the name */
                    errno = 0;
                    file->coord[0] = strtol(str, NULL, 10);
                    if( errno != 0 ) {
                        file->coord[0] = 0;
                    }
                    rc = pstring_assign(&file->name, str, t - str);
                    break;
                case 2:
                case 3:
                case 4:
                    /* remaining coordinates are appended as ":<field>" */
                    errno = 0;
                    file->coord[tabs - 1] = strtol(str, NULL, 10);
                    if( errno != 0 ) {
                        file->coord[tabs - 1] = 0;
                    }
                    if( (rc = pstring_append(&file->name, ":", 1)) == 0 ) {
                        rc = pstring_append(&file->name, str, t - str);
                    }
                    if( tail ) {
                        *tail = t + 1;
                    }
                    break;
            }
            /* continue scanning right after the tab */
            str = ++t;
        }
    } while( rc == 0 && t != NULL && str < end && tabs < 4 );

    /* report a short line only when nothing else failed, so that an error
       from building the name is not masked by rcTooShort */
    if( rc == 0 && tabs < 4 ) {
        rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcTooShort);
    }
    return rc;
}
Пример #2
0
/*
 * Collect a fasta or quality record, which may be wrapped over several
 * lines (e.g. at a 70 column width), into a single string.
 *
 * The first line is mandatory; every following line is read as optional.
 * Collection stops at end of file or at a line starting with the `stop`
 * character, and that line is left in the file buffer.
 * When the text collected so far contains spaces, continuation lines are
 * joined with a single space.
 */
static
rc_t read_multiline_seq_or_qual(FastqFileInfo* file, const char stop, pstring* str)
{
    rc_t rc = 0;
    bool have_line = false;  /* at least one line was already handled */
    bool may_end = false;    /* next line is allowed to be absent */

    for( ; ; ) {
        rc = file_read_line(file, may_end);
        if( rc != 0 ) {
            break;
        }
        if( may_end ) {
            if( file->line == NULL ) {
                /* eof, nothing more to collect */
                break;
            }
            if( file->line_len > 0 && file->line[0] == stop ) {
                /* next defline reached, line stays in buffer */
                break;
            }
        }
        if( have_line && memchr(str->data, ' ', str->len) != NULL ) {
            rc = pstring_append(str, " ", 1);
        }
        have_line = true;
        if( rc != 0 ) {
            break;
        }
        rc = pstring_append(str, file->line, file->line_len);
        if( rc != 0 ) {
            break;
        }
        /* line consumed; everything after it is optional */
        file->line = NULL;
        may_end = true;
    }
    return rc;
}
Пример #3
0
/*
 * Build a spot from the current read and hand it to the Illumina writer.
 *
 * self    loader context; uses name_prefix, read, writer and ctx.file
 * flags   passed to SRF_set_read_filter() to set self->read.filter
 * readId  read identifier; if it contains '#', its len is truncated to the
 *         part before '#' and the remainder becomes the spot group
 *
 * Returns 0 on success, otherwise the first error encountered.
 */
static
rc_t fe_new_read(fe_context_t *self, int flags, pstring *readId )
{
    rc_t rc;
    char *suffix;
    pstring readName, spotGroup;
    /* static storage: one instance shared by all calls, re-initialized below */
    static IlluminaSpot spot;

    /* look for spot group */
    suffix = strchr(readId->data, '#');
    if( suffix != NULL ) {
        readId->len = suffix++ - readId->data;
        if( (rc = pstring_assign(&spotGroup, suffix, strlen(suffix))) != 0 ) {
            SRALoaderFile_LOG(self->ctx.file, klogInt, rc,
                "extracting barcode from spot '$(spotname)'", "spotname=%s", readId->data);
            return rc;
        }
    } else {
        pstring_clear(&spotGroup);
    }

    /* build the read name from prefix (self->name_prefix) and read id */
    if(self->name_prefix.len > 0 ) {
        if( (rc = pstring_copy(&readName, &self->name_prefix)) == 0 ) {
            /* a prefix ending in a digit gets a ':' separator before the read id;
               cast to unsigned char because isdigit() is undefined for negative values */
            if( isdigit((unsigned char)readName.data[readName.len - 1]) ) {
                rc = pstring_append(&readName, ":", 1);
            }
            if( rc == 0 ) {
                rc = pstring_concat(&readName, readId);
            }
        }
    } else {
        rc = pstring_copy(&readName, readId);
    }
    if( rc != 0 ) {
        SRALoaderFile_LOG(self->ctx.file, klogErr, rc,
            "preparing spot name $(spotname)", "spotname=%s", readId->data);
        return rc;
    }
    SRF_set_read_filter(&self->read.filter, flags);

    IlluminaSpot_Init(&spot);
    if( (rc = IlluminaSpot_Add(&spot, &readName, &spotGroup, &self->read)) == 0 ) {
        rc = SRAWriterIllumina_Write(self->writer, self->ctx.file, &spot);
    }
    return rc;
}
Пример #4
0
/*
 * Compose a spot name as prefix + suffix, inserting a single '_' between
 * them when the prefix is not empty and neither the end of the prefix nor
 * the start of the suffix already is '_'.
 *
 * prefix  required; copied into name first
 * suffix  optional; NULL or empty leaves name equal to prefix
 * name    required; receives the result
 *
 * Returns 0 on success; an rcNull parameter error when prefix or name is
 * NULL; otherwise the error from the failing pstring operation.
 * Any failure is logged.
 */
rc_t SRAWriteAbsolid_MakeName(const pstring* prefix, const pstring* suffix, pstring* name)
{
    rc_t rc = 0;
    if( prefix == NULL || name == NULL ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcParam, rcNull);
    } else if( (rc = pstring_copy(name, prefix)) == 0 ) {
        if( suffix && suffix->len > 0 ) {
            if( name->len > 0 && name->data[name->len - 1] != '_' && suffix->data[0] != '_' ) {
                rc = pstring_append(name, "_", 1);
            }
            if( rc == 0 ) {
                /* keep the result: a failed concat must not be reported as success */
                rc = pstring_concat(name, suffix);
            }
        }
    }
    if( rc != 0 ) {
        LOGERR(klogErr, rc, "preparing spot name");
    }
    return rc;
}
Пример #5
0
/*
 * Append the contents of src (src->len bytes starting at src->data) to dst.
 * Returns whatever pstring_append() returns.
 */
rc_t pstring_concat(pstring* dst, const pstring* src)
{
    rc_t status = pstring_append(dst, src->data, src->len);

    return status;
}
Пример #6
0
/*
 * Reads the data for a single spot from a file; the data may be partial.
 *
 * blk_pfx  optional name prefix; when not NULL the spot name of coordinate
 *          based file types becomes "<blk_pfx>:<coords>"; for the Quality4
 *          type it is used as the whole name
 * file     file record; nothing is read while file->ready is still set
 *
 * On success file->ready is set and file->line is cleared. At end of file the
 * function returns 0 with file->ready left as IlluminaFileInfo_init() set it.
 * Returns 0 on success or eof, otherwise the value produced by
 * SRALoaderFile_LOG() for the failing step.
 */
static
rc_t read_next_spot(const char* blk_pfx, IlluminaFileInfo* file)
{
    rc_t rc = 0;
    const char* tail = NULL;

    if( file->ready ) {
        /* data still not used */
        return 0;
    }
    IlluminaFileInfo_init(file);
    if( (rc = file_read_line(file, true)) != 0 ) {
        return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading more data");
    } else if( file->line == NULL ) {
        return 0; /* eof */
    }
    /* By default nothing of the freshly read line is consumed as coordinates.
       This must be taken after file_read_line(): a pointer captured earlier
       may be NULL or refer to a previous line, which would corrupt the
       line_len adjustment below for types that do not call read_spot_coord() */
    tail = file->line;

    switch( file->type ) {
        case eIlluminaNativeFileTypeQSeq:
            if( (rc = parse_qseq(file, file->line, file->line_len)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading qseq");
            }
            break;

        case eIlluminaNativeFileTypeFasta:
        case eIlluminaNativeFileTypeNoise:
        case eIlluminaNativeFileTypeIntensity:
        case eIlluminaNativeFileTypeSignal:
            {{
                /* read only common first 4 coords into name and prepend with DATA_BLOCK/@name */
                if( (rc = read_spot_coord(file, file->line, file->line_len, &tail)) == 0 ) {
                    if( blk_pfx != NULL ) {
                        pstring tmp_name;
                        if( (rc = pstring_copy(&tmp_name, &file->name)) == 0 &&
                            (rc = pstring_assign(&file->name, blk_pfx, strlen(blk_pfx))) == 0 &&
                            (rc = pstring_append(&file->name, ":", 1)) == 0 ) {
                            rc = pstring_concat(&file->name, &tmp_name);
                        }
                    }
                }
                if( rc != 0 ) {
                    return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading spot coord");
                }
                break;
            }}

        case eIlluminaNativeFileTypeQuality4:
            /* NOTE(review): blk_pfx is passed to strlen() unchecked here although the
               coordinate branch above allows it to be NULL - confirm callers never
               pass NULL for this file type */
            if( (rc = read_quality(file->line, file->line_len, &file->read)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading quality");
            } else if( (rc = pstring_assign(&file->name, blk_pfx, strlen(blk_pfx))) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=name for quality 4");
            }
            break;

        default:
            rc = RC(rcSRA, rcFormatter, rcReading, rcFileFormat, rcUnknown);
            return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=processing data line");
    }

    /* process tail (after coords) for some file types */
    file->line_len -= tail - file->line; /* length of tail */
    switch( file->type ) {
        case eIlluminaNativeFileTypeQSeq:
        case eIlluminaNativeFileTypeQuality4:
        default:
            /* completely processed before */
            break;

        case eIlluminaNativeFileTypeFasta:
            /* any failure here, including a non-fasta sequence, is reported as corrupt data */
            if( (rc = pstring_assign(&file->read.seq, tail, file->line_len)) != 0 ||
                !pstring_is_fasta(&file->read.seq) ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcCorrupt);
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading fasta");
            }
            break;

        case eIlluminaNativeFileTypeNoise:
            if( (rc = read_signal(tail, file->line_len, &file->read.noise)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=converting noise");
            }
            break;

        case eIlluminaNativeFileTypeIntensity:
            if( (rc = read_signal(tail, file->line_len, &file->read.intensity)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=converting intensity");
            }
            break;

        case eIlluminaNativeFileTypeSignal:
            if( (rc = read_signal(tail, file->line_len, &file->read.signal)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=converting signal");
            }
            break;
    }
    /* line fully consumed; mark the spot data as available */
    file->line = NULL;
    file->ready = true;
#if _DEBUGGING
    DEBUG_MSG(3, ("name:'%s' [%li:%li:%li:%li]\n", file->name.data, 
                file->coord[0], file->coord[1], file->coord[2], file->coord[3]));
    if( file->read.seq.len ) {
        DEBUG_MSG(3, ("seq:'%.*s'\n", file->read.seq.len, file->read.seq.data));
    }
    if( file->read.qual.len ) {
        DEBUG_MSG(3, ("qual{0x%x}: %u bytes\n", file->read.qual_type, file->read.qual.len));
    }
#endif
    return 0;
}
Пример #7
0
/*
 * Parse one line of a tab separated qseq file:
 * fields 1-2   concatenated with "_" into the spot name prefix
 * fields 3-6   appended with ":" as the spot id (lane:tile:x:y) and also
 *              stored numerically in file->coord[0..3]
 * field  7     index; stored as barcode unless it is a single '0' or '1'
 * field  8     read id, exactly one digit ('0' is stored as
 *              ILLUMINAWRITER_READID_NONE)
 * field  9     fasta
 * field  10    quality
 * field  11    (optional) read filter: '1' is pass, '0' is reject
 *
 * Returns 0 on success; rcInvalid for a malformed line, rcInconsistent when
 * the sequence and quality lengths differ, or the error from a failing
 * pstring operation.
 */
static
rc_t parse_qseq(IlluminaFileInfo* file, const char* data, size_t data_sz)
{
    rc_t rc = 0;
    const char* t, *str = data, *end = data + data_sz;
    int tabs = 0;
    do {
        if( (t = memchr(str, '\t', end - str)) != NULL ) {
            switch(++tabs) {
                case 1:
                    rc = pstring_assign(&file->name, str, t - str);
                    break;
                case 2:
                    if( (rc = pstring_append(&file->name, "_", 1)) == 0 ) {
                        rc = pstring_append(&file->name, str, t - str);
                    }
                    break;
                case 3:
                case 4:
                case 5:
                case 6:
                    /* coordinate: keep the number (0 on conversion error) and its text */
                    errno = 0;
                    file->coord[tabs - 3] = strtol(str, NULL, 10);
                    if( errno != 0 ) {
                        file->coord[tabs - 3] = 0;
                    }
                    if( (rc = pstring_append(&file->name, ":", 1)) == 0 ) {
                        rc = pstring_append(&file->name, str, t - str);
                    }
                    break;
                case 7:
                    /* a lone '0' or '1' is not treated as a barcode */
                    if( t - str != 1 || (*str != '0' && *str != '1') ) {
                        rc = pstring_assign(&file->barcode, str, t - str);
                    }
                    break;
                case 8:
                    /* cast to unsigned char: isdigit() is undefined for negative
                       values, which raw file bytes may produce */
                    if( t - str != 1 || !isdigit((unsigned char)*str) ) {
                        rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
                    } else {
                        file->read.read_id = *str - '0';
                        if( file->read.read_id == 0 ) {
                            file->read.read_id = ILLUMINAWRITER_READID_NONE;
                        }
                    }
                    break;
                case 9:
                    rc = pstring_assign(&file->read.seq, str, t - str);
                    break;
                case 10:
                    file->read.qual_type = ILLUMINAWRITER_COLMASK_QUALITY_PHRED;
                    rc = pstring_assign(&file->read.qual, str, t - str);
                    break;
            }
            str = ++t;
        }
    } while( rc == 0 && t != NULL && str < end );

    if( rc == 0 ) {
        if( tabs == 9 ) {
            /* no filter field: quality is the unterminated remainder of the line */
            file->read.qual_type = ILLUMINAWRITER_COLMASK_QUALITY_PHRED;
            rc = pstring_assign(&file->read.qual, str, end - str);
        } else if( tabs == 10 ) {
            /* the remainder after the 10th tab must be a one character filter flag */
            if( end - str != 1 ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
            } else if( *str == '1' ) {
                file->read.filter = SRA_READ_FILTER_PASS;
            } else if( *str == '0' ) {
                file->read.filter = SRA_READ_FILTER_REJECT;
            } else {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
            }
        } else {
            /* too few or too many fields */
            rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
        }
        if( rc == 0 ) {
            if( file->read.seq.len != file->read.qual.len ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInconsistent);
            } else {
                /* NOTE(review): 64 presumably is the ASCII offset and 0..0x7F the
                   accepted range - confirm against pstring_quality_convert() */
                rc = pstring_quality_convert(&file->read.qual, eExperimentQualityEncoding_Ascii, 64, 0, 0x7F);
            }
        }
    }
    return rc;
}