Пример #1
0
/* pstring_copy
 *  duplicates the content of "src" into "dst" by handing src's data pointer
 *  and length to pstring_assign
 *
 *  returns an rcParam/rcNull error when "src" is NULL,
 *  otherwise whatever pstring_assign returns
 */
rc_t pstring_copy(pstring* dst, const pstring* src)
{
    if( src != NULL ) {
        return pstring_assign(dst, src->data, src->len);
    }
    return RC(rcSRA, rcFormatter, rcCopying, rcParam, rcNull);
}
Пример #2
0
/*
 * In a single-line record, tries to grab the last two chunks delimited by sep
 * as sequence and quality; spaces adjacent to sep are skipped.
 * Normally the line would look like "name sep seq sep sep qual".
 *
 * The text in front of the sequence separator is handed to parse_spot_name()
 * (first word). On success the sequence and quality are stored in
 * file->spot->read and the quality type is taken from file->qualType.
 *
 * Returns true only when name, sequence and quality were all accepted and
 * stored; false otherwise (a storing failure is additionally logged).
 *
 * Every character read is preceded by a bounds check, so neither the space
 * skipping nor the final validation looks past the end of the line, and the
 * sequence length can never become negative (which used to be possible when
 * sep itself is a space).
 */
static
bool find_seq_qual_by_sep(FastqLoaderFmt* self, FastqFileInfo* file, const char sep)
{
    const char* seq = NULL, *qual = NULL;
    const char* line_end;
    size_t seq_len = 0, qual_len = 0;
    rc_t rc;

    FileReadData_init(file->spot, false);

    /* last separator starts the quality, the one before it starts the sequence */
    qual = memrchr(file->line, sep, file->line_len);
    if( qual == NULL ) {
        return false;
    }
    seq = memrchr(file->line, sep, qual - file->line);
    if( seq == NULL ) {
        return false;
    }
    if( parse_spot_name(file->file, file->spot, file->line, seq - file->line, 1) == 0 ) {
        return false;
    }
    line_end = file->line + file->line_len;

    /* step over the separator and any spaces, but never beyond the quality separator */
    ++seq;
    while( seq < qual && *seq == ' ' ) {
        ++seq;
    }
    seq_len = qual - seq;

    /* same for quality, bounded by the end of the line */
    ++qual;
    while( qual < line_end && *qual == ' ' ) {
        ++qual;
    }
    qual_len = line_end - qual;

    /* lengths are verified first so that *seq and *qual are only read in-bounds */
    if( seq_len == 0 || qual_len == 0 ) {
        return false;
    }
    if( *seq == sep || *seq == ' ' || *qual == sep || *qual == ' ' ) {
        return false;
    }
    if( !match_seq_to_qual(seq, seq_len, qual, qual_len) ) {
        return false;
    }

    rc = pstring_assign(&file->spot->read.seq, seq, seq_len);
    if( rc == 0 ) {
        if( pstring_is_fasta(&file->spot->read.seq) ) {
            rc = pstring_assign(&file->spot->read.qual, qual, qual_len);
            if( rc == 0 ) {
                file->spot->read.qual_type = file->qualType;
                return true;
            }
        }
        /* not fasta or quality could not be stored: drop the sequence again */
        file->spot->read.seq.len = 0;
    }
    if( rc != 0 ) {
        SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=storing read data");
    }
    return false;
}
Пример #3
0
/*
 * Stores data_sz bytes of data in read->qual and runs pstring_quality_convert
 * on it with eExperimentQualityEncoding_Decimal and the arguments 0, -128, 127.
 * When both steps succeed the read's quality type is set to
 * ILLUMINAWRITER_COLMASK_QUALITY_LOGODDS4.
 *
 * Returns 0 on success or the rc of the step that failed.
 */
static
rc_t read_quality(const char* data, size_t data_sz, IlluminaRead* read)
{
    rc_t rc = pstring_assign(&read->qual, data, data_sz);

    if( rc != 0 ) {
        return rc;
    }
    rc = pstring_quality_convert(&read->qual, eExperimentQualityEncoding_Decimal, 0, -128, 127);
    if( rc == 0 ) {
        read->qual_type = ILLUMINAWRITER_COLMASK_QUALITY_LOGODDS4;
    }
    return rc;
}
Пример #4
0
/*
 * Builds the spot for the read currently held in self->read and writes it.
 *
 *  - everything after the first '#' in readId->data becomes the spot group;
 *    readId->len is shortened so that it ends right before the '#'
 *  - the read name is self->name_prefix followed by the read id (a ':' is
 *    inserted when the prefix ends with a digit), or just the read id when
 *    the prefix is empty
 *  - flags are passed to SRF_set_read_filter to set self->read.filter
 *
 * Returns 0 on success, otherwise the rc of the failing step.
 */
static
rc_t fe_new_read(fe_context_t *self, int flags, pstring *readId )
{
    static IlluminaSpot spot;
    pstring readName, spotGroup;
    rc_t rc;
    char *hash = strchr(readId->data, '#');

    /* look for spot group */
    if( hash == NULL ) {
        pstring_clear(&spotGroup);
    } else {
        const char *group = hash + 1;

        readId->len = hash - readId->data;
        rc = pstring_assign(&spotGroup, group, strlen(group));
        if( rc != 0 ) {
            SRALoaderFile_LOG(self->ctx.file, klogInt, rc,
                "extracting barcode from spot '$(spotname)'", "spotname=%s", readId->data);
            return rc;
        }
    }

    /* build the read name from prefix (self->name_prefix) and read id */
    if( self->name_prefix.len > 0 ) {
        rc = pstring_copy(&readName, &self->name_prefix);
        if( rc == 0 && isdigit(readName.data[readName.len - 1]) ) {
            rc = pstring_append(&readName, ":", 1);
        }
        if( rc == 0 ) {
            rc = pstring_concat(&readName, readId);
        }
    } else {
        rc = pstring_copy(&readName, readId);
    }
    if( rc != 0 ) {
        SRALoaderFile_LOG(self->ctx.file, klogErr, rc,
            "preparing spot name $(spotname)", "spotname=%s", readId->data);
        return rc;
    }

    SRF_set_read_filter(&self->read.filter, flags);

    IlluminaSpot_Init(&spot);
    rc = IlluminaSpot_Add(&spot, &readName, &spotGroup, &self->read);
    if( rc != 0 ) {
        return rc;
    }
    return SRAWriterIllumina_Write(self->writer, self->ctx.file, &spot);
}
Пример #5
0
/*
 * Consumes the first four tab-terminated fields of data.
 *
 * Each field is converted with strtol into file->coord[0..3] (0 is stored
 * when strtol sets errno) and the field texts are joined with ':' into
 * file->name. When tail is not NULL it is first reset to NULL and then, from
 * the second field on, set to the character following the field's tab.
 *
 * Returns rcTooShort when fewer than four tab-terminated fields were seen,
 * otherwise the rc of the last pstring operation.
 */
static
rc_t read_spot_coord(IlluminaFileInfo* file, const char* data, size_t data_sz, const char** tail)
{
    rc_t rc = 0;
    const char* field = data;
    const char* const limit = data + data_sz;
    int ntabs = 0;

    if( tail != NULL ) {
        *tail = NULL;
    }
    for( ; ; ) {
        const char* tab = memchr(field, '\t', limit - field);

        if( tab == NULL ) {
            break;
        }
        ++ntabs;

        /* numeric value of the field */
        errno = 0;
        file->coord[ntabs - 1] = strtol(field, NULL, 10);
        if( errno != 0 ) {
            file->coord[ntabs - 1] = 0;
        }

        /* textual value goes into the name */
        if( ntabs == 1 ) {
            rc = pstring_assign(&file->name, field, tab - field);
        } else {
            rc = pstring_append(&file->name, ":", 1);
            if( rc == 0 ) {
                rc = pstring_append(&file->name, field, tab - field);
            }
            if( tail != NULL ) {
                *tail = tab + 1;
            }
        }

        field = tab + 1;
        if( rc != 0 || field >= limit || ntabs >= 4 ) {
            break;
        }
    }

    if( ntabs < 4 ) {
        rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcTooShort);
    }
    return rc;
}
Пример #6
0
/*
 * Maps a read label to its read type via AbsolidRead_Suffix2ReadType and
 * stores the result in *type.
 * For a recognized type, name is set to the matching entry of
 * AbisolidReadType2ReadLabel; for an unrecognized one only a debug message
 * is emitted and name is left untouched.
 *
 * Returns 0, or the rc of pstring_assign when storing the label fails.
 */
static
rc_t set_label_type(const char* label, pstring* name, EAbisolidReadType* type)
{
    const char* canonical;

    assert(name && type);

    *type = AbsolidRead_Suffix2ReadType(label);
    if( *type == eAbisolidReadType_Unknown ) {
        DEBUG_MSG(3, ("read label is not recognized: '%s'\n", label));
        return 0;
    }
    canonical = AbisolidReadType2ReadLabel[*type];
    return pstring_assign(name, canonical, strlen(canonical));
}
Пример #7
0
/*
 * Reads the header section of one SFF read into self->read_header and
 * stores the read name in self->name.
 *
 * Steps:
 *  - make the fixed-size part (SFFReadHeader_size bytes) available and copy it
 *  - on little-endian hosts byte-swap the multi-byte fields
 *  - verify that the fixed size plus name_length, rounded up to a multiple
 *    of 8, equals the header_length stored in the read header
 *  - make the remaining header bytes available and copy name_length bytes
 *    of them as the read name
 *
 * Returns 0 on success; rcInvalid when the header length does not match,
 * otherwise the rc of the failing read/assign step.
 */
static
rc_t SFFLoaderFmtReadDataHeader(SFFLoaderFmt* self, const SRALoaderFile* file)
{
    rc_t rc = 0;
    uint16_t head_sz = 0;

    /* Make sure the entire fixed portion of Read Header section is in the file buffer window */
    if( (rc = SFFLoaderFmt_ReadBlock(self, file, SFFReadHeader_size, "read header", false)) != 0 ) {
        return rc;
    }
    memcpy(&self->read_header, self->file_buf, SFFReadHeader_size);
#if __BYTE_ORDER == __LITTLE_ENDIAN
    self->read_header.header_length = bswap_16(self->read_header.header_length);
    self->read_header.name_length = bswap_16(self->read_header.name_length);
    self->read_header.number_of_bases = bswap_32(self->read_header.number_of_bases);
    self->read_header.clip_quality_left = bswap_16(self->read_header.clip_quality_left);
    self->read_header.clip_quality_right = bswap_16(self->read_header.clip_quality_right);
    self->read_header.clip_adapter_left = bswap_16(self->read_header.clip_adapter_left);
    self->read_header.clip_adapter_right = bswap_16(self->read_header.clip_adapter_right);
#endif

    /* expected header size: fixed part + name, padded to an 8-byte boundary */
    head_sz = SFFReadHeader_size + self->read_header.name_length;
    head_sz += (head_sz % 8) ? (8 - (head_sz % 8)) : 0;
    if( head_sz != self->read_header.header_length ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcFormat, rcInvalid);
        /* report the value that was actually compared: the read header's length field */
        SRALoaderFile_LOG(file, klogErr, rc, "read header length $(h) != $(s)", PLOG_2(PLOG_U16(h),PLOG_U16(s)),
                          self->read_header.header_length, head_sz);
        return rc;
    }
    /* read name */
    /* NOTE(review): file_advance presumably tells ReadBlock how many bytes to
       skip before the next window - confirm against SFFLoaderFmt_ReadBlock */
    self->file_advance = SFFReadHeader_size;
    if( (rc = SFFLoaderFmt_ReadBlock(self, file, head_sz - SFFReadHeader_size, "read header", false)) != 0) {
        return rc;
    }
    self->file_advance = head_sz - SFFReadHeader_size;

    if( (rc = pstring_assign(&self->name, self->file_buf, self->read_header.name_length)) != 0 ) {
        SRALoaderFile_LOG(file, klogErr, rc, "copying read name", NULL);
    }
    return rc;
}
Пример #8
0
/*
 * Handles one SRF read chunk.
 *
 *  1. SRF_ParseReadChunk extracts the read flags and read id and reports how
 *     many bytes of data it consumed; the remaining bytes (preceded by
 *     fe->defered, if set) are added to the ZTR buffer.
 *  2. ZTR blocks are parsed and processed one by one; sequence, quality1,
 *     quality4 and signal chunks are remembered in the fe context.
 *  3. The remembered chunks are decompressed and copied into fe->read and
 *     fe_new_read() is called for the read.
 *  4. All chunks remembered in fe are freed.
 *
 * ctx is used as an fe_context_t. Returns 0 on success or an rc describing
 * the failure.
 *
 * NOTE(review): the early "return SRALoaderFile_LOG(...)" exits leave the
 * function before step 4 - chunks already stored in fe do not appear to be
 * released on those paths; confirm whether the caller takes care of that.
 */
static
rc_t parse_read(SRF_context *ctx, ZTR_Context *ztr_ctx, const uint8_t *data, size_t size)
{
    rc_t rc = 0;
    size_t parsed;
    uint8_t flags;
    pstring readId;
    ztr_raw_t ztr_raw;
    ztr_t ztr;
    enum ztr_chunk_type type;
    fe_context_t* fe = (fe_context_t*)ctx;

    /* reset every chunk slot: each one is written as a single pointer and set to NULL */
    *(void **)&fe->sequence =
    *(void **)&fe->quality1 =
    *(void **)&fe->quality4 =
    *(void **)&fe->signal =
    *(void **)&fe->intensity = 
    *(void **)&fe->noise = NULL;
    
    rc = SRF_ParseReadChunk(data, size, &parsed, &flags, &readId);
    if(rc) {
        /* the parser's result is used as the last component of the reported rc */
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rc);
        return SRALoaderFile_LOG(ctx->file, klogErr, rc, "corrupt", NULL);
    }
    /* deferred data, when present, goes into the buffer ahead of this chunk's payload */
    if(fe->defered != NULL)
        ZTR_AddToBuffer(ztr_ctx, fe->defered, fe->defered_len);
    ZTR_AddToBuffer(ztr_ctx, data + parsed, size - parsed);
    if(fe->defered == NULL) {
        /* first try to read a block right away; only if that fails try to parse a ZTR header */
        rc = ZTR_ParseBlock(ztr_ctx, &ztr_raw);
        if(rc == 0)
            goto PARSE_BLOCK; /* jumps into the loop body below, skipping its ZTR_ParseBlock call */
        rc = ZTR_ParseHeader(ztr_ctx);
        if(rc) {
            return SRALoaderFile_LOG(ctx->file, klogErr, rc, "corrupt", NULL);
        }
    }
    
    /* consume all blocks currently in the buffer */
    while (!ZTR_BufferIsEmpty(ztr_ctx)) {
        rc = ZTR_ParseBlock(ztr_ctx, &ztr_raw);
    PARSE_BLOCK:
        if(rc != 0 || (rc = ZTR_ProcessBlock(ztr_ctx, &ztr_raw, &ztr, &type)) != 0 ) {
            return SRALoaderFile_LOG(ctx->file, klogErr, rc, "corrupt", NULL);
        }
        
        switch (type) {
            case READ:
                if(ztr.sequence->datatype != i8) {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                    return SRALoaderFile_LOG(ctx->file, klogErr, rc, "invalid data type for sequence data", NULL);
                }
                fe->sequence = ztr;
                break;
            case QUALITY1:
                if(ztr.quality1->datatype != i8) {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                    return SRALoaderFile_LOG(ctx->file, klogErr, rc, "invalid data type for quality1 data", NULL);
                }
                fe->quality1 = ztr;
                break;
            case QUALITY4:
                if(ztr.quality4->datatype != i8) {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                    return SRALoaderFile_LOG(ctx->file, klogErr, rc, "invalid data type for quality4 data", NULL);
                }
                fe->quality4 = ztr;
                break;
            case SIGNAL4:
                /* the chunk's Type tag selects the slot: "SLXI" -> intensity, "SLXN" -> noise,
                   anything else -> signal; a chunk whose kind is being skipped is freed right here */
                /* NOTE(review): ztr.signal4 is dereferenced here before the NULL tests
                   in the "else if(ztr.signal4)" branches - confirm it can never be NULL */
                if(ztr.signal4->Type != NULL && strncmp(ztr.signal4->Type, "SLXI", 4) == 0 ) {
                    if( !fe->skip_intensity ) {
                        fe->intensity = ztr;
                    } else if(ztr.signal4){
			if(ztr.signal4->data) free(ztr.signal4->data);
			free(ztr.signal4);
		    }
                } else if(ztr.signal4->Type != NULL && strncmp(ztr.signal4->Type, "SLXN", 4) == 0 ) {
                    if( !fe->skip_noise ) {
                        fe->noise = ztr;
                    } else if(ztr.signal4){
			if(ztr.signal4->data) free(ztr.signal4->data);
			free(ztr.signal4);
                    }
                } else if( !fe->skip_signal ) {
                    fe->signal = ztr;
		} else if(ztr.signal4){
			if(ztr.signal4->data) free(ztr.signal4->data);
			free(ztr.signal4);
                }
                break;

            default:
                /* any other chunk type: free the processed chunk, then fall through
                   to also free the raw data */
                free(*(void **)&ztr);

            case none:
            case ignore:
                if(ztr_raw.data) {
                    free(ztr_raw.data);
                }
                break;
        }
        /* raw block meta data is released after every block */
	if(ztr_raw.meta){
		free(ztr_raw.meta);
		ztr_raw.meta=NULL;
	}
    }
    
    /* executes at most once: every path through the body ends in a break;
       the loop form only provides a common exit to the cleanup below */
    while(rc == 0) {
        if(*(void **)&fe->sequence == NULL) {
            rc = RC(rcSRA, rcFormatter, rcParsing, rcConstraint, rcViolated);
            SRALoaderFile_LOG(ctx->file, klogErr, rc, "missing sequence data", NULL);
            break;
        }
        if(*(void **)&fe->quality4 == NULL && *(void **)&fe->quality1 == NULL) {
            rc = RC(rcSRA, rcFormatter, rcParsing, rcConstraint, rcViolated);
            SRALoaderFile_LOG(ctx->file, klogErr, rc, "missing quality data", NULL);
            break;
        }

        /* each chunk is decompressed first and then copied into the matching fe->read field */
        if( (rc = ILL_ZTR_Decompress(ztr_ctx, BASE, fe->sequence, fe->sequence)) != 0 ||
            (rc = pstring_assign(&fe->read.seq, fe->sequence.sequence->data, fe->sequence.sequence->datasize)) != 0 ) {
            SRALoaderFile_LOG(ctx->file, klogErr, rc, "failed to decompress sequence data", NULL);
            break;
        }
        
        /* quality4 is preferred over quality1 when both are present */
        if( *(void **)&fe->quality4 != NULL ) {
            if( (rc = ILL_ZTR_Decompress(ztr_ctx, CNF4, fe->quality4, fe->sequence)) != 0 ||
                (rc = pstring_assign(&fe->read.qual, fe->quality4.quality4->data, fe->quality4.quality4->datasize)) != 0 ) {
                SRALoaderFile_LOG(ctx->file, klogErr, rc, "failed to decompress quality4 data", NULL);
                break;
            }
            fe->read.qual_type = ILLUMINAWRITER_COLMASK_QUALITY_LOGODDS4;
        } else if( *(void **)&fe->quality1 != NULL ) {
            /* NOTE(review): the size is read through the quality4 member of the quality1
               chunk - presumably both members share one layout; confirm or use quality1 */
            if( (rc = ILL_ZTR_Decompress(ztr_ctx, CNF1, fe->quality1, fe->sequence)) != 0 ||
                (rc = pstring_assign(&fe->read.qual, fe->quality1.quality1->data, fe->quality1.quality4->datasize)) != 0 ) {
                SRALoaderFile_LOG(ctx->file, klogErr, rc, "failed to decompress quality1 data", NULL);
                break;
            }
            fe->read.qual_type = ILLUMINAWRITER_COLMASK_QUALITY_PHRED;
        }
        if( *(void **)&fe->signal != NULL ) {
            if( (rc = ILL_ZTR_Decompress(ztr_ctx, SMP4, fe->signal, fe->sequence)) != 0 ||
                (rc = pstring_assign(&fe->read.signal, fe->signal.signal4->data, fe->signal.signal4->datasize)) != 0 ) {
                SRALoaderFile_LOG(ctx->file, klogErr, rc, "failed to decompress signal data", NULL);
                break;
            }
        }
        if( *(void **)&fe->intensity != NULL ) {
            if( (rc = ILL_ZTR_Decompress(ztr_ctx, SMP4, fe->intensity, fe->sequence)) != 0 ||
                (rc = pstring_assign(&fe->read.intensity, fe->intensity.signal4->data, fe->intensity.signal4->datasize)) != 0 ) {
                SRALoaderFile_LOG(ctx->file, klogErr, rc, "failed to decompress intensity data", NULL);
                break;
            }
        }
        if( *(void **)&fe->noise != NULL ) {
            if( (rc = ILL_ZTR_Decompress(ztr_ctx, SMP4, fe->noise, fe->sequence)) != 0 ||
                (rc = pstring_assign(&fe->read.noise, fe->noise.signal4->data, fe->noise.signal4->datasize)) != 0 ) {
                SRALoaderFile_LOG(ctx->file, klogErr, rc, "failed to decompress noise data", NULL);
                break;
            }
        }
        /* everything is in fe->read: build and write the spot */
        rc = fe_new_read(fe, flags, &readId);
        break;
    }
    /* release every chunk that was remembered in fe, together with its data buffer */
    if(fe->sequence.sequence) {
        if(fe->sequence.sequence->data)
            free(fe->sequence.sequence->data);
        free(fe->sequence.sequence);
    }
    if(fe->quality1.quality1) {
        if(fe->quality1.quality1->data)
            free(fe->quality1.quality1->data);
        free(fe->quality1.quality1);
    }
    if(fe->quality4.quality4) {
        if(fe->quality4.quality4->data)
            free(fe->quality4.quality4->data);
        free(fe->quality4.quality4);
    }
    if(fe->signal.signal4) {
        if(fe->signal.signal4->data)
            free(fe->signal.signal4->data);
        free(fe->signal.signal4);
    }
    if(fe->intensity.signal4) {
        if(fe->intensity.signal4->data)
            free(fe->intensity.signal4->data);
        free(fe->intensity.signal4);
    }
    if(fe->noise.signal4) {
        if(fe->noise.signal4->data)
            free(fe->noise.signal4->data);
        free(fe->noise.signal4);
    }
    return rc;
}
Пример #9
0
/* parses name as a given word number (1-based) in a str of size len
 * looks for name(#barcode)?([\/.]\d)?
 *
 * on success fills spot->name and, when present, spot->barcode and
 * spot->read.read_id
 *
 * returns score of found parts
 * score == 0 word not found (also returned when name or barcode cannot be stored)
 *
 * all look-behind reads (name_end[-1]) and backward searches are guarded so
 * that nothing in front of the word is touched, and the in-place removal of
 * "_R<digit>" uses memmove because source and destination overlap
 */
static
uint8_t parse_spot_name(const SRALoaderFile* file, FileReadData* spot, const char* str, size_t len, uint8_t word_number)
{
    uint8_t w, score = 0;
    const char* name, *name_end;

    name = name_end = str;
    /* set name_end to end of word_number-th word */
    for(w = 1; w <= word_number || name_end == NULL; w++ ) {
        /* skip consecutive spaces; the bound is tested before the character is read */
        while( name_end != &str[len] && *name_end == ' ' ) {
            name_end++;
        }
        name = name_end;
        name_end = memchr(name, ' ', len - (name_end - str));
        if( name_end == NULL ) {
            if( w == word_number ) {
                /* last word on the line: it extends to the end of str */
                name_end = &str[len];
            }
            break;
        }
    }
    if( name != name_end && name_end != NULL ) {
        char* x;
        rc_t rc;

        /* init only name portion */
        FileReadData_init(spot, true);
        --name_end; /* goto last char */
        /* a read id suffix needs at least two characters, so a one-character
           word is never inspected at name_end[-1] */
        if( name_end > name && isdigit((unsigned char)name_end[0]) &&
            (name_end[-1] == '\\' || name_end[-1] == '/') ) {
            score++;
            spot->read.read_id = name_end[0] - '0';
            name_end -= 2;
        } else if( name_end > name && isdigit((unsigned char)*name_end) && name_end[-1] == '.' ) {
            int q = 0;
            if( memrchr(name, '#', name_end - name) != NULL ) {
                /* have barcode -> this is read id */
                q = 4;
            } else {
                /* may be a read id, check to see if 4 coords follow */
                const char* end = name_end - 1;
                while( --end >= name ) {
                    if( strchr(":|_", *end) != NULL ) {
                        q++;
                    } else if( !isdigit((unsigned char)*end) ) {
                        break;
                    }
                }
            }
            if( q == 4 ) {
                score++;
                spot->read.read_id = name_end[0] - '0';
                name_end -= 2;
            }
        }
        /* after stripping the read id name_end may sit in front of name (word was
           just "/1"); search for a barcode only while something is left to search */
        if( name_end > name && (x = memrchr(name, '#', name_end - name)) != NULL ) {
            score++;
            if( (rc = pstring_assign(&spot->barcode, x + 1, name_end - x)) != 0 ) {
                SRALoaderFile_LOG(file, klogErr, rc, "barcode $(b)", "b=%.*s", name_end - x, x + 1);
                return 0;
            }
            if( pstring_strcmp(&spot->barcode, "0") == 0 ) {
                pstring_clear(&spot->barcode);
            } else if( spot->barcode.len >= 4 &&
                       (strncmp(spot->barcode.data, "0/1_", 4) == 0 || strncmp(spot->barcode.data, "0/2_", 4) == 0) ) {
                /* barcode of the form 0/<read id>_<real barcode> */
                spot->read.read_id = spot->barcode.data[2] - '0';
                /* NOTE(review): source lies inside the destination and the result is
                   ignored - relies on pstring_assign tolerating overlap; confirm */
                pstring_assign(&spot->barcode, &spot->barcode.data[4], spot->barcode.len - 4);
            }
            name_end = --x;
        }
        score++;
        if( (rc = pstring_assign(&spot->name, name, name_end - name + 1)) != 0 ) {
            SRALoaderFile_LOG(file, klogErr, rc, "spot name $(n)", "n=%.*s", name_end - name + 1, name);
            return 0;
        }
        /* search for _R\d\D in name and use it as read id, remove from name or spot won't assemble */
        x = spot->name.data;
        while( (x = strrchr(x, 'R')) != NULL ) {
            if( x != spot->name.data && *(x - 1) == '_' &&
                isdigit((unsigned char)*(x + 1)) && !isalnum((unsigned char)*(x + 2)) ) {
                score++;
                if( spot->read.read_id == -1 ) {
                    spot->read.read_id = *(x + 1) - '0';
                }
                /* close the gap in place; the regions overlap, so strcpy must not be used */
                memmove(x - 1, x + 2, strlen(x + 2) + 1);
                /* NOTE(review): three characters ("_R<digit>") are removed above but the
                   length shrinks by 4 - kept as in the original, confirm the intent */
                spot->name.len -= 4;
                break;
            }
            x++;
        }
        /* find last '=' and use only whatever is to the left of it */
        if( (x = memrchr(spot->name.data, '=', spot->name.len)) != NULL ) {
            rc = pstring_assign(&spot->name, spot->name.data, (x - spot->name.data) );
        }
    }
    return score;
}
Пример #10
0
/*
 * Groups the argc input files in argv, then validates and parses the groups.
 *
 * For every entry of file_types (outer loop) all files are reviewed (inner
 * loop) and only those whose detected type equals the current entry are
 * processed, so files enter the groups ordered by file type.
 * A file's type is first guessed from substrings of its name (the key lists
 * in file_types) and then overridden by the declared run file type when that
 * is one of the Illumina native types.
 * Each accepted file gets an IlluminaFileInfo which is either appended to the
 * group found through FGroup_Find or becomes the first file of a new group;
 * new groups are inserted into the list ordered by the first two coordinates
 * of the file's first spot.
 * Afterwards the groups are checked with FGroup_Validate and processed with
 * FGroup_Parse; the list is destroyed in any case.
 *
 * *spots_bad_count receives self->spots_bad_count. Returns 0 or the first
 * error encountered.
 */
static
rc_t IlluminaLoaderFmt_WriteData(IlluminaLoaderFmt* self, uint32_t argc, const SRALoaderFile* const argv[], int64_t* spots_bad_count)
{
    rc_t rc = 0;
    uint32_t t, i, k, ftype_q = sizeof(file_types) / sizeof(file_types[0]);
    SLList files;
    IlluminaFileInfo* file = NULL;

    SLListInit(&files);

    /* group files using spotname, for _prb. file name prefix is used,
       files reviewed by type detected from name and ordered by file_type array */
    for(t = 0; rc == 0 && t < ftype_q; t++) {
        for(i = 0; rc == 0 && i < argc; i++) {
            const char* fname, *blk_pfx;
            int prefix_len = 0;
            ERunFileType ftype;
            EIlluminaNativeFileType type = eIlluminaNativeFileTypeNotSet;
            FGroup_Find_data data;

            if( (rc = SRALoaderFileName(argv[i], &fname)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading file name", NULL);
                break;
            }
            if( (rc = SRALoaderFile_FileType(argv[i], &ftype)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading file type", NULL);
                break;
            }
            if( (rc = SRALoaderFileBlockName(argv[i], &blk_pfx)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading DATA_BLOCK/@name", NULL);
                break;
            }
            /* a missing block name is treated as an empty prefix */
            if( blk_pfx == NULL ) {
                blk_pfx = "";
            }
            {{
                /* skip path if present */
                const char* p = strrchr(fname, '/');
                fname = p ? p + 1 : fname;
                p = NULL;
                /* the first key found as a substring of the file name decides the type;
                   the text in front of that key is the file name prefix */
                for(k = 0; type == eIlluminaNativeFileTypeNotSet && k < ftype_q; k++) {
                    const char* const* e = file_types[k].key;
                    while( *e != NULL ) {
                        p = strstr(fname, *e++);
                        if( p != NULL ) {
                            type = file_types[k].type;
                            break;
                        } 
                    }
                }
                if( p != NULL ) {
                    prefix_len = p - fname;
                }
            }}
            /* an explicitly declared Illumina native file type overrides the name based guess */
            if( ftype == rft_IlluminaNativeSeq ) {
                type = eIlluminaNativeFileTypeFasta;
            } else if( ftype == rft_IlluminaNativePrb ) {
                type = eIlluminaNativeFileTypeQuality4;
            } else if( ftype == rft_IlluminaNativeInt ) {
                type = eIlluminaNativeFileTypeIntensity;
            } else if( ftype == rft_IlluminaNativeQseq ) {
                type = eIlluminaNativeFileTypeQSeq;
            }
            if( type == eIlluminaNativeFileTypeNotSet ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcUnrecognized);
                SRALoaderFile_LOG(argv[i], klogErr, rc, "detecting file type by file name", NULL);
                break;
            }
            if( type != file_types[t].type ) {
                /* one type at a time */
                continue;
            }
            DEBUG_MSG(3, ("file '%s' type set to %d\n", fname, type));
            /* from here on "file" is owned by this function until it is linked into a group */
            file = calloc(1, sizeof(*file));
            if( file == NULL ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcExhausted);
                SRALoaderFile_LOG(argv[i], klogErr, rc, "allocating file object", NULL);
                break;
            }
            IlluminaFileInfo_init(file);
            file->file = argv[i];
            file->type = type;

            /* NOTE(review): the rc of building data.key below is not examined before the
               lookup, and on the new-group path it is overwritten by a later assignment */
            if( file->type == eIlluminaNativeFileTypeQuality4 ) {
                /* in _prb there is no spotname inside so use file prefix */
                rc = pstring_assign(&data.key, fname, prefix_len);
            } else {
                /* try to get 1st spot so group can be organized by spot name */
                if( (rc = read_next_spot(blk_pfx, file)) != 0 || !file->ready ) {
                    rc = rc ? rc : RC(rcSRA, rcFormatter, rcReading, rcData, rcNotFound);
                    SRALoaderFile_LOG(argv[i], klogErr, rc, "reading 1st spot", NULL);
                    break;
                }
                rc = pstring_copy(&data.key, &file->name);
            }

            data.found = NULL;
            if( SLListDoUntil(&files, FGroup_Find, &data) && data.found != NULL ) {
                /* existing group: walk its file chain and append the new file at the end */
                IlluminaFileInfo* ss = data.found->files;

                while( rc == 0 && file != NULL ) {
                    /* only qseq files may occur more than once in a group */
                    if( ss->type != eIlluminaNativeFileTypeQSeq && ss->type == file->type ) {
                        rc = RC(rcSRA, rcFormatter, rcReading, rcFile, rcDuplicate);
                        SRALoaderFile_LOG(argv[i], klogErr, rc, "type of file for lane", NULL);
                    } else if( ss->next != NULL ) {
                        ss = ss->next;
                    } else {
                        ss->next = file;
                        file->prev = ss;
                        data.found->mask |= file->type;
                        file = NULL; /* now owned by the group */
                    }
                }
            } else {
                /* no matching group yet: create one with this file as its first member */
                data.found = calloc(1, sizeof(*data.found));
                if( data.found == NULL ) {
                    rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcInsufficient);
                    SRALoaderFile_LOG(argv[i], klogErr, rc, "preparing file group", NULL);
                    break;
                } else {
                    /* NOTE(review): the group key is always the file name prefix, while the
                       lookup key of non-prb files is the spot name - confirm this matches
                       what FGroup_Find compares */
                    if( (rc = pstring_assign(&data.found->key, fname, prefix_len)) != 0 ) {
                        SRALoaderFile_LOG(argv[i], klogErr, rc, "setting file group key", NULL);
                        FGroup_Whack(&data.found->dad, NULL);
                        break;
                    } else {
                        FGroup* curr = (FGroup*)SLListHead(&files), *prev = NULL;
                        data.found->blk_pfx = blk_pfx;
                        data.found->files = file;
                        data.found->mask = file->type;
                        /* group inserted into list by coords in 1st spot */
                        while( curr != NULL ) {
                            if( curr->files[0].coord[0] > file->coord[0] ||
                                (curr->files[0].coord[0] == file->coord[0] &&
                                 curr->files[0].coord[1] > file->coord[1]) ) {
                                /* insert in front of the first group with larger coordinates */
                                data.found->dad.next = &curr->dad;
                                if( prev == NULL ) {
                                    files.head = &data.found->dad;
                                } else {
                                    prev->dad.next = &data.found->dad;
                                }
                                break;
                            }
                            prev = curr;
                            curr = (FGroup*)curr->dad.next;
                        }
                        if( curr == NULL ) {
                            /* no group with larger coordinates: append at the end */
                            SLListPushTail(&files, &data.found->dad);
                        }
                        file = NULL; /* now owned by the group */
                    }
                }
            }
        }
    }
    if( rc == 0 ) {
        SLListForEach(&files, FGroup_Validate, &rc);
    }
    if( rc == 0 ) {
        FGroup_Parse_data data;
        data.self = self;
        if( SLListDoUntil(&files, FGroup_Parse, &data) ) {
            rc = data.rc;
        }
    } else {
        /* a file object that never made it into a group is still owned here */
        free(file);
    }
    SLListWhack(&files, FGroup_Whack, NULL);
    *spots_bad_count = self->spots_bad_count;
    return rc;
}
Пример #11
0
/* reads from a file data for a single spot, data may be partial
 *
 * nothing is read while the previously read spot is still unused
 * (file->ready is set). Otherwise one line is read and interpreted
 * according to file->type:
 *   qseq                            - whole line handled by parse_qseq
 *   fasta, noise, intensity, signal - the 4 leading coordinates become the spot
 *                                     name (prefixed with blk_pfx and ':' when
 *                                     blk_pfx is not NULL), the rest of the line
 *                                     is the payload
 *   quality4                        - whole line is quality, name is blk_pfx
 *                                     (NULL is treated as an empty prefix)
 * on success file->ready is set and file->line is reset to NULL
 *
 * returns 0 on success and on end of file (file->ready stays false then),
 * otherwise the rc of the failure, which is also logged
 */
static
rc_t read_next_spot(const char* blk_pfx, IlluminaFileInfo* file)
{
    rc_t rc = 0;
    const char* tail = NULL;
    const char* q4_name = blk_pfx != NULL ? blk_pfx : "";

    if( file->ready ) {
        /* data still not used */
        return 0;
    }
    IlluminaFileInfo_init(file);
    if( (rc = file_read_line(file, true)) != 0 ) {
        return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading more data");
    } else if( file->line == NULL ) {
        return 0; /* eof */
    }
    /* until coordinates are parsed the tail is the whole, freshly read line;
       it has to be taken after the read, the pointer held before is stale */
    tail = file->line;

    switch( file->type ) {
        case eIlluminaNativeFileTypeQSeq:
            if( (rc = parse_qseq(file, file->line, file->line_len)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading qseq");
            }
            break;

        case eIlluminaNativeFileTypeFasta:
        case eIlluminaNativeFileTypeNoise:
        case eIlluminaNativeFileTypeIntensity:
        case eIlluminaNativeFileTypeSignal:
            /* read only common first 4 coords into name and prepend with DATA_BLOCK/@name */
            if( (rc = read_spot_coord(file, file->line, file->line_len, &tail)) == 0 ) {
                if( blk_pfx != NULL ) {
                    pstring tmp_name;
                    if( (rc = pstring_copy(&tmp_name, &file->name)) == 0 &&
                        (rc = pstring_assign(&file->name, blk_pfx, strlen(blk_pfx))) == 0 &&
                        (rc = pstring_append(&file->name, ":", 1)) == 0 ) {
                        rc = pstring_concat(&file->name, &tmp_name);
                    }
                }
            }
            if( rc != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading spot coord");
            }
            break;

        case eIlluminaNativeFileTypeQuality4:
            if( (rc = read_quality(file->line, file->line_len, &file->read)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading quality");
            } else if( (rc = pstring_assign(&file->name, q4_name, strlen(q4_name))) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=name for quality 4");
            }
            break;

        default:
            rc = RC(rcSRA, rcFormatter, rcReading, rcFileFormat, rcUnknown);
            return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=processing data line");
    }

    /* process tail (after coords) for some file types */
    file->line_len -= tail - file->line; /* length of tail */
    switch( file->type ) {
        case eIlluminaNativeFileTypeQSeq:
        case eIlluminaNativeFileTypeQuality4:
        default:
            /* completely processed before */
            break;

        case eIlluminaNativeFileTypeFasta:
            if( (rc = pstring_assign(&file->read.seq, tail, file->line_len)) != 0 ||
                !pstring_is_fasta(&file->read.seq) ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcCorrupt);
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading fasta");
            }
            break;

        case eIlluminaNativeFileTypeNoise:
            if( (rc = read_signal(tail, file->line_len, &file->read.noise)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=converting noise");
            }
            break;

        case eIlluminaNativeFileTypeIntensity:
            if( (rc = read_signal(tail, file->line_len, &file->read.intensity)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=converting intensity");
            }
            break;

        case eIlluminaNativeFileTypeSignal:
            if( (rc = read_signal(tail, file->line_len, &file->read.signal)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=converting signal");
            }
            break;
    }
    /* the line is consumed; the spot stays available until the caller clears ready */
    file->line = NULL;
    file->ready = true;
#if _DEBUGGING
    DEBUG_MSG(3, ("name:'%s' [%li:%li:%li:%li]\n", file->name.data, 
                file->coord[0], file->coord[1], file->coord[2], file->coord[3]));
    if( file->read.seq.len ) {
        DEBUG_MSG(3, ("seq:'%.*s'\n", file->read.seq.len, file->read.seq.data));
    }
    if( file->read.qual.len ) {
        DEBUG_MSG(3, ("qual{0x%x}: %u bytes\n", file->read.qual_type, file->read.qual.len));
    }
#endif
    return 0;
}
Пример #12
0
/*
 * Parses one line of a tab separated QSEQ file into *file.
 *
 * Fields, in order:
 *   1st, 2nd   joined with "_" into the spot name prefix
 *   3rd - 6th  lane, tile, x, y: parsed into file->coord[0..3] and also
 *              appended to the spot name, each preceded by ":"
 *   7th        index: stored as the barcode unless it is the single
 *              character '0' or '1'
 *   8th        read id, exactly one decimal digit; 0 is stored as
 *              ILLUMINAWRITER_READID_NONE
 *   9th        fasta
 *   10th       quality
 *   11th       (optional) read filter: '1' = pass, '0' = reject
 *
 * data / data_sz describe the line to parse.
 *
 * Returns 0 on success. Returns an rcInvalid code when the line does not have
 * exactly 10 or 11 fields or when the read id / read filter is malformed,
 * an rcInconsistent code when sequence and quality lengths differ, and
 * otherwise whatever the pstring helpers report.
 */
static
rc_t parse_qseq(IlluminaFileInfo* file, const char* data, size_t data_sz)
{
    rc_t rc = 0;
    const char* t, *str = data, *end = data + data_sz;
    int tabs = 0;
    do {
        if( (t = memchr(str, '\t', end - str)) != NULL ) {
            switch(++tabs) {
                case 1:
                    rc = pstring_assign(&file->name, str, t - str);
                    break;
                case 2:
                    if( (rc = pstring_append(&file->name, "_", 1)) == 0 ) {
                        rc = pstring_append(&file->name, str, t - str);
                    }
                    break;
                case 3:
                case 4:
                case 5:
                case 6:
                    /* a coordinate for which strtol reports an error is stored as 0 */
                    errno = 0;
                    file->coord[tabs - 3] = strtol(str, NULL, 10);
                    if( errno != 0 ) {
                        file->coord[tabs - 3] = 0;
                    }
                    if( (rc = pstring_append(&file->name, ":", 1)) == 0 ) {
                        rc = pstring_append(&file->name, str, t - str);
                    }
                    break;
                case 7:
                    /* a lone '0' or '1' is not kept as a barcode */
                    if( t - str != 1 || (*str != '0' && *str != '1') ) {
                        rc = pstring_assign(&file->barcode, str, t - str);
                    }
                    break;
                case 8:
                    /* cast: passing a negative plain char to isdigit() is undefined */
                    if( t - str != 1 || !isdigit((unsigned char)*str) ) {
                        rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
                    } else {
                        file->read.read_id = *str - '0';
                        if( file->read.read_id == 0 ) {
                            file->read.read_id = ILLUMINAWRITER_READID_NONE;
                        }
                    }
                    break;
                case 9:
                    rc = pstring_assign(&file->read.seq, str, t - str);
                    break;
                case 10:
                    file->read.qual_type = ILLUMINAWRITER_COLMASK_QUALITY_PHRED;
                    rc = pstring_assign(&file->read.qual, str, t - str);
                    break;
                default:
                    /* more than 10 tabs: nothing to store, rejected after the loop */
                    break;
            }
            str = ++t;
        }
    } while( rc == 0 && t != NULL && str < end );

    if( rc == 0 ) {
        /* what follows the last tab is either the quality (9 tabs) or the read filter (10 tabs) */
        if( tabs == 9 ) {
            file->read.qual_type = ILLUMINAWRITER_COLMASK_QUALITY_PHRED;
            rc = pstring_assign(&file->read.qual, str, end - str);
        } else if( tabs == 10 ) {
            if( end - str != 1 ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
            } else if( *str == '1' ) {
                file->read.filter = SRA_READ_FILTER_PASS;
            } else if( *str == '0' ) {
                file->read.filter = SRA_READ_FILTER_REJECT;
            } else {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
            }
        } else {
            rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
        }
        if( rc == 0 ) {
            if( file->read.seq.len != file->read.qual.len ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInconsistent);
            } else {
                rc = pstring_quality_convert(&file->read.qual, eExperimentQualityEncoding_Ascii, 64, 0, 0x7F);
            }
        }
    }
    return rc;
}
Пример #13
0
/*
 * Reads the SFF common header found at the current position of `file`,
 * validates it, stores it in self->header (fields byte-swapped on
 * little-endian hosts), loads the flow chars and key sequence that follow it
 * and hands both to the configured writer (454 if self->w454 is set,
 * IonTorrent otherwise).
 *
 * If self->header already holds a header (magic_number != 0) the data is
 * treated as the next file of a concatenated stream: the previous header,
 * flow chars and key sequence are remembered and the new ones must match in
 * key length, flow count, flow chars and key sequence.
 *
 * If the magic number does not match on the first attempt in such a stream,
 * one retry is made after skipping what is assumed to be index padding.
 *
 * Returns 0 on success or the rc of the first failure; failures are logged.
 */
static
rc_t SFFLoaderFmtReadCommonHeader(SFFLoaderFmt* self, const SRALoaderFile *file)
{
    rc_t rc = 0;
    bool skiped_idx_pad = false;
    uint16_t head_sz;
    SFFCommonHeader prev_head;
    pstring prev_flow_chars;
    pstring prev_key_seq;

    if( (rc = SRALoaderFile_Offset(file, &self->index_correction)) != 0 ) {
        SRALoaderFile_LOG(file, klogErr, rc, "Reading initial file position", NULL);
        return rc;
    }
SkipIndexPad:
    self->index_correction += self->file_advance;
    if( (rc = SFFLoaderFmt_ReadBlock(self, file, SFFCommonHeader_size, NULL, true)) != 0) {
        SRALoaderFile_LOG(file, klogErr, rc, "common header, needed $(needed) bytes",
                          PLOG_U32(needed), SFFCommonHeader_size);
        return rc;
    }
    if( !skiped_idx_pad ) {
        /* Take the snapshot on the first pass only: when we come back here via
           SkipIndexPad, self->header already holds the rejected header bytes and
           snapshotting those would make the consistency check below compare the
           new header against garbage instead of the previous file's header. */
        if( self->header.magic_number != 0 ) {
            /* next file in stream, remember prev to sync to each */
            memcpy(&prev_head, &self->header, sizeof(SFFCommonHeader));
            if( (rc = pstring_copy(&prev_flow_chars, &self->flow_chars)) != 0 ||
                (rc = pstring_copy(&prev_key_seq, &self->key_seq)) != 0 ) {
                SRALoaderFile_LOG(file, klogErr, rc, "saving previous common header", NULL);
                return rc;
            }
        } else {
            prev_head.magic_number = 0;
            prev_head.index_length = 0;
        }
    }

    memcpy(&self->header, self->file_buf, SFFCommonHeader_size);
#if __BYTE_ORDER == __LITTLE_ENDIAN
    self->header.magic_number = bswap_32(self->header.magic_number);
    self->header.version = bswap_32(self->header.version);
    self->header.index_offset = bswap_64(self->header.index_offset);
    self->header.index_length = bswap_32(self->header.index_length);
    self->header.number_of_reads = bswap_32(self->header.number_of_reads);
    self->header.header_length = bswap_16(self->header.header_length);
    self->header.key_length = bswap_16(self->header.key_length);
    self->header.num_flows_per_read = bswap_16(self->header.num_flows_per_read);
#endif

    /* magic number is the four characters ".sff" */
    if( self->header.magic_number != (('.'<<24)|('s'<<16)|('f'<<8)|('f'<<0)) ) {
        if( !skiped_idx_pad && prev_head.magic_number != 0 ) {
            /* possible concatenation of 2 files with index at EOF and padded to 8 bytes with header values not padded,
               try skipping padding and reread */
            /* NOTE(review): pad is always in 1..8, so the test below is always true and an
               index_length that is already a multiple of 8 skips 8 bytes - confirm this is intended */
            uint32_t pad = 8 - prev_head.index_length % 8;
            if( pad != 0 ) {
                self->file_advance += pad;
                DEBUG_MSG(5, ("%s: trying to skip over %u bytes index section padding\n", self->file_name, pad));
                skiped_idx_pad = true;
                goto SkipIndexPad;
            }
        }
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnrecognized);
        SRALoaderFile_LOG(file, klogErr, rc, "magic number: $(m)", PLOG_U32(m), self->header.magic_number);
        return rc;
    }
    if( self->header.version != 1 ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcBadVersion);
        SRALoaderFile_LOG(file, klogErr, rc, "format version $(v)", PLOG_U32(v), self->header.version);
        return rc;
    }
    if( self->header.flowgram_format_code != SFFFormatCodeUI16Hundreths ) {
        /* NOTE: add a case here if flowgram coding gets new version to support different */
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnsupported);
        SRALoaderFile_LOG(file, klogErr, rc, "common header flowgram format code", NULL);
        return rc;
    }
    if( self->header.index_length % 8 != 0 ) {
        DEBUG_MSG(5, ("%s: index_length field value is not 8 byte padded: %u\n", self->file_name, self->header.index_length));
    }
    /* expected total header size: fixed part + flow chars + key, rounded up to a multiple of 8 */
    head_sz = SFFCommonHeader_size + self->header.num_flows_per_read + self->header.key_length;
    head_sz += (head_sz % 8) ? (8 - (head_sz % 8)) : 0;
    if( head_sz != self->header.header_length ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcFormat, rcInvalid);
        SRALoaderFile_LOG(file, klogErr, rc, "header length $(h) <> $(s) ", PLOG_2(PLOG_U16(h),PLOG_U16(s)),
                          self->header.header_length, head_sz);
        return rc;
    }
    /* read flow chars and key */
    self->file_advance = SFFCommonHeader_size;
    if( (rc = SFFLoaderFmt_ReadBlock(self, file, head_sz - SFFCommonHeader_size, "common header", false)) != 0) {
        return rc;
    }
    self->file_advance = head_sz - SFFCommonHeader_size;

    if( (rc = pstring_assign(&self->flow_chars, self->file_buf, self->header.num_flows_per_read)) != 0 ||
        (rc = pstring_assign(&self->key_seq, self->file_buf + self->header.num_flows_per_read, self->header.key_length)) != 0 ) {
        SRALoaderFile_LOG(file, klogErr, rc, "reading flows/key sequence", NULL);
        return rc;
    }
    if( prev_head.magic_number != 0 ) {
        /* next file's common header must match previous file's common header, partially */
        if( prev_head.key_length != self->header.key_length ||
            prev_head.num_flows_per_read != self->header.num_flows_per_read ||
            pstring_cmp(&prev_flow_chars, &self->flow_chars) != 0 ||
            pstring_cmp(&prev_key_seq, &self->key_seq) != 0 ) {
                rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcInconsistent);
                SRALoaderFile_LOG(file, klogErr, rc, "previous file common header differ in flows/key sequence", NULL);
        }
    }
    if( rc == 0 ) {
        if( self->w454 ) {
            rc = SRAWriter454_WriteHead(self->w454, &self->flow_chars, &self->key_seq);
        } else {
            rc = SRAWriterIonTorrent_WriteHead(self->wIonTorrent, &self->flow_chars, &self->key_seq);
        }
    }
    return rc;
}
Пример #14
0
/*
 * Reads the data section of one SFF read into self->signal, self->position,
 * self->read and self->quality.
 *
 * The section is laid out as num_flows_per_read 16-bit signal values followed
 * by three arrays of number_of_bases bytes each (position increments, bases,
 * quality), padded to a multiple of 8 bytes. Signal and position are not
 * stored when self->skip_signal is set.
 *
 * Returns 0 on success or the rc of the first failure; failures are logged.
 */
static
rc_t SFFLoaderFmtReadData(SFFLoaderFmt* self, const SRALoaderFile* file)
{
    rc_t rc = 0;
    uint32_t i;

    /* calc signal chunk size */
    size_t signal_sz = self->header.num_flows_per_read * sizeof(uint16_t);
    /* plus position, read, quality */
    size_t sz = signal_sz + self->read_header.number_of_bases * 3;
    /* + padding */
    sz += (sz % 8) ? (8 - (sz % 8)) : 0;

    /* adjust the buffer window to full data block size */
    if( (rc = SFFLoaderFmt_ReadBlock(self, file, sz, "read data", false)) != 0 ) {
        return rc;
    }
    self->file_advance = sz;

    if( !self->skip_signal ) {
        rc = pstring_assign(&self->signal, self->file_buf, signal_sz);
#if __BYTE_ORDER == __LITTLE_ENDIAN
        if( rc == 0 ) {
            /* swap each 16-bit signal value in place */
            uint16_t* sig = (uint16_t*)self->signal.data;
            for(i = 0; i < self->header.num_flows_per_read; i++) {
                sig[i] = bswap_16(sig[i]);
            }
        }
#endif
    }

    if( rc == 0 ) {
        const uint8_t* pos = self->file_buf + signal_sz;

        if( !self->skip_signal ) {
            INSDC_coord_one *p;
            /* reset buffer to proper size */
            pstring_clear(&self->position);
            rc = pstring_append_chr(&self->position, 0, self->read_header.number_of_bases * sizeof(*p));
            /* only fill the buffer if it was actually sized, and never touch
               element 0 of an empty read */
            if( rc == 0 && self->read_header.number_of_bases > 0 ) {
                p = (INSDC_coord_one*)&self->position.data[0];
                /* file stores per-base increments; keep the running sum */
                p[0] = pos[0];
                for(i = 1; i < self->read_header.number_of_bases; i++) {
                    p[i] = p[i - 1] + pos[i];
                }
            }
        }
        if( rc == 0 ) {
            pos += self->read_header.number_of_bases;
            rc = pstring_assign(&self->read, pos, self->read_header.number_of_bases);
        }
        if( rc == 0 ) {
            pos += self->read_header.number_of_bases;
            rc = pstring_assign(&self->quality, pos, self->read_header.number_of_bases);
        }
    }
    if( rc != 0 ) {
        SRALoaderFile_LOG(file, klogErr, rc, "copying read data", NULL);
    }
    return rc;
}
Пример #15
0
/*
 * Handles one SRF read chunk: parses the chunk header, runs the embedded ZTR
 * blocks and writes the resulting spot through fe->writer.
 *
 * A region chunk must have been seen before (fe->region.nreads != 0).
 * fe_new_read() derives the read type and label from the read id. When the
 * read type is above eAbisolidReadType_SPOT the chunk carries a single read,
 * selected through AbisolidReadType2ReadNumber; otherwise each ZTR block holds
 * all reads of the spot and is split using fe->region.start[].
 *
 * ZTR blocks handled: BASE (sequence), CNF1 (quality) and, unless
 * fe->skip_signal is set, SAMP (signal, one of the channels 0FAM, 1CY3, 2TXR,
 * 3CY5). Other block types are skipped.
 *
 * Processing stops at the first error so that a failure in one block cannot be
 * overwritten by the result of a later block.
 *
 * Returns 0 on success or the rc of the first failure; failures are logged.
 */
static
rc_t parse_v1_read(SRF_context *ctx, ZTR_Context *ztr_ctx, const uint8_t *data, size_t size)
{
    rc_t rc = 0;
    size_t i, parsed;
    ztr_raw_t ztr_raw;
    ztr_t ztr;
    enum ztr_chunk_type type;
    fe_context_t* fe = (fe_context_t*)ctx;

    uint8_t flags;
    pstring readId;
    EAbisolidReadType read_type;
    pstring label;

    AbsolidRead read[ABSOLID_FMT_MAX_NUM_READS];

    if( fe->region.nreads == 0 ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcNotFound);
        return SRALoaderFile_LOG(ctx->file, klogErr, rc, "missing region chunk before 1st read chunk", NULL);
    }
    if( (rc = SRF_ParseReadChunk(data, size, &parsed, &flags, &readId)) != 0 ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rc);
        return SRALoaderFile_LOG(ctx->file, klogErr, rc, "SRF parsing failure", NULL);
    }
    /* everything after the chunk header is ZTR data */
    ABI_ZTR_AddToBuffer(ztr_ctx, data + parsed, size - parsed);

    /* readId will have spotname */
    if( (rc = fe_new_read(fe, &readId, &read_type, &label)) != 0 ) {
        return SRALoaderFile_LOG(ctx->file, klogErr, rc, "parsing spot name suffix", NULL);
    }
    for(i = 0; i < sizeof(read) / sizeof(read[0]); i++) {
        AbsolidRead_Init(&read[i]);
    }
    /* stop on the first error: the next ABI_ZTR_ParseBlock() would otherwise overwrite rc */
    while( rc == 0 && !ABI_ZTR_BufferIsEmpty(ztr_ctx) ) {
        if( (rc = ABI_ZTR_ParseBlock(ztr_ctx, &ztr_raw)) != 0 ||
            (rc = ABI_ZTR_ProcessBlock(ztr_ctx, &ztr_raw, &ztr, &type)) != 0 ) {
            SRALoaderFile_LOG(ctx->file, klogErr, rc, "ZTR parsing failure", NULL);
            break;
        }
        switch (type) {
        case BASE:
            if(ztr.sequence->datatype != i8) {
                rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                SRALoaderFile_LOG(ctx->file, klogErr, rc, "read: expected 8-bit datatype", NULL);
            } else if( read_type > eAbisolidReadType_SPOT ) {
                int read_number = AbisolidReadType2ReadNumber[read_type];
                if( (rc = pstring_assign(&read[read_number].seq, ztr.sequence->data, ztr.sequence->datasize)) == 0 ) {
                    /* grab 1st, may be the only cs_key */
                    read[read_number].cs_key = fe->region.cs_key[0];
                    for(i = 1; i < fe->region.nreads; i++) {
                        if( read_type == fe->region.type[i] ) {
                            read[read_number].cs_key = fe->region.cs_key[i];
                            break;
                        }
                    }
                    SRF_set_read_filter(&read[read_number].filter, flags);
                    rc = pstring_copy(&read[read_number].label, &label);
                    DEBUG_MSG(3, ("SRF READ: '%s'\n", read[read_number].seq.data));
                }
                if( rc != 0 ) {
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying read", NULL);
                }
            } else {
                /* whole spot: region i spans from start[i] to start[i + 1], the last one to the end of data */
                for(i = 0; rc == 0 && i < fe->region.nreads; i++) {
                    int read_number = AbisolidReadType2ReadNumber[fe->region.type[i]];
                    size_t len = (i + 1 >= fe->region.nreads ? ztr.sequence->datasize : fe->region.start[i + 1]) - fe->region.start[i];
                    rc = pstring_assign(&read[read_number].seq, &ztr.sequence->data[fe->region.start[i]], len);
                    read[read_number].cs_key = fe->region.cs_key[i];
                    SRF_set_read_filter(&read[read_number].filter, flags);
                    if( fe->region.label[i].len != 0 ) {
                        rc = pstring_copy(&read[read_number].label, &fe->region.label[i]);
                    }
                    DEBUG_MSG(3, ("SRF READ[%u]: '%s'\n", (unsigned)i, read[read_number].seq.data));
                }
                if( rc != 0 ) {
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying reads", NULL);
                }
            }
            break;
        case CNF1:
            if(ztr.quality1->datatype != i8) {
                rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                SRALoaderFile_LOG(ctx->file, klogErr, rc, "quality: expected 8-bit datatype", NULL);
            } else if( read_type > eAbisolidReadType_SPOT ) {
                int read_number = AbisolidReadType2ReadNumber[read_type];
                if( (rc = pstring_assign(&read[read_number].qual, ztr.quality1->data, ztr.quality1->datasize)) == 0 ) {
                    DEBUG_MSG(3, ("SRF QUAL: %u bytes\n", read[read_number].qual.len));
                }
                if( rc != 0 ) {
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying quality", NULL);
                }
            } else {
                for(i = 0; rc == 0 && i < fe->region.nreads; i++) {
                    int read_number = AbisolidReadType2ReadNumber[fe->region.type[i]];
                    size_t len = (i + 1 >= fe->region.nreads ? ztr.quality1->datasize : fe->region.start[i + 1]) - fe->region.start[i];
                    rc = pstring_assign(&read[read_number].qual, &ztr.quality1->data[fe->region.start[i]], len);
                    DEBUG_MSG(3, ("SRF QUAL[%u]: %u bytes\n", (unsigned)i, read[read_number].qual.len));
                }
                if( rc != 0 ) {
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying qualities", NULL);
                }
            }
            break;
        case SAMP:
            if( !fe->skip_signal ) {
                int stype = ABSOLID_FMT_COLMASK_NOTSET;
                if(ztr.signal->datatype != f32) {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "signal: expected 32-bit float datatype", NULL);
                } else if( (ztr.signal->datasize % sizeof(float)) != 0 ) {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcInvalid);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "signal: size not 32-bit float aligned", NULL);
                } else if (ztr.signal->channel == NULL) {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcIncomplete);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "SIGNAL column: missing channel type", NULL);
                } else if(strcmp(ztr.signal->channel, "0FAM") == 0) {
                    stype = ABSOLID_FMT_COLMASK_FAM;
                } else if(strcmp(ztr.signal->channel, "1CY3") == 0) {
                    stype = ABSOLID_FMT_COLMASK_CY3;
                } else if(strcmp(ztr.signal->channel, "2TXR") == 0) {
                    stype = ABSOLID_FMT_COLMASK_TXR;
                } else if(strcmp(ztr.signal->channel, "3CY5") == 0) {
                    stype = ABSOLID_FMT_COLMASK_CY5;
                } else {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "SIGNAL column: unexpected channel type", NULL);
                }
#if __BYTE_ORDER == __LITTLE_ENDIAN
                /* swap every 32-bit value in place */
                for(i = 0; rc == 0 && i < ztr.signal->datasize; i += 4) {
                    uint32_t* r = (uint32_t*)&ztr.signal->data[i];
                    *r = bswap_32(*r);
                }
#endif
                if( rc == 0 ) {
                    if( read_type > eAbisolidReadType_SPOT ) {
                        int read_number = AbisolidReadType2ReadNumber[read_type];
                        pstring* d = NULL;
                        switch(stype) {
                            case ABSOLID_FMT_COLMASK_FAM:
                                read[read_number].fs_type = eAbisolidFSignalType_FAM;
                                d = &read[read_number].fxx;
                                break;
                            case ABSOLID_FMT_COLMASK_CY3:
                                d = &read[read_number].cy3;
                                break;
                            case ABSOLID_FMT_COLMASK_TXR:
                                d = &read[read_number].txr;
                                break;
                            case ABSOLID_FMT_COLMASK_CY5:
                                d = &read[read_number].cy5;
                                break;
                        }
                        if( d ) {
                            rc = pstring_assign(d, ztr.signal->data, ztr.signal->datasize);
                            DEBUG_MSG(3, ("SRF SIGNAL[%s]: %u bytes\n", ztr.signal->channel, d->len));
                        } else {
                            rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnrecognized);
                        }
                        if( rc != 0 ) {
                            SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying signal", NULL);
                        }
                    } else {
                        if( fe->region.nreads <= 0 || fe->region.nreads > ABSOLID_FMT_MAX_NUM_READS ) {
                            rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnsupported);
                            SRALoaderFile_LOG(fe->ctx.file, klogErr, rc, "read count $(c)", PLOG_U8(c), fe->region.nreads);
                        }

                        /* region boundaries are in values, the data is in bytes: scale by sizeof(float) */
                        for(i = 0; rc == 0 && i < fe->region.nreads; i++) {
                            pstring* d = NULL;
                            int read_number = AbisolidReadType2ReadNumber[fe->region.type[i]];
                            size_t len = (i + 1 >= fe->region.nreads) ? ztr.signal->datasize : (fe->region.start[i + 1] * sizeof(float));
                            len -= fe->region.start[i] * sizeof(float);
                            switch(stype) {
                                case ABSOLID_FMT_COLMASK_FAM:
                                    read[read_number].fs_type = eAbisolidFSignalType_FAM;
                                    d = &read[read_number].fxx;
                                    break;
                                case ABSOLID_FMT_COLMASK_CY3:
                                    d = &read[read_number].cy3;
                                    break;
                                case ABSOLID_FMT_COLMASK_TXR:
                                    d = &read[read_number].txr;
                                    break;
                                case ABSOLID_FMT_COLMASK_CY5:
                                    d = &read[read_number].cy5;
                                    break;
                            }
                            if( d ) {
                                rc = pstring_assign(d, &ztr.signal->data[fe->region.start[i] * sizeof(float)], len);
                                DEBUG_MSG(3, ("SRF SIGNAL[%s]: %u bytes\n", ztr.signal->channel, d->len));
                            } else {
                                rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnrecognized);
                            }
                        }
                        if( rc != 0 ) {
                            SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying signals", NULL);
                        }
                    }
                }
            }
            break;
        default:
            break;
        }
        /* release the processed block; ztr is read here as a single pointer */
        if(type != none && type != ignore) {
            free(*(void **)&ztr);
        }
    }
    if(rc == 0) {
        if( read_type <= eAbisolidReadType_SPOT ) {
            rc = SRAWriteAbsolid_Write(fe->writer, ctx->file, &readId, NULL, &read[0], &read[1]);
        } else {
            switch( AbisolidReadType2ReadNumber[read_type] ) {
                case 0:
                    rc = SRAWriteAbsolid_Write(fe->writer, ctx->file, &readId, NULL, &read[0], NULL);
                    break;
                case 1:
                    rc = SRAWriteAbsolid_Write(fe->writer, ctx->file, &readId, NULL, NULL, &read[1]);
                    break;
                default:
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnsupported);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "more than 2 reads", NULL);
                    break;
            }
        }
    }
    return rc;
}
Пример #16
0
/*
 * Reads the data for a single spot from a file; the data may be partial.
 *
 * Does nothing if file->ready is still set (previous spot not consumed yet).
 * Two record layouts are accepted, selected by the first character of the
 * first line:
 *   '@'  fastq: name line, sequence line, a line holding only "+", quality line
 *   '>'  fasta: name line, sequence line; every quality value is set to 14
 *
 * On success file->name, file->sequence and file->quality are filled and
 * file->ready is set. Returns 0 with file->ready unset at end of file.
 * Any failure is logged and returned as a non-zero rc.
 */
static
rc_t read_next_spot(HelicosLoaderFmt* self, HelicosFileInfo* file)
{
    rc_t rc = 0;

    if( file->ready ) {
        /* data still not used */
        return 0;
    }
    HelicosFileInfo_init(file);
    if( (rc = file_read_line(file, true)) != 0 ) {
        return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading more data");
    } else if( file->line == NULL ) {
        return 0; /* eof */
    }
    /* NOTE(review): the reads below assume file_read_line(file, false) never succeeds
       with file->line == NULL - confirm against its implementation */
    if( file->line[0] == '@' ) { /*** fastq format **/
        if( (rc = pstring_assign(&file->name, &file->line[1], file->line_len - 1)) != 0 ) {
            return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading name");
        }
        file->line = NULL;
        if( (rc = file_read_line(file, false)) != 0 || file->line_len > sizeof(file->sequence.data)-1 ||
            (rc = pstring_assign(&file->sequence, file->line, file->line_len)) != 0 ||
            !pstring_is_fasta(&file->sequence) ) {
            rc = rc ? rc : RC(rcSRA, rcFormatter, rcReading, rcData, rcUnrecognized);
            return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading sequence");
        }
        file->line = NULL;
        if( (rc = file_read_line(file, false)) != 0 ||
            file->line[0] != '+' || file->line_len != 1 ) {
            rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcCorrupt);
            return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading quality defline");
        }
        file->line = NULL;
        if( (rc = file_read_line(file, false)) != 0 || file->line_len > sizeof(file->quality.data)-1 ||
            (rc = pstring_assign(&file->quality, file->line, file->line_len)) != 0 ||
            (rc = pstring_quality_convert(&file->quality, eExperimentQualityEncoding_Ascii, 33, 0, 0x7F)) != 0 ) {
            /* an over-long line gets here with rc == 0; report it as an error, same as for the sequence */
            rc = rc ? rc : RC(rcSRA, rcFormatter, rcReading, rcData, rcUnrecognized);
            return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading quality");
        }
        file->line = NULL;
        file->ready = true;
    } else if( file->line[0] == '>' ) { /** fasta format **/
        if( (rc = pstring_assign(&file->name, &file->line[1], file->line_len - 1)) != 0 ) {
            return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading name");
        }
        file->line = NULL;
        if( (rc = file_read_line(file, false)) != 0 || file->line_len > sizeof(file->sequence.data)-1 ||
            (rc = pstring_assign(&file->sequence, file->line, file->line_len)) != 0 ||
            !pstring_is_fasta(&file->sequence) ) {
            rc = rc ? rc : RC(rcSRA, rcFormatter, rcReading, rcData, rcUnrecognized);
            return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading sequence");
        }
        file->line = NULL;
        /* fasta has no quality line: use the value 14 for every base */
        file->quality.len = file->sequence.len;
        memset(file->quality.data,14,file->quality.len);
        file->ready = true;
    } else {
        rc = RC(rcSRA, rcFormatter, rcReading, rcFile, rcInvalid);
        return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=expected '@' or '>'");
    }
#if _DEBUGGING
    DEBUG_MSG(3, ("READ: name:'%s', seq[%u]:'%s', qual[%u]\n", file->name.data,
                file->sequence.len, file->sequence.data, file->quality.len));
#endif
    return 0;
}