예제 #1
0
/*
 * Splits a read id into spot name and read label, then rewrites readId
 * in place as the full spot name.
 *
 * self   - loader context; only self->name_prefix is read here
 * readId - in: raw read id; out: name produced by SRAWriteAbsolid_MakeName()
 * type   - out: read type; eAbisolidReadType_SPOT when the id has no '_'
 * label  - out: read label; cleared when the id has no '_'
 *
 * Returns 0 on success, otherwise the first non-zero rc reported by
 * set_label_type(), pstring_copy() or SRAWriteAbsolid_MakeName().
 */
static
rc_t fe_new_read(fe_context_t *self, pstring *readId, EAbisolidReadType* type, pstring* label)
{
    rc_t rc = 0;
    pstring suffix_copy;
    const char* underscore;

    assert(self && readId && type && label);
    DEBUG_MSG(3, ("READ_LABEL: '%s'\n", readId->data));

    /* spot name suffix may end with '_(F|R).+' */
    underscore = strrchr(readId->data, '_');
    if( underscore == NULL ) {
        /* no suffix at all: the whole id is the spot name */
        pstring_clear(label);
        *type = eAbisolidReadType_SPOT;
    } else {
        rc = set_label_type(underscore + 1, label, type);
        if( rc != 0 ) {
            return rc;
        }
        if( *type > eAbisolidReadType_SPOT ) {
            /* cut the label and its leading '_' off the id (length only) */
            readId->len -= label->len + 1;
        }
    }

    /* MakeName writes into readId, so the suffix is taken from a copy */
    rc = pstring_copy(&suffix_copy, readId);
    if( rc != 0 ) {
        return rc;
    }
    return SRAWriteAbsolid_MakeName(&self->name_prefix, &suffix_copy, readId);
}
예제 #2
0
/*
 * Builds the spot name and spot group for one read and writes the spot.
 *
 * self   - loader context; name_prefix, read, writer and ctx.file are used
 * flags  - handed to SRF_set_read_filter() to fill self->read.filter
 * readId - read id; when it contains '#', the text after the first '#' is
 *          used as the spot group and readId->len is shortened to exclude it
 *
 * Returns 0 on success, otherwise the first non-zero rc from the pstring
 * helpers, IlluminaSpot_Add() or SRAWriterIllumina_Write().
 *
 * The spot object lives in static storage and is re-initialised on every call.
 */
static
rc_t fe_new_read(fe_context_t *self, int flags, pstring *readId )
{
    rc_t rc;
    char *suffix;
    pstring readName, spotGroup;
    static IlluminaSpot spot;

    /* look for spot group */
    suffix = strchr(readId->data, '#');
    if( suffix != NULL ) {
        /* length up to (not including) '#'; suffix then points past '#' */
        readId->len = suffix++ - readId->data;
        if( (rc = pstring_assign(&spotGroup, suffix, strlen(suffix))) != 0 ) {
            SRALoaderFile_LOG(self->ctx.file, klogInt, rc,
                "extracting barcode from spot '$(spotname)'", "spotname=%s", readId->data);
            return rc;
        }
    } else {
        pstring_clear(&spotGroup);
    }

    /* build the read name from prefix (self->name_prefix) and read id */
    if(self->name_prefix.len > 0 ) {
        if( (rc = pstring_copy(&readName, &self->name_prefix)) == 0 ) {
            /* <ctype.h> functions require a value representable as
               unsigned char; a plain (possibly negative) char is undefined */
            if( isdigit((unsigned char)readName.data[readName.len - 1]) ) {
                /* keep a prefix ending in a digit apart from the read id */
                rc = pstring_append(&readName, ":", 1);
            }
            if( rc == 0 ) {
                rc = pstring_concat(&readName, readId);
            }
        }
    } else {
        rc = pstring_copy(&readName, readId);
    }
    if( rc != 0 ) {
        SRALoaderFile_LOG(self->ctx.file, klogErr, rc,
            "preparing spot name $(spotname)", "spotname=%s", readId->data);
        return rc;
    }
    SRF_set_read_filter(&self->read.filter, flags);

    IlluminaSpot_Init(&spot);
    if( (rc = IlluminaSpot_Add(&spot, &readName, &spotGroup, &self->read)) == 0 ) {
        rc = SRAWriterIllumina_Write(self->writer, self->ctx.file, &spot);
    }
    return rc;
}
예제 #3
0
파일: pstring.c 프로젝트: Bhumi28/sra-tools
/*
 * Converts a textual quality string into one binary value per score, in place.
 *
 * qstr   - in: quality text; out (on success only): converted values
 * enc    - text encoding; when eExperimentQualityEncoding_Undefined it is
 *          guessed: Decimal if the text contains a space or tab, else Ascii
 * offset - subtracted from each character in Ascii encoding
 * min    - smallest accepted score
 * max    - largest accepted score
 *
 * Returns 0 on success;
 *   rcParam/rcInvalid     when qstr is NULL or min > max,
 *   rcData/rcUnrecognized for an encoding that is not handled,
 *   rcData/rcOutofrange   when a score is outside [min, max] or strtol
 *                         reported an error via errno,
 *   or the rc of a failing pstring helper.
 * On failure qstr is left unchanged.
 */
rc_t pstring_quality_convert(pstring* qstr, ExperimentQualityEncoding enc, const uint8_t offset, const int8_t min, const int8_t max)
{
    rc_t rc = 0;
    char* c, *end, *next;
    pstring qbin;

    if( qstr == NULL || min > max ) {
        /* must return here: the code below dereferences qstr */
        return RC(rcSRA, rcFormatter, rcReading, rcParam, rcInvalid);
    }
    errno = 0;
    c = qstr->data;
    end = qstr->data + qstr->len;
    pstring_clear(&qbin);
    if( enc == eExperimentQualityEncoding_Undefined ) {
        /* whitespace separators mean a list of numbers, otherwise one char per score */
        if( memchr(c, ' ', qstr->len) != NULL || memchr(c, '\t', qstr->len) != NULL ) {
            enc = eExperimentQualityEncoding_Decimal;
        } else {
            enc = eExperimentQualityEncoding_Ascii;
        }
    }

    while( rc == 0 && c < end ) {
        long q;
        switch(enc) {
            case eExperimentQualityEncoding_Decimal:
            case eExperimentQualityEncoding_Hexadecimal:
                /* spaced numbers form */
                /* NOTE(review): strtol stops at the first non-numeric char, not
                   at 'end'; this presumes qstr->data is NUL-terminated - confirm */
                errno = 0;
                q = strtol(c, &next, enc == eExperimentQualityEncoding_Decimal ? 10 : 16);
                if( q == 0 && c == next ) {
                    /* no more digits in line */
                    goto DONE; /*** need do break while loop as well ***/
                }
                c = next;
                break;
            case eExperimentQualityEncoding_Ascii:
                /* textual form with offset */
                q = (long)(*c++) - offset;
                break;
            default:
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcUnrecognized);
                break;
        }
        if( rc == 0 ) {
            if( errno != 0 || q < min || q > max ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcOutofrange);
            } else {
                rc = pstring_append_chr(&qbin, (int8_t)q, 1);
            }
        }
    }
DONE:
    if( rc == 0 ) {
        /* replace the text with the converted values only when all of it parsed */
        rc = pstring_copy(qstr, &qbin);
    }
    return rc;
}
예제 #4
0
/*
 * Builds a spot name as prefix + suffix into name.
 *
 * prefix - leading part of the name; must not be NULL
 * suffix - trailing part; may be NULL or empty, then name is just the prefix
 * name   - out: resulting name; must not be NULL
 *
 * A single '_' is inserted between the parts unless the prefix is empty,
 * the prefix already ends with '_' or the suffix already starts with '_'.
 *
 * Returns 0 on success, rcParam/rcNull for a NULL prefix or name, or the rc
 * of the failing pstring helper. Any failure is also logged.
 */
rc_t SRAWriteAbsolid_MakeName(const pstring* prefix, const pstring* suffix, pstring* name)
{
    rc_t rc = 0;
    if( prefix == NULL || name == NULL ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcParam, rcNull);
    } else if( (rc = pstring_copy(name, prefix)) == 0 ) {
        if( suffix && suffix->len > 0 ) {
            if( name->len > 0 && name->data[name->len - 1] != '_' && suffix->data[0] != '_' ) {
                rc = pstring_append(name, "_", 1);
            }
            if( rc == 0 ) {
                /* propagate a failed concat instead of reporting success */
                rc = pstring_concat(name, suffix);
            }
        }
    }
    if( rc != 0 ) {
        LOGERR(klogErr, rc, "preparing spot name");
    }
    return rc;
}
예제 #5
0
파일: fastq-fmt.c 프로젝트: ncbi/sra-tools
/*
 * Reads from a file the data for a single spot; the data may be partial.
 *
 * self - loader object, passed on to find_seq_qual_by_sep()
 * file - per-file state; file->spot[0] and file->spot[1] receive the reads
 *
 * Does nothing while file->spot[0] is still marked ready (not yet consumed).
 * Returns 0 on success and on EOF (on EOF file->line is NULL and nothing is
 * marked ready by this function); otherwise the failure is logged and a
 * non-zero rc is returned.
 *
 * Recognised inputs, in the order they are tried:
 *   - single-line forms split on ':' or ' '
 *   - a defline starting with '@', read via read_spot_data_3lines(), with an
 *     optional second read that has the same name and barcode but a
 *     different read id
 *   - a defline starting with '>' followed by sequence or quality lines
 */
static
rc_t read_next_spot(FastqLoaderFmt* self, FastqFileInfo* file)
{
    rc_t rc = 0;

    if( file->spot->ready ) {
        /* data still not used */
        return 0;
    }
    FileReadData_init(file->spot, false);
    FileReadData_init(&file->spot[1], false);
    if( (rc = file_read_line(file, true)) != 0 ) {
        return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading more data");
    } else if( file->line == NULL ) {
        return 0; /* eof */
    }
    if( find_seq_qual_by_sep(self, file, ':') || find_seq_qual_by_sep(self, file, ' ') ) {
        /* single line forms */
        file->line = NULL; /* line consumed */
        file->spot->ready = true;
    } else if( file->line[0] == '>' || file->line[0] == '@' ) {
        /* 4 or 8 line format */
        FileReadData sd;
        uint8_t word = 0, best_word = 0;
        uint8_t score = 0, best_score = 0;
        /* find and parse spot name on defline: try successive word numbers
           until parse_spot_name() scores 0, keeping the best-scoring one */
        do {
            score = parse_spot_name(file->file, &sd, &file->line[1], file->line_len - 1, ++word);
            if( score > best_score ) {
                if( (rc = pstring_copy(&file->spot->name, &sd.name)) != 0 ||
                    (rc = pstring_copy(&file->spot->barcode, &sd.barcode)) != 0 ) {
                    return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=copying read name");
                }
                file->spot->read.read_id = sd.read.read_id;
                best_score = score;
                best_word = word; /* used below for quality defline parsing */
            }

        } while(score != 0);
        if( best_score == 0 ) {
            rc = RC(rcSRA, rcFormatter, rcReading, rcId, rcNotFound);
            return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=spot name not detected");
        }
        if( file->line[0] == '@' ) {
            if( (rc = read_spot_data_3lines(file, file->spot, best_word, best_score, file->qualType)) != 0 ) {
                return rc;
            }
            /* now we MAY have 5th line in buffer so we can check if it's 2nd read for 8 lines format */
            if( file->line_len != 0 && file->line != NULL && file->line[0] == '@' ) {
                /* try to find read id on next line */
                FileReadData_init(&file->spot[1], false);
                if( parse_spot_name(file->file, &file->spot[1], &file->line[1], file->line_len - 1, best_word) == best_score ) {
                    if( pstring_cmp(&file->spot->name, &file->spot[1].name) == 0 &&
                        pstring_cmp(&file->spot->barcode, &file->spot[1].barcode) == 0 &&
                        file->spot->read.read_id != file->spot[1].read.read_id ) {
                        /* since it is different read id with same name and barcode, fill up second read */
                        if( (rc = read_spot_data_3lines(file, &file->spot[1], best_word, best_score, file->qualType)) != 0 ) {
                            return rc;
                        }
                    }
                }
            }
        } else {
            /* 2 line seq or quality form */
            file->line = NULL; /* line consumed */
            /* read sequence/quality */
            if( (rc = read_multiline_seq_or_qual(file, '>', &file->spot->read.seq)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading seq/qual data");
            }
            if( file->spot->read.seq.len == 0 ) {
                /* NOTE(review): rc is 0 here, so what the caller sees depends on
                   what SRALoaderFile_LOG returns for a zero rc - confirm intent */
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=empty string reading seq/qual data");
            } else if( !pstring_is_fasta(&file->spot->read.seq) ) {
                /* not a sequence, so the data is quality: move it from seq to qual */
                if( (rc = pstring_copy(&file->spot->read.qual, &file->spot->read.seq)) != 0 ) {
                    /* do not mark the spot ready with the data in the wrong field */
                    return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=copying quality data");
                }
                file->spot->read.qual_type = file->qualType;
                pstring_clear(&file->spot->read.seq);
            }
            file->spot->ready = true;
        }
    } else {
        rc = RC(rcSRA, rcFormatter, rcReading, rcFile, rcInvalid);
        return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=file corrupt or format unknown");
    }
    if( rc == 0 ) {
        int k;
        for(k = 0; k < 2; k++) {
            FileReadData* rd = &file->spot[k];
            if( rd->ready && rd->read.qual_type != ILLUMINAWRITER_COLMASK_NOTSET ) {
                if( file->qualOffset == 0 ) {
                    /* detect and remember: try offset 33 first and fall back
                       to 64 when a value is out of range; a failed conversion
                       leaves the quality string untouched, so the retry sees
                       the original text */
                    file->qualOffset = 33;
                    file->qualMax = 94;
                    rc = pstring_quality_convert(&rd->read.qual, file->qualEnc, file->qualOffset, file->qualMin, file->qualMax);
                    if( GetRCState(rc) == rcOutofrange ) {
                        file->qualOffset = 64;
                        file->qualMax = 61;
                        rc = pstring_quality_convert(&rd->read.qual, file->qualEnc, file->qualOffset, file->qualMin, file->qualMax);
                    }
                } else {
                    if( file->qualOffset == 33 ) {
                        file->qualMax = 94;
                    }
                    rc = pstring_quality_convert(&rd->read.qual, file->qualEnc, file->qualOffset, file->qualMin, file->qualMax);
                }
                if( rc != 0 ) {
                    return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=converting quality");
                }
            }
        }
    }
    return rc;
}
예제 #6
0
/*
 * Sorts the input files into groups, then validates and parses every group.
 *
 * self            - loader object; handed to FGroup_Parse and the source of
 *                   the bad-spot counter reported back
 * argc, argv      - the input files
 * spots_bad_count - out: receives self->spots_bad_count on return
 *
 * Files are visited once per entry of file_types, in that array's order;
 * in pass t only files whose detected type equals file_types[t].type are
 * handled. Each handled file is either appended to an existing group or
 * becomes the first file of a new group; new groups are inserted ordered by
 * the coordinates of their first file's first spot.
 *
 * Returns 0 on success or the first non-zero rc; failures are logged.
 */
static
rc_t IlluminaLoaderFmt_WriteData(IlluminaLoaderFmt* self, uint32_t argc, const SRALoaderFile* const argv[], int64_t* spots_bad_count)
{
    rc_t rc = 0;
    uint32_t t, i, k, ftype_q = sizeof(file_types) / sizeof(file_types[0]);
    SLList files;
    /* file object not yet owned by any group; reset to NULL once attached */
    IlluminaFileInfo* file = NULL;

    SLListInit(&files);

    /* group files using spotname, for _prb. file name prefix is used,
       files reviewed by type detected from name and ordered by file_type array */
    for(t = 0; rc == 0 && t < ftype_q; t++) {
        for(i = 0; rc == 0 && i < argc; i++) {
            const char* fname, *blk_pfx;
            int prefix_len = 0;
            ERunFileType ftype;
            EIlluminaNativeFileType type = eIlluminaNativeFileTypeNotSet;
            FGroup_Find_data data;

            if( (rc = SRALoaderFileName(argv[i], &fname)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading file name", NULL);
                break;
            }
            if( (rc = SRALoaderFile_FileType(argv[i], &ftype)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading file type", NULL);
                break;
            }
            if( (rc = SRALoaderFileBlockName(argv[i], &blk_pfx)) != 0 ) {
                SRALoaderFile_LOG(argv[i], klogErr, rc, "reading DATA_BLOCK/@name", NULL);
                break;
            }
            if( blk_pfx == NULL ) {
                /* downstream code always gets a non-NULL prefix */
                blk_pfx = "";
            }
            {{
                /* skip path if present */
                const char* p = strrchr(fname, '/');
                fname = p ? p + 1 : fname;
                p = NULL;
                /* detect the type from the file name: the first key of any
                   file_types entry that occurs in the name decides */
                for(k = 0; type == eIlluminaNativeFileTypeNotSet && k < ftype_q; k++) {
                    const char* const* e = file_types[k].key;
                    while( *e != NULL ) {
                        p = strstr(fname, *e++);
                        if( p != NULL ) {
                            type = file_types[k].type;
                            break;
                        } 
                    }
                }
                if( p != NULL ) {
                    /* everything before the matched key is the file name prefix */
                    prefix_len = p - fname;
                }
            }}
            /* a file type reported by the loader file overrides the one
               guessed from the name */
            if( ftype == rft_IlluminaNativeSeq ) {
                type = eIlluminaNativeFileTypeFasta;
            } else if( ftype == rft_IlluminaNativePrb ) {
                type = eIlluminaNativeFileTypeQuality4;
            } else if( ftype == rft_IlluminaNativeInt ) {
                type = eIlluminaNativeFileTypeIntensity;
            } else if( ftype == rft_IlluminaNativeQseq ) {
                type = eIlluminaNativeFileTypeQSeq;
            }
            if( type == eIlluminaNativeFileTypeNotSet ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcUnrecognized);
                SRALoaderFile_LOG(argv[i], klogErr, rc, "detecting file type by file name", NULL);
                break;
            }
            if( type != file_types[t].type ) {
                /* one type at a time */
                continue;
            }
            DEBUG_MSG(3, ("file '%s' type set to %d\n", fname, type));
            file = calloc(1, sizeof(*file));
            if( file == NULL ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcExhausted);
                SRALoaderFile_LOG(argv[i], klogErr, rc, "allocating file object", NULL);
                break;
            }
            IlluminaFileInfo_init(file);
            file->file = argv[i];
            file->type = type;

            /* NOTE(review): the rc of the key assignment/copy below is not
               checked before the group lookup, and is overwritten in the
               new-group branch - confirm this is intended */
            if( file->type == eIlluminaNativeFileTypeQuality4 ) {
                /* in _prb there is no spotname inside so use file prefix */
                rc = pstring_assign(&data.key, fname, prefix_len);
            } else {
                /* try to get 1st spot so group can be organized by spot name */
                if( (rc = read_next_spot(blk_pfx, file)) != 0 || !file->ready ) {
                    rc = rc ? rc : RC(rcSRA, rcFormatter, rcReading, rcData, rcNotFound);
                    SRALoaderFile_LOG(argv[i], klogErr, rc, "reading 1st spot", NULL);
                    break;
                }
                rc = pstring_copy(&data.key, &file->name);
            }

            data.found = NULL;
            if( SLListDoUntil(&files, FGroup_Find, &data) && data.found != NULL ) {
                /* existing group: walk to the end of its file chain and append,
                   rejecting a second file of the same type unless it is QSeq */
                IlluminaFileInfo* ss = data.found->files;

                while( rc == 0 && file != NULL ) {
                    if( ss->type != eIlluminaNativeFileTypeQSeq && ss->type == file->type ) {
                        rc = RC(rcSRA, rcFormatter, rcReading, rcFile, rcDuplicate);
                        SRALoaderFile_LOG(argv[i], klogErr, rc, "type of file for lane", NULL);
                    } else if( ss->next != NULL ) {
                        ss = ss->next;
                    } else {
                        ss->next = file;
                        file->prev = ss;
                        data.found->mask |= file->type;
                        file = NULL; /* now owned by the group */
                    }
                }
            } else {
                /* no matching group: start a new one with this file */
                data.found = calloc(1, sizeof(*data.found));
                if( data.found == NULL ) {
                    rc = RC(rcSRA, rcFormatter, rcReading, rcMemory, rcInsufficient);
                    SRALoaderFile_LOG(argv[i], klogErr, rc, "preparing file group", NULL);
                    break;
                } else {
                    /* the group key is always the file name prefix, also for
                       files that were looked up by spot name above */
                    if( (rc = pstring_assign(&data.found->key, fname, prefix_len)) != 0 ) {
                        SRALoaderFile_LOG(argv[i], klogErr, rc, "setting file group key", NULL);
                        FGroup_Whack(&data.found->dad, NULL);
                        break;
                    } else {
                        FGroup* curr = (FGroup*)SLListHead(&files), *prev = NULL;
                        data.found->blk_pfx = blk_pfx;
                        data.found->files = file;
                        data.found->mask = file->type;
                        /* group inserted into list by coords in 1st spot */
                        while( curr != NULL ) {
                            /* insert before the first group whose first file has
                               larger (coord[0], coord[1]) */
                            if( curr->files[0].coord[0] > file->coord[0] ||
                                (curr->files[0].coord[0] == file->coord[0] &&
                                 curr->files[0].coord[1] > file->coord[1]) ) {
                                data.found->dad.next = &curr->dad;
                                if( prev == NULL ) {
                                    files.head = &data.found->dad;
                                } else {
                                    prev->dad.next = &data.found->dad;
                                }
                                break;
                            }
                            prev = curr;
                            curr = (FGroup*)curr->dad.next;
                        }
                        if( curr == NULL ) {
                            /* no larger group found: append at the end */
                            SLListPushTail(&files, &data.found->dad);
                        }
                        file = NULL; /* now owned by the group */
                    }
                }
            }
        }
    }
    if( rc == 0 ) {
        SLListForEach(&files, FGroup_Validate, &rc);
    }
    if( rc == 0 ) {
        FGroup_Parse_data data;
        data.self = self;
        /* FGroup_Parse returns true on error, leaving the rc in data.rc */
        if( SLListDoUntil(&files, FGroup_Parse, &data) ) {
            rc = data.rc;
        }
    } else {
        /* release a file object that was allocated but never attached to a group */
        free(file);
    }
    SLListWhack(&files, FGroup_Whack, NULL);
    *spots_bad_count = self->spots_bad_count;
    return rc;
}
예제 #7
0
/*
 * List callback: reads all spots of one file group and writes them out.
 *
 * n - list node; cast to the FGroup to process
 * d - FGroup_Parse_data: loader object (self), scratch spot, and result rc
 *
 * Each pass of the outer loop reads the next spot from every file of the
 * group, merges the per-file reads into data->spot and writes that spot.
 * The loop ends when no file delivers a spot any more or on error.
 *
 * Returns true when data->rc is non-zero, false otherwise.
 */
bool FGroup_Parse( SLNode *n, void *d )
{
    FGroup_Parse_data* data = (FGroup_Parse_data*)d;
    FGroup* g = (FGroup*)n;
    bool done;
    /* file handed to the writer; last file whose data went into the spot */
    const SRALoaderFile* data_block_ref = NULL;

    data->rc = 0;
    do {
        IlluminaFileInfo* file = g->files;
        done = true;
        /* fetch the next spot from every file; any file still ready keeps us going */
        while( data->rc == 0 && file != NULL ) {
            if( (data->rc = read_next_spot(g->blk_pfx, file)) == 0 && file->ready ) {
                done = false;
            }
            file = file->next;
        }
        if( data->rc != 0 || done ) {
            break;
        }
        /* collect spot reads, matching by spot name
         * spot data may be split across multiple files
         */
        IlluminaSpot_Init(&data->spot);
        file = g->files;
        while( data->rc == 0 && file != NULL ) {
            if( file->ready ) {
                if( (file->type == eIlluminaNativeFileTypeNoise && data->self->skip_noise) ||
                    (file->type == eIlluminaNativeFileTypeIntensity && data->self->skip_intensity) ||
                    (file->type == eIlluminaNativeFileTypeSignal && data->self->skip_signal) ) {
                    /* data of a skipped kind: consume it without adding to the spot */
                    file->ready = false;
                } else {
                    data_block_ref = file->file;
                    if( file->type == eIlluminaNativeFileTypeQSeq && (g->mask & eIlluminaNativeFileTypeQuality4) ) {
                        /* drop quality1 from qseq data */
                        pstring_clear(&file->read.qual);
                    } else if( file->type == eIlluminaNativeFileTypeQuality4 ) {
                        /* NOTE(review): neib is NULL when the prb file is the only
                           file in the group - presumably ruled out elsewhere; confirm */
                        IlluminaFileInfo* neib = file->next ? file->next : file->prev;
                        /* need to fix spotname to be same cause prb do not have any name in it */
                        if( (data->rc = pstring_copy(&file->name, &neib->name)) != 0 ) {
                            SRALoaderFile_LOG(file->file, klogErr, data->rc, "$(msg) '$(n)'", "msg=syncing prb spot name,n=%s", neib->name.data);
                        }
                    }
                    if( data->rc == 0 ) {
                        data->rc = IlluminaSpot_Add(&data->spot, &file->name, &file->barcode, &file->read);
                        if( data->rc == 0 ) {
                            file->ready = false;
                        } else {
                            if( GetRCState(data->rc) == rcIgnored ) {
                                /* handled here as a spot name mismatch: warn, count the
                                   spot as bad and drop it from every file of the group */
                                SRALoaderFile_LOG(file->file, klogWarn, data->rc, "$(msg) '$(s1)' <> '$(s2)'",
                                                "msg=spot name mismatch,s1=%.*s,s2=%.*s",
                                                data->spot.name->len, data->spot.name->data, file->name.len, file->name.data);
                                data->self->spots_bad_count++;
                                /* skip spot for all files in a group */
                                file = g->files;
                                while( file != NULL ) {
                                    file->ready = false;
                                    SRALoaderFile_LOG(file->file, klogWarn, data->rc,
                                                      "$(msg) '$(n)'", "msg=skipped spot,n=%s", file->name.data);
                                    file = file->next;
                                }
                                /* a negative spots_bad_allowed disables the limit */
                                if( data->self->spots_bad_allowed >= 0 &&
                                    data->self->spots_bad_count > data->self->spots_bad_allowed ) {
                                    data->rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcInvalid);
                                }
                                /* leaves the collecting loop; file is NULL at this point,
                                   so the advance below must not run */
                                break;
                            }
                        }
                    }
                }
            }
            file = file->next;
        }
        if( GetRCState(data->rc) == rcIgnored ) {
            /* spot was skipped within the allowed limit: clear the rc and go on
               with the next spot without writing anything */
            data->rc = 0;
            continue;
        }
        if( data->rc == 0 ) {
            data->rc = SRAWriterIllumina_Write(data->self->writer, data_block_ref, &data->spot);
        }
    } while( data->rc == 0 );
    return data->rc != 0;
}
예제 #8
0
/*
 * Reads from a file the data for a single spot; the data may be partial.
 *
 * blk_pfx - DATA_BLOCK name used in the spot name; NULL is treated as "no
 *           prefix"
 * file    - per-file state; receives name, coordinates and read data
 *
 * Does nothing while file->ready is still set (previous spot not consumed).
 * Returns 0 on success and on EOF (on EOF file->line is NULL and file->ready
 * is not set by this function); otherwise the failure is logged and a
 * non-zero rc is returned.
 */
static
rc_t read_next_spot(const char* blk_pfx, IlluminaFileInfo* file)
{
    rc_t rc = 0;
    const char* tail;

    if( file->ready ) {
        /* data still not used */
        return 0;
    }
    IlluminaFileInfo_init(file);
    if( (rc = file_read_line(file, true)) != 0 ) {
        return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading more data");
    } else if( file->line == NULL ) {
        return 0; /* eof */
    }
    /* tail must refer to the line just read: file types that do not parse
       coordinates leave it here, so the length adjustment below subtracts 0 */
    tail = file->line;
    switch( file->type ) {
        case eIlluminaNativeFileTypeQSeq:
            if( (rc = parse_qseq(file, file->line, file->line_len)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading qseq");
            }
            break;

        case eIlluminaNativeFileTypeFasta:
        case eIlluminaNativeFileTypeNoise:
        case eIlluminaNativeFileTypeIntensity:
        case eIlluminaNativeFileTypeSignal:
            /* read only common first 4 coords into name and prepend with DATA_BLOCK/@name */
            if( (rc = read_spot_coord(file, file->line, file->line_len, &tail)) == 0 ) {
                if( blk_pfx != NULL ) {
                    /* name becomes "<blk_pfx>:<coords name>" */
                    pstring tmp_name;
                    if( (rc = pstring_copy(&tmp_name, &file->name)) == 0 &&
                        (rc = pstring_assign(&file->name, blk_pfx, strlen(blk_pfx))) == 0 &&
                        (rc = pstring_append(&file->name, ":", 1)) == 0 ) {
                        rc = pstring_concat(&file->name, &tmp_name);
                    }
                }
            }
            if( rc != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading spot coord");
            }
            break;

        case eIlluminaNativeFileTypeQuality4:
            if( (rc = read_quality(file->line, file->line_len, &file->read)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading quality");
            } else {
                /* same NULL tolerance as the coordinate branch above */
                const char* pfx = blk_pfx != NULL ? blk_pfx : "";
                if( (rc = pstring_assign(&file->name, pfx, strlen(pfx))) != 0 ) {
                    return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=name for quality 4");
                }
            }
            break;

        default:
            rc = RC(rcSRA, rcFormatter, rcReading, rcFileFormat, rcUnknown);
            return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=processing data line");
    }

    /* process tail (after coords) for some file types */
    file->line_len -= tail - file->line; /* length of tail */
    switch( file->type ) {
        case eIlluminaNativeFileTypeQSeq:
        case eIlluminaNativeFileTypeQuality4:
        default:
            /* completely processed before */
            break;

        case eIlluminaNativeFileTypeFasta:
            /* a failed assign is reported with the same rc as non-fasta data */
            if( (rc = pstring_assign(&file->read.seq, tail, file->line_len)) != 0 ||
                !pstring_is_fasta(&file->read.seq) ) {
                rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcCorrupt);
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=reading fasta");
            }
            break;

        case eIlluminaNativeFileTypeNoise:
            if( (rc = read_signal(tail, file->line_len, &file->read.noise)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=converting noise");
            }
            break;

        case eIlluminaNativeFileTypeIntensity:
            if( (rc = read_signal(tail, file->line_len, &file->read.intensity)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=converting intensity");
            }
            break;

        case eIlluminaNativeFileTypeSignal:
            if( (rc = read_signal(tail, file->line_len, &file->read.signal)) != 0 ) {
                return SRALoaderFile_LOG(file->file, klogErr, rc, "$(msg)", "msg=converting signal");
            }
            break;
    }
    file->line = NULL; /* line consumed */
    file->ready = true;
#if _DEBUGGING
    DEBUG_MSG(3, ("name:'%s' [%li:%li:%li:%li]\n", file->name.data, 
                file->coord[0], file->coord[1], file->coord[2], file->coord[3]));
    if( file->read.seq.len ) {
        DEBUG_MSG(3, ("seq:'%.*s'\n", file->read.seq.len, file->read.seq.data));
    }
    if( file->read.qual.len ) {
        DEBUG_MSG(3, ("qual{0x%x}: %u bytes\n", file->read.qual_type, file->read.qual.len));
    }
#endif
    return 0;
}
예제 #9
0
파일: sff-fmt.c 프로젝트: Jingyu9/sra-tools
/*
 * Read and validate the SFF common header at the current position of `file`
 * and hand its flow characters and key sequence to the active writer.
 *
 * If self->header already holds a header (magic_number != 0) the stream is
 * treated as a concatenation of several SFF files: the header being read must
 * agree with the previous one in key_length, num_flows_per_read, flow chars
 * and key sequence. When the magic number does not match in that situation,
 * one retry is made after skipping the bytes needed to round the previous
 * file's index_length up to a multiple of 8.
 *
 * self  loader state; header, flow_chars, key_seq, file_advance and
 *       index_correction are updated
 * file  input file to read from
 *
 * Returns 0 on success, otherwise the rc of the failing step (read failure,
 * unrecognized magic number, bad version, unsupported flowgram format,
 * header length mismatch, or inconsistency with the previous header).
 */
static
rc_t SFFLoaderFmtReadCommonHeader(SFFLoaderFmt* self, const SRALoaderFile *file)
{
    rc_t rc = 0;
    bool skipped_idx_pad = false;
    uint16_t head_sz;
    SFFCommonHeader prev_head;
    pstring prev_flow_chars;
    pstring prev_key_seq;

    if( (rc = SRALoaderFile_Offset(file, &self->index_correction)) != 0 ) {
        SRALoaderFile_LOG(file, klogErr, rc, "Reading initial file position", NULL);
        return rc;
    }
SkipIndexPad:
    self->index_correction += self->file_advance;
    if( (rc = SFFLoaderFmt_ReadBlock(self, file, SFFCommonHeader_size, NULL, true)) != 0) {
        SRALoaderFile_LOG(file, klogErr, rc, "common header, needed $(needed) bytes",
                          PLOG_U32(needed), SFFCommonHeader_size);
        return rc;
    }
    /* Snapshot the previous header only on the first pass: after a padding
       retry self->header already contains the rejected, misaligned header and
       must not replace the real previous one. */
    if( !skipped_idx_pad ) {
        if( self->header.magic_number != 0 ) {
            /* next file in stream, remember prev to sync to each */
            memcpy(&prev_head, &self->header, sizeof(SFFCommonHeader));
            if( (rc = pstring_copy(&prev_flow_chars, &self->flow_chars)) != 0 ||
                (rc = pstring_copy(&prev_key_seq, &self->key_seq)) != 0 ) {
                SRALoaderFile_LOG(file, klogErr, rc, "saving previous flows/key sequence", NULL);
                return rc;
            }
        } else {
            /* only these two fields of prev_head are consulted in this case */
            prev_head.magic_number = 0;
            prev_head.index_length = 0;
        }
    }

    memcpy(&self->header, self->file_buf, SFFCommonHeader_size);
#if __BYTE_ORDER == __LITTLE_ENDIAN
    /* header fields are byte-swapped on little-endian hosts */
    self->header.magic_number = bswap_32(self->header.magic_number);
    self->header.version = bswap_32(self->header.version);
    self->header.index_offset = bswap_64(self->header.index_offset);
    self->header.index_length = bswap_32(self->header.index_length);
    self->header.number_of_reads = bswap_32(self->header.number_of_reads);
    self->header.header_length = bswap_16(self->header.header_length);
    self->header.key_length = bswap_16(self->header.key_length);
    self->header.num_flows_per_read = bswap_16(self->header.num_flows_per_read);
#endif

    if( self->header.magic_number != (('.'<<24)|('s'<<16)|('f'<<8)|('f'<<0)) ) {
        if( !skipped_idx_pad && prev_head.magic_number != 0 ) {
            /* possible concatenation of 2 files with index at EOF and padded to 8 bytes with header values not padded,
               try skipping padding and reread */
            /* bytes missing to the next multiple of 8; 0 when already aligned */
            uint32_t pad = (8 - prev_head.index_length % 8) % 8;
            if( pad != 0 ) {
                self->file_advance += pad;
                DEBUG_MSG(5, ("%s: trying to skip over %u bytes index section padding\n", self->file_name, pad));
                skipped_idx_pad = true;
                goto SkipIndexPad;
            }
        }
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnrecognized);
        SRALoaderFile_LOG(file, klogErr, rc, "magic number: $(m)", PLOG_U32(m), self->header.magic_number);
        return rc;
    }
    if( self->header.version != 1 ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcBadVersion);
        SRALoaderFile_LOG(file, klogErr, rc, "format version $(v)", PLOG_U32(v), self->header.version);
        return rc;
    }
    if( self->header.flowgram_format_code != SFFFormatCodeUI16Hundreths ) {
        /* NOTE: add a case here if flowgram coding gets new version to support different */
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnsupported);
        SRALoaderFile_LOG(file, klogErr, rc, "common header flowgram format code", NULL);
        return rc;
    }
    if( self->header.index_length % 8 != 0 ) {
        DEBUG_MSG(5, ("%s: index_length field value is not 8 byte padded: %u\n", self->file_name, self->header.index_length));
    }
    /* expected header size: fixed part + flow chars + key, rounded up to 8 bytes */
    head_sz = SFFCommonHeader_size + self->header.num_flows_per_read + self->header.key_length;
    head_sz += (head_sz % 8) ? (8 - (head_sz % 8)) : 0;
    if( head_sz != self->header.header_length ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcFormat, rcInvalid);
        SRALoaderFile_LOG(file, klogErr, rc, "header length $(h) <> $(s) ", PLOG_2(PLOG_U16(h),PLOG_U16(s)),
                          self->header.header_length, head_sz);
        return rc;
    }
    /* read flow chars and key */
    self->file_advance = SFFCommonHeader_size;
    if( (rc = SFFLoaderFmt_ReadBlock(self, file, head_sz - SFFCommonHeader_size, "common header", false)) != 0) {
        return rc;
    }
    self->file_advance = head_sz - SFFCommonHeader_size;

    if( (rc = pstring_assign(&self->flow_chars, self->file_buf, self->header.num_flows_per_read)) != 0 ||
        (rc = pstring_assign(&self->key_seq, self->file_buf + self->header.num_flows_per_read, self->header.key_length)) != 0 ) {
        SRALoaderFile_LOG(file, klogErr, rc, "reading flows/key sequence", NULL);
        return rc;
    }
    if( prev_head.magic_number != 0 ) {
        /* next file's common header must match previous file's common header, partially */
        if( prev_head.key_length != self->header.key_length ||
            prev_head.num_flows_per_read != self->header.num_flows_per_read ||
            pstring_cmp(&prev_flow_chars, &self->flow_chars) != 0 ||
            pstring_cmp(&prev_key_seq, &self->key_seq) != 0 ) {
                rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcInconsistent);
                SRALoaderFile_LOG(file, klogErr, rc, "previous file common header differ in flows/key sequence", NULL);
        }
    }
    if( rc == 0 ) {
        /* exactly one of the two writers receives the header */
        if( self->w454 ) {
            rc = SRAWriter454_WriteHead(self->w454, &self->flow_chars, &self->key_seq);
        } else {
            rc = SRAWriterIonTorrent_WriteHead(self->wIonTorrent, &self->flow_chars, &self->key_seq);
        }
    }
    return rc;
}
예제 #10
0
static
rc_t parse_v1_read(SRF_context *ctx, ZTR_Context *ztr_ctx, const uint8_t *data, size_t size)
{
    rc_t rc = 0;
    size_t i, parsed;
    ztr_raw_t ztr_raw;
    ztr_t ztr;
    enum ztr_chunk_type type;
    fe_context_t* fe = (fe_context_t*)ctx;

    uint8_t flags;
    pstring readId;
    EAbisolidReadType read_type;
    pstring label;

    AbsolidRead read[ABSOLID_FMT_MAX_NUM_READS];
        
    if( fe->region.nreads == 0 ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcNotFound);
        return SRALoaderFile_LOG(ctx->file, klogErr, rc, "missing region chunk before 1st read chunk", NULL);
    }
    if( (rc = SRF_ParseReadChunk(data, size, &parsed, &flags, &readId)) != 0 ) {
        rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rc);
        return SRALoaderFile_LOG(ctx->file, klogErr, rc, "SRF parsing failure", NULL);
    }
    ABI_ZTR_AddToBuffer(ztr_ctx, data + parsed, size - parsed);

    /* readId will have spotname */
    if( (rc = fe_new_read(fe, &readId, &read_type, &label)) != 0 ) {
        return SRALoaderFile_LOG(ctx->file, klogErr, rc, "parsing spot name suffix", NULL);
    }
    for(i = 0; i < sizeof(read) / sizeof(read[0]); i++) {
        AbsolidRead_Init(&read[i]);
    }
    while(!ABI_ZTR_BufferIsEmpty(ztr_ctx)) {
        if( (rc = ABI_ZTR_ParseBlock(ztr_ctx, &ztr_raw)) != 0 ||
            (rc = ABI_ZTR_ProcessBlock(ztr_ctx, &ztr_raw, &ztr, &type)) != 0 ) {
            SRALoaderFile_LOG(ctx->file, klogErr, rc, "ZTR parsing failure", NULL);
            break;
        }
        switch (type) {
        case BASE:
            if(ztr.sequence->datatype != i8) {
                rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                SRALoaderFile_LOG(ctx->file, klogErr, rc, "read: expected 8-bit datatype", NULL);
            } else if( read_type > eAbisolidReadType_SPOT ) {
                int read_number = AbisolidReadType2ReadNumber[read_type];
                if( (rc = pstring_assign(&read[read_number].seq, ztr.sequence->data, ztr.sequence->datasize)) == 0 ) {
                    /* grab 1st, may be the only cs_key */
                    read[read_number].cs_key = fe->region.cs_key[0];
                    for(i = 1; i < fe->region.nreads; i++) {
                        if( read_type == fe->region.type[i] ) {
                            read[read_number].cs_key = fe->region.cs_key[i];
                            break;
                        }
                    }
                    SRF_set_read_filter(&read[read_number].filter, flags);
                    rc = pstring_copy(&read[read_number].label, &label);
                    DEBUG_MSG(3, ("SRF READ: '%s'\n", read[read_number].seq.data));
                }
                if( rc != 0 ) {
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying read", NULL);
                }
            } else {
                for(i = 0; rc == 0 && i < fe->region.nreads; i++) {
                    int read_number = AbisolidReadType2ReadNumber[fe->region.type[i]];
                    size_t len = (i + 1 >= fe->region.nreads ? ztr.sequence->datasize : fe->region.start[i + 1]) - fe->region.start[i];
                    rc = pstring_assign(&read[read_number].seq, &ztr.sequence->data[fe->region.start[i]], len);
                    read[read_number].cs_key = fe->region.cs_key[i];
                    SRF_set_read_filter(&read[read_number].filter, flags);
                    if( fe->region.label[i].len != 0 ) {
                        rc = pstring_copy(&read[read_number].label, &fe->region.label[i]);
                    }
                    DEBUG_MSG(3, ("SRF READ[%u]: '%s'\n", i, read[read_number].seq.data));
                }
                if( rc != 0 ) {
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying reads", NULL);
                }
            }
            break;
        case CNF1:
            if(ztr.quality1->datatype != i8) {
                rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                SRALoaderFile_LOG(ctx->file, klogErr, rc, "quality: expected 8-bit datatype", NULL);
            } else if( read_type > eAbisolidReadType_SPOT ) {
                int read_number = AbisolidReadType2ReadNumber[read_type];
                if( (rc = pstring_assign(&read[read_number].qual, ztr.quality1->data, ztr.quality1->datasize)) == 0 ) {
                    DEBUG_MSG(3, ("SRF QUAL: %u bytes\n", read[read_number].qual.len));
                }
                if( rc != 0 ) {
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying quality", NULL);
                }
            } else {
                for(i = 0; rc == 0 && i < fe->region.nreads; i++) {
                    int read_number = AbisolidReadType2ReadNumber[fe->region.type[i]];
                    size_t len = (i + 1 >= fe->region.nreads ? ztr.quality1->datasize : fe->region.start[i + 1]) - fe->region.start[i];
                    rc = pstring_assign(&read[read_number].qual, &ztr.quality1->data[fe->region.start[i]], len);
                    DEBUG_MSG(3, ("SRF QUAL[%u]: %u bytes\n", i, read[read_number].qual.len));
                }
                if( rc != 0 ) {
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying qualities", NULL);
                }
            }
            break;
        case SAMP:
            if( !fe->skip_signal ) {
                size_t i;
                int stype = ABSOLID_FMT_COLMASK_NOTSET;
                if(ztr.signal->datatype != f32) {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "signal: expected 32-bit float datatype", NULL);
                } else if( (ztr.signal->datasize % sizeof(float)) != 0 ) {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcInvalid);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "signal: size not 32-bit float aligned", NULL);
                } else if (ztr.signal->channel == NULL) {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcIncomplete);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "SIGNAL column: missing channel type", NULL);
                } else if(strcmp(ztr.signal->channel, "0FAM") == 0) {
                    stype = ABSOLID_FMT_COLMASK_FAM;
                } else if(strcmp(ztr.signal->channel, "1CY3") == 0) {
                    stype = ABSOLID_FMT_COLMASK_CY3;
                } else if(strcmp(ztr.signal->channel, "2TXR") == 0) {
                    stype = ABSOLID_FMT_COLMASK_TXR;
                } else if(strcmp(ztr.signal->channel, "3CY5") == 0) {
                    stype = ABSOLID_FMT_COLMASK_CY5;
                } else {
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnexpected);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "SIGNAL column: unexpected channel type", NULL);
                }
#if __BYTE_ORDER == __LITTLE_ENDIAN
                for(i = 0; rc == 0 && i < ztr.signal->datasize; i += 4) {
                    uint32_t* r = (uint32_t*)&ztr.signal->data[i];
                    *r = bswap_32(*r);
                }
#endif
                if( rc == 0 ) {
                    if( read_type > eAbisolidReadType_SPOT ) {
                        int read_number = AbisolidReadType2ReadNumber[read_type];
                        pstring* d = NULL;
                        switch(stype) {
                            case ABSOLID_FMT_COLMASK_FAM:
                                read[read_number].fs_type = eAbisolidFSignalType_FAM;
                                d = &read[read_number].fxx;
                                break;
                            case ABSOLID_FMT_COLMASK_CY3:
                                d = &read[read_number].cy3;
                               break;
                            case ABSOLID_FMT_COLMASK_TXR:
                                d = &read[read_number].txr;
                                break;
                            case ABSOLID_FMT_COLMASK_CY5:
                                d = &read[read_number].cy5;
                                break;
                        }
                        if( d ) {
                            rc = pstring_assign(d, ztr.signal->data, ztr.signal->datasize);
                            DEBUG_MSG(3, ("SRF SIGNAL[%s]: %u bytes\n", ztr.signal->channel, d->len));
                        } else {
                            rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnrecognized);
                        }
                        if( rc != 0 ) {
                            SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying signal", NULL);
                        }
                    } else {
			 if( fe->region.nreads <= 0 || fe->region.nreads > ABSOLID_FMT_MAX_NUM_READS ) {
				rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnsupported);
				SRALoaderFile_LOG(fe->ctx.file, klogErr, rc, "read count $(c)", PLOG_U8(c), fe->region.nreads);
			 }

                        for(i = 0; rc == 0 && i < fe->region.nreads; i++) {
                            pstring* d = NULL;
                            int read_number = AbisolidReadType2ReadNumber[fe->region.type[i]];
                            size_t len = (i + 1 >= fe->region.nreads) ? ztr.signal->datasize : (fe->region.start[i + 1] * sizeof(float));
                            len -= fe->region.start[i] * sizeof(float);
                            switch(stype) {
                                case ABSOLID_FMT_COLMASK_FAM:
                                    read[read_number].fs_type = eAbisolidFSignalType_FAM;
                                    d = &read[read_number].fxx;
                                    break;
                                case ABSOLID_FMT_COLMASK_CY3:
                                    d = &read[read_number].cy3;
                                   break;
                                case ABSOLID_FMT_COLMASK_TXR:
                                    d = &read[read_number].txr;
                                    break;
                                case ABSOLID_FMT_COLMASK_CY5:
                                    d = &read[read_number].cy5;
                                    break;
                            }
                            if( d ) {
                                rc = pstring_assign(d, &ztr.signal->data[fe->region.start[i] * sizeof(float)], len);
                                DEBUG_MSG(3, ("SRF SIGNAL[%s]: %u bytes\n", ztr.signal->channel, d->len));
                            } else {
                                rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnrecognized);
                            }
                        }
                        if( rc != 0 ) {
                            SRALoaderFile_LOG(ctx->file, klogErr, rc, "copying signals", NULL);
                        }
                    }
                }
            }
            break;
        default:
            break;
        }
        if(type != none && type != ignore) {
            free(*(void **)&ztr);
        }
    }
    if(rc == 0) {
        if( read_type <= eAbisolidReadType_SPOT ) {
            rc = SRAWriteAbsolid_Write(fe->writer, ctx->file, &readId, NULL, &read[0], &read[1]);
        } else {
            switch( AbisolidReadType2ReadNumber[read_type] ) {
                case 0:
                    rc = SRAWriteAbsolid_Write(fe->writer, ctx->file, &readId, NULL, &read[0], NULL);
                    break;
                case 1:
                    rc = SRAWriteAbsolid_Write(fe->writer, ctx->file, &readId, NULL, NULL, &read[1]);
                    break;
                default:
                    rc = RC(rcSRA, rcFormatter, rcParsing, rcData, rcUnsupported);
                    SRALoaderFile_LOG(ctx->file, klogErr, rc, "more than 2 reads", NULL);
                    break;
            }
        }
    }
    return rc;
}