Exemple #1
0
/*
 * Read the next record of a reads file into `data`.
 *
 * Three fields are consumed from the current line, in this order:
 *   1. flags   - parsed as an unsigned 16-bit value into data->flags
 *   2. reads   - must be exactly CG_READS_SPOT_LEN characters, copied to data->read
 *   3. scores  - must be exactly CG_READS_SPOT_LEN characters, copied to data->qual
 *
 * On the first record handled by this reader (cself->records == 0) the spot
 * group name "<slide>-<lane>" is formatted into cself->spot_group and
 * data->seq.spot_group is pointed at it.
 *
 * Returns 0 on success. When no line (or an empty line) is available the
 * returned rc carries the rcDone state; other non-zero values describe a
 * parse/validation failure.
 *
 * NOTE(review): the CG_LINE_* macros are defined outside this view. The bare
 * `break` statements below imply that CG_LINE_START opens a breakable scope
 * which CG_LINE_END closes, and that b/len/p are introduced by CG_LINE_START.
 * The field macros presumably also bail out when rc is already non-zero -
 * confirm against the macro definitions.
 */
static
rc_t CC CGReads15_Read(const CGReads15* cself, TReadsData* data)
{
    rc_t rc = 0;

    /* remember the row id supplied with the first record */
    if( cself->start_rowid == 0 ) {
        ((CGReads15*)cself)->start_rowid = data->rowid;
    }
    CG_LINE_START(cself->file, b, len, p);
    if( b == NULL || len == 0) {
        /* nothing (or an empty line) was returned: report rcDone */
        rc = RC(rcRuntime, rcFile, rcReading, rcData, rcDone);
        break;
    }
    /*DEBUG_MSG(10, ("reads: '%.*s'\n", len, b));*/
    /* field 1: flags */
    CG_LINE_NEXT_FIELD(b, len, p);
    if( (rc = str2u16(b, p - b, &data->flags)) != 0 ) {
        /* conversion failed, rc already holds the error */
    } else if( data->flags > 10 ) {
        rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
    } else if( (data->flags & 0x03) == 3 || (data->flags & 0x07) == 7 ) {
        /* reject values with both low bits set (the 0x07 test is implied by the 0x03 test) */
        rc = RC(rcRuntime, rcFile, rcReading, rcData, rcInvalid);
    }
    /* field 2: read bases, fixed length */
    CG_LINE_NEXT_FIELD(b, len, p);
    data->seq.sequence.elements = p - b;
    if( data->seq.sequence.elements != CG_READS_SPOT_LEN ) {
        rc = RC(rcRuntime, rcFile, rcReading, rcData, rcInvalid);
    } else {
        rc = str2buf(b, data->seq.sequence.elements, data->read, sizeof(data->read));
        /* clear cache, set in alignment writer */
        data->reverse[0] = '\0';
        data->reverse[CG_READS_SPOT_LEN / 2] = '\0';
    }
    /* field 3 (last on the line): quality scores, same fixed length */
    CG_LINE_LAST_FIELD(b, len, p);
    data->seq.quality.elements = p - b;
    if( data->seq.quality.elements != CG_READS_SPOT_LEN ) {
        rc = RC(rcRuntime, rcFile, rcReading, rcData, rcInvalid);
    } else {
        rc = str2buf(b, data->seq.quality.elements, data->qual, sizeof(data->qual));
    }
    if( cself->records == 0 ) {
        size_t w = 0;
        rc_t rc_fmt;

        /* Format into a separate result code: assigning straight to rc would
           discard a quality-field error detected just above. */
#if 0
        rc_fmt = string_printf(((CGReads15*)cself)->spot_group, sizeof(cself->spot_group), &w, "%s:%s:%s:%04u",
                               cself->assembly_id, cself->slide, cself->lane, cself->batch_file_number);
#else
        rc_fmt = string_printf(((CGReads15*)cself)->spot_group, sizeof(cself->spot_group), &w, "%s-%s",
                               cself->slide, cself->lane);
#endif
        data->seq.spot_group.buffer = cself->spot_group;
        data->seq.spot_group.elements = w;
        if( rc == 0 ) {
            rc = rc_fmt;
        }
    }
    ((CGReads15*)cself)->records++;
    DEBUG_MSG(10, ("reads:  %u\t'%s'\t'%s'\n", data->flags, data->read, data->qual));
    CG_LINE_END();
    return rc;
}
Exemple #2
0
/*
 * Read one record of an evidenceIntervals file into `data`.
 *
 * Fields are consumed from the current line in this order:
 *   interval_id, chr, offset, length, ploidy, allele_indexes,
 *   scoreVAF, scoreEAF, allele[0..3], allele_alignment[1..3].
 * data->score is set to a copy of scoreVAF. For every allele and allele
 * alignment the raw field width (p - b) is stored in the matching *_length
 * slot. Slot 0 of allele_alignment / allele_alignment_length is not written
 * by this function.
 *
 * Returns 0 on success. When no line (or an empty line) is available the
 * returned rc carries the rcDone state; otherwise rc is whatever the last
 * executed conversion / field macro produced.
 *
 * NOTE(review): the CG_LINE_* macros are defined outside this view. The bare
 * `break` below implies CG_LINE_START opens a breakable scope closed by
 * CG_LINE_END, and since rc is reassigned before every field macro they
 * presumably bail out when rc is already non-zero - confirm.
 * NOTE(review): the function name says "20" but the object type is
 * CGEvidenceIntervals15 - presumably the v2.0 reader reuses the v1.5 object;
 * verify against the factory code.
 */
static
rc_t CC CGEvidenceIntervals20_Read(const CGEvidenceIntervals15* cself, TEvidenceIntervalsData* data)
{
    rc_t rc = 0;

    CG_LINE_START(cself->file, b, len, p);
    if( b == NULL || len == 0) {
        /* nothing (or an empty line) was returned: report rcDone */
        rc = RC(rcRuntime, rcFile, rcReading, rcData, rcDone);
        break;
    }
    /*DEBUG_MSG(10, ("evidenceIntervals: '%.*s'\n", len, b));*/
    /* interval identification and position */
    CG_LINE_NEXT_FIELD(b, len, p);
    rc = str2buf(b, p - b, data->interval_id, sizeof(data->interval_id));
    CG_LINE_NEXT_FIELD(b, len, p);
    rc = str2buf(b, p - b, data->chr, sizeof(data->chr));
    CG_LINE_NEXT_FIELD(b, len, p);
    rc = str2i32(b, p - b, &data->offset);
    CG_LINE_NEXT_FIELD(b, len, p);
    rc = str2u32(b, p - b, &data->length);
    CG_LINE_NEXT_FIELD(b, len, p);
    rc = str2u16(b, p - b, &data->ploidy);
    CG_LINE_NEXT_FIELD(b, len, p);
    rc = str2buf(b, p - b, data->allele_indexes, sizeof(data->allele_indexes));
    /* scores: this format has two score columns; `score` mirrors the first */
    CG_LINE_NEXT_FIELD(b, len, p);
    rc = str2i32(b, p - b, &data->scoreVAF);
    data->score = data->scoreVAF; /***TODO: do we need re-calculation? ***/
    CG_LINE_NEXT_FIELD(b, len, p);
    rc = str2i32(b, p - b, &data->scoreEAF);
    /* allele sequences 0..3, each with its raw field length */
    CG_LINE_NEXT_FIELD(b, len, p);
    rc = str2buf(b, p - b, data->allele[0], sizeof(data->allele[0]));
    data->allele_length[0] = p - b;
    CG_LINE_NEXT_FIELD(b, len, p);
    rc = str2buf(b, p - b, data->allele[1], sizeof(data->allele[1]));
    data->allele_length[1] = p - b;
    CG_LINE_NEXT_FIELD(b, len, p);
    rc = str2buf(b, p - b, data->allele[2], sizeof(data->allele[2]));
    data->allele_length[2] = p - b;
    CG_LINE_NEXT_FIELD(b, len, p);
    rc = str2buf(b, p - b, data->allele[3], sizeof(data->allele[3]));
    data->allele_length[3] = p - b;
    /* allele alignments 1..3 (index 0 is left untouched here) */
    CG_LINE_NEXT_FIELD(b, len, p);
    rc = str2buf(b, p - b, data->allele_alignment[1], sizeof(data->allele_alignment[1]));
    data->allele_alignment_length[1] = p - b;
    CG_LINE_NEXT_FIELD(b, len, p);
    rc = str2buf(b, p - b, data->allele_alignment[2], sizeof(data->allele_alignment[2]));
    data->allele_alignment_length[2] = p - b;
    CG_LINE_LAST_FIELD(b, len, p);
    rc = str2buf(b, p - b, data->allele_alignment[3], sizeof(data->allele_alignment[3]));
    data->allele_alignment_length[3] = p - b;
    /* counted even if the last conversion above failed; rc still reports it */
    ((CGEvidenceIntervals15*)cself)->records++;
    /* NOTE(review): scoreVAF/scoreEAF are parsed with str2i32 but printed with %u - confirm the field types */
    DEBUG_MSG(10, (
                  "evidenceIntervals: '%s'\t'%s'\t%i\t%u\t%u\t%s\t%u\t%u\t'%s'\t'%s'\t'%s'\t'%s'\t'%s'\t'%s'\t'%s'\n",
                  data->interval_id, data->chr, data->offset, data->length, data->ploidy,
                  data->allele_indexes, data->scoreVAF, data->scoreEAF,
                  data->allele[0], data->allele[1], data->allele[2], data->allele[3],
                  data->allele_alignment[1], data->allele_alignment[2], data->allele_alignment[3]));
    CG_LINE_END();
    return rc;
}
/*
 * Collect into data->dnbs every evidenceDnbs record whose interval id equals
 * `interval_id`, and store the number collected in data->qty.
 *
 * Records are read line by line until one with a different interval id shows
 * up. That look-ahead record cannot be pushed back, so it is parked in the
 * function-static `next_rec` / `next_interval_id` and handed out at the start
 * of the next call that asks for its interval. If the parked id differs from
 * the requested one, the function returns at once with data->qty == 0 and
 * reads nothing.
 *
 * score_allele_num is the number of ScoreAllele columns on each line (3 or 4);
 * they are parsed but not stored in the output record.
 *
 * Returns 0 on success (including end of input) or a non-zero rc for
 * allocation, parse or validation failures.
 *
 * NOTE(review): the look-ahead state is static, hence shared by every caller
 * and every file object; the function is not reentrant.
 * NOTE(review): strcpy() into data->interval_id is unbounded; the destination
 * size is not visible here - confirm callers never pass a longer id.
 * NOTE(review): the parked record is stored at data->dnbs[0] without a
 * capacity check; this presumably relies on `data` being reused across calls
 * so that the buffer already exists - confirm.
 * NOTE(review): the CG_LINE_* macros are defined outside this view. The bare
 * `break` statements inside the line scope are followed by re-checks after
 * CG_LINE_END(), which implies CG_LINE_START/CG_LINE_END delimit their own
 * breakable scope; the field macros presumably bail out when rc is already
 * non-zero - confirm against the macro definitions.
 */
static
rc_t CC CGEvidenceDnbs_Read(const CGEvidenceDnbs15* cself, const char* interval_id, TEvidenceDnbsData* data, int score_allele_num)
{
    rc_t rc = 0;
    TEvidenceDnbsData_dnb* m = NULL;
    static TEvidenceDnbsData_dnb next_rec;
    static char next_interval_id[32] = "";

    /* local copy of unused TEvidenceDnbsData_dnb struct elements */
    char reference_alignment[CG_EVDNC_ALLELE_CIGAR_LEN];
    INSDC_coord_zero mate_offset_in_reference;
    char mate_reference_alignment[CG_EVDNC_ALLELE_CIGAR_LEN];
    uint16_t score_allele[4] = {0, 0, 0, 0}; /* v1.5 has ScoreAllele[012]; v2.0 - [0123] */
    char qual[CG_EVDNC_SPOT_LEN];

    strcpy(data->interval_id, interval_id);
    data->qty = 0;
    /* already read one rec for this interval_id */
    if( next_interval_id[0] != '\0' ) {
        if( strcmp(next_interval_id, interval_id) != 0 ) {
            /* nothing to do since next interval id is different */
            return rc;
        }
        m = &data->dnbs[data->qty++];
        memcpy(m, &next_rec, sizeof(next_rec));
        DEBUG_MSG(10, ("%3u evidenceDnbs: '%s'\t'%s'\t'%s'\t'%s'\t%u\t%lu\t%hu\t%c\t%c\t%i\t'%.*s'"
                        "\t%i\tnot_used\t0\tnot_used\t%c\t0\t0\t0\t'%.*s'\t'--'\n",
            data->qty, next_interval_id, m->chr, m->slide, m->lane, m->file_num_in_lane,
            m->dnb_offset_in_lane_file, m->allele_index, m->side, m->strand, m->offset_in_allele,
            m->allele_alignment_length, m->allele_alignment, m->offset_in_reference,
            m->mapping_quality, m->read_len, m->read));
    }
    do {
        int i = 0;
        /* zero-initialised so the single-character checks below never read an
           indeterminate byte should str2buf leave the buffer untouched on failure */
        char tmp[2] = {0};
        CG_LINE_START(cself->file, b, len, p);
        if( b == NULL || len == 0 ) {
            /* an empty look-ahead id makes the check after CG_LINE_END() stop the loop */
            next_interval_id[0] = '\0';
            break; /* EOF */
        }
        if( data->qty >= data->max_qty ) {
            /* grow by 100 records; the new capacity is committed only after
               realloc succeeded so max_qty never exceeds the real allocation */
            TEvidenceDnbsData_dnb* x = realloc(data->dnbs, sizeof(*(data->dnbs)) * (data->max_qty + 100));
            if( x == NULL ) {
                rc = RC(rcRuntime, rcFile, rcReading, rcMemory, rcExhausted);
                break;
            }
            data->dnbs = x;
            data->max_qty += 100;
        }
        m = &data->dnbs[data->qty++];

        /*DEBUG_MSG(10, ("%2hu evidenceDnbs: '%.*s'\n", data->qty, len, b));*/
        /* the interval id goes to the look-ahead buffer; it is compared with
           the requested id once the whole line has been parsed */
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2buf(b, p - b, next_interval_id, sizeof(next_interval_id));
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2buf(b, p - b, m->chr, sizeof(m->chr));
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2buf(b, p - b, m->slide, sizeof(m->slide));
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2buf(b, p - b, m->lane, sizeof(m->lane));
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2u32(b, p - b, &m->file_num_in_lane);
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2u64(b, p - b, &m->dnb_offset_in_lane_file);
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2u16(b, p - b, &m->allele_index);
        /* side: a single character, 'L' or 'R' */
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2buf(b, p - b, tmp, sizeof(tmp));
        if( tmp[0] != 'L' && tmp[0] != 'R' ) {
            rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
        }
        m->side = tmp[0];
        /* strand: a single character, '+' or '-' */
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2buf(b, p - b, tmp, sizeof(tmp));
        if( tmp[0] != '+' && tmp[0] != '-' ) {
            rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
        }
        m->strand = tmp[0];
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2i32(b, p - b, &m->offset_in_allele);
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2buf(b, p - b, m->allele_alignment, sizeof(m->allele_alignment));
        m->allele_alignment_length = p - b;
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2i32(b, p - b, &m->offset_in_reference);
        /* the next three fields are parsed into locals only (used for the debug trace) */
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2buf(b, p - b, reference_alignment, sizeof(reference_alignment));
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2i32(b, p - b, &mate_offset_in_reference);
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2buf(b, p - b, mate_reference_alignment, sizeof(mate_reference_alignment));
        /* mapping quality: a single character in the range 33..126 */
        CG_LINE_NEXT_FIELD(b, len, p);
        rc = str2buf(b, p - b, tmp, sizeof(tmp));
        if( tmp[0] < 33 || tmp[0] > 126 ) {
            rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
        }
        m->mapping_quality = tmp[0];
        /* ScoreAllele columns: a value that fails to convert is treated as 0
           and does not fail the record.
           NOTE(review): a `break` issued by the field macro here would leave
           only this for loop; the next field macro presumably re-checks rc. */
        for (i = 0; i < score_allele_num; ++i) {
            CG_LINE_NEXT_FIELD(b, len, p);
            rc = str2u16(b, p - b, &score_allele[i]);
            if(rc){
                score_allele[i] =0;
                rc =0;
            }
        }
        CG_LINE_NEXT_FIELD(b, len, p);
        m->read_len = p - b;
        rc = str2buf(b, m->read_len, m->read, sizeof(m->read));
        /* the last field (qualities) must be exactly as long as the read */
        CG_LINE_LAST_FIELD(b, len, p);
        if( m->read_len != p - b ) {
            rc = RC(rcRuntime, rcFile, rcReading, rcData, rcInconsistent);
        } else {
            rc = str2buf(b, p - b, qual, sizeof(qual));
        }
        ((CGEvidenceDnbs15*)cself)->records++;
        /* the full trace is emitted only for a record of a different interval;
           an unsupported score_allele_num is also detected only on this path */
        if( strcmp(next_interval_id, data->interval_id) != 0 ) {
            if (score_allele_num == 3) {
              DEBUG_MSG(10, ("%3u evidenceDnbs: '%s'\t'%s'\t'%s'\t'%s'\t%u\t%lu\t%hu\t%c\t%c\t%i\t'%.*s'"
                            "\t%i\t'%s'\t%i\t'%s'\t%c\t%hu\t%hu\t%hu\t'%.*s'\t'%s'\n",
                data->qty, next_interval_id, m->chr, m->slide, m->lane, m->file_num_in_lane,
                m->dnb_offset_in_lane_file, m->allele_index, m->side, m->strand, m->offset_in_allele,
                m->allele_alignment_length, m->allele_alignment, m->offset_in_reference,
                reference_alignment, mate_offset_in_reference, mate_reference_alignment,
                m->mapping_quality, score_allele[0], score_allele[1], score_allele[2], m->read_len, m->read, qual));
            }
            else if (score_allele_num == 4) {
              DEBUG_MSG(10, ("%3u evidenceDnbs: '%s'\t'%s'\t'%s'\t'%s'\t%u\t%lu\t%hu\t%c\t%c\t%i\t'%.*s'"
                            "\t%i\t'%s'\t%i\t'%s'\t%c\t%hu\t%hu\t%hu\t%hu\t'%.*s'\t'%s'\n",
                data->qty, next_interval_id, m->chr, m->slide, m->lane, m->file_num_in_lane,
                m->dnb_offset_in_lane_file, m->allele_index, m->side, m->strand, m->offset_in_allele,
                m->allele_alignment_length, m->allele_alignment, m->offset_in_reference,
                reference_alignment, mate_offset_in_reference, mate_reference_alignment,
                m->mapping_quality, score_allele[0], score_allele[1], score_allele[2], score_allele[3], m->read_len, m->read, qual));
            }
            else { assert(0); }
        }
        CG_LINE_END();
        /* empty look-ahead id: end of input was reached inside the line scope */
        if( next_interval_id[0] == '\0' ) {
            break;
        }
        if( strcmp(next_interval_id, data->interval_id) != 0 ) {
            /* next record is from next interval, remember it and stop */
            memcpy(&next_rec, m, sizeof(next_rec));
            data->qty--;
            break;
        }
    } while( rc == 0 );
    return rc;
}