/*
 * Rebuild the character sequence of one read by walking the BWT.
 *
 * The walk starts at row cumulativeFreq[4] + readID and repeatedly reads the
 * BWT value at the current row, mapping values 0..3 to 'A','C','G','T' and
 * jumping to cumulativeFreq[c] + BWTOccValue(bwt, row, c). It stops when the
 * value 4 is read. Characters are collected in walk order and then reversed
 * in place, so `seq` ends up holding the reversed walk, NUL-terminated.
 *
 * idx2BWT  index supplying `bwt` and `readIDtable`
 * readID   offset added to cumulativeFreq[4] to pick the starting row
 * seq      output buffer; receives readLen characters plus a trailing 0.
 *          No capacity is passed in, so the caller must size it.
 * qual     not used by this function
 *
 * Returns the number of characters written (excluding the terminator).
 * Side effect: prints one "ReadID verify" debug line to stderr per call.
 */
int extractRead(Idx2BWT * idx2BWT, unsigned long long readID, char* seq, char* qual) 
{
    static char dnaChars[] = {'A','C','G','T'};
    BWT *bwt = idx2BWT->bwt;
    ULL row = bwt->cumulativeFreq[4] + readID;
    int readLen = 0;
    unsigned int c;

    /* Collect characters until the value 4 is read. */
    while ((c = _getBWTvalue(bwt, row)) != 4) {
        row = bwt->cumulativeFreq[c] + BWTOccValue(bwt, row, c);
        seq[readLen] = dnaChars[c];
        ++readLen;
    }

    /* The walk produced the characters back to front; flip them in place. */
    for (int lo = 0, hi = readLen - 1; lo < hi; ++lo, --hi) {
        char tmp = seq[lo];
        seq[lo] = seq[hi];
        seq[hi] = tmp;
    }
    seq[readLen] = 0;

    /* One more step on the value 4; the resulting row is only used by the
       debug print below. */
    row = bwt->cumulativeFreq[c] + BWTOccValue(bwt, row, c);
    fprintf(stderr, "ReadID verify: %d. Just for debug, comment this at file %s, line: %d.\n", idx2BWT->readIDtable[row - bwt->cumulativeFreq[4]], __FILE__, __LINE__);

    return readLen;
}
// Example #2
// 0
// Given an SA range in (*saIndexLeft, *saIndexRight), compute the SA range
// for character c and store it back through the same two pointers.
//
// Both input bounds are read before either output is written. The new bounds
// combine bwt->cumulativeFreq[c] with occurrence counts taken from rev_bwt:
//   left  = cumulativeFreq[c] + Occ(rev_bwt, oldLeft, c) + 1
//   right = cumulativeFreq[c] + Occ(rev_bwt, oldRight + 1, c)
// NOTE(review): presumably the incoming range indexes rev_bwt — confirm
// against callers.
void BWTSARangeForeward(Idx2BWT * idx2BWT, const unsigned char c, 
                        unsigned int *saIndexLeft, unsigned int *saIndexRight) {

    BWT *bwt = idx2BWT->bwt;
    BWT *rev_bwt = idx2BWT->rev_bwt;

    const unsigned int oldLeft  = *saIndexLeft;
    const unsigned int oldRight = *saIndexRight;

    *saIndexLeft  = bwt->cumulativeFreq[c] + BWTOccValue(rev_bwt, oldLeft, c) + 1;
    *saIndexRight = bwt->cumulativeFreq[c] + BWTOccValue(rev_bwt, oldRight + 1, c);
}
/*
 * Depth-first collection of read records starting from the SA range
 * [saL, saR] (inclusive).
 *
 * Every record is written through `ri` (fields read_id, strand, pos). `ri`
 * is a reference to the caller's cursor and is advanced past each record
 * written.
 *
 * idx2BWT      index supplying `bwt` and `readIDtable`
 * saL, saR     inclusive SA range to expand
 * ri           output cursor, advanced once per record written
 * outputLimit  maximum number of records to write. Expected to be > 0 on
 *              entry: the single-row branch writes one record without
 *              checking it. Recursive calls are only made while it is > 0.
 * strand       copied unchanged into every record
 * _pos         position counter; increased by one per extension step and
 *              stored in the record
 *
 * Returns the number of records written (for outputLimit > 0).
 */
int _dfsExtractReadInf(Idx2BWT * idx2BWT, ULL saL, ULL saR, ReadInf* &ri, int outputLimit, int strand, int _pos)
{
    const ULL l = saL;
    const ULL r = saR;

    if (saL == saR) { // only one route: walk the BWT until the value 4 ($) is read
        unsigned int c;
        do {
            c = _getBWTvalue(idx2BWT->bwt, saL);
            saL = idx2BWT->bwt->cumulativeFreq[c] + BWTOccValue(idx2BWT->bwt, saL, c);
            ++_pos;
        } while (c != 4);

        ri->read_id = idx2BWT->readIDtable[saL - idx2BWT->bwt->cumulativeFreq[4]];
        ri->strand = strand;
        ri->pos = _pos - 1; // the final step (on $) was counted by the loop; undo it
        ++ri;
        return 1;
    }

    ULL numSAs = 0;     // rows of [l, r] accounted for by the A/C/G/T children so far
    int totalCount = 0;

    for (int i = 0; i < 4; ++i) { // try A C G T
        saL = l; saR = r;         // every child starts from the parent range
        BWTSARangeBackward(idx2BWT, i, &saL, &saR);

        // Skip empty children, and children wider than the parent range.
        if (saL > saR || saR - saL > r - l) {
            continue;
        }

        int outputCount = _dfsExtractReadInf(idx2BWT, saL, saR, ri, outputLimit, strand, _pos + 1);
        outputLimit -= outputCount;
        totalCount += outputCount;
        if (outputLimit <= 0) return totalCount;

        numSAs += saR - saL + 1;
        if (numSAs == r - l + 1) { // children cover the whole parent range: nothing left for $
            return totalCount;
        }
    }

    // try add $
    saL = l; saR = r;
    BWTSARangeBackward(idx2BWT, 4, &saL, &saR); // 4 is $
    if (saL <= saR) {
        // Compare in ULL and narrow only once the value is known to fit:
        // assigning the raw count to an int first would mis-handle ranges
        // of 2^31 or more rows.
        const ULL available = saR - saL + 1;
        int limit = outputLimit;
        if (outputLimit > 0 && available < (ULL)outputLimit) {
            limit = (int)available;
        }

        for (int i = 0; i < limit; ++i) {
            ri->read_id = idx2BWT->readIDtable[saL + i - idx2BWT->bwt->cumulativeFreq[4]];
            ri->strand = strand;
            ri->pos = _pos;
            ++ri;
        }
        totalCount += limit;
    }

    return totalCount;
}