/*
 * pbwtSubRange: build a new PBWT containing only the sites with index in
 * [start, end) of pOld.
 *
 * pOld is consumed: it is passed to pbwtDestroy() before returning, so the
 * caller must only use the returned PBWT afterwards.
 *
 * Calls die() if pOld is NULL, has no yz array, or if the range is not
 * 0 <= start < end <= pOld->N.
 */
PBWT *pbwtSubRange (PBWT *pOld, int start, int end)
{
  /* validate BEFORE touching pOld - previously pOld->M was read and a cursor
     was created on pOld ahead of this test, so the test could never fire */
  if (!pOld || !pOld->yz) die ("subrange without an existing pbwt") ;
  if (start < 0 || end > pOld->N || end <= start)
    die ("subrange invalid start %d, end %d", start, end) ;

  int M = pOld->M ;
  PBWT *pNew = pbwtCreate (M) ;
  int i, j ;
  uchar *x ;
  PbwtCursor *uOld = pbwtCursorCreate (pOld, TRUE, TRUE) ;

  /* the new pbwt needs its yz array in place before a cursor is opened on it */
  pNew->yz = arrayCreate (pNew->N*8, uchar) ;
  PbwtCursor *uNew = pbwtCursorCreate (pNew, TRUE, TRUE) ;

  x = myalloc (M, uchar) ;	/* scratch column, indexed by the values held in a[] */
  if (pOld->sites) pNew->sites = arrayCreate (4096, Site) ;

  /* sites before start are only read past, to advance the old cursor */
  for (i = 0 ; i < end ; ++i)
    {
      if (i >= start)
	{
	  /* scatter the old cursor's column through uOld->a into x, then
	     gather it back in the order given by uNew->a */
	  for (j = 0 ; j < M ; ++j) x[uOld->a[j]] = uOld->y[j] ;
	  for (j = 0 ; j < M ; ++j) uNew->y[j] = x[uNew->a[j]] ;
	  pbwtCursorWriteForwards (uNew) ;
	  if (pOld->sites)
	    array(pNew->sites, pNew->N, Site) = arr(pOld->sites, i, Site) ;
	  ++pNew->N ;
	}
      pbwtCursorForwardsRead (uOld) ;
    }

  /* keep the final a[] of the new cursor as pNew->aFend */
  pNew->aFend = myalloc (pNew->M, int) ;
  memcpy (pNew->aFend, uNew->a, pNew->M*sizeof(int)) ;

  pbwtDestroy (pOld) ;
  pbwtCursorDestroy (uOld) ;
  pbwtCursorDestroy (uNew) ;
  free(x) ;
  return pNew ;
}
/*
 * pbwtBuildReverse: fill p->zz with a PBWT written while reading p's forward
 * PBWT backwards from its last site to its first.
 *
 * Side effects on p, as performed below:
 *   - p->aRstart is allocated if NULL and set to the forward cursor's a[]
 *     at the end of the forward pass;
 *   - p->zz is (re)created with size arrayMax(p->yz);
 *   - p->aRend is allocated if NULL and set to the reverse cursor's final a[];
 *   - if p->aFend was not set, pbwtCursorToAFend (uF, p) is called after
 *     running a forward cursor over all p->N sites.
 * A size message is written to logFile.  When isCheck is set the reversed
 * haplotypes are also dumped to the file "rev.haps".
 */
void pbwtBuildReverse (PBWT *p)
{
  int i, j, M = p->M ;
  uchar *x = myalloc (M, uchar) ;	/* scratch column, indexed by values held in a[] */
  PbwtCursor *uF ;

  if (p->aFend)
    /* NOTE(review): the FALSE flag presumably opens the cursor at the end
       using the stored aFend - confirm against pbwtCursorCreate */
    uF = pbwtCursorCreate (p, TRUE, FALSE) ;
  else
    {
      uF = pbwtCursorCreate (p, TRUE, TRUE) ;
      for (i = 0 ; i < p->N ; ++i)	/* first run forwards to the end */
	pbwtCursorForwardsRead (uF) ;
      pbwtCursorToAFend (uF, p) ;
    }

  /* use p->aFend also to start the reverse cursor - this gives better performance */
  if (!p->aRstart) p->aRstart = myalloc (M, int) ;
  memcpy (p->aRstart, uF->a, M * sizeof(int)) ;

  /* aRstart and zz must both be in place before the reverse cursor is opened */
  p->zz = arrayReCreate (p->zz, arrayMax(p->yz), uchar) ;
  PbwtCursor *uR = pbwtCursorCreate (p, FALSE, TRUE) ; /* will pick up aRstart */

  for (i = p->N ; i-- ; )
    {
      pbwtCursorReadBackwards (uF) ;
      /* scatter the forward column through uF->a into x, then gather it
	 back in the order given by uR->a */
      for (j = 0 ; j < M ; ++j) x[uF->a[j]] = uF->y[j] ;
      for (j = 0 ; j < M ; ++j) uR->y[j] = x[uR->a[j]] ;
      pbwtCursorWriteForwards (uR) ;
    }

  /* save uR->a, which is the lexicographic order of the sequences */
  if (!p->aRend) p->aRend = myalloc (M, int) ;
  memcpy (p->aRend, uR->a, M * sizeof(int)) ;

  fprintf (logFile, "built reverse PBWT - size %ld\n", arrayMax(p->zz)) ;

  if (isCheck)			/* print out the reversed haplotypes */
    {
      FILE *fp = fopen ("rev.haps","w") ;
      if (!fp)
	/* previously a failed open was handed straight to the writer */
	fprintf (logFile, "failed to open rev.haps for writing - skipping reversed haplotype dump\n") ;
      else
	{
	  /* temporarily present the reverse PBWT as if it were the forward
	     one so the ordinary haplotype writer can be reused, then restore */
	  Array tz = p->yz ; p->yz = p->zz ;
	  int *ta = p->aFstart ; p->aFstart = p->aRstart ;
	  pbwtWriteHaplotypes (fp, p) ;
	  p->yz = tz ; p->aFstart = ta ;
	  /* the stream was opened here, so close it here; it used to leak.
	     NOTE(review): assumes pbwtWriteHaplotypes does not close fp - confirm */
	  fclose (fp) ;
	}
    }

  free (x) ;
  pbwtCursorDestroy (uF) ;
  pbwtCursorDestroy (uR) ;
}
/*
 * pbwtSubSites: build a new PBWT from a subset of the sites of pOld.
 *
 * A site is kept when the old cursor's c value is below M*(1-fmin) AND the
 * running accumulator, increased by frac for each site that passes the c
 * test, has just exceeded 1.0 (it is then reduced by 1.0).
 * NOTE(review): c is presumably a per-site allele count, making fmin a
 * minimum-frequency threshold and frac a thinning fraction - confirm against
 * the PbwtCursor definition.
 *
 * pOld is consumed: its chrom, samples, missingOffset and zMissing members
 * are handed to the new PBWT and pOld is then passed to pbwtDestroy().
 *
 * Calls die() if pOld is NULL or has no yz array, or unless
 * 0 <= fmin < 1 and 0 < frac <= 1.  The resulting site count is written to
 * logFile.
 */
PBWT *pbwtSubSites (PBWT *pOld, double fmin, double frac)
{
  /* validate BEFORE touching pOld - previously pOld->M was read and a cursor
     was created on pOld ahead of this test, so the test could never fire */
  if (!pOld || !pOld->yz) die ("subsites without an existing pbwt") ;
  if (fmin < 0 || fmin >= 1 || frac <= 0 || frac > 1)
    die ("fmin %f, frac %f for subsites out of range\n", fmin, frac) ;

  int M = pOld->M ;
  PBWT *pNew = pbwtCreate (M, 0) ;
  int i, j, thresh = M*(1-fmin) ;
  double bit = 0.0 ;		/* accumulator for the frac-based thinning */
  uchar *x ;
  PbwtCursor *uOld = pbwtCursorCreate (pOld, TRUE, TRUE) ;
  PbwtCursor *uNew = pbwtCursorCreate (pNew, TRUE, TRUE) ;

  x = myalloc (M, uchar) ;	/* scratch column, indexed by the values held in a[] */
  if (pOld->sites) pNew->sites = arrayCreate (4096, Site) ;

  for (i = 0 ; i < pOld->N ; ++i)
    {
      /* && short-circuits: bit only grows for sites that pass the c test */
      if ((uOld->c < thresh) && ((bit += frac) > 1.0))
	{
	  /* scatter the old cursor's column through uOld->a into x, then
	     gather it back in the order given by uNew->a */
	  for (j = 0 ; j < M ; ++j) x[uOld->a[j]] = uOld->y[j] ;
	  for (j = 0 ; j < M ; ++j) uNew->y[j] = x[uNew->a[j]] ;
	  pbwtCursorWriteForwards (uNew) ;
	  if (pOld->sites)
	    array(pNew->sites, pNew->N, Site) = arr(pOld->sites, i, Site) ;
	  ++pNew->N ;
	  bit -= 1.0 ;
	}
      pbwtCursorForwardsRead (uOld) ;
    }
  pbwtCursorToAFend (uNew, pNew) ;

  fprintf (logFile, "subsites with fmin %f, frac %f leaves %d sites\n",
	   fmin, frac, pNew->N) ;

  /* hand the shared metadata over to pNew and clear it in pOld before pOld
     is destroyed - presumably so that pbwtDestroy cannot release it */
  pNew->chrom = pOld->chrom ; pOld->chrom = 0 ;
  pNew->samples = pOld->samples ; pOld->samples = 0 ;
  pNew->missingOffset = pOld->missingOffset ; pOld->missingOffset = 0 ;
  pNew->zMissing = pOld->zMissing ; pOld->zMissing = 0 ;

  pbwtDestroy (pOld) ;
  pbwtCursorDestroy (uOld) ;
  pbwtCursorDestroy (uNew) ;
  free(x) ;
  return pNew ;
}
/*
 * pbwtSubRange: build a new PBWT containing only the sites with index in
 * [start, end) of pOld.
 *
 * pOld is consumed: its chrom, samples, missingOffset and zMissing members
 * are handed to the new PBWT and pOld is then passed to pbwtDestroy(), so
 * the caller must only use the returned PBWT afterwards.
 *
 * Calls die() if pOld is NULL, has no yz array, or if the range is not
 * 0 <= start < end <= pOld->N.
 */
PBWT *pbwtSubRange (PBWT *pOld, int start, int end)
{
  /* validate BEFORE touching pOld - previously pOld->M was read and a cursor
     was created on pOld ahead of this test, so the test could never fire */
  if (!pOld || !pOld->yz) die ("subrange without an existing pbwt") ;
  if (start < 0 || end > pOld->N || end <= start)
    die ("subrange invalid start %d, end %d", start, end) ;

  int M = pOld->M ;
  PBWT *pNew = pbwtCreate (M, 0) ;
  int i, j ;
  uchar *x ;
  PbwtCursor *uOld = pbwtCursorCreate (pOld, TRUE, TRUE) ;
  PbwtCursor *uNew = pbwtCursorCreate (pNew, TRUE, TRUE) ;

  x = myalloc (M, uchar) ;	/* scratch column, indexed by the values held in a[] */
  if (pOld->sites) pNew->sites = arrayCreate (4096, Site) ;

  /* sites before start are only read past, to advance the old cursor */
  for (i = 0 ; i < end ; ++i)
    {
      if (i >= start)
	{
	  /* scatter the old cursor's column through uOld->a into x, then
	     gather it back in the order given by uNew->a */
	  for (j = 0 ; j < M ; ++j) x[uOld->a[j]] = uOld->y[j] ;
	  for (j = 0 ; j < M ; ++j) uNew->y[j] = x[uNew->a[j]] ;
	  pbwtCursorWriteForwards (uNew) ;
	  if (pOld->sites)
	    array(pNew->sites, pNew->N, Site) = arr(pOld->sites, i, Site) ;
	  ++pNew->N ;
	}
      pbwtCursorForwardsRead (uOld) ;
    }
  pbwtCursorToAFend (uNew, pNew) ;

  /* hand the shared metadata over to pNew and clear it in pOld before pOld
     is destroyed - presumably so that pbwtDestroy cannot release it */
  pNew->chrom = pOld->chrom ; pOld->chrom = 0 ;
  pNew->samples = pOld->samples ; pOld->samples = 0 ;
  pNew->missingOffset = pOld->missingOffset ; pOld->missingOffset = 0 ;
  pNew->zMissing = pOld->zMissing ; pOld->zMissing = 0 ;

  pbwtDestroy (pOld) ;
  pbwtCursorDestroy (uOld) ;
  pbwtCursorDestroy (uNew) ;
  free(x) ;
  return pNew ;
}