Пример #1
0
/*
 * pbwtSubRange: build a new PBWT containing only sites [start, end) of pOld.
 *
 * For every retained site the column is unpacked from pOld's current sort
 * order into original-index order (through the scratch buffer x), re-packed
 * in the new PBWT's own sort order and written out.  When pOld carries a
 * sites array the matching Site records are copied as well.
 *
 * The final sort order of the new cursor is stored in pNew->aFend.
 * pOld is destroyed before returning; the caller owns the returned PBWT.
 * Calls die() if pOld is missing, has no yz, or the range is invalid.
 */
PBWT *pbwtSubRange (PBWT *pOld, int start, int end)
{
  /* validate first: nothing below may dereference pOld until this passes */
  if (!pOld || !pOld->yz) die ("subrange without an existing pbwt") ;
  if (start < 0 || end > pOld->N || end <= start)
    die ("subrange invalid start %d, end %d", start, end) ;

  int M = pOld->M ;
  int i, j ;
  PBWT *pNew = pbwtCreate (M) ;
  PbwtCursor *uOld = pbwtCursorCreate (pOld, TRUE, TRUE) ;
  /* NOTE(review): initial size taken from pNew->N exactly as before -
     presumably 0 on a fresh PBWT with the array growing on write; confirm */
  pNew->yz = arrayCreate (pNew->N*8, uchar) ;
  PbwtCursor *uNew = pbwtCursorCreate (pNew, TRUE, TRUE) ;
  uchar *x = myalloc (M, uchar) ;   /* one value per sequence, original order */

  if (pOld->sites) pNew->sites = arrayCreate (4096, Site) ;

  /* uOld has to be stepped through the skipped prefix [0, start) too */
  for (i = 0 ; i < end ; ++i)
    { if (i >= start)
        { for (j = 0 ; j < M ; ++j) x[uOld->a[j]] = uOld->y[j] ;   /* unsort */
          for (j = 0 ; j < M ; ++j) uNew->y[j] = x[uNew->a[j]] ;   /* re-sort */
          pbwtCursorWriteForwards (uNew) ;
          if (pOld->sites)
            array(pNew->sites, pNew->N, Site) = arr(pOld->sites, i, Site) ;
          ++pNew->N ;
        }
      pbwtCursorForwardsRead (uOld) ;
    }

  /* keep the end-of-range sort order so later cursors can start from it */
  pNew->aFend = myalloc (pNew->M, int) ;
  memcpy (pNew->aFend, uNew->a, pNew->M*sizeof(int)) ;

  /* release the cursors before the PBWT they were created on */
  pbwtCursorDestroy (uOld) ;
  pbwtCursorDestroy (uNew) ;
  pbwtDestroy (pOld) ;
  free (x) ;
  return pNew ;
}
Пример #2
0
/*
 * pbwtBuildReverse: build the reverse-direction PBWT of p into p->zz.
 *
 * A forward cursor positioned at the end of p is read backwards site by
 * site; each column is unpacked into original-index order and re-packed in
 * the reverse cursor's sort order before being written.
 *
 * Side effects on p: aRstart and aRend are (allocated if needed and)
 * filled, zz is recreated, and aFend is set via pbwtCursorToAFend() when
 * it was not already present.  With isCheck set, the reversed haplotypes
 * are additionally written to "rev.haps".
 * Calls die() if p is missing or has no yz.
 */
void pbwtBuildReverse (PBWT *p)
{
  if (!p || !p->yz) die ("build reverse without an existing pbwt") ;

  int i, j, M = p->M ;
  uchar *x = myalloc (M, uchar) ;   /* one value per sequence, original order */
  PbwtCursor *uF ;

  if (p->aFend)
    uF = pbwtCursorCreate (p, TRUE, FALSE) ;  /* presumably starts at the end using aFend - confirm */
  else
    { uF = pbwtCursorCreate (p, TRUE, TRUE) ;
      for (i = 0 ; i < p->N ; ++i)	/* first run forwards to the end */
        pbwtCursorForwardsRead (uF) ;
      pbwtCursorToAFend (uF, p) ;
    }

  /* use p->aFend also to start the reverse cursor - this gives better performance */
  if (!p->aRstart) p->aRstart = myalloc (M, int) ;
  memcpy (p->aRstart, uF->a, M * sizeof(int)) ;   /* always copied, not only on first allocation */

  p->zz = arrayReCreate (p->zz, arrayMax(p->yz), uchar) ;
  PbwtCursor *uR = pbwtCursorCreate (p, FALSE, TRUE) ; /* will pick up aRstart */
  for (i = p->N ; i-- ; )
    { pbwtCursorReadBackwards (uF) ;
      for (j = 0 ; j < M ; ++j) x[uF->a[j]] = uF->y[j] ;   /* unsort */
      for (j = 0 ; j < M ; ++j) uR->y[j] = x[uR->a[j]] ;   /* re-sort */
      pbwtCursorWriteForwards (uR) ;
    }

  /* save uR->a, which is the lexicographic order of the sequences */
  if (!p->aRend) p->aRend = myalloc (M, int) ;
  memcpy (p->aRend, uR->a, M * sizeof(int)) ;

  /* cast keeps %ld correct whatever integer type arrayMax yields */
  fprintf (logFile, "built reverse PBWT - size %ld\n", (long) arrayMax(p->zz)) ;

  if (isCheck)			/* print out the reversed haplotypes */
    { FILE *fp = fopen ("rev.haps","w") ;
      if (!fp)
        fprintf (logFile, "could not open rev.haps for writing - skipping check output\n") ;
      else
        { /* temporarily present the reverse data as if it were the forward
             data so the ordinary haplotype writer can be reused */
          Array tz = p->yz ; p->yz = p->zz ;
          int *ta = p->aFstart ; p->aFstart = p->aRstart ;
          pbwtWriteHaplotypes (fp, p) ;
          p->yz = tz ; p->aFstart = ta ;
          /* NOTE(review): assumes pbwtWriteHaplotypes leaves fp open for
             its caller to close - confirm */
          fclose (fp) ;
        }
    }

  free (x) ;
  pbwtCursorDestroy (uF) ; pbwtCursorDestroy (uR) ;
}
Пример #3
0
/*
 * pbwtSubSites: build a new PBWT from a subset of the sites of pOld.
 *
 * A site is eligible when uOld->c < M*(1-fmin) (presumably c is the count
 * of 0 alleles at the site, making this a minimum-frequency filter -
 * confirm).  Eligible sites are then thinned to a fraction frac: an
 * accumulator gains frac per eligible site and a site is kept each time it
 * passes 1.0.
 *
 * Kept columns are unpacked into original-index order and re-packed in the
 * new PBWT's sort order; Site records are copied when pOld has them.
 * chrom, samples, missingOffset and zMissing are moved (not copied) from
 * pOld to the result, then pOld is destroyed.  The caller owns the
 * returned PBWT.
 * Calls die() if pOld is missing, has no yz, or fmin/frac are out of range
 * (fmin in [0,1), frac in (0,1]).
 */
PBWT *pbwtSubSites (PBWT *pOld, double fmin, double frac)
{
  /* validate first: nothing below may dereference pOld until this passes */
  if (!pOld || !pOld->yz) die ("subsites without an existing pbwt") ;
  if (fmin < 0 || fmin >= 1 || frac <= 0 || frac > 1)
    die ("fmin %f, frac %f for subsites out of range\n", fmin, frac) ;

  int M = pOld->M ;
  int i, j, thresh = M*(1-fmin) ;
  double bit = 0.0 ;                /* thinning accumulator */
  PBWT *pNew = pbwtCreate (M, 0) ;
  PbwtCursor *uOld = pbwtCursorCreate (pOld, TRUE, TRUE) ;
  PbwtCursor *uNew = pbwtCursorCreate (pNew, TRUE, TRUE) ;
  uchar *x = myalloc (M, uchar) ;   /* one value per sequence, original order */

  if (pOld->sites) pNew->sites = arrayCreate (4096, Site) ;

  for (i = 0 ; i < pOld->N ; ++i)
    { /* && short-circuits, so bit only advances on eligible sites */
      if ((uOld->c < thresh) && ((bit += frac) > 1.0))
        { for (j = 0 ; j < M ; ++j) x[uOld->a[j]] = uOld->y[j] ;   /* unsort */
          for (j = 0 ; j < M ; ++j) uNew->y[j] = x[uNew->a[j]] ;   /* re-sort */
          pbwtCursorWriteForwards (uNew) ;
          if (pOld->sites)
            array(pNew->sites, pNew->N, Site) = arr(pOld->sites, i, Site) ;
          ++pNew->N ;
          bit -= 1.0 ;
        }
      pbwtCursorForwardsRead (uOld) ;
    }
  pbwtCursorToAFend (uNew, pNew) ;

  fprintf (logFile, "subsites with fmin %f, frac %f leaves %d sites\n", fmin, frac, pNew->N) ;

  /* move ownership to pNew; zeroing the old fields keeps pbwtDestroy from
     releasing what pNew now holds */
  pNew->chrom = pOld->chrom ; pOld->chrom = 0 ;
  pNew->samples = pOld->samples ; pOld->samples = 0 ;
  pNew->missingOffset = pOld->missingOffset ; pOld->missingOffset = 0 ;
  pNew->zMissing = pOld->zMissing ; pOld->zMissing = 0 ;

  /* release the cursors before the PBWT they were created on */
  pbwtCursorDestroy (uOld) ;
  pbwtCursorDestroy (uNew) ;
  pbwtDestroy (pOld) ;
  free (x) ;
  return pNew ;
}
Пример #4
0
/*
 * pbwtSubRange: build a new PBWT containing only sites [start, end) of pOld.
 *
 * For every retained site the column is unpacked from pOld's current sort
 * order into original-index order (through the scratch buffer x), re-packed
 * in the new PBWT's own sort order and written out.  When pOld carries a
 * sites array the matching Site records are copied as well.
 *
 * chrom, samples, missingOffset and zMissing are moved (not copied) from
 * pOld to the result, then pOld is destroyed.  The caller owns the
 * returned PBWT.
 * Calls die() if pOld is missing, has no yz, or the range is invalid.
 */
PBWT *pbwtSubRange (PBWT *pOld, int start, int end)
{
  /* validate first: nothing below may dereference pOld until this passes */
  if (!pOld || !pOld->yz) die ("subrange without an existing pbwt") ;
  if (start < 0 || end > pOld->N || end <= start)
    die ("subrange invalid start %d, end %d", start, end) ;

  int M = pOld->M ;
  int i, j ;
  PBWT *pNew = pbwtCreate (M, 0) ;
  PbwtCursor *uOld = pbwtCursorCreate (pOld, TRUE, TRUE) ;
  PbwtCursor *uNew = pbwtCursorCreate (pNew, TRUE, TRUE) ;
  uchar *x = myalloc (M, uchar) ;   /* one value per sequence, original order */

  if (pOld->sites) pNew->sites = arrayCreate (4096, Site) ;

  /* uOld has to be stepped through the skipped prefix [0, start) too */
  for (i = 0 ; i < end ; ++i)
    { if (i >= start)
        { for (j = 0 ; j < M ; ++j) x[uOld->a[j]] = uOld->y[j] ;   /* unsort */
          for (j = 0 ; j < M ; ++j) uNew->y[j] = x[uNew->a[j]] ;   /* re-sort */
          pbwtCursorWriteForwards (uNew) ;
          if (pOld->sites)
            array(pNew->sites, pNew->N, Site) = arr(pOld->sites, i, Site) ;
          ++pNew->N ;
        }
      pbwtCursorForwardsRead (uOld) ;
    }
  pbwtCursorToAFend (uNew, pNew) ;

  /* move ownership to pNew; zeroing the old fields keeps pbwtDestroy from
     releasing what pNew now holds */
  pNew->chrom = pOld->chrom ; pOld->chrom = 0 ;
  pNew->samples = pOld->samples ; pOld->samples = 0 ;
  pNew->missingOffset = pOld->missingOffset ; pOld->missingOffset = 0 ;
  pNew->zMissing = pOld->zMissing ; pOld->zMissing = 0 ;

  /* release the cursors before the PBWT they were created on */
  pbwtCursorDestroy (uOld) ;
  pbwtCursorDestroy (uNew) ;
  pbwtDestroy (pOld) ;
  free (x) ;
  return pNew ;
}