コード例 #1
0
ファイル: pbwtCore.c プロジェクト: BioinformaticsArchive/pbwt
/* Build a new PBWT holding only the sites i of pOld with start <= i < end.
 *
 * pOld  - source PBWT; it is destroyed before returning, so the caller must
 *         not use it afterwards
 * start - first site to keep (must be >= 0)
 * end   - one past the last site to keep (must be > start and <= pOld->N)
 *
 * Returns the new PBWT.  Site records are copied across when pOld->sites is
 * set, and the final sort order of the new cursor is stored in pNew->aFend.
 * die() is called for a missing PBWT or an invalid range.
 */
PBWT *pbwtSubRange (PBWT *pOld, int start, int end)
{
  /* Validate before touching pOld: previously pOld->M was read and a cursor
     was created on pOld ahead of these checks, so a NULL pOld was
     dereferenced before it could be reported. */
  if (!pOld || !pOld->yz) die ("subrange without an existing pbwt") ;
  if (start < 0 || end > pOld->N || end <= start)
    die ("subrange invalid start %d, end %d", start, end) ;

  int M = pOld->M ;
  PBWT *pNew = pbwtCreate (M) ;
  int i, j ;
  uchar *x ;
  PbwtCursor *uOld = pbwtCursorCreate (pOld, TRUE, TRUE) ;
  pNew->yz = arrayCreate (pNew->N*8, uchar) ;	/* must exist before the write cursor is created on pNew */
  PbwtCursor *uNew = pbwtCursorCreate (pNew, TRUE, TRUE) ;

  x = myalloc (M, uchar) ;	/* scratch column, indexed by the values held in the cursors' a[] */
  if (pOld->sites) pNew->sites = arrayCreate (4096, Site) ;

  /* The old cursor has to be stepped through every site before start as
     well, hence the loop from 0 rather than from start. */
  for (i = 0 ; i < end ; ++i)
    { if (i >= start)
	{ /* undo the old cursor's ordering, then re-apply the new cursor's */
	  for (j = 0 ; j < M ; ++j) x[uOld->a[j]] = uOld->y[j] ;
	  for (j = 0 ; j < M ; ++j) uNew->y[j] = x[uNew->a[j]] ;
	  pbwtCursorWriteForwards (uNew) ;
	  if (pOld->sites) array(pNew->sites, pNew->N, Site) = arr(pOld->sites, i, Site)  ;
	  ++pNew->N ;
	}
      pbwtCursorForwardsRead (uOld) ;
    }

  /* keep the new cursor's final ordering as aFend */
  pNew->aFend = myalloc (pNew->M, int) ;
  memcpy (pNew->aFend, uNew->a, pNew->M*sizeof(int)) ;

  pbwtDestroy (pOld) ; pbwtCursorDestroy (uOld) ; pbwtCursorDestroy (uNew) ;
  free(x) ;
  return pNew ;
}
コード例 #2
0
ファイル: pbwtCore.cpp プロジェクト: CoREse/PBWTLib
/* Build the reverse PBWT of p and store it in p->zz.
 *
 * A forward cursor is first positioned at the end of p (using p->aFend when
 * it is already available, otherwise by reading forwards through all p->N
 * sites).  The sites are then read backwards from that cursor and written
 * forwards through a second cursor created with its first flag FALSE.
 *
 * Side effects on p: p->zz is (re)created, p->aRstart receives the starting
 * order and p->aRend the final order of the reverse cursor; both arrays are
 * allocated on first use.  When isCheck is set, the reversed haplotypes are
 * also written to the file "rev.haps".
 */
void pbwtBuildReverse (PBWT *p)
{
  int i, j, M = p->M ;
  uchar *x = myalloc (M, uchar) ;	/* scratch column, indexed by the values in the cursors' a[] */
  PbwtCursor *uF ;

  if (p->aFend)
    uF = pbwtCursorCreate (p, TRUE, FALSE) ;
  else
    { uF = pbwtCursorCreate (p, TRUE, TRUE) ;
      for (i = 0 ; i < p->N ; ++i)	/* first run forwards to the end */
	pbwtCursorForwardsRead (uF) ;
      pbwtCursorToAFend (uF, p) ;
    }

  /* use p->aFend also to start the reverse cursor - this gives better performance */
  if (!p->aRstart)
    p->aRstart = myalloc (M, int) ;
  memcpy (p->aRstart, uF->a, M * sizeof(int)) ;	/* always copied; only the allocation is conditional */
  p->zz = arrayReCreate (p->zz, arrayMax(p->yz), uchar) ;
  PbwtCursor *uR = pbwtCursorCreate (p, FALSE, TRUE) ; /* will pick up aRstart */
  for (i = p->N ; i-- ; )
    { pbwtCursorReadBackwards (uF) ;
      /* undo the forward cursor's ordering, then re-apply the reverse cursor's */
      for (j = 0 ; j < M ; ++j) x[uF->a[j]] = uF->y[j] ;
      for (j = 0 ; j < M ; ++j) uR->y[j] = x[uR->a[j]] ;
      pbwtCursorWriteForwards (uR) ;
    }
  /* save uR->a, which is the lexicographic order of the sequences */
  if (!p->aRend)
    p->aRend = myalloc (M, int) ;
  memcpy (p->aRend, uR->a, M * sizeof(int)) ;

  fprintf (logFile, "built reverse PBWT - size %ld\n", arrayMax(p->zz)) ;

  if (isCheck)			/* print out the reversed haplotypes */
    { FILE *fp = fopen ("rev.haps","w") ;
      if (!fp)			/* previously a NULL stream was handed straight to the writer */
	fprintf (logFile, "could not open rev.haps for writing - reverse haplotypes not dumped\n") ;
      else
	{ /* temporarily present the reverse data as if it were the forward data */
	  Array tz = p->yz ; p->yz = p->zz ;
	  int* ta = p->aFstart ; p->aFstart = p->aRstart ;
	  pbwtWriteHaplotypes (fp, p) ;
	  p->yz = tz ; p->aFstart = ta ;
	  /* NOTE(review): assumes pbwtWriteHaplotypes leaves fp open for the
	     caller to close - confirm; the stream was previously leaked */
	  fclose (fp) ;
	}
    }

  free (x) ;
  pbwtCursorDestroy (uF) ; pbwtCursorDestroy (uR) ;
}
コード例 #3
0
ファイル: pbwtCore.cpp プロジェクト: CoREse/PBWTLib
/* Build a new PBWT from a subset of the sites of pOld.
 *
 * pOld - source PBWT; it is destroyed before returning.  Its chrom, samples,
 *        missingOffset and zMissing fields are moved to the new PBWT (and
 *        zeroed in pOld first so pbwtDestroy does not see them).
 * fmin - must satisfy 0 <= fmin < 1; a site is a candidate only when the
 *        cursor's c value at that site is below M*(1-fmin)
 * frac - must satisfy 0 < frac <= 1; an accumulator grows by frac at every
 *        candidate site, and the site is kept each time it exceeds 1.0
 *
 * Returns the new PBWT; the number of sites kept is reported on logFile.
 * die() is called for a missing PBWT or out-of-range fmin/frac.
 */
PBWT *pbwtSubSites (PBWT *pOld, double fmin, double frac)
{
  /* Validate before touching pOld: previously pOld->M was read and a cursor
     was created on pOld ahead of these checks, so a NULL pOld was
     dereferenced before it could be reported. */
  if (!pOld || !pOld->yz) die ("subsites without an existing pbwt") ;
  if (fmin < 0 || fmin >= 1 || frac <= 0 || frac > 1)
    die ("fmin %f, frac %f for subsites out of range\n", fmin, frac) ;

  int M = pOld->M ;
  PBWT *pNew = pbwtCreate (M, 0) ;
  int i, j, thresh = M*(1-fmin)  ;
  double bit = 0.0 ;		/* fractional-site accumulator */
  uchar *x ;
  PbwtCursor *uOld = pbwtCursorCreate (pOld, TRUE, TRUE) ;
  PbwtCursor *uNew = pbwtCursorCreate (pNew, TRUE, TRUE) ;

  x = myalloc (M, uchar) ;	/* scratch column, indexed by the values held in the cursors' a[] */
  if (pOld->sites) pNew->sites = arrayCreate (4096, Site) ;

  for (i = 0 ; i < pOld->N ; ++i)
    { /* bit only advances for sites that pass the threshold test (short-circuit &&) */
      if ((uOld->c < thresh) && ((bit += frac) > 1.0))
	{ /* undo the old cursor's ordering, then re-apply the new cursor's */
	  for (j = 0 ; j < M ; ++j) x[uOld->a[j]] = uOld->y[j] ;
	  for (j = 0 ; j < M ; ++j) uNew->y[j] = x[uNew->a[j]] ;
	  pbwtCursorWriteForwards (uNew) ;
	  if (pOld->sites) array(pNew->sites, pNew->N, Site) = arr(pOld->sites, i, Site)  ;
	  ++pNew->N ;
	  bit -= 1.0 ;
	}
      pbwtCursorForwardsRead (uOld) ;
    }
  pbwtCursorToAFend (uNew, pNew) ;

  fprintf (logFile, "subsites with fmin %f, frac %f leaves %d sites\n", fmin, frac, pNew->N) ;

  /* hand the shared metadata over to pNew before pOld is destroyed */
  pNew->chrom = pOld->chrom ; pOld->chrom = 0 ;
  pNew->samples = pOld->samples ; pOld->samples = 0 ;
  pNew->missingOffset = pOld->missingOffset ; pOld->missingOffset = 0 ;
  pNew->zMissing = pOld->zMissing ; pOld->zMissing = 0 ;
  pbwtDestroy (pOld) ; pbwtCursorDestroy (uOld) ; pbwtCursorDestroy (uNew) ;
  free(x) ;
  return pNew ;
}
コード例 #4
0
ファイル: pbwtCore.cpp プロジェクト: CoREse/PBWTLib
/* Build a new PBWT holding only the sites i of pOld with start <= i < end.
 *
 * pOld  - source PBWT; it is destroyed before returning.  Its chrom, samples,
 *         missingOffset and zMissing fields are moved to the new PBWT (and
 *         zeroed in pOld first so pbwtDestroy does not see them).
 * start - first site to keep (must be >= 0)
 * end   - one past the last site to keep (must be > start and <= pOld->N)
 *
 * Returns the new PBWT.  die() is called for a missing PBWT or an invalid
 * range.
 */
PBWT *pbwtSubRange (PBWT *pOld, int start, int end)
{
  /* Validate before touching pOld: previously pOld->M was read and a cursor
     was created on pOld ahead of these checks, so a NULL pOld was
     dereferenced before it could be reported. */
  if (!pOld || !pOld->yz) die ("subrange without an existing pbwt") ;
  if (start < 0 || end > pOld->N || end <= start)
    die ("subrange invalid start %d, end %d", start, end) ;

  int M = pOld->M ;
  PBWT *pNew = pbwtCreate (M, 0) ;
  int i, j ;
  uchar *x ;
  PbwtCursor *uOld = pbwtCursorCreate (pOld, TRUE, TRUE) ;
  PbwtCursor *uNew = pbwtCursorCreate (pNew, TRUE, TRUE) ;

  x = myalloc (M, uchar) ;	/* scratch column, indexed by the values held in the cursors' a[] */
  if (pOld->sites) pNew->sites = arrayCreate (4096, Site) ;

  /* The old cursor has to be stepped through every site before start as
     well, hence the loop from 0 rather than from start. */
  for (i = 0 ; i < end ; ++i)
    { if (i >= start)
	{ /* undo the old cursor's ordering, then re-apply the new cursor's */
	  for (j = 0 ; j < M ; ++j) x[uOld->a[j]] = uOld->y[j] ;
	  for (j = 0 ; j < M ; ++j) uNew->y[j] = x[uNew->a[j]] ;
	  pbwtCursorWriteForwards (uNew) ;
	  if (pOld->sites) array(pNew->sites, pNew->N, Site) = arr(pOld->sites, i, Site)  ;
	  ++pNew->N ;
	}
      pbwtCursorForwardsRead (uOld) ;
    }
  pbwtCursorToAFend (uNew, pNew) ;

  /* hand the shared metadata over to pNew before pOld is destroyed */
  pNew->chrom = pOld->chrom ; pOld->chrom = 0 ;
  pNew->samples = pOld->samples ; pOld->samples = 0 ;
  pNew->missingOffset = pOld->missingOffset ; pOld->missingOffset = 0 ;
  pNew->zMissing = pOld->zMissing ; pOld->zMissing = 0 ;
  pbwtDestroy (pOld) ; pbwtCursorDestroy (uOld) ; pbwtCursorDestroy (uNew) ;
  free(x) ;
  return pNew ;
}
コード例 #5
0
/* Run extendMatch over sliding windows of heterozygous sites for the first
 * individuals of p.
 *
 * Steps:
 *   1. For every site k, record the cursor's c value in cc[k] and a copy of
 *      its u array (M+1 ints) in u[k].
 *   2. Form genotypes for up to 150 individuals by summing consecutive pairs
 *      of haplotypes (rows 2j and 2j+1 of pbwtHaplotypes(p)).
 *   3. For each individual, collect the sites whose genotype is 1 and, for
 *      each window of six consecutive such sites (advancing three at a time),
 *      call extendMatch with a fresh Tables object that is destroyed again
 *      straight afterwards.
 *
 * p   - PBWT to process; die() is called if it is NULL or has no yz data
 * out - not used by this implementation
 *
 * Progress messages go to stderr.
 */
void pbwtShapeItWithMiss (PBWT *p, FILE *out) {

  if (!p || !p->yz) die ("option -longWithin called without a PBWT") ;

  /********   ref  *****/
  uchar **reference = pbwtHaplotypes (p) ; /* haplotypes for reference  (M * N)  */

  /*********************/

  PbwtCursor *up = pbwtCursorCreate (p, TRUE, TRUE) ;
  int **u ;   /* stored indexes */
  int i, j, k, N = p->N, M = p->M ;
  int num_1;      /* number of heterozygous sites found for the current individual */
  int s, seg_num; /* current segment and number of segments */

  /* build indexes */
  u = myalloc (N,int*) ; for (i = 0 ; i < N ; ++i) u[i] = myalloc (M+1, int) ;
  int *cc = myalloc (N, int) ;

  /* make pbwt index */
  for (k = 0 ; k < N ; ++k)
    {
      cc[k] = up->c ;
      pbwtCursorCalculateU (up) ;
      memcpy (u[k], up->u, (M+1)*sizeof(int)) ;
      pbwtCursorForwardsReadAD (up, k) ;
    }

  /* Each individual consumes haplotype rows 2j and 2j+1, so at most M/2
     individuals exist.  The count used to be a fixed 150, which read past
     the end of reference[] whenever M < 300. */
  int nIndiv = (M / 2 < 150) ? M / 2 : 150 ;
  int **geno;
  geno = myalloc (nIndiv, int*);
  for (i = 0; i < nIndiv; ++i) geno[i] = myalloc (N, int);

  for (j = 0; j < nIndiv; ++j) {
    for (i = 0; i < N; ++i) {
      geno[j][i] = reference[j * 2][i] + reference[j * 2 + 1][i];
    }
  }

  /* clean up what is no longer needed */
  pbwtCursorDestroy (up) ;
  for (j = 0 ; j < M ; ++j) free(reference[j]) ;
  free (reference) ;

  fprintf (stderr, "Made indices: \n") ; timeUpdate ();

  int *pos;           /* positions of the heterozygous sites */
  pos = myalloc (N, int) ;
  for (int t = 0; t < nIndiv; ++t) {

    num_1 = 0;
    s = 0;
    seg_num = 1;

    /* record every heterozygous site; seg_num grows by one for each
       complete group of three, so 3*(seg_num-1) <= num_1 */
    for ( i = 0, j = 0; i < N; ++i) {
      if (geno[t][i] == 1) {
        ++j;
        pos[num_1++] = i;
        if (j == 3) {
          ++seg_num;
          j = 0;
        }
      }
    }

    int start, depth;
    int *het = myalloc(6, int);

    fprintf (stderr, "seg_num  %d \n", seg_num);
    /* s stops at seg_num-3 so that pos[s*3 + 5] is always a recorded site */
    for (s = 0; s < seg_num - 2; ++s) {
      Tables *tables = 0;
      uchar *seq;

      /* the window starts just after the last het site of the previous group */
      if (!s)
        start = 0;
      else
        start = pos[s * 3 - 1] + 1;

      for (i = 0; i < 6; ++i) {
        het[i] = pos[s * 3 + i];
      }

      /* window covers start .. het[5] inclusive */
      depth = het[5] - start + 1;
      seq = myalloc(depth + 1, uchar);
      memset(seq, '2', depth * sizeof(uchar));
      seq[depth] = '\0';
      tables = tablesCreate(500);
      extendMatch(het, start, 0, depth, seq, cc, u, 0, M, &tables);
      free(seq);
      tablesDestroy(tables);
    }
    free(het);
  }
  fprintf (stderr, "finished \n") ; timeUpdate ();

  /* cleanup */
  free(pos); free(cc);
  for (j = 0 ; j < N ; ++j) free(u[j]) ;
  free (u) ;
  for (j = 0 ; j < nIndiv; ++j) free(geno[j]) ;
  free (geno);
}