/* pbwtSubRange: build a new PBWT holding only sites start .. end-1 of pOld.
 *
 * The old cursor is stepped across sites 0 .. end-1.  For every site with
 * index >= start the column is re-encoded for the new PBWT and, when pOld has
 * a sites array, the matching Site record is copied across.
 *
 * pOld is destroyed before returning, so the caller must not use it again.
 * die() is called if pOld or pOld->yz is null, or unless
 * 0 <= start < end <= pOld->N.
 */
PBWT *pbwtSubRange (PBWT *pOld, int start, int end)
{
  PBWT *pNew ;
  PbwtCursor *uOld, *uNew ;
  uchar *x ;
  int M, i, j ;

  /* validate first: nothing below may touch pOld until it is known to be usable */
  if (!pOld || !pOld->yz) die ("subrange without an existing pbwt") ;
  if (start < 0 || end > pOld->N || end <= start) 
    die ("subrange invalid start %d, end %d", start, end) ;

  M = pOld->M ;
  pNew = pbwtCreate (M) ;
  uOld = pbwtCursorCreate (pOld, TRUE, TRUE) ;
  /* NOTE(review): initial size comes from pNew->N as left by pbwtCreate(),
     which is not visible here - confirm it is a sensible starting size */
  pNew->yz = arrayCreate (pNew->N*8, uchar) ;
  uNew = pbwtCursorCreate (pNew, TRUE, TRUE) ;

  x = myalloc (M, uchar) ;	/* scratch column, indexed by the values stored in a[] */
  if (pOld->sites) pNew->sites = arrayCreate (4096, Site) ;

  for (i = 0 ; i < end ; ++i)
    { if (i >= start)
        { /* scatter the old column through the old cursor's a[] ... */
          for (j = 0 ; j < M ; ++j) x[uOld->a[j]] = uOld->y[j] ;
          /* ... then gather it in the order given by the new cursor's a[] */
          for (j = 0 ; j < M ; ++j) uNew->y[j] = x[uNew->a[j]] ;
          pbwtCursorWriteForwards (uNew) ;
          if (pOld->sites) array(pNew->sites, pNew->N, Site) = arr(pOld->sites, i, Site)  ;
          ++pNew->N ;
        }
      /* the old cursor is advanced for skipped sites as well */
      pbwtCursorForwardsRead (uOld) ;
    }

  /* keep a copy of the new cursor's final a[] on the new PBWT */
  pNew->aFend = myalloc (pNew->M, int) ; memcpy (pNew->aFend, uNew->a, pNew->M*sizeof(int)) ;

  /* release the cursors before the PBWT that uOld was created on */
  pbwtCursorDestroy (uOld) ; pbwtCursorDestroy (uNew) ;
  pbwtDestroy (pOld) ;
  free(x) ;
  return pNew ;
}
Exemple #2
0
/* pbwtSubSites: build a new PBWT from a subset of the sites of pOld.
 *
 * A site is copied when both of the following hold:
 *   - uOld->c is below thresh = M*(1-fmin), truncated to int;
 *   - the running accumulator `bit`, increased by frac, exceeds 1.0
 *     (it is then reduced by 1.0).
 * The accumulator is only advanced for sites that pass the first test,
 * because the two conditions are joined by a short-circuit &&.
 *
 * chrom, samples, missingOffset and zMissing are moved from pOld to the
 * result, and pOld is destroyed before returning, so the caller must not use
 * it again.  The number of sites kept is reported on logFile.
 *
 * die() is called if pOld or pOld->yz is null, or unless 0 <= fmin < 1 and
 * 0 < frac <= 1.
 */
PBWT *pbwtSubSites (PBWT *pOld, double fmin, double frac)
{
  PBWT *pNew ;
  PbwtCursor *uOld, *uNew ;
  uchar *x ;
  int M, i, j, thresh ;
  double bit = 0.0 ;

  /* validate first: nothing below may touch pOld or use fmin until checked */
  if (!pOld || !pOld->yz) die ("subsites without an existing pbwt") ;
  if (fmin < 0 || fmin >= 1 || frac <= 0 || frac > 1)
    die ("fmin %f, frac %f for subsites out of range\n", fmin, frac) ;

  M = pOld->M ;
  thresh = M*(1-fmin) ;
  pNew = pbwtCreate (M, 0) ;
  uOld = pbwtCursorCreate (pOld, TRUE, TRUE) ;
  uNew = pbwtCursorCreate (pNew, TRUE, TRUE) ;

  x = myalloc (M, uchar) ;	/* scratch column, indexed by the values stored in a[] */
  if (pOld->sites) pNew->sites = arrayCreate (4096, Site) ;

  for (i = 0 ; i < pOld->N ; ++i)
    { /* NOTE(review): the comparison is strict, so with frac == 1.0 the first
         qualifying site is not copied (bit reaches exactly 1.0) - confirm
         this is intended */
      if ((uOld->c < thresh) && ((bit += frac) > 1.0))
        { /* scatter the old column through the old cursor's a[] ... */
          for (j = 0 ; j < M ; ++j) x[uOld->a[j]] = uOld->y[j] ;
          /* ... then gather it in the order given by the new cursor's a[] */
          for (j = 0 ; j < M ; ++j) uNew->y[j] = x[uNew->a[j]] ;
          pbwtCursorWriteForwards (uNew) ;
          if (pOld->sites) array(pNew->sites, pNew->N, Site) = arr(pOld->sites, i, Site)  ;
          ++pNew->N ;
          bit -= 1.0 ;
        }
      /* the old cursor is advanced for skipped sites as well */
      pbwtCursorForwardsRead (uOld) ;
    }
  pbwtCursorToAFend (uNew, pNew) ;

  fprintf (logFile, "subsites with fmin %f, frac %f leaves %d sites\n", fmin, frac, pNew->N) ;

  /* move ownership to pNew; zeroing the old fields keeps them out of pbwtDestroy (pOld) */
  pNew->chrom = pOld->chrom ; pOld->chrom = 0 ;
  pNew->samples = pOld->samples ; pOld->samples = 0 ;
  pNew->missingOffset = pOld->missingOffset ; pOld->missingOffset = 0 ;
  pNew->zMissing = pOld->zMissing ; pOld->zMissing = 0 ;

  /* release the cursors before the PBWT that uOld was created on */
  pbwtCursorDestroy (uOld) ; pbwtCursorDestroy (uNew) ;
  pbwtDestroy (pOld) ;
  free(x) ;
  return pNew ;
}
Exemple #3
0
/* pbwtSubRange: build a new PBWT holding only sites start .. end-1 of pOld.
 *
 * The old cursor is stepped across sites 0 .. end-1.  For every site with
 * index >= start the column is re-encoded for the new PBWT and, when pOld has
 * a sites array, the matching Site record is copied across.
 *
 * chrom, samples, missingOffset and zMissing are moved from pOld to the
 * result, and pOld is destroyed before returning, so the caller must not use
 * it again.
 *
 * die() is called if pOld or pOld->yz is null, or unless
 * 0 <= start < end <= pOld->N.
 */
PBWT *pbwtSubRange (PBWT *pOld, int start, int end)
{
  PBWT *pNew ;
  PbwtCursor *uOld, *uNew ;
  uchar *x ;
  int M, i, j ;

  /* validate first: nothing below may touch pOld until it is known to be usable */
  if (!pOld || !pOld->yz) die ("subrange without an existing pbwt") ;
  if (start < 0 || end > pOld->N || end <= start) 
    die ("subrange invalid start %d, end %d", start, end) ;

  M = pOld->M ;
  pNew = pbwtCreate (M, 0) ;
  uOld = pbwtCursorCreate (pOld, TRUE, TRUE) ;
  uNew = pbwtCursorCreate (pNew, TRUE, TRUE) ;

  x = myalloc (M, uchar) ;	/* scratch column, indexed by the values stored in a[] */
  if (pOld->sites) pNew->sites = arrayCreate (4096, Site) ;

  for (i = 0 ; i < end ; ++i)
    { if (i >= start)
        { /* scatter the old column through the old cursor's a[] ... */
          for (j = 0 ; j < M ; ++j) x[uOld->a[j]] = uOld->y[j] ;
          /* ... then gather it in the order given by the new cursor's a[] */
          for (j = 0 ; j < M ; ++j) uNew->y[j] = x[uNew->a[j]] ;
          pbwtCursorWriteForwards (uNew) ;
          if (pOld->sites) array(pNew->sites, pNew->N, Site) = arr(pOld->sites, i, Site)  ;
          ++pNew->N ;
        }
      /* the old cursor is advanced for skipped sites as well */
      pbwtCursorForwardsRead (uOld) ;
    }
  pbwtCursorToAFend (uNew, pNew) ;

  /* move ownership to pNew; zeroing the old fields keeps them out of pbwtDestroy (pOld) */
  pNew->chrom = pOld->chrom ; pOld->chrom = 0 ;
  pNew->samples = pOld->samples ; pOld->samples = 0 ;
  pNew->missingOffset = pOld->missingOffset ; pOld->missingOffset = 0 ;
  pNew->zMissing = pOld->zMissing ; pOld->zMissing = 0 ;

  /* release the cursors before the PBWT that uOld was created on */
  pbwtCursorDestroy (uOld) ; pbwtCursorDestroy (uNew) ;
  pbwtDestroy (pOld) ;
  free(x) ;
  return pNew ;
}
Exemple #4
0
PBWT *pbwtMerge(const char **fnames, int nfiles)
{
	pbwt_reader_t *reader = pbwt_reader_init(fnames, nfiles);

	int nhaps = 0, i;
	for (i=0; i<nfiles; i++) nhaps += reader->pbwt[i]->M;
	PBWT *out_pbwt     = pbwtCreate(nhaps, 0);
	PbwtCursor *cursor = pbwtNakedCursorCreate(nhaps, 0);
	uchar *yseq        = myalloc(nhaps, uchar);
	out_pbwt->yz       = arrayCreate (1<<20, uchar) ;
	out_pbwt->sites    = arrayReCreate(out_pbwt->sites, reader->pbwt[0]->N, Site);
	out_pbwt->chrom = strdup(reader->pbwt[0]->chrom);

	int pos, j;
	while ( (pos=pbwt_reader_next(reader, nfiles)) )
	{
		// Merge only records shared by all files
		for (i=0; i<nfiles; i++)
		{
			PBWT *p		 = reader->pbwt[i];
			Site *site = arrp(p->sites, reader->cpos[i], Site);

			// Both position and alleles must match. This requires that the records are sorted by alleles.

			if ( site->x!=pos ) break;
			char *als = dictName(variationDict, site->varD);
			if ( strcmp(als,reader->mals) ) break;
		}
		if ( i!=nfiles ) 
		{
			// intersection: skip records which are not present in all files
			for (i=0; i<nfiles; i++)
			{
				PBWT *p    = reader->pbwt[i];
				Site *site = arrp(p->sites, reader->cpos[i], Site);
				if ( site->x!=pos ) continue;
				char *als = dictName(variationDict, site->varD);
				if ( strcmp(als,reader->mals) ) continue;

				PbwtCursor *c = reader->cursor[i];
				reader->unpacked[i] += unpack3(arrp(p->yz,reader->unpacked[i],uchar), p->M, c->y, 0);
				pbwtCursorForwardsA(c);
			}
			continue;
		}

		// read and merge
		int ihap = 0;
		for (i=0; i<nfiles; i++)
		{
			PbwtCursor *c = reader->cursor[i];
			PBWT *p       = reader->pbwt[i];
			Site *site    = arrp(p->sites, reader->cpos[i], Site);
			reader->unpacked[i] += unpack3(arrp(p->yz,reader->unpacked[i],uchar), p->M, c->y, 0);
			for (j=0; j<p->M; j++) yseq[ihap + c->a[j]] = c->y[j];
			pbwtCursorForwardsA(c);
			ihap += p->M;
		}

		// pack merged haplotypes
		for (j=0; j<nhaps; j++)
			cursor->y[j] = yseq[cursor->a[j]];
		pack3arrayAdd(cursor->y, out_pbwt->M, out_pbwt->yz);
		pbwtCursorForwardsA(cursor);

		// insert new site
		arrayExtend(out_pbwt->sites, out_pbwt->N+1);
		Site *site = arrayp(out_pbwt->sites, out_pbwt->N, Site);
		site->x = pos;
		dictAdd(variationDict, reader->mals, &site->varD);

		out_pbwt->N++;
	}
	pbwtCursorToAFend (cursor, out_pbwt) ;

	free(yseq);
	pbwtCursorDestroy(cursor);
	pbwt_reader_destroy(reader);
	return out_pbwt;
}