Beispiel #1
0
/**
 * Pack every record of a FASTA stream into "<prefix>.pac" and dump the
 * accompanying index via bns_dump().
 *
 * @param fp_fa    stream handed to kseq_init(); each record returned by
 *                 kseq_read() is passed to add1().
 * @param prefix   output prefix; the packed file is named "<prefix>.pac" and
 *                 the same prefix is forwarded to bns_dump().
 * @param for_only when zero, the complement (3 - code) of the packed bases is
 *                 appended in reverse order, which doubles bns->l_pac.
 * @return bns->l_pac after packing (including the appended strand when
 *         for_only is zero), or -1 when "<prefix>.pac" does not fit into the
 *         local file-name buffer. In the latter case nothing is read or
 *         written.
 */
int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only)
{
	kseq_t *seq;
	char name[1024];
	bntseq_t *bns;
	uint8_t *pac = 0;
	int32_t m_seqs, m_holes;
	int64_t ret = -1, m_pac, l;
	bntamb1_t *q;
	FILE *fp;
	int n;

	// Build the output name with a bounded write. The previous
	// strcpy()/strcat() pair overflowed name[] for long prefixes.
	// This runs before any resource is acquired so the error path
	// has nothing to release.
	n = snprintf(name, sizeof(name), "%s.pac", prefix);
	if (n < 0 || (size_t)n >= sizeof(name)) {
		fprintf(stderr, "[bns_fasta2bntseq] index prefix is too long\n");
		return ret;
	}

	// initialization
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	// initial capacities: 8 annotations, 8 holes, 0x10000 bases (4 bases per byte)
	m_seqs = m_holes = 8; m_pac = 0x10000;
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	pac = calloc(m_pac/4, 1);
	q = bns->ambs;
	fp = xopen(name, "wb");
	// read sequences; add1() returns the (possibly relocated) pac buffer
	while (kseq_read(seq) >= 0) pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
	if (!for_only) { // add the reverse complemented sequence
		// room for twice the bases, rounded up to a whole number of bytes
		m_pac = (bns->l_pac * 2 + 3) / 4 * 4;
		pac = realloc(pac, m_pac/4);
		// clear every byte past the last one the forward strand occupies
		memset(pac + (bns->l_pac+3)/4, 0, (m_pac - (bns->l_pac+3)/4*4) / 4);
		// l walks the forward strand backwards while bns->l_pac grows as the
		// write cursor for the appended strand
		for (l = bns->l_pac - 1; l >= 0; --l, ++bns->l_pac)
			_set_pac(pac, bns->l_pac, 3-_get_pac(pac, l));
	}
	ret = bns->l_pac;
	{ // finalize .pac file
		ubyte_t ct;
		// ceil(l_pac / 4) bytes of packed bases
		err_fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp);
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % 4 == 0) {
			ct = 0;
			err_fwrite(&ct, 1, 1, fp);
		}
		// last byte records how many bases the final packed byte holds
		ct = bns->l_pac % 4;
		err_fwrite(&ct, 1, 1, fp);
		// close .pac file
		err_fflush(fp);
		err_fclose(fp);
	}
	bns_dump(bns, prefix);
	bns_destroy(bns);
	kseq_destroy(seq);
	free(pac);
	return ret;
}
Beispiel #2
0
/**
 * Extract the bases in [beg, end) from a packed sequence, one base per
 * output byte.
 *
 * Coordinates live on a virtual sequence of length 2*l_pac: positions below
 * l_pac are read directly from pac, positions at or above l_pac are mapped
 * back onto pac in mirrored order and returned as 3 - code.
 *
 * @param l_pac number of bases stored in pac
 * @param pac   packed bases, read through _get_pac()
 * @param beg   range start; swapped with end when larger, then clamped to
 *              [0, 2*l_pac]
 * @param end   range end (exclusive), clamped the same way
 * @param len   receives the number of bases written to the result; 0 when
 *              the range straddles l_pac, is empty, or allocation fails
 * @return malloc()ed buffer owned by the caller, or NULL when the range
 *         straddles l_pac or allocation fails. For an empty range the result
 *         is whatever malloc(0) returns.
 */
uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end, int64_t *len)
{
	const int64_t l_both = l_pac << 1; // length of the doubled coordinate space
	uint8_t *seq;
	int64_t k, l = 0;

	if (end < beg) { // if end is smaller, swap
		int64_t tmp = beg;
		beg = end;
		end = tmp;
	}
	// Clamp BOTH ends to [0, 2*l_pac]. Clamping only one side of each bound
	// let fully out-of-range requests produce a negative length, which was
	// then passed to malloc() as a huge unsigned size.
	if (beg < 0) beg = 0;
	if (end < 0) end = 0;
	if (beg > l_both) beg = l_both;
	if (end > l_both) end = l_both;

	if (beg < l_pac && end > l_pac) {
		// if bridging the forward-reverse boundary, return nothing
		*len = 0;
		return NULL;
	}

	seq = malloc(end - beg);
	if (seq == NULL) {
		*len = 0;
		return NULL;
	}
	*len = end - beg;

	if (beg >= l_pac) { // reverse strand
		// mirror the requested window onto stored coordinates
		int64_t beg_f = l_both - 1 - end;
		int64_t end_f = l_both - 1 - beg;
		for (k = end_f; k > beg_f; --k)
			seq[l++] = 3 - _get_pac(pac, k);
	} else { // forward strand
		for (k = beg; k < end; ++k)
			seq[l++] = _get_pac(pac, k);
	}
	return seq;
}
Beispiel #3
0
/**
 * Print the packed sequence of an index as text on stdout.
 *
 * argv[1] is passed to bns_restore(); the bytes read from the restored
 * index's fp_pac stream are decoded with _get_pac() and each 2-bit code is
 * printed as the matching character of "ACGT#".
 *
 * @return 0 on success, 1 on a usage, restore, allocation or read error.
 */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <prefix>\n", argc > 0 ? argv[0] : "pac2txt");
        return 1;
    }

    bntseq_t *bns = bns_restore(argv[1]);
    if (bns == NULL) { // NOTE(review): assumes bns_restore() reports failure with NULL
        fprintf(stderr, "failed to restore index '%s'\n", argv[1]);
        return 1;
    }

    size_t n_bytes = bns->l_pac/2+2;
    uint8_t *pac = calloc(n_bytes, 1);
    if (pac == NULL) {
        fprintf(stderr, "out of memory\n");
        bns_destroy(bns);
        return 1;
    }

    // A short read alone is not an error: n_bytes is used as an upper bound
    // and the buffer is already zero-filled. Only a stream error aborts.
    size_t n_read = fread(pac, 1, n_bytes, bns->fp_pac);
    if (n_read < n_bytes && ferror(bns->fp_pac)) {
        fprintf(stderr, "failed to read packed sequence\n");
        free(pac);
        bns_destroy(bns);
        return 1;
    }

    // 64-bit index: l_pac may exceed the range of int
    for (int64_t i = 0; i < bns->l_pac; ++i) {
        putchar("ACGT#"[_get_pac(pac, i)]);
    }

    free(pac);
    bns_destroy(bns);
    return 0;
}
Beispiel #4
0
/**
 * Pack a FASTA stream into a converted two-strand .pac file.
 *
 * The stream is rewound, every record is packed through bis_add1(), and a
 * second buffer is then filled with the packed bases followed by their
 * complement (3 - code) in reverse order. While copying, one code is
 * rewritten depending on `parent`:
 *   - parent != 0: code 1 becomes 3
 *   - parent == 0: code 2 becomes 0
 * (presumably C->T and G->A under a 0=A,1=C,2=G,3=T encoding -- TODO confirm
 * against bis_add1()).
 *
 * @param fp_fa  input stream; rewound with gzseek() before reading
 * @param prefix output prefix; the file written is "<prefix>.par.pac" when
 *               parent is non-zero, "<prefix>.dau.pac" otherwise
 * @param parent selects the conversion and the file suffix; bis_bns_dump()
 *               is only called when it is non-zero
 * @return number of bases written (2 * bns->l_pac), or -1 when the output
 *         file name does not fit into the local buffer. In the latter case
 *         nothing is read or written.
 */
int64_t bis_bns_fasta2bntseq(gzFile fp_fa, const char *prefix, uint8_t parent) {

  kseq_t *seq;
  char name[1024];
  bntseq_t *bns;
  uint8_t *pac = 0, *fwd_pac = 0;
  int32_t m_seqs, m_holes;
  int64_t m_pac, l, k;
  bntamb1_t *q;
  FILE *fp;
  int n;

  // Bounded construction of the output name. The previous strcpy()/strcat()
  // pair overflowed name[] for long prefixes. Done before acquiring any
  // resource so the error path has nothing to release.
  n = snprintf(name, sizeof(name), "%s%s", prefix, parent ? ".par.pac" : ".dau.pac");
  if (n < 0 || (size_t)n >= sizeof(name)) {
    fprintf(stderr, "[bis_bns_fasta2bntseq] index prefix is too long\n");
    return -1;
  }

  // initialization
  gzseek(fp_fa, 0, SEEK_SET);
  seq = kseq_init(fp_fa);
  bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
  bns->seed = 11; // fixed seed for random generator
  srand48(bns->seed);
  // initial capacities: 8 annotations, 8 holes, 0x10000 bases (4 bases per byte)
  m_seqs = m_holes = 8; m_pac = 0x10000;
  bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
  bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
  fwd_pac = calloc(m_pac/4, 1);
  q = bns->ambs;
  fp = xopen(name, "wb");

  // read sequences; bis_add1() returns the (possibly relocated) buffer
  while (kseq_read(seq) >= 0) {
    fwd_pac = bis_add1(seq, bns, fwd_pac, &m_pac, &m_seqs, &m_holes, &q);
  }

  // capacity in bases for both strands, rounded up to a whole byte
  m_pac = (bns->l_pac*2+3)/4*4;
  pac = calloc(m_pac/4,sizeof(uint8_t));

  // first half: packed bases in order, with the conversion applied
  for (l=0; l<bns->l_pac; ++l) {
    uint8_t c = _get_pac(fwd_pac,l);
    if (parent && c == 1) c = 3;
    if (!parent && c == 2) c = 0;
    _set_pac(pac, l, c);
  }

  // second half: complement in reverse order; the conversion is applied to
  // the complemented code, and l keeps advancing as the write cursor
  for (k=bns->l_pac-1; k>=0; --k,++l) {
    uint8_t c = 3-_get_pac(fwd_pac,k);
    if (parent && c == 1) c = 3;
    if (!parent && c == 2) c = 0;
    _set_pac(pac, l, c);
  }
  free(fwd_pac);

  assert(bns->l_pac<<1 == l);
  { // finalize .pac file
    ubyte_t ct;
    // ceil(l / 4) bytes of packed bases
    err_fwrite(pac, 1, (l>>2) + ((l&3) == 0? 0 : 1), fp);
    // the following codes make the pac file size always (l_pac/4+1+1)
    if (l % 4 == 0) {
      ct = 0;
      err_fwrite(&ct, 1, 1, fp);
    }
    // last byte records how many bases the final packed byte holds
    ct = l % 4;
    err_fwrite(&ct, 1, 1, fp);
    // close .pac file
    err_fflush(fp);
    err_fclose(fp);
  }
  if (parent) bis_bns_dump(bns, prefix);
  bns_destroy(bns);
  kseq_destroy(seq);
  free(pac);
  return l;
}
Beispiel #5
0
int64_t R_bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
{
//	extern void seq_reverse(int len, ubyte_t *seq, int is_comp); // in bwaseqio.c
	kseq_t *seq;
	char name[1024];
	bntseq_t *bns;
	uint8_t *pac = 0;
    uint8_t *reverse_pac = NULL;
	int32_t m_seqs, m_holes;
	int64_t ret = -1, m_pac, l, m_r_pac;
	bntamb1_t *q;
	FILE *fp, *fp_r;
    int i;
	// initialization
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	m_seqs = m_holes = 8; m_pac = 0x10000;
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	pac = calloc(m_pac/NT_PER_BYTE, 1);
	q = bns->ambs;
	strcpy(name, prefix); strcat(name, ".pac");
	fp = xopen(name, "wb");
    memset(name, '\0', 1024);strcpy(name, prefix); strcat(name, ".rpac");
    fp_r = xopen(name, "wb");
	// read sequences
	while (kseq_read(seq) >= 0) pac = R_add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
    
   // add the reverse complemented sequence





    ret = bns->l_pac;
    fprintf(stderr, "[R_bns_fasta2bntseq]:reverse_pac!\n");
    m_r_pac = (bns->l_pac +NT_PER_BYTE-1)/NT_PER_BYTE *NT_PER_BYTE;
    reverse_pac = calloc(m_r_pac/NT_PER_BYTE, sizeof(uint8_t));
    for(l = bns->l_pac-1, i =0; l>=0, i < bns->l_pac; --l, ++i)
    {
        _set_pac(reverse_pac, i, _get_pac(pac, l)); 
    } 
	ubyte_t ct;
    { // finalize .pac and  file
		fwrite(pac, 1, (bns->l_pac>>1) + ((bns->l_pac&1) == 0? 0 : 1), fp);
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % NT_PER_BYTE == 0) {
			ct = 0;
			fwrite(&ct, 1, 1, fp);
		}
		ct = bns->l_pac % NT_PER_BYTE;
		fwrite(&ct, 1, 1, fp);
		// close .pac file
		fclose(fp);
	}	
    { // finalize .rpac and  file
		fwrite(reverse_pac, 1, (bns->l_pac>>1) + ((bns->l_pac&1) == 0? 0 : 1), fp_r);
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % NT_PER_BYTE == 0) {
			ct = 0;
			fwrite(&ct, 1, 1, fp_r);
		}
		ct = bns->l_pac % NT_PER_BYTE;
		fwrite(&ct, 1, 1, fp_r);
		// close .rpac file
		fclose(fp_r);
	}
	bns_dump(bns, prefix);
	bns_destroy(bns);
	kseq_destroy(seq);
	free(pac);
    free(reverse_pac);
	return ret;
}