示例#1
0
文件: bntseq.c 项目: a113n/bwa
/**
 * Pack every sequence read from a FASTA handle into a 2-bit-per-base buffer,
 * write it to "<prefix>.pac" and dump the collected annotations with
 * bns_dump().
 *
 * @param fp_fa     input handle given to kseq_init(); records are consumed
 *                  with kseq_read() until it returns a negative value.
 * @param prefix    output path prefix; "<prefix>.pac" must fit in the
 *                  1024-byte name buffer.
 * @param for_only  non-zero: keep the forward strand only; zero: append the
 *                  reverse complement of everything packed so far.
 * @return bns->l_pac after packing (it counts the appended reverse complement
 *         when for_only is zero), or -1 when the output name does not fit or
 *         the packed buffer cannot be grown.
 */
int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only)
{
	kseq_t *seq;
	char name[1024];
	bntseq_t *bns;
	uint8_t *pac = 0;
	int32_t m_seqs, m_holes;
	int64_t ret = -1, m_pac, l;
	bntamb1_t *q;
	FILE *fp;
	int n;

	// build the output name first: an overlong prefix is rejected before
	// anything is allocated or opened, instead of overflowing name[]
	n = snprintf(name, sizeof(name), "%s.pac", prefix);
	if (n < 0 || (size_t)n >= sizeof(name)) {
		fprintf(stderr, "[bns_fasta2bntseq] output prefix is too long\n");
		return ret;
	}

	// initialization
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	m_seqs = m_holes = 8; m_pac = 0x10000;
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	pac = calloc(m_pac/4, 1);
	q = bns->ambs;
	fp = xopen(name, "wb");
	// read sequences; add1() hands back the packed buffer, which it may have moved
	while (kseq_read(seq) >= 0) pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
	// add the reverse complemented sequence; skipped for an empty input so
	// that realloc() is never called with a size of zero
	if (!for_only && bns->l_pac > 0) {
		uint8_t *grown;
		m_pac = (bns->l_pac * 2 + 3) / 4 * 4;
		grown = realloc(pac, m_pac/4);
		if (grown == 0) { // pac is still valid here and is released below
			fprintf(stderr, "[bns_fasta2bntseq] failed to grow the packed buffer\n");
			err_fclose(fp); // NOTE: leaves an empty .pac file behind
			goto cleanup;
		}
		pac = grown;
		// clear the newly added tail before bases are packed into it
		memset(pac + (bns->l_pac+3)/4, 0, (m_pac - (bns->l_pac+3)/4*4) / 4);
		// walk the forward strand backwards, appending the complement (3 - code)
		for (l = bns->l_pac - 1; l >= 0; --l, ++bns->l_pac)
			_set_pac(pac, bns->l_pac, 3-_get_pac(pac, l));
	}
	ret = bns->l_pac;
	{ // finalize .pac file
		ubyte_t ct;
		err_fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp);
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % 4 == 0) {
			ct = 0;
			err_fwrite(&ct, 1, 1, fp);
		}
		ct = bns->l_pac % 4;
		err_fwrite(&ct, 1, 1, fp);
		// close .pac file
		err_fflush(fp);
		err_fclose(fp);
	}
	bns_dump(bns, prefix);
cleanup:
	bns_destroy(bns);
	kseq_destroy(seq);
	free(pac);
	return ret;
}
示例#2
0
文件: bntseq.c 项目: aleidenroth/bwa
/**
 * Stream the sequences of a FASTA handle into "<prefix>.pac" (four 2-bit base
 * codes per byte, flushed through a 64 KiB buffer) and dump the collected
 * annotations with bns_dump().
 *
 * Bases whose nst_nt4_table code is >= 4 are recorded as holes in bns->ambs
 * (one entry per run of the same character) and stored in the .pac file as a
 * random code drawn from lrand48(), which is seeded with a fixed value.
 *
 * @param fp_fa   input handle given to kseq_init().
 * @param prefix  output path prefix; "<prefix>.pac" must fit in the 1024-byte
 *                name buffer.
 *
 * Failures are reported through xassert(), as the zero-length check already
 * does. NOTE(review): this assumes xassert() does not return on failure —
 * confirm against its definition.
 */
void bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
{
	kseq_t *seq;
	char name[1024];
	bntseq_t *bns;
	bntamb1_t *q;
	int l_buf;
	unsigned char buf[0x10000];
	int32_t m_seqs, m_holes, l, i;
	size_t n_out;
	FILE *fp;

	// reject a prefix whose ".pac" name cannot fit; snprintf() below can only
	// truncate, never write past name[]
	xassert(strlen(prefix) + sizeof(".pac") <= sizeof(name), "prefix is too long.");
	snprintf(name, sizeof(name), "%s.pac", prefix);

	// initialization
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	m_seqs = m_holes = 8;
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	q = bns->ambs;
	l_buf = 0;
	fp = xopen(name, "wb");
	memset(buf, 0, sizeof(buf));
	// read sequences
	while ((l = kseq_read(seq)) >= 0) {
		bntann1_t *p;
		int lasts;
		if (bns->n_seqs == m_seqs) {
			m_seqs <<= 1;
			bns->anns = (bntann1_t*)realloc(bns->anns, m_seqs * sizeof(bntann1_t));
		}
		p = bns->anns + bns->n_seqs;
		p->name = strdup((char*)seq->name.s);
		p->anno = seq->comment.s? strdup((char*)seq->comment.s) : strdup("(null)");
		p->gi = 0; p->len = l;
		p->offset = (bns->n_seqs == 0)? 0 : (p-1)->offset + (p-1)->len;
		p->n_ambs = 0;
		for (i = 0, lasts = 0; i < l; ++i) {
			// index through unsigned char: a byte >= 0x80 must not become a
			// negative table index where plain char is signed
			int c = nst_nt4_table[(unsigned char)seq->seq.s[i]];
			if (c >= 4) { // N
				if (lasts == seq->seq.s[i]) { // contiguous N
					++q->len;
				} else {
					if (bns->n_holes == m_holes) {
						m_holes <<= 1;
						bns->ambs = (bntamb1_t*)realloc(bns->ambs, m_holes * sizeof(bntamb1_t));
					}
					q = bns->ambs + bns->n_holes;
					q->len = 1;
					q->offset = p->offset + i;
					q->amb = seq->seq.s[i];
					++p->n_ambs;
					++bns->n_holes;
				}
			}
			lasts = seq->seq.s[i];
			{ // fill buffer
				if (c >= 4) c = lrand48()&0x3;
				if (l_buf == 0x40000) { // buffer full: 0x10000 bytes * 4 bases
					// keep the write outside xassert() so it happens however the macro expands
					n_out = fwrite(buf, 1, sizeof(buf), fp);
					xassert(n_out == sizeof(buf), "fail to write the .pac file.");
					memset(buf, 0, sizeof(buf));
					l_buf = 0;
				}
				// first base of each byte goes into the two highest bits
				buf[l_buf>>2] |= c << ((3 - (l_buf&3)) << 1);
				++l_buf;
			}
		}
		++bns->n_seqs;
		bns->l_pac += seq->seq.l;
	}
	xassert(bns->l_pac, "zero length sequence.");
	{ // finalize .pac file
		ubyte_t ct;
		size_t n_tail = (l_buf>>2) + ((l_buf&3) == 0? 0 : 1);
		int rc;
		n_out = fwrite(buf, 1, n_tail, fp);
		xassert(n_out == n_tail, "fail to write the .pac file.");
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % 4 == 0) {
			ct = 0;
			n_out = fwrite(&ct, 1, 1, fp);
			xassert(n_out == 1, "fail to write the .pac file.");
		}
		ct = bns->l_pac % 4;
		n_out = fwrite(&ct, 1, 1, fp);
		xassert(n_out == 1, "fail to write the .pac file.");
		// close .pac file; fclose() flushes, so its result also has to be checked
		rc = fclose(fp);
		xassert(rc == 0, "fail to close the .pac file.");
	}
	bns_dump(bns, prefix);
	bns_destroy(bns);
	kseq_destroy(seq);
}
示例#3
0
int64_t R_bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
{
//	extern void seq_reverse(int len, ubyte_t *seq, int is_comp); // in bwaseqio.c
	kseq_t *seq;
	char name[1024];
	bntseq_t *bns;
	uint8_t *pac = 0;
    uint8_t *reverse_pac = NULL;
	int32_t m_seqs, m_holes;
	int64_t ret = -1, m_pac, l, m_r_pac;
	bntamb1_t *q;
	FILE *fp, *fp_r;
    int i;
	// initialization
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	m_seqs = m_holes = 8; m_pac = 0x10000;
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	pac = calloc(m_pac/NT_PER_BYTE, 1);
	q = bns->ambs;
	strcpy(name, prefix); strcat(name, ".pac");
	fp = xopen(name, "wb");
    memset(name, '\0', 1024);strcpy(name, prefix); strcat(name, ".rpac");
    fp_r = xopen(name, "wb");
	// read sequences
	while (kseq_read(seq) >= 0) pac = R_add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
    
   // add the reverse complemented sequence





    ret = bns->l_pac;
    fprintf(stderr, "[R_bns_fasta2bntseq]:reverse_pac!\n");
    m_r_pac = (bns->l_pac +NT_PER_BYTE-1)/NT_PER_BYTE *NT_PER_BYTE;
    reverse_pac = calloc(m_r_pac/NT_PER_BYTE, sizeof(uint8_t));
    for(l = bns->l_pac-1, i =0; l>=0, i < bns->l_pac; --l, ++i)
    {
        _set_pac(reverse_pac, i, _get_pac(pac, l)); 
    } 
	ubyte_t ct;
    { // finalize .pac and  file
		fwrite(pac, 1, (bns->l_pac>>1) + ((bns->l_pac&1) == 0? 0 : 1), fp);
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % NT_PER_BYTE == 0) {
			ct = 0;
			fwrite(&ct, 1, 1, fp);
		}
		ct = bns->l_pac % NT_PER_BYTE;
		fwrite(&ct, 1, 1, fp);
		// close .pac file
		fclose(fp);
	}	
    { // finalize .rpac and  file
		fwrite(reverse_pac, 1, (bns->l_pac>>1) + ((bns->l_pac&1) == 0? 0 : 1), fp_r);
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % NT_PER_BYTE == 0) {
			ct = 0;
			fwrite(&ct, 1, 1, fp_r);
		}
		ct = bns->l_pac % NT_PER_BYTE;
		fwrite(&ct, 1, 1, fp_r);
		// close .rpac file
		fclose(fp_r);
	}
	bns_dump(bns, prefix);
	bns_destroy(bns);
	kseq_destroy(seq);
	free(pac);
    free(reverse_pac);
	return ret;
}