Esempio n. 1
0
/**
 * Build a SeqTagLib from a table of k-mers.
 *
 * Each row read from `fr` supplies a k-mer string in column 0 and an integer
 * count in column 1.  The k-mer is packed 2 bits per base (via
 * base_bit_table) into X.k1 (bases 0..31) and X.k2 (bases 32 and up) and
 * inserted into the library hash.  X.offset is the running sum of the counts
 * of all previous rows, so after the loop it equals the total count, which
 * becomes lib->n_seq and sizes the `seqs` and `ids` arrays.
 *
 * @param fr          table reader positioned at the first row
 * @param fix_p1_len  stored as lib->p1len
 * @param fix_p2_len  stored as lib->p2len
 * @return newly allocated library, or NULL if the library struct itself could
 *         not be allocated.
 *
 * NOTE(review): k-mers longer than 64 bases do not fit in k1/k2; the shift in
 * the k2 branch is undefined for i >= 64.  Left unchanged because lookup code
 * elsewhere presumably packs the same way -- confirm the input never exceeds
 * 64 bases.
 */
SeqTagLib* load_tags(FileReader *fr, uint32_t fix_p1_len, uint32_t fix_p2_len){
	SeqTagLib *lib;
	Xmer X;
	uint32_t i;
	char *seq;
	lib = malloc(sizeof(SeqTagLib));
	if(lib == NULL) return NULL;
	lib->p1len = fix_p1_len;
	lib->p2len = fix_p2_len;
	/* Defined value even when the table has no rows. */
	lib->kmer_size = 0;
	lib->hash = init_xmerhash(1023);
	X.offset = 0;
	X.idx = 0;
	while(fread_table(fr) != -1){
		seq = get_col_str(fr, 0);
		/* Every row overwrites kmer_size; the last row's length wins. */
		lib->kmer_size = get_col_len(fr, 0);
		X.cnt = atoi(get_col_str(fr, 1));
		X.k1 = 0;
		X.k2 = 0;
		for(i=0;i<lib->kmer_size;i++){
			/* Index through unsigned char so bytes >= 0x80 cannot produce a
			 * negative table index. */
			uint64_t code = (uint64_t)base_bit_table[(unsigned char)seq[i]];
			if(i < 32){
				X.k1 |= code << (i << 1);
			} else {
				X.k2 |= code << ((i - 32) << 1);
			}
		}
		put_xmerhash(lib->hash, X);
		X.offset += X.cnt;
	}
	lib->n_seq = X.offset;
	/* One uint64_t word per 32 units of n_seq * (p1len + p2len), rounded up;
	 * computed in 64 bits so the product cannot wrap at 32 bits. */
	lib->seqs  = calloc(((uint64_t)lib->n_seq * ((uint64_t)lib->p1len + lib->p2len) + 31) / 32, sizeof(uint64_t));
	lib->ids   = calloc(lib->n_seq, sizeof(uint64_t));
	return lib;
}
Esempio n. 2
0
/**
 * Read consecutive table rows belonging to cluster `lastcid` and append
 * their reads to contigs owned by `merger`.
 *
 * Columns used per row: 0 = read id, 1 = contig id, 2 = contig sequence,
 * 3 = read sequence, 4 = cluster id, 5 = path string.
 *
 * Rows are consumed until end of input or until a row whose column-4 id
 * differs from `lastcid`; that row is pushed back with froll_back() and the
 * function returns.  Whenever the column-1 id changes (and always for the
 * first row), a new contig is borrowed from the merger, and the column-2
 * sequence, reversed in place by reverse_dna(), is stored as its first read.
 * Every row then appends the column-3 sequence as a further read.
 *
 * @param merger   receives the contigs (merger->ctgs)
 * @param in       table reader
 * @param lastcid  cluster id to collect
 *
 * NOTE(review): rd->seq is written without a bounds check; this assumes the
 * read buffer is large enough for any input sequence plus the terminator --
 * confirm against the read_t definition.
 */
void prepare_reads(merge_t *merger, FileReader *in, uint32_t lastcid) {
	int n_col;
	uint32_t cid, ef_id, eid, eflen, id = 0;
	ef_id = 0;
	contig_t *ctg = NULL;
	read_t *rd;
	char *efstr, *path;

	while ((n_col = fread_table(in)) != -1) {
		if (n_col == 0) continue;
		cid = atoi(get_col_str(in, 4));
		if (cid != lastcid) {
			/* Row belongs to another cluster: leave it for the next call. */
			froll_back(in);
			return;
		}
		eid = atoi(get_col_str(in, 1));
		/* ctg == NULL covers a first row whose id equals the initial ef_id
		 * (0); without it no contig would exist and ctg->rds below would
		 * dereference NULL. */
		if (ctg == NULL || eid != ef_id) {
			ef_id = eid;
			efstr = get_col_str(in, 2);
			eflen = get_col_len(in, 2);
			reverse_dna(efstr, eflen);
			ctg = lend_ctgv_merger(merger);
			push_contigv(merger->ctgs, ctg);
			ctg->id = ef_id;
			ctg->closed = 0;
			path = get_col_str(in, 5);
			append_string(ctg->path, path, strlen(path));
			push_u32list(ctg->m_rds, id++);
			/* The contig sequence itself is stored as the first read. */
			rd = next_ref_readv(ctg->rds);
			rd->seq_id = atol(get_col_str(in, 0));
			rd->rd_len = eflen;
			memmove(rd->seq, efstr, rd->rd_len);
			rd->seq[rd->rd_len]= '\0';
			rd->rank = 1;
		}
		/* Regular read carried by this row. */
		rd = next_ref_readv(ctg->rds);
		rd->rank = 1;
		rd->seq_id = atol(get_col_str(in, 0));
		rd->rd_len = get_col_len(in, 3);
		memmove(rd->seq, get_col_str(in, 3), rd->rd_len);
		rd->seq[rd->rd_len]= '\0';
	}
}
Esempio n. 3
0
CtgDB* load_ctgdb(FileReader *fr1, FileReader *fr2) {
	uint32_t id = 0, i = 0;
	CtgDB *db;
	uuhash *map = init_uuhash(1023);
	uint32_t key, val;
	uuhash_t h;
	int len = 0;
	char *seq = NULL;
	String *line = init_string(1);

	while (fread_table(fr2) != -1) {
		key = atoi(get_col_str(fr2, 1));
		val = atoi(get_col_str(fr2, 4));
		h.key = key;
		h.val = val;

		if (!exists_uuhash(map, h)) {
			put_uuhash(map, h);
		}
	}

	db = (CtgDB*)malloc(sizeof(CtgDB));
	db->ctgnum = 0;
	db->ctgs = init_ctglist(6);

	while (fread_line(line, fr1) != -1) {
		if (line->string[0] == 'E') {
			if (len != 0) {
				Ctg contig;
				contig.id = id;
				contig.cls_id = i;
				h.key = id;
				h.val = 0;
				contig.old_clsid = get_uuhash(map, h)->val;
				contig.sz = 1;
				contig.seq = strdup(seq);
				db->ctgnum++;
				push_ctglist(db->ctgs, contig);
				i++;
			}
			free(seq); seq = NULL;
			len = 0;
			id = atoi(line->string+2);
		} else if (line->string[0] == 'S') {
			if (len < (int)strlen(line->string+2)) {
				len = (int)strlen(line->string+2);
				free(seq); seq = NULL;
				seq = strdup(line->string+2);
			}
		}
	}

	Ctg contig;
	contig.id = id;
	contig.cls_id = i;
	h.key = id;
	h.val = 0;
	contig.old_clsid = get_uuhash(map, h)->val;
	contig.sz = 1;
	contig.seq = strdup(seq);
	db->ctgnum++;
	push_ctglist(db->ctgs, contig);

	free(seq);
	free_string(line);
	free_uuhash(map);
	return db;
}