Пример #1
0
void get_elm_list(int *num_id, int *elm_id, int id, bool is_x, int num_list, struct DotList *dots)
{
	int i = 0;
	struct I temp;
	int t_val = LOOSE;

	*num_id = 0;
	if( is_x == true )
	{
		temp = assign_I(dots[id].x.lower, dots[id].x.upper);
	}
	else temp = assign_I(dots[id].y.lower, dots[id].y.upper);

	while( i < num_list )
	{
		if( (i == id) || (dots[i].sign == 2) ) i++;
		else
		{
			if( ((f_loose_subset(dots[i].x, temp, t_val) == false) && (f_loose_overlap(temp, dots[i].x, t_val) == true)) || ((f_loose_subset(dots[i].y, temp, t_val) == false) && (f_loose_overlap(temp, dots[i].y, t_val) == true))) 
			{
				elm_id[*num_id] = i;
				(*num_id)++;
			}
			i++;
		}
	}
}
Пример #2
0
int find_status(struct cv_list cur_cv, struct cv_list *cv, int num_cv, char *name1, char *name2)
{
	int i = 0;
	struct I src1, dst1, src2, dst2;
	int res = -1;
	char name[50];

	src1 = assign_I(cur_cv.a1, cur_cv.a2);
	dst1 = assign_I(cur_cv.b1, cur_cv.b2);
	for( i = 0; i < num_cv; i++ ) {
		src2 = assign_I(cv[i].a1, cv[i].a2);
		dst2 = assign_I(cv[i].b1, cv[i].b2);
		if( (strcmp( cv[i].name2, "NAN" ) != 0) && (strcmp( cv[i].name3, "NAN") != 0) ) {
			strcpy( name, cv[i].name2 );
		}
		else if( (strcmp( cv[i].name2, "NAN" ) == 0) && (strcmp( cv[i].name3, "NAN") != 0) ) {
			strcpy( name, cv[i].name3 );
		}
		else if( (strcmp( cv[i].name3, "NAN" ) == 0) && (strcmp( cv[i].name2, "NAN") != 0) ) {
			strcpy( name, cv[i].name2 );
		}
		else {
			fatalf("both out-species not found %s %s\n", cv[i].name2, cv[i].name3);
		}

		if( (cur_cv.fid == cv[i].fid) && ( ((strict_almost_equal(src1, src2) == true) && (strict_almost_equal(dst1, dst2) == true)) || ( (strict_almost_equal(src1, dst2) == true) && (strict_almost_equal(dst1, src2) == true) )) && (strcmp(name1, cv[i].name1) == 0) && (strcmp(name2, name) == 0)) {
			res = i;
		}
	}

	if( res == -1 ) {
		fatalf("status not found %d\n", cur_cv.fid);
	}
	return(res);
}
Пример #3
0
void predict_sp_op(int sp_code, int rm_sp, int left_sp, int *num_list, struct DotList *dots, int *cur_num, struct ops_list *ops)
{
	char op_ch;
	int r_st = -1, r_end = -1; // the range of a removed species
	struct I temp_reg;
	int len;
	int i = 0;

	check_gene_loss(num_list, dots, sp_code, rm_sp, left_sp, cur_num, ops);

	op_ch = 's';
	for( i = 0; i < (*num_list); i++ )
	{
		if( dots[i].sp_id == sp_code )
		{
			if( ( r_st == -1 ) && ( r_end == -1 ) )
			{
				r_st = dots[i].y.lower;
				r_end = dots[i].y.upper;
			}
			else 
			{
				if( dots[i].y.lower < r_st ) r_st = dots[i].y.lower;
				if( dots[i].y.upper > r_end ) r_end = dots[i].y.upper;
			}
		}
	}

	temp_reg = assign_I(r_st, r_end);
	len = r_end - r_st + 1;

	for( i = 0; i < (*num_list); i++ )
	{
		if( (proper_overlap(temp_reg, dots[i].x) == true) || (proper_overlap(temp_reg, dots[i].y) == true) )
		{
			dots[i].sign = 2;	
		}
		else 
		{
			if( dots[i].x.lower > r_st )
			{
				dots[i].x = assign_I(dots[i].x.lower - len, dots[i].x.upper - len);
				dots[i].y = assign_I(dots[i].y.lower - len, dots[i].y.upper - len);
			}
			else if( dots[i].y.lower > r_st )
			{
				dots[i].y = assign_I(dots[i].y.lower - len, dots[i].y.upper - len);
			}
		}
	}

	overwrite_dots(num_list, dots);
	ops[*cur_num].sign = op_ch;
	ops[*cur_num].src_b = r_st;
	ops[*cur_num].src_e = r_end;
	ops[*cur_num].dst_b = 0;
	ops[*cur_num].dst_e = 0;
	ops[*cur_num].sp_id = rm_sp;
}
Пример #4
0
int check_into_own(struct DotList *dots, int loc_id, int comp_id)
{
	struct I temp;
	int res;

	if( (dots[loc_id].sign == 0) && (dots[comp_id].sign == 0) )
	{
		if( (strict_overlap(dots[loc_id].x, dots[loc_id].y, 10*T_OP_TH) == true) && (strict_overlap(dots[comp_id].x, dots[comp_id].y, 10*T_OP_TH) == true ) && (dots[loc_id].x.upper < dots[comp_id].x.lower) )
		{
			temp = assign_I(dots[loc_id].x.upper, dots[comp_id].x.lower);
			if( strict_almost_equal(dots[loc_id].y, temp) == true )
			{
				res = COPY_OWN;	
			}
			else res = NON_COPY;
		}
		else if( (strict_overlap(dots[loc_id].x, dots[loc_id].y, 10*T_OP_TH) == true) && (strict_overlap(dots[comp_id].x, dots[comp_id].y, 10*T_OP_TH) == true) && (dots[comp_id].x.upper < dots[loc_id].x.lower ) )
		{
			temp = assign_I(dots[comp_id].x.upper, dots[loc_id].x.lower);
			if( strict_almost_equal(dots[comp_id].y, temp) == true )
			{
				res = COPY_OWN;
			}
			else res = NON_COPY;
		}
		else 
		{
			res = NON_COPY;
		}
	}
	else if( (dots[loc_id].sign == 1) && (dots[comp_id].sign == 1) )
	{
		if( (strict_overlap(dots[loc_id].x, dots[comp_id].x, 10*T_OP_TH) == true ) && (strict_overlap(dots[loc_id].y, dots[comp_id].y, 10*T_OP_TH) == true ) && (dots[comp_id].x.upper < dots[comp_id].y.lower))
		{
			temp = assign_I(dots[comp_id].x.upper, dots[comp_id].y.lower);
			if( strict_almost_equal(dots[loc_id].y, temp) == true )
			{
				res = COPY_OWN_INV;
			}
			else res = NON_COPY;
		}
		else if( (strict_overlap(dots[loc_id].x, dots[comp_id].x, 10*T_OP_TH) == true ) &&(strict_overlap(dots[loc_id].y, dots[comp_id].y, 10*T_OP_TH) == true ) && (dots[loc_id].x.upper < dots[loc_id].y.lower))
		{
			temp = assign_I(dots[loc_id].x.upper, dots[loc_id].y.lower);
			if( strict_almost_equal(dots[comp_id].y, temp) == true )
			{
				res = COPY_OWN_INV;
			}
			else res = NON_COPY;
		}
		else
		{
			res = NON_COPY;
		}
	}
	else res = NON_COPY;

	return(res);
}
Пример #5
0
struct exons_list assign_exons(struct exons_list a)
{
  struct exons_list res;

  res.fid = a.fid; // id in the inital list
  res.reg = assign_I(a.reg.lower, a.reg.upper);
  res.cmp_reg = assign_I(a.cmp_reg.lower, a.cmp_reg.upper);
  res.sp_id = a.sp_id;
  res.val = a.val;
  res.sign = a.sign; // '<' or '>'
  res.ctg_id = a.ctg_id;

  return(res);
}
Пример #6
0
/*
 * Apply to init_algns[init_id] the boundary change that turns
 * algns[cur_id] into t1.
 *
 * For each of the four boundaries the difference (t1 - algns[cur_id]) is
 * added both to the matching *_offset field and to the boundary itself.
 * Nothing is changed unless the shifted x and y intervals would both still
 * have lower < upper.  On update rp1_id is reset to 0.
 */
void adjust_init_offset(struct DotList *init_algns, int init_id, struct DotList t1, struct DotList *algns, int cur_id)
{
	struct DotList *init = &init_algns[init_id];
	struct DotList *cur = &algns[cur_id];

	// bail out when either shifted interval would be empty or inverted
	if( ((init->x.upper + t1.x.upper - cur->x.upper) <= (init->x.lower + t1.x.lower - cur->x.lower))
		|| ((init->y.lower + t1.y.lower - cur->y.lower) >= (init->y.upper + t1.y.upper - cur->y.upper)) )
	{
		return;
	}

	init->xl_offset = init->xl_offset + t1.x.lower - cur->x.lower;
	init->xr_offset = init->xr_offset + t1.x.upper - cur->x.upper;
	init->yl_offset = init->yl_offset + t1.y.lower - cur->y.lower;
	init->yr_offset = init->yr_offset + t1.y.upper - cur->y.upper;

	init->x = assign_I(init->x.lower + t1.x.lower - cur->x.lower, init->x.upper + t1.x.upper - cur->x.upper);
	init->y = assign_I(init->y.lower + t1.y.lower - cur->y.lower, init->y.upper + t1.y.upper - cur->y.upper);
	init->rp1_id = 0;
}
Пример #7
0
bool is_repeats(struct exons_list *exons, int num_exons, char *name, int from, int to) // exons assume to be already sorted by genomic positions
{
	int i = 0;
	int mid = 0;
	bool res = false;
	struct I reg;

	if( to > from ) {
		reg = assign_I(from, to);
	}
	else {
		fatalf("unexpected interval: %d-%d\n", from, to);
	}

	mid = quick_search_close_exons(exons, 0, num_exons-1, from);
	i = mid;
	while( (res == false) && (i < num_exons) && (exons[i].reg.lower <= to)) {
		if( width(reg) <= SHORT_LEN_TH ) {
// (strcmp(reg, exons[i].reg) == 0) && (almost_subset(reg, exons[i].reg) == true) ) {
//			printf("%d-%d too short\n", from, to);
			res = true;	
		}
		else if( (strcmp(name, exons[i].chr) == 0) && subset(reg, exons[i].reg) ) {
//			printf("%d-%d belongs to %s %d-%d\n", from, to, exons[i].chr, exons[i].reg.lower, exons[i].reg.upper);
			res = true;
		}
		i++;
	}

	return(res);
}
Пример #8
0
/*
 * Report whether alignment cur_id is linked to inserted alignment ins_id.
 *
 * Returns true when
 *  - find_alt_ins_id finds an alternative for cur_id on the x side or on
 *    the y side (a result other than -1), or
 *  - some entry of dlist[0..num_dup-1] refers to ins_id (by m_id, or by an
 *    interval strict_almost_equal to dots[ins_id].x or .y) and has cur_id
 *    as its left_id or right_id.
 *
 * Both find_alt_ins_id calls are always made, as before.  The flag they
 * receive is now an initialised stack variable rather than a one-element
 * heap block, so no allocation or free is needed per call.
 */
bool is_s_list(struct DotList *dots, int ins_id, int cur_id, struct ID_List *dlist, int num_dup, struct kdnode *tree, struct perm_pt *p_pts, int size, FILE *fp, struct DotList *init_dots)
{
	int i = 0;
	struct I temp;
	int x_opt_id, y_opt_id;
	bool f_is_x = false; // out-parameter of find_alt_ins_id; its value is not used here

	x_opt_id = find_alt_ins_id(cur_id, dots, tree, p_pts, ins_id, true, &f_is_x, size, fp, init_dots);
	y_opt_id = find_alt_ins_id(cur_id, dots, tree, p_pts, ins_id, false, &f_is_x, size, fp, init_dots);

	if( (x_opt_id != -1) || (y_opt_id != -1) )
	{
		return(true);
	}

	for( i = 0; i < num_dup; i++ )
	{
		// interval of the duplication's matching alignment on the recorded axis
		if( dlist[i].is_x == true )
		{
			temp = assign_I(dots[dlist[i].m_id].x.lower, dots[dlist[i].m_id].x.upper);
		}
		else
		{
			temp = assign_I(dots[dlist[i].m_id].y.lower, dots[dlist[i].m_id].y.upper);
		}

		if( (dlist[i].m_id == ins_id) || (strict_almost_equal(dots[ins_id].x, temp) == true) || (strict_almost_equal(dots[ins_id].y, temp) == true) )
		{
			if( (cur_id == dlist[i].left_id) || (cur_id == dlist[i].right_id) )
			{
				return(true);
			}
		}
	}

	return(false);
}
Пример #9
0
/*
 * Check whether `reg` lies inside the source or destination interval of any
 * '+' or '-' operation among ops[from..to] (inclusive).  Containment is
 * tested with f_loose_subset using the STRICT mode.  Operations with any
 * other sign are ignored.
 */
bool is_on_prev_events(struct I reg, struct ops_list *ops, int from, int to)
{
	struct I src, dst;
	int k;

	for( k = from; k <= to; k++ ) {
		if( (ops[k].sign != '+') && (ops[k].sign != '-') ) continue;

		src = assign_I(ops[k].srcStart, ops[k].srcEnd);
		dst = assign_I(ops[k].dstStart, ops[k].dstEnd);
		if( (f_loose_subset(reg, src, STRICT) == true ) || (f_loose_subset(reg, dst, STRICT) == true) )
		{
			return(true);
		}
	}

	return(false);
}
Пример #10
0
bool tandem_exist(struct DotList *dots, struct perm_pt *p_pts, struct kdnode *tree, int size, int id1, int id2)
{
	bool res = false;
	struct I reg1, reg2;
	int sid = 0, eid = 0;
	int i = 0;
	int cur_id = 0;

	reg1 = assign_I(0, 1);
	reg2 = assign_I(0, 1);
	
	if( (dots[id1].sign == dots[id2].sign) && (proper_overlap(dots[id1].x, dots[id2].x) == true ) && (proper_overlap(dots[id1].y, dots[id2].y) == true) ) {
		reg1 = intersect(dots[id1].x, dots[id2].x);			
		sid = find_id_len(tree, size, width(reg1), reg1.lower, reg1.lower, W_SID);
		eid = find_id_len(tree, size, width(reg1), reg1.upper, reg1.upper, W_FID);

		i = sid;
		while( (i <= eid) && (res == false) ) {
			cur_id = p_pts[i].id;
			if( is_tandem(dots[cur_id]) == true ) res = true;
			i++;
		}

		if( res == false ) {
			reg2 = intersect(dots[id1].y, dots[id2].y);
			sid = find_id_len(tree, size, width(reg2), reg2.lower, reg2.lower, W_SID);
			eid = find_id_len(tree, size, width(reg2), reg2.upper, reg2.upper, W_FID);

			i = sid;
			while( (i <= eid) && (res == false)) {
				cur_id = p_pts[i].id;
				if( is_tandem(dots[cur_id]) == true ) res = true;
				i++;
			}
		}	
	}

	return(res);
}
Пример #11
0
/*
 * Report whether some live alignment lies inside the gap `gp` and has a
 * higher identity than the alignment bounding that gap.
 *
 * The x gap is [gp.x1, gp.x2] and the y gap is [gp.y1, gp.y2]; a gap is
 * only usable when its first bound is strictly below its second.  A dot
 * (sign != 2) counts when
 *   - its x interval is a subset of the x gap and its identity exceeds
 *     dots[gp.id1].identity, or
 *   - its y interval is a subset of the y gap and its identity exceeds
 *     dots[gp.id2].identity.
 *
 * Previously an unusable gap left its interval variable uninitialised and
 * it was still handed to subset(); such a gap is now simply skipped.
 */
bool check_inclusion_alignments(struct gap_list gp, struct DotList *dots, int num)
{
	struct I x, y;
	bool x_valid = false, y_valid = false;
	int i;

	// placeholder values, same convention as elsewhere in this code base
	x = assign_I(0, 1);
	y = assign_I(0, 1);

	if( gp.x1 < gp.x2 )
	{
		x = assign_I(gp.x1, gp.x2);
		x_valid = true;
	}

	if( gp.y1 < gp.y2 )
	{
		y = assign_I(gp.y1, gp.y2);
		y_valid = true;
	}

	if( (x_valid == false) && (y_valid == false) ) return(false);

	for( i = 0; i < num; i++ )
	{
		if( dots[i].sign == 2 ) continue;

		if( (x_valid == true) && (subset(dots[i].x, x) == true) && (dots[i].identity > dots[gp.id1].identity) )
		{
			return(true);
		}
		if( (y_valid == true) && (subset(dots[i].y, y) == true) && (dots[i].identity > dots[gp.id2].identity) )
		{
			return(true);
		}
	}

	return(false);
}
Пример #12
0
/*
 * Reset every field of tree node *t to its "empty" default: no links, no
 * name, no children, placeholder region (0,1), identifiers set to -1.
 */
void init_tree(struct p_tree *t)
{
	// links to neighbouring nodes
	t->parent = NULL;
	t->left = NULL;
	t->right = NULL;

	// children list (species for a species tree)
	t->ch_sp = NULL;
	t->num_csp = 0; // the number of child species

	// labels and identifiers
	t->name = NULL;
	t->sp_code = -1; // a code number of self-alignment: species id, seq id for orthologous mappings
	t->gid = -1; // a gene identifier
	t->nid = -1; // a node identifier

	// payload
	t->reg = assign_I(0,1);
	t->b_len = 0.0;
	t->d_mode = SP;
	t->od = 0; // orientation for printing orthologous alignments
	t->val = 0;

	// traversal bookkeeping
	t->depth = 0; // the depth of each node
	t->visited = false;
}
Пример #13
0
/*
 * conv_td_reg: shrink each alignment of a tandem chain to a candidate
 * sub-region.
 *
 * The chain is `id` followed by t_list[0..num_tandem-1].  In iteration i,
 * cur_id is the previous element of the chain (id for i == 0, otherwise
 * t_list[i-1]) and cmp_id is t_list[i].  Both must share ctg_id1 and
 * ctg_id2, otherwise fatalf is called.
 *
 * When dots[cmp_id] is a strict subset of dots[cur_id] on both x and y (and
 * neither axis is strict_almost_equal), two candidate regions are derived
 * from the part of cur_id that sticks out beyond cmp_id: t1 from the x side
 * and t2 from the y side.  The interval (-1, 0) marks "no candidate".
 * Candidates are scored with check_tandem_reg; the selected one overwrites
 * dots[cur_id].x/.y and rp1_id is reset to 0.  After the loop the last
 * element t_list[num_tandem-1] is processed in a similar way through cur_t.
 *
 * flag == FIRST_RUN : scores are computed and stored in val_org/val1/val2,
 *                     and adjust_init_offset is applied to init_dots entries
 *                     whose c_id and m_id are both -1.
 * otherwise         : scores are read back from val1/val2 (see the note on
 *                     val_org inside the loop).
 *
 * val_org, val1, val2 are indexed 0..num_tandem inclusive, so each needs at
 * least num_tandem+1 entries.
 *
 * NOTE(review): num_tandem is assumed to be >= 1.  With 0 the loop never
 * runs, t_list[num_tandem-1] reads t_list[-1], and t2 is read unassigned.
 */
void conv_td_reg(struct DotList *dots, int num, int id, int *t_list, int num_tandem, struct DotList *init_dots, int flag, int *val1, int *val2, int *val_org)
{
	int i;
	int cur_id, cmp_id;
	struct DotList t1, t2;
	struct DotList *cur_t;
	int len_x, len_y;
	int cur_len = 0;
	int val_t1, val_t2, val_org_reg;
	int init_id;

	// scratch record that holds the candidate chosen for the last chain element
	cur_t = (struct DotList *) ckalloc(sizeof(struct DotList));
	
	for( i = 0; i < num_tandem; i++ )
	{

		if( flag == FIRST_RUN ) {
			val_org_reg = -1;
			val_t1 = -1;
			val_t2 = -1;
		}
		else {
			val_org_reg = val_org[i];
			val_t1 = val1[i];
			val_t2 = val2[i];
		}

		// (-1, 0) is the "no candidate" marker tested below via .lower >= 0
		t1.x = assign_I(-1, 0);
		t2.x = assign_I(-1, 0);
		t1.y = assign_I(-1, 0);
		t2.y = assign_I(-1, 0);
		cmp_id = t_list[i];
		if( i == 0 ) cur_id = id;
		else cur_id = t_list[i-1];

		if( dots[cmp_id].ctg_id1 != dots[cur_id].ctg_id1 ) {
			fatalf("error: handling alignments from different contigs %s vs %s in handling_tandem_duplications.c\n", dots[cmp_id].name1, dots[cur_id].name1);
		}
		
		if( dots[cmp_id].ctg_id2 != dots[cur_id].ctg_id2 ) {
			fatalf("error: handling alignments from different contigs %s vs %s in handling_tandem_duplications.c\n", dots[cmp_id].name2, dots[cur_id].name2);
		}

		// nothing to trim when either axis is (almost) identical
		if( ( strict_almost_equal( dots[cmp_id].x, dots[cur_id].x ) == true ) || ( strict_almost_equal( dots[cmp_id].y, dots[cur_id].y) == true ) ) {}
		else if( ( strict_subset( dots[cmp_id].x, dots[cur_id].x ) == true ) && ( strict_subset( dots[cmp_id].y, dots[cur_id].y ) == true ) )
		{
			// t1: take the larger of the two x overhangs of cur_id beyond cmp_id
			if( abs(dots[cur_id].x.upper - dots[cmp_id].x.upper) > abs(dots[cur_id].x.lower - dots[cmp_id].x.lower)	)
			{
				if( ( dots[cur_id].x.upper - dots[cmp_id].x.upper ) <= 0 ) t1.x = assign_I(-1, 0);
				else
				{
					len_x = width(dots[cur_id].x);
					len_y = width(dots[cur_id].y);

					t1.x = assign_I(dots[cmp_id].x.upper, dots[cur_id].x.upper);
					// y length is the x overhang scaled by the y/x width ratio of cur_id
					cur_len = (int)(((float)(width(t1.x)) * ((float)len_y)/(float)len_x));
					t1.y = assign_I(dots[cur_id].x.upper, dots[cur_id].x.upper + cur_len);
				}
			}
			else
			{
				if( ( dots[cur_id].x.lower - dots[cmp_id].x.lower ) >= 0 ) t1.x = assign_I(-1, 0);
				else
				{
					len_x = width(dots[cur_id].x);
					len_y = width(dots[cur_id].y);

					t1.x = assign_I(dots[cur_id].x.lower, dots[cmp_id].x.lower);
					cur_len = (int)(((float)(width(t1.x)) * ((float)len_y)/(float)len_x));
					t1.y = assign_I(dots[cmp_id].x.lower, dots[cmp_id].x.lower + cur_len);
				}
			}

			// t2: same idea on the y axis, with x derived by the inverse ratio
			if( abs(dots[cmp_id].y.lower - dots[cur_id].y.lower) > abs(dots[cur_id].y.upper - dots[cmp_id].y.upper)	)
			{
				if( ( dots[cmp_id].y.lower - dots[cur_id].y.lower ) <= 0 ) t2.x = assign_I(-1, 0); 
				else
				{
					len_x = width(dots[cur_id].x);
					len_y = width(dots[cur_id].y);

					t2.y = assign_I(dots[cur_id].y.lower, dots[cmp_id].y.lower);
					cur_len = (int)(((float)(width(t2.y)) * ((float)len_x)/(float)len_y));
					t2.x = assign_I(dots[cur_id].y.lower - cur_len, dots[cur_id].y.lower);
				}
			}
			else
			{
				if( ( dots[cur_id].y.upper - dots[cmp_id].y.upper ) <= 0 ) t2.x = assign_I(-1, 0);
				else
				{
					len_x = width(dots[cur_id].x);
					len_y = width(dots[cur_id].y);

					t2.y = assign_I(dots[cmp_id].y.upper, dots[cur_id].y.upper);
					cur_len = (int)(((float)(width(t2.y)) * ((float)len_x)/(float)len_y));
					t2.x = assign_I(dots[cmp_id].y.upper - cur_len, dots[cmp_id].y.upper);
				}
			}
		}

		// NOTE(review): this reset is unconditional, so the value loaded from
		// val_org[i] in the non-FIRST_RUN branch above is discarded.
		val_org_reg = -1;
		if( !proper_overlap(dots[cur_id].x, dots[cur_id].y) ) {
			// the STRICT store is immediately overwritten by the next line
			val_org_reg = STRICT;
			val_org_reg = check_tandem_reg( dots[cur_id], dots, num );
		}

		if( flag == FIRST_RUN ) {
			if( (t1.x.lower >= 0) && (t1.y.lower >= 0) ) {
				val_t1 = check_tandem_reg( t1, dots, num );
			}
			else val_t1 = -1;

			if( (t2.x.lower >= 0) && (t2.y.lower >= 0)) {
				val_t2 = check_tandem_reg( t2, dots, num );
			}
			else val_t2 = -1;

			// neither candidate scored: fall back to LOOSE for the first one that exists
			if( (val_t1 == -1) && (val_t2 == -1) ) {
				if( t1.x.lower >= 0 ) val_t1 = LOOSE;
				else if( t2.x.lower >= 0 ) val_t2 = LOOSE;
			}

			// remember the scores for later (non-FIRST_RUN) calls
			val_org[i] = val_org_reg;
			val1[i] = val_t1;
			val2[i] = val_t2;
		}

		// keep cur_id unchanged when the original region itself scored
		if( val_org_reg != -1 ) {}
		else if( (val_t1 != -1) && (val_t2 != -1) && (t1.x.lower >= 0) && (t1.y.lower >= 0) && (t2.x.lower >= 0) && (t2.y.lower >= 0)) 
		{
			// both candidates usable: the one with the smaller score wins, t1 on ties
			if( val_t1 <= val_t2 )
			{
				init_id = dots[cur_id].index;
				if( (flag == FIRST_RUN) && (init_dots[init_id].c_id == -1) && (init_dots[init_id].m_id == -1) ) {
// in order to get the original boundaries, offsets defined here should be just subtracted.
					adjust_init_offset(init_dots, init_id, t1, dots, cur_id);
				}

				dots[cur_id].x = assign_I(t1.x.lower, t1.x.upper);
				dots[cur_id].y = assign_I(t1.y.lower, t1.y.upper);
				dots[cur_id].rp1_id = 0;
			}
			else 
			{
				init_id = dots[cur_id].index;
				if( (flag == FIRST_RUN) && (init_dots[init_id].c_id == -1) && (init_dots[init_id].m_id == -1) ) {
					adjust_init_offset(init_dots, init_id, t2, dots, cur_id);
				}
				dots[cur_id].x = assign_I(t2.x.lower, t2.x.upper);
				dots[cur_id].y = assign_I(t2.y.lower, t2.y.upper);
				dots[cur_id].rp1_id = 0;
				init_id = dots[cur_id].index;
			}
		}
		else if( (val_t1 != -1) && (t1.x.lower >= 0) && (t1.y.lower >= 0))
		{
			init_id = dots[cur_id].index;
			if( (flag == FIRST_RUN) && (init_dots[init_id].c_id == -1) && (init_dots[init_id].m_id == -1) ) {
// in order to reflect the change of the boundaries, offsets defined here should be just added.
				adjust_init_offset(init_dots, init_id, t1, dots, cur_id);
			}

			dots[cur_id].x = assign_I(t1.x.lower, t1.x.upper);
			dots[cur_id].y = assign_I(t1.y.lower, t1.y.upper);
			dots[cur_id].rp1_id = 0;
			init_id = dots[cur_id].index;
		}
		else if( (val_t2 != -1) && (t2.x.lower >= 0) && (t2.y.lower >= 0))
		{
			init_id = dots[cur_id].index;
			if( (flag == FIRST_RUN) && (init_dots[init_id].c_id == -1) && (init_dots[init_id].m_id == -1) ) {
				adjust_init_offset(init_dots, init_id, t2, dots, cur_id);
			}

			dots[cur_id].x = assign_I(t2.x.lower, t2.x.upper);
			dots[cur_id].y = assign_I(t2.y.lower, t2.y.upper);
			dots[cur_id].rp1_id = 0;
			init_id = dots[cur_id].index;
		}
	}

	// ---- last element of the chain: t_list[num_tandem-1] ----
	// t2 still holds whatever the final loop iteration left in it.
	val_org_reg = -1;
	cmp_id = t_list[num_tandem-1];
	len_x = width(dots[cmp_id].x);
	len_y = width(dots[cmp_id].y);
	if( proper_overlap(dots[cmp_id].x, dots[cmp_id].y) ) {
		// x and y overlap: use the first half of the span from x.lower to y.upper
		t1.x = assign_I(dots[cmp_id].x.lower, dots[cmp_id].x.lower + (dots[cmp_id].y.upper - dots[cmp_id].x.lower)/2);
		t1.y = assign_I(dots[cmp_id].x.lower, dots[cmp_id].x.lower + (dots[cmp_id].y.upper - dots[cmp_id].x.lower)/2);
	}
	else {
		val_org_reg = STRICT;
		t1.x = assign_I(dots[cmp_id].x.lower, dots[cmp_id].x.upper);
		t1.y = assign_I(dots[cmp_id].y.lower, dots[cmp_id].y.upper);
	}

	// t1.y is recomputed here in both cases, replacing the value set just above
	cur_len = (int)(((float)(width(t1.x)) * ((float)len_y)/(float)len_x));
	t1.y = assign_I(t1.x.upper, t1.x.upper + cur_len);
	if( t2.y.lower != -1 ) {
		cur_len = (int)(((float)(width(t2.y)) * ((float)len_x)/(float)len_y));
		t2.x = assign_I(t2.y.lower - cur_len, t2.y.lower);
	}
	else t2.x = assign_I(-1,0);

	if( flag == FIRST_RUN ) {
		if( val_org_reg != -1 ) val_org_reg = check_tandem_reg(dots[cmp_id], dots, num);
		if( (t1.x.lower >= 0) && (t1.y.lower >= 0) ) val_t1 = check_tandem_reg(t1, dots, num);
		else val_t1 = -1;

		if( (t2.x.lower < 0) || (t2.y.lower < 0) ) val_t2 = -1;
		else val_t2 = check_tandem_reg(t2, dots, num);
		// slot num_tandem (one past the loop's slots) stores the last element's scores
		val_org[num_tandem] = val_org_reg;
		val1[num_tandem] = val_t1;
		val2[num_tandem] = val_t2;
	}
	else {
		val_org_reg = val_org[num_tandem];
		val_t1 = val1[num_tandem];
		val_t2 = val2[num_tandem];
	}

	if( (t1.x.lower < 0) && (t1.y.lower < 0) ) val_t1 = -1;
	if( (t2.x.lower < 0) && (t2.y.lower < 0) ) val_t2 = -1;

	// choose the candidate copied into cur_t; unlike the loop, t2 wins on ties
	if( val_org_reg != -1 ) {}
	else if( (val_t1 != -1) && (val_t2 != -1) ) {
		if( val_t1 < val_t2 ) {
			assign_algn(cur_t, 0, t1);
		}
		else assign_algn(cur_t, 0, t2);
	}		
	else if( val_t1 != -1 ) assign_algn(cur_t, 0, t1);
	else if( val_t2 != -1 ) assign_algn(cur_t, 0, t2);

	// apply the chosen candidate to the last element
	if( val_org_reg != -1 ) {}
	else if( (val_t1 != -1) || (val_t2 != -1) ) {
		init_id = dots[cmp_id].index;
		if( (flag == FIRST_RUN) && (init_dots[init_id].c_id == -1) && (init_dots[init_id].m_id == -1) ) {
// in order to reflect the change of the boundaries, offsets defined here should be just added.
			adjust_init_offset(init_dots, init_id, *cur_t, dots, cmp_id);
		}

		dots[cmp_id].x = assign_I((*cur_t).x.lower, (*cur_t).x.upper);
		dots[cmp_id].y = assign_I((*cur_t).y.lower, (*cur_t).y.upper);
		dots[cmp_id].rp1_id = 0;
	}

	free(cur_t);
}
Пример #14
0
/*
 * det_dup_reg_in_self: vote on which side of a duplication (check_x vs
 * check_y) the self-alignments in self[0..num_list-1] support.
 *
 * For every self-alignment i that touches check_x (by its x interval, or
 * failing that by its y interval):
 *   1. the touching side becomes temp_1 and the other side temp_2;
 *   2. temp_1 is clipped to check_x and expressed relative to
 *      check_x.lower (cur_1); temp_2 is trimmed by the same amounts (cur_2);
 *   3. cur_1 is projected onto check_y (cmp_1): forward when sign == 0,
 *      mirrored from check_y.upper when sign == 1; cmp_2 is cur_2;
 *   4. self[] is searched for another alignment j whose (x, y) matches
 *      (cmp_1, cmp_2), either almost exactly or as a loose superset within
 *      DIS_THRESHOLD; on the first hit the identities of i (left_pid) and j
 *      (right_pid) are compared and count_left or count_right is bumped.
 *
 * Returns LEFT_SIDE, RIGHT_SIDE or TIE according to the two counters.
 * Intervals no wider than MIN_INTERVAL are ignored throughout.
 */
int det_dup_reg_in_self(int num_list, struct DotList *self, struct I check_x, struct I check_y, int sign)
{
	int i = 0, j = 0;
	struct I temp_1, temp_2;
	struct I cur_1, cur_2, cmp_1, cmp_2;
	bool is_end = false;
	int count_left = 0, count_right = 0;
	int left_pid = 0, right_pid = 0;
	int cut_len_left = 0, cut_len_right = 0;
	int res = TIE;
	int distance = 0;
	bool is_candi = false;
	bool is_assigned = false;

	// (0, 1) is used as the placeholder interval
	temp_1 = assign_I(0, 1);
	temp_2 = assign_I(0, 1);

	for( i = 0; i < num_list; i++ )
	{
		cur_1 = assign_I(0, 1);
		cur_2 = assign_I(0, 1);
		cmp_1 = assign_I(0, 1);
		cmp_2 = assign_I(0, 1);
		is_candi = false;
		left_pid = self[i].identity;
		right_pid = self[i].identity;

		// --- step 1: decide whether self[i] is a candidate and orient it ---
		if( ( is_assigned == true ) && ((width(temp_1) <= MIN_INTERVAL) || (width(temp_2) <= MIN_INTERVAL)) ) {
			is_end = true;
		}
		else if( (width(self[i].x) <= MIN_INTERVAL) || (width(self[i].y) <= MIN_INTERVAL) )
		{
			is_candi = false;
		}
		else if( (almost_subset(self[i].x, check_x) == true) || ((f_loose_overlap(self[i].x, check_x, SECOND_RUN) == true) && (width(intersect(self[i].x, check_x)) >= MIN_LEN) ))
		{
			// x side touches check_x
			temp_1 = assign_I(self[i].x.lower, self[i].x.upper);
			temp_2 = assign_I(self[i].y.lower, self[i].y.upper);
			is_assigned = true;
			is_candi = true;
		}
		else if( (almost_subset(self[i].y, check_x) == true) || (f_loose_overlap(self[i].y, check_x, SECOND_RUN) == true) )
		{
			// y side touches check_x: swap roles (no MIN_LEN test on this path)
			temp_1 = assign_I(self[i].y.lower, self[i].y.upper);
			temp_2 = assign_I(self[i].x.lower, self[i].x.upper);
			is_assigned = true;
			is_candi = true;
		}
		else {
			is_candi = false;
		}

		if( (is_end == false) && (is_candi == true) )
		{
			// --- step 2: clip temp_1 to check_x; is_end = true abandons this candidate ---
			if( (temp_1.lower < check_x.lower) && (temp_1.upper > check_x.upper) )
			{
				// temp_1 sticks out on both sides of check_x
				if( width(check_x) > 0 ) {
					cur_1 = assign_I(0, width(check_x));

					cut_len_left = check_x.lower - temp_1.lower;
					cut_len_right = temp_1.upper - check_x.upper; 
					if( (temp_2.upper - cut_len_right) <= (temp_2.lower + cut_len_left) )
					{
						is_end = true;
					}
					else
					{
						cur_2 = assign_I(temp_2.lower + cut_len_left, temp_2.upper - cut_len_right);
					}
				}
				else {
					is_end = true;
				}
			}
			else
			{
				if( temp_1.lower < check_x.lower )
				{
					// sticks out on the left only
					if( temp_1.upper > check_x.lower ) {
						cur_1 = assign_I( 0, temp_1.upper - check_x.lower );

						cut_len_left = check_x.lower - temp_1.lower;
						if( temp_2.upper <= (temp_2.lower + cut_len_left) )
						{
							is_end = true;
						}
						else
						{
							cur_2 = assign_I( temp_2.lower + cut_len_left, temp_2.upper );
						}
					}
					else {
						is_end = true;
					}	
				}
				else if( temp_1.upper > check_x.upper )
				{
					// sticks out on the right only
					if( width(check_x) > (temp_1.lower - check_x.lower) ) {
						cur_1 = assign_I( temp_1.lower - check_x.lower, width(check_x) );
						cut_len_right = temp_1.upper - check_x.upper;
						if( (temp_2.upper - cut_len_right) <= temp_2.lower )
						{
							is_end = true;
						}
						else
						{
							cur_2 = assign_I( temp_2.lower, temp_2.upper - cut_len_right );
						}			
					}
					else {
						is_end = true;
					}
				}
				else
				{
					// fully inside check_x: only re-base, no trimming of temp_2
					if( ( temp_1.upper - check_x.lower ) <= (temp_1.lower - check_x.lower) ) {
						is_end = true;
					}
					else {
						cur_1 = assign_I( temp_1.lower - check_x.lower, temp_1.upper - check_x.lower);
						cur_2 = assign_I( temp_2.lower, temp_2.upper );
					}
				}
			}

			// --- step 3: project the relative interval onto check_y ---
			if( is_end == false )
			{
				if( sign == 0 )
				{
					if( (cur_1.upper + check_y.lower) > (cur_1.lower + check_y.lower) ) {
						cmp_1 = assign_I(cur_1.lower + check_y.lower, cur_1.upper + check_y.lower);
						cmp_2 = assign_I(cur_2.lower, cur_2.upper);
					}
					else is_end = true;
				}
				else if( sign == 1 )
				{
					// reverse orientation: offsets are measured back from check_y.upper
					if( (check_y.upper - cur_1.lower) > (check_y.upper - cur_1.upper) ) {
						cmp_1 = assign_I(check_y.upper - cur_1.upper, check_y.upper - cur_1.lower);
						cmp_2 = assign_I(cur_2.lower, cur_2.upper);
					}
					else is_end = true;
				}
				// any other sign leaves cmp_1/cmp_2 at the (0, 1) placeholder

				if( is_end == false ) {
					if( (width(cmp_1) <= MIN_INTERVAL) || (width(cmp_2) <= MIN_INTERVAL) ) is_end = true;
				}
			}

			// --- step 4: look for the counterpart alignment and vote ---
			j = 0;
			while( (is_end == false) && (j < num_list) )
			{
				if( j == i ) {}
				else
				{
					if( (strict_almost_equal(self[j].x, cmp_1) == true) && ( strict_almost_equal(self[j].y, cmp_2) == true ) )
					{
						right_pid = self[j].identity;
						is_end = true;
						if( left_pid > right_pid ) count_left++;
						else if( left_pid < right_pid ) count_right++;
					}	
					else if( (loose_subset(cmp_1, self[j].x) == true) && (loose_subset(cmp_2, self[j].y) == true ) )
					{
						distance = compute_distance(cmp_1, cmp_2, self[j].x, self[j].y, sign);
						if( distance <= DIS_THRESHOLD )
						{
							right_pid = self[j].identity;
							is_end = true;
							if( left_pid > right_pid ) count_left++;
							else if( left_pid < right_pid ) count_right++;
						}
					}
				}
				j++;
			}
			// re-arm for the next candidate; note this reset happens only inside this block
			is_end = false;
		}
	}	

	if( count_left > count_right )
	{
		res = LEFT_SIDE;
	}
	else if( count_left < count_right )
	{
		res = RIGHT_SIDE;
	}
	else
	{
		res = TIE;
	}

	return(res);
}
Пример #15
0
/*
 * read_maf: load pairwise alignment blocks from a MAF file into algns[].
 *
 * The file must start with a "##maf" line.  Reading stops at end of file or
 * at a line containing "eof".  Lines starting with '#' are skipped; in
 * C_MODE and S_MODE every "##maf" header among them advances algn_type
 * (starting from SELF1), and more than PAIR headers is fatal.  Each block
 * consists of an a-line, two s-lines and a blank line.
 *
 * Coordinates are converted from MAF's 0-based start + length to 1-based
 * [begin, end]; for a '-' strand second sequence they are first mirrored
 * against that sequence's length.  A block is dropped (not stored) when it
 * fails the mode-specific filters on position, length and percent identity.
 *
 * fname     : path of the MAF file
 * mode      : D_MODE / C_MODE / S_MODE / O_MODE / PAIR_MODE, selects filters
 * algns     : out, receives the kept records; the caller must size it, no
 *             capacity check is done here
 * num_algns : out, number of records stored
 * size1/2   : out, sequence lengths taken from the last s-lines parsed
 *
 * Uses the file-level line buffers S and T (capacity BIG).
 *
 * Fixes relative to the earlier version:
 *  - yr_offset is now zeroed like the other three offsets (it was left
 *    uninitialised although adjust_init_offset later reads it);
 *  - the %s conversions into token, strand, len1 and len2 carry a field
 *    width matching their buffers.  name1/name2 are sized by LEN_NAME and
 *    are still read with a plain %s.
 */
void read_maf(char *fname, int mode, struct DotList *algns, int *num_algns, int *size1, int *size2) {
	FILE *fp;
	char *status;
	int i = 0;       // index of the current block over the whole file
	int count = 0;   // number of records stored in algns[]
	int temp;
	int a_pid;
	int b1, e1, b2, e2;
	char strand[100], len1[100], len2[100];
	char *s, *t;
	int algn_type = SELF1 - 1;
	int j = 0;       // index of the current block since the last '#' section
	int srcblock = -1;
	char token[50];
	char name1[LEN_NAME], name2[LEN_NAME];

	strcpy(name1, "");
	strcpy(name2, "");
	strcpy(len1, "0");
	strcpy(len2, "0");
	strcpy(strand, "+");
	strcpy(token, "");

	fp = ckopen(fname, "r");
	if (((status = fgets(S, BIG, fp)) == NULL) || strncmp(S, "##maf", 5))
		fatalf("%s is not a maf file", fname);

	while ((status != NULL) && (strstr(S, "eof") == NULL)) {
		if(S[0] == '#') {
			if((mode == C_MODE) || (mode == S_MODE)) {
				// combined file: each "##maf" header starts the next alignment set
				while((status != NULL) && (S[0] == '#')) {
					if( strncmp(S, "##maf", 5) == 0 ) algn_type++;
					status = fgets(S, BIG, fp);
				}
				if( algn_type > PAIR ) fatal("too many alignments are combined\n");
			}
			else {
				while ((status != NULL ) && (S[0] == '#')) {
					status = fgets(S, BIG, fp);
				}
			}
			j = 0;
		}

		srcblock = -1;
		if ( status != NULL ) {
			if (S[0] != 'a')
				fatalf("expecting an a-line in %s, saw %s",
				  fname, S);

			if( mode == O_MODE ) {
				sscanf(S, "%*s %49s", token);
				srcblock = cat_srcblock(token);
			}

			if ((fgets(S, BIG, fp) == NULL) || (fgets(T, BIG, fp) == NULL))
				fatalf("cannot find alignment in %s", fname);
			if ((sscanf(S, "%*s %s %d %d %*s %99s", name1, &b1, &e1, len1) != 4) || (sscanf(T, "%*s %s %d %d %99s %99s", name2, &b2, &e2, strand, len2) != 5))
			{
				fatalf("bad alignment info of 2 in %s", fname);
			}
			// aligned interval given as base-0 start and length
			e1 += b1;
			e2 += b2;

			// minus strand: mirror the second interval against the sequence length
			if( strcmp(strand, "-") == 0) {
				temp = b2;
				b2 = atoi(len2) - e2;
				e2 = atoi(len2) - temp;
			}

			// switch to 1-based coordinates
			b1++;
			b2++;
			e1++;
			e2++;

			s = nucs(S);
			t = nucs(T);
			a_pid = cal_pid(s, t, strlen(s)-1);

			// the three empty branches are the filters that drop a block
			if( ((mode == D_MODE) || ((mode == C_MODE) && (algn_type <= PAIR))) && (( (algn_type != PAIR) && (b1 >= b2)) || ((algn_type != PAIR) && (abs(b1-b2) <= DEL_TH) && (abs(e1-e2) <=DEL_TH)) || ((e1-b1) < ALT_EFFEC_VALUE) || (a_pid <= PID_TH) )) {}
			else if( (mode == S_MODE) && ( algn_type != PAIR  ) ) {}
			else if( (abs(e1-b1) <= ERR_SM_TH) || (abs(e2-b2) <= ERR_SM_TH) ) {}
			else  {
				algns[count].x = assign_I(b1, e1);
				if( b2 < e2 ) algns[count].y = assign_I(b2, e2);
				else algns[count].y = assign_I(e2, b2);
				algns[count].identity = a_pid;
				algns[count].m_pid = a_pid;

				if( strcmp(strand, "+") == 0 ) {
					algns[count].sign = 0;
					algns[count].init_sign = 0;
				}
				else if( strcmp(strand, "-") == 0 ) {
					algns[count].sign = 1;
					algns[count].init_sign = 1;
				}
				else {
					algns[count].sign = DELETED;
					algns[count].init_sign = DELETED;
				}

				if( mode == O_MODE ) {
					algns[count].indiv_fid = srcblock; // block id taken from the a-line
				}
				else {
					algns[count].indiv_fid = j; // jth alignment of the current section
				}
				algns[count].fid = i; // ith alignment of the file
				algns[count].index = count; // position in algns[]
				algns[count].c_id = -1; // not chained alignment
				algns[count].m_id = -1; // not chained alignment
				algns[count].rp1_id = -1; // the inserted repeat id of the chained alignment in first seq
				algns[count].rp2_id = -1; // the inserted repeat id of the chained alignment in second seq
				algns[count].l_id = -1;
				algns[count].lock = -1;
				algns[count].m_x = assign_I(0,1);
				algns[count].m_y = assign_I(0,1);
				algns[count].xl_diff = 0; // the offset of the left end
				algns[count].yl_diff = 0; // the offset of the left end
				algns[count].xr_diff = 0; // the offset of the right end
				algns[count].yr_diff = 0; // the offset of the right end
				algns[count].pair_self = -1;
				algns[count].l_pid = -1;
				if( (mode == O_MODE) || (mode == PAIR_MODE) ) {
					algns[count].sp_id = PAIR;
				}
				else {
					algns[count].sp_id = algn_type; // SELF1 for first self-alignment, SELF2 for second self-alignment and PAIR for pairwise alignment
				}
				// boundary offsets accumulated later by adjust_init_offset
				algns[count].xl_offset = 0;
				algns[count].yl_offset = 0;
				algns[count].xr_offset = 0;
				algns[count].yr_offset = 0;
				if( algn_type == PAIR ) algns[count].pair_self = PAIR;
				else algns[count].pair_self = SELF;
				strcpy(algns[count].name1, name1);
				strcpy(algns[count].name2, name2);
				algns[count].len1 = atoi(len1);
				algns[count].len2 = atoi(len2);
				algns[count].ctg_id1 = -1;
				algns[count].ctg_id2 = -1;

				count++;
			}

			// every block must be closed by an empty line
			if ((fgets(S, BIG, fp) == NULL) || (S[0] != '\n'))
				fatalf("bad alignment end in %s", fname);
			status = fgets(S, BIG, fp);
			i++; // ith alignment
			j++;
		}
	}

	*size1 = atoi(len1);
	*size2 = atoi(len2);
	*num_algns = count;
	fclose(fp);
}
Пример #16
0
/*
 * Usage: chain_file interval_text [features_gff_file]
 *
 * Reads every chain from argv[1], then for each "chr beg end" line of
 * argv[2] restricts every chain to the target range [beg, end] with
 * chainSubsetOnT() and records the query-side region of each non-empty
 * sub-chain.  When a GFF file is given as argv[3], its features on the
 * chromosome named in the first interval line are loaded as repeats and
 * sub-chains for which is_repeats() returns true are not recorded.  The
 * recorded regions are sorted, passed through remove_redundant_intervals()
 * and printed.
 */
int main(int argc, char *argv[])
{
	FILE *f;
	struct chain *Chain;
	struct chain *SubChain = NULL, *chainToFree = NULL;
	struct chain *ch_p, *next_p;
	char buf[NUM_CHARS];
	struct lineFile *lf;
	int i = 0;
	int b = 0, e = 0;
	bool is_null = true;
	struct exons_list *homologs;
	int num_chains = 0;
	int num_intervals = 0;
	int num_homologs = 0;
	struct exons_list *repeats = NULL; /* stays NULL without a GFF file so free() below is safe */
	int num_repeats = 0;
	char chr[LEN_NAME];
	size_t capacity = 0;

	strcpy(chr, "");
	if( (argc != 3) && (argc != 4) ) {
		fatal("args: chain_file interval_text features_gff_file\n");
	}

	/* the first interval line supplies the chromosome name used for the GFF */
	if( (f = ckopen(argv[2], "r")) ) {
		if( fgets(buf, NUM_CHARS, f) ) {
			if( sscanf(buf, "%s %d %d", chr, &b, &e) != 3 ) {
				fatalf("format errors: chr beg end in %s", buf);
			}
		}
		else {
			fatalf("%s is empty\n", argv[2]);
		}
		fclose(f);
	}
	else {
		fatalf("file %s invalid\n", argv[2]);
	}

	if( argc == 4 ) {
		if( (f = ckopen(argv[3], "r")) ) {
			/* one exons_list slot per line of the GFF file */
			while(fgets(buf, NUM_CHARS, f)) {
				num_repeats++;
			}
			repeats = (struct exons_list *) ckalloc(num_repeats * sizeof(struct exons_list));
			init_exons(repeats, 0, num_repeats-1);
			fseek(f, 0, SEEK_SET);
			assign_gff_exons_chr(f, repeats, num_repeats, chr);
			quick_sort_inc_exons(repeats, 0, num_repeats-1, POS_BASE);
			fclose(f);
		}
		else {
			fatalf("file %s invalid\n", argv[3]);
		}
	}

	/* read all chains and link them into a singly linked list */
	lf = lineFileOpen(argv[1], true);
	Chain = chainRead(lf);
	ch_p = Chain;
	while( (ch_p != NULL) && ((next_p = chainRead(lf)) != NULL) ) {
		ch_p->next = next_p;
		ch_p = ch_p->next;
	}

	for( ch_p = Chain; ch_p != NULL; ch_p = ch_p->next ) {
		num_chains++;
	}

	/* every interval can contribute at most one record per chain, so size
	   the output for num_chains * num_intervals records */
	f = ckopen(argv[2], "r");
	while( fgets(buf, NUM_CHARS, f) ) {
		num_intervals++;
	}
	fseek(f, 0, SEEK_SET);

	capacity = (size_t)num_chains * (size_t)num_intervals;
	if( capacity == 0 ) capacity = 1;
	homologs = (struct exons_list *) ckalloc(capacity * sizeof(struct exons_list));

	i = 0;
	while( fgets(buf, NUM_CHARS, f) ) {
		if( sscanf(buf, "%*s %d %d", &b, &e) != 2 ) {
			fatalf("format errors: chr beg end in %s", buf);
		}

		/* start every interval from a clean state; otherwise a match found
		   for an earlier interval would be reused here */
		is_null = true;
		SubChain = NULL;
		chainToFree = NULL;
		ch_p = Chain;

		/* advance to the first chain that has a non-empty sub-chain */
		while( (ch_p != NULL) && (is_null == true) ) {
			chainSubsetOnT(ch_p, b, e, &SubChain, &chainToFree);
			if( SubChain != NULL ) is_null = false;
			ch_p = ch_p->next;
		}

		if( is_null == true ) continue;

		if( (num_repeats == 0 ) || (is_repeats(repeats, num_repeats, SubChain->tName, SubChain->tStart, SubChain->tEnd) == false) ) {
			/* NOTE(review): unlike the loop below, the first match is stored
			   without converting '-' strand coordinates - confirm intent */
			homologs[i].reg = assign_I(SubChain->qStart, SubChain->qEnd);
			homologs[i].dir = SubChain->qStrand;
			strcpy(homologs[i].chr, SubChain->qName);
			i++;
		}
		if( chainToFree != NULL ) {
			chainFree(&chainToFree);
		}

		/* remaining chains: record every further non-empty sub-chain */
		while( ch_p != NULL ) {
			chainSubsetOnT(ch_p, b, e, &SubChain, &chainToFree);
			ch_p = ch_p->next;
			if( SubChain == NULL ) continue;

			if( (num_repeats == 0 ) || ( is_repeats(repeats, num_repeats, SubChain->tName, SubChain->tStart, SubChain->tEnd) == false )) {
				if( SubChain->qStrand == '-' ) {
					homologs[i].reg = assign_I(SubChain->qSize - SubChain->qEnd, SubChain->qSize - SubChain->qStart);
				}
				else {
					homologs[i].reg = assign_I(SubChain->qStart, SubChain->qEnd);
				}
				homologs[i].dir = SubChain->qStrand;
				strcpy(homologs[i].chr, SubChain->qName);
				i++;
			}
			if( chainToFree != NULL ) {
				chainFree(&chainToFree);
			}
		}
	}

	num_homologs = i;
	selection_sort_exons(homologs, num_homologs);
	num_homologs = remove_redundant_intervals(homologs, num_homologs);
	print_exons_list(homologs, num_homologs);
	free(homologs);
	free(repeats);
	chainFreeList(&Chain);

	fclose(f);
	lineFileClose(&lf);

	return EXIT_SUCCESS;
}
Пример #17
0
int check_inclusion_close_dup(int id, struct DotList *dots, int num_lines, bool *x_ins, bool *t_ins)
{
	int res = -1;
	int i = 0;
	int temp_res = id;
	struct I temp;

	if( (*x_ins) == true ) 
	{
		temp = assign_I(dots[id].x.lower, dots[id].x.upper);
	}
	else temp = assign_I(dots[id].y.lower, dots[id].y.upper);

	while( ( i < num_lines) && (res == -1) )
	{
		if( dots[i].pair_self == PAIR ) 
		{
		}
		else if( dots[i].sign == 2 ) {}
		else if( (i != id) && (dots[i].sign == 0) && ((dots[i].y.lower - dots[i].x.upper) <= THRESHOLD) )
		{
			if( strict_almost_equal(temp, dots[i].x) == true)
			{
				temp_res = i;
				res = i;
				*x_ins = true;
			}

			if( strict_almost_equal(temp, dots[i].y) == true)
			{
				temp_res = i;
				res = i;
				*x_ins = false;
			}
		}
		i++;
	}
	
	if( res != -1 )
	{
		if( is_tandem(dots[res]) == true )
		{
			*t_ins = true;
		}
		else 
		{
			*t_ins = false;
		}
	}
	else
	{
		i = 0;
		while( (i < num_lines) && (res == -1) )
		{
			if( dots[i].pair_self == PAIR ) {}
			else if( dots[i].sign == 2 ) {}
			else if( dots[temp_res].pair_self == PAIR )
			{
				if( strict_almost_equal(temp, dots[i].x) == true )
				{
					res = i;
					*x_ins = true;
				}
				else if( strict_almost_equal(temp, dots[i].y) == true )
				{
					res = i;
					*x_ins = false;
				}
			}
			i++;
		}

		if(res != -1) {
			if( is_tandem(dots[res]) == true ) *t_ins = true;
			else *t_ins = false;
		}
	}
	return( res);
}
Пример #18
0
/* when two alignments have an overlapped region */
struct gap_list define_gap_new_type(struct DotList *dots, int loc_id, int comp_id, bool is_x)
{
	struct gap_list gp;
	struct I temp;
	int len_x, len_y;

	gp.id1 = loc_id;
	gp.id2 = comp_id;

	gp.type = -1;
	gp.x1 = 0;
	gp.x2 = 1;
	gp.y1 = 0;
	gp.y2 = 1;
	gp.offset = 0;
	if( is_x == true ) // the overlap of x region is larger than y's
	{
		if( proper_overlap(dots[loc_id].x, dots[comp_id].x) == true )
		{
			temp = intersect(dots[loc_id].x, dots[comp_id].x);
			gp.type = 21; // the gap is in y side
			
			if( dots[loc_id].y.lower <= dots[comp_id].y.lower )
			{
				gp.y1 = dots[loc_id].y.upper;
				gp.y2 = dots[comp_id].y.lower + width(temp);
				len_x = width(dots[comp_id].x);
				len_y = width(dots[comp_id].y);
				gp.offset = len_y - len_x;

				if( dots[loc_id].sign == 0 )
				{
					gp.x1 = dots[loc_id].x.upper;
					gp.x2 = gp.x1 + 1;
				}
				else if( dots[loc_id].sign == 1 )
				{
					gp.x1 = dots[loc_id].x.lower;
					gp.x2 = gp.x1 + 1;
				}
				else gp.type = -1;
			}
			else
			{
				gp.y1 = dots[comp_id].y.upper;
				gp.y2 = dots[loc_id].y.lower + width(temp);
				len_x = width(dots[loc_id].x);
				len_y = width(dots[loc_id].y);
				gp.offset = len_y - len_x;

				if( dots[comp_id].sign == 0 )
				{
					gp.x1 = dots[comp_id].x.upper;
					gp.x2 = gp.x1 + 1;
				}
				else if( dots[comp_id].sign == 1 )
				{
					gp.x1 = dots[comp_id].x.lower;
					gp.x2 = gp.x1 + 1;
				}
				else gp.type = -1;
			}
		}
		else
		{
			gp.type = -1;
		}
	}
	else 
	{
		if( proper_overlap(dots[loc_id].y, dots[comp_id].y) == true )
		{
			temp = intersect(dots[loc_id].y, dots[comp_id].y);
			gp.type = 22; // the gap is in x side

			if( dots[loc_id].x.lower <= dots[comp_id].x.lower )
			{
				gp.y1 = dots[loc_id].x.upper;
				gp.y2 = dots[comp_id].x.lower + width(temp);
				len_x = width(dots[comp_id].x);
				len_y = width(dots[comp_id].y);
				gp.offset = len_x - len_y;

				if( dots[loc_id].sign == 0 )
				{
					gp.x1 = dots[loc_id].y.upper;
					gp.x2 = gp.x1 + 1;
				}
				else if( dots[loc_id].sign == 1 )
				{
					gp.x1 = dots[loc_id].y.lower;
					gp.x2 = gp.x1 + 1;
				}
				else gp.type = -1;
			}
			else 
			{
				gp.y1 = dots[comp_id].x.upper;
				gp.y2 = dots[loc_id].x.lower + width(temp);
				len_x = width(dots[loc_id].x);
				len_y = width(dots[loc_id].y);
				gp.offset = len_x - len_y;
			}

		}
		else
		{
			gp.type = -1;
		}
	}

	if( (gp.type != -1) && (gp.y2 <= gp.y1) ) {
		gp.type = -1;
	}

	if( gp.type != -1 ) {
		temp = assign_I(gp.y1, gp.y2);
		if( ( strict_almost_equal(temp, dots[comp_id].x) == true ) || ( strict_almost_equal(temp, dots[comp_id].y) == true ) || ( strict_almost_equal(temp, dots[loc_id].x) == true ) || (strict_almost_equal(temp, dots[loc_id].y) == true ))
		{
			gp.type = -1;
		}
	}	

	return(gp);
}
Пример #19
0
int main(int argc, char *argv[])
{
	FILE *f;
	char buf[1000];
	int i, j;
	int num_genes = 0;
	int num_exons = 0;
	struct g_list *genes;
	struct exons_list *exons;
	int b, e;
	char name[100], scf_name[100];
	int cur_exons_count;

	if( argc != 2 ) {
		printf("sort_exons exons_file\n");
		return EXIT_FAILURE;
	}

	strcpy(name, "");
	strcpy(scf_name, "");
	f = fopen(argv[1], "r");

	while(fgets(buf, 1000, f))
	{
		if( buf[0] == '#' ) {}	
		else if((buf[0] == '>') || (buf[0] == '<')) num_genes++;
		else num_exons++;

	}

	if( num_genes > 0 ) {
		genes = (struct g_list *) ckalloc(sizeof(struct g_list) * num_genes);
	}
	else {
		genes = (struct g_list *) ckalloc(sizeof(struct g_list));
	}

	if( num_exons > 0 ) {
		exons = (struct exons_list *) ckalloc(sizeof(struct exons_list) * num_exons);
	}
	else {
		exons = (struct exons_list *) ckalloc(sizeof(struct exons_list));
	}
	
	fseek(f, 0, SEEK_SET);

	i = -1;
	j = 0;
	while(fgets(buf, 1000, f))
	{
		if( buf[0] == '#' ) {}
		else if( (buf[0] == '>') || (buf[0] == '<') )
		{
			if( i >= 0 ) {
				genes[i].exonCount = cur_exons_count;
				genes[i].exEnd = j-1;
			}
			i++;
			cur_exons_count = 0;
			if(buf[0] == '>') genes[i].strand = '+';
			else if(buf[0] == '<' ) genes[i].strand = '-';
			else fatalf("unexpected strand %c\n", buf[0]);

			if( sscanf(buf, "%*s %d %d %s %s %*s", &b, &e, name, scf_name) == 4 ) {
				strcpy(genes[i].sname, scf_name);
			}
			else if( sscanf(buf,  "%*s %d %d %s %*s", &b, &e, name) != 3 ) {
				printf("wrong format in %s\n", buf);	
			}
			else strcpy(genes[i].sname, "");

			genes[i].gid = i;
			genes[i].txStart = b;
			genes[i].txEnd = e;
			strcpy(genes[i].gname, name);
		}
		else {
			sscanf(buf, "%d %d", &b, &e);
			if( cur_exons_count == 0 ) genes[i].exStart = j;
			exons[j].fid = i;
			exons[j].reg = assign_I(b, e);
			cur_exons_count++;
			j++;
		}
	}
	genes[i].exonCount = cur_exons_count;
	genes[i].exEnd = j-1;

	quick_sort_inc_genes(genes, 0, num_genes-1, POS_BASE);
	i = 0;
	while( i < num_genes ) {
		j = 0;
		while( ((i+j) < num_genes) && (genes[i].txStart == genes[i+j].txStart )) j++;
		quick_sort_dec_genes(genes, i, i+j-1, LEN_BASE);
		i = i+j;
	}

	for( i = 0; i < num_genes; i++ ) {
		if( genes[i].txStart < 0 ) {} 
		else {
			if( genes[i].strand == '+' ) {
				if( strcmp(genes[i].sname, "") == 0 ) {
					printf("> %d %d %s\n", genes[i].txStart, genes[i].txEnd, genes[i].gname);
				}
				else {
					printf("> %d %d %s %s\n", genes[i].txStart, genes[i].txEnd, genes[i].gname, genes[i].sname);

				}
			}
			else if( genes[i].strand == '-' ) {
				if( strcmp(genes[i].sname, "") == 0 ) {
					printf("< %d %d %s (complement)\n", genes[i].txStart, genes[i].txEnd, genes[i].gname);
				}
				else {
					printf("< %d %d %s %s (complement)\n", genes[i].txStart, genes[i].txEnd, genes[i].gname, genes[i].sname);
				}
			}
			else fatalf("unexpected strand %c\n", genes[i].strand);

			for( j = genes[i].exStart; j <= genes[i].exEnd; j++ ) {
				printf("%d %d\n", exons[j].reg.lower, exons[j].reg.upper);
			}
		}
	}
	fclose(f);

	free(genes);
	free(exons);

	return EXIT_SUCCESS;
}
Пример #20
0
void pred_dup(int con, char op_ch, int pred_op, bool is_x_to_y, int id, int *num_list, struct DotList *dots, int num_ops, struct ops_list *ops)
{
	int wide = 0;
	struct I from = {0, 1}, to = {0, 1};
	int flag = DEL;
	int i = 0;
	int sp_id = dots[id].sp_id;

	sp_id = dots[id].sp_id;

	if((dots[id].l_id == -1) && (proper_overlap(dots[id].x, dots[id].y) == true) && (width(intersect(dots[id].x, dots[id].y)) <= THRESHOLD))
	{
		dots[id].y = assign_I(dots[id].x.upper, dots[id].x.upper + width(dots[id].x));
	}

	for( i = 0; i < *num_list; i++ )
	{
		if( i != id )
		{
			if( (dots[i].l_id != -1) && (dots[i].sign != 2) )
			{
				dots[i].x = assign_I(dots[i].m_x.lower, dots[i].m_x.upper);
				dots[i].y = assign_I(dots[i].m_y.lower, dots[i].m_y.upper);
				dots[dots[i].l_id].sign = dots[i].sign;
				dots[i].l_id = -1;
				dots[i].identity = dots[i].m_pid;
				dots[i].m_x = assign_I(0,1);
				dots[i].m_y = assign_I(0,1);
			}
		}
	}

	if( dots[id].l_id != -1 )
	{
		from = assign_I(dots[id].x.lower, dots[id].x.upper);
		to = assign_I(dots[id].y.lower, dots[id].y.upper);
		dots[id].sign = 2;
		flag = NONE;	
	}
	else if( is_x_to_y )
	{
		from = assign_I(dots[id].x.lower, dots[id].x.upper);
		to = assign_I(dots[id].y.lower, dots[id].y.upper);
	}
	else
	{
		from = assign_I(dots[id].y.lower, dots[id].y.upper);
		to = assign_I(dots[id].x.lower, dots[id].x.upper);
	}

	if( pred_op == 0 ) 
	{
		wide = rollback_step_dup_no_overlap(is_x_to_y, id, num_list, dots);
	}
	else if(pred_op == 2)
	{	
		wide = rollback_step_dup_no_overlap(is_x_to_y, id, num_list, dots);
	}
	else if(pred_op == 3)
	{
		wide = rollback_step_dup_overlap(is_x_to_y, id, num_list, dots);
	}
	else if(pred_op == 4)
	{
		wide = rollback_step_conversion(is_x_to_y, id, num_list, dots);
		if( con > 0 ) wide = con;
	}
	else wide = 0;

	generate_ops(op_ch, wide, is_x_to_y, from, to, flag, num_ops, ops, sp_id);

/*
	if( is_x_to_y ) {
		ops[num_ops].ctg_id1 = dots[id].ctg_id1;
		ops[num_ops].ctg_id2 = dots[id].ctg_id2;
	}
	else {
		ops[num_ops].ctg_id2 = dots[id].ctg_id1;
		ops[num_ops].ctg_id1 = dots[id].ctg_id2;
	}
*/
	ops[num_ops].id = dots[id].index;
}
Пример #21
0
/*
 * Usage: link_to_annot vcf_file gff_file seq_file annot_type rmsk_file print_mode
 *
 * Loads the GFF features of type annot_type (must be "exon") from argv[2]
 * and all features of the repeat-masker GFF argv[5], then walks the VCF
 * argv[1].  For every variant it counts PASS/filtered status, overlap with
 * a repeat and overlap with an exon; for exonic variants it rebuilds the
 * reference and alternative codon from the sequence in argv[3] and
 * classifies the change as synonymous or non-synonymous with dna2oneaa().
 *
 * print_mode "SITES" prints one line per exonic variant, "NUM" prints one
 * summary line of counters.  The counters whose names end in 1 count only
 * variants whose FILTER column contains "PASS".
 */
int main(int argc, char *argv[])
{
	SEQ *sf;
	uchar *s;
	FILE *f;
	char buf[10000];
	char cur[LEN_NAME], chr_name[LEN_NAME], annot[LEN_NAME], gname[LEN_NAME], filter[LEN_NAME];
	int gid = -1;
	int rid = -1;
	int i = 0;
	int b = 0, e = 1, num_cds = 0;
	char dir[3];
	struct exons_list *exons = NULL;
	char annot_name[LEN_NAME];
	float qual = (float)0;
	char ref[LEN_NAME], alt[LEN_NAME];
	int rest = 0;
	char codon[4], alt_codon[4];
	char aa1 = '\0', aa2 = '\0';
	int num_rmsk = 0;
	struct exons_list *rmsk = NULL;
	int num_snps = 0, num_pass = 0, num_filter = 0, num_coding1 = 0, num_syn1 = 0, num_non1 = 0, num_repeats1 = 0, num_coding_repeats1 = 0;
	int num_coding = 0, num_syn = 0, num_non = 0, num_repeats = 0, num_coding_repeats = 0;
	bool is_num_print = false;
	bool has_pass = false, has_filter = false;

	strcpy(buf, "");
	strcpy(cur, "");
	strcpy(chr_name, "");
	strcpy(annot, "");
	strcpy(gname, "");
	strcpy(filter, "");
	strcpy(annot_name, "");
	strcpy(ref, "");
	strcpy(alt, "");
	strcpy(codon, "");
	strcpy(alt_codon, "");
	strcpy(dir, "");
	codon[3] = '\0';
	alt_codon[3] = '\0';

	if( argc != 7 ) {
		printf("link_to_annot vcf_file gff_file seq_file annot_type(exon, gene, ...) rmsk_file print_mode(NUM or SITES)\n");
		return EXIT_FAILURE;
	}

	if(!(f = ckopen(argv[2], "r"))) {
		printf("no file %s exists\n", argv[2]);
		return EXIT_FAILURE;
	}

	strcpy(annot_name, argv[4]);
	if( strcmp(annot_name, "exon") != 0 ) {
		fatalf("seq file is required only when the annot type is exon, but %s here\n", annot_name);
	}
	sf = seq_get(argv[3]);
	s = SEQ_CHARS(sf) - 1;	/* shifted by one so that s[pos] can be indexed with 1-based positions */
	if( strcmp(argv[6], "NUM") == 0 ) {
		is_num_print = true;
	}
	else if( strcmp(argv[6], "SITES") == 0 ) {
		is_num_print = false;
	}
	else {
		fatalf("unsupported print option: %s\n", argv[6]);
	}

	compl['a'] = compl['A'] = 'T';
	compl['c'] = compl['C'] = 'G';
	compl['g'] = compl['G'] = 'C';
	compl['t'] = compl['T'] = 'A';

	/* pass 1 over the annotation GFF: count features of the requested type */
	while(fgets(buf, 10000, f))
	{
		if( (buf[0] == '#') || (buf[0] == '>') ) {}
		else if( sscanf(buf, "%*s %*s %s %d %d %*s", annot, &b, &e) != 3 ) {
			fatalf("line in wrong gff format: %s\n", buf);
		}
		else {
			if( strcmp(annot, annot_name) == 0 ) {
				num_cds++;
			}
		}
	}

	/* exons stays NULL when nothing matched; it is then only ever passed
	   along together with a count of 0 */
	if( num_cds > 0 ) {
		exons = (struct exons_list *) ckalloc(num_cds * sizeof(struct exons_list));
		initialize_exons_list(exons, 0, num_cds);
	}

	fseek(f, 0, SEEK_SET);

	/* pass 2: store the matching features */
	i = 0;
	while(fgets(buf, 10000, f))
	{
		if( (buf[0] == '#') || (buf[0] == '>') ) {}
		else if( sscanf(buf, "%s %*s %s %d %d %*s %2s %*s %s", chr_name, annot, &b, &e, dir, cur) != 6 ) {
			fatalf("line in wrong gff format: %s\n", buf);
		}
		else {
			if( strcmp(annot, annot_name) == 0 ) {
				get_gene_name(cur, gname);
				strcpy(exons[i].name, gname);
				exons[i].reg = assign_I(b, e);
				exons[i].dir = dir[0];
				strcpy(exons[i].chr, chr_name);
				i++;
			}
		}
	}

	if( i != num_cds ) {
		fatalf("%s counting error: %d - %d\n", annot_name, num_cds, i);
	}
	fclose(f);

	/* repeat-masker GFF: same two-pass scheme, every feature is kept */
	if(!(f = ckopen(argv[5], "r"))) {
		fatalf("%s file not found\n", argv[5]);
	}

	while(fgets(buf, 10000, f))
	{
		if( (buf[0] == '#') || (buf[0] == '>') ) {}
		else if( sscanf(buf, "%*s %*s %s %d %d %*s", annot, &b, &e) != 3 ) {
			fatalf("line in wrong gff format: %s\n", buf);
		}
		else {
			num_rmsk++;
		}
	}

	if( num_rmsk > 0 ) {
		rmsk = (struct exons_list *) ckalloc(num_rmsk * sizeof(struct exons_list));
		initialize_exons_list(rmsk, 0, num_rmsk);
	}

	fseek(f, 0, SEEK_SET);

	i = 0;
	while(fgets(buf, 10000, f))
	{
		if( (buf[0] == '#') || (buf[0] == '>') ) {}
		else if( sscanf(buf, "%s %*s %s %d %d %*s %2s %*s %s", chr_name, annot, &b, &e, dir, cur) != 6 ) {
			fatalf("line in wrong gff format: %s\n", buf);
		}
		else {
			strcpy(rmsk[i].name, annot);
			rmsk[i].reg = assign_I(b, e);
			rmsk[i].dir = dir[0];
			strcpy(rmsk[i].chr, chr_name);
			i++;
		}
	}

	if( i != num_rmsk ) {
		fatalf("%s counting error: %d - %d\n", argv[5], num_rmsk, i);
	}
	fclose(f);

	if(!(f = ckopen(argv[1], "r"))) {
		printf("no file %s exists\n", argv[1]);
		return EXIT_FAILURE;
	}

	while(fgets(buf, 10000, f))
	{
		if( buf[0] == '#' ) continue;

		num_snps++;
		if( sscanf(buf, "%s %d %*s %s %s %f %s %*s", chr_name, &b, ref, alt, &qual, filter) != 6 ) {
			fatalf("bad format in %s\n", buf);
		}

		/* strstr() returns NULL when the substring is absent, so a match
		   is "!= NULL" (it is not strcmp-like) */
		has_pass = (strstr(filter, "PASS") != NULL);
		has_filter = (strstr(filter, "filter") != NULL);

		if( has_pass ) {
			num_pass++;
		}
		else if( has_filter ) {
			num_filter++;
		}

		rid = find_overlap_gene(chr_name, b, rmsk, num_rmsk);
		if( rid != -1 ) {
			num_repeats++;
			if( has_filter ) {}
			else if( has_pass ) {
				num_repeats1++;
			}
			else {
				fatalf("unexpected filter option: %s\n", filter);
			}
		}

		if( (gid = find_overlap_gene(chr_name, b, exons, num_cds)) == -1 ) continue;

		num_coding++;
		if( has_pass ) {
			num_coding1++;
		}

		if( ref[0] != s[b] ) {
			fatalf("nucleotides not match: %c - %c\n", ref[0], s[b]);
		}

		/* rest = 0-based position of the variant inside its codon, counted
		   from the exon start in the direction of the strand */
		if( exons[gid].dir == '+' ) {
			rest = (b - exons[gid].reg.lower)%3;
			if( rest == 0 ) {
				sprintf(codon, "%c%c%c", s[b], s[b+1], s[b+2]);
				sprintf(alt_codon, "%c%c%c", alt[0], s[b+1], s[b+2]);
			}
			else if( rest == 1 ) {
				sprintf(codon, "%c%c%c", s[b-1], s[b], s[b+1]);
				sprintf(alt_codon, "%c%c%c", s[b-1], alt[0], s[b+1]);
			}
			else {
				sprintf(codon, "%c%c%c", s[b-2], s[b-1], s[b]);
				sprintf(alt_codon, "%c%c%c", s[b-2], s[b-1], alt[0]);
			}
		}
		else if( exons[gid].dir == '-' ) {
			/* measured from the upper end so the offset is non-negative;
			   (b - upper) % 3 is never positive and would never select the
			   middle-base case */
			rest = (exons[gid].reg.upper - b)%3;
			if( rest == 0 ) {
				sprintf(codon, "%c%c%c", compl[s[b]], compl[s[b-1]], compl[s[b-2]]);
				sprintf(alt_codon, "%c%c%c", compl[(uchar)alt[0]], compl[s[b-1]], compl[s[b-2]]);
			}
			else if( rest == 1 ) {
				sprintf(codon, "%c%c%c", compl[s[b+1]], compl[s[b]], compl[s[b-1]]);
				sprintf(alt_codon, "%c%c%c", compl[s[b+1]], compl[(uchar)alt[0]], compl[s[b-1]]);
			}
			else {
				sprintf(codon, "%c%c%c", compl[s[b+2]], compl[s[b+1]], compl[s[b]]);
				sprintf(alt_codon, "%c%c%c", compl[s[b+2]], compl[s[b+1]], compl[(uchar)alt[0]]);
			}
		}
		else {
			fatalf("%c unsupported\n", exons[gid].dir);
		}
		aa1 = dna2oneaa(codon);
		aa2 = dna2oneaa(alt_codon);

		if( aa1 == aa2 ) {
			num_syn++;
			if( has_filter ) {}
			else if( has_pass ) {
				num_syn1++;
			}
			else {
				fatalf("unexpected filter option: %s\n", filter);
			}
		}
		else {
			num_non++;
			if( has_filter ) {}
			else if( has_pass ) {
				num_non1++;
			}
			else {
				fatalf("unexpected filter option: %s\n", filter);
			}
		}

		if( rid != -1 ) {
			num_coding_repeats++;
			if( has_pass ) {
				num_coding_repeats1++;
			}
		}

		if( is_num_print == false ) {
			if( rid == -1 ) {
				printf("%s\t%d\t%s\t%s\t%f\t%s\t%s\t%d\t%d\t%c\t%c\t%c\t.\n", chr_name, b, ref, alt, qual, filter, exons[gid].name, exons[gid].reg.lower, exons[gid].reg.upper, exons[gid].dir, aa1, aa2);
			}
			else {
				printf("%s\t%d\t%s\t%s\t%f\t%s\t%s\t%d\t%d\t%c\t%c\t%c\t%s\n", chr_name, b, ref, alt, qual, filter, exons[gid].name, exons[gid].reg.lower, exons[gid].reg.upper, exons[gid].dir, aa1, aa2, rmsk[rid].name);
			}
		}
	}

	if( is_num_print == true ) {
		printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", chr_name, num_snps, num_pass, num_filter, num_coding, num_coding1, num_non, num_syn, num_non1, num_syn1, num_repeats, num_repeats1, num_coding_repeats, num_coding_repeats1);
	}

	free(exons);
	free(rmsk);
	fclose(f);

	return EXIT_SUCCESS;
}
Пример #22
0
void filter_gff_lists(struct g_list *genes1, int num_genes1, struct exons_list *exons1, int num_exons1, int type)
{
	int i = 0, j = 0;
	struct I cur, tmp;
	int sid = 0, eid = 0;

	cur = assign_I(0, 1);
	tmp = assign_I(0, 1);

	if( type == SGD ) {
		for( i = 0; i < num_genes1; i++ ) {
    sid = genes1[i].cdsStart;
    eid = genes1[i].cdsEnd;
    if( exons1[sid].reg.lower < exons1[eid].reg.upper ) {
      cur = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper);
    }
    else {
      if( genes1[i].strand == '-' ) {
        cur = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper);
      }
      else {
        fatalf("check exons list for %s,%s:%d-%d\n", genes1[i].gname, genes1[i].sname, genes1[i].txStart, genes1[i].txEnd);
      }
    }

//			cur = assign_I(genes1[i].txStart, genes1[i].txEnd);
//			if( (width(cur) < MIN_ORF_BASES) && (strstr(genes1[i].info, "Dubious") != 0) )
			if( (genes1[i].txStart <= 0)  || (genes1[i].txEnd <= 0) ) {
				genes1[i].type = REDUN;	
			}
			else if( width(cur) < MIN_ORF_BASES )
			{
				genes1[i].type = REDUN;
			}
			else if( genes1[i].type == REDUN ) {}
			else {
				j = i+1;
				if( j < num_genes1 ) {
		      sid = genes1[j].cdsStart;
   		 		eid = genes1[j].cdsEnd;
     	 		if( exons1[sid].reg.lower < exons1[eid].reg.upper ) {
        		tmp = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper);
					}
      		else {
        		if( genes1[j].strand == '-' ) {
          		tmp = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper);
        		}
        		else {
          		fatalf("check exons list for %s,%s:%d-%d\n", genes1[j].gname, genes1[j].sname, genes1[i].txStart, genes1[j].txEnd);
        		}
      		}

//					tmp = assign_I(genes1[j].txStart, genes1[j].txEnd);
				}

				while( (j < num_genes1) && (proper_overlap(cur, tmp) == true) ) {
					if( width(tmp) < MIN_ORF_BASES )
					{
						genes1[j].type = REDUN;
					}
					else if( genes1[j].type == REDUN ) {}
					else {
						if( width(intersect(cur, tmp)) >= MIN_BASES ) {
							if( (strstr(genes1[i].info, "Verified") != 0) || (strstr(genes1[i].info, "Uncharacterized") != 0) ) {
								if(strstr(genes1[j].info, "Dubious") != 0 ) {
//									if( genes1[j].strand == genes1[i].strand ) {
										genes1[j].type = REDUN;
//									}
								}
							}
							else if( strstr(genes1[i].info, "Dubious") != 0 ) {
								if( (strstr(genes1[j].info, "Verified") != 0) || (strstr(genes1[j].info, "Uncharacterized") != 0) ) {
//									if( genes1[j].strand == genes1[i].strand ) {
										genes1[i].type = REDUN;
//									}
								}
								else if( strstr(genes1[j].info, "Dubious") != 0 ) {
									if(width(tmp) < width(cur)) {
//										if( genes1[j].strand == genes1[i].strand ) {
											genes1[j].type = REDUN;
//										}
									}
									else if(width(tmp) >= width(cur)) {
//										if( genes1[j].strand == genes1[i].strand ) {
											genes1[i].type = REDUN;
//										}
									}
								}
							}
						}
					}
	
					j++;	
					if( j < num_genes1 ) {
		   	  	sid = genes1[j].cdsStart;
   		 			eid = genes1[j].cdsEnd;
     	 			if( exons1[sid].reg.lower < exons1[eid].reg.upper ) {
       		 		tmp = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper);
						}
      			else {
       		 		if( genes1[j].strand == '-' ) {
       		   		tmp = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper);
       		 		}
       		 		else {
       		   		fatalf("check exons list for %s,%s:%d-%d\n", genes1[j].gname, genes1[j].sname, genes1[i].txStart, genes1[j].txEnd);
       		 		}
      			}
//							tmp = assign_I(genes1[j].txStart, genes1[j].txEnd);
					}
				}
			}
		}	
	}
	else if( type == MAKER ) {
		for( i = 0; i < num_genes1; i++ ) {
 	  	sid = genes1[i].cdsStart;
   		eid = genes1[i].cdsEnd;
    	if( exons1[sid].reg.lower < exons1[eid].reg.upper ) {
      	cur = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper);
    	}
    	else {
      	if( genes1[i].strand == '-' ) {
        	cur = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper);
      	}
      	else {
        	fatalf("check exons list for %s,%s:%d-%d\n", genes1[i].gname, genes1[i].sname, genes1[i].txStart, genes1[i].txEnd);
      	}
    	}
//			cur = assign_I(genes1[i].txStart, genes1[i].txEnd);
//			if( (width(cur) < MIN_ORF_BASES) && (strcmp(genes1[i].gname, "UNDEF") == 0)  )
			if( (genes1[i].type == REDUN) || (genes1[i].type == MATCH) || (genes1[i].type == PARTIAL) ) {
				genes1[i].type = REDUN;
			}
			else if( width(cur) < MIN_ORF_BASES ) 
			{
				genes1[i].type = REDUN;
			}
			else {
				j = i+1;
				if( j < num_genes1 ) {
		     	sid = genes1[j].cdsStart;
   		 		eid = genes1[j].cdsEnd;
     	 		if( exons1[sid].reg.lower < exons1[eid].reg.upper ) {
       	 		tmp = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper);
					}
      		else {
       	 		if( genes1[j].strand == '-' ) {
       	   		tmp = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper);
       	 		}
       	 		else {
       	   		fatalf("check exons list for %s,%s:%d-%d\n", genes1[j].gname, genes1[j].sname, genes1[i].txStart, genes1[j].txEnd);
       	 		}
      		}
//					tmp = assign_I(genes1[j].txStart, genes1[j].txEnd);
				}

				while( (j < num_genes1) && (proper_overlap(cur, tmp) == true) ) {
					if( (genes1[j].type == REDUN) || (genes1[j].type == MATCH) || (genes1[j].type == PARTIAL) ) {
						genes1[j].type = REDUN;
					}
					else if( width(cur) < MIN_ORF_BASES ) 
					{
						genes1[j].type = REDUN;
					}
					else {
//						if( (width(intersect(cur, tmp)) >= MIN_BASES) && (genes1[i].strand == genes1[j].strand) ) {
						if( width(intersect(cur, tmp)) >= MIN_BASES ) {
							if(width(tmp) < width(cur)) {
								genes1[j].type = REDUN;
							}
							else if(width(tmp) >= width(cur)) {
								genes1[i].type = REDUN;
							}
						}
					}
					j++;
					if( j < num_genes1 ) {
		   	  	sid = genes1[j].cdsStart;
   		 			eid = genes1[j].cdsEnd;
     	 			if( exons1[sid].reg.lower < exons1[eid].reg.upper ) {
       		 		tmp = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper);
						}
      			else {
       		 		if( genes1[j].strand == '-' ) {
       		   		tmp = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper);
       		 		}
       		 		else {
       		   		fatalf("check exons list for %s,%s:%d-%d\n", genes1[j].gname, genes1[j].sname, genes1[i].txStart, genes1[j].txEnd);
       		 		}
      			}
//						tmp = assign_I(genes1[j].txStart, genes1[j].txEnd);
					}
				}
			}
		}
	}
	else if ( type == MULTI_CDS ) {
		for( i = 0; i < num_genes1; i++ ) {
			if( genes1[i].exonCount >= 2 ) {
			}
			else {
				genes1[i].type = REDUN;
			}
		}
	}
	else {
		fatalf("Unsupported type: %d\n", type);
	}

}
Пример #23
0
void adjust_algn_pos(struct DotList *algns, int num_algns, struct n_pair *contigs1, int num1, int *size1, struct n_pair *contigs2, int num2, int *size2, int mode)
{
  int *len_sum1, *len_sum2;
	int i = 0;
  int id1 = 0, id2 = 0;
  char name[LEN_NAME] = "", sp_name[LEN_NAME] = "", ctg_name[LEN_NAME] = "";
  int ctg_id = -1;

  if( num1 > 0 ) len_sum1 = (int *) ckalloc(sizeof(int) * num1);
  if( num2 > 0 ) len_sum2 = (int *) ckalloc(sizeof(int) * num2);

  if( mode == CTG_NOT_ASSIGNED_BUT_LEN ) {
		for( i = 0; i < num1; i++ ) len_sum1[i] = contigs1[i].len;
		for( i = 0; i < num2; i++ ) len_sum2[i] = contigs2[i].len;
	}
	else {
		cal_length_sum(len_sum1, contigs1, num1);
		cal_length_sum(len_sum2, contigs2, num2);
	}

  for( i = 0; i < num_algns; i++ ) {
    if( (mode == CTG_NOT_ASSIGNED) || (mode == CTG_NOT_ASSIGNED_BUT_LEN) ) {
      strcpy(name, algns[i].name1);
      if( algns[i].sp_id == SELF2 ) {
        concat_ctg_name(name, sp_name, ctg_name);
        ctg_id = is_ctg_in(sp_name, ctg_name, contigs2, num2);
      }
      else {
        concat_ctg_name(name, sp_name, ctg_name);
        ctg_id = is_ctg_in(sp_name, ctg_name, contigs1, num1);
      }

      if( ctg_id == -1 ) {
        fatalf("Contig %s not assigned in the list\n", ctg_name);
      }
      else {
        algns[i].ctg_id1 = ctg_id;
      }

      strcpy(name, algns[i].name2);
      if( algns[i].sp_id == SELF1 ) {
        concat_ctg_name(name, sp_name, ctg_name);
        ctg_id = is_ctg_in(sp_name, ctg_name, contigs1, num1);
      }
      else {
        concat_ctg_name(name, sp_name, ctg_name);
        ctg_id = is_ctg_in(sp_name, ctg_name, contigs2, num2);
      }

      if( ctg_id == -1 ) {
        fatalf("Contig %s not assigned in the list\n", ctg_name);
      }
      else {
        algns[i].ctg_id2 = ctg_id;
      }
    }

	  if( algns[i].sp_id == SELF1 ) {
      id1 = algns[i].ctg_id1;
      if( id1 >= num1 ) fatalf("%d: not valid, larger than %d\n", id1, num1);
      if( (id1 == -1) && (num1 > 0) ) {
        fatalf("wrong contig num assigned: %s - %s in read_maf.c\n", algns[i].name1, algns[i].name2);
      }

      if( id1 != -1 ) algns[i].x = assign_I(algns[i].x.lower + len_sum1[id1], algns[i].x.upper + len_sum1[id1]);

      id2 = algns[i].ctg_id2;
      if( id2 >= num1 ) {
        fatalf("%d: not valid, larger than %d\n", id2, num1);
      }
      if( (id2 == -1) && (num1 > 0) ) {
        fatalf("wrong contig num assigned: %s - %s in read_maf.c\n", algns[i].name1, algns[i].name2);
      }

      if( id2 != -1 ) algns[i].y = assign_I(algns[i].y.lower + len_sum1[id2], algns[i].y.upper + len_sum1[id2]);
    }
    else if( algns[i].sp_id == SELF2 ) {
      id1 = algns[i].ctg_id1;
      if( id1 >= num2 ) {
        fatalf("%d: not valid, larger than %d\n", id1, num2);
      }
      if( (id1 == -1) && (num2 > 0) ) {
        fatalf("wrong contig num assigned: %s - %s in read_maf.c\n", algns[i].name1, algns[i].name2);
      }

      if( id1 != -1 ) algns[i].x = assign_I(algns[i].x.lower + len_sum2[id1], algns[i].x.upper + len_sum2[id1]);

      id2 = algns[i].ctg_id2;
      if( id2 >= num2 ) {
        fatalf("%d: not valid, larger than %d\n", id2, num2);
      }
      if( (id2 == -1) && (num2 > 0) ) {
        fatalf("wrong contig num assigned: %s - %s in read_maf.c\n", algns[i].name1, algns[i].name2);
      }

      if( id2 != -1 ) algns[i].y = assign_I(algns[i].y.lower + len_sum2[id2], algns[i].y.upper + len_sum2[id2]);
    }
    else if( algns[i].sp_id == PAIR ) {
      id1 = algns[i].ctg_id1;
      if( id1 >= num1 ) {
        fatalf("%d: not valid, larger than %d\n", id1, num1);
      }
      if( (id1 == -1) && (num1 > 0) ) {
        fatalf("wrong contig num assigned: %s - %s in read_maf.c\n", algns[i].name1, algns[i].name2);
      }

      if( id1 != -1 ) algns[i].x = assign_I(algns[i].x.lower + len_sum1[id1], algns[i].x.upper + len_sum1[id1]);

      id2 = algns[i].ctg_id2;
      if( id2 >= num2 ) {
        fatalf("%d: not valid, larger than %d\n", id2, num2);
      }
      if( (id2 == -1) && (num2 > 0) ) {
        fatalf("wrong contig num assigned: %s - %s in read_maf.c\n", algns[i].name1, algns[i].name2);
      }

      if( id2 != -1 ) algns[i].y = assign_I(algns[i].y.lower + len_sum2[id2], algns[i].y.upper + len_sum2[id2]);
    }
  }

  if( num1 > 0 ) {
    *size1 = len_sum1[num1-1] + contigs1[num1-1].len;
    free(len_sum1);
  }
  if( num2 > 0 ) {
    *size2 = len_sum2[num2-1] + contigs2[num2-1].len;
    free(len_sum2);
  }
}
Пример #24
0
/*
 * Usage: intervals1 intervals2 [anything]   (a third argument turns on
 * debug_mode)
 *
 * Loads the intervals of argv[1] with input_orf_I_list().  Then, for every
 * line "scaffold beg end strain ..." of argv[2], prints each loaded
 * interval that has the same scaffold name and strain name and properly
 * overlaps [beg, end].  Lines of argv[2] starting with '>' are echoed
 * unchanged.
 */
int main(int argc, char **argv)
{
	FILE *f;
	int i = 0;
	int count = 0;
	int num_match_regions = 0;
	struct orf_I *match_regions = NULL;
	char scaf_name[MAX_NAME], cur_name[MAX_NAME];
	char buf[MAX_NAME];
	struct I reg;
	int b = 0, e = 0;

	reg = assign_I(0, 1);

	debug_mode = FALSE;
	if( argc == 4 ) {
		debug_mode = TRUE;
	}
	else if( argc != 3 ) {
		fatal("args: intervals1 intervals2\n");
	}

	strcpy(buf, "");
	strcpy(scaf_name, "");
	strcpy(cur_name, "");

	if( (f = fopen(argv[1], "r")) == NULL ) {
		fatalf("cannot find alignment in %s", argv[1]);
	}

	/* one slot per line of the first file */
	while(fgets(buf, MAX_NAME, f)) count++;

	if( count > 0 ) {
		match_regions = (struct orf_I *) ckalloc(count * (sizeof(struct orf_I)) );
		initialize_orf_I_list(match_regions, count);
		/* the counting loop left the stream at EOF; rewind before the real
		   read (harmless if input_orf_I_list() rewinds on its own) */
		fseek(f, 0, SEEK_SET);
		num_match_regions = input_orf_I_list(f, match_regions, count);
	}
	fclose(f);

	if( (f = fopen(argv[2], "r")) == NULL ) {
		fatalf("cannot find alignment in %s", argv[2]);
	}

	while(fgets(buf, MAX_NAME, f)) {
		if( buf[0] == '>' ) {
			printf("%s", buf);
			continue;
		}

		if( sscanf(buf, "%s %d %d %s %*s", scaf_name, &b, &e, cur_name) != 4 ) {
			fatalf("wrong interval line: %s", buf);
		}

		reg = assign_I(b, e);
		for( i = 0; i < num_match_regions; i++ ) {
			if( (strcmp(cur_name, match_regions[i].strain_name) == 0)
				&& (strcmp(scaf_name, match_regions[i].name) == 0)
				&& (proper_overlap(reg, match_regions[i].region) == true) ) {
				printf("%s %d %d %s\n", match_regions[i].name, match_regions[i].region.lower, match_regions[i].region.upper, match_regions[i].strain_name);
			}
		}
	}
	fclose(f);

	/* free(NULL) is a no-op, so no guard is needed */
	free(match_regions);
	return EXIT_SUCCESS;
}
Пример #25
0
/*
 * Walk a chain of tandem candidates and shrink each alignment to the part
 * that lies outside the next one in the chain.
 *
 * For step i the pair is (cur_id, cmp_id) with cmp_id = t_list[i] and
 * cur_id = id for i == 0, otherwise t_list[i-1].  Nothing happens for a pair
 * whose x or y intervals are strict_almost_equal(), or where dots[cmp_id] is
 * not a strict_subset() of dots[cur_id] in both x and y.  Otherwise two
 * candidate replacements for dots[cur_id] are built:
 *
 *   t1 - the x-interval left over on the side of cur where the gap to cmp is
 *        larger, with a y-interval of proportionally scaled length;
 *   t2 - the y-interval left over on the side where the gap is larger, with
 *        an x-interval of proportionally scaled length.
 *
 * A candidate whose scaled length is below DEL_TH is discarded.  Which end of
 * the other axis the scaled interval is anchored to depends on
 * dots[cur_id].sign (0 or 1; any other value yields no candidate).
 *
 * Surviving candidates are passed to check_tandem_reg(); a return of -1
 * rejects the candidate.  The accepted candidate with the smaller return
 * value (t1 on ties) overwrites dots[cur_id].x / .y and rp1_id is set to 0.
 *
 * dots       : alignment list, modified in place
 * num        : number of entries in dots (forwarded to check_tandem_reg)
 * id         : index of the first alignment in the chain
 * t_list     : indices of the following alignments in the chain
 * num_tandem : number of entries in t_list
 */
void convert_tandem_region(struct DotList *dots, int num, int id, int *t_list, int num_tandem) 
{
	int i;
	int cur_id, cmp_id;
	struct DotList t1, t2;
	int len_x, len_y;
	int cur_len = 0;
	int val_t1, val_t2;
	
	for( i = 0; i < num_tandem; i++ )
	{
		val_t1 = -1;
		val_t2 = -1;
		/* lower == -1 marks "no candidate" for the validity tests below. */
		t1.x = assign_I(-1, 0);
		t2.x = assign_I(-1, 0);
		t1.y = assign_I(-1, 0);
		t2.y = assign_I(-1, 0);
		cmp_id = t_list[i];
		if( i == 0 ) cur_id = id;
		else cur_id = t_list[i-1];

		if( ( strict_almost_equal( dots[cmp_id].x, dots[cur_id].x ) == true ) || ( strict_almost_equal( dots[cmp_id].y, dots[cur_id].y) == true ) )
		{
			/* Nearly identical on one axis: nothing to trim. */
		}
		else if( ( strict_subset( dots[cmp_id].x, dots[cur_id].x ) == true ) && ( strict_subset( dots[cmp_id].y, dots[cur_id].y ) == true ) )
		{
			/* ---- t1: leftover taken on the x axis ---- */
			if( abs(dots[cur_id].x.upper - dots[cmp_id].x.upper) > abs(dots[cur_id].x.lower - dots[cmp_id].x.lower)	)
			{
				/* Larger gap is on the upper side of x. */
				if( ( dots[cur_id].x.upper - dots[cmp_id].x.upper ) <= 0 ) t1.x = assign_I(-1, 0);
				else
				{
					len_x = width(dots[cur_id].x);
					len_y = width(dots[cur_id].y);

					t1.x = assign_I(dots[cmp_id].x.upper, dots[cur_id].x.upper);
					/* Scale the x leftover by the y/x length ratio of cur. */
					cur_len = (int)(((float)(width(t1.x)) * ((float)len_y)/(float)len_x));
					if( cur_len < DEL_TH ) {
						t1.x = assign_I(-1, 0);
						t1.y = assign_I(-1, 0);
					}
					else if( dots[cur_id].sign == 0 ) {
						if( dots[cur_id].y.upper > (dots[cur_id].y.upper - cur_len)) t1.y = assign_I(dots[cur_id].y.upper - cur_len, dots[cur_id].y.upper);
						else t1.x = assign_I(-1, 0);
					}
					else if( dots[cur_id].sign == 1 ) {
						if( (dots[cur_id].y.lower + cur_len) > dots[cur_id].y.lower ) t1.y = assign_I(dots[cur_id].y.lower, dots[cur_id].y.lower + cur_len);
						else t1.x = assign_I(-1, 0);
					}
				}
			}
			else
			{
				/* Larger (or equal) gap is on the lower side of x. */
				if( ( dots[cur_id].x.lower - dots[cmp_id].x.lower ) >= 0 ) t1.x = assign_I(-1, 0);
				else
				{
					len_x = width(dots[cur_id].x);
					len_y = width(dots[cur_id].y);

					t1.x = assign_I(dots[cur_id].x.lower, dots[cmp_id].x.lower);
					cur_len = (int)(((float)(width(t1.x)) * ((float)len_y)/(float)len_x));
					if( cur_len < DEL_TH ) {
						t1.x = assign_I(-1, 0);
						t1.y = assign_I(-1, 0);
					}
					else if( dots[cur_id].sign == 0 ) {
						if( (dots[cur_id].y.lower + cur_len) > dots[cur_id].y.lower ) t1.y = assign_I(dots[cur_id].y.lower, dots[cur_id].y.lower + cur_len);
						else t1.x = assign_I(-1, 0);
					}
					else if( dots[cur_id].sign == 1 ) {
						/* Fixed: the original compared y.upper with itself, which is
						 * never true, so this case could not yield a candidate.  The
						 * guard now matches the three parallel branches. */
						if( dots[cur_id].y.upper > (dots[cur_id].y.upper - cur_len) ) t1.y = assign_I(dots[cur_id].y.upper - cur_len, dots[cur_id].y.upper);
						else t1.x = assign_I(-1, 0);
					}
				}
			}

			/* ---- t2: leftover taken on the y axis ---- */
			if( abs(dots[cmp_id].y.lower - dots[cur_id].y.lower) > abs(dots[cur_id].y.upper - dots[cmp_id].y.upper)	)
			{
				/* Larger gap is on the lower side of y. */
				if( ( dots[cmp_id].y.lower - dots[cur_id].y.lower ) <= 0 ) t2.x = assign_I(-1, 0); 
				else
				{
					len_x = width(dots[cur_id].x);
					len_y = width(dots[cur_id].y);

					t2.y = assign_I(dots[cur_id].y.lower, dots[cmp_id].y.lower);
					/* Scale the y leftover by the x/y length ratio of cur. */
					cur_len = (int)(((float)(width(t2.y)) * ((float)len_x)/(float)len_y));
					if( cur_len < DEL_TH ) {
						t2.x = assign_I(-1, 0);
						t2.y = assign_I(-1, 0);
					}
					else if( dots[cur_id].sign == 0 ) {
						if( (dots[cur_id].x.lower + cur_len) > dots[cur_id].x.lower ) t2.x = assign_I(dots[cur_id].x.lower, dots[cur_id].x.lower + cur_len);
						else t2.x = assign_I(-1, 0);
					}
					else if( dots[cur_id].sign == 1 ) {
						if( dots[cur_id].x.upper > (dots[cur_id].x.upper - cur_len) ) t2.x = assign_I(dots[cur_id].x.upper - cur_len, dots[cur_id].x.upper);
						else t2.x = assign_I(-1, 0);
					}
				}
			}
			else
			{
				/* Larger (or equal) gap is on the upper side of y. */
				if( ( dots[cur_id].y.upper - dots[cmp_id].y.upper ) <= 0 ) t2.x = assign_I(-1, 0);
				else
				{
					len_x = width(dots[cur_id].x);
					len_y = width(dots[cur_id].y);

					t2.y = assign_I(dots[cmp_id].y.upper, dots[cur_id].y.upper);
					cur_len = (int)(((float)(width(t2.y)) * ((float)len_x)/(float)len_y));
					if( cur_len < DEL_TH ) {
						t2.x = assign_I(-1, 0);
						t2.y = assign_I(-1, 0);
					}
					else if( dots[cur_id].sign == 0 ) {
						if( dots[cur_id].x.upper > (dots[cur_id].x.upper - cur_len) ) t2.x = assign_I(dots[cur_id].x.upper - cur_len, dots[cur_id].x.upper);
						else t2.x = assign_I(-1, 0);
					}
					else if( dots[cur_id].sign == 1 ) {
						if( (dots[cur_id].x.lower + cur_len) > dots[cur_id].x.lower ) t2.x = assign_I(dots[cur_id].x.lower, dots[cur_id].x.lower + cur_len);
						else t2.x = assign_I(-1, 0);
					}
				}
			}
		}

		/* Only candidates with both intervals set are evaluated. */
		if( (t1.x.lower >= 0) && (t1.y.lower >= 0)) val_t1 = check_tandem_reg( t1, dots, num );
		else val_t1 = -1;

		if( (t2.x.lower >= 0) && (t2.y.lower >= 0) ) val_t2 = check_tandem_reg( t2, dots, num );
		else val_t2 = -1;

		/* Prefer t1 when it is accepted and t2 is either rejected or does not
		 * have a smaller value; otherwise fall back to t2 if it is accepted. */
		if( (val_t1 != -1) && ((val_t2 == -1) || (val_t1 <= val_t2)) )
		{
			dots[cur_id].x = assign_I(t1.x.lower, t1.x.upper);
			dots[cur_id].y = assign_I(t1.y.lower, t1.y.upper);
			dots[cur_id].rp1_id = 0;
		}
		else if( val_t2 != -1 )
		{
			dots[cur_id].x = assign_I(t2.x.lower, t2.x.upper);
			dots[cur_id].y = assign_I(t2.y.lower, t2.y.upper);
			dots[cur_id].rp1_id = 0;
		}
	}
}
Пример #26
0
bool check_whole_regions_inclusion(struct DotList *dots, int num_lines, int mid, int left_id, int right_id, bool is_x)			
{
	int i;
	bool res = false;
	struct I temp;

	if( is_x == true ) temp = assign_I(dots[mid].x.lower, dots[mid].x.upper);
	else temp = assign_I(dots[mid].y.lower, dots[mid].y.upper);

	for( i = 0; i < num_lines; i++ )
	{
		if( is_x == true )
		{
			if( strict_overlap(dots[left_id].y, dots[right_id].y, (M_TH/2)+1) == true )
			{
				if( (loose_subset(dots[mid].x, dots[i].x) == true) && (loose_subset(dots[left_id].x, dots[i].x) == true) && (loose_subset(dots[right_id].x, dots[i].x) == true))
				{
					res = true;	
				}
				else if( (loose_subset(dots[mid].x, dots[i].y) == true) && (loose_subset(dots[left_id].x, dots[i].y) == true) && (loose_subset(dots[right_id].x, dots[i].y) == true))
				{
					res = true;
				}
			}
			else if( strict_overlap(dots[left_id].x, dots[right_id].x, (M_TH/2)+1) == true )
			{
				if( (loose_subset(dots[mid].x, dots[i].x) == true) && (loose_subset(dots[left_id].y, dots[i].x) == true) && (loose_subset(dots[right_id].y, dots[i].x) == true))
				{
					res = true;	
				}
				else if( (loose_subset(dots[mid].x, dots[i].y) == true) && (loose_subset(dots[left_id].y, dots[i].y) == true) && (loose_subset(dots[right_id].y, dots[i].y) == true))
				{
					res = true;
				}
			}
			else
			{
			}
		}
		else
		{
			if( strict_overlap(dots[left_id].x, dots[right_id].x, (M_TH/2)+1) == true )
			{
				if( (loose_subset(dots[mid].y, dots[i].x) == true) && (loose_subset(dots[left_id].y, dots[i].x) == true) && (loose_subset(dots[right_id].y, dots[i].x) == true))
				{
					res = true;	
				}
				else if( (loose_subset(dots[mid].y, dots[i].y) == true) && (loose_subset(dots[left_id].y, dots[i].y) == true) && (loose_subset(dots[right_id].y, dots[i].y) == true))
				{
					res = true;
				}
			}
			else if( strict_overlap(dots[left_id].y, dots[right_id].y, (M_TH/2)+1) == true )
			{
				if( (loose_subset(dots[mid].y, dots[i].x) == true) && (loose_subset(dots[left_id].x, dots[i].x) == true) && (loose_subset(dots[right_id].x, dots[i].x) == true))
				{
					res = true;	
				}
				else if( (loose_subset(dots[mid].y, dots[i].y) == true) && (loose_subset(dots[left_id].x, dots[i].y) == true) && (loose_subset(dots[right_id].x, dots[i].y) == true))
				{
					res = true;
				}
			}
			else
			{
			}
		}
	}

	return(res);
}