コード例 #1
0
ファイル: pred_sp.c プロジェクト: gtsong/CHAP2
void predict_sp_op(int sp_code, int rm_sp, int left_sp, int *num_list, struct DotList *dots, int *cur_num, struct ops_list *ops)
{
	char op_ch;
	int r_st = -1, r_end = -1; // the range of a removed species
	struct I temp_reg;
	int len;
	int i = 0;

	check_gene_loss(num_list, dots, sp_code, rm_sp, left_sp, cur_num, ops);

	op_ch = 's';
	for( i = 0; i < (*num_list); i++ )
	{
		if( dots[i].sp_id == sp_code )
		{
			if( ( r_st == -1 ) && ( r_end == -1 ) )
			{
				r_st = dots[i].y.lower;
				r_end = dots[i].y.upper;
			}
			else 
			{
				if( dots[i].y.lower < r_st ) r_st = dots[i].y.lower;
				if( dots[i].y.upper > r_end ) r_end = dots[i].y.upper;
			}
		}
	}

	temp_reg = assign_I(r_st, r_end);
	len = r_end - r_st + 1;

	for( i = 0; i < (*num_list); i++ )
	{
		if( (proper_overlap(temp_reg, dots[i].x) == true) || (proper_overlap(temp_reg, dots[i].y) == true) )
		{
			dots[i].sign = 2;	
		}
		else 
		{
			if( dots[i].x.lower > r_st )
			{
				dots[i].x = assign_I(dots[i].x.lower - len, dots[i].x.upper - len);
				dots[i].y = assign_I(dots[i].y.lower - len, dots[i].y.upper - len);
			}
			else if( dots[i].y.lower > r_st )
			{
				dots[i].y = assign_I(dots[i].y.lower - len, dots[i].y.upper - len);
			}
		}
	}

	overwrite_dots(num_list, dots);
	ops[*cur_num].sign = op_ch;
	ops[*cur_num].src_b = r_st;
	ops[*cur_num].src_e = r_end;
	ops[*cur_num].dst_b = 0;
	ops[*cur_num].dst_e = 0;
	ops[*cur_num].sp_id = rm_sp;
}
コード例 #2
0
ファイル: deal_gaps.c プロジェクト: gtsong/CHAP2
struct gap_list redefine_for_del(struct DotList *dots, struct gap_list gps) // when the type of a gap is 2, 12, or 22
{
	struct I temp;
	int loc_id, comp_id;
	int len;
	int x;

	loc_id = gps.id1;
	comp_id = gps.id2;
	if( (gps.type == 12) || (gps.type == 22) )
	{
		if( proper_overlap(dots[loc_id].y, dots[comp_id].y) == true )
		{
			temp = intersect(dots[loc_id].y, dots[comp_id].y);
			gps.x1 = gps.y1;
			gps.x2 = gps.y2;
			gps.y1 = temp.lower;
			gps.y2 = temp.upper;
		}
	}
	else if( gps.type == 2 )
	{
		len = abs(gps.y2 - gps.y1);
		x = gps.y1;
		gps.y1 = gps.x1;
		gps.y2 = gps.x1 + len;
		gps.x1 = x;
		gps.x2 = x + len;
	}
	return(gps);
}
コード例 #3
0
ファイル: pred_regions.c プロジェクト: gtsong/CHAP2
/*
 * Count the other alignments that reach into the x side and into the y side
 * of org[id].
 *
 * Entry id itself and entries with sign == 2 are ignored.  For every other
 * entry i:
 *   - *num_x is incremented when org[i].x or org[i].y is not a subset of
 *     org[id].x and properly overlaps it (proper_overlap);
 *   - *num_y is incremented when org[i].x or org[i].y is not a subset of
 *     org[id].y and overlaps it (overlap -- note the different predicate).
 *
 * Return value: 0 = no hit, 1 = x hits only, 2 = y hits only, 3 = both.
 */
int is_left_to_right_count_strict(int *num_x, int *num_y, int id, int num_list, struct DotList *org)
{
	int k;
	int sides = 0; /* bit 0: an x hit was seen, bit 1: a y hit was seen */

	*num_x = 0;
	*num_y = 0;

	for( k = 0; k < num_list; k++ )
	{
		if( (k == id) || (org[k].sign == 2) ) continue;

		if( ((subset(org[k].x, org[id].x) == false) && (proper_overlap(org[id].x, org[k].x) == true))
		 || ((subset(org[k].y, org[id].x) == false) && (proper_overlap(org[id].x, org[k].y) == true)) )
		{
			(*num_x)++;
			sides |= 1;
		}

		if( ((subset(org[k].x, org[id].y) == false) && (overlap(org[id].y, org[k].x) == true))
		 || ((subset(org[k].y, org[id].y) == false) && (overlap(org[id].y, org[k].y) == true)) )
		{
			(*num_y)++;
			sides |= 2;
		}
	}

	return(sides);
}
コード例 #4
0
ファイル: find_dup_copy.c プロジェクト: gtsong/CHAP2
bool tandem_exist(struct DotList *dots, struct perm_pt *p_pts, struct kdnode *tree, int size, int id1, int id2)
{
	bool res = false;
	struct I reg1, reg2;
	int sid = 0, eid = 0;
	int i = 0;
	int cur_id = 0;

	reg1 = assign_I(0, 1);
	reg2 = assign_I(0, 1);
	
	if( (dots[id1].sign == dots[id2].sign) && (proper_overlap(dots[id1].x, dots[id2].x) == true ) && (proper_overlap(dots[id1].y, dots[id2].y) == true) ) {
		reg1 = intersect(dots[id1].x, dots[id2].x);			
		sid = find_id_len(tree, size, width(reg1), reg1.lower, reg1.lower, W_SID);
		eid = find_id_len(tree, size, width(reg1), reg1.upper, reg1.upper, W_FID);

		i = sid;
		while( (i <= eid) && (res == false) ) {
			cur_id = p_pts[i].id;
			if( is_tandem(dots[cur_id]) == true ) res = true;
			i++;
		}

		if( res == false ) {
			reg2 = intersect(dots[id1].y, dots[id2].y);
			sid = find_id_len(tree, size, width(reg2), reg2.lower, reg2.lower, W_SID);
			eid = find_id_len(tree, size, width(reg2), reg2.upper, reg2.upper, W_FID);

			i = sid;
			while( (i <= eid) && (res == false)) {
				cur_id = p_pts[i].id;
				if( is_tandem(dots[cur_id]) == true ) res = true;
				i++;
			}
		}	
	}

	return(res);
}
コード例 #5
0
ファイル: adjust_plot.c プロジェクト: gtsong/CHAP2
/*
 * Among the entries st[start..end], pick the alignment that scores best as
 * a gap partner of dots[id].
 *
 * Scan range: the union of the index windows [w_sid, w_fid] and
 * [h_sid, h_fid].  When the two windows are disjoint, the indices strictly
 * between them (mid1 < i < mid2) are skipped.
 *
 * For each candidate that survives the filters below, a gap record is built
 * (define_gap / define_gap_new_type), scored with get_score(), and the
 * candidate with the smallest score below the initial 1000 is kept; ties are
 * resolved through the rate get_score() reports in d_rate.
 *
 * Outputs:
 *   return value : dots[] index of the chosen candidate, or -1 if none.
 *   *rp1_id, *rp2_id : copied from the id1/id2 out-arguments handed to
 *                      get_score() for the chosen candidate; -1 otherwise.
 *
 * fp is only forwarded to find_yloc_one() and get_score().
 */
int find_opt_fr(struct DotList *dots, int id, struct perm_pt *st, int w_sid, int w_fid, int h_sid, int h_fid, struct r_list *rp1, int num_rp1, struct r_list *rp2, int num_rp2, int *rp1_id, int *rp2_id, FILE *fp)
{
	int i = 0;
	int min_score = 1000;
	int max_id = -1;
	bool *is_x;
	int *sd;
	int d = 0;
	struct gap_list gps;
	int temp_score = 0;
	int start, mid1 = -1, mid2 = -1, end;
	float *d_rate;
	float min_rate = 100;
	int len1 = 0, len2 = 0, len = 0, m_th = 0;
	int op_len = 0, op_len_x = 0, op_len_y = 0;
	int closeness = 0;
	struct I temp;
	int y_cur = 0, y_old = 0;
	int *id1, *id2;
	
	// single-element heap cells used as out-arguments of distance() and get_score(); released before returning
	is_x = (bool *) ckalloc(sizeof(bool));
	sd = (int *) ckalloc(sizeof(int));
	d_rate = (float *) ckalloc(sizeof(float));
	id1 = (int *) ckalloc(sizeof(int));
	id2 = (int *) ckalloc(sizeof(int));
	*rp1_id = -1;
	*rp2_id = -1;
	*id1 = -1;
	*id2 = -1;

	// start from an "undefined" gap record (type -1)
	gps.type = -1;
	gps.id1 = -1;
	gps.id2 = -1;
	gps.x1 = 0;
	gps.x2 = 1;
	gps.y1 = 0;
	gps.y2 = 1;
	strcpy(gps.name1, "");
	strcpy(gps.name2, "");

	// merge the two index windows into [start, end]; mid1/mid2 stay -1 unless the windows are disjoint
	if( w_sid < h_sid )
	{
		start = w_sid;
		if( w_fid < h_sid )
		{
			mid1 = w_fid;
			mid2 = h_sid;
			end = h_fid;
		}
		else 
		{
			if( w_fid < h_fid ) end = h_fid;
			else end = w_fid;
		}
	}
	else 
	{
		start = h_sid;
		if( h_fid < w_sid )
		{
			mid1 = h_fid;
			mid2 = w_sid;
			end = w_fid;
		}
		else
		{
			if( h_fid < w_fid ) end = w_fid;
			else end = h_fid;
		}
	}

	// m_x and m_y save the coordinates of the initial alignment before getting chained
	for( i = start; i <= end; i++ )
	{
		// Filters: each empty branch below discards the candidate st[i].id.
		// Discarded are: the query itself, a different sequence-name pair, a candidate
		// starting after the query on x (current or initial coordinates), a candidate
		// on the wrong y side for its sign, mutual containment, and indices lying in
		// the hole between two disjoint windows.
		if( st[i].id == id ) {}	
		else if( (strcmp(dots[st[i].id].name1, dots[id].name1) != 0) || (strcmp(dots[st[i].id].name2, dots[id].name2) != 0) ) {}
		else if( dots[st[i].id].x.lower > dots[id].x.lower ) {}
		else if( dots[st[i].id].x.lower > dots[id].m_x.lower ) {}
		else if( (dots[st[i].id].sign == 0) && (dots[st[i].id].y.lower > dots[id].y.lower )) {}
		else if( (dots[st[i].id].sign == 1) && (dots[st[i].id].y.lower < dots[id].y.lower )) {}
		else if( subset(dots[st[i].id].m_x, dots[id].x) || subset(dots[st[i].id].m_y, dots[id].y) || subset(dots[id].m_x, dots[st[i].id].x) || subset(dots[id].m_y, dots[st[i].id].y) ) {}
		else if( (mid1 != -1) && (i > mid1) && (i < mid2)) {}
		else
		{
// is_x of 'distance' function is true if x region is larger 
			// only same-sign candidates with sign != 2 and distance d within MDIS_THRESHOLD go on; distance() also fills *is_x and *sd
			if((dots[st[i].id].sign != 2) && (dots[st[i].id].sign == dots[id].sign) && ((d = distance(dots, st[i].id, id, is_x, sd)) <= MDIS_THRESHOLD))
			{
				len1 = width(dots[st[i].id].x);
				len2 = width(dots[id].x);

				// len = the shorter of the two x widths
				if( len1 > len2 ) len = len2;
				else len = len1;

				// the larger threshold L_M_TH applies only when both alignments reach LG_TH
				if( (len1 >= LG_TH) && (len2 >= LG_TH ) ) m_th = L_M_TH;
				else m_th = M_TH;

				if((*sd) <= len)
				{
					op_len = 0;
					op_len_x = 0;
					op_len_y = 0;

					// op_len = the larger of the x-overlap width and the y-overlap width (0 when neither overlaps)
       		if( proper_overlap(dots[st[i].id].x, dots[id].x) == true )
          {
            op_len_x = width(intersect(dots[st[i].id].x, dots[id].x));
            op_len = op_len_x;
          }

          if( proper_overlap(dots[st[i].id].y, dots[id].y) == true )
          {
            op_len_y = width(intersect(dots[st[i].id].y, dots[id].y));
            if( op_len < op_len_y ) op_len = op_len_y;
					}

          // Case A: *sd or the overlap exceeds the threshold m_th
          if( ((*sd) > m_th) || (op_len > m_th) )
          {
            if( (strict_almost_equal(dots[st[i].id].x, dots[id].x) == true) || (strict_almost_equal(dots[st[i].id].y, dots[id].y) == true) )
            {
              gps.type = -1;
            }
						else if( ((closeness = compute_closeness(dots, st[i].id, id)) <= C_OP_TH) && ((*sd) > m_th) && (proper_overlap(dots[st[i].id].x, dots[id].x) == true) && (proper_overlap(dots[st[i].id].y, dots[id].y) == true )) {
							gps = define_gap(dots, st[i].id, id, d, *sd, *is_x);
						}
						else if( ((*sd) > m_th) && (proper_overlap(dots[st[i].id].x, dots[id].x) == true) && (proper_overlap(dots[st[i].id].y, dots[id].y) == true )) {
							gps.type = -1;
						}
						else if((proper_overlap(dots[st[i].id].x, dots[id].x) == true) && (proper_overlap(dots[st[i].id].y, dots[id].y) == true ))
						{
							// both axes overlap: compare the y position of the candidate at the start of the shared x range with the query's y boundary
							// NOTE(review): y_cur / y_old keep their previous values when dots[id].sign is neither 0 nor 1 -- confirm that cannot happen here
							temp = intersect(dots[st[i].id].x, dots[id].x);
							if( dots[id].sign == 0 ) {
								y_cur = dots[id].y.lower;
								y_old = find_yloc_one(dots[st[i].id], fp, temp.lower-dots[st[i].id].x.lower, NO_GAP_INC);
							}
							else if( dots[id].sign == 1 ) {
								y_cur = find_yloc_one(dots[st[i].id], fp, temp.lower-dots[st[i].id].x.lower, NO_GAP_INC);
								y_old = dots[id].y.upper;
							}

							// try the y-overlap gap first when y_old >= y_cur, otherwise the x-overlap gap; fall back to the other one on type -1
							if( y_old >= y_cur ) {
								gps = define_gap_new_type(dots, st[i].id, id, false);
								if( gps.type == -1 ) gps = define_gap_new_type(dots, st[i].id, id, true);
							}
							else if( y_old < y_cur ) {
								gps = define_gap_new_type(dots, st[i].id, id, true);
								if( gps.type == -1 ) gps = define_gap_new_type(dots, st[i].id, id, false);
							}
						}
            else if( proper_overlap(dots[st[i].id].x, dots[id].x) == true )
            {
              gps = define_gap_new_type(dots, st[i].id, id, true);
            }
            else if( proper_overlap(dots[st[i].id].y, dots[id].y) == true )
            {
              gps = define_gap_new_type(dots, st[i].id, id, false);
            }
						else gps.type = -1;
					}
					// Case B: below the threshold, closeness above C_OP_TH and both axes overlap -- same y comparison as above
					else if( ((closeness = compute_closeness(dots, st[i].id, id)) > C_OP_TH) && (proper_overlap(dots[st[i].id].x, dots[id].x) == true) && (proper_overlap(dots[st[i].id].y, dots[id].y) == true )) 
					{
						// NOTE(review): closeness was already assigned in the condition above; this second call looks redundant
						closeness = compute_closeness(dots, st[i].id, id);
						temp = intersect(dots[st[i].id].x, dots[id].x);
						if( dots[id].sign == 0 ) {
							y_cur = dots[id].y.lower;
							y_old = find_yloc_one(dots[st[i].id], fp, temp.lower-dots[st[i].id].x.lower, NO_GAP_INC);
						}
						else if( dots[id].sign == 1 ) {
							y_cur = find_yloc_one(dots[st[i].id], fp, temp.lower-dots[st[i].id].x.lower, NO_GAP_INC);
							y_old = dots[id].y.upper;
						}
						if( y_old >= y_cur ) {
							gps = define_gap_new_type(dots, st[i].id, id, false);
							if( gps.type == -1 ) gps = define_gap_new_type(dots, st[i].id, id, true);
						}
						else if( y_old < y_cur ) {
							gps = define_gap_new_type(dots, st[i].id, id, true);
							if( gps.type == -1 ) gps = define_gap_new_type(dots, st[i].id, id, false);
						}
					}
					// Case C: check_candi() rejects the pair and *sd exceeds TD_TH -- no gap
					else if( (check_candi(dots, id, st[i].id, *is_x) == false) && ( (*sd) > TD_TH)) gps.type = -1;
			  	else
					{
            // Case D: choose the gap constructor from which axes overlap; define_gap() handles both-or-none
            if( (proper_overlap(dots[st[i].id].x, dots[id].x) == true) && (proper_overlap(dots[st[i].id].y, dots[id].y) == true ) ) {
							gps = define_gap(dots, st[i].id, id, d, *sd, *is_x);
						}
            else if( proper_overlap(dots[st[i].id].x, dots[id].x) == true )
            {
              gps = define_gap_new_type(dots, st[i].id, id, true);
            }
            else if( proper_overlap(dots[st[i].id].y, dots[id].y) == true )
            {

              gps = define_gap_new_type(dots, st[i].id, id, false);
            }
						else gps = define_gap(dots, st[i].id, id, d, *sd, *is_x);
					}

					if((gps.type == -1) || (gps.type == 3)) // this gap is meaningless
					{
					}
					else
					{
						gps.rp_id1 = -1;
						gps.rp_id2 = -1;
						// a gap whose y extent is below ERR_LG_TH is re-typed as 0 before scoring
						if( abs(gps.y2 - gps.y1) < ERR_LG_TH ) {
							gps.type = 0;
						}
						temp_score = get_score(dots, gps, d_rate, rp1, num_rp1, rp2, num_rp2, id1, id2, fp);

						// scoring failed for a type 21/22 gap: rebuild the gap on the other axis and score once more
						if( temp_score == -1 ) {
							if( (gps.type == 21) || (gps.type == 22) ) {
								if( gps.type == 21 ) {
									gps = define_gap_new_type(dots, st[i].id, id, false);
								}
								else if( gps.type == 22 ) {
									gps = define_gap_new_type(dots, st[i].id, id, true);
								}
					
								if((gps.type == -1) || (gps.type == 3)) {}// this gap is meaningless
								else temp_score = get_score(dots, gps, d_rate, rp1, num_rp1, rp2, num_rp2, id1, id2, fp);
							}
						}

					  if( temp_score != -1 )
						{
							// keep the candidate with the strictly smallest score seen so far
							// NOTE(review): min_rate is not refreshed here, so the tie-break below compares against the rate of an earlier tie (or the initial 100) -- confirm intended
							if( min_score > temp_score )
							{
								min_score = temp_score;
								max_id = st[i].id;
								*rp1_id = *id1;
								*rp2_id = *id2;
							}
							else if( min_score == temp_score )
							{
								// equal score: the later candidate wins when its rate does not exceed min_rate
								if( (*d_rate) <= min_rate ) 
								{
									min_rate = (*d_rate);
									min_score = temp_score;
									max_id = st[i].id;
									*rp1_id = *id1;
									*rp2_id = *id2;
								}
							}	
						}
					}
				}
			}
		}
	}

	free(id1);
	free(id2);
	free(d_rate);
	free(sd);
	free(is_x);
	if( max_id == -1 ) 
	{
		return(-1);
	}
	else 
	{
		return(max_id);
	}
}
コード例 #6
0
ファイル: filter_gff.c プロジェクト: cestmoi7/AGAPE
void filter_gff_lists(struct g_list *genes1, int num_genes1, struct exons_list *exons1, int num_exons1, int type)
{
	int i = 0, j = 0;
	struct I cur, tmp;
	int sid = 0, eid = 0;

	cur = assign_I(0, 1);
	tmp = assign_I(0, 1);

	if( type == SGD ) {
		for( i = 0; i < num_genes1; i++ ) {
    sid = genes1[i].cdsStart;
    eid = genes1[i].cdsEnd;
    if( exons1[sid].reg.lower < exons1[eid].reg.upper ) {
      cur = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper);
    }
    else {
      if( genes1[i].strand == '-' ) {
        cur = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper);
      }
      else {
        fatalf("check exons list for %s,%s:%d-%d\n", genes1[i].gname, genes1[i].sname, genes1[i].txStart, genes1[i].txEnd);
      }
    }

//			cur = assign_I(genes1[i].txStart, genes1[i].txEnd);
//			if( (width(cur) < MIN_ORF_BASES) && (strstr(genes1[i].info, "Dubious") != 0) )
			if( (genes1[i].txStart <= 0)  || (genes1[i].txEnd <= 0) ) {
				genes1[i].type = REDUN;	
			}
			else if( width(cur) < MIN_ORF_BASES )
			{
				genes1[i].type = REDUN;
			}
			else if( genes1[i].type == REDUN ) {}
			else {
				j = i+1;
				if( j < num_genes1 ) {
		      sid = genes1[j].cdsStart;
   		 		eid = genes1[j].cdsEnd;
     	 		if( exons1[sid].reg.lower < exons1[eid].reg.upper ) {
        		tmp = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper);
					}
      		else {
        		if( genes1[j].strand == '-' ) {
          		tmp = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper);
        		}
        		else {
          		fatalf("check exons list for %s,%s:%d-%d\n", genes1[j].gname, genes1[j].sname, genes1[i].txStart, genes1[j].txEnd);
        		}
      		}

//					tmp = assign_I(genes1[j].txStart, genes1[j].txEnd);
				}

				while( (j < num_genes1) && (proper_overlap(cur, tmp) == true) ) {
					if( width(tmp) < MIN_ORF_BASES )
					{
						genes1[j].type = REDUN;
					}
					else if( genes1[j].type == REDUN ) {}
					else {
						if( width(intersect(cur, tmp)) >= MIN_BASES ) {
							if( (strstr(genes1[i].info, "Verified") != 0) || (strstr(genes1[i].info, "Uncharacterized") != 0) ) {
								if(strstr(genes1[j].info, "Dubious") != 0 ) {
//									if( genes1[j].strand == genes1[i].strand ) {
										genes1[j].type = REDUN;
//									}
								}
							}
							else if( strstr(genes1[i].info, "Dubious") != 0 ) {
								if( (strstr(genes1[j].info, "Verified") != 0) || (strstr(genes1[j].info, "Uncharacterized") != 0) ) {
//									if( genes1[j].strand == genes1[i].strand ) {
										genes1[i].type = REDUN;
//									}
								}
								else if( strstr(genes1[j].info, "Dubious") != 0 ) {
									if(width(tmp) < width(cur)) {
//										if( genes1[j].strand == genes1[i].strand ) {
											genes1[j].type = REDUN;
//										}
									}
									else if(width(tmp) >= width(cur)) {
//										if( genes1[j].strand == genes1[i].strand ) {
											genes1[i].type = REDUN;
//										}
									}
								}
							}
						}
					}
	
					j++;	
					if( j < num_genes1 ) {
		   	  	sid = genes1[j].cdsStart;
   		 			eid = genes1[j].cdsEnd;
     	 			if( exons1[sid].reg.lower < exons1[eid].reg.upper ) {
       		 		tmp = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper);
						}
      			else {
       		 		if( genes1[j].strand == '-' ) {
       		   		tmp = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper);
       		 		}
       		 		else {
       		   		fatalf("check exons list for %s,%s:%d-%d\n", genes1[j].gname, genes1[j].sname, genes1[i].txStart, genes1[j].txEnd);
       		 		}
      			}
//							tmp = assign_I(genes1[j].txStart, genes1[j].txEnd);
					}
				}
			}
		}	
	}
	else if( type == MAKER ) {
		for( i = 0; i < num_genes1; i++ ) {
 	  	sid = genes1[i].cdsStart;
   		eid = genes1[i].cdsEnd;
    	if( exons1[sid].reg.lower < exons1[eid].reg.upper ) {
      	cur = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper);
    	}
    	else {
      	if( genes1[i].strand == '-' ) {
        	cur = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper);
      	}
      	else {
        	fatalf("check exons list for %s,%s:%d-%d\n", genes1[i].gname, genes1[i].sname, genes1[i].txStart, genes1[i].txEnd);
      	}
    	}
//			cur = assign_I(genes1[i].txStart, genes1[i].txEnd);
//			if( (width(cur) < MIN_ORF_BASES) && (strcmp(genes1[i].gname, "UNDEF") == 0)  )
			if( (genes1[i].type == REDUN) || (genes1[i].type == MATCH) || (genes1[i].type == PARTIAL) ) {
				genes1[i].type = REDUN;
			}
			else if( width(cur) < MIN_ORF_BASES ) 
			{
				genes1[i].type = REDUN;
			}
			else {
				j = i+1;
				if( j < num_genes1 ) {
		     	sid = genes1[j].cdsStart;
   		 		eid = genes1[j].cdsEnd;
     	 		if( exons1[sid].reg.lower < exons1[eid].reg.upper ) {
       	 		tmp = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper);
					}
      		else {
       	 		if( genes1[j].strand == '-' ) {
       	   		tmp = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper);
       	 		}
       	 		else {
       	   		fatalf("check exons list for %s,%s:%d-%d\n", genes1[j].gname, genes1[j].sname, genes1[i].txStart, genes1[j].txEnd);
       	 		}
      		}
//					tmp = assign_I(genes1[j].txStart, genes1[j].txEnd);
				}

				while( (j < num_genes1) && (proper_overlap(cur, tmp) == true) ) {
					if( (genes1[j].type == REDUN) || (genes1[j].type == MATCH) || (genes1[j].type == PARTIAL) ) {
						genes1[j].type = REDUN;
					}
					else if( width(cur) < MIN_ORF_BASES ) 
					{
						genes1[j].type = REDUN;
					}
					else {
//						if( (width(intersect(cur, tmp)) >= MIN_BASES) && (genes1[i].strand == genes1[j].strand) ) {
						if( width(intersect(cur, tmp)) >= MIN_BASES ) {
							if(width(tmp) < width(cur)) {
								genes1[j].type = REDUN;
							}
							else if(width(tmp) >= width(cur)) {
								genes1[i].type = REDUN;
							}
						}
					}
					j++;
					if( j < num_genes1 ) {
		   	  	sid = genes1[j].cdsStart;
   		 			eid = genes1[j].cdsEnd;
     	 			if( exons1[sid].reg.lower < exons1[eid].reg.upper ) {
       		 		tmp = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper);
						}
      			else {
       		 		if( genes1[j].strand == '-' ) {
       		   		tmp = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper);
       		 		}
       		 		else {
       		   		fatalf("check exons list for %s,%s:%d-%d\n", genes1[j].gname, genes1[j].sname, genes1[i].txStart, genes1[j].txEnd);
       		 		}
      			}
//						tmp = assign_I(genes1[j].txStart, genes1[j].txEnd);
					}
				}
			}
		}
	}
	else if ( type == MULTI_CDS ) {
		for( i = 0; i < num_genes1; i++ ) {
			if( genes1[i].exonCount >= 2 ) {
			}
			else {
				genes1[i].type = REDUN;
			}
		}
	}
	else {
		fatalf("Unsupported type: %d\n", type);
	}

}
コード例 #7
0
ファイル: deal_gaps.c プロジェクト: gtsong/CHAP2
/*
 * Build the gap record between two alignments that overlap on one axis.
 *
 * dots     : alignment list.
 * loc_id   : index of the first alignment  (stored in id1).
 * comp_id  : index of the second alignment (stored in id2).
 * is_x     : true  -> use the x overlap; the gap is on the y side (type 21).
 *            false -> use the y overlap; the gap is on the x side (type 22).
 *
 * Of the two alignments, the one starting lower on the gap axis is called
 * "first" below, the other one "second".
 *   y1, y2 : gap interval on the gap axis: from first's upper bound to
 *            second's lower bound plus the width of the overlap.
 *   x1, x2 : one-base interval on the overlap axis, anchored at first's
 *            upper bound (sign 0) or lower bound (sign 1).
 *   offset : width difference of second, gap axis minus overlap axis.
 *
 * The record is returned with type -1 when the requested axis does not
 * properly overlap, when first's sign is neither 0 nor 1, when y2 <= y1, or
 * when the gap interval is strict_almost_equal to one of the four alignment
 * intervals.  Only id1, id2, type, x1, x2, y1, y2 and offset are written
 * here; any other member of the record is left for the caller to fill in.
 *
 * Fix: the type-22 case in which comp_id starts lower on x used to leave
 * x1/x2 at their 0/1 defaults and accepted any sign.  All four cases now
 * follow the same rule.
 */
struct gap_list define_gap_new_type(struct DotList *dots, int loc_id, int comp_id, bool is_x)
{
	struct gap_list gp;
	struct I temp;
	int len_x, len_y;
	int first, second; /* alignment starting lower / higher on the gap axis */

	gp.id1 = loc_id;
	gp.id2 = comp_id;

	gp.type = -1;
	gp.x1 = 0;
	gp.x2 = 1;
	gp.y1 = 0;
	gp.y2 = 1;
	gp.offset = 0;

	if( is_x == true ) // the overlap of x region is larger than y's
	{
		if( proper_overlap(dots[loc_id].x, dots[comp_id].x) == true )
		{
			temp = intersect(dots[loc_id].x, dots[comp_id].x);
			gp.type = 21; // the gap is in y side

			if( dots[loc_id].y.lower <= dots[comp_id].y.lower )
			{
				first = loc_id;
				second = comp_id;
			}
			else
			{
				first = comp_id;
				second = loc_id;
			}

			gp.y1 = dots[first].y.upper;
			gp.y2 = dots[second].y.lower + width(temp);
			len_x = width(dots[second].x);
			len_y = width(dots[second].y);
			gp.offset = len_y - len_x;

			if( dots[first].sign == 0 )
			{
				gp.x1 = dots[first].x.upper;
				gp.x2 = gp.x1 + 1;
			}
			else if( dots[first].sign == 1 )
			{
				gp.x1 = dots[first].x.lower;
				gp.x2 = gp.x1 + 1;
			}
			else gp.type = -1;
		}
	}
	else
	{
		if( proper_overlap(dots[loc_id].y, dots[comp_id].y) == true )
		{
			temp = intersect(dots[loc_id].y, dots[comp_id].y);
			gp.type = 22; // the gap is in x side

			if( dots[loc_id].x.lower <= dots[comp_id].x.lower )
			{
				first = loc_id;
				second = comp_id;
			}
			else
			{
				first = comp_id;
				second = loc_id;
			}

			// for type 22 the roles are swapped: y1/y2 carry x coordinates, x1/x2 carry y coordinates
			gp.y1 = dots[first].x.upper;
			gp.y2 = dots[second].x.lower + width(temp);
			len_x = width(dots[second].x);
			len_y = width(dots[second].y);
			gp.offset = len_x - len_y;

			if( dots[first].sign == 0 )
			{
				gp.x1 = dots[first].y.upper;
				gp.x2 = gp.x1 + 1;
			}
			else if( dots[first].sign == 1 )
			{
				gp.x1 = dots[first].y.lower;
				gp.x2 = gp.x1 + 1;
			}
			else gp.type = -1;
		}
	}

	// an empty or inverted gap interval is not a gap
	if( (gp.type != -1) && (gp.y2 <= gp.y1) ) {
		gp.type = -1;
	}

	// a gap that nearly coincides with one of the alignments themselves is rejected
	if( gp.type != -1 ) {
		temp = assign_I(gp.y1, gp.y2);
		if( ( strict_almost_equal(temp, dots[comp_id].x) == true ) || ( strict_almost_equal(temp, dots[comp_id].y) == true ) || ( strict_almost_equal(temp, dots[loc_id].x) == true ) || (strict_almost_equal(temp, dots[loc_id].y) == true ))
		{
			gp.type = -1;
		}
	}

	return(gp);
}
コード例 #8
0
ファイル: pred_ops.c プロジェクト: gtsong/CHAP2
/*
 * Roll back one predicted duplication-type event for dots[id] and log it.
 *
 * con        : when positive and pred_op == 4, replaces the width returned
 *              by the rollback step.
 * op_ch      : operation character forwarded to generate_ops().
 * pred_op    : selects the rollback routine -- 0 and 2: no-overlap
 *              duplication, 3: overlapping duplication, 4: conversion;
 *              any other value performs no rollback (width 0).
 * is_x_to_y  : direction; true means the source is the x interval and the
 *              target the y interval, false the reverse.
 * id         : index of the alignment being processed.
 * num_list, dots : alignment list, forwarded to the rollback routines.
 * num_ops, ops   : slot in the operation list that receives the record.
 *
 * Before the rollback:
 *   - if dots[id] has l_id == -1 and its x and y intervals properly overlap
 *     by at most THRESHOLD, y is replaced by the interval that starts at
 *     x.upper and extends by width(x);
 *   - every other alignment that has l_id != -1 and sign != 2 is restored
 *     from its saved m_x / m_y / m_pid values, hands its sign to the
 *     alignment l_id points at, and has l_id, m_x and m_y reset.
 * If dots[id] itself has l_id != -1, the source/target pair is always x -> y,
 * dots[id].sign becomes 2 and the flag passed to generate_ops() is NONE
 * instead of DEL.
 */
void pred_dup(int con, char op_ch, int pred_op, bool is_x_to_y, int id, int *num_list, struct DotList *dots, int num_ops, struct ops_list *ops)
{
	struct I from = {0, 1}, to = {0, 1};
	int wide = 0;
	int flag = DEL;
	int k = 0;
	int sp_id = dots[id].sp_id;
	bool linked;

	if( (dots[id].l_id == -1) && (proper_overlap(dots[id].x, dots[id].y) == true) && (width(intersect(dots[id].x, dots[id].y)) <= THRESHOLD) )
	{
		dots[id].y = assign_I(dots[id].x.upper, dots[id].x.upper + width(dots[id].x));
	}

	/* undo the pending link state of every other alignment */
	for( k = 0; k < *num_list; k++ )
	{
		if( k == id ) continue;
		if( (dots[k].l_id == -1) || (dots[k].sign == 2) ) continue;

		dots[k].x = assign_I(dots[k].m_x.lower, dots[k].m_x.upper);
		dots[k].y = assign_I(dots[k].m_y.lower, dots[k].m_y.upper);
		dots[dots[k].l_id].sign = dots[k].sign;
		dots[k].l_id = -1;
		dots[k].identity = dots[k].m_pid;
		dots[k].m_x = assign_I(0,1);
		dots[k].m_y = assign_I(0,1);
	}

	/* choose source and target intervals */
	linked = (dots[id].l_id != -1);
	if( linked || is_x_to_y )
	{
		from = assign_I(dots[id].x.lower, dots[id].x.upper);
		to = assign_I(dots[id].y.lower, dots[id].y.upper);
	}
	else
	{
		from = assign_I(dots[id].y.lower, dots[id].y.upper);
		to = assign_I(dots[id].x.lower, dots[id].x.upper);
	}

	if( linked )
	{
		dots[id].sign = 2;
		flag = NONE;
	}

	switch( pred_op )
	{
		case 0:
		case 2:
			wide = rollback_step_dup_no_overlap(is_x_to_y, id, num_list, dots);
			break;
		case 3:
			wide = rollback_step_dup_overlap(is_x_to_y, id, num_list, dots);
			break;
		case 4:
			wide = rollback_step_conversion(is_x_to_y, id, num_list, dots);
			if( con > 0 ) wide = con;
			break;
		default:
			wide = 0;
			break;
	}

	generate_ops(op_ch, wide, is_x_to_y, from, to, flag, num_ops, ops, sp_id);

	ops[num_ops].id = dots[id].index;
}
コード例 #9
0
ファイル: find_dup_copy.c プロジェクト: gtsong/CHAP2
int find_opt_du_copy(struct DotList *dots, int num_lines, int id, struct perm_pt *st, struct kdnode *tree, int size, int w_sid, int w_fid, int h_sid, int h_fid, int *cid, bool *x_ins, bool *f_is_x, int *t_ins, FILE *fp, struct DotList *init_dots)
{
	int i;
	int min_score = 1000;
	int max_id = -1;
	bool *is_x;
	int *sd;
	int d;
	struct gap_list gps;
	int temp_score;
	int y_cur, y_old;
	struct I temp;
	int closeness;
	int start, mid1 = -1, mid2 = -1, end;
	int len1, len2, len;
	int opt_cid;
	int op_len = 0, op_len_x, op_len_y;
	int m_th;
	int from = 0, to = 1;

	is_x = (bool *) ckalloc(sizeof(bool));
	sd = (int *) ckalloc(sizeof(int));

	if( w_sid < h_sid )
	{
		start = w_sid;
		if( w_fid < h_sid )
		{
			mid1 = w_fid;
			mid2 = h_sid;
			end = h_fid;
		}
		else 
		{
			if( w_fid < h_fid ) end = h_fid;
			else end = w_fid;
		}
	}
	else 
	{
		start = h_sid;
		if( h_fid < w_sid )
		{
			mid1 = h_fid;
			mid2 = w_sid;
			end = w_fid;
		}
		else
		{
			if( h_fid < w_fid ) end = w_fid;
			else end = h_fid;
		}
	}

	for( i = start; i <= end; i++ )
	{
		if( (mid1 != -1) && (i > mid1) && (i < mid2)) {}
		else
		{
			if( st[i].id == id ) {}
			else if( dots[st[i].id].sign == 2) {}
			else if( dots[st[i].id].x.lower > dots[id].x.lower ) {}
			else if( dots[st[i].id].ctg_id1 != dots[id].ctg_id1 ) {}
			else if( dots[st[i].id].ctg_id2 != dots[id].ctg_id2 ) {}
			else if( (dots[st[i].id].sign == 0) && (dots[st[i].id].y.lower > dots[id].y.lower )) {}
			else if( (dots[st[i].id].sign == 1) && (dots[st[i].id].y.lower < dots[id].y.lower )) {}
			else if( subset(dots[st[i].id].m_x, dots[id].x) || subset(dots[st[i].id].m_y, dots[id].y) || subset(dots[id].m_x, dots[st[i].id].x) || subset(dots[id].m_y, dots[st[i].id].y) ) {}
			else if( ((dots[st[i].id].pair_self == SELF) && (is_tandem(dots[st[i].id]) == true)) && ((dots[id].pair_self == SELF) && (is_tandem(dots[id]) == true))) {}
			else 
			{
				if((dots[st[i].id].sign != 2) && (dots[st[i].id].sign == dots[id].sign) && (dots[st[i].id].sp_id == dots[id].sp_id) && ((d = distance(dots, st[i].id, id, is_x, sd)) <= MDIS_THRESHOLD))
				{
					len1 = width(dots[st[i].id].x);
					len2 = width(dots[id].x);

					if( len1 > len2 ) len = len2;
					else len = len1;

					if( (len1 >= LG_TH) && (len2 >= LG_TH)) m_th = L_M_TH;
					else m_th = M_TH;

					if((*sd) <= len) 
					{
						op_len = 0;
						op_len_x = 0;
						op_len_y = 0;

						if( proper_overlap(dots[st[i].id].x, dots[id].x) == true )
						{
							op_len_x = width(intersect(dots[st[i].id].x, dots[id].x));
							op_len = op_len_x;
						}

						if( proper_overlap(dots[st[i].id].y, dots[id].y) == true )
						{
							op_len_y = width(intersect(dots[st[i].id].y, dots[id].y));
							if( op_len < op_len_y )
							{
								op_len = op_len_y;
							}
						}

						if( ((*sd) > m_th) || (op_len > m_th) )
						{
							if( (strict_almost_equal(dots[st[i].id].x, dots[id].x) == true) || (strict_almost_equal(dots[st[i].id].y, dots[id].y) == true ) ) gps.type = -1;
//            	else if( ((*sd) > m_th) && (proper_overlap(dots[st[i].id].x, dots[id].x) == true) && (proper_overlap(dots[st[i].id].y, dots[id].y) == true )) {
            	else if( (proper_overlap(dots[st[i].id].x, dots[id].x) == true) && (proper_overlap(dots[st[i].id].y, dots[id].y) == true ) && (tandem_exist(dots, st, tree, size, st[i].id, id) == false)) { 
              	gps.type = -1;            
							}
							else if( (proper_overlap(dots[st[i].id].x, dots[id].x) == true) && (proper_overlap(dots[st[i].id].y, dots[id].y) == true) ) 
							{
          	    temp = intersect(dots[st[i].id].x, dots[id].x);
           	  	if( dots[id].sign == 0 ) {
									y_cur = init_dots[dots[id].index].y.lower + init_dots[dots[id].index].yl_diff;
             	  	y_old = find_yloc_one_ch(init_dots, dots[st[i].id], fp, width(temp), NO_GAP_INC);
									if( y_old == -1 ) {
										y_cur = dots[id].y.lower;
										y_old = dots[st[i].id].y.upper - width(temp);
									}
              	}
              	else if( dots[id].sign == 1 ) {
									y_old = init_dots[dots[id].index].y.upper - init_dots[dots[id].index].yr_diff;
                	y_cur = find_yloc_one_ch(init_dots, dots[st[i].id], fp, width(temp), NO_GAP_INC);
									if( y_cur == -1 ) {
										y_old = dots[id].y.upper;
										y_cur = dots[st[i].id].y.lower + width(temp);
									}
              	}

              	if( y_old >= y_cur ) {
									gps = define_gap_new_type(dots, st[i].id, id, false);
									if( gps.type == -1 ) gps = define_gap_new_type(dots, st[i].id, id, true);
								}
              	else {
									gps = define_gap_new_type(dots, st[i].id, id, true);
									if( gps.type == -1 ) gps = define_gap_new_type(dots, st[i].id, id, false);
								}				
							}
							else if( proper_overlap(dots[st[i].id].x, dots[id].x) == true ) 
							{
								gps = define_gap_new_type(dots, st[i].id, id, true);
							}
							else if( proper_overlap(dots[st[i].id].y, dots[id].y) == true ) 
							{
								gps = define_gap_new_type(dots, st[i].id, id, false);
							}
							else gps.type = -1;
						}
						else if( ((closeness = compute_closeness(dots, st[i].id, id)) > C_OP_TH) && (proper_overlap(dots[st[i].id].x, dots[id].x) == true) && (proper_overlap(dots[st[i].id].y, dots[id].y) == true ))          
						{            
							if( ((subset(dots[st[i].id].x, dots[id].x) == true) || (subset(dots[id].x, dots[st[i].id].x) == true)) || ((subset(dots[st[i].id].y, dots[id].y) == true) || (subset(dots[id].y, dots[st[i].id].y) == true)) ) {
								gps.type = -1;
							}
							else {
								temp = intersect(dots[id].x, dots[st[i].id].x);
								if( dots[id].sign == 0 ) {              
									y_cur = init_dots[dots[id].index].y.lower + init_dots[dots[id].index].yl_diff;
									y_old = find_yloc_one_ch(init_dots, dots[st[i].id], fp, width(temp), NO_GAP_INC);            

									if( y_old == -1 ) {
										y_cur = dots[id].y.lower;
										y_old = dots[st[i].id].y.upper - width(temp);
									}
								}            
								else if( dots[id].sign == 1 ) {              
									y_cur = find_yloc_one_ch(init_dots, dots[st[i].id], fp, width(temp), NO_GAP_INC);              
									y_old = init_dots[dots[id].index].y.upper - init_dots[dots[id].index].yr_diff;
									if( y_cur == -1 ) {
										y_old = dots[id].y.upper;
										y_cur = dots[st[i].id].y.lower + width(temp);
									}
								}            
								else gps.type = -1;

								if( (dots[id].sign == 0) || (dots[id].sign == 1) ) {
									if( y_old >= y_cur ) gps = define_gap_new_type(dots, st[i].id, id, false);            
									else gps = define_gap_new_type(dots, st[i].id, id, true);          
 							
									if( debug_mode == TRUE ) printf("Gap: %d-%d, %d-%d\n", dots[st[i].id].x.lower, dots[st[i].id].x.upper, dots[st[i].id].y.lower, dots[st[i].id].y.upper);
								}
							}
						}          
						else if( (check_candi(dots, id, st[i].id, CHECK_INS_DUP) == false) && ( (*sd) > TD_TH)) gps.type = -1;          
						else          
						{            
							if( (proper_overlap(dots[st[i].id].x, dots[id].x) == true) && (proper_overlap(dots[st[i].id].y, dots[id].y) == true ) ) 
							{              
								gps.type = -1;	
							}            
							else if( proper_overlap(dots[st[i].id].x, dots[id].x) == true )  
							{              
								gps = define_gap_new_type(dots, st[i].id, id, true);            
							}            
							else if( proper_overlap(dots[st[i].id].y, dots[id].y) == true )            
							{              
								gps = define_gap_new_type(dots, st[i].id, id, false);            
							}
							else gps = define_gap(dots, st[i].id, id, d, *sd, *is_x);
						}
					}
					else gps.type = -1;

					if((gps.type == -1) || (gps.type == 3) || (gps.type == 0))
					{
					}
					else
					{
						gps.gid = 0;

						temp_score = get_score_copy(dots, num_lines, gps, cid, x_ins);
						if( temp_score != -1 )
						{
							if( check_whole_regions_inclusion(dots, num_lines, *cid, st[i].id, id, *x_ins) == true )
							{
								temp_score = -1;	
							}			
						}

						if( (temp_score == -1) && (proper_overlap(dots[st[i].id].x, dots[id].x) == true) && (proper_overlap(dots[st[i].id].y, dots[id].y) == true) && ((gps.type == 21) || (gps.type == 22) ) ) {
							from = gps.y1;
							to = gps.y2;
							gps.y1 = from - abs(to - from);
							gps.y2 = from;
							temp_score = get_score_copy(dots, num_lines, gps, cid, x_ins);
							if( temp_score != -1 )
							{
								if( check_whole_regions_inclusion(dots, num_lines, *cid, st[i].id, id, *x_ins) == true )
								{
									temp_score = -1;	
								}			
							}

							if( temp_score == -1 ) {
								gps.y1 = from - abs(to-from)/2;
								gps.y2 = from + abs(to-from)/2;
								temp_score = get_score_copy(dots, num_lines, gps, cid, x_ins);
								if( temp_score != -1 )
								{
									if( check_whole_regions_inclusion(dots, num_lines, *cid, st[i].id, id, *x_ins) == true )
									{
										temp_score = -1;	
									}			
								}
							}
						}

						if( temp_score == -1 ) {
							if( (proper_overlap(dots[st[i].id].x, dots[id].x) == true) && (proper_overlap(dots[st[i].id].y, dots[id].y) == true) ) {
								if( (gps.type == 21) || (gps.type == 22) ){
									if( gps.type == 21 ) {
										gps = define_gap_new_type(dots, st[i].id, id, false);
									}
									else if( gps.type == 22 ) {
										gps = define_gap_new_type(dots, st[i].id, id, true);
									}

									if((gps.type == -1) || (gps.type == 3) || (gps.type == 0)) {}
									else {
										gps.gid = 0;
										temp_score = get_score_copy(dots, num_lines, gps, cid, x_ins);
										if( temp_score != -1 )
										{
											if( check_whole_regions_inclusion(dots, num_lines, *cid, st[i].id, id, *x_ins) == true )
											{
												temp_score = -1;	
											}			
										}
									}
								}
							}
						}

						if( temp_score != -1 )
						{
							if( min_score > temp_score )
							{
								min_score = temp_score;
								max_id = st[i].id;
								opt_cid = *cid;
								
								if( gps.type == 1 )
								{
									*f_is_x = false;
								}
								else if( gps.type == 2 )
								{
									*f_is_x = true;
								}

								if( (gps.type == 11) || (gps.type == 21) )
								{
									*f_is_x = false;
									*t_ins = gps.type;
								}
								else if( (gps.type == 12) || (gps.type == 22) )
								{
									*f_is_x = true;
									*t_ins = gps.type;
								}
								else
								{
									*t_ins = -1;
								}
							}
						}
					}
				}
			}
		}
	}

	free(sd);
	free(is_x);

	if( max_id == -1 ) return(-1);
	else 
	{
		*cid = opt_cid;
		return(max_id);
	}
}
コード例 #10
0
ファイル: common_orf_intervals.c プロジェクト: cestmoi7/AGAPE
/*
 * Usage: <prog> intervals1 intervals2 [extra-arg]
 *
 * Loads the interval records of argv[1] into a table, then scans argv[2]:
 *   - lines starting with '>' are echoed to stdout unchanged;
 *   - every other line must parse as "<scaffold> <begin> <end> <strain> <extra>"
 *     (the fifth token is skipped); for such a line, every table entry with
 *     the same strain name and scaffold name whose region properly overlaps
 *     [begin, end] is printed as "name lower upper strain_name".
 *
 * Passing a third argument (argc == 4) only switches debug_mode on.
 * Returns EXIT_SUCCESS; errors are reported through fatal()/fatalf().
 */
int main(int argc, char **argv)
{
	FILE *f;
	int i = 0;
	int count = 0;
	int num_match_regions = 0;
	/* NULL so that the final free() is valid even when the first file is empty */
	struct orf_I *match_regions = NULL;
	char scaf_name[MAX_NAME], cur_name[MAX_NAME];
	char buf[MAX_NAME];
	struct I reg;
	int b = 0, e = 0;

	reg = assign_I(0, 1);

	debug_mode = FALSE;
	if( argc == 4 ) {
		debug_mode = TRUE;
	}
	else if( argc != 3 ) {
		fatal("args: intervals1 intervals2\n");
	}

	strcpy(buf, "");
	strcpy(scaf_name, "");
	strcpy(cur_name, "");

	/* first pass over argv[1]: count lines to size the table */
	if( (f = fopen(argv[1], "r")) == NULL ) {
		fatalf("cannot find alignment in %s", argv[1]);
	}
	else {
		while(fgets(buf, MAX_NAME, f)) count++;
	}

	if( count > 0 ) {
		match_regions = (struct orf_I *) ckalloc(count * (sizeof(struct orf_I)) );
		initialize_orf_I_list(match_regions, count);
		/* NOTE(review): the stream is at EOF here after the counting loop;
		 * input_orf_I_list presumably rewinds it itself - confirm. */
		num_match_regions = input_orf_I_list(f, match_regions, count);
	}
	fclose(f);

	if( (f = fopen(argv[2], "r")) == NULL ) {
		fatalf("cannot find alignment in %s", argv[2]);
	}
	else {
		while(fgets(buf, MAX_NAME, f)) {
			if( buf[0] == '>' ) {
				/* header line: pass through */
				printf("%s", buf);
			}
			else {
				if( sscanf(buf, "%s %d %d %s %*s", scaf_name, &b, &e, cur_name) != 4 ) {
					fatalf("wrong interval line: %s", buf);
				}
				else {
					reg = assign_I(b, e);
					for( i = 0; i < num_match_regions; i++ ) {
						if( strcmp(cur_name, match_regions[i].strain_name) != 0 ) continue;
						if( strcmp(scaf_name, match_regions[i].name) != 0 ) continue;
						if( proper_overlap(reg, match_regions[i].region) == true ) {
							printf("%s %d %d %s\n", match_regions[i].name, match_regions[i].region.lower, match_regions[i].region.upper, match_regions[i].strain_name);
						}
					}
				}
			}
		}
		/* the original never closed the second input stream */
		fclose(f);
	}

	/*
	 * The original guarded this free with "count > 0" after resetting count
	 * to 0, so the table was never released.  free(NULL) is a no-op, so no
	 * guard is needed.
	 */
	free(match_regions);
	return EXIT_SUCCESS;
}
コード例 #11
0
ファイル: pred_regions.c プロジェクト: gtsong/CHAP2
/*
 * Classifies alignment dots[id] against the other entries of dots[0..num_list-1]
 * and returns one of CON_X_TO_Y, CON_Y_TO_X, DUP_X_TO_Y, DUP_Y_TO_X.
 *
 * Pass 1 decides the CON_* vs DUP_* family: it scans the other entries whose
 * sign is not 2 and raises `conv` once a partial overlap (proper_overlap but
 * not subset) or a proper_subset relation has been seen for one side of
 * dots[id] together with a partial overlap on the other side.
 * Pass 2 picks the direction from the first other entry having an interval
 * equal to dots[id].y (X_TO_Y) or to dots[id].x (Y_TO_X).
 * Pass 3 does the same using subset() instead of equal().
 * If nothing matches, the X_TO_Y variant is returned.
 *
 * Passes 2 and 3 do not skip entries with sign == 2; only pass 1 does.
 */
int is_left_to_right_again(int id, int num_list, struct DotList *dots)
{
	int k;
	bool conv = false;
	bool ov_x = false, ov_y = false;	/* partial overlap seen for x / y of dots[id] */
	bool sub_x = false, sub_y = false;	/* proper subset seen for x / y of dots[id] */

	/* pass 1: stop scanning as soon as conv is raised */
	for( k = 0; (k < num_list) && (conv == false); k++ )
	{
		if( (k == id) || (dots[k].sign == 2) ) continue;

		if( (proper_overlap(dots[id].x, dots[k].x) && (!subset(dots[id].x, dots[k].x)))
			|| (proper_overlap(dots[id].x, dots[k].y) && (!subset(dots[id].x, dots[k].y))) )
		{
			ov_x = true;
			if( sub_y || ov_y ) conv = true;
		}

		if( proper_subset(dots[id].x, dots[k].x) || proper_subset(dots[id].x, dots[k].y) )
		{
			sub_x = true;
			if( ov_y == true ) conv = true;
		}

		if( (proper_overlap(dots[id].y, dots[k].x) && (!subset(dots[id].y, dots[k].x)))
			|| (proper_overlap(dots[id].y, dots[k].y) && (!subset(dots[id].y, dots[k].y))) )
		{
			ov_y = true;
			if( sub_x || ov_x ) conv = true;
		}

		if( proper_subset(dots[id].y, dots[k].x) || proper_subset(dots[id].y, dots[k].y) )
		{
			sub_y = true;
			if( ov_x == true ) conv = true;
		}
	}

	/* pass 2: exact interval matches */
	for( k = 0; k < num_list; k++ )
	{
		if( k == id ) continue;

		if( equal(dots[id].y, dots[k].x) || equal(dots[id].y, dots[k].y) )
		{
			return (conv == true) ? CON_X_TO_Y : DUP_X_TO_Y;
		}
		if( equal(dots[id].x, dots[k].x) || equal(dots[id].x, dots[k].y) )
		{
			return (conv == true) ? CON_Y_TO_X : DUP_Y_TO_X;
		}
	}

	/* pass 3: subset matches */
	for( k = 0; k < num_list; k++ )
	{
		if( k == id ) continue;

		if( subset(dots[id].x, dots[k].x) || subset(dots[id].x, dots[k].y) )
		{
			return (conv == true) ? CON_X_TO_Y : DUP_X_TO_Y;
		}
		if( subset(dots[id].y, dots[k].x) || subset(dots[id].y, dots[k].y) )
		{
			/* NOTE(review): CON_X_TO_Y is paired with DUP_Y_TO_X here, unlike
			 * every other branch - possibly meant CON_Y_TO_X; kept as is. */
			return (conv == true) ? CON_X_TO_Y : DUP_Y_TO_X;
		}
	}

	return (conv == true) ? CON_X_TO_Y : DUP_X_TO_Y;
}
コード例 #12
0
ファイル: handle_tandem_dup.c プロジェクト: gtsong/CHAP2
/*
 * conv_td_reg - replace the x/y intervals of a chain of alignments in dots[]
 * by a candidate sub-region (t1 or t2) derived from neighbouring entries.
 *
 * The chain is: id, t_list[0], ..., t_list[num_tandem-1].  In loop step i the
 * "current" entry (id for i == 0, otherwise t_list[i-1]) is compared with
 * t_list[i]; after the loop the last entry t_list[num_tandem-1] is handled
 * separately.
 *
 * dots        alignments; x, y and rp1_id of chain entries may be overwritten
 * num         forwarded to check_tandem_reg() together with dots
 *             (presumably the number of entries in dots - confirm)
 * id          index in dots of the first chain entry
 * t_list      indices in dots of the remaining chain entries
 * num_tandem  number of entries in t_list; the tail code reads
 *             t_list[num_tandem-1], so this must be at least 1
 * init_dots   indexed by dots[..].index; only touched through
 *             adjust_init_offset(), and only when flag == FIRST_RUN
 * flag        FIRST_RUN: candidate scores are computed with check_tandem_reg()
 *             and stored into val_org/val1/val2; any other value: the scores
 *             are read back from those arrays
 * val1, val2, val_org
 *             per-step score arrays; indices 0..num_tandem are accessed, so
 *             each needs at least num_tandem + 1 entries
 *
 * A score of -1 means "no usable region"; an interval whose lower bound is
 * negative (set via assign_I(-1, 0)) marks an unset candidate.
 */
void conv_td_reg(struct DotList *dots, int num, int id, int *t_list, int num_tandem, struct DotList *init_dots, int flag, int *val1, int *val2, int *val_org)
{
	int i;
	int cur_id, cmp_id;
	struct DotList t1, t2; // candidate regions; only their .x and .y are assigned in this function
	struct DotList *cur_t; // holds the candidate chosen for the last chain entry
	int len_x, len_y;
	int cur_len = 0;
	int val_t1, val_t2, val_org_reg;
	int init_id;

	cur_t = (struct DotList *) ckalloc(sizeof(struct DotList));
	
	for( i = 0; i < num_tandem; i++ )
	{

		// scores start unset on FIRST_RUN, otherwise come from the caller's arrays
		if( flag == FIRST_RUN ) {
			val_org_reg = -1;
			val_t1 = -1;
			val_t2 = -1;
		}
		else {
			val_org_reg = val_org[i];
			val_t1 = val1[i];
			val_t2 = val2[i];
		}

		// mark both candidates as unset for this step
		t1.x = assign_I(-1, 0);
		t2.x = assign_I(-1, 0);
		t1.y = assign_I(-1, 0);
		t2.y = assign_I(-1, 0);
		cmp_id = t_list[i];
		if( i == 0 ) cur_id = id;
		else cur_id = t_list[i-1];

		// both entries must lie on the same pair of contigs
		if( dots[cmp_id].ctg_id1 != dots[cur_id].ctg_id1 ) {
			fatalf("error: handling alignments from different contigs %s vs %s in handling_tandem_duplications.c\n", dots[cmp_id].name1, dots[cur_id].name1);
		}
		
		if( dots[cmp_id].ctg_id2 != dots[cur_id].ctg_id2 ) {
			fatalf("error: handling alignments from different contigs %s vs %s in handling_tandem_duplications.c\n", dots[cmp_id].name2, dots[cur_id].name2);
		}

		// nearly equal on either axis: build no candidates (t1/t2 stay unset)
		if( ( strict_almost_equal( dots[cmp_id].x, dots[cur_id].x ) == true ) || ( strict_almost_equal( dots[cmp_id].y, dots[cur_id].y) == true ) ) {}
		// candidates are built only when strict_subset holds for cmp vs cur on both axes
		else if( ( strict_subset( dots[cmp_id].x, dots[cur_id].x ) == true ) && ( strict_subset( dots[cmp_id].y, dots[cur_id].y ) == true ) )
		{
			// t1: taken from the x side (upper or lower end) where cur and cmp differ the most
			if( abs(dots[cur_id].x.upper - dots[cmp_id].x.upper) > abs(dots[cur_id].x.lower - dots[cmp_id].x.lower)	)
			{
				if( ( dots[cur_id].x.upper - dots[cmp_id].x.upper ) <= 0 ) t1.x = assign_I(-1, 0);
				else
				{
					len_x = width(dots[cur_id].x);
					len_y = width(dots[cur_id].y);

					t1.x = assign_I(dots[cmp_id].x.upper, dots[cur_id].x.upper);
					// y length = x width scaled by cur's len_y/len_x ratio (truncated to int)
					cur_len = (int)(((float)(width(t1.x)) * ((float)len_y)/(float)len_x));
					t1.y = assign_I(dots[cur_id].x.upper, dots[cur_id].x.upper + cur_len);
				}
			}
			else
			{
				if( ( dots[cur_id].x.lower - dots[cmp_id].x.lower ) >= 0 ) t1.x = assign_I(-1, 0);
				else
				{
					len_x = width(dots[cur_id].x);
					len_y = width(dots[cur_id].y);

					t1.x = assign_I(dots[cur_id].x.lower, dots[cmp_id].x.lower);
					cur_len = (int)(((float)(width(t1.x)) * ((float)len_y)/(float)len_x));
					t1.y = assign_I(dots[cmp_id].x.lower, dots[cmp_id].x.lower + cur_len);
				}
			}

			// t2: same idea on the y side; its x interval is derived with the inverse ratio
			if( abs(dots[cmp_id].y.lower - dots[cur_id].y.lower) > abs(dots[cur_id].y.upper - dots[cmp_id].y.upper)	)
			{
				if( ( dots[cmp_id].y.lower - dots[cur_id].y.lower ) <= 0 ) t2.x = assign_I(-1, 0); 
				else
				{
					len_x = width(dots[cur_id].x);
					len_y = width(dots[cur_id].y);

					t2.y = assign_I(dots[cur_id].y.lower, dots[cmp_id].y.lower);
					cur_len = (int)(((float)(width(t2.y)) * ((float)len_x)/(float)len_y));
					t2.x = assign_I(dots[cur_id].y.lower - cur_len, dots[cur_id].y.lower);
				}
			}
			else
			{
				if( ( dots[cur_id].y.upper - dots[cmp_id].y.upper ) <= 0 ) t2.x = assign_I(-1, 0);
				else
				{
					len_x = width(dots[cur_id].x);
					len_y = width(dots[cur_id].y);

					t2.y = assign_I(dots[cmp_id].y.upper, dots[cur_id].y.upper);
					cur_len = (int)(((float)(width(t2.y)) * ((float)len_x)/(float)len_y));
					t2.x = assign_I(dots[cmp_id].y.upper - cur_len, dots[cmp_id].y.upper);
				}
			}
		}

		// NOTE(review): val_org_reg is reset and recomputed here in every mode,
		// so the value loaded from val_org[i] above is never used.
		val_org_reg = -1;
		if( !proper_overlap(dots[cur_id].x, dots[cur_id].y) ) {
			// NOTE(review): this STRICT assignment is overwritten on the next line
			val_org_reg = STRICT;
			val_org_reg = check_tandem_reg( dots[cur_id], dots, num );
		}

		if( flag == FIRST_RUN ) {
			// score each candidate that has both intervals set
			if( (t1.x.lower >= 0) && (t1.y.lower >= 0) ) {
				val_t1 = check_tandem_reg( t1, dots, num );
			}
			else val_t1 = -1;

			if( (t2.x.lower >= 0) && (t2.y.lower >= 0)) {
				val_t2 = check_tandem_reg( t2, dots, num );
			}
			else val_t2 = -1;

			// neither scored: fall back to LOOSE for the first candidate with a set x interval
			if( (val_t1 == -1) && (val_t2 == -1) ) {
				if( t1.x.lower >= 0 ) val_t1 = LOOSE;
				else if( t2.x.lower >= 0 ) val_t2 = LOOSE;
			}

			// record the scores so a later non-FIRST_RUN call can reuse them
			val_org[i] = val_org_reg;
			val1[i] = val_t1;
			val2[i] = val_t2;
		}

		// a score for the unmodified region (val_org_reg != -1) leaves dots[cur_id] untouched
		if( val_org_reg != -1 ) {}
		else if( (val_t1 != -1) && (val_t2 != -1) && (t1.x.lower >= 0) && (t1.y.lower >= 0) && (t2.x.lower >= 0) && (t2.y.lower >= 0)) 
		{
			// both candidates usable: take the one with the smaller score, ties go to t1
			if( val_t1 <= val_t2 )
			{
				init_id = dots[cur_id].index;
				if( (flag == FIRST_RUN) && (init_dots[init_id].c_id == -1) && (init_dots[init_id].m_id == -1) ) {
// to recover the original boundaries, the offsets recorded here should simply be subtracted.
					adjust_init_offset(init_dots, init_id, t1, dots, cur_id);
				}

				dots[cur_id].x = assign_I(t1.x.lower, t1.x.upper);
				dots[cur_id].y = assign_I(t1.y.lower, t1.y.upper);
				dots[cur_id].rp1_id = 0;
			}
			else 
			{
				init_id = dots[cur_id].index;
				if( (flag == FIRST_RUN) && (init_dots[init_id].c_id == -1) && (init_dots[init_id].m_id == -1) ) {
					adjust_init_offset(init_dots, init_id, t2, dots, cur_id);
				}
				dots[cur_id].x = assign_I(t2.x.lower, t2.x.upper);
				dots[cur_id].y = assign_I(t2.y.lower, t2.y.upper);
				dots[cur_id].rp1_id = 0;
				init_id = dots[cur_id].index;
			}
		}
		// only t1 usable
		else if( (val_t1 != -1) && (t1.x.lower >= 0) && (t1.y.lower >= 0))
		{
			init_id = dots[cur_id].index;
			if( (flag == FIRST_RUN) && (init_dots[init_id].c_id == -1) && (init_dots[init_id].m_id == -1) ) {
// to reflect the change of the boundaries, the offsets recorded here should simply be added.
				adjust_init_offset(init_dots, init_id, t1, dots, cur_id);
			}

			dots[cur_id].x = assign_I(t1.x.lower, t1.x.upper);
			dots[cur_id].y = assign_I(t1.y.lower, t1.y.upper);
			dots[cur_id].rp1_id = 0;
			init_id = dots[cur_id].index;
		}
		// only t2 usable
		else if( (val_t2 != -1) && (t2.x.lower >= 0) && (t2.y.lower >= 0))
		{
			init_id = dots[cur_id].index;
			if( (flag == FIRST_RUN) && (init_dots[init_id].c_id == -1) && (init_dots[init_id].m_id == -1) ) {
				adjust_init_offset(init_dots, init_id, t2, dots, cur_id);
			}

			dots[cur_id].x = assign_I(t2.x.lower, t2.x.upper);
			dots[cur_id].y = assign_I(t2.y.lower, t2.y.upper);
			dots[cur_id].rp1_id = 0;
			init_id = dots[cur_id].index;
		}
	}

	// ---- tail: handle the last chain entry, t_list[num_tandem-1] ----
	val_org_reg = -1;
	cmp_id = t_list[num_tandem-1];
	len_x = width(dots[cmp_id].x);
	len_y = width(dots[cmp_id].y);
	if( proper_overlap(dots[cmp_id].x, dots[cmp_id].y) ) {
		// x and y overlap: t1.x spans from x.lower to halfway towards y.upper
		// (t1.y is set to the same interval here but is overwritten below)
		t1.x = assign_I(dots[cmp_id].x.lower, dots[cmp_id].x.lower + (dots[cmp_id].y.upper - dots[cmp_id].x.lower)/2);
		t1.y = assign_I(dots[cmp_id].x.lower, dots[cmp_id].x.lower + (dots[cmp_id].y.upper - dots[cmp_id].x.lower)/2);
	}
	else {
		val_org_reg = STRICT;
		t1.x = assign_I(dots[cmp_id].x.lower, dots[cmp_id].x.upper);
		t1.y = assign_I(dots[cmp_id].y.lower, dots[cmp_id].y.upper);
	}

	// t1.y always restarts at t1.x.upper with the len_y/len_x scaled length
	cur_len = (int)(((float)(width(t1.x)) * ((float)len_y)/(float)len_x));
	t1.y = assign_I(t1.x.upper, t1.x.upper + cur_len);
	// t2 still holds whatever the last loop iteration left in it
	if( t2.y.lower != -1 ) {
		cur_len = (int)(((float)(width(t2.y)) * ((float)len_x)/(float)len_y));
		t2.x = assign_I(t2.y.lower - cur_len, t2.y.lower);
	}
	else t2.x = assign_I(-1,0);

	if( flag == FIRST_RUN ) {
		if( val_org_reg != -1 ) val_org_reg = check_tandem_reg(dots[cmp_id], dots, num);
		if( (t1.x.lower >= 0) && (t1.y.lower >= 0) ) val_t1 = check_tandem_reg(t1, dots, num);
		else val_t1 = -1;

		if( (t2.x.lower < 0) || (t2.y.lower < 0) ) val_t2 = -1;
		else val_t2 = check_tandem_reg(t2, dots, num);
		// the tail's scores live in slot num_tandem, one past the per-step slots
		val_org[num_tandem] = val_org_reg;
		val1[num_tandem] = val_t1;
		val2[num_tandem] = val_t2;
	}
	else {
		val_org_reg = val_org[num_tandem];
		val_t1 = val1[num_tandem];
		val_t2 = val2[num_tandem];
	}

	// a candidate with both lower bounds negative is never used
	if( (t1.x.lower < 0) && (t1.y.lower < 0) ) val_t1 = -1;
	if( (t2.x.lower < 0) && (t2.y.lower < 0) ) val_t2 = -1;

	if( val_org_reg != -1 ) {}
	else if( (val_t1 != -1) && (val_t2 != -1) ) {
		// strict '<' here: ties go to t2 (the loop above uses '<=' and prefers t1)
		if( val_t1 < val_t2 ) {
			assign_algn(cur_t, 0, t1);
		}
		else assign_algn(cur_t, 0, t2);
	}		
	else if( val_t1 != -1 ) assign_algn(cur_t, 0, t1);
	else if( val_t2 != -1 ) assign_algn(cur_t, 0, t2);

	if( val_org_reg != -1 ) {}
	else if( (val_t1 != -1) || (val_t2 != -1) ) {
		init_id = dots[cmp_id].index;
		if( (flag == FIRST_RUN) && (init_dots[init_id].c_id == -1) && (init_dots[init_id].m_id == -1) ) {
// to reflect the change of the boundaries, the offsets recorded here should simply be added.
			adjust_init_offset(init_dots, init_id, *cur_t, dots, cmp_id);
		}

		dots[cmp_id].x = assign_I((*cur_t).x.lower, (*cur_t).x.upper);
		dots[cmp_id].y = assign_I((*cur_t).y.lower, (*cur_t).y.upper);
		dots[cmp_id].rp1_id = 0;
	}

	free(cur_t);
}
コード例 #13
0
ファイル: handle_tandem_dup.c プロジェクト: gtsong/CHAP2
/*
 * Runs conv_td_reg() over the self-alignments in dots[0..*num-1].
 *
 * The entries with pair_self == SELF are copied into a private list (each copy
 * keeps its index in dots in c_id), ordered with sort_by_width(), and visited
 * in that order.  For every visited copy whose x and y intervals properly
 * overlap, find_tandem_list() collects a list of related copies; when the list
 * is not empty, conv_td_reg() is applied twice: first to dots itself
 * (FIRST_RUN) and then to the private copies (SECOND_RUN), sharing the
 * val1/val2/val_org score arrays between the two calls.
 *
 * Nothing is done when dots contains no self-alignment.  *num is only read.
 */
void handle_tandem_dup(struct DotList *dots, int *num, struct DotList *init_dots)
{
	struct DotList *self_algns;	// private copies of the SELF entries
	struct slist *by_width;		// visiting order over self_algns
	int *tandem_ids;		// list from find_tandem_list: indices into self_algns
	int *tandem_dots;		// the same list as indices into dots
	int *val1, *val2, *val_org;
	int n_self = 0;
	int n_tandem = 0;
	int total = 0;
	int idx = 0, k = 0, cur = 0;

	for( idx = 0; idx < *num; idx++ ) {
		if( dots[idx].pair_self == SELF ) n_self++;
	}

	if( n_self <= 0 ) return;

	self_algns = (struct DotList *) ckalloc(n_self * (sizeof(struct DotList)));
	by_width = (struct slist *) ckalloc(n_self * (sizeof(struct slist)));
	tandem_ids = (int *) ckalloc(n_self * (sizeof(int)));
	tandem_dots = (int *) ckalloc(n_self * (sizeof(int)));
	val1 = (int *) ckalloc(n_self * (sizeof(int)));
	val2 = (int *) ckalloc(n_self * (sizeof(int)));
	val_org = (int *) ckalloc(n_self * (sizeof(int)));

	initialize_algns(self_algns, 0, n_self);
	initialize_slist(by_width, 0, n_self);

	// copy every SELF entry, remembering where it came from
	k = 0;
	for( idx = 0; idx < *num; idx++ ) {
		if( dots[idx].pair_self != SELF ) continue;

		assign_algn(self_algns, k, dots[idx]);
		self_algns[k].c_id = idx;
		k++;
	}

	n_self = k;
	total = *num;

	for( idx = 0; idx < n_self; idx++ ) {
		tandem_ids[idx] = 0;
		tandem_dots[idx] = 0;
		val1[idx] = -1;
		val2[idx] = -1;
		val_org[idx] = -1;
		by_width[idx].id = idx;
	}
	sort_by_width(by_width, self_algns, n_self);

	for( idx = 0; idx < n_self; idx++ ) {
		cur = by_width[idx].id;

		if( (self_algns[cur].sign == 2) || (self_algns[cur].pair_self == PAIR) ) continue;
		if( proper_overlap( self_algns[cur].x, self_algns[cur].y ) != true ) continue;

		n_tandem = find_tandem_list(self_algns, by_width, idx, n_self, tandem_ids);
		if( n_tandem <= 0 ) continue;

		// translate the list from self_algns indices to dots indices
		for( k = 0; k < n_tandem; k++ ) {
			tandem_dots[k] = self_algns[tandem_ids[k]].c_id;
		}

		conv_td_reg(dots, total, self_algns[cur].c_id, tandem_dots, n_tandem, init_dots, FIRST_RUN, val1, val2, val_org);
		conv_td_reg(self_algns, n_self, cur, tandem_ids, n_tandem, init_dots, SECOND_RUN, val1, val2, val_org);
	}

	free(val_org);
	free(val1);
	free(val2);
	free(tandem_dots);
	free(tandem_ids);
	free(by_width);
	free(self_algns);
}