void get_elm_list(int *num_id, int *elm_id, int id, bool is_x, int num_list, struct DotList *dots) { int i = 0; struct I temp; int t_val = LOOSE; *num_id = 0; if( is_x == true ) { temp = assign_I(dots[id].x.lower, dots[id].x.upper); } else temp = assign_I(dots[id].y.lower, dots[id].y.upper); while( i < num_list ) { if( (i == id) || (dots[i].sign == 2) ) i++; else { if( ((f_loose_subset(dots[i].x, temp, t_val) == false) && (f_loose_overlap(temp, dots[i].x, t_val) == true)) || ((f_loose_subset(dots[i].y, temp, t_val) == false) && (f_loose_overlap(temp, dots[i].y, t_val) == true))) { elm_id[*num_id] = i; (*num_id)++; } i++; } } }
int find_status(struct cv_list cur_cv, struct cv_list *cv, int num_cv, char *name1, char *name2) { int i = 0; struct I src1, dst1, src2, dst2; int res = -1; char name[50]; src1 = assign_I(cur_cv.a1, cur_cv.a2); dst1 = assign_I(cur_cv.b1, cur_cv.b2); for( i = 0; i < num_cv; i++ ) { src2 = assign_I(cv[i].a1, cv[i].a2); dst2 = assign_I(cv[i].b1, cv[i].b2); if( (strcmp( cv[i].name2, "NAN" ) != 0) && (strcmp( cv[i].name3, "NAN") != 0) ) { strcpy( name, cv[i].name2 ); } else if( (strcmp( cv[i].name2, "NAN" ) == 0) && (strcmp( cv[i].name3, "NAN") != 0) ) { strcpy( name, cv[i].name3 ); } else if( (strcmp( cv[i].name3, "NAN" ) == 0) && (strcmp( cv[i].name2, "NAN") != 0) ) { strcpy( name, cv[i].name2 ); } else { fatalf("both out-species not found %s %s\n", cv[i].name2, cv[i].name3); } if( (cur_cv.fid == cv[i].fid) && ( ((strict_almost_equal(src1, src2) == true) && (strict_almost_equal(dst1, dst2) == true)) || ( (strict_almost_equal(src1, dst2) == true) && (strict_almost_equal(dst1, src2) == true) )) && (strcmp(name1, cv[i].name1) == 0) && (strcmp(name2, name) == 0)) { res = i; } } if( res == -1 ) { fatalf("status not found %d\n", cur_cv.fid); } return(res); }
void predict_sp_op(int sp_code, int rm_sp, int left_sp, int *num_list, struct DotList *dots, int *cur_num, struct ops_list *ops) { char op_ch; int r_st = -1, r_end = -1; // the range of a removed species struct I temp_reg; int len; int i = 0; check_gene_loss(num_list, dots, sp_code, rm_sp, left_sp, cur_num, ops); op_ch = 's'; for( i = 0; i < (*num_list); i++ ) { if( dots[i].sp_id == sp_code ) { if( ( r_st == -1 ) && ( r_end == -1 ) ) { r_st = dots[i].y.lower; r_end = dots[i].y.upper; } else { if( dots[i].y.lower < r_st ) r_st = dots[i].y.lower; if( dots[i].y.upper > r_end ) r_end = dots[i].y.upper; } } } temp_reg = assign_I(r_st, r_end); len = r_end - r_st + 1; for( i = 0; i < (*num_list); i++ ) { if( (proper_overlap(temp_reg, dots[i].x) == true) || (proper_overlap(temp_reg, dots[i].y) == true) ) { dots[i].sign = 2; } else { if( dots[i].x.lower > r_st ) { dots[i].x = assign_I(dots[i].x.lower - len, dots[i].x.upper - len); dots[i].y = assign_I(dots[i].y.lower - len, dots[i].y.upper - len); } else if( dots[i].y.lower > r_st ) { dots[i].y = assign_I(dots[i].y.lower - len, dots[i].y.upper - len); } } } overwrite_dots(num_list, dots); ops[*cur_num].sign = op_ch; ops[*cur_num].src_b = r_st; ops[*cur_num].src_e = r_end; ops[*cur_num].dst_b = 0; ops[*cur_num].dst_e = 0; ops[*cur_num].sp_id = rm_sp; }
int check_into_own(struct DotList *dots, int loc_id, int comp_id) { struct I temp; int res; if( (dots[loc_id].sign == 0) && (dots[comp_id].sign == 0) ) { if( (strict_overlap(dots[loc_id].x, dots[loc_id].y, 10*T_OP_TH) == true) && (strict_overlap(dots[comp_id].x, dots[comp_id].y, 10*T_OP_TH) == true ) && (dots[loc_id].x.upper < dots[comp_id].x.lower) ) { temp = assign_I(dots[loc_id].x.upper, dots[comp_id].x.lower); if( strict_almost_equal(dots[loc_id].y, temp) == true ) { res = COPY_OWN; } else res = NON_COPY; } else if( (strict_overlap(dots[loc_id].x, dots[loc_id].y, 10*T_OP_TH) == true) && (strict_overlap(dots[comp_id].x, dots[comp_id].y, 10*T_OP_TH) == true) && (dots[comp_id].x.upper < dots[loc_id].x.lower ) ) { temp = assign_I(dots[comp_id].x.upper, dots[loc_id].x.lower); if( strict_almost_equal(dots[comp_id].y, temp) == true ) { res = COPY_OWN; } else res = NON_COPY; } else { res = NON_COPY; } } else if( (dots[loc_id].sign == 1) && (dots[comp_id].sign == 1) ) { if( (strict_overlap(dots[loc_id].x, dots[comp_id].x, 10*T_OP_TH) == true ) && (strict_overlap(dots[loc_id].y, dots[comp_id].y, 10*T_OP_TH) == true ) && (dots[comp_id].x.upper < dots[comp_id].y.lower)) { temp = assign_I(dots[comp_id].x.upper, dots[comp_id].y.lower); if( strict_almost_equal(dots[loc_id].y, temp) == true ) { res = COPY_OWN_INV; } else res = NON_COPY; } else if( (strict_overlap(dots[loc_id].x, dots[comp_id].x, 10*T_OP_TH) == true ) &&(strict_overlap(dots[loc_id].y, dots[comp_id].y, 10*T_OP_TH) == true ) && (dots[loc_id].x.upper < dots[loc_id].y.lower)) { temp = assign_I(dots[loc_id].x.upper, dots[loc_id].y.lower); if( strict_almost_equal(dots[comp_id].y, temp) == true ) { res = COPY_OWN_INV; } else res = NON_COPY; } else { res = NON_COPY; } } else res = NON_COPY; return(res); }
struct exons_list assign_exons(struct exons_list a) { struct exons_list res; res.fid = a.fid; // id in the inital list res.reg = assign_I(a.reg.lower, a.reg.upper); res.cmp_reg = assign_I(a.cmp_reg.lower, a.cmp_reg.upper); res.sp_id = a.sp_id; res.val = a.val; res.sign = a.sign; // '<' or '>' res.ctg_id = a.ctg_id; return(res); }
/*
 * Propagate a boundary change of algns[cur_id] to its initial alignment.
 *
 * t1 holds the new x/y ranges that are about to replace those of
 * algns[cur_id].  The per-end differences (new minus current) are added to
 * the four offset fields and to the x/y ranges of init_algns[init_id], and
 * its rp1_id is set to 0.
 *
 * Nothing is modified unless both shifted ranges stay non-empty, i.e. the
 * shifted x.upper is greater than the shifted x.lower and the shifted
 * y.lower is smaller than the shifted y.upper.
 */
void adjust_init_offset(struct DotList *init_algns, int init_id, struct DotList t1, struct DotList *algns, int cur_id)
{
	struct DotList *init = &init_algns[init_id];

	/* change of each end: new boundary minus current boundary */
	int d_xl = t1.x.lower - algns[cur_id].x.lower;
	int d_xr = t1.x.upper - algns[cur_id].x.upper;
	int d_yl = t1.y.lower - algns[cur_id].y.lower;
	int d_yr = t1.y.upper - algns[cur_id].y.upper;

	/* boundaries of the initial alignment after the shift */
	int new_xl = init->x.lower + d_xl;
	int new_xr = init->x.upper + d_xr;
	int new_yl = init->y.lower + d_yl;
	int new_yr = init->y.upper + d_yr;

	if( (new_xr <= new_xl) || (new_yl >= new_yr) ) return;

	init->xl_offset = init->xl_offset + d_xl;
	init->xr_offset = init->xr_offset + d_xr;
	init->yl_offset = init->yl_offset + d_yl;
	init->yr_offset = init->yr_offset + d_yr;

	init->x = assign_I(new_xl, new_xr);
	init->y = assign_I(new_yl, new_yr);
	init->rp1_id = 0;
}
bool is_repeats(struct exons_list *exons, int num_exons, char *name, int from, int to) // exons assume to be already sorted by genomic positions { int i = 0; int mid = 0; bool res = false; struct I reg; if( to > from ) { reg = assign_I(from, to); } else { fatalf("unexpected interval: %d-%d\n", from, to); } mid = quick_search_close_exons(exons, 0, num_exons-1, from); i = mid; while( (res == false) && (i < num_exons) && (exons[i].reg.lower <= to)) { if( width(reg) <= SHORT_LEN_TH ) { // (strcmp(reg, exons[i].reg) == 0) && (almost_subset(reg, exons[i].reg) == true) ) { // printf("%d-%d too short\n", from, to); res = true; } else if( (strcmp(name, exons[i].chr) == 0) && subset(reg, exons[i].reg) ) { // printf("%d-%d belongs to %s %d-%d\n", from, to, exons[i].chr, exons[i].reg.lower, exons[i].reg.upper); res = true; } i++; } return(res); }
bool is_s_list(struct DotList *dots, int ins_id, int cur_id, struct ID_List *dlist, int num_dup, struct kdnode *tree, struct perm_pt *p_pts, int size, FILE *fp, struct DotList *init_dots) { bool res = false; int i = 0; struct I temp; int x_opt_id, y_opt_id; bool *f_is_x; f_is_x = (bool *) ckalloc(sizeof(bool)); x_opt_id = find_alt_ins_id(cur_id, dots, tree, p_pts, ins_id, true, f_is_x, size, fp, init_dots); y_opt_id = find_alt_ins_id(cur_id, dots, tree, p_pts, ins_id, false, f_is_x, size, fp, init_dots); if( (x_opt_id != -1) || (y_opt_id != -1) ) { res = true; } while( (i < num_dup) && (res == false)) { if( dlist[i].is_x == true ) { temp = assign_I(dots[dlist[i].m_id].x.lower, dots[dlist[i].m_id].x.upper); } else { temp = assign_I(dots[dlist[i].m_id].y.lower, dots[dlist[i].m_id].y.upper); } if( (dlist[i].m_id == ins_id) || (strict_almost_equal(dots[ins_id].x, temp) == true) || (strict_almost_equal(dots[ins_id].y, temp) == true) ) { if( (cur_id == dlist[i].left_id) || (cur_id == dlist[i].right_id) ) { res = true; } } i++; } free(f_is_x); return(res); }
bool is_on_prev_events(struct I reg, struct ops_list *ops, int from, int to) { int i = 0; struct I src, dst; bool res = false; i = from; while( (i <= to) && (res == false) ) { if( (ops[i].sign == '+') || (ops[i].sign == '-') ) { src = assign_I(ops[i].srcStart, ops[i].srcEnd); dst = assign_I(ops[i].dstStart, ops[i].dstEnd); if( (f_loose_subset(reg, src, STRICT) == true ) || (f_loose_subset(reg, dst, STRICT) == true) ) { res = true; } } i++; } return(res); }
bool tandem_exist(struct DotList *dots, struct perm_pt *p_pts, struct kdnode *tree, int size, int id1, int id2) { bool res = false; struct I reg1, reg2; int sid = 0, eid = 0; int i = 0; int cur_id = 0; reg1 = assign_I(0, 1); reg2 = assign_I(0, 1); if( (dots[id1].sign == dots[id2].sign) && (proper_overlap(dots[id1].x, dots[id2].x) == true ) && (proper_overlap(dots[id1].y, dots[id2].y) == true) ) { reg1 = intersect(dots[id1].x, dots[id2].x); sid = find_id_len(tree, size, width(reg1), reg1.lower, reg1.lower, W_SID); eid = find_id_len(tree, size, width(reg1), reg1.upper, reg1.upper, W_FID); i = sid; while( (i <= eid) && (res == false) ) { cur_id = p_pts[i].id; if( is_tandem(dots[cur_id]) == true ) res = true; i++; } if( res == false ) { reg2 = intersect(dots[id1].y, dots[id2].y); sid = find_id_len(tree, size, width(reg2), reg2.lower, reg2.lower, W_SID); eid = find_id_len(tree, size, width(reg2), reg2.upper, reg2.upper, W_FID); i = sid; while( (i <= eid) && (res == false)) { cur_id = p_pts[i].id; if( is_tandem(dots[cur_id]) == true ) res = true; i++; } } } return(res); }
/*
 * Check whether a gap region contains an alignment of higher identity than
 * the alignments flanking the gap.
 *
 * The gap is described by the x interval (gp.x1, gp.x2) and the y interval
 * (gp.y1, gp.y2).  Alignments with sign 2 are ignored.  The result is true
 * when some alignment
 *   - has its x range inside the gap's x interval and an identity greater
 *     than that of dots[gp.id1], or
 *   - has its y range inside the gap's y interval and an identity greater
 *     than that of dots[gp.id2].
 *
 * An interval whose start is not smaller than its end is treated as absent:
 * nothing can be included in it.  (Previously such an interval was left
 * uninitialized and still passed to subset().)
 */
bool check_inclusion_alignments(struct gap_list gp, struct DotList *dots, int num)
{
	struct I x, y;
	bool has_x = false, has_y = false;
	bool res = false;
	int i;

	if( gp.x1 < gp.x2 ) {
		x = assign_I(gp.x1, gp.x2);
		has_x = true;
	}

	if( gp.y1 < gp.y2 ) {
		y = assign_I(gp.y1, gp.y2);
		has_y = true;
	}

	if( (has_x == false) && (has_y == false) ) return(false);

	for( i = 0; (i < num) && (res == false); i++ ) {
		if( dots[i].sign == 2 ) continue;

		if( (has_x == true) && (subset(dots[i].x, x) == true) && (dots[i].identity > dots[gp.id1].identity) ) {
			res = true;
		}
		else if( (has_y == true) && (subset(dots[i].y, y) == true) && (dots[i].identity > dots[gp.id2].identity) ) {
			res = true;
		}
	}

	return(res);
}
/*
 * Reset a tree node to its default, unlinked state: no parent or children,
 * placeholder region (0,1), no name, zero branch length, and the numeric
 * identifiers set to -1.
 */
void init_tree(struct p_tree *t)
{
	/* links */
	t->parent = NULL;
	t->left = NULL;
	t->right = NULL;
	t->ch_sp = NULL;    /* the list of children nodes (species for a species tree) */
	t->num_csp = 0;     /* the number of child species */

	/* identity */
	t->name = NULL;
	t->sp_code = -1;    /* a code number of self-alignment: species id, seq id for orthologous mappings */
	t->gid = -1;        /* a gene identifier */
	t->nid = -1;        /* a node identifier */

	/* payload */
	t->reg = assign_I(0,1);
	t->b_len = 0.0;
	t->val = 0;
	t->d_mode = SP;
	t->od = 0;          /* orientation for printing orthologous alignments */

	/* traversal state */
	t->depth = 0;       /* the depth of each node */
	t->visited = false;
}
void conv_td_reg(struct DotList *dots, int num, int id, int *t_list, int num_tandem, struct DotList *init_dots, int flag, int *val1, int *val2, int *val_org) { int i; int cur_id, cmp_id; struct DotList t1, t2; struct DotList *cur_t; int len_x, len_y; int cur_len = 0; int val_t1, val_t2, val_org_reg; int init_id; cur_t = (struct DotList *) ckalloc(sizeof(struct DotList)); for( i = 0; i < num_tandem; i++ ) { if( flag == FIRST_RUN ) { val_org_reg = -1; val_t1 = -1; val_t2 = -1; } else { val_org_reg = val_org[i]; val_t1 = val1[i]; val_t2 = val2[i]; } t1.x = assign_I(-1, 0); t2.x = assign_I(-1, 0); t1.y = assign_I(-1, 0); t2.y = assign_I(-1, 0); cmp_id = t_list[i]; if( i == 0 ) cur_id = id; else cur_id = t_list[i-1]; if( dots[cmp_id].ctg_id1 != dots[cur_id].ctg_id1 ) { fatalf("error: handling alignments from different contigs %s vs %s in handling_tandem_duplications.c\n", dots[cmp_id].name1, dots[cur_id].name1); } if( dots[cmp_id].ctg_id2 != dots[cur_id].ctg_id2 ) { fatalf("error: handling alignments from different contigs %s vs %s in handling_tandem_duplications.c\n", dots[cmp_id].name2, dots[cur_id].name2); } if( ( strict_almost_equal( dots[cmp_id].x, dots[cur_id].x ) == true ) || ( strict_almost_equal( dots[cmp_id].y, dots[cur_id].y) == true ) ) {} else if( ( strict_subset( dots[cmp_id].x, dots[cur_id].x ) == true ) && ( strict_subset( dots[cmp_id].y, dots[cur_id].y ) == true ) ) { if( abs(dots[cur_id].x.upper - dots[cmp_id].x.upper) > abs(dots[cur_id].x.lower - dots[cmp_id].x.lower) ) { if( ( dots[cur_id].x.upper - dots[cmp_id].x.upper ) <= 0 ) t1.x = assign_I(-1, 0); else { len_x = width(dots[cur_id].x); len_y = width(dots[cur_id].y); t1.x = assign_I(dots[cmp_id].x.upper, dots[cur_id].x.upper); cur_len = (int)(((float)(width(t1.x)) * ((float)len_y)/(float)len_x)); t1.y = assign_I(dots[cur_id].x.upper, dots[cur_id].x.upper + cur_len); } } else { if( ( dots[cur_id].x.lower - dots[cmp_id].x.lower ) >= 0 ) t1.x = assign_I(-1, 0); else { len_x = width(dots[cur_id].x); 
len_y = width(dots[cur_id].y); t1.x = assign_I(dots[cur_id].x.lower, dots[cmp_id].x.lower); cur_len = (int)(((float)(width(t1.x)) * ((float)len_y)/(float)len_x)); t1.y = assign_I(dots[cmp_id].x.lower, dots[cmp_id].x.lower + cur_len); } } if( abs(dots[cmp_id].y.lower - dots[cur_id].y.lower) > abs(dots[cur_id].y.upper - dots[cmp_id].y.upper) ) { if( ( dots[cmp_id].y.lower - dots[cur_id].y.lower ) <= 0 ) t2.x = assign_I(-1, 0); else { len_x = width(dots[cur_id].x); len_y = width(dots[cur_id].y); t2.y = assign_I(dots[cur_id].y.lower, dots[cmp_id].y.lower); cur_len = (int)(((float)(width(t2.y)) * ((float)len_x)/(float)len_y)); t2.x = assign_I(dots[cur_id].y.lower - cur_len, dots[cur_id].y.lower); } } else { if( ( dots[cur_id].y.upper - dots[cmp_id].y.upper ) <= 0 ) t2.x = assign_I(-1, 0); else { len_x = width(dots[cur_id].x); len_y = width(dots[cur_id].y); t2.y = assign_I(dots[cmp_id].y.upper, dots[cur_id].y.upper); cur_len = (int)(((float)(width(t2.y)) * ((float)len_x)/(float)len_y)); t2.x = assign_I(dots[cmp_id].y.upper - cur_len, dots[cmp_id].y.upper); } } } val_org_reg = -1; if( !proper_overlap(dots[cur_id].x, dots[cur_id].y) ) { val_org_reg = STRICT; val_org_reg = check_tandem_reg( dots[cur_id], dots, num ); } if( flag == FIRST_RUN ) { if( (t1.x.lower >= 0) && (t1.y.lower >= 0) ) { val_t1 = check_tandem_reg( t1, dots, num ); } else val_t1 = -1; if( (t2.x.lower >= 0) && (t2.y.lower >= 0)) { val_t2 = check_tandem_reg( t2, dots, num ); } else val_t2 = -1; if( (val_t1 == -1) && (val_t2 == -1) ) { if( t1.x.lower >= 0 ) val_t1 = LOOSE; else if( t2.x.lower >= 0 ) val_t2 = LOOSE; } val_org[i] = val_org_reg; val1[i] = val_t1; val2[i] = val_t2; } if( val_org_reg != -1 ) {} else if( (val_t1 != -1) && (val_t2 != -1) && (t1.x.lower >= 0) && (t1.y.lower >= 0) && (t2.x.lower >= 0) && (t2.y.lower >= 0)) { if( val_t1 <= val_t2 ) { init_id = dots[cur_id].index; if( (flag == FIRST_RUN) && (init_dots[init_id].c_id == -1) && (init_dots[init_id].m_id == -1) ) { // in order to get the 
original boundaries, offsets defined here should be just substrated. adjust_init_offset(init_dots, init_id, t1, dots, cur_id); } dots[cur_id].x = assign_I(t1.x.lower, t1.x.upper); dots[cur_id].y = assign_I(t1.y.lower, t1.y.upper); dots[cur_id].rp1_id = 0; } else { init_id = dots[cur_id].index; if( (flag == FIRST_RUN) && (init_dots[init_id].c_id == -1) && (init_dots[init_id].m_id == -1) ) { adjust_init_offset(init_dots, init_id, t2, dots, cur_id); } dots[cur_id].x = assign_I(t2.x.lower, t2.x.upper); dots[cur_id].y = assign_I(t2.y.lower, t2.y.upper); dots[cur_id].rp1_id = 0; init_id = dots[cur_id].index; } } else if( (val_t1 != -1) && (t1.x.lower >= 0) && (t1.y.lower >= 0)) { init_id = dots[cur_id].index; if( (flag == FIRST_RUN) && (init_dots[init_id].c_id == -1) && (init_dots[init_id].m_id == -1) ) { // in order to reflect the change of the boundaries, offsets defined here should be just added. adjust_init_offset(init_dots, init_id, t1, dots, cur_id); } dots[cur_id].x = assign_I(t1.x.lower, t1.x.upper); dots[cur_id].y = assign_I(t1.y.lower, t1.y.upper); dots[cur_id].rp1_id = 0; init_id = dots[cur_id].index; } else if( (val_t2 != -1) && (t2.x.lower >= 0) && (t2.y.lower >= 0)) { init_id = dots[cur_id].index; if( (flag == FIRST_RUN) && (init_dots[init_id].c_id == -1) && (init_dots[init_id].m_id == -1) ) { adjust_init_offset(init_dots, init_id, t2, dots, cur_id); } dots[cur_id].x = assign_I(t2.x.lower, t2.x.upper); dots[cur_id].y = assign_I(t2.y.lower, t2.y.upper); dots[cur_id].rp1_id = 0; init_id = dots[cur_id].index; } } val_org_reg = -1; cmp_id = t_list[num_tandem-1]; len_x = width(dots[cmp_id].x); len_y = width(dots[cmp_id].y); if( proper_overlap(dots[cmp_id].x, dots[cmp_id].y) ) { t1.x = assign_I(dots[cmp_id].x.lower, dots[cmp_id].x.lower + (dots[cmp_id].y.upper - dots[cmp_id].x.lower)/2); t1.y = assign_I(dots[cmp_id].x.lower, dots[cmp_id].x.lower + (dots[cmp_id].y.upper - dots[cmp_id].x.lower)/2); } else { val_org_reg = STRICT; t1.x = 
assign_I(dots[cmp_id].x.lower, dots[cmp_id].x.upper); t1.y = assign_I(dots[cmp_id].y.lower, dots[cmp_id].y.upper); } cur_len = (int)(((float)(width(t1.x)) * ((float)len_y)/(float)len_x)); t1.y = assign_I(t1.x.upper, t1.x.upper + cur_len); if( t2.y.lower != -1 ) { cur_len = (int)(((float)(width(t2.y)) * ((float)len_x)/(float)len_y)); t2.x = assign_I(t2.y.lower - cur_len, t2.y.lower); } else t2.x = assign_I(-1,0); if( flag == FIRST_RUN ) { if( val_org_reg != -1 ) val_org_reg = check_tandem_reg(dots[cmp_id], dots, num); if( (t1.x.lower >= 0) && (t1.y.lower >= 0) ) val_t1 = check_tandem_reg(t1, dots, num); else val_t1 = -1; if( (t2.x.lower < 0) || (t2.y.lower < 0) ) val_t2 = -1; else val_t2 = check_tandem_reg(t2, dots, num); val_org[num_tandem] = val_org_reg; val1[num_tandem] = val_t1; val2[num_tandem] = val_t2; } else { val_org_reg = val_org[num_tandem]; val_t1 = val1[num_tandem]; val_t2 = val2[num_tandem]; } if( (t1.x.lower < 0) && (t1.y.lower < 0) ) val_t1 = -1; if( (t2.x.lower < 0) && (t2.y.lower < 0) ) val_t2 = -1; if( val_org_reg != -1 ) {} else if( (val_t1 != -1) && (val_t2 != -1) ) { if( val_t1 < val_t2 ) { assign_algn(cur_t, 0, t1); } else assign_algn(cur_t, 0, t2); } else if( val_t1 != -1 ) assign_algn(cur_t, 0, t1); else if( val_t2 != -1 ) assign_algn(cur_t, 0, t2); if( val_org_reg != -1 ) {} else if( (val_t1 != -1) || (val_t2 != -1) ) { init_id = dots[cmp_id].index; if( (flag == FIRST_RUN) && (init_dots[init_id].c_id == -1) && (init_dots[init_id].m_id == -1) ) { // in order to reflect the change of the boundaries, offsets defined here should be just added. adjust_init_offset(init_dots, init_id, *cur_t, dots, cmp_id); } dots[cmp_id].x = assign_I((*cur_t).x.lower, (*cur_t).x.upper); dots[cmp_id].y = assign_I((*cur_t).y.lower, (*cur_t).y.upper); dots[cmp_id].rp1_id = 0; } free(cur_t); }
/*
 * Vote on which side of a duplication is the better-conserved copy, using
 * the self-alignments in self[].
 *
 * For every self-alignment i that (almost) covers or loosely overlaps
 * check_x with one of its ranges, the part of that range lying inside
 * check_x is expressed relative to check_x.lower (cur_1), the partner range
 * is trimmed by the same amounts (cur_2), and cur_1 is projected onto
 * check_y (cmp_1; mirrored when sign is 1).  The list is then searched for
 * another self-alignment j whose x/y ranges match (cmp_1, cmp_2), either
 * strict_almost_equal or containing them within DIS_THRESHOLD.  The
 * identities of i and j are compared and the larger one earns a vote for
 * the left (i) or the right (j) side.
 *
 * Returns LEFT_SIDE, RIGHT_SIDE or TIE according to the vote counts.
 */
int det_dup_reg_in_self(int num_list, struct DotList *self, struct I check_x, struct I check_y, int sign)
{
	int i = 0, j = 0;
	struct I temp_1, temp_2;
	struct I cur_1, cur_2, cmp_1, cmp_2;
	bool is_end = false;
	int count_left = 0, count_right = 0;
	int left_pid = 0, right_pid = 0;
	int cut_len_left = 0, cut_len_right = 0;
	int res = TIE;
	int distance = 0;
	bool is_candi = false;
	bool is_assigned = false;

	temp_1 = assign_I(0, 1);
	temp_2 = assign_I(0, 1);

	for( i = 0; i < num_list; i++ ) {
		cur_1 = assign_I(0, 1);
		cur_2 = assign_I(0, 1);
		cmp_1 = assign_I(0, 1);
		cmp_2 = assign_I(0, 1);
		is_candi = false;
		left_pid = self[i].identity;
		right_pid = self[i].identity;

		/* Select the range of self[i] that lies on check_x (temp_1) and its partner (temp_2). */
		/* Once a previously selected pair is too short, is_end is set here and is never */
		/* cleared again (the reset below only runs for candidates), so all remaining */
		/* entries are skipped. */
		if( ( is_assigned == true ) && ((width(temp_1) <= MIN_INTERVAL) || (width(temp_2) <= MIN_INTERVAL)) ) {
			is_end = true;
		}
		else if( (width(self[i].x) <= MIN_INTERVAL) || (width(self[i].y) <= MIN_INTERVAL) ) {
			is_candi = false;
		}
		else if( (almost_subset(self[i].x, check_x) == true) || ((f_loose_overlap(self[i].x, check_x, SECOND_RUN) == true) && (width(intersect(self[i].x, check_x)) >= MIN_LEN) )) {
			temp_1 = assign_I(self[i].x.lower, self[i].x.upper);
			temp_2 = assign_I(self[i].y.lower, self[i].y.upper);
			is_assigned = true;
			is_candi = true;
		}
		/* NOTE(review): unlike the x case above, the y case has no MIN_LEN test on the intersection - confirm */
		else if( (almost_subset(self[i].y, check_x) == true) || (f_loose_overlap(self[i].y, check_x, SECOND_RUN) == true) ) {
			temp_1 = assign_I(self[i].y.lower, self[i].y.upper);
			temp_2 = assign_I(self[i].x.lower, self[i].x.upper);
			is_assigned = true;
			is_candi = true;
		}
		else {
			is_candi = false;
		}

		if( (is_end == false) && (is_candi == true) ) {
			/* Clip temp_1 to check_x: cur_1 is the clipped range relative to check_x.lower, */
			/* cur_2 is temp_2 shortened by the same amounts.  An empty result sets is_end. */
			if( (temp_1.lower < check_x.lower) && (temp_1.upper > check_x.upper) ) {
				/* temp_1 sticks out on both sides */
				if( width(check_x) > 0 ) {
					cur_1 = assign_I(0, width(check_x));
					cut_len_left = check_x.lower - temp_1.lower;
					cut_len_right = temp_1.upper - check_x.upper;
					if( (temp_2.upper - cut_len_right) <= (temp_2.lower + cut_len_left) ) {
						is_end = true;
					}
					else {
						cur_2 = assign_I(temp_2.lower + cut_len_left, temp_2.upper - cut_len_right);
					}
				}
				else {
					is_end = true;
				}
			}
			else {
				if( temp_1.lower < check_x.lower ) {
					/* temp_1 sticks out on the left only */
					if( temp_1.upper > check_x.lower ) {
						cur_1 = assign_I( 0, temp_1.upper - check_x.lower );
						cut_len_left = check_x.lower - temp_1.lower;
						if( temp_2.upper <= (temp_2.lower + cut_len_left) ) {
							is_end = true;
						}
						else {
							cur_2 = assign_I( temp_2.lower + cut_len_left, temp_2.upper );
						}
					}
					else {
						is_end = true;
					}
				}
				else if( temp_1.upper > check_x.upper ) {
					/* temp_1 sticks out on the right only */
					if( width(check_x) > (temp_1.lower - check_x.lower) ) {
						cur_1 = assign_I( temp_1.lower - check_x.lower, width(check_x) );
						cut_len_right = temp_1.upper - check_x.upper;
						if( (temp_2.upper - cut_len_right) <= temp_2.lower ) {
							is_end = true;
						}
						else {
							cur_2 = assign_I( temp_2.lower, temp_2.upper - cut_len_right );
						}
					}
					else {
						is_end = true;
					}
				}
				else {
					/* temp_1 lies completely inside check_x */
					if( ( temp_1.upper - check_x.lower ) <= (temp_1.lower - check_x.lower) ) {
						is_end = true;
					}
					else {
						cur_1 = assign_I( temp_1.lower - check_x.lower, temp_1.upper - check_x.lower);
						cur_2 = assign_I( temp_2.lower, temp_2.upper );
					}
				}
			}

			if( is_end == false ) {
				/* Project cur_1 onto check_y: forward from check_y.lower for sign 0, */
				/* mirrored from check_y.upper for sign 1.  Any other sign leaves cmp_1 */
				/* and cmp_2 at their (0, 1) placeholders. */
				if( sign == 0 ) {
					if( (cur_1.upper + check_y.lower) > (cur_1.lower + check_y.lower) ) {
						cmp_1 = assign_I(cur_1.lower + check_y.lower, cur_1.upper + check_y.lower);
						cmp_2 = assign_I(cur_2.lower, cur_2.upper);
					}
					else is_end = true;
				}
				else if( sign == 1 ) {
					if( (check_y.upper - cur_1.lower) > (check_y.upper - cur_1.upper) ) {
						cmp_1 = assign_I(check_y.upper - cur_1.upper, check_y.upper - cur_1.lower);
						cmp_2 = assign_I(cur_2.lower, cur_2.upper);
					}
					else is_end = true;
				}

				if( is_end == false ) {
					if( (width(cmp_1) <= MIN_INTERVAL) || (width(cmp_2) <= MIN_INTERVAL) ) is_end = true;
				}
			}

			/* Search for the counterpart alignment; is_end doubles as the "stop searching" flag. */
			j = 0;
			while( (is_end == false) && (j < num_list) ) {
				if( j == i ) {}
				else {
					if( (strict_almost_equal(self[j].x, cmp_1) == true) && ( strict_almost_equal(self[j].y, cmp_2) == true ) ) {
						right_pid = self[j].identity;
						is_end = true;
						if( left_pid > right_pid ) count_left++;
						else if( left_pid < right_pid ) count_right++;
					}
					else if( (loose_subset(cmp_1, self[j].x) == true) && (loose_subset(cmp_2, self[j].y) == true ) ) {
						distance = compute_distance(cmp_1, cmp_2, self[j].x, self[j].y, sign);
						if( distance <= DIS_THRESHOLD ) {
							right_pid = self[j].identity;
							is_end = true;
							if( left_pid > right_pid ) count_left++;
							else if( left_pid < right_pid ) count_right++;
						}
					}
				}
				j++;
			}
			/* re-arm the flag for the next candidate */
			is_end = false;
		}
	}

	if( count_left > count_right ) {
		res = LEFT_SIDE;
	}
	else if( count_left < count_right ) {
		res = RIGHT_SIDE;
	}
	else {
		res = TIE;
	}

	return(res);
}
void read_maf(char *fname, int mode, struct DotList *algns, int *num_algns, int *size1, int *size2) { FILE *fp; char *status; int i = 0; int count = 0; int temp; int a_pid; int b1, e1, b2, e2; char strand[100], len1[100], len2[100]; char *s, *t; int algn_type = SELF1 - 1; int j = 0; int srcblock = -1; char token[50]; char name1[LEN_NAME], name2[LEN_NAME]; strcpy(name1, ""); strcpy(name2, ""); strcpy(len1, "0"); strcpy(len2, "0"); strcpy(strand, "+"); strcpy(token, ""); fp = ckopen(fname, "r"); if (((status = fgets(S, BIG, fp)) == NULL) || strncmp(S, "##maf", 5)) fatalf("%s is not a maf file", fname); /* while (S[0] == '#') if ((status = fgets(S, BIG, fp)) == NULL) fatalf("no alignments in %s", fname); */ while ((status != NULL) && (strstr(S, "eof") == NULL)) { if(S[0] == '#') { if((mode == C_MODE) || (mode == S_MODE)) { while((status != NULL) && (S[0] == '#')) { if( strncmp(S, "##maf", 5) == 0 ) algn_type++; status = fgets(S, BIG, fp); } if( algn_type > PAIR ) fatal("too many alignments are combined\n"); } else { while ((status != NULL ) && (S[0] == '#')) { status = fgets(S, BIG, fp); } } j = 0; } srcblock = -1; if ( status == NULL ) { } else { if (S[0] != 'a') fatalf("expecting an a-line in %s, saw %s", fname, S); if( mode == O_MODE ) { sscanf(S, "%*s %s", token); srcblock = cat_srcblock(token); } if ((fgets(S, BIG, fp) == NULL) || (fgets(T, BIG, fp) == NULL)) fatalf("cannot find alignment in %s", fname); if ((sscanf(S, "%*s %s %d %d %*s %s", name1, &b1, &e1, len1) != 4) || (sscanf(T, "%*s %s %d %d %s %s", name2, &b2, &e2, strand, len2) != 5)) { fatalf("bad alignment info of 2 in %s", fname); } // aligned interval given as base-0 start and length e1 += b1; e2 += b2; if( strcmp(strand, "-") == 0) { temp = b2; b2 = atoi(len2) - e2; e2 = atoi(len2) - temp; } b1++; b2++; e1++; e2++; s = nucs(S); t = nucs(T); a_pid = cal_pid(s, t, strlen(s)-1); if( ((mode == D_MODE) || ((mode == C_MODE) && (algn_type <= PAIR))) && (( (algn_type != PAIR) && (b1 >= b2)) || ((algn_type != 
PAIR) && (abs(b1-b2) <= DEL_TH) && (abs(e1-e2) <=DEL_TH)) || ((e1-b1) < ALT_EFFEC_VALUE) || (a_pid <= PID_TH) )) {} else if( (mode == S_MODE) && ( algn_type != PAIR ) ) {} else if( (abs(e1-b1) <= ERR_SM_TH) || (abs(e2-b2) <= ERR_SM_TH) ) {} else { algns[count].x = assign_I(b1, e1); if( b2 < e2 ) algns[count].y = assign_I(b2, e2); else algns[count].y = assign_I(e2, b2); algns[count].identity = a_pid; algns[count].m_pid = a_pid; if( strcmp(strand, "+") == 0 ) { algns[count].sign = 0; algns[count].init_sign = 0; } else if( strcmp(strand, "-") == 0 ) { algns[count].sign = 1; algns[count].init_sign = 1; } else { algns[count].sign = DELETED; algns[count].init_sign = DELETED; } if( mode == O_MODE ) { algns[count].indiv_fid = srcblock; // ith alignment } else { algns[count].indiv_fid = j; // j alignment } algns[count].fid = i; // ith alignment algns[count].index = count; // ith alignment algns[count].c_id = -1; // not chained alignment algns[count].m_id = -1; // not chained alignment algns[count].rp1_id = -1; // the inserted repeat id of the chained alignment in first seq algns[count].rp2_id = -1; // the inserted repeat id of the chained alignment in second seq algns[count].l_id = -1; algns[count].lock = -1; algns[count].m_x = assign_I(0,1); algns[count].m_y = assign_I(0,1); algns[count].xl_diff = 0; // the offset of the left end algns[count].yl_diff = 0; // the offset of the left end algns[count].xr_diff = 0; // the offset of the right end algns[count].yr_diff = 0; // the offset of the right end algns[count].pair_self = -1; algns[count].l_pid = -1; if( (mode == O_MODE) || (mode == PAIR_MODE) ) { algns[count].sp_id = PAIR; } else { algns[count].sp_id = algn_type; // SELF1 for first self-alignment, SELF2 for second self-alignment and PAIR for pairwise alignment } algns[count].xl_offset = 0; // the offset of low of x algns[count].yl_offset = 0; // the offset of up of x algns[count].xr_offset = 0; // the offset of low of y if( algn_type == PAIR ) algns[count].pair_self = 
PAIR; else algns[count].pair_self = SELF; strcpy(algns[count].name1, name1); strcpy(algns[count].name2, name2); algns[count].len1 = atoi(len1); algns[count].len2 = atoi(len2); algns[count].ctg_id1 = -1; algns[count].ctg_id2 = -1; count++; } if ((fgets(S, BIG, fp) == NULL) || (S[0] != '\n')) fatalf("bad alignment end in %s", fname); status = fgets(S, BIG, fp); i++; // ith alignment j++; } } *size1 = atoi(len1); *size2 = atoi(len2); *num_algns = count; fclose(fp); }
int main(int argc, char *argv[]) { FILE *f; struct chain *Chain; struct chain *SubChain, *chainToFree; struct chain *ch_p, *next_p; char buf[NUM_CHARS]; struct lineFile *lf; int i = 0; int b = 0, e = 0; bool is_null = true; struct exons_list *homologs; int num_chains = 0; int num_homologs = 0; struct exons_list *repeats; int num_repeats = 0; char chr[LEN_NAME]; strcpy(chr, ""); if( argc == 3 ) { if( (f = ckopen(argv[2], "r")) ) { if( fgets(buf, NUM_CHARS, f) ) { if( sscanf(buf, "%s %d %d", chr, &b, &e) != 3 ) { fatalf("format errors: chr beg end in %s", buf); } } else { fatalf("%s is empty\n", argv[2]); } } fclose(f); } else if( argc != 4 ) { fatal("args: chain_file interval_text features_gff_file\n"); } else { if( (f = ckopen(argv[2], "r")) ) { if( fgets(buf, NUM_CHARS, f) ) { if( sscanf(buf, "%s %d %d", chr, &b, &e) != 3 ) { fatalf("format errors: chr beg end in %s", buf); } } else { fatalf("%s is empty\n", argv[2]); } } fclose(f); if( (f = ckopen(argv[3], "r")) ) { while(fgets(buf, NUM_CHARS, f)) { i++; } num_repeats = i; repeats = (struct exons_list *) ckalloc(num_repeats * sizeof(struct exons_list)); init_exons(repeats, 0, num_repeats-1); fseek(f, 0, SEEK_SET); assign_gff_exons_chr(f, repeats, num_repeats, chr); quick_sort_inc_exons(repeats, 0, num_repeats-1, POS_BASE); } else { fatalf("file %s invalid\n", argv[4]); } fclose(f); } lf = lineFileOpen(argv[1], true); Chain = chainRead(lf); ch_p = Chain; while( (ch_p != NULL) && ((next_p = chainRead(lf)) != NULL) ) { ch_p->next = next_p; ch_p = ch_p->next; i++; } // printf("Number of chains: %d\n", i); i = 0; ch_p = Chain; // while( (i < NUM_LOOPS) && (ch_p != NULL) ) { while( ch_p != NULL ) { // printf("chain %d: %d-%d\n", ch_p->id, ch_p->tStart, ch_p->tEnd); ch_p = ch_p->next; i++; } num_chains = i; homologs = (struct exons_list *) ckalloc(num_chains * sizeof(struct exons_list)); i = 0; f = ckopen(argv[2], "r"); while( fgets(buf, NUM_CHARS, f) ) { if( sscanf(buf, "%*s %d %d", &b, &e) != 2 ) { fatalf("format 
errors: chr beg end in %s", buf); } else { ch_p = Chain; if( ch_p != NULL ) { while( (ch_p != NULL) && (is_null == true) ) { chainSubsetOnT(ch_p, b, e, &SubChain, &chainToFree); if( SubChain != NULL ) is_null = false; ch_p = ch_p->next; } } if( is_null == false ) { if( (num_repeats == 0 ) || (is_repeats(repeats, num_repeats, SubChain->tName, SubChain->tStart, SubChain->tEnd) == false) ) { homologs[i].reg = assign_I(SubChain->qStart, SubChain->qEnd); homologs[i].dir = SubChain->qStrand; strcpy(homologs[i].chr, SubChain->qName); i++; } // printf("query: %s %d %d\n", SubChain->qName, SubChain->qStart, SubChain->qEnd); if( chainToFree != NULL ) { chainFree(&chainToFree); } while( ch_p != NULL ) { chainSubsetOnT(ch_p, b, e, &SubChain, &chainToFree); ch_p = ch_p->next; if( SubChain != NULL ) { if( (num_repeats == 0 ) || ( is_repeats(repeats, num_repeats, SubChain->tName, SubChain->tStart, SubChain->tEnd) == false )) { if( SubChain->qStrand == '-' ) { homologs[i].reg = assign_I(SubChain->qSize - SubChain->qEnd, SubChain->qSize - SubChain->qStart); } else { homologs[i].reg = assign_I(SubChain->qStart, SubChain->qEnd); } homologs[i].dir = SubChain->qStrand; strcpy(homologs[i].chr, SubChain->qName); i++; } // printf("query: %s %d %d\n", SubChain->qName, SubChain->qStart, SubChain->qEnd); if( chainToFree != NULL ) { chainFree(&chainToFree); } } } } } } num_homologs = i; selection_sort_exons(homologs, num_homologs); // print_exons_list(homologs, num_homologs); num_homologs = remove_redundant_intervals(homologs, num_homologs); print_exons_list(homologs, num_homologs); free(homologs); free(repeats); chainFreeList(&Chain); fclose(f); lineFileClose(&lf); return EXIT_SUCCESS; }
int check_inclusion_close_dup(int id, struct DotList *dots, int num_lines, bool *x_ins, bool *t_ins) { int res = -1; int i = 0; int temp_res = id; struct I temp; if( (*x_ins) == true ) { temp = assign_I(dots[id].x.lower, dots[id].x.upper); } else temp = assign_I(dots[id].y.lower, dots[id].y.upper); while( ( i < num_lines) && (res == -1) ) { if( dots[i].pair_self == PAIR ) { } else if( dots[i].sign == 2 ) {} else if( (i != id) && (dots[i].sign == 0) && ((dots[i].y.lower - dots[i].x.upper) <= THRESHOLD) ) { if( strict_almost_equal(temp, dots[i].x) == true) { temp_res = i; res = i; *x_ins = true; } if( strict_almost_equal(temp, dots[i].y) == true) { temp_res = i; res = i; *x_ins = false; } } i++; } if( res != -1 ) { if( is_tandem(dots[res]) == true ) { *t_ins = true; } else { *t_ins = false; } } else { i = 0; while( (i < num_lines) && (res == -1) ) { if( dots[i].pair_self == PAIR ) {} else if( dots[i].sign == 2 ) {} else if( dots[temp_res].pair_self == PAIR ) { if( strict_almost_equal(temp, dots[i].x) == true ) { res = i; *x_ins = true; } else if( strict_almost_equal(temp, dots[i].y) == true ) { res = i; *x_ins = false; } } i++; } if(res != -1) { if( is_tandem(dots[res]) == true ) *t_ins = true; else *t_ins = false; } } return( res); }
/* when two alignments have an overlapped region */ struct gap_list define_gap_new_type(struct DotList *dots, int loc_id, int comp_id, bool is_x) { struct gap_list gp; struct I temp; int len_x, len_y; gp.id1 = loc_id; gp.id2 = comp_id; gp.type = -1; gp.x1 = 0; gp.x2 = 1; gp.y1 = 0; gp.y2 = 1; gp.offset = 0; if( is_x == true ) // the overlap of x region is larger than y's { if( proper_overlap(dots[loc_id].x, dots[comp_id].x) == true ) { temp = intersect(dots[loc_id].x, dots[comp_id].x); gp.type = 21; // the gap is in y side if( dots[loc_id].y.lower <= dots[comp_id].y.lower ) { gp.y1 = dots[loc_id].y.upper; gp.y2 = dots[comp_id].y.lower + width(temp); len_x = width(dots[comp_id].x); len_y = width(dots[comp_id].y); gp.offset = len_y - len_x; if( dots[loc_id].sign == 0 ) { gp.x1 = dots[loc_id].x.upper; gp.x2 = gp.x1 + 1; } else if( dots[loc_id].sign == 1 ) { gp.x1 = dots[loc_id].x.lower; gp.x2 = gp.x1 + 1; } else gp.type = -1; } else { gp.y1 = dots[comp_id].y.upper; gp.y2 = dots[loc_id].y.lower + width(temp); len_x = width(dots[loc_id].x); len_y = width(dots[loc_id].y); gp.offset = len_y - len_x; if( dots[comp_id].sign == 0 ) { gp.x1 = dots[comp_id].x.upper; gp.x2 = gp.x1 + 1; } else if( dots[comp_id].sign == 1 ) { gp.x1 = dots[comp_id].x.lower; gp.x2 = gp.x1 + 1; } else gp.type = -1; } } else { gp.type = -1; } } else { if( proper_overlap(dots[loc_id].y, dots[comp_id].y) == true ) { temp = intersect(dots[loc_id].y, dots[comp_id].y); gp.type = 22; // the gap is in x side if( dots[loc_id].x.lower <= dots[comp_id].x.lower ) { gp.y1 = dots[loc_id].x.upper; gp.y2 = dots[comp_id].x.lower + width(temp); len_x = width(dots[comp_id].x); len_y = width(dots[comp_id].y); gp.offset = len_x - len_y; if( dots[loc_id].sign == 0 ) { gp.x1 = dots[loc_id].y.upper; gp.x2 = gp.x1 + 1; } else if( dots[loc_id].sign == 1 ) { gp.x1 = dots[loc_id].y.lower; gp.x2 = gp.x1 + 1; } else gp.type = -1; } else { gp.y1 = dots[comp_id].x.upper; gp.y2 = dots[loc_id].x.lower + width(temp); len_x = 
width(dots[loc_id].x); len_y = width(dots[loc_id].y); gp.offset = len_x - len_y; } } else { gp.type = -1; } } if( (gp.type != -1) && (gp.y2 <= gp.y1) ) { gp.type = -1; } if( gp.type != -1 ) { temp = assign_I(gp.y1, gp.y2); if( ( strict_almost_equal(temp, dots[comp_id].x) == true ) || ( strict_almost_equal(temp, dots[comp_id].y) == true ) || ( strict_almost_equal(temp, dots[loc_id].x) == true ) || (strict_almost_equal(temp, dots[loc_id].y) == true )) { gp.type = -1; } } return(gp); }
int main(int argc, char *argv[]) { FILE *f; char buf[1000]; int i, j; int num_genes = 0; int num_exons = 0; struct g_list *genes; struct exons_list *exons; int b, e; char name[100], scf_name[100]; int cur_exons_count; if( argc != 2 ) { printf("sort_exons exons_file\n"); return EXIT_FAILURE; } strcpy(name, ""); strcpy(scf_name, ""); f = fopen(argv[1], "r"); while(fgets(buf, 1000, f)) { if( buf[0] == '#' ) {} else if((buf[0] == '>') || (buf[0] == '<')) num_genes++; else num_exons++; } if( num_genes > 0 ) { genes = (struct g_list *) ckalloc(sizeof(struct g_list) * num_genes); } else { genes = (struct g_list *) ckalloc(sizeof(struct g_list)); } if( num_exons > 0 ) { exons = (struct exons_list *) ckalloc(sizeof(struct exons_list) * num_exons); } else { exons = (struct exons_list *) ckalloc(sizeof(struct exons_list)); } fseek(f, 0, SEEK_SET); i = -1; j = 0; while(fgets(buf, 1000, f)) { if( buf[0] == '#' ) {} else if( (buf[0] == '>') || (buf[0] == '<') ) { if( i >= 0 ) { genes[i].exonCount = cur_exons_count; genes[i].exEnd = j-1; } i++; cur_exons_count = 0; if(buf[0] == '>') genes[i].strand = '+'; else if(buf[0] == '<' ) genes[i].strand = '-'; else fatalf("unexpected strand %c\n", buf[0]); if( sscanf(buf, "%*s %d %d %s %s %*s", &b, &e, name, scf_name) == 4 ) { strcpy(genes[i].sname, scf_name); } else if( sscanf(buf, "%*s %d %d %s %*s", &b, &e, name) != 3 ) { printf("wrong format in %s\n", buf); } else strcpy(genes[i].sname, ""); genes[i].gid = i; genes[i].txStart = b; genes[i].txEnd = e; strcpy(genes[i].gname, name); } else { sscanf(buf, "%d %d", &b, &e); if( cur_exons_count == 0 ) genes[i].exStart = j; exons[j].fid = i; exons[j].reg = assign_I(b, e); cur_exons_count++; j++; } } genes[i].exonCount = cur_exons_count; genes[i].exEnd = j-1; quick_sort_inc_genes(genes, 0, num_genes-1, POS_BASE); i = 0; while( i < num_genes ) { j = 0; while( ((i+j) < num_genes) && (genes[i].txStart == genes[i+j].txStart )) j++; quick_sort_dec_genes(genes, i, i+j-1, LEN_BASE); i = i+j; } for( i 
= 0; i < num_genes; i++ ) { if( genes[i].txStart < 0 ) {} else { if( genes[i].strand == '+' ) { if( strcmp(genes[i].sname, "") == 0 ) { printf("> %d %d %s\n", genes[i].txStart, genes[i].txEnd, genes[i].gname); } else { printf("> %d %d %s %s\n", genes[i].txStart, genes[i].txEnd, genes[i].gname, genes[i].sname); } } else if( genes[i].strand == '-' ) { if( strcmp(genes[i].sname, "") == 0 ) { printf("< %d %d %s (complement)\n", genes[i].txStart, genes[i].txEnd, genes[i].gname); } else { printf("< %d %d %s %s (complement)\n", genes[i].txStart, genes[i].txEnd, genes[i].gname, genes[i].sname); } } else fatalf("unexpected strand %c\n", genes[i].strand); for( j = genes[i].exStart; j <= genes[i].exEnd; j++ ) { printf("%d %d\n", exons[j].reg.lower, exons[j].reg.upper); } } } fclose(f); free(genes); free(exons); return EXIT_SUCCESS; }
void pred_dup(int con, char op_ch, int pred_op, bool is_x_to_y, int id, int *num_list, struct DotList *dots, int num_ops, struct ops_list *ops) { int wide = 0; struct I from = {0, 1}, to = {0, 1}; int flag = DEL; int i = 0; int sp_id = dots[id].sp_id; sp_id = dots[id].sp_id; if((dots[id].l_id == -1) && (proper_overlap(dots[id].x, dots[id].y) == true) && (width(intersect(dots[id].x, dots[id].y)) <= THRESHOLD)) { dots[id].y = assign_I(dots[id].x.upper, dots[id].x.upper + width(dots[id].x)); } for( i = 0; i < *num_list; i++ ) { if( i != id ) { if( (dots[i].l_id != -1) && (dots[i].sign != 2) ) { dots[i].x = assign_I(dots[i].m_x.lower, dots[i].m_x.upper); dots[i].y = assign_I(dots[i].m_y.lower, dots[i].m_y.upper); dots[dots[i].l_id].sign = dots[i].sign; dots[i].l_id = -1; dots[i].identity = dots[i].m_pid; dots[i].m_x = assign_I(0,1); dots[i].m_y = assign_I(0,1); } } } if( dots[id].l_id != -1 ) { from = assign_I(dots[id].x.lower, dots[id].x.upper); to = assign_I(dots[id].y.lower, dots[id].y.upper); dots[id].sign = 2; flag = NONE; } else if( is_x_to_y ) { from = assign_I(dots[id].x.lower, dots[id].x.upper); to = assign_I(dots[id].y.lower, dots[id].y.upper); } else { from = assign_I(dots[id].y.lower, dots[id].y.upper); to = assign_I(dots[id].x.lower, dots[id].x.upper); } if( pred_op == 0 ) { wide = rollback_step_dup_no_overlap(is_x_to_y, id, num_list, dots); } else if(pred_op == 2) { wide = rollback_step_dup_no_overlap(is_x_to_y, id, num_list, dots); } else if(pred_op == 3) { wide = rollback_step_dup_overlap(is_x_to_y, id, num_list, dots); } else if(pred_op == 4) { wide = rollback_step_conversion(is_x_to_y, id, num_list, dots); if( con > 0 ) wide = con; } else wide = 0; generate_ops(op_ch, wide, is_x_to_y, from, to, flag, num_ops, ops, sp_id); /* if( is_x_to_y ) { ops[num_ops].ctg_id1 = dots[id].ctg_id1; ops[num_ops].ctg_id2 = dots[id].ctg_id2; } else { ops[num_ops].ctg_id2 = dots[id].ctg_id1; ops[num_ops].ctg_id1 = dots[id].ctg_id2; } */ ops[num_ops].id = dots[id].index; }
int main(int argc, char *argv[]) { SEQ *sf; uchar *s; FILE *f; char buf[10000]; char head[MAX_LEN]; char cur[LEN_NAME], chr_name[LEN_NAME], annot[LEN_NAME], gname[LEN_NAME], filter[LEN_NAME]; int gid = -1; int rid = -1; int i = 0; int b = 0, e = 1, num_cds = 0; char dir[3]; struct exons_list *exons; char annot_name[LEN_NAME]; float qual = (float)0; char ref[LEN_NAME], alt[LEN_NAME]; int rest = 0; char codon[4], alt_codon[4]; char aa1 = '\0', aa2 = '\0'; int num_rmsk = 0; struct exons_list *rmsk; int num_snps = 0, num_pass = 0, num_filter = 0, num_coding1 = 0, num_syn1 = 0, num_non1 = 0, num_repeats1 = 0, num_coding_repeats1 = 0; int num_coding = 0, num_syn = 0, num_non = 0, num_repeats = 0, num_coding_repeats = 0; bool is_num_print = false; strcpy(buf, ""); strcpy(head, ""); strcpy(cur, ""); strcpy(chr_name, ""); strcpy(annot, ""); strcpy(gname, ""); strcpy(annot_name, ""); strcpy(ref, ""); strcpy(alt, ""); strcpy(codon, ""); strcpy(alt_codon, ""); strcpy(dir, ""); codon[3] = '\0'; alt_codon[3] = '\0'; if( argc != 7 ) { printf("link_to_annot vcf_file gff_file seq_file annot_type(exon, gene, ...) 
rmsk_file print_mode(NUM or SITES)\n"); return EXIT_FAILURE; } else { if(!(f = ckopen(argv[2], "r"))) { printf("no file %s exists\n", argv[2]); return EXIT_FAILURE; } strcpy(annot_name, argv[4]); if( strcmp(annot_name, "exon") != 0 ) { fatalf("seq file is required only when the annot type is exon, but %s here\n", annot_name); } sf = seq_get(argv[3]); s = SEQ_CHARS(sf) - 1; if( strcmp(argv[6], "NUM") == 0 ) { is_num_print = true; } else if( strcmp(argv[6], "SITES") == 0 ) { is_num_print = false; } else { fatalf("unsupported print option: %s\n", argv[6]); } } compl['a'] = compl['A'] = 'T'; compl['c'] = compl['C'] = 'G'; compl['g'] = compl['G'] = 'C'; compl['t'] = compl['T'] = 'A'; while(fgets(buf, 10000, f)) { if( (buf[0] == '#') || (buf[0] == '>') ) {} else if( sscanf(buf, "%*s %*s %s %d %d %*s", annot, &b, &e) != 3 ) { fatalf("line in wrong gff format: %s\n", buf); } else { if( strcmp(annot, annot_name) == 0 ) { num_cds++; } } } if( num_cds > 0 ) exons = (struct exons_list *) ckalloc(num_cds * sizeof(struct exons_list)); initialize_exons_list(exons, 0, num_cds); fseek(f, 0, SEEK_SET); i = 0; while(fgets(buf, 10000, f)) { if( (buf[0] == '#') || (buf[0] == '>') ) {} else if( sscanf(buf, "%s %*s %s %d %d %*s %s %*s %s", chr_name, annot, &b, &e, dir, cur) != 6 ) { fatalf("line in wrong gff format: %s\n", buf); } else { if( strcmp(annot, annot_name) == 0 ) { get_gene_name(cur, gname); strcpy(exons[i].name, gname); exons[i].reg = assign_I(b, e); exons[i].dir = dir[0]; strcpy(exons[i].chr, chr_name); i++; } } } if( i != num_cds ) { fatalf("%s counting error: %d - %d\n", annot_name, num_cds, i); } fclose(f); if(!(f = ckopen(argv[5], "r"))) { fatalf("%s file not found\n", argv[5]); } rmsk = 0; while(fgets(buf, 10000, f)) { if( (buf[0] == '#') || (buf[0] == '>') ) {} else if( sscanf(buf, "%*s %*s %s %d %d %*s", annot, &b, &e) != 3 ) { fatalf("line in wrong gff format: %s\n", buf); } else { num_rmsk++; } } if( num_rmsk > 0 ) rmsk = (struct exons_list *) ckalloc(num_rmsk * 
sizeof(struct exons_list)); initialize_exons_list(rmsk, 0, num_rmsk); fseek(f, 0, SEEK_SET); i = 0; while(fgets(buf, 10000, f)) { if( (buf[0] == '#') || (buf[0] == '>') ) {} else if( sscanf(buf, "%s %*s %s %d %d %*s %s %*s %s", chr_name, annot, &b, &e, dir, cur) != 6 ) { fatalf("line in wrong gff format: %s\n", buf); } else { strcpy(rmsk[i].name, annot); rmsk[i].reg = assign_I(b, e); rmsk[i].dir = dir[0]; strcpy(rmsk[i].chr, chr_name); i++; } } if( i != num_rmsk ) { fatalf("%s counting error: %d - %d\n", annot_name, num_cds, i); } fclose(f); if(!(f = ckopen(argv[1], "r"))) { printf("no file %s exists\n", argv[1]); return EXIT_FAILURE; } i = 0; while(fgets(buf, 10000, f)) { if( buf[0] != '#' ) { num_snps++; if( sscanf(buf, "%s %d %*s %s %s %f %s %*s", chr_name, &b, ref, alt, &qual, filter) != 6 ) { fatalf("bad format in %s\n", buf); } else { if( strstr(filter, "PASS") == 0 ) { num_pass++; } else if( strstr(filter, "filter") == 0 ) { num_filter++; } rid = -1; rid = find_overlap_gene(chr_name, b, rmsk, num_rmsk); if( rid != -1 ) { num_repeats++; if( strstr(filter, "filter") == 0 ) {} else if( strstr(filter, "PASS") == 0 ) { num_repeats1++; } else { fatalf("unexpected filter option: %s\n", filter); } } if( (gid = find_overlap_gene(chr_name, b, exons, num_cds)) != -1 ) { num_coding++; if( strstr(filter, "PASS") == 0 ) { num_coding1++; } if( ref[0] != s[b] ) { fatalf("nucleotides not match: %c - %c\n", alt, s[b]); } if( exons[gid].dir == '+' ) { rest = (b - exons[gid].reg.lower)%3; if( rest == 0 ) { sprintf(codon, "%c%c%c", s[b], s[b+1], s[b+2]); sprintf(alt_codon, "%c%c%c", alt[0], s[b+1], s[b+2]); } else if( rest == 1 ) { sprintf(codon, "%c%c%c", s[b-1], s[b], s[b+1]); sprintf(alt_codon, "%c%c%c", s[b-1], alt[0], s[b+1]); } else { sprintf(codon, "%c%c%c", s[b-2], s[b-1], s[b]); sprintf(alt_codon, "%c%c%c", s[b-2], s[b-1], alt[0]); } } else if( exons[gid].dir == '-' ) { rest = (b - exons[gid].reg.upper)%3; if( rest == 0 ) { sprintf(codon, "%c%c%c", compl[s[b]], 
compl[s[b-1]], compl[s[b-2]]); sprintf(alt_codon, "%c%c%c", compl[alt[0]], compl[s[b-1]], compl[s[b-2]]); } else if( rest == 1 ) { sprintf(codon, "%c%c%c", compl[s[b+1]], compl[s[b]], compl[s[b-1]]); sprintf(alt_codon, "%c%c%c", compl[s[b+1]], compl[alt[0]], compl[s[b-1]]); } else { sprintf(codon, "%c%c%c", compl[s[b+2]], compl[s[b+1]], compl[s[b]]); sprintf(alt_codon, "%c%c%c", compl[s[b+2]], compl[s[b+1]], compl[alt[0]]); } } else { fatalf("%c unsupported\n", exons[gid].dir); } aa1 = dna2oneaa(codon); aa2 = dna2oneaa(alt_codon); if( aa1 == aa2 ) { num_syn++; if( strstr(filter, "filter") == 0) { } else if( strstr(filter, "PASS") == 0 ) { num_syn1++; } else { fatalf("unexpected filter option: %s\n", filter); } } else { num_non++; if( strstr(filter, "filter") == 0) { } else if( strstr(filter, "PASS") == 0 ) { num_non1++; } else { fatalf("unexpected filter option: %s\n", filter); } } if( rid != -1 ) { num_coding_repeats++; if( strstr(filter, "PASS") == 0 ) { num_coding_repeats1++; } } if( is_num_print == false ) { if( rid == -1 ) { printf("%s\t%d\t%s\t%s\t%f\t%s\t%s\t%d\t%d\t%c\t%c\t%c\t.\n", chr_name, b, ref, alt, qual, filter, exons[gid].name, exons[gid].reg.lower, exons[gid].reg.upper, exons[gid].dir, aa1, aa2); } else { printf("%s\t%d\t%s\t%s\t%f\t%s\t%s\t%d\t%d\t%c\t%c\t%c\t%s\n", chr_name, b, ref, alt, qual, filter, exons[gid].name, exons[gid].reg.lower, exons[gid].reg.upper, exons[gid].dir, aa1, aa2, rmsk[rid].name); } } } else { } } } } if( is_num_print == true ) { printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", chr_name, num_snps, num_pass, num_filter, num_coding, num_coding1, num_non, num_syn, num_non1, num_syn1, num_repeats, num_repeats1, num_coding_repeats, num_coding_repeats1); } if( num_cds > 0 ) { free(exons); } fclose(f); return EXIT_SUCCESS; }
void filter_gff_lists(struct g_list *genes1, int num_genes1, struct exons_list *exons1, int num_exons1, int type) { int i = 0, j = 0; struct I cur, tmp; int sid = 0, eid = 0; cur = assign_I(0, 1); tmp = assign_I(0, 1); if( type == SGD ) { for( i = 0; i < num_genes1; i++ ) { sid = genes1[i].cdsStart; eid = genes1[i].cdsEnd; if( exons1[sid].reg.lower < exons1[eid].reg.upper ) { cur = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper); } else { if( genes1[i].strand == '-' ) { cur = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper); } else { fatalf("check exons list for %s,%s:%d-%d\n", genes1[i].gname, genes1[i].sname, genes1[i].txStart, genes1[i].txEnd); } } // cur = assign_I(genes1[i].txStart, genes1[i].txEnd); // if( (width(cur) < MIN_ORF_BASES) && (strstr(genes1[i].info, "Dubious") != 0) ) if( (genes1[i].txStart <= 0) || (genes1[i].txEnd <= 0) ) { genes1[i].type = REDUN; } else if( width(cur) < MIN_ORF_BASES ) { genes1[i].type = REDUN; } else if( genes1[i].type == REDUN ) {} else { j = i+1; if( j < num_genes1 ) { sid = genes1[j].cdsStart; eid = genes1[j].cdsEnd; if( exons1[sid].reg.lower < exons1[eid].reg.upper ) { tmp = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper); } else { if( genes1[j].strand == '-' ) { tmp = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper); } else { fatalf("check exons list for %s,%s:%d-%d\n", genes1[j].gname, genes1[j].sname, genes1[i].txStart, genes1[j].txEnd); } } // tmp = assign_I(genes1[j].txStart, genes1[j].txEnd); } while( (j < num_genes1) && (proper_overlap(cur, tmp) == true) ) { if( width(tmp) < MIN_ORF_BASES ) { genes1[j].type = REDUN; } else if( genes1[j].type == REDUN ) {} else { if( width(intersect(cur, tmp)) >= MIN_BASES ) { if( (strstr(genes1[i].info, "Verified") != 0) || (strstr(genes1[i].info, "Uncharacterized") != 0) ) { if(strstr(genes1[j].info, "Dubious") != 0 ) { // if( genes1[j].strand == genes1[i].strand ) { genes1[j].type = REDUN; // } } } else if( strstr(genes1[i].info, "Dubious") != 0 ) { if( 
(strstr(genes1[j].info, "Verified") != 0) || (strstr(genes1[j].info, "Uncharacterized") != 0) ) { // if( genes1[j].strand == genes1[i].strand ) { genes1[i].type = REDUN; // } } else if( strstr(genes1[j].info, "Dubious") != 0 ) { if(width(tmp) < width(cur)) { // if( genes1[j].strand == genes1[i].strand ) { genes1[j].type = REDUN; // } } else if(width(tmp) >= width(cur)) { // if( genes1[j].strand == genes1[i].strand ) { genes1[i].type = REDUN; // } } } } } } j++; if( j < num_genes1 ) { sid = genes1[j].cdsStart; eid = genes1[j].cdsEnd; if( exons1[sid].reg.lower < exons1[eid].reg.upper ) { tmp = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper); } else { if( genes1[j].strand == '-' ) { tmp = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper); } else { fatalf("check exons list for %s,%s:%d-%d\n", genes1[j].gname, genes1[j].sname, genes1[i].txStart, genes1[j].txEnd); } } // tmp = assign_I(genes1[j].txStart, genes1[j].txEnd); } } } } } else if( type == MAKER ) { for( i = 0; i < num_genes1; i++ ) { sid = genes1[i].cdsStart; eid = genes1[i].cdsEnd; if( exons1[sid].reg.lower < exons1[eid].reg.upper ) { cur = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper); } else { if( genes1[i].strand == '-' ) { cur = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper); } else { fatalf("check exons list for %s,%s:%d-%d\n", genes1[i].gname, genes1[i].sname, genes1[i].txStart, genes1[i].txEnd); } } // cur = assign_I(genes1[i].txStart, genes1[i].txEnd); // if( (width(cur) < MIN_ORF_BASES) && (strcmp(genes1[i].gname, "UNDEF") == 0) ) if( (genes1[i].type == REDUN) || (genes1[i].type == MATCH) || (genes1[i].type == PARTIAL) ) { genes1[i].type = REDUN; } else if( width(cur) < MIN_ORF_BASES ) { genes1[i].type = REDUN; } else { j = i+1; if( j < num_genes1 ) { sid = genes1[j].cdsStart; eid = genes1[j].cdsEnd; if( exons1[sid].reg.lower < exons1[eid].reg.upper ) { tmp = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper); } else { if( genes1[j].strand == '-' ) { tmp = 
assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper); } else { fatalf("check exons list for %s,%s:%d-%d\n", genes1[j].gname, genes1[j].sname, genes1[i].txStart, genes1[j].txEnd); } } // tmp = assign_I(genes1[j].txStart, genes1[j].txEnd); } while( (j < num_genes1) && (proper_overlap(cur, tmp) == true) ) { if( (genes1[j].type == REDUN) || (genes1[j].type == MATCH) || (genes1[j].type == PARTIAL) ) { genes1[j].type = REDUN; } else if( width(cur) < MIN_ORF_BASES ) { genes1[j].type = REDUN; } else { // if( (width(intersect(cur, tmp)) >= MIN_BASES) && (genes1[i].strand == genes1[j].strand) ) { if( width(intersect(cur, tmp)) >= MIN_BASES ) { if(width(tmp) < width(cur)) { genes1[j].type = REDUN; } else if(width(tmp) >= width(cur)) { genes1[i].type = REDUN; } } } j++; if( j < num_genes1 ) { sid = genes1[j].cdsStart; eid = genes1[j].cdsEnd; if( exons1[sid].reg.lower < exons1[eid].reg.upper ) { tmp = assign_I(exons1[sid].reg.lower, exons1[eid].reg.upper); } else { if( genes1[j].strand == '-' ) { tmp = assign_I(exons1[eid].reg.lower, exons1[sid].reg.upper); } else { fatalf("check exons list for %s,%s:%d-%d\n", genes1[j].gname, genes1[j].sname, genes1[i].txStart, genes1[j].txEnd); } } // tmp = assign_I(genes1[j].txStart, genes1[j].txEnd); } } } } } else if ( type == MULTI_CDS ) { for( i = 0; i < num_genes1; i++ ) { if( genes1[i].exonCount >= 2 ) { } else { genes1[i].type = REDUN; } } } else { fatalf("Unsupported type: %d\n", type); } }
void adjust_algn_pos(struct DotList *algns, int num_algns, struct n_pair *contigs1, int num1, int *size1, struct n_pair *contigs2, int num2, int *size2, int mode) { int *len_sum1, *len_sum2; int i = 0; int id1 = 0, id2 = 0; char name[LEN_NAME] = "", sp_name[LEN_NAME] = "", ctg_name[LEN_NAME] = ""; int ctg_id = -1; if( num1 > 0 ) len_sum1 = (int *) ckalloc(sizeof(int) * num1); if( num2 > 0 ) len_sum2 = (int *) ckalloc(sizeof(int) * num2); if( mode == CTG_NOT_ASSIGNED_BUT_LEN ) { for( i = 0; i < num1; i++ ) len_sum1[i] = contigs1[i].len; for( i = 0; i < num2; i++ ) len_sum2[i] = contigs2[i].len; } else { cal_length_sum(len_sum1, contigs1, num1); cal_length_sum(len_sum2, contigs2, num2); } for( i = 0; i < num_algns; i++ ) { if( (mode == CTG_NOT_ASSIGNED) || (mode == CTG_NOT_ASSIGNED_BUT_LEN) ) { strcpy(name, algns[i].name1); if( algns[i].sp_id == SELF2 ) { concat_ctg_name(name, sp_name, ctg_name); ctg_id = is_ctg_in(sp_name, ctg_name, contigs2, num2); } else { concat_ctg_name(name, sp_name, ctg_name); ctg_id = is_ctg_in(sp_name, ctg_name, contigs1, num1); } if( ctg_id == -1 ) { fatalf("Contig %s not assigned in the list\n", ctg_name); } else { algns[i].ctg_id1 = ctg_id; } strcpy(name, algns[i].name2); if( algns[i].sp_id == SELF1 ) { concat_ctg_name(name, sp_name, ctg_name); ctg_id = is_ctg_in(sp_name, ctg_name, contigs1, num1); } else { concat_ctg_name(name, sp_name, ctg_name); ctg_id = is_ctg_in(sp_name, ctg_name, contigs2, num2); } if( ctg_id == -1 ) { fatalf("Contig %s not assigned in the list\n", ctg_name); } else { algns[i].ctg_id2 = ctg_id; } } if( algns[i].sp_id == SELF1 ) { id1 = algns[i].ctg_id1; if( id1 >= num1 ) fatalf("%d: not valid, larger than %d\n", id1, num1); if( (id1 == -1) && (num1 > 0) ) { fatalf("wrong contig num assigned: %s - %s in read_maf.c\n", algns[i].name1, algns[i].name2); } if( id1 != -1 ) algns[i].x = assign_I(algns[i].x.lower + len_sum1[id1], algns[i].x.upper + len_sum1[id1]); id2 = algns[i].ctg_id2; if( id2 >= num1 ) { fatalf("%d: not 
valid, larger than %d\n", id2, num1); } if( (id2 == -1) && (num1 > 0) ) { fatalf("wrong contig num assigned: %s - %s in read_maf.c\n", algns[i].name1, algns[i].name2); } if( id2 != -1 ) algns[i].y = assign_I(algns[i].y.lower + len_sum1[id2], algns[i].y.upper + len_sum1[id2]); } else if( algns[i].sp_id == SELF2 ) { id1 = algns[i].ctg_id1; if( id1 >= num2 ) { fatalf("%d: not valid, larger than %d\n", id1, num2); } if( (id1 == -1) && (num2 > 0) ) { fatalf("wrong contig num assigned: %s - %s in read_maf.c\n", algns[i].name1, algns[i].name2); } if( id1 != -1 ) algns[i].x = assign_I(algns[i].x.lower + len_sum2[id1], algns[i].x.upper + len_sum2[id1]); id2 = algns[i].ctg_id2; if( id2 >= num2 ) { fatalf("%d: not valid, larger than %d\n", id2, num2); } if( (id2 == -1) && (num2 > 0) ) { fatalf("wrong contig num assigned: %s - %s in read_maf.c\n", algns[i].name1, algns[i].name2); } if( id2 != -1 ) algns[i].y = assign_I(algns[i].y.lower + len_sum2[id2], algns[i].y.upper + len_sum2[id2]); } else if( algns[i].sp_id == PAIR ) { id1 = algns[i].ctg_id1; if( id1 >= num1 ) { fatalf("%d: not valid, larger than %d\n", id1, num1); } if( (id1 == -1) && (num1 > 0) ) { fatalf("wrong contig num assigned: %s - %s in read_maf.c\n", algns[i].name1, algns[i].name2); } if( id1 != -1 ) algns[i].x = assign_I(algns[i].x.lower + len_sum1[id1], algns[i].x.upper + len_sum1[id1]); id2 = algns[i].ctg_id2; if( id2 >= num2 ) { fatalf("%d: not valid, larger than %d\n", id2, num2); } if( (id2 == -1) && (num2 > 0) ) { fatalf("wrong contig num assigned: %s - %s in read_maf.c\n", algns[i].name1, algns[i].name2); } if( id2 != -1 ) algns[i].y = assign_I(algns[i].y.lower + len_sum2[id2], algns[i].y.upper + len_sum2[id2]); } } if( num1 > 0 ) { *size1 = len_sum1[num1-1] + contigs1[num1-1].len; free(len_sum1); } if( num2 > 0 ) { *size2 = len_sum2[num2-1] + contigs2[num2-1].len; free(len_sum2); } }
int main(int argc, char **argv) { FILE *f; int i = 0; int count = 0; int num_match_regions = 0; struct orf_I * match_regions; char scaf_name[MAX_NAME], cur_name[MAX_NAME]; char buf[MAX_NAME]; struct I reg; int b = 0, e = 0; reg = assign_I(0, 1); debug_mode = FALSE; if( argc == 4 ) { debug_mode = TRUE; } else if( argc != 3 ) { fatal("args: intervals1 intervals2\n"); } strcpy(buf, ""); strcpy(scaf_name, ""); strcpy(cur_name, ""); if( (f = fopen(argv[1], "r")) == NULL ) { fatalf("cannot find alignment in %s", argv[1]); } else { while(fgets(buf, MAX_NAME, f)) count++; } if( count > 0 ) { match_regions = (struct orf_I *) ckalloc(count * (sizeof(struct orf_I)) ); initialize_orf_I_list(match_regions, count); num_match_regions = input_orf_I_list(f, match_regions, count); } fclose(f); count = 0; if( (f = fopen(argv[2], "r")) == NULL ) { fatalf("cannot find alignment in %s", argv[2]); } else { while(fgets(buf, MAX_NAME, f)) { if( buf[0] == '>' ) { printf("%s", buf); } else { if( sscanf(buf, "%s %d %d %s %*s", scaf_name, &b, &e, cur_name) != 4 ) { fatalf("wrong interval line: %s", buf); } else { i = 0; reg = assign_I(b, e); while( i < num_match_regions ) { if( strcmp(cur_name, match_regions[i].strain_name) == 0 ) { if( strcmp(scaf_name, match_regions[i].name) == 0 ) { if( proper_overlap(reg, match_regions[i].region) == true ) { printf("%s %d %d %s\n", match_regions[i].name, match_regions[i].region.lower, match_regions[i].region.upper, match_regions[i].strain_name); } } } i++; } } } } } if( count > 0 ) { free(match_regions); } return EXIT_SUCCESS; }
void convert_tandem_region(struct DotList *dots, int num, int id, int *t_list, int num_tandem) { int i; int cur_id, cmp_id; struct DotList t1, t2; int len_x, len_y; int cur_len = 0; int val_t1, val_t2; for( i = 0; i < num_tandem; i++ ) { val_t1 = -1; val_t2 = -1; t1.x = assign_I(-1, 0); t2.x = assign_I(-1, 0); t1.y = assign_I(-1, 0); t2.y = assign_I(-1, 0); cmp_id = t_list[i]; if( i == 0 ) cur_id = id; else cur_id = t_list[i-1]; if( ( strict_almost_equal( dots[cmp_id].x, dots[cur_id].x ) == true ) || ( strict_almost_equal( dots[cmp_id].y, dots[cur_id].y) == true ) ) {} else if( ( strict_subset( dots[cmp_id].x, dots[cur_id].x ) == true ) && ( strict_subset( dots[cmp_id].y, dots[cur_id].y ) == true ) ) { if( abs(dots[cur_id].x.upper - dots[cmp_id].x.upper) > abs(dots[cur_id].x.lower - dots[cmp_id].x.lower) ) { if( ( dots[cur_id].x.upper - dots[cmp_id].x.upper ) <= 0 ) t1.x = assign_I(-1, 0); else { len_x = width(dots[cur_id].x); len_y = width(dots[cur_id].y); t1.x = assign_I(dots[cmp_id].x.upper, dots[cur_id].x.upper); cur_len = (int)(((float)(width(t1.x)) * ((float)len_y)/(float)len_x)); if( cur_len < DEL_TH ) { t1.x = assign_I(-1, 0); t1.y = assign_I(-1, 0); } else if( dots[cur_id].sign == 0 ) { if( dots[cur_id].y.upper > (dots[cur_id].y.upper - cur_len)) t1.y = assign_I(dots[cur_id].y.upper - cur_len, dots[cur_id].y.upper); else t1.x = assign_I(-1, 0); } else if( dots[cur_id].sign == 1 ) { if( (dots[cur_id].y.lower + cur_len) > dots[cur_id].y.lower ) t1.y = assign_I(dots[cur_id].y.lower, dots[cur_id].y.lower + cur_len); else t1.x = assign_I(-1, 0); } } } else { if( ( dots[cur_id].x.lower - dots[cmp_id].x.lower ) >= 0 ) t1.x = assign_I(-1, 0); else { len_x = width(dots[cur_id].x); len_y = width(dots[cur_id].y); t1.x = assign_I(dots[cur_id].x.lower, dots[cmp_id].x.lower); cur_len = (int)(((float)(width(t1.x)) * ((float)len_y)/(float)len_x)); if( cur_len < DEL_TH ) { t1.x = assign_I(-1, 0); t1.y = assign_I(-1, 0); } else if( dots[cur_id].sign == 0 ) { if( 
(dots[cur_id].y.lower + cur_len) > dots[cur_id].y.lower ) t1.y = assign_I(dots[cur_id].y.lower, dots[cur_id].y.lower + cur_len); else t1.x = assign_I(-1, 0); } else if( dots[cur_id].sign == 1 ) { if( dots[cur_id].y.upper > dots[cur_id].y.upper ) t1.y = assign_I(dots[cur_id].y.upper - cur_len, dots[cur_id].y.upper); else t1.x = assign_I(-1, 0); } } } if( abs(dots[cmp_id].y.lower - dots[cur_id].y.lower) > abs(dots[cur_id].y.upper - dots[cmp_id].y.upper) ) { if( ( dots[cmp_id].y.lower - dots[cur_id].y.lower ) <= 0 ) t2.x = assign_I(-1, 0); else { len_x = width(dots[cur_id].x); len_y = width(dots[cur_id].y); t2.y = assign_I(dots[cur_id].y.lower, dots[cmp_id].y.lower); cur_len = (int)(((float)(width(t2.y)) * ((float)len_x)/(float)len_y)); if( cur_len < DEL_TH ) { t2.x = assign_I(-1, 0); t2.y = assign_I(-1, 0); } else if( dots[cur_id].sign == 0 ) { if( (dots[cur_id].x.lower + cur_len) > dots[cur_id].x.lower ) t2.x = assign_I(dots[cur_id].x.lower, dots[cur_id].x.lower + cur_len); else t2.x = assign_I(-1, 0); } else if( dots[cur_id].sign == 1 ) { if( dots[cur_id].x.upper > (dots[cur_id].x.upper - cur_len) ) t2.x = assign_I(dots[cur_id].x.upper - cur_len, dots[cur_id].x.upper); else t2.x = assign_I(-1, 0); } } } else { if( ( dots[cur_id].y.upper - dots[cmp_id].y.upper ) <= 0 ) t2.x = assign_I(-1, 0); else { len_x = width(dots[cur_id].x); len_y = width(dots[cur_id].y); t2.y = assign_I(dots[cmp_id].y.upper, dots[cur_id].y.upper); cur_len = (int)(((float)(width(t2.y)) * ((float)len_x)/(float)len_y)); if( cur_len < DEL_TH ) { t2.x = assign_I(-1, 0); t2.y = assign_I(-1, 0); } else if( dots[cur_id].sign == 0 ) { if( dots[cur_id].x.upper > (dots[cur_id].x.upper - cur_len) ) t2.x = assign_I(dots[cur_id].x.upper - cur_len, dots[cur_id].x.upper); else t2.x = assign_I(-1, 0); } else if( dots[cur_id].sign == 1 ) { if( (dots[cur_id].x.lower + cur_len) > dots[cur_id].x.lower ) t2.x = assign_I(dots[cur_id].x.lower, dots[cur_id].x.lower + cur_len); else t2.x = assign_I(-1, 0); } } } } if( 
(t1.x.lower >= 0) && (t1.y.lower >= 0)) val_t1 = check_tandem_reg( t1, dots, num ); else val_t1 = -1; if( (t2.x.lower >= 0) && (t2.y.lower >= 0) ) val_t2 = check_tandem_reg( t2, dots, num ); else val_t2 = -1; if( (val_t1 != -1) && (val_t2 != -1) ) { if( val_t1 <= val_t2 ) { dots[cur_id].x = assign_I(t1.x.lower, t1.x.upper); dots[cur_id].y = assign_I(t1.y.lower, t1.y.upper); dots[cur_id].rp1_id = 0; } else { dots[cur_id].x = assign_I(t2.x.lower, t2.x.upper); dots[cur_id].y = assign_I(t2.y.lower, t2.y.upper); dots[cur_id].rp1_id = 0; } } else if( val_t1 != -1 ) { dots[cur_id].x = assign_I(t1.x.lower, t1.x.upper); dots[cur_id].y = assign_I(t1.y.lower, t1.y.upper); dots[cur_id].rp1_id = 0; } else if( val_t2 != -1 ) { dots[cur_id].x = assign_I(t2.x.lower, t2.x.upper); dots[cur_id].y = assign_I(t2.y.lower, t2.y.upper); dots[cur_id].rp1_id = 0; } } }
bool check_whole_regions_inclusion(struct DotList *dots, int num_lines, int mid, int left_id, int right_id, bool is_x) { int i; bool res = false; struct I temp; if( is_x == true ) temp = assign_I(dots[mid].x.lower, dots[mid].x.upper); else temp = assign_I(dots[mid].y.lower, dots[mid].y.upper); for( i = 0; i < num_lines; i++ ) { if( is_x == true ) { if( strict_overlap(dots[left_id].y, dots[right_id].y, (M_TH/2)+1) == true ) { if( (loose_subset(dots[mid].x, dots[i].x) == true) && (loose_subset(dots[left_id].x, dots[i].x) == true) && (loose_subset(dots[right_id].x, dots[i].x) == true)) { res = true; } else if( (loose_subset(dots[mid].x, dots[i].y) == true) && (loose_subset(dots[left_id].x, dots[i].y) == true) && (loose_subset(dots[right_id].x, dots[i].y) == true)) { res = true; } } else if( strict_overlap(dots[left_id].x, dots[right_id].x, (M_TH/2)+1) == true ) { if( (loose_subset(dots[mid].x, dots[i].x) == true) && (loose_subset(dots[left_id].y, dots[i].x) == true) && (loose_subset(dots[right_id].y, dots[i].x) == true)) { res = true; } else if( (loose_subset(dots[mid].x, dots[i].y) == true) && (loose_subset(dots[left_id].y, dots[i].y) == true) && (loose_subset(dots[right_id].y, dots[i].y) == true)) { res = true; } } else { } } else { if( strict_overlap(dots[left_id].x, dots[right_id].x, (M_TH/2)+1) == true ) { if( (loose_subset(dots[mid].y, dots[i].x) == true) && (loose_subset(dots[left_id].y, dots[i].x) == true) && (loose_subset(dots[right_id].y, dots[i].x) == true)) { res = true; } else if( (loose_subset(dots[mid].y, dots[i].y) == true) && (loose_subset(dots[left_id].y, dots[i].y) == true) && (loose_subset(dots[right_id].y, dots[i].y) == true)) { res = true; } } else if( strict_overlap(dots[left_id].y, dots[right_id].y, (M_TH/2)+1) == true ) { if( (loose_subset(dots[mid].y, dots[i].x) == true) && (loose_subset(dots[left_id].x, dots[i].x) == true) && (loose_subset(dots[right_id].x, dots[i].x) == true)) { res = true; } else if( (loose_subset(dots[mid].y, dots[i].y) 
== true) && (loose_subset(dots[left_id].x, dots[i].y) == true) && (loose_subset(dots[right_id].x, dots[i].y) == true)) { res = true; } } else { } } } return(res); }