static void trim_polyA_align(struct edit_script_list **Sptr, Exon *lblock, Exon **exons, const int bc, int *pA, uchar *s1,uchar *s2) { edit_script_list *head = *Sptr; edit_script *tp; int tmpi = 0, num, idents = 0; uchar *a, *b; Exon *prev; int i, j; /* i index in the cDNA */ if (bc>head->offset2+head->len2-1) { *pA = bc; return; } if (bc==head->offset2) { /* cDNA gap: remove the entire script; this should be properly sorted */ *Sptr = head->next_script; Free_script(head->script); free(head); while ((*exons)->from2>=bc) { prev = find_previous(lblock,*exons); prev->next_exon = (*exons)->next_exon; free(*exons); *exons = prev; } *pA = bc; return; } Flip_script(&(head->script)); i = head->offset2 + head->len2 -1; j = head->offset1 + head->len1 -1; tp = head->script; while (i>=bc && tp) { num = tp->num; switch (tp->op_type) { case INSERT: if (i>=bc && bc>i-num+1) { tmpi += i-bc+1; tp->num -= i-bc+1; i = bc-1; } else { i -= num; tmpi += num; head->script = tp->next; free(tp); tp = head->script; } break; case DELETE: j -= num; tmpi += num; head->script = tp->next; free(tp); tp = head->script; break; case SUBSTITUTE: if (i>=bc && bc>i-num+1) { a = s2+i-1; b = s1+j-1; while (a>=s2+bc-1) { if (*a--!=*b--) tmpi++; else idents++; } j -= i-bc+1; tp->num -= i-bc+1; i = bc-1; } else { /* at most 1 nt remaining */ a = s2+i-1; b = s1+j-1; while (a>=s2+i-num) { if (*a--!=*b--) tmpi++; else idents++; } i -= num; j -= num; head->script = tp->next; free(tp); tp = head->script; } break; default: fatalf("Unrecognized opcode %d.\n",tp->op_type); } /* indel walk */ } assert(i==bc-1); while (tp->op_type!=SUBSTITUTE && j+1>=(*exons)->from1) { if (tp->op_type==INSERT) { i -= tp->num; tmpi += tp->num; } else if (j<(*exons)->from1 && i<(*exons)->from2) { j -= tp->num; } else { j -= tp->num; tmpi += tp->num; } head->script = tp->next; free(tp); tp = head->script; } if (head->script==NULL) { *Sptr = head->next_script; free(head); } else { head->len1 = j-head->offset1+1; head->len2 = i-head->offset2+1; head->score -= tmpi; Flip_script(&(head->script)); } if ((*exons)->from2>i) { prev = find_previous(lblock,*exons); prev->next_exon = (*exons)->next_exon; free(*exons); *exons = prev; } else { double tmp_matches; (*exons)->to2 = i; (*exons)->to1 = j; (*exons)->length = (*exons)->to2-(*exons)->from2+1; tmp_matches = (*exons)->nmatches - idents; (*exons)->alen -= tmpi+idents; (*exons)->match = (int)(100*tmp_matches/(*exons)->alen); } *pA = i+1; return; }
void Sim4::trim_polyA_align(struct edit_script_list **Sptr, Exon *lblock, Exon **exons, const int bc, int *pA, char *s1,char *s2) { edit_script_list *head = *Sptr; edit_script *tp; int tmpi = 0, num, idents = 0, identsN = 0; char *a, *b; Exon *prev; int i, j; /* i index in the cDNA */ if (bc>head->offset2+head->len2-1) { *pA = bc; return; } if (bc==head->offset2) { /* cDNA gap: remove the entire script; is this properly sorted? LLL */ *Sptr = head->next_script; Free_script(head->script); ckfree(head); while ((*exons)->frEST>=bc) { prev = find_previous(lblock,*exons); if (prev == 0L) { fprintf(stderr, "trim_polyA_align(): Corrupted exon list, cannot find the previous exon (remove entire script).\n"); for (; lblock; lblock = lblock->next_exon) fprintf(stderr, " GEN f=%8d t=%8d EST f=%8d t=%8d flag=%d\n", lblock->frGEN, lblock->toGEN, lblock->frEST, lblock->toEST, lblock->flag); kill(getpid(), SIGKILL); } prev->next_exon = (*exons)->next_exon; //freeExon(*exons); garbage collected *exons = prev; } *pA = bc; return; } Flip_script(&(head->script)); i = head->offset2 + head->len2 -1; j = head->offset1 + head->len1 -1; tp = head->script; while (i>=bc && tp) { num = tp->num; switch (tp->op_type) { case INSERT: if (i>=bc && bc>i-num+1) { (*exons)->numInDel -= i - bc + 1; (*exons)->numEdits -= i - bc + 1; tmpi += i-bc+1; tp->num -= i-bc+1; i = bc-1; } else { (*exons)->numInDel -= num; (*exons)->numEdits -= num; tmpi += num; i -= num; head->script = tp->next; ckfree(tp); tp = head->script; } break; case DELETE: (*exons)->numInDel -= num; (*exons)->numEdits -= num; j -= num; tmpi += num; head->script = tp->next; ckfree(tp); tp = head->script; break; case SUBSTITUTE: if (i>=bc && bc>i-num+1) { a = s2+i-1; b = s1+j-1; while (a>=s2+bc-1) { if (*a != *b) { (*exons)->numEdits--; tmpi++; } else { if (*a == 'N') { (*exons)->numNs--; identsN++; } else { (*exons)->numMatches--; idents++; } } a--; b--; } j -= i-bc+1; tp->num -= i-bc+1; i = bc-1; } else { /* at most 1 nt remaining */ a = s2+i-1; b = s1+j-1; while (a>=s2+i-num) { if (*a != *b) { (*exons)->numEdits--; tmpi++; } else { if (*a == 'N') { (*exons)->numNs--; identsN++; } else { (*exons)->numMatches--; idents++; } } a--; b--; } i -= num; j -= num; head->script = tp->next; ckfree(tp); tp = head->script; } break; #if 0 default: fatalf("Unrecognized opcode %d.\n",tp->op_type); #endif } /* indel walk */ } assert(i==bc-1); while ((tp != 0L) && (tp->op_type != SUBSTITUTE) && (j+1 >= (*exons)->frGEN)) { if (tp->op_type==INSERT) { i -= tp->num; tmpi += tp->num; (*exons)->numInDel -= tp->num; (*exons)->numEdits -= tp->num; } else if (j<(*exons)->frGEN && i<(*exons)->frEST) { j -= tp->num; } else { j -= tp->num; tmpi += tp->num; (*exons)->numInDel -= tp->num; (*exons)->numEdits -= tp->num; } head->script = tp->next; ckfree(tp); tp = head->script; } if (head->script==NULL) { *Sptr = head->next_script; ckfree(head); } else { head->len1 = j-head->offset1+1; head->len2 = i-head->offset2+1; head->score -= tmpi; Flip_script(&(head->script)); } if ((*exons)->frEST>i) { prev = find_previous(lblock,*exons); if (prev == 0L) { fprintf(stderr, "trim_polyA_align(): Corrupted exon list, cannot find the previous exon (frEST).\n"); for (; lblock; lblock = lblock->next_exon) fprintf(stderr, " GEN f=%8d t=%8d EST f=%8d t=%8d flag=%d\n", lblock->frGEN, lblock->toGEN, lblock->frEST, lblock->toEST, lblock->flag); kill(getpid(), SIGKILL); } prev->next_exon = (*exons)->next_exon; //freeExon(*exons); garbage collected *exons = prev; } else { (*exons)->toEST = i; (*exons)->toGEN = j; (*exons)->length = (*exons)->toEST-(*exons)->frEST+1; (*exons)->alignmentLength = ((*exons)->toGEN - (*exons)->frGEN + 1 + (*exons)->toEST - (*exons)->frEST + 1 + (*exons)->numInDel); (*exons)->percentID = computePercentIdentity((*exons)->numEdits, (*exons)->alignmentLength); } *pA = i+1; return; }