Exemple #1
0
Fichier : poly.c Projet : gitpan/GH
/*
 * trim_polyA_align - shorten the head edit script of *Sptr, and the exon
 * *exons that goes with it, so that the alignment stops just before cDNA
 * position bc.
 *
 *   Sptr    in/out: list of edit scripts; the head may be shortened or
 *           unlinked and freed.
 *   lblock  start of the exon list, handed to find_previous().
 *   exons   in/out: exon being trimmed; replaced by its predecessor when the
 *           exon is unlinked and freed.
 *   bc      first cDNA position to cut away.
 *   pA      out: bc when the script is untouched or removed outright,
 *           otherwise one past the last cDNA position that was kept.
 *   s1, s2  the two sequences; position p is read from s1[p-1] / s2[p-1].
 *           i (offset2/len2, from2/to2) walks s2, j (offset1/len1,
 *           from1/to1) walks s1.
 *
 * The script is flipped so it can be consumed from its end, operations are
 * removed (or shortened) until i == bc-1, indels left dangling at the new end
 * are dropped, and the script is flipped back.  tmpi counts the removed indel
 * columns and mismatches and is subtracted from head->score; idents counts the
 * removed matching columns and is used to adjust the exon statistics.
 */
static void trim_polyA_align(struct edit_script_list **Sptr, Exon *lblock, Exon **exons, const int bc, int *pA, uchar *s1,uchar *s2)
{
    edit_script_list *head = *Sptr;
    edit_script *tp;
    int tmpi = 0, num, idents = 0;
    uchar *a, *b;
    Exon *prev;

    int i, j;  /* i index in the cDNA */

    /* bc lies beyond the last cDNA position covered by this script. */
    if (bc>head->offset2+head->len2-1) {
        *pA = bc;
        return;
    }

    if (bc==head->offset2) {
        /* cDNA gap: remove the entire script; this should be properly sorted */
        *Sptr = head->next_script;
        Free_script(head->script);
        free(head);
        while ((*exons)->from2>=bc) {
           prev = find_previous(lblock,*exons);
           if (prev==NULL) {
               /* Report a missing predecessor instead of dereferencing NULL. */
               fatalf("trim_polyA_align: no previous exon while trimming at %d.\n",bc);
           }
           prev->next_exon = (*exons)->next_exon;
           free(*exons); *exons = prev;
        }
        *pA = bc;
        return;
    }

    /* Work from the end of the alignment backwards. */
    Flip_script(&(head->script));
    i = head->offset2 + head->len2 -1;
    j = head->offset1 + head->len1 -1;
    tp = head->script;

    while (i>=bc && tp) {
       num = tp->num;
       switch (tp->op_type) {
          case INSERT:
                   if (i>=bc && bc>i-num+1) {
                       /* bc falls inside this run: shorten it, keep the node */
                       tmpi += i-bc+1; tp->num -= i-bc+1; i = bc-1;
                   } else {
                       i -= num; tmpi += num; head->script = tp->next;
                       free(tp); tp = head->script;
                   }
                   break;
          case DELETE:
                   /* consumes only sequence-1 positions, so i is unchanged */
                   j -= num; tmpi += num; head->script = tp->next;
                   free(tp); tp = head->script;
                   break;
          case SUBSTITUTE:
                   if (i>=bc && bc>i-num+1) {
                       /* bc falls inside this run: score the removed columns
                          and shorten the run */
                       a = s2+i-1; b = s1+j-1;
                       while (a>=s2+bc-1) {
                          if (*a--!=*b--) tmpi++; else idents++;
                       }
                       j -= i-bc+1; tp->num -= i-bc+1; i = bc-1;
                   } else {
                       /* at most 1 nt remaining */
                       a = s2+i-1; b = s1+j-1;
                       while (a>=s2+i-num) {
                          if (*a--!=*b--) tmpi++; else idents++;
                       }

                       i -= num; j -= num;
                       head->script = tp->next;
                       free(tp); tp = head->script;
                   }
                   break;
          default: fatalf("Unrecognized opcode %d.\n",tp->op_type);
       }
       /* indel walk */
    }
    assert(i==bc-1);

    /*
     * Drop indels left at the new end of the script.  The loop above can end
     * with tp == NULL (whole script consumed), so tp must be tested before it
     * is dereferenced.
     */
    while (tp && tp->op_type!=SUBSTITUTE && j+1>=(*exons)->from1) {
       if (tp->op_type==INSERT) {
           i -= tp->num; tmpi += tp->num;
       } else if (j<(*exons)->from1 && i<(*exons)->from2) {
           /* already before the exon start in both sequences: not charged */
           j -= tp->num;
       } else {
           j -= tp->num; tmpi += tp->num;
       }
       head->script = tp->next;
       free(tp); tp = head->script;
    }

    if (head->script==NULL) {
        /* nothing left of this script: unlink it */
        *Sptr = head->next_script;
        free(head);
    } else {
        head->len1 = j-head->offset1+1;
        head->len2 = i-head->offset2+1;
        head->score -= tmpi;
        Flip_script(&(head->script));
    }

    if ((*exons)->from2>i) {
        /* the exon starts after the new alignment end: remove it */
        prev = find_previous(lblock,*exons);
        if (prev==NULL) {
            /* Report a missing predecessor instead of dereferencing NULL. */
            fatalf("trim_polyA_align: no previous exon while trimming at %d.\n",bc);
        }
        prev->next_exon = (*exons)->next_exon;
        free(*exons); *exons = prev;
    } else {
        double tmp_matches;
        (*exons)->to2 = i;
        (*exons)->to1 = j;
        (*exons)->length = (*exons)->to2-(*exons)->from2+1;
        tmp_matches = (*exons)->nmatches - idents;
        (*exons)->alen -= tmpi+idents;
        (*exons)->match = (int)(100*tmp_matches/(*exons)->alen);
    }
    *pA = i+1;

    return;
}
Exemple #2
0
// Shorten the head edit script of *Sptr, and the exon *exons that goes with
// it, so that the alignment stops just before cDNA (EST) position bc.
//
//   Sptr    in/out: list of edit scripts; the head may be shortened or
//           unlinked and released with ckfree().
//   lblock  start of the exon list, handed to find_previous() and walked when
//           dumping the list on error.
//   exons   in/out: exon being trimmed; replaced by its predecessor when the
//           exon is unlinked.
//   bc      first cDNA position to cut away.
//   pA      out: bc when the script is untouched or removed outright,
//           otherwise one past the last cDNA position that was kept.
//   s1, s2  the two sequences; position p is read from s1[p-1] / s2[p-1].
//           i (offset2/len2, frEST/toEST) walks s2, j (offset1/len1,
//           frGEN/toGEN) walks s1.
//
// Along the way the exon's numInDel, numEdits, numMatches and numNs counters
// are decremented for every alignment column that is removed, and tmpi (the
// number of removed indel columns and mismatches) is subtracted from
// head->score.  If find_previous() returns null the exon list is printed to
// stderr and the process sends itself SIGKILL.
//
// NOTE(review): idents and identsN are only ever incremented in this
// function; nothing here reads them.
void
Sim4::trim_polyA_align(struct edit_script_list **Sptr, Exon *lblock, Exon **exons, const int bc, int *pA, char *s1,char *s2) 
{
  edit_script_list *head = *Sptr;
  edit_script *tp;
  int tmpi = 0, num, idents = 0, identsN = 0;
  char *a, *b;
  Exon *prev;

  int i, j;  /* i index in the cDNA */

  // bc lies beyond the last cDNA position covered by this script.
  if (bc>head->offset2+head->len2-1) {
    *pA = bc;
    return;
  }

  if (bc==head->offset2) {
    /* cDNA gap: remove the entire script; is this properly sorted? LLL */
    *Sptr = head->next_script;
    Free_script(head->script);
    ckfree(head);
    // Unlink every exon that starts at or after bc.
    while ((*exons)->frEST>=bc) {
      prev = find_previous(lblock,*exons);

      if (prev == 0L) {
        // No predecessor found: dump the whole exon list and terminate.
        fprintf(stderr, "trim_polyA_align(): Corrupted exon list, cannot find the previous exon (remove entire script).\n");
        for (; lblock; lblock = lblock->next_exon)
          fprintf(stderr, "  GEN f=%8d t=%8d  EST f=%8d t=%8d   flag=%d\n",
                  lblock->frGEN, lblock->toGEN, lblock->frEST, lblock->toEST, lblock->flag);
        kill(getpid(), SIGKILL);
      }

      prev->next_exon = (*exons)->next_exon;
      //freeExon(*exons);  garbage collected
      *exons = prev;
    }
    *pA = bc;
    return;
  }

  // Work from the end of the alignment backwards: flip the script, start i/j
  // at the last positions it covers, and consume operations until i == bc-1.
  Flip_script(&(head->script));
  i = head->offset2 + head->len2 -1;
  j = head->offset1 + head->len1 -1;
  tp = head->script;

  while (i>=bc && tp) {
    num = tp->num;
    switch (tp->op_type) {
      case INSERT:
        if (i>=bc && bc>i-num+1) {
          // bc falls inside this run: shorten it by i-bc+1, keep the node.
          (*exons)->numInDel -= i - bc + 1;
          (*exons)->numEdits -= i - bc + 1;
          tmpi    += i-bc+1;
          tp->num -= i-bc+1;
          i        = bc-1;
        } else {
          // The whole run is removed: unlink and release the node.
          (*exons)->numInDel -= num;
          (*exons)->numEdits -= num;
          tmpi += num;
          i    -= num;
          head->script = tp->next;
          ckfree(tp);
          tp = head->script;
        }
        break;
      case DELETE:
        // Moves only j, so the whole run is always removed.
        (*exons)->numInDel -= num;
        (*exons)->numEdits -= num;
        j    -= num;
        tmpi += num;
        head->script = tp->next;
        ckfree(tp);
        tp = head->script;
        break;
      case SUBSTITUTE:
        if (i>=bc && bc>i-num+1) {
          // bc falls inside this run: classify each removed column as a
          // mismatch, an 'N' match or an ordinary match, then shorten the run.
          a = s2+i-1; b = s1+j-1;
          while (a>=s2+bc-1) {
            if (*a != *b) {
              (*exons)->numEdits--;
              tmpi++;
            } else {
              if (*a == 'N') {
                (*exons)->numNs--;
                identsN++;
              } else {
                (*exons)->numMatches--;
                idents++;
              }
            }
            a--;
            b--;
          }
          j -= i-bc+1; tp->num -= i-bc+1; i = bc-1;
        } else {
          /* at most 1 nt remaining */
          // Same per-column classification, over all num columns of the run,
          // after which the node is unlinked and released.
          a = s2+i-1; b = s1+j-1;
          while (a>=s2+i-num) {
            if (*a != *b) {
              (*exons)->numEdits--;
              tmpi++;
            } else {
              if (*a == 'N') {
                (*exons)->numNs--;
                identsN++;
              } else {
                (*exons)->numMatches--;
                idents++;
              }
            }
            a--;
            b--;
          }

          i -= num; j -= num;
          head->script = tp->next;
          ckfree(tp);
          tp = head->script;
        }
        break;
      // NOTE(review): with the default case compiled out, an op_type other
      // than the three above changes neither i nor tp, so this loop would not
      // make progress.
#if 0
      default:
        fatalf("Unrecognized opcode %d.\n",tp->op_type);
#endif
    }
    /* indel walk */
  }
  assert(i==bc-1);

  // Drop indels left at the new end of the script, for as long as the
  // operation is not a SUBSTITUTE and j+1 has not dropped below the exon's
  // frGEN.  tp may be null here when the loop above consumed the whole script.
  while ((tp != 0L) &&
         (tp->op_type != SUBSTITUTE) && (j+1 >= (*exons)->frGEN)) {
    if (tp->op_type==INSERT) {
      i -= tp->num;
      tmpi += tp->num;
      (*exons)->numInDel -= tp->num;
      (*exons)->numEdits -= tp->num;
    } else if (j<(*exons)->frGEN && i<(*exons)->frEST) {
      // Already before the exon start in both sequences: the operation is
      // removed without touching tmpi or the exon counters.
      j -= tp->num;
    } else {
      j -= tp->num;
      tmpi += tp->num;
      (*exons)->numInDel -= tp->num;
      (*exons)->numEdits -= tp->num;
    }
    head->script = tp->next;
    ckfree(tp);
    tp = head->script;
  }

  if (head->script==NULL) {
    // Nothing left of this script: unlink it from the list.
    *Sptr = head->next_script;
    ckfree(head);
  } else {
    // Record the shortened extent and score, and restore the script order.
    head->len1 = j-head->offset1+1;
    head->len2 = i-head->offset2+1;
    head->score -= tmpi;
    Flip_script(&(head->script));
  }

  if ((*exons)->frEST>i) {
    // The exon starts after the new alignment end: unlink it.
    prev = find_previous(lblock,*exons);

    if (prev == 0L) {
      // No predecessor found: dump the whole exon list and terminate.
      fprintf(stderr, "trim_polyA_align(): Corrupted exon list, cannot find the previous exon (frEST).\n");
      for (; lblock; lblock = lblock->next_exon)
        fprintf(stderr, "  GEN f=%8d t=%8d  EST f=%8d t=%8d   flag=%d\n",
                lblock->frGEN, lblock->toGEN, lblock->frEST, lblock->toEST, lblock->flag);
      kill(getpid(), SIGKILL);
    }

    prev->next_exon = (*exons)->next_exon;
    //freeExon(*exons);  garbage collected
    *exons = prev;
  } else {
    // Move the exon's end to the new alignment end and recompute its
    // derived statistics from the updated counters.
    (*exons)->toEST = i;
    (*exons)->toGEN = j;
    (*exons)->length = (*exons)->toEST-(*exons)->frEST+1;

    (*exons)->alignmentLength = ((*exons)->toGEN - (*exons)->frGEN + 1 +
                                 (*exons)->toEST - (*exons)->frEST + 1 +
                                 (*exons)->numInDel);
    (*exons)->percentID   = computePercentIdentity((*exons)->numEdits,
                                                   (*exons)->alignmentLength);
  }
  *pA = i+1;

  return;
}