Example #1
0
File: poly.c Project: gitpan/GH
/*
 * Run both poly-tail trimming passes over an alignment.
 *
 * Calls remove_polyT_front() and then remove_polyA_back() with the
 * given script, exon list and sequences; those helpers report their
 * results through pT and pA respectively.
 *
 * On return *pA has been rewritten as len2 - *pA + 1, where the
 * right-hand *pA is the value left by remove_polyA_back().
 * NOTE(review): this looks like a conversion from a trimmed length to
 * a 1-based position within s2 -- confirm against remove_polyA_back().
 */
void remove_poly(struct edit_script_list **Script, Exon *Exons, uchar *s1, uchar *s2, int len2, int *pT, int *pA)
{
     int reported;

     /* front (poly-T) pass first, then the back (poly-A) pass */
     remove_polyT_front(Script, Exons, s1, s2, pT);
     remove_polyA_back(Script, Exons, s1, s2, len2, pA);

     /* remap the value the back pass left in *pA */
     reported = *pA;
     *pA = len2 - reported + 1;
}
Example #2
0
//  Turn the exon list currently held in exon_list into a finished
//  exon chain plus edit script.
//
//  Steps, in order:
//    1.  bail out if the MSP manager reports too many MSPs
//    2.  optionally relink the MSPs when the outermost exons sit far
//        from the sequence ends
//    3.  bracket the list with two sentinel exons
//    4.  walk adjacent exon pairs, resolving EST overlaps and filling
//        EST gaps
//    5.  compact / filter the list and trim short marginal blocks
//    6.  adjust marginal boundaries and slide introns (both optional)
//    7.  build the edit script with pluri_align(), strip poly tails if
//        requested, and collect statistics
//
//  Outputs (dist_ptr, Exons, pA and pT are all reset on entry):
//    dist_ptr -- zeroed here, then handed to pluri_align()
//    Exons    -- the exon following the leading bracket when a script
//                was produced, 0L otherwise
//    pA, pT   -- zeroed here; handed to remove_polyA_back() and
//                remove_polyT_front() when poly tails are ignored
//    st       -- tooManyMSPs is always written; numberOfMatches is
//                written only on the too-many-MSPs bail-out; other
//                fields are left to the helpers that receive st
//
//  Returns the edit script head produced by pluri_align(); 0L when
//  there were too many MSPs or when pluri_align() left the head NULL.
//
Sim4::edit_script_list *
Sim4::SIM4(int            *dist_ptr,
           Exon          **Exons,
           int            *pA,
           int            *pT,
           sim4_stats_t   *st) {

  int     rollbflag;
  Exon   *Lblock=0L, *tmp_Lblock=0L;
  Exon   *Rblock=0L, *tmp_Rblock=0L;
  Exon   *tmp_block=0L;
  Exon   *tmp_block1=0L;

  *dist_ptr = 0;
  *Exons    = 0L;
  *pA = 0;
  *pT = 0;

  //
  //  The call to exon_cores() that used to be here is now done in sim4string.
  //

  //  See if there are too many MSPs found.  If so, fail.
  //
  st->tooManyMSPs = false;
  if (_mspManager.tooManyMSPs()) {
    st->tooManyMSPs     = true;
    st->numberOfMatches = _mspManager.numberOfMSPs();
    return(0L);
  }

  PRINTEXONS("initial exon set\n", exon_list);

  //  Find the head (Lblock) and tail (Rblock) of the initial list; the
  //  relink test below needs both.
  //
  tmp_block = Lblock = exon_list;
  while (tmp_block) {
    if (tmp_block->next_exon==NULL)
      Rblock = tmp_block;
    tmp_block = tmp_block->next_exon;
  }

  //  Relink if the first exon starts, or the last exon ends, far away
  //  from the corresponding sequence end in both genomic (>50000) and
  //  EST (>100) coordinates.  A non-NULL Lblock implies a non-NULL
  //  Rblock, because the scan above visited at least one exon.
  //
  if (Lblock &&
      ((Lblock->frGEN>50000 && Lblock->frEST>100) ||
       ((_genLen - Rblock->toGEN > 50000) && (_estLen - Rblock->toEST > 100)))) {
    //freeExonList(exon_list);  garbage collected

    exon_list = _mspManager.doLinking(globalParams->_relinkWeight,
                                      DEFAULT_DRANGE,
                                      1,
                                      1,
                                      0,
                                      true,
                                      _genSeq, _estSeq);

    PRINTEXONS("relink the initial stuff\n", exon_list);
  }
  _mspManager.clear();

  //  Rescan the (possibly relinked) list.  Rblock MUST be reset first:
  //  if relinking produced an empty list the loop body never runs, and
  //  a stale Rblock would still point at the tail of the discarded
  //  list, defeating the "Rblock == NULL" fallback below and leaving
  //  the end bracket attached to the wrong list.
  //
  Rblock    = 0L;
  tmp_block = Lblock = exon_list;
  while (tmp_block) {
    if (tmp_block->next_exon==NULL)
      Rblock = tmp_block;
    tmp_block = tmp_block->next_exon;
  }

  PRINTEXONS("initial exon set after possibly relinking\n", exon_list);

  /* enclose the current path in the (0,0,0,0) and (M+1,N+1,0,0) brackets */

#ifdef SHOW_PROGRESS
  fprintf(stderr, "exon bracket at start\n");
#endif
  Lblock = _exonManager.newExon(0,0,0,0,0,0,0,Lblock);

  //  Empty list: the start bracket is also the tail, so the end
  //  bracket is hung directly off it.
  if (Rblock == NULL)
    Rblock = Lblock;
#ifdef SHOW_PROGRESS
  //  %p (with a void* cast) is the only portable way to print a pointer.
  fprintf(stderr, "exon bracket at end; Lblock = %p, Rblock = %p\n", (void *)Lblock, (void *)Rblock);
#endif
  Rblock->next_exon = _exonManager.newExon(_genLen+1,_estLen+1,0,0,0,0,0,NULL);

  PRINTEXONS("initial exon set after inserting brackets\n", Lblock);

  /* compute current statistics */
  bool good_match = get_match_quality(Lblock, Rblock, st, _estLen);


  PRINTEXONS("after get_match_quality\n", Lblock);


#ifdef SHOW_PROGRESS
  fprintf(stderr, "before big nasty while loop\n");
#endif


  //  Walk every adjacent pair (tmp_block, tmp_block1) in the bracketed
  //  list.  The loop ends when tmp_block is the last element.
  //
  tmp_block = Lblock;
  while ((tmp_block1 = tmp_block->next_exon)!=NULL) {

    PRINTEXONS("start of loop to fill in missing pieces\n", Lblock);

    rollbflag = 0;

    //  This is the distance from this exon to the next exon
    //  in the EST
    //
    int diff = (int)(tmp_block1->frEST - tmp_block->toEST - 1);

#ifdef SHOW_PROGRESS
    fprintf(stdout, "tmp_block: %8d %8d %8d %8d %d diff=%d\n",
            tmp_block->frGEN,
            tmp_block->toGEN,
            tmp_block->frEST,
            tmp_block->toEST,
            tmp_block->flag,
            diff);
#endif

    if (diff) {

      if (diff < 0) {
        //  If the diff is less than zero, then there is an overlap in
        //  the EST.  Wobble the boundary using GTAG signals (so
        //  obviously, this won't work correctly if we are not cDNA).
        //
#ifdef SHOW_PROGRESS
        fprintf(stderr, "Called SIM4_block1() with diff=%d\n", diff);
#endif
        rollbflag = SIM4_block1(Lblock, tmp_block, tmp_block1);
      } else {

        //  Otherwise, there is a gap in the EST, and we need to fill
        //  it in.  This is done only if there is no overlap in the
        //  genomic.
        //
        //  NOTE(review): SIM4_block2/3/4 presumably receive tmp_Lblock
        //  and tmp_Rblock by reference and set them to the new blocks
        //  found in the gap (or NULL) -- confirm against their
        //  declarations; the merge step below depends on it.
        //
        if (tmp_block1->frGEN - tmp_block->toGEN - 1 > 0) {
          if (tmp_block1->toEST &&
              tmp_block->toEST) {
            //  We are not the first or last gap -- an interior gap
            //  between two exons.
            //
#ifdef SHOW_PROGRESS
            fprintf(stderr, "Called SIM4_block2()\n");
#endif
            rollbflag = SIM4_block2(tmp_Lblock,
                                    tmp_Rblock,
                                    tmp_block,
                                    tmp_block1);
          } else if (tmp_block1->toGEN) {
            //  Not the last gap, so must be the first gap.
            //
#ifdef SHOW_PROGRESS
            fprintf(stderr, "Called SIM4_block3()\n");
#endif
            rollbflag = SIM4_block3(good_match,
                                    tmp_Lblock,
                                    tmp_Rblock,
                                    tmp_block,
                                    tmp_block1);
          } else {
            //  By default, the last gap.
            //
#ifdef SHOW_PROGRESS
            fprintf(stderr, "Called SIM4_block4()\n");
#endif
            rollbflag = SIM4_block4(good_match,
                                    tmp_Lblock,
                                    tmp_Rblock,
                                    tmp_block,
                                    tmp_block1);
          }
        } else {
          //  Overlapping genomic.  Clearing both pointers makes the
          //  merge step below a no-op for this pair.
          //
          tmp_Rblock = tmp_Lblock = NULL;
        }

        //  Merge block in the exon list; make connections to the
        //  previous list of blocks; maintain increasing order
        //
        if (tmp_Lblock) {
          tmp_block->next_exon = tmp_Lblock;
          tmp_Rblock->next_exon = tmp_block1;

          PRINTEXONS("before merge tmp_block\n",  tmp_block);
          PRINTEXONS("before merge tmp_block1\n", tmp_block1);
          PRINTEXONS("before merge tmp_Lblock\n", tmp_Lblock);
          PRINTEXONS("before merge tmp_Rblock\n", tmp_Rblock);

          merge(&tmp_block,&tmp_block1);
        }
      }
    }

    //  If this exon block was not removed, move to the next.  If it was removed,
    //  we're already there.
    //
    if (rollbflag == 0)
      tmp_block = tmp_block1;
  }


  PRINTEXONS("all done -- final Lblock\n", Lblock);

#ifdef SHOW_PROGRESS
  fprintf(stderr, "sim4b1 -- before compact_list\n");
#endif

  /* compaction step; note: it resets the right end of the list to   */
  /* the last item in the block list                                 */

  compact_list(&(Lblock->next_exon), &Rblock, (globalParams->_interspecies ? SHORT_INTRON : wordSize));

  if (globalParams->_interspecies)
     filter(&Lblock, &Rblock);


#ifdef SHOW_PROGRESS
  fprintf(stderr, "sim4b1 -- before small block at start removal\n");
#endif

  /* eliminate marginal small blocks at the start of the sequence;   */
  /* resets the empty alignment to one block (Lblock) only           */

  tmp_block = Lblock->next_exon;

  //  Skip leading blocks shorter than wordSize.  The toGEN test stops
  //  the skip at a block whose toGEN is zero (the end bracket is
  //  created with toGEN == 0).
  //
  while ((tmp_block!=NULL) && (tmp_block->length<wordSize) && tmp_block->toGEN) {
    tmp_block1 = tmp_block;   //  only kept for the disabled freeExon() below
    tmp_block = tmp_block->next_exon;
    //freeExon(tmp_block1);  garbage collected
  }
  Lblock->next_exon = tmp_block;

  PRINTEXONS("all done -- after removing small blocks at the start\n", Lblock);

  //  eliminate marginal small blocks at the end of the sequence
  //  XXX:  Yes, there is a leak here.  That's why we garbage collect!

#ifdef SHOW_PROGRESS
  fprintf(stderr, "Rblock before end of list removal %p\n", (void *)Rblock);
#endif

  //  'last' ends up as the final block with length >= wordSize, or
  //  stays at the first block when none qualifies.  Everything between
  //  it and the old Rblock's successor is unlinked.
  //
  Exon *last = Lblock->next_exon;
  tmp_block = last;
  while (tmp_block!=NULL) {
    if (tmp_block->length>=wordSize)
      last = tmp_block;
    tmp_block = tmp_block->next_exon;
  }
  if (last && last->toGEN)
    last->next_exon = Rblock->next_exon;
  Rblock = last;

#ifdef SHOW_PROGRESS
  fprintf(stderr, "Rblock after end of list removal %p\n", (void *)Rblock);
#endif

  PRINTEXONS("all done -- after removing small blocks at the end\n", Lblock);

  /* if high accuracy requirement, adjust boundaries of marginal exons */
  if (_accurateSequences)
    adjustBoundariesOfMarginalExons(Lblock);

  /* Slide exon boundaries for optimal intron signals */
  if (globalParams->_slideIntrons) {
    if (globalParams->_interspecies == 1) {
       SLIDE_INTRON(MIN(15,MAX_SLIDE), Lblock->next_exon, Rblock, spliceModel, st, 1);
     } else {
       //  The last argument mirrors whether get_sync_flag() returned 1.
       if (get_sync_flag(Lblock, Rblock, 6) == 1)
         SLIDE_INTRON(6, Lblock->next_exon, Rblock, SPLICE_ORIGINAL, st, 1);
       else
         SLIDE_INTRON(6, Lblock->next_exon, Rblock, SPLICE_ORIGINAL, st, 0);
     }
  } else {
    //  Set orientation flag on introns to be unknown -- this has an
    //  undesired side effect of forcing the resulting match to have a
    //  strand orientation the same as the intron orientation (if one
    //  exon) instead of 'unknown'.
    Exon *t0 = Lblock->next_exon;
    Exon *t1 = NULL;

    //  Marks every exon that is followed by another exon with a
    //  non-zero toGEN; the final real exon is left untouched.
    while (t0 && (t1=t0->next_exon) && t1->toGEN) {
      t0->ori = 'E';
      t0 = t1;
    }
  }

  /* decreasingly; script will be in reverse order */

  struct edit_script_list *Shead = NULL;

  flip_list(&Lblock, &Rblock);
  pluri_align(dist_ptr, Lblock, &Shead, st);
  flip_list(&Lblock, &Rblock);      /* increasingly */

  *pT = 0;
  *pA = 0;
  if (Shead) {
    if (globalParams->_ignorePolyTails) {
      remove_polyT_front(&Shead, Lblock, _genSeq, _estSeq, pT);
      remove_polyA_back(&Shead, Lblock, _genSeq, _estSeq, _estLen, pA);

      //  Only recompute when one of the passes reported something.
      if (*pA || *pT)
        updateStatistics(Lblock, st);
    }

    get_stats(Lblock, st);

    //  Hand back the list without the leading bracket.
    *Exons = Lblock->next_exon;
    //freeExon(Lblock);  garbage collected
  } else {
    *Exons = 0L;

    //freeExonList(Lblock);  garbage collected
  }

  //  Memory leak when Script_head == 0L -- see pluri_align, too!

  return(Shead);
}