示例#1
0
static
int
abAbacus::SetUngappedFragmentPositions(FragType type,int32 n_frags, MultiAlignT *uma) {

  int32 num_frags   = GetNumIntMultiPoss(uma->f_list);
  int32 num_unitigs = GetNumIntUnitigPoss(uma->u_list);

  HashTable_AS *unitigFrags = CreateScalarHashTable_AS();

  int32 num_columns   = GetMultiAlignLength(uma);
  int32 ungapped_pos  = 0;

  int32 *gapped_positions = new int32 [num_columns + 1];
  char  *consensus        = Getchar(uma->consensus,0);

  for (int32 i=0; i<num_columns+1; i++) {
    gapped_positions[i] = ungapped_pos;

    if (consensus[i] != '-')
      ungapped_pos++;
  }

  //  Remember the first fragment we add.
  int32 first_frag = GetNumCNS_AlignedContigElements(fragment_positions);

  for (int32 ifrag=0; ifrag<num_frags; ifrag++) {
    CNS_AlignedContigElement epos;
    IntMultiPos *frag = GetIntMultiPos(uma->f_list, ifrag);

    if (ExistsInHashTable_AS(unitigFrags, frag->ident, 0)) {
      fprintf(stderr,"SetUngappedFragmentPositions()-- ident %d already in hashtable\n", frag->ident);
      assert(0);
    }
    if (HASH_SUCCESS != InsertInHashTable_AS(unitigFrags, frag->ident, 0, 1, 0)) {
      fprintf(stderr,"SetUngappedFragmentPositions()-- Failure to insert ident %d in hashtable\n", frag->ident);
      assert(0);
    }

    assert(frag->position.bgn >= 0);
    assert(frag->position.bgn < num_columns + 1);
    assert(frag->position.end >= 0);
    assert(frag->position.end < num_columns + 1);

    epos.frg_or_utg                  = CNS_ELEMENT_IS_FRAGMENT;
    epos.idx.fragment.frgIdent       = frag->ident;
    epos.idx.fragment.frgType        = frag->type;
    epos.idx.fragment.frgContained   = frag->contained;
    epos.idx.fragment.frgInUnitig    = (type == AS_CONTIG) ? -1 : uma->maID;
    epos.position.bgn                = gapped_positions[frag->position.bgn];
    epos.position.end                = gapped_positions[frag->position.end];

    //fprintf(stderr, "SetUngappedFragmentPositions()-- FRG id=%d type=%c pos=%d,%d (orig pos=%d,%d)\n",
    //        frag->ident, frag->type, epos.position.bgn, epos.position.end, frag->position.bgn, frag->position.end);

    //  Adjust the ungapped position if we fall within a gap
    //
    if (epos.position.bgn == epos.position.end) {
      fprintf(stderr,"SetUngappedFragmentPositions()-- Encountered bgn==end=="F_S32" in ungapped coords within SetUngappedFragmentPositions for "F_CID "(gapped coords "F_S32","F_S32")\n",
              epos.position.bgn,frag->ident,frag->position.bgn,frag->position.end);
      assert(frag->position.bgn != frag->position.end);

      if (frag->position.bgn < frag->position.end) {
        if (epos.position.bgn > 0)
          epos.position.bgn--;
        else
          epos.position.end++;
      } else {
        if (epos.position.end > 0)
          epos.position.end--;
        else
          epos.position.bgn++;
      }
      fprintf(stderr,"SetUngappedFragmentPositions()--   Reset to "F_S32","F_S32"\n",
              epos.position.bgn,
              epos.position.end);
    }

    AppendVA_CNS_AlignedContigElement(fragment_positions, &epos);
  }


  for (int32 ifrag=0; ifrag < num_unitigs; ifrag++){
    CNS_AlignedContigElement epos;
    IntUnitigPos *unitig = GetIntUnitigPos(uma->u_list, ifrag);

    epos.frg_or_utg           = CNS_ELEMENT_IS_UNITIG;
    epos.idx.unitig.utgIdent  = unitig->ident;
    epos.idx.unitig.utgType   = unitig->type;
    epos.position.bgn         = gapped_positions[unitig->position.bgn];
    epos.position.end         = gapped_positions[unitig->position.end];

    //fprintf(stderr, "SetUngappedFragmentPositions()-- UTG id=%d type=%c pos=%d,%d (orig pos=%d,%d)\n",
    //        unitig->ident, unitig->type, epos.position.bgn, epos.position.end, unitig->position.bgn, unitig->position.end);

    AppendVA_CNS_AlignedContigElement(fragment_positions,&epos);
  }

  //  This is used only by ReplaceEndUnitigInContig().  Mark fragments in the "anchoring" contig
  //  that belong to this unitig.
  //
  if (type != AS_CONTIG) {
    Fragment *anchor = GetFragment(fragmentStore,0);

    if ((anchor != NULL) &&
        (anchor->type == AS_CONTIG)) {
      CNS_AlignedContigElement *af = GetCNS_AlignedContigElement(fragment_positions, anchor->components);

      for (int32 ifrag=0; ifrag < anchor->n_components; ifrag++, af++) {
        if ((af->frg_or_utg == CNS_ELEMENT_IS_FRAGMENT) &&
            (ExistsInHashTable_AS(unitigFrags, af->idx.fragment.frgIdent, 0)))
          af->idx.fragment.frgInUnitig = uma->maID;
      }
    }
  }

  DeleteHashTable_AS(unitigFrags);
  delete [] gapped_positions;

  return first_frag;
}
示例#2
0
static
void
PlaceFragments(int32 fid,
               IntUnitigPos *aiup,
               CNS_Options  *opp) {

  Fragment                 *afrag = GetFragment(fragmentStore,fid);
  Fragment                 *bfrag = NULL;

  CNS_AlignedContigElement *belem = GetCNS_AlignedContigElement(fragment_positions, afrag->components);

  if (afrag->n_components == 0)
    return;

  VA_TYPE(int32) *trace = CreateVA_int32(AS_READ_MAX_NORMAL_LEN+1);

  for (; belem->frg_or_utg == CNS_ELEMENT_IS_FRAGMENT; belem++) {

    if (FALSE == ExistsInHashTable_AS(fragmentMap, belem->idx.fragment.frgIdent, 0))
      //  Fragment is not in the contigs f_list.  It is an unplaced read from a surrogate.
      continue;

    // if it exists in the fragmentMap it should exist in this map as well since it was added at the same time
    // look up where this fragment is placed within the entire contig, see if that matches where we're about to place it
    // this is necessary for surrogates that are multiply placed in a single contig for example:
    // contig: --------------*****--------*****--------> where ***** represents a surrogate
    //                        ---->                      readA
    // when placing readA within the surrogate unitig, we see if readA belongs in surrogate instance A or B
    // by computing the position of the unitig within the contig and adding ahang to it
    // if this computed position matches the position that the IMP record retrieved below tells us, proceed, otherwise skip placement
    IntMultiPos *bimp = (IntMultiPos *)LookupValueInHashTable_AS(fragmentToIMP, belem->idx.fragment.frgIdent, 0);
    int32  bbgn = (bimp->position.bgn < bimp->position.end ? bimp->position.bgn : bimp->position.end);
    int32  abgn = (aiup->position.bgn < aiup->position.end ? aiup->position.bgn : aiup->position.end);

    int32 fcomplement = afrag->complement;
    int32 bcomplement = (belem->position.bgn < belem->position.end) ? 0 : 1;

    int32           ahang = 0;
    int32           bhang = 0;
    int32           ovl   = 0;
    OverlapType   otype;

    //  all of fid's component frags will be aligned to it (not to
    //  each other)
    //
    //            fcomplement==0                                fcomplement==1
    //
    //        A)       fid                                  C)     fid
    //          ------------------>                            <----------------
    //          --->                                                        <---
    //           bid (bcomplement==0)                                       bid
    //
    //        B)       fid                                  D)     fid
    //          ------------------>                            <----------------
    //          <---                                                        --->
    //           bid (bcomplement==1)                                       bid
    //

    //  The afrag is a unitig, so b is always contained (length of
    //  overlap is length of belem).

    if        (fcomplement && bcomplement) {
      ahang = afrag->length - belem->position.bgn; /* Case D */
      bhang = belem->position.end - afrag->length;
      ovl   = belem->position.bgn - belem->position.end;
    } else if (fcomplement && !bcomplement) {
      ahang = afrag->length - belem->position.end; /* Case C */
      bhang = belem->position.bgn - afrag->length;
      ovl   = belem->position.end - belem->position.bgn;
    } else if (!fcomplement && bcomplement) {
      ahang = belem->position.end;                 /* Case B */
      bhang = belem->position.bgn - afrag->length;
      ovl   = belem->position.bgn - belem->position.end;
    } else {
      ahang = belem->position.bgn;                 /* Case A */
      bhang = belem->position.end - afrag->length;
      ovl   = belem->position.end - belem->position.bgn;
    }

    assert(ahang >= 0);
    assert(bhang <= 0);
    assert(ovl   >  0);

    if (aiup->num_instances > 1 && abs(ahang + abgn - bbgn) > MAX_SURROGATE_FUDGE_FACTOR) { 
      if (VERBOSE_MULTIALIGN_OUTPUT)
        fprintf(stderr, "Not placing fragment %d into unitig %d because the positions (%d, %d) do not match (%d, %d)\n",
                belem->idx.fragment.frgIdent, afrag->iid,
                bimp->position.bgn, bimp->position.end,
                ahang + GetColumn(columnStore,(GetBead(beadStore,afrag->firstbead.get()                ))->column_index)->ma_index,
                bhang + GetColumn(columnStore,(GetBead(beadStore,afrag->firstbead.get()+afrag->length-1))->column_index)->ma_index+1);
      continue;
    }

    int32 blid = AppendFragToLocalStore(belem->idx.fragment.frgType,
                                      belem->idx.fragment.frgIdent,
                                      (bcomplement != fcomplement),
                                      belem->idx.fragment.frgContained,
                                      AS_OTHER_UNITIG);

    afrag = GetFragment(fragmentStore, fid);  // AppendFragToLocalStore can change the pointer on us.
    bfrag = GetFragment(fragmentStore, blid);

    if (!GetAlignmentTraceDriver(afrag, NULL, bfrag, &ahang, &bhang, ovl, trace, &otype, GETALIGNTRACE_CONTIGF, 0)) {

      //if (!GetAlignmentTrace(afrag->lid, 0, blid, &ahang, &bhang, ovl, trace, &otype, DP_Compare,              DONT_SHOW_OLAP, 0, AS_CONSENSUS, AS_CNS_ERROR_RATE) &&
      //  !GetAlignmentTrace(afrag->lid, 0, blid, &ahang, &bhang, ovl, trace, &otype, Local_Overlap_AS_forCNS, DONT_SHOW_OLAP, 0, AS_CONSENSUS, AS_CNS_ERROR_RATE)) {

      Bead   *afirst = GetBead(beadStore, afrag->firstbead.get() + ahang);
      Column *col    = GetColumn(columnStore, afirst->column_index);
      MANode *manode = GetMANode(manodeStore, col->ma_id);

      RefreshMANode(manode->lid, 0, opp, NULL, NULL, 0, 0);  //  BPW not sure why we need this

      fprintf(stderr, "Could (really) not find overlap between %d (%c) and %d (%c) estimated ahang: %d (ejecting frag %d from contig)\n",
              afrag->iid, afrag->type, belem->idx.fragment.frgIdent, belem->idx.fragment.frgType, ahang, belem->idx.fragment.frgIdent);

      GetFragment(fragmentStore,blid)->deleted = 1;
    } else {
      ApplyAlignment(afrag->lid, 0, NULL, blid, ahang, Getint32(trace,0));
    }
  }  //  over all fragments

  Delete_VA(trace);
}
MultiAlignT *
ReplaceEndUnitigInContig(uint32 contig_iid,
                         uint32 unitig_iid,
                         int32 extendingLeft,
                         CNS_Options *opp) {
  int32 cid,tid; // local id of contig (cid), and unitig(tid)
  int32 aid,bid;
  int32 i,num_unitigs;
  MultiAlignT *oma;
  MultiAlignT *cma;
  IntUnitigPos *u_list;
  IntMultiPos *f_list;
  IntMultiVar  *v_list;
  int32 append_left=0;
  int32 num_frags=0;
  int32 complement=0;
  MANode *ma;
  Fragment *cfrag;
  Fragment *tfrag = NULL;
  static VA_TYPE(int32) *trace=NULL;

  oma =  tigStore->loadMultiAlign(contig_iid, FALSE);

  ResetStores(2 * GetNumchars(oma->consensus),
              2,
              2 * GetNumchars(oma->consensus));

  num_unitigs = GetNumIntUnitigPoss(oma->u_list);
  num_frags   = GetNumIntMultiPoss(oma->f_list);

  u_list = GetIntUnitigPos(oma->u_list,0);
  f_list = GetIntMultiPos(oma->f_list,0);
  v_list = GetIntMultiVar(oma->v_list,0);

  //PrintIMPInfo(stderr, num_frags,   f_list);
  //PrintIUPInfo(stderr, num_unitigs, u_list);

  // capture the consensus sequence of the original contig and put into local "fragment" format
  cid = AppendFragToLocalStore(AS_CONTIG,
                               contig_iid,
                               0,
                               0,
                               AS_OTHER_UNITIG);

  fprintf(stderr,"ReplaceEndUnitigInContig()-- contig %d unitig %d isLeft(%d)\n",
          contig_iid,unitig_iid,extendingLeft);

  //  The only real value-added from ReplaceUnitigInContig is a new consensus sequence for the contig
  //  some adjustments to positions go along with this, but the real compute is an alignment
  //  between the old contig consensus and the updated unitig
  //
  //  first we want to determine whether unitig is on left or right of contig,
  //  so that alignment can be done with a positive ahang
  //  if u is at left, i.e.:
  //
  //  C---------------C
  //  u------u
  //  then initialize new alignment with unitig, and add contig, else
  //
  //  if u is at right, i.e.:
  //
  //  C---------------C
  //           u------u
  //  then initialize new alignment with contig, and add unitig, else

  ma = CreateMANode(0);

  if ( trace == NULL )
    trace = CreateVA_int32(AS_READ_MAX_NORMAL_LEN);
  ResetVA_int32(trace);

  {
    int32 ahang,bhang,pos_offset=0;
    int32 tigs_adjusted_pos=0;
    OverlapType otype;
    int32 olap_success=0;
    cfrag=GetFragment(fragmentStore,cid);
    for(i=0;i<num_unitigs;i++) {
      uint32 id=u_list[i].ident;
      if ( id == unitig_iid ) {
        int32 bgn=u_list[i].position.bgn;
        int32 end=u_list[i].position.end;
        int32 complement_tmp=(bgn<end)?0:1;
        int32 left=(complement_tmp)?end:bgn;
        int32 right=(complement_tmp)?bgn:end;
        complement=complement_tmp;
        tid = AppendFragToLocalStore(AS_UNITIG,
                                     id,
                                     complement,
                                     0,
                                     AS_OTHER_UNITIG);
        tfrag=GetFragment(fragmentStore,tid);

        if ( extendingLeft ) {
          // need to set aid to unitig to preserve positive ahang -- and we now should always
          // have a bhang of zero.
          append_left=1;
          aid=tid;
          bid=cid;
          // and ahang estimate is the diff in size between
          // new unitig (GetFragment(fragmentStore,tid)->length) and old unitig (right-left)
          ahang = GetFragment(fragmentStore,tid)->length - (right-left);
          bhang = 0;
        }  else {
          //  --------
          //       ---+++
          //  We extended the unitig by "+++".  The ahang is just the
          //  start position of the original placement, and the bhang
          //  is the amount extended (as above).
          aid=cid;
          bid=tid;
          ahang = left;
          bhang = GetFragment(fragmentStore,tid)->length - (right-left);
        }
        SeedMAWithFragment(ma->lid, aid, opp);

        //  The expected length of this alignment is always the length of the original unitig.
        int32 ovl = right - left;

        olap_success = GetAlignmentTrace(aid, 0, bid, &ahang, &bhang, ovl, trace, &otype, DP_Compare, DONT_SHOW_OLAP, 0, GETALIGNTRACE_MERGE, AS_CGW_ERROR_RATE);

        if (!olap_success)
          olap_success = GetAlignmentTrace(aid, 0, bid, &ahang, &bhang, ovl, trace, &otype, Local_Overlap_AS_forCNS, DONT_SHOW_OLAP, 0, GETALIGNTRACE_MERGE, AS_CGW_ERROR_RATE);

        //  If the alignment fails -- usually because the ahang is
        //  negative -- return an empty alignment.  This causes
        //  extendClearRanges (the sole user of this function) to
        //  gracefully handle the failure.
        //
        if (olap_success == 0) {
          return(NULL);
          assert(olap_success);
        }

        ApplyAlignment(aid, 0, NULL, bid, ahang, Getint32(trace,0));
        RefreshMANode(ma->lid, 0, opp, NULL, NULL, 0, 0);

        //PrintAlignment(stderr,ma->lid,0,-1);

        break;
      }
    }
  }

  // Now, want to generate a new MultiAlignT which is an appropriate adjustment of original
  cma = CreateMultiAlignT();

  cma->maID = -1;
  cma->data = oma->data;

  cma->consensus = CreateVA_char(GetMANodeLength(ma->lid)+1);
  cma->quality   = CreateVA_char(GetMANodeLength(ma->lid)+1);

  GetMANodeConsensus(ma->lid, cma->consensus, cma->quality);

  // no deltas required at this stage
  // merge the f_lists and u_lists by cloning and concating

  cma->f_list = Clone_VA(oma->f_list);
  cma->u_list = Clone_VA(oma->u_list);
  cma->v_list = Clone_VA(oma->v_list);

  cma->fdelta = CreateVA_int32(0);
  cma->udelta = CreateVA_int32(0);

  {
    CNS_AlignedContigElement *components;
    CNS_AlignedContigElement *tcomponents;
    CNS_AlignedContigElement *contig_component;
    CNS_AlignedContigElement *aligned_component;
    int32 ifrag=0;
    int32 iunitig=0;
    IntMultiPos *imp;
    IntUnitigPos *iup;
    Fragment *frag;
    int32 ci=0;
    int32 tc=0; //unitig component index
    int32 bgn,end,left,right,tmp;
    int32 range_bgn=0,range_end=0,new_tig=0;
    components=GetCNS_AlignedContigElement(fragment_positions,cfrag->components);
    tcomponents=GetCNS_AlignedContigElement(fragment_positions,tfrag->components);
    // make adjustments to positions
    if ( append_left) {
      // fragments within unitig are 0 to tfrag->n_components
      // and cfrag->n_components-num_unitigs
      range_bgn = 0;
      range_end = tfrag->n_components-1;
      new_tig=cfrag->n_components-num_unitigs;
    } else {  // changed unitig on right
      // fragments within unitig are (num_frags-tfrag->n_components) to num_frags
      // and cfrag->n_components-1;
      range_bgn = (num_frags-(tfrag->n_components-1));
      range_end = num_frags;
      new_tig=cfrag->n_components-1;
    }
    while (ci < cfrag->n_components) {
      contig_component = &components[ci];
      if ( contig_component->frg_or_utg == CNS_ELEMENT_IS_FRAGMENT && contig_component->idx.fragment.frgInUnitig == unitig_iid ) {
        aligned_component = &tcomponents[tc++];
        if ( complement ) {
          bgn = tfrag->length-aligned_component->position.bgn;
          end = tfrag->length-aligned_component->position.end;
        } else {
          bgn = aligned_component->position.bgn;
          end = aligned_component->position.end;
        }
        frag = tfrag;
#ifdef DEBUG_POSITIONS
        fprintf(stderr,"compci->idx %12d bgn: %10d end: %10d\n",ci,bgn,end);
#endif
      } else if ( ci == new_tig ) {
        aligned_component =  &tcomponents[tc++];
        if ( complement ) {
          bgn = tfrag->length-aligned_component->position.bgn;
          end = tfrag->length-aligned_component->position.end;
        } else {
          bgn = aligned_component->position.bgn;
          end = aligned_component->position.end;
        }
        frag = tfrag;
#ifdef DEBUG_POSITIONS
        fprintf(stderr,"compci->idx %12d bgn: %10d end: %10d\n",ci,bgn,end);
#endif
      } else {
        aligned_component =  contig_component;
        bgn = aligned_component->position.bgn;
        end = aligned_component->position.end;
        frag = cfrag;
#ifdef DEBUG_POSITIONS
        fprintf(stderr,"compci->idx %12d bgn: %10d end: %10d\n",ci,bgn,end);
#endif
      }
      left = (bgn<end)?bgn:end;
      right = (bgn<end)?end:bgn;
      //if ( ci == new_tig ) {
      //    left = 0;
      //    right = frag->length;
      //}
      left = GetColumn(columnStore, GetBead(beadStore,frag->firstbead.get() + left   )->column_index)->ma_index;
      right= GetColumn(columnStore, GetBead(beadStore,frag->firstbead.get() + right-1)->column_index)->ma_index + 1;
      tmp = bgn;
      bgn = (bgn<end)?left:right;
      end = (tmp<end)?right:left;
      if (aligned_component->frg_or_utg==CNS_ELEMENT_IS_UNITIG) {
        iup = GetIntUnitigPos(cma->u_list,iunitig);
        iup->position.bgn = bgn;
        iup->position.end = end;
        iup->delta_length = 0;
        iup->delta = NULL;
#ifdef DEBUG_POSITIONS
        fprintf(stderr," element %d at %d,%d\n",
                ci,bgn,end);
#endif
        ci++;iunitig++;
      } else {
        imp = GetIntMultiPos(cma->f_list,ifrag);
        imp->ident = aligned_component->idx.fragment.frgIdent;
        imp->contained = aligned_component->idx.fragment.frgContained;
        imp->position.bgn = bgn;
        imp->position.end = end;
#ifdef DEBUG_POSITIONS
        fprintf(stderr," element %d at %d,%d\n", ci,bgn,end);
#endif
        imp->delta_length = 0;
        imp->delta = NULL;
        ci++;ifrag++;
      }
    }
  }
  DeleteMANode(ma->lid);
  return cma;
}