Example #1
0
/*
 * Build the list of bubbles found among the first num_valid entries of top[].
 *
 * Pass 1 registers every initiation vertex (as decided by
 * _is_initiation_node) whose forward and reverse vertex sets are both
 * non-empty in a hash table keyed by that (fwd, rvs) set pair.  Pass 2
 * looks up every termination vertex (as decided by _is_termination_node)
 * with non-empty sets in that table; each hit becomes one list element
 * whose start is the stored initiation vertex and whose end is the
 * termination vertex.
 *
 * Returns the head of a singly linked list allocated with safe_malloc
 * (NULL when no pair was found).  The hash table and the key array are
 * released before returning.
 */
AS_CGB_Bubble_List_t
_collect_bubbles(BubGraph_t bg, BubVertexSet *fwd, BubVertexSet *rvs,
                 IntFragment_ID *top, int num_valid)
{
  AS_CGB_Bubble_List_t   head   = NULL;
  AS_CGB_Bubble_List_t  *tail   = &head;   /* slot that receives the next element */
  HashTable_AS          *starts = NULL;
  BVSPair               *keys   = NULL;
  BVSPair                probe;
  IntFragment_ID         idx;

  starts = CreateGenericHashTable_AS(_hash_vset_hash, _hash_vset_cmp);
  keys   = (BVSPair *)safe_malloc(sizeof(BVSPair) * num_valid);

  /* Pass 1: remember every initiation vertex under its vertex-set pair. */
  for (idx = 0; idx < num_valid; ++idx) {
    IntFragment_ID vtx = top[idx];

    if (!_is_initiation_node(BG_inDegree(bg, vtx, AS_CGB_BUBBLE_E_VALID),
                             BG_outDegree(bg, vtx, AS_CGB_BUBBLE_E_VALID)))
      continue;
    if (BVS_empty(&(fwd[vtx])) || BVS_empty(&(rvs[vtx])))
      continue;

#if AS_CGB_BUBBLE_VERY_VERBOSE
    fprintf(stderr, "Inserting "F_IID " ("F_IID ") into the table.\n", vtx,
            get_iid_fragment(BG_vertices(bg), vtx));
#endif

    /* The table stores the key by address, so the key must outlive the
       insertion; keys[] provides one slot per candidate vertex. */
    keys[idx].f = &(fwd[vtx]);
    keys[idx].r = &(rvs[vtx]);
    InsertInHashTable_AS(starts, (uint64)(INTPTR)&keys[idx], sizeof(BVSPair), (uint64)(INTPTR)&top[idx], 0);
  }

  /* Pass 2: match every termination vertex against the stored starts. */
  for (idx = 0; idx < num_valid; ++idx) {
    IntFragment_ID        vtx   = top[idx];
    IntFragment_ID       *match = NULL;
    AS_CGB_Bubble_List_t  node  = NULL;

    if (!_is_termination_node(BG_inDegree(bg, vtx, AS_CGB_BUBBLE_E_VALID),
                              BG_outDegree(bg, vtx, AS_CGB_BUBBLE_E_VALID)))
      continue;
    if (BVS_empty(&(fwd[vtx])) || BVS_empty(&(rvs[vtx])))
      continue;

#if AS_CGB_BUBBLE_VERY_VERBOSE
    fprintf(stderr, "Looking for matches for "F_IID " ("F_IID ") in the table.  ",
            vtx, get_iid_fragment(BG_vertices(bg), vtx));
#endif

    probe.f = &(fwd[vtx]);
    probe.r = &(rvs[vtx]);
    match = (IntFragment_ID *)(INTPTR)LookupValueInHashTable_AS(starts, (uint64)(INTPTR)&probe, sizeof(BVSPair));

#if AS_CGB_BUBBLE_VERY_VERBOSE
    if (!match)
      fprintf(stderr, "None found.\n");
    else
      fprintf(stderr, "Found init node = "F_IID " ("F_IID ").\n", *match,
              get_iid_fragment(BG_vertices(bg), *match));
#endif

    if (match == NULL)
      continue;

    /* Append a new bubble record at the tail of the result list. */
    node = (AS_CGB_Bubble_List *)safe_malloc(sizeof(AS_CGB_Bubble_List));
    node->start = *match;
    node->end   = vtx;
    node->next  = NULL;

    *tail = node;
    tail  = &(node->next);
  }

  DeleteHashTable_AS(starts);
  safe_free(keys);

  return head;
}
Example #2
0
//  Recompute the parent, hangs and containment flag of fragment
//  'thisFrag' in 'iunitig' from the overlaps stored in 'ovs'.
//
//  Strategy, in order:
//    1)  Look for a fragment earlier in f_list that contains thisFrag.
//    2)  Otherwise look for the earlier dovetail overlap with the
//        smallest b-hang.
//    3)  Retry 1 and 2 once with 10 bases of slop on the containment hangs.
//    4)  Otherwise find the first later fragment that overlaps with a
//        positive a-hang, move thisFrag to that fragment's slot (shifting
//        the fragments in between down by one), and recurse on whatever
//        fragment now sits at index thisFrag.
//
//  Returns -1 when the fragment (or, after a shift, its replacement) was
//  updated, otherwise the index 'thisFrag' to report the failure.
//
//  All working storage (the three hash tables and the overlap buffer) is
//  released before returning.
//
int
updateFragmentWithParent(IntUnitigMesg *iunitig, int thisFrag, OverlapStore *ovs) {
    uint32         ovlMax = 0;
    uint32         ovlLen = 0;
    OVSoverlap    *ovl    = NULL;

    int     testFrag = thisFrag - 1;
    int     testOvl  = 0;

    int     oldParent = iunitig->f_list[thisFrag].parent;
    int     oldAHang  = iunitig->f_list[thisFrag].ahang;
    int     oldBHang  = iunitig->f_list[thisFrag].bhang;

    uint32  consensusCutoff = AS_OVS_encodeQuality(AS_CNS_ERROR_RATE);

    int     contained       = 0;
    int     fragment        = -1;
    int     overlap         = -1;
    int     overlapIdentity = consensusCutoff;
    int     overlapBHang    = AS_READ_MAX_NORMAL_LEN;

    //  ovlBefore / ovlAfter map the ident of each fragment before / after
    //  thisFrag to the index of its overlap in ovl[], or to ~0 (read back
    //  as -1) when no usable overlap was seen.  iidIndex maps every
    //  fragment ident to its index in f_list.
    //
    HashTable_AS  *ovlBefore = CreateScalarHashTable_AS();
    HashTable_AS  *ovlAfter  = CreateScalarHashTable_AS();
    HashTable_AS  *iidIndex  = CreateScalarHashTable_AS();

    int     hangSlop = 0;

    int     failed   = -1;

    fprintf(stderr, "\n");
    fprintf(stderr, "WORKING on fragment %d == %d\n", thisFrag, iunitig->f_list[thisFrag].ident);

    //  Save in the hash table the fragments before/after this one.
    //
    for (testFrag=0; testFrag<iunitig->num_frags; testFrag++) {
        InsertInHashTable_AS(iidIndex,
                             (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64),
                             (uint64)testFrag, 0);

        if (testFrag < thisFrag)
            InsertInHashTable_AS(ovlBefore,
                                 (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64),
                                 ~(uint64)0, 0);
        if (testFrag > thisFrag)
            InsertInHashTable_AS(ovlAfter,
                                 (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64),
                                 ~(uint64)0, 0);
    }

    //  Get the overlaps for this fragment.
    //
    AS_OVS_setRangeOverlapStore(ovs, iunitig->f_list[thisFrag].ident, iunitig->f_list[thisFrag].ident);

    if (ovlMax < AS_OVS_numOverlapsInRange(ovs)) {
        ovlMax = AS_OVS_numOverlapsInRange(ovs) * 2;
        ovl    = (OVSoverlap *)safe_realloc(ovl, sizeof(OVSoverlap) * ovlMax);
    }
    ovlLen = 0;
    while (AS_OVS_readOverlapFromStore(ovs, ovl+ovlLen, AS_OVS_TYPE_OVL)) {
        int  aid=0,  bid=0;
        int  afwd=0, bfwd=0;
        int  correct=0;

        //  Reorient the overlap so the b_iid is thisFrag.
        //
        {
            AS_IID x = ovl[ovlLen].a_iid;
            ovl[ovlLen].a_iid = ovl[ovlLen].b_iid;
            ovl[ovlLen].b_iid = x;

            if (ovl[ovlLen].dat.ovl.flipped) {
                int x = ovl[ovlLen].dat.ovl.a_hang;
                ovl[ovlLen].dat.ovl.a_hang = ovl[ovlLen].dat.ovl.b_hang;
                ovl[ovlLen].dat.ovl.b_hang = x;
            } else {
                ovl[ovlLen].dat.ovl.a_hang = -ovl[ovlLen].dat.ovl.a_hang;
                ovl[ovlLen].dat.ovl.b_hang = -ovl[ovlLen].dat.ovl.b_hang;
            }
        }

        //  Make sure we get the correct overlap.  We seem to be allowed
        //  to have both an I and an N overlap for a given pair of
        //  fragments.  At least, I hope that's all we're allowed.
        //
        aid = LookupValueInHashTable_AS(iidIndex, (uint64)ovl[ovlLen].a_iid, sizeof(uint64));
        bid = LookupValueInHashTable_AS(iidIndex, (uint64)ovl[ovlLen].b_iid, sizeof(uint64));

        afwd = (iunitig->f_list[aid].position.bgn < iunitig->f_list[aid].position.end);
        bfwd = (iunitig->f_list[bid].position.bgn < iunitig->f_list[bid].position.end);

        //  The overlap is the one we want when its flipped flag agrees
        //  with the relative orientation of the two fragments in the unitig.
        //
        if ((afwd == bfwd) && (ovl[ovlLen].dat.ovl.flipped == 0))
            correct = 1;
        if ((afwd != bfwd) && (ovl[ovlLen].dat.ovl.flipped == 1))
            correct = 1;


        if (ExistsInHashTable_AS(ovlBefore, (uint64)ovl[ovlLen].a_iid, sizeof(uint64))) {
            if (correct)
                ReplaceInHashTable_AS(ovlBefore,
                                      (uint64)ovl[ovlLen].a_iid, sizeof(uint64),
                                      (uint64)ovlLen, 0);
            fprintf(stderr, "%s before overlap for %d (%c) to %d (%c) ("F_S64","F_S64",%c) at ovl position %d\n",
                    correct ? "save" : "skip",
                    ovl[ovlLen].a_iid, afwd ? 'F' : 'R',
                    ovl[ovlLen].b_iid, bfwd ? 'F' : 'R',
                    ovl[ovlLen].dat.ovl.a_hang,
                    ovl[ovlLen].dat.ovl.b_hang,
                    ovl[ovlLen].dat.ovl.flipped ? 'I' : 'N',
                    ovlLen);
        }

        if (ExistsInHashTable_AS(ovlAfter, (uint64)ovl[ovlLen].a_iid, sizeof(uint64))) {
            if (correct)
                ReplaceInHashTable_AS(ovlAfter,
                                      (uint64)ovl[ovlLen].a_iid, sizeof(uint64),
                                      (uint64)ovlLen, 0);
            fprintf(stderr, "%s after  overlap for %d (%c) to %d (%c) ("F_S64","F_S64",%c) at ovl position %d\n",
                    correct ? "save" : "skip",
                    ovl[ovlLen].a_iid, afwd ? 'F' : 'R',
                    ovl[ovlLen].b_iid, bfwd ? 'F' : 'R',
                    ovl[ovlLen].dat.ovl.a_hang,
                    ovl[ovlLen].dat.ovl.b_hang,
                    ovl[ovlLen].dat.ovl.flipped ? 'I' : 'N',
                    ovlLen);
        }

        ovlLen++;
    }

tryAgain:

    //  See if we're contained in any of these overlaps.
    if (overlap == -1) {
        for (testFrag=thisFrag-1; testFrag>=0; testFrag--) {
            if (ExistsInHashTable_AS(ovlBefore, (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64))) {
                testOvl = LookupValueInHashTable_AS(ovlBefore, (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64));

                //  Fragment has no overlap
                if (testOvl == -1)
                    continue;

                fprintf(stderr, "found testFrag = %d testOvl = %d erates "F_U64" %u hang "F_S64" "F_S64" (CONTAIN) slop=%d\n",
                        testFrag, testOvl,
                        ovl[testOvl].dat.ovl.orig_erate, consensusCutoff,
                        ovl[testOvl].dat.ovl.a_hang,
                        ovl[testOvl].dat.ovl.b_hang,
                        hangSlop);

                //  Three if's for documentation:
                //  1)  If we're an overlap we care about
                //  2)  If we're a contained overlap
                //  3)  If we're better than what we've seen so far
                //  Then save the overlap
                //
                if (ovl[testOvl].dat.ovl.orig_erate < consensusCutoff) {
                    if ((ovl[testOvl].dat.ovl.a_hang >= -hangSlop) &&
                            (ovl[testOvl].dat.ovl.b_hang <= hangSlop)) {
                        if (ovl[testOvl].dat.ovl.orig_erate < overlapIdentity) {
                            contained       = 1;
                            fragment        = testFrag;
                            overlap         = testOvl;
                            overlapBHang    = 0;
                            overlapIdentity = ovl[testOvl].dat.ovl.orig_erate;
                        }
                    }
                }
            }
        }
    }


    //  If not contained, scan the overlaps again, looking for the
    //  thickest/bestest.  This will be the overlap with the smallest a
    //  or b hang -- depending on the orientation of the parent
    //  fragment.
    //
    //  Instead of working through overlaps, we work through fragments.
    //
    if (overlap == -1) {
        for (testFrag=thisFrag-1; testFrag>=0; testFrag--) {
            if (ExistsInHashTable_AS(ovlBefore, (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64))) {
                int ahang = 0;
                int bhang = 0;

                testOvl = LookupValueInHashTable_AS(ovlBefore, (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64));

                //  Fragment has no overlap
                if (testOvl == -1)
                    continue;

                //  Overlap is too noisy
                if (ovl[testOvl].dat.ovl.orig_erate >= consensusCutoff)
                    continue;

                if (iunitig->f_list[testFrag].position.bgn < iunitig->f_list[testFrag].position.end) {
                    ahang = ovl[testOvl].dat.ovl.a_hang;
                    bhang = ovl[testOvl].dat.ovl.b_hang;
                } else {
                    ahang = -ovl[testOvl].dat.ovl.b_hang;
                    bhang = -ovl[testOvl].dat.ovl.a_hang;
                }

                //  Overlap isn't dovetail -- negative ahang
                if (ahang < 0)
                    continue;

                //  Overlap isn't dovetail -- containment
                if (bhang < 0)
                    continue;

                fprintf(stderr, "found testFrag = %d testOvl = %d erates "F_U64" %u hang "F_S64" "F_S64" (DOVETAIL) slop=%d\n",
                        testFrag, testOvl,
                        ovl[testOvl].dat.ovl.orig_erate, consensusCutoff,
                        ovl[testOvl].dat.ovl.a_hang,
                        ovl[testOvl].dat.ovl.b_hang,
                        hangSlop);

                if (bhang < overlapBHang) {
                    contained       = 0;
                    fragment        = testFrag;
                    overlap         = testOvl;
                    overlapIdentity = ovl[testOvl].dat.ovl.orig_erate;
                    overlapBHang    = bhang;
                }
            }
        }
    }


    //  Now, if we have found the parent fragment, update.
    //
    if (overlap >= 0) {
        testOvl  = overlap;
        testFrag = fragment;

        iunitig->f_list[thisFrag].parent = ovl[testOvl].a_iid;

        if (contained)
            iunitig->f_list[thisFrag].contained = iunitig->f_list[thisFrag].parent;
        else
            iunitig->f_list[thisFrag].contained = 0;

        //  Reorient again based on the orientation of the testFrag.
        //
        if (iunitig->f_list[testFrag].position.bgn < iunitig->f_list[testFrag].position.end) {
            //  testFrag is forward
            iunitig->f_list[thisFrag].ahang  = ovl[testOvl].dat.ovl.a_hang;
            iunitig->f_list[thisFrag].bhang  = ovl[testOvl].dat.ovl.b_hang;
        } else {
            //  testFrag is reverse
            iunitig->f_list[thisFrag].ahang  = -ovl[testOvl].dat.ovl.b_hang;
            iunitig->f_list[thisFrag].bhang  = -ovl[testOvl].dat.ovl.a_hang;
        }

        //  Report we did something.
        //
        fprintf(stderr, "Updated fragment "F_IID" from "F_IID",%d,%d to "F_IID",%d,%d\n",
                iunitig->f_list[thisFrag].ident,
                oldParent,
                oldAHang,
                oldBHang,
                iunitig->f_list[thisFrag].parent,
                iunitig->f_list[thisFrag].ahang,
                iunitig->f_list[thisFrag].bhang);

        goto successfullyUpdated;
    }


    //  Otherwise, try to find an overlap again, this time allowing a
    //  bit of slop in the hangs.
    //
    if (hangSlop == 0) {
        hangSlop = 10;
        goto tryAgain;
    }


    //  Now, we're convinced there is no decent overlap between this
    //  fragment and any fragment before it.
    //
    //  Scan forward for the first thing we overlap.

    for (testFrag=thisFrag+1; testFrag < iunitig->num_frags; testFrag++) {
        int ahang = 0;
        int bhang = 0;

        testOvl = LookupValueInHashTable_AS(ovlAfter, (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64));

        //  Fragment has no overlap
        if (testOvl == -1)
            continue;

        //  Overlap is too noisy
        if (ovl[testOvl].dat.ovl.orig_erate >= consensusCutoff)
            continue;

        if (iunitig->f_list[testFrag].position.bgn < iunitig->f_list[testFrag].position.end) {
            ahang = ovl[testOvl].dat.ovl.a_hang;
            bhang = ovl[testOvl].dat.ovl.b_hang;
        } else {
            ahang = -ovl[testOvl].dat.ovl.b_hang;
            bhang = -ovl[testOvl].dat.ovl.a_hang;
        }

        //  Don't allow negative ahangs.  At all.  This catches the case
        //  where the parent might be contained in us, and generally makes
        //  consensus happier.
        //
        //  Don't allow empty hangs - this can lead to infinite loops
        //  where we keep swapping the same two fragments.  OK, not
        //  infinite, since we eventually run out of stack space and
        //  crash.
        //
        if (ahang <= 0)
            continue;

        fprintf(stderr, "shifttest ovl=%d testFrag="F_IID" pos %d-%d  thisFrag="F_IID" pos %d-%d  hangs %d,%d\n",
                testOvl,
                iunitig->f_list[testFrag].ident,
                iunitig->f_list[testFrag].position.bgn,
                iunitig->f_list[testFrag].position.end,
                iunitig->f_list[thisFrag].ident,
                iunitig->f_list[thisFrag].position.bgn,
                iunitig->f_list[thisFrag].position.end,
                ahang, bhang);

        //  Move thisFrag into testFrag's slot: slide the fragments in
        //  (thisFrag, testFrag] down by one, then drop the saved copy
        //  into the hole at testFrag.
        //
        IntMultiPos  fragCopy = iunitig->f_list[thisFrag];

        memmove(iunitig->f_list + thisFrag,
                iunitig->f_list + thisFrag + 1,
                sizeof(IntMultiPos) * (testFrag - thisFrag));

        iunitig->f_list[testFrag] = fragCopy;

        fprintf(stderr, "Shifted fragment "F_IID" from position %d to position %d\n",
                iunitig->f_list[testFrag].ident,
                thisFrag, testFrag);

        //  Since we moved things around, we must process the new fragment
        //  at 'thisFrag's location.
        //
        failed = updateFragmentWithParent(iunitig, thisFrag, ovs);

        if (failed == -1)
            goto successfullyUpdated;

        break;
    }


    //  And we failed.  Good luck with this one.
    //
    fprintf(stderr, "Failed to update fragment "F_IID" from "F_IID",%d,%d.\n",
            iunitig->f_list[thisFrag].ident,
            oldParent,
            oldAHang,
            oldBHang);

    failed = thisFrag;

successfullyUpdated:
    //  Shared cleanup for both the success and the failure path.  All three
    //  tables must go: this function recurses, so a table left behind here
    //  is leaked once per recursion level.
    //
    DeleteHashTable_AS(ovlBefore);
    DeleteHashTable_AS(ovlAfter);
    DeleteHashTable_AS(iidIndex);
    safe_free(ovl);

    return(failed);
}
Example #3
0
//  Align every component read of unitig fragment 'fid' against that
//  unitig and apply the alignment to the multialignment.
//
//  Reads that are not in fragmentMap are skipped.  When the unitig has
//  more than one instance (aiup->num_instances > 1), a read is also
//  skipped if its expected position (unitig start + ahang) is further
//  than MAX_SURROGATE_FUDGE_FACTOR from the position stored for it in
//  fragmentToIMP.  A read for which no alignment can be found is marked
//  deleted.
//
static
void
PlaceFragments(int32 fid,
               IntUnitigPos *aiup,
               CNS_Options  *opp) {

  Fragment                 *afrag = GetFragment(fragmentStore,fid);
  CNS_AlignedContigElement *belem = GetCNS_AlignedContigElement(fragment_positions, afrag->components);

  //  Nothing to place.
  if (afrag->n_components == 0)
    return;

  VA_TYPE(int32) *trace = CreateVA_int32(AS_READ_MAX_NORMAL_LEN+1);

  //  The loop walks component records until it meets one that is not a
  //  fragment record.
  for (; belem->frg_or_utg == CNS_ELEMENT_IS_FRAGMENT; belem++) {

    //  Not in the contig's f_list: an unplaced read from a surrogate.
    if (!ExistsInHashTable_AS(fragmentMap, belem->idx.fragment.frgIdent, 0))
      continue;

    //  A surrogate can be placed several times in one contig, e.g.
    //
    //    contig: --------------*****--------*****-------->  (***** = surrogate)
    //                           ---->                       readA
    //
    //  To decide which instance readA belongs to, compare the position
    //  recorded for the read in the contig (its IMP record) with the
    //  position implied by this unitig instance plus the read's ahang.
    //
    IntMultiPos *bimp = (IntMultiPos *)LookupValueInHashTable_AS(fragmentToIMP, belem->idx.fragment.frgIdent, 0);

    int32  readStartInContig = bimp->position.end;
    if (bimp->position.bgn < bimp->position.end)
      readStartInContig = bimp->position.bgn;

    int32  unitigStartInContig = aiup->position.end;
    if (aiup->position.bgn < aiup->position.end)
      unitigStartInContig = aiup->position.bgn;

    int32  unitigComp = afrag->complement;
    int32  readComp   = (belem->position.bgn < belem->position.end) ? 0 : 1;

    //  Low and high coordinate of the read within the unitig,
    //  independent of the read's orientation.
    int32  readLo = belem->position.bgn;
    int32  readHi = belem->position.end;

    if (readComp) {
      readLo = belem->position.end;
      readHi = belem->position.bgn;
    }

    //  Every component read is aligned to the unitig (never to another
    //  read), and is always contained in it, so the overlap length is the
    //  read's span.  With the unitig forward the hangs are measured from
    //  the unitig's start; with the unitig complemented they are measured
    //  from its end.
    //
    int32        ahang = 0;
    int32        bhang = 0;
    int32        ovl   = readHi - readLo;
    OverlapType  otype;

    if (unitigComp) {
      ahang = afrag->length - readHi;
      bhang = readLo - afrag->length;
    } else {
      ahang = readLo;
      bhang = readHi - afrag->length;
    }

    assert(ahang >= 0);
    assert(bhang <= 0);
    assert(ovl   >  0);

    if (aiup->num_instances > 1 && abs(ahang + unitigStartInContig - readStartInContig) > MAX_SURROGATE_FUDGE_FACTOR) { 
      if (VERBOSE_MULTIALIGN_OUTPUT)
        fprintf(stderr, "Not placing fragment %d into unitig %d because the positions (%d, %d) do not match (%d, %d)\n",
                belem->idx.fragment.frgIdent, afrag->iid,
                bimp->position.bgn, bimp->position.end,
                ahang + GetColumn(columnStore,(GetBead(beadStore,afrag->firstbead.get()                ))->column_index)->ma_index,
                bhang + GetColumn(columnStore,(GetBead(beadStore,afrag->firstbead.get()+afrag->length-1))->column_index)->ma_index+1);
      continue;
    }

    int32 blid = AppendFragToLocalStore(belem->idx.fragment.frgType,
                                        belem->idx.fragment.frgIdent,
                                        (readComp != unitigComp),
                                        belem->idx.fragment.frgContained,
                                        AS_OTHER_UNITIG);

    //  AppendFragToLocalStore can move the store, so re-fetch both pointers.
    afrag = GetFragment(fragmentStore, fid);
    Fragment *bfrag = GetFragment(fragmentStore, blid);

    if (GetAlignmentTraceDriver(afrag, NULL, bfrag, &ahang, &bhang, ovl, trace, &otype, GETALIGNTRACE_CONTIGF, 0)) {
      ApplyAlignment(afrag->lid, 0, NULL, blid, ahang, Getint32(trace,0));
      continue;
    }

    //  No alignment found: refresh the multialign node the read would
    //  have landed in, report, and eject the read.
    //
    Bead   *afirst = GetBead(beadStore, afrag->firstbead.get() + ahang);
    Column *col    = GetColumn(columnStore, afirst->column_index);
    MANode *manode = GetMANode(manodeStore, col->ma_id);

    RefreshMANode(manode->lid, 0, opp, NULL, NULL, 0, 0);  //  BPW not sure why we need this

    fprintf(stderr, "Could (really) not find overlap between %d (%c) and %d (%c) estimated ahang: %d (ejecting frag %d from contig)\n",
            afrag->iid, afrag->type, belem->idx.fragment.frgIdent, belem->idx.fragment.frgType, ahang, belem->idx.fragment.frgIdent);

    GetFragment(fragmentStore,blid)->deleted = 1;
  }  //  over all fragments

  Delete_VA(trace);
}
Example #4
0
//  Marks surrogate unitigs in a tigStore as forced-unique.
//
//  Reads the assembly .asm file to collect, per unitig, its length, how
//  many times it is instantiated in contigs, and whether it sits within
//  'distanceToEnds' bases of the ends of two different scaffolds.  It then
//  walks the tigStore and calls setUnitigFUR(i, AS_FORCED_UNIQUE) on each
//  unitig that satisfies one of the rules printed in the usage message.
//
//  Exits with status 1 on a usage error or when the .asm file cannot be
//  opened; returns 0 otherwise.
//
int main (int argc, char *argv[]) {
   char    *asmFileName    = NULL;
   char    *tigStoreName   = NULL;
   uint32   tigStoreVers   = 2;

   int      minLength      = DEFAULT_UNITIG_LENGTH;
   int      numInstances   = DEFAULT_NUM_INSTANCES;
   int      distanceToEnds = DEFAULT_DISTANCE_TO_ENDS;

   uint32   numToggled     = 0;

   argc = AS_configure(argc, argv);
  
   int arg=1;
   int err=0;
   while (arg < argc) {
      const char *opt = argv[arg];

      //  Number of values that must follow the option; 0 marks an option
      //  we do not know.
      int numValues = 0;

      if (strcmp(opt, "-t") == 0)
         numValues = 2;
      else if ((strcmp(opt, "-a") == 0) ||
               (strcmp(opt, "-l") == 0) ||
               (strcmp(opt, "-n") == 0) ||
               (strcmp(opt, "-d") == 0))
         numValues = 1;

      if (numValues == 0) {
         fprintf(stderr, "%s: unknown option '%s'\n", argv[0], argv[arg]);
         err++;

      } else if (arg + numValues >= argc) {
         //  Never read past the end of argv (or hand NULL to atoi).
         fprintf(stderr, "%s: option '%s' is missing its argument(s)\n", argv[0], opt);
         err++;
         break;

      } else if (strcmp(opt, "-a") == 0) {
         asmFileName = argv[++arg];

      } else if (strcmp(opt, "-t") == 0) {
         tigStoreName = argv[++arg];
         tigStoreVers = atoi(argv[++arg]);

      } else if (strcmp(opt, "-l") == 0) {
         minLength = atoi(argv[++arg]);

      } else if (strcmp(opt, "-n") == 0) {
         numInstances = atoi(argv[++arg]);

      } else {
         //  Only "-d" is left.
         distanceToEnds = atoi(argv[++arg]);
      }

      arg++;
   }

   if (minLength <= 0) err++;
   if (numInstances < 0) err++;
   if (distanceToEnds <= 0) err++;

   if ((asmFileName == NULL) || (tigStoreName == NULL) || (err > 0)) {
      fprintf(stderr, "usage: %s -a asmFile -t tigStore version [-l minLength] [-n numInstances] [-d distanceToEnd]\n", argv[0]);
      fprintf(stderr, "\n");
      fprintf(stderr, "  -a asmFile            path to the assembly .asm file\n");
      fprintf(stderr, "  -t tigStore version   path to the tigStore and version to modify\n");

      fprintf(stderr, "  -l minLength          minimum size of a unitig to be toggled, default=%d)\n", DEFAULT_UNITIG_LENGTH);
      fprintf(stderr, "  -n numInstances       number of instances of a surrogate that is toggled, default = %d\n", DEFAULT_NUM_INSTANCES);
      fprintf(stderr, "  -d distanceToEnd      max number of bases the surrogate can be from the end of a scaffold for toggling, default = %d\n", DEFAULT_DISTANCE_TO_ENDS);
      fprintf(stderr, "\n");
      fprintf(stderr, "  Labels surrogate unitigs as non-repeat if they match any of the following conditions:\n");
      fprintf(stderr, "    1. the unitig meets all the -l, -n and -d conditions\n");
      fprintf(stderr, "    2. When -n = 0, all surrogate unitigs with more than one read\n");
      fprintf(stderr, "    3. the unitig appears exactly twice, within '-d' bases from the end of a scaffold\n");
      exit(1);
   }

   //  Open the input before building any state, and refuse to continue
   //  without it.
   FILE           *infp = fopen(asmFileName, "r");

   if (infp == NULL) {
      fprintf(stderr, "%s: failed to open assembly file '%s' for reading\n", argv[0], asmFileName);
      exit(1);
   }
  
   //  UIDtoIID:                 candidate surrogate unitig UID -> IID
   //  CTGtoFirstUTG/LastUTG:    contig UID -> IID of a candidate surrogate
   //                            within distanceToEnds of the contig's
   //                            start / end
   //  unitigLength:             unitig IID -> length
   //  surrogateCount:           unitig IID -> number of instances in contigs
   //  surrogateAtScaffoldEnds:  unitig IID -> 0, a scaffold IID, or
   //                            UINT32_MAX (seen at the ends of two scaffolds)
   HashTable_AS      *UIDtoIID         = CreateScalarHashTable_AS();
   HashTable_AS      *CTGtoFirstUTG    = CreateScalarHashTable_AS();
   HashTable_AS      *CTGtoLastUTG     = CreateScalarHashTable_AS();
   VA_TYPE(int32)    *unitigLength	   = CreateVA_int32(8192);
   VA_TYPE(uint32)   *surrogateCount   = CreateVA_uint32(8192);
   VA_TYPE(uint32)   *surrogateAtScaffoldEnds   = CreateVA_uint32(8192);
   
   GenericMesg    *pmesg;

   while ((EOF != ReadProtoMesg_AS(infp, &pmesg))) {
      SnapUnitigMesg    *utg     = NULL;
      SnapConConMesg    *ctg     = NULL;
      SnapScaffoldMesg  *scf     = NULL;
      uint32             count   = 0;
      uint32             forward = TRUE;
      uint32             lastCtg = 0;

      switch(pmesg->t) {
         case MESG_UTG:
            utg = (SnapUnitigMesg*)(pmesg->m);
            Setint32(unitigLength, utg->iaccession, &utg->length);

            if (utg->length >= minLength && (utg->status == AS_NOTREZ || utg->status == AS_SEP)) {               
               // store the mapping for this unitig's UID to IID and initialize its instance counter at 0
               count = 0;
               InsertInHashTable_AS(UIDtoIID, AS_UID_toInteger(utg->eaccession), 0, (uint64)utg->iaccession, 0);               
               Setuint32(surrogateCount, utg->iaccession, &count);
            }
            break;    

         case MESG_CCO:
            ctg = (SnapConConMesg *)(pmesg->m);
            
            for (int32 i = 0; i < ctg->num_unitigs; i++) {
               // increment the surrogate unitigs instance counter
               if (ExistsInHashTable_AS(UIDtoIID, AS_UID_toInteger(ctg->unitigs[i].eident), 0)) {
                  uint32 *ret = Getuint32(surrogateCount, (uint32) LookupValueInHashTable_AS(UIDtoIID, AS_UID_toInteger(ctg->unitigs[i].eident), 0));
                  assert(ret != NULL);
                  (*ret)++;

                  // store first surrogate in a contig
                  if (!ExistsInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(ctg->eaccession), 0) && 
                        MIN(ctg->unitigs[i].position.bgn, ctg->unitigs[i].position.end) < distanceToEnds) {
                     InsertInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(ctg->eaccession), 0, LookupValueInHashTable_AS(UIDtoIID, AS_UID_toInteger(ctg->unitigs[i].eident), 0), 0); 
                  }

                  // also store the last
                  if ((ctg->length - MAX(ctg->unitigs[i].position.bgn, ctg->unitigs[i].position.end)) < distanceToEnds) {
                     ReplaceInHashTable_AS(CTGtoLastUTG, AS_UID_toInteger(ctg->eaccession), 0, LookupValueInHashTable_AS(UIDtoIID, AS_UID_toInteger(ctg->unitigs[i].eident), 0), 0);
                  }
               }
            }
            break;

         case MESG_SCF:
            scf = (SnapScaffoldMesg *)(pmesg->m);
            
            count = scf->iaccession;
            if (scf->contig_pairs[0].orient.isAnti() || scf->contig_pairs[0].orient.isOuttie()) {
               forward = FALSE;
            }
            lastCtg = MAX(scf->num_contig_pairs - 1, 0);
            
            // All four cases below follow the same pattern
            // The first time a surrogate is found at the end of a scaffold, we record the scaffold ID
            // When the surrogate is seen at the end of a second scaffold, we record that it has been found at the ends of two scaffolds (UINT32_MAX)
            // If the surrogate is seen more than once in a single scaffold, it is eliminated (it can't connect two scaffolds)
            // If the surrogate is only seen once at the end of a scaffold (and again in the middle), it is eliminated
            // 1. Contig is first in scaffold and is forward, take the surrogate from the beginning of contig, if it exists                        
            if (ExistsInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(scf->contig_pairs[0].econtig1), 0) && forward) {
               uint32 *myval = Getuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(scf->contig_pairs[0].econtig1), 0));
               if (myval != NULL && (*myval) == scf->iaccession) {
                  count = 0;
               } else if (myval != NULL && (*myval) != 0 && (*myval) != scf->iaccession) {
                  count = UINT32_MAX;
               }
               Setuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(scf->contig_pairs[0].econtig1), 0), &count);
               count = scf->iaccession;
            }
            // 2. Contig is last in scaffold and is reversed, take the surrogate from the beginning of the contig, if it exists
            if (ExistsInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(scf->contig_pairs[lastCtg].econtig2), 0) && !forward) {
               uint32 *myval = Getuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(scf->contig_pairs[lastCtg].econtig2), 0));
               if (myval != NULL && (*myval) == scf->iaccession) {
                  count = 0;
               } else if (myval != NULL && (*myval) != 0 && (*myval) != scf->iaccession) {
                  count = UINT32_MAX;
               }
               Setuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(scf->contig_pairs[lastCtg].econtig2), 0), &count);
               count = scf->iaccession;
            }
            // 3. Contig is first in scaffold and is reversed, take the surrogate from the end of the contig, if it exists            
            if (ExistsInHashTable_AS(CTGtoLastUTG, AS_UID_toInteger(scf->contig_pairs[0].econtig1), 0) && !forward) {
               uint32 *myval = Getuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoLastUTG, AS_UID_toInteger(scf->contig_pairs[0].econtig1), 0));
               if (myval != NULL && (*myval) == scf->iaccession) {
                  count = 0;
               } else if (myval != NULL && (*myval) != 0 && (*myval) != scf->iaccession) {
                  count = UINT32_MAX;
               }
               Setuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoLastUTG, AS_UID_toInteger(scf->contig_pairs[0].econtig1), 0), &count);
               count = scf->iaccession;
            }
            // 4. Contig is last in scaffold and is forward, take the surrogate from the end of the contig, if it exists
            if (ExistsInHashTable_AS(CTGtoLastUTG, AS_UID_toInteger(scf->contig_pairs[lastCtg].econtig2), 0) && forward) {
               uint32 *myval = Getuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoLastUTG, AS_UID_toInteger(scf->contig_pairs[lastCtg].econtig2), 0));
               if (myval != NULL && (*myval) == scf->iaccession) {
                  count = 0;
               } else if (myval != NULL && (*myval) != 0 && (*myval) != scf->iaccession) {
                  count = UINT32_MAX;
               }
               Setuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoLastUTG, AS_UID_toInteger(scf->contig_pairs[lastCtg].econtig2), 0), &count);
               count = scf->iaccession;
            }
            break;
         default:
            break;
      }
   }
   fclose(infp);
  
   // open the tig store for in-place writing (we don't increment the version since CGW always reads a fixed version initially)
   // this also removes any partitioning

   MultiAlignStore *tigStore = new MultiAlignStore(tigStoreName, tigStoreVers, 0, 0, TRUE, TRUE);

   for (uint32 i = 0; i < tigStore->numUnitigs(); i++) {
      uint32 *ret      = Getuint32(surrogateCount, i);
      uint32 *atScfEnd = Getuint32(surrogateAtScaffoldEnds, i);
      //  unitigLength holds int32 values, so read them back as int32 and
      //  compare against the (signed) minLength without a sign conversion.
      int32  *length   = Getint32(unitigLength, i);

      bool toggled = false;
                     
      if (ret != NULL && (*ret) == (uint32)numInstances && numInstances != 0) {
         toggled = true;
      } 

      // if we find a surrogate that has two instances and it is at scaffold ends mark toggle it as well
      else if (ret != NULL && (*ret) == NUM_INSTANCES_AT_SCAFFOLD_ENDS && atScfEnd != NULL && (*atScfEnd) == UINT32_MAX) {
         toggled = true;
      }   

      // special case, mark non-singleton unitigs as unique if we are given no instances
      else if (numInstances == 0 && (length != NULL && (*length) >= minLength) && tigStore->getNumFrags(i, TRUE) > 1) {
         toggled = true;
      }
      
      if (toggled) {
         tigStore->setUnitigFUR(i, AS_FORCED_UNIQUE);
         numToggled++;
      }      
   }
   
   DeleteHashTable_AS(UIDtoIID);
   DeleteHashTable_AS(CTGtoFirstUTG);
   DeleteHashTable_AS(CTGtoLastUTG);

   delete tigStore;
   
   //  numToggled is unsigned.
   fprintf(stderr, "Toggled %u\n", numToggled);
   
   return 0;
}