示例#1
0
//  Recompute the parent, hangs and containment flag of fragment
//  'thisFrag' (an index into iunitig->f_list) from the overlaps stored
//  in 'ovs'.
//
//  The search proceeds in this order:
//
//    1) Among fragments placed BEFORE thisFrag, look for an overlap
//       whose hangs say thisFrag is contained (a_hang >= -hangSlop and
//       b_hang <= hangSlop); keep the one with the lowest orig_erate
//       below the consensus cutoff.
//    2) Otherwise, among the same fragments, look for an overlap with
//       non-negative hangs (after orienting to the candidate parent)
//       and keep the one with the smallest b-hang.
//    3) If nothing was found with hangSlop == 0, repeat with
//       hangSlop == 10.  Only the containment test in (1) uses the
//       slop; the scan in (2) is simply repeated.
//    4) Otherwise, scan the fragments AFTER thisFrag.  For the first
//       one with an acceptable overlap and a strictly positive a-hang,
//       move thisFrag to that fragment's slot (shifting everything in
//       between down by one) and recurse on whatever fragment now
//       occupies index 'thisFrag'.
//
//  Progress and decisions are logged to stderr.
//
//  Returns -1 if the fragment (or, after a shift, its replacement) was
//  updated, otherwise returns 'thisFrag'.
//
int
updateFragmentWithParent(IntUnitigMesg *iunitig, int thisFrag, OverlapStore *ovs) {
    uint32         ovlMax = 0;
    uint32         ovlLen = 0;
    OVSoverlap    *ovl    = NULL;

    int     testFrag = thisFrag - 1;
    int     testOvl  = 0;

    //  Remembered only so the log messages can show the before/after values.
    int     oldParent = iunitig->f_list[thisFrag].parent;
    int     oldAHang  = iunitig->f_list[thisFrag].ahang;
    int     oldBHang  = iunitig->f_list[thisFrag].bhang;

    uint32  consensusCutoff = AS_OVS_encodeQuality(AS_CNS_ERROR_RATE);

    //  Best candidate found so far: index of the parent in f_list
    //  ('fragment'), index of its overlap in ovl[] ('overlap'), and the
    //  figures of merit used to compare candidates.
    int     contained       = 0;
    int     fragment        = -1;
    int     overlap         = -1;
    int     overlapIdentity = consensusCutoff;
    int     overlapBHang    = AS_READ_MAX_NORMAL_LEN;

    //  ovlBefore / ovlAfter map a fragment ident to an index into ovl[];
    //  the initial value ~0 reads back as -1 and means "no overlap".
    //  iidIndex maps a fragment ident to its index in f_list.
    HashTable_AS  *ovlBefore = CreateScalarHashTable_AS();
    HashTable_AS  *ovlAfter  = CreateScalarHashTable_AS();
    HashTable_AS  *iidIndex  = CreateScalarHashTable_AS();

    int     hangSlop = 0;

    int     failed   = -1;

    fprintf(stderr, "\n");
    fprintf(stderr, "WORKING on fragment %d == %d\n", thisFrag, iunitig->f_list[thisFrag].ident);

    //  Save in the hash table the fragments before/after this one.
    //
    for (testFrag=0; testFrag<iunitig->num_frags; testFrag++) {
        InsertInHashTable_AS(iidIndex,
                             (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64),
                             (uint64)testFrag, 0);

        if (testFrag < thisFrag)
            InsertInHashTable_AS(ovlBefore,
                                 (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64),
                                 ~(uint64)0, 0);
        if (testFrag > thisFrag)
            InsertInHashTable_AS(ovlAfter,
                                 (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64),
                                 ~(uint64)0, 0);
    }

    //  Get the overlaps for this fragment.
    //
    AS_OVS_setRangeOverlapStore(ovs, iunitig->f_list[thisFrag].ident, iunitig->f_list[thisFrag].ident);

    if (ovlMax < AS_OVS_numOverlapsInRange(ovs)) {
        ovlMax = AS_OVS_numOverlapsInRange(ovs) * 2;
        ovl    = (OVSoverlap *)safe_realloc(ovl, sizeof(OVSoverlap) * ovlMax);
    }
    ovlLen = 0;
    while (AS_OVS_readOverlapFromStore(ovs, ovl+ovlLen, AS_OVS_TYPE_OVL)) {
        int  aid=0,  bid=0;
        int  afwd=0, bfwd=0;
        int  correct=0;

        //  Reorient the overlap so the b_iid is thisFrag.
        //
        {
            AS_IID x = ovl[ovlLen].a_iid;
            ovl[ovlLen].a_iid = ovl[ovlLen].b_iid;
            ovl[ovlLen].b_iid = x;

            if (ovl[ovlLen].dat.ovl.flipped) {
                int x = ovl[ovlLen].dat.ovl.a_hang;
                ovl[ovlLen].dat.ovl.a_hang = ovl[ovlLen].dat.ovl.b_hang;
                ovl[ovlLen].dat.ovl.b_hang = x;
            } else {
                ovl[ovlLen].dat.ovl.a_hang = -ovl[ovlLen].dat.ovl.a_hang;
                ovl[ovlLen].dat.ovl.b_hang = -ovl[ovlLen].dat.ovl.b_hang;
            }
        }

        //  Make sure we get the correct overlap.  We seem to be allowed
        //  to have both an I and an N overlap for a given pair of
        //  fragments.  At least, I hope that's all we're allowed.
        //
        //  An overlap is "correct" when its flipped flag agrees with the
        //  relative orientation of the two fragments in the unitig.
        //
        aid = LookupValueInHashTable_AS(iidIndex, (uint64)ovl[ovlLen].a_iid, sizeof(uint64));
        bid = LookupValueInHashTable_AS(iidIndex, (uint64)ovl[ovlLen].b_iid, sizeof(uint64));

        afwd = (iunitig->f_list[aid].position.bgn < iunitig->f_list[aid].position.end);
        bfwd = (iunitig->f_list[bid].position.bgn < iunitig->f_list[bid].position.end);

        if ((afwd == bfwd) && (ovl[ovlLen].dat.ovl.flipped == 0))
            correct = 1;
        if ((afwd != bfwd) && (ovl[ovlLen].dat.ovl.flipped == 1))
            correct = 1;


        if (ExistsInHashTable_AS(ovlBefore, (uint64)ovl[ovlLen].a_iid, sizeof(uint64))) {
            if (correct)
                ReplaceInHashTable_AS(ovlBefore,
                                      (uint64)ovl[ovlLen].a_iid, sizeof(uint64),
                                      (uint64)ovlLen, 0);
            fprintf(stderr, "%s before overlap for %d (%c) to %d (%c) ("F_S64","F_S64",%c) at ovl position %d\n",
                    correct ? "save" : "skip",
                    ovl[ovlLen].a_iid, afwd ? 'F' : 'R',
                    ovl[ovlLen].b_iid, bfwd ? 'F' : 'R',
                    ovl[ovlLen].dat.ovl.a_hang,
                    ovl[ovlLen].dat.ovl.b_hang,
                    ovl[ovlLen].dat.ovl.flipped ? 'I' : 'N',
                    ovlLen);
        }

        if (ExistsInHashTable_AS(ovlAfter, (uint64)ovl[ovlLen].a_iid, sizeof(uint64))) {
            if (correct)
                ReplaceInHashTable_AS(ovlAfter,
                                      (uint64)ovl[ovlLen].a_iid, sizeof(uint64),
                                      (uint64)ovlLen, 0);
            fprintf(stderr, "%s after  overlap for %d (%c) to %d (%c) ("F_S64","F_S64",%c) at ovl position %d\n",
                    correct ? "save" : "skip",
                    ovl[ovlLen].a_iid, afwd ? 'F' : 'R',
                    ovl[ovlLen].b_iid, bfwd ? 'F' : 'R',
                    ovl[ovlLen].dat.ovl.a_hang,
                    ovl[ovlLen].dat.ovl.b_hang,
                    ovl[ovlLen].dat.ovl.flipped ? 'I' : 'N',
                    ovlLen);
        }

        ovlLen++;
    }

tryAgain:

    //  See if we're contained in any of these overlaps.
    if (overlap == -1) {
        for (testFrag=thisFrag-1; testFrag>=0; testFrag--) {
            if (ExistsInHashTable_AS(ovlBefore, (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64))) {
                testOvl = LookupValueInHashTable_AS(ovlBefore, (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64));

                //  Fragment has no overlap
                if (testOvl == -1)
                    continue;

                fprintf(stderr, "found testFrag = %d testOvl = %d erates "F_U64" %u hang "F_S64" "F_S64" (CONTAIN) slop=%d\n",
                        testFrag, testOvl,
                        ovl[testOvl].dat.ovl.orig_erate, consensusCutoff,
                        ovl[testOvl].dat.ovl.a_hang,
                        ovl[testOvl].dat.ovl.b_hang,
                        hangSlop);

                //  Three if's for documentation:
                //  1)  If we're an overlap we care about
                //  2)  If we're a contained overlap
                //  3)  If we're better than what we've seen so far
                //  Then save the overlap
                //
                if (ovl[testOvl].dat.ovl.orig_erate < consensusCutoff) {
                    if ((ovl[testOvl].dat.ovl.a_hang >= -hangSlop) &&
                            (ovl[testOvl].dat.ovl.b_hang <= hangSlop)) {
                        if (ovl[testOvl].dat.ovl.orig_erate < overlapIdentity) {
                            contained       = 1;
                            fragment        = testFrag;
                            overlap         = testOvl;
                            overlapBHang    = 0;
                            overlapIdentity = ovl[testOvl].dat.ovl.orig_erate;
                        }
                    }
                }
            }
        }
    }


    //  If not contained, scan the overlaps again, looking for the
    //  thickest/bestest.  This will be the overlap with the smallest a
    //  or b hang -- depending on the orientation of the parent
    //  fragment.
    //
    //  Instead of working through overlaps, we work through fragments.
    //
    if (overlap == -1) {
        for (testFrag=thisFrag-1; testFrag>=0; testFrag--) {
            if (ExistsInHashTable_AS(ovlBefore, (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64))) {
                int ahang = 0;
                int bhang = 0;

                testOvl = LookupValueInHashTable_AS(ovlBefore, (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64));

                //  Fragment has no overlap
                if (testOvl == -1)
                    continue;

                //  Overlap is too noisy
                if (ovl[testOvl].dat.ovl.orig_erate >= consensusCutoff)
                    continue;

                //  Express the hangs relative to the candidate parent's
                //  orientation in the unitig.
                if (iunitig->f_list[testFrag].position.bgn < iunitig->f_list[testFrag].position.end) {
                    ahang = ovl[testOvl].dat.ovl.a_hang;
                    bhang = ovl[testOvl].dat.ovl.b_hang;
                } else {
                    ahang = -ovl[testOvl].dat.ovl.b_hang;
                    bhang = -ovl[testOvl].dat.ovl.a_hang;
                }

                //  Overlap isn't dovetail -- negative ahang
                if (ahang < 0)
                    continue;

                //  Overlap isn't dovetail -- containment
                if (bhang < 0)
                    continue;

                fprintf(stderr, "found testFrag = %d testOvl = %d erates "F_U64" %u hang "F_S64" "F_S64" (DOVETAIL) slop=%d\n",
                        testFrag, testOvl,
                        ovl[testOvl].dat.ovl.orig_erate, consensusCutoff,
                        ovl[testOvl].dat.ovl.a_hang,
                        ovl[testOvl].dat.ovl.b_hang,
                        hangSlop);

                if (bhang < overlapBHang) {
                    contained       = 0;
                    fragment        = testFrag;
                    overlap         = testOvl;
                    overlapIdentity = ovl[testOvl].dat.ovl.orig_erate;
                    overlapBHang    = bhang;
                }
            }
        }
    }


    //  Now, if we have found the parent fragment, update.
    //
    if (overlap >= 0) {
        testOvl  = overlap;
        testFrag = fragment;

        iunitig->f_list[thisFrag].parent = ovl[testOvl].a_iid;

        if (contained)
            iunitig->f_list[thisFrag].contained = iunitig->f_list[thisFrag].parent;
        else
            iunitig->f_list[thisFrag].contained = 0;

        //  Reorient again based on the orientation of the testFrag.
        //
        if (iunitig->f_list[testFrag].position.bgn < iunitig->f_list[testFrag].position.end) {
            //  testFrag is forward
            iunitig->f_list[thisFrag].ahang  = ovl[testOvl].dat.ovl.a_hang;
            iunitig->f_list[thisFrag].bhang  = ovl[testOvl].dat.ovl.b_hang;
        } else {
            //  testFrag is reverse
            iunitig->f_list[thisFrag].ahang  = -ovl[testOvl].dat.ovl.b_hang;
            iunitig->f_list[thisFrag].bhang  = -ovl[testOvl].dat.ovl.a_hang;
        }

        //  Report we did something.
        //
        fprintf(stderr, "Updated fragment "F_IID" from "F_IID",%d,%d to "F_IID",%d,%d\n",
                iunitig->f_list[thisFrag].ident,
                oldParent,
                oldAHang,
                oldBHang,
                iunitig->f_list[thisFrag].parent,
                iunitig->f_list[thisFrag].ahang,
                iunitig->f_list[thisFrag].bhang);

        goto successfullyUpdated;
    }


    //  Otherwise, try to find an overlap again, this time allowing a
    //  bit of slop in the hangs.
    //
    if (hangSlop == 0) {
        hangSlop = 10;
        goto tryAgain;
    }


    //  Now, we're convinced there is no decent overlap between this
    //  fragment and any fragment before it.
    //
    //  Scan forward for the first thing we overlap.

    for (testFrag=thisFrag+1; testFrag < iunitig->num_frags; testFrag++) {
        int ahang = 0;
        int bhang = 0;

        testOvl = LookupValueInHashTable_AS(ovlAfter, (uint64)iunitig->f_list[testFrag].ident, sizeof(uint64));

        //  Fragment has no overlap
        if (testOvl == -1)
            continue;

        //  Overlap is too noisy
        if (ovl[testOvl].dat.ovl.orig_erate >= consensusCutoff)
            continue;

        if (iunitig->f_list[testFrag].position.bgn < iunitig->f_list[testFrag].position.end) {
            ahang = ovl[testOvl].dat.ovl.a_hang;
            bhang = ovl[testOvl].dat.ovl.b_hang;
        } else {
            ahang = -ovl[testOvl].dat.ovl.b_hang;
            bhang = -ovl[testOvl].dat.ovl.a_hang;
        }

        //  Don't allow negative ahangs.  At all.  This catches the case
        //  where the parent might be contained in us, and generally makes
        //  consensus happier.
        //
        //  Don't allow empty hangs - this can lead to infinite loops
        //  where we keep swapping the same two fragments.  OK, not
        //  infinite, since we eventually run out of stack space and
        //  crash.
        //
        if (ahang <= 0)
            continue;

        fprintf(stderr, "shifttest ovl=%d testFrag="F_IID" pos %d-%d  thisFrag="F_IID" pos %d-%d  hangs %d,%d\n",
                testOvl,
                iunitig->f_list[testFrag].ident,
                iunitig->f_list[testFrag].position.bgn,
                iunitig->f_list[testFrag].position.end,
                iunitig->f_list[thisFrag].ident,
                iunitig->f_list[thisFrag].position.bgn,
                iunitig->f_list[thisFrag].position.end,
                ahang, bhang);

        //  Slide f_list[thisFrag+1 .. testFrag] down one slot and drop
        //  the saved copy of thisFrag into the vacated slot at testFrag.
        IntMultiPos  fragCopy = iunitig->f_list[thisFrag];

        memmove(iunitig->f_list + thisFrag,
                iunitig->f_list + thisFrag + 1,
                sizeof(IntMultiPos) * (testFrag - thisFrag));

        iunitig->f_list[testFrag] = fragCopy;

        fprintf(stderr, "Shifted fragment "F_IID" from position %d to position %d\n",
                iunitig->f_list[testFrag].ident,
                thisFrag, testFrag);

        //  Since we moved things around, we must process the new fragment
        //  at 'thisFrag's location.
        //
        failed = updateFragmentWithParent(iunitig, thisFrag, ovs);

        if (failed == -1)
            goto successfullyUpdated;

        //  Only the first usable forward overlap is ever tried.
        break;
    }


    //  And we failed.  Good luck with this one.
    //
    fprintf(stderr, "Failed to update fragment "F_IID" from "F_IID",%d,%d.\n",
            iunitig->f_list[thisFrag].ident,
            oldParent,
            oldAHang,
            oldBHang);

    failed = thisFrag;

successfullyUpdated:
    //  Shared exit path for both success and failure: release everything
    //  this invocation allocated.  iidIndex must be released here too,
    //  otherwise every call (and every recursive call) leaks one table.
    DeleteHashTable_AS(ovlBefore);
    DeleteHashTable_AS(ovlAfter);
    DeleteHashTable_AS(iidIndex);
    safe_free(ovl);

    return(failed);
}
示例#2
0
int main (int argc, char *argv[]) {
   char    *asmFileName    = NULL;
   char    *tigStoreName   = NULL;
   uint32   tigStoreVers   = 2;

   int      minLength      = DEFAULT_UNITIG_LENGTH;
   int      numInstances   = DEFAULT_NUM_INSTANCES;
   int      distanceToEnds = DEFAULT_DISTANCE_TO_ENDS;

   uint32   numToggled     = 0;

   argc = AS_configure(argc, argv);
  
   int arg=1;
   int err=0;
   while (arg < argc) {
      if        (strcmp(argv[arg], "-a") == 0) {
         asmFileName = argv[++arg];

      } else if (strcmp(argv[arg], "-t") == 0) {
        tigStoreName = argv[++arg];
        tigStoreVers = atoi(argv[++arg]);

      } else if (strcmp(argv[arg], "-l") == 0) {
         minLength = atoi(argv[++arg]);

      } else if (strcmp(argv[arg], "-n") == 0) {
         numInstances = atoi(argv[++arg]);

      } else if (strcmp(argv[arg], "-d") == 0) {
         distanceToEnds = atoi(argv[++arg]);

      } else {
         fprintf(stderr, "%s: unknown option '%s'\n", argv[0], argv[arg]);
         err++;
      }

      arg++;
   }

   if (minLength <= 0) err++;
   if (numInstances < 0) err++;
   if (distanceToEnds <= 0) err++;

   if ((asmFileName == NULL) || (tigStoreName == NULL) || (err > 0)) {
      fprintf(stderr, "usage: %s -a asmFile -t tigStore version [-l minLength] [-n numInstances] [-d distanceToEnd]\n", argv[0]);
      fprintf(stderr, "\n");
      fprintf(stderr, "  -a asmFile            path to the assembly .asm file\n");
      fprintf(stderr, "  -t tigStore version   path to the tigStore and version to modify\n");

      fprintf(stderr, "  -l minLength          minimum size of a unitig to be toggled, default=%d)\n", DEFAULT_UNITIG_LENGTH);
      fprintf(stderr, "  -n numInstances       number of instances of a surrogate that is toggled, default = %d\n", DEFAULT_NUM_INSTANCES);
      fprintf(stderr, "  -d distanceToEnd      max number of bases the surrogate can be from the end of a scaffold for toggling, default = %d\n", DEFAULT_DISTANCE_TO_ENDS);
      fprintf(stderr, "\n");
      fprintf(stderr, "  Labels surrogate unitigs as non-repeat if they match any of the following conditions:\n");
      fprintf(stderr, "    1. the unitig meets all the -l, -n and -d conditions\n");
      fprintf(stderr, "    2. When -n = 0, all surrogate unitigs with more than one read\n");
      fprintf(stderr, "    3. the unitig appears exactly twice, within '-d' bases from the end of a scaffold\n");
      exit(1);
   }
  
   HashTable_AS      *UIDtoIID         = CreateScalarHashTable_AS();
   HashTable_AS      *CTGtoFirstUTG    = CreateScalarHashTable_AS();
   HashTable_AS      *CTGtoLastUTG     = CreateScalarHashTable_AS();
   VA_TYPE(int32)    *unitigLength	   = CreateVA_int32(8192);
   VA_TYPE(uint32)   *surrogateCount   = CreateVA_uint32(8192);
   VA_TYPE(uint32)   *surrogateAtScaffoldEnds   = CreateVA_uint32(8192);
   
   GenericMesg    *pmesg;
   FILE           *infp = fopen(asmFileName, "r");   

   while ((EOF != ReadProtoMesg_AS(infp, &pmesg))) {
      SnapUnitigMesg    *utg     = NULL;
      SnapConConMesg    *ctg     = NULL;
      SnapScaffoldMesg  *scf     = NULL;
      uint32             count   = 0;
      uint32             forward = TRUE;
      uint32             lastCtg = 0;

      switch(pmesg->t) {
         case MESG_UTG:
            utg = (SnapUnitigMesg*)(pmesg->m);
            Setint32(unitigLength, utg->iaccession, &utg->length);

            if (utg->length >= minLength && (utg->status == AS_NOTREZ || utg->status == AS_SEP)) {               
               // store the mapping for this unitig's UID to IID and initialize it's instance counter at 0
               count = 0;
               InsertInHashTable_AS(UIDtoIID, AS_UID_toInteger(utg->eaccession), 0, (uint64)utg->iaccession, 0);               
               Setuint32(surrogateCount, utg->iaccession, &count);
            }
            break;    

         case MESG_CCO:
            ctg = (SnapConConMesg *)(pmesg->m);
            
            for (int32 i = 0; i < ctg->num_unitigs; i++) {
               // increment the surrogate unitigs instance counter
               if (ExistsInHashTable_AS(UIDtoIID, AS_UID_toInteger(ctg->unitigs[i].eident), 0)) {
                  uint32 *ret = Getuint32(surrogateCount, (uint32) LookupValueInHashTable_AS(UIDtoIID, AS_UID_toInteger(ctg->unitigs[i].eident), 0));
                  assert(ret != NULL);
                  (*ret)++;

                  // store first surrogate in a contig
                  if (!ExistsInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(ctg->eaccession), 0) && 
                        MIN(ctg->unitigs[i].position.bgn, ctg->unitigs[i].position.end) < distanceToEnds) {
                     InsertInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(ctg->eaccession), 0, LookupValueInHashTable_AS(UIDtoIID, AS_UID_toInteger(ctg->unitigs[i].eident), 0), 0); 
                  }

                  // also store the last
                  if ((ctg->length - MAX(ctg->unitigs[i].position.bgn, ctg->unitigs[i].position.end)) < distanceToEnds) {
                     ReplaceInHashTable_AS(CTGtoLastUTG, AS_UID_toInteger(ctg->eaccession), 0, LookupValueInHashTable_AS(UIDtoIID, AS_UID_toInteger(ctg->unitigs[i].eident), 0), 0);
                  }
               }
            }
            break;

         case MESG_SCF:
            scf = (SnapScaffoldMesg *)(pmesg->m);
            
            count = scf->iaccession;
            if (scf->contig_pairs[0].orient.isAnti() || scf->contig_pairs[0].orient.isOuttie()) {
               forward = FALSE;
            }
            lastCtg = MAX(scf->num_contig_pairs - 1, 0);
            
            // All four cases below follow the same pattern
            // The first time a surrogate is found at the end of a scaffold, we record the scaffold ID
            // When the surrogate is seen at the end of a second scaffold, we record that it has been found at the ends of two scaffolds (UINT32_MAX)
            // If the surrogate is seen more than once in a single scaffold, it is eliminated (it can't connect two scaffolds)
            // If the surrogate is only seen once at the end of a scaffold (and again in the middle), it is eliminated
            // 1. Contig is first in scaffold and is forward, take the surrogate from the beginning of contig, if it exists                        
            if (ExistsInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(scf->contig_pairs[0].econtig1), 0) && forward) {
               uint32 *myval = Getuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(scf->contig_pairs[0].econtig1), 0));
               if (myval != NULL && (*myval) == scf->iaccession) {
                  count = 0;
               } else if (myval != NULL && (*myval) != 0 && (*myval) != scf->iaccession) {
                  count = UINT32_MAX;
               }
               Setuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(scf->contig_pairs[0].econtig1), 0), &count);
               count = scf->iaccession;
            }
            // 2. Contig is last in scaffold and is reversed, take the surrogate from the beginning of the contig, if it exists
            if (ExistsInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(scf->contig_pairs[lastCtg].econtig2), 0) && !forward) {
               uint32 *myval = Getuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(scf->contig_pairs[lastCtg].econtig2), 0));
               if (myval != NULL && (*myval) == scf->iaccession) {
                  count = 0;
               } else if (myval != NULL && (*myval) != 0 && (*myval) != scf->iaccession) {
                  count = UINT32_MAX;
               }
               Setuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoFirstUTG, AS_UID_toInteger(scf->contig_pairs[lastCtg].econtig2), 0), &count);
               count = scf->iaccession;
            }
            // 3. Contig is first in scaffold and is reversed, take the surrogate from the end of the contig, if it exists            
            if (ExistsInHashTable_AS(CTGtoLastUTG, AS_UID_toInteger(scf->contig_pairs[0].econtig1), 0) && !forward) {
               uint32 *myval = Getuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoLastUTG, AS_UID_toInteger(scf->contig_pairs[0].econtig1), 0));
               if (myval != NULL && (*myval) == scf->iaccession) {
                  count = 0;
               } else if (myval != NULL && (*myval) != 0 && (*myval) != scf->iaccession) {
                  count = UINT32_MAX;
               }
               Setuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoLastUTG, AS_UID_toInteger(scf->contig_pairs[0].econtig1), 0), &count);
               count = scf->iaccession;
            }
            // 4. Contig is last in scaffold and is forward, take the surrogate from the end of the contig, if it exists
            if (ExistsInHashTable_AS(CTGtoLastUTG, AS_UID_toInteger(scf->contig_pairs[lastCtg].econtig2), 0) && forward) {
               uint32 *myval = Getuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoLastUTG, AS_UID_toInteger(scf->contig_pairs[lastCtg].econtig2), 0));
               if (myval != NULL && (*myval) == scf->iaccession) {
                  count = 0;
               } else if (myval != NULL && (*myval) != 0 && (*myval) != scf->iaccession) {
                  count = UINT32_MAX;
               }
               Setuint32(surrogateAtScaffoldEnds, (uint32) LookupValueInHashTable_AS(CTGtoLastUTG, AS_UID_toInteger(scf->contig_pairs[lastCtg].econtig2), 0), &count);
               count = scf->iaccession;
            }
            break;
         default:
            break;
      }
   }
   fclose(infp);
  



   uint32 *ret       = NULL;
   uint32 *atScfEnd  = NULL;

   // open the tig store for in-place writing (we don't increment the version since CGW always reads a fixed version initially)
   // this also removes any partitioning

   MultiAlignStore *tigStore = new MultiAlignStore(tigStoreName, tigStoreVers, 0, 0, TRUE, TRUE);

   for (uint32 i = 0; i < tigStore->numUnitigs(); i++) {
      uint32 *ret      = Getuint32(surrogateCount, i);
      uint32 *atScfEnd = Getuint32(surrogateAtScaffoldEnds, i);
      uint32 *length   = Getuint32(unitigLength, i);

      bool toggled = false;
                     
      if (ret != NULL && (*ret) == (uint32)numInstances && numInstances != 0) {
         toggled = TRUE;
      } 

      // if we find a surrogate that has two instances and it is at scaffold ends mark toggle it as well
      else if (ret != NULL && (*ret) == NUM_INSTANCES_AT_SCAFFOLD_ENDS && atScfEnd != NULL && (*atScfEnd) == UINT32_MAX) {
         toggled = TRUE;
      }   

      // special case, mark non-singleton unitigs as unique if we are given no instances
      else if (numInstances == 0 && (length != NULL && (*length) >= minLength) && tigStore->getNumFrags(i, TRUE) > 1) {
         toggled = TRUE;
      }
      
      if (toggled) {
         tigStore->setUnitigFUR(i, AS_FORCED_UNIQUE);
         numToggled++;
      }      
   }
   
   DeleteHashTable_AS(UIDtoIID);
   DeleteHashTable_AS(CTGtoFirstUTG);
   DeleteHashTable_AS(CTGtoLastUTG);

   delete tigStore;
   
   fprintf(stderr, "Toggled %d\n", numToggled);
   
   return 0;
}