示例#1
0
//  Report where a fragment sits inside its scaffold.
//
//  The contig that holds the fragment is looked up in the global
//  ScaffoldGraph via frag->contigID, that contig's scaffold placement is
//  fetched, and both are forwarded to GetFragmentPositionInScaffoldFromContig().
//  The caller's left_end, right_end and fragmentScaffoldOrientation pointers
//  are passed through untouched; they are presumably filled in by that call.
//
void
GetFragmentPositionInScaffold(CIFragT *frag, int *left_end, int *right_end,
                              int *fragmentScaffoldOrientation) {
  int      ctgLeft;
  int      ctgRight;
  int      ctgOrient;
  ContigT *parent = GetGraphNode(ScaffoldGraph->ContigGraph, frag->contigID);

  //  Scaffold placement of the contig that contains the fragment.
  GetContigPositionInScaffold(parent, &ctgLeft, &ctgRight, &ctgOrient);

  //  Translate the fragment using that contig placement.
  GetFragmentPositionInScaffoldFromContig(frag,
                                          left_end, right_end, fragmentScaffoldOrientation,
                                          ctgLeft, ctgRight, ctgOrient);
}
示例#2
0
/**
 * Inserts an empty (zeroed) child slot at the given position.
 *
 * Nothing happens when the node already holds GetMaxChildNodes() children.
 * In editor builds (WITH_EDITOR) an input pin is also created on the graph node.
 *
 * @param Index  Position of the new slot; must lie in [0, ChildNodes.Num()].
 */
void USoundNode::InsertChildNode( int32 Index )
{
	check( Index >= 0 && Index <= ChildNodes.Num() );

	const int32 ChildLimit = GetMaxChildNodes();
	if (ChildNodes.Num() >= ChildLimit)
	{
		// Already at capacity: leave the node unchanged.
		return;
	}

	ChildNodes.InsertZeroed( Index );
#if WITH_EDITOR
	GetGraphNode()->CreateInputPin();
#endif // WITH_EDITOR
}
示例#3
0
void USoundNodeSwitch::RenamePins()
{
	TArray<class UEdGraphPin*> InputPins;

#if WITH_EDITORONLY_DATA
	GetGraphNode()->GetInputPins(InputPins);
#endif

	for (int32 i = 0; i < InputPins.Num(); i++)
	{
		if (InputPins[i])
		{
			InputPins[i]->PinName = GetInputPinName(i);
		}
	}
}
示例#4
0
//  Decide whether 'contig' is an unplaced, single-unitig contig whose unitig
//  has at least one instance recorded (info.CI.numInstances != 0), i.e. it
//  appears as a surrogate elsewhere in the assembly.
//
//  Returns TRUE in that case, FALSE otherwise.
//
int
SurrogatedSingleUnitigContig(NodeCGW_T* contig) {

  //  Reject contigs built from several unitigs, and contigs that are
  //  already placed in a scaffold.
  if ((contig->info.Contig.numCI > 1) ||
      (contig->scaffoldID != NULLINDEX))
    return(FALSE);

  //  The unitig at the A end of the contig is the one examined.
  NodeCGW_T *onlyUnitig = GetGraphNode(ScaffoldGraph->CIGraph, contig->info.Contig.AEndCI);

  //  No instances means the unitig was never placed as a surrogate.
  return((onlyUnitig->info.CI.numInstances == 0) ? FALSE : TRUE);
}
示例#5
0
//  Print the contents of every real scaffold in 'graph' to 'stream'.
//
//  For each node of graph->ScaffoldGraph whose type is REAL_SCAFFOLD, one line
//  is written: a header with 'message', the scaffold id and element count,
//  followed by "id (bpLength.mean) (offsetAEnd.mean) (offsetBEnd.mean)" for
//  every contig in the scaffold.  Afterwards PrintContigContents() is called
//  with the same arguments.
//
//  stream   - destination for all output
//  graph    - scaffold graph to report on; every lookup goes through this
//             parameter rather than the global ScaffoldGraph
//  message  - prefix printed at the start of each scaffold line
//
//  Asserts that each real scaffold has at least one element, and that each
//  contig's scaffoldID is either the containing scaffold's id or NULLINDEX.
//
void PrintScaffoldContents(FILE *stream, ScaffoldGraphT *graph, char *message) {
  GraphNodeIterator   scaffolds;
  CIScaffoldT        *scaffold;

  InitGraphNodeIterator(&scaffolds, graph->ScaffoldGraph, GRAPH_NODE_DEFAULT);

  while ((scaffold = NextGraphNodeIterator(&scaffolds)) != NULL) {
    if (scaffold->type != REAL_SCAFFOLD)
      continue;

    assert(scaffold->info.Scaffold.numElements > 0);

    fprintf(stream, "%s scaffold %d contains %d elements: ", message, scaffold->id, scaffold->info.Scaffold.numElements);

    CIScaffoldTIterator     contigs;
    ChunkInstanceT         *contig;

    InitCIScaffoldTIterator(graph, scaffold, TRUE, FALSE, &contigs);

    while ((contig = NextCIScaffoldTIterator(&contigs)) != NULL) {
      //  A strict ownership check does not hold everywhere in cgw: per the
      //  original note, line 2129 of TransitiveReduction_CGW.c sets the
      //  contig's scaffold ID to NULL without removing it from the scaffold.
      //  So a mismatch is reported, and only a mismatch that is not NULLINDEX
      //  trips the assert.
      if (contig->scaffoldID != scaffold->id) {
        fprintf(stream, "Scaffold %d thinks it contains contig %d but contig thinks it belongs to %d\n", scaffold->id, contig->id, contig->scaffoldID);
      }
      assert(contig->scaffoldID == scaffold->id || contig->scaffoldID == NULLINDEX);

      fprintf(stream, "%d (%f) (%f) (%f) ", contig->id, contig->bpLength.mean, contig->offsetAEnd.mean, contig->offsetBEnd.mean);

      //  Walk the merged edges of this contig in the SAME graph that was
      //  passed in (the original mixed in the global ScaffoldGraph here).
      GraphEdgeIterator edges(graph->ContigGraph, contig->id, ALL_END, ALL_EDGES);
      CIEdgeT          *edge;

      while ((edge = edges.nextMerged()) != NULL) {
        NodeCGW_T *otherNode = GetGraphNode(graph->ContigGraph, (edge->idA == contig->id ? edge->idB : edge->idA));

        //  The node at the far end is only consumed by the debug print below,
        //  which is currently disabled.
        (void)otherNode;

        //fprintf(stream, "The edge active %d is unique %d confirming %d ori:%c dist: %f connects %d and %d with weight %d and node is in scaffold %d and other end of edge is %d in scaffold %d\n", edge->flags.bits.isActive, edge->flags.bits.isUniquetoUnique, edge->flags.bits.isContigConfirming, edge->orient.toLetter(), edge->distance.mean, edge->idA, edge->idB, edge->edgesContributing, contig->scaffoldID, otherNode->id, otherNode->scaffoldID);
      }
    }

    fprintf(stream, "\n");
  }

  PrintContigContents(stream, graph, message);
}
示例#6
0
int
main( int argc, char **argv) {
  int          ckptNum           = NULLINDEX;
  int          makeMiniScaffolds = 1;
  uint64       uidStart          = 1230000;
  UIDserver   *uids              = NULL;

  GlobalData = new Globals_CGW();

  argc = AS_configure(argc, argv);

  int err=0;
  int arg=1;
  while (arg < argc) {
    if        (strcmp(argv[arg], "-p") == 0) {
      ckptNum = GlobalData->setPrefix(argv[++arg]);
    } else if (strcmp(argv[arg], "-c") == 0) {
      strcpy(GlobalData->outputPrefix, argv[++arg]);
    } else if (strcmp(argv[arg], "-g") == 0) {
      strcpy(GlobalData->gkpStoreName, argv[++arg]);
    } else if (strcmp(argv[arg], "-t") == 0) {
      strcpy(GlobalData->tigStoreName, argv[++arg]);
    } else if (strcmp(argv[arg], "-n") == 0) {
      ckptNum = atoi(argv[++arg]);
    } else if (strcmp(argv[arg], "-U") == 0) {
      uidStart = 0;
    } else if (strcmp(argv[arg], "-S") == 0) {
      makeMiniScaffolds = 0;
    } else {
      fprintf(stderr, "unknown option '%s'\n", argv[arg]);
      err = 1;
    }
    arg++;
  }

  if ((GlobalData->outputPrefix[0] == 0) ||
      (GlobalData->gkpStoreName[0] == 0)) {
    fprintf(stderr, "usage: %s [[-p prefix] | [-c name -g gkpstore -n ckptNum]] [-U] [-S]\n", argv[0]);
    fprintf(stderr, "  -p      Attempt to locate the last checkpoint in directory 7-CGW.\n");
    fprintf(stderr, "  -c      Look for checkpoints in 'name'\n");
    fprintf(stderr, "  -g      Path to gkpStore\n");
    fprintf(stderr, "  -n      Checkpoint number to load\n");
    fprintf(stderr, "  -U      Use real UIDs for miniscaffolds, otherwise, UIDs start at 1230000\n");
    fprintf(stderr, "  -S      Do NOT make mini scaffolds.\n");
    exit(1);
  }

  uids = UIDserverInitialize(256, uidStart);

  char *toprint = (char *)safe_malloc(sizeof(char) * (AS_READ_MAX_NORMAL_LEN + 51 + AS_READ_MAX_NORMAL_LEN + 2));

  LoadScaffoldGraphFromCheckpoint(GlobalData->outputPrefix, ckptNum, FALSE);

  int ifrag;
  for (ifrag=0; ifrag < GetNumVA_CIFragT(ScaffoldGraph->CIFrags); ifrag++) {
    CIFragT *frag = GetCIFragT(ScaffoldGraph->CIFrags, ifrag);
    CIFragT *mate = NULL;

    if (frag->flags.bits.isDeleted)
      continue;

    assert(frag->cid != NULLINDEX);
    assert((frag->flags.bits.hasMate == 0) || (frag->mate_iid != 0));

    //  Fix for missing mates -- OBT used to not delete mate links, leaving
    //  dangling mates.  Somebody else seems to be doing this too.
    //
    if (frag->flags.bits.hasMate) {
      mate = GetCIFragT(ScaffoldGraph->CIFrags, frag->mate_iid);
      if (mate == NULL)
        frag->flags.bits.hasMate = 0;
    }

    //  If this fragment is not chaff, we have nothing to do here.
    //
    if (GetGraphNode(ScaffoldGraph->CIGraph,frag->cid)->flags.bits.isChaff == 0)
      continue;

    //  Print a singleton if there is no mate, the mate isn't chaff,
    //  or we were told to not make miniscaffolds.
    //
    if ((mate == NULL) ||
        (mate->flags.bits.isChaff == 0) ||
        (makeMiniScaffolds == 0)) {
      AS_UID  fUID = getFragmentClear(frag->read_iid, 0, toprint);

      AS_UTL_writeFastA(stdout,
                        toprint, strlen(toprint), 0,
                        ">%s /type=singleton\n", AS_UID_toString(fUID));

    } else if ((mate != NULL) &&
               (mate->flags.bits.isChaff == 1) &&
               (makeMiniScaffolds == 1) &&
               (frag->read_iid < mate->read_iid)) {

      //  make sure the following chain of Ns is divisible by three;
      //  the exact length is arbitrary but Doug Rusch points out that
      //  by making it divisible by 3, we can get lucky and maintain
      //  the phase of a protein ...  which helps in the
      //  auto-annotation of environmental samples

      AS_UID  fUID = getFragmentClear(frag->read_iid, 0, toprint);

      strcat(toprint, "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN");

      AS_UID  mUID = getFragmentClear(mate->read_iid, 1, toprint + strlen(toprint));

      AS_UTL_writeFastA(stdout,
                        toprint, strlen(toprint), 0,
                        ">"F_U64" /type=mini_scaffold /frgs=(%s,%s)\n",
                        getUID(uids),
                        AS_UID_toString(fUID),
                        AS_UID_toString(mUID));
    }
  }

  delete GlobalData;

  exit(0);
}
示例#7
0
int
main(int argc, char **argv) {

  //  Options controlling main

  int    generateOutput = 1;
  int    preserveConsensus = 0;

  int    preMergeRezLevel = -1;
  int    repeatRezLevel   = 0;

  int    restartFromCheckpoint = -1;
  char  *restartFromLogical    = "ckp00-NUL";

  bool   recomputeLeastSquaresOnLoad = false;
  bool   reloadMates                 = false;

  int    doResolveSurrogates               = 1;      //  resolveSurrogates
  int    placeAllFragsInSinglePlacedSurros = 0;      //  resolveSurrogates
  double cutoffToInferSingleCopyStatus     = 0.666;  //  resolveSurrogates

  int    firstFileArg = 0;

  int32  outputFragsPerPartition = 0;

#if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL)
  ContigOrientChecker * coc;
  coc = CreateContigOrientChecker();
  assert(coc != NULL);
#endif

  //  temporary!
  fprintf(stderr, "Using up to %d OpenMP threads.\n", omp_get_max_threads());

  GlobalData = new Globals_CGW();

  argc = AS_configure(argc, argv);

  int arg     = 1;
  int err     = 0;
  int unk[64] = {0};
  int unl     = 0;

  while (arg < argc) {
    if        (strcmp(argv[arg], "-C") == 0) {
      GlobalData->performCleanupScaffolds = 0;

    } else if (strcmp(argv[arg], "-D") == 0) {
      GlobalData->debugLevel = atoi(argv[++arg]);

    } else if (strcmp(argv[arg], "-E") == 0) {
      GlobalData->outputOverlapOnlyContigEdges = 1;

    } else if (strcmp(argv[arg], "-F") == 0) {
      GlobalData->allowDemoteMarkedUnitigs = FALSE;

    } else if (strcmp(argv[arg], "-G") == 0) {
      generateOutput = 0;

    } else if (strcmp(argv[arg], "-GG") == 0) {
      preserveConsensus = 1;

    } else if (strcmp(argv[arg], "-g") == 0) {
      strcpy(GlobalData->gkpStoreName, argv[++arg]);

    } else if (strcmp(argv[arg], "-t") == 0) {
      strcpy(GlobalData->tigStoreName, argv[++arg]);

    } else if (strcmp(argv[arg], "-I") == 0) {
      GlobalData->ignoreChaffUnitigs = 1;

    } else if (strcmp(argv[arg], "-j") == 0) {
      GlobalData->cgbUniqueCutoff = atof(argv[++arg]);

    } else if (strcmp(argv[arg], "-K") == 0) {
      GlobalData->removeNonOverlapingContigsFromScaffold = 1;

    } else if (strcmp(argv[arg], "-k") == 0) {
      GlobalData->cgbDefinitelyUniqueCutoff = atof(argv[++arg]);

    } else if (strcmp(argv[arg], "-m") == 0) {
      GlobalData->minSamplesForOverride = atoi(argv[++arg]);

    } else if (strcmp(argv[arg], "-N") == 0) {
      restartFromLogical = argv[++arg];

    } else if (strcmp(argv[arg], "-o") == 0) {
      strcpy(GlobalData->outputPrefix, argv[++arg]);

    } else if (strcmp(argv[arg], "-B") == 0) {
      outputFragsPerPartition = atoi(argv[++arg]);

    } else if (strcmp(argv[arg], "-P") == 0) {
      GlobalData->closurePlacement = atoi(argv[++arg]);

    } else if (strcmp(argv[arg], "-p") == 0) {
      preMergeRezLevel = atoi(argv[++arg]);

    } else if (strcmp(argv[arg], "-R") == 0) {
      restartFromCheckpoint = atoi(argv[++arg]);
    
    } else if (strcmp(argv[arg], "-r") == 0) {
      repeatRezLevel = atoi(argv[++arg]);

    } else if (strcmp(argv[arg], "-S") == 0) {
      doResolveSurrogates               = 1;
      cutoffToInferSingleCopyStatus     = atof(argv[++arg]);
      placeAllFragsInSinglePlacedSurros = 0;

      if (cutoffToInferSingleCopyStatus == 0.0)
        doResolveSurrogates               = 0;

      if (cutoffToInferSingleCopyStatus < 0) {
        cutoffToInferSingleCopyStatus     = 0.0;
        placeAllFragsInSinglePlacedSurros = 1;
      }

    } else if (strcmp(argv[arg], "-s") == 0) {
      GlobalData->stoneLevel = atoi(argv[++arg]);

    } else if (strcmp(argv[arg], "-filter") == 0) {
      GlobalData->mergeFilterLevel = atoi(argv[++arg]);

    } else if (strcmp(argv[arg], "-shatter") == 0) {
      GlobalData->shatterLevel = atoi(argv[++arg]);

    } else if (strcmp(argv[arg], "-missingMate") == 0) {
      GlobalData->mergeScaffoldMissingMates = atof(argv[++arg]);

      // the value is a percentage between 0 and 1 so make sure it never goes out of those bounds
      if (GlobalData->mergeScaffoldMissingMates < 0) {
    	  GlobalData->mergeScaffoldMissingMates = -1;
      } else if (GlobalData->mergeScaffoldMissingMates > 1) {
    	  GlobalData->mergeScaffoldMissingMates = 1;
      }

    } else if (strcmp(argv[arg], "-U") == 0) {
      GlobalData->doUnjiggleWhenMerging = 1;

    } else if (strcmp(argv[arg], "-u") == 0) {
      fprintf(stderr, "Option -u is broken.\n");
      exit(1);
      strcpy(GlobalData->unitigOverlaps, argv[++arg]);

    } else if (strcmp(argv[arg], "-Z") == 0) {
      GlobalData->demoteSingletonScaffolds = FALSE;

    } else if (strcmp(argv[arg], "-z") == 0) {
      GlobalData->checkRepeatBranchPattern = TRUE;

    } else if (strcmp(argv[arg], "-minmergeweight") == 0) {
      GlobalData->minWeightToMerge = atoi(argv[++arg]);

    } else if (strcmp(argv[arg], "-recomputegaps") == 0) {
      recomputeLeastSquaresOnLoad = true;

    } else if (strcmp(argv[arg], "-reloadmates") == 0) {
      reloadMates = true;

    } else if ((argv[arg][0] != '-') && (firstFileArg == 0)) {
      firstFileArg = arg;
      arg = argc;

    } else {
      unk[unl++] = arg;
      err++;
    }

    arg++;
  }

  if (GlobalData->gkpStoreName[0] == 0)
    err++;

  if (GlobalData->outputPrefix[0] == 0)
    err++;

  if (cutoffToInferSingleCopyStatus > 1.0)
    err++;

  if (err) {
    fprintf(stderr, "usage: %s [options] -g <GatekeeperStoreName> -o <OutputPath> <unitigs*.cgb>\n", argv[0]);
    fprintf(stderr, "   -C                     Don't cleanup scaffolds\n");    
    fprintf(stderr, "   -D <lvl>               Debug\n");
    fprintf(stderr, "   -E                     output overlap only contig edges\n");
    fprintf(stderr, "   -e <thresh>            Microhet score probability cutoff\n");
    fprintf(stderr, "   -F                     strongly enforce unique/repeat flag set in unitig, default if not set is to still\n");
    fprintf(stderr, "                              allow those marked unique to be demoted due to Repeat Branch Pattern or being\n");
    fprintf(stderr, "                              too small\n");
    fprintf(stderr, "   -g                     gkp Store path (required)\n");
    fprintf(stderr, "   -G                     Don't generate output (cgw or cam)\n");
    fprintf(stderr, "   -GG                    Don't destroy consensus on output (ctgcns will do nothing)\n");
    fprintf(stderr, "   -I                     ignore chaff unitigs\n");
    fprintf(stderr, "   -i <thresh>            Set max coverage stat for microhet determination of non-uniqueness (default -1)\n");
    fprintf(stderr, "   -j <thresh>            Set min coverage stat for definite uniqueness\n");
    fprintf(stderr, "   -K                     Allow kicking out a contig placed in a scaffold by mate pairs that has no overlaps\n");
    fprintf(stderr, "                            to both its left and right neighbor contigs.\n");
    fprintf(stderr, "   -k <thresh>            Set max coverage stat for possible uniqueness\n");
    fprintf(stderr, "   -M                     don't do interleaved scaffold merging\n");
    fprintf(stderr, "   -m <min>               Number of mate samples to recompute an insert size, default is 100\n");
    fprintf(stderr, "   -N <ckp>               restart from checkpoint location 'ckp' (see the timing file)\n");
    fprintf(stderr, "   -o                     Output Name (required)\n");
    fprintf(stderr, "   -P <int>               how to place closure reads.\n");
    fprintf(stderr, "                              0 - place at first location found\n");
    fprintf(stderr, "                              1 - place at best gap\n");
    fprintf(stderr, "                              2 - allow to be placed in multiple gaps\n");
    fprintf(stderr, "   -R <ckp>               restart from checkpoint file number 'ckp'\n");
    fprintf(stderr, "   -r <lvl>               repeat resolution level\n");
    fprintf(stderr, "   -S <t>                 place all frags in singly-placed surrogates if at least fraction <x> can be placed\n");
    fprintf(stderr, "                          two special cases:\n");
    fprintf(stderr, "                              if <t> = -1, place all frags in singly-placed surrogates aggressively\n");
    fprintf(stderr, "                                           (which really mean t = 0.0, but triggers a better algorithm)\n");
    fprintf(stderr, "                              if <t> =  0, do not resolve surrogate fragments\n");
    fprintf(stderr, "   -s <lvl>               stone throwing level\n");
    fprintf(stderr, "   -shatter <thresh>      Set threshold for shattering scaffolds when loading from checkpoint. Any contigs\n");
    fprintf(stderr, "                            connected to a scaffold only by edges with less weight than the threshold will be\n");
    fprintf(stderr, "                            split into a new scaffold (default OFF)\n");
    fprintf(stderr, "   -missingMate <thresh>  Set threshold (0-1) for the percentage of mates (out of total) that are allowed to be\n");
    fprintf(stderr, "                            missing when attempting a scaffold merge (default 0). A value of -1 will ignore all\n");
    fprintf(stderr, "                            missing mates\n");
    fprintf(stderr, "   -minmergeweight <w>    Only use weight w or better edges for merging scaffolds.\n");
    fprintf(stderr, "   -recomputegaps         if loading a checkpoint, recompute gaps, merging contigs and splitting low weight scaffolds.\n");
    fprintf(stderr, "   -reloadmates           If loading a checkpoint, also load any new mates from gkpStore.\n");
    fprintf(stderr, "   -U                     after inserting rocks/stones try shifting contig positions back to their original location\n");
    fprintf(stderr, "                            when computing overlaps to see if they overlap with the rock/stone and allow them to merge\n");
    fprintf(stderr, "                            if they do\n");
    fprintf(stderr, "   -u <file>              load these overlaps (from BOG) into the scaffold graph\n");
    fprintf(stderr, "   -v                     verbose\n");
    fprintf(stderr, "   -Z                     Don't demote singleton scaffolds\n");
    fprintf(stderr, "   -z                     Turn on Check for Repeat Branch Pattern (demotes some unique unitigs to repeat)\n");

    fprintf(stderr, "\n");

    if (GlobalData->gkpStoreName[0] == 0)
      fprintf(stderr, "ERROR:  No gatekeeper (-g) supplied.\n");

    if (GlobalData->outputPrefix[0] == 0)
      fprintf(stderr, "ERROR:  No output prefix (-o) supplied.\n");

    if (cutoffToInferSingleCopyStatus > 1.0)
      fprintf(stderr, "ERROR:  surrogate fraction cutoff (-S) must be between 0.0 and 1.0.\n");

    if (unl) {
      for (arg=0; arg<unl; arg++)
        fprintf(stderr, "ERROR:  Unknown option '%s'\n", argv[unk[arg]]);
    }

    exit(1);
  }

  isValidCheckpointName(restartFromLogical);

  if(GlobalData->cgbDefinitelyUniqueCutoff < GlobalData->cgbUniqueCutoff)
    GlobalData->cgbDefinitelyUniqueCutoff = GlobalData->cgbUniqueCutoff;


  if (preMergeRezLevel >= 0)
    GlobalData->repeatRezLevel = preMergeRezLevel;
  else
    GlobalData->repeatRezLevel = repeatRezLevel;


  if (runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_LOADING) == true) {
    int ctme     = time(0);

    //  Create the checkpoint from scratch
    ScaffoldGraph = CreateScaffoldGraph(GlobalData->outputPrefix);

    ProcessInput(firstFileArg, argc, argv);

    //  Insert sizes are set already, but we'll estimate again anyway.
    ComputeMatePairStatisticsRestricted(UNITIG_OPERATIONS, GlobalData->minSamplesForOverride, "unitig_initial");

    if (time(0) - ctme > 60 * 60)
      CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_LOADING], "after loading");

  } else if (isThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_LOADING) == true) {
    //  Load the checkpoint if we are exactly after loading, otherwise, fall through to the
    //  real load.
    LoadScaffoldGraphFromCheckpoint(GlobalData->outputPrefix,restartFromCheckpoint, TRUE);
  }


  if (runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_EDGE_BUILDING) == true) {
    vector<CDS_CID_t>  rawEdges;

    BuildGraphEdgesDirectly(ScaffoldGraph->CIGraph, rawEdges);

    //  Broken, see comments in ChunkOverlap_CGW.c
    //
    //if (GlobalData->unitigOverlaps[0])
    //  AddUnitigOverlaps(ScaffoldGraph->CIGraph, GlobalData->unitigOverlaps, rawEdges);

    // Compute all overlaps implied by mate links between pairs of unique unitigs
    ComputeOverlaps(ScaffoldGraph->CIGraph, rawEdges);

    MergeAllGraphEdges(ScaffoldGraph->CIGraph, rawEdges, FALSE, FALSE);

    CheckEdgesAgainstOverlapper(ScaffoldGraph->CIGraph);
    CheckSurrogateUnitigs();

    //  Mark some Unitigs/Chunks/CIs as repeats based on overlaps GRANGER 2/2/07
    //
    if (GlobalData->checkRepeatBranchPattern)
      DemoteUnitigsWithRBP(stderr, ScaffoldGraph->CIGraph);

    //  At this Point we've constructed the CIGraph

    BuildInitialContigs(ScaffoldGraph);

    if(GlobalData->debugLevel > 0){
      CheckEdgesAgainstOverlapper(ScaffoldGraph->ContigGraph);
      CheckSurrogateUnitigs();
    }

    CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_EDGE_BUILDING], "after building edges");
  } else {
    LoadScaffoldGraphFromCheckpoint(GlobalData->outputPrefix,restartFromCheckpoint, TRUE);

    if (reloadMates)
      ReloadMatesFromGatekeeper();

    //  Dump stats on the loaded checkpoint
    //GeneratePlacedContigGraphStats(tmpBuffer,0);
    //GenerateScaffoldGraphStats(tmpBuffer,0);

    // shatter scaffolds if requested
    if (GlobalData->shatterLevel > 0) {
    	ShatterScaffoldsConnectedByLowWeight(stderr, ScaffoldGraph, GlobalData->shatterLevel, TRUE);
    }

    //  Useful for checking mate happiness on loading.  Currently only checks one scaffold.
    if (0) {
      vector<instrumentLIB>   libs;

      for (int32 i=0; i<GetNumDistTs(ScaffoldGraph->Dists); i++) {
        DistT *dptr = GetDistT(ScaffoldGraph->Dists, i);

        libs.push_back(instrumentLIB(i, dptr->mu, dptr->sigma, true));
      }

      for (int32 sID=287340; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) {
        CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID);

        fprintf(stderr, "ANALYZING SCAFFOLD %d\n", sID);

        if (scaffold->flags.bits.isDead == true)
          continue;

        instrumentSCF   A(scaffold);
        A.analyze(libs);
        A.report();

        exit(0);
      }
    }

    if (recomputeLeastSquaresOnLoad) {
      for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) {
        CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID);

        if (scaffold->flags.bits.isDead == true)
          continue;

        if (true == LeastSquaresGapEstimates(ScaffoldGraph, GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID), LeastSquares_Cleanup | LeastSquares_Split))
          ScaffoldSanity(ScaffoldGraph, scaffold);
      }
    }
  }


  //  We DO want to flush unused unitigs/contigs at this point.  They're not in
  //  a scaffold, and possibly will never be used again (except as rocks/stones).
  //
  ScaffoldGraph->tigStore->flushCache();


  if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_DURING_INITIAL_SCAFFOLDING) == true) &&
      (GlobalData->repeatRezLevel > 0)) {
    int ctme     = time(0);

    if(GlobalData->debugLevel > 0)
      DumpContigs(stderr,ScaffoldGraph, FALSE);

    // Transitive reduction of ContigGraph followed by construction of SEdges

    //  With markShakyBifurcations enabled.
    BuildUniqueCIScaffolds(ScaffoldGraph, TRUE, FALSE);

    CheckEdgesAgainstOverlapper(ScaffoldGraph->ContigGraph);

    //  Equivalent to TidyUpScaffolds().
    //
    for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) {
      CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID);

      if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split))
        ScaffoldSanity(ScaffoldGraph, scaffold);
    }

    if (time(0) - ctme > 60 * 60)
      CheckpointScaffoldGraph(ckpNames[CHECKPOINT_DURING_INITIAL_SCAFFOLDING], "during initial scaffolding");
  }


  if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_INITIAL_SCAFFOLDING) == true) &&
      (GlobalData->repeatRezLevel > 0)) {

    //CheckAllTrustedEdges(ScaffoldGraph);

    {
      vector<CDS_CID_t>  rawEdges;

      BuildSEdges(rawEdges, FALSE);
      MergeAllGraphEdges(ScaffoldGraph->ScaffoldGraph, rawEdges, TRUE, FALSE);
    }

    //ScaffoldSanity(ScaffoldGraph);

    //  rocks is called inside of here
    //  checkpoints are written inside of here

    int iter     = 0;
    int iterMax  = 10;  //  MAX_OUTPUT_REZ_ITERATIONS
    int ctme     = time(0);
    int changed  = TRUE;

    fprintf(stderr,"** Running Level 1 Repeat Rez **\n");

    while ((changed) && (iter < iterMax)) {
      CheckEdgesAgainstOverlapper(ScaffoldGraph->ContigGraph);
      CheckCITypes(ScaffoldGraph);

      changed = RepeatRez(GlobalData->repeatRezLevel, GlobalData->outputPrefix);

      if (changed){
        CleanupScaffolds(ScaffoldGraph, FALSE, NULLINDEX, FALSE);
        ScaffoldSanity(ScaffoldGraph);

        //  With markShakyBifurcations disabled.
        BuildUniqueCIScaffolds(ScaffoldGraph, FALSE, FALSE);

        CheckEdgesAgainstOverlapper(ScaffoldGraph->ContigGraph);

        for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) {
          CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID);

          if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split))
            ScaffoldSanity(ScaffoldGraph, scaffold);
        }

        //CheckAllTrustedEdges(ScaffoldGraph);

        //  This shouldn't be necessary (RepeatRez() calling TidyUpScaffolds() should be doing it),
        //  but it is infrequent (at most iterMax=10 times).
        {
          vector<CDS_CID_t>  rawEdges;

          BuildSEdges(rawEdges, FALSE);
          MergeAllGraphEdges(ScaffoldGraph->ScaffoldGraph, rawEdges, TRUE, FALSE);
        }

        //  If we've been running for 2 hours, AND we've not just
        //  completed the last iteration, checkpoint.
        //
        if ((time(0) - ctme > 120 * 60) && (changed) && (iter+1 < iterMax)) {
          ctme = time(0);
          CheckpointScaffoldGraph(ckpNames[CHECKPOINT_DURING_INITIAL_SCAFFOLDING], "during initial scaffolding");
        }

        iter++;
      }
    }

#if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL)
    ResetContigOrientChecker(coc);
    AddAllScaffoldsToContigOrientChecker(ScaffoldGraph, coc);
#endif

    if(GlobalData->debugLevel > 0)
      DumpCIScaffolds(stderr,ScaffoldGraph, FALSE);

    CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_INITIAL_SCAFFOLDING], "after initial scaffolding");
  }
  //  else TidyUpScaffolds (ScaffoldGraph);


  //  We DO want to flush unused unitigs/contigs at this point.  They're not in
  //  a scaffold, and possibly will never be used again (except as rocks/stones).
  //
  ScaffoldGraph->tigStore->flushCache();


  if (runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_1ST_SCAFF_MERGE) == true) {
    CleanupScaffolds(ScaffoldGraph,FALSE, NULLINDEX, FALSE);

    ScaffoldSanity(ScaffoldGraph);

    /* First we try to merge Scaffolds agressively */
    MergeScaffoldsAggressive(ScaffoldGraph, ckpNames[CHECKPOINT_DURING_1ST_SCAFF_MERGE], FALSE);
    CleanupScaffolds(ScaffoldGraph, FALSE, NULLINDEX, FALSE);

#if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL)
    fprintf(stderr, "---Checking contig orders after MergeScaffoldsAggressive (1)\n\n");
    CheckAllContigOrientationsInAllScaffolds(ScaffoldGraph, coc, POPULATE_COC_HASHTABLE);
#endif

#ifdef CHECK_CONTIG_ORDERS_INCREMENTAL
    ResetContigOrientChecker(coc);
    AddAllScaffoldsToContigOrientChecker(ScaffoldGraph, coc);
#endif

    CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_1ST_SCAFF_MERGE], "after 1st scaffold merge");
  }


  //  We DO want to flush unused unitigs/contigs at this point.  They're not in
  //  a scaffold, and possibly will never be used again (except as rocks/stones).
  //
  ScaffoldGraph->tigStore->flushCache();


  /*
    now that we are done with initial scaffold merge, we want to use the
    standard/default repeatRezLevel. Up to now, the value of preMergeRezLevel
    was in use if set on the command line
  */
  GlobalData->repeatRezLevel = repeatRezLevel;



  /* Now we throw stones */
  if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_STONES) == true) &&
      (GlobalData->stoneLevel > 0)) {

    // Convert single-contig scaffolds that are marginally unique back
    // to unplaced contigs so they might be placed as stones
    //
    //  If we removed any scaffolds, rebuild all the edges.
    //
    if ((GlobalData->demoteSingletonScaffolds == true) &&
        (DemoteSmallSingletonScaffolds() == true)) {
      vector<CDS_CID_t>  rawEdges;

      BuildSEdges(rawEdges, TRUE);
      MergeAllGraphEdges(ScaffoldGraph->ScaffoldGraph, rawEdges, TRUE, TRUE);
    }

    ScaffoldSanity(ScaffoldGraph);
    Throw_Stones(GlobalData->outputPrefix, GlobalData->stoneLevel, FALSE);

    //  Cleanup and split scaffolds.  The cleanup shouldn't do anything, but it's cheap.
    CleanupScaffolds(ScaffoldGraph, FALSE, NULLINDEX, FALSE);

    for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) {
      CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID);

      if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split))
        ScaffoldSanity(ScaffoldGraph, scaffold);
    }

    vector<CDS_CID_t>  rawEdges;

    BuildSEdges(rawEdges, TRUE);
    MergeAllGraphEdges(ScaffoldGraph->ScaffoldGraph, rawEdges, TRUE, TRUE);

    ScaffoldSanity(ScaffoldGraph);

#if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL)
    fprintf(stderr, "---Checking contig orders after Throw_Stones\n\n");
    CheckAllContigOrientationsInAllScaffolds(ScaffoldGraph, coc, POPULATE_COC_HASHTABLE);
#endif

#ifdef CHECK_CONTIG_ORDERS_INCREMENTAL
    ResetContigOrientChecker(coc);
    AddAllScaffoldsToContigOrientChecker(ScaffoldGraph, coc);
#endif

    CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_STONES], "after stone throwing");

    //GenerateLinkStats(ScaffoldGraph->CIGraph, "Stones", 0);
    //GeneratePlacedContigGraphStats("Stones", 0);
    //GenerateLinkStats(ScaffoldGraph->ContigGraph, "Stones", 0);
    //GenerateScaffoldGraphStats("Stones", 0);
  }


  if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_2ND_SCAFF_MERGE) == true) &&
      (GlobalData->stoneLevel > 0)) {

    ScaffoldSanity(ScaffoldGraph);

    MergeScaffoldsAggressive(ScaffoldGraph, ckpNames[CHECKPOINT_DURING_2ND_SCAFF_MERGE], FALSE);

    CleanupScaffolds(ScaffoldGraph, FALSE, NULLINDEX, FALSE);

#if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL)
    fprintf(stderr, "---Checking contig orders after MergeScaffoldsAggressive (2)\n\n");
    CheckAllContigOrientationsInAllScaffolds(ScaffoldGraph, coc, POPULATE_COC_HASHTABLE);
#endif

#ifdef CHECK_CONTIG_ORDERS_INCREMENTAL
    ResetContigOrientChecker(coc);
    AddAllScaffoldsToContigOrientChecker(ScaffoldGraph, coc);
#endif

    CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_2ND_SCAFF_MERGE], "after 2nd scaffold merge");
  }

  //  We DO want to flush unused unitigs/contigs at this point.  They're not in
  //  a scaffold, and possibly will never be used again (except as rocks/stones).
  //
  ScaffoldGraph->tigStore->flushCache();

  //  The original rock throwing (above, RepeatRez()) calls TidyUpScaffolds() after each call to
  //  Fill_Gaps().  This does CleanupAScaffold() and LeastSquaresGapEstimates().  Then it rebuilds
  //  scaffold edges (but not contig edges).  It's not been tested here, so we don't do it yet.

  if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_FINAL_ROCKS) == true) &&
      (GlobalData->repeatRezLevel > 0)) {
    int32  extra_rocks = 0;
    int32  iter        = 0;
    do {

      //  Zero means to rebuild the hopeless scaffold array - e.g., try all scaffolds again.
      //  Before this, it was using iter, but iter was never changed from zero.
      extra_rocks = Fill_Gaps(GlobalData->outputPrefix, GlobalData->repeatRezLevel, 0);
      fprintf(stderr, "Threw additional %d rocks on iter %d\n", extra_rocks, iter++);

#if 0
      CleanupScaffolds(ScaffoldGraph, FALSE, NULLINDEX, FALSE);

      for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) {
        CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID);

        if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split))
          ScaffoldSanity(ScaffoldGraph, scaffold);
      }

      vector<CDS_CID_t>  rawEdges;

      BuildSEdges(rawEdges, FALSE);
      MergeAllGraphEdges(ScaffoldGraph->ScaffoldGraph, rawEdges, TRUE, FALSE);
#endif

      //ScaffoldGraph->tigStore->flushCache();
    } while (extra_rocks > 1);

    //
    //  XXX do we need least squares here?
    //
#if 1
    fprintf(stderr, "Beta - LeastSquaresGapEstimates #1 after final rocks\n");
    for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) {
      CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID);

      if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split))
        ScaffoldSanity(ScaffoldGraph, scaffold);
    }
#endif

    CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_FINAL_ROCKS], "after final rocks");
  }

  if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_PARTIAL_STONES) == true) &&
      (GlobalData->stoneLevel > 0)) {

    ScaffoldSanity (ScaffoldGraph);

    int partial_stones = Throw_Stones(GlobalData->outputPrefix, GlobalData->stoneLevel, TRUE);

    //
    //  XXX do we need least squares here?
    //
#if 1
    fprintf(stderr, "Beta - LeastSquaresGapEstimates #2 after partial stones\n");
    for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) {
      CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID);

      if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split))
        ScaffoldSanity(ScaffoldGraph, scaffold);
    }
#endif

    //  If throw_stones splits scaffolds, rebuild edges
    {
      vector<CDS_CID_t>  rawEdges;

      BuildSEdges(rawEdges, TRUE);
      MergeAllGraphEdges(ScaffoldGraph->ScaffoldGraph, rawEdges, TRUE, TRUE);
    }

    ScaffoldSanity (ScaffoldGraph);

    //ScaffoldGraph->tigStore->flushCache();

    fprintf (stderr, "Threw %d partial stones\n", partial_stones);
#if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL)
    fprintf(stderr,
            "---Checking contig orders after partial_stones\n\n");
    CheckAllContigOrientationsInAllScaffolds(ScaffoldGraph, coc, POPULATE_COC_HASHTABLE);
#endif
#ifdef CHECK_CONTIG_ORDERS_INCREMENTAL
    ResetContigOrientChecker(coc);
    AddAllScaffoldsToContigOrientChecker(ScaffoldGraph, coc);
#endif

    CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_PARTIAL_STONES], "after partial stones");

    //GenerateLinkStats (ScaffoldGraph->CIGraph, "PStones", 0);
    //GeneratePlacedContigGraphStats ("PStones", 0);
    //GenerateLinkStats(ScaffoldGraph->ContigGraph, "PStones", 0);
    //GenerateScaffoldGraphStats ("PStones", 0);
  }

  if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_FINAL_CONTAINED_STONES) == true) &&
      (GlobalData->stoneLevel > 0)) {

    ScaffoldSanity (ScaffoldGraph);

    int contained_stones = Toss_Contained_Stones (GlobalData->outputPrefix, GlobalData->stoneLevel, 0);
    fprintf(stderr, "Threw %d contained stones\n", contained_stones);
    fprintf (stderr, "**** Finished Final Contained Stones level %d ****\n", GlobalData->stoneLevel);

    //  Merge contigs before fiddling with gap sizes.
    CleanupScaffolds (ScaffoldGraph, FALSE, NULLINDEX, FALSE);

    //
    //  XXX do we need least squares here?
    //
#if 1
    fprintf(stderr, "Beta - LeastSquaresGapEstimates #3 after contained stones\n");
    for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) {
      CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID);

      if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split))
        ScaffoldSanity(ScaffoldGraph, scaffold);
    }
#endif

    ScaffoldSanity (ScaffoldGraph);

    // Remove copies of surrogates which are placed multiple times in the same place in a contig

    RemoveSurrogateDuplicates();

#if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL)
    fprintf(stderr, "---Checking contig orders after contained_stones\n\n");
    CheckAllContigOrientationsInAllScaffolds(ScaffoldGraph, coc, POPULATE_COC_HASHTABLE);
#endif
#ifdef CHECK_CONTIG_ORDERS_INCREMENTAL
    ResetContigOrientChecker(coc);
    AddAllScaffoldsToContigOrientChecker(ScaffoldGraph, coc);
#endif

    CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_FINAL_CONTAINED_STONES], "after final contained stones");

    //GenerateLinkStats (ScaffoldGraph->CIGraph, "CStones", 0);
    //GeneratePlacedContigGraphStats ("CStones", 0);
    //GenerateLinkStats(ScaffoldGraph->ContigGraph, "CStones", 0);
    //GenerateScaffoldGraphStats ("CStones", 0);
  }

  //  We DO want to flush unused unitigs/contigs at this point.  They're not in
  //  a scaffold, and possibly will never be used again (except as rocks/stones).
  //
  ScaffoldGraph->tigStore->flushCache();


  if (runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_FINAL_CLEANUP) == true) {

    // Try to cleanup failed merges, and if we do, generate a checkpoint
    if(CleanupFailedMergesInScaffolds(ScaffoldGraph)){
      // This call deletes surrogate-only contigs that failed to merge
      if(CleanupScaffolds(ScaffoldGraph, FALSE, NULLINDEX, TRUE)){

#if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL)
        fprintf(stderr, "---Checking contig orders after final cleanup\n\n");
        CheckAllContigOrientationsInAllScaffolds(ScaffoldGraph, coc, POPULATE_COC_HASHTABLE);
#endif
      }

      //
      //  XXX do we need least squares here?
      //
#if 1
      fprintf(stderr, "Beta - LeastSquaresGapEstimates #4 after final cleanup\n");
      for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) {
        CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID);

        if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split))
          ScaffoldSanity(ScaffoldGraph, scaffold);
      }
#endif

      CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_FINAL_CLEANUP], "after final cleanup");
    }
  }


  if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_RESOLVE_SURROGATES) == true) &&
      (doResolveSurrogates > 0)) {

    resolveSurrogates(placeAllFragsInSinglePlacedSurros, cutoffToInferSingleCopyStatus);
    // Call resolve surrogate twice, this is necessary for finishing (closure) reads.
    // Consider a closure read and its two bounding reads, named left and right:
    //    If one (right) is placed in a unique region while the other (left) is in a surrogate itself, the closure read cannot be placed
    //    However, once the surrogate bounding read is placed (and fully incorporated which happens at the very end of resolveSurrogates)
    //    the closure read can be placed. 
    //    Therefore, we run resolve surrogates twice. 
    // Note that if closure reads are themselves mated, it may be necessary to do a third round of placement.
    resolveSurrogates(placeAllFragsInSinglePlacedSurros, cutoffToInferSingleCopyStatus);
    
    //
    //  XXX do we need least squares here?
    //
#if 1
    fprintf(stderr, "Beta - LeastSquaresGapEstimates #5 after resolve surrogates\n");
    for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) {
      CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID);

      if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split))
        ScaffoldSanity(ScaffoldGraph, scaffold);
    }
#endif

    CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_RESOLVE_SURROGATES], "after resolve surrogates");
  }

  //  This generates the 'rezlog/gapreads' file.  It's hugely
  //  expensive, usually dies on a negative variance assert, and as
  //  far as BPW knows, unused.
  //
  //Show_Reads_In_Gaps (GlobalData->outputPrefix);

  ComputeMatePairStatisticsRestricted(SCAFFOLD_OPERATIONS, GlobalData->minSamplesForOverride, "scaffold_final");
  ComputeMatePairStatisticsRestricted(CONTIG_OPERATIONS, GlobalData->minSamplesForOverride, "contig_final");

  GenerateCIGraph_U_Stats();
  GenerateLinkStats(ScaffoldGraph->CIGraph,"final",0);
  GeneratePlacedContigGraphStats("final",0);
  GenerateLinkStats(ScaffoldGraph->ContigGraph,"final",0);
  GenerateScaffoldGraphStats("final",0);
  GenerateSurrogateStats("final");
  
#ifdef DEBUG
  int j = 0;
  for (j = 0; j < GetNumVA_CIFragT(ScaffoldGraph->CIFrags); j++) {
    CIFragT * frag = GetCIFragT(ScaffoldGraph->CIFrags, j);
         
    if (ScaffoldGraph->gkpStore->gkStore_getFRGtoPLC(frag->read_iid) != 0) {
      AS_UID uid = getGatekeeperIIDtoUID(ScaffoldGraph->gkpStore, frag->read_iid, AS_IID_FRG);
      if (frag->contigID != -1) {
        ChunkInstanceT * ctg = GetGraphNode(ScaffoldGraph->ContigGraph, frag->contigID);            
        fprintf(stderr, "CLOSURE_READS: CLOSURE READ %s PLACED=%d CHAFF=%d SINGLETON=%d IN ASM type %c in SCF %d\n", AS_UID_toString(uid), frag->flags.bits.isPlaced, frag->flags.bits.isChaff, frag->flags.bits.isSingleton, frag->type, ctg->scaffoldID);
      }
    }
  }
#endif

  //  We DO want to flush unused unitigs/contigs at this point.  They're not in
  //  a scaffold, and possibly will never be used again (except as rocks/stones).
  //
  //  (This assumes that output doesn't load unitigs/contigs again)
  //
  ScaffoldGraph->tigStore->flushCache();

  SetCIScaffoldTLengths(ScaffoldGraph);

  if(generateOutput){
    CelamyAssembly(GlobalData->outputPrefix);

    MarkContigEdges();
    ComputeMatePairDetailedStatus();

    //  Note that OutputContigs partitions the tigStore, and closes ScaffoldGraph->tigStore.  The
    //  only operation valid after this function is CheckpointScaffoldGraph().

    OutputUnitigsFromMultiAligns();
    OutputContigsFromMultiAligns(outputFragsPerPartition, preserveConsensus);

    CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_OUTPUT], "after output");
  }

  DestroyScaffoldGraph(ScaffoldGraph);

  delete GlobalData;

  fprintf(stderr,"* Bye *\n");

  exit(0);
}
示例#8
0
//  Debugging aid.  Writes a description of one contig to stderr: its
//  orientation and offsets, its consensus (only the two ends when the
//  consensus is 5000 bases or longer), every unitig listed in its multialign
//  together with that unitig's fragments, and every raw edge of the contig in
//  the contig graph.
//
void
dumpContigInfo(ChunkInstanceT *contig) {
  VA_TYPE(char) *consensus = CreateVA_char(2048);
  VA_TYPE(char) *quality   = CreateVA_char(2048);

  fprintf( stderr, "*********************** contig analysis **************************\n");
  fprintf( stderr, "analyzing contig: %d\n", contig->id);

  //  0 when the A end has the smaller offset, 1 otherwise.
  int flipped = (contig->offsetAEnd.mean < contig->offsetBEnd.mean) ? 0 : 1;

  fprintf(stderr, "contig orientation: %d\t length: %d  contig offsetAEnd: %d\t offsetBEnd: %d\n",
          flipped,
          (int)contig->bpLength.mean,
          (int)contig->offsetAEnd.mean,
          (int)contig->offsetBEnd.mean);

  MultiAlignT *contigMA = ScaffoldGraph->tigStore->loadMultiAlign(contig->id, ScaffoldGraph->ContigGraph->type == CI_GRAPH);

  //  Pull the contig consensus into the scratch arrays; only the bases are used below.
  GetConsensus(ScaffoldGraph->ContigGraph, contig->id, consensus, quality);

  char *bases    = Getchar(consensus, 0);
  int   numBases = strlen(bases);

  //  Flip the local copy so the printed sequence matches the orientation reported above.
  if (flipped == 1)
    reverseComplementSequence(bases, numBases);

  if (numBases >= 5000) {
    //  Long consensus: print only the two ends.  The left end is cut by
    //  temporarily planting a terminator, which is undone before the right
    //  end is printed.
    char saved = bases[2500];

    bases[2500] = '\0';

    fprintf( stderr, ">contig%d left end\n", contig->id);
    fprintf( stderr, "%s\n", bases);

    bases[2500] = saved;

    fprintf( stderr, ">contig%d right end\n", contig->id);
    fprintf( stderr, "%s\n", bases + numBases - 2501);
  } else {
    fprintf( stderr, ">contig%d consensus seq (flipped to reflect scaff orientation)\n", contig->id);
    fprintf( stderr, "%s\n", bases);
  }

  //  Unitigs of this contig, and the fragments in each.

  int unitigCount = GetNumIntUnitigPoss(contigMA->u_list);
  fprintf( stderr, "number unitigs: %d\n", unitigCount);

  for (int u = 0; u < unitigCount; u++) {
    IntUnitigPos   *placement = GetIntUnitigPos( contigMA->u_list, u);
    ChunkInstanceT *unitig    = GetGraphNode( ScaffoldGraph->CIGraph, placement->ident);
    MultiAlignT    *unitigMA  = ScaffoldGraph->tigStore->loadMultiAlign(unitig->id, ScaffoldGraph->CIGraph->type == CI_GRAPH);

    bool surrogate = (unitig->flags.bits.isStoneSurrogate || unitig->flags.bits.isWalkSurrogate);

    fprintf( stderr, "  unitig: %d\t num frags: %ld surrogate: %d\n", unitig->id, GetNumIntMultiPoss(unitigMA->f_list),
             surrogate);

    if (surrogate) {
      fprintf (stderr, "  surrogate unitig offsetAEnd: %f, offsetBEnd: %f\n", unitig->offsetAEnd.mean, unitig->offsetBEnd.mean);

      //  Switch to the unitig named by baseID and list its fragments instead.
      unitig = GetGraphNode( ScaffoldGraph->CIGraph, unitig->info.CI.baseID);
      fprintf ( stderr, "  using original unitig: %d\n", unitig->id);
      unitigMA = ScaffoldGraph->tigStore->loadMultiAlign(unitig->id,
                                                         ScaffoldGraph->CIGraph->type == CI_GRAPH);
    }

    //  One line per fragment of the unitig.
    int f = 0;
    while (f < GetNumIntMultiPoss(unitigMA->f_list)) {
      IntMultiPos *pos  = GetIntMultiPos(unitigMA->f_list, f);
      CIFragT     *frag = GetCIFragT(ScaffoldGraph->CIFrags, pos->ident);

      fprintf(stderr, "    frag: %6d\t contig pos (5p, 3p): %6d, %6d\n",
              pos->ident, (int) frag->contigOffset5p.mean, (int) frag->contigOffset3p.mean);

      f++;
    }
  }

  //  Raw edges of this contig.

  GraphEdgeIterator edges(ScaffoldGraph->ContigGraph, contig->id, ALL_END, ALL_EDGES);

  for (CIEdgeT *e = edges.nextRaw(); e != NULL; e = edges.nextRaw())
    PrintGraphEdge( stderr, ScaffoldGraph->ContigGraph, "Analyzing edge", e, 0);

  DeleteVA_char(consensus);
  DeleteVA_char(quality);
}
示例#9
0
//  Rebuild graph->ContigGraph from graph->CIGraph.
//
//  The contig graph's node and edge arrays are thrown away and recreated with
//  the CI graph's counts.  Every contig slot is first marked dead; then each
//  node returned by the CI graph iterator is copied into the contig slot with
//  the same id and relabelled as a single-unitig contig.  Finally the CI
//  edges are copied: a top-level raw edge becomes one contig edge, and a
//  top-level merged edge becomes one contig edge immediately followed by
//  copies of the raw edges chained off it.
//
//  graph - scaffold graph whose ContigGraph is replaced; its CIGraph nodes
//          also have AEndNext, BEndNext and info.CI.contigID reset.
//
void
BuildInitialContigs(ScaffoldGraphT *graph) {

  //  Resize the ContigGraph to the same size as the CI Graph

  fprintf(stderr,"BuildInitialContigs()-- converting %d unitigs with %d edges to contigs.\n",
          GetNumGraphNodes(graph->CIGraph),
          GetNumGraphEdges(graph->CIGraph));

  DeleteVA_NodeCGW_T(graph->ContigGraph->nodes);
  DeleteVA_EdgeCGW_T(graph->ContigGraph->edges);

  graph->ContigGraph->nodes = CreateVA_NodeCGW_T(GetNumGraphNodes(graph->CIGraph));
  graph->ContigGraph->edges = CreateVA_EdgeCGW_T(GetNumGraphEdges(graph->CIGraph));

  //  Presumably makes the first N node slots addressable so the loop below
  //  can fetch them by id -- TODO confirm against EnableRange_VA.
  EnableRange_VA(graph->ContigGraph->nodes, GetNumGraphNodes(graph->CIGraph));

  graph->ContigGraph->edgeLists.clear();

  ResizeEdgeList(graph->ContigGraph);

  //  Clear contigs.  Every slot starts out as a dead contig; slots that have a
  //  matching CI node are overwritten in the copy loop below.

  for (int32 cid=0; cid < GetNumGraphNodes(graph->ContigGraph); cid++) {
    NodeCGW_T *ctg = GetGraphNode(graph->ContigGraph, cid);

    ctg->flags.all           = 0;
    ctg->flags.bits.isContig = TRUE;
    ctg->flags.bits.isDead   = TRUE;

    //ctg->edgeHead            = NULLINDEX;

    graph->ContigGraph->edgeLists[cid].clear();
  }

  //  And copy.

  GraphNodeIterator CIs;
  NodeCGW_T        *CI;

  InitGraphNodeIterator(&CIs, graph->CIGraph, GRAPH_NODE_DEFAULT);

  while ((CI = NextGraphNodeIterator(&CIs)) != NULL){
    assert(CI->flags.bits.isDead == 0);

    //  Reset the unitig.  It belongs to the contig that shares its id.

    CI->AEndNext                = NULLINDEX;
    CI->BEndNext                = NULLINDEX;
    CI->info.CI.contigID        = CI->id;

    //  Copy to a new contig.  Start from a full copy of the unitig node, then
    //  override the fields that differ for a contig.

    ContigT contig = *CI;

    contig.type                 = CONTIG_CGW;
    contig.id                   = CI->id;
    contig.scaffoldID           = NULLINDEX;
    contig.smoothExpectedCID    = NULLINDEX;
    contig.numEssentialA        = 0;
    contig.numEssentialB        = 0;
    contig.essentialEdgeA       = NULLINDEX;
    contig.essentialEdgeB       = NULLINDEX;
    contig.info.Contig.AEndCI   = CI->id;
    contig.info.Contig.BEndCI   = CI->id;
    contig.info.Contig.numCI    = 1;
    contig.indexInScaffold      = NULLINDEX;
    contig.flags.bits.isCI      = FALSE;
    contig.flags.bits.isContig  = TRUE;
    contig.flags.bits.isChaff   = CI->flags.bits.isChaff;
    contig.flags.bits.isClosure = CI->flags.bits.isClosure;
    //contig.edgeHead             = NULLINDEX;

    SetNodeCGW_T(graph->ContigGraph->nodes, contig.id, &contig);

    //  Ensure that there are no edges, and that the edgeList is allocated.
    assert(graph->ContigGraph->edgeLists[contig.id].empty() == true);
  }

  graph->numContigs = GetNumGraphNodes(graph->ContigGraph);

  //  Now, work on the edges.

  uint32   nRawSkipped = 0;   //  raw CI edges passed over because they hang off a merged edge
  uint32   nMerged     = 0;   //  top-level merged edges copied
  uint32   nTopRaw     = 0;   //  top-level raw edges copied
  uint32   nRaw        = 0;   //  raw edges copied while walking merged-edge chains

  for (uint32 i=0; i<GetNumGraphEdges(graph->CIGraph); i++) {
    CIEdgeT  *edge = GetGraphEdge(graph->CIGraph, i);

    if (edge->flags.bits.isDeleted)
      continue;

    //  If this isn't a top-level edge, skip it.
    //  It must also be raw, and therefore already added.

    if (edge->topLevelEdge != GetVAIndex_CIEdgeT(graph->CIGraph->edges, edge)) {
      assert(edge->flags.bits.isRaw == true);
      nRawSkipped++;
      continue;
    }

    //  Is it a top-level raw edge?

    if (edge->flags.bits.isRaw == true) {
      CIEdgeT   newEdge     = *edge;

      //  The copy remembers which CI edge it came from, and is its own top
      //  level edge: its index is the current edge count, i.e. the slot the
      //  append below is expected to fill.
      newEdge.referenceEdge = i;
      newEdge.topLevelEdge  = GetNumGraphEdges(graph->ContigGraph);

      AppendGraphEdge(graph->ContigGraph, &newEdge);

      InsertGraphEdgeInList(graph->ContigGraph, newEdge.topLevelEdge, newEdge.idA);
      InsertGraphEdgeInList(graph->ContigGraph, newEdge.topLevelEdge, newEdge.idB);

      nTopRaw++;

      continue;
    }

    //  Otherwise, it must be a top-level merged edge

    assert(edge->nextRawEdge != NULLINDEX);

    //  Raw top-level edges were handled above, so this guard is expected to
    //  hold for every edge that reaches this point.
    if (edge->flags.bits.isRaw == FALSE) {
      CIEdgeT   newEdge     = *edge;
      CIEdgeT   rawEdge;

      //  Both indices must be computed BEFORE the append: the merged edge
      //  goes in the next free slot and its first raw edge right after it.
      newEdge.topLevelEdge  = GetNumGraphEdges(graph->ContigGraph);
      newEdge.nextRawEdge   = GetNumGraphEdges(graph->ContigGraph) + 1;  //  Must be raw edges!

      AppendGraphEdge(graph->ContigGraph, &newEdge);

      InsertGraphEdgeInList(graph->ContigGraph, newEdge.topLevelEdge, newEdge.idA);
      InsertGraphEdgeInList(graph->ContigGraph, newEdge.topLevelEdge, newEdge.idB);

      nMerged++;

      //  And copy over all the raw edges that compose this merged edge.  Only
      //  the merged edge was inserted in the per-node edge lists above; the
      //  raw copies are reachable through the nextRawEdge chain.

      while (edge->nextRawEdge != NULLINDEX) {
        CIEdgeT  *redge       = GetGraphEdge(graph->CIGraph, edge->nextRawEdge);  //  Grab the raw CI edge

        rawEdge               = *redge;

        //  These used to be assignments, but they should be correct as is.
        assert(rawEdge.idA == newEdge.idA);
        assert(rawEdge.idB == newEdge.idB);

        rawEdge.topLevelEdge  = newEdge.topLevelEdge;   //  The ID of the new contig top level edge
        rawEdge.referenceEdge = edge->nextRawEdge;      //  The ID of the current CI raw edge

        //  rawEdge.nextRawEdge is currently the next CI raw edge.  If that is defined,
        //  reset it to the next edge we'd add to the contig graph.
        if (rawEdge.nextRawEdge != NULLINDEX)
          rawEdge.nextRawEdge = GetNumGraphEdges(graph->ContigGraph) + 1;

        AppendGraphEdge(graph->ContigGraph, &rawEdge);

        nRaw++;

        edge = redge;
      }
    }
  }

  //  The format macro needs whitespace on both sides: since C++11 a string
  //  literal immediately followed by an identifier is lexed as a
  //  user-defined-literal suffix, so the macro would never be expanded.
  fprintf(stderr,"BuildInitialContigs()-- converted " F_U32 " merged edges with " F_U32 " raw edges; skipped " F_U32 " raw edges in merged edges; converted " F_U32 " top level raw edges.\n",
          nMerged, nRaw, nRawSkipped, nTopRaw);

  //  Every raw edge skipped in the main loop must have been copied while
  //  walking some merged edge's chain.
  assert(nRawSkipped == nRaw);
}
示例#10
0
        edge = redge;
      }
    }
  }

  fprintf(stderr,"BuildInitialContigs()-- converted "F_U32" merged edges with "F_U32" raw edges; skipped "F_U32" raw edges in merged edges; converted "F_U32" top level raw edges.\n",
          nMerged, nRaw, nRawSkipped, nTopRaw);

  assert(nRawSkipped == nRaw);
}

//  Fill consensusVA / qualityVA with the ungapped consensus and quality of
//  node CIindex of 'graph', and return the resulting number of chars in
//  consensusVA.
//
//  graph       - graph holding the node; must be the contig graph when the
//                node is flagged as a contig (asserted below)
//  CIindex     - node id, also used as the id passed to the tig store
//  consensusVA - output array, reset before it is refilled
//  qualityVA   - output array, reset before it is refilled
//
int GetConsensus(GraphCGW_T *graph, CDS_CID_t CIindex,
                 VA_TYPE(char) *consensusVA, VA_TYPE(char) *qualityVA){
  // Return value is length of unitig or contig  sequence/quality (-1 if failure)
  // NOTE(review): no statement below returns -1; a node flagged as neither
  // CI nor contig hits assert(0) instead -- confirm what callers expect.
  ChunkInstanceT *CI = GetGraphNode(graph, CIindex);
  MultiAlignT *MA = NULL;

  // Reset both output arrays before refilling them
  ResetVA_char(consensusVA);
  ResetVA_char(qualityVA);
  if(CI->flags.bits.isCI){
    // Get it from the store of Unitig multi alignments
    MA = ScaffoldGraph->tigStore->loadMultiAlign(CIindex, TRUE);
  }else if(CI->flags.bits.isContig){// Get it from the store of Contig multi alignments
    assert(graph->type == CONTIG_GRAPH);
    MA = ScaffoldGraph->tigStore->loadMultiAlign(CIindex, FALSE);
  }else assert(0);

  // NOTE(review): if asserts are compiled out and neither flag is set, MA is
  // still NULL here -- confirm that cannot happen.
  GetMultiAlignUngappedConsensus(MA, consensusVA, qualityVA);

  return GetNumchars(consensusVA);
示例#11
0
void
writeSLK(FILE *asmFile, bool doWrite) {
  SnapScaffoldLinkMesg slk;
  GenericMesg          pmesg = { &slk, MESG_SLK };
  GraphNodeIterator    scaffolds;
  CIScaffoldT         *scaffold;
  CIScaffoldT         *scafmate;

  fprintf(stderr, "writeSLK()--\n");

  InitGraphNodeIterator(&scaffolds, ScaffoldGraph->ScaffoldGraph, GRAPH_NODE_DEFAULT);
  while ((scaffold = NextGraphNodeIterator(&scaffolds)) != NULL) {
    GraphEdgeIterator    edges(ScaffoldGraph->ScaffoldGraph, scaffold->id, ALL_END, ALL_EDGES);
    CIEdgeT             *edge;
    CIEdgeT             *redge;

    while((edge = edges.nextMerged()) != NULL) {
      if (edge->idA != scaffold->id)
        continue;

      scafmate = GetGraphNode(ScaffoldGraph->ScaffoldGraph, edge->idB);

      assert(!isOverlapEdge(edge));

      slk.escaffold1       = SCFmap.lookup(scaffold->id);
      slk.escaffold2       = SCFmap.lookup(scafmate->id);

      slk.orientation      = edge->orient;

      slk.mean_distance    = edge->distance.mean;
      slk.std_deviation    = sqrt(edge->distance.variance);
      slk.num_contributing = edge->edgesContributing;

      int edgeTotal = slk.num_contributing;
      int edgeCount = 0;

      if(edgeTotal < 2)
        continue;

      slk.jump_list = (SnapMate_Pairs *)safe_malloc(sizeof(SnapMate_Pairs) * slk.num_contributing);

      if (edge->flags.bits.isRaw) {
        assert(edgeTotal <= 1);    // sanity check

        if (edgeTotal == 1) {
          slk.jump_list[edgeCount].in1 = FRGmap.lookup(edge->fragA);
          slk.jump_list[edgeCount].in2 = FRGmap.lookup(edge->fragB);
        }else{
          slk.jump_list[edgeCount].in1 = AS_UID_undefined();
          slk.jump_list[edgeCount].in2 = AS_UID_undefined();
        }

        slk.jump_list[edgeCount].type.setIsMatePair();

        edgeCount++;

      } else {
        redge = edge;

        assert(redge->flags.bits.isRaw == FALSE);

        assert(redge->nextRawEdge != NULLINDEX); // must have >= 1 raw edge

        while (redge->nextRawEdge != NULLINDEX) {
          redge = GetGraphEdge(ScaffoldGraph->ScaffoldGraph,redge->nextRawEdge);

          assert(!isOverlapEdge(redge));

          slk.jump_list[edgeCount].in1  = FRGmap.lookup(redge->fragA);
          slk.jump_list[edgeCount].in2  = FRGmap.lookup(redge->fragB);
          slk.jump_list[edgeCount].type.setIsMatePair();

          edgeCount++;
        }
      }

      assert(edgeCount == edgeTotal);

      if (doWrite)
        WriteProtoMesg_AS(asmFile, &pmesg);

      safe_free(slk.jump_list);
    }
  }
}
示例#12
0
void
writeCLK(FILE *asmFile, bool doWrite) {
  SnapContigLinkMesg     clk;
  GenericMesg            pmesg = { &clk, MESG_CLK };
  GraphNodeIterator      nodes;
  ContigT               *ctg;

  fprintf(stderr, "writeCLK()--\n");

  InitGraphNodeIterator(&nodes, ScaffoldGraph->ContigGraph, GRAPH_NODE_DEFAULT);
  while ((ctg = NextGraphNodeIterator(&nodes)) != NULL) {

    if (ctg->flags.bits.isChaff)
      continue;

    if (SurrogatedSingleUnitigContig(ctg))
      continue;

    GraphEdgeIterator  edges(ScaffoldGraph->ContigGraph, ctg->id, ALL_END, ALL_EDGES);
    CIEdgeT           *edge;

    while((edge = edges.nextMerged()) != NULL){

      if (edge->idA != ctg->id)
        continue;

      ContigT *mate = GetGraphNode(ScaffoldGraph->ContigGraph, edge->idB);

      if(mate->flags.bits.isChaff)
        continue;

      if (SurrogatedSingleUnitigContig(mate))
        continue;

      clk.econtig1 = CCOmap.lookup(edge->idA);
      clk.econtig2 = CCOmap.lookup(edge->idB);

      clk.orientation = edge->orient;  //  Don't need to map orientation, always using canonical orientation

      clk.overlap_type = (isOverlapEdge(edge)) ? AS_OVERLAP : AS_NO_OVERLAP;

      switch (GetEdgeStatus(edge)) {
        case LARGE_VARIANCE_EDGE_STATUS:
        case UNKNOWN_EDGE_STATUS:
        case INTER_SCAFFOLD_EDGE_STATUS:
          clk.status = AS_UNKNOWN_IN_ASSEMBLY;
          break;

        case TENTATIVE_TRUSTED_EDGE_STATUS:
        case TRUSTED_EDGE_STATUS:
          clk.status = AS_IN_ASSEMBLY;
          break;

        case TENTATIVE_UNTRUSTED_EDGE_STATUS:
        case UNTRUSTED_EDGE_STATUS:
          clk.status = AS_BAD;
          break;

        default:
          assert(0 /* Invalid edge status */);
      }

      clk.is_possible_chimera = edge->flags.bits.isPossibleChimera;
      clk.mean_distance       = edge->distance.mean;
      clk.std_deviation       = sqrt(edge->distance.variance);
      clk.num_contributing    = edge->edgesContributing;

      uint32 edgeCount = 0;
      uint32 edgeTotal = clk.num_contributing;

      if ((edgeTotal == 1) &&
          (clk.overlap_type == AS_OVERLAP) &&
          (GlobalData->outputOverlapOnlyContigEdges == FALSE))
        // don't output pure overlap edges
        continue;

      clk.jump_list = (SnapMate_Pairs *)safe_malloc(sizeof(SnapMate_Pairs) * edgeTotal);

      if (edge->flags.bits.isRaw) {
        assert(edgeTotal == 1);

        if (clk.overlap_type == AS_NO_OVERLAP) {
          clk.jump_list[edgeCount].in1  = FRGmap.lookup(edge->fragA);
          clk.jump_list[edgeCount].in2  = FRGmap.lookup(edge->fragB);
          clk.jump_list[edgeCount].type.setIsMatePair();
        } else {
          assert(GlobalData->outputOverlapOnlyContigEdges);
          clk.jump_list[edgeCount].in1  = AS_UID_undefined();
          clk.jump_list[edgeCount].in2  = AS_UID_undefined();
          clk.jump_list[edgeCount].type.setIsOverlap();
        }

        edgeCount++;

      } else {
        CIEdgeT *redge = edge;

        assert(redge->nextRawEdge != NULLINDEX); // must have >= 1 raw edge

        while (redge->nextRawEdge != NULLINDEX) {
          redge = GetGraphEdge(ScaffoldGraph->ContigGraph, redge->nextRawEdge);

          if (isOverlapEdge(redge)) {
            // overlap edges don't count
            edgeTotal--;
            continue;
          }

          clk.jump_list[edgeCount].in1  = FRGmap.lookup(redge->fragA);
          clk.jump_list[edgeCount].in2  = FRGmap.lookup(redge->fragB);
          clk.jump_list[edgeCount].type.setIsMatePair();

          edgeCount++;
        }
      }

      assert(edgeCount == edgeTotal);

      if (doWrite)
        WriteProtoMesg_AS(asmFile, &pmesg);

      safe_free(clk.jump_list);
    }
  }
}
示例#13
0
void
writeCCO(FILE *asmFile, bool doWrite) {
  SnapConConMesg      cco;
  GenericMesg         pmesg = { &cco, MESG_CCO };
  GraphNodeIterator   contigs;
  ContigT             *contig;

  fprintf(stderr, "writeCCO()--\n");

  InitGraphNodeIterator(&contigs, ScaffoldGraph->ContigGraph, GRAPH_NODE_DEFAULT);
  while ((contig = NextGraphNodeIterator(&contigs)) != NULL) {
    assert(contig->id >= 0);
    assert(contig->id < GetNumGraphNodes(ScaffoldGraph->ContigGraph));

    if (contig->flags.bits.isChaff)
      continue;

    NodeCGW_T *unitig = GetGraphNode(ScaffoldGraph->CIGraph, contig->info.Contig.AEndCI);

    if ((ScaffoldGraph->tigStore->getNumUnitigs(contig->id, FALSE) == 1) &&
        (contig->scaffoldID == NULLINDEX) &&
        (unitig->info.CI.numInstances > 0))
      //  Contig is a surrogate instance
      continue;

    MultiAlignT *ma = ScaffoldGraph->tigStore->loadMultiAlign(contig->id, FALSE);

    cco.eaccession  = AS_UID_fromInteger(getUID(uidServer));
    cco.iaccession  = contig->id;
    cco.placed      = ScaffoldGraph->tigStore->getContigStatus(contig->id);
    cco.length      = GetMultiAlignLength(ma);
    cco.consensus   = Getchar(ma->consensus, 0);
    cco.quality     = Getchar(ma->quality, 0);
    cco.forced      = 0;
    cco.num_pieces  = GetNumIntMultiPoss(ma->f_list);
    cco.num_unitigs = GetNumIntMultiPoss(ma->u_list);
    cco.num_vars    = GetNumIntMultiPoss(ma->v_list);
    cco.pieces      = NULL;
    cco.unitigs     = NULL;
    cco.vars        = NULL;

    if (cco.consensus == NULL)
      fprintf(stderr, "buildCCOMessage()-- contig %d missing consensus sequence\n",
              cco.iaccession);
    assert(cco.consensus != NULL);
    if (cco.length != strlen(cco.consensus))
      fprintf(stderr, "buildCCOMessage()-- contig %d length %d != consensus string length "F_SIZE_T"\n",
              cco.iaccession, cco.length, strlen(cco.consensus));
    assert(cco.length == strlen(cco.consensus));

    if (cco.num_pieces > 0) {
      cco.pieces = (SnapMultiPos *)safe_malloc(cco.num_pieces * sizeof(SnapMultiPos));

      for(int32 i=0; i<cco.num_pieces; i++) {
        IntMultiPos *imp = GetIntMultiPos(ma->f_list, i);

        cco.pieces[i].type         = imp->type;
        cco.pieces[i].eident       = FRGmap.lookup(imp->ident);
        cco.pieces[i].delta_length = imp->delta_length;
        cco.pieces[i].position     = imp->position;
        cco.pieces[i].delta        = imp->delta;
      }
    }

    if (cco.num_unitigs > 0) {
      cco.unitigs = (UnitigPos *)safe_malloc(cco.num_unitigs * sizeof(UnitigPos));

      for(int32 i=0; i<cco.num_unitigs; i++) {
        IntUnitigPos *imp = GetIntUnitigPos(ma->u_list, i);

        cco.unitigs[i].type         = imp->type;
        cco.unitigs[i].eident       = UTGmap.lookup(imp->ident);
        cco.unitigs[i].position     = imp->position;
        cco.unitigs[i].delta        = imp->delta;
        cco.unitigs[i].delta_length = imp->delta_length;
      }
    }

    if (cco.num_vars > 0) {
      cco.vars = (IntMultiVar *)safe_malloc(cco.num_vars * sizeof(IntMultiVar));

      for(int32 i=0; i<cco.num_vars; i++) {
        IntMultiVar *imv = GetIntMultiVar(ma->v_list, i);

        cco.vars[i].var_id                = imv->var_id;
        cco.vars[i].phased_id             = imv->phased_id;

        cco.vars[i].position              = imv->position;
        cco.vars[i].num_reads             = imv->num_reads;
        cco.vars[i].num_alleles           = imv->num_alleles;
        cco.vars[i].num_alleles_confirmed = imv->num_alleles_confirmed;
        cco.vars[i].min_anchor_size       = imv->min_anchor_size;
        cco.vars[i].var_length            = imv->var_length;

        cco.vars[i].alleles               = imv->alleles;
        cco.vars[i].var_seq_memory        = imv->var_seq_memory;
        cco.vars[i].read_id_memory        = imv->read_id_memory;

        cco.vars[i].enc_num_reads         = NULL;
        cco.vars[i].enc_weights           = NULL;
        cco.vars[i].enc_var_seq           = NULL;
        cco.vars[i].enc_read_ids          = NULL;
      }
    }

    if (doWrite)
      WriteProtoMesg_AS(asmFile, &pmesg);

    safe_free(cco.pieces);
    safe_free(cco.unitigs);
    safe_free(cco.vars);

    CCOmap.add(cco.iaccession, cco.eaccession);
  }
}
示例#14
0
void
writeULK(FILE *asmFile, bool doWrite) {
  SnapUnitigLinkMesg   ulk;
  GenericMesg          pmesg = { & ulk, MESG_ULK };
  GraphNodeIterator    nodes;
  ChunkInstanceT      *ci;

  fprintf(stderr, "writeULK()--\n");

  InitGraphNodeIterator(&nodes, ScaffoldGraph->CIGraph, GRAPH_NODE_DEFAULT);
  while ((ci = NextGraphNodeIterator(&nodes)) != NULL) {
    assert(ci->type != CONTIG_CGW);

    if (ci->type == RESOLVEDREPEATCHUNK_CGW)
      continue;

    if (ci->flags.bits.isChaff)
      continue;

    GraphEdgeIterator  edges(ScaffoldGraph->CIGraph, ci->id, ALL_END, ALL_EDGES);
    CIEdgeT           *edge;

    while ((edge = edges.nextMerged()) != NULL) {

      if (edge->idA != ci->id ||
          edge->flags.bits.isInferred ||
          edge->flags.bits.isInferredRemoved ||
          edge->flags.bits.isMarkedForDeletion)
        continue;

      ChunkInstanceT *mi = GetGraphNode(ScaffoldGraph->CIGraph, edge->idB);

      if (mi->flags.bits.isChaff)
        continue;

      ulk.eunitig1 = UTGmap.lookup(edge->idA);  //  == ci->id
      ulk.eunitig2 = UTGmap.lookup(edge->idB);

      ulk.orientation = edge->orient;  //  Don't need to map orientation, always using canonical orientation

      ulk.overlap_type = (isOverlapEdge(edge)) ? AS_OVERLAP : AS_NO_OVERLAP;

      ulk.is_possible_chimera = edge->flags.bits.isPossibleChimera;
      ulk.mean_distance       = edge->distance.mean;
      ulk.std_deviation       = sqrt(edge->distance.variance);
      ulk.num_contributing    = edge->edgesContributing;
      ulk.status              = AS_UNKNOWN_IN_ASSEMBLY;

      uint32  edgeCount = 0;
      uint32  edgeTotal = ulk.num_contributing;

      if ((edgeTotal == 1) && (ulk.overlap_type == AS_OVERLAP))
        // don't output pure overlap edges
        continue;

      // Look through the fragment pairs in this edge to decide the status of the link.

      CIEdgeT *redge = (edge->flags.bits.isRaw) ? edge : GetGraphEdge(ScaffoldGraph->CIGraph, edge->nextRawEdge);

      int numBad     = 0;
      int numGood    = 0;
      int numUnknown = 0;

      for (; redge != NULL; redge = GetGraphEdge(ScaffoldGraph->CIGraph, redge->nextRawEdge)) {
        if(isOverlapEdge(redge))
          continue;

        CIFragT *fragA = GetCIFragT(ScaffoldGraph->CIFrags, redge->fragA);
        CIFragT *fragB = GetCIFragT(ScaffoldGraph->CIFrags, redge->fragB);

        assert(fragA->flags.bits.edgeStatus == fragB->flags.bits.edgeStatus);

        if ((fragA->flags.bits.edgeStatus == UNTRUSTED_EDGE_STATUS) ||
            (fragA->flags.bits.edgeStatus == TENTATIVE_UNTRUSTED_EDGE_STATUS))
          numBad++;

        else if ((fragA->flags.bits.edgeStatus == TRUSTED_EDGE_STATUS) ||
                 (fragA->flags.bits.edgeStatus == TENTATIVE_TRUSTED_EDGE_STATUS))
          numGood++;

        else
          numUnknown++;
      }

      if (numBad > 0)
        ulk.status = AS_BAD;

      else if (numGood > 0)
        ulk.status = AS_IN_ASSEMBLY;

      else
        ulk.status = AS_UNKNOWN_IN_ASSEMBLY;

      ulk.jump_list = (SnapMate_Pairs *)safe_malloc(sizeof(SnapMate_Pairs) * edgeTotal);

      if (edge->flags.bits.isRaw) {
        assert(edgeTotal == 1);

        ulk.jump_list[edgeCount].in1  = FRGmap.lookup(edge->fragA);
        ulk.jump_list[edgeCount].in2  = FRGmap.lookup(edge->fragB);
        ulk.jump_list[edgeCount].type.setIsMatePair();

        edgeCount++;
      } else {
        assert(edgeTotal > 0);

        redge = edge;

        assert(redge->nextRawEdge != NULLINDEX); // must have >= 1 raw edge

        while (redge->nextRawEdge != NULLINDEX) {
          redge = GetGraphEdge(ScaffoldGraph->CIGraph, redge->nextRawEdge);

          if (isOverlapEdge(redge)) {
            // overlap edges don't count
            edgeTotal--;
            continue;
          }

          ulk.jump_list[edgeCount].in1  = FRGmap.lookup(redge->fragA);
          ulk.jump_list[edgeCount].in2  = FRGmap.lookup(redge->fragB);
          ulk.jump_list[edgeCount].type.setIsMatePair();

          edgeCount++;
        }
      }

      assert(edgeCount == edgeTotal);

      if (doWrite)
        WriteProtoMesg_AS(asmFile, &pmesg);

      safe_free(ulk.jump_list);
    }
  }
}
示例#15
0
void ShatterScaffoldsConnectedByLowWeight(FILE *stream, ScaffoldGraphT *graph, uint32 minWeight, int verbose){
  GraphNodeIterator nodes;
  NodeCGW_T        *node;

  InitGraphNodeIterator(&nodes, ScaffoldGraph->ContigGraph, GRAPH_NODE_DEFAULT);
  while ((node = NextGraphNodeIterator(&nodes)) != NULL) {
    GraphEdgeIterator edges(ScaffoldGraph->ContigGraph, node->id, ALL_END, ALL_EDGES);
    CIEdgeT          *edge;

    int disconnected = (edges.nextMerged() == NULL ? FALSE : TRUE);	// don't disconnect a node if it has no edges

    while ((edge = edges.nextMerged()) != NULL) {
    	NodeCGW_T *otherNode = GetGraphNode(graph->ContigGraph, (edge->idA == node->id ? edge->idB : edge->idA));
    	if (verbose == TRUE)
    		fprintf(stream, "The edge ori:%c dist: %f connects %d and %d with weight %d and node is in scaffold %d and other end of edge is %d in scaffold %d\n", edge->orient.toLetter(), edge->distance.mean, edge->idA, edge->idB, edge->edgesContributing, node->scaffoldID, otherNode->id, otherNode->scaffoldID);

    	if (otherNode->scaffoldID == node->scaffoldID && otherNode->scaffoldID != NULLINDEX && edge->edgesContributing >= minWeight) {
    		disconnected = FALSE;
    		if (verbose == TRUE) {
    			fprintf(stream, "Node %d will not be disconnected from scaffold %d because it has edge %d higher than min %d\n", node->id, node->scaffoldID, edge->edgesContributing, minWeight);
    		}
    	}
    }

    if (disconnected == TRUE && node->scaffoldID != NULLINDEX) {
		if (verbose == TRUE)
			fprintf(stream, "Disconnecting contig with id %d from scaffold %d\n", node->id, node->scaffoldID);
		fprintf(stream, "Disconnecting contig with id %d from scaffold %d\n", node->id, node->scaffoldID);

		// is this all we need to do
		// don't set any of the flags for repeats, let it stay whatever it is now
        LengthT     offsetAEnd      = {0.0, 0.0};
        LengthT     offsetBEnd      = {0.0, 0.0};
        LengthT     firstOffset     = {0.0, 0.0};

        CIScaffoldT CIScaffold;
        InitializeScaffold(&CIScaffold, REAL_SCAFFOLD);
        CIScaffold.info.Scaffold.AEndCI = NULLINDEX;
        CIScaffold.info.Scaffold.BEndCI = NULLINDEX;
        CIScaffold.info.Scaffold.numElements = 0;
        CIScaffold.bpLength = node->bpLength;
        CIScaffold.id = GetNumGraphNodes(graph->ScaffoldGraph);
        CIScaffold.flags.bits.isDead = FALSE;
        CIScaffold.numEssentialA = CIScaffold.numEssentialB = 0;
        CIScaffold.essentialEdgeB = CIScaffold.essentialEdgeA = NULLINDEX;

        AppendGraphNode(graph->ScaffoldGraph, &CIScaffold);

        //  Ensure that there are no edges, and that the edgeList is allocated.
        assert(ScaffoldGraph->ScaffoldGraph->edgeLists[CIScaffold.id].empty() == true);

        node->numEssentialA = node->numEssentialB = 0;
        node->essentialEdgeA = node->essentialEdgeB = NULLINDEX;

        if(GetNodeOrient(node).isForward()){
          firstOffset = node->offsetAEnd;
        }else{
          firstOffset = node->offsetBEnd;
        }
        offsetAEnd.mean     = node->offsetAEnd.mean     - firstOffset.mean;
        offsetAEnd.variance = node->offsetAEnd.variance - firstOffset.variance;
        offsetBEnd.mean     = node->offsetBEnd.mean     - firstOffset.mean;
        offsetBEnd.variance = node->offsetBEnd.variance - firstOffset.variance;

        if (verbose == TRUE) {
			fprintf(stream, "Inserted node %d into scaffold %d at offsets (%f, %f) and (%f, %f) it used to be (%f, %f) and (%f, %f)\n",
					node->id, CIScaffold.id,
					offsetAEnd.mean, offsetAEnd.variance, offsetBEnd.mean, offsetBEnd.variance,
        			node->offsetAEnd.mean, node->offsetAEnd.variance, node->offsetBEnd.mean, node->offsetBEnd.variance);
        }

        CIScaffoldT *scaffold = GetGraphNode(graph->ScaffoldGraph, node->scaffoldID);
        RemoveCIFromScaffold(graph, scaffold, node, FALSE);
        if (scaffold->info.Scaffold.numElements == 0) {
        	scaffold->type = SCRATCH_SCAFFOLD;
			scaffold->flags.bits.isDead = 1;
        }
        InsertCIInScaffold(graph, node->id, CIScaffold.id, offsetAEnd, offsetBEnd, TRUE, FALSE);
    }
  }
}