//  Computes the position and orientation of 'frag' in its scaffold.
//
//  The fragment's containing contig (frag->contigID, looked up in
//  ScaffoldGraph->ContigGraph) is first located in the scaffold with
//  GetContigPositionInScaffold(); the result is handed to
//  GetFragmentPositionInScaffoldFromContig(), which fills in the caller's
//  left_end, right_end and fragmentScaffoldOrientation.
//
void
GetFragmentPositionInScaffold(CIFragT *frag,
                              int     *left_end,
                              int     *right_end,
                              int     *fragmentScaffoldOrientation) {
  int  ctgLeft;
  int  ctgRight;
  int  ctgOrient;

  ContigT *parentContig = GetGraphNode(ScaffoldGraph->ContigGraph, frag->contigID);

  //  Where does the contig sit in the scaffold?
  GetContigPositionInScaffold(parentContig, &ctgLeft, &ctgRight, &ctgOrient);

  //  Translate the fragment through the contig's placement.
  GetFragmentPositionInScaffoldFromContig(frag,
                                          left_end, right_end, fragmentScaffoldOrientation,
                                          ctgLeft, ctgRight, ctgOrient);
}
/**
 * Inserts a zeroed child slot at Index, provided the node has not yet reached
 * the child count reported by GetMaxChildNodes().
 *
 * Index must lie in [0, ChildNodes.Num()] (enforced by check()). When the node
 * is already at its limit the call does nothing. In WITH_EDITOR builds the
 * graph node also gets a new input pin for the inserted child.
 */
void USoundNode::InsertChildNode( int32 Index )
{
	check( Index >= 0 && Index <= ChildNodes.Num() );

	const int32 ChildLimit = GetMaxChildNodes();
	if (ChildLimit <= ChildNodes.Num())
	{
		// Already holding as many children as allowed.
		return;
	}

	ChildNodes.InsertZeroed( Index );
#if WITH_EDITOR
	GetGraphNode()->CreateInputPin();
#endif // WITH_EDITOR
}
void USoundNodeSwitch::RenamePins() { TArray<class UEdGraphPin*> InputPins; #if WITH_EDITORONLY_DATA GetGraphNode()->GetInputPins(InputPins); #endif for (int32 i = 0; i < InputPins.Num(); i++) { if (InputPins[i]) { InputPins[i]->PinName = GetInputPinName(i); } } }
//  Returns TRUE when 'contig'
//    - holds at most one unitig (info.Contig.numCI <= 1),
//    - is not placed in a scaffold (scaffoldID == NULLINDEX), and
//    - its unitig (info.Contig.AEndCI, looked up in ScaffoldGraph->CIGraph)
//      has a non-zero info.CI.numInstances, i.e. it appears as a surrogate
//      elsewhere in the assembly.
//  Returns FALSE otherwise.  The unitig is only looked up once the first two
//  conditions hold.
//
int
SurrogatedSingleUnitigContig(NodeCGW_T* contig) {
  const bool  hasMultipleUnitigs = (contig->info.Contig.numCI > 1);
  const bool  isPlaced           = (contig->scaffoldID != NULLINDEX);

  if (hasMultipleUnitigs || isPlaced)
    return(FALSE);

  NodeCGW_T *onlyUnitig = GetGraphNode(ScaffoldGraph->CIGraph, contig->info.Contig.AEndCI);

  //  No instances means the unitig was never placed as a surrogate.
  return((onlyUnitig->info.CI.numInstances == 0) ? FALSE : TRUE);
}
void PrintScaffoldContents(FILE *stream, ScaffoldGraphT *graph, char *message) { // output initial scaffolds before breaking GraphNodeIterator scaffolds; CIScaffoldT *scaffold; InitGraphNodeIterator(&scaffolds, graph->ScaffoldGraph, GRAPH_NODE_DEFAULT); while ((scaffold = NextGraphNodeIterator(&scaffolds)) != NULL) { if(scaffold->type != REAL_SCAFFOLD) continue; assert(scaffold->info.Scaffold.numElements > 0); fprintf(stream, "%s scaffold %d contains %d elements: ", message, scaffold->id, scaffold->info.Scaffold.numElements); CIScaffoldTIterator contigs; ChunkInstanceT *contig; InitCIScaffoldTIterator(graph, scaffold, TRUE, FALSE, &contigs); while ((contig = NextCIScaffoldTIterator(&contigs)) != NULL) { // of course why would a sanity check work in cgw // line 2129 of TransitiveReduction_CGW.c sets the contig's scaffold ID to NULL but doesn't remove it from the scaffold! if (contig->scaffoldID != scaffold->id) { fprintf(stream, "Scaffold %d thinks it contains contig %d but contig thinks it belongs to %d\n", scaffold->id, contig->id, contig->scaffoldID); } assert(contig->scaffoldID == scaffold->id || contig->scaffoldID == NULLINDEX); fprintf(stream, "%d (%f) (%f) (%f) ", contig->id, contig->bpLength.mean, contig->offsetAEnd.mean, contig->offsetBEnd.mean); GraphEdgeIterator edges(ScaffoldGraph->ContigGraph, contig->id, ALL_END, ALL_EDGES); CIEdgeT *edge; while ((edge = edges.nextMerged()) != NULL) { NodeCGW_T *otherNode = GetGraphNode(graph->ContigGraph, (edge->idA == contig->id ? 
edge->idB : edge->idA)); //fprintf(stream, "The edge active %d is unique %d confirming %d ori:%c dist: %f connects %d and %d with weight %d and node is in scaffold %d and other end of edge is %d in scaffold %d\n", edge->flags.bits.isActive, edge->flags.bits.isUniquetoUnique, edge->flags.bits.isContigConfirming, edge->orient.toLetter(), edge->distance.mean, edge->idA, edge->idB, edge->edgesContributing, contig->scaffoldID, otherNode->id, otherNode->scaffoldID); } } fprintf(stream, "\n"); } PrintContigContents(stream, graph, message); }
int main( int argc, char **argv) { int ckptNum = NULLINDEX; int makeMiniScaffolds = 1; uint64 uidStart = 1230000; UIDserver *uids = NULL; GlobalData = new Globals_CGW(); argc = AS_configure(argc, argv); int err=0; int arg=1; while (arg < argc) { if (strcmp(argv[arg], "-p") == 0) { ckptNum = GlobalData->setPrefix(argv[++arg]); } else if (strcmp(argv[arg], "-c") == 0) { strcpy(GlobalData->outputPrefix, argv[++arg]); } else if (strcmp(argv[arg], "-g") == 0) { strcpy(GlobalData->gkpStoreName, argv[++arg]); } else if (strcmp(argv[arg], "-t") == 0) { strcpy(GlobalData->tigStoreName, argv[++arg]); } else if (strcmp(argv[arg], "-n") == 0) { ckptNum = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-U") == 0) { uidStart = 0; } else if (strcmp(argv[arg], "-S") == 0) { makeMiniScaffolds = 0; } else { fprintf(stderr, "unknown option '%s'\n", argv[arg]); err = 1; } arg++; } if ((GlobalData->outputPrefix[0] == 0) || (GlobalData->gkpStoreName[0] == 0)) { fprintf(stderr, "usage: %s [[-p prefix] | [-c name -g gkpstore -n ckptNum]] [-U] [-S]\n", argv[0]); fprintf(stderr, " -p Attempt to locate the last checkpoint in directory 7-CGW.\n"); fprintf(stderr, " -c Look for checkpoints in 'name'\n"); fprintf(stderr, " -g Path to gkpStore\n"); fprintf(stderr, " -n Checkpoint number to load\n"); fprintf(stderr, " -U Use real UIDs for miniscaffolds, otherwise, UIDs start at 1230000\n"); fprintf(stderr, " -S Do NOT make mini scaffolds.\n"); exit(1); } uids = UIDserverInitialize(256, uidStart); char *toprint = (char *)safe_malloc(sizeof(char) * (AS_READ_MAX_NORMAL_LEN + 51 + AS_READ_MAX_NORMAL_LEN + 2)); LoadScaffoldGraphFromCheckpoint(GlobalData->outputPrefix, ckptNum, FALSE); int ifrag; for (ifrag=0; ifrag < GetNumVA_CIFragT(ScaffoldGraph->CIFrags); ifrag++) { CIFragT *frag = GetCIFragT(ScaffoldGraph->CIFrags, ifrag); CIFragT *mate = NULL; if (frag->flags.bits.isDeleted) continue; assert(frag->cid != NULLINDEX); assert((frag->flags.bits.hasMate == 0) || (frag->mate_iid != 0)); // Fix for 
missing mates -- OBT used to not delete mate links, leaving // dangling mates. Somebody else seems to be doing this too. // if (frag->flags.bits.hasMate) { mate = GetCIFragT(ScaffoldGraph->CIFrags, frag->mate_iid); if (mate == NULL) frag->flags.bits.hasMate = 0; } // If this fragment is not chaff, we have nothing to do here. // if (GetGraphNode(ScaffoldGraph->CIGraph,frag->cid)->flags.bits.isChaff == 0) continue; // Print a singleton if there is no mate, the mate isn't chaff, // or we were told to not make miniscaffolds. // if ((mate == NULL) || (mate->flags.bits.isChaff == 0) || (makeMiniScaffolds == 0)) { AS_UID fUID = getFragmentClear(frag->read_iid, 0, toprint); AS_UTL_writeFastA(stdout, toprint, strlen(toprint), 0, ">%s /type=singleton\n", AS_UID_toString(fUID)); } else if ((mate != NULL) && (mate->flags.bits.isChaff == 1) && (makeMiniScaffolds == 1) && (frag->read_iid < mate->read_iid)) { // make sure the following chain of Ns is divisible by three; // the exact length is arbitrary but Doug Rusch points out that // by making it divisible by 3, we can get lucky and maintain // the phase of a protein ... which helps in the // auto-annotation of environmental samples AS_UID fUID = getFragmentClear(frag->read_iid, 0, toprint); strcat(toprint, "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"); AS_UID mUID = getFragmentClear(mate->read_iid, 1, toprint + strlen(toprint)); AS_UTL_writeFastA(stdout, toprint, strlen(toprint), 0, ">"F_U64" /type=mini_scaffold /frgs=(%s,%s)\n", getUID(uids), AS_UID_toString(fUID), AS_UID_toString(mUID)); } } delete GlobalData; exit(0); }
int main(int argc, char **argv) { // Options controlling main int generateOutput = 1; int preserveConsensus = 0; int preMergeRezLevel = -1; int repeatRezLevel = 0; int restartFromCheckpoint = -1; char *restartFromLogical = "ckp00-NUL"; bool recomputeLeastSquaresOnLoad = false; bool reloadMates = false; int doResolveSurrogates = 1; // resolveSurrogates int placeAllFragsInSinglePlacedSurros = 0; // resolveSurrogates double cutoffToInferSingleCopyStatus = 0.666; // resolveSurrogates int firstFileArg = 0; int32 outputFragsPerPartition = 0; #if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL) ContigOrientChecker * coc; coc = CreateContigOrientChecker(); assert(coc != NULL); #endif // temporary! fprintf(stderr, "Using up to %d OpenMP threads.\n", omp_get_max_threads()); GlobalData = new Globals_CGW(); argc = AS_configure(argc, argv); int arg = 1; int err = 0; int unk[64] = {0}; int unl = 0; while (arg < argc) { if (strcmp(argv[arg], "-C") == 0) { GlobalData->performCleanupScaffolds = 0; } else if (strcmp(argv[arg], "-D") == 0) { GlobalData->debugLevel = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-E") == 0) { GlobalData->outputOverlapOnlyContigEdges = 1; } else if (strcmp(argv[arg], "-F") == 0) { GlobalData->allowDemoteMarkedUnitigs = FALSE; } else if (strcmp(argv[arg], "-G") == 0) { generateOutput = 0; } else if (strcmp(argv[arg], "-GG") == 0) { preserveConsensus = 1; } else if (strcmp(argv[arg], "-g") == 0) { strcpy(GlobalData->gkpStoreName, argv[++arg]); } else if (strcmp(argv[arg], "-t") == 0) { strcpy(GlobalData->tigStoreName, argv[++arg]); } else if (strcmp(argv[arg], "-I") == 0) { GlobalData->ignoreChaffUnitigs = 1; } else if (strcmp(argv[arg], "-j") == 0) { GlobalData->cgbUniqueCutoff = atof(argv[++arg]); } else if (strcmp(argv[arg], "-K") == 0) { GlobalData->removeNonOverlapingContigsFromScaffold = 1; } else if (strcmp(argv[arg], "-k") == 0) { GlobalData->cgbDefinitelyUniqueCutoff = atof(argv[++arg]); } else if (strcmp(argv[arg], 
"-m") == 0) { GlobalData->minSamplesForOverride = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-N") == 0) { restartFromLogical = argv[++arg]; } else if (strcmp(argv[arg], "-o") == 0) { strcpy(GlobalData->outputPrefix, argv[++arg]); } else if (strcmp(argv[arg], "-B") == 0) { outputFragsPerPartition = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-P") == 0) { GlobalData->closurePlacement = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-p") == 0) { preMergeRezLevel = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-R") == 0) { restartFromCheckpoint = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-r") == 0) { repeatRezLevel = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-S") == 0) { doResolveSurrogates = 1; cutoffToInferSingleCopyStatus = atof(argv[++arg]); placeAllFragsInSinglePlacedSurros = 0; if (cutoffToInferSingleCopyStatus == 0.0) doResolveSurrogates = 0; if (cutoffToInferSingleCopyStatus < 0) { cutoffToInferSingleCopyStatus = 0.0; placeAllFragsInSinglePlacedSurros = 1; } } else if (strcmp(argv[arg], "-s") == 0) { GlobalData->stoneLevel = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-filter") == 0) { GlobalData->mergeFilterLevel = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-shatter") == 0) { GlobalData->shatterLevel = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-missingMate") == 0) { GlobalData->mergeScaffoldMissingMates = atof(argv[++arg]); // the value is a percentage between 0 and 1 so make sure it never goes out of those bounds if (GlobalData->mergeScaffoldMissingMates < 0) { GlobalData->mergeScaffoldMissingMates = -1; } else if (GlobalData->mergeScaffoldMissingMates > 1) { GlobalData->mergeScaffoldMissingMates = 1; } } else if (strcmp(argv[arg], "-U") == 0) { GlobalData->doUnjiggleWhenMerging = 1; } else if (strcmp(argv[arg], "-u") == 0) { fprintf(stderr, "Option -u is broken.\n"); exit(1); strcpy(GlobalData->unitigOverlaps, argv[++arg]); } else if (strcmp(argv[arg], "-Z") == 0) { GlobalData->demoteSingletonScaffolds = FALSE; 
} else if (strcmp(argv[arg], "-z") == 0) { GlobalData->checkRepeatBranchPattern = TRUE; } else if (strcmp(argv[arg], "-minmergeweight") == 0) { GlobalData->minWeightToMerge = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-recomputegaps") == 0) { recomputeLeastSquaresOnLoad = true; } else if (strcmp(argv[arg], "-reloadmates") == 0) { reloadMates = true; } else if ((argv[arg][0] != '-') && (firstFileArg == 0)) { firstFileArg = arg; arg = argc; } else { unk[unl++] = arg; err++; } arg++; } if (GlobalData->gkpStoreName[0] == 0) err++; if (GlobalData->outputPrefix[0] == 0) err++; if (cutoffToInferSingleCopyStatus > 1.0) err++; if (err) { fprintf(stderr, "usage: %s [options] -g <GatekeeperStoreName> -o <OutputPath> <unitigs*.cgb>\n", argv[0]); fprintf(stderr, " -C Don't cleanup scaffolds\n"); fprintf(stderr, " -D <lvl> Debug\n"); fprintf(stderr, " -E output overlap only contig edges\n"); fprintf(stderr, " -e <thresh> Microhet score probability cutoff\n"); fprintf(stderr, " -F strongly enforce unique/repeat flag set in unitig, default if not set is to still\n"); fprintf(stderr, " allow those marked unique to be demoted due to Repeat Branch Pattern or being\n"); fprintf(stderr, " too small\n"); fprintf(stderr, " -g gkp Store path (required)\n"); fprintf(stderr, " -G Don't generate output (cgw or cam)\n"); fprintf(stderr, " -GG Don't destroy consensus on output (ctgcns will do nothing)\n"); fprintf(stderr, " -I ignore chaff unitigs\n"); fprintf(stderr, " -i <thresh> Set max coverage stat for microhet determination of non-uniqueness (default -1)\n"); fprintf(stderr, " -j <thresh> Set min coverage stat for definite uniqueness\n"); fprintf(stderr, " -K Allow kicking out a contig placed in a scaffold by mate pairs that has no overlaps\n"); fprintf(stderr, " to both its left and right neighbor contigs.\n"); fprintf(stderr, " -k <thresh> Set max coverage stat for possible uniqueness\n"); fprintf(stderr, " -M don't do interleaved scaffold merging\n"); fprintf(stderr, " -m <min> 
Number of mate samples to recompute an insert size, default is 100\n"); fprintf(stderr, " -N <ckp> restart from checkpoint location 'ckp' (see the timing file)\n"); fprintf(stderr, " -o Output Name (required)\n"); fprintf(stderr, " -P <int> how to place closure reads.\n"); fprintf(stderr, " 0 - place at first location found\n"); fprintf(stderr, " 1 - place at best gap\n"); fprintf(stderr, " 2 - allow to be placed in multiple gaps\n"); fprintf(stderr, " -R <ckp> restart from checkpoint file number 'ckp'\n"); fprintf(stderr, " -r <lvl> repeat resolution level\n"); fprintf(stderr, " -S <t> place all frags in singly-placed surrogates if at least fraction <x> can be placed\n"); fprintf(stderr, " two special cases:\n"); fprintf(stderr, " if <t> = -1, place all frags in singly-placed surrogates aggressively\n"); fprintf(stderr, " (which really mean t = 0.0, but triggers a better algorithm)\n"); fprintf(stderr, " if <t> = 0, do not resolve surrogate fragments\n"); fprintf(stderr, " -s <lvl> stone throwing level\n"); fprintf(stderr, " -shatter <thresh> Set threshold for shattering scaffolds when loading from checkpoint. Any contigs\n"); fprintf(stderr, " connected to a scaffold only by edges with less weight than the threshold will be\n"); fprintf(stderr, " split into a new scaffold (default OFF)\n"); fprintf(stderr, " -missingMate <thresh> Set threshold (0-1) for the percentage of mates (out of total) that are allowed to be\n"); fprintf(stderr, " missing when attempting a scaffold merge (default 0). 
A value of -1 will ignore all\n"); fprintf(stderr, " missing mates\n"); fprintf(stderr, " -minmergeweight <w> Only use weight w or better edges for merging scaffolds.\n"); fprintf(stderr, " -recomputegaps if loading a checkpoint, recompute gaps, merging contigs and splitting low weight scaffolds.\n"); fprintf(stderr, " -reloadmates If loading a checkpoint, also load any new mates from gkpStore.\n"); fprintf(stderr, " -U after inserting rocks/stones try shifting contig positions back to their original location\n"); fprintf(stderr, " when computing overlaps to see if they overlap with the rock/stone and allow them to merge\n"); fprintf(stderr, " if they do\n"); fprintf(stderr, " -u <file> load these overlaps (from BOG) into the scaffold graph\n"); fprintf(stderr, " -v verbose\n"); fprintf(stderr, " -Z Don't demote singleton scaffolds\n"); fprintf(stderr, " -z Turn on Check for Repeat Branch Pattern (demotes some unique unitigs to repeat)\n"); fprintf(stderr, "\n"); if (GlobalData->gkpStoreName[0] == 0) fprintf(stderr, "ERROR: No gatekeeper (-g) supplied.\n"); if (GlobalData->outputPrefix[0] == 0) fprintf(stderr, "ERROR: No output prefix (-o) supplied.\n"); if (cutoffToInferSingleCopyStatus > 1.0) fprintf(stderr, "ERROR: surrogate fraction cutoff (-S) must be between 0.0 and 1.0.\n"); if (unl) { for (arg=0; arg<unl; arg++) fprintf(stderr, "ERROR: Unknown option '%s'\n", argv[unk[arg]]); } exit(1); } isValidCheckpointName(restartFromLogical); if(GlobalData->cgbDefinitelyUniqueCutoff < GlobalData->cgbUniqueCutoff) GlobalData->cgbDefinitelyUniqueCutoff = GlobalData->cgbUniqueCutoff; if (preMergeRezLevel >= 0) GlobalData->repeatRezLevel = preMergeRezLevel; else GlobalData->repeatRezLevel = repeatRezLevel; if (runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_LOADING) == true) { int ctme = time(0); // Create the checkpoint from scratch ScaffoldGraph = CreateScaffoldGraph(GlobalData->outputPrefix); ProcessInput(firstFileArg, argc, argv); // Insert sizes are set 
already, but we'll estimate again anyway. ComputeMatePairStatisticsRestricted(UNITIG_OPERATIONS, GlobalData->minSamplesForOverride, "unitig_initial"); if (time(0) - ctme > 60 * 60) CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_LOADING], "after loading"); } else if (isThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_LOADING) == true) { // Load the checkpoint if we are exactly after loading, otherwise, fall through to the // real load. LoadScaffoldGraphFromCheckpoint(GlobalData->outputPrefix,restartFromCheckpoint, TRUE); } if (runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_EDGE_BUILDING) == true) { vector<CDS_CID_t> rawEdges; BuildGraphEdgesDirectly(ScaffoldGraph->CIGraph, rawEdges); // Broken, see comments in ChunkOverlap_CGW.c // //if (GlobalData->unitigOverlaps[0]) // AddUnitigOverlaps(ScaffoldGraph->CIGraph, GlobalData->unitigOverlaps, rawEdges); // Compute all overlaps implied by mate links between pairs of unique unitigs ComputeOverlaps(ScaffoldGraph->CIGraph, rawEdges); MergeAllGraphEdges(ScaffoldGraph->CIGraph, rawEdges, FALSE, FALSE); CheckEdgesAgainstOverlapper(ScaffoldGraph->CIGraph); CheckSurrogateUnitigs(); // Mark some Unitigs/Chunks/CIs as repeats based on overlaps GRANGER 2/2/07 // if (GlobalData->checkRepeatBranchPattern) DemoteUnitigsWithRBP(stderr, ScaffoldGraph->CIGraph); // At this Point we've constructed the CIGraph BuildInitialContigs(ScaffoldGraph); if(GlobalData->debugLevel > 0){ CheckEdgesAgainstOverlapper(ScaffoldGraph->ContigGraph); CheckSurrogateUnitigs(); } CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_EDGE_BUILDING], "after building edges"); } else { LoadScaffoldGraphFromCheckpoint(GlobalData->outputPrefix,restartFromCheckpoint, TRUE); if (reloadMates) ReloadMatesFromGatekeeper(); // Dump stats on the loaded checkpoint //GeneratePlacedContigGraphStats(tmpBuffer,0); //GenerateScaffoldGraphStats(tmpBuffer,0); // shatter scaffolds if requested if (GlobalData->shatterLevel > 0) { 
ShatterScaffoldsConnectedByLowWeight(stderr, ScaffoldGraph, GlobalData->shatterLevel, TRUE); } // Useful for checking mate happiness on loading. Currently only checks one scaffold. if (0) { vector<instrumentLIB> libs; for (int32 i=0; i<GetNumDistTs(ScaffoldGraph->Dists); i++) { DistT *dptr = GetDistT(ScaffoldGraph->Dists, i); libs.push_back(instrumentLIB(i, dptr->mu, dptr->sigma, true)); } for (int32 sID=287340; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) { CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID); fprintf(stderr, "ANALYZING SCAFFOLD %d\n", sID); if (scaffold->flags.bits.isDead == true) continue; instrumentSCF A(scaffold); A.analyze(libs); A.report(); exit(0); } } if (recomputeLeastSquaresOnLoad) { for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) { CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID); if (scaffold->flags.bits.isDead == true) continue; if (true == LeastSquaresGapEstimates(ScaffoldGraph, GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID), LeastSquares_Cleanup | LeastSquares_Split)) ScaffoldSanity(ScaffoldGraph, scaffold); } } } // We DO want to flush unused unitigs/contigs at this point. They're not in // a scaffold, and possibly will never be used again (except as rocks/stones). // ScaffoldGraph->tigStore->flushCache(); if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_DURING_INITIAL_SCAFFOLDING) == true) && (GlobalData->repeatRezLevel > 0)) { int ctme = time(0); if(GlobalData->debugLevel > 0) DumpContigs(stderr,ScaffoldGraph, FALSE); // Transitive reduction of ContigGraph followed by construction of SEdges // With markShakyBifurcations enabled. BuildUniqueCIScaffolds(ScaffoldGraph, TRUE, FALSE); CheckEdgesAgainstOverlapper(ScaffoldGraph->ContigGraph); // Equivalent to TidyUpScaffolds(). 
// for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) { CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID); if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split)) ScaffoldSanity(ScaffoldGraph, scaffold); } if (time(0) - ctme > 60 * 60) CheckpointScaffoldGraph(ckpNames[CHECKPOINT_DURING_INITIAL_SCAFFOLDING], "during initial scaffolding"); } if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_INITIAL_SCAFFOLDING) == true) && (GlobalData->repeatRezLevel > 0)) { //CheckAllTrustedEdges(ScaffoldGraph); { vector<CDS_CID_t> rawEdges; BuildSEdges(rawEdges, FALSE); MergeAllGraphEdges(ScaffoldGraph->ScaffoldGraph, rawEdges, TRUE, FALSE); } //ScaffoldSanity(ScaffoldGraph); // rocks is called inside of here // checkpoints are written inside of here int iter = 0; int iterMax = 10; // MAX_OUTPUT_REZ_ITERATIONS int ctme = time(0); int changed = TRUE; fprintf(stderr,"** Running Level 1 Repeat Rez **\n"); while ((changed) && (iter < iterMax)) { CheckEdgesAgainstOverlapper(ScaffoldGraph->ContigGraph); CheckCITypes(ScaffoldGraph); changed = RepeatRez(GlobalData->repeatRezLevel, GlobalData->outputPrefix); if (changed){ CleanupScaffolds(ScaffoldGraph, FALSE, NULLINDEX, FALSE); ScaffoldSanity(ScaffoldGraph); // With markShakyBifurcations disabled. BuildUniqueCIScaffolds(ScaffoldGraph, FALSE, FALSE); CheckEdgesAgainstOverlapper(ScaffoldGraph->ContigGraph); for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) { CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID); if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split)) ScaffoldSanity(ScaffoldGraph, scaffold); } //CheckAllTrustedEdges(ScaffoldGraph); // This shouldn't be necessary (RepeatRez() calling TidyUpScaffolds() should be doing it), // but it is infrequent (at most iterMax=10 times). 
{ vector<CDS_CID_t> rawEdges; BuildSEdges(rawEdges, FALSE); MergeAllGraphEdges(ScaffoldGraph->ScaffoldGraph, rawEdges, TRUE, FALSE); } // If we've been running for 2 hours, AND we've not just // completed the last iteration, checkpoint. // if ((time(0) - ctme > 120 * 60) && (changed) && (iter+1 < iterMax)) { ctme = time(0); CheckpointScaffoldGraph(ckpNames[CHECKPOINT_DURING_INITIAL_SCAFFOLDING], "during initial scaffolding"); } iter++; } } #if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL) ResetContigOrientChecker(coc); AddAllScaffoldsToContigOrientChecker(ScaffoldGraph, coc); #endif if(GlobalData->debugLevel > 0) DumpCIScaffolds(stderr,ScaffoldGraph, FALSE); CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_INITIAL_SCAFFOLDING], "after initial scaffolding"); } // else TidyUpScaffolds (ScaffoldGraph); // We DO want to flush unused unitigs/contigs at this point. They're not in // a scaffold, and possibly will never be used again (except as rocks/stones). // ScaffoldGraph->tigStore->flushCache(); if (runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_1ST_SCAFF_MERGE) == true) { CleanupScaffolds(ScaffoldGraph,FALSE, NULLINDEX, FALSE); ScaffoldSanity(ScaffoldGraph); /* First we try to merge Scaffolds agressively */ MergeScaffoldsAggressive(ScaffoldGraph, ckpNames[CHECKPOINT_DURING_1ST_SCAFF_MERGE], FALSE); CleanupScaffolds(ScaffoldGraph, FALSE, NULLINDEX, FALSE); #if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL) fprintf(stderr, "---Checking contig orders after MergeScaffoldsAggressive (1)\n\n"); CheckAllContigOrientationsInAllScaffolds(ScaffoldGraph, coc, POPULATE_COC_HASHTABLE); #endif #ifdef CHECK_CONTIG_ORDERS_INCREMENTAL ResetContigOrientChecker(coc); AddAllScaffoldsToContigOrientChecker(ScaffoldGraph, coc); #endif CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_1ST_SCAFF_MERGE], "after 1st scaffold merge"); } // We DO want to flush unused unitigs/contigs at this point. 
They're not in // a scaffold, and possibly will never be used again (except as rocks/stones). // ScaffoldGraph->tigStore->flushCache(); /* now that we are done with initial scaffold merge, we want to use the standard/default repeatRezLevel. Up to now, the value of preMergeRezLevel was in use if set on the command line */ GlobalData->repeatRezLevel = repeatRezLevel; /* Now we throw stones */ if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_STONES) == true) && (GlobalData->stoneLevel > 0)) { // Convert single-contig scaffolds that are marginally unique back // to unplaced contigs so they might be placed as stones // // If we removed any scaffolds, rebuild all the edges. // if ((GlobalData->demoteSingletonScaffolds == true) && (DemoteSmallSingletonScaffolds() == true)) { vector<CDS_CID_t> rawEdges; BuildSEdges(rawEdges, TRUE); MergeAllGraphEdges(ScaffoldGraph->ScaffoldGraph, rawEdges, TRUE, TRUE); } ScaffoldSanity(ScaffoldGraph); Throw_Stones(GlobalData->outputPrefix, GlobalData->stoneLevel, FALSE); // Cleanup and split scaffolds. The cleanup shouldn't do anything, but it's cheap. 
CleanupScaffolds(ScaffoldGraph, FALSE, NULLINDEX, FALSE); for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) { CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID); if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split)) ScaffoldSanity(ScaffoldGraph, scaffold); } vector<CDS_CID_t> rawEdges; BuildSEdges(rawEdges, TRUE); MergeAllGraphEdges(ScaffoldGraph->ScaffoldGraph, rawEdges, TRUE, TRUE); ScaffoldSanity(ScaffoldGraph); #if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL) fprintf(stderr, "---Checking contig orders after Throw_Stones\n\n"); CheckAllContigOrientationsInAllScaffolds(ScaffoldGraph, coc, POPULATE_COC_HASHTABLE); #endif #ifdef CHECK_CONTIG_ORDERS_INCREMENTAL ResetContigOrientChecker(coc); AddAllScaffoldsToContigOrientChecker(ScaffoldGraph, coc); #endif CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_STONES], "after stone throwing"); //GenerateLinkStats(ScaffoldGraph->CIGraph, "Stones", 0); //GeneratePlacedContigGraphStats("Stones", 0); //GenerateLinkStats(ScaffoldGraph->ContigGraph, "Stones", 0); //GenerateScaffoldGraphStats("Stones", 0); } if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_2ND_SCAFF_MERGE) == true) && (GlobalData->stoneLevel > 0)) { ScaffoldSanity(ScaffoldGraph); MergeScaffoldsAggressive(ScaffoldGraph, ckpNames[CHECKPOINT_DURING_2ND_SCAFF_MERGE], FALSE); CleanupScaffolds(ScaffoldGraph, FALSE, NULLINDEX, FALSE); #if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL) fprintf(stderr, "---Checking contig orders after MergeScaffoldsAggressive (2)\n\n"); CheckAllContigOrientationsInAllScaffolds(ScaffoldGraph, coc, POPULATE_COC_HASHTABLE); #endif #ifdef CHECK_CONTIG_ORDERS_INCREMENTAL ResetContigOrientChecker(coc); AddAllScaffoldsToContigOrientChecker(ScaffoldGraph, coc); #endif CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_2ND_SCAFF_MERGE], "after 2nd scaffold merge"); } // We DO want to 
flush unused unitigs/contigs at this point. They're not in // a scaffold, and possibly will never be used again (except as rocks/stones). // ScaffoldGraph->tigStore->flushCache(); // The original rock throwing (above, RepeatRez()) calls TidyUpScaffolds() after each call to // Fill_Gaps(). This does CleanupAScaffold() and LeastSquaresGapEstimates(). The it rebuilds // scaffold edges (but not contig edges). It's not been tested here, so we don't do it yet. if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_FINAL_ROCKS) == true) && (GlobalData->repeatRezLevel > 0)) { int32 extra_rocks = 0; int32 iter = 0; do { // Zero means to rebuild the hopeless scaffold array - e.g., try all scaffolds again. // Before this, it was using iter, but iter was never changed from zero. extra_rocks = Fill_Gaps(GlobalData->outputPrefix, GlobalData->repeatRezLevel, 0); fprintf(stderr, "Threw additional %d rocks on iter %d\n", extra_rocks, iter++); #if 0 CleanupScaffolds(ScaffoldGraph, FALSE, NULLINDEX, FALSE); for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) { CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID); if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split)) ScaffoldSanity(ScaffoldGraph, scaffold); } vector<CDS_CID_t> rawEdges; BuildSEdges(rawEdges, FALSE); MergeAllGraphEdges(ScaffoldGraph->ScaffoldGraph, rawEdges, TRUE, FALSE); #endif //ScaffoldGraph->tigStore->flushCache(); } while (extra_rocks > 1); // // XXX do we need least squares here? 
// #if 1 fprintf(stderr, "Beta - LeastSquaresGapEstimates #1 after final rocks\n"); for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) { CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID); if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split)) ScaffoldSanity(ScaffoldGraph, scaffold); } #endif CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_FINAL_ROCKS], "after final rocks"); } if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_PARTIAL_STONES) == true) && (GlobalData->stoneLevel > 0)) { ScaffoldSanity (ScaffoldGraph); int partial_stones = Throw_Stones(GlobalData->outputPrefix, GlobalData->stoneLevel, TRUE); // // XXX do we need least squares here? // #if 1 fprintf(stderr, "Beta - LeastSquaresGapEstimates #2 after partial stones\n"); for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) { CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID); if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split)) ScaffoldSanity(ScaffoldGraph, scaffold); } #endif // If throw_stones splits scaffolds, rebuild edges { vector<CDS_CID_t> rawEdges; BuildSEdges(rawEdges, TRUE); MergeAllGraphEdges(ScaffoldGraph->ScaffoldGraph, rawEdges, TRUE, TRUE); } ScaffoldSanity (ScaffoldGraph); //ScaffoldGraph->tigStore->flushCache(); fprintf (stderr, "Threw %d partial stones\n", partial_stones); #if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL) fprintf(stderr, "---Checking contig orders after partial_stones\n\n"); CheckAllContigOrientationsInAllScaffolds(ScaffoldGraph, coc, POPULATE_COC_HASHTABLE); #endif #ifdef CHECK_CONTIG_ORDERS_INCREMENTAL ResetContigOrientChecker(coc); AddAllScaffoldsToContigOrientChecker(ScaffoldGraph, coc); #endif CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_PARTIAL_STONES], "after partial stones"); //GenerateLinkStats (ScaffoldGraph->CIGraph, 
"PStones", 0); //GeneratePlacedContigGraphStats ("PStones", 0); //GenerateLinkStats(ScaffoldGraph->ContigGraph, "PStones", 0); //GenerateScaffoldGraphStats ("PStones", 0); } if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_FINAL_CONTAINED_STONES) == true) && (GlobalData->stoneLevel > 0)) { ScaffoldSanity (ScaffoldGraph); int contained_stones = Toss_Contained_Stones (GlobalData->outputPrefix, GlobalData->stoneLevel, 0); fprintf(stderr, "Threw %d contained stones\n", contained_stones); fprintf (stderr, "**** Finished Final Contained Stones level %d ****\n", GlobalData->stoneLevel); // Merge contigs before fiddling with gap sizes. CleanupScaffolds (ScaffoldGraph, FALSE, NULLINDEX, FALSE); // // XXX do we need least squares here? // #if 1 fprintf(stderr, "Beta - LeastSquaresGapEstimates #3 after contained stones\n"); for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) { CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID); if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split)) ScaffoldSanity(ScaffoldGraph, scaffold); } #endif ScaffoldSanity (ScaffoldGraph); // Remove copies of surrogates which are placed multiple times in the same place in a contig RemoveSurrogateDuplicates(); #if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL) fprintf(stderr, "---Checking contig orders after contained_stones\n\n"); CheckAllContigOrientationsInAllScaffolds(ScaffoldGraph, coc, POPULATE_COC_HASHTABLE); #endif #ifdef CHECK_CONTIG_ORDERS_INCREMENTAL ResetContigOrientChecker(coc); AddAllScaffoldsToContigOrientChecker(ScaffoldGraph, coc); #endif CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_FINAL_CONTAINED_STONES], "after final contained stones"); //GenerateLinkStats (ScaffoldGraph->CIGraph, "CStones", 0); //GeneratePlacedContigGraphStats ("CStones", 0); //GenerateLinkStats(ScaffoldGraph->ContigGraph, "CStones", 0); //GenerateScaffoldGraphStats ("CStones", 
0); } // We DO want to flush unused unitigs/contigs at this point. They're not in // a scaffold, and possibly will never be used again (except as rocks/stones). // ScaffoldGraph->tigStore->flushCache(); if (runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_FINAL_CLEANUP) == true) { // Try to cleanup failed merges, and if we do, generate a checkpoint if(CleanupFailedMergesInScaffolds(ScaffoldGraph)){ // This call deletes surrogate-only contigs that failed to merge if(CleanupScaffolds(ScaffoldGraph, FALSE, NULLINDEX, TRUE)){ #if defined(CHECK_CONTIG_ORDERS) || defined(CHECK_CONTIG_ORDERS_INCREMENTAL) fprintf(stderr, "---Checking contig orders after final cleanup\n\n"); CheckAllContigOrientationsInAllScaffolds(ScaffoldGraph, coc, POPULATE_COC_HASHTABLE); #endif } // // XXX do we need least squares here? // #if 1 fprintf(stderr, "Beta - LeastSquaresGapEstimates #4 after final cleanup\n"); for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) { CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID); if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split)) ScaffoldSanity(ScaffoldGraph, scaffold); } #endif CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_FINAL_CLEANUP], "after final cleanup"); } } if ((runThisCheckpoint(restartFromLogical, CHECKPOINT_AFTER_RESOLVE_SURROGATES) == true) && (doResolveSurrogates > 0)) { resolveSurrogates(placeAllFragsInSinglePlacedSurros, cutoffToInferSingleCopyStatus); // Call resolve surrogate twice, this is necessary for finishing (closure) reads. // Consider a closure read and its two bounding reads, named left and right: // If one (right) is placed in a unique region while the other (left) is in a surrogate itself, the closure read cannot be placed // However, once the surrogate bounding read is placed (and fully incorporated which happens at the very end of resolveSurrogates) // the closure read can be placed. 
// Therefore, we run resolve surrogates twice. // Note that is closure reads are themselves mated, it may be necessary to do a third round of placement. resolveSurrogates(placeAllFragsInSinglePlacedSurros, cutoffToInferSingleCopyStatus); // // XXX do we need least squares here? // #if 1 fprintf(stderr, "Beta - LeastSquaresGapEstimates #5 after resolve surrogates\n"); for (int32 sID=0; sID < GetNumCIScaffoldTs(ScaffoldGraph->CIScaffolds); sID++) { CIScaffoldT *scaffold = GetCIScaffoldT(ScaffoldGraph->CIScaffolds, sID); if (true == LeastSquaresGapEstimates(ScaffoldGraph, scaffold, LeastSquares_Cleanup | LeastSquares_Split)) ScaffoldSanity(ScaffoldGraph, scaffold); } #endif CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_RESOLVE_SURROGATES], "after resolve surrogates"); } // This generates the 'rezlog/gapreads' file. It's hugely // expensive, usually dies on a negative variance assert, and as // far as BPW knows, unused. // //Show_Reads_In_Gaps (GlobalData->outputPrefix); ComputeMatePairStatisticsRestricted(SCAFFOLD_OPERATIONS, GlobalData->minSamplesForOverride, "scaffold_final"); ComputeMatePairStatisticsRestricted(CONTIG_OPERATIONS, GlobalData->minSamplesForOverride, "contig_final"); GenerateCIGraph_U_Stats(); GenerateLinkStats(ScaffoldGraph->CIGraph,"final",0); GeneratePlacedContigGraphStats("final",0); GenerateLinkStats(ScaffoldGraph->ContigGraph,"final",0); GenerateScaffoldGraphStats("final",0); GenerateSurrogateStats("final"); #ifdef DEBUG int j = 0; for (j = 0; j < GetNumVA_CIFragT(ScaffoldGraph->CIFrags); j++) { CIFragT * frag = GetCIFragT(ScaffoldGraph->CIFrags, j); if (ScaffoldGraph->gkpStore->gkStore_getFRGtoPLC(frag->read_iid) != 0) { AS_UID uid = getGatekeeperIIDtoUID(ScaffoldGraph->gkpStore, frag->read_iid, AS_IID_FRG); if (frag->contigID != -1) { ChunkInstanceT * ctg = GetGraphNode(ScaffoldGraph->ContigGraph, frag->contigID); fprintf(stderr, "CLOSURE_READS: CLOSURE READ %s PLACED=%d CHAFF=%d SINGLETON=%d IN ASM type %c in SCF %d\n", 
AS_UID_toString(uid), frag->flags.bits.isPlaced, frag->flags.bits.isChaff, frag->flags.bits.isSingleton, frag->type, ctg->scaffoldID); } } } #endif // We DO want to flush unused unitigs/contigs at this point. They're not in // a scaffold, and possibly will never be used again (except as rocks/stones). // // (This assumes that output doesn't load unitigs/contigs again) // ScaffoldGraph->tigStore->flushCache(); SetCIScaffoldTLengths(ScaffoldGraph); if(generateOutput){ CelamyAssembly(GlobalData->outputPrefix); MarkContigEdges(); ComputeMatePairDetailedStatus(); // Note that OutputContigs partitions the tigStore, and closes ScaffoldGraph->tigStore. The // only operation valid after this function is CheckpointScaffoldGraph(). OutputUnitigsFromMultiAligns(); OutputContigsFromMultiAligns(outputFragsPerPartition, preserveConsensus); CheckpointScaffoldGraph(ckpNames[CHECKPOINT_AFTER_OUTPUT], "after output"); } DestroyScaffoldGraph(ScaffoldGraph); delete GlobalData; fprintf(stderr,"* Bye *\n"); exit(0); }
// Print a diagnostic report about one contig to stderr.
//
// The report contains, in order: the contig id, its orientation (0 when
// offsetAEnd.mean < offsetBEnd.mean, 1 otherwise), length and end offsets;
// the consensus sequence (reverse-complemented when the orientation is 1;
// printed whole when shorter than 5000 characters, otherwise only the two
// ends); every unitig listed in the contig's multialign together with the
// fragments of that unitig; and finally every raw edge of the contig.
//
//   contig - the contig to describe; looked up in ScaffoldGraph->ContigGraph.
//
void dumpContigInfo(ChunkInstanceT *contig) {
  VA_TYPE(char) *consensus = CreateVA_char(2048);
  VA_TYPE(char) *quality   = CreateVA_char(2048);

  fprintf(stderr, "*********************** contig analysis **************************\n");
  fprintf(stderr, "analyzing contig: %d\n", contig->id);

  const int orientation = (contig->offsetAEnd.mean < contig->offsetBEnd.mean) ? 0 : 1;

  fprintf(stderr, "contig orientation: %d\t length: %d contig offsetAEnd: %d\t offsetBEnd: %d\n",
          orientation,
          (int)contig->bpLength.mean,
          (int)contig->offsetAEnd.mean,
          (int)contig->offsetBEnd.mean);

  MultiAlignT *contigMA = ScaffoldGraph->tigStore->loadMultiAlign(contig->id,
                                                                  ScaffoldGraph->ContigGraph->type == CI_GRAPH);

  // Fetch the consensus for this contig into the local arrays.
  GetConsensus(ScaffoldGraph->ContigGraph, contig->id, consensus, quality);

  char *bases    = Getchar(consensus, 0);
  int   numBases = strlen(bases);

  // The sequence is flipped in place; 'consensus' is private to this function.
  if (orientation == 1)
    reverseComplementSequence(bases, numBases);

  if (numBases >= 5000) {
    // Too long to print whole: temporarily terminate the string after the
    // first 2500 characters, print that, restore the byte, then print the tail.
    const char saved = bases[2500];

    bases[2500] = '\0';
    fprintf(stderr, ">contig%d left end\n", contig->id);
    fprintf(stderr, "%s\n", bases);
    bases[2500] = saved;

    fprintf(stderr, ">contig%d right end\n", contig->id);
    fprintf(stderr, "%s\n", bases + numBases - 2501);
  } else {
    fprintf(stderr, ">contig%d consensus seq (flipped to reflect scaff orientation)\n", contig->id);
    fprintf(stderr, "%s\n", bases);
  }

#if 1
  const int unitigCount = GetNumIntUnitigPoss(contigMA->u_list);

  fprintf(stderr, "number unitigs: %d\n", unitigCount);

  for (int u = 0; u < unitigCount; u++) {
    IntUnitigPos   *placement = GetIntUnitigPos(contigMA->u_list, u);
    ChunkInstanceT *utg       = GetGraphNode(ScaffoldGraph->CIGraph, placement->ident);
    MultiAlignT    *utgMA     = ScaffoldGraph->tigStore->loadMultiAlign(utg->id,
                                                                        ScaffoldGraph->CIGraph->type == CI_GRAPH);

    const int isSurrogate = (utg->flags.bits.isStoneSurrogate || utg->flags.bits.isWalkSurrogate);

    fprintf(stderr, " unitig: %d\t num frags: %ld surrogate: %d\n",
            utg->id, GetNumIntMultiPoss(utgMA->f_list), isSurrogate);

    if (isSurrogate) {
      fprintf(stderr, " surrogate unitig offsetAEnd: %f, offsetBEnd: %f\n",
              utg->offsetAEnd.mean, utg->offsetBEnd.mean);

      // Switch to the unitig named by info.CI.baseID and list its fragments instead.
      utg = GetGraphNode(ScaffoldGraph->CIGraph, utg->info.CI.baseID);
      fprintf(stderr, " using original unitig: %d\n", utg->id);

      utgMA = ScaffoldGraph->tigStore->loadMultiAlign(utg->id,
                                                      ScaffoldGraph->CIGraph->type == CI_GRAPH);
    }

    // One line per fragment of the unitig.
    for (int f = 0; f < GetNumIntMultiPoss(utgMA->f_list); f++) {
      IntMultiPos *fragPos = GetIntMultiPos(utgMA->f_list, f);
      CIFragT     *frag    = GetCIFragT(ScaffoldGraph->CIFrags, fragPos->ident);

      fprintf(stderr, " frag: %6d\t contig pos (5p, 3p): %6d, %6d\n",
              fragPos->ident,
              (int) frag->contigOffset5p.mean,
              (int) frag->contigOffset3p.mean);
    }
  }
#endif

#if 1
  GraphEdgeIterator rawEdges(ScaffoldGraph->ContigGraph, contig->id, ALL_END, ALL_EDGES);

  for (CIEdgeT *raw = rawEdges.nextRaw(); raw != NULL; raw = rawEdges.nextRaw())
    PrintGraphEdge(stderr, ScaffoldGraph->ContigGraph, "Analyzing edge", raw, 0);
#endif

  DeleteVA_char(consensus);
  DeleteVA_char(quality);
}
// Rebuild graph->ContigGraph as a one-to-one copy of graph->CIGraph: every
// live unitig becomes a single-unitig contig with the same id, and every
// non-deleted top-level edge (with the raw edges chained under a merged
// edge) is copied across.
//
// Steps:
//   1. Discard the existing contig nodes/edges and allocate arrays sized
//      to match the CI graph.
//   2. Mark every contig slot as a dead contig with an empty edge list.
//   3. For each unitig returned by the node iterator, reset its contig
//      linkage and store a contig copy of it under the same id.
//   4. Copy edges.  Top-level raw edges are appended and inserted into both
//      endpoint edge lists.  Top-level merged edges are appended and
//      inserted likewise, then followed immediately by their raw edges, so
//      the nextRawEdge indices can be computed from the current edge count.
//
// The final assert checks that the number of non-top-level raw edges skipped
// by the outer loop equals the number of raw edges copied via merged edges.
//
void BuildInitialContigs(ScaffoldGraphT *graph) {

  // Resize the ContigGraph to the same size as the CI Graph

  fprintf(stderr,"BuildInitialContigs()-- converting %d unitigs with %d edges to contigs.\n",
          GetNumGraphNodes(graph->CIGraph),
          GetNumGraphEdges(graph->CIGraph));

  DeleteVA_NodeCGW_T(graph->ContigGraph->nodes);
  DeleteVA_EdgeCGW_T(graph->ContigGraph->edges);

  graph->ContigGraph->nodes = CreateVA_NodeCGW_T(GetNumGraphNodes(graph->CIGraph));
  graph->ContigGraph->edges = CreateVA_EdgeCGW_T(GetNumGraphEdges(graph->CIGraph));

  EnableRange_VA(graph->ContigGraph->nodes, GetNumGraphNodes(graph->CIGraph));

  graph->ContigGraph->edgeLists.clear();

  ResizeEdgeList(graph->ContigGraph);

  // Clear contigs.  Slots not overwritten by the copy below stay marked dead.
  for (int32 cid=0; cid < GetNumGraphNodes(graph->ContigGraph); cid++) {
    NodeCGW_T *ctg = GetGraphNode(graph->ContigGraph, cid);

    ctg->flags.all           = 0;
    ctg->flags.bits.isContig = TRUE;
    ctg->flags.bits.isDead   = TRUE;

    //ctg->edgeHead            = NULLINDEX;
    graph->ContigGraph->edgeLists[cid].clear();
  }

  // And copy.
  GraphNodeIterator CIs;
  NodeCGW_T        *CI;

  InitGraphNodeIterator(&CIs, graph->CIGraph, GRAPH_NODE_DEFAULT);
  while ((CI = NextGraphNodeIterator(&CIs)) != NULL){
    assert(CI->flags.bits.isDead == 0);

    // Reset the unitig.
    CI->AEndNext         = NULLINDEX;
    CI->BEndNext         = NULLINDEX;
    CI->info.CI.contigID = CI->id;

    // Copy to a new contig
    ContigT contig = *CI;

    contig.type               = CONTIG_CGW;
    contig.id                 = CI->id;
    contig.scaffoldID         = NULLINDEX;
    contig.smoothExpectedCID  = NULLINDEX;
    contig.numEssentialA      = 0;
    contig.numEssentialB      = 0;
    contig.essentialEdgeA     = NULLINDEX;
    contig.essentialEdgeB     = NULLINDEX;
    contig.info.Contig.AEndCI = CI->id;
    contig.info.Contig.BEndCI = CI->id;
    contig.info.Contig.numCI  = 1;
    contig.indexInScaffold    = NULLINDEX;
    contig.flags.bits.isCI      = FALSE;
    contig.flags.bits.isContig  = TRUE;
    contig.flags.bits.isChaff   = CI->flags.bits.isChaff;
    contig.flags.bits.isClosure = CI->flags.bits.isClosure;

    //contig.edgeHead           = NULLINDEX;

    SetNodeCGW_T(graph->ContigGraph->nodes, contig.id, &contig);

    // Ensure that there are no edges, and that the edgeList is allocated.
    assert(graph->ContigGraph->edgeLists[contig.id].empty() == true);
  }

  graph->numContigs = GetNumGraphNodes(graph->ContigGraph);

  // Now, work on the edges.

  uint32 nRawSkipped = 0;
  uint32 nMerged     = 0;
  uint32 nTopRaw     = 0;
  uint32 nRaw        = 0;

  for (uint32 i=0; i<GetNumGraphEdges(graph->CIGraph); i++) {
    CIEdgeT *edge = GetGraphEdge(graph->CIGraph, i);

    if (edge->flags.bits.isDeleted)
      continue;

    // If this isn't a top-level edge, skip it.
    // It must also be raw, and therefore already added.
    if (edge->topLevelEdge != GetVAIndex_CIEdgeT(graph->CIGraph->edges, edge)) {
      assert(edge->flags.bits.isRaw == true);
      nRawSkipped++;
      continue;
    }

    // Is it a top-level raw edge?
    if (edge->flags.bits.isRaw == true) {
      CIEdgeT newEdge = *edge;

      newEdge.referenceEdge = i;
      newEdge.topLevelEdge  = GetNumGraphEdges(graph->ContigGraph);

      AppendGraphEdge(graph->ContigGraph, &newEdge);

      InsertGraphEdgeInList(graph->ContigGraph, newEdge.topLevelEdge, newEdge.idA);
      InsertGraphEdgeInList(graph->ContigGraph, newEdge.topLevelEdge, newEdge.idB);

      nTopRaw++;
      continue;
    }

    // Otherwise, it must be a top-level merged edge
    assert(edge->nextRawEdge != NULLINDEX);

    CIEdgeT newEdge = *edge;
    CIEdgeT rawEdge;

    // The merged edge lands at the current end of the contig edge array and
    // its first raw edge is appended directly after it.
    newEdge.topLevelEdge = GetNumGraphEdges(graph->ContigGraph);
    newEdge.nextRawEdge  = GetNumGraphEdges(graph->ContigGraph) + 1;  // Must be raw edges!

    AppendGraphEdge(graph->ContigGraph, &newEdge);

    InsertGraphEdgeInList(graph->ContigGraph, newEdge.topLevelEdge, newEdge.idA);
    InsertGraphEdgeInList(graph->ContigGraph, newEdge.topLevelEdge, newEdge.idB);

    nMerged++;

    // And copy over all the raw edges that compose this merged edge.
    // 'prev' walks the CI-graph chain, starting at the merged edge itself.
    for (CIEdgeT *prev = edge; prev->nextRawEdge != NULLINDEX; ) {
      CIEdgeT *redge = GetGraphEdge(graph->CIGraph, prev->nextRawEdge);  // Grab the raw CI edge

      rawEdge = *redge;

      // These used to be assignments, but they should be correct as is.
      assert(rawEdge.idA == newEdge.idA);
      assert(rawEdge.idB == newEdge.idB);

      rawEdge.topLevelEdge  = newEdge.topLevelEdge;  // The ID of the new contig top level edge
      rawEdge.referenceEdge = prev->nextRawEdge;     // The ID of the current CI raw edge

      // rawEdge.nextRawEdge is currently the next CI raw edge.  If that is defined,
      // reset it to the next edge we'd add to the contig graph.
      if (rawEdge.nextRawEdge != NULLINDEX)
        rawEdge.nextRawEdge = GetNumGraphEdges(graph->ContigGraph) + 1;

      AppendGraphEdge(graph->ContigGraph, &rawEdge);

      nRaw++;

      prev = redge;
    }
  }

  // Whitespace between the literals and the F_U32 macro is required in
  // C++11 and later; without it the macro name is lexed as a literal suffix.
  fprintf(stderr,"BuildInitialContigs()-- converted " F_U32 " merged edges with " F_U32 " raw edges; skipped " F_U32 " raw edges in merged edges; converted " F_U32 " top level raw edges.\n",
          nMerged, nRaw, nRawSkipped, nTopRaw);

  assert(nRawSkipped == nRaw);
}
edge = redge; } } } fprintf(stderr,"BuildInitialContigs()-- converted "F_U32" merged edges with "F_U32" raw edges; skipped "F_U32" raw edges in merged edges; converted "F_U32" top level raw edges.\n", nMerged, nRaw, nRawSkipped, nTopRaw); assert(nRawSkipped == nRaw); } int GetConsensus(GraphCGW_T *graph, CDS_CID_t CIindex, VA_TYPE(char) *consensusVA, VA_TYPE(char) *qualityVA){ // Return value is length of unitig or contig sequence/quality (-1 if failure) ChunkInstanceT *CI = GetGraphNode(graph, CIindex); MultiAlignT *MA = NULL; ResetVA_char(consensusVA); ResetVA_char(qualityVA); if(CI->flags.bits.isCI){ // Get it from the store of Unitig multi alignments MA = ScaffoldGraph->tigStore->loadMultiAlign(CIindex, TRUE); }else if(CI->flags.bits.isContig){// Get it from the store of Contig multi alignments assert(graph->type == CONTIG_GRAPH); MA = ScaffoldGraph->tigStore->loadMultiAlign(CIindex, FALSE); }else assert(0); GetMultiAlignUngappedConsensus(MA, consensusVA, qualityVA); return GetNumchars(consensusVA);
void writeSLK(FILE *asmFile, bool doWrite) { SnapScaffoldLinkMesg slk; GenericMesg pmesg = { &slk, MESG_SLK }; GraphNodeIterator scaffolds; CIScaffoldT *scaffold; CIScaffoldT *scafmate; fprintf(stderr, "writeSLK()--\n"); InitGraphNodeIterator(&scaffolds, ScaffoldGraph->ScaffoldGraph, GRAPH_NODE_DEFAULT); while ((scaffold = NextGraphNodeIterator(&scaffolds)) != NULL) { GraphEdgeIterator edges(ScaffoldGraph->ScaffoldGraph, scaffold->id, ALL_END, ALL_EDGES); CIEdgeT *edge; CIEdgeT *redge; while((edge = edges.nextMerged()) != NULL) { if (edge->idA != scaffold->id) continue; scafmate = GetGraphNode(ScaffoldGraph->ScaffoldGraph, edge->idB); assert(!isOverlapEdge(edge)); slk.escaffold1 = SCFmap.lookup(scaffold->id); slk.escaffold2 = SCFmap.lookup(scafmate->id); slk.orientation = edge->orient; slk.mean_distance = edge->distance.mean; slk.std_deviation = sqrt(edge->distance.variance); slk.num_contributing = edge->edgesContributing; int edgeTotal = slk.num_contributing; int edgeCount = 0; if(edgeTotal < 2) continue; slk.jump_list = (SnapMate_Pairs *)safe_malloc(sizeof(SnapMate_Pairs) * slk.num_contributing); if (edge->flags.bits.isRaw) { assert(edgeTotal <= 1); // sanity check if (edgeTotal == 1) { slk.jump_list[edgeCount].in1 = FRGmap.lookup(edge->fragA); slk.jump_list[edgeCount].in2 = FRGmap.lookup(edge->fragB); }else{ slk.jump_list[edgeCount].in1 = AS_UID_undefined(); slk.jump_list[edgeCount].in2 = AS_UID_undefined(); } slk.jump_list[edgeCount].type.setIsMatePair(); edgeCount++; } else { redge = edge; assert(redge->flags.bits.isRaw == FALSE); assert(redge->nextRawEdge != NULLINDEX); // must have >= 1 raw edge while (redge->nextRawEdge != NULLINDEX) { redge = GetGraphEdge(ScaffoldGraph->ScaffoldGraph,redge->nextRawEdge); assert(!isOverlapEdge(redge)); slk.jump_list[edgeCount].in1 = FRGmap.lookup(redge->fragA); slk.jump_list[edgeCount].in2 = FRGmap.lookup(redge->fragB); slk.jump_list[edgeCount].type.setIsMatePair(); edgeCount++; } } assert(edgeCount == edgeTotal); if 
(doWrite) WriteProtoMesg_AS(asmFile, &pmesg); safe_free(slk.jump_list); } } }
void writeCLK(FILE *asmFile, bool doWrite) { SnapContigLinkMesg clk; GenericMesg pmesg = { &clk, MESG_CLK }; GraphNodeIterator nodes; ContigT *ctg; fprintf(stderr, "writeCLK()--\n"); InitGraphNodeIterator(&nodes, ScaffoldGraph->ContigGraph, GRAPH_NODE_DEFAULT); while ((ctg = NextGraphNodeIterator(&nodes)) != NULL) { if (ctg->flags.bits.isChaff) continue; if (SurrogatedSingleUnitigContig(ctg)) continue; GraphEdgeIterator edges(ScaffoldGraph->ContigGraph, ctg->id, ALL_END, ALL_EDGES); CIEdgeT *edge; while((edge = edges.nextMerged()) != NULL){ if (edge->idA != ctg->id) continue; ContigT *mate = GetGraphNode(ScaffoldGraph->ContigGraph, edge->idB); if(mate->flags.bits.isChaff) continue; if (SurrogatedSingleUnitigContig(mate)) continue; clk.econtig1 = CCOmap.lookup(edge->idA); clk.econtig2 = CCOmap.lookup(edge->idB); clk.orientation = edge->orient; // Don't need to map orientation, always using canonical orientation clk.overlap_type = (isOverlapEdge(edge)) ? AS_OVERLAP : AS_NO_OVERLAP; switch (GetEdgeStatus(edge)) { case LARGE_VARIANCE_EDGE_STATUS: case UNKNOWN_EDGE_STATUS: case INTER_SCAFFOLD_EDGE_STATUS: clk.status = AS_UNKNOWN_IN_ASSEMBLY; break; case TENTATIVE_TRUSTED_EDGE_STATUS: case TRUSTED_EDGE_STATUS: clk.status = AS_IN_ASSEMBLY; break; case TENTATIVE_UNTRUSTED_EDGE_STATUS: case UNTRUSTED_EDGE_STATUS: clk.status = AS_BAD; break; default: assert(0 /* Invalid edge status */); } clk.is_possible_chimera = edge->flags.bits.isPossibleChimera; clk.mean_distance = edge->distance.mean; clk.std_deviation = sqrt(edge->distance.variance); clk.num_contributing = edge->edgesContributing; uint32 edgeCount = 0; uint32 edgeTotal = clk.num_contributing; if ((edgeTotal == 1) && (clk.overlap_type == AS_OVERLAP) && (GlobalData->outputOverlapOnlyContigEdges == FALSE)) // don't output pure overlap edges continue; clk.jump_list = (SnapMate_Pairs *)safe_malloc(sizeof(SnapMate_Pairs) * edgeTotal); if (edge->flags.bits.isRaw) { assert(edgeTotal == 1); if (clk.overlap_type == 
AS_NO_OVERLAP) { clk.jump_list[edgeCount].in1 = FRGmap.lookup(edge->fragA); clk.jump_list[edgeCount].in2 = FRGmap.lookup(edge->fragB); clk.jump_list[edgeCount].type.setIsMatePair(); } else { assert(GlobalData->outputOverlapOnlyContigEdges); clk.jump_list[edgeCount].in1 = AS_UID_undefined(); clk.jump_list[edgeCount].in2 = AS_UID_undefined(); clk.jump_list[edgeCount].type.setIsOverlap(); } edgeCount++; } else { CIEdgeT *redge = edge; assert(redge->nextRawEdge != NULLINDEX); // must have >= 1 raw edge while (redge->nextRawEdge != NULLINDEX) { redge = GetGraphEdge(ScaffoldGraph->ContigGraph, redge->nextRawEdge); if (isOverlapEdge(redge)) { // overlap edges don't count edgeTotal--; continue; } clk.jump_list[edgeCount].in1 = FRGmap.lookup(redge->fragA); clk.jump_list[edgeCount].in2 = FRGmap.lookup(redge->fragB); clk.jump_list[edgeCount].type.setIsMatePair(); edgeCount++; } } assert(edgeCount == edgeTotal); if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); safe_free(clk.jump_list); } } }
void writeCCO(FILE *asmFile, bool doWrite) { SnapConConMesg cco; GenericMesg pmesg = { &cco, MESG_CCO }; GraphNodeIterator contigs; ContigT *contig; fprintf(stderr, "writeCCO()--\n"); InitGraphNodeIterator(&contigs, ScaffoldGraph->ContigGraph, GRAPH_NODE_DEFAULT); while ((contig = NextGraphNodeIterator(&contigs)) != NULL) { assert(contig->id >= 0); assert(contig->id < GetNumGraphNodes(ScaffoldGraph->ContigGraph)); if (contig->flags.bits.isChaff) continue; NodeCGW_T *unitig = GetGraphNode(ScaffoldGraph->CIGraph, contig->info.Contig.AEndCI); if ((ScaffoldGraph->tigStore->getNumUnitigs(contig->id, FALSE) == 1) && (contig->scaffoldID == NULLINDEX) && (unitig->info.CI.numInstances > 0)) // Contig is a surrogate instance continue; MultiAlignT *ma = ScaffoldGraph->tigStore->loadMultiAlign(contig->id, FALSE); cco.eaccession = AS_UID_fromInteger(getUID(uidServer)); cco.iaccession = contig->id; cco.placed = ScaffoldGraph->tigStore->getContigStatus(contig->id); cco.length = GetMultiAlignLength(ma); cco.consensus = Getchar(ma->consensus, 0); cco.quality = Getchar(ma->quality, 0); cco.forced = 0; cco.num_pieces = GetNumIntMultiPoss(ma->f_list); cco.num_unitigs = GetNumIntMultiPoss(ma->u_list); cco.num_vars = GetNumIntMultiPoss(ma->v_list); cco.pieces = NULL; cco.unitigs = NULL; cco.vars = NULL; if (cco.consensus == NULL) fprintf(stderr, "buildCCOMessage()-- contig %d missing consensus sequence\n", cco.iaccession); assert(cco.consensus != NULL); if (cco.length != strlen(cco.consensus)) fprintf(stderr, "buildCCOMessage()-- contig %d length %d != consensus string length "F_SIZE_T"\n", cco.iaccession, cco.length, strlen(cco.consensus)); assert(cco.length == strlen(cco.consensus)); if (cco.num_pieces > 0) { cco.pieces = (SnapMultiPos *)safe_malloc(cco.num_pieces * sizeof(SnapMultiPos)); for(int32 i=0; i<cco.num_pieces; i++) { IntMultiPos *imp = GetIntMultiPos(ma->f_list, i); cco.pieces[i].type = imp->type; cco.pieces[i].eident = FRGmap.lookup(imp->ident); cco.pieces[i].delta_length 
= imp->delta_length; cco.pieces[i].position = imp->position; cco.pieces[i].delta = imp->delta; } } if (cco.num_unitigs > 0) { cco.unitigs = (UnitigPos *)safe_malloc(cco.num_unitigs * sizeof(UnitigPos)); for(int32 i=0; i<cco.num_unitigs; i++) { IntUnitigPos *imp = GetIntUnitigPos(ma->u_list, i); cco.unitigs[i].type = imp->type; cco.unitigs[i].eident = UTGmap.lookup(imp->ident); cco.unitigs[i].position = imp->position; cco.unitigs[i].delta = imp->delta; cco.unitigs[i].delta_length = imp->delta_length; } } if (cco.num_vars > 0) { cco.vars = (IntMultiVar *)safe_malloc(cco.num_vars * sizeof(IntMultiVar)); for(int32 i=0; i<cco.num_vars; i++) { IntMultiVar *imv = GetIntMultiVar(ma->v_list, i); cco.vars[i].var_id = imv->var_id; cco.vars[i].phased_id = imv->phased_id; cco.vars[i].position = imv->position; cco.vars[i].num_reads = imv->num_reads; cco.vars[i].num_alleles = imv->num_alleles; cco.vars[i].num_alleles_confirmed = imv->num_alleles_confirmed; cco.vars[i].min_anchor_size = imv->min_anchor_size; cco.vars[i].var_length = imv->var_length; cco.vars[i].alleles = imv->alleles; cco.vars[i].var_seq_memory = imv->var_seq_memory; cco.vars[i].read_id_memory = imv->read_id_memory; cco.vars[i].enc_num_reads = NULL; cco.vars[i].enc_weights = NULL; cco.vars[i].enc_var_seq = NULL; cco.vars[i].enc_read_ids = NULL; } } if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); safe_free(cco.pieces); safe_free(cco.unitigs); safe_free(cco.vars); CCOmap.add(cco.iaccession, cco.eaccession); } }
void writeULK(FILE *asmFile, bool doWrite) { SnapUnitigLinkMesg ulk; GenericMesg pmesg = { & ulk, MESG_ULK }; GraphNodeIterator nodes; ChunkInstanceT *ci; fprintf(stderr, "writeULK()--\n"); InitGraphNodeIterator(&nodes, ScaffoldGraph->CIGraph, GRAPH_NODE_DEFAULT); while ((ci = NextGraphNodeIterator(&nodes)) != NULL) { assert(ci->type != CONTIG_CGW); if (ci->type == RESOLVEDREPEATCHUNK_CGW) continue; if (ci->flags.bits.isChaff) continue; GraphEdgeIterator edges(ScaffoldGraph->CIGraph, ci->id, ALL_END, ALL_EDGES); CIEdgeT *edge; while ((edge = edges.nextMerged()) != NULL) { if (edge->idA != ci->id || edge->flags.bits.isInferred || edge->flags.bits.isInferredRemoved || edge->flags.bits.isMarkedForDeletion) continue; ChunkInstanceT *mi = GetGraphNode(ScaffoldGraph->CIGraph, edge->idB); if (mi->flags.bits.isChaff) continue; ulk.eunitig1 = UTGmap.lookup(edge->idA); // == ci->id ulk.eunitig2 = UTGmap.lookup(edge->idB); ulk.orientation = edge->orient; // Don't need to map orientation, always using canonical orientation ulk.overlap_type = (isOverlapEdge(edge)) ? AS_OVERLAP : AS_NO_OVERLAP; ulk.is_possible_chimera = edge->flags.bits.isPossibleChimera; ulk.mean_distance = edge->distance.mean; ulk.std_deviation = sqrt(edge->distance.variance); ulk.num_contributing = edge->edgesContributing; ulk.status = AS_UNKNOWN_IN_ASSEMBLY; uint32 edgeCount = 0; uint32 edgeTotal = ulk.num_contributing; if ((edgeTotal == 1) && (ulk.overlap_type == AS_OVERLAP)) // don't output pure overlap edges continue; // Look through the fragment pairs in this edge to decide the status of the link. CIEdgeT *redge = (edge->flags.bits.isRaw) ? 
edge : GetGraphEdge(ScaffoldGraph->CIGraph, edge->nextRawEdge); int numBad = 0; int numGood = 0; int numUnknown = 0; for (; redge != NULL; redge = GetGraphEdge(ScaffoldGraph->CIGraph, redge->nextRawEdge)) { if(isOverlapEdge(redge)) continue; CIFragT *fragA = GetCIFragT(ScaffoldGraph->CIFrags, redge->fragA); CIFragT *fragB = GetCIFragT(ScaffoldGraph->CIFrags, redge->fragB); assert(fragA->flags.bits.edgeStatus == fragB->flags.bits.edgeStatus); if ((fragA->flags.bits.edgeStatus == UNTRUSTED_EDGE_STATUS) || (fragA->flags.bits.edgeStatus == TENTATIVE_UNTRUSTED_EDGE_STATUS)) numBad++; else if ((fragA->flags.bits.edgeStatus == TRUSTED_EDGE_STATUS) || (fragA->flags.bits.edgeStatus == TENTATIVE_TRUSTED_EDGE_STATUS)) numGood++; else numUnknown++; } if (numBad > 0) ulk.status = AS_BAD; else if (numGood > 0) ulk.status = AS_IN_ASSEMBLY; else ulk.status = AS_UNKNOWN_IN_ASSEMBLY; ulk.jump_list = (SnapMate_Pairs *)safe_malloc(sizeof(SnapMate_Pairs) * edgeTotal); if (edge->flags.bits.isRaw) { assert(edgeTotal == 1); ulk.jump_list[edgeCount].in1 = FRGmap.lookup(edge->fragA); ulk.jump_list[edgeCount].in2 = FRGmap.lookup(edge->fragB); ulk.jump_list[edgeCount].type.setIsMatePair(); edgeCount++; } else { assert(edgeTotal > 0); redge = edge; assert(redge->nextRawEdge != NULLINDEX); // must have >= 1 raw edge while (redge->nextRawEdge != NULLINDEX) { redge = GetGraphEdge(ScaffoldGraph->CIGraph, redge->nextRawEdge); if (isOverlapEdge(redge)) { // overlap edges don't count edgeTotal--; continue; } ulk.jump_list[edgeCount].in1 = FRGmap.lookup(redge->fragA); ulk.jump_list[edgeCount].in2 = FRGmap.lookup(redge->fragB); ulk.jump_list[edgeCount].type.setIsMatePair(); edgeCount++; } } assert(edgeCount == edgeTotal); if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); safe_free(ulk.jump_list); } } }
void ShatterScaffoldsConnectedByLowWeight(FILE *stream, ScaffoldGraphT *graph, uint32 minWeight, int verbose){ GraphNodeIterator nodes; NodeCGW_T *node; InitGraphNodeIterator(&nodes, ScaffoldGraph->ContigGraph, GRAPH_NODE_DEFAULT); while ((node = NextGraphNodeIterator(&nodes)) != NULL) { GraphEdgeIterator edges(ScaffoldGraph->ContigGraph, node->id, ALL_END, ALL_EDGES); CIEdgeT *edge; int disconnected = (edges.nextMerged() == NULL ? FALSE : TRUE); // don't disconnect a node if it has no edges while ((edge = edges.nextMerged()) != NULL) { NodeCGW_T *otherNode = GetGraphNode(graph->ContigGraph, (edge->idA == node->id ? edge->idB : edge->idA)); if (verbose == TRUE) fprintf(stream, "The edge ori:%c dist: %f connects %d and %d with weight %d and node is in scaffold %d and other end of edge is %d in scaffold %d\n", edge->orient.toLetter(), edge->distance.mean, edge->idA, edge->idB, edge->edgesContributing, node->scaffoldID, otherNode->id, otherNode->scaffoldID); if (otherNode->scaffoldID == node->scaffoldID && otherNode->scaffoldID != NULLINDEX && edge->edgesContributing >= minWeight) { disconnected = FALSE; if (verbose == TRUE) { fprintf(stream, "Node %d will not be disconnected from scaffold %d because it has edge %d higher than min %d\n", node->id, node->scaffoldID, edge->edgesContributing, minWeight); } } } if (disconnected == TRUE && node->scaffoldID != NULLINDEX) { if (verbose == TRUE) fprintf(stream, "Disconnecting contig with id %d from scaffold %d\n", node->id, node->scaffoldID); fprintf(stream, "Disconnecting contig with id %d from scaffold %d\n", node->id, node->scaffoldID); // is this all we need to do // don't set any of the flags for repeats, let it stay whatever it is now LengthT offsetAEnd = {0.0, 0.0}; LengthT offsetBEnd = {0.0, 0.0}; LengthT firstOffset = {0.0, 0.0}; CIScaffoldT CIScaffold; InitializeScaffold(&CIScaffold, REAL_SCAFFOLD); CIScaffold.info.Scaffold.AEndCI = NULLINDEX; CIScaffold.info.Scaffold.BEndCI = NULLINDEX; 
CIScaffold.info.Scaffold.numElements = 0; CIScaffold.bpLength = node->bpLength; CIScaffold.id = GetNumGraphNodes(graph->ScaffoldGraph); CIScaffold.flags.bits.isDead = FALSE; CIScaffold.numEssentialA = CIScaffold.numEssentialB = 0; CIScaffold.essentialEdgeB = CIScaffold.essentialEdgeA = NULLINDEX; AppendGraphNode(graph->ScaffoldGraph, &CIScaffold); // Ensure that there are no edges, and that the edgeList is allocated. assert(ScaffoldGraph->ScaffoldGraph->edgeLists[CIScaffold.id].empty() == true); node->numEssentialA = node->numEssentialB = 0; node->essentialEdgeA = node->essentialEdgeB = NULLINDEX; if(GetNodeOrient(node).isForward()){ firstOffset = node->offsetAEnd; }else{ firstOffset = node->offsetBEnd; } offsetAEnd.mean = node->offsetAEnd.mean - firstOffset.mean; offsetAEnd.variance = node->offsetAEnd.variance - firstOffset.variance; offsetBEnd.mean = node->offsetBEnd.mean - firstOffset.mean; offsetBEnd.variance = node->offsetBEnd.variance - firstOffset.variance; if (verbose == TRUE) { fprintf(stream, "Inserted node %d into scaffold %d at offsets (%f, %f) and (%f, %f) it used to be (%f, %f) and (%f, %f)\n", node->id, CIScaffold.id, offsetAEnd.mean, offsetAEnd.variance, offsetBEnd.mean, offsetBEnd.variance, node->offsetAEnd.mean, node->offsetAEnd.variance, node->offsetBEnd.mean, node->offsetBEnd.variance); } CIScaffoldT *scaffold = GetGraphNode(graph->ScaffoldGraph, node->scaffoldID); RemoveCIFromScaffold(graph, scaffold, node, FALSE); if (scaffold->info.Scaffold.numElements == 0) { scaffold->type = SCRATCH_SCAFFOLD; scaffold->flags.bits.isDead = 1; } InsertCIInScaffold(graph, node->id, CIScaffold.id, offsetAEnd, offsetBEnd, TRUE, FALSE); } } }