void PrintContigContents(FILE *stream, ScaffoldGraphT *graph, char *message) { GraphNodeIterator contigs; ContigT *contig; InitGraphNodeIterator(&contigs, graph->ContigGraph, GRAPH_NODE_DEFAULT); while ((contig = NextGraphNodeIterator(&contigs)) != NULL) { assert(contig->id >= 0); assert(contig->id < GetNumGraphNodes(graph->ContigGraph)); MultiAlignT *ma = ScaffoldGraph->tigStore->loadMultiAlign(contig->id, FALSE); fprintf(stream, "%s contig %d placed status %c with %d unitigs: ", message, contig->id, graph->tigStore->getContigStatus(contig->id), GetNumIntMultiPoss(ma->u_list)); for (int i = 0; i < GetNumIntMultiPoss(ma->u_list); i++) { IntUnitigPos *imp = GetIntUnitigPos(ma->u_list, i); fprintf(stream, "%d ", imp->ident); } fprintf(stream, "\n"); } }
bool MultiAlignContig(MultiAlignT *ma, gkStore *UNUSED, CNS_Options *opp) { int32 num_bases = 0; int32 num_unitigs = GetNumIntUnitigPoss(ma->u_list); int32 num_frags = GetNumIntMultiPoss(ma->f_list); int32 num_columns = 0; IntMultiPos *flist = GetIntMultiPos(ma->f_list, 0); IntUnitigPos *ulist = GetIntUnitigPos(ma->u_list, 0); IntMultiVar *vlist = GetIntMultiVar(ma->v_list, 0); SeqInterval *offsets = (SeqInterval *) safe_calloc(num_unitigs,sizeof(SeqInterval)); for (int32 i=0;i<num_unitigs;i++) { int32 flen = (ulist[i].position.bgn < ulist[i].position.end) ? (ulist[i].position.end < ulist[i].position.bgn) : (ulist[i].position.bgn - ulist[i].position.end); num_bases += flen + 2 * AS_CNS_ERROR_RATE * flen; num_columns = (ulist[i].position.bgn > num_columns) ? ulist[i].position.bgn : num_columns; num_columns = (ulist[i].position.end > num_columns) ? ulist[i].position.end : num_columns; //fprintf(stderr, "CTG %d UTG %d %d-%d\n", // ma->maID, ulist[i].ident, ulist[i].position.bgn, ulist[i].position.end); } for (int32 i=0;i<num_frags;i++) { int32 flen = (flist[i].position.bgn < flist[i].position.end) ? (flist[i].position.end < flist[i].position.bgn) : (flist[i].position.bgn - flist[i].position.end); num_bases += flen + 2 * AS_CNS_ERROR_RATE * flen; } ResetStores(num_bases, num_unitigs, num_columns); fragmentMap = CreateScalarHashTable_AS(); fragmentToIMP = CreateScalarHashTable_AS(); for (int32 i=0; i<num_frags; i++) { // Add all fragments in the contigs f_list to the fragmentMap. This tells us if a fragment is // not placed in a surrogate (because they aren't in the contigs f_list, but will appear in a // surrogate unitigs f_list). // if (HASH_SUCCESS != InsertInHashTable_AS(fragmentMap, flist[i].ident, 0, 1, 0)) { fprintf(stderr, "MultiAlignContig()-- Contig %d FAILED. 
Fragment %d is a duplicate.\n", ma->maID, flist[i].ident); return(false); } // SK store IID to IMP message mapping InsertInHashTable_AS(fragmentToIMP, flist[i].ident, 0, (uint64)&flist[i], 0); } for (int32 i=0;i<num_unitigs;i++) { uint32 complement = (ulist[i].position.bgn<ulist[i].position.end)?0:1; uint32 fid = AppendFragToLocalStore(AS_UNITIG, ulist[i].ident, complement, 0, ulist[i].type); offsets[fid].bgn = complement?ulist[i].position.end:ulist[i].position.bgn; offsets[fid].end = complement?ulist[i].position.bgn:ulist[i].position.end; } MANode *manode = CreateMANode(ma->maID); // Seed multiAlignment with 1st fragment of 1st unitig SeedMAWithFragment(manode->lid, GetFragment(fragmentStore,0)->lid, opp); PlaceFragments(GetFragment(fragmentStore,0)->lid, ulist + GetFragment(fragmentStore,0)->lid, opp); // Now, loop on remaining fragments, aligning to: // a) containing frag (if contained) // or b) previously aligned frag VA_TYPE(int32) *trace = CreateVA_int32(AS_READ_MAX_NORMAL_LEN+1); for (int32 i=1;i<num_unitigs;i++) { Fragment *afrag = NULL; Fragment *bfrag = GetFragment(fragmentStore,i); int32 ahang = 0; int32 bhang = 0; int32 ovl = 0; int32 alid = 0; int32 blid = bfrag->lid; OverlapType otype; int32 olap_success = 0; int32 try_contained = 0; int32 align_to = i - 1; Fragment *afrag_first = NULL; int32 ahang_first = 0; int32 bhang_first = 0; while (!olap_success) { nextFrag: if (try_contained == 0) // Skip contained stuff. 
while ((align_to > 0) && ((GetFragment(fragmentStore, align_to)->is_contained) || (GetFragment(fragmentStore, align_to)->container_iid > 0))) align_to--; if (align_to < 0) { if (VERBOSE_MULTIALIGN_OUTPUT) fprintf(stderr, "MultiAlignContig: hit the beginning of unitig list: no unitig upstream overlaps with current unitig %d\n", bfrag->iid); if (try_contained == 0) { if (VERBOSE_MULTIALIGN_OUTPUT) fprintf(stderr, "MultiAlignContig: trying contained afrags for bfrag %d\n", bfrag->iid); try_contained = 1; align_to = i-1; goto nextFrag; } break; } afrag = GetFragment(fragmentStore, align_to); alid = afrag->lid; ahang = offsets[blid].bgn - offsets[alid].bgn; bhang = offsets[blid].end - offsets[alid].end; if (afrag_first == NULL) { afrag_first = afrag; ahang_first = ahang; bhang_first = bhang; } // This code copied from MultiAlignUnitig. if (offsets[afrag->lid].bgn < offsets[bfrag->lid].bgn) if (offsets[afrag->lid].end < offsets[bfrag->lid].end) ovl = offsets[afrag->lid].end - offsets[bfrag->lid].bgn; else //ovl = offsets[bfrag->lid].end - offsets[bfrag->lid].bgn; ovl = bfrag->length; else if (offsets[afrag->lid].end < offsets[bfrag->lid].end) //ovl = offsets[afrag->lid].end - offsets[afrag->lid].bgn; ovl = afrag->length; else ovl = offsets[bfrag->lid].end - offsets[afrag->lid].bgn; // End of copy if (ovl <= 0) { if (VERBOSE_MULTIALIGN_OUTPUT) fprintf(stderr, "MultiAlignContig: positions of afrag %d and bfrag %d do not overlap. Proceed to the next upstream afrag\n", afrag->iid, bfrag->iid); align_to--; goto nextFrag; } olap_success = GetAlignmentTraceDriver(afrag, NULL, bfrag, &ahang, &bhang, ovl, trace, &otype, GETALIGNTRACE_CONTIGU, (blid + 1 < num_unitigs) ? (offsets[blid + 1].bgn - offsets[blid].bgn) : 800); // Nope, fail. 
if (!olap_success) { if (VERBOSE_MULTIALIGN_OUTPUT) fprintf(stderr, "MultiAlignContig: Positions of afrag %d (%c) and bfrag %d (%c) overlap, but GetAlignmentTrace returns no overlap success.\n", afrag->iid, afrag->type, bfrag->iid, bfrag->type); align_to--; if ((align_to < 0) && (!try_contained)) { if (VERBOSE_MULTIALIGN_OUTPUT) fprintf(stderr, "MultiAlignContig: Try contained afrags for bfrag %d\n", bfrag->iid); try_contained = 1; align_to = i-1; } } } // while !olap_success if ((!olap_success) && (FORCE_UNITIG_ABUT == 0)) { fprintf(stderr,"MultiAlignContig: Could (really) not find overlap between %d (%c) and %d (%c), estimated ahang %d\n", afrag->iid,afrag->type,bfrag->iid,bfrag->type, ahang); fprintf(stderr,"MultiAlignContig: You can (possibly) force these to abut with '-D forceunitigabut', but that code is buggy at best.\n"); goto returnFailure; } #if 1 if ((!olap_success) && (FORCE_UNITIG_ABUT == 1)) { if (afrag_first) { afrag = afrag_first; ahang = ahang_first; bhang = bhang_first; } else { // Dang, we're really screwed. Nobody overlapped with us. // Cross our fingers and find the closest end point. // int32 maxOvl = -offsets[blid].bgn; //if (VERBOSE_MULTIALIGN_OUTPUT) // fprintf(stderr, "MultiAlignContig: YIKES! Your unitig doesn't overlap with anything! Picking the closest thing!\n"); align_to = i-1; while (align_to >= 0) { if ((try_contained == 0) && ((GetFragment(fragmentStore, align_to)->is_contained) || (GetFragment(fragmentStore, align_to)->container_iid > 0))) { // NOP! Found a contained frag, and we want to skip it. 
} else if (maxOvl < offsets[alid].end - offsets[blid].bgn) { afrag = GetFragment(fragmentStore, align_to); alid = afrag->lid; ahang = offsets[blid].bgn - offsets[alid].bgn; maxOvl = offsets[alid].end - offsets[blid].bgn; //fprintf(stderr, "MultiAlignContig: RESET align_to=%d alid=%d maxOvl=%d ahang=%d\n", align_to, alid, maxOvl, ahang); } align_to--; } // while align_to >= 0 } fprintf(stderr, "MultiAlignContig: Forcing abut between afrag %d (%c) and bfrag %d (%c) in contig %d.\n", afrag->iid, afrag->type, bfrag->iid, bfrag->type, ma->maID); // Force a 1bp overlap. We'd like to strictly abut, but ApplyAlignment() requires that there // be an overlap, and removing checks for that seem like a bad idea. // ahang = afrag->length - 1; otype = AS_DOVETAIL; int32 zero = 0; ResetVA_int32(trace); AppendVA_int32(trace, &zero); assert(*Getint32(trace,0) == 0); assert(GetNumint32s(trace) == 1); } #endif // Unitig is placed, or we just forced it to be placed. if (otype == AS_CONTAINMENT) { bfrag->is_contained = 1; if (bfrag->container_iid == 0) bfrag->container_iid = 1; // Not sure why 1 and not afrag->iid } ApplyAlignment(afrag->lid, 0, NULL, bfrag->lid, ahang, Getint32(trace,0)); PlaceFragments(bfrag->lid, ulist + bfrag->lid, opp); } // over all unitigs // Now, must find fragments in regions of overlapping unitigs, and adjust // their alignments as needed RefreshMANode(manode->lid, 0, opp, NULL, NULL, 0, 0); //fprintf(stderr,"MultiAlignContig: Initial pairwise induced alignment\n"); //PrintAlignment(stderr,manode->lid,0,-1); AbacusRefine(manode,0,-1,CNS_SMOOTH, opp); MergeRefine(manode->lid, NULL, 0, opp, 1); AbacusRefine(manode,0,-1,CNS_POLYX, opp); //fprintf(stderr,"MultiAlignContig: POLYX refined alignment\n"); //PrintAlignment(stderr,manode->lid,0,-1); { IntMultiVar *vl = NULL; int32 nv = 0; RefreshMANode(manode->lid, 0, opp, &nv, &vl, 0, 0); AbacusRefine(manode,0,-1,CNS_INDEL, opp); MergeRefine(manode->lid, ma->v_list, 0, opp, 2); } //fprintf(stderr,"MultiAlignContig: 
Final refined alignment\n"); //PrintAlignment(stderr,manode->lid,0,-1); //if (num_frags == 0) // PrintAlignment(stderr,manode->lid,0,-1); GetMANodeConsensus(manode->lid, ma->consensus, ma->quality); GetMANodePositions(manode->lid, ma); DeleteMANode(manode->lid); safe_free(offsets); Delete_VA(trace); DeleteHashTable_AS(fragmentMap); fragmentMap = NULL; DeleteHashTable_AS(fragmentToIMP); fragmentToIMP = NULL; return(true); returnFailure: safe_free(offsets); Delete_VA(trace); DeleteHashTable_AS(fragmentMap); fragmentMap = NULL; DeleteHashTable_AS(fragmentToIMP); fragmentToIMP = NULL; return(false); }
static int abAbacus::SetUngappedFragmentPositions(FragType type,int32 n_frags, MultiAlignT *uma) { int32 num_frags = GetNumIntMultiPoss(uma->f_list); int32 num_unitigs = GetNumIntUnitigPoss(uma->u_list); HashTable_AS *unitigFrags = CreateScalarHashTable_AS(); int32 num_columns = GetMultiAlignLength(uma); int32 ungapped_pos = 0; int32 *gapped_positions = new int32 [num_columns + 1]; char *consensus = Getchar(uma->consensus,0); for (int32 i=0; i<num_columns+1; i++) { gapped_positions[i] = ungapped_pos; if (consensus[i] != '-') ungapped_pos++; } // Remember the first fragment we add. int32 first_frag = GetNumCNS_AlignedContigElements(fragment_positions); for (int32 ifrag=0; ifrag<num_frags; ifrag++) { CNS_AlignedContigElement epos; IntMultiPos *frag = GetIntMultiPos(uma->f_list, ifrag); if (ExistsInHashTable_AS(unitigFrags, frag->ident, 0)) { fprintf(stderr,"SetUngappedFragmentPositions()-- ident %d already in hashtable\n", frag->ident); assert(0); } if (HASH_SUCCESS != InsertInHashTable_AS(unitigFrags, frag->ident, 0, 1, 0)) { fprintf(stderr,"SetUngappedFragmentPositions()-- Failure to insert ident %d in hashtable\n", frag->ident); assert(0); } assert(frag->position.bgn >= 0); assert(frag->position.bgn < num_columns + 1); assert(frag->position.end >= 0); assert(frag->position.end < num_columns + 1); epos.frg_or_utg = CNS_ELEMENT_IS_FRAGMENT; epos.idx.fragment.frgIdent = frag->ident; epos.idx.fragment.frgType = frag->type; epos.idx.fragment.frgContained = frag->contained; epos.idx.fragment.frgInUnitig = (type == AS_CONTIG) ? 
-1 : uma->maID; epos.position.bgn = gapped_positions[frag->position.bgn]; epos.position.end = gapped_positions[frag->position.end]; //fprintf(stderr, "SetUngappedFragmentPositions()-- FRG id=%d type=%c pos=%d,%d (orig pos=%d,%d)\n", // frag->ident, frag->type, epos.position.bgn, epos.position.end, frag->position.bgn, frag->position.end); // Adjust the ungapped position if we fall within a gap // if (epos.position.bgn == epos.position.end) { fprintf(stderr,"SetUngappedFragmentPositions()-- Encountered bgn==end=="F_S32" in ungapped coords within SetUngappedFragmentPositions for "F_CID "(gapped coords "F_S32","F_S32")\n", epos.position.bgn,frag->ident,frag->position.bgn,frag->position.end); assert(frag->position.bgn != frag->position.end); if (frag->position.bgn < frag->position.end) { if (epos.position.bgn > 0) epos.position.bgn--; else epos.position.end++; } else { if (epos.position.end > 0) epos.position.end--; else epos.position.bgn++; } fprintf(stderr,"SetUngappedFragmentPositions()-- Reset to "F_S32","F_S32"\n", epos.position.bgn, epos.position.end); } AppendVA_CNS_AlignedContigElement(fragment_positions, &epos); } for (int32 ifrag=0; ifrag < num_unitigs; ifrag++){ CNS_AlignedContigElement epos; IntUnitigPos *unitig = GetIntUnitigPos(uma->u_list, ifrag); epos.frg_or_utg = CNS_ELEMENT_IS_UNITIG; epos.idx.unitig.utgIdent = unitig->ident; epos.idx.unitig.utgType = unitig->type; epos.position.bgn = gapped_positions[unitig->position.bgn]; epos.position.end = gapped_positions[unitig->position.end]; //fprintf(stderr, "SetUngappedFragmentPositions()-- UTG id=%d type=%c pos=%d,%d (orig pos=%d,%d)\n", // unitig->ident, unitig->type, epos.position.bgn, epos.position.end, unitig->position.bgn, unitig->position.end); AppendVA_CNS_AlignedContigElement(fragment_positions,&epos); } // This is used only by ReplaceEndUnitigInContig(). Mark fragments in the "anchoring" contig // that belong to this unitig. 
// if (type != AS_CONTIG) { Fragment *anchor = GetFragment(fragmentStore,0); if ((anchor != NULL) && (anchor->type == AS_CONTIG)) { CNS_AlignedContigElement *af = GetCNS_AlignedContigElement(fragment_positions, anchor->components); for (int32 ifrag=0; ifrag < anchor->n_components; ifrag++, af++) { if ((af->frg_or_utg == CNS_ELEMENT_IS_FRAGMENT) && (ExistsInHashTable_AS(unitigFrags, af->idx.fragment.frgIdent, 0))) af->idx.fragment.frgInUnitig = uma->maID; } } } DeleteHashTable_AS(unitigFrags); delete [] gapped_positions; return first_frag; }
int main (int argc, char **argv) { char tmpName[FILENAME_MAX] = {0}; char *gkpName = NULL; char *tigName = NULL; int32 tigVers = -1; int32 tigPart = -1; int64 ctgBgn = -1; int64 ctgEnd = -1; char *ctgName = NULL; char *outName = NULL; char *inName = NULL; bool forceCompute = false; int32 numFailures = 0; int32 numSkipped = 0; bool useUnitig = false; bool showResult = false; CNS_Options options = { CNS_OPTIONS_SPLIT_ALLELES_DEFAULT, CNS_OPTIONS_MIN_ANCHOR_DEFAULT, CNS_OPTIONS_DO_PHASING_DEFAULT }; // Comminucate to MultiAlignment_CNS.c that we are doing consensus and not cgw. thisIsConsensus = 1; argc = AS_configure(argc, argv); int arg=1; int err=0; while (arg < argc) { if (strcmp(argv[arg], "-g") == 0) { gkpName = argv[++arg]; } else if (strcmp(argv[arg], "-t") == 0) { tigName = argv[++arg]; tigVers = atoi(argv[++arg]); tigPart = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-c") == 0) { AS_UTL_decodeRange(argv[++arg], ctgBgn, ctgEnd); } else if (strcmp(argv[arg], "-T") == 0) { ctgName = argv[++arg]; } else if (strcmp(argv[arg], "-O") == 0) { outName = argv[++arg]; } else if (strcmp(argv[arg], "-I") == 0) { inName = argv[++arg]; } else if (strcmp(argv[arg], "-f") == 0) { forceCompute = true; } else if (strcmp(argv[arg], "-U") == 0) { useUnitig = true; } else if (strcmp(argv[arg], "-v") == 0) { showResult = true; } else if (strcmp(argv[arg], "-V") == 0) { VERBOSE_MULTIALIGN_OUTPUT++; } else if (strcmp(argv[arg], "-w") == 0) { options.smooth_win = atoi(argv[++arg]); } else if (strcmp(argv[arg], "-P") == 0) { options.do_phasing = atoi(argv[++arg]); } else { fprintf(stderr, "%s: Unknown option '%s'\n", argv[0], argv[arg]); err++; } arg++; } if ((err) || (gkpName == NULL) || (tigName == NULL)) { fprintf(stderr, "usage: %s -g gkpStore -t tigStore version partition [opts]\n", argv[0]); fprintf(stderr, " -c b Compute only contig ID 'b' (must be in the correct partition!)\n"); fprintf(stderr, " -c b-e Compute only contigs from ID 'b' to ID 'e'\n"); fprintf(stderr, 
"\n"); fprintf(stderr, " -T file Test the computation of the contig layout in 'file'\n"); fprintf(stderr, "\n"); fprintf(stderr, " -f Recompute contigs that already have a multialignment\n"); fprintf(stderr, "\n"); fprintf(stderr, " -U Reuse the unitig consensus for contigs with only a single\n"); fprintf(stderr, " unitig (EXPERIMENTAL!)\n"); fprintf(stderr, "\n"); fprintf(stderr, " -O file Don't update tigStore, dump a binary file instead.\n"); fprintf(stderr, " -I file Import binary file into tigStore\n"); fprintf(stderr, "\n"); fprintf(stderr, " -v Show multialigns.\n"); fprintf(stderr, " -V Enable debugging option 'verbosemultialign'.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -w ws Smoothing window size\n"); fprintf(stderr, "\n"); exit(1); } // Open both stores for read only. gkpStore = new gkStore(gkpName, false, false); tigStore = new MultiAlignStore(tigName, tigVers, 0, tigPart, false, false, false); gkpStore->gkStore_loadPartition(tigPart); // Decide on what to compute. Either all contigs, or a single contig, or a special case test. uint32 b = 0; uint32 e = tigStore->numContigs(); if (ctgBgn != -1) { b = ctgBgn; e = ctgEnd + 1; } FORCE_UNITIG_ABUT = 1; if (ctgName != NULL) { errno = 0; FILE *F = fopen(ctgName, "r"); if (errno) fprintf(stderr, "Failed to open input contig file '%s': %s\n", ctgName, strerror(errno)), exit(1); MultiAlignT *ma = CreateEmptyMultiAlignT(); bool isUnitig = false; while (LoadMultiAlignFromHuman(ma, isUnitig, F) == true) { if (ma->maID < 0) ma->maID = (isUnitig) ? tigStore->numUnitigs() : tigStore->numContigs(); if (MultiAlignContig(ma, gkpStore, &options)) { if (showResult) PrintMultiAlignT(stdout, ma, gkpStore, false, false, AS_READ_CLEAR_LATEST); } else { fprintf(stderr, "MultiAlignContig()-- contig %d failed.\n", ma->maID); numFailures++; } } DeleteMultiAlignT(ma); b = e = 0; } // Reopen for writing, if we have work to do. 
if (((inName) || (b < e)) && (outName == NULL)) { delete tigStore; tigStore = new MultiAlignStore(tigName, tigVers, 0, tigPart, true, false, true); } if (inName) { importFromFile(inName, tigPart); b = e = 0; } // Now the usual case. Iterate over all contigs, compute and update. for (uint32 i=b; i<e; i++) { MultiAlignT *cma = tigStore->loadMultiAlign(i, false); if (cma == NULL) { // Not in our partition, or deleted. continue; } bool exists = (cma->consensus != NULL) && (GetNumchars(cma->consensus) > 1); if ((forceCompute == false) && (exists == true)) { // Already finished contig consensus. fprintf(stderr, "Working on contig %d (%d unitigs and %d fragments) - already computed, skipped\n", cma->maID, cma->data.num_unitigs, cma->data.num_frags); numSkipped++; tigStore->unloadMultiAlign(cma->maID, false); continue; } int32 uID = GetIntUnitigPos(cma->u_list, 0)->ident; // If this is a surrogate, we CANNOT reuse the unitig. We need to process the contig so that // the unplaced reads are stripped out. A surrogate should have different contig and unitig // IDs; we could also check the contig status. if ((cma->data.num_unitigs == 1) && (cma->maID == uID) && (useUnitig == true)) { fprintf(stderr, "Working on contig %d (%d unitigs and %d fragments) - reusing unitig %d consensus\n", cma->maID, cma->data.num_unitigs, cma->data.num_frags, uID); MultiAlignT *uma = tigStore->loadMultiAlign(uID, true); uma->data = cma->data; tigStore->unloadMultiAlign(cma->maID, false); if (outName) writeToOutFile(outName, tigPart, uma); else tigStore->insertMultiAlign(uma, false, false); tigStore->unloadMultiAlign(uma->maID, true); continue; } fprintf(stderr, "Working on contig %d (%d unitigs and %d fragments)%s\n", cma->maID, cma->data.num_unitigs, cma->data.num_frags, (exists) ? 
" - already computed, recomputing" : ""); if (MultiAlignContig(cma, gkpStore, &options)) { if (outName) writeToOutFile(outName, tigPart, cma); else tigStore->insertMultiAlign(cma, false, true); if (showResult) PrintMultiAlignT(stdout, cma, gkpStore, false, false, AS_READ_CLEAR_LATEST); tigStore->unloadMultiAlign(cma->maID, false); } else { fprintf(stderr, "MultiAlignContig()-- contig %d failed.\n", cma->maID); numFailures++; } } delete tigStore; fprintf(stderr, "\n"); fprintf(stderr, "NumColumnsInUnitigs = %d\n", NumColumnsInUnitigs); fprintf(stderr, "NumGapsInUnitigs = %d\n", NumGapsInUnitigs); fprintf(stderr, "NumRunsOfGapsInUnitigReads = %d\n", NumRunsOfGapsInUnitigReads); fprintf(stderr, "NumColumnsInContigs = %d\n", NumColumnsInContigs); fprintf(stderr, "NumGapsInContigs = %d\n", NumGapsInContigs); fprintf(stderr, "NumRunsOfGapsInContigReads = %d\n", NumRunsOfGapsInContigReads); fprintf(stderr, "NumAAMismatches = %d\n", NumAAMismatches); fprintf(stderr, "NumVARRecords = %d\n", NumVARRecords); fprintf(stderr, "NumVARStringsWithFlankingGaps = %d\n", NumVARStringsWithFlankingGaps); fprintf(stderr, "NumUnitigRetrySuccess = %d\n", NumUnitigRetrySuccess); fprintf(stderr, "\n"); if (numFailures) { fprintf(stderr, "WARNING: Total number of contig failures = %d\n", numFailures); fprintf(stderr, "\n"); fprintf(stderr, "Consensus did NOT finish successfully.\n"); return(1); } fprintf(stderr, "Consensus finished successfully. Bye.\n"); return(0); }
MultiAlignT * ReplaceEndUnitigInContig(uint32 contig_iid, uint32 unitig_iid, int32 extendingLeft, CNS_Options *opp) { int32 cid,tid; // local id of contig (cid), and unitig(tid) int32 aid,bid; int32 i,num_unitigs; MultiAlignT *oma; MultiAlignT *cma; IntUnitigPos *u_list; IntMultiPos *f_list; IntMultiVar *v_list; int32 append_left=0; int32 num_frags=0; int32 complement=0; MANode *ma; Fragment *cfrag; Fragment *tfrag = NULL; static VA_TYPE(int32) *trace=NULL; oma = tigStore->loadMultiAlign(contig_iid, FALSE); ResetStores(2 * GetNumchars(oma->consensus), 2, 2 * GetNumchars(oma->consensus)); num_unitigs = GetNumIntUnitigPoss(oma->u_list); num_frags = GetNumIntMultiPoss(oma->f_list); u_list = GetIntUnitigPos(oma->u_list,0); f_list = GetIntMultiPos(oma->f_list,0); v_list = GetIntMultiVar(oma->v_list,0); //PrintIMPInfo(stderr, num_frags, f_list); //PrintIUPInfo(stderr, num_unitigs, u_list); // capture the consensus sequence of the original contig and put into local "fragment" format cid = AppendFragToLocalStore(AS_CONTIG, contig_iid, 0, 0, AS_OTHER_UNITIG); fprintf(stderr,"ReplaceEndUnitigInContig()-- contig %d unitig %d isLeft(%d)\n", contig_iid,unitig_iid,extendingLeft); // The only real value-added from ReplaceUnitigInContig is a new consensus sequence for the contig // some adjustments to positions go along with this, but the real compute is an alignment // between the old contig consensus and the updated unitig // // first we want to determine whether unitig is on left or right of contig, // so that alignment can be done with a positive ahang // if u is at left, i.e.: // // C---------------C // u------u // then initialize new alignment with unitig, and add contig, else // // if u is at right, i.e.: // // C---------------C // u------u // then initialize new alignment with contig, and add unitig, else ma = CreateMANode(0); if ( trace == NULL ) trace = CreateVA_int32(AS_READ_MAX_NORMAL_LEN); ResetVA_int32(trace); { int32 ahang,bhang,pos_offset=0; int32 
tigs_adjusted_pos=0; OverlapType otype; int32 olap_success=0; cfrag=GetFragment(fragmentStore,cid); for(i=0;i<num_unitigs;i++) { uint32 id=u_list[i].ident; if ( id == unitig_iid ) { int32 bgn=u_list[i].position.bgn; int32 end=u_list[i].position.end; int32 complement_tmp=(bgn<end)?0:1; int32 left=(complement_tmp)?end:bgn; int32 right=(complement_tmp)?bgn:end; complement=complement_tmp; tid = AppendFragToLocalStore(AS_UNITIG, id, complement, 0, AS_OTHER_UNITIG); tfrag=GetFragment(fragmentStore,tid); if ( extendingLeft ) { // need to set aid to unitig to preserve positive ahang -- and we now should always // have a bhang of zero. append_left=1; aid=tid; bid=cid; // and ahang estimate is the diff in size between // new unitig (GetFragment(fragmentStore,tid)->length) and old unitig (right-left) ahang = GetFragment(fragmentStore,tid)->length - (right-left); bhang = 0; } else { // -------- // ---+++ // We extended the unitig by "+++". The ahang is just the // start position of the original placement, and the bhang // is the amount extended (as above). aid=cid; bid=tid; ahang = left; bhang = GetFragment(fragmentStore,tid)->length - (right-left); } SeedMAWithFragment(ma->lid, aid, opp); // The expected length of this alignment is always the length of the original unitig. int32 ovl = right - left; olap_success = GetAlignmentTrace(aid, 0, bid, &ahang, &bhang, ovl, trace, &otype, DP_Compare, DONT_SHOW_OLAP, 0, GETALIGNTRACE_MERGE, AS_CGW_ERROR_RATE); if (!olap_success) olap_success = GetAlignmentTrace(aid, 0, bid, &ahang, &bhang, ovl, trace, &otype, Local_Overlap_AS_forCNS, DONT_SHOW_OLAP, 0, GETALIGNTRACE_MERGE, AS_CGW_ERROR_RATE); // If the alignment fails -- usually because the ahang is // negative -- return an empty alignment. This causes // extendClearRanges (the sole user of this function) to // gracefully handle the failure. 
// if (olap_success == 0) { return(NULL); assert(olap_success); } ApplyAlignment(aid, 0, NULL, bid, ahang, Getint32(trace,0)); RefreshMANode(ma->lid, 0, opp, NULL, NULL, 0, 0); //PrintAlignment(stderr,ma->lid,0,-1); break; } } } // Now, want to generate a new MultiAlignT which is an appropriate adjustment of original cma = CreateMultiAlignT(); cma->maID = -1; cma->data = oma->data; cma->consensus = CreateVA_char(GetMANodeLength(ma->lid)+1); cma->quality = CreateVA_char(GetMANodeLength(ma->lid)+1); GetMANodeConsensus(ma->lid, cma->consensus, cma->quality); // no deltas required at this stage // merge the f_lists and u_lists by cloning and concating cma->f_list = Clone_VA(oma->f_list); cma->u_list = Clone_VA(oma->u_list); cma->v_list = Clone_VA(oma->v_list); cma->fdelta = CreateVA_int32(0); cma->udelta = CreateVA_int32(0); { CNS_AlignedContigElement *components; CNS_AlignedContigElement *tcomponents; CNS_AlignedContigElement *contig_component; CNS_AlignedContigElement *aligned_component; int32 ifrag=0; int32 iunitig=0; IntMultiPos *imp; IntUnitigPos *iup; Fragment *frag; int32 ci=0; int32 tc=0; //unitig component index int32 bgn,end,left,right,tmp; int32 range_bgn=0,range_end=0,new_tig=0; components=GetCNS_AlignedContigElement(fragment_positions,cfrag->components); tcomponents=GetCNS_AlignedContigElement(fragment_positions,tfrag->components); // make adjustments to positions if ( append_left) { // fragments within unitig are 0 to tfrag->n_components // and cfrag->n_components-num_unitigs range_bgn = 0; range_end = tfrag->n_components-1; new_tig=cfrag->n_components-num_unitigs; } else { // changed unitig on right // fragments within unitig are (num_frags-tfrag->n_components) to num_frags // and cfrag->n_components-1; range_bgn = (num_frags-(tfrag->n_components-1)); range_end = num_frags; new_tig=cfrag->n_components-1; } while (ci < cfrag->n_components) { contig_component = &components[ci]; if ( contig_component->frg_or_utg == CNS_ELEMENT_IS_FRAGMENT && 
contig_component->idx.fragment.frgInUnitig == unitig_iid ) { aligned_component = &tcomponents[tc++]; if ( complement ) { bgn = tfrag->length-aligned_component->position.bgn; end = tfrag->length-aligned_component->position.end; } else { bgn = aligned_component->position.bgn; end = aligned_component->position.end; } frag = tfrag; #ifdef DEBUG_POSITIONS fprintf(stderr,"compci->idx %12d bgn: %10d end: %10d\n",ci,bgn,end); #endif } else if ( ci == new_tig ) { aligned_component = &tcomponents[tc++]; if ( complement ) { bgn = tfrag->length-aligned_component->position.bgn; end = tfrag->length-aligned_component->position.end; } else { bgn = aligned_component->position.bgn; end = aligned_component->position.end; } frag = tfrag; #ifdef DEBUG_POSITIONS fprintf(stderr,"compci->idx %12d bgn: %10d end: %10d\n",ci,bgn,end); #endif } else { aligned_component = contig_component; bgn = aligned_component->position.bgn; end = aligned_component->position.end; frag = cfrag; #ifdef DEBUG_POSITIONS fprintf(stderr,"compci->idx %12d bgn: %10d end: %10d\n",ci,bgn,end); #endif } left = (bgn<end)?bgn:end; right = (bgn<end)?end:bgn; //if ( ci == new_tig ) { // left = 0; // right = frag->length; //} left = GetColumn(columnStore, GetBead(beadStore,frag->firstbead.get() + left )->column_index)->ma_index; right= GetColumn(columnStore, GetBead(beadStore,frag->firstbead.get() + right-1)->column_index)->ma_index + 1; tmp = bgn; bgn = (bgn<end)?left:right; end = (tmp<end)?right:left; if (aligned_component->frg_or_utg==CNS_ELEMENT_IS_UNITIG) { iup = GetIntUnitigPos(cma->u_list,iunitig); iup->position.bgn = bgn; iup->position.end = end; iup->delta_length = 0; iup->delta = NULL; #ifdef DEBUG_POSITIONS fprintf(stderr," element %d at %d,%d\n", ci,bgn,end); #endif ci++;iunitig++; } else { imp = GetIntMultiPos(cma->f_list,ifrag); imp->ident = aligned_component->idx.fragment.frgIdent; imp->contained = aligned_component->idx.fragment.frgContained; imp->position.bgn = bgn; imp->position.end = end; #ifdef 
DEBUG_POSITIONS fprintf(stderr," element %d at %d,%d\n", ci,bgn,end); #endif imp->delta_length = 0; imp->delta = NULL; ci++;ifrag++; } } } DeleteMANode(ma->lid); return cma; }
// Debug dump of one contig to stderr: orientation and offsets, its consensus
// (whole if shorter than 5000 characters, otherwise both ends), every unitig
// it contains with that unitig's fragments and their contig offsets, and all
// edges of the contig in the contig graph.
//
// For a stone or walk surrogate unitig, the fragments listed are those of the
// unitig named by info.CI.baseID.
//
void dumpContigInfo(ChunkInstanceT *contig) {
  int contigOrientation;
  MultiAlignT *ma;
  char *seq1;
  int len1;

  VA_TYPE(char) *consensus = CreateVA_char(2048);
  VA_TYPE(char) *quality   = CreateVA_char(2048);

  fprintf( stderr, "*********************** contig analysis **************************\n");
  fprintf( stderr, "analyzing contig: %d\n", contig->id);

  if (contig->offsetAEnd.mean < contig->offsetBEnd.mean)
    contigOrientation = 0;
  else
    contigOrientation = 1;

  fprintf(stderr, "contig orientation: %d\t length: %d contig offsetAEnd: %d\t offsetBEnd: %d\n",
          contigOrientation,
          (int)contig->bpLength.mean,
          (int)contig->offsetAEnd.mean,
          (int)contig->offsetBEnd.mean);

  ma = ScaffoldGraph->tigStore->loadMultiAlign(contig->id, ScaffoldGraph->ContigGraph->type == CI_GRAPH);

  // Get the consensus sequences for the contig from the Store
  GetConsensus(ScaffoldGraph->ContigGraph, contig->id, consensus, quality);

  seq1 = Getchar(consensus, 0);
  len1 = strlen(seq1);

  if (contigOrientation == 1)
    reverseComplementSequence(seq1, len1);

  if (len1 < 5000) {
    fprintf( stderr, ">contig%d consensus seq (flipped to reflect scaff orientation)\n", contig->id);
    fprintf( stderr, "%s\n", seq1);
  } else {
    //  Temporarily terminate the string to print only the left end.
    char tmpchar = seq1[2500];
    seq1[2500] = '\0';

    fprintf( stderr, ">contig%d left end\n", contig->id);
    fprintf( stderr, "%s\n", seq1);

    seq1[2500] = tmpchar;

    fprintf( stderr, ">contig%d right end\n", contig->id);
    fprintf( stderr, "%s\n", seq1 + len1 - 2501);
  }

#if 1
  int numUnitigs = GetNumIntUnitigPoss(ma->u_list);
  fprintf( stderr, "number unitigs: %d\n", numUnitigs);

  int i;
  for (i = 0; i < numUnitigs; i++) {
    IntUnitigPos *upos = GetIntUnitigPos( ma->u_list, i);
    ChunkInstanceT *unitig = GetGraphNode( ScaffoldGraph->CIGraph, upos->ident);
    MultiAlignT *uma = ScaffoldGraph->tigStore->loadMultiAlign(unitig->id, ScaffoldGraph->CIGraph->type == CI_GRAPH);
    IntMultiPos *ump;
    int icntfrag;

    //  The count is cast so that it matches the %ld conversion whatever type
    //  the accessor returns.
    fprintf( stderr, " unitig: %d\t num frags: %ld surrogate: %d\n", unitig->id, (long)GetNumIntMultiPoss(uma->f_list),
             (unitig->flags.bits.isStoneSurrogate || unitig->flags.bits.isWalkSurrogate));

    if (unitig->flags.bits.isStoneSurrogate ||
        unitig->flags.bits.isWalkSurrogate) {
      fprintf (stderr, " surrogate unitig offsetAEnd: %f, offsetBEnd: %f\n", unitig->offsetAEnd.mean, unitig->offsetBEnd.mean);

      //  Switch to the unitig this surrogate refers to.
      unitig = GetGraphNode( ScaffoldGraph->CIGraph, unitig->info.CI.baseID);
      fprintf ( stderr, " using original unitig: %d\n", unitig->id);
      uma = ScaffoldGraph->tigStore->loadMultiAlign(unitig->id, ScaffoldGraph->CIGraph->type == CI_GRAPH);
    }

    // now print out info on the frags in the unitig
    for (icntfrag = 0; icntfrag < GetNumIntMultiPoss(uma->f_list); icntfrag++) {
      IntMultiPos *imp = GetIntMultiPos(uma->f_list, icntfrag);
      CIFragT *frag = GetCIFragT(ScaffoldGraph->CIFrags, imp->ident);

      fprintf(stderr, " frag: %6d\t contig pos (5p, 3p): %6d, %6d\n",
              imp->ident, (int) frag->contigOffset5p.mean, (int) frag->contigOffset3p.mean);
    }
  }
#endif

#if 1
  CIEdgeT * e;
  GraphEdgeIterator edges(ScaffoldGraph->ContigGraph, contig->id, ALL_END, ALL_EDGES);

  // FALSE == ITERATOR_VERBOSE

  while((e = edges.nextRaw()) != NULL)
    PrintGraphEdge( stderr, ScaffoldGraph->ContigGraph, "Analyzing edge", e, 0);
#endif

  DeleteVA_char(consensus);
  DeleteVA_char(quality);
}
void writeCCO(FILE *asmFile, bool doWrite) { SnapConConMesg cco; GenericMesg pmesg = { &cco, MESG_CCO }; GraphNodeIterator contigs; ContigT *contig; fprintf(stderr, "writeCCO()--\n"); InitGraphNodeIterator(&contigs, ScaffoldGraph->ContigGraph, GRAPH_NODE_DEFAULT); while ((contig = NextGraphNodeIterator(&contigs)) != NULL) { assert(contig->id >= 0); assert(contig->id < GetNumGraphNodes(ScaffoldGraph->ContigGraph)); if (contig->flags.bits.isChaff) continue; NodeCGW_T *unitig = GetGraphNode(ScaffoldGraph->CIGraph, contig->info.Contig.AEndCI); if ((ScaffoldGraph->tigStore->getNumUnitigs(contig->id, FALSE) == 1) && (contig->scaffoldID == NULLINDEX) && (unitig->info.CI.numInstances > 0)) // Contig is a surrogate instance continue; MultiAlignT *ma = ScaffoldGraph->tigStore->loadMultiAlign(contig->id, FALSE); cco.eaccession = AS_UID_fromInteger(getUID(uidServer)); cco.iaccession = contig->id; cco.placed = ScaffoldGraph->tigStore->getContigStatus(contig->id); cco.length = GetMultiAlignLength(ma); cco.consensus = Getchar(ma->consensus, 0); cco.quality = Getchar(ma->quality, 0); cco.forced = 0; cco.num_pieces = GetNumIntMultiPoss(ma->f_list); cco.num_unitigs = GetNumIntMultiPoss(ma->u_list); cco.num_vars = GetNumIntMultiPoss(ma->v_list); cco.pieces = NULL; cco.unitigs = NULL; cco.vars = NULL; if (cco.consensus == NULL) fprintf(stderr, "buildCCOMessage()-- contig %d missing consensus sequence\n", cco.iaccession); assert(cco.consensus != NULL); if (cco.length != strlen(cco.consensus)) fprintf(stderr, "buildCCOMessage()-- contig %d length %d != consensus string length "F_SIZE_T"\n", cco.iaccession, cco.length, strlen(cco.consensus)); assert(cco.length == strlen(cco.consensus)); if (cco.num_pieces > 0) { cco.pieces = (SnapMultiPos *)safe_malloc(cco.num_pieces * sizeof(SnapMultiPos)); for(int32 i=0; i<cco.num_pieces; i++) { IntMultiPos *imp = GetIntMultiPos(ma->f_list, i); cco.pieces[i].type = imp->type; cco.pieces[i].eident = FRGmap.lookup(imp->ident); cco.pieces[i].delta_length 
= imp->delta_length; cco.pieces[i].position = imp->position; cco.pieces[i].delta = imp->delta; } } if (cco.num_unitigs > 0) { cco.unitigs = (UnitigPos *)safe_malloc(cco.num_unitigs * sizeof(UnitigPos)); for(int32 i=0; i<cco.num_unitigs; i++) { IntUnitigPos *imp = GetIntUnitigPos(ma->u_list, i); cco.unitigs[i].type = imp->type; cco.unitigs[i].eident = UTGmap.lookup(imp->ident); cco.unitigs[i].position = imp->position; cco.unitigs[i].delta = imp->delta; cco.unitigs[i].delta_length = imp->delta_length; } } if (cco.num_vars > 0) { cco.vars = (IntMultiVar *)safe_malloc(cco.num_vars * sizeof(IntMultiVar)); for(int32 i=0; i<cco.num_vars; i++) { IntMultiVar *imv = GetIntMultiVar(ma->v_list, i); cco.vars[i].var_id = imv->var_id; cco.vars[i].phased_id = imv->phased_id; cco.vars[i].position = imv->position; cco.vars[i].num_reads = imv->num_reads; cco.vars[i].num_alleles = imv->num_alleles; cco.vars[i].num_alleles_confirmed = imv->num_alleles_confirmed; cco.vars[i].min_anchor_size = imv->min_anchor_size; cco.vars[i].var_length = imv->var_length; cco.vars[i].alleles = imv->alleles; cco.vars[i].var_seq_memory = imv->var_seq_memory; cco.vars[i].read_id_memory = imv->read_id_memory; cco.vars[i].enc_num_reads = NULL; cco.vars[i].enc_weights = NULL; cco.vars[i].enc_var_seq = NULL; cco.vars[i].enc_read_ids = NULL; } } if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); safe_free(cco.pieces); safe_free(cco.unitigs); safe_free(cco.vars); CCOmap.add(cco.iaccession, cco.eaccession); } }