void writeAFGFromTigStore(FILE *asmFile, bool doWrite) { AugFragMesg afg; GenericMesg pmesg = { &afg, MESG_AFG }; gkFragment fr; fprintf(stderr, "writeAFGFromTigStore()--\n"); for (uint32 tigID = 0; tigID < ScaffoldGraph->tigStore->numUnitigs(); tigID++) { MultiAlignT *ma = ScaffoldGraph->tigStore->loadMultiAlign(tigID, TRUE); if (ma == NULL) continue; for (uint32 i=0; i<GetNumIntMultiPoss(ma->f_list); i++) { IntMultiPos *imp = GetIntMultiPos(ma->f_list, i); ScaffoldGraph->gkpStore->gkStore_getFragment(imp->ident, &fr, GKFRAGMENT_INF); afg.eaccession = fr.gkFragment_getReadUID(); afg.iaccession = fr.gkFragment_getReadIID(); afg.mate_status = UNASSIGNED_MATE; afg.chaff = 0; afg.clear_rng.bgn = fr.gkFragment_getClearRegionBegin(); afg.clear_rng.end = fr.gkFragment_getClearRegionEnd (); if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); FRGmap.add(afg.iaccession, afg.eaccession); if ((AS_UID_isString(afg.eaccession) == FALSE) && (uidMin <= AS_UID_toInteger(afg.eaccession))) uidMin = AS_UID_toInteger(afg.eaccession) + 1; } } }
void writeUTGFromCGW(FILE *asmFile, bool doWrite) { SnapUnitigMesg utg; GenericMesg pmesg = { &utg, MESG_UTG }; GraphNodeIterator unitigs; ChunkInstanceT *ci; fprintf(stderr, "writeUTGFromCGW()--\n"); InitGraphNodeIterator(&unitigs, ScaffoldGraph->CIGraph, GRAPH_NODE_DEFAULT); while ((ci = NextGraphNodeIterator(&unitigs)) != NULL) { assert(ci->id >= 0); assert(ci->id < GetNumGraphNodes(ScaffoldGraph->CIGraph)); if (ci->flags.bits.isChaff) // Don't write chaff continue; if (ci->type == RESOLVEDREPEATCHUNK_CGW) // Don't write surrogate instances continue; buildUTGMessage(ci->id, &utg); if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); safe_free(utg.f_list); UTGmap.add(utg.iaccession, utg.eaccession); } }
int main(int argc, char **argv) { OverlapStore *ovs = NULL; argc = AS_configure(argc, argv); int arg=1; int err=0; while (arg < argc) { if (strcmp(argv[arg], "-O") == 0) { ovs = AS_OVS_openOverlapStore(argv[++arg]); } else { err++; } arg++; } if ((ovs == NULL) || (err)) { fprintf(stderr, "usage: %s -O ovlStore < unitigs.cgb > fixedUnitigs.cgb\n", argv[0]); exit(1); } GenericMesg *pmesg = NULL; while ((ReadProtoMesg_AS(stdin, &pmesg) != EOF)) { if (pmesg->t == MESG_IUM) fixUnitig((IntUnitigMesg *)(pmesg->m), ovs); WriteProtoMesg_AS(stdout, pmesg); } exit(0); }
void writeMDI(FILE *asmFile, bool doWrite) { SnapMateDistMesg mdi; GenericMesg pmesg = { &mdi, MESG_MDI }; fprintf(stderr, "writeMDI()--\n"); for (int32 i=1; i<GetNumDistTs(ScaffoldGraph->Dists); i++){ DistT *dptr = GetDistT(ScaffoldGraph->Dists, i); // Believe whatever estimate is here. We used to reset to zero and the input (except we had // already munged the input stddev) if there were 30 or fewer samples. mdi.erefines = ScaffoldGraph->gkpStore->gkStore_getLibrary(i)->libraryUID; mdi.irefines = i; mdi.mean = dptr->mu; mdi.stddev = dptr->sigma; mdi.min = INT32_MIN; mdi.max = INT32_MAX; mdi.num_buckets = 0; mdi.histogram = NULL; // The histogram does not get stored in a checkpoint. If the current run of CGW did not have // enough samples to recompute the histogram, we have to live without it if (dptr->bnum > 0) { mdi.min = dptr->min; mdi.max = dptr->max; mdi.num_buckets = dptr->bnum; mdi.histogram = dptr->histogram; } if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); MDImap.add(mdi.irefines, mdi.erefines); if ((AS_UID_isString(mdi.erefines) == FALSE) && (uidMin <= AS_UID_toInteger(mdi.erefines))) uidMin = AS_UID_toInteger(mdi.erefines) + 1; safe_free(dptr->histogram); dptr->histogram = NULL; dptr->numSamples = 0; dptr->bnum = 0; } }
void writeUTGFromTigStore(FILE *asmFile, bool doWrite) { SnapUnitigMesg utg; GenericMesg pmesg = { &utg, MESG_UTG }; fprintf(stderr, "writeUTGFromTigStore()--\n"); for (uint32 tigID = 0; tigID < ScaffoldGraph->tigStore->numUnitigs(); tigID++) { if (buildUTGMessage(tigID, &utg)) { if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); safe_free(utg.f_list); UTGmap.add(utg.iaccession, utg.eaccession); } } }
// Check every fragment after the first against its parent using
// updateFragmentWithParent(), and split the unitig at the first fragment for
// which that call reports a failure.
//
// On a failure at fragment F, 'iunitig' is truncated to its first F
// fragments (the caller is expected to write it), and the remaining
// fragments become a new unitig that shares the same f_list storage.  The
// new unitig is fixed recursively and then written to stdout, so deeper
// splits are written before shallower ones.
//
//   iunitig - unitig to check; num_frags and f_list entries may be modified
//   ovs     - overlap store passed through to updateFragmentWithParent()
//
void fixUnitig(IntUnitigMesg *iunitig, OverlapStore *ovs) {
  for (int fi = 1; fi < iunitig->num_frags; fi++) {
    int failed = updateFragmentWithParent(iunitig, fi, ovs);

    if (failed == -1)
      continue;

    //  If that failed, the iunitig is guaranteed good up until the 'failed'
    //  fragment.  It'll get written out back in main; all we need to do is
    //  fix up the rest of the fragments, possibly into multiple unitigs.
    assert(failed == fi);

    IntUnitigMesg  tail = *iunitig;

    //  Make the iacc big, just to label this as needing a new iacc.
    tail.iaccession += 1000000000;

    tail.num_frags   = iunitig->num_frags - failed;
    tail.f_list      = iunitig->f_list + failed;

    //  The first fragment of the new unitig has no parent.
    tail.f_list[0].parent    = 0;
    tail.f_list[0].ahang     = 0;
    tail.f_list[0].bhang     = 0;
    tail.f_list[0].contained = 0;

    //  Truncating here also ends this loop after the current iteration.
    iunitig->num_frags = failed;

    fixUnitig(&tail, ovs);

    GenericMesg  wrapper;

    wrapper.m = &tail;
    wrapper.t = MESG_IUM;

    WriteProtoMesg_AS(stdout, &wrapper);
  }
}
void writeAFGFromCGW(FILE *asmFile, bool doWrite) { AugFragMesg afg; GenericMesg pmesg = { &afg, MESG_AFG }; gkFragment fr; gkStream *fs = new gkStream(ScaffoldGraph->gkpStore, 0, 0, GKFRAGMENT_INF); fprintf(stderr, "writeAFGFromCGW()--\n"); for (int32 i=1; i<GetNumCIFragTs(ScaffoldGraph->CIFrags); i++) { CIFragT *cifrag = GetCIFragT(ScaffoldGraph->CIFrags, i); fs->next(&fr); if (cifrag->flags.bits.isDeleted) continue; assert(cifrag->read_iid == i); assert(cifrag->read_iid == fr.gkFragment_getReadIID()); afg.eaccession = fr.gkFragment_getReadUID(); afg.iaccession = i; afg.mate_status = cifrag->flags.bits.mateDetail; afg.chaff = cifrag->flags.bits.isChaff; afg.clear_rng.bgn = fr.gkFragment_getClearRegionBegin(); afg.clear_rng.end = fr.gkFragment_getClearRegionEnd (); if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); FRGmap.add(afg.iaccession, afg.eaccession); if ((AS_UID_isString(afg.eaccession) == FALSE) && (uidMin <= AS_UID_toInteger(afg.eaccession))) uidMin = AS_UID_toInteger(afg.eaccession) + 1; } delete fs; }
void writeAMP(FILE *asmFile, bool doWrite) { AugMatePairMesg amp; GenericMesg pmesg = { &, MESG_AMP }; fprintf(stderr, "writeAMP()--\n"); for (int32 i=1; i<GetNumCIFragTs(ScaffoldGraph->CIFrags); i++) { CIFragT *cif1 = GetCIFragT(ScaffoldGraph->CIFrags, i); CIFragT *cif2 = NULL; if (cif1->flags.bits.isDeleted) continue; if (cif1->mate_iid == 0) continue; cif2 = GetCIFragT(ScaffoldGraph->CIFrags, cif1->mate_iid); if (cif2->flags.bits.isDeleted) continue; if (cif1->read_iid > cif2->read_iid) continue; assert(cif1->flags.bits.edgeStatus == cif2->flags.bits.edgeStatus); assert(cif1->flags.bits.mateDetail == cif2->flags.bits.mateDetail); amp.fragment1 = FRGmap.lookup(cif1->read_iid); amp.fragment2 = FRGmap.lookup(cif2->read_iid); amp.mate_status = cif1->flags.bits.mateDetail; if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); } }
// For every unitig, report the best overlaps contained in the // unitig, and all overlaps contained in the unitig. // // Wow, this is ancient. // void writeOverlapsUsed(UnitigVector &unitigs, char *fileprefix) { char filename[FILENAME_MAX] = {0}; #if 0 GenericMesg pmesg; OverlapMesg omesg; #endif sprintf(filename, "%s.unused.ovl", fileprefix); FILE *file = fopen(filename, "w"); assert(file != NULL); #if 0 for (uint32 ti=0; ti<unitigs.size(); ti++) { Unitig *utg = unitigs[ti]; if (utg == NULL) continue; for (uint32 fi=0; fi<utg->ufpath.size(); fi++) { ufNode *frg = &utg->ufpath[fi]; // Where is our best overlap? Contained or dovetail? BestEdgeOverlap *bestedge5 = OG->getBestEdgeOverlap(frg->ident, false); BestEdgeOverlap *bestedge3 = OG->getBestEdgeOverlap(frg->ident, true); int bestident5 = 0; int bestident3 = 0; if (bestedge5) { bestident5 = bestedge5->fragId(); if ((bestident5 > 0) && (utg->fragIn(bestident5) != utg->id())) { omesg.aifrag = frg->ident; omesg.bifrag = bestident5; omesg.ahg = bestedge5->ahang(); omesg.bhg = bestedge5->bhang(); omesg.orientation.setIsUnknown(); omesg.overlap_type = AS_DOVETAIL; omesg.quality = 0.0; omesg.min_offset = 0; omesg.max_offset = 0; omesg.polymorph_ct = 0; omesg.alignment_trace = NULL; #ifdef AS_MSG_USE_OVL_DELTA omesg.alignment_delta = NULL; #endif // This overlap is off of the 5' end of this fragment. 
if (bestedge5->frag3p() == false) omesg.orientation.setIsOuttie(); if (bestedge5->frag3p() == true) omesg.orientation.setIsAnti(); pmesg.t = MESG_OVL; pmesg.m = &omesg; WriteProtoMesg_AS(file, &pmesg); } } if (bestedge3) { bestident3 = bestedge3->fragId(); if ((bestident3 > 0) && (utg->fragIn(bestident3) != utg->id())) { omesg.aifrag = frg->ident; omesg.bifrag = bestident3; omesg.ahg = bestedge3->ahang(); omesg.bhg = bestedge3->bhang(); omesg.orientation.setIsUnknown(); omesg.overlap_type = AS_DOVETAIL; omesg.quality = 0.0; omesg.min_offset = 0; omesg.max_offset = 0; omesg.polymorph_ct = 0; omesg.alignment_trace = NULL; #ifdef AS_MSG_USE_OVL_DELTA omesg.alignment_delta = NULL; #endif // This overlap is off of the 3' end of this fragment. if (bestedge3->frag3p() == false) omesg.orientation.setIsNormal(); if (bestedge3->frag3p() == true) omesg.orientation.setIsInnie(); pmesg.t = MESG_OVL; pmesg.m = &omesg; WriteProtoMesg_AS(file, &pmesg); } } } } #endif fclose(file); }
int main(int argc, char **argv) { int msglist[NUM_OF_REC_TYPES + 1]; FILE *outfile[NUM_OF_REC_TYPES + 1]; off_t count[NUM_OF_REC_TYPES + 1]; off_t size[NUM_OF_REC_TYPES + 1]; int i; for (i=0; i<=NUM_OF_REC_TYPES; i++) { msglist[i] = 0; outfile[i] = 0L; count[i] = 0; size[i] = 0; } int arg = 1; int inc = 0; int err = 0; int msg = 0; argc = AS_configure(argc, argv); while (arg < argc) { if (strcmp(argv[arg], "-i") == 0) { inc = 1; } else if (strcmp(argv[arg], "-x") == 0) { inc = 0; } else if (strcmp(argv[arg], "-o") == 0) { errno = 0; FILE *F = fopen(argv[++arg], "w"); if (errno) fprintf(stderr, "%s: failed to open output file '%s': %s\n", argv[0], argv[arg], strerror(errno)), exit(1); // Depending on the include flag, we either write all messages // listed in our msglist (or write all message not in the // msglist) to the freshly opened file. // if (inc) { // Include message i in the output if it was listed for (i=1; i<=NUM_OF_REC_TYPES; i++) if ((outfile[i] == NULL) && (msglist[i] > 0)) outfile[i] = F; } else { // Include message i in the output if it was not listed for (i=1; i<=NUM_OF_REC_TYPES; i++) if ((outfile[i] == NULL) && (msglist[i] == 0)) outfile[i] = F; } for (i=0; i<=NUM_OF_REC_TYPES; i++) msglist[i] = 0; } else if (strcmp(argv[arg], "-m") == 0) { int type = GetMessageType(argv[++arg]); if ((type >= 1) && (type <= NUM_OF_REC_TYPES)) { msglist[type]++; msg++; } else { fprintf(stderr, "%s: invalid message type '%s'.\n", argv[0], argv[arg]); err = 1; } } else if (strcmp(argv[arg], "-h") == 0) { err = 1; } else { int type = GetMessageType(argv[arg]); if ((type >= 1) && (type <= NUM_OF_REC_TYPES)) { msglist[type]++; msg++; } else { fprintf(stderr, "%s: invalid option '%s'.\n", argv[0], argv[arg]); err = 1; } } arg++; } if (err) usage(argv[0]), exit(1); // Assume everything else goes to stdout. We need to obey the inc // flag, still, though. 
// if (inc) { // Include message i in the output if it was listed for (i=1; i<=NUM_OF_REC_TYPES; i++) if ((outfile[i] == NULL) && (msglist[i] > 0)) outfile[i] = stdout; } else { // Include message i in the output if it was not listed for (i=1; i<=NUM_OF_REC_TYPES; i++) if ((outfile[i] == NULL) && (msglist[i] == 0)) outfile[i] = stdout; } GenericMesg *pmesg; off_t currPos = 0; off_t prevPos = 0; while (ReadProtoMesg_AS(stdin, &pmesg) != EOF) { assert(pmesg->t <= NUM_OF_REC_TYPES); currPos = AS_UTL_ftell(stdin); if (outfile[pmesg->t] != NULL) { count[pmesg->t]++; size[pmesg->t] += currPos - prevPos; WriteProtoMesg_AS(outfile[pmesg->t], pmesg); } prevPos = currPos; } for (i=0; i<=NUM_OF_REC_TYPES; i++) if (count[i] > 0) fprintf(stderr, "%s num "F_OFF_T" size "F_OFF_T" avg %f\n", MessageTypeName[i], count[i], size[i], (double)size[i] / count[i]); exit(0); }
void writeSLK(FILE *asmFile, bool doWrite) { SnapScaffoldLinkMesg slk; GenericMesg pmesg = { &slk, MESG_SLK }; GraphNodeIterator scaffolds; CIScaffoldT *scaffold; CIScaffoldT *scafmate; fprintf(stderr, "writeSLK()--\n"); InitGraphNodeIterator(&scaffolds, ScaffoldGraph->ScaffoldGraph, GRAPH_NODE_DEFAULT); while ((scaffold = NextGraphNodeIterator(&scaffolds)) != NULL) { GraphEdgeIterator edges(ScaffoldGraph->ScaffoldGraph, scaffold->id, ALL_END, ALL_EDGES); CIEdgeT *edge; CIEdgeT *redge; while((edge = edges.nextMerged()) != NULL) { if (edge->idA != scaffold->id) continue; scafmate = GetGraphNode(ScaffoldGraph->ScaffoldGraph, edge->idB); assert(!isOverlapEdge(edge)); slk.escaffold1 = SCFmap.lookup(scaffold->id); slk.escaffold2 = SCFmap.lookup(scafmate->id); slk.orientation = edge->orient; slk.mean_distance = edge->distance.mean; slk.std_deviation = sqrt(edge->distance.variance); slk.num_contributing = edge->edgesContributing; int edgeTotal = slk.num_contributing; int edgeCount = 0; if(edgeTotal < 2) continue; slk.jump_list = (SnapMate_Pairs *)safe_malloc(sizeof(SnapMate_Pairs) * slk.num_contributing); if (edge->flags.bits.isRaw) { assert(edgeTotal <= 1); // sanity check if (edgeTotal == 1) { slk.jump_list[edgeCount].in1 = FRGmap.lookup(edge->fragA); slk.jump_list[edgeCount].in2 = FRGmap.lookup(edge->fragB); }else{ slk.jump_list[edgeCount].in1 = AS_UID_undefined(); slk.jump_list[edgeCount].in2 = AS_UID_undefined(); } slk.jump_list[edgeCount].type.setIsMatePair(); edgeCount++; } else { redge = edge; assert(redge->flags.bits.isRaw == FALSE); assert(redge->nextRawEdge != NULLINDEX); // must have >= 1 raw edge while (redge->nextRawEdge != NULLINDEX) { redge = GetGraphEdge(ScaffoldGraph->ScaffoldGraph,redge->nextRawEdge); assert(!isOverlapEdge(redge)); slk.jump_list[edgeCount].in1 = FRGmap.lookup(redge->fragA); slk.jump_list[edgeCount].in2 = FRGmap.lookup(redge->fragB); slk.jump_list[edgeCount].type.setIsMatePair(); edgeCount++; } } assert(edgeCount == edgeTotal); if 
(doWrite) WriteProtoMesg_AS(asmFile, &pmesg); safe_free(slk.jump_list); } } }
void writeSCF(FILE *asmFile, bool doWrite) { SnapScaffoldMesg scf; GenericMesg pmesg = { &scf, MESG_SCF }; GraphNodeIterator scaffolds; CIScaffoldT *scaffold; fprintf(stderr, "writeSCF()--\n"); InitGraphNodeIterator(&scaffolds, ScaffoldGraph->ScaffoldGraph, GRAPH_NODE_DEFAULT); while ((scaffold = NextGraphNodeIterator(&scaffolds)) != NULL) { if(scaffold->type != REAL_SCAFFOLD) continue; assert(scaffold->info.Scaffold.numElements > 0); scf.eaccession = AS_UID_fromInteger(getUID(uidServer)); scf.iaccession = scaffold->id; scf.num_contig_pairs = scaffold->info.Scaffold.numElements - 1; scf.contig_pairs = (SnapContigPairs *)safe_malloc(sizeof(SnapContigPairs) * scaffold->info.Scaffold.numElements); CIScaffoldTIterator contigs; ChunkInstanceT *contigCurr; ChunkInstanceT *contigLast; InitCIScaffoldTIterator(ScaffoldGraph, scaffold, TRUE, FALSE, &contigs); contigLast = NextCIScaffoldTIterator(&contigs); SequenceOrient orientLast; SequenceOrient orientCurr; orientLast.setIsForward(contigLast->offsetAEnd.mean < contigLast->offsetBEnd.mean); assert(contigLast->scaffoldID == scaffold->id); if (scf.num_contig_pairs == 0) { scf.contig_pairs[0].econtig1 = CCOmap.lookup(contigLast->id); scf.contig_pairs[0].econtig2 = CCOmap.lookup(contigLast->id); scf.contig_pairs[0].mean = 0.0; scf.contig_pairs[0].stddev = 0.0; scf.contig_pairs[0].orient.setIsAB_AB(); // got to put something } else { int32 pairCount = 0; while ((contigCurr = NextCIScaffoldTIterator(&contigs)) != NULL) { assert(pairCount < scf.num_contig_pairs); assert(contigCurr->scaffoldID == scaffold->id); scf.contig_pairs[pairCount].econtig1 = CCOmap.lookup(contigLast->id); scf.contig_pairs[pairCount].econtig2 = CCOmap.lookup(contigCurr->id); SequenceOrient orientCurr; orientCurr.setIsForward(contigCurr->offsetAEnd.mean < contigCurr->offsetBEnd.mean); if (orientLast.isForward()) { if (orientCurr.isForward()) { scf.contig_pairs[pairCount].mean = contigCurr->offsetAEnd.mean - contigLast->offsetBEnd.mean; 
scf.contig_pairs[pairCount].stddev = sqrt(contigCurr->offsetAEnd.variance - contigLast->offsetBEnd.variance); scf.contig_pairs[pairCount].orient.setIsAB_AB(); } else { //orientCurr == B_A scf.contig_pairs[pairCount].mean = contigCurr->offsetBEnd.mean - contigLast->offsetBEnd.mean; scf.contig_pairs[pairCount].stddev = sqrt(contigCurr->offsetBEnd.variance - contigLast->offsetBEnd.variance); scf.contig_pairs[pairCount].orient.setIsAB_BA(); } } else { //orientLast == B_A if (orientCurr.isForward()) { scf.contig_pairs[pairCount].mean = contigCurr->offsetAEnd.mean - contigLast->offsetAEnd.mean; scf.contig_pairs[pairCount].stddev = sqrt(contigCurr->offsetAEnd.variance - contigLast->offsetAEnd.variance); scf.contig_pairs[pairCount].orient.setIsBA_AB(); } else { //orientCurr == B_A scf.contig_pairs[pairCount].mean = contigCurr->offsetBEnd.mean - contigLast->offsetAEnd.mean; scf.contig_pairs[pairCount].stddev = sqrt(contigCurr->offsetBEnd.variance - contigLast->offsetAEnd.variance); scf.contig_pairs[pairCount].orient.setIsBA_BA(); } } contigLast = contigCurr; orientLast = orientCurr; ++pairCount; } } if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); SCFmap.add(scf.iaccession, scf.eaccession); safe_free(scf.contig_pairs); } }
void writeCLK(FILE *asmFile, bool doWrite) { SnapContigLinkMesg clk; GenericMesg pmesg = { &clk, MESG_CLK }; GraphNodeIterator nodes; ContigT *ctg; fprintf(stderr, "writeCLK()--\n"); InitGraphNodeIterator(&nodes, ScaffoldGraph->ContigGraph, GRAPH_NODE_DEFAULT); while ((ctg = NextGraphNodeIterator(&nodes)) != NULL) { if (ctg->flags.bits.isChaff) continue; if (SurrogatedSingleUnitigContig(ctg)) continue; GraphEdgeIterator edges(ScaffoldGraph->ContigGraph, ctg->id, ALL_END, ALL_EDGES); CIEdgeT *edge; while((edge = edges.nextMerged()) != NULL){ if (edge->idA != ctg->id) continue; ContigT *mate = GetGraphNode(ScaffoldGraph->ContigGraph, edge->idB); if(mate->flags.bits.isChaff) continue; if (SurrogatedSingleUnitigContig(mate)) continue; clk.econtig1 = CCOmap.lookup(edge->idA); clk.econtig2 = CCOmap.lookup(edge->idB); clk.orientation = edge->orient; // Don't need to map orientation, always using canonical orientation clk.overlap_type = (isOverlapEdge(edge)) ? AS_OVERLAP : AS_NO_OVERLAP; switch (GetEdgeStatus(edge)) { case LARGE_VARIANCE_EDGE_STATUS: case UNKNOWN_EDGE_STATUS: case INTER_SCAFFOLD_EDGE_STATUS: clk.status = AS_UNKNOWN_IN_ASSEMBLY; break; case TENTATIVE_TRUSTED_EDGE_STATUS: case TRUSTED_EDGE_STATUS: clk.status = AS_IN_ASSEMBLY; break; case TENTATIVE_UNTRUSTED_EDGE_STATUS: case UNTRUSTED_EDGE_STATUS: clk.status = AS_BAD; break; default: assert(0 /* Invalid edge status */); } clk.is_possible_chimera = edge->flags.bits.isPossibleChimera; clk.mean_distance = edge->distance.mean; clk.std_deviation = sqrt(edge->distance.variance); clk.num_contributing = edge->edgesContributing; uint32 edgeCount = 0; uint32 edgeTotal = clk.num_contributing; if ((edgeTotal == 1) && (clk.overlap_type == AS_OVERLAP) && (GlobalData->outputOverlapOnlyContigEdges == FALSE)) // don't output pure overlap edges continue; clk.jump_list = (SnapMate_Pairs *)safe_malloc(sizeof(SnapMate_Pairs) * edgeTotal); if (edge->flags.bits.isRaw) { assert(edgeTotal == 1); if (clk.overlap_type == 
AS_NO_OVERLAP) { clk.jump_list[edgeCount].in1 = FRGmap.lookup(edge->fragA); clk.jump_list[edgeCount].in2 = FRGmap.lookup(edge->fragB); clk.jump_list[edgeCount].type.setIsMatePair(); } else { assert(GlobalData->outputOverlapOnlyContigEdges); clk.jump_list[edgeCount].in1 = AS_UID_undefined(); clk.jump_list[edgeCount].in2 = AS_UID_undefined(); clk.jump_list[edgeCount].type.setIsOverlap(); } edgeCount++; } else { CIEdgeT *redge = edge; assert(redge->nextRawEdge != NULLINDEX); // must have >= 1 raw edge while (redge->nextRawEdge != NULLINDEX) { redge = GetGraphEdge(ScaffoldGraph->ContigGraph, redge->nextRawEdge); if (isOverlapEdge(redge)) { // overlap edges don't count edgeTotal--; continue; } clk.jump_list[edgeCount].in1 = FRGmap.lookup(redge->fragA); clk.jump_list[edgeCount].in2 = FRGmap.lookup(redge->fragB); clk.jump_list[edgeCount].type.setIsMatePair(); edgeCount++; } } assert(edgeCount == edgeTotal); if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); safe_free(clk.jump_list); } } }
void writeCCO(FILE *asmFile, bool doWrite) { SnapConConMesg cco; GenericMesg pmesg = { &cco, MESG_CCO }; GraphNodeIterator contigs; ContigT *contig; fprintf(stderr, "writeCCO()--\n"); InitGraphNodeIterator(&contigs, ScaffoldGraph->ContigGraph, GRAPH_NODE_DEFAULT); while ((contig = NextGraphNodeIterator(&contigs)) != NULL) { assert(contig->id >= 0); assert(contig->id < GetNumGraphNodes(ScaffoldGraph->ContigGraph)); if (contig->flags.bits.isChaff) continue; NodeCGW_T *unitig = GetGraphNode(ScaffoldGraph->CIGraph, contig->info.Contig.AEndCI); if ((ScaffoldGraph->tigStore->getNumUnitigs(contig->id, FALSE) == 1) && (contig->scaffoldID == NULLINDEX) && (unitig->info.CI.numInstances > 0)) // Contig is a surrogate instance continue; MultiAlignT *ma = ScaffoldGraph->tigStore->loadMultiAlign(contig->id, FALSE); cco.eaccession = AS_UID_fromInteger(getUID(uidServer)); cco.iaccession = contig->id; cco.placed = ScaffoldGraph->tigStore->getContigStatus(contig->id); cco.length = GetMultiAlignLength(ma); cco.consensus = Getchar(ma->consensus, 0); cco.quality = Getchar(ma->quality, 0); cco.forced = 0; cco.num_pieces = GetNumIntMultiPoss(ma->f_list); cco.num_unitigs = GetNumIntMultiPoss(ma->u_list); cco.num_vars = GetNumIntMultiPoss(ma->v_list); cco.pieces = NULL; cco.unitigs = NULL; cco.vars = NULL; if (cco.consensus == NULL) fprintf(stderr, "buildCCOMessage()-- contig %d missing consensus sequence\n", cco.iaccession); assert(cco.consensus != NULL); if (cco.length != strlen(cco.consensus)) fprintf(stderr, "buildCCOMessage()-- contig %d length %d != consensus string length "F_SIZE_T"\n", cco.iaccession, cco.length, strlen(cco.consensus)); assert(cco.length == strlen(cco.consensus)); if (cco.num_pieces > 0) { cco.pieces = (SnapMultiPos *)safe_malloc(cco.num_pieces * sizeof(SnapMultiPos)); for(int32 i=0; i<cco.num_pieces; i++) { IntMultiPos *imp = GetIntMultiPos(ma->f_list, i); cco.pieces[i].type = imp->type; cco.pieces[i].eident = FRGmap.lookup(imp->ident); cco.pieces[i].delta_length 
= imp->delta_length; cco.pieces[i].position = imp->position; cco.pieces[i].delta = imp->delta; } } if (cco.num_unitigs > 0) { cco.unitigs = (UnitigPos *)safe_malloc(cco.num_unitigs * sizeof(UnitigPos)); for(int32 i=0; i<cco.num_unitigs; i++) { IntUnitigPos *imp = GetIntUnitigPos(ma->u_list, i); cco.unitigs[i].type = imp->type; cco.unitigs[i].eident = UTGmap.lookup(imp->ident); cco.unitigs[i].position = imp->position; cco.unitigs[i].delta = imp->delta; cco.unitigs[i].delta_length = imp->delta_length; } } if (cco.num_vars > 0) { cco.vars = (IntMultiVar *)safe_malloc(cco.num_vars * sizeof(IntMultiVar)); for(int32 i=0; i<cco.num_vars; i++) { IntMultiVar *imv = GetIntMultiVar(ma->v_list, i); cco.vars[i].var_id = imv->var_id; cco.vars[i].phased_id = imv->phased_id; cco.vars[i].position = imv->position; cco.vars[i].num_reads = imv->num_reads; cco.vars[i].num_alleles = imv->num_alleles; cco.vars[i].num_alleles_confirmed = imv->num_alleles_confirmed; cco.vars[i].min_anchor_size = imv->min_anchor_size; cco.vars[i].var_length = imv->var_length; cco.vars[i].alleles = imv->alleles; cco.vars[i].var_seq_memory = imv->var_seq_memory; cco.vars[i].read_id_memory = imv->read_id_memory; cco.vars[i].enc_num_reads = NULL; cco.vars[i].enc_weights = NULL; cco.vars[i].enc_var_seq = NULL; cco.vars[i].enc_read_ids = NULL; } } if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); safe_free(cco.pieces); safe_free(cco.unitigs); safe_free(cco.vars); CCOmap.add(cco.iaccession, cco.eaccession); } }
void writeULK(FILE *asmFile, bool doWrite) { SnapUnitigLinkMesg ulk; GenericMesg pmesg = { & ulk, MESG_ULK }; GraphNodeIterator nodes; ChunkInstanceT *ci; fprintf(stderr, "writeULK()--\n"); InitGraphNodeIterator(&nodes, ScaffoldGraph->CIGraph, GRAPH_NODE_DEFAULT); while ((ci = NextGraphNodeIterator(&nodes)) != NULL) { assert(ci->type != CONTIG_CGW); if (ci->type == RESOLVEDREPEATCHUNK_CGW) continue; if (ci->flags.bits.isChaff) continue; GraphEdgeIterator edges(ScaffoldGraph->CIGraph, ci->id, ALL_END, ALL_EDGES); CIEdgeT *edge; while ((edge = edges.nextMerged()) != NULL) { if (edge->idA != ci->id || edge->flags.bits.isInferred || edge->flags.bits.isInferredRemoved || edge->flags.bits.isMarkedForDeletion) continue; ChunkInstanceT *mi = GetGraphNode(ScaffoldGraph->CIGraph, edge->idB); if (mi->flags.bits.isChaff) continue; ulk.eunitig1 = UTGmap.lookup(edge->idA); // == ci->id ulk.eunitig2 = UTGmap.lookup(edge->idB); ulk.orientation = edge->orient; // Don't need to map orientation, always using canonical orientation ulk.overlap_type = (isOverlapEdge(edge)) ? AS_OVERLAP : AS_NO_OVERLAP; ulk.is_possible_chimera = edge->flags.bits.isPossibleChimera; ulk.mean_distance = edge->distance.mean; ulk.std_deviation = sqrt(edge->distance.variance); ulk.num_contributing = edge->edgesContributing; ulk.status = AS_UNKNOWN_IN_ASSEMBLY; uint32 edgeCount = 0; uint32 edgeTotal = ulk.num_contributing; if ((edgeTotal == 1) && (ulk.overlap_type == AS_OVERLAP)) // don't output pure overlap edges continue; // Look through the fragment pairs in this edge to decide the status of the link. CIEdgeT *redge = (edge->flags.bits.isRaw) ? 
edge : GetGraphEdge(ScaffoldGraph->CIGraph, edge->nextRawEdge); int numBad = 0; int numGood = 0; int numUnknown = 0; for (; redge != NULL; redge = GetGraphEdge(ScaffoldGraph->CIGraph, redge->nextRawEdge)) { if(isOverlapEdge(redge)) continue; CIFragT *fragA = GetCIFragT(ScaffoldGraph->CIFrags, redge->fragA); CIFragT *fragB = GetCIFragT(ScaffoldGraph->CIFrags, redge->fragB); assert(fragA->flags.bits.edgeStatus == fragB->flags.bits.edgeStatus); if ((fragA->flags.bits.edgeStatus == UNTRUSTED_EDGE_STATUS) || (fragA->flags.bits.edgeStatus == TENTATIVE_UNTRUSTED_EDGE_STATUS)) numBad++; else if ((fragA->flags.bits.edgeStatus == TRUSTED_EDGE_STATUS) || (fragA->flags.bits.edgeStatus == TENTATIVE_TRUSTED_EDGE_STATUS)) numGood++; else numUnknown++; } if (numBad > 0) ulk.status = AS_BAD; else if (numGood > 0) ulk.status = AS_IN_ASSEMBLY; else ulk.status = AS_UNKNOWN_IN_ASSEMBLY; ulk.jump_list = (SnapMate_Pairs *)safe_malloc(sizeof(SnapMate_Pairs) * edgeTotal); if (edge->flags.bits.isRaw) { assert(edgeTotal == 1); ulk.jump_list[edgeCount].in1 = FRGmap.lookup(edge->fragA); ulk.jump_list[edgeCount].in2 = FRGmap.lookup(edge->fragB); ulk.jump_list[edgeCount].type.setIsMatePair(); edgeCount++; } else { assert(edgeTotal > 0); redge = edge; assert(redge->nextRawEdge != NULLINDEX); // must have >= 1 raw edge while (redge->nextRawEdge != NULLINDEX) { redge = GetGraphEdge(ScaffoldGraph->CIGraph, redge->nextRawEdge); if (isOverlapEdge(redge)) { // overlap edges don't count edgeTotal--; continue; } ulk.jump_list[edgeCount].in1 = FRGmap.lookup(redge->fragA); ulk.jump_list[edgeCount].in2 = FRGmap.lookup(redge->fragB); ulk.jump_list[edgeCount].type.setIsMatePair(); edgeCount++; } } assert(edgeCount == edgeTotal); if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); safe_free(ulk.jump_list); } } }
// SeqAn_CNS: copy IUM and ICM messages from an input file to an output file,
// filling in the consensus of any message whose consensus string is empty.
//
// For such a message the member sequences are dumped as FASTA to a scratch
// file (fragments trimmed to their clear range; for contigs, also the
// ungapped consensus of each member unitig) and updateRecord() or
// updateICMRecord() is called with the SeqAn executable.  Messages of any
// other type are read and dropped.
//
// Options (every option takes a value):
//   -G gkpStore   (required) gatekeeper store holding the fragments
//   -S part       gatekeeper store partition to load
//   -c file       (required) input message file
//   -o file       (required) output message file, also used as name prefix
//   -s exe        (required) SeqAn executable
//   -w dir        working directory
//   -u seqStore   sequence store; required once an ICM message is seen
//   -V version    sequence store version
//   -p part       sequence store partition
//
// Returns 0 on success; exits with status 1 on a usage error or when a file
// cannot be opened.
//
int32 main(int32 argc, char **argv) {
  int32   arg = 1;
  int32   err = 0;

  char   *gkpStoreName   = NULL;
  int32   gkpStorePart   = 0;
  char   *msgFile        = NULL;
  char   *outputFileName = NULL;
  char   *seqAn          = NULL;
  char   *wrkDir         = NULL;
  char   *seqStoreName   = NULL;
  int32   seqStoreVer    = 0;
  int32   seqStorePart   = 0;

  argc = AS_configure(argc, argv);

  while (arg < argc) {
    //  Every recognised option consumes the following argument.  A last
    //  argument is therefore either an option without its value or an
    //  unknown word; both are errors.  (Previously argv[argc], i.e. NULL,
    //  was handed to atoi() or stored as a name.)
    if (arg + 1 >= argc) {
      err++;
      break;
    }

    if        (strcmp(argv[arg], "-c") == 0) {
      msgFile = argv[++arg];
    } else if (strcmp(argv[arg], "-G") == 0) {
      gkpStoreName = argv[++arg];
    } else if (strcmp(argv[arg], "-S") == 0) {
      gkpStorePart = atoi(argv[++arg]);
    } else if (strcmp(argv[arg], "-o") == 0) {
      outputFileName = argv[++arg];
    } else if (strcmp(argv[arg], "-s") == 0) {
      seqAn = argv[++arg];
    } else if (strcmp(argv[arg], "-w") == 0) {
      wrkDir = argv[++arg];
    } else if (strcmp(argv[arg], "-u") == 0) {
      seqStoreName = argv[++arg];
    } else if (strcmp(argv[arg], "-V") == 0) {
      seqStoreVer = atoi(argv[++arg]);
    } else if (strcmp(argv[arg], "-p") == 0) {
      seqStorePart = atoi(argv[++arg]);
    } else {
      err++;
    }

    arg++;
  }

  if ((err) || (gkpStoreName == NULL) || (msgFile == NULL) || (outputFileName == NULL) || seqAn == NULL) {
    fprintf(stderr, "USAGE: SeqAn_CNS -G <gkpStore> -c <input.cgb> -o <output.cgi> -s <seqan_executable> [-u seqstore, required for contig consensus] [-w working directory]\n");
    exit(1);
  }

  gkStore  *gkpStore = new gkStore(gkpStoreName, FALSE, FALSE);

  gkpStore->gkStore_loadPartition(gkpStorePart);

  gkFragment     fr;
  GenericMesg   *pmesg;
  tSequenceDB   *sequenceDB = NULL;

  FILE  *infp = fopen(msgFile, "r");
  if (infp == NULL) {
    fprintf(stderr, "SeqAn_CNS: failed to open input file '%s'\n", msgFile);
    exit(1);
  }

  FILE  *tempReads;

  FILE  *outfp = fopen(outputFileName, "w");
  if (outfp == NULL) {
    fprintf(stderr, "SeqAn_CNS: failed to open output file '%s'\n", outputFileName);
    exit(1);
  }

  char   fileName[AS_SEQAN_MAX_BUFFER_LENGTH];
  char  *prefix = outputFileName;

  //  Name of the scratch FASTA file that is rewritten for every message.
  getFileName(prefix, wrkDir, AS_SEQAN_INPUT_NAME, fileName);

  int32 i = 0;

  while (EOF != ReadProtoMesg_AS(infp, &pmesg)) {
    int32 freeMem = 0;

    if (pmesg->t == MESG_IUM) {
      IntUnitigMesg *ium_mesg = (IntUnitigMesg *)pmesg->m;

      if (strlen(ium_mesg->consensus) == 0) {
        tempReads = fopen(fileName, "w");
        if (tempReads == NULL) {
          fprintf(stderr, "SeqAn_CNS: failed to open temporary file '%s'\n", fileName);
          exit(1);
        }

        for (i = 0; i < ium_mesg->num_frags; i++) {
          //  Get the fragment sequence.
          gkpStore->gkStore_getFragment(ium_mesg->f_list[i].ident, &fr, GKFRAGMENT_QLT);

          uint32  clrBeg   = fr.gkFragment_getClearRegionBegin();
          uint32  clrEnd   = fr.gkFragment_getClearRegionEnd  ();
          char   *seqStart = fr.gkFragment_getSequence();
          char   *seq      = seqStart + clrBeg;

          //  Terminate at the end of the clear range.  'seq' already starts
          //  at clrBeg, so the index is the clear range length; indexing by
          //  clrEnd wrote the terminator clrBeg bytes too far.
          seq[clrEnd - clrBeg] = 0;

          AS_UTL_writeFastA(tempReads,
                            seq, clrEnd - clrBeg,
                            ">" F_IID "," F_IID "\n",
                            ium_mesg->f_list[i].position.bgn,
                            ium_mesg->f_list[i].position.end);
        }

        fclose(tempReads);

        updateRecord(ium_mesg, fileName, seqAn, prefix, wrkDir);
        freeMem = 1;
      }

      WriteProtoMesg_AS(outfp, pmesg);

      //  Only free the consensus and quality when this pass replaced them.
      if (freeMem) {
        safe_free(ium_mesg->consensus);
        safe_free(ium_mesg->quality);
      }

    } else if (pmesg->t == MESG_ICM) {
      IntConConMesg *icm_mesg = (IntConConMesg *)pmesg->m;

      if (seqStoreName == NULL) {
        fprintf(stderr, "USAGE: The -u option is required for contig consensus\n");
        exit(1);
      }

      //  The sequence store is opened on the first contig message only.
      if (sequenceDB == NULL) {
        sequenceDB = openSequenceDB(seqStoreName, FALSE, seqStoreVer);
        openSequenceDBPartition(sequenceDB, seqStorePart);
      }

      if (strlen(icm_mesg->consensus) == 0) {
        tempReads = fopen(fileName, "w");
        if (tempReads == NULL) {
          fprintf(stderr, "SeqAn_CNS: failed to open temporary file '%s'\n", fileName);
          exit(1);
        }

        for (i = 0; i < icm_mesg->num_pieces; i++) {
          //  Get the fragment sequence.
          gkpStore->gkStore_getFragment(icm_mesg->pieces[i].ident, &fr, GKFRAGMENT_QLT);

          uint32  clrBeg   = fr.gkFragment_getClearRegionBegin();
          uint32  clrEnd   = fr.gkFragment_getClearRegionEnd  ();
          char   *seqStart = fr.gkFragment_getSequence();
          char   *seq      = seqStart + clrBeg;

          //  See the unitig branch: index by the clear range length.
          seq[clrEnd - clrBeg] = 0;

          AS_UTL_writeFastA(tempReads,
                            seq, clrEnd - clrBeg,
                            ">" F_IID "," F_IID "\n",
                            icm_mesg->pieces[i].position.bgn,
                            icm_mesg->pieces[i].position.end);
        }

        //  Now handle the unitig messages.
        for (i = 0; i < icm_mesg->num_unitigs; i++) {
          //  NOTE(review): these two arrays are created for every unitig and
          //  never released in this function -- confirm whether they leak.
          VA_TYPE(char) *ungappedSequence = CreateVA_char(0);
          VA_TYPE(char) *ungappedQuality  = CreateVA_char(0);

          MultiAlignT *uma = loadMultiAlignTFromSequenceDB(sequenceDB, icm_mesg->unitigs[i].ident, 1);
          assert(uma != NULL);

          GetMultiAlignUngappedConsensus(uma, ungappedSequence, ungappedQuality);

          char *seq = Getchar(ungappedSequence, 0);

          AS_UTL_writeFastA(tempReads,
                            seq, strlen(seq),
                            ">" F_IID "," F_IID "\n",
                            icm_mesg->unitigs[i].position.bgn,
                            icm_mesg->unitigs[i].position.end);
        }

        fclose(tempReads);

        updateICMRecord(icm_mesg, fileName, seqAn, prefix, wrkDir);
        freeMem = 1;
      }

      WriteProtoMesg_AS(outfp, pmesg);

      //  Only free the consensus and quality when this pass replaced them.
      if (freeMem) {
        safe_free(icm_mesg->consensus);
        safe_free(icm_mesg->quality);
      }
    }
  }

  fclose(infp);
  fclose(outfp);

  return 0;
}