void writeAFGFromTigStore(FILE *asmFile, bool doWrite) { AugFragMesg afg; GenericMesg pmesg = { &afg, MESG_AFG }; gkFragment fr; fprintf(stderr, "writeAFGFromTigStore()--\n"); for (uint32 tigID = 0; tigID < ScaffoldGraph->tigStore->numUnitigs(); tigID++) { MultiAlignT *ma = ScaffoldGraph->tigStore->loadMultiAlign(tigID, TRUE); if (ma == NULL) continue; for (uint32 i=0; i<GetNumIntMultiPoss(ma->f_list); i++) { IntMultiPos *imp = GetIntMultiPos(ma->f_list, i); ScaffoldGraph->gkpStore->gkStore_getFragment(imp->ident, &fr, GKFRAGMENT_INF); afg.eaccession = fr.gkFragment_getReadUID(); afg.iaccession = fr.gkFragment_getReadIID(); afg.mate_status = UNASSIGNED_MATE; afg.chaff = 0; afg.clear_rng.bgn = fr.gkFragment_getClearRegionBegin(); afg.clear_rng.end = fr.gkFragment_getClearRegionEnd (); if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); FRGmap.add(afg.iaccession, afg.eaccession); if ((AS_UID_isString(afg.eaccession) == FALSE) && (uidMin <= AS_UID_toInteger(afg.eaccession))) uidMin = AS_UID_toInteger(afg.eaccession) + 1; } } }
//  Fill in a SnapUnitigMesg for unitig 'ID' from the tigStore.
//
//  Returns false (leaving 'utg' untouched) when the unitig cannot be loaded, true otherwise.
//
//  On success utg->f_list is allocated here with safe_malloc(); consensus, quality and the
//  per-fragment delta pointers are borrowed from the loaded multialign, not copied.
//  A fresh UID is taken from uidServer for every unitig that loads.
//
bool buildUTGMessage(int32 ID, SnapUnitigMesg *utg) {
  MultiAlignT *unitig = ScaffoldGraph->tigStore->loadMultiAlign(ID, TRUE);

  if (unitig == NULL) {
    return(false);
  }

  utg->eaccession    = AS_UID_fromInteger(getUID(uidServer));
  utg->iaccession    = ID;
  utg->coverage_stat = ScaffoldGraph->tigStore->getUnitigCoverageStat(ID);
  utg->microhet_prob = ScaffoldGraph->tigStore->getUnitigMicroHetProb(ID);
  utg->status        = ScaffoldGraph->tigStore->getUnitigStatus(ID);

  //  An unassigned unitig is reported as unique.
  if (utg->status == AS_UNASSIGNED) {
    utg->status = AS_UNIQUE;
  }

  utg->length        = GetMultiAlignLength(unitig);
  utg->consensus     = Getchar(unitig->consensus, 0);
  utg->quality       = Getchar(unitig->quality, 0);
  utg->forced        = 0;
  utg->num_frags     = GetNumIntMultiPoss(unitig->f_list);
  utg->num_vars      = 0;
  utg->f_list        = (SnapMultiPos*)safe_malloc(utg->num_frags * sizeof(SnapMultiPos));
  utg->v_list        = NULL;

  //  Complain before asserting so the log names the offending unitig.
  if (utg->consensus == NULL) {
    fprintf(stderr, "buildUTGMessage()-- unitig %d missing consensus sequence\n", utg->iaccession);
  }
  assert(utg->consensus != NULL);

  if (utg->length != strlen(utg->consensus)) {
    fprintf(stderr, "buildUTGMessage()-- unitig %d length %d != consensus string length "F_SIZE_T"\n", utg->iaccession, utg->length, strlen(utg->consensus));
  }
  assert(utg->length == strlen(utg->consensus));

  //  Translate each internal fragment position into its external (UID based) form.
  for (int32 fi = 0; fi < utg->num_frags; fi++) {
    IntMultiPos   *src = GetIntMultiPos(unitig->f_list, fi);
    SnapMultiPos  *dst = utg->f_list + fi;

    dst->type          = src->type;
    dst->eident        = FRGmap.lookup(src->ident);
    dst->position      = src->position;
    dst->delta_length  = src->delta_length;
    dst->delta         = src->delta;
  }

  return(true);
}
bool MultiAlignContig(MultiAlignT *ma, gkStore *UNUSED, CNS_Options *opp) { int32 num_bases = 0; int32 num_unitigs = GetNumIntUnitigPoss(ma->u_list); int32 num_frags = GetNumIntMultiPoss(ma->f_list); int32 num_columns = 0; IntMultiPos *flist = GetIntMultiPos(ma->f_list, 0); IntUnitigPos *ulist = GetIntUnitigPos(ma->u_list, 0); IntMultiVar *vlist = GetIntMultiVar(ma->v_list, 0); SeqInterval *offsets = (SeqInterval *) safe_calloc(num_unitigs,sizeof(SeqInterval)); for (int32 i=0;i<num_unitigs;i++) { int32 flen = (ulist[i].position.bgn < ulist[i].position.end) ? (ulist[i].position.end < ulist[i].position.bgn) : (ulist[i].position.bgn - ulist[i].position.end); num_bases += flen + 2 * AS_CNS_ERROR_RATE * flen; num_columns = (ulist[i].position.bgn > num_columns) ? ulist[i].position.bgn : num_columns; num_columns = (ulist[i].position.end > num_columns) ? ulist[i].position.end : num_columns; //fprintf(stderr, "CTG %d UTG %d %d-%d\n", // ma->maID, ulist[i].ident, ulist[i].position.bgn, ulist[i].position.end); } for (int32 i=0;i<num_frags;i++) { int32 flen = (flist[i].position.bgn < flist[i].position.end) ? (flist[i].position.end < flist[i].position.bgn) : (flist[i].position.bgn - flist[i].position.end); num_bases += flen + 2 * AS_CNS_ERROR_RATE * flen; } ResetStores(num_bases, num_unitigs, num_columns); fragmentMap = CreateScalarHashTable_AS(); fragmentToIMP = CreateScalarHashTable_AS(); for (int32 i=0; i<num_frags; i++) { // Add all fragments in the contigs f_list to the fragmentMap. This tells us if a fragment is // not placed in a surrogate (because they aren't in the contigs f_list, but will appear in a // surrogate unitigs f_list). // if (HASH_SUCCESS != InsertInHashTable_AS(fragmentMap, flist[i].ident, 0, 1, 0)) { fprintf(stderr, "MultiAlignContig()-- Contig %d FAILED. 
Fragment %d is a duplicate.\n", ma->maID, flist[i].ident); return(false); } // SK store IID to IMP message mapping InsertInHashTable_AS(fragmentToIMP, flist[i].ident, 0, (uint64)&flist[i], 0); } for (int32 i=0;i<num_unitigs;i++) { uint32 complement = (ulist[i].position.bgn<ulist[i].position.end)?0:1; uint32 fid = AppendFragToLocalStore(AS_UNITIG, ulist[i].ident, complement, 0, ulist[i].type); offsets[fid].bgn = complement?ulist[i].position.end:ulist[i].position.bgn; offsets[fid].end = complement?ulist[i].position.bgn:ulist[i].position.end; } MANode *manode = CreateMANode(ma->maID); // Seed multiAlignment with 1st fragment of 1st unitig SeedMAWithFragment(manode->lid, GetFragment(fragmentStore,0)->lid, opp); PlaceFragments(GetFragment(fragmentStore,0)->lid, ulist + GetFragment(fragmentStore,0)->lid, opp); // Now, loop on remaining fragments, aligning to: // a) containing frag (if contained) // or b) previously aligned frag VA_TYPE(int32) *trace = CreateVA_int32(AS_READ_MAX_NORMAL_LEN+1); for (int32 i=1;i<num_unitigs;i++) { Fragment *afrag = NULL; Fragment *bfrag = GetFragment(fragmentStore,i); int32 ahang = 0; int32 bhang = 0; int32 ovl = 0; int32 alid = 0; int32 blid = bfrag->lid; OverlapType otype; int32 olap_success = 0; int32 try_contained = 0; int32 align_to = i - 1; Fragment *afrag_first = NULL; int32 ahang_first = 0; int32 bhang_first = 0; while (!olap_success) { nextFrag: if (try_contained == 0) // Skip contained stuff. 
while ((align_to > 0) && ((GetFragment(fragmentStore, align_to)->is_contained) || (GetFragment(fragmentStore, align_to)->container_iid > 0))) align_to--; if (align_to < 0) { if (VERBOSE_MULTIALIGN_OUTPUT) fprintf(stderr, "MultiAlignContig: hit the beginning of unitig list: no unitig upstream overlaps with current unitig %d\n", bfrag->iid); if (try_contained == 0) { if (VERBOSE_MULTIALIGN_OUTPUT) fprintf(stderr, "MultiAlignContig: trying contained afrags for bfrag %d\n", bfrag->iid); try_contained = 1; align_to = i-1; goto nextFrag; } break; } afrag = GetFragment(fragmentStore, align_to); alid = afrag->lid; ahang = offsets[blid].bgn - offsets[alid].bgn; bhang = offsets[blid].end - offsets[alid].end; if (afrag_first == NULL) { afrag_first = afrag; ahang_first = ahang; bhang_first = bhang; } // This code copied from MultiAlignUnitig. if (offsets[afrag->lid].bgn < offsets[bfrag->lid].bgn) if (offsets[afrag->lid].end < offsets[bfrag->lid].end) ovl = offsets[afrag->lid].end - offsets[bfrag->lid].bgn; else //ovl = offsets[bfrag->lid].end - offsets[bfrag->lid].bgn; ovl = bfrag->length; else if (offsets[afrag->lid].end < offsets[bfrag->lid].end) //ovl = offsets[afrag->lid].end - offsets[afrag->lid].bgn; ovl = afrag->length; else ovl = offsets[bfrag->lid].end - offsets[afrag->lid].bgn; // End of copy if (ovl <= 0) { if (VERBOSE_MULTIALIGN_OUTPUT) fprintf(stderr, "MultiAlignContig: positions of afrag %d and bfrag %d do not overlap. Proceed to the next upstream afrag\n", afrag->iid, bfrag->iid); align_to--; goto nextFrag; } olap_success = GetAlignmentTraceDriver(afrag, NULL, bfrag, &ahang, &bhang, ovl, trace, &otype, GETALIGNTRACE_CONTIGU, (blid + 1 < num_unitigs) ? (offsets[blid + 1].bgn - offsets[blid].bgn) : 800); // Nope, fail. 
if (!olap_success) { if (VERBOSE_MULTIALIGN_OUTPUT) fprintf(stderr, "MultiAlignContig: Positions of afrag %d (%c) and bfrag %d (%c) overlap, but GetAlignmentTrace returns no overlap success.\n", afrag->iid, afrag->type, bfrag->iid, bfrag->type); align_to--; if ((align_to < 0) && (!try_contained)) { if (VERBOSE_MULTIALIGN_OUTPUT) fprintf(stderr, "MultiAlignContig: Try contained afrags for bfrag %d\n", bfrag->iid); try_contained = 1; align_to = i-1; } } } // while !olap_success if ((!olap_success) && (FORCE_UNITIG_ABUT == 0)) { fprintf(stderr,"MultiAlignContig: Could (really) not find overlap between %d (%c) and %d (%c), estimated ahang %d\n", afrag->iid,afrag->type,bfrag->iid,bfrag->type, ahang); fprintf(stderr,"MultiAlignContig: You can (possibly) force these to abut with '-D forceunitigabut', but that code is buggy at best.\n"); goto returnFailure; } #if 1 if ((!olap_success) && (FORCE_UNITIG_ABUT == 1)) { if (afrag_first) { afrag = afrag_first; ahang = ahang_first; bhang = bhang_first; } else { // Dang, we're really screwed. Nobody overlapped with us. // Cross our fingers and find the closest end point. // int32 maxOvl = -offsets[blid].bgn; //if (VERBOSE_MULTIALIGN_OUTPUT) // fprintf(stderr, "MultiAlignContig: YIKES! Your unitig doesn't overlap with anything! Picking the closest thing!\n"); align_to = i-1; while (align_to >= 0) { if ((try_contained == 0) && ((GetFragment(fragmentStore, align_to)->is_contained) || (GetFragment(fragmentStore, align_to)->container_iid > 0))) { // NOP! Found a contained frag, and we want to skip it. 
} else if (maxOvl < offsets[alid].end - offsets[blid].bgn) { afrag = GetFragment(fragmentStore, align_to); alid = afrag->lid; ahang = offsets[blid].bgn - offsets[alid].bgn; maxOvl = offsets[alid].end - offsets[blid].bgn; //fprintf(stderr, "MultiAlignContig: RESET align_to=%d alid=%d maxOvl=%d ahang=%d\n", align_to, alid, maxOvl, ahang); } align_to--; } // while align_to >= 0 } fprintf(stderr, "MultiAlignContig: Forcing abut between afrag %d (%c) and bfrag %d (%c) in contig %d.\n", afrag->iid, afrag->type, bfrag->iid, bfrag->type, ma->maID); // Force a 1bp overlap. We'd like to strictly abut, but ApplyAlignment() requires that there // be an overlap, and removing checks for that seem like a bad idea. // ahang = afrag->length - 1; otype = AS_DOVETAIL; int32 zero = 0; ResetVA_int32(trace); AppendVA_int32(trace, &zero); assert(*Getint32(trace,0) == 0); assert(GetNumint32s(trace) == 1); } #endif // Unitig is placed, or we just forced it to be placed. if (otype == AS_CONTAINMENT) { bfrag->is_contained = 1; if (bfrag->container_iid == 0) bfrag->container_iid = 1; // Not sure why 1 and not afrag->iid } ApplyAlignment(afrag->lid, 0, NULL, bfrag->lid, ahang, Getint32(trace,0)); PlaceFragments(bfrag->lid, ulist + bfrag->lid, opp); } // over all unitigs // Now, must find fragments in regions of overlapping unitigs, and adjust // their alignments as needed RefreshMANode(manode->lid, 0, opp, NULL, NULL, 0, 0); //fprintf(stderr,"MultiAlignContig: Initial pairwise induced alignment\n"); //PrintAlignment(stderr,manode->lid,0,-1); AbacusRefine(manode,0,-1,CNS_SMOOTH, opp); MergeRefine(manode->lid, NULL, 0, opp, 1); AbacusRefine(manode,0,-1,CNS_POLYX, opp); //fprintf(stderr,"MultiAlignContig: POLYX refined alignment\n"); //PrintAlignment(stderr,manode->lid,0,-1); { IntMultiVar *vl = NULL; int32 nv = 0; RefreshMANode(manode->lid, 0, opp, &nv, &vl, 0, 0); AbacusRefine(manode,0,-1,CNS_INDEL, opp); MergeRefine(manode->lid, ma->v_list, 0, opp, 2); } //fprintf(stderr,"MultiAlignContig: 
Final refined alignment\n"); //PrintAlignment(stderr,manode->lid,0,-1); //if (num_frags == 0) // PrintAlignment(stderr,manode->lid,0,-1); GetMANodeConsensus(manode->lid, ma->consensus, ma->quality); GetMANodePositions(manode->lid, ma); DeleteMANode(manode->lid); safe_free(offsets); Delete_VA(trace); DeleteHashTable_AS(fragmentMap); fragmentMap = NULL; DeleteHashTable_AS(fragmentToIMP); fragmentToIMP = NULL; return(true); returnFailure: safe_free(offsets); Delete_VA(trace); DeleteHashTable_AS(fragmentMap); fragmentMap = NULL; DeleteHashTable_AS(fragmentToIMP); fragmentToIMP = NULL; return(false); }
static int abAbacus::SetUngappedFragmentPositions(FragType type,int32 n_frags, MultiAlignT *uma) { int32 num_frags = GetNumIntMultiPoss(uma->f_list); int32 num_unitigs = GetNumIntUnitigPoss(uma->u_list); HashTable_AS *unitigFrags = CreateScalarHashTable_AS(); int32 num_columns = GetMultiAlignLength(uma); int32 ungapped_pos = 0; int32 *gapped_positions = new int32 [num_columns + 1]; char *consensus = Getchar(uma->consensus,0); for (int32 i=0; i<num_columns+1; i++) { gapped_positions[i] = ungapped_pos; if (consensus[i] != '-') ungapped_pos++; } // Remember the first fragment we add. int32 first_frag = GetNumCNS_AlignedContigElements(fragment_positions); for (int32 ifrag=0; ifrag<num_frags; ifrag++) { CNS_AlignedContigElement epos; IntMultiPos *frag = GetIntMultiPos(uma->f_list, ifrag); if (ExistsInHashTable_AS(unitigFrags, frag->ident, 0)) { fprintf(stderr,"SetUngappedFragmentPositions()-- ident %d already in hashtable\n", frag->ident); assert(0); } if (HASH_SUCCESS != InsertInHashTable_AS(unitigFrags, frag->ident, 0, 1, 0)) { fprintf(stderr,"SetUngappedFragmentPositions()-- Failure to insert ident %d in hashtable\n", frag->ident); assert(0); } assert(frag->position.bgn >= 0); assert(frag->position.bgn < num_columns + 1); assert(frag->position.end >= 0); assert(frag->position.end < num_columns + 1); epos.frg_or_utg = CNS_ELEMENT_IS_FRAGMENT; epos.idx.fragment.frgIdent = frag->ident; epos.idx.fragment.frgType = frag->type; epos.idx.fragment.frgContained = frag->contained; epos.idx.fragment.frgInUnitig = (type == AS_CONTIG) ? 
-1 : uma->maID; epos.position.bgn = gapped_positions[frag->position.bgn]; epos.position.end = gapped_positions[frag->position.end]; //fprintf(stderr, "SetUngappedFragmentPositions()-- FRG id=%d type=%c pos=%d,%d (orig pos=%d,%d)\n", // frag->ident, frag->type, epos.position.bgn, epos.position.end, frag->position.bgn, frag->position.end); // Adjust the ungapped position if we fall within a gap // if (epos.position.bgn == epos.position.end) { fprintf(stderr,"SetUngappedFragmentPositions()-- Encountered bgn==end=="F_S32" in ungapped coords within SetUngappedFragmentPositions for "F_CID "(gapped coords "F_S32","F_S32")\n", epos.position.bgn,frag->ident,frag->position.bgn,frag->position.end); assert(frag->position.bgn != frag->position.end); if (frag->position.bgn < frag->position.end) { if (epos.position.bgn > 0) epos.position.bgn--; else epos.position.end++; } else { if (epos.position.end > 0) epos.position.end--; else epos.position.bgn++; } fprintf(stderr,"SetUngappedFragmentPositions()-- Reset to "F_S32","F_S32"\n", epos.position.bgn, epos.position.end); } AppendVA_CNS_AlignedContigElement(fragment_positions, &epos); } for (int32 ifrag=0; ifrag < num_unitigs; ifrag++){ CNS_AlignedContigElement epos; IntUnitigPos *unitig = GetIntUnitigPos(uma->u_list, ifrag); epos.frg_or_utg = CNS_ELEMENT_IS_UNITIG; epos.idx.unitig.utgIdent = unitig->ident; epos.idx.unitig.utgType = unitig->type; epos.position.bgn = gapped_positions[unitig->position.bgn]; epos.position.end = gapped_positions[unitig->position.end]; //fprintf(stderr, "SetUngappedFragmentPositions()-- UTG id=%d type=%c pos=%d,%d (orig pos=%d,%d)\n", // unitig->ident, unitig->type, epos.position.bgn, epos.position.end, unitig->position.bgn, unitig->position.end); AppendVA_CNS_AlignedContigElement(fragment_positions,&epos); } // This is used only by ReplaceEndUnitigInContig(). Mark fragments in the "anchoring" contig // that belong to this unitig. 
// if (type != AS_CONTIG) { Fragment *anchor = GetFragment(fragmentStore,0); if ((anchor != NULL) && (anchor->type == AS_CONTIG)) { CNS_AlignedContigElement *af = GetCNS_AlignedContigElement(fragment_positions, anchor->components); for (int32 ifrag=0; ifrag < anchor->n_components; ifrag++, af++) { if ((af->frg_or_utg == CNS_ELEMENT_IS_FRAGMENT) && (ExistsInHashTable_AS(unitigFrags, af->idx.fragment.frgIdent, 0))) af->idx.fragment.frgInUnitig = uma->maID; } } } DeleteHashTable_AS(unitigFrags); delete [] gapped_positions; return first_frag; }
MultiAlignT * ReplaceEndUnitigInContig(uint32 contig_iid, uint32 unitig_iid, int32 extendingLeft, CNS_Options *opp) { int32 cid,tid; // local id of contig (cid), and unitig(tid) int32 aid,bid; int32 i,num_unitigs; MultiAlignT *oma; MultiAlignT *cma; IntUnitigPos *u_list; IntMultiPos *f_list; IntMultiVar *v_list; int32 append_left=0; int32 num_frags=0; int32 complement=0; MANode *ma; Fragment *cfrag; Fragment *tfrag = NULL; static VA_TYPE(int32) *trace=NULL; oma = tigStore->loadMultiAlign(contig_iid, FALSE); ResetStores(2 * GetNumchars(oma->consensus), 2, 2 * GetNumchars(oma->consensus)); num_unitigs = GetNumIntUnitigPoss(oma->u_list); num_frags = GetNumIntMultiPoss(oma->f_list); u_list = GetIntUnitigPos(oma->u_list,0); f_list = GetIntMultiPos(oma->f_list,0); v_list = GetIntMultiVar(oma->v_list,0); //PrintIMPInfo(stderr, num_frags, f_list); //PrintIUPInfo(stderr, num_unitigs, u_list); // capture the consensus sequence of the original contig and put into local "fragment" format cid = AppendFragToLocalStore(AS_CONTIG, contig_iid, 0, 0, AS_OTHER_UNITIG); fprintf(stderr,"ReplaceEndUnitigInContig()-- contig %d unitig %d isLeft(%d)\n", contig_iid,unitig_iid,extendingLeft); // The only real value-added from ReplaceUnitigInContig is a new consensus sequence for the contig // some adjustments to positions go along with this, but the real compute is an alignment // between the old contig consensus and the updated unitig // // first we want to determine whether unitig is on left or right of contig, // so that alignment can be done with a positive ahang // if u is at left, i.e.: // // C---------------C // u------u // then initialize new alignment with unitig, and add contig, else // // if u is at right, i.e.: // // C---------------C // u------u // then initialize new alignment with contig, and add unitig, else ma = CreateMANode(0); if ( trace == NULL ) trace = CreateVA_int32(AS_READ_MAX_NORMAL_LEN); ResetVA_int32(trace); { int32 ahang,bhang,pos_offset=0; int32 
tigs_adjusted_pos=0; OverlapType otype; int32 olap_success=0; cfrag=GetFragment(fragmentStore,cid); for(i=0;i<num_unitigs;i++) { uint32 id=u_list[i].ident; if ( id == unitig_iid ) { int32 bgn=u_list[i].position.bgn; int32 end=u_list[i].position.end; int32 complement_tmp=(bgn<end)?0:1; int32 left=(complement_tmp)?end:bgn; int32 right=(complement_tmp)?bgn:end; complement=complement_tmp; tid = AppendFragToLocalStore(AS_UNITIG, id, complement, 0, AS_OTHER_UNITIG); tfrag=GetFragment(fragmentStore,tid); if ( extendingLeft ) { // need to set aid to unitig to preserve positive ahang -- and we now should always // have a bhang of zero. append_left=1; aid=tid; bid=cid; // and ahang estimate is the diff in size between // new unitig (GetFragment(fragmentStore,tid)->length) and old unitig (right-left) ahang = GetFragment(fragmentStore,tid)->length - (right-left); bhang = 0; } else { // -------- // ---+++ // We extended the unitig by "+++". The ahang is just the // start position of the original placement, and the bhang // is the amount extended (as above). aid=cid; bid=tid; ahang = left; bhang = GetFragment(fragmentStore,tid)->length - (right-left); } SeedMAWithFragment(ma->lid, aid, opp); // The expected length of this alignment is always the length of the original unitig. int32 ovl = right - left; olap_success = GetAlignmentTrace(aid, 0, bid, &ahang, &bhang, ovl, trace, &otype, DP_Compare, DONT_SHOW_OLAP, 0, GETALIGNTRACE_MERGE, AS_CGW_ERROR_RATE); if (!olap_success) olap_success = GetAlignmentTrace(aid, 0, bid, &ahang, &bhang, ovl, trace, &otype, Local_Overlap_AS_forCNS, DONT_SHOW_OLAP, 0, GETALIGNTRACE_MERGE, AS_CGW_ERROR_RATE); // If the alignment fails -- usually because the ahang is // negative -- return an empty alignment. This causes // extendClearRanges (the sole user of this function) to // gracefully handle the failure. 
// if (olap_success == 0) { return(NULL); assert(olap_success); } ApplyAlignment(aid, 0, NULL, bid, ahang, Getint32(trace,0)); RefreshMANode(ma->lid, 0, opp, NULL, NULL, 0, 0); //PrintAlignment(stderr,ma->lid,0,-1); break; } } } // Now, want to generate a new MultiAlignT which is an appropriate adjustment of original cma = CreateMultiAlignT(); cma->maID = -1; cma->data = oma->data; cma->consensus = CreateVA_char(GetMANodeLength(ma->lid)+1); cma->quality = CreateVA_char(GetMANodeLength(ma->lid)+1); GetMANodeConsensus(ma->lid, cma->consensus, cma->quality); // no deltas required at this stage // merge the f_lists and u_lists by cloning and concating cma->f_list = Clone_VA(oma->f_list); cma->u_list = Clone_VA(oma->u_list); cma->v_list = Clone_VA(oma->v_list); cma->fdelta = CreateVA_int32(0); cma->udelta = CreateVA_int32(0); { CNS_AlignedContigElement *components; CNS_AlignedContigElement *tcomponents; CNS_AlignedContigElement *contig_component; CNS_AlignedContigElement *aligned_component; int32 ifrag=0; int32 iunitig=0; IntMultiPos *imp; IntUnitigPos *iup; Fragment *frag; int32 ci=0; int32 tc=0; //unitig component index int32 bgn,end,left,right,tmp; int32 range_bgn=0,range_end=0,new_tig=0; components=GetCNS_AlignedContigElement(fragment_positions,cfrag->components); tcomponents=GetCNS_AlignedContigElement(fragment_positions,tfrag->components); // make adjustments to positions if ( append_left) { // fragments within unitig are 0 to tfrag->n_components // and cfrag->n_components-num_unitigs range_bgn = 0; range_end = tfrag->n_components-1; new_tig=cfrag->n_components-num_unitigs; } else { // changed unitig on right // fragments within unitig are (num_frags-tfrag->n_components) to num_frags // and cfrag->n_components-1; range_bgn = (num_frags-(tfrag->n_components-1)); range_end = num_frags; new_tig=cfrag->n_components-1; } while (ci < cfrag->n_components) { contig_component = &components[ci]; if ( contig_component->frg_or_utg == CNS_ELEMENT_IS_FRAGMENT && 
contig_component->idx.fragment.frgInUnitig == unitig_iid ) { aligned_component = &tcomponents[tc++]; if ( complement ) { bgn = tfrag->length-aligned_component->position.bgn; end = tfrag->length-aligned_component->position.end; } else { bgn = aligned_component->position.bgn; end = aligned_component->position.end; } frag = tfrag; #ifdef DEBUG_POSITIONS fprintf(stderr,"compci->idx %12d bgn: %10d end: %10d\n",ci,bgn,end); #endif } else if ( ci == new_tig ) { aligned_component = &tcomponents[tc++]; if ( complement ) { bgn = tfrag->length-aligned_component->position.bgn; end = tfrag->length-aligned_component->position.end; } else { bgn = aligned_component->position.bgn; end = aligned_component->position.end; } frag = tfrag; #ifdef DEBUG_POSITIONS fprintf(stderr,"compci->idx %12d bgn: %10d end: %10d\n",ci,bgn,end); #endif } else { aligned_component = contig_component; bgn = aligned_component->position.bgn; end = aligned_component->position.end; frag = cfrag; #ifdef DEBUG_POSITIONS fprintf(stderr,"compci->idx %12d bgn: %10d end: %10d\n",ci,bgn,end); #endif } left = (bgn<end)?bgn:end; right = (bgn<end)?end:bgn; //if ( ci == new_tig ) { // left = 0; // right = frag->length; //} left = GetColumn(columnStore, GetBead(beadStore,frag->firstbead.get() + left )->column_index)->ma_index; right= GetColumn(columnStore, GetBead(beadStore,frag->firstbead.get() + right-1)->column_index)->ma_index + 1; tmp = bgn; bgn = (bgn<end)?left:right; end = (tmp<end)?right:left; if (aligned_component->frg_or_utg==CNS_ELEMENT_IS_UNITIG) { iup = GetIntUnitigPos(cma->u_list,iunitig); iup->position.bgn = bgn; iup->position.end = end; iup->delta_length = 0; iup->delta = NULL; #ifdef DEBUG_POSITIONS fprintf(stderr," element %d at %d,%d\n", ci,bgn,end); #endif ci++;iunitig++; } else { imp = GetIntMultiPos(cma->f_list,ifrag); imp->ident = aligned_component->idx.fragment.frgIdent; imp->contained = aligned_component->idx.fragment.frgContained; imp->position.bgn = bgn; imp->position.end = end; #ifdef 
DEBUG_POSITIONS fprintf(stderr," element %d at %d,%d\n", ci,bgn,end); #endif imp->delta_length = 0; imp->delta = NULL; ci++;ifrag++; } } } DeleteMANode(ma->lid); return cma; }
//  Debugging aid: print a description of one contig to stderr.
//
//  Printed, in order: the contig id, orientation, length and end offsets; its consensus
//  (whole if shorter than 5000 characters, otherwise the two ends); every unitig in the
//  contig with the positions of that unitig's fragments; and every edge of the contig in
//  the ContigGraph.
//
//  The consensus is reverse-complemented in a local buffer when offsetAEnd is not below
//  offsetBEnd.
//
void dumpContigInfo(ChunkInstanceT *contig) {
  VA_TYPE(char) *consensus = CreateVA_char(2048);
  VA_TYPE(char) *quality   = CreateVA_char(2048);

  fprintf( stderr, "*********************** contig analysis **************************\n");
  fprintf( stderr, "analyzing contig: %d\n", contig->id);

  //  0 when the A end comes first, 1 otherwise.
  int contigOrientation = (contig->offsetAEnd.mean < contig->offsetBEnd.mean) ? 0 : 1;

  fprintf(stderr, "contig orientation: %d\t length: %d contig offsetAEnd: %d\t offsetBEnd: %d\n", contigOrientation, (int)contig->bpLength.mean, (int)contig->offsetAEnd.mean, (int)contig->offsetBEnd.mean);

  MultiAlignT *ma = ScaffoldGraph->tigStore->loadMultiAlign(contig->id, ScaffoldGraph->ContigGraph->type == CI_GRAPH);

  // Get the consensus sequences for the contig from the Store
  GetConsensus(ScaffoldGraph->ContigGraph, contig->id, consensus, quality);

  char *seq1 = Getchar(consensus, 0);
  int   len1 = strlen(seq1);

  if (contigOrientation == 1) {
    reverseComplementSequence(seq1, len1);
  }

  if (len1 >= 5000) {
    //  Long contig: show only the two ends.  The left end is cut by temporarily
    //  terminating the string at offset 2500.
    char held = seq1[2500];

    seq1[2500] = '\0';
    fprintf( stderr, ">contig%d left end\n", contig->id);
    fprintf( stderr, "%s\n", seq1);
    seq1[2500] = held;

    fprintf( stderr, ">contig%d right end\n", contig->id);
    fprintf( stderr, "%s\n", seq1 + len1 - 2501);
  } else {
    fprintf( stderr, ">contig%d consensus seq (flipped to reflect scaff orientation)\n", contig->id);
    fprintf( stderr, "%s\n", seq1);
  }

#if 1
  int numUnitigs = GetNumIntUnitigPoss(ma->u_list);

  fprintf( stderr, "number unitigs: %d\n", numUnitigs);

  for (int ui = 0; ui < numUnitigs; ui++) {
    IntUnitigPos   *upos   = GetIntUnitigPos( ma->u_list, ui);
    ChunkInstanceT *unitig = GetGraphNode( ScaffoldGraph->CIGraph, upos->ident);
    MultiAlignT    *uma    = ScaffoldGraph->tigStore->loadMultiAlign(unitig->id, ScaffoldGraph->CIGraph->type == CI_GRAPH);

    fprintf( stderr, " unitig: %d\t num frags: %ld surrogate: %d\n", unitig->id, GetNumIntMultiPoss(uma->f_list), (unitig->flags.bits.isStoneSurrogate || unitig->flags.bits.isWalkSurrogate));

    //  For a surrogate, report the fragments of the unitig it was derived from.
    if (unitig->flags.bits.isStoneSurrogate || unitig->flags.bits.isWalkSurrogate) {
      fprintf (stderr, " surrogate unitig offsetAEnd: %f, offsetBEnd: %f\n", unitig->offsetAEnd.mean, unitig->offsetBEnd.mean);

      unitig = GetGraphNode( ScaffoldGraph->CIGraph, unitig->info.CI.baseID);
      fprintf ( stderr, " using original unitig: %d\n", unitig->id);

      uma = ScaffoldGraph->tigStore->loadMultiAlign(unitig->id, ScaffoldGraph->CIGraph->type == CI_GRAPH);
    }

    // now print out info on the frags in the unitig
    for (int fi = 0; fi < GetNumIntMultiPoss(uma->f_list); fi++) {
      IntMultiPos *imp  = GetIntMultiPos(uma->f_list, fi);
      CIFragT     *frag = GetCIFragT(ScaffoldGraph->CIFrags, imp->ident);

      fprintf(stderr, " frag: %6d\t contig pos (5p, 3p): %6d, %6d\n", imp->ident, (int) frag->contigOffset5p.mean, (int) frag->contigOffset3p.mean);
    }
  }
#endif

#if 1
  GraphEdgeIterator edges(ScaffoldGraph->ContigGraph, contig->id, ALL_END, ALL_EDGES);

  for (CIEdgeT *e = edges.nextRaw(); e != NULL; e = edges.nextRaw()) {
    PrintGraphEdge( stderr, ScaffoldGraph->ContigGraph, "Analyzing edge", e, 0);
  }
#endif

  DeleteVA_char(consensus);
  DeleteVA_char(quality);
}
void writeCCO(FILE *asmFile, bool doWrite) { SnapConConMesg cco; GenericMesg pmesg = { &cco, MESG_CCO }; GraphNodeIterator contigs; ContigT *contig; fprintf(stderr, "writeCCO()--\n"); InitGraphNodeIterator(&contigs, ScaffoldGraph->ContigGraph, GRAPH_NODE_DEFAULT); while ((contig = NextGraphNodeIterator(&contigs)) != NULL) { assert(contig->id >= 0); assert(contig->id < GetNumGraphNodes(ScaffoldGraph->ContigGraph)); if (contig->flags.bits.isChaff) continue; NodeCGW_T *unitig = GetGraphNode(ScaffoldGraph->CIGraph, contig->info.Contig.AEndCI); if ((ScaffoldGraph->tigStore->getNumUnitigs(contig->id, FALSE) == 1) && (contig->scaffoldID == NULLINDEX) && (unitig->info.CI.numInstances > 0)) // Contig is a surrogate instance continue; MultiAlignT *ma = ScaffoldGraph->tigStore->loadMultiAlign(contig->id, FALSE); cco.eaccession = AS_UID_fromInteger(getUID(uidServer)); cco.iaccession = contig->id; cco.placed = ScaffoldGraph->tigStore->getContigStatus(contig->id); cco.length = GetMultiAlignLength(ma); cco.consensus = Getchar(ma->consensus, 0); cco.quality = Getchar(ma->quality, 0); cco.forced = 0; cco.num_pieces = GetNumIntMultiPoss(ma->f_list); cco.num_unitigs = GetNumIntMultiPoss(ma->u_list); cco.num_vars = GetNumIntMultiPoss(ma->v_list); cco.pieces = NULL; cco.unitigs = NULL; cco.vars = NULL; if (cco.consensus == NULL) fprintf(stderr, "buildCCOMessage()-- contig %d missing consensus sequence\n", cco.iaccession); assert(cco.consensus != NULL); if (cco.length != strlen(cco.consensus)) fprintf(stderr, "buildCCOMessage()-- contig %d length %d != consensus string length "F_SIZE_T"\n", cco.iaccession, cco.length, strlen(cco.consensus)); assert(cco.length == strlen(cco.consensus)); if (cco.num_pieces > 0) { cco.pieces = (SnapMultiPos *)safe_malloc(cco.num_pieces * sizeof(SnapMultiPos)); for(int32 i=0; i<cco.num_pieces; i++) { IntMultiPos *imp = GetIntMultiPos(ma->f_list, i); cco.pieces[i].type = imp->type; cco.pieces[i].eident = FRGmap.lookup(imp->ident); cco.pieces[i].delta_length 
= imp->delta_length; cco.pieces[i].position = imp->position; cco.pieces[i].delta = imp->delta; } } if (cco.num_unitigs > 0) { cco.unitigs = (UnitigPos *)safe_malloc(cco.num_unitigs * sizeof(UnitigPos)); for(int32 i=0; i<cco.num_unitigs; i++) { IntUnitigPos *imp = GetIntUnitigPos(ma->u_list, i); cco.unitigs[i].type = imp->type; cco.unitigs[i].eident = UTGmap.lookup(imp->ident); cco.unitigs[i].position = imp->position; cco.unitigs[i].delta = imp->delta; cco.unitigs[i].delta_length = imp->delta_length; } } if (cco.num_vars > 0) { cco.vars = (IntMultiVar *)safe_malloc(cco.num_vars * sizeof(IntMultiVar)); for(int32 i=0; i<cco.num_vars; i++) { IntMultiVar *imv = GetIntMultiVar(ma->v_list, i); cco.vars[i].var_id = imv->var_id; cco.vars[i].phased_id = imv->phased_id; cco.vars[i].position = imv->position; cco.vars[i].num_reads = imv->num_reads; cco.vars[i].num_alleles = imv->num_alleles; cco.vars[i].num_alleles_confirmed = imv->num_alleles_confirmed; cco.vars[i].min_anchor_size = imv->min_anchor_size; cco.vars[i].var_length = imv->var_length; cco.vars[i].alleles = imv->alleles; cco.vars[i].var_seq_memory = imv->var_seq_memory; cco.vars[i].read_id_memory = imv->read_id_memory; cco.vars[i].enc_num_reads = NULL; cco.vars[i].enc_weights = NULL; cco.vars[i].enc_var_seq = NULL; cco.vars[i].enc_read_ids = NULL; } } if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); safe_free(cco.pieces); safe_free(cco.unitigs); safe_free(cco.vars); CCOmap.add(cco.iaccession, cco.eaccession); } }
void PrintMultiAlignT(FILE *out, MultiAlignT *ma, gkStore *frag_store, int32 show_qv, int32 dots, uint32 clrrng_flag) { int32 depth; int32 i; int32 window; char **multia=NULL; int32 **idarray; int32 **oriarray; char *consensus = Getchar(ma->consensus,0); char *quality = Getchar(ma->quality,0); gkFragment rsp; dots = 0; if ((consensus == NULL) || (consensus[0] == 0)) { fprintf(out, "No MultiAlignment to print for tig %d -- no consensus sequence present.\n", ma->maID); return; } int32 length = strlen(consensus); IMP2Array(GetIntMultiPos(ma->f_list,0), GetNumIntMultiPoss(ma->f_list), GetNumchars(ma->consensus), frag_store, &depth, &multia, &idarray, &oriarray, clrrng_flag); fprintf(out,"<<< begin Contig %d >>>",ma->maID);; char gruler[MULTIALIGN_PRINT_WIDTH + 200]; char uruler[MULTIALIGN_PRINT_WIDTH + 200]; int32 ungapped = 1; int32 tick = 1; for (window=0;window<length;) { int32 row_id = 0; int32 orient = 0; int32 rowlen = (window + MULTIALIGN_PRINT_WIDTH < length) ? MULTIALIGN_PRINT_WIDTH : length - window; fprintf(out, "\n"); fprintf(out, "\n"); fprintf(out, "<<< Contig %d, gapped length: %d >>>\n",ma->maID, length); { memset(gruler, 0, MULTIALIGN_PRINT_WIDTH + 200); memset(uruler, 0, MULTIALIGN_PRINT_WIDTH + 200); for (int32 rowind=0; rowind<rowlen; rowind++) { if (((window + 1 + rowind) % 25) == 0) sprintf(gruler + rowind, "| GAP=%d", window + 1 + rowind); if ((ungapped % 25) == 0) sprintf(uruler + rowind, "| UNG=%d", ungapped); if (consensus[window + rowind] != '-') ungapped++; } for (int32 i=0; i<MULTIALIGN_PRINT_WIDTH; i++) { if (gruler[i] == 0) gruler[i] = ' '; if (uruler[i] == 0) uruler[i] = ' '; } for (int32 i=MULTIALIGN_PRINT_WIDTH-1; (i >= 0) && (gruler[i] == ' '); i--) gruler[i] = 0; for (int32 i=MULTIALIGN_PRINT_WIDTH-1; (i >= 0) && (uruler[i] == ' '); i--) uruler[i] = 0; fprintf(out, "%s\n", gruler); fprintf(out, "%s\n", uruler); } { char save = consensus[window + rowlen]; consensus[window+rowlen] = 0; fprintf(out,"%s cns (uid,iid) type\n", 
consensus+window); consensus[window+rowlen] = save; } { char save = quality[window + rowlen]; quality[window+rowlen] = 0; fprintf(out,"%s qlt\n", quality+window); quality[window+rowlen] = save; } for (i=0;i<depth;i++) { assert(multia[2*i] != NULL); // Change matching bases to '.' or lowercase. // Count the number of non-blank letters. int32 nonBlank = 0; for (int32 j=0; j<MULTIALIGN_PRINT_WIDTH; j++) { if (window + j > length) break; if (multia[2*i][window+j] == consensus[window+j]) { if (dots) { multia[2*i] [window+j] = '.'; multia[2*i+1][window+j] = ' '; } else { multia[2*i][window+j] = tolower(multia[2*i][window+j]); } } if (multia[2*i][window+j] != ' ') nonBlank++; if (idarray[i][window + j] > 0) { row_id = idarray[i][window + j]; orient = oriarray[i][window + j]; } } if (nonBlank == 0) continue; // Figure out the ID and orientation for this block frag_store->gkStore_getFragment(row_id, &rsp, GKFRAGMENT_INF); { char save = multia[2*i][window + MULTIALIGN_PRINT_WIDTH]; multia[2*i][window + MULTIALIGN_PRINT_WIDTH] = 0; fprintf(out, "%s %c (%s,%d)\n", multia[2*i]+window, (orient>0)?'>':'<', AS_UID_toString(rsp.gkFragment_getReadUID()), row_id); multia[2*i][window + MULTIALIGN_PRINT_WIDTH] = save; } if (show_qv) { char save = multia[2*i+1][window + MULTIALIGN_PRINT_WIDTH]; multia[2*i+1][window + MULTIALIGN_PRINT_WIDTH] = 0; fprintf(out, "%s\n", multia[2*i+1]+window); multia[2*i+1][window + MULTIALIGN_PRINT_WIDTH] = save; } } window += MULTIALIGN_PRINT_WIDTH; } fprintf(out,"\n<<< end Contig %d >>>\n", ma->maID); for (i=0;i<2*depth;i++) safe_free(multia[i]); safe_free(multia); for (i=0;i<depth;i++) { safe_free(idarray[i]); safe_free(oriarray[i]); } safe_free(idarray); safe_free(oriarray); }