static int abAbacus::SetUngappedFragmentPositions(FragType type,int32 n_frags, MultiAlignT *uma) { int32 num_frags = GetNumIntMultiPoss(uma->f_list); int32 num_unitigs = GetNumIntUnitigPoss(uma->u_list); HashTable_AS *unitigFrags = CreateScalarHashTable_AS(); int32 num_columns = GetMultiAlignLength(uma); int32 ungapped_pos = 0; int32 *gapped_positions = new int32 [num_columns + 1]; char *consensus = Getchar(uma->consensus,0); for (int32 i=0; i<num_columns+1; i++) { gapped_positions[i] = ungapped_pos; if (consensus[i] != '-') ungapped_pos++; } // Remember the first fragment we add. int32 first_frag = GetNumCNS_AlignedContigElements(fragment_positions); for (int32 ifrag=0; ifrag<num_frags; ifrag++) { CNS_AlignedContigElement epos; IntMultiPos *frag = GetIntMultiPos(uma->f_list, ifrag); if (ExistsInHashTable_AS(unitigFrags, frag->ident, 0)) { fprintf(stderr,"SetUngappedFragmentPositions()-- ident %d already in hashtable\n", frag->ident); assert(0); } if (HASH_SUCCESS != InsertInHashTable_AS(unitigFrags, frag->ident, 0, 1, 0)) { fprintf(stderr,"SetUngappedFragmentPositions()-- Failure to insert ident %d in hashtable\n", frag->ident); assert(0); } assert(frag->position.bgn >= 0); assert(frag->position.bgn < num_columns + 1); assert(frag->position.end >= 0); assert(frag->position.end < num_columns + 1); epos.frg_or_utg = CNS_ELEMENT_IS_FRAGMENT; epos.idx.fragment.frgIdent = frag->ident; epos.idx.fragment.frgType = frag->type; epos.idx.fragment.frgContained = frag->contained; epos.idx.fragment.frgInUnitig = (type == AS_CONTIG) ? -1 : uma->maID; epos.position.bgn = gapped_positions[frag->position.bgn]; epos.position.end = gapped_positions[frag->position.end]; //fprintf(stderr, "SetUngappedFragmentPositions()-- FRG id=%d type=%c pos=%d,%d (orig pos=%d,%d)\n", // frag->ident, frag->type, epos.position.bgn, epos.position.end, frag->position.bgn, frag->position.end); // Adjust the ungapped position if we fall within a gap // if (epos.position.bgn == epos.position.end) { fprintf(stderr,"SetUngappedFragmentPositions()-- Encountered bgn==end=="F_S32" in ungapped coords within SetUngappedFragmentPositions for "F_CID "(gapped coords "F_S32","F_S32")\n", epos.position.bgn,frag->ident,frag->position.bgn,frag->position.end); assert(frag->position.bgn != frag->position.end); if (frag->position.bgn < frag->position.end) { if (epos.position.bgn > 0) epos.position.bgn--; else epos.position.end++; } else { if (epos.position.end > 0) epos.position.end--; else epos.position.bgn++; } fprintf(stderr,"SetUngappedFragmentPositions()-- Reset to "F_S32","F_S32"\n", epos.position.bgn, epos.position.end); } AppendVA_CNS_AlignedContigElement(fragment_positions, &epos); } for (int32 ifrag=0; ifrag < num_unitigs; ifrag++){ CNS_AlignedContigElement epos; IntUnitigPos *unitig = GetIntUnitigPos(uma->u_list, ifrag); epos.frg_or_utg = CNS_ELEMENT_IS_UNITIG; epos.idx.unitig.utgIdent = unitig->ident; epos.idx.unitig.utgType = unitig->type; epos.position.bgn = gapped_positions[unitig->position.bgn]; epos.position.end = gapped_positions[unitig->position.end]; //fprintf(stderr, "SetUngappedFragmentPositions()-- UTG id=%d type=%c pos=%d,%d (orig pos=%d,%d)\n", // unitig->ident, unitig->type, epos.position.bgn, epos.position.end, unitig->position.bgn, unitig->position.end); AppendVA_CNS_AlignedContigElement(fragment_positions,&epos); } // This is used only by ReplaceEndUnitigInContig(). Mark fragments in the "anchoring" contig // that belong to this unitig. // if (type != AS_CONTIG) { Fragment *anchor = GetFragment(fragmentStore,0); if ((anchor != NULL) && (anchor->type == AS_CONTIG)) { CNS_AlignedContigElement *af = GetCNS_AlignedContigElement(fragment_positions, anchor->components); for (int32 ifrag=0; ifrag < anchor->n_components; ifrag++, af++) { if ((af->frg_or_utg == CNS_ELEMENT_IS_FRAGMENT) && (ExistsInHashTable_AS(unitigFrags, af->idx.fragment.frgIdent, 0))) af->idx.fragment.frgInUnitig = uma->maID; } } } DeleteHashTable_AS(unitigFrags); delete [] gapped_positions; return first_frag; }
static void PlaceFragments(int32 fid, IntUnitigPos *aiup, CNS_Options *opp) { Fragment *afrag = GetFragment(fragmentStore,fid); Fragment *bfrag = NULL; CNS_AlignedContigElement *belem = GetCNS_AlignedContigElement(fragment_positions, afrag->components); if (afrag->n_components == 0) return; VA_TYPE(int32) *trace = CreateVA_int32(AS_READ_MAX_NORMAL_LEN+1); for (; belem->frg_or_utg == CNS_ELEMENT_IS_FRAGMENT; belem++) { if (FALSE == ExistsInHashTable_AS(fragmentMap, belem->idx.fragment.frgIdent, 0)) // Fragment is not in the contigs f_list. It is an unplaced read from a surrogate. continue; // if it exists in the fragmentMap it should exist in this map as well since it was added at the same time // look up where this fragment is placed within the entire contig, see if that matches where we're about to place it // this is necessary for surrogates that are multiply placed in a single contig for example: // contig: --------------*****--------*****--------> where ***** represents a surrogate // ----> readA // when placing readA within the surrogate unitig, we see if readA belongs in surrogate instance A or B // by computing the position of the unitig within the contig and adding ahang to it // if this computed position matches the position that the IMP record retrieved below tells us, proceed, otherwise skip placement IntMultiPos *bimp = (IntMultiPos *)LookupValueInHashTable_AS(fragmentToIMP, belem->idx.fragment.frgIdent, 0); int32 bbgn = (bimp->position.bgn < bimp->position.end ? bimp->position.bgn : bimp->position.end); int32 abgn = (aiup->position.bgn < aiup->position.end ? aiup->position.bgn : aiup->position.end); int32 fcomplement = afrag->complement; int32 bcomplement = (belem->position.bgn < belem->position.end) ? 0 : 1; int32 ahang = 0; int32 bhang = 0; int32 ovl = 0; OverlapType otype; // all of fid's component frags will be aligned to it (not to // each other) // // fcomplement==0 fcomplement==1 // // A) fid C) fid // ------------------> <---------------- // ---> <--- // bid (bcomplement==0) bid // // B) fid D) fid // ------------------> <---------------- // <--- ---> // bid (bcomplement==1) bid // // The afrag is a unitig, so b is always contained (length of // overlap is length of belem). if (fcomplement && bcomplement) { ahang = afrag->length - belem->position.bgn; /* Case D */ bhang = belem->position.end - afrag->length; ovl = belem->position.bgn - belem->position.end; } else if (fcomplement && !bcomplement) { ahang = afrag->length - belem->position.end; /* Case C */ bhang = belem->position.bgn - afrag->length; ovl = belem->position.end - belem->position.bgn; } else if (!fcomplement && bcomplement) { ahang = belem->position.end; /* Case B */ bhang = belem->position.bgn - afrag->length; ovl = belem->position.bgn - belem->position.end; } else { ahang = belem->position.bgn; /* Case A */ bhang = belem->position.end - afrag->length; ovl = belem->position.end - belem->position.bgn; } assert(ahang >= 0); assert(bhang <= 0); assert(ovl > 0); if (aiup->num_instances > 1 && abs(ahang + abgn - bbgn) > MAX_SURROGATE_FUDGE_FACTOR) { if (VERBOSE_MULTIALIGN_OUTPUT) fprintf(stderr, "Not placing fragment %d into unitig %d because the positions (%d, %d) do not match (%d, %d)\n", belem->idx.fragment.frgIdent, afrag->iid, bimp->position.bgn, bimp->position.end, ahang + GetColumn(columnStore,(GetBead(beadStore,afrag->firstbead.get() ))->column_index)->ma_index, bhang + GetColumn(columnStore,(GetBead(beadStore,afrag->firstbead.get()+afrag->length-1))->column_index)->ma_index+1); continue; } int32 blid = AppendFragToLocalStore(belem->idx.fragment.frgType, belem->idx.fragment.frgIdent, (bcomplement != fcomplement), belem->idx.fragment.frgContained, AS_OTHER_UNITIG); afrag = GetFragment(fragmentStore, fid); // AppendFragToLocalStore can change the pointer on us. bfrag = GetFragment(fragmentStore, blid); if (!GetAlignmentTraceDriver(afrag, NULL, bfrag, &ahang, &bhang, ovl, trace, &otype, GETALIGNTRACE_CONTIGF, 0)) { //if (!GetAlignmentTrace(afrag->lid, 0, blid, &ahang, &bhang, ovl, trace, &otype, DP_Compare, DONT_SHOW_OLAP, 0, AS_CONSENSUS, AS_CNS_ERROR_RATE) && // !GetAlignmentTrace(afrag->lid, 0, blid, &ahang, &bhang, ovl, trace, &otype, Local_Overlap_AS_forCNS, DONT_SHOW_OLAP, 0, AS_CONSENSUS, AS_CNS_ERROR_RATE)) { Bead *afirst = GetBead(beadStore, afrag->firstbead.get() + ahang); Column *col = GetColumn(columnStore, afirst->column_index); MANode *manode = GetMANode(manodeStore, col->ma_id); RefreshMANode(manode->lid, 0, opp, NULL, NULL, 0, 0); // BPW not sure why we need this fprintf(stderr, "Could (really) not find overlap between %d (%c) and %d (%c) estimated ahang: %d (ejecting frag %d from contig)\n", afrag->iid, afrag->type, belem->idx.fragment.frgIdent, belem->idx.fragment.frgType, ahang, belem->idx.fragment.frgIdent); GetFragment(fragmentStore,blid)->deleted = 1; } else { ApplyAlignment(afrag->lid, 0, NULL, blid, ahang, Getint32(trace,0)); } } // over all fragments Delete_VA(trace); }
MultiAlignT * ReplaceEndUnitigInContig(uint32 contig_iid, uint32 unitig_iid, int32 extendingLeft, CNS_Options *opp) { int32 cid,tid; // local id of contig (cid), and unitig(tid) int32 aid,bid; int32 i,num_unitigs; MultiAlignT *oma; MultiAlignT *cma; IntUnitigPos *u_list; IntMultiPos *f_list; IntMultiVar *v_list; int32 append_left=0; int32 num_frags=0; int32 complement=0; MANode *ma; Fragment *cfrag; Fragment *tfrag = NULL; static VA_TYPE(int32) *trace=NULL; oma = tigStore->loadMultiAlign(contig_iid, FALSE); ResetStores(2 * GetNumchars(oma->consensus), 2, 2 * GetNumchars(oma->consensus)); num_unitigs = GetNumIntUnitigPoss(oma->u_list); num_frags = GetNumIntMultiPoss(oma->f_list); u_list = GetIntUnitigPos(oma->u_list,0); f_list = GetIntMultiPos(oma->f_list,0); v_list = GetIntMultiVar(oma->v_list,0); //PrintIMPInfo(stderr, num_frags, f_list); //PrintIUPInfo(stderr, num_unitigs, u_list); // capture the consensus sequence of the original contig and put into local "fragment" format cid = AppendFragToLocalStore(AS_CONTIG, contig_iid, 0, 0, AS_OTHER_UNITIG); fprintf(stderr,"ReplaceEndUnitigInContig()-- contig %d unitig %d isLeft(%d)\n", contig_iid,unitig_iid,extendingLeft); // The only real value-added from ReplaceUnitigInContig is a new consensus sequence for the contig // some adjustments to positions go along with this, but the real compute is an alignment // between the old contig consensus and the updated unitig // // first we want to determine whether unitig is on left or right of contig, // so that alignment can be done with a positive ahang // if u is at left, i.e.: // // C---------------C // u------u // then initialize new alignment with unitig, and add contig, else // // if u is at right, i.e.: // // C---------------C // u------u // then initialize new alignment with contig, and add unitig, else ma = CreateMANode(0); if ( trace == NULL ) trace = CreateVA_int32(AS_READ_MAX_NORMAL_LEN); ResetVA_int32(trace); { int32 ahang,bhang,pos_offset=0; int32 tigs_adjusted_pos=0; OverlapType otype; int32 olap_success=0; cfrag=GetFragment(fragmentStore,cid); for(i=0;i<num_unitigs;i++) { uint32 id=u_list[i].ident; if ( id == unitig_iid ) { int32 bgn=u_list[i].position.bgn; int32 end=u_list[i].position.end; int32 complement_tmp=(bgn<end)?0:1; int32 left=(complement_tmp)?end:bgn; int32 right=(complement_tmp)?bgn:end; complement=complement_tmp; tid = AppendFragToLocalStore(AS_UNITIG, id, complement, 0, AS_OTHER_UNITIG); tfrag=GetFragment(fragmentStore,tid); if ( extendingLeft ) { // need to set aid to unitig to preserve positive ahang -- and we now should always // have a bhang of zero. append_left=1; aid=tid; bid=cid; // and ahang estimate is the diff in size between // new unitig (GetFragment(fragmentStore,tid)->length) and old unitig (right-left) ahang = GetFragment(fragmentStore,tid)->length - (right-left); bhang = 0; } else { // -------- // ---+++ // We extended the unitig by "+++". The ahang is just the // start position of the original placement, and the bhang // is the amount extended (as above). aid=cid; bid=tid; ahang = left; bhang = GetFragment(fragmentStore,tid)->length - (right-left); } SeedMAWithFragment(ma->lid, aid, opp); // The expected length of this alignment is always the length of the original unitig. int32 ovl = right - left; olap_success = GetAlignmentTrace(aid, 0, bid, &ahang, &bhang, ovl, trace, &otype, DP_Compare, DONT_SHOW_OLAP, 0, GETALIGNTRACE_MERGE, AS_CGW_ERROR_RATE); if (!olap_success) olap_success = GetAlignmentTrace(aid, 0, bid, &ahang, &bhang, ovl, trace, &otype, Local_Overlap_AS_forCNS, DONT_SHOW_OLAP, 0, GETALIGNTRACE_MERGE, AS_CGW_ERROR_RATE); // If the alignment fails -- usually because the ahang is // negative -- return an empty alignment. This causes // extendClearRanges (the sole user of this function) to // gracefully handle the failure. // if (olap_success == 0) { return(NULL); assert(olap_success); } ApplyAlignment(aid, 0, NULL, bid, ahang, Getint32(trace,0)); RefreshMANode(ma->lid, 0, opp, NULL, NULL, 0, 0); //PrintAlignment(stderr,ma->lid,0,-1); break; } } } // Now, want to generate a new MultiAlignT which is an appropriate adjustment of original cma = CreateMultiAlignT(); cma->maID = -1; cma->data = oma->data; cma->consensus = CreateVA_char(GetMANodeLength(ma->lid)+1); cma->quality = CreateVA_char(GetMANodeLength(ma->lid)+1); GetMANodeConsensus(ma->lid, cma->consensus, cma->quality); // no deltas required at this stage // merge the f_lists and u_lists by cloning and concating cma->f_list = Clone_VA(oma->f_list); cma->u_list = Clone_VA(oma->u_list); cma->v_list = Clone_VA(oma->v_list); cma->fdelta = CreateVA_int32(0); cma->udelta = CreateVA_int32(0); { CNS_AlignedContigElement *components; CNS_AlignedContigElement *tcomponents; CNS_AlignedContigElement *contig_component; CNS_AlignedContigElement *aligned_component; int32 ifrag=0; int32 iunitig=0; IntMultiPos *imp; IntUnitigPos *iup; Fragment *frag; int32 ci=0; int32 tc=0; //unitig component index int32 bgn,end,left,right,tmp; int32 range_bgn=0,range_end=0,new_tig=0; components=GetCNS_AlignedContigElement(fragment_positions,cfrag->components); tcomponents=GetCNS_AlignedContigElement(fragment_positions,tfrag->components); // make adjustments to positions if ( append_left) { // fragments within unitig are 0 to tfrag->n_components // and cfrag->n_components-num_unitigs range_bgn = 0; range_end = tfrag->n_components-1; new_tig=cfrag->n_components-num_unitigs; } else { // changed unitig on right // fragments within unitig are (num_frags-tfrag->n_components) to num_frags // and cfrag->n_components-1; range_bgn = (num_frags-(tfrag->n_components-1)); range_end = num_frags; new_tig=cfrag->n_components-1; } while (ci < cfrag->n_components) { contig_component = &components[ci]; if ( contig_component->frg_or_utg == CNS_ELEMENT_IS_FRAGMENT && contig_component->idx.fragment.frgInUnitig == unitig_iid ) { aligned_component = &tcomponents[tc++]; if ( complement ) { bgn = tfrag->length-aligned_component->position.bgn; end = tfrag->length-aligned_component->position.end; } else { bgn = aligned_component->position.bgn; end = aligned_component->position.end; } frag = tfrag; #ifdef DEBUG_POSITIONS fprintf(stderr,"compci->idx %12d bgn: %10d end: %10d\n",ci,bgn,end); #endif } else if ( ci == new_tig ) { aligned_component = &tcomponents[tc++]; if ( complement ) { bgn = tfrag->length-aligned_component->position.bgn; end = tfrag->length-aligned_component->position.end; } else { bgn = aligned_component->position.bgn; end = aligned_component->position.end; } frag = tfrag; #ifdef DEBUG_POSITIONS fprintf(stderr,"compci->idx %12d bgn: %10d end: %10d\n",ci,bgn,end); #endif } else { aligned_component = contig_component; bgn = aligned_component->position.bgn; end = aligned_component->position.end; frag = cfrag; #ifdef DEBUG_POSITIONS fprintf(stderr,"compci->idx %12d bgn: %10d end: %10d\n",ci,bgn,end); #endif } left = (bgn<end)?bgn:end; right = (bgn<end)?end:bgn; //if ( ci == new_tig ) { // left = 0; // right = frag->length; //} left = GetColumn(columnStore, GetBead(beadStore,frag->firstbead.get() + left )->column_index)->ma_index; right= GetColumn(columnStore, GetBead(beadStore,frag->firstbead.get() + right-1)->column_index)->ma_index + 1; tmp = bgn; bgn = (bgn<end)?left:right; end = (tmp<end)?right:left; if (aligned_component->frg_or_utg==CNS_ELEMENT_IS_UNITIG) { iup = GetIntUnitigPos(cma->u_list,iunitig); iup->position.bgn = bgn; iup->position.end = end; iup->delta_length = 0; iup->delta = NULL; #ifdef DEBUG_POSITIONS fprintf(stderr," element %d at %d,%d\n", ci,bgn,end); #endif ci++;iunitig++; } else { imp = GetIntMultiPos(cma->f_list,ifrag); imp->ident = aligned_component->idx.fragment.frgIdent; imp->contained = aligned_component->idx.fragment.frgContained; imp->position.bgn = bgn; imp->position.end = end; #ifdef DEBUG_POSITIONS fprintf(stderr," element %d at %d,%d\n", ci,bgn,end); #endif imp->delta_length = 0; imp->delta = NULL; ci++;ifrag++; } } } DeleteMANode(ma->lid); return cma; }