//  Fill in 'utg', the output message describing unitig 'ID', from the multialign
//  returned by ScaffoldGraph->tigStore.
//
//  Returns false, without modifying 'utg', when loadMultiAlign() returns NULL for
//  'ID'.  Returns true once every field has been populated.
//
//  utg->f_list is allocated here with safe_malloc() and is not released by this
//  function.  utg->consensus and utg->quality are set to the pointers returned by
//  Getchar() on the multialign's arrays; the data is not copied.
//
bool
buildUTGMessage(int32 ID, SnapUnitigMesg *utg) {
  MultiAlignT *tig = ScaffoldGraph->tigStore->loadMultiAlign(ID, TRUE);

  if (tig == NULL)
    return(false);

  utg->eaccession    = AS_UID_fromInteger(getUID(uidServer));
  utg->iaccession    = ID;

  utg->coverage_stat = ScaffoldGraph->tigStore->getUnitigCoverageStat(ID);
  utg->microhet_prob = ScaffoldGraph->tigStore->getUnitigMicroHetProb(ID);

  //  A unitig whose stored status is AS_UNASSIGNED is reported as AS_UNIQUE.
  utg->status        = ScaffoldGraph->tigStore->getUnitigStatus(ID);

  if (utg->status == AS_UNASSIGNED)
    utg->status = AS_UNIQUE;

  utg->length        = GetMultiAlignLength(tig);
  utg->consensus     = Getchar(tig->consensus, 0);
  utg->quality       = Getchar(tig->quality, 0);
  utg->forced        = 0;

  utg->num_frags     = GetNumIntMultiPoss(tig->f_list);
  utg->num_vars      = 0;

  utg->f_list        = (SnapMultiPos*)safe_malloc(utg->num_frags * sizeof(SnapMultiPos));
  utg->v_list        = NULL;

  //  Report, then fail, if there is no consensus at all.
  if (utg->consensus == NULL)
    fprintf(stderr, "buildUTGMessage()-- unitig %d missing consensus sequence\n",
            utg->iaccession);
  assert(utg->consensus != NULL);

  //  Report, then fail, if the stored length disagrees with the consensus string.
  const size_t consensusLen = strlen(utg->consensus);

  if (utg->length != consensusLen)
    fprintf(stderr, "buildUTGMessage()-- unitig %d length %d != consensus string length " F_SIZE_T "\n",
            utg->iaccession, utg->length, consensusLen);
  assert(utg->length == consensusLen);

  //  Copy each fragment placement, translating the internal ident through FRGmap.
  for (int32 fi=0; fi<utg->num_frags; fi++) {
    IntMultiPos   *src = GetIntMultiPos(tig->f_list, fi);
    SnapMultiPos  *dst = utg->f_list + fi;

    dst->type          = src->type;
    dst->eident        = FRGmap.lookup(src->ident);
    dst->position      = src->position;
    dst->delta_length  = src->delta_length;
    dst->delta         = src->delta;
  }

  return(true);
}
static int abAbacus::SetUngappedFragmentPositions(FragType type,int32 n_frags, MultiAlignT *uma) { int32 num_frags = GetNumIntMultiPoss(uma->f_list); int32 num_unitigs = GetNumIntUnitigPoss(uma->u_list); HashTable_AS *unitigFrags = CreateScalarHashTable_AS(); int32 num_columns = GetMultiAlignLength(uma); int32 ungapped_pos = 0; int32 *gapped_positions = new int32 [num_columns + 1]; char *consensus = Getchar(uma->consensus,0); for (int32 i=0; i<num_columns+1; i++) { gapped_positions[i] = ungapped_pos; if (consensus[i] != '-') ungapped_pos++; } // Remember the first fragment we add. int32 first_frag = GetNumCNS_AlignedContigElements(fragment_positions); for (int32 ifrag=0; ifrag<num_frags; ifrag++) { CNS_AlignedContigElement epos; IntMultiPos *frag = GetIntMultiPos(uma->f_list, ifrag); if (ExistsInHashTable_AS(unitigFrags, frag->ident, 0)) { fprintf(stderr,"SetUngappedFragmentPositions()-- ident %d already in hashtable\n", frag->ident); assert(0); } if (HASH_SUCCESS != InsertInHashTable_AS(unitigFrags, frag->ident, 0, 1, 0)) { fprintf(stderr,"SetUngappedFragmentPositions()-- Failure to insert ident %d in hashtable\n", frag->ident); assert(0); } assert(frag->position.bgn >= 0); assert(frag->position.bgn < num_columns + 1); assert(frag->position.end >= 0); assert(frag->position.end < num_columns + 1); epos.frg_or_utg = CNS_ELEMENT_IS_FRAGMENT; epos.idx.fragment.frgIdent = frag->ident; epos.idx.fragment.frgType = frag->type; epos.idx.fragment.frgContained = frag->contained; epos.idx.fragment.frgInUnitig = (type == AS_CONTIG) ? 
-1 : uma->maID; epos.position.bgn = gapped_positions[frag->position.bgn]; epos.position.end = gapped_positions[frag->position.end]; //fprintf(stderr, "SetUngappedFragmentPositions()-- FRG id=%d type=%c pos=%d,%d (orig pos=%d,%d)\n", // frag->ident, frag->type, epos.position.bgn, epos.position.end, frag->position.bgn, frag->position.end); // Adjust the ungapped position if we fall within a gap // if (epos.position.bgn == epos.position.end) { fprintf(stderr,"SetUngappedFragmentPositions()-- Encountered bgn==end=="F_S32" in ungapped coords within SetUngappedFragmentPositions for "F_CID "(gapped coords "F_S32","F_S32")\n", epos.position.bgn,frag->ident,frag->position.bgn,frag->position.end); assert(frag->position.bgn != frag->position.end); if (frag->position.bgn < frag->position.end) { if (epos.position.bgn > 0) epos.position.bgn--; else epos.position.end++; } else { if (epos.position.end > 0) epos.position.end--; else epos.position.bgn++; } fprintf(stderr,"SetUngappedFragmentPositions()-- Reset to "F_S32","F_S32"\n", epos.position.bgn, epos.position.end); } AppendVA_CNS_AlignedContigElement(fragment_positions, &epos); } for (int32 ifrag=0; ifrag < num_unitigs; ifrag++){ CNS_AlignedContigElement epos; IntUnitigPos *unitig = GetIntUnitigPos(uma->u_list, ifrag); epos.frg_or_utg = CNS_ELEMENT_IS_UNITIG; epos.idx.unitig.utgIdent = unitig->ident; epos.idx.unitig.utgType = unitig->type; epos.position.bgn = gapped_positions[unitig->position.bgn]; epos.position.end = gapped_positions[unitig->position.end]; //fprintf(stderr, "SetUngappedFragmentPositions()-- UTG id=%d type=%c pos=%d,%d (orig pos=%d,%d)\n", // unitig->ident, unitig->type, epos.position.bgn, epos.position.end, unitig->position.bgn, unitig->position.end); AppendVA_CNS_AlignedContigElement(fragment_positions,&epos); } // This is used only by ReplaceEndUnitigInContig(). Mark fragments in the "anchoring" contig // that belong to this unitig. 
// if (type != AS_CONTIG) { Fragment *anchor = GetFragment(fragmentStore,0); if ((anchor != NULL) && (anchor->type == AS_CONTIG)) { CNS_AlignedContigElement *af = GetCNS_AlignedContigElement(fragment_positions, anchor->components); for (int32 ifrag=0; ifrag < anchor->n_components; ifrag++, af++) { if ((af->frg_or_utg == CNS_ELEMENT_IS_FRAGMENT) && (ExistsInHashTable_AS(unitigFrags, af->idx.fragment.frgIdent, 0))) af->idx.fragment.frgInUnitig = uma->maID; } } } DeleteHashTable_AS(unitigFrags); delete [] gapped_positions; return first_frag; }
void writeCCO(FILE *asmFile, bool doWrite) { SnapConConMesg cco; GenericMesg pmesg = { &cco, MESG_CCO }; GraphNodeIterator contigs; ContigT *contig; fprintf(stderr, "writeCCO()--\n"); InitGraphNodeIterator(&contigs, ScaffoldGraph->ContigGraph, GRAPH_NODE_DEFAULT); while ((contig = NextGraphNodeIterator(&contigs)) != NULL) { assert(contig->id >= 0); assert(contig->id < GetNumGraphNodes(ScaffoldGraph->ContigGraph)); if (contig->flags.bits.isChaff) continue; NodeCGW_T *unitig = GetGraphNode(ScaffoldGraph->CIGraph, contig->info.Contig.AEndCI); if ((ScaffoldGraph->tigStore->getNumUnitigs(contig->id, FALSE) == 1) && (contig->scaffoldID == NULLINDEX) && (unitig->info.CI.numInstances > 0)) // Contig is a surrogate instance continue; MultiAlignT *ma = ScaffoldGraph->tigStore->loadMultiAlign(contig->id, FALSE); cco.eaccession = AS_UID_fromInteger(getUID(uidServer)); cco.iaccession = contig->id; cco.placed = ScaffoldGraph->tigStore->getContigStatus(contig->id); cco.length = GetMultiAlignLength(ma); cco.consensus = Getchar(ma->consensus, 0); cco.quality = Getchar(ma->quality, 0); cco.forced = 0; cco.num_pieces = GetNumIntMultiPoss(ma->f_list); cco.num_unitigs = GetNumIntMultiPoss(ma->u_list); cco.num_vars = GetNumIntMultiPoss(ma->v_list); cco.pieces = NULL; cco.unitigs = NULL; cco.vars = NULL; if (cco.consensus == NULL) fprintf(stderr, "buildCCOMessage()-- contig %d missing consensus sequence\n", cco.iaccession); assert(cco.consensus != NULL); if (cco.length != strlen(cco.consensus)) fprintf(stderr, "buildCCOMessage()-- contig %d length %d != consensus string length "F_SIZE_T"\n", cco.iaccession, cco.length, strlen(cco.consensus)); assert(cco.length == strlen(cco.consensus)); if (cco.num_pieces > 0) { cco.pieces = (SnapMultiPos *)safe_malloc(cco.num_pieces * sizeof(SnapMultiPos)); for(int32 i=0; i<cco.num_pieces; i++) { IntMultiPos *imp = GetIntMultiPos(ma->f_list, i); cco.pieces[i].type = imp->type; cco.pieces[i].eident = FRGmap.lookup(imp->ident); cco.pieces[i].delta_length 
= imp->delta_length; cco.pieces[i].position = imp->position; cco.pieces[i].delta = imp->delta; } } if (cco.num_unitigs > 0) { cco.unitigs = (UnitigPos *)safe_malloc(cco.num_unitigs * sizeof(UnitigPos)); for(int32 i=0; i<cco.num_unitigs; i++) { IntUnitigPos *imp = GetIntUnitigPos(ma->u_list, i); cco.unitigs[i].type = imp->type; cco.unitigs[i].eident = UTGmap.lookup(imp->ident); cco.unitigs[i].position = imp->position; cco.unitigs[i].delta = imp->delta; cco.unitigs[i].delta_length = imp->delta_length; } } if (cco.num_vars > 0) { cco.vars = (IntMultiVar *)safe_malloc(cco.num_vars * sizeof(IntMultiVar)); for(int32 i=0; i<cco.num_vars; i++) { IntMultiVar *imv = GetIntMultiVar(ma->v_list, i); cco.vars[i].var_id = imv->var_id; cco.vars[i].phased_id = imv->phased_id; cco.vars[i].position = imv->position; cco.vars[i].num_reads = imv->num_reads; cco.vars[i].num_alleles = imv->num_alleles; cco.vars[i].num_alleles_confirmed = imv->num_alleles_confirmed; cco.vars[i].min_anchor_size = imv->min_anchor_size; cco.vars[i].var_length = imv->var_length; cco.vars[i].alleles = imv->alleles; cco.vars[i].var_seq_memory = imv->var_seq_memory; cco.vars[i].read_id_memory = imv->read_id_memory; cco.vars[i].enc_num_reads = NULL; cco.vars[i].enc_weights = NULL; cco.vars[i].enc_var_seq = NULL; cco.vars[i].enc_read_ids = NULL; } } if (doWrite) WriteProtoMesg_AS(asmFile, &pmesg); safe_free(cco.pieces); safe_free(cco.unitigs); safe_free(cco.vars); CCOmap.add(cco.iaccession, cco.eaccession); } }