Exemplo n.º 1
0
void PrintContigContents(FILE *stream, ScaffoldGraphT *graph, char *message) {
	  GraphNodeIterator   contigs;
	  ContigT	     *contig;

	  InitGraphNodeIterator(&contigs, graph->ContigGraph, GRAPH_NODE_DEFAULT);
	  while ((contig = NextGraphNodeIterator(&contigs)) != NULL) {
	    assert(contig->id >= 0);
	    assert(contig->id < GetNumGraphNodes(graph->ContigGraph));

	    MultiAlignT *ma = ScaffoldGraph->tigStore->loadMultiAlign(contig->id, FALSE);

	    fprintf(stream, "%s contig %d placed status %c with %d unitigs: ", message, contig->id, graph->tigStore->getContigStatus(contig->id), GetNumIntMultiPoss(ma->u_list));


	    for (int i = 0; i < GetNumIntMultiPoss(ma->u_list); i++) {
	      IntUnitigPos *imp = GetIntUnitigPos(ma->u_list, i);

	      fprintf(stream, "%d ", imp->ident);
	    }
	    fprintf(stream, "\n");
	  }
}
Exemplo n.º 2
0
bool
MultiAlignContig(MultiAlignT  *ma,
                 gkStore      *UNUSED,
                 CNS_Options  *opp) {
  int32        num_bases     = 0;
  int32        num_unitigs   = GetNumIntUnitigPoss(ma->u_list);
  int32        num_frags     = GetNumIntMultiPoss(ma->f_list);
  int32        num_columns   = 0;

  IntMultiPos  *flist    = GetIntMultiPos(ma->f_list, 0);
  IntUnitigPos *ulist    = GetIntUnitigPos(ma->u_list, 0);
  IntMultiVar  *vlist    = GetIntMultiVar(ma->v_list, 0);

  SeqInterval  *offsets       = (SeqInterval *) safe_calloc(num_unitigs,sizeof(SeqInterval));

  for (int32 i=0;i<num_unitigs;i++) {
    int32 flen   = (ulist[i].position.bgn < ulist[i].position.end) ? (ulist[i].position.end < ulist[i].position.bgn) : (ulist[i].position.bgn - ulist[i].position.end);
    num_bases   += flen + 2 * AS_CNS_ERROR_RATE * flen;

    num_columns = (ulist[i].position.bgn > num_columns) ? ulist[i].position.bgn : num_columns;
    num_columns = (ulist[i].position.end > num_columns) ? ulist[i].position.end : num_columns;

    //fprintf(stderr, "CTG %d UTG %d %d-%d\n",
    //        ma->maID, ulist[i].ident, ulist[i].position.bgn, ulist[i].position.end);
  }

  for (int32 i=0;i<num_frags;i++) {
    int32 flen   = (flist[i].position.bgn < flist[i].position.end) ? (flist[i].position.end < flist[i].position.bgn) : (flist[i].position.bgn - flist[i].position.end);
    num_bases   += flen + 2 * AS_CNS_ERROR_RATE * flen;
  }

  ResetStores(num_bases, num_unitigs, num_columns);

  fragmentMap   = CreateScalarHashTable_AS();
  fragmentToIMP = CreateScalarHashTable_AS();

  for (int32 i=0; i<num_frags; i++) {

    //  Add all fragments in the contigs f_list to the fragmentMap.  This tells us if a fragment is
    //  not placed in a surrogate (because they aren't in the contigs f_list, but will appear in a
    //  surrogate unitigs f_list).
    //
    if (HASH_SUCCESS != InsertInHashTable_AS(fragmentMap, flist[i].ident, 0, 1, 0)) {
      fprintf(stderr, "MultiAlignContig()-- Contig %d FAILED.  Fragment %d is a duplicate.\n",
              ma->maID, flist[i].ident);
      return(false);
    }

    // SK store IID to IMP message mapping
    InsertInHashTable_AS(fragmentToIMP, flist[i].ident, 0, (uint64)&flist[i], 0);
  }

  for (int32 i=0;i<num_unitigs;i++) {
    uint32 complement = (ulist[i].position.bgn<ulist[i].position.end)?0:1;
    uint32 fid = AppendFragToLocalStore(AS_UNITIG,
                                 ulist[i].ident,
                                 complement,
                                 0,
                                 ulist[i].type);
    offsets[fid].bgn = complement?ulist[i].position.end:ulist[i].position.bgn;
    offsets[fid].end = complement?ulist[i].position.bgn:ulist[i].position.end;
  }

  MANode *manode = CreateMANode(ma->maID);

  // Seed multiAlignment with 1st fragment of 1st unitig

  SeedMAWithFragment(manode->lid, GetFragment(fragmentStore,0)->lid, opp);
  PlaceFragments(GetFragment(fragmentStore,0)->lid, ulist + GetFragment(fragmentStore,0)->lid, opp);

  // Now, loop on remaining fragments, aligning to:
  //    a)  containing frag (if contained)
  // or b)  previously aligned frag

  VA_TYPE(int32) *trace = CreateVA_int32(AS_READ_MAX_NORMAL_LEN+1);

  for (int32 i=1;i<num_unitigs;i++) {
    Fragment *afrag = NULL;
    Fragment *bfrag = GetFragment(fragmentStore,i);

    int32    ahang  = 0;
    int32    bhang  = 0;
    int32    ovl    = 0;
    int32    alid   = 0;
    int32    blid   = bfrag->lid;

    OverlapType otype;

    int32 olap_success  = 0;
    int32 try_contained = 0;
    int32 align_to      = i - 1;

    Fragment *afrag_first = NULL;
    int32       ahang_first = 0;
    int32       bhang_first = 0;

    while (!olap_success) {
    nextFrag:

      if (try_contained == 0)
        //  Skip contained stuff.
        while ((align_to > 0) &&
               ((GetFragment(fragmentStore, align_to)->is_contained) ||
                (GetFragment(fragmentStore, align_to)->container_iid > 0)))
          align_to--;

      if (align_to < 0) {
        if (VERBOSE_MULTIALIGN_OUTPUT)
          fprintf(stderr, "MultiAlignContig: hit the beginning of unitig list: no unitig upstream overlaps with current unitig %d\n", bfrag->iid);

        if (try_contained == 0) {
          if (VERBOSE_MULTIALIGN_OUTPUT)
            fprintf(stderr, "MultiAlignContig: trying contained afrags for bfrag %d\n", bfrag->iid);
          try_contained = 1;
          align_to      = i-1;
          goto nextFrag;
        }

        break;
      }

      afrag = GetFragment(fragmentStore, align_to);
      alid  = afrag->lid;

      ahang = offsets[blid].bgn - offsets[alid].bgn;
      bhang = offsets[blid].end - offsets[alid].end;

      if (afrag_first == NULL) {
        afrag_first = afrag;
        ahang_first = ahang;
        bhang_first = bhang;
      }

      //  This code copied from MultiAlignUnitig.

      if (offsets[afrag->lid].bgn < offsets[bfrag->lid].bgn)
        if (offsets[afrag->lid].end < offsets[bfrag->lid].end)
          ovl = offsets[afrag->lid].end - offsets[bfrag->lid].bgn;
        else
          //ovl = offsets[bfrag->lid].end - offsets[bfrag->lid].bgn;
          ovl = bfrag->length;
      else
        if (offsets[afrag->lid].end < offsets[bfrag->lid].end)
          //ovl = offsets[afrag->lid].end - offsets[afrag->lid].bgn;
          ovl = afrag->length;
        else
          ovl = offsets[bfrag->lid].end - offsets[afrag->lid].bgn;

      //  End of copy

      if (ovl <= 0) {
        if (VERBOSE_MULTIALIGN_OUTPUT)
          fprintf(stderr, "MultiAlignContig: positions of afrag %d and bfrag %d do not overlap.  Proceed to the next upstream afrag\n", afrag->iid, bfrag->iid);
        align_to--;
        goto nextFrag;
      }

      olap_success = GetAlignmentTraceDriver(afrag, NULL,
                                             bfrag,
                                             &ahang, &bhang, ovl,
                                             trace,
                                             &otype,
                                             GETALIGNTRACE_CONTIGU,
                                             (blid + 1 < num_unitigs) ? (offsets[blid + 1].bgn - offsets[blid].bgn) : 800);

      //  Nope, fail.
      if (!olap_success) {
        if (VERBOSE_MULTIALIGN_OUTPUT)
          fprintf(stderr, "MultiAlignContig: Positions of afrag %d (%c) and bfrag %d (%c) overlap, but GetAlignmentTrace returns no overlap success.\n",
                  afrag->iid, afrag->type, bfrag->iid, bfrag->type);

        align_to--;

        if ((align_to < 0) && (!try_contained)) {
          if (VERBOSE_MULTIALIGN_OUTPUT)
            fprintf(stderr, "MultiAlignContig: Try contained afrags for bfrag %d\n", bfrag->iid);
          try_contained = 1;
          align_to = i-1;
        }
      }
    }  //  while !olap_success


    if ((!olap_success) && (FORCE_UNITIG_ABUT == 0)) {
      fprintf(stderr,"MultiAlignContig: Could (really) not find overlap between %d (%c) and %d (%c), estimated ahang %d\n",
              afrag->iid,afrag->type,bfrag->iid,bfrag->type, ahang);
      fprintf(stderr,"MultiAlignContig: You can (possibly) force these to abut with '-D forceunitigabut', but that code is buggy at best.\n");
      goto returnFailure;
    }

#if 1
    if ((!olap_success) && (FORCE_UNITIG_ABUT == 1)) {
      if (afrag_first) {
        afrag = afrag_first;
        ahang = ahang_first;
        bhang = bhang_first;
      } else {
        //  Dang, we're really screwed.  Nobody overlapped with us.
        //  Cross our fingers and find the closest end point.
        //
        int32   maxOvl = -offsets[blid].bgn;

        //if (VERBOSE_MULTIALIGN_OUTPUT)
        //  fprintf(stderr, "MultiAlignContig:  YIKES!  Your unitig doesn't overlap with anything!  Picking the closest thing!\n");

        align_to = i-1;

        while (align_to >= 0) {
          if ((try_contained == 0) &&
              ((GetFragment(fragmentStore, align_to)->is_contained) ||
               (GetFragment(fragmentStore, align_to)->container_iid > 0))) {
            //  NOP!  Found a contained frag, and we want to skip it.
          } else if (maxOvl < offsets[alid].end - offsets[blid].bgn) {
            afrag  = GetFragment(fragmentStore, align_to);
            alid   = afrag->lid;
            ahang  = offsets[blid].bgn - offsets[alid].bgn;
            maxOvl = offsets[alid].end - offsets[blid].bgn;

            //fprintf(stderr, "MultiAlignContig:  RESET align_to=%d alid=%d maxOvl=%d ahang=%d\n", align_to, alid, maxOvl, ahang);
          }

          align_to--;
        }  //  while align_to >= 0
      }

      fprintf(stderr, "MultiAlignContig:  Forcing abut between afrag %d (%c) and bfrag %d (%c) in contig %d.\n",
              afrag->iid, afrag->type, bfrag->iid, bfrag->type, ma->maID);

      //  Force a 1bp overlap.  We'd like to strictly abut, but ApplyAlignment() requires that there
      //  be an overlap, and removing checks for that seem like a bad idea.
      //
      ahang = afrag->length - 1;

      otype = AS_DOVETAIL;

      int32 zero = 0;

      ResetVA_int32(trace);
      AppendVA_int32(trace, &zero);

      assert(*Getint32(trace,0) == 0);
      assert(GetNumint32s(trace) == 1);
    }
#endif

    //  Unitig is placed, or we just forced it to be placed.

    if (otype == AS_CONTAINMENT) {
      bfrag->is_contained = 1;
      if (bfrag->container_iid == 0)
        bfrag->container_iid = 1;  //  Not sure why 1 and not afrag->iid
    }

    ApplyAlignment(afrag->lid, 0, NULL, bfrag->lid, ahang, Getint32(trace,0));
    PlaceFragments(bfrag->lid, ulist + bfrag->lid, opp);
  }  //  over all unitigs

  // Now, must find fragments in regions of overlapping unitigs, and adjust
  // their alignments as needed
  RefreshMANode(manode->lid, 0, opp, NULL, NULL, 0, 0);

  //fprintf(stderr,"MultiAlignContig: Initial pairwise induced alignment\n");
  //PrintAlignment(stderr,manode->lid,0,-1);

  AbacusRefine(manode,0,-1,CNS_SMOOTH, opp);
  MergeRefine(manode->lid, NULL, 0, opp, 1);
  AbacusRefine(manode,0,-1,CNS_POLYX, opp);

  //fprintf(stderr,"MultiAlignContig: POLYX refined alignment\n");
  //PrintAlignment(stderr,manode->lid,0,-1);

  {
    IntMultiVar  *vl = NULL;
    int32         nv = 0;

    RefreshMANode(manode->lid, 0, opp, &nv, &vl, 0, 0);
    AbacusRefine(manode,0,-1,CNS_INDEL, opp);
    MergeRefine(manode->lid, ma->v_list, 0, opp, 2);
  }

  //fprintf(stderr,"MultiAlignContig: Final refined alignment\n");
  //PrintAlignment(stderr,manode->lid,0,-1);

  //if (num_frags == 0)
  //  PrintAlignment(stderr,manode->lid,0,-1);

  GetMANodeConsensus(manode->lid, ma->consensus, ma->quality);
  GetMANodePositions(manode->lid, ma);

  DeleteMANode(manode->lid);

  safe_free(offsets);
  Delete_VA(trace);
  DeleteHashTable_AS(fragmentMap);  
  fragmentMap = NULL;
  DeleteHashTable_AS(fragmentToIMP);
  fragmentToIMP = NULL;
  return(true);

 returnFailure:
  safe_free(offsets);
  Delete_VA(trace);
  DeleteHashTable_AS(fragmentMap);
  fragmentMap = NULL;
  DeleteHashTable_AS(fragmentToIMP);
  fragmentToIMP = NULL;
  return(false);
}
Exemplo n.º 3
0
static
int
abAbacus::SetUngappedFragmentPositions(FragType type,int32 n_frags, MultiAlignT *uma) {

  int32 num_frags   = GetNumIntMultiPoss(uma->f_list);
  int32 num_unitigs = GetNumIntUnitigPoss(uma->u_list);

  HashTable_AS *unitigFrags = CreateScalarHashTable_AS();

  int32 num_columns   = GetMultiAlignLength(uma);
  int32 ungapped_pos  = 0;

  int32 *gapped_positions = new int32 [num_columns + 1];
  char  *consensus        = Getchar(uma->consensus,0);

  for (int32 i=0; i<num_columns+1; i++) {
    gapped_positions[i] = ungapped_pos;

    if (consensus[i] != '-')
      ungapped_pos++;
  }

  //  Remember the first fragment we add.
  int32 first_frag = GetNumCNS_AlignedContigElements(fragment_positions);

  for (int32 ifrag=0; ifrag<num_frags; ifrag++) {
    CNS_AlignedContigElement epos;
    IntMultiPos *frag = GetIntMultiPos(uma->f_list, ifrag);

    if (ExistsInHashTable_AS(unitigFrags, frag->ident, 0)) {
      fprintf(stderr,"SetUngappedFragmentPositions()-- ident %d already in hashtable\n", frag->ident);
      assert(0);
    }
    if (HASH_SUCCESS != InsertInHashTable_AS(unitigFrags, frag->ident, 0, 1, 0)) {
      fprintf(stderr,"SetUngappedFragmentPositions()-- Failure to insert ident %d in hashtable\n", frag->ident);
      assert(0);
    }

    assert(frag->position.bgn >= 0);
    assert(frag->position.bgn < num_columns + 1);
    assert(frag->position.end >= 0);
    assert(frag->position.end < num_columns + 1);

    epos.frg_or_utg                  = CNS_ELEMENT_IS_FRAGMENT;
    epos.idx.fragment.frgIdent       = frag->ident;
    epos.idx.fragment.frgType        = frag->type;
    epos.idx.fragment.frgContained   = frag->contained;
    epos.idx.fragment.frgInUnitig    = (type == AS_CONTIG) ? -1 : uma->maID;
    epos.position.bgn                = gapped_positions[frag->position.bgn];
    epos.position.end                = gapped_positions[frag->position.end];

    //fprintf(stderr, "SetUngappedFragmentPositions()-- FRG id=%d type=%c pos=%d,%d (orig pos=%d,%d)\n",
    //        frag->ident, frag->type, epos.position.bgn, epos.position.end, frag->position.bgn, frag->position.end);

    //  Adjust the ungapped position if we fall within a gap
    //
    if (epos.position.bgn == epos.position.end) {
      fprintf(stderr,"SetUngappedFragmentPositions()-- Encountered bgn==end=="F_S32" in ungapped coords within SetUngappedFragmentPositions for "F_CID "(gapped coords "F_S32","F_S32")\n",
              epos.position.bgn,frag->ident,frag->position.bgn,frag->position.end);
      assert(frag->position.bgn != frag->position.end);

      if (frag->position.bgn < frag->position.end) {
        if (epos.position.bgn > 0)
          epos.position.bgn--;
        else
          epos.position.end++;
      } else {
        if (epos.position.end > 0)
          epos.position.end--;
        else
          epos.position.bgn++;
      }
      fprintf(stderr,"SetUngappedFragmentPositions()--   Reset to "F_S32","F_S32"\n",
              epos.position.bgn,
              epos.position.end);
    }

    AppendVA_CNS_AlignedContigElement(fragment_positions, &epos);
  }


  for (int32 ifrag=0; ifrag < num_unitigs; ifrag++){
    CNS_AlignedContigElement epos;
    IntUnitigPos *unitig = GetIntUnitigPos(uma->u_list, ifrag);

    epos.frg_or_utg           = CNS_ELEMENT_IS_UNITIG;
    epos.idx.unitig.utgIdent  = unitig->ident;
    epos.idx.unitig.utgType   = unitig->type;
    epos.position.bgn         = gapped_positions[unitig->position.bgn];
    epos.position.end         = gapped_positions[unitig->position.end];

    //fprintf(stderr, "SetUngappedFragmentPositions()-- UTG id=%d type=%c pos=%d,%d (orig pos=%d,%d)\n",
    //        unitig->ident, unitig->type, epos.position.bgn, epos.position.end, unitig->position.bgn, unitig->position.end);

    AppendVA_CNS_AlignedContigElement(fragment_positions,&epos);
  }

  //  This is used only by ReplaceEndUnitigInContig().  Mark fragments in the "anchoring" contig
  //  that belong to this unitig.
  //
  if (type != AS_CONTIG) {
    Fragment *anchor = GetFragment(fragmentStore,0);

    if ((anchor != NULL) &&
        (anchor->type == AS_CONTIG)) {
      CNS_AlignedContigElement *af = GetCNS_AlignedContigElement(fragment_positions, anchor->components);

      for (int32 ifrag=0; ifrag < anchor->n_components; ifrag++, af++) {
        if ((af->frg_or_utg == CNS_ELEMENT_IS_FRAGMENT) &&
            (ExistsInHashTable_AS(unitigFrags, af->idx.fragment.frgIdent, 0)))
          af->idx.fragment.frgInUnitig = uma->maID;
      }
    }
  }

  DeleteHashTable_AS(unitigFrags);
  delete [] gapped_positions;

  return first_frag;
}
Exemplo n.º 4
0
int
main (int argc, char **argv) {
  char   tmpName[FILENAME_MAX] = {0};

  char  *gkpName = NULL;

  char  *tigName = NULL;
  int32  tigVers = -1;
  int32  tigPart = -1;

  int64  ctgBgn = -1;
  int64  ctgEnd = -1;

  char  *ctgName = NULL;
  char  *outName = NULL;
  char  *inName  = NULL;

  bool   forceCompute = false;

  int32  numFailures = 0;
  int32  numSkipped  = 0;

  bool   useUnitig  = false;
  bool   showResult = false;

  CNS_Options options = { CNS_OPTIONS_SPLIT_ALLELES_DEFAULT,
                          CNS_OPTIONS_MIN_ANCHOR_DEFAULT,
                          CNS_OPTIONS_DO_PHASING_DEFAULT };

  //  Comminucate to MultiAlignment_CNS.c that we are doing consensus and not cgw.
  thisIsConsensus = 1;

  argc = AS_configure(argc, argv);

  int arg=1;
  int err=0;
  while (arg < argc) {
    if        (strcmp(argv[arg], "-g") == 0) {
      gkpName = argv[++arg];

    } else if (strcmp(argv[arg], "-t") == 0) {
      tigName = argv[++arg];
      tigVers = atoi(argv[++arg]);
      tigPart = atoi(argv[++arg]);

    } else if (strcmp(argv[arg], "-c") == 0) {
      AS_UTL_decodeRange(argv[++arg], ctgBgn, ctgEnd);

    } else if (strcmp(argv[arg], "-T") == 0) {
      ctgName = argv[++arg];

    } else if (strcmp(argv[arg], "-O") == 0) {
      outName = argv[++arg];
    } else if (strcmp(argv[arg], "-I") == 0) {
      inName = argv[++arg];

    } else if (strcmp(argv[arg], "-f") == 0) {
      forceCompute = true;

    } else if (strcmp(argv[arg], "-U") == 0) {
      useUnitig = true;

    } else if (strcmp(argv[arg], "-v") == 0) {
      showResult = true;

    } else if (strcmp(argv[arg], "-V") == 0) {
      VERBOSE_MULTIALIGN_OUTPUT++;

    } else if (strcmp(argv[arg], "-w") == 0) {
      options.smooth_win = atoi(argv[++arg]);

    } else if (strcmp(argv[arg], "-P") == 0) {
      options.do_phasing = atoi(argv[++arg]);

    } else {
      fprintf(stderr, "%s: Unknown option '%s'\n", argv[0], argv[arg]);
      err++;
    }

    arg++;
  }
  if ((err) || (gkpName == NULL) || (tigName == NULL)) {
    fprintf(stderr, "usage: %s -g gkpStore -t tigStore version partition [opts]\n", argv[0]);
    fprintf(stderr, "    -c b         Compute only contig ID 'b' (must be in the correct partition!)\n");
    fprintf(stderr, "    -c b-e       Compute only contigs from ID 'b' to ID 'e'\n");
    fprintf(stderr, "\n");
    fprintf(stderr, "    -T file      Test the computation of the contig layout in 'file'\n");
    fprintf(stderr, "\n");
    fprintf(stderr, "    -f           Recompute contigs that already have a multialignment\n");
    fprintf(stderr, "\n");
    fprintf(stderr, "    -U           Reuse the unitig consensus for contigs with only a single\n");
    fprintf(stderr, "                 unitig (EXPERIMENTAL!)\n");
    fprintf(stderr, "\n");
    fprintf(stderr, "    -O file      Don't update tigStore, dump a binary file instead.\n");
    fprintf(stderr, "    -I file      Import binary file into tigStore\n");
    fprintf(stderr, "\n");
    fprintf(stderr, "    -v           Show multialigns.\n");
    fprintf(stderr, "    -V           Enable debugging option 'verbosemultialign'.\n");
    fprintf(stderr, "\n");
    fprintf(stderr, "    -w ws        Smoothing window size\n");
    fprintf(stderr, "\n");
    exit(1);
  }

  //  Open both stores for read only.
  gkpStore = new gkStore(gkpName, false, false);
  tigStore = new MultiAlignStore(tigName, tigVers, 0, tigPart, false, false, false);

  gkpStore->gkStore_loadPartition(tigPart);

  //  Decide on what to compute.  Either all contigs, or a single contig, or a special case test.
  uint32 b = 0;
  uint32 e = tigStore->numContigs();

  if (ctgBgn != -1) {
    b = ctgBgn;
    e = ctgEnd + 1;
  }

  FORCE_UNITIG_ABUT = 1;

  if (ctgName != NULL) {
    errno = 0;
    FILE         *F = fopen(ctgName, "r");
    if (errno)
      fprintf(stderr, "Failed to open input contig file '%s': %s\n", ctgName, strerror(errno)), exit(1);

    MultiAlignT  *ma       = CreateEmptyMultiAlignT();
    bool          isUnitig = false;

    while (LoadMultiAlignFromHuman(ma, isUnitig, F) == true) {
      if (ma->maID < 0)
        ma->maID = (isUnitig) ? tigStore->numUnitigs() : tigStore->numContigs();

      if (MultiAlignContig(ma, gkpStore, &options)) {
        if (showResult)
          PrintMultiAlignT(stdout, ma, gkpStore, false, false, AS_READ_CLEAR_LATEST);
      } else {
        fprintf(stderr, "MultiAlignContig()-- contig %d failed.\n", ma->maID);
        numFailures++;
      }
    }

    DeleteMultiAlignT(ma);

    b = e = 0;
  }

  //  Reopen for writing, if we have work to do.
  if (((inName) || (b < e)) && (outName == NULL)) {
    delete tigStore;
    tigStore = new MultiAlignStore(tigName, tigVers, 0, tigPart, true, false, true);
  }

  if (inName) {
    importFromFile(inName, tigPart);

    b = e = 0;
  }

  //  Now the usual case.  Iterate over all contigs, compute and update.
  for (uint32 i=b; i<e; i++) {
    MultiAlignT  *cma = tigStore->loadMultiAlign(i, false);

    if (cma == NULL) {
      //  Not in our partition, or deleted.
      continue;
    }

    bool  exists = (cma->consensus != NULL) && (GetNumchars(cma->consensus) > 1);

    if ((forceCompute == false) && (exists == true)) {
      //  Already finished contig consensus.
      fprintf(stderr, "Working on contig %d (%d unitigs and %d fragments) - already computed, skipped\n",
              cma->maID, cma->data.num_unitigs, cma->data.num_frags);

      numSkipped++;

      tigStore->unloadMultiAlign(cma->maID, false);

      continue;
    }

    int32         uID = GetIntUnitigPos(cma->u_list, 0)->ident;

    //  If this is a surrogate, we CANNOT reuse the unitig.  We need to process the contig so that
    //  the unplaced reads are stripped out.  A surrogate should have different contig and unitig
    //  IDs; we could also check the contig status.

    if ((cma->data.num_unitigs == 1) &&
        (cma->maID == uID) &&
        (useUnitig == true)) {
      fprintf(stderr, "Working on contig %d (%d unitigs and %d fragments) - reusing unitig %d consensus\n",
              cma->maID, cma->data.num_unitigs, cma->data.num_frags, uID);

      MultiAlignT  *uma = tigStore->loadMultiAlign(uID, true);

      uma->data = cma->data;

      tigStore->unloadMultiAlign(cma->maID, false);

      if (outName)
        writeToOutFile(outName, tigPart, uma);
      else
        tigStore->insertMultiAlign(uma, false, false);

      tigStore->unloadMultiAlign(uma->maID, true);

      continue;
    }

    fprintf(stderr, "Working on contig %d (%d unitigs and %d fragments)%s\n",
            cma->maID, cma->data.num_unitigs, cma->data.num_frags,
            (exists) ? " - already computed, recomputing" : "");

    if (MultiAlignContig(cma, gkpStore, &options)) {
      if (outName)
        writeToOutFile(outName, tigPart, cma);
      else
        tigStore->insertMultiAlign(cma, false, true);

      if (showResult)
        PrintMultiAlignT(stdout, cma, gkpStore, false, false, AS_READ_CLEAR_LATEST);

      tigStore->unloadMultiAlign(cma->maID, false);
    } else {
      fprintf(stderr, "MultiAlignContig()-- contig %d failed.\n", cma->maID);
      numFailures++;
    }
  }

  delete tigStore;

  fprintf(stderr, "\n");
  fprintf(stderr, "NumColumnsInUnitigs             = %d\n", NumColumnsInUnitigs);
  fprintf(stderr, "NumGapsInUnitigs                = %d\n", NumGapsInUnitigs);
  fprintf(stderr, "NumRunsOfGapsInUnitigReads      = %d\n", NumRunsOfGapsInUnitigReads);
  fprintf(stderr, "NumColumnsInContigs             = %d\n", NumColumnsInContigs);
  fprintf(stderr, "NumGapsInContigs                = %d\n", NumGapsInContigs);
  fprintf(stderr, "NumRunsOfGapsInContigReads      = %d\n", NumRunsOfGapsInContigReads);
  fprintf(stderr, "NumAAMismatches                 = %d\n", NumAAMismatches);
  fprintf(stderr, "NumVARRecords                   = %d\n", NumVARRecords);
  fprintf(stderr, "NumVARStringsWithFlankingGaps   = %d\n", NumVARStringsWithFlankingGaps);
  fprintf(stderr, "NumUnitigRetrySuccess           = %d\n", NumUnitigRetrySuccess);
  fprintf(stderr, "\n");

  if (numFailures) {
    fprintf(stderr, "WARNING:  Total number of contig failures = %d\n", numFailures);
    fprintf(stderr, "\n");
    fprintf(stderr, "Consensus did NOT finish successfully.\n");
    return(1);
  }

  fprintf(stderr, "Consensus finished successfully.  Bye.\n");
  return(0);
}
Exemplo n.º 5
0
MultiAlignT *
ReplaceEndUnitigInContig(uint32 contig_iid,
                         uint32 unitig_iid,
                         int32 extendingLeft,
                         CNS_Options *opp) {
  int32 cid,tid; // local id of contig (cid), and unitig(tid)
  int32 aid,bid;
  int32 i,num_unitigs;
  MultiAlignT *oma;
  MultiAlignT *cma;
  IntUnitigPos *u_list;
  IntMultiPos *f_list;
  IntMultiVar  *v_list;
  int32 append_left=0;
  int32 num_frags=0;
  int32 complement=0;
  MANode *ma;
  Fragment *cfrag;
  Fragment *tfrag = NULL;
  static VA_TYPE(int32) *trace=NULL;

  oma =  tigStore->loadMultiAlign(contig_iid, FALSE);

  ResetStores(2 * GetNumchars(oma->consensus),
              2,
              2 * GetNumchars(oma->consensus));

  num_unitigs = GetNumIntUnitigPoss(oma->u_list);
  num_frags   = GetNumIntMultiPoss(oma->f_list);

  u_list = GetIntUnitigPos(oma->u_list,0);
  f_list = GetIntMultiPos(oma->f_list,0);
  v_list = GetIntMultiVar(oma->v_list,0);

  //PrintIMPInfo(stderr, num_frags,   f_list);
  //PrintIUPInfo(stderr, num_unitigs, u_list);

  // capture the consensus sequence of the original contig and put into local "fragment" format
  cid = AppendFragToLocalStore(AS_CONTIG,
                               contig_iid,
                               0,
                               0,
                               AS_OTHER_UNITIG);

  fprintf(stderr,"ReplaceEndUnitigInContig()-- contig %d unitig %d isLeft(%d)\n",
          contig_iid,unitig_iid,extendingLeft);

  //  The only real value-added from ReplaceUnitigInContig is a new consensus sequence for the contig
  //  some adjustments to positions go along with this, but the real compute is an alignment
  //  between the old contig consensus and the updated unitig
  //
  //  first we want to determine whether unitig is on left or right of contig,
  //  so that alignment can be done with a positive ahang
  //  if u is at left, i.e.:
  //
  //  C---------------C
  //  u------u
  //  then initialize new alignment with unitig, and add contig, else
  //
  //  if u is at right, i.e.:
  //
  //  C---------------C
  //           u------u
  //  then initialize new alignment with contig, and add unitig, else

  ma = CreateMANode(0);

  if ( trace == NULL )
    trace = CreateVA_int32(AS_READ_MAX_NORMAL_LEN);
  ResetVA_int32(trace);

  {
    int32 ahang,bhang,pos_offset=0;
    int32 tigs_adjusted_pos=0;
    OverlapType otype;
    int32 olap_success=0;
    cfrag=GetFragment(fragmentStore,cid);
    for(i=0;i<num_unitigs;i++) {
      uint32 id=u_list[i].ident;
      if ( id == unitig_iid ) {
        int32 bgn=u_list[i].position.bgn;
        int32 end=u_list[i].position.end;
        int32 complement_tmp=(bgn<end)?0:1;
        int32 left=(complement_tmp)?end:bgn;
        int32 right=(complement_tmp)?bgn:end;
        complement=complement_tmp;
        tid = AppendFragToLocalStore(AS_UNITIG,
                                     id,
                                     complement,
                                     0,
                                     AS_OTHER_UNITIG);
        tfrag=GetFragment(fragmentStore,tid);

        if ( extendingLeft ) {
          // need to set aid to unitig to preserve positive ahang -- and we now should always
          // have a bhang of zero.
          append_left=1;
          aid=tid;
          bid=cid;
          // and ahang estimate is the diff in size between
          // new unitig (GetFragment(fragmentStore,tid)->length) and old unitig (right-left)
          ahang = GetFragment(fragmentStore,tid)->length - (right-left);
          bhang = 0;
        }  else {
          //  --------
          //       ---+++
          //  We extended the unitig by "+++".  The ahang is just the
          //  start position of the original placement, and the bhang
          //  is the amount extended (as above).
          aid=cid;
          bid=tid;
          ahang = left;
          bhang = GetFragment(fragmentStore,tid)->length - (right-left);
        }
        SeedMAWithFragment(ma->lid, aid, opp);

        //  The expected length of this alignment is always the length of the original unitig.
        int32 ovl = right - left;

        olap_success = GetAlignmentTrace(aid, 0, bid, &ahang, &bhang, ovl, trace, &otype, DP_Compare, DONT_SHOW_OLAP, 0, GETALIGNTRACE_MERGE, AS_CGW_ERROR_RATE);

        if (!olap_success)
          olap_success = GetAlignmentTrace(aid, 0, bid, &ahang, &bhang, ovl, trace, &otype, Local_Overlap_AS_forCNS, DONT_SHOW_OLAP, 0, GETALIGNTRACE_MERGE, AS_CGW_ERROR_RATE);

        //  If the alignment fails -- usually because the ahang is
        //  negative -- return an empty alignment.  This causes
        //  extendClearRanges (the sole user of this function) to
        //  gracefully handle the failure.
        //
        if (olap_success == 0) {
          return(NULL);
          assert(olap_success);
        }

        ApplyAlignment(aid, 0, NULL, bid, ahang, Getint32(trace,0));
        RefreshMANode(ma->lid, 0, opp, NULL, NULL, 0, 0);

        //PrintAlignment(stderr,ma->lid,0,-1);

        break;
      }
    }
  }

  // Now, want to generate a new MultiAlignT which is an appropriate adjustment of original
  cma = CreateMultiAlignT();

  cma->maID = -1;
  cma->data = oma->data;

  cma->consensus = CreateVA_char(GetMANodeLength(ma->lid)+1);
  cma->quality   = CreateVA_char(GetMANodeLength(ma->lid)+1);

  GetMANodeConsensus(ma->lid, cma->consensus, cma->quality);

  // no deltas required at this stage
  // merge the f_lists and u_lists by cloning and concating

  cma->f_list = Clone_VA(oma->f_list);
  cma->u_list = Clone_VA(oma->u_list);
  cma->v_list = Clone_VA(oma->v_list);

  cma->fdelta = CreateVA_int32(0);
  cma->udelta = CreateVA_int32(0);

  {
    CNS_AlignedContigElement *components;
    CNS_AlignedContigElement *tcomponents;
    CNS_AlignedContigElement *contig_component;
    CNS_AlignedContigElement *aligned_component;
    int32 ifrag=0;
    int32 iunitig=0;
    IntMultiPos *imp;
    IntUnitigPos *iup;
    Fragment *frag;
    int32 ci=0;
    int32 tc=0; //unitig component index
    int32 bgn,end,left,right,tmp;
    int32 range_bgn=0,range_end=0,new_tig=0;
    components=GetCNS_AlignedContigElement(fragment_positions,cfrag->components);
    tcomponents=GetCNS_AlignedContigElement(fragment_positions,tfrag->components);
    // make adjustments to positions
    if ( append_left) {
      // fragments within unitig are 0 to tfrag->n_components
      // and cfrag->n_components-num_unitigs
      range_bgn = 0;
      range_end = tfrag->n_components-1;
      new_tig=cfrag->n_components-num_unitigs;
    } else {  // changed unitig on right
      // fragments within unitig are (num_frags-tfrag->n_components) to num_frags
      // and cfrag->n_components-1;
      range_bgn = (num_frags-(tfrag->n_components-1));
      range_end = num_frags;
      new_tig=cfrag->n_components-1;
    }
    while (ci < cfrag->n_components) {
      contig_component = &components[ci];
      if ( contig_component->frg_or_utg == CNS_ELEMENT_IS_FRAGMENT && contig_component->idx.fragment.frgInUnitig == unitig_iid ) {
        aligned_component = &tcomponents[tc++];
        if ( complement ) {
          bgn = tfrag->length-aligned_component->position.bgn;
          end = tfrag->length-aligned_component->position.end;
        } else {
          bgn = aligned_component->position.bgn;
          end = aligned_component->position.end;
        }
        frag = tfrag;
#ifdef DEBUG_POSITIONS
        fprintf(stderr,"compci->idx %12d bgn: %10d end: %10d\n",ci,bgn,end);
#endif
      } else if ( ci == new_tig ) {
        aligned_component =  &tcomponents[tc++];
        if ( complement ) {
          bgn = tfrag->length-aligned_component->position.bgn;
          end = tfrag->length-aligned_component->position.end;
        } else {
          bgn = aligned_component->position.bgn;
          end = aligned_component->position.end;
        }
        frag = tfrag;
#ifdef DEBUG_POSITIONS
        fprintf(stderr,"compci->idx %12d bgn: %10d end: %10d\n",ci,bgn,end);
#endif
      } else {
        aligned_component =  contig_component;
        bgn = aligned_component->position.bgn;
        end = aligned_component->position.end;
        frag = cfrag;
#ifdef DEBUG_POSITIONS
        fprintf(stderr,"compci->idx %12d bgn: %10d end: %10d\n",ci,bgn,end);
#endif
      }
      left = (bgn<end)?bgn:end;
      right = (bgn<end)?end:bgn;
      //if ( ci == new_tig ) {
      //    left = 0;
      //    right = frag->length;
      //}
      left = GetColumn(columnStore, GetBead(beadStore,frag->firstbead.get() + left   )->column_index)->ma_index;
      right= GetColumn(columnStore, GetBead(beadStore,frag->firstbead.get() + right-1)->column_index)->ma_index + 1;
      tmp = bgn;
      bgn = (bgn<end)?left:right;
      end = (tmp<end)?right:left;
      if (aligned_component->frg_or_utg==CNS_ELEMENT_IS_UNITIG) {
        iup = GetIntUnitigPos(cma->u_list,iunitig);
        iup->position.bgn = bgn;
        iup->position.end = end;
        iup->delta_length = 0;
        iup->delta = NULL;
#ifdef DEBUG_POSITIONS
        fprintf(stderr," element %d at %d,%d\n",
                ci,bgn,end);
#endif
        ci++;iunitig++;
      } else {
        imp = GetIntMultiPos(cma->f_list,ifrag);
        imp->ident = aligned_component->idx.fragment.frgIdent;
        imp->contained = aligned_component->idx.fragment.frgContained;
        imp->position.bgn = bgn;
        imp->position.end = end;
#ifdef DEBUG_POSITIONS
        fprintf(stderr," element %d at %d,%d\n", ci,bgn,end);
#endif
        imp->delta_length = 0;
        imp->delta = NULL;
        ci++;ifrag++;
      }
    }
  }
  DeleteMANode(ma->lid);
  return cma;
}
Exemplo n.º 6
0
void
dumpContigInfo(ChunkInstanceT *contig) {
  int           contigOrientation;
  MultiAlignT  *ma;
  char         *seq1;
  int           len1;

  VA_TYPE(char) *consensus = CreateVA_char(2048);
  VA_TYPE(char) *quality   = CreateVA_char(2048);

  fprintf( stderr, "*********************** contig analysis **************************\n");
  fprintf( stderr, "analyzing contig: %d\n", contig->id);

  if (contig->offsetAEnd.mean < contig->offsetBEnd.mean)
    contigOrientation = 0;
  else
    contigOrientation = 1;

  fprintf(stderr, "contig orientation: %d\t length: %d  contig offsetAEnd: %d\t offsetBEnd: %d\n",
          contigOrientation,
          (int)contig->bpLength.mean,
          (int)contig->offsetAEnd.mean,
          (int)contig->offsetBEnd.mean);

  ma = ScaffoldGraph->tigStore->loadMultiAlign(contig->id, ScaffoldGraph->ContigGraph->type == CI_GRAPH);

  // Get the consensus sequences for the contig from the Store
  GetConsensus(ScaffoldGraph->ContigGraph, contig->id, consensus, quality);

  seq1 = Getchar(consensus, 0);
  len1 = strlen(seq1);

  if (contigOrientation == 1)
    reverseComplementSequence(seq1, len1);

  if (len1 < 5000) {
    fprintf( stderr, ">contig%d consensus seq (flipped to reflect scaff orientation)\n", contig->id);
    fprintf( stderr, "%s\n", seq1);
  } else {
    char tmpchar = seq1[2500];
    seq1[2500] = '\0';

    fprintf( stderr, ">contig%d left end\n", contig->id);
    fprintf( stderr, "%s\n", seq1);

    seq1[2500] = tmpchar;

    fprintf( stderr, ">contig%d right end\n", contig->id);
    fprintf( stderr, "%s\n", seq1 + len1 - 2501);
  }

#if 1
  int numUnitigs = GetNumIntUnitigPoss(ma->u_list);
  fprintf( stderr, "number unitigs: %d\n", numUnitigs);

  int i;
  for (i = 0; i < numUnitigs; i++) {
    IntUnitigPos *upos = GetIntUnitigPos( ma->u_list, i);
    ChunkInstanceT *unitig = GetGraphNode( ScaffoldGraph->CIGraph, upos->ident);
    MultiAlignT *uma = ScaffoldGraph->tigStore->loadMultiAlign(unitig->id, ScaffoldGraph->CIGraph->type == CI_GRAPH);
    IntMultiPos *ump;
    int icntfrag;

    fprintf( stderr, "  unitig: %d\t num frags: %ld surrogate: %d\n", unitig->id, GetNumIntMultiPoss(uma->f_list),
             (unitig->flags.bits.isStoneSurrogate || unitig->flags.bits.isWalkSurrogate));

    if (unitig->flags.bits.isStoneSurrogate ||
        unitig->flags.bits.isWalkSurrogate) {
      fprintf (stderr, "  surrogate unitig offsetAEnd: %f, offsetBEnd: %f\n", unitig->offsetAEnd.mean, unitig->offsetBEnd.mean);

      unitig = GetGraphNode( ScaffoldGraph->CIGraph, unitig->info.CI.baseID);
      fprintf ( stderr, "  using original unitig: %d\n", unitig->id);
      uma = ScaffoldGraph->tigStore->loadMultiAlign(unitig->id,
                                                    ScaffoldGraph->CIGraph->type == CI_GRAPH);
    }

    // now print out info on the frags in the unitig
    for (icntfrag = 0; icntfrag < GetNumIntMultiPoss(uma->f_list); icntfrag++) {
      IntMultiPos *imp = GetIntMultiPos(uma->f_list, icntfrag);
      CIFragT     *frag = GetCIFragT(ScaffoldGraph->CIFrags, imp->ident);

      fprintf(stderr, "    frag: %6d\t contig pos (5p, 3p): %6d, %6d\n",
              imp->ident, (int) frag->contigOffset5p.mean, (int) frag->contigOffset3p.mean);
    }
  }
#endif


#if 1
  CIEdgeT * e;
  GraphEdgeIterator edges(ScaffoldGraph->ContigGraph, contig->id, ALL_END, ALL_EDGES);

  //  FALSE == ITERATOR_VERBOSE

  while((e = edges.nextRaw()) != NULL)
    PrintGraphEdge( stderr, ScaffoldGraph->ContigGraph, "Analyzing edge", e, 0);
#endif

  DeleteVA_char(consensus);
  DeleteVA_char(quality);
}
Exemplo n.º 7
0
void
writeCCO(FILE *asmFile, bool doWrite) {
  SnapConConMesg      cco;
  GenericMesg         pmesg = { &cco, MESG_CCO };
  GraphNodeIterator   contigs;
  ContigT             *contig;

  fprintf(stderr, "writeCCO()--\n");

  InitGraphNodeIterator(&contigs, ScaffoldGraph->ContigGraph, GRAPH_NODE_DEFAULT);
  while ((contig = NextGraphNodeIterator(&contigs)) != NULL) {
    assert(contig->id >= 0);
    assert(contig->id < GetNumGraphNodes(ScaffoldGraph->ContigGraph));

    if (contig->flags.bits.isChaff)
      continue;

    NodeCGW_T *unitig = GetGraphNode(ScaffoldGraph->CIGraph, contig->info.Contig.AEndCI);

    if ((ScaffoldGraph->tigStore->getNumUnitigs(contig->id, FALSE) == 1) &&
        (contig->scaffoldID == NULLINDEX) &&
        (unitig->info.CI.numInstances > 0))
      //  Contig is a surrogate instance
      continue;

    MultiAlignT *ma = ScaffoldGraph->tigStore->loadMultiAlign(contig->id, FALSE);

    cco.eaccession  = AS_UID_fromInteger(getUID(uidServer));
    cco.iaccession  = contig->id;
    cco.placed      = ScaffoldGraph->tigStore->getContigStatus(contig->id);
    cco.length      = GetMultiAlignLength(ma);
    cco.consensus   = Getchar(ma->consensus, 0);
    cco.quality     = Getchar(ma->quality, 0);
    cco.forced      = 0;
    cco.num_pieces  = GetNumIntMultiPoss(ma->f_list);
    cco.num_unitigs = GetNumIntMultiPoss(ma->u_list);
    cco.num_vars    = GetNumIntMultiPoss(ma->v_list);
    cco.pieces      = NULL;
    cco.unitigs     = NULL;
    cco.vars        = NULL;

    if (cco.consensus == NULL)
      fprintf(stderr, "buildCCOMessage()-- contig %d missing consensus sequence\n",
              cco.iaccession);
    assert(cco.consensus != NULL);
    if (cco.length != strlen(cco.consensus))
      fprintf(stderr, "buildCCOMessage()-- contig %d length %d != consensus string length "F_SIZE_T"\n",
              cco.iaccession, cco.length, strlen(cco.consensus));
    assert(cco.length == strlen(cco.consensus));

    if (cco.num_pieces > 0) {
      cco.pieces = (SnapMultiPos *)safe_malloc(cco.num_pieces * sizeof(SnapMultiPos));

      for(int32 i=0; i<cco.num_pieces; i++) {
        IntMultiPos *imp = GetIntMultiPos(ma->f_list, i);

        cco.pieces[i].type         = imp->type;
        cco.pieces[i].eident       = FRGmap.lookup(imp->ident);
        cco.pieces[i].delta_length = imp->delta_length;
        cco.pieces[i].position     = imp->position;
        cco.pieces[i].delta        = imp->delta;
      }
    }

    if (cco.num_unitigs > 0) {
      cco.unitigs = (UnitigPos *)safe_malloc(cco.num_unitigs * sizeof(UnitigPos));

      for(int32 i=0; i<cco.num_unitigs; i++) {
        IntUnitigPos *imp = GetIntUnitigPos(ma->u_list, i);

        cco.unitigs[i].type         = imp->type;
        cco.unitigs[i].eident       = UTGmap.lookup(imp->ident);
        cco.unitigs[i].position     = imp->position;
        cco.unitigs[i].delta        = imp->delta;
        cco.unitigs[i].delta_length = imp->delta_length;
      }
    }

    if (cco.num_vars > 0) {
      cco.vars = (IntMultiVar *)safe_malloc(cco.num_vars * sizeof(IntMultiVar));

      for(int32 i=0; i<cco.num_vars; i++) {
        IntMultiVar *imv = GetIntMultiVar(ma->v_list, i);

        cco.vars[i].var_id                = imv->var_id;
        cco.vars[i].phased_id             = imv->phased_id;

        cco.vars[i].position              = imv->position;
        cco.vars[i].num_reads             = imv->num_reads;
        cco.vars[i].num_alleles           = imv->num_alleles;
        cco.vars[i].num_alleles_confirmed = imv->num_alleles_confirmed;
        cco.vars[i].min_anchor_size       = imv->min_anchor_size;
        cco.vars[i].var_length            = imv->var_length;

        cco.vars[i].alleles               = imv->alleles;
        cco.vars[i].var_seq_memory        = imv->var_seq_memory;
        cco.vars[i].read_id_memory        = imv->read_id_memory;

        cco.vars[i].enc_num_reads         = NULL;
        cco.vars[i].enc_weights           = NULL;
        cco.vars[i].enc_var_seq           = NULL;
        cco.vars[i].enc_read_ids          = NULL;
      }
    }

    if (doWrite)
      WriteProtoMesg_AS(asmFile, &pmesg);

    safe_free(cco.pieces);
    safe_free(cco.unitigs);
    safe_free(cco.vars);

    CCOmap.add(cco.iaccession, cco.eaccession);
  }
}