Esempio n. 1
0
void writeAFGFromTigStore(FILE *asmFile, bool doWrite) {
  AugFragMesg       afg;
  GenericMesg       pmesg = { &afg, MESG_AFG };
  gkFragment        fr;

  fprintf(stderr, "writeAFGFromTigStore()--\n");

  for (uint32 tigID = 0; tigID < ScaffoldGraph->tigStore->numUnitigs(); tigID++) {
    MultiAlignT *ma = ScaffoldGraph->tigStore->loadMultiAlign(tigID, TRUE);
    if (ma == NULL) 
      continue;
    
    for (uint32 i=0; i<GetNumIntMultiPoss(ma->f_list); i++) {
      IntMultiPos *imp = GetIntMultiPos(ma->f_list, i);
      ScaffoldGraph->gkpStore->gkStore_getFragment(imp->ident, &fr, GKFRAGMENT_INF);
      afg.eaccession     = fr.gkFragment_getReadUID();
      afg.iaccession     = fr.gkFragment_getReadIID();
      afg.mate_status    = UNASSIGNED_MATE;
      afg.chaff          = 0;
      afg.clear_rng.bgn  = fr.gkFragment_getClearRegionBegin();
      afg.clear_rng.end  = fr.gkFragment_getClearRegionEnd  ();

      if (doWrite)
        WriteProtoMesg_AS(asmFile, &pmesg);

      FRGmap.add(afg.iaccession, afg.eaccession);

      if ((AS_UID_isString(afg.eaccession) == FALSE) &&
          (uidMin <= AS_UID_toInteger(afg.eaccession)))
        uidMin = AS_UID_toInteger(afg.eaccession) + 1;
    }
  }
}
Esempio n. 2
0
//  Emit one UTG message for every unitig node of the CI graph, skipping chaff
//  and surrogate instances (RESOLVEDREPEATCHUNK_CGW).
//
//    asmFile - stream the messages are written to; only used when doWrite is set.
//    doWrite - when false nothing is written, but UTGmap is still updated.
//
//  Each successfully built unitig is registered in UTGmap (iaccession -> eaccession).
//
void
writeUTGFromCGW(FILE *asmFile, bool doWrite) {
  SnapUnitigMesg      utg;
  GenericMesg         pmesg = { &utg, MESG_UTG };
  GraphNodeIterator   unitigs;
  ChunkInstanceT     *ci;

  fprintf(stderr, "writeUTGFromCGW()--\n");

  InitGraphNodeIterator(&unitigs, ScaffoldGraph->CIGraph, GRAPH_NODE_DEFAULT);
  while ((ci = NextGraphNodeIterator(&unitigs)) != NULL) {
    assert(ci->id >= 0);
    assert(ci->id < GetNumGraphNodes(ScaffoldGraph->CIGraph));

    if (ci->flags.bits.isChaff)
      //  Don't write chaff
      continue;

    if (ci->type == RESOLVEDREPEATCHUNK_CGW)
      //  Don't write surrogate instances
      continue;

    //  writeUTGFromTigStore() treats the result of buildUTGMessage() as a
    //  success flag; do the same here.  Presumably a false result means 'utg'
    //  was not filled in, in which case writing it, freeing its f_list, or
    //  registering it in UTGmap would all operate on stale or uninitialized
    //  data.
    if (!buildUTGMessage(ci->id, &utg))
      continue;

    if (doWrite)
      WriteProtoMesg_AS(asmFile, &pmesg);

    //  The fragment list is released here once the message is written.
    safe_free(utg.f_list);

    UTGmap.add(utg.iaccession, utg.eaccession);
  }
}
Esempio n. 3
0
//  Read protoIO messages from stdin, pass every IUM message through
//  fixUnitig(), and echo each message to stdout.
//
//  Usage: prog -O ovlStore < unitigs.cgb > fixedUnitigs.cgb
//
//  Exits with status 1 (after printing the usage line) when the overlap store
//  was not supplied or an unknown option was seen; exits 0 otherwise.
//
int
main(int argc, char **argv) {
    OverlapStore  *ovs        = NULL;

    argc = AS_configure(argc, argv);

    int arg=1;
    int err=0;
    while (arg < argc) {
        if        (strcmp(argv[arg], "-O") == 0) {
            //  Only consume the next word if there is one; otherwise
            //  argv[argc] (NULL) would be handed to the store opener.
            if (arg + 1 < argc)
                ovs = AS_OVS_openOverlapStore(argv[++arg]);
            else
                err++;
        } else {
            err++;
        }

        arg++;
    }
    if ((ovs == NULL) || (err)) {
        fprintf(stderr, "usage: %s -O ovlStore < unitigs.cgb > fixedUnitigs.cgb\n", argv[0]);
        exit(1);
    }

    GenericMesg   *pmesg = NULL;
    while ((ReadProtoMesg_AS(stdin, &pmesg) != EOF)) {
        //  fixUnitig() may itself write additional IUM messages to stdout
        //  before the (possibly shortened) original is written below.
        if (pmesg->t == MESG_IUM)
            fixUnitig((IntUnitigMesg *)(pmesg->m), ovs);

        WriteProtoMesg_AS(stdout, pmesg);
    }

    exit(0);
}
Esempio n. 4
0
void
writeMDI(FILE *asmFile, bool doWrite) {
  SnapMateDistMesg      mdi;
  GenericMesg           pmesg = { &mdi, MESG_MDI };

  fprintf(stderr, "writeMDI()--\n");

  for (int32 i=1; i<GetNumDistTs(ScaffoldGraph->Dists); i++){
    DistT *dptr = GetDistT(ScaffoldGraph->Dists, i);

    //  Believe whatever estimate is here.  We used to reset to zero and the input (except we had
    //  already munged the input stddev) if there were 30 or fewer samples.

    mdi.erefines    = ScaffoldGraph->gkpStore->gkStore_getLibrary(i)->libraryUID;
    mdi.irefines    = i;
    mdi.mean        = dptr->mu;
    mdi.stddev      = dptr->sigma;
    mdi.min         = INT32_MIN;
    mdi.max         = INT32_MAX;
    mdi.num_buckets = 0;
    mdi.histogram   = NULL;

    //  The histogram does not get stored in a checkpoint.  If the current run of CGW did not have
    //  enough samples to recompute the histogram, we have to live without it

    if (dptr->bnum > 0) {
      mdi.min         = dptr->min;
      mdi.max         = dptr->max;
      mdi.num_buckets = dptr->bnum;
      mdi.histogram   = dptr->histogram;
    }

    if (doWrite)
      WriteProtoMesg_AS(asmFile, &pmesg);

    MDImap.add(mdi.irefines, mdi.erefines);

    if ((AS_UID_isString(mdi.erefines) == FALSE) &&
        (uidMin <= AS_UID_toInteger(mdi.erefines)))
      uidMin = AS_UID_toInteger(mdi.erefines) + 1;

    safe_free(dptr->histogram);

    dptr->histogram  = NULL;
    dptr->numSamples = 0;
    dptr->bnum       = 0;
  }
}
Esempio n. 5
0
void writeUTGFromTigStore(FILE *asmFile, bool doWrite) {
  SnapUnitigMesg      utg;
  GenericMesg         pmesg = { &utg, MESG_UTG };

  fprintf(stderr, "writeUTGFromTigStore()--\n");

  for (uint32 tigID = 0; tigID < ScaffoldGraph->tigStore->numUnitigs(); tigID++) {
    if (buildUTGMessage(tigID, &utg)) {
      if (doWrite)
        WriteProtoMesg_AS(asmFile, &pmesg);

      safe_free(utg.f_list);
      UTGmap.add(utg.iaccession, utg.eaccession);
    }
  }
}
Esempio n. 6
0
//  Walk the fragments of a unitig (from the second one on), updating each
//  against its parent.  When an update reports a failure, the unitig is cut at
//  that fragment: the caller's unitig keeps the leading fragments, and the
//  trailing fragments become a new unitig which is fixed recursively and then
//  written to stdout.
//
//    iunitig - unitig to repair; its num_frags is shortened on a split, and
//              the first fragment of the split-off tail is modified in place
//              (the tail shares the caller's f_list storage).
//    ovs     - overlap store handed through to updateFragmentWithParent().
//
//  The caller is expected to write 'iunitig' itself.
//
void
fixUnitig(IntUnitigMesg *iunitig, OverlapStore *ovs) {
    for (int fragIdx = 1; fragIdx < iunitig->num_frags; fragIdx++) {
        int brokenAt = updateFragmentWithParent(iunitig, fragIdx, ovs);

        //  -1 signals the fragment was updated without trouble.
        if (brokenAt == -1)
            continue;

        IntUnitigMesg  tail = *iunitig;

        assert(brokenAt == fragIdx);

        //  A huge internal accession marks the new unitig as still needing a
        //  real one.
        tail.iaccession += 1000000000;

        //  The tail is a view onto the caller's fragment array, beginning at
        //  the fragment that failed.
        tail.num_frags  = iunitig->num_frags - brokenAt;
        tail.f_list     = iunitig->f_list    + brokenAt;

        //  The first fragment of the tail is reset to have no parent, no
        //  hangs and no container.
        tail.f_list[0].parent    = 0;
        tail.f_list[0].ahang     = 0;
        tail.f_list[0].bhang     = 0;
        tail.f_list[0].contained = 0;

        //  Truncate the caller's unitig; this also terminates the loop.
        iunitig->num_frags = brokenAt;

        //  Repair the tail (possibly splitting it further) before emitting it.
        fixUnitig(&tail, ovs);

        GenericMesg   envelope;

        envelope.t = MESG_IUM;
        envelope.m = &tail;

        WriteProtoMesg_AS(stdout, &envelope);
    }
}
Esempio n. 7
0
void
writeAFGFromCGW(FILE *asmFile, bool doWrite) {
  AugFragMesg       afg;
  GenericMesg       pmesg = { &afg, MESG_AFG };
  gkFragment        fr;
  gkStream         *fs = new gkStream(ScaffoldGraph->gkpStore, 0, 0, GKFRAGMENT_INF);

  fprintf(stderr, "writeAFGFromCGW()--\n");

  for (int32 i=1; i<GetNumCIFragTs(ScaffoldGraph->CIFrags); i++) {
    CIFragT  *cifrag = GetCIFragT(ScaffoldGraph->CIFrags, i);

    fs->next(&fr);

    if (cifrag->flags.bits.isDeleted)
      continue;

    assert(cifrag->read_iid == i);
    assert(cifrag->read_iid == fr.gkFragment_getReadIID());

    afg.eaccession     = fr.gkFragment_getReadUID();
    afg.iaccession     = i;
    afg.mate_status    = cifrag->flags.bits.mateDetail;
    afg.chaff          = cifrag->flags.bits.isChaff;
    afg.clear_rng.bgn  = fr.gkFragment_getClearRegionBegin();
    afg.clear_rng.end  = fr.gkFragment_getClearRegionEnd  ();

    if (doWrite)
      WriteProtoMesg_AS(asmFile, &pmesg);

    FRGmap.add(afg.iaccession, afg.eaccession);

    if ((AS_UID_isString(afg.eaccession) == FALSE) &&
        (uidMin <= AS_UID_toInteger(afg.eaccession)))
      uidMin = AS_UID_toInteger(afg.eaccession) + 1;
  }

  delete fs;
}
Esempio n. 8
0
void
writeAMP(FILE *asmFile, bool doWrite) {
  AugMatePairMesg       amp;
  GenericMesg           pmesg = { &amp, MESG_AMP };

  fprintf(stderr, "writeAMP()--\n");

  for (int32 i=1; i<GetNumCIFragTs(ScaffoldGraph->CIFrags); i++) {
    CIFragT            *cif1 = GetCIFragT(ScaffoldGraph->CIFrags, i);
    CIFragT            *cif2 = NULL;

    if (cif1->flags.bits.isDeleted)
      continue;

    if (cif1->mate_iid == 0)
      continue;

    cif2 = GetCIFragT(ScaffoldGraph->CIFrags, cif1->mate_iid);

    if (cif2->flags.bits.isDeleted)
      continue;

    if (cif1->read_iid > cif2->read_iid)
      continue;

    assert(cif1->flags.bits.edgeStatus == cif2->flags.bits.edgeStatus);
    assert(cif1->flags.bits.mateDetail == cif2->flags.bits.mateDetail);

    amp.fragment1   = FRGmap.lookup(cif1->read_iid);
    amp.fragment2   = FRGmap.lookup(cif2->read_iid);
    amp.mate_status = cif1->flags.bits.mateDetail;

    if (doWrite)
      WriteProtoMesg_AS(asmFile, &pmesg);
  }
}
Esempio n. 9
0
//  Create (and truncate) the file "<fileprefix>.unused.ovl".
//
//  The code that used to fill this file -- for every fragment of every unitig,
//  an OVL message for each best edge leading to a fragment outside that
//  unitig -- is compiled out with '#if 0', so the file is currently always
//  left empty.
//
//    unitigs    - only referenced by the disabled code.
//    fileprefix - prefix of the output file name.
//
//  Wow, this is ancient.
//
void
writeOverlapsUsed(UnitigVector &unitigs,
                  char         *fileprefix) {
  char         filename[FILENAME_MAX] = {0};
#if 0
  GenericMesg  pmesg;
  OverlapMesg  omesg;
#endif

  //  Bounded formatting: an over-long prefix is truncated instead of
  //  overrunning 'filename'.
  snprintf(filename, sizeof(filename), "%s.unused.ovl", fileprefix);

  FILE *file = fopen(filename, "w");

  if (file == NULL) {
    //  Say why we are about to stop, and make sure fclose(NULL) is never
    //  reached in builds where assert() is compiled out.
    fprintf(stderr, "writeOverlapsUsed()-- failed to open '%s' for writing.\n", filename);
    assert(file != NULL);
    return;
  }

#if 0
  for (uint32  ti=0; ti<unitigs.size(); ti++) {
    Unitig  *utg = unitigs[ti];

    if (utg == NULL)
      continue;

    for (uint32 fi=0; fi<utg->ufpath.size(); fi++) {
      ufNode  *frg = &utg->ufpath[fi];

      //  Where is our best overlap?  Contained or dovetail?

      BestEdgeOverlap *bestedge5 = OG->getBestEdgeOverlap(frg->ident, false);
      BestEdgeOverlap *bestedge3 = OG->getBestEdgeOverlap(frg->ident, true);

      int              bestident5 = 0;
      int              bestident3 = 0;

      if (bestedge5) {
        bestident5 = bestedge5->fragId();

        if ((bestident5 > 0) && (utg->fragIn(bestident5) != utg->id())) {
          omesg.aifrag          = frg->ident;
          omesg.bifrag          = bestident5;
          omesg.ahg             = bestedge5->ahang();
          omesg.bhg             = bestedge5->bhang();
          omesg.orientation.setIsUnknown();
          omesg.overlap_type    = AS_DOVETAIL;
          omesg.quality         = 0.0;
          omesg.min_offset      = 0;
          omesg.max_offset      = 0;
          omesg.polymorph_ct    = 0;
          omesg.alignment_trace = NULL;
#ifdef AS_MSG_USE_OVL_DELTA
          omesg.alignment_delta = NULL;
#endif

          //  This overlap is off of the 5' end of this fragment.
          if (bestedge5->frag3p() == false)
            omesg.orientation.setIsOuttie();
          if (bestedge5->frag3p() == true)
            omesg.orientation.setIsAnti();

          pmesg.t = MESG_OVL;
          pmesg.m = &omesg;

          WriteProtoMesg_AS(file, &pmesg);
        }
      }

      if (bestedge3) {
        bestident3 = bestedge3->fragId();

        if ((bestident3 > 0) && (utg->fragIn(bestident3) != utg->id())) {
          omesg.aifrag          = frg->ident;
          omesg.bifrag          = bestident3;
          omesg.ahg             = bestedge3->ahang();
          omesg.bhg             = bestedge3->bhang();
          omesg.orientation.setIsUnknown();
          omesg.overlap_type    = AS_DOVETAIL;
          omesg.quality         = 0.0;
          omesg.min_offset      = 0;
          omesg.max_offset      = 0;
          omesg.polymorph_ct    = 0;
          omesg.alignment_trace = NULL;
#ifdef AS_MSG_USE_OVL_DELTA
          omesg.alignment_delta = NULL;
#endif

          //  This overlap is off of the 3' end of this fragment.
          if (bestedge3->frag3p() == false)
            omesg.orientation.setIsNormal();
          if (bestedge3->frag3p() == true)
            omesg.orientation.setIsInnie();

          pmesg.t = MESG_OVL;
          pmesg.m = &omesg;

          WriteProtoMesg_AS(file, &pmesg);
        }
      }
    }
  }
#endif

  fclose(file);
}
Esempio n. 10
0
int
main(int argc, char **argv) {
    int            msglist[NUM_OF_REC_TYPES + 1];
    FILE          *outfile[NUM_OF_REC_TYPES + 1];
    off_t          count[NUM_OF_REC_TYPES + 1];
    off_t          size[NUM_OF_REC_TYPES + 1];
    int            i;

    for (i=0; i<=NUM_OF_REC_TYPES; i++) {
        msglist[i] = 0;
        outfile[i] = 0L;
        count[i]   = 0;
        size[i]    = 0;
    }

    int arg = 1;
    int inc = 0;
    int err = 0;
    int msg = 0;

    argc = AS_configure(argc, argv);

    while (arg < argc) {
        if        (strcmp(argv[arg], "-i") == 0) {
            inc = 1;
        } else if (strcmp(argv[arg], "-x") == 0) {
            inc = 0;
        } else if (strcmp(argv[arg], "-o") == 0) {
            errno = 0;
            FILE *F = fopen(argv[++arg], "w");
            if (errno)
                fprintf(stderr, "%s: failed to open output file '%s': %s\n", argv[0], argv[arg], strerror(errno)), exit(1);

            //  Depending on the include flag, we either write all messages
            //  listed in our msglist (or write all message not in the
            //  msglist) to the freshly opened file.
            //
            if (inc) {
                //  Include message i in the output if it was listed
                for (i=1; i<=NUM_OF_REC_TYPES; i++)
                    if ((outfile[i] == NULL) && (msglist[i] > 0))
                        outfile[i] = F;
            } else {
                //  Include message i in the output if it was not listed
                for (i=1; i<=NUM_OF_REC_TYPES; i++)
                    if ((outfile[i] == NULL) && (msglist[i] == 0))
                        outfile[i] = F;
            }

            for (i=0; i<=NUM_OF_REC_TYPES; i++)
                msglist[i] = 0;
        } else if (strcmp(argv[arg], "-m") == 0) {
            int type = GetMessageType(argv[++arg]);
            if ((type >= 1) && (type <= NUM_OF_REC_TYPES)) {
                msglist[type]++;
                msg++;
            } else {
                fprintf(stderr, "%s: invalid message type '%s'.\n", argv[0], argv[arg]);
                err = 1;
            }
        } else if (strcmp(argv[arg], "-h") == 0) {
            err = 1;
        } else {
            int type = GetMessageType(argv[arg]);
            if ((type >= 1) && (type <= NUM_OF_REC_TYPES)) {
                msglist[type]++;
                msg++;
            } else {
                fprintf(stderr, "%s: invalid option '%s'.\n", argv[0], argv[arg]);
                err = 1;
            }
        }
        arg++;
    }

    if (err)
        usage(argv[0]), exit(1);

    //  Assume everything else goes to stdout.  We need to obey the inc
    //  flag, still, though.
    //
    if (inc) {
        //  Include message i in the output if it was listed
        for (i=1; i<=NUM_OF_REC_TYPES; i++)
            if ((outfile[i] == NULL) && (msglist[i] > 0))
                outfile[i] = stdout;
    } else {
        //  Include message i in the output if it was not listed
        for (i=1; i<=NUM_OF_REC_TYPES; i++)
            if ((outfile[i] == NULL) && (msglist[i] == 0))
                outfile[i] = stdout;
    }

    GenericMesg   *pmesg;
    off_t          currPos = 0;
    off_t          prevPos = 0;

    while (ReadProtoMesg_AS(stdin, &pmesg) != EOF) {
        assert(pmesg->t <= NUM_OF_REC_TYPES);

        currPos = AS_UTL_ftell(stdin);

        if (outfile[pmesg->t] != NULL) {
            count[pmesg->t]++;

            size[pmesg->t] += currPos - prevPos;

            WriteProtoMesg_AS(outfile[pmesg->t], pmesg);
        }

        prevPos = currPos;
    }

    for (i=0; i<=NUM_OF_REC_TYPES; i++)
        if (count[i] > 0)
            fprintf(stderr, "%s num "F_OFF_T" size "F_OFF_T" avg %f\n",
                    MessageTypeName[i], count[i], size[i], (double)size[i] / count[i]);

    exit(0);
}
Esempio n. 11
0
void
writeSLK(FILE *asmFile, bool doWrite) {
  SnapScaffoldLinkMesg slk;
  GenericMesg          pmesg = { &slk, MESG_SLK };
  GraphNodeIterator    scaffolds;
  CIScaffoldT         *scaffold;
  CIScaffoldT         *scafmate;

  fprintf(stderr, "writeSLK()--\n");

  InitGraphNodeIterator(&scaffolds, ScaffoldGraph->ScaffoldGraph, GRAPH_NODE_DEFAULT);
  while ((scaffold = NextGraphNodeIterator(&scaffolds)) != NULL) {
    GraphEdgeIterator    edges(ScaffoldGraph->ScaffoldGraph, scaffold->id, ALL_END, ALL_EDGES);
    CIEdgeT             *edge;
    CIEdgeT             *redge;

    while((edge = edges.nextMerged()) != NULL) {
      if (edge->idA != scaffold->id)
        continue;

      scafmate = GetGraphNode(ScaffoldGraph->ScaffoldGraph, edge->idB);

      assert(!isOverlapEdge(edge));

      slk.escaffold1       = SCFmap.lookup(scaffold->id);
      slk.escaffold2       = SCFmap.lookup(scafmate->id);

      slk.orientation      = edge->orient;

      slk.mean_distance    = edge->distance.mean;
      slk.std_deviation    = sqrt(edge->distance.variance);
      slk.num_contributing = edge->edgesContributing;

      int edgeTotal = slk.num_contributing;
      int edgeCount = 0;

      if(edgeTotal < 2)
        continue;

      slk.jump_list = (SnapMate_Pairs *)safe_malloc(sizeof(SnapMate_Pairs) * slk.num_contributing);

      if (edge->flags.bits.isRaw) {
        assert(edgeTotal <= 1);    // sanity check

        if (edgeTotal == 1) {
          slk.jump_list[edgeCount].in1 = FRGmap.lookup(edge->fragA);
          slk.jump_list[edgeCount].in2 = FRGmap.lookup(edge->fragB);
        }else{
          slk.jump_list[edgeCount].in1 = AS_UID_undefined();
          slk.jump_list[edgeCount].in2 = AS_UID_undefined();
        }

        slk.jump_list[edgeCount].type.setIsMatePair();

        edgeCount++;

      } else {
        redge = edge;

        assert(redge->flags.bits.isRaw == FALSE);

        assert(redge->nextRawEdge != NULLINDEX); // must have >= 1 raw edge

        while (redge->nextRawEdge != NULLINDEX) {
          redge = GetGraphEdge(ScaffoldGraph->ScaffoldGraph,redge->nextRawEdge);

          assert(!isOverlapEdge(redge));

          slk.jump_list[edgeCount].in1  = FRGmap.lookup(redge->fragA);
          slk.jump_list[edgeCount].in2  = FRGmap.lookup(redge->fragB);
          slk.jump_list[edgeCount].type.setIsMatePair();

          edgeCount++;
        }
      }

      assert(edgeCount == edgeTotal);

      if (doWrite)
        WriteProtoMesg_AS(asmFile, &pmesg);

      safe_free(slk.jump_list);
    }
  }
}
Esempio n. 12
0
void
writeSCF(FILE *asmFile, bool doWrite) {
  SnapScaffoldMesg    scf;
  GenericMesg         pmesg = { &scf, MESG_SCF };
  GraphNodeIterator   scaffolds;
  CIScaffoldT        *scaffold;

  fprintf(stderr, "writeSCF()--\n");

  InitGraphNodeIterator(&scaffolds, ScaffoldGraph->ScaffoldGraph, GRAPH_NODE_DEFAULT);
  while ((scaffold = NextGraphNodeIterator(&scaffolds)) != NULL) {
    if(scaffold->type != REAL_SCAFFOLD)
      continue;

    assert(scaffold->info.Scaffold.numElements > 0);

    scf.eaccession       = AS_UID_fromInteger(getUID(uidServer));
    scf.iaccession       = scaffold->id;
    scf.num_contig_pairs = scaffold->info.Scaffold.numElements - 1;
    scf.contig_pairs     = (SnapContigPairs *)safe_malloc(sizeof(SnapContigPairs) * scaffold->info.Scaffold.numElements);

    CIScaffoldTIterator      contigs;
    ChunkInstanceT         *contigCurr;
    ChunkInstanceT         *contigLast;

    InitCIScaffoldTIterator(ScaffoldGraph, scaffold, TRUE, FALSE, &contigs);
    contigLast = NextCIScaffoldTIterator(&contigs);

    SequenceOrient  orientLast;
    SequenceOrient  orientCurr;

    orientLast.setIsForward(contigLast->offsetAEnd.mean < contigLast->offsetBEnd.mean);

    assert(contigLast->scaffoldID == scaffold->id);

    if (scf.num_contig_pairs == 0) {
      scf.contig_pairs[0].econtig1 = CCOmap.lookup(contigLast->id);
      scf.contig_pairs[0].econtig2 = CCOmap.lookup(contigLast->id);
      scf.contig_pairs[0].mean     = 0.0;
      scf.contig_pairs[0].stddev   = 0.0;
      scf.contig_pairs[0].orient.setIsAB_AB(); // got to put something

    } else {
      int32 pairCount = 0;

      while ((contigCurr = NextCIScaffoldTIterator(&contigs)) != NULL) {

        assert(pairCount < scf.num_contig_pairs);
        assert(contigCurr->scaffoldID == scaffold->id);

        scf.contig_pairs[pairCount].econtig1 = CCOmap.lookup(contigLast->id);
        scf.contig_pairs[pairCount].econtig2 = CCOmap.lookup(contigCurr->id);

        SequenceOrient orientCurr;

        orientCurr.setIsForward(contigCurr->offsetAEnd.mean < contigCurr->offsetBEnd.mean);

        if (orientLast.isForward()) {
          if (orientCurr.isForward()) {
            scf.contig_pairs[pairCount].mean   = contigCurr->offsetAEnd.mean - contigLast->offsetBEnd.mean;
            scf.contig_pairs[pairCount].stddev = sqrt(contigCurr->offsetAEnd.variance -
                                                      contigLast->offsetBEnd.variance);
            scf.contig_pairs[pairCount].orient.setIsAB_AB();
          } else {  //orientCurr == B_A
            scf.contig_pairs[pairCount].mean   = contigCurr->offsetBEnd.mean - contigLast->offsetBEnd.mean;
            scf.contig_pairs[pairCount].stddev = sqrt(contigCurr->offsetBEnd.variance -
                                                      contigLast->offsetBEnd.variance);
            scf.contig_pairs[pairCount].orient.setIsAB_BA();
          }
        } else {  //orientLast == B_A
          if (orientCurr.isForward()) {
            scf.contig_pairs[pairCount].mean   = contigCurr->offsetAEnd.mean - contigLast->offsetAEnd.mean;
            scf.contig_pairs[pairCount].stddev = sqrt(contigCurr->offsetAEnd.variance -
                                                      contigLast->offsetAEnd.variance);
            scf.contig_pairs[pairCount].orient.setIsBA_AB();
          } else {  //orientCurr == B_A
            scf.contig_pairs[pairCount].mean   = contigCurr->offsetBEnd.mean - contigLast->offsetAEnd.mean;
            scf.contig_pairs[pairCount].stddev = sqrt(contigCurr->offsetBEnd.variance -
                                                      contigLast->offsetAEnd.variance);
            scf.contig_pairs[pairCount].orient.setIsBA_BA();
          }
        }

        contigLast = contigCurr;
        orientLast = orientCurr;

        ++pairCount;
      }
    }

    if (doWrite)
      WriteProtoMesg_AS(asmFile, &pmesg);

    SCFmap.add(scf.iaccession, scf.eaccession);

    safe_free(scf.contig_pairs);
  }
}
Esempio n. 13
0
void
writeCLK(FILE *asmFile, bool doWrite) {
  SnapContigLinkMesg     clk;
  GenericMesg            pmesg = { &clk, MESG_CLK };
  GraphNodeIterator      nodes;
  ContigT               *ctg;

  fprintf(stderr, "writeCLK()--\n");

  InitGraphNodeIterator(&nodes, ScaffoldGraph->ContigGraph, GRAPH_NODE_DEFAULT);
  while ((ctg = NextGraphNodeIterator(&nodes)) != NULL) {

    if (ctg->flags.bits.isChaff)
      continue;

    if (SurrogatedSingleUnitigContig(ctg))
      continue;

    GraphEdgeIterator  edges(ScaffoldGraph->ContigGraph, ctg->id, ALL_END, ALL_EDGES);
    CIEdgeT           *edge;

    while((edge = edges.nextMerged()) != NULL){

      if (edge->idA != ctg->id)
        continue;

      ContigT *mate = GetGraphNode(ScaffoldGraph->ContigGraph, edge->idB);

      if(mate->flags.bits.isChaff)
        continue;

      if (SurrogatedSingleUnitigContig(mate))
        continue;

      clk.econtig1 = CCOmap.lookup(edge->idA);
      clk.econtig2 = CCOmap.lookup(edge->idB);

      clk.orientation = edge->orient;  //  Don't need to map orientation, always using canonical orientation

      clk.overlap_type = (isOverlapEdge(edge)) ? AS_OVERLAP : AS_NO_OVERLAP;

      switch (GetEdgeStatus(edge)) {
        case LARGE_VARIANCE_EDGE_STATUS:
        case UNKNOWN_EDGE_STATUS:
        case INTER_SCAFFOLD_EDGE_STATUS:
          clk.status = AS_UNKNOWN_IN_ASSEMBLY;
          break;

        case TENTATIVE_TRUSTED_EDGE_STATUS:
        case TRUSTED_EDGE_STATUS:
          clk.status = AS_IN_ASSEMBLY;
          break;

        case TENTATIVE_UNTRUSTED_EDGE_STATUS:
        case UNTRUSTED_EDGE_STATUS:
          clk.status = AS_BAD;
          break;

        default:
          assert(0 /* Invalid edge status */);
      }

      clk.is_possible_chimera = edge->flags.bits.isPossibleChimera;
      clk.mean_distance       = edge->distance.mean;
      clk.std_deviation       = sqrt(edge->distance.variance);
      clk.num_contributing    = edge->edgesContributing;

      uint32 edgeCount = 0;
      uint32 edgeTotal = clk.num_contributing;

      if ((edgeTotal == 1) &&
          (clk.overlap_type == AS_OVERLAP) &&
          (GlobalData->outputOverlapOnlyContigEdges == FALSE))
        // don't output pure overlap edges
        continue;

      clk.jump_list = (SnapMate_Pairs *)safe_malloc(sizeof(SnapMate_Pairs) * edgeTotal);

      if (edge->flags.bits.isRaw) {
        assert(edgeTotal == 1);

        if (clk.overlap_type == AS_NO_OVERLAP) {
          clk.jump_list[edgeCount].in1  = FRGmap.lookup(edge->fragA);
          clk.jump_list[edgeCount].in2  = FRGmap.lookup(edge->fragB);
          clk.jump_list[edgeCount].type.setIsMatePair();
        } else {
          assert(GlobalData->outputOverlapOnlyContigEdges);
          clk.jump_list[edgeCount].in1  = AS_UID_undefined();
          clk.jump_list[edgeCount].in2  = AS_UID_undefined();
          clk.jump_list[edgeCount].type.setIsOverlap();
        }

        edgeCount++;

      } else {
        CIEdgeT *redge = edge;

        assert(redge->nextRawEdge != NULLINDEX); // must have >= 1 raw edge

        while (redge->nextRawEdge != NULLINDEX) {
          redge = GetGraphEdge(ScaffoldGraph->ContigGraph, redge->nextRawEdge);

          if (isOverlapEdge(redge)) {
            // overlap edges don't count
            edgeTotal--;
            continue;
          }

          clk.jump_list[edgeCount].in1  = FRGmap.lookup(redge->fragA);
          clk.jump_list[edgeCount].in2  = FRGmap.lookup(redge->fragB);
          clk.jump_list[edgeCount].type.setIsMatePair();

          edgeCount++;
        }
      }

      assert(edgeCount == edgeTotal);

      if (doWrite)
        WriteProtoMesg_AS(asmFile, &pmesg);

      safe_free(clk.jump_list);
    }
  }
}
Esempio n. 14
0
void
writeCCO(FILE *asmFile, bool doWrite) {
  SnapConConMesg      cco;
  GenericMesg         pmesg = { &cco, MESG_CCO };
  GraphNodeIterator   contigs;
  ContigT             *contig;

  fprintf(stderr, "writeCCO()--\n");

  InitGraphNodeIterator(&contigs, ScaffoldGraph->ContigGraph, GRAPH_NODE_DEFAULT);
  while ((contig = NextGraphNodeIterator(&contigs)) != NULL) {
    assert(contig->id >= 0);
    assert(contig->id < GetNumGraphNodes(ScaffoldGraph->ContigGraph));

    if (contig->flags.bits.isChaff)
      continue;

    NodeCGW_T *unitig = GetGraphNode(ScaffoldGraph->CIGraph, contig->info.Contig.AEndCI);

    if ((ScaffoldGraph->tigStore->getNumUnitigs(contig->id, FALSE) == 1) &&
        (contig->scaffoldID == NULLINDEX) &&
        (unitig->info.CI.numInstances > 0))
      //  Contig is a surrogate instance
      continue;

    MultiAlignT *ma = ScaffoldGraph->tigStore->loadMultiAlign(contig->id, FALSE);

    cco.eaccession  = AS_UID_fromInteger(getUID(uidServer));
    cco.iaccession  = contig->id;
    cco.placed      = ScaffoldGraph->tigStore->getContigStatus(contig->id);
    cco.length      = GetMultiAlignLength(ma);
    cco.consensus   = Getchar(ma->consensus, 0);
    cco.quality     = Getchar(ma->quality, 0);
    cco.forced      = 0;
    cco.num_pieces  = GetNumIntMultiPoss(ma->f_list);
    cco.num_unitigs = GetNumIntMultiPoss(ma->u_list);
    cco.num_vars    = GetNumIntMultiPoss(ma->v_list);
    cco.pieces      = NULL;
    cco.unitigs     = NULL;
    cco.vars        = NULL;

    if (cco.consensus == NULL)
      fprintf(stderr, "buildCCOMessage()-- contig %d missing consensus sequence\n",
              cco.iaccession);
    assert(cco.consensus != NULL);
    if (cco.length != strlen(cco.consensus))
      fprintf(stderr, "buildCCOMessage()-- contig %d length %d != consensus string length "F_SIZE_T"\n",
              cco.iaccession, cco.length, strlen(cco.consensus));
    assert(cco.length == strlen(cco.consensus));

    if (cco.num_pieces > 0) {
      cco.pieces = (SnapMultiPos *)safe_malloc(cco.num_pieces * sizeof(SnapMultiPos));

      for(int32 i=0; i<cco.num_pieces; i++) {
        IntMultiPos *imp = GetIntMultiPos(ma->f_list, i);

        cco.pieces[i].type         = imp->type;
        cco.pieces[i].eident       = FRGmap.lookup(imp->ident);
        cco.pieces[i].delta_length = imp->delta_length;
        cco.pieces[i].position     = imp->position;
        cco.pieces[i].delta        = imp->delta;
      }
    }

    if (cco.num_unitigs > 0) {
      cco.unitigs = (UnitigPos *)safe_malloc(cco.num_unitigs * sizeof(UnitigPos));

      for(int32 i=0; i<cco.num_unitigs; i++) {
        IntUnitigPos *imp = GetIntUnitigPos(ma->u_list, i);

        cco.unitigs[i].type         = imp->type;
        cco.unitigs[i].eident       = UTGmap.lookup(imp->ident);
        cco.unitigs[i].position     = imp->position;
        cco.unitigs[i].delta        = imp->delta;
        cco.unitigs[i].delta_length = imp->delta_length;
      }
    }

    if (cco.num_vars > 0) {
      cco.vars = (IntMultiVar *)safe_malloc(cco.num_vars * sizeof(IntMultiVar));

      for(int32 i=0; i<cco.num_vars; i++) {
        IntMultiVar *imv = GetIntMultiVar(ma->v_list, i);

        cco.vars[i].var_id                = imv->var_id;
        cco.vars[i].phased_id             = imv->phased_id;

        cco.vars[i].position              = imv->position;
        cco.vars[i].num_reads             = imv->num_reads;
        cco.vars[i].num_alleles           = imv->num_alleles;
        cco.vars[i].num_alleles_confirmed = imv->num_alleles_confirmed;
        cco.vars[i].min_anchor_size       = imv->min_anchor_size;
        cco.vars[i].var_length            = imv->var_length;

        cco.vars[i].alleles               = imv->alleles;
        cco.vars[i].var_seq_memory        = imv->var_seq_memory;
        cco.vars[i].read_id_memory        = imv->read_id_memory;

        cco.vars[i].enc_num_reads         = NULL;
        cco.vars[i].enc_weights           = NULL;
        cco.vars[i].enc_var_seq           = NULL;
        cco.vars[i].enc_read_ids          = NULL;
      }
    }

    if (doWrite)
      WriteProtoMesg_AS(asmFile, &pmesg);

    safe_free(cco.pieces);
    safe_free(cco.unitigs);
    safe_free(cco.vars);

    CCOmap.add(cco.iaccession, cco.eaccession);
  }
}
Esempio n. 15
0
void
writeULK(FILE *asmFile, bool doWrite) {
  SnapUnitigLinkMesg   ulk;
  GenericMesg          pmesg = { & ulk, MESG_ULK };
  GraphNodeIterator    nodes;
  ChunkInstanceT      *ci;

  fprintf(stderr, "writeULK()--\n");

  InitGraphNodeIterator(&nodes, ScaffoldGraph->CIGraph, GRAPH_NODE_DEFAULT);
  while ((ci = NextGraphNodeIterator(&nodes)) != NULL) {
    assert(ci->type != CONTIG_CGW);

    if (ci->type == RESOLVEDREPEATCHUNK_CGW)
      continue;

    if (ci->flags.bits.isChaff)
      continue;

    GraphEdgeIterator  edges(ScaffoldGraph->CIGraph, ci->id, ALL_END, ALL_EDGES);
    CIEdgeT           *edge;

    while ((edge = edges.nextMerged()) != NULL) {

      if (edge->idA != ci->id ||
          edge->flags.bits.isInferred ||
          edge->flags.bits.isInferredRemoved ||
          edge->flags.bits.isMarkedForDeletion)
        continue;

      ChunkInstanceT *mi = GetGraphNode(ScaffoldGraph->CIGraph, edge->idB);

      if (mi->flags.bits.isChaff)
        continue;

      ulk.eunitig1 = UTGmap.lookup(edge->idA);  //  == ci->id
      ulk.eunitig2 = UTGmap.lookup(edge->idB);

      ulk.orientation = edge->orient;  //  Don't need to map orientation, always using canonical orientation

      ulk.overlap_type = (isOverlapEdge(edge)) ? AS_OVERLAP : AS_NO_OVERLAP;

      ulk.is_possible_chimera = edge->flags.bits.isPossibleChimera;
      ulk.mean_distance       = edge->distance.mean;
      ulk.std_deviation       = sqrt(edge->distance.variance);
      ulk.num_contributing    = edge->edgesContributing;
      ulk.status              = AS_UNKNOWN_IN_ASSEMBLY;

      uint32  edgeCount = 0;
      uint32  edgeTotal = ulk.num_contributing;

      if ((edgeTotal == 1) && (ulk.overlap_type == AS_OVERLAP))
        // don't output pure overlap edges
        continue;

      // Look through the fragment pairs in this edge to decide the status of the link.

      CIEdgeT *redge = (edge->flags.bits.isRaw) ? edge : GetGraphEdge(ScaffoldGraph->CIGraph, edge->nextRawEdge);

      int numBad     = 0;
      int numGood    = 0;
      int numUnknown = 0;

      for (; redge != NULL; redge = GetGraphEdge(ScaffoldGraph->CIGraph, redge->nextRawEdge)) {
        if(isOverlapEdge(redge))
          continue;

        CIFragT *fragA = GetCIFragT(ScaffoldGraph->CIFrags, redge->fragA);
        CIFragT *fragB = GetCIFragT(ScaffoldGraph->CIFrags, redge->fragB);

        assert(fragA->flags.bits.edgeStatus == fragB->flags.bits.edgeStatus);

        if ((fragA->flags.bits.edgeStatus == UNTRUSTED_EDGE_STATUS) ||
            (fragA->flags.bits.edgeStatus == TENTATIVE_UNTRUSTED_EDGE_STATUS))
          numBad++;

        else if ((fragA->flags.bits.edgeStatus == TRUSTED_EDGE_STATUS) ||
                 (fragA->flags.bits.edgeStatus == TENTATIVE_TRUSTED_EDGE_STATUS))
          numGood++;

        else
          numUnknown++;
      }

      if (numBad > 0)
        ulk.status = AS_BAD;

      else if (numGood > 0)
        ulk.status = AS_IN_ASSEMBLY;

      else
        ulk.status = AS_UNKNOWN_IN_ASSEMBLY;

      ulk.jump_list = (SnapMate_Pairs *)safe_malloc(sizeof(SnapMate_Pairs) * edgeTotal);

      if (edge->flags.bits.isRaw) {
        assert(edgeTotal == 1);

        ulk.jump_list[edgeCount].in1  = FRGmap.lookup(edge->fragA);
        ulk.jump_list[edgeCount].in2  = FRGmap.lookup(edge->fragB);
        ulk.jump_list[edgeCount].type.setIsMatePair();

        edgeCount++;
      } else {
        assert(edgeTotal > 0);

        redge = edge;

        assert(redge->nextRawEdge != NULLINDEX); // must have >= 1 raw edge

        while (redge->nextRawEdge != NULLINDEX) {
          redge = GetGraphEdge(ScaffoldGraph->CIGraph, redge->nextRawEdge);

          if (isOverlapEdge(redge)) {
            // overlap edges don't count
            edgeTotal--;
            continue;
          }

          ulk.jump_list[edgeCount].in1  = FRGmap.lookup(redge->fragA);
          ulk.jump_list[edgeCount].in2  = FRGmap.lookup(redge->fragB);
          ulk.jump_list[edgeCount].type.setIsMatePair();

          edgeCount++;
        }
      }

      assert(edgeCount == edgeTotal);

      if (doWrite)
        WriteProtoMesg_AS(asmFile, &pmesg);

      safe_free(ulk.jump_list);
    }
  }
}
Esempio n. 16
0
//  SeqAn_CNS driver.
//
//  Reads protocol messages from the file given with -c.  For every IUM
//  (unitig) or ICM (contig) message whose consensus string is empty, the
//  clear range of each member fragment (and, for contigs, the ungapped
//  consensus of each member unitig) is written as FastA to a temporary file
//  and updateRecord() / updateICMRecord() is called to fill in the message.
//  Every IUM and ICM message is then written to the file given with -o.
//  Messages of any other type are read but not written.
//
//  Options, as parsed below:
//    -c <file>   input message file                    (required)
//    -G <store>  gatekeeper store                      (required)
//    -o <file>   output message file                   (required)
//    -s <exe>    SeqAn executable                      (required)
//    -S <n>      partition passed to gkStore_loadPartition()
//    -w <dir>    working directory
//    -u <store>  sequence store; required as soon as an ICM message is seen
//    -V <n>      version passed to openSequenceDB()
//    -p <n>      partition passed to openSequenceDBPartition()
//
//  Returns 0 on success; exits with status 1 on a usage error or when a file
//  cannot be opened, and returns 1 if closing the output file fails.
//
int32
main(int32 argc, char **argv) {
   int32 arg = 1;
   int32 err = 0;

   char * gkpStoreName  = NULL;
   int32  gkpStorePart  = 0;
   char * msgFile       = NULL;
   char * outputFileName= NULL;
   char * seqAn         = NULL;
   char * wrkDir        = NULL;
   char * seqStoreName  = NULL;
   int32  seqStoreVer   = 0;
   int32  seqStorePart  = 0;

   argc = AS_configure(argc, argv);

   while (arg < argc) {
      //  Every recognised option consumes the following argument.  If there
      //  is no following argument the token is either an option missing its
      //  value or an unknown option; both are usage errors.
      if (arg + 1 >= argc) {
         err++;
         break;
      }

      if (strcmp(argv[arg], "-c") == 0) {
         msgFile = argv[++arg];
      } else if (strcmp(argv[arg], "-G") == 0) {
         gkpStoreName = argv[++arg];
      } else if (strcmp(argv[arg], "-S") == 0) {
         gkpStorePart = atoi(argv[++arg]);
      } else if (strcmp(argv[arg], "-o") == 0) {
         outputFileName = argv[++arg];
      } else if (strcmp(argv[arg], "-s") == 0) {
         seqAn = argv[++arg];
      } else if (strcmp(argv[arg], "-w") == 0) {
         wrkDir = argv[++arg];
      } else if (strcmp(argv[arg], "-u") == 0) {
         seqStoreName = argv[++arg];
      } else if (strcmp(argv[arg], "-V") == 0) {
         seqStoreVer = atoi(argv[++arg]);
      } else if (strcmp(argv[arg], "-p") == 0) {
         seqStorePart = atoi(argv[++arg]);
      } else {
         err++;
      }
      arg++;
   }

   if ((err) || (gkpStoreName == NULL) || (msgFile == NULL) || (outputFileName == NULL) || seqAn == NULL) {
      fprintf(stderr, "USAGE: SeqAn_CNS -G <gkpStore> -c <input.cgb> -o <output.cgi> -s <seqan_executable> [-u seqstore, required for contig consensus] [-w working directory]\n");      
      exit(1);
   }

   gkStore        *gkpStore = new gkStore(gkpStoreName, FALSE, FALSE);

   gkpStore->gkStore_loadPartition(gkpStorePart);

   gkFragment      fr;
   GenericMesg    *pmesg;
   tSequenceDB    *sequenceDB = NULL;

   FILE *infp = fopen(msgFile, "r");
   if (infp == NULL) {
      fprintf(stderr, "ERROR: failed to open '%s' for reading\n", msgFile);
      exit(1);
   }

   FILE *outfp = fopen(outputFileName, "w");
   if (outfp == NULL) {
      fprintf(stderr, "ERROR: failed to open '%s' for writing\n", outputFileName);
      exit(1);
   }

   FILE *tempReads;
   char  fileName[AS_SEQAN_MAX_BUFFER_LENGTH];
   char *prefix = outputFileName;

   //  NOTE(review): wrkDir is NULL when -w is not given; presumably
   //  getFileName() copes with that -- confirm.
   getFileName(prefix, wrkDir, AS_SEQAN_INPUT_NAME, fileName);

   int32 i = 0;

   while ((EOF != ReadProtoMesg_AS(infp, &pmesg))) {
      //  Set when the update call below ran for this message; the consensus
      //  and quality strings are then released after the message is written.
      int32 freeMem = 0;

      if (pmesg->t == MESG_IUM) {
         IntUnitigMesg *ium_mesg = (IntUnitigMesg *)pmesg->m;

         if (strlen(ium_mesg->consensus) == 0) {
            tempReads = fopen(fileName, "w");
            if (tempReads == NULL) {
               fprintf(stderr, "ERROR: failed to open '%s' for writing\n", fileName);
               exit(1);
            }

            for (i = 0; i < ium_mesg->num_frags; i++) {
               // get the fragment sequence
               gkpStore->gkStore_getFragment(ium_mesg->f_list[i].ident, &fr, GKFRAGMENT_QLT);
               uint32   clrBeg   = fr.gkFragment_getClearRegionBegin();
               uint32   clrEnd   = fr.gkFragment_getClearRegionEnd  ();
               char    *seqStart = fr.gkFragment_getSequence();
               char    *seq      = seqStart + clrBeg;

               //  Terminate at the end of the clear range.  The index is
               //  relative to the start of the whole sequence, not to 'seq',
               //  which has already been advanced by clrBeg.
               seqStart[clrEnd] = 0;

               AS_UTL_writeFastA(tempReads,
                  seq, clrEnd-clrBeg,
                   ">" F_IID "," F_IID "\n", ium_mesg->f_list[i].position.bgn, ium_mesg->f_list[i].position.end);
            }
            fclose(tempReads);
            updateRecord(ium_mesg, fileName, seqAn, prefix, wrkDir);
            freeMem = 1;
         }
         WriteProtoMesg_AS(outfp, pmesg);

         if (freeMem) {
            safe_free(ium_mesg->consensus);
            safe_free(ium_mesg->quality);
         }
      }
      else if (pmesg->t == MESG_ICM) {
         IntConConMesg *icm_mesg = (IntConConMesg *)pmesg->m;

         if (seqStoreName == NULL) {
            fprintf(stderr, "USAGE: The -u option is required for contig consensus\n");
            exit(1);
         }

         //  The sequence store is opened on the first ICM message only.
         if (sequenceDB == NULL) {
            sequenceDB = openSequenceDB(seqStoreName, FALSE, seqStoreVer);
            openSequenceDBPartition(sequenceDB, seqStorePart);
         }

         if (strlen(icm_mesg->consensus) == 0) {
            tempReads = fopen(fileName, "w");
            if (tempReads == NULL) {
               fprintf(stderr, "ERROR: failed to open '%s' for writing\n", fileName);
               exit(1);
            }

            for (i = 0; i < icm_mesg->num_pieces; i++) {
               // get the fragment sequence
               gkpStore->gkStore_getFragment(icm_mesg->pieces[i].ident, &fr, GKFRAGMENT_QLT);
               uint32   clrBeg   = fr.gkFragment_getClearRegionBegin();
               uint32   clrEnd   = fr.gkFragment_getClearRegionEnd  ();
               char    *seqStart = fr.gkFragment_getSequence();
               char    *seq      = seqStart + clrBeg;

               //  See the unitig branch: index from the sequence start.
               seqStart[clrEnd] = 0;

               AS_UTL_writeFastA(tempReads,
                  seq, clrEnd-clrBeg,
                   ">" F_IID "," F_IID "\n", icm_mesg->pieces[i].position.bgn, icm_mesg->pieces[i].position.end);
            }

            // now handle the unitig messages
            for (i = 0; i < icm_mesg->num_unitigs; i++) {
               //  NOTE(review): these two arrays are created for every unitig
               //  and never released in this function -- looks like a leak;
               //  confirm and free them with the matching VA destructor.
               VA_TYPE(char) *ungappedSequence = CreateVA_char(0);
               VA_TYPE(char) *ungappedQuality  = CreateVA_char(0);
               MultiAlignT *uma = loadMultiAlignTFromSequenceDB(sequenceDB, icm_mesg->unitigs[i].ident, 1);
               assert(uma != NULL);

               GetMultiAlignUngappedConsensus(uma, ungappedSequence, ungappedQuality);
               char * seq = Getchar(ungappedSequence,0);

               AS_UTL_writeFastA(tempReads,
                  seq, strlen(seq),
                   ">" F_IID "," F_IID "\n", icm_mesg->unitigs[i].position.bgn, icm_mesg->unitigs[i].position.end);
            }
            fclose(tempReads);

            updateICMRecord(icm_mesg, fileName, seqAn, prefix, wrkDir);
            freeMem = 1;
         }
         WriteProtoMesg_AS(outfp, pmesg);

         if (freeMem) {
            safe_free(icm_mesg->consensus);
            safe_free(icm_mesg->quality);
         }
      }
   }

   fclose(infp);

   //  fclose() flushes buffered output, so a failure here means the output
   //  file may be incomplete.
   if (fclose(outfp) != 0) {
      fprintf(stderr, "ERROR: failed to close '%s'; output may be incomplete\n", outputFileName);
      delete gkpStore;
      return 1;
   }

   delete gkpStore;

   return 0;
}