コード例 #1
0
//  Report match-length statistics for both sequences of a mapping.
//
//  Writes to stdout:
//    - the number of matches,
//    - the sum of match lengths per sequence, taken before merge(),
//    - the sum of interval lengths after merge(), and
//    - the value returned by tandemRepeatACGTLength() for the merged
//      intervals.
//  A histogram of match lengths is also shown, dumped and plotted for
//  each sequence.
//
//    AF       supplies fastaA()/fastaB() for buildOffset() and the
//             labels printed on each line
//    matches  the matches to summarize
//    A, B     passed through to tandemRepeatACGTLength()
//    prefix   passed through to histogram::dump() and histogram::plot()
//
void
mappedLengths(atacFile &AF, atacMatchList &matches, seqCache *A, seqCache *B, char *prefix) {
  histogram  h1(100, 1000000);
  histogram  h2(100, 1000000);

  //  For the coverage to work correctly, we need to either have one
  //  intervalList per input sequence, or build a table of the chained
  //  sequence positions.  We do the latter: offsetN[iid] + pos places
  //  every match on one shared coordinate axis per assembly.
  //
  uint64  *offset1 = buildOffset(AF.fastaA());
  uint64  *offset2 = buildOffset(AF.fastaB());

  intervalList<uint64>  intervalA;
  intervalList<uint64>  intervalB;

  for (uint32 m=0; m<matches.numberOfMatches(); m++) {
    intervalA.add(offset1[matches[m]->iid1] + (uint64)matches[m]->pos1, (uint64)matches[m]->len1);
    intervalB.add(offset2[matches[m]->iid2] + (uint64)matches[m]->pos2, (uint64)matches[m]->len2);

    h1.add(matches[m]->len1);
    h2.add(matches[m]->len2);
  }

  //  NOTE: the whitespace around uint64FMT is required.  Since C++11 a
  //  string literal immediately followed by an identifier is lexed as a
  //  user-defined-literal suffix instead of expanding the macro.

  fprintf(stdout, "numberOfItems " uint64FMT "\n", (uint64)matches.numberOfMatches());

  fprintf(stdout, "matchLength   %s " uint64FMT "  %s " uint64FMT " # Sum of lengths of sequence in matches\n",
          AF.labelA(), (uint64)intervalA.sumOfLengths(),
          AF.labelB(), (uint64)intervalB.sumOfLengths());

  h1.show("AmatchLength");
  h2.show("BmatchLength");
  h1.dump(prefix, "AmatchLength");    h1.plot(prefix, "AmatchLength");
  h2.dump(prefix, "BmatchLength");    h2.plot(prefix, "BmatchLength");

  //  Everything printed below is computed on the merged intervals.
  intervalA.merge();
  intervalB.merge();

  fprintf(stdout, "coveredLength  %s " uint64FMT "  %s " uint64FMT " # sequence covered by a match, including N\n",
          AF.labelA(), (uint64)intervalA.sumOfLengths(),
          AF.labelB(), (uint64)intervalB.sumOfLengths());

  //  The casts keep each vararg 64 bits wide to match uint64FMT, whatever
  //  integer type tandemRepeatACGTLength() is declared to return.
  fprintf(stdout, "coveredLength  %s " uint64FMT "  %s " uint64FMT " # sequence covered by a match, ACGT only (new)\n",
          AF.labelA(), (uint64)tandemRepeatACGTLength(intervalA, offset1, A),
          AF.labelB(), (uint64)tandemRepeatACGTLength(intervalB, offset2, B));

  //  The arrays from buildOffset() are released here with delete [].
  delete [] offset1;
  delete [] offset2;
}
コード例 #2
0
ファイル: MyVector.cpp プロジェクト: eaulisa/MyFEMuS
 // ******************
 // No-argument scatter: calls buildOffset() and then passes _offset on to
 // the one-argument scatter.
 // NOTE(review): presumably buildOffset() (re)computes _offset before it is
 // used below -- confirm against the class definition, which is not visible
 // here.
 template <class Type> void MyVector<Type>::scatter() {
   buildOffset();
   scatter(_offset);
 }
コード例 #3
0
void
tandemRepeatStats(atacFileStream   &featuresA,
                  atacFileStream   &featuresB,
                  atacFile         &AF,
                  seqCache         *A,
                  seqCache         *B) {
  intervalList<uint64>  ifa, ifb;
  intervalList<uint64>  ima, imb;
  intervalList<uint64>  mma, mmb;

  atacMatchList    &matches = *AF.matches();

  uint64  *offset1 = buildOffset(A);
  uint64  *offset2 = buildOffset(B);

  //  ifa, ifb are intervalLists, storing the intervals labeled as
  //  tandem repeats.  They are using the offset[] to encode the
  //  entire sequence as one consecutive string.
  //
  atacFeature  *f = 0L;
  while ((f = featuresA.nextFeature("tr")) != 0L)
    ifa.add(offset1[f->iid] + f->pos, f->len);
  while ((f = featuresB.nextFeature("tr")) != 0L)
    ifb.add(offset2[f->iid] + f->pos, f->len);


  //  ima, imb, like if?, encode the matches in one string.
  //
  for (uint32 m=0; m<matches.numberOfMatches(); m++)
    ima.add(offset1[matches[m]->iid1] + (uint64)matches[m]->pos1, (uint64)matches[m]->len1);
  for (uint32 m=0; m<matches.numberOfMatches(); m++)
    imb.add(offset2[matches[m]->iid2] + (uint64)matches[m]->pos2, (uint64)matches[m]->len2);


  fprintf(stdout, "\nTANDEM REPEATS in %s\n", AF.labelA());
  fprintf(stdout, "numberOfItems "uint64FMT"\n", (uint64)ifa.numberOfIntervals());
  fprintf(stdout, "totalLength   "uint64FMT" # sum of lengths of all features\n", ifa.sumOfLengths());
  ifa.merge();
  fprintf(stdout, "numberOfItems "uint64FMT" # after merging overlapping regions\n", (uint64)ifa.numberOfIntervals());
  fprintf(stdout, "coveredLength "uint64FMT" # sequence covered by a feature, including N\n", ifa.sumOfLengths());
  fprintf(stdout, "coveredLength "uint64FMT" # sequence covered by a feature, ACGT only\n", tandemRepeatACGTLength(ifa, offset1, A));
  mma.intersect(ifa, ima);
  fprintf(stdout, "numberOfItems "uint64FMT" # after merging overlapping regions, only in matches\n", (uint64)mma.numberOfIntervals());
  fprintf(stdout, "inMatches     "uint64FMT" # sequence covered by a feature and in a match, including N\n", mma.sumOfLengths());
  fprintf(stdout, "inMatches     "uint64FMT" # sequence covered by a feature and in a match, ACGT only\n", tandemRepeatACGTLength(mma, offset1, A));


  fprintf(stdout, "\nTANDEM REPEATS in %s\n", AF.labelB());
  fprintf(stdout, "numberOfItems "uint64FMT"\n", (uint64)ifb.numberOfIntervals());
  fprintf(stdout, "totalLength   "uint64FMT" # sum of lengths of all features\n", ifb.sumOfLengths());
  ifb.merge();
  fprintf(stdout, "numberOfItems "uint64FMT" # after merging overlapping regions\n", (uint64)ifb.numberOfIntervals());
  fprintf(stdout, "coveredLength "uint64FMT" # sequence covered by a feature, including N\n", ifb.sumOfLengths());
  fprintf(stdout, "coveredLength "uint64FMT" # sequence covered by a feature, ACGT only\n", tandemRepeatACGTLength(ifb, offset2, B));
  mmb.intersect(ifb, imb);
  fprintf(stdout, "numberOfItems "uint64FMT" # after merging overlapping regions, only in matches\n", (uint64)mmb.numberOfIntervals());
  fprintf(stdout, "inMatches     "uint64FMT" # sequence covered by a feature and in a match, including N\n", mmb.sumOfLengths());
  fprintf(stdout, "inMatches     "uint64FMT" # sequence covered by a feature and in a match, ACGT only\n", tandemRepeatACGTLength(mmb, offset2, B));

  delete [] offset1;
  delete [] offset2;
}