예제 #1
0
u64bit
existDB::count(u64bit mer) {
  u64bit c, h, st, ed;

  if (_counts == 0L)
    return(0);

  if (_compressedHash) {
    h  = HASH(mer) * _hshWidth;
    st = getDecodedValue(_hashTable, h,             _hshWidth);
    ed = getDecodedValue(_hashTable, h + _hshWidth, _hshWidth);
  } else {
    h  = HASH(mer);
    st = _hashTable[h];
    ed = _hashTable[h+1];
  }

  if (st == ed)
    return(0);

  c = CHECK(mer);

  if (_compressedBucket) {
    st *= _chkWidth;
    ed *= _chkWidth;

    for (; st<ed; st += _chkWidth) {
      if (getDecodedValue(_buckets, st, _chkWidth) == c)
        goto returncount;
    }
  } else {
    for (; st<ed; st++) {
      if (_buckets[st] == c)
        goto returncount;
    }
  }

  return(0);

 returncount:
  if (_compressedCounts)
    return(getDecodedValue(_counts, st * _cntWidth, _cntWidth));
  else
    return(_counts[st]);
}
예제 #2
0
void
positionDB::dump(char *name) {
  uint64  sizs[4] = {_chckWidth, _pptrWidth, 1, _sizeWidth};
  uint64  vals[4] = {0, 0, 0, 0};
  FILE   *F = fopen(name, "w");

  for (uint64 h=0; h<_tableSizeInEntries; h++) {
    uint64 st, ed;

    if (_hashTable_BP) {
      st = getDecodedValue(_hashTable_BP, h * _hashWidth,              _hashWidth);
      ed = getDecodedValue(_hashTable_BP, h * _hashWidth + _hashWidth, _hashWidth);
    } else {
      st = _hashTable_FW[h];
      ed = _hashTable_FW[h+1];
    }

    fprintf(F, "B "uint64FMT" "uint64FMT"-"uint64FMT"\n", h, st, ed);

    while (st < ed) {
      uint64     cb = st * _wFin;

      getDecodedValues(_buckets, cb, (_sizeWidth == 0) ? 3 : 4, sizs, vals);

      fprintf(F, "%c chk="uint64HEX" pos="uint64FMT" siz="uint64FMT,
              (vals[2] == 0) ? 'D' : 'U', vals[0], vals[1], vals[3]);

      if (vals[2] == 0) {
        uint64 pos = vals[1] * _posnWidth;
        uint64 len = getDecodedValue(_positions, pos, _posnWidth);

        for (pos += _posnWidth; len > 0; pos += _posnWidth, len--)
          fprintf(F, " "uint64FMT, getDecodedValue(_positions, pos, _posnWidth));
      }

      fprintf(F, "\n");

      st++;
    }
  }

  fclose(F);
}
예제 #3
0
bool
existDB::exists(u64bit mer) {
  u64bit c, h, st, ed;

  if (_compressedHash) {
    h  = HASH(mer) * _hshWidth;
    st = getDecodedValue(_hashTable, h,             _hshWidth);
    ed = getDecodedValue(_hashTable, h + _hshWidth, _hshWidth);
  } else {
    h  = HASH(mer);
    st = _hashTable[h];
    ed = _hashTable[h+1];
  }

  if (st == ed)
    return(false);

  c = CHECK(mer);

  if (_compressedBucket) {
    st *= _chkWidth;
    ed *= _chkWidth;

    for (; st<ed; st += _chkWidth) {
      if (getDecodedValue(_buckets, st, _chkWidth) == c)
        return(true);
    }
  } else {
    for (; st<ed; st++) {
      if (_buckets[st] == c)
        return(true);
    }
  }

  return(false);
}
//  Returns hits with _AT_MOST_ numMismatches mistakes.
bool
positionDB::getUpToNMismatches(uint64   mer,
                               uint32   numMismatches,
                               uint64*& posn,
                               uint64&  posnMax,
                               uint64&  posnLen) {

  PREFETCH(_hashedErrors);  //  Slightly better.

  posnLen = 0;

  if (_hashedErrors == 0L) {
    fprintf(stderr, "ERROR:  Nobody initialized getUpToNMismatches() by calling setUpMismatchMatcher().\n");
    exit(1);
  }

  if (posnMax == 0) {
    posnMax = 16384;
    try {
      posn    = new uint64 [posnMax];
    } catch (...) {
      fprintf(stderr, "positionDB::getUpToNMismatches()-- Can't allocate space for initial positions, requested "uint64FMT" uint64's.\n", posnMax);
      abort();
    }
  }

  uint64  orig = HASH(mer);

  //  Optimization that didn't work.  The idea was to compute all the
  //  hashes with errors, then sort to gain better cache locality in
  //  the lookups.  The sort dominated.
  //
  //  Another: Surprisingly, theq two getDecodedValue calls are faster
  //  than a single getDecodedValues.

  for (uint32 e=0; e<_hashedErrorsLen; e++) {
    uint64 hash = orig ^ _hashedErrors[e];
    uint64 st, ed;

    if (_hashTable_BP) {
      st = getDecodedValue(_hashTable_BP, hash * _hashWidth,              _hashWidth);
      ed = getDecodedValue(_hashTable_BP, hash * _hashWidth + _hashWidth, _hashWidth);
    } else {
      st = _hashTable_FW[hash];
      ed = _hashTable_FW[hash+1];
    }

    assert((_hashedErrors[e] & ~_hashMask) == 0);
    assert((hash             & ~_hashMask) == 0);

    //  Rebuild the mer from the hash and its check code.
    //
    //  Compare the rebuilt mer and the original mer -- if there are
    //  exactly N errors, it's a hit!  (if there are fewer than N,
    //  we'll find it when we look for N-1 errors).
    //
    //  Before rebuilding, compute diffs on the chckBits only -- if
    //  things are wildly different (the usual case) we'll get
    //  enough difference here to abort.  Remember, the chck bits
    //  are not encoded, they're an exact copy from the unhashed
    //  mer.

    if (st != ed) {
      for (uint64 i=ed-st, J=st * _wFin; i--; J += _wFin) {
        uint64 chck  = getDecodedValue(_buckets, J, _chckWidth);
        uint64 diffs = chck ^ (mer & _mask2);
        uint64 d1    = diffs & uint64NUMBER(0x5555555555555555);
        uint64 d2    = diffs & uint64NUMBER(0xaaaaaaaaaaaaaaaa);
        uint64 err   = countNumberOfSetBits64(d1 | (d2 >> 1));

        if (err <= numMismatches) {
          diffs = REBUILD(hash, chck) ^ mer;
          d1    = diffs & uint64NUMBER(0x5555555555555555);
          d2    = diffs & uint64NUMBER(0xaaaaaaaaaaaaaaaa);
          err   = countNumberOfSetBits64(d1 | (d2 >> 1));

          if (err <= numMismatches)
            //  err is junk, just need a parameter here
            loadPositions(J, posn, posnMax, posnLen, err);
        }
      }
    }
  }
예제 #5
0
void
testBinaryEncoding(void) {
  time_t     mtseed     = time(0L);
  mt_s      *mtctx      = 0L;

  uint32     iterations = TEST_LENGTH;

  uint64    *bits        = new uint64 [iterations + 2];
  uint64     bpos        = uint64ZERO;

  uint64    *V           = new uint64 [iterations];
  uint64    *C           = new uint64 [iterations];
  uint64    *S           = new uint64 [iterations];

  uint32     failed      = 0;
  uint32     errors      = 0;

  fprintf(stderr, "Starting test of binary encoding\n");

  bpos   = uint64ZERO;
  mtctx = mtInit(mtseed);

  //  Build some values to stuff into the bits

  for (uint32 j=0; j < iterations; j++) {
    S[j] = (mtRandom32(mtctx) % 63) + 1;
    V[j] = mtRandom64(mtctx) & uint64MASK(S[j]);
    //fprintf(stderr, "[%2d] S="uint64FMT" V="uint64HEX"\n", j, S[j], V[j]);
  }

  //  Stuff them in, in blocks of some size.  At the same time, decode
  //  (this has found bugs in the past).

  failed = 0;
  for (uint32 j=0; j < iterations; ) {
    uint64 num = (mtRandom32(mtctx) % 8);

    if (j + num > iterations)
      num = iterations - j;

    if (num == 0) {
      setDecodedValue(bits, bpos, S[j], V[j]);
      C[j] = getDecodedValue(bits, bpos, S[j]);
      //fprintf(stderr, "[%2d] V="uint64HEX" C="uint64HEX" single\n", j, V[j], C[j]);
      bpos += S[j];
    } else {
      uint64 newp1 = setDecodedValues(bits, bpos, num, S+j, V+j);
      uint64 newp2 = getDecodedValues(bits, bpos, num, S+j, C+j);

      if (newp1 != newp2) {
        //  not perfect; we should be checking the values too, but we do that later.
        for (uint32 x=0; x<num; x++)
          fprintf(stderr, "[%2d] #1 V="uint64HEX" C="uint64HEX" multiple "uint32FMT" %s\n",
                  j+x, V[j+x], C[j+x], num, (V[j+x] == C[j+x]) ? "" : "FAILED");
        failed++;
      }

      bpos = newp2;
    }

    j += num;
    if (num == 0)
      j++;
  }
  if (failed) {
    fprintf(stderr, "binEncoding #1 failed encoding "uint32FMT" times.\n", failed);
    errors++;
  }

  //  Check that V == C

  failed = 0;
  for (uint32 j=0; j<iterations; j++) {
    if (V[j] != C[j]) {
      fprintf(stderr, "[%2d] #2 V="uint64HEX" C="uint64HEX" S="uint32FMT"\n",
              j, V[j], C[j], S[j]);
      failed++;
    }
  }
  if (failed) {
    fprintf(stderr, "binEncoding #2 failed encode/decode "uint32FMT" times.\n", failed);
    errors++;
  }


  //  Decode independently, with different nums

  bpos = 0;  //  reset to start of bits

  for (uint32 j=0; j < iterations; ) {
    uint64 num = (mtRandom32(mtctx) % 8);

    if (j + num > iterations)
      num = iterations - j;

    if (num == 0) {
      C[j] = getDecodedValue(bits, bpos, S[j]);
      bpos += S[j];
    } else {
      bpos = getDecodedValues(bits, bpos, num, S+j, C+j);
    }

    j += num;
    if (num == 0)
      j++;
  }

  //  Check that V == C

  failed = 0;
  for (uint32 j=0; j<iterations; j++) {
    if (V[j] != C[j]) {
      fprintf(stderr, "[%2d] #3 V="uint64HEX" C="uint64HEX" S="uint32FMT"\n",
              j, V[j], C[j], S[j]);
      failed++;
    }
  }
  if (failed) {
    fprintf(stderr, "binEncoding #3 failed decoding "uint32FMT" times.\n", failed);
    errors++;
  }

  //  Clean.

  delete [] bits;
  delete [] V;
  delete [] C;
  delete [] S;

  if (errors)
    exit(1);
}
예제 #6
0
void
testBinaryEncodingPrePost(void) {
  time_t     mtseed     = time(0L);
  mt_s      *mtctx      = 0L;

  uint32     iterations = TEST_LENGTH;

  uint64    *bits        = new uint64 [2 * iterations];
  uint64     bpos        = uint64ZERO;
  uint32     siz1       = uint64ZERO;
  uint64     val1       = uint64ZERO;
  uint64     val2       = uint64ZERO;

  fprintf(stderr, "Starting test of binary encoding pre/post increment\n");

  bpos   = uint64ZERO;
  mtctx = mtInit(mtseed);

  for (uint32 j=0; j < iterations; j++) {
    siz1  = (mtRandom32(mtctx) % 63) + 1;
    val1  = mtRandom64(mtctx) & uint64MASK(siz1);

    setDecodedValue(bits, bpos, siz1, val1);

    val2 = postDecrementDecodedValue(bits, bpos, siz1);
    if (val2 != val1) {
      fprintf(stderr, "postDec1 failed: got "uint64FMT" expected "uint64FMT" siz="uint32FMT"\n",
              val2, val1, siz1);
      exit(1);
    }
    val2  = getDecodedValue(bits, bpos, siz1) + 1;
    val2 &= uint64MASK(siz1);
    if (val2 != val1) {
      fprintf(stderr, "postDec2 failed: got "uint64FMT" expected "uint64FMT" siz="uint32FMT"\n",
              val2, val1, siz1);
      exit(1);
    }

    val2 = preDecrementDecodedValue(bits, bpos, siz1) + 2;
    val2 &= uint64MASK(siz1);
    if (val2 != val1) {
      fprintf(stderr, "preDec failed: got "uint64FMT" expected "uint64FMT" siz="uint32FMT"\n",
              val2, val1, siz1);
      exit(1);
    }

    val2 = postIncrementDecodedValue(bits, bpos, siz1) + 2;
    val2 &= uint64MASK(siz1);
    if (val2 != val1) {
      fprintf(stderr, "postInc failed: got "uint64FMT" expected "uint64FMT"\n", val2+2, val1-2);
      exit(1);
    }
    val2  = getDecodedValue(bits, bpos, siz1) + 1;
    val2 &= uint64MASK(siz1);
    if (val2 != val1) {
      fprintf(stderr, "postInc2 failed: got "uint64FMT" expected "uint64FMT" siz="uint32FMT"\n",
              val2, val1, siz1);
      exit(1);
    }

    val2 = preIncrementDecodedValue(bits, bpos, siz1);
    //  Should be back to original value, so no mask
    if (val2 != val1) {
      fprintf(stderr, "preInc failed: got "uint64FMT" expected "uint64FMT"\n", val2, val1);
      exit(1);
    }

    switch (j % 4) {
      case 0:
        val2 = postDecrementDecodedValue(bits, bpos, siz1);
        break;
      case 1:
        val2 = preDecrementDecodedValue(bits, bpos, siz1);
        break;
      case 2:
        val2 = postIncrementDecodedValue(bits, bpos, siz1);
        break;
      case 3:
        val2 = preIncrementDecodedValue(bits, bpos, siz1);
        break;
    }

    bpos += siz1;
  }

  bpos   = uint64ZERO;
  mtctx = mtInit(mtseed);

  //for (j=0; j < iterations; j++) {
  //}

  delete [] bits;
}