// Return the count stored for 'mer'.
//
// Returns zero when no counts are loaded (_counts is null), when the
// bucket selected by HASH(mer) is empty, or when no entry in that bucket
// has a check value equal to CHECK(mer).
//
// The hash table gives the bucket as a half-open range [st, ed) of ENTRY
// indices.  Entry i of the bucket data is at _buckets[i] (or at bit offset
// i * _chkWidth when the buckets are bit-packed), and the count is read
// from _counts[i] (or bit offset i * _cntWidth when the counts are
// bit-packed).
//
u64bit
existDB::count(u64bit mer) {
  u64bit  h, st, ed;

  if (_counts == 0L)
    return(0);

  if (_compressedHash) {
    h  = HASH(mer) * _hshWidth;
    st = getDecodedValue(_hashTable, h,             _hshWidth);
    ed = getDecodedValue(_hashTable, h + _hshWidth, _hshWidth);
  } else {
    h  = HASH(mer);
    st = _hashTable[h];
    ed = _hashTable[h+1];
  }

  if (st == ed)
    return(0);

  u64bit  c     = CHECK(mer);
  bool    found = false;

  //  Always iterate over entry indices.  Previously the compressed-bucket
  //  scan rescaled st and ed into bit offsets, and the scaled st was then
  //  used to address _counts, which is indexed by entry, not by bucket bit
  //  offset.  Scaling only at the point of the bucket read keeps st usable
  //  for the count lookup below.

  if (_compressedBucket) {
    for (; st<ed; st++) {
      if (getDecodedValue(_buckets, st * _chkWidth, _chkWidth) == c) {
        found = true;
        break;
      }
    }
  } else {
    for (; st<ed; st++) {
      if (_buckets[st] == c) {
        found = true;
        break;
      }
    }
  }

  if (found == false)
    return(0);

  //  st is the index of the matching entry.

  if (_compressedCounts)
    return(getDecodedValue(_counts, st * _cntWidth, _cntWidth));

  return(_counts[st]);
}
void positionDB::dump(char *name) { uint64 sizs[4] = {_chckWidth, _pptrWidth, 1, _sizeWidth}; uint64 vals[4] = {0, 0, 0, 0}; FILE *F = fopen(name, "w"); for (uint64 h=0; h<_tableSizeInEntries; h++) { uint64 st, ed; if (_hashTable_BP) { st = getDecodedValue(_hashTable_BP, h * _hashWidth, _hashWidth); ed = getDecodedValue(_hashTable_BP, h * _hashWidth + _hashWidth, _hashWidth); } else { st = _hashTable_FW[h]; ed = _hashTable_FW[h+1]; } fprintf(F, "B "uint64FMT" "uint64FMT"-"uint64FMT"\n", h, st, ed); while (st < ed) { uint64 cb = st * _wFin; getDecodedValues(_buckets, cb, (_sizeWidth == 0) ? 3 : 4, sizs, vals); fprintf(F, "%c chk="uint64HEX" pos="uint64FMT" siz="uint64FMT, (vals[2] == 0) ? 'D' : 'U', vals[0], vals[1], vals[3]); if (vals[2] == 0) { uint64 pos = vals[1] * _posnWidth; uint64 len = getDecodedValue(_positions, pos, _posnWidth); for (pos += _posnWidth; len > 0; pos += _posnWidth, len--) fprintf(F, " "uint64FMT, getDecodedValue(_positions, pos, _posnWidth)); } fprintf(F, "\n"); st++; } } fclose(F); }
// Report whether 'mer' is present in the table.
//
// HASH(mer) selects a bucket, described by two consecutive hash table
// values as a half-open range of entries.  The mer is present if some
// entry in that range holds a check value equal to CHECK(mer).
//
bool
existDB::exists(u64bit mer) {
  u64bit  first, last;

  //  Look up the bucket boundaries, from either the plain or the
  //  bit-packed hash table.
  if (!_compressedHash) {
    u64bit slot = HASH(mer);

    first = _hashTable[slot];
    last  = _hashTable[slot+1];
  } else {
    u64bit bit = HASH(mer) * _hshWidth;

    first = getDecodedValue(_hashTable, bit,             _hshWidth);
    last  = getDecodedValue(_hashTable, bit + _hshWidth, _hshWidth);
  }

  //  Empty bucket, nothing to search.
  if (first == last)
    return(false);

  u64bit  wanted = CHECK(mer);

  //  Plain buckets: one array element per entry.
  if (!_compressedBucket) {
    while (first < last) {
      if (_buckets[first] == wanted)
        return(true);
      first++;
    }

    return(false);
  }

  //  Bit-packed buckets: entries are _chkWidth bits apart.
  first *= _chkWidth;
  last  *= _chkWidth;

  while (first < last) {
    if (getDecodedValue(_buckets, first, _chkWidth) == wanted)
      return(true);
    first += _chkWidth;
  }

  return(false);
}
// Returns hits with _AT_MOST_ numMismatches mistakes. bool positionDB::getUpToNMismatches(uint64 mer, uint32 numMismatches, uint64*& posn, uint64& posnMax, uint64& posnLen) { PREFETCH(_hashedErrors); // Slightly better. posnLen = 0; if (_hashedErrors == 0L) { fprintf(stderr, "ERROR: Nobody initialized getUpToNMismatches() by calling setUpMismatchMatcher().\n"); exit(1); } if (posnMax == 0) { posnMax = 16384; try { posn = new uint64 [posnMax]; } catch (...) { fprintf(stderr, "positionDB::getUpToNMismatches()-- Can't allocate space for initial positions, requested "uint64FMT" uint64's.\n", posnMax); abort(); } } uint64 orig = HASH(mer); // Optimization that didn't work. The idea was to compute all the // hashes with errors, then sort to gain better cache locality in // the lookups. The sort dominated. // // Another: Surprisingly, theq two getDecodedValue calls are faster // than a single getDecodedValues. for (uint32 e=0; e<_hashedErrorsLen; e++) { uint64 hash = orig ^ _hashedErrors[e]; uint64 st, ed; if (_hashTable_BP) { st = getDecodedValue(_hashTable_BP, hash * _hashWidth, _hashWidth); ed = getDecodedValue(_hashTable_BP, hash * _hashWidth + _hashWidth, _hashWidth); } else { st = _hashTable_FW[hash]; ed = _hashTable_FW[hash+1]; } assert((_hashedErrors[e] & ~_hashMask) == 0); assert((hash & ~_hashMask) == 0); // Rebuild the mer from the hash and its check code. // // Compare the rebuilt mer and the original mer -- if there are // exactly N errors, it's a hit! (if there are fewer than N, // we'll find it when we look for N-1 errors). // // Before rebuilding, compute diffs on the chckBits only -- if // things are wildly different (the usual case) we'll get // enough difference here to abort. Remember, the chck bits // are not encoded, they're an exact copy from the unhashed // mer. 
if (st != ed) { for (uint64 i=ed-st, J=st * _wFin; i--; J += _wFin) { uint64 chck = getDecodedValue(_buckets, J, _chckWidth); uint64 diffs = chck ^ (mer & _mask2); uint64 d1 = diffs & uint64NUMBER(0x5555555555555555); uint64 d2 = diffs & uint64NUMBER(0xaaaaaaaaaaaaaaaa); uint64 err = countNumberOfSetBits64(d1 | (d2 >> 1)); if (err <= numMismatches) { diffs = REBUILD(hash, chck) ^ mer; d1 = diffs & uint64NUMBER(0x5555555555555555); d2 = diffs & uint64NUMBER(0xaaaaaaaaaaaaaaaa); err = countNumberOfSetBits64(d1 | (d2 >> 1)); if (err <= numMismatches) // err is junk, just need a parameter here loadPositions(J, posn, posnMax, posnLen, err); } } } }
void testBinaryEncoding(void) { time_t mtseed = time(0L); mt_s *mtctx = 0L; uint32 iterations = TEST_LENGTH; uint64 *bits = new uint64 [iterations + 2]; uint64 bpos = uint64ZERO; uint64 *V = new uint64 [iterations]; uint64 *C = new uint64 [iterations]; uint64 *S = new uint64 [iterations]; uint32 failed = 0; uint32 errors = 0; fprintf(stderr, "Starting test of binary encoding\n"); bpos = uint64ZERO; mtctx = mtInit(mtseed); // Build some values to stuff into the bits for (uint32 j=0; j < iterations; j++) { S[j] = (mtRandom32(mtctx) % 63) + 1; V[j] = mtRandom64(mtctx) & uint64MASK(S[j]); //fprintf(stderr, "[%2d] S="uint64FMT" V="uint64HEX"\n", j, S[j], V[j]); } // Stuff them in, in blocks of some size. At the same time, decode // (this has found bugs in the past). failed = 0; for (uint32 j=0; j < iterations; ) { uint64 num = (mtRandom32(mtctx) % 8); if (j + num > iterations) num = iterations - j; if (num == 0) { setDecodedValue(bits, bpos, S[j], V[j]); C[j] = getDecodedValue(bits, bpos, S[j]); //fprintf(stderr, "[%2d] V="uint64HEX" C="uint64HEX" single\n", j, V[j], C[j]); bpos += S[j]; } else { uint64 newp1 = setDecodedValues(bits, bpos, num, S+j, V+j); uint64 newp2 = getDecodedValues(bits, bpos, num, S+j, C+j); if (newp1 != newp2) { // not perfect; we should be checking the values too, but we do that later. for (uint32 x=0; x<num; x++) fprintf(stderr, "[%2d] #1 V="uint64HEX" C="uint64HEX" multiple "uint32FMT" %s\n", j+x, V[j+x], C[j+x], num, (V[j+x] == C[j+x]) ? 
"" : "FAILED"); failed++; } bpos = newp2; } j += num; if (num == 0) j++; } if (failed) { fprintf(stderr, "binEncoding #1 failed encoding "uint32FMT" times.\n", failed); errors++; } // Check that V == C failed = 0; for (uint32 j=0; j<iterations; j++) { if (V[j] != C[j]) { fprintf(stderr, "[%2d] #2 V="uint64HEX" C="uint64HEX" S="uint32FMT"\n", j, V[j], C[j], S[j]); failed++; } } if (failed) { fprintf(stderr, "binEncoding #2 failed encode/decode "uint32FMT" times.\n", failed); errors++; } // Decode independently, with different nums bpos = 0; // reset to start of bits for (uint32 j=0; j < iterations; ) { uint64 num = (mtRandom32(mtctx) % 8); if (j + num > iterations) num = iterations - j; if (num == 0) { C[j] = getDecodedValue(bits, bpos, S[j]); bpos += S[j]; } else { bpos = getDecodedValues(bits, bpos, num, S+j, C+j); } j += num; if (num == 0) j++; } // Check that V == C failed = 0; for (uint32 j=0; j<iterations; j++) { if (V[j] != C[j]) { fprintf(stderr, "[%2d] #3 V="uint64HEX" C="uint64HEX" S="uint32FMT"\n", j, V[j], C[j], S[j]); failed++; } } if (failed) { fprintf(stderr, "binEncoding #3 failed decoding "uint32FMT" times.\n", failed); errors++; } // Clean. delete [] bits; delete [] V; delete [] C; delete [] S; if (errors) exit(1); }
void testBinaryEncodingPrePost(void) { time_t mtseed = time(0L); mt_s *mtctx = 0L; uint32 iterations = TEST_LENGTH; uint64 *bits = new uint64 [2 * iterations]; uint64 bpos = uint64ZERO; uint32 siz1 = uint64ZERO; uint64 val1 = uint64ZERO; uint64 val2 = uint64ZERO; fprintf(stderr, "Starting test of binary encoding pre/post increment\n"); bpos = uint64ZERO; mtctx = mtInit(mtseed); for (uint32 j=0; j < iterations; j++) { siz1 = (mtRandom32(mtctx) % 63) + 1; val1 = mtRandom64(mtctx) & uint64MASK(siz1); setDecodedValue(bits, bpos, siz1, val1); val2 = postDecrementDecodedValue(bits, bpos, siz1); if (val2 != val1) { fprintf(stderr, "postDec1 failed: got "uint64FMT" expected "uint64FMT" siz="uint32FMT"\n", val2, val1, siz1); exit(1); } val2 = getDecodedValue(bits, bpos, siz1) + 1; val2 &= uint64MASK(siz1); if (val2 != val1) { fprintf(stderr, "postDec2 failed: got "uint64FMT" expected "uint64FMT" siz="uint32FMT"\n", val2, val1, siz1); exit(1); } val2 = preDecrementDecodedValue(bits, bpos, siz1) + 2; val2 &= uint64MASK(siz1); if (val2 != val1) { fprintf(stderr, "preDec failed: got "uint64FMT" expected "uint64FMT" siz="uint32FMT"\n", val2, val1, siz1); exit(1); } val2 = postIncrementDecodedValue(bits, bpos, siz1) + 2; val2 &= uint64MASK(siz1); if (val2 != val1) { fprintf(stderr, "postInc failed: got "uint64FMT" expected "uint64FMT"\n", val2+2, val1-2); exit(1); } val2 = getDecodedValue(bits, bpos, siz1) + 1; val2 &= uint64MASK(siz1); if (val2 != val1) { fprintf(stderr, "postInc2 failed: got "uint64FMT" expected "uint64FMT" siz="uint32FMT"\n", val2, val1, siz1); exit(1); } val2 = preIncrementDecodedValue(bits, bpos, siz1); // Should be back to original value, so no mask if (val2 != val1) { fprintf(stderr, "preInc failed: got "uint64FMT" expected "uint64FMT"\n", val2, val1); exit(1); } switch (j % 4) { case 0: val2 = postDecrementDecodedValue(bits, bpos, siz1); break; case 1: val2 = preDecrementDecodedValue(bits, bpos, siz1); break; case 2: val2 = postIncrementDecodedValue(bits, 
bpos, siz1); break; case 3: val2 = preIncrementDecodedValue(bits, bpos, siz1); break; } bpos += siz1; } bpos = uint64ZERO; mtctx = mtInit(mtseed); //for (j=0; j < iterations; j++) { //} delete [] bits; }