/*!
 * \brief   l_dnaIntersectionByHash()
 *
 * \param[in]    da1, da2    number arrays to intersect; neither is modified
 * \return  dad intersection of the number arrays, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This uses the same method for building the intersection set
 *          as ptaIntersectionByHash() and sarrayIntersectionByHash().
 *      (2) The larger array is hashed once; the smaller array is then
 *          traversed, and a value is appended to %dad when it is found
 *          in the larger array and has not already been appended.
 *          Consequently each common value appears once in %dad, in the
 *          order of its first occurrence in the smaller array.
 *      (3) If either hash table or the output array cannot be created,
 *          everything made so far is released and NULL is returned,
 *          rather than returning an incomplete or incorrect result.
 * </pre>
 */
L_DNA *
l_dnaIntersectionByHash(L_DNA  *da1,
                        L_DNA  *da2)
{
    l_int32     n1, n2, nsmall, nbuckets, i, index1, index2;
    l_uint32    nsize2;
    l_uint64    key;
    l_float64   val;
    L_DNAHASH  *dahash1, *dahash2;
    L_DNA      *da_small, *da_big, *dad;

    PROCNAME("l_dnaIntersectionByHash");

    if (!da1)
        return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL);
    if (!da2)
        return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL);

        /* Put the elements of the biggest array into a dnahash.
         * da_small and da_big are aliases of the inputs: do not
         * destroy them. */
    n1 = l_dnaGetCount(da1);
    n2 = l_dnaGetCount(da2);
    da_small = (n1 < n2) ? da1 : da2;
    da_big = (n1 < n2) ? da2 : da1;
    if ((dahash1 = l_dnaHashCreateFromDna(da_big)) == NULL)
        return (L_DNA *)ERROR_PTR("dahash1 not made", procName, NULL);

        /* dahash2 holds indices into da_small of the values that
         * have already been put in the output array. */
    nsmall = l_dnaGetCount(da_small);
    findNextLargerPrime(nsmall / 20, &nsize2);  /* buckets in hash table */
    if ((dahash2 = l_dnaHashCreate(nsize2, 0)) == NULL) {
        l_dnaHashDestroy(&dahash1);
        return (L_DNA *)ERROR_PTR("dahash2 not made", procName, NULL);
    }
    if ((dad = l_dnaCreate(0)) == NULL) {
        l_dnaHashDestroy(&dahash1);
        l_dnaHashDestroy(&dahash2);
        return (L_DNA *)ERROR_PTR("dad not made", procName, NULL);
    }

        /* Build up the intersection of numbers.  Add to %dad
         * if the number is in da_big (using dahash1) but hasn't
         * yet been seen in the traversal of da_small (using dahash2). */
    nbuckets = l_dnaHashGetCount(dahash2);
    for (i = 0; i < nsmall; i++) {
        l_dnaGetDValue(da_small, i, &val);
        l_dnaFindValByHash(da_big, dahash1, val, &index1);
        if (index1 >= 0) {  /* found in da_big */
            l_dnaFindValByHash(da_small, dahash2, val, &index2);
            if (index2 == -1) {  /* not yet in the output */
                l_dnaAddNumber(dad, val);
                l_hashFloat64ToUint64(nbuckets, val, &key);
                l_dnaHashAdd(dahash2, key, (l_float64)i);
            }
        }
    }

    l_dnaHashDestroy(&dahash1);
    l_dnaHashDestroy(&dahash2);
    return dad;
}
/*!
 * \brief   l_dnaFindValByHash()
 *
 * \param[in]    da
 * \param[in]    dahash    containing indices into %da
 * \param[in]    val       searching for this number in %da
 * \param[out]   pindex    index into da if found; -1 otherwise
 * \return  0 if OK; 1 on error
 *
 * <pre>
 * Notes:
 *      (1) %val is hashed to a key, and the key selects the dna of
 *          indices stored in %dahash.  Each stored index is used to
 *          fetch a number from %da, which is compared for exact
 *          equality with %val.  The first index that matches is
 *          returned in %pindex.
 *      (2) A missing bucket or a bucket without a match is not an
 *          error: the function returns 0 with %pindex set to -1.
 * </pre>
 */
l_ok
l_dnaFindValByHash(L_DNA      *da,
                   L_DNAHASH  *dahash,
                   l_float64   val,
                   l_int32    *pindex)
{
    l_int32    k, nslots, nidx, candidate;
    l_float64  stored;
    l_uint64   hashkey;
    L_DNA     *bucket;

    PROCNAME("l_dnaFindValByHash");

    if (!pindex)
        return ERROR_INT("&index not defined", procName, 1);
    *pindex = -1;
    if (!da)
        return ERROR_INT("da not defined", procName, 1);
    if (!dahash)
        return ERROR_INT("dahash not defined", procName, 1);

        /* Locate the bucket of indices that %val hashes to */
    nslots = l_dnaHashGetCount(dahash);
    l_hashFloat64ToUint64(nslots, val, &hashkey);
    bucket = l_dnaHashGetDna(dahash, hashkey, L_NOCOPY);

        /* Scan the bucket; stop at the first index whose number
         * in %da is equal to %val */
    if (bucket) {
        nidx = l_dnaGetCount(bucket);
        for (k = 0; k < nidx; k++) {
            l_dnaGetIValue(bucket, k, &candidate);
            l_dnaGetDValue(da, candidate, &stored);
            if (val == stored) {
                *pindex = candidate;
                break;
            }
        }
    }

    return 0;
}
/*!
 * \brief   ptaFindPtByHash()
 *
 * \param[in]    pta
 * \param[in]    dahash    built from pta
 * \param[in]    x, y      arbitrary point
 * \param[out]   pindex    index into pta if (x,y) is in pta; -1 otherwise
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Fast lookup in the dnaHash associated with a pta, to see
 *          if an arbitrary point (x,y) is already stored in the
 *          hash table.
 *      (2) The point is hashed to a key that selects a dna of indices
 *          into %pta.  The integer coordinates at each of those
 *          indices are compared with (x,y), and the first index that
 *          matches is returned in %pindex.
 *      (3) A missing bucket or a bucket without a match is not an
 *          error: the function returns 0 with %pindex set to -1.
 * </pre>
 */
l_int32
ptaFindPtByHash(PTA        *pta,
                L_DNAHASH  *dahash,
                l_int32     x,
                l_int32     y,
                l_int32    *pindex)
{
    l_int32   k, nslots, nidx, candidate, px, py;
    l_uint64  hashkey;
    L_DNA    *bucket;

    PROCNAME("ptaFindPtByHash");

    if (!pindex)
        return ERROR_INT("&index not defined", procName, 1);
    *pindex = -1;
    if (!pta)
        return ERROR_INT("pta not defined", procName, 1);
    if (!dahash)
        return ERROR_INT("dahash not defined", procName, 1);

        /* Locate the bucket of indices that (x,y) hashes to */
    nslots = l_dnaHashGetCount(dahash);
    l_hashPtToUint64Fast(nslots, x, y, &hashkey);
    bucket = l_dnaHashGetDna(dahash, hashkey, L_NOCOPY);

        /* Scan the bucket; stop at the first index whose point
         * in %pta coincides with (x,y) */
    if (bucket) {
        nidx = l_dnaGetCount(bucket);
        for (k = 0; k < nidx; k++) {
            l_dnaGetIValue(bucket, k, &candidate);
            ptaGetIPt(pta, candidate, &px, &py);
            if (px == x && py == y) {
                *pindex = candidate;
                break;
            }
        }
    }

    return 0;
}