/* * Construct a jsonb_ops GIN key from a flag byte and a textual representation * (which need not be null-terminated). This function is responsible * for hashing overlength text representations; it will add the * JGINFLAG_HASHED bit to the flag value if it does that. */ static Datum make_text_key(char flag, const char *str, int len) { text *item; char hashbuf[10]; if (len > JGIN_MAXLENGTH) { uint32 hashval; hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len)); snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval); str = hashbuf; len = 8; flag |= JGINFLAG_HASHED; } /* * Now build the text Datum. For simplicity we build a 4-byte-header * varlena text Datum here, but we expect it will get converted to short * header format when stored in the index. */ item = (text *) palloc(VARHDRSZ + len + 1); SET_VARSIZE(item, VARHDRSZ + len + 1); *VARDATA(item) = flag; memcpy(VARDATA(item) + 1, str, len); return PointerGetDatum(item); }
/*
 * Hash function over a BMBuildHashKey.
 *
 * Folds the per-attribute hash codes of the key's attribute values into one
 * 32-bit value, rotating the accumulator left by one bit before each
 * attribute.  The number of attributes and the hash functions come from
 * cur_bmbuild.  keysize is not used.
 */
static uint32
build_hash_key(const void *key, Size keysize __attribute__((unused)))
{
	Assert(key);

	BMBuildHashKey *buildKey = (BMBuildHashKey *) key;
	Datum	   *values = buildKey->attributeValueArr;
	bool	   *nulls = buildKey->isNullArr;
	uint32		result = 0;
	int			attno;

	for (attno = 0; attno < cur_bmbuild->natts; attno++)
	{
		/* rotate the accumulated hash left by 1 bit */
		result = (result << 1) | (result >> 31);

		/* a NULL paired with a strict hash function counts as hashcode 0 */
		if (nulls[attno] && cur_bmbuild->hash_func_is_strict[attno])
			continue;

		result ^= DatumGetUInt32(FunctionCall1(&cur_bmbuild->hash_funcs[attno],
											   values[attno]));
	}

	return result;
}
/*
 * Store one more resource in a ResourceArray.
 *
 * The caller must already have made room with ResourceArrayEnlarge().
 * The chosen slot is remembered in resarr->lastidx.
 */
static void
ResourceArrayAdd(ResourceArray *resarr, Datum value)
{
	uint32		slot;

	Assert(value != resarr->invalidval);
	Assert(resarr->nitems < resarr->maxitems);

	if (!RESARRAY_IS_ARRAY(resarr))
	{
		/* Hashed mode: take the first free slot at or after the hash position. */
		uint32		mask = resarr->capacity - 1;

		slot = DatumGetUInt32(hash_any((void *) &value, sizeof(value))) & mask;
		while (resarr->itemsarr[slot] != resarr->invalidval)
			slot = (slot + 1) & mask;
	}
	else
	{
		/* Linear mode: simply append. */
		slot = resarr->nitems;
	}

	resarr->lastidx = slot;
	resarr->itemsarr[slot] = value;
	resarr->nitems++;
}
/*
 * mcxt_ptr_hash_std: hash the keysize bytes found at key with hash_any and
 * return the result as a uint32.
 */
uint32
mcxt_ptr_hash_std(const void *key, Size keysize)
{
	return DatumGetUInt32(hash_any(key, keysize));
}
/*
 *		CatalogCacheComputeHashValue
 *
 * Combine the hash codes of up to four lookup keys into one hash value.
 * Keys are processed from the highest-numbered one down to key 0; each
 * key's hash is rotated by a key-specific amount (24, 16, 8 or 0 bits)
 * before being XOR'ed into the result.  Any other key count is FATAL.
 */
static uint32
CatalogCacheComputeHashValue(CatCache *cache, int nkeys, ScanKey cur_skey)
{
	uint32		result = 0;
	uint32		keyHash;

	CACHE4_elog(DEBUG2, "CatalogCacheComputeHashValue %s %d %p",
				cache->cc_relname,
				nkeys,
				cache);

	switch (nkeys)
	{
		case 4:
			keyHash = DatumGetUInt32(DirectFunctionCall1(cache->cc_hashfunc[3],
														 cur_skey[3].sk_argument));
			result ^= (keyHash << 24) ^ (keyHash >> 8);
			/* FALLTHROUGH */
		case 3:
			keyHash = DatumGetUInt32(DirectFunctionCall1(cache->cc_hashfunc[2],
														 cur_skey[2].sk_argument));
			result ^= (keyHash << 16) ^ (keyHash >> 16);
			/* FALLTHROUGH */
		case 2:
			keyHash = DatumGetUInt32(DirectFunctionCall1(cache->cc_hashfunc[1],
														 cur_skey[1].sk_argument));
			result ^= (keyHash << 8) ^ (keyHash >> 24);
			/* FALLTHROUGH */
		case 1:
			keyHash = DatumGetUInt32(DirectFunctionCall1(cache->cc_hashfunc[0],
														 cur_skey[0].sk_argument));
			result ^= keyHash;
			break;
		default:
			elog(FATAL, "wrong number of hash keys: %d", nkeys);
			break;
	}

	return result;
}
/*
 * Delete one occurrence of a resource from a ResourceArray.
 *
 * Returns true if an entry equal to value was found and removed, false
 * otherwise.  If the value is present several times, only one instance
 * goes away.  The slot remembered in lastidx is always tried first.
 */
static bool
ResourceArrayRemove(ResourceArray *resarr, Datum value)
{
	uint32		hint = resarr->lastidx;
	uint32		n;

	Assert(value != resarr->invalidval);

	if (RESARRAY_IS_ARRAY(resarr))
	{
		/* Linear mode; "found == nitems" means "not found". */
		uint32		found = resarr->nitems;

		if (hint < resarr->nitems && resarr->itemsarr[hint] == value)
			found = hint;
		else
		{
			for (n = 0; n < resarr->nitems; n++)
			{
				if (resarr->itemsarr[n] == value)
				{
					found = n;
					break;
				}
			}
		}

		if (found == resarr->nitems)
			return false;

		/* Fill the hole with the last item and shrink the array. */
		resarr->itemsarr[found] = resarr->itemsarr[resarr->nitems - 1];
		resarr->nitems--;
		/* Point lastidx at the new tail so reverse-order removals stay fast. */
		resarr->lastidx = resarr->nitems - 1;
		return true;
	}
	else
	{
		/* Hashed mode: probe from the hash position, at most capacity slots. */
		uint32		mask = resarr->capacity - 1;
		uint32		probe;

		if (hint < resarr->capacity && resarr->itemsarr[hint] == value)
			probe = hint;
		else
		{
			probe = DatumGetUInt32(hash_any((void *) &value, sizeof(value))) & mask;
			for (n = 0; n < resarr->capacity; n++)
			{
				if (resarr->itemsarr[probe] == value)
					break;
				probe = (probe + 1) & mask;
			}
			if (n == resarr->capacity)
				return false;
		}

		resarr->itemsarr[probe] = resarr->invalidval;
		resarr->nitems--;
		return true;
	}
}
/* * Compute the hash value for a tuple * * The passed-in key is a pointer to TupleHashEntryData. In an actual hash * table entry, the firstTuple field points to a tuple (in MinimalTuple * format). LookupTupleHashEntry sets up a dummy TupleHashEntryData with a * NULL firstTuple field --- that cues us to look at the inputslot instead. * This convention avoids the need to materialize virtual input tuples unless * they actually need to get copied into the table. * * Also, the caller must select an appropriate memory context for running * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.) */ static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple) { TupleHashTable hashtable = (TupleHashTable) tb->private_data; int numCols = hashtable->numCols; AttrNumber *keyColIdx = hashtable->keyColIdx; uint32 hashkey = hashtable->hash_iv; TupleTableSlot *slot; FmgrInfo *hashfunctions; int i; if (tuple == NULL) { /* Process the current input tuple for the table */ slot = hashtable->inputslot; hashfunctions = hashtable->in_hash_funcs; } else { /* * Process a tuple already stored in the table. * * (this case never actually occurs due to the way simplehash.h is * used, as the hash-value is stored in the entries) */ slot = hashtable->tableslot; ExecStoreMinimalTuple(tuple, slot, false); hashfunctions = hashtable->tab_hash_funcs; } for (i = 0; i < numCols; i++) { AttrNumber att = keyColIdx[i]; Datum attr; bool isNull; /* rotate hashkey left 1 bit at each step */ hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); attr = slot_getattr(slot, att, &isNull); if (!isNull) /* treat nulls as having hash key 0 */ { uint32 hkey; hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], attr)); hashkey ^= hkey; } } /* * The way hashes are combined above, among each other and with the IV, * doesn't lead to good bit perturbation. 
As the IV's goal is to lead to * achieve that, perform a round of hashing of the combined hash - * resulting in near perfect perturbation. */ return murmurhash32(hashkey); }
/*
 * Hash function for lexemes.  Lexemes are strings but are not NULL
 * terminated, so the explicit length stored in the key is used instead of
 * relying on a terminator.  keysize is ignored.
 */
static uint32
lexeme_hash(const void *key, Size keysize)
{
	const LexemeHashKey *lex = (const LexemeHashKey *) key;
	Datum		h;

	h = hash_any((const unsigned char *) lex->lexeme, lex->length);

	return DatumGetUInt32(h);
}
/*
 * Hash function for elements.
 *
 * The key points at a Datum; it is hashed with the hash function recorded
 * in array_extra_data (the element type's default hash opclass), passing
 * the default collation in case the type is collation-sensitive.
 */
static uint32
element_hash(const void *key, Size keysize)
{
	const Datum *elem = (const Datum *) key;

	return DatumGetUInt32(FunctionCall1Coll(array_extra_data->hash,
											DEFAULT_COLLATION_OID,
											*elem));
}
/*
 * _hash_datum2hashkey -- hash a Datum with the index's own hash procedure
 *
 * The Datum is assumed to be of the index's column type, which lets us use
 * the "primary" hash procedure tracked by the generic index code.
 */
uint32
_hash_datum2hashkey(Relation rel, Datum key)
{
	/* XXX assumes index has only one attribute */
	FmgrInfo   *hashproc = index_getprocinfo(rel, 1, HASHPROC);
	Datum		result = FunctionCall1(hashproc, key);

	return DatumGetUInt32(result);
}
/*
 * Hash a pgssHashKey: XOR together the hashes of userid, dbid and the
 * query text (query_len bytes at query_ptr).
 */
static uint32
pgss_hash_fn(const void *key, Size keysize)
{
	const pgssHashKey *entry = (const pgssHashKey *) key;
	uint32		result;

	/* the encoding is deliberately left out of the hash */
	result = DatumGetUInt32(hash_any((const unsigned char *) entry->query_ptr,
									 entry->query_len));
	result ^= hash_uint32((uint32) entry->userid);
	result ^= hash_uint32((uint32) entry->dbid);

	return result;
}
/*
 * _hash_datum2hashkey -- hash a Datum with the index's own hash procedure
 *
 * The Datum is assumed to be of the index's column type, which lets us use
 * the "primary" hash procedure tracked by the generic index code.  The
 * hash procedure is invoked with the collation of the first index column.
 */
uint32
_hash_datum2hashkey(Relation rel, Datum key)
{
	/* XXX assumes index has only one attribute */
	FmgrInfo   *hashproc = index_getprocinfo(rel, 1, HASHPROC);
	Oid			indcoll = rel->rd_indcollation[0];
	Datum		result = FunctionCall1Coll(hashproc, indcoll, key);

	return DatumGetUInt32(result);
}
/* * Compute the hash value for a tuple * * The passed-in key is a pointer to TupleHashEntryData. In an actual hash * table entry, the firstTuple field points to a tuple (in MinimalTuple * format). LookupTupleHashEntry sets up a dummy TupleHashEntryData with a * NULL firstTuple field --- that cues us to look at the inputslot instead. * This convention avoids the need to materialize virtual input tuples unless * they actually need to get copied into the table. * * Also, the caller must select an appropriate memory context for running * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.) */ static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple) { TupleHashTable hashtable = (TupleHashTable) tb->private_data; int numCols = hashtable->numCols; AttrNumber *keyColIdx = hashtable->keyColIdx; uint32 hashkey = hashtable->hash_iv; TupleTableSlot *slot; FmgrInfo *hashfunctions; int i; if (tuple == NULL) { /* Process the current input tuple for the table */ slot = hashtable->inputslot; hashfunctions = hashtable->in_hash_funcs; } else { /* * Process a tuple already stored in the table. * * (this case never actually occurs due to the way simplehash.h is * used, as the hash-value is stored in the entries) */ slot = hashtable->tableslot; ExecStoreMinimalTuple(tuple, slot, false); hashfunctions = hashtable->tab_hash_funcs; } for (i = 0; i < numCols; i++) { AttrNumber att = keyColIdx[i]; Datum attr; bool isNull; /* rotate hashkey left 1 bit at each step */ hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); attr = slot_getattr(slot, att, &isNull); if (!isNull) /* treat nulls as having hash key 0 */ { uint32 hkey; hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], attr)); hashkey ^= hkey; } } return hashkey; }
/*
 * string_hash: hash function for keys that are null-terminated strings.
 *
 * NOTE: this is the default hash function if none is specified.
 */
uint32
string_hash(const void *key, Size keysize)
{
	Size		s_len = strlen((const char *) key);
	Size		limit = keysize - 1;

	/*
	 * A string longer than keysize-1 bytes gets truncated to that length
	 * when it is copied into the hash table, so hash no more than that.
	 */
	if (!(s_len < limit))
		s_len = limit;

	return DatumGetUInt32(hash_any((const unsigned char *) key,
								   (int) s_len));
}
/*
 * Compute the hash value for a tuple
 *
 * key points to a TupleHashEntryData.  In a real hash table entry its
 * firstTuple field points to a MinimalTuple; LookupTupleHashEntry instead
 * passes a dummy entry with firstTuple == NULL, which tells us to hash the
 * table's inputslot.  That convention avoids materializing virtual input
 * tuples unless they really have to be copied into the table.
 *
 * CurTupleHashTable must be set before this is called, because dynahash.c
 * offers no API through which we could reach the hashtable otherwise.
 *
 * The caller must also select an appropriate memory context for running
 * the hash functions (dynahash.c doesn't change CurrentMemoryContext).
 */
static uint32
TupleHashTableHash(const void *key, Size keysize)
{
	MinimalTuple stored = ((const TupleHashEntryData *) key)->firstTuple;
	TupleHashTable hashtable = CurTupleHashTable;
	int			ncols = hashtable->numCols;
	AttrNumber *colidx = hashtable->keyColIdx;
	TupleTableSlot *srcslot;
	FmgrInfo   *hashfns;
	uint32		result = 0;
	int			col;

	if (stored != NULL)
	{
		/* Tuple already stored in the table */
		/* (this case never actually occurs in current dynahash.c code) */
		srcslot = hashtable->tableslot;
		ExecStoreMinimalTuple(stored, srcslot, false);
		hashfns = hashtable->tab_hash_funcs;
	}
	else
	{
		/* Current input tuple for the table */
		srcslot = hashtable->inputslot;
		hashfns = hashtable->in_hash_funcs;
	}

	for (col = 0; col < ncols; col++)
	{
		Datum		value;
		bool		isNull;

		/* rotate the accumulator left 1 bit at each step */
		result = (result << 1) | (result >> 31);

		value = slot_getattr(srcslot, colidx[col], &isNull);

		/* a NULL contributes hash key 0, i.e. nothing to XOR in */
		if (isNull)
			continue;

		result ^= DatumGetUInt32(FunctionCall1(&hashfns[col], value));
	}

	return result;
}
/*
 * Pick the next sampled tuple offset within the current block.
 *
 * Returning an offset without knowing whether the tuple is visible (or
 * even exists) is fine: the coin is flipped for every tuple offset, all
 * offsets have the same probability of being returned, and so extra flips
 * for invisible tuples do no harm.
 *
 * At the end of the block InvalidOffsetNumber is returned, which tells
 * SampleScan to move on to the next block.  The returned value is also
 * remembered in the sampler state (lt).
 */
static OffsetNumber
bernoulli_nextsampletuple(SampleScanState *node,
						  BlockNumber blockno,
						  OffsetNumber maxoffset)
{
	BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state;
	OffsetNumber off = sampler->lt;
	uint32		hashinput[3];

	/* Step to the first, or the next, tuple offset of the block */
	if (off != InvalidOffsetNumber)
		off++;
	else
		off = FirstOffsetNumber;

	/*
	 * The hash input is an array of 3 uint32's: block, offset and seed, fed
	 * to hash_any.  That is cheap to set up, and with the current
	 * implementation of hash_any it yields machine-independent results,
	 * which is a nice property for regression testing.
	 *
	 * Block and seed stay fixed for the whole block:
	 */
	hashinput[0] = blockno;
	hashinput[2] = sampler->seed;

	/* Walk the offsets until one is selected or the block is exhausted. */
	while (off <= maxoffset)
	{
		uint32		coin;

		hashinput[1] = off;

		coin = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
									   (int) sizeof(hashinput)));
		if (coin < sampler->cutoff)
			break;

		off++;
	}

	if (off > maxoffset)
		off = InvalidOffsetNumber;

	sampler->lt = off;

	return off;
}
/*
 * _hash_datum2hashkey_type -- hash a Datum of the given type in a way
 *		that is compatible with this index
 *
 * The hash support function is looked up in the index's opfamily for
 * (keytype, keytype); an ERROR is raised if there is none.  This is much
 * more expensive than _hash_datum2hashkey, so use it only in cross-type
 * situations.
 */
uint32
_hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype)
{
	/* XXX assumes index has only one attribute */
	RegProcedure proc = get_opfamily_proc(rel->rd_opfamily[0],
										  keytype,
										  keytype,
										  HASHPROC);

	if (!RegProcedureIsValid(proc))
		elog(ERROR, "missing support function %d(%u,%u) for index \"%s\"",
			 HASHPROC, keytype, keytype,
			 RelationGetRelationName(rel));

	return DatumGetUInt32(OidFunctionCall1(proc, key));
}
/*
 * Pick the next block to sample.
 *
 * Returns the chosen block number and records the following block as the
 * next starting point, or returns InvalidBlockNumber once the end of the
 * relation has been reached.
 */
static BlockNumber
system_nextsampleblock(SampleScanState *node)
{
	SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
	HeapScanDesc scan = node->ss.ss_currentScanDesc;
	BlockNumber blk;
	uint32		hashinput[2];

	/*
	 * The hash input is an array of 2 uint32's: block number and seed, fed
	 * to hash_any.  That is cheap to set up, and with the current
	 * implementation of hash_any it yields machine-independent results,
	 * which is a nice property for regression testing.
	 *
	 * The seed stays fixed across all blocks:
	 */
	hashinput[1] = sampler->seed;

	/* Walk the block numbers until one is selected or the relation ends. */
	for (blk = sampler->nextblock; blk < scan->rs_nblocks; blk++)
	{
		uint32		coin;

		hashinput[0] = blk;

		coin = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
									   (int) sizeof(hashinput)));
		if (coin < sampler->cutoff)
		{
			/* Selected; remember where to resume next time */
			sampler->nextblock = blk + 1;
			return blk;
		}
	}

	/* Done, but let's reset nextblock to 0 for safety. */
	sampler->nextblock = 0;
	return InvalidBlockNumber;
}
/*
 * uint32_hash: hash function for keys that are uint32 or int32
 *
 * (tag_hash would handle these keys as well, only more slowly)
 */
uint32
uint32_hash(const void *key, Size keysize)
{
	uint32		k;

	Assert(keysize == sizeof(uint32));

	k = *((const uint32 *) key);

	return DatumGetUInt32(hash_uint32(k));
}
/*
** Calculate a hash code for a serialized geography by running hash_any
** over the VARSIZE(g) bytes that start at g.
*/
static uint32
geography_hash(GSERIALIZED *g)
{
	Datum		h = hash_any((void *) g, VARSIZE(g));

	return DatumGetUInt32(h);
}
/*
 * Executes INSERT and DELETE DML operations.
 *
 * One tuple is pulled from the outer plan node per call.  The action to
 * perform (DML_INSERT or DML_DELETE) is read from the slot at
 * plannode->actionColIdx, the table OID from plannode->oidColIdx, and, for
 * deletes, the ctid of the tuple to delete from plannode->ctidColIdx.
 *
 * Returns NULL when the outer node is exhausted; otherwise returns the
 * slot fetched from the outer node (not the projected/cleaned-up slot).
 */
TupleTableSlot*
ExecDML(DMLState *node)
{
	PlanState *outerNode = outerPlanState(node);
	DML *plannode = (DML *) node->ps.plan;

	Assert(outerNode != NULL);

	/* Fetch the next input tuple; an empty slot means we are done. */
	TupleTableSlot *slot = ExecProcNode(outerNode);

	if (TupIsNull(slot))
	{
		return NULL;
	}

	/* The action column must not be NULL. */
	bool isnull = false;
	int action = DatumGetUInt32(slot_getattr(slot, plannode->actionColIdx, &isnull));

	Assert(!isnull);

	/*
	 * Record the table OID carried in the tuple on the slot itself.
	 * NOTE(review): isnull is not checked after this fetch -- confirm the
	 * column can never be NULL.
	 */
	isnull = false;
	Datum oid = slot_getattr(slot, plannode->oidColIdx, &isnull);
	slot->tts_tableOid = DatumGetUInt32(oid);

	/* Inserts/deletes issued on behalf of an UPDATE statement are flagged. */
	bool isUpdate = false;
	if (node->ps.state->es_plannedstmt->commandType == CMD_UPDATE)
	{
		isUpdate = true;
	}

	Assert(action == DML_INSERT || action == DML_DELETE);

	/*
	 * Reset per-tuple memory context to free any expression evaluation
	 * storage allocated in the previous tuple cycle.
	 */
	ExprContext *econtext = node->ps.ps_ExprContext;
	ResetExprContext(econtext);

	/* Prepare cleaned-up tuple by projecting it and filtering junk columns */
	econtext->ecxt_outertuple = slot;
	TupleTableSlot *projectedSlot = ExecProject(node->ps.ps_ProjInfo, NULL);

	/* remove 'junk' columns from tuple */
	node->cleanedUpSlot = ExecFilterJunk(node->junkfilter, projectedSlot);

	if (DML_INSERT == action)
	{
		/* Respect any given tuple Oid when updating a tuple. */
		if(isUpdate && plannode->tupleoidColIdx != 0)
		{
			isnull = false;
			oid = slot_getattr(slot, plannode->tupleoidColIdx, &isnull);
			HeapTuple htuple = ExecFetchSlotHeapTuple(node->cleanedUpSlot);
			Assert(htuple == node->cleanedUpSlot->PRIVATE_tts_heaptuple);
			/* NOTE(review): oid is a Datum here and is passed without an explicit conversion. */
			HeapTupleSetOid(htuple, oid);
		}

		/*
		 * The plan origin is required since ExecInsert performs different
		 * actions depending on the type of plan (constraint enforcement and
		 * triggers.)
		 */
		ExecInsert(node->cleanedUpSlot, NULL /* destReceiver */, node->ps.state, PLANGEN_OPTIMIZER /* Plan origin */, isUpdate);
	}
	else /* DML_DELETE */
	{
		/* The ctid column identifies the tuple to delete and must not be NULL. */
		Datum ctid = slot_getattr(slot, plannode->ctidColIdx, &isnull);

		Assert(!isnull);

		/* Hand ExecDelete a pointer to a local copy of the item pointer. */
		ItemPointer tupleid = (ItemPointer) DatumGetPointer(ctid);
		ItemPointerData tuple_ctid = *tupleid;
		tupleid = &tuple_ctid;

		/* Correct tuple count by ignoring deletes when splitting tuples. */
		ExecDelete(tupleid, node->cleanedUpSlot, NULL /* DestReceiver */, node->ps.state, PLANGEN_OPTIMIZER /* Plan origin */, isUpdate);
	}

	return slot;
}
/* * FetchTableCommon executes common logic that wraps around the actual data * fetching function. This common logic includes ensuring that only one process * tries to fetch this table at any given time, and that data fetch operations * are retried in case of node failures. */ static void FetchTableCommon(text *tableNameText, uint64 remoteTableSize, ArrayType *nodeNameObject, ArrayType *nodePortObject, bool (*FetchTableFunction)(const char *, uint32, const char *)) { uint64 shardId = INVALID_SHARD_ID; Oid relationId = InvalidOid; List *relationNameList = NIL; RangeVar *relation = NULL; uint32 nodeIndex = 0; bool tableFetched = false; char *tableName = text_to_cstring(tableNameText); Datum *nodeNameArray = DeconstructArrayObject(nodeNameObject); Datum *nodePortArray = DeconstructArrayObject(nodePortObject); int32 nodeNameCount = ArrayObjectCount(nodeNameObject); int32 nodePortCount = ArrayObjectCount(nodePortObject); /* we should have the same number of node names and port numbers */ if (nodeNameCount != nodePortCount) { ereport(ERROR, (errmsg("node name array size: %d and node port array size: %d" " do not match", nodeNameCount, nodePortCount))); } /* * We lock on the shardId, but do not unlock. When the function returns, and * the transaction for this function commits, this lock will automatically * be released. This ensures that concurrent caching commands will see the * newly created table when they acquire the lock (in read committed mode). */ shardId = ExtractShardId(tableName); LockShardResource(shardId, AccessExclusiveLock); relationNameList = textToQualifiedNameList(tableNameText); relation = makeRangeVarFromNameList(relationNameList); relationId = RangeVarGetRelid(relation, NoLock, true); /* check if we already fetched the table */ if (relationId != InvalidOid) { uint64 localTableSize = 0; if (!ExpireCachedShards) { return; } /* * Check if the cached shard has the same size on disk as it has as on * the placement (is up to date). 
* * Note 1: performing updates or deletes on the original shard leads to * inconsistent sizes between different databases in which case the data * would be fetched every time, or worse, the placement would get into * a deadlock when it tries to fetch from itself while holding the lock. * Therefore, this option is disabled by default. * * Note 2: when appending data to a shard, the size on disk only * increases when a new page is added (the next 8kB block). */ localTableSize = LocalTableSize(relationId); if (remoteTableSize > localTableSize) { /* table is not up to date, drop the table */ ObjectAddress tableObject = { InvalidOid, InvalidOid, 0 }; tableObject.classId = RelationRelationId; tableObject.objectId = relationId; tableObject.objectSubId = 0; performDeletion(&tableObject, DROP_RESTRICT, PERFORM_DELETION_INTERNAL); } else { /* table is up to date */ return; } } /* loop until we fetch the table or try all nodes */ while (!tableFetched && (nodeIndex < nodeNameCount)) { Datum nodeNameDatum = nodeNameArray[nodeIndex]; Datum nodePortDatum = nodePortArray[nodeIndex]; char *nodeName = TextDatumGetCString(nodeNameDatum); uint32 nodePort = DatumGetUInt32(nodePortDatum); tableFetched = (*FetchTableFunction)(nodeName, nodePort, tableName); nodeIndex++; } /* error out if we tried all nodes and could not fetch the table */ if (!tableFetched) { ereport(ERROR, (errmsg("could not fetch relation: \"%s\"", tableName))); } }
/*
 * Add an attribute to the hash calculation.
 *
 * The datum is converted to a type-specific byte representation (buf, len)
 * and that byte range is handed to hashFn exactly once.  Unsupported types
 * raise an ERROR.
 *
 * **IMPORTANT: any new hard coded support for a data type in here
 * must be added to isGreenplumDbHashable() below!
 *
 * Note that the caller should provide the base type if the datum is
 * of a domain type. It is quite expensive to call get_typtype() and
 * getBaseType() here since this function gets called a lot for the
 * same set of Datums.
 *
 * @param datum the value to hash.
 * @param type type OID of datum; selects the byte representation used.
 * @param hashFn called to update the hash value.
 * @param clientData passed to hashFn.
 */
void
hashDatum(Datum datum, Oid type, datumHashFunction hashFn, void *clientData)
{
	void *buf = NULL; /* pointer to the data */
	size_t len = 0; /* length for the data buffer */

	/* Per-type scratch storage; buf points at one of these for by-value types. */
	int64 intbuf; /* an 8 byte buffer for all integer sizes */
	float4 buf_f4;
	float8 buf_f8;
	Timestamp tsbuf; /* timestamp data type is either a double or
					  * int8 (determined at compile time) */
	TimestampTz tstzbuf;
	DateADT datebuf;
	TimeADT timebuf;
	TimeTzADT *timetzptr;
	Interval *intervalptr;
	AbsoluteTime abstime_buf;
	RelativeTime reltime_buf;
	TimeInterval tinterval;
	AbsoluteTime tinterval_len;
	Numeric num;
	bool bool_buf;
	char char_buf;
	Name namebuf;
	ArrayType *arrbuf;
	inet *inetptr; /* inet/cidr */
	unsigned char inet_hkey[sizeof(inet_struct)];
	macaddr *macptr; /* MAC address */
	VarBit *vbitptr;
	int2vector *i2vec_buf;
	oidvector *oidvec_buf;
	Cash cash_buf;
	AclItem *aclitem_ptr;
	uint32 aclitem_buf;

	/*
	 * special case buffers
	 */
	uint32 nanbuf;
	uint32 invalidbuf;

	/* if set, memory that must be pfree'd after hashing */
	void *tofree = NULL;

	/*
	 * Select the hash to be performed according to the field type we are
	 * adding to the hash.
	 */
	switch (type)
	{
		/*
		 * ======= NUMERIC TYPES ========
		 */
		case INT2OID: /* -32 thousand to 32 thousand, 2-byte storage */
			intbuf = (int64) DatumGetInt16(datum); /* cast to 8 byte before
													* hashing */
			buf = &intbuf;
			len = sizeof(intbuf);
			break;

		case INT4OID: /* -2 billion to 2 billion integer, 4-byte
					   * storage */
			intbuf = (int64) DatumGetInt32(datum); /* cast to 8 byte before
													* hashing */
			buf = &intbuf;
			len = sizeof(intbuf);
			break;

		case INT8OID: /* ~18 digit integer, 8-byte storage */
			intbuf = DatumGetInt64(datum); /* already 8 bytes wide */
			buf = &intbuf;
			len = sizeof(intbuf);
			break;

		case FLOAT4OID: /* single-precision floating point number,
						 * 4-byte storage */
			buf_f4 = DatumGetFloat4(datum);

			/*
			 * On IEEE-float machines, minus zero and zero have different bit
			 * patterns but should compare as equal. We must ensure that they
			 * have the same hash value, which is most easily done this way:
			 */
			if (buf_f4 == (float4) 0)
				buf_f4 = 0.0;

			buf = &buf_f4;
			len = sizeof(buf_f4);
			break;

		case FLOAT8OID: /* double-precision floating point number,
						 * 8-byte storage */
			buf_f8 = DatumGetFloat8(datum);

			/*
			 * On IEEE-float machines, minus zero and zero have different bit
			 * patterns but should compare as equal. We must ensure that they
			 * have the same hash value, which is most easily done this way:
			 */
			if (buf_f8 == (float8) 0)
				buf_f8 = 0.0;

			buf = &buf_f8;
			len = sizeof(buf_f8);
			break;

		case NUMERICOID:
			num = DatumGetNumeric(datum);

			if (NUMERIC_IS_NAN(num))
			{
				/* every NaN hashes as the same constant */
				nanbuf = NAN_VAL;
				buf = &nanbuf;
				len = sizeof(nanbuf);
			}
			else /* not a nan */
			{
				/* hash the digit data only, skipping the numeric header */
				buf = num->n_data;
				len = (VARSIZE(num) - NUMERIC_HDRSZ);
			}

			/*
			 * If we did a pg_detoast_datum, we need to remember to pfree,
			 * or we will leak memory. Because of the 1-byte varlena header stuff.
			 */
			if (num != DatumGetPointer(datum))
				tofree = num;

			break;

		/*
		 * ====== CHARACTER TYPES =======
		 */
		case CHAROID: /* char(1), single character */
			char_buf = DatumGetChar(datum);
			buf = &char_buf;
			len = 1;
			break;

		case BPCHAROID: /* char(n), blank-padded string, fixed storage */
		case TEXTOID: /* text */
		case VARCHAROID: /* varchar */
		case BYTEAOID: /* bytea */
			{
				int tmplen;

				/* sets buf and tmplen, and tofree if a copy had to be made */
				varattrib_untoast_ptr_len(datum, (char **) &buf, &tmplen, &tofree);
				/* adjust length to not include trailing blanks (never for bytea) */
				if (type != BYTEAOID && tmplen > 1)
					tmplen = ignoreblanks((char *) buf, tmplen);

				len = tmplen;
				break;
			}

		case NAMEOID:
			namebuf = DatumGetName(datum);
			len = NAMEDATALEN;
			buf = NameStr(*namebuf);

			/* adjust length to not include trailing blanks */
			if (len > 1)
				len = ignoreblanks((char *) buf, len);
			break;

		/*
		 * ====== OBJECT IDENTIFIER TYPES ======
		 */
		case OIDOID: /* object identifier(oid), maximum 4 billion */
		case REGPROCOID: /* function name */
		case REGPROCEDUREOID: /* function name with argument types */
		case REGOPEROID: /* operator name */
		case REGOPERATOROID: /* operator with argument types */
		case REGCLASSOID: /* relation name */
		case REGTYPEOID: /* data type name */
			intbuf = (int64) DatumGetUInt32(datum); /* cast to 8 byte before hashing */
			buf = &intbuf;
			len = sizeof(intbuf);
			break;

		case TIDOID: /* tuple id (6 bytes) */
			buf = DatumGetPointer(datum);
			len = SizeOfIptrData;
			break;

		/*
		 * ====== DATE/TIME TYPES ======
		 */
		case TIMESTAMPOID: /* date and time */
			tsbuf = DatumGetTimestamp(datum);
			buf = &tsbuf;
			len = sizeof(tsbuf);
			break;

		case TIMESTAMPTZOID: /* date and time with time zone */
			tstzbuf = DatumGetTimestampTz(datum);
			buf = &tstzbuf;
			len = sizeof(tstzbuf);
			break;

		case DATEOID: /* ANSI SQL date */
			datebuf = DatumGetDateADT(datum);
			buf = &datebuf;
			len = sizeof(datebuf);
			break;

		case TIMEOID: /* hh:mm:ss, ANSI SQL time */
			timebuf = DatumGetTimeADT(datum);
			buf = &timebuf;
			len = sizeof(timebuf);
			break;

		case TIMETZOID: /* time with time zone */
			/*
			 * will not compare to TIMEOID on equal values.
			 * Postgres never attempts to compare the two as well.
			 */
			timetzptr = DatumGetTimeTzADTP(datum);
			buf = (unsigned char *) timetzptr;

			/*
			 * Specify hash length as the size of the time field plus the
			 * size of the zone field, not as sizeof(TimeTzADT), so that any
			 * garbage pad bytes in the structure won't be included in the
			 * hash!
			 */
			len = sizeof(timetzptr->time) + sizeof(timetzptr->zone);
			break;

		case INTERVALOID: /* @ <number> <units>, time interval */
			intervalptr = DatumGetIntervalP(datum);
			buf = (unsigned char *) intervalptr;

			/*
			 * Specify hash length as the size of the time field plus the
			 * size of the month field, not as sizeof(Interval), so that any
			 * garbage pad bytes in the structure won't be included in the
			 * hash!
			 *
			 * NOTE(review): the hashed bytes are the first len bytes of the
			 * struct; which fields they cover depends on the Interval
			 * layout -- confirm against the struct definition.
			 */
			len = sizeof(intervalptr->time) + sizeof(intervalptr->month);
			break;

		case ABSTIMEOID:
			abstime_buf = DatumGetAbsoluteTime(datum);

			if (abstime_buf == INVALID_ABSTIME)
			{
				/* hash to a constant value */
				invalidbuf = INVALID_VAL;
				len = sizeof(invalidbuf);
				buf = &invalidbuf;
			}
			else
			{
				len = sizeof(abstime_buf);
				buf = &abstime_buf;
			}

			break;

		case RELTIMEOID:
			reltime_buf = DatumGetRelativeTime(datum);

			if (reltime_buf == INVALID_RELTIME)
			{
				/* hash to a constant value */
				invalidbuf = INVALID_VAL;
				len = sizeof(invalidbuf);
				buf = &invalidbuf;
			}
			else
			{
				len = sizeof(reltime_buf);
				buf = &reltime_buf;
			}

			break;

		case TINTERVALOID:
			tinterval = DatumGetTimeInterval(datum);

			/*
			 * check if a valid interval. the '0' status code
			 * stands for T_INTERVAL_INVAL which is defined in
			 * nabstime.c. We use the actual value instead
			 * of defining it again here.
			 */
			if(tinterval->status == 0 || tinterval->data[0] == INVALID_ABSTIME || tinterval->data[1] == INVALID_ABSTIME)
			{
				/* hash to a constant value */
				invalidbuf = INVALID_VAL;
				len = sizeof(invalidbuf);
				buf = &invalidbuf;
			}
			else
			{
				/* normalize on length of the time interval */
				tinterval_len = tinterval->data[1] - tinterval->data[0];
				len = sizeof(tinterval_len);
				buf = &tinterval_len;
			}

			break;

		/*
		 * ======= NETWORK TYPES ========
		 */
		case INETOID:
		case CIDROID:
			inetptr = DatumGetInetP(datum);
			len = inet_getkey(inetptr, inet_hkey, sizeof(inet_hkey)); /* fill-in inet_key & get len */
			buf = inet_hkey;
			break;

		case MACADDROID:
			macptr = DatumGetMacaddrP(datum);
			len = sizeof(macaddr);
			buf = (unsigned char *) macptr;
			break;

		/*
		 * ======== BIT STRINGS ========
		 */
		case BITOID:
		case VARBITOID:
			/*
			 * Note that these are essentially strings.
			 * we don't need to worry about '10' and '010'
			 * to compare, b/c they will not, by design.
			 * (see SQL standard, and varbit.c)
			 */
			vbitptr = DatumGetVarBitP(datum);
			len = VARBITBYTES(vbitptr);
			buf = (char *) VARBITS(vbitptr);
			break;

		/*
		 * ======= other types =======
		 */
		case BOOLOID: /* boolean, 'true'/'false' */
			bool_buf = DatumGetBool(datum);
			buf = &bool_buf;
			len = sizeof(bool_buf);
			break;

		/*
		 * We prepare the hash key for aclitems just like postgresql does.
		 * (see code and comment in acl.c: hash_aclitem() ).
		 */
		case ACLITEMOID:
			aclitem_ptr = DatumGetAclItemP(datum);
			/* the key is the sum of privileges, grantee and grantor */
			aclitem_buf = (uint32) (aclitem_ptr->ai_privs + aclitem_ptr->ai_grantee + aclitem_ptr->ai_grantor);
			buf = &aclitem_buf;
			len = sizeof(aclitem_buf);
			break;

		/*
		 * ANYARRAY is a pseudo-type. We use it to include
		 * any of the array types (OIDs 1007-1033 in pg_type.h).
		 * caller needs to be sure the type is ANYARRAYOID
		 * before calling cdbhash on an array (INSERT and COPY do so).
		 */
		case ANYARRAYOID:
			arrbuf = DatumGetArrayTypeP(datum);
			/* hash everything after the varlena length header */
			len = VARSIZE(arrbuf) - VARHDRSZ;
			buf = VARDATA(arrbuf);
			break;

		case INT2VECTOROID:
			i2vec_buf = (int2vector *) DatumGetPointer(datum);
			len = i2vec_buf->dim1 * sizeof(int2);
			buf = (void *)i2vec_buf->values;
			break;

		case OIDVECTOROID:
			oidvec_buf = (oidvector *) DatumGetPointer(datum);
			len = oidvec_buf->dim1 * sizeof(Oid);
			buf = oidvec_buf->values;
			break;

		case CASHOID: /* cash is passed by reference here; hash sizeof(Cash) bytes */
			cash_buf = (* (Cash *)DatumGetPointer(datum));
			len = sizeof(Cash);
			buf = &cash_buf;
			break;

		default:
			ereport(ERROR,
					(errcode(ERRCODE_CDB_FEATURE_NOT_YET),
					 errmsg("Type %u is not hashable.", type)));

	} /* switch(type) */

	/* do the hash using the selected algorithm */
	hashFn(clientData, buf, len);

	/* release any detoasted copy made above */
	if(tofree)
		pfree(tofree);
}
/*
 * oid_hash: hash function for keys that are OIDs
 *
 * (tag_hash would handle these keys as well, only more slowly)
 */
uint32
oid_hash(const void *key, Size keysize)
{
	Oid			oid;

	Assert(keysize == sizeof(Oid));

	oid = *((const Oid *) key);

	return DatumGetUInt32(hash_uint32((uint32) oid));
}
/*
 * tag_hash: hash function for fixed-size tag values
 *
 * Hashes exactly keysize bytes starting at key.
 */
uint32
tag_hash(const void *key, Size keysize)
{
	const unsigned char *bytes = (const unsigned char *) key;
	Datum		h = hash_any(bytes, (int) keysize);

	return DatumGetUInt32(h);
}
/*
 * string_hash: hash function for keys that are null-terminated strings.
 *
 * At most keysize-1 bytes of the string are hashed.
 *
 * NOTE: this is the default hash function if none is specified.
 */
uint32
string_hash(const void *key, Size keysize)
{
	Size		s_len = strlen((const char *) key);

	/*
	 * If the string exceeds keysize-1 bytes, we want to hash only that many,
	 * because when it is copied into the hash table it will be truncated at
	 * that length.  Hashing the untruncated string would give the lookup
	 * key a different hash value than the stored (truncated) key.
	 */
	if (s_len > keysize - 1)
		s_len = keysize - 1;

	return DatumGetUInt32(hash_any((const unsigned char *) key,
								   (int) s_len));
}