/* * Construct an empty TupleHashTable * * numCols, keyColIdx: identify the tuple fields to use as lookup key * eqfunctions: equality comparison functions to use * hashfunctions: datatype-specific hashing functions to use * nbuckets: initial estimate of hashtable size * additionalsize: size of data stored in ->additional * tablecxt: memory context in which to store table and table entries * tempcxt: short-lived context for evaluation hash and comparison functions * * The function arrays may be made with execTuplesHashPrepare(). Note they * are not cross-type functions, but expect to see the table datatype(s) * on both sides. * * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in * storage that will live as long as the hashtable does. */ TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx, FmgrInfo *eqfunctions, FmgrInfo *hashfunctions, long nbuckets, Size additionalsize, MemoryContext tablecxt, MemoryContext tempcxt, bool use_variable_hash_iv) { TupleHashTable hashtable; Size entrysize = sizeof(TupleHashEntryData) + additionalsize; Assert(nbuckets > 0); /* Limit initial table size request to not more than work_mem */ nbuckets = Min(nbuckets, (long) ((work_mem * 1024L) / entrysize)); hashtable = (TupleHashTable) MemoryContextAlloc(tablecxt, sizeof(TupleHashTableData)); hashtable->numCols = numCols; hashtable->keyColIdx = keyColIdx; hashtable->tab_hash_funcs = hashfunctions; hashtable->tab_eq_funcs = eqfunctions; hashtable->tablecxt = tablecxt; hashtable->tempcxt = tempcxt; hashtable->entrysize = entrysize; hashtable->tableslot = NULL; /* will be made on first lookup */ hashtable->inputslot = NULL; hashtable->in_hash_funcs = NULL; hashtable->cur_eq_funcs = NULL; /* * If parallelism is in use, even if the master backend is performing the * scan itself, we don't want to create the hashtable exactly the same way * in all workers. As hashtables are iterated over in keyspace-order, * doing so in all processes in the same way is likely to lead to * "unbalanced" hashtables when the table size initially is * underestimated. */ if (use_variable_hash_iv) hashtable->hash_iv = hash_uint32(ParallelWorkerNumber); else hashtable->hash_iv = 0; hashtable->hashtab = tuplehash_create(tablecxt, nbuckets, hashtable); return hashtable; }
/* * Construct an empty TupleHashTable * * numCols, keyColIdx: identify the tuple fields to use as lookup key * eqfunctions: equality comparison functions to use * hashfunctions: datatype-specific hashing functions to use * nbuckets: initial estimate of hashtable size * additionalsize: size of data stored in ->additional * tablecxt: memory context in which to store table and table entries * tempcxt: short-lived context for evaluation hash and comparison functions * * The function arrays may be made with execTuplesHashPrepare(). Note they * are not cross-type functions, but expect to see the table datatype(s) * on both sides. * * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in * storage that will live as long as the hashtable does. */ TupleHashTable BuildTupleHashTable(PlanState *parent, TupleDesc inputDesc, int numCols, AttrNumber *keyColIdx, Oid *eqfuncoids, FmgrInfo *hashfunctions, long nbuckets, Size additionalsize, MemoryContext tablecxt, MemoryContext tempcxt, bool use_variable_hash_iv) { TupleHashTable hashtable; Size entrysize = sizeof(TupleHashEntryData) + additionalsize; MemoryContext oldcontext; Assert(nbuckets > 0); /* Limit initial table size request to not more than work_mem */ nbuckets = Min(nbuckets, (long) ((work_mem * 1024L) / entrysize)); hashtable = (TupleHashTable) MemoryContextAlloc(tablecxt, sizeof(TupleHashTableData)); hashtable->numCols = numCols; hashtable->keyColIdx = keyColIdx; hashtable->tab_hash_funcs = hashfunctions; hashtable->tablecxt = tablecxt; hashtable->tempcxt = tempcxt; hashtable->entrysize = entrysize; hashtable->tableslot = NULL; /* will be made on first lookup */ hashtable->inputslot = NULL; hashtable->in_hash_funcs = NULL; hashtable->cur_eq_func = NULL; /* * If parallelism is in use, even if the master backend is performing the * scan itself, we don't want to create the hashtable exactly the same way * in all workers. As hashtables are iterated over in keyspace-order, * doing so in all processes in the same way is likely to lead to * "unbalanced" hashtables when the table size initially is * underestimated. */ if (use_variable_hash_iv) hashtable->hash_iv = murmurhash32(ParallelWorkerNumber); else hashtable->hash_iv = 0; hashtable->hashtab = tuplehash_create(tablecxt, nbuckets, hashtable); oldcontext = MemoryContextSwitchTo(hashtable->tablecxt); /* * We copy the input tuple descriptor just for safety --- we assume all * input tuples will have equivalent descriptors. */ hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc), &TTSOpsMinimalTuple); /* build comparator for all columns */ /* XXX: should we support non-minimal tuples for the inputslot? */ hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc, &TTSOpsMinimalTuple, &TTSOpsMinimalTuple, numCols, keyColIdx, eqfuncoids, parent); MemoryContextSwitchTo(oldcontext); hashtable->exprcontext = CreateExprContext(parent->state); return hashtable; }