/* ----------------------------------------------------------------
 *		ExecHashTableCreate
 *
 *		create an empty hashtable data structure for hashjoin.
 *
 *		node          - Hash plan node; its outer subtree supplies the
 *		                plan_rows / plan_width estimates used for sizing,
 *		                and node->skewTable is tested with OidIsValid().
 *		hashOperators - list of hash operator OIDs (read with lfirst_oid),
 *		                one per hash key.
 *		keepNulls     - stored verbatim in hashtable->keepNulls.
 *
 *		NOTE(review): the function body is not closed within this
 *		excerpt; the return statement and any use of "oldcxt" and
 *		"num_skew_mcvs" are not visible here.
 * ----------------------------------------------------------------
 */
HashJoinTable
ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
{
	HashJoinTable hashtable;
	Plan *outerNode;
	int nbuckets;
	int nbatch;
	int num_skew_mcvs;
	int log2_nbuckets;
	int nkeys;
	int i;
	ListCell *ho;
	MemoryContext oldcxt;

	/*
	 * Get information about the size of the relation to be hashed (it's the
	 * "outer" subtree of this node, but the inner relation of the hashjoin).
	 * Compute the appropriate size of the hash table.  The sizing routine
	 * receives pointers to nbuckets, nbatch and num_skew_mcvs, which are
	 * read below, so it is expected to fill all three in.
	 */
	outerNode = outerPlan(node);
	ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width, OidIsValid(node->skewTable), &nbuckets, &nbatch, &num_skew_mcvs);

#ifdef HJDEBUG
	/* Debug builds only: report the chosen sizes on stdout. */
	printf("nbatch = %d, nbuckets = %d\n", nbatch, nbuckets);
#endif

	/*
	 * nbuckets must be a power of 2.  Record its base-2 logarithm and
	 * assert that shifting 1 by that amount reproduces nbuckets exactly.
	 */
	log2_nbuckets = my_log2(nbuckets);
	Assert(nbuckets == (1 << log2_nbuckets));

	/*
	 * Initialize the hash table control block.
	 *
	 * The hashtable control block is just palloc'd from the executor's
	 * per-query memory context.  palloc() is used (not a zeroing variant),
	 * so every field that is read later has to be assigned explicitly.
	 */
	hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData));
	hashtable->nbuckets = nbuckets;
	hashtable->log2_nbuckets = log2_nbuckets;
	hashtable->buckets = NULL;
	hashtable->keepNulls = keepNulls;
	/* Skew-related fields start out disabled / empty. */
	hashtable->skewEnabled = false;
	hashtable->skewBucket = NULL;
	hashtable->skewBucketLen = 0;
	hashtable->nSkewBuckets = 0;
	hashtable->skewBucketNums = NULL;
	/* Batch bookkeeping: all batch counters start at the chosen nbatch. */
	hashtable->nbatch = nbatch;
	hashtable->curbatch = 0;
	hashtable->nbatch_original = nbatch;
	hashtable->nbatch_outstart = nbatch;
	hashtable->growEnabled = true;
	hashtable->totalTuples = 0;
	hashtable->innerBatchFile = NULL;
	hashtable->outerBatchFile = NULL;
	/* Space accounting starts at zero. */
	hashtable->spaceUsed = 0;
	hashtable->spacePeak = 0;
	/* work_mem scaled by 1024 (presumably kilobytes to bytes -- confirm). */
	hashtable->spaceAllowed = work_mem * 1024L;
	hashtable->spaceUsedSkew = 0;
	/* The skew allowance is SKEW_WORK_MEM_PERCENT percent of spaceAllowed. */
	hashtable->spaceAllowedSkew = hashtable->spaceAllowed * SKEW_WORK_MEM_PERCENT / 100;

	/*
	 * Get info about the hash functions to be used for each hash key. Also
	 * remember whether the join operators are strict.
	 *
	 * Three parallel arrays of length nkeys are filled, indexed by the
	 * position of the operator in hashOperators.
	 */
	nkeys = list_length(hashOperators);
	hashtable->outer_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
	hashtable->inner_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
	hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
	i = 0;
	foreach(ho, hashOperators)
	{
		Oid hashop = lfirst_oid(ho);
		Oid left_hashfn;
		Oid right_hashfn;

		/* An operator without hash functions is reported via elog(ERROR). */
		if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn))
			elog(ERROR, "could not find hash function for hash operator %u", hashop);
		/* The left function goes to the outer array, the right to the inner. */
		fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
		fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
		hashtable->hashStrict[i] = op_strict(hashop);
		i++;
	}
/* ----------------------------------------------------------------
 *		ExecHashTableCreate
 *
 *		create an empty hashtable data structure for hashjoin.
 *
 *		hashState     - executor state of the Hash node; hashState->ps.plan
 *		                is cast to Hash * and supplies both the memory
 *		                account and the outer plan used for sizing.
 *		hjstate       - stored verbatim in hashtable->hjstate.
 *		hashOperators - list of hash operator OIDs (read with lfirst_oid),
 *		                one per hash key.
 *		operatorMemKB - memory budget; multiplied by 1024 to obtain
 *		                hashtable->spaceAllowed (so presumably kilobytes,
 *		                as the name suggests) and also handed to
 *		                ExecChooseHashTableSize.
 *
 *		NOTE(review): the brace that ends this excerpt closes the block
 *		opened right after START_MEMORY_ACCOUNT; the matching end of the
 *		memory-account section and the return statement are not visible
 *		here.
 * ----------------------------------------------------------------
 */
HashJoinTable
ExecHashTableCreate(HashState *hashState, HashJoinState *hjstate, List *hashOperators, uint64 operatorMemKB)
{
	HashJoinTable hashtable;
	Plan *outerNode;
	int nbuckets;
	int nbatch;
	int nkeys;
	int i;
	ListCell *ho;
	MemoryContext oldcxt;

	/* Everything in the block below runs under the plan node's memory account. */
	START_MEMORY_ACCOUNT(hashState->ps.plan->memoryAccount);
	{
		Hash *node = (Hash *) hashState->ps.plan;

		/*
		 * Get information about the size of the relation to be hashed (it's the
		 * "outer" subtree of this node, but the inner relation of the hashjoin).
		 * Compute the appropriate size of the hash table.
		 */
		outerNode = outerPlan(node);

		/*
		 * Initialize the hash table control block.
		 *
		 * The hashtable control block is just palloc'd from the executor's
		 * per-query memory context.  It is obtained with palloc0; the
		 * explicit assignments that follow spell out the starting state of
		 * the fields this function cares about.
		 */
		hashtable = (HashJoinTable)palloc0(sizeof(HashJoinTableData));
		hashtable->buckets = NULL;
		hashtable->bloom = NULL;
		hashtable->curbatch = 0;
		hashtable->growEnabled = true;
		hashtable->totalTuples = 0;
		hashtable->batches = NULL;
		hashtable->work_set = NULL;
		hashtable->state_file = NULL;
		/* Budget scaled by 1024 (presumably kilobytes to bytes -- confirm). */
		hashtable->spaceAllowed = operatorMemKB * 1024L;
		hashtable->stats = NULL;
		hashtable->eagerlyReleased = false;
		hashtable->hjstate = hjstate;

		/*
		 * Create temporary memory contexts in which to keep the hashtable working
		 * storage. See notes in executor/hashjoin.h.
		 *
		 * hashCxt is created with CurrentMemoryContext as its parent argument,
		 * and batchCxt with hashCxt as its parent argument.
		 */
		hashtable->hashCxt = AllocSetContextCreate(CurrentMemoryContext, "HashTableContext", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE);
		hashtable->batchCxt = AllocSetContextCreate(hashtable->hashCxt, "HashBatchContext", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE);

		/* CDB */
		/*
		 * Track temp buf file allocations in a separate context; note that it
		 * is parented to CurrentMemoryContext, not to hashCxt.
		 */
		hashtable->bfCxt = AllocSetContextCreate(CurrentMemoryContext, "hbbfcxt", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE);

		/*
		 * Choose the table dimensions.  The sizing routine writes straight
		 * into hashtable->nbuckets and hashtable->nbatch; the results are
		 * then copied into locals for the rest of this function.
		 */
		ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width, &hashtable->nbuckets, &hashtable->nbatch, operatorMemKB);
		nbuckets = hashtable->nbuckets;
		nbatch = hashtable->nbatch;
		hashtable->nbatch_original = nbatch;
		hashtable->nbatch_outstart = nbatch;

#ifdef HJDEBUG
		/* Debug builds only: log the chosen sizes. */
		elog(LOG, "HJ: nbatch = %d, nbuckets = %d\n", nbatch, nbuckets);
#endif

		/*
		 * Get info about the hash functions to be used for each hash key.
		 * Also remember whether the join operators are strict.
		 *
		 * Three parallel arrays of length nkeys are filled, indexed by the
		 * position of the operator in hashOperators.  These allocations are
		 * made before the switch to hashCxt below.
		 */
		nkeys = list_length(hashOperators);
		hashtable->outer_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
		hashtable->inner_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
		hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
		i = 0;
		foreach(ho, hashOperators)
		{
			Oid hashop = lfirst_oid(ho);
			Oid left_hashfn;
			Oid right_hashfn;

			/* An operator without hash functions is reported via elog(ERROR). */
			if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn))
				elog(ERROR, "could not find hash function for hash operator %u", hashop);
			/* The left function goes to the outer array, the right to the inner. */
			fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
			fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
			hashtable->hashStrict[i] = op_strict(hashop);
			i++;
		}

		/*
		 * Allocate data that will live for the life of the hashjoin.
		 * The previous context is remembered in oldcxt and restored at the
		 * end of this block.
		 */
		oldcxt = MemoryContextSwitchTo(hashtable->hashCxt);

#ifdef HJDEBUG
		{
			/*
			 * Debug builds only: estimate the metadata footprint.  Each
			 * figure is divided by 1024 * 1024 with integer arithmetic, so
			 * it is truncated to whole megabytes.
			 */

			/* Memory needed to allocate hashtable->batches, which consists of nbatch pointers */
			int md_batch_size = (nbatch * sizeof(hashtable->batches[0])) / (1024 * 1024);
			/* Memory needed to allocate hashtable->batches entries, which consist of nbatch HashJoinBatchData structures */
			int md_batch_data_size = (nbatch * sizeof(HashJoinBatchData)) / (1024 * 1024);
			/* Memory needed to allocate hashtable->buckets, which consists of nbuckets HashJoinTuple entries */
			int md_buckets_size = (nbuckets * sizeof(HashJoinTuple)) / (1024 * 1024);
			/* Memory needed to allocate hashtable->bloom, which consists of nbuckets uint64 values */
			int md_bloom_size = (nbuckets * sizeof(uint64)) / (1024 * 1024);
			/* Total memory needed for the hashtable metadata */
			int md_tot = md_batch_size + md_batch_data_size + md_buckets_size + md_bloom_size;

			elog(LOG, "About to allocate HashTable. HT_MEMORY=%dMB Memory needed for metadata: MDBATCH_ARR=%dMB, MDBATCH_DATA=%dMB, MDBUCKETS_ARR=%dMB, MDBLOOM_ARR=%dMB, TOTAL=%dMB", (int) (hashtable->spaceAllowed / (1024 * 1024)), md_batch_size, md_batch_data_size, md_buckets_size, md_bloom_size, md_tot);
			elog(LOG, "sizeof(hashtable->batches[0])=%d, sizeof(HashJoinBatchData)=%d, sizeof(HashJoinTuple)=%d, sizeof(uint64)=%d", (int) sizeof(hashtable->batches[0]), (int) sizeof(HashJoinBatchData), (int) sizeof(HashJoinTuple), (int) sizeof(uint64));
		}
#endif

		/* array of BatchData ptrs (allocated while hashCxt is current) */
		hashtable->batches = (HashJoinBatchData **)palloc(nbatch * sizeof(hashtable->batches[0]));

		/* one zero-initialized BatchData entry per initial batch */
		for (i = 0; i < nbatch; i++)
			hashtable->batches[i] = (HashJoinBatchData *)palloc0(sizeof(HashJoinBatchData));

		/*
		 * Prepare context for the first-scan space allocations; allocate the
		 * hashbucket array therein, and set each bucket "empty".
		 */
		MemoryContextSwitchTo(hashtable->batchCxt);
		hashtable->buckets = (HashJoinTuple *) palloc0(nbuckets * sizeof(HashJoinTuple));

		/*
		 * The bloom array (one uint64 per bucket) is only allocated when
		 * gp_hashjoin_bloomfilter is nonzero; otherwise hashtable->bloom
		 * stays NULL.
		 */
		if(gp_hashjoin_bloomfilter!=0)
			hashtable->bloom = (uint64*) palloc0(nbuckets * sizeof(uint64));

		/* Restore the memory context that was current before the switch to hashCxt. */
		MemoryContextSwitchTo(oldcxt);
	}
/* ---------------------------------------------------------------- * ExecHashTableCreate * * create an empty hashtable data structure for hashjoin. * ---------------------------------------------------------------- */ HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators) { HashJoinTable hashtable; Plan *outerNode; int totalbuckets; int nbuckets; int nbatch; int nkeys; int i; ListCell *ho; MemoryContext oldcxt; /* * Get information about the size of the relation to be hashed (it's * the "outer" subtree of this node, but the inner relation of the * hashjoin). Compute the appropriate size of the hash table. */ outerNode = outerPlan(node); ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width, &totalbuckets, &nbuckets, &nbatch); #ifdef HJDEBUG printf("nbatch = %d, totalbuckets = %d, nbuckets = %d\n", nbatch, totalbuckets, nbuckets); #endif /* * Initialize the hash table control block. * * The hashtable control block is just palloc'd from the executor's * per-query memory context. */ hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData)); hashtable->nbuckets = nbuckets; hashtable->totalbuckets = totalbuckets; hashtable->buckets = NULL; hashtable->nbatch = nbatch; hashtable->curbatch = 0; hashtable->hashNonEmpty = false; hashtable->innerBatchFile = NULL; hashtable->outerBatchFile = NULL; hashtable->innerBatchSize = NULL; hashtable->outerBatchSize = NULL; /* * Get info about the hash functions to be used for each hash key. */ nkeys = list_length(hashOperators); hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); i = 0; foreach(ho, hashOperators) { Oid hashfn; hashfn = get_op_hash_function(lfirst_oid(ho)); if (!OidIsValid(hashfn)) elog(ERROR, "could not find hash function for hash operator %u", lfirst_oid(ho)); fmgr_info(hashfn, &hashtable->hashfunctions[i]); i++; }