/*
 * execTuplesHashPrepare
 *		Resolve the equality and hash support functions that a TupleHashTable
 *		needs for a set of columns.
 *
 * For each of the numCols attribute numbers in matchColIdx (1-based indexes
 * into tupdesc), the equality operator for the column's data type is looked
 * up, and then the hash function associated with that operator is found.
 * This mirrors execTuplesMatchPrepare, with the hash lookup added.
 *
 * On return, *eqfunctions and *hashfunctions each point to a freshly
 * palloc'd array of numCols FmgrInfo entries, filled in matchColIdx order.
 */
void
execTuplesHashPrepare(TupleDesc tupdesc,
					  int numCols,
					  AttrNumber *matchColIdx,
					  FmgrInfo **eqfunctions,
					  FmgrInfo **hashfunctions)
{
	size_t		arrayBytes = numCols * sizeof(FmgrInfo);
	int			col;

	*eqfunctions = (FmgrInfo *) palloc(arrayBytes);
	*hashfunctions = (FmgrInfo *) palloc(arrayBytes);

	col = 0;
	while (col < numCols)
	{
		AttrNumber	attno = matchColIdx[col];
		Oid			coltype = tupdesc->attrs[attno - 1]->atttypid;
		Operator	eqTuple;
		Oid			eqOperator;
		Oid			eqProc;
		Oid			hashProc;

		/* Equality operator for the column type, and the function behind it */
		eqTuple = equality_oper(coltype, false);
		eqOperator = oprid(eqTuple);
		eqProc = oprfuncid(eqTuple);
		ReleaseSysCache(eqTuple);

		/* Hash function that goes with that equality operator */
		hashProc = get_op_hash_function(eqOperator);
		if (!OidIsValid(hashProc))
		{
			/* should not happen */
			elog(ERROR, "could not find hash function for hash operator %u",
				 eqOperator);
		}

		/* Fill in the call info for this column's slot in each output array */
		fmgr_info(eqProc, *eqfunctions + col);
		fmgr_info(hashProc, *hashfunctions + col);

		col++;
	}
}
/*
 * execTuplesHashPrepareSGB
 *		Variant of execTuplesHashPrepare that, besides the equality and hash
 *		functions, also resolves the "<" (ordering) and "-" (minus) functions
 *		for each column.
 *
 * For each of the numCols attribute numbers in matchColIdx (1-based indexes
 * into tupdesc), four functions are looked up from the column's data type:
 * the equality operator's function, the hash function associated with that
 * equality operator, the ordering operator's function, and the minus
 * operator's function.
 *
 * On return, *eqfunctions, *hashfunctions, *ltfunctions and *minusfunctions
 * each point to a freshly palloc'd array of numCols FmgrInfo entries, filled
 * in matchColIdx order.
 */
void
execTuplesHashPrepareSGB(TupleDesc tupdesc,
						 int numCols,
						 AttrNumber *matchColIdx,
						 FmgrInfo **eqfunctions,
						 FmgrInfo **hashfunctions,
						 FmgrInfo **ltfunctions,
						 FmgrInfo **minusfunctions)
{
	size_t		arrayBytes = numCols * sizeof(FmgrInfo);
	int			col;

	*eqfunctions = (FmgrInfo *) palloc(arrayBytes);
	*hashfunctions = (FmgrInfo *) palloc(arrayBytes);
	*ltfunctions = (FmgrInfo *) palloc(arrayBytes);
	*minusfunctions = (FmgrInfo *) palloc(arrayBytes);

	for (col = 0; col < numCols; col++)
	{
		AttrNumber	attno = matchColIdx[col];
		Oid			coltype = tupdesc->attrs[attno - 1]->atttypid;
		Operator	opTuple;
		Oid			eqOperator;
		Oid			eqProc;
		Oid			hashProc;
		Oid			ltProc;
		Oid			minusProc;

		/* "=" operator for the column type, and the function implementing it */
		opTuple = equality_oper(coltype, false);
		eqOperator = oprid(opTuple);
		eqProc = oprfuncid(opTuple);
		ReleaseSysCache(opTuple);

		/* Hash function that goes with that equality operator */
		hashProc = get_op_hash_function(eqOperator);
		if (!OidIsValid(hashProc))
		{
			/* should not happen */
			elog(ERROR, "could not find hash function for hash operator %u",
				 eqOperator);
		}

		/* "<" operator's function for the column type */
		opTuple = ordering_oper(coltype, false);
		ltProc = oprfuncid(opTuple);
		ReleaseSysCache(opTuple);

		/*
		 * "-" operator's function for the column type.  minus_oper() is a
		 * locally added lookup helper (per the original author's note).
		 */
		opTuple = minus_oper(coltype, false);
		minusProc = oprfuncid(opTuple);
		ReleaseSysCache(opTuple);

		/* Fill in the call info for this column's slot in each output array */
		fmgr_info(eqProc, *eqfunctions + col);
		fmgr_info(hashProc, *hashfunctions + col);
		fmgr_info(ltProc, *ltfunctions + col);
		fmgr_info(minusProc, *minusfunctions + col);
	}
}
/* ---------------------------------------------------------------- * ExecHashTableCreate * * create an empty hashtable data structure for hashjoin. * ---------------------------------------------------------------- */ HashJoinTable ExecHashTableCreate(HashState *hashState, HashJoinState *hjstate, List *hashOperators, uint64 operatorMemKB, workfile_set * workfile_set) { HashJoinTable hashtable; Plan *outerNode; int nbuckets; int nbatch; int nkeys; int i; ListCell *ho; MemoryContext oldcxt; START_MEMORY_ACCOUNT(hashState->ps.plan->memoryAccount); { Hash *node = (Hash *) hashState->ps.plan; /* * Get information about the size of the relation to be hashed (it's the * "outer" subtree of this node, but the inner relation of the hashjoin). * Compute the appropriate size of the hash table. */ outerNode = outerPlan(node); /* * Initialize the hash table control block. * * The hashtable control block is just palloc'd from the executor's * per-query memory context. */ hashtable = (HashJoinTable)palloc0(sizeof(HashJoinTableData)); hashtable->buckets = NULL; hashtable->bloom = NULL; hashtable->curbatch = 0; hashtable->growEnabled = true; hashtable->totalTuples = 0; hashtable->batches = NULL; hashtable->work_set = NULL; hashtable->state_file = NULL; hashtable->spaceAllowed = operatorMemKB * 1024L; hashtable->stats = NULL; hashtable->eagerlyReleased = false; hashtable->hjstate = hjstate; /* * Create temporary memory contexts in which to keep the hashtable working * storage. See notes in executor/hashjoin.h. 
*/ hashtable->hashCxt = AllocSetContextCreate(CurrentMemoryContext, "HashTableContext", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); hashtable->batchCxt = AllocSetContextCreate(hashtable->hashCxt, "HashBatchContext", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); /* CDB */ /* track temp buf file allocations in separate context */ hashtable->bfCxt = AllocSetContextCreate(CurrentMemoryContext, "hbbfcxt", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); if (workfile_set != NULL) { hashtable->work_set = workfile_set; ExecHashJoinLoadBucketsBatches(hashtable); Assert(hjstate->nbatch_loaded_state == -1); Assert(hjstate->cached_workfiles_batches_buckets_loaded); hjstate->nbatch_loaded_state = hashtable->nbatch; } else { ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width, &hashtable->nbuckets, &hashtable->nbatch, operatorMemKB); } nbuckets = hashtable->nbuckets; nbatch = hashtable->nbatch; hashtable->nbatch_original = nbatch; hashtable->nbatch_outstart = nbatch; #ifdef HJDEBUG elog(LOG, "HJ: nbatch = %d, nbuckets = %d\n", nbatch, nbuckets); #endif /* * Get info about the hash functions to be used for each hash key. * Also remember whether the join operators are strict. 
*/ nkeys = list_length(hashOperators); hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool)); i = 0; foreach(ho, hashOperators) { Oid hashop = lfirst_oid(ho); Oid hashfn; hashfn = get_op_hash_function(hashop); if (!OidIsValid(hashfn)) elog(ERROR, "could not find hash function for hash operator %u", hashop); fmgr_info(hashfn, &hashtable->hashfunctions[i]); hashtable->hashStrict[i] = op_strict(hashop); i++; } /* * Allocate data that will live for the life of the hashjoin */ oldcxt = MemoryContextSwitchTo(hashtable->hashCxt); #ifdef HJDEBUG { /* Memory needed to allocate hashtable->batches, which consists of nbatch pointers */ int md_batch_size = (nbatch * sizeof(hashtable->batches[0])) / (1024 * 1024); /* Memory needed to allocate hashtable->batches entries, which consist of nbatch HashJoinBatchData structures */ int md_batch_data_size = (nbatch * sizeof(HashJoinBatchData)) / (1024 * 1024); /* Memory needed to allocate hashtable->buckets, which consists of nbuckets HashJoinTuple structures*/ int md_buckets_size = (nbuckets * sizeof(HashJoinTuple)) / (1024 * 1024); /* Memory needed to allocate hashtable->bloom, which consists of nbuckets int64 values */ int md_bloom_size = (nbuckets * sizeof(uint64)) / (1024 * 1024); /* Total memory needed for the hashtable metadata */ int md_tot = md_batch_size + md_batch_data_size + md_buckets_size + md_bloom_size; elog(LOG, "About to allocate HashTable. 
HT_MEMORY=%dMB Memory needed for metadata: MDBATCH_ARR=%dMB, MDBATCH_DATA=%dMB, MDBUCKETS_ARR=%dMB, MDBLOOM_ARR=%dMB, TOTAL=%dMB", (int) (hashtable->spaceAllowed / (1024 * 1024)), md_batch_size, md_batch_data_size, md_buckets_size, md_bloom_size, md_tot); elog(LOG, "sizeof(hashtable->batches[0])=%d, sizeof(HashJoinBatchData)=%d, sizeof(HashJoinTuple)=%d, sizeof(uint64)=%d", (int) sizeof(hashtable->batches[0]), (int) sizeof(HashJoinBatchData), (int) sizeof(HashJoinTuple), (int) sizeof(uint64)); } #endif /* array of BatchData ptrs */ hashtable->batches = (HashJoinBatchData **)palloc(nbatch * sizeof(hashtable->batches[0])); /* one BatchData entry per initial batch */ for (i = 0; i < nbatch; i++) hashtable->batches[i] = (HashJoinBatchData *)palloc0(sizeof(HashJoinBatchData)); /* * Prepare context for the first-scan space allocations; allocate the * hashbucket array therein, and set each bucket "empty". */ MemoryContextSwitchTo(hashtable->batchCxt); hashtable->buckets = (HashJoinTuple *) palloc0(nbuckets * sizeof(HashJoinTuple)); if(gp_hashjoin_bloomfilter!=0) hashtable->bloom = (uint64*) palloc0(nbuckets * sizeof(uint64)); MemoryContextSwitchTo(oldcxt); }
/* ---------------------------------------------------------------- * ExecHashTableCreate * * create an empty hashtable data structure for hashjoin. * ---------------------------------------------------------------- */ HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators) { HashJoinTable hashtable; Plan *outerNode; int totalbuckets; int nbuckets; int nbatch; int nkeys; int i; ListCell *ho; MemoryContext oldcxt; /* * Get information about the size of the relation to be hashed (it's * the "outer" subtree of this node, but the inner relation of the * hashjoin). Compute the appropriate size of the hash table. */ outerNode = outerPlan(node); ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width, &totalbuckets, &nbuckets, &nbatch); #ifdef HJDEBUG printf("nbatch = %d, totalbuckets = %d, nbuckets = %d\n", nbatch, totalbuckets, nbuckets); #endif /* * Initialize the hash table control block. * * The hashtable control block is just palloc'd from the executor's * per-query memory context. */ hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData)); hashtable->nbuckets = nbuckets; hashtable->totalbuckets = totalbuckets; hashtable->buckets = NULL; hashtable->nbatch = nbatch; hashtable->curbatch = 0; hashtable->hashNonEmpty = false; hashtable->innerBatchFile = NULL; hashtable->outerBatchFile = NULL; hashtable->innerBatchSize = NULL; hashtable->outerBatchSize = NULL; /* * Get info about the hash functions to be used for each hash key. */ nkeys = list_length(hashOperators); hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); i = 0; foreach(ho, hashOperators) { Oid hashfn; hashfn = get_op_hash_function(lfirst_oid(ho)); if (!OidIsValid(hashfn)) elog(ERROR, "could not find hash function for hash operator %u", lfirst_oid(ho)); fmgr_info(hashfn, &hashtable->hashfunctions[i]); i++; }