/*
 * execTuplesHashPrepare
 *      Resolve the equality and hash support functions for a TupleHashTable.
 *
 * This does the same job as execTuplesMatchPrepare, except that for every
 * equality operator the associated hash function is looked up as well.
 * Two palloc'd arrays of numCols entries each are handed back through
 * *eqFunctions and *hashFunctions.
 *
 * An ERROR is raised if any operator has no hash functions.
 *
 * Note: the given operators are expected not to be cross-type comparisons.
 */
void
execTuplesHashPrepare(int numCols,
                      Oid *eqOperators,
                      FmgrInfo **eqFunctions,
                      FmgrInfo **hashFunctions)
{
    int         col = 0;

    *eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
    *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));

    while (col < numCols)
    {
        Oid         opno = eqOperators[col];
        Oid         eq_fn;
        Oid         lhs_hash_fn;
        Oid         rhs_hash_fn;

        eq_fn = get_opcode(opno);

        if (!get_op_hash_functions(opno, &lhs_hash_fn, &rhs_hash_fn))
            elog(ERROR, "could not find hash function for hash operator %u",
                 opno);

        /* Cross-type operators are not handled here */
        Assert(lhs_hash_fn == rhs_hash_fn);

        /* Fill in the col'th slot of each output array */
        fmgr_info(eq_fn, *eqFunctions + col);
        fmgr_info(rhs_hash_fn, *hashFunctions + col);

        col++;
    }
}
/* ---------------------------------------------------------------- * ExecHashTableCreate * * create an empty hashtable data structure for hashjoin. * ---------------------------------------------------------------- */ HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls) { HashJoinTable hashtable; Plan *outerNode; int nbuckets; int nbatch; int num_skew_mcvs; int log2_nbuckets; int nkeys; int i; ListCell *ho; MemoryContext oldcxt; /* * Get information about the size of the relation to be hashed (it's the * "outer" subtree of this node, but the inner relation of the hashjoin). * Compute the appropriate size of the hash table. */ outerNode = outerPlan(node); ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width, OidIsValid(node->skewTable), &nbuckets, &nbatch, &num_skew_mcvs); #ifdef HJDEBUG printf("nbatch = %d, nbuckets = %d\n", nbatch, nbuckets); #endif /* nbuckets must be a power of 2 */ log2_nbuckets = my_log2(nbuckets); Assert(nbuckets == (1 << log2_nbuckets)); /* * Initialize the hash table control block. * * The hashtable control block is just palloc'd from the executor's * per-query memory context. 
*/ hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData)); hashtable->nbuckets = nbuckets; hashtable->log2_nbuckets = log2_nbuckets; hashtable->buckets = NULL; hashtable->keepNulls = keepNulls; hashtable->skewEnabled = false; hashtable->skewBucket = NULL; hashtable->skewBucketLen = 0; hashtable->nSkewBuckets = 0; hashtable->skewBucketNums = NULL; hashtable->nbatch = nbatch; hashtable->curbatch = 0; hashtable->nbatch_original = nbatch; hashtable->nbatch_outstart = nbatch; hashtable->growEnabled = true; hashtable->totalTuples = 0; hashtable->innerBatchFile = NULL; hashtable->outerBatchFile = NULL; hashtable->spaceUsed = 0; hashtable->spacePeak = 0; hashtable->spaceAllowed = work_mem * 1024L; hashtable->spaceUsedSkew = 0; hashtable->spaceAllowedSkew = hashtable->spaceAllowed * SKEW_WORK_MEM_PERCENT / 100; /* * Get info about the hash functions to be used for each hash key. Also * remember whether the join operators are strict. */ nkeys = list_length(hashOperators); hashtable->outer_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); hashtable->inner_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool)); i = 0; foreach(ho, hashOperators) { Oid hashop = lfirst_oid(ho); Oid left_hashfn; Oid right_hashfn; if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn)) elog(ERROR, "could not find hash function for hash operator %u", hashop); fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]); fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]); hashtable->hashStrict[i] = op_strict(hashop); i++; }
/* ---------------------------------------------------------------- * ExecHashTableCreate * * create an empty hashtable data structure for hashjoin. * ---------------------------------------------------------------- */ HashJoinTable ExecHashTableCreate(HashState *hashState, HashJoinState *hjstate, List *hashOperators, uint64 operatorMemKB) { HashJoinTable hashtable; Plan *outerNode; int nbuckets; int nbatch; int nkeys; int i; ListCell *ho; MemoryContext oldcxt; START_MEMORY_ACCOUNT(hashState->ps.plan->memoryAccount); { Hash *node = (Hash *) hashState->ps.plan; /* * Get information about the size of the relation to be hashed (it's the * "outer" subtree of this node, but the inner relation of the hashjoin). * Compute the appropriate size of the hash table. */ outerNode = outerPlan(node); /* * Initialize the hash table control block. * * The hashtable control block is just palloc'd from the executor's * per-query memory context. */ hashtable = (HashJoinTable)palloc0(sizeof(HashJoinTableData)); hashtable->buckets = NULL; hashtable->bloom = NULL; hashtable->curbatch = 0; hashtable->growEnabled = true; hashtable->totalTuples = 0; hashtable->batches = NULL; hashtable->work_set = NULL; hashtable->state_file = NULL; hashtable->spaceAllowed = operatorMemKB * 1024L; hashtable->stats = NULL; hashtable->eagerlyReleased = false; hashtable->hjstate = hjstate; /* * Create temporary memory contexts in which to keep the hashtable working * storage. See notes in executor/hashjoin.h. 
*/ hashtable->hashCxt = AllocSetContextCreate(CurrentMemoryContext, "HashTableContext", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); hashtable->batchCxt = AllocSetContextCreate(hashtable->hashCxt, "HashBatchContext", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); /* CDB */ /* track temp buf file allocations in separate context */ hashtable->bfCxt = AllocSetContextCreate(CurrentMemoryContext, "hbbfcxt", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width, &hashtable->nbuckets, &hashtable->nbatch, operatorMemKB); nbuckets = hashtable->nbuckets; nbatch = hashtable->nbatch; hashtable->nbatch_original = nbatch; hashtable->nbatch_outstart = nbatch; #ifdef HJDEBUG elog(LOG, "HJ: nbatch = %d, nbuckets = %d\n", nbatch, nbuckets); #endif /* * Get info about the hash functions to be used for each hash key. * Also remember whether the join operators are strict. 
*/ nkeys = list_length(hashOperators); hashtable->outer_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); hashtable->inner_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool)); i = 0; foreach(ho, hashOperators) { Oid hashop = lfirst_oid(ho); Oid left_hashfn; Oid right_hashfn; if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn)) elog(ERROR, "could not find hash function for hash operator %u", hashop); fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]); fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]); hashtable->hashStrict[i] = op_strict(hashop); i++; } /* * Allocate data that will live for the life of the hashjoin */ oldcxt = MemoryContextSwitchTo(hashtable->hashCxt); #ifdef HJDEBUG { /* Memory needed to allocate hashtable->batches, which consists of nbatch pointers */ int md_batch_size = (nbatch * sizeof(hashtable->batches[0])) / (1024 * 1024); /* Memory needed to allocate hashtable->batches entries, which consist of nbatch HashJoinBatchData structures */ int md_batch_data_size = (nbatch * sizeof(HashJoinBatchData)) / (1024 * 1024); /* Memory needed to allocate hashtable->buckets, which consists of nbuckets HashJoinTuple structures*/ int md_buckets_size = (nbuckets * sizeof(HashJoinTuple)) / (1024 * 1024); /* Memory needed to allocate hashtable->bloom, which consists of nbuckets int64 values */ int md_bloom_size = (nbuckets * sizeof(uint64)) / (1024 * 1024); /* Total memory needed for the hashtable metadata */ int md_tot = md_batch_size + md_batch_data_size + md_buckets_size + md_bloom_size; elog(LOG, "About to allocate HashTable. 
HT_MEMORY=%dMB Memory needed for metadata: MDBATCH_ARR=%dMB, MDBATCH_DATA=%dMB, MDBUCKETS_ARR=%dMB, MDBLOOM_ARR=%dMB, TOTAL=%dMB", (int) (hashtable->spaceAllowed / (1024 * 1024)), md_batch_size, md_batch_data_size, md_buckets_size, md_bloom_size, md_tot); elog(LOG, "sizeof(hashtable->batches[0])=%d, sizeof(HashJoinBatchData)=%d, sizeof(HashJoinTuple)=%d, sizeof(uint64)=%d", (int) sizeof(hashtable->batches[0]), (int) sizeof(HashJoinBatchData), (int) sizeof(HashJoinTuple), (int) sizeof(uint64)); } #endif /* array of BatchData ptrs */ hashtable->batches = (HashJoinBatchData **)palloc(nbatch * sizeof(hashtable->batches[0])); /* one BatchData entry per initial batch */ for (i = 0; i < nbatch; i++) hashtable->batches[i] = (HashJoinBatchData *)palloc0(sizeof(HashJoinBatchData)); /* * Prepare context for the first-scan space allocations; allocate the * hashbucket array therein, and set each bucket "empty". */ MemoryContextSwitchTo(hashtable->batchCxt); hashtable->buckets = (HashJoinTuple *) palloc0(nbuckets * sizeof(HashJoinTuple)); if(gp_hashjoin_bloomfilter!=0) hashtable->bloom = (uint64*) palloc0(nbuckets * sizeof(uint64)); MemoryContextSwitchTo(oldcxt); }
/*
 * _bitmap_init_buildstate() -- initialize the build state before building
 *      a bitmap index.
 *
 * index:   the bitmap index relation being built.
 * bmstate: the build state to fill in.
 *
 * The function
 *  1. resets the TID-location buffer in bmstate;
 *  2. reads the metapage and calls _bitmap_open_lov_heapandindex() to open
 *     the LOV heap and LOV index with RowExclusiveLock;
 *  3. tries to set up cur_bmbuild (declared outside this function) with an
 *     equality function and a hash function for every indexed column.
 *     - If every column's equality operator has hash functions, a dynahash
 *       table is created and stored in bmstate->lovitem_hash.
 *     - Otherwise cur_bmbuild and the per-attribute arrays it owns are
 *       freed, cur_bmbuild is set to NULL, bmstate->lovitem_hash is set to
 *       NULL, and an index scan over the LOV index is started with one
 *       equality scan key per column;
 *  4. decides whether the build has to be WAL-logged.
 */
void
_bitmap_init_buildstate(Relation index, BMBuildState *bmstate)
{
    MIRROREDLOCK_BUFMGR_DECLARE;

    BMMetaPage  mp;
    HASHCTL     hash_ctl;
    int         hash_flags;
    int         i;
    int         natts;
    Buffer      metabuf;

    /* initialize the build state */
    bmstate->bm_tupDesc = RelationGetDescr(index);
    bmstate->bm_tidLocsBuffer = (BMTidBuildBuf *)
        palloc(sizeof(BMTidBuildBuf));
    bmstate->bm_tidLocsBuffer->byte_size = 0;
    bmstate->bm_tidLocsBuffer->lov_blocks = NIL;
    bmstate->bm_tidLocsBuffer->max_lov_block = InvalidBlockNumber;

    /* number of indexed columns; the tuple descriptor is not changed below */
    natts = bmstate->bm_tupDesc->natts;

    /* -------- MirroredLock ---------- */
    MIRROREDLOCK_BUFMGR_LOCK;

    metabuf = _bitmap_getbuf(index, BM_METAPAGE, BM_READ);
    mp = _bitmap_get_metapage_data(index, metabuf);
    _bitmap_open_lov_heapandindex(index, mp, &(bmstate->bm_lov_heap),
                                  &(bmstate->bm_lov_index),
                                  RowExclusiveLock);

    _bitmap_relbuf(metabuf);

    MIRROREDLOCK_BUFMGR_UNLOCK;
    /* -------- MirroredLock ---------- */

    /*
     * Optimistically allocate the hash-build descriptor together with its
     * per-attribute arrays; they are thrown away again below if some column
     * turns out not to be hashable.
     */
    cur_bmbuild = (BMBuildHashData *) palloc(sizeof(BMBuildHashData));
    cur_bmbuild->hash_funcs = (FmgrInfo *)
        palloc(sizeof(FmgrInfo) * natts);
    cur_bmbuild->eq_funcs = (FmgrInfo *)
        palloc(sizeof(FmgrInfo) * natts);
    cur_bmbuild->hash_func_is_strict = (bool *)
        palloc(sizeof(bool) * natts);

    for (i = 0; i < natts; i++)
    {
        Oid         typid = bmstate->bm_tupDesc->attrs[i]->atttypid;
        Operator    optup;
        Oid         eq_opr;
        Oid         eq_function;
        Oid         left_hash_function;
        Oid         right_hash_function;

        optup = equality_oper(typid, false);
        eq_opr = oprid(optup);
        eq_function = oprfuncid(optup);
        ReleaseOperator(optup);

        if (!get_op_hash_functions(eq_opr,
                                   &left_hash_function,
                                   &right_hash_function))
        {
            /*
             * This column cannot be hashed, so give up on the hash table.
             * Release the per-attribute arrays before the struct that owns
             * them; freeing only the struct would leave them allocated
             * with no remaining pointer to them.
             */
            pfree(cur_bmbuild->hash_funcs);
            pfree(cur_bmbuild->eq_funcs);
            pfree(cur_bmbuild->hash_func_is_strict);
            pfree(cur_bmbuild);
            cur_bmbuild = NULL;
            break;
        }

        Assert(left_hash_function == right_hash_function);
        fmgr_info(eq_function, &cur_bmbuild->eq_funcs[i]);
        fmgr_info(right_hash_function, &cur_bmbuild->hash_funcs[i]);
        cur_bmbuild->hash_func_is_strict[i] = func_strict(right_hash_function);
    }

    if (cur_bmbuild)
    {
        /* All columns are hashable: look up LOV items through a hash table */
        cur_bmbuild->natts = natts;
        cur_bmbuild->tmpcxt =
            AllocSetContextCreate(CurrentMemoryContext,
                                  "Bitmap build temp space",
                                  ALLOCSET_DEFAULT_MINSIZE,
                                  ALLOCSET_DEFAULT_INITSIZE,
                                  ALLOCSET_DEFAULT_MAXSIZE);

        /* setup the hash table */
        MemSet(&hash_ctl, 0, sizeof(hash_ctl));

        /*
         * Reserve enough space for the hash key header and then the data
         * segments (values followed by nulls).
         */
        hash_ctl.keysize = MAXALIGN(sizeof(BMBuildHashKey)) +
            MAXALIGN(sizeof(Datum) * cur_bmbuild->natts) +
            MAXALIGN(sizeof(bool) * cur_bmbuild->natts);

        /*
         * NOTE(review): the extra 200 bytes are not explained anywhere in
         * this function -- confirm what they are meant to reserve.
         */
        hash_ctl.entrysize = hash_ctl.keysize + sizeof(BMBuildLovData) + 200;
        hash_ctl.hash = build_hash_key;
        hash_ctl.match = build_match_key;
        hash_ctl.keycopy = build_keycopy;
        hash_ctl.hcxt = AllocSetContextCreate(CurrentMemoryContext,
                                              "Bitmap build hash table",
                                              ALLOCSET_DEFAULT_MINSIZE,
                                              ALLOCSET_DEFAULT_INITSIZE,
                                              ALLOCSET_DEFAULT_MAXSIZE);
        cur_bmbuild->hash_cxt = hash_ctl.hcxt;

        hash_flags = HASH_ELEM | HASH_FUNCTION | HASH_COMPARE |
            HASH_CONTEXT | HASH_KEYCOPY;

        bmstate->lovitem_hash = hash_create("Bitmap index build lov item hash",
                                            100, &hash_ctl, hash_flags);
        bmstate->lovitem_hashKeySize = hash_ctl.keysize;
    }
    else
    {
        /* Fallback: look up LOV items with an index scan on the LOV index */
        int         attno;

        bmstate->lovitem_hash = NULL;
        bmstate->lovitem_hashKeySize = 0;
        bmstate->bm_lov_scanKeys =
            (ScanKey) palloc0(bmstate->bm_tupDesc->natts * sizeof(ScanKeyData));

        /* one equality scan key per indexed column */
        for (attno = 0; attno < bmstate->bm_tupDesc->natts; attno++)
        {
            RegProcedure opfuncid;
            Oid         atttypid;

            atttypid = bmstate->bm_tupDesc->attrs[attno]->atttypid;
            opfuncid = equality_oper_funcid(atttypid);

            ScanKeyEntryInitialize(&(bmstate->bm_lov_scanKeys[attno]),
                                   SK_ISNULL, attno + 1,
                                   BTEqualStrategyNumber, InvalidOid,
                                   opfuncid, 0);
        }

        bmstate->bm_lov_scanDesc = index_beginscan(bmstate->bm_lov_heap,
                                                   bmstate->bm_lov_index,
                                                   ActiveSnapshot,
                                                   bmstate->bm_tupDesc->natts,
                                                   bmstate->bm_lov_scanKeys);
    }

    /*
     * We need to log index creation in WAL iff WAL archiving is enabled
     * AND it's not a temp index. Currently, since building an index
     * writes pages to the shared buffer, we can't disable WAL archiving.
     * We will add this shortly.
     */
    bmstate->use_wal = !XLog_UnconvertedCanBypassWal() && !index->rd_istemp;
}