Example #1
0
/* ----------------------------------------------------------------
 *		ExecHashTableCreate
 *
 *		create an empty hashtable data structure for hashjoin.
 * ----------------------------------------------------------------
 */
HashJoinTable
ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
{
	HashJoinTable hashtable;
	Plan	   *outerNode;
	int			nbuckets;
	int			nbatch;
	int			num_skew_mcvs;
	int			log2_nbuckets;
	int			nkeys;
	int			i;
	ListCell   *ho;
	MemoryContext oldcxt;

	/*
	 * Get information about the size of the relation to be hashed (it's the
	 * "outer" subtree of this node, but the inner relation of the hashjoin).
	 * Compute the appropriate size of the hash table.
	 */
	outerNode = outerPlan(node);

	ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width,
							OidIsValid(node->skewTable),
							&nbuckets, &nbatch, &num_skew_mcvs);

#ifdef HJDEBUG
	printf("nbatch = %d, nbuckets = %d\n", nbatch, nbuckets);
#endif

	/* nbuckets must be a power of 2 */
	log2_nbuckets = my_log2(nbuckets);
	Assert(nbuckets == (1 << log2_nbuckets));

	/*
	 * Initialize the hash table control block.
	 *
	 * The hashtable control block is just palloc'd from the executor's
	 * per-query memory context.
	 */
	hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData));
	hashtable->nbuckets = nbuckets;
	hashtable->log2_nbuckets = log2_nbuckets;
	hashtable->buckets = NULL;
	hashtable->keepNulls = keepNulls;
	hashtable->skewEnabled = false;
	hashtable->skewBucket = NULL;
	hashtable->skewBucketLen = 0;
	hashtable->nSkewBuckets = 0;
	hashtable->skewBucketNums = NULL;
	hashtable->nbatch = nbatch;
	hashtable->curbatch = 0;
	hashtable->nbatch_original = nbatch;
	hashtable->nbatch_outstart = nbatch;
	hashtable->growEnabled = true;
	hashtable->totalTuples = 0;
	hashtable->innerBatchFile = NULL;
	hashtable->outerBatchFile = NULL;
	hashtable->spaceUsed = 0;
	hashtable->spacePeak = 0;
	hashtable->spaceAllowed = work_mem * 1024L;
	hashtable->spaceUsedSkew = 0;
	hashtable->spaceAllowedSkew =
		hashtable->spaceAllowed * SKEW_WORK_MEM_PERCENT / 100;

	/*
	 * Get info about the hash functions to be used for each hash key. Also
	 * remember whether the join operators are strict.
	 */
	nkeys = list_length(hashOperators);
	hashtable->outer_hashfunctions =
		(FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
	hashtable->inner_hashfunctions =
		(FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
	hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
	i = 0;
	foreach(ho, hashOperators)
	{
		Oid			hashop = lfirst_oid(ho);
		Oid			left_hashfn;
		Oid			right_hashfn;

		if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn))
			elog(ERROR, "could not find hash function for hash operator %u",
				 hashop);
		fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
		fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
		hashtable->hashStrict[i] = op_strict(hashop);
		i++;
	}
Example #2
0
/* ----------------------------------------------------------------
 *		ExecHashTableCreate
 *
 *		create an empty hashtable data structure for hashjoin.
 * ----------------------------------------------------------------
 */
HashJoinTable
ExecHashTableCreate(HashState *hashState, HashJoinState *hjstate, List *hashOperators, uint64 operatorMemKB)
{
	HashJoinTable hashtable;
	Plan	   *outerNode;
	int			nbuckets;
	int			nbatch;
	int			nkeys;
	int			i;
	ListCell   *ho;
	MemoryContext oldcxt;

	START_MEMORY_ACCOUNT(hashState->ps.plan->memoryAccount);
	{

	Hash *node = (Hash *) hashState->ps.plan;

	/*
	 * Get information about the size of the relation to be hashed (it's the
	 * "outer" subtree of this node, but the inner relation of the hashjoin).
	 * Compute the appropriate size of the hash table.
	 */
	outerNode = outerPlan(node);

	/*
	 * Initialize the hash table control block.
	 *
	 * The hashtable control block is just palloc'd from the executor's
	 * per-query memory context.
	 */
	hashtable = (HashJoinTable)palloc0(sizeof(HashJoinTableData));
	hashtable->buckets = NULL;
	hashtable->bloom = NULL;
	hashtable->curbatch = 0;
	hashtable->growEnabled = true;
	hashtable->totalTuples = 0;
	hashtable->batches = NULL;
	hashtable->work_set = NULL;
	hashtable->state_file = NULL;
	hashtable->spaceAllowed = operatorMemKB * 1024L;
	hashtable->stats = NULL;
	hashtable->eagerlyReleased = false;
	hashtable->hjstate = hjstate;

	/*
	 * Create temporary memory contexts in which to keep the hashtable working
	 * storage.  See notes in executor/hashjoin.h.
	 */
	hashtable->hashCxt = AllocSetContextCreate(CurrentMemoryContext,
											   "HashTableContext",
											   ALLOCSET_DEFAULT_MINSIZE,
											   ALLOCSET_DEFAULT_INITSIZE,
											   ALLOCSET_DEFAULT_MAXSIZE);

	hashtable->batchCxt = AllocSetContextCreate(hashtable->hashCxt,
												"HashBatchContext",
												ALLOCSET_DEFAULT_MINSIZE,
												ALLOCSET_DEFAULT_INITSIZE,
												ALLOCSET_DEFAULT_MAXSIZE);

	/* CDB */ /* track temp buf file allocations in separate context */
	hashtable->bfCxt = AllocSetContextCreate(CurrentMemoryContext,
											 "hbbfcxt",
											 ALLOCSET_DEFAULT_MINSIZE,
											 ALLOCSET_DEFAULT_INITSIZE,
											 ALLOCSET_DEFAULT_MAXSIZE);

	ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width,
			&hashtable->nbuckets, &hashtable->nbatch, operatorMemKB);

	nbuckets = hashtable->nbuckets;
	nbatch = hashtable->nbatch;
	hashtable->nbatch_original = nbatch;
	hashtable->nbatch_outstart = nbatch;


#ifdef HJDEBUG
    elog(LOG, "HJ: nbatch = %d, nbuckets = %d\n", nbatch, nbuckets);
#endif


    /*
	 * Get info about the hash functions to be used for each hash key.
	 * Also remember whether the join operators are strict.
	 */
	nkeys = list_length(hashOperators);
	hashtable->outer_hashfunctions =
		(FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
	hashtable->inner_hashfunctions =
		(FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
	hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
	i = 0;
	foreach(ho, hashOperators)
	{
		Oid			hashop = lfirst_oid(ho);
		Oid			left_hashfn;
		Oid			right_hashfn;

		if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn))
			elog(ERROR, "could not find hash function for hash operator %u",
				 hashop);
		fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
		fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
		hashtable->hashStrict[i] = op_strict(hashop);
		i++;
	}

	/*
     * Allocate data that will live for the life of the hashjoin
     */
	oldcxt = MemoryContextSwitchTo(hashtable->hashCxt);

#ifdef HJDEBUG
	{
		/* Memory needed to allocate hashtable->batches, which consists of nbatch pointers */
		int md_batch_size =  (nbatch * sizeof(hashtable->batches[0])) / (1024 * 1024);
		/* Memory needed to allocate hashtable->batches entries, which consist of nbatch HashJoinBatchData structures */
		int md_batch_data_size = (nbatch * sizeof(HashJoinBatchData)) / (1024 * 1024);

		/* Memory needed to allocate hashtable->buckets, which consists of nbuckets  HashJoinTuple structures*/
		int md_buckets_size = (nbuckets * sizeof(HashJoinTuple)) / (1024 * 1024);

		/* Memory needed to allocate hashtable->bloom, which consists of nbuckets int64 values */
		int md_bloom_size = (nbuckets * sizeof(uint64)) / (1024 * 1024);

		/* Total memory needed for the hashtable metadata */
		int md_tot = md_batch_size + md_batch_data_size + md_buckets_size + md_bloom_size;

		elog(LOG, "About to allocate HashTable. HT_MEMORY=%dMB Memory needed for metadata: MDBATCH_ARR=%dMB, MDBATCH_DATA=%dMB, MDBUCKETS_ARR=%dMB, MDBLOOM_ARR=%dMB, TOTAL=%dMB",
				(int) (hashtable->spaceAllowed / (1024 * 1024)),
				md_batch_size, md_batch_data_size, md_buckets_size, md_bloom_size, md_tot);

		elog(LOG, "sizeof(hashtable->batches[0])=%d, sizeof(HashJoinBatchData)=%d, sizeof(HashJoinTuple)=%d, sizeof(uint64)=%d",
				(int) sizeof(hashtable->batches[0]), (int) sizeof(HashJoinBatchData),
				(int) sizeof(HashJoinTuple), (int) sizeof(uint64));
	}
#endif

    /* array of BatchData ptrs */
    hashtable->batches =
        (HashJoinBatchData **)palloc(nbatch * sizeof(hashtable->batches[0]));

    /* one BatchData entry per initial batch */
    for (i = 0; i < nbatch; i++)
        hashtable->batches[i] =
            (HashJoinBatchData *)palloc0(sizeof(HashJoinBatchData));

	/*
	 * Prepare context for the first-scan space allocations; allocate the
	 * hashbucket array therein, and set each bucket "empty".
	 */
	MemoryContextSwitchTo(hashtable->batchCxt);

	hashtable->buckets = (HashJoinTuple *)
		palloc0(nbuckets * sizeof(HashJoinTuple));

	if(gp_hashjoin_bloomfilter!=0)
		hashtable->bloom = (uint64*) palloc0(nbuckets * sizeof(uint64));

	MemoryContextSwitchTo(oldcxt);
	}
Example #3
0
/* ----------------------------------------------------------------
 *		ExecHashTableCreate
 *
 *		create an empty hashtable data structure for hashjoin.
 * ----------------------------------------------------------------
 */
HashJoinTable
ExecHashTableCreate(Hash *node, List *hashOperators)
{
	HashJoinTable hashtable;
	Plan	   *outerNode;
	int			totalbuckets;
	int			nbuckets;
	int			nbatch;
	int			nkeys;
	int			i;
	ListCell   *ho;
	MemoryContext oldcxt;

	/*
	 * Get information about the size of the relation to be hashed (it's
	 * the "outer" subtree of this node, but the inner relation of the
	 * hashjoin).  Compute the appropriate size of the hash table.
	 */
	outerNode = outerPlan(node);

	ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width,
							&totalbuckets, &nbuckets, &nbatch);

#ifdef HJDEBUG
	printf("nbatch = %d, totalbuckets = %d, nbuckets = %d\n",
		   nbatch, totalbuckets, nbuckets);
#endif

	/*
	 * Initialize the hash table control block.
	 *
	 * The hashtable control block is just palloc'd from the executor's
	 * per-query memory context.
	 */
	hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData));
	hashtable->nbuckets = nbuckets;
	hashtable->totalbuckets = totalbuckets;
	hashtable->buckets = NULL;
	hashtable->nbatch = nbatch;
	hashtable->curbatch = 0;
	hashtable->hashNonEmpty = false;
	hashtable->innerBatchFile = NULL;
	hashtable->outerBatchFile = NULL;
	hashtable->innerBatchSize = NULL;
	hashtable->outerBatchSize = NULL;

	/*
	 * Get info about the hash functions to be used for each hash key.
	 */
	nkeys = list_length(hashOperators);
	hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
	i = 0;
	foreach(ho, hashOperators)
	{
		Oid			hashfn;

		hashfn = get_op_hash_function(lfirst_oid(ho));
		if (!OidIsValid(hashfn))
			elog(ERROR, "could not find hash function for hash operator %u",
				 lfirst_oid(ho));
		fmgr_info(hashfn, &hashtable->hashfunctions[i]);
		i++;
	}