/* ----------------------------------------------------------------
 *		ExecHash
 *
 *		build hash table for hashjoin, doing partitioning if more
 *		than one batch is required.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecHash(HashState *node)
{
    EState     *estate;
    PlanState  *outerNode;
    List       *hashkeys;
    HashJoinTable hashtable;
    TupleTableSlot *slot;
    ExprContext *econtext;
    int         nbatch;
    int         i;

    /*
     * get state info from node
     */
    estate = node->ps.state;
    outerNode = outerPlanState(node);

    hashtable = node->hashtable;
    nbatch = hashtable->nbatch;

    if (nbatch > 0)
    {
        /*
         * Open temp files for inner batches, if needed.  Note that file
         * buffers are palloc'd in regular executor context.
         */
        for (i = 0; i < nbatch; i++)
            hashtable->innerBatchFile[i] = BufFileCreateTemp(false);
    }

    /*
     * set expression context
     */
    hashkeys = node->hashkeys;
    econtext = node->ps.ps_ExprContext;

    /*
     * get all inner tuples and insert into the hash table (or temp files)
     */
    for (;;)
    {
        slot = ExecProcNode(outerNode);
        if (TupIsNull(slot))
            break;
        hashtable->hashNonEmpty = true;
        econtext->ecxt_innertuple = slot;
        ExecHashTableInsert(hashtable, econtext, hashkeys);
        ExecClearTuple(slot);
    }

    /*
     * Return the slot so that we have the tuple descriptor when we need
     * to save/restore them.  -Jeff 11 July 1991
     */
    return slot;
}
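/*
 * For context: in this era, ExecHashTableInsert() itself decides whether a
 * tuple belongs to the in-memory batch or must be spooled to one of the temp
 * files opened above.  The following is a hedged sketch of that dispatch,
 * not the verbatim implementation; ExecHashGetBucket/ExecHashGetBatch,
 * ExecHashJoinSaveTuple, and the field names are assumed from that vintage,
 * and the in-memory copy is elided into a comment.
 */
static void
ExecHashTableInsertSketch(HashJoinTable hashtable,
                          ExprContext *econtext,
                          List *hashkeys)
{
    int         bucketno = ExecHashGetBucket(hashtable, econtext, hashkeys);
    int         batchno = ExecHashGetBatch(bucketno, hashtable);

    if (batchno < 0)
    {
        /*
         * Current batch: copy the tuple into the table's batch memory
         * context and link it into buckets[bucketno].
         */
    }
    else
    {
        /* Later batch: spool the tuple to that batch's temp file. */
        hashtable->innerBatchSize[batchno]++;
        ExecHashJoinSaveTuple(econtext->ecxt_innertuple->val,
                              hashtable->innerBatchFile[batchno]);
    }
}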
/* ----------------------------------------------------------------
 *		MultiExecHash
 *
 *		build hash table for hashjoin, doing partitioning if more
 *		than one batch is required.
 * ----------------------------------------------------------------
 */
Node *
MultiExecHash(HashState *node)
{
    PlanState  *outerNode;
    List       *hashkeys;
    HashJoinTable hashtable;
    TupleTableSlot *slot;
    ExprContext *econtext;
    uint32      hashvalue;

    /* must provide our own instrumentation support */
    if (node->ps.instrument)
        InstrStartNode(node->ps.instrument);

    /*
     * get state info from node
     */
    outerNode = outerPlanState(node);
    hashtable = node->hashtable;

    /*
     * set expression context
     */
    hashkeys = node->hashkeys;
    econtext = node->ps.ps_ExprContext;

    /*
     * get all inner tuples and insert into the hash table (or temp files)
     */
    for (;;)
    {
        slot = ExecProcNode(outerNode);
        if (TupIsNull(slot))
            break;
        hashtable->totalTuples += 1;
        /* We have to compute the hash value */
        econtext->ecxt_innertuple = slot;
        hashvalue = ExecHashGetHashValue(hashtable, econtext, hashkeys);
        ExecHashTableInsert(hashtable, slot, hashvalue);
    }

    /* must provide our own instrumentation support */
    if (node->ps.instrument)
        InstrStopNode(node->ps.instrument, hashtable->totalTuples);

    /*
     * We do not return the hash table directly because it's not a subtype of
     * Node, and so would violate the MultiExecProcNode API.  Instead, our
     * parent Hashjoin node is expected to know how to fish it out of our node
     * state.  Ugly but not really worth cleaning up, since Hashjoin knows
     * quite a bit more about Hash besides that.
     */
    return NULL;
}
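/*
 * How hashvalue is consumed downstream: ExecHashTableInsert() splits it into
 * a bucket number and a batch number.  The sketch below is a hedged
 * reconstruction of this era's scheme (it assumes nbuckets and nbatch are
 * powers of 2 and that the table caches a log2_nbuckets field): the low bits
 * pick the bucket and the bits just above them pick the batch, so when
 * nbatch doubles at run time a tuple either stays in its batch or moves to a
 * later one, never an earlier one.
 */
void
ExecHashGetBucketAndBatch(HashJoinTable hashtable,
                          uint32 hashvalue,
                          int *bucketno,
                          int *batchno)
{
    uint32      nbuckets = (uint32) hashtable->nbuckets;
    uint32      nbatch = (uint32) hashtable->nbatch;

    if (nbatch > 1)
    {
        /* nbuckets and nbatch are powers of 2, so mask instead of MOD/DIV */
        *bucketno = hashvalue & (nbuckets - 1);
        *batchno = (hashvalue >> hashtable->log2_nbuckets) & (nbatch - 1);
    }
    else
    {
        *bucketno = hashvalue & (nbuckets - 1);
        *batchno = 0;
    }
}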
/* ----------------------------------------------------------------
 *		MultiExecHash
 *
 *		build hash table for hashjoin, doing partitioning if more
 *		than one batch is required.
 * ----------------------------------------------------------------
 */
Node *
MultiExecHash(HashState *node)
{
    PlanState  *outerNode;
    List       *hashkeys;
    HashJoinTable hashtable;
    TupleTableSlot *slot;
    ExprContext *econtext;
    uint32      hashvalue = 0;

    /* must provide our own instrumentation support */
    if (node->ps.instrument)
        InstrStartNode(node->ps.instrument);

    /*
     * get state info from node
     */
    outerNode = outerPlanState(node);
    hashtable = node->hashtable;

    /*
     * set expression context
     */
    hashkeys = node->hashkeys;
    econtext = node->ps.ps_ExprContext;

#ifdef FAULT_INJECTOR
    FaultInjector_InjectFaultIfSet(
            MultiExecHashLargeVmem,
            DDLNotSpecified,
            "",     // databaseName
            "");    // tableName
#endif

    /*
     * get all inner tuples and insert into the hash table (or temp files)
     */
    for (;;)
    {
        slot = ExecProcNode(outerNode);
        if (TupIsNull(slot))
            break;

        Gpmon_M_Incr(GpmonPktFromHashState(node), GPMON_QEXEC_M_ROWSIN);
        CheckSendPlanStateGpmonPkt(&node->ps);

        /* We have to compute the hash value */
        econtext->ecxt_innertuple = slot;

        bool        hashkeys_null = false;

        if (ExecHashGetHashValue(node, hashtable, econtext, hashkeys,
                                 false, node->hs_keepnull,
                                 &hashvalue, &hashkeys_null))
        {
            ExecHashTableInsert(node, hashtable, slot, hashvalue);
        }

        if (hashkeys_null)
        {
            node->hs_hashkeys_null = true;
            if (node->hs_quit_if_hashkeys_null)
            {
                ExecSquelchNode(outerNode);
                return NULL;
            }
        }
    }

    /* Now we have set up all the initial batches & primary overflow batches. */
    hashtable->nbatch_outstart = hashtable->nbatch;

    /* must provide our own instrumentation support */
    if (node->ps.instrument)
        InstrStopNode(node->ps.instrument, hashtable->totalTuples);

    /*
     * We do not return the hash table directly because it's not a subtype of
     * Node, and so would violate the MultiExecProcNode API.  Instead, our
     * parent Hashjoin node is expected to know how to fish it out of our node
     * state.  Ugly but not really worth cleaning up, since Hashjoin knows
     * quite a bit more about Hash besides that.
     */
    return NULL;
}
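/*
 * For context: the hs_hashkeys_null / hs_quit_if_hashkeys_null flags exist
 * for the NOT-IN flavor of anti-join, where a NULL hash key on the inner
 * side makes the whole join result empty, so the build can bail out early.
 * Below is a hedged sketch of the parent side, not the actual GPDB caller;
 * everything except the hs_* fields visible in MultiExecHash above is an
 * assumption.
 */
static TupleTableSlot *
ExecHashJoinBuildSketch(HashJoinState *node, HashState *hashNode)
{
    /* only bail out early for the NOT-IN flavor of anti-join */
    hashNode->hs_quit_if_hashkeys_null =
        (node->js.jointype == JOIN_LASJ_NOTIN);

    (void) MultiExecProcNode((PlanState *) hashNode);

    /* a NULL inner hash key under NOT IN makes the join result empty */
    if (hashNode->hs_hashkeys_null && hashNode->hs_quit_if_hashkeys_null)
        return NULL;

    /* otherwise probe the hash table fished out of hashNode as usual */
    return NULL;
}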
/* ----------------------------------------------------------------
 *		MultiExecHash
 *
 *		build hash table for hashjoin, doing partitioning if more
 *		than one batch is required.
 * ----------------------------------------------------------------
 */
Node *
MultiExecHash(HashState *node)
{
    PlanState  *outerNode;
    List       *hashkeys;
    HashJoinTable hashtable;
    TupleTableSlot *slot;
    ExprContext *econtext;
    uint32      hashvalue;

    /* must provide our own instrumentation support */
    if (node->ps.instrument)
        InstrStartNode(node->ps.instrument);

    /*
     * get state info from node
     */
    outerNode = outerPlanState(node);
    hashtable = node->hashtable;

    /*
     * set expression context
     */
    hashkeys = node->hashkeys;
    econtext = node->ps.ps_ExprContext;

    /*
     * get all inner tuples and insert into the hash table (or temp files)
     */
    for (;;)
    {
        slot = ExecProcNode(outerNode);
        if (TupIsNull(slot))
            break;
        /* We have to compute the hash value */
        econtext->ecxt_innertuple = slot;
        if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false,
                                 hashtable->keepNulls, &hashvalue))
        {
            int         bucketNumber;

            bucketNumber = ExecHashGetSkewBucket(hashtable, hashvalue);
            if (bucketNumber != INVALID_SKEW_BUCKET_NO)
            {
                /* It's a skew tuple, so put it into that hash table */
                ExecHashSkewTableInsert(hashtable, slot, hashvalue,
                                        bucketNumber);
            }
            else
            {
                /* Not subject to skew optimization, so insert normally */
                ExecHashTableInsert(hashtable, slot, hashvalue);
            }
            hashtable->totalTuples += 1;
        }
    }

    /* must provide our own instrumentation support */
    if (node->ps.instrument)
        InstrStopNode(node->ps.instrument, hashtable->totalTuples);

    /*
     * We do not return the hash table directly because it's not a subtype of
     * Node, and so would violate the MultiExecProcNode API.  Instead, our
     * parent Hashjoin node is expected to know how to fish it out of our node
     * state.  Ugly but not really worth cleaning up, since Hashjoin knows
     * quite a bit more about Hash besides that.
     */
    return NULL;
}
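/*
 * For context: the skew lookup used above is a small open-addressing hash
 * keyed by the exact hash value, holding only the statistically most common
 * (skewed) values so their tuples never spill to a later batch.  This sketch
 * follows the 8.4-era implementation closely, but the field names
 * (skewEnabled, skewBucket, skewBucketLen, a power of 2) are assumed from
 * that vintage rather than quoted verbatim.
 */
int
ExecHashGetSkewBucket(HashJoinTable hashtable, uint32 hashvalue)
{
    int         bucket;

    /* skew optimization disabled (e.g. after the first batch)? */
    if (!hashtable->skewEnabled)
        return INVALID_SKEW_BUCKET_NO;

    /* skewBucketLen is a power of 2, so we can do modulo by masking */
    bucket = hashvalue & (hashtable->skewBucketLen - 1);

    /* linear probe until we hit a hole or the matching hash value */
    while (hashtable->skewBucket[bucket] != NULL &&
           hashtable->skewBucket[bucket]->hashvalue != hashvalue)
        bucket = (bucket + 1) & (hashtable->skewBucketLen - 1);

    /* found a skew bucket for this hash value? */
    if (hashtable->skewBucket[bucket] != NULL)
        return bucket;

    /* no entry: this value is not one of the common ones */
    return INVALID_SKEW_BUCKET_NO;
}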
/* ----------------------------------------------------------------
 *		MultiExecHash
 *
 *		build hash table for hashjoin, doing partitioning if more
 *		than one batch is required.
 * ----------------------------------------------------------------
 */
node_n *
MultiExecHash(hash_ps *node)
{
    plan_state_n *outerNode;
    struct list *hashkeys;
    struct hash_join_table *hashtable;
    struct tupslot *slot;
    expr_ctx_n *econtext;
    uint32      hashvalue;

    /* must provide our own instrumentation support */
    if (node->ps.instrument)
        instr_start_node(node->ps.instrument);

    /*
     * get state info from node
     */
    outerNode = OUTER_PLAN_STATE(node);
    hashtable = node->hashtable;

    /*
     * set expression context
     */
    hashkeys = node->hashkeys;
    econtext = node->ps.ps_ExprContext;

    /*
     * get all inner tuples and insert into the hash table (or temp files)
     */
    for (;;)
    {
        slot = exec_proc_node(outerNode);
        if (TUPSLOT_NULL(slot))
            break;
        /* We have to compute the hash value */
        econtext->ecxt_innertuple = slot;
        if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false,
                                 hashtable->keepNulls, &hashvalue))
        {
            int         bucketNumber;

            bucketNumber = ExecHashGetSkewBucket(hashtable, hashvalue);
            if (bucketNumber != INVALID_SKEW_BUCKET_NO)
            {
                /* It's a skew tuple, so put it into that hash table */
                ExecHashSkewTableInsert(hashtable, slot, hashvalue,
                                        bucketNumber);
            }
            else
            {
                /* Not subject to skew optimization, so insert normally */
                ExecHashTableInsert(hashtable, slot, hashvalue);
            }
            hashtable->totalTuples += 1;
        }
    }

    /* must provide our own instrumentation support */
    if (node->ps.instrument)
        instr_stop_node(node->ps.instrument, hashtable->totalTuples);

    /*
     * We do not return the hash table directly because it's not a subtype of
     * node_n, and so would violate the multi_exec_proc_node API.  Instead,
     * our parent Hashjoin node is expected to know how to fish it out of our
     * node state.  Ugly but not really worth cleaning up, since Hashjoin
     * knows quite a bit more about hash_pl besides that.
     */
    return NULL;
}
/* ----------------------------------------------------------------
 *		ExecHash
 *
 *		build hash table for hashjoin, doing partitioning if more
 *		than one batch is required.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecHash(Hash *node)
{
    EState     *estate;
    HashState  *hashstate;
    Plan       *outerNode;
    Var        *hashkey;
    HashJoinTable hashtable;
    TupleTableSlot *slot;
    ExprContext *econtext;
    int         nbatch;
    File       *batches;
    RelativeAddr *batchPos;
    int        *batchSizes;
    int         i;
    RelativeAddr *innerbatchNames;

    /* ----------------
     *	get state info from node
     * ----------------
     */
    hashstate = node->hashstate;
    estate = node->plan.state;
    outerNode = outerPlan(node);

    hashtable = node->hashtable;
    if (hashtable == NULL)
        elog(WARN, "ExecHash: hash table is NULL.");

    nbatch = hashtable->nbatch;

    if (nbatch > 0)
    {
        /* we need hash partitioning */
        innerbatchNames = (RelativeAddr *) ABSADDR(hashtable->innerbatchNames);

        /* --------------
         *	allocate space for the file descriptors of batch files,
         *	then open the batch files in the current process.
         * --------------
         */
        batches = (File *) palloc(nbatch * sizeof(File));
        for (i = 0; i < nbatch; i++)
        {
            batches[i] = FileNameOpenFile(ABSADDR(innerbatchNames[i]),
                                          O_CREAT | O_RDWR, 0600);
        }
        hashstate->hashBatches = batches;
        batchPos = (RelativeAddr *) ABSADDR(hashtable->innerbatchPos);
        batchSizes = (int *) ABSADDR(hashtable->innerbatchSizes);
    }

    /* ----------------
     *	set expression context
     * ----------------
     */
    hashkey = node->hashkey;
    econtext = hashstate->cstate.cs_ExprContext;

    /* ----------------
     *	get tuple and insert into the hash table
     * ----------------
     */
    for (;;)
    {
        slot = ExecProcNode(outerNode, (Plan *) node);
        if (TupIsNull(slot))
            break;

        econtext->ecxt_innertuple = slot;
        ExecHashTableInsert(hashtable, econtext, hashkey,
                            hashstate->hashBatches);
        ExecClearTuple(slot);
    }

    /*
     * end of build phase: flush out the last page of each batch.
     */
    for (i = 0; i < nbatch; i++)
    {
        if (FileSeek(batches[i], 0L, SEEK_END) < 0)
            perror("FileSeek");
        if (FileWrite(batches[i], ABSADDR(hashtable->batch) + i * BLCKSZ,
                      BLCKSZ) < 0)
            perror("FileWrite");
        NDirectFileWrite++;
    }

    /* ---------------------
     *	Return the slot so that we have the tuple descriptor
     *	when we need to save/restore them.  -Jeff 11 July 1991
     * ---------------------
     */
    return slot;
}
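/*
 * For context: this oldest version keeps the hash table in a relocatable
 * (potentially shared) arena, so stored pointers are RelativeAddr offsets
 * and ABSADDR() rebases them against the arena's start before use.  A
 * minimal sketch of the idea, assuming a hypothetical base-address field
 * named "top" in the table header; the real macros lived alongside the
 * hash-table definitions and may differ in detail.
 */
typedef int RelativeAddr;

#define ABSADDR(offset) \
    ((offset) ? (char *) hashtable->top + (offset) : NULL)
#define RELADDR(ptr) \
    ((RelativeAddr) ((ptr) ? (char *) (ptr) - (char *) hashtable->top : 0))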