/*
 * ForeignRecheck -- access method routine to recheck a tuple in EvalPlanQual
 *
 * node: foreign scan state whose recheck conditions are applied.
 * slot: tuple under test; installed as the scan tuple of the node's
 *		 expression context.
 *
 * Returns false as soon as the FDW's RecheckForeignScan callback (when one
 * is provided) rejects the tuple; otherwise returns the result of evaluating
 * fdw_recheck_quals against the slot.
 */
static bool
ForeignRecheck(ForeignScanState *node, TupleTableSlot *slot)
{
	ExprContext *ctx = node->ss.ps.ps_ExprContext;
	FdwRoutine *routine = node->fdwroutine;

	/* Expose the candidate tuple to the quals and start with clean memory. */
	ctx->ecxt_scantuple = slot;
	ResetExprContext(ctx);

	/*
	 * The callback is optional.  When an outer join has been pushed down it
	 * may have to store a different tuple in the slot, since a different set
	 * of columns may go to NULL on recheck; in other cases it only reports
	 * whether the quals still pass.  FDWs with simple needs can set
	 * fdw_recheck_quals instead of supplying the callback.
	 */
	if (routine->RecheckForeignScan)
	{
		if (!routine->RecheckForeignScan(node, slot))
			return false;
	}

	return ExecQual(node->fdw_recheck_quals, ctx, false);
}
/*
 * IndexRecheck -- access method routine to recheck a tuple in EvalPlanQual
 *
 * node: index scan state supplying the original index quals.
 * slot: tuple under test.
 *
 * Returns the result of evaluating indexqualorig against the tuple.
 */
static bool
IndexRecheck(IndexScanState *node, TupleTableSlot *slot)
{
	ExprContext *ctx = node->ss.ps.ps_ExprContext;
	bool		passes;

	/* Point the expression context at the tuple and clear old scratch data. */
	ctx->ecxt_scantuple = slot;
	ResetExprContext(ctx);

	/* Does the tuple meet the indexqual condition? */
	passes = ExecQual(node->indexqualorig, ctx, false);

	return passes;
}
/*
 * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
 *
 * node: bitmap heap scan state supplying the original bitmap quals.
 * slot: tuple under test.
 *
 * Returns the result of evaluating bitmapqualorig against the tuple.
 */
static bool
BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
{
	ExprContext *ctx = node->ss.ps.ps_ExprContext;
	bool		passes;

	/* Point the expression context at the tuple and clear old scratch data. */
	ctx->ecxt_scantuple = slot;
	ResetExprContext(ctx);

	/*
	 * Does the tuple meet the original qual conditions?
	 *
	 * NOTE(review): ExecQual is invoked with two arguments here; confirm this
	 * matches the ExecQual prototype that is in scope for this routine.
	 */
	passes = ExecQual(node->bitmapqualorig, ctx);

	return passes;
}
/*
 * BitmapTableScanPlanQualTuple
 *
 * Produces the tuple to use while PlanQual is being evaluated for this
 * scan's relation.
 *
 * When the EState carries no PlanQual tuple for the relation, or that tuple
 * has already been handed out (es_evTupleNull is set), the scan slot is
 * cleared and returned.  Otherwise the PlanQual tuple is stored in the scan
 * slot, tested against bitmapqualorig (the slot is cleared when the test
 * fails), and es_evTupleNull is set so the next call reports end of data.
 *
 * Additional checking of this kind is not ideal, but there is no other way
 * for now.
 */
static TupleTableSlot*
BitmapTableScanPlanQualTuple(BitmapTableScanState *node)
{
	TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
	ExprContext *ctx = node->ss.ps.ps_ExprContext;
	EState	   *estate = node->ss.ps.state;
	Index		scanrelid = ((BitmapTableScan *) node->ss.ps.plan)->scan.scanrelid;
	Index		evIndex = scanrelid - 1;

	/* No PlanQual tuple registered for this relation: nothing to return. */
	if (estate->es_evTuple == NULL || estate->es_evTuple[evIndex] == NULL)
		return ExecClearTuple(slot);

	/* The single PlanQual tuple was already returned by an earlier call. */
	if (estate->es_evTupleNull[evIndex])
		return ExecClearTuple(slot);

	ExecStoreGenericTuple(estate->es_evTuple[evIndex], slot, false);

	/* Does the tuple meet the original qual conditions? */
	ctx->ecxt_scantuple = slot;
	ResetExprContext(ctx);

	if (!ExecQual(node->bitmapqualorig, ctx, false))
	{
		/* would not be returned by scan */
		ExecClearTuple(slot);
	}

	/* Flag for the next call that no more tuples */
	estate->es_evTupleNull[evIndex] = true;

	return slot;
}
/*
 * Checks eligibility of a tuple.
 *
 * A tuple may fail to meet the visibility requirement, and for a lossy
 * bitmap every tuple has to be tested to make sure it satisfies the qual.
 *
 * scanState: scan state carrying the lossy/recheck flags and bitmapqualorig.
 * slot:	  tuple to test.
 *
 * Returns true without evaluating anything when neither isLossyBitmapPage
 * nor recheckTuples is set; otherwise returns the result of evaluating
 * bitmapqualorig against the tuple.
 */
bool
BitmapTableScanRecheckTuple(BitmapTableScanState *scanState, TupleTableSlot *slot)
{
	ExprContext *ctx;

	/* Exact bitmap page and no forced recheck: the tuple is eligible. */
	if (!scanState->isLossyBitmapPage && !scanState->recheckTuples)
		return true;

	/*
	 * Lossy information, or a recheck required for visibility or other
	 * causes: evaluate the original quals against the tuple.
	 */
	ctx = scanState->ss.ps.ps_ExprContext;
	ctx->ecxt_scantuple = slot;
	ResetExprContext(ctx);

	return ExecQual(scanState->bitmapqualorig, ctx, false);
}
/* ---------------------------------------------------------------- * ExecScan * * Scans the relation using the 'access method' indicated and * returns the next qualifying tuple in the direction specified * in the global variable ExecDirection. * The access method returns the next tuple and ExecScan() is * responsible for checking the tuple returned against the qual-clause. * * A 'recheck method' must also be provided that can check an * arbitrary tuple of the relation against any qual conditions * that are implemented internal to the access method. * * Conditions: * -- the "cursor" maintained by the AMI is positioned at the tuple * returned previously. * * Initial States: * -- the relation indicated is opened for scanning so that the * "cursor" is positioned before the first qualifying tuple. * ---------------------------------------------------------------- */ TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, /* function returning a tuple */ ExecScanRecheckMtd recheckMtd) { ExprContext *econtext; List *qual; ProjectionInfo *projInfo; /* * Fetch data from node */ qual = node->ps.qual; projInfo = node->ps.ps_ProjInfo; econtext = node->ps.ps_ExprContext; /* * If we have neither a qual to check nor a projection to do, just skip * all the overhead and return the raw scan tuple. */ if (!qual && !projInfo) { ResetExprContext(econtext); return ExecScanFetch(node, accessMtd, recheckMtd); } /* * Reset per-tuple memory context to free any expression evaluation * storage allocated in the previous tuple cycle. */ ResetExprContext(econtext); /* * get a tuple from the access method. Loop until we obtain a tuple that * passes the qualification. 
*/ for (;;) { TupleTableSlot *slot; CHECK_FOR_INTERRUPTS(); slot = ExecScanFetch(node, accessMtd, recheckMtd); /* * if the slot returned by the accessMtd contains NULL, then it means * there is nothing more to scan so we just return an empty slot, * being careful to use the projection result slot so it has correct * tupleDesc. */ if (TupIsNull(slot)) { if (projInfo) return ExecClearTuple(projInfo->pi_slot); else return slot; } /* * place the current tuple into the expr context */ econtext->ecxt_scantuple = slot; /* * check that the current tuple satisfies the qual-clause * * check for non-nil qual here to avoid a function call to ExecQual() * when the qual is nil ... saves only a few cycles, but they add up * ... */ if (!qual || ExecQual(qual, econtext, false)) { /* * Found a satisfactory scan tuple. */ if (projInfo) { /* * Form a projection tuple, store it in the result tuple slot * and return it. */ return ExecProject(projInfo); } else { /* * Here, we aren't projecting, so just return scan tuple. */ return slot; } } else InstrCountFiltered1(node, 1); /* * Tuple fails qual, so free per-tuple memory and try again. */ ResetExprContext(econtext); } }
/* ----------------------------------------------------------------
 *		BitmapHeapNext
 *
 *		Retrieve next tuple from the BitmapHeapScan node's currentRelation
 *
 *		The routine has three mutually exclusive modes:
 *		  1. PlanQual evaluation: the tuple registered in
 *			 estate->es_evTuple for this relation is returned once
 *			 (after being checked against bitmapqualorig), then an empty
 *			 slot on the following call.
 *		  2. On-disk bitmap index (plan->inmem is false): TIDs are pulled
 *			 from an OnDiskBitmapWords structure filled by the subplan.
 *		  3. In-memory TIDBitmap (plan->inmem is true): pages and offsets
 *			 are pulled from the TIDBitmap built by the subplan; lossy
 *			 pages are rechecked against bitmapqualorig.
 *
 *		Returns the node's scan tuple slot, which is empty at end of scan.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
	EState *estate;
	ExprContext *econtext;
	HeapScanDesc scandesc;
	Index scanrelid;
	TIDBitmap *tbm;
	TBMIterateResult *tbmres;
	OffsetNumber targoffset;
	TupleTableSlot *slot;
	OnDiskBitmapWords *odbm;
	ODBMIterateResult *odbmres;
	bool inmem = false;

	/*
	 * extract necessary information from the bitmap heap scan node
	 */
	estate = node->ss.ps.state;
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;
	scandesc = node->ss.ss_currentScanDesc;
	scanrelid = ((BitmapHeapScan *) node->ss.ps.plan)->scan.scanrelid;
	tbm = node->tbm;
	tbmres = node->tbmres;
	odbm = node->odbm;
	odbmres = node->odbmres;

	/*
	 * Clear any reference to the previously returned tuple.  The idea here is
	 * to not have the tuple slot be the last holder of a pin on that tuple's
	 * buffer; if it is, we'll need a separate visit to the bufmgr to release
	 * the buffer.  By clearing here, we get to have the release done by
	 * ReleaseAndReadBuffer, below.
	 */
	ExecClearTuple(slot);

	/*
	 * Check if we are evaluating PlanQual for tuple of this relation.
	 * Additional checking is not good, but no other way for now. We could
	 * introduce new nodes for this case and handle IndexScan --> NewNode
	 * switching in Init/ReScan plan...
	 */
	if (estate->es_evTuple != NULL && estate->es_evTuple[scanrelid - 1] != NULL)
	{
		/* The PlanQual tuple was already handed out on a previous call. */
		if (estate->es_evTupleNull[scanrelid - 1])
			return slot; /* return empty slot */

		ExecStoreTuple(estate->es_evTuple[scanrelid - 1], slot, InvalidBuffer, false);

		/* Does the tuple meet the original qual conditions? */
		econtext->ecxt_scantuple = slot;
		ResetExprContext(econtext);

		if (!ExecQual(node->bitmapqualorig, econtext, false))
			ExecClearTuple(slot); /* would not be returned by scan */

		/* Flag for the next call that no more tuples */
		estate->es_evTupleNull[scanrelid - 1] = true;
		return slot;
	}

	/* check if this requires in-mem bitmap scan or on-disk bitmap index. */
	inmem = ((BitmapHeapScan*)(((PlanState*)node)->plan))->inmem;

	/*
	 * If the underlying indexes are on-disk bitmap indexes
	 */
	if (!inmem)
	{
		uint64 nextTid = 0;

		/* Lazily create the word buffer and its iteration state. */
		if (odbm == NULL)
		{
			odbm = odbm_create(ODBM_MAX_WORDS);
			node->odbm = odbm;
		}
		if (odbmres == NULL)
		{
			odbmres = odbm_res_create(odbm);
			node->odbmres = odbmres;
		}

		for (;;)
		{
			/*
			 * If we have used up the words from the previous scan, or we
			 * haven't yet scanned the underlying index for words, then do
			 * it.
			 */
			if (odbm->numOfWords == 0 && odbmres->nextTidLoc >= odbmres->numOfTids)
			{
				Plan* outerPlan = (((PlanState*)node)->lefttree)->plan;
				odbm_set_bitmaptype(outerPlan, false);
				/* Resume from the TID at which the previous batch stopped. */
				odbm->firstTid = odbmres->nextTid;
				odbm->startNo = 0;
				odbm_set_child_resultnode(((PlanState*)node)->lefttree, odbm);
				odbm = (OnDiskBitmapWords *) MultiExecProcNode(outerPlanState(node));
				if (!odbm || !IsA(odbm, OnDiskBitmapWords))
					elog(ERROR, "unrecognized result from subplan");
				/*
				 * NOTE(review): iteration restarts on node->odbm and
				 * node->odbmres rather than on the local odbm just returned
				 * by the subplan -- presumably the same object; confirm.
				 */
				odbm_begin_iterate(node->odbm, node->odbmres);
			}

			/* If we can not find more words, then this scan is over. */
			if (odbm == NULL || (odbm->numOfWords == 0 && odbmres->nextTidLoc >= odbmres->numOfTids))
				return ExecClearTuple(slot);

			nextTid = odbm_findnexttid(odbm, odbmres);
			/* A zero result carries no TID to fetch; go around again. */
			if (nextTid == 0)
				continue;

			/*
			 * Convert the 1-based linear TID into a (block, offset) pair:
			 * block = (tid-1) / MaxNumHeapTuples, offset is 1-based.
			 */
			ItemPointerSet(&scandesc->rs_ctup.t_self, (nextTid-1)/MaxNumHeapTuples, ((nextTid-1)%MaxNumHeapTuples)+1);

			/* fetch the heap tuple and see if it matches the snapshot. */
			if (heap_release_fetch(scandesc->rs_rd,
								   scandesc->rs_snapshot,
								   &scandesc->rs_ctup,
								   &scandesc->rs_cbuf,
								   true,
								   &scandesc->rs_pgstat_info))
			{
				/*
				 * Set up the result slot to point to this tuple.
				 * Note that the slot acquires a pin on the buffer.
				 *
				 * NOTE(review): unlike the lossy in-memory path below, this
				 * path returns the tuple without evaluating bitmapqualorig.
				 */
				ExecStoreTuple(&scandesc->rs_ctup, slot, scandesc->rs_cbuf, false);

				/* return this tuple */
				return slot;
			}
		}
	}

	/*
	 * If we haven't yet performed the underlying index scan, do it, and
	 * prepare the bitmap to be iterated over.
	 */
	if (tbm == NULL)
	{
		tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));

		if (!tbm || !IsA(tbm, TIDBitmap))
			elog(ERROR, "unrecognized result from subplan");

		node->tbm = tbm;
		node->tbmres = tbmres = NULL;

		tbm_begin_iterate(tbm);
	}

	for (;;)
	{
		/*
		 * Get next page of results if needed
		 */
		if (tbmres == NULL)
		{
			node->tbmres = tbmres = tbm_iterate(tbm);
			if (tbmres == NULL)
			{
				/* no more entries in the bitmap */
				break;
			}

			/*
			 * Ignore any claimed entries past what we think is the end of the
			 * relation.  (This is probably not necessary given that we got
			 * AccessShareLock before performing any of the indexscans, but
			 * let's be safe.)
			 */
			if (tbmres->blockno >= scandesc->rs_nblocks)
			{
				node->tbmres = tbmres = NULL;
				continue;
			}

			/*
			 * Acquire pin on the current heap page.  We'll hold the pin until
			 * done looking at the page.  We trade in any pin we held before.
			 */
			scandesc->rs_cbuf = ReleaseAndReadBuffer(scandesc->rs_cbuf, scandesc->rs_rd, tbmres->blockno);

			/*
			 * Determine how many entries we need to look at on this page. If
			 * the bitmap is lossy then we need to look at each physical item
			 * pointer; otherwise we just look through the offsets listed in
			 * tbmres.
			 */
			if (tbmres->ntuples >= 0)
			{
				/* non-lossy case: curslot indexes into tbmres->offsets[] */
				node->minslot = 0;
				node->maxslot = tbmres->ntuples - 1;
			}
			else
			{
				/* lossy case: curslot is itself an offset number on the page */
				Page dp;

				LockBuffer(scandesc->rs_cbuf, BUFFER_LOCK_SHARE);
				dp = (Page) BufferGetPage(scandesc->rs_cbuf);

				node->minslot = FirstOffsetNumber;
				node->maxslot = PageGetMaxOffsetNumber(dp);

				LockBuffer(scandesc->rs_cbuf, BUFFER_LOCK_UNLOCK);
			}

			/*
			 * Set curslot to first slot to examine
			 */
			node->curslot = node->minslot;
		}
		else
		{
			/*
			 * Continuing in previously obtained page; advance curslot
			 */
			node->curslot++;
		}

		/*
		 * Out of range?  If so, nothing more to look at on this page
		 */
		if (node->curslot < node->minslot || node->curslot > node->maxslot)
		{
			node->tbmres = tbmres = NULL;
			continue;
		}

		/*
		 * Okay to try to fetch the tuple
		 */
		if (tbmres->ntuples >= 0)
		{
			/* non-lossy case */
			targoffset = tbmres->offsets[node->curslot];
		}
		else
		{
			/* lossy case */
			targoffset = (OffsetNumber) node->curslot;
		}

		ItemPointerSet(&scandesc->rs_ctup.t_self, tbmres->blockno, targoffset);

		/*
		 * Fetch the heap tuple and see if it matches the snapshot. We use
		 * heap_release_fetch to avoid useless bufmgr traffic.
		 */
		if (heap_release_fetch(scandesc->rs_rd,
							   scandesc->rs_snapshot,
							   &scandesc->rs_ctup,
							   &scandesc->rs_cbuf,
							   true,
							   &scandesc->rs_pgstat_info))
		{
			/*
			 * Set up the result slot to point to this tuple. Note that the
			 * slot acquires a pin on the buffer.
			 */
			ExecStoreTuple(&scandesc->rs_ctup, slot, scandesc->rs_cbuf, false);

			/*
			 * If we are using lossy info, we have to recheck the qual
			 * conditions at every tuple.
			 */
			if (tbmres->ntuples < 0)
			{
				econtext->ecxt_scantuple = slot;
				ResetExprContext(econtext);

				if (!ExecQual(node->bitmapqualorig, econtext, false))
				{
					/* Fails recheck, so drop it and loop back for another */
					ExecClearTuple(slot);
					continue;
				}
			}

			/* OK to return this tuple */
			return slot;
		}

		/*
		 * Failed the snap, so loop back and try again.
		 */
	}

	/*
	 * if we get here it means we are at the end of the scan..
	 */
	return ExecClearTuple(slot);
}
/* * IndexSpoolInsert - * * Copy from ExecInsertIndexTuples. */ static void IndexSpoolInsert(BTSpool **spools, TupleTableSlot *slot, ItemPointer tupleid, EState *estate, bool reindex) { ResultRelInfo *relinfo; int i; int numIndices; RelationPtr indices; IndexInfo **indexInfoArray; Relation heapRelation; ExprContext *econtext; /* * Get information from the result relation relinfo structure. */ relinfo = estate->es_result_relation_info; numIndices = relinfo->ri_NumIndices; indices = relinfo->ri_IndexRelationDescs; indexInfoArray = relinfo->ri_IndexRelationInfo; heapRelation = relinfo->ri_RelationDesc; /* * We will use the EState's per-tuple context for evaluating predicates * and index expressions (creating it if it's not already there). */ econtext = GetPerTupleExprContext(estate); /* Arrange for econtext's scan tuple to be the tuple under test */ econtext->ecxt_scantuple = slot; for (i = 0; i < numIndices; i++) { Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; IndexInfo *indexInfo; if (indices[i] == NULL) continue; /* Skip non-btree indexes on reindex mode. */ if (reindex && spools != NULL && spools[i] == NULL) continue; indexInfo = indexInfoArray[i]; /* If the index is marked as read-only, ignore it */ if (!indexInfo->ii_ReadyForInserts) continue; /* Check for partial index */ if (indexInfo->ii_Predicate != NIL) { List *predicate; /* * If predicate state not set up yet, create it (in the estate's * per-query context) */ predicate = indexInfo->ii_PredicateState; if (predicate == NIL) { predicate = (List *) ExecPrepareExpr((Expr *) indexInfo->ii_Predicate, estate); indexInfo->ii_PredicateState = predicate; } /* Skip this index-update if the predicate isn'loader satisfied */ if (!ExecQual(predicate, econtext, false)) continue; } FormIndexDatum(indexInfo, slot, estate, values, isnull); /* * Insert or spool the tuple. 
*/ if (spools != NULL && spools[i] != NULL) { IndexTuple itup = index_form_tuple(RelationGetDescr(indices[i]), values, isnull); itup->t_tid = *tupleid; _bt_spool(itup, spools[i]); pfree(itup); } else { /* Insert one by one */ index_insert(indices[i], values, isnull, tupleid, heapRelation, indices[i]->rd_index->indisunique); } } }
/* ----------------------------------------------------------------
 *		IndexOnlyNext
 *
 *		Retrieve a tuple from the IndexOnlyScan node's index.
 *
 *		Returns the node's scan tuple slot filled from the index tuple,
 *		or the cleared slot once the index scan is exhausted.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
IndexOnlyNext(IndexOnlyScanState *node)
{
	EState	   *estate = node->ss.ps.state;
	IndexOnlyScan *plan = (IndexOnlyScan *) node->ss.ps.plan;
	ScanDirection scanDir = estate->es_direction;
	IndexScanDesc scan;
	ExprContext *ctx;
	TupleTableSlot *slot;

	/* flip direction if this is an overall backward scan */
	if (ScanDirectionIsBackward(plan->indexorderdir))
	{
		if (ScanDirectionIsForward(scanDir))
			scanDir = BackwardScanDirection;
		else if (ScanDirectionIsBackward(scanDir))
			scanDir = ForwardScanDirection;
	}

	scan = node->ioss_ScanDesc;
	ctx = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;

	/*
	 * OK, now that we have what we need, walk the index one TID at a time.
	 */
	for (;;)
	{
		ItemPointer tid;
		HeapTuple	heapTuple = NULL;

		tid = index_getnext_tid(scan, scanDir);
		if (tid == NULL)
			break;

		/*
		 * We can skip the heap fetch if the TID references a heap page on
		 * which all tuples are known visible to everybody.  In any case,
		 * we'll use the index tuple not the heap tuple as the data source.
		 *
		 * Note on Memory Ordering Effects: visibilitymap_test does not lock
		 * the visibility map buffer, and therefore the result we read here
		 * could be slightly stale.  However, it can't be stale enough to
		 * matter.
		 *
		 * Inserts: we need to detect clearing a VM bit due to an insert
		 * right away, because the tuple is present in the index page but
		 * not visible.  The reading of the TID by this scan (using a shared
		 * lock on the index buffer) is serialized with the insert of the
		 * TID into the index (using an exclusive lock on the index buffer).
		 * Because the VM bit is cleared before updating the index, and
		 * locking/unlocking of the index page acts as a full memory
		 * barrier, we are sure to see the cleared bit if we see a
		 * recently-inserted TID.
		 *
		 * Deletes: these do not update the index page (only VACUUM will
		 * clear out the TID), so the clearing of the VM bit by a delete is
		 * not serialized with the test below, and we may see a value that
		 * is significantly stale.  However, we don't care about the delete
		 * right away, because the tuple is still visible until the deleting
		 * transaction commits or the statement ends (if it's our
		 * transaction).  In either case, the lock on the VM buffer will
		 * have been released (acting as a write barrier) after clearing the
		 * bit.  And for us to have a snapshot that includes the deleting
		 * transaction (making the tuple invisible), we must have acquired
		 * ProcArrayLock after that time, acting as a read barrier.
		 *
		 * It's worth going through this complexity to avoid needing to lock
		 * the VM buffer, which could cause significant contention.
		 */
		if (!visibilitymap_test(scan->heapRelation,
								ItemPointerGetBlockNumber(tid),
								&node->ioss_VMBuffer))
		{
			/*
			 * Rats, we have to visit the heap to check visibility.
			 */
			node->ioss_HeapFetches++;
			heapTuple = index_fetch_heap(scan);

			/* no visible tuple, try next index entry */
			if (heapTuple == NULL)
				continue;

			/*
			 * Only MVCC snapshots are supported here, so there should be no
			 * need to keep following the HOT chain once a visible entry has
			 * been found.  If we did want to allow that, we'd need to keep
			 * more state to remember not to call index_getnext_tid next
			 * time.
			 */
			if (scan->xs_continue_hot)
				elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");

			/*
			 * Note: at this point we are holding a pin on the heap page, as
			 * recorded in scandesc->xs_cbuf.  We could release that pin
			 * now, but it's not clear whether it's a win to do so.  The
			 * next index entry might require a visit to the same heap page.
			 */
		}

		/*
		 * Fill the scan tuple slot with data from the index.
		 */
		StoreIndexTuple(slot, scan->xs_itup, scan->xs_itupdesc);

		/*
		 * If the index was lossy, we have to recheck the index quals.
		 * (Currently, this can never happen, but we should support the case
		 * for possible future use, eg with GiST indexes.)
		 */
		if (scan->xs_recheck)
		{
			ctx->ecxt_scantuple = slot;
			ResetExprContext(ctx);

			if (!ExecQual(node->indexqual, ctx, false))
			{
				/* Fails recheck, so drop it and loop back for another */
				InstrCountFiltered2(node, 1);
				continue;
			}
		}

		/*
		 * We don't currently support rechecking ORDER BY distances.  (In
		 * principle, if the index can support retrieval of the originally
		 * indexed value, it should be able to produce an exact distance
		 * calculation too.  So it's not clear that adding code here for
		 * recheck/re-sort would be worth the trouble.  But we should at
		 * least throw an error if someone tries it.)
		 */
		if (scan->numberOfOrderBys > 0 && scan->xs_recheckorderby)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("lossy distance functions are not supported in index-only scans")));

		/*
		 * Predicate locks for index-only scans must be acquired at the page
		 * level when the heap is not accessed, since tuple-level predicate
		 * locks need the tuple's xmin value.  If we had to visit the tuple
		 * anyway, then we already have the tuple-level lock and can skip
		 * the page lock.
		 */
		if (heapTuple == NULL)
			PredicateLockPage(scan->heapRelation,
							  ItemPointerGetBlockNumber(tid),
							  estate->es_snapshot);

		return slot;
	}

	/*
	 * if we get here it means the index scan failed so we are at the end of
	 * the scan..
	 */
	return ExecClearTuple(slot);
}
/* ----------------------------------------------------------------
 *		IndexNext
 *
 *		Retrieve a tuple from the IndexScan node's currentRelation
 *		using the index specified in the IndexScanState information.
 *
 *		While PlanQual is being evaluated for this relation, the tuple
 *		registered in estate->es_evTuple is returned once (cleared when
 *		it fails indexqualorig) and an empty slot afterwards.  Otherwise
 *		the next tuple from index_getnext() is returned, or an empty
 *		slot at end of scan.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
IndexNext(IndexScanState *node)
{
	EState	   *estate = node->ss.ps.state;
	ScanDirection scanDir = estate->es_direction;
	IndexScan  *plan;
	IndexScanDesc scan;
	ExprContext *ctx;
	TupleTableSlot *slot;
	Index		evIndex;
	HeapTuple	fetched;

	/* Must run before the scan descriptor is read from the node below. */
	initScanDesc(node);

	plan = (IndexScan *) node->ss.ps.plan;

	/* flip direction if this is an overall backward scan */
	if (ScanDirectionIsBackward(plan->indexorderdir))
	{
		if (ScanDirectionIsForward(scanDir))
			scanDir = BackwardScanDirection;
		else if (ScanDirectionIsBackward(scanDir))
			scanDir = ForwardScanDirection;
	}

	scan = node->iss_ScanDesc;
	ctx = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;
	evIndex = plan->scan.scanrelid - 1;

	/*
	 * Check if we are evaluating PlanQual for tuple of this relation.
	 * Additional checking is not good, but no other way for now. We could
	 * introduce new nodes for this case and handle IndexScan --> NewNode
	 * switching in Init/ReScan plan...
	 */
	if (estate->es_evTuple != NULL && estate->es_evTuple[evIndex] != NULL)
	{
		/* The PlanQual tuple was already returned: report end of data. */
		if (estate->es_evTupleNull[evIndex])
		{
			if (!node->ss.ps.delayEagerFree)
				ExecEagerFreeIndexScan(node);

			return ExecClearTuple(slot);
		}

		ExecStoreGenericTuple(estate->es_evTuple[evIndex], slot, false);

		/* Does the tuple meet the indexqual condition? */
		ctx->ecxt_scantuple = slot;
		ResetExprContext(ctx);

		if (!ExecQual(node->indexqualorig, ctx, false))
		{
			if (!node->ss.ps.delayEagerFree)
				ExecEagerFreeIndexScan(node);

			/* would not be returned by scan */
			ExecClearTuple(slot);
		}

		/* Flag for the next call that no more tuples */
		estate->es_evTupleNull[evIndex] = true;

		Gpmon_M_Incr_Rows_Out(GpmonPktFromIndexScanState(node));
		CheckSendPlanStateGpmonPkt(&node->ss.ps);
		return slot;
	}

	/*
	 * ok, now that we have what we need, fetch the next tuple.
	 */
	fetched = index_getnext(scan, scanDir);

	if (fetched == NULL)
	{
		/*
		 * The index scan produced nothing more, so we are at the end of
		 * the scan.
		 */
		if (!node->ss.ps.delayEagerFree)
			ExecEagerFreeIndexScan(node);

		return ExecClearTuple(slot);
	}

	/*
	 * Store the scanned tuple in the scan tuple slot of the scan state.
	 * Note: we pass 'false' because tuples returned by amgetnext are
	 * pointers onto disk pages and must not be pfree()'d.
	 */
	ExecStoreHeapTuple(fetched,			/* tuple to store */
					   slot,			/* slot to store in */
					   scan->xs_cbuf,	/* buffer containing tuple */
					   false);			/* don't pfree */

	Gpmon_M_Incr_Rows_Out(GpmonPktFromIndexScanState(node));
	CheckSendPlanStateGpmonPkt(&node->ss.ps);
	return slot;
}
/*
 *	 ExecGroup -
 *
 *		Return one tuple for each group of matching input tuples.
 *
 *		The first tuple of each group is copied into the node's
 *		ScanTupleSlot, tested against the node's qual (HAVING clause) and,
 *		when it passes, projected and returned.  Remaining members of the
 *		group are read and discarded.  Returns NULL once the input is
 *		exhausted (grp_done is then set so later calls return NULL at once).
 */
TupleTableSlot *
ExecGroup(GroupState *node)
{
	Group	   *plan;
	ExprContext *ctx;
	int			nkeys;
	AttrNumber *keyCols;
	TupleTableSlot *groupHead;
	TupleTableSlot *input;
	TupleTableSlot *projected;
	ExprDoneCond projStatus;

	if (node->grp_done)
		return NULL;

	/*
	 * get state info from node
	 */
	plan = (Group *) node->ss.ps.plan;
	ctx = node->ss.ps.ps_ExprContext;
	nkeys = plan->numCols;
	keyCols = plan->grpColIdx;

	/*
	 * Check to see if we're still projecting out tuples from a previous group
	 * tuple (because there is a function-returning-set in the projection
	 * expressions).  If so, try to project another one.
	 */
	if (node->ss.ps.ps_TupFromTlist)
	{
		projected = ExecProject(node->ss.ps.ps_ProjInfo, &projStatus);
		if (projStatus == ExprMultipleResult)
			return projected;

		/* Done with that source tuple... */
		node->ss.ps.ps_TupFromTlist = false;
	}

	/*
	 * The ScanTupleSlot holds the (copied) first tuple of each group.
	 *
	 * We need not call ResetExprContext here because execTuplesMatch will
	 * reset the per-tuple memory context once per input tuple.
	 */
	groupHead = node->ss.ss_ScanTupleSlot;

	/*
	 * If first time through, acquire first input tuple and determine whether
	 * to return it or not.
	 */
	if (TupIsNull(groupHead))
	{
		input = ExecProcNode(outerPlanState(node));
		if (TupIsNull(input))
		{
			/* empty input, so return nothing */
			node->grp_done = TRUE;
			return NULL;
		}

		/*
		 * Copy the tuple and set it up as input for qual test and
		 * projection.  The expressions will access the input tuple as varno
		 * OUTER.
		 */
		ExecCopySlot(groupHead, input);
		ctx->ecxt_outertuple = groupHead;

		/*
		 * Check the qual (HAVING clause); if the group does not match,
		 * ignore it and fall into scan loop.
		 */
		if (!ExecQual(node->ss.ps.qual, ctx, false))
		{
			InstrCountFiltered1(node, 1);
		}
		else
		{
			/*
			 * Form and return a projection tuple using the first input
			 * tuple.
			 */
			projected = ExecProject(node->ss.ps.ps_ProjInfo, &projStatus);
			if (projStatus != ExprEndResult)
			{
				node->ss.ps.ps_TupFromTlist = (projStatus == ExprMultipleResult);
				return projected;
			}
		}
	}

	/*
	 * This loop iterates once per input tuple group.  At the head of the
	 * loop, we have finished processing the first tuple of the group and now
	 * need to scan over all the other group members.
	 */
	for (;;)
	{
		/*
		 * Scan over all remaining tuples that belong to this group: keep
		 * reading while the new tuple matches the group's first tuple.
		 */
		do
		{
			input = ExecProcNode(outerPlanState(node));
			if (TupIsNull(input))
			{
				/* no more groups, so we're done */
				node->grp_done = TRUE;
				return NULL;
			}
		} while (execTuplesMatch(groupHead, input,
								 nkeys, keyCols,
								 node->eqfunctions,
								 ctx->ecxt_per_tuple_memory));

		/*
		 * We have the first tuple of the next input group.  Copy it and set
		 * it up as input for qual test and projection.
		 */
		ExecCopySlot(groupHead, input);
		ctx->ecxt_outertuple = groupHead;

		/*
		 * Check the qual (HAVING clause); if the group does not match,
		 * ignore it and loop back to scan the rest of the group.
		 */
		if (!ExecQual(node->ss.ps.qual, ctx, false))
		{
			InstrCountFiltered1(node, 1);
		}
		else
		{
			/*
			 * Form and return a projection tuple using the first input
			 * tuple.
			 */
			projected = ExecProject(node->ss.ps.ps_ProjInfo, &projStatus);
			if (projStatus != ExprEndResult)
			{
				node->ss.ps.ps_TupFromTlist = (projStatus == ExprMultipleResult);
				return projected;
			}
		}
	}
}
/* ----------------------------------------------------------------
 *		ExecNestLoop(node)
 *
 * old comments
 *		Returns the tuple joined from inner and outer tuples which
 *		satisfies the qualification clause.
 *
 *		It scans the inner relation to join with current outer tuple.
 *
 *		If none is found, next tuple from the outer relation is retrieved
 *		and the inner relation is scanned from the beginning again to join
 *		with the outer tuple.
 *
 *		NULL is returned if all the remaining outer tuples are tried and
 *		all fail to join with the inner tuples.
 *
 *		NULL is also returned if there is no tuple from inner relation.
 *
 *		Conditions:
 *		  -- outerTuple contains current tuple from outer relation and
 *			 the right son(inner relation) maintains "cursor" at the tuple
 *			 returned previously.
 *				This is achieved by maintaining a scan position on the outer
 *				relation.
 *
 *		Initial States:
 *		  -- the outer child and the inner child
 *			   are prepared to return the first tuple.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecNestLoop(NestLoopState *node)
{
	PlanState  *innerState;
	PlanState  *outerState;
	TupleTableSlot *outerSlot;
	TupleTableSlot *innerSlot;
	TupleTableSlot *projected;
	ExprDoneCond projStatus;
	List	   *joinQuals;
	List	   *otherQuals;
	ExprContext *ctx;

	/*
	 * get information from the node
	 */
	ENL1_printf("getting info from node");

	ctx = node->js.ps.ps_ExprContext;
	joinQuals = node->js.joinqual;
	otherQuals = node->js.ps.qual;
	outerState = outerPlanState(node);
	innerState = innerPlanState(node);

	/*
	 * get the current outer tuple
	 */
	outerSlot = node->js.ps.ps_OuterTupleSlot;
	ctx->ecxt_outertuple = outerSlot;

	/*
	 * Check to see if we're still projecting out tuples from a previous join
	 * tuple (because there is a function-returning-set in the projection
	 * expressions).  If so, try to project another one.
	 */
	if (node->js.ps.ps_TupFromTlist)
	{
		projected = ExecProject(node->js.ps.ps_ProjInfo, &projStatus);
		if (projStatus == ExprMultipleResult)
			return projected;

		/* Done with that source tuple... */
		node->js.ps.ps_TupFromTlist = false;
	}

	/*
	 * If we're doing an IN join, we want to return at most one row per outer
	 * tuple; so we can stop scanning the inner scan if we matched on the
	 * previous try.
	 */
	if (node->js.jointype == JOIN_IN && node->nl_MatchedOuter)
		node->nl_NeedNewOuter = true;

	/*
	 * Reset per-tuple memory context to free any expression evaluation
	 * storage allocated in the previous tuple cycle.  Note this can't happen
	 * until we're done projecting out tuples from a join tuple.
	 */
	ResetExprContext(ctx);

	/*
	 * Ok, everything is setup for the join so now loop until we return a
	 * qualifying join tuple.
	 */
	ENL1_printf("entering main loop");

	for (;;)
	{
		/*
		 * If we don't have an outer tuple, get the next one and reset the
		 * inner scan.
		 */
		if (node->nl_NeedNewOuter)
		{
			ENL1_printf("getting new outer tuple");
			outerSlot = ExecProcNode(outerState);

			/*
			 * if there are no more outer tuples, then the join is complete..
			 */
			if (TupIsNull(outerSlot))
			{
				ENL1_printf("no outer tuple, ending join");
				return NULL;
			}

			ENL1_printf("saving new outer tuple information");
			node->js.ps.ps_OuterTupleSlot = outerSlot;
			ctx->ecxt_outertuple = outerSlot;
			node->nl_NeedNewOuter = false;
			node->nl_MatchedOuter = false;

			/*
			 * now rescan the inner plan.  The scan key of the inner plan
			 * might depend on the current outer tuple (e.g. in index scans),
			 * that's why we pass our expr context.
			 */
			ENL1_printf("rescanning inner plan");
			ExecReScan(innerState, ctx);
		}

		/*
		 * we have an outerTuple, try to get the next inner tuple.
		 */
		ENL1_printf("getting new inner tuple");

		innerSlot = ExecProcNode(innerState);
		ctx->ecxt_innertuple = innerSlot;

		if (TupIsNull(innerSlot))
		{
			ENL1_printf("no inner tuple, need new outer tuple");

			node->nl_NeedNewOuter = true;

			/*
			 * Unless this is a left join whose outer tuple found no join
			 * match, just return to top of loop for a new outer tuple.
			 */
			if (node->nl_MatchedOuter || node->js.jointype != JOIN_LEFT)
				continue;

			/*
			 * We are doing an outer join and there were no join matches
			 * for this outer tuple.  Generate a fake join tuple with
			 * nulls for the inner tuple, and return it if it passes the
			 * non-join quals.
			 */
			ctx->ecxt_innertuple = node->nl_NullInnerTupleSlot;

			ENL1_printf("testing qualification for outer-join tuple");

			if (!ExecQual(otherQuals, ctx, false))
				continue;

			/*
			 * qualification was satisfied so we project and return the
			 * slot containing the result tuple using ExecProject().
			 */
			ENL1_printf("qualification succeeded, projecting tuple");

			projected = ExecProject(node->js.ps.ps_ProjInfo, &projStatus);
			if (projStatus != ExprEndResult)
			{
				node->js.ps.ps_TupFromTlist = (projStatus == ExprMultipleResult);
				return projected;
			}

			/* Projection yielded nothing: go fetch a new outer tuple. */
			continue;
		}

		/*
		 * at this point we have a new pair of inner and outer tuples so we
		 * test the inner and outer tuples to see if they satisfy the node's
		 * qualification.
		 *
		 * Only the joinquals determine MatchedOuter status, but all quals
		 * must pass to actually return the tuple.
		 */
		ENL1_printf("testing qualification");

		if (ExecQual(joinQuals, ctx, false))
		{
			node->nl_MatchedOuter = true;

			if (otherQuals == NIL || ExecQual(otherQuals, ctx, false))
			{
				/*
				 * qualification was satisfied so we project and return the
				 * slot containing the result tuple using ExecProject().
				 */
				ENL1_printf("qualification succeeded, projecting tuple");

				projected = ExecProject(node->js.ps.ps_ProjInfo, &projStatus);
				if (projStatus != ExprEndResult)
				{
					node->js.ps.ps_TupFromTlist = (projStatus == ExprMultipleResult);
					return projected;
				}
			}

			/* If we didn't return a tuple, may need to set NeedNewOuter */
			if (node->js.jointype == JOIN_IN)
				node->nl_NeedNewOuter = true;
		}

		/*
		 * Tuple fails qual, so free per-tuple memory and try again.
		 */
		ResetExprContext(ctx);

		ENL1_printf("qualification failed, looping");
	}
}
/* ---------------------------------------------------------------- * ExecScanHashBucket * * scan a hash bucket of matches * ---------------------------------------------------------------- */ HeapTuple ExecScanHashBucket(HashJoinState *hjstate, HashBucket bucket, HeapTuple curtuple, List *hjclauses, ExprContext *econtext) { HeapTuple heapTuple; bool qualResult; OverflowTuple otuple = NULL; OverflowTuple curotuple; TupleTableSlot *inntuple; OverflowTuple firstotuple; OverflowTuple lastotuple; HashJoinTable hashtable; hashtable = hjstate->hj_HashTable; firstotuple = (OverflowTuple)ABSADDR(bucket->firstotuple); lastotuple = (OverflowTuple)ABSADDR(bucket->lastotuple); /* ---------------- * search the hash bucket * ---------------- */ if (curtuple == NULL || curtuple < (HeapTuple)ABSADDR(bucket->bottom)) { if (curtuple == NULL) heapTuple = (HeapTuple) LONGALIGN(ABSADDR(bucket->top)); else heapTuple = (HeapTuple) LONGALIGN(((char*)curtuple+curtuple->t_len)); while (heapTuple < (HeapTuple)ABSADDR(bucket->bottom)) { inntuple = ExecStoreTuple(heapTuple, /* tuple to store */ hjstate->hj_HashTupleSlot, /* slot */ InvalidBuffer,/* tuple has no buffer */ false); /* do not pfree this tuple */ econtext->ecxt_innertuple = inntuple; qualResult = ExecQual((List*)hjclauses, econtext); if (qualResult) return heapTuple; heapTuple = (HeapTuple) LONGALIGN(((char*)heapTuple+heapTuple->t_len)); } if (firstotuple == NULL) return NULL; otuple = firstotuple; } /* ---------------- * search the overflow area of the hash bucket * ---------------- */ if (otuple == NULL) { curotuple = hjstate->hj_CurOTuple; otuple = (OverflowTuple)ABSADDR(curotuple->next); } while (otuple != NULL) { heapTuple = (HeapTuple)ABSADDR(otuple->tuple); inntuple = ExecStoreTuple(heapTuple, /* tuple to store */ hjstate->hj_HashTupleSlot, /* slot */ InvalidBuffer, /* SP?? 
this tuple has no buffer */ false); /* do not pfree this tuple */ econtext->ecxt_innertuple = inntuple; qualResult = ExecQual((List*)hjclauses, econtext); if (qualResult) { hjstate->hj_CurOTuple = otuple; return heapTuple; } otuple = (OverflowTuple)ABSADDR(otuple->next); } /* ---------------- * no match * ---------------- */ return NULL; }
/* ----------------------------------------------------------------
 *      BitmapHeapNext
 *
 *      Retrieve next tuple from the BitmapHeapScan node's currentRelation.
 *
 *      Returns the scan tuple slot holding the next qualifying tuple, or
 *      the cleared slot once the bitmap is exhausted.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
    ExprContext *econtext;
    HeapScanDesc scan;
    TIDBitmap  *tbm;
    TBMIterator *tbmiterator = NULL;
    TBMSharedIterator *shared_tbmiterator = NULL;
    TBMIterateResult *tbmres;
    OffsetNumber targoffset;
    TupleTableSlot *slot;
    ParallelBitmapHeapState *pstate = node->pstate;
    dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

    /*
     * Pull the working state out of the scan node.
     */
    econtext = node->ss.ps.ps_ExprContext;
    slot = node->ss.ss_ScanTupleSlot;
    scan = node->ss.ss_currentScanDesc;
    tbm = node->tbm;
    tbmres = node->tbmres;
    if (pstate == NULL)
        tbmiterator = node->tbmiterator;
    else
        shared_tbmiterator = node->shared_tbmiterator;

    /*
     * First call: run the underlying index scan(s) to build the bitmap and
     * start iterating over it.
     *
     * Prefetching uses a second iterator that runs ahead of the main one.
     * prefetch_pages records how far ahead it currently is, and
     * prefetch_target is the desired distance; the target starts small and
     * grows up to node->prefetch_maximum so that a scan cut short by a LIMIT
     * does not prefetch a lot of pages for nothing.
     */
    if (!node->initialized)
    {
        if (pstate == NULL)
        {
            tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
            if (!tbm || !IsA(tbm, TIDBitmap))
                elog(ERROR, "unrecognized result from subplan");

            node->tbm = tbm;
            tbmiterator = tbm_begin_iterate(tbm);
            node->tbmiterator = tbmiterator;
            tbmres = NULL;
            node->tbmres = NULL;

#ifdef USE_PREFETCH
            if (node->prefetch_maximum > 0)
            {
                node->prefetch_iterator = tbm_begin_iterate(tbm);
                node->prefetch_pages = 0;
                node->prefetch_target = -1;
            }
#endif   /* USE_PREFETCH */
        }
        else
        {
            /*
             * The leader will immediately come out of the function, but
             * others will be blocked until leader populates the TBM and
             * wakes them up.
             */
            if (BitmapShouldInitializeSharedState(pstate))
            {
                tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
                if (!tbm || !IsA(tbm, TIDBitmap))
                    elog(ERROR, "unrecognized result from subplan");

                node->tbm = tbm;

                /*
                 * Prepare the shared iteration state; the dsa_pointer that
                 * comes back is what the cooperating processes attach to.
                 */
                pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);

#ifdef USE_PREFETCH
                if (node->prefetch_maximum > 0)
                {
                    pstate->prefetch_iterator =
                        tbm_prepare_shared_iterate(tbm);

                    /*
                     * No mutex needed here: nobody else has been woken up
                     * yet.
                     */
                    pstate->prefetch_pages = 0;
                    pstate->prefetch_target = -1;
                }
#endif

                /* Shared state is ready, so wake up the others. */
                BitmapDoneInitializingSharedState(pstate);
            }

            /* Allocate a private iterator and attach the shared state to it */
            shared_tbmiterator =
                tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
            node->shared_tbmiterator = shared_tbmiterator;
            tbmres = NULL;
            node->tbmres = NULL;

#ifdef USE_PREFETCH
            if (node->prefetch_maximum > 0)
            {
                node->shared_prefetch_iterator =
                    tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
            }
#endif   /* USE_PREFETCH */
        }

        node->initialized = true;
    }

    for (;;)
    {
        Page        page;
        ItemId      itemid;

        CHECK_FOR_INTERRUPTS();

        if (tbmres == NULL)
        {
            /*
             * Need a new page of results from the bitmap.
             */
            if (pstate == NULL)
                tbmres = tbm_iterate(tbmiterator);
            else
                tbmres = tbm_shared_iterate(shared_tbmiterator);
            node->tbmres = tbmres;

            if (tbmres == NULL)
            {
                /* no more entries in the bitmap: end of scan */
                return ExecClearTuple(slot);
            }

            BitmapAdjustPrefetchIterator(node, tbmres);

            /*
             * Ignore any claimed entries past what we think is the end of
             * the relation.  (Probably unnecessary given that at least
             * AccessShareLock was taken on the table before the index scans
             * ran, but let's be safe.)
             */
            if (tbmres->blockno >= scan->rs_nblocks)
            {
                tbmres = NULL;
                node->tbmres = NULL;
                continue;
            }

            /*
             * Fetch the current heap page and identify candidate tuples.
             */
            bitgetpage(scan, tbmres);

            if (tbmres->ntuples >= 0)
                node->exact_pages++;
            else
                node->lossy_pages++;

            /* Start examining the page at its first candidate */
            scan->rs_cindex = 0;

            /* Adjust the prefetch target */
            BitmapAdjustPrefetchTarget(node);
        }
        else
        {
            /*
             * Still on a previously fetched page: step to the next candidate.
             */
            scan->rs_cindex++;

#ifdef USE_PREFETCH

            /*
             * Try to prefetch at least a few pages even before we get to the
             * second page if we don't stop reading after the first tuple.
             */
            if (pstate == NULL)
            {
                if (node->prefetch_target < node->prefetch_maximum)
                    node->prefetch_target++;
            }
            else if (pstate->prefetch_target < node->prefetch_maximum)
            {
                /* take spinlock while updating shared state */
                SpinLockAcquire(&pstate->mutex);
                if (pstate->prefetch_target < node->prefetch_maximum)
                    pstate->prefetch_target++;
                SpinLockRelease(&pstate->mutex);
            }
#endif   /* USE_PREFETCH */
        }

        /*
         * Out of range?  If so, nothing more to look at on this page.
         */
        if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
        {
            tbmres = NULL;
            node->tbmres = NULL;
            continue;
        }

        /*
         * Prefetch requests are issued *after* fetching the current page so
         * they do not interfere with the main I/O, and only once we know
         * there is still work on the current page; otherwise we might
         * uselessly prefetch the very page we are about to read for real.
         */
        BitmapPrefetch(node, scan);

        /*
         * Okay to fetch the tuple.
         */
        targoffset = scan->rs_vistuples[scan->rs_cindex];
        page = (Page) BufferGetPage(scan->rs_cbuf);
        itemid = PageGetItemId(page, targoffset);
        Assert(ItemIdIsNormal(itemid));

        scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) page, itemid);
        scan->rs_ctup.t_len = ItemIdGetLength(itemid);
        scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
        ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);

        pgstat_count_heap_fetch(scan->rs_rd);

        /*
         * Point the result slot at this tuple.  Note that the slot acquires
         * a pin on the buffer.
         */
        ExecStoreTuple(&scan->rs_ctup, slot, scan->rs_cbuf, false);

        /*
         * With lossy info the qual conditions must be rechecked for every
         * tuple.
         */
        if (tbmres->recheck)
        {
            econtext->ecxt_scantuple = slot;
            ResetExprContext(econtext);

            if (!ExecQual(node->bitmapqualorig, econtext))
            {
                /* Fails recheck, so drop it and loop back for another */
                InstrCountFiltered2(node, 1);
                ExecClearTuple(slot);
                continue;
            }
        }

        /* OK to return this tuple */
        return slot;
    }
}
/*
 *   ExecGroup -
 *
 *      Return one tuple for each group of matching input tuples.
 *
 *      The node's scan tuple slot holds a copy of the first tuple of the
 *      group currently being processed.  Each call skips the remaining
 *      members of that group, then returns the projection of the first
 *      tuple of the next group that passes the node's qual.  Returns NULL
 *      once the input is exhausted.
 */
TupleTableSlot *
ExecGroup(GroupState *node)
{
    ExprContext *econtext;
    int         numCols;
    AttrNumber *grpColIdx;
    TupleTableSlot *firsttupleslot;
    TupleTableSlot *outerslot;

    /* nothing more to do once the input has run dry */
    if (node->grp_done)
        return NULL;

    econtext = node->ss.ps.ps_ExprContext;
    numCols = ((Group *) node->ss.ps.plan)->numCols;
    grpColIdx = ((Group *) node->ss.ps.plan)->grpColIdx;

    /*
     * The ScanTupleSlot holds the (copied) first tuple of each group.
     */
    firsttupleslot = node->ss.ss_ScanTupleSlot;

    /*
     * ResetExprContext is not called here; per the original note,
     * execTuplesMatch resets the per-tuple memory context once per input
     * tuple.
     */

    /*
     * Each pass through this loop produces the first tuple of a new group
     * in outerslot and decides whether to return it.
     */
    for (;;)
    {
        if (TupIsNull(firsttupleslot))
        {
            /*
             * Very first input tuple: there is no current group to skip,
             * so simply read one tuple.
             */
            outerslot = ExecProcNode(outerPlanState(node));
            if (TupIsNull(outerslot))
            {
                /* empty input, so return nothing */
                node->grp_done = TRUE;
                return NULL;
            }
        }
        else
        {
            /*
             * Discard the remaining members of the current group: keep
             * reading while the incoming tuple matches the saved first
             * tuple on the grouping columns.
             */
            do
            {
                outerslot = ExecProcNode(outerPlanState(node));
                if (TupIsNull(outerslot))
                {
                    /* no more groups, so we're done */
                    node->grp_done = TRUE;
                    return NULL;
                }
            } while (execTuplesMatch(firsttupleslot, outerslot,
                                     numCols, grpColIdx,
                                     node->eqfunctions,
                                     econtext->ecxt_per_tuple_memory));
        }

        /*
         * outerslot is the first tuple of a new group.  Copy it and set it
         * up as input for the qual test and projection.
         */
        ExecCopySlot(firsttupleslot, outerslot);
        econtext->ecxt_scantuple = firsttupleslot;

        /*
         * Check the qual (HAVING clause); when the group does not match,
         * loop back and scan past the rest of it.
         */
        if (ExecQual(node->ss.ps.qual, econtext, false))
        {
            /* form and return a projection of the group's first tuple */
            return ExecProject(node->ss.ps.ps_ProjInfo, NULL);
        }
    }

    /* NOTREACHED */
    return NULL;
}
/* ----------------------------------------------------------------
 *      IndexOnlyNext
 *
 *      Retrieve a tuple from the IndexOnlyScan node's index.
 *
 *      Returns the scan tuple slot filled from the index tuple, or the
 *      cleared slot when the index scan is exhausted.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
IndexOnlyNext(IndexOnlyScanState *node)
{
    EState     *estate = node->ss.ps.state;
    ExprContext *econtext = node->ss.ps.ps_ExprContext;
    IndexScanDesc scandesc = node->ioss_ScanDesc;
    TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
    ScanDirection direction = estate->es_direction;
    ItemPointer tid;

    /*
     * If the plan asks for a backward index order, the executor's direction
     * must be inverted (forward <-> backward; anything else is left alone).
     */
    if (ScanDirectionIsBackward(((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir))
    {
        if (ScanDirectionIsForward(direction))
            direction = BackwardScanDirection;
        else if (ScanDirectionIsBackward(direction))
            direction = ForwardScanDirection;
    }

    /*
     * Fetch index entries until one survives all the checks.
     */
    for (;;)
    {
        HeapTuple   heaptup = NULL;

        tid = index_getnext_tid(scandesc, direction);
        if (tid == NULL)
            break;

        /*
         * We can skip the heap fetch if the TID references a heap page on
         * which all tuples are known visible to everybody.  In any case,
         * the index tuple, not the heap tuple, is the data source.
         *
         * Note on Memory Ordering Effects: visibilitymap_test does not lock
         * the visibility map buffer, so the result read here could be
         * slightly stale.  It cannot be stale enough to matter, though.  It
         * suffices to show that (1) there is a read barrier between reading
         * the index TID and testing the visibility map; and (2) there is a
         * write barrier between a concurrent process clearing the
         * visibility map bit and inserting the index TID.  Since acquiring
         * or releasing a LWLock interposes a full barrier, (1) is satisfied
         * by the release of the index buffer content lock after reading the
         * TID, and (2) by the acquisition of the buffer content lock in
         * order to insert the TID.
         */
        if (!visibilitymap_test(scandesc->heapRelation,
                                ItemPointerGetBlockNumber(tid),
                                &node->ioss_VMBuffer))
        {
            /*
             * Rats, we have to visit the heap to check visibility.
             */
            node->ioss_HeapFetches++;
            heaptup = index_fetch_heap(scandesc);
            if (heaptup == NULL)
                continue;       /* no visible tuple, try next index entry */

            /*
             * Only MVCC snapshots are supported here, so there should be no
             * need to keep following the HOT chain once a visible entry has
             * been found.  Allowing that would require more state, to
             * remember not to call index_getnext_tid next time.
             */
            if (scandesc->xs_continue_hot)
                elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");

            /*
             * Note: at this point a pin on the heap page is held, as
             * recorded in scandesc->xs_cbuf.  It could be released now, but
             * it is not clear that would be a win: the next index entry
             * might require a visit to the same heap page.
             */
        }

        /*
         * Fill the scan tuple slot with data from the index.
         */
        StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);

        /*
         * If the index was lossy, the index quals have to be rechecked.
         * (Per the original note this cannot currently happen, but the case
         * is supported for possible future use, e.g. with GiST indexes.)
         */
        if (scandesc->xs_recheck)
        {
            econtext->ecxt_scantuple = slot;
            ResetExprContext(econtext);

            if (!ExecQual(node->indexqual, econtext, false))
            {
                /* Fails recheck, so drop it and loop back for another */
                InstrCountFiltered2(node, 1);
                continue;
            }
        }

        /*
         * Predicate locks for index-only scans must be acquired at the page
         * level when the heap is not accessed, since tuple-level predicate
         * locks need the tuple's xmin value.  If the tuple had to be
         * visited anyway, the tuple-level lock is already held and the page
         * lock can be skipped.
         */
        if (heaptup == NULL)
            PredicateLockPage(scandesc->heapRelation,
                              ItemPointerGetBlockNumber(tid),
                              estate->es_snapshot);

        return slot;
    }

    /*
     * The index scan returned no more TIDs, so this is the end of the scan.
     */
    return ExecClearTuple(slot);
}
/*
 * ExecIndexRecommend
 *
 * This function obtains data directly from the RecView, which we
 * assume is populated with predictions for this user.
 *
 * recnode    - recommender scan state; its subscan supplies the qual,
 *              projection info and expression context
 * accessMtd  - access method passed through to ExecRecFetch
 * recheckMtd - recheck method passed through to ExecRecFetch
 *
 * Returns the next tuple (projected when projection info exists) whose
 * user ID equals attributes->userID and which passes the qual; returns an
 * empty/NULL slot when ExecRecFetch runs out of tuples.
 */
static TupleTableSlot *
ExecIndexRecommend(RecScanState *recnode,
                   ExecScanAccessMtd accessMtd,
                   ExecScanRecheckMtd recheckMtd)
{
    ScanState  *node = recnode->subscan;
    AttributeInfo *attributes = (AttributeInfo *) recnode->attributes;
    List       *qual = node->ps.qual;
    ProjectionInfo *projInfo = node->ps.ps_ProjInfo;
    ExprContext *econtext = node->ps.ps_ExprContext;
    ExprDoneCond isDone;
    TupleTableSlot *resultSlot;

    /*
     * A set-returning function in the projection may still have rows to
     * emit for the previous scan tuple.  If so, try to project another one.
     */
    if (node->ps.ps_TupFromTlist)
    {
        Assert(projInfo);       /* can't get here if not projecting */
        resultSlot = ExecProject(projInfo, &isDone);
        if (isDone == ExprMultipleResult)
            return resultSlot;
        /* Done with that source tuple... */
        node->ps.ps_TupFromTlist = false;
    }

    /*
     * Release expression-evaluation storage from the previous tuple cycle.
     * This must wait until projection from the previous scan tuple is over.
     */
    ResetExprContext(econtext);

    /*
     * Fetch tuples until one passes both the user-ID check and the qual.
     */
    for (;;)
    {
        TupleTableSlot *slot;
        int         userID;

        CHECK_FOR_INTERRUPTS();

        /* When using the RecView, a new tuple is fetched every time. */
        slot = ExecRecFetch(node, accessMtd, recheckMtd);

        /* An empty slot means there are no tuples left to return. */
        if (TupIsNull(slot))
        {
            if (projInfo)
                return ExecClearTuple(projInfo->pi_slot);
            return slot;
        }

        /*
         * Before evaluating the qual, check by hand that the tuple belongs
         * to the requested user; this is always necessary and easier than
         * messing with the target list.
         */
        userID = getTupleInt(slot, attributes->userkey);

        /* A negative result is treated as "column not found". */
        if (userID < 0)
            elog(ERROR, "user ID column not found");

        /* place the current tuple into the expr context */
        econtext->ecxt_scantuple = slot;

        /*
         * The tuple qualifies when it is for the right user and satisfies
         * the qual clause.  The qual is tested for non-nil first to avoid
         * the ExecQual() call when there is nothing to evaluate.
         */
        if (userID == attributes->userID &&
            (!qual || ExecQual(qual, econtext, false)))
        {
            /* not projecting: hand back the scan tuple as it is */
            if (!projInfo)
                return slot;

            /*
             * Form a projection tuple and return it, unless no tuple can be
             * projected from this scan tuple, in which case the scan
             * continues.
             */
            resultSlot = ExecProject(projInfo, &isDone);
            if (isDone != ExprEndResult)
            {
                node->ps.ps_TupFromTlist = (isDone == ExprMultipleResult);
                return resultSlot;
            }
        }
        else
        {
            InstrCountFiltered1(node, 1);
        }

        /*
         * Nothing returned for this tuple, so free per-tuple memory and try
         * again.
         */
        ResetExprContext(econtext);
    }
}
/* ----------------------------------------------------------------
 *      BitmapHeapNext
 *
 *      Retrieve next tuple from the BitmapHeapScan node's currentRelation.
 *
 *      Returns the scan tuple slot holding the next qualifying tuple, or
 *      the cleared slot once the bitmap is exhausted.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
    ExprContext *econtext;
    HeapScanDesc scan;
    TIDBitmap  *tbm;
    TBMIterator *tbmiterator;
    TBMIterateResult *tbmres;
    TBMIterator *prefetch_iterator;
    OffsetNumber targoffset;
    TupleTableSlot *slot;

    /*
     * Pull the working state out of the scan node.
     */
    econtext = node->ss.ps.ps_ExprContext;
    slot = node->ss.ss_ScanTupleSlot;
    scan = node->ss.ss_currentScanDesc;
    tbm = node->tbm;
    tbmiterator = node->tbmiterator;
    tbmres = node->tbmres;
    prefetch_iterator = node->prefetch_iterator;

    /*
     * First call: run the underlying index scan(s) to build the bitmap and
     * start iterating over it.
     *
     * Prefetching uses a second iterator that runs ahead of the main one.
     * prefetch_pages records how far ahead it currently is, and
     * prefetch_target is the desired distance; the target starts small and
     * grows up to the GUC-controlled maximum, target_prefetch_pages, so
     * that a scan cut short by a LIMIT does not prefetch a lot of pages for
     * nothing.
     */
    if (tbm == NULL)
    {
        tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
        if (!tbm || !IsA(tbm, TIDBitmap))
            elog(ERROR, "unrecognized result from subplan");

        node->tbm = tbm;
        tbmiterator = tbm_begin_iterate(tbm);
        node->tbmiterator = tbmiterator;
        tbmres = NULL;
        node->tbmres = NULL;

#ifdef USE_PREFETCH
        if (target_prefetch_pages > 0)
        {
            prefetch_iterator = tbm_begin_iterate(tbm);
            node->prefetch_iterator = prefetch_iterator;
            node->prefetch_pages = 0;
            node->prefetch_target = -1;
        }
#endif   /* USE_PREFETCH */
    }

    for (;;)
    {
        Page        page;
        ItemId      itemid;

        if (tbmres == NULL)
        {
            /*
             * Need a new page of results from the bitmap.
             */
            tbmres = tbm_iterate(tbmiterator);
            node->tbmres = tbmres;
            if (tbmres == NULL)
            {
                /* no more entries in the bitmap: end of scan */
                return ExecClearTuple(slot);
            }

#ifdef USE_PREFETCH
            if (node->prefetch_pages > 0)
            {
                /* The main iterator has closed the distance by one page */
                node->prefetch_pages--;
            }
            else if (prefetch_iterator)
            {
                /* Do not let the prefetch iterator get behind the main one */
                TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);

                if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
                    elog(ERROR, "prefetch and main iterators are out of sync");
            }
#endif   /* USE_PREFETCH */

            /*
             * Ignore any claimed entries past what we think is the end of
             * the relation.  (Probably unnecessary given that at least
             * AccessShareLock was taken on the table before the index scans
             * ran, but let's be safe.)
             */
            if (tbmres->blockno >= scan->rs_nblocks)
            {
                tbmres = NULL;
                node->tbmres = NULL;
                continue;
            }

            /*
             * Fetch the current heap page and identify candidate tuples.
             */
            bitgetpage(scan, tbmres);

            /* Start examining the page at its first candidate */
            scan->rs_cindex = 0;

#ifdef USE_PREFETCH

            /*
             * Raise the prefetch target unless it is already at the
             * maximum.  Starting from -1 it goes to zero after the very
             * first page, then to one, then doubles, and is clamped to the
             * maximum once it has reached at least half of it.
             */
            if (node->prefetch_target < target_prefetch_pages)
            {
                if (node->prefetch_target >= target_prefetch_pages / 2)
                    node->prefetch_target = target_prefetch_pages;
                else if (node->prefetch_target > 0)
                    node->prefetch_target *= 2;
                else
                    node->prefetch_target++;
            }
#endif   /* USE_PREFETCH */
        }
        else
        {
            /*
             * Still on a previously fetched page: step to the next candidate.
             */
            scan->rs_cindex++;

#ifdef USE_PREFETCH

            /*
             * Try to prefetch at least a few pages even before we get to the
             * second page if we don't stop reading after the first tuple.
             */
            if (node->prefetch_target < target_prefetch_pages)
                node->prefetch_target++;
#endif   /* USE_PREFETCH */
        }

        /*
         * Out of range?  If so, nothing more to look at on this page.
         */
        if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
        {
            tbmres = NULL;
            node->tbmres = NULL;
            continue;
        }

#ifdef USE_PREFETCH

        /*
         * Prefetch requests are issued *after* fetching the current page so
         * they do not interfere with the main I/O, and only once we know
         * there is still work on the current page; otherwise we might
         * uselessly prefetch the very page we are about to read for real.
         */
        if (prefetch_iterator)
        {
            while (node->prefetch_pages < node->prefetch_target)
            {
                TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);

                if (tbmpre == NULL)
                {
                    /* No more pages to prefetch */
                    tbm_end_iterate(prefetch_iterator);
                    prefetch_iterator = NULL;
                    node->prefetch_iterator = NULL;
                    break;
                }
                node->prefetch_pages++;
                PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
            }
        }
#endif   /* USE_PREFETCH */

        /*
         * Okay to fetch the tuple.
         */
        targoffset = scan->rs_vistuples[scan->rs_cindex];
        page = (Page) BufferGetPage(scan->rs_cbuf);
        itemid = PageGetItemId(page, targoffset);
        Assert(ItemIdIsNormal(itemid));

        scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) page, itemid);
        scan->rs_ctup.t_len = ItemIdGetLength(itemid);
        ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);

        pgstat_count_heap_fetch(scan->rs_rd);

        /*
         * Point the result slot at this tuple.  Note that the slot acquires
         * a pin on the buffer.
         */
        ExecStoreTuple(&scan->rs_ctup, slot, scan->rs_cbuf, false);

        /*
         * With lossy info the qual conditions must be rechecked for every
         * tuple.
         */
        if (tbmres->recheck)
        {
            econtext->ecxt_scantuple = slot;
            ResetExprContext(econtext);

            if (!ExecQual(node->bitmapqualorig, econtext, false))
            {
                /* Fails recheck, so drop it and loop back for another */
                ExecClearTuple(slot);
                continue;
            }
        }

        /* OK to return this tuple */
        return slot;
    }
}
/*
 * ExecFilterRecommend
 *
 * This function just borrows a tuple descriptor from the RecView,
 * but we create the data ourselves through various means.
 *
 * recnode    - recommender scan state: user/item lists, cursor positions
 *              (userNum, itemNum, fullItemNum) and cached column numbers
 * accessMtd  - access method passed through to ExecRecFetch
 * recheckMtd - recheck method passed through to ExecRecFetch
 *
 * Each call synthesizes (user, item) tuples in a freshly made slot until
 * one passes the qual for a valid user, then returns it (projected when
 * projection info exists).  Returns NULL when the user list is exhausted.
 *
 * Slot ownership: the slot built on each iteration is dropped on every
 * path that does not hand it (or a projection that reads it) back to the
 * caller.
 */
static TupleTableSlot *
ExecFilterRecommend(RecScanState *recnode,
                    ExecScanAccessMtd accessMtd,
                    ExecScanRecheckMtd recheckMtd)
{
    ExprContext *econtext;
    List       *qual;
    ProjectionInfo *projInfo;
    ExprDoneCond isDone;
    TupleTableSlot *resultSlot;
    ScanState  *node;
    AttributeInfo *attributes;

    node = recnode->subscan;
    attributes = (AttributeInfo *) recnode->attributes;

    /*
     * Fetch data from node
     */
    qual = node->ps.qual;
    projInfo = node->ps.ps_ProjInfo;
    econtext = node->ps.ps_ExprContext;

    /*
     * Check to see if we're still projecting out tuples from a previous scan
     * tuple (because there is a function-returning-set in the projection
     * expressions).  If so, try to project another one.
     */
    if (node->ps.ps_TupFromTlist)
    {
        Assert(projInfo);       /* can't get here if not projecting */
        resultSlot = ExecProject(projInfo, &isDone);
        if (isDone == ExprMultipleResult)
            return resultSlot;
        /* Done with that source tuple... */
        node->ps.ps_TupFromTlist = false;
    }

    /*
     * Reset per-tuple memory context to free any expression evaluation
     * storage allocated in the previous tuple cycle.  Note this can't happen
     * until we're done projecting out tuples from a scan tuple.
     */
    ResetExprContext(econtext);

    /*
     * Build candidate tuples until one passes the qualification.
     */
    for (;;)
    {
        TupleTableSlot *slot;
        int         natts,
                    i,
                    userID,
                    userindex,
                    itemID,
                    itemindex;

        CHECK_FOR_INTERRUPTS();

        /*
         * The first thing we need to do is initialize our recommender model
         * and other things, if we haven't done so already.
         */
        if (!recnode->initialized)
            InitializeRecommender(recnode);

        /*
         * If we've exhausted our item list, then we're totally finished.
         * The cursors are reset in case this node is rescanned (e.g. as the
         * inner side of a join).
         */
        if (recnode->finished)
        {
            recnode->finished = false;
            recnode->userNum = 0;
            recnode->itemNum = 0;
            return NULL;
        }

        /*
         * Only one tuple is ever fetched, and only to copy its tuple
         * descriptor; that descriptor is then used to build as many new
         * tuples as needed.
         */
        if (recnode->base_slot == NULL)
        {
            slot = ExecRecFetch(node, accessMtd, recheckMtd);
            recnode->base_slot = CreateTupleDescCopy(slot->tts_tupleDescriptor);
        }

        /* Create a new slot to operate on. */
        slot = MakeSingleTupleTableSlot(recnode->base_slot);
        slot->tts_isempty = false;

        /*
         * place the current tuple into the expr context
         */
        econtext->ecxt_scantuple = slot;

        /* Mark all columns as present, with a zero placeholder value. */
        natts = slot->tts_tupleDescriptor->natts;
        for (i = 0; i < natts; i++)
        {
            slot->tts_values[i] = Int32GetDatum(0);
            slot->tts_isnull[i] = false;
            slot->tts_nvalid++;
        }

        /*
         * While we're here, record which tuple attributes correspond to our
         * key columns, so the name comparisons are done only once.
         *
         * NOTE(review): if no column matches attributes->userkey, useratt
         * stays negative and is later used as an array index -- confirm the
         * caller guarantees the columns exist.
         */
        if (recnode->useratt < 0)
        {
            for (i = 0; i < natts; i++)
            {
                char       *col_name = slot->tts_tupleDescriptor->attrs[i]->attname.data;

                if (strcmp(col_name, attributes->userkey) == 0)
                    recnode->useratt = i;
                else if (strcmp(col_name, attributes->itemkey) == 0)
                    recnode->itematt = i;
                else if (strcmp(col_name, attributes->eventval) == 0)
                    recnode->eventatt = i;
            }
        }

        /*
         * We now have a problem: we need to create prediction structures
         * for a user before we do filtering, so that we can have a proper
         * item list.  But we also need to filter before creating those
         * structures, so we don't end up taking forever with it.  The
         * solution is to filter twice.
         */
        itemID = -1;

        /* First, pick up the current user ID. */
        userindex = recnode->userNum;
        userID = recnode->userList[userindex];

        /*
         * We have a working user ID, but not a valid item list.  To decide
         * whether this user can pass the filter at all, dummy items are
         * plugged in and tested against the qual.  If a given user ID
         * doesn't make it past the filter with any item ID, then that user
         * is being filtered out, and we move on to the next.
         */
        if (recnode->newUser)
        {
            recnode->fullItemNum = 0;
            itemindex = recnode->fullItemNum;
            itemID = recnode->fullItemList[itemindex];

            slot->tts_values[recnode->useratt] = Int32GetDatum(userID);
            slot->tts_values[recnode->itematt] = Int32GetDatum(itemID);
            slot->tts_values[recnode->eventatt] = Int32GetDatum(-1);

            /* We have a preliminary slot - let's test it. */
            while (qual && !ExecQual(qual, econtext, false))
            {
                /* We failed the test.  Try the next item. */
                recnode->fullItemNum++;
                if (recnode->fullItemNum >= recnode->fullTotalItems)
                {
                    /*
                     * If we've reached the last item, move onto the next
                     * user.  If we've reached the last user, we're done.
                     */
                    InstrCountFiltered1(node, recnode->fullTotalItems);
                    recnode->userNum++;
                    recnode->newUser = true;
                    recnode->fullItemNum = 0;
                    if (recnode->userNum >= recnode->totalUsers)
                    {
                        recnode->userNum = 0;
                        recnode->itemNum = 0;

                        /*
                         * The slot built for this iteration is not handed
                         * to anyone, so release it here like every other
                         * non-returning exit does.
                         */
                        ExecDropSingleTupleTableSlot(slot);
                        return NULL;
                    }
                    userindex = recnode->userNum;
                    userID = recnode->userList[userindex];
                }

                itemindex = recnode->fullItemNum;
                itemID = recnode->fullItemList[itemindex];
                slot->tts_values[recnode->useratt] = Int32GetDatum(userID);
                slot->tts_values[recnode->itematt] = Int32GetDatum(itemID);
            }

            /*
             * If we get here, then we found a user who will be actually
             * returned in the results.  One quick reset here.
             */
            recnode->fullItemNum = 0;
        }

        /* Mark the user ID and index. */
        attributes->userID = userID;
        recnode->userindex = userindex;

        /*
         * With the user ID determined, see whether this is a new user.  If
         * so, attempt to create prediction data structures, or record that
         * this user is invalid.  This has to happen here so the item list
         * is established.
         */
        if (recnode->newUser)
        {
            recnode->validUser = prepUserForRating(recnode, userID);
            recnode->newUser = false;
        }

        /*
         * Now replace the item ID, if the user is valid.  Otherwise, leave
         * the item ID as is, as it doesn't matter what it is.
         */
        if (recnode->validUser)
            itemID = recnode->itemList[recnode->itemNum];

        /*
         * Advance the full-item cursor until it reaches itemID.  The scan
         * is bounded by fullTotalItems so that a missing item is reported
         * as a mismatch instead of reading past the end of fullItemList.
         */
        while (recnode->fullItemNum < recnode->fullTotalItems &&
               recnode->fullItemList[recnode->fullItemNum] < itemID)
            recnode->fullItemNum++;
        itemindex = recnode->fullItemNum;
        if (itemindex >= recnode->fullTotalItems ||
            recnode->fullItemList[itemindex] > itemID)
            elog(ERROR, "critical item mismatch in ExecRecommend");

        /*
         * Plug in the data, marking those columns full.  The rating column
         * gets a temporary placeholder.
         */
        slot->tts_values[recnode->useratt] = Int32GetDatum(userID);
        slot->tts_values[recnode->itematt] = Int32GetDatum(itemID);
        slot->tts_values[recnode->eventatt] = Int32GetDatum(-1);

        /*
         * It's possible our filter criteria involves the RecScore somehow.
         * If that's the case, we need to calculate it before we do the qual
         * filtering.  Also, if we're doing a JoinRecommend, we should not
         * calculate the RecScore in this node.  In the current version of
         * RecDB, an OP_NOFILTER shouldn't be allowed.
         */
        if (attributes->opType == OP_NOFILTER)
            applyRecScore(recnode, slot, itemID, itemindex);

        /*
         * Move onto the next item, for next time.  If we're doing a
         * RecJoin, though, we'll move onto the next user instead.
         */
        recnode->itemNum++;
        if (recnode->itemNum >= recnode->totalItems ||
            attributes->opType == OP_JOIN ||
            attributes->opType == OP_GENERATEJOIN)
        {
            /*
             * If we've reached the last item, move onto the next user.  If
             * we've reached the last user, we're done.
             */
            recnode->userNum++;
            recnode->newUser = true;
            recnode->itemNum = 0;
            recnode->fullItemNum = 0;
            if (recnode->userNum >= recnode->totalUsers)
                recnode->finished = true;
        }

        /*
         * check that the current tuple satisfies the qual-clause
         *
         * check for non-nil qual here to avoid a function call to ExecQual()
         * when the qual is nil ... saves only a few cycles, but they add up
         * ...
         */
        if (!qual || ExecQual(qual, econtext, false))
        {
            /*
             * If this is an invalid user, then we'll skip this tuple,
             * adding one to the filter count.
             */
            if (!recnode->validUser)
            {
                InstrCountFiltered1(node, 1);
                ResetExprContext(econtext);
                ExecDropSingleTupleTableSlot(slot);
                continue;
            }

            /*
             * Found a satisfactory scan tuple.  This is usually when we
             * will calculate and apply the RecScore.
             */
            if (attributes->opType == OP_FILTER ||
                attributes->opType == OP_GENERATE)
                applyRecScore(recnode, slot, itemID, itemindex);

            if (projInfo)
            {
                /*
                 * Form a projection tuple, store it in the result tuple
                 * slot and return it --- unless we find we can project no
                 * tuples from this scan tuple, in which case continue scan.
                 */
                resultSlot = ExecProject(projInfo, &isDone);
                if (isDone != ExprEndResult)
                {
                    node->ps.ps_TupFromTlist = (isDone == ExprMultipleResult);
                    return resultSlot;
                }
            }
            else
            {
                /*
                 * Here, we aren't projecting, so just return scan tuple.
                 */
                return slot;
            }
        }
        else
        {
            InstrCountFiltered1(node, 1);
        }

        /*
         * Tuple fails qual, so free per-tuple memory and try again.
         */
        ResetExprContext(econtext);
        ExecDropSingleTupleTableSlot(slot);
    }
}
/* ----------------------------------------------------------------
 *      ExecResult(node)
 *
 *      Produce the next output tuple of a Result node.
 *
 *      The node's constant qualification (resconstantqual) is evaluated
 *      once, on the first call; if it fails, the node is marked done and
 *      NULL is returned from then on.
 *
 *      When there is an outer plan, each tuple fetched from it is made
 *      available as the OUTER tuple and projected.  When there is no
 *      outer plan, the target list is projected from constants, and only
 *      one source "tuple" is ever processed.
 *
 *      A projection that yields a set is drained across successive calls
 *      (tracked through ps_TupFromTlist); a projection that yields an
 *      empty set causes the next outer tuple to be tried.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecResult(ResultState *node)
{
    ExprContext    *exprCxt = node->ps.ps_ExprContext;
    TupleTableSlot *projected;
    ExprDoneCond    projStatus;

    /*
     * One-time check of constant qualifications such as (2 > 1).
     */
    if (node->rs_checkqual)
    {
        bool        passed = ExecQual((List *) node->resconstantqual,
                                      exprCxt,
                                      false);

        node->rs_checkqual = false;
        if (!passed)
        {
            node->rs_done = true;
            return NULL;
        }
    }

    /*
     * If the previous call left a set-returning projection unfinished,
     * keep pulling rows out of it before touching a new source tuple.
     */
    if (node->ps.ps_TupFromTlist)
    {
        projected = ExecProject(node->ps.ps_ProjInfo, &projStatus);
        if (projStatus == ExprMultipleResult)
            return projected;

        /* that source tuple is exhausted */
        node->ps.ps_TupFromTlist = false;
    }

    /*
     * Only now is it safe to release the expression storage of the previous
     * tuple cycle: no projection is in progress any more.
     */
    ResetExprContext(exprCxt);

    /*
     * rs_done means either the single constant tuple was already returned
     * or the constant qual failed; in both cases there is nothing left.
     */
    for (;;)
    {
        PlanState  *child;

        if (node->rs_done)
            break;

        child = outerPlanState(node);
        if (child == NULL)
        {
            /* constant target list: this pass is the only one */
            node->rs_done = true;
        }
        else
        {
            TupleTableSlot *input = ExecProcNode(child);

            /* outer plan exhausted */
            if (TupIsNull(input))
                return NULL;

            /* projection expressions read the input as varno OUTER */
            exprCxt->ecxt_outertuple = input;
        }

        projected = ExecProject(node->ps.ps_ProjInfo, &projStatus);

        /* empty set: go round again for another source tuple */
        if (projStatus == ExprEndResult)
            continue;

        node->ps.ps_TupFromTlist = (projStatus == ExprMultipleResult);
        return projected;
    }

    return NULL;
}
/* ----------------------------------------------------------------
 *      ExecResult(node)
 *
 *      Produce the next output tuple of a Result node.
 *
 *      The constant qualification (resconstantqual) is evaluated once,
 *      on the first call; if it fails the node is marked done and NULL
 *      is returned.
 *
 *      With an outer plan, one tuple is fetched from it per call, exposed
 *      as the OUTER tuple and projected.  Without an outer plan, the
 *      constant target list is projected exactly once.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecResult(PlanState *pstate)
{
    ResultState *node = castNode(ResultState, pstate);
    ExprContext *exprCxt;
    PlanState  *child;

    CHECK_FOR_INTERRUPTS();

    exprCxt = node->ps.ps_ExprContext;

    /*
     * One-time check of constant qualifications such as (2 > 1).
     */
    if (node->rs_checkqual)
    {
        bool        passed = ExecQual(node->resconstantqual, exprCxt);

        node->rs_checkqual = false;
        if (!passed)
        {
            node->rs_done = true;
            return NULL;
        }
    }

    /*
     * Release expression-evaluation storage left over from the previous
     * tuple cycle.
     */
    ResetExprContext(exprCxt);

    /*
     * rs_done means either the single constant tuple was already returned
     * or the constant qual failed; in both cases there is nothing left.
     */
    if (node->rs_done)
        return NULL;

    child = outerPlanState(node);
    if (child == NULL)
    {
        /* constant target list: emit it this once and never again */
        node->rs_done = true;
    }
    else
    {
        TupleTableSlot *input = ExecProcNode(child);

        /* outer plan exhausted */
        if (TupIsNull(input))
            return NULL;

        /* projection expressions read the input as varno OUTER */
        exprCxt->ecxt_outertuple = input;
    }

    /* build and hand back the result tuple */
    return ExecProject(node->ps.ps_ProjInfo);
}
/*
 * ExecRepeat
 *
 * Repeatedly output each tuple received from the outer plan a certain
 * number of times.  The number of repetitions is obtained by evaluating
 * repeatstate->expr_state against the received tuple.
 *
 * Note that the Repeat node also has the functionality to evaluate the
 * GroupingFunc: before every qual check/projection, econtext->group_id is
 * set to the remaining repetition count minus one and econtext->grouping
 * is set from the plan node.
 *
 * State carried between calls:
 *   repeatstate->slot          input tuple that still has repetitions
 *                              pending (set only when the count exceeds 1)
 *   repeatstate->repeat_count  repetitions still to be tried for that tuple
 *   repeatstate->repeat_done   set once the outer plan is exhausted
 *
 * Returns the next projected tuple, or NULL when the outer plan has no
 * more tuples.
 */
TupleTableSlot *
ExecRepeat(RepeatState *repeatstate)
{
    TupleTableSlot *outerslot;
    ExprContext *econtext = repeatstate->ps.ps_ExprContext;
    Repeat     *node = (Repeat *) repeatstate->ps.plan;

    if (repeatstate->repeat_done)
        return NULL;

    /*
     * If the previous tuple still needs to be outputted, output it here.
     */
    if (repeatstate->slot != NULL)
    {
        if (repeatstate->repeat_count > 0)
        {
            /* Output the previous tuple */
            econtext->ecxt_outertuple = repeatstate->slot;
            econtext->ecxt_scantuple = repeatstate->slot;

            do
            {
                econtext->group_id = repeatstate->repeat_count - 1;
                econtext->grouping = node->grouping;

                repeatstate->repeat_count--;

                /* Check the qual until we find one output tuple. */
                if (ExecQual(repeatstate->ps.qual, econtext, false))
                {
                    Gpmon_M_Incr_Rows_Out(GpmonPktFromRepeatState(repeatstate));
                    CheckSendPlanStateGpmonPkt(&repeatstate->ps);
                    return ExecProject(repeatstate->ps.ps_ProjInfo, NULL);
                }
            } while (repeatstate->repeat_count > 0);
        }
        else
            repeatstate->slot = NULL;
    }

    while (!repeatstate->repeat_done)
    {
        MemoryContext oldcxt;
        bool        isNull = false;

        /*
         * Free per-tuple memory before working on a new input tuple.  This
         * must happen on every iteration, not just once per call: input
         * tuples that are skipped (count of zero) or whose repetitions all
         * fail the qual would otherwise pile up expression-evaluation
         * garbage until some tuple finally qualifies.
         */
        ResetExprContext(econtext);

        outerslot = ExecProcNode(outerPlanState(repeatstate));
        if (TupIsNull(outerslot))
        {
            repeatstate->repeat_done = true;
            return NULL;
        }

        econtext->ecxt_outertuple = outerslot;
        econtext->ecxt_scantuple = outerslot;

        /* Compute the number of times to output this tuple. */
        oldcxt = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
        repeatstate->repeat_count =
            DatumGetInt32(ExecEvalExpr(repeatstate->expr_state, econtext,
                                       &isNull, NULL));
        Assert(!isNull);
        MemoryContextSwitchTo(oldcxt);

        /* Nothing to emit for this tuple; fetch the next one. */
        if (repeatstate->repeat_count == 0)
            continue;

        /*
         * Remember the tuple only when more than one repetition is needed,
         * so that later calls can keep emitting it.
         */
        if (repeatstate->repeat_count > 1)
            repeatstate->slot = outerslot;

        do
        {
            econtext->group_id = repeatstate->repeat_count - 1;
            econtext->grouping = node->grouping;

            repeatstate->repeat_count--;

            /* Check the qual until we find one output tuple. */
            if (ExecQual(repeatstate->ps.qual, econtext, false))
            {
                Gpmon_M_Incr_Rows_Out(GpmonPktFromRepeatState(repeatstate));
                CheckSendPlanStateGpmonPkt(&repeatstate->ps);
                return ExecProject(repeatstate->ps.ps_ProjInfo, NULL);
            }
        } while (repeatstate->repeat_count > 0);
    }

    return NULL;
}
/* ----------------------------------------------------------------
 *      ExecNestLoop(node)
 *
 * old comments
 *      Returns the tuple joined from inner and outer tuples which
 *      satisfies the qualification clause.
 *
 *      It scans the inner relation to join with current outer tuple.
 *
 *      If none is found, next tuple from the outer relation is retrieved
 *      and the inner relation is scanned from the beginning again to join
 *      with the outer tuple.
 *
 *      Nil is returned if all the remaining outer tuples are tried and
 *      all fail to join with the inner tuples.
 *
 *      Nil is also returned if there is no tuple from inner relation.
 *
 *      Conditions:
 *        -- outerTuple contains current tuple from outer relation and
 *           the right son (inner relation) maintains "cursor" at the tuple
 *           returned previously.
 *              This is achieved by maintaining a scan position on the outer
 *              relation.
 *
 *      Initial States:
 *        -- the outer child and the inner child
 *             are prepared to return the first tuple.
 *
 *      Parameters:
 *        node   -- the NestLoop plan node; its nlstate carries the join's
 *                  run-time state between calls.
 *        parent -- passed through to ExecReScan() when the inner plan is
 *                  restarted for a new outer tuple.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecNestLoop(NestLoop *node, Plan* parent)
{
    NestLoopState *nlstate;
    Plan *innerPlan;
    Plan *outerPlan;
    bool needNewOuterTuple;

    TupleTableSlot *outerTupleSlot;
    TupleTableSlot *innerTupleSlot;

    List *qual;
    bool qualResult;
    ExprContext *econtext;

    /* ----------------
     *  get information from the node
     * ----------------
     */
    ENL1_printf("getting info from node");

    nlstate = node->nlstate;
    qual = node->join.qual;
    outerPlan = outerPlan(&node->join);
    innerPlan = innerPlan(&node->join);

    /* ----------------
     *  initialize expression context
     * ----------------
     */
    econtext = nlstate->jstate.cs_ExprContext;

    /* ----------------
     *  get the current outer tuple (the one saved by the previous call,
     *  if any) and make it visible to expression evaluation
     * ----------------
     */
    outerTupleSlot = nlstate->jstate.cs_OuterTupleSlot;
    econtext->ecxt_outertuple = outerTupleSlot;

    /* ----------------
     *  If the previous call left a projection in progress
     *  (cs_TupFromTlist is set below to !isDone), continue it first
     *  and return its result unless it reports that it is done.
     * ----------------
     */
    if (nlstate->jstate.cs_TupFromTlist) {
        TupleTableSlot *result;
        bool isDone;

        result = ExecProject(nlstate->jstate.cs_ProjInfo, &isDone);
        if (!isDone)
            return result;
    }

    /* ----------------
     *  Ok, everything is setup for the join so now loop until
     *  we return a qualifying join tuple..
     * ----------------
     */
    ENL1_printf("entering main loop");
    for(;;) {
        /* ----------------
         *  The essential idea now is to get the next inner tuple
         *  and join it with the current outer tuple.
         * ----------------
         */
        needNewOuterTuple = false;

        /* ----------------
         *  If outer tuple is not null then that means
         *  we are in the middle of a scan and we should
         *  restore our previously saved scan position.
         * ----------------
         */
        if (! TupIsNull(outerTupleSlot)) {
            ENL1_printf("have outer tuple, restoring outer plan");
            ExecRestrPos(outerPlan);
        } else {
            ENL1_printf("outer tuple is nil, need new outer tuple");
            needNewOuterTuple = true;
        }

        /* ----------------
         *  if we have an outerTuple, try to get the next inner tuple.
         * ----------------
         */
        if (!needNewOuterTuple) {
            ENL1_printf("getting new inner tuple");

            innerTupleSlot = ExecProcNode(innerPlan, (Plan*)node);
            econtext->ecxt_innertuple = innerTupleSlot;

            if (TupIsNull(innerTupleSlot)) {
                ENL1_printf("no inner tuple, need new outer tuple");
                needNewOuterTuple = true;
            }
        }

        /* ----------------
         *  loop until we have a new outer tuple and a new
         *  inner tuple.
         * ----------------
         */
        while (needNewOuterTuple) {
            /* ----------------
             *  now try to get the next outer tuple
             * ----------------
             */
            ENL1_printf("getting new outer tuple");
            outerTupleSlot = ExecProcNode(outerPlan, (Plan*)node);
            econtext->ecxt_outertuple = outerTupleSlot;

            /* ----------------
             *  if there are no more outer tuples, then the join
             *  is complete..
             * ----------------
             */
            if (TupIsNull(outerTupleSlot)) {
                ENL1_printf("no outer tuple, ending join");
                return NULL;
            }

            /* ----------------
             *  we have a new outer tuple so we mark our position
             *  in the outer scan and save the outer tuple in the
             *  NestLoop state
             * ----------------
             */
            ENL1_printf("saving new outer tuple information");
            ExecMarkPos(outerPlan);
            nlstate->jstate.cs_OuterTupleSlot = outerTupleSlot;

            /* ----------------
             *  now rescan the inner plan and get a new inner tuple
             * ----------------
             */

            ENL1_printf("rescanning inner plan");

            /*
             * The scan key of the inner plan might depend on the current
             * outer tuple (e.g. in index scans), that's why we pass our
             * expr context.
             */
            ExecReScan(innerPlan, econtext, parent);

            ENL1_printf("getting new inner tuple");

            innerTupleSlot = ExecProcNode(innerPlan, (Plan*)node);
            econtext->ecxt_innertuple = innerTupleSlot;

            /*
             * An empty inner scan leaves needNewOuterTuple set, so the
             * enclosing while loop advances to the next outer tuple.
             */
            if (TupIsNull(innerTupleSlot)) {
                ENL1_printf("couldn't get inner tuple - need new outer tuple");
            } else {
                ENL1_printf("got inner and outer tuples");
                needNewOuterTuple = false;
            }
        } /* while (needNewOuterTuple) */

        /* ----------------
         *  at this point we have a new pair of inner and outer
         *  tuples so we test the inner and outer tuples to see
         *  if they satisfy the node's qualification.
         * ----------------
         */
        ENL1_printf("testing qualification");
        qualResult = ExecQual((List*)qual, econtext);

        if (qualResult) {
            /* ----------------
             *  qualification was satisfied so we project and
             *  return the slot containing the result tuple
             *  using ExecProject().
             * ----------------
             */
            ProjectionInfo *projInfo;
            TupleTableSlot *result;
            bool isDone;

            ENL1_printf("qualification succeeded, projecting tuple");

            projInfo = nlstate->jstate.cs_ProjInfo;
            result = ExecProject(projInfo, &isDone);

            /* remember whether the projection has more to return next call */
            nlstate->jstate.cs_TupFromTlist = !isDone;
            return result;
        }

        /* ----------------
         *  qualification failed so we have to try again..
         * ----------------
         */
        ENL1_printf("qualification failed, looping");
    }
}
/* ----------------------------------------------------------------
 *      ExecScan
 *
 *      Generic scan driver: repeatedly asks the supplied access method
 *      for the next tuple, filters it through the node's qual, applies
 *      the node's projection (if any) and returns the first tuple that
 *      survives.
 *
 *      node       -- scan state; supplies qual, projection info and the
 *                    expression context
 *      accessMtd  -- function returning the next raw scan tuple
 *
 *      When the access method runs dry an empty slot is returned; if
 *      the node projects, that slot is the projection's result slot.
 *
 *      Conditions:
 *        -- the "cursor" maintained by the AMI is positioned at the tuple
 *           returned previously.
 *
 *      Initial States:
 *        -- the relation indicated is opened for scanning so that the
 *           "cursor" is positioned before the first qualifying tuple.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecScan(ScanState *node,
         ExecScanAccessMtd accessMtd)   /* function returning a tuple */
{
    List           *quals = node->ps.qual;
    ProjectionInfo *projection = node->ps.ps_ProjInfo;
    ExprContext    *exprCxt;
    ExprDoneCond    projStatus;
    TupleTableSlot *projected;

    /*
     * Fast path: with nothing to filter and nothing to project, the raw
     * scan tuple is the answer.
     */
    if (quals == NULL && projection == NULL)
        return accessMtd(node);

    /*
     * A set-returning projection may still have rows pending from the
     * previous scan tuple; drain those first.
     */
    if (node->ps.ps_TupFromTlist)
    {
        Assert(projInfo);       /* can't get here if not projecting */
        projected = ExecProject(projection, &projStatus);
        if (projStatus == ExprMultipleResult)
            return projected;

        /* that source tuple is exhausted */
        node->ps.ps_TupFromTlist = false;
    }

    /*
     * No projection is in progress any more, so the expression storage of
     * the previous tuple cycle can be released.
     */
    exprCxt = node->ps.ps_ExprContext;
    ResetExprContext(exprCxt);

    /*
     * Pull tuples from the access method until one passes the qual.
     */
    for (;;)
    {
        TupleTableSlot *candidate;

        CHECK_FOR_INTERRUPTS();

        candidate = accessMtd(node);

        /*
         * End of scan.  Hand back an empty slot, taking care to use the
         * projection's result slot when projecting so that the caller sees
         * the right tuple descriptor.
         */
        if (TupIsNull(candidate))
        {
            if (projection == NULL)
                return candidate;
            return ExecClearTuple(projection->pi_slot);
        }

        /* expose the tuple to qual and projection expressions */
        exprCxt->ecxt_scantuple = candidate;

        /*
         * Test the qual; an empty qual is checked inline to skip the
         * function call.
         */
        if (quals == NULL || ExecQual(quals, exprCxt, false))
        {
            /* not projecting: the scan tuple itself is the result */
            if (projection == NULL)
                return candidate;

            /*
             * Project.  An empty projection result means this scan tuple
             * yields nothing, so the scan continues.
             */
            projected = ExecProject(projection, &projStatus);
            if (projStatus != ExprEndResult)
            {
                node->ps.ps_TupFromTlist = (projStatus == ExprMultipleResult);
                return projected;
            }
        }

        /*
         * Nothing returned for this tuple; free per-tuple memory and retry.
         */
        ResetExprContext(exprCxt);
    }
}
/* ----------------------------------------------------------------
 *      BitmapHeapNext
 *
 *      Return the next tuple of the BitmapHeapScan node's relation, as
 *      selected by the TID bitmap produced by the node's subplan, or an
 *      empty slot when the bitmap is exhausted.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
    EState         *execState = node->ss.ps.state;
    ExprContext    *exprCxt = node->ss.ps.ps_ExprContext;
    TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
    HeapScanDesc    heapScan = node->ss.ss_currentScanDesc;
    Index           relid = ((BitmapHeapScan *) node->ss.ps.plan)->scan.scanrelid;
    TIDBitmap      *bitmap = node->tbm;
    TBMIterateResult *pageEntry = node->tbmres;
    OffsetNumber    offnum;

    /*
     * EvalPlanQual case: when a substitute tuple has been supplied for this
     * relation, return just that tuple (once), provided it satisfies the
     * original bitmap quals.  The extra test here is not pretty, but there
     * is no other way for now; dedicated nodes switched in at Init/ReScan
     * time would be an alternative.
     */
    if (execState->es_evTuple != NULL &&
        execState->es_evTuple[relid - 1] != NULL)
    {
        /* already handed out on an earlier call */
        if (execState->es_evTupleNull[relid - 1])
            return ExecClearTuple(slot);

        ExecStoreTuple(execState->es_evTuple[relid - 1],
                       slot, InvalidBuffer, false);

        /* Does the tuple meet the original qual conditions? */
        exprCxt->ecxt_scantuple = slot;

        ResetExprContext(exprCxt);

        if (!ExecQual(node->bitmapqualorig, exprCxt, false))
            ExecClearTuple(slot);   /* would not be returned by scan */

        /* Flag for the next call that no more tuples */
        execState->es_evTupleNull[relid - 1] = true;

        return slot;
    }

    /*
     * First call: run the underlying index scan(s) to obtain the bitmap and
     * get it ready for iteration.
     */
    if (bitmap == NULL)
    {
        bitmap = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));

        if (!bitmap || !IsA(bitmap, TIDBitmap))
            elog(ERROR, "unrecognized result from subplan");

        node->tbm = bitmap;
        pageEntry = NULL;
        node->tbmres = pageEntry;

        tbm_begin_iterate(bitmap);
    }

    for (;;)
    {
        Page        page;
        ItemId      itemId;

        if (pageEntry != NULL)
        {
            /* still on a page fetched earlier: step to its next tuple */
            heapScan->rs_cindex++;
        }
        else
        {
            /* need a fresh page entry from the bitmap */
            pageEntry = tbm_iterate(bitmap);
            node->tbmres = pageEntry;

            /* bitmap exhausted: end of scan */
            if (pageEntry == NULL)
                return ExecClearTuple(slot);

            /*
             * Skip entries that claim to lie beyond what we believe is the
             * end of the relation.  (Probably unnecessary given the lock
             * taken before the index scans ran, but it is cheap insurance.)
             */
            if (pageEntry->blockno >= heapScan->rs_nblocks)
            {
                pageEntry = NULL;
                node->tbmres = pageEntry;
                continue;
            }

            /* read the heap page and work out its candidate tuples */
            bitgetpage(heapScan, pageEntry);

            /* start with the first candidate */
            heapScan->rs_cindex = 0;
        }

        /*
         * Ran off the candidate list?  Then this page is finished.
         */
        if (heapScan->rs_cindex < 0 ||
            heapScan->rs_cindex >= heapScan->rs_ntuples)
        {
            pageEntry = NULL;
            node->tbmres = pageEntry;
            continue;
        }

        /*
         * Locate the tuple on the page and fill in the scan's current-tuple
         * header.
         */
        offnum = heapScan->rs_vistuples[heapScan->rs_cindex];
        page = (Page) BufferGetPage(heapScan->rs_cbuf);
        itemId = PageGetItemId(page, offnum);
        Assert(ItemIdIsNormal(itemId));

        heapScan->rs_ctup.t_data =
            (HeapTupleHeader) PageGetItem((Page) page, itemId);
        heapScan->rs_ctup.t_len = ItemIdGetLength(itemId);
        ItemPointerSet(&heapScan->rs_ctup.t_self, pageEntry->blockno, offnum);

        pgstat_count_heap_fetch(heapScan->rs_rd);

        /*
         * Point the result slot at the tuple; the slot takes its own pin
         * on the buffer.
         */
        ExecStoreTuple(&heapScan->rs_ctup,
                       slot,
                       heapScan->rs_cbuf,
                       false);

        /*
         * A negative ntuples marks a lossy page entry, for which the
         * original quals must be rechecked against every tuple.
         */
        if (pageEntry->ntuples < 0)
        {
            exprCxt->ecxt_scantuple = slot;
            ResetExprContext(exprCxt);

            if (!ExecQual(node->bitmapqualorig, exprCxt, false))
            {
                /* Fails recheck, so drop it and loop back for another */
                ExecClearTuple(slot);
                continue;
            }
        }

        /* OK to return this tuple */
        return slot;
    }
}
/* ----------------------------------------------------------------
 *      IndexNext
 *
 *      Fetch the next tuple of the IndexScan node's relation through the
 *      index scan held in the node's state.  Returns the scan tuple
 *      slot, emptied when the index scan has no more tuples.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
IndexNext(IndexScanState *node)
{
    ScanDirection   dir = node->ss.ps.state->es_direction;
    IndexScanDesc   idxScan;
    ExprContext    *exprCxt;
    TupleTableSlot *scanSlot;

    /*
     * A plan that orders the index backward inverts whatever direction the
     * executor is currently moving in.
     */
    if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir))
    {
        if (ScanDirectionIsForward(dir))
            dir = BackwardScanDirection;
        else if (ScanDirectionIsBackward(dir))
            dir = ForwardScanDirection;
    }

    idxScan = node->iss_ScanDesc;
    exprCxt = node->ss.ps.ps_ExprContext;
    scanSlot = node->ss.ss_ScanTupleSlot;

    /*
     * Keep asking the index for tuples until one is acceptable.
     */
    for (;;)
    {
        HeapTuple   fetched = index_getnext(idxScan, dir);

        /* index scan exhausted: report end of scan with an empty slot */
        if (fetched == NULL)
            return ExecClearTuple(scanSlot);

        /*
         * Put the tuple into the scan slot.  The final 'false' is because
         * tuples returned by amgetnext point into disk pages and must not
         * be pfree()'d.
         */
        ExecStoreTuple(fetched,             /* tuple to store */
                       scanSlot,            /* slot to store in */
                       idxScan->xs_cbuf,    /* buffer containing tuple */
                       false);              /* don't pfree */

        /* exact match reported by the index: done */
        if (!idxScan->xs_recheck)
            return scanSlot;

        /*
         * Lossy index result: re-evaluate the index quals against the real
         * tuple, and move on to the next index entry if they fail.
         */
        exprCxt->ecxt_scantuple = scanSlot;
        ResetExprContext(exprCxt);
        if (ExecQual(node->indexqualorig, exprCxt, false))
            return scanSlot;
    }
}
/*
 * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
 *
 * This is called back from an access-method-specific index build procedure
 * after the AM has done whatever setup it needs.  The parent heap relation
 * is scanned to find tuples that should be entered into the index.  Each
 * such tuple is passed to the AM's callback routine, which does the right
 * things to add it to the new index.  After we return, the AM's index
 * build procedure does whatever cleanup is needed; in particular, it should
 * close the heap and index relations.
 *
 * The total count of heap tuples is returned.  This is for updating pg_class
 * statistics.  (It's annoying not to be able to do that here, but we can't
 * do it until after the relation is closed.)  Note that the index AM itself
 * must keep track of the number of index tuples; we don't do so here because
 * the AM might reject some of the tuples for its own reasons, such as being
 * unable to store NULLs.
 *
 * Note on the returned count: a tuple is counted once it has passed the
 * liveness test below (i.e. it is not HEAPTUPLE_DEAD), and before the
 * partial-index predicate is applied, so tuples rejected by the predicate
 * are still included.
 *
 * Parameters:
 *  heapRelation    heap to scan
 *  indexRelation   index being built; passed through to the callback
 *  indexInfo       supplies the predicate and expressions to evaluate
 *  callback        invoked once per tuple that is to be indexed
 *  callback_state  opaque pointer handed to every callback invocation
 */
double
IndexBuildHeapScan(Relation heapRelation,
                   Relation indexRelation,
                   IndexInfo *indexInfo,
                   IndexBuildCallback callback,
                   void *callback_state)
{
    HeapScanDesc scan;
    HeapTuple   heapTuple;
    TupleDesc   heapDescriptor;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
    double      reltuples;
    List       *predicate;
    TupleTable  tupleTable;
    TupleTableSlot *slot;
    EState     *estate;
    ExprContext *econtext;
    Snapshot    snapshot;
    TransactionId OldestXmin;

    /*
     * sanity checks
     */
    Assert(OidIsValid(indexRelation->rd_rel->relam));

    heapDescriptor = RelationGetDescr(heapRelation);

    /*
     * Need an EState for evaluation of index expressions and
     * partial-index predicates.
     */
    estate = CreateExecutorState();
    econtext = GetPerTupleExprContext(estate);

    /*
     * If this is a predicate (partial) index, we will need to evaluate
     * the predicate using ExecQual, which requires the current tuple to
     * be in a slot of a TupleTable.  Likewise if there are any
     * expressions.
     */
    if (indexInfo->ii_Predicate != NIL || indexInfo->ii_Expressions != NIL)
    {
        tupleTable = ExecCreateTupleTable(1);
        slot = ExecAllocTableSlot(tupleTable);
        ExecSetSlotDescriptor(slot, heapDescriptor, false);

        /* Arrange for econtext's scan tuple to be the tuple under test */
        econtext->ecxt_scantuple = slot;

        /* Set up execution state for predicate. */
        predicate = (List *)
            ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
                            estate);
    }
    else
    {
        /* no slot needed; "slot" and "predicate" double as flags below */
        tupleTable = NULL;
        slot = NULL;
        predicate = NIL;
    }

    /*
     * Ok, begin our scan of the base relation.  We use SnapshotAny
     * because we must retrieve all tuples and do our own time qual
     * checks.  (In bootstrap mode SnapshotNow is used instead, and
     * heap_getnext's own visibility check is relied upon.)
     */
    if (IsBootstrapProcessingMode())
    {
        snapshot = SnapshotNow;
        OldestXmin = InvalidTransactionId;
    }
    else
    {
        snapshot = SnapshotAny;
        OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared);
    }

    scan = heap_beginscan(heapRelation, /* relation */
                          snapshot,     /* snapshot */
                          0,            /* number of keys */
                          (ScanKey) NULL);      /* scan key */

    reltuples = 0;

    /*
     * Scan all tuples in the base relation.
     */
    while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        bool        tupleIsAlive;

        CHECK_FOR_INTERRUPTS();

        if (snapshot == SnapshotAny)
        {
            /* do our own time qual check */
            bool        indexIt;
            uint16      sv_infomask;

            /*
             * HeapTupleSatisfiesVacuum may update tuple's hint status
             * bits. We could possibly get away with not locking the
             * buffer here, since caller should hold ShareLock on the
             * relation, but let's be conservative about it.
             */
            LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);

            /* remember the infomask so a hint-bit change can be detected */
            sv_infomask = heapTuple->t_data->t_infomask;

            /*
             * indexIt decides whether the tuple goes into the index at all;
             * tupleIsAlive is reported to the callback and also gates the
             * partial-index predicate test further down.
             */
            switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, OldestXmin))
            {
                case HEAPTUPLE_DEAD:
                    indexIt = false;
                    tupleIsAlive = false;
                    break;
                case HEAPTUPLE_LIVE:
                    indexIt = true;
                    tupleIsAlive = true;
                    break;
                case HEAPTUPLE_RECENTLY_DEAD:

                    /*
                     * If tuple is recently deleted then we must index it
                     * anyway to keep VACUUM from complaining.
                     */
                    indexIt = true;
                    tupleIsAlive = false;
                    break;
                case HEAPTUPLE_INSERT_IN_PROGRESS:

                    /*
                     * Since caller should hold ShareLock or better, we
                     * should not see any tuples inserted by open
                     * transactions --- unless it's our own transaction.
                     * (Consider INSERT followed by CREATE INDEX within a
                     * transaction.)  An exception occurs when reindexing
                     * a system catalog, because we often release lock on
                     * system catalogs before committing.
                     */
                    if (!TransactionIdIsCurrentTransactionId(
                              HeapTupleHeaderGetXmin(heapTuple->t_data))
                        && !IsSystemRelation(heapRelation))
                        elog(ERROR, "concurrent insert in progress");
                    indexIt = true;
                    tupleIsAlive = true;
                    break;
                case HEAPTUPLE_DELETE_IN_PROGRESS:

                    /*
                     * Since caller should hold ShareLock or better, we
                     * should not see any tuples deleted by open
                     * transactions --- unless it's our own transaction.
                     * (Consider DELETE followed by CREATE INDEX within a
                     * transaction.)  An exception occurs when reindexing
                     * a system catalog, because we often release lock on
                     * system catalogs before committing.
                     */
                    if (!TransactionIdIsCurrentTransactionId(
                              HeapTupleHeaderGetXmax(heapTuple->t_data))
                        && !IsSystemRelation(heapRelation))
                        elog(ERROR, "concurrent delete in progress");
                    indexIt = true;
                    tupleIsAlive = false;
                    break;
                default:
                    elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
                    indexIt = tupleIsAlive = false;     /* keep compiler quiet */
                    break;
            }

            /* check for hint-bit update by HeapTupleSatisfiesVacuum */
            if (sv_infomask != heapTuple->t_data->t_infomask)
                SetBufferCommitInfoNeedsSave(scan->rs_cbuf);

            LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

            if (!indexIt)
                continue;
        }
        else
        {
            /* heap_getnext did the time qual check */
            tupleIsAlive = true;
        }

        /* counted before the predicate test; see the header note */
        reltuples += 1;

        /* release expression-evaluation memory left from the previous tuple */
        MemoryContextReset(econtext->ecxt_per_tuple_memory);

        /* Set up for predicate or expression evaluation */
        if (slot)
            ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);

        /*
         * In a partial index, discard tuples that don't satisfy the
         * predicate.  We can also discard recently-dead tuples, since
         * VACUUM doesn't complain about tuple count mismatch for partial
         * indexes.
         */
        if (predicate != NIL)
        {
            if (!tupleIsAlive)
                continue;
            if (!ExecQual(predicate, econtext, false))
                continue;
        }

        /*
         * For the current heap tuple, extract all the attributes we use
         * in this index, and note which are null.  This also performs
         * evaluation of any expressions needed.
         */
        FormIndexDatum(indexInfo,
                       heapTuple,
                       heapDescriptor,
                       estate,
                       attdata,
                       nulls);

        /*
         * You'd think we should go ahead and build the index tuple here,
         * but some index AMs want to do further processing on the data
         * first.  So pass the attdata and nulls arrays, instead.
         */

        /* Call the AM's callback routine to process the tuple */
        callback(indexRelation, heapTuple, attdata, nulls, tupleIsAlive,
                 callback_state);
    }

    heap_endscan(scan);

    if (tupleTable)
        ExecDropTupleTable(tupleTable, true);

    FreeExecutorState(estate);

    /* These may have been pointing to the now-gone estate */
    indexInfo->ii_ExpressionsState = NIL;
    indexInfo->ii_PredicateState = NIL;

    return reltuples;
}