/* * systable_beginscan_ordered --- set up for ordered catalog scan * * These routines have essentially the same API as systable_beginscan etc, * except that they guarantee to return multiple matching tuples in * index order. Also, for largely historical reasons, the index to use * is opened and locked by the caller, not here. * * Currently we do not support non-index-based scans here. (In principle * we could do a heapscan and sort, but the uses are in places that * probably don't need to still work with corrupted catalog indexes.) * For the moment, therefore, these functions are merely the thinnest of * wrappers around index_beginscan/index_getnext. The main reason for their * existence is to centralize possible future support of lossy operators * in catalog scans. */ SysScanDesc systable_beginscan_ordered(Relation heapRelation, Relation indexRelation, Snapshot snapshot, int nkeys, ScanKey key) { SysScanDesc sysscan; int i; /* REINDEX can probably be a hard error here ... */ if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation))) elog(ERROR, "cannot do ordered scan on index \"%s\", because it is being reindexed", RelationGetRelationName(indexRelation)); /* ... but we only throw a warning about violating IgnoreSystemIndexes */ if (IgnoreSystemIndexes) elog(WARNING, "using index \"%s\" despite IgnoreSystemIndexes", RelationGetRelationName(indexRelation)); sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData)); sysscan->heap_rel = heapRelation; sysscan->irel = indexRelation; if (snapshot == NULL) { Oid relid = RelationGetRelid(heapRelation); snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); sysscan->snapshot = snapshot; } else { /* Caller is responsible for any snapshot. */ sysscan->snapshot = NULL; } /* Change attribute numbers to be index column numbers. */ for (i = 0; i < nkeys; i++) { int j; for (j = 0; j < indexRelation->rd_index->indnatts; j++) { if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j]) { key[i].sk_attno = j + 1; break; } } if (j == indexRelation->rd_index->indnatts) elog(ERROR, "column is not in index"); } sysscan->iscan = index_beginscan(heapRelation, indexRelation, snapshot, nkeys, 0); index_rescan(sysscan->iscan, key, nkeys, NULL, 0); sysscan->scan = NULL; return sysscan; }
/* ---------------------------------------------------------------- * ExecReScanIndexScan(node) * * Recalculates the values of any scan keys whose value depends on * information known at runtime, then rescans the indexed relation. * * Updating the scan key was formerly done separately in * ExecUpdateIndexScanKeys. Integrating it into ReScan makes * rescans of indices and relations/general streams more uniform. * ---------------------------------------------------------------- */ void ExecReScanIndexScan(IndexScanState *node) { /* * If we are doing runtime key calculations (ie, any of the index key * values weren't simple Consts), compute the new key values. But first, * reset the context so we don't leak memory as each outer tuple is * scanned. Note this assumes that we will recalculate *all* runtime keys * on each call. */ if (node->iss_NumRuntimeKeys != 0) { ExprContext *econtext = node->iss_RuntimeContext; ResetExprContext(econtext); ExecIndexEvalRuntimeKeys(econtext, node->iss_RuntimeKeys, node->iss_NumRuntimeKeys); } node->iss_RuntimeKeysReady = true; /* reset index scan */ index_rescan(node->iss_ScanDesc, node->iss_ScanKeys, node->iss_NumScanKeys, node->iss_OrderByKeys, node->iss_NumOrderByKeys); ExecScanReScan(&node->ss); }
/* ---------------------------------------------------------------- * ExecIndexOnlyScanInitializeDSM * * Set up a parallel index-only scan descriptor. * ---------------------------------------------------------------- */ void ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node, ParallelContext *pcxt) { EState *estate = node->ss.ps.state; ParallelIndexScanDesc piscan; piscan = shm_toc_allocate(pcxt->toc, node->ioss_PscanLen); index_parallelscan_initialize(node->ss.ss_currentRelation, node->ioss_RelationDesc, estate->es_snapshot, piscan); shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan); node->ioss_ScanDesc = index_beginscan_parallel(node->ss.ss_currentRelation, node->ioss_RelationDesc, node->ioss_NumScanKeys, node->ioss_NumOrderByKeys, piscan); node->ioss_ScanDesc->xs_want_itup = true; node->ioss_VMBuffer = InvalidBuffer; /* * If no run-time keys to calculate or they are ready, go ahead and pass * the scankeys to the index AM. */ if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady) index_rescan(node->ioss_ScanDesc, node->ioss_ScanKeys, node->ioss_NumScanKeys, node->ioss_OrderByKeys, node->ioss_NumOrderByKeys); }
/* ---------------------------------------------------------------- * ExecIndexReScan(node) * * Recalculates the value of the scan keys whose value depends on * information known at runtime and rescans the indexed relation. * Updating the scan key was formerly done separately in * ExecUpdateIndexScanKeys. Integrating it into ReScan makes * rescans of indices and relations/general streams more uniform. * ---------------------------------------------------------------- */ void ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt) { EState *estate; ExprContext *econtext; Index scanrelid; estate = node->ss.ps.state; econtext = node->iss_RuntimeContext; /* context for runtime keys */ scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid; node->ss.ps.ps_TupFromTlist = false; if (econtext) { /* * If we are being passed an outer tuple, save it for runtime key * calc. We also need to link it into the "regular" per-tuple * econtext, so it can be used during indexqualorig evaluations. */ if (exprCtxt != NULL) { ExprContext *stdecontext; econtext->ecxt_outertuple = exprCtxt->ecxt_outertuple; stdecontext = node->ss.ps.ps_ExprContext; stdecontext->ecxt_outertuple = exprCtxt->ecxt_outertuple; } /* * Reset the runtime-key context so we don't leak memory as each outer * tuple is scanned. Note this assumes that we will recalculate *all* * runtime keys on each call. */ ResetExprContext(econtext); } /* * If we are doing runtime key calculations (ie, the index keys depend on * data from an outer scan), compute the new key values */ if (node->iss_NumRuntimeKeys != 0) ExecIndexEvalRuntimeKeys(econtext, node->iss_RuntimeKeys, node->iss_NumRuntimeKeys); node->iss_RuntimeKeysReady = true; /* If this is re-scanning of PlanQual ... */ if (estate->es_evTuple != NULL && estate->es_evTuple[scanrelid - 1] != NULL) { estate->es_evTupleNull[scanrelid - 1] = false; return; } /* reset index scan */ index_rescan(node->iss_ScanDesc, node->iss_ScanKeys); }
/* ---------------------------------------------------------------- * ExecBitmapIndexReScan(node) * * Recalculates the value of the scan keys whose value depends on * information known at runtime and rescans the indexed relation. * ---------------------------------------------------------------- */ void ExecBitmapIndexReScan(BitmapIndexScanState *node, ExprContext *exprCtxt) { ExprContext *econtext; ExprState **runtimeKeyInfo; econtext = node->biss_RuntimeContext; /* context for runtime keys */ runtimeKeyInfo = node->biss_RuntimeKeyInfo; if (econtext) { /* * If we are being passed an outer tuple, save it for runtime key * calc. */ if (exprCtxt != NULL) econtext->ecxt_outertuple = exprCtxt->ecxt_outertuple; /* * Reset the runtime-key context so we don't leak memory as each outer * tuple is scanned. Note this assumes that we will recalculate *all* * runtime keys on each call. */ ResetExprContext(econtext); } /* * If we are doing runtime key calculations (ie, the index keys depend on * data from an outer scan), compute the new key values */ if (runtimeKeyInfo) { ExecIndexEvalRuntimeKeys(econtext, runtimeKeyInfo, node->biss_ScanKeys, node->biss_NumScanKeys); node->biss_RuntimeKeysReady = true; } /* reset index scan */ index_rescan(node->biss_ScanDesc, node->biss_ScanKeys); if (node->odbiss_ScanDesc != NULL) index_rescan(node->odbiss_ScanDesc, node->biss_ScanKeys); }
/* ---------------- * RelationGetIndexScan -- Create and fill an IndexScanDesc. * * This routine creates an index scan structure and sets its contents * up correctly. This routine calls AMrescan to set up the scan with * the passed key. * * Parameters: * indexRelation -- index relation for scan. * nkeys -- count of scan keys. * key -- array of scan keys to restrict the index scan. * * Returns: * An initialized IndexScanDesc. * ---------------- */ IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, ScanKey key) { IndexScanDesc scan; scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData)); scan->heapRelation = NULL; /* may be set later */ scan->indexRelation = indexRelation; scan->xs_snapshot = SnapshotNow; /* may be set later */ scan->numberOfKeys = nkeys; /* * We allocate the key space here, but the AM is responsible for * actually filling it from the passed key array. */ if (nkeys > 0) scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); else scan->keyData = NULL; scan->kill_prior_tuple = false; scan->ignore_killed_tuples = true; /* default setting */ scan->keys_are_unique = false; /* may be set by index AM */ scan->got_tuple = false; scan->opaque = NULL; ItemPointerSetInvalid(&scan->currentItemData); ItemPointerSetInvalid(&scan->currentMarkData); ItemPointerSetInvalid(&scan->xs_ctup.t_self); scan->xs_ctup.t_datamcxt = NULL; scan->xs_ctup.t_data = NULL; scan->xs_cbuf = InvalidBuffer; /* mark cached function lookup data invalid; it will be set later */ scan->fn_getnext.fn_oid = InvalidOid; scan->unique_tuple_pos = 0; scan->unique_tuple_mark = 0; pgstat_initstats(&scan->xs_pgstat_info, indexRelation); /* * Let the AM fill in the key and any opaque data it wants. */ index_rescan(scan, key); return scan; }
/* ---------------- * RelationGetIndexScan -- Create and fill an IndexScanDesc. * * This routine creates an index scan structure and sets its contents * up correctly. This routine calls AMrescan to set up the scan with * the passed key. * * Parameters: * indexRelation -- index relation for scan. * nkeys -- count of scan keys. * key -- array of scan keys to restrict the index scan. * * Returns: * An initialized IndexScanDesc. * ---------------- */ IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, ScanKey key) { IndexScanDesc scan; scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData)); scan->heapRelation = NULL; /* may be set later */ scan->indexRelation = indexRelation; scan->xs_snapshot = SnapshotNow; /* may be set later */ scan->numberOfKeys = nkeys; /* * We allocate the key space here, but the AM is responsible for actually * filling it from the passed key array. */ if (nkeys > 0) scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); else scan->keyData = NULL; /* * During recovery we ignore killed tuples and don't bother to kill them * either. We do this because the xmin on the primary node could easily be * later than the xmin on the standby node, so that what the primary * thinks is killed is supposed to be visible on standby. So for correct * MVCC for queries during recovery we must ignore these hints and check * all tuples. Do *not* set ignore_killed_tuples to true when running in a * transaction that was started during recovery. xactStartedInRecovery * should not be altered by index AMs. */ scan->kill_prior_tuple = false; scan->xactStartedInRecovery = TransactionStartedDuringRecovery(); scan->ignore_killed_tuples = !scan->xactStartedInRecovery; scan->opaque = NULL; ItemPointerSetInvalid(&scan->xs_ctup.t_self); scan->xs_ctup.t_data = NULL; scan->xs_cbuf = InvalidBuffer; scan->xs_hot_dead = false; scan->xs_next_hot = InvalidOffsetNumber; scan->xs_prev_xmax = InvalidTransactionId; /* * Let the AM fill in the key and any opaque data it wants. */ index_rescan(scan, key); return scan; }
/* ---------------------------------------------------------------- * ExecBitmapIndexReScan(node) * * Recalculates the value of the scan keys whose value depends on * information known at runtime and rescans the indexed relation. * ---------------------------------------------------------------- */ void ExecBitmapIndexReScan(BitmapIndexScanState *node, ExprContext *exprCtxt) { ExprContext *econtext; econtext = node->biss_RuntimeContext; /* context for runtime keys */ if (econtext) { /* * If we are being passed an outer tuple, save it for runtime key * calc. */ if (exprCtxt != NULL) econtext->ecxt_outertuple = exprCtxt->ecxt_outertuple; /* * Reset the runtime-key context so we don't leak memory as each outer * tuple is scanned. Note this assumes that we will recalculate *all* * runtime keys on each call. */ ResetExprContext(econtext); } /* * If we are doing runtime key calculations (ie, the index keys depend on * data from an outer scan), compute the new key values. * * Array keys are also treated as runtime keys; note that if we return * with biss_RuntimeKeysReady still false, then there is an empty array * key so no index scan is needed. */ if (node->biss_NumRuntimeKeys != 0) ExecIndexEvalRuntimeKeys(econtext, node->biss_RuntimeKeys, node->biss_NumRuntimeKeys); if (node->biss_NumArrayKeys != 0) node->biss_RuntimeKeysReady = ExecIndexEvalArrayKeys(econtext, node->biss_ArrayKeys, node->biss_NumArrayKeys); else node->biss_RuntimeKeysReady = true; /* reset index scan */ if (node->biss_RuntimeKeysReady) index_rescan(node->biss_ScanDesc, node->biss_ScanKeys); }
/* ---------------------------------------------------------------- * ExecIndexReScan(node) * * Recalculates the value of the scan keys whose value depends on * information known at runtime and rescans the indexed relation. * Updating the scan key was formerly done separately in * ExecUpdateIndexScanKeys. Integrating it into ReScan makes * rescans of indices and relations/general streams more uniform. * ---------------------------------------------------------------- */ void ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt) { ExprContext *econtext; econtext = node->iss_RuntimeContext; /* context for runtime keys */ if (econtext) { /* * If we are being passed an outer tuple, save it for runtime key * calc. We also need to link it into the "regular" per-tuple * econtext, so it can be used during indexqualorig evaluations. */ if (exprCtxt != NULL) { ExprContext *stdecontext; econtext->ecxt_outertuple = exprCtxt->ecxt_outertuple; stdecontext = node->ss.ps.ps_ExprContext; stdecontext->ecxt_outertuple = exprCtxt->ecxt_outertuple; } /* * Reset the runtime-key context so we don't leak memory as each outer * tuple is scanned. Note this assumes that we will recalculate *all* * runtime keys on each call. */ ResetExprContext(econtext); } /* * If we are doing runtime key calculations (ie, the index keys depend on * data from an outer scan), compute the new key values */ if (node->iss_NumRuntimeKeys != 0) ExecIndexEvalRuntimeKeys(econtext, node->iss_RuntimeKeys, node->iss_NumRuntimeKeys); node->iss_RuntimeKeysReady = true; /* reset index scan */ index_rescan(node->iss_ScanDesc, node->iss_ScanKeys); ExecScanReScan(&node->ss); }
/* ---------------- * RelationGetIndexScan -- Create and fill an IndexScanDesc. * * This routine creates an index scan structure and sets its contents * up correctly. This routine calls AMrescan to set up the scan with * the passed key. * * Parameters: * indexRelation -- index relation for scan. * nkeys -- count of scan keys. * key -- array of scan keys to restrict the index scan. * * Returns: * An initialized IndexScanDesc. * ---------------- */ IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, ScanKey key) { IndexScanDesc scan; scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData)); scan->heapRelation = NULL; /* may be set later */ scan->indexRelation = indexRelation; scan->xs_snapshot = SnapshotNow; /* may be set later */ scan->numberOfKeys = nkeys; /* * We allocate the key space here, but the AM is responsible for actually * filling it from the passed key array. */ if (nkeys > 0) scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); else scan->keyData = NULL; scan->is_multiscan = false; /* caller may change this */ scan->kill_prior_tuple = false; scan->ignore_killed_tuples = true; /* default setting */ scan->opaque = NULL; ItemPointerSetInvalid(&scan->currentItemData); ItemPointerSetInvalid(&scan->currentMarkData); ItemPointerSetInvalid(&scan->xs_ctup.t_self); scan->xs_ctup.t_data = NULL; scan->xs_cbuf = InvalidBuffer; pgstat_initstats(&scan->xs_pgstat_info, indexRelation); /* * Let the AM fill in the key and any opaque data it wants. */ index_rescan(scan, key); return scan; }
/* ---------------------------------------------------------------- * ExecReScanIndexOnlyScan(node) * * Recalculates the values of any scan keys whose value depends on * information known at runtime, then rescans the indexed relation. * * Updating the scan key was formerly done separately in * ExecUpdateIndexScanKeys. Integrating it into ReScan makes * rescans of indices and relations/general streams more uniform. * ---------------------------------------------------------------- */ void ExecReScanIndexOnlyScan(IndexOnlyScanState *node) { bool reset_parallel_scan = true; /* * If we are here to just update the scan keys, then don't reset parallel * scan. For detailed reason behind this look in the comments for * ExecReScanIndexScan. */ if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady) reset_parallel_scan = false; /* * If we are doing runtime key calculations (ie, any of the index key * values weren't simple Consts), compute the new key values. But first, * reset the context so we don't leak memory as each outer tuple is * scanned. Note this assumes that we will recalculate *all* runtime keys * on each call. */ if (node->ioss_NumRuntimeKeys != 0) { ExprContext *econtext = node->ioss_RuntimeContext; ResetExprContext(econtext); ExecIndexEvalRuntimeKeys(econtext, node->ioss_RuntimeKeys, node->ioss_NumRuntimeKeys); } node->ioss_RuntimeKeysReady = true; /* reset index scan */ if (node->ioss_ScanDesc) { index_rescan(node->ioss_ScanDesc, node->ioss_ScanKeys, node->ioss_NumScanKeys, node->ioss_OrderByKeys, node->ioss_NumOrderByKeys); if (reset_parallel_scan && node->ioss_ScanDesc->parallel_scan) index_parallelrescan(node->ioss_ScanDesc); } ExecScanReScan(&node->ss); }
/* * Starts a scan over the visimap store. * * Parameter keys may be NULL iff nkeys is zero. */ IndexScanDesc AppendOnlyVisimapStore_BeginScan(AppendOnlyVisimapStore *visiMapStore, int nkeys, ScanKey keys) { IndexScanDesc scandesc; Assert(visiMapStore); Assert(RelationIsValid(visiMapStore->visimapRelation)); scandesc = index_beginscan(visiMapStore->visimapRelation, visiMapStore->visimapIndex, visiMapStore->snapshot, nkeys, 0); index_rescan(scandesc, keys, nkeys, NULL, 0); return scandesc; }
/* ---------------- * RelationGetIndexScan -- Create and fill an IndexScanDesc. * * This routine creates an index scan structure and sets its contents * up correctly. This routine calls AMrescan to set up the scan with * the passed key. * * Parameters: * indexRelation -- index relation for scan. * nkeys -- count of scan keys. * key -- array of scan keys to restrict the index scan. * * Returns: * An initialized IndexScanDesc. * ---------------- */ IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, ScanKey key) { IndexScanDesc scan; scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData)); scan->heapRelation = NULL; /* may be set later */ scan->indexRelation = indexRelation; scan->xs_snapshot = SnapshotNow; /* may be set later */ scan->numberOfKeys = nkeys; /* * We allocate the key space here, but the AM is responsible for actually * filling it from the passed key array. */ if (nkeys > 0) scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); else scan->keyData = NULL; scan->kill_prior_tuple = false; scan->ignore_killed_tuples = true; /* default setting */ scan->opaque = NULL; ItemPointerSetInvalid(&scan->xs_ctup.t_self); scan->xs_ctup.t_data = NULL; scan->xs_cbuf = InvalidBuffer; scan->xs_hot_dead = false; scan->xs_next_hot = InvalidOffsetNumber; scan->xs_prev_xmax = InvalidTransactionId; /* * Let the AM fill in the key and any opaque data it wants. */ index_rescan(scan, key); return scan; }
/* ---------------------------------------------------------------- * ExecReScanBitmapIndexScan(node) * * Recalculates the values of any scan keys whose value depends on * information known at runtime, then rescans the indexed relation. * ---------------------------------------------------------------- */ void ExecReScanBitmapIndexScan(BitmapIndexScanState *node) { ExprContext *econtext = node->biss_RuntimeContext; /* * Reset the runtime-key context so we don't leak memory as each outer * tuple is scanned. Note this assumes that we will recalculate *all* * runtime keys on each call. */ if (econtext) ResetExprContext(econtext); /* * If we are doing runtime key calculations (ie, any of the index key * values weren't simple Consts), compute the new key values. * * Array keys are also treated as runtime keys; note that if we return * with biss_RuntimeKeysReady still false, then there is an empty array * key so no index scan is needed. */ if (node->biss_NumRuntimeKeys != 0) ExecIndexEvalRuntimeKeys(econtext, node->biss_RuntimeKeys, node->biss_NumRuntimeKeys); if (node->biss_NumArrayKeys != 0) node->biss_RuntimeKeysReady = ExecIndexEvalArrayKeys(econtext, node->biss_ArrayKeys, node->biss_NumArrayKeys); else node->biss_RuntimeKeysReady = true; /* reset index scan */ if (node->biss_RuntimeKeysReady) index_rescan(node->biss_ScanDesc, node->biss_ScanKeys, node->biss_NumScanKeys, NULL, 0); }
/* ---------------------------------------------------------------- * ExecIndexOnlyScanInitializeWorker * * Copy relevant information from TOC into planstate. * ---------------------------------------------------------------- */ void ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, shm_toc *toc) { ParallelIndexScanDesc piscan; piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false); node->ioss_ScanDesc = index_beginscan_parallel(node->ss.ss_currentRelation, node->ioss_RelationDesc, node->ioss_NumScanKeys, node->ioss_NumOrderByKeys, piscan); node->ioss_ScanDesc->xs_want_itup = true; /* * If no run-time keys to calculate or they are ready, go ahead and pass * the scankeys to the index AM. */ if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady) index_rescan(node->ioss_ScanDesc, node->ioss_ScanKeys, node->ioss_NumScanKeys, node->ioss_OrderByKeys, node->ioss_NumOrderByKeys); }
/* ---------------------------------------------------------------- * MultiExecBitmapIndexScan(node) * ---------------------------------------------------------------- */ Node * MultiExecBitmapIndexScan(BitmapIndexScanState *node) { TIDBitmap *tbm; IndexScanDesc scandesc; double nTuples = 0; bool doscan; /* must provide our own instrumentation support */ if (node->ss.ps.instrument) InstrStartNode(node->ss.ps.instrument); /* * extract necessary information from index scan node */ scandesc = node->biss_ScanDesc; /* * If we have runtime keys and they've not already been set up, do it now. * Array keys are also treated as runtime keys; note that if ExecReScan * returns with biss_RuntimeKeysReady still false, then there is an empty * array key so we should do nothing. */ if (!node->biss_RuntimeKeysReady && (node->biss_NumRuntimeKeys != 0 || node->biss_NumArrayKeys != 0)) { ExecReScan((PlanState *) node); doscan = node->biss_RuntimeKeysReady; } else doscan = true; /* * Prepare the result bitmap. Normally we just create a new one to pass * back; however, our parent node is allowed to store a pre-made one into * node->biss_result, in which case we just OR our tuple IDs into the * existing bitmap. (This saves needing explicit UNION steps.) */ if (node->biss_result) { tbm = node->biss_result; node->biss_result = NULL; /* reset for next time */ } else { /* XXX should we use less than work_mem for this? */ tbm = tbm_create(work_mem * 1024L); } /* * Get TIDs from index and insert into bitmap */ while (doscan) { nTuples += (double) index_getbitmap(scandesc, tbm); CHECK_FOR_INTERRUPTS(); doscan = ExecIndexAdvanceArrayKeys(node->biss_ArrayKeys, node->biss_NumArrayKeys); if (doscan) /* reset index scan */ index_rescan(node->biss_ScanDesc, node->biss_ScanKeys, node->biss_NumScanKeys, NULL, 0); } /* must provide our own instrumentation support */ if (node->ss.ps.instrument) InstrStopNode(node->ss.ps.instrument, nTuples); return (Node *) tbm; }
/* ---------------------------------------------------------------- * ExecInitBitmapIndexScan * * Initializes the index scan's state information. * ---------------------------------------------------------------- */ BitmapIndexScanState * ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) { BitmapIndexScanState *indexstate; bool relistarget; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); /* * create state structure */ indexstate = makeNode(BitmapIndexScanState); indexstate->ss.ps.plan = (Plan *) node; indexstate->ss.ps.state = estate; /* normally we don't make the result bitmap till runtime */ indexstate->biss_result = NULL; /* * Miscellaneous initialization * * We do not need a standard exprcontext for this node, though we may * decide below to create a runtime-key exprcontext */ /* * initialize child expressions * * We don't need to initialize targetlist or qual since neither are used. * * Note: we don't initialize all of the indexqual expression, only the * sub-parts corresponding to runtime keys (see below). */ /* * We do not open or lock the base relation here. We assume that an * ancestor BitmapHeapScan node is holding AccessShareLock (or better) on * the heap relation throughout the execution of the plan tree. */ indexstate->ss.ss_currentRelation = NULL; indexstate->ss.ss_currentScanDesc = NULL; /* * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop * here. This allows an index-advisor plugin to EXPLAIN a plan containing * references to nonexistent indexes. */ if (eflags & EXEC_FLAG_EXPLAIN_ONLY) return indexstate; /* * Open the index relation. * * If the parent table is one of the target relations of the query, then * InitPlan already opened and write-locked the index, so we can avoid * taking another lock here. Otherwise we need a normal reader's lock. */ relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid); indexstate->biss_RelationDesc = index_open(node->indexid, relistarget ? NoLock : AccessShareLock); /* * Initialize index-specific scan state */ indexstate->biss_RuntimeKeysReady = false; indexstate->biss_RuntimeKeys = NULL; indexstate->biss_NumRuntimeKeys = 0; /* * build the index scan keys from the index qualification */ ExecIndexBuildScanKeys((PlanState *) indexstate, indexstate->biss_RelationDesc, node->indexqual, false, &indexstate->biss_ScanKeys, &indexstate->biss_NumScanKeys, &indexstate->biss_RuntimeKeys, &indexstate->biss_NumRuntimeKeys, &indexstate->biss_ArrayKeys, &indexstate->biss_NumArrayKeys); /* * If we have runtime keys or array keys, we need an ExprContext to * evaluate them. We could just create a "standard" plan node exprcontext, * but to keep the code looking similar to nodeIndexscan.c, it seems * better to stick with the approach of using a separate ExprContext. */ if (indexstate->biss_NumRuntimeKeys != 0 || indexstate->biss_NumArrayKeys != 0) { ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext; ExecAssignExprContext(estate, &indexstate->ss.ps); indexstate->biss_RuntimeContext = indexstate->ss.ps.ps_ExprContext; indexstate->ss.ps.ps_ExprContext = stdecontext; } else { indexstate->biss_RuntimeContext = NULL; } /* * Initialize scan descriptor. */ indexstate->biss_ScanDesc = index_beginscan_bitmap(indexstate->biss_RelationDesc, estate->es_snapshot, indexstate->biss_NumScanKeys); /* * If no run-time keys to calculate, go ahead and pass the scankeys to the * index AM. */ if (indexstate->biss_NumRuntimeKeys == 0 && indexstate->biss_NumArrayKeys == 0) index_rescan(indexstate->biss_ScanDesc, indexstate->biss_ScanKeys, indexstate->biss_NumScanKeys, NULL, 0); /* * all done. */ return indexstate; }
/* * systable_beginscan --- set up for heap-or-index scan * * rel: catalog to scan, already opened and suitably locked * indexId: OID of index to conditionally use * indexOK: if false, forces a heap scan (see notes below) * snapshot: time qual to use (NULL for a recent catalog snapshot) * nkeys, key: scan keys * * The attribute numbers in the scan key should be set for the heap case. * If we choose to index, we reset them to 1..n to reference the index * columns. Note this means there must be one scankey qualification per * index column! This is checked by the Asserts in the normal, index-using * case, but won't be checked if the heapscan path is taken. * * The routine checks the normal cases for whether an indexscan is safe, * but caller can make additional checks and pass indexOK=false if needed. * In standard case indexOK can simply be constant TRUE. */ SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key) { SysScanDesc sysscan; Relation irel; if (indexOK && !IgnoreSystemIndexes && !ReindexIsProcessingIndex(indexId)) irel = index_open(indexId, AccessShareLock); else irel = NULL; sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData)); sysscan->heap_rel = heapRelation; sysscan->irel = irel; if (snapshot == NULL) { Oid relid = RelationGetRelid(heapRelation); snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); sysscan->snapshot = snapshot; } else { /* Caller is responsible for any snapshot. */ sysscan->snapshot = NULL; } if (irel) { int i; /* Change attribute numbers to be index column numbers. */ for (i = 0; i < nkeys; i++) { int j; for (j = 0; j < irel->rd_index->indnatts; j++) { if (key[i].sk_attno == irel->rd_index->indkey.values[j]) { key[i].sk_attno = j + 1; break; } } if (j == irel->rd_index->indnatts) elog(ERROR, "column is not in index"); } sysscan->iscan = index_beginscan(heapRelation, irel, snapshot, nkeys, 0); index_rescan(sysscan->iscan, key, nkeys, NULL, 0); sysscan->scan = NULL; } else { /* * We disallow synchronized scans when forced to use a heapscan on a * catalog. In most cases the desired rows are near the front, so * that the unpredictable start point of a syncscan is a serious * disadvantage; and there are no compensating advantages, because * it's unlikely that such scans will occur in parallel. */ sysscan->scan = heap_beginscan_strat(heapRelation, snapshot, nkeys, key, true, false); sysscan->iscan = NULL; } return sysscan; }
/* ---------------------------------------------------------------- * ExecInitIndexOnlyScan * * Initializes the index scan's state information, creates * scan keys, and opens the base and index relations. * * Note: index scans have 2 sets of state information because * we have to keep track of the base relation and the * index relation. * ---------------------------------------------------------------- */ IndexOnlyScanState * ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) { IndexOnlyScanState *indexstate; Relation currentRelation; bool relistarget; TupleDesc tupDesc; /* * create state structure */ indexstate = makeNode(IndexOnlyScanState); indexstate->ss.ps.plan = (Plan *) node; indexstate->ss.ps.state = estate; indexstate->ioss_HeapFetches = 0; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &indexstate->ss.ps); indexstate->ss.ps.ps_TupFromTlist = false; /* * initialize child expressions * * Note: we don't initialize all of the indexorderby expression, only the * sub-parts corresponding to runtime keys (see below). */ indexstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) indexstate); indexstate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->scan.plan.qual, (PlanState *) indexstate); indexstate->indexqual = (List *) ExecInitExpr((Expr *) node->indexqual, (PlanState *) indexstate); /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &indexstate->ss.ps); ExecInitScanTupleSlot(estate, &indexstate->ss); /* * open the base relation and acquire appropriate lock on it. */ currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags); indexstate->ss.ss_currentRelation = currentRelation; indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */ /* * Build the scan tuple type using the indextlist generated by the * planner. We use this, rather than the index's physical tuple * descriptor, because the latter contains storage column types not the * types of the original datums. (It's the AM's responsibility to return * suitable data anyway.) */ tupDesc = ExecTypeFromTL(node->indextlist, false); ExecAssignScanType(&indexstate->ss, tupDesc); /* * Initialize result tuple type and projection info. The node's * targetlist will contain Vars with varno = INDEX_VAR, referencing the * scan tuple. */ ExecAssignResultTypeFromTL(&indexstate->ss.ps); ExecAssignScanProjectionInfoWithVarno(&indexstate->ss, INDEX_VAR); /* * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop * here. This allows an index-advisor plugin to EXPLAIN a plan containing * references to nonexistent indexes. */ if (eflags & EXEC_FLAG_EXPLAIN_ONLY) return indexstate; /* * Open the index relation. * * If the parent table is one of the target relations of the query, then * InitPlan already opened and write-locked the index, so we can avoid * taking another lock here. Otherwise we need a normal reader's lock. */ relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid); indexstate->ioss_RelationDesc = index_open(node->indexid, relistarget ? NoLock : AccessShareLock); /* * Initialize index-specific scan state */ indexstate->ioss_RuntimeKeysReady = false; indexstate->ioss_RuntimeKeys = NULL; indexstate->ioss_NumRuntimeKeys = 0; /* * build the index scan keys from the index qualification */ ExecIndexBuildScanKeys((PlanState *) indexstate, indexstate->ioss_RelationDesc, node->indexqual, false, &indexstate->ioss_ScanKeys, &indexstate->ioss_NumScanKeys, &indexstate->ioss_RuntimeKeys, &indexstate->ioss_NumRuntimeKeys, NULL, /* no ArrayKeys */ NULL); /* * any ORDER BY exprs have to be turned into scankeys in the same way */ ExecIndexBuildScanKeys((PlanState *) indexstate, indexstate->ioss_RelationDesc, node->indexorderby, true, &indexstate->ioss_OrderByKeys, &indexstate->ioss_NumOrderByKeys, &indexstate->ioss_RuntimeKeys, &indexstate->ioss_NumRuntimeKeys, NULL, /* no ArrayKeys */ NULL); /* * If we have runtime keys, we need an ExprContext to evaluate them. The * node's standard context won't do because we want to reset that context * for every tuple. So, build another context just like the other one... * -tgl 7/11/00 */ if (indexstate->ioss_NumRuntimeKeys != 0) { ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext; ExecAssignExprContext(estate, &indexstate->ss.ps); indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext; indexstate->ss.ps.ps_ExprContext = stdecontext; } else { indexstate->ioss_RuntimeContext = NULL; } /* * Initialize scan descriptor. */ indexstate->ioss_ScanDesc = index_beginscan(currentRelation, indexstate->ioss_RelationDesc, estate->es_snapshot, indexstate->ioss_NumScanKeys, indexstate->ioss_NumOrderByKeys); /* Set it up for index-only scan */ indexstate->ioss_ScanDesc->xs_want_itup = true; indexstate->ioss_VMBuffer = InvalidBuffer; /* * If no run-time keys to calculate, go ahead and pass the scankeys to the * index AM. */ if (indexstate->ioss_NumRuntimeKeys == 0) index_rescan(indexstate->ioss_ScanDesc, indexstate->ioss_ScanKeys, indexstate->ioss_NumScanKeys, indexstate->ioss_OrderByKeys, indexstate->ioss_NumOrderByKeys); /* * all done. */ return indexstate; }
/* ---------------------------------------------------------------- * MultiExecBitmapIndexScan(node) * ---------------------------------------------------------------- */ Node * MultiExecBitmapIndexScan(BitmapIndexScanState *node) { IndexScanState *scanState = (IndexScanState*)node; Node *bitmap = NULL; /* must provide our own instrumentation support */ if (scanState->ss.ps.instrument) { InstrStartNode(scanState->ss.ps.instrument); } bool partitionIsReady = DynamicScan_BeginIndexPartition(scanState, false /* initQual */, false /* initTargetList */, true /* supportsArrayKeys */, true /* isMultiScan */); Assert(partitionIsReady); if (!partitionIsReady) { DynamicScan_EndIndexPartition(scanState); return NULL; } bool doscan = node->indexScanState.iss_RuntimeKeysReady; IndexScanDesc scandesc = scanState->iss_ScanDesc; /* Get bitmap from index */ while (doscan) { bitmap = index_getmulti(scandesc, node->bitmap); if ((NULL != bitmap) && !(IsA(bitmap, HashBitmap) || IsA(bitmap, StreamBitmap))) { elog(ERROR, "unrecognized result from bitmap index scan"); } CHECK_FOR_INTERRUPTS(); /* CDB: If EXPLAIN ANALYZE, let bitmap share our Instrumentation. */ if (scanState->ss.ps.instrument) { tbm_bitmap_set_instrument(bitmap, scanState->ss.ps.instrument); } if(node->bitmap == NULL) { node->bitmap = (Node *)bitmap; } doscan = ExecIndexAdvanceArrayKeys(scanState->iss_ArrayKeys, scanState->iss_NumArrayKeys); if (doscan) { /* reset index scan */ index_rescan(scanState->iss_ScanDesc, scanState->iss_ScanKeys); } } DynamicScan_EndIndexPartition(scanState); /* must provide our own instrumentation support */ if (scanState->ss.ps.instrument) { InstrStopNode(scanState->ss.ps.instrument, 1 /* nTuples */); } return (Node *) bitmap; }
/* ---------------------------------------------------------------- * IndexOnlyNext * * Retrieve a tuple from the IndexOnlyScan node's index. * ---------------------------------------------------------------- */ static TupleTableSlot * IndexOnlyNext(IndexOnlyScanState *node) { EState *estate; ExprContext *econtext; ScanDirection direction; IndexScanDesc scandesc; TupleTableSlot *slot; ItemPointer tid; /* * extract necessary information from index scan node */ estate = node->ss.ps.state; direction = estate->es_direction; /* flip direction if this is an overall backward scan */ if (ScanDirectionIsBackward(((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir)) { if (ScanDirectionIsForward(direction)) direction = BackwardScanDirection; else if (ScanDirectionIsBackward(direction)) direction = ForwardScanDirection; } scandesc = node->ioss_ScanDesc; econtext = node->ss.ps.ps_ExprContext; slot = node->ss.ss_ScanTupleSlot; if (scandesc == NULL) { /* * We reach here if the index only scan is not parallel, or if we're * serially executing an index only scan that was planned to be * parallel. */ scandesc = index_beginscan(node->ss.ss_currentRelation, node->ioss_RelationDesc, estate->es_snapshot, node->ioss_NumScanKeys, node->ioss_NumOrderByKeys); node->ioss_ScanDesc = scandesc; /* Set it up for index-only scan */ node->ioss_ScanDesc->xs_want_itup = true; node->ioss_VMBuffer = InvalidBuffer; /* * If no run-time keys to calculate or they are ready, go ahead and * pass the scankeys to the index AM. */ if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady) index_rescan(scandesc, node->ioss_ScanKeys, node->ioss_NumScanKeys, node->ioss_OrderByKeys, node->ioss_NumOrderByKeys); } /* * OK, now that we have what we need, fetch the next tuple. */ while ((tid = index_getnext_tid(scandesc, direction)) != NULL) { HeapTuple tuple = NULL; CHECK_FOR_INTERRUPTS(); /* * We can skip the heap fetch if the TID references a heap page on * which all tuples are known visible to everybody. In any case, * we'll use the index tuple not the heap tuple as the data source. * * Note on Memory Ordering Effects: visibilitymap_get_status does not * lock the visibility map buffer, and therefore the result we read * here could be slightly stale. However, it can't be stale enough to * matter. * * We need to detect clearing a VM bit due to an insert right away, * because the tuple is present in the index page but not visible. The * reading of the TID by this scan (using a shared lock on the index * buffer) is serialized with the insert of the TID into the index * (using an exclusive lock on the index buffer). Because the VM bit * is cleared before updating the index, and locking/unlocking of the * index page acts as a full memory barrier, we are sure to see the * cleared bit if we see a recently-inserted TID. * * Deletes do not update the index page (only VACUUM will clear out * the TID), so the clearing of the VM bit by a delete is not * serialized with this test below, and we may see a value that is * significantly stale. However, we don't care about the delete right * away, because the tuple is still visible until the deleting * transaction commits or the statement ends (if it's our * transaction). In either case, the lock on the VM buffer will have * been released (acting as a write barrier) after clearing the bit. * And for us to have a snapshot that includes the deleting * transaction (making the tuple invisible), we must have acquired * ProcArrayLock after that time, acting as a read barrier. * * It's worth going through this complexity to avoid needing to lock * the VM buffer, which could cause significant contention. */ if (!VM_ALL_VISIBLE(scandesc->heapRelation, ItemPointerGetBlockNumber(tid), &node->ioss_VMBuffer)) { /* * Rats, we have to visit the heap to check visibility. */ InstrCountTuples2(node, 1); tuple = index_fetch_heap(scandesc); if (tuple == NULL) continue; /* no visible tuple, try next index entry */ /* * Only MVCC snapshots are supported here, so there should be no * need to keep following the HOT chain once a visible entry has * been found. If we did want to allow that, we'd need to keep * more state to remember not to call index_getnext_tid next time. */ if (scandesc->xs_continue_hot) elog(ERROR, "non-MVCC snapshots are not supported in index-only scans"); /* * Note: at this point we are holding a pin on the heap page, as * recorded in scandesc->xs_cbuf. We could release that pin now, * but it's not clear whether it's a win to do so. The next index * entry might require a visit to the same heap page. */ } /* * Fill the scan tuple slot with data from the index. This might be * provided in either HeapTuple or IndexTuple format. Conceivably an * index AM might fill both fields, in which case we prefer the heap * format, since it's probably a bit cheaper to fill a slot from. */ if (scandesc->xs_hitup) { /* * We don't take the trouble to verify that the provided tuple has * exactly the slot's format, but it seems worth doing a quick * check on the number of fields. */ Assert(slot->tts_tupleDescriptor->natts == scandesc->xs_hitupdesc->natts); ExecStoreHeapTuple(scandesc->xs_hitup, slot, false); } else if (scandesc->xs_itup) StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc); else elog(ERROR, "no data returned for index-only scan"); /* * If the index was lossy, we have to recheck the index quals. * (Currently, this can never happen, but we should support the case * for possible future use, eg with GiST indexes.) */ if (scandesc->xs_recheck) { econtext->ecxt_scantuple = slot; if (!ExecQualAndReset(node->indexqual, econtext)) { /* Fails recheck, so drop it and loop back for another */ InstrCountFiltered2(node, 1); continue; } } /* * We don't currently support rechecking ORDER BY distances. (In * principle, if the index can support retrieval of the originally * indexed value, it should be able to produce an exact distance * calculation too. So it's not clear that adding code here for * recheck/re-sort would be worth the trouble. But we should at least * throw an error if someone tries it.) */ if (scandesc->numberOfOrderBys > 0 && scandesc->xs_recheckorderby) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("lossy distance functions are not supported in index-only scans"))); /* * Predicate locks for index-only scans must be acquired at the page * level when the heap is not accessed, since tuple-level predicate * locks need the tuple's xmin value. If we had to visit the tuple * anyway, then we already have the tuple-level lock and can skip the * page lock. */ if (tuple == NULL) PredicateLockPage(scandesc->heapRelation, ItemPointerGetBlockNumber(tid), estate->es_snapshot); return slot; } /* * if we get here it means the index scan failed so we are at the end of * the scan.. */ return ExecClearTuple(slot); }
/* * Search the relation 'rel' for tuple using the index. * * If a matching tuple is found, lock it with lockmode, fill the slot with its * contents, and return true. Return false otherwise. */ bool RelationFindReplTupleByIndex(Relation rel, Oid idxoid, LockTupleMode lockmode, TupleTableSlot *searchslot, TupleTableSlot *outslot) { HeapTuple scantuple; ScanKeyData skey[INDEX_MAX_KEYS]; IndexScanDesc scan; SnapshotData snap; TransactionId xwait; Relation idxrel; bool found; /* Open the index. */ idxrel = index_open(idxoid, RowExclusiveLock); /* Start an index scan. */ InitDirtySnapshot(snap); scan = index_beginscan(rel, idxrel, &snap, RelationGetNumberOfAttributes(idxrel), 0); /* Build scan key. */ build_replindex_scan_key(skey, rel, idxrel, searchslot); retry: found = false; index_rescan(scan, skey, RelationGetNumberOfAttributes(idxrel), NULL, 0); /* Try to find the tuple */ if ((scantuple = index_getnext(scan, ForwardScanDirection)) != NULL) { found = true; ExecStoreTuple(scantuple, outslot, InvalidBuffer, false); ExecMaterializeSlot(outslot); xwait = TransactionIdIsValid(snap.xmin) ? snap.xmin : snap.xmax; /* * If the tuple is locked, wait for locking transaction to finish and * retry. */ if (TransactionIdIsValid(xwait)) { XactLockTableWait(xwait, NULL, NULL, XLTW_None); goto retry; } } /* Found tuple, try to lock it in the lockmode. */ if (found) { Buffer buf; HeapUpdateFailureData hufd; HTSU_Result res; HeapTupleData locktup; ItemPointerCopy(&outslot->tts_tuple->t_self, &locktup.t_self); PushActiveSnapshot(GetLatestSnapshot()); res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false), lockmode, LockWaitBlock, false /* don't follow updates */ , &buf, &hufd); /* the tuple slot already has the buffer pinned */ ReleaseBuffer(buf); PopActiveSnapshot(); switch (res) { case HeapTupleMayBeUpdated: break; case HeapTupleUpdated: /* XXX: Improve handling here */ ereport(LOG, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("concurrent update, retrying"))); goto retry; case HeapTupleInvisible: elog(ERROR, "attempted to lock invisible tuple"); default: elog(ERROR, "unexpected heap_lock_tuple status: %u", res); break; } } index_endscan(scan); /* Don't release lock until commit. */ index_close(idxrel, NoLock); return found; }
/* ---------------------------------------------------------------- * ExecInitIndexScan * * Initializes the index scan's state information, creates * scan keys, and opens the base and index relations. * * Note: index scans have 2 sets of state information because * we have to keep track of the base relation and the * index relation. * ---------------------------------------------------------------- */ IndexScanState * ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) { IndexScanState *indexstate; Relation currentRelation; bool relistarget; /* * create state structure */ indexstate = makeNode(IndexScanState); indexstate->ss.ps.plan = (Plan *) node; indexstate->ss.ps.state = estate; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &indexstate->ss.ps); indexstate->ss.ps.ps_TupFromTlist = false; /* * initialize child expressions * * Note: we don't initialize all of the indexqual expression, only the * sub-parts corresponding to runtime keys (see below). Likewise for * indexorderby, if any. But the indexqualorig expression is always * initialized even though it will only be used in some uncommon cases --- * would be nice to improve that. (Problem is that any SubPlans present * in the expression must be found now...) */ indexstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) indexstate); indexstate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->scan.plan.qual, (PlanState *) indexstate); indexstate->indexqualorig = (List *) ExecInitExpr((Expr *) node->indexqualorig, (PlanState *) indexstate); /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &indexstate->ss.ps); ExecInitScanTupleSlot(estate, &indexstate->ss); /* * open the base relation and acquire appropriate lock on it. */ currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid); indexstate->ss.ss_currentRelation = currentRelation; indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */ /* * get the scan type from the relation descriptor. */ ExecAssignScanType(&indexstate->ss, RelationGetDescr(currentRelation)); /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&indexstate->ss.ps); ExecAssignScanProjectionInfo(&indexstate->ss); /* * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop * here. This allows an index-advisor plugin to EXPLAIN a plan containing * references to nonexistent indexes. */ if (eflags & EXEC_FLAG_EXPLAIN_ONLY) return indexstate; /* * Open the index relation. * * If the parent table is one of the target relations of the query, then * InitPlan already opened and write-locked the index, so we can avoid * taking another lock here. Otherwise we need a normal reader's lock. */ relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid); indexstate->iss_RelationDesc = index_open(node->indexid, relistarget ? NoLock : AccessShareLock); /* * Initialize index-specific scan state */ indexstate->iss_RuntimeKeysReady = false; indexstate->iss_RuntimeKeys = NULL; indexstate->iss_NumRuntimeKeys = 0; /* * build the index scan keys from the index qualification */ ExecIndexBuildScanKeys((PlanState *) indexstate, indexstate->iss_RelationDesc, node->scan.scanrelid, node->indexqual, false, &indexstate->iss_ScanKeys, &indexstate->iss_NumScanKeys, &indexstate->iss_RuntimeKeys, &indexstate->iss_NumRuntimeKeys, NULL, /* no ArrayKeys */ NULL); /* * any ORDER BY exprs have to be turned into scankeys in the same way */ ExecIndexBuildScanKeys((PlanState *) indexstate, indexstate->iss_RelationDesc, node->scan.scanrelid, node->indexorderby, true, &indexstate->iss_OrderByKeys, &indexstate->iss_NumOrderByKeys, &indexstate->iss_RuntimeKeys, &indexstate->iss_NumRuntimeKeys, NULL, /* no ArrayKeys */ NULL); /* * If we have runtime keys, we need an ExprContext to evaluate them. The * node's standard context won't do because we want to reset that context * for every tuple. So, build another context just like the other one... * -tgl 7/11/00 */ if (indexstate->iss_NumRuntimeKeys != 0) { ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext; ExecAssignExprContext(estate, &indexstate->ss.ps); indexstate->iss_RuntimeContext = indexstate->ss.ps.ps_ExprContext; indexstate->ss.ps.ps_ExprContext = stdecontext; } else { indexstate->iss_RuntimeContext = NULL; } /* * Initialize scan descriptor. */ indexstate->iss_ScanDesc = index_beginscan(currentRelation, indexstate->iss_RelationDesc, estate->es_snapshot, indexstate->iss_NumScanKeys, indexstate->iss_NumOrderByKeys); /* * If no run-time keys to calculate, go ahead and pass the scankeys to the * index AM. */ if (indexstate->iss_NumRuntimeKeys == 0) index_rescan(indexstate->iss_ScanDesc, indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys, indexstate->iss_OrderByKeys, indexstate->iss_NumOrderByKeys); /* * all done. */ return indexstate; }
/* ---------------- * index_getnext - get the next heap tuple from a scan * * The result is the next heap tuple satisfying the scan keys and the * snapshot, or NULL if no more matching tuples exist. On success, * the buffer containing the heap tuple is pinned (the pin will be dropped * at the next index_getnext or index_endscan). The index TID corresponding * to the heap tuple can be obtained if needed from scan->currentItemData. * ---------------- */ HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction) { HeapTuple heapTuple = &scan->xs_ctup; SCAN_CHECKS; /* Release any previously held pin */ if (BufferIsValid(scan->xs_cbuf)) { ReleaseBuffer(scan->xs_cbuf); scan->xs_cbuf = InvalidBuffer; } /* * If we already got a tuple and it must be unique, there's no need to * make the index AM look through any additional tuples. (This can * save a useful amount of work in scenarios where there are many dead * tuples due to heavy update activity.) * * To do this we must keep track of the logical scan position * (before/on/after tuple). Also, we have to be sure to release scan * resources before returning NULL; if we fail to do so then a * multi-index scan can easily run the system out of free buffers. We * can release index-level resources fairly cheaply by calling * index_rescan. This means there are two persistent states as far as * the index AM is concerned: on-tuple and rescanned. If we are * actually asked to re-fetch the single tuple, we have to go through * a fresh indexscan startup, which penalizes that (infrequent) case. */ if (scan->keys_are_unique && scan->got_tuple) { int new_tuple_pos = scan->unique_tuple_pos; if (ScanDirectionIsForward(direction)) { if (new_tuple_pos <= 0) new_tuple_pos++; } else { if (new_tuple_pos >= 0) new_tuple_pos--; } if (new_tuple_pos == 0) { /* * We are moving onto the unique tuple from having been off * it. We just fall through and let the index AM do the work. * Note we should get the right answer regardless of scan * direction. */ scan->unique_tuple_pos = 0; /* need to update position */ } else { /* * Moving off the tuple; must do amrescan to release * index-level pins before we return NULL. Since index_rescan * will reset my state, must save and restore... */ int unique_tuple_mark = scan->unique_tuple_mark; index_rescan(scan, NULL /* no change to key */ ); scan->keys_are_unique = true; scan->got_tuple = true; scan->unique_tuple_pos = new_tuple_pos; scan->unique_tuple_mark = unique_tuple_mark; return NULL; } } /* just make sure this is false... */ scan->kill_prior_tuple = false; for (;;) { bool found; uint16 sv_infomask; pgstat_count_index_scan(&scan->xs_pgstat_info); /* * The AM's gettuple proc finds the next tuple matching the scan * keys. index_beginscan already set up fn_getnext. */ found = DatumGetBool(FunctionCall2(&scan->fn_getnext, PointerGetDatum(scan), Int32GetDatum(direction))); /* Reset kill flag immediately for safety */ scan->kill_prior_tuple = false; if (!found) return NULL; /* failure exit */ /* * Fetch the heap tuple and see if it matches the snapshot. */ if (heap_fetch(scan->heapRelation, scan->xs_snapshot, heapTuple, &scan->xs_cbuf, true, &scan->xs_pgstat_info)) break; /* Skip if no tuple at this location */ if (heapTuple->t_data == NULL) continue; /* should we raise an error instead? */ /* * If we can't see it, maybe no one else can either. Check to see * if the tuple is dead to all transactions. If so, signal the * index AM to not return it on future indexscans. * * We told heap_fetch to keep a pin on the buffer, so we can * re-access the tuple here. But we must re-lock the buffer * first. Also, it's just barely possible for an update of hint * bits to occur here. */ LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE); sv_infomask = heapTuple->t_data->t_infomask; if (HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin) == HEAPTUPLE_DEAD) scan->kill_prior_tuple = true; if (sv_infomask != heapTuple->t_data->t_infomask) SetBufferCommitInfoNeedsSave(scan->xs_cbuf); LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); ReleaseBuffer(scan->xs_cbuf); scan->xs_cbuf = InvalidBuffer; } /* Success exit */ scan->got_tuple = true; /* * If we just fetched a known-unique tuple, then subsequent calls will * go through the short-circuit code above. unique_tuple_pos has been * initialized to 0, which is the correct state ("on row"). */ pgstat_count_index_getnext(&scan->xs_pgstat_info); return heapTuple; }