static void AOCSMoveTuple(TupleTableSlot *slot, AOCSInsertDesc insertDesc, ResultRelInfo *resultRelInfo, EState *estate) { AOTupleId *oldAoTupleId; AOTupleId newAoTupleId; Assert(resultRelInfo); Assert(slot); Assert(estate); oldAoTupleId = (AOTupleId *) slot_get_ctid(slot); /* Extract all the values of the tuple */ slot_getallattrs(slot); (void) aocs_insert_values(insertDesc, slot_get_values(slot), slot_get_isnull(slot), &newAoTupleId); /* insert index' tuples if needed */ if (resultRelInfo->ri_NumIndices > 0) { ExecInsertIndexTuples(slot, (ItemPointer) &newAoTupleId, estate); ResetPerTupleExprContext(estate); } elogif(Debug_appendonly_print_compaction, DEBUG5, "Compaction: Moved tuple (%d," INT64_FORMAT ") -> (%d," INT64_FORMAT ")", AOTupleIdGet_segmentFileNum(oldAoTupleId), AOTupleIdGet_rowNum(oldAoTupleId), AOTupleIdGet_segmentFileNum(&newAoTupleId), AOTupleIdGet_rowNum(&newAoTupleId)); }
void AppendOnlyThrowAwayTuple( Relation rel, MemTuple tuple, TupleTableSlot *slot, MemTupleBinding *mt_bind) { AOTupleId *oldAoTupleId; Assert(slot); Assert(mt_bind); oldAoTupleId = (AOTupleId*)slot_get_ctid(slot); /* Extract all the values of the tuple */ slot_getallattrs(slot); if (MemTupleHasExternal(tuple, mt_bind)) { toast_delete(rel, (HeapTuple) tuple, mt_bind); } elogif(Debug_appendonly_print_compaction, DEBUG5, "Compaction: Throw away tuple (%d," INT64_FORMAT ")", AOTupleIdGet_segmentFileNum(oldAoTupleId), AOTupleIdGet_rowNum(oldAoTupleId)); }
static void AppendOnlyMoveTuple(MemTuple tuple, TupleTableSlot *slot, MemTupleBinding *mt_bind, AppendOnlyInsertDesc insertDesc, ResultRelInfo *resultRelInfo, EState *estate) { AOTupleId *oldAoTupleId; Oid tupleOid; AOTupleId newAoTupleId; Assert(resultRelInfo); Assert(slot); Assert(mt_bind); Assert(estate); oldAoTupleId = (AOTupleId*)slot_get_ctid(slot); /* Extract all the values of the tuple */ slot_getallattrs(slot); tupleOid = MemTupleGetOid(tuple, mt_bind); appendonly_insert(insertDesc, tuple, &tupleOid, &newAoTupleId); /* insert index' tuples if needed */ if (resultRelInfo->ri_NumIndices > 0) { ExecInsertIndexTuples(slot, (ItemPointer)&newAoTupleId, estate, true); ResetPerTupleExprContext(estate); } elogif(Debug_appendonly_print_compaction, DEBUG5, "Compaction: Moved tuple (%d," INT64_FORMAT ") -> (%d," INT64_FORMAT ")", AOTupleIdGet_segmentFileNum(oldAoTupleId), AOTupleIdGet_rowNum(oldAoTupleId), AOTupleIdGet_segmentFileNum(&newAoTupleId), AOTupleIdGet_rowNum(&newAoTupleId)); }
/* * Returns the next matching tuple. */ TupleTableSlot * BitmapAOScanNext(ScanState *scanState) { BitmapTableScanState *node = (BitmapTableScanState *)scanState; TupleTableSlot *slot = node->ss.ss_ScanTupleSlot; TBMIterateResult *tbmres = (TBMIterateResult *)node->tbmres; /* Make sure we never cross 15-bit offset number [MPP-24326] */ Assert(tbmres->ntuples <= INT16_MAX + 1); OffsetNumber psuedoHeapOffset; ItemPointerData psudeoHeapTid; AOTupleId aoTid; Assert(tbmres != NULL && tbmres->ntuples != 0); Assert(node->needNewBitmapPage == false); AOIteratorState *iterator = (AOIteratorState *)node->iterator; for (;;) { CHECK_FOR_INTERRUPTS(); if (iterator == NULL) { iterator = palloc0(sizeof(AOIteratorState)); if (node->isLossyBitmapPage) { /* Iterate over the first 2^15 tuples [MPP-24326] */ iterator->nTuples = INT16_MAX + 1; } else { iterator->nTuples = tbmres->ntuples; } /* Start from the beginning of the page */ iterator->tupleIndex = 0; node->iterator = iterator; } else { /* * Continuing in previously obtained page; advance tupleIndex */ iterator->tupleIndex++; } /* * Out of range? If so, nothing more to look at on this page */ if (iterator->tupleIndex < 0 || iterator->tupleIndex >= iterator->nTuples) { pfree(iterator); node->iterator = NULL; node->needNewBitmapPage = true; return ExecClearTuple(slot); } /* * Must account for lossy page info... */ if (node->isLossyBitmapPage) { /* We are iterating through all items. */ psuedoHeapOffset = iterator->tupleIndex; } else { Assert(iterator->tupleIndex <= tbmres->ntuples); psuedoHeapOffset = tbmres->offsets[iterator->tupleIndex]; /* * Ensure that the reserved 16-th bit is always ON for offsets from * lossless bitmap pages [MPP-24326]. */ Assert(((uint16)(psuedoHeapOffset & 0x8000)) > 0); } /* * Okay to fetch the tuple */ ItemPointerSet( &psudeoHeapTid, tbmres->blockno, psuedoHeapOffset); tbm_convert_appendonly_tid_out(&psudeoHeapTid, &aoTid); if (scanState->tableType == TableTypeAppendOnly) { appendonly_fetch((AppendOnlyFetchDesc)node->scanDesc, &aoTid, slot); } else if (scanState->tableType == TableTypeParquet) { Assert(!"BitmapScan for Parquet is not supported yet"); /* Assert(scanState->tableType == TableTypeAOCS); aocs_fetch((AOCSFetchDesc)node->scanDesc, &aoTid, slot); */ } if (TupIsNull(slot)) { continue; } Assert(ItemPointerIsValid(slot_get_ctid(slot))); pgstat_count_heap_fetch(node->ss.ss_currentRelation); if (!BitmapTableScanRecheckTuple(node, slot)) { ExecClearTuple(slot); continue; } return slot; } /* * We should never reach here as the termination is handled * from nodeBitmapTableScan. */ Assert(false); return NULL; }
/* * Assumes that the segment file lock is already held. * Assumes that the segment file should be compacted. */ static bool AOCSSegmentFileFullCompaction(Relation aorel, AOCSInsertDesc insertDesc, AOCSFileSegInfo *fsinfo, Snapshot snapshot) { const char *relname; AppendOnlyVisimap visiMap; AOCSScanDesc scanDesc; TupleDesc tupDesc; TupleTableSlot *slot; int compact_segno; int64 movedTupleCount = 0; ResultRelInfo *resultRelInfo; MemTupleBinding *mt_bind; EState *estate; bool *proj; int i; AOTupleId *aoTupleId; int64 tupleCount = 0; int64 tuplePerPage = INT_MAX; Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY); Assert(RelationIsAoCols(aorel)); Assert(insertDesc); compact_segno = fsinfo->segno; if (fsinfo->varblockcount > 0) { tuplePerPage = fsinfo->total_tupcount / fsinfo->varblockcount; } relname = RelationGetRelationName(aorel); AppendOnlyVisimap_Init(&visiMap, aorel->rd_appendonly->visimaprelid, aorel->rd_appendonly->visimapidxid, ShareLock, snapshot); elogif(Debug_appendonly_print_compaction, LOG, "Compact AO segfile %d, relation %sd", compact_segno, relname); proj = palloc0(sizeof(bool) * RelationGetNumberOfAttributes(aorel)); for (i = 0; i < RelationGetNumberOfAttributes(aorel); ++i) { proj[i] = true; } scanDesc = aocs_beginrangescan(aorel, snapshot, snapshot, &compact_segno, 1, NULL, proj); tupDesc = RelationGetDescr(aorel); slot = MakeSingleTupleTableSlot(tupDesc); mt_bind = create_memtuple_binding(tupDesc); /* * We need a ResultRelInfo and an EState so we can use the regular * executor's index-entry-making machinery. */ estate = CreateExecutorState(); resultRelInfo = makeNode(ResultRelInfo); resultRelInfo->ri_RangeTableIndex = 1; /* dummy */ resultRelInfo->ri_RelationDesc = aorel; resultRelInfo->ri_TrigDesc = NULL; /* we don't fire triggers */ ExecOpenIndices(resultRelInfo); estate->es_result_relations = resultRelInfo; estate->es_num_result_relations = 1; estate->es_result_relation_info = resultRelInfo; while (aocs_getnext(scanDesc, ForwardScanDirection, slot)) { CHECK_FOR_INTERRUPTS(); aoTupleId = (AOTupleId *) slot_get_ctid(slot); if (AppendOnlyVisimap_IsVisible(&scanDesc->visibilityMap, aoTupleId)) { AOCSMoveTuple(slot, insertDesc, resultRelInfo, estate); movedTupleCount++; } else { /* Tuple is invisible and needs to be dropped */ AppendOnlyThrowAwayTuple(aorel, slot, mt_bind); } /* * Check for vacuum delay point after approximatly a var block */ tupleCount++; if (VacuumCostActive && tupleCount % tuplePerPage == 0) { vacuum_delay_point(); } } SetAOCSFileSegInfoState(aorel, compact_segno, AOSEG_STATE_AWAITING_DROP); AppendOnlyVisimap_DeleteSegmentFile(&visiMap, compact_segno); /* Delete all mini pages of the segment files if block directory exists */ if (OidIsValid(aorel->rd_appendonly->blkdirrelid)) { AppendOnlyBlockDirectory_DeleteSegmentFile(aorel, snapshot, compact_segno, 0); } elogif(Debug_appendonly_print_compaction, LOG, "Finished compaction: " "AO segfile %d, relation %s, moved tuple count " INT64_FORMAT, compact_segno, relname, movedTupleCount); AppendOnlyVisimap_Finish(&visiMap, NoLock); ExecCloseIndices(resultRelInfo); FreeExecutorState(estate); ExecDropSingleTupleTableSlot(slot); destroy_memtuple_binding(mt_bind); aocs_endscan(scanDesc); pfree(proj); return true; }
/* * Assumes that the segment file lock is already held. * Assumes that the segment file should be compacted. * */ static void AppendOnlySegmentFileFullCompaction(Relation aorel, AppendOnlyEntry *aoEntry, AppendOnlyInsertDesc insertDesc, FileSegInfo* fsinfo) { const char* relname; AppendOnlyVisimap visiMap; AppendOnlyScanDesc scanDesc; TupleDesc tupDesc; MemTuple tuple; TupleTableSlot *slot; MemTupleBinding *mt_bind; int compact_segno; int64 movedTupleCount = 0; ResultRelInfo *resultRelInfo; EState *estate; AOTupleId *aoTupleId; int64 tupleCount = 0; int64 tuplePerPage = INT_MAX; Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY); Assert(RelationIsAoRows(aorel)); Assert(insertDesc); compact_segno = fsinfo->segno; if (fsinfo->varblockcount > 0) { tuplePerPage = fsinfo->total_tupcount / fsinfo->varblockcount; } relname = RelationGetRelationName(aorel); AppendOnlyVisimap_Init(&visiMap, aoEntry->visimaprelid, aoEntry->visimapidxid, ShareUpdateExclusiveLock, SnapshotNow); elogif(Debug_appendonly_print_compaction, LOG, "Compact AO segno %d, relation %s, insert segno %d", compact_segno, relname, insertDesc->storageWrite.segmentFileNum); /* * Todo: We need to limit the scan to one file and we need to avoid to * lock the file again. * * We use SnapshotAny to get visible and invisible tuples. */ scanDesc = appendonly_beginrangescan(aorel, SnapshotAny, SnapshotNow, &compact_segno, 1, 0, NULL); tupDesc = RelationGetDescr(aorel); slot = MakeSingleTupleTableSlot(tupDesc); mt_bind = create_memtuple_binding(tupDesc); /* * We need a ResultRelInfo and an EState so we can use the regular * executor's index-entry-making machinery. */ estate = CreateExecutorState(); resultRelInfo = makeNode(ResultRelInfo); resultRelInfo->ri_RangeTableIndex = 1; /* dummy */ resultRelInfo->ri_RelationDesc = aorel; resultRelInfo->ri_TrigDesc = NULL; /* we don't fire triggers */ ExecOpenIndices(resultRelInfo); estate->es_result_relations = resultRelInfo; estate->es_num_result_relations = 1; estate->es_result_relation_info = resultRelInfo; /* * Go through all visible tuples and move them to a new segfile. */ while ((tuple = appendonly_getnext(scanDesc, ForwardScanDirection, slot)) != NULL) { /* Check interrupts as this may take time. */ CHECK_FOR_INTERRUPTS(); aoTupleId = (AOTupleId*)slot_get_ctid(slot); if (AppendOnlyVisimap_IsVisible(&scanDesc->visibilityMap, aoTupleId)) { AppendOnlyMoveTuple(tuple, slot, mt_bind, insertDesc, resultRelInfo, estate); movedTupleCount++; } else { /* Tuple is invisible and needs to be dropped */ AppendOnlyThrowAwayTuple(aorel, tuple, slot, mt_bind); } /* * Check for vacuum delay point after approximatly a var block */ tupleCount++; if (VacuumCostActive && tupleCount % tuplePerPage == 0) { vacuum_delay_point(); } } SetFileSegInfoState(aorel, aoEntry, compact_segno, AOSEG_STATE_AWAITING_DROP); AppendOnlyVisimap_DeleteSegmentFile(&visiMap, compact_segno); /* Delete all mini pages of the segment files if block directory exists */ if (OidIsValid(aoEntry->blkdirrelid)) { AppendOnlyBlockDirectory_DeleteSegmentFile( aoEntry, SnapshotNow, compact_segno, 0); } elogif(Debug_appendonly_print_compaction, LOG, "Finished compaction: " "AO segfile %d, relation %s, moved tuple count " INT64_FORMAT, compact_segno, relname, movedTupleCount); AppendOnlyVisimap_Finish(&visiMap, NoLock); ExecCloseIndices(resultRelInfo); FreeExecutorState(estate); ExecDropSingleTupleTableSlot(slot); destroy_memtuple_binding(mt_bind); appendonly_endscan(scanDesc); }
/* ---------------------------------------------------------------- * ExecInsert * * INSERTs have to add the tuple into * the base relation and insert appropriate tuples into the * index relations. * Insert can be part of an update operation when * there is a preceding SplitUpdate node. * ---------------------------------------------------------------- */ void ExecInsert(TupleTableSlot *slot, DestReceiver *dest, EState *estate, PlanGenerator planGen, bool isUpdate) { void *tuple = NULL; ResultRelInfo *resultRelInfo = NULL; Relation resultRelationDesc = NULL; Oid newId = InvalidOid; TupleTableSlot *partslot = NULL; AOTupleId aoTupleId = AOTUPLEID_INIT; bool rel_is_heap = false; bool rel_is_aorows = false; bool rel_is_aocols = false; bool rel_is_external = false; /* * get information on the (current) result relation */ if (estate->es_result_partitions) { resultRelInfo = slot_get_partition(slot, estate); /* Check whether the user provided the correct leaf part only if required */ if (!dml_ignore_target_partition_check) { Assert(NULL != estate->es_result_partitions->part && NULL != resultRelInfo->ri_RelationDesc); List *resultRelations = estate->es_plannedstmt->resultRelations; /* * Only inheritance can generate multiple result relations and inheritance * is not compatible with partitions. As we are in inserting in partitioned * table, we should not have more than one resultRelation */ Assert(list_length(resultRelations) == 1); /* We only have one resultRelations entry where the user originally intended to insert */ int rteIdxForUserRel = linitial_int(resultRelations); Assert (rteIdxForUserRel > 0); Oid userProvidedRel = InvalidOid; if (1 == rteIdxForUserRel) { /* Optimization for typical case */ userProvidedRel = ((RangeTblEntry *) estate->es_plannedstmt->rtable->head->data.ptr_value)->relid; } else { userProvidedRel = getrelid(rteIdxForUserRel, estate->es_plannedstmt->rtable); } /* Error out if user provides a leaf partition that does not match with our calculated partition */ if (userProvidedRel != estate->es_result_partitions->part->parrelid && userProvidedRel != resultRelInfo->ri_RelationDesc->rd_id) { ereport(ERROR, (errcode(ERRCODE_CHECK_VIOLATION), errmsg("Trying to insert row into wrong partition"), errdetail("Expected partition: %s, provided partition: %s", resultRelInfo->ri_RelationDesc->rd_rel->relname.data, estate->es_result_relation_info->ri_RelationDesc->rd_rel->relname.data))); } } estate->es_result_relation_info = resultRelInfo; } else { resultRelInfo = estate->es_result_relation_info; } Assert (!resultRelInfo->ri_projectReturning); resultRelationDesc = resultRelInfo->ri_RelationDesc; rel_is_heap = RelationIsHeap(resultRelationDesc); rel_is_aocols = RelationIsAoCols(resultRelationDesc); rel_is_aorows = RelationIsAoRows(resultRelationDesc); rel_is_external = RelationIsExternal(resultRelationDesc); partslot = reconstructMatchingTupleSlot(slot, resultRelInfo); if (rel_is_heap) { tuple = ExecFetchSlotHeapTuple(partslot); } else if (rel_is_aorows) { tuple = ExecFetchSlotMemTuple(partslot, false); } else if (rel_is_external) { if (estate->es_result_partitions && estate->es_result_partitions->part->parrelid != 0) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Insert into external partitions not supported."))); return; } else { tuple = ExecFetchSlotHeapTuple(partslot); } } else { Assert(rel_is_aocols); tuple = ExecFetchSlotMemTuple(partslot, true); } Assert(partslot != NULL && tuple != NULL); /* Execute triggers in Planner-generated plans */ if (planGen == PLANGEN_PLANNER) { /* BEFORE ROW INSERT Triggers */ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0) { HeapTuple newtuple; /* NYI */ if(rel_is_aocols) elog(ERROR, "triggers are not supported on tables that use column-oriented storage"); newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple); if (newtuple == NULL) /* "do nothing" */ { return; } if (newtuple != tuple) /* modified by Trigger(s) */ { /* * Put the modified tuple into a slot for convenience of routines * below. We assume the tuple was allocated in per-tuple memory * context, and therefore will go away by itself. The tuple table * slot should not try to clear it. */ TupleTableSlot *newslot = estate->es_trig_tuple_slot; if (newslot->tts_tupleDescriptor != partslot->tts_tupleDescriptor) ExecSetSlotDescriptor(newslot, partslot->tts_tupleDescriptor); ExecStoreGenericTuple(newtuple, newslot, false); newslot->tts_tableOid = partslot->tts_tableOid; /* for constraints */ tuple = newtuple; partslot = newslot; } } } /* * Check the constraints of the tuple */ if (resultRelationDesc->rd_att->constr && planGen == PLANGEN_PLANNER) { ExecConstraints(resultRelInfo, partslot, estate); } /* * insert the tuple * * Note: heap_insert returns the tid (location) of the new tuple in the * t_self field. * * NOTE: for append-only relations we use the append-only access methods. */ if (rel_is_aorows) { if (resultRelInfo->ri_aoInsertDesc == NULL) { /* Set the pre-assigned fileseg number to insert into */ ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos); resultRelInfo->ri_aoInsertDesc = appendonly_insert_init(resultRelationDesc, ActiveSnapshot, resultRelInfo->ri_aosegno, false); } appendonly_insert(resultRelInfo->ri_aoInsertDesc, tuple, &newId, &aoTupleId); } else if (rel_is_aocols) { if (resultRelInfo->ri_aocsInsertDesc == NULL) { ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos); resultRelInfo->ri_aocsInsertDesc = aocs_insert_init(resultRelationDesc, resultRelInfo->ri_aosegno, false); } newId = aocs_insert(resultRelInfo->ri_aocsInsertDesc, partslot); aoTupleId = *((AOTupleId*)slot_get_ctid(partslot)); } else if (rel_is_external) { /* Writable external table */ if (resultRelInfo->ri_extInsertDesc == NULL) resultRelInfo->ri_extInsertDesc = external_insert_init(resultRelationDesc); newId = external_insert(resultRelInfo->ri_extInsertDesc, tuple); } else { Insist(rel_is_heap); newId = heap_insert(resultRelationDesc, tuple, estate->es_snapshot->curcid, true, true, GetCurrentTransactionId()); } IncrAppended(); (estate->es_processed)++; (resultRelInfo->ri_aoprocessed)++; estate->es_lastoid = newId; partslot->tts_tableOid = RelationGetRelid(resultRelationDesc); if (rel_is_aorows || rel_is_aocols) { /* * insert index entries for AO Row-Store tuple */ if (resultRelInfo->ri_NumIndices > 0) ExecInsertIndexTuples(partslot, (ItemPointer)&aoTupleId, estate, false); } else { /* Use parttuple for index update in case this is an indexed heap table. */ TupleTableSlot *xslot = partslot; void *xtuple = tuple; setLastTid(&(((HeapTuple) xtuple)->t_self)); /* * insert index entries for tuple */ if (resultRelInfo->ri_NumIndices > 0) ExecInsertIndexTuples(xslot, &(((HeapTuple) xtuple)->t_self), estate, false); } if (planGen == PLANGEN_PLANNER) { /* AFTER ROW INSERT Triggers */ ExecARInsertTriggers(estate, resultRelInfo, tuple); } }