/*
 * Classify a persistent TID against what the store currently knows.
 *
 * The store's current maximum TID is copied into *maxTid unconditionally,
 * so the caller can report it whichever result comes back.
 *
 * Results, checked in this order:
 *   - BeforePersistenceWork: Persistent_BeforePersistenceWork() is true.
 *   - ScanNotPerformedYet:   the store still needs its scan into shared memory.
 *   - MaxTidIsZero:          the store's maximum TID is (0,0).
 *   - NotKnown:              persistentTid compares greater than the maximum TID.
 *   - Known:                 persistentTid is less than or equal to the maximum TID.
 */
PersistentTidIsKnownResult PersistentStore_TidIsKnown(
	PersistentStoreSharedData	*storeSharedData,
	ItemPointer					persistentTid,
	ItemPointer					maxTid)
{
	PersistentTidIsKnownResult	verdict;

	*maxTid = storeSharedData->maxTid;

	Assert(!PersistentStore_IsZeroTid(persistentTid));

	if (Persistent_BeforePersistenceWork())
	{
		verdict = PersistentTidIsKnownResult_BeforePersistenceWork;
	}
	else if (storeSharedData->needToScanIntoSharedMemory)
	{
		verdict = PersistentTidIsKnownResult_ScanNotPerformedYet;
	}
	else if (PersistentStore_IsZeroTid(&storeSharedData->maxTid))
	{
		verdict = PersistentTidIsKnownResult_MaxTidIsZero;
	}
	else if (ItemPointerCompare(persistentTid,
								&storeSharedData->maxTid) > 0)
	{
		/* Strictly beyond the highest TID seen so far. */
		verdict = PersistentTidIsKnownResult_NotKnown;
	}
	else
	{
		verdict = PersistentTidIsKnownResult_Known;
	}

	return verdict;
}
/*
 * Record one free persistent tuple, found during the init scan, in the
 * backend-local free entry hash table (created here on first use).
 *
 * persistentTid is the hash key; previousFreeTid and freeOrderNum are
 * stored in the new entry.
 *
 * Raises ERROR if either TID is (0,0) or if an entry for persistentTid is
 * already present.  The table is dumped for diagnosis before the zero
 * persistent TID and duplicate entry errors.
 */
static void PersistentStore_InitScanAddFreeEntry(
	PersistentStoreData 		*storeData,
	PersistentStoreSharedData	*storeSharedData,
	ItemPointer					persistentTid,
	ItemPointer					previousFreeTid,
	int64						freeOrderNum)
{
	PersistentFreeEntryKey	lookupKey;
	PersistentFreeEntry	   *freeEntry;
	bool					alreadyPresent;

	if (PersistentStore_IsZeroTid(persistentTid))
	{
		PersistentStore_DiagnoseDumpTable(storeData, storeSharedData);
		elog(ERROR, "Expected persistent TID to not be (0,0)");
	}

	if (PersistentStore_IsZeroTid(previousFreeTid))
		elog(ERROR, "Expected previous free TID to not be (0,0)");

	if (freeEntryHashTable == NULL)
		PersistentStore_FreeEntryHashTableInit();

	/* Zero the whole key first so that any padding bytes hash consistently. */
	MemSet(&lookupKey, 0, sizeof(lookupKey));
	lookupKey.persistentTid = *persistentTid;

	freeEntry = (PersistentFreeEntry *) hash_search(freeEntryHashTable,
													(void *) &lookupKey,
													HASH_ENTER,
													&alreadyPresent);
	if (alreadyPresent)
	{
		PersistentStore_DiagnoseDumpTable(storeData, storeSharedData);
		elog(ERROR, "Duplicate free persistent TID entry %s",
			 ItemPointerToString(persistentTid));
	}

	freeEntry->freeOrderNum = freeOrderNum;
	freeEntry->previousFreeTid = *previousFreeTid;
}
PersistentFileSysObjStateChangeResult PersistentRelation_MarkAbortingCreate( PersistentFileSysObjName *fsObjName, ItemPointer persistentTid, int64 persistentSerialNum, bool retryPossible) { WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; RelFileNode *relFileNode = &fsObjName->variant.rel.relFileNode; RelationDirEntry relationDirEntry; PersistentFileSysObjStateChangeResult stateChangeResult; if (RelFileNode_IsEmpty(relFileNode)) { elog(ERROR, "Invalid RelFileNode (0,0,0)"); } if (Persistent_BeforePersistenceWork()) { if (Debug_persistent_print) { elog(Persistent_DebugPrintLevel(), "Skipping persistent relation '%s' because we are before persistence work", relpath(*relFileNode)); /* * The initdb process will load the persistent table once we out of bootstrap mode. */ return PersistentFileSysObjStateChangeResult_None; } } PersistentRelation_VerifyInitScan(); /* * Do this check after skipping out if in bootstrap mode. */ if (PersistentStore_IsZeroTid(persistentTid)) { elog(ERROR, "TID for persistent '%s' tuple for mark DROP pending is invalid (0,0)", PersistentFileSysObjName_TypeAndObjectName(fsObjName)); } if (persistentSerialNum == 0) { elog(ERROR, "Persistent '%s' serial number for mark DROP pending is invalid (0)", PersistentFileSysObjName_TypeAndObjectName(fsObjName)); } WRITE_PERSISTENT_STATE_ORDERED_LOCK; relationDirEntry = PersistentRelation_FindEntryUnderLock(relFileNode); if (relationDirEntry == NULL) { elog(ERROR, "Did not find persistent relation entry %u/%u/%u", relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode); } if (relationDirEntry->state != PersistentFileSysState_CreatePending) { elog(ERROR, "Persistent relation entry %u/%u/%u expected to be in 'Create Pending' (actual state '%s')", relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode, PersistentFileSysObjState_Name(relationDirEntry->state)); } stateChangeResult = PersistentFileSysObj_StateChange( fsObjName, persistentTid, persistentSerialNum, 
PersistentFileSysState_AbortingCreate, retryPossible, /* flushToXLog */ false, /* oldState */ NULL, /* verifiedActionCallback */ NULL); relationDirEntry->state = PersistentFileSysState_AbortingCreate; WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; if (Debug_persistent_print) { elog(Persistent_DebugPrintLevel(), "Persistent relation: '%s' changed state from 'Create Pending' to 'Aborting Create', serial number " INT64_FORMAT " at TID %s (State-Change result '%s')", PersistentFileSysObjName_ObjectName(fsObjName), persistentSerialNum, ItemPointerToString(persistentTid), PersistentFileSysObjStateChangeResult_Name(stateChangeResult)); } return stateChangeResult; }
/*
 * Sanity-check every file-system action carried by an end-of-transaction
 * record.
 *
 * Nothing is checked while InRecovery is set or before persistence work has
 * started.  For each of the 'count' entries this raises ERROR on an invalid
 * action, an invalid object type, a (0,0) TID, a zero serial number, or
 * (relation files only) an invalid relation storage manager.  A TID that
 * cannot be confirmed as known is only reported through the debug log.
 */
static void PersistentEndXactRec_VerifyFileSysActionInfos(
	EndXactRecKind						endXactRecKind,
	PersistentEndXactFileSysActionInfo	*fileSysActionInfos,
	int									count)
{
	ItemPointerData		lastKnownTid;
	int					idx;

	if (InRecovery)
		return;
	if (Persistent_BeforePersistenceWork())
		return;

	for (idx = 0; idx < count; idx++)
	{
		PersistentEndXactFileSysActionInfo *info = &fileSysActionInfos[idx];
		PersistentTidIsKnownResult tidStatus;

		if (!PersistentEndXactFileSysAction_IsValid(info->action))
		{
			elog(ERROR, "Persistent file-system action is invalid (%d) (index %d, transaction kind '%s')",
				 info->action,
				 idx,
				 EndXactRecKind_Name(endXactRecKind));
		}

		if (!PersistentFsObjType_IsValid(info->fsObjName.type))
		{
			elog(ERROR, "Persistent file-system object type is invalid (%d) (index %d, transaction kind '%s')",
				 info->fsObjName.type,
				 idx,
				 EndXactRecKind_Name(endXactRecKind));
		}

		if (PersistentStore_IsZeroTid(&info->persistentTid))
		{
			elog(ERROR, "TID for persistent '%s' tuple is invalid (0,0) (index %d, transaction kind '%s')",
				 PersistentFileSysObjName_TypeAndObjectName(&info->fsObjName),
				 idx,
				 EndXactRecKind_Name(endXactRecKind));
		}

		tidStatus = PersistentFileSysObj_TidIsKnown(info->fsObjName.type,
													&info->persistentTid,
													&lastKnownTid);
		switch (tidStatus)
		{
			case PersistentTidIsKnownResult_Known:
				/* OK */
				break;

			case PersistentTidIsKnownResult_BeforePersistenceWork:
				elog(ERROR, "Shouldn't being trying to verify persistent TID before persistence work");
				break;

			case PersistentTidIsKnownResult_ScanNotPerformedYet:
				/* UNDONE: For now, just debug log this. */
				if (Debug_persistent_print)
				{
					elog(Persistent_DebugPrintLevel(),
						 "Can't verify persistent TID if we haven't done the persistent scan yet");
				}
				break;

			case PersistentTidIsKnownResult_MaxTidIsZero:
				/* UNDONE: For now, just debug log this. */
				if (Debug_persistent_print)
				{
					elog(Persistent_DebugPrintLevel(),
						 "TID for persistent '%s' tuple TID %s and the last known TID zero (0,0) (index %d, transaction kind '%s')",
						 PersistentFileSysObjName_TypeAndObjectName(&info->fsObjName),
						 ItemPointerToString(&info->persistentTid),
						 idx,
						 EndXactRecKind_Name(endXactRecKind));
				}
				break;

			case PersistentTidIsKnownResult_NotKnown:
				/* UNDONE: For now, just debug log this. */
				if (Debug_persistent_print)
				{
					elog(Persistent_DebugPrintLevel(),
						 "TID for persistent '%s' tuple TID %s is beyond the last known TID %s (index %d, transaction kind '%s')",
						 PersistentFileSysObjName_TypeAndObjectName(&info->fsObjName),
						 ItemPointerToString(&info->persistentTid),
						 ItemPointerToString2(&lastKnownTid),
						 idx,
						 EndXactRecKind_Name(endXactRecKind));
				}
				break;

			default:
				elog(ERROR, "Unexpected persistent file-system TID is known result: %d",
					 tidStatus);
		}

		if (info->persistentSerialNum == 0)
		{
			elog(ERROR, "Persistent '%s' serial number is invalid (0) (index %d, transaction kind '%s')",
				 PersistentFileSysObjName_TypeAndObjectName(&info->fsObjName),
				 idx,
				 EndXactRecKind_Name(endXactRecKind));
		}

		/* The storage manager is only meaningful for relation files. */
		if (info->fsObjName.type == PersistentFsObjType_RelationFile &&
			!PersistentFileSysRelStorageMgr_IsValid(info->relStorageMgr))
		{
			elog(ERROR, "Persistent '%s' relation storage manager has invalid value (%d) (index %d, transaction kind '%s')",
				 PersistentFileSysObjName_TypeAndObjectName(&info->fsObjName),
				 info->relStorageMgr,
				 idx,
				 EndXactRecKind_Name(endXactRecKind));
		}
	}
}
/*
 * Log the relation change information extracted from one XLOG record and
 * report whether any of it looks suspect.
 *
 * For every change not exempted by GpPersistent_SkipXLogInfo() the
 * persistent TID and serial number are examined.  A "skip issue" is a zero
 * or invalid TID, or a zero, negative or too-large serial number.  The upper
 * bound on the serial number is only enforced when the maximum is non-zero.
 *
 * When printSkipIssuesOnly is true only problematic changes are logged;
 * otherwise every change is logged.
 *
 * Returns true if at least one change had a skip issue.
 */
bool ChangeTracking_PrintRelationChangeInfo(
	RmgrId		xl_rmid,
	uint8		xl_info,
	void		*data,
	XLogRecPtr	*loc,
	bool		weAreGeneratingXLogNow,
	bool		printSkipIssuesOnly)
{
	int					changeCount;
	int					idx;
	bool				sawSkipIssue = false;
	int					capacity = ChangeTracking_GetInfoArrayDesiredMaxLength(xl_rmid, xl_info);
	RelationChangeInfo	changes[capacity];

	ChangeTracking_GetRelationChangeInfoFromXlog(xl_rmid,
												 xl_info,
												 data,
												 changes,
												 &changeCount,
												 capacity);

	for (idx = 0; idx < changeCount; idx++)
	{
		RelationChangeInfo *change = &changes[idx];
		int64	serialNumLimit;
		bool	skip;
		bool	zeroTid = false;
		bool	invalidTid = false;
		bool	zeroSerialNum = false;
		bool	invalidSerialNum = false;
		bool	skipIssue = false;

		serialNumLimit = weAreGeneratingXLogNow
							? PersistentRelfile_MyHighestSerialNum()
							: PersistentRelfile_CurrentMaxSerialNum();

		skip = GpPersistent_SkipXLogInfo(change->relFileNode.relNode);
		if (!skip)
		{
			zeroTid = PersistentStore_IsZeroTid(&change->persistentTid);
			invalidTid = !zeroTid &&
						 !ItemPointerIsValid(&change->persistentTid);

			zeroSerialNum = (change->persistentSerialNum == 0);
			if (!zeroSerialNum)
			{
				/*
				 * A zero limit means the scan has not been done yet, so
				 * there is no upper bound to check against.
				 */
				invalidSerialNum =
					(change->persistentSerialNum < 0) ||
					(serialNumLimit != 0 &&
					 change->persistentSerialNum > serialNumLimit);
			}

			skipIssue = (zeroTid || invalidTid || zeroSerialNum || invalidSerialNum);
		}

		if (skipIssue || !printSkipIssuesOnly)
		{
			elog(LOG,
				 "ChangeTracking_PrintRelationChangeInfo: [%d] xl_rmid %d, xl_info 0x%X, %u/%u/%u, block number %u, LSN %s, persistent serial num " INT64_FORMAT ", TID %s, maxPersistentSerialNum " INT64_FORMAT ", skip %s, zeroTid %s, invalidTid %s, zeroSerialNum %s, invalidSerialNum %s, skipIssue %s",
				 idx,
				 xl_rmid,
				 xl_info,
				 change->relFileNode.spcNode,
				 change->relFileNode.dbNode,
				 change->relFileNode.relNode,
				 change->blockNumber,
				 XLogLocationToString(loc),
				 change->persistentSerialNum,
				 ItemPointerToString(&change->persistentTid),
				 serialNumLimit,
				 (skip ? "true" : "false"),
				 (zeroTid ? "true" : "false"),
				 (invalidTid ? "true" : "false"),
				 (zeroSerialNum ? "true" : "false"),
				 (invalidSerialNum ? "true" : "false"),
				 (skipIssue ? "true" : "false"));
		}

		if (skipIssue)
			sawSkipIssue = true;
	}

	return sawSkipIssue;
}
/*
 * Scan the whole persistent table once and rebuild the store's
 * shared-memory bookkeeping from what is found.
 *
 * While scanning (in ascending TID order) this:
 *   - tracks the highest TID seen in storeSharedData->maxTid;
 *   - for free tuples (non-zero previous free TID), tracks the highest free
 *     order number and its TID, and, unless gp_persistent_skip_free_list is
 *     set, records the tuple in the free entry hash table;
 *   - for in-use tuples, counts them and tracks the highest serial number;
 *   - invokes storeData->scanTupleCallback, if set, on every tuple.
 *
 * After the scan the global sequence is compared with the highest in-use
 * serial number.  A sequence that is behind raises ERROR unless
 * gp_persistent_repair_global_sequence is set, in which case it is only
 * logged.  Finally the free list is verified unless
 * gp_persistent_skip_free_list is set.
 *
 * Must not be called during recovery (asserted).
 */
static void PersistentStore_DoInitScan(
	PersistentStoreData 		*storeData,
	PersistentStoreSharedData	*storeSharedData)
{
	PersistentStoreScan storeScan;

	ItemPointerData			persistentTid;
	int64					persistentSerialNum;

	ItemPointerData			previousFreeTid;

	Datum					*values;

	int64			globalSequenceNum;

	values = (Datum*)palloc(storeData->numAttributes * sizeof(Datum));

	MemSet(&storeSharedData->maxTid, 0, sizeof(ItemPointerData));

	PersistentStore_BeginScan(
						storeData,
						storeSharedData,
						&storeScan);

	while (PersistentStore_GetNext(
							&storeScan,
							values,
							&persistentTid,
							&persistentSerialNum))
	{
		/*
		 * We are scanning from low to high TID.
		 */
		Assert(
			PersistentStore_IsZeroTid(&storeSharedData->maxTid)
			||
			ItemPointerCompare(
							&storeSharedData->maxTid,
							&persistentTid) == -1);	// Less-Than.

		storeSharedData->maxTid = persistentTid;

		PersistentStore_ExtractOurTupleData(
									storeData,
									values,
									&persistentSerialNum,
									&previousFreeTid);

		if (Debug_persistent_recovery_print)
			(*storeData->printTupleCallback)(
										PersistentRecovery_DebugPrintLevel(),
										"SCAN",
										&persistentTid,
										values);

		if (!PersistentStore_IsZeroTid(&previousFreeTid))
		{
			/*
			 * Non-zero previousFreeTid implies a free entry.  For free
			 * entries the serial number column holds the free order number.
			 */
			if (storeSharedData->maxFreeOrderNum < persistentSerialNum)
			{
				storeSharedData->maxFreeOrderNum = persistentSerialNum;
				storeSharedData->freeTid = persistentTid;
			}

			if (!gp_persistent_skip_free_list)
			{
				/*
				 * Pass the store handles as well: the helper takes them as
				 * its first two parameters and uses them to dump the table
				 * when it detects a bad or duplicate entry.
				 */
				PersistentStore_InitScanAddFreeEntry(
												storeData,
												storeSharedData,
												&persistentTid,
												&previousFreeTid,
												/* freeOrderNum */ persistentSerialNum);
			}
		}
		else
		{
			storeSharedData->inUseCount++;

			if (storeSharedData->maxInUseSerialNum < persistentSerialNum)
			{
				storeSharedData->maxInUseSerialNum = persistentSerialNum;
				storeData->myHighestSerialNum = storeSharedData->maxInUseSerialNum;
			}
		}

		if (storeData->scanTupleCallback != NULL)
			(*storeData->scanTupleCallback)(
										&persistentTid,
										persistentSerialNum,
										values);
	}

	PersistentStore_EndScan(&storeScan);

	pfree(values);

	globalSequenceNum = GlobalSequence_Current(storeData->gpGlobalSequence);

	/*
	 * Note: Originally the below IF STMT was guarded with a InRecovery flag check.
	 * However, this routine should not be called during recovery since the entries are
	 * not consistent...
	 */
	Assert(!InRecovery);

	if (globalSequenceNum < storeSharedData->maxInUseSerialNum)
	{
		/*
		 * We seem to have a corruption problem.
		 *
		 * Use the gp_persistent_repair_global_sequence GUC to get the system up.
		 */
		if (gp_persistent_repair_global_sequence)
		{
			elog(LOG, "Need to Repair global sequence number " INT64_FORMAT " so use scanned maximum value " INT64_FORMAT " ('%s')",
				 globalSequenceNum,
				 storeSharedData->maxInUseSerialNum,
				 storeData->tableName);
		}
		else
		{
			elog(ERROR, "Global sequence number " INT64_FORMAT " less than maximum value " INT64_FORMAT " found in scan ('%s')",
				 globalSequenceNum,
				 storeSharedData->maxInUseSerialNum,
				 storeData->tableName);
		}
	}
	else
	{
		storeSharedData->maxCachedSerialNum = globalSequenceNum;
	}

	if (Debug_persistent_recovery_print)
		elog(PersistentRecovery_DebugPrintLevel(),
			 "PersistentStore_DoInitScan ('%s'): maximum in-use serial number " INT64_FORMAT ", maximum free order number " INT64_FORMAT ", free TID %s, maximum known TID %s",
			 storeData->tableName,
			 storeSharedData->maxInUseSerialNum,
			 storeSharedData->maxFreeOrderNum,
			 ItemPointerToString(&storeSharedData->freeTid),
			 ItemPointerToString2(&storeSharedData->maxTid));

	if (!gp_persistent_skip_free_list)
	{
		PersistentStore_InitScanVerifyFreeEntries(
											storeData,
											storeSharedData);
	}
	else
	{
		if (Debug_persistent_recovery_print)
			elog(PersistentRecovery_DebugPrintLevel(),
				 "PersistentStore_DoInitScan ('%s'): Skipping verification because gp_persistent_skip_free_list GUC is ON",
				 storeData->tableName);
	}
}
/*
 * Verify the free list collected during the init scan.
 *
 * Starting at the store's free TID (the entry with the highest free order
 * number), follow the previous-free-TID links downwards.  Each step must
 * land on a hash table entry whose order number is exactly one lower than
 * the last, and the entry with order number 1 must point at itself.
 * Verified entries are removed from the hash table as the walk proceeds.
 *
 * Anything still left in the hash table after the walk is an unaccounted
 * free entry and raises ERROR.  The hash table is destroyed once it has
 * been verified.
 */
static void PersistentStore_InitScanVerifyFreeEntries(
	PersistentStoreData 		*storeData,
	PersistentStoreSharedData	*storeSharedData)
{
	int64					expectedOrderNum = storeSharedData->maxFreeOrderNum;
	ItemPointerData			walkTid = storeSharedData->freeTid;
	PersistentFreeEntry	   *entry;
	HASH_SEQ_STATUS			seqStatus;

	if (expectedOrderNum != 0)
	{
		PersistentFreeEntryKey	lookupKey;
		PersistentFreeEntry	   *removed;

		if (freeEntryHashTable == NULL)
			elog(ERROR, "Looking for free order number " INT64_FORMAT " and the free entry hash table is empty for '%s'",
				 expectedOrderNum,
				 storeData->tableName);

		do
		{
			MemSet(&lookupKey, 0, sizeof(lookupKey));
			lookupKey.persistentTid = walkTid;

			entry = (PersistentFreeEntry *) hash_search(freeEntryHashTable,
														(void *) &lookupKey,
														HASH_FIND,
														NULL);
			if (entry == NULL)
				elog(ERROR, "Did not find free entry for free TID %s (free order number " INT64_FORMAT ") for '%s'",
					 ItemPointerToString(&walkTid),
					 expectedOrderNum,
					 storeData->tableName);

			if (PersistentStore_IsZeroTid(&entry->previousFreeTid))
				elog(ERROR, "Previous free TID not expected to be (0,0) -- persistent Free Entry hashtable corrupted for '%s' "
					 "(expected free order number " INT64_FORMAT ", entry free order number " INT64_FORMAT ")",
					 storeData->tableName,
					 expectedOrderNum,
					 entry->freeOrderNum);

			if (entry->freeOrderNum != expectedOrderNum)
				elog(ERROR, "Free entry for free TID %s has wrong free order number (expected free order number " INT64_FORMAT ", found free order number " INT64_FORMAT ") for '%s'",
					 ItemPointerToString(&walkTid),
					 expectedOrderNum,
					 entry->freeOrderNum,
					 storeData->tableName);

			if (Debug_persistent_recovery_print)
				elog(PersistentRecovery_DebugPrintLevel(),
					 "PersistentStore_InitScanVerifyFreeEntries ('%s'): Free order number " INT64_FORMAT ", free TID %s, previous free TID %s",
					 storeData->tableName,
					 expectedOrderNum,
					 ItemPointerToString(&walkTid),
					 ItemPointerToString2(&entry->previousFreeTid));

			/* Step to the previous link; the zero case was rejected above. */
			walkTid = entry->previousFreeTid;
			Insist(!PersistentStore_IsZeroTid(&walkTid));

			if (expectedOrderNum == 1)
			{
				/*
				 * The last free entry uses its own TID in previous_free_tid.
				 */
				if (ItemPointerCompare(&entry->key.persistentTid,
									   &walkTid) != 0)
				{
					elog(ERROR, "Expected previous_free_tid %s to match the persistent TID %s for the last free entry (free order number 1) for '%s'",
						 ItemPointerToString(&walkTid),
						 ItemPointerToString2(&entry->key.persistentTid),
						 storeData->tableName);
				}
			}

			removed = hash_search(freeEntryHashTable,
								  (void *) &entry->key,
								  HASH_REMOVE,
								  NULL);
			if (removed == NULL)
				elog(ERROR, "Persistent Free Entry hashtable corrupted for '%s'",
					 storeData->tableName);
			entry = NULL;

			expectedOrderNum--;
		} while (expectedOrderNum != 0);
	}
	else if (!PersistentStore_IsZeroTid(&walkTid))
	{
		elog(ERROR, "Expected free TID to be (0,0) when free order number is 0 in '%s'",
			 storeData->tableName);
	}

	if (freeEntryHashTable != NULL)
	{
		hash_seq_init(&seqStatus, freeEntryHashTable);

		/*
		 * Verify the hash table has no free entries left.
		 */
		for (entry = hash_seq_search(&seqStatus);
			 entry != NULL;
			 entry = hash_seq_search(&seqStatus))
		{
			elog(ERROR, "Found at least one unaccounted for free entry for '%s'.  Example: free order number " INT64_FORMAT ", free TID %s, previous free TID %s",
				 storeData->tableName,
				 entry->freeOrderNum,
				 ItemPointerToString(&entry->key.persistentTid),
				 ItemPointerToString2(&entry->previousFreeTid));
		}

		hash_destroy(freeEntryHashTable);
		freeEntryHashTable = NULL;
	}

	if (Debug_persistent_recovery_print)
		elog(PersistentRecovery_DebugPrintLevel(),
			 "PersistentStore_InitScanVerifyFreeEntries ('%s'): Successfully verified " INT64_FORMAT " free entries",
			 storeData->tableName,
			 storeSharedData->maxFreeOrderNum);
}
/*
 * Pop the most recently freed tuple off the store's free list.
 *
 * On success *freeTid receives the TID of the popped tuple, the shared free
 * TID moves to that tuple's previous free TID, the maximum free order
 * number is decremented, and true is returned.
 *
 * Returns false, with *freeTid left as (0,0), when the free list is empty
 * or when gp_persistent_skip_free_list is set.
 *
 * Raises ERROR if the tuple at the head of the free list is not actually
 * free or carries an unexpected order number.
 */
static bool PersistentStore_GetFreeTuple(
	PersistentStoreData 		*storeData,
	PersistentStoreSharedData 	*storeSharedData,
	ItemPointer					freeTid)
{
	Datum			*tupleValues;
	HeapTuple		headTuple;
	int64			headOrderNum;
	ItemPointerData	nextFreeTid;

	MemSet(freeTid, 0, sizeof(ItemPointerData));

	if (Debug_persistent_store_print)
	{
		elog(PersistentStore_DebugPrintLevel(),
			 "PersistentStore_GetFreeTuple: Enter: maximum free order number " INT64_FORMAT ", free TID %s ('%s')",
			 storeSharedData->maxFreeOrderNum,
			 ItemPointerToString(&storeSharedData->freeTid),
			 storeData->tableName);
	}

	/* No free tuples. */
	if (storeSharedData->maxFreeOrderNum == 0)
		return false;

	if (gp_persistent_skip_free_list)
	{
		if (Debug_persistent_store_print)
		{
			elog(PersistentStore_DebugPrintLevel(),
				 "PersistentStore_GetFreeTuple: Skipping because gp_persistent_skip_free_list GUC is ON ('%s')",
				 storeData->tableName);
		}

		/* Pretend no free tuples. */
		return false;
	}

	Assert(storeSharedData->freeTid.ip_posid != 0);

	/*
	 * Read the current last free tuple.
	 */
	tupleValues = (Datum *) palloc(storeData->numAttributes * sizeof(Datum));

	PersistentStore_ReadTuple(storeData,
							  storeSharedData,
							  &storeSharedData->freeTid,
							  tupleValues,
							  &headTuple);

	PersistentStore_ExtractOurTupleData(storeData,
										tupleValues,
										&headOrderNum,
										&nextFreeTid);

	if (PersistentStore_IsZeroTid(&nextFreeTid))
	{
		elog(ERROR, "Expected persistent store tuple at %s to be free ('%s')",
			 ItemPointerToString(&storeSharedData->freeTid),
			 storeData->tableName);
	}

	/* The last remaining free tuple links to itself. */
	Assert(storeSharedData->maxFreeOrderNum != 1 ||
		   ItemPointerCompare(&nextFreeTid, &storeSharedData->freeTid) == 0);

	if (headOrderNum != storeSharedData->maxFreeOrderNum)
	{
		elog(ERROR, "Expected persistent store tuple at %s to have order number " INT64_FORMAT " (found " INT64_FORMAT ", '%s')",
			 ItemPointerToString(&storeSharedData->freeTid),
			 storeSharedData->maxFreeOrderNum,
			 headOrderNum,
			 storeData->tableName);
	}

	/* Hand out the head and advance the list to the previous free tuple. */
	*freeTid = storeSharedData->freeTid;
	storeSharedData->maxFreeOrderNum--;
	storeSharedData->freeTid = nextFreeTid;

	pfree(tupleValues);
	heap_freetuple(headTuple);

	if (Debug_persistent_store_print)
	{
		elog(PersistentStore_DebugPrintLevel(),
			 "PersistentStore_GetFreeTuple: Exit: maximum free order number " INT64_FORMAT ", free TID %s ('%s')",
			 storeSharedData->maxFreeOrderNum,
			 ItemPointerToString(&storeSharedData->freeTid),
			 storeData->tableName);
	}

	return true;
}
void PersistentStore_ReadTuple( PersistentStoreData *storeData, PersistentStoreSharedData *storeSharedData, ItemPointer readTid, Datum *values, HeapTuple *tupleCopy) { Relation persistentRel; HeapTupleData tuple; Buffer buffer; bool *nulls; #ifdef USE_ASSERT_CHECKING if (storeSharedData == NULL || !PersistentStoreSharedData_EyecatcherIsValid(storeSharedData)) elog(ERROR, "Persistent store shared-memory not valid"); #endif if (Debug_persistent_store_print) elog(PersistentStore_DebugPrintLevel(), "PersistentStore_ReadTuple: Going to read tuple at TID %s ('%s', shared data %p)", ItemPointerToString(readTid), storeData->tableName, storeSharedData); if (PersistentStore_IsZeroTid(readTid)) elog(ERROR, "TID for fetch persistent tuple is invalid (0,0) ('%s')", storeData->tableName); // UNDONE: I think the InRecovery test only applies to physical Master Mirroring on Standby. /* Only test this outside of recovery scenarios */ if (!InRecovery && (PersistentStore_IsZeroTid(&storeSharedData->maxTid) || ItemPointerCompare( readTid, &storeSharedData->maxTid) == 1 // Greater-than. )) { elog(ERROR, "TID %s for fetch persistent tuple is greater than the last known TID %s ('%s')", ItemPointerToString(readTid), ItemPointerToString2(&storeSharedData->maxTid), storeData->tableName); } persistentRel = (*storeData->openRel)(); tuple.t_self = *readTid; if (!heap_fetch(persistentRel, SnapshotAny, &tuple, &buffer, false, NULL)) { elog(ERROR, "Failed to fetch persistent tuple at %s (maximum known TID %s, '%s')", ItemPointerToString(&tuple.t_self), ItemPointerToString2(&storeSharedData->maxTid), storeData->tableName); } *tupleCopy = heaptuple_copy_to(&tuple, NULL, NULL); ReleaseBuffer(buffer); /* * In order to keep the tuples the exact same size to enable direct reuse of * free tuples, we do not use NULLs. 
*/ nulls = (bool*)palloc(storeData->numAttributes * sizeof(bool)); heap_deform_tuple(*tupleCopy, persistentRel->rd_att, values, nulls); (*storeData->closeRel)(persistentRel); if (Debug_persistent_store_print) { elog(PersistentStore_DebugPrintLevel(), "PersistentStore_ReadTuple: Successfully read tuple at TID %s ('%s')", ItemPointerToString(readTid), storeData->tableName); (*storeData->printTupleCallback)( PersistentStore_DebugPrintLevel(), "STORE READ TUPLE", readTid, values); } pfree(nulls); }
/* * Indicate we are aborting the create of a relation file. * * This state will make sure the relation gets dropped after a system crash. */ PersistentFileSysObjStateChangeResult PersistentRelation_MarkAbortingCreate( RelFileNode *relFileNode, /* The tablespace, database, and relation OIDs for the aborting create. */ int32 segmentFileNum, ItemPointer persistentTid, /* TID of the gp_persistent_rel_files tuple for the relation. */ int64 persistentSerialNum, /* Serial number for the relation. Distinquishes the uses of the tuple. */ bool retryPossible) { WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; PersistentFileSysObjName fsObjName; PersistentFileSysObjStateChangeResult stateChangeResult; if(RelFileNode_IsEmpty(relFileNode)) elog(ERROR, "Invalid RelFileNode (0,0,0)"); if (Persistent_BeforePersistenceWork()) { if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Skipping persistent relation '%s' because we are before persistence work", relpath(*relFileNode)); return false; // The initdb process will load the persistent table once we out of bootstrap mode. } /* MPP-16543: When inserting tuples into AO table, row numbers will be * generated from gp_fastsequence catalog table, as part of the design, * these sequence numbers are not reusable, even if the AO insert * transaction is aborted. The entry in gp_fastsequence was inserted * using frozen_heap_insert, which means it's always visible. * Aborted AO insert transaction will cause inconsistency between * gp_fastsequence and pg_class, the solution is to introduce "frozen * delete" - inplace update tuple's MVCC header to make it invisible. 
*/ Relation gp_fastsequence_rel = heap_open(FastSequenceRelationId, RowExclusiveLock); HeapTuple tup; SysScanDesc scan; ScanKeyData skey; ScanKeyInit(&skey, Anum_gp_fastsequence_objid, BTEqualStrategyNumber, F_OIDEQ, relFileNode->relNode); scan = systable_beginscan(gp_fastsequence_rel, InvalidOid, false, SnapshotNow, 1, &skey); while (HeapTupleIsValid(tup = systable_getnext(scan))) { Form_gp_fastsequence found = (Form_gp_fastsequence) GETSTRUCT(tup); if (found->objid == relFileNode->relNode) { if (Debug_persistent_print) { elog(LOG, "frozen deleting gp_fastsequence entry for aborted AO insert transaction on relation %s", relpath(*relFileNode)); } frozen_heap_inplace_delete(gp_fastsequence_rel, tup); } } systable_endscan(scan); heap_close(gp_fastsequence_rel, RowExclusiveLock); PersistentRelation_VerifyInitScan(); PersistentFileSysObjName_SetRelationFile( &fsObjName, relFileNode, segmentFileNum); // Do this check after skipping out if in bootstrap mode. if (PersistentStore_IsZeroTid(persistentTid)) elog(ERROR, "TID for persistent '%s' tuple for mark DROP pending is invalid (0,0)", PersistentFileSysObjName_TypeAndObjectName(&fsObjName)); if (persistentSerialNum == 0) elog(ERROR, "Persistent '%s' serial number for mark DROP pending is invalid (0)", PersistentFileSysObjName_TypeAndObjectName(&fsObjName)); WRITE_PERSISTENT_STATE_ORDERED_LOCK; stateChangeResult = PersistentFileSysObj_StateChange( &fsObjName, persistentTid, persistentSerialNum, PersistentFileSysState_AbortingCreate, retryPossible, /* flushToXlog */ false, /* oldState */ NULL, /* verifiedActionCallback */ NULL); WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Persistent relation: '%s' changed state from 'Create Pending' to 'Aborting Create', serial number " INT64_FORMAT " at TID %s (State-Change result '%s')", PersistentFileSysObjName_ObjectName(&fsObjName), persistentSerialNum, ItemPointerToString(persistentTid), 
PersistentFileSysObjStateChangeResult_Name(stateChangeResult)); return stateChangeResult; }
/* * Indicate we intend to drop a relation file as part of the current transaction. * * This relation file to drop will be listed inside a commit, distributed commit, a distributed * prepared, and distributed commit prepared XOG records. * * For any of the commit type records, once that XLOG record is flushed then the actual * file-system delete will occur. The flush guarantees the action will be retried after system * crash. */ PersistentFileSysObjStateChangeResult PersistentRelation_MarkDropPending( RelFileNode *relFileNode, /* The tablespace, database, and relation OIDs for the drop. */ int32 segmentFileNum, ItemPointer persistentTid, /* TID of the gp_persistent_rel_files tuple for the relation. */ int64 persistentSerialNum, /* Serial number for the relation. Distinquishes the uses of the tuple. */ bool retryPossible) { WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; PersistentFileSysObjName fsObjName; PersistentFileSysState oldState; PersistentFileSysObjStateChangeResult stateChangeResult; if(RelFileNode_IsEmpty(relFileNode)) elog(ERROR, "Invalid RelFileNode (0,0,0)"); if (Persistent_BeforePersistenceWork()) { if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Skipping persistent relation '%s' because we are before persistence work", relpath(*relFileNode)); return false; // The initdb process will load the persistent table once we out of bootstrap mode. } PersistentRelation_VerifyInitScan(); PersistentFileSysObjName_SetRelationFile( &fsObjName, relFileNode, segmentFileNum); // Do this check after skipping out if in bootstrap mode. 
if (PersistentStore_IsZeroTid(persistentTid)) elog(ERROR, "TID for persistent '%s' tuple for mark DROP pending is invalid (0,0)", PersistentFileSysObjName_TypeAndObjectName(&fsObjName)); if (persistentSerialNum == 0) elog(ERROR, "Persistent '%s' serial number for mark DROP pending is invalid (0)", PersistentFileSysObjName_TypeAndObjectName(&fsObjName)); WRITE_PERSISTENT_STATE_ORDERED_LOCK; stateChangeResult = PersistentFileSysObj_StateChange( &fsObjName, persistentTid, persistentSerialNum, PersistentFileSysState_DropPending, retryPossible, /* flushToXlog */ false, &oldState, /* verifiedActionCallback */ NULL); WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Persistent relation: '%s' changed state from '%s' to 'Drop Pending', serial number " INT64_FORMAT " TID %s (State-Change result '%s')", PersistentFileSysObjName_ObjectName(&fsObjName), PersistentFileSysObjState_Name(oldState), persistentSerialNum, ItemPointerToString(persistentTid), PersistentFileSysObjStateChangeResult_Name(stateChangeResult)); return stateChangeResult; }
/*
 * Mark a buffer pool relation's persistent entry for 'Scan Incremental'
 * resynchronization.
 *
 * The call is a no-op (apart from an optional debug message) when the
 * relation is exempted by GpPersistent_SkipXLogInfo(), when in bootstrap
 * processing mode, or before persistence work has started.
 *
 * Raises ERROR when the RelFileNode is empty, the TID is (0,0) or the
 * serial number is 0.
 */
void PersistentRelation_MarkBufPoolRelationForScanIncrementalResync(
	RelFileNode 		*relFileNode,
				/* The tablespace, database, and relation OIDs for the created relation. */

	ItemPointer			persistentTid,
				/* TID of the gp_persistent_rel_files tuple for the relation. */

	int64				persistentSerialNum)
				/* Serial number for the relation.  Distinguishes the uses of the tuple. */
{
	PersistentFileSysObjName relationName;

	if (RelFileNode_IsEmpty(relFileNode))
	{
		elog(ERROR, "Invalid RelFileNode (0,0,0)");
	}

	if (GpPersistent_SkipXLogInfo(relFileNode->relNode))
	{
		/* Resynchronize will always handle these relations as 'Scan Incremental'. */
		if (Debug_persistent_print)
		{
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent relation '%s' because it is special",
				 relpath(*relFileNode));
		}
		return;
	}

	if (IsBootstrapProcessingMode())
	{
		/* The initdb process will load the persistent table once we out of bootstrap mode. */
		if (Debug_persistent_print)
		{
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent relation '%s' because we are in bootstrap mode",
				 relpath(*relFileNode));
		}
		return;
	}

	if (Persistent_BeforePersistenceWork())
	{
		/* The initdb process will load the persistent table once we out of bootstrap mode. */
		if (Debug_persistent_print)
		{
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent relation '%s' because we are before persistence work",
				 relpath(*relFileNode));
		}
		return;
	}

	PersistentRelation_VerifyInitScan();

	PersistentFileSysObjName_SetRelationFile(&relationName,
											 relFileNode,
											 /* segmentFileNum */ 0);

	/* These checks come after the early exits above on purpose. */
	if (PersistentStore_IsZeroTid(persistentTid))
	{
		elog(ERROR, "TID for persistent '%s' tuple for mark physically truncated is invalid (0,0)",
			 PersistentFileSysObjName_TypeAndObjectName(&relationName));
	}

	if (persistentSerialNum == 0)
	{
		elog(ERROR, "Persistent '%s' serial number for mark physcially truncated is invalid (0)",
			 PersistentFileSysObjName_TypeAndObjectName(&relationName));
	}

	PersistentFileSysObj_MarkBufPoolRelationForScanIncrementalResync(&relationName,
																	 persistentTid,
																	 persistentSerialNum,
																	 /* flushToXlog */ true);

	if (Debug_persistent_print)
	{
		elog(Persistent_DebugPrintLevel(),
			 "Persistent relation: '%s' marked physically truncated, serial number " INT64_FORMAT " at TID %s",
			 PersistentFileSysObjName_ObjectName(&relationName),
			 persistentSerialNum,
			 ItemPointerToString(persistentTid));
	}
}
/*
 * Read the persistent store tuple located at readTid.
 *
 * storeData
 *		Descriptor of the persistent table; supplies the open/close relation
 *		callbacks, the attribute count, and the debug print callback.
 * storeSharedData
 *		Shared-memory state of the store (validated only in assert-enabled
 *		builds, and printed in debug output).
 * readTid
 *		TID of the tuple to fetch; an ERROR is raised when it is (0,0).
 * values
 *		Output array filled by heap_deform_tuple() when the fetch succeeds.
 *		Presumably must hold storeData->numAttributes entries -- confirm
 *		against callers.
 * tupleCopy
 *		Output: receives a copy of the fetched tuple, or NULL when heap_fetch()
 *		does not return a tuple.  In the NULL case 'values' is left untouched.
 *
 * The relation obtained from the openRel callback is handed back to the
 * closeRel callback on both the success and the failure path.
 */
void
PersistentStore_ReadTuple(
	PersistentStoreData *storeData,
	PersistentStoreSharedData *storeSharedData,
	ItemPointer readTid,
	Datum *values,
	HeapTuple *tupleCopy)
{
	Relation	persistentRel;
	HeapTupleData tuple;
	Buffer		buffer;
	bool	   *nulls;

#ifdef USE_ASSERT_CHECKING
	if (storeSharedData == NULL ||
		!PersistentStoreSharedData_EyecatcherIsValid(storeSharedData))
		elog(ERROR, "Persistent store shared-memory not valid");
#endif

	if (Debug_persistent_store_print)
		elog(PersistentStore_DebugPrintLevel(),
			 "PersistentStore_ReadTuple: Going to read tuple at TID %s ('%s', shared data %p)",
			 ItemPointerToString(readTid),
			 storeData->tableName,
			 storeSharedData);

	if (PersistentStore_IsZeroTid(readTid))
		elog(ERROR, "TID for fetch persistent tuple is invalid (0,0) ('%s')",
			 storeData->tableName);

	persistentRel = (*storeData->openRel)();

	tuple.t_self = *readTid;

	if (!heap_fetch(persistentRel, SnapshotAny,
					&tuple, &buffer, false, NULL))
	{
		/*
		 * Nothing was fetched.  Still close the relation so that the
		 * open/close callbacks stay paired; previously this path returned
		 * with the relation left open.
		 */
		(*storeData->closeRel)(persistentRel);
		*tupleCopy = NULL;
		return;
	}

	*tupleCopy = heaptuple_copy_to(&tuple, NULL, NULL);
	ReleaseBuffer(buffer);

	/*
	 * In order to keep the tuples the exact same size to enable direct reuse
	 * of free tuples, we do not use NULLs.  The nulls array is only scratch
	 * space required by heap_deform_tuple().
	 */
	nulls = (bool *) palloc(storeData->numAttributes * sizeof(bool));

	heap_deform_tuple(*tupleCopy, persistentRel->rd_att, values, nulls);

	(*storeData->closeRel)(persistentRel);

	if (Debug_persistent_store_print)
	{
		elog(PersistentStore_DebugPrintLevel(),
			 "PersistentStore_ReadTuple: Successfully read tuple at TID %s ('%s')",
			 ItemPointerToString(readTid),
			 storeData->tableName);

		(*storeData->printTupleCallback)(
			PersistentStore_DebugPrintLevel(),
			"STORE READ TUPLE",
			readTid,
			values);
	}

	pfree(nulls);
}
/*
 * Rebuild the free TID list by scanning the persistent table and re-linking
 * every free tuple found.  Returns the number of free tuples in the rebuilt
 * free list.
 *
 * A tuple is treated as free when its previous-free-TID attribute is not
 * (0,0).  Free tuples are visited in scan order; each one is rewritten with
 * the next free order number (starting at 1) and with a previous-free-TID
 * that points at the free tuple visited before it.  The first free tuple
 * points at itself.  When at least one free tuple was linked, the shared
 * freeTid and maxFreeOrderNum are updated to describe the new list.
 *
 * Returns 0 without scanning when the shared data records no free tuples
 * (maxFreeOrderNum < 1).
 */
uint64
PersistentStore_RebuildFreeList(
	PersistentStoreData *storeData,
	PersistentStoreSharedData *storeSharedData)
{
	Datum	   *values;
	PersistentStoreScan storeScan;
	ItemPointerData persistentTid;
	ItemPointerData previousFreeTid;
	ItemPointerData previousTid;
	int64		persistentSerialNum;
	uint64		freeOrderNum;

	/*
	 * PT shared data must be already initialized, even when we are called
	 * during recovery.
	 */
	Assert(!PersistentStore_IsZeroTid(&storeSharedData->maxTid));

	if (storeSharedData->maxFreeOrderNum < 1)
	{
		elog(LOG, "no free tuples in %s, not building any free list",
			 storeData->tableName);
		return 0;
	}

	elog(LOG, "rebuilding free list in %s with " INT64_FORMAT " free tuples",
		 storeData->tableName, storeSharedData->maxFreeOrderNum);

	/*
	 * Allocate the scratch array only once we know a scan is needed, so the
	 * early return above does not leave it behind.
	 */
	values = (Datum *) palloc(storeData->numAttributes * sizeof(Datum));

	/*
	 * Scan PT for free entries (in TID order) and establish links with
	 * previous free entry as we go on.  A zero ip_posid marks previousTid as
	 * "no free tuple seen yet" for ItemPointerIsValid().
	 */
	previousTid.ip_posid = 0;
	freeOrderNum = 0;

	PersistentStore_BeginScan(storeData, storeSharedData, &storeScan);

	while (PersistentStore_GetNext(
							&storeScan,
							values,
							&persistentTid,
							&persistentSerialNum))
	{
		/*
		 * We are scanning from low to high TID.  All TIDs we encounter
		 * should be smaller or equal to the known maxTid.
		 */
		Assert(ItemPointerCompare(
							&storeSharedData->maxTid,
							&persistentTid) >= 0);

		PersistentStore_ExtractOurTupleData(
							storeData,
							values,
							&persistentSerialNum,
							&previousFreeTid);

		if (!PersistentStore_IsZeroTid(&previousFreeTid))
		{
			/* Free tuple: assign the next order number and link it. */
			values[storeData->attNumPersistentSerialNum - 1] =
				Int64GetDatum(++freeOrderNum);

			/* The first free tuple in the list points at itself. */
			values[storeData->attNumPreviousFreeTid - 1] =
				ItemPointerIsValid(&previousTid) ?
				PointerGetDatum(&previousTid) :
				PointerGetDatum(&persistentTid);

#ifdef FAULT_INJECTOR
			/*
			 * Inject fault after free list is partially built - a few
			 * tuples are updated but at least one is yet to be updated.
			 */
			if (freeOrderNum > 3)
			{
				FaultInjector_InjectFaultIfSet(
											   RebuildPTDB,
											   DDLNotSpecified,
											   "",	/* databaseName */
											   "");	/* tableName */
			}
#endif

			PersistentStore_UpdateTuple(
							storeData,
							storeSharedData,
							&persistentTid,
							values,
							true);

			ItemPointerCopy(&persistentTid, &previousTid);
		}
	}

	PersistentStore_EndScan(&storeScan);
	pfree(values);

	if (ItemPointerIsValid(&previousTid))
	{
		Assert(freeOrderNum > 0);

		/* The last free tuple linked becomes the head of the free list. */
		ItemPointerCopy(&previousTid, &storeSharedData->freeTid);
		storeSharedData->maxFreeOrderNum = freeOrderNum;

		/*
		 * Report the TID actually recorded as freeTid.  persistentTid is
		 * whatever the scan loop left behind and need not be a free tuple.
		 */
		elog(LOG, "rebuilt free list in %s: maxFreeOrderNum = " INT64_FORMAT
			 " freeTid = %s",
			 storeData->tableName,
			 (int64) freeOrderNum,
			 ItemPointerToString(&storeSharedData->freeTid));
	}

	return freeOrderNum;
}
/*
 * Validate the free TID chain against the entries recorded in
 * freeEntryHashTable.
 *
 * Starting from the shared freeTid and maxFreeOrderNum, each chain member is
 * looked up in the hash table and checked for a non-zero previous free TID
 * and for the expected (descending) free order number.  The entry with free
 * order number 1 must reference its own TID as previous free TID.
 *
 * Returns true when the whole chain checks out.  On the first inconsistency
 * the table is dumped, a WARNING is reported and false is returned.
 *
 * Note: we have already validated that number of free entries is equal to
 * max free order number.
 */
static bool
PersistentStore_InitScanVerifyFreeEntries(
	PersistentStoreData *storeData,
	PersistentStoreSharedData *storeSharedData)
{
	int64		expectedOrderNum = storeSharedData->maxFreeOrderNum;
	ItemPointerData currentTid = storeSharedData->freeTid;
	PersistentFreeEntryKey lookupKey;
	PersistentFreeEntry *freeEntry;

	elog(LOG, "beginning free list validation for %s, maxFreeOrderNum = "
		 INT64_FORMAT, storeData->tableName, expectedOrderNum);

	/* An empty free list must not carry a free TID. */
	if (expectedOrderNum == 0 && !PersistentStore_IsZeroTid(&currentTid))
	{
		PersistentStore_DiagnoseDumpTable(storeData, storeSharedData);
		ereport(WARNING,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg("free list validation failed"),
				 errdetail("expected free TID to be (0,0) when "
						   "free order number is 0 in '%s'",
						   storeData->tableName)));
		return false;
	}

	if (expectedOrderNum != 0)
	{
		/* Free tuples are expected, so the hash table has to exist. */
		if (freeEntryHashTable == NULL)
		{
			PersistentStore_DiagnoseDumpTable(storeData, storeSharedData);
			ereport(WARNING,
					(errcode(ERRCODE_INTERNAL_ERROR),
					 errmsg("free list validation failed"),
					 errdetail("looking for free order number " INT64_FORMAT
							   " and the free entry hash table is empty for '%s'",
							   expectedOrderNum, storeData->tableName)));
			return false;
		}

		/* Follow the chain from the highest order number down to 1. */
		for (; expectedOrderNum > 0; expectedOrderNum--)
		{
			MemSet(&lookupKey, 0, sizeof(lookupKey));
			lookupKey.persistentTid = currentTid;

			freeEntry = (PersistentFreeEntry *)
				hash_search(freeEntryHashTable,
							(void *) &lookupKey,
							HASH_FIND,
							NULL);

			if (freeEntry == NULL)
			{
				PersistentStore_DiagnoseDumpTable(storeData, storeSharedData);
				ereport(WARNING,
						(errcode(ERRCODE_INTERNAL_ERROR),
						 errmsg("free list validation failed"),
						 errdetail(
							 "did not find free entry for free TID %s ("
							 "free order number " INT64_FORMAT ") for '%s'",
							 ItemPointerToString(&currentTid),
							 expectedOrderNum,
							 storeData->tableName)));
				return false;
			}

			if (PersistentStore_IsZeroTid(&freeEntry->previousFreeTid))
			{
				PersistentStore_DiagnoseDumpTable(storeData, storeSharedData);
				ereport(WARNING,
						(errcode(ERRCODE_INTERNAL_ERROR),
						 errmsg("free list validation failed"),
						 errdetail(
							 "previous free TID not expected to be (0,0) -- "
							 "persistent Free Entry hashtable corrupted for "
							 "'%s' (expected free order number " INT64_FORMAT
							 ", entry free order number " INT64_FORMAT ")",
							 storeData->tableName,
							 expectedOrderNum,
							 freeEntry->freeOrderNum)));
				return false;
			}

			if (freeEntry->freeOrderNum != expectedOrderNum)
			{
				PersistentStore_DiagnoseDumpTable(storeData, storeSharedData);
				ereport(WARNING,
						(errcode(ERRCODE_INTERNAL_ERROR),
						 errmsg("free list validation failed"),
						 errdetail(
							 "free entry for free TID %s has wrong free "
							 "order number (expected free order number "
							 INT64_FORMAT ", found free order number "
							 INT64_FORMAT ") for '%s'",
							 ItemPointerToString(&currentTid),
							 expectedOrderNum,
							 freeEntry->freeOrderNum,
							 storeData->tableName)));
				return false;
			}

			if (Debug_persistent_recovery_print)
				elog(PersistentRecovery_DebugPrintLevel(),
					 "PersistentStore_InitScanVerifyFreeEntries ('%s'): Free order"
					 " number " INT64_FORMAT ", free TID %s, previous free TID %s",
					 storeData->tableName,
					 expectedOrderNum,
					 ItemPointerToString(&currentTid),
					 ItemPointerToString2(&freeEntry->previousFreeTid));

			/* Step to the previous member of the chain. */
			currentTid = freeEntry->previousFreeTid;

			/* Cannot be (0,0): that case was rejected above. */
			Insist(!PersistentStore_IsZeroTid(&currentTid));

			/*
			 * The last free entry uses its own TID in previous_free_tid.
			 */
			if (expectedOrderNum == 1 &&
				ItemPointerCompare(&freeEntry->key.persistentTid,
								   &currentTid) != 0)
			{
				PersistentStore_DiagnoseDumpTable(storeData, storeSharedData);
				ereport(WARNING,
						(errcode(ERRCODE_INTERNAL_ERROR),
						 errmsg("free list validation failed"),
						 errdetail(
							 "expected previous_free_tid %s to match the"
							 " persistent TID %s for the last free entry"
							 " (free order number 1) for '%s'",
							 ItemPointerToString(&currentTid),
							 ItemPointerToString2(&freeEntry->key.persistentTid),
							 storeData->tableName)));
				return false;
			}
		}
	}

	if (Debug_persistent_recovery_print)
		elog(PersistentRecovery_DebugPrintLevel(),
			 "PersistentStore_InitScanVerifyFreeEntries ('%s'): successfully "
			 "verified " INT64_FORMAT " free entries",
			 storeData->tableName,
			 storeSharedData->maxFreeOrderNum);

	return true;
}
/*
 * Check if the free TID is valid.  If not, the free list is corrupted and we
 * pretend there are no free tuples to reset the free list.  The corrupted free
 * list will be detached and cleaned during recovery or pt rebuild.
 *
 * storeData
 *		Descriptor of the persistent table.
 * storeSharedData
 *		Shared state holding freeTid and maxFreeOrderNum.  Both are reset
 *		(to (0,0) and 0) when validation fails.
 * previousFreeTid
 *		Output: receives the previous free TID stored in the tuple at freeTid.
 *		Set to (0,0) when validation fails.  Not written at all when there is
 *		no free tuple to check.
 *
 * Returns true when there is nothing to check (maxFreeOrderNum <= 0) or when
 * the tuple at freeTid passes every check; false otherwise.  A tuple that
 * cannot be read at freeTid counts as a validation failure.
 */
static bool
PersistentStore_ValidateFreeTID(
	PersistentStoreData *storeData,
	PersistentStoreSharedData *storeSharedData,
	ItemPointer previousFreeTid)
{
	Datum	   *values;
	HeapTuple	tupleCopy;
	int64		persistentSerialNum;
	bool		tidIsValid = true;

	if (storeSharedData->maxFreeOrderNum <= 0)
		return true;			/* No tuple to check */

	values = (Datum *) palloc(storeData->numAttributes * sizeof(Datum));

	PersistentStore_ReadTuple(
						storeData,
						storeSharedData,
						&storeSharedData->freeTid,
						values,
						&tupleCopy);

	if (tupleCopy == NULL)
	{
		/*
		 * No tuple could be fetched at freeTid, so 'values' was never filled
		 * in.  Do not interpret it and do not free the NULL tuple; treat the
		 * free list as corrupted instead.
		 */
		tidIsValid = false;
		ereport(WARNING,
				(errmsg("integrity check for PT freeTid failed"),
				 errdetail("could not read persistent store tuple at free TID %s ('%s')",
						   ItemPointerToString(&storeSharedData->freeTid),
						   storeData->tableName)));
	}
	else
	{
		PersistentStore_ExtractOurTupleData(
							storeData,
							values,
							&persistentSerialNum,
							previousFreeTid);

		/* With a single free tuple, it must reference itself. */
		if (storeSharedData->maxFreeOrderNum == 1 &&
			ItemPointerCompare(previousFreeTid, &storeSharedData->freeTid) != 0)
		{
			tidIsValid = false;
			ereport(WARNING,
					(errmsg("integrity check for PT freeTid failed"),
					 errdetail("expected to have previous FreeTID at %s equal to itself (found %s, %s)",
							   ItemPointerToString(&storeSharedData->freeTid),
							   ItemPointerToString2(previousFreeTid),
							   storeData->tableName)));
		}

		/* A (0,0) previous free TID means the tuple is not marked free. */
		if (PersistentStore_IsZeroTid(previousFreeTid))
		{
			tidIsValid = false;
			ereport(WARNING,
					(errmsg("integrity check for PT freeTid failed"),
					 errdetail("expected to have previous FreeTID at %s to be free (found %s, %s)",
							   ItemPointerToString(&storeSharedData->freeTid),
							   ItemPointerToString2(previousFreeTid),
							   storeData->tableName)));
		}

		/* The head of the free list must carry the highest order number. */
		if (persistentSerialNum != storeSharedData->maxFreeOrderNum)
		{
			tidIsValid = false;
			ereport(WARNING,
					(errmsg("integrity check for PT freeTid failed"),
					 errdetail("expected persistent store tuple at %s to have order number " INT64_FORMAT " (found " INT64_FORMAT ", '%s')",
							   ItemPointerToString(&storeSharedData->freeTid),
							   storeSharedData->maxFreeOrderNum,
							   persistentSerialNum,
							   storeData->tableName)));
		}

		heap_freetuple(tupleCopy);
	}

	pfree(values);

	/* If the free TID is not valid, switch to a new free list here */
	if (!tidIsValid)
	{
		ItemPointerSet(previousFreeTid, 0, 0);
		storeSharedData->maxFreeOrderNum = 0;
		MemSet(&storeSharedData->freeTid, 0, sizeof(ItemPointerData));
		ereport(WARNING,
				(errmsg("switched to new free TID list")));
	}

	return tidIsValid;
}