/*
 * Fetch the next tuple of a persistent store scan.
 *
 * The heap scan in storeScan is advanced one step in the forward direction.
 * When a tuple is found, its attributes are deformed into 'values', its
 * serial number is stored in *persistentSerialNum and its TID is stored in
 * *persistentTid.
 *
 * Returns true when a tuple was produced.  Returns false when the heap scan
 * yields no further tuple; in that case none of the output arguments is
 * written by this function.
 */
bool
PersistentStore_GetNext(
	PersistentStoreScan		*storeScan,
	Datum					*values,
	ItemPointer				persistentTid,
	int64					*persistentSerialNum)
{
	/* Extracted together with the serial number, but not reported here. */
	ItemPointerData unusedPrevFreeTid;
	bool		found;

	storeScan->tuple = heap_getnext(storeScan->scan, ForwardScanDirection);
	found = (storeScan->tuple != NULL);

	if (found)
	{
		PersistentStore_DeformTuple(storeScan->storeData,
									storeScan->persistentRel->rd_att,
									storeScan->tuple,
									values);

		PersistentStore_ExtractOurTupleData(storeScan->storeData,
											values,
											persistentSerialNum,
											&unusedPrevFreeTid);

		*persistentTid = storeScan->tuple->t_self;
	}

	return found;
}
/*
 * Scan the persistent table once and derive the shared bookkeeping from it.
 *
 * For every tuple returned by the scan:
 *   - maxTid is advanced to the tuple's TID;
 *   - a tuple with a non-zero previous-free TID is treated as a free entry:
 *     the highest free order number seen and its TID are remembered, and the
 *     entry is registered for later verification unless
 *     gp_persistent_skip_free_list is set;
 *   - any other tuple is counted as in use and the highest in-use serial
 *     number is tracked;
 *   - the optional scanTupleCallback is invoked.
 *
 * Afterwards the current global sequence value is compared against the
 * highest in-use serial number found, and the free entries are verified
 * unless gp_persistent_skip_free_list is set.
 */
static void
PersistentStore_DoInitScan(
	PersistentStoreData			*storeData,
	PersistentStoreSharedData	*storeSharedData)
{
	PersistentStoreScan scan;
	ItemPointerData tid;
	ItemPointerData prevFreeTid;
	int64		serialNum;
	int64		currentSeq;
	Datum	   *attrValues;

	attrValues = (Datum *) palloc(sizeof(Datum) * storeData->numAttributes);

	MemSet(&storeSharedData->maxTid, 0, sizeof(ItemPointerData));

	PersistentStore_BeginScan(storeData, storeSharedData, &scan);

	for (;;)
	{
		if (!PersistentStore_GetNext(&scan, attrValues, &tid, &serialNum))
			break;

		/*
		 * The scan runs from low to high TID, so each TID must be strictly
		 * greater than the previous maximum (or be the very first one).
		 */
		Assert(PersistentStore_IsZeroTid(&storeSharedData->maxTid) ||
			   ItemPointerCompare(&storeSharedData->maxTid, &tid) == -1);

		storeSharedData->maxTid = tid;

		/* GetNext does not hand back the previous-free TID; fetch it here. */
		PersistentStore_ExtractOurTupleData(storeData,
											attrValues,
											&serialNum,
											&prevFreeTid);

		if (Debug_persistent_recovery_print)
			(*storeData->printTupleCallback)(PersistentRecovery_DebugPrintLevel(),
											 "SCAN",
											 &tid,
											 attrValues);

		if (PersistentStore_IsZeroTid(&prevFreeTid))
		{
			/* Zero previous-free TID: the entry is in use. */
			storeSharedData->inUseCount++;

			if (storeSharedData->maxInUseSerialNum < serialNum)
			{
				storeSharedData->maxInUseSerialNum = serialNum;
				storeData->myHighestSerialNum = storeSharedData->maxInUseSerialNum;
			}
		}
		else
		{
			/*
			 * Non-zero previous-free TID: the entry is free, and its serial
			 * number field carries the free order number.
			 */
			if (storeSharedData->maxFreeOrderNum < serialNum)
			{
				storeSharedData->maxFreeOrderNum = serialNum;
				storeSharedData->freeTid = tid;
			}

			if (!gp_persistent_skip_free_list)
				PersistentStore_InitScanAddFreeEntry(&tid,
													 &prevFreeTid,
													 /* freeOrderNum */ serialNum);
		}

		if (storeData->scanTupleCallback != NULL)
			(*storeData->scanTupleCallback)(&tid, serialNum, attrValues);
	}

	PersistentStore_EndScan(&scan);

	pfree(attrValues);

	currentSeq = GlobalSequence_Current(storeData->gpGlobalSequence);

	/*
	 * The sequence check below used to be guarded by an InRecovery test.
	 * This routine is not supposed to run during recovery at all, because
	 * the entries are not consistent then.
	 */
	Assert(!InRecovery);

	if (currentSeq >= storeSharedData->maxInUseSerialNum)
	{
		storeSharedData->maxCachedSerialNum = currentSeq;
	}
	else if (gp_persistent_repair_global_sequence)
	{
		/*
		 * The global sequence is behind the table, which looks like
		 * corruption.  The gp_persistent_repair_global_sequence GUC lets the
		 * system come up anyway.
		 */
		elog(LOG, "Need to Repair global sequence number " INT64_FORMAT " so use scanned maximum value " INT64_FORMAT " ('%s')",
			 currentSeq,
			 storeSharedData->maxInUseSerialNum,
			 storeData->tableName);
	}
	else
	{
		elog(ERROR, "Global sequence number " INT64_FORMAT " less than maximum value " INT64_FORMAT " found in scan ('%s')",
			 currentSeq,
			 storeSharedData->maxInUseSerialNum,
			 storeData->tableName);
	}

	if (Debug_persistent_recovery_print)
		elog(PersistentRecovery_DebugPrintLevel(),
			 "PersistentStore_DoInitScan ('%s'): maximum in-use serial number " INT64_FORMAT ", maximum free order number " INT64_FORMAT ", free TID %s, maximum known TID %s",
			 storeData->tableName,
			 storeSharedData->maxInUseSerialNum,
			 storeSharedData->maxFreeOrderNum,
			 ItemPointerToString(&storeSharedData->freeTid),
			 ItemPointerToString2(&storeSharedData->maxTid));

	if (gp_persistent_skip_free_list)
	{
		if (Debug_persistent_recovery_print)
			elog(PersistentRecovery_DebugPrintLevel(),
				 "PersistentStore_DoInitScan ('%s'): Skipping verification because gp_persistent_skip_free_list GUC is ON",
				 storeData->tableName);
	}
	else
	{
		PersistentStore_InitScanVerifyFreeEntries(storeData, storeSharedData);
	}
}
/*
 * Pop the head of the free list.
 *
 * *freeTid is zeroed on entry.  If the shared data records no free tuples,
 * or gp_persistent_skip_free_list is set, false is returned and *freeTid
 * stays zero.
 *
 * Otherwise the tuple at storeSharedData->freeTid is read and checked: it
 * must carry a non-zero previous-free TID and an order number equal to
 * maxFreeOrderNum, else an ERROR is raised.  On success *freeTid receives
 * the head TID, maxFreeOrderNum is decremented, the head moves to the
 * tuple's previous-free TID, and true is returned.
 */
static bool
PersistentStore_GetFreeTuple(
	PersistentStoreData			*storeData,
	PersistentStoreSharedData	*storeSharedData,
	ItemPointer					freeTid)
{
	Datum	   *attrValues;
	HeapTuple	headTupleCopy;
	int64		headOrderNum;
	ItemPointerData nextHeadTid;

	MemSet(freeTid, 0, sizeof(ItemPointerData));

	if (Debug_persistent_store_print)
		elog(PersistentStore_DebugPrintLevel(),
			 "PersistentStore_GetFreeTuple: Enter: maximum free order number " INT64_FORMAT ", free TID %s ('%s')",
			 storeSharedData->maxFreeOrderNum,
			 ItemPointerToString(&storeSharedData->freeTid),
			 storeData->tableName);

	/* Nothing on the free list. */
	if (storeSharedData->maxFreeOrderNum == 0)
		return false;

	/* Free list disabled by GUC: behave as if it were empty. */
	if (gp_persistent_skip_free_list)
	{
		if (Debug_persistent_store_print)
			elog(PersistentStore_DebugPrintLevel(),
				 "PersistentStore_GetFreeTuple: Skipping because gp_persistent_skip_free_list GUC is ON ('%s')",
				 storeData->tableName);

		return false;
	}

	Assert(storeSharedData->freeTid.ip_posid != 0);

	/* Load the tuple currently at the head of the free list. */
	attrValues = (Datum *) palloc(sizeof(Datum) * storeData->numAttributes);

	PersistentStore_ReadTuple(storeData,
							  storeSharedData,
							  &storeSharedData->freeTid,
							  attrValues,
							  &headTupleCopy);

	PersistentStore_ExtractOurTupleData(storeData,
										attrValues,
										&headOrderNum,
										&nextHeadTid);

	if (PersistentStore_IsZeroTid(&nextHeadTid))
		elog(ERROR, "Expected persistent store tuple at %s to be free ('%s')",
			 ItemPointerToString(&storeSharedData->freeTid),
			 storeData->tableName);

	/* The last remaining free tuple must link back to itself. */
	Assert(storeSharedData->maxFreeOrderNum != 1 ||
		   ItemPointerCompare(&nextHeadTid, &storeSharedData->freeTid) == 0);

	if (headOrderNum != storeSharedData->maxFreeOrderNum)
		elog(ERROR, "Expected persistent store tuple at %s to have order number " INT64_FORMAT " (found " INT64_FORMAT ", '%s')",
			 ItemPointerToString(&storeSharedData->freeTid),
			 storeSharedData->maxFreeOrderNum,
			 headOrderNum,
			 storeData->tableName);

	/* Everything needed has been copied into locals; release the tuple. */
	pfree(attrValues);
	heap_freetuple(headTupleCopy);

	/* Hand out the head and advance the list to its predecessor. */
	*freeTid = storeSharedData->freeTid;
	storeSharedData->maxFreeOrderNum--;
	storeSharedData->freeTid = nextHeadTid;

	if (Debug_persistent_store_print)
		elog(PersistentStore_DebugPrintLevel(),
			 "PersistentStore_GetFreeTuple: Exit: maximum free order number " INT64_FORMAT ", free TID %s ('%s')",
			 storeSharedData->maxFreeOrderNum,
			 ItemPointerToString(&storeSharedData->freeTid),
			 storeData->tableName);

	return true;
}
/*
 * Scan the persistent table once and derive the shared bookkeeping from it.
 *
 * Every tuple returned by the scan is counted as in use, the highest serial
 * number seen is tracked in both the shared data and storeData, and the
 * optional scanTupleCallback is invoked for it.
 *
 * Afterwards the current global sequence value is compared against the
 * highest serial number found by the scan.
 */
static void
PersistentStore_DoInitScan(
	PersistentStoreData			*storeData,
	PersistentStoreSharedData	*storeSharedData)
{
	PersistentStoreScan scan;
	ItemPointerData tid;
	int64		serialNum;
	int64		currentSeq;
	Datum	   *attrValues;

	attrValues = (Datum *) palloc(sizeof(Datum) * storeData->numAttributes);

	PersistentStore_BeginScan(storeData, storeSharedData, &scan);

	for (;;)
	{
		if (!PersistentStore_GetNext(&scan, attrValues, &tid, &serialNum))
			break;

		PersistentStore_ExtractOurTupleData(storeData,
											attrValues,
											&serialNum);

		if (Debug_persistent_recovery_print)
			(*storeData->printTupleCallback)(PersistentRecovery_DebugPrintLevel(),
											 "SCAN",
											 &tid,
											 attrValues);

		storeSharedData->inUseCount++;

		if (storeSharedData->maxInUseSerialNum < serialNum)
		{
			storeSharedData->maxInUseSerialNum = serialNum;
			storeData->myHighestSerialNum = storeSharedData->maxInUseSerialNum;
		}

		if (storeData->scanTupleCallback != NULL)
			(*storeData->scanTupleCallback)(&tid, serialNum, attrValues);
	}

	PersistentStore_EndScan(&scan);

	pfree(attrValues);

	currentSeq = GlobalSequence_Current(storeData->gpGlobalSequence);

	/*
	 * The sequence check below used to be guarded by an InRecovery test.
	 * This routine is not supposed to run during recovery at all, because
	 * the entries are not consistent then.
	 */
	Assert(!InRecovery);

	if (currentSeq >= storeSharedData->maxInUseSerialNum)
	{
		/* Sequence is at or ahead of the table: adopt the sequence value. */
		storeSharedData->maxInUseSerialNum = currentSeq;
	}
	else if (gp_persistent_repair_global_sequence)
	{
		/*
		 * The global sequence is behind the table, which looks like
		 * corruption.  The gp_persistent_repair_global_sequence GUC lets the
		 * system come up anyway.
		 */
		elog(LOG, "need to repair global sequence number " INT64_FORMAT " so use scanned maximum value " INT64_FORMAT " ('%s')",
			 currentSeq,
			 storeSharedData->maxInUseSerialNum,
			 storeData->tableName);
	}
	else
	{
		elog(ERROR, "global sequence number " INT64_FORMAT " less than "
			 "maximum value " INT64_FORMAT " found in scan ('%s')",
			 currentSeq,
			 storeSharedData->maxInUseSerialNum,
			 storeData->tableName);
	}

	if (Debug_persistent_recovery_print)
		elog(PersistentRecovery_DebugPrintLevel(),
			 "PersistentStore_DoInitScan ('%s'): maximum in-use serial number " INT64_FORMAT ,
			 storeData->tableName,
			 storeSharedData->maxInUseSerialNum);
}
/*
 * Rebuild the free TID list of a persistent table.
 *
 * The table is scanned in TID order.  Every tuple whose previous-free TID
 * attribute is non-zero is treated as free: its order number is rewritten
 * to the next value of a counter starting at 1, and its previous-free TID is
 * pointed at the free tuple met just before it (the first free tuple points
 * at itself).  When at least one free tuple was relinked, the shared free
 * list head and maxFreeOrderNum are updated to describe the new list.
 *
 * Returns the number of free tuples in the rebuilt list.  Returns 0 without
 * scanning when storeSharedData->maxFreeOrderNum is below 1.
 */
uint64
PersistentStore_RebuildFreeList(
	PersistentStoreData			*storeData,
	PersistentStoreSharedData	*storeSharedData)
{
	Datum				*values;
	PersistentStoreScan	storeScan;
	ItemPointerData		persistentTid;
	ItemPointerData		previousFreeTid;
	ItemPointerData		previousTid;
	uint64				persistentSerialNum;
	uint64				freeOrderNum;

	/*
	 * PT shared data must be already initialized, even when we are
	 * called during recovery.
	 */
	Assert(!PersistentStore_IsZeroTid(&storeSharedData->maxTid));

	if (storeSharedData->maxFreeOrderNum < 1)
	{
		elog(LOG, "no free tuples in %s, not building any free list",
			 storeData->tableName);
		return 0;
	}

	elog(LOG, "rebuilding free list in %s with " INT64_FORMAT " free tuples",
		 storeData->tableName, storeSharedData->maxFreeOrderNum);

	/*
	 * Allocate the attribute array only once we know a scan will happen, so
	 * the early return above does not leave it behind.
	 */
	values = (Datum *) palloc(storeData->numAttributes * sizeof(Datum));

	/*
	 * Scan PT for free entries (in TID order) and establish links
	 * with previous free entry as we go on.  previousTid stays invalid
	 * (ip_posid == 0) until the first free entry has been relinked.
	 */
	previousTid.ip_posid = 0;
	freeOrderNum = 0;

	PersistentStore_BeginScan(storeData, storeSharedData, &storeScan);

	while (PersistentStore_GetNext(
							&storeScan,
							values,
							&persistentTid,
							(int64 *) &persistentSerialNum))
	{
		/*
		 * We are scanning from low to high TID.  All TIDs we
		 * encounter should be smaller or equal to the known
		 * maxTid.
		 */
		Assert(ItemPointerCompare(
							&storeSharedData->maxTid,
							&persistentTid) >= 0);

		PersistentStore_ExtractOurTupleData(
									storeData,
									values,
									(int64 *) &persistentSerialNum,
									&previousFreeTid);

		if (!PersistentStore_IsZeroTid(&previousFreeTid))
		{
			/* Free entry: renumber it and link it to its predecessor. */
			values[storeData->attNumPersistentSerialNum - 1] =
				Int64GetDatum(++freeOrderNum);
			values[storeData->attNumPreviousFreeTid - 1] =
				ItemPointerIsValid(&previousTid) ?
				PointerGetDatum(&previousTid) :
				PointerGetDatum(&persistentTid);

#ifdef FAULT_INJECTOR
			/*
			 * Inject fault after free list is partially built - a few
			 * tuples are updated but at least one is yet to be
			 * updated.
			 */
			if (freeOrderNum > 3)
			{
				FaultInjector_InjectFaultIfSet(
										RebuildPTDB,
										DDLNotSpecified,
										"",		// databaseName
										"");	// tableName
			}
#endif

			PersistentStore_UpdateTuple(
									storeData,
									storeSharedData,
									&persistentTid,
									values,
									true);

			ItemPointerCopy(&persistentTid, &previousTid);
		}
	}

	PersistentStore_EndScan(&storeScan);

	pfree(values);

	if (ItemPointerIsValid(&previousTid))
	{
		Assert(freeOrderNum > 0);

		/* The last free entry relinked becomes the head of the list. */
		ItemPointerCopy(&previousTid, &storeSharedData->freeTid);
		storeSharedData->maxFreeOrderNum = freeOrderNum;

		/*
		 * Report the head that was actually installed.  persistentTid holds
		 * the last tuple scanned, which need not be a free one.
		 */
		elog(LOG, "rebuilt free list in %s: maxFreeOrderNum = " INT64_FORMAT
			 " freeTid = %s",
			 storeData->tableName, freeOrderNum,
			 ItemPointerToString(&storeSharedData->freeTid));
	}

	return freeOrderNum;
}
/*
 * Sanity-check the tuple at the head of the free list.
 *
 * The head tuple is read and its order number and previous-free TID are
 * extracted; the latter is returned through previousFreeTid.  Three
 * conditions are checked, each reported with a WARNING when violated:
 *   - with exactly one free tuple, the head must link back to itself;
 *   - the previous-free TID must not be zero;
 *   - the order number must equal maxFreeOrderNum.
 *
 * If any check fails the free list is considered corrupted: the shared free
 * list is reset to empty, *previousFreeTid is set to (0, 0), and false is
 * returned, so callers start a new free list.  Cleaning up the detached
 * list is left to recovery or a PT rebuild.
 *
 * Returns true when the head is valid, and also when maxFreeOrderNum <= 0,
 * in which case there is nothing to check and previousFreeTid is not
 * written.
 */
static bool
PersistentStore_ValidateFreeTID(
	PersistentStoreData			*storeData,
	PersistentStoreSharedData	*storeSharedData,
	ItemPointer					previousFreeTid)
{
	Datum	   *attrValues;
	HeapTuple	headTupleCopy;
	int64		headOrderNum;
	bool		headIsSane = true;
	ItemPointer	headTid = &storeSharedData->freeTid;

	/* Empty free list: no tuple to check. */
	if (storeSharedData->maxFreeOrderNum <= 0)
		return true;

	attrValues = (Datum *) palloc(sizeof(Datum) * storeData->numAttributes);

	PersistentStore_ReadTuple(storeData,
							  storeSharedData,
							  headTid,
							  attrValues,
							  &headTupleCopy);

	PersistentStore_ExtractOurTupleData(storeData,
										attrValues,
										&headOrderNum,
										previousFreeTid);

	/* Both extracted fields now live outside the tuple; release it. */
	pfree(attrValues);
	heap_freetuple(headTupleCopy);

	if (storeSharedData->maxFreeOrderNum == 1 &&
		ItemPointerCompare(previousFreeTid, headTid) != 0)
	{
		headIsSane = false;
		ereport(WARNING,
				(errmsg("integrity check for PT freeTid failed"),
				 errdetail("expected to have previous FreeTID at %s equal to itself (found %s, %s)",
						   ItemPointerToString(headTid),
						   ItemPointerToString2(previousFreeTid),
						   storeData->tableName)));
	}

	if (PersistentStore_IsZeroTid(previousFreeTid))
	{
		headIsSane = false;
		ereport(WARNING,
				(errmsg("integrity check for PT freeTid failed"),
				 errdetail("expected to have previous FreeTID at %s to be free (found %s, %s)",
						   ItemPointerToString(headTid),
						   ItemPointerToString2(previousFreeTid),
						   storeData->tableName)));
	}

	if (headOrderNum != storeSharedData->maxFreeOrderNum)
	{
		headIsSane = false;
		ereport(WARNING,
				(errmsg("integrity check for PT freeTid failed"),
				 errdetail("expected persistent store tuple at %s to have order number " INT64_FORMAT " (found " INT64_FORMAT ", '%s')",
						   ItemPointerToString(headTid),
						   storeSharedData->maxFreeOrderNum,
						   headOrderNum,
						   storeData->tableName)));
	}

	if (headIsSane)
		return true;

	/* Corrupted head: abandon the current list and start a new one. */
	ItemPointerSet(previousFreeTid, 0, 0);
	storeSharedData->maxFreeOrderNum = 0;
	MemSet(&storeSharedData->freeTid, 0, sizeof(ItemPointerData));
	ereport(WARNING,
			(errmsg("switched to new free TID list")));

	return false;
}