/*
 * Decide whether a datum can be read as an old-format hstore.
 *
 * Result codes:
 *	0 = not a valid old-format value
 *	1 = valid, but the varlena is longer than its contents need ("slop")
 *	2 = valid, and the length matches exactly
 */
static int
hstoreValidOldFormat(HStore *hs)
{
	HOldEntry  *old = (HOldEntry *) ARRPTR(hs);
	int			nentries = hs->size_;
	int			expected_pos = 0;
	int			needed;
	int			idx;

	/* Anything carrying the new-version flag is not old format. */
	if (hs->size_ & HS_FLAG_NEWVERSION)
		return 0;

	/* New format uses an HEntry for key and another for value */
	StaticAssertStmt(sizeof(HOldEntry) == 2 * sizeof(HEntry),
					 "old hstore format is not upward-compatible");

	/* An empty hstore is trivially exact. */
	if (nentries == 0)
		return 2;

	/*
	 * Cheap sanity checks: plausible entry count, an entry array that fits
	 * inside the varlena, and a first entry that starts at offset zero.
	 */
	if (nentries > 0xFFFFFFF ||
		CALCDATASIZE(nentries, 0) > VARSIZE(hs) ||
		old[0].pos != 0)
		return 0;

	/* key length must be nondecreasing */
	idx = 1;
	while (idx < nentries)
	{
		if (old[idx].keylen < old[idx - 1].keylen)
			return 0;
		idx++;
	}

	/*
	 * Every entry must begin exactly where the previous one ended, i.e. the
	 * data area is contiguous.  A NULL value contributes no bytes.
	 */
	for (idx = 0; idx < nentries; idx++)
	{
		HOldEntry  *e = &old[idx];

		if (e->pos != expected_pos)
			return 0;

		expected_pos += (e->keylen + ((e->valisnull) ? 0 : e->vallen));
	}

	/* Compare the size the contents require with the actual varlena size. */
	needed = CALCDATASIZE(nentries, expected_pos);

	if (needed > VARSIZE(hs))
		return 0;

	return (needed == VARSIZE(hs)) ? 2 : 1;
}
/* * InitCatalogCache - initialize the caches * * Note that no database access is done here; we only allocate memory * and initialize the cache structure. Interrogation of the database * to complete initialization of a cache happens upon first use * of that cache. */ void InitCatalogCache(void) { int cacheId; int i, j; StaticAssertStmt(SysCacheSize == (int) lengthof(cacheinfo), "SysCacheSize does not match syscache.c's array"); Assert(!CacheInitialized); SysCacheRelationOidSize = SysCacheSupportingRelOidSize = 0; for (cacheId = 0; cacheId < SysCacheSize; cacheId++) { SysCache[cacheId] = InitCatCache(cacheId, cacheinfo[cacheId].reloid, cacheinfo[cacheId].indoid, cacheinfo[cacheId].nkeys, cacheinfo[cacheId].key, cacheinfo[cacheId].nbuckets); if (!PointerIsValid(SysCache[cacheId])) elog(ERROR, "could not initialize cache %u (%d)", cacheinfo[cacheId].reloid, cacheId); /* Accumulate data for OID lists, too */ SysCacheRelationOid[SysCacheRelationOidSize++] = cacheinfo[cacheId].reloid; SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] = cacheinfo[cacheId].reloid; SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] = cacheinfo[cacheId].indoid; /* see comments for RelationInvalidatesSnapshotsOnly */ Assert(!RelationInvalidatesSnapshotsOnly(cacheinfo[cacheId].reloid)); } Assert(SysCacheRelationOidSize <= lengthof(SysCacheRelationOid)); Assert(SysCacheSupportingRelOidSize <= lengthof(SysCacheSupportingRelOid)); /* Sort and de-dup OID arrays, so we can use binary search. 
*/ pg_qsort(SysCacheRelationOid, SysCacheRelationOidSize, sizeof(Oid), oid_compare); for (i = 1, j = 0; i < SysCacheRelationOidSize; i++) { if (SysCacheRelationOid[i] != SysCacheRelationOid[j]) SysCacheRelationOid[++j] = SysCacheRelationOid[i]; } SysCacheRelationOidSize = j + 1; pg_qsort(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize, sizeof(Oid), oid_compare); for (i = 1, j = 0; i < SysCacheSupportingRelOidSize; i++) { if (SysCacheSupportingRelOid[i] != SysCacheSupportingRelOid[j]) SysCacheSupportingRelOid[++j] = SysCacheSupportingRelOid[i]; } SysCacheSupportingRelOidSize = j + 1; CacheInitialized = true; }
/* * txid_current_snapshot() returns txid_snapshot * * Return current snapshot in TXID format * * Note that only top-transaction XIDs are included in the snapshot. */ Datum txid_current_snapshot(PG_FUNCTION_ARGS) { TxidSnapshot *snap; uint32 nxip, i; TxidEpoch state; Snapshot cur; cur = GetActiveSnapshot(); if (cur == NULL) elog(ERROR, "no active snapshot set"); load_xid_epoch(&state); /* * Compile-time limits on the procarray (MAX_BACKENDS processes plus * MAX_BACKENDS prepared transactions) guarantee nxip won't be too large. */ StaticAssertStmt(MAX_BACKENDS * 2 <= TXID_SNAPSHOT_MAX_NXIP, "possible overflow in txid_current_snapshot()"); /* allocate */ nxip = cur->xcnt; snap = palloc(TXID_SNAPSHOT_SIZE(nxip)); /* fill */ snap->xmin = convert_xid(cur->xmin, &state); snap->xmax = convert_xid(cur->xmax, &state); snap->nxip = nxip; for (i = 0; i < nxip; i++) snap->xip[i] = convert_xid(cur->xip[i], &state); /* * We want them guaranteed to be in ascending order. This also removes * any duplicate xids. Normally, an XID can only be assigned to one * backend, but when preparing a transaction for two-phase commit, there * is a transient state when both the original backend and the dummy * PGPROC entry reserved for the prepared transaction hold the same XID. */ sort_snapshot(snap); /* set size after sorting, because it may have removed duplicate xips */ SET_VARSIZE(snap, TXID_SNAPSHOT_SIZE(snap->nxip)); PG_RETURN_POINTER(snap); }
/*
 * pg_atomic_init_u32_impl
 *		Initialize an emulated 32-bit atomic: set up the lock stored in
 *		ptr->sema and store val_ as the initial value.
 *
 * ptr	- the atomic variable to initialize
 * val_	- initial value
 */
void
pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_)
{
	/*
	 * The lock is stored inside ptr->sema, so that field must be at least
	 * as large as slock_t.  (The message names the type actually being
	 * checked here, pg_atomic_uint32.)
	 */
	StaticAssertStmt(sizeof(ptr->sema) >= sizeof(slock_t),
					 "size mismatch of atomic_uint32 vs slock_t");

	/*
	 * If we're using semaphore based atomic flags, be careful about nested
	 * usage of atomics while a spinlock is held.
	 */
#ifndef HAVE_SPINLOCKS
	s_init_lock_sema((slock_t *) &ptr->sema, true);
#else
	SpinLockInit((slock_t *) &ptr->sema);
#endif
	ptr->value = val_;
}
void pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr) { StaticAssertStmt(sizeof(ptr->sema) >= sizeof(slock_t), "size mismatch of atomic_flag vs slock_t"); #ifndef HAVE_SPINLOCKS /* * NB: If we're using semaphore based TAS emulation, be careful to use a * separate set of semaphores. Otherwise we'd get in trouble if a atomic * var would be manipulated while spinlock is held. */ s_init_lock_sema((slock_t *) &ptr->sema, true); #else SpinLockInit((slock_t *) &ptr->sema); #endif }
/*
 * ItemPointerEquals
 *	Report whether two item pointers designate the same item, i.e. have
 *	the same block number and the same offset number.
 *
 * Note:
 *	The original comment states that both pointers are asserted to be
 *	valid; presumably that happens inside the accessor macros -- confirm.
 */
bool
ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
{
	/*
	 * ItemPointerData is expected to occupy exactly 6 bytes.  This function
	 * is an arbitrary spot for the check, but no better one exists.
	 */
	StaticAssertStmt(sizeof(ItemPointerData) == 3 * sizeof(uint16),
					 "ItemPointerData struct is improperly padded");

	/* Different blocks: no need to look at the offsets. */
	if (ItemPointerGetBlockNumber(pointer1) !=
		ItemPointerGetBlockNumber(pointer2))
		return false;

	return ItemPointerGetOffsetNumber(pointer1) ==
		ItemPointerGetOffsetNumber(pointer2);
}
/* * Determinisitcally generate salt for mock authentication, using a SHA256 * hash based on the username and a cluster-level secret key. Returns a * pointer to a static buffer of size SCRAM_DEFAULT_SALT_LEN. */ static char * scram_mock_salt(const char *username) { pg_sha256_ctx ctx; static uint8 sha_digest[PG_SHA256_DIGEST_LENGTH]; char *mock_auth_nonce = GetMockAuthenticationNonce(); /* * Generate salt using a SHA256 hash of the username and the cluster's * mock authentication nonce. (This works as long as the salt length is * not larger the SHA256 digest length. If the salt is smaller, the caller * will just ignore the extra data.) */ StaticAssertStmt(PG_SHA256_DIGEST_LENGTH >= SCRAM_DEFAULT_SALT_LEN, "salt length greater than SHA256 digest length"); pg_sha256_init(&ctx); pg_sha256_update(&ctx, (uint8 *) username, strlen(username)); pg_sha256_update(&ctx, (uint8 *) mock_auth_nonce, MOCK_AUTH_NONCE_LEN); pg_sha256_final(&ctx, sha_digest); return (char *) sha_digest; }
/*
 * PageIsVerified
 *		Check that the page header and checksum (if any) appear valid.
 *
 * This is called when a page has just been read in from disk.  The idea is
 * to cheaply detect trashed pages before we go nuts following bogus item
 * pointers, testing invalid transaction identifiers, etc.
 *
 * It turns out to be necessary to allow zeroed pages here too.  Even though
 * this routine is *not* called when deliberately adding a page to a relation,
 * there are scenarios in which a zeroed page might be found in a table.
 * (Example: a backend extends a relation, then crashes before it can write
 * any WAL entry about the new page.  The kernel will already have the
 * zeroed page in the file, and it will stay that way after restart.)  So we
 * allow zeroed pages here, and are careful that the page access macros
 * treat such a page as empty and without free space.  Eventually, VACUUM
 * will clean up such a page and make it usable.
 *
 * page		- the page image to check
 * blkno	- block number of the page, fed into the checksum calculation
 *
 * Returns true if the page is acceptable: header sane and checksum matching
 * (when checksums are enabled), or entirely zero, or header sane with a
 * checksum mismatch while ignore_checksum_failure is set.  Returns false
 * otherwise.  A checksum mismatch on a non-zero page is reported as a
 * WARNING with SQLSTATE ERRCODE_DATA_CORRUPTED.
 */
bool
PageIsVerified(Page page, BlockNumber blkno)
{
	PageHeader	p = (PageHeader) page;
	size_t	   *pagebytes;
	size_t		i;
	bool		checksum_failure = false;
	bool		header_sane = false;
	bool		all_zeroes = false;
	uint16		checksum = 0;

	/*
	 * Don't verify page data unless the page passes basic non-zero test
	 */
	if (!PageIsNew(page))
	{
		if (DataChecksumsEnabled())
		{
			checksum = pg_checksum_page((char *) page, blkno);

			if (checksum != p->pd_checksum)
				checksum_failure = true;
		}

		/*
		 * The following checks don't prove the header is correct, only that
		 * it looks sane enough to allow into the buffer pool. Later usage of
		 * the block can still reveal problems, which is why we offer the
		 * checksum option.
		 */
		if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
			p->pd_lower <= p->pd_upper &&
			p->pd_upper <= p->pd_special &&
			p->pd_special <= BLCKSZ &&
			p->pd_special == MAXALIGN(p->pd_special))
			header_sane = true;

		if (header_sane && !checksum_failure)
			return true;
	}

	/*
	 * Check all-zeroes case.  Luckily BLCKSZ is guaranteed to always be a
	 * multiple of size_t - and it's much faster to compare memory using the
	 * native word size.
	 */
	StaticAssertStmt(BLCKSZ == (BLCKSZ / sizeof(size_t)) * sizeof(size_t),
					 "BLCKSZ has to be a multiple of sizeof(size_t)");

	all_zeroes = true;
	pagebytes = (size_t *) page;
	for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)
	{
		if (pagebytes[i] != 0)
		{
			all_zeroes = false;
			break;
		}
	}

	if (all_zeroes)
		return true;

	/*
	 * Throw a WARNING if the checksum fails, but only after we've checked for
	 * the all-zeroes case.
	 */
	if (checksum_failure)
	{
		/*
		 * The SQLSTATE must be passed through errcode(); a bare
		 * ERRCODE_DATA_CORRUPTED here would just be evaluated and discarded
		 * by the comma operator, leaving the report without its error code.
		 */
		ereport(WARNING,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("page verification failed, calculated checksum %u but expected %u",
						checksum, p->pd_checksum)));

		if (header_sane && ignore_checksum_failure)
			return true;
	}

	return false;
}
/*
 * initsequencer - drive PL/Java initialization as a resumable state machine.
 *
 * is		- the stage to start (or resume) from; the switch below enters at
 *			  that stage and falls through every later stage in order.
 * tolerant	- if true, a failed step leaves the function quietly after the
 *			  step's own diagnostics; if false (or if
 *			  pljavaLoadingAsExtension is set, which forces it to false), a
 *			  final ERROR is raised at check_tolerant.
 *
 * The file-level variable initstage is advanced as each step succeeds, so a
 * later call can pick up where an earlier one stopped.
 *
 * There are a few ways to arrive in the initsequencer.
 * 1. From _PG_init (called exactly once when the library is loaded for ANY
 *    reason).
 *    1a. Because of the command LOAD 'libraryname';
 *        This case can be distinguished because _PG_init will have found the
 *        LOAD command and saved the 'libraryname' in pljavaLoadPath.
 *    1b. Because of a CREATE FUNCTION naming this library. pljavaLoadPath will
 *        be NULL.
 *    1c. By the first actual use of a PL/Java function, causing this library
 *        to be loaded. pljavaLoadPath will be NULL. The called function's Oid
 *        will be available to the call handler once we return from _PG_init,
 *        but it isn't (easily) available here.
 * 2. From the call handler, if initialization isn't complete yet. That can only
 *    mean something failed in the earlier call to _PG_init, and whatever it was
 *    is highly likely to fail again. That may lead to the untidiness of
 *    duplicated diagnostic messages, but for now I like the belt-and-suspenders
 *    approach of making sure the init sequence gets as many chances as possible
 *    to succeed.
 * 3. From a GUC assign hook, if the user has updated a setting that might allow
 *    initialization to succeed. It resumes from where it left off.
 *
 * In all cases, the sequence must progress as far as starting the VM and
 * initializing the PL/Java classes. In all cases except 1a, that's enough,
 * assuming the language handlers and schema have all been set up already (or,
 * in case 1b, the user is intent on setting them up explicitly).
 *
 * In case 1a, we can go ahead and test for, and create, the schema, functions,
 * and language entries as needed, using pljavaLoadPath as the library path
 * if creating the language handler functions. One-stop shopping. (The presence
 * of pljavaLoadPath in any of the other cases, such as resumption by an assign
 * hook, indicates it is really a continuation of case 1a.)
 */
static void initsequencer(enum initstage is, bool tolerant)
{
	JVMOptList optList;
	Invocation ctx;
	jint JNIresult;
	char *greeting;

	/*
	 * Every case deliberately falls through to the next one: entering at
	 * stage N runs stage N and all stages after it.
	 */
	switch (is)
	{
	case IS_FORMLESS_VOID:
		initstage = IS_GUCS_REGISTERED;
		/* fall through */

	case IS_GUCS_REGISTERED:
		/* The JVM library name is fixed here rather than looked up. */
		libjvmlocation = strdup("libjvm.so");

		initstage = IS_PLJAVA_ENABLED;
		/* fall through */

	case IS_PLJAVA_ENABLED:
		libjvm_handle = pg_dlopen(libjvmlocation);
		if ( NULL == libjvm_handle )
		{
			/*
			 * NOTE(review): in PostgreSQL, ereport(ERROR, ...) normally does
			 * not return, in which case the goto below would never run and
			 * the check_tolerant handling is bypassed -- confirm intent.
			 */
			ereport(ERROR, (
				errmsg("Cannot load libjvm.so library, check that it is available in LD_LIBRARY_PATH"),
				errdetail("%s", (char *)pg_dlerror())));
			goto check_tolerant;
		}
		initstage = IS_CAND_JVMOPENED;
		/* fall through */

	case IS_CAND_JVMOPENED:
		/* Look up the VM creation entry point in the library just opened. */
		pljava_createvm =
			(jint (JNICALL *)(JavaVM **, void **, void *))
			pg_dlsym(libjvm_handle, "JNI_CreateJavaVM");
		if ( NULL == pljava_createvm )
		{
			/*
			 * If it hasn't got the symbol, it can't be the right
			 * library, so close/unload it so another can be tried.
			 * Format the dlerror string first: dlclose may clobber it.
			 */
			char *dle = MemoryContextStrdup(ErrorContext, pg_dlerror());
			pg_dlclose(libjvm_handle);
			/*
			 * NOTE(review): the switch in this function has no case for
			 * IS_CAND_JVMLOCATION, so re-entering with that stage would land
			 * in the default branch -- confirm this is intended.
			 */
			initstage = IS_CAND_JVMLOCATION;
			/* NOTE(review): same remark as above about ERROR + goto. */
			ereport(ERROR, (
				errmsg("Cannot start Java VM"),
				errdetail("%s", dle),
				errhint("Check that libjvm.so is available in LD_LIBRARY_PATH")));
			goto check_tolerant;
		}
		initstage = IS_CREATEVM_SYM_FOUND;
		/* fall through */

	case IS_CREATEVM_SYM_FOUND:
		/* One-time miscellaneous setup. */
		s_javaLogLevel = INFO;
		checkIntTimeType();
		HashMap_initialize(); /* creates things in TopMemoryContext */
#ifdef PLJAVA_DEBUG
		/* Hard setting for debug. Don't forget to recompile...
		 */
		pljava_debug = 1;
#endif
		initstage = IS_MISC_ONCE_DONE;
		/* fall through */

	case IS_MISC_ONCE_DONE:
		/* Assemble the option list handed to the JVM at creation. */
		JVMOptList_init(&optList); /* uses CurrentMemoryContext */
		seenVisualVMName = false;
		addUserJVMOptions(&optList);
		if ( ! seenVisualVMName )
			JVMOptList_addVisualVMName(&optList);
		JVMOptList_add(&optList, "vfprintf", (void*)my_vfprintf, true);
#ifndef GCJ
		JVMOptList_add(&optList, "-Xrs", 0, true);
#endif
		effectiveClassPath = getClassPath("-Djava.class.path=");
		if(effectiveClassPath != 0)
		{
			JVMOptList_add(&optList, effectiveClassPath, 0, true);
		}
		initstage = IS_JAVAVM_OPTLIST;
		/* fall through */

	case IS_JAVAVM_OPTLIST:
		JNIresult = initializeJavaVM(&optList); /* frees the optList */
		if( JNI_OK != JNIresult )
		{
			/* Step back so a retry rebuilds the option list first. */
			initstage = IS_MISC_ONCE_DONE; /* optList has been freed */
			/* The result is printed with %ld below, so it must fit a long. */
			StaticAssertStmt(sizeof(jint) <= sizeof(long int),
				"jint wider than long int?!");
			ereport(WARNING,
				(errmsg("failed to create Java virtual machine"),
				 errdetail("JNI_CreateJavaVM returned an error code: %ld",
					(long int)JNIresult),
				 jvmStartedAtLeastOnce ?
					errhint("Because an earlier attempt during this session "
					"did start a VM before failing, this probably means your "
					"Java runtime environment does not support more than one "
					"VM creation per session. You may need to exit this "
					"session and start a new one.") : 0));
			goto check_tolerant;
		}
		jvmStartedAtLeastOnce = true;
		elog(DEBUG2, "successfully created Java virtual machine");
		initstage = IS_JAVAVM_STARTED;
		/* fall through */

	case IS_JAVAVM_STARTED:
#ifdef USE_PLJAVA_SIGHANDLERS
		pqsignal(SIGINT, pljavaStatementCancelHandler);
		pqsignal(SIGTERM, pljavaDieHandler);
#endif
		/* Register an on_proc_exit handler that destroys the VM
		 */
		on_proc_exit(_destroyJavaVM, 0);
		initstage = IS_SIGHANDLERS;
		/* fall through */

	case IS_SIGHANDLERS:
		/*
		 * Load the PL/Java classes and the Java session inside a boot
		 * invocation context; any error thrown is caught and downgraded or
		 * logged below instead of propagating.
		 */
		Invocation_pushBootContext(&ctx);
		PG_TRY();
		{
			initPLJavaClasses();
			initJavaSession();
			Invocation_popBootContext();
			initstage = IS_PLJAVA_FOUND;
		}
		PG_CATCH();
		{
			MemoryContextSwitchTo(ctx.upperContext); /* leave ErrorContext */
			Invocation_popBootContext();
			initstage = IS_MISC_ONCE_DONE;
			/* We can't stay here...
			 */
			if ( tolerant )
				reLogWithChangedLevel(WARNING); /* so xact is not aborted */
			else
			{
				EmitErrorReport(); /* no more unwinding, just log it */
				/* Seeing an ERROR emitted to the log, without leaving the
				 * transaction aborted, would violate the principle of least
				 * astonishment. But at check_tolerant below, another ERROR will
				 * be thrown immediately, so the transaction effect will be as
				 * expected and this ERROR will contribute information beyond
				 * what is in the generic one thrown down there.
				 */
				FlushErrorState();
			}
		}
		PG_END_TRY();
		/* The catch block reset initstage, so this detects the failure. */
		if ( IS_PLJAVA_FOUND != initstage )
		{
			/* JVM initialization failed for some reason. Destroy
			 * the VM if it exists. Perhaps the user will try
			 * fixing the pljava.classpath and make a new attempt.
			 */
			ereport(WARNING, (
				errmsg("failed to load initial PL/Java classes"),
				errhint("The most common reason is that \"pljava_classpath\" "
					"needs to be set, naming the proper \"pljava.jar\" file.")
				));
			_destroyJavaVM(0, 0);
			goto check_tolerant;
		}
		/* fall through */

	case IS_PLJAVA_FOUND:
		/*
		 * Announce success: at NOTICE when loaded via LOAD (pljavaLoadPath
		 * set), otherwise only at DEBUG1.
		 */
		greeting = InstallHelper_hello();
		ereport(NULL != pljavaLoadPath ? NOTICE : DEBUG1, (
				errmsg("PL/Java loaded"),
				errdetail("versions:\n%s", greeting)));
		pfree(greeting);
		if ( NULL != pljavaLoadPath )
			InstallHelper_groundwork(); /* sqlj schema, language handlers, ...*/
		initstage = IS_COMPLETE;
		/* fall through */

	case IS_COMPLETE:
		pljavaLoadingAsExtension = false;
		if ( alteredSettingsWereNeeded )
		{
			/* Use this StringInfoData to conditionally construct part of the
			 * hint string suggesting ALTER DATABASE ... SET ... FROM CURRENT
			 * provided the server is >= 9.2 where that will actually work.
			 * In 9.3, psprintf appeared, which would make this all simpler,
			 * but if 9.3+ were all that had to be supported, this would all
			 * be moot anyway. Doing the initStringInfo inside the ereport
			 * ensures the string is allocated in ErrorContext and won't leak.
			 * Don't remove the extra parens grouping
			 * (initStringInfo, appendStringInfo, errhint) ... with the parens,
			 * that's a comma expression, which is sequenced; without them, they
			 * are just function parameters with evaluation order unknown.
			 */
			StringInfoData buf;
#if PG_VERSION_NUM >= 90200
#define MOREHINT \
				appendStringInfo(&buf, \
					"using ALTER DATABASE %s SET ... FROM CURRENT or ", \
					pljavaDbName()),
#else
#define MOREHINT
#endif
			ereport(NOTICE, (
				errmsg("PL/Java successfully started after adjusting settings"),
				(initStringInfo(&buf),
				MOREHINT
				errhint("The settings that worked should be saved (%s"
					"in the \"%s\" file). For a reminder of what has been set, "
					"try: SELECT name, setting FROM pg_settings WHERE name LIKE"
					" 'pljava.%%' AND source = 'session'",
					buf.data,
					superuser()
						? PG_GETCONFIGOPTION("config_file")
						: "postgresql.conf"))));
#undef MOREHINT
			if ( loadAsExtensionFailed )
			{
				ereport(NOTICE, (errmsg(
					"PL/Java load successful after failed CREATE EXTENSION"),
					errdetail(
					"PL/Java is now installed, but not as an extension."),
					errhint(
					"To correct that, either COMMIT or ROLLBACK, make sure "
					"the working settings are saved, exit this session, and "
					"in a new session, either: "
					"1. if committed, run "
					"\"CREATE EXTENSION pljava FROM unpackaged\", or 2. "
					"if rolled back, simply \"CREATE EXTENSION pljava\" again."
					)));
			}
		}
		/* Fully initialized: the only normal exit from the sequence. */
		return;

	default:
		ereport(ERROR, (
			errmsg("cannot set up PL/Java"),
			errdetail(
				"An unexpected stage was reached in the startup sequence."),
			errhint(
				"Please report the circumstances to the PL/Java maintainers.")
			));
	}

	/*
	 * Common failure exit.  A failure while loading as an extension is never
	 * tolerated, and is remembered in loadAsExtensionFailed.
	 */
check_tolerant:
	if ( pljavaLoadingAsExtension )
	{
		tolerant = false;
		loadAsExtensionFailed = true;
		pljavaLoadingAsExtension = false;
	}
	if ( !tolerant )
	{
		ereport(ERROR, (
			errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
			errmsg(
				"cannot use PL/Java before successfully completing its setup"),
			errhint(
				"Check the log for messages closely preceding this one, "
				"detailing what step of setup failed and what will be needed, "
				"probably setting one of the \"pljava.\" configuration "
				"variables, to complete the setup. If there is not enough "
				"help in the log, try again with different settings for "
				"\"log_min_messages\" or \"log_error_verbosity\".")));
	}
}
/*
 * Create a WaitEventSet with space for nevents different events to wait for.
 *
 * These events can then be efficiently waited upon together, using
 * WaitEventSetWait().
 *
 * context	- memory context the set is allocated in
 * nevents	- number of events the set must be able to hold
 *
 * The set header, the WaitEvent array and the implementation-specific array
 * (epoll events, pollfds, or Windows handles) are carved out of one
 * zero-filled allocation, in that order.  Returns the new set; on epoll
 * platforms an ERROR is raised if epoll_create() fails.
 *
 * NOTE(review): the sub-arrays are placed back to back without explicit
 * alignment padding; confirm that is acceptable for all element types.
 */
WaitEventSet *
CreateWaitEventSet(MemoryContext context, int nevents)
{
	WaitEventSet *set;
	char	   *data;
	Size		sz = 0;

	/* First pass: total up the space needed. */
	sz += sizeof(WaitEventSet);
	sz += sizeof(WaitEvent) * nevents;

#if defined(WAIT_USE_EPOLL)
	sz += sizeof(struct epoll_event) * nevents;
#elif defined(WAIT_USE_POLL)
	sz += sizeof(struct pollfd) * nevents;
#elif defined(WAIT_USE_WIN32)
	/* need space for the pgwin32_signal_event */
	sz += sizeof(HANDLE) * (nevents + 1);
#endif

	data = (char *) MemoryContextAllocZero(context, sz);

	/* Second pass: hand out the pieces in the same order. */
	set = (WaitEventSet *) data;
	data += sizeof(WaitEventSet);

	set->events = (WaitEvent *) data;
	data += sizeof(WaitEvent) * nevents;

#if defined(WAIT_USE_EPOLL)
	set->epoll_ret_events = (struct epoll_event *) data;
	data += sizeof(struct epoll_event) * nevents;
#elif defined(WAIT_USE_POLL)
	set->pollfds = (struct pollfd *) data;
	data += sizeof(struct pollfd) * nevents;
#elif defined(WAIT_USE_WIN32)
	/*
	 * handles is an array of HANDLE, so cast to pointer-to-HANDLE like the
	 * sibling branches do, and step over all nevents + 1 reserved slots.
	 */
	set->handles = (HANDLE *) data;
	data += sizeof(HANDLE) * (nevents + 1);
#endif

	set->latch = NULL;
	set->nevents_space = nevents;

#if defined(WAIT_USE_EPOLL)
	set->epoll_fd = epoll_create(nevents);
	if (set->epoll_fd < 0)
		elog(ERROR, "epoll_create failed: %m");
#elif defined(WAIT_USE_WIN32)

	/*
	 * To handle signals while waiting, we need to add a win32 specific event.
	 * We accounted for the additional event at the top of this routine. See
	 * port/win32/signal.c for more details.
	 *
	 * Note: pgwin32_signal_event should be first to ensure that it will be
	 * reported when multiple events are set.  We want to guarantee that
	 * pending signals are serviced.
	 */
	set->handles[0] = pgwin32_signal_event;

	/*
	 * Presumably relied upon because the remaining handle slots are left
	 * zero-filled by the allocation above -- confirm.
	 */
	StaticAssertStmt(WSA_INVALID_EVENT == NULL, "");
#endif

	return set;
}
static void ReadBlocks(int filenum) { FILE *file; char record_type; char *dbname; Oid record_filenode; ForkNumber record_forknum; BlockNumber record_blocknum; BlockNumber record_range; int log_level = DEBUG3; Oid relOid = InvalidOid; Relation rel = NULL; bool skip_relation = false; bool skip_fork = false; bool skip_block = false; BlockNumber nblocks = 0; BlockNumber blocks_restored = 0; const char *filepath; /* * If this condition changes, then this code, and the code in the writer * will need to be changed; especially the format specifiers in log and * error messages. */ StaticAssertStmt(MaxBlockNumber == 0xFFFFFFFE, "Code may need review."); filepath = getSavefileName(filenum); file = fileOpen(filepath, PG_BINARY_R); dbname = readDBName(file, filepath); /* * When restoring global objects, the dbname is zero-length string, and non- * zero length otherwise. And filenum is never expected to be smaller than 1. */ Assert(filenum >= 1); Assert(filenum == 1 ? strlen(dbname) == 0 : strlen(dbname) > 0); /* To restore the global objects, use default database */ BackgroundWorkerInitializeConnection(filenum == 1 ? guc_default_database : dbname, NULL); SetCurrentStatementStartTimestamp(); StartTransactionCommand(); SPI_connect(); PushActiveSnapshot(GetTransactionSnapshot()); pgstat_report_activity(STATE_RUNNING, "restoring buffers"); /* * Note that in case of a read error, we will leak relcache entry that we may * currently have open. In case of EOF, we close the relation after the loop. */ while (fileRead(&record_type, 1, file, true, filepath)) { /* * If we want to process the signals, this seems to be the best place * to do it. Generally the backends refrain from processing config file * while in transaction, but that's more for the fear of allowing GUC * changes to affect expression evaluation, causing different results * for the same expression in a transaction. Since this worker is not * processing any queries, it is okay to process the config file here. 
* * Even though it's okay to process SIGHUP here, doing so doesn't add * any value. The only reason we might want to process config file here * would be to allow the user to interrupt the BlockReader's operation * by changing this extenstion's GUC parameter. But the user can do that * anyway, using SIGTERM or pg_terminate_backend(). */ /* Stop processing the save-file if the Postmaster wants us to die. */ if (got_sigterm) break; ereport(log_level, (errmsg("record type %x - %c", record_type, record_type))); switch (record_type) { case 'r': { /* Close the previous relation, if any. */ if (rel) { relation_close(rel, AccessShareLock); rel = NULL; } record_forknum = InvalidForkNumber; record_blocknum = InvalidBlockNumber; nblocks = 0; fileRead(&record_filenode, sizeof(Oid), file, false, filepath); relOid = GetRelOid(record_filenode); ereport(log_level, (errmsg("processing filenode %u, relation %u", record_filenode, relOid))); /* * If the relation has been rewritten/dropped since we saved it, * just skip it and process the next relation. 
*/ if (relOid == InvalidOid) skip_relation = true; else { skip_relation = false; /* Open the relation */ rel = relation_open(relOid, AccessShareLock); RelationOpenSmgr(rel); } } break; case 'f': { record_blocknum = InvalidBlockNumber; nblocks = 0; fileRead(&record_forknum, sizeof(ForkNumber), file, false, filepath); if (skip_relation) continue; if (rel == NULL) ereport(ERROR, (errmsg("found a fork record without a preceeding relation record"))); ereport(log_level, (errmsg("processing fork %d", record_forknum))); if (!smgrexists(rel->rd_smgr, record_forknum)) skip_fork = true; else { skip_fork = false; nblocks = RelationGetNumberOfBlocksInFork(rel, record_forknum); } } break; case 'b': { if (record_forknum == InvalidForkNumber) ereport(ERROR, (errmsg("found a block record without a preceeding fork record"))); fileRead(&record_blocknum, sizeof(BlockNumber), file, false, filepath); if (skip_relation || skip_fork) continue; /* * Don't try to read past the file; the file may have been shrunk * by a vaccum/truncate operation. 
*/ if (record_blocknum >= nblocks) { ereport(log_level, (errmsg("reader %d skipping block filenode %u forknum %d blocknum %u", filenum, record_filenode, record_forknum, record_blocknum))); skip_block = true; continue; } else { Buffer buf; skip_block = false; ereport(log_level, (errmsg("reader %d reading block filenode %u forknum %d blocknum %u", filenum, record_filenode, record_forknum, record_blocknum))); buf = ReadBufferExtended(rel, record_forknum, record_blocknum, RBM_NORMAL, NULL); ReleaseBuffer(buf); ++blocks_restored; } } break; case 'N': { BlockNumber block; Assert(record_blocknum != InvalidBlockNumber); if (record_blocknum == InvalidBlockNumber) ereport(ERROR, (errmsg("found a block range record without a preceeding block record"))); fileRead(&record_range, sizeof(int), file, false, filepath); if (skip_relation || skip_fork || skip_block) continue; ereport(log_level, (errmsg("reader %d reading range filenode %u forknum %d blocknum %u range %u", filenum, record_filenode, record_forknum, record_blocknum, record_range))); for (block = record_blocknum + 1; block <= (record_blocknum + record_range); ++block) { Buffer buf; /* * Don't try to read past the file; the file may have been * shrunk by a vaccum operation. 
*/ if (block >= nblocks) { ereport(log_level, (errmsg("reader %d skipping block range filenode %u forknum %d start %u end %u", filenum, record_filenode, record_forknum, block, record_blocknum + record_range))); break; } buf = ReadBufferExtended(rel, record_forknum, block, RBM_NORMAL, NULL); ReleaseBuffer(buf); ++blocks_restored; } } break; default: { ereport(ERROR, (errmsg("found unexpected save-file marker %x - %c)", record_type, record_type))); Assert(false); } break; } } if (rel) relation_close(rel, AccessShareLock); ereport(LOG, (errmsg("Block Reader %d: restored %u blocks", filenum, blocks_restored))); SPI_finish(); PopActiveSnapshot(); CommitTransactionCommand(); pgstat_report_activity(STATE_IDLE, NULL); fileClose(file, filepath); /* Remove the save-file */ if (remove(filepath) != 0) ereport(ERROR, (errcode_for_file_access(), errmsg("error removing file \"%s\" : %m", filepath))); }