/*
 * write_database_file: update the flat database file
 *
 * A side effect is to determine the oldest database's datfrozenxid
 * so we can set or update the XID wrap limit.
 *
 * Also, if "startup" is true, we tell relcache.c to clear out the relcache
 * init file in each database.  That's a bit nonmodular, but scanning
 * pg_database twice during system startup seems too high a price for keeping
 * things better separated.
 *
 * NOTE(review): "startup" is never referenced in this body — the relcache
 * init file clearing described above presumably happens elsewhere; confirm
 * against the caller before removing the parameter.
 */
static void
write_database_file(Relation drel, bool startup)
{
	StringInfoData buffer;
	HeapScanDesc scan;
	HeapTuple	tuple;
	NameData	oldest_datname;
	TransactionId oldest_datfrozenxid = InvalidTransactionId;
	MirroredFlatFileOpen mirroredOpen;

	initStringInfo(&buffer);

	/* Truncate-and-rewrite the flat file (global/pg_database). */
	MirroredFlatFile_Open(
						  &mirroredOpen,
						  "global",
						  "pg_database",
						  O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY,
						  S_IRUSR | S_IWUSR,
						  /* suppressError */ false,
						  /* atomic operation */ true,
						  /* isMirrorRecovery */ false);

	/*
	 * Read pg_database and write the file.
	 */
	scan = heap_beginscan(drel, SnapshotNow, 0, NULL);
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
		char	   *datname;
		Oid			datoid;
		Oid			dattablespace;
		TransactionId datfrozenxid;

		datname = NameStr(dbform->datname);
		datoid = HeapTupleGetOid(tuple);
		dattablespace = dbform->dattablespace;
		datfrozenxid = dbform->datfrozenxid;

		/*
		 * Identify the oldest datfrozenxid.  This must match the logic in
		 * vac_truncate_clog() in vacuum.c.
		 *
		 * MPP-20053: Skip databases that cannot be connected to in computing
		 * the oldest database.
		 */
		if (dbform->datallowconn && TransactionIdIsNormal(datfrozenxid))
		{
			if (oldest_datfrozenxid == InvalidTransactionId ||
				TransactionIdPrecedes(datfrozenxid, oldest_datfrozenxid))
			{
				oldest_datfrozenxid = datfrozenxid;
				namestrcpy(&oldest_datname, datname);
			}
		}

		/*
		 * Check for illegal characters in the database name.
		 */
		if (!name_okay(datname))
		{
			ereport(LOG,
					(errmsg("invalid database name \"%s\"", datname)));
			continue;
		}

		/*
		 * MPP-10111 - During database expansion we need to be able to bring a
		 * database up in order to correct the filespace locations in the
		 * catalog.  At this point we will not be able to resolve database
		 * paths for databases not stored in "pg_default" or "pg_global".
		 *
		 * This is solved by passing a special guc to the startup during this
		 * phase of expand to bypass logic involving non-system tablespaces.
		 * Since we are bypassing the clearing of the relation cache on these
		 * databases we need to ensure that we don't try to use them at all
		 * elsewhere.  This is done with a similar check in
		 * PersistentTablespace_GetPrimaryAndMirrorFilespaces().
		 *
		 * This check must run BEFORE the entry is appended below: previously
		 * the "continue" was the last statement of the loop body, making it a
		 * no-op, so such databases were still written to the flat file.
		 */
		if (gp_before_filespace_setup && !IsBuiltinTablespace(dattablespace))
			continue;

		/*
		 * The file format is: "dbname" oid tablespace frozenxid
		 *
		 * The xids are not needed for backend startup, but are of use to
		 * autovacuum, and might also be helpful for forensic purposes.
		 */
		sputs_quote(&buffer, datname);
		appendStringInfo(&buffer, " %u %u %u\n",
						 datoid, dattablespace, datfrozenxid);
	}
	heap_endscan(scan);

	/* Write the accumulated contents, flush, and close. */
	MirroredFlatFile_Append(&mirroredOpen,
							buffer.data,
							buffer.len,
							/* suppressError */ false);

	MirroredFlatFile_Flush(&mirroredOpen,
						   /* suppressError */ false);

	MirroredFlatFile_Close(&mirroredOpen);

	if (buffer.maxlen > 0)
		pfree(buffer.data);

	/*
	 * Set the transaction ID wrap limit using the oldest datfrozenxid
	 */
	if (oldest_datfrozenxid != InvalidTransactionId)
		SetTransactionIdLimit(oldest_datfrozenxid, &oldest_datname);
}
/*
 * Physical write of a page from a buffer slot
 *
 * On failure, we cannot just ereport(ERROR) since caller has put state in
 * shared memory that must be undone.  So, we return FALSE and save enough
 * info in static variables (slru_errcause / slru_errno) to let
 * SlruReportIOError make the report.
 *
 * For now, assume it's not worth keeping a file pointer open across
 * independent read/write operations.  We do batch operations during
 * SimpleLruFlush, though.
 *
 * fdata is NULL for a standalone write, pointer to open-file info during
 * SimpleLruFlush.
 */
static bool
SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
{
	SlruShared	shared = ctl->shared;
	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
	int			offset = rpageno * BLCKSZ;	/* byte offset within segment */
	char		simpleFileName[MAXPGPATH];
	MirroredFlatFileOpen *existingMirroredOpen = NULL;
	MirroredFlatFileOpen newMirroredOpen = MirroredFlatFileOpen_Init;
	MirroredFlatFileOpen *useMirroredOpen = NULL;

	/*
	 * During a Flush, we may already have the desired file open: scan the
	 * flush context's cache of open segments for a match.
	 */
	if (fdata)
	{
		int			i;

		for (i = 0; i < fdata->num_files; i++)
		{
			if (fdata->segno[i] == segno)
			{
				existingMirroredOpen = &fdata->mirroredOpens[i];
				break;
			}
		}
	}

	if (existingMirroredOpen == NULL ||
		!MirroredFlatFile_IsActive(existingMirroredOpen))
	{
		/*
		 * If the file doesn't already exist, we should create it.  It is
		 * possible for this to need to happen when writing a page that's not
		 * first in its segment; we assume the OS can cope with that. (Note:
		 * it might seem that it'd be okay to create files only when
		 * SimpleLruZeroPage is called for the first page of a segment.
		 * However, if after a crash and restart the REDO logic elects to
		 * replay the log from a checkpoint before the latest one, then it's
		 * possible that we will get commands to set transaction status of
		 * transactions that have already been truncated from the commit log.
		 * Easiest way to deal with that is to accept references to
		 * nonexistent files here and in SlruPhysicalReadPage.)
		 *
		 * Note: it is possible for more than one backend to be executing this
		 * code simultaneously for different pages of the same file. Hence,
		 * don't use O_EXCL or O_TRUNC or anything like that.
		 */
		SlruSimpleFileName(simpleFileName, segno);
		if (MirroredFlatFile_Open(
								  &newMirroredOpen,
								  ctl->Dir,
								  simpleFileName,
								  O_RDWR | O_CREAT | PG_BINARY,
								  S_IRUSR | S_IWUSR,
								  /* suppressError */ true,
								  /* atomic operation */ false,
								  /*isMirrorRecovery */ false))
		{
			/* errno is captured immediately so later calls can't clobber it */
			slru_errcause = SLRU_OPEN_FAILED;
			slru_errno = errno;
			return false;
		}

		if (fdata)
		{
			if (fdata->num_files < MAX_FLUSH_BUFFERS)
			{
				/* Cache the newly opened segment in the flush context. */
				fdata->mirroredOpens[fdata->num_files] = newMirroredOpen;
				useMirroredOpen = &fdata->mirroredOpens[fdata->num_files];
				fdata->segno[fdata->num_files] = segno;
				fdata->num_files++;
			}
			else
			{
				/*
				 * In the unlikely event that we exceed MAX_FLUSH_BUFFERS,
				 * fall back to treating it as a standalone write.
				 *
				 * Nulling the local fdata makes every later "if (!fdata)"
				 * branch below behave as in the standalone case (fsync and
				 * close here rather than deferring to the flush).
				 */
				fdata = NULL;
				useMirroredOpen = &newMirroredOpen;
			}
		}
		else
			useMirroredOpen = &newMirroredOpen;
	}
	else
		useMirroredOpen = existingMirroredOpen;

	Assert(useMirroredOpen != NULL);

	/* Position at the page's offset; a short seek is reported as an error. */
	if (MirroredFlatFile_SeekSet(
								 useMirroredOpen,
								 offset) != offset)
	{
		slru_errcause = SLRU_SEEK_FAILED;
		slru_errno = errno;
		/* Only close if this open is ours alone (not cached in fdata). */
		if (!fdata)
			MirroredFlatFile_Close(useMirroredOpen);
		return false;
	}

	if (MirroredFlatFile_Write(
							   useMirroredOpen,
							   offset,
							   shared->page_buffer[slotno],
							   BLCKSZ,
							   /* suppressError */ true))
	{
		slru_errcause = SLRU_WRITE_FAILED;
		slru_errno = errno;
		if (!fdata)
			MirroredFlatFile_Close(useMirroredOpen);
		return false;
	}

	/*
	 * If not part of Flush, need to fsync now.  We assume this happens
	 * infrequently enough that it's not a performance issue.
	 */
	if (!fdata)
	{
		if (ctl->do_fsync &&
			MirroredFlatFile_Flush(
								   useMirroredOpen,
								   /* suppressError */ true))
		{
			slru_errcause = SLRU_FSYNC_FAILED;
			slru_errno = errno;
			MirroredFlatFile_Close(useMirroredOpen);
			return false;
		}

		/* UNDONE: We don't have a suppressError for close... */
		MirroredFlatFile_Close(useMirroredOpen);
	}

	return true;
}