/* Process one per-dbspace directory for ResetUnloggedRelations */ static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) { DIR *dbspace_dir; struct dirent *de; char rm_path[MAXPGPATH]; /* Caller must specify at least one operation. */ Assert((op & (UNLOGGED_RELATION_CLEANUP | UNLOGGED_RELATION_INIT)) != 0); /* * Cleanup is a two-pass operation. First, we go through and identify all * the files with init forks. Then, we go through again and nuke * everything with the same OID except the init fork. */ if ((op & UNLOGGED_RELATION_CLEANUP) != 0) { HTAB *hash = NULL; HASHCTL ctl; /* Open the directory. */ dbspace_dir = AllocateDir(dbspacedirname); if (dbspace_dir == NULL) { elog(LOG, "could not open dbspace directory \"%s\": %m", dbspacedirname); return; } /* * It's possible that someone could create a ton of unlogged relations * in the same database & tablespace, so we'd better use a hash table * rather than an array or linked list to keep track of which files * need to be reset. Otherwise, this cleanup operation would be * O(n^2). */ ctl.keysize = sizeof(unlogged_relation_entry); ctl.entrysize = sizeof(unlogged_relation_entry); hash = hash_create("unlogged hash", 32, &ctl, HASH_ELEM); /* Scan the directory. */ while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) { ForkNumber forkNum; int oidchars; unlogged_relation_entry ent; /* Skip anything that doesn't look like a relation data file. */ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, &forkNum)) continue; /* Also skip it unless this is the init fork. */ if (forkNum != INIT_FORKNUM) continue; /* * Put the OID portion of the name into the hash table, if it * isn't already. */ memset(ent.oid, 0, sizeof(ent.oid)); memcpy(ent.oid, de->d_name, oidchars); hash_search(hash, &ent, HASH_ENTER, NULL); } /* Done with the first pass. */ FreeDir(dbspace_dir); /* * If we didn't find any init forks, there's no point in continuing; * we can bail out now. */ if (hash_get_num_entries(hash) == 0) { hash_destroy(hash); return; } /* * Now, make a second pass and remove anything that matches. First, * reopen the directory. */ dbspace_dir = AllocateDir(dbspacedirname); if (dbspace_dir == NULL) { elog(LOG, "could not open dbspace directory \"%s\": %m", dbspacedirname); hash_destroy(hash); return; } /* Scan the directory. */ while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) { ForkNumber forkNum; int oidchars; bool found; unlogged_relation_entry ent; /* Skip anything that doesn't look like a relation data file. */ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, &forkNum)) continue; /* We never remove the init fork. */ if (forkNum == INIT_FORKNUM) continue; /* * See whether the OID portion of the name shows up in the hash * table. */ memset(ent.oid, 0, sizeof(ent.oid)); memcpy(ent.oid, de->d_name, oidchars); hash_search(hash, &ent, HASH_FIND, &found); /* If so, nuke it! */ if (found) { snprintf(rm_path, sizeof(rm_path), "%s/%s", dbspacedirname, de->d_name); /* * It's tempting to actually throw an error here, but since * this code gets run during database startup, that could * result in the database failing to start. (XXX Should we do * it anyway?) */ if (unlink(rm_path)) elog(LOG, "could not unlink file \"%s\": %m", rm_path); else elog(DEBUG2, "unlinked file \"%s\"", rm_path); } } /* Cleanup is complete. */ FreeDir(dbspace_dir); hash_destroy(hash); } /* * Initialization happens after cleanup is complete: we copy each init * fork file to the corresponding main fork file. Note that if we are * asked to do both cleanup and init, we may never get here: if the * cleanup code determines that there are no init forks in this dbspace, * it will return before we get to this point. */ if ((op & UNLOGGED_RELATION_INIT) != 0) { /* Open the directory. */ dbspace_dir = AllocateDir(dbspacedirname); if (dbspace_dir == NULL) { /* we just saw this directory, so it really ought to be there */ elog(LOG, "could not open dbspace directory \"%s\": %m", dbspacedirname); return; } /* Scan the directory. */ while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) { ForkNumber forkNum; int oidchars; char oidbuf[OIDCHARS + 1]; char srcpath[MAXPGPATH]; char dstpath[MAXPGPATH]; /* Skip anything that doesn't look like a relation data file. */ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, &forkNum)) continue; /* Also skip it unless this is the init fork. */ if (forkNum != INIT_FORKNUM) continue; /* Construct source pathname. */ snprintf(srcpath, sizeof(srcpath), "%s/%s", dbspacedirname, de->d_name); /* Construct destination pathname. */ memcpy(oidbuf, de->d_name, oidchars); oidbuf[oidchars] = '\0'; snprintf(dstpath, sizeof(dstpath), "%s/%s%s", dbspacedirname, oidbuf, de->d_name + oidchars + 1 + strlen(forkNames[INIT_FORKNUM])); /* OK, we're ready to perform the actual copy. */ elog(DEBUG2, "copying %s to %s", srcpath, dstpath); copy_file(srcpath, dstpath); } /* Done with the first pass. */ FreeDir(dbspace_dir); } }
/* * Process one per-dbspace directory for ResetUnloggedRelations */ static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) { DIR *dbspace_dir; struct dirent *de; char rm_path[MAXPGPATH * 2]; /* Caller must specify at least one operation. */ Assert((op & (UNLOGGED_RELATION_CLEANUP | UNLOGGED_RELATION_INIT)) != 0); /* * Cleanup is a two-pass operation. First, we go through and identify all * the files with init forks. Then, we go through again and nuke * everything with the same OID except the init fork. */ if ((op & UNLOGGED_RELATION_CLEANUP) != 0) { HTAB *hash; HASHCTL ctl; /* * It's possible that someone could create a ton of unlogged relations * in the same database & tablespace, so we'd better use a hash table * rather than an array or linked list to keep track of which files * need to be reset. Otherwise, this cleanup operation would be * O(n^2). */ memset(&ctl, 0, sizeof(ctl)); ctl.keysize = sizeof(unlogged_relation_entry); ctl.entrysize = sizeof(unlogged_relation_entry); hash = hash_create("unlogged hash", 32, &ctl, HASH_ELEM); /* Scan the directory. */ dbspace_dir = AllocateDir(dbspacedirname); while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) { ForkNumber forkNum; int oidchars; unlogged_relation_entry ent; /* Skip anything that doesn't look like a relation data file. */ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, &forkNum)) continue; /* Also skip it unless this is the init fork. */ if (forkNum != INIT_FORKNUM) continue; /* * Put the OID portion of the name into the hash table, if it * isn't already. */ memset(ent.oid, 0, sizeof(ent.oid)); memcpy(ent.oid, de->d_name, oidchars); hash_search(hash, &ent, HASH_ENTER, NULL); } /* Done with the first pass. */ FreeDir(dbspace_dir); /* * If we didn't find any init forks, there's no point in continuing; * we can bail out now. */ if (hash_get_num_entries(hash) == 0) { hash_destroy(hash); return; } /* * Now, make a second pass and remove anything that matches. */ dbspace_dir = AllocateDir(dbspacedirname); while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) { ForkNumber forkNum; int oidchars; bool found; unlogged_relation_entry ent; /* Skip anything that doesn't look like a relation data file. */ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, &forkNum)) continue; /* We never remove the init fork. */ if (forkNum == INIT_FORKNUM) continue; /* * See whether the OID portion of the name shows up in the hash * table. */ memset(ent.oid, 0, sizeof(ent.oid)); memcpy(ent.oid, de->d_name, oidchars); hash_search(hash, &ent, HASH_FIND, &found); /* If so, nuke it! */ if (found) { snprintf(rm_path, sizeof(rm_path), "%s/%s", dbspacedirname, de->d_name); if (unlink(rm_path) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove file \"%s\": %m", rm_path))); else elog(DEBUG2, "unlinked file \"%s\"", rm_path); } } /* Cleanup is complete. */ FreeDir(dbspace_dir); hash_destroy(hash); } /* * Initialization happens after cleanup is complete: we copy each init * fork file to the corresponding main fork file. Note that if we are * asked to do both cleanup and init, we may never get here: if the * cleanup code determines that there are no init forks in this dbspace, * it will return before we get to this point. */ if ((op & UNLOGGED_RELATION_INIT) != 0) { /* Scan the directory. */ dbspace_dir = AllocateDir(dbspacedirname); while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) { ForkNumber forkNum; int oidchars; char oidbuf[OIDCHARS + 1]; char srcpath[MAXPGPATH * 2]; char dstpath[MAXPGPATH]; /* Skip anything that doesn't look like a relation data file. */ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, &forkNum)) continue; /* Also skip it unless this is the init fork. */ if (forkNum != INIT_FORKNUM) continue; /* Construct source pathname. */ snprintf(srcpath, sizeof(srcpath), "%s/%s", dbspacedirname, de->d_name); /* Construct destination pathname. */ memcpy(oidbuf, de->d_name, oidchars); oidbuf[oidchars] = '\0'; snprintf(dstpath, sizeof(dstpath), "%s/%s%s", dbspacedirname, oidbuf, de->d_name + oidchars + 1 + strlen(forkNames[INIT_FORKNUM])); /* OK, we're ready to perform the actual copy. */ elog(DEBUG2, "copying %s to %s", srcpath, dstpath); copy_file(srcpath, dstpath); } FreeDir(dbspace_dir); /* * copy_file() above has already called pg_flush_data() on the files * it created. Now we need to fsync those files, because a checkpoint * won't do it for us while we're in recovery. We do this in a * separate pass to allow the kernel to perform all the flushes * (especially the metadata ones) at once. */ dbspace_dir = AllocateDir(dbspacedirname); while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) { ForkNumber forkNum; int oidchars; char oidbuf[OIDCHARS + 1]; char mainpath[MAXPGPATH]; /* Skip anything that doesn't look like a relation data file. */ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, &forkNum)) continue; /* Also skip it unless this is the init fork. */ if (forkNum != INIT_FORKNUM) continue; /* Construct main fork pathname. */ memcpy(oidbuf, de->d_name, oidchars); oidbuf[oidchars] = '\0'; snprintf(mainpath, sizeof(mainpath), "%s/%s%s", dbspacedirname, oidbuf, de->d_name + oidchars + 1 + strlen(forkNames[INIT_FORKNUM])); fsync_fname(mainpath, false); } FreeDir(dbspace_dir); /* * Lastly, fsync the database directory itself, ensuring the * filesystem remembers the file creations and deletions we've done. * We don't bother with this during a call that does only * UNLOGGED_RELATION_CLEANUP, because if recovery crashes before we * get to doing UNLOGGED_RELATION_INIT, we'll redo the cleanup step * too at the next startup attempt. */ fsync_fname(dbspacedirname, true); } }