/*
 * Stamp a checksum onto a page that lives in private memory.
 *
 * The checksum is written directly into the caller's buffer, so this is only
 * safe when no other process can be modifying the page at the same time.
 * Pages that are still new, and any page when data checksums are disabled,
 * are left untouched.
 */
void
PageSetChecksumInplace(Page page, BlockNumber blkno)
{
	PageHeader	hdr = (PageHeader) page;

	/* Only initialized pages get a checksum, and only if checksums are on. */
	if (!PageIsNew(page) && DataChecksumsEnabled())
		hdr->pd_checksum = pg_checksum_page((char *) page, blkno);
}
/*
 * Verify the checksum of every block in one relation segment file.
 *
 * fn is the path of the file to scan and segmentno its segment number, which
 * is needed to turn a block's position within the file into the block number
 * that is fed to the checksum calculation.
 *
 * Updates the file-level counters "files", "blocks" and "badblocks".  Any
 * failure to open or read the file is fatal: a message is printed and the
 * program exits with status 1.
 */
static void
scan_file(const char *fn, BlockNumber segmentno)
{
	PGAlignedBlock buf;
	PageHeader	header = (PageHeader) buf.data;
	int			f;
	BlockNumber blockno;

	f = open(fn, O_RDONLY | PG_BINARY, 0);
	if (f < 0)
	{
		fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
				progname, fn, strerror(errno));
		exit(1);
	}

	files++;

	for (blockno = 0;; blockno++)
	{
		uint16		csum;
		int			r = read(f, buf.data, BLCKSZ);

		/* End of file: nothing more to verify. */
		if (r == 0)
			break;

		/*
		 * A failed read must be reported with the OS error; reporting it as
		 * a short read of "-1" bytes would hide the actual cause.
		 */
		if (r < 0)
		{
			fprintf(stderr, _("%s: could not read block %u in file \"%s\": %s\n"),
					progname, blockno, fn, strerror(errno));
			exit(1);
		}

		/* A partial block means the file is truncated or being changed. */
		if (r != BLCKSZ)
		{
			fprintf(stderr, _("%s: could not read block %u in file \"%s\": read %d of %d\n"),
					progname, blockno, fn, r, BLCKSZ);
			exit(1);
		}
		blocks++;

		/* New pages have no checksum yet */
		if (PageIsNew(header))
			continue;

		/* The checksum is seeded with the relation-wide block number. */
		csum = pg_checksum_page(buf.data, blockno + segmentno * RELSEG_SIZE);
		if (csum != header->pd_checksum)
		{
			if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION)
				fprintf(stderr, _("%s: checksum verification failed in file \"%s\", block %u: calculated checksum %X but block contains %X\n"),
						progname, fn, blockno, csum, header->pd_checksum);
			badblocks++;
		}
	}

	if (verbose)
		fprintf(stderr,
				_("%s: checksums verified in file \"%s\"\n"), progname, fn);

	close(f);
}
/*
 * Compare the checksum stored in a page header with a freshly computed one.
 *
 * page     - the block-sized page image to check
 * blkno    - block number of the page, used as input to the checksum
 * filepath - file the page came from; used only in the mismatch message
 *
 * Returns TRUE when the stored checksum matches the computed one.  On a
 * mismatch a diagnostic line is printed to stdout and FALSE is returned.
 *
 * NOTE(review): the const qualifier is cast away when calling
 * pg_checksum_page(); confirm that helper does not need a writable buffer.
 */
int
verify_page(const char *page, BlockNumber blkno, const char *filepath)
{
	PageHeader	phdr = (PageHeader) page;
	uint16		checksum;

	checksum = pg_checksum_page((char *) page, blkno);

	if (phdr->pd_checksum != checksum)
	{
		/* Block numbers are unsigned, so they must be printed with %u. */
		printf("%s: blkno %u, expected %x, found %x\n",
			   filepath, blkno, checksum, phdr->pd_checksum);
		return FALSE;
	}

	return TRUE;
}
static void flush_overflow_page(void) { if (!PageIsNew(overflow_buf)) { if (user_opts.checksum_mode == CHECKSUM_ADD) ((PageHeader) overflow_buf)->pd_checksum = pg_checksum_page(overflow_buf, overflow_blkno); if (write(curr_dstfd, overflow_buf, BLCKSZ) != BLCKSZ) pg_log(PG_FATAL, "can't write overflow page to destination\n"); pg_log(PG_VERBOSE, "flushed overflow block\n"); overflow_blkno++; } /* Re-initialize the overflow buffer as an empty page. */ page_init(overflow_buf); overflow_blkno = 0; }
/*
 * Produce a checksummed image of a page that lives in shared buffers.
 *
 * A page in shared buffers can change underneath us (for example when hint
 * bits are set), which would invalidate a checksum computed in place.  So the
 * page is first copied into private scratch space and the checksum is
 * computed on, and stored in, that copy.  Whether or not a concurrent hint
 * bit update makes it into the copy is irrelevant; all that matters is that
 * the page written out and its checksum agree.
 *
 * If checksums are disabled, or the page is not initialized, no copy is made
 * and the input pointer is returned as is.
 *
 * Returns a pointer to the block-sized data that has to be written.  The
 * scratch space is shared by all calls, so the caller must write the returned
 * page right away and must not refer to it afterwards.
 */
char *
PageSetChecksumCopy(Page page, BlockNumber blkno)
{
	static char *scratch = NULL;

	if (!PageIsNew(page) && DataChecksumsEnabled())
	{
		PageHeader	copyHdr;

		/*
		 * The scratch page is allocated on first use and then kept for all
		 * later calls.  Allocating it, instead of using a static char array,
		 * ensures adequate alignment for the checksumming code and avoids
		 * wasting space in processes that never get here.
		 */
		if (scratch == NULL)
			scratch = MemoryContextAlloc(TopMemoryContext, BLCKSZ);

		memcpy(scratch, (char *) page, BLCKSZ);

		copyHdr = (PageHeader) scratch;
		copyHdr->pd_checksum = pg_checksum_page(scratch, blkno);

		return scratch;
	}

	/* No checksum needed: hand back the caller's own data. */
	return (char *) page;
}
/*
 * PageIsVerified
 *		Check that the page header and checksum (if any) appear valid.
 *
 * This is called when a page has just been read in from disk.  The idea is
 * to cheaply detect trashed pages before we go nuts following bogus item
 * pointers, testing invalid transaction identifiers, etc.
 *
 * It turns out to be necessary to allow zeroed pages here too.  Even though
 * this routine is *not* called when deliberately adding a page to a relation,
 * there are scenarios in which a zeroed page might be found in a table.
 * (Example: a backend extends a relation, then crashes before it can write
 * any WAL entry about the new page.  The kernel will already have the
 * zeroed page in the file, and it will stay that way after restart.)  So we
 * allow zeroed pages here, and are careful that the page access macros
 * treat such a page as empty and without free space.  Eventually, VACUUM
 * will clean up such a page and make it usable.
 *
 * Returns true if the page is acceptable: either its header looks sane and
 * its checksum (when checksums are enabled) matches, or it is entirely zero,
 * or the checksum failed but the header is sane and ignore_checksum_failure
 * is set.  Returns false otherwise.  A checksum mismatch on a page that is
 * not all zeroes is reported as a WARNING.
 */
bool
PageIsVerified(Page page, BlockNumber blkno)
{
	PageHeader	p = (PageHeader) page;
	size_t	   *pagebytes;
	size_t		i;
	bool		checksum_failure = false;
	bool		header_sane = false;
	bool		all_zeroes = false;
	uint16		checksum = 0;

	/*
	 * Don't verify page data unless the page passes basic non-zero test
	 */
	if (!PageIsNew(page))
	{
		if (DataChecksumsEnabled())
		{
			checksum = pg_checksum_page((char *) page, blkno);

			if (checksum != p->pd_checksum)
				checksum_failure = true;
		}

		/*
		 * The following checks don't prove the header is correct, only that
		 * it looks sane enough to allow into the buffer pool. Later usage of
		 * the block can still reveal problems, which is why we offer the
		 * checksum option.
		 */
		if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
			p->pd_lower <= p->pd_upper &&
			p->pd_upper <= p->pd_special &&
			p->pd_special <= BLCKSZ &&
			p->pd_special == MAXALIGN(p->pd_special))
			header_sane = true;

		if (header_sane && !checksum_failure)
			return true;
	}

	/*
	 * Check all-zeroes case. Luckily BLCKSZ is guaranteed to always be a
	 * multiple of size_t - and it's much faster to compare memory using the
	 * native word size.
	 */
	StaticAssertStmt(BLCKSZ == (BLCKSZ / sizeof(size_t)) * sizeof(size_t),
					 "BLCKSZ has to be a multiple of sizeof(size_t)");

	all_zeroes = true;
	pagebytes = (size_t *) page;
	for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)
	{
		if (pagebytes[i] != 0)
		{
			all_zeroes = false;
			break;
		}
	}

	if (all_zeroes)
		return true;

	/*
	 * Throw a WARNING if the checksum fails, but only after we've checked for
	 * the all-zeroes case.
	 */
	if (checksum_failure)
	{
		/*
		 * The SQLSTATE has to be passed through errcode(); a bare constant
		 * in the auxiliary-call list is discarded by the comma operator and
		 * the report would not carry ERRCODE_DATA_CORRUPTED.
		 */
		ereport(WARNING,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("page verification failed, calculated checksum %u but expected %u",
						checksum, p->pd_checksum)));

		if (header_sane && ignore_checksum_failure)
			return true;
	}

	return false;
}
/*
 * Convert a GPDB 4.x heap file page by page into a new destination file.
 *
 * src          - path of the existing heap file to read
 * dst          - path of the file to create; it must not exist yet
 * has_numerics - stored in curr_hasnumerics for the page conversion code
 * atts, natts  - attribute information, stored in curr_atts / curr_natts
 *
 * Every full block read from src is passed to convert_gpdb4_heap_page(),
 * gets its checksum field set (or zeroed), and is written to dst.  After the
 * loop the overflow page is flushed and both files are closed.
 *
 * Returns NULL on success, otherwise a static string describing the first
 * problem encountered.  The caller must not free the returned string.
 */
const char *
convert_gpdb4_heap_file(const char *src, const char *dst,
						bool has_numerics, AttInfo *atts, int natts)
{
	int			src_fd;
	int			dstfd;
	int			blkno;

	/*
	 * The page buffer is accessed through a PageHeader pointer and handed to
	 * the checksum code, so it must be suitably aligned.  A plain char array
	 * carries no such guarantee; the extra union members force it.
	 */
	union
	{
		char		data[BLCKSZ];
		double		force_align_d;
		long long	force_align_ll;
	}			buf;
	ssize_t		bytesRead;
	const char *msg = NULL;

	curr_hasnumerics = has_numerics;
	curr_atts = atts;
	curr_natts = natts;

	page_init(overflow_buf);
	overflow_blkno = 0;

	if ((src_fd = open(src, O_RDONLY, 0)) < 0)
		return "can't open source file";

	if ((dstfd = open(dst, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR)) < 0)
	{
		close(src_fd);
		return "can't create destination file";
	}

	blkno = 0;
	curr_dstfd = dstfd;

	while ((bytesRead = read(src_fd, buf.data, BLCKSZ)) == BLCKSZ)
	{
		msg = convert_gpdb4_heap_page(buf.data);
		if (msg)
			break;

		/*
		 * GPDB 4.x doesn't support checksums so we don't need to worry about
		 * retaining an existing checksum like for upgrades from 5.x. If we're
		 * not adding them we want a zeroed out portion in the header
		 */
		if (user_opts.checksum_mode == CHECKSUM_ADD)
			((PageHeader) buf.data)->pd_checksum =
				pg_checksum_page(buf.data, blkno);
		else
			memset(&(((PageHeader) buf.data)->pd_checksum), 0, sizeof(uint16));

		if (write(dstfd, buf.data, BLCKSZ) != BLCKSZ)
		{
			msg = "can't write new page to destination";
			break;
		}

		blkno++;
	}

	flush_overflow_page();

	close(src_fd);
	close(dstfd);

	if (msg)
		return msg;

	/* A negative count is a read failure, not a truncated source file. */
	if (bytesRead < 0)
		return "can't read source file";

	if (bytesRead != 0)
		return "found partial page in source file";

	return NULL;
}
/**
 * @brief Write block buffer contents. The number of block buffers to be
 * written is derived from the loader's current block index.
 *
 * Flow:
 * <ol>
 *	 <li>If no more space is available in the data file, switch to a new one.</li>
 *	 <li>Compute the number of blocks which can be written to the current file.</li>
 *	 <li>Save the last block number in the load status file.</li>
 *	 <li>If data checksums are enabled, set the checksum of each block.</li>
 *	 <li>Write to the current file.</li>
 *	 <li>If there are other data, write them too.</li>
 * </ol>
 *
 * A failed write is reported with ereport(ERROR).
 *
 * @param loader [in] Direct Writer.
 * @return void
 */
static void
flush_pages(DirectWriter *loader)
{
	int			i;
	int			num;
	LoadStatus *ls = &loader->ls;

	num = loader->curblk;
	if (!PageIsEmpty(GetCurrentPage(loader)))
		num += 1;

	if (num <= 0)
		return;					/* no work */

	/*
	 * Add WAL entry (only the first page) to ensure the current xid will
	 * be recorded in xlog. We must flush some xlog records with XLogFlush()
	 * before write any data blocks to follow the WAL protocol.
	 *
	 * If postgres process, such as loader and COPY, is killed by "kill -9",
	 * database will be rewound to the last checkpoint and recovery will
	 * be performed using WAL.
	 *
	 * After the recovery, if there are xid's which have not been recorded
	 * to WAL, such xid's will be reused.
	 *
	 * However, in the loader and COPY, data file is actually updated and
	 * xid must not be reused.
	 *
	 * WAL entry with such xid can be added using XLogInsert(). However,
	 * such entries are not really written to the disk immediately.
	 * WAL entries are flushed to the disk by XLogFlush(), typically
	 * when a transaction is committed. COPY prevents xid reuse by
	 * this method.
	 */
#if PG_VERSION_NUM >= 90100
	if (ls->ls.create_cnt == 0 &&
		!RELATION_IS_LOCAL(loader->base.rel) &&
		!(loader->base.rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED))
	{
		XLogRecPtr	recptr;

		recptr = log_newpage(&ls->ls.rnode, MAIN_FORKNUM,
							 ls->ls.exist_cnt, loader->blocks);
		XLogFlush(recptr);
	}
#else
	if (ls->ls.create_cnt == 0 && !RELATION_IS_LOCAL(loader->base.rel))
	{
		XLogRecPtr	recptr;

		recptr = log_newpage(&ls->ls.rnode, MAIN_FORKNUM,
							 ls->ls.exist_cnt, loader->blocks);
		XLogFlush(recptr);
	}
#endif

	/*
	 * Write blocks. We might need to write multiple files on boundary of
	 * relation segments.
	 */
	for (i = 0; i < num;)
	{
		char	   *buffer;
		int			total;
		int			written;
		int			flush_num;
		BlockNumber relblks = LS_TOTAL_CNT(ls);

		/* Switch to the next file if the current file has been filled up. */
		if (relblks % RELSEG_SIZE == 0)
			close_data_file(loader);
		if (loader->datafd == -1)
			loader->datafd = open_data_file(ls->ls.rnode,
											RELATION_IS_LOCAL(loader->base.rel),
											relblks);

		/* Number of blocks to be added to the current file. */
		flush_num = Min(num - i, RELSEG_SIZE - relblks % RELSEG_SIZE);
		Assert(flush_num > 0);

		/* Write the last block number to the load status file. */
		UpdateLSF(loader, flush_num);

		/* First of the flush_num blocks that go out in this iteration. */
		buffer = loader->blocks + BLCKSZ * i;

#if PG_VERSION_NUM >= 90300
		/* If we need a checksum, add it */
		if (DataChecksumsEnabled())
		{
			int			j;

			/*
			 * Checksum exactly the pages that are about to be written.  The
			 * j'th of them lands at block relblks + j, because relblks was
			 * the block count of the relation before this batch.  A
			 * checksum computed for any other block number would fail
			 * verification when the page is read back.
			 */
			for (j = 0; j < flush_num; j++)
			{
				char	   *pagebuf = buffer + BLCKSZ * j;

				((PageHeader) pagebuf)->pd_checksum =
					pg_checksum_page(pagebuf, relblks + j);
			}
		}
#endif

		/*
		 * Flush flush_num data blocks to the current file, retrying after
		 * partial writes until everything has been written.
		 */
		total = BLCKSZ * flush_num;
		written = 0;
		while (total > 0)
		{
			int			len = write(loader->datafd, buffer + written, total);

			if (len == -1)
			{
				/* fatal error, do not want to write blocks anymore */
				ereport(ERROR, (errcode_for_file_access(),
								errmsg("could not write to data file: %m")));
			}
			written += len;
			total -= len;
		}

		i += flush_num;
	}

	/*
	 * NOTICE: Be sure reset curblk to 0 and reinitialize recycled page
	 * if you will continue to use blocks.
	 */
}