Datum pgrowlocks(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; HeapScanDesc scan; HeapTuple tuple; TupleDesc tupdesc; AttInMetadata *attinmeta; Datum result; MyData *mydata; Relation rel; if (SRF_IS_FIRSTCALL()) { text *relname; RangeVar *relrv; MemoryContext oldcontext; AclResult aclresult; funcctx = SRF_FIRSTCALL_INIT(); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); attinmeta = TupleDescGetAttInMetadata(tupdesc); funcctx->attinmeta = attinmeta; relname = PG_GETARG_TEXT_P(0); relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = heap_openrv(relrv, AccessShareLock); /* check permissions: must have SELECT on table */ aclresult = pg_class_aclcheck(RelationGetRelid(rel), GetUserId(), ACL_SELECT); if (aclresult != ACLCHECK_OK) aclcheck_error(aclresult, ACL_KIND_CLASS, RelationGetRelationName(rel)); scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL); mydata = palloc(sizeof(*mydata)); mydata->rel = rel; mydata->scan = scan; mydata->ncolumns = tupdesc->natts; funcctx->user_fctx = mydata; MemoryContextSwitchTo(oldcontext); } funcctx = SRF_PERCALL_SETUP(); attinmeta = funcctx->attinmeta; mydata = (MyData *) funcctx->user_fctx; scan = mydata->scan; /* scan the relation */ while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { HTSU_Result htsu; TransactionId xmax; uint16 infomask; /* must hold a buffer lock to call HeapTupleSatisfiesUpdate */ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); htsu = HeapTupleSatisfiesUpdate(tuple, GetCurrentCommandId(false), scan->rs_cbuf); xmax = HeapTupleHeaderGetRawXmax(tuple->t_data); infomask = tuple->t_data->t_infomask; /* * a tuple is locked if HTSU returns BeingUpdated, and if it returns * MayBeUpdated but the Xmax is valid and pointing at us. */ if (htsu == HeapTupleBeingUpdated || (htsu == HeapTupleMayBeUpdated && !(infomask & HEAP_XMAX_INVALID) && !(infomask & HEAP_XMAX_IS_MULTI) && (xmax == GetCurrentTransactionIdIfAny()))) { char **values; values = (char **) palloc(mydata->ncolumns * sizeof(char *)); values[Atnum_tid] = (char *) DirectFunctionCall1(tidout, PointerGetDatum(&tuple->t_self)); values[Atnum_xmax] = palloc(NCHARS * sizeof(char)); snprintf(values[Atnum_xmax], NCHARS, "%d", xmax); if (infomask & HEAP_XMAX_IS_MULTI) { MultiXactMember *members; int nmembers; bool first = true; bool allow_old; values[Atnum_ismulti] = pstrdup("true"); allow_old = !(infomask & HEAP_LOCK_MASK) && (infomask & HEAP_XMAX_LOCK_ONLY); nmembers = GetMultiXactIdMembers(xmax, &members, allow_old); if (nmembers == -1) { values[Atnum_xids] = "{0}"; values[Atnum_modes] = "{transient upgrade status}"; values[Atnum_pids] = "{0}"; } else { int j; values[Atnum_xids] = palloc(NCHARS * nmembers); values[Atnum_modes] = palloc(NCHARS * nmembers); values[Atnum_pids] = palloc(NCHARS * nmembers); strcpy(values[Atnum_xids], "{"); strcpy(values[Atnum_modes], "{"); strcpy(values[Atnum_pids], "{"); for (j = 0; j < nmembers; j++) { char buf[NCHARS]; if (!first) { strcat(values[Atnum_xids], ","); strcat(values[Atnum_modes], ","); strcat(values[Atnum_pids], ","); } snprintf(buf, NCHARS, "%d", members[j].xid); strcat(values[Atnum_xids], buf); switch (members[j].status) { case MultiXactStatusUpdate: snprintf(buf, NCHARS, "Update"); break; case MultiXactStatusNoKeyUpdate: snprintf(buf, NCHARS, "No Key Update"); break; case MultiXactStatusForUpdate: snprintf(buf, NCHARS, "For Update"); break; case MultiXactStatusForNoKeyUpdate: snprintf(buf, NCHARS, "For No Key Update"); break; case MultiXactStatusForShare: snprintf(buf, NCHARS, "Share"); break; case MultiXactStatusForKeyShare: snprintf(buf, NCHARS, "Key Share"); break; } strcat(values[Atnum_modes], buf); snprintf(buf, NCHARS, "%d", BackendXidGetPid(members[j].xid)); strcat(values[Atnum_pids], buf); first = false; } strcat(values[Atnum_xids], "}"); strcat(values[Atnum_modes], "}"); strcat(values[Atnum_pids], "}"); } } else { values[Atnum_ismulti] = pstrdup("false"); values[Atnum_xids] = palloc(NCHARS * sizeof(char)); snprintf(values[Atnum_xids], NCHARS, "{%d}", xmax); values[Atnum_modes] = palloc(NCHARS); if (infomask & HEAP_XMAX_LOCK_ONLY) { if (HEAP_XMAX_IS_SHR_LOCKED(infomask)) snprintf(values[Atnum_modes], NCHARS, "{For Share}"); else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)) snprintf(values[Atnum_modes], NCHARS, "{For Key Share}"); else if (HEAP_XMAX_IS_EXCL_LOCKED(infomask)) { if (tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) snprintf(values[Atnum_modes], NCHARS, "{For Update}"); else snprintf(values[Atnum_modes], NCHARS, "{For No Key Update}"); } else /* neither keyshare nor exclusive bit it set */ snprintf(values[Atnum_modes], NCHARS, "{transient upgrade status}"); } else { if (tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) snprintf(values[Atnum_modes], NCHARS, "{Update}"); else snprintf(values[Atnum_modes], NCHARS, "{No Key Update}"); } values[Atnum_pids] = palloc(NCHARS * sizeof(char)); snprintf(values[Atnum_pids], NCHARS, "{%d}", BackendXidGetPid(xmax)); } LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); /* build a tuple */ tuple = BuildTupleFromCStrings(attinmeta, values); /* make the tuple into a datum */ result = HeapTupleGetDatum(tuple); /* * no need to pfree what we allocated; it's on a short-lived * memory context anyway */ SRF_RETURN_NEXT(funcctx, result); } else { LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); } } heap_endscan(scan); heap_close(mydata->rel, AccessShareLock); SRF_RETURN_DONE(funcctx); }
/* * Assemble a WAL record from the registered data and buffers into an * XLogRecData chain, ready for insertion with XLogInsertRecord(). * * The record header fields are filled in, except for the xl_prev field. The * calculated CRC does not include the record header yet. * * If there are any registered buffers, and a full-page image was not taken * of all of them, *fpw_lsn is set to the lowest LSN among such pages. This * signals that the assembled record is only good for insertion on the * assumption that the RedoRecPtr and doPageWrites values were up-to-date. */ static XLogRecData * XLogRecordAssemble(RmgrId rmid, uint8 info, XLogRecPtr RedoRecPtr, bool doPageWrites, XLogRecPtr *fpw_lsn) { XLogRecData *rdt; uint32 total_len = 0; int block_id; pg_crc32c rdata_crc; registered_buffer *prev_regbuf = NULL; XLogRecData *rdt_datas_last; XLogRecord *rechdr; char *scratch = hdr_scratch; /* * Note: this function can be called multiple times for the same record. * All the modifications we do to the rdata chains below must handle that. */ /* The record begins with the fixed-size header */ rechdr = (XLogRecord *) scratch; scratch += SizeOfXLogRecord; hdr_rdt.next = NULL; rdt_datas_last = &hdr_rdt; hdr_rdt.data = hdr_scratch; /* * Make an rdata chain containing all the data portions of all block * references. This includes the data for full-page images. Also append * the headers for the block references in the scratch buffer. */ *fpw_lsn = InvalidXLogRecPtr; for (block_id = 0; block_id < max_registered_block_id; block_id++) { registered_buffer *regbuf = ®istered_buffers[block_id]; bool needs_backup; bool needs_data; XLogRecordBlockHeader bkpb; XLogRecordBlockImageHeader bimg; XLogRecordBlockCompressHeader cbimg = {0}; bool samerel; bool is_compressed = false; if (!regbuf->in_use) continue; /* Determine if this block needs to be backed up */ if (regbuf->flags & REGBUF_FORCE_IMAGE) needs_backup = true; else if (regbuf->flags & REGBUF_NO_IMAGE) needs_backup = false; else if (!doPageWrites) needs_backup = false; else { /* * We assume page LSN is first data on *every* page that can be * passed to XLogInsert, whether it has the standard page layout * or not. */ XLogRecPtr page_lsn = PageGetLSN(regbuf->page); needs_backup = (page_lsn <= RedoRecPtr); if (!needs_backup) { if (*fpw_lsn == InvalidXLogRecPtr || page_lsn < *fpw_lsn) *fpw_lsn = page_lsn; } } /* Determine if the buffer data needs to included */ if (regbuf->rdata_len == 0) needs_data = false; else if ((regbuf->flags & REGBUF_KEEP_DATA) != 0) needs_data = true; else needs_data = !needs_backup; bkpb.id = block_id; bkpb.fork_flags = regbuf->forkno; bkpb.data_length = 0; if ((regbuf->flags & REGBUF_WILL_INIT) == REGBUF_WILL_INIT) bkpb.fork_flags |= BKPBLOCK_WILL_INIT; if (needs_backup) { Page page = regbuf->page; uint16 compressed_len; /* * The page needs to be backed up, so calculate its hole length * and offset. */ if (regbuf->flags & REGBUF_STANDARD) { /* Assume we can omit data between pd_lower and pd_upper */ uint16 lower = ((PageHeader) page)->pd_lower; uint16 upper = ((PageHeader) page)->pd_upper; if (lower >= SizeOfPageHeaderData && upper > lower && upper <= BLCKSZ) { bimg.hole_offset = lower; cbimg.hole_length = upper - lower; } else { /* No "hole" to compress out */ bimg.hole_offset = 0; cbimg.hole_length = 0; } } else { /* Not a standard page header, don't try to eliminate "hole" */ bimg.hole_offset = 0; cbimg.hole_length = 0; } /* * Try to compress a block image if wal_compression is enabled */ if (wal_compression) { is_compressed = XLogCompressBackupBlock(page, bimg.hole_offset, cbimg.hole_length, regbuf->compressed_page, &compressed_len); } /* * Fill in the remaining fields in the XLogRecordBlockHeader * struct */ bkpb.fork_flags |= BKPBLOCK_HAS_IMAGE; /* * Construct XLogRecData entries for the page content. */ rdt_datas_last->next = ®buf->bkp_rdatas[0]; rdt_datas_last = rdt_datas_last->next; bimg.bimg_info = (cbimg.hole_length == 0) ? 0 : BKPIMAGE_HAS_HOLE; if (is_compressed) { bimg.length = compressed_len; bimg.bimg_info |= BKPIMAGE_IS_COMPRESSED; rdt_datas_last->data = regbuf->compressed_page; rdt_datas_last->len = compressed_len; } else { bimg.length = BLCKSZ - cbimg.hole_length; if (cbimg.hole_length == 0) { rdt_datas_last->data = page; rdt_datas_last->len = BLCKSZ; } else { /* must skip the hole */ rdt_datas_last->data = page; rdt_datas_last->len = bimg.hole_offset; rdt_datas_last->next = ®buf->bkp_rdatas[1]; rdt_datas_last = rdt_datas_last->next; rdt_datas_last->data = page + (bimg.hole_offset + cbimg.hole_length); rdt_datas_last->len = BLCKSZ - (bimg.hole_offset + cbimg.hole_length); } } total_len += bimg.length; } if (needs_data) { /* * Link the caller-supplied rdata chain for this buffer to the * overall list. */ bkpb.fork_flags |= BKPBLOCK_HAS_DATA; bkpb.data_length = regbuf->rdata_len; total_len += regbuf->rdata_len; rdt_datas_last->next = regbuf->rdata_head; rdt_datas_last = regbuf->rdata_tail; } if (prev_regbuf && RelFileNodeEquals(regbuf->rnode, prev_regbuf->rnode)) { samerel = true; bkpb.fork_flags |= BKPBLOCK_SAME_REL; } else samerel = false; prev_regbuf = regbuf; /* Ok, copy the header to the scratch buffer */ memcpy(scratch, &bkpb, SizeOfXLogRecordBlockHeader); scratch += SizeOfXLogRecordBlockHeader; if (needs_backup) { memcpy(scratch, &bimg, SizeOfXLogRecordBlockImageHeader); scratch += SizeOfXLogRecordBlockImageHeader; if (cbimg.hole_length != 0 && is_compressed) { memcpy(scratch, &cbimg, SizeOfXLogRecordBlockCompressHeader); scratch += SizeOfXLogRecordBlockCompressHeader; } } if (!samerel) { memcpy(scratch, ®buf->rnode, sizeof(RelFileNode)); scratch += sizeof(RelFileNode); } memcpy(scratch, ®buf->block, sizeof(BlockNumber)); scratch += sizeof(BlockNumber); } /* followed by the record's origin, if any */ if (include_origin && replorigin_sesssion_origin != InvalidRepOriginId) { *(scratch++) = XLR_BLOCK_ID_ORIGIN; memcpy(scratch, &replorigin_sesssion_origin, sizeof(replorigin_sesssion_origin)); scratch += sizeof(replorigin_sesssion_origin); } /* followed by main data, if any */ if (mainrdata_len > 0) { if (mainrdata_len > 255) { *(scratch++) = XLR_BLOCK_ID_DATA_LONG; memcpy(scratch, &mainrdata_len, sizeof(uint32)); scratch += sizeof(uint32); } else { *(scratch++) = XLR_BLOCK_ID_DATA_SHORT; *(scratch++) = (uint8) mainrdata_len; } rdt_datas_last->next = mainrdata_head; rdt_datas_last = mainrdata_last; total_len += mainrdata_len; } rdt_datas_last->next = NULL; hdr_rdt.len = (scratch - hdr_scratch); total_len += hdr_rdt.len; /* * Calculate CRC of the data * * Note that the record header isn't added into the CRC initially since we * don't know the prev-link yet. Thus, the CRC will represent the CRC of * the whole record in the order: rdata, then backup blocks, then record * header. */ INIT_CRC32C(rdata_crc); COMP_CRC32C(rdata_crc, hdr_scratch + SizeOfXLogRecord, hdr_rdt.len - SizeOfXLogRecord); for (rdt = hdr_rdt.next; rdt != NULL; rdt = rdt->next) COMP_CRC32C(rdata_crc, rdt->data, rdt->len); /* * Fill in the fields in the record header. Prev-link is filled in later, * once we know where in the WAL the record will be inserted. The CRC does * not include the record header yet. */ rechdr->xl_xid = GetCurrentTransactionIdIfAny(); rechdr->xl_tot_len = total_len; rechdr->xl_info = info; rechdr->xl_rmid = rmid; rechdr->xl_prev = InvalidXLogRecPtr; rechdr->xl_crc = rdata_crc; return &hdr_rdt; }
static void DtmXactCallback(XactEvent event, void *arg) { //XTM_INFO("%d: DtmXactCallbackevent=%d nextxid=%d\n", getpid(), event, DtmNextXid); switch (event) { case XACT_EVENT_START: //XTM_INFO("%d: normal=%d, initialized=%d, replication=%d, bgw=%d, vacuum=%d\n", // getpid(), IsNormalProcessingMode(), dtm->initialized, MMDoReplication, IsBackgroundWorker, IsAutoVacuumWorkerProcess()); if (IsNormalProcessingMode() && dtm->initialized && MMDoReplication && !am_walsender && !IsBackgroundWorker && !IsAutoVacuumWorkerProcess()) { MMBeginTransaction(); } break; #if 0 case XACT_EVENT_PRE_COMMIT: case XACT_EVENT_PARALLEL_PRE_COMMIT: { TransactionId xid = GetCurrentTransactionIdIfAny(); if (!MMIsDistributedTrans && TransactionIdIsValid(xid)) { XTM_INFO("%d: Will ignore transaction %u\n", getpid(), xid); MMMarkTransAsLocal(xid); } break; } #endif case XACT_EVENT_COMMIT: case XACT_EVENT_ABORT: if (TransactionIdIsValid(DtmNextXid)) { if (!DtmVoted) { ArbiterSetTransStatus(DtmNextXid, TRANSACTION_STATUS_ABORTED, false); } if (event == XACT_EVENT_COMMIT) { /* * Now transaction status is already written in CLOG, * so we can remove information about it from hash table */ LWLockAcquire(dtm->hashLock, LW_EXCLUSIVE); hash_search(xid_in_doubt, &DtmNextXid, HASH_REMOVE, NULL); LWLockRelease(dtm->hashLock); } #if 0 /* should be handled now using DtmVoted flag */ else { /* * Transaction at the node can be aborted because of transaction failure at some other node * before it starts doing anything and assigned Xid, in this case Postgres is not calling SetTransactionStatus, * so we have to send report to DTMD here */ if (!TransactionIdIsValid(GetCurrentTransactionIdIfAny())) { XTM_INFO("%d: abort transation on DTMD\n", getpid()); ArbiterSetTransStatus(DtmNextXid, TRANSACTION_STATUS_ABORTED, false); } } #endif DtmNextXid = InvalidTransactionId; DtmLastSnapshot = NULL; } MMIsDistributedTrans = false; break; default: break; } }