/* * Make encoding (compresstype = none, blocksize=...). We do this for the case * where the user has not specified an encoding for the column. */ List * default_column_encoding_clause(void) { DefElem *e1 = makeDefElem("compresstype", (Node *)makeString("none")); DefElem *e2 = makeDefElem("blocksize", (Node *)makeInteger(DEFAULT_APPENDONLY_BLOCK_SIZE)); DefElem *e3 = makeDefElem("compresslevel", (Node *)makeInteger(0)); return list_make3(e1, e2, e3); }
/* * CopyTaskFilesFromDirectory finds all files in the given directory, except for * those having an attempt suffix. The function then copies these files into the * database table identified by the given schema and table name. */ static void CopyTaskFilesFromDirectory(StringInfo schemaName, StringInfo relationName, StringInfo sourceDirectoryName) { const char *directoryName = sourceDirectoryName->data; struct dirent *directoryEntry = NULL; uint64 copiedRowTotal = 0; DIR *directory = AllocateDir(directoryName); if (directory == NULL) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not open directory \"%s\": %m", directoryName))); } directoryEntry = ReadDir(directory, directoryName); for (; directoryEntry != NULL; directoryEntry = ReadDir(directory, directoryName)) { const char *baseFilename = directoryEntry->d_name; const char *queryString = NULL; StringInfo fullFilename = NULL; RangeVar *relation = NULL; CopyStmt *copyStatement = NULL; uint64 copiedRowCount = 0; /* if system file or lingering task file, skip it */ if (strncmp(baseFilename, ".", MAXPGPATH) == 0 || strncmp(baseFilename, "..", MAXPGPATH) == 0 || strstr(baseFilename, ATTEMPT_FILE_SUFFIX) != NULL) { continue; } fullFilename = makeStringInfo(); appendStringInfo(fullFilename, "%s/%s", directoryName, baseFilename); /* build relation object and copy statement */ relation = makeRangeVar(schemaName->data, relationName->data, -1); copyStatement = CopyStatement(relation, fullFilename->data); if (BinaryWorkerCopyFormat) { DefElem *copyOption = makeDefElem("format", (Node *) makeString("binary")); copyStatement->options = list_make1(copyOption); } DoCopy(copyStatement, queryString, &copiedRowCount); copiedRowTotal += copiedRowCount; CommandCounterIncrement(); } ereport(DEBUG2, (errmsg("copied " UINT64_FORMAT " rows into table: \"%s.%s\"", copiedRowTotal, schemaName->data, relationName->data))); FreeDir(directory); }
/* * Make encoding (compresstype = none, blocksize=...) based on * currently configured defaults. */ List * default_column_encoding_clause(void) { const StdRdOptions *ao_opts = currentAOStorageOptions(); DefElem *e1, *e2, *e3; if (ao_opts->compresstype) { e1 = makeDefElem("compresstype", (Node *)makeString(pstrdup(ao_opts->compresstype))); } else { e1 = makeDefElem("compresstype", (Node *)makeString("none")); } e2 = makeDefElem("blocksize", (Node *)makeInteger(ao_opts->blocksize)); e3 = makeDefElem("compresslevel", (Node *)makeInteger(ao_opts->compresslevel)); return list_make3(e1, e2, e3); }
/* * pglogBeginForeignScan * Initiate access to the log by creating CopyState */ static void pglogBeginForeignScan(ForeignScanState *node, int eflags) { ForeignScan *plan = (ForeignScan *) node->ss.ps.plan; PgLogExecutionState *festate; elog(DEBUG1,"Entering function %s",__func__); /* * Do nothing in EXPLAIN (no ANALYZE) case. node->fdw_state stays NULL. */ if (eflags & EXEC_FLAG_EXPLAIN_ONLY) return; /* Initialise the execution state */ festate = (PgLogExecutionState *) palloc(sizeof(PgLogExecutionState)); festate->filenames = initLogFileNames(Pglog_directory); festate->i = 0; /* Forces CSV format */ festate->options = list_make1(makeDefElem("format", (Node *) makeString("csv"))); /* Add any options from the plan (currently only convert_selectively) */ festate->options = list_concat(festate->options, plan->fdw_private); /* Remember scan memory context */ festate->scan_cxt = CurrentMemoryContext; /* * Create CopyState from FDW options. We always acquire all columns, so * as to match the expected ScanTupleSlot signature. * Starts from the first file in the list. */ festate->cstate = NULL; BeginNextCopy(node->ss.ss_currentRelation, festate); elog(DEBUG1,"Copy state %p",festate->cstate); /* * Save state in node->fdw_state. We must save enough information to call * BeginCopyFrom() again. */ node->fdw_state = (void *) festate; }
/* * pglogGetForeignPaths * Create possible access paths for a scan on the foreign table * * Currently we don't support any push-down feature, so there is only one * possible access path, which simply returns all records in the order in * the data file. */ static void pglogGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid) { PgLogPlanState *fdw_private = (PgLogPlanState *) baserel->fdw_private; Cost startup_cost; Cost total_cost; List *columns; List *coptions = NIL; elog(DEBUG1,"Entering function %s",__func__); /* Decide whether to selectively perform binary conversion */ if (check_selective_binary_conversion(baserel, foreigntableid, &columns)) coptions = list_make1(makeDefElem("convert_selectively", (Node *) columns)); /* Estimate costs */ estimate_costs(root, baserel, fdw_private, &startup_cost, &total_cost); /* * Create a ForeignPath node and add it as only possible path. We use the * fdw_private list of the path to carry the convert_selectively option; * it will be propagated into the fdw_private list of the Plan node. */ add_path(baserel, (Path *) create_foreignscan_path(root, baserel, baserel->rows, startup_cost, total_cost, NIL, /* no pathkeys */ NULL, /* no outer rel either */ coptions)); /* * If data file was sorted, and we knew it somehow, we could insert * appropriate pathkeys into the ForeignPath node to tell the planner * that. */ }
/* * Create a DefElem setting "oids" to the specified value. */ DefElem * defWithOids(bool value) { return makeDefElem("oids", (Node *) makeInteger(value)); }
/* * Helper function for the various SQL callable logical decoding functions. */ static Datum pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool binary) { Name name; XLogRecPtr upto_lsn; int32 upto_nchanges; ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; MemoryContext per_query_ctx; MemoryContext oldcontext; XLogRecPtr end_of_wal; XLogRecPtr startptr; LogicalDecodingContext *ctx; ResourceOwner old_resowner = CurrentResourceOwner; ArrayType *arr; Size ndim; List *options = NIL; DecodingOutputState *p; check_permissions(); CheckLogicalDecodingRequirements(); if (PG_ARGISNULL(0)) ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("slot name must not be null"))); name = PG_GETARG_NAME(0); if (PG_ARGISNULL(1)) upto_lsn = InvalidXLogRecPtr; else upto_lsn = PG_GETARG_LSN(1); if (PG_ARGISNULL(2)) upto_nchanges = InvalidXLogRecPtr; else upto_nchanges = PG_GETARG_INT32(2); if (PG_ARGISNULL(3)) ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("options array must not be null"))); arr = PG_GETARG_ARRAYTYPE_P(3); /* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("set-valued function called in context that cannot accept a set"))); if (!(rsinfo->allowedModes & SFRM_Materialize)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("materialize mode required, but it is not allowed in this context"))); /* state to write output to */ p = palloc0(sizeof(DecodingOutputState)); p->binary_output = binary; /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &p->tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; oldcontext = MemoryContextSwitchTo(per_query_ctx); /* Deconstruct options array */ ndim = ARR_NDIM(arr); if (ndim > 1) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("array must be one-dimensional"))); } else if (array_contains_nulls(arr)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("array must not contain nulls"))); } else if (ndim == 1) { int nelems; Datum *datum_opts; int i; Assert(ARR_ELEMTYPE(arr) == TEXTOID); deconstruct_array(arr, TEXTOID, -1, false, 'i', &datum_opts, NULL, &nelems); if (nelems % 2 != 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("array must have even number of elements"))); for (i = 0; i < nelems; i += 2) { char *name = TextDatumGetCString(datum_opts[i]); char *opt = TextDatumGetCString(datum_opts[i + 1]); options = lappend(options, makeDefElem(name, (Node *) makeString(opt), -1)); } } p->tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; rsinfo->setResult = p->tupstore; rsinfo->setDesc = p->tupdesc; /* * Compute the current end-of-wal and maintain ThisTimeLineID. * RecoveryInProgress() will update ThisTimeLineID on promotion. */ if (!RecoveryInProgress()) end_of_wal = GetFlushRecPtr(); else end_of_wal = GetXLogReplayRecPtr(&ThisTimeLineID); ReplicationSlotAcquire(NameStr(*name), true); PG_TRY(); { /* restart at slot's confirmed_flush */ ctx = CreateDecodingContext(InvalidXLogRecPtr, options, false, logical_read_local_xlog_page, LogicalOutputPrepareWrite, LogicalOutputWrite, NULL); MemoryContextSwitchTo(oldcontext); /* * Check whether the output plugin writes textual output if that's * what we need. */ if (!binary && ctx->options.output_type !=OUTPUT_PLUGIN_TEXTUAL_OUTPUT) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("logical decoding output plugin \"%s\" produces binary output, but function \"%s\" expects textual data", NameStr(MyReplicationSlot->data.plugin), format_procedure(fcinfo->flinfo->fn_oid)))); ctx->output_writer_private = p; /* * Decoding of WAL must start at restart_lsn so that the entirety of * xacts that committed after the slot's confirmed_flush can be * accumulated into reorder buffers. */ startptr = MyReplicationSlot->data.restart_lsn; CurrentResourceOwner = ResourceOwnerCreate(CurrentResourceOwner, "logical decoding"); /* invalidate non-timetravel entries */ InvalidateSystemCaches(); /* Decode until we run out of records */ while ((startptr != InvalidXLogRecPtr && startptr < end_of_wal) || (ctx->reader->EndRecPtr != InvalidXLogRecPtr && ctx->reader->EndRecPtr < end_of_wal)) { XLogRecord *record; char *errm = NULL; record = XLogReadRecord(ctx->reader, startptr, &errm); if (errm) elog(ERROR, "%s", errm); /* * Now that we've set up the xlog reader state, subsequent calls * pass InvalidXLogRecPtr to say "continue from last record" */ startptr = InvalidXLogRecPtr; /* * The {begin_txn,change,commit_txn}_wrapper callbacks above will * store the description into our tuplestore. */ if (record != NULL) LogicalDecodingProcessRecord(ctx, ctx->reader); /* check limits */ if (upto_lsn != InvalidXLogRecPtr && upto_lsn <= ctx->reader->EndRecPtr) break; if (upto_nchanges != 0 && upto_nchanges <= p->returned_rows) break; CHECK_FOR_INTERRUPTS(); } tuplestore_donestoring(tupstore); CurrentResourceOwner = old_resowner; /* * Next time, start where we left off. (Hunting things, the family * business..) */ if (ctx->reader->EndRecPtr != InvalidXLogRecPtr && confirm) { LogicalConfirmReceivedLocation(ctx->reader->EndRecPtr); /* * If only the confirmed_flush_lsn has changed the slot won't get * marked as dirty by the above. Callers on the walsender * interface are expected to keep track of their own progress and * don't need it written out. But SQL-interface users cannot * specify their own start positions and it's harder for them to * keep track of their progress, so we should make more of an * effort to save it for them. * * Dirty the slot so it's written out at the next checkpoint. * We'll still lose its position on crash, as documented, but it's * better than always losing the position even on clean restart. */ ReplicationSlotMarkDirty(); } /* free context, call shutdown callback */ FreeDecodingContext(ctx); ReplicationSlotRelease(); InvalidateSystemCaches(); } PG_CATCH(); { /* clear all timetravel entries */ InvalidateSystemCaches(); PG_RE_THROW(); } PG_END_TRY(); return (Datum) 0; }
/* * Helper function for the various SQL callable logical decoding functions. */ static Datum pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool binary) { Name name = PG_GETARG_NAME(0); XLogRecPtr upto_lsn; int32 upto_nchanges; ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; MemoryContext per_query_ctx; MemoryContext oldcontext; XLogRecPtr end_of_wal; XLogRecPtr startptr; LogicalDecodingContext *ctx; ResourceOwner old_resowner = CurrentResourceOwner; ArrayType *arr; Size ndim; List *options = NIL; DecodingOutputState *p; if (PG_ARGISNULL(1)) upto_lsn = InvalidXLogRecPtr; else upto_lsn = PG_GETARG_LSN(1); if (PG_ARGISNULL(2)) upto_nchanges = InvalidXLogRecPtr; else upto_nchanges = PG_GETARG_INT32(2); /* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("set-valued function called in context that cannot accept a set"))); if (!(rsinfo->allowedModes & SFRM_Materialize)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("materialize mode required, but it is not allowed in this context"))); /* state to write output to */ p = palloc0(sizeof(DecodingOutputState)); p->binary_output = binary; /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &p->tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); check_permissions(); CheckLogicalDecodingRequirements(); arr = PG_GETARG_ARRAYTYPE_P(3); ndim = ARR_NDIM(arr); per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; oldcontext = MemoryContextSwitchTo(per_query_ctx); if (ndim > 1) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("array must be one-dimensional"))); } else if (array_contains_nulls(arr)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("array must not contain nulls"))); } else if (ndim == 1) { int nelems; Datum *datum_opts; int i; Assert(ARR_ELEMTYPE(arr) == TEXTOID); deconstruct_array(arr, TEXTOID, -1, false, 'i', &datum_opts, NULL, &nelems); if (nelems % 2 != 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("array must have even number of elements"))); for (i = 0; i < nelems; i += 2) { char *name = TextDatumGetCString(datum_opts[i]); char *opt = TextDatumGetCString(datum_opts[i + 1]); options = lappend(options, makeDefElem(name, (Node *) makeString(opt))); } } p->tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; rsinfo->setResult = p->tupstore; rsinfo->setDesc = p->tupdesc; /* compute the current end-of-wal */ if (!RecoveryInProgress()) end_of_wal = GetFlushRecPtr(); else end_of_wal = GetXLogReplayRecPtr(NULL); CheckLogicalDecodingRequirements(); ReplicationSlotAcquire(NameStr(*name)); PG_TRY(); { ctx = CreateDecodingContext(InvalidXLogRecPtr, options, logical_read_local_xlog_page, LogicalOutputPrepareWrite, LogicalOutputWrite); MemoryContextSwitchTo(oldcontext); /* * Check whether the output pluggin writes textual output if that's * what we need. */ if (!binary && ctx->options.output_type != OUTPUT_PLUGIN_TEXTUAL_OUTPUT) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("output plugin cannot produce binary output"))); ctx->output_writer_private = p; startptr = MyReplicationSlot->data.restart_lsn; CurrentResourceOwner = ResourceOwnerCreate(CurrentResourceOwner, "logical decoding"); /* invalidate non-timetravel entries */ InvalidateSystemCaches(); while ((startptr != InvalidXLogRecPtr && startptr < end_of_wal) || (ctx->reader->EndRecPtr && ctx->reader->EndRecPtr < end_of_wal)) { XLogRecord *record; char *errm = NULL; record = XLogReadRecord(ctx->reader, startptr, &errm); if (errm) elog(ERROR, "%s", errm); startptr = InvalidXLogRecPtr; /* * The {begin_txn,change,commit_txn}_wrapper callbacks above will * store the description into our tuplestore. */ if (record != NULL) LogicalDecodingProcessRecord(ctx, record); /* check limits */ if (upto_lsn != InvalidXLogRecPtr && upto_lsn <= ctx->reader->EndRecPtr) break; if (upto_nchanges != 0 && upto_nchanges <= p->returned_rows) break; } } PG_CATCH(); { /* clear all timetravel entries */ InvalidateSystemCaches(); PG_RE_THROW(); } PG_END_TRY(); tuplestore_donestoring(tupstore); CurrentResourceOwner = old_resowner; /* * Next time, start where we left off. (Hunting things, the family * business..) */ if (ctx->reader->EndRecPtr != InvalidXLogRecPtr && confirm) LogicalConfirmReceivedLocation(ctx->reader->EndRecPtr); /* free context, call shutdown callback */ FreeDecodingContext(ctx); ReplicationSlotRelease(); InvalidateSystemCaches(); return (Datum) 0; }