/** * @fn Datum repack_index_swap(PG_FUNCTION_ARGS) * @brief Swap out an original index on a table with the newly-created one. * * repack_index_swap(index) * * @param index Oid of the *original* index. * @retval void */ Datum repack_index_swap(PG_FUNCTION_ARGS) { Oid orig_idx_oid = PG_GETARG_OID(0); Oid repacked_idx_oid; StringInfoData str; SPITupleTable *tuptable; TupleDesc desc; HeapTuple tuple; /* authority check */ must_be_superuser("repack_index_swap"); /* connect to SPI manager */ repack_init(); initStringInfo(&str); /* Find the OID of our new index. */ appendStringInfo(&str, "SELECT oid FROM pg_class " "WHERE relname = 'index_%u' AND relkind = 'i'", orig_idx_oid); execute(SPI_OK_SELECT, str.data); if (SPI_processed != 1) elog(ERROR, "Could not find index 'index_%u', found " UINT64_FORMAT " matches", orig_idx_oid, (uint64) SPI_processed); tuptable = SPI_tuptable; desc = tuptable->tupdesc; tuple = tuptable->vals[0]; repacked_idx_oid = getoid(tuple, desc, 1); swap_heap_or_index_files(orig_idx_oid, repacked_idx_oid); SPI_finish(); PG_RETURN_VOID(); }
/** * @fn Datum reorg_trigger(PG_FUNCTION_ARGS) * @brief Insert a operation log into log-table. * * reorg_trigger(sql) * * @param sql SQL to insert a operation log into log-table. */ Datum reorg_trigger(PG_FUNCTION_ARGS) { TriggerData *trigdata = (TriggerData *) fcinfo->context; TupleDesc desc; HeapTuple tuple; Datum values[2]; bool nulls[2] = { 0, 0 }; Oid argtypes[2]; const char *sql; /* authority check */ must_be_superuser("reorg_trigger"); /* make sure it's called as a trigger at all */ if (!CALLED_AS_TRIGGER(fcinfo) || !TRIGGER_FIRED_BEFORE(trigdata->tg_event) || !TRIGGER_FIRED_FOR_ROW(trigdata->tg_event) || trigdata->tg_trigger->tgnargs != 1) elog(ERROR, "reorg_trigger: invalid trigger call"); /* retrieve parameters */ sql = trigdata->tg_trigger->tgargs[0]; desc = RelationGetDescr(trigdata->tg_relation); argtypes[0] = argtypes[1] = trigdata->tg_relation->rd_rel->reltype; /* connect to SPI manager */ reorg_init(); if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) { /* INSERT: (NULL, newtup) */ tuple = trigdata->tg_trigtuple; nulls[0] = true; values[1] = copy_tuple(tuple, desc); } else if (TRIGGER_FIRED_BY_DELETE(trigdata->tg_event)) { /* DELETE: (oldtup, NULL) */ tuple = trigdata->tg_trigtuple; values[0] = copy_tuple(tuple, desc); nulls[1] = true; } else { /* UPDATE: (oldtup, newtup) */ tuple = trigdata->tg_newtuple; values[0] = copy_tuple(trigdata->tg_trigtuple, desc); values[1] = copy_tuple(tuple, desc); } /* INSERT INTO reorg.log VALUES ($1, $2) */ execute_with_args(SPI_OK_INSERT, sql, 2, argtypes, values, nulls); SPI_finish(); PG_RETURN_POINTER(tuple); }
/** * @fn Datum reorg_drop(PG_FUNCTION_ARGS) * @brief Delete temporarily objects. * * reorg_drop(oid, relname) * * @param oid Oid of target table. * @retval None. */ Datum reorg_drop(PG_FUNCTION_ARGS) { Oid oid = PG_GETARG_OID(0); const char *relname = get_quoted_relname(oid); const char *nspname = get_quoted_nspname(oid); /* authority check */ must_be_superuser("reorg_drop"); /* connect to SPI manager */ reorg_init(); /* * drop reorg trigger: We have already dropped the trigger in normal * cases, but it can be left on error. */ execute_with_format( SPI_OK_UTILITY, "DROP TRIGGER IF EXISTS z_reorg_trigger ON %s.%s CASCADE", nspname, relname); #if PG_VERSION_NUM < 80400 /* delete autovacuum settings */ execute_with_format( SPI_OK_DELETE, "DELETE FROM pg_catalog.pg_autovacuum v" " USING pg_class c, pg_namespace n" " WHERE relname IN ('log_%u', 'table_%u')" " AND n.nspname = 'reorg'" " AND c.relnamespace = n.oid" " AND v.vacrelid = c.oid", oid, oid); #endif /* drop log table */ execute_with_format( SPI_OK_UTILITY, "DROP TABLE IF EXISTS reorg.log_%u CASCADE", oid); /* drop temp table */ execute_with_format( SPI_OK_UTILITY, "DROP TABLE IF EXISTS reorg.table_%u CASCADE", oid); /* drop type for log table */ execute_with_format( SPI_OK_UTILITY, "DROP TYPE IF EXISTS reorg.pk_%u CASCADE", oid); SPI_finish(); PG_RETURN_VOID(); }
/** * @fn Datum reorg_swap(PG_FUNCTION_ARGS) * @brief Swapping relfilenode of tables and relation ids of toast tables * and toast indexes. * * reorg_swap(oid, relname) * * TODO: remove useless CommandCounterIncrement(). * * @param oid Oid of table of target. * @retval None. */ Datum reorg_swap(PG_FUNCTION_ARGS) { Oid oid = PG_GETARG_OID(0); const char *relname = get_quoted_relname(oid); const char *nspname = get_quoted_nspname(oid); Oid argtypes[1] = { OIDOID }; bool nulls[1] = { 0 }; Datum values[1]; SPITupleTable *tuptable; TupleDesc desc; HeapTuple tuple; uint32 records; uint32 i; Oid reltoastrelid1; Oid reltoastidxid1; Oid oid2; Oid reltoastrelid2; Oid reltoastidxid2; Oid owner1; Oid owner2; /* authority check */ must_be_superuser("reorg_swap"); /* connect to SPI manager */ reorg_init(); /* swap relfilenode and dependencies for tables. */ values[0] = ObjectIdGetDatum(oid); execute_with_args(SPI_OK_SELECT, "SELECT X.reltoastrelid, TX.reltoastidxid, X.relowner," " Y.oid, Y.reltoastrelid, TY.reltoastidxid, Y.relowner" " FROM pg_catalog.pg_class X LEFT JOIN pg_catalog.pg_class TX" " ON X.reltoastrelid = TX.oid," " pg_catalog.pg_class Y LEFT JOIN pg_catalog.pg_class TY" " ON Y.reltoastrelid = TY.oid" " WHERE X.oid = $1" " AND Y.oid = ('reorg.table_' || X.oid)::regclass", 1, argtypes, values, nulls); tuptable = SPI_tuptable; desc = tuptable->tupdesc; records = SPI_processed; if (records == 0) elog(ERROR, "reorg_swap : no swap target"); tuple = tuptable->vals[0]; reltoastrelid1 = getoid(tuple, desc, 1); reltoastidxid1 = getoid(tuple, desc, 2); owner1 = getoid(tuple, desc, 3); oid2 = getoid(tuple, desc, 4); reltoastrelid2 = getoid(tuple, desc, 5); reltoastidxid2 = getoid(tuple, desc, 6); owner2 = getoid(tuple, desc, 7); /* change owner of new relation to original owner */ if (owner1 != owner2) { ATExecChangeOwner(oid2, owner1, true, AccessExclusiveLock); CommandCounterIncrement(); } /* swap tables. */ swap_heap_or_index_files(oid, oid2); CommandCounterIncrement(); /* swap indexes. */ values[0] = ObjectIdGetDatum(oid); execute_with_args(SPI_OK_SELECT, "SELECT X.oid, Y.oid" " FROM pg_catalog.pg_index I," " pg_catalog.pg_class X," " pg_catalog.pg_class Y" " WHERE I.indrelid = $1" " AND I.indexrelid = X.oid" " AND I.indisvalid" " AND Y.oid = ('reorg.index_' || X.oid)::regclass", 1, argtypes, values, nulls); tuptable = SPI_tuptable; desc = tuptable->tupdesc; records = SPI_processed; for (i = 0; i < records; i++) { Oid idx1, idx2; tuple = tuptable->vals[i]; idx1 = getoid(tuple, desc, 1); idx2 = getoid(tuple, desc, 2); swap_heap_or_index_files(idx1, idx2); CommandCounterIncrement(); } /* swap names for toast tables and toast indexes */ if (reltoastrelid1 == InvalidOid) { if (reltoastidxid1 != InvalidOid || reltoastrelid2 != InvalidOid || reltoastidxid2 != InvalidOid) elog(ERROR, "reorg_swap : unexpected toast relations (T1=%u, I1=%u, T2=%u, I2=%u", reltoastrelid1, reltoastidxid1, reltoastrelid2, reltoastidxid2); /* do nothing */ } else if (reltoastrelid2 == InvalidOid) { char name[NAMEDATALEN]; if (reltoastidxid1 == InvalidOid || reltoastidxid2 != InvalidOid) elog(ERROR, "reorg_swap : unexpected toast relations (T1=%u, I1=%u, T2=%u, I2=%u", reltoastrelid1, reltoastidxid1, reltoastrelid2, reltoastidxid2); /* rename X to Y */ snprintf(name, NAMEDATALEN, "pg_toast_%u", oid2); RENAME_REL(reltoastrelid1, name); snprintf(name, NAMEDATALEN, "pg_toast_%u_index", oid2); RENAME_REL(reltoastidxid1, name); CommandCounterIncrement(); } else if (reltoastrelid1 != InvalidOid) { char name[NAMEDATALEN]; int pid = getpid(); /* rename X to TEMP */ snprintf(name, NAMEDATALEN, "pg_toast_pid%d", pid); RENAME_REL(reltoastrelid1, name); snprintf(name, NAMEDATALEN, "pg_toast_pid%d_index", pid); RENAME_REL(reltoastidxid1, name); CommandCounterIncrement(); /* rename Y to X */ snprintf(name, NAMEDATALEN, "pg_toast_%u", oid); RENAME_REL(reltoastrelid2, name); snprintf(name, NAMEDATALEN, "pg_toast_%u_index", oid); RENAME_REL(reltoastidxid2, name); CommandCounterIncrement(); /* rename TEMP to Y */ snprintf(name, NAMEDATALEN, "pg_toast_%u", oid2); RENAME_REL(reltoastrelid1, name); snprintf(name, NAMEDATALEN, "pg_toast_%u_index", oid2); RENAME_REL(reltoastidxid1, name); CommandCounterIncrement(); } /* drop reorg trigger */ execute_with_format( SPI_OK_UTILITY, "DROP TRIGGER IF EXISTS z_reorg_trigger ON %s.%s CASCADE", nspname, relname); SPI_finish(); PG_RETURN_VOID(); }
/** * @fn Datum reorg_apply(PG_FUNCTION_ARGS) * @brief Apply operations in log table into temp table. * * reorg_apply(sql_peek, sql_insert, sql_delete, sql_update, sql_pop, count) * * @param sql_peek SQL to pop tuple from log table. * @param sql_insert SQL to insert into temp table. * @param sql_delete SQL to delete from temp table. * @param sql_update SQL to update temp table. * @param sql_pop SQL to delete tuple from log table. * @param count Max number of operations, or no count iff <=0. * @retval Number of performed operations. */ Datum reorg_apply(PG_FUNCTION_ARGS) { #define DEFAULT_PEEK_COUNT 1000 const char *sql_peek = PG_GETARG_CSTRING(0); const char *sql_insert = PG_GETARG_CSTRING(1); const char *sql_delete = PG_GETARG_CSTRING(2); const char *sql_update = PG_GETARG_CSTRING(3); const char *sql_pop = PG_GETARG_CSTRING(4); int32 count = PG_GETARG_INT32(5); SPIPlanPtr plan_peek = NULL; SPIPlanPtr plan_insert = NULL; SPIPlanPtr plan_delete = NULL; SPIPlanPtr plan_update = NULL; SPIPlanPtr plan_pop = NULL; uint32 n, i; Oid argtypes_peek[1] = { INT4OID }; Datum values_peek[1]; bool nulls_peek[1] = { 0 }; /* authority check */ must_be_superuser("reorg_apply"); /* connect to SPI manager */ reorg_init(); /* peek tuple in log */ plan_peek = reorg_prepare(sql_peek, 1, argtypes_peek); for (n = 0;;) { int ntuples; SPITupleTable *tuptable; TupleDesc desc; Oid argtypes[3]; /* id, pk, row */ Datum values[3]; /* id, pk, row */ bool nulls[3]; /* id, pk, row */ /* peek tuple in log */ if (count == 0) values_peek[0] = Int32GetDatum(DEFAULT_PEEK_COUNT); else values_peek[0] = Int32GetDatum(Min(count - n, DEFAULT_PEEK_COUNT)); execute_plan(SPI_OK_SELECT, plan_peek, values_peek, nulls_peek); if (SPI_processed <= 0) break; /* copy tuptable because we will call other sqls. */ ntuples = SPI_processed; tuptable = SPI_tuptable; desc = tuptable->tupdesc; argtypes[0] = SPI_gettypeid(desc, 1); /* id */ argtypes[1] = SPI_gettypeid(desc, 2); /* pk */ argtypes[2] = SPI_gettypeid(desc, 3); /* row */ for (i = 0; i < ntuples; i++, n++) { HeapTuple tuple; tuple = tuptable->vals[i]; values[0] = SPI_getbinval(tuple, desc, 1, &nulls[0]); values[1] = SPI_getbinval(tuple, desc, 2, &nulls[1]); values[2] = SPI_getbinval(tuple, desc, 3, &nulls[2]); if (nulls[1]) { /* INSERT */ if (plan_insert == NULL) plan_insert = reorg_prepare(sql_insert, 1, &argtypes[2]); execute_plan(SPI_OK_INSERT, plan_insert, &values[2], &nulls[2]); } else if (nulls[2]) { /* DELETE */ if (plan_delete == NULL) plan_delete = reorg_prepare(sql_delete, 1, &argtypes[1]); execute_plan(SPI_OK_DELETE, plan_delete, &values[1], &nulls[1]); } else { /* UPDATE */ if (plan_update == NULL) plan_update = reorg_prepare(sql_update, 2, &argtypes[1]); execute_plan(SPI_OK_UPDATE, plan_update, &values[1], &nulls[1]); } } /* delete tuple in log */ if (plan_pop == NULL) plan_pop = reorg_prepare(sql_pop, 1, argtypes); execute_plan(SPI_OK_DELETE, plan_pop, values, nulls); SPI_freetuptable(tuptable); } SPI_finish(); PG_RETURN_INT32(n); }
/** * @fn Datum repack_drop(PG_FUNCTION_ARGS) * @brief Delete temporarily objects. * * repack_drop(oid, relname) * * @param oid Oid of target table. * @retval None. */ Datum repack_drop(PG_FUNCTION_ARGS) { Oid oid = PG_GETARG_OID(0); int numobj = PG_GETARG_INT32(1); const char *relname = get_quoted_relname(oid); const char *nspname = get_quoted_nspname(oid); if (!(relname && nspname)) { elog(ERROR, "table name not found for OID %u", oid); PG_RETURN_VOID(); } /* authority check */ must_be_superuser("repack_drop"); /* connect to SPI manager */ repack_init(); /* * To prevent concurrent lockers of the repack target table from causing * deadlocks, take an exclusive lock on it. Consider that the following * commands take exclusive lock on tables log_xxx and the target table * itself when deleting the repack_trigger on it, while concurrent * updaters require row exclusive lock on the target table and in * addition, on the log_xxx table, because of the trigger. * * Consider how a deadlock could occur - if the DROP TABLE repack.log_%u * gets a lock on log_%u table before a concurrent updater could get it * but after the updater has obtained a lock on the target table, the * subsequent DROP TRIGGER ... ON target-table would report a deadlock as * it finds itself waiting for a lock on target-table held by the updater, * which in turn, is waiting for lock on log_%u table. * * Fixes deadlock mentioned in the Github issue #55. * * Skip the lock if we are not going to do anything. * Otherwise, if repack gets accidentally run twice for the same table * at the same time, the second repack, in order to perform * a pointless cleanup, has to wait until the first one completes. * This adds an ACCESS EXCLUSIVE lock request into the queue * making the table effectively inaccessible for any other backend. */ if (numobj > 0) { execute_with_format( SPI_OK_UTILITY, "LOCK TABLE %s.%s IN ACCESS EXCLUSIVE MODE", nspname, relname); } /* drop log table: must be done before dropping the pk type, * since the log table is dependent on the pk type. (That's * why we check numobj > 1 here.) */ if (numobj > 1) { execute_with_format( SPI_OK_UTILITY, "DROP TABLE IF EXISTS repack.log_%u CASCADE", oid); --numobj; } /* drop type for pk type */ if (numobj > 0) { execute_with_format( SPI_OK_UTILITY, "DROP TYPE IF EXISTS repack.pk_%u", oid); --numobj; } /* * drop repack trigger: We have already dropped the trigger in normal * cases, but it can be left on error. */ if (numobj > 0) { execute_with_format( SPI_OK_UTILITY, "DROP TRIGGER IF EXISTS repack_trigger ON %s.%s CASCADE", nspname, relname); --numobj; } /* drop temp table */ if (numobj > 0) { execute_with_format( SPI_OK_UTILITY, "DROP TABLE IF EXISTS repack.table_%u CASCADE", oid); --numobj; } SPI_finish(); PG_RETURN_VOID(); }
/** * @fn Datum repack_apply(PG_FUNCTION_ARGS) * @brief Apply operations in log table into temp table. * * repack_apply(sql_peek, sql_insert, sql_delete, sql_update, sql_pop, count) * * @param sql_peek SQL to pop tuple from log table. * @param sql_insert SQL to insert into temp table. * @param sql_delete SQL to delete from temp table. * @param sql_update SQL to update temp table. * @param sql_pop SQL to bulk-delete tuples from log table. * @param count Max number of operations, or no count iff <=0. * @retval Number of performed operations. */ Datum repack_apply(PG_FUNCTION_ARGS) { #define DEFAULT_PEEK_COUNT 1000 const char *sql_peek = PG_GETARG_CSTRING(0); const char *sql_insert = PG_GETARG_CSTRING(1); const char *sql_delete = PG_GETARG_CSTRING(2); const char *sql_update = PG_GETARG_CSTRING(3); /* sql_pop, the fourth arg, will be used in the loop below */ int32 count = PG_GETARG_INT32(5); SPIPlanPtr plan_peek = NULL; SPIPlanPtr plan_insert = NULL; SPIPlanPtr plan_delete = NULL; SPIPlanPtr plan_update = NULL; uint32 n, i; Oid argtypes_peek[1] = { INT4OID }; Datum values_peek[1]; const char nulls_peek[1] = { 0 }; StringInfoData sql_pop; initStringInfo(&sql_pop); /* authority check */ must_be_superuser("repack_apply"); /* connect to SPI manager */ repack_init(); /* peek tuple in log */ plan_peek = repack_prepare(sql_peek, 1, argtypes_peek); for (n = 0;;) { int ntuples; SPITupleTable *tuptable; TupleDesc desc; Oid argtypes[3]; /* id, pk, row */ Datum values[3]; /* id, pk, row */ bool nulls[3]; /* id, pk, row */ /* peek tuple in log */ if (count <= 0) values_peek[0] = Int32GetDatum(DEFAULT_PEEK_COUNT); else values_peek[0] = Int32GetDatum(Min(count - n, DEFAULT_PEEK_COUNT)); execute_plan(SPI_OK_SELECT, plan_peek, values_peek, nulls_peek); if (SPI_processed <= 0) break; /* copy tuptable because we will call other sqls. */ ntuples = SPI_processed; tuptable = SPI_tuptable; desc = tuptable->tupdesc; argtypes[0] = SPI_gettypeid(desc, 1); /* id */ argtypes[1] = SPI_gettypeid(desc, 2); /* pk */ argtypes[2] = SPI_gettypeid(desc, 3); /* row */ resetStringInfo(&sql_pop); appendStringInfoString(&sql_pop, PG_GETARG_CSTRING(4)); for (i = 0; i < ntuples; i++, n++) { HeapTuple tuple; char *pkid; tuple = tuptable->vals[i]; values[0] = SPI_getbinval(tuple, desc, 1, &nulls[0]); values[1] = SPI_getbinval(tuple, desc, 2, &nulls[1]); values[2] = SPI_getbinval(tuple, desc, 3, &nulls[2]); pkid = SPI_getvalue(tuple, desc, 1); Assert(pkid != NULL); if (nulls[1]) { /* INSERT */ if (plan_insert == NULL) plan_insert = repack_prepare(sql_insert, 1, &argtypes[2]); execute_plan(SPI_OK_INSERT, plan_insert, &values[2], (nulls[2] ? "n" : " ")); } else if (nulls[2]) { /* DELETE */ if (plan_delete == NULL) plan_delete = repack_prepare(sql_delete, 1, &argtypes[1]); execute_plan(SPI_OK_DELETE, plan_delete, &values[1], (nulls[1] ? "n" : " ")); } else { /* UPDATE */ if (plan_update == NULL) plan_update = repack_prepare(sql_update, 2, &argtypes[1]); execute_plan(SPI_OK_UPDATE, plan_update, &values[1], (nulls[1] ? "n" : " ")); } /* Add the primary key ID of each row from the log * table we have processed so far to this * DELETE ... IN (...) query string, so we * can delete all the rows we have processed at-once. */ if (i == 0) appendStringInfoString(&sql_pop, pkid); else appendStringInfo(&sql_pop, ",%s", pkid); pfree(pkid); } /* i must be > 0 (and hence we must have some rows to delete) * since SPI_processed > 0 */ Assert(i > 0); appendStringInfoString(&sql_pop, ");"); /* Bulk delete of processed rows from the log table */ execute(SPI_OK_DELETE, sql_pop.data); SPI_freetuptable(tuptable); } SPI_finish(); PG_RETURN_INT32(n); }