/*
 * refresh_by_match_merge
 *
 * Refresh a materialized view with transactional semantics, while allowing
 * concurrent reads.
 *
 * This is called after a new version of the data has been created in a
 * temporary table.  It performs a full outer join against the old version of
 * the data, producing "diff" results.  This join cannot work if there are any
 * duplicated rows in either the old or new versions, in the sense that every
 * column would compare as equal between the two rows.  It does work correctly
 * in the face of rows which have at least one NULL value, with all non-NULL
 * columns equal.  The behavior of NULLs on equality tests and on UNIQUE
 * indexes turns out to be quite convenient here; the tests we need to make
 * are consistent with default behavior.  If there is at least one UNIQUE
 * index on the materialized view, we have exactly the guarantee we need.
 *
 * The temporary table used to hold the diff results contains just the TID of
 * the old record (if matched) and the ROW from the new table as a single
 * column of complex record type (if matched).
 *
 * Once we have the diff table, we perform set-based DELETE and INSERT
 * operations against the materialized view, and discard both temporary
 * tables.
 *
 * Everything from the generation of the new data to applying the differences
 * takes place under cover of an ExclusiveLock, since it seems as though we
 * would want to prohibit not only concurrent REFRESH operations, but also
 * incremental maintenance.  It also doesn't seem reasonable or safe to allow
 * SELECT FOR UPDATE or SELECT FOR SHARE on rows being updated or deleted by
 * this command.
 *
 * Parameters:
 *	matviewOid			OID of the materialized view; it appears as "mv" in
 *						the generated diff query.
 *	tempOid				OID of the temporary table holding the new contents;
 *						it appears as "newdata" in the generated queries.
 *	relowner			user id passed to SetUserIdAndSecContext() once the
 *						duplicate-row check has passed.
 *	save_sec_context	security context flags; they are OR'ed with
 *						SECURITY_LOCAL_USERID_CHANGE in that same call.
 *
 * Both relations are opened with NoLock, so the caller is presumably expected
 * to hold suitable locks on them already -- confirm against the caller.
 *
 * Errors are reported through elog()/ereport() at ERROR level; in particular
 * ERRCODE_CARDINALITY_VIOLATION is raised when the new data contains two rows
 * that the duplicate-check query considers identical.
 */
static void
refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,
					   int save_sec_context)
{
	StringInfoData querybuf;
	Relation	matviewRel;
	Relation	tempRel;
	char	   *matviewname;
	char	   *tempname;
	char	   *diffname;
	TupleDesc	tupdesc;
	bool		foundUniqueIndex;
	List	   *indexoidlist;
	ListCell   *indexoidscan;
	int16		relnatts;
	bool	   *usedForQual;

	initStringInfo(&querybuf);

	/* Build schema-qualified, quoted names for use in the SQL text below. */
	matviewRel = heap_open(matviewOid, NoLock);
	matviewname = quote_qualified_identifier(get_namespace_name(RelationGetNamespace(matviewRel)),
											 RelationGetRelationName(matviewRel));
	tempRel = heap_open(tempOid, NoLock);
	tempname = quote_qualified_identifier(get_namespace_name(RelationGetNamespace(tempRel)),
										  RelationGetRelationName(tempRel));

	/* The diff table's name is derived from the temp table's name. */
	diffname = make_temptable_name_n(tempname, 2);

	/*
	 * One zero-initialized flag per column of the materialized view, indexed
	 * by (attnum - 1); set once a column has been added to the join
	 * condition.
	 */
	relnatts = matviewRel->rd_rel->relnatts;
	usedForQual = (bool *) palloc0(sizeof(bool) * relnatts);

	/* Open SPI context. */
	if (SPI_connect() != SPI_OK_CONNECT)
		elog(ERROR, "SPI_connect failed");

	/* Analyze the temp table with the new contents. */
	appendStringInfo(&querybuf, "ANALYZE %s", tempname);
	if (SPI_exec(querybuf.data, 0) != SPI_OK_UTILITY)
		elog(ERROR, "SPI_exec failed: %s", querybuf.data);

	/*
	 * We need to ensure that there are not duplicate rows without NULLs in
	 * the new data set before we can count on the "diff" results.  Check for
	 * that in a way that allows showing the first duplicated row found.  Even
	 * after we pass this test, a unique index on the materialized view may
	 * find a duplicate key problem.
	 *
	 * The query looks for a row for which another row (different ctid) of the
	 * same table compares equal under the record operator "*=".  Both
	 * operators are written as OPERATOR(pg_catalog.xxx), i.e. explicitly
	 * schema-qualified rather than resolved through the search path.
	 */
	resetStringInfo(&querybuf);
	appendStringInfo(&querybuf,
					 "SELECT newdata FROM %s newdata "
					 "WHERE newdata IS NOT NULL AND EXISTS "
					 "(SELECT * FROM %s newdata2 WHERE newdata2 IS NOT NULL "
					 "AND newdata2 OPERATOR(pg_catalog.*=) newdata "
					 "AND newdata2.ctid OPERATOR(pg_catalog.<>) "
					 "newdata.ctid) LIMIT 1",
					 tempname, tempname);
	/* Not read-only; at most one row is requested, just enough to report. */
	if (SPI_execute(querybuf.data, false, 1) != SPI_OK_SELECT)
		elog(ERROR, "SPI_exec failed: %s", querybuf.data);
	if (SPI_processed > 0)
	{
		/* Column 1 of the result is the whole offending row ("newdata"). */
		ereport(ERROR,
				(errcode(ERRCODE_CARDINALITY_VIOLATION),
				 errmsg("new data for \"%s\" contains duplicate rows without any null columns",
						RelationGetRelationName(matviewRel)),
				 errdetail("Row: %s",
						   SPI_getvalue(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1))));
	}

	/*
	 * From here on, use relowner as the user id, adding
	 * SECURITY_LOCAL_USERID_CHANGE to the supplied context flags.  Note that
	 * the ANALYZE and the duplicate check above ran before this switch.
	 */
	SetUserIdAndSecContext(relowner,
						   save_sec_context | SECURITY_LOCAL_USERID_CHANGE);

	/* Start building the query for creating the diff table. */
	resetStringInfo(&querybuf);
	appendStringInfo(&querybuf,
					 "CREATE TEMP TABLE %s AS "
					 "SELECT mv.ctid AS tid, newdata "
					 "FROM %s mv FULL JOIN %s newdata ON (",
					 diffname, matviewname, tempname);

	/*
	 * Get the list of index OIDs for the table from the relcache, and look up
	 * each one in the pg_index syscache.  We will test for equality on all
	 * columns present in all unique indexes which only reference columns and
	 * include all rows.
	 */
	tupdesc = matviewRel->rd_att;
	foundUniqueIndex = false;	/* also means "no qual emitted yet" */
	indexoidlist = RelationGetIndexList(matviewRel);

	foreach(indexoidscan, indexoidlist)
	{
		Oid			indexoid = lfirst_oid(indexoidscan);
		Relation	indexRel;
		Form_pg_index indexStruct;

		/* Every index is opened (and locked), usable for the join or not. */
		indexRel = index_open(indexoid, RowExclusiveLock);
		indexStruct = indexRel->rd_index;

		/*
		 * We're only interested if it is unique, valid, contains no
		 * expressions, and is not partial.
		 */
		if (indexStruct->indisunique &&
			IndexIsValid(indexStruct) &&
			RelationGetIndexExpressions(indexRel) == NIL &&
			RelationGetIndexPredicate(indexRel) == NIL)
		{
			int			numatts = indexStruct->indnatts;
			int			i;

			/* Add quals for all columns from this index. */
			for (i = 0; i < numatts; i++)
			{
				int			attnum = indexStruct->indkey.values[i];
				Oid			type;
				Oid			op;
				const char *colname;

				/*
				 * Only include the column once regardless of how many times
				 * it shows up in how many indexes.
				 */
				if (usedForQual[attnum - 1])
					continue;
				usedForQual[attnum - 1] = true;

				/*
				 * Actually add the qual, ANDed with any others.
				 */
				if (foundUniqueIndex)
					appendStringInfoString(&querybuf, " AND ");

				/*
				 * Emit "newdata.col <op> mv.col", where <op> is the equality
				 * operator the type cache reports for the column's type.
				 * mv_GenerateOper() is defined elsewhere; presumably it
				 * appends the operator's SQL spelling to the buffer.
				 */
				colname = quote_identifier(NameStr((tupdesc->attrs[attnum - 1])->attname));
				appendStringInfo(&querybuf, "newdata.%s ", colname);
				type = attnumTypeId(matviewRel, attnum);
				op = lookup_type_cache(type, TYPECACHE_EQ_OPR)->eq_opr;
				mv_GenerateOper(&querybuf, op);
				appendStringInfo(&querybuf, " mv.%s", colname);

				foundUniqueIndex = true;
			}
		}

		/* Keep the locks, since we're about to run DML which needs them. */
		index_close(indexRel, NoLock);
	}
/* * refresh_by_match_merge * * Refresh a materialized view with transactional semantics, while allowing * concurrent reads. * * This is called after a new version of the data has been created in a * temporary table. It performs a full outer join against the old version of * the data, producing "diff" results. This join cannot work if there are any * duplicated rows in either the old or new versions, in the sense that every * column would compare as equal between the two rows. It does work correctly * in the face of rows which have at least one NULL value, with all non-NULL * columns equal. The behavior of NULLs on equality tests and on UNIQUE * indexes turns out to be quite convenient here; the tests we need to make * are consistent with default behavior. If there is at least one UNIQUE * index on the materialized view, we have exactly the guarantee we need. By * joining based on equality on all columns which are part of any unique * index, we identify the rows on which we can use UPDATE without any problem. * If any column is NULL in either the old or new version of a row (or both), * we must use DELETE and INSERT, since there could be multiple rows which are * NOT DISTINCT FROM each other, and we could otherwise end up with the wrong * number of occurrences in the updated relation. The temporary table used to * hold the diff results contains just the TID of the old record (if matched) * and the ROW from the new table as a single column of complex record type * (if matched). * * Once we have the diff table, we perform set-based DELETE, UPDATE, and * INSERT operations against the materialized view, and discard both temporary * tables. * * Everything from the generation of the new data to applying the differences * takes place under cover of an ExclusiveLock, since it seems as though we * would want to prohibit not only concurrent REFRESH operations, but also * incremental maintenance. 
It also doesn't seem reasonable or safe to allow * SELECT FOR UPDATE or SELECT FOR SHARE on rows being updated or deleted by * this command. */ static void refresh_by_match_merge(Oid matviewOid, Oid tempOid) { StringInfoData querybuf; Relation matviewRel; Relation tempRel; char *matviewname; char *tempname; char *diffname; TupleDesc tupdesc; bool foundUniqueIndex; List *indexoidlist; ListCell *indexoidscan; int16 relnatts; bool *usedForQual; Oid save_userid; int save_sec_context; int save_nestlevel; initStringInfo(&querybuf); matviewRel = heap_open(matviewOid, NoLock); matviewname = quote_qualified_identifier(get_namespace_name(RelationGetNamespace(matviewRel)), RelationGetRelationName(matviewRel)); tempRel = heap_open(tempOid, NoLock); tempname = quote_qualified_identifier(get_namespace_name(RelationGetNamespace(tempRel)), RelationGetRelationName(tempRel)); diffname = make_temptable_name_n(tempname, 2); relnatts = matviewRel->rd_rel->relnatts; usedForQual = (bool *) palloc0(sizeof(bool) * relnatts); /* Open SPI context. */ if (SPI_connect() != SPI_OK_CONNECT) elog(ERROR, "SPI_connect failed"); /* Analyze the temp table with the new contents. */ appendStringInfo(&querybuf, "ANALYZE %s", tempname); if (SPI_exec(querybuf.data, 0) != SPI_OK_UTILITY) elog(ERROR, "SPI_exec failed: %s", querybuf.data); /* * We need to ensure that there are not duplicate rows without NULLs in * the new data set before we can count on the "diff" results. Check for * that in a way that allows showing the first duplicated row found. Even * after we pass this test, a unique index on the materialized view may * find a duplicate key problem. 
*/ resetStringInfo(&querybuf); appendStringInfo(&querybuf, "SELECT x FROM %s x WHERE x IS NOT NULL AND EXISTS " "(SELECT * FROM %s y WHERE y IS NOT NULL " "AND (y.*) = (x.*) AND y.ctid <> x.ctid) LIMIT 1", tempname, tempname); if (SPI_execute(querybuf.data, false, 1) != SPI_OK_SELECT) elog(ERROR, "SPI_exec failed: %s", querybuf.data); if (SPI_processed > 0) { ereport(ERROR, (errcode(ERRCODE_CARDINALITY_VIOLATION), errmsg("new data for \"%s\" contains duplicate rows without any NULL columns", RelationGetRelationName(matviewRel)), errdetail("Row: %s", SPI_getvalue(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1)))); } /* Start building the query for creating the diff table. */ resetStringInfo(&querybuf); appendStringInfo(&querybuf, "CREATE TEMP TABLE %s AS " "SELECT x.ctid AS tid, y FROM %s x FULL JOIN %s y ON (", diffname, matviewname, tempname); /* * Get the list of index OIDs for the table from the relcache, and look up * each one in the pg_index syscache. We will test for equality on all * columns present in all unique indexes which only reference columns and * include all rows. */ tupdesc = matviewRel->rd_att; foundUniqueIndex = false; indexoidlist = RelationGetIndexList(matviewRel); foreach(indexoidscan, indexoidlist) { Oid indexoid = lfirst_oid(indexoidscan); HeapTuple indexTuple; Form_pg_index index; indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid)); if (!HeapTupleIsValid(indexTuple)) /* should not happen */ elog(ERROR, "cache lookup failed for index %u", indexoid); index = (Form_pg_index) GETSTRUCT(indexTuple); /* We're only interested if it is unique and valid. */ if (index->indisunique && IndexIsValid(index)) { int numatts = index->indnatts; int i; bool expr = false; Relation indexRel; /* Skip any index on an expression. */ for (i = 0; i < numatts; i++) { if (index->indkey.values[i] == 0) { expr = true; break; } } if (expr) { ReleaseSysCache(indexTuple); continue; } /* Skip partial indexes. 
*/ indexRel = index_open(index->indexrelid, RowExclusiveLock); if (RelationGetIndexPredicate(indexRel) != NIL) { index_close(indexRel, NoLock); ReleaseSysCache(indexTuple); continue; } /* Hold the locks, since we're about to run DML which needs them. */ index_close(indexRel, NoLock); /* Add quals for all columns from this index. */ for (i = 0; i < numatts; i++) { int attnum = index->indkey.values[i]; Oid type; Oid op; const char *colname; /* * Only include the column once regardless of how many times * it shows up in how many indexes. * * This is also useful later to omit columns which can not * have changed from the SET clause of the UPDATE statement. */ if (usedForQual[attnum - 1]) continue; usedForQual[attnum - 1] = true; /* * Actually add the qual, ANDed with any others. */ if (foundUniqueIndex) appendStringInfoString(&querybuf, " AND "); colname = quote_identifier(NameStr((tupdesc->attrs[attnum - 1])->attname)); appendStringInfo(&querybuf, "y.%s ", colname); type = attnumTypeId(matviewRel, attnum); op = lookup_type_cache(type, TYPECACHE_EQ_OPR)->eq_opr; mv_GenerateOper(&querybuf, op); appendStringInfo(&querybuf, " x.%s", colname); foundUniqueIndex = true; } } ReleaseSysCache(indexTuple); }