/* * Returns N-th range (in form of array) * * First argument is the parent relid. * Second argument is the index of the range (if it is negative then the last * range will be returned). */ Datum get_range_by_idx(PG_FUNCTION_ARGS) { int parent_oid = DatumGetInt32(PG_GETARG_DATUM(0)); int idx = DatumGetInt32(PG_GETARG_DATUM(1)); PartRelationInfo *prel; RangeRelation *rangerel; RangeEntry *ranges; RangeEntry *re; Datum *elems; TypeCacheEntry *tce; prel = get_pathman_relation_info(parent_oid, NULL); rangerel = get_pathman_range_relation(parent_oid, NULL); if (!prel || !rangerel || idx >= (int)rangerel->ranges.length) PG_RETURN_NULL(); tce = lookup_type_cache(prel->atttype, 0); ranges = dsm_array_get_pointer(&rangerel->ranges); if (idx >= 0) re = &ranges[idx]; else re = &ranges[rangerel->ranges.length - 1]; elems = palloc(2 * sizeof(Datum)); elems[0] = PATHMAN_GET_DATUM(re->min, rangerel->by_val); elems[1] = PATHMAN_GET_DATUM(re->max, rangerel->by_val); PG_RETURN_ARRAYTYPE_P( construct_array(elems, 2, prel->atttype, tce->typlen, tce->typbyval, tce->typalign)); }
/*
 * Create partitions and return an OID of the partition that contains value.
 *
 * relid       parent relation
 * value       value that must be covered by a partition
 * value_type  type OID of value (used both as the SPI argument type and to
 *             pick the comparison function against the partitioning column)
 * crashed     optional output flag (may be NULL); set to true if the
 *             partition-creating query raised an error, false otherwise
 *
 * Runs "<extension schema>.append_partitions_on_demand_internal($1, $2)"
 * through SPI (presumably the caller has already connected to SPI -- verify
 * against callers). On success the cached range/children arrays are dropped
 * and reloaded via load_check_constraints(), after which the value is looked
 * up again.
 *
 * Returns the child OID covering value, or 0 if the relation is unknown to
 * pg_pathman, the query failed, or no matching partition exists afterwards.
 */
Oid
create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed)
{
	int					ret;
	RangeEntry		   *ranges;
	Datum				vals[2];
	Oid					oids[] = {OIDOID, value_type};
	bool				nulls[] = {false, false};
	char			   *sql;
	bool				found = false;
	int					pos;
	PartRelationInfo   *prel;
	RangeRelation	   *rangerel;
	FmgrInfo			cmp_func;
	char			   *schema;

	/* crashed is optional everywhere in this function */
	if (crashed != NULL)
		*crashed = false;

	schema = get_extension_schema();

	prel = get_pathman_relation_info(relid, NULL);
	rangerel = get_pathman_range_relation(relid, NULL);

	/* Nothing to do for relations pg_pathman knows nothing about */
	if (prel == NULL || rangerel == NULL)
		return 0;

	/* Comparison function */
	cmp_func = *get_cmp_func(value_type, prel->atttype);

	vals[0] = ObjectIdGetDatum(relid);
	vals[1] = value;

	/* Perform PL procedure */
	sql = psprintf("SELECT %s.append_partitions_on_demand_internal($1, $2)",
				   schema);
	PG_TRY();
	{
		ret = SPI_execute_with_args(sql, 2, oids, vals, nulls, false, 0);
		if (ret > 0)
		{
			/* Update relation info */
			free_dsm_array(&rangerel->ranges);
			free_dsm_array(&prel->children);
			load_check_constraints(relid, GetCatalogSnapshot(relid));
		}
	}
	PG_CATCH();
	{
		elog(WARNING, "Attempt to create new partitions failed");

		/*
		 * The error is swallowed rather than re-thrown, so the error stack
		 * has to be reset here; otherwise the entry stays pushed and
		 * repeated failures eventually overflow the error data stack.
		 */
		FlushErrorState();

		if (crashed != NULL)
			*crashed = true;
		return 0;
	}
	PG_END_TRY();

	/* Repeat binary search */
	ranges = dsm_array_get_pointer(&rangerel->ranges);
	pos = range_binary_search(rangerel, &cmp_func, value, &found);
	if (found)
		return ranges[pos].child_oid;

	return 0;
}
/* * Returns range (min, max) as output parameters * * first argument is the parent relid * second is the partition relid * third and forth are MIN and MAX output parameters */ Datum get_partition_range(PG_FUNCTION_ARGS) { int parent_oid = DatumGetInt32(PG_GETARG_DATUM(0)); int child_oid = DatumGetInt32(PG_GETARG_DATUM(1)); int nelems = 2; int i; bool found = false; Datum *elems; PartRelationInfo *prel; RangeRelation *rangerel; RangeEntry *ranges; TypeCacheEntry *tce; ArrayType *arr; prel = get_pathman_relation_info(parent_oid, NULL); rangerel = get_pathman_range_relation(parent_oid, NULL); if (!prel || !rangerel) PG_RETURN_NULL(); ranges = dsm_array_get_pointer(&rangerel->ranges); tce = lookup_type_cache(prel->atttype, 0); /* Looking for specified partition */ for(i=0; i<rangerel->ranges.length; i++) if (ranges[i].child_oid == child_oid) { found = true; break; } if (found) { bool byVal = rangerel->by_val; elems = palloc(nelems * sizeof(Datum)); elems[0] = PATHMAN_GET_DATUM(ranges[i].min, byVal); elems[1] = PATHMAN_GET_DATUM(ranges[i].max, byVal); arr = construct_array(elems, nelems, prel->atttype, tce->typlen, tce->typbyval, tce->typalign); PG_RETURN_ARRAYTYPE_P(arr); } PG_RETURN_NULL(); }
/* * Returns max value of the last range for relation */ Datum get_max_range_value(PG_FUNCTION_ARGS) { int parent_oid = DatumGetInt32(PG_GETARG_DATUM(0)); PartRelationInfo *prel; RangeRelation *rangerel; RangeEntry *ranges; prel = get_pathman_relation_info(parent_oid, NULL); rangerel = get_pathman_range_relation(parent_oid, NULL); if (!prel || !rangerel || prel->parttype != PT_RANGE || rangerel->ranges.length == 0) PG_RETURN_NULL(); ranges = dsm_array_get_pointer(&rangerel->ranges); PG_RETURN_DATUM(PATHMAN_GET_DATUM(ranges[rangerel->ranges.length-1].max, rangerel->by_val)); }
Datum on_partitions_updated(PG_FUNCTION_ARGS) { Oid relid; PartRelationInfo *prel; /* Parent relation oid */ relid = DatumGetInt32(PG_GETARG_DATUM(0)); prel = get_pathman_relation_info(relid, NULL); if (prel != NULL) { LWLockAcquire(pmstate->load_config_lock, LW_EXCLUSIVE); remove_relation_info(relid); load_relations_hashtable(false); LWLockRelease(pmstate->load_config_lock); } PG_RETURN_NULL(); }
/* * Checks if range overlaps with existing partitions. * Returns TRUE if overlaps and FALSE otherwise. */ Datum check_overlap(PG_FUNCTION_ARGS) { int parent_oid = DatumGetInt32(PG_GETARG_DATUM(0)); Datum p1 = PG_GETARG_DATUM(1); Oid p1_type = get_fn_expr_argtype(fcinfo->flinfo, 1); Datum p2 = PG_GETARG_DATUM(2); Oid p2_type = get_fn_expr_argtype(fcinfo->flinfo, 2); PartRelationInfo *prel; RangeRelation *rangerel; RangeEntry *ranges; FmgrInfo cmp_func_1; FmgrInfo cmp_func_2; int i; bool byVal; prel = get_pathman_relation_info(parent_oid, NULL); rangerel = get_pathman_range_relation(parent_oid, NULL); if (!prel || !rangerel || prel->parttype != PT_RANGE) PG_RETURN_NULL(); /* comparison functions */ cmp_func_1 = *get_cmp_func(p1_type, prel->atttype); cmp_func_2 = *get_cmp_func(p2_type, prel->atttype); byVal = rangerel->by_val; ranges = (RangeEntry *) dsm_array_get_pointer(&rangerel->ranges); for (i=0; i<rangerel->ranges.length; i++) { int c1 = FunctionCall2(&cmp_func_1, p1, PATHMAN_GET_DATUM(ranges[i].max, byVal)); int c2 = FunctionCall2(&cmp_func_2, p2, PATHMAN_GET_DATUM(ranges[i].min, byVal)); if (c1 < 0 && c2 > 0) PG_RETURN_BOOL(true); } PG_RETURN_BOOL(false); }
/*
 * Take care of joins.
 *
 * Join-pathlist hook. The previously installed hook
 * (pathman_set_join_pathlist_next), if any, is always called first with the
 * unmodified arguments. After that this function returns early unless all
 * of the following hold:
 *   - pg_pathman is ready and pg_pathman_enable_runtimeappend is set;
 *   - innerrel is a base relation (RELOPT_BASEREL);
 *   - the inner RTE does not have its "inh" flag set;
 *   - the join is neither FULL nor RIGHT;
 *   - get_pathman_relation_info() returns info for the inner relation;
 *   - the inner RTE's parenthood status is not PARENTHOOD_DISALLOWED.
 *
 * For UPDATE / DELETE where either side is the result relation, an ERROR is
 * raised if any base relation of outerrel is also known to pg_pathman.
 */
void
pathman_join_pathlist_hook(PlannerInfo *root,
						   RelOptInfo *joinrel,
						   RelOptInfo *outerrel,
						   RelOptInfo *innerrel,
						   JoinType jointype,
						   JoinPathExtraData *extra)
{
	JoinCostWorkspace		workspace;
	JoinType				saved_jointype = jointype;	/* value before the JOIN_UNIQUE_* rewrite below */
	RangeTblEntry		   *inner_rte = root->simple_rte_array[innerrel->relid];
	const PartRelationInfo *inner_prel;
	List				   *joinclauses,
						   *otherclauses;
	WalkerContext			context;
	double					paramsel;	/* product of wrap->paramsel over all join clauses */
	Node				   *part_expr;
	ListCell			   *lc;

	/* Call hooks set by other extensions */
	if (pathman_set_join_pathlist_next)
		pathman_set_join_pathlist_next(root, joinrel, outerrel,
									   innerrel, jointype, extra);

	/* Check that both pg_pathman & RuntimeAppend nodes are enabled */
	if (!IsPathmanReady() || !pg_pathman_enable_runtimeappend)
		return;

	/* We should only consider base relations */
	if (innerrel->reloptkind != RELOPT_BASEREL)
		return;

	/* We shouldn't process tables with active children */
	if (inner_rte->inh)
		return;

	/* We can't handle full or right outer joins */
	if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
		return;

	/*
	 * Check that innerrel is a BASEREL with PartRelationInfo.
	 *
	 * NOTE(review): the reloptkind test repeats the check made a few lines
	 * above; only the PartRelationInfo lookup is new here.
	 */
	if (innerrel->reloptkind != RELOPT_BASEREL ||
		!(inner_prel = get_pathman_relation_info(inner_rte->relid)))
		return;

	/*
	 * Check if query is:
	 *		1) UPDATE part_table SET = .. FROM part_table.
	 *		2) DELETE FROM part_table USING part_table.
	 *
	 * Either outerrel or innerrel may be a result relation.
	 */
	if ((root->parse->resultRelation == outerrel->relid ||
		 root->parse->resultRelation == innerrel->relid) &&
			(root->parse->commandType == CMD_UPDATE ||
			 root->parse->commandType == CMD_DELETE))
	{
		int		rti = -1,	/* bms_next_member() cursor; -1 starts the scan */
				count = 0;	/* number of outer base rels known to pg_pathman */

		/* Inner relation must be partitioned */
		Assert(inner_prel);

		/* Check each base rel of outer relation */
		while ((rti = bms_next_member(outerrel->relids, rti)) >= 0)
		{
			Oid outer_baserel = root->simple_rte_array[rti]->relid;

			/* Is it partitioned? */
			if (get_pathman_relation_info(outer_baserel))
				count++;
		}

		/* Both sides partitioned: this combination is rejected outright */
		if (count > 0)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("DELETE and UPDATE queries with a join "
							"of partitioned tables are not supported")));
	}

	/* Skip if inner table is not allowed to act as parent (e.g. FROM ONLY) */
	if (PARENTHOOD_DISALLOWED == get_rel_parenthood_status(inner_rte))
		return;

	/*
	 * These codes are used internally in the planner, but are not supported
	 * by the executor (nor, indeed, by most of the planner).
	 */
	if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
		jointype = JOIN_INNER; /* replace with a proper value */

	/* Extract join clauses which will separate partitions */
	if (IS_OUTER_JOIN(extra->sjinfo->jointype))
	{
		extract_actual_join_clauses_compat(extra->restrictlist,
										   joinrel->relids,
										   &joinclauses,
										   &otherclauses);
	}
	else
	{
		/* We can treat all clauses alike for an inner join */
		joinclauses = extract_actual_clauses(extra->restrictlist, false);
		otherclauses = NIL;
	}

	/* Make copy of partitioning expression and fix Var's varno attributes */
	part_expr = PrelExpressionForRelid(inner_prel, innerrel->relid);

	/* Walk every join clause and accumulate its paramsel into the product */
	paramsel = 1.0;
	foreach (lc, joinclauses)
	{
		WrapperNode *wrap;

		/* The walker context is re-initialized for each clause */
		InitWalkerContext(&context, part_expr, inner_prel, NULL);
		wrap = walk_expr_tree((Expr *) lfirst(lc), &context);
		paramsel *= wrap->paramsel;
	}
/*
 * Take care of joins.
 *
 * Join-pathlist hook. The previously installed hook
 * (set_join_pathlist_next), if any, is always called first with the
 * unmodified arguments. After that this function returns early unless all
 * of the following hold:
 *   - pg_pathman is ready and pg_pathman_enable_runtimeappend is set;
 *   - innerrel is a base relation (RELOPT_BASEREL);
 *   - the inner RTE does not have its "inh" flag set;
 *   - the join is neither FULL nor RIGHT;
 *   - get_pathman_relation_info() returns info for the inner relation;
 *   - the inner RTE's parenthood status (looked up together with the
 *     query's queryId) is not PARENTHOOD_DISALLOWED.
 */
void
pathman_join_pathlist_hook(PlannerInfo *root,
						   RelOptInfo *joinrel,
						   RelOptInfo *outerrel,
						   RelOptInfo *innerrel,
						   JoinType jointype,
						   JoinPathExtraData *extra)
{
	JoinCostWorkspace		workspace;
	JoinType				saved_jointype = jointype;	/* value before the JOIN_UNIQUE_* rewrite below */
	RangeTblEntry		   *inner_rte = root->simple_rte_array[innerrel->relid];
	const PartRelationInfo *inner_prel;
	List				   *joinclauses,
						   *otherclauses;
	WalkerContext			context;
	double					paramsel;	/* product of wrap->paramsel over all join clauses */
	Node				   *part_expr;
	ListCell			   *lc;

	/* Call hooks set by other extensions */
	if (set_join_pathlist_next)
		set_join_pathlist_next(root, joinrel, outerrel,
							   innerrel, jointype, extra);

	/* Check that both pg_pathman & RuntimeAppend nodes are enabled */
	if (!IsPathmanReady() || !pg_pathman_enable_runtimeappend)
		return;

	/* We should only consider base relations */
	if (innerrel->reloptkind != RELOPT_BASEREL)
		return;

	/* We shouldn't process tables with active children */
	if (inner_rte->inh)
		return;

	/* We can't handle full or right outer joins */
	if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
		return;

	/*
	 * Check that innerrel is a BASEREL with PartRelationInfo.
	 *
	 * NOTE(review): the reloptkind test repeats the check made a few lines
	 * above; only the PartRelationInfo lookup is new here.
	 */
	if (innerrel->reloptkind != RELOPT_BASEREL ||
		!(inner_prel = get_pathman_relation_info(inner_rte->relid)))
		return;

	/* Skip if inner table is not allowed to act as parent (e.g. FROM ONLY) */
	if (PARENTHOOD_DISALLOWED == get_rel_parenthood_status(root->parse->queryId,
														   inner_rte))
		return;

	/*
	 * These codes are used internally in the planner, but are not supported
	 * by the executor (nor, indeed, by most of the planner).
	 */
	if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
		jointype = JOIN_INNER; /* replace with a proper value */

	/* Extract join clauses which will separate partitions */
	if (IS_OUTER_JOIN(extra->sjinfo->jointype))
	{
		extract_actual_join_clauses(extra->restrictlist,
									&joinclauses,
									&otherclauses);
	}
	else
	{
		/* We can treat all clauses alike for an inner join */
		joinclauses = extract_actual_clauses(extra->restrictlist, false);
		otherclauses = NIL;
	}

	/* Make copy of partitioning expression and fix Var's varno attributes */
	part_expr = PrelExpressionForRelid(inner_prel, innerrel->relid);

	/* Walk every join clause and accumulate its paramsel into the product */
	paramsel = 1.0;
	foreach (lc, joinclauses)
	{
		WrapperNode *wrap;

		/* The walker context is re-initialized for each clause */
		InitWalkerContext(&context, part_expr, inner_prel, NULL);
		wrap = walk_expr_tree((Expr *) lfirst(lc), &context);
		paramsel *= wrap->paramsel;
	}
/* * Returns partition oid for specified parent relid and value. * In case when partition isn't exist try to create one. */ Datum find_or_create_range_partition(PG_FUNCTION_ARGS) { int relid = DatumGetInt32(PG_GETARG_DATUM(0)); Datum value = PG_GETARG_DATUM(1); Oid value_type = get_fn_expr_argtype(fcinfo->flinfo, 1); int pos; bool found; RangeRelation *rangerel; RangeEntry *ranges; TypeCacheEntry *tce; PartRelationInfo *prel; Oid cmp_proc_oid; FmgrInfo cmp_func; tce = lookup_type_cache(value_type, TYPECACHE_EQ_OPR | TYPECACHE_LT_OPR | TYPECACHE_GT_OPR | TYPECACHE_CMP_PROC | TYPECACHE_CMP_PROC_FINFO); prel = get_pathman_relation_info(relid, NULL); rangerel = get_pathman_range_relation(relid, NULL); if (!prel || !rangerel) PG_RETURN_NULL(); cmp_proc_oid = get_opfamily_proc(tce->btree_opf, value_type, prel->atttype, BTORDER_PROC); fmgr_info(cmp_proc_oid, &cmp_func); ranges = dsm_array_get_pointer(&rangerel->ranges); pos = range_binary_search(rangerel, &cmp_func, value, &found); /* * If found then just return oid. Else create new partitions */ if (found) PG_RETURN_OID(ranges[pos].child_oid); /* * If not found and value is between first and last partitions */ if (!found && pos >= 0) PG_RETURN_NULL(); else { Oid child_oid; bool crashed = false; /* Lock config before appending new partitions */ LWLockAcquire(pmstate->load_config_lock, LW_EXCLUSIVE); /* Restrict concurrent partition creation */ LWLockAcquire(pmstate->edit_partitions_lock, LW_EXCLUSIVE); /* * Check if someone else has already created partition. 
*/ ranges = dsm_array_get_pointer(&rangerel->ranges); pos = range_binary_search(rangerel, &cmp_func, value, &found); if (found) { LWLockRelease(pmstate->edit_partitions_lock); LWLockRelease(pmstate->load_config_lock); PG_RETURN_OID(ranges[pos].child_oid); } /* Start background worker to create new partitions */ child_oid = create_partitions_bg_worker(relid, value, value_type, &crashed); // SPI_connect(); // child_oid = create_partitions(relid, value, value_type, &crashed); // SPI_finish(); // elog(WARNING, "Worker finished"); /* Release locks */ if (!crashed) { LWLockRelease(pmstate->edit_partitions_lock); LWLockRelease(pmstate->load_config_lock); } /* Repeat binary search */ ranges = dsm_array_get_pointer(&rangerel->ranges); pos = range_binary_search(rangerel, &cmp_func, value, &found); if (found) PG_RETURN_OID(child_oid); } PG_RETURN_NULL(); }