/* * percent_rank * return fraction between 0 and 1 inclusive, * which is described as (RK - 1) / (NR - 1), where RK is the current row's * rank and NR is the total number of rows, per spec. */ Datum window_percent_rank(PG_FUNCTION_ARGS) { WindowObject winobj = PG_WINDOW_OBJECT(); rank_context *context; bool up; int64 totalrows = WinGetPartitionRowCount(winobj); Assert(totalrows > 0); up = rank_up(winobj); context = (rank_context *) WinGetPartitionLocalMemory(winobj, sizeof(rank_context)); if (up) context->rank = WinGetCurrentPosition(winobj) + 1; /* return zero if there's only one row, per spec */ if (totalrows <= 1) PG_RETURN_FLOAT8(0.0); PG_RETURN_FLOAT8((float8) (context->rank - 1) / (float8) (totalrows - 1)); }
/* * cume_dist * return fraction betweeen 0 and 1 inclusive, * which is described as NP / NR, where NP is the number of rows preceding or * peers to the current row, and NR is the total number of rows, per spec. */ Datum window_cume_dist(PG_FUNCTION_ARGS) { WindowObject winobj = PG_WINDOW_OBJECT(); rank_context *context; bool up; int64 totalrows = WinGetPartitionRowCount(winobj); Assert(totalrows > 0); up = rank_up(winobj); context = (rank_context *) WinGetPartitionLocalMemory(winobj, sizeof(rank_context)); if (up || context->rank == 1) { /* * The current row is not peer to prior row or is just the first, so * count up the number of rows that are peer to the current. */ int64 row; context->rank = WinGetCurrentPosition(winobj) + 1; /* * start from current + 1 */ for (row = context->rank; row < totalrows; row++) { if (!WinRowsArePeers(winobj, row - 1, row)) break; context->rank++; } } PG_RETURN_FLOAT8((float8) context->rank / (float8) totalrows); }
/* * ntile * compute an exact numeric value with scale 0 (zero), * ranging from 1 (one) to n, per spec. */ Datum window_ntile(PG_FUNCTION_ARGS) { WindowObject winobj = PG_WINDOW_OBJECT(); ntile_context *context; context = (ntile_context *) WinGetPartitionLocalMemory(winobj, sizeof(ntile_context)); if (context->ntile == 0) { /* first call */ int64 total; int32 nbuckets; bool isnull; total = WinGetPartitionRowCount(winobj); nbuckets = DatumGetInt32(WinGetFuncArgCurrent(winobj, 0, &isnull)); /* * per spec: If NT is the null value, then the result is the null * value. */ if (isnull) PG_RETURN_NULL(); /* * per spec: If NT is less than or equal to 0 (zero), then an * exception condition is raised. */ if (nbuckets <= 0) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_NTILE), errmsg("argument of ntile must be greater than zero"))); context->ntile = 1; context->rows_per_bucket = 0; context->boundary = total / nbuckets; if (context->boundary <= 0) context->boundary = 1; else { /* * If the total number is not divisible, add 1 row to leading * buckets. */ context->remainder = total % nbuckets; if (context->remainder != 0) context->boundary++; } } context->rows_per_bucket++; if (context->boundary < context->rows_per_bucket) { /* ntile up */ if (context->remainder != 0 && context->ntile == context->remainder) { context->remainder = 0; context->boundary -= 1; } context->ntile += 1; context->rows_per_bucket = 1; } PG_RETURN_INT32(context->ntile); }
static Datum kmeans_impl(PG_FUNCTION_ARGS, bool initial_mean_supplied) { WindowObject winobj = PG_WINDOW_OBJECT(); kmeans_context *context; int64 curpos, rowcount; rowcount = WinGetPartitionRowCount(winobj); context = (kmeans_context *) WinGetPartitionLocalMemory(winobj, sizeof(kmeans_context) + sizeof(int) * rowcount); if (!context->isdone) { int dim, k, N; Datum arg; bool isnull, isout; myvector inputs, mean, maxlist, minlist; int *r; int i, a; ArrayType *x; arg = WinGetFuncArgCurrent(winobj, 0, &isnull); if (!isnull) x = DatumGetArrayTypeP( WinGetFuncArgCurrent(winobj, 0, &isnull)); KMEANS_CHECK_V(x, ARR_DIMS(x)[0], isnull); dim = ARR_DIMS(x)[0]; k = DatumGetInt32(WinGetFuncArgCurrent(winobj, 1, &isnull)); /* * Since window function ignores STRICT mark, * return NULL simply. */ if (isnull || k <= 0) { context->isdone = true; context->isnull = true; PG_RETURN_NULL(); } N = (int) WinGetPartitionRowCount(winobj); inputs = (myvector) palloc(SIZEOF_V(dim) * N); maxlist = (myvector) palloc(SIZEOF_V(dim)); minlist = (myvector) palloc(SIZEOF_V(dim)); for (i = 0; i < N; i++) { x = DatumGetArrayTypeP( WinGetFuncArgInPartition(winobj, 0, i, WINDOW_SEEK_HEAD, false, &isnull, &isout)); KMEANS_CHECK_V(x, dim, isnull); memcpy(&inputs[i * dim], ARR_DATA_PTR(x), SIZEOF_V(dim)); /* update min/max for later use of init mean */ for (a = 0; a < dim; a++) { if (i == 0 || maxlist[a] < inputs[i * dim + a]) maxlist[a] = inputs[i * dim + a]; if (i == 0 || minlist[a] > inputs[i * dim + a]) minlist[a] = inputs[i * dim + a]; } } /* * initial mean vectors. need improve how to define them. */ mean = (myvector) palloc(SIZEOF_V(dim) * k); /* only the result is stored in the partition local memory */ r = context->result; if (initial_mean_supplied) { ArrayType *init = DatumGetArrayTypeP( WinGetFuncArgCurrent(winobj, 2, &isnull)); /* * we can accept 1d or 2d array as mean vectors. */ if (isnull || ARR_HASNULL(init) || !((ARR_NDIM(init) == 2 && ARR_DIMS(init)[0] == k && ARR_DIMS(init)[1] == dim) || (ARR_NDIM(init) == 1 && ARR_DIMS(init)[0] == k * dim))) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("initial mean vector must be 2d without NULL element"))); memcpy(mean, ARR_DATA_PTR(init), SIZEOF_V(dim) * k); } else { initialize_mean(inputs, dim, N, k, mean, r); kmeans_debug(mean, dim, k); } /* run it! */ calc_kmeans(inputs, dim, N, k, mean, r); context->isdone = true; } if (context->isnull) PG_RETURN_NULL(); curpos = WinGetCurrentPosition(winobj); PG_RETURN_INT32(context->result[curpos]); }