/**
 * svec_l2_ge - SQL-callable ">=" test built on svec_l2_cmp_internal()
 *
 * Returns true when the internal comparison of argument 0 against
 * argument 1 yields 0 or 1, and false for any other result.
 */
Datum svec_l2_ge(PG_FUNCTION_ARGS)
{
	SvecType *lhs = PG_GETARG_SVECTYPE_P(0);
	SvecType *rhs = PG_GETARG_SVECTYPE_P(1);
	int cmp = svec_l2_cmp_internal(lhs, rhs);
	/* 0 and 1 are the two comparator outcomes accepted as "greater or equal" */
	int at_least = (cmp == 0) || (cmp == 1);

	PG_RETURN_BOOL(at_least);
}
/**
 * svec_eq - SQL-callable equality test for two sparse vectors
 *
 * Both arguments are viewed as SparseData and the verdict of
 * sparsedata_eq() is returned as a boolean.
 */
Datum svec_eq(PG_FUNCTION_ARGS)
{
	SvecType *lhs_svec = PG_GETARG_SVECTYPE_P(0);
	SvecType *rhs_svec = PG_GETARG_SVECTYPE_P(1);
	SparseData lhs = sdata_from_svec(lhs_svec);
	SparseData rhs = sdata_from_svec(rhs_svec);

	PG_RETURN_BOOL(sparsedata_eq(lhs, rhs));
}
/**
 * svec_div - SQL-callable element-wise division of two sparse vectors
 *
 * The dimensions of the two arguments are validated with
 * check_dimension() before the work is handed to
 * op_svec_by_svec_internal() with operation code 3.
 */
Datum svec_div(PG_FUNCTION_ARGS)
{
	SvecType *numerator = PG_GETARG_SVECTYPE_P(0);
	SvecType *denominator = PG_GETARG_SVECTYPE_P(1);
	SvecType *quotient;

	check_dimension(numerator, denominator, "svec_div");

	/* operation code 3 is the one this file uses for division */
	quotient = op_svec_by_svec_internal(3, numerator, denominator);
	PG_RETURN_SVECTYPE_P(quotient);
}
/**
 * svec_pow - SQL-callable power operator for sparse vectors
 *
 * Validates the dimensions of the two arguments with check_dimension()
 * and returns the result of pow_svec_by_scalar_internal().
 *
 * NOTE(review): the helper's name suggests the second argument is
 * expected to be a scalar svec -- confirm against its definition.
 */
Datum svec_pow(PG_FUNCTION_ARGS)
{
	SvecType *base = PG_GETARG_SVECTYPE_P(0);
	SvecType *exponent = PG_GETARG_SVECTYPE_P(1);
	SvecType *powered;

	check_dimension(base, exponent, "svec_pow");

	powered = pow_svec_by_scalar_internal(base, exponent);
	PG_RETURN_SVECTYPE_P(powered);
}
Datum svec_concat_replicate(PG_FUNCTION_ARGS) { int multiplier = PG_GETARG_INT32(0); SvecType *svec = PG_GETARG_SVECTYPE_P(1); SparseData left = sdata_from_svec(svec); SparseData sdata = makeEmptySparseData(); char *vals,*index; int l_val_len = left->vals->len; int l_ind_len = left->index->len; int val_len=l_val_len*multiplier; int ind_len=l_ind_len*multiplier; vals = (char *)palloc(sizeof(char)*val_len); index = (char *)palloc(sizeof(char)*ind_len); for (int i=0;i<multiplier;i++) { memcpy(vals+i*l_val_len,left->vals->data,l_val_len); memcpy(index+i*l_ind_len,left->index->data,l_ind_len); } sdata->vals = makeStringInfoFromData(vals,val_len); sdata->index = makeStringInfoFromData(index,ind_len); sdata->type_of_data = left->type_of_data; sdata->unique_value_count = multiplier * left->unique_value_count; sdata->total_value_count = multiplier * left->total_value_count; PG_RETURN_SVECTYPE_P(svec_from_sparsedata(sdata,true)); }
Datum svec_concat(PG_FUNCTION_ARGS) { if (PG_ARGISNULL(0) && (!PG_ARGISNULL(1))) { PG_RETURN_SVECTYPE_P(PG_GETARG_SVECTYPE_P(1)); } else if (PG_ARGISNULL(0) && PG_ARGISNULL(1)) { PG_RETURN_NULL(); } else if (PG_ARGISNULL(1)) { PG_RETURN_SVECTYPE_P(PG_GETARG_SVECTYPE_P(0)); } else { SvecType *svec1 = PG_GETARG_SVECTYPE_P(0); SvecType *svec2 = PG_GETARG_SVECTYPE_P(1); SparseData left = sdata_from_svec(svec1); SparseData right = sdata_from_svec(svec2); SparseData sdata = makeEmptySparseData(); char *vals,*index; int l_val_len = left->vals->len; int r_val_len = right->vals->len; int l_ind_len = left->index->len; int r_ind_len = right->index->len; int val_len=l_val_len+r_val_len; int ind_len=l_ind_len+r_ind_len; vals = (char *)palloc(sizeof(char)*val_len); index = (char *)palloc(sizeof(char)*ind_len); memcpy(vals ,left->vals->data,l_val_len); memcpy(vals+l_val_len,right->vals->data,r_val_len); memcpy(index, left->index->data,l_ind_len); memcpy(index+l_ind_len,right->index->data,r_ind_len); sdata->vals = makeStringInfoFromData(vals,val_len); sdata->index = makeStringInfoFromData(index,ind_len); sdata->type_of_data = left->type_of_data; sdata->unique_value_count = left->unique_value_count+ right->unique_value_count; sdata->total_value_count = left->total_value_count+ right->total_value_count; PG_RETURN_SVECTYPE_P(svec_from_sparsedata(sdata,true)); } }
/**
 * svec_dot - dot product of two sparse vectors, returned as float8
 *
 * After check_dimension() validates the arguments, the vectors are combined
 * with op_sdata_by_sdata() using operation code 2 and the values of the
 * resulting vector are summed.  The intermediate product is released before
 * returning.
 */
Datum svec_dot(PG_FUNCTION_ARGS)
{
	SvecType *lhs_svec = PG_GETARG_SVECTYPE_P(0);
	SvecType *rhs_svec = PG_GETARG_SVECTYPE_P(1);
	SparseData lhs = sdata_from_svec(lhs_svec);
	SparseData rhs = sdata_from_svec(rhs_svec);
	SparseData product;
	double total;

	check_dimension(lhs_svec, rhs_svec, "svec_dot");

	/* operation code 2: element-wise product, which is then summed */
	product = op_sdata_by_sdata(2, lhs, rhs);
	total = sum_sdata_values_double(product);
	freeSparseDataAndData(product);

	PG_RETURN_FLOAT8(total);
}
/**
 * svec_summate - sum of all values of a sparse vector, returned as float8
 *
 * Delegates the summation to sum_sdata_values_double().
 */
Datum svec_summate(PG_FUNCTION_ARGS)
{
	SvecType *input = PG_GETARG_SVECTYPE_P(0);
	SparseData data = sdata_from_svec(input);
	double total = sum_sdata_values_double(data);

	PG_RETURN_FLOAT8(total);
}
/**
 * float8arr_div_svec - divide a float8 array (argument 0) by an svec
 * (argument 1)
 *
 * The array is converted to an uncompressed SparseData, the scalar-ness of
 * both operands is classified with check_scalar(), and the pair is handed to
 * svec_operate_on_sdata_pair() with operation code 3.
 */
Datum float8arr_div_svec(PG_FUNCTION_ARGS)
{
	ArrayType *dividend_arr = PG_GETARG_ARRAYTYPE_P(0);
	SvecType *divisor_svec = PG_GETARG_SVECTYPE_P(1);
	SparseData dividend = sdata_uncompressed_from_float8arr_internal(dividend_arr);
	SparseData divisor = sdata_from_svec(divisor_svec);
	int scalar_kind = check_scalar(SDATA_IS_SCALAR(dividend),
	                               SDATA_IS_SCALAR(divisor));

	/* operation code 3 is the one this file uses for division */
	PG_RETURN_SVECTYPE_P(
		svec_operate_on_sdata_pair(scalar_kind, 3, dividend, divisor));
}
Datum svec_median(PG_FUNCTION_ARGS) { SvecType *svec = PG_GETARG_SVECTYPE_P(0); SparseData sdata = sdata_from_svec(svec); int index,median_index = (sdata->total_value_count-1)/2; char *i_ptr; int64 *rle_index; if (sdata->index->data != NULL) //Sparse vector { /* * We need to create an uncompressed run length index to * feed to the partition select routine */ rle_index = (int64 *)palloc(sizeof(int64)*(sdata->unique_value_count)); i_ptr = sdata->index->data; for (int i=0;i<sdata->unique_value_count;i++,i_ptr+=int8compstoragesize(i_ptr)) { rle_index[i] = compword_to_int8(i_ptr); } /* * Allocate the outer "list of lists" */ char **lists = (char **)palloc(sizeof(char *)*2); lists[0] = sdata->vals->data; lists[1] = (char *)rle_index; size_t *widths = (size_t *)palloc(sizeof(size_t)*2); widths[0] = sizeof(float8); widths[1] = sizeof(int64); index = partition_select(lists,2,widths, 0,sdata->unique_value_count-1, median_index,compar_float8, real_index_calc_sparse_RLE); /* * Convert the uncompressed index into the compressed index */ i_ptr = sdata->index->data; for (int i=0;i<sdata->unique_value_count;i++,i_ptr+=int8compstoragesize(i_ptr)) { int8_to_compword(rle_index[i],i_ptr); } pfree(lists); pfree(widths); pfree(rle_index); } else { index = float8arr_partition_internal((double *)(sdata->vals->data), sdata->total_value_count, median_index); } PG_RETURN_FLOAT8(((float8 *)(sdata->vals->data))[index]); }
Datum internal_get_array_of_close_canopies(PG_FUNCTION_ARGS) { SvecType *svec; Datum *all_canopies; int num_all_canopies; float8 threshold; PGFunction metric_fn; ArrayType *close_canopies_arr; int4 *close_canopies; int num_close_canopies; size_t bytes; MemoryContext mem_context_for_function_calls; svec = PG_GETARG_SVECTYPE_P(verify_arg_nonnull(fcinfo, 0)); get_svec_array_elms(PG_GETARG_ARRAYTYPE_P(verify_arg_nonnull(fcinfo, 1)), &all_canopies, &num_all_canopies); threshold = PG_GETARG_FLOAT8(verify_arg_nonnull(fcinfo, 2)); metric_fn = get_metric_fn(PG_GETARG_INT32(verify_arg_nonnull(fcinfo, 3))); mem_context_for_function_calls = setup_mem_context_for_functional_calls(); close_canopies = (int4 *) palloc(sizeof(int4) * num_all_canopies); num_close_canopies = 0; for (int i = 0; i < num_all_canopies; i++) { if (compute_metric(metric_fn, mem_context_for_function_calls, PointerGetDatum(svec), all_canopies[i]) < threshold) close_canopies[num_close_canopies++] = i + 1 /* lower bound */; } MemoryContextDelete(mem_context_for_function_calls); /* If we cannot find any close canopy, return NULL. Note that the result * we return will be passed to internal_kmeans_closest_centroid() and if the * array of close canopies is NULL, then internal_kmeans_closest_centroid() * will consider and compute the distance to all centroids. */ if (num_close_canopies == 0) PG_RETURN_NULL(); bytes = ARR_OVERHEAD_NONULLS(1) + sizeof(int4) * num_close_canopies; close_canopies_arr = (ArrayType *) palloc0(bytes); SET_VARSIZE(close_canopies_arr, bytes); ARR_ELEMTYPE(close_canopies_arr) = INT4OID; ARR_NDIM(close_canopies_arr) = 1; ARR_DIMS(close_canopies_arr)[0] = num_close_canopies; ARR_LBOUND(close_canopies_arr)[0] = 1; memcpy(ARR_DATA_PTR(close_canopies_arr), close_canopies, sizeof(int4) * num_close_canopies); PG_RETURN_ARRAYTYPE_P(close_canopies_arr); }
/**
 * float8arr_dot_svec - dot product of a float8 array (argument 0) and an
 * svec (argument 1), returned as float8
 *
 * The array is converted to an uncompressed SparseData, combined with the
 * svec through op_sdata_by_sdata() using operation code 2, and the values of
 * the result are summed.  The converted array and the intermediate product
 * are released before returning.
 */
Datum float8arr_dot_svec(PG_FUNCTION_ARGS)
{
	ArrayType *dense_arr = PG_GETARG_ARRAYTYPE_P(0);
	SvecType *sparse_arg = PG_GETARG_SVECTYPE_P(1);
	SparseData dense = sdata_uncompressed_from_float8arr_internal(dense_arr);
	SparseData sparse = sdata_from_svec(sparse_arg);
	SparseData product;
	double total;

	/* operation code 2: element-wise product, which is then summed */
	product = op_sdata_by_sdata(2, dense, sparse);
	total = sum_sdata_values_double(product);

	freeSparseData(dense);
	freeSparseDataAndData(product);

	PG_RETURN_FLOAT8(total);
}
/** * svec_send - converts text to binary format */ Datum svec_send(PG_FUNCTION_ARGS) { StringInfoData buf; SvecType *svec = PG_GETARG_SVECTYPE_P(0); SparseData sdata = sdata_from_svec(svec); pq_begintypsend(&buf); pq_sendint(&buf,sdata->type_of_data,sizeof(Oid)); pq_sendint(&buf,sdata->unique_value_count,sizeof(int)); pq_sendint(&buf,sdata->total_value_count,sizeof(int)); pq_sendint(&buf,sdata->vals->len,sizeof(int)); pq_sendint(&buf,sdata->index->len,sizeof(int)); pq_sendbytes(&buf,sdata->vals->data,sdata->vals->len); pq_sendbytes(&buf,sdata->index->data,sdata->index->len); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); }
Datum internal_kmeans_canopy_transition(PG_FUNCTION_ARGS) { ArrayType *canopies_arr; Datum *canopies; int num_canopies; SvecType *point; PGFunction metric_fn; float8 threshold; MemoryContext mem_context_for_function_calls; canopies_arr = PG_GETARG_ARRAYTYPE_P(verify_arg_nonnull(fcinfo, 0)); get_svec_array_elms(canopies_arr, &canopies, &num_canopies); point = PG_GETARG_SVECTYPE_P(verify_arg_nonnull(fcinfo, 1)); metric_fn = get_metric_fn(PG_GETARG_INT32(verify_arg_nonnull(fcinfo, 2))); threshold = PG_GETARG_FLOAT8(verify_arg_nonnull(fcinfo, 3)); mem_context_for_function_calls = setup_mem_context_for_functional_calls(); for (int i = 0; i < num_canopies; i++) { if (compute_metric(metric_fn, mem_context_for_function_calls, PointerGetDatum(point), canopies[i]) < threshold) PG_RETURN_ARRAYTYPE_P(canopies_arr); } MemoryContextDelete(mem_context_for_function_calls); int idx = (ARR_NDIM(canopies_arr) == 0) ? 1 : ARR_LBOUND(canopies_arr)[0] + ARR_DIMS(canopies_arr)[0]; return PointerGetDatum( array_set( canopies_arr, /* array: the initial array object (mustn't be NULL) */ 1, /* nSubscripts: number of subscripts supplied */ &idx, /* indx[]: the subscript values */ PointerGetDatum(point), /* dataValue: the datum to be inserted at the given position */ false, /* isNull: whether dataValue is NULL */ -1, /* arraytyplen: pg_type.typlen for the array type */ -1, /* elmlen: pg_type.typlen for the array's element type */ false, /* elmbyval: pg_type.typbyval for the array's element type */ 'd') /* elmalign: pg_type.typalign for the array's element type */ ); }
Datum svec_pivot(PG_FUNCTION_ARGS) { SvecType *svec; SparseData sdata; float8 value; if (PG_ARGISNULL(1)) { value = 0.; } else { value = PG_GETARG_FLOAT8(1); } if (! PG_ARGISNULL(0)) { svec = PG_GETARG_SVECTYPE_P(0); } else { //first call, construct a new svec /* * Allocate space for the unique values and index * * Note that we do this manually because we are going to * manage the memory allocations for the StringInfo structures * manually within this aggregate so that we can preserve * the intermediate state without re-serializing until there is * a need to re-alloc, at which point we will re-serialize to * form the returned state variable. */ svec = makeEmptySvec(1); } sdata = sdata_from_svec(svec); /* * Add the incoming float8 value to the svec. * * First check to see if there is room in both the data area and index * and if there isn't, re-alloc and recreate the svec */ if ( ((sdata->vals->len + sizeof(float8)+1) > sdata->vals->maxlen) || ((sdata->index->len + 9 +1) > sdata->index->maxlen) ) { svec = reallocSvec(svec); sdata = sdata_from_svec(svec); } /* * Now let's check to see if we're adding a new value or appending to the last * run. If the incoming value is the same as the last value, just increment * the last run. Note that we need to use the index cursor to find where the * last index counter is located. 
*/ { char *index_location; int old_index_storage_size; int64 run_count; float8 last_value=-100000; bool new_run; if (sdata->index->len==0) //New vector { new_run=true; index_location = sdata->index->data; sdata->index->cursor = 0; run_count = 0; } else { index_location = sdata->index->data + sdata->index->cursor; old_index_storage_size = int8compstoragesize(index_location); run_count = compword_to_int8(index_location); last_value = *((float8 *)(sdata->vals->data+(sdata->vals->len-sizeof(float8)))); if (last_value == value) { new_run=false; } else { new_run=true; } } if (!new_run) { run_count++; int8_to_compword(run_count,index_location); sdata->index->len += (int8compstoragesize(index_location) - old_index_storage_size); sdata->total_value_count++; } else { add_run_to_sdata((char *)&value,1,sizeof(float8),sdata); char *i_ptr=sdata->index->data; int len=0; for (int j=0;j<sdata->unique_value_count-1;j++) { len+=int8compstoragesize(i_ptr); i_ptr+=int8compstoragesize(i_ptr); } sdata->index->cursor = len; } } PG_RETURN_SVECTYPE_P(svec); }
/**
 * svec_out - output function: render a sparse vector as a C string
 *
 * The formatting itself is done by svec_out_internal().
 */
Datum svec_out(PG_FUNCTION_ARGS)
{
	SvecType *input = PG_GETARG_SVECTYPE_P(0);
	char *text = svec_out_internal(input);

	PG_RETURN_CSTRING(text);
}
/**
 * svec_return_array - return a sparse vector as an uncompressed array
 *
 * The conversion is performed by svec_return_array_internal().
 */
Datum svec_return_array(PG_FUNCTION_ARGS)
{
	SvecType *input = PG_GETARG_SVECTYPE_P(0);
	ArrayType *dense = svec_return_array_internal(input);

	PG_RETURN_ARRAYTYPE_P(dense);
}
/**
 * svec_l2_cmp - SQL-callable three-way comparison of two sparse vectors
 *
 * Returns, as int4, whatever svec_l2_cmp_internal() reports for
 * (argument 0, argument 1).
 */
Datum svec_l2_cmp(PG_FUNCTION_ARGS)
{
	SvecType *lhs = PG_GETARG_SVECTYPE_P(0);
	SvecType *rhs = PG_GETARG_SVECTYPE_P(1);
	int cmp = svec_l2_cmp_internal(lhs, rhs);

	PG_RETURN_INT32(cmp);
}
/**
 * svec_dimension - return the `dimension` field of a sparse vector as int4
 */
Datum svec_dimension(PG_FUNCTION_ARGS)
{
	SvecType *input = PG_GETARG_SVECTYPE_P(0);
	int dim = input->dimension;

	PG_RETURN_INT32(dim);
}
Datum svec_count(PG_FUNCTION_ARGS) { SvecType *svec1 = PG_GETARG_SVECTYPE_P(0); SvecType *svec2 = PG_GETARG_SVECTYPE_P(1); SparseData left = sdata_from_svec(svec1); SparseData right = sdata_from_svec(svec2); double *right_vals=(double *)(right->vals->data); SvecType *result; double *clamped_vals; SparseData right_clamped,sdata_result; int scalar_args=check_scalar(IS_SCALAR(svec1),IS_SCALAR(svec2)); check_dimension(svec1,svec2,"svec_count"); /* Clamp the right vector values to 1. */ switch (scalar_args) { case 1: //left arg is scalar /* * If the left argument is a scalar, this is almost certainly the * first call to the routine, and we need a zero vector for the * beginning of the accumulation of the correct dimension. */ left = makeSparseDataFromDouble(0.,right->total_value_count); case 0: //neither arg is scalar case 2: //right arg is scalar /* Create an array of values either 1 or 0 depending on whether * the right vector has a non-zero value in it */ clamped_vals = (double *)palloc0(sizeof(double)*(right->unique_value_count)); for (int i=0;i<(right->unique_value_count);i++) { if (right_vals[i]!=0.) clamped_vals[i]=1.; } right_clamped = makeInplaceSparseData((char *)clamped_vals,right->index->data, right->vals->len,right->index->len,FLOAT8OID, right->unique_value_count,right->total_value_count); /* Create the output SVEC */ sdata_result = op_sdata_by_sdata(1,left,right_clamped); result = svec_from_sparsedata(sdata_result,true); pfree(clamped_vals); pfree(right_clamped); PG_RETURN_SVECTYPE_P(result); break; case 3: //both args are scalar default: ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("Svec count is undefined when both arguments are scalar"))); PG_RETURN_SVECTYPE_P(svec1); break; } }