/**
 * @brief Expand a run-length encoded Greenplum sparse vector into an Eigen
 *     sparse vector
 *
 * Each physical entry of the input is a (value, run length) pair. Runs whose
 * value compares equal to zero only advance the logical position; every
 * other run stores one Eigen entry per logical position it covers.
 *
 * @param inVec A Greenplum sparse vector
 * @returns Eigen sparse vector of logical size <tt>total_value_count</tt>
 */
inline
Eigen::SparseVector<double>
LegacySparseVectorToSparseColumnVector(SvecType* inVec) {
    SparseData sparse = sdata_from_svec(inVec);
    Eigen::SparseVector<double> result(sparse->total_value_count);

    double* runValues = reinterpret_cast<double*>(sparse->vals->data);
    char* runWord = sparse->index->data;

    // Position in the uncompressed (logical) vector.
    int64_t position = 0;

    for (int64_t run = 0; run < sparse->unique_value_count; ++run) {
        const int64_t count = compword_to_int8(runWord);
        const double value = runValues[run];

        if (value != 0.) {
            // Entries are appended in strictly increasing index order.
            const int64_t runEnd = position + count;
            for (; position < runEnd; ++position)
                result.insertBack(static_cast<int>(position)) = value;
        } else {
            // Zero runs occupy logical positions but store nothing.
            position += count;
        }

        // Step over the compressed run-length word just decoded.
        runWord += int8compstoragesize(runWord);
    }

    return result;
}
Datum svec_median(PG_FUNCTION_ARGS) { SvecType *svec = PG_GETARG_SVECTYPE_P(0); SparseData sdata = sdata_from_svec(svec); int index,median_index = (sdata->total_value_count-1)/2; char *i_ptr; int64 *rle_index; if (sdata->index->data != NULL) //Sparse vector { /* * We need to create an uncompressed run length index to * feed to the partition select routine */ rle_index = (int64 *)palloc(sizeof(int64)*(sdata->unique_value_count)); i_ptr = sdata->index->data; for (int i=0;i<sdata->unique_value_count;i++,i_ptr+=int8compstoragesize(i_ptr)) { rle_index[i] = compword_to_int8(i_ptr); } /* * Allocate the outer "list of lists" */ char **lists = (char **)palloc(sizeof(char *)*2); lists[0] = sdata->vals->data; lists[1] = (char *)rle_index; size_t *widths = (size_t *)palloc(sizeof(size_t)*2); widths[0] = sizeof(float8); widths[1] = sizeof(int64); index = partition_select(lists,2,widths, 0,sdata->unique_value_count-1, median_index,compar_float8, real_index_calc_sparse_RLE); /* * Convert the uncompressed index into the compressed index */ i_ptr = sdata->index->data; for (int i=0;i<sdata->unique_value_count;i++,i_ptr+=int8compstoragesize(i_ptr)) { int8_to_compword(rle_index[i],i_ptr); } pfree(lists); pfree(widths); pfree(rle_index); } else { index = float8arr_partition_internal((double *)(sdata->vals->data), sdata->total_value_count, median_index); } PG_RETURN_FLOAT8(((float8 *)(sdata->vals->data))[index]); }
Datum svec_pivot(PG_FUNCTION_ARGS) { SvecType *svec; SparseData sdata; float8 value; if (PG_ARGISNULL(1)) { value = 0.; } else { value = PG_GETARG_FLOAT8(1); } if (! PG_ARGISNULL(0)) { svec = PG_GETARG_SVECTYPE_P(0); } else { //first call, construct a new svec /* * Allocate space for the unique values and index * * Note that we do this manually because we are going to * manage the memory allocations for the StringInfo structures * manually within this aggregate so that we can preserve * the intermediate state without re-serializing until there is * a need to re-alloc, at which point we will re-serialize to * form the returned state variable. */ svec = makeEmptySvec(1); } sdata = sdata_from_svec(svec); /* * Add the incoming float8 value to the svec. * * First check to see if there is room in both the data area and index * and if there isn't, re-alloc and recreate the svec */ if ( ((sdata->vals->len + sizeof(float8)+1) > sdata->vals->maxlen) || ((sdata->index->len + 9 +1) > sdata->index->maxlen) ) { svec = reallocSvec(svec); sdata = sdata_from_svec(svec); } /* * Now let's check to see if we're adding a new value or appending to the last * run. If the incoming value is the same as the last value, just increment * the last run. Note that we need to use the index cursor to find where the * last index counter is located. 
*/ { char *index_location; int old_index_storage_size; int64 run_count; float8 last_value=-100000; bool new_run; if (sdata->index->len==0) //New vector { new_run=true; index_location = sdata->index->data; sdata->index->cursor = 0; run_count = 0; } else { index_location = sdata->index->data + sdata->index->cursor; old_index_storage_size = int8compstoragesize(index_location); run_count = compword_to_int8(index_location); last_value = *((float8 *)(sdata->vals->data+(sdata->vals->len-sizeof(float8)))); if (last_value == value) { new_run=false; } else { new_run=true; } } if (!new_run) { run_count++; int8_to_compword(run_count,index_location); sdata->index->len += (int8compstoragesize(index_location) - old_index_storage_size); sdata->total_value_count++; } else { add_run_to_sdata((char *)&value,1,sizeof(float8),sdata); char *i_ptr=sdata->index->data; int len=0; for (int j=0;j<sdata->unique_value_count-1;j++) { len+=int8compstoragesize(i_ptr); i_ptr+=int8compstoragesize(i_ptr); } sdata->index->cursor = len; } } PG_RETURN_SVECTYPE_P(svec); }