/**
 * @brief Convert an Eigen sparse vector to a run-length encoded Greenplum
 *     sparse vector
 *
 * Only the stored entries of \c inVec carry explicit values; every position
 * without a stored entry is an implicit zero. The conversion therefore emits
 * a run of zeros for each gap between stored entries and for the tail after
 * the last stored entry. Adjacent positions whose values are bitwise
 * identical (implicit zeros included) are merged into a single run.
 *
 * The stored indices are assumed to be strictly increasing.
 *
 * @param inVec An Eigen sparse vector
 * @returns Greenplum sparse vector
 * @throws std::logic_error (via madlib_assert) if the vector reports stored
 *     entries but exposes no value or index array
 *
 * @internal We implement this function here and not in the legacy
 *     sparse-vector code because the indices are of type \c Index, as defined
 *     by Eigen.
 */
inline
SvecType*
SparseColumnVectorToLegacySparseVector(
    const Eigen::SparseVector<double> &inVec) {

    typedef Eigen::SparseVector<double>::Index Index;
    const size_t kValueLength = sizeof(double);
    const double kZero = 0.;

    const double* values = inVec.valuePtr();
    const Index* indices = inVec.innerIndexPtr();
    const Index nnz = inVec.nonZeros();
    const Index size = inVec.size();

    // The pending (not yet emitted) run has value runValue and starts at
    // position runStart. nextIndex is the first position that has not been
    // accounted for yet, i.e., one past the last stored entry seen so far.
    Index runStart = 0;
    Index nextIndex = 0;
    double runValue = 0.;
    SparseData sdata = makeSparseData();

    sdata->type_of_data = FLOAT8OID;

    madlib_assert(nnz == 0 || (indices && values), std::logic_error(
        "SparseColumnVectorToLegacySparseVector(): Missing values or indices "
        "in Eigen sparse vector."));

    for (Index i = 0; i < nnz; ++i) {
        const Index index = indices[i];

        // Positions [nextIndex, index) have no stored entry and are therefore
        // zero. If the pending run is non-zero it ends at nextIndex, and a
        // run of zeros begins there.
        if (index > nextIndex
            && std::memcmp(&runValue, &kZero, kValueLength)) {

            add_run_to_sdata(reinterpret_cast<char*>(&runValue),
                nextIndex - runStart, kValueLength, sdata);
            runValue = 0.;
            runStart = nextIndex;
        }

        // The stored entry itself: it either extends the pending run or
        // terminates it and starts a new one.
        if (std::memcmp(&values[i], &runValue, kValueLength)) {
            // Nothing is pending yet if the very first entry sits at
            // position 0; do not emit a zero-length run in that case.
            if (index > runStart) {
                add_run_to_sdata(reinterpret_cast<char*>(&runValue),
                    index - runStart, kValueLength, sdata);
            }
            runValue = values[i];
            runStart = index;
        }
        // Otherwise the stored value equals the pending run's value (this
        // includes explicitly stored zeros in a non-normalized vector), so
        // the pending run simply continues.

        nextIndex = index + 1;
    }

    // Positions after the last stored entry are implicit zeros as well.
    if (size > nextIndex && std::memcmp(&runValue, &kZero, kValueLength)) {
        add_run_to_sdata(reinterpret_cast<char*>(&runValue),
            nextIndex - runStart, kValueLength, sdata);
        runValue = 0.;
        runStart = nextIndex;
    }

    // Emit the final pending run. For an empty vector this is a single run
    // of length 0, which is what this function has always produced.
    add_run_to_sdata(reinterpret_cast<char*>(&runValue),
        size - runStart, kValueLength, sdata);

    // Add the final tallies
    sdata->unique_value_count
        = static_cast<int>(sdata->vals->len / kValueLength);
    sdata->total_value_count = static_cast<int>(size);

    return svec_from_sparsedata(sdata, true /* trim */);
}
/*
 * svec_pivot - append one float8 value to a run-length encoded svec.
 *
 * Argument 0 is the svec accumulated so far (NULL on the first call),
 * argument 1 is the float8 value to append (NULL is recorded as 0).
 * Returns the svec with the value appended.
 */
Datum svec_pivot(PG_FUNCTION_ARGS)
{
    SvecType *state;
    SparseData rle;
    float8 incoming;

    /* A NULL input value is stored as zero. */
    if (!PG_ARGISNULL(1))
        incoming = PG_GETARG_FLOAT8(1);
    else
        incoming = 0.;

    if (PG_ARGISNULL(0))
    {
        /*
         * First call: start from a fresh svec.
         *
         * The value and index buffers are allocated by hand here because
         * this aggregate manages the StringInfo storage itself. That lets
         * the intermediate state be kept as-is between calls; it is only
         * re-serialized when the buffers have to grow, and the re-serialized
         * form then becomes the returned state.
         */
        state = makeEmptySvec(1);
    }
    else
    {
        state = PG_GETARG_SVECTYPE_P(0);
    }
    rle = sdata_from_svec(state);

    /*
     * Make sure one more value and one more index word fit. If either
     * buffer is too small, grow the svec and re-derive the sparse data
     * view from the new allocation.
     * NOTE(review): the 9 is presumably the largest size of a compressed
     * run-length word -- confirm against the index encoding.
     */
    {
        const bool vals_full =
            (rle->vals->len + sizeof(float8) + 1) > rle->vals->maxlen;
        const bool index_full =
            (rle->index->len + 9 + 1) > rle->index->maxlen;

        if (vals_full || index_full)
        {
            state = reallocSvec(state);
            rle = sdata_from_svec(state);
        }
    }

    /*
     * Decide whether the incoming value extends the last run or opens a new
     * one. The index cursor holds the byte offset of the last run-length
     * word inside the index buffer.
     */
    {
        bool extend_last_run = false;
        char *last_run_word = rle->index->data;
        int prev_word_size = 0;
        int64 last_run_len = 0;

        if (rle->index->len == 0)
        {
            /* Brand-new vector: there is no run to extend. */
            rle->index->cursor = 0;
        }
        else
        {
            float8 tail_value;

            last_run_word = rle->index->data + rle->index->cursor;
            prev_word_size = int8compstoragesize(last_run_word);
            last_run_len = compword_to_int8(last_run_word);

            /* The most recently stored value sits at the end of vals. */
            tail_value = *((float8 *)(rle->vals->data
                                      + (rle->vals->len - sizeof(float8))));
            extend_last_run = (tail_value == incoming);
        }

        if (extend_last_run)
        {
            /*
             * Same value as the last run: bump its count in place. The
             * compressed word may change size, so adjust the index length
             * by the difference.
             */
            last_run_len += 1;
            int8_to_compword(last_run_len, last_run_word);
            rle->index->len +=
                (int8compstoragesize(last_run_word) - prev_word_size);
            rle->total_value_count++;
        }
        else
        {
            /* Different value: append a new run of length one ... */
            add_run_to_sdata((char *)&incoming, 1, sizeof(float8), rle);

            /*
             * ... and move the cursor to that new last word by walking past
             * every run-length word that precedes it.
             */
            char *word = rle->index->data;
            int offset = 0;
            int remaining;

            for (remaining = rle->unique_value_count - 1;
                 remaining > 0;
                 --remaining)
            {
                const int word_size = int8compstoragesize(word);

                offset += word_size;
                word += word_size;
            }
            rle->index->cursor = offset;
        }
    }

    PG_RETURN_SVECTYPE_P(state);
}