/**
 * @brief Convert an Eigen sparse vector to a run-length encoded Greenplum
 *     sparse vector
 *
 * @param inVec An Eigen sparse vector
 * @returns Greenplum sparse vector
 *
 * @internal We implement this function here and not in the legacy sparse-vector
 *     code because the indices of type \c Index, as defined by Eigen.
 */
inline
SvecType*
SparseColumnVectorToLegacySparseVector(
    const Eigen::SparseVector<double> &inVec) {

    typedef Eigen::SparseVector<double>::Index Index;
    const size_t kValueLength = sizeof(double);

    const double* values = inVec.valuePtr();
    const Index* indices = inVec.innerIndexPtr();
    Index nnz = inVec.nonZeros();
    Index size = inVec.size();

    Index lastIndex = 0;
    double runValue = 0.;
    SparseData sdata = makeSparseData();

    sdata->type_of_data = FLOAT8OID;

    madlib_assert(nnz == 0 || (indices && values), std::logic_error(
        "SparseColumnVectorToLegacySparseVector(): Missing values or indices "
        "in Eigen sparse vector."));

    if (nnz > 0) {
        if (indices[0] == 0) {
            runValue = values[0];
        } else if (std::memcmp(&values[0], &runValue, kValueLength)) {
            // In this case, we implicitly have: indices[0] > 0
            // The first run is therefore a sequence of zeros.
            add_run_to_sdata(reinterpret_cast<char*>(&runValue),
                indices[0], kValueLength, sdata);
            runValue = values[0];
            lastIndex = indices[0];
        }
        // The remaining case is: indices[0] > 0 && values[0] == 0
        // In this case, the original representation is not normalized --
        // storing (indices[0], values[0]) is unncessary. We therefore just
        // ignore this value.
    }
    for (int i = 1; i < nnz; ++i) {
        if (std::memcmp(&values[i], &runValue, kValueLength)) {
            add_run_to_sdata(reinterpret_cast<char*>(&runValue),
                indices[i] - lastIndex, kValueLength, sdata);
            runValue = values[i];
            lastIndex = indices[i];
        }
    }
    add_run_to_sdata(reinterpret_cast<char*>(&runValue),
        size - lastIndex, kValueLength, sdata);

    // Add the final tallies
    sdata->unique_value_count
        = static_cast<int>(sdata->vals->len / kValueLength);
    sdata->total_value_count = static_cast<int>(size);

    return svec_from_sparsedata(sdata, true /* trim */);
}
// Example #2
// 0
Datum
svec_pivot(PG_FUNCTION_ARGS)
{
	SvecType *svec;
	SparseData sdata;
	float8 value;

	if (PG_ARGISNULL(1))
	{
		value = 0.;
	} else
	{
		value = PG_GETARG_FLOAT8(1);
	}

	if (! PG_ARGISNULL(0))
	{
		svec = PG_GETARG_SVECTYPE_P(0);
	} else {	//first call, construct a new svec
		/*
		 * Allocate space for the unique values and index
		 *
		 * Note that we do this manually because we are going to
		 * manage the memory allocations for the StringInfo structures
		 * manually within this aggregate so that we can preserve
		 * the intermediate state without re-serializing until there is
		 * a need to re-alloc, at which point we will re-serialize to
		 * form the returned state variable.
		 */
		svec = makeEmptySvec(1);
	}
	sdata = sdata_from_svec(svec);

	/*
	 * Add the incoming float8 value to the svec.
	 *
	 * First check to see if there is room in both the data area and index
	 * and if there isn't, re-alloc and recreate the svec
	 */
	if (   ((sdata->vals->len + sizeof(float8)+1) > sdata->vals->maxlen)
	    || ((sdata->index->len + 9 +1)            > sdata->index->maxlen) )
	{
		svec = reallocSvec(svec);
		sdata = sdata_from_svec(svec);
	}
	/*
	 * Now let's check to see if we're adding a new value or appending to the last
	 * run.  If the incoming value is the same as the last value, just increment
	 * the last run.  Note that we need to use the index cursor to find where the
	 * last index counter is located.
	 */
	{
		char *index_location;
		int old_index_storage_size;
		int64 run_count;
		float8 last_value=-100000;
		bool new_run;

		if (sdata->index->len==0) //New vector
		{
			new_run=true;
			index_location = sdata->index->data;
			sdata->index->cursor = 0;
			run_count = 0;
		} else
		{
			index_location = sdata->index->data + sdata->index->cursor;
			old_index_storage_size = int8compstoragesize(index_location);
			run_count = compword_to_int8(index_location);
			last_value = *((float8 *)(sdata->vals->data+(sdata->vals->len-sizeof(float8))));

			if (last_value == value)
			{
				new_run=false;
			} else {
				new_run=true;
			}
		}
		if (!new_run)
		{
			run_count++;
			int8_to_compword(run_count,index_location);
			sdata->index->len += (int8compstoragesize(index_location)
					- old_index_storage_size);
			sdata->total_value_count++;
		} else {
			add_run_to_sdata((char *)&value,1,sizeof(float8),sdata);
			char *i_ptr=sdata->index->data;
			int len=0;
			for (int j=0;j<sdata->unique_value_count-1;j++)
			{
				len+=int8compstoragesize(i_ptr);
				i_ptr+=int8compstoragesize(i_ptr);
			}
			sdata->index->cursor = len;
		}
	}

	PG_RETURN_SVECTYPE_P(svec);
}