Esempio n. 1
0
/**
 * @brief Internal function for retrieving the type ID and datum for an element
 *     of a native composite type
 *
 * @param inID Number of function argument
 * @param[out] outTypeID PostgreSQL OID of the function argument's type
 * @param[out] outDatum PostgreSQL Datum for the function argument
 *
 * @throws PGException if the PostgreSQL backend reported an error while
 *     looking up the tuple descriptor or extracting the attribute
 *
 * @internal
 *     Having this as separate function isolates the PG_TRY block. Otherwise,
 *     the compiler might warn that the longjmp could clobber local variables.
 */
inline
void
AbstractionLayer::AnyType::backendGetTypeIDAndDatumForTupleElement(
    uint16_t inID, Oid &outTypeID, Datum &outDatum) const {

    // Only meaningful when this AnyType wraps a native composite (heap tuple)
    madlib_assert(mContent == NativeComposite, std::logic_error(
        "Inconsistency detected while converting from PostgreSQL to C++ types."));
    
    bool exceptionOccurred = false;
    Oid tupType;
    int32 tupTypmod;
    TupleDesc tupDesc;
    bool isNull = false;
    
    PG_TRY(); {
        // Resolve the row type of the wrapped tuple and fetch its (reference
        // counted) descriptor; release it as soon as the type OID is read.
        tupType = HeapTupleHeaderGetTypeId(mTupleHeader);
        tupTypmod = HeapTupleHeaderGetTypMod(mTupleHeader);
        tupDesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
        // NOTE(review): attrs[] is indexed 0-based here, while PostgreSQL's
        // GetAttributeByNum() below takes a 1-based attribute number. Confirm
        // that inID satisfies both conventions (possible off-by-one).
        outTypeID = tupDesc->attrs[inID]->atttypid;
        ReleaseTupleDesc(tupDesc);
        // NOTE(review): isNull is discarded; presumably null handling happens
        // in the caller -- confirm.
        outDatum = GetAttributeByNum(mTupleHeader, inID, &isNull);
    } PG_CATCH(); {
        // A backend elog(ERROR) longjmp'ed here. Only record the fact; the
        // C++ exception is thrown outside the PG_TRY block.
        exceptionOccurred = true;
    } PG_END_TRY();
    
    if (exceptionOccurred)
        throw PGException();
}
Esempio n. 2
0
/**
 * @brief Internal function for determining the type of a function argument
 *
 * @param inID Number of function argument
 * @param[out] outTypeID PostgreSQL OID of the function argument's type
 * @param[out] outIsMutable True if the data structure of this function argument
 *     can be safely modified. For objects passed by reference (like arrays)
 *     this is only true when passed as the first argument of a transition
 *     function.
 *
 * @throws PGException if the PostgreSQL backend reported an error while
 *     inspecting the function-call information
 *
 * @internal
 *     Having this as separate function isolates the PG_TRY block. Otherwise,
 *     the compiler might warn that the longjmp could clobber local variables.
 */
inline
void
AbstractionLayer::AnyType::backendGetTypeIDForFunctionArg(uint16_t inID,
    Oid &outTypeID, bool &outIsMutable) const {
    
    // Only meaningful when this AnyType wraps function-call arguments
    madlib_assert(mContent == FunctionComposite, std::logic_error(
        "Inconsistency detected while converting from PostgreSQL to C++ types."));
    
    bool exceptionOccurred = false;

    PG_TRY(); {
        // Ask the backend for the declared type of argument inID
        outTypeID = get_fn_expr_argtype(fcinfo->flinfo, inID);

        // If we are called as an aggregate function, the first argument is the
        // transition state. In that case, we are free to modify the data.
        // In fact, for performance reasons, we *should* even do all modifications
        // in-place. In all other cases, directly modifying memory is dangerous.
        // See warning at:
        // http://www.postgresql.org/docs/current/static/xfunc-c.html#XFUNC-C-BASETYPE
        outIsMutable = (inID == 0 && AggCheckCallContext(fcinfo, NULL));
    } PG_CATCH(); {
        // Backend error: record it here, re-raise as C++ exception below
        exceptionOccurred = true;
    } PG_END_TRY();
    
    if (exceptionOccurred)
        throw PGException();
}
typename OutputStreamBufferBase<Derived, C, Allocator>::int_type
OutputStreamBufferBase<Derived, C, Allocator>::overflow(int_type c) {
    // An eof argument means there is no character to write: fail immediately
    // without touching the buffer. The previous code only performed this
    // check when the buffer was NOT full, so overflow(eof) on a full buffer
    // would grow the storage and then store eof cast to C -- writing a stray
    // garbage character into the stream.
    if (traits_type::eq_int_type(c, traits_type::eof()))
        return traits_type::eof();

    if (this->pptr() >= this->epptr()) {
        // Buffer exhausted: grow geometrically, up to the hard cap.
        if (mStorageSize >= kMaxBufferSize)
            return traits_type::eof();

        // Allocate double-sized storage (+1, matching the allocation scheme
        // used elsewhere for this buffer) and move the existing contents.
        uint32_t newStorageSize = mStorageSize * 2;
        C* newStorage = mAllocator.allocate(newStorageSize + 1);
        std::copy(mStorage, mStorage + mStorageSize, newStorage);
        mAllocator.deallocate(mStorage, mStorageSize + 1);
        mStorage = newStorage;

        // Sanity check: the put area must have been exactly full.
        madlib_assert(
            this->pptr() == this->epptr() &&
            this->pptr() - this->pbase() == static_cast<int64_t>(mStorageSize),
            std::logic_error("Internal error: Logging buffer has become "
                "inconsistent"));

        // Re-point the put area at the new storage and restore the put
        // position to the end of the previously written data.
        this->setp(mStorage, mStorage + newStorageSize);
        this->pbump(mStorageSize);
        mStorageSize = newStorageSize;
    }

    // Store the character and advance the put pointer.
    *this->pptr() = static_cast<C>(c);
    this->pbump(1);
    return traits_type::not_eof(c);
}
Esempio n. 4
0
/**
 * @brief Verify consistency of AnyType object. Throw exception if not.
 */
inline
void
AbstractionLayer::AnyType::consistencyCheck() const {
    const char *kMsg("Inconsistency detected while converting between "
        "PostgreSQL and C++ types.");

    madlib_assert(mContent != Null || (mDatum == 0 && fcinfo == NULL &&
        mTupleHeader == NULL && mChildren.empty()),
        std::logic_error(kMsg));
    madlib_assert(mContent != FunctionComposite || fcinfo != NULL,
        std::logic_error(kMsg));
    madlib_assert(mContent != NativeComposite || mTupleHeader != NULL,
        std::logic_error(kMsg));
    madlib_assert(mContent != ReturnComposite || (!mChildren.empty() &&
        mTypeID == InvalidOid),
        std::logic_error(kMsg));
}
Esempio n. 5
0
 /**
  * @brief Run minibatch gradient descent for all batches of one transition
  *     tuple, over several epochs, accumulating the (pessimistic) maximum
  *     per-epoch loss into the state.
  *
  * @param[in,out] state Transition state; model and loss are updated in place
  * @param tuple Input tuple holding the independent-variable batch (indVar)
  *     and dependent-variable batch (depVar) with matching row counts
  */
 void
 IGD<State, ConstState, Task>::transitionInMiniBatch(
        state_type &state,
        const tuple_type &tuple) {

    madlib_assert(tuple.indVar.rows() == tuple.depVar.rows(),
                  std::runtime_error("Invalid data. Independent and dependent "
                                     "batches don't have same number of rows."));

    uint16_t batch_size = state.batchSize;
    uint16_t n_epochs = state.nEpochs;

    // n_rows/n_ind_cols are the rows/cols in a transition tuple.
    Index n_rows = tuple.indVar.rows();
    // Ceiling division of n_rows by batch_size, with a minimum of one batch
    // (a partial final batch counts as a batch of its own).
    size_t n_batches = n_rows < batch_size ? 1 :
                        size_t(n_rows / batch_size) + size_t(n_rows % batch_size > 0);

    double max_loss = 0.0;
    for (int curr_epoch=0; curr_epoch < n_epochs; curr_epoch++) {
        double loss = 0.0;
        /*
            Randomizing the input data before every iteration is good for
            minibatch gradient descent convergence. Since we don't do that,
            we are randomizing the order in which every batch is visited in
            a buffer. Note that this still does not randomize rows within
            a batch.
        */
        // Build the identity permutation [0, 1, ..., n_batches-1] ...
        std::vector<size_t> random_curr_batch(n_batches, 0);
        for(size_t i=0; i < n_batches; i++) {
            random_curr_batch[i] = i;
        }
        // ... and shuffle it to decide the batch visiting order.
        // NOTE(review): std::random_shuffle is deprecated in C++14 and
        // removed in C++17; std::shuffle would be the replacement if the
        // project's language level moves up.
        std::random_shuffle(&random_curr_batch[0], &random_curr_batch[n_batches]);

        for (size_t i = 0; i < n_batches; i++) {
            size_t curr_batch = random_curr_batch[i];
            Index curr_batch_row_index = static_cast<Index>(curr_batch * batch_size);
            Matrix X_batch;
            Matrix Y_batch;
            if (curr_batch == n_batches-1) {
               // last batch: takes all remaining rows (may be fewer than
               // batch_size when n_rows is not a multiple of batch_size)
               X_batch = tuple.indVar.bottomRows(n_rows - curr_batch_row_index);
               Y_batch = tuple.depVar.bottomRows(n_rows - curr_batch_row_index);
            } else {
                // interior batch: exactly batch_size rows
                X_batch = tuple.indVar.block(curr_batch_row_index, 0,
                                             batch_size, tuple.indVar.cols());
                Y_batch = tuple.depVar.block(curr_batch_row_index, 0,
                                             batch_size, tuple.depVar.cols());
            }
            // Task updates the model in place and returns this batch's loss
            loss += Task::getLossAndUpdateModel(
                state.model, X_batch, Y_batch, state.stepsize);
        }

        if (max_loss < loss) max_loss = loss;
    }
    // Be pessimistic and report the maximum loss
    state.loss += max_loss;
    return;
 }
Esempio n. 6
0
/**
 * @brief Convert an Eigen sparse vector to a run-length encoded Greenplum
 *     sparse vector
 *
 * @param inVec An Eigen sparse vector
 * @returns Greenplum sparse vector
 *
 * @internal We implement this function here and not in the legacy sparse-vector
 *     code because the indices are of type \c Index, as defined by Eigen.
 */
inline
SvecType*
SparseColumnVectorToLegacySparseVector(
    const Eigen::SparseVector<double> &inVec) {

    typedef Eigen::SparseVector<double>::Index Index;
    const size_t kValueLength = sizeof(double);

    const double* values = inVec.valuePtr();
    const Index* indices = inVec.innerIndexPtr();
    Index nnz = inVec.nonZeros();
    Index size = inVec.size();

    // Run-length state: lastIndex is the logical position where the current
    // run started; runValue is the value repeated throughout that run.
    // The vector implicitly starts with a run of zeros.
    Index lastIndex = 0;
    double runValue = 0.;
    SparseData sdata = makeSparseData();

    sdata->type_of_data = FLOAT8OID;

    madlib_assert(nnz == 0 || (indices && values), std::logic_error(
        "SparseColumnVectorToLegacySparseVector(): Missing values or indices "
        "in Eigen sparse vector."));

    if (nnz > 0) {
        if (indices[0] == 0) {
            // The vector starts with a stored value: the first run begins
            // with it, no leading run of zeros.
            runValue = values[0];
        } else if (std::memcmp(&values[0], &runValue, kValueLength)) {
            // In this case, we implicitly have: indices[0] > 0
            // The first run is therefore a sequence of zeros.
            add_run_to_sdata(reinterpret_cast<char*>(&runValue),
                indices[0], kValueLength, sdata);
            runValue = values[0];
            lastIndex = indices[0];
        }
        // The remaining case is: indices[0] > 0 && values[0] == 0
        // In this case, the original representation is not normalized --
        // storing (indices[0], values[0]) is unnecessary. We therefore just
        // ignore this value.
    }
    // Walk the remaining stored values; whenever the value changes (bitwise
    // comparison, so distinct NaN/zero bit patterns count as different),
    // flush the finished run and start a new one.
    // NOTE(review): the loop counter is int while nnz is Eigen's Index
    // (typically wider) -- confirm nnz cannot exceed INT_MAX in practice.
    for (int i = 1; i < nnz; ++i) {
        if (std::memcmp(&values[i], &runValue, kValueLength)) {
            add_run_to_sdata(reinterpret_cast<char*>(&runValue),
                indices[i] - lastIndex, kValueLength, sdata);
            runValue = values[i];
            lastIndex = indices[i];
        }
    }
    // Flush the final run, which extends to the end of the vector.
    add_run_to_sdata(reinterpret_cast<char*>(&runValue),
        size - lastIndex, kValueLength, sdata);

    // Add the final tallies
    sdata->unique_value_count
        = static_cast<int>(sdata->vals->len / kValueLength);
    sdata->total_value_count = static_cast<int>(size);

    return svec_from_sparsedata(sdata, true /* trim */);
}
Esempio n. 7
0
/**
 * @brief Add an element to a composite value, for returning to the backend
 *
 * Appending is only legal while the object is still Null or already a
 * ReturnComposite; afterwards the object is a ReturnComposite.
 */
inline
AnyType&
AnyType::operator<<(const AnyType &inValue) {
    consistencyCheck();

    const bool canAppend =
        (mContent == Null) || (mContent == ReturnComposite);
    madlib_assert(canAppend,
        std::logic_error("Internal inconsistency while creating composite "
            "return value."));

    mContent = ReturnComposite;
    mChildren.push_back(inValue);
    return *this;
}
Esempio n. 8
0
/**
 * @brief Return a PostgreSQL Datum representing the current object
 *
 * The only *conversion* taking place in this function is *combining* Datums
 * into a tuple. At this place, we do not have to worry any more about retaining
 * memory.
 *
 * @param inTargetTypeID PostgreSQL OID of the target type to convert to
 * @param inTargetIsComposite Whether the target type is composite.
 *     \c indeterminate if unknown.
 * @param inTargetTupleDesc If target type is known to be composite, then
 *     (optionally) the PostgreSQL TupleDesc. NULL is always a valid argument.
 *
 * @throws std::invalid_argument if the backend raised an error while the
 *     return type was being inspected, or if the requested type does not
 *     match this object's type
 * @throws std::runtime_error if composite/simple kinds do not match or the
 *     internal composite has more elements than the PostgreSQL type
 *
 * @see getAsDatum(const FunctionCallInfo)
 */
inline
Datum
AbstractionLayer::AnyType::getAsDatum(Oid inTargetTypeID,
    boost::tribool inTargetIsComposite, TupleDesc inTargetTupleDesc) const {
    
    consistencyCheck();

    // The default value to return in case of Null is 0. Note, however, that
    // 0 can also be a perfectly valid (non-null) Datum. It is the caller's
    // responsibility to call isNull() separately.
    if (isNull())
        return 0;

    try {
        bool exceptionOccurred = false;
        TupleHandle tupleHandle(inTargetTupleDesc);
        
        // If the caller does not know whether the target is composite,
        // resolve it (and the tuple descriptor) via the backend.
        if (boost::indeterminate(inTargetIsComposite)) {
            inTargetIsComposite = isRowTypeInCache(inTargetTypeID);
            backendGetIsCompositeTypeAndTupleHandle(inTargetTypeID,
                inTargetIsComposite, tupleHandle);
        }
        
        if (inTargetIsComposite && !isComposite())
            throw std::runtime_error("Invalid type conversion requested. "
                "Simple type supplied but PostgreSQL expects composite type.");

        if (!inTargetIsComposite && isComposite())
            throw std::runtime_error("Invalid type conversion requested. "
                "Composite type supplied but PostgreSQL expects simple type.");
        
        madlib_assert(inTargetIsComposite == (tupleHandle.desc != NULL),
            MADLIB_DEFAULT_EXCEPTION);
        
        if (inTargetIsComposite) {
            if (static_cast<size_t>(tupleHandle.desc->natts) < mChildren.size())
                throw std::runtime_error("Invalid type conversion requested. "
                    "Internal composite type has more elements than PostgreSQL "
                    "composite type.");

            // Per-attribute Datums and null flags for heap_form_tuple.
            // NOTE(review): nulls is vector<char> reinterpreted as bool*
            // below; this presumes sizeof(bool) == 1 -- confirm for all
            // supported platforms.
            std::vector<Datum> values;
            std::vector<char> nulls;

            // Convert each child recursively, using the attribute's
            // declared type from the tuple descriptor.
            for (uint16_t pos = 0; pos < mChildren.size(); ++pos) {
                Oid targetTypeID = tupleHandle.desc->attrs[pos]->atttypid;
                                    
                values.push_back(mChildren[pos].getAsDatum(targetTypeID));
                nulls.push_back(mChildren[pos].isNull());
            }
            // All elements that have not been initialized will be set to Null
            for (uint16_t pos = mChildren.size();
                pos < static_cast<size_t>(tupleHandle.desc->natts);
                ++pos) {
                
                values.push_back(Datum(0));
                nulls.push_back(true);
            }
            
            Datum returnValue;
            // heap_form_tuple may elog(ERROR); isolate the longjmp and
            // convert it into a C++ exception below.
            PG_TRY(); {
                HeapTuple heapTuple = heap_form_tuple(tupleHandle.desc,
                    &values[0], reinterpret_cast<bool*>(&nulls[0]));
                
                returnValue = HeapTupleGetDatum(heapTuple);
            } PG_CATCH(); {
                exceptionOccurred = true;
            } PG_END_TRY();
            
            if (exceptionOccurred)
                throw PGException();
            
            return returnValue;
        }
    } catch (PGException &e) {
        // Fixed typo in the user-visible message: "inormation" -> "information"
        throw std::invalid_argument("An exception occurred while "
            "gathering information about the PostgreSQL return type.");
    }
        
    // Simple (non-composite) case: the stored Datum is returned as-is, but
    // only if the requested type matches the stored type.
    if (inTargetTypeID != mTypeID)
        throw std::invalid_argument("Invalid type conversion requested. "
            "C++ type and PostgreSQL return type do not match.");
    
    return mDatum;
}