inline RowMajorMatrixXf eigen_mat_from_values(std::size_t height, std::size_t width,
    const float_vec& values)
{
    assertion(height * width == values.size(), "invalid shape");
    RowMajorMatrixXf m(height, width);
    std::memcpy(m.data(), values.data(), values.size() * sizeof(float_type));
    return m;
}
示例#2
0
inline tensor5s lstm_impl(const tensor5& input,
                          const std::size_t n_units,
                          const bool use_bias,
                          const bool return_sequences,
                          const float_vec& weights,
                          const float_vec& recurrent_weights,
                          const float_vec& bias,
                          const std::string& activation,
                          const std::string& recurrent_activation)
{
    const RowMajorMatrixXf W = eigen_row_major_mat_from_values(weights.size() / (n_units * 4), n_units * 4, weights);
    const RowMajorMatrixXf U = eigen_row_major_mat_from_values(n_units, n_units * 4, recurrent_weights);
    
    // initialize cell output states h, and cell memory states c for t-1 with zeros
    RowMajorMatrixXf h(1, n_units);
    RowMajorMatrixXf c(1, n_units);
    h.setZero();
    c.setZero();

    std::size_t n_timesteps = input.shape().width_;
    std::size_t n_features = input.shape().depth_;

    RowMajorMatrixXf in(n_timesteps, n_features);

    // write input to eigen matrix

    for (std::size_t a_t = 0; a_t < n_timesteps; ++a_t)
        for (std::size_t a_f = 0; a_f < n_features; ++a_f)
            in(EigenIndex(a_t), EigenIndex(a_f)) = input.get(0, 0, 0, a_t, a_f);

    RowMajorMatrixXf X = in * W;

    if (use_bias)
    {
        // define eigen vector type to be able to use broadcasting
        typedef Eigen::Matrix<float_type, 1, Eigen::Dynamic> Vector_Xf;
        Vector_Xf b = eigen_row_major_mat_from_values(1, n_units * 4, bias);

        X.rowwise() += b;
    }

    // get activation functions
    auto act_func = get_activation_func(activation);
    auto act_func_recurrent = get_activation_func(recurrent_activation);

    // computing LSTM output
    const EigenIndex n = EigenIndex(n_units);

    tensor5s lstm_result;

    if (return_sequences)
        lstm_result = {tensor5(shape5(1, 1, 1, n_timesteps, n_units), float_type(0))};
    else
        lstm_result = {tensor5(shape5(1, 1, 1, 1, n_units), float_type(0))};

    for (EigenIndex k = 0; k < EigenIndex(n_timesteps); ++k)
    {
        const RowMajorMatrixXf ifco = h * U;

        // Use of Matrix.block(): Block of size (p,q), starting at (i,j) matrix.block(i,j,p,q);  matrix.block<p,q>(i,j);
        const RowMajorMatrixXf i = (X.block(k, 0, 1, n) + ifco.block(0, 0, 1, n)).unaryExpr(act_func_recurrent);
        const RowMajorMatrixXf f = (X.block(k, n, 1, n) + ifco.block(0, n, 1, n)).unaryExpr(act_func_recurrent);
        const RowMajorMatrixXf c_pre = (X.block(k, n * 2, 1, n) + ifco.block(0, n * 2, 1, n)).unaryExpr(act_func);
        const RowMajorMatrixXf o = (X.block(k, n * 3, 1, n) + ifco.block(0, n * 3, 1, n)).unaryExpr(act_func_recurrent);

        c = f.array() * c.array() + i.array() * c_pre.array();
        h = o.array() * c.unaryExpr(act_func).array();

        if (return_sequences)
            for (EigenIndex idx = 0; idx < n; ++idx)
                lstm_result.front().set(0, 0, 0, std::size_t(k), std::size_t(idx), h(idx));
        else if (k == EigenIndex(n_timesteps) - 1)
            for (EigenIndex idx = 0; idx < n; ++idx)
                lstm_result.front().set(0, 0, 0, 0, std::size_t(idx), h(idx));
        }
    

    return lstm_result;
}
示例#3
0
inline tensor5s gru_impl(const tensor5& input,
    const std::size_t n_units,
    const bool use_bias,
    const bool reset_after,
    const bool return_sequences,
    const float_vec& weights,
    const float_vec& recurrent_weights,
    const float_vec& bias,
    const std::string& activation,
    const std::string& recurrent_activation)
{
    const std::size_t n_timesteps = input.shape().width_;
    const std::size_t n_features = input.shape().depth_;

    // weight matrices
    const EigenIndex n = EigenIndex(n_units);
    const RowMajorMatrix<Dynamic, Dynamic> W = eigen_row_major_mat_from_values(n_features, n_units * 3, weights);
    const RowMajorMatrix<Dynamic, Dynamic> U = eigen_row_major_mat_from_values(n_units, n_units * 3, recurrent_weights);

    // kernel bias
    RowVector<Dynamic> b_x(n_units * 3);
    if (use_bias && bias.size() >= 1 * n_units * 3)
        std::copy_n(bias.cbegin(), n_units * 3, b_x.data());
    else
        b_x.setZero();

    // recurrent kernel bias
    RowVector<Dynamic> b_h(n_units * 3);
    if (use_bias && bias.size() >= 2 * n_units * 3)
        std::copy_n(bias.cbegin() + static_cast<float_vec::const_iterator::difference_type>(n_units * 3), n_units * 3, b_h.data());
    else
        b_h.setZero();

    // initialize cell output states h
    RowVector<Dynamic> h(1, n_units);
    h.setZero();

    // write input to eigen matrix of shape (timesteps, n_features)
    RowMajorMatrix<Dynamic, Dynamic> x(n_timesteps, n_features);

    for (std::size_t a_t = 0; a_t < n_timesteps; ++a_t)
        for (std::size_t a_f = 0; a_f < n_features; ++a_f)
            x(EigenIndex(a_t), EigenIndex(a_f)) = input.get(0, 0, 0, a_t, a_f);

    // kernel applied to inputs (with bias), produces shape (timesteps, n_units * 3)
    RowMajorMatrix<Dynamic, Dynamic> Wx = x * W;
    Wx.rowwise() += b_x;

    // get activation functions
    auto act_func = get_activation_func(activation);
    auto act_func_recurrent = get_activation_func(recurrent_activation);

    // computing GRU output
    tensor5s gru_result;

    if (return_sequences)
        gru_result = { tensor5(shape5(1, 1, 1, n_timesteps, n_units), float_type(0)) };
    else
        gru_result = { tensor5(shape5(1, 1, 1, 1, n_units), float_type(0)) };

    for (EigenIndex k = 0; k < EigenIndex(n_timesteps); ++k)
    {
        RowVector<Dynamic> r;
        RowVector<Dynamic> z;
        RowVector<Dynamic> m;

        // in the formulae below, the following notations are used:
        // A b       matrix product
        // a o b     Hadamard (element-wise) product
        // x         input vector
        // h         state vector
        // W_{x,a}   block of the kernel weight matrix corresponding to "a"
        // W_{h,a}   block of the recurrent kernel weight matrix corresponding to "a"
        // b_{x,a}   part of the kernel bias vector corresponding to "a"
        // b_{h,a}   part of the recurrent kernel bias corresponding to "a"
        // z         update gate vector
        // r         reset gate vector

        if (reset_after)
        {
            // recurrent kernel applied to timestep (with bias), produces shape (1, n_units * 3)
            RowMajorMatrix<1, Dynamic> Uh = h * U;
            Uh += b_h;

            // z = sigmoid(W_{x,z} x + b_{i,z} + W_{h,z} h + b_{h,z})
            z = (Wx.block(k, 0 * n, 1, n) + Uh.block(0, 0 * n, 1, n)).unaryExpr(act_func_recurrent);
            // r = sigmoid(W_{x,r} x + b_{i,r} + W_{h,r} h + b_{h,r})
            r = (Wx.block(k, 1 * n, 1, n) + Uh.block(0, 1 * n, 1, n)).unaryExpr(act_func_recurrent);
            // m = tanh(W_{x,m} x + b_{i,m} + r * (W_{h,m} h + b_{h,m}))
            m = (Wx.block(k, 2 * n, 1, n) + (r.array() * Uh.block(0, 2 * n, 1, n).array()).matrix()).unaryExpr(act_func);
        }
        else
        {
            // z = sigmoid(W_{x,z} x + b_{x,z} + W_{h,z} h + b_{h,z})
            z = (Wx.block(k, 0 * n, 1, n) + h * U.block(0, 0 * n, n, n) + b_h.block(0, 0 * n, 1, n)).unaryExpr(act_func_recurrent);
            // r = sigmoid(W_{x,r} x + b_{x,r} + W_{h,r} h + b_{h,r})
            r = (Wx.block(k, 1 * n, 1, n) + h * U.block(0, 1 * n, n, n) + b_h.block(0, 1 * n, 1, n)).unaryExpr(act_func_recurrent);
            // m = tanh(W_{x,m} x + b_{x,m} + W_{h,m} (r o h) + b_{h,m}))
            m = (Wx.block(k, 2 * n, 1, n) + (r.array() * h.array()).matrix() * U.block(0, 2 * n, n, n) + b_h.block(0, 2 * n, 1, n)).unaryExpr(act_func);
        }

        // output vector: h' = (1 - z) o m + z o h
        h = ((1 - z.array()) * m.array() + z.array() * h.array()).matrix();

        if (return_sequences)
            for (EigenIndex idx = 0; idx < n; ++idx)
                gru_result.front().set(0, 0, 0, std::size_t(k), std::size_t(idx), h(idx));
        else if (k == EigenIndex(n_timesteps) - 1)
            for (EigenIndex idx = 0; idx < n; ++idx)
                gru_result.front().set(0, 0, 0, 0, std::size_t(idx), h(idx));
    }

    return gru_result;
}