template<typename R>
bool Other<R>::grad_allclose(Mat<R> a, Mat<R> b, double tol) {
    if (a.dims() != b.dims())
        return false;
    return GRAD(a).allclose(GRAD(b), tol);
}

template<typename R>
bool Other<R>::equals(Mat<R> a, Mat<R> b) {
    // wrong dimensions
    if (a.dims() != b.dims())
        return false;
    return MAT(a) == MAT(b);
}

template<typename R>
bool Other<R>::allclose(Mat<R> a, Mat<R> b, double tol) {
    if (a.dims() != b.dims())
        return false;
    return MAT(a).allclose(MAT(b), tol);
}
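// Usage sketch (illustrative, not part of the library): these helpers are
// convenient in gradient-checking tests, comparing a matrix's values and its
// accumulated gradients against a reference within a tolerance. The matrix
// shapes and the fill/backprop steps below are assumptions:
//
//     Mat<double> analytic(3, 4), numeric(3, 4);
//     // ... run backprop on `analytic`, finite differences into `numeric` ...
//     bool same_values = Other<double>::allclose(analytic, numeric, 1e-6);
//     bool same_grads  = Other<double>::grad_allclose(analytic, numeric, 1e-6);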
template<typename Z>
typename StackedGatedModel<Z>::MaskedActivation StackedGatedModel<Z>::masked_predict_cost(
        Mat<int> data,
        Mat<int> target_data,
        Mat<Z> mask,
        Z drop_prob,
        int temporal_offset,
        uint softmax_offset) const {
    utils::Timer mpc("masked_predict_cost");
    auto state = this->initial_states();

    mat total_error(1, 1);
    mat memory;
    mat memory_error(1, 1);

    auto n = data.dims(0);
    assert(temporal_offset < n);
    assert(target_data.dims(0) >= data.dims(0));

    for (uint timestep = 0; timestep < n - temporal_offset; ++timestep) {
        // pick this letter from the embedding
        utils::Timer gte("get the embeddings");
        auto input_vector = this->embedding[data[timestep]];
        // the gate decides how much of this input to let through,
        // conditioned on the input and the topmost hidden state:
        memory = gate.activate({input_vector, state.back().hidden}).sigmoid();
        input_vector = input_vector.eltmul_broadcast_colwise(memory);
        gte.stop();

        utils::Timer flstm("forward lstm");
        state = this->stacked_lstm.activate(state, input_vector, drop_prob);
        flstm.stop();

        // classifier takes as input the final hidden layer's activation:
        utils::Timer decode_tm("decode");
        auto logprobs = this->decode(input_vector, state);
        decode_tm.stop();

        auto target = target_data[timestep + temporal_offset];
        if (softmax_offset > 0) {
            target -= softmax_offset;
        }

        utils::Timer softmax_tm("softmax cross entropy");
        auto errors = MatOps<Z>::softmax_cross_entropy_rowwise(logprobs, target);
        softmax_tm.stop();

        // zero out error and gate usage at masked timesteps:
        utils::Timer masking_tm("masking");
        errors *= mask[timestep + temporal_offset].T();
        memory *= mask[timestep + temporal_offset].T();
        masking_tm.stop();

        total_error += errors.sum();
        memory_error += memory.sum() * memory_penalty;
    }
    mpc.stop();
    return MaskedActivation(total_error, memory_error);
}
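// Usage sketch (illustrative, not part of the library): masked_predict_cost
// is typically invoked once per minibatch, after which both returned errors
// are backpropagated. The `model` and `solver` objects, the
// `prediction_error`/`memory_error` member names on MaskedActivation, and
// `graph::backward()` are assumptions and may differ in the actual codebase:
//
//     StackedGatedModel<double> model = /* ... construct or load ... */;
//     auto activation = model.masked_predict_cost(
//         data, targets, mask,
//         0.3,   // drop_prob
//         0,     // temporal_offset
//         0);    // softmax_offset
//     activation.prediction_error.grad();  // seed the prediction loss
//     activation.memory_error.grad();      // seed the memory penalty
//     graph::backward();
//     solver.step(model.parameters());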