template<typename Dtype, typename MItype, typename MOtype>
void RecurrentLayer<Dtype, MItype, MOtype>::Reshape(
    const vector<Blob<MItype>*>& bottom,
    const vector<Blob<MOtype>*>& top) {
  CHECK_GE(bottom[0]->num_axes(), 2)
      << "bottom[0] must have at least 2 axes -- (#timesteps, #streams, ...)";
  CHECK_EQ(T_, bottom[0]->shape(0)) << "input number of timesteps changed";
  N_ = bottom[0]->shape(1);
  CHECK_EQ(bottom[1]->num_axes(), 2)
      << "bottom[1] must have exactly 2 axes -- (#timesteps, #streams)";
  CHECK_EQ(T_, bottom[1]->shape(0));
  CHECK_EQ(N_, bottom[1]->shape(1));
  x_input_blob_->ReshapeLike(*bottom[0]);
  vector<int_tp> cont_shape = bottom[1]->shape();
  cont_input_blob_->Reshape(cont_shape);
  if (static_input_) {
    x_static_input_blob_->ReshapeLike(*bottom[2]);
  }
  vector<BlobShape> recur_input_shapes;
  RecurrentInputShapes(&recur_input_shapes);
  CHECK_EQ(recur_input_shapes.size(), recur_input_blobs_.size());
  for (int i = 0; i < recur_input_shapes.size(); ++i) {
    recur_input_blobs_[i]->Reshape(recur_input_shapes[i]);
  }
  unrolled_net_->Reshape();
  // Share data/diff with the unrolled net's input blobs so no copies are
  // needed between this layer's bottoms and the unrolled net.
  x_input_blob_->ShareData(*bottom[0]);
  x_input_blob_->ShareDiff(*bottom[0]);
  cont_input_blob_->ShareData(*bottom[1]);
  if (static_input_) {
    x_static_input_blob_->ShareData(*bottom[2]);
    x_static_input_blob_->ShareDiff(*bottom[2]);
  }
  if (expose_hidden_) {
    // Any remaining bottoms supply the initial recurrent state.
    const int bottom_offset = 2 + static_input_;
    for (int i = bottom_offset, j = 0; i < bottom.size(); ++i, ++j) {
      CHECK(recur_input_blobs_[j]->shape() == bottom[i]->shape())
          << "shape mismatch - recur_input_blobs_[" << j << "]: "
          << recur_input_blobs_[j]->shape_string()
          << " vs. bottom[" << i << "]: " << bottom[i]->shape_string();
      recur_input_blobs_[j]->ShareData(*bottom[i]);
    }
  }
  // Tops alias the unrolled net's output blobs.
  for (int i = 0; i < output_blobs_.size(); ++i) {
    top[i]->ReshapeLike(*output_blobs_[i]);
    top[i]->ShareData(*output_blobs_[i]);
    top[i]->ShareDiff(*output_blobs_[i]);
  }
  if (expose_hidden_) {
    const int top_offset = output_blobs_.size();
    for (int i = top_offset, j = 0; i < top.size(); ++i, ++j) {
      top[i]->ReshapeLike(*recur_output_blobs_[j]);
    }
  }
}
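// A minimal sketch of a prototxt wiring that would exercise the
// expose_hidden_ branches above, assuming an LSTM (two recurrent blobs,
// c and h). All blob names here are hypothetical placeholders; the exact
// ordering of the extra tops follows recur_output_blobs_:
//
//   layer {
//     name: "lstm"
//     type: "LSTM"
//     bottom: "data"  # T x N x ... input sequence
//     bottom: "clip"  # T x N sequence-continuation indicators
//     bottom: "c_0"   # 1 x N x num_output initial cell state
//     bottom: "h_0"   # 1 x N x num_output initial hidden state
//     top: "h"        # T x N x num_output output sequence
//     # plus one extra top per recurrent blob for the final states,
//     # reshaped from recur_output_blobs_ in the loop above
//     recurrent_param { num_output: 256 expose_hidden: true }
//   }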
template <typename Dtype>
void RecurrentLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK_GE(bottom[0]->num_axes(), 2)
      << "bottom[0] must have at least 2 axes -- (#timesteps, #streams, ...)";
  CHECK_EQ(T_, bottom[0]->shape(0)) << "input number of timesteps changed";
  N_ = bottom[0]->shape(1);
  CHECK_EQ(bottom[1]->num_axes(), 2)
      << "bottom[1] must have exactly 2 axes -- (#timesteps, #streams)";
  CHECK_EQ(T_, bottom[1]->shape(0));
  CHECK_EQ(N_, bottom[1]->shape(1));
  CHECK_EQ(top.size(), output_blobs_.size());
  x_input_blob_->ReshapeLike(*bottom[0]);
  vector<int> cont_shape = bottom[1]->shape();
  cont_input_blob_->Reshape(cont_shape);
  if (static_input_) {
    x_static_input_blob_->ReshapeLike(*bottom[2]);
  }
  vector<BlobShape> recur_input_shapes;
  RecurrentInputShapes(&recur_input_shapes);
  CHECK_EQ(recur_input_shapes.size(), recur_input_blobs_.size());
  for (int i = 0; i < recur_input_shapes.size(); ++i) {
    recur_input_blobs_[i]->Reshape(recur_input_shapes[i]);
  }
  unrolled_net_->Reshape();
  x_input_blob_->ShareData(*bottom[0]);
  x_input_blob_->ShareDiff(*bottom[0]);
  cont_input_blob_->ShareData(*bottom[1]);
  if (static_input_) {
    x_static_input_blob_->ShareData(*bottom[2]);
    x_static_input_blob_->ShareDiff(*bottom[2]);
  }
  for (int i = 0; i < top.size(); ++i) {
    top[i]->ReshapeLike(*output_blobs_[i]);
    top[i]->ShareData(*output_blobs_[i]);
    top[i]->ShareDiff(*output_blobs_[i]);
  }
}
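// Illustration (hypothetical values, not code from this file) of the cont
// bottom whose T x N shape is checked above: one continuation indicator per
// timestep and stream. A 0 at (t, n) flushes stream n's recurrent state
// before timestep t. For T_ = 4 and N_ = 2, with both streams starting
// sequences at t = 0 and stream 0 starting a new one at t = 2:
//
//   vector<int> cont_shape(2);
//   cont_shape[0] = 4;  // T timesteps
//   cont_shape[1] = 2;  // N streams
//   Blob<Dtype> cont(cont_shape);
//   Dtype* cont_data = cont.mutable_cpu_data();
//   const Dtype indicators[4][2] = {
//       {0, 0},   // t = 0: both streams begin new sequences
//       {1, 1},   // t = 1: both streams continue
//       {0, 1},   // t = 2: stream 0 resets; stream 1 continues
//       {1, 1},   // t = 3: both streams continue
//   };
//   for (int t = 0; t < 4; ++t) {
//     for (int n = 0; n < 2; ++n) {
//       cont_data[t * 2 + n] = indicators[t][n];
//     }
//   }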
template <typename Dtype>
void LSTMLayer<Dtype>::FillUnrolledNet(NetParameter* net_param) const {
  const int num_output = this->layer_param_.recurrent_param().num_output();
  CHECK_GT(num_output, 0) << "num_output must be positive";
  const FillerParameter& weight_filler =
      this->layer_param_.recurrent_param().weight_filler();
  const FillerParameter& bias_filler =
      this->layer_param_.recurrent_param().bias_filler();

  // Add generic LayerParameter's (without bottoms/tops) of layer types we'll
  // use to save redundant code.
  LayerParameter hidden_param;
  hidden_param.set_type("InnerProduct");
  hidden_param.mutable_inner_product_param()->set_num_output(num_output * 4);
  hidden_param.mutable_inner_product_param()->set_bias_term(false);
  hidden_param.mutable_inner_product_param()->set_axis(2);
  hidden_param.mutable_inner_product_param()->
      mutable_weight_filler()->CopyFrom(weight_filler);

  LayerParameter biased_hidden_param(hidden_param);
  biased_hidden_param.mutable_inner_product_param()->set_bias_term(true);
  biased_hidden_param.mutable_inner_product_param()->
      mutable_bias_filler()->CopyFrom(bias_filler);

  LayerParameter sum_param;
  sum_param.set_type("Eltwise");
  sum_param.mutable_eltwise_param()->set_operation(
      EltwiseParameter_EltwiseOp_SUM);

  LayerParameter scale_param;
  scale_param.set_type("Scale");
  scale_param.mutable_scale_param()->set_axis(0);

  LayerParameter slice_param;
  slice_param.set_type("Slice");
  slice_param.mutable_slice_param()->set_axis(0);

  LayerParameter split_param;
  split_param.set_type("Split");

  vector<BlobShape> input_shapes;
  RecurrentInputShapes(&input_shapes);
  CHECK_EQ(2, input_shapes.size());

  LayerParameter* input_layer_param = net_param->add_layer();
  input_layer_param->set_type("Input");
  InputParameter* input_param = input_layer_param->mutable_input_param();
  input_layer_param->add_top("c_0");
  input_param->add_shape()->CopyFrom(input_shapes[0]);
  input_layer_param->add_top("h_0");
  input_param->add_shape()->CopyFrom(input_shapes[1]);

  LayerParameter* cont_slice_param = net_param->add_layer();
  cont_slice_param->CopyFrom(slice_param);
  cont_slice_param->set_name("cont_slice");
  cont_slice_param->add_bottom("cont");
  cont_slice_param->mutable_slice_param()->set_axis(0);

  // Add layer to transform all timesteps of x to the hidden state dimension.
  //     W_xc_x = W_xc * x + b_c
  {
    LayerParameter* x_transform_param = net_param->add_layer();
    x_transform_param->CopyFrom(biased_hidden_param);
    x_transform_param->set_name("x_transform");
    x_transform_param->add_param()->set_name("W_xc");
    x_transform_param->add_param()->set_name("b_c");
    x_transform_param->add_bottom("x");
    x_transform_param->add_top("W_xc_x");
    x_transform_param->add_propagate_down(true);
  }

  if (this->static_input_) {
    // Add layer to transform x_static to the gate dimension.
    //     W_xc_x_static = W_xc_static * x_static
    LayerParameter* x_static_transform_param = net_param->add_layer();
    x_static_transform_param->CopyFrom(hidden_param);
    x_static_transform_param->mutable_inner_product_param()->set_axis(1);
    x_static_transform_param->set_name("W_xc_x_static");
    x_static_transform_param->add_param()->set_name("W_xc_static");
    x_static_transform_param->add_bottom("x_static");
    x_static_transform_param->add_top("W_xc_x_static_preshape");
    x_static_transform_param->add_propagate_down(true);

    LayerParameter* reshape_param = net_param->add_layer();
    reshape_param->set_type("Reshape");
    BlobShape* new_shape =
        reshape_param->mutable_reshape_param()->mutable_shape();
    new_shape->add_dim(1);  // One timestep.
    // Should infer this->N as the dimension so we can reshape on batch size.
    new_shape->add_dim(-1);
    new_shape->add_dim(
        x_static_transform_param->inner_product_param().num_output());
    reshape_param->set_name("W_xc_x_static_reshape");
    reshape_param->add_bottom("W_xc_x_static_preshape");
    reshape_param->add_top("W_xc_x_static");
  }

  LayerParameter* x_slice_param = net_param->add_layer();
  x_slice_param->CopyFrom(slice_param);
  x_slice_param->add_bottom("W_xc_x");
  x_slice_param->set_name("W_xc_x_slice");

  LayerParameter output_concat_layer;
  output_concat_layer.set_name("h_concat");
  output_concat_layer.set_type("Concat");
  output_concat_layer.add_top("h");
  output_concat_layer.mutable_concat_param()->set_axis(0);

  for (int t = 1; t <= this->T_; ++t) {
    string tm1s = format_int(t - 1);
    string ts = format_int(t);

    cont_slice_param->add_top("cont_" + ts);
    x_slice_param->add_top("W_xc_x_" + ts);

    // Add layers to flush the hidden state when beginning a new
    // sequence, as indicated by cont_t.
    //     h_conted_{t-1} := cont_t * h_{t-1}
    //
    // Normally, cont_t is binary (i.e., 0 or 1), so:
    //     h_conted_{t-1} := h_{t-1} if cont_t == 1
    //                       0       otherwise
    {
      LayerParameter* cont_h_param = net_param->add_layer();
      cont_h_param->CopyFrom(scale_param);
      cont_h_param->set_name("h_conted_" + tm1s);
      cont_h_param->add_bottom("h_" + tm1s);
      cont_h_param->add_bottom("cont_" + ts);
      cont_h_param->add_top("h_conted_" + tm1s);
    }

    // Add layer to compute
    //     W_hc_h_{t-1} := W_hc * h_conted_{t-1}
    {
      LayerParameter* w_param = net_param->add_layer();
      w_param->CopyFrom(hidden_param);
      w_param->set_name("transform_" + ts);
      w_param->add_param()->set_name("W_hc");
      w_param->add_bottom("h_conted_" + tm1s);
      w_param->add_top("W_hc_h_" + tm1s);
      w_param->mutable_inner_product_param()->set_axis(2);
    }

    // Add the outputs of the linear transformations to compute the gate input.
    //     gate_input_t := W_hc * h_conted_{t-1} + W_xc * x_t + b_c
    //                   = W_hc_h_{t-1} + W_xc_x_t + b_c
    {
      LayerParameter* input_sum_layer = net_param->add_layer();
      input_sum_layer->CopyFrom(sum_param);
      input_sum_layer->set_name("gate_input_" + ts);
      input_sum_layer->add_bottom("W_hc_h_" + tm1s);
      input_sum_layer->add_bottom("W_xc_x_" + ts);
      if (this->static_input_) {
        input_sum_layer->add_bottom("W_xc_x_static");
      }
      input_sum_layer->add_top("gate_input_" + ts);
    }

    // Add LSTMUnit layer to compute the cell & hidden vectors c_t and h_t.
    // Inputs: c_{t-1}, gate_input_t = (i_t, f_t, o_t, g_t), cont_t
    // Outputs: c_t, h_t
    //     [ i_t' ]
    //     [ f_t' ] := gate_input_t
    //     [ o_t' ]
    //     [ g_t' ]
    //         i_t := \sigmoid[i_t']
    //         f_t := \sigmoid[f_t']
    //         o_t := \sigmoid[o_t']
    //         g_t := \tanh[g_t']
    //         c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t)
    //         h_t := o_t .* \tanh[c_t]
    {
      LayerParameter* lstm_unit_param = net_param->add_layer();
      lstm_unit_param->set_type("LSTMUnit");
      lstm_unit_param->add_bottom("c_" + tm1s);
      lstm_unit_param->add_bottom("gate_input_" + ts);
      lstm_unit_param->add_bottom("cont_" + ts);
      lstm_unit_param->add_top("c_" + ts);
      lstm_unit_param->add_top("h_" + ts);
      lstm_unit_param->set_name("unit_" + ts);
    }
    output_concat_layer.add_bottom("h_" + ts);
  }  // for (int t = 1; t <= this->T_; ++t)

  {
    LayerParameter* c_T_copy_param = net_param->add_layer();
    c_T_copy_param->CopyFrom(split_param);
    c_T_copy_param->add_bottom("c_" + format_int(this->T_));
    c_T_copy_param->add_top("c_T");
  }
  net_param->add_layer()->CopyFrom(output_concat_layer);
}
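// For reference, a sketch of the RecurrentInputShapes() implementation this
// method pairs with, consistent with the CHECK_EQ(2, input_shapes.size())
// above: both recurrent inputs, c_0 and h_0, are single-timestep
// 1 x N x num_output blobs.
//
//   template <typename Dtype>
//   void LSTMLayer<Dtype>::RecurrentInputShapes(
//       vector<BlobShape>* shapes) const {
//     const int num_output =
//         this->layer_param_.recurrent_param().num_output();
//     shapes->resize(2);  // one shape each for c_0 and h_0
//     for (int i = 0; i < 2; ++i) {
//       (*shapes)[i].Clear();
//       (*shapes)[i].add_dim(1);  // a single timestep
//       (*shapes)[i].add_dim(this->N_);
//       (*shapes)[i].add_dim(num_output);
//     }
//   }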