void Net<Dtype>::CopyTrainedLayersFrom(const NetParameter& param) { int num_source_layers = param.layers_size(); for (int i = 0; i < num_source_layers; ++i) { const LayerParameter& source_layer = param.layers(i); const string& source_layer_name = source_layer.name(); int target_layer_id = 0; while (target_layer_id != layer_names_.size() && layer_names_[target_layer_id] != source_layer_name) { ++target_layer_id; } if (target_layer_id == layer_names_.size()) { DLOG(INFO) << "Ignoring source layer " << source_layer_name; continue; } DLOG(INFO) << "Copying source layer " << source_layer_name; vector<shared_ptr<Blob<Dtype> > >& target_blobs = layers_[target_layer_id]->blobs(); // blob 0# weights, 1# bias term CHECK_EQ(target_blobs.size(), source_layer.blobs_size()) << "Incompatible number of blobs for layer " << source_layer_name; for (int j = 0; j < target_blobs.size(); ++j) { CHECK_EQ(target_blobs[j]->num(), source_layer.blobs(j).num()); CHECK_EQ(target_blobs[j]->channels(), source_layer.blobs(j).channels()); CHECK_EQ(target_blobs[j]->height(), source_layer.blobs(j).height()); CHECK_EQ(target_blobs[j]->width(), source_layer.blobs(j).width()); target_blobs[j]->FromProto(source_layer.blobs(j)); } } }
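// Hedged usage sketch (not part of the original source): assuming the usual
// Caffe-style helper ReadNetParamsFromBinaryFileOrDie and an already
// initialized Net<float>, a trained snapshot is copied in by layer name;
// as CopyTrainedLayersFrom above shows, source layers whose names do not
// match any target layer are simply skipped.
void LoadTrainedWeightsSketch(Net<float>& net, const string& weights_file) {
  NetParameter trained;                                   // holds the snapshot
  ReadNetParamsFromBinaryFileOrDie(weights_file, &trained);
  net.CopyTrainedLayersFrom(trained);                     // copy matching layers only
}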
void Net<Dtype>::filterNet(const NetParameter& param, NetParameter* filtered_param){ NetState state(param.state()); filtered_param->CopyFrom(param); // remove all layer params and then filter filtered_param->clear_layer(); for (int i = 0; i < param.layer_size(); i++){ const LayerParameter& layer_param = param.layer(i); const string& layer_name = layer_param.name(); // usually a layer has no include/exclude rules CHECK(layer_param.include_size() == 0 || layer_param.exclude_size() == 0) << "Specify either include or exclude rules."; bool layer_included = (layer_param.include_size() == 0); // assume 'included' and check whether it meets any exclude rule for (int j = 0; layer_included && j < layer_param.exclude_size(); j++){ if (stateMeetRule(state, layer_param.exclude(j), layer_name)){ // cancel 'included' layer_included = false; } } // assume 'excluded' and check whether it meets any include rule for (int j = 0; !layer_included && j < layer_param.include_size(); j++){ if (stateMeetRule(state, layer_param.include(j), layer_name)){ // cancel 'excluded' layer_included = true; } } // copy the included layer to filtered_param if (layer_included) filtered_param->add_layer()->CopyFrom(layer_param); } }
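// Hedged illustration (not in the original source): building a NetParameter in
// which one layer carries an include rule for the TRAIN phase. With the net
// state set to TEST, filterNet above would drop that layer, while layers with
// no rules are always kept. All names here are placeholders.
NetParameter IncludeRuleExample() {
  NetParameter param;
  param.mutable_state()->set_phase(TEST);
  LayerParameter* data = param.add_layer();
  data->set_name("train_data");
  data->add_include()->set_phase(TRAIN);  // kept only when the state phase is TRAIN
  LayerParameter* conv = param.add_layer();
  conv->set_name("conv1");                // no rules: always included
  return param;
}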
SolverParameter ModelServer<Dtype>::prepare_model() { NetParameter net; solver->net()->ToProto(&net); for (int i = 0; i < net.layer_size(); ++i) { LayerParameter& layer = *net.mutable_layer(i); layer.clear_blobs(); if ((layer.type().find("Data") != std::string::npos) && (layer.has_remote_data_param())) { layer.set_type("RemoteData"); for (int j = 0; j < layer.top_size(); ++j) { *layer.mutable_remote_data_param()->add_shape() = blob_shape_by_name(layer.top(j)); } } } SolverParameter ret = solver->param(); ret.clear_net(); ret.clear_net_param(); ret.clear_test_net(); ret.clear_test_net_param(); ret.clear_train_net(); *ret.mutable_train_net_param() = net; return ret; }
void InputProducer::to_proto(NetParameter& net) const { *net.add_input() = name_; net.add_input_dim(dims_[0]); net.add_input_dim(dims_[1]); net.add_input_dim(dims_[2]); net.add_input_dim(dims_[3]); }
Net<Dtype>::Net(const string& param_file, Phase phase, const Net* root_net = NULL): root_net(root_net){ NetParameter param; readNetParamsFromTextFileOrDie(param_file, &param); param.mutable_state()->set_phase(phase); Init(param); }
void NGNet::Init( ) { input_layer_top_idx_ = 0; output_layer_top_idx_ = 0; /* Load the network. */ net_.reset(new Net<float>(model_file_, TEST)); NetParameter param; CHECK(ReadProtoFromTextFile(model_file_, &param)) << "Failed to parse NetParameter file: " << model_file_; for (int ip = 0; ip < param.layer_size(); ip++) { LayerParameter layer_param = param.layer(ip); if (layer_param.has_inner_product_param()) { InnerProductParameter* inner_product_param = layer_param.mutable_inner_product_param(); int num_output = inner_product_param->num_output(); if (num_output > 0) { inner_product_param->set_num_output(num_output * 2); } } } // param.mutable_state()->set_phase(phase); Net<float>* new_net = new Net<float>(param); net_->CopyTrainedLayersFrom(trained_file_); int input_layer_idx = -1; for (size_t layer_id = 0; layer_id < net_->layer_names().size(); ++layer_id) { if (net_->layer_names()[layer_id] == input_layer_name_) { input_layer_idx = layer_id; break; } } if (input_layer_idx == -1) { LOG(FATAL) << "Unknown layer name " << input_layer_name_; } input_layer_idx_ = input_layer_idx; input_layer_top_idx_ = 0; Blob<float>* input_layer = net_->top_vecs()[input_layer_idx_][input_layer_top_idx_]; input_layer_dim_ = input_layer->shape(1); int output_layer_idx = -1; for (size_t layer_id = 0; layer_id < net_->layer_names().size(); ++layer_id) { if (net_->layer_names()[layer_id] == output_layer_name_) { output_layer_idx = layer_id; break; } } if (output_layer_idx == -1) { LOG(FATAL) << "Unknown layer name " << output_layer_name_; } output_layer_idx_ = output_layer_idx; }
bool NetNeedsV0ToV1Upgrade(const NetParameter& net_param) { for (int i = 0; i < net_param.layers_size(); ++i) { if (net_param.layers(i).has_layer()) { return true; } } return false; }
void UpgradeV0PaddingLayers(const NetParameter& param, NetParameter* param_upgraded_pad) { // Copy everything other than the layers from the original param. param_upgraded_pad->Clear(); param_upgraded_pad->CopyFrom(param); param_upgraded_pad->clear_layers(); // Figure out which layer each bottom blob comes from. map<string, int> blob_name_to_last_top_idx; for (int i = 0; i < param.input_size(); ++i) { const string& blob_name = param.input(i); blob_name_to_last_top_idx[blob_name] = -1; } for (int i = 0; i < param.layers_size(); ++i) { const V1LayerParameter& layer_connection = param.layers(i); const V0LayerParameter& layer_param = layer_connection.layer(); // Add the layer to the new net, unless it's a padding layer. if (layer_param.type() != "padding") { param_upgraded_pad->add_layers()->CopyFrom(layer_connection); } for (int j = 0; j < layer_connection.bottom_size(); ++j) { const string& blob_name = layer_connection.bottom(j); if (blob_name_to_last_top_idx.find(blob_name) == blob_name_to_last_top_idx.end()) { LOG(FATAL) << "Unknown blob input " << blob_name << " to layer " << j; } const int top_idx = blob_name_to_last_top_idx[blob_name]; if (top_idx == -1) { continue; } const V1LayerParameter& source_layer = param.layers(top_idx); if (source_layer.layer().type() == "padding") { // This layer has a padding layer as input -- check that it is a conv // layer or a pooling layer and takes only one input. Also check that // the padding layer input has only one input and one output. Other // cases have undefined behavior in Caffe. CHECK((layer_param.type() == "conv") || (layer_param.type() == "pool")) << "Padding layer input to " "non-convolutional / non-pooling layer type " << layer_param.type(); CHECK_EQ(layer_connection.bottom_size(), 1) << "Conv Layer takes a single blob as input."; CHECK_EQ(source_layer.bottom_size(), 1) << "Padding Layer takes a single blob as input."; CHECK_EQ(source_layer.top_size(), 1) << "Padding Layer produces a single blob as output."; int layer_index = param_upgraded_pad->layers_size() - 1; param_upgraded_pad->mutable_layers(layer_index)->mutable_layer() ->set_pad(source_layer.layer().pad()); param_upgraded_pad->mutable_layers(layer_index) ->set_bottom(j, source_layer.bottom(0)); } } for (int j = 0; j < layer_connection.top_size(); ++j) { const string& blob_name = layer_connection.top(j); blob_name_to_last_top_idx[blob_name] = i; } } }
bool NetNeedsDataUpgrade(const NetParameter& net_param) { for (int i = 0; i < net_param.layers_size(); ++i) { if (net_param.layers(i).type() == LayerParameter_LayerType_DATA) { DataParameter layer_param = net_param.layers(i).data_param(); if (layer_param.has_scale()) { return true; } if (layer_param.has_mean_file()) { return true; } if (layer_param.has_crop_size()) { return true; } if (layer_param.has_mirror()) { return true; } } if (net_param.layers(i).type() == LayerParameter_LayerType_IMAGE_DATA) { ImageDataParameter layer_param = net_param.layers(i).image_data_param(); if (layer_param.has_scale()) { return true; } if (layer_param.has_mean_file()) { return true; } if (layer_param.has_crop_size()) { return true; } if (layer_param.has_mirror()) { return true; } } if (net_param.layers(i).type() == LayerParameter_LayerType_WINDOW_DATA) { WindowDataParameter layer_param = net_param.layers(i).window_data_param(); if (layer_param.has_scale()) { return true; } if (layer_param.has_mean_file()) { return true; } if (layer_param.has_crop_size()) { return true; } if (layer_param.has_mirror()) { return true; } } if (net_param.layers(i).type() == LayerParameter_LayerType_QDATA) { QDataParameter layer_param = net_param.layers(i).qdata_param(); if (layer_param.has_scale()) { return true; } if (layer_param.has_mean_file()) { return true; } if (layer_param.has_crop_size()) { return true; } if (layer_param.has_mirror()) { return true; } } } return false; }
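// Hedged sketch (not in the original source) of the matching upgrade for one
// of the four cases checked above: the deprecated scale / mean_file /
// crop_size / mirror fields are moved out of data_param and into
// transform_param. This mirrors what upstream Caffe's data-transformation
// upgrade does, but only the DATA branch is shown and the function name is
// ours.
void UpgradeDataParamSketch(LayerParameter* layer_param) {
  DataParameter* data = layer_param->mutable_data_param();
  TransformationParameter* xform = layer_param->mutable_transform_param();
  if (data->has_scale())     { xform->set_scale(data->scale());         data->clear_scale(); }
  if (data->has_mean_file()) { xform->set_mean_file(data->mean_file()); data->clear_mean_file(); }
  if (data->has_crop_size()) { xform->set_crop_size(data->crop_size()); data->clear_crop_size(); }
  if (data->has_mirror())    { xform->set_mirror(data->mirror());       data->clear_mirror(); }
}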
bool NetNeedsBatchNormUpgrade(const NetParameter& net_param) { for (int i = 0; i < net_param.layer_size(); ++i) { // Check if BatchNorm layers declare three parameters, as required by // the previous BatchNorm layer definition. if (net_param.layer(i).type() == "BatchNorm" && net_param.layer(i).param_size() == 3) { return true; } } return false; }
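// Hedged sketch (not in the original source) of the companion upgrade step:
// when the check above fires, the three legacy param entries on a BatchNorm
// layer can simply be dropped, since the current layer definition manages its
// statistics blobs itself. This follows upstream Caffe's BatchNorm upgrade,
// but the function name here is ours.
void UpgradeBatchNormSketch(NetParameter* net_param) {
  for (int i = 0; i < net_param->layer_size(); ++i) {
    LayerParameter* layer = net_param->mutable_layer(i);
    if (layer->type() == "BatchNorm" && layer->param_size() == 3) {
      layer->clear_param();  // drop the obsolete lr_mult/decay_mult specs
    }
  }
}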
void Net<Dtype>::ReInit( NetParameter& param, const int batch_size ) { layers_.clear(); layer_names_.clear(); layer_need_backward_.clear(); // blobs stores the blobs that store intermediate results between the // layers. blobs_.clear(); blob_names_.clear(); blob_need_backward_.clear(); // bottom_vecs stores the vectors containing the input for each layer. // They don't actually host the blobs (blobs_ does), so we simply store // pointers. bottom_vecs_.clear(); bottom_id_vecs_.clear(); // top_vecs stores the vectors containing the output for each layer top_vecs_.clear(); top_id_vecs_.clear(); // blob indices for the input and the output of the net net_input_blob_indices_.clear(); net_input_blobs_.clear(); net_output_blobs_.clear(); // The parameters in the network. params_.clear(); // the learning rate multipliers params_lr_.clear(); // the weight decay multipliers params_weight_decay_.clear(); param.mutable_layers(0)->mutable_layer()->set_batchsize(batch_size); Init( param ); }
bool UpgradeV1Net(const NetParameter& v1_net_param, NetParameter* net_param) { bool is_fully_compatible = true; if (v1_net_param.layer_size() > 0) { LOG(ERROR) << "Input NetParameter to be upgraded already specifies 'layer' " << "fields; these will be ignored for the upgrade."; is_fully_compatible = false; } net_param->CopyFrom(v1_net_param); net_param->clear_layers(); net_param->clear_layer(); for (int i = 0; i < v1_net_param.layers_size(); ++i) { if (!UpgradeV1LayerParameter(v1_net_param.layers(i), net_param->add_layer())) { LOG(ERROR) << "Upgrade of input layer " << i << " failed."; is_fully_compatible = false; } } return is_fully_compatible; }
void NetParameterToPrettyPrint(const NetParameter& param, NetParameterPrettyPrint* pretty_param) { pretty_param->Clear(); if (param.has_name()) { pretty_param->set_name(param.name()); } if (param.has_force_backward()) { pretty_param->set_force_backward(param.force_backward()); } for (int i = 0; i < param.input_size(); ++i) { pretty_param->add_input(param.input(i)); } for (int i = 0; i < param.input_dim_size(); ++i) { pretty_param->add_input_dim(param.input_dim(i)); } for (int i = 0; i < param.layers_size(); ++i) { pretty_param->add_layers()->CopyFrom(param.layers(i)); } }
bool UpgradeV1Net(const NetParameter& v1_net_param, NetParameter* net_param) { if (v1_net_param.layer_size() > 0) { LOG(FATAL) << "Refusing to upgrade inconsistent NetParameter input; " << "the definition includes both 'layer' and 'layers' fields. " << "The current format defines 'layer' fields with string type like " << "layer { type: 'Layer' ... } and not layers { type: LAYER ... }. " << "Manually switch the definition to 'layer' format to continue."; } bool is_fully_compatible = true; net_param->CopyFrom(v1_net_param); net_param->clear_layers(); net_param->clear_layer(); for (int i = 0; i < v1_net_param.layers_size(); ++i) { if (!UpgradeV1LayerParameter(v1_net_param.layers(i), net_param->add_layer())) { LOG(ERROR) << "Upgrade of input layer " << i << " failed."; is_fully_compatible = false; } } return is_fully_compatible; }
void Net<Dtype>::copyTrainedLayerFrom(const NetParameter& param){ int num_layers = param.layer_size(); for (int i = 0; i < num_layers; i++){ const LayerParameter& source_layer = param.layer(i); const string& source_layer_name = source_layer.name(); int target_layer_id = 0; while (target_layer_id != layer_names.size() && layer_names[target_layer_id] != source_layer_name){ target_layer_id++; } if (target_layer_id == layer_names.size()) continue; const vector < boost::shared_ptr<Blob<Dtype>>>& target_blobs = layers[target_layer_id]->getBlobs(); for (int j = 0; j < target_blobs.size(); j++){ Blob<Dtype> source_blob; source_blob.FromProto(source_layer.blobs(j)); Blob<Dtype>* target_blob = target_blobs[j].get(); CHECK(source_blob.shape() == target_blob->shape()) << "Incompatible shape when sharing trained params."; target_blob->FromProto(source_layer.blobs(j), false); } } }
void ApolloNet<Dtype>::CopyTrainedLayersFrom(const NetParameter& param) { int num_source_layers = param.layer_size(); for (int i = 0; i < num_source_layers; ++i) { const LayerParameter& source_layer = param.layer(i); const string& source_layer_name = source_layer.name(); if (layers_map_.find(source_layer_name) == layers_map_.end()) { LOG(INFO) << "Ignoring source layer " << source_layer_name; continue; } LOG(INFO) << "Copying source layer " << source_layer_name; vector<shared_ptr<Blob<Dtype> > >& target_blobs = layers_map_[source_layer_name]->blobs(); ASSERT(target_blobs.size() == source_layer.blobs_size(), "Incompatible number of blobs for layer " << source_layer_name); for (int j = 0; j < target_blobs.size(); ++j) { const bool kReshape = false; target_blobs[j]->FromProto(source_layer.blobs(j), kReshape); } } }
int Net<Dtype>::appendBottom(const NetParameter& param, const int layer_id, const int bottom_id, set<string>* available_blobs, map<string, int>* blob_name_to_idx){ const LayerParameter& layer_param = param.layer(layer_id); const string& blob_name = layer_param.bottom(bottom_id); if (!available_blobs->count(blob_name)) LOG(FATAL) << "Unknown bottom blob: " << blob_name << " at layer: " << layer_param.name() << "."; // a bottom blob must have been produced earlier as some layer's top blob const int blob_id = (*blob_name_to_idx)[blob_name]; LOG_IF(INFO, Dragon::get_root_solver()) << layer_param.name() << "[Layer-Accept] <- " << blob_name << " [Blob-Name]"; bottom_vecs[layer_id].push_back(blobs[blob_id].get()); bottom_id_vecs[layer_id].push_back(blob_id); // erase the blob so that a top blob is consumed directly by only one bottom blob // a SplitLayer is inserted to fan a top blob out into several copies available_blobs->erase(blob_name); bool need_bp = true; // default(TEST) is false bottoms_need_backward[layer_id].push_back(need_bp & blobs_need_backward[blob_id]); return blob_id; }
void Net<Dtype>::Init(const NetParameter& in_param){ CHECK(Dragon::get_root_solver() || root_net) << "Root net needs to be set for all non-root solvers."; phase = in_param.state().phase(); NetParameter filtered_param, param; // filter out unqualified LayerParameters (e.g. a Test DataLayer) filterNet(in_param, &filtered_param); insertSplits(filtered_param, &param); name = param.name(); LOG_IF(INFO, Dragon::get_root_solver()) << "Initialize net from parameters: ";/*<< endl << param.DebugString();*/ map<string, int> blob_name_to_idx; set<string> available_blobs; CHECK_EQ(param.input_size(), param.input_shape_size()) << "every input blob must specify an input_shape."; memory_used = 0; // check and stuff virtual input blobs first [Viewing Mode Only] for (int input_id=0; input_id < param.input_size(); input_id++){ const int layer_id = -1; // net_input.push_back(.....virtual blob.....) appendTop(param, layer_id, input_id, &available_blobs, &blob_name_to_idx); } // then stuff real blobs for each layer [Training/Testing/Viewing Mode] bottom_vecs.resize(param.layer_size()); bottom_id_vecs.resize(param.layer_size()); bottoms_need_backward.resize(param.layer_size()); top_vecs.resize(param.layer_size()); top_id_vecs.resize(param.layer_size()); param_id_vecs.resize(param.layer_size()); for (int layer_id = 0; layer_id < param.layer_size(); layer_id++){ bool share_from_root = !Dragon::get_root_solver() && root_net->layers[layer_id]->shareInParallel(); // copy net phase to layer if not set if (!param.layer(layer_id).has_phase()) param.mutable_layer(layer_id)->set_phase(phase); const LayerParameter& layer_param = param.layer(layer_id); if (share_from_root){ LOG(INFO) << "Share Layer: " << layer_param.name() << " from the root net."; // share layer by pointer layers.push_back(root_net->layers[layer_id]); layers[layer_id]->setShared(true); } else{ // use the layer factory to create a pointer // the layer type is referred to by layer_param->type() // see more in layer_factory.hpp layers.push_back(LayerFactory<Dtype>::createLayer(layer_param)); } layer_names.push_back(layer_param.name()); LOG_IF(INFO, Dragon::get_root_solver()) << "Create Layer: " << layer_param.name(); bool need_bp = false; // stuff bottom blobs for (int bottom_id = 0; bottom_id < layer_param.bottom_size(); bottom_id++){ const int blob_id = appendBottom(param, layer_id, bottom_id, &available_blobs, &blob_name_to_idx); // check whether a bottom blob needs back-propagation need_bp |= blobs_need_backward[blob_id]; } // stuff top blobs for (int top_id = 0; top_id < layer_param.top_size(); top_id++) appendTop(param, layer_id, top_id, &available_blobs, &blob_name_to_idx); // auto top blobs // NOT_IMPLEMENTED; Layer<Dtype>* layer = layers[layer_id].get(); // setup for layer if (share_from_root){ const vector<Blob<Dtype>*> base_top = root_net->top_vecs[layer_id]; const vector<Blob<Dtype>*> this_top = this->top_vecs[layer_id]; // reshape only after the root net has finished for (int top_id = 0; top_id < base_top.size(); top_id++){ this_top[top_id]->reshapeLike(*base_top[top_id]); } } else layer->setup(bottom_vecs[layer_id], top_vecs[layer_id]); LOG_IF(INFO, Dragon::get_root_solver()) << "Setup Layer: " << layer_param.name(); for (int top_id = 0; top_id < top_vecs[layer_id].size(); top_id++){ // extend size to the max number of blobs if necessary if (blobs_loss_weight.size() <= top_id_vecs[layer_id][top_id]) blobs_loss_weight.resize(top_id_vecs[layer_id][top_id] + 1, Dtype(0)); // store the global loss weight for each top blob of each layer blobs_loss_weight[top_id_vecs[layer_id][top_id]] = 
layer->getLoss(top_id); LOG_IF(INFO, Dragon::get_root_solver()) << "Top shape: " << top_vecs[layer_id][top_id]->shape_string(); if (layer->getLoss(top_id)) LOG_IF(INFO, Dragon::get_root_solver()) << " with loss weight " << layer->getLoss(top_id); // accumulate the memory statistics memory_used += top_vecs[layer_id][top_id]->count(); } LOG_IF(INFO, Dragon::get_root_solver()) << "Memory required for Data: " << memory_used*sizeof(Dtype); const int param_size = layer_param.param_size(); // blobs_size will be set after layer->setup() const int param_blobs_size = layer->getBlobs().size(); CHECK_LE(param_size, param_blobs_size) << "Too many params specified for layer."; // used if no hyperparameter is specified: lr_mult=decay_mult=1.0 ParamSpec default_hyperparameter; for (int param_id = 0; param_id < param_blobs_size; param_id++){ const ParamSpec* hyperparameter = param_id < param_size ? &layer_param.param(param_id) : &default_hyperparameter; const bool param_need_bp = hyperparameter->lr_mult() != 0; // check whether a param blob needs back-propagation [default=true] need_bp |= param_need_bp; layer->setParamNeedBp(param_id, param_need_bp); } // stuff param blobs for (int param_id = 0; param_id < param_blobs_size; param_id++) appendParam(param, layer_id, param_id); // update param blobs if shared with others shareWeights(); layer_need_backward.push_back(need_bp); // after checking all bottom blobs and param blobs if (need_bp) for (int top_id = 0; top_id < top_id_vecs[layer_id].size(); top_id++) blobs_need_backward[top_id_vecs[layer_id][top_id]] = true; } // end layer_id set<string> blobs_under_loss, blobs_skip_bp; for (int layer_id = layers.size()-1; layer_id >= 0; layer_id--){ bool layer_contributes_loss = false; bool layer_skip_bp = true; Layer<Dtype>* layer = layers[layer_id].get(); for (int top_id = 0; top_id < top_vecs[layer_id].size(); top_id++){ const string& blob_name = blobs_name[top_id_vecs[layer_id][top_id]]; if (layer->getLoss(top_id) || blobs_under_loss.count(blob_name)) layer_contributes_loss = true; if (!blobs_skip_bp.count(blob_name)) layer_skip_bp = false; // stop scanning once the layer is known to contribute to the loss and not to skip bp if (layer_contributes_loss && !layer_skip_bp) break; } // optimization trick: lr_mult is set but the layer is not affected by the loss if (layer_need_backward[layer_id] && layer_skip_bp){ // cancel layer layer_need_backward[layer_id] = false; // cancel bottom for (int bottom_id = 0; bottom_id < bottom_vecs[layer_id].size(); bottom_id++){ bottoms_need_backward[layer_id][bottom_id] = false; } } // cancel directly if the layer is not affected by the loss if (!layer_contributes_loss) layer_need_backward[layer_id] = false; // debug info if (Dragon::get_root_solver()){ if (layer_need_backward[layer_id]) LOG(INFO) << "Layer: " << layer_names[layer_id] << " needs back-propagation."; else LOG(INFO) << "Layer: " << layer_names[layer_id] << " does not need back-propagation."; } // if one top blob is affected by the loss // all of the layer's bottom blobs are affected as well // regard them as "loss back-affected" for (int bottom_id = 0; bottom_id < bottom_vecs[layer_id].size(); bottom_id++){ const string& blob_name = blobs_name[bottom_id_vecs[layer_id][bottom_id]]; if (layer_contributes_loss) blobs_under_loss.insert(blob_name); else bottoms_need_backward[layer_id][bottom_id] = false; // used for the optimization trick: skip all bottom blobs if (!bottoms_need_backward[layer_id][bottom_id]) blobs_skip_bp.insert(blob_name); } } // end layer id if (param.force_backward()){ for (int layer_id = 0; layer_id < layers.size(); layer_id++){ 
layer_need_backward[layer_id] = true; for (int bottom_id = 0; bottom_id < bottom_vecs[layer_id].size(); bottom_id++){ // set for bottoms bottoms_need_backward[layer_id][bottom_id] = bottoms_need_backward[layer_id][bottom_id] || layers[layer_id]->allowForceBackward(bottom_id); // set for blobs blobs_need_backward[bottom_id_vecs[layer_id][bottom_id]] = blobs_need_backward[bottom_id_vecs[layer_id][bottom_id]] || bottoms_need_backward[layer_id][bottom_id]; } // set for params for (int param_id = 0; param_id < layers[layer_id]->getBlobs().size(); param_id++){ layers[layer_id]->setParamNeedBp(param_id, true); } } } // move unused blobs (declared as a top but never used as a bottom) into the output blobs // these usually contain the loss blobs for (set<string>::iterator i = available_blobs.begin(); i != available_blobs.end(); i++){ LOG_IF(INFO, Dragon::get_root_solver()) << "Network produces output: " << *i; net_output_blobs.push_back(blobs[blob_name_to_idx[*i]].get()); net_output_blob_indices.push_back(blob_name_to_idx[*i]); } // store blob_name -> blob_id blobs_name_idx = blob_name_to_idx; // store layer_name -> layer_id for (size_t layer_id = 0; layer_id < layer_names.size(); layer_id++) layers_name_idx[layer_names[layer_id]] = layer_id; debug_info = param.debug_info(); LOG_IF(INFO, Dragon::get_root_solver()) << "Network initialization done."; }
void Net<Dtype>::Init(const NetParameter& param) { // Basically, build all the layers and set up its connections. name_ = param.name(); map<string, int> blob_name_to_idx; set<string> available_blobs; int num_layers = param.layers_size(); CHECK_EQ(param.input_size() * 4, param.input_dim_size()) << "Incorrect bottom blob dimension specifications."; // set the input blobs for (int i = 0; i < param.input_size(); ++i) { const string& blob_name = param.input(i); shared_ptr<Blob<Dtype> > blob_pointer( new Blob<Dtype>(param.input_dim(i * 4), param.input_dim(i * 4 + 1), param.input_dim(i * 4 + 2), param.input_dim(i * 4 + 3))); blobs_.push_back(blob_pointer); blob_names_.push_back(blob_name); blob_need_backward_.push_back(param.force_backward()); net_input_blob_indices_.push_back(i); net_input_blobs_.push_back(blob_pointer.get()); blob_name_to_idx[blob_name] = i; available_blobs.insert(blob_name); } // For each layer, set up their input and output bottom_vecs_.resize(param.layers_size()); top_vecs_.resize(param.layers_size()); bottom_id_vecs_.resize(param.layers_size()); top_id_vecs_.resize(param.layers_size()); for (int i = 0; i < param.layers_size(); ++i) { const LayerConnection& layer_connection = param.layers(i); const LayerParameter& layer_param = layer_connection.layer(); layers_.push_back(shared_ptr<Layer<Dtype> >(GetLayer<Dtype>(layer_param))); layer_names_.push_back(layer_param.name()); LOG(INFO) << "Creating Layer " << layer_param.name(); bool need_backward = param.force_backward(); // Figure out this layer's input and output for (int j = 0; j < layer_connection.bottom_size(); ++j) { const string& blob_name = layer_connection.bottom(j); const int blob_id = blob_name_to_idx[blob_name]; if (available_blobs.find(blob_name) == available_blobs.end()) { LOG(FATAL) << "Unknown blob input " << blob_name << " to layer" << j; } LOG(INFO) << layer_param.name() << " <- " << blob_name; bottom_vecs_[i].push_back( blobs_[blob_id].get()); bottom_id_vecs_[i].push_back(blob_id); // If a blob needs backward, this layer should provide it. need_backward |= blob_need_backward_[blob_id]; available_blobs.erase(blob_name); } for (int j = 0; j < layer_connection.top_size(); ++j) { const string& blob_name = layer_connection.top(j); // Check if we are doing in-place computation if (layer_connection.bottom_size() > j && blob_name == layer_connection.bottom(j)) { // In-place computation LOG(INFO) << layer_param.name() << " -> " << blob_name << " (in-place)"; available_blobs.insert(blob_name); top_vecs_[i].push_back( blobs_[blob_name_to_idx[blob_name]].get()); top_id_vecs_[i].push_back(blob_name_to_idx[blob_name]); } else if (blob_name_to_idx.find(blob_name) != blob_name_to_idx.end()) { // If we are not doing in-place computation but has duplicated blobs, // raise an error. LOG(FATAL) << "Duplicate blobs produced by multiple sources."; } else { // Normal output. LOG(INFO) << layer_param.name() << " -> " << blob_name; shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>()); blobs_.push_back(blob_pointer); blob_names_.push_back(blob_name); blob_need_backward_.push_back(param.force_backward()); blob_name_to_idx[blob_name] = blob_names_.size() - 1; available_blobs.insert(blob_name); top_vecs_[i].push_back(blobs_[blob_names_.size() - 1].get()); top_id_vecs_[i].push_back(blob_names_.size() - 1); } } // After this layer is connected, set it up. 
// LOG(INFO) << "Setting up " << layer_names_[i]; layers_[i]->SetUp(bottom_vecs_[i], &top_vecs_[i]); for (int topid = 0; topid < top_vecs_[i].size(); ++topid) { LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->channels() << " " << top_vecs_[i][topid]->height() << " " << top_vecs_[i][topid]->width(); } // Check if this layer needs backward operation itself for (int j = 0; j < layers_[i]->layer_param().blobs_lr_size(); ++j) { need_backward |= (layers_[i]->layer_param().blobs_lr(j) > 0); } // Finally, set the backward flag layer_need_backward_.push_back(need_backward); if (need_backward) { LOG(INFO) << layer_names_[i] << " needs backward computation."; for (int j = 0; j < top_id_vecs_[i].size(); ++j) { blob_need_backward_[top_id_vecs_[i][j]] = true; } } else { LOG(INFO) << layer_names_[i] << " does not need backward computation."; } } // In the end, all remaining blobs are considered output blobs. for (set<string>::iterator it = available_blobs.begin(); it != available_blobs.end(); ++it) { LOG(INFO) << "This network produces output " << *it; net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get()); } GetLearningRateAndWeightDecay(); LOG(INFO) << "Network initialization done."; }
bool NetNeedsV1ToV2Upgrade(const NetParameter& net_param) { return net_param.layers_size() > 0; }
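// Hedged sketch (not in the original source): the predicates above are
// typically chained so that an arbitrary net definition is brought up to the
// current format in order, V0 -> V1 -> V2. UpgradeV1Net is defined earlier in
// this section; UpgradeV0Net is assumed to be the corresponding V0 -> V1
// entry point. Error handling is omitted.
void UpgradeNetAsNeededSketch(NetParameter* param) {
  if (NetNeedsV0ToV1Upgrade(*param)) {
    NetParameter original = *param;   // keep a copy of the V0 definition
    UpgradeV0Net(original, param);    // assumed V0 -> V1 upgrade entry point
  }
  if (NetNeedsV1ToV2Upgrade(*param)) {
    NetParameter original = *param;
    UpgradeV1Net(original, param);    // defined above
  }
}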
void insertSplits(const NetParameter& param, NetParameter* splitted_param){ splitted_param->CopyFrom(param); splitted_param->clear_layer(); // pair<layer_idx,blob_idx> map<string, pair<int, int> > blob_name_to_last_top_idx; map<pair<int, int>, pair<int, int> > bottom_idx_to_source_top_idx; map<pair<int, int>, int> top_idx_to_bottom_count; map<pair<int, int>, float> top_idx_to_loss_weight; map<pair<int, int>, int> top_idx_to_bottom_split_idx; map<int, string> layer_idx_to_layer_name; layer_idx_to_layer_name[-1] = "input"; // scan all input blobs and stuff them into a virtual layer named "input" at index -1 // input blobs do not belong to any layer, so we put them into a virtual layer // usually used for viewing a Net (e.g. examples\cifar10\cifar10_full.prototxt): // input: "data" <- declares a temporary data blob // input_shape { <- declares its shape // dim: 1 // dim: 3 // dim: 32 // dim: 32 // } // note: input blobs should not be used in train/test prototxt files // because they do not specify valid data sources // regard them as viewing-only placeholders for (int i = 0; i < param.input_size(); i++){ const string& blob_name = param.input(i); blob_name_to_last_top_idx[blob_name] = make_pair(-1, i); } for (int i = 0; i < param.layer_size(); i++){ const LayerParameter& layer_param = param.layer(i); // bind layer idx to layer name layer_idx_to_layer_name[i] = layer_param.name(); // a layer may have several bottom blobs for (int j = 0; j < layer_param.bottom_size(); j++){ const string& blob_name = layer_param.bottom(j); // every bottom blob must share its name with a previously declared top blob if (!blob_name_to_last_top_idx.count(blob_name)){ LOG(FATAL) << "Unknown bottom blob: " << blob_name << " at layer: " << layer_param.name() << "."; } const pair<int, int>& bottom_idx = make_pair(i, j); const pair<int, int>& top_idx = blob_name_to_last_top_idx[blob_name]; // a bottom's name must match some top's name // find the bottom's parent top (<- backward direction) // note that the top name must be declared before the bottom name // or the bottom will bind to layer_{-1} bottom_idx_to_source_top_idx[bottom_idx] = top_idx; top_idx_to_bottom_count[top_idx]++; } // update the top name's position for the bottom names that follow for (int j = 0; j < layer_param.top_size(); j++){ const string& blob_name = layer_param.top(j); blob_name_to_last_top_idx[blob_name] = make_pair(i, j); } const int last_loss = min(layer_param.loss_weight_size(), layer_param.top_size()); // only relevant for loss layers for (int j = 0; j < last_loss; j++){ const string& blob_name = layer_param.top(j); // updated before const pair<int, int>& top_idx = blob_name_to_last_top_idx[blob_name]; top_idx_to_loss_weight[top_idx] = layer_param.loss_weight(j); // a top used as a loss counts as an extra consumer if (top_idx_to_loss_weight[top_idx]) top_idx_to_bottom_count[top_idx]++; } } // special case: an input blob in the virtual layer shared by several consumers // must be split as well for (int i = 0; i < param.input_size(); i++){ const int split_count = top_idx_to_bottom_count[make_pair(-1, i)]; if (split_count > 1){ // "input" const string& layer_name = layer_idx_to_layer_name[-1]; const string& blob_name = param.input(i); // push_back a new param LayerParameter* split_layer_param = splitted_param->add_layer(); const float kZeroLossWeight = 0; configureSplitLayer(layer_name, blob_name, i, split_count, kZeroLossWeight, split_layer_param); } } for (int i = 0; i < param.layer_size(); i++){ // push_back a new param LayerParameter* layer_param = splitted_param->add_layer(); 
layer_param->CopyFrom(param.layer(i)); for (int j = 0; j < layer_param->bottom_size(); j++){ // look up the source top for this bottom const pair<int, int>& top_idx = bottom_idx_to_source_top_idx[make_pair(i, j)]; // check how many consumers the top has const int split_count = top_idx_to_bottom_count[top_idx]; if (split_count > 1){ // the producing layer's name const string& layer_name = layer_idx_to_layer_name[top_idx.first]; const string& blob_name = layer_param->bottom(j); // e.g. conv1 => conv1_conv1_0_split_0 // increment the split index after each use layer_param->set_bottom(j, splitBlobName(layer_name, blob_name, top_idx.second, top_idx_to_bottom_split_idx[top_idx]++)); } } for (int j = 0; j < layer_param->top_size(); j++){ const pair<int, int>& top_idx = make_pair(i, j); const int split_count = top_idx_to_bottom_count[top_idx]; if (split_count > 1){ // the current layer's name const string& layer_name = layer_idx_to_layer_name[top_idx.first]; const string& blob_name = layer_param->top(j); // add a split layer LayerParameter *split_layer_param = splitted_param->add_layer(); const float loss_weight = top_idx_to_loss_weight[top_idx]; configureSplitLayer(layer_name, blob_name, j, split_count, loss_weight, split_layer_param); if (loss_weight){ layer_param->clear_loss_weight(); // the loss consumer takes split output 0, so later bottoms start from 1 top_idx_to_bottom_split_idx[top_idx]++; } } } } }
void InsertSplits(const NetParameter& param, NetParameter* param_split) { // Initialize by copying from the input NetParameter. param_split->CopyFrom(param); param_split->clear_layer(); map<string, pair<int, int> > blob_name_to_last_top_idx; map<pair<int, int>, pair<int, int> > bottom_idx_to_source_top_idx; map<pair<int, int>, int> top_idx_to_bottom_count; map<pair<int, int>, float> top_idx_to_loss_weight; map<pair<int, int>, int> top_idx_to_bottom_split_idx; map<int, string> layer_idx_to_layer_name; for (int i = 0; i < param.layer_size(); ++i) { const LayerParameter& layer_param = param.layer(i); layer_idx_to_layer_name[i] = layer_param.name(); for (int j = 0; j < layer_param.bottom_size(); ++j) { const string& blob_name = layer_param.bottom(j); if (blob_name_to_last_top_idx.find(blob_name) == blob_name_to_last_top_idx.end()) { LOG(FATAL) << "Unknown bottom blob '" << blob_name << "' (layer '" << layer_param.name() << "', bottom index " << j << ")"; } const pair<int, int>& bottom_idx = make_pair(i, j); const pair<int, int>& top_idx = blob_name_to_last_top_idx[blob_name]; bottom_idx_to_source_top_idx[bottom_idx] = top_idx; ++top_idx_to_bottom_count[top_idx]; } for (int j = 0; j < layer_param.top_size(); ++j) { const string& blob_name = layer_param.top(j); blob_name_to_last_top_idx[blob_name] = make_pair(i, j); } // A use of a top blob as a loss should be handled similarly to the use of // a top blob as a bottom blob to another layer. const int last_loss = std::min(layer_param.loss_weight_size(), layer_param.top_size()); for (int j = 0; j < last_loss; ++j) { const string& blob_name = layer_param.top(j); const pair<int, int>& top_idx = blob_name_to_last_top_idx[blob_name]; top_idx_to_loss_weight[top_idx] = layer_param.loss_weight(j); if (top_idx_to_loss_weight[top_idx]) { ++top_idx_to_bottom_count[top_idx]; } } } for (int i = 0; i < param.layer_size(); ++i) { LayerParameter* layer_param = param_split->add_layer(); layer_param->CopyFrom(param.layer(i)); // Replace any shared bottom blobs with split layer outputs. for (int j = 0; j < layer_param->bottom_size(); ++j) { const pair<int, int>& top_idx = bottom_idx_to_source_top_idx[make_pair(i, j)]; const int split_count = top_idx_to_bottom_count[top_idx]; if (split_count > 1) { const string& layer_name = layer_idx_to_layer_name[top_idx.first]; const string& blob_name = layer_param->bottom(j); layer_param->set_bottom(j, SplitBlobName(layer_name, blob_name, top_idx.second, top_idx_to_bottom_split_idx[top_idx]++)); } } // Create split layer for any top blobs used by other layer as bottom // blobs more than once. for (int j = 0; j < layer_param->top_size(); ++j) { const pair<int, int>& top_idx = make_pair(i, j); const int split_count = top_idx_to_bottom_count[top_idx]; if (split_count > 1) { const string& layer_name = layer_idx_to_layer_name[i]; const string& blob_name = layer_param->top(j); LayerParameter* split_layer_param = param_split->add_layer(); const float loss_weight = top_idx_to_loss_weight[top_idx]; ConfigureSplitLayer(layer_name, blob_name, j, split_count, loss_weight, split_layer_param); if (loss_weight) { layer_param->clear_loss_weight(); top_idx_to_bottom_split_idx[top_idx]++; } } } } }
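// Hedged illustration (not in the original source): when one top blob feeds
// two different layers, InsertSplits above rewrites those bottoms to point at
// the tops of an inserted Split layer. The tiny three-layer net below is a
// placeholder built just to show the effect.
NetParameter SplitExample() {
  NetParameter param;
  LayerParameter* conv = param.add_layer();
  conv->set_name("conv1");
  conv->set_type("Convolution");
  conv->add_top("conv1");
  LayerParameter* relu = param.add_layer();
  relu->set_name("relu1");
  relu->set_type("ReLU");
  relu->add_bottom("conv1");
  relu->add_top("relu1");
  LayerParameter* pool = param.add_layer();
  pool->set_name("pool1");
  pool->set_type("Pooling");
  pool->add_bottom("conv1");
  pool->add_top("pool1");
  NetParameter with_splits;
  InsertSplits(param, &with_splits);
  // with_splits is expected to gain a "conv1_conv1_0_split" layer whose two
  // tops replace the original "conv1" bottoms of relu1 and pool1.
  return with_splits;
}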
void BaseProducer::to_proto (NetParameter& net) const { *net.add_layers() = layer_param_; }
void Net<Dtype>::Init(const NetParameter& in_param) { // Create a copy of in_param with splits added where necessary. NetParameter param; InsertSplits(in_param, &param); // Basically, build all the layers and set up its connections. name_ = param.name(); map<string, int> blob_name_to_idx; set<string> available_blobs; int num_layers = param.layers_size(); CHECK_EQ(param.input_size() * 4, param.input_dim_size()) << "Incorrect bottom blob dimension specifications."; size_t memory_used = 0; // set the input blobs for (int i = 0; i < param.input_size(); ++i) { const string& blob_name = param.input(i); shared_ptr<Blob<Dtype> > blob_pointer( new Blob<Dtype>(param.input_dim(i * 4), param.input_dim(i * 4 + 1), param.input_dim(i * 4 + 2), param.input_dim(i * 4 + 3))); blobs_.push_back(blob_pointer); blob_names_.push_back(blob_name); blob_need_backward_.push_back(param.force_backward()); net_input_blob_indices_.push_back(i); net_input_blobs_.push_back(blob_pointer.get()); blob_name_to_idx[blob_name] = i; available_blobs.insert(blob_name); memory_used += blob_pointer->count(); } DLOG(INFO) << "Memory required for Data" << memory_used*sizeof(Dtype); // For each layer, set up their input and output bottom_vecs_.resize(param.layers_size()); top_vecs_.resize(param.layers_size()); bottom_id_vecs_.resize(param.layers_size()); top_id_vecs_.resize(param.layers_size()); for (int i = 0; i < param.layers_size(); ++i) { bool in_place = false; const LayerParameter& layer_param = param.layers(i); layers_.push_back(shared_ptr<Layer<Dtype> >(GetLayer<Dtype>(layer_param))); layer_names_.push_back(layer_param.name()); LOG(INFO) << "Creating Layer " << layer_param.name(); bool need_backward = param.force_backward(); // Figure out this layer's input for (int j = 0; j < layer_param.bottom_size(); ++j) { const string& blob_name = layer_param.bottom(j); const int blob_id = blob_name_to_idx[blob_name]; if (available_blobs.find(blob_name) == available_blobs.end()) { LOG(FATAL) << "Unknown blob input " << blob_name << " to layer" << j; } LOG(INFO) << layer_param.name() << " <- " << blob_name; bottom_vecs_[i].push_back(blobs_[blob_id].get()); bottom_id_vecs_[i].push_back(blob_id); // If a blob needs backward, this layer should provide it. need_backward |= blob_need_backward_[blob_id]; available_blobs.erase(blob_name); } // Figure out this layer's output for (int j = 0; j < layer_param.top_size(); ++j) { const string& blob_name = layer_param.top(j); // Check if we are doing in-place computation if (layer_param.bottom_size() > j && blob_name == layer_param.bottom(j)) { // In-place computation LOG(INFO) << layer_param.name() << " -> " << blob_name << " (in-place)"; in_place = true; available_blobs.insert(blob_name); top_vecs_[i].push_back( blobs_[blob_name_to_idx[blob_name]].get()); top_id_vecs_[i].push_back(blob_name_to_idx[blob_name]); } else if (blob_name_to_idx.find(blob_name) != blob_name_to_idx.end()) { // If we are not doing in-place computation but has duplicated blobs, // raise an error. LOG(FATAL) << "Duplicate blobs produced by multiple sources."; } else { // Normal output. 
LOG(INFO) << layer_param.name() << " -> " << blob_name; shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>()); blobs_.push_back(blob_pointer); blob_names_.push_back(blob_name); blob_need_backward_.push_back(param.force_backward()); blob_name_to_idx[blob_name] = blob_names_.size() - 1; available_blobs.insert(blob_name); top_vecs_[i].push_back(blobs_[blob_names_.size() - 1].get()); top_id_vecs_[i].push_back(blob_names_.size() - 1); } } // After this layer is connected, set it up. //LOG(INFO) << "Setting up " << layer_names_[i]; layers_[i]->SetUp(bottom_vecs_[i], &(top_vecs_[i])); for (int topid = 0; topid < top_vecs_[i].size(); ++topid) { LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->num() << " " << top_vecs_[i][topid]->channels() << " " << top_vecs_[i][topid]->height() << " " << top_vecs_[i][topid]->width() << " (" << top_vecs_[i][topid]->count() << ")"; if (!in_place) memory_used += top_vecs_[i][topid]->count(); } DLOG(INFO) << "Memory required for Data " << memory_used*sizeof(Dtype); // blobs: 0# weights, 1# bias term; blob_lr: 1# learning rate for weights, 2# learning rate for bias int blobs_lr_size = layers_[i]->layer_param().blobs_lr_size(); CHECK(blobs_lr_size == layers_[i]->blobs().size() || blobs_lr_size == 0) // 0, 1, 2 << "Incorrect blobs lr size: should be either 0 or the same as " "the number of the layer's parameter blobs."; if (blobs_lr_size) { // Check if this layer needs backward operation itself for (int j = 0; j < blobs_lr_size; ++j) { need_backward |= (layers_[i]->layer_param().blobs_lr(j) > 0); } } else if (layers_[i]->blobs().size()) { // catch: if a layer param does not specify blobs_lr, we should assume the // learning rate to be 1. Thus we will need to perform backward. need_backward = true; } // Finally, set the backward flag layer_need_backward_.push_back(need_backward); if (need_backward) { LOG(INFO) << layer_names_[i] << " needs backward computation."; for (int j = 0; j < top_id_vecs_[i].size(); ++j) { blob_need_backward_[top_id_vecs_[i][j]] = true; } } else { LOG(INFO) << layer_names_[i] << " does not need backward computation."; } } // In the end, all remaining blobs are considered output blobs. for (set<string>::iterator it = available_blobs.begin(); it != available_blobs.end(); ++it) { LOG(INFO) << "This network produces output " << *it; net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get()); net_output_blob_indices_.push_back(blob_name_to_idx[*it]); } for (size_t i = 0; i < blob_names_.size(); ++i) { blob_names_index_[blob_names_[i]] = i; } for (size_t i = 0; i < layer_names_.size(); ++i) { layer_names_index_[layer_names_[i]] = i; } GetLearningRateAndWeightDecay(); LOG(INFO) << "Network initialization done."; LOG(INFO) << "Memory required for Data " << memory_used*sizeof(Dtype); }
void Net<Dtype>::appendTop(const NetParameter& param, const int layer_id, const int top_id, set<string>* available_blobs, map<string, int>* blob_name_to_idx){ boost::shared_ptr<LayerParameter> layer_param( layer_id >= 0 ? new LayerParameter(param.layer(layer_id)) : NULL); // use (layer_id/top_id) or (-1/top_id) to get a blob name const string& blob_name = layer_param ? (top_id < layer_param->top_size() ? layer_param->top(top_id) : "(automatic)") : param.input(top_id); // in-place case, e.g.: // I0721 10:38:16.722070 4692 net.cpp:84] relu1 <- conv1 // I0721 10:38:16.722082 4692 net.cpp:98] relu1 -> conv1 (in-place) // check whether the blob sits at the same position in both bottom and top if (blob_name_to_idx && layer_param && top_id < layer_param->bottom_size() && blob_name == layer_param->bottom(top_id)){ LOG_IF(INFO, Dragon::get_root_solver()) << layer_param->name() << "[Layer-Produce]->" << blob_name << " [Blob-Name] (in-place)"; // add into this layer's top blobs using blob_name top_vecs[layer_id].push_back(blobs[(*blob_name_to_idx)[blob_name]].get()); // log the id top_id_vecs[layer_id].push_back((*blob_name_to_idx)[blob_name]); } else if (blob_name_to_idx && (*blob_name_to_idx).count(blob_name)){ LOG(FATAL) << "Top blob: " << blob_name << " is produced by multiple sources."; } // normal top blob stuffing else{ // debug info if (Dragon::get_root_solver()){ if (layer_param) LOG(INFO) << layer_param->name() << "[Layer-Produce] ->" << blob_name << " [Blob-Name]"; // special case, only used when viewing a Net's structure // because virtual input blobs specify no data source and cannot be trained or tested // virtual data input blobs do not belong to any layer // see more in insert_splits.cpp/void InsertSplits() else LOG(INFO) << "Input " << top_id << "[Blob-Code] -> " << blob_name << "[Blob-Name]"; } // allocate a null blob at first boost::shared_ptr<Blob<Dtype>> ptr_blob(new Blob<Dtype>()); // store global blob info const int blob_id = blobs.size(); blobs.push_back(ptr_blob); blobs_name.push_back(blob_name); blobs_need_backward.push_back(false); // record the index for this name // this also marks the top blob as available to be bound by a bottom // checking it beforehand tells whether a top blob has multiple sources (forbidden) if (blob_name_to_idx) (*blob_name_to_idx)[blob_name] = blob_id; // reshape virtual input blobs only // because they do not belong to a DataLayer (which would provide the reshape/transform service) if (layer_id == -1){ ptr_blob->reshape(param.input_shape(top_id)); // store solely for virtual input blobs net_input_blobs.push_back(ptr_blob.get()); net_input_blob_indices.push_back(blob_id); } else{ top_vecs[layer_id].push_back(ptr_blob.get()); top_id_vecs[layer_id].push_back(blob_id); } } // a set listing all existing top blobs if (available_blobs) available_blobs->insert(blob_name); }
void InsertSplits(const NetParameter& param, NetParameter* param_split) { // Initialize by copying from the input NetParameter. param_split->CopyFrom(param); param_split->clear_layers(); map<string, pair<int, int> > blob_name_to_last_top_idx; map<pair<int, int>, pair<int, int> > bottom_idx_to_source_top_idx; map<pair<int, int>, int> top_idx_to_bottom_count; map<pair<int, int>, int> top_idx_to_bottom_split_idx; map<int, string> layer_idx_to_layer_name; layer_idx_to_layer_name[-1] = "input"; // Determine the number of times each blob is used as an input (bottom) blob. for (int i = 0; i < param.input_size(); ++i) { const string& blob_name = param.input(i); blob_name_to_last_top_idx[blob_name] = make_pair(-1, i); } for (int i = 0; i < param.layers_size(); ++i) { const LayerParameter& layer_param = param.layers(i); layer_idx_to_layer_name[i] = layer_param.name(); for (int j = 0; j < layer_param.bottom_size(); ++j) { const string& blob_name = layer_param.bottom(j); if (blob_name_to_last_top_idx.find(blob_name) == blob_name_to_last_top_idx.end()) { LOG(FATAL) << "Unknown blob input " << blob_name << " to layer " << j; } const pair<int, int>& bottom_idx = make_pair(i, j); const pair<int, int>& top_idx = blob_name_to_last_top_idx[blob_name]; bottom_idx_to_source_top_idx[bottom_idx] = top_idx; ++top_idx_to_bottom_count[top_idx]; } for (int j = 0; j < layer_param.top_size(); ++j) { const string& blob_name = layer_param.top(j); blob_name_to_last_top_idx[blob_name] = make_pair(i, j); } } // Create split layer for any input blobs used by other layers as bottom // blobs more than once. for (int i = 0; i < param.input_size(); ++i) { const int split_count = top_idx_to_bottom_count[make_pair(-1, i)]; if (split_count > 1) { const string& layer_name = layer_idx_to_layer_name[-1]; const string& blob_name = param.input(i); LayerParameter* split_layer_param = param_split->add_layers(); ConfigureSplitLayer(layer_name, blob_name, i, split_count, split_layer_param); } } for (int i = 0; i < param.layers_size(); ++i) { LayerParameter* layer_param = param_split->add_layers(); layer_param->CopyFrom(param.layers(i)); // Replace any shared bottom blobs with split layer outputs. for (int j = 0; j < layer_param->bottom_size(); ++j) { const pair<int, int>& top_idx = bottom_idx_to_source_top_idx[make_pair(i, j)]; const int split_count = top_idx_to_bottom_count[top_idx]; if (split_count > 1) { const string& layer_name = layer_idx_to_layer_name[top_idx.first]; const string& blob_name = layer_param->bottom(j); layer_param->set_bottom(j, SplitBlobName(layer_name, blob_name, top_idx.second, top_idx_to_bottom_split_idx[top_idx]++)); } } // Create split layer for any top blobs used by other layers as bottom // blobs more than once. for (int j = 0; j < layer_param->top_size(); ++j) { const int split_count = top_idx_to_bottom_count[make_pair(i, j)]; if (split_count > 1) { const string& layer_name = layer_idx_to_layer_name[i]; const string& blob_name = layer_param->top(j); LayerParameter* split_layer_param = param_split->add_layers(); ConfigureSplitLayer(layer_name, blob_name, j, split_count, split_layer_param); } } } }
// Generate inception network specifications. int main(int argc, char** argv) { string prefix(argv[1]); // Deployment network spec. string deploy_fname = prefix + "_deploy.prototxt"; NetParameter deploy; deploy.set_name("Inception"); deploy.add_input("data"); deploy.add_input_dim(10); deploy.add_input_dim(3); deploy.add_input_dim(227); deploy.add_input_dim(227); AddBody(&deploy); AddSoftmaxLayer("prob", "linear", deploy.add_layers()); WriteProtoToTextFile(deploy, deploy_fname); // Training network spec. string train_fname = prefix + "_train.prototxt"; NetParameter train; train.set_name("Inception"); AddDataLayer("data", "label", traindb, meanfile, 64, 227, true, train.add_layers()); AddBody(&train); AddSoftmaxLoss("prob", "linear", "label", train.add_layers()); WriteProtoToTextFile(train, train_fname); // Validation network spec. string val_fname = prefix + "_val.prototxt"; NetParameter val; val.set_name("Inception"); AddDataLayer("data", "label", valdb, meanfile, 50, 227, false, val.add_layers()); AddBody(&val); AddSoftmaxLayer("prob", "linear", val.add_layers()); AddAccuracyLayer("accuracy", "prob", "label", val.add_layers()); WriteProtoToTextFile(val, val_fname); }
void Net<Dtype>::appendParam(const NetParameter& param, const int layer_id, const int param_id){ const LayerParameter& layer_param = param.layer(layer_id); Layer<Dtype>* layer = layers[layer_id].get(); const int param_size = layer_param.param_size(); // default name="" (not set) string param_name = param_id < param_size ? layer_param.param(param_id).name() : ""; // has a name if (param_name.size()) param_display_names.push_back(param_name); // otherwise use param_id as the display name else{ ostringstream display_name; display_name << param_id; param_display_names.push_back(display_name.str()); } // each param blob has a net-wide id (both weights and bias) const int net_param_id = param_blobs.size(); // add the param blob, which can then be looked up by its net id param_blobs.push_back(layer->getBlobs()[param_id]); // store the net id // param_id_vecs[layer_id][param_id] yields the net_param_id param_id_vecs[layer_id].push_back(net_param_id); // store the original id (x-th layer / y-th param) // param_layer_indices[net_param_id] yields layer_id/param_id param_layer_indices.push_back(make_pair(layer_id, param_id)); ParamSpec default_hyperparameter; const ParamSpec* hyperparameter = param_id < param_size ? &layer_param.param(param_id) : &default_hyperparameter; // either has no name, or has a name that has not been seen before if (!param_size || !param_name.size() || (param_name.size() && !param_names_index.count(param_name))){ param_owners.push_back(-1); // has a (non-empty) name but has not been logged before if (param_name.size()) param_names_index[param_name] = net_param_id; const int learnable_param_id = learnable_params.size(); learnable_params.push_back(param_blobs[net_param_id].get()); learnable_param_ids.push_back(learnable_param_id); has_params_lr.push_back(hyperparameter->has_lr_mult()); has_params_decay.push_back(hyperparameter->has_decay_mult()); params_lr.push_back(hyperparameter->lr_mult()); params_decay.push_back(hyperparameter->decay_mult()); } else{ // has a (non-empty) name that has been logged before // this param is shared, so we need to get the owner id const int owner_net_param_id = param_names_index[param_name]; param_owners.push_back(owner_net_param_id); const pair<int, int>& owner_index = param_layer_indices[owner_net_param_id]; const int owner_layer_id = owner_index.first; const int owner_param_id = owner_index.second; LOG_IF(INFO, Dragon::get_root_solver()) << "Share parameter: " << param_name << " owned by layer: " << layer_names[owner_layer_id] << " param index: " << owner_param_id; Blob<Dtype>* this_blob = param_blobs[net_param_id].get(); Blob<Dtype>* owner_blob = param_blobs[owner_net_param_id].get(); CHECK(this_blob); CHECK(owner_blob); // check before sharing if (layer_param.param(param_id).share_mode() == ParamSpec_DimCheckMode_PERMISSIVE_MODE) CHECK_EQ(this_blob->count(), owner_blob->count()); else CHECK(this_blob->shape() == owner_blob->shape()); // note that learnable_param_id = owner_net_param_id const int learnable_param_id = learnable_param_ids[owner_net_param_id]; // store the owner's id learnable_param_ids.push_back(learnable_param_id); // check lr_mult if (hyperparameter->has_lr_mult()){ if (has_params_lr[learnable_param_id]) CHECK_EQ(hyperparameter->lr_mult(), params_lr[learnable_param_id]) << "Shared param: " << param_name << " has mismatched lr_mult."; else{ has_params_lr[learnable_param_id] = true; params_lr[learnable_param_id] = hyperparameter->lr_mult(); } } // check decay_mult if (hyperparameter->has_decay_mult()){ if (has_params_decay[learnable_param_id]) CHECK_EQ(hyperparameter->decay_mult(), params_decay[learnable_param_id]) << "Shared param: " << 
param_name << " has mismatched decay_mult."; else{ has_params_decay[learnable_param_id] = true; params_decay[learnable_param_id] = hyperparameter->decay_mult(); } } } }