void UpgradeV0PaddingLayers(const NetParameter& param, NetParameter* param_upgraded_pad) { // Copy everything other than the layers from the original param. param_upgraded_pad->Clear(); param_upgraded_pad->CopyFrom(param); param_upgraded_pad->clear_layers(); // Figure out which layer each bottom blob comes from. map<string, int> blob_name_to_last_top_idx; for (int i = 0; i < param.input_size(); ++i) { const string& blob_name = param.input(i); blob_name_to_last_top_idx[blob_name] = -1; } for (int i = 0; i < param.layers_size(); ++i) { const V1LayerParameter& layer_connection = param.layers(i); const V0LayerParameter& layer_param = layer_connection.layer(); // Add the layer to the new net, unless it's a padding layer. if (layer_param.type() != "padding") { param_upgraded_pad->add_layers()->CopyFrom(layer_connection); } for (int j = 0; j < layer_connection.bottom_size(); ++j) { const string& blob_name = layer_connection.bottom(j); if (blob_name_to_last_top_idx.find(blob_name) == blob_name_to_last_top_idx.end()) { LOG(FATAL) << "Unknown blob input " << blob_name << " to layer " << j; } const int top_idx = blob_name_to_last_top_idx[blob_name]; if (top_idx == -1) { continue; } const V1LayerParameter& source_layer = param.layers(top_idx); if (source_layer.layer().type() == "padding") { // This layer has a padding layer as input -- check that it is a conv // layer or a pooling layer and takes only one input. Also check that // the padding layer input has only one input and one output. Other // cases have undefined behavior in Caffe. 
CHECK((layer_param.type() == "conv") || (layer_param.type() == "pool")) << "Padding layer input to " "non-convolutional / non-pooling layer type " << layer_param.type(); CHECK_EQ(layer_connection.bottom_size(), 1) << "Conv Layer takes a single blob as input."; CHECK_EQ(source_layer.bottom_size(), 1) << "Padding Layer takes a single blob as input."; CHECK_EQ(source_layer.top_size(), 1) << "Padding Layer produces a single blob as output."; int layer_index = param_upgraded_pad->layers_size() - 1; param_upgraded_pad->mutable_layers(layer_index)->mutable_layer() ->set_pad(source_layer.layer().pad()); param_upgraded_pad->mutable_layers(layer_index) ->set_bottom(j, source_layer.bottom(0)); } } for (int j = 0; j < layer_connection.top_size(); ++j) { const string& blob_name = layer_connection.top(j); blob_name_to_last_top_idx[blob_name] = i; } } }
bool NetNeedsDataUpgrade(const NetParameter& net_param) { for (int i = 0; i < net_param.layers_size(); ++i) { if (net_param.layers(i).type() == LayerParameter_LayerType_DATA) { DataParameter layer_param = net_param.layers(i).data_param(); if (layer_param.has_scale()) { return true; } if (layer_param.has_mean_file()) { return true; } if (layer_param.has_crop_size()) { return true; } if (layer_param.has_mirror()) { return true; } } if (net_param.layers(i).type() == LayerParameter_LayerType_IMAGE_DATA) { ImageDataParameter layer_param = net_param.layers(i).image_data_param(); if (layer_param.has_scale()) { return true; } if (layer_param.has_mean_file()) { return true; } if (layer_param.has_crop_size()) { return true; } if (layer_param.has_mirror()) { return true; } } if (net_param.layers(i).type() == LayerParameter_LayerType_WINDOW_DATA) { WindowDataParameter layer_param = net_param.layers(i).window_data_param(); if (layer_param.has_scale()) { return true; } if (layer_param.has_mean_file()) { return true; } if (layer_param.has_crop_size()) { return true; } if (layer_param.has_mirror()) { return true; } } if (net_param.layers(i).type() == LayerParameter_LayerType_QDATA) { QDataParameter layer_param = net_param.layers(i).qdata_param(); if (layer_param.has_scale()) { return true; } if (layer_param.has_mean_file()) { return true; } if (layer_param.has_crop_size()) { return true; } if (layer_param.has_mirror()) { return true; } } } return false; }
// Loads parameter blobs from the layers of `param` into the layers of this
// net that carry the same name.
//
// Source layers whose name matches no layer of this net are skipped (debug
// log only). For a matched layer the number of blobs and the num / channels /
// height / width of every blob must agree, otherwise a CHECK aborts.
void Net<Dtype>::CopyTrainedLayersFrom(const NetParameter& param) {
  const int source_count = param.layers_size();
  for (int src = 0; src < source_count; ++src) {
    const LayerParameter& source_layer = param.layers(src);
    const string& source_layer_name = source_layer.name();

    // Linear scan for the first layer of this net with the same name.
    int target_layer_id = 0;
    for (; target_layer_id != layer_names_.size(); ++target_layer_id) {
      if (layer_names_[target_layer_id] == source_layer_name) {
        break;
      }
    }
    if (target_layer_id == layer_names_.size()) {
      DLOG(INFO) << "Ignoring source layer " << source_layer_name;
      continue;
    }

    DLOG(INFO) << "Copying source layer " << source_layer_name;
    // blob 0# weights, 1# bias term
    vector<shared_ptr<Blob<Dtype> > >& target_blobs =
        layers_[target_layer_id]->blobs();
    CHECK_EQ(target_blobs.size(), source_layer.blobs_size())
        << "Incompatible number of blobs for layer " << source_layer_name;

    for (int j = 0; j < target_blobs.size(); ++j) {
      Blob<Dtype>& target = *target_blobs[j];
      // Shapes must match exactly before the data is overwritten.
      CHECK_EQ(target.num(), source_layer.blobs(j).num());
      CHECK_EQ(target.channels(), source_layer.blobs(j).channels());
      CHECK_EQ(target.height(), source_layer.blobs(j).height());
      CHECK_EQ(target.width(), source_layer.blobs(j).width());
      target.FromProto(source_layer.blobs(j));
    }
  }
}
// Returns true when at least one entry of net_param.layers() has its nested
// `layer` field set (has_layer()), false otherwise (including an empty net).
bool NetNeedsV0ToV1Upgrade(const NetParameter& net_param) {
  const int layer_count = net_param.layers_size();
  int idx = 0;
  // Advance past every entry that lacks the nested `layer` field.
  while (idx < layer_count && !net_param.layers(idx).has_layer()) {
    ++idx;
  }
  // Stopping before the end means an entry with has_layer() was found.
  return idx < layer_count;
}
bool UpgradeV1Net(const NetParameter& v1_net_param, NetParameter* net_param) { bool is_fully_compatible = true; if (v1_net_param.layer_size() > 0) { LOG(ERROR) << "Input NetParameter to be upgraded already specifies 'layer' " << "fields; these will be ignored for the upgrade."; is_fully_compatible = false; } net_param->CopyFrom(v1_net_param); net_param->clear_layers(); net_param->clear_layer(); for (int i = 0; i < v1_net_param.layers_size(); ++i) { if (!UpgradeV1LayerParameter(v1_net_param.layers(i), net_param->add_layer())) { LOG(ERROR) << "Upgrade of input layer " << i << " failed."; is_fully_compatible = false; } } return is_fully_compatible; }
void NetParameterToPrettyPrint(const NetParameter& param, NetParameterPrettyPrint* pretty_param) { pretty_param->Clear(); if (param.has_name()) { pretty_param->set_name(param.name()); } if (param.has_force_backward()) { pretty_param->set_force_backward(param.force_backward()); } for (int i = 0; i < param.input_size(); ++i) { pretty_param->add_input(param.input(i)); } for (int i = 0; i < param.input_dim_size(); ++i) { pretty_param->add_input_dim(param.input_dim(i)); } for (int i = 0; i < param.layers_size(); ++i) { pretty_param->add_layers()->CopyFrom(param.layers(i)); } }
bool UpgradeV1Net(const NetParameter& v1_net_param, NetParameter* net_param) { if (v1_net_param.layer_size() > 0) { LOG(FATAL) << "Refusing to upgrade inconsistent NetParameter input; " << "the definition includes both 'layer' and 'layers' fields. " << "The current format defines 'layer' fields with string type like " << "layer { type: 'Layer' ... } and not layers { type: LAYER ... }. " << "Manually switch the definition to 'layer' format to continue."; } bool is_fully_compatible = true; net_param->CopyFrom(v1_net_param); net_param->clear_layers(); net_param->clear_layer(); for (int i = 0; i < v1_net_param.layers_size(); ++i) { if (!UpgradeV1LayerParameter(v1_net_param.layers(i), net_param->add_layer())) { LOG(ERROR) << "Upgrade of input layer " << i << " failed."; is_fully_compatible = false; } } return is_fully_compatible; }
void InsertSplits(const NetParameter& param, NetParameter* param_split) { // Initialize by copying from the input NetParameter. param_split->CopyFrom(param); param_split->clear_layers(); map<string, pair<int, int> > blob_name_to_last_top_idx; map<pair<int, int>, pair<int, int> > bottom_idx_to_source_top_idx; map<pair<int, int>, int> top_idx_to_bottom_count; map<pair<int, int>, int> top_idx_to_bottom_split_idx; map<int, string> layer_idx_to_layer_name; layer_idx_to_layer_name[-1] = "input"; // Determine the number of times each blob is used as an input (bottom) blob. for (int i = 0; i < param.input_size(); ++i) { const string& blob_name = param.input(i); blob_name_to_last_top_idx[blob_name] = make_pair(-1, i); } for (int i = 0; i < param.layers_size(); ++i) { const LayerParameter& layer_param = param.layers(i); layer_idx_to_layer_name[i] = layer_param.name(); for (int j = 0; j < layer_param.bottom_size(); ++j) { const string& blob_name = layer_param.bottom(j); if (blob_name_to_last_top_idx.find(blob_name) == blob_name_to_last_top_idx.end()) { LOG(FATAL) << "Unknown blob input " << blob_name << " to layer " << j; } const pair<int, int>& bottom_idx = make_pair(i, j); const pair<int, int>& top_idx = blob_name_to_last_top_idx[blob_name]; bottom_idx_to_source_top_idx[bottom_idx] = top_idx; ++top_idx_to_bottom_count[top_idx]; } for (int j = 0; j < layer_param.top_size(); ++j) { const string& blob_name = layer_param.top(j); blob_name_to_last_top_idx[blob_name] = make_pair(i, j); } } // Create split layer for any input blobs used by other layers as bottom // blobs more than once. 
for (int i = 0; i < param.input_size(); ++i) { const int split_count = top_idx_to_bottom_count[make_pair(-1, i)]; if (split_count > 1) { const string& layer_name = layer_idx_to_layer_name[-1]; const string& blob_name = param.input(i); LayerParameter* split_layer_param = param_split->add_layers(); ConfigureSplitLayer(layer_name, blob_name, i, split_count, split_layer_param); } } for (int i = 0; i < param.layers_size(); ++i) { LayerParameter* layer_param = param_split->add_layers(); layer_param->CopyFrom(param.layers(i)); // Replace any shared bottom blobs with split layer outputs. for (int j = 0; j < layer_param->bottom_size(); ++j) { const pair<int, int>& top_idx = bottom_idx_to_source_top_idx[make_pair(i, j)]; const int split_count = top_idx_to_bottom_count[top_idx]; if (split_count > 1) { const string& layer_name = layer_idx_to_layer_name[top_idx.first]; const string& blob_name = layer_param->bottom(j); layer_param->set_bottom(j, SplitBlobName(layer_name, blob_name, top_idx.second, top_idx_to_bottom_split_idx[top_idx]++)); } } // Create split layer for any top blobs used by other layers as bottom // blobs more than once. for (int j = 0; j < layer_param->top_size(); ++j) { const int split_count = top_idx_to_bottom_count[make_pair(i, j)]; if (split_count > 1) { const string& layer_name = layer_idx_to_layer_name[i]; const string& blob_name = layer_param->top(j); LayerParameter* split_layer_param = param_split->add_layers(); ConfigureSplitLayer(layer_name, blob_name, j, split_count, split_layer_param); } } } }
// Builds the network described by in_param.
//
// Steps, in order:
//   1. InsertSplits() produces a working copy of in_param.
//   2. One blob is created per net input from four consecutive input_dim
//      values.
//   3. For every layer: the layer object is created with GetLayer, its bottom
//      blobs are resolved by name, its top blobs are created (or reused when
//      top j has the same name as bottom j), SetUp is called, and the
//      need-backward flag is derived.
//   4. Blobs never consumed as a bottom become net outputs, the name->index
//      lookup tables are filled, and GetLearningRateAndWeightDecay() runs.
//
// Aborts (CHECK / LOG(FATAL)) when input_dim does not hold four values per
// input, when a bottom blob is not currently available, when a non-in-place
// top reuses an existing blob name, or when blobs_lr has a size that is
// neither 0 nor the layer's blob count.
void Net<Dtype>::Init(const NetParameter& in_param) {
  // Create a copy of in_param with splits added where necessary.
  NetParameter param;
  InsertSplits(in_param, &param);
  // Basically, build all the layers and set up its connections.
  name_ = param.name();
  map<string, int> blob_name_to_idx;
  // Blobs produced so far and not yet consumed by any layer.
  set<string> available_blobs;
  CHECK_EQ(param.input_size() * 4, param.input_dim_size())
      << "Incorrect bottom blob dimension specifications.";
  // Running total of blob element counts, reported in the log as bytes.
  size_t memory_used = 0;
  // set the input blobs
  for (int i = 0; i < param.input_size(); ++i) {
    const string& blob_name = param.input(i);
    shared_ptr<Blob<Dtype> > blob_pointer(
        new Blob<Dtype>(param.input_dim(i * 4),
                        param.input_dim(i * 4 + 1),
                        param.input_dim(i * 4 + 2),
                        param.input_dim(i * 4 + 3)));
    blobs_.push_back(blob_pointer);
    blob_names_.push_back(blob_name);
    blob_need_backward_.push_back(param.force_backward());
    net_input_blob_indices_.push_back(i);
    net_input_blobs_.push_back(blob_pointer.get());
    blob_name_to_idx[blob_name] = i;
    available_blobs.insert(blob_name);
    memory_used += blob_pointer->count();
  }
  DLOG(INFO) << "Memory required for Data " << memory_used*sizeof(Dtype);
  // For each layer, set up their input and output
  bottom_vecs_.resize(param.layers_size());
  top_vecs_.resize(param.layers_size());
  bottom_id_vecs_.resize(param.layers_size());
  top_id_vecs_.resize(param.layers_size());
  for (int i = 0; i < param.layers_size(); ++i) {
    bool in_place = false;
    const LayerParameter& layer_param = param.layers(i);
    layers_.push_back(shared_ptr<Layer<Dtype> >(GetLayer<Dtype>(layer_param)));
    layer_names_.push_back(layer_param.name());
    LOG(INFO) << "Creating Layer " << layer_param.name();
    bool need_backward = param.force_backward();
    // Figure out this layer's input
    for (int j = 0; j < layer_param.bottom_size(); ++j) {
      const string& blob_name = layer_param.bottom(j);
      // Validate before touching blob_name_to_idx: operator[] would insert a
      // bogus entry for an unknown name.
      if (available_blobs.find(blob_name) == available_blobs.end()) {
        LOG(FATAL) << "Unknown blob input " << blob_name << " to layer "
                   << layer_param.name() << " (bottom index " << j << ")";
      }
      const int blob_id = blob_name_to_idx[blob_name];
      LOG(INFO) << layer_param.name() << " <- " << blob_name;
      bottom_vecs_[i].push_back(blobs_[blob_id].get());
      bottom_id_vecs_[i].push_back(blob_id);
      // If a blob needs backward, this layer should provide it.
      need_backward |= blob_need_backward_[blob_id];
      // A consumed blob is no longer available to later layers.
      available_blobs.erase(blob_name);
    }
    // Figure out this layer's output
    for (int j = 0; j < layer_param.top_size(); ++j) {
      const string& blob_name = layer_param.top(j);
      // Check if we are doing in-place computation
      if (layer_param.bottom_size() > j &&
          blob_name == layer_param.bottom(j)) {
        // In-place computation
        LOG(INFO) << layer_param.name() << " -> " << blob_name << " (in-place)";
        in_place = true;
        available_blobs.insert(blob_name);
        top_vecs_[i].push_back(
            blobs_[blob_name_to_idx[blob_name]].get());
        top_id_vecs_[i].push_back(blob_name_to_idx[blob_name]);
      } else if (blob_name_to_idx.find(blob_name) != blob_name_to_idx.end()) {
        // If we are not doing in-place computation but has duplicated blobs,
        // raise an error.
        LOG(FATAL) << "Duplicate blobs produced by multiple sources.";
      } else {
        // Normal output.
        LOG(INFO) << layer_param.name() << " -> " << blob_name;
        shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
        blobs_.push_back(blob_pointer);
        blob_names_.push_back(blob_name);
        blob_need_backward_.push_back(param.force_backward());
        blob_name_to_idx[blob_name] = blob_names_.size() - 1;
        available_blobs.insert(blob_name);
        top_vecs_[i].push_back(blobs_[blob_names_.size() - 1].get());
        top_id_vecs_[i].push_back(blob_names_.size() - 1);
      }
    }
    // After this layer is connected, set it up.
    // LOG(INFO) << "Setting up " << layer_names_[i];
    layers_[i]->SetUp(bottom_vecs_[i], &(top_vecs_[i]));
    for (int topid = 0; topid < top_vecs_[i].size(); ++topid) {
      LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->num() << " "
          << top_vecs_[i][topid]->channels() << " "
          << top_vecs_[i][topid]->height() << " "
          << top_vecs_[i][topid]->width() << " ("
          << top_vecs_[i][topid]->count() << ")";
      // Tops of a layer with any in-place top are not added to the total.
      if (!in_place)
        memory_used += top_vecs_[i][topid]->count();
    }
    DLOG(INFO) << "Memory required for Data " << memory_used*sizeof(Dtype);
    // blobs: 0# weights, 1# bias term;
    // blob_lr: 1# learning rate for weights, 2# learning rate for bias
    int blobs_lr_size = layers_[i]->layer_param().blobs_lr_size();
    CHECK(blobs_lr_size == layers_[i]->blobs().size() ||
        blobs_lr_size == 0)  // 0, 1, 2
        << "Incorrect blobs lr size: should be either 0 or the same as "
           "the number of the layer's parameter blobs.";
    if (blobs_lr_size) {
      // Check if this layer needs backward operation itself
      for (int j = 0; j < blobs_lr_size; ++j) {
        need_backward |= (layers_[i]->layer_param().blobs_lr(j) > 0);
      }
    } else if (layers_[i]->blobs().size()) {
      // catch: if a layer param does not specify blobs_lr, we should assume the
      // learning rate to be 1. Thus we will need to perform backward.
      need_backward = true;
    }
    // Finally, set the backward flag
    layer_need_backward_.push_back(need_backward);
    if (need_backward) {
      LOG(INFO) << layer_names_[i] << " needs backward computation.";
      // Everything this layer produces inherits the flag.
      for (int j = 0; j < top_id_vecs_[i].size(); ++j) {
        blob_need_backward_[top_id_vecs_[i][j]] = true;
      }
    } else {
      LOG(INFO) << layer_names_[i] << " does not need backward computation.";
    }
  }
  // In the end, all remaining blobs are considered output blobs.
  for (set<string>::iterator it = available_blobs.begin();
      it != available_blobs.end(); ++it) {
    LOG(INFO) << "This network produces output " << *it;
    net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
    net_output_blob_indices_.push_back(blob_name_to_idx[*it]);
  }
  // Name -> index lookup tables for blobs and layers.
  for (size_t i = 0; i < blob_names_.size(); ++i) {
    blob_names_index_[blob_names_[i]] = i;
  }
  for (size_t i = 0; i < layer_names_.size(); ++i) {
    layer_names_index_[layer_names_[i]] = i;
  }
  GetLearningRateAndWeightDecay();
  LOG(INFO) << "Network initialization done.";
  LOG(INFO) << "Memory required for Data " << memory_used*sizeof(Dtype);
}
// Builds the network described by param.
//
// Steps, in order:
//   1. One blob is created per net input from four consecutive input_dim
//      values.
//   2. For every layer connection: the layer object is created with GetLayer,
//      its bottom blobs are resolved by name, its top blobs are created (or
//      reused when top j has the same name as bottom j), SetUp is called, and
//      the need-backward flag is derived from force_backward, the bottoms'
//      flags and any positive blobs_lr entry.
//   3. Blobs never consumed as a bottom become net outputs, and
//      GetLearningRateAndWeightDecay() runs.
//
// Aborts (CHECK / LOG(FATAL)) when input_dim does not hold four values per
// input, when a bottom blob is not currently available, or when a
// non-in-place top reuses an existing blob name.
void Net<Dtype>::Init(const NetParameter& param) {
  // Basically, build all the layers and set up its connections.
  name_ = param.name();
  map<string, int> blob_name_to_idx;
  // Blobs produced so far and not yet consumed by any layer.
  set<string> available_blobs;
  CHECK_EQ(param.input_size() * 4, param.input_dim_size())
      << "Incorrect bottom blob dimension specifications.";
  // set the input blobs
  for (int i = 0; i < param.input_size(); ++i) {
    const string& blob_name = param.input(i);
    shared_ptr<Blob<Dtype> > blob_pointer(
        new Blob<Dtype>(param.input_dim(i * 4),
                        param.input_dim(i * 4 + 1),
                        param.input_dim(i * 4 + 2),
                        param.input_dim(i * 4 + 3)));
    blobs_.push_back(blob_pointer);
    blob_names_.push_back(blob_name);
    blob_need_backward_.push_back(param.force_backward());
    net_input_blob_indices_.push_back(i);
    net_input_blobs_.push_back(blob_pointer.get());
    blob_name_to_idx[blob_name] = i;
    available_blobs.insert(blob_name);
  }
  // For each layer, set up their input and output
  bottom_vecs_.resize(param.layers_size());
  top_vecs_.resize(param.layers_size());
  bottom_id_vecs_.resize(param.layers_size());
  top_id_vecs_.resize(param.layers_size());
  for (int i = 0; i < param.layers_size(); ++i) {
    const LayerConnection& layer_connection = param.layers(i);
    const LayerParameter& layer_param = layer_connection.layer();
    layers_.push_back(shared_ptr<Layer<Dtype> >(GetLayer<Dtype>(layer_param)));
    layer_names_.push_back(layer_param.name());
    LOG(INFO) << "Creating Layer " << layer_param.name();
    bool need_backward = param.force_backward();
    // Figure out this layer's input and output
    for (int j = 0; j < layer_connection.bottom_size(); ++j) {
      const string& blob_name = layer_connection.bottom(j);
      // Validate before touching blob_name_to_idx: operator[] would insert a
      // bogus entry for an unknown name.
      if (available_blobs.find(blob_name) == available_blobs.end()) {
        LOG(FATAL) << "Unknown blob input " << blob_name << " to layer "
                   << layer_param.name() << " (bottom index " << j << ")";
      }
      const int blob_id = blob_name_to_idx[blob_name];
      LOG(INFO) << layer_param.name() << " <- " << blob_name;
      bottom_vecs_[i].push_back(
          blobs_[blob_id].get());
      bottom_id_vecs_[i].push_back(blob_id);
      // If a blob needs backward, this layer should provide it.
      need_backward |= blob_need_backward_[blob_id];
      // A consumed blob is no longer available to later layers.
      available_blobs.erase(blob_name);
    }
    for (int j = 0; j < layer_connection.top_size(); ++j) {
      const string& blob_name = layer_connection.top(j);
      // Check if we are doing in-place computation
      if (layer_connection.bottom_size() > j &&
          blob_name == layer_connection.bottom(j)) {
        // In-place computation
        LOG(INFO) << layer_param.name() << " -> " << blob_name << " (in-place)";
        available_blobs.insert(blob_name);
        top_vecs_[i].push_back(
            blobs_[blob_name_to_idx[blob_name]].get());
        top_id_vecs_[i].push_back(blob_name_to_idx[blob_name]);
      } else if (blob_name_to_idx.find(blob_name) != blob_name_to_idx.end()) {
        // If we are not doing in-place computation but has duplicated blobs,
        // raise an error.
        LOG(FATAL) << "Duplicate blobs produced by multiple sources.";
      } else {
        // Normal output.
        LOG(INFO) << layer_param.name() << " -> " << blob_name;
        shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
        blobs_.push_back(blob_pointer);
        blob_names_.push_back(blob_name);
        blob_need_backward_.push_back(param.force_backward());
        blob_name_to_idx[blob_name] = blob_names_.size() - 1;
        available_blobs.insert(blob_name);
        top_vecs_[i].push_back(blobs_[blob_names_.size() - 1].get());
        top_id_vecs_[i].push_back(blob_names_.size() - 1);
      }
    }
    // After this layer is connected, set it up.
    // LOG(INFO) << "Setting up " << layer_names_[i];
    layers_[i]->SetUp(bottom_vecs_[i], &top_vecs_[i]);
    for (int topid = 0; topid < top_vecs_[i].size(); ++topid) {
      LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->channels() << " "
          << top_vecs_[i][topid]->height() << " "
          << top_vecs_[i][topid]->width();
    }
    // Check if this layer needs backward operation itself
    for (int j = 0; j < layers_[i]->layer_param().blobs_lr_size(); ++j) {
      need_backward |= (layers_[i]->layer_param().blobs_lr(j) > 0);
    }
    // Finally, set the backward flag
    layer_need_backward_.push_back(need_backward);
    if (need_backward) {
      LOG(INFO) << layer_names_[i] << " needs backward computation.";
      // Everything this layer produces inherits the flag.
      for (int j = 0; j < top_id_vecs_[i].size(); ++j) {
        blob_need_backward_[top_id_vecs_[i][j]] = true;
      }
    } else {
      LOG(INFO) << layer_names_[i] << " does not need backward computation.";
    }
  }
  // In the end, all remaining blobs are considered output blobs.
  for (set<string>::iterator it = available_blobs.begin();
      it != available_blobs.end(); ++it) {
    LOG(INFO) << "This network produces output " << *it;
    net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
  }
  GetLearningRateAndWeightDecay();
  LOG(INFO) << "Network initialization done.";
}