// Initializes the network from a NetParameter description.
//
// Steps, in order:
//   1. Pass in_param through InsertSplits and work on the resulting copy.
//   2. Create one blob per declared network input (four dims per input).
//   3. For every layer: instantiate it, wire its bottom (input) and top
//      (output) blobs, call SetUp, and decide whether it needs backward.
//   4. Treat every blob that is still unconsumed as a network output.
//   5. Build the name -> index lookup tables and call
//      GetLearningRateAndWeightDecay().
//
// Fails through CHECK / LOG(FATAL) when the input dimensions are
// inconsistent, a layer consumes a blob that is not available, two layers
// produce the same blob (other than in-place), or blobs_lr has an
// unexpected size.
void Net<Dtype>::Init(const NetParameter& in_param) {
	// Create a copy of in_param with splits added where necessary.
	NetParameter param;
	InsertSplits(in_param, &param);

	// Basically, build all the layers and set up their connections.
	name_ = param.name();
	// Every blob name seen so far -> its index in blobs_.
	map<string, int> blob_name_to_idx;
	// Blobs that have been produced but not yet consumed by a layer.
	set<string> available_blobs;
	const int num_layers = param.layers_size();

	// Each input blob is described by exactly four dimensions.
	CHECK_EQ(param.input_size() * 4, param.input_dim_size())
		<< "Incorrect bottom blob dimension specifications.";
	// Running element count of the data blobs; only used for logging.
	size_t memory_used = 0;

	// Set the input blobs.
	for (int i = 0; i < param.input_size(); ++i) {
		const string& blob_name = param.input(i);
		shared_ptr<Blob<Dtype> > blob_pointer(
			new Blob<Dtype>(param.input_dim(i * 4),
				param.input_dim(i * 4 + 1),
				param.input_dim(i * 4 + 2),
				param.input_dim(i * 4 + 3)));
		blobs_.push_back(blob_pointer);
		blob_names_.push_back(blob_name);
		blob_need_backward_.push_back(param.force_backward());
		net_input_blob_indices_.push_back(i);
		net_input_blobs_.push_back(blob_pointer.get());
		blob_name_to_idx[blob_name] = i;
		available_blobs.insert(blob_name);
		memory_used += blob_pointer->count();
	}
	DLOG(INFO) << "Memory required for Data " << memory_used*sizeof(Dtype);

	// For each layer, set up their input and output.
	bottom_vecs_.resize(num_layers);
	top_vecs_.resize(num_layers);
	bottom_id_vecs_.resize(num_layers);
	top_id_vecs_.resize(num_layers);

	for (int i = 0; i < num_layers; ++i) {
		// NOTE(review): this flag is per layer, not per top. If any top of the
		// layer is in-place, none of the layer's tops are added to memory_used.
		bool in_place = false;
		const LayerParameter& layer_param = param.layers(i);
		layers_.push_back(shared_ptr<Layer<Dtype> >(GetLayer<Dtype>(layer_param)));
		layer_names_.push_back(layer_param.name());

		LOG(INFO) << "Creating Layer " << layer_param.name();
		bool need_backward = param.force_backward();

		// Figure out this layer's input.
		for (int j = 0; j < layer_param.bottom_size(); ++j) {
			const string& blob_name = layer_param.bottom(j);
			// Validate before touching the map: operator[] would otherwise
			// insert a bogus entry (index 0) for an unknown blob name.
			if (available_blobs.find(blob_name) == available_blobs.end()) {
				LOG(FATAL) << "Unknown blob input " << blob_name
					<< " (at index " << j << ") to layer " << layer_param.name();
			}
			const int blob_id = blob_name_to_idx[blob_name];
			LOG(INFO) << layer_param.name() << " <- " << blob_name;
			bottom_vecs_[i].push_back(blobs_[blob_id].get());
			bottom_id_vecs_[i].push_back(blob_id);
			// If a blob needs backward, this layer should provide it.
			need_backward |= blob_need_backward_[blob_id];
			// The blob is consumed; it is no longer a candidate network output.
			available_blobs.erase(blob_name);
		}

		// Figure out this layer's output.
		for (int j = 0; j < layer_param.top_size(); ++j) {
			const string& blob_name = layer_param.top(j);

			// A top that has the same name as the bottom at the same position
			// means in-place computation.
			if (layer_param.bottom_size() > j &&
				blob_name == layer_param.bottom(j)) {
				// In-place computation: reuse the existing blob and make it
				// available again for later layers.
				LOG(INFO) << layer_param.name() << " -> " << blob_name << " (in-place)";
				in_place = true;
				available_blobs.insert(blob_name);
				const int blob_id = blob_name_to_idx[blob_name];
				top_vecs_[i].push_back(blobs_[blob_id].get());
				top_id_vecs_[i].push_back(blob_id);
			} else if (blob_name_to_idx.find(blob_name) != blob_name_to_idx.end()) {
				// If we are not doing in-place computation but have duplicated
				// blobs, raise an error.
				LOG(FATAL) << "Duplicate blobs produced by multiple sources.";
			} else {
				// Normal output: create a fresh blob and register it.
				LOG(INFO) << layer_param.name() << " -> " << blob_name;
				shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
				blobs_.push_back(blob_pointer);
				blob_names_.push_back(blob_name);
				blob_need_backward_.push_back(param.force_backward());
				const int blob_id = static_cast<int>(blob_names_.size()) - 1;
				blob_name_to_idx[blob_name] = blob_id;
				available_blobs.insert(blob_name);
				top_vecs_[i].push_back(blobs_[blob_id].get());
				top_id_vecs_[i].push_back(blob_id);
			}
		}

		// After this layer is connected, set it up.
		//LOG(INFO) << "Setting up " << layer_names_[i];
		layers_[i]->SetUp(bottom_vecs_[i], &(top_vecs_[i]));
		for (size_t topid = 0; topid < top_vecs_[i].size(); ++topid) {
			LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->num() << " "
				<< top_vecs_[i][topid]->channels() << " "
				<< top_vecs_[i][topid]->height() << " "
				<< top_vecs_[i][topid]->width() << " ("
				<< top_vecs_[i][topid]->count() << ")";
			if (!in_place)
				memory_used += top_vecs_[i][topid]->count();
		}
		DLOG(INFO) << "Memory  required for Data " << memory_used*sizeof(Dtype);

		// blobs_lr must either be absent or provide one learning-rate
		// multiplier per parameter blob of the layer.
		const int blobs_lr_size = layers_[i]->layer_param().blobs_lr_size();
		const int num_param_blobs = static_cast<int>(layers_[i]->blobs().size());
		CHECK(blobs_lr_size == num_param_blobs || blobs_lr_size == 0)
			<< "Incorrect blobs lr size: should be either 0 or the same as "
			"the number of the layer's parameter blobs.";

		if (blobs_lr_size) {
			// Check if this layer needs backward operation itself.
			for (int j = 0; j < blobs_lr_size; ++j) {
				need_backward |= (layers_[i]->layer_param().blobs_lr(j) > 0);
			}
		} else if (num_param_blobs) {
			// Catch: if a layer param does not specify blobs_lr, we should assume
			// the learning rate to be 1. Thus we will need to perform backward.
			need_backward = true;
		}

		// Finally, set the backward flag.
		layer_need_backward_.push_back(need_backward);

		if (need_backward) {
			LOG(INFO) << layer_names_[i] << " needs backward computation.";
			// Everything this layer produces must propagate gradients as well.
			for (size_t j = 0; j < top_id_vecs_[i].size(); ++j) {
				blob_need_backward_[top_id_vecs_[i][j]] = true;
			}
		} else {
			LOG(INFO) << layer_names_[i] << " does not need backward computation.";
		}
	}

	// In the end, all remaining blobs are considered output blobs.
	for (set<string>::iterator it = available_blobs.begin();
		it != available_blobs.end(); ++it) {
		LOG(INFO) << "This network produces output " << *it;
		const int blob_id = blob_name_to_idx[*it];
		net_output_blobs_.push_back(blobs_[blob_id].get());
		net_output_blob_indices_.push_back(blob_id);
	}

	// Name -> index lookup tables for blobs and layers.
	for (size_t i = 0; i < blob_names_.size(); ++i) {
		blob_names_index_[blob_names_[i]] = i;
	}

	for (size_t i = 0; i < layer_names_.size(); ++i) {
		layer_names_index_[layer_names_[i]] = i;
	}

	GetLearningRateAndWeightDecay();

	LOG(INFO) << "Network initialization done.";
	LOG(INFO) << "Memory required for Data " << memory_used*sizeof(Dtype);
}
Example #2
0
// Initializes the network from a NetParameter description.
//
// Steps, in order:
//   1. Create one blob per declared network input (four dims per input).
//   2. For every layer connection: instantiate the layer, wire its bottom
//      (input) and top (output) blobs, call SetUp, and decide whether it
//      needs backward.
//   3. Treat every blob that is still unconsumed as a network output.
//   4. Call GetLearningRateAndWeightDecay().
//
// Fails through CHECK_EQ / LOG(FATAL) when the input dimensions are
// inconsistent, a layer consumes a blob that is not available, or two
// layers produce the same blob (other than in-place).
void Net<Dtype>::Init(const NetParameter& param) {
  // Basically, build all the layers and set up their connections.
  name_ = param.name();
  // Every blob name seen so far -> its index in blobs_.
  map<string, int> blob_name_to_idx;
  // Blobs that have been produced but not yet consumed by a layer.
  set<string> available_blobs;
  const int num_layers = param.layers_size();
  // Each input blob is described by exactly four dimensions.
  CHECK_EQ(param.input_size() * 4, param.input_dim_size())
      << "Incorrect bottom blob dimension specifications.";
  // Set the input blobs.
  for (int i = 0; i < param.input_size(); ++i) {
    const string& blob_name = param.input(i);
    shared_ptr<Blob<Dtype> > blob_pointer(
        new Blob<Dtype>(param.input_dim(i * 4),
                        param.input_dim(i * 4 + 1),
                        param.input_dim(i * 4 + 2),
                        param.input_dim(i * 4 + 3)));
    blobs_.push_back(blob_pointer);
    blob_names_.push_back(blob_name);
    blob_need_backward_.push_back(param.force_backward());
    net_input_blob_indices_.push_back(i);
    net_input_blobs_.push_back(blob_pointer.get());
    blob_name_to_idx[blob_name] = i;
    available_blobs.insert(blob_name);
  }
  // For each layer, set up their input and output.
  bottom_vecs_.resize(num_layers);
  top_vecs_.resize(num_layers);
  bottom_id_vecs_.resize(num_layers);
  top_id_vecs_.resize(num_layers);
  for (int i = 0; i < num_layers; ++i) {
    const LayerConnection& layer_connection = param.layers(i);
    const LayerParameter& layer_param = layer_connection.layer();
    layers_.push_back(shared_ptr<Layer<Dtype> >(GetLayer<Dtype>(layer_param)));
    layer_names_.push_back(layer_param.name());
    LOG(INFO) << "Creating Layer " << layer_param.name();
    bool need_backward = param.force_backward();
    // Figure out this layer's input.
    for (int j = 0; j < layer_connection.bottom_size(); ++j) {
      const string& blob_name = layer_connection.bottom(j);
      // Validate before touching the map: operator[] would otherwise insert
      // a bogus entry (index 0) for an unknown blob name.
      if (available_blobs.find(blob_name) == available_blobs.end()) {
        LOG(FATAL) << "Unknown blob input " << blob_name
            << " (at index " << j << ") to layer " << layer_param.name();
      }
      const int blob_id = blob_name_to_idx[blob_name];
      LOG(INFO) << layer_param.name() << " <- " << blob_name;
      bottom_vecs_[i].push_back(blobs_[blob_id].get());
      bottom_id_vecs_[i].push_back(blob_id);
      // If a blob needs backward, this layer should provide it.
      need_backward |= blob_need_backward_[blob_id];
      // The blob is consumed; it is no longer a candidate network output.
      available_blobs.erase(blob_name);
    }
    // Figure out this layer's output.
    for (int j = 0; j < layer_connection.top_size(); ++j) {
      const string& blob_name = layer_connection.top(j);
      // A top that has the same name as the bottom at the same position
      // means in-place computation.
      if (layer_connection.bottom_size() > j &&
          blob_name == layer_connection.bottom(j)) {
        // In-place computation: reuse the existing blob and make it
        // available again for later layers.
        LOG(INFO) << layer_param.name() << " -> " << blob_name << " (in-place)";
        available_blobs.insert(blob_name);
        const int blob_id = blob_name_to_idx[blob_name];
        top_vecs_[i].push_back(blobs_[blob_id].get());
        top_id_vecs_[i].push_back(blob_id);
      } else if (blob_name_to_idx.find(blob_name) != blob_name_to_idx.end()) {
        // If we are not doing in-place computation but have duplicated blobs,
        // raise an error.
        LOG(FATAL) << "Duplicate blobs produced by multiple sources.";
      } else {
        // Normal output: create a fresh blob and register it.
        LOG(INFO) << layer_param.name() << " -> " << blob_name;
        shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
        blobs_.push_back(blob_pointer);
        blob_names_.push_back(blob_name);
        blob_need_backward_.push_back(param.force_backward());
        const int blob_id = static_cast<int>(blob_names_.size()) - 1;
        blob_name_to_idx[blob_name] = blob_id;
        available_blobs.insert(blob_name);
        top_vecs_[i].push_back(blobs_[blob_id].get());
        top_id_vecs_[i].push_back(blob_id);
      }
    }
    // After this layer is connected, set it up.
    // LOG(INFO) << "Setting up " << layer_names_[i];
    layers_[i]->SetUp(bottom_vecs_[i], &top_vecs_[i]);
    for (size_t topid = 0; topid < top_vecs_[i].size(); ++topid) {
      LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->channels() << " "
          << top_vecs_[i][topid]->height() << " "
          << top_vecs_[i][topid]->width();
    }
    // Check if this layer needs backward operation itself: any positive
    // blobs_lr entry requires it.
    for (int j = 0; j < layers_[i]->layer_param().blobs_lr_size(); ++j) {
      need_backward |= (layers_[i]->layer_param().blobs_lr(j) > 0);
    }
    // Finally, set the backward flag.
    layer_need_backward_.push_back(need_backward);
    if (need_backward) {
      LOG(INFO) << layer_names_[i] << " needs backward computation.";
      // Everything this layer produces must propagate gradients as well.
      for (size_t j = 0; j < top_id_vecs_[i].size(); ++j) {
        blob_need_backward_[top_id_vecs_[i][j]] = true;
      }
    } else {
      LOG(INFO) << layer_names_[i] << " does not need backward computation.";
    }
  }
  // In the end, all remaining blobs are considered output blobs.
  for (set<string>::iterator it = available_blobs.begin();
      it != available_blobs.end(); ++it) {
    LOG(INFO) << "This network produces output " << *it;
    net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
  }
  GetLearningRateAndWeightDecay();
  LOG(INFO) << "Network initialization done.";
}