Example #1
void DataReader::Body::read_one(db::Cursor* cursor, db::Transaction* dblt, QueuePair* qp) {
  Datum* datum = qp->free_.pop();
  // TODO deserialize in-place instead of copy?
  datum->ParseFromString(cursor->value());
  if (dblt != NULL) {
    string labels;
    CHECK_EQ(dblt->Get(cursor->key(), labels), 0);
    Datum labelDatum;
    labelDatum.ParseFromString(labels);
//    datum->MergeFrom(labelDatum);
    datum->set_channels(datum->channels() + labelDatum.channels());
    datum->mutable_float_data()->MergeFrom(labelDatum.float_data());
    datum->mutable_data()->append(labelDatum.data());
  }
  qp->full_.push(datum);

  // go to the next iter
  cursor->Next();
  if (!cursor->valid()) {
    DLOG(INFO) << "Restarting data prefetching from start.";
    cursor->SeekToFirst();
  }
}
bool MostCV::LevelDBReader::GetNextEntry(string &key, vector<double> &retVec, int &label) {
  if (!database_iter_->Valid())
    return false;

  Datum datum;
  datum.clear_float_data();
  datum.clear_data();
  datum.ParseFromString(database_iter_->value().ToString());

  key = database_iter_->key().ToString();
  label = datum.label();

  int expected_data_size = std::max<int>(datum.data().size(), datum.float_data_size());
  const int datum_volume_size = datum.channels() * datum.height() * datum.width();
  if (expected_data_size != datum_volume_size) {
    cout << "Something wrong in saved data.";
    assert(false);
  }

  retVec.resize(datum_volume_size);

  const string& data = datum.data();
  if (data.size() != 0) {
    // Data stored in string, e.g. just pixel values of 196608 = 256 * 256 * 3
    for (int i = 0; i < datum_volume_size; ++i)
      retVec[i] = data[i];
  } else {
    // Data stored in real feature vector such as 4096 from feature extraction
    for (int i = 0; i < datum_volume_size; ++i)
      retVec[i] = datum.float_data(i);
  }

  database_iter_->Next();
  ++record_idx_;

  return true;
}
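
The size check above relies on the usual Caffe convention that a Datum packs its pixel payload channel-major (C x H x W). Below is a minimal standalone sketch of the same check and copy using only standard containers; UnpackChw is a hypothetical helper, not part of the reader.

#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical sketch: same volume check and byte-to-double copy as
// GetNextEntry, with std::vector<uint8_t> standing in for datum.data().
std::vector<double> UnpackChw(const std::vector<uint8_t>& packed,
                              int channels, int height, int width) {
  const int volume = channels * height * width;
  assert(static_cast<int>(packed.size()) == volume);
  std::vector<double> out(volume);
  for (int i = 0; i < volume; ++i)
    out[i] = packed[i];  // pixel byte to double, as in retVec[i] = data[i]
  return out;
}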
template <typename Dtype>
void MyImageDataLayer<Dtype>::fetchData() {
	Datum datum;
	CHECK(prefetch_data_.count());
	Dtype* top_data = prefetch_data_.mutable_cpu_data();
	Dtype* top_label = prefetch_label_.mutable_cpu_data();
	ImageDataParameter image_data_param = this->layer_param_.image_data_param();
	const Dtype scale = image_data_param.scale();  // parameters of the image_data_layer
	const int batch_size = 1;  // image_data_param.batch_size(); we only need one image here

	const int crop_size = image_data_param.crop_size();
	const bool mirror = image_data_param.mirror();
	const int new_height = image_data_param.new_height();
	const int new_width = image_data_param.new_width();

	if (mirror && crop_size == 0) {
	    LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
				   << "set at the same time.";
	}
	// datum scales
	const int channels = datum_channels_;
	const int height = datum_height_;
	const int width = datum_width_;
	const int size = datum_size_;
	const int lines_size = lines_.size();
	const Dtype* mean = data_mean_.cpu_data();

	for (int item_id = 0; item_id < batch_size; ++item_id) {  // read one image
	    // get a blob
	    CHECK_GT(lines_size, lines_id_);
	    if (!ReadImageToDatum(lines_[lines_id_].first,
							  lines_[lines_id_].second,
							  new_height, new_width, &datum)) {
			continue;
	    }
	    const string& data = datum.data();
	    if (crop_size) {
			CHECK(data.size()) << "Image cropping only support uint8 data";
			int h_off, w_off;
			// We only do random crop when we do training.
	        h_off = (height - crop_size) / 2;
	        w_off = (width - crop_size) / 2;

	        // Normal copy: read the cropped image data into top_data
	        for (int c = 0; c < channels; ++c) {
				for (int h = 0; h < crop_size; ++h) {
					for (int w = 0; w < crop_size; ++w) {
						int top_index = ((item_id * channels + c) * crop_size + h)
										* crop_size + w;
						int data_index = (c * height + h + h_off) * width + w + w_off;
						Dtype datum_element =
							static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
						top_data[top_index] = (datum_element - mean[data_index]) * scale;
					}
				}
	        }

	    } else {
			// Just copy the whole data: read the image data into top_data
			if (data.size()) {
				for (int j = 0; j < size; ++j) {
					Dtype datum_element =
						static_cast<Dtype>(static_cast<uint8_t>(data[j]));
					top_data[item_id * size + j] = (datum_element - mean[j]) * scale;
				}
			} else {
				for (int j = 0; j < size; ++j) {
					top_data[item_id * size + j] =
						(datum.float_data(j) - mean[j]) * scale;
				}
			}
	    }
	    top_label[item_id] = datum.label();  // read this image's label

	}
}
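
fetchData above forces batch_size to 1 and takes a center crop; the index arithmetic (top_index over the cropped C x crop x crop layout, data_index over the original C x H x W layout) is the same pattern the Transform variants below repeat. A hedged, Caffe-free sketch of that arithmetic follows; CenterCropChw is a hypothetical helper and the item_id offset is dropped since only one image is read.

#include <cstdint>
#include <vector>

// Hypothetical helper: center-crop a C x H x W byte buffer into a
// C x crop x crop float buffer with the same index math as the loop above.
void CenterCropChw(const std::vector<uint8_t>& src, int channels,
                   int height, int width, int crop,
                   std::vector<float>* dst) {
  const int h_off = (height - crop) / 2;
  const int w_off = (width - crop) / 2;
  dst->assign(channels * crop * crop, 0.0f);
  for (int c = 0; c < channels; ++c)
    for (int h = 0; h < crop; ++h)
      for (int w = 0; w < crop; ++w) {
        const int top_index = (c * crop + h) * crop + w;
        const int data_index = (c * height + h + h_off) * width + w + w_off;
        (*dst)[top_index] = static_cast<float>(src[data_index]);
      }
}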
template <typename Dtype>
void DataTransformer<Dtype>::Transform(const Datum& datum,
                                       Blob<Dtype>* transformed_blob, int& h_off,
                                       int& w_off, int& do_mirror,
                                       vector<float>& col_ranges) {

  const int img_channels = datum.channels();
  const int img_height = datum.height();
  const int img_width = datum.width();

  const int channels = transformed_blob->channels();
  const int height = transformed_blob->height();
  const int width = transformed_blob->width();
  const int num = transformed_blob->num();

  //CHECK_EQ(channels, img_channels);
  CHECK_LE(height, img_height);
  CHECK_LE(width, img_width);
  CHECK_GE(num, 1);
  CHECK_EQ(img_channels, col_ranges.size());

  const int crop_size = param_.crop_size();
  const Dtype scale = param_.scale();
  const bool has_mean_file = param_.has_mean_file();
  const bool has_mean_values = mean_values_.size() > 0;
  if (do_mirror == -1)
  {
    do_mirror = param_.mirror() && Rand(2);
  }


  CHECK_GT(img_channels, 0);
  CHECK_GE(img_height, crop_size);
  CHECK_GE(img_width, crop_size);

  Dtype* mean = NULL;
  if (has_mean_file)
  {
    CHECK_EQ(img_channels, data_mean_.channels());
    if( (img_height == data_mean_.height() && img_width == data_mean_.width() ) || (crop_size == data_mean_.height() && crop_size == data_mean_.width() ) )
    {
        mean = data_mean_.mutable_cpu_data();
    }
    else
    {
      CHECK_EQ(img_height, data_mean_.height());
      CHECK_EQ(img_width, data_mean_.width());
    }
  }
  if (has_mean_values) {
    CHECK(mean_values_.size() == 1 || mean_values_.size() == img_channels) <<
     "Specify either 1 mean_value or as many as channels: " << img_channels;
    if (img_channels > 1 && mean_values_.size() == 1) {
      // Replicate the mean_value for simplicity
      for (int c = 1; c < img_channels; ++c) {
        mean_values_.push_back(mean_values_[0]);
      }
    }
  }

  //cv::Mat cv_cropped_img = cv_img;
  if (crop_size) {
    CHECK_EQ(crop_size, height);
    CHECK_EQ(crop_size, width);
    // We only do random crop when we do training.
    if (phase_ == TRAIN) {
      if (h_off == -1 && w_off == -1)
      {
        h_off = Rand(img_height - crop_size + 1);
        w_off = Rand(img_width - crop_size + 1);
      }
    }
    else {
      if (h_off == -1 && w_off == -1)
      {
        h_off = (img_height - crop_size) / 2;
        w_off = (img_width - crop_size) / 2;
      }
    }
    //cv::Rect roi(w_off, h_off, crop_size, crop_size);
    //cv_cropped_img = cv_img(roi);
  }
  else {
  h_off = 0;
  w_off = 0;
    CHECK_EQ(img_height, height);
    CHECK_EQ(img_width, width);
  }

  //CHECK(cv_cropped_img.data);

  Dtype* transformed_data = transformed_blob->mutable_cpu_data();
  int top_index;
  // debug
  /*char ss1[1010];
  sprintf(ss1,"/home/xiaolonw/opt_flows/temp_results/sth.jpg");
  cv::Mat img(Size(crop_size, crop_size), CV_8UC1);*/

  for (int h = 0; h < height; ++h) {
    int img_index = 0;
    for (int w = 0; w < width; ++w) {
      for (int c = 0; c < img_channels; ++c) {
        float now_col = col_ranges[c];
        if (do_mirror) {
          top_index = (c * height + h) * width + (width - 1 - w);
        } else {
          top_index = (c * height + h) * width + w;
        }
        img_index = (c * img_height + h + h_off) * img_width + w + w_off;
        Dtype pixel = datum.float_data(img_index);

        // color augmentation
        pixel = std::min( pixel * now_col, Dtype(255.0) );
        // debug
        //img.at<uchar>(h, w) = (uchar)(pixel);

        if (has_mean_file) {
          int mean_index = (c * img_height + h_off + h) * img_width + w_off + w;
          if (crop_size == data_mean_.height() && crop_size == data_mean_.width() )
          {
            mean_index = (c * height + h) * width + w;
          }
          transformed_data[top_index] = (pixel - mean[mean_index]) * scale;
        } else {
          if (has_mean_values) {
            transformed_data[top_index] =
              (pixel - mean_values_[c]) * scale;
          } else {
            transformed_data[top_index] = pixel * scale;
          }
        }
      }
    }
  }
  //imwrite(ss1,img);


}
Example #5
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);

  std::ifstream infile(argv[1]);
  std::vector<std::pair<string, int> > lines;
  string filename;
  int label;
  while (infile >> filename >> label) {
    lines.push_back(std::make_pair(filename, label));
  }
  LOG(INFO) << "A total of " << lines.size() << " images.";

  Datum datum;
  BlobProto sum_blob;
  int count = 0;

  if (!ReadImageToDatum(lines[0].first, lines[0].second, 
         resize_height, resize_width, is_color, &datum)) {
    return -1;
  }

  sum_blob.set_num(1);
  sum_blob.set_channels(datum.channels());
  sum_blob.set_height(datum.height());
  sum_blob.set_width(datum.width());
  const int data_size = datum.channels() * datum.height() * datum.width();
  int size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
  for (int i = 0; i < size_in_datum; ++i) {
    sum_blob.add_data(0.);
  }

  LOG(INFO) << "Starting Iteration";
  for (int line_id = 0; line_id < lines.size(); ++line_id) {
    if (!ReadImageToDatum(lines[line_id].first, lines[line_id].second, 
           resize_height, resize_width, is_color, &datum)) {
      continue;
    }

    const string& data = datum.data();
    size_in_datum = std::max<int>(datum.data().size(),
        datum.float_data_size());
    CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " <<
        size_in_datum;
    if (data.size() != 0) {
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
      }
    } else {
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) +
            static_cast<float>(datum.float_data(i)));
      }
    }
    ++count;
  }

  for (int i = 0; i < sum_blob.data_size(); ++i) {
    sum_blob.set_data(i, sum_blob.data(i) / count);
  }

  // Write to disk
  LOG(INFO) << "Write to " << argv[2];
  WriteProtoToBinaryFile(sum_blob, argv[2]);

  return 0;
}
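
The mean tool above accumulates per-pixel sums into sum_blob and divides by the image count at the end. The same accumulate-then-divide pattern, sketched without protobuf (MeanImage is a hypothetical helper over plain byte buffers):

#include <cstdint>
#include <vector>

// Hypothetical sketch: per-pixel sums over all images, divided by the
// image count, mirroring sum_blob.set_data(i, sum_blob.data(i) + data[i]).
std::vector<float> MeanImage(const std::vector<std::vector<uint8_t> >& images) {
  if (images.empty()) return std::vector<float>();
  std::vector<float> sum(images[0].size(), 0.0f);
  for (size_t n = 0; n < images.size(); ++n)
    for (size_t i = 0; i < sum.size(); ++i)
      sum[i] += images[n][i];
  for (size_t i = 0; i < sum.size(); ++i)
    sum[i] /= static_cast<float>(images.size());
  return sum;
}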
template <typename Dtype>
void* PoseImageDataLayerPrefetch(void* layer_pointer)
{
	CHECK(layer_pointer);
	PoseImageDataLayer<Dtype>* layer =
			reinterpret_cast<PoseImageDataLayer<Dtype>*>(layer_pointer);
	CHECK(layer);
	Datum datum;
	CHECK(layer->prefetch_data_);
	Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
	Dtype* top_label = layer->prefetch_label_->mutable_cpu_data();
	PoseImageDataParameter pose_image_data_param = layer->layer_param_.pose_image_data_param();
	const Dtype scale = pose_image_data_param.scale();
	const int batch_size = pose_image_data_param.batch_size();
	const int crop_size = pose_image_data_param.crop_size();
	const bool mirror = pose_image_data_param.mirror();
	const int new_height = pose_image_data_param.new_height();
	const int new_width = pose_image_data_param.new_width();
	const int out_height = pose_image_data_param.out_height();
	const int out_width  = pose_image_data_param.out_width();
	const int key_point_range = pose_image_data_param.key_point_range();
	const float scale_lower_bound = pose_image_data_param.scale_lower_bound();
	const float scale_upper_bound = pose_image_data_param.scale_upper_bound();
	const int key_point_num  = pose_image_data_param.key_point_num();
	const int randmargin  = pose_image_data_param.randmargin();
	const int use_mode = pose_image_data_param.use_mode();

	const float torso_ratio = pose_image_data_param.torso_ratio();
	const int mx1 = pose_image_data_param.mx1();
	const int mx2 = pose_image_data_param.mx2();
	const int my1 = pose_image_data_param.my1();
	const int my2 = pose_image_data_param.my2();

	const bool color_aug = pose_image_data_param.color_aug();


	if (mirror && crop_size == 0)
	{
		LOG(FATAL)
				<< "Current implementation requires mirror and crop_size to be "
				<< "set at the same time.";
	}
	// datum scales
	const int channels = layer->datum_channels_;
	const int height = layer->datum_height_;
	const int width = layer->datum_width_;
	const int size = layer->datum_size_;
	const int lines_size = layer->lines_.size();
	const Dtype* mean = layer->data_mean_.cpu_data();

	int * was = new int[out_height * out_width];

	for (int item_id = 0; item_id < batch_size; ++item_id)
	{
		char ss1[1010],ss2[1010];
		sprintf(ss1,"/home/dragon123/cnncode/showimg/%d.jpg",item_id);
		//sprintf(ss2,"/home/dragon123/cnncode/showimg/%d_gt.jpg",item_id);
		// get a blob
		float nowscale = 1;
		if (layer->phase_ == Caffe::TRAIN)
			nowscale = random(scale_lower_bound, scale_upper_bound);
		CHECK_GT(1.55, nowscale);
		CHECK_GT(nowscale, 0.95);

		CHECK_GT(lines_size, layer->lines_id_);
		if (use_mode == 1)
		{

			bool temp = PoseReadImageToDatum_mode1(layer->lines_[layer->lines_id_].first,
					layer->lines_[layer->lines_id_].second, new_height, new_width, &datum, nowscale,
					torso_ratio, mx1, mx2, my1, my2, randmargin);
			if (temp == false) continue;
		}
		else
		{
			bool temp = PoseReadImageToDatum_mode2(layer->lines_[layer->lines_id_].first,
								layer->lines_[layer->lines_id_].second, new_height, new_width, &datum, nowscale,
								torso_ratio, mx1, mx2, my1, my2, randmargin);
			if (temp == false) continue;
		}


		const string& data = datum.data();

		if (new_height > 0 && new_width > 0)
		{
			CHECK(data.size()) << "Image cropping only support uint8 data";
			int h_off, w_off;
			// We only do random crop when we do training.
			h_off = 0;
			w_off = 0;

			if (mirror && layer->PrefetchRand() % 2)
			{
				// Copy mirrored version
				for (int c = 0; c < channels; ++c)
				{
					float thisRand = 1;
					if(color_aug)
					{
						thisRand = random(0.8,1.2);
					}

					for (int h = 0; h < new_height; ++h)
					{
						for (int w = 0; w < new_width; ++w)
						{
							int top_index = ((item_id * channels + c)
									* new_height + h) * new_width
									+ (new_width - 1 - w);
							int data_index = (c * height + h + h_off) * width
									+ w + w_off;
							Dtype datum_element =
									static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
							top_data[top_index] = (datum_element
									- mean[data_index]) * scale;
							top_data[top_index] = min(top_data[top_index] * thisRand, (Dtype)(255.0));
						}
					}
				}
			}
			else
			{
				// Normal copy
				//Mat img(Size(new_width,new_height), CV_8UC3);
				for (int c = 0; c < channels; ++c)
				{
					float thisRand = 1;
					if(color_aug)
					{
						thisRand = random(0.8,1.2);
					}

					for (int h = 0; h < new_height; ++h)
					{
						for (int w = 0; w < new_width; ++w)
						{
							int top_index = ((item_id * channels + c)
									* new_height + h) * new_width + w;
							int data_index = (c * height + h + h_off) * width
									+ w + w_off;
							Dtype datum_element =
									static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
							top_data[top_index] = (datum_element
									- mean[data_index]) * scale;

							//img.at<cv::Vec3b>(h, w)[c] = (uchar)(datum_element * scale) * thisRand;

							top_data[top_index] = min(top_data[top_index] * thisRand, (Dtype)(255.0));
						}
					}
				}
				//imwrite(ss1, img);
			}
		}
		else
		{
			// Just copy the whole data
			if (data.size())
			{
				for (int j = 0; j < size; ++j)
				{
					Dtype datum_element =
							static_cast<Dtype>(static_cast<uint8_t>(data[j]));
					top_data[item_id * size + j] = (datum_element - mean[j])
							* scale;
				}
			}
			else
			{
				for (int j = 0; j < size; ++j)
				{
					top_data[item_id * size + j] = (datum.float_data(j)
							- mean[j]) * scale;
				}
			}
		}

		float lblratio = static_cast<float>(new_height) / out_height;
		vector<int> pts;
		for (int label_i = 0; label_i < datum.label_size(); label_i++)
		{
			pts.push_back( datum.label(label_i) / lblratio );
		}

		int lblLen = key_point_num * out_height * out_width;
		PoseReadLabel(pts, was, top_label + item_id * lblLen, out_height, out_width);

		/*for(int ci = 0; ci < key_point_num; ci ++)
		{
			Mat img(Size(out_height, out_width), CV_8UC3);
			sprintf(ss2,"/home/dragon123/cnncode/showimg/%d_%d_gt.jpg",item_id, ci);
			for(int h = 0; h < out_height; h ++)
				for(int w = 0; w < out_width; w ++)
				{
					int clr = top_label[item_id * lblLen + ci * out_height * out_width + h * out_width + w];
					if(clr <= 0)
					{
						if(clr == 0) for(int c = 0; c < 3; c ++) img.at<cv::Vec3b>(h, w)[c] = 0;
						if(clr < 0) for(int c = 0; c < 3; c ++) img.at<cv::Vec3b>(h, w)[c] = 128;
					}
					else
					{
						for(int c = 0; c < 3; c ++) img.at<cv::Vec3b>(h, w)[c] = 255;
					}
 				}
			imwrite(ss2, img);
		}*/


		// go to the next iter
		layer->lines_id_++;
		if (layer->lines_id_ >= lines_size)
		{
			// We have reached the end. Restart from the first.
			DLOG(INFO) << "Restarting data prefetching from start.";
			layer->lines_id_ = 0;
			if (layer->layer_param_.pose_image_data_param().shuffle())
			{
				layer->ShuffleImages();
			}
		}
	}

	delete[] was;

	return reinterpret_cast<void*>(NULL);
}
template <typename Dtype>
void DataLstmTrainHistLayer<Dtype>::InternalThreadEntry() {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(this->prefetch_data_.count());

  Datum datum;
  Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
  Dtype* top_label = this->prefetch_label_.mutable_cpu_data();
  Dtype* top_hist = this->prefetch_hist_.mutable_cpu_data();
  Dtype* top_marker = this->prefetch_marker_.mutable_cpu_data();

  // datum scales
  const int size = resize_height*resize_width*3;
  const Dtype* mean = this->data_mean_.mutable_cpu_data();

  string value;
  const int kMaxKeyLength = 256;
  char key_cstr[kMaxKeyLength];
  int key;

  const int sequence_size = this->layer_param_.data_lstm_train_hist_param().sequence_size();
  const int ind_seq_num=this->layer_param_.data_lstm_train_hist_param().sequence_num();
  const int interval=this->layer_param_.data_lstm_train_hist_param().interval();
  int item_id;

  for (int time_id = 0; time_id < sequence_size; ++time_id) {
     for (int seq_id = 0; seq_id < ind_seq_num; ++seq_id) {
        item_id=time_id*ind_seq_num+seq_id;
        timer.Start();
        // get a blob

        key=buffer_key[seq_id];  // MUST be changed according to the size of the training set

        snprintf(key_cstr, kMaxKeyLength, "%08d", key);
        db_->Get(leveldb::ReadOptions(), string(key_cstr), &value);
        datum.ParseFromString(value);
        const string& data = datum.data();

        read_time += timer.MicroSeconds();
        timer.Start();

        for (int j = 0; j < size; ++j) {
           Dtype datum_element = static_cast<Dtype>(static_cast<uint8_t>(data[j]));
           top_data[item_id * size + j] = (datum_element - mean[j]);
        }

        for (int j = 0; j < para_dim; ++j) { 
           top_label[item_id * para_dim + j] = datum.float_data(j); 
        }

        top_marker[item_id] = datum.float_data(para_dim);

        if (buffer_marker[seq_id] == 0) {
            top_marker[item_id] = 0;   
            buffer_marker[seq_id] = 1;
        }

        //////////////////////////////////// for hist
        if (top_marker[item_id] < 0.5) {
           for (int j = 0; j < para_dim; ++j)
               top_hist[item_id * para_dim + j] = 0; 
        } else {
           if (time_id == 0) {
              top_hist[item_id * para_dim + 0] = hist_blob[seq_id * para_dim + 0]/1.1+0.5;
              top_hist[item_id * para_dim + 1] = hist_blob[seq_id * para_dim + 1]*0.17778+1.34445;
              top_hist[item_id * para_dim + 2] = hist_blob[seq_id * para_dim + 2]*0.14545+0.39091;
              top_hist[item_id * para_dim + 3] = hist_blob[seq_id * para_dim + 3]*0.17778-0.34445;
              top_hist[item_id * para_dim + 4] = hist_blob[seq_id * para_dim + 4]/95.0+0.12;
              top_hist[item_id * para_dim + 5] = hist_blob[seq_id * para_dim + 5]/95.0+0.12;
              top_hist[item_id * para_dim + 6] = hist_blob[seq_id * para_dim + 6]*0.14545+1.48181;
              top_hist[item_id * para_dim + 7] = hist_blob[seq_id * para_dim + 7]*0.16+0.98;
              top_hist[item_id * para_dim + 8] = hist_blob[seq_id * para_dim + 8]*0.16+0.02;
              top_hist[item_id * para_dim + 9] = hist_blob[seq_id * para_dim + 9]*0.14545-0.48181;
              top_hist[item_id * para_dim + 10] = hist_blob[seq_id * para_dim + 10]/95.0+0.12;
              top_hist[item_id * para_dim + 11] = hist_blob[seq_id * para_dim + 11]/95.0+0.12;
              top_hist[item_id * para_dim + 12] = hist_blob[seq_id * para_dim + 12]/95.0+0.12;
              top_hist[item_id * para_dim + 13] = hist_blob[seq_id * para_dim + 13]*0.6+0.2;
           } else {
              int pre_id=(time_id-1)*ind_seq_num+seq_id;
              top_hist[item_id * para_dim + 0] = top_label[pre_id * para_dim + 0]/1.1+0.5;
              top_hist[item_id * para_dim + 1] = top_label[pre_id * para_dim + 1]*0.17778+1.34445;
              top_hist[item_id * para_dim + 2] = top_label[pre_id * para_dim + 2]*0.14545+0.39091;
              top_hist[item_id * para_dim + 3] = top_label[pre_id * para_dim + 3]*0.17778-0.34445;
              top_hist[item_id * para_dim + 4] = top_label[pre_id * para_dim + 4]/95.0+0.12;
              top_hist[item_id * para_dim + 5] = top_label[pre_id * para_dim + 5]/95.0+0.12;
              top_hist[item_id * para_dim + 6] = top_label[pre_id * para_dim + 6]*0.14545+1.48181;
              top_hist[item_id * para_dim + 7] = top_label[pre_id * para_dim + 7]*0.16+0.98;
              top_hist[item_id * para_dim + 8] = top_label[pre_id * para_dim + 8]*0.16+0.02;
              top_hist[item_id * para_dim + 9] = top_label[pre_id * para_dim + 9]*0.14545-0.48181;
              top_hist[item_id * para_dim + 10] = top_label[pre_id * para_dim + 10]/95.0+0.12;
              top_hist[item_id * para_dim + 11] = top_label[pre_id * para_dim + 11]/95.0+0.12;
              top_hist[item_id * para_dim + 12] = top_label[pre_id * para_dim + 12]/95.0+0.12;
              top_hist[item_id * para_dim + 13] = top_label[pre_id * para_dim + 13]*0.6+0.2;
           }
        }
        //////////////////////////////////// for hist

        trans_time += timer.MicroSeconds();

        buffer_key[seq_id]++;
        buffer_total[seq_id]++;
        if (buffer_key[seq_id]>total_frames || buffer_total[seq_id]>interval) {
           buffer_key[seq_id]=random(total_frames)+1;
           buffer_marker[seq_id]=0;
           buffer_total[seq_id]=0;
        }

        //////////////////////////////////// for hist
        if (time_id==sequence_size-1) {
           for (int j = 0; j < para_dim; ++j) 
               hist_blob[seq_id * para_dim + j] = datum.float_data(j); 
        }
        //////////////////////////////////// for hist

/*
        if (seq_id == 0) {
           for (int h = 0; h < resize_height; ++h) {
              for (int w = 0; w < resize_width; ++w) {
                 leveldbTrain->imageData[(h*resize_width+w)*3+0]=(uint8_t)data[h*resize_width+w];
                 leveldbTrain->imageData[(h*resize_width+w)*3+1]=(uint8_t)data[resize_height*resize_width+h*resize_width+w];
                 leveldbTrain->imageData[(h*resize_width+w)*3+2]=(uint8_t)data[resize_height*resize_width*2+h*resize_width+w];

                 //leveldbTrain->imageData[(h*resize_width+w)*3+0]=(uint8_t)top_data[item_id * size+h*resize_width+w];
                 //leveldbTrain->imageData[(h*resize_width+w)*3+1]=(uint8_t)top_data[item_id * size+resize_height*resize_width+h*resize_width+w];
                 //leveldbTrain->imageData[(h*resize_width+w)*3+2]=(uint8_t)top_data[item_id * size+resize_height*resize_width*2+h*resize_width+w];
               }
           }
           cvShowImage("Image from leveldb", leveldbTrain);
           cvWaitKey( 1 );
        }
*/
     }
  }

  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}
Example #8
template <typename Dtype>
void DataTransformer<Dtype>::Transform(const int batch_item_id,
                                       const Datum& datum,
                                       const Dtype* mean,
                                       Dtype* transformed_data) {
  const string& data = datum.data();
  const int channels = datum.channels();
  const int height = datum.height();
  const int width = datum.width();
  const int size = datum.channels() * datum.height() * datum.width();

  const int crop_size = param_.crop_size();
  const bool mirror = param_.mirror();
  const Dtype scale = param_.scale();

  if (mirror && crop_size == 0) {
    LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
               << "set at the same time.";
  }

  if (crop_size) {
    CHECK(data.size()) << "Image cropping only support uint8 data";
    int h_off, w_off;
    // We only do random crop when we do training.
    if (phase_ == Caffe::TRAIN) {
      h_off = Rand() % (height - crop_size);
      w_off = Rand() % (width - crop_size);
    } else {
      h_off = (height - crop_size) / 2;
      w_off = (width - crop_size) / 2;
    }
    if (mirror && Rand() % 2) {
      // Copy mirrored version
      for (int c = 0; c < channels; ++c) {
        for (int h = 0; h < crop_size; ++h) {
          for (int w = 0; w < crop_size; ++w) {
            int data_index = (c * height + h + h_off) * width + w + w_off;
            int top_index = ((batch_item_id * channels + c) * crop_size + h)
                * crop_size + (crop_size - 1 - w);
            Dtype datum_element =
                static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
            transformed_data[top_index] =
                (datum_element - mean[data_index]) * scale;
          }
        }
      }
    } else {
      // Normal copy
      for (int c = 0; c < channels; ++c) {
        for (int h = 0; h < crop_size; ++h) {
          for (int w = 0; w < crop_size; ++w) {
            int top_index = ((batch_item_id * channels + c) * crop_size + h)
                * crop_size + w;
            int data_index = (c * height + h + h_off) * width + w + w_off;
            Dtype datum_element =
                static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
            transformed_data[top_index] =
                (datum_element - mean[data_index]) * scale;
          }
        }
      }
    }
  } else {
    // we will prefer to use data() first, and then try float_data()
    if (data.size()) {
      for (int j = 0; j < size; ++j) {
        Dtype datum_element =
            static_cast<Dtype>(static_cast<uint8_t>(data[j]));
        transformed_data[j + batch_item_id * size] =
            (datum_element - mean[j]) * scale;
      }
    } else {
      for (int j = 0; j < size; ++j) {
        transformed_data[j + batch_item_id * size] =
            (datum.float_data(j) - mean[j]) * scale;
      }
    }
  }
}
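
The mirrored branch above flips only the width axis: column w of the crop is written to column (crop_size - 1 - w). A one-line sketch of that destination index, with the batch offset omitted (MirroredTopIndex is a hypothetical name):

// Hypothetical sketch of the mirrored destination index within one image,
// matching ((... + c) * crop_size + h) * crop_size + (crop_size - 1 - w).
inline int MirroredTopIndex(int c, int h, int w, int crop_size) {
  return (c * crop_size + h) * crop_size + (crop_size - 1 - w);
}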
template <typename Dtype>
void DataTransformer<Dtype>::Transform(const int batch_item_id,
                                       const Datum& datum,
                                       const Dtype* mean,
                                       Dtype* transformed_data, Dtype* temp_data) {
  const string& data = datum.data();
  const int channels = datum.channels();
  const int height = datum.height();
  const int width = datum.width();
  const int size = datum.channels() * datum.height() * datum.width();

  const int crop_size = param_.crop_size();
  const bool mirror = param_.mirror();
  const bool rotate = param_.rotate();
  const Dtype scale = param_.scale();
  const int window_size = param_.window_size();
  
  int newHeight = datum.height();
  int newWidth = datum.width();
  
  if(crop_size) {
	newHeight = crop_size;
	newWidth = crop_size;
  } else if(window_size) {
    newHeight = window_size;
	newWidth = window_size;
  }
  
  
  if (window_size && crop_size) {
    LOG(FATAL) << "Current implementation does not support window_size and crop_size to be "
               << "set at the same time.";
  }

  if (crop_size || window_size) {
    CHECK(data.size()) << "Image cropping only support uint8 data";
    int h_off, w_off;
    // We only do random crop when we do training.
    if (phase_ == Caffe::TRAIN && crop_size) {
      h_off = Rand() % (height - crop_size);
      w_off = Rand() % (width - crop_size);
    } else {
      h_off = (height - newHeight) / 2;
      w_off = (width - newWidth) / 2;
    }
    
	// Normal copy
	for (int c = 0; c < channels; ++c) {
		for (int h = 0; h < newHeight; ++h) {
		  for (int w = 0; w < newWidth; ++w) {
			int top_index = ((batch_item_id * channels + c) * newHeight + h)
				* newWidth + w;
			int data_index = (c * height + h + h_off) * width + w + w_off;
			Dtype datum_element =
				static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
			transformed_data[top_index] =
				(datum_element - mean[data_index]) * scale;
		  }
		}
	}  
  } else {
	//LOG(INFO) << ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Normal::"  << batch_item_id;
    // we will prefer to use data() first, and then try float_data()
    if (data.size()) {
      for (int j = 0; j < size; ++j) {
        Dtype datum_element =
            static_cast<Dtype>(static_cast<uint8_t>(data[j]));
        transformed_data[j + batch_item_id * size] =
            (datum_element - mean[j]) * scale;
      }
    } else {
      for (int j = 0; j < size; ++j) {
        transformed_data[j + batch_item_id * size] =
            (datum.float_data(j) - mean[j]) * scale;
      }
    }
  }
  
  //Perform mirroring on the transformed_data using a temp_data first then copy it back
  if (mirror && Rand() % 3) {
      // Copy mirrored version
	  if(Rand()%2){ //Mirror vertical
		//LOG(INFO) << ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Mirror vertical::" << batch_item_id;
        for (int c = 0; c < channels; ++c) {
         for (int h = 0; h < newHeight; ++h) {
          for (int w = 0; w < newWidth; ++w) {
            int data_index = ((batch_item_id * channels + c) * newHeight + h) * newWidth + w;
			int	top_index = ((batch_item_id * channels + c) * newHeight + h)
					* newWidth + (newWidth - 1 - w);
            Dtype datum_element =
                static_cast<Dtype>(static_cast<uint8_t>(transformed_data[data_index]));
            temp_data[top_index] = datum_element;
          }
         }
        } 
	   }else{ //Mirror horizontal
			//LOG(INFO) << ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Mirror horizontal::" << batch_item_id;
			for (int c = 0; c < channels; ++c) {
			 for (int h = 0; h < newHeight; ++h) {
			  for (int w = 0; w < newWidth; ++w) {
				int data_index = ((batch_item_id * channels + c) * newHeight + h) * newWidth + w;
				int	top_index = ((batch_item_id * channels + c) * newHeight + (newHeight - 1 -h))
					* newWidth + w;
				Dtype datum_element =
					static_cast<Dtype>(static_cast<uint8_t>(transformed_data[data_index]));
				temp_data[top_index] = datum_element;
			  }
			 }
			} 
		}
		for (int c = 0; c < channels; ++c) {
			for (int h = 0; h < newHeight; ++h) {
				for (int w = 0; w < newWidth; ++w) {
					int top_index = ((batch_item_id * channels + c) * newHeight + h)
						* newWidth + w;
					Dtype datum_element =
						static_cast<Dtype>(static_cast<uint8_t>(temp_data[top_index]));
					transformed_data[top_index] = datum_element;
				}
			}
		}
    }
  
  
  
	// Perform rotation on the transformed_data using a temp_data first then copy it back
	if(rotate && Rand() %3) {
		int r = Rand() % 3;  // three cases below: 90, -90, and 180 degrees
		if(r == 0) {//Rotate 90
			//LOG(INFO) << ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Rotate 90::" << batch_item_id;
			for (int c = 0; c < channels; ++c) {
				for (int h = 0; h < newHeight; ++h) {
					for (int w = 0; w < newWidth; ++w) {
						int top_index = ((batch_item_id * channels + c) * newHeight + h)
							* newWidth + w;
						int new_top_index = ((batch_item_id * channels + c) * newHeight * newWidth) + h + (newWidth - 1 -w) * newWidth;
						Dtype datum_element =
							static_cast<Dtype>(static_cast<uint8_t>(transformed_data[top_index]));
						temp_data[new_top_index] = datum_element;
					}
				}
			}	
		}else if(r ==1) { //Rotate -90
			//LOG(INFO) << ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Rotate -90::" << batch_item_id;
			for (int c = 0; c < channels; ++c) {
				for (int h = 0; h < newHeight; ++h) {
					for (int w = 0; w < newWidth; ++w) {
						int top_index = ((batch_item_id * channels + c) * newHeight + h)
							* newWidth + w;
						int new_top_index = ((batch_item_id * channels + c) * newHeight * newWidth) + (newWidth - 1 -h) + (w * newWidth);
						Dtype datum_element =
							static_cast<Dtype>(static_cast<uint8_t>(transformed_data[top_index]));
						temp_data[new_top_index] = datum_element;
					}
				}
			}
		} else if(r ==2) { //Rotate 180
			for (int c = 0; c < channels; ++c) {
				for (int h = 0; h < newHeight; ++h) {
					for (int w = 0; w < newWidth; ++w) {
						int top_index = ((batch_item_id * channels + c) * newHeight + h)
							* newWidth + w;
						int new_top_index = ((batch_item_id * channels + c) * newHeight + (newHeight-h-1)) * newWidth + (newWidth -w-1);
						Dtype datum_element =
							static_cast<Dtype>(static_cast<uint8_t>(transformed_data[top_index]));
						temp_data[new_top_index] = datum_element;
					}
				}
			}
		}
		for (int c = 0; c < channels; ++c) {
			for (int h = 0; h < newHeight; ++h) {
				for (int w = 0; w < newWidth; ++w) {
					int top_index = ((batch_item_id * channels + c) * newHeight + h)
						* newWidth + w;
					Dtype datum_element =
						static_cast<Dtype>(static_cast<uint8_t>(temp_data[top_index]));
					transformed_data[top_index] = datum_element;
				}
			}
		}
	}
	//LOG(INFO) << ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> -------------------::" << batch_item_id;
}
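
The rotation branches above remap indices into temp_data and then copy back; the 90-degree case sends source (h, w) to destination row (newWidth - 1 - w), column h, which only stays in bounds when the crop is square. A hypothetical Rotate90 sketch of that remap on a single square plane:

#include <vector>

// Hypothetical sketch: quarter-turn of one n x n plane, matching
// new_top_index = h + (newWidth - 1 - w) * newWidth in the loop above.
std::vector<int> Rotate90(const std::vector<int>& plane, int n) {
  std::vector<int> out(n * n);
  for (int h = 0; h < n; ++h)
    for (int w = 0; w < n; ++w)
      out[(n - 1 - w) * n + h] = plane[h * n + w];
  return out;
}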
template <typename Dtype>
void DataTransformer<Dtype>::Transform(const Datum& datum,
                                       Dtype* transformed_data) {
  const string& data = datum.data();
  const int datum_channels = datum.channels();
  const int datum_height = datum.height();
  const int datum_width = datum.width();

  const int crop_size = param_.crop_size();
  const Dtype scale = param_.scale();
  bool do_mirror = param_.mirror() && Rand(2); //aki_update
  const bool has_mean_file = param_.has_mean_file();
  const bool has_uint8 = data.size() > 0;
  const bool has_mean_values = mean_values_.size() > 0;

  CHECK_GT(datum_channels, 0);
  CHECK_GE(datum_height, crop_size);
  CHECK_GE(datum_width, crop_size);

  Dtype* mean = NULL;
  if (has_mean_file) {
    CHECK_EQ(datum_channels, data_mean_.channels());
    CHECK_EQ(datum_height, data_mean_.height());
    CHECK_EQ(datum_width, data_mean_.width());
    mean = data_mean_.mutable_cpu_data();
  }
  if (has_mean_values) {
    CHECK(mean_values_.size() == 1 || mean_values_.size() == datum_channels) <<
     "Specify either 1 mean_value or as many as channels: " << datum_channels;
    if (datum_channels > 1 && mean_values_.size() == 1) {
      // Replicate the mean_value for simplicity
      for (int c = 1; c < datum_channels; ++c) {
        mean_values_.push_back(mean_values_[0]);
      }
    }
  }

  int height = datum_height;
  int width = datum_width;

  int h_off = 0;
  int w_off = 0;
  if (crop_size) {
    height = crop_size;
    width = crop_size;
    // We only do random crop when we do training.
    if (phase_ == TRAIN) {
      h_off = Rand(datum_height - crop_size + 1);
      w_off = Rand(datum_width - crop_size + 1);
    } else {
      h_off = (datum_height - crop_size) / 2;
      w_off = (datum_width - crop_size) / 2;
    }
  }
  //aki_update_start
  //use the multiview strategy in testing
  const bool use_multiview = param_.multi_view();
  if (use_multiview) {
    std::ifstream in_stream(std::string("multiview_cache").c_str());
    int view_type = 0;
    in_stream >> view_type;
    in_stream.close();
    if (view_type > 5)
    {
      //it means we have to use mirror right here
      do_mirror = true;
      view_type-=5;
    }
    switch(view_type){
      case 1:
        h_off = 0;
        w_off = 0;
        break;
      case 2:
        h_off = 0;
        w_off = datum_width - crop_size;
        break;
      case 3:
        h_off = datum_height - crop_size;
        w_off = 0;
        break;
      case 4:
        h_off = datum_height - crop_size;
        w_off = datum_width - crop_size;
        break;
      case 5:
        h_off = (datum_height - crop_size) / 2;
        w_off = (datum_width - crop_size) / 2;
        break;
      default:
        break;
    }
  }
  //aki_update_end

  Dtype datum_element;
  int top_index, data_index;
  for (int c = 0; c < datum_channels; ++c) {
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        data_index = (c * datum_height + h_off + h) * datum_width + w_off + w;
        if (do_mirror) {
          top_index = (c * height + h) * width + (width - 1 - w);
        } else {
          top_index = (c * height + h) * width + w;
        }
        if (has_uint8) {
          datum_element =
            static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
        } else {
          datum_element = datum.float_data(data_index);
        }
        if (has_mean_file) {
          transformed_data[top_index] =
            (datum_element - mean[data_index]) * scale;
        } else {
          if (has_mean_values) {
            transformed_data[top_index] =
              (datum_element - mean_values_[c]) * scale;
          } else {
            transformed_data[top_index] = datum_element * scale;
          }
        }
      }
    }
  }
}
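
The multi_view path above reads a view index from the multiview_cache file and picks one of five crop offsets (four corners plus the center); indices 6-10 reuse the same offsets with mirroring switched on. A hedged sketch of that offset table (MultiViewOffset is a hypothetical helper):

#include <utility>

// Hypothetical sketch of the view_type -> (h_off, w_off) table above.
std::pair<int, int> MultiViewOffset(int view_type, int datum_height,
                                    int datum_width, int crop_size) {
  switch (view_type) {
    case 1: return std::make_pair(0, 0);                           // top-left
    case 2: return std::make_pair(0, datum_width - crop_size);     // top-right
    case 3: return std::make_pair(datum_height - crop_size, 0);    // bottom-left
    case 4: return std::make_pair(datum_height - crop_size,
                                  datum_width - crop_size);        // bottom-right
    default: return std::make_pair((datum_height - crop_size) / 2,
                                   (datum_width - crop_size) / 2); // center
  }
}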
Example #11
template <typename Dtype>
void* DataLayerPrefetch(void* layer_pointer) {
  CHECK(layer_pointer);
  DataLayer<Dtype>* layer = static_cast<DataLayer<Dtype>*>(layer_pointer);
  CHECK(layer);
  Datum datum;
  CHECK(layer->prefetch_data_);
  Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
  Dtype* top_label;
  if (layer->output_labels_) {
    top_label = layer->prefetch_label_->mutable_cpu_data();
  }
  const Dtype scale = layer->layer_param_.data_param().scale();
  const int batch_size = layer->layer_param_.data_param().batch_size();
  const int crop_size = layer->layer_param_.data_param().crop_size();
  const bool mirror = layer->layer_param_.data_param().mirror();

  if (mirror && crop_size == 0) {
    LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
        << "set at the same time.";
  }
  // datum scales
  const int channels = layer->datum_channels_;
  const int height = layer->datum_height_;
  const int width = layer->datum_width_;
  const int size = layer->datum_size_;
  const Dtype* mean = layer->data_mean_.cpu_data();
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    CHECK(layer->iter_);
    CHECK(layer->iter_->Valid());
    datum.ParseFromString(layer->iter_->value().ToString());
    const string& data = datum.data();
    if (crop_size) {
      CHECK(data.size()) << "Image cropping only support uint8 data";
      int h_off, w_off;
      // We only do random crop when we do training.
      if (layer->phase_ == Caffe::TRAIN) {
        h_off = layer->PrefetchRand() % (height - crop_size);
        w_off = layer->PrefetchRand() % (width - crop_size);
      } else {
        h_off = (height - crop_size) / 2;
        w_off = (width - crop_size) / 2;
      }
      if (mirror && layer->PrefetchRand() % 2) {
        // Copy mirrored version
        for (int c = 0; c < channels; ++c) {
          for (int h = 0; h < crop_size; ++h) {
            for (int w = 0; w < crop_size; ++w) {
              int top_index = ((item_id * channels + c) * crop_size + h)
                              * crop_size + (crop_size - 1 - w);
              int data_index = (c * height + h + h_off) * width + w + w_off;
              Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
              top_data[top_index] = (datum_element - mean[data_index]) * scale;
            }
          }
        }
      } else {
        // Normal copy
        for (int c = 0; c < channels; ++c) {
          for (int h = 0; h < crop_size; ++h) {
            for (int w = 0; w < crop_size; ++w) {
              int top_index = ((item_id * channels + c) * crop_size + h)
                              * crop_size + w;
              int data_index = (c * height + h + h_off) * width + w + w_off;
              Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
              top_data[top_index] = (datum_element - mean[data_index]) * scale;
            }
          }
        }
      }
    } else {
      // we will prefer to use data() first, and then try float_data()
      if (data.size()) {
        for (int j = 0; j < size; ++j) {
          Dtype datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[j]));
          top_data[item_id * size + j] = (datum_element - mean[j]) * scale;
        }
      } else {
        for (int j = 0; j < size; ++j) {
          top_data[item_id * size + j] =
              (datum.float_data(j) - mean[j]) * scale;
        }
      }
    }

    if (layer->output_labels_) {
      top_label[item_id] = datum.label();
    }
    // go to the next iter
    layer->iter_->Next();
    if (!layer->iter_->Valid()) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      layer->iter_->SeekToFirst();
    }
  }

  return static_cast<void*>(NULL);
}
Example #12
template <typename Dtype>
void DataDrivingLayer<Dtype>::InternalThreadEntry() {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(this->prefetch_data_.count());

  Datum datum;
  Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
  Dtype* top_label = this->prefetch_label_.mutable_cpu_data();

  // datum scales
  const int size = resize_height*resize_width*3;
  const Dtype* mean = this->data_mean_.mutable_cpu_data();

  string value;
  const int kMaxKeyLength = 256;
  char key_cstr[kMaxKeyLength];
  int key;

  const int batch_size = this->layer_param_.data_driving_param().batch_size();

  for (int item_id = 0; item_id < batch_size; ++item_id) {

      timer.Start();
      // get a blob

      key=random(484815)+1;  // MUST be changed according to the size of the training set

      snprintf(key_cstr, kMaxKeyLength, "%08d", key);
      db_->Get(leveldb::ReadOptions(), string(key_cstr), &value);
      datum.ParseFromString(value);
      const string& data = datum.data();

      read_time += timer.MicroSeconds();
      timer.Start();

      for (int j = 0; j < size; ++j) {
         Dtype datum_element = static_cast<Dtype>(static_cast<uint8_t>(data[j]));
         top_data[item_id * size + j] = (datum_element - mean[j]);
      }

      for (int j = 0; j < para_dim; ++j) { 
         top_label[item_id*para_dim+j] = datum.float_data(j); 
      }

      trans_time += timer.MicroSeconds();
/*
      for (int h = 0; h < resize_height; ++h) {
         for (int w = 0; w < resize_width; ++w) {
            leveldbTrain->imageData[(h*resize_width+w)*3+0]=(uint8_t)data[h*resize_width+w];
            leveldbTrain->imageData[(h*resize_width+w)*3+1]=(uint8_t)data[resize_height*resize_width+h*resize_width+w];
            leveldbTrain->imageData[(h*resize_width+w)*3+2]=(uint8_t)data[resize_height*resize_width*2+h*resize_width+w];

            //leveldbTrain->imageData[(h*resize_width+w)*3+0]=(uint8_t)top_data[item_id * size+h*resize_width+w];
            //leveldbTrain->imageData[(h*resize_width+w)*3+1]=(uint8_t)top_data[item_id * size+resize_height*resize_width+h*resize_width+w];
            //leveldbTrain->imageData[(h*resize_width+w)*3+2]=(uint8_t)top_data[item_id * size+resize_height*resize_width*2+h*resize_width+w];
          }
      }
      cvShowImage("Image from leveldb", leveldbTrain);
      cvWaitKey( 1 );
*/
  }
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);
  if (argc < 5) {
    LOG(ERROR) << "Usage: compute_image_mean input_list new_height new_width output_file [dropping_rate]";
    return 1;
  }

  char* fn_list = argv[1];
  const int height = atoi(argv[2]);
  const int width = atoi(argv[3]);
  char* fn_output = argv[4];

  int sampling_rate = 1;
  if (argc >= 6){
	  sampling_rate = atoi(argv[5]);
	  LOG(INFO) << "using sampling rate " << sampling_rate;
  }

  Datum datum;
  BlobProto sum_blob;
  int count = 0;

  std::ifstream infile(fn_list);
  string fn_frm;
  int label;
  infile >> fn_frm >> label;

  ReadImageToDatum(fn_frm, label, height, width, &datum);

  sum_blob.set_num(1);
  sum_blob.set_channels(datum.channels());
  sum_blob.set_length(1);
  sum_blob.set_height(datum.height());
  sum_blob.set_width(datum.width());

  const int data_size = datum.channels() * datum.height() * datum.width();
  int size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());

  for (int i = 0; i < size_in_datum; ++i) {
    sum_blob.add_data(0.);
  }

  LOG(INFO) << "Starting Iteration";
  int i = 0;
  while (infile >> fn_frm >> label) {
	  i++;
	  if (i % sampling_rate!=0){
		  continue;
	  }
	ReadImageToDatum(fn_frm, label, height, width, &datum);
    const string& data = datum.data();
    size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size());
    CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " <<
        size_in_datum;
    if (data.size() != 0) {
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
      }
    } else {
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) +
            static_cast<float>(datum.float_data(i)));
      }
    }
    ++count;
    if (count % 10000 == 0) {
      LOG(ERROR) << "Processed " << count << " files.";
    }
  }

  infile.close();

  if (count % 10000 != 0) {
    LOG(ERROR) << "Processed " << count << " files.";
  }
  for (int i = 0; i < sum_blob.data_size(); ++i) {
    sum_blob.set_data(i, sum_blob.data(i) / count);
  }
  // Write to disk
  LOG(INFO) << "Write to " << fn_output;
  WriteProtoToBinaryFile(sum_blob, fn_output);

  return 0;
}
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);
  if (argc < 3 || argc > 4) {
    LOG(ERROR) << "Usage: compute_image_mean input_db output_file"
               << " db_backend[leveldb or lmdb]";
    return 1;
  }

  string db_backend = "lmdb";
  if (argc == 4) {
    db_backend = string(argv[3]);
  }

  // Open leveldb
  leveldb::DB* db;
  leveldb::Options options;
  options.create_if_missing = false;
  leveldb::Iterator* it = NULL;
  // lmdb
  MDB_env* mdb_env;
  MDB_dbi mdb_dbi;
  MDB_val mdb_key, mdb_value;
  MDB_txn* mdb_txn;
  MDB_cursor* mdb_cursor;

  // Open db
  if (db_backend == "leveldb") {  // leveldb
    LOG(INFO) << "Opening leveldb " << argv[1];
    leveldb::Status status = leveldb::DB::Open(
        options, argv[1], &db);
    CHECK(status.ok()) << "Failed to open leveldb " << argv[1];
    leveldb::ReadOptions read_options;
    read_options.fill_cache = false;
    it = db->NewIterator(read_options);
    it->SeekToFirst();
  } else if (db_backend == "lmdb") {  // lmdb
    LOG(INFO) << "Opening lmdb " << argv[1];
    CHECK_EQ(mdb_env_create(&mdb_env), MDB_SUCCESS) << "mdb_env_create failed";
    CHECK_EQ(mdb_env_set_mapsize(mdb_env, 1099511627776), MDB_SUCCESS);  // 1TB
    CHECK_EQ(mdb_env_open(mdb_env, argv[1], MDB_RDONLY, 0664),
        MDB_SUCCESS) << "mdb_env_open failed";
    CHECK_EQ(mdb_txn_begin(mdb_env, NULL, MDB_RDONLY, &mdb_txn), MDB_SUCCESS)
        << "mdb_txn_begin failed";
    CHECK_EQ(mdb_open(mdb_txn, NULL, 0, &mdb_dbi), MDB_SUCCESS)
        << "mdb_open failed";
    CHECK_EQ(mdb_cursor_open(mdb_txn, mdb_dbi, &mdb_cursor), MDB_SUCCESS)
        << "mdb_cursor_open failed";
    CHECK_EQ(mdb_cursor_get(mdb_cursor, &mdb_key, &mdb_value, MDB_FIRST),
        MDB_SUCCESS);
  } else {
    LOG(FATAL) << "Unknown db backend " << db_backend;
  }

  // set size info
  Datum datum;
  BlobProto sum_blob;
  int count = 0;
  // load first datum
  if (db_backend == "leveldb") {
    datum.ParseFromString(it->value().ToString());
  } else if (db_backend == "lmdb") {
    datum.ParseFromArray(mdb_value.mv_data, mdb_value.mv_size);
  } else {
    LOG(FATAL) << "Unknown db backend " << db_backend;
  }

  sum_blob.set_num(1);
  sum_blob.set_channels(datum.channels());
  sum_blob.set_height(datum.height());
  sum_blob.set_width(datum.width());
  const int data_size = datum.channels() * datum.height() * datum.width();
  int size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
  for (int i = 0; i < size_in_datum; ++i) {
    sum_blob.add_data(0.);
  }
  // start collecting
  LOG(INFO) << "Starting Iteration";

  if (db_backend == "leveldb") {  // leveldb
    for (it->SeekToFirst(); it->Valid(); it->Next()) {
      // just a dummy operation
      datum.ParseFromString(it->value().ToString());
      const string& data = datum.data();
      size_in_datum = std::max<int>(datum.data().size(),
          datum.float_data_size());
      CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " <<
          size_in_datum;
      if (data.size() != 0) {
        for (int i = 0; i < size_in_datum; ++i) {
          sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
        }
      } else {
        for (int i = 0; i < size_in_datum; ++i) {
          sum_blob.set_data(i, sum_blob.data(i) +
              static_cast<float>(datum.float_data(i)));
        }
      }
      ++count;
      if (count % 10000 == 0) {
        LOG(ERROR) << "Processed " << count << " files.";
      }
    }
  } else if (db_backend == "lmdb") {  // lmdb
    CHECK_EQ(mdb_cursor_get(mdb_cursor, &mdb_key, &mdb_value, MDB_FIRST),
        MDB_SUCCESS);
    do {
      // just a dummy operation
      datum.ParseFromArray(mdb_value.mv_data, mdb_value.mv_size);
      const string& data = datum.data();
      size_in_datum = std::max<int>(datum.data().size(),
          datum.float_data_size());
      CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " <<
          size_in_datum;
      if (data.size() != 0) {
        for (int i = 0; i < size_in_datum; ++i) {
          sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
        }
      } else {
        for (int i = 0; i < size_in_datum; ++i) {
          sum_blob.set_data(i, sum_blob.data(i) +
              static_cast<float>(datum.float_data(i)));
        }
      }
      ++count;
      if (count % 10000 == 0) {
        LOG(ERROR) << "Processed " << count << " files.";
      }
    } while (mdb_cursor_get(mdb_cursor, &mdb_key, &mdb_value, MDB_NEXT)
        == MDB_SUCCESS);
  } else {
    LOG(FATAL) << "Unknown db backend " << db_backend;
  }

  for (int i = 0; i < sum_blob.data_size(); ++i) {
    sum_blob.set_data(i, sum_blob.data(i) / count);
  }

  caffe::Blob<float> vis;
  vis.FromProto(sum_blob);
  caffe::imshow(&vis, 1, "mean img");
  cv::waitKey(0);
  
  google::protobuf::RepeatedField<float>* tmp = sum_blob.mutable_data();
  std::vector<float> mean_data(tmp->begin(), tmp->end());
  double sum = std::accumulate(mean_data.begin(), mean_data.end(), 0.0);
  double mean2 = sum / mean_data.size();
  double sq_sum = std::inner_product(mean_data.begin(), mean_data.end(), mean_data.begin(), 0.0);
  double stdev = std::sqrt(sq_sum / mean_data.size() - mean2 * mean2);

  LOG(INFO) << "mean of mean image: " << mean2 << " std: " << stdev;

  // Write to disk
  LOG(INFO) << "Write to " << argv[2];
  WriteProtoToBinaryFile(sum_blob, argv[2]);

  // Clean up
  if (db_backend == "leveldb") {
    delete db;
  } else if (db_backend == "lmdb") {
    mdb_cursor_close(mdb_cursor);
    mdb_close(mdb_env, mdb_dbi);
    mdb_txn_abort(mdb_txn);
    mdb_env_close(mdb_env);
  } else {
    LOG(FATAL) << "Unknown db backend " << db_backend;
  }
  return 0;
}
Example #15
template <typename Dtype>
void* DataLayerPrefetch(void* layer_pointer) {
  CHECK(layer_pointer);
  DataLayer<Dtype>* layer = reinterpret_cast<DataLayer<Dtype>*>(layer_pointer);
  CHECK(layer);
  Datum datum;
  CHECK(layer->prefetch_data_);
  Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
  Dtype* top_label = layer->prefetch_label_->mutable_cpu_data();
  const Dtype scale = layer->layer_param_.scale();
  const int batchsize = layer->layer_param_.batchsize();
  const int cropsize = layer->layer_param_.cropsize();
  const bool mirror = layer->layer_param_.mirror();

  if (mirror && cropsize == 0) {
    LOG(FATAL) << "Current implementation requires mirror and cropsize to be "
        << "set at the same time.";
  }
  // datum scales
  const int channels = layer->datum_channels_;
  const int height = layer->datum_height_;
  const int width = layer->datum_width_;
  const int size = layer->datum_size_;
  const Dtype* mean = layer->data_mean_.cpu_data();
  for (int itemid = 0; itemid < batchsize; ++itemid) {
    // get a blob
    CHECK(layer->iter_);
    CHECK(layer->iter_->Valid());
    datum.ParseFromString(layer->iter_->value().ToString());
    const string& data = datum.data();

    if (cropsize) {
      //CHECK(data.size()) << "Image cropping only support uint8 data";
      int h_off, w_off;
      // We only do random crop when we do training.
      if (Caffe::phase() == Caffe::TRAIN) {
        // NOLINT_NEXT_LINE(runtime/threadsafe_fn)
        h_off = rand() % (height - cropsize);
        // NOLINT_NEXT_LINE(runtime/threadsafe_fn)
        w_off = rand() % (width - cropsize);
      } else {
        h_off = (height - cropsize) / 2;
        w_off = (width - cropsize) / 2;
      }
      // NOLINT_NEXT_LINE(runtime/threadsafe_fn)
      if (mirror && rand() % 2) {
        // Copy mirrored version
        for (int c = 0; c < channels; ++c) {
          for (int h = 0; h < cropsize; ++h) {
            for (int w = 0; w < cropsize; ++w) {
              top_data[((itemid * channels + c) * cropsize + h) * cropsize
                       + cropsize - 1 - w] =
                  (static_cast<Dtype>(
                      (float)datum.float_data((c * height + h + h_off) * width
                                    + w + w_off))
                    - mean[(c * height + h + h_off) * width + w + w_off])
                  * scale;
            }
          }
        }
      } else {
        // Normal copy
        for (int c = 0; c < channels; ++c) {
          for (int h = 0; h < cropsize; ++h) {
            for (int w = 0; w < cropsize; ++w) {
              top_data[((itemid * channels + c) * cropsize + h) * cropsize + w]
                  = (static_cast<Dtype>(
                      (float)datum.float_data((c * height + h + h_off) * width
                                    + w + w_off))
                     - mean[(c * height + h + h_off) * width + w + w_off])
                  * scale;
            }
          }
        }
      }
    } else {
      // we will prefer to use data() first, and then try float_data()
      if (data.size()) {

	//cout << "unint8 data!!!!" << endl;
        for (int j = 0; j < size; ++j) {
	  //cout << "datum.int_data " << j << "of size: " << size << static_cast<Dtype>((uint8_t)data[j]) << " mean: " << mean[j] << endl;
          top_data[itemid * size + j] =
              (static_cast<Dtype>((uint8_t)data[j]) - mean[j]) * scale;
        }
      } else {
        //cout << "float data !!!!!!!!!!!" << endl;
        for (int j = 0; j < size; ++j) {
	  
	  //cout << "item: " << itemid <<" datum.float_data " << j << "of size: " << size << endl;
	  //cout << datum.float_data(j) << " mean: " << mean[j] << endl;
          top_data[itemid * size + j] =
              (datum.float_data(j) - mean[j]) * scale;
        }
      }
    }

    top_label[itemid] = datum.label();
    // go to the next iter
    layer->iter_->Next();
    if (!layer->iter_->Valid()) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      layer->iter_->SeekToFirst();
    }
  }

  return reinterpret_cast<void*>(NULL);
}
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);
  if (argc < 4 || argc > 5) {
    printf("Convert a set of images to the leveldb format used\n"
        "as input for Caffe.\n"
        "Usage:\n"
        "    convert_imageset ROOTFOLDER/ LISTFILE DB_NAME"
        " RANDOM_SHUFFLE_DATA[0 or 1]\n"
        "The ImageNet dataset for the training demo is at\n"
        "    http://www.image-net.org/download-images\n");
    return 1;
  }
  std::ifstream infile(argv[2]);
  if (!infile) {
    LOG(FATAL) << "Could not open list file " << argv[2];
  }
  std::vector<string> lines;
  string infor;
  int label;
  while (infile >> infor) {
    lines.push_back(infor);
  }
  if (argc == 5 && argv[4][0] == '1') {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    std::random_shuffle(lines.begin()+1, lines.end());
  }
  LOG(INFO) << "A total of " << lines.size() << " images.";

  leveldb::DB* db;
  leveldb::Options options;
  options.error_if_exists = true;
  options.create_if_missing = true;
  options.write_buffer_size = 268435456;
  LOG(INFO) << "Opening leveldb " << argv[3];
  leveldb::Status status = leveldb::DB::Open(
      options, argv[3], &db);
  CHECK(status.ok()) << "Failed to open leveldb " << argv[3];

  Datum datum;
  int count = 0;
  const int kMaxKeyLength = 256;
  char key_cstr[kMaxKeyLength];
  leveldb::WriteBatch* batch = new leveldb::WriteBatch();
  int data_size;
  bool data_size_initialized = false;

  int width = 0 , height = 0 , channel = 0;
  std::string::size_type pos1 = 0 , pos2 = 0;
  pos2 = lines[0].find(",", pos1);
  channel = atoi(lines[0].substr(pos1,pos2-pos1).c_str());

  pos1 = pos2 + 1;
  pos2 = lines[0].find(",", pos1);
  height = atoi(lines[0].substr(pos1,pos2-pos1).c_str());

  pos1 = pos2 + 1;
  pos2 = lines[0].find(",", pos1);
  width = atoi(lines[0].substr(pos1,pos2-pos1).c_str());

  for (int line_id = 1; line_id < lines.size(); ++line_id) {
    if (!ReadCSVToDatum(lines[line_id], channel, width, height, &datum)) {
      continue;
    }

    if (!data_size_initialized) {
      data_size = datum.channels() * datum.height() * datum.width();
      data_size_initialized = true;
    } else {
      ::google::protobuf::RepeatedField< float > data = datum.float_data();
      CHECK_EQ(data.size(), data_size) << "Incorrect data field size "
          << data.size();
    }
    // sequential
    snprintf(key_cstr, kMaxKeyLength, "%08d_%s", line_id,
        lines[line_id].c_str());
    string value;
    // get the value
    datum.SerializeToString(&value);
    batch->Put(string(key_cstr), value);
    if (++count % 1000 == 0) {
      db->Write(leveldb::WriteOptions(), batch);
      LOG(ERROR) << "Processed " << count << " files.";
      delete batch;
      batch = new leveldb::WriteBatch();
    }
  }
  // write the last batch
  if (count % 1000 != 0) {
    db->Write(leveldb::WriteOptions(), batch);
    LOG(ERROR) << "Processed " << count << " files.";
  }

  delete batch;
  delete db;
  return 0;
}
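// ReadCSVToDatum is not shown in this listing; the sketch below is only an
// assumed implementation consistent with how the tool above uses it: the
// first line of LISTFILE carries "channels,height,width", each following
// token is one comma-separated sample, and float_data must end up with
// channels*height*width entries. The leading label column is a guess.
#include <cstdlib>
#include <sstream>
#include <string>
#include <vector>

#include "caffe/proto/caffe.pb.h"  // caffe::Datum

bool ReadCSVToDatum(const std::string& line, int channels,
                    int width, int height, caffe::Datum* datum) {
  // Split the line on commas into float fields.
  std::vector<float> values;
  std::stringstream ss(line);
  std::string field;
  while (std::getline(ss, field, ',')) {
    values.push_back(static_cast<float>(std::atof(field.c_str())));
  }
  const int volume = channels * height * width;
  if (static_cast<int>(values.size()) != volume + 1) {
    return false;  // malformed line: expected label + volume values
  }
  datum->set_channels(channels);
  datum->set_height(height);
  datum->set_width(width);
  datum->set_label(static_cast<int>(values[0]));  // assumed label column
  datum->clear_data();
  datum->clear_float_data();
  for (int i = 1; i <= volume; ++i) {
    datum->add_float_data(values[i]);
  }
  return true;
}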
void* ImageDataLayerPrefetch(void* layer_pointer) {
  CHECK(layer_pointer);
  ImageDataLayer<Dtype>* layer =
      reinterpret_cast<ImageDataLayer<Dtype>*>(layer_pointer);
  CHECK(layer);
  Datum datum;
  CHECK(layer->prefetch_data_);
  Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
  Dtype* top_label = layer->prefetch_label_->mutable_cpu_data();
  ImageDataParameter image_data_param = layer->layer_param_.image_data_param();
  const Dtype scale = image_data_param.scale();
  const int batch_size = image_data_param.batch_size();
  const int crop_size = image_data_param.crop_size();
  const bool mirror = image_data_param.mirror();
  const int new_height = image_data_param.new_height();
  const int new_width = image_data_param.new_width();

  if (mirror && crop_size == 0) {
    LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
        << "set at the same time.";
  }
  // datum scales
  const int channels = layer->datum_channels_;
  const int height = layer->datum_height_;
  const int width = layer->datum_width_;
  const int size = layer->datum_size_;
  const int lines_size = layer->shuffle_index_.size();
  const Dtype* mean = layer->data_mean_.cpu_data();
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    CHECK_GT(lines_size, layer->lines_id_);
    int id = layer->shuffle_index_[layer->lines_id_];
    if (!ReadImageToDatum(layer->fn_list_[id],
          layer->label_list_[id],
          new_height, new_width, &datum)) {
      continue;
    }
    const string& data = datum.data();
    if (crop_size) {
      CHECK(data.size()) << "Image cropping only support uint8 data";
      int h_off, w_off;
      // We only do random crop when we do training.
      if (layer->phase_ == Caffe::TRAIN) {
        h_off = layer->PrefetchRand() % (height - crop_size);
        w_off = layer->PrefetchRand() % (width - crop_size);
      } else {
        h_off = (height - crop_size) / 2;
        w_off = (width - crop_size) / 2;
      }
      if (mirror && layer->PrefetchRand() % 2) {
        // Copy mirrored version
        for (int c = 0; c < channels; ++c) {
          for (int h = 0; h < crop_size; ++h) {
            for (int w = 0; w < crop_size; ++w) {
              int top_index = ((item_id * channels + c) * crop_size + h)
                              * crop_size + (crop_size - 1 - w);
              int data_index = (c * height + h + h_off) * width + w + w_off;
              Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
              top_data[top_index] = (datum_element - mean[data_index]) * scale;
            }
          }
        }
      } else {
        // Normal copy
        for (int c = 0; c < channels; ++c) {
          for (int h = 0; h < crop_size; ++h) {
            for (int w = 0; w < crop_size; ++w) {
              int top_index = ((item_id * channels + c) * crop_size + h)
                              * crop_size + w;
              int data_index = (c * height + h + h_off) * width + w + w_off;
              Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
              top_data[top_index] = (datum_element - mean[data_index]) * scale;
            }
          }
        }
      }
    } else {
      // Just copy the whole data
      if (data.size()) {
        for (int j = 0; j < size; ++j) {
          Dtype datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[j]));
          top_data[item_id * size + j] = (datum_element - mean[j]) * scale;
        }
      } else {
        for (int j = 0; j < size; ++j) {
          top_data[item_id * size + j] =
              (datum.float_data(j) - mean[j]) * scale;
        }
      }
    }

    top_label[item_id] = datum.label();
    // go to the next iter
    layer->lines_id_++;
    if (layer->lines_id_ >= lines_size) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      layer->lines_id_ = 0;
      if (layer->layer_param_.image_data_param().shuffle()) {
        layer->ShuffleImages();
      }
    }
  }

  return reinterpret_cast<void*>(NULL);
}
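// All of these prefetch routines share the pthread entry-point signature
// void* (*)(void*). Below is a minimal, self-contained sketch of that
// launch-and-join pattern; FakeLayer and PrefetchEntry are illustrative
// stand-ins, not names taken from the snippets above.
#include <pthread.h>
#include <cstdio>

// Stand-in for the layer object the real code passes through the void*.
struct FakeLayer {
  int batches_fetched;
};

// Same shape as ImageDataLayerPrefetch above: receive the owner via void*,
// do the work, return NULL.
static void* PrefetchEntry(void* layer_pointer) {
  FakeLayer* layer = static_cast<FakeLayer*>(layer_pointer);
  ++layer->batches_fetched;  // stand-in for filling prefetch_data_
  return static_cast<void*>(NULL);
}

int main() {
  FakeLayer layer = {0};
  pthread_t thread;
  pthread_create(&thread, NULL, PrefetchEntry, &layer);  // launch the worker
  pthread_join(thread, NULL);                            // wait for one batch
  std::printf("batches fetched: %d\n", layer.batches_fetched);
  return 0;
}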
Beispiel #18
0
std::vector<float> calc_mean(const std::string &db_fname) {
  scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
  db->Open(db_fname, db::READ);
  scoped_ptr<db::Cursor> cursor(db->NewCursor());

  BlobProto sum_blob;
  int count = 0;
  // load first datum
  Datum datum;
  datum.ParseFromString(cursor->value());

  if (DecodeDatumNative(&datum)) {
    LOG(INFO) << "Decoding Datum";
  }

  sum_blob.set_num(1);
  sum_blob.set_channels(datum.channels());
  sum_blob.set_height(datum.height());
  sum_blob.set_width(datum.width());
  const int data_size = datum.channels() * datum.height() * datum.width();
  int size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
  for (int i = 0; i < size_in_datum; ++i) {
    sum_blob.add_data(0.);
  }
  LOG(INFO) << "Starting Iteration";
  while (cursor->valid()) {
    Datum datum;
    datum.ParseFromString(cursor->value());
    DecodeDatumNative(&datum);

    const std::string& data = datum.data();
    size_in_datum = std::max<int>(datum.data().size(),
                                  datum.float_data_size());
    CHECK_EQ(size_in_datum, data_size)
      << "Incorrect data field size " << size_in_datum;

    if (data.size() != 0) {
      CHECK_EQ(data.size(), size_in_datum);
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
      }
    } else {
      CHECK_EQ(datum.float_data_size(), size_in_datum);
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) +
            static_cast<float>(datum.float_data(i)));
      }
    }
    ++count;
    if (count % 10000 == 0) {
      LOG(INFO) << "Processed " << count << " files.";
    }
    cursor->Next();
  }

  if (count % 10000 != 0) {
    LOG(INFO) << "Processed " << count << " files.";
  }
  for (int i = 0; i < sum_blob.data_size(); ++i) {
    sum_blob.set_data(i, sum_blob.data(i) / count);
  }

  const int channels = sum_blob.channels();
  const int dim = sum_blob.height() * sum_blob.width();
  std::vector<float> mean_values(channels, 0.0);
  LOG(INFO) << "Number of channels: " << channels;
  for (int c = 0; c < channels; ++c) {
    for (int i = 0; i < dim; ++i) {
      mean_values[c] += sum_blob.data(dim * c + i);
    }
    mean_values[c] /= dim;
    LOG(INFO) << "mean_value channel [" << c << "]:" << mean_values[c];
  }

  return mean_values;
}
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);

#ifdef USE_OPENCV
#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  gflags::SetUsageMessage("Compute the mean_image of a set of images given by"
        " a leveldb/lmdb\n"
        "Usage:\n"
        "    compute_image_mean [FLAGS] INPUT_DB [OUTPUT_FILE]\n");

  gflags::ParseCommandLineFlags(&argc, &argv, true);

  if (argc < 2 || argc > 3) {
    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/compute_image_mean");
    return 1;
  }

  scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
  db->Open(argv[1], db::READ);
  scoped_ptr<db::Cursor> cursor(db->NewCursor());

  BlobProto sum_blob;
  int count = 0;
  // load first datum
  Datum datum;
  datum.ParseFromString(cursor->value());

  if (DecodeDatumNative(&datum)) {
    LOG(INFO) << "Decoding Datum";
  }

  sum_blob.set_num(1);
  sum_blob.set_channels(datum.channels());
  sum_blob.set_height(datum.height());
  sum_blob.set_width(datum.width());
  const int data_size = datum.channels() * datum.height() * datum.width();
  int size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
  for (int i = 0; i < size_in_datum; ++i) {
    sum_blob.add_data(0.);
  }
  LOG(INFO) << "Starting Iteration";
  while (cursor->valid()) {
    Datum datum;
    datum.ParseFromString(cursor->value());
    DecodeDatumNative(&datum);

    const std::string& data = datum.data();
    size_in_datum = std::max<int>(datum.data().size(),
        datum.float_data_size());
    CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " <<
        size_in_datum;
    if (data.size() != 0) {
      CHECK_EQ(data.size(), size_in_datum);
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
      }
    } else {
      CHECK_EQ(datum.float_data_size(), size_in_datum);
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) +
            static_cast<float>(datum.float_data(i)));
      }
    }
    ++count;
    if (count % 10000 == 0) {
      LOG(INFO) << "Processed " << count << " files.";
      printf("Processed %d files.\n",count);
    }
    cursor->Next();
  }

  if (count % 10000 != 0) {
    LOG(INFO) << "Processed " << count << " files.";
    printf("Processed %d files.\n",count);
  }
  for (int i = 0; i < sum_blob.data_size(); ++i) {
    sum_blob.set_data(i, sum_blob.data(i) / count);
  }
  // Write to disk
  if (argc == 3) {
    LOG(INFO) << "Write to " << argv[2];
    WriteProtoToBinaryFile(sum_blob, argv[2]);
  }
  const int channels = sum_blob.channels();
  const int dim = sum_blob.height() * sum_blob.width();
  std::vector<float> mean_values(channels, 0.0);
  LOG(INFO) << "Number of channels: " << channels;
  for (int c = 0; c < channels; ++c) {
    for (int i = 0; i < dim; ++i) {
      mean_values[c] += sum_blob.data(dim * c + i);
    }
    LOG(INFO) << "mean_value channel [" << c << "]:" << mean_values[c] / dim;
  }
#else
  LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV.";
#endif  // USE_OPENCV
  return 0;
}
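// Hedged usage sketch (not part of the tool above): calc_mean() from this
// example could be reused directly, and its per-channel output is in the
// form one would paste into a prototxt transform_param { mean_value: ... }
// block. Illustrative only.
#include <cstdio>
#include <string>
#include <vector>

void PrintMeanValues(const std::string& db_fname) {
  std::vector<float> means = calc_mean(db_fname);  // defined earlier in this example
  for (size_t c = 0; c < means.size(); ++c) {
    std::printf("mean_value: %f\n", means[c]);
  }
}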
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);
  if (argc != 3) {
    LOG(ERROR) << "Usage: compute_image_mean input_leveldb output_file";
    return 1;
  }

  leveldb::DB* db;
  leveldb::Options options;
  options.create_if_missing = false;

  LOG(INFO) << "Opening leveldb " << argv[1];
  leveldb::Status status = leveldb::DB::Open(
      options, argv[1], &db);
  CHECK(status.ok()) << "Failed to open leveldb " << argv[1];

  leveldb::ReadOptions read_options;
  read_options.fill_cache = false;
  leveldb::Iterator* it = db->NewIterator(read_options);
  it->SeekToFirst();
  Datum datum;
  BlobProto sum_blob;
  int count = 0;
  datum.ParseFromString(it->value().ToString());
  sum_blob.set_num(1);
  sum_blob.set_channels(datum.channels());
  sum_blob.set_height(datum.height());
  sum_blob.set_width(datum.width());
  const int data_size = datum.channels() * datum.height() * datum.width();
  int size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
  for (int i = 0; i < size_in_datum; ++i) {
    sum_blob.add_data(0.);
  }
  LOG(INFO) << "Starting Iteration";
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    // just a dummy operation
    datum.ParseFromString(it->value().ToString());
    const string& data = datum.data();
    size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size());
    CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " <<
        size_in_datum;
    if (data.size() != 0) {
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
      }
    } else {
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) +
            static_cast<float>(datum.float_data(i)));
      }
    }
    ++count;
    if (count % 10000 == 0) {
      LOG(ERROR) << "Processed " << count << " files.";
    }
  }
  if (count % 10000 != 0) {
    LOG(ERROR) << "Processed " << count << " files.";
  }
  for (int i = 0; i < sum_blob.data_size(); ++i) {
    sum_blob.set_data(i, sum_blob.data(i) / count);
  }
  // Write to disk
  LOG(INFO) << "Write to " << argv[2];
  WriteProtoToBinaryFile(sum_blob, argv[2]);

  delete db;
  return 0;
}
Beispiel #21
0
void DataTransformer<Dtype>::Transform(const Datum& datum,
                                       Dtype* transformed_data) {
  const string& data = datum.data();
  const int datum_channels = datum.channels();
  const int datum_height = datum.height();
  const int datum_width = datum.width();

  const int crop_size = param_.crop_size();
  const Dtype scale = param_.scale();
  const bool do_mirror = param_.mirror() && Rand(2);
  const bool has_mean_file = param_.has_mean_file();
  const bool has_uint8 = data.size() > 0;
  const bool has_mean_values = mean_values_.size() > 0;
  // mask_size is defaulted to 0 in caffe/proto/caffe.proto
  const int mask_size = param_.mask_size();
  // mask_freq defaults to 1 in caffe/proto/caffe.proto; masking is applied
  // with probability 1/mask_freq (see the Rand(mask_freq) == 0 test below)
  const int mask_freq = param_.mask_freq();

  CHECK_GT(datum_channels, 0);
  CHECK_GE(datum_height, crop_size);
  CHECK_GE(datum_width, crop_size);

  Dtype* mean = NULL;
  if (has_mean_file) {
    CHECK_EQ(datum_channels, data_mean_.channels());
    CHECK_EQ(datum_height, data_mean_.height());
    CHECK_EQ(datum_width, data_mean_.width());
    mean = data_mean_.mutable_cpu_data();
  }
  if (has_mean_values) {
    CHECK(mean_values_.size() == 1 || mean_values_.size() == datum_channels) <<
     "Specify either 1 mean_value or as many as channels: " << datum_channels;
    if (datum_channels > 1 && mean_values_.size() == 1) {
      // Replicate the mean_value for simplicity
      for (int c = 1; c < datum_channels; ++c) {
        mean_values_.push_back(mean_values_[0]);
      }
    }
  }

  int height = datum_height;
  int width = datum_width;

  int h_off = 0;
  int w_off = 0;
  if (crop_size) {
    height = crop_size;
    width = crop_size;
    // We only do random crop when we do training.
    if (phase_ == TRAIN) {
      h_off = Rand(datum_height - crop_size + 1);
      w_off = Rand(datum_width - crop_size + 1);
    } else {
      h_off = (datum_height - crop_size) / 2;
      w_off = (datum_width - crop_size) / 2;
    }
  }

  // initialize masking offsets to be same as cropping offsets
  // so that there is no conflict
  bool masking = (phase_ == TRAIN) && (mask_size > 0) && (Rand(mask_freq) == 0);
  int h_mask_start = h_off;
  int w_mask_start = w_off;
  if (masking) {
    int h_effective = datum_height;
    int w_effective = datum_width;
    if (crop_size) { h_effective = w_effective = crop_size; }
    CHECK_GE(h_effective, mask_size);
    CHECK_GE(w_effective, mask_size);
    h_mask_start += Rand(h_effective-mask_size+1);
    w_mask_start += Rand(w_effective-mask_size+1);
  }
  int h_mask_end = h_mask_start + mask_size;
  int w_mask_end = w_mask_start + mask_size;

  Dtype datum_element;
  int top_index, data_index;
  for (int c = 0; c < datum_channels; ++c) {
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        data_index = (c * datum_height + h_off + h) * datum_width + w_off + w;
        if (do_mirror) {
          top_index = (c * height + h) * width + (width - 1 - w);
        } else {
          top_index = (c * height + h) * width + w;
        }
        if (has_uint8) {
          datum_element =
            static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
        } else {
          datum_element = datum.float_data(data_index);
        }
        if (has_mean_file) {
          transformed_data[top_index] =
            (datum_element - mean[data_index]) * scale;
        } else {
          if (has_mean_values) {
            transformed_data[top_index] =
              (datum_element - mean_values_[c]) * scale;
          } else {
            transformed_data[top_index] = datum_element * scale;
          }
        }
        if (masking) {
          if ((h > h_mask_start) && (w > w_mask_start) &&
              (h < h_mask_end) && (w < w_mask_end)) {
            transformed_data[top_index] = 0;
          }
        }
      }
    }
  }
}
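// The mirrored and normal copies above differ only in the destination
// index. The following self-contained snippet (illustrative, not from the
// library) prints both index forms for a tiny C x H x W layout so the
// w-flip is easy to see.
#include <cstdio>

int main() {
  const int channels = 1, height = 3, width = 3;
  for (int c = 0; c < channels; ++c) {
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        int normal   = (c * height + h) * width + w;
        int mirrored = (c * height + h) * width + (width - 1 - w);
        std::printf("(c=%d,h=%d,w=%d)  normal=%d  mirrored=%d\n",
                    c, h, w, normal, mirrored);
      }
    }
  }
  return 0;
}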
Beispiel #22
0
void DataTransformer<Dtype>::Transform(const Datum& datum,
                                       Dtype* transformed_data) {
  const string& data = datum.data();
  const int datum_channels = datum.channels();
  const int datum_height = datum.height();
  const int datum_width = datum.width();

  const int crop_size = param_.crop_size();
  const Dtype scale = param_.scale();
  const bool do_mirror = param_.mirror() && Rand(2);
  const bool has_mean_file = param_.has_mean_file();
  const bool has_uint8 = data.size() > 0;
  const bool has_mean_values = mean_values_.size() > 0;

  CHECK_GT(datum_channels, 0);
  CHECK_GE(datum_height, crop_size);
  CHECK_GE(datum_width, crop_size);

  Dtype* mean = NULL;
  if (has_mean_file) {
    CHECK_EQ(datum_channels, data_mean_.channels());
    CHECK_EQ(datum_height, data_mean_.height());
    CHECK_EQ(datum_width, data_mean_.width());
    mean = data_mean_.mutable_cpu_data();
  }
  if (has_mean_values) {
    CHECK(mean_values_.size() == 1 || mean_values_.size() == datum_channels) <<
     "Specify either 1 mean_value or as many as channels: " << datum_channels;
    if (datum_channels > 1 && mean_values_.size() == 1) {
      // Replicate the mean_value for simplicity
      for (int c = 1; c < datum_channels; ++c) {
        mean_values_.push_back(mean_values_[0]);
      }
    }
  }

  int height = datum_height;
  int width = datum_width;

  int h_off = 0;
  int w_off = 0;
  if (crop_size) {
    height = crop_size;
    width = crop_size;
    // We only do random crop when we do training.
    if (phase_ == TRAIN) {
      h_off = Rand(datum_height - crop_size + 1);
      w_off = Rand(datum_width - crop_size + 1);
    } else {
      h_off = (datum_height - crop_size) / 2;
      w_off = (datum_width - crop_size) / 2;
    }
  }

  Dtype datum_element;
  int top_index, data_index;
  for (int c = 0; c < datum_channels; ++c) {
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        data_index = (c * datum_height + h_off + h) * datum_width + w_off + w;
        if (do_mirror) {
          top_index = (c * height + h) * width + (width - 1 - w);
        } else {
          top_index = (c * height + h) * width + w;
        }
        if (has_uint8) {
          datum_element =
            static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
        } else {
          datum_element = datum.float_data(data_index);
        }
        if (has_mean_file) {
          transformed_data[top_index] =
            (datum_element - mean[data_index]) * scale;
        } else {
          if (has_mean_values) {
            transformed_data[top_index] =
              (datum_element - mean_values_[c]) * scale;
          } else {
            transformed_data[top_index] = datum_element * scale;
          }
        }
      }
    }
  }
}
	void* DataLayerPrefetch(void* layer_pointer) {
		CHECK(layer_pointer);
		DataLayer<Dtype>* layer = static_cast<DataLayer<Dtype>*>(layer_pointer);
		CHECK(layer);
		Datum datum;
		CHECK(layer->prefetch_data_);
		Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();  // data
		Dtype* top_label;                                              // label
		if (layer->output_labels_) {
			top_label = layer->prefetch_label_->mutable_cpu_data();
		}
		const Dtype scale = layer->layer_param_.data_param().scale();
		const int batch_size = layer->layer_param_.data_param().batch_size();
		const int crop_size = layer->layer_param_.data_param().crop_size();
		const bool mirror = layer->layer_param_.data_param().mirror();

		if (mirror && crop_size == 0) {  // mirror and crop_size must be set together
			LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
				<< "set at the same time.";
		}
		// datum scales
		const int channels = layer->datum_channels_;
		const int height = layer->datum_height_;
		const int width = layer->datum_width_;
		const int size = layer->datum_size_;
		const Dtype* mean = layer->data_mean_.cpu_data();
		
		for (int item_id = 0; item_id < batch_size; ++item_id) {
			// each batch holds batch_size items; each loop iteration fetches one datum

			// get a blob
			CHECK(layer->iter_);
			CHECK(layer->iter_->Valid());
			datum.ParseFromString(layer->iter_->value().ToString());  // parse the next record via the iterator
			const string& data = datum.data();

			int label_blob_channels = layer->prefetch_label_->channels();
			int label_data_dim = datum.label_size();
			CHECK_EQ(label_blob_channels, label_data_dim)
				<< "label size does not match the prefetch_label channel count.";
			
			if (crop_size) {  // cropping requested
				CHECK(data.size()) << "Image cropping only support uint8 data";
				int h_off, w_off;
				// We only do random crop when we do training.
				if (layer->phase_ == Caffe::TRAIN) {
					h_off = layer->PrefetchRand() % (height - crop_size);
					w_off = layer->PrefetchRand() % (width - crop_size);
				} else {  // center crop at test time
					h_off = (height - crop_size) / 2;
					w_off = (width - crop_size) / 2;
				}
				// Note: the two branches below differ only in the destination index;
				// the mirrored copy writes to column (crop_size - 1 - w).
				if (mirror && layer->PrefetchRand() % 2) {
					// Copy mirrored version
					for (int c = 0; c < channels; ++c) {
						for (int h = 0; h < crop_size; ++h) {
							for (int w = 0; w < crop_size; ++w) {
								int top_index = ((item_id * channels + c) * crop_size + h)
									* crop_size + (crop_size - 1 - w);
								int data_index = (c * height + h + h_off) * width + w + w_off;
								Dtype datum_element =
									static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
								top_data[top_index] = (datum_element - mean[data_index]) * scale;
							}
						}
					}
				} else {
					// Normal (non-mirrored) copy
					for (int c = 0; c < channels; ++c) {
						for (int h = 0; h < crop_size; ++h) {
							for (int w = 0; w < crop_size; ++w) {
								int top_index = ((item_id * channels + c) * crop_size + h)
									* crop_size + w;
								int data_index = (c * height + h + h_off) * width + w + w_off;
								Dtype datum_element =
									static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
								top_data[top_index] = (datum_element - mean[data_index]) * scale;
							}
						}
					}
				}
			} else {
				// we will prefer to use data() first, and then try float_data()
				if (data.size()) {
					for (int j = 0; j < size; ++j) {
						Dtype datum_element =
							static_cast<Dtype>(static_cast<uint8_t>(data[j]));
						top_data[item_id * size + j] = (datum_element - mean[j]) * scale;
					}
				} else {
					for (int j = 0; j < size; ++j) {
						top_data[item_id * size + j] =
							(datum.float_data(j) - mean[j]) * scale;
					}
				}
			}

		
			if (g_item_id++ < 5)
			{
				int label_size = datum.label_size();	
				int image_label = 0;
				for (int j = 0; j < label_size; ++j) {
					if (datum.label(j) == 1)
					{
						image_label = j;
						break;
					}
				}	
				
				char strImgRawDataFile[255] = "";
				sprintf(strImgRawDataFile, "caffe_%s_%05d_%d%s", "train", item_id, image_label, ".txt");
				ofstream fout_image_raw_data(strImgRawDataFile);

				for (int h = 0; h < height; ++h) {
					for (int w = 0; w < width; ++w) {
						int pixel_index = h * width + w;  // row-major index into the raw image
						Dtype datum_element = static_cast<Dtype>(static_cast<uint8_t>(data[pixel_index]));

						char strHexByte[3] = "";
						sprintf(strHexByte, "%02X", (unsigned char)datum_element);
						fout_image_raw_data<<" "<<strHexByte;
					}
					
					fout_image_raw_data<<endl;
				}
				
				fout_image_raw_data<<endl;
				for (int j = 0; j < label_size; ++j) {
					fout_image_raw_data<<datum.label(j);
				}	

				fout_image_raw_data.close();
			}
		
			if (layer->output_labels_) {
				int label_size = datum.label_size();				
				for (int j = 0; j < label_size; ++j) {
					top_label[item_id * label_size + j] = datum.label(j);
				}				
				//top_label[item_id] = datum.label();
			}
			
			// go to the next iter
			layer->iter_->Next();
			if (!layer->iter_->Valid()) {
				// We have reached the end. Restart from the first.
				DLOG(INFO) << "Restarting data prefetching from start.";
				layer->iter_->SeekToFirst();
			}
		}

		return static_cast<void*>(NULL);
	}