// Applies the configured augmentations to a single OpenCV image and writes the
// result into `transformed_blob`.
//
// Steps, in order: optional random rotation (TRAIN only), resize to a fixed or
// randomly chosen minimum side, random per-channel color shift, random
// contrast/brightness adjustment, random smoothing filter, crop (random in
// TRAIN, centered otherwise), then mean subtraction, scaling and optional
// horizontal mirroring while copying into the blob.
//
// @param img               Source image. Must be 8-bit unsigned after the
//                          augmentations (enforced by CHECK). The pixel data
//                          of `img` is not modified by the in-place
//                          operations performed in this function.
// @param transformed_blob  Destination blob. Its channels/height/width define
//                          the output size; data is written in planar order,
//                          index = (c * height + h) * width + w.
//
// Fails via CHECK when the image and blob dimensions are incompatible, when the
// mean file dimensions do not match the (augmented) image, or when the number
// of mean values is neither 1 nor the channel count.
void DataTransformer<Dtype>::Transform(const cv::Mat& img,
                                       Blob<Dtype>* transformed_blob) {
  const int min_side = param_.min_side();
  const int min_side_min = param_.min_side_min();
  const int min_side_max = param_.min_side_max();
  const int crop_size = param_.crop_size();
  const int rotation_angle = param_.max_rotation_angle();
  const float min_contrast = param_.min_contrast();
  const float max_contrast = param_.max_contrast();
  const int max_brightness_shift = param_.max_brightness_shift();
  const float max_smooth = param_.max_smooth();
  const int max_color_shift = param_.max_color_shift();
  // An augmentation fires when a uniform draw in [0, 1) exceeds this value.
  const float apply_prob = 1.f - param_.apply_probability();
  const bool debug_params = param_.debug_params();

  // Check dimensions.
  const int channels = transformed_blob->channels();
  const int height = transformed_blob->height();
  const int width = transformed_blob->width();
  const int num = transformed_blob->num();

  const Dtype scale = param_.scale();
  const bool has_mean_file = param_.has_mean_file();
  const bool has_mean_values = mean_values_.size() > 0;

  // Decide which augmentations are active. The order of the random draws
  // below is kept fixed so that results for a given seed do not change.
  float current_prob = 0.f;
  const bool do_rotation = rotation_angle > 0 && phase_ == TRAIN;
  const bool do_resize_to_min_side = min_side > 0;
  const bool do_resize_to_min_side_min = min_side_min > 0;
  const bool do_resize_to_min_side_max = min_side_max > 0;
  const bool do_mirror = param_.mirror() && phase_ == TRAIN && Rand(2);

  caffe_rng_uniform(1, 0.f, 1.f, &current_prob);
  const bool do_brightness = param_.contrast_brightness_adjustment() &&
      phase_ == TRAIN && current_prob > apply_prob;

  caffe_rng_uniform(1, 0.f, 1.f, &current_prob);
  const bool do_smooth = param_.smooth_filtering() && phase_ == TRAIN &&
      max_smooth > 1 && current_prob > apply_prob;

  caffe_rng_uniform(1, 0.f, 1.f, &current_prob);
  const bool do_color_shift = max_color_shift > 0 && phase_ == TRAIN &&
      current_prob > apply_prob;

  // Shallow copy: cv_img initially shares pixel storage with img.
  cv::Mat cv_img = img;

  // NOTE(review): rotate() and resize() are helpers defined outside this
  // block; whether they reallocate cv_img or write in place is not visible
  // here.
  int current_angle = 0;
  if (do_rotation) {
    // Uniform integer angle in [-rotation_angle, rotation_angle].
    current_angle = Rand(rotation_angle * 2 + 1) - rotation_angle;
    if (current_angle) {
      rotate(cv_img, current_angle);
    }
  }

  // resizing and crop according to min side, preserving aspect ratio
  if (do_resize_to_min_side) {
    resize(cv_img, min_side);
    // random_crop(cv_img, min_side);
  }

  if (do_resize_to_min_side_min && do_resize_to_min_side_max) {
    // Uniform integer side length in [min_side_min, min_side_max].
    const int min_side_length =
        min_side_min + Rand(min_side_max - min_side_min + 1);
    resize(cv_img, min_side_length);
    // crop_center(cv_img, min_side, min_side);
    // random_crop(cv_img, min_side_length);
  }

  // The operations below write into cv_img's buffer. If that buffer is still
  // the caller's, take a private copy first so the const input is left intact.
  if ((do_color_shift || do_brightness || do_smooth) &&
      cv_img.data == img.data) {
    cv_img = cv_img.clone();
  }

  // apply color shift
  if (do_color_shift) {
    const int b = Rand(max_color_shift + 1);
    const int g = Rand(max_color_shift + 1);
    const int r = Rand(max_color_shift + 1);
    const int sign = Rand(2);
    // Constant image holding the per-channel shift, same size/type as cv_img.
    const cv::Mat shiftArr(cv_img.size(), cv_img.type(), cv::Scalar(b, g, r));
    if (sign == 1) {
      cv_img -= shiftArr;
    } else {
      cv_img += shiftArr;
    }
  }

  // set contrast and brightness
  // Identity defaults; only overwritten (and only logged) when do_brightness.
  float alpha = 1.f;
  int beta = 0;
  if (do_brightness) {
    caffe_rng_uniform(1, min_contrast, max_contrast, &alpha);
    beta = Rand(max_brightness_shift * 2 + 1) - max_brightness_shift;
    cv_img.convertTo(cv_img, -1, alpha, beta);
  }

  // set smoothness
  int smooth_param = 0;
  int smooth_type = 0;
  if (do_smooth) {
    smooth_type = Rand(4);
    // Odd kernel size 1, 3, 5, ... chosen from max_smooth / 2 candidates.
    // For 1 < max_smooth < 2 the candidate count truncates to 0; fall back to
    // a size of 1 instead of asking Rand() for a value from an empty range.
    const int smooth_choices = static_cast<int>(max_smooth / 2);
    smooth_param = smooth_choices > 0 ? 1 + 2 * Rand(smooth_choices) : 1;
    switch (smooth_type) {
      case 0:
        cv::GaussianBlur(cv_img, cv_img,
                         cv::Size(smooth_param, smooth_param), 0);
        break;
      case 1:
        cv::blur(cv_img, cv_img, cv::Size(smooth_param, smooth_param));
        break;
      case 2:
        cv::medianBlur(cv_img, cv_img, smooth_param);
        break;
      case 3:
        cv::boxFilter(cv_img, cv_img, -1,
                      cv::Size(smooth_param * 2, smooth_param * 2));
        break;
      default:
        break;
    }
  }

  if (debug_params && phase_ == TRAIN) {
    LOG(INFO) << "----------------------------------------";
    if (do_rotation) {
      LOG(INFO) << "* parameter for rotation: ";
      LOG(INFO) << " current rotation angle: " << current_angle;
    }
    if (do_brightness) {
      LOG(INFO) << "* parameter for contrast adjustment: ";
      LOG(INFO) << " alpha: " << alpha << ", beta: " << beta;
    }
    if (do_smooth) {
      LOG(INFO) << "* parameter for smooth filtering: ";
      LOG(INFO) << " smooth type: " << smooth_type
                << ", smooth param: " << smooth_param;
    }
  }

  // Validate the augmented image against the destination blob.
  const int img_channels = cv_img.channels();
  const int img_height = cv_img.rows;
  const int img_width = cv_img.cols;

  CHECK_GT(img_channels, 0);
  CHECK_GE(img_height, crop_size);
  CHECK_GE(img_width, crop_size);

  CHECK_EQ(channels, img_channels);
  CHECK_LE(height, img_height);
  CHECK_LE(width, img_width);
  CHECK_GE(num, 1);

  CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte";

  Dtype* mean = NULL;
  if (has_mean_file) {
    CHECK_EQ(img_channels, data_mean_.channels());
    CHECK_EQ(img_height, data_mean_.height());
    CHECK_EQ(img_width, data_mean_.width());
    mean = data_mean_.mutable_cpu_data();
  }
  if (has_mean_values) {
    CHECK(mean_values_.size() == 1 || mean_values_.size() == img_channels)
        << "Specify either 1 mean_value or as many as channels: "
        << img_channels;
    if (img_channels > 1 && mean_values_.size() == 1) {
      // Replicate the mean_value for simplicity
      for (int c = 1; c < img_channels; ++c) {
        mean_values_.push_back(mean_values_[0]);
      }
    }
  }

  int h_off = 0;
  int w_off = 0;
  cv::Mat cv_cropped_img = cv_img;
  if (crop_size) {
    CHECK_EQ(crop_size, height);
    CHECK_EQ(crop_size, width);
    // We only do random crop when we do training.
    if (phase_ == TRAIN) {
      h_off = Rand(img_height - crop_size + 1);
      w_off = Rand(img_width - crop_size + 1);
    } else {
      h_off = (img_height - crop_size) / 2;
      w_off = (img_width - crop_size) / 2;
    }
    cv::Rect roi(w_off, h_off, crop_size, crop_size);
    cv_cropped_img = cv_img(roi);
  } else {
    // Without cropping the blob may be smaller than the image (see the
    // CHECK_LE above); only the top-left height x width region is copied.
    // CHECK_EQ(img_height, height);
    // CHECK_EQ(img_width, width);
  }

  CHECK(cv_cropped_img.data);

  // Copy interleaved 8-bit pixels into the planar blob, applying mean
  // subtraction and scaling on the way.
  Dtype* transformed_data = transformed_blob->mutable_cpu_data();
  for (int h = 0; h < height; ++h) {
    const uchar* ptr = cv_cropped_img.ptr<uchar>(h);
    int img_index = 0;
    for (int w = 0; w < width; ++w) {
      // Mirroring flips the destination column; the source is read in order.
      const int out_w = do_mirror ? (width - 1 - w) : w;
      for (int c = 0; c < img_channels; ++c) {
        const int top_index = (c * height + h) * width + out_w;
        const Dtype pixel = static_cast<Dtype>(ptr[img_index++]);
        if (has_mean_file) {
          // The mean is indexed in uncropped image coordinates.
          const int mean_index =
              (c * img_height + h_off + h) * img_width + w_off + w;
          transformed_data[top_index] = (pixel - mean[mean_index]) * scale;
        } else if (has_mean_values) {
          transformed_data[top_index] = (pixel - mean_values_[c]) * scale;
        } else {
          transformed_data[top_index] = pixel * scale;
        }
      }
    }
  }
}
// Float variant: returns a height x width cv::Mat of type CV_32FC3 constructed
// directly on top of `singleimage`.
// NOTE(review): the cv::Mat constructor that takes a data pointer is documented
// by OpenCV as not copying the buffer, so `singleimage` presumably has to
// outlive the returned Mat and hold height * width * 3 floats -- confirm
// against callers.
cv::Mat converttoopencvimg<float>(float* singleimage, int height, int width) {
  return cv::Mat(height, width, CV_32FC3, singleimage);
}
// Double variant: returns a height x width cv::Mat of type CV_64FC3 constructed
// directly on top of `singleimage`.
// NOTE(review): the cv::Mat constructor that takes a data pointer is documented
// by OpenCV as not copying the buffer, so `singleimage` presumably has to
// outlive the returned Mat and hold height * width * 3 doubles -- confirm
// against callers.
cv::Mat converttoopencvimg<double>(double* singleimage, int height, int width) {
  return cv::Mat(height, width, CV_64FC3, singleimage);
}
// Prefetch-thread body: fills prefetch_data_ / prefetch_label_ with one batch
// of (image, dense label) pairs read from lines_.
//
// For each item the image and its label image are loaded, optionally flipped
// horizontally together, optionally cropped with the same random window, and
// then passed through the data transformer into the item's slot of the
// prefetch blobs. lines_id_ advances per item and wraps to 0 at the end of the
// list (reshuffling if configured). Read and transform times are accumulated
// and reported via DLOG.
//
// Fails via CHECK when an image or label cannot be loaded, when the requested
// crop does not fit, or when the transformer is configured with mirror or
// crop_size (those would be applied independently to image and label).
void DenseImageDataLayer<Dtype>::InternalThreadEntry() {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(this->prefetch_data_.count());
  CHECK(this->transformed_data_.count());

  const DenseImageDataParameter& dense_image_data_param =
      this->layer_param_.dense_image_data_param();
  const int batch_size = dense_image_data_param.batch_size();
  const int new_height = dense_image_data_param.new_height();
  const int new_width = dense_image_data_param.new_width();
  const int crop_height = dense_image_data_param.crop_height();
  const int crop_width = dense_image_data_param.crop_width();
  const int crop_size = this->layer_param_.transform_param().crop_size();
  const bool is_color = dense_image_data_param.is_color();
  string root_folder = dense_image_data_param.root_folder();

  // Reshape on single input batches for inputs of varying dimension.
  if (batch_size == 1 && crop_size == 0 && new_height == 0 &&
      new_width == 0 && crop_height == 0 && crop_width == 0) {
    cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                      0, 0, is_color);
    this->prefetch_data_.Reshape(1, cv_img.channels(),
                                 cv_img.rows, cv_img.cols);
    this->transformed_data_.Reshape(1, cv_img.channels(),
                                    cv_img.rows, cv_img.cols);
    // Labels are single-channel with the same spatial size as the image.
    this->prefetch_label_.Reshape(1, 1, cv_img.rows, cv_img.cols);
    this->transformed_label_.Reshape(1, 1, cv_img.rows, cv_img.cols);
  }

  Dtype* prefetch_data = this->prefetch_data_.mutable_cpu_data();
  Dtype* prefetch_label = this->prefetch_label_.mutable_cpu_data();

  // datum scales
  const int lines_size = lines_.size();
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    timer.Start();
    CHECK_GT(lines_size, lines_id_);
    cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                      new_height, new_width, is_color);
    CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
    cv::Mat cv_lab = ReadImageToCVMat(root_folder + lines_[lines_id_].second,
                                      new_height, new_width, false, true);
    CHECK(cv_lab.data) << "Could not load " << lines_[lines_id_].second;
    read_time += timer.MicroSeconds();
    timer.Start();

    // Apply random horizontal mirror of images
    if (dense_image_data_param.mirror()) {
      const bool do_mirror = caffe_rng_rand() % 2;
      if (do_mirror) {
        // Flip image and label together so they stay aligned.
        cv::flip(cv_img, cv_img, 1);
        cv::flip(cv_lab, cv_lab, 1);
      }
    }

    // Apply crop
    const int height = cv_img.rows;
    const int width = cv_img.cols;
    int h_off = 0;
    int w_off = 0;
    if (crop_height > 0 && crop_width > 0) {
      // The modulus below must be positive; an oversized crop would otherwise
      // divide by zero or wrap around and yield an out-of-range window.
      CHECK_GE(height, crop_height)
          << "crop_height exceeds image height for "
          << lines_[lines_id_].first;
      CHECK_GE(width, crop_width)
          << "crop_width exceeds image width for "
          << lines_[lines_id_].first;
      h_off = caffe_rng_rand() % (height - crop_height + 1);
      w_off = caffe_rng_rand() % (width - crop_width + 1);
      // The same window is applied to the label, so it has to fit there too.
      CHECK_LE(h_off + crop_height, cv_lab.rows)
          << "Crop window exceeds label height for "
          << lines_[lines_id_].second;
      CHECK_LE(w_off + crop_width, cv_lab.cols)
          << "Crop window exceeds label width for "
          << lines_[lines_id_].second;
      cv::Rect myROI(w_off, h_off, crop_width, crop_height);
      cv_img = cv_img(myROI);
      cv_lab = cv_lab(myROI);
    }

    // Apply transformations (mirror, crop...) to the image
    const int offset = this->prefetch_data_.offset(item_id);
    this->transformed_data_.set_cpu_data(prefetch_data + offset);
    this->data_transformer_->Transform(cv_img, &(this->transformed_data_));

    // transform label the same way
    const int label_offset = this->prefetch_label_.offset(item_id);
    this->transformed_label_.set_cpu_data(prefetch_label + label_offset);
    this->data_transformer_->Transform(cv_lab, &this->transformed_label_,
                                       true);

    CHECK(!this->layer_param_.transform_param().mirror() &&
          this->layer_param_.transform_param().crop_size() == 0)
        << "FIXME: Any stochastic transformation will break layer due to "
        << "the need to transform input and label images in the same way";
    trans_time += timer.MicroSeconds();

    // go to the next iter
    lines_id_++;
    if (lines_id_ >= lines_size) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      lines_id_ = 0;
      if (dense_image_data_param.shuffle()) {
        ShuffleImages();
      }
    }
  }

  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}