void convert_to_polar_data_transformer::transform(
	const void * data,
	void * data_transformed,
	neuron_data_type::input_type type,
	const layer_configuration_specific& original_config,
	unsigned int sample_id)
{
	if (type != neuron_data_type::type_byte)
		throw neural_network_exception("convert_to_polar_data_transformer is implemented for data stored as bytes only");

	if (original_config.dimension_sizes.size() != 2)
		throw neural_network_exception((boost::format("convert_to_polar_data_transformer is processing 2D data only, data is passed with number of dimensions %1%") % original_config.dimension_sizes.size()).str());

	if (original_config.dimension_sizes != input_window_sizes)
		throw neural_network_exception("convert_to_polar_data_transformer: input window size mismatch between creation and actual transform");

	unsigned int original_neuron_count_per_feature_map = original_config.get_neuron_count_per_feature_map();
	unsigned int transformed_neuron_count_per_feature_map = get_transformed_configuration(original_config).get_neuron_count_per_feature_map();

	for(unsigned int feature_map_id = 0; feature_map_id < original_config.feature_map_count; ++feature_map_id)
	{
		cv::Mat1b original_image(
			static_cast<int>(original_config.dimension_sizes[1]),
			static_cast<int>(original_config.dimension_sizes[0]),
			const_cast<unsigned char *>(static_cast<const unsigned char *>(data)) + (original_neuron_count_per_feature_map * feature_map_id));
		cv::Mat1b dest_image(
			static_cast<int>(output_window_sizes[1]),
			static_cast<int>(output_window_sizes[0]),
			static_cast<unsigned char *>(data_transformed) + (transformed_neuron_count_per_feature_map * feature_map_id));

		// Should try INTER_CUBIC and INTER_LANCZOS4 as well
		cv::remap(original_image, dest_image, map_x, map_y, cv::INTER_LINEAR, cv::BORDER_CONSTANT, border_value);
	}
}
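// The remap above depends on map_x / map_y having been filled in beforehand (presumably at
// construction time, which is not shown here). As an illustrative sketch only: a polar lookup table
// could be built roughly as below, treating each destination row as an angle step and each
// destination column as a radius step. The function name, the center/max_radius parameters, and the
// row/column orientation are all assumptions, not part of the class above; assumes <cmath> and the
// OpenCV core headers are available.
static void build_polar_maps(
	cv::Mat1f& map_x,
	cv::Mat1f& map_y,
	cv::Size output_size,
	cv::Point2f center,
	float max_radius)
{
	map_x.create(output_size);
	map_y.create(output_size);
	for(int row = 0; row < output_size.height; ++row)
	{
		float angle = static_cast<float>(row) / output_size.height * 2.0F * static_cast<float>(CV_PI);
		for(int col = 0; col < output_size.width; ++col)
		{
			float radius = static_cast<float>(col) / output_size.width * max_radius;
			// Source coordinates that cv::remap will sample for this (angle, radius) destination pixel
			map_x(row, col) = center.x + radius * std::cos(angle);
			map_y(row, col) = center.y + radius * std::sin(angle);
		}
	}
}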
void extract_2d_data_transformer::transform(
	const void * input_data,
	void * output_data,
	neuron_data_type::input_type type,
	const layer_configuration_specific& original_config)
{
	if (type != neuron_data_type::type_byte)
		throw neural_network_exception("extract_2d_data_transformer is implemented for data stored as bytes only");

	if (original_config.dimension_sizes.size() != 2)
		throw neural_network_exception((boost::format("extract_2d_data_transformer is processing 2D data only, data is passed with number of dimensions %1%") % original_config.dimension_sizes.size()).str());

	if (original_config.feature_map_count != 1)
		throw neural_network_exception("extract_2d_data_transformer is implemented for 1 feature map data only");

	cv::Mat1b original_image(
		static_cast<int>(original_config.dimension_sizes[1]),
		static_cast<int>(original_config.dimension_sizes[0]),
		const_cast<unsigned char *>(static_cast<const unsigned char *>(input_data)));

	// Centered crop window in the source image
	int window_top_left_x = (original_config.dimension_sizes[0] - input_window_width) / 2;
	int window_bottom_right_x = window_top_left_x + input_window_width;
	int window_top_left_y = (original_config.dimension_sizes[1] - input_window_height) / 2;
	int window_bottom_right_y = window_top_left_y + input_window_height;

	cv::Mat1b cropped_image = original_image.rowRange(window_top_left_y, window_bottom_right_y).colRange(window_top_left_x, window_bottom_right_x);
	cv::Mat1b dest_image(
		static_cast<int>(output_window_height),
		static_cast<int>(output_window_width),
		static_cast<unsigned char *>(output_data));

	cv::resize(cropped_image, dest_image, dest_image.size());
}
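// The byte transformers above rely on the same OpenCV idiom: a cv::Mat header constructed over an
// externally owned buffer neither copies nor takes ownership of that memory, and cv::resize() called
// with dsize equal to the destination's current size keeps the existing allocation, so the result
// lands directly in the caller's output buffer. A minimal standalone illustration (not part of the
// transformer code; buffer sizes are arbitrary):
static void crop_resize_into_external_buffer_demo()
{
	unsigned char src_buffer[32 * 32] = {0};
	unsigned char dst_buffer[8 * 8] = {0};
	cv::Mat1b source(32, 32, src_buffer);       // header over src_buffer, no copy
	cv::Mat1b destination(8, 8, dst_buffer);    // header over dst_buffer, no copy

	// Centered 16x16 crop, expressed as a view (still no copy)
	cv::Mat1b cropped = source.rowRange(8, 24).colRange(8, 24);

	// Passing destination.size() keeps the preallocated header, so dst_buffer receives the result
	cv::resize(cropped, destination, destination.size());
}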
int main(const int argc, const char *argv[]) {
  if (argc != 4) {
    std::cout << "ERROR!!! Specify all arguments\n";
    std::cout << "Usage:\n";
    std::cout << "\tVITDatasetCreator DIR_WITH_ETALONS ALPHABET_FILE OUTPUT_DIR\n";
    return 1;
  }

  const char *kInputDirectory = argv[1];
  const char *kAlphabetFile = argv[2];
  const char *kOutputDir = argv[3];

  auto alphabet = ReadAlphabetFromFile(kAlphabetFile);

  // Create one output directory per alphabet element
  for (const auto &elem : alphabet.GetMap()) {
    fs::path directory_path = fs::path(kOutputDir) / fs::path(elem.second.name);
    if (!fs::is_directory(fs::status(directory_path)) && !fs::create_directory(directory_path)) {
      std::cout << "Can't create directory: " << directory_path << std::endl;
      return 1;
    }
  }

  std::string process_dir = kInputDirectory;
  auto img_paths = DirFiles(process_dir, ".jpg");
  std::cout << "There are " << img_paths.size() << " images\n";

  int processed = 0;
  for (const auto &impath : img_paths) {
    if (processed % 100 == 0) {
      std::cout << "Processed " << processed << " files of " << img_paths.size() << std::endl;
    }
    ++processed;

    auto etalon_path = fs::path(impath).replace_extension("txt");
    DataInfo dinfo;
    try {
      dinfo = ReadEtalonsFromFile(etalon_path.string());
      if (dinfo.pinfo.empty())
        continue;
    } catch (std::exception &e) {
      std::cout << "Skip " << impath << ", reason: " << e.what() << std::endl;
      continue;
    }

    // Export every symbol of the image to the filesystem
    cv::Mat original_image = cv::imread(impath.string(), CV_LOAD_IMAGE_GRAYSCALE);
    for (const auto &p : dinfo.pinfo) {
      //std::cout << p.to_string() << std::endl;
      for (size_t i = 0; i < p.symbols.size(); i++) {
        std::string dir_name = alphabet.GetElemName(p.symbols[i].text);
        if (!dir_name.length()) {
          std::cout << "Unknown symbol: " << p.symbols[i].text << std::endl;
          continue;
        }
        fs::path output_dir = fs::path(kOutputDir) / fs::path(dir_name);

        // Resize the symbol to 10x12 and store it under a unique name in its class directory
        auto save_image = [&output_dir](const cv::Mat &image) {
          std::string image_name = GetGUID() + ".bmp";
          cv::Mat output_image;
          cv::resize(image, output_image, cv::Size(10, 12), 0, 0, CV_INTER_LANCZOS4);
          cv::imwrite((output_dir / fs::path(image_name)).string(), output_image);
        };

        auto equalize = [](const cv::Mat &image) {
          cv::Mat result;
          cv::equalizeHist(image, result);
          return result;
        };

        // Extract the symbol ROI, optionally padded by a percentage of its size on each side
        auto get_symbol_by_roi = [&original_image](cv::Rect r, int left_pad, int right_pad, int top_pad, int bottom_pad) {
          cv::Rect new_roi = r;
          int width = r.width;
          int height = r.height;
          new_roi.x = std::max(0, new_roi.x - width * left_pad / 100);
          new_roi.y = std::max(0, new_roi.y - height * top_pad / 100);
          int new_x2 = std::min(original_image.cols, r.x + width + width * right_pad / 100);
          int new_y2 = std::min(original_image.rows, r.y + height + height * bottom_pad / 100);
          new_roi.width = new_x2 - new_roi.x;
          new_roi.height = new_y2 - new_roi.y;
          return original_image(new_roi);
        };

        cv::Rect symbol_roi = p.symbols[i].rect;
        //todo: rewrite using cycles (see the sketch after main())
        save_image(equalize(get_symbol_by_roi(symbol_roi, 0, 0, 0, 0)));
        save_image(equalize(get_symbol_by_roi(symbol_roi, 20, 0, 0, 0)));
        save_image(equalize(get_symbol_by_roi(symbol_roi, 0, 20, 0, 0)));
        save_image(equalize(get_symbol_by_roi(symbol_roi, 0, 0, 20, 0)));
        save_image(equalize(get_symbol_by_roi(symbol_roi, 0, 0, 0, 20)));
        save_image(equalize(get_symbol_by_roi(symbol_roi, 20, 20, 20, 20)));
        save_image(get_symbol_by_roi(symbol_roi, 0, 0, 0, 0));
        save_image(get_symbol_by_roi(symbol_roi, 20, 0, 0, 0));
        save_image(get_symbol_by_roi(symbol_roi, 0, 20, 0, 0));
        save_image(get_symbol_by_roi(symbol_roi, 0, 0, 20, 0));
        save_image(get_symbol_by_roi(symbol_roi, 0, 0, 0, 20));
        save_image(get_symbol_by_roi(symbol_roi, 20, 20, 20, 20));
      }
    }
  }

  return 0;
}
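// A possible shape for the loop that the "todo: rewrite using cycles" above refers to. This is a
// hedged sketch only, not the project's actual helper: it enumerates the same six padding variants
// as the repeated save_image(...) calls in main(), saving each variant once equalized and once raw.
// The function name and the save callback (standing in for the save_image lambda) are hypothetical;
// assumes <functional> and <algorithm> in addition to the OpenCV headers already used here.
void save_symbol_variants(
    const cv::Mat &original_image,
    const cv::Rect &symbol_roi,
    const std::function<void(const cv::Mat &)> &save) {
  // left, right, top, bottom padding (percent of the symbol size), matching the calls in main()
  const int paddings[6][4] = {
      { 0,  0,  0,  0},
      {20,  0,  0,  0},
      { 0, 20,  0,  0},
      { 0,  0, 20,  0},
      { 0,  0,  0, 20},
      {20, 20, 20, 20},
  };

  for (const auto &pad : paddings) {
    // Same arithmetic as get_symbol_by_roi: grow the ROI by a percentage, clamped to the image
    cv::Rect roi = symbol_roi;
    roi.x = std::max(0, roi.x - roi.width * pad[0] / 100);
    roi.y = std::max(0, roi.y - roi.height * pad[2] / 100);
    int x2 = std::min(original_image.cols, symbol_roi.x + symbol_roi.width + symbol_roi.width * pad[1] / 100);
    int y2 = std::min(original_image.rows, symbol_roi.y + symbol_roi.height + symbol_roi.height * pad[3] / 100);
    roi.width = x2 - roi.x;
    roi.height = y2 - roi.y;

    cv::Mat symbol = original_image(roi);
    cv::Mat equalized;
    cv::equalizeHist(symbol, equalized);
    save(equalized);
    save(symbol);
  }
}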
void extract_data_transformer::transform(
	const float * data,
	float * data_transformed,
	const layer_configuration_specific& original_config,
	unsigned int sample_id)
{
	if (input_window_sizes == output_window_sizes)
	{
		// No resizing: copy a centered crop of output_window_sizes, one innermost row at a time
		const std::vector<unsigned int>& dimension_sizes = original_config.dimension_sizes;

		if (dimension_sizes.size() != input_window_sizes.size())
			throw neural_network_exception((boost::format("extract_data_transformer is created with %1%-dimensions, data has %2% dimensions") % input_window_sizes.size() % dimension_sizes.size()).str());

		std::vector<unsigned int> src_offset_list;
		for(unsigned int i = 0; i < dimension_sizes.size(); ++i)
		{
			if (dimension_sizes[i] < output_window_sizes[i])
				throw neural_network_exception((boost::format("Dimension %1% of original config has %2% size while minimum is %3%") % i % dimension_sizes[i] % output_window_sizes[i]).str());
			src_offset_list.push_back((dimension_sizes[i] - output_window_sizes[i]) / 2);
		}

		std::vector<unsigned int> dst_pos_list(dimension_sizes.size(), 0);
		const float * src_begin = data;
		float * dst = data_transformed;
		for(unsigned int feature_map_id = 0; feature_map_id < original_config.feature_map_count; ++feature_map_id)
		{
			while (true)
			{
				// Flatten (destination position + crop offset) into a row-major source offset
				// via Horner's scheme, starting from the outermost dimension
				unsigned int offset = dst_pos_list.back() + src_offset_list.back();
				for(int i = static_cast<int>(dimension_sizes.size()) - 2; i >= 0; --i)
					offset = offset * dimension_sizes[i] + dst_pos_list[i] + src_offset_list[i];

				// Copy one contiguous row of the innermost dimension
				memcpy(dst, src_begin + offset, output_window_sizes[0] * sizeof(float));
				dst += output_window_sizes[0];

				// Advance the multi-dimensional destination position (dimension 0 is covered by the memcpy)
				bool inc = false;
				for(unsigned int i = 1; i < output_window_sizes.size(); ++i)
				{
					dst_pos_list[i]++;
					if (dst_pos_list[i] < output_window_sizes[i])
					{
						inc = true;
						break;
					}
					else
						dst_pos_list[i] = 0;
				}
				if (!inc)
					break;
			}
			src_begin += original_config.get_neuron_count_per_feature_map();
		}
	}
	else
	{
		// Resizing path: crop a centered input window and rescale it to the output window (2D only)
		if (original_config.dimension_sizes.size() != 2)
			throw neural_network_exception((boost::format("Resizing extract_data_transformer is processing 2D data only, data is passed with number of dimensions %1%") % original_config.dimension_sizes.size()).str());

		int window_top_left_x = (original_config.dimension_sizes[0] - input_window_sizes[0]) / 2;
		int window_bottom_right_x = window_top_left_x + input_window_sizes[0];
		int window_top_left_y = (original_config.dimension_sizes[1] - input_window_sizes[1]) / 2;
		int window_bottom_right_y = window_top_left_y + input_window_sizes[1];

		unsigned int original_neuron_count_per_feature_map = original_config.get_neuron_count_per_feature_map();
		unsigned int transformed_neuron_count_per_feature_map = get_transformed_configuration(original_config).get_neuron_count_per_feature_map();

		for(unsigned int feature_map_id = 0; feature_map_id < original_config.feature_map_count; ++feature_map_id)
		{
			cv::Mat1f original_image(
				static_cast<int>(original_config.dimension_sizes[1]),
				static_cast<int>(original_config.dimension_sizes[0]),
				const_cast<float *>(data) + (original_neuron_count_per_feature_map * feature_map_id));
			cv::Mat1f cropped_image = original_image.rowRange(window_top_left_y, window_bottom_right_y).colRange(window_top_left_x, window_bottom_right_x);
			cv::Mat1f dest_image(
				static_cast<int>(output_window_sizes[1]),
				static_cast<int>(output_window_sizes[0]),
				data_transformed + (transformed_neuron_count_per_feature_map * feature_map_id));

			cv::resize(cropped_image, dest_image, dest_image.size());
		}
	}
}
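// The offset arithmetic in the non-resizing branch above, pulled out as a standalone helper purely
// for illustration (the class does not use this function). For dimension_sizes = {W, H, D}, crop
// offsets {ox, oy, oz} and destination position {0, y, z}, the value returned is
// ((z + oz) * H + (y + oy)) * W + ox, i.e. the row-major index of the first element copied by each
// memcpy row.
static unsigned int flatten_row_major_offset(
	const std::vector<unsigned int> &dimension_sizes,
	const std::vector<unsigned int> &dst_pos_list,
	const std::vector<unsigned int> &src_offset_list)
{
	unsigned int offset = dst_pos_list.back() + src_offset_list.back();
	for(int i = static_cast<int>(dimension_sizes.size()) - 2; i >= 0; --i)
		offset = offset * dimension_sizes[i] + dst_pos_list[i] + src_offset_list[i];
	return offset;
}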
int main( int argc, char * argv[] )
{
	const char * WINDOW_NAME = "Original Image vs. Box Filter vs. Gaussian";
	const int QUIT_KEY_CODE = 113;
	int box_filter_width = 3;
	float sigma = 1.0;
	std::string filename = "cameraman.tif";

	ImageRAII original_image( filename );
	CvSize image_dimensions = { original_image.image->width, original_image.image->height };
	ImageRAII box_filter_image( cvCreateImage( image_dimensions, original_image.image->depth, 3 ) );
	ImageRAII gaussian_image( cvCreateImage( image_dimensions, original_image.image->depth, 3 ) );
	ImageRAII combined_image( cvCreateImage( cvSize( original_image.image->width * 3, original_image.image->height ), original_image.image->depth, 3 ) );

	MatrixRAII box_filter = makeBoxFilter( box_filter_width );
	MatrixRAII gaussian_filter_x = make1DGaussianFilter( sigma );
	MatrixRAII gaussian_filter_y = cvCreateMat( static_cast<int>( sigma * 5 ), 1, CV_64FC1 );
	cvTranspose( gaussian_filter_x.matrix, gaussian_filter_y.matrix );

	std::vector<ImageRAII> original_image_channels( 3 );
	std::vector<ImageRAII> box_filter_channels( 3 );
	std::vector<ImageRAII> gaussian_filter_channels( 3 );
	std::vector<ImageRAII> gaussian_filter_2_channels( 3 );

	// initialize image channel vectors
	for( int i = 0; i < original_image.image->nChannels; i++ )
	{
		original_image_channels[i].image = cvCreateImage( image_dimensions, original_image.image->depth, 1 );
		box_filter_channels[i].image = cvCreateImage( image_dimensions, original_image.image->depth, 1 );
		gaussian_filter_channels[i].image = cvCreateImage( image_dimensions, original_image.image->depth, 1 );
		gaussian_filter_2_channels[i].image = cvCreateImage( image_dimensions, original_image.image->depth, 1 );
	}

	// split image channels
	cvSplit( original_image.image, original_image_channels[0].image, original_image_channels[1].image, original_image_channels[2].image, NULL );

	// apply filters: box filter in one pass, Gaussian as two separable 1D passes (x then y)
	for( int i = 0; i < original_image.image->nChannels; i++ )
	{
		cvFilter2D( original_image_channels[i].image, box_filter_channels[i].image, box_filter.matrix );
		cvFilter2D( original_image_channels[i].image, gaussian_filter_channels[i].image, gaussian_filter_x.matrix );
		cvFilter2D( gaussian_filter_channels[i].image, gaussian_filter_2_channels[i].image, gaussian_filter_y.matrix );
	}

	// Merge channels back
	cvMerge( box_filter_channels[0].image, box_filter_channels[1].image, box_filter_channels[2].image, NULL, box_filter_image.image );
	cvMerge( gaussian_filter_2_channels[0].image, gaussian_filter_2_channels[1].image, gaussian_filter_2_channels[2].image, NULL, gaussian_image.image );

	// Combine images side by side (channel 0 of each source is replicated across B, G, R).
	// Rows iterate over the image height and columns over the width; the horizontal offsets for the
	// second and third panels are multiples of the image width, so the code also works for
	// non-square images.
	int step = original_image.image->widthStep;
	int step_destination = combined_image.image->widthStep;
	int nChan = original_image.image->nChannels;
	char *buf = combined_image.image->imageData;
	char *original_buf = original_image.image->imageData;
	char *box_filter_buf = box_filter_image.image->imageData;
	char *gaussian_filter_buf = gaussian_image.image->imageData;

	for( int row = 0; row < original_image.image->height; row++ )
	{
		for( int col = 0; col < original_image.image->width; col++ )
		{
			int width_adjust = 0;

			// original image
			// blue
			*( buf + row * step_destination + nChan * col + width_adjust ) = *( original_buf + row * step + nChan * col );
			// green
			*( buf + row * step_destination + nChan * col + 1 + width_adjust ) = *( original_buf + row * step + nChan * col );
			// red
			*( buf + row * step_destination + nChan * col + 2 + width_adjust ) = *( original_buf + row * step + nChan * col );

			// box filter, shifted one image width to the right
			width_adjust = original_image.image->width * nChan;
			*( buf + row * step_destination + nChan * col + width_adjust ) = *( box_filter_buf + row * step + nChan * col );
			*( buf + row * step_destination + nChan * col + 1 + width_adjust ) = *( box_filter_buf + row * step + nChan * col );
			*( buf + row * step_destination + nChan * col + 2 + width_adjust ) = *( box_filter_buf + row * step + nChan * col );

			// gaussian filter, shifted two image widths to the right
			width_adjust = original_image.image->width * 2 * nChan;
			*( buf + row * step_destination + nChan * col + width_adjust ) = *( gaussian_filter_buf + row * step + nChan * col );
			*( buf + row * step_destination + nChan * col + 1 + width_adjust ) = *( gaussian_filter_buf + row * step + nChan * col );
			*( buf + row * step_destination + nChan * col + 2 + width_adjust ) = *( gaussian_filter_buf + row * step + nChan * col );
		}
	}

	// create window and display the combined image
	cvNamedWindow( WINDOW_NAME, CV_WINDOW_AUTOSIZE );
	cvShowImage( WINDOW_NAME, combined_image.image );

	// wait for keyboard input ('q' to quit)
	int key_code = 0;
	while( key_code != QUIT_KEY_CODE )
	{
		key_code = cvWaitKey( 0 );
	}

	return 0;
}
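// makeBoxFilter() and make1DGaussianFilter() are called above but not shown in this file. The
// sketches below (note the "Sketch" suffix: they are assumptions, not the project's helpers) show
// one plausible shape for them with the same legacy C API. They return a raw CvMat * that the
// MatrixRAII wrapper above takes ownership of, the Gaussian kernel is 1 x (5*sigma) to match the
// cvCreateMat( sigma * 5, 1, ... ) call in main(), and the normalization choices are assumptions;
// requires <math.h>.
CvMat * makeBoxFilterSketch( int width )
{
	// width x width kernel with every tap equal to 1/(width*width), i.e. a plain average
	CvMat * box = cvCreateMat( width, width, CV_64FC1 );
	for( int row = 0; row < width; row++ )
		for( int col = 0; col < width; col++ )
			cvmSet( box, row, col, 1.0 / ( width * width ) );
	return box;
}

CvMat * make1DGaussianFilterSketch( float sigma )
{
	// 1 x (5*sigma) horizontal kernel, normalized so the taps sum to 1
	int length = static_cast<int>( sigma * 5 );
	CvMat * kernel = cvCreateMat( 1, length, CV_64FC1 );
	double center = ( length - 1 ) / 2.0;
	double sum = 0.0;
	for( int col = 0; col < length; col++ )
	{
		double value = exp( -( col - center ) * ( col - center ) / ( 2.0 * sigma * sigma ) );
		cvmSet( kernel, 0, col, value );
		sum += value;
	}
	for( int col = 0; col < length; col++ )
		cvmSet( kernel, 0, col, cvmGet( kernel, 0, col ) / sum );
	return kernel;
}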