// https://github.com/richzhang/colorization TEST(Reproducibility_Colorization, Accuracy) { const float l1 = 1e-5; const float lInf = 3e-3; Mat inp = blobFromNPY(_tf("colorization_inp.npy")); Mat ref = blobFromNPY(_tf("colorization_out.npy")); Mat kernel = blobFromNPY(_tf("colorization_pts_in_hull.npy")); const string proto = findDataFile("dnn/colorization_deploy_v2.prototxt", false); const string model = findDataFile("dnn/colorization_release_v2.caffemodel", false); Net net = readNetFromCaffe(proto, model); net.getLayer(net.getLayerId("class8_ab"))->blobs.push_back(kernel); net.getLayer(net.getLayerId("conv8_313_rh"))->blobs.push_back(Mat(1, 313, CV_32F, 2.606)); net.setInput(inp); Mat out = net.forward(); normAssert(out, ref, "", l1, lInf); }
// Verify that every layer of the network is supported by the currently
// selected backend/target pair (taken from the enclosing fixture's
// `backend` and `target` members). Layers fused into others report a zero
// timing and are skipped, since they have no standalone implementation.
void expectNoFallbacks(Net& net)
{
    // Check if all the layers are supported with current backend and target.
    // Some layers might be fused so their timings equal to zero.
    std::vector<double> timings;
    net.getPerfProfile(timings);
    std::vector<String> names = net.getLayerNames();
    CV_Assert(names.size() == timings.size());

    for (size_t i = 0; i < names.size(); ++i)  // size_t avoids a signed/unsigned comparison
    {
        Ptr<dnn::Layer> l = net.getLayer(net.getLayerId(names[i]));
        bool fused = !timings[i];  // zero time => the layer was fused away
        if ((!l->supportBackend(backend) || l->preferableTarget != target) && !fused)
            CV_Error(Error::StsNotImplemented, "Layer [" + l->name + "] of type [" +
                     l->type + "] is expected to have backend implementation");
    }
}
int main(int argc, char **argv) { string imageFileName; // Take arguments from commmand line if (argc < 2) { cout << "Please input the greyscale image filename." << endl; cout << "Usage example: ./colorizeImage.out greyscaleImage.png" << endl; return 1; } imageFileName = argv[1]; Mat img = imread(imageFileName); if (img.empty()) { cout << "Can't read image from file: " << imageFileName << endl; return 2; } string protoFile = "./models/colorization_deploy_v2.prototxt"; string weightsFile = "./models/colorization_release_v2.caffemodel"; //string weightsFile = "./models/colorization_release_v2_norebal.caffemodel"; double t = (double) cv::getTickCount(); // fixed input size for the pretrained network const int W_in = 224; const int H_in = 224; Net net = dnn::readNetFromCaffe(protoFile, weightsFile); // setup additional layers: int sz[] = {2, 313, 1, 1}; const Mat pts_in_hull(4, sz, CV_32F, hull_pts); Ptr<dnn::Layer> class8_ab = net.getLayer("class8_ab"); class8_ab->blobs.push_back(pts_in_hull); Ptr<dnn::Layer> conv8_313_rh = net.getLayer("conv8_313_rh"); conv8_313_rh->blobs.push_back(Mat(1, 313, CV_32F, Scalar(2.606))); // extract L channel and subtract mean Mat lab, L, input; img.convertTo(img, CV_32F, 1.0/255); cvtColor(img, lab, COLOR_BGR2Lab); extractChannel(lab, L, 0); resize(L, input, Size(W_in, H_in)); input -= 50; // run the L channel through the network Mat inputBlob = blobFromImage(input); net.setInput(inputBlob); Mat result = net.forward(); // retrieve the calculated a,b channels from the network output Size siz(result.size[2], result.size[3]); Mat a = Mat(siz, CV_32F, result.ptr(0,0)); Mat b = Mat(siz, CV_32F, result.ptr(0,1)); resize(a, a, img.size()); resize(b, b, img.size()); // merge, and convert back to BGR Mat color, chn[] = {L, a, b}; merge(chn, 3, lab); cvtColor(lab, color, COLOR_Lab2BGR); t = ((double)cv::getTickCount() - t)/cv::getTickFrequency(); cout << "Time taken : " << t << " secs" << endl; string str = imageFileName; str.replace(str.end()-4, 
str.end(), ""); str = str+"_colorized.png"; color = color*255; color.convertTo(color, CV_8U); imwrite(str, color); cout << "Colorized image saved as " << str << endl; return 0; }
// Sample: recolor a grayscale image with the Zhang et al. colorization
// network and show the result next to the original in HighGUI windows.
// Model files are located with samples::findFile; OpenCL is optional.
int main(int argc, char **argv)
{
    const string about = "This sample demonstrates recoloring grayscale images with dnn.\n"
                         "This program is based on:\n"
                         "  http://richzhang.github.io/colorization\n"
                         "  https://github.com/richzhang/colorization\n"
                         "Download caffemodel and prototxt files:\n"
                         "  http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/colorization_release_v2.caffemodel\n"
                         "  https://raw.githubusercontent.com/richzhang/colorization/master/colorization/models/colorization_deploy_v2.prototxt\n";
    const string keys =
        "{ h help |                                    | print this help message }"
        "{ proto  | colorization_deploy_v2.prototxt    | model configuration }"
        "{ model  | colorization_release_v2.caffemodel | model weights }"
        "{ image  | space_shuttle.jpg                  | path to image file }"
        "{ opencl |                                    | enable OpenCL }";
    CommandLineParser parser(argc, argv, keys);
    parser.about(about);
    if (parser.has("help"))
    {
        parser.printMessage();
        return 0;
    }
    string modelTxt = samples::findFile(parser.get<string>("proto"));
    string modelBin = samples::findFile(parser.get<string>("model"));
    string imageFile = samples::findFile(parser.get<string>("image"));
    bool useOpenCL = parser.has("opencl");
    if (!parser.check())
    {
        parser.printErrors();
        return 1;
    }

    Mat img = imread(imageFile);
    if (img.empty())
    {
        cout << "Can't read image from file: " << imageFile << endl;
        return 2;
    }

    // fixed input size for the pretrained network
    const int W_in = 224;
    const int H_in = 224;
    Net net = dnn::readNetFromCaffe(modelTxt, modelBin);
    if (useOpenCL)
        net.setPreferableTarget(DNN_TARGET_OPENCL);

    // setup additional layers: the model requires the ab cluster centers
    // (hull_pts, defined at file scope) and a rebalancing constant to be
    // attached as extra layer blobs at runtime.
    int sz[] = {2, 313, 1, 1};
    const Mat pts_in_hull(4, sz, CV_32F, hull_pts);
    Ptr<dnn::Layer> class8_ab = net.getLayer("class8_ab");
    class8_ab->blobs.push_back(pts_in_hull);
    Ptr<dnn::Layer> conv8_313_rh = net.getLayer("conv8_313_rh");
    conv8_313_rh->blobs.push_back(Mat(1, 313, CV_32F, Scalar(2.606)));

    // extract L channel and subtract mean (the network consumes mean-centered
    // L in a fixed 224x224 resolution)
    Mat lab, L, input;
    img.convertTo(img, CV_32F, 1.0/255);
    cvtColor(img, lab, COLOR_BGR2Lab);
    extractChannel(lab, L, 0);
    resize(L, input, Size(W_in, H_in));
    input -= 50;

    // run the L channel through the network
    Mat inputBlob = blobFromImage(input);
    net.setInput(inputBlob);
    Mat result = net.forward();

    // retrieve the calculated a,b channels from the network output
    // (result layout is NCHW: channel 0 = a, channel 1 = b)
    Size siz(result.size[2], result.size[3]);
    Mat a = Mat(siz, CV_32F, result.ptr(0,0));
    Mat b = Mat(siz, CV_32F, result.ptr(0,1));
    resize(a, a, img.size());
    resize(b, b, img.size());

    // merge the original full-resolution L with the upscaled a,b predictions,
    // and convert back to BGR
    Mat color, chn[] = {L, a, b};
    merge(chn, 3, lab);
    cvtColor(lab, color, COLOR_Lab2BGR);

    imshow("color", color);
    imshow("original", img);
    waitKey();
    return 0;
}
// Translate the parsed ONNX graph (model_proto) into the destination cv::dnn
// network. Constant initializers are folded into constBlobs; each graph node
// is mapped to the closest dnn layer type, connected to its producers, and
// its output shape is propagated through outShapes for shape-dependent ops
// (e.g. "Shape"). Nodes fully computable from constants (Constant, Unsqueeze
// on a blob, constant Reshape/Gather/Concat, ...) are evaluated at import
// time and never become layers (`continue` skips the addLayer step).
void ONNXImporter::populateNet(Net dstNet)
{
    CV_Assert(model_proto.has_graph());
    opencv_onnx::GraphProto graph_proto = model_proto.graph();
    std::map<std::string, Mat> constBlobs = getGraphTensors(graph_proto);
    // List of internal blobs shapes.
    std::map<std::string, MatShape> outShapes;
    // Add all the inputs shapes. It includes as constant blobs as network's inputs shapes.
    for (int i = 0; i < graph_proto.input_size(); ++i)
    {
        opencv_onnx::ValueInfoProto valueInfoProto = graph_proto.input(i);
        CV_Assert(valueInfoProto.has_type());
        opencv_onnx::TypeProto typeProto = valueInfoProto.type();
        CV_Assert(typeProto.has_tensor_type());
        opencv_onnx::TypeProto::Tensor tensor = typeProto.tensor_type();
        CV_Assert(tensor.has_shape());
        opencv_onnx::TensorShapeProto tensorShape = tensor.shape();

        MatShape inpShape(tensorShape.dim_size());
        for (int j = 0; j < inpShape.size(); ++j)
        {
            inpShape[j] = tensorShape.dim(j).dim_value();
        }
        outShapes[valueInfoProto.name()] = inpShape;
    }

    // Producer name is used below to enable PyTorch-specific pooling behavior.
    std::string framework_name;
    if (model_proto.has_producer_name()) {
        framework_name = model_proto.producer_name();
    }

    // create map with network inputs (without const blobs)
    std::map<std::string, LayerInfo> layer_id;
    std::map<std::string, LayerInfo>::iterator layerId;
    std::map<std::string, MatShape>::iterator shapeIt;
    // fill map: push layer name, layer id and output id
    std::vector<String> netInputs;
    for (int j = 0; j < graph_proto.input_size(); j++)
    {
        const std::string& name = graph_proto.input(j).name();
        if (constBlobs.find(name) == constBlobs.end()) {
            netInputs.push_back(name);
            // Network inputs live on the implicit input layer (id 0).
            layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1)));
        }
    }
    dstNet.setInputsNames(netInputs);

    int layersSize = graph_proto.node_size();
    LayerParams layerParams;
    opencv_onnx::NodeProto node_proto;

    for(int li = 0; li < layersSize; li++)
    {
        node_proto = graph_proto.node(li);
        layerParams = getLayerParams(node_proto);
        CV_Assert(node_proto.output_size() >= 1);
        // The layer is named after its first output so consumers can find it.
        layerParams.name = node_proto.output(0);

        std::string layer_type = node_proto.op_type();
        layerParams.type = layer_type;

        if (layer_type == "MaxPool")
        {
            layerParams.type = "Pooling";
            layerParams.set("pool", "MAX");
            layerParams.set("ceil_mode", isCeilMode(layerParams));
        }
        else if (layer_type == "AveragePool")
        {
            layerParams.type = "Pooling";
            layerParams.set("pool", "AVE");
            layerParams.set("ceil_mode", isCeilMode(layerParams));
            // PyTorch includes padded pixels in the averaging denominator.
            layerParams.set("ave_pool_padded_area", framework_name == "pytorch");
        }
        else if (layer_type == "GlobalAveragePool")
        {
            layerParams.type = "Pooling";
            layerParams.set("pool", "AVE");
            layerParams.set("global_pooling", true);
        }
        else if (layer_type == "Add" || layer_type == "Sum")
        {
            // Constant second operand: fold into Power (scalar) or Scale bias
            // (per-channel); otherwise an elementwise layer with two inputs.
            if (layer_id.find(node_proto.input(1)) == layer_id.end())
            {
                Mat blob = getBlob(node_proto, constBlobs, 1);
                blob = blob.reshape(1, 1);
                if (blob.total() == 1) {
                    layerParams.type = "Power";
                    layerParams.set("shift", blob.at<float>(0));
                }
                else {
                    layerParams.type = "Scale";
                    layerParams.set("bias_term", true);
                    layerParams.blobs.push_back(blob);
                }
            }
            else {
                layerParams.type = "Eltwise";
            }
        }
        else if (layer_type == "Sub")
        {
            // Subtraction of a constant: negate it and reuse the Add mapping.
            Mat blob = getBlob(node_proto, constBlobs, 1);
            if (blob.total() == 1) {
                layerParams.type = "Power";
                layerParams.set("shift", -blob.at<float>(0));
            }
            else {
                layerParams.type = "Scale";
                layerParams.set("has_bias", true);
                layerParams.blobs.push_back(-1.0f * blob.reshape(1, 1));
            }
        }
        else if (layer_type == "Div")
        {
            // Division by a constant: multiply by the reciprocal instead.
            Mat blob = getBlob(node_proto, constBlobs, 1);
            CV_Assert_N(blob.type() == CV_32F, blob.total());
            if (blob.total() == 1)
            {
                layerParams.set("scale", 1.0f / blob.at<float>(0));
                layerParams.type = "Power";
            }
            else
            {
                layerParams.type = "Scale";
                divide(1.0, blob, blob);
                layerParams.blobs.push_back(blob);
                layerParams.set("bias_term", false);
            }
        }
        else if (layer_type == "Constant")
        {
            // Pure constant node: register its blob and emit no layer.
            CV_Assert(node_proto.input_size() == 0);
            CV_Assert(layerParams.blobs.size() == 1);
            constBlobs.insert(std::make_pair(layerParams.name, layerParams.blobs[0]));
            continue;
        }
        else if (layer_type == "ImageScaler")
        {
            const float scale = layerParams.has("scale") ? layerParams.get<float>("scale") : 1.0f;
            layerParams.erase("scale");

            if (layerParams.has("bias"))
            {
                // scale + per-channel bias => Scale layer with weight and bias blobs
                layerParams.type = "Scale";
                layerParams.blobs.push_back(
                    Mat(Size(1, layerParams.get("bias").size()), CV_32FC1, scale));

                layerParams.set("bias_term", true);
                Mat bias(1, layerParams.get("bias").size(), CV_32FC1);
                for (int j = 0; j < bias.total(); j++) {
                    bias.at<float>(0, j) = layerParams.get("bias").getRealValue(j);
                }
                layerParams.blobs.push_back(bias);
                layerParams.erase("bias");
            }
            else {
                // scale only => simple Power layer
                layerParams.set("scale", scale);
                layerParams.type = "Power";
            }
        }
        else if (layer_type == "LeakyRelu")
        {
            layerParams.type = "ReLU";
            replaceLayerParam(layerParams, "alpha", "negative_slope");
        }
        else if (layer_type == "LRN")
        {
            replaceLayerParam(layerParams, "size", "local_size");
        }
        else if (layer_type == "BatchNormalization")
        {
            if (node_proto.input_size() != 5)
                CV_Error(Error::StsNotImplemented,
                         "Expected input, scale, bias, mean and var");

            layerParams.type = "BatchNorm";
            replaceLayerParam(layerParams, "epsilon", "eps");
            replaceLayerParam(layerParams, "spatial", "use_global_stats");

            Mat meanData = getBlob(node_proto, constBlobs, 3);
            Mat stdData = getBlob(node_proto, constBlobs, 4);

            layerParams.blobs.push_back(meanData);
            layerParams.blobs.push_back(stdData);

            // scale (input 1) and bias (input 2) are optional in ONNX
            if (!node_proto.input(1).empty()) {
                layerParams.set("has_weight", true);
                layerParams.blobs.push_back(getBlob(node_proto, constBlobs, 1));  // weightData
            } else {
                layerParams.set("has_weight", false);
            }

            if (!node_proto.input(2).empty()) {
                layerParams.set("has_bias", true);
                layerParams.blobs.push_back(getBlob(node_proto, constBlobs, 2)); // biasData
            } else {
                layerParams.set("has_bias", false);
            }
        }
        else if (layer_type == "Gemm")
        {
            CV_Assert(node_proto.input_size() >= 2);
            layerParams.type = "InnerProduct";
            Mat weights = getBlob(node_proto, constBlobs, 1);
            int ind_num_out = 0;
            // InnerProduct expects transposed weights; if ONNX says B is not
            // transposed, transpose here and read num_output from the other axis.
            if (layerParams.has("transB") && !layerParams.get<int>("transB")) {
                transpose(weights, weights);
                ind_num_out = 1;
            }
            layerParams.blobs.push_back(weights);

            if (node_proto.input_size() == 3) {
                Mat bias = getBlob(node_proto, constBlobs, 2);
                layerParams.blobs.push_back(bias);
            }

            layerParams.set("num_output", layerParams.blobs[0].size[ind_num_out]);
            layerParams.set("bias_term", node_proto.input_size() == 3);
        }
        else if (layer_type == "MatMul")
        {
            CV_Assert(node_proto.input_size() == 2);
            layerParams.type = "InnerProduct";
            Mat blob = getBlob(node_proto, constBlobs, 1);
            layerParams.blobs.push_back(blob.t());  // InnerProduct stores W^T
            layerParams.set("bias_term", false);
            layerParams.set("num_output", layerParams.blobs[0].size[0]);
        }
        else if (layer_type == "Mul")
        {
            CV_Assert(node_proto.input_size() == 2);
            // Constant multiplier => Power/Scale; variable => elementwise product.
            if (layer_id.find(node_proto.input(1)) == layer_id.end()) {
                Mat blob = getBlob(node_proto, constBlobs, 1);
                blob = blob.reshape(1, 1);
                if (blob.total() == 1) {
                    layerParams.set("scale", blob.at<float>(0));
                    layerParams.type = "Power";
                }
                else {
                    layerParams.blobs.push_back(blob);
                    layerParams.type = "Scale";
                }
            }
            else {
                layerParams.type = "Eltwise";
                layerParams.set("operation", "prod");
            }
        }
        else if (layer_type == "Conv")
        {
            CV_Assert(node_proto.input_size() >= 2);
            layerParams.type = "Convolution";
            // inputs 1..N are the weight (and optional bias) initializers
            for (int j = 1; j < node_proto.input_size(); j++) {
                layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));
            }
            layerParams.set("num_output", layerParams.blobs[0].size[0]);
            layerParams.set("bias_term", node_proto.input_size() == 3);
        }
        else if (layer_type == "ConvTranspose")
        {
            CV_Assert(node_proto.input_size() >= 2);
            layerParams.type = "Deconvolution";
            for (int j = 1; j < node_proto.input_size(); j++) {
                layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));
            }
            // deconvolution weights are (in, out, kh, kw): output count is axis 1
            layerParams.set("num_output", layerParams.blobs[0].size[1]);
            layerParams.set("bias_term", node_proto.input_size() == 3);
        }
        else if (layer_type == "Transpose")
        {
            layerParams.type = "Permute";
            replaceLayerParam(layerParams, "perm", "order");
        }
        else if (layer_type == "Unsqueeze")
        {
            // Constant-only op here: insert size-1 dims into a const blob
            // and register the result; no layer is created.
            CV_Assert(node_proto.input_size() == 1);
            Mat input = getBlob(node_proto, constBlobs, 0);

            DictValue axes = layerParams.get("axes");
            std::vector<int> dims;
            for (int j = 0; j < input.dims; j++) {
                dims.push_back(input.size[j]);
            }
            CV_Assert(axes.getIntValue(axes.size()-1) <= dims.size());
            for (int j = 0; j < axes.size(); j++) {
                dims.insert(dims.begin() + axes.getIntValue(j), 1);
            }
            Mat out = input.reshape(0, dims);
            constBlobs.insert(std::make_pair(layerParams.name, out));
            continue;
        }
        else if (layer_type == "Reshape")
        {
            CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape"));

            if (node_proto.input_size() == 2) {
                // target shape supplied as a second (int32) input blob
                Mat blob = getBlob(node_proto, constBlobs, 1);
                CV_Assert(blob.type() == CV_32SC1);

                // reshaping a constant is done immediately, without a layer
                if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
                    Mat input = getBlob(node_proto, constBlobs, 0);
                    Mat out = input.reshape(0, static_cast<std::vector<int> >(blob));
                    constBlobs.insert(std::make_pair(layerParams.name, out));
                    continue;
                }
                layerParams.set("dim", DictValue::arrayInt<int*>(
                            blob.ptr<int>(), blob.total() ));
            }
            else {
                // target shape supplied as a "shape" attribute
                DictValue shape = layerParams.get("shape");
                std::vector<int> dim;
                for (int j = 0; j < shape.size(); j++) {
                    dim.push_back(shape.getIntValue(j));
                }

                if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
                    Mat input = getBlob(node_proto, constBlobs, 0);
                    Mat out = input.reshape(0, dim);
                    constBlobs.insert(std::make_pair(layerParams.name, out));
                    continue;
                }
                replaceLayerParam(layerParams, "shape", "dim");
            }
        }
        else if (layer_type == "Pad")
        {
            layerParams.type = "Padding";
        }
        else if (layer_type == "Shape")
        {
            // The input shape is known statically (outShapes), so the result
            // is emitted as a constant 1-D int blob; no layer is created.
            CV_Assert(node_proto.input_size() == 1);
            shapeIt = outShapes.find(node_proto.input(0));
            CV_Assert(shapeIt != outShapes.end());
            MatShape inpShape = shapeIt->second;

            Mat shapeMat(inpShape.size(), 1, CV_32S);
            for (int j = 0; j < inpShape.size(); ++j)
                shapeMat.at<int>(j) = inpShape[j];
            shapeMat.dims = 1;

            constBlobs.insert(std::make_pair(layerParams.name, shapeMat));
            continue;
        }
        else if (layer_type == "Gather")
        {
            // Constant-only gather of a single index along "axis": slice the
            // const blob and register the result; no layer is created.
            CV_Assert(node_proto.input_size() == 2);
            CV_Assert(layerParams.has("axis"));
            Mat input = getBlob(node_proto, constBlobs, 0);
            Mat indexMat = getBlob(node_proto, constBlobs, 1);
            CV_Assert_N(indexMat.type() == CV_32S, indexMat.total() == 1);
            int index = indexMat.at<int>(0);
            int axis = layerParams.get<int>("axis");

            std::vector<cv::Range> ranges(input.dims, Range::all());
            ranges[axis] = Range(index, index + 1);

            Mat out = input(ranges);
            constBlobs.insert(std::make_pair(layerParams.name, out));
            continue;
        }
        else if (layer_type == "Concat")
        {
            bool hasVariableInps = false;
            for (int i = 0; i < node_proto.input_size(); ++i)
            {
                if (layer_id.find(node_proto.input(i)) != layer_id.end())
                {
                    hasVariableInps = true;
                    break;
                }
            }

            // All inputs constant: run the Concat layer at import time and
            // store the concatenated blob instead of adding a layer.
            if (!hasVariableInps)
            {
                std::vector<Mat> inputs(node_proto.input_size()), concatenated;
                for (size_t i = 0; i < inputs.size(); ++i)
                {
                    inputs[i] = getBlob(node_proto, constBlobs, i);
                }
                Ptr<Layer> concat = ConcatLayer::create(layerParams);
                runLayer(concat, inputs, concatenated);
                CV_Assert(concatenated.size() == 1);
                constBlobs.insert(std::make_pair(layerParams.name, concatenated[0]));
                continue;
            }
        }
        else
        {
            // Unknown op: pass it through under its ONNX name and attach any
            // constant inputs as layer blobs.
            for (int j = 0; j < node_proto.input_size(); j++) {
                if (layer_id.find(node_proto.input(j)) == layer_id.end())
                    layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));
            }
        }

        int id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams);
        layer_id.insert(std::make_pair(layerParams.name, LayerInfo(id, 0)));

        // Wire this layer to the layers producing its non-constant inputs.
        std::vector<MatShape> layerInpShapes, layerOutShapes, layerInternalShapes;
        for (int j = 0; j < node_proto.input_size(); j++) {
            layerId = layer_id.find(node_proto.input(j));
            if (layerId != layer_id.end()) {
                dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, j);
                // Collect input shapes.
                shapeIt = outShapes.find(node_proto.input(j));
                CV_Assert(shapeIt != outShapes.end());
                layerInpShapes.push_back(shapeIt->second);
            }
        }

        // Compute shape of output blob for this layer.
        Ptr<Layer> layer = dstNet.getLayer(id);
        layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes);
        CV_Assert(!layerOutShapes.empty());
        outShapes[layerParams.name] = layerOutShapes[0];
    }
}
// Object detection demo: reads a model via readNet, grabs frames from a
// video/image/camera, runs inference per frame and displays annotated
// results. Relies on file-level globals: `keys` (CLI spec), `confThreshold`,
// `classes`, `callback` (trackbar handler), `getOutputsNames`, `postprocess`.
int main(int argc, char** argv)
{
    CommandLineParser parser(argc, argv, keys);
    parser.about("Use this script to run object detection deep learning networks using OpenCV.");
    if (argc == 1 || parser.has("help"))
    {
        parser.printMessage();
        return 0;
    }

    confThreshold = parser.get<float>("thr");
    float scale = parser.get<float>("scale");
    Scalar mean = parser.get<Scalar>("mean");
    bool swapRB = parser.get<bool>("rgb");
    int inpWidth = parser.get<int>("width");
    int inpHeight = parser.get<int>("height");

    // Open file with classes names.
    if (parser.has("classes"))
    {
        std::string file = parser.get<String>("classes");
        std::ifstream ifs(file.c_str());
        if (!ifs.is_open())
            CV_Error(Error::StsError, "File " + file + " not found");
        std::string line;
        while (std::getline(ifs, line))
        {
            classes.push_back(line);
        }
    }

    // Load a model.
    CV_Assert(parser.has("model"));
    Net net = readNet(parser.get<String>("model"), parser.get<String>("config"), parser.get<String>("framework"));
    net.setPreferableBackend(parser.get<int>("backend"));
    net.setPreferableTarget(parser.get<int>("target"));

    // Create a window with a trackbar that adjusts confThreshold via `callback`.
    static const std::string kWinName = "Deep learning object detection in OpenCV";
    namedWindow(kWinName, WINDOW_NORMAL);
    int initialConf = (int)(confThreshold * 100);
    createTrackbar("Confidence threshold, %", kWinName, &initialConf, 99, callback);

    // Open a video file or an image file or a camera stream.
    VideoCapture cap;
    if (parser.has("input"))
        cap.open(parser.get<String>("input"));
    else
        cap.open(0);

    // Process frames until a key is pressed.
    Mat frame, blob;
    while (waitKey(1) < 0)
    {
        cap >> frame;
        if (frame.empty())
        {
            waitKey();
            break;
        }

        // Create a 4D blob from a frame; fall back to the frame's own size
        // when width/height were not given on the command line.
        Size inpSize(inpWidth > 0 ? inpWidth : frame.cols,
                     inpHeight > 0 ? inpHeight : frame.rows);
        blobFromImage(frame, blob, scale, inpSize, mean, swapRB, false);

        // Run a model.
        net.setInput(blob);
        if (net.getLayer(0)->outputNameToIndex("im_info") != -1)  // Faster-RCNN or R-FCN
        {
            // These networks need an extra "im_info" input with the resized
            // frame geometry; the frame itself is resized to match.
            resize(frame, frame, inpSize);
            Mat imInfo = (Mat_<float>(1, 3) << inpSize.height, inpSize.width, 1.6f);
            net.setInput(imInfo, "im_info");
        }
        std::vector<Mat> outs;
        net.forward(outs, getOutputsNames(net));

        postprocess(frame, outs, net);

        // Put efficiency information.
        std::vector<double> layersTimes;
        double freq = getTickFrequency() / 1000;
        double t = net.getPerfProfile(layersTimes) / freq;
        std::string label = format("Inference time: %.2f ms", t);
        putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));

        imshow(kWinName, frame);
    }
    return 0;
}
// Decode network outputs into detections and draw them on `frame`.
// Dispatches on the output layer type: Faster-RCNN/R-FCN and DetectionOutput
// emit Nx7 detection tuples (absolute vs. relative coordinates respectively),
// while Region (YOLO) emits per-cell class scores that additionally need NMS.
// Uses the file-level globals confThreshold and drawPred.
void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
{
    // static: the layer type is queried once; assumes a single Net per process run
    static std::vector<int> outLayers = net.getUnconnectedOutLayers();
    static std::string outLayerType = net.getLayer(outLayers[0])->type;
    if (net.getLayer(0)->outputNameToIndex("im_info") != -1)  // Faster-RCNN or R-FCN
    {
        // Network produces output blob with a shape 1x1xNx7 where N is a number of
        // detections and an every detection is a vector of values
        // [batchId, classId, confidence, left, top, right, bottom]
        CV_Assert(outs.size() == 1);
        float* data = (float*)outs[0].data;
        for (size_t i = 0; i < outs[0].total(); i += 7)
        {
            float confidence = data[i + 2];
            if (confidence > confThreshold)
            {
                // coordinates are already in pixels
                int left = (int)data[i + 3];
                int top = (int)data[i + 4];
                int right = (int)data[i + 5];
                int bottom = (int)data[i + 6];
                int classId = (int)(data[i + 1]) - 1;  // Skip 0th background class id.
                drawPred(classId, confidence, left, top, right, bottom, frame);
            }
        }
    }
    else if (outLayerType == "DetectionOutput")
    {
        // Network produces output blob with a shape 1x1xNx7 where N is a number of
        // detections and an every detection is a vector of values
        // [batchId, classId, confidence, left, top, right, bottom]
        CV_Assert(outs.size() == 1);
        float* data = (float*)outs[0].data;
        for (size_t i = 0; i < outs[0].total(); i += 7)
        {
            float confidence = data[i + 2];
            if (confidence > confThreshold)
            {
                // coordinates are relative to frame size; scale to pixels
                int left = (int)(data[i + 3] * frame.cols);
                int top = (int)(data[i + 4] * frame.rows);
                int right = (int)(data[i + 5] * frame.cols);
                int bottom = (int)(data[i + 6] * frame.rows);
                int classId = (int)(data[i + 1]) - 1;  // Skip 0th background class id.
                drawPred(classId, confidence, left, top, right, bottom, frame);
            }
        }
    }
    else if (outLayerType == "Region")
    {
        std::vector<int> classIds;
        std::vector<float> confidences;
        std::vector<Rect> boxes;
        for (size_t i = 0; i < outs.size(); ++i)
        {
            // Network produces output blob with a shape NxC where N is a number of
            // detected objects and C is a number of classes + 4 where the first 4
            // numbers are [center_x, center_y, width, height]
            float* data = (float*)outs[i].data;
            for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
            {
                // columns 5..C are the per-class scores
                Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
                Point classIdPoint;
                double confidence;
                minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
                if (confidence > confThreshold)
                {
                    int centerX = (int)(data[0] * frame.cols);
                    int centerY = (int)(data[1] * frame.rows);
                    int width = (int)(data[2] * frame.cols);
                    int height = (int)(data[3] * frame.rows);
                    int left = centerX - width / 2;
                    int top = centerY - height / 2;

                    classIds.push_back(classIdPoint.x);
                    confidences.push_back((float)confidence);
                    boxes.push_back(Rect(left, top, width, height));
                }
            }
        }
        // Suppress overlapping boxes (IoU threshold 0.4), then draw the survivors.
        std::vector<int> indices;
        NMSBoxes(boxes, confidences, confThreshold, 0.4f, indices);
        for (size_t i = 0; i < indices.size(); ++i)
        {
            int idx = indices[i];
            Rect box = boxes[idx];
            drawPred(classIds[idx], confidences[idx], box.x, box.y,
                     box.x + box.width, box.y + box.height, frame);
        }
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
}
// Single-output variant of postprocess: decode one output blob into
// detections and draw them on `frame`. Same dispatch as the multi-output
// version (Faster-RCNN/R-FCN, DetectionOutput, Region) but without NMS for
// the Region branch. Uses the file-level globals confThreshold and drawPred.
void postprocess(Mat& frame, const Mat& out, Net& net)
{
    // static: the layer type is queried once; assumes a single Net per process run
    static std::vector<int> outLayers = net.getUnconnectedOutLayers();
    static std::string outLayerType = net.getLayer(outLayers[0])->type;

    float* data = (float*)out.data;
    if (net.getLayer(0)->outputNameToIndex("im_info") != -1)  // Faster-RCNN or R-FCN
    {
        // Network produces output blob with a shape 1x1xNx7 where N is a number of
        // detections and an every detection is a vector of values
        // [batchId, classId, confidence, left, top, right, bottom]
        for (size_t i = 0; i < out.total(); i += 7)
        {
            float confidence = data[i + 2];
            if (confidence > confThreshold)
            {
                // coordinates are already in pixels
                int left = (int)data[i + 3];
                int top = (int)data[i + 4];
                int right = (int)data[i + 5];
                int bottom = (int)data[i + 6];
                int classId = (int)(data[i + 1]) - 1;  // Skip 0th background class id.
                drawPred(classId, confidence, left, top, right, bottom, frame);
            }
        }
    }
    else if (outLayerType == "DetectionOutput")
    {
        // Network produces output blob with a shape 1x1xNx7 where N is a number of
        // detections and an every detection is a vector of values
        // [batchId, classId, confidence, left, top, right, bottom]
        for (size_t i = 0; i < out.total(); i += 7)
        {
            float confidence = data[i + 2];
            if (confidence > confThreshold)
            {
                // coordinates are relative to frame size; scale to pixels
                int left = (int)(data[i + 3] * frame.cols);
                int top = (int)(data[i + 4] * frame.rows);
                int right = (int)(data[i + 5] * frame.cols);
                int bottom = (int)(data[i + 6] * frame.rows);
                int classId = (int)(data[i + 1]) - 1;  // Skip 0th background class id.
                drawPred(classId, confidence, left, top, right, bottom, frame);
            }
        }
    }
    else if (outLayerType == "Region")
    {
        // Network produces output blob with a shape NxC where N is a number of
        // detected objects and C is a number of classes + 4 where the first 4
        // numbers are [center_x, center_y, width, height]
        for (int i = 0; i < out.rows; ++i, data += out.cols)
        {
            // columns 5..C are the per-class scores
            Mat confidences = out.row(i).colRange(5, out.cols);
            Point classIdPoint;
            double confidence;
            minMaxLoc(confidences, 0, &confidence, 0, &classIdPoint);
            if (confidence > confThreshold)
            {
                int classId = classIdPoint.x;
                int centerX = (int)(data[0] * frame.cols);
                int centerY = (int)(data[1] * frame.rows);
                int width = (int)(data[2] * frame.cols);
                int height = (int)(data[3] * frame.rows);
                int left = centerX - width / 2;
                int top = centerY - height / 2;
                drawPred(classId, (float)confidence, left, top,
                         left + width, top + height, frame);
            }
        }
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
}
int main(int argc, char **argv) { string videoFileName; // Take arguments from commmand line if (argc < 2) { cout << "Please input the greyscale video filename." << endl; cout << "Usage example: ./colorizeVideo.out greyscaleVideo.mp4" << endl; return 1; } videoFileName = argv[1]; cv::VideoCapture cap(videoFileName); if (!cap.isOpened()) { cerr << "Unable to open video" << endl; return 1; } string protoFile = "./models/colorization_deploy_v2.prototxt"; string weightsFile = "./models/colorization_release_v2.caffemodel"; //string weightsFile = "./models/colorization_release_v2_norebal.caffemodel"; Mat frame, frameCopy; int frameWidth = cap.get(CAP_PROP_FRAME_WIDTH); int frameHeight = cap.get(CAP_PROP_FRAME_HEIGHT); string str = videoFileName; str.replace(str.end()-4, str.end(), ""); string outVideoFileName = str+"_colorized.avi"; VideoWriter video(outVideoFileName, VideoWriter::fourcc('M','J','P','G'), 60, Size(frameWidth,frameHeight)); // fixed input size for the pretrained network const int W_in = 224; const int H_in = 224; Net net = dnn::readNetFromCaffe(protoFile, weightsFile); // setup additional layers: int sz[] = {2, 313, 1, 1}; const Mat pts_in_hull(4, sz, CV_32F, hull_pts); Ptr<dnn::Layer> class8_ab = net.getLayer("class8_ab"); class8_ab->blobs.push_back(pts_in_hull); Ptr<dnn::Layer> conv8_313_rh = net.getLayer("conv8_313_rh"); conv8_313_rh->blobs.push_back(Mat(1, 313, CV_32F, Scalar(2.606))); for(;;) { cap >> frame; if (frame.empty()) break; frameCopy = frame.clone(); // extract L channel and subtract mean Mat lab, L, input; frame.convertTo(frame, CV_32F, 1.0/255); cvtColor(frame, lab, COLOR_BGR2Lab); extractChannel(lab, L, 0); resize(L, input, Size(W_in, H_in)); input -= 50; // run the L channel through the network Mat inputBlob = blobFromImage(input); net.setInput(inputBlob); Mat result = net.forward(); // retrieve the calculated a,b channels from the network output Size siz(result.size[2], result.size[3]); Mat a = Mat(siz, CV_32F, result.ptr(0,0)); Mat b 
= Mat(siz, CV_32F, result.ptr(0,1)); resize(a, a, frame.size()); resize(b, b, frame.size()); // merge, and convert back to BGR Mat coloredFrame, chn[] = {L, a, b}; merge(chn, 3, lab); cvtColor(lab, coloredFrame, COLOR_Lab2BGR); coloredFrame = coloredFrame*255; coloredFrame.convertTo(coloredFrame, CV_8U); video.write(coloredFrame); } cout << "Colorized video saved as " << outVideoFileName << endl << "Done !!!" << endl; cap.release(); video.release(); return 0; }
// Older colorization sample: recolor a grayscale image and show the result
// alongside the original. Model paths, the input image and OpenCL usage are
// taken from the command line.
int main(int argc, char **argv)
{
    CommandLineParser parser(argc, argv,
        "{ help   | false                              | print this help message }"
        "{ proto  | colorization_deploy_v2.prototxt    | model configuration }"
        "{ model  | colorization_release_v2.caffemodel | model weights }"
        "{ image  | space_shuttle.jpg                  | path to image file }"
        "{ opencl | false                              | enable OpenCL }"
    );
    String modelTxt = parser.get<string>("proto");
    String modelBin = parser.get<string>("model");
    String imageFile = parser.get<String>("image");
    if (parser.get<bool>("help") || modelTxt.empty() || modelBin.empty() || imageFile.empty())
    {
        cout << "A sample app to demonstrate recoloring grayscale images with dnn." << endl;
        parser.printMessage();
        return 0;
    }

    // fixed input size for the pretrained network
    int W_in = 224;
    int H_in = 224;
    Net net = dnn::readNetFromCaffe(modelTxt, modelBin);

    // setup additional layers: the model requires the ab cluster centers
    // (hull_pts, defined at file scope) and a rebalancing constant.
    int sz[] = {2, 313, 1, 1};
    Mat pts_in_hull(4, sz, CV_32F, hull_pts);
    Ptr<dnn::Layer> class8_ab = net.getLayer("class8_ab");
    class8_ab->blobs.push_back(pts_in_hull);
    Ptr<dnn::Layer> conv8_313_rh = net.getLayer("conv8_313_rh");
    conv8_313_rh->blobs.push_back(Mat(1, 313, CV_32F, 2.606f));

    if (parser.get<bool>("opencl"))
    {
        net.setPreferableTarget(DNN_TARGET_OPENCL);
    }

    Mat img = imread(imageFile);
    if (img.empty())
    {
        std::cerr << "Can't read image from the file: " << imageFile << std::endl;
        exit(-1);
    }

    // extract L channel and subtract mean
    Mat lab, L, input;
    img.convertTo(img, CV_32F, 1.0/255);
    cvtColor(img, lab, COLOR_BGR2Lab);
    extractChannel(lab, L, 0);
    resize(L, input, Size(W_in, H_in));
    input -= 50;

    // run the L channel through the network; the result is the output of
    // the "class8_ab" layer (NOTE(review): presumably the final ab-decoding
    // layer of this model — confirm against the prototxt)
    Mat inputBlob = blobFromImage(input);
    net.setInput(inputBlob);
    Mat result = net.forward("class8_ab");

    // retrieve the calculated a,b channels from the network output
    // (NCHW layout: channel 0 = a, channel 1 = b)
    Size siz(result.size[2], result.size[3]);
    Mat a = Mat(siz, CV_32F, result.ptr(0,0));
    Mat b = Mat(siz, CV_32F, result.ptr(0,1));
    resize(a, a, img.size());
    resize(b, b, img.size());

    // merge, and convert back to bgr
    Mat color, chn[] = {L, a, b};
    merge(chn, 3, lab);
    cvtColor(lab, color, COLOR_Lab2BGR);

    namedWindow("color", WINDOW_NORMAL);
    namedWindow("original", WINDOW_NORMAL);
    imshow("color", color);
    imshow("original", img);
    waitKey();
    return 0;
}