// Checks that every layer of `net` is supported with the current backend and
// target, raising an error for the first layer that is not.
//
// Per-layer timings are read with net.getPerfProfile() and matched by index
// against net.getLayerNames(); the two lists must have the same length.
// Some layers might be fused, in which case their timing equals zero; such
// layers are skipped.
//
// `backend` and `target` are not declared in this function; they are taken
// from the enclosing scope.
// NOTE(review): presumably these are the backend/target the network was
// configured with - confirm against their definition.
//
// Raises Error::StsNotImplemented (through CV_Error) for a non-fused layer
// that does not support `backend` or whose preferableTarget differs from
// `target`.
void expectNoFallbacks(Net& net)
{
    std::vector<double> timings;
    net.getPerfProfile(timings);
    std::vector<String> names = net.getLayerNames();
    CV_Assert(names.size() == timings.size());

    // size_t index: avoids the signed/unsigned comparison with names.size().
    for (size_t i = 0; i < names.size(); ++i)
    {
        Ptr<dnn::Layer> l = net.getLayer(net.getLayerId(names[i]));
        // A zero timing is treated as "this layer was fused into another one".
        bool fused = !timings[i];
        if ((!l->supportBackend(backend) || l->preferableTarget != target) && !fused)
            CV_Error(Error::StsNotImplemented, "Layer [" + l->name + "] of type [" + l->type + "] is expected to has backend implementation");
    }
}
int main(int argc, char** argv) { CommandLineParser parser(argc, argv, keys); parser.about("Use this script to run object detection deep learning networks using OpenCV."); if (argc == 1 || parser.has("help")) { parser.printMessage(); return 0; } confThreshold = parser.get<float>("thr"); float scale = parser.get<float>("scale"); Scalar mean = parser.get<Scalar>("mean"); bool swapRB = parser.get<bool>("rgb"); int inpWidth = parser.get<int>("width"); int inpHeight = parser.get<int>("height"); // Open file with classes names. if (parser.has("classes")) { std::string file = parser.get<String>("classes"); std::ifstream ifs(file.c_str()); if (!ifs.is_open()) CV_Error(Error::StsError, "File " + file + " not found"); std::string line; while (std::getline(ifs, line)) { classes.push_back(line); } } // Load a model. CV_Assert(parser.has("model")); Net net = readNet(parser.get<String>("model"), parser.get<String>("config"), parser.get<String>("framework")); net.setPreferableBackend(parser.get<int>("backend")); net.setPreferableTarget(parser.get<int>("target")); // Create a window static const std::string kWinName = "Deep learning object detection in OpenCV"; namedWindow(kWinName, WINDOW_NORMAL); int initialConf = (int)(confThreshold * 100); createTrackbar("Confidence threshold, %", kWinName, &initialConf, 99, callback); // Open a video file or an image file or a camera stream. VideoCapture cap; if (parser.has("input")) cap.open(parser.get<String>("input")); else cap.open(0); // Process frames. Mat frame, blob; while (waitKey(1) < 0) { cap >> frame; if (frame.empty()) { waitKey(); break; } // Create a 4D blob from a frame. Size inpSize(inpWidth > 0 ? inpWidth : frame.cols, inpHeight > 0 ? inpHeight : frame.rows); blobFromImage(frame, blob, scale, inpSize, mean, swapRB, false); // Run a model. 
net.setInput(blob); if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN { resize(frame, frame, inpSize); Mat imInfo = (Mat_<float>(1, 3) << inpSize.height, inpSize.width, 1.6f); net.setInput(imInfo, "im_info"); } std::vector<Mat> outs; net.forward(outs, getOutputsNames(net)); postprocess(frame, outs, net); // Put efficiency information. std::vector<double> layersTimes; double freq = getTickFrequency() / 1000; double t = net.getPerfProfile(layersTimes) / freq; std::string label = format("Inference time: %.2f ms", t); putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); imshow(kWinName, frame); } return 0; }