bool ReadImageToDatum(const string& filename, const int label, const int height, const int width, const bool is_color, const string& encoding, JDatum* datum) { cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color); if (cv_img.data) { if (encoding.size()) { if ((cv_img.channels() == 3) && is_color == 1 && !height && !width && matchExt(filename, encoding)) { return ReadFileToDatum(filename, label, datum); } vector<uchar> buf; cv::imencode("." + encoding, cv_img, buf); datum->SetData(string(reinterpret_cast<char*>(&buf[0]), buf.size())); datum->SetLabel(label); datum->SetEncoded(true); return true; } CVMatToDatum(cv_img, datum); datum->SetLabel(label); return true; } else { return false; } }
/** * Get the prediction response for the given image. * @param originImage image, which should be predicted * @param resultLayer the name of the result layer * @param dataLayer the name of the data layer * @param predictions the predictions */ void CaffeClassifier::predict(std::vector<cv::Mat> originImages, std::vector<int> labels, string resultLayer, string dataLayer, vector<short> & predictions) { vector<Datum> vecDatum; for (int i = 0; i < originImages.size(); i++) { cv::Mat originImage = originImages[i]; // resize image Mat image; if (originImage.cols != imageSize.width || originImage.rows != imageSize.height) { resize(originImage, image, imageSize); } else image = originImage; // check channels if (channels != image.channels()) { cerr << "Error: the channel number of input image is invalid for CNN classifier!" << endl; exit(1); } // mat to datum Datum datum; CVMatToDatum(image, &datum); datum.set_label(labels[i]); vecDatum.push_back(datum); image.release(); } // get the data layer const caffe::shared_ptr<MemoryDataLayer<float>> memDataLayer = boost::static_pointer_cast<MemoryDataLayer<float>> (caffeNet->layer_by_name(dataLayer)); // push new image data memDataLayer->AddDatumVector(vecDatum); //memDataLayer->ExactNumBottomBlobs(); // do forward pass vector<Blob<float>*> inputVec; caffeNet->Forward(inputVec); // get results const caffe::shared_ptr<Blob<float> > featureBlob = caffeNet->blob_by_name(resultLayer); int batchSize = featureBlob->num(); int dimFeatures = featureBlob->count() / batchSize; // std::cout << "Batch size is " << batchSize << "/ dim features is " << dimFeatures << std::endl; // get output from each channel for (int n = 0; n < batchSize; ++n) { float* fs = featureBlob->mutable_cpu_data() + featureBlob->offset(n); if (sizeof(fs) > 0) { vector<float> feature_vector(fs, fs + dimFeatures); predictions.insert(predictions.end(), feature_vector.begin(), feature_vector.end()); } } // release data // for (Datum d : vecDatum) { // d.release_data(); // 
} }
// CVMatToDatum must carry the geometry of the loaded example image over to
// the Datum: the test expects 3 channels and a 480x360 (width x height) image.
TEST_F(IOTest, TestCVMatToDatum) {
  const string image_path = EXAMPLES_SOURCE_DIR "images/cat.jpg";
  cv::Mat loaded = ReadImageToCVMat(image_path);

  Datum converted;
  CVMatToDatum(loaded, &converted);

  EXPECT_EQ(converted.channels(), 3);
  EXPECT_EQ(converted.height(), 360);
  EXPECT_EQ(converted.width(), 480);
}
// Decodes an encoded Datum in place: the datum is decoded with
// DecodeDatumToCVMat (honouring is_color) and the resulting image is written
// back into the same datum with CVMatToDatum.
// Returns true when the datum was encoded and has been processed, false when
// it was not encoded (in which case it is left untouched).
bool DecodeDatum(Datum* datum, bool is_color) {
  if (!datum->encoded()) {
    return false;
  }
  cv::Mat decoded = DecodeDatumToCVMat(*datum, is_color);
  CVMatToDatum(decoded, datum);
  return true;
}
// If Datum is encoded will decoded using DecodeDatumToCVMat and CVMatToDatum // If Datum is not encoded will do nothing bool DecodeDatumNative(Datum* datum) { if (datum->encoded()) { cv::Mat cv_img = DecodeDatumToCVMatNative((*datum)); CVMatToDatum(cv_img, datum); return true; } else { return false; } }
// CVMatToDatum must produce the same Datum (shape and bytes) as the reference
// implementation ReadImageToDatumReference for the same image file.
TEST_F(IOTest, TestCVMatToDatumReference) {
  string filename = EXAMPLES_SOURCE_DIR "images/cat.jpg";
  cv::Mat cv_img = ReadImageToCVMat(filename);
  Datum datum;
  CVMatToDatum(cv_img, &datum);
  Datum datum_ref;
  ReadImageToDatumReference(filename, 0, 0, 0, true, &datum_ref);

  EXPECT_EQ(datum.channels(), datum_ref.channels());
  EXPECT_EQ(datum.height(), datum_ref.height());
  EXPECT_EQ(datum.width(), datum_ref.width());

  const string& data = datum.data();
  const string& data_ref = datum_ref.data();
  // Fatal on mismatch: the byte-wise loop below indexes both strings with the
  // same index and would read past the end of the shorter one.
  ASSERT_EQ(data.size(), data_ref.size());
  for (size_t i = 0; i < data.size(); ++i) {
    EXPECT_TRUE(data[i] == data_ref[i]);
  }
}
void CaffeClassifier::predictHeatMap(cv::Mat& inputImage, int label, string predictionLayer, string dataLayer, cv::Mat& heatMap) { const int IMAGE_SIZE = 227; const int BATCH_SIZE = 64; heatMap = cv::Mat(IMAGE_SIZE, IMAGE_SIZE, CV_32FC1, Scalar(26.932154)); const int STEP_SIZE = 9; const int START_OFFSET = STEP_SIZE / 2; const int FILLER_SIZE = 50; cv::Scalar mean = cv::mean(inputImage); std::vector<Point> middlePoints; for (int i = START_OFFSET; i < IMAGE_SIZE; i += STEP_SIZE) { for (int j = START_OFFSET; j < IMAGE_SIZE; j += STEP_SIZE) { middlePoints.push_back(Point(i, j)); } } for (int i = 0; i < middlePoints.size(); i += BATCH_SIZE) { std::cout << (i * 100) / middlePoints.size() << "% " << std::flush; vector<Datum> vecDatum; for (int j = 0; j < BATCH_SIZE; ++j) { // do not go over the last middle point int index = min(static_cast<int>(middlePoints.size() - 1), i + j); Point p = middlePoints[index]; cv::Mat image = inputImage.clone(); // cv::Rect rect(Point(max(0, p.x - FILLER_SIZE), max(0, p.y - FILLER_SIZE)), Point(min(IMAGE_SIZE - 1, p.x + FILLER_SIZE), min(IMAGE_SIZE - 1, p.y + FILLER_SIZE))); // cv::Mat subMat = image(rect); // cv::Scalar mean = cv::mean(subMat); circle(image, p, FILLER_SIZE, mean, CV_FILLED); // rectangle(image, // Point(max(0, p.x - FILLER_SIZE), max(0, p.y - FILLER_SIZE)), // Point(min(IMAGE_SIZE - 1, p.x + FILLER_SIZE), min(IMAGE_SIZE - 1, p.y + FILLER_SIZE)), // Scalar(0, 0, 0), // CV_FILLED); std::ostringstream o; o << "/home/knub/Repositories/video-classification/nets/activity_recognition/caffenet/"; o << index; o << "_heat.png"; cv::imwrite(o.str(), image); // check channels if (channels != image.channels()) { cerr << "Error: the channel number of input image is invalid for CNN classifier!" 
<< endl; exit(1); } // mat to datum Datum datum; CVMatToDatum(image, &datum); vecDatum.push_back(datum); image.release(); } // get the data layer const caffe::shared_ptr<MemoryDataLayer<float>> memDataLayer = boost::static_pointer_cast<MemoryDataLayer<float>>(caffeNet->layer_by_name(dataLayer)); // push new image data memDataLayer->AddDatumVector(vecDatum); // do forward pass vector<Blob<float>*> inputVec; caffeNet->Forward(inputVec); // get results const caffe::shared_ptr<Blob<float> > featureBlob = caffeNet->blob_by_name(predictionLayer); int dimFeatures = featureBlob->count() / BATCH_SIZE; // 101 assert(dimFeatures == 101); // get output from each input image for (int j = 0; j < BATCH_SIZE; ++j) { int index = min(static_cast<int>(middlePoints.size() - 1), i + j); Point p = middlePoints[index]; // std::cout << "Channels: " << featureBlob->channels() << ", Count: " << featureBlob->count() << ", Width: " << featureBlob->width() << ", Height: " << featureBlob->height() << std::endl; // featureBlob = 64 x 101 matrix float* fs = featureBlob->mutable_cpu_data() + featureBlob->offset(j); vector<float> featureVector(fs, fs + dimFeatures); // std::vector<float>::iterator result = std::max_element(featureVector.begin(), featureVector.end()); // int predicted = result - featureVector.begin(); // std::cout << "Predicted: " << predicted << ", Actual: " << label << std::endl; // assert(predicted == label); float confidence = featureVector[label]; rectangle(heatMap, Point(p.x - START_OFFSET, p.y - START_OFFSET), Point(p.x + START_OFFSET, p.y + START_OFFSET), Scalar(confidence), CV_FILLED); } } }