/** * Get the prediction response for the given image. * @param originImage image, which should be predicted * @param resultLayer the name of the result layer * @param dataLayer the name of the data layer * @param predictions the predictions */ void CaffeClassifier::predict(std::vector<cv::Mat> originImages, std::vector<int> labels, string resultLayer, string dataLayer, vector<short> & predictions) { vector<Datum> vecDatum; for (int i = 0; i < originImages.size(); i++) { cv::Mat originImage = originImages[i]; // resize image Mat image; if (originImage.cols != imageSize.width || originImage.rows != imageSize.height) { resize(originImage, image, imageSize); } else image = originImage; // check channels if (channels != image.channels()) { cerr << "Error: the channel number of input image is invalid for CNN classifier!" << endl; exit(1); } // mat to datum Datum datum; CVMatToDatum(image, &datum); datum.set_label(labels[i]); vecDatum.push_back(datum); image.release(); } // get the data layer const caffe::shared_ptr<MemoryDataLayer<float>> memDataLayer = boost::static_pointer_cast<MemoryDataLayer<float>> (caffeNet->layer_by_name(dataLayer)); // push new image data memDataLayer->AddDatumVector(vecDatum); //memDataLayer->ExactNumBottomBlobs(); // do forward pass vector<Blob<float>*> inputVec; caffeNet->Forward(inputVec); // get results const caffe::shared_ptr<Blob<float> > featureBlob = caffeNet->blob_by_name(resultLayer); int batchSize = featureBlob->num(); int dimFeatures = featureBlob->count() / batchSize; // std::cout << "Batch size is " << batchSize << "/ dim features is " << dimFeatures << std::endl; // get output from each channel for (int n = 0; n < batchSize; ++n) { float* fs = featureBlob->mutable_cpu_data() + featureBlob->offset(n); if (sizeof(fs) > 0) { vector<float> feature_vector(fs, fs + dimFeatures); predictions.insert(predictions.end(), feature_vector.begin(), feature_vector.end()); } } // release data // for (Datum d : vecDatum) { // d.release_data(); // 
} }
void CaffeClassifier::predictHeatMap(cv::Mat& inputImage, int label, string predictionLayer, string dataLayer, cv::Mat& heatMap) { const int IMAGE_SIZE = 227; const int BATCH_SIZE = 64; heatMap = cv::Mat(IMAGE_SIZE, IMAGE_SIZE, CV_32FC1, Scalar(26.932154)); const int STEP_SIZE = 9; const int START_OFFSET = STEP_SIZE / 2; const int FILLER_SIZE = 50; cv::Scalar mean = cv::mean(inputImage); std::vector<Point> middlePoints; for (int i = START_OFFSET; i < IMAGE_SIZE; i += STEP_SIZE) { for (int j = START_OFFSET; j < IMAGE_SIZE; j += STEP_SIZE) { middlePoints.push_back(Point(i, j)); } } for (int i = 0; i < middlePoints.size(); i += BATCH_SIZE) { std::cout << (i * 100) / middlePoints.size() << "% " << std::flush; vector<Datum> vecDatum; for (int j = 0; j < BATCH_SIZE; ++j) { // do not go over the last middle point int index = min(static_cast<int>(middlePoints.size() - 1), i + j); Point p = middlePoints[index]; cv::Mat image = inputImage.clone(); // cv::Rect rect(Point(max(0, p.x - FILLER_SIZE), max(0, p.y - FILLER_SIZE)), Point(min(IMAGE_SIZE - 1, p.x + FILLER_SIZE), min(IMAGE_SIZE - 1, p.y + FILLER_SIZE))); // cv::Mat subMat = image(rect); // cv::Scalar mean = cv::mean(subMat); circle(image, p, FILLER_SIZE, mean, CV_FILLED); // rectangle(image, // Point(max(0, p.x - FILLER_SIZE), max(0, p.y - FILLER_SIZE)), // Point(min(IMAGE_SIZE - 1, p.x + FILLER_SIZE), min(IMAGE_SIZE - 1, p.y + FILLER_SIZE)), // Scalar(0, 0, 0), // CV_FILLED); std::ostringstream o; o << "/home/knub/Repositories/video-classification/nets/activity_recognition/caffenet/"; o << index; o << "_heat.png"; cv::imwrite(o.str(), image); // check channels if (channels != image.channels()) { cerr << "Error: the channel number of input image is invalid for CNN classifier!" 
<< endl; exit(1); } // mat to datum Datum datum; CVMatToDatum(image, &datum); vecDatum.push_back(datum); image.release(); } // get the data layer const caffe::shared_ptr<MemoryDataLayer<float>> memDataLayer = boost::static_pointer_cast<MemoryDataLayer<float>>(caffeNet->layer_by_name(dataLayer)); // push new image data memDataLayer->AddDatumVector(vecDatum); // do forward pass vector<Blob<float>*> inputVec; caffeNet->Forward(inputVec); // get results const caffe::shared_ptr<Blob<float> > featureBlob = caffeNet->blob_by_name(predictionLayer); int dimFeatures = featureBlob->count() / BATCH_SIZE; // 101 assert(dimFeatures == 101); // get output from each input image for (int j = 0; j < BATCH_SIZE; ++j) { int index = min(static_cast<int>(middlePoints.size() - 1), i + j); Point p = middlePoints[index]; // std::cout << "Channels: " << featureBlob->channels() << ", Count: " << featureBlob->count() << ", Width: " << featureBlob->width() << ", Height: " << featureBlob->height() << std::endl; // featureBlob = 64 x 101 matrix float* fs = featureBlob->mutable_cpu_data() + featureBlob->offset(j); vector<float> featureVector(fs, fs + dimFeatures); // std::vector<float>::iterator result = std::max_element(featureVector.begin(), featureVector.end()); // int predicted = result - featureVector.begin(); // std::cout << "Predicted: " << predicted << ", Actual: " << label << std::endl; // assert(predicted == label); float confidence = featureVector[label]; rectangle(heatMap, Point(p.x - START_OFFSET, p.y - START_OFFSET), Point(p.x + START_OFFSET, p.y + START_OFFSET), Scalar(confidence), CV_FILLED); } } }
/**
 * Extract one feature vector per image from the single blob named in blob_names.
 *
 * The images are resized in place to the mean image size when they differ, handed to
 * the memory data layer (layer 0 of the net) and forwarded batch by batch. After each
 * forward pass the content of the blob is appended to `features`, one vector per item.
 *
 * Batch size selection: `new_batch_size` is applied when the number of images is a
 * multiple of it; otherwise the layer's current batch size is kept when it divides the
 * number of images, and as a last resort the batch size is set to 1.
 *
 * @param images          input images (resized in place when needed)
 * @param new_batch_size  requested batch size, must be positive
 * @param features        output; one vector per forwarded image is appended
 * @return -1 on an invalid request (blob_names does not hold exactly one name,
 *         non-positive batch size, missing layer or blob); otherwise the average time
 *         per image in milliseconds when `timing` is set, else 0
 */
template <class Dtype>
float CaffeFeatExtractor<Dtype>::extractBatch_singleFeat_1D(vector<cv::Mat> &images, int new_batch_size, vector< vector<Dtype> > &features)
{
    // Reject unusable requests before anything is queued in the data layer, so an
    // error return never leaves unconsumed data (or timing events) behind.
    if (blob_names.size() != 1)
    {
        cout<< "Error! The list of features to be extracted has not size one!" << endl;
        return -1;
    }
    if (new_batch_size <= 0)
    {
        cout << "Error! The requested batch size must be positive!" << endl;
        return -1;
    }
    const size_t num_images = images.size();
    if (num_images == 0)
    {
        cout << "WARNING: no images to process." << endl;
        return 0;
    }

    // Set the GPU/CPU mode for Caffe on every call (done here, per the original
    // author's note, in order to be thread-safe)
    if (gpu_mode)
    {
        Caffe::set_mode(Caffe::GPU);
        Caffe::SetDevice(device_id);
    }
    else
    {
        Caffe::set_mode(Caffe::CPU);
    }

    // Get pointer to data layer to set the input; the cast yields null when layer 0
    // is not a MemoryDataLayer
    caffe::shared_ptr<MemoryDataLayer<Dtype> > memory_data_layer =
        boost::dynamic_pointer_cast<caffe::MemoryDataLayer<Dtype> >(feature_extraction_net->layers()[0]);
    if (!memory_data_layer)
    {
        cout << "Error! The first layer of the network is not a MemoryDataLayer!" << endl;
        return -1;
    }

    // The blob object is looked up once and read again after every forward pass
    const caffe::shared_ptr<Blob<Dtype> > feature_blob = feature_extraction_net->blob_by_name(blob_names[0]);
    if (!feature_blob)
    {
        cout << "Error! The blob " << blob_names[0] << " does not exist in the network!" << endl;
        return -1;
    }

    cudaEvent_t start, stop;
    if (timing)
    {
        cudaEventCreate(&start);
        cudaEventCreate(&stop);
        cudaEventRecord(start, NULL);
    }

    // Set batch size
    const int current_batch_size = memory_data_layer->batch_size();
    if (current_batch_size != new_batch_size && num_images % new_batch_size == 0)
    {
        memory_data_layer->set_batch_size(new_batch_size);
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }
    else if (num_images % current_batch_size != 0)
    {
        cout << "WARNING: image number is not multiple of batch size, setting it to 1 (performance issue)." << endl;
        memory_data_layer->set_batch_size(1);
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }
    else if (current_batch_size != new_batch_size)
    {
        cout << "WARNING: image number is not multiple of requested batch size,leaving the old one." << endl;
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }

    // The number of forward passes follows the batch size that is actually in effect,
    // which may differ from the requested one.
    const int num_batches = static_cast<int>(num_images / memory_data_layer->batch_size());

    // Input preprocessing
    // The image passed to AddMatVector must be same size as the mean image.
    // If not, it is resized: LANCZOS4 when any dimension shrinks, bilinear otherwise.
    // cv::Size takes (width, height).
    const cv::Size mean_size(mean_width, mean_height);
    for (size_t i = 0; i < num_images; ++i)
    {
        cv::Mat &img = images[i];
        if (img.rows == mean_height && img.cols == mean_width)
            continue;

        const bool shrinking = img.rows > mean_height || img.cols > mean_width;
        cv::resize(img, img, mean_size, 0, 0, shrinking ? CV_INTER_LANCZOS4 : CV_INTER_LINEAR);
    }

    // Initialize labels to zero
    const vector<int> labels(num_images, 0);
    memory_data_layer->AddMatVector(images, labels);

    // Run network and retrieve features!
    for (int b = 0; b < num_batches; ++b)
    {
        feature_extraction_net->Forward();

        const int batch_size = feature_blob->num();
        const int feat_dim = feature_blob->count() / batch_size; // should be equal to: channels*width*height
        if (feat_dim != feature_blob->channels())
        {
            cout<< "Attention! The feature is not 1D: unrolling according to Caffe's order (i.e. channel, width, height)" << endl;
        }

        // The data is only read, so the read-only accessor is sufficient
        const Dtype *data = feature_blob->cpu_data();
        for (int i = 0; i < batch_size; ++i)
        {
            const Dtype *first = data + feature_blob->offset(i);
            features.push_back(vector<Dtype>(first, first + feat_dim));
        }
    }

    if (!timing)
    {
        return 0;
    }

    // Record the stop event
    cudaEventRecord(stop, NULL);

    // Wait for the stop event to complete
    cudaEventSynchronize(stop);

    float msecTotal = 0.0f;
    cudaEventElapsedTime(&msecTotal, start, stop);

    // Release the events created above
    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    return msecTotal / static_cast<float>(num_images);
}
/**
 * Extract, for a single image, one feature vector from every blob named in blob_names.
 *
 * The image is resized in place to the mean image size when it differs, handed to the
 * memory data layer (layer 0 of the net) with batch size 1 and forwarded once. The
 * content of each listed blob is then appended to `features` in the order of
 * blob_names.
 *
 * @param image     input image (resized in place when needed)
 * @param features  output; one vector per blob is appended. When an error occurs in the
 *                  middle of the list, vectors appended before the error are kept.
 * @return -1 when layer 0 is not a MemoryDataLayer, a blob is missing, or a blob holds
 *         more than one item; otherwise the elapsed time in milliseconds when `timing`
 *         is set, else 0
 */
template <class Dtype>
float CaffeFeatExtractor<Dtype>::extract_multipleFeat_1D(cv::Mat &image, vector< vector<Dtype> > &features)
{
    // Set the GPU/CPU mode for Caffe on every call (done here, per the original
    // author's note, in order to be thread-safe)
    if (gpu_mode)
    {
        Caffe::set_mode(Caffe::GPU);
        Caffe::SetDevice(device_id);
    }
    else
    {
        Caffe::set_mode(Caffe::CPU);
    }

    // Get pointer to data layer to set the input; the cast yields null when layer 0
    // is not a MemoryDataLayer. Checked before the timing events exist, so this early
    // return has nothing to clean up.
    caffe::shared_ptr<MemoryDataLayer<Dtype> > memory_data_layer =
        boost::dynamic_pointer_cast<caffe::MemoryDataLayer<Dtype> >(feature_extraction_net->layers()[0]);
    if (!memory_data_layer)
    {
        cout << "Error! The first layer of the network is not a MemoryDataLayer!" << endl;
        return -1;
    }

    cudaEvent_t start, stop;
    if (timing)
    {
        cudaEventCreate(&start);
        cudaEventCreate(&stop);
        cudaEventRecord(start, NULL);
    }

    // Set batch size to 1
    if (memory_data_layer->batch_size() != 1)
    {
        memory_data_layer->set_batch_size(1);
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }

    // Input preprocessing
    // The image passed to AddMatVector must be same size as the mean image.
    // If not, it is resized: LANCZOS4 when any dimension shrinks, bilinear otherwise.
    // cv::Size takes (width, height).
    if (image.rows != mean_height || image.cols != mean_width)
    {
        const bool shrinking = image.rows > mean_height || image.cols > mean_width;
        cv::resize(image, image, cv::Size(mean_width, mean_height), 0, 0,
                   shrinking ? CV_INTER_LANCZOS4 : CV_INTER_LINEAR);
    }

    // Single image with label zero
    const int label = 0;
    memory_data_layer->AddMatVector(vector<cv::Mat>(1, image), vector<int>(1, label));

    // Run network and retrieve features!
    feature_extraction_net->Forward();

    // Errors break out of the loop instead of returning, so the timing events are
    // released on every path by the common epilogue below.
    bool failed = false;
    const size_t num_features = blob_names.size();
    for (size_t f = 0; f < num_features; ++f)
    {
        const caffe::shared_ptr<Blob<Dtype> > feature_blob = feature_extraction_net->blob_by_name(blob_names[f]);
        if (!feature_blob)
        {
            cout << "Error! The blob " << blob_names[f] << " does not exist in the network!" << endl;
            failed = true;
            break;
        }

        const int batch_size = feature_blob->num(); // should be 1
        if (batch_size != 1)
        {
            cout << "Error! Retrieved more than one feature, exiting..." << endl;
            failed = true;
            break;
        }

        const int feat_dim = feature_blob->count(); // should be equal to: count/batch_size=channels*width*height
        if (feat_dim != feature_blob->channels())
        {
            cout<< "Attention! The feature is not 1D: unrolling according to Caffe's order (i.e. channel, width, height)" << endl;
        }

        // The data is only read, so the read-only accessor is sufficient
        const Dtype *first = feature_blob->cpu_data() + feature_blob->offset(0);
        features.push_back(vector<Dtype>(first, first + feat_dim));
    }

    float msecTotal = 0.0f;
    if (timing)
    {
        // Record the stop event
        cudaEventRecord(stop, NULL);

        // Wait for the stop event to complete
        cudaEventSynchronize(stop);

        cudaEventElapsedTime(&msecTotal, start, stop);

        // Release the events created above
        cudaEventDestroy(start);
        cudaEventDestroy(stop);
    }

    if (failed)
    {
        return -1;
    }
    return timing ? msecTotal : 0;
}
/**
 * Extract, for a batch of images, a copy of every blob named in blob_names.
 *
 * The images are resized in place to the mean image size when they differ, handed to
 * the memory data layer (layer 0 of the net) and forwarded batch by batch. After each
 * forward pass a heap-allocated copy of every listed blob is appended to `features`,
 * so the output holds (number of batches) x (number of blob names) entries, grouped
 * by batch.
 *
 * Batch size selection: `new_batch_size` is applied when the number of images is a
 * multiple of it; otherwise the layer's current batch size is kept when it divides the
 * number of images, and as a last resort the batch size is set to 1.
 *
 * @param images          input images (resized in place when needed)
 * @param new_batch_size  requested batch size, must be positive
 * @param features        output; the appended Blob objects are allocated with `new`
 *                        and must be deleted by the caller
 * @return -1 on an invalid request (non-positive batch size, missing layer or blob);
 *         otherwise the average time per image in milliseconds when `timing` is set,
 *         else 0
 */
template <class Dtype>
float CaffeFeatExtractor<Dtype>::extractBatch_multipleFeat(vector<cv::Mat> &images, int new_batch_size, vector< Blob<Dtype>* > &features)
{
    // Reject unusable requests before anything is queued in the data layer
    if (new_batch_size <= 0)
    {
        cout << "Error! The requested batch size must be positive!" << endl;
        return -1;
    }
    const size_t num_images = images.size();
    if (num_images == 0)
    {
        cout << "WARNING: no images to process." << endl;
        return 0;
    }

    // Set the GPU/CPU mode for Caffe on every call (done here, per the original
    // author's note, in order to be thread-safe)
    if (gpu_mode)
    {
        Caffe::set_mode(Caffe::GPU);
        Caffe::SetDevice(device_id);
    }
    else
    {
        Caffe::set_mode(Caffe::CPU);
    }

    // Get pointer to data layer to set the input; the cast yields null when layer 0
    // is not a MemoryDataLayer
    caffe::shared_ptr<MemoryDataLayer<Dtype> > memory_data_layer =
        boost::dynamic_pointer_cast<caffe::MemoryDataLayer<Dtype> >(feature_extraction_net->layers()[0]);
    if (!memory_data_layer)
    {
        cout << "Error! The first layer of the network is not a MemoryDataLayer!" << endl;
        return -1;
    }

    // Resolve every requested blob once, up front: a missing name is reported before
    // any data is queued, and the lookups are not repeated for each batch.
    const size_t num_features = blob_names.size();
    vector< caffe::shared_ptr<Blob<Dtype> > > feature_blobs;
    feature_blobs.reserve(num_features);
    for (size_t i = 0; i < num_features; ++i)
    {
        const caffe::shared_ptr<Blob<Dtype> > blob = feature_extraction_net->blob_by_name(blob_names[i]);
        if (!blob)
        {
            cout << "Error! The blob " << blob_names[i] << " does not exist in the network!" << endl;
            return -1;
        }
        feature_blobs.push_back(blob);
    }

    cudaEvent_t start, stop;
    if (timing)
    {
        cudaEventCreate(&start);
        cudaEventCreate(&stop);
        cudaEventRecord(start, NULL);
    }

    // Set batch size
    const int current_batch_size = memory_data_layer->batch_size();
    if (current_batch_size != new_batch_size && num_images % new_batch_size == 0)
    {
        memory_data_layer->set_batch_size(new_batch_size);
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }
    else if (num_images % current_batch_size != 0)
    {
        cout << "WARNING: image number is not multiple of batch size, setting it to 1 (performance issue)..." << endl;
        memory_data_layer->set_batch_size(1);
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }
    else if (current_batch_size != new_batch_size)
    {
        cout << "WARNING: image number is not multiple of requested batch size, leaving the old one..." << endl;
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }

    // The number of forward passes follows the batch size that is actually in effect,
    // which may differ from the requested one.
    const int num_batches = static_cast<int>(num_images / memory_data_layer->batch_size());

    // Input preprocessing
    // The image passed to AddMatVector must be same size as the mean image.
    // If not, it is resized anisotropically: LANCZOS4 when any dimension shrinks
    // (antialiasing), bilinear otherwise. cv::Size takes (width, height).
    const cv::Size mean_size(mean_width, mean_height);
    for (size_t i = 0; i < num_images; ++i)
    {
        cv::Mat &img = images[i];
        if (img.rows == mean_height && img.cols == mean_width)
            continue;

        const bool shrinking = img.rows > mean_height || img.cols > mean_width;
        cv::resize(img, img, mean_size, 0, 0, shrinking ? CV_INTER_LANCZOS4 : CV_INTER_LINEAR);
    }

    // Initialize labels to zero
    const vector<int> labels(num_images, 0);
    memory_data_layer->AddMatVector(images, labels);

    // Run network and retrieve features!
    for (int b = 0; b < num_batches; ++b)
    {
        feature_extraction_net->Forward();

        for (size_t i = 0; i < num_features; ++i)
        {
            const Blob<Dtype> &source = *feature_blobs[i];

            Blob<Dtype> *copy = new Blob<Dtype>(source.num(), source.channels(), source.height(), source.width());
            // reshape=true lets the copy adopt the source's exact shape instead of
            // failing when the two shapes are not identical
            copy->CopyFrom(source, false, true);
            features.push_back(copy);
        }
    }

    if (!timing)
    {
        return 0;
    }

    // Record the stop event
    cudaEventRecord(stop, NULL);

    // Wait for the stop event to complete
    cudaEventSynchronize(stop);

    float msecTotal = 0.0f;
    cudaEventElapsedTime(&msecTotal, start, stop);

    // Release the events created above
    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    return msecTotal / static_cast<float>(num_images);
}