float CaffeFeatExtractor<Dtype>::extractBatch_singleFeat_1D(vector<cv::Mat> &images, int new_batch_size, vector< vector<Dtype> > &features)
{

    // Set the GPU/CPU mode for Caffe (here in order to be thread-safe)
    if (gpu_mode)
    {
        Caffe::set_mode(Caffe::GPU);
        Caffe::SetDevice(device_id);
    }
    else
    {
        Caffe::set_mode(Caffe::CPU);
    }

    cudaEvent_t start, stop;

    if (timing)
    {
        cudaEventCreate(&start);
        cudaEventCreate(&stop);
        cudaEventRecord(start, NULL);
    }

    // Initialize labels to zero
    vector<int> labels(images.size(), 0);

    // Get pointer to data layer to set the input
    caffe::shared_ptr<MemoryDataLayer<Dtype> > memory_data_layer = boost::dynamic_pointer_cast<caffe::MemoryDataLayer<Dtype> >(feature_extraction_net->layers()[0]);

    // Set batch size

    if (memory_data_layer->batch_size()!=new_batch_size)
    {
        if (images.size()%new_batch_size==0)
        {
            memory_data_layer->set_batch_size(new_batch_size);
            cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
        }
        else
        {
            if (images.size()%memory_data_layer->batch_size()==0)
            {
                cout << "WARNING: image number is not multiple of requested batch size,leaving the old one." << endl;
                cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
            } else
            {
                cout << "WARNING: image number is not multiple of batch size, setting it to 1 (performance issue)." << endl;
                memory_data_layer->set_batch_size(1);
                cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
            }

        }

    } else
    {
        if (images.size()%memory_data_layer->batch_size()!=0)
        {
            cout << "WARNING: image number is not multiple of batch size, setting it to 1 (performance issue)." << endl;
            memory_data_layer->set_batch_size(1);
            cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
        }
    }

    int num_batches = images.size()/new_batch_size;

    // Input preprocessing

    // The image passed to AddMatVector must be same size as the mean image
    // If not, it is resized:
    // if it is downsampled, an anti-aliasing Gaussian Filter is applied

    for (int i=0; i<images.size(); i++)
    {
        if (images[i].rows != mean_height || images[i].cols != mean_height)
        {
            if (images[i].rows > mean_height || images[i].cols > mean_height)
            {
                cv::resize(images[i], images[i], cv::Size(mean_height, mean_width), 0, 0, CV_INTER_LANCZOS4);
            }
            else
            {
                cv::resize(images[i], images[i], cv::Size(mean_height, mean_width), 0, 0, CV_INTER_LINEAR);
            }
        }
    }

    memory_data_layer->AddMatVector(images,labels);

    size_t num_features = blob_names.size();
    if (num_features!=1)
    {
        cout<< "Error! The list of features to be extracted has not size one!" << endl;
        return -1;
    }

    // Run network and retrieve features!

    std::vector<Blob<Dtype>*> results;

    for (int b=0; b<num_batches; ++b)
    {
        results = feature_extraction_net->Forward();

        const caffe::shared_ptr<Blob<Dtype> > feature_blob = feature_extraction_net->blob_by_name(blob_names[0]);

        int batch_size = feature_blob->num();

        int feat_dim = feature_blob->count() / batch_size; // should be equal to: channels*width*height
        if (feat_dim!=feature_blob->channels())
        {
            cout<< "Attention! The feature is not 1D: unrolling according to Caffe's order (i.e. channel, width, height)" << endl;
        }

        for (int i=0; i<batch_size; ++i)
        {
            features.push_back(vector <Dtype>(feature_blob->mutable_cpu_data() + feature_blob->offset(i), feature_blob->mutable_cpu_data() + feature_blob->offset(i) + feat_dim));
        }

    }

    if (timing)
    {
        // Record the stop event
        cudaEventRecord(stop, NULL);

        // Wait for the stop event to complete
        cudaEventSynchronize(stop);

        float msecTotal = 0.0f;
        cudaEventElapsedTime(&msecTotal, start, stop);

        float msecPerImage = msecTotal/(float)images.size();

        return msecPerImage;
    }
    else
    {
        return 0;
    }
}
float CaffeFeatExtractor<Dtype>::extract_multipleFeat_1D(cv::Mat &image, vector< vector<Dtype> > &features)
{

    // Set the GPU/CPU mode for Caffe (here in order to be thread-safe)
    if (gpu_mode)
    {
        Caffe::set_mode(Caffe::GPU);
        Caffe::SetDevice(device_id);
    }
    else
    {
        Caffe::set_mode(Caffe::CPU);
    }

    cudaEvent_t start, stop;

    if (timing)
    {
        cudaEventCreate(&start);
        cudaEventCreate(&stop);
        cudaEventRecord(start, NULL);
    }

    // Initialize labels to zero
    int label = 0;

    // Get pointer to data layer to set the input
    caffe::shared_ptr<MemoryDataLayer<Dtype> > memory_data_layer = boost::dynamic_pointer_cast<caffe::MemoryDataLayer<Dtype> >(feature_extraction_net->layers()[0]);

    // Set batch size to 1

    if (memory_data_layer->batch_size()!=1)
    {
        memory_data_layer->set_batch_size(1);
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }

    // Input preprocessing

    // The image passed to AddMatVector must be same size as the mean image
    // If not, it is resized:
    // if it is downsampled, an anti-aliasing Gaussian Filter is applied

    if (image.rows != mean_height || image.cols != mean_height)
    {
        if (image.rows > mean_height || image.cols > mean_height)
        {
            cv::resize(image, image, cv::Size(mean_height, mean_width), 0, 0, CV_INTER_LANCZOS4);
        }
        else
        {
            cv::resize(image, image, cv::Size(mean_height, mean_width), 0, 0, CV_INTER_LINEAR);
        }
    }

    memory_data_layer->AddMatVector(vector<cv::Mat>(1, image),vector<int>(1,label));

    size_t num_features = blob_names.size();

    // Run network and retrieve features!

    // depending on your net's architecture, the blobs will hold accuracy and/or labels, etc
    std::vector<Blob<Dtype>*> results = feature_extraction_net->Forward();

    for (int f = 0; f < num_features; ++f) {

        const caffe::shared_ptr<Blob<Dtype> > feature_blob = feature_extraction_net->blob_by_name(blob_names[f]);

        int batch_size = feature_blob->num(); // should be 1
        if (batch_size!=1)
        {
            cout << "Error! Retrieved more than one feature, exiting..." << endl;
            return -1;
        }

        int feat_dim = feature_blob->count(); // should be equal to: count/batch_size=channels*width*height
        if (feat_dim!=feature_blob->channels())
        {
            cout<< "Attention! The feature is not 1D: unrolling according to Caffe's order (i.e. channel, width, height)" << endl;
        }

        features.push_back(vector <Dtype>(feature_blob->mutable_cpu_data() + feature_blob->offset(0), feature_blob->mutable_cpu_data() + feature_blob->offset(0) + feat_dim));

    }

    if (timing)
    {
        // Record the stop event
        cudaEventRecord(stop, NULL);

        // Wait for the stop event to complete
        cudaEventSynchronize(stop);

        float msecTotal = 0.0f;
        cudaEventElapsedTime(&msecTotal, start, stop);

        return msecTotal;
    }
    else
    {
        return 0;
    }

}
float CaffeFeatExtractor<Dtype>::extractBatch_multipleFeat(vector<cv::Mat> &images, int new_batch_size, vector< Blob<Dtype>* > &features) {

    // Set the GPU/CPU mode for Caffe (here in order to be thread-safe)
    if (gpu_mode)
    {
        Caffe::set_mode(Caffe::GPU);
        Caffe::SetDevice(device_id);
    }
    else
    {
        Caffe::set_mode(Caffe::CPU);
    }

    cudaEvent_t start, stop;

    if (timing)
    {
        cudaEventCreate(&start);
        cudaEventCreate(&stop);
        cudaEventRecord(start, NULL);
    }

    // Initialize labels to zero
    vector<int> labels(images.size(), 0);

    // Get pointer to data layer to set the input
    caffe::shared_ptr<MemoryDataLayer<Dtype> > memory_data_layer = boost::dynamic_pointer_cast<caffe::MemoryDataLayer<Dtype> >(feature_extraction_net->layers()[0]);

    // Set batch size

    if (memory_data_layer->batch_size()!=new_batch_size)
    {
        if (images.size()%new_batch_size==0)
        {
            memory_data_layer->set_batch_size(new_batch_size);
            cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
        }
        else
        {
            if (images.size()%memory_data_layer->batch_size()==0)
            {
                cout << "WARNING: image number is not multiple of requested batch size, leaving the old one..." << endl;
                cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
            } else
            {
                cout << "WARNING: image number is not multiple of batch size, setting it to 1 (performance issue)..." << endl;
                memory_data_layer->set_batch_size(1);
                cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
            }

        }

    } else
    {
        if (images.size()%memory_data_layer->batch_size()!=0)
        {
            cout << "WARNING: image number is not multiple of batch size, setting it to 1 (performance issue)..." << endl;
            memory_data_layer->set_batch_size(1);
            cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
        }
    }

    int num_batches = images.size()/new_batch_size;

    // Input preprocessing

    // The image passed to AddMatVector must be same size as the mean image
    // If not, it is resized anisotropically (BILINEAR)
    // if it is downsampled, LANCZOS4 is used for antialiasing

    for (int i=0; i<images.size(); i++)
    {
        if (images[i].rows != mean_height || images[i].cols != mean_height)
        {
            if (images[i].rows > mean_height || images[i].cols > mean_height)
            {
                cv::resize(images[i], images[i], cv::Size(mean_height, mean_width), 0, 0, CV_INTER_LANCZOS4);
            }
            else
            {
                cv::resize(images[i], images[i], cv::Size(mean_height, mean_width), 0, 0, CV_INTER_LINEAR);
            }
        }
    }

    memory_data_layer->AddMatVector(images,labels);

    size_t num_features = blob_names.size();

    // Run network and retrieve features!

    // depending on your net's architecture, the blobs will hold accuracy and/or labels, etc
    std::vector<Blob<Dtype>*> results;

    for (int b=0; b<num_batches; b++)
    {
        results = feature_extraction_net->Forward();

        for (int i = 0; i < num_features; ++i) {

            const caffe::shared_ptr<Blob<Dtype> > feature_blob = feature_extraction_net->blob_by_name(blob_names[i]);

            int batch_size = feature_blob->num();
            int channels = feature_blob->channels();
            int width = feature_blob->width();
            int height = feature_blob->height();

            features.push_back(new Blob<Dtype>(batch_size, channels, height, width));

            features.back()->CopyFrom(*feature_blob);
        }

    }

    if (timing)
    {
        // Record the stop event
        cudaEventRecord(stop, NULL);

        // Wait for the stop event to complete
        cudaEventSynchronize(stop);

        float msecTotal = 0.0f;
        cudaEventElapsedTime(&msecTotal, start, stop);

        float msecPerImage = msecTotal/(float)images.size();

        return msecPerImage;
    }
    else
    {
        return 0;
    }

}