// Rescale the values of `src` into `dst`, inverting their order.
//
// Each output element is
//     round((max - src(i,j)) / (max - min) * scale)
// where `min` and `max` are the smallest and largest values found in `src`.  The largest source value therefore maps to
// zero and the smallest maps to `scale`.  When every element of `src` has the same value the output is filled with
// zeros, since the linear mapping would otherwise divide by zero.
//
// `dst` must already have the same dimensions as `src`; it is not resized here.  An empty `src` leaves `dst` untouched.
//
// NOTE(review): judging by the name, the result is presumably a cost matrix for a Munkres (Hungarian) assignment
// solver, which minimizes cost -- confirm against the caller.
static void
munkresCost(const dlib::matrix<double> &src, T scale, dlib::matrix<T> &dst /*out*/) {
    ASSERT_require(dst.nr()==src.nr() && dst.nc()==src.nc());

    const long nRows = src.nr();
    const long nCols = src.nc();

    // Nothing to convert, and minmax() must not be asked to look at element (0,0) of an empty matrix.
    if (nRows == 0 || nCols == 0)
        return;

    const std::pair<double, double> range = minmax(src);

    // Exact comparison is intentional: it is precisely the case in which the denominator below would be zero.
    if (range.first==range.second) {
        for (long i=0; i<nRows; ++i) {
            for (long j=0; j<nCols; ++j)
                dst(i, j) = 0;
        }
        return;
    }

    // Loop-invariant width of the source value range; known to be non-zero here.
    const double span = range.second - range.first;
    for (long i=0; i<nRows; ++i) {
        for (long j=0; j<nCols; ++j)
            dst(i, j) = round(((range.second-src(i, j)) / span) * scale);
    }
}
// Find the smallest and largest elements of a matrix.
//
// Returns a pair whose `first` member is the minimum value and whose `second` member is the maximum value.  Elements
// are compared with std::min and std::max.
//
// A matrix with zero rows or zero columns has no elements to inspect; in that case a pair of value-initialized `T`
// objects is returned instead of reading the non-existent element (0,0).
static std::pair<T, T>
minmax(const dlib::matrix<T> &matrix) {
    if (matrix.nr() == 0 || matrix.nc() == 0)
        return std::make_pair(T(), T());

    // Seed both extremes with the first element so that no sentinel value is needed.
    T maxValue = matrix(0, 0);
    T minValue = matrix(0, 0);
    for (long i=0; i<matrix.nr(); ++i) {
        for (long j=0; j<matrix.nc(); ++j) {
            maxValue = std::max(maxValue, matrix(i, j));
            minValue = std::min(minValue, matrix(i, j));
        }
    }
    return std::make_pair(minValue, maxValue);
}