void DynamicProgram<T>::min(Parts& parts, vector2DMat& scores, vector4DMat& Ix, vector4DMat& Iy, vector4DMat& Ik, vector2DMat& rootv, vector2DMat& rooti) {

	// initialize the outputs, preallocate vectors to make them thread safe
	// TODO: better initialisation of Ix, Iy, Ik
	const int nscales = scores.size();
	const int ncomponents = parts.ncomponents();
	Ix.resize(nscales, vector3DMat(ncomponents));
	Iy.resize(nscales, vector3DMat(ncomponents));
	Ik.resize(nscales, vector3DMat(ncomponents));
	rootv.resize(nscales, vectorMat(ncomponents));
	rooti.resize(nscales, vectorMat(ncomponents));

	// for each scale, and each component, update the scores through message passing
	#ifdef _OPENMP
	#pragma omp parallel for
	#endif
	for (int nc = 0; nc < nscales*ncomponents; ++nc) {

		// calculate the inner loop variables from the dual variables
		const int n = floor(nc / ncomponents);
		const int c = nc % ncomponents;

		// allocate the inner loop variables
		Ix[n][c].resize(parts.nparts(c));
		Iy[n][c].resize(parts.nparts(c));
		Ik[n][c].resize(parts.nparts(c));
		vectorMat ncscores(scores[n].size());

		for (int p = parts.nparts(c)-1; p > 0; --p) {

			// get the component part (which may have multiple mixtures associated with it)
			ComponentPart cpart = parts.component(c, p);
			int nmixtures       = cpart.nmixtures();
			Ix[n][c][p].resize(nmixtures);
			Iy[n][c][p].resize(nmixtures);
			Ik[n][c][p].resize(nmixtures);

			// intermediate results for mixtures of this part
			vectorMat scoresp;
			vectorMat Ixp;
			vectorMat Iyp;

			for (int m = 0; m < nmixtures; ++m) {

				// raw score outputs
				Mat score_in, score_dt, Ix_dt, Iy_dt;
				if (cpart.score(ncscores, m).empty()) {
					score_in = cpart.score(scores[n], m);
				} else {
					score_in = cpart.score(ncscores, m);
				}

				// get the anchor position
				Point anchor = cpart.anchor(m);

				// compute the distance transform
				distanceTransform(score_in, cpart.defw(m), anchor, score_dt, Ix_dt, Iy_dt);
				scoresp.push_back(score_dt);
				Ixp.push_back(Ix_dt);
				Iyp.push_back(Iy_dt);
				//cout << score_dt(Range(0,10), Range(0,10)) << endl;

				// calculate a valid region of interest for the scores
				/*
				int X = score_in.cols;
				int Y = score_in.rows;
				int xmin = std::max(std::min(anchor.x, X), 0);
				int ymin = std::max(std::min(anchor.y, Y), 0);
				int xmax = std::min(std::max(anchor.x+X, 0), X);
				int ymax = std::min(std::max(anchor.y+Y, 0), Y);
				int xoff = std::max(-anchor.x,    0);
				int yoff = std::max(-anchor.y,    0);

				// shift the score by the Part's offset from its parent
				Mat scorem = -numeric_limits<T>::infinity() * Mat::ones(score_dt.size(), score_dt.type());
				Mat Ixm    = Mat::zeros(Ix_dt.size(), Ix_dt.type());
				Mat Iym    = Mat::zeros(Iy_dt.size(), Iy_dt.type());
				if (xoff < X && yoff < Y && (ymax - ymin) > 0 && (xmax - xmin) > 0) {
					Mat score_dt_range 	= score_dt(Range(ymin, ymax),         Range(xmin, xmax));
					Mat score_range    	= scorem(Range(yoff, yoff+ymax-ymin), Range(xoff, xoff+xmax-xmin));
					Mat Ix_dt_range 	= Ix_dt(Range(ymin, ymax),            Range(xmin, xmax));
					Mat Ixm_range 		= Ixm(Range(yoff, yoff+ymax-ymin),    Range(xoff, xoff+xmax-xmin));
					Mat Iy_dt_range 	= Iy_dt(Range(ymin, ymax),            Range(xmin, xmax));
					Mat Iym_range 		= Iym(Range(yoff, yoff+ymax-ymin),    Range(xoff, xoff+xmax-xmin));
					score_dt_range.copyTo(score_range);
					Ix_dt_range.copyTo(Ixm_range);
					Iy_dt_range.copyTo(Iym_range);
				}

				// push the scores onto the intermediate vectors
				scoresp.push_back(scorem);
				Ixp.push_back(Ixm);
				Iyp.push_back(Iym);
				*/
			}

			nmixtures = cpart.parent().nmixtures();
			for (int m = 0; m < nmixtures; ++m) {
				vectorMat weighted;
				// weight each of the child scores
				// TODO: More elegant way of handling bias
				for (int mm = 0; mm < cpart.nmixtures(); ++mm) {
					weighted.push_back(scoresp[mm] + cpart.bias(mm)[m]);
				}
				// compute the max over the mixtures
				Mat maxv, maxi;
				reduceMax(weighted, maxv, maxi);

				// choose the best indices
				Mat Ixm, Iym;
				reducePickIndex<int>(Ixp, maxi, Ixm);
				reducePickIndex<int>(Iyp, maxi, Iym);
				Ix[n][c][p][m] = Ixm;
				Iy[n][c][p][m] = Iym;
				Ik[n][c][p][m] = maxi;

				// update the parent's score
				ComponentPart parent = cpart.parent();
				if (parent.score(ncscores,m).empty()) parent.score(scores[n],m).copyTo(parent.score(ncscores,m));
				parent.score(ncscores,m) += maxv;
				//cout << parent.score(ncscores,m)(Range(0,10),Range(0,10)) << endl << endl;
				if (parent.self() == 0) {
					ComponentPart root = parts.component(c);
					//cout << root.score(ncscores,m)(Range(0,10),Range(0,10)) << endl << endl;
				}
				//cout <<parent.self() << endl;
			}
		}
		// add bias to the root score and find the best mixture
		ComponentPart root = parts.component(c);
		//cout << root.self() << endl;
		Mat rncscore = root.score(ncscores,0);
		//cout << rncscore(Range(1,10),Range(1,10)) << endl;
		T bias = root.bias(0)[0];
		vectorMat weighted;
		// weight each of the child scores
		for (int m = 0; m < root.nmixtures(); ++m) {
			weighted.push_back(root.score(ncscores,m) + bias);
		}
		reduceMax(weighted, rootv[n][c], rooti[n][c]);
	}
}
void DynamicProgram<T>::min(Parts& parts, vector2DMat& scores, vector4DMat& Ix, vector4DMat& Iy, vector4DMat& Ik, vector2DMat& rootv, vector2DMat& rooti) {

	// initialize the outputs, preallocate vectors to make them thread safe
	// TODO: better initialisation of Ix, Iy, Ik
	const unsigned int nscales = scores.size();
	const unsigned int ncomponents = parts.ncomponents();
	Ix.resize(nscales, vector3DMat(ncomponents));
	Iy.resize(nscales, vector3DMat(ncomponents));
	Ik.resize(nscales, vector3DMat(ncomponents));
	rootv.resize(nscales, vectorMat(ncomponents));
	rooti.resize(nscales, vectorMat(ncomponents));

	// for each scale, and each component, update the scores through message passing
	#ifdef _OPENMP
	#pragma omp parallel for
	#endif
	for (unsigned int nc = 0; nc < nscales*ncomponents; ++nc) {

		// calculate the inner loop variables from the dual variables
		const unsigned int n = floor(nc / ncomponents);
		const unsigned int c = nc % ncomponents;

		// allocate the inner loop variables
		Ix[n][c].resize(parts.nparts(c));
		Iy[n][c].resize(parts.nparts(c));
		Ik[n][c].resize(parts.nparts(c));
		vectorMat ncscores(scores[n].size());

		for (int p = parts.nparts(c)-1; p > 0; --p) {

			// get the component part (which may have multiple mixtures associated with it)
			ComponentPart cpart = parts.component(c, p);
			const unsigned int nmixtures  = cpart.nmixtures();
			const unsigned int pnmixtures = cpart.parent().nmixtures();
			Ix[n][c][p].resize(pnmixtures);
			Iy[n][c][p].resize(pnmixtures);
			Ik[n][c][p].resize(pnmixtures);

			// intermediate results for mixtures of this part
			vectorMat scoresp;
			vectorMat Ixp;
			vectorMat Iyp;

			for (unsigned int m = 0; m < nmixtures; ++m) {

				// raw score outputs
				Mat_<T> score_in, score_dt;
				Mat_<int> Ix_dt, Iy_dt;
				if (cpart.score(ncscores, m).empty()) {
					score_in = cpart.score(scores[n], m);
				} else {
					score_in = cpart.score(ncscores, m);
				}

				// get the anchor position
				Point anchor = cpart.anchor(m);

				// compute the distance transform
				vectorf w = cpart.defw(m);
				Quadratic fx(-w[0], -w[1]);
				Quadratic fy(-w[2], -w[3]);
				dt_.compute(score_in, fx, fy, anchor, score_dt, Ix_dt, Iy_dt);
				scoresp.push_back(score_dt);
				Ixp.push_back(Ix_dt);
				Iyp.push_back(Iy_dt);
			}

			for (unsigned int m = 0; m < pnmixtures; ++m) {
				vectorMat weighted;
				// weight each of the child scores
				// TODO: More elegant way of handling bias
				for (unsigned int mm = 0; mm < nmixtures; ++mm) {
					weighted.push_back(scoresp[mm] + cpart.bias(mm)[m]);
				}
				// compute the max over the mixtures
				Mat maxv, maxi;
				Math::reduceMax<T>(weighted, maxv, maxi);

				// choose the best indices
				Mat Ixm, Iym;
				Math::reducePickIndex<int>(Ixp, maxi, Ixm);
				Math::reducePickIndex<int>(Iyp, maxi, Iym);
				Ix[n][c][p][m] = Ixm;
				Iy[n][c][p][m] = Iym;
				Ik[n][c][p][m] = maxi;

				// update the parent's score
				ComponentPart parent = cpart.parent();
				if (parent.score(ncscores,m).empty()) parent.score(scores[n],m).copyTo(parent.score(ncscores,m));
				parent.score(ncscores,m) += maxv;
				if (parent.self() == 0) {
					ComponentPart root = parts.component(c);
				}
			}
		}
		// add bias to the root score and find the best mixture
		ComponentPart root = parts.component(c);
		Mat rncscore = root.score(ncscores,0);
		T bias = root.bias(0)[0];
		vectorMat weighted;
		// weight each of the child scores
		for (unsigned int m = 0; m < root.nmixtures(); ++m) {
			weighted.push_back(root.score(ncscores,m) + bias);
		}
		Math::reduceMax<T>(weighted, rootv[n][c], rooti[n][c]);
	}
}