예제 #1
/**
 * Allocate a fit function of the named type and attach it to a result matrix.
 *
 * The object is R_alloc'd and zeroed. `fitType` is looked up by name in
 * omxFitFunctionSymbolTable; the matching entry supplies fitType, initFun
 * and setVarGroup, and addOutput is set to defaultAddOutput.
 *
 * @param os      state passed to omxInitMatrix when a result matrix must be
 *                created; its algebraList size determines the matrix number
 * @param fitType name to look up in omxFitFunctionSymbolTable
 * @param expect  expectation stored on the fit function; may be NULL
 * @param matrix  result matrix to attach; when NULL a 1x1 matrix is created
 * @param rowLik  when true and `expect` carries data, the result matrix is
 *                resized to (data rows) x 1; otherwise it is resized to 1x1
 * @return the new fit function; an R error is raised (Rf_error) when no
 *         table entry with an initFun matches `fitType`
 */
static omxFitFunction *omxNewInternalFitFunction(omxState* os, const char *fitType,
						 omxExpectation *expect, omxMatrix *matrix, bool rowLik)
{
	omxFitFunction *obj = (omxFitFunction*) R_alloc(1, sizeof(omxFitFunction));
	OMXZERO(obj, 1);

	for (size_t fx=0; fx < OMX_STATIC_ARRAY_SIZE(omxFitFunctionSymbolTable); fx++) {
		const omxFitFunctionTableEntry *entry = omxFitFunctionSymbolTable + fx;
		if(strcmp(fitType, entry->name) == 0) {
			obj->fitType = entry->name;
			obj->initFun = entry->initFun;

			// We need to set up the FreeVarGroup before calling initFun
			// because older fit functions expect to know the number of
			// free variables during initFun.
			obj->setVarGroup = entry->setVarGroup; // ugh!
			obj->addOutput = defaultAddOutput;

			// Stop at the first entry whose name matches.
			break;
		}
	}

	if(obj->initFun == NULL) Rf_error("Fit function '%s' not implemented", fitType);

	if (!matrix) {
		// No matrix supplied: create a 1x1 result matrix and number it with
		// the bitwise complement of the current algebra list size.
		obj->matrix = omxInitMatrix(1, 1, TRUE, os);
		obj->matrix->hasMatrixNumber = TRUE;
		obj->matrix->matrixNumber = ~os->algebraList.size();
	} else {
		obj->matrix = matrix;
	}

	obj->matrix->fitFunction = obj;

	obj->expectation = expect;

	// Resize the matrix that is actually attached to the fit function.
	// The `matrix` parameter is NULL whenever the matrix was created above,
	// so it must not be passed to omxResizeMatrix directly.
	if (rowLik && expect && expect->data) {
		omxData *dat = expect->data;
		omxResizeMatrix(obj->matrix, dat->rows, 1);
	} else {
		omxResizeMatrix(obj->matrix, 1, 1);
	}

	return obj;
}
예제 #2
/*
 * Set up the WLS fit function state for `oo`.
 *
 * Visible steps: require oo->expectation, validate the expectation's data
 * (no definition variables; data type "acov" or "cov"), allocate a zeroed
 * omxWLSFitFunction with R_alloc and store it in oo->argStruct, fetch the
 * expected "cov"/"means" components and the observed covariance, means and
 * acov (weights) matrices, check that observed and expected means/thresholds
 * are either both present or both absent, compute the length of the
 * flattened statistics vector, check the weight matrix dimensions against
 * it, allocate working matrices and flatten observed and expected
 * statistics into vectors.
 *
 * NOTE(review): this excerpt has lost lines (closing braces and apparently
 * some statements); the comments below describe only what is visible.
 */
void omxInitWLSFitFunction(omxFitFunction* oo) {
	omxMatrix *cov, *means, *weights;
	if(OMX_DEBUG) { mxLog("Initializing WLS FitFunction function."); }
	int vectorSize = 0;
	if(OMX_DEBUG) { mxLog("Retrieving expectation.\n"); }
	if (!oo->expectation) { Rf_error("%s requires an expectation", oo->fitType); }
	if(OMX_DEBUG) { mxLog("Retrieving data.\n"); }
	// NOTE(review): dataMat is dereferenced below without a NULL check.
	omxData* dataMat = oo->expectation->data;
	if (dataMat->hasDefinitionVariables()) Rf_error("%s: def vars not implemented", oo->name());
	// Only "acov" and "cov" data types are accepted (the message text below
	// mentions only 'acov').
	if(!strEQ(omxDataType(dataMat), "acov") && !strEQ(omxDataType(dataMat), "cov")) {
		// NOTE(review): errstr is heap-allocated and filled with an unbounded
		// sprintf into 250 bytes; no use or free of it is visible in this
		// excerpt -- confirm the message is raised and the buffer released.
		char *errstr = (char*) calloc(250, sizeof(char));
		sprintf(errstr, "WLS FitFunction unable to handle data type %s.  Data must be of type 'acov'.\n", omxDataType(dataMat));
		if(OMX_DEBUG) { mxLog("WLS FitFunction unable to handle data type %s.  Aborting.", omxDataType(dataMat)); }
	// Per-fit-function state, zero-initialized and published via argStruct.
	omxWLSFitFunction *newObj = (omxWLSFitFunction*) R_alloc(1, sizeof(omxWLSFitFunction));
	OMXZERO(newObj, 1);
	oo->argStruct = (void*)newObj;
	/* Get Expectation Elements */
	newObj->expectedCov = omxGetExpectationComponent(oo->expectation, "cov");
	newObj->expectedMeans = omxGetExpectationComponent(oo->expectation, "means");
	// FIXME: threshold structure should be asked for by omxGetExpectationComponent
	/* Read and set expected means, variances, and weights */
	cov = omxDataCovariance(dataMat);
	means = omxDataMeans(dataMat);
	weights = omxDataAcov(dataMat);
	newObj->observedCov = cov;
	newObj->observedMeans = means;
	newObj->weights = weights;
	newObj->n = omxDataNumObs(dataMat);
	// NOTE: If there are any continuous columns then these vectors
	// will not match because eThresh is indexed by column number
	// not by ordinal column number.
	std::vector< omxThresholdColumn > &oThresh = omxDataThresholds(oo->expectation->data);
	std::vector< omxThresholdColumn > &eThresh = oo->expectation->thresholds;
	// Error Checking: Observed/Expected means must agree.
	// ^ is XOR: true when one is false and the other is not.
	if((newObj->expectedMeans == NULL) ^ (newObj->observedMeans == NULL)) {
		if(newObj->expectedMeans != NULL) {
			omxRaiseError("Observed means not detected, but an expected means matrix was specified.\n  If you  wish to model the means, you must provide observed means.\n");
		} else {
			omxRaiseError("Observed means were provided, but an expected means matrix was not specified.\n  If you provide observed means, you must specify a model for the means.\n");
	// Same both-or-neither check for thresholds.
	if((eThresh.size()==0) ^ (oThresh.size()==0)) {
		if (eThresh.size()) {
			omxRaiseError("Observed thresholds not detected, but an expected thresholds matrix was specified.\n   If you wish to model the thresholds, you must provide observed thresholds.\n ");
		} else {
			omxRaiseError("Observed thresholds were provided, but an expected thresholds matrix was not specified.\nIf you provide observed thresholds, you must specify a model for the thresholds.\n");
	/* Error check weight matrix size */
	int ncol = newObj->observedCov->cols;
	// ncol*(ncol+1)/2 entries for the covariance triangle (diagonal included),
	// plus ncol when expected means exist, plus every observed threshold.
	vectorSize = (ncol * (ncol + 1) ) / 2;
	if(newObj->expectedMeans != NULL) {
		vectorSize = vectorSize + ncol;
	for(int i = 0; i < int(oThresh.size()); i++) {
		vectorSize = vectorSize + oThresh[i].numThresholds;
	if(OMX_DEBUG) { mxLog("Intial WLSFitFunction vectorSize comes to: %d.", vectorSize); }
	// A supplied weight matrix must be square with side vectorSize.
	if(weights != NULL && (weights->rows != weights->cols || weights->cols != vectorSize)) {
		omxRaiseError("Developer Error in WLS-based FitFunction object: WLS-based expectation specified an incorrectly-sized weight matrix.\nIf you are not developing a new expectation type, you should probably post this to the OpenMx forums.");
	// FIXME: More Rf_error checking for incoming Fit Functions
	/* Temporary storage for calculation */
	newObj->observedFlattened = omxInitMatrix(vectorSize, 1, TRUE, oo->matrix->currentState);
	newObj->expectedFlattened = omxInitMatrix(vectorSize, 1, TRUE, oo->matrix->currentState);
	newObj->standardExpectedFlattened = omxInitMatrix(vectorSize, 1, TRUE, oo->matrix->currentState);
	newObj->P = omxInitMatrix(1, vectorSize, TRUE, oo->matrix->currentState);
	newObj->B = omxInitMatrix(vectorSize, 1, TRUE, oo->matrix->currentState);
	newObj->standardExpectedCov = omxInitMatrix(ncol, ncol, TRUE, oo->matrix->currentState);
	// Standardized thresholds get the same shape as the expectation's thresholdsMat.
	if (oo->expectation->thresholdsMat) {
		newObj->standardExpectedThresholds = omxInitMatrix(oo->expectation->thresholdsMat->rows, oo->expectation->thresholdsMat->cols, TRUE, oo->matrix->currentState);
		// NOTE(review): as shown, this allocation sits under the thresholdsMat
		// check; a separate guarding condition may have been lost from the
		// excerpt -- confirm against the full source.
		newObj->standardExpectedMeans = omxInitMatrix(1, ncol, TRUE, oo->matrix->currentState);
	omxMatrix *obsThresholdsMat = oo->expectation->data->obsThresholdsMat;
	// Flatten observed, then expected, cov/means/thresholds into the vectors
	// allocated above.
	flattenDataToVector(newObj->observedCov, newObj->observedMeans, obsThresholdsMat, oThresh, newObj->observedFlattened);
	flattenDataToVector(newObj->expectedCov, newObj->expectedMeans, oo->expectation->thresholdsMat,
				eThresh, newObj->expectedFlattened);

예제 #3
/*
 * Accumulate the per-pattern gradient into fc->grad and, when fc->infoB is
 * non-NULL, the accumulated "meat" matrix into fc->infoB. The verbose log
 * message calls this the "cross product approximation".
 *
 * Work over the unique response patterns is split across numThreads OpenMP
 * threads. Each thread accumulates into its own slice of thrGrad / thrMeat
 * (indexed by omx_absolute_thread_num()); the slices are summed into slice
 * 0 after the parallel loops. There are two code paths: one for
 * quad.numSpecific == 0 and one for models with specific dimensions.
 *
 * NOTE(review): this excerpt has lost lines (closing braces, the trailing
 * arguments of some calls, and apparently some index increments); the
 * comments below describe only what is visible.
 */
static void gradCov(omxFitFunction *oo, FitContext *fc)
	const double Scale = Global->llScale;
	omxExpectation *expectation = oo->expectation;
	BA81FitState *state = (BA81FitState*) oo->argStruct;
	BA81Expect *estate = (BA81Expect*) expectation->argStruct;
	if (estate->verbose >= 1) mxLog("%s: cross product approximation", oo->name());

	estate->grp.ba81OutcomeProb(estate->itemParam->data, FALSE);

	const int numThreads = Global->numThreads;
	const int numUnique = estate->getNumUnique();
	ba81NormalQuad &quad = estate->getQuad();
	const int numSpecific = quad.numSpecific;
	const int maxDims = quad.maxDims;
	// One fewer primary dimension when specific dimensions are present.
	const int pDims = numSpecific? maxDims-1 : maxDims;
	const int maxAbilities = quad.maxAbilities;
	Eigen::MatrixXd icovMat(pDims, pDims);
	if (maxAbilities) {
		// Invert the top-left pDims x pDims block of the latent covariance
		// in place (icovMat's storage is wrapped by `tmp`).
		Eigen::VectorXd mean;
		Eigen::MatrixXd srcMat;
		estate->getLatentDistribution(fc, mean, srcMat);
		icovMat = srcMat.topLeftCorner(pDims, pDims);
		Matrix tmp(icovMat.data(), pDims, pDims);
		int info = InvertSymmetricPosDef(tmp, 'U');
		if (info) {
			omxRaiseErrorf("%s: latent covariance matrix is not positive definite", oo->name());
		// Mirror the upper triangle into the lower triangle.
		icovMat.triangularView<Eigen::Lower>() = icovMat.transpose().triangularView<Eigen::Lower>();
	std::vector<int> &rowMap = estate->grp.rowMap;
	double *rowWeight = estate->grp.rowWeight;
	std::vector<bool> &rowSkip = estate->grp.rowSkip;
	const int totalQuadPoints = quad.totalQuadPoints;
	omxMatrix *itemParam = estate->itemParam;
	// One likelihood per unique pattern; filled in by the loops below.
	omxBuffer<double> patternLik(numUnique);

	const int priDerivCoef = pDims + triangleLoc1(pDims);
	const int numLatents = maxAbilities + triangleLoc1(maxAbilities);
	const int thrDerivSize = itemParam->cols * state->itemDerivPadSize;
	const int totalOutcomes = estate->totalOutcomes();
	// Item derivatives are only visited when item parameters are free.
	const int numItems = state->freeItemParams? estate->numItems() : 0;
	const size_t numParam = fc->varGroup->vars.size();
	// Per-thread accumulators: numParam gradient entries and a
	// numParam x numParam matrix for each thread.
	std::vector<double> thrGrad(numThreads * numParam);
	std::vector<double> thrMeat(numThreads * numParam * numParam);
	const double *wherePrep = quad.wherePrep.data();

	if (numSpecific == 0) {
		omxBuffer<double> thrLxk(totalQuadPoints * numThreads);
		omxBuffer<double> derivCoef(totalQuadPoints * priDerivCoef);

		if (state->freeLatents) {
			// Fill priDerivCoef coefficients per quadrature point.
#pragma omp parallel for num_threads(numThreads)
			for (int qx=0; qx < totalQuadPoints; qx++) {
				const double *where = wherePrep + qx * maxDims;
				calcDerivCoef(fc, state, estate, icovMat.data(), where,
					      derivCoef.data() + qx * priDerivCoef);

#pragma omp parallel for num_threads(numThreads)
		for (int px=0; px < numUnique; px++) {
			if (rowSkip[px]) continue;
			// Select this thread's private slices of the shared buffers.
			int thrId = omx_absolute_thread_num();
			double *lxk = thrLxk.data() + thrId * totalQuadPoints;
			omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO
			std::vector<double> deriv0(thrDerivSize);
			std::vector<double> latentGrad(numLatents);
			std::vector<double> patGrad(numParam);
			double *grad = thrGrad.data() + thrId * numParam;
			double *meat = thrMeat.data() + thrId * numParam * numParam;
			estate->grp.ba81LikelihoodSlow2(px, lxk);

			// If patternLik is already valid, maybe could avoid this loop TODO
			// Pattern likelihood = sum of lxk over all quadrature points.
			double patternLik1 = 0;
			for (int qx=0; qx < totalQuadPoints; qx++) {
				patternLik1 += lxk[qx];
			patternLik[px] = patternLik1;

			// if (!validPatternLik(state, patternLik1))  complain, TODO

			for (int qx=0; qx < totalQuadPoints; qx++) {
				double tmp = lxk[qx];
				// NOTE(review): the remaining arguments of this call are
				// missing from the excerpt.
				mapLatentDeriv(state, estate, tmp, derivCoef.data() + qx * priDerivCoef,

				for (int ix=0; ix < numItems; ++ix) {
					int pick = estate->grp.dataColumns[ix][rowMap[px]];
					if (pick == NA_INTEGER) continue;
					// Put the weight tmp on the observed outcome only
					// (index pick-1) and zero on the item's other outcomes.
					OMXZERO(expected.data(), estate->itemOutcomes(ix));
					expected[pick-1] = tmp;
					const double *spec = estate->itemSpec(ix);
					double *iparam = omxMatrixColumn(itemParam, ix);
					const int id = spec[RPF_ISpecID];
					double *myDeriv = deriv0.data() + ix * state->itemDerivPadSize;
					(*Glibrpf_model[id].dLL1)(spec, iparam, wherePrep + qx * maxDims,
							      expected.data(), myDeriv);

			gradCov_finish_1pat(1 / patternLik1, rowWeight[px], numItems, numLatents, numParam,
					state, estate, itemParam, deriv0, latentGrad, Scale, patGrad, grad, meat);
	} else {
		const int totalPrimaryPoints = quad.totalPrimaryPoints;
		const int specificPoints = quad.quadGridSize;
		omxBuffer<double> thrLxk(totalQuadPoints * numSpecific * numThreads);
		omxBuffer<double> thrEi(totalPrimaryPoints * numThreads);
		omxBuffer<double> thrEis(totalPrimaryPoints * numSpecific * numThreads);
		// Per point: primary coefficients followed by 2 per specific group.
		const int derivPerPoint = priDerivCoef + 2 * numSpecific;
		omxBuffer<double> derivCoef(totalQuadPoints * derivPerPoint);

		if (state->freeLatents) {
#pragma omp parallel for num_threads(numThreads)
			for (int qx=0; qx < totalQuadPoints; qx++) {
				const double *where = wherePrep + qx * maxDims;
				calcDerivCoef(fc, state, estate, icovMat.data(), where,
					      derivCoef.data() + qx * derivPerPoint);
				for (int Sgroup=0; Sgroup < numSpecific; ++Sgroup) {
					calcDerivCoef1(fc, state, estate, where, Sgroup,
						       derivCoef.data() + qx * derivPerPoint + priDerivCoef + 2 * Sgroup);

#pragma omp parallel for num_threads(numThreads)
		for (int px=0; px < numUnique; px++) {
			if (rowSkip[px]) continue;
			// Select this thread's private slices of the shared buffers.
			int thrId = omx_absolute_thread_num();
			double *lxk = thrLxk.data() + totalQuadPoints * numSpecific * thrId;
			double *Ei = thrEi.data() + totalPrimaryPoints * thrId;
			double *Eis = thrEis.data() + totalPrimaryPoints * numSpecific * thrId;
			omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO
			std::vector<double> deriv0(thrDerivSize);
			std::vector<double> latentGrad(numLatents);
			std::vector<double> patGrad(numParam);
			double *grad = thrGrad.data() + thrId * numParam;
			double *meat = thrMeat.data() + thrId * numParam * numParam;
			estate->grp.cai2010EiEis(px, lxk, Eis, Ei);

			// Replace each Eis entry with Ei / Eis.
			// NOTE(review): the increment of qloc is not visible in this excerpt.
			for (int qx=0, qloc = 0; qx < totalPrimaryPoints; qx++) {
				for (int sgroup=0; sgroup < numSpecific; ++sgroup) {
					Eis[qloc] = Ei[qx] / Eis[qloc];

			// NOTE(review): updates of qloc and qx, and the trailing arguments
			// of mapLatentDeriv / mapLatentDerivS, are not visible in this excerpt.
			for (int qloc=0, eisloc=0, qx=0; eisloc < totalPrimaryPoints * numSpecific; eisloc += numSpecific) {
				for (int sx=0; sx < specificPoints; sx++) {
					mapLatentDeriv(state, estate, Eis[eisloc] * lxk[qloc],
						       derivCoef.data() + qx * derivPerPoint,

					for (int Sgroup=0; Sgroup < numSpecific; Sgroup++) {
						double lxk1 = lxk[qloc];
						double Eis1 = Eis[eisloc + Sgroup];
						double tmp = Eis1 * lxk1;
						mapLatentDerivS(state, estate, Sgroup, tmp,
								derivCoef.data() + qx * derivPerPoint + priDerivCoef + 2 * Sgroup,

						for (int ix=0; ix < numItems; ++ix) {
							// Only items assigned to this specific group.
							if (estate->grp.Sgroup[ix] != Sgroup) continue;
							int pick = estate->grp.dataColumns[ix][rowMap[px]];
							if (pick == NA_INTEGER) continue;
							OMXZERO(expected.data(), estate->itemOutcomes(ix));
							expected[pick-1] = tmp;
							const double *spec = estate->itemSpec(ix);
							double *iparam = omxMatrixColumn(itemParam, ix);
							const int id = spec[RPF_ISpecID];
							const int dims = spec[RPF_ISpecDims];
							double *myDeriv = deriv0.data() + ix * state->itemDerivPadSize;
							const double *where = wherePrep + qx * maxDims;
							// Build the item's coordinate vector; indices past
							// maxDims-1 reuse the last coordinate of `where`.
							Eigen::VectorXd ptheta(dims);
							for (int dx=0; dx < dims; dx++) {
								ptheta[dx] = where[std::min(dx, maxDims-1)];
							(*Glibrpf_model[id].dLL1)(spec, iparam, ptheta.data(),
									      expected.data(), myDeriv);

			// If patternLik is already valid, maybe could avoid this loop TODO
			// Pattern likelihood = sum of Ei over the primary points.
			double patternLik1 = 0;
			for (int qx=0; qx < totalPrimaryPoints; ++qx) {
				patternLik1 += Ei[qx];
			patternLik[px] = patternLik1;

			gradCov_finish_1pat(1 / patternLik1, rowWeight[px], numItems, numLatents, numParam,
					state, estate, itemParam, deriv0, latentGrad, Scale, patGrad, grad, meat);

	// Reduce the per-thread gradient slices into slice 0.
	for (int tx=1; tx < numThreads; ++tx) {
		double *th = thrGrad.data() + tx * numParam;
		for (size_t en=0; en < numParam; ++en) {
			thrGrad[en] += th[en];
	// Reduce the per-thread meat slices into slice 0.
	for (int tx=1; tx < numThreads; ++tx) {
		double *th = thrMeat.data() + tx * numParam * numParam;
		for (size_t en=0; en < numParam * numParam; ++en) {
			thrMeat[en] += th[en];
	// Add the reduced results to the fit context.
	for (size_t d1=0; d1 < numParam; ++d1) {
		fc->grad(d1) += thrGrad[d1];
	if (fc->infoB) {
		for (size_t d1=0; d1 < numParam; ++d1) {
			for (size_t d2=0; d2 < numParam; ++d2) {
				int cell = d1 * numParam + d2;
				fc->infoB[cell] += thrMeat[cell];
예제 #4
/*
 * Accumulate the pieces logged as "sandwich" into the fit context: when
 * fc->infoA is non-NULL it receives thrBreadH - thrBreadG + thrMeat, and
 * when fc->infoB is non-NULL it receives thrMeat.
 *
 * Work over the unique response patterns is split across numThreads OpenMP
 * threads. Each thread accumulates into its own numParam x numParam slice
 * of thrBreadG, thrBreadH and thrMeat (indexed by
 * omx_absolute_thread_num()); the slices are summed into slice 0 afterwards.
 *
 * The branch for quad.numSpecific != 0 begins by raising an R error.
 *
 * NOTE(review): this excerpt has lost lines (closing braces and apparently
 * some index increments); the comments below describe only what is visible.
 */
static void sandwich(omxFitFunction *oo, FitContext *fc)
	const double abScale = fabs(Global->llScale);
	omxExpectation *expectation = oo->expectation;
	BA81FitState *state = (BA81FitState*) oo->argStruct;
	BA81Expect *estate = (BA81Expect*) expectation->argStruct;
	if (estate->verbose >= 1) mxLog("%s: sandwich", oo->name());

	estate->grp.ba81OutcomeProb(estate->itemParam->data, FALSE);

	const int numThreads = Global->numThreads;
	const int numUnique = estate->getNumUnique();
	ba81NormalQuad &quad = estate->getQuad();
	const int numSpecific = quad.numSpecific;
	const int maxDims = quad.maxDims;
	std::vector<int> &rowMap = estate->grp.rowMap;
	double *rowWeight = estate->grp.rowWeight;
	std::vector<bool> &rowSkip = estate->grp.rowSkip;
	const int totalQuadPoints = quad.totalQuadPoints;
	omxMatrix *itemParam = estate->itemParam;
	// One likelihood per unique pattern; filled in by the loops below.
	omxBuffer<double> patternLik(numUnique);

	std::vector<const double*> &itemSpec = estate->grp.spec;
	const int totalOutcomes = estate->totalOutcomes();
	const int numItems = estate->grp.numItems();
	const size_t numParam = fc->varGroup->vars.size();
	const double *wherePrep = quad.wherePrep.data();
	// Per-thread numParam x numParam accumulators.
	std::vector<double> thrBreadG(numThreads * numParam * numParam);
	std::vector<double> thrBreadH(numThreads * numParam * numParam);
	std::vector<double> thrMeat(numThreads * numParam * numParam);

	if (numSpecific == 0) {
		omxBuffer<double> thrLxk(totalQuadPoints * numThreads);

#pragma omp parallel for num_threads(numThreads)
		for (int px=0; px < numUnique; px++) {
			if (rowSkip[px]) continue;
			// Select this thread's private slices of the shared buffers.
			int thrId = omx_absolute_thread_num();
			double *lxk = thrLxk.data() + thrId * totalQuadPoints;
			omxBuffer<double> itemDeriv(state->itemDerivPadSize);
			omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO
			double *breadG = thrBreadG.data() + thrId * numParam * numParam; //a
			double *breadH = thrBreadH.data() + thrId * numParam * numParam; //a
			double *meat = thrMeat.data() + thrId * numParam * numParam;   //b
			std::vector<double> patGrad(numParam);

			estate->grp.ba81LikelihoodSlow2(px, lxk);

			// If patternLik is already valid, maybe could avoid this loop TODO
			// Pattern likelihood = sum of lxk over all quadrature points.
			double patternLik1 = 0;
			for (int qx=0; qx < totalQuadPoints; qx++) {
				patternLik1 += lxk[qx];
			patternLik[px] = patternLik1;

			// if (!validPatternLik(state, patternLik1))  complain

			double weight = 1 / patternLik[px];
			for (int qx=0; qx < totalQuadPoints; qx++) {
				// tmp is this quadrature point's share of the pattern likelihood.
				double tmp = lxk[qx] * weight;
				double sqrtTmp = sqrt(tmp);

				std::vector<double> gradBuf(numParam);
				int gradOffset = 0;

				for (int ix=0; ix < numItems; ++ix) {
					// Advance the offset before any `continue` so it stays
					// the running sum of paramPerItem over preceding items.
					if (ix) gradOffset += state->paramPerItem[ix-1];
					int pick = estate->grp.dataColumns[ix][rowMap[px]];
					if (pick == NA_INTEGER) continue;
					// Shift to the index used for `expected` below.
					pick -= 1;

					const int iOutcomes = estate->itemOutcomes(ix);
					OMXZERO(expected.data(), iOutcomes);
					expected[pick] = 1;
					const double *spec = itemSpec[ix];
					double *iparam = omxMatrixColumn(itemParam, ix);
					const int id = spec[RPF_ISpecID];
					OMXZERO(itemDeriv.data(), state->itemDerivPadSize);
					(*Glibrpf_model[id].dLL1)(spec, iparam, wherePrep + qx * maxDims,
							      expected.data(), itemDeriv.data());
					(*Glibrpf_model[id].dLL2)(spec, iparam, itemDeriv.data());

					// Scatter the item's derivatives into the parameter
					// vectors; negative map entries are skipped.
					for (int par = 0; par < state->paramPerItem[ix]; ++par) {
						int to = state->itemGradMap[gradOffset + par];
						if (to >= 0) {
							gradBuf[to] -= itemDeriv[par] * sqrtTmp;
							patGrad[to] -= itemDeriv[par] * tmp;
					int derivBase = ix * state->itemDerivPadSize;
					// paramMap entries >= numParam index into breadH after
					// subtracting numParam.
					for (int ox=0; ox < state->itemDerivPadSize; ox++) {
						int to = state->paramMap[derivBase + ox];
						if (to >= int(numParam)) {
							int Hto = to - numParam;
							breadH[Hto] += abScale * itemDeriv[ox] * tmp * rowWeight[px];
				addSymOuterProd(abScale * rowWeight[px], gradBuf.data(), numParam, breadG);
			addSymOuterProd(abScale * rowWeight[px], patGrad.data(), numParam, meat);

	} else {
		// NOTE(review): Rf_error presumably does not return, which would make
		// the rest of this branch unreachable -- confirm.
		Rf_error("Sandwich information matrix method is not implemented for bifactor models");
		const int totalPrimaryPoints = quad.totalPrimaryPoints;
		const int specificPoints = quad.quadGridSize;
		omxBuffer<double> thrLxk(totalQuadPoints * numSpecific * numThreads);
		omxBuffer<double> thrEi(totalPrimaryPoints * numThreads);
		omxBuffer<double> thrEis(totalPrimaryPoints * numSpecific * numThreads);

#pragma omp parallel for num_threads(numThreads)
		for (int px=0; px < numUnique; px++) {
			if (rowSkip[px]) continue;
			int thrId = omx_absolute_thread_num();
			omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO
			omxBuffer<double> itemDeriv(state->itemDerivPadSize);
			double *breadG = thrBreadG.data() + thrId * numParam * numParam; //a
			double *breadH = thrBreadH.data() + thrId * numParam * numParam; //a
			double *meat = thrMeat.data() + thrId * numParam * numParam;   //b
			std::vector<double> patGrad(numParam);
			double *lxk = thrLxk.data() + totalQuadPoints * numSpecific * thrId;
			double *Ei = thrEi.data() + totalPrimaryPoints * thrId;
			double *Eis = thrEis.data() + totalPrimaryPoints * numSpecific * thrId;
			estate->grp.cai2010EiEis(px, lxk, Eis, Ei);

			// If patternLik is already valid, maybe could avoid this loop TODO
			// Pattern likelihood = sum of Ei over the primary points.
			double patternLik1 = 0;
			for (int qx=0; qx < totalPrimaryPoints; ++qx) {
				patternLik1 += Ei[qx];
			patternLik[px] = patternLik1;

			// Replace each Eis entry with Ei / Eis.
			// NOTE(review): the increment of qloc is not visible in this excerpt.
			for (int qx=0, qloc = 0; qx < totalPrimaryPoints; qx++) {
				for (int sgroup=0; sgroup < numSpecific; ++sgroup) {
					Eis[qloc] = Ei[qx] / Eis[qloc];

			// WARNING: I didn't work out the math. I just coded this the way
			// it seems to make sense.
			// NOTE(review): the update of qx is not visible in this excerpt.
			for (int qloc=0, eisloc=0, qx=0; eisloc < totalPrimaryPoints * numSpecific; eisloc += numSpecific) {
				for (int sx=0; sx < specificPoints; sx++) {
					for (int Sgroup=0; Sgroup < numSpecific; Sgroup++) {
						std::vector<double> gradBuf(numParam);
						int gradOffset = 0;
						double lxk1 = lxk[qloc + Sgroup];
						double Eis1 = Eis[eisloc + Sgroup];
						double tmp = Eis1 * lxk1 / patternLik1;
						double sqrtTmp = sqrt(tmp);
						for (int ix=0; ix < numItems; ++ix) {
							// Advance the offset before any `continue`.
							if (ix) gradOffset += state->paramPerItem[ix-1];
							// Only items assigned to this specific group.
							if (estate->grp.Sgroup[ix] != Sgroup) continue;
							int pick = estate->grp.dataColumns[ix][rowMap[px]];
							if (pick == NA_INTEGER) continue;
							OMXZERO(expected.data(), estate->itemOutcomes(ix));
							expected[pick-1] = 1;
							const double *spec = itemSpec[ix];
							double *iparam = omxMatrixColumn(itemParam, ix);
							const int id = spec[RPF_ISpecID];
							const int dims = spec[RPF_ISpecDims];
							OMXZERO(itemDeriv.data(), state->itemDerivPadSize);
							const double *where = wherePrep + qx * maxDims;
							// Build the item's coordinate vector; indices past
							// maxDims-1 reuse the last coordinate of `where`.
							Eigen::VectorXd ptheta(dims);
							for (int dx=0; dx < dims; dx++) {
								ptheta[dx] = where[std::min(dx, maxDims-1)];
							(*Glibrpf_model[id].dLL1)(spec, iparam, ptheta.data(),
									      expected.data(), itemDeriv.data());
							(*Glibrpf_model[id].dLL2)(spec, iparam, itemDeriv.data());

							for (int par = 0; par < state->paramPerItem[ix]; ++par) {
								int to = state->itemGradMap[gradOffset + par];
								if (to >= 0) {
									gradBuf[to] -= itemDeriv[par] * sqrtTmp;
									patGrad[to] -= itemDeriv[par] * tmp;
							int derivBase = ix * state->itemDerivPadSize;
							for (int ox=0; ox < state->itemDerivPadSize; ox++) {
								int to = state->paramMap[derivBase + ox];
								if (to >= int(numParam)) {
									int Hto = to - numParam;
									breadH[Hto] += (abScale * itemDeriv[ox] *
											tmp * rowWeight[px]);
						addSymOuterProd(abScale * rowWeight[px], gradBuf.data(), numParam, breadG);
					qloc += numSpecific;
			addSymOuterProd(abScale * rowWeight[px], patGrad.data(), numParam, meat);

	// only need upper triangle TODO
	// Reduce each per-thread accumulator into its slice 0.
	for (int tx=1; tx < numThreads; ++tx) {
		double *th = thrBreadG.data() + tx * numParam * numParam;
		for (size_t en=0; en < numParam * numParam; ++en) {
			thrBreadG[en] += th[en];
	for (int tx=1; tx < numThreads; ++tx) {
		double *th = thrBreadH.data() + tx * numParam * numParam;
		for (size_t en=0; en < numParam * numParam; ++en) {
			thrBreadH[en] += th[en];
	for (int tx=1; tx < numThreads; ++tx) {
		double *th = thrMeat.data() + tx * numParam * numParam;
		for (size_t en=0; en < numParam * numParam; ++en) {
			thrMeat[en] += th[en];
	//pda(thrBreadG.data(), numParam, numParam);
	//pda(thrBreadH.data(), numParam, numParam);
	//pda(thrMeat.data(), numParam, numParam);
	// infoA receives H - G + meat; infoB receives meat alone.
	if (fc->infoA) {
		for (size_t d1=0; d1 < numParam; ++d1) {
			for (size_t d2=0; d2 < numParam; ++d2) {
				int cell = d1 * numParam + d2;
				fc->infoA[cell] += thrBreadH[cell] - thrBreadG[cell] + thrMeat[cell];
	if (fc->infoB) {
		for (size_t d1=0; d1 < numParam; ++d1) {
			for (size_t d2=0; d2 < numParam; ++d2) {
				int cell = d1 * numParam + d2;
				fc->infoB[cell] += thrMeat[cell];