static void gradCov(omxFitFunction *oo, FitContext *fc) { const double Scale = Global->llScale; omxExpectation *expectation = oo->expectation; BA81FitState *state = (BA81FitState*) oo->argStruct; BA81Expect *estate = (BA81Expect*) expectation->argStruct; if (estate->verbose >= 1) mxLog("%s: cross product approximation", oo->name()); estate->grp.ba81OutcomeProb(estate->itemParam->data, FALSE); const int numThreads = Global->numThreads; const int numUnique = estate->getNumUnique(); ba81NormalQuad &quad = estate->getQuad(); const int numSpecific = quad.numSpecific; const int maxDims = quad.maxDims; const int pDims = numSpecific? maxDims-1 : maxDims; const int maxAbilities = quad.maxAbilities; Eigen::MatrixXd icovMat(pDims, pDims); if (maxAbilities) { Eigen::VectorXd mean; Eigen::MatrixXd srcMat; estate->getLatentDistribution(fc, mean, srcMat); icovMat = srcMat.topLeftCorner(pDims, pDims); Matrix tmp(icovMat.data(), pDims, pDims); int info = InvertSymmetricPosDef(tmp, 'U'); if (info) { omxRaiseErrorf("%s: latent covariance matrix is not positive definite", oo->name()); return; } icovMat.triangularView<Eigen::Lower>() = icovMat.transpose().triangularView<Eigen::Lower>(); } std::vector<int> &rowMap = estate->grp.rowMap; double *rowWeight = estate->grp.rowWeight; std::vector<bool> &rowSkip = estate->grp.rowSkip; const int totalQuadPoints = quad.totalQuadPoints; omxMatrix *itemParam = estate->itemParam; omxBuffer<double> patternLik(numUnique); const int priDerivCoef = pDims + triangleLoc1(pDims); const int numLatents = maxAbilities + triangleLoc1(maxAbilities); const int thrDerivSize = itemParam->cols * state->itemDerivPadSize; const int totalOutcomes = estate->totalOutcomes(); const int numItems = state->freeItemParams? estate->numItems() : 0; const size_t numParam = fc->varGroup->vars.size(); std::vector<double> thrGrad(numThreads * numParam); std::vector<double> thrMeat(numThreads * numParam * numParam); const double *wherePrep = quad.wherePrep.data(); if (numSpecific == 0) { omxBuffer<double> thrLxk(totalQuadPoints * numThreads); omxBuffer<double> derivCoef(totalQuadPoints * priDerivCoef); if (state->freeLatents) { #pragma omp parallel for num_threads(numThreads) for (int qx=0; qx < totalQuadPoints; qx++) { const double *where = wherePrep + qx * maxDims; calcDerivCoef(fc, state, estate, icovMat.data(), where, derivCoef.data() + qx * priDerivCoef); } } #pragma omp parallel for num_threads(numThreads) for (int px=0; px < numUnique; px++) { if (rowSkip[px]) continue; int thrId = omx_absolute_thread_num(); double *lxk = thrLxk.data() + thrId * totalQuadPoints; omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO std::vector<double> deriv0(thrDerivSize); std::vector<double> latentGrad(numLatents); std::vector<double> patGrad(numParam); double *grad = thrGrad.data() + thrId * numParam; double *meat = thrMeat.data() + thrId * numParam * numParam; estate->grp.ba81LikelihoodSlow2(px, lxk); // If patternLik is already valid, maybe could avoid this loop TODO double patternLik1 = 0; for (int qx=0; qx < totalQuadPoints; qx++) { patternLik1 += lxk[qx]; } patternLik[px] = patternLik1; // if (!validPatternLik(state, patternLik1)) complain, TODO for (int qx=0; qx < totalQuadPoints; qx++) { double tmp = lxk[qx]; mapLatentDeriv(state, estate, tmp, derivCoef.data() + qx * priDerivCoef, latentGrad.data()); for (int ix=0; ix < numItems; ++ix) { int pick = estate->grp.dataColumns[ix][rowMap[px]]; if (pick == NA_INTEGER) continue; OMXZERO(expected.data(), estate->itemOutcomes(ix)); expected[pick-1] = tmp; const double *spec = estate->itemSpec(ix); double *iparam = omxMatrixColumn(itemParam, ix); const int id = spec[RPF_ISpecID]; double *myDeriv = deriv0.data() + ix * state->itemDerivPadSize; (*Glibrpf_model[id].dLL1)(spec, iparam, wherePrep + qx * maxDims, expected.data(), myDeriv); } } gradCov_finish_1pat(1 / patternLik1, rowWeight[px], numItems, numLatents, numParam, state, estate, itemParam, deriv0, latentGrad, Scale, patGrad, grad, meat); } } else { const int totalPrimaryPoints = quad.totalPrimaryPoints; const int specificPoints = quad.quadGridSize; omxBuffer<double> thrLxk(totalQuadPoints * numSpecific * numThreads); omxBuffer<double> thrEi(totalPrimaryPoints * numThreads); omxBuffer<double> thrEis(totalPrimaryPoints * numSpecific * numThreads); const int derivPerPoint = priDerivCoef + 2 * numSpecific; omxBuffer<double> derivCoef(totalQuadPoints * derivPerPoint); if (state->freeLatents) { #pragma omp parallel for num_threads(numThreads) for (int qx=0; qx < totalQuadPoints; qx++) { const double *where = wherePrep + qx * maxDims; calcDerivCoef(fc, state, estate, icovMat.data(), where, derivCoef.data() + qx * derivPerPoint); for (int Sgroup=0; Sgroup < numSpecific; ++Sgroup) { calcDerivCoef1(fc, state, estate, where, Sgroup, derivCoef.data() + qx * derivPerPoint + priDerivCoef + 2 * Sgroup); } } } #pragma omp parallel for num_threads(numThreads) for (int px=0; px < numUnique; px++) { if (rowSkip[px]) continue; int thrId = omx_absolute_thread_num(); double *lxk = thrLxk.data() + totalQuadPoints * numSpecific * thrId; double *Ei = thrEi.data() + totalPrimaryPoints * thrId; double *Eis = thrEis.data() + totalPrimaryPoints * numSpecific * thrId; omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO std::vector<double> deriv0(thrDerivSize); std::vector<double> latentGrad(numLatents); std::vector<double> patGrad(numParam); double *grad = thrGrad.data() + thrId * numParam; double *meat = thrMeat.data() + thrId * numParam * numParam; estate->grp.cai2010EiEis(px, lxk, Eis, Ei); for (int qx=0, qloc = 0; qx < totalPrimaryPoints; qx++) { for (int sgroup=0; sgroup < numSpecific; ++sgroup) { Eis[qloc] = Ei[qx] / Eis[qloc]; ++qloc; } } for (int qloc=0, eisloc=0, qx=0; eisloc < totalPrimaryPoints * numSpecific; eisloc += numSpecific) { for (int sx=0; sx < specificPoints; sx++) { mapLatentDeriv(state, estate, Eis[eisloc] * lxk[qloc], derivCoef.data() + qx * derivPerPoint, latentGrad.data()); for (int Sgroup=0; Sgroup < numSpecific; Sgroup++) { double lxk1 = lxk[qloc]; double Eis1 = Eis[eisloc + Sgroup]; double tmp = Eis1 * lxk1; mapLatentDerivS(state, estate, Sgroup, tmp, derivCoef.data() + qx * derivPerPoint + priDerivCoef + 2 * Sgroup, latentGrad.data()); for (int ix=0; ix < numItems; ++ix) { if (estate->grp.Sgroup[ix] != Sgroup) continue; int pick = estate->grp.dataColumns[ix][rowMap[px]]; if (pick == NA_INTEGER) continue; OMXZERO(expected.data(), estate->itemOutcomes(ix)); expected[pick-1] = tmp; const double *spec = estate->itemSpec(ix); double *iparam = omxMatrixColumn(itemParam, ix); const int id = spec[RPF_ISpecID]; const int dims = spec[RPF_ISpecDims]; double *myDeriv = deriv0.data() + ix * state->itemDerivPadSize; const double *where = wherePrep + qx * maxDims; Eigen::VectorXd ptheta(dims); for (int dx=0; dx < dims; dx++) { ptheta[dx] = where[std::min(dx, maxDims-1)]; } (*Glibrpf_model[id].dLL1)(spec, iparam, ptheta.data(), expected.data(), myDeriv); } ++qloc; } ++qx; } } // If patternLik is already valid, maybe could avoid this loop TODO double patternLik1 = 0; for (int qx=0; qx < totalPrimaryPoints; ++qx) { patternLik1 += Ei[qx]; } patternLik[px] = patternLik1; gradCov_finish_1pat(1 / patternLik1, rowWeight[px], numItems, numLatents, numParam, state, estate, itemParam, deriv0, latentGrad, Scale, patGrad, grad, meat); } } for (int tx=1; tx < numThreads; ++tx) { double *th = thrGrad.data() + tx * numParam; for (size_t en=0; en < numParam; ++en) { thrGrad[en] += th[en]; } } for (int tx=1; tx < numThreads; ++tx) { double *th = thrMeat.data() + tx * numParam * numParam; for (size_t en=0; en < numParam * numParam; ++en) { thrMeat[en] += th[en]; } } for (size_t d1=0; d1 < numParam; ++d1) { fc->grad(d1) += thrGrad[d1]; } if (fc->infoB) { for (size_t d1=0; d1 < numParam; ++d1) { for (size_t d2=0; d2 < numParam; ++d2) { int cell = d1 * numParam + d2; fc->infoB[cell] += thrMeat[cell]; } } } }
static void sandwich(omxFitFunction *oo, FitContext *fc) { const double abScale = fabs(Global->llScale); omxExpectation *expectation = oo->expectation; BA81FitState *state = (BA81FitState*) oo->argStruct; BA81Expect *estate = (BA81Expect*) expectation->argStruct; if (estate->verbose >= 1) mxLog("%s: sandwich", oo->name()); estate->grp.ba81OutcomeProb(estate->itemParam->data, FALSE); const int numThreads = Global->numThreads; const int numUnique = estate->getNumUnique(); ba81NormalQuad &quad = estate->getQuad(); const int numSpecific = quad.numSpecific; const int maxDims = quad.maxDims; std::vector<int> &rowMap = estate->grp.rowMap; double *rowWeight = estate->grp.rowWeight; std::vector<bool> &rowSkip = estate->grp.rowSkip; const int totalQuadPoints = quad.totalQuadPoints; omxMatrix *itemParam = estate->itemParam; omxBuffer<double> patternLik(numUnique); std::vector<const double*> &itemSpec = estate->grp.spec; const int totalOutcomes = estate->totalOutcomes(); const int numItems = estate->grp.numItems(); const size_t numParam = fc->varGroup->vars.size(); const double *wherePrep = quad.wherePrep.data(); std::vector<double> thrBreadG(numThreads * numParam * numParam); std::vector<double> thrBreadH(numThreads * numParam * numParam); std::vector<double> thrMeat(numThreads * numParam * numParam); if (numSpecific == 0) { omxBuffer<double> thrLxk(totalQuadPoints * numThreads); #pragma omp parallel for num_threads(numThreads) for (int px=0; px < numUnique; px++) { if (rowSkip[px]) continue; int thrId = omx_absolute_thread_num(); double *lxk = thrLxk.data() + thrId * totalQuadPoints; omxBuffer<double> itemDeriv(state->itemDerivPadSize); omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO double *breadG = thrBreadG.data() + thrId * numParam * numParam; //a double *breadH = thrBreadH.data() + thrId * numParam * numParam; //a double *meat = thrMeat.data() + thrId * numParam * numParam; //b std::vector<double> patGrad(numParam); estate->grp.ba81LikelihoodSlow2(px, lxk); // If patternLik is already valid, maybe could avoid this loop TODO double patternLik1 = 0; for (int qx=0; qx < totalQuadPoints; qx++) { patternLik1 += lxk[qx]; } patternLik[px] = patternLik1; // if (!validPatternLik(state, patternLik1)) complain double weight = 1 / patternLik[px]; for (int qx=0; qx < totalQuadPoints; qx++) { double tmp = lxk[qx] * weight; double sqrtTmp = sqrt(tmp); std::vector<double> gradBuf(numParam); int gradOffset = 0; for (int ix=0; ix < numItems; ++ix) { if (ix) gradOffset += state->paramPerItem[ix-1]; int pick = estate->grp.dataColumns[ix][rowMap[px]]; if (pick == NA_INTEGER) continue; pick -= 1; const int iOutcomes = estate->itemOutcomes(ix); OMXZERO(expected.data(), iOutcomes); expected[pick] = 1; const double *spec = itemSpec[ix]; double *iparam = omxMatrixColumn(itemParam, ix); const int id = spec[RPF_ISpecID]; OMXZERO(itemDeriv.data(), state->itemDerivPadSize); (*Glibrpf_model[id].dLL1)(spec, iparam, wherePrep + qx * maxDims, expected.data(), itemDeriv.data()); (*Glibrpf_model[id].dLL2)(spec, iparam, itemDeriv.data()); for (int par = 0; par < state->paramPerItem[ix]; ++par) { int to = state->itemGradMap[gradOffset + par]; if (to >= 0) { gradBuf[to] -= itemDeriv[par] * sqrtTmp; patGrad[to] -= itemDeriv[par] * tmp; } } int derivBase = ix * state->itemDerivPadSize; for (int ox=0; ox < state->itemDerivPadSize; ox++) { int to = state->paramMap[derivBase + ox]; if (to >= int(numParam)) { int Hto = to - numParam; breadH[Hto] += abScale * itemDeriv[ox] * tmp * rowWeight[px]; } } } addSymOuterProd(abScale * rowWeight[px], gradBuf.data(), numParam, breadG); } addSymOuterProd(abScale * rowWeight[px], patGrad.data(), numParam, meat); } } else { Rf_error("Sandwich information matrix method is not implemented for bifactor models"); const int totalPrimaryPoints = quad.totalPrimaryPoints; const int specificPoints = quad.quadGridSize; omxBuffer<double> thrLxk(totalQuadPoints * numSpecific * numThreads); omxBuffer<double> thrEi(totalPrimaryPoints * numThreads); omxBuffer<double> thrEis(totalPrimaryPoints * numSpecific * numThreads); #pragma omp parallel for num_threads(numThreads) for (int px=0; px < numUnique; px++) { if (rowSkip[px]) continue; int thrId = omx_absolute_thread_num(); omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO omxBuffer<double> itemDeriv(state->itemDerivPadSize); double *breadG = thrBreadG.data() + thrId * numParam * numParam; //a double *breadH = thrBreadH.data() + thrId * numParam * numParam; //a double *meat = thrMeat.data() + thrId * numParam * numParam; //b std::vector<double> patGrad(numParam); double *lxk = thrLxk.data() + totalQuadPoints * numSpecific * thrId; double *Ei = thrEi.data() + totalPrimaryPoints * thrId; double *Eis = thrEis.data() + totalPrimaryPoints * numSpecific * thrId; estate->grp.cai2010EiEis(px, lxk, Eis, Ei); // If patternLik is already valid, maybe could avoid this loop TODO double patternLik1 = 0; for (int qx=0; qx < totalPrimaryPoints; ++qx) { patternLik1 += Ei[qx]; } patternLik[px] = patternLik1; for (int qx=0, qloc = 0; qx < totalPrimaryPoints; qx++) { for (int sgroup=0; sgroup < numSpecific; ++sgroup) { Eis[qloc] = Ei[qx] / Eis[qloc]; ++qloc; } } // WARNING: I didn't work out the math. I just coded this the way // it seems to make sense. for (int qloc=0, eisloc=0, qx=0; eisloc < totalPrimaryPoints * numSpecific; eisloc += numSpecific) { for (int sx=0; sx < specificPoints; sx++) { for (int Sgroup=0; Sgroup < numSpecific; Sgroup++) { std::vector<double> gradBuf(numParam); int gradOffset = 0; double lxk1 = lxk[qloc + Sgroup]; double Eis1 = Eis[eisloc + Sgroup]; double tmp = Eis1 * lxk1 / patternLik1; double sqrtTmp = sqrt(tmp); for (int ix=0; ix < numItems; ++ix) { if (ix) gradOffset += state->paramPerItem[ix-1]; if (estate->grp.Sgroup[ix] != Sgroup) continue; int pick = estate->grp.dataColumns[ix][rowMap[px]]; if (pick == NA_INTEGER) continue; OMXZERO(expected.data(), estate->itemOutcomes(ix)); expected[pick-1] = 1; const double *spec = itemSpec[ix]; double *iparam = omxMatrixColumn(itemParam, ix); const int id = spec[RPF_ISpecID]; const int dims = spec[RPF_ISpecDims]; OMXZERO(itemDeriv.data(), state->itemDerivPadSize); const double *where = wherePrep + qx * maxDims; Eigen::VectorXd ptheta(dims); for (int dx=0; dx < dims; dx++) { ptheta[dx] = where[std::min(dx, maxDims-1)]; } (*Glibrpf_model[id].dLL1)(spec, iparam, ptheta.data(), expected.data(), itemDeriv.data()); (*Glibrpf_model[id].dLL2)(spec, iparam, itemDeriv.data()); for (int par = 0; par < state->paramPerItem[ix]; ++par) { int to = state->itemGradMap[gradOffset + par]; if (to >= 0) { gradBuf[to] -= itemDeriv[par] * sqrtTmp; patGrad[to] -= itemDeriv[par] * tmp; } } int derivBase = ix * state->itemDerivPadSize; for (int ox=0; ox < state->itemDerivPadSize; ox++) { int to = state->paramMap[derivBase + ox]; if (to >= int(numParam)) { int Hto = to - numParam; breadH[Hto] += (abScale * itemDeriv[ox] * tmp * rowWeight[px]); } } } addSymOuterProd(abScale * rowWeight[px], gradBuf.data(), numParam, breadG); } qloc += numSpecific; ++qx; } } addSymOuterProd(abScale * rowWeight[px], patGrad.data(), numParam, meat); } } // only need upper triangle TODO for (int tx=1; tx < numThreads; ++tx) { double *th = thrBreadG.data() + tx * numParam * numParam; for (size_t en=0; en < numParam * numParam; ++en) { thrBreadG[en] += th[en]; } } for (int tx=1; tx < numThreads; ++tx) { double *th = thrBreadH.data() + tx * numParam * numParam; for (size_t en=0; en < numParam * numParam; ++en) { thrBreadH[en] += th[en]; } } for (int tx=1; tx < numThreads; ++tx) { double *th = thrMeat.data() + tx * numParam * numParam; for (size_t en=0; en < numParam * numParam; ++en) { thrMeat[en] += th[en]; } } //pda(thrBreadG.data(), numParam, numParam); //pda(thrBreadH.data(), numParam, numParam); //pda(thrMeat.data(), numParam, numParam); if (fc->infoA) { for (size_t d1=0; d1 < numParam; ++d1) { for (size_t d2=0; d2 < numParam; ++d2) { int cell = d1 * numParam + d2; fc->infoA[cell] += thrBreadH[cell] - thrBreadG[cell] + thrMeat[cell]; } } } if (fc->infoB) { for (size_t d1=0; d1 < numParam; ++d1) { for (size_t d2=0; d2 < numParam; ++d2) { int cell = d1 * numParam + d2; fc->infoB[cell] += thrMeat[cell]; } } } }