void omxExpectation::loadThresholds() { bool debug = false; CheckAST(thresholdsMat, 0); numOrdinal = 0; //omxPrint(thresholdsMat, "loadThr"); auto dc = base::getDataColumns(); thresholds.reserve(dc.size()); std::vector<bool> found(thresholdsMat->cols, false); for(int dx = 0; dx < int(dc.size()); dx++) { int index = dc[dx]; omxThresholdColumn col; col.dColumn = index; const char *colname = data->columnName(index); int tc = thresholdsMat->lookupColumnByName(colname); if (tc < 0 || (data->rawCols.size() && !omxDataColumnIsFactor(data, index))) { // Continuous variable if(debug || OMX_DEBUG) { mxLog("%s: column[%d] '%s' is continuous (tc=%d isFactor=%d)", name, index, colname, tc, omxDataColumnIsFactor(data, index)); } thresholds.push_back(col); } else { found[tc] = true; col.column = tc; if (data->rawCols.size()) { col.numThresholds = omxDataGetNumFactorLevels(data, index) - 1; } else { // See omxData::permute } thresholds.push_back(col); if(debug || OMX_DEBUG) { mxLog("%s: column[%d] '%s' is ordinal with %d thresholds in threshold column %d.", name, index, colname, col.numThresholds, tc); } numOrdinal++; } } if (numOrdinal != thresholdsMat->cols) { std::string buf; for (int cx=0; cx < thresholdsMat->cols; ++cx) { if (found[cx]) continue; buf += string_snprintf(" %d", 1+cx); } omxRaiseErrorf("%s: cannot find data for threshold columns:%s\n(Do appropriate threshold column names match data column names?)", name, buf.c_str()); } if(debug || OMX_DEBUG) { mxLog("%d threshold columns processed.", numOrdinal); } }
static void CallFIMLFitFunction(omxFitFunction *off, int want, FitContext *fc) { // TODO: Figure out how to give access to other per-iteration structures. // TODO: Current implementation is slow: update by filtering correlations and thresholds. // TODO: Current implementation does not implement speedups for sorting. // TODO: Current implementation may fail on all-continuous-missing or all-ordinal-missing rows. if (want & (FF_COMPUTE_PREOPTIMIZE)) return; if(OMX_DEBUG) { mxLog("Beginning Joint FIML Evaluation."); } int returnRowLikelihoods = 0; omxFIMLFitFunction* ofiml = ((omxFIMLFitFunction*)off->argStruct); omxMatrix* fitMatrix = off->matrix; int numChildren = (int) fc->childList.size(); omxMatrix *cov = ofiml->cov; omxMatrix *means = ofiml->means; if (!means) { omxRaiseErrorf("%s: raw data observed but no expected means " "vector was provided. Add something like mxPath(from = 'one'," " to = manifests) to your model.", off->name()); return; } omxData* data = ofiml->data; // read-only omxMatrix *dataColumns = ofiml->dataColumns; returnRowLikelihoods = ofiml->returnRowLikelihoods; // read-only omxExpectation* expectation = off->expectation; std::vector< omxThresholdColumn > &thresholdCols = expectation->thresholds; if (data->defVars.size() == 0 && !strEQ(expectation->expType, "MxExpectationStateSpace")) { if(OMX_DEBUG) {mxLog("Precalculating cov and means for all rows.");} omxExpectationRecompute(fc, expectation); // MCN Also do the threshold formulae! for(int j=0; j < dataColumns->cols; j++) { int var = omxVectorElement(dataColumns, j); if (!omxDataColumnIsFactor(data, var)) continue; if (j < int(thresholdCols.size()) && thresholdCols[j].numThresholds > 0) { // j is an ordinal column omxMatrix* nextMatrix = thresholdCols[j].matrix; omxRecompute(nextMatrix, fc); checkIncreasing(nextMatrix, thresholdCols[j].column, thresholdCols[j].numThresholds, fc); for(int index = 0; index < numChildren; index++) { FitContext *kid = fc->childList[index]; omxMatrix *target = kid->lookupDuplicate(nextMatrix); omxCopyMatrix(target, nextMatrix); } } else { Rf_error("No threshold given for ordinal column '%s'", omxDataColumnName(data, j)); } } double *corList = ofiml->corList; double *weights = ofiml->weights; if (corList) { omxStandardizeCovMatrix(cov, corList, weights, fc); // Calculate correlation and covariance } for(int index = 0; index < numChildren; index++) { FitContext *kid = fc->childList[index]; omxMatrix *childFit = kid->lookupDuplicate(fitMatrix); omxFIMLFitFunction* childOfiml = ((omxFIMLFitFunction*) childFit->fitFunction->argStruct); omxCopyMatrix(childOfiml->cov, cov); omxCopyMatrix(childOfiml->means, means); if (corList) { memcpy(childOfiml->weights, weights, sizeof(double) * cov->rows); memcpy(childOfiml->corList, corList, sizeof(double) * (cov->rows * (cov->rows - 1)) / 2); } } if(OMX_DEBUG) { omxPrintMatrix(cov, "Cov"); } if(OMX_DEBUG) { omxPrintMatrix(means, "Means"); } } memset(ofiml->rowLogLikelihoods->data, 0, sizeof(double) * data->rows); int parallelism = (numChildren == 0) ? 1 : numChildren; if (parallelism > data->rows) { parallelism = data->rows; } FIMLSingleIterationType singleIter = ofiml->SingleIterFn; bool failed = false; if (parallelism > 1) { int stride = (data->rows / parallelism); #pragma omp parallel for num_threads(parallelism) reduction(||:failed) for(int i = 0; i < parallelism; i++) { FitContext *kid = fc->childList[i]; omxMatrix *childMatrix = kid->lookupDuplicate(fitMatrix); omxFitFunction *childFit = childMatrix->fitFunction; if (i == parallelism - 1) { failed |= singleIter(kid, childFit, off, stride * i, data->rows - stride * i); } else { failed |= singleIter(kid, childFit, off, stride * i, stride); } } } else { failed |= singleIter(fc, off, off, 0, data->rows); } if (failed) { omxSetMatrixElement(off->matrix, 0, 0, NA_REAL); return; } if(!returnRowLikelihoods) { double val, sum = 0.0; // floating-point addition is not associative, // so we serialized the following reduction operation. for(int i = 0; i < data->rows; i++) { val = omxVectorElement(ofiml->rowLogLikelihoods, i); // mxLog("%d , %f, %llx\n", i, val, *((unsigned long long*) &val)); sum += val; } if(OMX_DEBUG) {mxLog("Total Likelihood is %3.3f", sum);} omxSetMatrixElement(off->matrix, 0, 0, sum); } }
void omxInitExpectationBA81(omxExpectation* oo) { omxState* currentState = oo->currentState; SEXP rObj = oo->rObj; SEXP tmp; if(OMX_DEBUG) { mxLog("Initializing %s.", oo->name); } if (!Glibrpf_model) { #if USE_EXTERNAL_LIBRPF get_librpf_t get_librpf = (get_librpf_t) R_GetCCallable("rpf", "get_librpf_model_GPL"); (*get_librpf)(LIBIFA_RPF_API_VERSION, &Glibrpf_numModels, &Glibrpf_model); #else // if linking against included source code Glibrpf_numModels = librpf_numModels; Glibrpf_model = librpf_model; #endif } BA81Expect *state = new BA81Expect; // These two constants should be as identical as possible state->name = oo->name; if (0) { state->LogLargestDouble = 0.0; state->LargestDouble = 1.0; } else { state->LogLargestDouble = log(std::numeric_limits<double>::max()) - 1; state->LargestDouble = exp(state->LogLargestDouble); ba81NormalQuad &quad = state->getQuad(); quad.setOne(state->LargestDouble); } state->expectedUsed = false; state->estLatentMean = NULL; state->estLatentCov = NULL; state->type = EXPECTATION_OBSERVED; state->itemParam = NULL; state->EitemParam = NULL; state->itemParamVersion = 0; state->latentParamVersion = 0; oo->argStruct = (void*) state; {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("data"))); state->data = omxDataLookupFromState(tmp, currentState); } if (strcmp(omxDataType(state->data), "raw") != 0) { omxRaiseErrorf("%s unable to handle data type %s", oo->name, omxDataType(state->data)); return; } {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("verbose"))); state->verbose = Rf_asInteger(tmp); } int targetQpoints; {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("qpoints"))); targetQpoints = Rf_asInteger(tmp); } {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("qwidth"))); state->grp.setGridFineness(Rf_asReal(tmp), targetQpoints); } {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("ItemSpec"))); state->grp.importSpec(tmp); if (state->verbose >= 2) mxLog("%s: found %d item specs", oo->name, state->numItems()); } state->_latentMeanOut = omxNewMatrixFromSlot(rObj, currentState, "mean"); state->_latentCovOut = omxNewMatrixFromSlot(rObj, currentState, "cov"); state->itemParam = omxNewMatrixFromSlot(rObj, currentState, "item"); state->grp.param = state->itemParam->data; // algebra not allowed yet TODO const int numItems = state->itemParam->cols; if (state->numItems() != numItems) { omxRaiseErrorf("ItemSpec length %d must match the number of item columns (%d)", state->numItems(), numItems); return; } if (state->itemParam->rows != state->grp.impliedParamRows) { omxRaiseErrorf("item matrix must have %d rows", state->grp.impliedParamRows); return; } state->grp.paramRows = state->itemParam->rows; // for algebra item param, will need to defer until later? state->grp.learnMaxAbilities(); int maxAbilities = state->grp.itemDims; state->grp.setFactorNames(state->itemParam->rownames); { ProtectedSEXP tmp2(R_do_slot(rObj, Rf_install(".detectIndependence"))); state->grp.detectIndependence = Rf_asLogical(tmp2); } {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("EstepItem"))); if (!Rf_isNull(tmp)) { int rows, cols; getMatrixDims(tmp, &rows, &cols); if (rows != state->itemParam->rows || cols != state->itemParam->cols) { Rf_error("EstepItem must have the same dimensions as the item MxMatrix"); } state->EitemParam = REAL(tmp); } } oo->computeFun = ba81compute; oo->setVarGroup = ignoreSetVarGroup; oo->destructFun = ba81Destroy; oo->populateAttrFun = ba81PopulateAttributes; oo->componentFun = getComponent; oo->canDuplicate = false; // TODO: Exactly identical rows do not contribute any information. // The sorting algorithm ought to remove them so we get better cache behavior. // The following summary stats would be cheaper to calculate too. omxData *data = state->data; if (data->hasDefinitionVariables()) Rf_error("%s: not implemented yet", oo->name); std::vector<int> &rowMap = state->grp.rowMap; int weightCol; {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("weightColumn"))); weightCol = INTEGER(tmp)[0]; } if (weightCol == NA_INTEGER) { // Should rowMap be part of omxData? This is essentially a // generic compression step that shouldn't be specific to IFA models. state->grp.rowWeight = (double*) R_alloc(data->rows, sizeof(double)); rowMap.resize(data->rows); int numUnique = 0; for (int rx=0; rx < data->rows; ) { int rw = 1; state->grp.rowWeight[numUnique] = rw; rowMap[numUnique] = rx; rx += rw; ++numUnique; } rowMap.resize(numUnique); state->weightSum = state->data->rows; } else { if (omxDataColumnIsFactor(data, weightCol)) { omxRaiseErrorf("%s: weightColumn %d is a factor", oo->name, 1 + weightCol); return; } state->grp.rowWeight = omxDoubleDataColumn(data, weightCol); state->weightSum = 0; for (int rx=0; rx < data->rows; ++rx) { state->weightSum += state->grp.rowWeight[rx]; } rowMap.resize(data->rows); for (size_t rx=0; rx < rowMap.size(); ++rx) { rowMap[rx] = rx; } } // complain about non-integral rowWeights (EAP can't work) TODO auto colMap = oo->getDataColumns(); for (int cx = 0; cx < numItems; cx++) { int *col = omxIntDataColumnUnsafe(data, colMap[cx]); state->grp.dataColumns.push_back(col); } // sanity check data for (int cx = 0; cx < numItems; cx++) { if (!omxDataColumnIsFactor(data, colMap[cx])) { data->omxPrintData("diagnostic", 3); omxRaiseErrorf("%s: column %d is not a factor", oo->name, int(1 + colMap[cx])); return; } } // TODO the max outcome should be available from omxData for (int rx=0; rx < data->rows; rx++) { int cols = 0; for (int cx = 0; cx < numItems; cx++) { const int *col = state->grp.dataColumns[cx]; int pick = col[rx]; if (pick == NA_INTEGER) continue; ++cols; const int no = state->grp.itemOutcomes[cx]; if (pick > no) { Rf_error("Data for item '%s' has at least %d outcomes, not %d", state->itemParam->colnames[cx], pick, no); } } if (cols == 0) { Rf_error("Row %d has all NAs", 1+rx); } } if (state->_latentMeanOut && state->_latentMeanOut->rows * state->_latentMeanOut->cols != maxAbilities) { Rf_error("The mean matrix '%s' must be a row or column vector of size %d", state->_latentMeanOut->name(), maxAbilities); } if (state->_latentCovOut && (state->_latentCovOut->rows != maxAbilities || state->_latentCovOut->cols != maxAbilities)) { Rf_error("The cov matrix '%s' must be %dx%d", state->_latentCovOut->name(), maxAbilities, maxAbilities); } state->grp.setLatentDistribution(state->_latentMeanOut? state->_latentMeanOut->data : NULL, state->_latentCovOut? state->_latentCovOut->data : NULL); { EigenArrayAdaptor Eparam(state->itemParam); Eigen::Map< Eigen::VectorXd > meanVec(state->grp.mean, maxAbilities); Eigen::Map< Eigen::MatrixXd > covMat(state->grp.cov, maxAbilities, maxAbilities); state->grp.quad.setStructure(state->grp.qwidth, state->grp.qpoints, Eparam, meanVec, covMat); } {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("minItemsPerScore"))); state->grp.setMinItemsPerScore(Rf_asInteger(tmp)); } state->grp.buildRowSkip(); if (isErrorRaised()) return; {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("debugInternal"))); state->debugInternal = Rf_asLogical(tmp); } state->ElatentVersion = 0; if (state->_latentMeanOut) { state->estLatentMean = omxInitMatrix(maxAbilities, 1, TRUE, currentState); omxCopyMatrix(state->estLatentMean, state->_latentMeanOut); // rename matrices TODO } if (state->_latentCovOut) { state->estLatentCov = omxInitMatrix(maxAbilities, maxAbilities, TRUE, currentState); omxCopyMatrix(state->estLatentCov, state->_latentCovOut); } }