Esempio n. 1
0
void omxExpectation::loadThresholds()
{
	bool debug = false;
	CheckAST(thresholdsMat, 0);
	numOrdinal = 0;

	//omxPrint(thresholdsMat, "loadThr");

	auto dc = base::getDataColumns();
	thresholds.reserve(dc.size());
	std::vector<bool> found(thresholdsMat->cols, false);
	for(int dx = 0; dx < int(dc.size()); dx++) {
		int index = dc[dx];
		omxThresholdColumn col;
		col.dColumn = index;

		const char *colname = data->columnName(index);
		int tc = thresholdsMat->lookupColumnByName(colname);

		if (tc < 0 || (data->rawCols.size() && !omxDataColumnIsFactor(data, index))) {	// Continuous variable
			if(debug || OMX_DEBUG) {
				mxLog("%s: column[%d] '%s' is continuous (tc=%d isFactor=%d)",
				      name, index, colname, tc, omxDataColumnIsFactor(data, index));
			}
			thresholds.push_back(col);
		} else {
			found[tc] = true;
			col.column = tc;
			if (data->rawCols.size()) {
				col.numThresholds = omxDataGetNumFactorLevels(data, index) - 1;
			} else {
				// See omxData::permute
			}

			thresholds.push_back(col);
			if(debug || OMX_DEBUG) {
				mxLog("%s: column[%d] '%s' is ordinal with %d thresholds in threshold column %d.", 
				      name, index, colname, col.numThresholds, tc);
			}
			numOrdinal++;
		}
	}

	if (numOrdinal != thresholdsMat->cols) {
		std::string buf;
		for (int cx=0; cx < thresholdsMat->cols; ++cx) {
			if (found[cx]) continue;
			buf += string_snprintf(" %d", 1+cx);
		}
		omxRaiseErrorf("%s: cannot find data for threshold columns:%s\n(Do appropriate threshold column names match data column names?)", name, buf.c_str());
	}

	if(debug || OMX_DEBUG) {
		mxLog("%d threshold columns processed.", numOrdinal);
	}
}
Esempio n. 2
0
static void CallFIMLFitFunction(omxFitFunction *off, int want, FitContext *fc)
{
	// TODO: Figure out how to give access to other per-iteration structures.
	// TODO: Current implementation is slow: update by filtering correlations and thresholds.
	// TODO: Current implementation does not implement speedups for sorting.
	// TODO: Current implementation may fail on all-continuous-missing or all-ordinal-missing rows.
	
	if (want & (FF_COMPUTE_PREOPTIMIZE)) return;

    if(OMX_DEBUG) { 
	    mxLog("Beginning Joint FIML Evaluation.");
    }
	int returnRowLikelihoods = 0;

	omxFIMLFitFunction* ofiml = ((omxFIMLFitFunction*)off->argStruct);
	omxMatrix* fitMatrix  = off->matrix;
	int numChildren = (int) fc->childList.size();

	omxMatrix *cov 		= ofiml->cov;
	omxMatrix *means	= ofiml->means;
	if (!means) {
		omxRaiseErrorf("%s: raw data observed but no expected means "
			       "vector was provided. Add something like mxPath(from = 'one',"
			       " to = manifests) to your model.", off->name());
		return;
	}
	omxData* data           = ofiml->data;                            //  read-only
	omxMatrix *dataColumns	= ofiml->dataColumns;

	returnRowLikelihoods = ofiml->returnRowLikelihoods;   //  read-only
	omxExpectation* expectation = off->expectation;
	std::vector< omxThresholdColumn > &thresholdCols = expectation->thresholds;

	if (data->defVars.size() == 0 && !strEQ(expectation->expType, "MxExpectationStateSpace")) {
		if(OMX_DEBUG) {mxLog("Precalculating cov and means for all rows.");}
		omxExpectationRecompute(fc, expectation);
		// MCN Also do the threshold formulae!
		
		for(int j=0; j < dataColumns->cols; j++) {
			int var = omxVectorElement(dataColumns, j);
			if (!omxDataColumnIsFactor(data, var)) continue;
			if (j < int(thresholdCols.size()) && thresholdCols[j].numThresholds > 0) { // j is an ordinal column
				omxMatrix* nextMatrix = thresholdCols[j].matrix;
				omxRecompute(nextMatrix, fc);
				checkIncreasing(nextMatrix, thresholdCols[j].column, thresholdCols[j].numThresholds, fc);
				for(int index = 0; index < numChildren; index++) {
					FitContext *kid = fc->childList[index];
					omxMatrix *target = kid->lookupDuplicate(nextMatrix);
					omxCopyMatrix(target, nextMatrix);
				}
			} else {
				Rf_error("No threshold given for ordinal column '%s'",
					 omxDataColumnName(data, j));
			}
		}

		double *corList 	= ofiml->corList;
		double *weights		= ofiml->weights;

		if (corList) {
			omxStandardizeCovMatrix(cov, corList, weights, fc);	// Calculate correlation and covariance
		}
		for(int index = 0; index < numChildren; index++) {
			FitContext *kid = fc->childList[index];
			omxMatrix *childFit = kid->lookupDuplicate(fitMatrix);
			omxFIMLFitFunction* childOfiml = ((omxFIMLFitFunction*) childFit->fitFunction->argStruct);
			omxCopyMatrix(childOfiml->cov, cov);
			omxCopyMatrix(childOfiml->means, means);
			if (corList) {
				memcpy(childOfiml->weights, weights, sizeof(double) * cov->rows);
				memcpy(childOfiml->corList, corList, sizeof(double) * (cov->rows * (cov->rows - 1)) / 2);
			}
		}
		if(OMX_DEBUG) { omxPrintMatrix(cov, "Cov"); }
		if(OMX_DEBUG) { omxPrintMatrix(means, "Means"); }
    }

	memset(ofiml->rowLogLikelihoods->data, 0, sizeof(double) * data->rows);
    
	int parallelism = (numChildren == 0) ? 1 : numChildren;

	if (parallelism > data->rows) {
		parallelism = data->rows;
	}

	FIMLSingleIterationType singleIter = ofiml->SingleIterFn;

	bool failed = false;
	if (parallelism > 1) {
		int stride = (data->rows / parallelism);

#pragma omp parallel for num_threads(parallelism) reduction(||:failed)
		for(int i = 0; i < parallelism; i++) {
			FitContext *kid = fc->childList[i];
			omxMatrix *childMatrix = kid->lookupDuplicate(fitMatrix);
			omxFitFunction *childFit = childMatrix->fitFunction;
			if (i == parallelism - 1) {
				failed |= singleIter(kid, childFit, off, stride * i, data->rows - stride * i);
			} else {
				failed |= singleIter(kid, childFit, off, stride * i, stride);
			}
		}
	} else {
		failed |= singleIter(fc, off, off, 0, data->rows);
	}
	if (failed) {
		omxSetMatrixElement(off->matrix, 0, 0, NA_REAL);
		return;
	}

	if(!returnRowLikelihoods) {
		double val, sum = 0.0;
		// floating-point addition is not associative,
		// so we serialized the following reduction operation.
		for(int i = 0; i < data->rows; i++) {
			val = omxVectorElement(ofiml->rowLogLikelihoods, i);
//			mxLog("%d , %f, %llx\n", i, val, *((unsigned long long*) &val));
			sum += val;
		}	
		if(OMX_DEBUG) {mxLog("Total Likelihood is %3.3f", sum);}
		omxSetMatrixElement(off->matrix, 0, 0, sum);
	}
}
Esempio n. 3
0
void omxInitExpectationBA81(omxExpectation* oo) {
	omxState* currentState = oo->currentState;	
	SEXP rObj = oo->rObj;
	SEXP tmp;
	
	if(OMX_DEBUG) {
		mxLog("Initializing %s.", oo->name);
	}
	if (!Glibrpf_model) {
#if USE_EXTERNAL_LIBRPF
		get_librpf_t get_librpf = (get_librpf_t) R_GetCCallable("rpf", "get_librpf_model_GPL");
		(*get_librpf)(LIBIFA_RPF_API_VERSION, &Glibrpf_numModels, &Glibrpf_model);
#else
		// if linking against included source code
		Glibrpf_numModels = librpf_numModels;
		Glibrpf_model = librpf_model;
#endif
	}
	
	BA81Expect *state = new BA81Expect;

	// These two constants should be as identical as possible
	state->name = oo->name;
	if (0) {
		state->LogLargestDouble = 0.0;
		state->LargestDouble = 1.0;
	} else {
		state->LogLargestDouble = log(std::numeric_limits<double>::max()) - 1;
		state->LargestDouble = exp(state->LogLargestDouble);
		ba81NormalQuad &quad = state->getQuad();
		quad.setOne(state->LargestDouble);
	}

	state->expectedUsed = false;

	state->estLatentMean = NULL;
	state->estLatentCov = NULL;
	state->type = EXPECTATION_OBSERVED;
	state->itemParam = NULL;
	state->EitemParam = NULL;
	state->itemParamVersion = 0;
	state->latentParamVersion = 0;
	oo->argStruct = (void*) state;

	{ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("data")));
	state->data = omxDataLookupFromState(tmp, currentState);
	}

	if (strcmp(omxDataType(state->data), "raw") != 0) {
		omxRaiseErrorf("%s unable to handle data type %s", oo->name, omxDataType(state->data));
		return;
	}

	{ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("verbose")));
	state->verbose = Rf_asInteger(tmp);
	}

	int targetQpoints;
	{ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("qpoints")));
		targetQpoints = Rf_asInteger(tmp);
	}

	{ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("qwidth")));
	state->grp.setGridFineness(Rf_asReal(tmp), targetQpoints);
	}

	{ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("ItemSpec")));
	state->grp.importSpec(tmp);
	if (state->verbose >= 2) mxLog("%s: found %d item specs", oo->name, state->numItems());
	}

	state->_latentMeanOut = omxNewMatrixFromSlot(rObj, currentState, "mean");
	state->_latentCovOut  = omxNewMatrixFromSlot(rObj, currentState, "cov");

	state->itemParam = omxNewMatrixFromSlot(rObj, currentState, "item");
	state->grp.param = state->itemParam->data; // algebra not allowed yet TODO

	const int numItems = state->itemParam->cols;
	if (state->numItems() != numItems) {
		omxRaiseErrorf("ItemSpec length %d must match the number of item columns (%d)",
			       state->numItems(), numItems);
		return;
	}
	if (state->itemParam->rows != state->grp.impliedParamRows) {
		omxRaiseErrorf("item matrix must have %d rows", state->grp.impliedParamRows);
		return;
	}
	state->grp.paramRows = state->itemParam->rows;

	// for algebra item param, will need to defer until later?
	state->grp.learnMaxAbilities();

	int maxAbilities = state->grp.itemDims;
	state->grp.setFactorNames(state->itemParam->rownames);

	{
		ProtectedSEXP tmp2(R_do_slot(rObj, Rf_install(".detectIndependence")));
		state->grp.detectIndependence = Rf_asLogical(tmp2);
	}

	{ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("EstepItem")));
	if (!Rf_isNull(tmp)) {
		int rows, cols;
		getMatrixDims(tmp, &rows, &cols);
		if (rows != state->itemParam->rows || cols != state->itemParam->cols) {
			Rf_error("EstepItem must have the same dimensions as the item MxMatrix");
		}
		state->EitemParam = REAL(tmp);
	}
	}

	oo->computeFun = ba81compute;
	oo->setVarGroup = ignoreSetVarGroup;
	oo->destructFun = ba81Destroy;
	oo->populateAttrFun = ba81PopulateAttributes;
	oo->componentFun = getComponent;
	oo->canDuplicate = false;
	
	// TODO: Exactly identical rows do not contribute any information.
	// The sorting algorithm ought to remove them so we get better cache behavior.
	// The following summary stats would be cheaper to calculate too.

	omxData *data = state->data;
	if (data->hasDefinitionVariables()) Rf_error("%s: not implemented yet", oo->name);

	std::vector<int> &rowMap = state->grp.rowMap;

	int weightCol;
	{ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("weightColumn")));
		weightCol = INTEGER(tmp)[0];
	}

	if (weightCol == NA_INTEGER) {
		// Should rowMap be part of omxData? This is essentially a
		// generic compression step that shouldn't be specific to IFA models.
		state->grp.rowWeight = (double*) R_alloc(data->rows, sizeof(double));
		rowMap.resize(data->rows);
		int numUnique = 0;
		for (int rx=0; rx < data->rows; ) {
			int rw = 1;
			state->grp.rowWeight[numUnique] = rw;
			rowMap[numUnique] = rx;
			rx += rw;
			++numUnique;
		}
		rowMap.resize(numUnique);
		state->weightSum = state->data->rows;
	}
	else {
		if (omxDataColumnIsFactor(data, weightCol)) {
			omxRaiseErrorf("%s: weightColumn %d is a factor", oo->name, 1 + weightCol);
			return;
		}
		state->grp.rowWeight = omxDoubleDataColumn(data, weightCol);
		state->weightSum = 0;
		for (int rx=0; rx < data->rows; ++rx) { state->weightSum += state->grp.rowWeight[rx]; }
		rowMap.resize(data->rows);
		for (size_t rx=0; rx < rowMap.size(); ++rx) {
			rowMap[rx] = rx;
		}
	}
	// complain about non-integral rowWeights (EAP can't work) TODO

	auto colMap = oo->getDataColumns();

	for (int cx = 0; cx < numItems; cx++) {
		int *col = omxIntDataColumnUnsafe(data, colMap[cx]);
		state->grp.dataColumns.push_back(col);
	}

	// sanity check data
	for (int cx = 0; cx < numItems; cx++) {
		if (!omxDataColumnIsFactor(data, colMap[cx])) {
			data->omxPrintData("diagnostic", 3);
			omxRaiseErrorf("%s: column %d is not a factor", oo->name, int(1 + colMap[cx]));
			return;
		}
	}

	// TODO the max outcome should be available from omxData
	for (int rx=0; rx < data->rows; rx++) {
		int cols = 0;
		for (int cx = 0; cx < numItems; cx++) {
			const int *col = state->grp.dataColumns[cx];
			int pick = col[rx];
			if (pick == NA_INTEGER) continue;
			++cols;
			const int no = state->grp.itemOutcomes[cx];
			if (pick > no) {
				Rf_error("Data for item '%s' has at least %d outcomes, not %d",
					 state->itemParam->colnames[cx], pick, no);
			}
		}
		if (cols == 0) {
			Rf_error("Row %d has all NAs", 1+rx);
		}
	}

	if (state->_latentMeanOut && state->_latentMeanOut->rows * state->_latentMeanOut->cols != maxAbilities) {
		Rf_error("The mean matrix '%s' must be a row or column vector of size %d",
			 state->_latentMeanOut->name(), maxAbilities);
	}

	if (state->_latentCovOut && (state->_latentCovOut->rows != maxAbilities ||
				    state->_latentCovOut->cols != maxAbilities)) {
		Rf_error("The cov matrix '%s' must be %dx%d",
			 state->_latentCovOut->name(), maxAbilities, maxAbilities);
	}

	state->grp.setLatentDistribution(state->_latentMeanOut? state->_latentMeanOut->data : NULL,
					 state->_latentCovOut? state->_latentCovOut->data : NULL);

	{
		EigenArrayAdaptor Eparam(state->itemParam);
		Eigen::Map< Eigen::VectorXd > meanVec(state->grp.mean, maxAbilities);
		Eigen::Map< Eigen::MatrixXd > covMat(state->grp.cov, maxAbilities, maxAbilities);
		state->grp.quad.setStructure(state->grp.qwidth, state->grp.qpoints,
					     Eparam, meanVec, covMat);
	}

	{ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("minItemsPerScore")));
	state->grp.setMinItemsPerScore(Rf_asInteger(tmp));
	}

	state->grp.buildRowSkip();

	if (isErrorRaised()) return;

	{ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("debugInternal")));
	state->debugInternal = Rf_asLogical(tmp);
	}

	state->ElatentVersion = 0;
	if (state->_latentMeanOut) {
		state->estLatentMean = omxInitMatrix(maxAbilities, 1, TRUE, currentState);
		omxCopyMatrix(state->estLatentMean, state->_latentMeanOut); // rename matrices TODO
	}
	if (state->_latentCovOut) {
		state->estLatentCov = omxInitMatrix(maxAbilities, maxAbilities, TRUE, currentState);
		omxCopyMatrix(state->estLatentCov, state->_latentCovOut);
	}
}