Ejemplo n.º 1
0
 Factor::Factor(SEXP r_factor)
     : values_(Rf_length(r_factor)),
       levels_(new CatKey(GetFactorLevels(r_factor)))
 {
   if (Rf_isFactor(r_factor)) {
     int * factor_numeric_values = INTEGER(r_factor);
     for (int i = 0; i < values_.size(); ++i) {
       values_[i] = factor_numeric_values[i] - 1;
     }
   } else {
     report_error("A C++ Factor can only be created from an R factor.");
   }
 }
Ejemplo n.º 2
0
CharacterVector reencode_char(SEXP x) {
  if (Rf_isFactor(x)) return reencode_factor(x);

  CharacterVector xc(x);
  R_xlen_t first = get_first_reencode_pos(xc);
  if (first >= xc.length()) return x;

  CharacterVector ret(Rf_duplicate(xc));

  R_xlen_t len = ret.length();
  for (R_xlen_t i = first; i < len; ++i) {
    SEXP reti = ret[i];
    if (reti != NA_STRING && !IS_ASCII(reti) && !IS_UTF8(reti)) {
      ret[i] = String(Rf_translateCharUTF8(reti), CE_UTF8);
    }
  }

  return ret;
}
Ejemplo n.º 3
0
void ifaGroup::import(SEXP Rlist)
{
	SEXP argNames;
	Rf_protect(argNames = Rf_getAttrib(Rlist, R_NamesSymbol));
	if (Rf_length(Rlist) != Rf_length(argNames)) {
		mxThrow("All list elements must be named");
	}

	std::vector<const char *> dataColNames;

	paramRows = -1;
	int pmatCols=-1;
	int mips = 1;
	int dataRows = 0;
	SEXP Rmean=0, Rcov=0;

	for (int ax=0; ax < Rf_length(Rlist); ++ax) {
		const char *key = R_CHAR(STRING_ELT(argNames, ax));
		SEXP slotValue = VECTOR_ELT(Rlist, ax);
		if (strEQ(key, "spec")) {
			importSpec(slotValue);
		} else if (strEQ(key, "param")) {
			if (!Rf_isReal(slotValue)) mxThrow("'param' must be a numeric matrix of item parameters");
			param = REAL(slotValue);
			getMatrixDims(slotValue, &paramRows, &pmatCols);

			SEXP dimnames;
			Rf_protect(dimnames = Rf_getAttrib(slotValue, R_DimNamesSymbol));
			if (!Rf_isNull(dimnames) && Rf_length(dimnames) == 2) {
				SEXP names;
				Rf_protect(names = VECTOR_ELT(dimnames, 0));
				int nlen = Rf_length(names);
				factorNames.resize(nlen);
				for (int nx=0; nx < nlen; ++nx) {
					factorNames[nx] = CHAR(STRING_ELT(names, nx));
				}
				Rf_protect(names = VECTOR_ELT(dimnames, 1));
				nlen = Rf_length(names);
				itemNames.resize(nlen);
				for (int nx=0; nx < nlen; ++nx) {
					itemNames[nx] = CHAR(STRING_ELT(names, nx));
				}
			}
		} else if (strEQ(key, "mean")) {
			Rmean = slotValue;
			if (!Rf_isReal(slotValue)) mxThrow("'mean' must be a numeric vector or matrix");
			mean = REAL(slotValue);
		} else if (strEQ(key, "cov")) {
			Rcov = slotValue;
			if (!Rf_isReal(slotValue)) mxThrow("'cov' must be a numeric matrix");
			cov = REAL(slotValue);
		} else if (strEQ(key, "data")) {
			Rdata = slotValue;
			dataRows = Rf_length(VECTOR_ELT(Rdata, 0));

			SEXP names;
			Rf_protect(names = Rf_getAttrib(Rdata, R_NamesSymbol));
			int nlen = Rf_length(names);
			dataColNames.reserve(nlen);
			for (int nx=0; nx < nlen; ++nx) {
				dataColNames.push_back(CHAR(STRING_ELT(names, nx)));
			}
			Rf_protect(dataRowNames = Rf_getAttrib(Rdata, R_RowNamesSymbol));
		} else if (strEQ(key, "weightColumn")) {
			if (Rf_length(slotValue) != 1) {
				mxThrow("You can only have one %s", key);
			}
			weightColumnName = CHAR(STRING_ELT(slotValue, 0));
		} else if (strEQ(key, "freqColumn")) {
			if (Rf_length(slotValue) != 1) {
				mxThrow("You can only have one %s", key);
			}
			freqColumnName = CHAR(STRING_ELT(slotValue, 0));
		} else if (strEQ(key, "qwidth")) {
			qwidth = Rf_asReal(slotValue);
		} else if (strEQ(key, "qpoints")) {
			qpoints = Rf_asInteger(slotValue);
		} else if (strEQ(key, "minItemsPerScore")) {
			mips = Rf_asInteger(slotValue);
		} else {
			// ignore
		}
	}

	learnMaxAbilities();

	if (itemDims < (int) factorNames.size())
		factorNames.resize(itemDims);

	if (int(factorNames.size()) < itemDims) {
		factorNames.reserve(itemDims);
		const int SMALLBUF = 24;
		char buf[SMALLBUF];
		while (int(factorNames.size()) < itemDims) {
			snprintf(buf, SMALLBUF, "s%d", int(factorNames.size()) + 1);
			factorNames.push_back(CHAR(Rf_mkChar(buf)));
		}
	}

	if (Rmean) {
		if (Rf_isMatrix(Rmean)) {
			int nrow, ncol;
			getMatrixDims(Rmean, &nrow, &ncol);
			if (!(nrow * ncol == itemDims && (nrow==1 || ncol==1))) {
				mxThrow("mean must be a column or row matrix of length %d", itemDims);
			}
		} else {
			if (Rf_length(Rmean) != itemDims) {
				mxThrow("mean must be a vector of length %d", itemDims);
			}
		}

		verifyFactorNames(Rmean, "mean");
	}

	if (Rcov) {
		if (Rf_isMatrix(Rcov)) {
			int nrow, ncol;
			getMatrixDims(Rcov, &nrow, &ncol);
			if (nrow != itemDims || ncol != itemDims) {
				mxThrow("cov must be %dx%d matrix", itemDims, itemDims);
			}
		} else {
			if (Rf_length(Rcov) != 1) {
				mxThrow("cov must be %dx%d matrix", itemDims, itemDims);
			}
		}

		verifyFactorNames(Rcov, "cov");
	}

	setLatentDistribution(mean, cov);

	setMinItemsPerScore(mips);

	if (numItems() != pmatCols) {
		mxThrow("item matrix implies %d items but spec is length %d",
			 pmatCols, numItems());
	}

	if (Rdata) {
		if (itemNames.size() == 0) mxThrow("Item matrix must have colnames");
		for (int ix=0; ix < numItems(); ++ix) {
			bool found=false;
			for (int dc=0; dc < int(dataColNames.size()); ++dc) {
				if (strEQ(itemNames[ix], dataColNames[dc])) {
					SEXP col = VECTOR_ELT(Rdata, dc);
					if (!Rf_isFactor(col)) {
						if (TYPEOF(col) == INTSXP) {
							mxThrow("Column '%s' is an integer but "
								 "not an ordered factor",
								 dataColNames[dc]);
						} else {
							mxThrow("Column '%s' is of type %s; expecting an "
								 "ordered factor (integer)",
								 dataColNames[dc], Rf_type2char(TYPEOF(col)));
						}
					}
					dataColumns.push_back(INTEGER(col));
					found=true;
					break;
				}
			}
			if (!found) {
				mxThrow("Cannot find item '%s' in data", itemNames[ix]);
			}
		}
		if (weightColumnName) {
			for (int dc=0; dc < int(dataColNames.size()); ++dc) {
				if (strEQ(weightColumnName, dataColNames[dc])) {
					SEXP col = VECTOR_ELT(Rdata, dc);
					if (TYPEOF(col) != REALSXP) {
						mxThrow("Column '%s' is of type %s; expecting type numeric (double)",
							 dataColNames[dc], Rf_type2char(TYPEOF(col)));
					}
					rowWeight = REAL(col);
					break;
				}
			}
			if (!rowWeight) {
				mxThrow("Cannot find weight column '%s'", weightColumnName);
			}
		}
		if (freqColumnName) {
			for (int dc=0; dc < int(dataColNames.size()); ++dc) {
				if (strEQ(freqColumnName, dataColNames[dc])) {
					SEXP col = VECTOR_ELT(Rdata, dc);
					if (TYPEOF(col) != INTSXP) {
						mxThrow("Column '%s' is of type %s; expecting type integer",
							 dataColNames[dc], Rf_type2char(TYPEOF(col)));
					}
					rowFreq = INTEGER(col);
					break;
				}
			}
			if (!rowFreq) {
				mxThrow("Cannot find frequency column '%s'", freqColumnName);
			}
		}
		rowMap.reserve(dataRows);
		for (int rx=0; rx < dataRows; ++rx) rowMap.push_back(rx);
	}

	Eigen::Map< Eigen::ArrayXXd > Eparam(param, paramRows, numItems());
	Eigen::Map< Eigen::VectorXd > meanVec(mean, itemDims);
	Eigen::Map< Eigen::MatrixXd > covMat(cov, itemDims, itemDims);

	quad.setStructure(qwidth, qpoints, Eparam, meanVec, covMat);

	if (paramRows < impliedParamRows) {
		mxThrow("At least %d rows are required in the item parameter matrix, only %d found",
			 impliedParamRows, paramRows);
	}
	
	quad.refresh(meanVec, covMat);
}
Ejemplo n.º 4
0
MetaData::MetaData (SEXP xSEXP, SEXP ySEXP)
/*
 * Obtain meta data directly from data set.
 *
 * Used for training.
 *
 * Assume that the target variable is the last variable,
 * and no unused variables are in argument <data>
 */
{
    Rcpp::DataFrame data(xSEXP);
    nvars_ = data.size();

    if (nvars_ == 0) throw std::range_error(EMPTY_DATASET_MSG);

    feature_vars_ = idx_vec(nvars_);
    var_names_    = name_vec(nvars_);
    var_types_    = type_vec(nvars_);

    Rcpp::CharacterVector vnames(data.names());
    for (int vindex = 0; vindex < nvars_; vindex++) {
        // Store the names of feature variables.
        var_names_[vindex] = Rcpp::as<string>((SEXPREC*)vnames[vindex]);
    }

    for (int vindex = 0; vindex < nvars_; vindex++) {
        if (Rf_isFactor((SEXPREC*)data[vindex])) {
            // Store the levels of factor variables.
            Rcpp::IntegerVector vals(data[vindex]);
            Rcpp::CharacterVector levels(vals.attr("levels"));
            int nlevels = levels.size();

            name_value_map namevals;
            name_vec levnames(nlevels);
            for (int lindex = 0; lindex < nlevels; lindex++) {
                string name = Rcpp::as<string>((SEXPREC*)levels[lindex]);
                namevals.insert(name_value_map::value_type(name, lindex));  // Here, factor values starts from 0.
                levnames[lindex] = name;
            }
            var_values_[vindex].swap(namevals);
            val_names_[vindex].swap(levnames);
            var_types_[vindex] = DISCRETE;
        } else {
            var_types_[vindex] = TYPEOF((SEXPREC*)data[vindex]);
        }
    }

    // Store the levels of the target variable.
    Rcpp::IntegerVector vals(ySEXP);
    Rcpp::CharacterVector levels(vals.attr("levels"));
    int nlevels = levels.size();

    name_value_map namevals;
    name_vec levnames(nlevels);
    for (int lindex = 0; lindex < nlevels; lindex++) {
        string name = Rcpp::as<string>((SEXPREC*)levels[lindex]);
        namevals.insert(name_value_map::value_type(name, lindex));
        levnames[lindex] = name;
    }
    var_values_[nvars_].swap(namevals);
    val_names_[nvars_].swap(levnames);

    nlabels_ = val_names_[nvars_].size();

    // Store the indexes of feature variables.
    for (int i = 0; i < nvars_; ++i)
        feature_vars_[i] = i;
}
Ejemplo n.º 5
0
char *nvimcom_browser_line(SEXP *x, const char *xname, const char *curenv, const char *prefix, char *p)
{
    char xclass[64];
    char newenv[512];
    char curenvB[512];
    char ebuf[64];
    char pre[128];
    char newpre[128];
    int len;
    const char *ename;
    SEXP listNames, label, lablab, eexp, elmt = R_NilValue;
    SEXP cmdSexp, cmdexpr, ans, cmdSexp2, cmdexpr2;
    ParseStatus status, status2;
    int er = 0;
    char buf[128];

    if(strlen(xname) > 64)
        return p;

    if(obbrbufzise < strlen(obbrbuf2) + 1024)
        p = nvimcom_grow_obbrbuf();

    p = nvimcom_strcat(p, prefix);
    if(Rf_isLogical(*x)){
        p = nvimcom_strcat(p, "%#");
        strcpy(xclass, "logical");
    } else if(Rf_isNumeric(*x)){
        p = nvimcom_strcat(p, "{#");
        strcpy(xclass, "numeric");
    } else if(Rf_isFactor(*x)){
        p = nvimcom_strcat(p, "'#");
        strcpy(xclass, "factor");
    } else if(Rf_isValidString(*x)){
        p = nvimcom_strcat(p, "\"#");
        strcpy(xclass, "character");
    } else if(Rf_isFunction(*x)){
        p = nvimcom_strcat(p, "(#");
        strcpy(xclass, "function");
    } else if(Rf_isFrame(*x)){
        p = nvimcom_strcat(p, "[#");
        strcpy(xclass, "data.frame");
    } else if(Rf_isNewList(*x)){
        p = nvimcom_strcat(p, "[#");
        strcpy(xclass, "list");
    } else if(Rf_isS4(*x)){
        p = nvimcom_strcat(p, "<#");
        strcpy(xclass, "s4");
    } else if(TYPEOF(*x) == PROMSXP){
        p = nvimcom_strcat(p, "&#");
        strcpy(xclass, "lazy");
    } else {
        p = nvimcom_strcat(p, "=#");
        strcpy(xclass, "other");
    }

    PROTECT(lablab = allocVector(STRSXP, 1));
    SET_STRING_ELT(lablab, 0, mkChar("label"));
    PROTECT(label = getAttrib(*x, lablab));
    p = nvimcom_strcat(p, xname);
    p = nvimcom_strcat(p, "\t");
    if(length(label) > 0){
        if(Rf_isValidString(label)){
            snprintf(buf, 127, "%s", CHAR(STRING_ELT(label, 0)));
            p = nvimcom_strcat(p, buf);
        } else {
            if(labelerr)
                p = nvimcom_strcat(p, "Error: label isn't \"character\".");
        }
    }
    p = nvimcom_strcat(p, "\n");
    UNPROTECT(2);

    if(strcmp(xclass, "list") == 0 || strcmp(xclass, "data.frame") == 0 || strcmp(xclass, "s4") == 0){
        strncpy(curenvB, curenv, 500);
        if(xname[0] == '[' && xname[1] == '['){
            curenvB[strlen(curenvB) - 1] = 0;
        }
        if(strcmp(xclass, "s4") == 0)
            snprintf(newenv, 500, "%s%s@", curenvB, xname);
        else
            snprintf(newenv, 500, "%s%s$", curenvB, xname);
        if((nvimcom_get_list_status(newenv, xclass) == 1)){
            len = strlen(prefix);
            if(nvimcom_is_utf8){
                int j = 0, i = 0;
                while(i < len){
                    if(prefix[i] == '\xe2'){
                        i += 3;
                        if(prefix[i-1] == '\x80' || prefix[i-1] == '\x94'){
                            pre[j] = ' '; j++;
                        } else {
                            pre[j] = '\xe2'; j++;
                            pre[j] = '\x94'; j++;
                            pre[j] = '\x82'; j++;
                        }
                    } else {
                        pre[j] = prefix[i];
                        i++, j++;
                    }
                }
                pre[j] = 0;
            } else {
                for(int i = 0; i < len; i++){
                    if(prefix[i] == '-' || prefix[i] == '`')
                        pre[i] = ' ';
                    else
                        pre[i] = prefix[i];
                }
                pre[len] = 0;
            }
            sprintf(newpre, "%s%s", pre, strT);

            if(strcmp(xclass, "s4") == 0){
                snprintf(buf, 127, "slotNames(%s%s)", curenvB, xname);
                PROTECT(cmdSexp = allocVector(STRSXP, 1));
                SET_STRING_ELT(cmdSexp, 0, mkChar(buf));
                PROTECT(cmdexpr = R_ParseVector(cmdSexp, -1, &status, R_NilValue));

                if (status != PARSE_OK) {
                    p = nvimcom_strcat(p, "nvimcom error: invalid value in slotNames(");
                    p = nvimcom_strcat(p, xname);
                    p = nvimcom_strcat(p, ")\n");
                } else {
                    PROTECT(ans = R_tryEval(VECTOR_ELT(cmdexpr, 0), R_GlobalEnv, &er));
                    if(er){
                        p = nvimcom_strcat(p, "nvimcom error: ");
                        p = nvimcom_strcat(p, xname);
                        p = nvimcom_strcat(p, "\n");
                    } else {
                        len = length(ans);
                        if(len > 0){
                            int len1 = len - 1;
                            for(int i = 0; i < len; i++){
                                ename = CHAR(STRING_ELT(ans, i));
                                snprintf(buf, 127, "%s%s@%s", curenvB, xname, ename);
                                PROTECT(cmdSexp2 = allocVector(STRSXP, 1));
                                SET_STRING_ELT(cmdSexp2, 0, mkChar(buf));
                                PROTECT(cmdexpr2 = R_ParseVector(cmdSexp2, -1, &status2, R_NilValue));
                                if (status2 != PARSE_OK) {
                                    p = nvimcom_strcat(p, "nvimcom error: invalid code \"");
                                    p = nvimcom_strcat(p, xname);
                                    p = nvimcom_strcat(p, "@");
                                    p = nvimcom_strcat(p, ename);
                                    p = nvimcom_strcat(p, "\"\n");
                                } else {
                                    PROTECT(elmt = R_tryEval(VECTOR_ELT(cmdexpr2, 0), R_GlobalEnv, &er));
                                    if(i == len1)
                                        sprintf(newpre, "%s%s", pre, strL);
                                    p = nvimcom_browser_line(&elmt, ename, newenv, newpre, p);
                                    UNPROTECT(1);
                                }
                                UNPROTECT(2);
                            }
                        }
                    }
                    UNPROTECT(1);
                }
                UNPROTECT(2);
            } else {
                PROTECT(listNames = getAttrib(*x, R_NamesSymbol));
                len = length(listNames);
                if(len == 0){ /* Empty list? */
                    int len1 = length(*x);
                    if(len1 > 0){ /* List without names */
                        len1 -= 1;
                        for(int i = 0; i < len1; i++){
                            sprintf(ebuf, "[[%d]]", i + 1);
                            elmt = VECTOR_ELT(*x, i);
                            p = nvimcom_browser_line(&elmt, ebuf, newenv, newpre, p);
                        }
                        sprintf(newpre, "%s%s", pre, strL);
                        sprintf(ebuf, "[[%d]]", len1 + 1);
                        PROTECT(elmt = VECTOR_ELT(*x, len));
                        p = nvimcom_browser_line(&elmt, ebuf, newenv, newpre, p);
                        UNPROTECT(1);
                    }
                } else { /* Named list */
                    len -= 1;
                    for(int i = 0; i < len; i++){
                        PROTECT(eexp = STRING_ELT(listNames, i));
                        ename = CHAR(eexp);
                        UNPROTECT(1);
                        if(ename[0] == 0){
                            sprintf(ebuf, "[[%d]]", i + 1);
                            ename = ebuf;
                        }
                        PROTECT(elmt = VECTOR_ELT(*x, i));
                        p = nvimcom_browser_line(&elmt, ename, newenv, newpre, p);
                        UNPROTECT(1);
                    }
                    sprintf(newpre, "%s%s", pre, strL);
                    ename = CHAR(STRING_ELT(listNames, len));
                    if(ename[0] == 0){
                        sprintf(ebuf, "[[%d]]", len + 1);
                        ename = ebuf;
                    }
                    PROTECT(elmt = VECTOR_ELT(*x, len));
                    p = nvimcom_browser_line(&elmt, ename, newenv, newpre, p);
                    UNPROTECT(1);
                }
                UNPROTECT(1); /* listNames */
            }
        }
    }
    return p;
}