Пример #1
0
void CSVImporter::initColumn(Column &column, const string &name, const vector<string> &cells)
{
	// we treat single spaces as missing values, because SPSS saves missing values as a single space in CSV files

	column.setName(name);

	// try to make the column nominal

	bool success = true;
	set<int> uniqueValues;
	Column::Ints::iterator intInputItr = column.AsInts.begin();
	Labels &labels = column.labels();
	labels.clear();

	BOOST_FOREACH(const string &value, cells)
	{
		if (value != "NaN" && value != "nan" && value != "" && value != " ")
		{
			try
			{
				int v = lexical_cast<int>(value);
				uniqueValues.insert(v);
				*intInputItr = v;
			}
			catch (...)
			{
				// column can't be made nominal numeric

				success = false;
				break;
			}
		}
		else
		{
			*intInputItr = INT_MIN;
		}

		intInputItr++;
	}

	if (success && uniqueValues.size() <= 24)
	{
		labels.clear();

		BOOST_FOREACH(int value, uniqueValues)
		{
			(void)uniqueValues;
			labels.add(value);
		}
Пример #2
0
// Fills `column` with labeled integer data taken from an SPSS column.
//
// The column's existing labels are cleared first. A value -> label dictionary
// is built from spssCol.spssLables and extended with a formatted label
// (spssCol.format) for every non-missing numeric value that has none yet.
// Each case is then written to column.AsInts as an int; cases the column's
// missing-value checker reports as missing are written as INT_MIN.
//   - If spssCol.containsFraction(): the stored int is the ordinal position
//     of the value inside the (ascending) value -> label dictionary.
//   - Otherwise: the stored int is the numeric value converted with
//     static_cast<int>.
// Only ints that were actually stored get a label added to the column.
//
// column   - column that receives the labels and the data.
// numCases - number of cases read from spssCol.numerics.
// spssCol  - source SPSS column.
//
// NOTE(review): assumes spssCol.numerics and column.AsInts each hold at least
// numCases elements; nothing in this function checks that - confirm at callers.
void SPSSImporter::setColumnLabeledData(Column &column, size_t numCases, const SPSSColumn &spssCol)
{
	column.labels().clear();

	// Add labels from the SPSS file first, keyed by their numeric value.
	map<double, string> lbs;
	for (SPSSColumn::LabelByValueDict::const_iterator it = spssCol.spssLables.begin();
			it != spssCol.spssLables.end(); ++it)
		lbs.insert( pair<double, string>(it->first.dbl, it->second) );

	// Record which cases are missing, and add a formatted label for every
	// non-missing value that the SPSS file did not already label.
	vector<bool> isMissing(numCases);
	for (size_t i = 0; i < numCases; ++i)
	{
		if (spssCol.missingChecker().isMissingValue(_floatInfo, spssCol.numerics[i]) == false)
		{
			isMissing[i] = false;
			// Look up first so the label is only formatted when it is needed.
			if (lbs.find(spssCol.numerics[i]) == lbs.end())
				lbs.insert( pair<double, string>( spssCol.numerics[i], spssCol.format(spssCol.numerics[i], _floatInfo)) );
		}
		else
			isMissing[i] = true;
	}

	// Extract the data we are going to use.
	vector<int> dataToInsert;
	dataToInsert.reserve(numCases);
	map<int, string> labels;
	// We cannot insert doubles as data values and get labels
	// for them to work (JASP limitation).
	if (spssCol.containsFraction())
	{
		// Number the dictionary entries once, in iteration (ascending key)
		// order. The ordinal equals distance(lbs.begin(), it), but computing
		// that per case walks the map linearly and made this loop quadratic.
		map<double, int> ordinalByValue;
		int nextOrdinal = 0;
		for (map<double, string>::const_iterator it = lbs.begin(); it != lbs.end(); ++it, ++nextOrdinal)
			ordinalByValue.insert( pair<double, int>(it->first, nextOrdinal) );

		// Store the ordinal as the data point for each non-missing case.
		for (size_t i = 0; i < numCases; ++i)
		{
			if (isMissing[i])
				dataToInsert.push_back(INT_MIN);
			else
			{
				// Every non-missing value was given an entry in lbs above,
				// so both lookups are expected to succeed.
				map<double, string>::const_iterator fltLabeI = lbs.find(spssCol.numerics[i]);
				const int ordinal = ordinalByValue.find(spssCol.numerics[i])->second;
				dataToInsert.push_back(ordinal);
				// Pair the inserted value with its label string.
				labels.insert(pair<int, string>(ordinal, fltLabeI->second));
			}
		}
	}
	else
	{
		// Use the raw data as the index to labels.
		for (size_t i = 0; i < numCases; ++i)
		{
			if (isMissing[i])
				dataToInsert.push_back(INT_MIN);
			else
			{
				// Insert the value, converted to int, as the data point.
				dataToInsert.push_back( static_cast<int>(spssCol.numerics[i]) );
				map<double, string>::const_iterator fltLabeI = lbs.find(spssCol.numerics[i]);
				// Pair the inserted value with its label string; an int that
				// already has a label keeps the first one inserted.
				labels.insert(pair<int, string>( static_cast<int>(fltLabeI->first), fltLabeI->second));
			}
		}
	}

	// Insert the labels into the JASP data set.
	for (map<int, string>::const_iterator it = labels.begin(); it != labels.end(); ++it)
		column.labels().add(it->first, it->second);

	// Insert the data into the data set.
	Column::Ints::iterator intInputItr = column.AsInts.begin();
	for (size_t i = 0; i < dataToInsert.size(); ++i, ++intInputItr)
		*intInputItr = dataToInsert[i];
}