void CSVImporter::initColumn(Column &column, const string &name, const vector<string> &cells) { // we treat single spaces as missing values, because SPSS saves missing values as a single space in CSV files column.setName(name); // try to make the column nominal bool success = true; set<int> uniqueValues; Column::Ints::iterator intInputItr = column.AsInts.begin(); Labels &labels = column.labels(); labels.clear(); BOOST_FOREACH(const string &value, cells) { if (value != "NaN" && value != "nan" && value != "" && value != " ") { try { int v = lexical_cast<int>(value); uniqueValues.insert(v); *intInputItr = v; } catch (...) { // column can't be made nominal numeric success = false; break; } } else { *intInputItr = INT_MIN; } intInputItr++; } if (success && uniqueValues.size() <= 24) { labels.clear(); BOOST_FOREACH(int value, uniqueValues) { (void)uniqueValues; labels.add(value); }
void SPSSImporter::setColumnLabeledData(Column &column, size_t numCases, const SPSSColumn &spssCol) { column.labels().clear(); // Add lables from the SPSS file first. map<double, string> lbs; for (SPSSColumn::LabelByValueDict::const_iterator it = spssCol.spssLables.begin(); it != spssCol.spssLables.end(); ++it) lbs.insert( pair<double, string>(it->first.dbl, it->second) ); // Add labels for numeric values (if not already present).. vector<bool> isMissing(numCases); for (size_t i = 0; i < numCases; ++i) { if (spssCol.missingChecker().isMissingValue(_floatInfo, spssCol.numerics[i]) == false) { isMissing[i] = false; if (lbs.find(spssCol.numerics[i]) == lbs.end()) lbs.insert( pair<double, string>( spssCol.numerics[i], spssCol.format(spssCol.numerics[i], _floatInfo)) ); } else isMissing[i] = true; } // Extract the data were are going to use. vector<int> dataToInsert; map<int, string> labels; // We cannot insert doubles as data valuesm and get labels // for them to work (JASP limitation). if (spssCol.containsFraction()) { // Generate an index value for each data point. for (size_t i = 0; i < numCases; ++i) { // Find insert the index as a data point, if not missing value. if (isMissing[i]) dataToInsert.push_back(INT_MIN); else { map<double, string>::iterator fltLabeI = lbs.find(spssCol.numerics[i]); dataToInsert.push_back( distance(lbs.begin(), fltLabeI) ); // Pair the inserted value with a lable string. labels.insert(pair<int, string>(dataToInsert.back(), fltLabeI->second)); } } } else { // Use the raw data as the index to labels. for (size_t i = 0; i < numCases; ++i) { // insert the (rounded) value as the data point. if (isMissing[i]) dataToInsert.push_back(INT_MIN); else { dataToInsert.push_back( static_cast<int>(spssCol.numerics[i]) ); map<double, string>::iterator fltLabeI = lbs.find(spssCol.numerics[i]); // pair the inserted value with a lable string. labels.insert(pair<int, string>( static_cast<int>(fltLabeI->first), fltLabeI->second)); } } } // Insert the labels into the JASP data set. for (map<int, string>::const_iterator it = labels.begin(); it != labels.end(); ++it) column.labels().add(it->first, it->second); // Insert the data into the data set. Column::Ints::iterator intInputItr = column.AsInts.begin(); for (size_t i = 0; i < dataToInsert.size(); ++i, ++intInputItr) *intInputItr = dataToInsert[i]; }