/**
 * @brief Resets the column store ready for this (new) file header.
 *
 * Warns the user if a header record was already seen, validates the
 * file's compression scheme, and records the case count when known.
 */
void FileHeaderRecord::process(SPSSColumns &columns)
{
	// A second header record is suspicious - tell the user which one wins.
	const bool headerAlreadySeen = (columns.size() != 0);
	if (headerAlreadySeen)
	{
		cout << "This file appears to have more than one file header record.\n"
				"  Only the last one found will be used." << endl;
		cout.flush();
	}
	columns.clear();

	// Reject files whose compression scheme we do not understand.
	const auto compression = compressed();
	if ((compression != compression_none) &&
		(compression != compression_bytecode) &&
		(compression != compression_zlib))
		throw runtime_error("Cannot find compression type for .SAV file.");

	// -1 flags "case count unknown"; only record a real value.
	if (ncases() != -1)
		columns.numCases(ncases());
}
/**
 * @brief Records the extended (64-bit) case count, when one is present.
 */
void ExtNumberCasesRecord::process(SPSSColumns &columns)
{
	const auto caseCount = ncases64();

	// -1 flags "not known" - leave any earlier value untouched.
	if (caseCount != -1L)
		columns.numCases(caseCount);
}
/**
 * @brief createCol Appends a column to the vector.
 *
 * String-continuation records widen the span of the preceding (string)
 * column; every other record creates and appends a new column.
 */
void VariableRecord::process(SPSSColumns &columns)
{
	// check for string continuation.
	if (isStringContinuation())
	{
		// BUG FIX: the debug output used to dereference
		// columns[columns.size()-1] outside this guard, which is invalid
		// when no column exists yet (columns.size() == 0 underflows).
		if ((columns.size() != 0) && (columns[columns.size()-1].isString()))
		{
			columns[columns.size()-1].columnSpan++;

			DEBUG_COUT5("Existing column ", columns[columns.size()-1].spssName, " spans ", columns[columns.size()-1].columnSpan, " cols.");
		}

		return;
	}

	int32_t strLen = 0;
	int32_t measure = spss::Measures::measure_undefined;

	if (type() == 0)
		measure = spss::Measures::measure_continuous;	// Numeric column.

	else if (type() == -1)
		measure = spss::Measures::string_type;

	else
	{
		// Positive type values carry the string length.
		measure = spss::Measures::string_type;
		strLen = type();
	}

	{
		// Fall back to the (short) name when no variable label was supplied.
		SPSSColumn col(name(), hasVarLabel()? label() : name(),
					  MissingValueChecker(n_missing_values(), missing_values()),
					  strLen, measure);
		columns.push_back(col);
	}

	DEBUG_COUT4("VariableRecord::process() - Added column ", columns.back().spssName, "/", columns.back().spssLabel);
}
/**
 * @brief Marks the end of the dictionary.
 *
 * Simply resets the columns iterator ready for the data records.
 */
void DictionaryTermination::process(SPSSColumns &columns)
{
	columns.resetCols();	// init the columns iterator.
}
// (Removed stray pasted text "Esempio n. 5" / "0" - it was not valid C++
//  and broke compilation between the two functions.)
/**
 * @brief Reads an SPSS .SAV file into the data set package.
 *
 * @param packageData Receives the loaded data set (shared memory).
 * @param locator     Path of the .SAV file to read.
 * @param progress    Callback informed of load progress.
 *
 * @throws runtime_error on a malformed / unreadable file, on an unknown
 *         record type, or when the data set cannot fit in memory.
 */
void SPSSImporter::loadDataSet(
		DataSetPackage *packageData,
		const std::string &locator,
		boost::function<void (const std::string &, int)> progress)
{
	packageData->isArchive = false;						 // SPSS/spss files are never archives.
	packageData->dataSet = SharedMemory::createDataSet();   // Do our space.

	killFhr();

	// Open the file.
	SPSSStream stream(locator.c_str(), ios::in | ios::binary);

	// Get its size (used for progress reporting).
	stream.seekg(0, stream.end);
	_fileSize = static_cast<double>(stream.tellg());
	stream.seekg(0, stream.beg);

	// Data we have scraped to date.
	SPSSColumns dictData;

	// Fetch the dictionary.
	bool processingDict = true;

	while(stream.good() && processingDict)
	{
		// Inform user of progress.
		reportFileProgress(stream.tellg(), progress);

		// Get the record type.
		union { int32_t u; RecordTypes t; Char_4 c; } rec_type;
		rec_type.u = rectype_unknown;
		stream.read((char *) &rec_type.u, sizeof(rec_type.u));
		// Endianness for rec_type - only known once a header has been seen.
		if (_pFileHeaderRecord != 0)
			dictData.numericsConv().fixup(&rec_type.u);

		// ... and the record type type is....
		switch(rec_type.t)
		{
		case FileHeaderRecord::RECORD_TYPE:
			// BUG FIX: release any previous header record first - a file
			// with two header records used to leak the first allocation.
			killFhr();
			_pFileHeaderRecord = new FileHeaderRecord(dictData.numericsConv(), rec_type.t, stream);
			_pFileHeaderRecord->process(dictData);
			break;

		case VariableRecord::RECORD_TYPE:
		{
			VariableRecord record(dictData.numericsConv(), rec_type.t, _pFileHeaderRecord, stream);
			record.process(dictData);
		}
			break;

		case ValueLabelVarsRecord::RECORD_TYPE:
		{
			ValueLabelVarsRecord record(dictData.numericsConv(), rec_type.t, stream);
			record.process(dictData);
		}
			break;

		case rectype_meta_data: // Need to find the type of the data..
		{
			union { int32_t i; RecordSubTypes s; } sub_type;
			sub_type.s = recsubtype_unknown;
			stream.read((char *) &sub_type.i, sizeof(sub_type.i));
			dictData.numericsConv().fixup(&sub_type.i);
			switch (sub_type.s)
			{
			case  IntegerInfoRecord::SUB_RECORD_TYPE:
			{
				_integerInfo = IntegerInfoRecord(dictData.numericsConv(), sub_type.s, rec_type.t, stream);
				_integerInfo.process(dictData);
			}
				break;

			case FloatInfoRecord::SUB_RECORD_TYPE:
			{
				_floatInfo = FloatInfoRecord(dictData.numericsConv(), sub_type.s, rec_type.t, stream);
				_floatInfo.process(dictData);
			}
				break;

			case VarDisplayParamRecord::SUB_RECORD_TYPE:
			{
				VarDisplayParamRecord record(dictData.numericsConv(), sub_type.s, rec_type.t, dictData.size(), stream);
				record.process(dictData);
			}
				break;

			case LongVarNamesRecord::SUB_RECORD_TYPE:
			{
				LongVarNamesRecord record(dictData.numericsConv(), sub_type.s, rec_type.t, stream);
				record.process(dictData);
			}
				break;

			case VeryLongStringRecord::SUB_RECORD_TYPE:
			{
				VeryLongStringRecord record(dictData.numericsConv(), sub_type.s, rec_type.t, stream);
				record.process(dictData);
			}
				break;

			case ExtNumberCasesRecord::SUB_RECORD_TYPE:
			{
				ExtNumberCasesRecord record(dictData.numericsConv(), sub_type.s, rec_type.t, stream);
				record.process(dictData);
			}
				break;

			case CharacterEncodingRecord::SUB_RECORD_TYPE:
			{
				CharacterEncodingRecord record(dictData.numericsConv(), sub_type.s, rec_type.t, stream);
				record.process(dictData);
			}
				break;

			default:
			{
				// Unknown sub record: skipped / kept generically.
				MiscInfoRecord record(dictData.numericsConv(), sub_type.i, rec_type.t, stream);
				record.process(dictData);
			}
			}
		}
			break;

		case DocumentRecord::RECORD_TYPE:
		{
			DocumentRecord dummy(dictData.numericsConv(), rec_type.t, stream);
			dummy.process(dictData);
		}
			break;

		case DictionaryTermination::RECORD_TYPE:
		{
			DictionaryTermination dummy(dictData.numericsConv(), rec_type.t, stream);
			dummy.process(dictData);
		}
			processingDict = false; // Got end of dictionary.
			break;

		case rectype_unknown:
		default:
		{
			string msg("Unknown record type '"); msg.append(rec_type.c, sizeof(rec_type.c)); msg.append("' found.\n"
				"The SAV importer cannot yet read this file.\n"
				"Please report this error at \n"
				"https://github.com/jasp-stats/jasp-desktop/issues\n"
				"including a small sample .SAV file that produces this message.");
			throw runtime_error(msg);
		}
		}
	}

	// If we got a file header then..
	if (_pFileHeaderRecord == 0)
		throw runtime_error("No header found in .SAV file.");

	// Now convert the strings in the header that we are interested in.
	ConvertedStringContainer::processAllStrings(dictData.stringsConv());

	// read the data records from the file.
	DataRecords data(dictData.numericsConv(), *_pFileHeaderRecord, dictData, stream, progress);
	data.read(packageData);

	dictData.processStringsPostLoad(progress);


	DEBUG_COUT5("Read ", data.numDbls(), " doubles and ", data.numStrs(), " string cells.");


	// bail if unknown number of cases.
	if (dictData.hasNoCases())
		throw runtime_error("Found no cases in .SAV file.");

	// Set the data size
	setDataSetSize(*packageData, dictData.numCases(), dictData.size());

	bool success;
	do {

		success = true;
		try {
			// Now go fetch the data.
			int jaspColCount = 0;
			for (SPSSDictionary::iterator colI = dictData.begin();
				 (colI != dictData.end() ) && (jaspColCount < packageData->dataSet->columnCount());
				 ++colI, ++jaspColCount)
			{
				SPSSColumn &spssCol = colI->second;
				Column &column = packageData->dataSet->column(jaspColCount);

				DEBUG_COUT3("Getting col: ", spssCol.spssRawColName(), ".\n");

				column.setName(spssCol.spssColumnLabel());
				column.setColumnType( spssCol.getJaspColumnType() );
				column.labels().clear();

				switch(column.columnType())
				{
				default:	// Skip unknown columns
					break;
				case Column::ColumnTypeScale:
					setColumnScaleData(column, dictData.numCases(), spssCol);
					break;

				case Column::ColumnTypeNominal:
				case Column::ColumnTypeOrdinal:
					setColumnLabeledData(column, dictData.numCases(), spssCol);
					break;

				case Column::ColumnTypeNominalText:
					switch(spssCol.cellType())
					{
					case SPSSColumn::cellString:
						setColumnConvrtStringData(column, dictData.stringsConv(), spssCol);
						break;

					case SPSSColumn::cellDouble:
						 // convert to UTF-8 strings.
						spssCol.strings.clear();
						for (size_t i = 0; i < spssCol.numerics.size(); i++)
							spssCol.strings.push_back( spssCol.format(spssCol.numerics[i], _floatInfo) );
						column.setColumnAsNominalString(spssCol.strings);
						break;
					}
					break;
				}
			}
		}
		catch (boost::interprocess::bad_alloc &)
		{
			// Shared memory was too small - grow it and retry the whole pass.
			try {

				packageData->dataSet = SharedMemory::enlargeDataSet(packageData->dataSet);
				success = false;
			}
			catch (boost::exception &)
			{
				throw runtime_error("Out of memory: this data set is too large for your computer's available memory");
			}
		}
		// BUG FIX: catch by const reference - catching std::exception by
		// value slices derived exceptions and copies the object.
		catch (const std::exception &e)
		{
			cout << "n " << e.what();
			cout.flush();
		}
		catch (...)
		{
			cout << "something else\n ";
			cout.flush();
		}

	} while (success == false);

	if (stream.bad())
	{
		killFhr();
		SharedMemory::deleteDataSet(packageData->dataSet);
		throw runtime_error("Error reading .SAV file.");
	}
}