/**
 * @brief Processes the file header record.
 *
 * Empties the column dictionary (warning the user if a header was already
 * seen), verifies that the file's compression scheme is one we can decode,
 * and records the case count when the header supplies one.
 *
 * @param columns The column dictionary being built.
 * @throws runtime_error If the compression type is unrecognised.
 */
void FileHeaderRecord::process(SPSSColumns &columns)
{
	// A non-empty dictionary means a header record was already processed;
	// the newest header wins, but tell the user about it.
	if (columns.size() != 0)
	{
		cout << "This file appears to have more than one file header record.\n"
				" Only the last one found will be used." << endl;
		cout.flush();
	}
	columns.clear();

	// Bail out early on any compression scheme we do not understand.
	switch (compressed())
	{
	default:
		throw runtime_error("Cannot find compression type for .SAV file.");

	case compression_none:
	case compression_bytecode:
	case compression_zlib:
		break;
	}

	// -1 is the sentinel for "case count unknown" — leave it unset then.
	if (ncases() != -1)
		columns.numCases(ncases());
}
/**
 * @brief Processes an extended (64-bit) number-of-cases record.
 *
 * Transfers the 64-bit case count to the column dictionary, unless the
 * record holds the "unknown" sentinel value (-1).
 *
 * @param columns The column dictionary being built.
 */
void ExtNumberCasesRecord::process(SPSSColumns &columns)
{
	// -1 means the record does not actually know the case count.
	if (ncases64() == -1L)
		return;
	columns.numCases(ncases64());
}
/**
 * @brief Appends a column for this variable record, or extends the span of
 *        the previous string column when this is a continuation record.
 *
 * @param columns The column dictionary being built.
 */
void VariableRecord::process(SPSSColumns &columns)
{
	// Long strings are stored as a leading variable record followed by
	// continuation records: each continuation widens the previous column.
	if (isStringContinuation())
	{
		if ((columns.size() != 0) && (columns[columns.size()-1].isString()))
		{
			columns[columns.size()-1].columnSpan++;
			// Bug fix: this trace used to run unconditionally, indexing
			// columns[size()-1] even when the dictionary was empty (out of
			// bounds). It now only runs when the guard above holds.
			DEBUG_COUT5("Existing column ", columns[columns.size()-1].spssName,
						" spans ", columns[columns.size()-1].columnSpan, " cols.");
		}
		return;
	}

	// Decode the variable type: 0 = numeric (continuous); any other value
	// marks a string, with positive values giving the string length.
	int32_t strLen = 0;
	int32_t measure = spss::Measures::measure_undefined;
	if (type() == 0)
		measure = spss::Measures::measure_continuous;
	else if (type() == -1)
		measure = spss::Measures::string_type;
	else
	{
		measure = spss::Measures::string_type;
		strLen = type();
	}

	{
		SPSSColumn col(name(),
					   hasVarLabel() ? label() : name(),
					   MissingValueChecker(n_missing_values(), missing_values()),
					   strLen,
					   measure);
		columns.push_back(col);
	}

	DEBUG_COUT4("VariableRecord::process() - Added column ",
				columns.back().spssName, "/", columns.back().spssLabel);
}
/**
 * @brief Handles the dictionary-termination record.
 *
 * No payload is read from the record itself; we only reset the column
 * dictionary's internal column iterator, ready for reading data records.
 *
 * @param columns The column dictionary that has just been completed.
 */
void DictionaryTermination::process(SPSSColumns &columns)
{
	// init the columns iterator.
	columns.resetCols();
}
void SPSSImporter::loadDataSet( DataSetPackage *packageData, const std::string &locator, boost::function<void (const std::string &, int)> progress) { (void)progress; packageData->isArchive = false; // SPSS/spss files are never archives. packageData->dataSet = SharedMemory::createDataSet(); // Do our space. killFhr(); // Open the file. SPSSStream stream(locator.c_str(), ios::in | ios::binary); // Get it's size stream.seekg(0, stream.end); _fileSize = static_cast<double>(stream.tellg()); stream.seekg(0, stream.beg); // Data we have scraped to date. SPSSColumns dictData; // Fetch the dictionary. bool processingDict = true; while(stream.good() && processingDict) { // Inform user of progress. reportFileProgress(stream.tellg(), progress); // Get the record type. union { int32_t u; RecordTypes t; Char_4 c; } rec_type; rec_type.u = rectype_unknown; stream.read((char *) &rec_type.u, sizeof(rec_type.u)); // Endiness for rec_type, if known. if (_pFileHeaderRecord != 0) dictData.numericsConv().fixup(&rec_type.u); // ... and the record type type is.... switch(rec_type.t) { case FileHeaderRecord::RECORD_TYPE: _pFileHeaderRecord = new FileHeaderRecord(dictData.numericsConv(), rec_type.t, stream); _pFileHeaderRecord->process(dictData); break; case VariableRecord::RECORD_TYPE: { VariableRecord record(dictData.numericsConv(), rec_type.t, _pFileHeaderRecord, stream); record.process(dictData); } break; case ValueLabelVarsRecord::RECORD_TYPE: { ValueLabelVarsRecord record(dictData.numericsConv(), rec_type.t, stream); record.process(dictData); } break; case rectype_meta_data: // Need to find the type of the data.. 
{ union { int32_t i; RecordSubTypes s; } sub_type; sub_type.s = recsubtype_unknown; stream.read((char *) &sub_type.i, sizeof(sub_type.i)); dictData.numericsConv().fixup(&sub_type.i); switch (sub_type.s) { case IntegerInfoRecord::SUB_RECORD_TYPE: { _integerInfo = IntegerInfoRecord(dictData.numericsConv(), sub_type.s, rec_type.t, stream); _integerInfo.process(dictData); } break; case FloatInfoRecord::SUB_RECORD_TYPE: { _floatInfo = FloatInfoRecord(dictData.numericsConv(), sub_type.s, rec_type.t, stream); _floatInfo.process(dictData); } break; case VarDisplayParamRecord::SUB_RECORD_TYPE: { VarDisplayParamRecord record(dictData.numericsConv(), sub_type.s, rec_type.t, dictData.size(), stream); record.process(dictData); } break; case LongVarNamesRecord::SUB_RECORD_TYPE: { LongVarNamesRecord record(dictData.numericsConv(), sub_type.s, rec_type.t, stream); record.process(dictData); } break; case VeryLongStringRecord::SUB_RECORD_TYPE: { VeryLongStringRecord record(dictData.numericsConv(), sub_type.s, rec_type.t, stream); record.process(dictData); } break; case ExtNumberCasesRecord::SUB_RECORD_TYPE: { ExtNumberCasesRecord record(dictData.numericsConv(), sub_type.s, rec_type.t, stream); record.process(dictData); } break; case CharacterEncodingRecord::SUB_RECORD_TYPE: { CharacterEncodingRecord record(dictData.numericsConv(), sub_type.s, rec_type.t, stream); record.process(dictData); } break; default: { MiscInfoRecord record(dictData.numericsConv(), sub_type.i, rec_type.t, stream); record.process(dictData); } } } break; case DocumentRecord::RECORD_TYPE: { DocumentRecord dummy(dictData.numericsConv(), rec_type.t, stream); dummy.process(dictData); } break; case DictionaryTermination::RECORD_TYPE: { DictionaryTermination dummy(dictData.numericsConv(), rec_type.t, stream); dummy.process(dictData); } processingDict = false; // Got end of dictionary. 
break; case rectype_unknown: default: { string msg("Unknown record type '"); msg.append(rec_type.c, sizeof(rec_type.c)); msg.append("' found.\n" "The SAV importer cannot yet read this file.\n" "Please report this error at \n" "https://github.com/jasp-stats/jasp-desktop/issues\n" "including a small sample .SAV file that produces this message."); throw runtime_error(msg); break; } } } //If we got a file header then.. if (_pFileHeaderRecord == 0) throw runtime_error("No header found in .SAV file."); // Now convert the string in the header that we are interested in., ConvertedStringContainer::processAllStrings(dictData.stringsConv()); // read the data records from the file. DataRecords data(dictData.numericsConv(), *_pFileHeaderRecord, dictData, stream, progress); data.read(packageData); dictData.processStringsPostLoad(progress); DEBUG_COUT5("Read ", data.numDbls(), " doubles and ", data.numStrs(), " string cells."); // bail if unknown number of cases. if (dictData.hasNoCases()) throw runtime_error("Found no cases in .SAV file."); // Set the data size setDataSetSize(*packageData, dictData.numCases(), dictData.size()); bool success; do { success = true; try { // Now go fetch the data. 
int jaspColCount = 0; for (SPSSDictionary::iterator colI = dictData.begin(); (colI != dictData.end() ) && (jaspColCount < packageData->dataSet->columnCount()); ++colI, ++jaspColCount) { SPSSColumn &spssCol = colI->second; Column &column = packageData->dataSet->column(jaspColCount); DEBUG_COUT3("Getting col: ", spssCol.spssRawColName(), ".\n"); column.setName(spssCol.spssColumnLabel()); column.setColumnType( spssCol.getJaspColumnType() ); column.labels().clear(); switch(column.columnType()) { default: // Skip unknown columns break; case Column::ColumnTypeScale: setColumnScaleData(column, dictData.numCases(), spssCol); break; case Column::ColumnTypeNominal: case Column::ColumnTypeOrdinal: setColumnLabeledData(column, dictData.numCases(), spssCol); break; case Column::ColumnTypeNominalText: switch(spssCol.cellType()) { case SPSSColumn::cellString: setColumnConvrtStringData(column, dictData.stringsConv(), spssCol); break; case SPSSColumn::cellDouble: // convert to UTF-8 strings. spssCol.strings.clear(); for (size_t i = 0; i < spssCol.numerics.size(); i++) spssCol.strings.push_back( spssCol.format(spssCol.numerics[i], _floatInfo) ); column.setColumnAsNominalString(spssCol.strings); break; } break; } } } catch (boost::interprocess::bad_alloc &e) { try { packageData->dataSet = SharedMemory::enlargeDataSet(packageData->dataSet); success = false; } catch (boost::exception &e) { throw runtime_error("Out of memory: this data set is too large for your computer's available memory"); } } catch (std::exception e) { cout << "n " << e.what(); cout.flush(); } catch (...) { cout << "something else\n "; cout.flush(); } } while (success == false); if (stream.bad()) { killFhr(); SharedMemory::deleteDataSet(packageData->dataSet); throw runtime_error("Error reading .SAV file."); } };