// // setString(): // void MadelineTable::setString(std::string inString){ _readString(inString); _getLineCount(); _allocateLineInformationVector(); _assignLinePointers(); _determineNumberOfBlocks(); _assignRecordTypes(); _checkDataBlockRectangularity(); _calculateColumnFillRates(); _determineNumberOfColumns(); _determineFieldBoundaries(); _parseHeader(); // // Now Handled downstream in DataTable instead: //_classifyColumns(); // This is basically obsoleted now because DataTable now handles classification: //_resolveDeclaredVersusDiscoveredColumnTypes(); // _fillVectors(); // DEBUG: //display(); }
/// /// readFile: Reads a local file on disk /// void Parser::readFile(const std::string &fileName){ _fileName = fileName; ////////////////////////////////////////////////////////////////////////////// // // First find out what kind of file it is using the fileTypeClassifier class: // ////////////////////////////////////////////////////////////////////////////// // // FileTypeClassifier is now a local instance only: // FileTypeClassifier fileTypeClassifier; if(!fileTypeClassifier.setFileToTest(_fileName.c_str())){ Warning("In Parser::readFile(), File %s cannot be located or read and will be ignored", _fileName.c_str() ); return; } _fileType = fileTypeClassifier.classify(); _fileTypeName = fileTypeClassifier.classifyByName(); std::string fileData; Compressor newCompressor; // // Message for user: // std::cout << "Parser::readFile(): Opening a(n) " << _fileTypeName << " file ...\n"; // // If the file is compressed, decompress it into a std::string. // Otherwise, read it into a std::string. // if( _fileType==FileTypeClassifier::PKZIP || _fileType==FileTypeClassifier::GZIP || _fileType==FileTypeClassifier::BZIP2 ) { // // Decompress into a std::string : // std::cout << "Parser::readFile(): Decompressing " << _fileTypeName << " file ...\n"; fileData = newCompressor.decompress(_fileName); } else { // // Read into a std::string : // fileData = _stringify(_fileName); } //////////////////////////////////////// // // Now process the fileData std::string: // //////////////////////////////////////// if( _xmlTagManager.discoverFormat(fileData) ) { // // fileData are in a recognized XML format: // std::cout << "Reading file data in " << _xmlTagManager.getFormatName() << " format ...\n"; _readXML(fileData); } else { // // If it is not an XML file, then it is a flat file. // Two flat file formats are recognized: // // (1) Original Madeline format (2) Delimited format // // If there is only 1 block, then it is a tab delimited file. // If there are 2 blocks, it is in Madeline format: // int blockCount = _determineNumberOfBlocks( fileData ); if( blockCount == 1) { std::cout << "Reading file data in delimited format ...\n"; _readDelimited(fileData); } else if( blockCount == 2) { std::cout << "Reading file data in Madeline flat file format ...\n"; _readMadeline(fileData); } else { Warning("Parser::readFile(): Sorry, the format of the file %s could not be determined and will be ignored.", _fileName.c_str() ); } } }