//
// setString(): Parses a table that is supplied as an in-memory string.
//
// inString is taken by value and handed to _readString(); afterwards each
// private helper below is invoked exactly once, in the order shown.
//
// NOTE(review): the helper implementations are not visible from here, so the
// per-step remarks below are based on the helper names only and should be
// confirmed. Later steps presumably depend on state produced by earlier ones,
// so the call order should not be changed without checking.
//
void MadelineTable::setString(std::string inString){
	
	// Take in the raw text:
	_readString(inString);
	// Line-level bookkeeping (presumably: count lines, size the per-line
	// information vector, then point each entry at its line):
	_getLineCount();
	_allocateLineInformationVector();
	_assignLinePointers();
	// Block / record structure of the input:
	_determineNumberOfBlocks();
	_assignRecordTypes();
	_checkDataBlockRectangularity();
	// Column layout (presumably: fill rates are used to find the columns and
	// their field boundaries):
	_calculateColumnFillRates();
	_determineNumberOfColumns();
	_determineFieldBoundaries();
	// Header interpretation:
	_parseHeader();
	//
	// Column classification is intentionally NOT done here any more; it is
	// now handled downstream in DataTable instead:
	//_classifyColumns();
	// This is basically obsolete now, because DataTable handles classification:
	//_resolveDeclaredVersusDiscoveredColumnTypes();
	//
	// Final step of the pipeline (presumably copies the parsed fields into
	// the table's vectors):
	_fillVectors();
	// DEBUG: uncomment to dump the parsed table.
	//display();
}
// Example #2
// 0
///
/// readFile: Reads a local file on disk
///
void Parser::readFile(const std::string &fileName){
	
	_fileName = fileName;
	
	//////////////////////////////////////////////////////////////////////////////
	//
	// First find out what kind of file it is using the fileTypeClassifier class:
	//
	//////////////////////////////////////////////////////////////////////////////
	
	//
	// FileTypeClassifier is now a local instance only:
	//
	FileTypeClassifier fileTypeClassifier;
	
	if(!fileTypeClassifier.setFileToTest(_fileName.c_str())){
		Warning("In Parser::readFile(), File %s cannot be located or read and will be ignored",
		        _fileName.c_str() );
		return;
	}
	
	_fileType     = fileTypeClassifier.classify();
	_fileTypeName = fileTypeClassifier.classifyByName();
	
	std::string fileData;
	Compressor newCompressor;
	
	//
	// Message for user:
	//
	std::cout << "Parser::readFile(): Opening a(n) " << _fileTypeName << " file ...\n";
	
	//
	// If the file is compressed, decompress it into a std::string. 
	// Otherwise, read it into a std::string.
	//
	if( _fileType==FileTypeClassifier::PKZIP || 
	    _fileType==FileTypeClassifier::GZIP  || 
	    _fileType==FileTypeClassifier::BZIP2
	)
	{
		//
		// Decompress into a std::string :
		//
		std::cout << "Parser::readFile(): Decompressing " << _fileTypeName << " file ...\n";
		fileData = newCompressor.decompress(_fileName);
	}
	else
	{
		//
		// Read into a std::string :
		//
		fileData = _stringify(_fileName);
	}
	
	////////////////////////////////////////
	//
	// Now process the fileData std::string:
	//
	////////////////////////////////////////
	
	if( _xmlTagManager.discoverFormat(fileData) )
	{
		//
		// fileData are in a recognized XML format:
		//
		std::cout << "Reading file data in " << _xmlTagManager.getFormatName() << " format ...\n";
		_readXML(fileData);
		
	}
	else
	{
		// 
		// If it is not an XML file, then it is a flat file.
		// Two flat file formats are recognized:
		//  
		//  (1) Original Madeline format    (2) Delimited format
		//
		// If there is only 1 block, then it is a tab delimited file. 
		// If there are 2 blocks, it is in Madeline format:
		//
		int blockCount = _determineNumberOfBlocks( fileData );
		if( blockCount == 1)
		{
			
			std::cout << "Reading file data in delimited format ...\n";
			_readDelimited(fileData);
		}
		else if( blockCount == 2)
		{
			
			std::cout << "Reading file data in Madeline flat file format ...\n";
			_readMadeline(fileData);
			
		}
		else
		{
			
			Warning("Parser::readFile(): Sorry, the format of the file %s could not be determined and will be ignored.",
			        _fileName.c_str()
			);
			
		}
	}
}