Esempio n. 1
0
void CSVLineParser :: Parse( const string & csv, vector <string> & data ) {
	mPos = 0;
	mCSV = & csv;
	data.clear();
	mMore = true;
	while( mMore ) {
		mMore = false;
		char c = Peek();
		if ( c == '"' ) {
			data.push_back( GetQuoted() );
		}
		else {
			data.push_back( GetNonQuoted() );
		}
	}
}
Esempio n. 2
0
/*----------------------------------------------------------------------------------------------------------------------
|	Reads characters from in until a complete token has been read and stored in token. GetNextToken performs a number 
|	of useful operations in the process of retrieving tokens:
|~
|	o any underscore characters encountered are stored as blank spaces (unless the labile flag bit preserveUnderscores
|	  is set)
|	o if the first character of the next token is an isolated single quote, then the entire quoted NxsString is saved 
|	  as the next token
|	o paired single quotes are automatically converted to single quotes before being stored
|	o comments are handled automatically (normal comments are treated as whitespace and output comments are passed to 
|	  the function OutputComment which does nothing in the NxsToken class but can be overridden in a derived class to 
|	  handle these in an appropriate fashion)
|	o leading whitespace (including comments) is automatically skipped
|	o if the end of the file is reached on reading this token, the atEOF flag is set and may be queried using the AtEOF 
|	  member function
|	o punctuation characters are always returned as individual tokens (see the Maddison, Swofford, and Maddison paper 
|	  for the definition of punctuation characters) unless the flag ignorePunctuation is set in labileFlags,
|	  in which case the normal punctuation symbols are treated just like any other darkspace character.
|~
|	The behavior of GetNextToken may be altered by using labile flags. For example, the labile flag saveCommandComments 
|	can be set using the member function SetLabileFlagBit. This will cause comments of the form [&X] to be saved as 
|	tokens (without the square brackets), but only for the aquisition of the next token. Labile flags are cleared after 
|	each application.
*/
void NxsToken::GetNextToken()
	{
	ResetToken();

	char ch = ' ';
	if (saved == '\0' || IsWhitespace(saved))
		{
		// Skip leading whitespace
		//
		while( IsWhitespace(ch) && !atEOF)
			ch = GetNextChar();
		saved = ch;
		}

	for(;;)
		{
		// Break now if singleCharacterToken mode on and token length > 0.
		//
		if (labileFlags & singleCharacterToken && token.size() > 0)
			break;

		// Get next character either from saved or from input stream.
		//
		if (saved != '\0')
			{
			ch = saved;
			saved = '\0';
			}
		else
			ch = GetNextChar();

		// Break now if we've hit EOF.
		//
		if (atEOF)
			break;

		if (ch == '\n' && labileFlags & newlineIsToken)
			{
			if (token.size() > 0)
				{
				// Newline came after token, save newline until next time when it will be 
				// reported as a separate token.
				//
				atEOL = 0;
				saved = ch;
				}
			else
				{
				atEOL = 1;
				AppendToToken(ch);
				}
			break;
			}

		else if (IsWhitespace(ch))
			{
			// Break only if we've begun adding to token (remember, if we hit a comment before a token,
			// there might be further white space between the comment and the next token).
			//
			if (token.size() > 0)
				break;
			}

		else if (ch == '_')
			{
			// If underscores are discovered in unquoted tokens, they should be 
			// automatically converted to spaces.
			//
			if (!(labileFlags & preserveUnderscores))
				ch = ' ';
			AppendToToken(ch);
			}

		else if (ch == '[')
			{
			// Get rest of comment and deal with it, but notice that we only break if the comment ends a token,
			// not if it starts one (comment counts as whitespace). In the case of command comments 
			// (if saveCommandComment) GetComment will add to the token NxsString, causing us to break because
			// token.size() will be greater than 0.
			comment.clear();
			GetComment();
			if (token.size() > 0)
			break;
			}

		else if (ch == '(' && labileFlags & parentheticalToken)
			{
			AppendToToken(ch);

			// Get rest of parenthetical token.
			//
			GetParentheticalToken();
			break;
			}

		else if (ch == '{' && labileFlags & curlyBracketedToken)
			{
			AppendToToken(ch);

			// Get rest of curly-bracketed token.
			//
			GetCurlyBracketedToken();
			break;
			}

		else if (ch == '\"' && labileFlags & doubleQuotedToken)
			{
			// Get rest of double-quoted token.
			//
			GetDoubleQuotedToken();
			break;
			}

		else if (ch == '\'')
			{
			if (token.size() > 0)
				{
				// We've encountered a single quote after a token has
				// already begun to be read; should be another tandem
				// single quote character immediately following.
				//
				ch = GetNextChar();
				if (ch == '\'')
					AppendToToken(ch);
				else
					{
					errormsg = "Expecting second single quote character";
					throw NxsException( errormsg, GetFilePosition(), GetFileLine(), GetFileColumn());
					}
				}
			else
				{
				// Get rest of quoted NEXUS word and break, since
				// we will have eaten one token after calling GetQuoted.
				//
				GetQuoted();
				}
			break;
			}

		else if (IsPunctuation(ch))
			{
			if (token.size() > 0)
				{
				// If we've already begun reading the token, encountering
				// a punctuation character means we should stop, saving
				// the punctuation character for the next token.
				//
				saved = ch;
				break;
				}
			else
				{
				// If we haven't already begun reading the token, encountering
				// a punctuation character means we should stop and return
				// the punctuation character as this token (i.e., the token
				// is just the single punctuation character.
				//
				AppendToToken(ch);
				break;
				}
			}

		else
			{
			AppendToToken(ch);
			}

		}

	labileFlags = 0;
	}