// Splits one line of CSV text into its individual fields.
//
//   csv  - the line to parse. Its address is stored in mCSV, so the string
//          must outlive any later use of that pointer.
//   data - emptied first, then filled with one string per field, in the
//          order the fields are read.
//
// Each pass inspects the upcoming character with Peek(): a double quote
// sends the field to GetQuoted(), anything else to GetNonQuoted(). mMore is
// cleared before a field is read and the loop repeats only if it is true
// again afterwards -- presumably the field readers raise it when a separator
// follows (their definitions are not visible here; confirm). Because the
// loop body always runs once, data always receives at least one entry.
void CSVLineParser::Parse(const string& csv, vector<string>& data) {
    // Rewind the cursor and bind the parser to the new line.
    mPos = 0;
    mCSV = &csv;
    data.clear();

    do {
        // Assume this is the last field until something says otherwise.
        mMore = false;

        const char next = Peek();
        if (next != '"') {
            data.push_back(GetNonQuoted());
        } else {
            data.push_back(GetQuoted());
        }
    } while (mMore);
}
/*---------------------------------------------------------------------------------------------------------------------- | Reads characters from in until a complete token has been read and stored in token. GetNextToken performs a number | of useful operations in the process of retrieving tokens: |~ | o any underscore characters encountered are stored as blank spaces (unless the labile flag bit preserveUnderscores | is set) | o if the first character of the next token is an isolated single quote, then the entire quoted NxsString is saved | as the next token | o paired single quotes are automatically converted to single quotes before being stored | o comments are handled automatically (normal comments are treated as whitespace and output comments are passed to | the function OutputComment which does nothing in the NxsToken class but can be overridden in a derived class to | handle these in an appropriate fashion) | o leading whitespace (including comments) is automatically skipped | o if the end of the file is reached on reading this token, the atEOF flag is set and may be queried using the AtEOF | member function | o punctuation characters are always returned as individual tokens (see the Maddison, Swofford, and Maddison paper | for the definition of punctuation characters) unless the flag ignorePunctuation is set in labileFlags, | in which case the normal punctuation symbols are treated just like any other darkspace character. |~ | The behavior of GetNextToken may be altered by using labile flags. For example, the labile flag saveCommandComments | can be set using the member function SetLabileFlagBit. This will cause comments of the form [&X] to be saved as | tokens (without the square brackets), but only for the aquisition of the next token. Labile flags are cleared after | each application. 
*/ void NxsToken::GetNextToken() { ResetToken(); char ch = ' '; if (saved == '\0' || IsWhitespace(saved)) { // Skip leading whitespace // while( IsWhitespace(ch) && !atEOF) ch = GetNextChar(); saved = ch; } for(;;) { // Break now if singleCharacterToken mode on and token length > 0. // if (labileFlags & singleCharacterToken && token.size() > 0) break; // Get next character either from saved or from input stream. // if (saved != '\0') { ch = saved; saved = '\0'; } else ch = GetNextChar(); // Break now if we've hit EOF. // if (atEOF) break; if (ch == '\n' && labileFlags & newlineIsToken) { if (token.size() > 0) { // Newline came after token, save newline until next time when it will be // reported as a separate token. // atEOL = 0; saved = ch; } else { atEOL = 1; AppendToToken(ch); } break; } else if (IsWhitespace(ch)) { // Break only if we've begun adding to token (remember, if we hit a comment before a token, // there might be further white space between the comment and the next token). // if (token.size() > 0) break; } else if (ch == '_') { // If underscores are discovered in unquoted tokens, they should be // automatically converted to spaces. // if (!(labileFlags & preserveUnderscores)) ch = ' '; AppendToToken(ch); } else if (ch == '[') { // Get rest of comment and deal with it, but notice that we only break if the comment ends a token, // not if it starts one (comment counts as whitespace). In the case of command comments // (if saveCommandComment) GetComment will add to the token NxsString, causing us to break because // token.size() will be greater than 0. comment.clear(); GetComment(); if (token.size() > 0) break; } else if (ch == '(' && labileFlags & parentheticalToken) { AppendToToken(ch); // Get rest of parenthetical token. // GetParentheticalToken(); break; } else if (ch == '{' && labileFlags & curlyBracketedToken) { AppendToToken(ch); // Get rest of curly-bracketed token. 
// GetCurlyBracketedToken(); break; } else if (ch == '\"' && labileFlags & doubleQuotedToken) { // Get rest of double-quoted token. // GetDoubleQuotedToken(); break; } else if (ch == '\'') { if (token.size() > 0) { // We've encountered a single quote after a token has // already begun to be read; should be another tandem // single quote character immediately following. // ch = GetNextChar(); if (ch == '\'') AppendToToken(ch); else { errormsg = "Expecting second single quote character"; throw NxsException( errormsg, GetFilePosition(), GetFileLine(), GetFileColumn()); } } else { // Get rest of quoted NEXUS word and break, since // we will have eaten one token after calling GetQuoted. // GetQuoted(); } break; } else if (IsPunctuation(ch)) { if (token.size() > 0) { // If we've already begun reading the token, encountering // a punctuation character means we should stop, saving // the punctuation character for the next token. // saved = ch; break; } else { // If we haven't already begun reading the token, encountering // a punctuation character means we should stop and return // the punctuation character as this token (i.e., the token // is just the single punctuation character. // AppendToToken(ch); break; } } else { AppendToToken(ch); } } labileFlags = 0; }