//------------------------------------------------------------------------------ char Tokeniser::GetNextChar () { int ch; #ifdef __MWERKS__ if (putBuffer != '\0') { ch = putBuffer; putBuffer = '\0'; } else ch = in.get(); #else if (putBackChar == '\0') { ch = in.get(); } else { ch = putBackChar; } putBackChar = '\0'; #endif int failed = in.bad(); if( failed ) throw XTokeniser ( "Unknown error reading data file (check to make sure file exists)" ); //std::cout << "[" << (char)ch << "]" << std::endl; if( ch == 13 || ch == 10 ) { fileline++; filecol = 1L; if( ch == 13 && (int)in.peek() == 10 ) ch = in.get(); atEOL = true; } else if( ch == EOF ) { atEOF = true; } else { filecol++; atEOL = false; } filepos = in.tellg(); if (atEOF ) return '\0'; else if (atEOL ) return '\n'; else return (char)ch; }
//------------------------------------------------------------------------------ // Uses a simple pushdown automaton to read Newick-style trees bool TreeReader::Read (TreePtr t) { // States of pushdown automaton that reads trees enum statetype { GETNAME, GETINTERNODE, NEXTMOVE, DOSIBLING, FINISHCHILDREN, ACCEPTED, CLEANUP, QUIT } state; std::stack< NodePtr, std::vector<NodePtr> > stk; Tokeniser::tokentype token; tree = t; tree->MakeRoot(); token = parser.GetNextToken (); if (token == Tokeniser::EMPTY) return false; // Parse the tree description state = GETNAME; while ((state != QUIT) && (state != ACCEPTED)) { switch (state) { case GETNAME: switch (token) { case Tokeniser::STRING: case Tokeniser::NUMBER: LabelLeaf (parser.GetToken()); token = parser.GetNextToken (); state = GETINTERNODE; break; case Tokeniser::LPAR: state = NEXTMOVE; break; default: errormsg = "Syntax error [GETNAME]: expecting a \"(\" or leaf name, got \""; errormsg += parser.GetToken(); errormsg += "\" instead"; state = QUIT; break; } break; case GETINTERNODE: switch (token) { case Tokeniser::COLON: case Tokeniser::COMMA: case Tokeniser::RPAR: state = NEXTMOVE; break; default: errormsg = "Syntax error [GETINTERNODE]: expecting one of \":,)\", got "; errormsg += parser.GetToken(); errormsg += " instead"; state = QUIT; break; } break; case NEXTMOVE: switch (token) { case Tokeniser::COLON: if (LabelEdge ()) token = parser.GetNextToken (); else state = QUIT; break; // The next node encountered will be a sibling // of Curnode and a descendant of the node on // the top of the node stack. case Tokeniser::COMMA: if (stk.empty()) { errormsg = "Tree description unbalanced, this \")\" has no matching \"(\""; state = QUIT; } else { tree->MakeSibling (); //token = parser.GetNextToken (); token = GetTaxonName(); state = GETNAME; } break; // The next node will be a child of CurNode, hence // we create the node and push CurNode onto the // node stack. case Tokeniser::LPAR: stk.push (tree->GetCurNode()); tree->MakeChild(); //token = parser.GetNextToken (); token = GetTaxonName(); state = GETNAME; break; // We've finished ready the descendants of the node // at the top of the node stack so pop it off. case Tokeniser::RPAR: if (stk.empty()) { errormsg = "Tree description unbalanced (an extra \")\")"; state = QUIT; } else { NodePtr q = stk.top(); q->AddWeight(tree->GetCurNode()->GetWeight()); tree->SetCurNode (q); stk.pop (); token = parser.GetNextToken (); state = FINISHCHILDREN; } break; // We should have finished the tree case Tokeniser::SEMICOLON: if (stk.empty()) { state = ACCEPTED; } else { errormsg = "Tree description ended prematurely (stack not empty)"; state = QUIT; } break; default: errormsg = "Syntax error [NEXTMOVE]: expecting one of \":,();\", got "; errormsg += parser.GetToken(); errormsg += " instead"; state = QUIT; break; } break; case FINISHCHILDREN: switch (token) { case Tokeniser::STRING: case Tokeniser::NUMBER: LabelInternalNode (parser.GetToken()); token = parser.GetNextToken (); break; case Tokeniser::COLON: if (LabelEdge ()) token = parser.GetNextToken (); else state = QUIT; break; // We've completed traversing the descendants of the // node at the top of the stack, so pop it off. case Tokeniser::RPAR: if (stk.empty()) { errormsg = "Tree description unbalanced, this \")\" has no matching \"(\""; state = QUIT; } else { NodePtr q = stk.top(); q->AddWeight(tree->GetCurNode()->GetWeight()); tree->SetCurNode (q); stk.pop (); token = parser.GetNextToken (); } break; // The node at the top of the stack still has some // descendants. case Tokeniser::COMMA: if (stk.empty()) { errormsg = "Tree description unbalanced, missing a \"(\""; state = QUIT; } else { tree->MakeSibling (); //token = parser.GetNextToken (); token = GetTaxonName(); state = GETNAME; } break; case Tokeniser::SEMICOLON: state = NEXTMOVE; break; default: if (stk.empty()) { errormsg = "Tree description unbalanced"; state = QUIT; } else { errormsg = "Syntax error [FINISHCHILDREN]: expecting one of \":,();\" or internal label, got "; errormsg += parser.GetToken(); errormsg += " instead"; } state = QUIT; break; } break; } } // Handle errors if (state == QUIT) { // Clean memory here... // ... then throw exception throw XTokeniser (errormsg, parser.GetFilePosition(), parser.GetFileLine (), parser.GetFileColumn()); } else { tree->GetRoot()->SetWeight(tree->GetNumLeaves()); doAdjust (); } return true; }