/**
 * Lexes and parses an entire input stream into a single element.
 *
 * The stream is first lexed into a vector of elements. If lexing does not
 * report success, a default-constructed element is returned. Otherwise
 * ParseAny is called repeatedly until the token stream is exhausted:
 *
 *  - exactly one top-level element parsed: that element is returned as-is;
 *  - more than one parsed: they are collected, in order, into an Imperative
 *    and the Imperative is returned;
 *  - ParseAny fails: an Error describing the next token (message, row and
 *    column) is returned immediately.
 *
 * @param in -- The input stream to read source text from.
 *
 * @return the parsed element, an Imperative wrapping several elements, or
 *         an Error for input that could not be parsed.
 */
Element Parser::Parse(std::istream& in)
{
    std::vector<Element> lexed_elements;
    LexResult lex_result(Lex(in, lexed_elements));
    strine::Element element;

    // Nothing to parse when lexing failed; hand back the empty element.
    if (!lex_result.Success())
    {
        return element;
    }

    TokenStream token_stream(lexed_elements);
    strine::Imperative sequence;

    while (token_stream.HasTokens())
    {
        if (!ParseAny(token_stream, element))
        {
            // Report the token at the front of the stream as the place
            // where parsing could not start.
            Element offending = token_stream.NextToken();
            SourceLocation const& where = offending.GetSourceLocation();

            std::stringstream message;
            message << "parser(" << where.GetRow() << "): Unrecognized start \""
                    << offending.ToString() << "\"";

            Error error(message.str());
            error.SetRow(where.GetRow());
            error.SetColumn(where.GetColumn());
            return error;
        }

        // An element joins the sequence when more input follows it, or when
        // it is the last one and a sequence has already been started. A lone
        // element is therefore never wrapped.
        if (token_stream.HasTokens() || sequence.NumberOfElements() > 0)
        {
            sequence.Add(element);
        }
    }

    // Several top-level elements were seen: return them as one imperative.
    if (sequence.NumberOfElements() > 0)
    {
        element = sequence;
    }

    return element;
}
bool XSDParser::ParseContent(DTDElement& node, bool extended /*=false*/) { DTDElement::EType curr_type; int emb=0; bool eatEOT= false; bool hasContents= false; TToken tok; for ( tok=GetNextToken(); ; tok=GetNextToken()) { emb= node.GetContent().size(); if (tok != T_EOF && tok != K_ENDOFTAG && tok != K_ANNOTATION) { hasContents= true; } switch (tok) { case T_EOF: return hasContents; case K_ENDOFTAG: if (eatEOT) { eatEOT= false; break; } FixEmbeddedNames(node); return hasContents; case K_COMPLEXTYPE: ParseComplexType(node); break; case K_SIMPLECONTENT: ParseSimpleContent(node); break; case K_EXTENSION: ParseExtension(node); break; case K_RESTRICTION: ParseRestriction(node); break; case K_ATTRIBUTE: ParseAttribute(node); break; case K_ATTRIBUTEGROUP: ParseAttributeGroup(node); break; case K_ANY: node.SetTypeIfUnknown(DTDElement::eSequence); { string name = CreateTmpEmbeddedName(node.GetName(), emb); DTDElement& elem = m_MapElement[name]; elem.SetName(name); elem.SetSourceLine(Lexer().CurrentLine()); elem.SetEmbedded(); elem.SetType(DTDElement::eAny); elem.SetQualified(node.IsQualified()); ParseAny(elem); AddElementContent(node,name); } break; case K_SEQUENCE: emb= node.GetContent().size(); if (emb != 0 && extended) { node.SetTypeIfUnknown(DTDElement::eSequence); if (node.GetType() != DTDElement::eSequence) { ParseError("sequence"); } tok = GetRawAttributeSet(); eatEOT = true; break; } curr_type = node.GetType(); if (curr_type == DTDElement::eUnknown || curr_type == DTDElement::eUnknownGroup || (m_ResolveTypes && curr_type == DTDElement::eEmpty)) { node.SetType(DTDElement::eSequence); ParseContainer(node); if (node.GetContent().empty()) { node.ResetType(curr_type); } } else { string name = CreateTmpEmbeddedName(node.GetName(), emb); DTDElement& elem = m_MapElement[name]; elem.SetName(name); elem.SetSourceLine(Lexer().CurrentLine()); elem.SetEmbedded(); elem.SetType(DTDElement::eSequence); elem.SetQualified(node.IsQualified()); ParseContainer(elem); 
AddElementContent(node,name); } break; case K_CHOICE: curr_type = node.GetType(); if (curr_type == DTDElement::eUnknown || curr_type == DTDElement::eUnknownGroup || (m_ResolveTypes && curr_type == DTDElement::eEmpty)) { node.SetType(DTDElement::eChoice); ParseContainer(node); if (node.GetContent().empty()) { node.ResetType(curr_type); } } else { string name = CreateTmpEmbeddedName(node.GetName(), emb); DTDElement& elem = m_MapElement[name]; elem.SetName(name); elem.SetSourceLine(Lexer().CurrentLine()); elem.SetEmbedded(); elem.SetType(DTDElement::eChoice); elem.SetQualified(node.IsQualified()); ParseContainer(elem); AddElementContent(node,name); } break; case K_SET: curr_type = node.GetType(); if (curr_type == DTDElement::eUnknown || curr_type == DTDElement::eUnknownGroup || (m_ResolveTypes && curr_type == DTDElement::eEmpty)) { node.SetType(DTDElement::eSet); ParseContainer(node); if (node.GetContent().empty()) { node.ResetType(curr_type); } } else { string name = CreateTmpEmbeddedName(node.GetName(), emb); DTDElement& elem = m_MapElement[name]; elem.SetName(name); elem.SetSourceLine(Lexer().CurrentLine()); elem.SetEmbedded(); elem.SetType(DTDElement::eSet); elem.SetQualified(node.IsQualified()); ParseContainer(elem); AddElementContent(node,name); } break; case K_ELEMENT: { string name = ParseElementContent(&node,emb); AddElementContent(node,name); } break; case K_GROUP: { string name = ParseGroup(&node,emb); AddElementContent(node,name); } break; case K_ANNOTATION: SetCommentsIfEmpty(&(node.Comments())); ParseAnnotation(); break; case K_UNION: ParseUnion(node); break; case K_LIST: ParseList(node); break; default: for ( tok = GetNextToken(); tok == K_ATTPAIR || tok == K_XMLNS; tok = GetNextToken()) ; if (tok == K_CLOSING) { ParseContent(node); } break; } } FixEmbeddedNames(node); return hasContents; }
/** * One of the key methods inside of parser can take a general description * of what should be on the token stream and attempts to figure it out. * * @param in -- The input token stream. * @param rules -- The set of input rules we tokenize by. * @param elements -- The set of elements. * * @return true if the set of rules can be parsed, false otherwise. */ bool Parser::CanParse( TokenStream& in , std::vector<ParseRule> const& rules , std::vector<Element>& elements) const { bool success = true; elements.clear(); size_t rule_index = 0; while(in.HasTokens() && rule_index < rules.size()) { // Grab the current rule. ParseRule const& rule = rules[rule_index]; // Check to see if the current rule states that we expect a token, // if this is the case, then try to match the next element as a // token. if (rule.Type() == strine::Types::TOKEN) { strine::Element current_element = in.NextToken(); std::string const rule_token(rule.Token()); std::string const element_string(current_element.ToString()); success = (rule_token == element_string); } // If the rule states that the type should be ANY, try to parse // out any element by using ParseAny. else if (rule.Type() == ParseRule::ANY) { strine::Element current_element; // Push a marker onto the token stream. in.Push(); success = ParseAny( in, current_element ); if (success) { elements.push_back(current_element); } else { in.Rollback(); } } // If the rule.Type() hasn't been specified, then go ahead and try // to match up the types. else { strine::Element current_element = in.NextToken(); if (rule.Type() == current_element.Type()) { success = true; elements.push_back(current_element); } else { success = false; } } if (false == success) { break; } rule_index += 1; } if (rule_index < rules.size()) { success = false; } return success; }