Example #1
0
   /// Parses a whole document from the token stream into \p doc.
   ///
   /// The document node is named "root" and bound to \p context. The header is
   /// consumed through parseHeader(), then exactly one top-level node is parsed
   /// through parseNode() and attached to \p doc when that parse succeeds.
   /// If the context has an event handler, startDocument() and endDocument()
   /// are raised around the work.
   ///
   /// \param doc      document that receives the parsed root node
   /// \param context  parsing context (tag name table, optional event handler)
   /// \return the result of parsing the single top-level node
   bool Parser::parseDocument(Document& doc, ContextPtr& context)
   {
      // Bind the document to its context and register the fixed root name.
      doc.mContext = context;
      std::string root_name("root");
      doc.mNodeNameHandle = context->insertTagname(root_name);
#ifdef CPPDOM_DEBUG
      doc.mNodeName_debug = root_name;
#endif

      // Sampled once: the same answer drives both document events.
      const bool notify = context->hasEventHandler();
      if (notify)
      {
         context->getEventHandler().startDocument();
      }

      // Header first (its return value is not used here) ...
      parseHeader(doc, context);

      // ... then the one and only top-level node.
      NodePtr top_node(new Node(context));
      const bool parsed = parseNode(*top_node, context);
      if (parsed)
      {
         doc.addChild(top_node);
      }

      if (notify)
      {
         context->getEventHandler().endDocument();
      }

      return parsed;
   }
Example #2
0
   /// Parses the document header, i.e. processing instructions, comments and
   /// the doctype tag that precede the first regular node.
   ///
   /// Each loop iteration consumes one header construct:
   ///  - `<!-- ... -->` is handed to parseComment(),
   ///  - `<!DOCTYPE ...>` (compared case-insensitively) is skipped up to the
   ///    next '>' token,
   ///  - `<?name attributes?>` is stored in doc.mProcInstructions and reported
   ///    to the event handler, if the context has one.
   /// As soon as the token following '<' is not a literal, both tokens are
   /// pushed back so that the caller can parse the node, and the function
   /// returns.
   ///
   /// \param doc      document that collects the processing instructions
   /// \param context  parsing context (tag name table, optional event handler)
   /// \return always false; this function only returns once a regular tag
   ///         start has been found and pushed back
   /// \throws CPPDOM_ERROR with xml_opentag_expected, xml_pi_doctype_expected,
   ///         xml_closetag_expected or xml_unknown on malformed input
   /// \todo parse <!doctype> tag
   bool Parser::parseHeader(Document& doc, ContextPtr& context)
   {
      while(true)
      {
         ++mTokenizer;
         Token token1 = *mTokenizer;
         if (token1 != '<')
         {
            throw CPPDOM_ERROR(xml_opentag_expected, "");
         }

         // token after opening < is a literal?
         ++mTokenizer;
         Token token2 = *mTokenizer;
         if (!token2.isLiteral())
         {
            // generic string encountered: assume no pi and doctype tags
            mTokenizer.putBack();
            mTokenizer.putBack(token1);
            return false;
         }

         // now check for the literal
         switch(token2.getLiteral())
         {
            // comment or doctype tag
         case '!':
            {
               ++mTokenizer;
               Token token3 = *mTokenizer;

               if (token3.isLiteral())
               {
                  throw CPPDOM_ERROR(xml_pi_doctype_expected, "");
               }

               // now a doctype tag or a comment may follow
               const std::string tag_text(token3.getGeneric());

               // compare() clamps to the string length, so tokens shorter than
               // two characters fall through to the doctype branch and are
               // reported as a parser error instead of raising
               // std::out_of_range.
               if (tag_text.compare(0, 2, "--") == 0)
               {
                  // needed to correctly handle <!---->
                  Token temp_token(tag_text.substr(2));
                  mTokenizer.putBack(temp_token);
                  parseComment(context);
               }
               else
               {
                  std::string doctypestr(tag_text);

                  // toupper() is only defined for unsigned char values (and
                  // EOF), so convert explicitly before the call.
                  for (std::string::size_type i = 0; i < doctypestr.size(); ++i)
                  {
                     doctypestr[i] = static_cast<char>(
                        toupper(static_cast<unsigned char>(doctypestr[i])));
                  }

                  if (doctypestr != "DOCTYPE")
                  {
                     throw CPPDOM_ERROR(xml_unknown, "");
                  }

                  // \todo parse doctype tag

                  // read the complete tag till the closing >; give up with an
                  // error when the stream ends first rather than spinning on
                  // the end-of-stream token
                  Token skipped = *(mTokenizer++);
                  while (skipped != '>')
                  {
                     if (skipped.isEndOfStream())
                     {
                        throw CPPDOM_ERROR(xml_closetag_expected, "");
                     }
                     skipped = *(mTokenizer++);
                  }
               }

               break;
            }
         case '?':
            {
               ++mTokenizer;
               Token token3 = *mTokenizer;

               if (token3.isLiteral())
               {
                  throw CPPDOM_ERROR(xml_pi_doctype_expected, "");
               }

               // parse processing instruction
               Node pinode(context);

               std::string tagname(token3.getGeneric());
               pinode.mNodeNameHandle = context->insertTagname(tagname);
#ifdef CPPDOM_DEBUG
               pinode.mNodeName_debug = tagname;
#endif

               parseAttributes(pinode.attrib());

               // the document keeps its own heap copy of the instruction node
               NodePtr nodeptr(new Node(pinode));
               doc.mProcInstructions.push_back(nodeptr);

               // note: the handler is notified before the closing "?>" has
               // been verified
               if (context->hasEventHandler())
               {
                  context->getEventHandler().processingInstruction(pinode);
               }

               ++mTokenizer;
               if (*mTokenizer != '?')
               {
                  throw CPPDOM_ERROR(xml_pi_doctype_expected, "");
               }

               ++mTokenizer;
               if (*mTokenizer != '>')
               {
                  throw CPPDOM_ERROR(xml_closetag_expected, "");
               }
               break;
            }
         default:
            // unknown literal encountered
            throw CPPDOM_ERROR(xml_pi_doctype_expected, "");

         } // end switch

      } // end while
   }
Example #3
0
   /// Parses the next node from the token stream into \p node.
   ///
   /// Three kinds of input are handled:
   ///  - character data: consecutive non-literal tokens are concatenated into
   ///    node.mCdata, XML escaping is removed, and the node becomes
   ///    Node::xml_nt_cdata;
   ///  - a leaf tag `<name attributes/>`: the node becomes Node::xml_nt_leaf
   ///    (no endNode event is raised on this path);
   ///  - a regular tag `<name attributes> children </name>`: children are
   ///    parsed recursively and added to \p node until a child parse returns
   ///    false, then the matching end tag is verified.
   /// Comments in front of the node are skipped through parseComment().
   ///
   /// \param node     node to fill in; its context is set to \p context
   /// \param context  parsing context (tag name table, optional event handler)
   /// \return true if a node was parsed; false at end of stream or when a
   ///         closing tag `</` follows (its two tokens are pushed back for
   ///         the caller)
   /// \throws CPPDOM_ERROR with xml_opentag_cdata_expected,
   ///         xml_tagname_expected, xml_closetag_expected,
   ///         xml_opentag_expected or xml_tagname_close_mismatch on
   ///         malformed input
   bool Parser::parseNode(Node& node, ContextPtr& context)
   {
      node.mContext = context;
      bool handle = context->hasEventHandler();

      ++mTokenizer;
      Token token1 = *mTokenizer;

      if (token1.isEndOfStream())
      {
         return false;
      }

      Token token2;

      // loop when we encounter a comment
      bool again;
      do
      {
         again = false;

         // check if we have cdata
         if (!token1.isLiteral())
         {
            std::string cdataname("cdata");
            node.mNodeNameHandle = context->insertTagname(cdataname);
#ifdef CPPDOM_DEBUG
            node.mNodeName_debug = cdataname;
#endif

            // parse cdata section(s) and return
            node.mNodeType = Node::xml_nt_cdata;
            // start from an empty buffer (the former call to empty() only
            // queried the string and discarded the answer)
            node.mCdata.clear();

            while(!token1.isLiteral())
            {
               node.mCdata += token1.getGeneric();
               ++mTokenizer;
               token1 = *mTokenizer;
            }
            // the literal that ended the run belongs to the next construct
            mTokenizer.putBack();

            // Clean up the cdata escaping
            if(textContainsXmlEscaping(node.mCdata))
            {  node.mCdata = removeXmlEscaping(node.mCdata, true); }

            if (handle)
            {
               context->getEventHandler().gotCdata( node.mCdata );
            }

            return true;
         }

         // no cdata, try to continue parsing node content
         // Must be a start of a node (ie. < literal)
         if (token1 != '<')
         {
            throw CPPDOM_ERROR(xml_opentag_cdata_expected, "");
         }

         // get node name
         ++mTokenizer;
         token2 = *mTokenizer;
         if (token2.isLiteral())
         {
            // check the following literal
            switch(token2.getLiteral())
            {
               // closing '</...>' follows
            case '/':
               // return, we have a closing node with no more content;
               // hand '<' and '/' back so the caller can parse the end tag
               mTokenizer.putBack();
               mTokenizer.putBack(token1);
               return false;

               // comment follows
            case '!':
               {
                  // Fetch the token that must open the comment with "--".
                  ++mTokenizer;
                  const std::string comment_open(mTokenizer->getGeneric());

                  // compare() clamps to the string length, so a short or
                  // non-comment token is rejected here instead of making
                  // substr(2) throw std::out_of_range or being treated as a
                  // comment.
                  if (comment_open.compare(0, 2, "--") != 0)
                  {
                     throw CPPDOM_ERROR(xml_tagname_expected, "");
                  }

                  // needed to correctly handle <!---->
                  Token temp_token(comment_open.substr(2));
                  mTokenizer.putBack(temp_token);
                  this->parseComment(context);

                  // get next token
                  ++mTokenizer;
                  token1 = *mTokenizer;

                  // parse again, until we encounter some useful data
                  again = true;
               }
               break;

            default:
               throw CPPDOM_ERROR(xml_tagname_expected, "");
            }
         }
      } while (again);

      // insert tag name and set handle for it
      std::string tagname(token2.getGeneric());
      node.mNodeNameHandle = context->insertTagname(tagname);
#ifdef CPPDOM_DEBUG
      node.mNodeName_debug = tagname;
#endif

      // notify event handler
      if (handle)
      {
         context->getEventHandler().startNode(tagname);
      }

      // parse attributes
      this->parseAttributes(node.attrib());

      if (handle)
      {
         context->getEventHandler().parsedAttributes(node.attrib());
      }

      // check for leaf
      ++mTokenizer;
      Token token3 = *mTokenizer;
      if (token3 == '/' )
      {
         // node has finished
         ++mTokenizer;
         Token token4 = *mTokenizer;
         if (token4 != '>' )
         {
            throw CPPDOM_ERROR(xml_closetag_expected, "");
         }

         node.mNodeType = Node::xml_nt_leaf;

         // return, let the caller continue to parse
         return true;
      }

      // now a closing bracket must follow
      if (token3 != '>')
      {
         throw CPPDOM_ERROR(xml_closetag_expected, "");
      }

      // loop to parse all subnodes
      while (true)
      {
         // create subnode
         NodePtr new_subnode(new Node(context));

         // try to parse possible sub nodes; stop at the first failure
         if (!this->parseNode(*new_subnode, context))
         {
            break;
         }

         // if successful, put node into nodelist
         node.addChild(new_subnode);
      }

      // parse end tag: both the '<' and the '/' token are required, so the
      // check fails as soon as either one is missing
      ++mTokenizer;
      Token token5 = *mTokenizer;
      ++mTokenizer;
      if (token5 != '<' || *mTokenizer != '/')
      {
         throw CPPDOM_ERROR(xml_opentag_expected, "");
      }

      ++mTokenizer;
      token1 = *mTokenizer;
      if (token1.isLiteral())
      {
         throw CPPDOM_ERROR(xml_tagname_expected, "");
      }

      // check if open and close tag names are identical
      if (token1.getGeneric() != token2.getGeneric())
      {
         throw CPPDOM_ERROR(xml_tagname_close_mismatch, "");
      }

      // the end tag must be terminated by '>' (a missing '>' is reported
      // with the xml_opentag_expected code)
      ++mTokenizer;
      if (*mTokenizer != '>')
      {
         throw CPPDOM_ERROR(xml_opentag_expected, "");
      }

      if (handle)
      {
         context->getEventHandler().endNode(node);
      }

      return true;
   }