예제 #1
0
/**
 * Parses a complete document and returns its root node.
 *
 * The root is a freshly allocated, unnamed node of type Document. The
 * prolog, the top-level element and any trailing Misc items are parsed
 * with the root passed as their target node. A missing prolog or missing
 * content is reported through reportError(); as far as this function is
 * concerned parsing then simply continues.
 *
 * @return The root node. It is allocated with `new` and returned as a raw
 *         pointer; nothing in this function releases it.
 */
Node *Parser::parseDocument()
{
    // Prime the reader before any parsing starts.
    // NOTE(review): the meaning of the `false` argument is not visible here.
    readChar( false );

    Node *root = new Node;
    root->setName( "" );
    root->setNodeType( Document );

    if( !parseProlog( root ) )
    {
        reportError( "Missing prolog" );
    }

    if( !parseElement( root ) )
    {
        reportError( "Missing content" );
    }

    // Keep consuming trailing Misc items until parseMisc() reports failure.
    for( ;; )
    {
        if( !parseMisc( root ) )
            break;
    }

    return root;
}
예제 #2
0
/**
 * Loads a schema, scans a document with it and prints the resulting tree.
 *
 * Progress messages, the time spent loading the schema, the time spent
 * scanning, and any ReadException raised while scanning are all printed to
 * stdout. A ReadException stops the scan but is not propagated: whatever
 * document the element parser holds at that point is still passed to
 * showTree().
 *
 * @param aSchemaFile Passed unchanged to loadSchema().
 * @param aDocument   Passed unchanged to the scanner's setDocument().
 */
void SGMLParser :: parse(const char * aSchemaFile, const char * aDocument) {

	printf("Loading schema\n");
	clock_t start = clock();
	loadSchema(aSchemaFile);
	clock_t end = clock();
	printf("Time taken for loading the schema: %f\n", static_cast<double>(end - start)/CLOCKS_PER_SEC);

	start = clock();
	printf("Starting to scan the HTML document\n");
	mScanner->setDocument(aDocument);
	printf("Loaded the document\n");
	// Assume the doctype is HTML.
	mDocTypeName = "HTML";
	ElementParser elementParser(mScanner, mSchema, mDocTypeName);
	// See if we can scan a whole HTML document.
	try {
		mToken = mScanner->nextToken();
		parseSStar();
		printf("Got first token: %s\n", mScanner->getTokenText().c_str());
		parseProlog();
		// Dispatch on the current token until the scanner reports end of input.
		// Every case is responsible for advancing mToken.
		while (mToken != EOF_SYM) {
			switch (mToken) {
				case ELEMENT_OPEN_SYM: {
					// Kickstart the element parser.
					TElementPtr element = elementParser.parseStartTag();
					TDOMString name = element->getTagName();
					ElementToken elmToken = ElementToken(START_TAG, name, element);
					TElementDeclarationPtr declaration = mSchema->getDeclaration(mDocTypeName);
					mToken = elementParser.parse(elmToken, declaration);
					break;
				}
				case DECLARATION_SYM: {
					// Only comment declarations are accepted at this level.
					mToken = mScanner->nextToken();
					if (mToken == COMMENT_SYM) {
						// The comment parser is created on first use and kept in a member.
						if (mCommentDeclParser == NULL)
							mCommentDeclParser = new CommentDeclParser(mScanner, TSchemaPtr());
						mToken = mCommentDeclParser->parse(mToken, ELEMENT_OPEN_SYM);
					}
					else
						throw ReadException(mScanner->getLineNr(),
											mScanner->getCharNr(),
											"Expected comment sym",
											GENERIC,
											true);
					break;
				}
				case DECLARATION_END_SYM: {
					mToken = mScanner->nextToken(ELEMENT_OPEN_SYM);
					break;
				}
				case TEXT_SYM: {
					// Text at this level is skipped.
					mToken = mScanner->nextToken();
					break;
				}
				case SPACE_SYM: {
					// Not doing anything with that right now.
					mToken = mScanner->nextToken();
					break;
				}
				default: {
					// Unknown token: log it and move on.
					printf("Found token: %s\n", mScanner->getTokenText().c_str());
					mToken = mScanner->nextToken();
					break;
				}
			}
		}
	}
	// Catch by reference so the exception is neither copied nor sliced if a
	// derived type is thrown. The reference is non-const because the
	// const-ness of the accessors below is not visible from this function.
	catch(ReadException & r) {
		printf(
			"Found error: line: %i char %i message: %s\n",
			r.getLineNr(),
			r.getCharNr(),
			r.getErrorMessage().c_str());
	}

	end = clock();
	printf("Time taken: %f\n", static_cast<double>(end - start)/CLOCKS_PER_SEC);

	TDocumentPtr document = elementParser.getDocument();
	showTree(document, 0);

}
예제 #3
0
/*
 * Parses the construct whose name begins at parser->offset and returns the
 * node produced for it, or NULL on failure.
 *
 * The name is skipped with skipName() and its first character decides what
 * is parsed:
 *   '!'  -> CDATA section, comment or DOCTYPE (delegated to parseCDATA,
 *           parseComment or parseDocType); anything else is an error.
 *   '?'  -> "?xml" prolog (delegated to parseProlog); anything else is an
 *           error.
 *   else -> a regular element: the node is created through
 *           parser->createElement, then its attributes and, unless
 *           parseAttributes flagged it as empty, its content are parsed.
 *
 * parser: parser state; parser->offset is advanced as input is consumed.
 * parent: forwarded to the delegated parser or to parser->createElement.
 * type:   receives the UXML_NT_* constant for the recognised construct. It
 *         is not written when the name cannot be read or when the '!' / '?'
 *         tag is not recognised.
 *
 * If parsing a regular element fails after the node was created, the node
 * is handed to parser->destroyNode before NULL is returned.
 */
static UXMLNODE parseElement(struct UXMLPARSER *parser, UXMLNODE parent, int *type)
{
  UXMLCHAR *tagStart = parser->offset;
  size_t tagLength;
  UXMLNODE element;
  int selfClosing;

  if (!skipName(parser))
  {
    setError(parser, 0, "Unexpected end of input parsing stag name");
    return NULL;
  }

  tagLength = (parser->offset - tagStart);

  /* Markup declarations: CDATA, comment or DOCTYPE. */
  if (*tagStart == '!')
  {
    if (stringCompare(tagStart, "![CDATA[", 8))
    {
      *type = UXML_NT_CDATA;
      return parseCDATA(parser, parent);
    }

    if (stringCompare(tagStart, "!--", 3))
    {
      *type = UXML_NT_COMMENT;
      return parseComment(parser, parent);
    }

    if (stringCompare(tagStart, "!DOCTYPE", 8))
    {
      *type = UXML_NT_DOCTYPE;
      return parseDocType(parser, parent);
    }

    setError(parser, 0, "Unexpected ! tag");
    return NULL;
  }

  /* Only the "?xml" form is accepted for '?' tags. */
  if (*tagStart == '?')
  {
    if (stringCompare(tagStart, "?xml", 4))
    {
      *type = UXML_NT_PROLOG;
      return parseProlog(parser, parent);
    }

    setError(parser, 0, "Unexpected prolog tag");
    return NULL;
  }

  /* Regular element. */
  *type = UXML_NT_ELEMENT;
  element = parser->createElement(parser, parent, tagStart, tagLength);

  if (!parseAttributes(parser, element, &selfClosing))
  {
    parser->destroyNode(parser, element);
    return NULL;
  }

  /* Both the empty and the non-empty form step over one character here.
   * NOTE(review): presumably '/' for an empty tag and '>' otherwise;
   * confirm against parseAttributes. */
  parser->offset ++;

  if (selfClosing)
  {
    /* An empty tag must be closed by '>' right after that character. */
    if (*(parser->offset) != '>')
    {
      setError(parser, 0, "Unexpected end of input parsing stag name");
      parser->destroyNode(parser, element);
      return NULL;
    }

    parser->offset ++;
    return element;
  }

  if (!parseContent(parser, element, tagStart, tagLength))
  {
    parser->destroyNode(parser, element);
    return NULL;
  }

  return element;
}