/*
 * Expat start-element callback for the framer.
 *
 * data - the framer_t registered as Expat user data
 * el   - element name; the text after the first '\xFF' byte is treated as the
 *        bare element name
 * attr - attribute list (not used here)
 *
 * A "stream" element resets the nesting level and is recorded as a frame of
 * its own.  Any other separator-qualified element that starts while the level
 * is 1 is remembered as the beginning of a stanza.  The nesting level is
 * incremented for every element, qualified or not.
 */
static void XMLCALL framer_start(void *data, const char *el, const char **attr)
{
    framer_t *const state = (framer_t *) data;
    const char *const marker = strchr(el, '\xFF');

    (void) attr;

    if (marker != NULL) {
        const char *const bare_name = marker + 1;

        if (strcmp(bare_name, "stream") == 0) {
            /* Stream opening: restart level counting and frame the tag itself. */
            state->level = 0;
            framer_add_frame(state,
                             XML_GetCurrentByteIndex(state->parser),
                             XML_GetCurrentByteCount(state->parser));
        } else if (state->level == 1) {
            /* Stanza opening: remember where it starts. */
            state->index = XML_GetCurrentByteIndex(state->parser);
        }
    }

    state->level += 1;
}
/*
 * Expat end-element callback for the framer.
 *
 * data - the framer_t registered as Expat user data
 * el   - element name; the text after the first '\xFF' byte is treated as the
 *        bare element name
 *
 * The nesting level is decremented first.  If that brings it back to 1, the
 * stanza that began at framer->index is complete and is recorded as a frame
 * running up to the end of the current event.  Otherwise, a closing "stream"
 * element is recorded as a frame of its own.
 */
static void XMLCALL framer_end(void *data, const char *el)
{
    framer_t *const state = (framer_t *) data;
    const char *const marker = strchr(el, '\xFF');
    const char *const bare_name = (marker != NULL) ? marker + 1 : NULL;

    state->level -= 1;

    if (state->level == 1) {
        /* Stanza closing: the frame spans from the stored start to the end of this tag. */
        int stanza_end = XML_GetCurrentByteIndex(state->parser)
                       + XML_GetCurrentByteCount(state->parser);

        framer_add_frame(state, state->index, stanza_end - state->index);
        return;
    }

    if (bare_name != NULL && strcmp(bare_name, "stream") == 0) {
        /* Stream closing: frame the closing tag itself. */
        framer_add_frame(state,
                         XML_GetCurrentByteIndex(state->parser),
                         XML_GetCurrentByteCount(state->parser));
    }
}
void nsExpatDriver::ParseBuffer(const PRUnichar *aBuffer, PRUint32 aLength, PRBool aIsFinal, PRUint32 *aConsumed) { NS_ASSERTION((aBuffer && aLength != 0) || (!aBuffer && aLength == 0), "?"); NS_ASSERTION(mInternalState != NS_OK || aIsFinal || aBuffer, "Useless call, we won't call Expat"); NS_PRECONDITION(!BlockedOrInterrupted() || !aBuffer, "Non-null buffer when resuming"); NS_PRECONDITION(XML_GetCurrentByteIndex(mExpatParser) % sizeof(PRUnichar) == 0, "Consumed part of a PRUnichar?"); if (mExpatParser && (mInternalState == NS_OK || BlockedOrInterrupted())) { PRInt32 parserBytesBefore = XML_GetCurrentByteIndex(mExpatParser); NS_ASSERTION(parserBytesBefore >= 0, "Unexpected value"); XML_Status status; if (BlockedOrInterrupted()) { mInternalState = NS_OK; // Resume in case we're blocked. status = XML_ResumeParser(mExpatParser); } else { status = XML_Parse(mExpatParser, reinterpret_cast<const char*>(aBuffer), aLength * sizeof(PRUnichar), aIsFinal); } PRInt32 parserBytesConsumed = XML_GetCurrentByteIndex(mExpatParser); NS_ASSERTION(parserBytesConsumed >= 0, "Unexpected value"); NS_ASSERTION(parserBytesConsumed >= parserBytesBefore, "How'd this happen?"); NS_ASSERTION(parserBytesConsumed % sizeof(PRUnichar) == 0, "Consumed part of a PRUnichar?"); // Consumed something. *aConsumed = (parserBytesConsumed - parserBytesBefore) / sizeof(PRUnichar); NS_ASSERTION(*aConsumed <= aLength + mExpatBuffered, "Too many bytes consumed?"); NS_ASSERTION(status != XML_STATUS_SUSPENDED || BlockedOrInterrupted(), "Inconsistent expat suspension state."); if (status == XML_STATUS_ERROR) { mInternalState = NS_ERROR_HTMLPARSER_STOPPARSING; } } else { *aConsumed = 0; } }
xode xode_from_strx(char *str, int len, int *err, int *pos) { XML_Parser p; xode *x, node; /* pointer to an xmlnode */ if(NULL == str) return NULL; if(len == -1) len = strlen(str); x = malloc(sizeof(void *)); *x = NULL; /* pointer to NULL */ p = XML_ParserCreate(NULL); XML_SetUserData(p, x); XML_SetElementHandler(p, _xode_expat_startElement, _xode_expat_endElement); XML_SetCharacterDataHandler(p, _xode_expat_charData); XML_Parse(p, str, len, 0); if(err != NULL) *err = XML_GetErrorCode(p); if(pos != NULL) *pos = XML_GetCurrentByteIndex(p); node = *x; free(x); XML_ParserFree(p); return node; /* return the xmlnode x points to */ }
/*
 * Lua binding: pushes the parser's current position as three numbers:
 * line number, column number + 1 and byte index + 1.
 * Returns the number of values pushed (3).
 */
static int lxp_pos (lua_State *L) {
  enum { NRESULTS = 3 };
  XML_Parser parser = checkparser(L, 1)->parser;

  lua_pushnumber(L, XML_GetCurrentLineNumber(parser));
  /* column and byte index are each offset by one before being pushed */
  lua_pushnumber(L, XML_GetCurrentColumnNumber(parser) + 1);
  lua_pushnumber(L, XML_GetCurrentByteIndex(parser) + 1);

  return NRESULTS;
}
/* params : desc : the document descriptor * return : an indicator of the progression in the processing */ int p_getProgression(struct doc_descriptor *desc) { if(desc->size > 0) { return (100 * (XML_GetCurrentByteIndex(desc->parser) - ((struct ParserState *)(desc->myState))->begin_byte)) / (desc->size - ((struct ParserState *)(desc->myState))->begin_byte); } else { return 0; } }
/*
 * Pushes an error report for the parser held by xpu onto its Lua stack:
 * nil, the Expat error string, the line number, the column number + 1 and
 * the byte index + 1.  Returns the number of values pushed (5).
 */
static int reporterror (lxp_userdata *xpu) {
  enum { NRESULTS = 5 };
  lua_State *state = xpu->L;
  XML_Parser parser = xpu->parser;
  enum XML_Error code = XML_GetErrorCode(parser);

  lua_pushnil(state);
  lua_pushstring(state, XML_ErrorString(code));
  lua_pushnumber(state, XML_GetCurrentLineNumber(parser));
  /* column and byte index are each offset by one before being pushed */
  lua_pushnumber(state, XML_GetCurrentColumnNumber(parser) + 1);
  lua_pushnumber(state, XML_GetCurrentByteIndex(parser) + 1);

  return NRESULTS;
}
void xml_map_application::do_element_start(char const * const element, char const * const * const attributes) { if (element_depth > options_depth_max) return; ++element_found_count; comma::xpath const & element_path = current_xpath(); // get the start location long long const at = XML_GetCurrentByteIndex(parser); element_location_t loc(at, 0); // push the start location into the map element_location_map[element_path].push_back(loc); }
/*
 * Refreshes parser->cur (status, line, column, event length, byte offset)
 * from the underlying Expat parser.
 *
 * Since a parser is used by nearly all xml-coreutils, and the do_parser()
 * functions call audit_parser(), pending signals are processed here first,
 * even when parser is NULL.
 *
 * Returns TRUE when parser->cur.rstatus equals XML_STATUS_OK, and FALSE
 * otherwise or when parser is NULL.
 */
bool_t audit_parser(parser_t *parser) {
  process_pending_signal();

  if( !parser ) {
    return FALSE;
  }

  parser->cur.pstatus = get_pstatus(parser);
  parser->cur.lineno = XML_GetCurrentLineNumber(parser->p);
  parser->cur.colno = XML_GetCurrentColumnNumber(parser->p);
  parser->cur.length = XML_GetCurrentByteCount(parser->p);
  parser->cur.byteno = XML_GetCurrentByteIndex(parser->p);

  return (bool_t)(parser->cur.rstatus == XML_STATUS_OK);
}
void xml_map_application::do_element_end(char const * const element) { if (element_depth > options_depth_max) return; comma::xpath const & element_path = current_xpath(); element_location_t & entry = element_location_map[element_path].back(); { // force the use of the entry to prevent errors long long txtlen = 3 + std::strlen(element); long long const at = XML_GetCurrentByteIndex(parser) + txtlen; entry.second = at; } if (! options_compact) std::cout << element_path << ',' << entry.first << '-' << entry.second << std::endl; }
/** * Parse an XML string into a nested list. * The second parameter indicates if body text (text within XML tags) * should show up among the children of the tag or in its own * section. * * See documentation (ext-xml.README) for examples. */ static package parse_xml(const char *data, int bool_stream) { /* * FIXME: Feed expat smaller chunks of the string and * check for task timeout between chunks * */ int decoded_length; const char *decoded; package result; XML_Parser parser = XML_ParserCreate(NULL); XMLdata *root = new_node(NULL, ""); XMLdata *child = root; decoded_length = strlen(data); decoded = data; XML_SetUserData(parser, &child); XML_SetElementHandler(parser, xml_startElement, xml_endElement); if(bool_stream) { XML_SetCharacterDataHandler(parser, xml_streamCharacterDataHandler); } else { XML_SetCharacterDataHandler(parser, xml_characterDataHandler); } if (!XML_Parse(parser, decoded, decoded_length, 1)) { Var r; r.type = TYPE_INT; r.v.num = XML_GetCurrentByteIndex(parser); flush_nodes(child); result = make_raise_pack(E_INVARG, XML_ErrorString(XML_GetErrorCode(parser)), r); } else { finish_node(root); result = make_var_pack(var_ref(root->element.v.list[4].v.list[1])); free_node(root); } XML_ParserFree(parser); return result; }
/*
 * Parses EXAMPLE_XML with a UTF-8 Expat parser.  On a parse error it prints
 * the error code, its description, the byte offset of the error and the
 * unparsed remainder of the document, and exits with status 1; otherwise it
 * exits with status 0.
 */
int main(int argc, const char *argv[])
{
    int exit_status = 0;
    XML_Parser parser = XML_ParserCreate("UTF-8");
    int parsed_ok = XML_Parse(parser, EXAMPLE_XML, strlen(EXAMPLE_XML), true);

    if (!parsed_ok) {
        int errcode = XML_GetErrorCode(parser);
        char *errstring = (char *)XML_ErrorString(errcode);
        int errpos = XML_GetCurrentByteIndex(parser);

        printf("errcode: %d\n"
               "errstring: %s\n"
               "errpos: %d\n"
               "rest: %s\n",
               errcode, errstring, errpos, EXAMPLE_XML+errpos);
        exit_status = 1;
    }

    XML_ParserFree(parser);
    return exit_status;
}
// Returns Expat's current byte index for the wrapped parser.
// Asserts that the parser handle has been created.
int64 XmlParser::GetCurrentByteIndex()
{
    ex_assert( m_pParser, "m_pParser is not create" );

    const int64 byteOffset = XML_GetCurrentByteIndex(m_pParser);
    return byteOffset;
}
// Returns Expat's current byte index for myParser, converted to size_t.
size_t ZLXMLReaderInternal::getCurrentPosition() const {
	const size_t position = XML_GetCurrentByteIndex(myParser);
	return position;
}
// Character-data callback (three-buffer variant: raw, lower-case and
// lower-case-no-diacritics tokens).
//
//   userData - the CDOMDocument the callback operates on
//   xmls     - character data, decoded below byte by byte using UTF-8
//              lead/continuation bit patterns
//   len      - number of bytes in xmls
//
// Does nothing when parseText is false, and skips all processing while the
// document state is INTO_CDATA.  Otherwise each decoded character is appended
// to the token / tokenLC / tokenLCND buffers (and, when getContent is set, to
// the current node's values).  One- and two-byte characters are transcoded
// through the cmap_1 / cmap_2 tables; three- and four-byte sequences are
// copied unchanged.  A character flagged CFLAG_ENDCHAR, the end of the data,
// or a raw token of 400 bytes or more ends the current token, which is handed
// to flushToken() if it is non-empty.  Byte offsets of the node and of the
// token are tracked relative to Expat's current byte index.
//
// NOTE(review): nLCBytes is reset for every character but nLCNDBytes is not,
// so in the two-byte path nLCNDBytes keeps counting from its previous value -
// confirm whether that is intended.
// NOTE(review): the token buffers are only checked against 400 after a whole
// character has been appended; their declared sizes are not visible here.
void XMLCALL CDOMDocument::charHandler(void *userData, const XML_Char *xmls, int len)
{
	CDOMDocument *_this = (CDOMDocument *)(userData);
	if(!_this->parseText)
		return;
	// printf("charHandler @0x%04X [len=%d] :\n", 888, len);
	// byte offset of this chunk of character data in the parsed input
	XML_Index index = XML_GetCurrentByteIndex(_this->parser);
	if(_this->State != CDOMDocument::INTO_CDATA)
	{
		int i;                  // number of bytes of xmls consumed so far
		unsigned char c0, c;    // lead byte / continuation byte
		unsigned int u, msk;    // decoded code point and the mask used while assembling it
		unsigned char nBytes, nLCBytes, nLCNDBytes; // bytes the last character added to each token buffer
		unsigned char *s = (unsigned char *)xmls;
		char cbreak;            // non-zero when the current character ends the token
		register char *p;
		i = 0;
		// an index_start of 0 is treated as "not set yet"
		if(_this->currentNode->index_start == 0)
			_this->currentNode->index_start = index+i;
		// runs one extra iteration with len == 0 so the pending token is flushed at end of data
		while(len >= 0)
		{
			if(len == 0)
			{
				// at the end of data
				nBytes = nLCBytes = nLCNDBytes = 0;
				cbreak = true;
				len--;
			}
			else
			{
				unsigned char flags = CFLAG_NORMALCHAR;
				_this->currentNode->index_end = index+i;
				// an indexStart of 0 is treated as "no token in progress"
				if(_this->indexStart == 0)
					_this->indexStart = index+i;
				cbreak = 0;
				u = 0xFFFFFFFF;
				c0 = *s++;
				len--;
				i++;
				// calculate the 'lowed' value of the char
				nLCBytes = 0;
				if(c0 & 0x80)
				{
					if(c0 & 0x40)
					{
						// 11xxxxxx : multi bytes character
						_this->token[_this->tokenLen++] = c0;
						u = ((unsigned int) c0) & 0x0000001F;
						msk = 0xFFFFFF7F;
						nBytes = 1;
						// read max 6 bytes
						// each pass shifts the lead byte left to test its next length bit and
						// accepts one 10xxxxxx continuation byte, widening the mask by 5 bits
						while(len && ((c0 <<= 1) & 0x80) && (((c = *s) & 0xC0) == 0x80) && ++nBytes <= 6)
						{
							_this->token[_this->tokenLen++] = c;
							u = (u<<6 & (msk = (msk<<5) | 0x1F)) | (unsigned int)(c & 0x3F);
							len--;
							i++;
							s++;
							_this->currentNode->index_end++;
						}
						if(nBytes <= 4)
						{
							// char in 2. 3 or 4 bytes
							if(u >= 0x0080 && u <= 0x07FF)
							{
								// char on 2 bytes : transcode via look-up table cmap_2
								flags = cmap_2[u - 0x0080].flags;
								cbreak = (flags & CFLAG_ENDCHAR) ? 1 : 0;
								for(p = (char *)(cmap_2[u - 0x0080].s[UNICODE_LC]); *p; p++)
								{
									_this->tokenLC[_this->tokenLCLen++] = *p;
									nLCBytes++;
									if(_this->getContent)
									{
										_this->currentNode->addValueLC(*p, _this->currentNode->lastFlags, flags);
									}
								}
								for(p = (char *)(cmap_2[u - 0x0080].s[UNICODE_LCND]); *p; p++)
								{
									_this->tokenLCND[_this->tokenLCNDLen++] = *p;
									nLCNDBytes++;
									if(_this->getContent)
									{
										_this->currentNode->addValueLCND(*p, _this->currentNode->lastFlags, flags);
									}
								}
							}
							else
							{
								// char on 3 or 4 bytes : don't transcode
								register int j;
								// rewind s to the lead byte; the loop walks it forward again
								for(j=0, s-=nBytes; j<nBytes; j++, s++)
								{
									_this->tokenLC[_this->tokenLCLen++] = *s;
									nLCBytes++;
									_this->tokenLCND[_this->tokenLCNDLen++] = *s;
									nLCNDBytes++;
									if(_this->getContent)
									{
										_this->currentNode->addValueLC(*s, _this->currentNode->lastFlags, 0);
										_this->currentNode->addValueLCND(*s, _this->currentNode->lastFlags, 0);
									}
								}
							}
						}
						else
						{
							// char on 5 or 6 bytes : skip
							u = 0xFFFFFFFF;
						}
						if(_this->getContent)
						{
							register int j;
							// rewind s to the lead byte; *s++ below walks it forward again
							for(j=0, s-=nBytes; j<nBytes; j++)
							{
								// add the byte to the 'value' of the current node
								_this->currentNode->addValueC(*s++, _this->currentNode->lastFlags, flags);
							}
						}
					}
					else
					{
						// 10xxxxxx : improper byte as c0 (a continuation byte where a lead byte was expected); ignored
					}
				}
				else
				{
					// 0xxxxxxx : 1 byte char, transcode via look-up table cmap_1
					flags = cmap_1[(int)c0].flags;
					cbreak = (flags & CFLAG_ENDCHAR) ? 1 : 0;
					_this->token[_this->tokenLen++] = c0;
					_this->tokenLC[_this->tokenLCLen++] = cmap_1[(int)c0].c[UNICODE_LC];
					_this->tokenLCND[_this->tokenLCNDLen++] = cmap_1[(int)c0].c[UNICODE_LCND];
					if(_this->getContent)
					{
						// add the byte to the 'value' of the current node
						_this->currentNode->addValueC(c0, _this->currentNode->lastFlags, flags);
						// add the transcoded byte to the 'lowercase value' of the current node
						_this->currentNode->addValueLC(cmap_1[(int)c0].c[UNICODE_LC], _this->currentNode->lastFlags, flags);
						// add the transcoded byte to the 'lowercase-nodiacritics value' of the current node
						_this->currentNode->addValueLCND(cmap_1[(int)c0].c[UNICODE_LCND], _this->currentNode->lastFlags, flags);
					}
					u = (unsigned int) c0;
					nLCNDBytes = nLCBytes = nBytes = 1;
				}
				_this->indexEnd = index+i;
				_this->currentNode->lastFlags = flags;
			}
			if(cbreak || _this->tokenLen>=400) // cbreak or buffer full
			{
				if(cbreak)
				{
					// the breaking character was already appended above: take it back out
					_this->tokenLen -= nBytes; // remove the cbreak
					_this->tokenLCLen -= nLCBytes; // remove the cbreak
					_this->tokenLCNDLen -= nLCNDBytes; // remove the cbreak
				}
				if(_this->tokenLen > 0)
				{
					if(cbreak)
					{
						_this->indexEnd -= nBytes; // remove the cbreak
					}
					// _this->currentNode->addLowValueC('\0', CFLAG_NORMALCHAR);
					_this->flushToken();
				}
				// start a fresh token
				_this->tokenLCNDLen = _this->tokenLCLen = _this->tokenLen = 0;
				_this->indexStart = 0;
			}
			else // normal char
			{
			}
		}
	}
}
// Character-data callback (two-buffer variant: raw and 'lowed' tokens).
//
//   userData - the CDOMDocument the callback operates on
//   xmls     - character data, decoded below byte by byte using UTF-8
//              lead/continuation bit patterns
//   len      - number of bytes in xmls
//
// Does nothing when parseText is false, and skips all processing while the
// document state is INTO_CDATA.  Otherwise each decoded character is appended
// to tokBin / lowtokBin (and, when getContent is set, to the current node's
// values).  One- and two-byte characters are transcoded through the cmap_1 /
// cmap_2 tables; three- and four-byte sequences are copied unchanged.  A
// character flagged CFLAG_ENDCHAR, the end of the data, or a raw token of
// 400 bytes or more ends the current token, which is handed to flushToken()
// if it is non-empty.  Byte offsets of the node and of the token are tracked
// relative to Expat's current byte index.
//
// NOTE(review): the token buffers are only checked against 400 after a whole
// character has been appended; their declared sizes are not visible here.
void XMLCALL CDOMDocument::charHandler(void *userData, const XML_Char *xmls, int len)
{
	CDOMDocument *_this = (CDOMDocument *)(userData);
	if(!_this->parseText)
		return;
	// printf("charHandler @0x%04X [len=%d] :\n", 888, len);
	// byte offset of this chunk of character data in the parsed input
	XML_Index index = XML_GetCurrentByteIndex(_this->parser);
	if(_this->State != CDOMDocument::INTO_CDATA)
	{
		int i;                   // number of bytes of xmls consumed so far
		unsigned char c0, c;     // lead byte / continuation byte
		UINT32 u, msk;           // decoded code point and the mask used while assembling it
		unsigned char nBytes;    // bytes the last character added to tokBin
		unsigned char nLowBytes; // bytes the last character added to lowtokBin
		unsigned char *s = (unsigned char *)xmls;
		char cbreak;             // non-zero when the current character ends the token
		register char *p;
		/*
		// debugging aid: dump the incoming bytes as characters and as hex
		unsigned char outc;
		printf("charHandler @0x%04X [len=%d] :\n", (int)index, len);
		for(i=0; i<len; i++)
		{
			outc = (c=s[i]) < 32 ? '.' : s[i];
			printf(" %c ", (outc));
		}
		putchar('\n');
		for(i=0; i<len; i++)
		{
			outc = s[i];
			printf("0x%02X ", (outc));
		}
		putchar('\n');
		*/
		i = 0;
		// an index_start of 0 is treated as "not set yet"
		if(_this->currentNode->index_start == 0)
			_this->currentNode->index_start = index+i;
		// runs one extra iteration with len == 0 so the pending token is flushed at end of data
		while(len >= 0)
		{
			if(len == 0)
			{
				// at the end of data
				nBytes = nLowBytes = 0;
				cbreak = true;
				len--;
			}
			else
			{
				_this->currentNode->index_end = index+i;
				// an indexStart of 0 is treated as "no token in progress"
				if(_this->indexStart == 0)
					_this->indexStart = index+i;
				cbreak = 0;
				u = 0xFFFFFFFF;
				c0 = *s++;
				len--;
				i++;
				// calculate the 'lowed' value of the char
				if(c0 & 0x80)
				{
					if(c0 & 0x40)
					{
						// 11xxxxxx : multi bytes character
						unsigned char flags = CFLAG_NORMALCHAR;
						_this->tokBin[_this->tokBinLen++] = c0;
						u = ((UINT32) c0) & 0x0000001F;
						msk = 0xFFFFFF7F;
						nBytes = 1;
						// read max 6 bytes
						// each pass shifts the lead byte left to test its next length bit and
						// accepts one 10xxxxxx continuation byte, widening the mask by 5 bits
						while(len && ((c0 <<= 1) & 0x80) && (((c = *s) & 0xC0) == 0x80) && ++nBytes <= 6)
						{
							_this->tokBin[_this->tokBinLen++] = c;
							u = (u<<6 & (msk = (msk<<5) | 0x1F)) | (UINT32)(c & 0x3F);
							len--;
							i++;
							s++;
							_this->currentNode->index_end++;
						}
						// printf("%i\n", nBytes);
						if(nBytes <= 4)
						{
							// char in 2. 3 or 4 bytes
							if(u >= 0x0080 && u <= 0x07FF)
							{
								// char on 2 bytes : transcode via look-up table cmap_2
								flags = cmap_2[u - 0x0080].flags;
								cbreak = (flags & CFLAG_ENDCHAR) ? 1 : 0;
								nLowBytes = 0;
								for(p = (char *)(cmap_2[u - 0x0080].s); *p; p++)
								{
									_this->lowtokBin[_this->lowtokBinLen++] = *p;
									nLowBytes++;
									if(_this->getContent)
									{
										_this->currentNode->addLowValueC(*p, cmap_2[u - 0x0080].flags);
										// if(_this->currentNode->index_start == 0)
										//	_this->currentNode->index_start = index+i;
										// _this->currentNode->index_end = index+i;
									}
								}
							}
							else
							{
								// printf("!!! Caractere non transcodable (nBytes=%d ; u=0x%04X) !!!\n", nBytes, u);
								// char on 3 or 4 bytes : don't transcode
								register int j;
								// rewind s to the lead byte; the loop walks it forward again
								for(j=0, s-=nBytes; j<nBytes; j++, s++)
								{
									_this->lowtokBin[_this->lowtokBinLen++] = *s;
									nLowBytes++;
									if(_this->getContent)
									{
										_this->currentNode->addLowValueC(*s, 0);
									}
								}
								/*
								// disabled alternative: substitute '?' for untranscodable characters
								_this->lowtokBin[_this->lowtokBinLen++] = '?';
								nLowBytes = 1;
								if(_this->getContent)
								{
									_this->currentNode->addLowValueC('?', 0);
									// if(_this->currentNode->index_start == 0)
									//	_this->currentNode->index_start = index+i;
									// _this->currentNode->index_end = index+i;
								}
								*/
							}
						}
						else
						{
							// char on 5 or 6 bytes : skip
							u = 0xFFFFFFFF;
						}
						if(_this->getContent)
						{
							register int j;
							// printf("!!! addValueC :");
							// rewind s to the lead byte; *s++ below walks it forward again
							for(j=0, s-=nBytes; j<nBytes; j++)
							{
								// printf(" %s 0x%02X", (j>0?",":""), *s);
								// add the byte to the 'value' of the current node
								_this->currentNode->addValueC(*s++, flags);
							}
							// putchar('\n');
						}
					}
					else
					{
						// 10xxxxxx : improper byte as c0 (a continuation byte where a lead byte was expected); ignored
					}
				}
				else
				{
					// 0xxxxxxx : 1 byte char, transcode via look-up table cmap_1
					unsigned char flags = cmap_1[(int)c0].flags;
					cbreak = (flags & CFLAG_ENDCHAR) ? 1 : 0;
					_this->tokBin[_this->tokBinLen++] = c0;
					_this->lowtokBin[_this->lowtokBinLen++] = cmap_1[(int)c0].c;
					if(_this->getContent)
					{
						// add the byte to the 'value' of the current node
						_this->currentNode->addValueC(c0, flags);
						// add the transcoded byte to the 'lowed value' of the current node
						_this->currentNode->addLowValueC(cmap_1[(int)c0].c, flags);
						// if(_this->currentNode->index_start == 0)
						//	_this->currentNode->index_start = index+i;
						// _this->currentNode->index_end = index+i;
					}
					u = (UINT32) c0;
					nLowBytes = nBytes = 1;
				}
				// printf("got U+%06X ; i=%i\n", (int)u, i);
				_this->indexEnd = index+i;
			}
			if(cbreak || _this->tokBinLen>=400) // cbreak or buffer full
			{
				if(cbreak)
				{
					// printf("---- break ----\n");
					// the breaking character was already appended above: take it back out
					_this->tokBinLen -= nBytes; // remove the cbreak
					_this->lowtokBinLen -= nLowBytes; // remove the cbreak
				}
				if(_this->tokBinLen > 0)
				{
					if(cbreak)
					{
						_this->indexEnd -= nBytes; // remove the cbreak
						// printf("---- break : indexEnd -= %i ----\n", nBytes);
					}
					_this->flushToken();
				}
				// start a fresh token
				_this->lowtokBinLen = _this->tokBinLen = 0;
				_this->indexStart = 0;
			}
			else // normal char
			{
			}
		}
		// _this->currentNode->dump();
	}
	// printf("CHAREND start=%i, end=%i (len=%i) \n", _this->currentNode->index_start, _this->currentNode->index_end, _this->currentNode->index_end - _this->currentNode->index_start + 1);
}
// Returns Expat's current byte index for the XmlParser wrapped by `parser`.
int64_t f_xml_get_current_byte_index(CObjRef parser) {
  XmlParser *wrapper = parser.getTyped<XmlParser>();
  const int64_t byteIndex = XML_GetCurrentByteIndex(wrapper->parser);
  return byteIndex;
}
WBXML_DECLARE(WBXMLError) wbxml_tree_from_xml(WB_UTINY *xml, WB_ULONG xml_len, WBXMLTree **tree) { #if defined( HAVE_EXPAT ) const XML_Feature *feature_list = NULL; XML_Parser xml_parser = NULL; WBXMLError ret = WBXML_OK; WB_BOOL expat_utf16 = FALSE; WBXMLTreeClbCtx wbxml_tree_clb_ctx; /* First Check if Expat is outputing UTF-16 strings */ feature_list = (const XML_Feature *)XML_GetFeatureList(); if ((feature_list != NULL) && (feature_list[0].value != sizeof(WB_TINY))) { #if !defined( HAVE_ICONV ) /* Ouch, can't convert from UTF-16 to UTF-8 */ return WBXML_ERROR_XMLPARSER_OUTPUT_UTF16; #else /* Expat returns UTF-16 encoded strings in its callbacks */ expat_utf16 = TRUE; #endif /* !HAVE_ICONV */ } if (tree != NULL) *tree = NULL; /* Create Expat XML Parser */ if ((xml_parser = XML_ParserCreateNS(NULL, WBXML_NAMESPACE_SEPARATOR)) == NULL) return WBXML_ERROR_NOT_ENOUGH_MEMORY; /* Init context */ wbxml_tree_clb_ctx.current = NULL; wbxml_tree_clb_ctx.error = WBXML_OK; wbxml_tree_clb_ctx.skip_lvl = 0; wbxml_tree_clb_ctx.skip_start = 0; wbxml_tree_clb_ctx.xml_parser = xml_parser; wbxml_tree_clb_ctx.input_buff = xml; wbxml_tree_clb_ctx.expat_utf16 = expat_utf16; /* Create WBXML Tree */ if ((wbxml_tree_clb_ctx.tree = wbxml_tree_create(WBXML_LANG_UNKNOWN, WBXML_CHARSET_UNKNOWN)) == NULL) { XML_ParserFree(xml_parser); WBXML_ERROR((WBXML_PARSER, "Can't create WBXML Tree")); return WBXML_ERROR_NOT_ENOUGH_MEMORY; } /* Set Handlers Callbacks */ XML_SetXmlDeclHandler(xml_parser, wbxml_tree_clb_xml_decl); XML_SetStartDoctypeDeclHandler(xml_parser, wbxml_tree_clb_xml_doctype_decl); XML_SetElementHandler(xml_parser, wbxml_tree_clb_xml_start_element, wbxml_tree_clb_xml_end_element); XML_SetCdataSectionHandler(xml_parser, wbxml_tree_clb_xml_start_cdata, wbxml_tree_clb_xml_end_cdata); XML_SetProcessingInstructionHandler(xml_parser , wbxml_tree_clb_xml_pi); XML_SetCharacterDataHandler(xml_parser, wbxml_tree_clb_xml_characters); XML_SetUserData(xml_parser, (void*)&wbxml_tree_clb_ctx); /* 
Parse the XML Document to WBXML Tree */ if (XML_Parse(xml_parser, (WB_TINY*) xml, xml_len, TRUE) == 0) { WBXML_ERROR((WBXML_CONV, "xml2wbxml conversion failed - expat error %i\n" "\tdescription: %s\n" "\tline: %i\n" "\tcolumn: %i\n" "\tbyte index: %i\n" "\ttotal bytes: %i\n%s", XML_GetErrorCode(xml_parser), XML_ErrorString(XML_GetErrorCode(xml_parser)), XML_GetCurrentLineNumber(xml_parser), XML_GetCurrentColumnNumber(xml_parser), XML_GetCurrentByteIndex(xml_parser), XML_GetCurrentByteCount(xml_parser), xml)); wbxml_tree_destroy(wbxml_tree_clb_ctx.tree); ret = WBXML_ERROR_XML_PARSING_FAILED; } else { if ((ret = wbxml_tree_clb_ctx.error) != WBXML_OK) { WBXML_ERROR((WBXML_CONV, "xml2wbxml conversion failed - context error %i", ret)); wbxml_tree_destroy(wbxml_tree_clb_ctx.tree); } else *tree = wbxml_tree_clb_ctx.tree; } /* Clean-up */ XML_ParserFree(xml_parser); return ret; #else /* HAVE_EXPAT */ #if defined( HAVE_LIBXML ) /** @todo Use LibXML2 SAX interface ! */ return WBXML_ERROR_NO_XMLPARSER; #else /* HAVE_LIBXML */ /** @note You can add here another XML Parser support */ return WBXML_ERROR_NO_XMLPARSER; #endif /* HAVE_LIBXML */ #endif /* HAVE_EXPAT */ }
/**
 * Expat end-element callback used while building a WBXML Tree.
 *
 * @param ctx       The WBXMLTreeClbCtx registered as Expat user data
 * @param localName Name of the element being closed
 *
 * Steps, in order:
 *  - if the current node is a token element flagged WBXML_TAG_OPTION_BINARY
 *    and has buffered content, base64-decode that content, add it as a text
 *    node and release the buffer;
 *  - stop if an error has been recorded in the context;
 *  - if a node is being skipped, either just decrease the skip level, or
 *    (SyncML builds, skip level 1, DevInf / MgmtTree element) re-parse the
 *    skipped source text as a stand-alone document and attach the resulting
 *    tree;
 *  - finally move the current position up to the parent node.
 *
 * Failures are reported through tree_ctx->error.
 */
void wbxml_tree_clb_xml_end_element(void *ctx, const XML_Char *localName)
{
    WBXMLTreeClbCtx *tree_ctx = (WBXMLTreeClbCtx *) ctx;
    WBXMLBuffer *content = NULL;
    WBXMLTreeNode *node = NULL;
    WBXMLError ret = WBXML_OK;

    WBXML_DEBUG((WBXML_PARSER, "Expat element end callback ('%s')", localName));

    /* If the node is flagged as binary node
     * then the data is base64 encoded in the XML document
     * and the data must be decoded in one step.
     * Examples: Microsoft ActiveSync tags ConversationId or MIME
     */
    node = tree_ctx->current;
    if (node && node->type == WBXML_TREE_ELEMENT_NODE &&
        node->name->type == WBXML_VALUE_TOKEN &&
        node->name->u.token->options & WBXML_TAG_OPTION_BINARY)
    {
        if (node->content == NULL)
        {
            WBXML_DEBUG((WBXML_PARSER, " Binary tag: No content => no conversion!"));
        }
        else
        {
            WBXML_DEBUG((WBXML_PARSER, " Binary tag: Convert base64 data"));
            ret = wbxml_buffer_decode_base64(node->content);
            if (ret != WBXML_OK)
            {
                WBXML_DEBUG((WBXML_PARSER, " Binary tag: Base64 decoder failed!"));
                tree_ctx->error = ret;
            }
            else
            {
                /* Add the buffer as a regular string node (since libwbxml doesn't
                 * offer a way to specify an opaque data node). The WBXML
                 * encoder is responsible for generating correct opaque data for
                 * nodes like this.
                 */
                if (wbxml_tree_add_text(tree_ctx->tree,
                                        tree_ctx->current,
                                        (const WB_UTINY*)wbxml_buffer_get_cstr(node->content),
                                        wbxml_buffer_len(node->content)) == NULL)
                {
                    WBXML_DEBUG((WBXML_PARSER, " Binary tag: Cannot add base64 decoded node!"));
                    tree_ctx->error = WBXML_ERROR_INTERNAL;
                }
            }
            /* safe cleanup: detach the buffer from the node before destroying it
             * (done on both the success and the failure path above) */
            content = node->content;
            node->content = NULL;
            wbxml_buffer_destroy(content);
        }
    }

    if (tree_ctx->expat_utf16) {
        /** @todo Convert from UTF-16 to UTF-8 */
    }

    /* Check for Error */
    if (tree_ctx->error != WBXML_OK)
        return;

    /* Are we skipping a whole node ? */
    if (tree_ctx->skip_lvl > 0) {
        if (tree_ctx->skip_lvl == 1)
        {
            /* End of skipped node */

#if defined( WBXML_SUPPORT_SYNCML )

            if (WBXML_STRCMP(localName, "syncml:devinf:DevInf") == 0 ||
                WBXML_STRCMP(localName, "syncml:dmddf1.2:MgmtTree") == 0)
            {
                /* definitions first ... or some compilers don't like it */
                WBXMLBuffer *embed_doc = NULL;
                WBXMLTree *tree = NULL;
                const WBXMLLangEntry *lang;

                /* Get embedded DevInf or DM DDF Document: the source bytes from
                 * the recorded start of the skipped element up to the current
                 * parser position. The third argument is that same length
                 * plus 10. */
                embed_doc = wbxml_buffer_create(tree_ctx->input_buff + tree_ctx->skip_start,
                                                XML_GetCurrentByteIndex(tree_ctx->xml_parser) - tree_ctx->skip_start,
                                                XML_GetCurrentByteIndex(tree_ctx->xml_parser) - tree_ctx->skip_start + 10);
                if (embed_doc == NULL) {
                    tree_ctx->error = WBXML_ERROR_NOT_ENOUGH_MEMORY;
                    wbxml_buffer_destroy(embed_doc);
                    return;
                }

                if (tree_ctx->expat_utf16) {
                    /** @todo Convert from UTF-16 to UTF-8 */
                }

                /* Check Buffer Creation and add the closing tag */
                if ((WBXML_STRCMP(localName, "syncml:devinf:DevInf") == 0 &&
                     (!wbxml_buffer_append_cstr(embed_doc, "</DevInf>"))) ||
                    (WBXML_STRCMP(localName, "syncml:dmddf1.2:MgmtTree") == 0 &&
                     (!wbxml_buffer_append_cstr(embed_doc, "</MgmtTree>"))))
                {
                    tree_ctx->error = WBXML_ERROR_NOT_ENOUGH_MEMORY;
                    wbxml_buffer_destroy(embed_doc);
                    return;
                }

                /* Add doctype to give the XML parser a chance.
                 * An embedded MgmtTree is only accepted inside a SyncML 1.2 document. */
                if (WBXML_STRCMP(localName, "syncml:dmddf1.2:MgmtTree") == 0 &&
                    tree_ctx->tree->lang->langID != WBXML_LANG_SYNCML_SYNCML12)
                {
                    tree_ctx->error = WBXML_ERROR_UNKNOWN_XML_LANGUAGE;
                    wbxml_buffer_destroy(embed_doc);
                    return;
                }

                /* Pick the language table of the embedded document from the
                 * language of the enclosing SyncML document */
                switch(tree_ctx->tree->lang->langID)
                {
                    case WBXML_LANG_SYNCML_SYNCML10:
                        lang = wbxml_tables_get_table(WBXML_LANG_SYNCML_DEVINF10);
                        break;
                    case WBXML_LANG_SYNCML_SYNCML11:
                        lang = wbxml_tables_get_table(WBXML_LANG_SYNCML_DEVINF11);
                        break;
                    case WBXML_LANG_SYNCML_SYNCML12:
                        if (WBXML_STRCMP(localName, "syncml:dmddf1.2:MgmtTree") == 0) {
                            lang = wbxml_tables_get_table(WBXML_LANG_SYNCML_DMDDF12);
                        } else {
                            lang = wbxml_tables_get_table(WBXML_LANG_SYNCML_DEVINF12);
                        }
                        break;
                    default:
                        tree_ctx->error = WBXML_ERROR_UNKNOWN_XML_LANGUAGE;
                        wbxml_buffer_destroy(embed_doc);
                        return;
                }

                assert (lang!= NULL);
                if (lang == NULL) {
                    tree_ctx->error = WBXML_ERROR_UNKNOWN_XML_LANGUAGE;
                    wbxml_buffer_destroy(embed_doc);
                    return;
                }

                /* DOCTYPE in reverse order: every piece is inserted at position 0,
                 * so the last insertion ends up first in the buffer */
                if (!wbxml_buffer_insert_cstr(embed_doc,(WB_UTINY *) "\">\n", 0) ||                     /* > */
                    !wbxml_buffer_insert_cstr(embed_doc, (WB_UTINY *) lang->publicID->xmlDTD, 0) ||      /* DTD */
                    !wbxml_buffer_insert_cstr(embed_doc, (WB_UTINY *) "\" \"", 0) ||                    /* DTD */
                    !wbxml_buffer_insert_cstr(embed_doc, (WB_UTINY *) lang->publicID->xmlPublicID, 0) || /* Public ID */
                    !wbxml_buffer_insert_cstr(embed_doc, (WB_UTINY *) " PUBLIC \"", 0) ||               /* PUBLIC " */
                    !wbxml_buffer_insert_cstr(embed_doc, (WB_UTINY *) lang->publicID->xmlRootElt, 0) ||  /* Root Element */
                    !wbxml_buffer_insert_cstr(embed_doc, (WB_UTINY *) "<!DOCTYPE ", 0))                  /* <!DOCTYPE */
                {
                    tree_ctx->error = WBXML_ERROR_ENCODER_APPEND_DATA;
                    wbxml_buffer_destroy(embed_doc);
                    return;
                }

                WBXML_DEBUG((WBXML_PARSER, "\t Embedded Doc : '%s'", wbxml_buffer_get_cstr(embed_doc)));

                /* Parse 'DevInf' Document */
                if ((ret = wbxml_tree_from_xml(wbxml_buffer_get_cstr(embed_doc),
                                               wbxml_buffer_len(embed_doc),
                                               &tree)) != WBXML_OK)
                {
                    tree_ctx->error = ret;
                    wbxml_buffer_destroy(embed_doc);
                    return;
                }

                /* Add Tree Node */
                tree_ctx->current = wbxml_tree_add_tree(tree_ctx->tree,
                                                        tree_ctx->current,
                                                        tree);
                if (tree_ctx->current == NULL)
                {
                    tree_ctx->error = WBXML_ERROR_INTERNAL;
                    wbxml_tree_destroy(tree);
                    wbxml_buffer_destroy(embed_doc);
                    return;
                }

                /* Clean-up */
                wbxml_buffer_destroy(embed_doc);
                tree_ctx->skip_lvl = 0;
            }

#endif /* WBXML_SUPPORT_SYNCML */

        }
        else {
            /* Still inside the skipped node: just go up one level */
            tree_ctx->skip_lvl--;
            return;
        }
    }

    if (tree_ctx->current == NULL) {
        tree_ctx->error = WBXML_ERROR_INTERNAL;
        return;
    }

    if (tree_ctx->current->parent == NULL) {
        /* This must be the Root Element */
        if (tree_ctx->current != tree_ctx->tree->root) {
            tree_ctx->error = WBXML_ERROR_INTERNAL;
        }
    }
    else {
#if defined ( WBXML_SUPPORT_SYNCML )
        /* Have we added a missing CDATA section ?
         * If so, we assume that now that we have reached an end of Element,
         * the CDATA section ended, and so we go back to parent.
         */
        if ((tree_ctx->current != NULL) && (tree_ctx->current->type == WBXML_TREE_CDATA_NODE))
            tree_ctx->current = tree_ctx->current->parent;
#endif /* WBXML_SUPPORT_SYNCML */

        /* Go back one step upper in the tree */
        tree_ctx->current = tree_ctx->current->parent;
    }
}
void wbxml_tree_clb_xml_start_element(void *ctx, const XML_Char *localName, const XML_Char **attrs) { WBXMLTreeClbCtx *tree_ctx = (WBXMLTreeClbCtx *) ctx; const WBXMLLangEntry *lang_table = NULL; WBXML_DEBUG((WBXML_PARSER, "Expat element start callback ('%s')", localName)); if (tree_ctx->expat_utf16) { /** @todo Convert from UTF-16 to UTF-8 */ } /* Check for Error */ if (tree_ctx->error != WBXML_OK) return; /* Are we skipping a whole node ? */ if (tree_ctx->skip_lvl > 0) { tree_ctx->skip_lvl++; return; } if (tree_ctx->current == NULL) { /* This is the Root Element */ if (tree_ctx->tree->lang == NULL) { /* Language Table not already found: Search again */ lang_table = wbxml_tables_search_table(wbxml_tables_get_main(), NULL, NULL, (const WB_UTINY *) localName); if (lang_table == NULL) { /* Damn, this is an unknown language for us... */ tree_ctx->error = WBXML_ERROR_UNKNOWN_XML_LANGUAGE; return; } else { /* Well, we hope this was the Language we are searching for.. let's try with it :| */ tree_ctx->tree->lang = lang_table; } } } #if defined( WBXML_SUPPORT_SYNCML ) /* If this is an embedded (not root) document, skip it * Actually SyncML DevInf and DM DDF are known as such * potentially embedded documents. */ if (( (WBXML_STRCMP(localName, "syncml:devinf:DevInf") == 0) || (WBXML_STRCMP(localName, "syncml:dmddf1.2:MgmtTree") == 0) )&& (tree_ctx->current != NULL)) { tree_ctx->skip_start = XML_GetCurrentByteIndex(tree_ctx->xml_parser); /* Skip this node */ tree_ctx->skip_lvl++; return; } #endif /* WBXML_SUPPORT_SYNCML */ /* Add Element Node */ tree_ctx->current = wbxml_tree_add_xml_elt_with_attrs(tree_ctx->tree, tree_ctx->current, (WB_UTINY *) localName, (const WB_UTINY**) attrs); if (tree_ctx->current == NULL) { tree_ctx->error = WBXML_ERROR_NOT_ENOUGH_MEMORY; } }
/*
 * Interface entry point that forwards to Expat's XML_GetCurrentByteIndex()
 * for the given parser.  Self is not used.
 */
long _Expat_XML_GetCurrentByteIndex(struct ExpatIFace * Self, XML_Parser parser)
{
	long offset;

	(void) Self;

	offset = XML_GetCurrentByteIndex(parser);
	return offset;
}
// Returns Expat's current byte index for the XmlParser held by the resource.
int64_t f_xml_get_current_byte_index(const Resource& parser) {
  return XML_GetCurrentByteIndex(parser.getTyped<XmlParser>()->parser);
}
/****f* xml_element/xml_elem_parse_buf * NAME * xml_elem_parse_buf * SYNOPSIS * xml_element* xml_elem_parse_buf(const char* in_buf, int len, XML_ELEM_INPUT_OPTIONS options, XML_ELEM_ERROR error) * FUNCTION * parse a buffer containing XML into an xml_element in-memory tree * INPUTS * in_buf - buffer containing XML document * len - length of buffer * options - input options. optional * error - error result data. optional. check if result is null. * RESULT * void * NOTES * The returned data must be free'd by caller * SEE ALSO * xml_elem_serialize_to_string () * xml_elem_free () * SOURCE */ xml_element* xml_elem_parse_buf(const char* in_buf, int len, XML_ELEM_INPUT_OPTIONS options, XML_ELEM_ERROR error) { xml_element* xReturn = NULL; char buf[100] = ""; static STRUCT_XML_ELEM_INPUT_OPTIONS default_opts = {encoding_utf_8}; if(!options) { options = &default_opts; } if(in_buf) { XML_Parser parser; xml_elem_data mydata = {0}; parser = XML_ParserCreate(NULL); mydata.root = xml_elem_new(); mydata.current = mydata.root; mydata.input_options = options; mydata.needs_enc_conversion = options->encoding && strcmp(options->encoding, encoding_utf_8); XML_SetElementHandler(parser, (XML_StartElementHandler)_xmlrpc_startElement, (XML_EndElementHandler)_xmlrpc_endElement); XML_SetCharacterDataHandler(parser, (XML_CharacterDataHandler)_xmlrpc_charHandler); /* pass the xml_elem_data struct along */ XML_SetUserData(parser, (void*)&mydata); if(!len) { len = strlen(in_buf); } /* parse the XML */ if(XML_Parse(parser, in_buf, len, 1) == 0) { enum XML_Error err_code = XML_GetErrorCode(parser); int line_num = XML_GetCurrentLineNumber(parser); int col_num = XML_GetCurrentColumnNumber(parser); long byte_idx = XML_GetCurrentByteIndex(parser); /* int byte_total = XML_GetCurrentByteCount(parser); */ const char * error_str = XML_ErrorString(err_code); if(byte_idx >= 0) { snprintf(buf, sizeof(buf), "\n\tdata beginning %ld before byte index: %s\n", byte_idx > 10 ? 
10 : byte_idx, in_buf + (byte_idx > 10 ? byte_idx - 10 : byte_idx)); } /* fprintf(stderr, "expat reports error code %i\n" "\tdescription: %s\n" "\tline: %i\n" "\tcolumn: %i\n" "\tbyte index: %ld\n" "\ttotal bytes: %i\n%s ", err_code, error_str, line_num, col_num, byte_idx, byte_total, buf); */ /* error condition */ if(error) { error->parser_code = (long)err_code; error->line = line_num; error->column = col_num; error->byte_index = byte_idx; error->parser_error = error_str; } } else { xReturn = (xml_element*)Q_Head(&mydata.root->children); xReturn->parent = NULL; } XML_ParserFree(parser); xml_elem_free_non_recurse(mydata.root); } return xReturn; }
// Returns Expat's current byte index for m_parser.
// Asserts that the parser handle exists.
long XMLParser::GetCurrentByteIndex(void)
{
	assert(m_parser != NULL);

	const long byteIndex = XML_GetCurrentByteIndex(m_parser);
	return byteIndex;
}
/**
 * handler for read data
 *
 * Feeds the bytes in buf to the stream's Expat parser, then processes every
 * nad queued on s->rnadq once the stream has reached state_STREAM.
 *
 * s   - the stream the data was read for
 * buf - the data; it is freed by this function on every path
 *
 * In order:
 *  - an Expat parse error raises an error event, sends a not-well-formed
 *    stream error and closes the stream (only if s->fail is not already set);
 *  - exceeding s->rbytesmax parsed bytes raises an error event, sends a
 *    policy-violation stream error and closes the stream;
 *  - each completed nad is checked for a stream error element and (for
 *    websocket-wrapped streams) a framing close element, then run through
 *    the nad-read chain and the plugins' process hooks, and finally handed
 *    to the application as a packet event;
 *  - if s->fail is set afterwards the stream is closed;
 *  - if the stream depth dropped below zero, the closing tag/frame is queued
 *    when needed and the stream moves to state_CLOSING.
 */
void _sx_process_read(sx_t s, sx_buf_t buf) {
    sx_error_t sxe;
    nad_t nad;
    char *errstring;
    int i;
    int ns, elem;

    /* Note that buf->len can validly be 0 here, if we got data from
       the socket but the plugin didn't return anything to us (e.g. a
       SSL packet was split across a tcp segment boundary) */

    /* count bytes parsed */
    s->pbytes += buf->len;

    /* parse it */
    if(XML_Parse(s->expat, buf->data, buf->len, 0) == 0) {
        /* only report error we haven't already */
        if(!s->fail) {
            /* parse error */
            errstring = (char *) XML_ErrorString(XML_GetErrorCode(s->expat));

            /* the reported character position is the parser's byte index
               minus the bytes processed in earlier calls (s->tbytes) */
            _sx_debug(ZONE, "XML parse error: %s, character %d: %.*s",
                      errstring, XML_GetCurrentByteIndex(s->expat) - s->tbytes, buf->len, buf->data);
            _sx_gen_error(sxe, SX_ERR_XML_PARSE, "XML parse error", errstring);
            _sx_event(s, event_ERROR, (void *) &sxe);

            _sx_error(s, stream_err_XML_NOT_WELL_FORMED, errstring);
            _sx_close(s);

            _sx_buffer_free(buf);

            return;
        }

        /* !!! is this the right thing to do? we should probably set
         *     s->fail and let the code further down handle it. */
        _sx_buffer_free(buf);

        return;
    }

    /* check if the stanza size limit is exceeded (it wasn't reset by parser) */
    if(s->rbytesmax && s->pbytes > s->rbytesmax) {
        /* parse error */
        _sx_debug(ZONE, "maximum stanza size (%d) exceeded by reading %d bytes", s->rbytesmax, s->pbytes);

        /* NOTE(review): XML_Parse succeeded on this path, so this is the
           parser's current error text rather than a size-limit message -
           confirm that this is what should be sent to the peer */
        errstring = (char *) XML_ErrorString(XML_GetErrorCode(s->expat));

        _sx_gen_error(sxe, SX_ERR_XML_PARSE, "stream read error", "Maximum stanza size exceeded");
        _sx_event(s, event_ERROR, (void *) &sxe);

        _sx_error(s, stream_err_POLICY_VIOLATION, errstring);
        _sx_close(s);

        _sx_buffer_free(buf);

        return;
    }

    /* count bytes processed */
    s->tbytes += buf->len;

    /* done with the buffer */
    _sx_buffer_free(buf);

    /* process completed nads */
    if(s->state >= state_STREAM)
        while((nad = jqueue_pull(s->rnadq)) != NULL) {
            int plugin_error;
#ifdef SX_DEBUG
            const char *out; int len;
            nad_print(nad, 0, &out, &len);
            _sx_debug(ZONE, "completed nad: %.*s", len, out);
#endif

            /* check for errors: root element named "error" in the streams namespace */
            if(NAD_ENS(nad, 0) >= 0 && NAD_NURI_L(nad, NAD_ENS(nad, 0)) == strlen(uri_STREAMS) && strncmp(NAD_NURI(nad, NAD_ENS(nad, 0)), uri_STREAMS, strlen(uri_STREAMS)) == 0 && NAD_ENAME_L(nad, 0) == 5 && strncmp(NAD_ENAME(nad, 0), "error", 5) == 0) {

                errstring = NULL;

                /* get text error description if available - XMPP 4.7.2 */
                if((ns = nad_find_scoped_namespace(nad, uri_STREAM_ERR, NULL)) >= 0)
                    if((elem = nad_find_elem(nad, 0, ns, "text", 1)) >= 0)
                        if(NAD_CDATA_L(nad, elem) > 0) {
                            /* NOTE(review): the malloc results in this block are
                               used without a NULL check */
                            errstring = (char *) malloc(sizeof(char) * (NAD_CDATA_L(nad, elem) + 1));
                            sprintf(errstring, "%.*s", NAD_CDATA_L(nad, elem), NAD_CDATA(nad, elem));
                        }

                /* if not available, look for legacy error text as in <stream:error>description</stream:error> */
                if (errstring == NULL && NAD_CDATA_L(nad, 0) > 0) {
                    errstring = (char *) malloc(sizeof(char) * (NAD_CDATA_L(nad, 0) + 1));
                    sprintf(errstring, "%.*s", NAD_CDATA_L(nad, 0), NAD_CDATA(nad, 0));
                }

                /* if not available, log the whole packet for debugging */
                if (errstring == NULL) {
                    const char *xml;
                    int xlen;

                    nad_print(nad, 0, &xml, &xlen);
                    errstring = (char *) malloc(sizeof(char) * (xlen + 1));
                    sprintf(errstring, "%.*s", xlen, xml);
                }

                if(s->state < state_CLOSING) {
                    _sx_gen_error(sxe, SX_ERR_STREAM, "Stream error", errstring);
                    _sx_event(s, event_ERROR, (void *) &sxe);
                    _sx_state(s, state_CLOSING);
                }

                free(errstring);

                nad_free(nad);

                /* stop processing further nads after a stream error */
                break;
            }

            /* check for close: root element named "close" in the framing
               namespace, only on websocket-wrapped streams */
            if ((s->flags & SX_WEBSOCKET_WRAPPER) && NAD_ENS(nad, 0) >= 0 && NAD_NURI_L(nad, NAD_ENS(nad, 0)) == strlen(uri_XFRAMING) && strncmp(NAD_NURI(nad, NAD_ENS(nad, 0)), uri_XFRAMING, strlen(uri_XFRAMING)) == 0 && NAD_ENAME_L(nad, 0) == 5 && strncmp(NAD_ENAME(nad, 0), "close", 5) == 0) {
                _sx_debug(ZONE, "<close/> frame @ depth %d", s->depth);
                /* handled by the s->fail check after the loop */
                s->fail = 1;
                break;
            }

            /* run it by the plugins; a zero result ends processing for this call */
            if(_sx_chain_nad_read(s, nad) == 0)
                return;

            /* now let the plugins process the completed nad */
            plugin_error = 0;
            if(s->env != NULL)
                for(i = 0; i < s->env->nplugins; i++)
                    if(s->env->plugins[i]->process != NULL) {
                        int plugin_ret;
                        plugin_ret = (s->env->plugins[i]->process)(s, s->env->plugins[i], nad);
                        /* a zero return stops the remaining plugins and keeps
                           the nad from reaching the application */
                        if(plugin_ret == 0) {
                            plugin_error ++;
                            break;
                        }
                    }

            /* hand it to the app */
            if ((plugin_error == 0) && (s->state < state_CLOSING))
                _sx_event(s, event_PACKET, (void *) nad);
        }

    /* something went wrong, bail */
    if(s->fail) {
        _sx_close(s);

        return;
    }

    /* stream was closed */
    if(s->depth < 0 && s->state < state_CLOSING) {
        /* close the stream if necessary */
        if(s->state >= state_STREAM_SENT) {
            if (s->flags & SX_WEBSOCKET_WRAPPER)
                /* sizeof(uri_XFRAMING) + 17 is the length of the concatenated
                   literal without its terminating NUL */
                jqueue_push(s->wbufq, _sx_buffer_new("<close xmlns='" uri_XFRAMING "' />", sizeof(uri_XFRAMING) + 17, NULL, NULL), 0);
            else
                jqueue_push(s->wbufq, _sx_buffer_new("</stream:stream>", 16, NULL, NULL), 0);
            s->want_write = 1;
        }

        _sx_state(s, state_CLOSING);

        return;
    }
}