static void my_startElementNs(void *ctx, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes) { debug_out("%d:%d: start %s:%s (%s)\n", xmlSAX2GetLineNumber(ctx), xmlSAX2GetColumnNumber(ctx), prefix, localname, URI); for (int xpath_num = 0; xpath_num < num_xpaths; ++xpath_num) { XPathFinder *xpath_finder = &xpath_finders[xpath_num]; debug_out(" Checking xpath finder #%d; line_number = %d\n", xpath_num, xpath_finder->line_number); int xpf_level = xpath_finder->current_level; // This shouldn't happen, but just to be sure if (xpf_level >= xpath_finder->num_segs) continue; // If this XPath hasn't been found yet, and we're at the right level if (xpath_finder->line_number == 0 && xpf_level == parser_level) { XPathSegFinder *seg_finder = &xpath_finder->seg_finders[xpf_level]; debug_out(" Checking seg_finder[%d]\n", xpf_level); const xmlChar *seg_uri = seg_finder->namespace_uri; if ( !xstrcmp(localname, seg_finder->local_name) && ( (URI == NULL && seg_uri == NULL) || (URI != NULL && seg_uri != NULL && !xstrcmp(URI, seg_uri)) ) ) { debug_out(" element match!\n"); seg_finder->count++; if (seg_finder->count == seg_finder->position) { debug_out(" position match! parser_level = %d, " "num_segs = %d\n", parser_level, xpath_finder->num_segs); if (parser_level == xpath_finder->num_segs - 1) { // Found! int line_number = xmlSAX2GetLineNumber(ctx); debug_out(" line_number <- %d\n", line_number); xpath_finder->line_number = line_number; xpath_finder->column_number = xmlSAX2GetColumnNumber(ctx); } else { xpath_finder->current_level++; } } } } } parser_level++; }
/*
 * SAX2 end-element callback: steps parser_level back out of the element
 * that just closed, then logs the event with the parser's current
 * line and column.
 */
static void my_endElementNs(void *ctx, const xmlChar *localname,
                            const xmlChar *prefix, const xmlChar *URI)
{
    parser_level--;

    /*
     * prefix and URI may be NULL for elements without a namespace; passing
     * NULL for "%s" is undefined behaviour, so print a placeholder instead.
     */
    debug_out("%d:%d: end %s:%s (%s)\n",
              xmlSAX2GetLineNumber(ctx), xmlSAX2GetColumnNumber(ctx),
              prefix != NULL ? (const char *)prefix : "(null)",
              localname,
              URI != NULL ? (const char *)URI : "(null)");
}
/*
 * Expand @message with @argv, prefix the result with
 * "<system id>:<line>:<column>: " taken from the parser context @ctx, and
 * hand the final text to g_log() under G_LOG_DOMAIN with level @flags.
 */
static void
log_v (gpointer        ctx,
       gchar const    *message,
       va_list         argv,
       GLogLevelFlags  flags)
{
  gchar *body;
  gchar *text;

  body = g_strdup_vprintf (message, argv);
  text = g_strdup_printf ("%s:%d:%d: %s",
                          xmlSAX2GetSystemId (ctx),
                          xmlSAX2GetLineNumber (ctx),
                          xmlSAX2GetColumnNumber (ctx),
                          body);
  /* The formatted body has been copied into text and is no longer needed */
  g_free (body);

  g_log (G_LOG_DOMAIN, flags, "%s", text);
  g_free (text);
}
/*
 * get_tag_location:
 * @self: the IdeXmlSax whose parser context is inspected
 * @line: receives the line number of the tag start
 * @line_offset: receives the offset of the tag start within its line
 * @end_line: receives the line number of the tag end
 * @end_line_offset: receives the offset of the tag end within its line
 * @content: receives a pointer into the parser input buffer at the tag start
 * @size: receives the distance in bytes from the tag start to the parser
 *        cursor, plus a per-case adjustment (size_offset)
 *
 * Locates the tag around the parser cursor (input->cur) by scanning the
 * input buffer backwards, starting from the line number reported by
 * xmlSAX2GetLineNumber().
 *
 * On the normal path the offsets are byte distances from the start of the
 * line plus one (plain pointer subtraction), not character counts.
 *
 * There are two early-return paths:
 *  - "empty node" (the byte before the cursor is '<'): lines and offsets are
 *    derived from xmlSAX2GetColumnNumber() - 1 and *size is 1;
 *    *content is NOT written on this path.
 *  - no '>' under the cursor after adjustment: *line and both offsets are
 *    set from the parser's line/column, *content is NULL and *size is 0.
 */
static void
get_tag_location (IdeXmlSax *self, gint *line, gint *line_offset, gint *end_line, gint *end_line_offset, const gchar **content, gsize *size)
{
  xmlParserInput *input;
  const gchar *base;
  const gchar *current;
  const gchar *end_current;
  const gchar *line_start;
  const gchar *end_line_start;
  gint start_line_number;
  gint end_line_number;
  /* Added to (cur - tag start) when computing *size; 1 covers a cursor sitting on '>' */
  gint size_offset = 1;
  gunichar ch;
  /* TRUE once end_line_start has been determined */
  gboolean end_line_found = FALSE;

  g_assert (IDE_IS_XML_SAX (self));
  g_assert (line != NULL);
  g_assert (line_offset != NULL);
  g_assert (end_line != NULL);
  g_assert (end_line_offset != NULL);
  g_assert (content != NULL);
  g_assert (size != NULL);

  /* TODO: handle other types of line break */

  input = self->context->input;
  base = (const gchar *)input->base;
  current = (const gchar *)input->cur;
  /* Start and end line both begin at the parser's current line and are corrected below */
  *end_line = end_line_number = start_line_number = xmlSAX2GetLineNumber (self->context);

  /* Adjust the element size, can be a start, a end or an auto-closed one */
  ch = g_utf8_get_char (current);
  if (ch != '>')
    {
      /* End element case */
      if (current > base && g_utf8_get_char (current - 1) == '>')
        {
          /* The cursor is already past '>': step back onto it, nothing to add to the size */
          --current;
          size_offset = 0;
        }
      /* Auto-closed start element case */
      else if (ch == '/' && g_utf8_get_char (current + 1) == '>')
        {
          /* The cursor is on the '/' of "/>": move onto '>' and count both bytes */
          ++current;
          size_offset = 2;
        }
      /* Not properly closed tag */
      else
        {
          /* NOTE(review): no check that current > base before stepping back —
           * confirm the cursor can never equal base on this path. */
          ch = g_utf8_get_char (--current);
          if (ch == '<')
            {
              /* Empty node */
              /* NOTE(review): *content is left untouched here although *size is
               * set to 1 — confirm callers pre-initialise it. */
              *line = *end_line = end_line_number;
              *line_offset = *end_line_offset = xmlSAX2GetColumnNumber (self->context) - 1;
              *size = 1;
              return;
            }
          else
            {
              /* Walk back over whitespace; every newline seen moves the end
               * line up by one.  Stops on the first non-space character or
               * at the start of the buffer. */
              while (current >= base)
                {
                  if (ch == '\n')
                    --end_line_number;
                  if (!g_unichar_isspace (ch) || current == base)
                    break;
                  current = g_utf8_prev_char (current);
                  ch = g_utf8_get_char (current);
                }
              /* That position is taken as the end of the tag */
              end_current = current;
              *end_line = start_line_number = end_line_number;
              size_offset = 0;
              /* Skip the '>' check below: this tag has no closing '>' */
              goto next;
            }
        }
    }

  end_current = current;
  /* NOTE(review): every branch reaching this point has left current on a '>',
   * so this early return looks unreachable — confirm. */
  if (g_utf8_get_char (current) != '>')
    {
      *line = start_line_number;
      *line_offset = *end_line_offset = xmlSAX2GetColumnNumber (self->context);
      *content = NULL;
      *size = 0;
      return;
    }

next:
  /* Search back the tag start and adjust the start and end line */
  while (current > base)
    {
      ch = g_utf8_get_char (current);
      if (ch == '<')
        break;
      if (ch == '\n')
        {
          --start_line_number;
          /* The first newline met while walking back marks where the end line begins */
          if (!end_line_found )
            {
              end_line_start = current + 1;
              end_line_found = TRUE;
            }
        }
      current = g_utf8_prev_char (current);
    }

  /* Search back the tag start offset */
  line_start = current;
  while (line_start > base)
    {
      ch = g_utf8_get_char (line_start);
      if (ch == '\n')
        {
          /* The line begins just after the newline */
          ++line_start;
          if (!end_line_found )
            {
              end_line_start = line_start;
              end_line_found = TRUE;
            }
          break;
        }
      line_start = g_utf8_prev_char (line_start);
    }

  /* No newline was seen at all: the tag starts and ends on the line beginning at line_start */
  if (!end_line_found)
    end_line_start = line_start;

  *line = start_line_number;
  *line_offset = (current - line_start) + 1;
  *end_line_offset = (end_current - end_line_start) + 1;
  *content = current;
  *size = (const gchar *)input->cur - current + size_offset;
}
/*!
 * Parse the file \a szFilename with a libxml2 push parser.
 *
 * The file is read in 2048-byte chunks through m_pReader, or through a local
 * DefaultReader when m_pReader is not set, and each chunk is handed to
 * xmlParseChunk().  SAX events are dispatched to the static callbacks
 * installed in the handler table, with \c this as user data.
 *
 * \param szFilename path of the file to parse; must not be null
 * \return UT_ERROR          if \a szFilename is null, no listener is set, or
 *                           the parser context cannot be created
 *         UT_OUTOFMEM       if reset_all() fails
 *         UT_errnoToUTError() if the reader cannot open the file
 *         UT_IE_IMPORTERROR if a chunk fails with more minor errors than
 *                           recovered ones, the terminating chunk fails, or
 *                           the document is not well formed
 *         UT_OK             otherwise (including an empty file)
 */
UT_Error UT_XML::parse (const char * szFilename)
{
	UT_ASSERT (m_pListener || m_pExpertListener);
	UT_ASSERT (szFilename);

	if ((szFilename == 0) || ((m_pListener == 0) && (m_pExpertListener == 0)))
		return UT_ERROR;
	if (!reset_all ())
		return UT_OUTOFMEM;

	UT_Error ret = UT_OK;

	DefaultReader defaultReader;
	Reader * reader = &defaultReader;
	if (m_pReader)
		reader = m_pReader;

	if (!reader->openFile (szFilename))
	{
		UT_DEBUGMSG (("Could not open file %s\n", szFilename));
		return UT_errnoToUTError ();
	}

	char buffer[2048];

	m_bStopped = false;

	xmlSAXHandler hdl;
	xmlParserCtxtPtr ctxt = 0;

	memset(&hdl, 0, sizeof(hdl));

	hdl.getEntity = _getEntity;
	hdl.startElement = _startElement;
	hdl.endElement = _endElement;
	hdl.characters = _charData;
	hdl.error = _errorSAXFunc;
	hdl.fatalError = _fatalErrorSAXFunc;
	hdl.processingInstruction = _processingInstruction;
	hdl.comment = _comment;
	hdl.cdataBlock = _cdata;

	// A short read means the reader has nothing more to deliver.
	size_t length = reader->readBytes (buffer, sizeof (buffer));
	int done = (length < sizeof (buffer));

	if (length != 0)
	{
		// The first chunk is consumed by the context constructor itself.
		ctxt = xmlCreatePushParserCtxt (&hdl, static_cast<void *>(this), buffer, static_cast<int>(length), szFilename);
		if (ctxt == NULL)
		{
			UT_DEBUGMSG (("Unable to create libxml2 push-parser context!\n"));
			reader->closeFile ();
			return UT_ERROR;
		}
		// NOTE(review): this changes a libxml2 global default after the
		// context already exists - confirm it is meant to affect this parse.
		xmlSubstituteEntitiesDefault (1);

		UT_sint32 chucks = -1;
		while (!done && !m_bStopped)
		{
			chucks++;
			length = reader->readBytes (buffer, sizeof (buffer));
			// length is a size_t, so it needs the unsigned %zu conversion.
			UT_DEBUGMSG(("Done chunk %d length %zu \n",chucks,length));
			done = (length < sizeof (buffer));

			if (xmlParseChunk (ctxt, buffer, static_cast<int>(length), 0))
			{
				// Only give up when there are errors that were not recovered.
				if(getNumMinorErrors() > getNumRecoveredErrors())
				{
					UT_DEBUGMSG (("Error - 1 parsing '%s' (Line: %d, Column: %d)\n", szFilename, xmlSAX2GetLineNumber(ctxt), xmlSAX2GetColumnNumber(ctxt)));
					ret = UT_IE_IMPORTERROR;
					break;
				}
			}
		}

		// Send the terminating (empty) chunk only for a clean, unstopped parse.
		if (ret == UT_OK)
		{
			if (!m_bStopped && (getNumMinorErrors() == 0))
			{
				if (xmlParseChunk (ctxt, "", 0, 1))
				{
					UT_DEBUGMSG (("Error -2 parsing '%s' (Line: %d, Column: %d)\n", szFilename, xmlSAX2GetLineNumber(ctxt), xmlSAX2GetColumnNumber(ctxt)));
					ret = UT_IE_IMPORTERROR;
				}
			}
		}

		if (ret == UT_OK && (getNumMinorErrors() == 0))
		{
			if (!ctxt->wellFormed && !m_bStopped)
				ret = UT_IE_IMPORTERROR; // How does stopping mid-file affect wellFormed?
		}

		// Grab the document pointer before the context that holds it is freed.
		xmlDocPtr myXmlDoc = ctxt->myDoc;
		xmlFreeParserCtxt (ctxt);
		xmlFreeDoc(myXmlDoc);
	}
	else
	{
		UT_DEBUGMSG(("Empty file to parse - not sure how to proceed\n"));
	}

	reader->closeFile ();

	return ret;
}
/// Returns the value of xmlSAX2GetColumnNumber() for mParserContext,
/// converted to size_t.
size_t LibxmlSaxParser::getColumnNumer() const
{
	const int column = xmlSAX2GetColumnNumber(mParserContext);
	return static_cast<size_t>(column);
}