Ejemplo n.º 1
0
/*
 * Parses the XML text in a NUL-terminated string by walking its top level
 * entities (processing instructions, comments, DOCTYPE, and elements) and
 * handing each one to the matching read_xxx() function.
 *
 * xml    - text to parse; a null pointer is reported through raise_error()
 * pcb    - parse callbacks, stored in the parse info passed to the readers
 * endp   - optional; when not null and more non-white text follows the first
 *          element read, *endp is set to the start of that text and parsing
 *          stops. It is left untouched when the input ends first.
 * trace  - trace level; at DEBUG or above the input is printed to stdout
 * effort - stored in the parse info passed to the readers
 *
 * Returns pi.obj, which starts out as Qnil (presumably replaced by the
 * readers or callbacks while the document is built - confirm against them).
 */
VALUE
ox_parse(char *xml, ParseCallbacks pcb, char **endp, int trace, Effort effort) {
    struct _PInfo	pi;
    int			element_seen = 0;

    if (0 == xml) {
	raise_error("Invalid arg, xml string can not be null", xml, 0);
    }
    if (DEBUG <= trace) {
	printf("Parsing xml:\n%s\n", xml);
    }
    /* Parse state: input cursor first, then caller settings, then defaults. */
    pi.str = xml;
    pi.s = xml;
    pi.pcb = pcb;
    pi.trace = trace;
    pi.effort = effort;
    pi.obj = Qnil;
    pi.h = 0;
    pi.circ_array = 0;
    pi.encoding = 0;

    for (;;) {
	next_non_white(&pi);
	if ('\0' == *pi.s) {
	    break;	/* end of input */
	}
	if (element_seen && 0 != endp) {
	    /* An element has been read and the caller wants the leftover text. */
	    *endp = pi.s;
	    break;
	}
	if ('<' != *pi.s) {
	    /* every top level entity has to open with < */
	    raise_error("invalid format, expected <", pi.str, pi.s);
	}
	pi.s++;	/* step over < */

	if ('?' == *pi.s) {
	    /* prolog or other processing instruction */
	    pi.s++;
	    read_instruction(&pi);
	} else if ('!' == *pi.s) {
	    /* either a comment <!-- or a <!DOCTYPE */
	    pi.s++;
	    if ('-' == *pi.s) {
		pi.s++;	/* first - */
		if ('-' == *pi.s) {
		    pi.s++;	/* second - */
		    read_comment(&pi);
		} else {
		    raise_error("invalid format, bad comment format", pi.str, pi.s);
		}
	    } else if ('\0' == *pi.s) {
		raise_error("invalid format, DOCTYPE or comment not terminated", pi.str, pi.s);
	    } else if (0 == strncmp("DOCTYPE", pi.s, 7)) {
		pi.s += 7;
		read_doctype(&pi);
	    } else {
		raise_error("invalid format, DOCTYPE or comment expected", pi.str, pi.s);
	    }
	} else {
	    if ('\0' == *pi.s) {
		/* if raise_error() returns, processing continues with read_element() */
		raise_error("invalid format, document not terminated", pi.str, pi.s);
	    }
	    read_element(&pi);
	    element_seen = 1;
	}
    }
    return pi.obj;
}
Ejemplo n.º 2
0
/*
 * Main loop of the SAX parser. Reads characters from dr->buf one at a time,
 * dispatches on what follows each '<' (instruction, comment, DOCTYPE, CDATA,
 * element end, element start) and treats anything else as text. Problems are
 * reported through ox_sax_drive_error()/ox_sax_drive_error_at() and, except
 * for premature end of input, parsing continues afterwards.
 *
 * 'state' tracks the position relative to the top level element:
 *   START_STATE - initial value, no element start seen yet
 *   BODY_STATE  - set when an element start is about to be read
 *   AFTER_STATE - set when stack_peek() returns 0 after an element start or
 *                 end (presumably the element stack is empty - confirm)
 *
 * After the loop every element still on dr->stack is reported as not closed.
 */
static void
parse(SaxDrive dr) {
    /* presumably skips a byte order mark and returns the first character after it - confirm in skipBOM() */
    char        c = skipBOM(dr);
    int		state = START_STATE;

    /* each reader returns the next character to examine; '\0' ends the loop */
    while ('\0' != c) {
	buf_protect(&dr->buf);
        /* skip leading white space and stop if nothing but white space remained */
        if (is_white(c) && '\0' == (c = buf_next_non_white(&dr->buf))) {
            break;
        }
	if ('<' == c) {
	    /* the character after < decides what kind of markup this is */
	    c = buf_get(&dr->buf);
	    switch (c) {
	    case '?': /* instructions (xml or otherwise) */
		c = read_instruction(dr);
		break;
	    case '!': /* comment or doctype */
		buf_protect(&dr->buf);
		c = buf_get(&dr->buf);
		if ('\0' == c) {
		    ox_sax_drive_error(dr, NO_TERM "DOCTYPE or comment not terminated");
		    goto DONE;
		} else if ('-' == c) {
		    c = buf_get(&dr->buf); /* skip first - and get next character */
		    if ('-' != c) {
			ox_sax_drive_error(dr, INVALID_FORMAT "bad comment format, expected <!--");
		    } else {
			c = buf_get(&dr->buf); /* skip second - */
		    }
		    /* the comment is read even when the second - was missing */
		    c = read_comment(dr);
		} else {
		    int	i;
		    int	spaced = 0;		/* set when white space followed <! */
		    int	line = dr->buf.line;	/* position saved for error reports */
		    int	col = dr->buf.col;

		    if (is_white(c)) {
			spaced = 1;
			c = buf_next_non_white(&dr->buf);
		    }
		    /* mark the start of the keyword; assumes tail is one past the character just read - confirm */
		    dr->buf.str = dr->buf.tail - 1;
		    /* consume 7 characters, the length of both "DOCTYPE" and "[CDATA[" */
		    for (i = 7; 0 < i; i--) {
			c = buf_get(&dr->buf);
		    }
		    if (0 == strncmp("DOCTYPE", dr->buf.str, 7)) {
			if (spaced) {
			    ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", line, col);
			}
			/* any state other than START_STATE means an element start was already seen */
			if (START_STATE != state) {
			    ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
			}
			c = read_doctype(dr);
		    } else if (0 == strncasecmp("DOCTYPE", dr->buf.str, 7)) {
			/* wrong case: report it but still read it as a DOCTYPE */
			ox_sax_drive_error(dr, CASE_ERROR "expected DOCTYPE all in caps");
			if (START_STATE != state) {
			    ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
			}
			c = read_doctype(dr);
		    } else if (0 == strncmp("[CDATA[", dr->buf.str, 7)) {
			if (spaced) {
			    ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", line, col);
			}
			c = read_cdata(dr);
		    } else if (0 == strncasecmp("[CDATA[", dr->buf.str, 7)) {
			/* wrong case: report it but still read it as CDATA */
			ox_sax_drive_error(dr, CASE_ERROR "expected CDATA all in caps");
			c = read_cdata(dr);
		    } else {
			/* unknown <! construct: report it, read a name token, and step past a closing > */
			ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", line, col);
			c = read_name_token(dr);
			if ('>' == c) {
			    c = buf_get(&dr->buf);
			}
		    }
		}
		break;
	    case '/': /* element end */
		c = read_element_end(dr);
		if (0 == stack_peek(&dr->stack)) {
		    state = AFTER_STATE;
		}
		break;
	    case '\0':
		/* input ended right after < */
		goto DONE;
	    default:
		/* element start; presumably un-reads the character so read_element_start() sees it - confirm */
		buf_backup(&dr->buf);
		if (AFTER_STATE == state) {
		    ox_sax_drive_error(dr, OUT_OF_ORDER "multiple top level elements");
		}
		state = BODY_STATE;
		c = read_element_start(dr);
		if (0 == stack_peek(&dr->stack)) {
		    state = AFTER_STATE;
		}
		break;
	    }
	} else {
	    /* anything not starting with < is handled as text */
	    buf_reset(&dr->buf);
	    c = read_text(dr);
	}
    }
 DONE:
    /* entries left between stack.head and stack.tail are elements that were never closed */
    if (dr->stack.head < dr->stack.tail) {
	char	msg[256];
	Nv	sp;

	/* update the handler's line and column instance variables when dr->has says they are in use */
	if (dr->has.line) {
	    rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(dr->buf.line));
	}
	if (dr->has.column) {
	    rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(dr->buf.col));
	}
	/* walk from the last stack entry (tail - 1) back to head */
	for (sp = dr->stack.tail - 1; dr->stack.head <= sp; sp--) {
	    snprintf(msg, sizeof(msg) - 1, "%selement '%s' not closed", EL_MISMATCH, sp->name);
	    ox_sax_drive_error_at(dr, msg, dr->buf.line, dr->buf.col);
	    /* call the handler's end_element method for the unclosed element when has.end_element is set */
	    if (dr->has.end_element) {
		VALUE       args[1];

		args[0] = sp->val;
		rb_funcall2(dr->handler, ox_end_element_id, 1, args);
	    }
        }
    }
}