Example #1
0
File: parse.c Project: edanaher/ox
/* Parses a processing instruction. Called once the "<?" opener has been
 * consumed, with pi->s on the first character of the target name.
 *
 * The target and every attribute name are terminated in place inside the
 * document buffer. Attributes are gathered into a zero filled array so the
 * entry after the last one read has a null name. On success the instruct
 * callback, when one is set, receives the target and the attribute array.
 * Malformed input is reported through raise_error().
 */
static void
read_instruction(PInfo pi) {
    struct _Attr	attrs[MAX_ATTRS + 1];
    int			cnt = 0;
    char		*target;
    char		*name_end;
    char		first;
    char		closer;

    memset(attrs, 0, sizeof(attrs));
    target = read_name_token(pi);
    name_end = pi->s;
    next_non_white(pi);
    /* Look at the character before terminating the target. With no white
     * space after the target both pointers refer to the same byte. */
    first = *pi->s;
    *name_end = '\0';
    if ('?' != first) {
	/* name="value" pairs up to the closing '?' */
	while ('?' != *pi->s) {
	    char	sep;

	    if ('\0' == *pi->s) {
		raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
	    }
	    next_non_white(pi);
	    attrs[cnt].name = read_name_token(pi);
	    name_end = pi->s;
	    next_non_white(pi);
	    /* Step over the separator whether or not it is the expected '='. */
	    sep = *pi->s;
	    pi->s++;
	    if ('=' != sep) {
		raise_error("invalid format, no attribute value", pi->str, pi->s);
	    }
	    *name_end = '\0';
	    next_non_white(pi);
	    attrs[cnt].value = read_quoted_value(pi);
	    cnt++;
	    if (MAX_ATTRS <= cnt) {
		raise_error("too many attributes", pi->str, pi->s);
	    }
	    next_non_white(pi);
	}
    }
    /* Either the target was followed directly by '?' or the loop above
     * stopped on one, so step over it. */
    pi->s++;
    closer = *pi->s;
    pi->s++;
    if ('>' != closer) {
	raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
    }
    if (pi->pcb->instruct) {
	pi->pcb->instruct(pi, target, attrs);
    }
}
Example #2
0
/* Parses one element and, recursively, its child elements. Entered with the
 * '<' already consumed and pi->s on the first character after it (see the
 * recursive call in the children loop below).
 *
 * Nothing is returned. Results are delivered through the pi->pcb
 * add_element and end_element callbacks and malformed input is reported
 * through raise_error(). Names are terminated in place inside the document
 * buffer.
 */
static void
read_element(PInfo pi) {
    struct _Attr	attrs[MAX_ATTRS];
    Attr		ap = attrs;
    char		*name;
    char		*ename;
    char		*end;
    char		c;
    long		elen;
    int			hasChildren = 0;
    int			done = 0;

    ename = read_name_token(pi);
    end = pi->s;
    /* length of the element name, used by the close tag shortcut after text */
    elen = end - ename;
    next_non_white(pi);
    /* Read the next significant character before terminating the name; when
     * no white space follows the name, end and pi->s are the same byte. */
    c = *pi->s;
    *end = '\0';
    if ('/' == c) {
	/* empty element, no attributes and no children */
	pi->s++;
	if ('>' != *pi->s) {
	    //printf("*** '%s' ***\n", pi->s);
	    raise_error("invalid format, element not closed", pi->str, pi->s);
	}
	pi->s++;	/* past > */
	/* a null name marks the end of the attribute list */
	ap->name = 0;
	pi->pcb->add_element(pi, ename, attrs, hasChildren);
	pi->pcb->end_element(pi, ename);

	return;
    }
    /* read attribute names until the close (/ or >) is reached */
    while (!done) {
	/* c is cleared at the bottom of the loop, so on every pass after the
	 * first the next significant character is fetched here. */
	if ('\0' == c) {
	    next_non_white(pi);
	    c = *pi->s;
	}
	switch (c) {
	case '\0':
	    /* NOTE(review): there is no break here. If raise_error() were to
	     * return, control would fall into the '/' case. Presumably
	     * raise_error() does not return - confirm. */
	    raise_error("invalid format, document not terminated", pi->str, pi->s);
	case '/':
	    // Element with just attributes.
	    pi->s++;
	    if ('>' != *pi->s) {
		raise_error("invalid format, element not closed", pi->str, pi->s);
	    }
	    pi->s++;
	    ap->name = 0;
	    pi->pcb->add_element(pi, ename, attrs, hasChildren);
	    pi->pcb->end_element(pi, ename);

	    return;
	case '>':
	    // has either children or a value
	    pi->s++;
	    hasChildren = 1;
	    done = 1;
	    ap->name = 0;
	    pi->pcb->add_element(pi, ename, attrs, hasChildren);
	    break;
	default:
	    // Attribute name so it's an element and the attribute will be
	    // added to it.
	    ap->name = read_name_token(pi);
	    end = pi->s;
	    next_non_white(pi);
	    /* pi->s is advanced past the separator before the error check */
	    if ('=' != *pi->s++) {
		raise_error("invalid format, no attribute value", pi->str, pi->s);
	    }
	    *end = '\0'; // terminate name
	    // read value
	    next_non_white(pi);
	    ap->value = read_quoted_value(pi);
	    /* Only values containing '&' are handed to collapse_special(),
	     * which edits the value in place; a non-zero result is an error. */
	    if (0 != strchr(ap->value, '&')) {
		if (0 != collapse_special((char*)ap->value)) {
		    raise_error("invalid format, special character does not end with a semicolon", pi->str, pi->s);
		}
	    }
	    ap++;
	    /* Checked after the increment so that one slot always remains
	     * for the null name terminator written in the cases above. */
	    if (MAX_ATTRS <= (ap - attrs)) {
		raise_error("too many attributes", pi->str, pi->s);
	    }
	    break;
	}
	c = '\0';
    }
    if (hasChildren) {
	char	*start;
	
	done = 0;
	// read children
	while (!done) {
	    /* remember where this child began so that text can be re-read
	     * from there, leading white space included */
	    start = pi->s;
	    next_non_white(pi);
	    c = *pi->s++;
	    if ('\0' == c) {
		raise_error("invalid format, document not terminated", pi->str, pi->s);
	    }
	    if ('<' == c) {
		switch (*pi->s) {
		case '!':	/* better be a comment or CDATA */
		    pi->s++;
		    if ('-' == *pi->s && '-' == *(pi->s + 1)) {
			pi->s += 2;
			read_comment(pi);
		    } else if (0 == strncmp("[CDATA[", pi->s, 7)) {
			pi->s += 7;
			read_cdata(pi);
		    } else {
			raise_error("invalid format, invalid comment or CDATA format", pi->str, pi->s);
		    }
		    break;
		case '/':
		    /* close tag, its name must match this element's name */
		    pi->s++;
		    name = read_name_token(pi);
		    end = pi->s;
		    next_non_white(pi);
		    c = *pi->s;
		    *end = '\0';
		    if (0 != strcmp(name, ename)) {
			raise_error("invalid format, elements overlap", pi->str, pi->s);
		    }
		    if ('>' != c) {
			raise_error("invalid format, element not closed", pi->str, pi->s);
		    }
		    pi->s++;
		    pi->pcb->end_element(pi, ename);
		    return;
		case '\0':
		    /* NOTE(review): no break; relies on raise_error() not
		     * returning, otherwise this falls into default - confirm. */
		    raise_error("invalid format, document not terminated", pi->str, pi->s);
		default:
		    // a child element
		    read_element(pi);
		    break;
		}
	    } else {	// read as TEXT
		pi->s = start;
		//pi->s--;
		read_text(pi);
		//read_reduced_text(pi);

		// to exit read_text with no errors the next character must be <
		/* Shortcut for "</ename>" directly after the text: the name
		 * must match for exactly elen characters and be followed
		 * immediately by '>'. Anything else is left for the next
		 * pass of the loop to handle. */
		if ('/' == *(pi->s + 1) &&
		    0 == strncmp(ename, pi->s + 2, elen) &&
		    '>' == *(pi->s + elen + 2)) {
		    // close tag after text so treat as a value
		    pi->s += elen + 3;
		    pi->pcb->end_element(pi, ename);
		    return;
		}
	    }
	}
    }
}
Example #3
0
/* Reads attribute name/value pairs until the current character is termc or
 * term2, reporting each pair to the handler through the attr_value or attr
 * callback, whichever the handler has.
 *
 * dr     - the SAX drive holding the buffer, handler and option flags
 * c      - the character most recently read by the caller
 * termc  - first character that ends the attribute list
 * term2  - second character that ends the attribute list
 * is_xml - when non-zero, the value of an attribute named "encoding"
 *          (compared without regard to case) is stored in dr->encoding
 * eq_req - when non-zero, a name that is not followed by '=' sets dr->err
 *          and returns at once; when zero the problem is reported with
 *          ox_sax_drive_error() and the attribute gets an empty value
 *
 * Returns the character that ended the list, '\0' if the input ran out or a
 * name could not be read, or, in the eq_req case, the character that was
 * found where '=' was expected.
 */
static char
read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req) {
    VALUE       name = Qnil;
    int         is_encoding = 0;
    int		line;
    int		col;
    char	*attr_value;

    // already protected by caller
    dr->buf.str = dr->buf.tail;
    if (is_white(c)) {
        c = buf_next_non_white(&dr->buf);
    }
    while (termc != c && term2 != c) {
	/* presumably un-reads c so that read_name_token() starts with it -
	 * confirm against buf_backup() */
	buf_backup(&dr->buf);
        if ('\0' == c) {
	    ox_sax_drive_error(dr, NO_TERM "attributes not terminated");
	    return '\0';
        }
	/* position of the attribute name; replaced by the position of the
	 * value below when an '=' is found */
	line = dr->buf.line;
	col = dr->buf.col;
        if ('\0' == (c = read_name_token(dr))) {
	    ox_sax_drive_error(dr, NO_TERM "error reading token");
	    return '\0';
        }
        if (is_xml && 0 == strcasecmp("encoding", dr->buf.str)) {
            is_encoding = 1;
        }
        /* the name is only converted when a callback will receive it */
        if (dr->has.attr || dr->has.attr_value) {
            name = str2sym(dr, dr->buf.str, 0);
        }
        if (is_white(c)) {
            c = buf_next_non_white(&dr->buf);
        }
        if ('=' != c) {
	    if (eq_req) {
		/* let the caller decide what to do with the content */
		dr->err = 1;
		return c;
	    } else {
		ox_sax_drive_error(dr, WRONG_CHAR "no attribute value");
		attr_value = (char*)"";
	    }
        } else {
	    line = dr->buf.line;
	    col = dr->buf.col;
	    c = read_quoted_value(dr);
	    attr_value = dr->buf.str;
	    if (is_encoding) {
#if HAS_ENCODING_SUPPORT
		dr->encoding = rb_enc_find(dr->buf.str);
#elif HAS_PRIVATE_ENCODING
		dr->encoding = rb_str_new2(dr->buf.str);
#else
		/* NOTE(review): stores a pointer into the read buffer;
		 * confirm the buffer outlives every use of dr->encoding */
		dr->encoding = dr->buf.str;
#endif
		is_encoding = 0;
	    }
	}
        if (dr->has.attr_value) {
            VALUE       args[2];

	    if (dr->has.line) {
		rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
	    }
	    if (dr->has.column) {
		rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
	    }
            args[0] = name;
            /* NOTE(review): attr_value is not referenced on this path;
             * presumably dr->value_obj reads the value from the buffer
             * when asked - confirm */
            args[1] = dr->value_obj;
            rb_funcall2(dr->handler, ox_attr_value_id, 2, args);
	} else if (dr->has.attr) {
            VALUE       args[2];

            args[0] = name;
            /* NOTE(review): the collapse is applied to dr->buf.str even
             * when no '=' was found and attr_value is the literal "" -
             * confirm that is intended */
            ox_sax_collapse_special(dr, dr->buf.str, line, col);
            args[1] = rb_str_new2(attr_value);
#if HAS_ENCODING_SUPPORT
            if (0 != dr->encoding) {
                rb_enc_associate(args[1], dr->encoding);
            }
#elif HAS_PRIVATE_ENCODING
	    if (Qnil != dr->encoding) {
		rb_funcall(args[1], ox_force_encoding_id, 1, dr->encoding);
	    }
#endif
	    if (dr->has.line) {
		rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
	    }
	    if (dr->has.column) {
		rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
	    }
            rb_funcall2(dr->handler, ox_attr_id, 2, args);
        }
	if (is_white(c)) {
	    c = buf_next_non_white(&dr->buf);
	}
    }
    /* the token start is cleared now that the attributes are done */
    dr->buf.str = 0;

    return c;
}
Example #4
0
/* Handles an element close tag. parse() calls this after it has read the
 * '<' and '/' characters.
 *
 * The name read is compared with the element on top of the stack. A match
 * simply pops the stack. Otherwise one of three things happens:
 *  - the name is open further down the stack: hint_try_close() is tried
 *    first and, failing that, a mismatch is reported and every element
 *    above the match is closed with an end_element callback,
 *  - the name is not open and the hints mark it as an empty element: an
 *    error is reported and no callbacks are made,
 *  - the name is not open at all: an error is reported and the element is
 *    opened (start_element) and then closed.
 *
 * Returns the character read after the name token, or '\0' if no name
 * could be read.
 */
static char
read_element_end(SaxDrive dr) {
    VALUE	name = Qnil;
    char	msg[256];
    char	*ename = 0;
    char	c;
    int		line = dr->buf.line;
    int		col = dr->buf.col - 2;
    Nv		top;
    Nv		match;
    Nv		closed;
    Hint	h;

    c = read_name_token(dr);
    if ('\0' == c) {
	return '\0';
    }
    /* the token should have ended on '>' with the buffer one past it, so
     * fetch the character that follows */
    c = buf_get(&dr->buf);
    top = stack_peek(&dr->stack);
    if (0 != top && 0 == strcmp(dr->buf.str, top->name)) {
	/* closes the element that is currently open */
	name = top->val;
	stack_pop(&dr->stack);
	end_element_cb(dr, name, line, col);

	return c;
    }
    /* start and end do not line up */
    match = stack_rev_find(&dr->stack, dr->buf.str);
    if (0 != match) {
	/* open further down, close everything above it */
	closed = hint_try_close(dr, dr->buf.str);
	if (0 != closed) {
	    name = closed->val;
	} else {
	    snprintf(msg, sizeof(msg) - 1, "%selement '%s' close does not match '%s' open", EL_MISMATCH, dr->buf.str, top->name);
	    ox_sax_drive_error_at(dr, msg, line, col);
	    if (dr->has.line) {
		rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
	    }
	    if (dr->has.column) {
		rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
	    }
	    top = stack_pop(&dr->stack);
	    while (match < top) {
		if (dr->has.end_element) {
		    rb_funcall(dr->handler, ox_end_element_id, 1, top->val);
		}
		top = stack_pop(&dr->stack);
	    }
	    name = top->val;
	}
	end_element_cb(dr, name, line, col);

	return c;
    }
    /* never opened */
    h = ox_hint_find(dr->hints, dr->buf.str);
    if (0 != h && h->empty) {
	/* an empty element type was given its own close tag */
	name = str2sym(dr, dr->buf.str, &ename);
	snprintf(msg, sizeof(msg) - 1, "%selement '%s' should not have a separate close element", EL_MISMATCH, dr->buf.str);
	ox_sax_drive_error_at(dr, msg, line, col);

	return c;
    }
    snprintf(msg, sizeof(msg) - 1, "%selement '%s' closed but not opened", EL_MISMATCH, dr->buf.str);
    ox_sax_drive_error_at(dr, msg, line, col);
    name = str2sym(dr, dr->buf.str, &ename);
    if (dr->has.start_element) {
	/* open the element so the close below has something to pair with */
	if (dr->has.line) {
	    rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
	}
	if (dr->has.column) {
	    rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
	}
	rb_funcall2(dr->handler, ox_start_element_id, 1, &name);
    }
    end_element_cb(dr, name, line, col);

    return c;
}
Example #5
0
/* Reads an element start tag. parse() calls this after it has read the '<',
 * looked at the character that follows and called buf_backup().
 *
 * The steps are:
 *  - read the element name,
 *  - when dr->options.smart is set, no hints are active, the stack is empty
 *    and the name is "html" (any case), switch on the HTML hints,
 *  - when hints are active, validate the name and its parent against them,
 *    closing a previous element of the same type if it may not nest,
 *  - make the start_element callback,
 *  - read attributes if there are any and make the attrs_done callback,
 *  - close the element at once if it ended with "/>" or the hints mark it
 *    as empty, otherwise push it on the element stack.
 *
 * Returns the character that follows the closing '>', '\0' if no name could
 * be read, or the unexpected character if the tag did not end with '>'.
 */
static char
read_element_start(SaxDrive dr) {
    char	*ename = 0;
    VALUE       name = Qnil;
    char        c;
    int         closed;
    int		line = dr->buf.line;
    int		col = dr->buf.col - 1;
    Hint	h = 0;
    int		stackless = 0;

    if ('\0' == (c = read_name_token(dr))) {
        return '\0';
    }
    if (dr->options.smart && 0 == dr->hints && stack_empty(&dr->stack) && 0 == strcasecmp("html", dr->buf.str)) {
	dr->hints = ox_hints_html();
    }
    if (0 != dr->hints) {
	hint_clear_empty(dr);
	h = ox_hint_find(dr->hints, dr->buf.str);
	if (0 == h) {
	    char	msg[256];

	    /* The element name comes straight from the document and can be
	     * any length, so the write into msg must be bounded. */
	    snprintf(msg, sizeof(msg) - 1, "%s%s is not a valid element type for a %s document type.", INV_ELEMENT, dr->buf.str, dr->hints->name);
	    ox_sax_drive_error(dr, msg);
	} else {
	    Nv	top_nv = stack_peek(&dr->stack);

	    if (h->empty) {
		/* closed right after the start, never pushed on the stack */
		stackless = 1;
	    }
	    if (0 != top_nv) {
		char	msg[256];

		if (!h->nest && 0 == strcasecmp(top_nv->name, h->name)) {
		    snprintf(msg, sizeof(msg) - 1, "%s%s can not be nested in a %s document, closing previous.",
			     INV_ELEMENT, dr->buf.str, dr->hints->name);
		    ox_sax_drive_error(dr, msg);
		    stack_pop(&dr->stack);
		    end_element_cb(dr, top_nv->val, line, col);
		    top_nv = stack_peek(&dr->stack);
		}
		/* top_nv is null when the pop above emptied the stack. With
		 * no parent left there is nothing to validate against. */
		if (0 != top_nv && 0 != h->parents) {
		    const char	**p;
		    int		ok = 0;

		    for (p = h->parents; 0 != *p; p++) {
			if (0 == strcasecmp(*p, top_nv->name)) {
			    ok = 1;
			    break;
			}
		    }
		    if (!ok) {
			snprintf(msg, sizeof(msg) - 1, "%s%s can not be a child of a %s in a %s document.",
				 INV_ELEMENT, h->name, top_nv->name, dr->hints->name);
			ox_sax_drive_error(dr, msg);
		    }
		}
	    }
	}
    }
    name = str2sym(dr, dr->buf.str, &ename);
    if (dr->has.start_element) {
        VALUE       args[1];

	if (dr->has.line) {
	    rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
	}
	if (dr->has.column) {
	    rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
	}
        args[0] = name;
        rb_funcall2(dr->handler, ox_start_element_id, 1, args);
    }
    if ('/' == c) {
        closed = 1;
    } else if ('>' == c) {
        closed = 0;
    } else {
	/* something other than the end of the tag follows the name, so
	 * read it as attributes terminated by '/' or '>' */
	buf_protect(&dr->buf);
        c = read_attrs(dr, c, '/', '>', 0, 0);
	if (is_white(c)) {
	    c = buf_next_non_white(&dr->buf);
	}
	closed = ('/' == c);
    }
    if (dr->has.attrs_done) {
	    rb_funcall(dr->handler, ox_attrs_done_id, 0);
    }
    if (closed) {
	/* c is '/', the next significant character should be the '>' */
	c = buf_next_non_white(&dr->buf);
	line = dr->buf.line;
	col = dr->buf.col - 1;
	end_element_cb(dr, name, line, col);
    } else if (stackless) {
	end_element_cb(dr, name, line, col);
    } else {
	stack_push(&dr->stack, ename, name, h);
    }
    if ('>' != c) {
	ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
	return c;
    }
    dr->buf.str = 0;

    return buf_get(&dr->buf);
}
Example #6
0
/* Entered after the "<?" sequence. Reads the instruction target, makes the
 * instruct callback and then tries to read the rest of the instruction as
 * attributes.
 *
 * Before the attributes are attempted the raw content is captured with
 * read_content() and the buffer is reset. If read_attrs() flags dr->err (a
 * name was not followed by '='), the captured content is delivered through
 * the text callback instead and reading resumes from the position saved
 * after read_content(). Otherwise the closing '>' is expected next. The
 * end_instruct callback is made in both cases.
 *
 * Returns the next character for the caller to process. That is '\0' if the
 * target could not be read and the unexpected character if the instruction
 * was not terminated with '>'.
 */
static char
read_instruction(SaxDrive dr) {
    char	content[1024];
    char        c;
    char	*cend;
    VALUE	target = Qnil;
    int		is_xml;
    int		line = dr->buf.line;
    int		col = dr->buf.col - 1;

    buf_protect(&dr->buf);
    if ('\0' == (c = read_name_token(dr))) {
        return c;
    }
    is_xml = (0 == strcmp("xml", dr->buf.str));
    /* the target string is only built when a callback will receive it */
    if (dr->has.instruct || dr->has.end_instruct) {
	target = rb_str_new2(dr->buf.str);
    }
    if (dr->has.instruct) {
        VALUE       args[1];

	if (dr->has.line) {
	    rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
	}
	if (dr->has.column) {
	    rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
	}
        args[0] = target;
        rb_funcall2(dr->handler, ox_instruct_id, 1, args);
    }
    buf_protect(&dr->buf);
    /* position of the content, reported with the text callback if the
     * content turns out not to be attributes */
    line = dr->buf.line;
    col = dr->buf.col;
    read_content(dr, content, sizeof(content) - 1);
    /* remember where the content ended, then reset the buffer so the same
     * characters can be read again as attributes */
    cend = dr->buf.tail;
    buf_reset(&dr->buf);
    dr->err = 0;
    c = read_attrs(dr, c, '?', '?', is_xml, 1);
    if (dr->has.attrs_done) {
	    rb_funcall(dr->handler, ox_attrs_done_id, 0);
    }
    if (dr->err) {
	/* not attributes, hand the raw content over as text */
	if (dr->has.text) {
	    VALUE   args[1];

	    if (dr->options.convert_special) {
		ox_sax_collapse_special(dr, content, line, col);
	    }
	    args[0] = rb_str_new2(content);
#if HAS_ENCODING_SUPPORT
	    if (0 != dr->encoding) {
		rb_enc_associate(args[0], dr->encoding);
	    }
#elif HAS_PRIVATE_ENCODING
	    if (Qnil != dr->encoding) {
		rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
	    }
#endif
	    if (dr->has.line) {
		rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
	    }
	    if (dr->has.column) {
		rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
	    }
	    rb_funcall2(dr->handler, ox_text_id, 1, args);
	}
	/* skip to the end of the content captured earlier */
	dr->buf.tail = cend;
	c = buf_get(&dr->buf);
    } else {
	line = dr->buf.line;
	col = dr->buf.col;
	c = buf_next_non_white(&dr->buf);
	if ('>' == c) {
	    c = buf_get(&dr->buf);
	} else {
	    /* c is left as it is so the caller sees the unexpected
	     * character */
	    ox_sax_drive_error_at(dr, NO_TERM "instruction not terminated", line, col);
	}
    }
    if (dr->has.end_instruct) {
        VALUE       args[1];

	if (dr->has.line) {
	    rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
	}
	if (dr->has.column) {
	    rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
	}
        args[0] = target;
        rb_funcall2(dr->handler, ox_end_instruct_id, 1, args);
    }
    dr->buf.str = 0;

    return c;
}
Example #7
0
/* Top level loop of the SAX parser. Starting with the character returned by
 * skipBOM(), each pass looks at the next significant character. A '<' is
 * dispatched on the character that follows it (instruction, comment,
 * DOCTYPE, CDATA, element end or element start) and anything else is read
 * as text. Each reader returns the next character to process and the loop
 * ends when that is '\0'.
 *
 * state records whether an element has been started (BODY_STATE) and
 * whether the element stack has become empty again (AFTER_STATE). It is
 * used only to report a DOCTYPE that comes after an element and multiple
 * top level elements.
 *
 * Once the input is done, every element still on the stack is reported as
 * not closed and, if the handler has end_element, closed with a callback.
 */
static void
parse(SaxDrive dr) {
    char        c = skipBOM(dr);
    int		state = START_STATE;

    while ('\0' != c) {
	buf_protect(&dr->buf);
        if (is_white(c) && '\0' == (c = buf_next_non_white(&dr->buf))) {
            break;
        }
	if ('<' == c) {
	    c = buf_get(&dr->buf);
	    switch (c) {
	    case '?': /* instructions (xml or otherwise) */
		c = read_instruction(dr);
		break;
	    case '!': /* comment or doctype */
		buf_protect(&dr->buf);
		c = buf_get(&dr->buf);
		if ('\0' == c) {
		    ox_sax_drive_error(dr, NO_TERM "DOCTYPE or comment not terminated");
		    goto DONE;
		} else if ('-' == c) {
		    c = buf_get(&dr->buf); /* skip first - and get next character */
		    if ('-' != c) {
			ox_sax_drive_error(dr, INVALID_FORMAT "bad comment format, expected <!--");
		    } else {
			c = buf_get(&dr->buf); /* skip second - */
		    }
		    /* read as a comment even after the format error above;
		     * the value of c set there is replaced here */
		    c = read_comment(dr);
		} else {
		    int	i;
		    int	spaced = 0;
		    int	line = dr->buf.line;
		    int	col = dr->buf.col;

		    /* white space between "<!" and the keyword is tolerated
		     * here and reported further down */
		    if (is_white(c)) {
			spaced = 1;
			c = buf_next_non_white(&dr->buf);
		    }
		    /* Mark the start of the keyword and pull in seven
		     * characters, the length of both "DOCTYPE" and
		     * "[CDATA[", so they can be compared below.
		     * NOTE(review): the loop does not look for '\0';
		     * confirm buf_get() is safe to call at the end of
		     * the input. */
		    dr->buf.str = dr->buf.tail - 1;
		    for (i = 7; 0 < i; i--) {
			c = buf_get(&dr->buf);
		    }
		    if (0 == strncmp("DOCTYPE", dr->buf.str, 7)) {
			if (spaced) {
			    ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", line, col);
			}
			if (START_STATE != state) {
			    ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
			}
			c = read_doctype(dr);
		    } else if (0 == strncasecmp("DOCTYPE", dr->buf.str, 7)) {
			/* wrong case is reported but still read as a DOCTYPE */
			ox_sax_drive_error(dr, CASE_ERROR "expected DOCTYPE all in caps");
			if (START_STATE != state) {
			    ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
			}
			c = read_doctype(dr);
		    } else if (0 == strncmp("[CDATA[", dr->buf.str, 7)) {
			if (spaced) {
			    ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", line, col);
			}
			c = read_cdata(dr);
		    } else if (0 == strncasecmp("[CDATA[", dr->buf.str, 7)) {
			/* wrong case is reported but still read as CDATA */
			ox_sax_drive_error(dr, CASE_ERROR "expected CDATA all in caps");
			c = read_cdata(dr);
		    } else {
			/* unknown "<!" construct, report it and skip the
			 * name token that follows */
			ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", line, col);
			c = read_name_token(dr);
			if ('>' == c) {
			    c = buf_get(&dr->buf);
			}
		    }
		}
		break;
	    case '/': /* element end */
		c = read_element_end(dr);
		/* an empty stack means the top level element has closed */
		if (0 == stack_peek(&dr->stack)) {
		    state = AFTER_STATE;
		}
		break;
	    case '\0':
		goto DONE;
	    default:
		/* an element start; the character just read is backed up
		 * before read_element_start() reads the name */
		buf_backup(&dr->buf);
		if (AFTER_STATE == state) {
		    ox_sax_drive_error(dr, OUT_OF_ORDER "multiple top level elements");
		}
		state = BODY_STATE;
		c = read_element_start(dr);
		/* the stack is still empty if the element closed itself */
		if (0 == stack_peek(&dr->stack)) {
		    state = AFTER_STATE;
		}
		break;
	    }
	} else {
	    buf_reset(&dr->buf);
	    c = read_text(dr);
	}
    }
 DONE:
    /* anything left on the stack was opened and never closed */
    if (dr->stack.head < dr->stack.tail) {
	char	msg[256];
	Nv	sp;

	if (dr->has.line) {
	    rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(dr->buf.line));
	}
	if (dr->has.column) {
	    rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(dr->buf.col));
	}
	/* walk from the most recently opened element down to the first */
	for (sp = dr->stack.tail - 1; dr->stack.head <= sp; sp--) {
	    snprintf(msg, sizeof(msg) - 1, "%selement '%s' not closed", EL_MISMATCH, sp->name);
	    ox_sax_drive_error_at(dr, msg, dr->buf.line, dr->buf.col);
	    if (dr->has.end_element) {
		VALUE       args[1];

		args[0] = sp->val;
		rb_funcall2(dr->handler, ox_end_element_id, 1, args);
	    }
        }
    }
}
Example #8
0
File: parse.c Project: phlipper/ox
/* Parses a processing instruction. Called once the "<?" opener has been
 * consumed, with pi->s on the first character of the target name.
 *
 * The raw content after the target is first copied into a local buffer by
 * gather_content(). The instruction is then read as name="value" pairs. If
 * a name is not followed by '=' or the attribute limit is reached, the
 * attribute reading is abandoned, parsing resumes one character past the
 * pointer gather_content() returned, and the instruct callback is given the
 * raw content. Otherwise the closing '>' is required and the callback gets
 * a null content pointer. The attribute array is passed in both cases.
 */
static void
read_instruction(PInfo pi) {
    char		content[1024];
    struct _Attr	attrs[MAX_ATTRS + 1];
    int			cnt = 0;
    int			as_attrs = 1;
    char		*target;
    char		*name_end;
    char		*content_end;
    char		first;

    content[0] = '\0';
    memset(attrs, 0, sizeof(attrs));
    target = read_name_token(pi);
    name_end = pi->s;
    content_end = gather_content(pi->s, content, sizeof(content) - 1);
    if (0 == content_end) {
	raise_error("processing instruction content too large or not terminated", pi->str, pi->s);
    }
    next_non_white(pi);
    /* Look at the character before terminating the target. With no white
     * space after the target both pointers refer to the same byte. */
    first = *pi->s;
    *name_end = '\0';
    if ('?' == first) {
	pi->s++;
    } else {
	for (;;) {
	    char	sep;

	    if ('?' == *pi->s) {
		break;
	    }
	    if ('\0' == *pi->s) {
		raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
	    }
	    next_non_white(pi);
	    attrs[cnt].name = read_name_token(pi);
	    name_end = pi->s;
	    next_non_white(pi);
	    /* Step over the separator whether or not it is the expected '='. */
	    sep = *pi->s;
	    pi->s++;
	    if ('=' != sep) {
		/* not an attribute, fall back to the raw content */
		as_attrs = 0;
		break;
	    }
	    *name_end = '\0';
	    next_non_white(pi);
	    attrs[cnt].value = read_quoted_value(pi);
	    cnt++;
	    if (MAX_ATTRS <= cnt) {
		as_attrs = 0;
		break;
	    }
	    next_non_white(pi);
	}
	if ('?' == *pi->s) {
	    pi->s++;
	}
    }
    if (!as_attrs) {
	/* resume after the content gathered at the start */
	pi->s = content_end + 1;
    } else {
	char	closer = *pi->s;

	pi->s++;
	if ('>' != closer) {
	    raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
	}
    }
    if (pi->pcb->instruct) {
	pi->pcb->instruct(pi, target, attrs, as_attrs ? 0 : content);
    }
}