Пример #1
0
static void
read_str(ParseInfo pi, const char *key) {
    char	*text;

    text = read_quoted_value(pi);
    if (pi->has_add_value) {
	VALUE	s = rb_str_new2(text);

#if HAS_ENCODING_SUPPORT
	rb_enc_associate(s, oj_utf8_encoding);
#endif
	call_add_value(pi->handler, s, key);
    }
}
Пример #2
0
/* Entered after the "<?" sequence. Ready to read the rest.
 */
static void
read_instruction(PInfo pi) {
    struct _Attr	attrs[MAX_ATTRS + 1];
    Attr		a = attrs;
    char		*target;
    char		*end;
    char		c;
	
    memset(attrs, 0, sizeof(attrs));
    target = read_name_token(pi);
    end = pi->s;
    next_non_white(pi);
    c = *pi->s;
    *end = '\0'; // terminate name
    if ('?' != c) {
	while ('?' != *pi->s) {
	    if ('\0' == *pi->s) {
		raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
	    }
	    next_non_white(pi);
	    a->name = read_name_token(pi);
	    end = pi->s;
	    next_non_white(pi);
	    if ('=' != *pi->s++) {
		raise_error("invalid format, no attribute value", pi->str, pi->s);
	    }
	    *end = '\0'; // terminate name
	    // read value
	    next_non_white(pi);
	    a->value = read_quoted_value(pi);
	    a++;
	    if (MAX_ATTRS <= (a - attrs)) {
		raise_error("too many attributes", pi->str, pi->s);
	    }
	    next_non_white(pi);
	}
	if ('?' == *pi->s) {
	    pi->s++;
	}
    } else {
	pi->s++;
    }
    if ('>' != *pi->s++) {
	raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
    }
    if (0 != pi->pcb->instruct) {
	pi->pcb->instruct(pi, target, attrs);
    }
}
Пример #3
0
static void
read_hash(ParseInfo pi, const char *key) {
    const char	*ks;
    
    if (pi->has_hash_start) {
	call_no_value(pi->handler, oj_hash_start_id, key);
    }
    pi->s++;
    next_non_white(pi);
    if ('}' == *pi->s) {
	pi->s++;
    } else {
	while (1) {
	    next_non_white(pi);
	    ks = read_quoted_value(pi);
	    next_non_white(pi);
	    if (':' == *pi->s) {
		pi->s++;
	    } else {
		if (pi->has_error) {
		    call_error("invalid format, expected :", pi, __FILE__, __LINE__);
		}
		raise_error("invalid format, expected :", pi->str, pi->s);
	    }
	    read_next(pi, ks);
	    next_non_white(pi);
	    if ('}' == *pi->s) {
		pi->s++;
		break;
	    } else if (',' == *pi->s) {
		pi->s++;
	    } else {
		if (pi->has_error) {
		    call_error("invalid format, expected , or } while in an object", pi, __FILE__, __LINE__);
		}
		raise_error("invalid format, expected , or } while in an object", pi->str, pi->s);
	    }
	}
    }
    if (pi->has_hash_end) {
	call_no_value(pi->handler, oj_hash_end_id, key);
    }
}
Пример #4
0
/* Entered after the '<' and the first character after that. Returns status
 * code.
 */
static void
read_element(PInfo pi) {
    struct _Attr	attrs[MAX_ATTRS];
    Attr		ap = attrs;
    char		*name;
    char		*ename;
    char		*end;
    char		c;
    long		elen;
    int			hasChildren = 0;
    int			done = 0;

    ename = read_name_token(pi);
    end = pi->s;
    elen = end - ename;
    next_non_white(pi);
    c = *pi->s;
    *end = '\0';
    if ('/' == c) {
	/* empty element, no attributes and no children */
	pi->s++;
	if ('>' != *pi->s) {
	    //printf("*** '%s' ***\n", pi->s);
	    raise_error("invalid format, element not closed", pi->str, pi->s);
	}
	pi->s++;	/* past > */
	ap->name = 0;
	pi->pcb->add_element(pi, ename, attrs, hasChildren);
	pi->pcb->end_element(pi, ename);

	return;
    }
    /* read attribute names until the close (/ or >) is reached */
    while (!done) {
	if ('\0' == c) {
	    next_non_white(pi);
	    c = *pi->s;
	}
	switch (c) {
	case '\0':
	    raise_error("invalid format, document not terminated", pi->str, pi->s);
	case '/':
	    // Element with just attributes.
	    pi->s++;
	    if ('>' != *pi->s) {
		raise_error("invalid format, element not closed", pi->str, pi->s);
	    }
	    pi->s++;
	    ap->name = 0;
	    pi->pcb->add_element(pi, ename, attrs, hasChildren);
	    pi->pcb->end_element(pi, ename);

	    return;
	case '>':
	    // has either children or a value
	    pi->s++;
	    hasChildren = 1;
	    done = 1;
	    ap->name = 0;
	    pi->pcb->add_element(pi, ename, attrs, hasChildren);
	    break;
	default:
	    // Attribute name so it's an element and the attribute will be
	    // added to it.
	    ap->name = read_name_token(pi);
	    end = pi->s;
	    next_non_white(pi);
	    if ('=' != *pi->s++) {
		raise_error("invalid format, no attribute value", pi->str, pi->s);
	    }
	    *end = '\0'; // terminate name
	    // read value
	    next_non_white(pi);
	    ap->value = read_quoted_value(pi);
	    if (0 != strchr(ap->value, '&')) {
		if (0 != collapse_special((char*)ap->value)) {
		    raise_error("invalid format, special character does not end with a semicolon", pi->str, pi->s);
		}
	    }
	    ap++;
	    if (MAX_ATTRS <= (ap - attrs)) {
		raise_error("too many attributes", pi->str, pi->s);
	    }
	    break;
	}
	c = '\0';
    }
    if (hasChildren) {
	char	*start;
	
	done = 0;
	// read children
	while (!done) {
	    start = pi->s;
	    next_non_white(pi);
	    c = *pi->s++;
	    if ('\0' == c) {
		raise_error("invalid format, document not terminated", pi->str, pi->s);
	    }
	    if ('<' == c) {
		switch (*pi->s) {
		case '!':	/* better be a comment or CDATA */
		    pi->s++;
		    if ('-' == *pi->s && '-' == *(pi->s + 1)) {
			pi->s += 2;
			read_comment(pi);
		    } else if (0 == strncmp("[CDATA[", pi->s, 7)) {
			pi->s += 7;
			read_cdata(pi);
		    } else {
			raise_error("invalid format, invalid comment or CDATA format", pi->str, pi->s);
		    }
		    break;
		case '/':
		    pi->s++;
		    name = read_name_token(pi);
		    end = pi->s;
		    next_non_white(pi);
		    c = *pi->s;
		    *end = '\0';
		    if (0 != strcmp(name, ename)) {
			raise_error("invalid format, elements overlap", pi->str, pi->s);
		    }
		    if ('>' != c) {
			raise_error("invalid format, element not closed", pi->str, pi->s);
		    }
		    pi->s++;
		    pi->pcb->end_element(pi, ename);
		    return;
		case '\0':
		    raise_error("invalid format, document not terminated", pi->str, pi->s);
		default:
		    // a child element
		    read_element(pi);
		    break;
		}
	    } else {	// read as TEXT
		pi->s = start;
		//pi->s--;
		read_text(pi);
		//read_reduced_text(pi);

		// to exit read_text with no errors the next character must be <
		if ('/' == *(pi->s + 1) &&
		    0 == strncmp(ename, pi->s + 2, elen) &&
		    '>' == *(pi->s + elen + 2)) {
		    // close tag after text so treat as a value
		    pi->s += elen + 3;
		    pi->pcb->end_element(pi, ename);
		    return;
		}
	    }
	}
    }
}
Пример #5
0
static char
read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req) {
    VALUE       name = Qnil;
    int         is_encoding = 0;
    int		line;
    int		col;
    char	*attr_value;

    // already protected by caller
    dr->buf.str = dr->buf.tail;
    if (is_white(c)) {
        c = buf_next_non_white(&dr->buf);
    }
    while (termc != c && term2 != c) {
	buf_backup(&dr->buf);
        if ('\0' == c) {
	    ox_sax_drive_error(dr, NO_TERM "attributes not terminated");
	    return '\0';
        }
	line = dr->buf.line;
	col = dr->buf.col;
        if ('\0' == (c = read_name_token(dr))) {
	    ox_sax_drive_error(dr, NO_TERM "error reading token");
	    return '\0';
        }
        if (is_xml && 0 == strcasecmp("encoding", dr->buf.str)) {
            is_encoding = 1;
        }
        if (dr->has.attr || dr->has.attr_value) {
            name = str2sym(dr, dr->buf.str, 0);
        }
        if (is_white(c)) {
            c = buf_next_non_white(&dr->buf);
        }
        if ('=' != c) {
	    if (eq_req) {
		dr->err = 1;
		return c;
	    } else {
		ox_sax_drive_error(dr, WRONG_CHAR "no attribute value");
		attr_value = (char*)"";
	    }
        } else {
	    line = dr->buf.line;
	    col = dr->buf.col;
	    c = read_quoted_value(dr);
	    attr_value = dr->buf.str;
	    if (is_encoding) {
#if HAS_ENCODING_SUPPORT
		dr->encoding = rb_enc_find(dr->buf.str);
#elif HAS_PRIVATE_ENCODING
		dr->encoding = rb_str_new2(dr->buf.str);
#else
		dr->encoding = dr->buf.str;
#endif
		is_encoding = 0;
	    }
	}
        if (dr->has.attr_value) {
            VALUE       args[2];

	    if (dr->has.line) {
		rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
	    }
	    if (dr->has.column) {
		rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
	    }
            args[0] = name;
            args[1] = dr->value_obj;
            rb_funcall2(dr->handler, ox_attr_value_id, 2, args);
	} else if (dr->has.attr) {
            VALUE       args[2];

            args[0] = name;
            ox_sax_collapse_special(dr, dr->buf.str, line, col);
            args[1] = rb_str_new2(attr_value);
#if HAS_ENCODING_SUPPORT
            if (0 != dr->encoding) {
                rb_enc_associate(args[1], dr->encoding);
            }
#elif HAS_PRIVATE_ENCODING
	    if (Qnil != dr->encoding) {
		rb_funcall(args[1], ox_force_encoding_id, 1, dr->encoding);
	    }
#endif
	    if (dr->has.line) {
		rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
	    }
	    if (dr->has.column) {
		rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
	    }
            rb_funcall2(dr->handler, ox_attr_id, 2, args);
        }
	if (is_white(c)) {
	    c = buf_next_non_white(&dr->buf);
	}
    }
    dr->buf.str = 0;

    return c;
}
Пример #6
0
/* Entered after the "<?" sequence. Ready to read the rest.
 */
static void
read_instruction(PInfo pi) {
    char		content[1024];
    struct _Attr	attrs[MAX_ATTRS + 1];
    Attr		a = attrs;
    char		*target;
    char		*end;
    char		c;
    char		*cend;
    int			attrs_ok = 1;

    *content = '\0';
    memset(attrs, 0, sizeof(attrs));
    target = read_name_token(pi);
    end = pi->s;
    if (0 == (cend = gather_content(pi->s, content, sizeof(content) - 1))) {
	raise_error("processing instruction content too large or not terminated", pi->str, pi->s);
    }
    next_non_white(pi);
    c = *pi->s;
    *end = '\0'; /* terminate name */
    if ('?' != c) {
	while ('?' != *pi->s) {
	    if ('\0' == *pi->s) {
		raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
	    }
	    next_non_white(pi);
	    a->name = read_name_token(pi);
	    end = pi->s;
	    next_non_white(pi);
	    if ('=' != *pi->s++) {
		attrs_ok = 0;
		break;
	    }
	    *end = '\0'; /* terminate name */
	    /* read value */
	    next_non_white(pi);
	    a->value = read_quoted_value(pi);
	    a++;
	    if (MAX_ATTRS <= (a - attrs)) {
		attrs_ok = 0;
		break;
	    }
	    next_non_white(pi);
	}
	if ('?' == *pi->s) {
	    pi->s++;
	}
    } else {
	pi->s++;
    }
    if (attrs_ok) {
	if ('>' != *pi->s++) {
	    raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
	}
    } else {
	pi->s = cend + 1;
    }
    if (0 != pi->pcb->instruct) {
	if (attrs_ok) {
	    pi->pcb->instruct(pi, target, attrs, 0);
	} else {
	    pi->pcb->instruct(pi, target, attrs, content);
	}
    }
}