Esempio n. 1
0
/*
 * Parse the mail header, or the header of one multipart part.
 *
 * Called repeatedly with successive chunks of input. Header-line bytes are
 * accumulated in node->buffer; a completed line is handed to
 * mime_header_line() once the start of the following line shows that it is
 * not a continuation. An empty line ends the header and switches
 * state->curr_status to MIME_S_BODY.
 *
 * state: parser state; state->curr_node is the node being filled and
 *        state->curr_off is advanced for every byte consumed.
 * s, n:  input bytes and how many are available.
 *
 * Returns the number of bytes of s not consumed by this call (n on entry
 * minus the bytes consumed), except for the leading-blank-line path, which
 * returns 0 (see the note there).
 */
static int mime_state_head(MIME_STATE *state, const char *s, int n)
{
	MIME_NODE *node = state->curr_node;

	if (n <= 0)
		return n;

	/* The current line has no newline yet: keep accumulating it */

	if (node->last_lf != '\n') {
		while (n > 0) {
			node->last_ch = *s;
			ADDCH(node->buffer, node->last_ch);
			n--;
			state->curr_off++;

			if (node->last_ch == '\n') {
				node->last_lf = '\n';
				break;
			}
			s++;
		}

		return n;
	}

	/* A chunk starting with a newline means this header section is over */

	if (*s == '\n') {
		/* The previous data ended with "\n\r" or "\n" */

		state->curr_off++;
		node->header_end = state->curr_off;

		if (LEN(node->buffer) > 0) {
			/* Process the last line of the header */
			mime_header_line(node);
			node->valid_line++;
		}

		/* Skip useless blank lines that precede any header line.
		 * NOTE(review): this returns 0 rather than n - 1, which reports
		 * the whole remaining chunk as consumed -- confirm against the
		 * caller that this is intended.
		 */
		if (node->valid_line == 0)
			return 0;

		/* If this node is multipart, reset state->curr_bound */
		if (node->boundary != NULL)
			state->curr_bound = STR(node->boundary);
		state->curr_status = MIME_S_BODY;
		node->body_begin = state->curr_off;
		return n - 1;
	}
	if (*s == '\r') {
		state->curr_off++;
		if (node->last_ch == '\r') {
			/* XXX: a "\n\r\r" sequence was seen; drop the newline
			 * marker so the next call goes back to accumulating
			 */
			node->last_ch = '\r';
			node->last_lf = 0;
			return n - 1;
		}

		node->last_ch = '\r';
		/* Return, expecting the next character to be '\n' */
		return n - 1;
	}

	/* Clear the '\n' marker: a new (or continued) line starts here */
	node->last_lf = 0;

	/* A line starting with a space or TAB belongs to the previous line */

	if (IS_SPACE_TAB(*s)) {
		/* Append this continuation line to the pending header line */
		while (n > 0) {
			node->last_ch = *s;
			ADDCH(node->buffer, node->last_ch);
			n--;
			state->curr_off++;

			if (node->last_ch == '\n') {
				/* A complete line has been consumed */
				node->last_lf = '\n';
				break;
			}
			s++;
		}

		return n;
	}

	/* A new header line starts: process the previous, pending one */

	if (LEN(node->buffer) > 0) {
		mime_header_line(node);
		node->valid_line++;
	}

	/* Nothing was consumed here; with last_lf cleared, the next call
	 * takes the accumulating branch at the top for the same bytes.
	 */
	return n;
}
Esempio n. 2
0
/*
 * Parse the value text of the current JSON node from data.
 *
 * Characters are appended to node->text. For a quoted value parsing stops
 * after the matching closing quote (which is consumed); for an unquoted value
 * it stops at whitespace, ',', ';', '}' or ']' (which is NOT consumed). In
 * both cases json->status becomes ACL_JSON_S_STREND. If the input ends first,
 * the escape / half-character flags stay in the node so a later call can
 * resume.
 *
 * Returns a pointer to the first byte not consumed.
 *
 * Lead-byte detection for double-byte characters tests bit 7 explicitly
 * instead of "ch < 0": ch is loaded from a plain char, whose signedness is
 * implementation-defined, so the sign test never fires where char is unsigned.
 */
static const char *json_string(ACL_JSON *json, const char *data)
{
	ACL_JSON_NODE *node = json->curr_node;
	int   ch;

	/* An empty text buffer means no significant character was seen yet */

	if (LEN(node->text) == 0) {
		/* Drop leading whitespace first */
		SKIP_SPACE(data);
		if (*data == 0)
			return data;
	}

	/* This node is a leaf node */

	while ((ch = *data) != 0) {
		/* An opening quote was seen: the same quote must close it */
		if (node->quote) {
			if (node->backslash) {
				/* Character following a backslash: decode it */
				if (ch == 'b')
					ADDCH(node->text, '\b');
				else if (ch == 'f')
					ADDCH(node->text, '\f');
				else if (ch == 'n')
					ADDCH(node->text, '\n');
				else if (ch == 'r')
					ADDCH(node->text, '\r');
				else if (ch == 't')
					ADDCH(node->text, '\t');
				else
					ADDCH(node->text, ch);
				node->backslash = 0;
			}

			/* In a double-byte character the first byte has its
			 * high bit set and the second byte may be 92, which
			 * is the same value as the escape character
			 */
			else if (ch == '\\') {
				/* If the previous byte was the first half of a
				 * double-byte character, this backslash is its
				 * second half, not an escape
				 */
				if (node->part_word) {
					ADDCH(node->text, ch);
					node->part_word = 0;
				} else
					node->backslash = 1;
			} else if (ch == node->quote) {
				/* node->quote is deliberately kept for values
				 * so the value type (bool, null, number,
				 * string) can still be told apart later
				 */

				/* Switch to looking for this node's sibling */
				json->status = ACL_JSON_S_STREND;
				node->part_word = 0;
				data++;
				break;
			}

			/* Optional support for a double-byte character whose
			 * second byte is the escape character '\'
			 */
			else if ((json->flag & ACL_JSON_FLAG_PART_WORD)) {
				ADDCH(node->text, ch);

				/* Previous byte was a first half, so this byte
				 * completes the character
				 */
				if (node->part_word)
					node->part_word = 0;

				/* Otherwise a byte with the high bit set is
				 * the first half of a new character
				 */
				else if ((ch & 0x80) != 0)
					node->part_word = 1;
			} else {
				ADDCH(node->text, ch);
			}
		} else if (node->backslash) {
			/* Unquoted value: an escaped byte is copied verbatim */
			ADDCH(node->text, ch);
			node->backslash = 0;
		} else if (ch == '\\') {
			if (node->part_word) {
				ADDCH(node->text, ch);
				node->part_word = 0;
			} else
				node->backslash = 1;
		} else if (IS_SPACE(ch) || ch == ',' || ch == ';'
			|| ch == '}' || ch == ']')
		{
			/* Switch to looking for this node's sibling */
			json->status = ACL_JSON_S_STREND;
			break;
		}

		/* Optional support for a double-byte character whose second
		 * byte is the escape character '\'
		 */
		else if ((json->flag & ACL_JSON_FLAG_PART_WORD)) {
			ADDCH(node->text, ch);

			/* Track first/second half of a double-byte character */
			if (node->part_word)
				node->part_word = 0;
			else if ((ch & 0x80) != 0)
				node->part_word = 1;
		} else {
			ADDCH(node->text, ch);
		}
		data++;
	}

	/* Keep a non-empty text buffer NUL-terminated */
	if (LEN(node->text) > 0)
		ACL_VSTRING_TERMINATE(node->text);

	return data;
}
Esempio n. 3
0
/* Two-pass field writer.  With copy_phase == 0 only the resulting record
 * length is computed; with copy_phase != 0 the field is also copied into
 * self->rec.  *quoted is set when the field turns out to need quoting.
 * Returns the new record length, or -1 with a Python exception set.
 */
static int
join_append_data(WriterObj *self, char *field, int quote_empty,
                 int *quoted, int copy_phase)
{
    DialectObj *dialect = self->dialect;
    char *lineterm;
    int pos;
    int i;

/* Store c at the current position (copy phase only) and advance. */
#define ADDCH(c) \
    do {\
        if (copy_phase) \
            self->rec[pos] = c;\
        pos++;\
    } while(0)

    lineterm = PyString_AsString(dialect->lineterminator);
    if (lineterm == NULL)
        return -1;

    pos = self->rec_len;

    /* Every field but the first is preceded by the delimiter. */
    if (self->num_fields > 0)
        ADDCH(dialect->delimiter);

    /* The opening quote is only written during the copy phase; the
     * counting phase accounts for both quotes at the end.
     */
    if (copy_phase && *quoted)
        ADDCH(dialect->quotechar);

    /* Walk the NUL-terminated field, one character at a time. */
    for (i = 0; field[i] != '\0'; i++) {
        char c = field[i];
        int special = (c == dialect->delimiter ||
                       c == dialect->escapechar ||
                       c == dialect->quotechar ||
                       strchr(lineterm, c));

        if (special) {
            int want_escape;

            if (dialect->quoting == QUOTE_NONE)
                want_escape = 1;
            else if (c == dialect->quotechar && !dialect->doublequote)
                want_escape = 1;
            else {
                want_escape = 0;
                /* Double an embedded quote character. */
                if (c == dialect->quotechar)
                    ADDCH(dialect->quotechar);
                *quoted = 1;
            }

            if (want_escape) {
                if (!dialect->escapechar) {
                    PyErr_Format(error_obj,
                                 "need to escape, but no escapechar set");
                    return -1;
                }
                ADDCH(dialect->escapechar);
            }
        }

        /* The character itself always goes into the record. */
        ADDCH(c);
    }

    /* An empty field may have to be quoted to stay visible. */
    if (i == 0 && quote_empty) {
        if (dialect->quoting == QUOTE_NONE) {
            PyErr_Format(error_obj,
                         "single empty field record must be quoted");
            return -1;
        }
        *quoted = 1;
    }

    if (*quoted) {
        if (copy_phase)
            ADDCH(dialect->quotechar);
        else
            pos += 2;   /* opening + closing quote */
    }
    return pos;
#undef ADDCH
}
Esempio n. 4
0
/*
 * Parse the tag (member name) of the current JSON node from data.
 *
 * Characters are appended to node->ltag. A quoted tag ends at the matching
 * quote (consumed): json->status becomes ACL_JSON_S_NEXT when the parent was
 * opened with '[', otherwise ACL_JSON_S_COLON, and node->quote is cleared.
 * An unquoted tag ends at whitespace or ':' (not consumed) with status
 * ACL_JSON_S_COLON. If the input ends first, the escape / half-character
 * flags stay in the node so a later call can resume.
 *
 * Returns a pointer to the first byte not consumed.
 *
 * Lead-byte detection for double-byte characters tests bit 7 explicitly
 * instead of "ch < 0": ch is loaded from a plain char, whose signedness is
 * implementation-defined, so the sign test never fires where char is unsigned.
 */
static const char *json_tag(ACL_JSON *json, const char *data)
{
	ACL_JSON_NODE *node = json->curr_node;
	int   ch;

	while ((ch = *data) != 0) {
		/* An opening quote was seen: look for the closing one */
		if (node->quote) {
			if (node->backslash) {
				/* Character following a backslash: decode it */
				if (ch == 'b')
					ADDCH(node->ltag, '\b');
				else if (ch == 'f')
					ADDCH(node->ltag, '\f');
				else if (ch == 'n')
					ADDCH(node->ltag, '\n');
				else if (ch == 'r')
					ADDCH(node->ltag, '\r');
				else if (ch == 't')
					ADDCH(node->ltag, '\t');
				else
					ADDCH(node->ltag, ch);
				node->backslash = 0;
			}

			/* In a double-byte character the first byte has its
			 * high bit set and the second byte may be 92, which
			 * is the same value as the escape character
			 */
			else if (ch == '\\') {
				/* A backslash right after a first half is the
				 * second half of that character
				 */
				if (node->part_word) {
					ADDCH(node->ltag, ch);
					node->part_word = 0;
				} else
					node->backslash = 1;
			} else if (ch == node->quote) {
				ACL_JSON_NODE *parent;

				parent = acl_json_node_parent(node);

				acl_assert(parent);

				/* Children of an array may be bare strings or
				 * objects, so no colon follows
				 */
				if (parent->left_ch == '[')
					json->status = ACL_JSON_S_NEXT;

				/* Tag finished: a colon is expected next */
				else
					json->status = ACL_JSON_S_COLON;

				/* Clear quote once the tag name is done so the
				 * field can be reused while parsing the value;
				 * leaving it set would disturb value parsing
				 */
				node->quote = 0;
				node->part_word = 0;
				data++;
				break;
			}

			/* Optional support for a double-byte character whose
			 * second byte is the escape character '\'
			 */
			else if ((json->flag & ACL_JSON_FLAG_PART_WORD)) {
				ADDCH(node->ltag, ch);

				/* Track first/second half of the character */
				if (node->part_word)
					node->part_word = 0;
				else if ((ch & 0x80) != 0)
					node->part_word = 1;
			} else {
				ADDCH(node->ltag, ch);
			}
		}

		/* From here on: tag name without a leading quote */

		else if (node->backslash) {
			ADDCH(node->ltag, ch);
			node->backslash = 0;
		}

		/* Same double-byte / backslash ambiguity as above */
		else if (ch == '\\') {
			if (node->part_word) {
				ADDCH(node->ltag, ch);
				node->part_word = 0;
			} else
				node->backslash = 1;
		} else if (IS_SPACE(ch) || ch == ':') {
			/* Tag finished: a colon is expected next */
			json->status = ACL_JSON_S_COLON;
			node->part_word = 0;
			break;
		}

		/* Optional support for a double-byte character whose second
		 * byte is the escape character '\'
		 */
		else if ((json->flag & ACL_JSON_FLAG_PART_WORD)) {
			ADDCH(node->ltag, ch);

			/* Track first/second half of the character */
			if (node->part_word)
				node->part_word = 0;
			else if ((ch & 0x80) != 0)
				node->part_word = 1;
		} else {
			ADDCH(node->ltag, ch);
		}
		data++;
	}

	/* Keep a non-empty tag NUL-terminated */
	if (LEN(node->ltag) > 0)
		ACL_VSTRING_TERMINATE(node->ltag);

	return data;
}
Esempio n. 5
0
/*
 * Parse the value of the current attribute of the current XML node.
 *
 * On the first call for a value (empty buffer, no quote recorded) leading
 * whitespace is skipped and an opening quote, if present, is remembered in
 * attr->quote. Characters are then appended to attr->value until:
 *   - the matching quote (quoted value)      -> status ACL_XML_S_ATTR
 *   - '>' (unquoted value)                   -> status ACL_XML_S_LGT
 *   - whitespace (unquoted value)            -> status ACL_XML_S_ATTR
 * The terminating character is consumed in all three cases.
 *
 * When the node status is no longer ACL_XML_S_AVAL afterwards, an attribute
 * whose name satisfies IS_ID() and whose value is non-empty is entered into
 * xml->id_table (first occurrence only) and curr_attr is cleared.
 *
 * Returns a pointer to the first byte not consumed, or NULL when the input
 * ended before any value character was available on the first call.
 *
 * Lead-byte detection for double-byte characters tests bit 7 explicitly
 * instead of "ch < 0": ch is loaded from a plain char, whose signedness is
 * implementation-defined, so the sign test never fires where char is unsigned.
 */
static const char *xml_parse_attr_val(ACL_XML *xml, const char *data)
{
	int   ch;
	ACL_XML_ATTR *attr = xml->curr_node->curr_attr;

	if (LEN(attr->value) == 0 && !attr->quote) {
		SKIP_SPACE(data);
		if (IS_QUOTE(*data)) {
			attr->quote = *data++;
		}
		if (*data == 0) {
			return (NULL);
		}
	}

	while ((ch = *data) != 0) {
		if (attr->backslash) {
			/* Character following a backslash: decode it */
			if (ch == 'b')
				ADDCH(attr->value, '\b');
			else if (ch == 'f')
				ADDCH(attr->value, '\f');
			else if (ch == 'n')
				ADDCH(attr->value, '\n');
			else if (ch == 'r')
				ADDCH(attr->value, '\r');
			else if (ch == 't')
				ADDCH(attr->value, '\t');
			else
				ADDCH(attr->value, ch);
			xml->curr_node->last_ch = ch;
			attr->backslash = 0;
		} else if (ch == '\\') {
			/* A backslash right after the first half of a
			 * double-byte character is that character's second
			 * half, not an escape
			 */
			if (attr->part_word) {
				ADDCH(attr->value, ch);
				attr->part_word = 0;
			}
			else
				attr->backslash = 1;
		} else if (attr->quote) {
			if (ch == attr->quote) {
				/* Closing quote: the value is complete */
				xml->curr_node->status = ACL_XML_S_ATTR;
				xml->curr_node->last_ch = ch;
				data++;
				break;
			}
			ADDCH(attr->value, ch);
			xml->curr_node->last_ch = ch;
		} else if (ch == '>') {
			/* Unquoted value ended by the end of the tag */
			xml->curr_node->status = ACL_XML_S_LGT;
			xml_parse_check_self_closed(xml);
			data++;
			break;
		} else if (IS_SPACE(ch)) {
			/* Unquoted value ended by whitespace */
			xml->curr_node->status = ACL_XML_S_ATTR;
			xml->curr_node->last_ch = ch;
			data++;
			break;
		} else {
			ADDCH(attr->value, ch);
			xml->curr_node->last_ch = ch;

			if ((xml->flag & ACL_XML_FLAG_PART_WORD)) {
				/* Track first/second half of a double-byte
				 * character
				 */
				if (attr->part_word)
					attr->part_word = 0;
				else if ((ch & 0x80) != 0)
					attr->part_word = 1;
			}
		}
		data++;
	}

	ACL_VSTRING_TERMINATE(attr->value);

	if (xml->curr_node->status != ACL_XML_S_AVAL) {
		/* Map the ID value into the hash table for fast lookup */
		if (IS_ID(STR(attr->name)) && LEN(attr->value) > 0) {
			const char *ptr = STR(attr->value);

			/* Do not insert a duplicate ID */
			if (acl_htable_find(xml->id_table, ptr) == NULL) {
				acl_htable_enter(xml->id_table, ptr, attr);

				/* The node only gets an id once the attribute
				 * has actually been entered into the table
				 */
				xml->curr_node->id = attr->value;
			}
		}

		/* The current attribute must be cleared so that a new
		 * attribute object can be created as parsing continues
		 */
		xml->curr_node->curr_attr = NULL;
	}
	return (data);
}
Esempio n. 6
0
/*
 * Collect the body of a comment-style meta node into curr_node->text.
 *
 * Quote characters toggle a quoted run and are not stored themselves.
 * Outside quotes, '<' and '>' adjust the nesting counter nlt, and up to two
 * pending '-' characters are remembered in meta[0..1]; a '>' seen at nesting
 * level 0 with both dashes pending ends the comment (status ACL_XML_S_MEND).
 * Returns a pointer to the first byte not consumed.
 */
static const char *xml_parse_meta_comment(ACL_XML *xml, const char *data)
{
	int   c;

	/* Nothing collected yet: ignore leading whitespace */
	if (LEN(xml->curr_node->text) == 0) {
		SKIP_SPACE(data);
	}

	for (; (c = *data) != 0; data++) {
		/* Inside a quoted run: only the matching quote is special */
		if (xml->curr_node->quote) {
			if (c != xml->curr_node->quote) {
				ADDCH(xml->curr_node->text, c);
			} else {
				xml->curr_node->quote = 0;
			}
			continue;
		}

		/* No quote is open here, so a quote character opens one */
		if (IS_QUOTE(c)) {
			xml->curr_node->quote = c;
			continue;
		}

		if (c == '<') {
			xml->curr_node->nlt++;
			ADDCH(xml->curr_node->text, c);
			continue;
		}

		if (c == '>') {
			if (xml->curr_node->nlt == 0
				&& xml->curr_node->meta[0] == '-'
				&& xml->curr_node->meta[1] == '-')
			{
				/* "-->" at top level: consume '>' and stop */
				data++;
				xml->curr_node->status = ACL_XML_S_MEND;
				break;
			}
			xml->curr_node->nlt--;
			ADDCH(xml->curr_node->text, c);
			continue;
		}

		/* Inside nested '<' ... '>' everything is copied verbatim */
		if (xml->curr_node->nlt > 0) {
			ADDCH(xml->curr_node->text, c);
			continue;
		}

		if (c == '-') {
			/* Remember at most two pending dashes */
			if (xml->curr_node->meta[0] != '-') {
				xml->curr_node->meta[0] = '-';
			} else if (xml->curr_node->meta[1] != '-') {
				xml->curr_node->meta[1] = '-';
			}
			continue;
		}

		/* Ordinary character: flush pending dashes, then store it */
		if (xml->curr_node->meta[0] == '-') {
			ADDCH(xml->curr_node->text, '-');
			xml->curr_node->meta[0] = 0;
		}
		if (xml->curr_node->meta[1] == '-') {
			ADDCH(xml->curr_node->text, '-');
			xml->curr_node->meta[1] = 0;
		}
		ADDCH(xml->curr_node->text, c);
	}

	ACL_VSTRING_TERMINATE(xml->curr_node->text);
	return (data);
}
Esempio n. 7
0
void input(FILE *fd, void (*cb)(struct message *), struct message *p)
{
	int c;

#define ST(X) do { p->status = (X); } while(0)
#define MK(X) do { p->X = p->p; } while(0)
#define RST() do { \
	ST(AT_BOM); \
	p->p = p->buffer; \
	p->sz = p->argc = 0; \
	p->org = p->cmd = NULL; \
} while (0)
#define ADDCH(X) do { \
	*p->p++ = X; p->sz++; \
	if (p->sz >= (sizeof p->buffer)-1) \
		RST(); \
} while (0)
#define ADDARG(s) do { \
	assert(p->argc < MAX_ARGS); \
	p->argv[p->argc] = s; \
	if (s) p->argc++; \
} while (0)

	RST();
	while ((c = fgetc(fd)) != EOF) {
		switch (p->status) {
		case AT_BOM:
			switch (c) {
			case ':': ST(IN_ORG); MK(org); break;
			case ' ': break; /* ignored */
			case '\r': ST(IN_EOL); break;
			case '\n': RST(); break;
			default: ST(IN_CMD); MK(cmd); ADDCH(c); break;
			} break;
		case IN_ORG:
			switch(c) {
			case ' ': ST(IN_SPC0); ADDCH(0); break;
			case '\r': ST(IN_EOL); ADDCH(0); break;
			case '\n': RST(); break;
			case ':': /* NO BREAK HERE, valid char */
			default: ADDCH(c); break;
			} break;
		case IN_SPC0:
			switch(c) {
			case ' ': break; /* ignore extra */
			case '\r': ST(IN_EOL); break;
			case '\n': RST(); break;
			default: ST(IN_CMD); MK(cmd); ADDCH(c); break;
			} break;
		case IN_CMD:
			switch(c) {
			case ' ': ST(IN_SPC1); ADDCH(0); break;
			case '\r': ST(IN_EOL); ADDCH(0); break;
			case '\n': ADDARG(NULL); cb(p); RST(); break;
			case ':': /* NO BREAK HERE, valid char */
			default: ADDCH(c); break;
			} break;
		case IN_SPC1:
			switch(c) {
			case ' ': break; /* ignore */
			case '\r': ST(IN_EOL); break;
			case '\n': ADDARG(NULL); cb(p); RST(); break;
			case ':': ST(IN_ARGN); ADDARG(p->p); break;
			default: ST(IN_ARG); ADDARG(p->p); ADDCH(c); break;
			} break;
		case IN_ARG:
			switch (c) {
			case ' ': ST(IN_SPC1); ADDCH(0); break;
			case '\r': ST(IN_EOL); ADDCH(0); break;
			case '\n': ADDARG(NULL); cb(p); RST(); break;
			case ':':
			default: ADDCH(c); break;
			} break;
		case IN_ARGN:
			switch (c) {
			case '\r': ST(IN_EOL); ADDCH(0); break;
			case '\n': ADDARG(NULL); cb(p); RST(); break;
			default: ADDCH(c); break;
			} break;
		case IN_EOL:
			switch (c) {
			case '\r': break;
			case '\n': ADDARG(NULL); cb(p); RST(); break;
			case ':': RST(); ST(IN_ORG); break;
			default: RST(); break;
			} break;
		} /* switch */
	} /* while */
} /* input */
Esempio n. 8
0
/* Calculate new record length or append field to record.
 *
 * With copy_phase == 0 only the length the record would have is computed
 * (overflow of Py_ssize_t is detected and reported as MemoryError); with
 * copy_phase != 0 the characters are also stored into self->rec.  *quoted is
 * set when the field contains a character that requires quoting.
 *
 * field_kind / field_data / field_len describe the field's character data as
 * read with PyUnicode_READ; a NULL field_data is treated as an empty field.
 *
 * Return the new record length, or -1 with a Python exception set.
 */
static Py_ssize_t
join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
                 Py_ssize_t field_len, int *quoted,
                 int copy_phase)
{
    DialectObj *dialect = self->dialect;
    /* Index must have the same width as field_len: an int index would
     * overflow before reaching the end of a field longer than INT_MAX.
     */
    Py_ssize_t i;
    Py_ssize_t rec_len;

/* Advance the record length, refusing to wrap while only counting. */
#define INCLEN \
    do {\
        if (!copy_phase && rec_len == PY_SSIZE_T_MAX) {    \
            goto overflow; \
        } \
        rec_len++; \
    } while(0)

/* Store c (copy phase only) and advance the record length. */
#define ADDCH(c)                                \
    do {\
        if (copy_phase) \
            self->rec[rec_len] = c;\
        INCLEN;\
    } while(0)

    rec_len = self->rec_len;

    /* If this is not the first field we need a field separator */
    if (self->num_fields > 0)
        ADDCH(dialect->delimiter);

    /* Handle preceding quote */
    if (copy_phase && *quoted)
        ADDCH(dialect->quotechar);

    /* Copy/count field data */
    /* If field is null just pass over */
    for (i = 0; field_data && (i < field_len); i++) {
        Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
        int want_escape = 0;

        /* Delimiter, escape char, quote char and any line terminator
         * character need either quoting or escaping.
         */
        if (c == dialect->delimiter ||
            c == dialect->escapechar ||
            c == dialect->quotechar  ||
            PyUnicode_FindChar(
                dialect->lineterminator, c, 0,
                PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
            if (dialect->quoting == QUOTE_NONE)
                want_escape = 1;
            else {
                if (c == dialect->quotechar) {
                    /* Embedded quote: double it or escape it */
                    if (dialect->doublequote)
                        ADDCH(dialect->quotechar);
                    else
                        want_escape = 1;
                }
                if (!want_escape)
                    *quoted = 1;
            }
            if (want_escape) {
                if (!dialect->escapechar) {
                    PyErr_Format(_csvstate_global->error_obj,
                                 "need to escape, but no escapechar set");
                    return -1;
                }
                ADDCH(dialect->escapechar);
            }
        }
        /* Copy field character into record buffer.
         */
        ADDCH(c);
    }

    if (*quoted) {
        if (copy_phase)
            ADDCH(dialect->quotechar);
        else {
            INCLEN; /* starting quote */
            INCLEN; /* ending quote */
        }
    }
    return rec_len;

  overflow:
    PyErr_NoMemory();
    return -1;
#undef ADDCH
#undef INCLEN
}