// 分析邮件头及 multipart 各部分的头 static int mime_state_head(MIME_STATE *state, const char *s, int n) { MIME_NODE *node = state->curr_node; if (n <= 0) return n; /* 如果还未找到换行符,则继续 */ if (node->last_lf != '\n') { while (n > 0) { node->last_ch = *s; ADDCH(node->buffer, node->last_ch); n--; state->curr_off++; if (node->last_ch == '\n') { node->last_lf = '\n'; break; } s++; } return n; } /* 如果数据以换行开始, 说明当前的邮件头结束 */ if (*s == '\n') { /* 上次数据为: \n\r 或 \n */ state->curr_off++; node->header_end = state->curr_off; if (LEN(node->buffer) > 0) { /* 处理头部的最后一行数据 */ mime_header_line(node); node->valid_line++; } /* 略过开头无用的空行 */ if (node->valid_line == 0) return 0; /* 如果当前结点为 multipart 格式, 则重置 state->curr_bound */ if (node->boundary != NULL) state->curr_bound = STR(node->boundary); state->curr_status = MIME_S_BODY; node->body_begin = state->curr_off; return n - 1; } if (*s == '\r') { state->curr_off++; if (node->last_ch == '\r') { /* XXX: 出现了 \n\r\r 现象 */ node->last_ch = '\r'; node->last_lf = 0; return n - 1; } node->last_ch = '\r'; /* 返回, 以期待下一个字符为 '\n' */ return n - 1; } /* 清除 '\n' */ node->last_lf = 0; /* 如果数据以空格或TAB开始, 说明数据附属于上一行 */ if (IS_SPACE_TAB(*s)) { /* 说明本行数据附属于上一行数据 */ while (n > 0) { node->last_ch = *s; ADDCH(node->buffer, node->last_ch); n--; state->curr_off++; if (node->last_ch == '\n') { /* 处理完本完整行数据 */ node->last_lf = '\n'; break; } s++; } return n; } /* 处理头部的上一行数据 */ if (LEN(node->buffer) > 0) { mime_header_line(node); node->valid_line++; } return n; }
/*
 * Collect the text value of the current JSON node (json->curr_node) from
 * data, appending to node->text.  May be called repeatedly with successive
 * chunks; escape and half-character state is kept in the node.
 *
 * Returns a pointer to the first byte not consumed: just past the closing
 * quote of a quoted value, at the terminating character of an unquoted
 * value, or at the trailing NUL when the chunk is exhausted.
 */
static const char *json_string(ACL_JSON *json, const char *data)
{
	ACL_JSON_NODE *node = json->curr_node;
	int   ch;

	/* An empty text means no significant character has been seen yet. */
	if (LEN(node->text) == 0) {
		/* Drop useless leading white space first. */
		SKIP_SPACE(data);
		if (*data == 0)
			return data;
	}

	/* This node is a leaf node. */
	while ((ch = *data) != 0) {
		/* A value opened by a quote must be closed by the same quote. */
		if (node->quote) {
			if (node->backslash) {
				switch (ch) {
				case 'b':
					ADDCH(node->text, '\b');
					break;
				case 'f':
					ADDCH(node->text, '\f');
					break;
				case 'n':
					ADDCH(node->text, '\n');
					break;
				case 'r':
					ADDCH(node->text, '\r');
					break;
				case 't':
					ADDCH(node->text, '\t');
					break;
				default:
					ADDCH(node->text, ch);
					break;
				}
				node->backslash = 0;
			}
			/*
			 * In a double-byte Chinese character the first byte
			 * has its high bit set and the second byte may be 92,
			 * the same value as the escape character.
			 */
			else if (ch == '\\') {
				/*
				 * If the previous byte was the first half of
				 * such a character, this backslash is its
				 * second half, not an escape.
				 */
				if (node->part_word) {
					ADDCH(node->text, ch);
					node->part_word = 0;
				} else {
					node->backslash = 1;
				}
			} else if (ch == node->quote) {
				/*
				 * node->quote is deliberately kept so the
				 * value type (bool, null, number, string) can
				 * still be told apart later.
				 */
				/* Switch to looking for the sibling node. */
				json->status = ACL_JSON_S_STREND;
				node->part_word = 0;
				data++;
				break;
			}
			/* Optional support for a '\' as second half byte. */
			else if ((json->flag & ACL_JSON_FLAG_PART_WORD)) {
				ADDCH(node->text, ch);

				if (node->part_word) {
					/* Second half: character is complete. */
					node->part_word = 0;
				} else if ((ch & 0x80) != 0) {
					/*
					 * High bit set: first half of a
					 * double-byte character.  Bit 7 is
					 * tested instead of "ch < 0" because
					 * plain char may be unsigned, in which
					 * case ch is never negative.
					 */
					node->part_word = 1;
				}
			} else {
				ADDCH(node->text, ch);
			}
		} else if (node->backslash) {
			/* Unquoted value: an escaped byte is taken literally. */
			ADDCH(node->text, ch);
			node->backslash = 0;
		} else if (ch == '\\') {
			if (node->part_word) {
				ADDCH(node->text, ch);
				node->part_word = 0;
			} else {
				node->backslash = 1;
			}
		} else if (IS_SPACE(ch) || ch == ',' || ch == ';'
			|| ch == '}' || ch == ']')
		{
			/*
			 * Unquoted value ends here; the terminator is left
			 * unconsumed.  Switch to looking for the sibling node.
			 */
			json->status = ACL_JSON_S_STREND;
			break;
		}
		/* Optional support for a '\' as second half byte. */
		else if ((json->flag & ACL_JSON_FLAG_PART_WORD)) {
			ADDCH(node->text, ch);

			if (node->part_word) {
				node->part_word = 0;
			} else if ((ch & 0x80) != 0) {
				/* See above: portable high-bit test. */
				node->part_word = 1;
			}
		} else {
			ADDCH(node->text, ch);
		}
		data++;
	}

	/* Keep a non-empty text NUL-terminated. */
	if (LEN(node->text) > 0)
		ACL_VSTRING_TERMINATE(node->text);

	return data;
}
/* Calculate new record length or append field to record.
 *
 * With copy_phase == 0 nothing is written: the function only counts how
 * long the record becomes once `field` is appended, and sets *quoted when
 * the field contains a character that forces quoting.  With copy_phase != 0
 * the same walk stores the characters into self->rec, which the caller must
 * already have sized from the counting pass.
 *
 * quote_empty: when non-zero an empty field must be quoted; this is an
 *              error if the dialect uses QUOTE_NONE.
 *
 * Returns the new record length, or -1 with a Python exception set.
 */
static int
join_append_data(WriterObj *self, char *field, int quote_empty,
                 int *quoted, int copy_phase)
{
    DialectObj *dialect = self->dialect;
    int i, rec_len;
    char *lineterm;

/* Grow rec_len by one.  rec_len is an int, so the counting pass refuses to
 * step past INT_MAX (signed overflow is undefined behaviour).  The copy
 * pass repeats a walk the counting pass already validated. */
#define INCLEN \
    do {\
        if (!copy_phase && rec_len == INT_MAX) { \
            goto overflow; \
        } \
        rec_len++; \
    } while(0)

#define ADDCH(c) \
    do {\
        if (copy_phase) \
            self->rec[rec_len] = c;\
        INCLEN;\
    } while(0)

    lineterm = PyString_AsString(dialect->lineterminator);
    if (lineterm == NULL)
        return -1;

    rec_len = self->rec_len;

    /* If this is not the first field we need a field separator */
    if (self->num_fields > 0)
        ADDCH(dialect->delimiter);

    /* Handle preceding quote.  Only done while copying: during counting
     * *quoted is not final yet, so both quotes are added at the end. */
    if (copy_phase && *quoted)
        ADDCH(dialect->quotechar);

    /* Copy/count field data */
    for (i = 0;; i++) {
        char c = field[i];
        int want_escape = 0;

        if (c == '\0')
            break;

        if (c == dialect->delimiter ||
            c == dialect->escapechar ||
            c == dialect->quotechar ||
            strchr(lineterm, c)) {
            if (dialect->quoting == QUOTE_NONE)
                want_escape = 1;
            else {
                if (c == dialect->quotechar) {
                    if (dialect->doublequote)
                        ADDCH(dialect->quotechar);
                    else
                        want_escape = 1;
                }
                if (!want_escape)
                    *quoted = 1;
            }
            if (want_escape) {
                if (!dialect->escapechar) {
                    PyErr_Format(error_obj,
                                 "need to escape, but no escapechar set");
                    return -1;
                }
                ADDCH(dialect->escapechar);
            }
        }
        /* Copy field character into record buffer. */
        ADDCH(c);
    }

    /* If field is empty check if it needs to be quoted. */
    if (i == 0 && quote_empty) {
        if (dialect->quoting == QUOTE_NONE) {
            PyErr_Format(error_obj,
                         "single empty field record must be quoted");
            return -1;
        }
        else
            *quoted = 1;
    }

    if (*quoted) {
        if (copy_phase)
            ADDCH(dialect->quotechar);
        else {
            INCLEN;     /* starting quote */
            INCLEN;     /* ending quote */
        }
    }
    return rec_len;

  overflow:
    /* The record would not fit in an int-sized length. */
    PyErr_Format(error_obj, "record too long");
    return -1;
#undef ADDCH
#undef INCLEN
}
/*
 * Collect the tag (member name) of the current JSON node (json->curr_node)
 * from data, appending to node->ltag.  May be called repeatedly with
 * successive chunks; escape and half-character state is kept in the node.
 *
 * Returns a pointer to the first byte not consumed: just past the closing
 * quote of a quoted tag, at the white space or ':' ending an unquoted tag,
 * or at the trailing NUL when the chunk is exhausted.
 */
static const char *json_tag(ACL_JSON *json, const char *data)
{
	ACL_JSON_NODE *node = json->curr_node;
	int   ch;

	while ((ch = *data) != 0) {
		/* A tag opened by a quote runs up to the matching quote. */
		if (node->quote) {
			if (node->backslash) {
				switch (ch) {
				case 'b':
					ADDCH(node->ltag, '\b');
					break;
				case 'f':
					ADDCH(node->ltag, '\f');
					break;
				case 'n':
					ADDCH(node->ltag, '\n');
					break;
				case 'r':
					ADDCH(node->ltag, '\r');
					break;
				case 't':
					ADDCH(node->ltag, '\t');
					break;
				default:
					ADDCH(node->ltag, ch);
					break;
				}
				node->backslash = 0;
			}
			/*
			 * In a double-byte Chinese character the first byte
			 * has its high bit set and the second byte may be 92,
			 * the same value as the escape character.
			 */
			else if (ch == '\\') {
				/* Second half of a split character, not an escape. */
				if (node->part_word) {
					ADDCH(node->ltag, ch);
					node->part_word = 0;
				} else {
					node->backslash = 1;
				}
			} else if (ch == node->quote) {
				ACL_JSON_NODE *parent;

				parent = acl_json_node_parent(node);
				acl_assert(parent);

				/*
				 * A child of an array may be a bare string or
				 * object; otherwise a colon must follow the tag.
				 */
				if (parent->left_ch == '[')
					json->status = ACL_JSON_S_NEXT;
				else
					json->status = ACL_JSON_S_COLON;

				/*
				 * Clear quote once the tag is complete so the
				 * same field can be reused while parsing the
				 * tag's value; leaving it set would disturb
				 * that step.
				 */
				node->quote = 0;
				node->part_word = 0;
				data++;
				break;
			}
			/* Optional support for a '\' as second half byte. */
			else if ((json->flag & ACL_JSON_FLAG_PART_WORD)) {
				ADDCH(node->ltag, ch);

				if (node->part_word) {
					node->part_word = 0;
				} else if ((ch & 0x80) != 0) {
					/*
					 * High bit set: first half of a
					 * double-byte character.  Bit 7 is
					 * tested instead of "ch < 0" because
					 * plain char may be unsigned, in which
					 * case ch is never negative.
					 */
					node->part_word = 1;
				}
			} else {
				ADDCH(node->ltag, ch);
			}
		}
		/* From here on: the tag was not opened by a quote. */
		else if (node->backslash) {
			ADDCH(node->ltag, ch);
			node->backslash = 0;
		}
		/* Same half-character consideration as in the quoted case. */
		else if (ch == '\\') {
			if (node->part_word) {
				ADDCH(node->ltag, ch);
				node->part_word = 0;
			} else {
				node->backslash = 1;
			}
		} else if (IS_SPACE(ch) || ch == ':') {
			/* Tag complete; the next step looks for the colon. */
			json->status = ACL_JSON_S_COLON;
			node->part_word = 0;
			break;
		}
		/* Optional support for a '\' as second half byte. */
		else if ((json->flag & ACL_JSON_FLAG_PART_WORD)) {
			ADDCH(node->ltag, ch);

			if (node->part_word) {
				node->part_word = 0;
			} else if ((ch & 0x80) != 0) {
				/* See above: portable high-bit test. */
				node->part_word = 1;
			}
		} else {
			ADDCH(node->ltag, ch);
		}
		data++;
	}

	/* Keep a non-empty tag NUL-terminated. */
	if (LEN(node->ltag) > 0)
		ACL_VSTRING_TERMINATE(node->ltag);

	return data;
}
/*
 * Collect the value of the attribute currently being parsed
 * (xml->curr_node->curr_attr) from data, appending to attr->value.  May be
 * called repeatedly with successive chunks.
 *
 * Returns a pointer to the first byte not consumed, or NULL when the input
 * ends before the first value character has been seen.
 */
static const char *xml_parse_attr_val(ACL_XML *xml, const char *data)
{
	int   ch;
	ACL_XML_ATTR *attr = xml->curr_node->curr_attr;

	/* First call for this value: skip blanks and pick up the quote. */
	if (LEN(attr->value) == 0 && !attr->quote) {
		SKIP_SPACE(data);
		if (IS_QUOTE(*data)) {
			attr->quote = *data++;
		}
		if (*data == 0) {
			return (NULL);
		}
	}

	while ((ch = *data) != 0) {
		if (attr->backslash) {
			switch (ch) {
			case 'b':
				ADDCH(attr->value, '\b');
				break;
			case 'f':
				ADDCH(attr->value, '\f');
				break;
			case 'n':
				ADDCH(attr->value, '\n');
				break;
			case 'r':
				ADDCH(attr->value, '\r');
				break;
			case 't':
				ADDCH(attr->value, '\t');
				break;
			default:
				ADDCH(attr->value, ch);
				break;
			}
			/* last_ch records the input byte, not its translation. */
			xml->curr_node->last_ch = ch;
			attr->backslash = 0;
		} else if (ch == '\\') {
			/* Second half of a split character, not an escape. */
			if (attr->part_word) {
				ADDCH(attr->value, ch);
				attr->part_word = 0;
			} else {
				attr->backslash = 1;
			}
		} else if (attr->quote) {
			if (ch == attr->quote) {
				/* Closing quote: value complete. */
				xml->curr_node->status = ACL_XML_S_ATTR;
				xml->curr_node->last_ch = ch;
				data++;
				break;
			}
			/*
			 * NOTE(review): part_word is never updated for quoted
			 * values, only in the unquoted branch below -- confirm
			 * whether that is intended.
			 */
			ADDCH(attr->value, ch);
			xml->curr_node->last_ch = ch;
		} else if (ch == '>') {
			/* Unquoted value ended by the end of the tag. */
			xml->curr_node->status = ACL_XML_S_LGT;
			xml_parse_check_self_closed(xml);
			data++;
			break;
		} else if (IS_SPACE(ch)) {
			/* Unquoted value ended by white space. */
			xml->curr_node->status = ACL_XML_S_ATTR;
			xml->curr_node->last_ch = ch;
			data++;
			break;
		} else {
			ADDCH(attr->value, ch);
			xml->curr_node->last_ch = ch;

			if ((xml->flag & ACL_XML_FLAG_PART_WORD)) {
				/* Track halves of double-byte characters. */
				if (attr->part_word) {
					attr->part_word = 0;
				} else if ((ch & 0x80) != 0) {
					/*
					 * Bit 7 is tested instead of "ch < 0"
					 * because plain char may be unsigned,
					 * in which case ch is never negative.
					 */
					attr->part_word = 1;
				}
			}
		}
		data++;
	}

	ACL_VSTRING_TERMINATE(attr->value);

	/* The status left ACL_XML_S_AVAL, i.e. the value is complete. */
	if (xml->curr_node->status != ACL_XML_S_AVAL) {
		/* Map an ID attribute into the hash table for fast lookup. */
		if (IS_ID(STR(attr->name)) && LEN(attr->value) > 0) {
			const char *ptr = STR(attr->value);

			/* Do not insert a duplicate ID. */
			if (acl_htable_find(xml->id_table, ptr) == NULL) {
				acl_htable_enter(xml->id_table, ptr, attr);

				/* The node gets its id only once it is in the table. */
				xml->curr_node->id = attr->value;
			}
		}

		/*
		 * Clear the node's current attribute so that a new attribute
		 * object can be created as parsing continues.
		 */
		xml->curr_node->curr_attr = NULL;
	}

	return (data);
}
/*
 * Collect the body of a comment-style meta node into xml->curr_node->text.
 * May be called repeatedly with successive chunks; quote state, '<' nesting
 * depth (nlt) and pending dashes (meta[0..1]) are kept in the node.
 *
 * The node is finished (status ACL_XML_S_MEND) when a '>' arrives outside
 * quotes at nesting depth 0 with two dashes pending.
 *
 * Returns a pointer to the first byte not consumed.
 */
static const char *xml_parse_meta_comment(ACL_XML *xml, const char *data)
{
	int   ch;

	/* Nothing collected yet: drop leading white space. */
	if (LEN(xml->curr_node->text) == 0) {
		SKIP_SPACE(data);
	}

	while ((ch = *data) != 0) {
		if (xml->curr_node->quote) {
			/* Inside quotes: the closing quote itself is not stored. */
			if (ch == xml->curr_node->quote) {
				xml->curr_node->quote = 0;
			} else {
				ADDCH(xml->curr_node->text, ch);
			}
		} else if (IS_QUOTE(ch)) {
			/* Not inside quotes here, so this opens a quoted run. */
			xml->curr_node->quote = ch;
		} else if (ch == '<') {
			xml->curr_node->nlt++;
			ADDCH(xml->curr_node->text, ch);
		} else if (ch == '>') {
			if (xml->curr_node->nlt > 0) {
				/* Closes a nested '<'. */
				xml->curr_node->nlt--;
			} else if (xml->curr_node->meta[0] == '-'
				&& xml->curr_node->meta[1] == '-')
			{
				/* "-->" at depth 0: end of the comment. */
				data++;
				xml->curr_node->status = ACL_XML_S_MEND;
				break;
			}
			/*
			 * A stray '>' at depth 0 is stored as text without
			 * touching nlt.  Decrementing here would leave the
			 * counter below zero, and the terminating "-->" could
			 * then never be recognised at depth 0.
			 */
			ADDCH(xml->curr_node->text, ch);
		} else if (xml->curr_node->nlt > 0) {
			/* Anything inside a nested '<...>' is plain text. */
			ADDCH(xml->curr_node->text, ch);
		} else if (ch == '-') {
			/* Hold back up to two dashes; they may start "-->". */
			if (xml->curr_node->meta[0] != '-') {
				xml->curr_node->meta[0] = '-';
			} else if (xml->curr_node->meta[1] != '-') {
				xml->curr_node->meta[1] = '-';
			}
		} else {
			/* Ordinary byte: flush any held-back dashes first. */
			if (xml->curr_node->meta[0] == '-') {
				ADDCH(xml->curr_node->text, '-');
				xml->curr_node->meta[0] = 0;
			}
			if (xml->curr_node->meta[1] == '-') {
				ADDCH(xml->curr_node->text, '-');
				xml->curr_node->meta[1] = 0;
			}
			ADDCH(xml->curr_node->text, ch);
		}
		data++;
	}

	ACL_VSTRING_TERMINATE(xml->curr_node->text);
	return (data);
}
void input(FILE *fd, void (*cb)(struct message *), struct message *p) { int c; #define ST(X) do { p->status = (X); } while(0) #define MK(X) do { p->X = p->p; } while(0) #define RST() do { \ ST(AT_BOM); \ p->p = p->buffer; \ p->sz = p->argc = 0; \ p->org = p->cmd = NULL; \ } while (0) #define ADDCH(X) do { \ *p->p++ = X; p->sz++; \ if (p->sz >= (sizeof p->buffer)-1) \ RST(); \ } while (0) #define ADDARG(s) do { \ assert(p->argc < MAX_ARGS); \ p->argv[p->argc] = s; \ if (s) p->argc++; \ } while (0) RST(); while ((c = fgetc(fd)) != EOF) { switch (p->status) { case AT_BOM: switch (c) { case ':': ST(IN_ORG); MK(org); break; case ' ': break; /* ignored */ case '\r': ST(IN_EOL); break; case '\n': RST(); break; default: ST(IN_CMD); MK(cmd); ADDCH(c); break; } break; case IN_ORG: switch(c) { case ' ': ST(IN_SPC0); ADDCH(0); break; case '\r': ST(IN_EOL); ADDCH(0); break; case '\n': RST(); break; case ':': /* NO BREAK HERE, valid char */ default: ADDCH(c); break; } break; case IN_SPC0: switch(c) { case ' ': break; /* ignore extra */ case '\r': ST(IN_EOL); break; case '\n': RST(); break; default: ST(IN_CMD); MK(cmd); ADDCH(c); break; } break; case IN_CMD: switch(c) { case ' ': ST(IN_SPC1); ADDCH(0); break; case '\r': ST(IN_EOL); ADDCH(0); break; case '\n': ADDARG(NULL); cb(p); RST(); break; case ':': /* NO BREAK HERE, valid char */ default: ADDCH(c); break; } break; case IN_SPC1: switch(c) { case ' ': break; /* ignore */ case '\r': ST(IN_EOL); break; case '\n': ADDARG(NULL); cb(p); RST(); break; case ':': ST(IN_ARGN); ADDARG(p->p); break; default: ST(IN_ARG); ADDARG(p->p); ADDCH(c); break; } break; case IN_ARG: switch (c) { case ' ': ST(IN_SPC1); ADDCH(0); break; case '\r': ST(IN_EOL); ADDCH(0); break; case '\n': ADDARG(NULL); cb(p); RST(); break; case ':': default: ADDCH(c); break; } break; case IN_ARGN: switch (c) { case '\r': ST(IN_EOL); ADDCH(0); break; case '\n': ADDARG(NULL); cb(p); RST(); break; default: ADDCH(c); break; } break; case IN_EOL: switch (c) { case '\r': 
break; case '\n': ADDARG(NULL); cb(p); RST(); break; case ':': RST(); ST(IN_ORG); break; default: RST(); break; } break; } /* switch */ } /* while */ } /* input */
/* Calculate new record length or append field to record.
 *
 * With copy_phase == 0 nothing is written: the function only counts how
 * long the record becomes once the field is appended, and sets *quoted when
 * the field contains a character that forces quoting.  With copy_phase != 0
 * the same walk stores the characters into self->rec, which the caller must
 * already have sized from the counting pass.
 *
 * field_kind / field_data / field_len describe the field's characters as
 * read by PyUnicode_READ(); a NULL field_data is treated as an empty field.
 *
 * Returns the new record length, or -1 with a Python exception set.
 */
static Py_ssize_t
join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
                 Py_ssize_t field_len, int *quoted, int copy_phase)
{
    DialectObj *dialect = self->dialect;
    /* Same type as field_len: an int index would overflow on fields longer
     * than INT_MAX characters. */
    Py_ssize_t i;
    Py_ssize_t rec_len;

/* Grow rec_len by one; the counting pass refuses to step past
 * PY_SSIZE_T_MAX. */
#define INCLEN \
    do {\
        if (!copy_phase && rec_len == PY_SSIZE_T_MAX) {    \
            goto overflow; \
        } \
        rec_len++; \
    } while(0)

#define ADDCH(c)                                \
    do {\
        if (copy_phase) \
            self->rec[rec_len] = c;\
        INCLEN;\
    } while(0)

    rec_len = self->rec_len;

    /* If this is not the first field we need a field separator */
    if (self->num_fields > 0)
        ADDCH(dialect->delimiter);

    /* Handle preceding quote.  Only done while copying: during counting
     * *quoted is not final yet, so both quotes are added at the end. */
    if (copy_phase && *quoted)
        ADDCH(dialect->quotechar);

    /* Copy/count field data */
    /* If field is null just pass over */
    for (i = 0; field_data && (i < field_len); i++) {
        Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
        int want_escape = 0;

        if (c == dialect->delimiter ||
            c == dialect->escapechar ||
            c == dialect->quotechar ||
            PyUnicode_FindChar(
                dialect->lineterminator, c, 0,
                PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
            if (dialect->quoting == QUOTE_NONE)
                want_escape = 1;
            else {
                if (c == dialect->quotechar) {
                    if (dialect->doublequote)
                        ADDCH(dialect->quotechar);
                    else
                        want_escape = 1;
                }
                if (!want_escape)
                    *quoted = 1;
            }
            if (want_escape) {
                if (!dialect->escapechar) {
                    PyErr_Format(_csvstate_global->error_obj,
                                 "need to escape, but no escapechar set");
                    return -1;
                }
                ADDCH(dialect->escapechar);
            }
        }
        /* Copy field character into record buffer. */
        ADDCH(c);
    }

    if (*quoted) {
        if (copy_phase)
            ADDCH(dialect->quotechar);
        else {
            INCLEN; /* starting quote */
            INCLEN; /* ending quote */
        }
    }
    return rec_len;

  overflow:
    PyErr_NoMemory();
    return -1;
#undef ADDCH
#undef INCLEN
}