// Decodes one multi-octet (UTF-8 style) encoded character starting at `cur`.
//
// Parameters:
//   cur - position of the first octet of the character. It is dereferenced
//         unconditionally, so the caller must guarantee cur != end.
//   end - one past the last readable octet.
//
// Returns a pair (character, position):
//   - on success, the decoded character and the position just past its
//     last octet;
//   - on failure, sBadChar() and the position where decoding stopped:
//     just past the lead octet when the sequence is too long or truncated,
//     or at the first octet that is not a continuation octet.
static std::pair <UnicodeChar, Point> decode (Point cur, Point end)
{
    // Fast path: a clear high bit means the octet is the whole character.
    if ((*cur & 0x80) == 0) {
        UnicodeChar chr = *cur++;
        return std::make_pair (chr, cur);
    }

    // The lead octet yields how many further octets follow it and the
    // starting value of the character being assembled.
    int octets;
    UnicodeChar chr;
    std::tie (octets, chr) = octet_count (*cur++);

    if (octets > 5)
        return std::make_pair (sBadChar(), cur);

    // Compare against the remaining distance instead of forming
    // `cur + octets` first: advancing beyond `end` is undefined behaviour
    // for pointers and asserts with checked iterators.
    if (end - cur < octets)
        return std::make_pair (sBadChar(), cur);

    Point eoc = cur + octets;
    while (cur != eoc) {
        if ((*cur & 0xC0) != 0x80) // check continuation mark
            return std::make_pair (sBadChar(), cur);
        // Each continuation octet contributes its low six bits.
        chr = (chr << 6) | UnicodeChar ((*cur++) & 0x3F);
    }

    return std::make_pair (chr, cur);
}
// Big state switch int uslg_parser_exec(syslog_parser *parser, const syslog_parser_settings *settings, const char *data, size_t length) { int d_index; int error = 0; char next_byte; for (d_index = 0; d_index < length; d_index++) { int action = pa_none; next_byte = data[d_index]; #if DEBUG_OUTPUT printf("Next byte: %c\n", next_byte); #endif // Token state is managed first if (parser->token_state == ts_before) { switch (next_byte) { case ' ': case '\t': action = pa_advance; break; case '\r': //case '\n': removed for issues with syslog messages with empty msg field if (!(parser->flags & F_COUNT_OCTETS)) { parser->error = SLERR_PREMATURE_MSG_END; } break; default: set_token_state(parser, ts_read); action = pa_rehash; } } else { // Parser state switch (parser->state) { case s_msg_start: action = msg_start(parser, settings, next_byte); break; case s_octet_count: action = octet_count(parser, next_byte); break; case s_priority_start: action = priority_start(parser, next_byte); break; case s_priority: action = priority(parser, next_byte); break; case s_version: action = version(parser, next_byte); break; case s_timestamp: action = parse_msg_head_part(parser, s_hostname, next_byte); break; case s_hostname: action = parse_msg_head_part(parser, s_appname, next_byte); break; case s_appname: action = parse_msg_head_part(parser, s_processid, next_byte); break; case s_processid: action = parse_msg_head_part(parser, s_messageid, next_byte); break; case s_messageid: action = parse_msg_head_part(parser, s_sd_start, next_byte); break; case s_sd_start: action = sd_start(parser, settings, next_byte); break; case s_sd_element: action = sd_element(parser, settings, next_byte); break; case s_sd_field_start: action = sd_field_start(parser, next_byte); break; case s_sd_field: action = sd_field(parser, settings, next_byte); break; case s_sd_value_start: action = sd_value_start(parser, next_byte); break; case s_sd_value: action = sd_value(parser, settings, next_byte); break; case s_message: 
d_index += read_message(parser, settings, data + d_index, length - d_index); action = pa_rehash; break; default: parser->error = SLERR_BAD_STATE; } } // Upon error, exit the read loop regardless of action if (parser->error) { error = parser->error; uslg_parser_reset(parser); break; } // What action should be taken for this byte switch (action) { case pa_advance: if (parser->flags & F_COUNT_OCTETS) { parser->octets_remaining--; } else { parser->message_length++; } break; case pa_rehash: d_index--; break; } } return error; }