/*
 * token_init - prepare the lexer token machinery.
 *
 * Called once per message: the first call performs one-time allocation of
 * the token buffers and the fixed header-prefix words; every later call
 * only resets per-message state via token_clear().  No return value.
 */
void token_init(void)
{
    static bool fTokenInit = false;	/* one-time initialization guard */

    yyinit();

    if ( fTokenInit) {
	/* already initialized - just reset per-message token state */
	token_clear();
    } else {
	fTokenInit = true;

	/* size the multi-token buffer from the single-token limit unless
	 * the caller/configuration has already fixed it */
	if (max_multi_token_len == 0)
	    max_multi_token_len = (max_token_len+1) * multi_token_count + MAX_PREFIX_LEN;
	yylval_text_size = max_multi_token_len + MSG_COUNT_PADDING;

	/* NOTE(review): `D` is presumably a debug/guard-byte padding macro
	 * defined elsewhere - confirm.  malloc() result is not checked here;
	 * callers apparently rely on an abort-on-OOM policy - verify. */
	yylval_text = (byte *) malloc( yylval_text_size+D );
	yylval.leng   = 0;
	yylval.u.text = yylval_text;

	/* First IP Address in Received: statement */
	msg_addr = word_new( NULL, max_token_len );

	/* Message ID (may be longer than a normal token) */
	msg_id = word_new( NULL, max_token_len * 3 );

	/* Message's first queue ID */
	queue_id = word_new( NULL, max_token_len );

	ipsave = word_new( NULL, max_token_len );

	/* word_new() used to avoid compiler complaints */
	w_to   = word_news("to:");	/* To:          */
	w_from = word_news("from:");	/* From:        */
	w_rtrn = word_news("rtrn:");	/* Return-Path: */
	w_subj = word_news("subj:");	/* Subject:     */
	w_recv = word_news("rcvd:");	/* Received:    */
	w_head = word_news("head:");	/* Header:      */
	w_mime = word_news("mime:");	/* Mime:        */
	w_ip   = word_news("ip:");	/* ip:          */
	w_url  = word_news("url:");	/* url:         */
	nonblank_line = word_news(NONBLANK);

	/* do multi-word token initializations */
	init_token_array();
    }

    return;
}
/* Parse alternating key/value word tokens from `input` and insert each
 * pair into `tab`.  If `limit` is nonzero it bounds the scan; otherwise
 * the input is treated as NUL-terminated.  Parsing stops at the first
 * token that is not a word (including a dangling key with no value). */
static void read_symtab(symtab_t tab, const char *input, size_t limit) {
  token_t tok;
  const char *stop = limit ? input + limit : NULL;
  int more = 1;

  token_init(tok);
  while (more) {
    input = token_get(tok, input, stop);
    if (tok->type != token_word) {
      more = 0;
    } else {
      char *key = pbc_strdup(tok->s);

      input = token_get(tok, input, stop);
      if (tok->type == token_word) {
        symtab_put(tab, pbc_strdup(tok->s), key);
      } else {
        more = 0; /* dangling key: discard it and stop */
      }
      pbc_free(key); /* freed on both the stored and dangling paths */
    }
  }
  token_clear(tok);
}
/* Release every allocation made by token_init(): the fixed header-prefix
 * words, any per-message token state, and the multi-word token tables. */
void token_cleanup()
{
    /* header-prefix words created with word_news() */
    WFREE(w_to);
    WFREE(w_from);
    WFREE(w_rtrn);
    WFREE(w_subj);
    WFREE(w_recv);
    WFREE(w_head);
    WFREE(w_mime);
    WFREE(w_ip);
    WFREE(w_url);
    WFREE(nonblank_line);

    /* drop per-message token state */
    token_clear();

    /* do multi-word token cleanup */
    free_token_array();
}
/**
 * camel_scalix_stream_next_token:
 * @stream: scalix stream
 * @token: scalix token
 *
 * Reads the next token from the scalix stream and saves it in @token.
 * Token kinds produced: qstring, special char, literal identifier
 * ({n}), number, atom/NIL, flag ("\" atom), or a raw character.
 * Pointers stored into @token (qstring/atom/flag) reference
 * stream->tokenbuf and are only valid until the next call.
 *
 * Returns 0 on success or -1 on fail.
 **/
int camel_scalix_stream_next_token (CamelSCALIXStream *stream, camel_scalix_token_t *token)
{
	register unsigned char *inptr;
	unsigned char *inend, *start, *p;
	gboolean escaped = FALSE;
	size_t literal = 0;
	guint32 nz_number;
	int ret;

	g_return_val_if_fail (CAMEL_IS_SCALIX_STREAM (stream), -1);
	g_return_val_if_fail (stream->mode != CAMEL_SCALIX_STREAM_MODE_LITERAL, -1);
	g_return_val_if_fail (token != NULL, -1);

	/* a previously unget token takes priority over reading new data */
	if (stream->have_unget) {
		memcpy (token, &stream->unget, sizeof (camel_scalix_token_t));
		stream->have_unget = FALSE;
		return 0;
	}

	token_clear (stream);

	inptr = stream->inptr;
	inend = stream->inend;
	*inend = '\0';  /* sentinel so str* scans stop at buffered end */

	/* skip leading spaces/CRs, refilling the buffer as needed */
	do {
		if (inptr == inend) {
			if ((ret = scalix_fill (stream)) < 0) {
				token->token = CAMEL_SCALIX_TOKEN_ERROR;
				return -1;
			} else if (ret == 0) {
				token->token = CAMEL_SCALIX_TOKEN_NO_DATA;
				return 0;
			}

			inptr = stream->inptr;
			inend = stream->inend;
			*inend = '\0';
		}

		while (*inptr == ' ' || *inptr == '\r')
			inptr++;
	} while (inptr == inend);

	do {
		if (inptr < inend) {
			if (*inptr == '"') {
				/* qstring token */
				escaped = FALSE;
				start = inptr;

				/* eat the beginning " */
				inptr++;

				p = inptr;
				while (inptr < inend) {
					if (*inptr == '"' && !escaped)
						break;

					if (*inptr == '\\' && !escaped) {
						/* flush the run before the backslash; the
						 * backslash itself is dropped (unescaped) */
						token_save (stream, p, inptr - p);
						escaped = TRUE;
						inptr++;
						p = inptr;
					} else {
						inptr++;
						escaped = FALSE;
					}
				}

				token_save (stream, p, inptr - p);

				if (inptr == inend) {
					/* closing quote not yet buffered - rewind and refill */
					stream->inptr = start;
					goto refill;
				}

				/* eat the ending " */
				inptr++;

				/* nul-terminate the atom token */
				token_save (stream, "", 1);

				token->token = CAMEL_SCALIX_TOKEN_QSTRING;
				token->v.qstring = stream->tokenbuf;

				d(fprintf (stderr, "token: \"%s\"\n", token->v.qstring));

				break;
			} else if (strchr ("+*()[]\n", *inptr)) {
				/* special character token */
				token->token = *inptr++;

				if (camel_debug ("scalix:stream")) {
					if (token->token != '\n')
						fprintf (stderr, "token: %c\n", token->token);
					else
						fprintf (stderr, "token: \\n\n");
				}

				break;
			} else if (*inptr == '{') {
				/* literal identifier token */
				if ((p = strchr (inptr, '}')) && strchr (p, '\n')) {
					/* the full "{n}\n" header is buffered - parse it */
					inptr++;

					/* stop accumulating before the size can overflow */
					while (isdigit ((int) *inptr) && literal < UINT_MAX / 10)
						literal = (literal * 10) + (*inptr++ - '0');

					if (*inptr != '}') {
						if (isdigit ((int) *inptr))
							g_warning ("illegal literal identifier: literal too large");
						else if (*inptr != '+')
							g_warning ("illegal literal identifier: garbage following size");

						while (*inptr != '}')
							inptr++;
					}

					/* skip over '}' */
					inptr++;

					/* skip over any trailing whitespace */
					while (*inptr == ' ' || *inptr == '\r')
						inptr++;

					if (*inptr != '\n') {
						g_warning ("illegal token following literal identifier: %s", inptr);

						/* skip ahead to the eoln */
						inptr = strchr (inptr, '\n');
					}

					/* skip over '\n' */
					inptr++;

					token->token = CAMEL_SCALIX_TOKEN_LITERAL;
					token->v.literal = literal;

					d(fprintf (stderr, "token: {%zu}\n", literal));

					/* caller must now drain `literal` raw bytes */
					stream->mode = CAMEL_SCALIX_STREAM_MODE_LITERAL;
					stream->literal = literal;
					stream->eol = FALSE;

					break;
				} else {
					stream->inptr = inptr;
					goto refill;
				}
			} else if (*inptr >= '0' && *inptr <= '9') {
				/* number token */
				*inend = '\0';
				nz_number = strtoul ((char *) inptr, (char **) &start, 10);
				if (start == inend)
					goto refill;  /* number may continue past buffer end */

				if (*start == ':' || *start == ',') {
					/* workaround for 'set' tokens (APPENDUID / COPYUID) */
					goto atom_token;
				}

				inptr = start;
				token->token = CAMEL_SCALIX_TOKEN_NUMBER;
				token->v.number = nz_number;

				d(fprintf (stderr, "token: %u\n", nz_number));

				break;
			} else if (is_atom (*inptr)) {
			atom_token:
				/* simple atom token */
				start = inptr;

				while (inptr < inend && is_atom (*inptr))
					inptr++;

				if (inptr == inend) {
					/* atom may continue past buffer end - rewind and refill */
					stream->inptr = start;
					goto refill;
				}

				token_save (stream, start, inptr - start);

				/* nul-terminate the atom token */
				token_save (stream, "", 1);

				if (!strcmp (stream->tokenbuf, "NIL")) {
					/* special atom token */
					token->token = CAMEL_SCALIX_TOKEN_NIL;
					d(fprintf (stderr, "token: NIL\n"));
				} else {
					token->token = CAMEL_SCALIX_TOKEN_ATOM;
					token->v.atom = stream->tokenbuf;
					d(fprintf (stderr, "token: %s\n", token->v.atom));
				}

				break;
			} else if (*inptr == '\\') {
				/* possible flag token ("\" atom) */
				start = inptr++;

				while (inptr < inend && is_atom (*inptr))
					inptr++;

				if (inptr == inend) {
					stream->inptr = start;
					goto refill;
				}

				/* handle the \* case */
				if ((inptr - start) == 1 && *inptr == '*')
					inptr++;

				if ((inptr - start) > 1) {
					token_save (stream, start, inptr - start);

					/* nul-terminate the flag token */
					token_save (stream, "", 1);

					token->token = CAMEL_SCALIX_TOKEN_FLAG;
					token->v.atom = stream->tokenbuf;

					d(fprintf (stderr, "token: %s\n", token->v.atom));
				} else {
					/* lone backslash: return it as a raw character token */
					token->token = '\\';
					d(fprintf (stderr, "token: %c\n", token->token));
				}

				break;
			} else if (is_lwsp (*inptr)) {
				inptr++;
			} else {
				/* unknown character token? */
				token->token = *inptr++;

				d(fprintf (stderr, "token: %c\n", token->token));

				break;
			}
		} else {
		refill:
			/* need more data; any partially-saved token text is
			 * discarded and re-scanned from stream->inptr */
			token_clear (stream);

			if (scalix_fill (stream) <= 0) {
				token->token = CAMEL_SCALIX_TOKEN_ERROR;
				return -1;
			}

			inptr = stream->inptr;
			inend = stream->inend;
			*inend = '\0';
		}
	} while (inptr < inend);

	stream->inptr = inptr;

	return 0;
}
/** Read all tokens from a string between <b>start</b> and <b>end</b>, and add
 * them to <b>out</b>.  Parse according to the token rules in <b>table</b>.
 * Caller must free tokens in <b>out</b>.  If <b>end</b> is NULL, use the
 * entire string.  Unless <b>flags</b> contains TS_NOCHECK, also validate
 * annotation placement and the per-keyword min/max/position constraints
 * from <b>table</b>.  Returns 0 on success, -1 on any parse/validation
 * error (a warning is logged; tokens already added to <b>out</b> remain
 * there for the caller to free). */
int
tokenize_string(memarea_t *area, const char *start, const char *end,
                smartlist_t *out, token_rule_t *table, int flags)
{
  const char **s;
  directory_token_t *tok = NULL;
  int counts[NIL_];            /* per-keyword occurrence counts */
  int i;
  int first_nonannotation;     /* index in `out` of first real keyword */
  int prev_len = smartlist_len(out);  /* tokens present before this call */
  tor_assert(area);

  s = &start;
  if (!end) {
    end = start+strlen(start);
  } else {
    /* it's only meaningful to check for nuls if we got an end-of-string ptr */
    if (memchr(start, '\0', end-start)) {
      log_warn(LD_DIR, "parse error: internal NUL character.");
      return -1;
    }
  }
  for (i = 0; i < NIL_; ++i)
    counts[i] = 0;

  /* pre-existing tokens in `out` also count toward the table limits */
  SMARTLIST_FOREACH(out, const directory_token_t *, t, ++counts[t->tp]);

  while (*s < end && (!tok || tok->tp != EOF_)) {
    tok = get_next_token(area, s, end, table);
    if (tok->tp == ERR_) {
      log_warn(LD_DIR, "parse error: %s", tok->error);
      token_clear(tok);
      return -1;
    }
    ++counts[tok->tp];
    smartlist_add(out, tok);
    *s = eat_whitespace_eos(*s, end);
  }

  if (flags & TS_NOCHECK)
    return 0;

  if ((flags & TS_ANNOTATIONS_OK)) {
    /* annotations may appear, but only as a contiguous prefix */
    first_nonannotation = -1;
    for (i = 0; i < smartlist_len(out); ++i) {
      tok = smartlist_get(out, i);
      if (tok->tp < MIN_ANNOTATION || tok->tp > MAX_ANNOTATION) {
        first_nonannotation = i;
        break;
      }
    }
    if (first_nonannotation < 0) {
      log_warn(LD_DIR, "parse error: item contains only annotations");
      return -1;
    }
    for (i=first_nonannotation;  i < smartlist_len(out); ++i) {
      tok = smartlist_get(out, i);
      if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
        log_warn(LD_DIR, "parse error: Annotations mixed with keywords");
        return -1;
      }
    }
    if ((flags & TS_NO_NEW_ANNOTATIONS)) {
      /* all annotations must have been in `out` before this call */
      if (first_nonannotation != prev_len) {
        log_warn(LD_DIR, "parse error: Unexpected annotations.");
        return -1;
      }
    }
  } else {
    /* annotations are forbidden entirely */
    for (i=0; i < smartlist_len(out); ++i) {
      tok = smartlist_get(out, i);
      if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
        log_warn(LD_DIR, "parse error: no annotations allowed.");
        return -1;
      }
    }
    first_nonannotation = 0;
  }

  /* enforce per-keyword count and position rules from the table */
  for (i = 0; table[i].t; ++i) {
    if (counts[table[i].v] < table[i].min_cnt) {
      log_warn(LD_DIR, "Parse error: missing %s element.", table[i].t);
      return -1;
    }
    if (counts[table[i].v] > table[i].max_cnt) {
      log_warn(LD_DIR, "Parse error: too many %s elements.", table[i].t);
      return -1;
    }
    if (table[i].pos & AT_START) {
      /* "first" means first after any annotation prefix */
      if (smartlist_len(out) < 1 ||
          (tok = smartlist_get(out, first_nonannotation))->tp != table[i].v) {
        log_warn(LD_DIR, "Parse error: first item is not %s.", table[i].t);
        return -1;
      }
    }
    if (table[i].pos & AT_END) {
      if (smartlist_len(out) < 1 ||
          (tok = smartlist_get(out, smartlist_len(out)-1))->tp != table[i].v) {
        log_warn(LD_DIR, "Parse error: last item is not %s.", table[i].t);
        return -1;
      }
    }
  }
  return 0;
}