// Counts the number of words (separted by separator charactors: newlines, tabs, // return, space). See r_util.h for more details of the isseparator macro. R_API int r_str_word_count(const char *string) { const char *text, *tmp; int word; for (text = tmp = string; *text && isseparator (*text); text++); for (word = 0; *text; word++) { for (;*text && !isseparator (*text); text++); for (tmp = text; *text && isseparator (*text); text++); } return word; }
/*
 * Ends the previous argument by overwriting cmd[i] with '\0'. If a
 * non-empty string starts right after that position, a pointer to it is
 * appended to the argument list.
 *
 * cmd   - command buffer, split in place
 * i     - index of the character to overwrite with '\0'
 * args  - argument vector; receives a pointer into cmd
 * count - index of the last used slot in args; pre-incremented before the
 *         new pointer is stored
 *
 * Returns 1 when an argument was appended, 0 otherwise. (The function was
 * declared int but previously fell off the end without returning a value.)
 *
 * NOTE(review): the capacity of args is not known here, so it is not
 * checked; the caller must guarantee room for one more entry.
 */
inline int add_arg_to_list (char *cmd, int i, char **args, int *count) {
	cmd[i] = '\0';
	/*
	 * The explicit '\0' test keeps an empty trailing string out of the
	 * list even if isseparator() does not treat the terminator as a
	 * separator.
	 */
	if (cmd[i + 1] != '\0' && !isseparator (cmd[i + 1])) {
		args[++(*count)] = cmd + i + 1;
		return 1;
	}
	return 0;
}
/*
 * Return a copy of the whole word found at offset `pos` in `str`.
 *
 * When the character before `pos` is a separator, the word preceding the
 * separators is used instead. The copy extends past the word over any
 * characters matched by isseparator_notspace() (presumably separators other
 * than a space - confirm against the macro definition).
 *
 * `*startpos` is set to the first character of the word inside `str`.
 * The result comes from g_strndup(). Returns NULL (through
 * g_return_val_if_fail) when `str` is NULL or `pos` is negative; in that
 * case `*startpos` is left untouched.
 */
char *get_word_at(const char *str, int pos, char **startpos)
{
	const char *word_begin;
	const char *word_end;

	g_return_val_if_fail(str != NULL, NULL);
	g_return_val_if_fail(pos >= 0, NULL);

	/* back up over the separators directly in front of the position */
	for (word_begin = str + pos;
	     word_begin > str && isseparator(word_begin[-1]);
	     word_begin--)
		;

	/* the word ends no earlier than here; now find where it begins */
	word_end = word_begin;
	for (; word_begin > str && !isseparator(word_begin[-1]); word_begin--)
		;

	/* extend to the end of the word ... */
	for (; *word_end != '\0' && !isseparator(*word_end); word_end++)
		;
	/* ... and over the isseparator_notspace() characters behind it */
	for (; *word_end != '\0' && isseparator_notspace(*word_end); word_end++)
		;

	*startpos = (char *) word_begin;
	return g_strndup(word_begin, (int) (word_end - word_begin));
}
/*
 * Returns a freshly malloc()ed copy of `text` with its leading separator
 * characters removed. The copy runs to the end of `text`; it is not cut at
 * the end of the first word.
 *
 * If the allocation fails, a message is printed through eprintf and the
 * process terminates with exit (1), so the function never returns NULL.
 */
R_API char *r_str_word_get_first(const char *text) {
	const char *start = text;
	char *copy;
	int len;

	/* Skip the separators in front of the first word. */
	while (*start && isseparator (*start)) {
		start++;
	}

	/* Hand-rolled strdup of what remains. */
	len = strlen (start);
	copy = malloc (len + 1);
	if (copy == NULL) {
		eprintf ("Cannot allocate %d bytes.\n", len+1);
		exit (1);
	}
	memcpy (copy, start, len);
	copy[len] = '\0';
	return copy;
}
/*
 * manual word completion - called when TAB is pressed
 *
 * window - passed through unchanged to the "complete word" signal
 * line   - the current input line
 * pos    - in: cursor offset inside `line`; out: cursor offset inside the
 *          returned line, just after the inserted completion
 *
 * Returns a new line with the word at the cursor replaced by the current
 * completion, or NULL when no completion is available (or when `line` or
 * `pos` is NULL). The returned buffer is the character data of a GString
 * whose wrapper was released with g_string_free(result, FALSE).
 *
 * State kept between calls: the static startpos/wordlen below, plus
 * complist, last_line, last_line_pos and last_want_space, which are defined
 * outside this function. A call counts as a continuation of the previous
 * completion when the cursor position and the line both match what the
 * previous call produced.
 */
char *word_complete(WINDOW_REC *window, const char *line, int *pos)
{
	/* where the word being replaced starts in the line, and its length */
	static int startpos = 0, wordlen = 0;

	GString *result;
	char *word, *wordstart, *linestart, *ret;
	int want_space;

	g_return_val_if_fail(line != NULL, NULL);
	g_return_val_if_fail(pos != NULL, NULL);

	if (complist != NULL && *pos == last_line_pos && strcmp(line, last_line) == 0) {
		/* complete from old list: advance to the next entry, wrapping
		   around to the first one after the last */
		complist = complist->next != NULL ? complist->next : g_list_first(complist);
		want_space = last_want_space;
	} else {
		/* get new completion list */
		free_completions();

		/* get the word we want to complete */
		word = get_word_at(line, *pos, &wordstart);
		startpos = (int) (wordstart-line);
		wordlen = strlen(word);

		/* get the start of line until the word we're completing */
		if (isseparator(*line)) {
			/* empty space at the start of line */
			if (wordstart == line)
				wordstart += strlen(wordstart);
		} else {
			/* drop the separators between linestart and the word */
			while (wordstart > line && isseparator(wordstart[-1]))
				wordstart--;
		}
		linestart = g_strndup(line, (int) (wordstart-line));

		/* completions usually add space after the word, that makes
		   things a bit harder. When continuing a completion
		   "/msg nick1 "<tab> we have to cycle to nick2, etc.
		   BUT if we start completion with "/msg "<tab>, we don't
		   want to complete the /msg word, but instead complete empty
		   word with /msg being in linestart. */
		if (*pos > 0 && line[*pos-1] == ' ') {
			char *old;

			/* fold the word into linestart and complete an empty
			   word placed after it */
			old = linestart;
			linestart = *linestart == '\0' ?
				g_strdup(word) :
				g_strconcat(linestart, " ", word, NULL);
			g_free(old);

			g_free(word);
			word = g_strdup("");
			startpos = strlen(linestart)+1;
			wordlen = 0;
		}

		/* presumably the signal handlers populate complist and may
		   change want_space - both are passed by address */
		want_space = TRUE;
		signal_emit("complete word", 5, &complist, window, word, linestart, &want_space);
		last_want_space = want_space;

		g_free(linestart);
		g_free(word);
	}

	if (complist == NULL)
		return NULL;

	/* word completed */
	*pos = startpos+strlen(complist->data);

	/* replace the word in line - we need to return a full new line */
	result = g_string_new(line);
	g_string_erase(result, startpos, wordlen);
	g_string_insert(result, startpos, complist->data);

	if (want_space) {
		/* add a space only if no separator already follows, then
		   move the cursor past it either way */
		if (!isseparator(result->str[*pos]))
			g_string_insert_c(result, *pos, ' ');
		(*pos)++;
	}

	/* remember what was produced so the next TAB can be recognised as a
	   continuation and can replace exactly this completion */
	wordlen = strlen(complist->data);
	last_line_pos = *pos;
	g_free_not_null(last_line);
	last_line = g_strdup(result->str);

	/* keep the character data, release only the GString wrapper */
	ret = result->str;
	g_string_free(result, FALSE);
	return ret;
}
/*
 * Reads characters with get_next_char() and drives the lexer state machine
 * until one token is complete. The token text is collected in token_buf,
 * echoed with printf, and the token type is returned.
 *
 * token - not used by the current implementation.
 *
 * Returns the type of the token that was read. Token kinds for which this
 * function assigns no dedicated type (strings, numbers, regexps) are
 * reported as ERROR instead of an indeterminate value.
 * NOTE(review): assign proper token types for those once they exist.
 *
 * Fixes relative to the previous revision:
 *  - EOF inside a comment now really finishes (`status ==` was a no-op
 *    comparison, so the loop never ended);
 *  - the current character is held in an int so it can be compared with
 *    EOF regardless of the signedness of char;
 *  - current_token_t is initialised;
 *  - identifiers are tagged on their first character, so one-character
 *    identifiers are recognised too;
 *  - the "digits '.'" state now advances instead of looping forever, and
 *    the decimal state pushes its terminating character back like the
 *    integer and identifier states do;
 *  - an unterminated string or regexp ends with ERROR at EOF instead of
 *    spinning.
 */
token_type get_token(token *token)
{
	token_type current_token_t = ERROR;
	lexer_status status = INITIAL_STATUS;
	int cc;
	char skip = FALSE;
	unsigned char token_pos = 0;

	while (status != INFINISH_STATUS) {
		skip = FALSE;
		cc = get_next_char();

		switch (status) {
		case INITIAL_STATUS:
			if ('"' == cc) {
				status = INSTRING_STATUS;
			} else if (isdigit(cc)) {
				status = INNUM_INT_STATUS;
			} else if (isalpha(cc) || ('_' == cc)) {
				/* tag now: a one-character identifier never
				   reaches the "more identifier chars" branch */
				current_token_t = IDENTIFIER_TOKEN;
				status = INIDENTIFIER_STATUS;
			} else if ('`' == cc) {
				status = INREGEXP_STATUS;
			} else if ('#' == cc) {
				skip = TRUE;
				status = INCOMMENT_STATUS;
			} else if (isseparator(cc)) {
				skip = TRUE;
			} else {
				status = INFINISH_STATUS;
				switch (cc) {
				case '+':
					current_token_t = PLUS;
					break;
				case '-':
					current_token_t = MINUS;
					break;
				case '*':
					current_token_t = TIMES;
					break;
				case '/':
					current_token_t = OVER;
					break;
				case '=':
					/* may become EQUAL: look at the next char */
					current_token_t = ASSIGN;
					status = INSIGN_STATUS;
					break;
				case '<':
					/* may become LSHIFT or LTOREQ */
					current_token_t = LT;
					status = INSIGN_STATUS;
					break;
				case '>':
					/* may become RSHIFT or GTOREQ */
					current_token_t = GT;
					status = INSIGN_STATUS;
					break;
				case '(':
					current_token_t = LPAREN;
					break;
				case ')':
					current_token_t = RPAREN;
					break;
				case ';':
					current_token_t = SEMI;
					break;
				case EOF:
					current_token_t = END_OF_FILE_TOKEN;
					break;
				default:
					current_token_t = ERROR;
					break;
				}
			}
			break;

		case INSTRING_STATUS:
			if ('"' == cc) {
				status = INFINISH_STATUS;
			} else if (EOF == cc) {
				/* unterminated string */
				skip = TRUE;
				current_token_t = ERROR;
				status = INFINISH_STATUS;
			}
			break;

		case INSIGN_STATUS:
			/* second character of a possibly two-character operator;
			   anything else is pushed back for the next token */
			if (current_token_t == LT) {
				if ('<' == cc) {
					current_token_t = LSHIFT;
				} else if ('=' == cc) {
					current_token_t = LTOREQ;
				} else {
					skip = TRUE;
					unget_next_char();
				}
			} else if (current_token_t == GT) {
				if ('>' == cc) {
					current_token_t = RSHIFT;
				} else if ('=' == cc) {
					current_token_t = GTOREQ;
				} else {
					skip = TRUE;
					unget_next_char();
				}
			} else if (current_token_t == ASSIGN) {
				if ('=' == cc) {
					current_token_t = EQUAL;
				} else {
					skip = TRUE;
					unget_next_char();
				}
			} else {
				/* no way to get here */
			}
			status = INFINISH_STATUS;
			break;

		case INCOMMENT_STATUS:
			/* comment text never goes into the token buffer */
			skip = TRUE;
			if ('\n' == cc) {
				status = INITIAL_STATUS;
			} else if (EOF == cc) {
				status = INFINISH_STATUS;
				current_token_t = END_OF_FILE_TOKEN;
			} else {
				status = INCOMMENT_STATUS;
			}
			break;

		case INNUM_INT_STATUS:
			if ('.' == cc) {
				status = INNUM_DOT_STATUS;
			} else if (!isdigit(cc)) {
				status = INFINISH_STATUS;
				skip = TRUE;
				unget_next_char();
			}
			break;

		case INNUM_DOT_STATUS:
			if (isdigit(cc)) {
				status = INNUM_DEC_STATUS;
			} else {
				/* "123." followed by something else: the number
				   ends here, same handling as INNUM_INT_STATUS */
				status = INFINISH_STATUS;
				skip = TRUE;
				unget_next_char();
			}
			break;

		case INNUM_DEC_STATUS:
			if (!isdigit(cc)) {
				status = INFINISH_STATUS;
				skip = TRUE;
				unget_next_char();
			}
			break;

		case INIDENTIFIER_STATUS:
			if (isdigit(cc) || isalpha(cc) || ('_' == cc)) {
				current_token_t = IDENTIFIER_TOKEN;
			} else {
				skip = TRUE;
				status = INFINISH_STATUS;
				unget_next_char();
			}
			break;

		case INREGEXP_STATUS:
			if ('`' == cc) {
				status = INFINISH_STATUS;
			} else if (EOF == cc) {
				/* unterminated regexp */
				skip = TRUE;
				current_token_t = ERROR;
				status = INFINISH_STATUS;
			}
			break;

		case INFINISH_STATUS:
			break;

		case INERROR_STATUS:
			/* point to where is wrong */
			exit(-1);

		default:
			break;
		}

		/*
		 * NOTE(review): with `<=` up to MAX_TOKEN_SIZE + 1 characters
		 * are stored and the terminator below can land at index
		 * MAX_TOKEN_SIZE + 1. The size of token_buf is not visible
		 * here - confirm it has room, or tighten this bound.
		 */
		if (!skip && (token_pos <= MAX_TOKEN_SIZE)) {
			token_buf[token_pos++] = (char)cc;
		} else {
			/* skip or token length is too long*/
		}

		if (INFINISH_STATUS == status) {
			/* Do some saving token work */
			token_buf[token_pos] = '\0';
			if (IDENTIFIER_TOKEN == current_token_t) {
				if (NONE != is_key_word(token_buf, token_pos)) {
					current_token_t = KEYWORD_TOKEN;
				}
			}
			printf("token = %d = %s\n", token_pos, token_buf);
		}
	}
	return current_token_t;
}
/*
 * manual word completion - called when TAB is pressed
 *
 * window   - passed through unchanged to the emitted signals
 * line     - the current input line
 * pos      - in: cursor offset inside `line`; out: cursor offset inside the
 *            returned line, just after the inserted completion
 * erase    - non-zero: emit "complete erase" for the word at the cursor and
 *            move on to the next completion; only acted upon while a
 *            completion is in progress, otherwise NULL is returned
 * backward - non-zero: walk the completion list towards its head
 *
 * Returns a new line with the word at the cursor replaced by the current
 * completion, or NULL when there is nothing to complete (or when `line` or
 * `pos` is NULL). The returned buffer is the character data of a GString
 * whose wrapper was released with g_string_free(result, FALSE).
 *
 * State kept between calls: the static startpos/wordlen below, plus
 * complist, last_line, last_line_pos and last_want_space, which are defined
 * outside this function.
 */
char *word_complete(WINDOW_REC *window, const char *line, int *pos, int erase, int backward)
{
	/* where the word being replaced starts in the line, and its length */
	static int startpos = 0, wordlen = 0;
	int old_startpos, old_wordlen;

	GString *result;
	const char *cmdchars;
	char *word, *wordstart, *linestart, *ret, *data;
	int continue_complete, want_space, expand_escapes;

	g_return_val_if_fail(line != NULL, NULL);
	g_return_val_if_fail(pos != NULL, NULL);

	/* same cursor position and same line as the previous call produced:
	   the user is cycling through the existing list */
	continue_complete = complist != NULL && *pos == last_line_pos &&
		g_strcmp0(line, last_line) == 0;

	if (erase && !continue_complete)
		return NULL;

	/* saved so the erase path can restore them after the word lookup
	   below overwrites the statics */
	old_startpos = startpos;
	old_wordlen = wordlen;

	if (!erase && continue_complete) {
		/* plain continuation: no word lookup needed; both stay NULL
		   for the g_free() calls further down */
		word = NULL;
		linestart = NULL;
	} else {
		char* old_wordstart;

		/* get the word we want to complete */
		word = get_word_at(line, *pos, &wordstart);
		old_wordstart = wordstart;

		startpos = (int) (wordstart-line);
		wordlen = strlen(word);

		/* remove trailing spaces from linestart */
		while (wordstart > line && isseparator_space(wordstart[-1]))
			wordstart--;

		/* unless everything was spaces */
		if (old_wordstart > line && wordstart == line)
			wordstart = old_wordstart - 1;

		linestart = g_strndup(line, (int) (wordstart-line));

		/* completions usually add space after the word, that makes
		   things a bit harder. When continuing a completion
		   "/msg nick1 "<tab> we have to cycle to nick2, etc.
		   BUT if we start completion with "/msg "<tab>, we don't
		   want to complete the /msg word, but instead complete empty
		   word with /msg being in linestart. */
		if (!erase && *pos > 0 && isseparator_space(line[*pos-1]) &&
		    (*linestart == '\0' || !isseparator_space(wordstart[-1]))) {
			char *old;

			old = linestart;
			/* we want to move word into linestart */
			if (*linestart == '\0') {
				linestart = g_strdup(word);
			} else {
				GString *str = g_string_new(linestart);
				if (old_wordstart[-1] != str->str[str->len - 1]) {
					/* do not accidentally duplicate the word separator */
					g_string_append_c(str, old_wordstart[-1]);
				}
				g_string_append(str, word);
				linestart = g_string_free(str, FALSE);
			}
			g_free(old);

			/* the word to complete is now the empty string that
			   follows linestart */
			g_free(word);
			word = g_strdup("");

			startpos = *linestart == '\0' ? 0 : strlen(linestart)+1;
			wordlen = 0;
		}
	}

	if (erase) {
		signal_emit("complete erase", 3, window, word, linestart);

		/* jump to next completion */
		startpos = old_startpos;
		wordlen = old_wordlen;
	}

	if (continue_complete) {
		/* complete from old list, wrapping around at either end */
		if (backward)
			complist = complist->prev != NULL ? complist->prev : g_list_last(complist);
		else
			complist = complist->next != NULL ? complist->next : g_list_first(complist);
		want_space = last_want_space;
	} else {
		int keep_word = settings_get_bool("completion_keep_word");
		/* get new completion list */
		free_completions();

		/* presumably the signal handlers populate complist and may
		   change want_space - both are passed by address */
		want_space = TRUE;
		signal_emit("complete word", 5, &complist, window, word, linestart, &want_space);
		last_want_space = want_space;

		if (complist != NULL) {
			/* Remove all nulls (from the signal) before doing further processing */
			complist = g_list_remove_all(g_list_first(complist), NULL);

			/* with completion_keep_word the typed word itself
			   becomes the last entry of the list */
			if (keep_word) {
				complist = g_list_append(complist, g_strdup(word));
			}

			if (backward) {
				/* start from the tail; when the typed word was
				   appended, step back over it */
				complist = g_list_last(complist);
				if (keep_word) {
					complist = complist->prev;
				}
			}
		}
	}

	g_free(linestart);
	g_free(word);

	if (complist == NULL)
		return NULL;

	/* get the cmd char */
	cmdchars = settings_get_str("cmdchars");

	/* get the expand_escapes setting */
	expand_escapes = settings_get_bool("expand_escapes");

	/* escape if the line doesn't begin with one of the cmdchars and
	   expand_escapes is turned on; either way `data` is a private copy
	   that is freed below */
	data = strchr(cmdchars, *line) == NULL && expand_escapes ?
		escape_string_backslashes(complist->data) : g_strdup(complist->data);

	/* word completed */
	*pos = startpos + strlen(data);

	/* replace the word in line - we need to return a full new line */
	result = g_string_new(line);
	g_string_erase(result, startpos, wordlen);
	g_string_insert(result, startpos, data);

	if (want_space) {
		/* add a space only if no separator already follows, then
		   move the cursor past it either way */
		if (!isseparator(result->str[*pos]))
			g_string_insert_c(result, *pos, ' ');
		(*pos)++;
	}

	/* remember what was produced so the next call can be recognised as a
	   continuation and can replace exactly this completion */
	wordlen = strlen(data);
	last_line_pos = *pos;
	g_free_not_null(last_line);
	last_line = g_strdup(result->str);

	/* keep the character data, release only the GString wrapper */
	ret = result->str;
	g_string_free(result, FALSE);

	/* free the data */
	g_free(data);

	return ret;
}
// Feeds `length` bytes from `buf` through the header lexer state machine.
//
// Every header byte is appended to rawHeader and dispatched on lexerState.
// Completed lexical elements are handed to the Process*() members. As soon
// as parserState is PARSER_body, the remaining bytes go to AppendData()
// instead of the lexer.
//
// A call with length == 0 does no lexing: it recomputes `valid` and, when
// data.size() equals contentLength, sets `done` to whether the lexer has
// left LEXER_init; it then returns 0.
//
// Returns the number of bytes accounted for: one per byte lexed, plus
// whatever AppendData() reports for body bytes. Sets `aborted` and stops
// when lexerState is greater than LEXER_error.
int MyRequest::Lexer(char *buf, int length)
{
    int consumed = 0;

    if(length == 0) {
        valid = methodValid && urlValid && httpVersionValid;
        if(data.size() == contentLength)
            done = lexerState != LEXER_init;
        return 0;
    }

    // Already in the body: nothing to lex.
    if(parserState == PARSER_body) {
        consumed = AppendData(buf, length);
        return consumed;
    }

    for(int i = 0; i < length; i++, consumed++) {
        // parserState may have changed while handling the previous byte.
        if(parserState == PARSER_body) {
            consumed += AppendData(buf + i, length - i);
            break;
        }

        char ch = buf[i];
        rawHeader.push_back(ch);

        // `goto again` re-dispatches the SAME character after a state
        // change, so a character that ends one element can start the next.
again:
        if(parserState == PARSER_body) {
            consumed += AppendData(buf + i, length - i);
            break;
        }

        // States above LEXER_error are treated as errors.
        if(lexerState > LEXER_error) {
            aborted = true;
            break;
        }

        switch(lexerState) {
        case LEXER_init:
            // A control char, separator or blank ends a pending token;
            // flush it first, then look at this character again.
            if((isctl(ch) || isseparator(ch) || ch == ' ' || ch == '\t') && !lexerToken.empty()) {
                ProcessTokenIfAvailable();
                goto again;
            }
            if(ch == '\r')
                lexerState = LEXER_cr_read;
            else if(ch == '\n')
                ProcessLF();
            else if(opaqueEnabled) {
                lexerOpaqueString.clear();
                lexerState = LEXER_opaque;
                goto again;
            }
            else if(parseURLEnabled && !isspace(ch)) {
                lexerUrl.clear();
                lexerState = LEXER_url;
                goto again;
            }
            else if(ch == '"' && quotedStringsEnabled) {
                lexerQuotedString.clear();
                lexerState = LEXER_quoted_string;
            }
            else if(ch == '(' && commentsEnabled) {
                lexerComment.clear();
                lexerComment.push_back(ch);
                lexerState = LEXER_comment;
                lexerCommentNesting = 1;
            }
            else if(ch == ' ' || ch == '\t')
                lexerState = LEXER_LWS_read;
            else if(isseparator(ch))
                ProcessSeparator(ch);
            else if(!isctl(ch))
                lexerToken.push_back(ch);
            else
                lexerState = LEXER_token_text_error;
            break;

        case LEXER_opaque:
            // Collects characters up to a CR; with folding enabled the CR
            // may turn out to be part of a continuation.
            ProcessTokenIfAvailable();
            if(ch == '\r') {
                if(foldingEnabled)
                    lexerState = LEXER_opaque_cr_read;
                else {
                    ProcessOpaque(lexerOpaqueString);
                    lexerState = LEXER_init;
                    goto again;
                }
            }
            else
                lexerOpaqueString.push_back(ch);
            break;

        case LEXER_opaque_cr_read:
            ProcessTokenIfAvailable();
            if(ch == '\n')
                lexerState = LEXER_opaque_newline_read;
            else {
                ProcessOpaque(lexerOpaqueString);
                lexerState = LEXER_init;
                goto again;
            }
            break;

        case LEXER_opaque_newline_read:
            // CR LF followed by a blank continues the opaque string, with
            // the fold replaced by a single space.
            if(ch == ' ' || ch == '\t') {
                lexerOpaqueString.push_back(' ');
                lexerState = LEXER_opaque;
            }
            else {
                ProcessOpaque(lexerOpaqueString);
                ProcessNewline();
                lexerState = LEXER_init;
                goto again;
            }
            break;

        case LEXER_url:
            // Whitespace ends the URL; '%' starts an escape and '+' is
            // stored as a space.
            ProcessTokenIfAvailable();
            if(isspace(ch)) {
                ProcessURL(lexerUrl);
                lexerState = LEXER_init;
                goto again;
            }
            else {
                if(ch == '%')
                    lexerState = LEXER_url_encoded_1;
                else if(ch == '+')
                    lexerUrl.push_back(' ');
                else
                    lexerUrl.push_back(ch);
            }
            break;

        case LEXER_url_encoded_1:
            // First character after '%': "%%" yields a literal '%',
            // otherwise it must be the first of two hex digits.
            if(ch == '%') {
                lexerUrl.push_back('%');
                lexerState = LEXER_url;
            }
            else if(isxdigit(ch)) {
                lexerTmpHex[0] = ch;
                lexerTmpHex[2] = '\0';
                lexerState = LEXER_url_encoded_2;
            }
            else
                lexerState = LEXER_url_error;
            break;

        case LEXER_url_encoded_2:
            // Second hex digit: convert the pair and append the byte.
            if(isxdigit(ch)) {
                lexerTmpHex[1] = ch;
                char newch = strtol(lexerTmpHex, NULL, 16);
                lexerUrl.push_back(newch);
                lexerState = LEXER_url;
            }
            else
                lexerState = LEXER_url_error;
            break;

        case LEXER_comment:
            // Parenthesised comment; lexerCommentNesting tracks nested
            // parentheses and the comment ends when it drops to zero.
            ProcessTokenIfAvailable();
            if(ch == '\\')
                lexerState = LEXER_quoted_pair_comment;
            else if(ch == '(') {
                lexerCommentNesting++;
                lexerComment.push_back(ch);
            }
            else if(ch == ')') {
                lexerCommentNesting--;
                lexerComment.push_back(ch);
                if(lexerCommentNesting == 0) {
                    ProcessComment(lexerComment);
                    lexerState = LEXER_init;
                }
            }
            else if(istext(ch)) {
                lexerComment.push_back(ch);
            }
            else
                lexerState = LEXER_comment_text_error;
            break;

        case LEXER_quoted_pair_comment:
            // Character after a backslash inside a comment: taken as is.
            ProcessTokenIfAvailable();
            lexerComment.push_back(ch);
            lexerState = LEXER_comment;
            break;

        case LEXER_quoted_string:
            ProcessTokenIfAvailable();
            if(ch == '\\')
                lexerState = LEXER_quoted_pair;
            else if(ch == '"') {
                ProcessQuotedString(lexerQuotedString);
                lexerState = LEXER_init;
            }
            else if(istext(ch)) {
                lexerQuotedString.push_back(ch);
            }
            else
                lexerState = LEXER_quoted_string_text_error;
            break;

        case LEXER_quoted_pair:
            // Character after a backslash inside a quoted string: taken
            // as is.
            ProcessTokenIfAvailable();
            lexerQuotedString.push_back(ch);
            lexerState = LEXER_quoted_string;
            break;

        case LEXER_cr_read:
            // A CR was seen: CR LF is a newline (possibly the start of a
            // fold), a lone CR is reported through ProcessCR().
            ProcessTokenIfAvailable();
            if(ch == '\n') {
                if(foldingEnabled)
                    lexerState = LEXER_newline_read;
                else {
                    ProcessNewline();
                    lexerState = LEXER_init;
                }
            }
            else {
                ProcessCR();
                lexerState = LEXER_init;
                goto again;
            }
            break;

        case LEXER_newline_read: // LWS folding state
            // After CR LF: a blank means the line is folded, anything
            // else means the newline was real.
            ProcessTokenIfAvailable();
            if(ch == ' ' || ch == '\t')
                lexerState = LEXER_LWS_read;
            else {
                ProcessNewline();
                lexerState = LEXER_init;
                goto again;
            }
            break;

        case LEXER_LWS_read: // partial LWS
            // Inside a run of blanks; further blanks are absorbed.
            ProcessTokenIfAvailable();
            if(ch == '\r')
                lexerState = LEXER_LWS_cr_read;
            else if(ch != ' ' && ch != '\t') {
                lexerState = LEXER_init;
                ProcessLWS();
                goto again;
            }
            break;

        case LEXER_LWS_cr_read:
            // Blanks followed by CR.
            ProcessTokenIfAvailable();
            if(ch == '\n') {
                if(foldingEnabled)
                    lexerState = LEXER_LWS_newline_read;
                else {
                    ProcessLWS();
                    ProcessNewline();
                    lexerState = LEXER_init;
                }
            }
            else {
                ProcessCR();
                lexerState = LEXER_init;
                goto again;
            }
            break;

        case LEXER_LWS_newline_read:
            // Blanks, CR LF, and now either more blanks (still the same
            // LWS run) or the start of a new line.
            ProcessTokenIfAvailable();
            if(ch == ' ' || ch == '\t')
                lexerState = LEXER_LWS_read;
            else {
                ProcessLWS();
                ProcessNewline();
                lexerState = LEXER_init;
                goto again;
            }
            break;
        }
    }

    // Body reached during this call: refresh validity and completion.
    if(parserState == PARSER_body) {
        valid = methodValid && urlValid && httpVersionValid;
        if(data.size() == contentLength)
            done = true;
    }
    return consumed;
}
/*
 * Reads next symbol
 *
 * Skips separators, then classifies what follows and stores the result in
 * the reader state:
 *   it8->sy   - kind of symbol (keyword value from BinSrchKey, SIDENT,
 *               SINUM, SDNUM, SSTRING, SEOLN, SEOF or SSYNERROR)
 *   it8->id   - upper-cased identifier text
 *   it8->inum - integer value; it8->dnum - real value (set via ReadReal)
 *   it8->str  - string contents with \n, \r, \t and \\ escapes decoded
 * Comments (from '#' to end of line) are skipped; the function only returns
 * once a non-comment symbol has been read.
 *
 * Fixes relative to the previous revision:
 *  - a leading '+' is consumed; before, it was left as the current
 *    character, so the same '+' was seen again on every call;
 *  - the integer overflow guard is computed without overflowing itself and
 *    takes the incoming digit into account (the old `(long) inum * 10L`
 *    test overflows where long is 32 bits and let values just above
 *    INT_MAX through elsewhere).
 */
static void InSymbol(LPIT8 it8)
{
    register char *idptr;
    register int k;
    SYMBOL key;
    int sng;

    do {
        while (isseparator(it8->ch))
            NextCh(it8);

        if (isfirstidchar(it8->ch)) {          /* Identifier */

            k = 0;
            idptr = it8->id;

            /* characters beyond MAXID - 1 are read but not stored */
            do {
                if (++k < MAXID) *idptr++ = (char) it8->ch;
                NextCh(it8);
            } while (isidchar(it8->ch));

            *idptr = '\0';
            xstrupr(it8->id);

            key = BinSrchKey(it8->id);
            if (key == SNONE) it8->sy = SIDENT;
            else it8->sy = key;
        }
        else                                   /* Is a number? */
        if (isdigit(it8->ch) || it8->ch == '.' || it8->ch == '-' || it8->ch == '+') {

            int sign = 1;

            if (it8->ch == '-') {
                sign = -1;
                NextCh(it8);
            }
            else if (it8->ch == '+') {
                NextCh(it8);
            }

            it8->inum = 0;
            it8->sy   = SINUM;

            while (isdigit(it8->ch)) {

                int digit = it8->ch - '0';

                /* inum * 10 + digit would not fit in an int: continue
                   as a real number instead */
                if (it8->inum > (INT_MAX - digit) / 10) {
                    ReadReal(it8, it8->inum);
                    it8->sy = SDNUM;
                    it8->dnum *= sign;
                    return;
                }

                it8->inum = it8->inum * 10 + digit;
                NextCh(it8);
            }

            if (it8->ch == '.') {
                ReadReal(it8, it8->inum);
                it8->sy = SDNUM;
                it8->dnum *= sign;
                return;
            }

            it8->inum *= sign;
            return;
        }
        else
        switch ((int) it8->ch) {

        /* End of input: NUL or Ctrl-Z */
        case '\0':
        case '\x1a':
            it8->sy = SEOF;
            break;

        case '\n':
            NextCh(it8);
            it8->sy = SEOLN;
            it8->lineno++;
            break;

        /* Comment */
        case '#':
            NextCh(it8);
            while (it8->ch && it8->ch != '\n')
                NextCh(it8);

            it8->sy = SCOMMENT;
            break;

        /* String. I will support \", \n, \t and \\. */
        /* But otherwise I hardly doubt these will be used ... */
        case '\'':
        case '\"':
            idptr = it8->str;
            sng = it8->ch;          /* remember which quote opened it */
            k = 0;
            NextCh(it8);

            /*
             * NOTE(review): up to MAXSTR characters are stored before the
             * terminator is written - confirm it8->str has room for
             * MAXSTR + 1 bytes.
             */
            while (k < MAXSTR && it8->ch != sng) {

                /* a line break inside the string ends it: push k past
                   the limit so the loop stops */
                if (it8->ch == '\n'|| it8->ch == '\r') k = MAXSTR+1;
                else {
                    if (it8->ch == '\\') {
                        NextCh(it8);

                        switch (it8->ch) {
                        case 'n':  *idptr++ = '\n'; break;
                        case 'r':  *idptr++ = '\r'; break;
                        case 't':  *idptr++ = '\t'; break;
                        case '\\': *idptr++ = '\\'; break;
                        default:
                            /* any other escaped character, including
                               the quote, is taken literally */
                            *idptr++ = (char) it8->ch;
                        }

                        NextCh(it8);
                    }
                    else {
                        *idptr++ = (char) it8->ch;
                        NextCh(it8);
                    }
                    k++;
                }
            }

            it8->sy = SSTRING;
            *idptr = '\0';
            NextCh(it8);
            break;

        default:
            it8->sy = SSYNERROR;
            NextCh(it8);
        }

    } while (it8->sy == SCOMMENT);
}