/*
 * Parse a back-quoted command substitution.
 *
 * On entry *offset indexes the character just after the opening "`" in
 * words.  Characters are accumulated in a private command buffer until the
 * closing "`" is found; the buffer is then passed to exec_comm() together
 * with the caller's current word and word list, and exec_comm()'s result is
 * returned with *offset left on the closing "`".
 *
 * Single quotes toggle a "quoted" state that only selects which backslash
 * helper is used; the quote characters themselves are kept in the command.
 *
 * Returns:
 *   the result of exec_comm()  when the closing "`" is found,
 *   WRDE_SYNTAX                when the input ends before the closing "`",
 *   WRDE_NOSPACE               when the command buffer cannot be extended,
 *   or the first non-zero value reported by a backslash helper.
 */
static int parse_backtick(char **word, size_t * word_length, size_t * max_length, const char *words, size_t * offset, int flags, wordexp_t * pwordexp, const char *ifs, const char *ifs_white)
{
	int error;
	int squoting = 0;	/* non-zero while inside '...' */
	size_t comm_length;
	size_t comm_maxlen;
	char *comm = w_newword(&comm_length, &comm_maxlen);

	for (; words[*offset]; ++(*offset)) {
		switch (words[*offset]) {
		case '`':
			/* Go -- give the script to the shell */
			error = exec_comm(comm, word, word_length, max_length, flags, pwordexp, ifs, ifs_white);
			free(comm);
			return error;

		case '\\':
			/*
			 * Both helpers are invoked with *offset still on the
			 * backslash, exactly as the other callers in this file
			 * do.  Advancing first would make the helper look one
			 * character too far (dropping the escaped character, or
			 * stepping past the terminating NUL when the backslash
			 * is the last character).
			 */
			if (squoting)
				error = parse_qtd_backslash(&comm, &comm_length, &comm_maxlen, words, offset);
			else
				error = parse_backslash(&comm, &comm_length, &comm_maxlen, words, offset);

			if (error) {
				free(comm);
				return error;
			}
			break;

		case '\'':
			squoting = 1 - squoting;
			/* Fall through: the quote itself is part of the command */
		default:
			comm = w_addchar(comm, &comm_length, &comm_maxlen, words[*offset]);
			if (comm == NULL)
				return WRDE_NOSPACE;
		}
	}

	/* Premature end: no closing "`" */
	free(comm);
	return WRDE_SYNTAX;
}
/*
 * Collect one glob pattern from words and expand it.
 *
 * Starting at *offset, characters are appended to *word until the end of
 * the input or a field separator (a character of ifs, or space/tab/newline
 * when ifs is NULL).  Quote characters toggle the quoting state and are not
 * copied; '$' (outside single quotes) is delegated to parse_dollars(), which
 * may add words to a private list; backslashes are delegated to the quoted
 * or unquoted backslash helper.
 *
 * Afterwards *offset is stepped back by one so the caller's loop re-parses
 * the character that ended the pattern, the collected word is appended to
 * the private list, *word is reset with w_newword(), and every entry of the
 * private list is passed to do_parse_glob().
 *
 * Returns 0 on success, WRDE_NOSPACE when a character cannot be appended,
 * or the first non-zero value reported by a helper.
 *
 * NOTE(review): if the very first character examined is itself a field
 * separator, nothing is consumed before *offset is stepped back.
 */
static int parse_glob(char **word, size_t * word_length, size_t * max_length, const char *words, size_t * offset, int flags, wordexp_t * pwordexp, const char *ifs, const char *ifs_white)
{
	int error = WRDE_NOSPACE;
	int quoted = 0;		/* 1 if singly-quoted, 2 if doubly */
	size_t i;		/* same type as we_wordc */
	wordexp_t glob_list;	/* List of words to glob */

	glob_list.we_wordc = 0;
	glob_list.we_wordv = NULL;
	glob_list.we_offs = 0;

	for (; words[*offset] != '\0'; ++*offset) {
		const char ch = words[*offset];

		/* Reached IFS */
		if ((ifs && strchr(ifs, ch)) || (!ifs && strchr(" \t\n", ch)))
			break;

		/* Sort out quoting: a quote only toggles its own kind */
		if (ch == '\'') {
			if (quoted == 0) {
				quoted = 1;
				continue;
			} else if (quoted == 1) {
				quoted = 0;
				continue;
			}
		} else if (ch == '"') {
			if (quoted == 0) {
				quoted = 2;
				continue;
			} else if (quoted == 2) {
				quoted = 0;
				continue;
			}
		}

		/* Sort out other special characters */
		if (quoted != 1 && ch == '$') {
			error = parse_dollars(word, word_length, max_length, words, offset, flags, &glob_list, ifs, ifs_white, quoted == 2);
			if (error)
				goto tidy_up;
			continue;
		} else if (ch == '\\') {
			if (quoted)
				error = parse_qtd_backslash(word, word_length, max_length, words, offset);
			else
				error = parse_backslash(word, word_length, max_length, words, offset);
			if (error)
				goto tidy_up;
			continue;
		}

		*word = w_addchar(*word, word_length, max_length, ch);
		if (*word == NULL) {
			/*
			 * 'error' may have been reset to 0 by an earlier
			 * successful helper call, so set it explicitly rather
			 * than relying on its initial value.
			 */
			error = WRDE_NOSPACE;
			goto tidy_up;
		}
	}

	/* Don't forget to re-parse the character we stopped at. */
	--*offset;

	/* Glob the words */
	error = w_addword(&glob_list, *word);
	*word = w_newword(word_length, max_length);
	for (i = 0; error == 0 && i < glob_list.we_wordc; i++)
		error = do_parse_glob(glob_list.we_wordv[i], word, word_length, max_length, pwordexp, ifs /*, ifs_white */);

	/* Now tidy up */
tidy_up:
	wordfree(&glob_list);
	return error;
}
/*
 * Perform shell-like word expansion of the string words into *we.
 *
 * flags handled here:
 *   WRDE_REUSE  - *we is released with wordfree() before use.
 *   WRDE_APPEND - when clear, a fresh word vector is allocated and
 *                 we_wordc is reset to 0.
 *   WRDE_DOOFFS - (without WRDE_APPEND) reserve we->we_offs leading slots;
 *                 otherwise we_offs is forced to 0.
 *   WRDE_NOCMD  - a "`" in the input fails with WRDE_CMDSUB.
 * flags is also passed through to the parse_* helpers.
 *
 * Field separators are taken from the IFS environment variable, defaulting
 * to <space><tab><newline> when it is unset; the distinct whitespace
 * characters found in IFS are collected separately for the helpers.
 *
 * Returns 0 on success.  On failure returns WRDE_NOSPACE (the list built so
 * far is left in *we), WRDE_BADCHAR for an unquoted newline or one of
 * |&;<>(){}, WRDE_CMDSUB as described above, or the error reported by a
 * helper.  For every error other than WRDE_NOSPACE, *we is restored to the
 * contents it had on entry (after freeing the new list unless WRDE_APPEND
 * was given).  All failures release the partially built word.
 */
int wordexp(const char *words, wordexp_t * we, int flags)
{
	size_t words_offset;
	size_t word_length;
	size_t max_length;
	char *word = w_newword(&word_length, &max_length);
	int error;
	char *ifs;
	char ifs_white[4];	/* at most " \t\n" plus the terminator */
	wordexp_t old_word = *we;

	if (flags & WRDE_REUSE) {
		/* Minimal implementation of WRDE_REUSE for now */
		wordfree(we);
		old_word.we_wordv = NULL;
	}

	if ((flags & WRDE_APPEND) == 0) {
		we->we_wordc = 0;

		if (flags & WRDE_DOOFFS) {
			we->we_wordv = calloc(1 + we->we_offs, sizeof(char *));
			if (we->we_wordv == NULL) {
				error = WRDE_NOSPACE;
				goto do_error;
			}
		} else {
			we->we_wordv = calloc(1, sizeof(char *));
			if (we->we_wordv == NULL) {
				error = WRDE_NOSPACE;
				goto do_error;
			}
			we->we_offs = 0;
		}
	}

	/* Find out what the field separators are.
	 * There are two types: whitespace and non-whitespace.
	 */
	ifs = getenv("IFS");

	if (!ifs) {
		/* IFS unset - use <space><tab><newline>. */
		ifs = strcpy(ifs_white, " \t\n");
	} else {
		char *ifsch = ifs;
		char *whch = ifs_white;

		/* Start off with no whitespace IFS characters */
		ifs_white[0] = '\0';

		while (*ifsch != '\0') {
			if ((*ifsch == ' ') || (*ifsch == '\t') || (*ifsch == '\n')) {
				/* Whitespace IFS.  See first whether it is
				 * already in our collection.
				 */
				char *runp = ifs_white;

				while (runp < whch && *runp != '\0' && *runp != *ifsch)
					++runp;

				if (runp == whch)
					*whch++ = *ifsch;
			}
			++ifsch;
		}
		*whch = '\0';
	}

	for (words_offset = 0; words[words_offset]; ++words_offset)
		switch (words[words_offset]) {
		case '\\':
			error = parse_backslash(&word, &word_length, &max_length, words, &words_offset);
			if (error)
				goto do_error;
			break;

		case '$':
			error = parse_dollars(&word, &word_length, &max_length, words, &words_offset, flags, we, ifs, ifs_white, 0);
			if (error)
				goto do_error;
			break;

		case '`':
			if (flags & WRDE_NOCMD) {
				error = WRDE_CMDSUB;
				goto do_error;
			}

			++words_offset;
			error = parse_backtick(&word, &word_length, &max_length, words, &words_offset, flags, we, ifs, ifs_white);
			if (error)
				goto do_error;
			break;

		case '"':
			++words_offset;
			error = parse_dquote(&word, &word_length, &max_length, words, &words_offset, flags, we, ifs, ifs_white);
			if (error)
				goto do_error;

			/* An empty quoted string still yields a word */
			if (!word_length) {
				error = w_addword(we, NULL);
				if (error)
					goto do_error;	/* same cleanup as every other failure */
			}
			break;

		case '\'':
			++words_offset;
			error = parse_squote(&word, &word_length, &max_length, words, &words_offset);
			if (error)
				goto do_error;

			/* An empty quoted string still yields a word */
			if (!word_length) {
				error = w_addword(we, NULL);
				if (error)
					goto do_error;	/* same cleanup as every other failure */
			}
			break;

		case '~':
			error = parse_tilde(&word, &word_length, &max_length, words, &words_offset, we->we_wordc);
			if (error)
				goto do_error;
			break;

		case '*':
		case '[':
		case '?':
			error = parse_glob(&word, &word_length, &max_length, words, &words_offset, flags, we, ifs, ifs_white);
			if (error)
				goto do_error;
			break;

		default:
			/* Is it a word separator? */
			if (strchr(" \t", words[words_offset]) == NULL) {
				char ch = words[words_offset];

				/* Not a word separator -- but is it a valid word char? */
				if (strchr("\n|&;<>(){}", ch)) {
					/* Fail */
					error = WRDE_BADCHAR;
					goto do_error;
				}

				/* "Ordinary" character -- add it to word */
				word = w_addchar(word, &word_length, &max_length, ch);
				if (word == NULL) {
					error = WRDE_NOSPACE;
					goto do_error;
				}
				break;
			}

			/* If a word has been delimited, add it to the list. */
			if (word != NULL) {
				error = w_addword(we, word);
				if (error)
					goto do_error;
			}

			word = w_newword(&word_length, &max_length);
		}

	/* End of string */

	/* There was a word separator at the end */
	if (word == NULL)	/* i.e. w_newword */
		return 0;

	/* There was no field separator at the end */
	error = w_addword(we, word);
	if (error == 0)
		return 0;
	/*
	 * Adding the final word failed: fall into the common error path so
	 * the word is released, exactly as for a failed w_addword() inside
	 * the loop.
	 */

do_error:
	/* Error:
	 *  free memory used (unless error is WRDE_NOSPACE), and
	 *  set we members back to what they were.
	 */
	free(word);

	if (error == WRDE_NOSPACE)
		return WRDE_NOSPACE;

	if ((flags & WRDE_APPEND) == 0)
		wordfree(we);

	*we = old_word;
	return error;
}
bool assembly_lexer(std::string const & file_name, unsigned int & line, std::string::const_iterator & begin, std::string::const_iterator const & end, lexeme & output) { std::string input; for(; begin != end; ++begin) { char current_char = *begin; char type = type_lookup_table[current_char]; switch(type) { case char_type_illegal: { lexer_exception(file_name, line, "Illegal character"); } case char_type_name: { std::string::const_iterator name_begin = begin; for(++begin; (begin != end) && (is_name_char(*begin) == true); ++begin); output = lexeme(lexeme_name, std::string(name_begin, begin), line); return true; } case char_type_digit: { std::string::const_iterator number_begin = begin; for(++begin; (begin != end) && (nil::string::is_digit(*begin) == true) ; ++begin); output = lexeme(lexeme_number, std::string(number_begin, begin), line); return true; } case char_type_zero: { std::string::const_iterator number_begin = begin; ++begin; if(begin != end) { char second_character = *begin; if( (second_character == 'x') || (second_character == 'X') ) { for(++begin; (begin != end) && (nil::string::is_digit(*begin) == true) ; ++begin); } else if(is_binary_digit(second_character) == true) { for(++begin; (begin != end) && (is_binary_digit(*begin) == true) ; ++begin); } } output = lexeme(lexeme_number, std::string(number_begin, begin), line); return true; } case char_type_string: { std::string string; for(++begin; begin != end;) { char current_char = *begin; switch(current_char) { case '"': { ++begin; output = lexeme(lexeme_string, string, line); return true; } case '\\': { try { parse_backslash(begin, end, string); } catch(std::exception & exception) { lexer_exception(file_name, line, exception.what()); } break; } case '\n': { lexer_exception(file_name, line, "Newline in string"); } default: { string += current_char; ++begin; break; } } } lexer_exception(file_name, line, "Incomplete string at the end of file"); } case char_type_operator: { ++begin; output = lexeme(lexeme_operator, 
std::string(1, current_char), line); return true; } case char_type_operator_extended: { ++begin; if(begin == end) { output = lexeme(lexeme_operator, std::string(1, current_char), line); return false; } else { char second_char = *begin; bool is_extended = false; switch(current_char) { case '&': { if(second_char == '&') { is_extended = true; } break; } case '|': { if(second_char == '|') { is_extended = true; } break; } case '=': { if(second_char == '=') { is_extended = true; } break; } case '!': { if(second_char == '=') { is_extended = true; } break; } case '<': { if(second_char == '=') { is_extended = true; } break; } case '>': { if(second_char == '=') { is_extended = true; } break; } } if(is_extended == true) { ++begin; output = lexeme(lexeme_operator, std::string(1, current_char) + second_char, line); } else { output = lexeme(lexeme_operator, std::string(1, current_char), line); } return true; } } case char_type_newline: { ++begin; ++line; output = lexeme(lexeme_newline, line); return true; } case char_type_comment: { for(++begin; begin != end; ++begin) { if(*begin == '\n') { ++begin; ++line; output = lexeme(lexeme_newline, line); return true; } } return false; } } } return false; }