/*
 * Parse a backtick-delimited command substitution.
 *
 * On entry *offset indexes the character just after the opening "`".
 * Characters are accumulated into a private command buffer until the
 * closing "`" is reached; the buffer is then handed to exec_comm() together
 * with the caller's word state and exec_comm()'s result is returned.  On
 * that path *offset is left on the closing "`".
 *
 * Returns:
 *   - the result of exec_comm() when the closing "`" is found;
 *   - the error reported by parse_backslash()/parse_qtd_backslash();
 *   - WRDE_NOSPACE if w_addchar() returns NULL;
 *   - WRDE_SYNTAX if the input ends before a closing "`".
 */
static int
parse_backtick(char **word, size_t * word_length, size_t * max_length,
			   const char *words, size_t * offset, int flags,
			   wordexp_t * pwordexp, const char *ifs,
			   const char *ifs_white)
{
	/* We are poised just after "`" */
	int error;
	int squoting = 0;			/* toggled by each "'" seen in the command */
	size_t comm_length;
	size_t comm_maxlen;
	char *comm = w_newword(&comm_length, &comm_maxlen);

	for (; words[*offset]; ++(*offset)) {
		switch (words[*offset]) {
		case '`':
			/* Go -- give the script to the shell */
			error = exec_comm(comm, word, word_length, max_length, flags,
							  pwordexp, ifs, ifs_white);
			free(comm);
			return error;

		case '\\':
			if (squoting) {
				error = parse_qtd_backslash(&comm, &comm_length, &comm_maxlen,
										words, offset);

				if (error) {
					free(comm);
					return error;
				}

				break;
			}

			/* parse_backslash() is called with *offset on the backslash
			 * itself, exactly as the other callers in this file do.
			 * Advancing *offset first would make it act on the wrong
			 * character and could step past the end of the input when the
			 * backslash is the last character.
			 */
			error = parse_backslash(&comm, &comm_length, &comm_maxlen, words,
								offset);

			if (error) {
				free(comm);
				return error;
			}

			break;

		case '\'':
			squoting = 1 - squoting;
			/* fallthrough: the quote itself is part of the command text */
		default:
			comm = w_addchar(comm, &comm_length, &comm_maxlen,
						  words[*offset]);
			/* NOTE(review): comm is not freed here; presumably w_addchar()
			 * releases the old buffer when it fails -- confirm.
			 */
			if (comm == NULL)
				return WRDE_NOSPACE;
		}
	}

	/* Premature end */
	free(comm);
	return WRDE_SYNTAX;
}
/*
 * Collect the remainder of a word that contains glob characters and expand
 * it with do_parse_glob().
 *
 * Scanning starts at words[*offset]; wordexp() calls this with that index
 * on the '*', '[' or '?' itself.  Characters are consumed until a field
 * separator (a character of ifs, or one of " \t\n" when ifs is NULL) or the
 * end of the input.  Quote characters that open or close a quoted region
 * are dropped, '$' (outside single quotes) is handed to parse_dollars() and
 * backslashes to parse_backslash()/parse_qtd_backslash(); everything else
 * is appended to *word.
 *
 * The collected word, plus any words parse_dollars() added to the local
 * list, are then each passed to do_parse_glob().  On return *offset has
 * been stepped back by one so the caller's loop re-reads the character the
 * scan stopped at.
 *
 * Returns 0 on success, WRDE_NOSPACE when w_addchar() fails, or the error
 * reported by one of the helpers.
 */
static int
parse_glob(char **word, size_t * word_length, size_t * max_length,
		   const char *words, size_t * offset, int flags,
		   wordexp_t * pwordexp, const char *ifs, const char *ifs_white)
{
	int error = WRDE_NOSPACE;
	int quoted = 0;				/* 1 if singly-quoted, 2 if doubly */
	size_t i;					/* same type as we_wordc: no signed/unsigned mix */
	wordexp_t glob_list;		/* List of words to glob */

	glob_list.we_wordc = 0;
	glob_list.we_wordv = NULL;
	glob_list.we_offs = 0;
	for (; words[*offset] != '\0'; ++*offset) {
		if ((ifs && strchr(ifs, words[*offset])) ||
			(!ifs && strchr(" \t\n", words[*offset])))
			/* Reached IFS */
			break;

		/* Sort out quoting: a quote character toggles its own kind of
		 * quoting and is dropped; inside the other kind of quoting it
		 * falls through and is treated as an ordinary character.
		 */
		if (words[*offset] == '\'') {
			if (quoted == 0) {
				quoted = 1;
				continue;
			} else if (quoted == 1) {
				quoted = 0;
				continue;
			}
		} else if (words[*offset] == '"') {
			if (quoted == 0) {
				quoted = 2;
				continue;
			} else if (quoted == 2) {
				quoted = 0;
				continue;
			}
		}

		/* Sort out other special characters */
		if (quoted != 1 && words[*offset] == '$') {
			error = parse_dollars(word, word_length, max_length, words,
								  offset, flags, &glob_list, ifs,
								  ifs_white, quoted == 2);
			if (error)
				goto tidy_up;

			continue;
		} else if (words[*offset] == '\\') {
			if (quoted)
				error = parse_qtd_backslash(word, word_length, max_length,
											words, offset);
			else
				error = parse_backslash(word, word_length, max_length,
										words, offset);

			if (error)
				goto tidy_up;

			continue;
		}

		*word = w_addchar(*word, word_length, max_length, words[*offset]);
		if (*word == NULL) {
			/* Set the code explicitly: a successful parse_dollars() or
			 * backslash helper earlier in the scan has already reset
			 * `error` to 0, which would otherwise be returned as success.
			 */
			error = WRDE_NOSPACE;
			goto tidy_up;
		}
	}

	/* Don't forget to re-parse the character we stopped at. */
	--*offset;

	/* Glob the words */
	error = w_addword(&glob_list, *word);
	*word = w_newword(word_length, max_length);
	for (i = 0; error == 0 && i < glob_list.we_wordc; i++)
		error = do_parse_glob(glob_list.we_wordv[i], word, word_length,
				max_length, pwordexp, ifs /*, ifs_white*/);

	/* Now tidy up */
  tidy_up:
	wordfree(&glob_list);
	return error;
}
/*
 * Expand WORDS shell-style and store the resulting words in WE.
 *
 * Flag bits examined directly in this function:
 *   WRDE_REUSE   - release the previous contents of WE with wordfree().
 *   WRDE_APPEND  - keep the existing word list; otherwise a fresh vector
 *                  is allocated and we_wordc is reset to 0.
 *   WRDE_DOOFFS  - (without WRDE_APPEND) size the fresh vector for
 *                  we_offs leading slots; otherwise we_offs is set to 0.
 *   WRDE_NOCMD   - a "`" in the input fails with WRDE_CMDSUB.
 * FLAGS is also forwarded unchanged to the parse_* helpers.
 *
 * Returns 0 on success.  On failure returns a WRDE_* code; for every code
 * other than WRDE_NOSPACE, *WE is restored to the value it had on entry
 * (after freeing the new list when WRDE_APPEND was not given).
 */
int wordexp(const char *words, wordexp_t * we, int flags)
{
	size_t words_offset;
	size_t word_length;
	size_t max_length;
	char *word = w_newword(&word_length, &max_length);
	int error;
	char *ifs;
	char ifs_white[4];			/* at most ' ', '\t', '\n' plus terminator */
	wordexp_t old_word = *we;	/* snapshot used to roll back on error */

	if (flags & WRDE_REUSE) {
		/* Minimal implementation of WRDE_REUSE for now */
		wordfree(we);
		/* The old vector is gone; make sure a rollback cannot restore it. */
		old_word.we_wordv = NULL;
	}

	if ((flags & WRDE_APPEND) == 0) {
		we->we_wordc = 0;

		if (flags & WRDE_DOOFFS) {
			we->we_wordv = calloc(1 + we->we_offs, sizeof(char *));
			if (we->we_wordv == NULL) {
				error = WRDE_NOSPACE;
				goto do_error;
			}
		} else {
			we->we_wordv = calloc(1, sizeof(char *));
			if (we->we_wordv == NULL) {
				error = WRDE_NOSPACE;
				goto do_error;
			}

			we->we_offs = 0;
		}
	}

	/* Find out what the field separators are.
	 * There are two types: whitespace and non-whitespace.
	 */
	ifs = getenv("IFS");

	if (!ifs)
		/* IFS unset - use <space><tab><newline>. */
		ifs = strcpy(ifs_white, " \t\n");
	else {
		char *ifsch = ifs;
		char *whch = ifs_white;

		/* Start off with no whitespace IFS characters */
		ifs_white[0] = '\0';

		/* Copy each distinct whitespace character of IFS into ifs_white. */
		while (*ifsch != '\0') {
			if ((*ifsch == ' ') || (*ifsch == '\t') || (*ifsch == '\n')) {
				/* Whitespace IFS.  See first whether it is already in our
				   collection.  */
				char *runp = ifs_white;

				while (runp < whch && *runp != '\0' && *runp != *ifsch)
					++runp;

				if (runp == whch)
					*whch++ = *ifsch;
			}

			++ifsch;
		}
		*whch = '\0';
	}

	/* Walk the input, dispatching each special character to its parser.
	 * The helpers advance words_offset themselves as they consume input.
	 */
	for (words_offset = 0; words[words_offset]; ++words_offset)
		switch (words[words_offset]) {
		case '\\':
			error = parse_backslash(&word, &word_length, &max_length, words,
								&words_offset);

			if (error)
				goto do_error;

			break;

		case '$':
			error = parse_dollars(&word, &word_length, &max_length, words,
								  &words_offset, flags, we, ifs, ifs_white,
								  0);

			if (error)
				goto do_error;

			break;

		case '`':
			if (flags & WRDE_NOCMD) {
				error = WRDE_CMDSUB;
				goto do_error;
			}

			++words_offset;
			error = parse_backtick(&word, &word_length, &max_length, words,
								   &words_offset, flags, we, ifs,
								   ifs_white);

			if (error)
				goto do_error;

			break;

		case '"':
			++words_offset;
			error = parse_dquote(&word, &word_length, &max_length, words,
								 &words_offset, flags, we, ifs, ifs_white);

			if (error)
				goto do_error;

			if (!word_length) {
				error = w_addword(we, NULL);

				/* Use the common error path so `word` is released like on
				 * every other failure in this function.
				 */
				if (error)
					goto do_error;
			}

			break;

		case '\'':
			++words_offset;
			error = parse_squote(&word, &word_length, &max_length, words,
								 &words_offset);

			if (error)
				goto do_error;

			if (!word_length) {
				error = w_addword(we, NULL);

				/* Same common error path as for the double-quote case. */
				if (error)
					goto do_error;
			}

			break;

		case '~':
			error = parse_tilde(&word, &word_length, &max_length, words,
								&words_offset, we->we_wordc);

			if (error)
				goto do_error;

			break;

		case '*':
		case '[':
		case '?':
			error = parse_glob(&word, &word_length, &max_length, words,
							   &words_offset, flags, we, ifs, ifs_white);

			if (error)
				goto do_error;

			break;

		default:
			/* Is it a word separator? */
			if (strchr(" \t", words[words_offset]) == NULL) {
				char ch = words[words_offset];

				/* Not a word separator -- but is it a valid word char? */
				if (strchr("\n|&;<>(){}", ch)) {
					/* Fail */
					error = WRDE_BADCHAR;
					goto do_error;
				}

				/* "Ordinary" character -- add it to word */
				word = w_addchar(word, &word_length, &max_length, ch);
				if (word == NULL) {
					error = WRDE_NOSPACE;
					goto do_error;
				}

				break;
			}

			/* If a word has been delimited, add it to the list. */
			if (word != NULL) {
				error = w_addword(we, word);
				if (error)
					goto do_error;
			}

			word = w_newword(&word_length, &max_length);
		}

	/* End of string */

	/* There was a word separator at the end */
	if (word == NULL)			/* i.e. w_newword */
		return 0;

	/* There was no field separator at the end */
	return w_addword(we, word);

  do_error:
	/* Error:
	 *  free memory used (unless error is WRDE_NOSPACE), and
	 *  set we members back to what they were.
	 */

	free(word);

	if (error == WRDE_NOSPACE)
		return WRDE_NOSPACE;

	if ((flags & WRDE_APPEND) == 0)
		wordfree(we);

	*we = old_word;
	return error;
}
Example #4
0
// Extracts the next lexeme from [begin, end) into `output`.
//
// Each character is classified through type_lookup_table.  Characters whose
// class has no case below are skipped (presumably whitespace -- confirm
// against the table).  `begin` is advanced past everything that was consumed
// and `line` is incremented whenever a newline is consumed.
//
// Returns true when a lexeme was stored in `output`, false when the input was
// exhausted without producing one (including a comment that runs to the end
// of the input without a newline).
//
// Problems are reported through lexer_exception(); the cases that call it
// have no statement after the call, so it is presumably expected to throw.
bool assembly_lexer(std::string const & file_name, unsigned int & line, std::string::const_iterator & begin, std::string::const_iterator const & end, lexeme & output)
{
    for(; begin != end; ++begin)
    {
        char current_char = *begin;
        // Index through unsigned char: where plain char is signed, a byte
        // >= 0x80 would otherwise be used as a negative index.
        char type = type_lookup_table[static_cast<unsigned char>(current_char)];
        switch(type)
        {
        case char_type_illegal:
        {
            lexer_exception(file_name, line, "Illegal character");
        }

        case char_type_name:
        {
            std::string::const_iterator name_begin = begin;
            for(++begin;
                    (begin != end)
                    &&
                    (is_name_char(*begin) == true);
                    ++begin);
            output = lexeme(lexeme_name, std::string(name_begin, begin), line);
            return true;
        }

        case char_type_digit:
        {
            std::string::const_iterator number_begin = begin;
            for(++begin;
                    (begin != end)
                    &&
                    (nil::string::is_digit(*begin) == true)
                    ; ++begin);
            output = lexeme(lexeme_number, std::string(number_begin, begin), line);
            return true;
        }

        case char_type_zero:
        {
            // A leading '0' may introduce "0x..."/"0X..." or a run of binary
            // digits; otherwise the number is just "0".
            std::string::const_iterator number_begin = begin;
            ++begin;
            if(begin != end)
            {
                char second_character = *begin;
                if(
                    (second_character == 'x')
                    ||
                    (second_character == 'X')
                )
                {
                    // NOTE(review): only characters accepted by
                    // nil::string::is_digit are consumed after the prefix;
                    // confirm whether that helper accepts a-f/A-F.
                    for(++begin;
                            (begin != end)
                            &&
                            (nil::string::is_digit(*begin) == true)
                            ; ++begin);
                }
                else if(is_binary_digit(second_character) == true)
                {
                    for(++begin;
                            (begin != end)
                            &&
                            (is_binary_digit(*begin) == true)
                            ; ++begin);
                }
            }
            output = lexeme(lexeme_number, std::string(number_begin, begin), line);
            return true;
        }

        case char_type_string:
        {
            // Accumulate the string body; the surrounding quotes are not
            // part of the lexeme text.
            std::string string;
            for(++begin; begin != end;)
            {
                char string_char = *begin;
                switch(string_char)
                {
                case '"':
                {
                    ++begin;
                    output = lexeme(lexeme_string, string, line);
                    return true;
                }

                case '\\':
                {
                    // parse_backslash is given the iterators and the string
                    // by reference; nothing here advances `begin`, so it is
                    // expected to do so itself.
                    try
                    {
                        parse_backslash(begin, end, string);
                    }
                    catch(std::exception & exception)
                    {
                        lexer_exception(file_name, line, exception.what());
                    }
                    break;
                }

                case '\n':
                {
                    lexer_exception(file_name, line, "Newline in string");
                }

                default:
                {
                    string += string_char;
                    ++begin;
                    break;
                }
                }
            }

            lexer_exception(file_name, line, "Incomplete string at the end of file");
        }

        case char_type_operator:
        {
            ++begin;
            output = lexeme(lexeme_operator, std::string(1, current_char), line);
            return true;
        }

        case char_type_operator_extended:
        {
            ++begin;
            if(begin == end)
            {
                // Last character of the input: it is still a complete
                // single-character operator, so report it like the plain
                // operator case does.
                output = lexeme(lexeme_operator, std::string(1, current_char), line);
                return true;
            }

            // Two-character operators: && || == != <= >=
            char second_char = *begin;
            bool is_extended =
                ((current_char == '&') && (second_char == '&'))
                ||
                ((current_char == '|') && (second_char == '|'))
                ||
                (
                    (second_char == '=')
                    &&
                    (
                        (current_char == '=')
                        ||
                        (current_char == '!')
                        ||
                        (current_char == '<')
                        ||
                        (current_char == '>')
                    )
                );

            if(is_extended)
            {
                ++begin;
                output = lexeme(lexeme_operator, std::string(1, current_char) + second_char, line);
            }
            else
            {
                output = lexeme(lexeme_operator, std::string(1, current_char), line);
            }
            return true;
        }

        case char_type_newline:
        {
            ++begin;
            ++line;
            output = lexeme(lexeme_newline, line);
            return true;
        }

        case char_type_comment:
        {
            // Skip to the end of the line; the terminating newline is
            // reported as a newline lexeme.
            for(++begin; begin != end; ++begin)
            {
                if(*begin == '\n')
                {
                    ++begin;
                    ++line;
                    output = lexeme(lexeme_newline, line);
                    return true;
                }
            }
            return false;
        }
        }
    }

    return false;
}