Esempio n. 1
0
/*
 * Case-insensitive comparison of two UTF-32 strings, limited to the
 * first 'n' code points (the UTF-32 counterpart of strncasecmp()).
 *
 * Case folding is whatever ASCII_TOLOWER() provides.  Comparison stops
 * at the first differing code point, at the first zero code point of
 * 'str1', or after 'n' code points, whichever comes first.
 *
 * Returns 1 if 'str1' compares greater, -1 if it compares less, and 0
 * if the compared ranges are equal.
 */
int
idn__utf32_strncasecmp(const unsigned long *str1, const unsigned long *str2,
		     size_t n) {
	unsigned long lhs, rhs;
	size_t i;

	for (i = 0; i < n; i++) {
		lhs = ASCII_TOLOWER(str1[i]);
		rhs = ASCII_TOLOWER(str2[i]);
		if (lhs != rhs)
			return ((lhs > rhs) ? 1 : -1);

		/* Both strings ended at the same position. */
		if (str1[i] == '\0')
			break;
	}

	return (0);
}
Esempio n. 2
0
/*
 * Test whether 'str' begins with 'prefix', comparing characters after
 * folding both through ASCII_TOLOWER().
 *
 * Both arguments must be non-NULL (checked with assert()).
 * Returns 1 if every character of 'prefix' matches the corresponding
 * character of 'str', 0 otherwise.  An empty 'prefix' always matches.
 */
int
idn__util_asciihaveaceprefix(const char *str, const char *prefix) {
	const char *s;
	const char *p;

	assert(str != NULL && prefix != NULL);

	for (s = str, p = prefix; *p != '\0'; s++, p++) {
		if (ASCII_TOLOWER(*s) != ASCII_TOLOWER(*p))
			return 0;
	}

	return (1);
}
Esempio n. 3
0
/*
 * Case-insensitive comparison of two zero-terminated UCS-4 strings
 * (the UCS-4 counterpart of strcasecmp()).
 *
 * Case folding is whatever ASCII_TOLOWER() provides.
 *
 * Returns 1 if 'str1' compares greater, -1 if it compares less, and 0
 * if the strings are equal.
 */
int
idn_ucs4_strcasecmp(const unsigned long *str1, const unsigned long *str2) {
	unsigned long lhs, rhs;

	for (;; str1++, str2++) {
		lhs = ASCII_TOLOWER(*str1);
		rhs = ASCII_TOLOWER(*str2);
		if (lhs != rhs)
			return ((lhs > rhs) ? 1 : -1);

		/* Equal so far and 'str1' has ended: the strings match. */
		if (*str1 == '\0')
			return (0);
	}
}
Esempio n. 4
0
/* Return the next token from the query held in parser->buf .. parser->end.
 *
 * The parser is a goto-driven state machine.  Each call starts by resuming
 * at the state saved in parser->state (via the JUMP macro, defined
 * elsewhere; presumably it dispatches to the matching *_label below and to
 * err_label for unknown states -- confirm against its definition).  Before
 * returning, the state to resume from on the next call is stored back into
 * parser->state.
 *
 * word - output buffer for the text of word tokens.  Upper-case characters
 *        are lower-cased before being stored.  The buffer must have room
 *        for parser->maxwordlen + 1 bytes, because a terminating '\0' is
 *        written at word[*len] even when *len == parser->maxwordlen.
 * len  - set to 0 on entry and to the length of the text in word on return.
 *
 * Conditions that are tolerated but suspicious (unmatched quotes or
 * parentheses, runs of non-word characters, ...) are OR-ed into
 * parser->warn.  parser->bytes is incremented for every byte consumed.
 *
 * NOTE: when a word reaches parser->maxwordlen it is returned at once; the
 * character that triggered the cut has already been consumed and is not
 * stored anywhere. */
enum queryparse_ret queryparse_parse(struct queryparse *parser, 
  char *word, unsigned int *len) {
    char c;
    *len = 0;

    JUMP(parser->state);

/* not in any entities yet */
toplevel_label:
    while (parser->buf < parser->end) {
        c = *parser->buf++;
        parser->bytes++;
        switch (c) {
        case ASCII_CASE_UPPER:
            /* note: AND and OR could be more efficiently detected by special
             * casing their first values, but that would make them harder to
             * change */

            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = ASCII_TOLOWER(c);
            } else {
                word[*len] = '\0';
                parser->state = TOPLEVEL;
                return QUERYPARSE_WORD;
            }

            if (c == AND_OP[0]) {
                /* it could be an AND op */
                goto inand_label;
            } else if (c == OR_OP[0]) {
                /* it could be an OR op */
                goto inor_label;
            } else {
                /* start of a normal word */
                goto inword_label;
            }
            break;

        case ASCII_CASE_LOWER:
        case ASCII_CASE_DIGIT:
        case ASCII_CASE_EXTENDED:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                word[*len] = '\0';
                parser->state = TOPLEVEL;
                return QUERYPARSE_WORD;
            }

            /* start of a normal word */
            goto inword_label;
            break;

        case NOSTOP_OP:
            /* start of a non-stopping word */
            goto inword_nostop_label;
            break;

        case EXCLUDE_OP:
            /* start of an excluded word */
            goto inword_exclude_label;
            break;

        case PHRASE_DELIM:
            parser->state = INPHRASE;
            return QUERYPARSE_START_PHRASE;
            break;

        case MOD_START:
            goto inmod_mod_label;
            break;

        case ASCII_CASE_SPACE:
            /* silently eat whitespace */
            break;

        case '(':
            parser->warn |= QUERYPARSE_WARN_PARENS_BOOLEAN; /*warn,fallthrough*/
        default:
            /* anything else we don't record in the word, but go to inword 
             * anyway (so we know when a string of junk characters occurred) */
            goto inword_label;
            break;
        }
    }
    /* if we end in toplevel we just EOF */
    goto endfile_label;

/* in a normal word, we may or may not have actually pushed any characters onto
 * the return word */
inword_label:
    while (parser->buf < parser->end) {
        c = *parser->buf++;
        parser->bytes++;
        switch (c) {
        case ASCII_CASE_UPPER:
            c = ASCII_TOLOWER(c);
            /* fallthrough */
        case ASCII_CASE_LOWER:
        case ASCII_CASE_DIGIT:
        case ASCII_CASE_EXTENDED:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD;
            }
            break;

        case '-':
        default:
            /* first punctuation mark: remember it by switching state, so
             * that a second one in a row breaks the word */
            goto punc_label;

        case ASCII_CASE_CONTROL:
            /* ignore junk characters */
            break;

        case ASCII_CASE_SPACE:
            /* word ended */
            if (*len) {
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD;
            } else {
                /* it was empty, so warn them of that */
                parser->warn |= QUERYPARSE_WARN_NONWORD;
                goto toplevel_label;
            }
            break;
        }
    }
    /* if we end in a word, we have to return it */
    if (*len) {
        word[*len] = '\0';
        parser->state = ENDFILE; /* transition to eof */
        return QUERYPARSE_WORD;
    } else {
        /* it was empty, so warn them of that */
        parser->warn |= QUERYPARSE_WARN_NONWORD;
    }
    goto endfile_label;

/* in a normal word, directly after a punctuation mark */
punc_label:
    while (parser->buf < parser->end) {
        c = *parser->buf++;
        parser->bytes++;
        switch (c) {
        case ASCII_CASE_UPPER:
            c = ASCII_TOLOWER(c);
            /* fallthrough */
        case ASCII_CASE_LOWER:
        case ASCII_CASE_DIGIT:
        case ASCII_CASE_EXTENDED:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD;
            }
            goto inword_label;

        case ASCII_CASE_PUNCTUATION:
            /* break across two punctuation marks in a row */
            word[*len] = '\0';
            parser->state = TOPLEVEL; /* transition back to toplevel */
            return QUERYPARSE_WORD;

        default:
        case ASCII_CASE_CONTROL:
            /* ignore junk characters */
            break;

        case ASCII_CASE_SPACE:
            /* word ended */
            if (*len) {
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD;
            } else {
                /* it was empty, so warn them of that */
                parser->warn |= QUERYPARSE_WARN_NONWORD;
                goto toplevel_label;
            }
            break;
        }
    }
    /* if we end in a word, we have to return it */
    if (*len) {
        word[*len] = '\0';
        parser->state = ENDFILE; /* transition to eof */
        return QUERYPARSE_WORD;
    } else {
        /* it was empty, so warn them of that */
        parser->warn |= QUERYPARSE_WARN_NONWORD;
    }
    goto endfile_label;

/* in a word that shouldn't be stopped (pretty much the same as normal word) */
inword_nostop_label:
    while (parser->buf < parser->end) {
        c = *parser->buf++;
        parser->bytes++;
        switch (c) {
        case ASCII_CASE_UPPER:
            c = ASCII_TOLOWER(c);
            /* fallthrough */
        case ASCII_CASE_LOWER:
        case ASCII_CASE_DIGIT:
        case ASCII_CASE_EXTENDED:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD_NOSTOP;
            }
            break;

        case ASCII_CASE_CONTROL:
            /* ignore */
            break;

        default:
            goto inword_nostop_punc_label;

        case ASCII_CASE_SPACE:
            /* word ended */
            if (*len) {
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD_NOSTOP;
            } else {
                /* it was empty, so warn them of that */
                parser->warn |= QUERYPARSE_WARN_NONWORD;
                goto toplevel_label;
            }
            break;
        }
    }
    /* if we end in a word, we have to return it */
    if (*len) {
        word[*len] = '\0';
        parser->state = ENDFILE; /* transition to eof */
        return QUERYPARSE_WORD_NOSTOP;
    } else {
        /* it was empty, so warn them of that */
        parser->warn |= QUERYPARSE_WARN_NONWORD;
    }
    goto endfile_label;

/* in a non-stopping word, directly after a punctuation mark */
inword_nostop_punc_label:
    while (parser->buf < parser->end) {
        c = *parser->buf++;
        parser->bytes++;
        switch (c) {
        case ASCII_CASE_UPPER:
            c = ASCII_TOLOWER(c);
            /* fallthrough */
        case ASCII_CASE_LOWER:
        case ASCII_CASE_DIGIT:
        case ASCII_CASE_EXTENDED:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD_NOSTOP;
            }
            goto inword_nostop_label;

        case ASCII_CASE_CONTROL:
            /* ignore */
            break;

        case '-':
        default: /* break across punctuation */
        case ASCII_CASE_SPACE:
            /* word ended */
            if (*len) {
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD_NOSTOP;
            } else {
                /* it was empty, so warn them of that */
                parser->warn |= QUERYPARSE_WARN_NONWORD;
                goto toplevel_label;
            }
            break;
        }
    }
    /* if we end in a word, we have to return it */
    if (*len) {
        word[*len] = '\0';
        parser->state = ENDFILE; /* transition to eof */
        return QUERYPARSE_WORD_NOSTOP;
    } else {
        /* it was empty, so warn them of that */
        parser->warn |= QUERYPARSE_WARN_NONWORD;
    }
    goto endfile_label;

/* in a word that should be excluded (pretty much the same as normal word) */
inword_exclude_label:
    while (parser->buf < parser->end) {
        c = *parser->buf++;
        parser->bytes++;
        switch (c) {
        case ASCII_CASE_UPPER:
            c = ASCII_TOLOWER(c);
            /* fallthrough */
        case ASCII_CASE_LOWER:
        case ASCII_CASE_DIGIT:
        case ASCII_CASE_EXTENDED:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD_EXCLUDE;
            }
            break;

        case ASCII_CASE_CONTROL:
            /* ignore */
            break;

        default: 
            goto inword_exclude_punc_label;

        case ASCII_CASE_SPACE:
            /* word ended */
            if (*len) {
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD_EXCLUDE;
            } else {
                /* it was empty, so warn them of that */
                parser->warn |= QUERYPARSE_WARN_NONWORD;
                goto toplevel_label;
            }
            break;
        }
    }
    /* if we end in a word, we have to return it */
    if (*len) {
        word[*len] = '\0';
        parser->state = ENDFILE; /* transition to eof */
        return QUERYPARSE_WORD_EXCLUDE;
    } else {
        /* it was empty, so warn them of that */
        parser->warn |= QUERYPARSE_WARN_NONWORD;
    }
    goto endfile_label;

/* in an excluded word, after a punctuation mark (note that, unlike the
 * other *_punc states, word characters do not leave this state) */
inword_exclude_punc_label:
    while (parser->buf < parser->end) {
        c = *parser->buf++;
        parser->bytes++;
        switch (c) {
        case ASCII_CASE_UPPER:
            c = ASCII_TOLOWER(c);
            /* fallthrough */
        case ASCII_CASE_LOWER:
        case ASCII_CASE_DIGIT:
        case ASCII_CASE_EXTENDED:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD_EXCLUDE;
            }
            break;

        case ASCII_CASE_CONTROL:
            /* ignore */
            break;

        case '-':
        default: /* break across junk characters */
        case ASCII_CASE_SPACE:
            /* word ended */
            if (*len) {
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD_EXCLUDE;
            } else {
                /* it was empty, so warn them of that */
                parser->warn |= QUERYPARSE_WARN_NONWORD;
                goto toplevel_label;
            }
            break;
        }
    }
    /* if we end in a word, we have to return it */
    if (*len) {
        word[*len] = '\0';
        parser->state = ENDFILE; /* transition to eof */
        return QUERYPARSE_WORD_EXCLUDE;
    } else {
        /* it was empty, so warn them of that */
        parser->warn |= QUERYPARSE_WARN_NONWORD;
    }
    goto endfile_label;

/* in what may be an OR operator */
inor_label:
    while (parser->buf < parser->end) {
        c = *parser->buf++;
        parser->bytes++;
        switch (c) {
        case ASCII_CASE_UPPER:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = ASCII_TOLOWER(c);
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD;
            }

            /* compare against the operator character at the position just
             * filled; running into the operator's terminating '\0' also
             * counts as a mismatch, so we never index past it */
            if (c != OR_OP[*len - 1]) {
                /* it turned out not to be OR, go back to regular word 
                 * parsing */
                goto inword_label;
            }
            break;

        case ASCII_CASE_LOWER:
        case ASCII_CASE_DIGIT:
        case ASCII_CASE_EXTENDED:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD;
            }
            /* it turned out not to be OR, go back to regular word parsing */
            goto inword_label;
            break;

        case ASCII_CASE_CONTROL:
            goto inword_label;
        
        case '-':
        default:
            goto punc_label;

        case ASCII_CASE_SPACE:
            /* word ended */
            if (!OR_OP[*len]) {
                /* whitespace ended OR operator */
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_OR;
            } else if (*len) {
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD;
            } else {
                /* it was empty, so warn them of that */
                parser->warn |= QUERYPARSE_WARN_NONWORD;
                goto toplevel_label;
            }
            break;
        }
    }
    /* if we end in an unfinished OR, return it as word */
    if (*len) {
        word[*len] = '\0';
        parser->state = ENDFILE;
        return QUERYPARSE_WORD;
    } else goto inword_label;

/* in what may be an AND operator */
inand_label:
    while (parser->buf < parser->end) {
        c = *parser->buf++;
        parser->bytes++;
        switch (c) {
        case ASCII_CASE_UPPER:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = ASCII_TOLOWER(c);
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD;
            }

            /* compare against the operator character at the position just
             * filled; running into the operator's terminating '\0' also
             * counts as a mismatch, so we never index past it */
            if (c != AND_OP[*len - 1]) {
                /* it turned out not to be AND, go back to regular word 
                 * parsing */
                goto inword_label;
            }
            break;

        case ASCII_CASE_LOWER:
        case ASCII_CASE_DIGIT:
        case ASCII_CASE_EXTENDED:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD;
            }
            /* it turned out not to be AND, go back to regular word parsing */
            goto inword_label;
            break;

        case ASCII_CASE_CONTROL:
            goto inword_label;

        case '-':
        default:
            goto punc_label;

        case ASCII_CASE_SPACE:
            /* word ended */
            if (!AND_OP[*len]) {
                /* whitespace ended AND operator */
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_AND;
            } else if (*len) {
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD;
            } else {
                /* it was empty, so warn them of that */
                parser->warn |= QUERYPARSE_WARN_NONWORD;
                goto toplevel_label;
            }
            break;
        }
    }
    /* if we end in an unfinished AND, return it as word */
    if (*len) {
        word[*len] = '\0';
        parser->state = ENDFILE;
        return QUERYPARSE_WORD;
    } else goto inword_label;

/* inside a phrase */
inphrase_label:
    while (parser->buf < parser->end) {
        c = *parser->buf++;
        parser->bytes++;
        switch (c) {
        case ASCII_CASE_UPPER:
            c = ASCII_TOLOWER(c);
            /* fallthrough */
        case ASCII_CASE_LOWER:
        case ASCII_CASE_DIGIT:
        case ASCII_CASE_EXTENDED:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = INPHRASE; /* transition back to phrase */
                return QUERYPARSE_WORD;
            }

            /* phrase word started */
            goto inphrase_word_label;
            break;

        case PHRASE_DELIM:
            /* phrase ended */
            goto inphrase_end_label;
            break;

        case ASCII_CASE_SPACE:
            /* silently eat whitespace characters */
            break;

        case ASCII_CASE_CONTROL:
        default:
            /* phrase word started (character itself is not recorded) */
            goto inphrase_word_label;
            break;
        }
    }
    /* if we end in an unfinished phrase, warn them */
    parser->warn |= QUERYPARSE_WARN_QUOTES_UNMATCHED;
    goto endfile_label;
 
/* in a word, inside a phrase */
inphrase_word_label:
    while (parser->buf < parser->end) {
        c = *parser->buf++;
        parser->bytes++;
        switch (c) {
        case ASCII_CASE_UPPER:
            c = ASCII_TOLOWER(c);
            /* fallthrough */
        case ASCII_CASE_LOWER:
        case ASCII_CASE_DIGIT:
        case ASCII_CASE_EXTENDED:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = INPHRASE; /* transition back to phrase */
                return QUERYPARSE_WORD;
            }
            break;

        case PHRASE_DELIM:
            /* phrase ended */
            if (*len) {
                word[*len] = '\0';
                parser->state = INPHRASE_END;
                return QUERYPARSE_WORD;
            } else {
                /* it was empty, so warn them of that */
                parser->warn |= QUERYPARSE_WARN_NONWORD;
                goto inphrase_end_label;
            }
            break;

        case '-':
            /* hyphens are kept as part of phrase words */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = INPHRASE; /* transition back to phrase */
                return QUERYPARSE_WORD;
            }
            break;

        case ASCII_CASE_CONTROL:
            /* ignore */
            break;

        default: 
            goto inphrase_word_punc_label;

        case ASCII_CASE_SPACE:
            /* phrase word ended */
            if (*len) {
                word[*len] = '\0';
                parser->state = INPHRASE;
                return QUERYPARSE_WORD;
            } else {
                /* it was empty, so warn them of that.  We are still inside
                 * the phrase, so continue there: dropping to toplevel would
                 * lose the phrase and misread its closing delimiter as the
                 * start of a new one */
                parser->warn |= QUERYPARSE_WARN_NONWORD;
                goto inphrase_label;
            }
            break;

        case CASE_END_SENTENCE:
            /* watch out for the end of sentences */
            goto endsentence_label;
            break;
        }
    }
    /* if we end in a word in an unfinished phrase, warn them and return word */
    parser->warn |= QUERYPARSE_WARN_QUOTES_UNMATCHED;
    if (*len) {
        word[*len] = '\0';
        parser->state = ENDFILE;
        return QUERYPARSE_WORD;
    } else goto inphrase_end_label;
 
/* in a word inside a phrase, directly after a punctuation mark */
inphrase_word_punc_label:
    while (parser->buf < parser->end) {
        c = *parser->buf++;
        parser->bytes++;
        switch (c) {
        case ASCII_CASE_UPPER:
            c = ASCII_TOLOWER(c);
            /* fallthrough */
        case ASCII_CASE_LOWER:
        case ASCII_CASE_DIGIT:
        case ASCII_CASE_EXTENDED:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = INPHRASE; /* transition back to phrase */
                return QUERYPARSE_WORD;
            }

            /* back to the regular phrase word state */
            goto inphrase_word_label;
            break;

        case PHRASE_DELIM:
            /* phrase ended */
            if (*len) {
                word[*len] = '\0';
                parser->state = INPHRASE_END;
                return QUERYPARSE_WORD;
            } else {
                /* it was empty, so warn them of that */
                parser->warn |= QUERYPARSE_WARN_NONWORD;
                goto inphrase_end_label;
            }
            break;

        case ASCII_CASE_CONTROL:
            /* ignore */
            break;

        default: /* break across junk characters */
        case ASCII_CASE_SPACE:
            /* phrase word ended */
            if (*len) {
                word[*len] = '\0';
                parser->state = INPHRASE;
                return QUERYPARSE_WORD;
            } else {
                /* it was empty, so warn them of that.  We are still inside
                 * the phrase, so continue there rather than at toplevel */
                parser->warn |= QUERYPARSE_WARN_NONWORD;
                goto inphrase_label;
            }
            break;

        case CASE_END_SENTENCE:
            /* watch out for the end of sentences */
            goto endsentence_label;
            break;
        }
    }
    /* if we end in a word in an unfinished phrase, warn them and return word */
    parser->warn |= QUERYPARSE_WARN_QUOTES_UNMATCHED;
    if (*len) {
        word[*len] = '\0';
        parser->state = ENDFILE;
        return QUERYPARSE_WORD;
    } else goto inphrase_end_label;

/* need to return phrase end and then continue */
inphrase_end_label:
    parser->state = TOPLEVEL;
    return QUERYPARSE_END_PHRASE;

/* need to return sentence end and then continue */
endsentence_end_label:
    parser->state = INPHRASE;
    return QUERYPARSE_END_SENTENCE;

/* in what might be the end of a sentence */
endsentence_label:
    while (parser->buf < parser->end) {
        /* don't consume this character (yet) */
        c = *parser->buf;
        switch (c) {
        case ASCII_CASE_SPACE:
            /* it was the end of a sentence */
            parser->buf++;        /* consume the byte */
            parser->bytes++;
            if (!*len) {
                /* no word was collected before the sentence end (the
                 * phrase word consisted of non-word characters only), so
                 * there is nothing to return: warn and keep parsing the
                 * phrase instead of handing back an empty word */
                parser->warn |= QUERYPARSE_WARN_NONWORD;
                goto inphrase_label;
            }
            word[*len] = '\0';
            parser->state = ENDSENTENCE_END;
            return QUERYPARSE_WORD;

        default:
            /* back to a normal phrase word */
            goto inphrase_word_label;
            break;
        }
    }
    /* finishing in endsentence means we need to return the phrase word */
    if (*len) {
        word[*len] = '\0';
        parser->state = INPHRASE;
        return QUERYPARSE_WORD;
    } else goto inphrase_word_label;

/* parsing the modifier of a modifier */
inmod_mod_label:
    while (parser->buf < parser->end) {
        c = *parser->buf++;
        parser->bytes++;
        switch (c) {
        case ASCII_CASE_UPPER:
            c = ASCII_TOLOWER(c);
            /* fallthrough */
        case '-':
        case ASCII_CASE_LOWER:
        case ASCII_CASE_DIGIT:
        case ASCII_CASE_EXTENDED:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = TOPLEVEL; /* transition back to toplevel */
                return QUERYPARSE_WORD;
            }
            break;

        case MOD_STRING_END:
            /* end of the modifier in the mod */
            word[*len] = '\0';
            parser->state = INMOD;        /* transition to inmod */
            return QUERYPARSE_START_MODIFIER;
            break;

        case MOD_START:
            parser->warn |= QUERYPARSE_WARN_PARENS_NESTED; /* warn,fallthrough*/
        default:
        case ASCII_CASE_SPACE:
            /* it wasn't a mod after all (return word) */
            word[*len] = '\0';
            parser->state = TOPLEVEL;     /* transition back to toplevel */
            return QUERYPARSE_WORD;
            break;
        }
    }
    /* if we end in a mod, return as word */
    parser->warn |= QUERYPARSE_WARN_PARENS_UNMATCHED;
    if (*len) {
        word[*len] = '\0';
        parser->state = ENDFILE;
        return QUERYPARSE_WORD;
    } else goto inmod_end_label;

/* in the parameters of a modifier */
inmod_label:
    while (parser->buf < parser->end) {
        c = *parser->buf++;
        parser->bytes++;
        switch (c) {
        case ASCII_CASE_UPPER:
            c = ASCII_TOLOWER(c);
            /* fallthrough */
        case ASCII_CASE_LOWER:
        case ASCII_CASE_DIGIT:
        case ASCII_CASE_EXTENDED:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = INMOD; /* transition to this state */
                return QUERYPARSE_WORD;
            }

            /* start parsing word */
            goto inmod_word_label;
            break;

        case MOD_END:
            goto inmod_end_label;
            break;

        case ASCII_CASE_SPACE:
            /* silently eat whitespace */
            break;

        case MOD_START:
            parser->warn |= QUERYPARSE_WARN_PARENS_NESTED; /* warn,fallthrough*/
        default:
            /* anything else starts a word, but isn't recorded */
            goto inmod_word_label;
            break;
        }
    }
    /* ending inside a modifier: warn about the unmatched parenthesis */
    parser->warn |= QUERYPARSE_WARN_PARENS_UNMATCHED;
    goto endfile_label;

/* in a word, in a modifier */
inmod_word_label:
    while (parser->buf < parser->end) {
        c = *parser->buf++;
        parser->bytes++;
        switch (c) {
        case ASCII_CASE_UPPER:
            c = ASCII_TOLOWER(c);
            /* fallthrough */
        case '-':
        case ASCII_CASE_LOWER:
        case ASCII_CASE_DIGIT:
            /* push character onto word */
            if (*len < parser->maxwordlen) {
                word[(*len)++] = c;
            } else {
                /* word finished due to length constraint */
                word[*len] = '\0';
                parser->state = INMOD; /* transition back to inmod */
                return QUERYPARSE_WORD;
            }
            break;

        case MOD_END:
            if (*len) {
                /* return recorded word */
                word[*len] = '\0';
                parser->state = INMOD_END; 
                return QUERYPARSE_WORD;
            } else {
                /* it was empty, so warn them of that */
                parser->warn |= QUERYPARSE_WARN_NONWORD;
                goto inmod_end_label;
            }
            break;

        case '\'':
            /* don't break across apostrophe */
            break;

        case MOD_START:
            parser->warn |= QUERYPARSE_WARN_PARENS_NESTED; /* warn,fallthrough*/
        default: /* break across other crap */
        case ASCII_CASE_SPACE:
            if (*len) {
                /* return recorded word */
                word[*len] = '\0';
                parser->state = INMOD; 
                return QUERYPARSE_WORD;
            } else {
                /* it was empty, so warn them of that */
                parser->warn |= QUERYPARSE_WARN_NONWORD;
                goto inmod_label;
            }
            break;
        }
    }
    /* if we end in a mod, return as word */
    parser->warn |= QUERYPARSE_WARN_PARENS_UNMATCHED;
    if (*len) {
        word[*len] = '\0';
        parser->state = ENDFILE;
        return QUERYPARSE_WORD;
    } else goto inmod_end_label;

/* modifier has ended */
inmod_end_label:
    parser->state = TOPLEVEL;
    return QUERYPARSE_END_MODIFIER;

/* input exhausted */
endfile_label:
    parser->state = ENDFILE;
    return QUERYPARSE_EOF;

/* no goto in this function targets this label; presumably JUMP lands here
 * for an unrecognised parser->state */
err_label:
    if (!parser->err) {
        parser->err = EINVAL;
    }
    return ERR;
}