/**
 * Make sure the string s is a valid connector.
 *
 * A connector is accepted when it is non-empty, ends in '+' or '-',
 * optionally starts with a single '@', has an upper-case first letter
 * (after the optional '@'), does not begin with "ID", and every
 * character before the trailing '+'/'-' is alphanumeric, '*' or '^'.
 *
 * Return 1 if the connector is valid, else return 0,
 * and print an appropriate warning message.
 */
static int check_connector(Dictionary dict, const char * s)
{
	size_t len = strlen(s);
	char last;

	if (len < 1) {
		dict_error(dict, "Expecting a connector.");
		return 0;
	}

	last = s[len-1];  /* the last character of the token */
	if ((last != '+') && (last != '-')) {
		dict_error(dict, "A connector must end in a \"+\" or \"-\".");
		return 0;
	}

	if (*s == '@') s++;

	/* The <ctype.h> classifiers require a value representable as
	 * unsigned char (or EOF); a plain char may be negative for bytes
	 * >= 0x80, so convert through unsigned char first. */
	if (!isupper((unsigned char)*s)) {
		dict_error(dict, "The first letter of a connector must be in [A--Z].");
		return 0;
	}
	if ((*s == 'I') && (*(s+1) == 'D')) {
		dict_error(dict, "Connectors beginning with \"ID\" are forbidden");
		return 0;
	}

	/* Stop one short of the terminator: the final character is the
	 * '+' or '-' that was already validated above. */
	while (*(s+1)) {
		if ((!isalnum((unsigned char)*s)) && (*s != '*') && (*s != '^')) {
			dict_error(dict, "All letters of a connector must be ASCII alpha-numeric.");
			return 0;
		}
		s++;
	}
	return 1;
}
/**
 * Make sure the wide string s is a valid connector.
 *
 * A connector is accepted when it is non-empty, ends in '+' or '-',
 * optionally starts with a single '@', has an upper-case first letter
 * (after the optional '@'), does not begin with "ID", and every
 * character before the trailing '+'/'-' is alphanumeric, '*' or '^'.
 *
 * Return 1 if the connector is valid, else return 0 after reporting
 * the problem through dict_error().
 */
int check_connector(Dictionary dict, wchar_t * s)
{
	size_t len = wcslen(s);
	wchar_t last;

	if (len < 1) {
		dict_error(dict, L"Expecting a connector.");
		return 0;
	}

	last = s[len-1];  /* the last character of the token */
	if ((last != L'+') && (last != L'-')) {
		dict_error(dict, L"A connector must end in a \"+\" or \"-\".");
		return 0;
	}

	if (*s == L'@') s++;

	if (!iswupper((wint_t)*s)) {
		dict_error(dict, L"The first letter of a connector must be in [A--Z].");
		return 0;
	}
	if ((*s == L'I') && (*(s+1) == L'D')) {
		dict_error(dict, L"Connectors beginning with \"ID\" are forbidden");
		return 0;
	}

	/* Stop one short of the terminator: the final character is the
	 * '+' or '-' that was already validated above. */
	while (*(s+1)) {
		/* Use the wide-character classifier: the narrow isalnum() is
		 * only defined for values representable as unsigned char. */
		if ((!iswalnum((wint_t)*s)) && (*s != L'*') && (*s != L'^')) {
			dict_error(dict, L"All letters of a connector must be alpha-numeric.");
			return 0;
		}
		s++;
	}
	return 1;
}
/**
 * Build (and return the root of) the tree for the expression beginning
 * with the current token.  At the end, the token is the first one not
 * part of this expression.
 *
 * Handles "( ... )", "{ ... }" (wrapped in an optional node),
 * "[ ... ]" (cost increased by 1), a bare connector or word, and the
 * empty forms "()" / "[]".
 *
 * Returns NULL on a parse error.
 */
Exp * expression(Dictionary dict)
{
	Exp * n;

	if (is_equal(dict, L'(')) {
		if (!advance(dict)) {
			return NULL;
		}
		n = in_parens(dict);
		/* NOTE(review): in_parens() is assumed to return NULL on a parse
		 * error, as expression() itself does; stop here instead of
		 * carrying a NULL sub-tree further. */
		if (n == NULL) {
			return NULL;
		}
		if (!is_equal(dict, L')')) {
			dict_error(dict, L"Expecting a \")\".");
			return NULL;
		}
		if (!advance(dict)) {
			return NULL;
		}
	} else if (is_equal(dict, L'{')) {
		if (!advance(dict)) {
			return NULL;
		}
		n = in_parens(dict);
		if (n == NULL) {
			return NULL;
		}
		if (!is_equal(dict, L'}')) {
			dict_error(dict, L"Expecting a \"}\".");
			return NULL;
		}
		if (!advance(dict)) {
			return NULL;
		}
		n = make_optional_node(dict, n);
	} else if (is_equal(dict, L'[')) {
		if (!advance(dict)) {
			return NULL;
		}
		n = in_parens(dict);
		/* Without this check the cost update below would dereference a
		 * NULL pointer when the bracketed contents failed to parse. */
		if (n == NULL) {
			return NULL;
		}
		if (!is_equal(dict, L']')) {
			dict_error(dict, L"Expecting a \"]\".");
			return NULL;
		}
		if (!advance(dict)) {
			return NULL;
		}
		n->cost += 1;
	} else if (!dict->is_special) {
		n = connector(dict);
		if (n == NULL) {
			return NULL;
		}
	} else if (is_equal(dict, L')') || is_equal(dict, L']')) {
		/* allows "()" or "[]" */
		n = make_zeroary_node(dict);
	} else {
		dict_error(dict, L"Connector, \"(\", \"[\", or \"{\" expected.");
		return NULL;
	}
	return n;
}
/**
 * Build (and return the root of) the tree for the expression beginning
 * with the current token.  At the end, the token is the first one not
 * part of this expression.
 *
 * Handles "( ... )", "{ ... }" (wrapped in an optional node),
 * "[ ... ]" (cost increased by 1), a bare connector or word, and the
 * empty forms "()" / "[]".
 *
 * Returns NULL on a parse error.
 */
Exp * expression(Dictionary dict)
{
	Exp * n;

	if (is_equal(dict, '('))
	{
		if (!link_advance(dict)) {
			return NULL;
		}
		n = in_parens(dict);
		/* NOTE(review): in_parens() is assumed to return NULL on a parse
		 * error, as expression() itself does; stop here instead of
		 * carrying a NULL sub-tree further. */
		if (n == NULL) {
			return NULL;
		}
		if (!is_equal(dict, ')')) {
			dict_error(dict, "Expecting a \")\".");
			return NULL;
		}
		if (!link_advance(dict)) {
			return NULL;
		}
	}
	else if (is_equal(dict, '{'))
	{
		if (!link_advance(dict)) {
			return NULL;
		}
		n = in_parens(dict);
		if (n == NULL) {
			return NULL;
		}
		if (!is_equal(dict, '}')) {
			dict_error(dict, "Expecting a \"}\".");
			return NULL;
		}
		if (!link_advance(dict)) {
			return NULL;
		}
		n = make_optional_node(dict, n);
	}
	else if (is_equal(dict, '['))
	{
		if (!link_advance(dict)) {
			return NULL;
		}
		n = in_parens(dict);
		/* Without this check the cost update below would dereference a
		 * NULL pointer when the bracketed contents failed to parse. */
		if (n == NULL) {
			return NULL;
		}
		if (!is_equal(dict, ']')) {
			dict_error(dict, "Expecting a \"]\".");
			return NULL;
		}
		if (!link_advance(dict)) {
			return NULL;
		}
		n->cost += 1.0f;
	}
	else if (!dict->is_special)
	{
		n = connector(dict);
		if (n == NULL) {
			return NULL;
		}
	}
	else if (is_equal(dict, ')') || is_equal(dict, ']'))
	{
		/* allows "()" or "[]" */
		n = make_zeroary_node(dict);
	}
	else
	{
		dict_error(dict, "Connector, \"(\", \"[\", or \"{\" expected.");
		return NULL;
	}
	return n;
}
/**
 * Parse the operand list that follows an "and" or "or".
 *
 * The current token is the first thing after the operator.  Operands
 * are read with expression() until a closing ')', ']' or '}' is the
 * current token.  The new node gets the given type, a cost of 0, and
 * the operands as its child list, in input order.
 *
 * Returns NULL if an operand fails to parse or if there are no
 * operands at all.
 */
Exp * operator_exp(Dictionary dict, int type)
{
	Exp * node;
	E_list * head = NULL;
	E_list ** link = &head;   /* where the next list cell gets attached */
	E_list * cell;

	node = Exp_create(dict);
	node->type = type;
	node->cost = 0;

	for (;;) {
		if (is_equal(dict, L')') || is_equal(dict, L']') || is_equal(dict, L'}')) {
			break;
		}
		cell = (E_list *) xalloc(sizeof(E_list));
		*link = cell;
		link = &cell->next;
		cell->next = NULL;
		cell->e = expression(dict);
		if (cell->e == NULL) {
			return NULL;
		}
	}

	if (head == NULL) {
		dict_error(dict, L"An \"or\" or \"and\" of nothing");
		return NULL;
	}

	node->u.l = head;
	return node;
}
/**
 * Parse the operand list that follows an "and" or "or".
 *
 * The current token is the first thing after the operator.  Operands
 * are read with expression() until a closing ')', ']' or '}' is the
 * current token.  The new node gets the given type, a cost of 0, and
 * the operands as its child list, in input order.
 *
 * Returns NULL if an operand fails to parse or if there are no
 * operands at all.
 */
Exp * operator_exp(Dictionary dict, int type)
{
	Exp * node;
	E_list * head = NULL;
	E_list ** link = &head;   /* where the next list cell gets attached */
	E_list * cell;

	node = Exp_create(dict);
	node->type = type;
	node->cost = 0.0f;

	for (;;)
	{
		if (is_equal(dict, ')') || is_equal(dict, ']') || is_equal(dict, '}'))
		{
			break;
		}
		cell = (E_list *) xalloc(sizeof(E_list));
		*link = cell;
		link = &cell->next;
		cell->next = NULL;
		cell->e = expression(dict);
		if (cell->e == NULL)
		{
			return NULL;
		}
	}

	if (head == NULL)
	{
		dict_error(dict, "An \"or\" or \"and\" of nothing");
		return NULL;
	}

	node->u.l = head;
	return node;
}
/**
 * connector() -- make a node for a connector or dictionary word.
 *
 * Assumes the current token is a connector or dictionary word.
 * A token ending in '+' or '-' is treated as a connector and is
 * validated with check_connector(); anything else is looked up as a
 * previously defined word and its expression is wrapped in a unary
 * node.  On success the tokenizer is advanced past the token.
 *
 * Returns the new node, or NULL on error.
 */
static Exp * connector(Dictionary dict)
{
	Exp * n;
	Dict_node *dn, *dn_head;
	size_t len;
	int i;

	len = strlen(dict->token);
	i = (int)len - 1;  /* this must be + or - if a connector */

	/* link_advance() can leave an empty, non-special token, and
	 * expression() calls us for any non-special token.  Test the length
	 * first so that token[-1] is never read; an empty token simply takes
	 * the word path, where the lookup reports the error. */
	if ((len == 0) ||
	    ((dict->token[i] != '+') && (dict->token[i] != '-')))
	{
		/* If we are here, token is a word */
		dn_head = abridged_lookup_list(dict, dict->token);
		dn = dn_head;
		while ((dn != NULL) && (strcmp(dn->string, dict->token) != 0))
		{
			dn = dn->right;
		}
		if (dn == NULL)
		{
			free_lookup_list(dn_head);
			dict_error(dict, "\nPerhaps missing + or - in a connector.\n"
			                 "Or perhaps you forgot the suffix on a word.\n"
			                 "Or perhaps a word is used before it is defined.\n");
			return NULL;
		}
		n = make_unary_node(dict, dn->exp);
		free_lookup_list(dn_head);
	}
	else
	{
		/* If we are here, token is a connector */
		if (!check_connector(dict, dict->token))
		{
			return NULL;
		}
		n = Exp_create(dict);
		n->dir = dict->token[i];
		dict->token[i] = '\0';  /* get rid of the + or - */
		if (dict->token[0] == '@')
		{
			/* A leading '@' marks a multi-connector; it is not part of
			 * the stored name. */
			n->u.string = string_set_add(dict->token+1, dict->string_set);
			n->multi = TRUE;
		}
		else
		{
			n->u.string = string_set_add(dict->token, dict->string_set);
			n->multi = FALSE;
		}
		n->type = CONNECTOR_type;
		n->cost = 0.0f;
	}

	if (!link_advance(dict))
	{
		exp_free(n);
		return NULL;
	}
	return n;
}
/**
 * connector() -- make a node for a connector or dictionary word.
 *
 * The current token is a connector (or a dictionary word).
 * A token ending in '+' or '-' is treated as a connector and is
 * validated with check_connector(); anything else is looked up as a
 * previously defined word and its expression is wrapped in a unary
 * node.  On success the tokenizer is advanced past the token.
 *
 * Returns the new node, or NULL on error.
 */
Exp * connector(Dictionary dict)
{
	Exp * n;
	Dict_node * dn;
	size_t len;
	int i;

	len = wcslen(dict->token);
	i = (int)len - 1;  /* this must be + or - if a connector */

	/* advance() can leave an empty, non-special token (at WEOF), and
	 * expression() calls us for any non-special token.  Test the length
	 * first so that token[-1] is never read; an empty token simply takes
	 * the word path, where the lookup reports the error. */
	if ((len == 0) ||
	    ((dict->token[i] != L'+') && (dict->token[i] != L'-'))) {
		dn = abridged_lookup(dict, dict->token);
		while ((dn != NULL) && (wcscmp(dn->string, dict->token) != 0)) {
			dn = dn->right;
		}
		if (dn == NULL) {
			dict_error(dict, L"\nPerhaps missing + or - in a connector.\n"
			                 L"Or perhaps you forgot the suffix on a word.\n"
			                 L"Or perhaps a word is used before it is defined.\n");
			return NULL;
		}
		n = make_unary_node(dict, dn->exp);
	} else {
		if (!check_connector(dict, dict->token)) {
			return NULL;
		}
		n = Exp_create(dict);
		n->dir = dict->token[i];
		dict->token[i] = L'\0';  /* get rid of the + or - */
		if (dict->token[0] == L'@') {
			/* A leading '@' marks a multi-connector; it is not part of
			 * the stored name. */
			n->u.string = string_set_add(dict->token+1, dict->string_set);
			n->multi = TRUE;
		} else {
			n->u.string = string_set_add(dict->token, dict->string_set);
			n->multi = FALSE;
		}
		n->type = CONNECTOR_type;
		n->cost = 0;
	}

	if (!advance(dict)) {
		return NULL;
	}
	return n;
}
/**
 * Insert the new node into the dictionary below node n.
 * Give error message if the new element's string is already there.
 * Assumes that the "n" field of new is already set, and the left
 * and right fields of it are NULL.
 *
 * The resulting tree is highly unbalanced. It needs to be rebalanced
 * before used.
 *
 * Returns newnode when n is NULL, and NULL when newnode's string
 * compares equal to the string of n itself.  In every other case n is
 * returned.  A duplicate found deeper in the tree is reported but
 * leaves the existing tree untouched.
 */
Dict_node * insert_dict(Dictionary dict, Dict_node * n, Dict_node * newnode)
{
	int comp;
	Dict_node * sub;

	if (NULL == n) return newnode;

	comp = dict_order(newnode->string, n->string);
	if (comp < 0)
	{
		if (NULL == n->left)
		{
			n->left = newnode;
			return n;
		}
		/* A NULL result means a duplicate was found further down.
		 * Storing it would detach the whole existing left subtree, so
		 * the link is only updated on success. */
		sub = insert_dict(dict, n->left, newnode);
		if (NULL != sub) n->left = sub;
		return n;
		/* return rebalance(n); Uncomment to get an AVL tree */
	}
	else if (comp > 0)
	{
		if (NULL == n->right)
		{
			n->right = newnode;
			return n;
		}
		/* Same as above: never overwrite the right link with NULL. */
		sub = insert_dict(dict, n->right, newnode);
		if (NULL != sub) n->right = sub;
		return n;
		/* return rebalance(n); Uncomment to get an AVL tree */
	}
	else
	{
		char t[256];
		snprintf(t, 256, "The word \"%s\" has been multiply defined\n", newnode->string);
		dict_error(dict, t);
		return NULL;
	}
}
/**
 * Parse an expression that may use the infix operators "&" / "and"
 * and "|" / "or".
 *
 * First a single operand is read: a parenthesised, braced (optional)
 * or bracketed (cost + 1) expression, a connector or word, or an empty
 * "()" / "[]".  If an infix operator follows, the right-hand side is
 * parsed recursively and both sides are joined under an AND_type or
 * OR_type node with cost 0.
 *
 * and_ok / or_ok say whether an "and" / "or" is acceptable at this
 * level; a warning is issued when the operator found is not.
 *
 * Returns the root of the parsed tree, or NULL on error.
 */
Exp * restricted_expression(Dictionary dict, int and_ok, int or_ok)
{
	Exp * lhs = NULL;
	Exp * rhs;
	Exp * joined;
	E_list * left_cell;
	E_list * right_cell;
	int joining_and;
	int allowed;

	/* ---- left operand ---- */
	if (is_equal(dict, L'(')) {
		if (!advance(dict)) return NULL;
		lhs = expression(dict);
		if (lhs == NULL) return NULL;
		if (!is_equal(dict, L')')) {
			dict_error(dict, L"Expecting a \")\".");
			return NULL;
		}
		if (!advance(dict)) return NULL;
	}
	else if (is_equal(dict, L'{')) {
		if (!advance(dict)) return NULL;
		lhs = expression(dict);
		if (lhs == NULL) return NULL;
		if (!is_equal(dict, L'}')) {
			dict_error(dict, L"Expecting a \"}\".");
			return NULL;
		}
		if (!advance(dict)) return NULL;
		lhs = make_optional_node(dict, lhs);
	}
	else if (is_equal(dict, L'[')) {
		if (!advance(dict)) return NULL;
		lhs = expression(dict);
		if (lhs == NULL) return NULL;
		if (!is_equal(dict, L']')) {
			dict_error(dict, L"Expecting a \"]\".");
			return NULL;
		}
		if (!advance(dict)) return NULL;
		lhs->cost += 1;
	}
	else if (!dict->is_special) {
		lhs = connector(dict);
		if (lhs == NULL) return NULL;
	}
	else if (is_equal(dict, L')') || is_equal(dict, L']')) {
		/* allows "()" or "[]" */
		lhs = make_zeroary_node(dict);
	}
	else {
		dict_error(dict, L"Connector, \"(\", \"[\", or \"{\" expected.");
		return NULL;
	}

	/* ---- optional infix operator ---- */
	if (is_equal(dict, L'&') || (wcscmp(dict->token, L"and") == 0)) {
		joining_and = TRUE;
	}
	else if (is_equal(dict, L'|') || (wcscmp(dict->token, L"or") == 0)) {
		joining_and = FALSE;
	}
	else {
		/* No operator: the operand alone is the expression. */
		return lhs;
	}

	allowed = joining_and ? and_ok : or_ok;
	if (!allowed) {
		warning(dict, L"\"and\" and \"or\" at the same level in an expression");
	}
	if (!advance(dict)) return NULL;

	/* The right-hand side may only continue with the same operator. */
	if (joining_and) {
		rhs = restricted_expression(dict, TRUE, FALSE);
	} else {
		rhs = restricted_expression(dict, FALSE, TRUE);
	}
	if (rhs == NULL) return NULL;

	/* ---- two-element node joining both sides ---- */
	joined = Exp_create(dict);
	left_cell = (E_list *) xalloc(sizeof(E_list));
	joined->u.l = left_cell;
	right_cell = (E_list *) xalloc(sizeof(E_list));
	left_cell->next = right_cell;
	right_cell->next = NULL;
	left_cell->e = lhs;
	right_cell->e = rhs;
	joined->type = joining_and ? AND_type : OR_type;
	joined->cost = 0;

	return joined;
}
/**
 * Read the next token from the input into dict->token.
 *
 * A character pushed back by the previous call (dict->already_got_it)
 * is returned first as a one-character token.  Otherwise leading white
 * space is skipped and characters are collected until white space, a
 * character from SPECIAL, or WEOF.  A SPECIAL character read first is
 * a token of its own and sets dict->is_special.  Text between double
 * quotes is taken literally but may not contain white space.
 *
 * Returns 1 on success, 0 after reporting an error.
 */
int advance(Dictionary dict)
{
	wint_t ch;
	int len = 0;
	int in_quotes = FALSE;

	dict->is_special = FALSE;

	/* Deliver the character saved by the previous call, if any. */
	if (dict->already_got_it != L'\0') {
		dict->is_special = (wcschr(SPECIAL, dict->already_got_it) != NULL);
		if (dict->already_got_it == WEOF) {
			dict->token[0] = L'\0';
		} else {
			dict->token[0] = dict->already_got_it;
			dict->token[1] = L'\0';
		}
		dict->already_got_it = L'\0';
		return 1;
	}

	/* Skip leading white space. */
	ch = get_character(dict, FALSE);
	while (iswspace(ch)) {
		ch = get_character(dict, FALSE);
	}

	for (;; ch = get_character(dict, in_quotes)) {
		if (len > MAX_TOKEN_LENGTH-1) {
			dict_error(dict, L"Token too long");
			return 0;
		}

		if (in_quotes) {
			if (ch == L'\"') {
				/* The closing quote ends the token. */
				dict->token[len] = L'\0';
				return 1;
			}
			if (iswspace(ch)) {
				dict_error(dict, L"White space inside of token");
				return 0;
			}
			dict->token[len] = ch;
			len++;
			continue;
		}

		if (wcschr(SPECIAL, ch) != NULL) {
			if (len == 0) {
				/* A special character is a token of its own. */
				dict->token[0] = ch;
				dict->token[1] = L'\0';
				dict->is_special = TRUE;
			} else {
				/* It ends the current token; keep it for the next call. */
				dict->token[len] = L'\0';
				dict->already_got_it = ch;
			}
			return 1;
		}

		if (ch == WEOF) {
			dict->token[len] = L'\0';
			if (len != 0) {
				dict->already_got_it = ch;
			}
			return 1;
		}

		if (iswspace(ch)) {
			dict->token[len] = L'\0';
			return 1;
		}

		if (ch == L'\"') {
			in_quotes = TRUE;
		} else {
			dict->token[len] = ch;
			len++;
		}
	}
}
/**
 * Read the next token from the input into dict->token.
 *
 * A character pushed back by the previous call (dict->already_got_it)
 * is returned first as a one-character token.  Otherwise leading white
 * space is skipped and characters are collected until white space, a
 * special character, or a 0 character.  A special character read first
 * is a token of its own and sets dict->is_special.  Text between double
 * quotes is taken literally but may not contain white space.
 *
 * Input is read as wide characters, but the token is stored as a
 * multi-byte string.
 *
 * Return 1 if a token was read, else return 0 (and print a warning).
 */
static int link_advance(Dictionary dict)
{
	wchar_t ch;
	int nbytes;
	int len = 0;
	int in_quotes = FALSE;

	dict->is_special = FALSE;

	/* Deliver the character saved by the previous call, if any. */
	if (dict->already_got_it != '\0')
	{
		dict->is_special = is_special(dict->already_got_it, &dict->mbss);
		if (dict->already_got_it == WEOF)
		{
			dict->token[0] = '\0';
		}
		else
		{
			dict->token[0] = dict->already_got_it; /* specials are one byte */
			dict->token[1] = '\0';
		}
		dict->already_got_it = '\0';
		return 1;
	}

	/* Skip leading white space. */
	ch = get_character(dict, FALSE);
	while (iswspace(ch))
	{
		ch = get_character(dict, FALSE);
	}

	for (;; ch = get_character(dict, in_quotes))
	{
		if (len > MAX_TOKEN_LENGTH-3)  /* 3 for multi-byte tokens */
		{
			dict_error(dict, "Token too long");
			return 0;
		}

		if (in_quotes)
		{
			if (ch == '\"')
			{
				/* The closing quote ends the token. */
				dict->token[len] = '\0';
				return 1;
			}
			if (iswspace(ch))
			{
				dict_error(dict, "White space inside of token");
				return 0;
			}

			/* Although we read wide chars, we store UTF8 internally, always. */
			nbytes = wcrtomb(&dict->token[len], ch, &dict->mbss);
			if (nbytes < 0)
			{
#ifndef _WIN32
				dict_error2(dict, "Unable to read UTF8 string in current locale",
				            nl_langinfo(CODESET));
				fprintf (stderr, "\tTry setting the locale with \"export LANG=en_US.UTF-8\"\n");
#else
				dict_error(dict, "Unable to read UTF8 string in current locale");
#endif
				return 0;
			}
			len += nbytes;
			continue;
		}

		if (is_special(ch, &dict->mbss))
		{
			if (len == 0)
			{
				dict->token[0] = ch;  /* special toks are one char always */
				dict->token[1] = '\0';
				dict->is_special = TRUE;
			}
			else
			{
				/* It ends the current token; keep it for the next call. */
				dict->token[len] = '\0';
				dict->already_got_it = ch;
			}
			return 1;
		}

		if (ch == 0x0)
		{
			dict->token[len] = '\0';
			if (len != 0)
			{
				dict->already_got_it = ch;
			}
			return 1;
		}

		if (iswspace(ch))
		{
			dict->token[len] = '\0';
			return 1;
		}

		if (ch == '\"')
		{
			in_quotes = TRUE;
			continue;
		}

		/* store UTF8 internally, always. */
		nbytes = wctomb_check(&dict->token[len], ch, &dict->mbss);
		if (nbytes < 0)
		{
#ifndef _WIN32
			dict_error2(dict, "Unable to read UTF8 string in current locale",
			            nl_langinfo(CODESET));
			fprintf (stderr, "\tTry setting the locale with \"export LANG=en_US.UTF-8\"\n");
#else
			dict_error(dict, "Unable to read UTF8 string in current locale");
#endif
			return 0;
		}
		len += nbytes;
	}
}
/**
 * read_entry() -- read one dictionary entry
 * Starting with the current token, parse one dictionary entry.
 * A single dictionary entry must have one and only one colon in it,
 * and is terminated by a semi-colon.
 * Add these words to the dictionary.
 *
 * Every token before the ':' is either a word or, when it starts with
 * '/' (but not "/."), the name of a word file to read.  The expression
 * after the ':' is attached to every collected word.
 *
 * Returns 1 on success, 0 on error.
 */
static int read_entry(Dictionary dict)
{
	Exp *n;
	int i;

	Dict_node *dn_new, *dnx, *dn = NULL;

	/* Reset multi-byte shift state every line. */
	memset(&dict->mbss, 0, sizeof(dict->mbss));

	while (!is_equal(dict, ':'))
	{
		if (dict->is_special)
		{
			dict_error(dict, "I expected a word but didn\'t get it.");
			/* Words collected so far are not in the dictionary yet;
			 * release them like every other error path below does. */
			goto syntax_error;
		}

		/* If it's a word-file name */
		/* However, be careful to reject "/.v" which is the division symbol
		 * used in equations (.v means verb-like) */
		if ((dict->token[0] == '/') && (dict->token[1] != '.'))
		{
			dn = read_word_file(dict, dn, dict->token);
			if (dn == NULL)
			{
				err_ctxt ec;
				ec.sent = NULL;
				err_msg(&ec, Error, "Error opening word file %s\n", dict->token);
				/* NOTE(review): the list collected before this call is no
				 * longer reachable from here; whether read_word_file()
				 * releases it on failure is not visible -- confirm. */
				return 0;
			}
		}
		else
		{
			/* Push the new word on the front of the list, which is
			 * chained through the left pointers. */
			dn_new = dict_node_new();
			dn_new->left = dn;
			dn = dn_new;
			dn->file = NULL;
			dn->string = string_set_add(dict->token, dict->string_set);
		}

		/* Advance to next entry, unless error */
		if (0 == link_advance(dict)) goto syntax_error;
	}

	/* pass the : */
	if (!link_advance(dict))
	{
		goto syntax_error;
	}

	n = expression(dict);
	if (n == NULL)
	{
		goto syntax_error;
	}

	if (!is_equal(dict, ';'))
	{
		dict_error(dict, "Expecting \";\" at the end of an entry.");
		goto syntax_error;
	}

	/* pass the ; */
	if (!link_advance(dict))
	{
		goto syntax_error;
	}

	/* At this point, dn points to a list of Dict_nodes connected by
	 * their left pointers. These are to be inserted into the dictionary */
	i = 0;
	for (dnx = dn; dnx != NULL; dnx = dnx->left)
	{
		dnx->exp = n;
		i++;
	}
	insert_list(dict, dn, i);
	return 1;

syntax_error:
	free_lookup_list(dn);
	return 0;
}