int cmd_variable_labels (struct lexer *lexer, struct dataset *ds) { struct dictionary *dict = dataset_dict (ds); do { struct variable **v; size_t nv; size_t i; if (!parse_variables (lexer, dict, &v, &nv, PV_NONE)) return CMD_FAILURE; if (!lex_force_string (lexer)) { free (v); return CMD_FAILURE; } for (i = 0; i < nv; i++) var_set_label (v[i], lex_tokcstr (lexer)); lex_get (lexer); while (lex_token (lexer) == T_SLASH) lex_get (lexer); free (v); } while (lex_token (lexer) != T_ENDCMD); return CMD_SUCCESS; }
/* Parse all the labels for the VAR_CNT variables in VARS and add the specified labels to those variables. */ static int get_label (struct lexer *lexer, struct variable **vars, size_t var_cnt, const char *dict_encoding) { /* Parse all the labels and add them to the variables. */ do { enum { MAX_LABEL_LEN = 255 }; int width = var_get_width (vars[0]); union value value; struct string label; size_t trunc_len; size_t i; /* Set value. */ value_init (&value, width); if (!parse_value (lexer, &value, vars[0])) { value_destroy (&value, width); return 0; } lex_match (lexer, T_COMMA); /* Set label. */ if (lex_token (lexer) != T_ID && !lex_force_string (lexer)) { value_destroy (&value, width); return 0; } ds_init_substring (&label, lex_tokss (lexer)); trunc_len = utf8_encoding_trunc_len (ds_cstr (&label), dict_encoding, MAX_LABEL_LEN); if (ds_length (&label) > trunc_len) { msg (SW, _("Truncating value label to %d bytes."), MAX_LABEL_LEN); ds_truncate (&label, trunc_len); } for (i = 0; i < var_cnt; i++) var_replace_value_label (vars[i], &value, ds_cstr (&label)); ds_destroy (&label); value_destroy (&value, width); lex_get (lexer); lex_match (lexer, T_COMMA); } while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD); return 1; }
/*-------------------------------------------------------------------------
 * Function:    parse_dot
 *
 * Purpose:     Parses a range expression optionally followed by any
 *              number of `.' operators, each with another range
 *              expression on its right.  Every dot wraps the result so
 *              far into the list `(Dot LEFT RIGHT)', so the operator
 *              groups to the left.  Without a dot the left operand is
 *              returned unchanged.
 *
 * Return:      Success:        The expression.
 *
 *              Failure:        &ErrorCell
 *
 * Programmer:  Robb Matzke
 *              [email protected]
 *              Dec  5 1996
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
static obj_t
parse_dot (lex_t *f, int skipnl)
{
    obj_t       expr;

    expr = parse_range (f, skipnl);
    if (&ErrorCell==expr) return &ErrorCell;

    for (;;) {
        obj_t   rhs, args;

        if (TOK_DOT!=lex_token (f, NULL, skipnl)) break;
        lex_consume (f);

        rhs = parse_range (f, skipnl);
        if (&ErrorCell==rhs) {
            /* Release what was built so far before reporting failure. */
            obj_dest (expr);
            return &ErrorCell;
        }

        /* Build (Dot expr rhs) from the tail forward. */
        args = obj_new (C_CONS, rhs, NIL);
        args = obj_new (C_CONS, expr, args);
        expr = obj_new (C_CONS, obj_new (C_SYM, "Dot"), args);
    }

    return expr;
}
/*-------------------------------------------------------------------------
 * Function:    parse_assignment
 *
 * Purpose:     Parses an assignment statement of the form
 *
 *                      LVALUE = RVALUE
 *
 *              Both sides are selection expressions.  When no `=' follows
 *              the left side, the left side alone is the result;
 *              otherwise the result is the list `(Assign LVALUE RVALUE)'.
 *
 * Return:      Success:        the resulting parse tree.
 *
 *              Failure:        &ErrorCell
 *
 * Programmer:  Robb Matzke
 *              [email protected]
 *              Feb  7 1997
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
static obj_t
parse_assignment (lex_t *f, int skipnl)
{
    obj_t       lvalue, rvalue, args;

    lvalue = parse_selection (f, skipnl);
    if (&ErrorCell==lvalue) return &ErrorCell;

    /* Not an assignment: hand back the plain expression. */
    if (TOK_EQ!=lex_token (f, NULL, skipnl)) return lvalue;
    lex_consume (f);

    rvalue = parse_selection (f, skipnl);
    if (&ErrorCell==rvalue) {
        obj_dest (lvalue);
        return &ErrorCell;
    }

    /* Build (Assign lvalue rvalue) from the tail forward. */
    args = obj_new (C_CONS, rvalue, NIL);
    args = obj_new (C_CONS, lvalue, args);
    return obj_new (C_CONS, obj_new (C_SYM, "Assign"), args);
}
/*-------------------------------------------------------------------------
 * Function:    lex_consume
 *
 * Purpose:     Consumes the current token.  The token is first obtained
 *              through lex_token() without skipping new-lines, then the
 *              cached token number in F is cleared.
 *
 * Return:      Success:        Token that was consumed.
 *
 *              Failure:        EOF
 *
 * Programmer:  Robb Matzke
 *              [email protected]
 *              Dec  4 1996
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
int
lex_consume(lex_t *f)
{
    const int   consumed = lex_token (f, NULL, false);

    f->tok = 0;                 /*no token is cached any more   */
    return consumed;
}
/*-------------------------------------------------------------------------
 * Function:    parse_range
 *
 * Purpose:     Tries to parse a range expression of the form `I1:I2'
 *              where I1 and I2 are integer constants.  If no `:' follows
 *              the first term, that term is returned unchanged.  An
 *              inverted range (I2<I1) is reported and then silently
 *              reordered.
 *
 * Return:      Success:        A range object, or the lone left term.
 *
 *              Failure:        &ErrorCell.  Any operand parsed so far
 *                              has been released.
 *
 * Programmer:  Robb Matzke
 *              [email protected]
 *              Jan  3 1997
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
static obj_t
parse_range (lex_t *f, int skipnl)
{
    obj_t       lt=NIL, rt=NIL, retval=NIL;
    int         lo, hi;

    lt = parse_term (f, skipnl);
    if (&ErrorCell==lt) return &ErrorCell;

    if (TOK_COLON==lex_token (f, NULL, skipnl)) {
        lex_consume (f);
        rt = parse_term (f, skipnl);
        if (&ErrorCell==rt) {
            /*
             * The right operand failed, so it is the already-parsed LEFT
             * operand that must be released (RT is just the error
             * sentinel).  This matches the error handling in parse_dot()
             * and parse_assignment().
             */
            obj_dest (lt);
            return &ErrorCell;
        }

        /*
         * Both arguments must be integer constants.
         */
        if (!num_isint(lt)) {
            out_error ("Range: left limit is not an integer constant: ", lt);
            obj_dest (lt);
            obj_dest (rt);
            return &ErrorCell;
        }
        if (!num_isint(rt)) {
            out_error ("Range: right limit is not an integer constant: ", rt);
            obj_dest (lt);
            obj_dest (rt);
            return &ErrorCell;
        }

        /*
         * The constants must be in a reasonable order.
         */
        lo = num_int (lt);
        hi = num_int (rt);
        if (hi<lo) {
            out_errorn ("Range: inverted range %d:%d changed to %d:%d",
                        lo, hi, hi, lo);
            lo = num_int (rt);
            hi = num_int (lt);
        }

        /*
         * Create the range object.  The operand objects are no longer
         * needed once their integer values have been extracted.
         */
        lt = obj_dest (lt);
        rt = obj_dest (rt);
        retval = obj_new (C_RANGE, lo, hi);
    } else {
        retval = lt;
    }
    return retval;
}
static int do_value_labels (struct lexer *lexer, const struct dictionary *dict, bool erase) { struct variable **vars; /* Variable list. */ size_t var_cnt; /* Number of variables. */ int parse_err=0; /* true if error parsing variables */ lex_match (lexer, T_SLASH); while (lex_token (lexer) != T_ENDCMD) { parse_err = !parse_variables (lexer, dict, &vars, &var_cnt, PV_SAME_WIDTH); if (var_cnt < 1) { free(vars); return CMD_FAILURE; } if (erase) erase_labels (vars, var_cnt); while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD) if (!get_label (lexer, vars, var_cnt, dict_get_encoding (dict))) goto lossage; if (lex_token (lexer) != T_SLASH) { free (vars); break; } lex_get (lexer); free (vars); } return parse_err ? CMD_FAILURE : CMD_SUCCESS; lossage: free (vars); return CMD_FAILURE; }
static t_lexer_result lex_from_str(const char *string) { t_token_list *tokens; t_result res; tokens = NULL; while (*string) { skip_whitespaces(&string); if (!*string) break ; res = lex_token(&string); if (res.error) return ((t_lexer_result){.tokens = NULL, .error = res.error}); token_list_add(&tokens, res.token); }
static int parse_token_s(const char* field, const char* delim, char** args, const char** val, int allow_empty) { // TODO: we could add support for escaped characters, if necessary assert(val); *val = lex_token(field, delim, args); if (!*val) { return -1; } if (!allow_empty && !(*val)[0]) { cb.log_err("KO: empty token %s\r\n", field); return -1; } return 0; }
static int parse_token_l(const char* field, const char* delim, char** args, long* val) { const char* tok; assert(val); tok = lex_token(field, delim, args); if (!tok) { return -1; } errno = 0; *val = strtol(tok, NULL, 0); if (errno) { cb.log_err("KO: invalid value '%s' for token %s, error %d(%s)\r\n", tok, field, errno, strerror(errno)); return -1; } return 0; }
/* Reads a record from the inline file into R. Returns true if successful, false on failure. */ static bool read_inline_record (struct dfm_reader *r) { if ((r->flags & DFM_SAW_BEGIN_DATA) == 0) { r->flags |= DFM_SAW_BEGIN_DATA; r->flags &= ~DFM_CONSUME; while (lex_token (r->lexer) == T_ENDCMD) lex_get (r->lexer); if (!lex_force_match_id (r->lexer, "BEGIN") || !lex_force_match_id (r->lexer, "DATA")) return false; lex_match (r->lexer, T_ENDCMD); } if (r->flags & DFM_CONSUME) lex_get (r->lexer); if (!lex_is_string (r->lexer)) { if (!lex_match_id (r->lexer, "END") || !lex_match_id (r->lexer, "DATA")) { msg (SE, _("Missing END DATA while reading inline data. " "This probably indicates a missing or incorrectly " "formatted END DATA command. END DATA must appear " "by itself on a single line with exactly one space " "between words.")); lex_discard_rest_of_command (r->lexer); } return false; } ds_assign_substring (&r->line, lex_tokss (r->lexer)); r->flags |= DFM_CONSUME; return true; }
/*
** Tokenizes FILE into a token list.
**
** *LEXER_RESULT is set to false on entry and to true only once the whole
** input has been lexed, so the caller can tell a failure apart from a
** successful run that produced an empty (NULL) list.  A NULL LEXER_RESULT
** is tolerated.
**
** Returns the token list on success.  On failure the value returned is
** whatever the corresponding print_*() helper returns; a NULL FILE
** returns NULL.
*/
t_token_list	*lexer(t_program_file *file, t_bool *lexer_result)
{
	t_token_list	*list;
	t_result		result;
	t_file_reader	file_reader;

	list = NULL;
	if (lexer_result != NULL)
		*lexer_result = false;
	if (file == NULL)
		return (NULL);
	file_reader = generate_file_reader(file);
	while (string_reader_has_more(&file_reader))
	{
		result = lex_token(&file_reader);
		if (result.type == RESULT_ERROR)
			return (print_syntax_error(&file_reader,
					&result.syntax_error, list));
		else if (result.type == RESULT_NULL)
			return (print_unexpected_char_error(&file_reader, list));
		else if (result.type == RESULT_TOKEN
			&& add_token_to_list(&list, result.token))
			return (print_error_n("Error while adding token to list.\n"));
	}
	if (lexer_result != NULL)
		*lexer_result = true;
	return (list);
}
/*-------------------------------------------------------------------------
 * Function:    parse_selection
 *
 * Purpose:     Parses a subscripted expression.  The subscript is
 *              enclosed in `[' and `]' after the main expression and
 *              may be a comma-separated list of expressions.  Each
 *              `[...]' turns the expression so far into the list
 *              `(Dot EXPR SUBSCRIPT...)'.  Zero or more selectors may
 *              follow the main expression.
 *
 * Return:      Success:        Ptr to the expression.
 *
 *              Failure:        &ErrorCell.  The operand list built so
 *                              far (which holds the main expression) has
 *                              been released.
 *
 * Programmer:  Robb Matzke
 *              [email protected]
 *              Jan  3 1997
 *
 * Modifications:
 *
 *      Robb Matzke, 4 Feb 1997
 *      The contents of the `[]' can now be a comma-separated list
 *      of expressions.
 *
 *-------------------------------------------------------------------------
 */
static obj_t
parse_selection (lex_t *f, int skipnl)
{
    obj_t       retval=NIL;             /*first argument, left of `['   */
    obj_t       tmp=NIL;                /*a subscript argument          */
    obj_t       operands=NIL;           /*operand list                  */
    int         septok;                 /*separator token               */

    retval = parse_dot (f, skipnl);
    if (&ErrorCell==retval) return &ErrorCell;

    /*
     * Zero or more array selectors.
     */
    while ('['==lex_token (f, NULL, skipnl)) {
        lex_consume (f);

        /* From here on the main expression is held by OPERANDS. */
        operands = obj_new (C_CONS, retval, NIL);
        retval = NIL;

        /*
         * One or more comma-separated expressions per selection.
         */
        for (;;) {
            tmp = parse_expr (f, skipnl);
            if (&ErrorCell==tmp) {
                /*
                 * RETVAL is NIL at this point; everything parsed so far
                 * lives in OPERANDS, so that is what has to be released
                 * (same as the `expected ]' error below).
                 */
                obj_dest (operands);
                return &ErrorCell;
            }
            operands = obj_new (C_CONS, tmp, operands); /*push*/

            septok = lex_token (f, NULL, skipnl);
            if (','==septok) {
                lex_consume (f);
            } else if (']'==septok) {
                lex_consume (f);
                break;
            } else {
                out_errorn ("expected ']'");
                obj_dest (operands);
                return &ErrorCell;
            }
        }

        /*
         * Put the operands in the correct order (they were pushed, so
         * they are currently reversed).
         */
        tmp = F_reverse (operands);
        obj_dest (operands);
        operands = tmp;
        tmp = NIL;

        /*
         * Add the function name `Dot' to the beginning of the
         * list.
         */
        retval = obj_new (C_CONS, obj_new (C_SYM, "Dot"), operands);
    }

    return retval;
}
/* Parse all the aggregate functions. */ static bool parse_aggregate_functions (struct lexer *lexer, const struct dictionary *dict, struct agr_proc *agr) { struct agr_var *tail; /* Tail of linked list starting at agr->vars. */ /* Parse everything. */ tail = NULL; for (;;) { char **dest; char **dest_label; size_t n_dest; struct string function_name; enum mv_class exclude; const struct agr_func *function; int func_index; union agr_argument arg[2]; const struct variable **src; size_t n_src; size_t i; dest = NULL; dest_label = NULL; n_dest = 0; src = NULL; function = NULL; n_src = 0; arg[0].c = NULL; arg[1].c = NULL; ds_init_empty (&function_name); /* Parse the list of target variables. */ while (!lex_match (lexer, T_EQUALS)) { size_t n_dest_prev = n_dest; if (!parse_DATA_LIST_vars (lexer, dict, &dest, &n_dest, (PV_APPEND | PV_SINGLE | PV_NO_SCRATCH | PV_NO_DUPLICATE))) goto error; /* Assign empty labels. */ { int j; dest_label = xnrealloc (dest_label, n_dest, sizeof *dest_label); for (j = n_dest_prev; j < n_dest; j++) dest_label[j] = NULL; } if (lex_is_string (lexer)) { dest_label[n_dest - 1] = xstrdup (lex_tokcstr (lexer)); lex_get (lexer); } } /* Get the name of the aggregation function. */ if (lex_token (lexer) != T_ID) { lex_error (lexer, _("expecting aggregation function")); goto error; } ds_assign_substring (&function_name, lex_tokss (lexer)); exclude = ds_chomp_byte (&function_name, '.') ? MV_SYSTEM : MV_ANY; for (function = agr_func_tab; function->name; function++) if (!c_strcasecmp (function->name, ds_cstr (&function_name))) break; if (NULL == function->name) { msg (SE, _("Unknown aggregation function %s."), ds_cstr (&function_name)); goto error; } ds_destroy (&function_name); func_index = function - agr_func_tab; lex_get (lexer); /* Check for leading lparen. */ if (!lex_match (lexer, T_LPAREN)) { if (function->src_vars == AGR_SV_YES) { lex_force_match (lexer, T_LPAREN); goto error; } } else { /* Parse list of source variables. 
*/ { int pv_opts = PV_NO_SCRATCH; if (func_index == SUM || func_index == MEAN || func_index == SD) pv_opts |= PV_NUMERIC; else if (function->n_args) pv_opts |= PV_SAME_TYPE; if (!parse_variables_const (lexer, dict, &src, &n_src, pv_opts)) goto error; } /* Parse function arguments, for those functions that require arguments. */ if (function->n_args != 0) for (i = 0; i < function->n_args; i++) { int type; lex_match (lexer, T_COMMA); if (lex_is_string (lexer)) { arg[i].c = recode_string (dict_get_encoding (agr->dict), "UTF-8", lex_tokcstr (lexer), -1); type = VAL_STRING; } else if (lex_is_number (lexer)) { arg[i].f = lex_tokval (lexer); type = VAL_NUMERIC; } else { msg (SE, _("Missing argument %zu to %s."), i + 1, function->name); goto error; } lex_get (lexer); if (type != var_get_type (src[0])) { msg (SE, _("Arguments to %s must be of same type as " "source variables."), function->name); goto error; } } /* Trailing rparen. */ if (!lex_force_match (lexer, T_RPAREN)) goto error; /* Now check that the number of source variables match the number of target variables. If we check earlier than this, the user can get very misleading error message, i.e. `AGGREGATE x=SUM(y t).' will get this error message when a proper message would be more like `unknown variable t'. */ if (n_src != n_dest) { msg (SE, _("Number of source variables (%zu) does not match " "number of target variables (%zu)."), n_src, n_dest); goto error; } if ((func_index == PIN || func_index == POUT || func_index == FIN || func_index == FOUT) && (var_is_numeric (src[0]) ? arg[0].f > arg[1].f : str_compare_rpad (arg[0].c, arg[1].c) > 0)) { union agr_argument t = arg[0]; arg[0] = arg[1]; arg[1] = t; msg (SW, _("The value arguments passed to the %s function " "are out-of-order. They will be treated as if " "they had been specified in the correct order."), function->name); } } /* Finally add these to the linked list of aggregation variables. 
*/ for (i = 0; i < n_dest; i++) { struct agr_var *v = xzalloc (sizeof *v); /* Add variable to chain. */ if (agr->agr_vars != NULL) tail->next = v; else agr->agr_vars = v; tail = v; tail->next = NULL; v->moments = NULL; /* Create the target variable in the aggregate dictionary. */ { struct variable *destvar; v->function = func_index; if (src) { v->src = src[i]; if (var_is_alpha (src[i])) { v->function |= FSTRING; v->string = xmalloc (var_get_width (src[i])); } if (function->alpha_type == VAL_STRING) destvar = dict_clone_var_as (agr->dict, v->src, dest[i]); else { assert (var_is_numeric (v->src) || function->alpha_type == VAL_NUMERIC); destvar = dict_create_var (agr->dict, dest[i], 0); if (destvar != NULL) { struct fmt_spec f; if ((func_index == N || func_index == NMISS) && dict_get_weight (dict) != NULL) f = fmt_for_output (FMT_F, 8, 2); else f = function->format; var_set_both_formats (destvar, &f); } } } else { struct fmt_spec f; v->src = NULL; destvar = dict_create_var (agr->dict, dest[i], 0); if (destvar != NULL) { if ((func_index == N || func_index == NMISS) && dict_get_weight (dict) != NULL) f = fmt_for_output (FMT_F, 8, 2); else f = function->format; var_set_both_formats (destvar, &f); } } if (!destvar) { msg (SE, _("Variable name %s is not unique within the " "aggregate file dictionary, which contains " "the aggregate variables and the break " "variables."), dest[i]); goto error; } free (dest[i]); if (dest_label[i]) var_set_label (destvar, dest_label[i]); v->dest = destvar; } v->exclude = exclude; if (v->src != NULL) { int j; if (var_is_numeric (v->src)) for (j = 0; j < function->n_args; j++) v->arg[j].f = arg[j].f; else for (j = 0; j < function->n_args; j++) v->arg[j].c = xstrdup (arg[j].c); } } if (src != NULL && var_is_alpha (src[0])) for (i = 0; i < function->n_args; i++) { free (arg[i].c); arg[i].c = NULL; } free (src); free (dest); free (dest_label); if (!lex_match (lexer, T_SLASH)) { if (lex_token (lexer) == T_ENDCMD) return true; lex_error (lexer, 
"expecting end of command"); return false; } continue; error: ds_destroy (&function_name); for (i = 0; i < n_dest; i++) { free (dest[i]); free (dest_label[i]); } free (dest); free (dest_label); free (arg[0].c); free (arg[1].c); if (src && n_src && var_is_alpha (src[0])) for (i = 0; i < function->n_args; i++) { free (arg[i].c); arg[i].c = NULL; } free (src); return false; } }
/*-------------------------------------------------------------------------
 * Function:    lex_token
 *
 * Purpose:     Figures out what token is next on the input stream.  If
 *              skipnl is non-zero then the new-line token is skipped.
 *
 *              The token is cached in F->tok (and its text in
 *              F->lexeme), so repeated calls return the same token until
 *              F->tok is reset to zero (see lex_consume()).  A cached
 *              TOK_EOL is not returned when SKIPNL is set; scanning
 *              resumes instead.
 *
 * Return:      Success:        Token number, optional lexeme returned
 *                              through the LEXEME argument.  *LEXEME
 *                              points into F and is overwritten by the
 *                              next scan.
 *
 *              Failure:        TOK_INVALID
 *
 * Programmer:  Robb Matzke
 *              [email protected]
 *              Dec  4 1996
 *
 * Modifications:
 *
 *      Robb Matzke, 3 Feb 1997
 *      Cleaned up error messages.
 *
 *      Robb Matzke, 7 Feb 1997
 *      Added the `=' token.
 *
 *      Robb Matzke, 7 Feb 1997
 *      The `*' and `?' characters are now legal as part of a symbol name
 *      so we can give those pattern matching characters to the `ls'
 *      command.
 *
 *      Robb Matzke, 12 Mar 1997
 *      Since we don't have mathematical expressions yet, a numeric
 *      constant is allowed to begin with a `-'.
 *
 *      Robb Matzke, 2000-06-06
 *      Symbol names may include `-'. Something that starts with a `-' is
 *      only a number if it's followed by a digit.
 *
 *      Mark C. Miller, Mon Nov  9 18:08:05 PST 2009
 *      Added logic to support parsing of '#nnnnnn' dataset names,
 *      but only when in '/.silo' dir.
 *-------------------------------------------------------------------------
 */
int
lex_token(lex_t *f, char **lexeme, int skipnl)
{
    int          c, at, quote, inDotSiloDir=0;
    static const char *symcharsA = "_$/*?";     /*normal symbol characters  */
    static const char *symcharsB = "_$/*?#";    /*same, plus `#' (.silo dir)*/
    const char *symchars = symcharsA;

    /* Return the current token if appropriate. */
    if (f->tok && (!skipnl || TOK_EOL!=f->tok)) {
        if (lexeme) *lexeme = f->lexeme;
        return f->tok;
    }

    /* Skip leading space.  A new-line is not skipped here: it is a token
     * in its own right and is handled below. */
    f->prompt = skipnl ? LEX_PROMPT2 : LEX_PROMPT;
    while (EOF!=(c=lex_getc(f)) && '\n'!=c && isspace(c)) /*void*/;

    /* handle special case of leading '#' and see if we're in .silo dir.
     * The value bound to the symbol `$1' is looked up and, if it is a file
     * whose current directory name starts with "/.silo", `#' starts a
     * symbol instead of a comment. */
    if ('#'==c) {
        obj_t f1, val;
        DBfile *file;
        char cwd[1024];

        f1 = obj_new (C_SYM, "$1");
        /* NOTE(review): VAL is never passed to obj_dest(), whereas the
         * result of sym_fboundp() in parse_stmt() is -- confirm whether
         * sym_vboundp() returns a reference that must be released. */
        val = sym_vboundp (f1);
        f1 = obj_dest (f1);
        if (NULL!=(file=file_file(val)) &&
            DBGetDir(file, cwd)>=0 &&
            !strncmp(cwd,"/.silo",6)) {
            inDotSiloDir = 1;
            symchars = symcharsB;
        }
    }

    /* Store the next token. */
    if (EOF==c) {
        f->lexeme[0] = '\0';
        f->tok = EOF;

    } else if ('\n'==c) {
        if (skipnl) {
            /* Skip the new-line and scan for the token after it. */
            f->tok = lex_token(f, NULL, true);
        } else {
            f->lexeme[0] = '\n';
            f->lexeme[1] = '\0';
            f->tok = TOK_EOL;
        }

    } else if ('#'==c && !inDotSiloDir) {
        /* A comment: discard through the end of the line, push back the
         * terminating character, and scan again. */
        while (EOF!=(c=lex_getc(f)) && '\n'!=c) /*void*/;
        lex_ungetc(f, c);
        return lex_token(f, lexeme, skipnl);

    } else if ('>'==c) {
        /* Either `>' or `>>'. */
        c = lex_getc(f);
        if ('>'==c) {
            strcpy(f->lexeme, ">>");
            f->tok = TOK_RTRT;
        } else {
            lex_ungetc(f, c);
            strcpy(f->lexeme, ">");
            f->tok = TOK_RT;
        }

    } else if (strchr("|.()[]{}:,=", c)) {
        /* Single-character tokens: the token number is the character. */
        f->lexeme[0] = c;
        f->lexeme[1] = '\0';
        f->tok = c;

    } else if (isalpha(c) || strchr(symchars,c)) {
        /* A symbol.  Characters that do not fit into F->lexeme are read
         * but dropped. */
        f->lexeme[0] = c;
        f->lexeme[1] = '\0';
        at = 1;
        while (EOF!=(c=lex_getc(f)) &&
               (isalpha(c) || isdigit(c) || strchr(symchars, c))) {
            if (at+1<sizeof(f->lexeme)) {
                f->lexeme[at++] = c;
                f->lexeme[at] = '\0';
            }
        }
        lex_ungetc(f, c);
        f->tok = TOK_SYM;

    } else if ('-'==c) {
        /* Could be a number or a symbol */
        f->lexeme[0] = c;
        f->lexeme[1] = '\0';
        if (EOF!=(c=lex_getc(f)) && ('.'==c || isdigit(c))) {
            /* `-' followed by a digit or `.': a negative number. */
            f->lexeme[1] = c;
            f->lexeme[2] = '\0';
            at = 2;
            while (EOF!=(c=lex_getc(f)) &&
                   (isdigit(c) || strchr("+-.eE", c))) {
                if (at+1<sizeof(f->lexeme)) {
                    f->lexeme[at++] = c;
                    f->lexeme[at] = '\0';
                }
            }
            lex_ungetc(f, c);
            f->tok = TOK_NUM;
        } else {
            /* Otherwise a symbol that starts with `-'.  C already holds
             * the character after the `-'.  Note that this loop uses a
             * fixed character set (including `-') rather than SYMCHARS. */
            at=1;
            while (EOF!=c &&
                   (isalpha(c) || isdigit(c) || strchr("_$/*?-", c))) {
                if (at+1<sizeof(f->lexeme)) {
                    f->lexeme[at++] = c;
                    f->lexeme[at] = '\0';
                }
                c = lex_getc(f);
            }
            lex_ungetc(f, c);
            f->tok = TOK_SYM;
        }

    } else if ('-'==c || isdigit(c)) {
        /* A number.  (A leading `-' was already handled by the previous
         * branch, so only a digit can get here.) */
        f->lexeme[0] = c;
        f->lexeme[1] = '\0';
        at = 1;
        while (EOF!=(c=lex_getc(f)) &&
               (isdigit(c) || strchr("+-.eE", c))) {
            if (at+1<sizeof(f->lexeme)) {
                f->lexeme[at++] = c;
                f->lexeme[at] = '\0';
            }
        }
        lex_ungetc(f, c);
        f->tok = TOK_NUM;

    } else if ('"'==c || '\''==c) {
        /* A string, terminated by the same quote character that opened
         * it.  The quotes are not stored in the lexeme. */
        quote = c;
        at = 0;
        f->lexeme[0] = '\0';
        while (EOF!=(c=lex_getc(f)) && quote!=c && '\n'!=c) {
            if ('\\'==c) {
                /* Backslash escapes: \b \n \r \t, one to three octal
                 * digits, or any other character standing for itself. */
                switch ((c=lex_getc(f))) {
                case 'b':
                    c = '\b';
                    break;
                case 'n':
                    c = '\n';
                    break;
                case 'r':
                    c = '\r';
                    break;
                case 't':
                    c = '\t';
                    break;
                case EOF:
                    /* Backslash at end of input is kept literally. */
                    c = '\\';
                    break;
                default:
                    if (c>='0' && c<='7') {
                        int c2 = lex_getc(f);
                        if (c2>='0' && c2<='7') {
                            int c3 = lex_getc(f);
                            if (c3>='0' && c3<='7') {
                                c = ((c-'0')*8+c2-'0')*8+c3-'0';
                            } else {
                                lex_ungetc(f, c3);
                                c = (c-'0')*8+c2-'0';
                            }
                        } else {
                            lex_ungetc(f, c2);
                            c -= '0';
                        }
                    }
                    break;
                }
            }
            if (at+1<sizeof(f->lexeme)) {
                f->lexeme[at++] = c;
                f->lexeme[at] = '\0';
            }
        }
        /* An unterminated string is reported but still returned as a
         * string token holding what was read so far. */
        if ('\n'==c) {
            out_errorn("linefeed inside string constant (truncated at EOL)");
            lex_ungetc(f, c);
        } else if (c<0) {
            out_errorn("EOF inside string constant (truncated at EOF)");
        }
        f->tok = TOK_STR;

    } else {
        /* Invalid character. Don't print an error message since a
         * syntax error will result in the parser anyway. */
        f->lexeme[0] = c;
        f->lexeme[1] = '\0';
        f->tok = TOK_INVALID;
    }

    if (lexeme) *lexeme = f->lexeme;
    return f->tok;
}
int cmd_reliability (struct lexer *lexer, struct dataset *ds) { const struct dictionary *dict = dataset_dict (ds); struct reliability reliability; reliability.n_variables = 0; reliability.variables = NULL; reliability.model = MODEL_ALPHA; reliability.exclude = MV_ANY; reliability.summary = 0; reliability.wv = dict_get_weight (dict); reliability.total_start = 0; lex_match (lexer, T_SLASH); if (!lex_force_match_id (lexer, "VARIABLES")) { goto error; } lex_match (lexer, T_EQUALS); if (!parse_variables_const (lexer, dict, &reliability.variables, &reliability.n_variables, PV_NO_DUPLICATE | PV_NUMERIC)) goto error; if (reliability.n_variables < 2) msg (MW, _("Reliability on a single variable is not useful.")); { int i; struct cronbach *c; /* Create a default Scale */ reliability.n_sc = 1; reliability.sc = xzalloc (sizeof (struct cronbach) * reliability.n_sc); ds_init_cstr (&reliability.scale_name, "ANY"); c = &reliability.sc[0]; c->n_items = reliability.n_variables; c->items = xzalloc (sizeof (struct variable*) * c->n_items); for (i = 0 ; i < c->n_items ; ++i) c->items[i] = reliability.variables[i]; } while (lex_token (lexer) != T_ENDCMD) { lex_match (lexer, T_SLASH); if (lex_match_id (lexer, "SCALE")) { struct const_var_set *vs; if ( ! lex_force_match (lexer, T_LPAREN)) goto error; if ( ! lex_force_string (lexer) ) goto error; ds_init_substring (&reliability.scale_name, lex_tokss (lexer)); lex_get (lexer); if ( ! 
lex_force_match (lexer, T_RPAREN)) goto error; lex_match (lexer, T_EQUALS); vs = const_var_set_create_from_array (reliability.variables, reliability.n_variables); if (!parse_const_var_set_vars (lexer, vs, &reliability.sc->items, &reliability.sc->n_items, 0)) { const_var_set_destroy (vs); goto error; } const_var_set_destroy (vs); } else if (lex_match_id (lexer, "MODEL")) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "ALPHA")) { reliability.model = MODEL_ALPHA; } else if (lex_match_id (lexer, "SPLIT")) { reliability.model = MODEL_SPLIT; reliability.split_point = -1; if ( lex_match (lexer, T_LPAREN)) { lex_force_num (lexer); reliability.split_point = lex_number (lexer); lex_get (lexer); lex_force_match (lexer, T_RPAREN); } } else goto error; } else if (lex_match_id (lexer, "SUMMARY")) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "TOTAL")) { reliability.summary |= SUMMARY_TOTAL; } else if (lex_match (lexer, T_ALL)) { reliability.summary = 0xFFFF; } else goto error; } else if (lex_match_id (lexer, "MISSING")) { lex_match (lexer, T_EQUALS); while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { if (lex_match_id (lexer, "INCLUDE")) { reliability.exclude = MV_SYSTEM; } else if (lex_match_id (lexer, "EXCLUDE")) { reliability.exclude = MV_ANY; } else { lex_error (lexer, NULL); goto error; } } } else { lex_error (lexer, NULL); goto error; } } if ( reliability.model == MODEL_SPLIT) { int i; const struct cronbach *s; if ( reliability.split_point >= reliability.n_variables) { msg (ME, _("The split point must be less than the number of variables")); goto error; } reliability.n_sc += 2 ; reliability.sc = xrealloc (reliability.sc, sizeof (struct cronbach) * reliability.n_sc); s = &reliability.sc[0]; reliability.sc[1].n_items = (reliability.split_point == -1) ? 
s->n_items / 2 : reliability.split_point; reliability.sc[2].n_items = s->n_items - reliability.sc[1].n_items; reliability.sc[1].items = xzalloc (sizeof (struct variable *) * reliability.sc[1].n_items); reliability.sc[2].items = xzalloc (sizeof (struct variable *) * reliability.sc[2].n_items); for (i = 0; i < reliability.sc[1].n_items ; ++i) reliability.sc[1].items[i] = s->items[i]; while (i < s->n_items) { reliability.sc[2].items[i - reliability.sc[1].n_items] = s->items[i]; i++; } } if ( reliability.summary & SUMMARY_TOTAL) { int i; const int base_sc = reliability.n_sc; reliability.total_start = base_sc; reliability.n_sc += reliability.sc[0].n_items ; reliability.sc = xrealloc (reliability.sc, sizeof (struct cronbach) * reliability.n_sc); for (i = 0 ; i < reliability.sc[0].n_items; ++i ) { int v_src; int v_dest = 0; struct cronbach *s = &reliability.sc[i + base_sc]; s->n_items = reliability.sc[0].n_items - 1; s->items = xzalloc (sizeof (struct variable *) * s->n_items); for (v_src = 0 ; v_src < reliability.sc[0].n_items ; ++v_src) { if ( v_src != i) s->items[v_dest++] = reliability.sc[0].items[v_src]; } } } if ( ! run_reliability (ds, &reliability)) goto error; free (reliability.variables); return CMD_SUCCESS; error: free (reliability.variables); return CMD_FAILURE; }
int cmd_missing_values (struct lexer *lexer, struct dataset *ds) { struct dictionary *dict = dataset_dict (ds); struct variable **v = NULL; size_t nv; bool ok = true; while (lex_token (lexer) != T_ENDCMD) { size_t i; if (!parse_variables (lexer, dict, &v, &nv, PV_NONE)) goto error; if (!lex_force_match (lexer, T_LPAREN)) goto error; for (i = 0; i < nv; i++) var_clear_missing_values (v[i]); if (!lex_match (lexer, T_RPAREN)) { struct missing_values mv; for (i = 0; i < nv; i++) if (var_get_type (v[i]) != var_get_type (v[0])) { const struct variable *n = var_is_numeric (v[0]) ? v[0] : v[i]; const struct variable *s = var_is_numeric (v[0]) ? v[i] : v[0]; msg (SE, _("Cannot mix numeric variables (e.g. %s) and " "string variables (e.g. %s) within a single list."), var_get_name (n), var_get_name (s)); goto error; } if (var_is_numeric (v[0])) { mv_init (&mv, 0); while (!lex_match (lexer, T_RPAREN)) { enum fmt_type type = var_get_print_format (v[0])->type; double x, y; bool ok; if (!parse_num_range (lexer, &x, &y, &type)) goto error; ok = (x == y ? 
mv_add_num (&mv, x) : mv_add_range (&mv, x, y)); if (!ok) ok = false; lex_match (lexer, T_COMMA); } } else { mv_init (&mv, MV_MAX_STRING); while (!lex_match (lexer, T_RPAREN)) { uint8_t value[MV_MAX_STRING]; char *dict_mv; size_t length; if (!lex_force_string (lexer)) { ok = false; break; } dict_mv = recode_string (dict_get_encoding (dict), "UTF-8", lex_tokcstr (lexer), ss_length (lex_tokss (lexer))); length = strlen (dict_mv); if (length > MV_MAX_STRING) { /* XXX truncate graphemes not bytes */ msg (SE, _("Truncating missing value to maximum " "acceptable length (%d bytes)."), MV_MAX_STRING); length = MV_MAX_STRING; } memset (value, ' ', MV_MAX_STRING); memcpy (value, dict_mv, length); free (dict_mv); if (!mv_add_str (&mv, value)) ok = false; lex_get (lexer); lex_match (lexer, T_COMMA); } } for (i = 0; i < nv; i++) { if (mv_is_resizable (&mv, var_get_width (v[i]))) var_set_missing_values (v[i], &mv); else { msg (SE, _("Missing values provided are too long to assign " "to variable of width %d."), var_get_width (v[i])); ok = false; } } mv_destroy (&mv); } lex_match (lexer, T_SLASH); free (v); v = NULL; } free (v); return ok ? CMD_SUCCESS : CMD_FAILURE; error: free (v); return CMD_FAILURE; }
/*-------------------------------------------------------------------------
 * Function:    parse_stmt
 *
 * Purpose:     Parses a statement which is a function name followed by
 *              zero or more arguments.  A statement may end with `>' or
 *              `>>' followed by a file name, or `|' followed by a shell
 *              command, in which case the parse tree is wrapped in a
 *              `Redirect' or `Pipe' call.  Empty lines are skipped.
 *
 *              SIGINT is given its default action for the duration of
 *              the parse; the previous action is restored on return.
 *
 * Return:      Success:        Ptr to parse tree.  At end of input this
 *                              is `(exit)' when reading from a terminal
 *                              and the symbol `__END__' otherwise.
 *
 *              Failure:        NIL, input consumed through end of line.
 *
 * Programmer:  Robb Matzke
 *              [email protected]
 *              Dec  4 1996
 *
 * Modifications:
 *
 *      Robb Matzke, 11 Dec 1996
 *      If IMPLIED_PRINT is true then wrap the input in a call to the
 *      `print' function if it isn't already obviously a call to `print'.
 *
 *      Robb Matzke, 20 Jan 1997
 *      Turn off handling of SIGINT during parsing.
 *
 *      Robb Matzke, 7 Feb 1997
 *      If the first thing on the line is a symbol which has a built in
 *      function (BIF) as its f-value, and the BIF has the lex_special
 *      property, then we call lex_special() to prepare the next token.
 *
 *      Robb Matzke, 2000-06-28
 *      Signal handlers are registered with sigaction() since its behavior
 *      is more consistent.
 *
 *-------------------------------------------------------------------------
 */
obj_t
parse_stmt (lex_t *f, int implied_print)
{
    char        *lexeme, buf[1024], *s, *fmode;
    int         tok, i;
    obj_t       head=NIL, opstack=NIL, b1=NIL, retval=NIL, tmp=NIL;
    struct sigaction new_action, old_action;

    /* SIGINT should have the default action while we're parsing */
    new_action.sa_handler = SIG_DFL;
    sigemptyset(&new_action.sa_mask);
    new_action.sa_flags = SA_RESTART;
    sigaction(SIGINT, &new_action, &old_action);

    tok = lex_token (f, &lexeme, false);

    /*
     * At the end of the file, return `(exit)'.
     */
    if (TOK_EOF==tok) {
        lex_consume (f);
        if (f->f && isatty (fileno (f->f))) {
            printf ("exit\n");
            retval = obj_new (C_CONS,
                              obj_new (C_SYM, "exit"),
                              NIL);
            goto done;
        } else {
            retval = obj_new (C_SYM, "__END__");
            goto done;
        }
    }

    /*
     * For an empty line, eat the linefeed token and try again.
     */
    if (TOK_EOL==tok) {
        lex_consume (f);
        retval = parse_stmt (f, implied_print);
        goto done;
    }

    /*
     * A statement begins with a function name.  If the first token
     * is not a symbol then assume `print'.  The same goes for a symbol
     * that has no function binding.
     */
    if (implied_print && TOK_SYM==tok) {
        head = obj_new (C_SYM, lexeme);
        if ((tmp=sym_fboundp (head))) {
            tmp = obj_dest (tmp);
            lex_consume (f);
        } else {
            obj_dest (head);
            head = obj_new (C_SYM, "print");
        }
    } else if (implied_print) {
        head = obj_new (C_SYM, "print");
    } else {
        head = &ErrorCell ;     /*no function yet*/
    }

    /*
     * Some functions take a weird first argument that isn't really a
     * normal token.  Like `open' which wants the name of a file.  We
     * call lex_special() to try to get such a token if it appears
     * next.
     */
    if (head && &ErrorCell!=head && (tmp=sym_fboundp(head))) {
        if (bif_lex_special (tmp)) lex_special (f, false);
        tmp = obj_dest (tmp);
    }

    /*
     * Read the arguments...  They are pushed, so OPSTACK holds them in
     * reverse order.
     */
    while (&ErrorCell!=(b1=parse_expr(f, false))) {
        opstack = obj_new(C_CONS, b1, opstack);
    }

    /*
     * Construct a function call which is the HEAD applied to the
     * arguments on the operand stack.  Without a head, a single
     * argument stands for itself.
     */
    b1 = F_reverse (opstack);
    opstack = obj_dest (opstack);

    if (&ErrorCell==head) {
        head = NIL;
        if (1==F_length(b1)) {
            retval = obj_copy (cons_head (b1), SHALLOW);
            b1 = obj_dest (b1);
        } else {
            retval = b1;
            b1 = NIL;
        }
    } else {
        retval = F_cons (head, b1);
        head = obj_dest (head);
        b1 = obj_dest (b1);
    }

    /*
     * A statement can end with `>' or `>>' followed by the name of
     * a file, or `|' followed by an unquoted shell command.  Leading
     * and trailing white space is stripped from the file or command.
     */
    tok = lex_token (f, &lexeme, false);
    if (TOK_RT==tok || TOK_RTRT==tok || TOK_PIPE==tok) {
        lex_consume (f);
        if (NULL==lex_gets (f, buf, sizeof(buf))) {
            out_errorn ("file name required after `%s' operator", lexeme);
            goto error;
        }
        lex_set (f, TOK_EOL, "\n");

        /*
         * BUF holds raw user input, which may contain bytes above 0x7f.
         * The <ctype.h> functions require a value representable as
         * unsigned char (or EOF), hence the casts.
         */
        for (s=buf; isspace((unsigned char)*s); s++) /*void*/;
        for (i=strlen(s)-1; i>=0 && isspace((unsigned char)s[i]); --i) {
            s[i] = '\0';
        }
        if (!*s) {
            out_errorn ("file name required after `%s' operator", lexeme);
            goto error;
        }
        switch (tok) {
        case TOK_RT:
            lexeme = "Redirect";
            fmode = "w";
            break;
        case TOK_RTRT:
            lexeme = "Redirect";
            fmode = "a";
            break;
        case TOK_PIPE:
            lexeme = "Pipe";
            fmode = "w";
            break;
        default:
            abort();
        }
        retval = obj_new (C_CONS,
                          obj_new (C_SYM, lexeme),
                          obj_new (C_CONS,
                                   retval,
                                   obj_new (C_CONS,
                                            obj_new (C_STR, s),
                                            obj_new (C_CONS,
                                                     obj_new (C_STR, fmode),
                                                     NIL))));
    }

    /*
     * We should be at the end of a line.
     */
    tok = lex_token (f, &lexeme, false);
    if (TOK_EOL!=tok && TOK_EOF!=tok) {
        s = lex_gets (f, buf, sizeof(buf));
        if (s && strlen(s)>0 && '\n'==s[strlen(s)-1]) s[strlen(s)-1] = '\0';
        out_errorn ("syntax error before: %s%s", lexeme, s?s:"");
        lex_consume(f);
        goto error;
    } else {
        lex_consume(f);
    }

done:
    sigaction(SIGINT, &old_action, NULL);
    return retval;

error:
    if (head) head = obj_dest (head);
    if (opstack) opstack = obj_dest (opstack);
    if (retval) retval = obj_dest (retval);
    sigaction(SIGINT, &old_action, NULL);
    return NIL;
}
/*-------------------------------------------------------------------------
 * Function:    parse_term
 *
 * Purpose:     Parses a term: a symbol, a string, a number, the `.'
 *              token, a parenthesized list `(A B ... Z)', or a curly
 *              brace list `{A B ... Z}'.
 *
 * Return:      Success:        Ptr to the term object, or NIL for the
 *                              symbol `nil'.
 *
 *              Failure:        &ErrorCell
 *
 * Programmer:  Robb Matzke
 *              Dec 4 1996
 *
 * Modifications:
 *
 *      Robb Matzke, 7 Feb 1997
 *      If the first thing after a parenthesis is a symbol which has a
 *      built in function (BIF) as its f-value, and the BIF has the
 *      lex_special property, then we call lex_special() to prepare the
 *      next token.
 *
 *      Robb Matzke, 26 Aug 1997
 *      The term `.' means current working directory.
 *-------------------------------------------------------------------------
 */
static obj_t
parse_term (lex_t *f, int skipnl)
{
   char         *lexeme;
   obj_t        term=&ErrorCell, items=NIL, item=NIL, fbound=NIL;
   int          tok, first;

   tok = lex_token (f, &lexeme, skipnl);

   /*
    * Atoms: the symbol `nil' stands for the empty list; `.' and every
    * other symbol become symbol objects.
    */
   if (TOK_SYM==tok && 0==strcmp (lexeme, "nil")) {
      lex_consume (f);
      return NIL;
   }
   if (TOK_DOT==tok || TOK_SYM==tok) {
      term = obj_new (C_SYM, lexeme);
      lex_consume (f);
      return term;
   }
   if (TOK_NUM==tok) {
      term = obj_new (C_NUM, lexeme);
      lex_consume (f);
      return term;
   }
   if (TOK_STR==tok) {
      term = obj_new (C_STR, lexeme);
      lex_consume (f);
      return term;
   }

   if (TOK_LTPAREN==tok) {
      lex_consume (f);
      for (first=1; /*void*/; first=0) {
         tok = lex_token (f, NULL, true);
         if (TOK_RTPAREN==tok || TOK_EOF==tok) break;

         if (first && TOK_SYM==tok) {
            /*
             * A leading symbol is taken literally.  When its function
             * binding is a BIF with the lex_special attribute the lexer
             * is asked to prepare the next token specially.
             */
            item = obj_new (C_SYM, f->lexeme);
            lex_consume (f);
            fbound = sym_fboundp (item);
            if (bif_lex_special (fbound)) lex_special (f, true);
            fbound = obj_dest (fbound);
         } else {
            item = parse_expr (f, true);
         }

         if (&ErrorCell==item) {
            obj_dest (items);
            return &ErrorCell;
         }
         /* Items accumulate in reverse; F_reverse() fixes the order. */
         items = obj_new (C_CONS, item, items);
      }

      if (TOK_RTPAREN!=tok) {
         out_errorn ("right paren expected near end of input");
         obj_dest (items);
         return &ErrorCell;
      }
      lex_consume (f);
      term = F_reverse (items);
      obj_dest (items);
      return term;
   }

   if (TOK_LTCURLY==tok) {
      /*
       * `{A B ... Z}' is shorthand for `(Quote A B ... Z)'.  A comma
       * after an item is accepted and ignored.
       */
      lex_consume (f);
      for (;;) {
         tok = lex_token (f, NULL, true);
         if (TOK_RTCURLY==tok || TOK_EOF==tok) break;

         item = parse_expr (f, true);
         if (&ErrorCell==item) {
            obj_dest (items);
            return &ErrorCell;
         }
         items = obj_new (C_CONS, item, items);
         if (TOK_COMMA==lex_token(f, NULL, true)) lex_consume (f);
      }

      if (TOK_RTCURLY!=tok) {
         out_errorn ("right curly brace expected near end of input");
         obj_dest (items);
         return &ErrorCell;
      }
      lex_consume (f);
      term = F_reverse (items);
      obj_dest (items);
      return obj_new (C_CONS, obj_new (C_SYM, "Quote"), term);
   }

   /* Any other token is not a term; it is left unconsumed. */
   return &ErrorCell;
}
int cmd_t_test (struct lexer *lexer, struct dataset *ds) { bool ok; const struct dictionary *dict = dataset_dict (ds); struct tt tt; int mode_count = 0; /* Variables pertaining to the paired mode */ const struct variable **v1 = NULL; size_t n_v1; const struct variable **v2 = NULL; size_t n_v2; size_t n_pairs = 0; vp *pairs = NULL; /* One sample mode */ double testval = SYSMIS; /* Independent samples mode */ const struct variable *gvar; union value gval0; union value gval1; bool cut = false; tt.wv = dict_get_weight (dict); tt.dict = dict; tt.confidence = 0.95; tt.exclude = MV_ANY; tt.missing_type = MISS_ANALYSIS; tt.n_vars = 0; tt.vars = NULL; tt.mode = MODE_undef; lex_match (lexer, T_EQUALS); for (; lex_token (lexer) != T_ENDCMD; ) { lex_match (lexer, T_SLASH); if (lex_match_id (lexer, "TESTVAL")) { mode_count++; tt.mode = MODE_SINGLE; lex_match (lexer, T_EQUALS); lex_force_num (lexer); testval = lex_number (lexer); lex_get (lexer); } else if (lex_match_id (lexer, "GROUPS")) { mode_count++; cut = false; tt.mode = MODE_INDEP; lex_match (lexer, T_EQUALS); if (NULL == (gvar = parse_variable (lexer, dict))) goto parse_failed; if (lex_match (lexer, T_LPAREN)) { value_init (&gval0, var_get_width (gvar)); parse_value (lexer, &gval0, gvar); cut = true; if (lex_match (lexer, T_COMMA)) { value_init (&gval1, var_get_width (gvar)); parse_value (lexer, &gval1, gvar); cut = false; } lex_force_match (lexer, T_RPAREN); } else { value_init (&gval0, 0); value_init (&gval1, 0); gval0.f = 1.0; gval1.f = 2.0; cut = false; } if ( cut == true && var_is_alpha (gvar)) { msg (SE, _("When applying GROUPS to a string variable, two " "values must be specified.")); goto parse_failed; } } else if (lex_match_id (lexer, "PAIRS")) { bool with = false; bool paired = false; if (tt.n_vars > 0) { msg (SE, _("VARIABLES subcommand may not be used with PAIRS.")); goto parse_failed; } mode_count++; tt.mode = MODE_PAIRED; lex_match (lexer, T_EQUALS); if (!parse_variables_const (lexer, dict, &v1, &n_v1, 
PV_NO_DUPLICATE | PV_NUMERIC)) goto parse_failed; if ( lex_match (lexer, T_WITH)) { with = true; if (!parse_variables_const (lexer, dict, &v2, &n_v2, PV_NO_DUPLICATE | PV_NUMERIC)) goto parse_failed; if (lex_match (lexer, T_LPAREN) && lex_match_id (lexer, "PAIRED") && lex_match (lexer, T_RPAREN)) { paired = true; if (n_v1 != n_v2) { msg (SE, _("PAIRED was specified but the number of variables " "preceding WITH (%zu) did not match the number " "following (%zu)."), n_v1, n_v2); goto parse_failed; } } } { int i; if ( !with ) n_pairs = (n_v1 * (n_v1 - 1)) / 2.0; else if ( paired ) n_pairs = n_v1; else n_pairs = n_v1 * n_v2; pairs = xcalloc (n_pairs, sizeof *pairs); if ( with) { int x = 0; if (paired) { for (i = 0 ; i < n_v1; ++i) { vp *pair = &pairs[i]; (*pair)[0] = v1[i]; (*pair)[1] = v2[i]; } } else { for (i = 0 ; i < n_v1; ++i) { int j; for (j = 0 ; j < n_v2; ++j) { vp *pair = &pairs[x++]; (*pair)[0] = v1[i]; (*pair)[1] = v2[j]; } } } } else { int x = 0; for (i = 0 ; i < n_v1; ++i) { int j; for (j = i + 1 ; j < n_v1; ++j) { vp *pair = &pairs[x++]; (*pair)[0] = v1[i]; (*pair)[1] = v1[j]; } } } } } else if (lex_match_id (lexer, "VARIABLES")) { if ( tt.mode == MODE_PAIRED) { msg (SE, _("VARIABLES subcommand may not be used with PAIRS.")); goto parse_failed; } lex_match (lexer, T_EQUALS); if (!parse_variables_const (lexer, dict, &tt.vars, &tt.n_vars, PV_NO_DUPLICATE | PV_NUMERIC)) goto parse_failed; } else if ( lex_match_id (lexer, "MISSING")) { lex_match (lexer, T_EQUALS); while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { if (lex_match_id (lexer, "INCLUDE")) { tt.exclude = MV_SYSTEM; } else if (lex_match_id (lexer, "EXCLUDE")) { tt.exclude = MV_ANY; } else if (lex_match_id (lexer, "LISTWISE")) { tt.missing_type = MISS_LISTWISE; } else if (lex_match_id (lexer, "ANALYSIS")) { tt.missing_type = MISS_ANALYSIS; } else { lex_error (lexer, NULL); goto parse_failed; } lex_match (lexer, T_COMMA); } } else if (lex_match_id (lexer, "CRITERIA")) { lex_match 
(lexer, T_EQUALS); if ( lex_force_match_id (lexer, "CIN")) if ( lex_force_match (lexer, T_LPAREN)) { lex_force_num (lexer); tt.confidence = lex_number (lexer); lex_get (lexer); lex_force_match (lexer, T_RPAREN); } } else { lex_error (lexer, NULL); goto parse_failed; } } if ( mode_count != 1) { msg (SE, _("Exactly one of TESTVAL, GROUPS and PAIRS subcommands " "must be specified.")); goto parse_failed; } if (tt.n_vars == 0 && tt.mode != MODE_PAIRED) { lex_sbc_missing ("VARIABLES"); goto parse_failed; } /* Deal with splits etc */ { struct casereader *group; struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict); while (casegrouper_get_next_group (grouper, &group)) { if ( tt.mode == MODE_SINGLE) { if ( tt.missing_type == MISS_LISTWISE ) group = casereader_create_filter_missing (group, tt.vars, tt.n_vars, tt.exclude, NULL, NULL); one_sample_run (&tt, testval, group); } else if ( tt.mode == MODE_PAIRED) { if ( tt.missing_type == MISS_LISTWISE ) { group = casereader_create_filter_missing (group, v1, n_v1, tt.exclude, NULL, NULL); group = casereader_create_filter_missing (group, v2, n_v2, tt.exclude, NULL, NULL); } paired_run (&tt, n_pairs, pairs, group); } else /* tt.mode == MODE_INDEP */ { if ( tt.missing_type == MISS_LISTWISE ) { group = casereader_create_filter_missing (group, tt.vars, tt.n_vars, tt.exclude, NULL, NULL); group = casereader_create_filter_missing (group, &gvar, 1, tt.exclude, NULL, NULL); } indep_run (&tt, gvar, cut, &gval0, &gval1, group); } } ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; } free (pairs); free (v1); free (v2); free (tt.vars); return ok ? CMD_SUCCESS : CMD_FAILURE; parse_failed: return CMD_FAILURE; }
/* Parses the rest of a file-combining command and executes it.

   COMMAND selects the variant: COMB_ADD, COMB_MATCH or COMB_UPDATE, which
   are carried out by execute_add_files(), execute_match_files() and
   execute_update() respectively.  TABLE inputs are accepted only for
   COMB_MATCH; FIRST and LAST are not accepted for COMB_UPDATE.

   On success the combined dictionary and output replace DS's dictionary
   and source, and the result is CMD_SUCCESS if taint_destroy() returns
   true, otherwise CMD_CASCADING_FAILURE.  Every error path returns
   CMD_CASCADING_FAILURE. */
static int
combine_files (enum comb_command_type command,
               struct lexer *lexer, struct dataset *ds)
{
  struct comb_proc proc;

  bool saw_by = false;
  bool saw_sort = false;
  /* Reader on the active dataset, opened once and cloned for each
     further `*' input. */
  struct casereader *active_file = NULL;

  /* Names given on FIRST and LAST, if any (owned here). */
  char *first_name = NULL;
  char *last_name = NULL;

  struct taint *taint = NULL;

  size_t n_tables = 0;
  size_t allocated_files = 0;

  size_t i;

  proc.files = NULL;
  proc.n_files = 0;
  proc.dict = dict_create (get_default_encoding ());
  proc.output = NULL;
  proc.matcher = NULL;
  subcase_init_empty (&proc.by_vars);
  proc.first = NULL;
  proc.last = NULL;
  proc.buffered_case = NULL;
  proc.prev_BY = NULL;

  dict_set_case_limit (proc.dict, dict_get_case_limit (dataset_dict (ds)));

  /* Input specifications: a run of FILE (and, for COMB_MATCH, TABLE)
     subcommands, each optionally followed by /RENAME, /IN and /SORT. */
  lex_match (lexer, T_SLASH);
  for (;;)
    {
      struct comb_file *file;
      enum comb_file_type type;

      if (lex_match_id (lexer, "FILE"))
        type = COMB_FILE;
      else if (command == COMB_MATCH && lex_match_id (lexer, "TABLE"))
        {
          type = COMB_TABLE;
          n_tables++;
        }
      else
        break;
      lex_match (lexer, T_EQUALS);

      /* Append a new, fully initialized entry to proc.files so that the
         error path can clean it up at any point. */
      if (proc.n_files >= allocated_files)
        proc.files = x2nrealloc (proc.files, &allocated_files,
                                 sizeof *proc.files);
      file = &proc.files[proc.n_files++];
      file->type = type;
      subcase_init_empty (&file->by_vars);
      subcase_init_empty (&file->src);
      subcase_init_empty (&file->dst);
      file->mv = NULL;
      file->handle = NULL;
      file->dict = NULL;
      file->reader = NULL;
      file->data = NULL;
      file->is_sorted = true;
      file->in_name = NULL;
      file->in_var = NULL;

      if (lex_match (lexer, T_ASTERISK))
        {
          /* `*' names the active dataset.  Its reader is opened later,
             so only the dictionary is cloned here. */
          if (!dataset_has_source (ds))
            {
              msg (SE, _("Cannot specify the active dataset since none "
                         "has been defined."));
              goto error;
            }

          if (proc_make_temporary_transformations_permanent (ds))
            msg (SE,
                 _("This command may not be used after TEMPORARY when "
                   "the active dataset is an input source.  "
                   "Temporary transformations will be made permanent."));

          file->dict = dict_clone (dataset_dict (ds));
        }
      else
        {
          /* A file handle: open it now, which also yields its
             dictionary. */
          file->handle = fh_parse (lexer, FH_REF_FILE, dataset_session (ds));
          if (file->handle == NULL)
            goto error;

          file->reader = any_reader_open (file->handle, NULL, &file->dict);
          if (file->reader == NULL)
            goto error;
        }

      /* Per-input subcommands.  A slash followed by anything else is
         consumed and ends this loop. */
      while (lex_match (lexer, T_SLASH))
        if (lex_match_id (lexer, "RENAME"))
          {
            if (!parse_dict_rename (lexer, file->dict))
              goto error;
          }
        else if (lex_match_id (lexer, "IN"))
          {
            lex_match (lexer, T_EQUALS);
            if (lex_token (lexer) != T_ID)
              {
                lex_error (lexer, NULL);
                goto error;
              }

            if (file->in_name)
              {
                msg (SE, _("Multiple IN subcommands for a single FILE or "
                           "TABLE."));
                goto error;
              }
            file->in_name = xstrdup (lex_tokcstr (lexer));
            lex_get (lexer);
          }
        else if (lex_match_id (lexer, "SORT"))
          {
            file->is_sorted = false;
            saw_sort = true;
          }

      if (!merge_dictionary (proc.dict, file))
        goto error;
    }

  /* Subcommands that apply to the command as a whole. */
  while (lex_token (lexer) != T_ENDCMD)
    {
      if (lex_match (lexer, T_BY))
        {
          const struct variable **by_vars;
          size_t i;
          bool ok;

          if (saw_by)
            {
              lex_sbc_only_once ("BY");
              goto error;
            }
          saw_by = true;

          lex_match (lexer, T_EQUALS);
          if (!parse_sort_criteria (lexer, proc.dict, &proc.by_vars,
                                    &by_vars, NULL))
            goto error;

          /* Look up each BY variable by name in every input's own
             dictionary; every missing one is reported before failing. */
          ok = true;
          for (i = 0; i < proc.n_files; i++)
            {
              struct comb_file *file = &proc.files[i];
              size_t j;

              for (j = 0; j < subcase_get_n_fields (&proc.by_vars); j++)
                {
                  const char *name = var_get_name (by_vars[j]);
                  struct variable *var = dict_lookup_var (file->dict, name);
                  if (var != NULL)
                    subcase_add_var (&file->by_vars, var,
                                     subcase_get_direction (&proc.by_vars, j));
                  else
                    {
                      if (file->handle != NULL)
                        msg (SE, _("File %s lacks BY variable %s."),
                             fh_get_name (file->handle), name);
                      else
                        msg (SE, _("Active dataset lacks BY variable %s."),
                             name);
                      ok = false;
                    }
                }
              assert (!ok || subcase_conformable (&file->by_vars,
                                                  &proc.files[0].by_vars));
            }
          free (by_vars);

          if (!ok)
            goto error;
        }
      else if (command != COMB_UPDATE && lex_match_id (lexer, "FIRST"))
        {
          if (first_name != NULL)
            {
              lex_sbc_only_once ("FIRST");
              goto error;
            }

          lex_match (lexer, T_EQUALS);
          if (!lex_force_id (lexer))
            goto error;
          first_name = xstrdup (lex_tokcstr (lexer));
          lex_get (lexer);
        }
      else if (command != COMB_UPDATE && lex_match_id (lexer, "LAST"))
        {
          if (last_name != NULL)
            {
              lex_sbc_only_once ("LAST");
              goto error;
            }

          lex_match (lexer, T_EQUALS);
          if (!lex_force_id (lexer))
            goto error;
          last_name = xstrdup (lex_tokcstr (lexer));
          lex_get (lexer);
        }
      else if (lex_match_id (lexer, "MAP"))
        {
          /* FIXME: MAP is accepted but does nothing. */
        }
      else if (lex_match_id (lexer, "DROP"))
        {
          if (!parse_dict_drop (lexer, proc.dict))
            goto error;
        }
      else if (lex_match_id (lexer, "KEEP"))
        {
          if (!parse_dict_keep (lexer, proc.dict))
            goto error;
        }
      else
        {
          lex_error (lexer, NULL);
          goto error;
        }

      /* Each subcommand must be followed by a slash or the end of the
         command. */
      if (!lex_match (lexer, T_SLASH) && lex_token (lexer) != T_ENDCMD)
        {
          lex_end_of_command (lexer);
          goto error;
        }
    }

  /* BY is mandatory for COMB_UPDATE and whenever TABLE or SORT was
     used. */
  if (!saw_by)
    {
      if (command == COMB_UPDATE)
        {
          lex_sbc_missing ("BY");
          goto error;
        }
      if (n_tables)
        {
          msg (SE, _("BY is required when %s is specified."), "TABLE");
          goto error;
        }
      if (saw_sort)
        {
          msg (SE, _("BY is required when %s is specified."), "SORT");
          goto error;
        }
    }

  /* Add IN, FIRST, and LAST variables to master dictionary. */
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      if (!create_flag_var ("IN", file->in_name, proc.dict, &file->in_var))
        goto error;
    }
  if (!create_flag_var ("FIRST", first_name, proc.dict, &proc.first)
      || !create_flag_var ("LAST", last_name, proc.dict, &proc.last))
    goto error;

  dict_delete_scratch_vars (proc.dict);
  dict_compact_values (proc.dict);

  /* Set up mapping from each file's variables to master variables.
     Only source variables whose name still exists in the master
     dictionary are mapped; file->mv records their missing values in the
     same order as file->src. */
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      size_t src_var_cnt = dict_get_var_cnt (file->dict);
      size_t j;

      file->mv = xnmalloc (src_var_cnt, sizeof *file->mv);
      for (j = 0; j < src_var_cnt; j++)
        {
          struct variable *src_var = dict_get_var (file->dict, j);
          struct variable *dst_var = dict_lookup_var (proc.dict,
                                                      var_get_name (src_var));
          if (dst_var != NULL)
            {
              size_t n = subcase_get_n_fields (&file->src);
              file->mv[n] = var_get_missing_values (src_var);
              subcase_add_var (&file->src, src_var, SC_ASCEND);
              subcase_add_var (&file->dst, dst_var, SC_ASCEND);
            }
        }
    }

  proc.output = autopaging_writer_create (dict_get_proto (proc.dict));
  taint = taint_clone (casewriter_get_taint (proc.output));

  /* Set up case matcher.  Inputs without a reader are the active
     dataset: the first opens it, later ones clone that reader.  Only
     COMB_FILE inputs are registered with the matcher. */
  proc.matcher = case_matcher_create ();
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      if (file->reader == NULL)
        {
          if (active_file == NULL)
            {
              proc_discard_output (ds);
              file->reader = active_file = proc_open_filtering (ds, false);
            }
          else
            file->reader = casereader_clone (active_file);
        }
      if (!file->is_sorted)
        file->reader = sort_execute (file->reader, &file->by_vars);
      taint_propagate (casereader_get_taint (file->reader), taint);
      file->data = casereader_read (file->reader);
      if (file->type == COMB_FILE)
        case_matcher_add_input (proc.matcher, &file->by_vars,
                                &file->data, &file->is_minimal);
    }

  if (command == COMB_ADD)
    execute_add_files (&proc);
  else if (command == COMB_MATCH)
    execute_match_files (&proc);
  else if (command == COMB_UPDATE)
    execute_update (&proc);
  else
    NOT_REACHED ();

  case_matcher_destroy (proc.matcher);
  proc.matcher = NULL;
  close_all_comb_files (&proc);
  if (active_file != NULL)
    proc_commit (ds);

  /* Hand the dictionary and the output over to DS, then clear the
     pointers before free_comb_proc() runs. */
  dataset_set_dict (ds, proc.dict);
  dataset_set_source (ds, casewriter_make_reader (proc.output));
  proc.dict = NULL;
  proc.output = NULL;

  free_comb_proc (&proc);

  free (first_name);
  free (last_name);

  return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;

 error:
  if (active_file != NULL)
    proc_commit (ds);
  free_comb_proc (&proc);
  taint_destroy (taint);
  free (first_name);
  free (last_name);
  return CMD_CASCADING_FAILURE;
}
/* Parses a list of sort fields and appends them to ORDERING, which the caller must already have initialized. Returns true if successful, false on error. If SAW_DIRECTION is nonnull, sets *SAW_DIRECTION to true if at least one parenthesized sort direction was specified, false otherwise. */ bool parse_sort_criteria (struct lexer *lexer, const struct dictionary *dict, struct subcase *ordering, const struct variable ***vars, bool *saw_direction) { const struct variable **local_vars = NULL; size_t var_cnt = 0; if (vars == NULL) vars = &local_vars; *vars = NULL; if (saw_direction != NULL) *saw_direction = false; do { size_t prev_var_cnt = var_cnt; enum subcase_direction direction; size_t i; /* Variables. */ if (!parse_variables_const (lexer, dict, vars, &var_cnt, PV_APPEND | PV_NO_SCRATCH)) goto error; /* Sort direction. */ if (lex_match (lexer, T_LPAREN)) { if (lex_match_id (lexer, "D") || lex_match_id (lexer, "DOWN")) direction = SC_DESCEND; else if (lex_match_id (lexer, "A") || lex_match_id (lexer, "UP")) direction = SC_ASCEND; else { lex_error_expecting (lexer, "A", "D", NULL_SENTINEL); goto error; } if (!lex_force_match (lexer, T_RPAREN)) goto error; if (saw_direction != NULL) *saw_direction = true; } else direction = SC_ASCEND; for (i = prev_var_cnt; i < var_cnt; i++) { const struct variable *var = (*vars)[i]; if (!subcase_add_var (ordering, var, direction)) msg (SW, _("Variable %s specified twice in sort criteria."), var_get_name (var)); } } while (lex_token (lexer) == T_ID && dict_lookup_var (dict, lex_tokcstr (lexer)) != NULL); free (local_vars); return true; error: free (local_vars); if (vars) *vars = NULL; return false; }
/* Parses the whole DO REPEAT command specification. Returns success. */ static bool parse_specification (struct lexer *lexer, struct dictionary *dict, struct hmap *dummies) { struct dummy_var *first_dv = NULL; hmap_init (dummies); do { struct dummy_var *dv; const char *name; bool ok; /* Get a stand-in variable name and make sure it's unique. */ if (!lex_force_id (lexer)) goto error; name = lex_tokcstr (lexer); if (dict_lookup_var (dict, name)) msg (SW, _("Dummy variable name `%s' hides dictionary variable `%s'."), name, name); if (find_dummy_var (dummies, name, strlen (name))) { msg (SE, _("Dummy variable name `%s' is given twice."), name); goto error; } /* Make a new macro. */ dv = xmalloc (sizeof *dv); dv->name = xstrdup (name); dv->values = NULL; dv->n_values = 0; hmap_insert (dummies, &dv->hmap_node, hash_dummy (name, strlen (name))); /* Skip equals sign. */ lex_get (lexer); if (!lex_force_match (lexer, T_EQUALS)) goto error; /* Get the details of the variable's possible values. */ if (lex_token (lexer) == T_ID || lex_token (lexer) == T_ALL) ok = parse_ids (lexer, dict, dv); else if (lex_is_number (lexer)) ok = parse_numbers (lexer, dv); else if (lex_is_string (lexer)) ok = parse_strings (lexer, dv); else { lex_error (lexer, NULL); goto error; } if (!ok) goto error; assert (dv->n_values > 0); if (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD) { lex_error (lexer, NULL); goto error; } /* If this is the first variable then it defines how many replacements there must be; otherwise enforce this number of replacements. 
*/ if (first_dv == NULL) first_dv = dv; else if (first_dv->n_values != dv->n_values) { msg (SE, _("Dummy variable `%s' had %zu substitutions, so `%s' must " "also, but %zu were specified."), first_dv->name, first_dv->n_values, dv->name, dv->n_values); goto error; } lex_match (lexer, T_SLASH); } while (!lex_match (lexer, T_ENDCMD)); while (lex_match (lexer, T_ENDCMD)) continue; return true; error: destroy_dummies (dummies); return false; }