/* Tears down the state of an XSAVE or XEXPORT transformation.

   TRNS_ is the `struct output_trns' being released.  Its writer is destroyed
   first and then the structure itself is freed.

   Returns the result of destroying the writer: true on success, false if an
   I/O error occurred. */
static bool
output_trns_free (void *trns_)
{
  struct output_trns *trns = trns_;
  struct casewriter *writer = trns->writer;
  bool success;

  success = casewriter_destroy (writer);
  free (trns);

  return success;
}
/* Releases every resource held by PROC: its input files, dictionary, output
   writer, case matcher, the saved previous BY values (if any), the BY
   subcase, and the buffered case.  PROC itself is not freed. */
static void
free_comb_proc (struct comb_proc *proc)
{
  close_all_comb_files (proc);
  dict_destroy (proc->dict);
  casewriter_destroy (proc->output);
  case_matcher_destroy (proc->matcher);

  /* The saved BY values must be released while the BY subcase still exists,
     because the subcase supplies the prototype that describes them. */
  if (proc->prev_BY != NULL)
    {
      caseproto_destroy_values (subcase_get_proto (&proc->by_vars),
                                proc->prev_BY);
      free (proc->prev_BY);
    }

  subcase_destroy (&proc->by_vars);
  case_unref (proc->buffered_case);
}
/* Parses and performs the SAVE or EXPORT procedure. */ static int parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type) { bool retain_unselected; struct casewriter *output; bool ok; output = parse_write_command (lexer, ds, writer_type, PROC_CMD, &retain_unselected); if (output == NULL) return CMD_CASCADING_FAILURE; casereader_transfer (proc_open_filtering (ds, !retain_unselected), output); ok = casewriter_destroy (output); ok = proc_commit (ds) && ok; return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; }
int cmd_save_translate (struct lexer *lexer, struct dataset *ds) { enum { CSV_FILE = 1, TAB_FILE } type; struct dictionary *dict; struct case_map *map; struct casewriter *writer; struct file_handle *handle; struct csv_writer_options csv_opts; bool replace; bool retain_unselected; bool recode_user_missing; bool include_var_names; bool use_value_labels; bool use_print_formats; char decimal; char delimiter; char qualifier; bool ok; type = 0; dict = dict_clone (dataset_dict (ds)); map = NULL; handle = NULL; replace = false; retain_unselected = true; recode_user_missing = false; include_var_names = false; use_value_labels = false; use_print_formats = false; decimal = settings_get_decimal_char (FMT_F); delimiter = 0; qualifier = '"'; case_map_prepare_dict (dict); dict_delete_scratch_vars (dict); while (lex_match (lexer, T_SLASH)) { if (lex_match_id (lexer, "OUTFILE")) { if (handle != NULL) { lex_sbc_only_once ("OUTFILE"); goto error; } lex_match (lexer, T_EQUALS); handle = fh_parse (lexer, FH_REF_FILE, NULL); if (handle == NULL) goto error; } else if (lex_match_id (lexer, "TYPE")) { if (type != 0) { lex_sbc_only_once ("TYPE"); goto error; } lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "CSV")) type = CSV_FILE; else if (lex_match_id (lexer, "TAB")) type = TAB_FILE; else { lex_error_expecting (lexer, "CSV", "TAB", NULL_SENTINEL); goto error; } } else if (lex_match_id (lexer, "REPLACE")) replace = true; else if (lex_match_id (lexer, "FIELDNAMES")) include_var_names = true; else if (lex_match_id (lexer, "MISSING")) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "IGNORE")) recode_user_missing = false; else if (lex_match_id (lexer, "RECODE")) recode_user_missing = true; else { lex_error_expecting (lexer, "IGNORE", "RECODE", NULL_SENTINEL); goto error; } } else if (lex_match_id (lexer, "CELLS")) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "VALUES")) use_value_labels = false; else if (lex_match_id (lexer, "LABELS")) use_value_labels = true; else { 
lex_error_expecting (lexer, "VALUES", "LABELS", NULL_SENTINEL); goto error; } } else if (lex_match_id (lexer, "TEXTOPTIONS")) { lex_match (lexer, T_EQUALS); for (;;) { if (lex_match_id (lexer, "DELIMITER")) { lex_match (lexer, T_EQUALS); if (!lex_force_string (lexer)) goto error; /* XXX should support multibyte UTF-8 delimiters */ if (ss_length (lex_tokss (lexer)) != 1) { msg (SE, _("The %s string must contain exactly one " "character."), "DELIMITER"); goto error; } delimiter = ss_first (lex_tokss (lexer)); lex_get (lexer); } else if (lex_match_id (lexer, "QUALIFIER")) { lex_match (lexer, T_EQUALS); if (!lex_force_string (lexer)) goto error; /* XXX should support multibyte UTF-8 qualifiers */ if (ss_length (lex_tokss (lexer)) != 1) { msg (SE, _("The %s string must contain exactly one " "character."), "QUALIFIER"); goto error; } qualifier = ss_first (lex_tokss (lexer)); lex_get (lexer); } else if (lex_match_id (lexer, "DECIMAL")) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "DOT")) decimal = '.'; else if (lex_match_id (lexer, "COMMA")) decimal = ','; else { lex_error_expecting (lexer, "DOT", "COMMA", NULL_SENTINEL); goto error; } } else if (lex_match_id (lexer, "FORMAT")) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "PLAIN")) use_print_formats = false; else if (lex_match_id (lexer, "VARIABLE")) use_print_formats = true; else { lex_error_expecting (lexer, "PLAIN", "VARIABLE", NULL_SENTINEL); goto error; } } else break; } } else if (lex_match_id (lexer, "UNSELECTED")) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "RETAIN")) retain_unselected = true; else if (lex_match_id (lexer, "DELETE")) retain_unselected = false; else { lex_error_expecting (lexer, "RETAIN", "DELETE", NULL_SENTINEL); goto error; } } else if (!parse_dict_trim (lexer, dict)) goto error; } if (lex_end_of_command (lexer) != CMD_SUCCESS) goto error; if (type == 0) { lex_sbc_missing ("TYPE"); goto error; } else if (handle == NULL) { lex_sbc_missing ("OUTFILE"); goto error; 
} else if (!replace && fn_exists (fh_get_file_name (handle))) { msg (SE, _("Output file `%s' exists but REPLACE was not specified."), fh_get_file_name (handle)); goto error; } dict_delete_scratch_vars (dict); dict_compact_values (dict); csv_opts.recode_user_missing = recode_user_missing; csv_opts.include_var_names = include_var_names; csv_opts.use_value_labels = use_value_labels; csv_opts.use_print_formats = use_print_formats; csv_opts.decimal = decimal; csv_opts.delimiter = (delimiter ? delimiter : type == TAB_FILE ? '\t' : decimal == '.' ? ',' : ';'); csv_opts.qualifier = qualifier; writer = csv_writer_open (handle, dict, &csv_opts); if (writer == NULL) goto error; fh_unref (handle); map = case_map_from_dict (dict); if (map != NULL) writer = case_map_create_output_translator (map, writer); dict_destroy (dict); casereader_transfer (proc_open_filtering (ds, !retain_unselected), writer); ok = casewriter_destroy (writer); ok = proc_commit (ds) && ok; return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; error: fh_unref (handle); dict_destroy (dict); case_map_destroy (map); return CMD_FAILURE; }
/* Writes an aggregated record to OUTPUT. */ static void dump_aggregate_info (const struct agr_proc *agr, struct casewriter *output, const struct ccase *break_case) { struct ccase *c = case_create (dict_get_proto (agr->dict)); if ( agr->add_variables) { case_copy (c, 0, break_case, 0, dict_get_var_cnt (agr->src_dict)); } else { int value_idx = 0; int i; for (i = 0; i < agr->break_var_cnt; i++) { const struct variable *v = agr->break_vars[i]; value_copy (case_data_rw_idx (c, value_idx), case_data (break_case, v), var_get_width (v)); value_idx++; } } { struct agr_var *i; for (i = agr->agr_vars; i; i = i->next) { union value *v = case_data_rw (c, i->dest); int width = var_get_width (i->dest); if (agr->missing == COLUMNWISE && i->saw_missing && (i->function & FUNC) != N && (i->function & FUNC) != NU && (i->function & FUNC) != NMISS && (i->function & FUNC) != NUMISS) { value_set_missing (v, width); casewriter_destroy (i->writer); continue; } switch (i->function) { case SUM: v->f = i->int1 ? i->dbl[0] : SYSMIS; break; case MEAN: v->f = i->dbl[1] != 0.0 ? i->dbl[0] / i->dbl[1] : SYSMIS; break; case MEDIAN: { if ( i->writer) { struct percentile *median = percentile_create (0.5, i->cc); struct order_stats *os = &median->parent; struct casereader *sorted_reader = casewriter_make_reader (i->writer); i->writer = NULL; order_stats_accumulate (&os, 1, sorted_reader, i->weight, i->subject, i->exclude); i->dbl[0] = percentile_calculate (median, PC_HAVERAGE); statistic_destroy (&median->parent.parent); } v->f = i->dbl[0]; } break; case SD: { double variance; /* FIXME: we should use two passes. */ moments1_calculate (i->moments, NULL, NULL, &variance, NULL, NULL); if (variance != SYSMIS) v->f = sqrt (variance); else v->f = SYSMIS; } break; case MAX: case MIN: v->f = i->int1 ? 
i->dbl[0] : SYSMIS; break; case MAX | FSTRING: case MIN | FSTRING: if (i->int1) memcpy (value_str_rw (v, width), i->string, width); else value_set_missing (v, width); break; case FGT: case FGT | FSTRING: case FLT: case FLT | FSTRING: case FIN: case FIN | FSTRING: case FOUT: case FOUT | FSTRING: v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] : SYSMIS; break; case PGT: case PGT | FSTRING: case PLT: case PLT | FSTRING: case PIN: case PIN | FSTRING: case POUT: case POUT | FSTRING: v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] * 100.0 : SYSMIS; break; case N: case N | FSTRING: v->f = i->dbl[0]; break; case NU: case NU | FSTRING: v->f = i->int1; break; case FIRST: case LAST: v->f = i->int1 ? i->dbl[0] : SYSMIS; break; case FIRST | FSTRING: case LAST | FSTRING: if (i->int1) memcpy (value_str_rw (v, width), i->string, width); else value_set_missing (v, width); break; case NMISS: case NMISS | FSTRING: v->f = i->dbl[0]; break; case NUMISS: case NUMISS | FSTRING: v->f = i->int1; break; default: NOT_REACHED (); } } } casewriter_write (output, c); }
/* Parses and executes the AGGREGATE procedure. */ int cmd_aggregate (struct lexer *lexer, struct dataset *ds) { struct dictionary *dict = dataset_dict (ds); struct agr_proc agr; struct file_handle *out_file = NULL; struct casereader *input = NULL, *group; struct casegrouper *grouper; struct casewriter *output = NULL; bool copy_documents = false; bool presorted = false; bool saw_direction; bool ok; memset(&agr, 0 , sizeof (agr)); agr.missing = ITEMWISE; agr.src_dict = dict; subcase_init_empty (&agr.sort); /* OUTFILE subcommand must be first. */ lex_match (lexer, T_SLASH); if (!lex_force_match_id (lexer, "OUTFILE")) goto error; lex_match (lexer, T_EQUALS); if (!lex_match (lexer, T_ASTERISK)) { out_file = fh_parse (lexer, FH_REF_FILE, dataset_session (ds)); if (out_file == NULL) goto error; } if (out_file == NULL && lex_match_id (lexer, "MODE")) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "ADDVARIABLES")) { agr.add_variables = true; /* presorted is assumed in ADDVARIABLES mode */ presorted = true; } else if (lex_match_id (lexer, "REPLACE")) { agr.add_variables = false; } else goto error; } if ( agr.add_variables ) agr.dict = dict_clone (dict); else agr.dict = dict_create (dict_get_encoding (dict)); dict_set_label (agr.dict, dict_get_label (dict)); dict_set_documents (agr.dict, dict_get_documents (dict)); /* Read most of the subcommands. */ for (;;) { lex_match (lexer, T_SLASH); if (lex_match_id (lexer, "MISSING")) { lex_match (lexer, T_EQUALS); if (!lex_match_id (lexer, "COLUMNWISE")) { lex_error_expecting (lexer, "COLUMNWISE", NULL); goto error; } agr.missing = COLUMNWISE; } else if (lex_match_id (lexer, "DOCUMENT")) copy_documents = true; else if (lex_match_id (lexer, "PRESORTED")) presorted = true; else if (lex_force_match_id (lexer, "BREAK")) { int i; lex_match (lexer, T_EQUALS); if (!parse_sort_criteria (lexer, dict, &agr.sort, &agr.break_vars, &saw_direction)) goto error; agr.break_var_cnt = subcase_get_n_fields (&agr.sort); if (! 
agr.add_variables) for (i = 0; i < agr.break_var_cnt; i++) dict_clone_var_assert (agr.dict, agr.break_vars[i]); /* BREAK must follow the options. */ break; } else goto error; } if (presorted && saw_direction) msg (SW, _("When PRESORTED is specified, specifying sorting directions " "with (A) or (D) has no effect. Output data will be sorted " "the same way as the input data.")); /* Read in the aggregate functions. */ lex_match (lexer, T_SLASH); if (!parse_aggregate_functions (lexer, dict, &agr)) goto error; /* Delete documents. */ if (!copy_documents) dict_clear_documents (agr.dict); /* Cancel SPLIT FILE. */ dict_set_split_vars (agr.dict, NULL, 0); /* Initialize. */ agr.case_cnt = 0; if (out_file == NULL) { /* The active dataset will be replaced by the aggregated data, so TEMPORARY is moot. */ proc_cancel_temporary_transformations (ds); proc_discard_output (ds); output = autopaging_writer_create (dict_get_proto (agr.dict)); } else { output = any_writer_open (out_file, agr.dict); if (output == NULL) goto error; } input = proc_open (ds); if (!subcase_is_empty (&agr.sort) && !presorted) { input = sort_execute (input, &agr.sort); subcase_clear (&agr.sort); } for (grouper = casegrouper_create_vars (input, agr.break_vars, agr.break_var_cnt); casegrouper_get_next_group (grouper, &group); casereader_destroy (group)) { struct casereader *placeholder = NULL; struct ccase *c = casereader_peek (group, 0); if (c == NULL) { casereader_destroy (group); continue; } initialize_aggregate_info (&agr); if ( agr.add_variables ) placeholder = casereader_clone (group); { struct ccase *cg; for (; (cg = casereader_read (group)) != NULL; case_unref (cg)) accumulate_aggregate_info (&agr, cg); } if (agr.add_variables) { struct ccase *cg; for (; (cg = casereader_read (placeholder)) != NULL; case_unref (cg)) dump_aggregate_info (&agr, output, cg); casereader_destroy (placeholder); } else { dump_aggregate_info (&agr, output, c); } case_unref (c); } if (!casegrouper_destroy (grouper)) goto error; 
if (!proc_commit (ds)) { input = NULL; goto error; } input = NULL; if (out_file == NULL) { struct casereader *next_input = casewriter_make_reader (output); if (next_input == NULL) goto error; dataset_set_dict (ds, agr.dict); dataset_set_source (ds, next_input); agr.dict = NULL; } else { ok = casewriter_destroy (output); output = NULL; if (!ok) goto error; } agr_destroy (&agr); fh_unref (out_file); return CMD_SUCCESS; error: if (input != NULL) proc_commit (ds); casewriter_destroy (output); agr_destroy (&agr); fh_unref (out_file); return CMD_CASCADING_FAILURE; }
/* Parses SAVE or XSAVE or EXPORT or XEXPORT command. WRITER_TYPE identifies the type of file to write, and COMMAND_TYPE identifies the type of command. On success, returns a writer. For procedures only, sets *RETAIN_UNSELECTED to true if cases that would otherwise be excluded by FILTER or USE should be included. On failure, returns a null pointer. */ static struct casewriter * parse_write_command (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type, enum command_type command_type, bool *retain_unselected) { /* Common data. */ struct file_handle *handle; /* Output file. */ struct dictionary *dict; /* Dictionary for output file. */ struct casewriter *writer; /* Writer. */ struct case_map *map; /* Map from input data to data for writer. */ /* Common options. */ bool print_map; /* Print map? TODO. */ bool print_short_names; /* Print long-to-short name map. TODO. */ struct sfm_write_options sysfile_opts; struct pfm_write_options porfile_opts; assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER); assert (command_type == XFORM_CMD || command_type == PROC_CMD); assert ((retain_unselected != NULL) == (command_type == PROC_CMD)); if (command_type == PROC_CMD) *retain_unselected = true; handle = NULL; dict = dict_clone (dataset_dict (ds)); writer = NULL; map = NULL; print_map = false; print_short_names = false; sysfile_opts = sfm_writer_default_options (); porfile_opts = pfm_writer_default_options (); case_map_prepare_dict (dict); dict_delete_scratch_vars (dict); lex_match (lexer, T_SLASH); for (;;) { if (lex_match_id (lexer, "OUTFILE")) { if (handle != NULL) { lex_sbc_only_once ("OUTFILE"); goto error; } lex_match (lexer, T_EQUALS); handle = fh_parse (lexer, FH_REF_FILE, NULL); if (handle == NULL) goto error; } else if (lex_match_id (lexer, "NAMES")) print_short_names = true; else if (lex_match_id (lexer, "PERMISSIONS")) { bool cw; lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "READONLY")) cw = false; else if (lex_match_id 
(lexer, "WRITEABLE")) cw = true; else { lex_error_expecting (lexer, "READONLY", "WRITEABLE", NULL_SENTINEL); goto error; } sysfile_opts.create_writeable = porfile_opts.create_writeable = cw; } else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED")) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "RETAIN")) *retain_unselected = true; else if (lex_match_id (lexer, "DELETE")) *retain_unselected = false; else { lex_error_expecting (lexer, "RETAIN", "DELETE", NULL_SENTINEL); goto error; } } else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "COMPRESSED")) sysfile_opts.compress = true; else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "UNCOMPRESSED")) sysfile_opts.compress = false; else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "VERSION")) { lex_match (lexer, T_EQUALS); if (!lex_force_int (lexer)) goto error; sysfile_opts.version = lex_integer (lexer); lex_get (lexer); } else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE")) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "COMMUNICATIONS")) porfile_opts.type = PFM_COMM; else if (lex_match_id (lexer, "TAPE")) porfile_opts.type = PFM_TAPE; else { lex_error_expecting (lexer, "COMM", "TAPE", NULL_SENTINEL); goto error; } } else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS")) { lex_match (lexer, T_EQUALS); if (!lex_force_int (lexer)) goto error; porfile_opts.digits = lex_integer (lexer); lex_get (lexer); } else if (!parse_dict_trim (lexer, dict)) goto error; if (!lex_match (lexer, T_SLASH)) break; } if (lex_end_of_command (lexer) != CMD_SUCCESS) goto error; if (handle == NULL) { lex_sbc_missing ("OUTFILE"); goto error; } dict_delete_scratch_vars (dict); dict_compact_values (dict); if (fh_get_referent (handle) == FH_REF_FILE) { switch (writer_type) { case SYSFILE_WRITER: writer = sfm_open_writer (handle, dict, sysfile_opts); break; case PORFILE_WRITER: writer = pfm_open_writer (handle, dict, porfile_opts); break; } } 
else writer = any_writer_open (handle, dict); if (writer == NULL) goto error; map = case_map_from_dict (dict); if (map != NULL) writer = case_map_create_output_translator (map, writer); dict_destroy (dict); fh_unref (handle); return writer; error: fh_unref (handle); casewriter_destroy (writer); dict_destroy (dict); case_map_destroy (map); return NULL; }