/* Merge the dictionary for file F into master dictionary M. */ static bool merge_dictionary (struct dictionary *const m, struct comb_file *f) { struct dictionary *d = f->dict; const struct string_array *d_docs, *m_docs; int i; if (dict_get_label (m) == NULL) dict_set_label (m, dict_get_label (d)); d_docs = dict_get_documents (d); m_docs = dict_get_documents (m); /* FIXME: If the input files have different encodings, then the result is undefined. The correct thing to do would be to convert to an encoding which can cope with all the input files (eg UTF-8). */ if ( 0 != strcmp (dict_get_encoding (f->dict), dict_get_encoding (m))) msg (MW, _("Combining files with incompatible encodings. String data may " "not be represented correctly.")); if (d_docs != NULL) { if (m_docs == NULL) dict_set_documents (m, d_docs); else { struct string_array new_docs; size_t i; new_docs.n = m_docs->n + d_docs->n; new_docs.strings = xmalloc (new_docs.n * sizeof *new_docs.strings); for (i = 0; i < m_docs->n; i++) new_docs.strings[i] = m_docs->strings[i]; for (i = 0; i < d_docs->n; i++) new_docs.strings[m_docs->n + i] = d_docs->strings[i]; dict_set_documents (m, &new_docs); free (new_docs.strings); } } for (i = 0; i < dict_get_var_cnt (d); i++) { struct variable *dv = dict_get_var (d, i); struct variable *mv = dict_lookup_var (m, var_get_name (dv)); if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH) continue; if (mv != NULL) { if (var_get_width (mv) != var_get_width (dv)) { const char *var_name = var_get_name (dv); struct string s = DS_EMPTY_INITIALIZER; const char *file_name; file_name = f->handle ? 
fh_get_name (f->handle) : "*"; ds_put_format (&s, _("Variable %s in file %s has different " "type or width from the same variable in " "earlier file."), var_name, file_name); ds_put_cstr (&s, " "); if (var_is_numeric (dv)) ds_put_format (&s, _("In file %s, %s is numeric."), file_name, var_name); else ds_put_format (&s, _("In file %s, %s is a string variable " "with width %d."), file_name, var_name, var_get_width (dv)); ds_put_cstr (&s, " "); if (var_is_numeric (mv)) ds_put_format (&s, _("In an earlier file, %s was numeric."), var_name); else ds_put_format (&s, _("In an earlier file, %s was a string " "variable with width %d."), var_name, var_get_width (mv)); msg (SE, "%s", ds_cstr (&s)); ds_destroy (&s); return false; } if (var_has_value_labels (dv) && !var_has_value_labels (mv)) var_set_value_labels (mv, var_get_value_labels (dv)); if (var_has_missing_values (dv) && !var_has_missing_values (mv)) var_set_missing_values (mv, var_get_missing_values (dv)); if (var_get_label (dv) && !var_get_label (mv)) var_set_label (mv, var_get_label (dv)); } else mv = dict_clone_var_assert (m, dv); } return true; }
/* Sets the clip according to the currently selected range in the data
   sheet.

   The selected range of SHEET (or, if nothing is selected, its active
   cell) is trimmed to the rows and columns that actually hold data.  The
   variables in the resulting column range are cloned into a new
   dictionary stored in clip_dict, and the cases in the resulting row
   range, mapped onto that dictionary, are stored in clip_datasheet.  Any
   previous contents of clip_dict and clip_datasheet are destroyed first.
   Finally data_sheet_update_clipboard() is called for SHEET.

   Nothing is changed if there is neither a selection nor a usable active
   cell, or if the data store has no cases or no variables where the
   range would have to be trimmed. */
static void
data_sheet_set_clip (PsppireSheet *sheet)
{
  int i;
  struct casewriter *writer;
  PsppireSheetRange range;
  PsppireDataStore *ds;
  struct case_map *map = NULL;
  casenumber max_rows;
  size_t max_columns;
  gint row0, rowi;
  gint col0, coli;

  ds = PSPPIRE_DATA_STORE (psppire_sheet_get_model (sheet));

  psppire_sheet_get_selected_range (sheet, &range);

  /* Normalize the range so that row0 <= rowi and col0 <= coli. */
  col0 = MIN (range.col0, range.coli);
  coli = MAX (range.col0, range.coli);
  row0 = MIN (range.row0, range.rowi);
  rowi = MAX (range.row0, range.rowi);

  /* If nothing selected, then use active cell */
  if (row0 < 0 || col0 < 0)
    {
      gint row, col;
      psppire_sheet_get_active_cell (sheet, &row, &col);

      row0 = rowi = row;
      col0 = coli = col;
    }

  /* Without a valid active cell either, there is nothing to copy.
     Continuing would pass negative indexes to dict_get_var() and
     psppire_data_store_get_case() below.  Bail out before the existing
     clip is touched, like the other early returns. */
  if (row0 < 0 || col0 < 0)
    return;

  /* The sheet range can include cells that do not include data.
     Exclude them from the range. */
  max_rows = psppire_data_store_get_case_count (ds);
  if (rowi >= max_rows)
    {
      if (max_rows == 0)
        return;
      rowi = max_rows - 1;
    }
  max_columns = dict_get_var_cnt (ds->dict->dict);
  /* coli >= col0 >= 0 here, so the conversion to size_t is lossless. */
  if ((size_t) coli >= max_columns)
    {
      if (max_columns == 0)
        return;
      coli = max_columns - 1;
    }

  /* Destroy any existing clip */
  if (clip_datasheet)
    {
      casereader_destroy (clip_datasheet);
      clip_datasheet = NULL;
    }

  if (clip_dict)
    {
      dict_destroy (clip_dict);
      clip_dict = NULL;
    }

  /* Construct clip dictionary. */
  clip_dict = dict_create (dict_get_encoding (ds->dict->dict));
  for (i = col0; i <= coli; i++)
    dict_clone_var_assert (clip_dict, dict_get_var (ds->dict->dict, i));

  /* Construct clip data. */
  map = case_map_by_name (ds->dict->dict, clip_dict);
  writer = autopaging_writer_create (dict_get_proto (clip_dict));
  for (i = row0; i <= rowi; ++i)
    {
      struct ccase *old = psppire_data_store_get_case (ds, i);
      if (old != NULL)
        casewriter_write (writer, case_map_execute (map, old));
      else
        /* A case that cannot be fetched marks the whole clip as bad. */
        casewriter_force_error (writer);
    }
  case_map_destroy (map);

  clip_datasheet = casewriter_make_reader (writer);

  data_sheet_update_clipboard (sheet);
}
/* Parses and executes the AGGREGATE procedure. */ int cmd_aggregate (struct lexer *lexer, struct dataset *ds) { struct dictionary *dict = dataset_dict (ds); struct agr_proc agr; struct file_handle *out_file = NULL; struct casereader *input = NULL, *group; struct casegrouper *grouper; struct casewriter *output = NULL; bool copy_documents = false; bool presorted = false; bool saw_direction; bool ok; memset(&agr, 0 , sizeof (agr)); agr.missing = ITEMWISE; agr.src_dict = dict; subcase_init_empty (&agr.sort); /* OUTFILE subcommand must be first. */ lex_match (lexer, T_SLASH); if (!lex_force_match_id (lexer, "OUTFILE")) goto error; lex_match (lexer, T_EQUALS); if (!lex_match (lexer, T_ASTERISK)) { out_file = fh_parse (lexer, FH_REF_FILE, dataset_session (ds)); if (out_file == NULL) goto error; } if (out_file == NULL && lex_match_id (lexer, "MODE")) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "ADDVARIABLES")) { agr.add_variables = true; /* presorted is assumed in ADDVARIABLES mode */ presorted = true; } else if (lex_match_id (lexer, "REPLACE")) { agr.add_variables = false; } else goto error; } if ( agr.add_variables ) agr.dict = dict_clone (dict); else agr.dict = dict_create (dict_get_encoding (dict)); dict_set_label (agr.dict, dict_get_label (dict)); dict_set_documents (agr.dict, dict_get_documents (dict)); /* Read most of the subcommands. */ for (;;) { lex_match (lexer, T_SLASH); if (lex_match_id (lexer, "MISSING")) { lex_match (lexer, T_EQUALS); if (!lex_match_id (lexer, "COLUMNWISE")) { lex_error_expecting (lexer, "COLUMNWISE", NULL); goto error; } agr.missing = COLUMNWISE; } else if (lex_match_id (lexer, "DOCUMENT")) copy_documents = true; else if (lex_match_id (lexer, "PRESORTED")) presorted = true; else if (lex_force_match_id (lexer, "BREAK")) { int i; lex_match (lexer, T_EQUALS); if (!parse_sort_criteria (lexer, dict, &agr.sort, &agr.break_vars, &saw_direction)) goto error; agr.break_var_cnt = subcase_get_n_fields (&agr.sort); if (! 
agr.add_variables) for (i = 0; i < agr.break_var_cnt; i++) dict_clone_var_assert (agr.dict, agr.break_vars[i]); /* BREAK must follow the options. */ break; } else goto error; } if (presorted && saw_direction) msg (SW, _("When PRESORTED is specified, specifying sorting directions " "with (A) or (D) has no effect. Output data will be sorted " "the same way as the input data.")); /* Read in the aggregate functions. */ lex_match (lexer, T_SLASH); if (!parse_aggregate_functions (lexer, dict, &agr)) goto error; /* Delete documents. */ if (!copy_documents) dict_clear_documents (agr.dict); /* Cancel SPLIT FILE. */ dict_set_split_vars (agr.dict, NULL, 0); /* Initialize. */ agr.case_cnt = 0; if (out_file == NULL) { /* The active dataset will be replaced by the aggregated data, so TEMPORARY is moot. */ proc_cancel_temporary_transformations (ds); proc_discard_output (ds); output = autopaging_writer_create (dict_get_proto (agr.dict)); } else { output = any_writer_open (out_file, agr.dict); if (output == NULL) goto error; } input = proc_open (ds); if (!subcase_is_empty (&agr.sort) && !presorted) { input = sort_execute (input, &agr.sort); subcase_clear (&agr.sort); } for (grouper = casegrouper_create_vars (input, agr.break_vars, agr.break_var_cnt); casegrouper_get_next_group (grouper, &group); casereader_destroy (group)) { struct casereader *placeholder = NULL; struct ccase *c = casereader_peek (group, 0); if (c == NULL) { casereader_destroy (group); continue; } initialize_aggregate_info (&agr); if ( agr.add_variables ) placeholder = casereader_clone (group); { struct ccase *cg; for (; (cg = casereader_read (group)) != NULL; case_unref (cg)) accumulate_aggregate_info (&agr, cg); } if (agr.add_variables) { struct ccase *cg; for (; (cg = casereader_read (placeholder)) != NULL; case_unref (cg)) dump_aggregate_info (&agr, output, cg); casereader_destroy (placeholder); } else { dump_aggregate_info (&agr, output, c); } case_unref (c); } if (!casegrouper_destroy (grouper)) goto error; 
if (!proc_commit (ds)) { input = NULL; goto error; } input = NULL; if (out_file == NULL) { struct casereader *next_input = casewriter_make_reader (output); if (next_input == NULL) goto error; dataset_set_dict (ds, agr.dict); dataset_set_source (ds, next_input); agr.dict = NULL; } else { ok = casewriter_destroy (output); output = NULL; if (!ok) goto error; } agr_destroy (&agr); fh_unref (out_file); return CMD_SUCCESS; error: if (input != NULL) proc_commit (ds); casewriter_destroy (output); agr_destroy (&agr); fh_unref (out_file); return CMD_CASCADING_FAILURE; }