/* Executes the UPDATE command. */ static void execute_update (struct comb_proc *proc) { union value *by; size_t n_duplicates = 0; while (case_matcher_match (proc->matcher, &by)) { struct comb_file *first, *file; struct ccase *output; /* Find first nonnull case in array and make an output case from it. */ output = create_output_case (proc); for (first = &proc->files[0]; ; first++) if (first->is_minimal) break; apply_case (first, output); advance_file (first, by); /* Read additional cases and update the output case from them. (Don't update the output case from any duplicate cases in the master file.) */ for (file = first + (first == proc->files); file < &proc->files[proc->n_files]; file++) { while (file->is_minimal) { apply_nonmissing_case (file, output); advance_file (file, by); } } casewriter_write (proc->output, output); /* Write duplicate cases in the master file directly to the output. */ if (first == proc->files && first->is_minimal) { n_duplicates++; while (first->is_minimal) { output = create_output_case (proc); apply_case (first, output); advance_file (first, by); casewriter_write (proc->output, output); } } } if (n_duplicates) msg (SW, _("Encountered %zu sets of duplicate cases in the master file."), n_duplicates); }
/* Merges all of M's inputs (there must be at least two) into a single
   temporary-file writer, always emitting the smallest pending case
   according to M's ordering, and then replaces M's inputs by a single
   reader over the merged data.

   NOTE(review): the loops below rely on read_input_case() dropping an
   exhausted input and decrementing m->input_cnt when it returns
   false -- confirm against its definition. */
static void
do_merge (struct merge *m)
{
  struct casewriter *merged;
  size_t idx;

  assert (m->input_cnt > 1);

  /* Any taint on an input must carry over to the merged output. */
  merged = tmpfile_writer_create (m->proto);
  for (idx = 0; idx < m->input_cnt; idx++)
    {
      taint_propagate (casereader_get_taint (m->inputs[idx].reader),
                       casewriter_get_taint (merged));
    }

  /* Prime each input with its first case.  The index advances only
     when the read succeeds. */
  idx = 0;
  while (idx < m->input_cnt)
    {
      if (read_input_case (m, idx))
        idx++;
    }

  /* Repeatedly emit the least pending case and refill its slot. */
  while (m->input_cnt > 0)
    {
      size_t lowest = 0;

      for (idx = 1; idx < m->input_cnt; idx++)
        {
          int cmp = subcase_compare_3way (&m->ordering, m->inputs[idx].c,
                                          &m->ordering, m->inputs[lowest].c);
          if (cmp < 0)
            lowest = idx;
        }

      casewriter_write (merged, m->inputs[lowest].c);
      read_input_case (m, lowest);
    }

  m->input_cnt = 1;
  m->inputs[0].reader = casewriter_make_reader (merged);
}
/* Transformation callback for XSAVE and XEXPORT: takes a new
   reference to case *C and hands it to the writer stored in TRNS_.
   Always returns TRNS_CONTINUE.  CASE_NUM is unused. */
static int
output_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
{
  struct output_trns *trns = trns_;
  struct ccase *shared = case_ref (*c);

  casewriter_write (trns->writer, shared);
  return TRNS_CONTINUE;
}
/* Writes a trailing buffered case to the output, if FIRST or LAST is in use. */ static void output_buffered_case (struct comb_proc *proc) { if (proc->prev_BY != NULL) { if (proc->last != NULL) case_data_rw (proc->buffered_case, proc->last)->f = 1.0; casewriter_write (proc->output, proc->buffered_case); proc->buffered_case = NULL; } }
/* Sends OUTPUT, whose BY values have been extracted into BY, to
   PROC's output, filling in any FIRST or LAST variable on the way.

   Without FIRST and LAST the case is written immediately.
   Otherwise the case is held back: whether a case is the last of its
   BY group is known only once the following case has been prepared,
   so each call writes the previously buffered case and buffers
   OUTPUT in its place. */
static void
output_case (struct comb_proc *proc, struct ccase *output, union value by[])
{
  bool new_BY = true;

  if (proc->first == NULL && proc->last == NULL)
    {
      casewriter_write (proc->output, output);
      return;
    }

  if (proc->prev_BY != NULL)
    {
      /* A case is already buffered: it ends its group exactly when
         the BY values change with this case. */
      new_BY = !subcase_equal_xx (&proc->by_vars, proc->prev_BY, by);
      if (proc->last != NULL)
        case_data_rw (proc->buffered_case, proc->last)->f = new_BY;
      casewriter_write (proc->output, proc->buffered_case);
    }

  proc->buffered_case = output;
  if (proc->first != NULL)
    case_data_rw (proc->buffered_case, proc->first)->f = new_BY;

  if (!new_BY)
    return;

  /* Remember the BY values of the group that has just begun,
     allocating the storage the first time through. */
  {
    const struct caseproto *proto = subcase_get_proto (&proc->by_vars);
    size_t n_values = subcase_get_n_fields (&proc->by_vars);

    if (proc->prev_BY == NULL)
      {
        proc->prev_BY = xmalloc (n_values * sizeof *proc->prev_BY);
        caseproto_init_values (proto, proc->prev_BY);
      }
    caseproto_copy (proto, 0, n_values, proc->prev_BY, by);
  }
}
/* Replaces the clip (clip_dict and clip_datasheet) with a copy of the
   range currently selected in SHEET, then updates the clipboard.

   If nothing is selected, the active cell is used.  The range is
   trimmed to the cases and variables that actually exist in the data
   store; if there are none, the existing clip is left untouched. */
static void
data_sheet_set_clip (PsppireSheet *sheet)
{
  int idx;
  struct casewriter *clip_writer;
  PsppireSheetRange sel;
  PsppireDataStore *store;
  struct case_map *mapping = NULL;
  casenumber n_cases;
  size_t n_vars;
  gint top, bottom;
  gint left, right;

  store = PSPPIRE_DATA_STORE (psppire_sheet_get_model (sheet));

  psppire_sheet_get_selected_range (sheet, &sel);

  /* Normalize the selection so that top <= bottom and left <= right. */
  left = MIN (sel.col0, sel.coli);
  right = MAX (sel.col0, sel.coli);
  top = MIN (sel.row0, sel.rowi);
  bottom = MAX (sel.row0, sel.rowi);

  /* A negative origin means no selection: fall back on the active
     cell. */
  if (top < 0 || left < 0)
    {
      gint active_row, active_col;

      psppire_sheet_get_active_cell (sheet, &active_row, &active_col);
      top = bottom = active_row;
      left = right = active_col;
    }

  /* The selection may extend past the data.  Cut it back to the
     existing cases... */
  n_cases = psppire_data_store_get_case_count (store);
  if (bottom >= n_cases)
    {
      if (n_cases == 0)
        return;
      bottom = n_cases - 1;
    }

  /* ...and to the existing variables. */
  n_vars = dict_get_var_cnt (store->dict->dict);
  if (right >= n_vars)
    {
      if (n_vars == 0)
        return;
      right = n_vars - 1;
    }

  /* Discard whatever clip exists already. */
  if (clip_datasheet != NULL)
    {
      casereader_destroy (clip_datasheet);
      clip_datasheet = NULL;
    }

  if (clip_dict != NULL)
    {
      dict_destroy (clip_dict);
      clip_dict = NULL;
    }

  /* Build the clip dictionary out of the selected variables. */
  clip_dict = dict_create (dict_get_encoding (store->dict->dict));
  for (idx = left; idx <= right; idx++)
    dict_clone_var_assert (clip_dict, dict_get_var (store->dict->dict, idx));

  /* Build the clip data by mapping each selected case onto the clip
     dictionary.  A case that cannot be fetched marks the writer as
     failed. */
  mapping = case_map_by_name (store->dict->dict, clip_dict);
  clip_writer = autopaging_writer_create (dict_get_proto (clip_dict));
  for (idx = top; idx <= bottom; ++idx)
    {
      struct ccase *src = psppire_data_store_get_case (store, idx);

      if (src == NULL)
        casewriter_force_error (clip_writer);
      else
        casewriter_write (clip_writer, case_map_execute (mapping, src));
    }
  case_map_destroy (mapping);

  clip_datasheet = casewriter_make_reader (clip_writer);

  data_sheet_update_clipboard (sheet);
}
/* Writes an aggregated record to OUTPUT. */ static void dump_aggregate_info (const struct agr_proc *agr, struct casewriter *output, const struct ccase *break_case) { struct ccase *c = case_create (dict_get_proto (agr->dict)); if ( agr->add_variables) { case_copy (c, 0, break_case, 0, dict_get_var_cnt (agr->src_dict)); } else { int value_idx = 0; int i; for (i = 0; i < agr->break_var_cnt; i++) { const struct variable *v = agr->break_vars[i]; value_copy (case_data_rw_idx (c, value_idx), case_data (break_case, v), var_get_width (v)); value_idx++; } } { struct agr_var *i; for (i = agr->agr_vars; i; i = i->next) { union value *v = case_data_rw (c, i->dest); int width = var_get_width (i->dest); if (agr->missing == COLUMNWISE && i->saw_missing && (i->function & FUNC) != N && (i->function & FUNC) != NU && (i->function & FUNC) != NMISS && (i->function & FUNC) != NUMISS) { value_set_missing (v, width); casewriter_destroy (i->writer); continue; } switch (i->function) { case SUM: v->f = i->int1 ? i->dbl[0] : SYSMIS; break; case MEAN: v->f = i->dbl[1] != 0.0 ? i->dbl[0] / i->dbl[1] : SYSMIS; break; case MEDIAN: { if ( i->writer) { struct percentile *median = percentile_create (0.5, i->cc); struct order_stats *os = &median->parent; struct casereader *sorted_reader = casewriter_make_reader (i->writer); i->writer = NULL; order_stats_accumulate (&os, 1, sorted_reader, i->weight, i->subject, i->exclude); i->dbl[0] = percentile_calculate (median, PC_HAVERAGE); statistic_destroy (&median->parent.parent); } v->f = i->dbl[0]; } break; case SD: { double variance; /* FIXME: we should use two passes. */ moments1_calculate (i->moments, NULL, NULL, &variance, NULL, NULL); if (variance != SYSMIS) v->f = sqrt (variance); else v->f = SYSMIS; } break; case MAX: case MIN: v->f = i->int1 ? 
i->dbl[0] : SYSMIS; break; case MAX | FSTRING: case MIN | FSTRING: if (i->int1) memcpy (value_str_rw (v, width), i->string, width); else value_set_missing (v, width); break; case FGT: case FGT | FSTRING: case FLT: case FLT | FSTRING: case FIN: case FIN | FSTRING: case FOUT: case FOUT | FSTRING: v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] : SYSMIS; break; case PGT: case PGT | FSTRING: case PLT: case PLT | FSTRING: case PIN: case PIN | FSTRING: case POUT: case POUT | FSTRING: v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] * 100.0 : SYSMIS; break; case N: case N | FSTRING: v->f = i->dbl[0]; break; case NU: case NU | FSTRING: v->f = i->int1; break; case FIRST: case LAST: v->f = i->int1 ? i->dbl[0] : SYSMIS; break; case FIRST | FSTRING: case LAST | FSTRING: if (i->int1) memcpy (value_str_rw (v, width), i->string, width); else value_set_missing (v, width); break; case NMISS: case NMISS | FSTRING: v->f = i->dbl[0]; break; case NUMISS: case NUMISS | FSTRING: v->f = i->int1; break; default: NOT_REACHED (); } } } casewriter_write (output, c); }
/* Accumulates aggregation data from the case INPUT.

   The case weight is looked up once in AGR's source dictionary and
   then every aggregation variable of AGR is updated according to its
   function.  Each variable keeps its running state in dbl[0],
   dbl[1], int1, string, moments, or writer/cc, depending on the
   function.

   For a variable that has a source variable, a source value that
   counts as missing (per the variable's `exclude' setting) only bumps
   the NMISS/NUMISS counters and sets saw_missing; it takes no part in
   any other function.  A variable without a source variable may only
   be N or NU. */
static void
accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input)
{
  struct agr_var *iter;
  double weight;
  bool bad_warn = true;

  weight = dict_get_case_weight (agr->src_dict, input, &bad_warn);

  for (iter = agr->agr_vars; iter; iter = iter->next)
    if (iter->src)
      {
        const union value *v = case_data (input, iter->src);
        int src_width = var_get_width (iter->src);

        /* Missing source value: count it where a count is wanted,
           remember that one was seen, and skip the rest. */
        if (var_is_value_missing (iter->src, v, iter->exclude))
          {
            switch (iter->function)
              {
              case NMISS:
              case NMISS | FSTRING:
                iter->dbl[0] += weight;
                break;
              case NUMISS:
              case NUMISS | FSTRING:
                iter->int1++;
                break;
              }
            iter->saw_missing = true;
            continue;
          }

        /* This is horrible.  There are too many possibilities. */
        switch (iter->function)
          {
          case SUM:
            /* dbl[0]: weighted sum; int1: set once a value was seen. */
            iter->dbl[0] += v->f * weight;
            iter->int1 = 1;
            break;
          case MEAN:
            /* dbl[0]: weighted sum; dbl[1]: sum of weights. */
            iter->dbl[0] += v->f * weight;
            iter->dbl[1] += weight;
            break;
          case MEDIAN:
            {
              /* Store the value and its weight as a two-variable case
                 in the variable's writer, and add the weight to cc.
                 NOTE(review): the weight is fetched again here with a
                 null warning flag rather than reusing `weight' --
                 presumably equivalent; confirm. */
              double wv ;
              struct ccase *cout;

              cout = case_create (casewriter_get_proto (iter->writer));

              case_data_rw (cout, iter->subject)->f = case_data (input, iter->src)->f;

              wv = dict_get_case_weight (agr->src_dict, input, NULL);

              case_data_rw (cout, iter->weight)->f = wv;

              iter->cc += wv;

              casewriter_write (iter->writer, cout);
            }
            break;
          case SD:
            moments1_add (iter->moments, v->f, weight);
            break;
          case MAX:
            iter->dbl[0] = MAX (iter->dbl[0], v->f);
            iter->int1 = 1;
            break;
          case MAX | FSTRING:
            /* Need to do some kind of Unicode collation thingy here */
            /* For now strings are compared bytewise over src_width
               bytes. */
            if (memcmp (iter->string, value_str (v, src_width), src_width) < 0)
              memcpy (iter->string, value_str (v, src_width), src_width);
            iter->int1 = 1;
            break;
          case MIN:
            iter->dbl[0] = MIN (iter->dbl[0], v->f);
            iter->int1 = 1;
            break;
          case MIN | FSTRING:
            if (memcmp (iter->string, value_str (v, src_width), src_width) > 0)
              memcpy (iter->string, value_str (v, src_width), src_width);
            iter->int1 = 1;
            break;
          /* For the F... and P... functions below, dbl[0] receives the
             weight of the cases that satisfy the criterion and dbl[1]
             the weight of all cases that get here. */
          case FGT:
          case PGT:
            if (v->f > iter->arg[0].f)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
          case FGT | FSTRING:
          case PGT | FSTRING:
            /* arg[0] < value, i.e. the value is greater. */
            if (memcmp (iter->arg[0].c, value_str (v, src_width), src_width) < 0)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
          case FLT:
          case PLT:
            if (v->f < iter->arg[0].f)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
          case FLT | FSTRING:
          case PLT | FSTRING:
            /* arg[0] > value, i.e. the value is less. */
            if (memcmp (iter->arg[0].c, value_str (v, src_width), src_width) > 0)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
          case FIN:
          case PIN:
            /* Inside the closed interval [arg[0], arg[1]]. */
            if (iter->arg[0].f <= v->f && v->f <= iter->arg[1].f)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
          case FIN | FSTRING:
          case PIN | FSTRING:
            if (memcmp (iter->arg[0].c, value_str (v, src_width), src_width) <= 0 && memcmp (iter->arg[1].c, value_str (v, src_width), src_width) >= 0)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
          case FOUT:
          case POUT:
            /* Outside the closed interval [arg[0], arg[1]]. */
            if (iter->arg[0].f > v->f || v->f > iter->arg[1].f)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
          case FOUT | FSTRING:
          case POUT | FSTRING:
            if (memcmp (iter->arg[0].c, value_str (v, src_width), src_width) > 0 || memcmp (iter->arg[1].c, value_str (v, src_width), src_width) < 0)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
          case N:
          case N | FSTRING:
            /* Weighted count. */
            iter->dbl[0] += weight;
            break;
          case NU:
          case NU | FSTRING:
            /* Unweighted count. */
            iter->int1++;
            break;
          case FIRST:
            /* Only the first nonmissing value is kept; int1 records
               that one has been stored. */
            if (iter->int1 == 0)
              {
                iter->dbl[0] = v->f;
                iter->int1 = 1;
              }
            break;
          case FIRST | FSTRING:
            if (iter->int1 == 0)
              {
                memcpy (iter->string, value_str (v, src_width), src_width);
                iter->int1 = 1;
              }
            break;
          case LAST:
            /* Every nonmissing value overwrites the previous one. */
            iter->dbl[0] = v->f;
            iter->int1 = 1;
            break;
          case LAST | FSTRING:
            memcpy (iter->string, value_str (v, src_width), src_width);
            iter->int1 = 1;
            break;
          case NMISS:
          case NMISS | FSTRING:
          case NUMISS:
          case NUMISS | FSTRING:
            /* Our value is not missing or it would have been caught earlier. Nothing to do. */
            break;
          default:
            NOT_REACHED ();
          }
      }
    else
      {
        /* No source variable: only the plain case counts make
           sense. */
        switch (iter->function)
          {
          case N:
            iter->dbl[0] += weight;
            break;
          case NU:
            iter->int1++;
            break;
          default:
            NOT_REACHED ();
          }
      }
}