/* Call this function for every case in the data set. After all cases have been passed, call covariance_calculate */ void covariance_accumulate (struct covariance *cov, const struct ccase *c) { size_t i, j, m; const double weight = cov->wv ? case_data (c, cov->wv)->f : 1.0; assert (cov->passes == 1); if ( !cov->pass_one_first_case_seen) { assert ( cov->state == 0); cov->state = 1; } for (i = 0 ; i < cov->dim; ++i) { const union value *val1 = case_data (c, cov->vars[i]); if ( is_missing (cov, i, c)) continue; for (j = 0 ; j < cov->dim; ++j) { double pwr = 1.0; int idx; const union value *val2 = case_data (c, cov->vars[j]); if ( is_missing (cov, j, c)) continue; idx = cm_idx (cov, i, j); if (idx >= 0) { cov->cm [idx] += val1->f * val2->f * weight; } for (m = 0 ; m < n_MOMENTS; ++m) { double *x = gsl_matrix_ptr (cov->moments[m], i, j); *x += pwr * weight; pwr *= val1->f; } } } cov->pass_one_first_case_seen = true; }
/* Returns true iff the variable corresponding to the Ith element of the
   covariance matrix has a missing value for case C.

   Elements below COV->n_vars map directly onto COV->vars; the remaining
   elements are looked up among the categorical interactions, whose first
   variable is the one tested. */
static bool
is_missing (const struct covariance *cov, int i, const struct ccase *c)
{
  const struct variable *var;

  if (i < cov->n_vars)
    var = cov->vars[i];
  else
    var = categoricals_get_interaction_by_subscript (cov->categoricals,
                                                     i - cov->n_vars)->vars[0];

  return var_is_value_missing (var, case_data (c, var), cov->exclude);
}
/* Dumps out the values of all the split variables for the case C. */ void output_split_file_values (const struct dataset *ds, const struct ccase *c) { const struct dictionary *dict = dataset_dict (ds); const struct variable *const *split; struct tab_table *t; size_t split_cnt; int i; split_cnt = dict_get_split_cnt (dict); if (split_cnt == 0) return; t = tab_create (3, split_cnt + 1); tab_vline (t, TAL_GAP, 1, 0, split_cnt); tab_vline (t, TAL_GAP, 2, 0, split_cnt); tab_text (t, 0, 0, TAB_NONE, _("Variable")); tab_text (t, 1, 0, TAB_LEFT, _("Value")); tab_text (t, 2, 0, TAB_LEFT, _("Label")); split = dict_get_split_vars (dict); for (i = 0; i < split_cnt; i++) { const struct variable *v = split[i]; char *s; const char *val_lab; const struct fmt_spec *print = var_get_print_format (v); tab_text_format (t, 0, i + 1, TAB_LEFT, "%s", var_get_name (v)); s = data_out (case_data (c, v), dict_get_encoding (dict), print); tab_text_format (t, 1, i + 1, 0, "%.*s", print->w, s); free (s); val_lab = var_lookup_value_label (v, case_data (c, v)); if (val_lab) tab_text (t, 2, i + 1, TAB_LEFT, val_lab); } tab_submit (t); }
/* Perform data_out for case CC, variable V, appending to STRING.

   The value of V in CC is formatted with V's print format and V's
   encoding, and the resulting text is appended to STRING. */
static void
data_out_g_string (GString *string, const struct variable *v,
                   const struct ccase *cc)
{
  const struct fmt_spec *fs = var_get_print_format (v);
  const union value *val = case_data (cc, v);
  char *s = data_out (val, var_get_encoding (v), fs);

  g_string_append (string, s);

  /* The string returned by data_out is released with free() elsewhere in
     this file; use the same allocator family here rather than g_free(). */
  free (s);
}
/* Returns the value of the Ith element of the covariance matrix for
   case C.

   Elements below COV->n_vars are read straight from the corresponding
   variable in COV->vars; the remaining elements are obtained from the
   categoricals' effects coding for the case. */
static double
get_val (const struct covariance *cov, int i, const struct ccase *c)
{
  if (i >= cov->n_vars)
    return categoricals_get_effects_code_for_case (cov->categoricals,
                                                   i - cov->n_vars, c);

  return case_data (c, cov->vars[i])->f;
}
/* Return the sum of all the item variables in S, evaluated for case C.

   AUX points to the struct cronbach S whose items are summed.  The case
   number N is not used. */
static double
append_sum (const struct ccase *c, casenumber n UNUSED, void *aux)
{
  double sum = 0;
  const struct cronbach *s = aux;
  int v;

  for (v = 0; v < s->n_items; ++v)
    sum += case_data (c, s->items[v])->f;

  return sum;
}
/* Update IA according to the contents of DICT and CREADER. CREADER will be destroyed by this function. */ void update_assistant (struct import_assistant *ia) { struct sheet_spec_page *ssp = ia->sheet_spec; int rows = 0; if (ssp->dict) { struct ccase *c; int col; ia->column_cnt = dict_get_var_cnt (ssp->dict); ia->columns = xcalloc (ia->column_cnt, sizeof (*ia->columns)); for (col = 0; col < ia->column_cnt ; ++col) { const struct variable *var = dict_get_var (ssp->dict, col); ia->columns[col].name = xstrdup (var_get_name (var)); ia->columns[col].contents = NULL; } for (; (c = casereader_read (ssp->reader)) != NULL; case_unref (c)) { rows++; for (col = 0; col < ia->column_cnt ; ++col) { char *ss; const struct variable *var = dict_get_var (ssp->dict, col); ia->columns[col].contents = xrealloc (ia->columns[col].contents, sizeof (struct substring) * rows); ss = data_out (case_data (c, var), dict_get_encoding (ssp->dict), var_get_print_format (var)); ia->columns[col].contents[rows - 1] = ss_cstr (ss); } if (rows > MAX_PREVIEW_LINES) { case_unref (c); break; } } } ia->file.line_cnt = rows; }
/* Call this function for every case in the data set */ void covariance_accumulate_pass1 (struct covariance *cov, const struct ccase *c) { size_t i, j, m; const double weight = cov->wv ? case_data (c, cov->wv)->f : 1.0; assert (cov->passes == 2); if (!cov->pass_one_first_case_seen) { assert (cov->state == 0); cov->state = 1; } if (cov->categoricals) categoricals_update (cov->categoricals, c); for (i = 0 ; i < cov->dim; ++i) { double v1 = get_val (cov, i, c); if ( is_missing (cov, i, c)) continue; for (j = 0 ; j < cov->dim; ++j) { double pwr = 1.0; if ( is_missing (cov, j, c)) continue; for (m = 0 ; m <= MOMENT_MEAN; ++m) { double *x = gsl_matrix_ptr (cov->moments[m], i, j); *x += pwr * weight; pwr *= v1; } } } cov->pass_one_first_case_seen = true; }
/* Call this function for every case in the data set */ void covariance_accumulate_pass2 (struct covariance *cov, const struct ccase *c) { size_t i, j; const double weight = cov->wv ? case_data (c, cov->wv)->f : 1.0; assert (cov->passes == 2); assert (cov->state >= 1); if (! cov->pass_two_first_case_seen) { size_t m; assert (cov->state == 1); cov->state = 2; if (cov->categoricals) categoricals_done (cov->categoricals); cov->dim = cov->n_vars; if (cov->categoricals) cov->dim += categoricals_df_total (cov->categoricals); cov->n_cm = (cov->dim * (cov->dim - 1) ) / 2; cov->cm = xcalloc (cov->n_cm, sizeof *cov->cm); /* Grow the moment matrices so that they're large enough to accommodate the categorical elements */ for (i = 0; i < n_MOMENTS; ++i) { cov->moments[i] = resize_matrix (cov->moments[i], cov->dim); } /* Populate the moments matrices with the categorical value elements */ for (i = cov->n_vars; i < cov->dim; ++i) { for (j = 0 ; j < cov->dim ; ++j) /* FIXME: This is WRONG !!! */ { double w = categoricals_get_weight_by_subscript (cov->categoricals, i - cov->n_vars); gsl_matrix_set (cov->moments[MOMENT_NONE], i, j, w); w = categoricals_get_sum_by_subscript (cov->categoricals, i - cov->n_vars); gsl_matrix_set (cov->moments[MOMENT_MEAN], i, j, w); } } /* FIXME: This is WRONG!! It must be fixed to properly handle missing values. 
For now it assumes there are none */ for (m = 0 ; m < n_MOMENTS; ++m) { for (i = 0 ; i < cov->dim ; ++i) { double x = gsl_matrix_get (cov->moments[m], i, cov->n_vars -1); for (j = cov->n_vars; j < cov->dim; ++j) { gsl_matrix_set (cov->moments[m], i, j, x); } } } /* Divide the means by the number of samples */ for (i = 0; i < cov->dim; ++i) { for (j = 0; j < cov->dim; ++j) { double *x = gsl_matrix_ptr (cov->moments[MOMENT_MEAN], i, j); *x /= gsl_matrix_get (cov->moments[MOMENT_NONE], i, j); } } } for (i = 0 ; i < cov->dim; ++i) { double v1 = get_val (cov, i, c); if ( is_missing (cov, i, c)) continue; for (j = 0 ; j < cov->dim; ++j) { int idx; double ss ; double v2 = get_val (cov, j, c); const double s = pow2 (v1 - gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)) * weight; if ( is_missing (cov, j, c)) continue; { double *x = gsl_matrix_ptr (cov->moments[MOMENT_VARIANCE], i, j); *x += s; } ss = (v1 - gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)) * (v2 - gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)) * weight ; idx = cm_idx (cov, i, j); if (idx >= 0) { cov->cm [idx] += ss; } } } cov->pass_two_first_case_seen = true; }
/* Writes an aggregated record to OUTPUT. */ static void dump_aggregate_info (const struct agr_proc *agr, struct casewriter *output, const struct ccase *break_case) { struct ccase *c = case_create (dict_get_proto (agr->dict)); if ( agr->add_variables) { case_copy (c, 0, break_case, 0, dict_get_var_cnt (agr->src_dict)); } else { int value_idx = 0; int i; for (i = 0; i < agr->break_var_cnt; i++) { const struct variable *v = agr->break_vars[i]; value_copy (case_data_rw_idx (c, value_idx), case_data (break_case, v), var_get_width (v)); value_idx++; } } { struct agr_var *i; for (i = agr->agr_vars; i; i = i->next) { union value *v = case_data_rw (c, i->dest); int width = var_get_width (i->dest); if (agr->missing == COLUMNWISE && i->saw_missing && (i->function & FUNC) != N && (i->function & FUNC) != NU && (i->function & FUNC) != NMISS && (i->function & FUNC) != NUMISS) { value_set_missing (v, width); casewriter_destroy (i->writer); continue; } switch (i->function) { case SUM: v->f = i->int1 ? i->dbl[0] : SYSMIS; break; case MEAN: v->f = i->dbl[1] != 0.0 ? i->dbl[0] / i->dbl[1] : SYSMIS; break; case MEDIAN: { if ( i->writer) { struct percentile *median = percentile_create (0.5, i->cc); struct order_stats *os = &median->parent; struct casereader *sorted_reader = casewriter_make_reader (i->writer); i->writer = NULL; order_stats_accumulate (&os, 1, sorted_reader, i->weight, i->subject, i->exclude); i->dbl[0] = percentile_calculate (median, PC_HAVERAGE); statistic_destroy (&median->parent.parent); } v->f = i->dbl[0]; } break; case SD: { double variance; /* FIXME: we should use two passes. */ moments1_calculate (i->moments, NULL, NULL, &variance, NULL, NULL); if (variance != SYSMIS) v->f = sqrt (variance); else v->f = SYSMIS; } break; case MAX: case MIN: v->f = i->int1 ? 
i->dbl[0] : SYSMIS; break; case MAX | FSTRING: case MIN | FSTRING: if (i->int1) memcpy (value_str_rw (v, width), i->string, width); else value_set_missing (v, width); break; case FGT: case FGT | FSTRING: case FLT: case FLT | FSTRING: case FIN: case FIN | FSTRING: case FOUT: case FOUT | FSTRING: v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] : SYSMIS; break; case PGT: case PGT | FSTRING: case PLT: case PLT | FSTRING: case PIN: case PIN | FSTRING: case POUT: case POUT | FSTRING: v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] * 100.0 : SYSMIS; break; case N: case N | FSTRING: v->f = i->dbl[0]; break; case NU: case NU | FSTRING: v->f = i->int1; break; case FIRST: case LAST: v->f = i->int1 ? i->dbl[0] : SYSMIS; break; case FIRST | FSTRING: case LAST | FSTRING: if (i->int1) memcpy (value_str_rw (v, width), i->string, width); else value_set_missing (v, width); break; case NMISS: case NMISS | FSTRING: v->f = i->dbl[0]; break; case NUMISS: case NUMISS | FSTRING: v->f = i->int1; break; default: NOT_REACHED (); } } } casewriter_write (output, c); }
/* Accumulates aggregation data from the case INPUT. */ static void accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input) { struct agr_var *iter; double weight; bool bad_warn = true; weight = dict_get_case_weight (agr->src_dict, input, &bad_warn); for (iter = agr->agr_vars; iter; iter = iter->next) if (iter->src) { const union value *v = case_data (input, iter->src); int src_width = var_get_width (iter->src); if (var_is_value_missing (iter->src, v, iter->exclude)) { switch (iter->function) { case NMISS: case NMISS | FSTRING: iter->dbl[0] += weight; break; case NUMISS: case NUMISS | FSTRING: iter->int1++; break; } iter->saw_missing = true; continue; } /* This is horrible. There are too many possibilities. */ switch (iter->function) { case SUM: iter->dbl[0] += v->f * weight; iter->int1 = 1; break; case MEAN: iter->dbl[0] += v->f * weight; iter->dbl[1] += weight; break; case MEDIAN: { double wv ; struct ccase *cout; cout = case_create (casewriter_get_proto (iter->writer)); case_data_rw (cout, iter->subject)->f = case_data (input, iter->src)->f; wv = dict_get_case_weight (agr->src_dict, input, NULL); case_data_rw (cout, iter->weight)->f = wv; iter->cc += wv; casewriter_write (iter->writer, cout); } break; case SD: moments1_add (iter->moments, v->f, weight); break; case MAX: iter->dbl[0] = MAX (iter->dbl[0], v->f); iter->int1 = 1; break; case MAX | FSTRING: /* Need to do some kind of Unicode collation thingy here */ if (memcmp (iter->string, value_str (v, src_width), src_width) < 0) memcpy (iter->string, value_str (v, src_width), src_width); iter->int1 = 1; break; case MIN: iter->dbl[0] = MIN (iter->dbl[0], v->f); iter->int1 = 1; break; case MIN | FSTRING: if (memcmp (iter->string, value_str (v, src_width), src_width) > 0) memcpy (iter->string, value_str (v, src_width), src_width); iter->int1 = 1; break; case FGT: case PGT: if (v->f > iter->arg[0].f) iter->dbl[0] += weight; iter->dbl[1] += weight; break; case FGT | FSTRING: case PGT | FSTRING: if 
(memcmp (iter->arg[0].c, value_str (v, src_width), src_width) < 0) iter->dbl[0] += weight; iter->dbl[1] += weight; break; case FLT: case PLT: if (v->f < iter->arg[0].f) iter->dbl[0] += weight; iter->dbl[1] += weight; break; case FLT | FSTRING: case PLT | FSTRING: if (memcmp (iter->arg[0].c, value_str (v, src_width), src_width) > 0) iter->dbl[0] += weight; iter->dbl[1] += weight; break; case FIN: case PIN: if (iter->arg[0].f <= v->f && v->f <= iter->arg[1].f) iter->dbl[0] += weight; iter->dbl[1] += weight; break; case FIN | FSTRING: case PIN | FSTRING: if (memcmp (iter->arg[0].c, value_str (v, src_width), src_width) <= 0 && memcmp (iter->arg[1].c, value_str (v, src_width), src_width) >= 0) iter->dbl[0] += weight; iter->dbl[1] += weight; break; case FOUT: case POUT: if (iter->arg[0].f > v->f || v->f > iter->arg[1].f) iter->dbl[0] += weight; iter->dbl[1] += weight; break; case FOUT | FSTRING: case POUT | FSTRING: if (memcmp (iter->arg[0].c, value_str (v, src_width), src_width) > 0 || memcmp (iter->arg[1].c, value_str (v, src_width), src_width) < 0) iter->dbl[0] += weight; iter->dbl[1] += weight; break; case N: case N | FSTRING: iter->dbl[0] += weight; break; case NU: case NU | FSTRING: iter->int1++; break; case FIRST: if (iter->int1 == 0) { iter->dbl[0] = v->f; iter->int1 = 1; } break; case FIRST | FSTRING: if (iter->int1 == 0) { memcpy (iter->string, value_str (v, src_width), src_width); iter->int1 = 1; } break; case LAST: iter->dbl[0] = v->f; iter->int1 = 1; break; case LAST | FSTRING: memcpy (iter->string, value_str (v, src_width), src_width); iter->int1 = 1; break; case NMISS: case NMISS | FSTRING: case NUMISS: case NUMISS | FSTRING: /* Our value is not missing or it would have been caught earlier. Nothing to do. */ break; default: NOT_REACHED (); } } else { switch (iter->function) { case N: iter->dbl[0] += weight; break; case NU: iter->int1++; break; default: NOT_REACHED (); } } }
static void do_reliability (struct casereader *input, struct dataset *ds, const struct reliability *rel) { int i; int si; struct ccase *c; casenumber n_missing ; casenumber n_valid = 0; for (si = 0 ; si < rel->n_sc; ++si) { struct cronbach *s = &rel->sc[si]; s->m = xzalloc (sizeof (s->m) * s->n_items); s->total = moments1_create (MOMENT_VARIANCE); for (i = 0 ; i < s->n_items ; ++i ) s->m[i] = moments1_create (MOMENT_VARIANCE); } input = casereader_create_filter_missing (input, rel->variables, rel->n_variables, rel->exclude, &n_missing, NULL); for (si = 0 ; si < rel->n_sc; ++si) { struct cronbach *s = &rel->sc[si]; s->totals_idx = caseproto_get_n_widths (casereader_get_proto (input)); input = casereader_create_append_numeric (input, append_sum, s, NULL); } for (; (c = casereader_read (input)) != NULL; case_unref (c)) { double weight = 1.0; n_valid ++; for (si = 0; si < rel->n_sc; ++si) { struct cronbach *s = &rel->sc[si]; for (i = 0 ; i < s->n_items ; ++i ) moments1_add (s->m[i], case_data (c, s->items[i])->f, weight); moments1_add (s->total, case_data_idx (c, s->totals_idx)->f, weight); } } casereader_destroy (input); for (si = 0; si < rel->n_sc; ++si) { struct cronbach *s = &rel->sc[si]; s->sum_of_variances = 0; for (i = 0 ; i < s->n_items ; ++i ) { double weight, mean, variance; moments1_calculate (s->m[i], &weight, &mean, &variance, NULL, NULL); s->sum_of_variances += variance; } moments1_calculate (s->total, NULL, NULL, &s->variance_of_sums, NULL, NULL); s->alpha = alpha (s->n_items, s->sum_of_variances, s->variance_of_sums); } text_item_submit (text_item_create_format (TEXT_ITEM_PARAGRAPH, _("Scale: %s"), ds_cstr (&rel->scale_name))); case_processing_summary (n_valid, n_missing, dataset_dict (ds)); }
/* Runs the sign test TEST (a two-sample test) on the cases from INPUT.

   For every variable pair, the case weights are tallied into positive
   differences, negative differences and ties, skipping cases where either
   value of the pair is missing according to EXCLUDE.  The one-tailed
   significance and point probability are then derived from the binomial
   distribution, and the frequency and statistics tables are output.
   INPUT is destroyed.  EXACT and TIMER are unused. */
void
sign_execute (const struct dataset *ds,
              struct casereader *input,
              enum mv_class exclude,
              const struct npar_test *test,
              bool exact UNUSED,
              double timer UNUSED)
{
  const struct dictionary *dict = dataset_dict (ds);
  const struct two_sample_test *t2s
    = UP_CAST (test, const struct two_sample_test, parent);
  struct sign_test_params *stp = xcalloc (t2s->n_pairs, sizeof *stp);
  struct ccase *c;
  bool warn = true;
  int i;

  while ((c = casereader_read (input)) != NULL)
    {
      const double weight = dict_get_case_weight (dict, c, &warn);

      for (i = 0; i < t2s->n_pairs; ++i)
        {
          variable_pair *vp = &t2s->pairs[i];
          const union value *first = case_data (c, (*vp)[0]);
          const union value *second = case_data (c, (*vp)[1]);
          double diff;

          if (var_is_value_missing ((*vp)[0], first, exclude))
            continue;

          if (var_is_value_missing ((*vp)[1], second, exclude))
            continue;

          diff = first->f - second->f;

          if (diff > 0)
            stp[i].pos += weight;
          else if (diff < 0)
            stp[i].neg += weight;
          else
            stp[i].ties += weight;
        }

      case_unref (c);
    }

  casereader_destroy (input);

  for (i = 0; i < t2s->n_pairs; ++i)
    {
      /* The smaller of the two tallies, truncated to an integer. */
      int smaller = MIN (stp[i].pos, stp[i].neg);

      stp[i].one_tailed_sig = gsl_cdf_binomial_P (smaller,
                                                  0.5,
                                                  stp[i].pos + stp[i].neg);

      stp[i].point_prob = gsl_ran_binomial_pdf (smaller, 0.5,
                                                stp[i].pos + stp[i].neg);
    }

  output_frequency_table (t2s, stp, dict);

  output_statistics_table (t2s, stp);

  free (stp);
}