Exemple #1
0
/* Executes the UPDATE command.

   For every group for which case_matcher_match() succeeds, one
   output case is seeded from the first file flagged as minimal,
   then updated with the nonmissing values of the remaining
   minimal cases, and written to PROC's output.  Any further
   cases that the master file (the first file) holds for the same
   group are written to the output unchanged, and the number of
   groups in which that happened is reported in a warning. */
static void
execute_update (struct comb_proc *proc)
{
  size_t n_dup_groups = 0;
  union value *by;

  while (case_matcher_match (proc->matcher, &by))
    {
      struct ccase *merged = create_output_case (proc);
      struct comb_file *lead = proc->files;
      struct comb_file *cur;

      /* The first file that is minimal for this group seeds the
         output case.  (The scan has no upper bound: it relies on
         at least one file being flagged as minimal.) */
      while (!lead->is_minimal)
        lead++;
      apply_case (lead, merged);
      advance_file (lead, by);

      /* Update the output case from the remaining cases.  When
         the lead file is the master file, start at the file
         after it, so that duplicate master cases do not update
         the output case. */
      cur = lead == proc->files ? lead + 1 : lead;
      while (cur < &proc->files[proc->n_files])
        {
          while (cur->is_minimal)
            {
              apply_nonmissing_case (cur, merged);
              advance_file (cur, by);
            }
          cur++;
        }
      casewriter_write (proc->output, merged);

      /* Only the master file can contribute duplicates that are
         copied verbatim. */
      if (lead != proc->files || !lead->is_minimal)
        continue;

      n_dup_groups++;
      do
        {
          struct ccase *dup = create_output_case (proc);
          apply_case (lead, dup);
          advance_file (lead, by);
          casewriter_write (proc->output, dup);
        }
      while (lead->is_minimal);
    }

  if (n_dup_groups)
    msg (SW, _("Encountered %zu sets of duplicate cases in the master file."),
         n_dup_groups);
}
Exemple #2
0
/* Merges all of M's inputs into one new temporary-file writer,
   repeatedly writing the input case that compares lowest under
   M's ordering, and then makes a reader over the merged data M's
   single remaining input.  M must have more than one input. */
static void
do_merge (struct merge *m)
{
  struct casewriter *merged;
  size_t idx;

  assert (m->input_cnt > 1);

  merged = tmpfile_writer_create (m->proto);

  /* Propagate taint from every input reader to the merged
     writer. */
  for (idx = 0; idx < m->input_cnt; idx++)
    taint_propagate (casereader_get_taint (m->inputs[idx].reader),
                     casewriter_get_taint (merged));

  /* Read the first case of every input.  The index advances only
     when the read succeeds; presumably a failed read removes the
     input and shrinks input_cnt -- confirm in
     read_input_case(). */
  idx = 0;
  while (idx < m->input_cnt)
    {
      if (read_input_case (m, idx))
        idx++;
    }

  while (m->input_cnt > 0)
    {
      size_t lowest = 0;
      size_t k;

      /* Linear scan for the input whose current case sorts
         first; ties keep the earlier input. */
      for (k = 1; k < m->input_cnt; k++)
        {
          if (subcase_compare_3way (&m->ordering, m->inputs[k].c,
                                    &m->ordering, m->inputs[lowest].c) < 0)
            lowest = k;
        }

      casewriter_write (merged, m->inputs[lowest].c);
      read_input_case (m, lowest);
    }

  m->input_cnt = 1;
  m->inputs[0].reader = casewriter_make_reader (merged);
}
Exemple #3
0
/* Transformation callback for XSAVE and XEXPORT: writes case *C
   to the writer held by the transformation state TRNS_.  The
   writer is handed the result of case_ref(), so *C itself stays
   with the caller.  Always returns TRNS_CONTINUE. */
static int
output_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
{
  struct output_trns *trns = trns_;
  struct ccase *shared = case_ref (*c);

  casewriter_write (trns->writer, shared);
  return TRNS_CONTINUE;
}
Exemple #4
0
/* Flushes the trailing buffered case, if FIRST or LAST is in
   use.  Nothing happens unless PROC has recorded a previous set
   of BY values.  Otherwise the buffered case has its LAST
   variable (when there is one) set to 1, is written to PROC's
   output, and the buffer pointer is cleared. */
static void
output_buffered_case (struct comb_proc *proc)
{
  if (proc->prev_BY == NULL)
    return;

  /* No case follows this one, so it ends its group. */
  if (proc->last != NULL)
    case_data_rw (proc->buffered_case, proc->last)->f = 1.0;

  casewriter_write (proc->output, proc->buffered_case);
  proc->buffered_case = NULL;
}
Exemple #5
0
/* Sends OUTPUT, whose BY values have been extracted into BY, to
   PROC's output file, filling in any FIRST or LAST variable
   along the way.

   Without FIRST and LAST, OUTPUT is written immediately.
   Otherwise OUTPUT is held back in PROC->buffered_case and the
   previously buffered case is written instead: whether a case
   is the last of its group is only known once the next case has
   been seen. */
static void
output_case (struct comb_proc *proc, struct ccase *output, union value by[])
{
  bool starts_group;

  if (proc->first == NULL && proc->last == NULL)
    {
      casewriter_write (proc->output, output);
      return;
    }

  if (proc->prev_BY == NULL)
    {
      /* Very first case: nothing is buffered yet. */
      starts_group = true;
    }
  else
    {
      /* The buffered case was the last of its group exactly when
         this case starts a new group. */
      starts_group = !subcase_equal_xx (&proc->by_vars, proc->prev_BY, by);
      if (proc->last != NULL)
        case_data_rw (proc->buffered_case, proc->last)->f = starts_group;
      casewriter_write (proc->output, proc->buffered_case);
    }

  proc->buffered_case = output;
  if (proc->first != NULL)
    case_data_rw (proc->buffered_case, proc->first)->f = starts_group;

  if (starts_group)
    {
      /* Remember the BY values of the group that just started,
         allocating the storage the first time through. */
      const struct caseproto *proto = subcase_get_proto (&proc->by_vars);
      size_t n_values = subcase_get_n_fields (&proc->by_vars);

      if (proc->prev_BY == NULL)
        {
          proc->prev_BY = xmalloc (n_values * sizeof *proc->prev_BY);
          caseproto_init_values (proto, proc->prev_BY);
        }
      caseproto_copy (subcase_get_proto (&proc->by_vars), 0, n_values,
                      proc->prev_BY, by);
    }
}
/* Sets the clip according to the currently selected range in the
   data sheet SHEET.

   The selected range (or, when nothing is selected, the active
   cell) is trimmed to the rows and columns that actually hold
   data.  The clip dictionary (clip_dict) and clip data
   (clip_datasheet), both defined outside this function, are then
   rebuilt from that range and data_sheet_update_clipboard() is
   called.

   Returns without touching the existing clip when there is
   neither a selection nor a usable active cell, or when the data
   store has no cases or no variables. */
static void
data_sheet_set_clip (PsppireSheet *sheet)
{
  int i;
  struct casewriter *writer ;
  PsppireSheetRange range;
  PsppireDataStore *ds;
  struct case_map *map = NULL;
  casenumber max_rows;
  size_t max_columns;
  gint row0, rowi;
  gint col0, coli;

  ds = PSPPIRE_DATA_STORE (psppire_sheet_get_model (sheet));

  psppire_sheet_get_selected_range (sheet, &range);

  /* Normalize the range so that (row0, col0) is its top-left
     corner, whichever way the selection was made. */
  col0 = MIN (range.col0, range.coli);
  coli = MAX (range.col0, range.coli);
  row0 = MIN (range.row0, range.rowi);
  rowi = MAX (range.row0, range.rowi);

   /* If nothing selected, then use active cell */
  if ( row0 < 0 || col0 < 0 )
    {
      gint row, col;
      psppire_sheet_get_active_cell (sheet, &row, &col);

      row0 = rowi = row;
      col0 = coli = col;
    }

  /* Still nothing usable: bail out.  A negative index must not
     reach the checks below, because comparing a negative gint
     with the unsigned MAX_COLUMNS converts it to a huge value,
     which would clamp COLI while leaving COL0 negative. */
  if (row0 < 0 || col0 < 0)
    return;

  /* The sheet range can include cells that do not include data.
     Exclude them from the range. */
  max_rows = psppire_data_store_get_case_count (ds);
  if (rowi >= max_rows)
    {
      if (max_rows == 0)
        return;
      rowi = max_rows - 1;
    }
  max_columns = dict_get_var_cnt (ds->dict->dict);
  if (coli >= max_columns)
    {
      if (max_columns == 0)
        return;
      coli = max_columns - 1;
    }

  /* Destroy any existing clip */
  if ( clip_datasheet )
    {
      casereader_destroy (clip_datasheet);
      clip_datasheet = NULL;
    }

  if ( clip_dict )
    {
      dict_destroy (clip_dict);
      clip_dict = NULL;
    }

  /* Construct clip dictionary: one cloned variable per selected
     column. */
  clip_dict = dict_create (dict_get_encoding (ds->dict->dict));
  for (i = col0; i <= coli; i++)
    dict_clone_var_assert (clip_dict, dict_get_var (ds->dict->dict, i));

  /* Construct clip data: each selected row is mapped from the
     data store's dictionary onto the clip dictionary.  A row
     that cannot be fetched puts the writer into an error
     state. */
  map = case_map_by_name (ds->dict->dict, clip_dict);
  writer = autopaging_writer_create (dict_get_proto (clip_dict));
  for (i = row0; i <= rowi ; ++i )
    {
      struct ccase *old = psppire_data_store_get_case (ds, i);
      if (old != NULL)
        casewriter_write (writer, case_map_execute (map, old));
      else
        casewriter_force_error (writer);
    }
  case_map_destroy (map);

  clip_datasheet = casewriter_make_reader (writer);

  data_sheet_update_clipboard (sheet);
}
Exemple #7
0
/* Writes an aggregated record to OUTPUT.

   A new case is created with the prototype of AGR's output
   dictionary.  If AGR->add_variables is set, the values of
   BREAK_CASE for all of the source dictionary's variables are
   copied into it; otherwise only the break variables' values
   are copied, into the leading value slots.  Then the final
   value of every aggregation variable is computed from its
   accumulated state and stored in its destination variable, and
   the case is written to OUTPUT.

   With column-wise missing-value handling, an aggregation
   variable that saw a missing value is set to missing, except
   for the N, NU, NMISS and NUMISS functions.  Its writer is
   destroyed and the pointer cleared, so that no dangling pointer
   is left behind in the aggregation variable. */
static void
dump_aggregate_info (const struct agr_proc *agr, struct casewriter *output, const struct ccase *break_case)
{
  struct ccase *c = case_create (dict_get_proto (agr->dict));
  struct agr_var *av;

  if (agr->add_variables)
    {
      case_copy (c, 0, break_case, 0, dict_get_var_cnt (agr->src_dict));
    }
  else
    {
      int value_idx = 0;
      int i;

      for (i = 0; i < agr->break_var_cnt; i++)
        {
          const struct variable *v = agr->break_vars[i];
          value_copy (case_data_rw_idx (c, value_idx),
                      case_data (break_case, v),
                      var_get_width (v));
          value_idx++;
        }
    }

  for (av = agr->agr_vars; av; av = av->next)
    {
      union value *v = case_data_rw (c, av->dest);
      int width = var_get_width (av->dest);

      if (agr->missing == COLUMNWISE && av->saw_missing
          && (av->function & FUNC) != N && (av->function & FUNC) != NU
          && (av->function & FUNC) != NMISS && (av->function & FUNC) != NUMISS)
        {
          value_set_missing (v, width);
          casewriter_destroy (av->writer);
          /* Clear the pointer, as the MEDIAN case below does
             after handing its writer off, so that the destroyed
             writer cannot be reached again. */
          av->writer = NULL;
          continue;
        }

      switch (av->function)
        {
        case SUM:
          v->f = av->int1 ? av->dbl[0] : SYSMIS;
          break;
        case MEAN:
          v->f = av->dbl[1] != 0.0 ? av->dbl[0] / av->dbl[1] : SYSMIS;
          break;
        case MEDIAN:
          {
            /* The writer is turned into a reader and its cases
               fed to a 0.5 percentile statistic.  Once that has
               been done the writer pointer is null and the
               cached result in dbl[0] is used. */
            if (av->writer)
              {
                struct percentile *median = percentile_create (0.5, av->cc);
                struct order_stats *os = &median->parent;
                struct casereader *sorted_reader = casewriter_make_reader (av->writer);
                av->writer = NULL;

                order_stats_accumulate (&os, 1,
                                        sorted_reader,
                                        av->weight,
                                        av->subject,
                                        av->exclude);
                av->dbl[0] = percentile_calculate (median, PC_HAVERAGE);
                statistic_destroy (&median->parent.parent);
              }
            v->f = av->dbl[0];
          }
          break;
        case SD:
          {
            double variance;

            /* FIXME: we should use two passes. */
            moments1_calculate (av->moments, NULL, NULL, &variance,
                                NULL, NULL);
            if (variance != SYSMIS)
              v->f = sqrt (variance);
            else
              v->f = SYSMIS;
          }
          break;
        case MAX:
        case MIN:
          v->f = av->int1 ? av->dbl[0] : SYSMIS;
          break;
        case MAX | FSTRING:
        case MIN | FSTRING:
          if (av->int1)
            memcpy (value_str_rw (v, width), av->string, width);
          else
            value_set_missing (v, width);
          break;
        case FGT:
        case FGT | FSTRING:
        case FLT:
        case FLT | FSTRING:
        case FIN:
        case FIN | FSTRING:
        case FOUT:
        case FOUT | FSTRING:
          /* Fraction: dbl[0] divided by dbl[1]. */
          v->f = av->dbl[1] ? av->dbl[0] / av->dbl[1] : SYSMIS;
          break;
        case PGT:
        case PGT | FSTRING:
        case PLT:
        case PLT | FSTRING:
        case PIN:
        case PIN | FSTRING:
        case POUT:
        case POUT | FSTRING:
          /* Percentage: the same ratio, scaled by 100. */
          v->f = av->dbl[1] ? av->dbl[0] / av->dbl[1] * 100.0 : SYSMIS;
          break;
        case N:
        case N | FSTRING:
          v->f = av->dbl[0];
          break;
        case NU:
        case NU | FSTRING:
          v->f = av->int1;
          break;
        case FIRST:
        case LAST:
          v->f = av->int1 ? av->dbl[0] : SYSMIS;
          break;
        case FIRST | FSTRING:
        case LAST | FSTRING:
          if (av->int1)
            memcpy (value_str_rw (v, width), av->string, width);
          else
            value_set_missing (v, width);
          break;
        case NMISS:
        case NMISS | FSTRING:
          v->f = av->dbl[0];
          break;
        case NUMISS:
        case NUMISS | FSTRING:
          v->f = av->int1;
          break;
        default:
          NOT_REACHED ();
        }
    }

  casewriter_write (output, c);
}
Exemple #8
0
/* Accumulates aggregation data from the case INPUT. */
static void
accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input)
{
  struct agr_var *iter;
  double weight;
  bool bad_warn = true;

  weight = dict_get_case_weight (agr->src_dict, input, &bad_warn);

  for (iter = agr->agr_vars; iter; iter = iter->next)
    if (iter->src)
      {
	const union value *v = case_data (input, iter->src);
        int src_width = var_get_width (iter->src);

        if (var_is_value_missing (iter->src, v, iter->exclude))
	  {
	    switch (iter->function)
	      {
	      case NMISS:
	      case NMISS | FSTRING:
		iter->dbl[0] += weight;
                break;
	      case NUMISS:
	      case NUMISS | FSTRING:
		iter->int1++;
		break;
	      }
	    iter->saw_missing = true;
	    continue;
	  }

	/* This is horrible.  There are too many possibilities. */
	switch (iter->function)
	  {
	  case SUM:
	    iter->dbl[0] += v->f * weight;
            iter->int1 = 1;
	    break;
	  case MEAN:
            iter->dbl[0] += v->f * weight;
            iter->dbl[1] += weight;
            break;
	  case MEDIAN:
	    {
	      double wv ;
	      struct ccase *cout;

              cout = case_create (casewriter_get_proto (iter->writer));

	      case_data_rw (cout, iter->subject)->f
                = case_data (input, iter->src)->f;

	      wv = dict_get_case_weight (agr->src_dict, input, NULL);

	      case_data_rw (cout, iter->weight)->f = wv;

	      iter->cc += wv;

	      casewriter_write (iter->writer, cout);
	    }
	    break;
	  case SD:
            moments1_add (iter->moments, v->f, weight);
            break;
	  case MAX:
	    iter->dbl[0] = MAX (iter->dbl[0], v->f);
	    iter->int1 = 1;
	    break;
	  case MAX | FSTRING:
            /* Need to do some kind of Unicode collation thingy here */
	    if (memcmp (iter->string, value_str (v, src_width), src_width) < 0)
	      memcpy (iter->string, value_str (v, src_width), src_width);
	    iter->int1 = 1;
	    break;
	  case MIN:
	    iter->dbl[0] = MIN (iter->dbl[0], v->f);
	    iter->int1 = 1;
	    break;
	  case MIN | FSTRING:
	    if (memcmp (iter->string, value_str (v, src_width), src_width) > 0)
	      memcpy (iter->string, value_str (v, src_width), src_width);
	    iter->int1 = 1;
	    break;
	  case FGT:
	  case PGT:
            if (v->f > iter->arg[0].f)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
	  case FGT | FSTRING:
	  case PGT | FSTRING:
            if (memcmp (iter->arg[0].c,
                        value_str (v, src_width), src_width) < 0)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
	  case FLT:
	  case PLT:
            if (v->f < iter->arg[0].f)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
	  case FLT | FSTRING:
	  case PLT | FSTRING:
            if (memcmp (iter->arg[0].c,
                        value_str (v, src_width), src_width) > 0)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
	  case FIN:
	  case PIN:
            if (iter->arg[0].f <= v->f && v->f <= iter->arg[1].f)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
	  case FIN | FSTRING:
	  case PIN | FSTRING:
            if (memcmp (iter->arg[0].c,
                        value_str (v, src_width), src_width) <= 0
                && memcmp (iter->arg[1].c,
                           value_str (v, src_width), src_width) >= 0)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
	  case FOUT:
	  case POUT:
            if (iter->arg[0].f > v->f || v->f > iter->arg[1].f)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
	  case FOUT | FSTRING:
	  case POUT | FSTRING:
            if (memcmp (iter->arg[0].c,
                        value_str (v, src_width), src_width) > 0
                || memcmp (iter->arg[1].c,
                           value_str (v, src_width), src_width) < 0)
              iter->dbl[0] += weight;
            iter->dbl[1] += weight;
            break;
	  case N:
	  case N | FSTRING:
	    iter->dbl[0] += weight;
	    break;
	  case NU:
	  case NU | FSTRING:
	    iter->int1++;
	    break;
	  case FIRST:
	    if (iter->int1 == 0)
	      {
		iter->dbl[0] = v->f;
		iter->int1 = 1;
	      }
	    break;
	  case FIRST | FSTRING:
	    if (iter->int1 == 0)
	      {
		memcpy (iter->string, value_str (v, src_width), src_width);
		iter->int1 = 1;
	      }
	    break;
	  case LAST:
	    iter->dbl[0] = v->f;
	    iter->int1 = 1;
	    break;
	  case LAST | FSTRING:
	    memcpy (iter->string, value_str (v, src_width), src_width);
	    iter->int1 = 1;
	    break;
          case NMISS:
          case NMISS | FSTRING:
          case NUMISS:
          case NUMISS | FSTRING:
            /* Our value is not missing or it would have been
               caught earlier.  Nothing to do. */
            break;
	  default:
	    NOT_REACHED ();
	  }
      } else {
      switch (iter->function)
	{
	case N:
	  iter->dbl[0] += weight;
	  break;
	case NU:
	  iter->int1++;
	  break;
	default:
	  NOT_REACHED ();
	}
    }
}