Example #1
0
/* Merge the dictionary for file F into master dictionary M. */
static bool
merge_dictionary (struct dictionary *const m, struct comb_file *f)
{
  struct dictionary *d = f->dict;
  const struct string_array *d_docs, *m_docs;
  int i;

  if (dict_get_label (m) == NULL)
    dict_set_label (m, dict_get_label (d));

  d_docs = dict_get_documents (d);
  m_docs = dict_get_documents (m);


  /* FIXME: If the input files have different encodings, then
     the result is undefined.
     The correct thing to do would be to convert to an encoding
     which can cope with all the input files (eg UTF-8).
   */
  if ( 0 != strcmp (dict_get_encoding (f->dict), dict_get_encoding (m)))
    msg (MW, _("Combining files with incompatible encodings. String data may "
               "not be represented correctly."));

  if (d_docs != NULL)
    {
      if (m_docs == NULL)
        dict_set_documents (m, d_docs);
      else
        {
          struct string_array new_docs;
          size_t i;

          new_docs.n = m_docs->n + d_docs->n;
          new_docs.strings = xmalloc (new_docs.n * sizeof *new_docs.strings);
          for (i = 0; i < m_docs->n; i++)
            new_docs.strings[i] = m_docs->strings[i];
          for (i = 0; i < d_docs->n; i++)
            new_docs.strings[m_docs->n + i] = d_docs->strings[i];

          dict_set_documents (m, &new_docs);

          free (new_docs.strings);
        }
    }

  for (i = 0; i < dict_get_var_cnt (d); i++)
    {
      struct variable *dv = dict_get_var (d, i);
      struct variable *mv = dict_lookup_var (m, var_get_name (dv));

      if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
        continue;

      if (mv != NULL)
        {
          if (var_get_width (mv) != var_get_width (dv))
            {
              const char *var_name = var_get_name (dv);
              struct string s = DS_EMPTY_INITIALIZER;
              const char *file_name;

              file_name = f->handle ? fh_get_name (f->handle) : "*";
              ds_put_format (&s,
                             _("Variable %s in file %s has different "
                               "type or width from the same variable in "
                               "earlier file."),
                             var_name, file_name);
              ds_put_cstr (&s, "  ");
              if (var_is_numeric (dv))
                ds_put_format (&s, _("In file %s, %s is numeric."),
                               file_name, var_name);
              else
                ds_put_format (&s, _("In file %s, %s is a string variable "
                                     "with width %d."),
                               file_name, var_name, var_get_width (dv));
              ds_put_cstr (&s, "  ");
              if (var_is_numeric (mv))
                ds_put_format (&s, _("In an earlier file, %s was numeric."),
                               var_name);
              else
                ds_put_format (&s, _("In an earlier file, %s was a string "
                                     "variable with width %d."),
                               var_name, var_get_width (mv));
              msg (SE, "%s", ds_cstr (&s));
              ds_destroy (&s);
              return false;
            }

          if (var_has_value_labels (dv) && !var_has_value_labels (mv))
            var_set_value_labels (mv, var_get_value_labels (dv));
          if (var_has_missing_values (dv) && !var_has_missing_values (mv))
            var_set_missing_values (mv, var_get_missing_values (dv));
          if (var_get_label (dv) && !var_get_label (mv))
            var_set_label (mv, var_get_label (dv));
        }
      else
        mv = dict_clone_var_assert (m, dv);
    }

  return true;
}
Example #2
0
/* Set the clip according to the currently
   selected range in the data sheet */
static void
data_sheet_set_clip (PsppireSheet *sheet)
{
  int i;
  struct casewriter *writer ;
  PsppireSheetRange range;
  PsppireDataStore *ds;
  struct case_map *map = NULL;
  casenumber max_rows;
  size_t max_columns;
  gint row0, rowi;
  gint col0, coli;

  ds = PSPPIRE_DATA_STORE (psppire_sheet_get_model (sheet));

  psppire_sheet_get_selected_range (sheet, &range);

  col0 = MIN (range.col0, range.coli);
  coli = MAX (range.col0, range.coli);
  row0 = MIN (range.row0, range.rowi);
  rowi = MAX (range.row0, range.rowi);

   /* If nothing selected, then use active cell */
  if ( row0 < 0 || col0 < 0 )
    {
      gint row, col;
      psppire_sheet_get_active_cell (sheet, &row, &col);

      row0 = rowi = row;
      col0 = coli = col;
    }

  /* The sheet range can include cells that do not include data.
     Exclude them from the range. */
  max_rows = psppire_data_store_get_case_count (ds);
  if (rowi >= max_rows)
    {
      if (max_rows == 0)
        return;
      rowi = max_rows - 1;
    }
  max_columns = dict_get_var_cnt (ds->dict->dict);
  if (coli >= max_columns)
    {
      if (max_columns == 0)
        return;
      coli = max_columns - 1;
    }

  /* Destroy any existing clip */
  if ( clip_datasheet )
    {
      casereader_destroy (clip_datasheet);
      clip_datasheet = NULL;
    }

  if ( clip_dict )
    {
      dict_destroy (clip_dict);
      clip_dict = NULL;
    }

  /* Construct clip dictionary. */
  clip_dict = dict_create (dict_get_encoding (ds->dict->dict));
  for (i = col0; i <= coli; i++)
    dict_clone_var_assert (clip_dict, dict_get_var (ds->dict->dict, i));

  /* Construct clip data. */
  map = case_map_by_name (ds->dict->dict, clip_dict);
  writer = autopaging_writer_create (dict_get_proto (clip_dict));
  for (i = row0; i <= rowi ; ++i )
    {
      struct ccase *old = psppire_data_store_get_case (ds, i);
      if (old != NULL)
        casewriter_write (writer, case_map_execute (map, old));
      else
        casewriter_force_error (writer);
    }
  case_map_destroy (map);

  clip_datasheet = casewriter_make_reader (writer);

  data_sheet_update_clipboard (sheet);
}
Example #3
0
/* Parses and executes the AGGREGATE procedure. */
int
cmd_aggregate (struct lexer *lexer, struct dataset *ds)
{
  struct dictionary *dict = dataset_dict (ds);
  struct agr_proc agr;
  struct file_handle *out_file = NULL;
  struct casereader *input = NULL, *group;
  struct casegrouper *grouper;
  struct casewriter *output = NULL;

  bool copy_documents = false;
  bool presorted = false;
  bool saw_direction;
  bool ok;

  memset(&agr, 0 , sizeof (agr));
  agr.missing = ITEMWISE;
  agr.src_dict = dict;
  subcase_init_empty (&agr.sort);

  /* OUTFILE subcommand must be first. */
  lex_match (lexer, T_SLASH);
  if (!lex_force_match_id (lexer, "OUTFILE"))
    goto error;
  lex_match (lexer, T_EQUALS);
  if (!lex_match (lexer, T_ASTERISK))
    {
      out_file = fh_parse (lexer, FH_REF_FILE, dataset_session (ds));
      if (out_file == NULL)
        goto error;
    }

  if (out_file == NULL && lex_match_id (lexer, "MODE"))
    {
      lex_match (lexer, T_EQUALS);
      if (lex_match_id (lexer, "ADDVARIABLES"))
	{
	  agr.add_variables = true;

	  /* presorted is assumed in ADDVARIABLES mode */
	  presorted = true;
	}
      else if (lex_match_id (lexer, "REPLACE"))
	{
	  agr.add_variables = false;
	}
      else
	goto error;
    }

  if ( agr.add_variables )
    agr.dict = dict_clone (dict);
  else
    agr.dict = dict_create (dict_get_encoding (dict));

  dict_set_label (agr.dict, dict_get_label (dict));
  dict_set_documents (agr.dict, dict_get_documents (dict));

  /* Read most of the subcommands. */
  for (;;)
    {
      lex_match (lexer, T_SLASH);

      if (lex_match_id (lexer, "MISSING"))
	{
	  lex_match (lexer, T_EQUALS);
	  if (!lex_match_id (lexer, "COLUMNWISE"))
	    {
	      lex_error_expecting (lexer, "COLUMNWISE", NULL);
              goto error;
	    }
	  agr.missing = COLUMNWISE;
	}
      else if (lex_match_id (lexer, "DOCUMENT"))
        copy_documents = true;
      else if (lex_match_id (lexer, "PRESORTED"))
        presorted = true;
      else if (lex_force_match_id (lexer, "BREAK"))
	{
          int i;

	  lex_match (lexer, T_EQUALS);
          if (!parse_sort_criteria (lexer, dict, &agr.sort, &agr.break_vars,
                                    &saw_direction))
            goto error;
          agr.break_var_cnt = subcase_get_n_fields (&agr.sort);

	  if  (! agr.add_variables)
	    for (i = 0; i < agr.break_var_cnt; i++)
	      dict_clone_var_assert (agr.dict, agr.break_vars[i]);

          /* BREAK must follow the options. */
          break;
	}
      else
        goto error;

    }
  if (presorted && saw_direction)
    msg (SW, _("When PRESORTED is specified, specifying sorting directions "
               "with (A) or (D) has no effect.  Output data will be sorted "
               "the same way as the input data."));

  /* Read in the aggregate functions. */
  lex_match (lexer, T_SLASH);
  if (!parse_aggregate_functions (lexer, dict, &agr))
    goto error;

  /* Delete documents. */
  if (!copy_documents)
    dict_clear_documents (agr.dict);

  /* Cancel SPLIT FILE. */
  dict_set_split_vars (agr.dict, NULL, 0);

  /* Initialize. */
  agr.case_cnt = 0;

  if (out_file == NULL)
    {
      /* The active dataset will be replaced by the aggregated data,
         so TEMPORARY is moot. */
      proc_cancel_temporary_transformations (ds);
      proc_discard_output (ds);
      output = autopaging_writer_create (dict_get_proto (agr.dict));
    }
  else
    {
      output = any_writer_open (out_file, agr.dict);
      if (output == NULL)
        goto error;
    }

  input = proc_open (ds);
  if (!subcase_is_empty (&agr.sort) && !presorted)
    {
      input = sort_execute (input, &agr.sort);
      subcase_clear (&agr.sort);
    }

  for (grouper = casegrouper_create_vars (input, agr.break_vars,
                                          agr.break_var_cnt);
       casegrouper_get_next_group (grouper, &group);
       casereader_destroy (group))
    {
      struct casereader *placeholder = NULL;
      struct ccase *c = casereader_peek (group, 0);

      if (c == NULL)
        {
          casereader_destroy (group);
          continue;
        }

      initialize_aggregate_info (&agr);

      if ( agr.add_variables )
	placeholder = casereader_clone (group);

      {
	struct ccase *cg;
	for (; (cg = casereader_read (group)) != NULL; case_unref (cg))
	  accumulate_aggregate_info (&agr, cg);
      }


      if  (agr.add_variables)
	{
	  struct ccase *cg;
	  for (; (cg = casereader_read (placeholder)) != NULL; case_unref (cg))
	    dump_aggregate_info (&agr, output, cg);

	  casereader_destroy (placeholder);
	}
      else
	{
	  dump_aggregate_info (&agr, output, c);
	}
      case_unref (c);
    }
  if (!casegrouper_destroy (grouper))
    goto error;

  if (!proc_commit (ds))
    {
      input = NULL;
      goto error;
    }
  input = NULL;

  if (out_file == NULL)
    {
      struct casereader *next_input = casewriter_make_reader (output);
      if (next_input == NULL)
        goto error;

      dataset_set_dict (ds, agr.dict);
      dataset_set_source (ds, next_input);
      agr.dict = NULL;
    }
  else
    {
      ok = casewriter_destroy (output);
      output = NULL;
      if (!ok)
        goto error;
    }

  agr_destroy (&agr);
  fh_unref (out_file);
  return CMD_SUCCESS;

error:
  if (input != NULL)
    proc_commit (ds);
  casewriter_destroy (output);
  agr_destroy (&agr);
  fh_unref (out_file);
  return CMD_CASCADING_FAILURE;
}