Beispiel #1
0
/* Destroys the XSAVE or XEXPORT transformation TRNS_: its
   casewriter is destroyed and the transformation structure itself
   is freed.
   Returns the result of destroying the casewriter: true if
   successful, false if an I/O error occurred. */
static bool
output_trns_free (void *trns_)
{
  struct output_trns *trns = trns_;
  bool success;

  success = casewriter_destroy (trns->writer);
  free (trns);

  return success;
}
Beispiel #2
0
/* Releases everything that PROC owns: its input files, dictionary,
   output writer, case matcher, saved BY values (if any), BY
   subcase, and buffered case.  PROC itself is not freed here. */
static void
free_comb_proc (struct comb_proc *proc)
{
  close_all_comb_files (proc);
  dict_destroy (proc->dict);
  casewriter_destroy (proc->output);
  case_matcher_destroy (proc->matcher);

  /* The saved BY values must go before the BY subcase does, because
     the subcase supplies the prototype used to destroy them. */
  if (proc->prev_BY != NULL)
    {
      caseproto_destroy_values (subcase_get_proto (&proc->by_vars),
                                proc->prev_BY);
      free (proc->prev_BY);
    }
  subcase_destroy (&proc->by_vars);

  case_unref (proc->buffered_case);
}
Beispiel #3
0
/* Parses and performs the SAVE or EXPORT procedure, writing the
   dataset's cases with a writer of the given WRITER_TYPE.
   Returns CMD_SUCCESS if the command parsed, every case was written,
   and the procedure committed; otherwise CMD_CASCADING_FAILURE. */
static int
parse_output_proc (struct lexer *lexer, struct dataset *ds,
                   enum writer_type writer_type)
{
  bool retain_unselected;
  struct casewriter *writer;
  bool write_ok;
  bool commit_ok;

  writer = parse_write_command (lexer, ds, writer_type, PROC_CMD,
                                &retain_unselected);
  if (writer == NULL)
    return CMD_CASCADING_FAILURE;

  casereader_transfer (proc_open_filtering (ds, !retain_unselected), writer);

  /* Both steps always run; either one failing fails the command. */
  write_ok = casewriter_destroy (writer);
  commit_ok = proc_commit (ds);

  if (write_ok && commit_ok)
    return CMD_SUCCESS;
  return CMD_CASCADING_FAILURE;
}
Beispiel #4
0
int
cmd_save_translate (struct lexer *lexer, struct dataset *ds)
{
  enum { CSV_FILE = 1, TAB_FILE } type;

  struct dictionary *dict;
  struct case_map *map;
  struct casewriter *writer;
  struct file_handle *handle;

  struct csv_writer_options csv_opts;

  bool replace;

  bool retain_unselected;
  bool recode_user_missing;
  bool include_var_names;
  bool use_value_labels;
  bool use_print_formats;
  char decimal;
  char delimiter;
  char qualifier;

  bool ok;

  type = 0;

  dict = dict_clone (dataset_dict (ds));
  map = NULL;

  handle = NULL;
  replace = false;

  retain_unselected = true;
  recode_user_missing = false;
  include_var_names = false;
  use_value_labels = false;
  use_print_formats = false;
  decimal = settings_get_decimal_char (FMT_F);
  delimiter = 0;
  qualifier = '"';

  case_map_prepare_dict (dict);
  dict_delete_scratch_vars (dict);

  while (lex_match (lexer, T_SLASH))
    {
      if (lex_match_id (lexer, "OUTFILE"))
	{
          if (handle != NULL)
            {
              lex_sbc_only_once ("OUTFILE");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);

	  handle = fh_parse (lexer, FH_REF_FILE, NULL);
	  if (handle == NULL)
	    goto error;
	}
      else if (lex_match_id (lexer, "TYPE"))
        {
          if (type != 0)
            {
              lex_sbc_only_once ("TYPE");
              goto error;
            }

          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "CSV"))
            type = CSV_FILE;
          else if (lex_match_id (lexer, "TAB"))
            type = TAB_FILE;
          else
            {
              lex_error_expecting (lexer, "CSV", "TAB", NULL_SENTINEL);
              goto error;
            }
        }
      else if (lex_match_id (lexer, "REPLACE"))
        replace = true;
      else if (lex_match_id (lexer, "FIELDNAMES"))
        include_var_names = true;
      else if (lex_match_id (lexer, "MISSING"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "IGNORE"))
            recode_user_missing = false;
          else if (lex_match_id (lexer, "RECODE"))
            recode_user_missing = true;
          else
            {
              lex_error_expecting (lexer, "IGNORE", "RECODE", NULL_SENTINEL);
              goto error;
            }
        }
      else if (lex_match_id (lexer, "CELLS"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "VALUES"))
            use_value_labels = false;
          else if (lex_match_id (lexer, "LABELS"))
            use_value_labels = true;
          else
            {
              lex_error_expecting (lexer, "VALUES", "LABELS", NULL_SENTINEL);
              goto error;
            }
        }
      else if (lex_match_id (lexer, "TEXTOPTIONS"))
        {
          lex_match (lexer, T_EQUALS);
          for (;;)
            {
              if (lex_match_id (lexer, "DELIMITER"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (!lex_force_string (lexer))
                    goto error;
                  /* XXX should support multibyte UTF-8 delimiters */
                  if (ss_length (lex_tokss (lexer)) != 1)
                    {
                      msg (SE, _("The %s string must contain exactly one "
                                 "character."), "DELIMITER");
                      goto error;
                    }
                  delimiter = ss_first (lex_tokss (lexer));
                  lex_get (lexer);
                }
              else if (lex_match_id (lexer, "QUALIFIER"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (!lex_force_string (lexer))
                    goto error;
                  /* XXX should support multibyte UTF-8 qualifiers */
                  if (ss_length (lex_tokss (lexer)) != 1)
                    {
                      msg (SE, _("The %s string must contain exactly one "
                                 "character."), "QUALIFIER");
                      goto error;
                    }
                  qualifier = ss_first (lex_tokss (lexer));
                  lex_get (lexer);
                }
              else if (lex_match_id (lexer, "DECIMAL"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (lex_match_id (lexer, "DOT"))
                    decimal = '.';
                  else if (lex_match_id (lexer, "COMMA"))
                    decimal = ',';
                  else
                    {
                      lex_error_expecting (lexer, "DOT", "COMMA",
                                           NULL_SENTINEL);
                      goto error;
                    }
                }
              else if (lex_match_id (lexer, "FORMAT"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (lex_match_id (lexer, "PLAIN"))
                    use_print_formats = false;
                  else if (lex_match_id (lexer, "VARIABLE"))
                    use_print_formats = true;
                  else
                    {
                      lex_error_expecting (lexer, "PLAIN", "VARIABLE",
                                           NULL_SENTINEL);
                      goto error;
                    }
                }
              else
                break;
            }
        }
      else if (lex_match_id (lexer, "UNSELECTED"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "RETAIN"))
            retain_unselected = true;
          else if (lex_match_id (lexer, "DELETE"))
            retain_unselected = false;
          else
            {
              lex_error_expecting (lexer, "RETAIN", "DELETE", NULL_SENTINEL);
              goto error;
            }
        }
      else if (!parse_dict_trim (lexer, dict))
        goto error;
    }
  if (lex_end_of_command (lexer) != CMD_SUCCESS)
    goto error;

  if (type == 0)
    {
      lex_sbc_missing ("TYPE");
      goto error;
    }
  else if (handle == NULL)
    {
      lex_sbc_missing ("OUTFILE");
      goto error;
    }
  else if (!replace && fn_exists (fh_get_file_name (handle)))
    {
      msg (SE, _("Output file `%s' exists but REPLACE was not specified."),
           fh_get_file_name (handle));
      goto error;
    }

  dict_delete_scratch_vars (dict);
  dict_compact_values (dict);

  csv_opts.recode_user_missing = recode_user_missing;
  csv_opts.include_var_names = include_var_names;
  csv_opts.use_value_labels = use_value_labels;
  csv_opts.use_print_formats = use_print_formats;
  csv_opts.decimal = decimal;
  csv_opts.delimiter = (delimiter ? delimiter
                        : type == TAB_FILE ? '\t'
                        : decimal == '.' ? ','
                        : ';');
  csv_opts.qualifier = qualifier;

  writer = csv_writer_open (handle, dict, &csv_opts);
  if (writer == NULL)
    goto error;
  fh_unref (handle);

  map = case_map_from_dict (dict);
  if (map != NULL)
    writer = case_map_create_output_translator (map, writer);
  dict_destroy (dict);

  casereader_transfer (proc_open_filtering (ds, !retain_unselected), writer);
  ok = casewriter_destroy (writer);
  ok = proc_commit (ds) && ok;

  return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;

error:
  fh_unref (handle);
  dict_destroy (dict);
  case_map_destroy (map);
  return CMD_FAILURE;
}
Beispiel #5
0
/* Writes an aggregated record to OUTPUT.

   The record is a newly created case with the prototype of
   AGR->dict.  If AGR->add_variables is set, values are copied from
   the start of BREAK_CASE, as many as AGR->src_dict has variables;
   otherwise only the values of the break variables are copied from
   BREAK_CASE, in order, into the record's leading values.  Then
   the result of every aggregation function is stored in its
   destination variable.

   If AGR->missing is COLUMNWISE and an aggregation function saw a
   missing value, its result is set to missing instead, except for
   the N, NU, NMISS, and NUMISS functions.

   This function may be called several times with the same
   accumulated state (cmd_aggregate does so once per case of a
   group when AGR->add_variables is set), so anything it consumes
   or destroys must be marked as gone. */
static void
dump_aggregate_info (const struct agr_proc *agr, struct casewriter *output,
                     const struct ccase *break_case)
{
  struct ccase *c = case_create (dict_get_proto (agr->dict));
  struct agr_var *i;

  if (agr->add_variables)
    case_copy (c, 0, break_case, 0, dict_get_var_cnt (agr->src_dict));
  else
    {
      int idx;

      /* Break variable number IDX lands in value number IDX. */
      for (idx = 0; idx < agr->break_var_cnt; idx++)
        {
          const struct variable *bv = agr->break_vars[idx];
          value_copy (case_data_rw_idx (c, idx),
                      case_data (break_case, bv),
                      var_get_width (bv));
        }
    }

  for (i = agr->agr_vars; i; i = i->next)
    {
      union value *v = case_data_rw (c, i->dest);
      int width = var_get_width (i->dest);

      if (agr->missing == COLUMNWISE && i->saw_missing
          && (i->function & FUNC) != N && (i->function & FUNC) != NU
          && (i->function & FUNC) != NMISS && (i->function & FUNC) != NUMISS)
        {
          value_set_missing (v, width);

          /* Forget the writer once it is destroyed.  Otherwise a
             later call with the same state would destroy it a
             second time through the stale pointer. */
          casewriter_destroy (i->writer);
          i->writer = NULL;
          continue;
        }

      switch (i->function)
        {
        case SUM:
          v->f = i->int1 ? i->dbl[0] : SYSMIS;
          break;
        case MEAN:
          v->f = i->dbl[1] != 0.0 ? i->dbl[0] / i->dbl[1] : SYSMIS;
          break;
        case MEDIAN:
          {
            /* The median is computed only while the writer still
               exists; the result is cached in dbl[0] so that later
               calls can reuse it. */
            if (i->writer)
              {
                struct percentile *median = percentile_create (0.5, i->cc);
                struct order_stats *os = &median->parent;
                struct casereader *sorted_reader
                  = casewriter_make_reader (i->writer);
                i->writer = NULL;

                order_stats_accumulate (&os, 1,
                                        sorted_reader,
                                        i->weight,
                                        i->subject,
                                        i->exclude);
                i->dbl[0] = percentile_calculate (median, PC_HAVERAGE);
                statistic_destroy (&median->parent.parent);
              }
            v->f = i->dbl[0];
          }
          break;
        case SD:
          {
            double variance;

            /* FIXME: we should use two passes. */
            moments1_calculate (i->moments, NULL, NULL, &variance,
                                NULL, NULL);
            if (variance != SYSMIS)
              v->f = sqrt (variance);
            else
              v->f = SYSMIS;
          }
          break;
        case MAX:
        case MIN:
          v->f = i->int1 ? i->dbl[0] : SYSMIS;
          break;
        case MAX | FSTRING:
        case MIN | FSTRING:
          if (i->int1)
            memcpy (value_str_rw (v, width), i->string, width);
          else
            value_set_missing (v, width);
          break;
        case FGT:
        case FGT | FSTRING:
        case FLT:
        case FLT | FSTRING:
        case FIN:
        case FIN | FSTRING:
        case FOUT:
        case FOUT | FSTRING:
          /* Fraction: dbl[0] over dbl[1], if the latter is nonzero. */
          v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] : SYSMIS;
          break;
        case PGT:
        case PGT | FSTRING:
        case PLT:
        case PLT | FSTRING:
        case PIN:
        case PIN | FSTRING:
        case POUT:
        case POUT | FSTRING:
          /* Same ratio as above, expressed as a percentage. */
          v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] * 100.0 : SYSMIS;
          break;
        case N:
        case N | FSTRING:
          v->f = i->dbl[0];
          break;
        case NU:
        case NU | FSTRING:
          v->f = i->int1;
          break;
        case FIRST:
        case LAST:
          v->f = i->int1 ? i->dbl[0] : SYSMIS;
          break;
        case FIRST | FSTRING:
        case LAST | FSTRING:
          if (i->int1)
            memcpy (value_str_rw (v, width), i->string, width);
          else
            value_set_missing (v, width);
          break;
        case NMISS:
        case NMISS | FSTRING:
          v->f = i->dbl[0];
          break;
        case NUMISS:
        case NUMISS | FSTRING:
          v->f = i->int1;
          break;
        default:
          NOT_REACHED ();
        }
    }

  casewriter_write (output, c);
}
Beispiel #6
0
/* Parses and executes the AGGREGATE procedure.

   The syntax is parsed in a fixed order: OUTFILE (a file handle, or
   an asterisk for the active dataset), optionally MODE when OUTFILE
   is an asterisk, then any of MISSING, DOCUMENT, and PRESORTED,
   then BREAK, then the aggregation functions.  The input is then
   sorted on the break variables (unless presorted or there are
   none) and split into groups, and aggregated records are written
   for each nonempty group: one per group, or one per case of the
   group when MODE=ADDVARIABLES.  The output goes to the output file
   or, for OUTFILE=*, replaces the active dataset.

   Returns CMD_SUCCESS if successful, CMD_CASCADING_FAILURE
   otherwise. */
int
cmd_aggregate (struct lexer *lexer, struct dataset *ds)
{
  struct dictionary *dict = dataset_dict (ds);
  struct agr_proc agr;
  struct file_handle *out_file = NULL;
  struct casereader *input = NULL, *group;
  struct casegrouper *grouper;
  struct casewriter *output = NULL;

  bool copy_documents = false;
  bool presorted = false;
  bool saw_direction = false;
  bool ok;

  memset(&agr, 0 , sizeof (agr));
  agr.missing = ITEMWISE;
  agr.src_dict = dict;
  subcase_init_empty (&agr.sort);

  /* OUTFILE subcommand must be first. */
  lex_match (lexer, T_SLASH);
  if (!lex_force_match_id (lexer, "OUTFILE"))
    goto error;
  lex_match (lexer, T_EQUALS);
  if (!lex_match (lexer, T_ASTERISK))
    {
      out_file = fh_parse (lexer, FH_REF_FILE, dataset_session (ds));
      if (out_file == NULL)
        goto error;
    }

  /* MODE is only recognized when the active dataset is the target. */
  if (out_file == NULL && lex_match_id (lexer, "MODE"))
    {
      lex_match (lexer, T_EQUALS);
      if (lex_match_id (lexer, "ADDVARIABLES"))
        {
          agr.add_variables = true;

          /* presorted is assumed in ADDVARIABLES mode */
          presorted = true;
        }
      else if (lex_match_id (lexer, "REPLACE"))
        {
          agr.add_variables = false;
        }
      else
        {
          /* Say what was expected instead of failing silently. */
          lex_error_expecting (lexer, "ADDVARIABLES", "REPLACE", NULL);
          goto error;
        }
    }

  /* ADDVARIABLES extends a copy of the source dictionary; otherwise
     the output dictionary starts out empty. */
  if ( agr.add_variables )
    agr.dict = dict_clone (dict);
  else
    agr.dict = dict_create (dict_get_encoding (dict));

  dict_set_label (agr.dict, dict_get_label (dict));
  dict_set_documents (agr.dict, dict_get_documents (dict));

  /* Read most of the subcommands. */
  for (;;)
    {
      lex_match (lexer, T_SLASH);

      if (lex_match_id (lexer, "MISSING"))
        {
          lex_match (lexer, T_EQUALS);
          if (!lex_match_id (lexer, "COLUMNWISE"))
            {
              lex_error_expecting (lexer, "COLUMNWISE", NULL);
              goto error;
            }
          agr.missing = COLUMNWISE;
        }
      else if (lex_match_id (lexer, "DOCUMENT"))
        copy_documents = true;
      else if (lex_match_id (lexer, "PRESORTED"))
        presorted = true;
      else if (lex_force_match_id (lexer, "BREAK"))
        {
          int i;

          lex_match (lexer, T_EQUALS);
          if (!parse_sort_criteria (lexer, dict, &agr.sort, &agr.break_vars,
                                    &saw_direction))
            goto error;
          agr.break_var_cnt = subcase_get_n_fields (&agr.sort);

          if  (! agr.add_variables)
            for (i = 0; i < agr.break_var_cnt; i++)
              dict_clone_var_assert (agr.dict, agr.break_vars[i]);

          /* BREAK must follow the options. */
          break;
        }
      else
        goto error;

    }
  if (presorted && saw_direction)
    msg (SW, _("When PRESORTED is specified, specifying sorting directions "
               "with (A) or (D) has no effect.  Output data will be sorted "
               "the same way as the input data."));

  /* Read in the aggregate functions. */
  lex_match (lexer, T_SLASH);
  if (!parse_aggregate_functions (lexer, dict, &agr))
    goto error;

  /* Delete documents. */
  if (!copy_documents)
    dict_clear_documents (agr.dict);

  /* Cancel SPLIT FILE. */
  dict_set_split_vars (agr.dict, NULL, 0);

  /* Initialize. */
  agr.case_cnt = 0;

  if (out_file == NULL)
    {
      /* The active dataset will be replaced by the aggregated data,
         so TEMPORARY is moot. */
      proc_cancel_temporary_transformations (ds);
      proc_discard_output (ds);
      output = autopaging_writer_create (dict_get_proto (agr.dict));
    }
  else
    {
      output = any_writer_open (out_file, agr.dict);
      if (output == NULL)
        goto error;
    }

  input = proc_open (ds);
  if (!subcase_is_empty (&agr.sort) && !presorted)
    {
      input = sort_execute (input, &agr.sort);
      subcase_clear (&agr.sort);
    }

  /* Process one group of cases at a time.  The loop's iteration
     expression destroys GROUP after every pass, including passes
     that end in `continue', so the body must never destroy it
     itself. */
  for (grouper = casegrouper_create_vars (input, agr.break_vars,
                                          agr.break_var_cnt);
       casegrouper_get_next_group (grouper, &group);
       casereader_destroy (group))
    {
      struct casereader *placeholder = NULL;
      struct ccase *c = casereader_peek (group, 0);

      /* Nothing to aggregate if the group's first case cannot be
         obtained. */
      if (c == NULL)
        continue;

      initialize_aggregate_info (&agr);

      /* In ADDVARIABLES mode the group is read twice: once to
         accumulate, and once more, through this clone, to write a
         record for each of its cases. */
      if ( agr.add_variables )
        placeholder = casereader_clone (group);

      {
        struct ccase *cg;
        for (; (cg = casereader_read (group)) != NULL; case_unref (cg))
          accumulate_aggregate_info (&agr, cg);
      }


      if  (agr.add_variables)
        {
          struct ccase *cg;
          for (; (cg = casereader_read (placeholder)) != NULL; case_unref (cg))
            dump_aggregate_info (&agr, output, cg);

          casereader_destroy (placeholder);
        }
      else
        {
          dump_aggregate_info (&agr, output, c);
        }
      case_unref (c);
    }
  if (!casegrouper_destroy (grouper))
    goto error;

  if (!proc_commit (ds))
    {
      /* Already committed: keep the error path from doing it again. */
      input = NULL;
      goto error;
    }
  input = NULL;

  if (out_file == NULL)
    {
      struct casereader *next_input = casewriter_make_reader (output);

      /* NOTE(review): casewriter_make_reader is taken to consume
         OUTPUT whatever it returns (nothing below destroys OUTPUT
         on success) -- confirm.  Dropping the pointer keeps the
         error path from destroying the writer again. */
      output = NULL;
      if (next_input == NULL)
        goto error;

      dataset_set_dict (ds, agr.dict);
      dataset_set_source (ds, next_input);
      agr.dict = NULL;
    }
  else
    {
      ok = casewriter_destroy (output);
      output = NULL;
      if (!ok)
        goto error;
    }

  agr_destroy (&agr);
  fh_unref (out_file);
  return CMD_SUCCESS;

error:
  if (input != NULL)
    proc_commit (ds);
  casewriter_destroy (output);
  agr_destroy (&agr);
  fh_unref (out_file);
  return CMD_CASCADING_FAILURE;
}
Beispiel #7
0
/* Parses the body of a SAVE, XSAVE, EXPORT, or XEXPORT command.
   WRITER_TYPE says which kind of file is to be written
   (SYSFILE_WRITER or PORFILE_WRITER) and COMMAND_TYPE says whether
   the command is a transformation (XFORM_CMD) or a procedure
   (PROC_CMD).

   RETAIN_UNSELECTED must be non-null for procedures and null for
   transformations.  For procedures, *RETAIN_UNSELECTED is set to
   true if cases that would otherwise be excluded by FILTER or USE
   should be included (the default), or to false if UNSELECTED=DELETE
   was given.

   Returns a new casewriter if successful, otherwise a null
   pointer. */
static struct casewriter *
parse_write_command (struct lexer *lexer, struct dataset *ds,
                     enum writer_type writer_type,
                     enum command_type command_type,
                     bool *retain_unselected)
{
  /* Common data. */
  struct file_handle *outfile = NULL;   /* Output file. */
  struct dictionary *out_dict;          /* Dictionary for output file. */
  struct casewriter *out_writer = NULL; /* Writer. */
  struct case_map *compactor = NULL;    /* Maps input cases to writer's. */

  /* Common options. */
  bool print_map;             /* Print map?  TODO. */
  bool print_short_names;     /* Print long-to-short name map.  TODO. */
  struct sfm_write_options sysfile_opts;
  struct pfm_write_options porfile_opts;

  assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
  assert (command_type == XFORM_CMD || command_type == PROC_CMD);
  assert ((retain_unselected != NULL) == (command_type == PROC_CMD));

  if (command_type == PROC_CMD)
    *retain_unselected = true;

  out_dict = dict_clone (dataset_dict (ds));
  print_map = false;
  print_short_names = false;
  sysfile_opts = sfm_writer_default_options ();
  porfile_opts = pfm_writer_default_options ();

  case_map_prepare_dict (out_dict);
  dict_delete_scratch_vars (out_dict);

  /* One subcommand per iteration, separated by slashes.  Some
     subcommands are recognized only for a particular kind of writer
     or command. */
  lex_match (lexer, T_SLASH);
  do
    {
      if (lex_match_id (lexer, "OUTFILE"))
        {
          if (outfile != NULL)
            {
              lex_sbc_only_once ("OUTFILE");
              goto error;
            }

          lex_match (lexer, T_EQUALS);

          outfile = fh_parse (lexer, FH_REF_FILE, NULL);
          if (outfile == NULL)
            goto error;
        }
      else if (lex_match_id (lexer, "NAMES"))
        print_short_names = true;
      else if (lex_match_id (lexer, "PERMISSIONS"))
        {
          bool writeable;

          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "READONLY"))
            writeable = false;
          else if (lex_match_id (lexer, "WRITEABLE"))
            writeable = true;
          else
            {
              lex_error_expecting (lexer, "READONLY", "WRITEABLE",
                                   NULL_SENTINEL);
              goto error;
            }

          /* Recorded for both file types; only one set is used. */
          sysfile_opts.create_writeable = writeable;
          porfile_opts.create_writeable = writeable;
        }
      else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "RETAIN"))
            *retain_unselected = true;
          else if (lex_match_id (lexer, "DELETE"))
            *retain_unselected = false;
          else
            {
              lex_error_expecting (lexer, "RETAIN", "DELETE", NULL_SENTINEL);
              goto error;
            }
        }
      else if (writer_type == SYSFILE_WRITER
               && lex_match_id (lexer, "COMPRESSED"))
        sysfile_opts.compress = true;
      else if (writer_type == SYSFILE_WRITER
               && lex_match_id (lexer, "UNCOMPRESSED"))
        sysfile_opts.compress = false;
      else if (writer_type == SYSFILE_WRITER
               && lex_match_id (lexer, "VERSION"))
        {
          lex_match (lexer, T_EQUALS);
          if (!lex_force_int (lexer))
            goto error;
          sysfile_opts.version = lex_integer (lexer);
          lex_get (lexer);
        }
      else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "COMMUNICATIONS"))
            porfile_opts.type = PFM_COMM;
          else if (lex_match_id (lexer, "TAPE"))
            porfile_opts.type = PFM_TAPE;
          else
            {
              lex_error_expecting (lexer, "COMM", "TAPE", NULL_SENTINEL);
              goto error;
            }
        }
      else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
        {
          lex_match (lexer, T_EQUALS);
          if (!lex_force_int (lexer))
            goto error;
          porfile_opts.digits = lex_integer (lexer);
          lex_get (lexer);
        }
      else if (!parse_dict_trim (lexer, out_dict))
        goto error;
    }
  while (lex_match (lexer, T_SLASH));

  if (lex_end_of_command (lexer) != CMD_SUCCESS)
    goto error;

  if (outfile == NULL)
    {
      lex_sbc_missing ("OUTFILE");
      goto error;
    }

  dict_delete_scratch_vars (out_dict);
  dict_compact_values (out_dict);

  /* A handle that refers to a file gets the writer that matches the
     command; any other kind of handle goes to the generic writer. */
  if (fh_get_referent (outfile) != FH_REF_FILE)
    out_writer = any_writer_open (outfile, out_dict);
  else if (writer_type == SYSFILE_WRITER)
    out_writer = sfm_open_writer (outfile, out_dict, sysfile_opts);
  else if (writer_type == PORFILE_WRITER)
    out_writer = pfm_open_writer (outfile, out_dict, porfile_opts);
  if (out_writer == NULL)
    goto error;

  /* If the output dictionary requires it, put a translator in front
     of the writer that maps input cases to output cases. */
  compactor = case_map_from_dict (out_dict);
  if (compactor != NULL)
    out_writer = case_map_create_output_translator (compactor, out_writer);
  dict_destroy (out_dict);

  fh_unref (outfile);
  return out_writer;

 error:
  fh_unref (outfile);
  casewriter_destroy (out_writer);
  dict_destroy (out_dict);
  case_map_destroy (compactor);
  return NULL;
}