示例#1
0
static int
combine_files (enum comb_command_type command,
               struct lexer *lexer, struct dataset *ds)
{
  struct comb_proc proc;

  bool saw_by = false;
  bool saw_sort = false;
  struct casereader *active_file = NULL;

  char *first_name = NULL;
  char *last_name = NULL;

  struct taint *taint = NULL;

  size_t n_tables = 0;
  size_t allocated_files = 0;

  size_t i;

  proc.files = NULL;
  proc.n_files = 0;
  proc.dict = dict_create (get_default_encoding ());
  proc.output = NULL;
  proc.matcher = NULL;
  subcase_init_empty (&proc.by_vars);
  proc.first = NULL;
  proc.last = NULL;
  proc.buffered_case = NULL;
  proc.prev_BY = NULL;

  dict_set_case_limit (proc.dict, dict_get_case_limit (dataset_dict (ds)));

  lex_match (lexer, T_SLASH);
  for (;;)
    {
      struct comb_file *file;
      enum comb_file_type type;

      if (lex_match_id (lexer, "FILE"))
        type = COMB_FILE;
      else if (command == COMB_MATCH && lex_match_id (lexer, "TABLE"))
        {
          type = COMB_TABLE;
          n_tables++;
        }
      else
        break;
      lex_match (lexer, T_EQUALS);

      if (proc.n_files >= allocated_files)
        proc.files = x2nrealloc (proc.files, &allocated_files,
                                sizeof *proc.files);
      file = &proc.files[proc.n_files++];
      file->type = type;
      subcase_init_empty (&file->by_vars);
      subcase_init_empty (&file->src);
      subcase_init_empty (&file->dst);
      file->mv = NULL;
      file->handle = NULL;
      file->dict = NULL;
      file->reader = NULL;
      file->data = NULL;
      file->is_sorted = true;
      file->in_name = NULL;
      file->in_var = NULL;

      if (lex_match (lexer, T_ASTERISK))
        {
          if (!dataset_has_source (ds))
            {
              msg (SE, _("Cannot specify the active dataset since none "
                         "has been defined."));
              goto error;
            }

          if (proc_make_temporary_transformations_permanent (ds))
            msg (SE, _("This command may not be used after TEMPORARY when "
                       "the active dataset is an input source.  "
                       "Temporary transformations will be made permanent."));

          file->dict = dict_clone (dataset_dict (ds));
        }
      else
        {
          file->handle = fh_parse (lexer, FH_REF_FILE, dataset_session (ds));
          if (file->handle == NULL)
            goto error;

          file->reader = any_reader_open (file->handle, NULL, &file->dict);
          if (file->reader == NULL)
            goto error;
        }

      while (lex_match (lexer, T_SLASH))
        if (lex_match_id (lexer, "RENAME"))
          {
            if (!parse_dict_rename (lexer, file->dict))
              goto error;
          }
        else if (lex_match_id (lexer, "IN"))
          {
            lex_match (lexer, T_EQUALS);
            if (lex_token (lexer) != T_ID)
              {
                lex_error (lexer, NULL);
                goto error;
              }

            if (file->in_name)
              {
                msg (SE, _("Multiple IN subcommands for a single FILE or "
                           "TABLE."));
                goto error;
              }
            file->in_name = xstrdup (lex_tokcstr (lexer));
            lex_get (lexer);
          }
        else if (lex_match_id (lexer, "SORT"))
          {
            file->is_sorted = false;
            saw_sort = true;
          }

      if (!merge_dictionary (proc.dict, file))
        goto error;
    }

  while (lex_token (lexer) != T_ENDCMD)
    {
      if (lex_match (lexer, T_BY))
	{
          const struct variable **by_vars;
          size_t i;
          bool ok;

	  if (saw_by)
	    {
              lex_sbc_only_once ("BY");
	      goto error;
	    }
          saw_by = true;

	  lex_match (lexer, T_EQUALS);
          if (!parse_sort_criteria (lexer, proc.dict, &proc.by_vars,
                                    &by_vars, NULL))
	    goto error;

          ok = true;
          for (i = 0; i < proc.n_files; i++)
            {
              struct comb_file *file = &proc.files[i];
              size_t j;

              for (j = 0; j < subcase_get_n_fields (&proc.by_vars); j++)
                {
                  const char *name = var_get_name (by_vars[j]);
                  struct variable *var = dict_lookup_var (file->dict, name);
                  if (var != NULL)
                    subcase_add_var (&file->by_vars, var,
                                     subcase_get_direction (&proc.by_vars, j));
                  else
                    {
                      if (file->handle != NULL)
                        msg (SE, _("File %s lacks BY variable %s."),
                             fh_get_name (file->handle), name);
                      else
                        msg (SE, _("Active dataset lacks BY variable %s."),
                             name);
                      ok = false;
                    }
                }
              assert (!ok || subcase_conformable (&file->by_vars,
                                                  &proc.files[0].by_vars));
            }
          free (by_vars);

          if (!ok)
            goto error;
	}
      else if (command != COMB_UPDATE && lex_match_id (lexer, "FIRST"))
        {
          if (first_name != NULL)
            {
              lex_sbc_only_once ("FIRST");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);
          if (!lex_force_id (lexer))
            goto error;
          first_name = xstrdup (lex_tokcstr (lexer));
          lex_get (lexer);
        }
      else if (command != COMB_UPDATE && lex_match_id (lexer, "LAST"))
        {
          if (last_name != NULL)
            {
              lex_sbc_only_once ("LAST");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);
          if (!lex_force_id (lexer))
            goto error;
          last_name = xstrdup (lex_tokcstr (lexer));
          lex_get (lexer);
        }
      else if (lex_match_id (lexer, "MAP"))
	{
	  /* FIXME. */
	}
      else if (lex_match_id (lexer, "DROP"))
        {
          if (!parse_dict_drop (lexer, proc.dict))
            goto error;
        }
      else if (lex_match_id (lexer, "KEEP"))
        {
          if (!parse_dict_keep (lexer, proc.dict))
            goto error;
        }
      else
	{
	  lex_error (lexer, NULL);
	  goto error;
	}

      if (!lex_match (lexer, T_SLASH) && lex_token (lexer) != T_ENDCMD)
        {
          lex_end_of_command (lexer);
          goto error;
        }
    }

  if (!saw_by)
    {
      if (command == COMB_UPDATE)
        {
          lex_sbc_missing ("BY");
          goto error;
        }
      if (n_tables)
        {
          msg (SE, _("BY is required when %s is specified."), "TABLE");
          goto error;
        }
      if (saw_sort)
        {
          msg (SE, _("BY is required when %s is specified."), "SORT");
          goto error;
        }
    }

  /* Add IN, FIRST, and LAST variables to master dictionary. */
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      if (!create_flag_var ("IN", file->in_name, proc.dict, &file->in_var))
        goto error;
    }
  if (!create_flag_var ("FIRST", first_name, proc.dict, &proc.first)
      || !create_flag_var ("LAST", last_name, proc.dict, &proc.last))
    goto error;

  dict_delete_scratch_vars (proc.dict);
  dict_compact_values (proc.dict);

  /* Set up mapping from each file's variables to master
     variables. */
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      size_t src_var_cnt = dict_get_var_cnt (file->dict);
      size_t j;

      file->mv = xnmalloc (src_var_cnt, sizeof *file->mv);
      for (j = 0; j < src_var_cnt; j++)
        {
          struct variable *src_var = dict_get_var (file->dict, j);
          struct variable *dst_var = dict_lookup_var (proc.dict,
                                                      var_get_name (src_var));
          if (dst_var != NULL)
            {
              size_t n = subcase_get_n_fields (&file->src);
              file->mv[n] = var_get_missing_values (src_var);
              subcase_add_var (&file->src, src_var, SC_ASCEND);
              subcase_add_var (&file->dst, dst_var, SC_ASCEND);
            }
        }
    }

  proc.output = autopaging_writer_create (dict_get_proto (proc.dict));
  taint = taint_clone (casewriter_get_taint (proc.output));

  /* Set up case matcher. */
  proc.matcher = case_matcher_create ();
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      if (file->reader == NULL)
        {
          if (active_file == NULL)
            {
              proc_discard_output (ds);
              file->reader = active_file = proc_open_filtering (ds, false);
            }
          else
            file->reader = casereader_clone (active_file);
        }
      if (!file->is_sorted)
        file->reader = sort_execute (file->reader, &file->by_vars);
      taint_propagate (casereader_get_taint (file->reader), taint);
      file->data = casereader_read (file->reader);
      if (file->type == COMB_FILE)
        case_matcher_add_input (proc.matcher, &file->by_vars,
                                &file->data, &file->is_minimal);
    }

  if (command == COMB_ADD)
    execute_add_files (&proc);
  else if (command == COMB_MATCH)
    execute_match_files (&proc);
  else if (command == COMB_UPDATE)
    execute_update (&proc);
  else
    NOT_REACHED ();

  case_matcher_destroy (proc.matcher);
  proc.matcher = NULL;
  close_all_comb_files (&proc);
  if (active_file != NULL)
    proc_commit (ds);

  dataset_set_dict (ds, proc.dict);
  dataset_set_source (ds, casewriter_make_reader (proc.output));
  proc.dict = NULL;
  proc.output = NULL;

  free_comb_proc (&proc);

  free (first_name);
  free (last_name);

  return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;

 error:
  if (active_file != NULL)
    proc_commit (ds);
  free_comb_proc (&proc);
  taint_destroy (taint);
  free (first_name);
  free (last_name);
  return CMD_CASCADING_FAILURE;
}
示例#2
0
int
cmd_save_translate (struct lexer *lexer, struct dataset *ds)
{
  enum { CSV_FILE = 1, TAB_FILE } type;

  struct dictionary *dict;
  struct case_map *map;
  struct casewriter *writer;
  struct file_handle *handle;

  struct csv_writer_options csv_opts;

  bool replace;

  bool retain_unselected;
  bool recode_user_missing;
  bool include_var_names;
  bool use_value_labels;
  bool use_print_formats;
  char decimal;
  char delimiter;
  char qualifier;

  bool ok;

  type = 0;

  dict = dict_clone (dataset_dict (ds));
  map = NULL;

  handle = NULL;
  replace = false;

  retain_unselected = true;
  recode_user_missing = false;
  include_var_names = false;
  use_value_labels = false;
  use_print_formats = false;
  decimal = settings_get_decimal_char (FMT_F);
  delimiter = 0;
  qualifier = '"';

  case_map_prepare_dict (dict);
  dict_delete_scratch_vars (dict);

  while (lex_match (lexer, T_SLASH))
    {
      if (lex_match_id (lexer, "OUTFILE"))
	{
          if (handle != NULL)
            {
              lex_sbc_only_once ("OUTFILE");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);

	  handle = fh_parse (lexer, FH_REF_FILE, NULL);
	  if (handle == NULL)
	    goto error;
	}
      else if (lex_match_id (lexer, "TYPE"))
        {
          if (type != 0)
            {
              lex_sbc_only_once ("TYPE");
              goto error;
            }

          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "CSV"))
            type = CSV_FILE;
          else if (lex_match_id (lexer, "TAB"))
            type = TAB_FILE;
          else
            {
              lex_error_expecting (lexer, "CSV", "TAB", NULL_SENTINEL);
              goto error;
            }
        }
      else if (lex_match_id (lexer, "REPLACE"))
        replace = true;
      else if (lex_match_id (lexer, "FIELDNAMES"))
        include_var_names = true;
      else if (lex_match_id (lexer, "MISSING"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "IGNORE"))
            recode_user_missing = false;
          else if (lex_match_id (lexer, "RECODE"))
            recode_user_missing = true;
          else
            {
              lex_error_expecting (lexer, "IGNORE", "RECODE", NULL_SENTINEL);
              goto error;
            }
        }
      else if (lex_match_id (lexer, "CELLS"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "VALUES"))
            use_value_labels = false;
          else if (lex_match_id (lexer, "LABELS"))
            use_value_labels = true;
          else
            {
              lex_error_expecting (lexer, "VALUES", "LABELS", NULL_SENTINEL);
              goto error;
            }
        }
      else if (lex_match_id (lexer, "TEXTOPTIONS"))
        {
          lex_match (lexer, T_EQUALS);
          for (;;)
            {
              if (lex_match_id (lexer, "DELIMITER"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (!lex_force_string (lexer))
                    goto error;
                  /* XXX should support multibyte UTF-8 delimiters */
                  if (ss_length (lex_tokss (lexer)) != 1)
                    {
                      msg (SE, _("The %s string must contain exactly one "
                                 "character."), "DELIMITER");
                      goto error;
                    }
                  delimiter = ss_first (lex_tokss (lexer));
                  lex_get (lexer);
                }
              else if (lex_match_id (lexer, "QUALIFIER"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (!lex_force_string (lexer))
                    goto error;
                  /* XXX should support multibyte UTF-8 qualifiers */
                  if (ss_length (lex_tokss (lexer)) != 1)
                    {
                      msg (SE, _("The %s string must contain exactly one "
                                 "character."), "QUALIFIER");
                      goto error;
                    }
                  qualifier = ss_first (lex_tokss (lexer));
                  lex_get (lexer);
                }
              else if (lex_match_id (lexer, "DECIMAL"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (lex_match_id (lexer, "DOT"))
                    decimal = '.';
                  else if (lex_match_id (lexer, "COMMA"))
                    decimal = ',';
                  else
                    {
                      lex_error_expecting (lexer, "DOT", "COMMA",
                                           NULL_SENTINEL);
                      goto error;
                    }
                }
              else if (lex_match_id (lexer, "FORMAT"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (lex_match_id (lexer, "PLAIN"))
                    use_print_formats = false;
                  else if (lex_match_id (lexer, "VARIABLE"))
                    use_print_formats = true;
                  else
                    {
                      lex_error_expecting (lexer, "PLAIN", "VARIABLE",
                                           NULL_SENTINEL);
                      goto error;
                    }
                }
              else
                break;
            }
        }
      else if (lex_match_id (lexer, "UNSELECTED"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "RETAIN"))
            retain_unselected = true;
          else if (lex_match_id (lexer, "DELETE"))
            retain_unselected = false;
          else
            {
              lex_error_expecting (lexer, "RETAIN", "DELETE", NULL_SENTINEL);
              goto error;
            }
        }
      else if (!parse_dict_trim (lexer, dict))
        goto error;
    }
  if (lex_end_of_command (lexer) != CMD_SUCCESS)
    goto error;

  if (type == 0)
    {
      lex_sbc_missing ("TYPE");
      goto error;
    }
  else if (handle == NULL)
    {
      lex_sbc_missing ("OUTFILE");
      goto error;
    }
  else if (!replace && fn_exists (fh_get_file_name (handle)))
    {
      msg (SE, _("Output file `%s' exists but REPLACE was not specified."),
           fh_get_file_name (handle));
      goto error;
    }

  dict_delete_scratch_vars (dict);
  dict_compact_values (dict);

  csv_opts.recode_user_missing = recode_user_missing;
  csv_opts.include_var_names = include_var_names;
  csv_opts.use_value_labels = use_value_labels;
  csv_opts.use_print_formats = use_print_formats;
  csv_opts.decimal = decimal;
  csv_opts.delimiter = (delimiter ? delimiter
                        : type == TAB_FILE ? '\t'
                        : decimal == '.' ? ','
                        : ';');
  csv_opts.qualifier = qualifier;

  writer = csv_writer_open (handle, dict, &csv_opts);
  if (writer == NULL)
    goto error;
  fh_unref (handle);

  map = case_map_from_dict (dict);
  if (map != NULL)
    writer = case_map_create_output_translator (map, writer);
  dict_destroy (dict);

  casereader_transfer (proc_open_filtering (ds, !retain_unselected), writer);
  ok = casewriter_destroy (writer);
  ok = proc_commit (ds) && ok;

  return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;

error:
  fh_unref (handle);
  dict_destroy (dict);
  case_map_destroy (map);
  return CMD_FAILURE;
}
示例#3
0
文件: save.c 项目: RobertDash/pspp
/* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
   WRITER_TYPE identifies the type of file to write,
   and COMMAND_TYPE identifies the type of command.

   On success, returns a writer.
   For procedures only, sets *RETAIN_UNSELECTED to true if cases
   that would otherwise be excluded by FILTER or USE should be
   included.

   On failure, returns a null pointer. */
static struct casewriter *
parse_write_command (struct lexer *lexer, struct dataset *ds,
		     enum writer_type writer_type,
                     enum command_type command_type,
                     bool *retain_unselected)
{
  /* Common data. */
  struct file_handle *handle; /* Output file. */
  struct dictionary *dict;    /* Dictionary for output file. */
  struct casewriter *writer;  /* Writer. */
  struct case_map *map;       /* Map from input data to data for writer. */

  /* Common options. */
  bool print_map;             /* Print map?  TODO. */
  bool print_short_names;     /* Print long-to-short name map.  TODO. */
  struct sfm_write_options sysfile_opts;
  struct pfm_write_options porfile_opts;

  assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
  assert (command_type == XFORM_CMD || command_type == PROC_CMD);
  assert ((retain_unselected != NULL) == (command_type == PROC_CMD));

  if (command_type == PROC_CMD)
    *retain_unselected = true;

  handle = NULL;
  dict = dict_clone (dataset_dict (ds));
  writer = NULL;
  map = NULL;
  print_map = false;
  print_short_names = false;
  sysfile_opts = sfm_writer_default_options ();
  porfile_opts = pfm_writer_default_options ();

  case_map_prepare_dict (dict);
  dict_delete_scratch_vars (dict);

  lex_match (lexer, T_SLASH);
  for (;;)
    {
      if (lex_match_id (lexer, "OUTFILE"))
	{
          if (handle != NULL)
            {
              lex_sbc_only_once ("OUTFILE");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);

	  handle = fh_parse (lexer, FH_REF_FILE, NULL);
	  if (handle == NULL)
	    goto error;
	}
      else if (lex_match_id (lexer, "NAMES"))
        print_short_names = true;
      else if (lex_match_id (lexer, "PERMISSIONS"))
        {
          bool cw;

          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "READONLY"))
            cw = false;
          else if (lex_match_id (lexer, "WRITEABLE"))
            cw = true;
          else
            {
              lex_error_expecting (lexer, "READONLY", "WRITEABLE",
                                   NULL_SENTINEL);
              goto error;
            }
          sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
        }
      else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "RETAIN"))
            *retain_unselected = true;
          else if (lex_match_id (lexer, "DELETE"))
            *retain_unselected = false;
          else
            {
              lex_error_expecting (lexer, "RETAIN", "DELETE", NULL_SENTINEL);
              goto error;
            }
        }
      else if (writer_type == SYSFILE_WRITER
               && lex_match_id (lexer, "COMPRESSED"))
	sysfile_opts.compress = true;
      else if (writer_type == SYSFILE_WRITER
               && lex_match_id (lexer, "UNCOMPRESSED"))
	sysfile_opts.compress = false;
      else if (writer_type == SYSFILE_WRITER
               && lex_match_id (lexer, "VERSION"))
	{
	  lex_match (lexer, T_EQUALS);
	  if (!lex_force_int (lexer))
            goto error;
          sysfile_opts.version = lex_integer (lexer);
          lex_get (lexer);
	}
      else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "COMMUNICATIONS"))
            porfile_opts.type = PFM_COMM;
          else if (lex_match_id (lexer, "TAPE"))
            porfile_opts.type = PFM_TAPE;
          else
            {
              lex_error_expecting (lexer, "COMM", "TAPE", NULL_SENTINEL);
              goto error;
            }
        }
      else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
        {
          lex_match (lexer, T_EQUALS);
          if (!lex_force_int (lexer))
            goto error;
          porfile_opts.digits = lex_integer (lexer);
          lex_get (lexer);
        }
      else if (!parse_dict_trim (lexer, dict))
        goto error;

      if (!lex_match (lexer, T_SLASH))
	break;
    }
  if (lex_end_of_command (lexer) != CMD_SUCCESS)
    goto error;

  if (handle == NULL)
    {
      lex_sbc_missing ("OUTFILE");
      goto error;
    }

  dict_delete_scratch_vars (dict);
  dict_compact_values (dict);

  if (fh_get_referent (handle) == FH_REF_FILE)
    {
      switch (writer_type)
        {
        case SYSFILE_WRITER:
          writer = sfm_open_writer (handle, dict, sysfile_opts);
          break;
        case PORFILE_WRITER:
          writer = pfm_open_writer (handle, dict, porfile_opts);
          break;
        }
    }
  else
    writer = any_writer_open (handle, dict);
  if (writer == NULL)
    goto error;

  map = case_map_from_dict (dict);
  if (map != NULL)
    writer = case_map_create_output_translator (map, writer);
  dict_destroy (dict);

  fh_unref (handle);
  return writer;

 error:
  fh_unref (handle);
  casewriter_destroy (writer);
  dict_destroy (dict);
  case_map_destroy (map);
  return NULL;
}