Пример #1
0
/* Opens the file designated by file handle FH for reading as a
   data file.  Providing fh_inline_file() for FH designates the
   "inline file", that is, data included inline in the command
   file between BEGIN FILE and END FILE.  Returns a reader if
   successful, or a null pointer otherwise. */
struct dfm_reader *
dfm_open_reader (struct file_handle *fh, struct lexer *lexer)
{
  struct dfm_reader *r;
  struct fh_lock *lock;

  /* TRANSLATORS: this fragment will be interpolated into
     messages in fh_lock() that identify types of files. */
  lock = fh_lock (fh, FH_REF_FILE | FH_REF_INLINE, N_("data file"),
                  FH_ACC_READ, false);
  if (lock == NULL)
    return NULL;

  r = fh_lock_get_aux (lock);
  if (r != NULL)
    return r;

  r = xmalloc (sizeof *r);
  r->fh = fh_ref (fh);
  r->lock = lock;
  r->lexer = lexer;
  ds_init_empty (&r->line);
  ds_init_empty (&r->scratch);
  r->flags = DFM_ADVANCE;
  r->eof_cnt = 0;
  r->block_left = 0;
  if (fh_get_referent (fh) != FH_REF_INLINE)
    {
      struct stat s;
      r->line_number = 0;
      r->file = fn_open (fh_get_file_name (fh), "rb");
      if (r->file == NULL)
        {
          msg (ME, _("Could not open `%s' for reading as a data file: %s."),
               fh_get_file_name (r->fh), strerror (errno));
          fh_unlock (r->lock);
          fh_unref (fh);
          free (r);
          return NULL;
        }
      r->file_size = fstat (fileno (r->file), &s) == 0 ? s.st_size : -1;
    }
  else
    r->file_size = -1;
  fh_lock_set_aux (lock, r);

  return r;
}
Пример #2
0
/* Closes reader R opened by dfm_open_reader(). */
void
dfm_close_reader (struct dfm_reader *r)
{
  if (r == NULL)
    return;

  if (fh_unlock (r->lock))
    {
      /* File is still locked by another client. */
      return;
    }

  /* This was the last client, so close the underlying file. */
  if (fh_get_referent (r->fh) != FH_REF_INLINE)
    fn_close (fh_get_file_name (r->fh), r->file);
  else
    {
      /* Skip any remaining data on the inline file. */
      if (r->flags & DFM_SAW_BEGIN_DATA)
        {
          dfm_reread_record (r, 0);
          while (!dfm_eof (r))
            dfm_forward_record (r);
        }
    }

  fh_unref (r->fh);
  ds_destroy (&r->line);
  ds_destroy (&r->scratch);
  free (r);
}
Пример #3
0
const char *
dfm_get_file_name (const struct dfm_reader *r)
{
  return (fh_get_referent (r->fh) == FH_REF_FILE
          ? fh_get_file_name (r->fh)
          : NULL);
}
Пример #4
0
/* Returns a casereader for HANDLE.  On success, returns the new
   casereader and stores the file's dictionary into *DICT.  On
   failure, returns a null pointer.

   Ordinarily the reader attempts to automatically detect the character
   encoding based on the file's contents.  This isn't always possible,
   especially for files written by old versions of SPSS or PSPP, so specifying
   a nonnull ENCODING overrides the choice of character encoding.  */
struct casereader *
any_reader_open (struct file_handle *handle, const char *encoding,
                 struct dictionary **dict)
{
  switch (fh_get_referent (handle))
    {
    case FH_REF_FILE:
      {
        enum detect_result result;

        result = try_detect (fh_get_file_name (handle), sfm_detect);
        if (result == ANY_ERROR)
          return NULL;
        else if (result == ANY_YES)
          {
            struct sfm_reader *r;

            r = sfm_open (handle);
            if (r == NULL)
              return NULL;

            return sfm_decode (r, encoding, dict, NULL);
          }

        result = try_detect (fh_get_file_name (handle), pfm_detect);
        if (result == ANY_ERROR)
          return NULL;
        else if (result == ANY_YES)
          return pfm_open_reader (handle, dict, NULL);

        msg (SE, _("`%s' is not a system or portable file."),
             fh_get_file_name (handle));
        return NULL;
      }

    case FH_REF_INLINE:
      msg (SE, _("The inline file is not allowed here."));
      return NULL;

    case FH_REF_DATASET:
      return dataset_reader_open (handle, dict);
    }
  NOT_REACHED ();
}
Пример #5
0
/* Closes a CSV file after we're done with it.
   Returns true if successful, false if an I/O error occurred. */
bool
close_writer (struct csv_writer *w)
{
  size_t i;
  bool ok;

  if (w == NULL)
    return true;

  ok = true;
  if (w->file != NULL)
    {
      if (write_error (w))
        ok = false;
      if (fclose (w->file) == EOF)
        ok = false;

      if (!ok)
        msg (ME, _("An I/O error occurred writing CSV file `%s'."),
             fh_get_file_name (w->fh));

      if (ok ? !replace_file_commit (w->rf) : !replace_file_abort (w->rf))
        ok = false;
    }

  fh_unlock (w->lock);
  fh_unref (w->fh);

  free (w->encoding);

  for (i = 0; i < w->n_csv_vars; i++)
    {
      struct csv_var *cv = &w->csv_vars[i];
      mv_destroy (&cv->missing);
      val_labs_destroy (cv->val_labs);
    }

  free (w->csv_vars);
  free (w);

  return ok;
}
Пример #6
0
int
cmd_save_translate (struct lexer *lexer, struct dataset *ds)
{
  enum { CSV_FILE = 1, TAB_FILE } type;

  struct dictionary *dict;
  struct case_map *map;
  struct casewriter *writer;
  struct file_handle *handle;

  struct csv_writer_options csv_opts;

  bool replace;

  bool retain_unselected;
  bool recode_user_missing;
  bool include_var_names;
  bool use_value_labels;
  bool use_print_formats;
  char decimal;
  char delimiter;
  char qualifier;

  bool ok;

  type = 0;

  dict = dict_clone (dataset_dict (ds));
  map = NULL;

  handle = NULL;
  replace = false;

  retain_unselected = true;
  recode_user_missing = false;
  include_var_names = false;
  use_value_labels = false;
  use_print_formats = false;
  decimal = settings_get_decimal_char (FMT_F);
  delimiter = 0;
  qualifier = '"';

  case_map_prepare_dict (dict);
  dict_delete_scratch_vars (dict);

  while (lex_match (lexer, T_SLASH))
    {
      if (lex_match_id (lexer, "OUTFILE"))
	{
          if (handle != NULL)
            {
              lex_sbc_only_once ("OUTFILE");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);

	  handle = fh_parse (lexer, FH_REF_FILE, NULL);
	  if (handle == NULL)
	    goto error;
	}
      else if (lex_match_id (lexer, "TYPE"))
        {
          if (type != 0)
            {
              lex_sbc_only_once ("TYPE");
              goto error;
            }

          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "CSV"))
            type = CSV_FILE;
          else if (lex_match_id (lexer, "TAB"))
            type = TAB_FILE;
          else
            {
              lex_error_expecting (lexer, "CSV", "TAB", NULL_SENTINEL);
              goto error;
            }
        }
      else if (lex_match_id (lexer, "REPLACE"))
        replace = true;
      else if (lex_match_id (lexer, "FIELDNAMES"))
        include_var_names = true;
      else if (lex_match_id (lexer, "MISSING"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "IGNORE"))
            recode_user_missing = false;
          else if (lex_match_id (lexer, "RECODE"))
            recode_user_missing = true;
          else
            {
              lex_error_expecting (lexer, "IGNORE", "RECODE", NULL_SENTINEL);
              goto error;
            }
        }
      else if (lex_match_id (lexer, "CELLS"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "VALUES"))
            use_value_labels = false;
          else if (lex_match_id (lexer, "LABELS"))
            use_value_labels = true;
          else
            {
              lex_error_expecting (lexer, "VALUES", "LABELS", NULL_SENTINEL);
              goto error;
            }
        }
      else if (lex_match_id (lexer, "TEXTOPTIONS"))
        {
          lex_match (lexer, T_EQUALS);
          for (;;)
            {
              if (lex_match_id (lexer, "DELIMITER"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (!lex_force_string (lexer))
                    goto error;
                  /* XXX should support multibyte UTF-8 delimiters */
                  if (ss_length (lex_tokss (lexer)) != 1)
                    {
                      msg (SE, _("The %s string must contain exactly one "
                                 "character."), "DELIMITER");
                      goto error;
                    }
                  delimiter = ss_first (lex_tokss (lexer));
                  lex_get (lexer);
                }
              else if (lex_match_id (lexer, "QUALIFIER"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (!lex_force_string (lexer))
                    goto error;
                  /* XXX should support multibyte UTF-8 qualifiers */
                  if (ss_length (lex_tokss (lexer)) != 1)
                    {
                      msg (SE, _("The %s string must contain exactly one "
                                 "character."), "QUALIFIER");
                      goto error;
                    }
                  qualifier = ss_first (lex_tokss (lexer));
                  lex_get (lexer);
                }
              else if (lex_match_id (lexer, "DECIMAL"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (lex_match_id (lexer, "DOT"))
                    decimal = '.';
                  else if (lex_match_id (lexer, "COMMA"))
                    decimal = ',';
                  else
                    {
                      lex_error_expecting (lexer, "DOT", "COMMA",
                                           NULL_SENTINEL);
                      goto error;
                    }
                }
              else if (lex_match_id (lexer, "FORMAT"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (lex_match_id (lexer, "PLAIN"))
                    use_print_formats = false;
                  else if (lex_match_id (lexer, "VARIABLE"))
                    use_print_formats = true;
                  else
                    {
                      lex_error_expecting (lexer, "PLAIN", "VARIABLE",
                                           NULL_SENTINEL);
                      goto error;
                    }
                }
              else
                break;
            }
        }
      else if (lex_match_id (lexer, "UNSELECTED"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "RETAIN"))
            retain_unselected = true;
          else if (lex_match_id (lexer, "DELETE"))
            retain_unselected = false;
          else
            {
              lex_error_expecting (lexer, "RETAIN", "DELETE", NULL_SENTINEL);
              goto error;
            }
        }
      else if (!parse_dict_trim (lexer, dict))
        goto error;
    }
  if (lex_end_of_command (lexer) != CMD_SUCCESS)
    goto error;

  if (type == 0)
    {
      lex_sbc_missing ("TYPE");
      goto error;
    }
  else if (handle == NULL)
    {
      lex_sbc_missing ("OUTFILE");
      goto error;
    }
  else if (!replace && fn_exists (fh_get_file_name (handle)))
    {
      msg (SE, _("Output file `%s' exists but REPLACE was not specified."),
           fh_get_file_name (handle));
      goto error;
    }

  dict_delete_scratch_vars (dict);
  dict_compact_values (dict);

  csv_opts.recode_user_missing = recode_user_missing;
  csv_opts.include_var_names = include_var_names;
  csv_opts.use_value_labels = use_value_labels;
  csv_opts.use_print_formats = use_print_formats;
  csv_opts.decimal = decimal;
  csv_opts.delimiter = (delimiter ? delimiter
                        : type == TAB_FILE ? '\t'
                        : decimal == '.' ? ','
                        : ';');
  csv_opts.qualifier = qualifier;

  writer = csv_writer_open (handle, dict, &csv_opts);
  if (writer == NULL)
    goto error;
  fh_unref (handle);

  map = case_map_from_dict (dict);
  if (map != NULL)
    writer = case_map_create_output_translator (map, writer);
  dict_destroy (dict);

  casereader_transfer (proc_open_filtering (ds, !retain_unselected), writer);
  ok = casewriter_destroy (writer);
  ok = proc_commit (ds) && ok;

  return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;

error:
  fh_unref (handle);
  dict_destroy (dict);
  case_map_destroy (map);
  return CMD_FAILURE;
}
Пример #7
0
/* Opens the CSV file designated by file handle FH for writing cases from
   dictionary DICT according to the given OPTS.

   No reference to D is retained, so it may be modified or
   destroyed at will after this function returns. */
struct casewriter *
csv_writer_open (struct file_handle *fh, const struct dictionary *dict,
                 const struct csv_writer_options *opts)
{
  struct csv_writer *w;
  int i;

  /* Create and initialize writer. */
  w = xmalloc (sizeof *w);
  w->fh = fh_ref (fh);
  w->lock = NULL;
  w->file = NULL;
  w->rf = NULL;

  w->opts = *opts;

  w->encoding = xstrdup (dict_get_encoding (dict));

  w->n_csv_vars = dict_get_var_cnt (dict);
  w->csv_vars = xnmalloc (w->n_csv_vars, sizeof *w->csv_vars);
  for (i = 0; i < w->n_csv_vars; i++)
    {
      const struct variable *var = dict_get_var (dict, i);
      struct csv_var *cv = &w->csv_vars[i];

      cv->width = var_get_width (var);
      cv->case_index = var_get_case_index (var);

      cv->format = *var_get_print_format (var);
      if (opts->recode_user_missing)
        mv_copy (&cv->missing, var_get_missing_values (var));
      else
        mv_init (&cv->missing, cv->width);

      if (opts->use_value_labels)
        cv->val_labs = val_labs_clone (var_get_value_labels (var));
      else
        cv->val_labs = NULL;
    }

  /* Open file handle as an exclusive writer. */
  /* TRANSLATORS: this fragment will be interpolated into messages in fh_lock()
     that identify types of files. */
  w->lock = fh_lock (fh, FH_REF_FILE, N_("CSV file"), FH_ACC_WRITE, true);
  if (w->lock == NULL)
    goto error;

  /* Create the file on disk. */
  w->rf = replace_file_start (fh_get_file_name (fh), "w", 0666,
                              &w->file, NULL);
  if (w->rf == NULL)
    {
      msg (ME, _("Error opening `%s' for writing as a system file: %s."),
           fh_get_file_name (fh), strerror (errno));
      goto error;
    }

  if (opts->include_var_names)
    write_var_names (w, dict);

  if (write_error (w))
    goto error;

  return casewriter_create (dict_get_proto (dict),
                            &csv_file_casewriter_class, w);

error:
  close_writer (w);
  return NULL;
}