Example #1
0
/* Report a read error or unexpected end-of-file condition on R. */
static void
read_error (struct dfm_reader *r)
{
  if (ferror (r->file))
    msg (ME, _("Error reading file %s: %s."),
         fh_get_name (r->fh), strerror (errno));
  else if (feof (r->file))
    msg (ME, _("Unexpected end of file reading %s."), fh_get_name (r->fh));
  else
    NOT_REACHED ();
}
Example #2
0
/* Returns the number of attempts, thus far, to advance past
   end-of-file in reader R.  Reads forward in HANDLE's file, if
   necessary, to find out.

   Normally, the user stops attempting to read from the file the
   first time EOF is reached (a return value of 1).  If the user
   tries to read past EOF again (a return value of 2 or more),
   an error message is issued, and the caller should more
   forcibly abort to avoid an infinite loop. */
unsigned
dfm_eof (struct dfm_reader *r)
{
  if (r->flags & DFM_ADVANCE)
    {
      r->flags &= ~DFM_ADVANCE;

      if (r->eof_cnt == 0 && read_record (r) )
        {
          r->pos = 0;
          return 0;
        }

      r->eof_cnt++;
      if (r->eof_cnt == 2)
        {
          if (r->fh != fh_inline_file ())
            msg (ME, _("Attempt to read beyond end-of-file on file %s."),
                 fh_get_name (r->fh));
          else
            msg (ME, _("Attempt to read beyond END DATA."));
        }
    }

  return r->eof_cnt;
}
Example #3
0
/* Reads a block descriptor word or record descriptor word
   (according to TYPE) from R.  Returns 1 if successful, 0 if
   end of file was reached before any bytes could be read, -1 if
   an error occurred.  Reports an error in the latter case.

   If successful, stores the number of remaining bytes in the
   block or record (that is, the block or record length, minus
   the 4 bytes in the BDW or RDW itself) into *REMAINING_SIZE.
   If SEGMENT is nonnull, also stores the segment control
   character (SCC) into *SEGMENT. */
static int
read_descriptor_word (struct dfm_reader *r, enum descriptor_type type,
                      size_t *remaining_size, int *segment)
{
  uint8_t raw_descriptor[4];
  int status;

  status = try_to_read_fully (r, raw_descriptor, sizeof raw_descriptor);
  if (status <= 0)
    return status;

  *remaining_size = (raw_descriptor[0] << 8) | raw_descriptor[1];
  if (segment != NULL)
    *segment = raw_descriptor[2];

  if (*remaining_size < 4)
    {
      msg (ME,
           (type == BLOCK
            ? _("Corrupt block descriptor word at offset 0x%lx in %s.")
            : _("Corrupt record descriptor word at offset 0x%lx in %s.")),
           (long) ftello (r->file) - 4, fh_get_name (r->fh));
      return -1;
    }

  *remaining_size -= 4;
  return 1;
}
Example #4
0
/* Reports that reader R has read a corrupt record size. */
static void
corrupt_size (struct dfm_reader *r)
{
  msg (ME, _("Corrupt record size at offset 0x%lx in %s."),
       (long) ftello (r->file) - 4, fh_get_name (r->fh));
}
Example #5
0
/* Report a partial read at end of file reading R. */
static void
partial_record (struct dfm_reader *r)
{
  msg (ME, _("Unexpected end of file in partial record reading %s."),
       fh_get_name (r->fh));
}
Example #6
0
/* Merge the dictionary for file F into master dictionary M. */
static bool
merge_dictionary (struct dictionary *const m, struct comb_file *f)
{
  struct dictionary *d = f->dict;
  const struct string_array *d_docs, *m_docs;
  int i;

  if (dict_get_label (m) == NULL)
    dict_set_label (m, dict_get_label (d));

  d_docs = dict_get_documents (d);
  m_docs = dict_get_documents (m);


  /* FIXME: If the input files have different encodings, then
     the result is undefined.
     The correct thing to do would be to convert to an encoding
     which can cope with all the input files (eg UTF-8).
   */
  if ( 0 != strcmp (dict_get_encoding (f->dict), dict_get_encoding (m)))
    msg (MW, _("Combining files with incompatible encodings. String data may "
               "not be represented correctly."));

  if (d_docs != NULL)
    {
      if (m_docs == NULL)
        dict_set_documents (m, d_docs);
      else
        {
          struct string_array new_docs;
          size_t i;

          new_docs.n = m_docs->n + d_docs->n;
          new_docs.strings = xmalloc (new_docs.n * sizeof *new_docs.strings);
          for (i = 0; i < m_docs->n; i++)
            new_docs.strings[i] = m_docs->strings[i];
          for (i = 0; i < d_docs->n; i++)
            new_docs.strings[m_docs->n + i] = d_docs->strings[i];

          dict_set_documents (m, &new_docs);

          free (new_docs.strings);
        }
    }

  for (i = 0; i < dict_get_var_cnt (d); i++)
    {
      struct variable *dv = dict_get_var (d, i);
      struct variable *mv = dict_lookup_var (m, var_get_name (dv));

      if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
        continue;

      if (mv != NULL)
        {
          if (var_get_width (mv) != var_get_width (dv))
            {
              const char *var_name = var_get_name (dv);
              struct string s = DS_EMPTY_INITIALIZER;
              const char *file_name;

              file_name = f->handle ? fh_get_name (f->handle) : "*";
              ds_put_format (&s,
                             _("Variable %s in file %s has different "
                               "type or width from the same variable in "
                               "earlier file."),
                             var_name, file_name);
              ds_put_cstr (&s, "  ");
              if (var_is_numeric (dv))
                ds_put_format (&s, _("In file %s, %s is numeric."),
                               file_name, var_name);
              else
                ds_put_format (&s, _("In file %s, %s is a string variable "
                                     "with width %d."),
                               file_name, var_name, var_get_width (dv));
              ds_put_cstr (&s, "  ");
              if (var_is_numeric (mv))
                ds_put_format (&s, _("In an earlier file, %s was numeric."),
                               var_name);
              else
                ds_put_format (&s, _("In an earlier file, %s was a string "
                                     "variable with width %d."),
                               var_name, var_get_width (mv));
              msg (SE, "%s", ds_cstr (&s));
              ds_destroy (&s);
              return false;
            }

          if (var_has_value_labels (dv) && !var_has_value_labels (mv))
            var_set_value_labels (mv, var_get_value_labels (dv));
          if (var_has_missing_values (dv) && !var_has_missing_values (mv))
            var_set_missing_values (mv, var_get_missing_values (dv));
          if (var_get_label (dv) && !var_get_label (mv))
            var_set_label (mv, var_get_label (dv));
        }
      else
        mv = dict_clone_var_assert (m, dv);
    }

  return true;
}
Example #7
0
static int
combine_files (enum comb_command_type command,
               struct lexer *lexer, struct dataset *ds)
{
  struct comb_proc proc;

  bool saw_by = false;
  bool saw_sort = false;
  struct casereader *active_file = NULL;

  char *first_name = NULL;
  char *last_name = NULL;

  struct taint *taint = NULL;

  size_t n_tables = 0;
  size_t allocated_files = 0;

  size_t i;

  proc.files = NULL;
  proc.n_files = 0;
  proc.dict = dict_create (get_default_encoding ());
  proc.output = NULL;
  proc.matcher = NULL;
  subcase_init_empty (&proc.by_vars);
  proc.first = NULL;
  proc.last = NULL;
  proc.buffered_case = NULL;
  proc.prev_BY = NULL;

  dict_set_case_limit (proc.dict, dict_get_case_limit (dataset_dict (ds)));

  lex_match (lexer, T_SLASH);
  for (;;)
    {
      struct comb_file *file;
      enum comb_file_type type;

      if (lex_match_id (lexer, "FILE"))
        type = COMB_FILE;
      else if (command == COMB_MATCH && lex_match_id (lexer, "TABLE"))
        {
          type = COMB_TABLE;
          n_tables++;
        }
      else
        break;
      lex_match (lexer, T_EQUALS);

      if (proc.n_files >= allocated_files)
        proc.files = x2nrealloc (proc.files, &allocated_files,
                                sizeof *proc.files);
      file = &proc.files[proc.n_files++];
      file->type = type;
      subcase_init_empty (&file->by_vars);
      subcase_init_empty (&file->src);
      subcase_init_empty (&file->dst);
      file->mv = NULL;
      file->handle = NULL;
      file->dict = NULL;
      file->reader = NULL;
      file->data = NULL;
      file->is_sorted = true;
      file->in_name = NULL;
      file->in_var = NULL;

      if (lex_match (lexer, T_ASTERISK))
        {
          if (!dataset_has_source (ds))
            {
              msg (SE, _("Cannot specify the active dataset since none "
                         "has been defined."));
              goto error;
            }

          if (proc_make_temporary_transformations_permanent (ds))
            msg (SE, _("This command may not be used after TEMPORARY when "
                       "the active dataset is an input source.  "
                       "Temporary transformations will be made permanent."));

          file->dict = dict_clone (dataset_dict (ds));
        }
      else
        {
          file->handle = fh_parse (lexer, FH_REF_FILE, dataset_session (ds));
          if (file->handle == NULL)
            goto error;

          file->reader = any_reader_open (file->handle, NULL, &file->dict);
          if (file->reader == NULL)
            goto error;
        }

      while (lex_match (lexer, T_SLASH))
        if (lex_match_id (lexer, "RENAME"))
          {
            if (!parse_dict_rename (lexer, file->dict))
              goto error;
          }
        else if (lex_match_id (lexer, "IN"))
          {
            lex_match (lexer, T_EQUALS);
            if (lex_token (lexer) != T_ID)
              {
                lex_error (lexer, NULL);
                goto error;
              }

            if (file->in_name)
              {
                msg (SE, _("Multiple IN subcommands for a single FILE or "
                           "TABLE."));
                goto error;
              }
            file->in_name = xstrdup (lex_tokcstr (lexer));
            lex_get (lexer);
          }
        else if (lex_match_id (lexer, "SORT"))
          {
            file->is_sorted = false;
            saw_sort = true;
          }

      if (!merge_dictionary (proc.dict, file))
        goto error;
    }

  while (lex_token (lexer) != T_ENDCMD)
    {
      if (lex_match (lexer, T_BY))
	{
          const struct variable **by_vars;
          size_t i;
          bool ok;

	  if (saw_by)
	    {
              lex_sbc_only_once ("BY");
	      goto error;
	    }
          saw_by = true;

	  lex_match (lexer, T_EQUALS);
          if (!parse_sort_criteria (lexer, proc.dict, &proc.by_vars,
                                    &by_vars, NULL))
	    goto error;

          ok = true;
          for (i = 0; i < proc.n_files; i++)
            {
              struct comb_file *file = &proc.files[i];
              size_t j;

              for (j = 0; j < subcase_get_n_fields (&proc.by_vars); j++)
                {
                  const char *name = var_get_name (by_vars[j]);
                  struct variable *var = dict_lookup_var (file->dict, name);
                  if (var != NULL)
                    subcase_add_var (&file->by_vars, var,
                                     subcase_get_direction (&proc.by_vars, j));
                  else
                    {
                      if (file->handle != NULL)
                        msg (SE, _("File %s lacks BY variable %s."),
                             fh_get_name (file->handle), name);
                      else
                        msg (SE, _("Active dataset lacks BY variable %s."),
                             name);
                      ok = false;
                    }
                }
              assert (!ok || subcase_conformable (&file->by_vars,
                                                  &proc.files[0].by_vars));
            }
          free (by_vars);

          if (!ok)
            goto error;
	}
      else if (command != COMB_UPDATE && lex_match_id (lexer, "FIRST"))
        {
          if (first_name != NULL)
            {
              lex_sbc_only_once ("FIRST");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);
          if (!lex_force_id (lexer))
            goto error;
          first_name = xstrdup (lex_tokcstr (lexer));
          lex_get (lexer);
        }
      else if (command != COMB_UPDATE && lex_match_id (lexer, "LAST"))
        {
          if (last_name != NULL)
            {
              lex_sbc_only_once ("LAST");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);
          if (!lex_force_id (lexer))
            goto error;
          last_name = xstrdup (lex_tokcstr (lexer));
          lex_get (lexer);
        }
      else if (lex_match_id (lexer, "MAP"))
	{
	  /* FIXME. */
	}
      else if (lex_match_id (lexer, "DROP"))
        {
          if (!parse_dict_drop (lexer, proc.dict))
            goto error;
        }
      else if (lex_match_id (lexer, "KEEP"))
        {
          if (!parse_dict_keep (lexer, proc.dict))
            goto error;
        }
      else
	{
	  lex_error (lexer, NULL);
	  goto error;
	}

      if (!lex_match (lexer, T_SLASH) && lex_token (lexer) != T_ENDCMD)
        {
          lex_end_of_command (lexer);
          goto error;
        }
    }

  if (!saw_by)
    {
      if (command == COMB_UPDATE)
        {
          lex_sbc_missing ("BY");
          goto error;
        }
      if (n_tables)
        {
          msg (SE, _("BY is required when %s is specified."), "TABLE");
          goto error;
        }
      if (saw_sort)
        {
          msg (SE, _("BY is required when %s is specified."), "SORT");
          goto error;
        }
    }

  /* Add IN, FIRST, and LAST variables to master dictionary. */
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      if (!create_flag_var ("IN", file->in_name, proc.dict, &file->in_var))
        goto error;
    }
  if (!create_flag_var ("FIRST", first_name, proc.dict, &proc.first)
      || !create_flag_var ("LAST", last_name, proc.dict, &proc.last))
    goto error;

  dict_delete_scratch_vars (proc.dict);
  dict_compact_values (proc.dict);

  /* Set up mapping from each file's variables to master
     variables. */
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      size_t src_var_cnt = dict_get_var_cnt (file->dict);
      size_t j;

      file->mv = xnmalloc (src_var_cnt, sizeof *file->mv);
      for (j = 0; j < src_var_cnt; j++)
        {
          struct variable *src_var = dict_get_var (file->dict, j);
          struct variable *dst_var = dict_lookup_var (proc.dict,
                                                      var_get_name (src_var));
          if (dst_var != NULL)
            {
              size_t n = subcase_get_n_fields (&file->src);
              file->mv[n] = var_get_missing_values (src_var);
              subcase_add_var (&file->src, src_var, SC_ASCEND);
              subcase_add_var (&file->dst, dst_var, SC_ASCEND);
            }
        }
    }

  proc.output = autopaging_writer_create (dict_get_proto (proc.dict));
  taint = taint_clone (casewriter_get_taint (proc.output));

  /* Set up case matcher. */
  proc.matcher = case_matcher_create ();
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      if (file->reader == NULL)
        {
          if (active_file == NULL)
            {
              proc_discard_output (ds);
              file->reader = active_file = proc_open_filtering (ds, false);
            }
          else
            file->reader = casereader_clone (active_file);
        }
      if (!file->is_sorted)
        file->reader = sort_execute (file->reader, &file->by_vars);
      taint_propagate (casereader_get_taint (file->reader), taint);
      file->data = casereader_read (file->reader);
      if (file->type == COMB_FILE)
        case_matcher_add_input (proc.matcher, &file->by_vars,
                                &file->data, &file->is_minimal);
    }

  if (command == COMB_ADD)
    execute_add_files (&proc);
  else if (command == COMB_MATCH)
    execute_match_files (&proc);
  else if (command == COMB_UPDATE)
    execute_update (&proc);
  else
    NOT_REACHED ();

  case_matcher_destroy (proc.matcher);
  proc.matcher = NULL;
  close_all_comb_files (&proc);
  if (active_file != NULL)
    proc_commit (ds);

  dataset_set_dict (ds, proc.dict);
  dataset_set_source (ds, casewriter_make_reader (proc.output));
  proc.dict = NULL;
  proc.output = NULL;

  free_comb_proc (&proc);

  free (first_name);
  free (last_name);

  return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;

 error:
  if (active_file != NULL)
    proc_commit (ds);
  free_comb_proc (&proc);
  taint_destroy (taint);
  free (first_name);
  free (last_name);
  return CMD_CASCADING_FAILURE;
}