Exemplo n.º 1
0
/* Parses a list of sort fields and appends them to ORDERING,
   which the caller must already have initialized.
   Returns true if successful, false on error.
   If SAW_DIRECTION is nonnull, sets *SAW_DIRECTION to true if at
   least one parenthesized sort direction was specified, false
   otherwise. */
bool
parse_sort_criteria (struct lexer *lexer, const struct dictionary *dict,
                     struct subcase *ordering,
                     const struct variable ***vars, bool *saw_direction)
{
  const struct variable **local_vars = NULL;
  size_t var_cnt = 0;

  if (vars == NULL) 
    vars = &local_vars;
  *vars = NULL;

  if (saw_direction != NULL)
    *saw_direction = false;

  do
    {
      size_t prev_var_cnt = var_cnt;
      enum subcase_direction direction;
      size_t i;

      /* Variables. */
      if (!parse_variables_const (lexer, dict, vars, &var_cnt,
                                  PV_APPEND | PV_NO_SCRATCH))
        goto error;

      /* Sort direction. */
      if (lex_match (lexer, T_LPAREN))
	{
	  if (lex_match_id (lexer, "D") || lex_match_id (lexer, "DOWN"))
	    direction = SC_DESCEND;
	  else if (lex_match_id (lexer, "A") || lex_match_id (lexer, "UP"))
            direction = SC_ASCEND;
          else
	    {
              lex_error_expecting (lexer, "A", "D", NULL_SENTINEL);
              goto error;
	    }
	  if (!lex_force_match (lexer, T_RPAREN))
            goto error;
          if (saw_direction != NULL)
            *saw_direction = true;
	}
      else
        direction = SC_ASCEND;

      for (i = prev_var_cnt; i < var_cnt; i++) 
        {
          const struct variable *var = (*vars)[i];
          if (!subcase_add_var (ordering, var, direction))
            msg (SW, _("Variable %s specified twice in sort criteria."),
                 var_get_name (var)); 
        }
    }
  while (lex_token (lexer) == T_ID
         && dict_lookup_var (dict, lex_tokcstr (lexer)) != NULL);

  free (local_vars);
  return true;

error:
  free (local_vars);
  if (vars)
    *vars = NULL;
  return false;
}
Exemplo n.º 2
0
/* Merge the dictionary for file F into master dictionary M. */
static bool
merge_dictionary (struct dictionary *const m, struct comb_file *f)
{
  struct dictionary *d = f->dict;
  const struct string_array *d_docs, *m_docs;
  int i;

  if (dict_get_label (m) == NULL)
    dict_set_label (m, dict_get_label (d));

  d_docs = dict_get_documents (d);
  m_docs = dict_get_documents (m);


  /* FIXME: If the input files have different encodings, then
     the result is undefined.
     The correct thing to do would be to convert to an encoding
     which can cope with all the input files (eg UTF-8).
   */
  if ( 0 != strcmp (dict_get_encoding (f->dict), dict_get_encoding (m)))
    msg (MW, _("Combining files with incompatible encodings. String data may "
               "not be represented correctly."));

  if (d_docs != NULL)
    {
      if (m_docs == NULL)
        dict_set_documents (m, d_docs);
      else
        {
          struct string_array new_docs;
          size_t i;

          new_docs.n = m_docs->n + d_docs->n;
          new_docs.strings = xmalloc (new_docs.n * sizeof *new_docs.strings);
          for (i = 0; i < m_docs->n; i++)
            new_docs.strings[i] = m_docs->strings[i];
          for (i = 0; i < d_docs->n; i++)
            new_docs.strings[m_docs->n + i] = d_docs->strings[i];

          dict_set_documents (m, &new_docs);

          free (new_docs.strings);
        }
    }

  for (i = 0; i < dict_get_var_cnt (d); i++)
    {
      struct variable *dv = dict_get_var (d, i);
      struct variable *mv = dict_lookup_var (m, var_get_name (dv));

      if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
        continue;

      if (mv != NULL)
        {
          if (var_get_width (mv) != var_get_width (dv))
            {
              const char *var_name = var_get_name (dv);
              struct string s = DS_EMPTY_INITIALIZER;
              const char *file_name;

              file_name = f->handle ? fh_get_name (f->handle) : "*";
              ds_put_format (&s,
                             _("Variable %s in file %s has different "
                               "type or width from the same variable in "
                               "earlier file."),
                             var_name, file_name);
              ds_put_cstr (&s, "  ");
              if (var_is_numeric (dv))
                ds_put_format (&s, _("In file %s, %s is numeric."),
                               file_name, var_name);
              else
                ds_put_format (&s, _("In file %s, %s is a string variable "
                                     "with width %d."),
                               file_name, var_name, var_get_width (dv));
              ds_put_cstr (&s, "  ");
              if (var_is_numeric (mv))
                ds_put_format (&s, _("In an earlier file, %s was numeric."),
                               var_name);
              else
                ds_put_format (&s, _("In an earlier file, %s was a string "
                                     "variable with width %d."),
                               var_name, var_get_width (mv));
              msg (SE, "%s", ds_cstr (&s));
              ds_destroy (&s);
              return false;
            }

          if (var_has_value_labels (dv) && !var_has_value_labels (mv))
            var_set_value_labels (mv, var_get_value_labels (dv));
          if (var_has_missing_values (dv) && !var_has_missing_values (mv))
            var_set_missing_values (mv, var_get_missing_values (dv));
          if (var_get_label (dv) && !var_get_label (mv))
            var_set_label (mv, var_get_label (dv));
        }
      else
        mv = dict_clone_var_assert (m, dv);
    }

  return true;
}
Exemplo n.º 3
0
/* Parses the whole DO REPEAT command specification.
   Returns success. */
static bool
parse_specification (struct lexer *lexer, struct dictionary *dict,
                     struct hmap *dummies)
{
  struct dummy_var *first_dv = NULL;

  hmap_init (dummies);
  do
    {
      struct dummy_var *dv;
      const char *name;
      bool ok;

      /* Get a stand-in variable name and make sure it's unique. */
      if (!lex_force_id (lexer))
	goto error;
      name = lex_tokcstr (lexer);
      if (dict_lookup_var (dict, name))
        msg (SW, _("Dummy variable name `%s' hides dictionary variable `%s'."),
             name, name);
      if (find_dummy_var (dummies, name, strlen (name)))
        {
          msg (SE, _("Dummy variable name `%s' is given twice."), name);
          goto error;
        }

      /* Make a new macro. */
      dv = xmalloc (sizeof *dv);
      dv->name = xstrdup (name);
      dv->values = NULL;
      dv->n_values = 0;
      hmap_insert (dummies, &dv->hmap_node, hash_dummy (name, strlen (name)));

      /* Skip equals sign. */
      lex_get (lexer);
      if (!lex_force_match (lexer, T_EQUALS))
	goto error;

      /* Get the details of the variable's possible values. */
      if (lex_token (lexer) == T_ID || lex_token (lexer) == T_ALL)
	ok = parse_ids (lexer, dict, dv);
      else if (lex_is_number (lexer))
	ok = parse_numbers (lexer, dv);
      else if (lex_is_string (lexer))
	ok = parse_strings (lexer, dv);
      else
	{
	  lex_error (lexer, NULL);
	  goto error;
	}
      if (!ok)
	goto error;
      assert (dv->n_values > 0);
      if (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
        {
          lex_error (lexer, NULL);
          goto error;
        }

      /* If this is the first variable then it defines how many replacements
	 there must be; otherwise enforce this number of replacements. */
      if (first_dv == NULL)
        first_dv = dv;
      else if (first_dv->n_values != dv->n_values)
	{
	  msg (SE, _("Dummy variable `%s' had %zu substitutions, so `%s' must "
                     "also, but %zu were specified."),
               first_dv->name, first_dv->n_values,
               dv->name, dv->n_values);
	  goto error;
	}

      lex_match (lexer, T_SLASH);
    }
  while (!lex_match (lexer, T_ENDCMD));

  while (lex_match (lexer, T_ENDCMD))
    continue;

  return true;

error:
  destroy_dummies (dummies);
  return false;
}
Exemplo n.º 4
0
static int
combine_files (enum comb_command_type command,
               struct lexer *lexer, struct dataset *ds)
{
  struct comb_proc proc;

  bool saw_by = false;
  bool saw_sort = false;
  struct casereader *active_file = NULL;

  char *first_name = NULL;
  char *last_name = NULL;

  struct taint *taint = NULL;

  size_t n_tables = 0;
  size_t allocated_files = 0;

  size_t i;

  proc.files = NULL;
  proc.n_files = 0;
  proc.dict = dict_create (get_default_encoding ());
  proc.output = NULL;
  proc.matcher = NULL;
  subcase_init_empty (&proc.by_vars);
  proc.first = NULL;
  proc.last = NULL;
  proc.buffered_case = NULL;
  proc.prev_BY = NULL;

  dict_set_case_limit (proc.dict, dict_get_case_limit (dataset_dict (ds)));

  lex_match (lexer, T_SLASH);
  for (;;)
    {
      struct comb_file *file;
      enum comb_file_type type;

      if (lex_match_id (lexer, "FILE"))
        type = COMB_FILE;
      else if (command == COMB_MATCH && lex_match_id (lexer, "TABLE"))
        {
          type = COMB_TABLE;
          n_tables++;
        }
      else
        break;
      lex_match (lexer, T_EQUALS);

      if (proc.n_files >= allocated_files)
        proc.files = x2nrealloc (proc.files, &allocated_files,
                                sizeof *proc.files);
      file = &proc.files[proc.n_files++];
      file->type = type;
      subcase_init_empty (&file->by_vars);
      subcase_init_empty (&file->src);
      subcase_init_empty (&file->dst);
      file->mv = NULL;
      file->handle = NULL;
      file->dict = NULL;
      file->reader = NULL;
      file->data = NULL;
      file->is_sorted = true;
      file->in_name = NULL;
      file->in_var = NULL;

      if (lex_match (lexer, T_ASTERISK))
        {
          if (!dataset_has_source (ds))
            {
              msg (SE, _("Cannot specify the active dataset since none "
                         "has been defined."));
              goto error;
            }

          if (proc_make_temporary_transformations_permanent (ds))
            msg (SE, _("This command may not be used after TEMPORARY when "
                       "the active dataset is an input source.  "
                       "Temporary transformations will be made permanent."));

          file->dict = dict_clone (dataset_dict (ds));
        }
      else
        {
          file->handle = fh_parse (lexer, FH_REF_FILE, dataset_session (ds));
          if (file->handle == NULL)
            goto error;

          file->reader = any_reader_open (file->handle, NULL, &file->dict);
          if (file->reader == NULL)
            goto error;
        }

      while (lex_match (lexer, T_SLASH))
        if (lex_match_id (lexer, "RENAME"))
          {
            if (!parse_dict_rename (lexer, file->dict))
              goto error;
          }
        else if (lex_match_id (lexer, "IN"))
          {
            lex_match (lexer, T_EQUALS);
            if (lex_token (lexer) != T_ID)
              {
                lex_error (lexer, NULL);
                goto error;
              }

            if (file->in_name)
              {
                msg (SE, _("Multiple IN subcommands for a single FILE or "
                           "TABLE."));
                goto error;
              }
            file->in_name = xstrdup (lex_tokcstr (lexer));
            lex_get (lexer);
          }
        else if (lex_match_id (lexer, "SORT"))
          {
            file->is_sorted = false;
            saw_sort = true;
          }

      if (!merge_dictionary (proc.dict, file))
        goto error;
    }

  while (lex_token (lexer) != T_ENDCMD)
    {
      if (lex_match (lexer, T_BY))
	{
          const struct variable **by_vars;
          size_t i;
          bool ok;

	  if (saw_by)
	    {
              lex_sbc_only_once ("BY");
	      goto error;
	    }
          saw_by = true;

	  lex_match (lexer, T_EQUALS);
          if (!parse_sort_criteria (lexer, proc.dict, &proc.by_vars,
                                    &by_vars, NULL))
	    goto error;

          ok = true;
          for (i = 0; i < proc.n_files; i++)
            {
              struct comb_file *file = &proc.files[i];
              size_t j;

              for (j = 0; j < subcase_get_n_fields (&proc.by_vars); j++)
                {
                  const char *name = var_get_name (by_vars[j]);
                  struct variable *var = dict_lookup_var (file->dict, name);
                  if (var != NULL)
                    subcase_add_var (&file->by_vars, var,
                                     subcase_get_direction (&proc.by_vars, j));
                  else
                    {
                      if (file->handle != NULL)
                        msg (SE, _("File %s lacks BY variable %s."),
                             fh_get_name (file->handle), name);
                      else
                        msg (SE, _("Active dataset lacks BY variable %s."),
                             name);
                      ok = false;
                    }
                }
              assert (!ok || subcase_conformable (&file->by_vars,
                                                  &proc.files[0].by_vars));
            }
          free (by_vars);

          if (!ok)
            goto error;
	}
      else if (command != COMB_UPDATE && lex_match_id (lexer, "FIRST"))
        {
          if (first_name != NULL)
            {
              lex_sbc_only_once ("FIRST");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);
          if (!lex_force_id (lexer))
            goto error;
          first_name = xstrdup (lex_tokcstr (lexer));
          lex_get (lexer);
        }
      else if (command != COMB_UPDATE && lex_match_id (lexer, "LAST"))
        {
          if (last_name != NULL)
            {
              lex_sbc_only_once ("LAST");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);
          if (!lex_force_id (lexer))
            goto error;
          last_name = xstrdup (lex_tokcstr (lexer));
          lex_get (lexer);
        }
      else if (lex_match_id (lexer, "MAP"))
	{
	  /* FIXME. */
	}
      else if (lex_match_id (lexer, "DROP"))
        {
          if (!parse_dict_drop (lexer, proc.dict))
            goto error;
        }
      else if (lex_match_id (lexer, "KEEP"))
        {
          if (!parse_dict_keep (lexer, proc.dict))
            goto error;
        }
      else
	{
	  lex_error (lexer, NULL);
	  goto error;
	}

      if (!lex_match (lexer, T_SLASH) && lex_token (lexer) != T_ENDCMD)
        {
          lex_end_of_command (lexer);
          goto error;
        }
    }

  if (!saw_by)
    {
      if (command == COMB_UPDATE)
        {
          lex_sbc_missing ("BY");
          goto error;
        }
      if (n_tables)
        {
          msg (SE, _("BY is required when %s is specified."), "TABLE");
          goto error;
        }
      if (saw_sort)
        {
          msg (SE, _("BY is required when %s is specified."), "SORT");
          goto error;
        }
    }

  /* Add IN, FIRST, and LAST variables to master dictionary. */
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      if (!create_flag_var ("IN", file->in_name, proc.dict, &file->in_var))
        goto error;
    }
  if (!create_flag_var ("FIRST", first_name, proc.dict, &proc.first)
      || !create_flag_var ("LAST", last_name, proc.dict, &proc.last))
    goto error;

  dict_delete_scratch_vars (proc.dict);
  dict_compact_values (proc.dict);

  /* Set up mapping from each file's variables to master
     variables. */
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      size_t src_var_cnt = dict_get_var_cnt (file->dict);
      size_t j;

      file->mv = xnmalloc (src_var_cnt, sizeof *file->mv);
      for (j = 0; j < src_var_cnt; j++)
        {
          struct variable *src_var = dict_get_var (file->dict, j);
          struct variable *dst_var = dict_lookup_var (proc.dict,
                                                      var_get_name (src_var));
          if (dst_var != NULL)
            {
              size_t n = subcase_get_n_fields (&file->src);
              file->mv[n] = var_get_missing_values (src_var);
              subcase_add_var (&file->src, src_var, SC_ASCEND);
              subcase_add_var (&file->dst, dst_var, SC_ASCEND);
            }
        }
    }

  proc.output = autopaging_writer_create (dict_get_proto (proc.dict));
  taint = taint_clone (casewriter_get_taint (proc.output));

  /* Set up case matcher. */
  proc.matcher = case_matcher_create ();
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      if (file->reader == NULL)
        {
          if (active_file == NULL)
            {
              proc_discard_output (ds);
              file->reader = active_file = proc_open_filtering (ds, false);
            }
          else
            file->reader = casereader_clone (active_file);
        }
      if (!file->is_sorted)
        file->reader = sort_execute (file->reader, &file->by_vars);
      taint_propagate (casereader_get_taint (file->reader), taint);
      file->data = casereader_read (file->reader);
      if (file->type == COMB_FILE)
        case_matcher_add_input (proc.matcher, &file->by_vars,
                                &file->data, &file->is_minimal);
    }

  if (command == COMB_ADD)
    execute_add_files (&proc);
  else if (command == COMB_MATCH)
    execute_match_files (&proc);
  else if (command == COMB_UPDATE)
    execute_update (&proc);
  else
    NOT_REACHED ();

  case_matcher_destroy (proc.matcher);
  proc.matcher = NULL;
  close_all_comb_files (&proc);
  if (active_file != NULL)
    proc_commit (ds);

  dataset_set_dict (ds, proc.dict);
  dataset_set_source (ds, casewriter_make_reader (proc.output));
  proc.dict = NULL;
  proc.output = NULL;

  free_comb_proc (&proc);

  free (first_name);
  free (last_name);

  return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;

 error:
  if (active_file != NULL)
    proc_commit (ds);
  free_comb_proc (&proc);
  taint_destroy (taint);
  free (first_name);
  free (last_name);
  return CMD_CASCADING_FAILURE;
}