Example #1
0
/* Performs DELETE VARIABLES command. */
int
cmd_delete_variables (struct lexer *lexer, struct dataset *ds)
{
  struct variable **vars;
  size_t var_cnt;
  bool ok;

  if (proc_make_temporary_transformations_permanent (ds))
    msg (SE, _("DELETE VARIABLES may not be used after TEMPORARY.  "
               "Temporary transformations will be made permanent."));

  if (!parse_variables (lexer, dataset_dict (ds), &vars, &var_cnt, PV_NONE))
    goto error;
  if (var_cnt == dict_get_var_cnt (dataset_dict (ds)))
    {
      msg (SE, _("DELETE VARIABLES may not be used to delete all variables "
                 "from the active dataset dictionary.  "
                 "Use NEW FILE instead."));
      goto error;
    }

  ok = casereader_destroy (proc_open (ds));
  ok = proc_commit (ds) && ok;
  if (!ok)
    goto error;
  dict_delete_vars (dataset_dict (ds), vars, var_cnt);

  free (vars);

  return CMD_SUCCESS;

 error:
  free (vars);
  return CMD_CASCADING_FAILURE;
}
Example #2
0
static bool
run_reliability (struct dataset *ds, const struct reliability *reliability)
{
  struct dictionary *dict = dataset_dict (ds);
  bool ok;
  struct casereader *group;

  struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);


  while (casegrouper_get_next_group (grouper, &group))
    {
      do_reliability (group, ds, reliability);

      reliability_statistics (reliability);

      if (reliability->summary & SUMMARY_TOTAL )
	reliability_summary_total (reliability);
    }

  ok = casegrouper_destroy (grouper);
  ok = proc_commit (ds) && ok;

  return ok;
}
Example #3
0
int
cmd_variable_labels (struct lexer *lexer, struct dataset *ds)
{
  struct dictionary *dict = dataset_dict (ds);

  do
    {
      struct variable **v;
      size_t nv;

      size_t i;

      if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
        return CMD_FAILURE;

      if (!lex_force_string (lexer))
	{
	  free (v);
	  return CMD_FAILURE;
	}

      for (i = 0; i < nv; i++)
        var_set_label (v[i], lex_tokcstr (lexer));

      lex_get (lexer);
      while (lex_token (lexer) == T_SLASH)
	lex_get (lexer);
      free (v);
    }
  while (lex_token (lexer) != T_ENDCMD);
  return CMD_SUCCESS;
}
Example #4
0
int
cmd_split_file (struct lexer *lexer, struct dataset *ds)
{
  if (lex_match_id (lexer, "OFF"))
    dict_set_split_vars (dataset_dict (ds), NULL, 0);
  else
    {
      struct variable **v;
      size_t n;

      /* For now, ignore SEPARATE and LAYERED. */
      (void) ( lex_match_id (lexer, "SEPARATE") || lex_match_id (lexer, "LAYERED") );

      lex_match (lexer, T_BY);
      if (!parse_variables (lexer, dataset_dict (ds), &v, &n, PV_NO_DUPLICATE))
	return CMD_CASCADING_FAILURE;

      dict_set_split_vars (dataset_dict (ds), v, n);
      free (v);
    }

  return CMD_SUCCESS;
}
Example #5
0
int
cmd_do_repeat (struct lexer *lexer, struct dataset *ds)
{
  struct hmap dummies;
  bool ok;

  if (!parse_specification (lexer, dataset_dict (ds), &dummies))
    return CMD_CASCADING_FAILURE;

  ok = parse_commands (lexer, &dummies);

  destroy_dummies (&dummies);

  return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
}
Example #6
0
/* Parses the LEAVE command. */
int
cmd_leave (struct lexer *lexer, struct dataset *ds)
{
  struct variable **v;
  size_t nv;

  size_t i;

  if (!parse_variables (lexer, dataset_dict (ds), &v, &nv, PV_NONE))
    return CMD_CASCADING_FAILURE;
  for (i = 0; i < nv; i++)
    var_set_leave (v[i], true);
  free (v);

  return CMD_SUCCESS;
}
Example #7
0
/* Dumps out the values of all the split variables for the case C. */
void
output_split_file_values (const struct dataset *ds, const struct ccase *c)
{
  const struct dictionary *dict = dataset_dict (ds);
  const struct variable *const *split;
  struct tab_table *t;
  size_t split_cnt;
  int i;

  split_cnt = dict_get_split_cnt (dict);
  if (split_cnt == 0)
    return;

  t = tab_create (3, split_cnt + 1);
  tab_vline (t, TAL_GAP, 1, 0, split_cnt);
  tab_vline (t, TAL_GAP, 2, 0, split_cnt);
  tab_text (t, 0, 0, TAB_NONE, _("Variable"));
  tab_text (t, 1, 0, TAB_LEFT, _("Value"));
  tab_text (t, 2, 0, TAB_LEFT, _("Label"));
  split = dict_get_split_vars (dict);
  for (i = 0; i < split_cnt; i++)
    {
      const struct variable *v = split[i];
      char *s;
      const char *val_lab;
      const struct fmt_spec *print = var_get_print_format (v);

      tab_text_format (t, 0, i + 1, TAB_LEFT, "%s", var_get_name (v));

      s = data_out (case_data (c, v), dict_get_encoding (dict), print);

      tab_text_format (t, 1, i + 1, 0, "%.*s", print->w, s);

      free (s);
      
      val_lab = var_lookup_value_label (v, case_data (c, v));
      if (val_lab)
	tab_text (t, 2, i + 1, TAB_LEFT, val_lab);
    }
  tab_submit (t);
}
Example #8
0
/* Parses the STRING command. */
int
cmd_string (struct lexer *lexer, struct dataset *ds)
{
  size_t i;

  /* Names of variables to create. */
  char **v;
  size_t nv;

  /* Format spec for variables to create. */
  struct fmt_spec f;

  /* Width of variables to create. */
  int width;

  do
    {
      if (!parse_DATA_LIST_vars (lexer, dataset_dict (ds),
                                 &v, &nv, PV_NO_DUPLICATE))
	return CMD_FAILURE;

      if (!lex_force_match (lexer, T_LPAREN)
          || !parse_format_specifier (lexer, &f)
          || !lex_force_match (lexer, T_RPAREN))
	goto fail;
      if (!fmt_is_string (f.type))
	{
          char str[FMT_STRING_LEN_MAX + 1];
	  msg (SE, _("Format type %s may not be used with a string "
                     "variable."), fmt_to_string (&f, str));
	  goto fail;
	}
      if (!fmt_check_output (&f))
        goto fail;

      width = fmt_var_width (&f);

      /* Create each variable. */
      for (i = 0; i < nv; i++)
	{
	  struct variable *new_var = dict_create_var (dataset_dict (ds), v[i],
                                                      width);
	  if (!new_var)
	    msg (SE, _("There is already a variable named %s."), v[i]);
	  else
            var_set_both_formats (new_var, &f);
	}

      /* Clean up. */
      for (i = 0; i < nv; i++)
	free (v[i]);
      free (v);
    }
  while (lex_match (lexer, T_SLASH));

  return CMD_SUCCESS;

  /* If we have an error at a point where cleanup is required,
     flow-of-control comes here. */
fail:
  for (i = 0; i < nv; i++)
    free (v[i]);
  free (v);
  return CMD_FAILURE;
}
Example #9
0
int
cmd_t_test (struct lexer *lexer, struct dataset *ds)
{
  bool ok;
  const struct dictionary *dict = dataset_dict (ds);
  struct tt tt;
  int mode_count = 0;

  /* Variables pertaining to the paired mode */
  const struct variable **v1 = NULL;
  size_t n_v1;
  const struct variable **v2 = NULL;
  size_t n_v2;
	  
  size_t n_pairs = 0;
  vp *pairs = NULL;


  /* One sample mode */
  double testval = SYSMIS;

  /* Independent samples mode */
  const struct variable *gvar;
  union value gval0;
  union value gval1;
  bool cut = false;

  tt.wv = dict_get_weight (dict);
  tt.dict = dict;
  tt.confidence = 0.95;
  tt.exclude = MV_ANY;
  tt.missing_type = MISS_ANALYSIS;
  tt.n_vars = 0;
  tt.vars = NULL;
  tt.mode = MODE_undef;

  lex_match (lexer, T_EQUALS);

  for (; lex_token (lexer) != T_ENDCMD; )
    {
      lex_match (lexer, T_SLASH);
      if (lex_match_id (lexer, "TESTVAL"))
	{
	  mode_count++;
	  tt.mode = MODE_SINGLE;
	  lex_match (lexer, T_EQUALS);
	  lex_force_num (lexer);
	  testval = lex_number (lexer);
	  lex_get (lexer);
	}
      else if (lex_match_id (lexer, "GROUPS"))
	{
	  mode_count++;
	  cut = false;
	  tt.mode = MODE_INDEP;
	  lex_match (lexer, T_EQUALS);

	  if (NULL == (gvar = parse_variable (lexer, dict)))
	    goto parse_failed;
      
	  if (lex_match (lexer, T_LPAREN))
	    {

	      value_init (&gval0, var_get_width (gvar));
	      parse_value (lexer, &gval0, gvar);
	      cut = true;
	      if (lex_match (lexer, T_COMMA))
		{
		  value_init (&gval1, var_get_width (gvar));
		  parse_value (lexer, &gval1, gvar);
		  cut = false;
		}

	      lex_force_match (lexer, T_RPAREN);
	    }
	  else
	    {
	      value_init (&gval0, 0);
	      value_init (&gval1, 0);
	      gval0.f = 1.0;
	      gval1.f = 2.0;
	      cut = false;
	    }

	  if ( cut == true && var_is_alpha (gvar))
	    {
	      msg (SE, _("When applying GROUPS to a string variable, two "
			 "values must be specified."));
	      goto parse_failed;
	    }
	}
      else if (lex_match_id (lexer, "PAIRS"))
	{
	  bool with = false;
	  bool paired = false;

	  if (tt.n_vars > 0)
	    {
	      msg (SE, _("VARIABLES subcommand may not be used with PAIRS."));
	      goto parse_failed;
	    }

	  mode_count++;
	  tt.mode = MODE_PAIRED;
	  lex_match (lexer, T_EQUALS);

	  if (!parse_variables_const (lexer, dict,
				      &v1, &n_v1,
				      PV_NO_DUPLICATE | PV_NUMERIC))
	    goto parse_failed;

	  if ( lex_match (lexer, T_WITH))
	    {
	      with = true;
	      if (!parse_variables_const (lexer, dict,
					  &v2, &n_v2,
					  PV_NO_DUPLICATE | PV_NUMERIC))
		goto parse_failed;

	      if (lex_match (lexer, T_LPAREN)
		  && lex_match_id (lexer, "PAIRED")
		  && lex_match (lexer, T_RPAREN))
		{
		  paired = true;
		  if (n_v1 != n_v2)
		    {
		      msg (SE, _("PAIRED was specified but the number of variables "
				 "preceding WITH (%zu) did not match the number "
				 "following (%zu)."),
			   n_v1, n_v2);
		      goto parse_failed;
		    }
		}
	    }
	  {
	    int i;

	    if ( !with )
	      n_pairs = (n_v1 * (n_v1 - 1)) / 2.0;
	    else if ( paired )
	      n_pairs = n_v1;
	    else
	      n_pairs = n_v1 * n_v2;
	  
	    pairs = xcalloc (n_pairs, sizeof *pairs);

	    if ( with)
	      {
		int x = 0;
		if (paired)
		  {
		    for (i = 0 ; i < n_v1; ++i)
		      {
			vp *pair = &pairs[i];
			(*pair)[0] = v1[i];
			(*pair)[1] = v2[i];
		      }	
		  }
		else
		  {
		    for (i = 0 ; i < n_v1; ++i)
		      {
			int j;
			for (j = 0 ; j < n_v2; ++j)
			  {
			    vp *pair = &pairs[x++];
			    (*pair)[0] = v1[i];
			    (*pair)[1] = v2[j];
			  }
		      }
		  }
	      }
	    else
	      {
		int x = 0;
		for (i = 0 ; i < n_v1; ++i)
		  {
		    int j;

		    for (j = i + 1 ; j < n_v1; ++j)
		      {
			vp *pair = &pairs[x++];
			(*pair)[0] = v1[i];
			(*pair)[1] = v1[j];
		      }
		  }
	      }

	  }
	}
      else if (lex_match_id (lexer, "VARIABLES"))
	{
	  if ( tt.mode == MODE_PAIRED)
	    {
	      msg (SE, _("VARIABLES subcommand may not be used with PAIRS."));
	      goto parse_failed;
	    }

	  lex_match (lexer, T_EQUALS);

	  if (!parse_variables_const (lexer, dict,
				      &tt.vars,
				      &tt.n_vars,
				      PV_NO_DUPLICATE | PV_NUMERIC))
	    goto parse_failed;
	}
      else if ( lex_match_id (lexer, "MISSING"))
	{
          lex_match (lexer, T_EQUALS);
          while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
            {
	      if (lex_match_id (lexer, "INCLUDE"))
		{
		  tt.exclude = MV_SYSTEM;
		}
	      else if (lex_match_id (lexer, "EXCLUDE"))
		{
		  tt.exclude = MV_ANY;
		}
	      else if (lex_match_id (lexer, "LISTWISE"))
		{
		  tt.missing_type = MISS_LISTWISE;
		}
	      else if (lex_match_id (lexer, "ANALYSIS"))
		{
		  tt.missing_type = MISS_ANALYSIS;
		}
	      else
		{
                  lex_error (lexer, NULL);
		  goto parse_failed;
		}
	      lex_match (lexer, T_COMMA);
	    }
	}
      else if (lex_match_id (lexer, "CRITERIA"))
	{
          lex_match (lexer, T_EQUALS);
	  if ( lex_force_match_id (lexer, "CIN"))
	    if ( lex_force_match (lexer, T_LPAREN))
	      {
		lex_force_num (lexer);
		tt.confidence = lex_number (lexer);
		lex_get (lexer);
		lex_force_match (lexer, T_RPAREN);
	      }
	}
      else 
	{
	  lex_error (lexer, NULL);
	  goto parse_failed;
	}
    }

  if ( mode_count != 1)
    {
      msg (SE, _("Exactly one of TESTVAL, GROUPS and PAIRS subcommands "
		 "must be specified."));
      goto parse_failed;
    }

  if (tt.n_vars == 0 && tt.mode != MODE_PAIRED)
    {
      lex_sbc_missing ("VARIABLES");
      goto parse_failed;
    }



  /* Deal with splits etc */
  {
    struct casereader *group;
    struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);

    while (casegrouper_get_next_group (grouper, &group))
      {
	if ( tt.mode == MODE_SINGLE)
	  {
	    if ( tt.missing_type == MISS_LISTWISE )
	      group  = casereader_create_filter_missing (group,
							 tt.vars, tt.n_vars,
							 tt.exclude,
							 NULL,  NULL);
	    one_sample_run (&tt, testval, group);
	  }
	else if ( tt.mode == MODE_PAIRED)
	  {
	    if ( tt.missing_type == MISS_LISTWISE )
	      {
		group  = casereader_create_filter_missing (group,
							   v1, n_v1,
							   tt.exclude,
							   NULL,  NULL);
		group  = casereader_create_filter_missing (group,
							   v2, n_v2,
							   tt.exclude,
							   NULL,  NULL);
	      }

	    paired_run (&tt, n_pairs, pairs, group);
	  }
	else /* tt.mode == MODE_INDEP */
	  {
	    if ( tt.missing_type == MISS_LISTWISE )
	      {
		group  = casereader_create_filter_missing (group,
							   tt.vars, tt.n_vars,
							   tt.exclude,
							   NULL,  NULL);

		group  = casereader_create_filter_missing (group,
							   &gvar, 1,
							   tt.exclude,
							   NULL,  NULL);

	      }

	    indep_run (&tt, gvar, cut, &gval0, &gval1, group);
	  }
      }

    ok = casegrouper_destroy (grouper);
    ok = proc_commit (ds) && ok;
  }

  free (pairs);
  free (v1);
  free (v2);

  free (tt.vars);

  return ok ? CMD_SUCCESS : CMD_FAILURE;

 parse_failed:
  return CMD_FAILURE;
}
Example #10
0
int
cmd_add_value_labels (struct lexer *lexer, struct dataset *ds)
{
  return do_value_labels (lexer, dataset_dict (ds), false);
}
Example #11
0
int
cmd_reliability (struct lexer *lexer, struct dataset *ds)
{
  const struct dictionary *dict = dataset_dict (ds);

  struct reliability reliability;
  reliability.n_variables = 0;
  reliability.variables = NULL;
  reliability.model = MODEL_ALPHA;
    reliability.exclude = MV_ANY;
  reliability.summary = 0;

  reliability.wv = dict_get_weight (dict);

  reliability.total_start = 0;

  lex_match (lexer, T_SLASH);

  if (!lex_force_match_id (lexer, "VARIABLES"))
    {
      goto error;
    }

  lex_match (lexer, T_EQUALS);

  if (!parse_variables_const (lexer, dict, &reliability.variables, &reliability.n_variables,
			      PV_NO_DUPLICATE | PV_NUMERIC))
    goto error;

  if (reliability.n_variables < 2)
    msg (MW, _("Reliability on a single variable is not useful."));


    {
      int i;
      struct cronbach *c;
      /* Create a default Scale */

      reliability.n_sc = 1;
      reliability.sc = xzalloc (sizeof (struct cronbach) * reliability.n_sc);

      ds_init_cstr (&reliability.scale_name, "ANY");

      c = &reliability.sc[0];
      c->n_items = reliability.n_variables;
      c->items = xzalloc (sizeof (struct variable*) * c->n_items);

      for (i = 0 ; i < c->n_items ; ++i)
	c->items[i] = reliability.variables[i];
    }



  while (lex_token (lexer) != T_ENDCMD)
    {
      lex_match (lexer, T_SLASH);

      if (lex_match_id (lexer, "SCALE"))
	{
	  struct const_var_set *vs;
	  if ( ! lex_force_match (lexer, T_LPAREN))
	    goto error;

	  if ( ! lex_force_string (lexer) ) 
	    goto error;

	  ds_init_substring (&reliability.scale_name, lex_tokss (lexer));

	  lex_get (lexer);

	  if ( ! lex_force_match (lexer, T_RPAREN))
	    goto error;

          lex_match (lexer, T_EQUALS);

	  vs = const_var_set_create_from_array (reliability.variables, reliability.n_variables);


	  if (!parse_const_var_set_vars (lexer, vs, &reliability.sc->items, &reliability.sc->n_items, 0))
	    {
	      const_var_set_destroy (vs);
	      goto error;
	    }

	  const_var_set_destroy (vs);
	}
      else if (lex_match_id (lexer, "MODEL"))
	{
          lex_match (lexer, T_EQUALS);
	  if (lex_match_id (lexer, "ALPHA"))
	    {
	      reliability.model = MODEL_ALPHA;
	    }
	  else if (lex_match_id (lexer, "SPLIT"))
	    {
	      reliability.model = MODEL_SPLIT;
	      reliability.split_point = -1;

	      if ( lex_match (lexer, T_LPAREN))
		{
		  lex_force_num (lexer);
		  reliability.split_point = lex_number (lexer);
		  lex_get (lexer);
		  lex_force_match (lexer, T_RPAREN);
		}
	    }
	  else
	    goto error;
	}
      else if (lex_match_id (lexer, "SUMMARY"))
        {
          lex_match (lexer, T_EQUALS);
	  if (lex_match_id (lexer, "TOTAL"))
	    {
	      reliability.summary |= SUMMARY_TOTAL;
	    }
	  else if (lex_match (lexer, T_ALL))
	    {
	      reliability.summary = 0xFFFF;
	    }
	  else
	    goto error;
	}
      else if (lex_match_id (lexer, "MISSING"))
        {
          lex_match (lexer, T_EQUALS);
          while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
            {
	      if (lex_match_id (lexer, "INCLUDE"))
		{
		  reliability.exclude = MV_SYSTEM;
		}
	      else if (lex_match_id (lexer, "EXCLUDE"))
		{
		  reliability.exclude = MV_ANY;
		}
	      else
		{
                  lex_error (lexer, NULL);
		  goto error;
		}
	    }
	}
      else
	{
	  lex_error (lexer, NULL);
	  goto error;
	}
    }

  if ( reliability.model == MODEL_SPLIT)
    {
      int i;
      const struct cronbach *s;

      if ( reliability.split_point >= reliability.n_variables)
        {
          msg (ME, _("The split point must be less than the number of variables"));
          goto error;
        }

      reliability.n_sc += 2 ;
      reliability.sc = xrealloc (reliability.sc, sizeof (struct cronbach) * reliability.n_sc);

      s = &reliability.sc[0];

      reliability.sc[1].n_items =
	(reliability.split_point == -1) ? s->n_items / 2 : reliability.split_point;

      reliability.sc[2].n_items = s->n_items - reliability.sc[1].n_items;
      reliability.sc[1].items = xzalloc (sizeof (struct variable *)
				 * reliability.sc[1].n_items);

      reliability.sc[2].items = xzalloc (sizeof (struct variable *) *
				 reliability.sc[2].n_items);

      for  (i = 0; i < reliability.sc[1].n_items ; ++i)
	reliability.sc[1].items[i] = s->items[i];

      while (i < s->n_items)
	{
	  reliability.sc[2].items[i - reliability.sc[1].n_items] = s->items[i];
	  i++;
	}
    }

  if ( reliability.summary & SUMMARY_TOTAL)
    {
      int i;
      const int base_sc = reliability.n_sc;

      reliability.total_start = base_sc;

      reliability.n_sc +=  reliability.sc[0].n_items ;
      reliability.sc = xrealloc (reliability.sc, sizeof (struct cronbach) * reliability.n_sc);


      for (i = 0 ; i < reliability.sc[0].n_items; ++i )
	{
	  int v_src;
	  int v_dest = 0;
	  struct cronbach *s = &reliability.sc[i + base_sc];

	  s->n_items = reliability.sc[0].n_items - 1;
	  s->items = xzalloc (sizeof (struct variable *) * s->n_items);
	  for (v_src = 0 ; v_src < reliability.sc[0].n_items ; ++v_src)
	    {
	      if ( v_src != i)
		s->items[v_dest++] = reliability.sc[0].items[v_src];
	    }
	}
    }


  if ( ! run_reliability (ds, &reliability)) 
    goto error;

  free (reliability.variables);
  return CMD_SUCCESS;

 error:
  free (reliability.variables);
  return CMD_FAILURE;
}
Example #12
0
int
cmd_save_translate (struct lexer *lexer, struct dataset *ds)
{
  enum { CSV_FILE = 1, TAB_FILE } type;

  struct dictionary *dict;
  struct case_map *map;
  struct casewriter *writer;
  struct file_handle *handle;

  struct csv_writer_options csv_opts;

  bool replace;

  bool retain_unselected;
  bool recode_user_missing;
  bool include_var_names;
  bool use_value_labels;
  bool use_print_formats;
  char decimal;
  char delimiter;
  char qualifier;

  bool ok;

  type = 0;

  dict = dict_clone (dataset_dict (ds));
  map = NULL;

  handle = NULL;
  replace = false;

  retain_unselected = true;
  recode_user_missing = false;
  include_var_names = false;
  use_value_labels = false;
  use_print_formats = false;
  decimal = settings_get_decimal_char (FMT_F);
  delimiter = 0;
  qualifier = '"';

  case_map_prepare_dict (dict);
  dict_delete_scratch_vars (dict);

  while (lex_match (lexer, T_SLASH))
    {
      if (lex_match_id (lexer, "OUTFILE"))
	{
          if (handle != NULL)
            {
              lex_sbc_only_once ("OUTFILE");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);

	  handle = fh_parse (lexer, FH_REF_FILE, NULL);
	  if (handle == NULL)
	    goto error;
	}
      else if (lex_match_id (lexer, "TYPE"))
        {
          if (type != 0)
            {
              lex_sbc_only_once ("TYPE");
              goto error;
            }

          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "CSV"))
            type = CSV_FILE;
          else if (lex_match_id (lexer, "TAB"))
            type = TAB_FILE;
          else
            {
              lex_error_expecting (lexer, "CSV", "TAB", NULL_SENTINEL);
              goto error;
            }
        }
      else if (lex_match_id (lexer, "REPLACE"))
        replace = true;
      else if (lex_match_id (lexer, "FIELDNAMES"))
        include_var_names = true;
      else if (lex_match_id (lexer, "MISSING"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "IGNORE"))
            recode_user_missing = false;
          else if (lex_match_id (lexer, "RECODE"))
            recode_user_missing = true;
          else
            {
              lex_error_expecting (lexer, "IGNORE", "RECODE", NULL_SENTINEL);
              goto error;
            }
        }
      else if (lex_match_id (lexer, "CELLS"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "VALUES"))
            use_value_labels = false;
          else if (lex_match_id (lexer, "LABELS"))
            use_value_labels = true;
          else
            {
              lex_error_expecting (lexer, "VALUES", "LABELS", NULL_SENTINEL);
              goto error;
            }
        }
      else if (lex_match_id (lexer, "TEXTOPTIONS"))
        {
          lex_match (lexer, T_EQUALS);
          for (;;)
            {
              if (lex_match_id (lexer, "DELIMITER"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (!lex_force_string (lexer))
                    goto error;
                  /* XXX should support multibyte UTF-8 delimiters */
                  if (ss_length (lex_tokss (lexer)) != 1)
                    {
                      msg (SE, _("The %s string must contain exactly one "
                                 "character."), "DELIMITER");
                      goto error;
                    }
                  delimiter = ss_first (lex_tokss (lexer));
                  lex_get (lexer);
                }
              else if (lex_match_id (lexer, "QUALIFIER"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (!lex_force_string (lexer))
                    goto error;
                  /* XXX should support multibyte UTF-8 qualifiers */
                  if (ss_length (lex_tokss (lexer)) != 1)
                    {
                      msg (SE, _("The %s string must contain exactly one "
                                 "character."), "QUALIFIER");
                      goto error;
                    }
                  qualifier = ss_first (lex_tokss (lexer));
                  lex_get (lexer);
                }
              else if (lex_match_id (lexer, "DECIMAL"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (lex_match_id (lexer, "DOT"))
                    decimal = '.';
                  else if (lex_match_id (lexer, "COMMA"))
                    decimal = ',';
                  else
                    {
                      lex_error_expecting (lexer, "DOT", "COMMA",
                                           NULL_SENTINEL);
                      goto error;
                    }
                }
              else if (lex_match_id (lexer, "FORMAT"))
                {
                  lex_match (lexer, T_EQUALS);
                  if (lex_match_id (lexer, "PLAIN"))
                    use_print_formats = false;
                  else if (lex_match_id (lexer, "VARIABLE"))
                    use_print_formats = true;
                  else
                    {
                      lex_error_expecting (lexer, "PLAIN", "VARIABLE",
                                           NULL_SENTINEL);
                      goto error;
                    }
                }
              else
                break;
            }
        }
      else if (lex_match_id (lexer, "UNSELECTED"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "RETAIN"))
            retain_unselected = true;
          else if (lex_match_id (lexer, "DELETE"))
            retain_unselected = false;
          else
            {
              lex_error_expecting (lexer, "RETAIN", "DELETE", NULL_SENTINEL);
              goto error;
            }
        }
      else if (!parse_dict_trim (lexer, dict))
        goto error;
    }
  if (lex_end_of_command (lexer) != CMD_SUCCESS)
    goto error;

  if (type == 0)
    {
      lex_sbc_missing ("TYPE");
      goto error;
    }
  else if (handle == NULL)
    {
      lex_sbc_missing ("OUTFILE");
      goto error;
    }
  else if (!replace && fn_exists (fh_get_file_name (handle)))
    {
      msg (SE, _("Output file `%s' exists but REPLACE was not specified."),
           fh_get_file_name (handle));
      goto error;
    }

  dict_delete_scratch_vars (dict);
  dict_compact_values (dict);

  csv_opts.recode_user_missing = recode_user_missing;
  csv_opts.include_var_names = include_var_names;
  csv_opts.use_value_labels = use_value_labels;
  csv_opts.use_print_formats = use_print_formats;
  csv_opts.decimal = decimal;
  csv_opts.delimiter = (delimiter ? delimiter
                        : type == TAB_FILE ? '\t'
                        : decimal == '.' ? ','
                        : ';');
  csv_opts.qualifier = qualifier;

  writer = csv_writer_open (handle, dict, &csv_opts);
  if (writer == NULL)
    goto error;
  fh_unref (handle);

  map = case_map_from_dict (dict);
  if (map != NULL)
    writer = case_map_create_output_translator (map, writer);
  dict_destroy (dict);

  casereader_transfer (proc_open_filtering (ds, !retain_unselected), writer);
  ok = casewriter_destroy (writer);
  ok = proc_commit (ds) && ok;

  return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;

error:
  fh_unref (handle);
  dict_destroy (dict);
  case_map_destroy (map);
  return CMD_FAILURE;
}
Example #13
0
static int
combine_files (enum comb_command_type command,
               struct lexer *lexer, struct dataset *ds)
{
  struct comb_proc proc;

  bool saw_by = false;
  bool saw_sort = false;
  struct casereader *active_file = NULL;

  char *first_name = NULL;
  char *last_name = NULL;

  struct taint *taint = NULL;

  size_t n_tables = 0;
  size_t allocated_files = 0;

  size_t i;

  proc.files = NULL;
  proc.n_files = 0;
  proc.dict = dict_create (get_default_encoding ());
  proc.output = NULL;
  proc.matcher = NULL;
  subcase_init_empty (&proc.by_vars);
  proc.first = NULL;
  proc.last = NULL;
  proc.buffered_case = NULL;
  proc.prev_BY = NULL;

  dict_set_case_limit (proc.dict, dict_get_case_limit (dataset_dict (ds)));

  lex_match (lexer, T_SLASH);
  for (;;)
    {
      struct comb_file *file;
      enum comb_file_type type;

      if (lex_match_id (lexer, "FILE"))
        type = COMB_FILE;
      else if (command == COMB_MATCH && lex_match_id (lexer, "TABLE"))
        {
          type = COMB_TABLE;
          n_tables++;
        }
      else
        break;
      lex_match (lexer, T_EQUALS);

      if (proc.n_files >= allocated_files)
        proc.files = x2nrealloc (proc.files, &allocated_files,
                                sizeof *proc.files);
      file = &proc.files[proc.n_files++];
      file->type = type;
      subcase_init_empty (&file->by_vars);
      subcase_init_empty (&file->src);
      subcase_init_empty (&file->dst);
      file->mv = NULL;
      file->handle = NULL;
      file->dict = NULL;
      file->reader = NULL;
      file->data = NULL;
      file->is_sorted = true;
      file->in_name = NULL;
      file->in_var = NULL;

      if (lex_match (lexer, T_ASTERISK))
        {
          if (!dataset_has_source (ds))
            {
              msg (SE, _("Cannot specify the active dataset since none "
                         "has been defined."));
              goto error;
            }

          if (proc_make_temporary_transformations_permanent (ds))
            msg (SE, _("This command may not be used after TEMPORARY when "
                       "the active dataset is an input source.  "
                       "Temporary transformations will be made permanent."));

          file->dict = dict_clone (dataset_dict (ds));
        }
      else
        {
          file->handle = fh_parse (lexer, FH_REF_FILE, dataset_session (ds));
          if (file->handle == NULL)
            goto error;

          file->reader = any_reader_open (file->handle, NULL, &file->dict);
          if (file->reader == NULL)
            goto error;
        }

      while (lex_match (lexer, T_SLASH))
        if (lex_match_id (lexer, "RENAME"))
          {
            if (!parse_dict_rename (lexer, file->dict))
              goto error;
          }
        else if (lex_match_id (lexer, "IN"))
          {
            lex_match (lexer, T_EQUALS);
            if (lex_token (lexer) != T_ID)
              {
                lex_error (lexer, NULL);
                goto error;
              }

            if (file->in_name)
              {
                msg (SE, _("Multiple IN subcommands for a single FILE or "
                           "TABLE."));
                goto error;
              }
            file->in_name = xstrdup (lex_tokcstr (lexer));
            lex_get (lexer);
          }
        else if (lex_match_id (lexer, "SORT"))
          {
            file->is_sorted = false;
            saw_sort = true;
          }

      if (!merge_dictionary (proc.dict, file))
        goto error;
    }

  while (lex_token (lexer) != T_ENDCMD)
    {
      if (lex_match (lexer, T_BY))
	{
          const struct variable **by_vars;
          size_t i;
          bool ok;

	  if (saw_by)
	    {
              lex_sbc_only_once ("BY");
	      goto error;
	    }
          saw_by = true;

	  lex_match (lexer, T_EQUALS);
          if (!parse_sort_criteria (lexer, proc.dict, &proc.by_vars,
                                    &by_vars, NULL))
	    goto error;

          ok = true;
          for (i = 0; i < proc.n_files; i++)
            {
              struct comb_file *file = &proc.files[i];
              size_t j;

              for (j = 0; j < subcase_get_n_fields (&proc.by_vars); j++)
                {
                  const char *name = var_get_name (by_vars[j]);
                  struct variable *var = dict_lookup_var (file->dict, name);
                  if (var != NULL)
                    subcase_add_var (&file->by_vars, var,
                                     subcase_get_direction (&proc.by_vars, j));
                  else
                    {
                      if (file->handle != NULL)
                        msg (SE, _("File %s lacks BY variable %s."),
                             fh_get_name (file->handle), name);
                      else
                        msg (SE, _("Active dataset lacks BY variable %s."),
                             name);
                      ok = false;
                    }
                }
              assert (!ok || subcase_conformable (&file->by_vars,
                                                  &proc.files[0].by_vars));
            }
          free (by_vars);

          if (!ok)
            goto error;
	}
      else if (command != COMB_UPDATE && lex_match_id (lexer, "FIRST"))
        {
          if (first_name != NULL)
            {
              lex_sbc_only_once ("FIRST");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);
          if (!lex_force_id (lexer))
            goto error;
          first_name = xstrdup (lex_tokcstr (lexer));
          lex_get (lexer);
        }
      else if (command != COMB_UPDATE && lex_match_id (lexer, "LAST"))
        {
          if (last_name != NULL)
            {
              lex_sbc_only_once ("LAST");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);
          if (!lex_force_id (lexer))
            goto error;
          last_name = xstrdup (lex_tokcstr (lexer));
          lex_get (lexer);
        }
      else if (lex_match_id (lexer, "MAP"))
	{
	  /* FIXME. */
	}
      else if (lex_match_id (lexer, "DROP"))
        {
          if (!parse_dict_drop (lexer, proc.dict))
            goto error;
        }
      else if (lex_match_id (lexer, "KEEP"))
        {
          if (!parse_dict_keep (lexer, proc.dict))
            goto error;
        }
      else
	{
	  lex_error (lexer, NULL);
	  goto error;
	}

      if (!lex_match (lexer, T_SLASH) && lex_token (lexer) != T_ENDCMD)
        {
          lex_end_of_command (lexer);
          goto error;
        }
    }

  if (!saw_by)
    {
      if (command == COMB_UPDATE)
        {
          lex_sbc_missing ("BY");
          goto error;
        }
      if (n_tables)
        {
          msg (SE, _("BY is required when %s is specified."), "TABLE");
          goto error;
        }
      if (saw_sort)
        {
          msg (SE, _("BY is required when %s is specified."), "SORT");
          goto error;
        }
    }

  /* Add IN, FIRST, and LAST variables to master dictionary. */
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      if (!create_flag_var ("IN", file->in_name, proc.dict, &file->in_var))
        goto error;
    }
  if (!create_flag_var ("FIRST", first_name, proc.dict, &proc.first)
      || !create_flag_var ("LAST", last_name, proc.dict, &proc.last))
    goto error;

  dict_delete_scratch_vars (proc.dict);
  dict_compact_values (proc.dict);

  /* Set up mapping from each file's variables to master
     variables. */
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      size_t src_var_cnt = dict_get_var_cnt (file->dict);
      size_t j;

      file->mv = xnmalloc (src_var_cnt, sizeof *file->mv);
      for (j = 0; j < src_var_cnt; j++)
        {
          struct variable *src_var = dict_get_var (file->dict, j);
          struct variable *dst_var = dict_lookup_var (proc.dict,
                                                      var_get_name (src_var));
          if (dst_var != NULL)
            {
              size_t n = subcase_get_n_fields (&file->src);
              file->mv[n] = var_get_missing_values (src_var);
              subcase_add_var (&file->src, src_var, SC_ASCEND);
              subcase_add_var (&file->dst, dst_var, SC_ASCEND);
            }
        }
    }

  proc.output = autopaging_writer_create (dict_get_proto (proc.dict));
  taint = taint_clone (casewriter_get_taint (proc.output));

  /* Set up case matcher. */
  proc.matcher = case_matcher_create ();
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      if (file->reader == NULL)
        {
          if (active_file == NULL)
            {
              proc_discard_output (ds);
              file->reader = active_file = proc_open_filtering (ds, false);
            }
          else
            file->reader = casereader_clone (active_file);
        }
      if (!file->is_sorted)
        file->reader = sort_execute (file->reader, &file->by_vars);
      taint_propagate (casereader_get_taint (file->reader), taint);
      file->data = casereader_read (file->reader);
      if (file->type == COMB_FILE)
        case_matcher_add_input (proc.matcher, &file->by_vars,
                                &file->data, &file->is_minimal);
    }

  if (command == COMB_ADD)
    execute_add_files (&proc);
  else if (command == COMB_MATCH)
    execute_match_files (&proc);
  else if (command == COMB_UPDATE)
    execute_update (&proc);
  else
    NOT_REACHED ();

  case_matcher_destroy (proc.matcher);
  proc.matcher = NULL;
  close_all_comb_files (&proc);
  if (active_file != NULL)
    proc_commit (ds);

  dataset_set_dict (ds, proc.dict);
  dataset_set_source (ds, casewriter_make_reader (proc.output));
  proc.dict = NULL;
  proc.output = NULL;

  free_comb_proc (&proc);

  free (first_name);
  free (last_name);

  return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;

 error:
  if (active_file != NULL)
    proc_commit (ds);
  free_comb_proc (&proc);
  taint_destroy (taint);
  free (first_name);
  free (last_name);
  return CMD_CASCADING_FAILURE;
}
Example #14
0
/* Parses the NUMERIC command. */
int
cmd_numeric (struct lexer *lexer, struct dataset *ds)
{
  size_t i;

  /* Names of variables to create. */
  char **v;
  size_t nv;

  do
    {
      /* Format spec for variables to create.  f.type==-1 if default is to
	 be used. */
      struct fmt_spec f;

      if (!parse_DATA_LIST_vars (lexer, dataset_dict (ds),
                                 &v, &nv, PV_NO_DUPLICATE))
	return CMD_FAILURE;

      /* Get the optional format specification. */
      if (lex_match (lexer, T_LPAREN))
	{
	  if (!parse_format_specifier (lexer, &f))
	    goto fail;

	  if ( ! fmt_check_output (&f))
	    goto fail;

	  if (fmt_is_string (f.type))
	    {
              char str[FMT_STRING_LEN_MAX + 1];
	      msg (SE, _("Format type %s may not be used with a numeric "
                         "variable."), fmt_to_string (&f, str));
	      goto fail;
	    }

	  if (!lex_match (lexer, T_RPAREN))
	    {
              lex_error_expecting (lexer, "`)'", NULL_SENTINEL);
	      goto fail;
	    }
	}
      else
	f.type = -1;

      /* Create each variable. */
      for (i = 0; i < nv; i++)
	{
	  struct variable *new_var = dict_create_var (dataset_dict (ds), v[i], 0);
	  if (!new_var)
	    msg (SE, _("There is already a variable named %s."), v[i]);
	  else
	    {
	      if (f.type != -1)
                var_set_both_formats (new_var, &f);
	    }
	}

      /* Clean up. */
      for (i = 0; i < nv; i++)
	free (v[i]);
      free (v);
    }
  while (lex_match (lexer, T_SLASH));

  return CMD_SUCCESS;

  /* If we have an error at a point where cleanup is required,
     flow-of-control comes here. */
fail:
  for (i = 0; i < nv; i++)
    free (v[i]);
  free (v);
  return CMD_FAILURE;
}
Example #15
0
void
sign_execute (const struct dataset *ds,
		  struct casereader *input,
		  enum mv_class exclude,
		  const struct npar_test *test,
		  bool exact UNUSED,
		  double timer UNUSED)
{
  int i;
  bool warn = true;
  const struct dictionary *dict = dataset_dict (ds);
  const struct two_sample_test *t2s = UP_CAST (test, const struct two_sample_test, parent);
  struct ccase *c;

  struct sign_test_params *stp = xcalloc (t2s->n_pairs, sizeof *stp);

  struct casereader *r = input;

  for (; (c = casereader_read (r)) != NULL; case_unref (c))
    {
      const double weight = dict_get_case_weight (dict, c, &warn);

      for (i = 0 ; i < t2s->n_pairs; ++i )
	{
	  variable_pair *vp = &t2s->pairs[i];
	  const union value *value0 = case_data (c, (*vp)[0]);
	  const union value *value1 = case_data (c, (*vp)[1]);
	  const double diff = value0->f - value1->f;

	  if (var_is_value_missing ((*vp)[0], value0, exclude))
	    continue;

	  if (var_is_value_missing ((*vp)[1], value1, exclude))
	    continue;

	  if ( diff > 0)
	    stp[i].pos += weight;
	  else if (diff < 0)
	    stp[i].neg += weight;
	  else
	    stp[i].ties += weight;
	}
    }

  casereader_destroy (r);

  for (i = 0 ; i < t2s->n_pairs; ++i )
    {
      int r = MIN (stp[i].pos, stp[i].neg);
      stp[i].one_tailed_sig = gsl_cdf_binomial_P (r,
						  0.5,
						  stp[i].pos + stp[i].neg);

      stp[i].point_prob = gsl_ran_binomial_pdf (r, 0.5,
						stp[i].pos + stp[i].neg);
    }

  output_frequency_table (t2s, stp, dict);

  output_statistics_table (t2s, stp);

  free (stp);
}
Example #16
0
int
cmd_input_program (struct lexer *lexer, struct dataset *ds)
{
  struct input_program_pgm *inp;
  bool saw_END_CASE = false;

  dataset_clear (ds);
  if (!lex_match (lexer, T_ENDCMD))
    return lex_end_of_command (lexer);

  inp = xmalloc (sizeof *inp);
  inp->trns_chain = NULL;
  inp->init = NULL;
  inp->proto = NULL;

  inside_input_program = true;
  while (!lex_match_phrase (lexer, "END INPUT PROGRAM"))
    {
      enum cmd_result result;

      result = cmd_parse_in_state (lexer, ds, CMD_STATE_INPUT_PROGRAM);
      if (result == CMD_END_CASE)
        {
          emit_END_CASE (ds, inp);
          saw_END_CASE = true;
        }
      else if (cmd_result_is_failure (result) && result != CMD_FAILURE)
        {
          if (result == CMD_EOF)
            msg (SE, _("Unexpected end-of-file within INPUT PROGRAM."));
          inside_input_program = false;
          dataset_clear (ds);
          destroy_input_program (inp);
          return result;
        }
    }
  if (!saw_END_CASE)
    emit_END_CASE (ds, inp);
  inside_input_program = false;

  if (dict_get_next_value_idx (dataset_dict (ds)) == 0)
    {
      msg (SE, _("Input program did not create any variables."));
      dataset_clear (ds);
      destroy_input_program (inp);
      return CMD_FAILURE;
    }

  inp->trns_chain = proc_capture_transformations (ds);
  trns_chain_finalize (inp->trns_chain);

  inp->restart = TRNS_CONTINUE;

  /* Figure out how to initialize each input case. */
  inp->init = caseinit_create ();
  caseinit_mark_for_init (inp->init, dataset_dict (ds));
  inp->proto = caseproto_ref (dict_get_proto (dataset_dict (ds)));

  dataset_set_source (
    ds, casereader_create_sequential (NULL, inp->proto, CASENUMBER_MAX,
                                      &input_program_casereader_class, inp));

  return CMD_SUCCESS;
}
Example #17
0
static void
do_reliability (struct casereader *input, struct dataset *ds,
		const struct reliability *rel)
{
  int i;
  int si;
  struct ccase *c;
  casenumber n_missing ;
  casenumber n_valid = 0;


  for (si = 0 ; si < rel->n_sc; ++si)
    {
      struct cronbach *s = &rel->sc[si];

      s->m = xzalloc (sizeof (s->m) * s->n_items);
      s->total = moments1_create (MOMENT_VARIANCE);

      for (i = 0 ; i < s->n_items ; ++i )
	s->m[i] = moments1_create (MOMENT_VARIANCE);
    }

  input = casereader_create_filter_missing (input,
					    rel->variables,
					    rel->n_variables,
					    rel->exclude,
					    &n_missing,
					    NULL);

  for (si = 0 ; si < rel->n_sc; ++si)
    {
      struct cronbach *s = &rel->sc[si];


      s->totals_idx = caseproto_get_n_widths (casereader_get_proto (input));
      input =
	casereader_create_append_numeric (input, append_sum,
					  s, NULL);
    }

  for (; (c = casereader_read (input)) != NULL; case_unref (c))
    {
      double weight = 1.0;
      n_valid ++;

      for (si = 0; si < rel->n_sc; ++si)
	{
	  struct cronbach *s = &rel->sc[si];

	  for (i = 0 ; i < s->n_items ; ++i )
	    moments1_add (s->m[i], case_data (c, s->items[i])->f, weight);

	  moments1_add (s->total, case_data_idx (c, s->totals_idx)->f, weight);
	}
    }
  casereader_destroy (input);

  for (si = 0; si < rel->n_sc; ++si)
    {
      struct cronbach *s = &rel->sc[si];

      s->sum_of_variances = 0;
      for (i = 0 ; i < s->n_items ; ++i )
	{
	  double weight, mean, variance;
	  moments1_calculate (s->m[i], &weight, &mean, &variance, NULL, NULL);

	  s->sum_of_variances += variance;
	}

      moments1_calculate (s->total, NULL, NULL, &s->variance_of_sums,
			  NULL, NULL);

      s->alpha =
	alpha (s->n_items, s->sum_of_variances, s->variance_of_sums);
    }

  text_item_submit (text_item_create_format (TEXT_ITEM_PARAGRAPH, _("Scale: %s"),
                                             ds_cstr (&rel->scale_name)));

  case_processing_summary (n_valid, n_missing, dataset_dict (ds));
}
Example #18
0
/* Parses and executes the AGGREGATE procedure. */
int
cmd_aggregate (struct lexer *lexer, struct dataset *ds)
{
  struct dictionary *dict = dataset_dict (ds);
  struct agr_proc agr;
  struct file_handle *out_file = NULL;
  struct casereader *input = NULL, *group;
  struct casegrouper *grouper;
  struct casewriter *output = NULL;

  bool copy_documents = false;
  bool presorted = false;
  bool saw_direction;
  bool ok;

  memset(&agr, 0 , sizeof (agr));
  agr.missing = ITEMWISE;
  agr.src_dict = dict;
  subcase_init_empty (&agr.sort);

  /* OUTFILE subcommand must be first. */
  lex_match (lexer, T_SLASH);
  if (!lex_force_match_id (lexer, "OUTFILE"))
    goto error;
  lex_match (lexer, T_EQUALS);
  if (!lex_match (lexer, T_ASTERISK))
    {
      out_file = fh_parse (lexer, FH_REF_FILE, dataset_session (ds));
      if (out_file == NULL)
        goto error;
    }

  if (out_file == NULL && lex_match_id (lexer, "MODE"))
    {
      lex_match (lexer, T_EQUALS);
      if (lex_match_id (lexer, "ADDVARIABLES"))
	{
	  agr.add_variables = true;

	  /* presorted is assumed in ADDVARIABLES mode */
	  presorted = true;
	}
      else if (lex_match_id (lexer, "REPLACE"))
	{
	  agr.add_variables = false;
	}
      else
	goto error;
    }

  if ( agr.add_variables )
    agr.dict = dict_clone (dict);
  else
    agr.dict = dict_create (dict_get_encoding (dict));

  dict_set_label (agr.dict, dict_get_label (dict));
  dict_set_documents (agr.dict, dict_get_documents (dict));

  /* Read most of the subcommands. */
  for (;;)
    {
      lex_match (lexer, T_SLASH);

      if (lex_match_id (lexer, "MISSING"))
	{
	  lex_match (lexer, T_EQUALS);
	  if (!lex_match_id (lexer, "COLUMNWISE"))
	    {
	      lex_error_expecting (lexer, "COLUMNWISE", NULL);
              goto error;
	    }
	  agr.missing = COLUMNWISE;
	}
      else if (lex_match_id (lexer, "DOCUMENT"))
        copy_documents = true;
      else if (lex_match_id (lexer, "PRESORTED"))
        presorted = true;
      else if (lex_force_match_id (lexer, "BREAK"))
	{
          int i;

	  lex_match (lexer, T_EQUALS);
          if (!parse_sort_criteria (lexer, dict, &agr.sort, &agr.break_vars,
                                    &saw_direction))
            goto error;
          agr.break_var_cnt = subcase_get_n_fields (&agr.sort);

	  if  (! agr.add_variables)
	    for (i = 0; i < agr.break_var_cnt; i++)
	      dict_clone_var_assert (agr.dict, agr.break_vars[i]);

          /* BREAK must follow the options. */
          break;
	}
      else
        goto error;

    }
  if (presorted && saw_direction)
    msg (SW, _("When PRESORTED is specified, specifying sorting directions "
               "with (A) or (D) has no effect.  Output data will be sorted "
               "the same way as the input data."));

  /* Read in the aggregate functions. */
  lex_match (lexer, T_SLASH);
  if (!parse_aggregate_functions (lexer, dict, &agr))
    goto error;

  /* Delete documents. */
  if (!copy_documents)
    dict_clear_documents (agr.dict);

  /* Cancel SPLIT FILE. */
  dict_set_split_vars (agr.dict, NULL, 0);

  /* Initialize. */
  agr.case_cnt = 0;

  if (out_file == NULL)
    {
      /* The active dataset will be replaced by the aggregated data,
         so TEMPORARY is moot. */
      proc_cancel_temporary_transformations (ds);
      proc_discard_output (ds);
      output = autopaging_writer_create (dict_get_proto (agr.dict));
    }
  else
    {
      output = any_writer_open (out_file, agr.dict);
      if (output == NULL)
        goto error;
    }

  input = proc_open (ds);
  if (!subcase_is_empty (&agr.sort) && !presorted)
    {
      input = sort_execute (input, &agr.sort);
      subcase_clear (&agr.sort);
    }

  for (grouper = casegrouper_create_vars (input, agr.break_vars,
                                          agr.break_var_cnt);
       casegrouper_get_next_group (grouper, &group);
       casereader_destroy (group))
    {
      struct casereader *placeholder = NULL;
      struct ccase *c = casereader_peek (group, 0);

      if (c == NULL)
        {
          casereader_destroy (group);
          continue;
        }

      initialize_aggregate_info (&agr);

      if ( agr.add_variables )
	placeholder = casereader_clone (group);

      {
	struct ccase *cg;
	for (; (cg = casereader_read (group)) != NULL; case_unref (cg))
	  accumulate_aggregate_info (&agr, cg);
      }


      if  (agr.add_variables)
	{
	  struct ccase *cg;
	  for (; (cg = casereader_read (placeholder)) != NULL; case_unref (cg))
	    dump_aggregate_info (&agr, output, cg);

	  casereader_destroy (placeholder);
	}
      else
	{
	  dump_aggregate_info (&agr, output, c);
	}
      case_unref (c);
    }
  if (!casegrouper_destroy (grouper))
    goto error;

  if (!proc_commit (ds))
    {
      input = NULL;
      goto error;
    }
  input = NULL;

  if (out_file == NULL)
    {
      struct casereader *next_input = casewriter_make_reader (output);
      if (next_input == NULL)
        goto error;

      dataset_set_dict (ds, agr.dict);
      dataset_set_source (ds, next_input);
      agr.dict = NULL;
    }
  else
    {
      ok = casewriter_destroy (output);
      output = NULL;
      if (!ok)
        goto error;
    }

  agr_destroy (&agr);
  fh_unref (out_file);
  return CMD_SUCCESS;

error:
  if (input != NULL)
    proc_commit (ds);
  casewriter_destroy (output);
  agr_destroy (&agr);
  fh_unref (out_file);
  return CMD_CASCADING_FAILURE;
}
Example #19
0
int
cmd_missing_values (struct lexer *lexer, struct dataset *ds)
{
  struct dictionary *dict = dataset_dict (ds);
  struct variable **v = NULL;
  size_t nv;

  bool ok = true;

  while (lex_token (lexer) != T_ENDCMD)
    {
      size_t i;

      if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
        goto error;

      if (!lex_force_match (lexer, T_LPAREN))
        goto error;

      for (i = 0; i < nv; i++)
        var_clear_missing_values (v[i]);

      if (!lex_match (lexer, T_RPAREN))
        {
          struct missing_values mv;

          for (i = 0; i < nv; i++)
            if (var_get_type (v[i]) != var_get_type (v[0]))
              {
                const struct variable *n = var_is_numeric (v[0]) ? v[0] : v[i];
                const struct variable *s = var_is_numeric (v[0]) ? v[i] : v[0];
                msg (SE, _("Cannot mix numeric variables (e.g. %s) and "
                           "string variables (e.g. %s) within a single list."),
                     var_get_name (n), var_get_name (s));
                goto error;
              }

          if (var_is_numeric (v[0]))
            {
              mv_init (&mv, 0);
              while (!lex_match (lexer, T_RPAREN))
                {
                  enum fmt_type type = var_get_print_format (v[0])->type;
                  double x, y;
                  bool ok;

                  if (!parse_num_range (lexer, &x, &y, &type))
                    goto error;

                  ok = (x == y
                        ? mv_add_num (&mv, x)
                        : mv_add_range (&mv, x, y));
                  if (!ok)
                    ok = false;

                  lex_match (lexer, T_COMMA);
                }
            }
          else
            {
              mv_init (&mv, MV_MAX_STRING);
              while (!lex_match (lexer, T_RPAREN))
                {
                  uint8_t value[MV_MAX_STRING];
                  char *dict_mv;
                  size_t length;

                  if (!lex_force_string (lexer))
                    {
                      ok = false;
                      break;
                    }

                  dict_mv = recode_string (dict_get_encoding (dict), "UTF-8",
                                           lex_tokcstr (lexer),
                                           ss_length (lex_tokss (lexer)));
                  length = strlen (dict_mv);
                  if (length > MV_MAX_STRING)
                    {
                      /* XXX truncate graphemes not bytes */
                      msg (SE, _("Truncating missing value to maximum "
                                 "acceptable length (%d bytes)."),
                           MV_MAX_STRING);
                      length = MV_MAX_STRING;
                    }
                  memset (value, ' ', MV_MAX_STRING);
                  memcpy (value, dict_mv, length);
                  free (dict_mv);

                  if (!mv_add_str (&mv, value))
                    ok = false;

                  lex_get (lexer);
                  lex_match (lexer, T_COMMA);
                }
            }

          for (i = 0; i < nv; i++)
            {
              if (mv_is_resizable (&mv, var_get_width (v[i])))
                var_set_missing_values (v[i], &mv);
              else
                {
                  msg (SE, _("Missing values provided are too long to assign "
                             "to variable of width %d."),
                       var_get_width (v[i]));
                  ok = false;
                }
            }

          mv_destroy (&mv);
        }

      lex_match (lexer, T_SLASH);
      free (v);
      v = NULL;
    }

  free (v);
  return ok ? CMD_SUCCESS : CMD_FAILURE;

error:
  free (v);
  return CMD_FAILURE;
}
Example #20
0
/* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
   WRITER_TYPE identifies the type of file to write,
   and COMMAND_TYPE identifies the type of command.

   On success, returns a writer.
   For procedures only, sets *RETAIN_UNSELECTED to true if cases
   that would otherwise be excluded by FILTER or USE should be
   included.

   On failure, returns a null pointer. */
static struct casewriter *
parse_write_command (struct lexer *lexer, struct dataset *ds,
		     enum writer_type writer_type,
                     enum command_type command_type,
                     bool *retain_unselected)
{
  /* Common data. */
  struct file_handle *handle; /* Output file. */
  struct dictionary *dict;    /* Dictionary for output file. */
  struct casewriter *writer;  /* Writer. */
  struct case_map *map;       /* Map from input data to data for writer. */

  /* Common options. */
  bool print_map;             /* Print map?  TODO. */
  bool print_short_names;     /* Print long-to-short name map.  TODO. */
  struct sfm_write_options sysfile_opts;
  struct pfm_write_options porfile_opts;

  assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
  assert (command_type == XFORM_CMD || command_type == PROC_CMD);
  assert ((retain_unselected != NULL) == (command_type == PROC_CMD));

  if (command_type == PROC_CMD)
    *retain_unselected = true;

  handle = NULL;
  dict = dict_clone (dataset_dict (ds));
  writer = NULL;
  map = NULL;
  print_map = false;
  print_short_names = false;
  sysfile_opts = sfm_writer_default_options ();
  porfile_opts = pfm_writer_default_options ();

  case_map_prepare_dict (dict);
  dict_delete_scratch_vars (dict);

  lex_match (lexer, T_SLASH);
  for (;;)
    {
      if (lex_match_id (lexer, "OUTFILE"))
	{
          if (handle != NULL)
            {
              lex_sbc_only_once ("OUTFILE");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);

	  handle = fh_parse (lexer, FH_REF_FILE, NULL);
	  if (handle == NULL)
	    goto error;
	}
      else if (lex_match_id (lexer, "NAMES"))
        print_short_names = true;
      else if (lex_match_id (lexer, "PERMISSIONS"))
        {
          bool cw;

          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "READONLY"))
            cw = false;
          else if (lex_match_id (lexer, "WRITEABLE"))
            cw = true;
          else
            {
              lex_error_expecting (lexer, "READONLY", "WRITEABLE",
                                   NULL_SENTINEL);
              goto error;
            }
          sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
        }
      else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "RETAIN"))
            *retain_unselected = true;
          else if (lex_match_id (lexer, "DELETE"))
            *retain_unselected = false;
          else
            {
              lex_error_expecting (lexer, "RETAIN", "DELETE", NULL_SENTINEL);
              goto error;
            }
        }
      else if (writer_type == SYSFILE_WRITER
               && lex_match_id (lexer, "COMPRESSED"))
	sysfile_opts.compress = true;
      else if (writer_type == SYSFILE_WRITER
               && lex_match_id (lexer, "UNCOMPRESSED"))
	sysfile_opts.compress = false;
      else if (writer_type == SYSFILE_WRITER
               && lex_match_id (lexer, "VERSION"))
	{
	  lex_match (lexer, T_EQUALS);
	  if (!lex_force_int (lexer))
            goto error;
          sysfile_opts.version = lex_integer (lexer);
          lex_get (lexer);
	}
      else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
        {
          lex_match (lexer, T_EQUALS);
          if (lex_match_id (lexer, "COMMUNICATIONS"))
            porfile_opts.type = PFM_COMM;
          else if (lex_match_id (lexer, "TAPE"))
            porfile_opts.type = PFM_TAPE;
          else
            {
              lex_error_expecting (lexer, "COMM", "TAPE", NULL_SENTINEL);
              goto error;
            }
        }
      else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
        {
          lex_match (lexer, T_EQUALS);
          if (!lex_force_int (lexer))
            goto error;
          porfile_opts.digits = lex_integer (lexer);
          lex_get (lexer);
        }
      else if (!parse_dict_trim (lexer, dict))
        goto error;

      if (!lex_match (lexer, T_SLASH))
	break;
    }
  if (lex_end_of_command (lexer) != CMD_SUCCESS)
    goto error;

  if (handle == NULL)
    {
      lex_sbc_missing ("OUTFILE");
      goto error;
    }

  dict_delete_scratch_vars (dict);
  dict_compact_values (dict);

  if (fh_get_referent (handle) == FH_REF_FILE)
    {
      switch (writer_type)
        {
        case SYSFILE_WRITER:
          writer = sfm_open_writer (handle, dict, sysfile_opts);
          break;
        case PORFILE_WRITER:
          writer = pfm_open_writer (handle, dict, porfile_opts);
          break;
        }
    }
  else
    writer = any_writer_open (handle, dict);
  if (writer == NULL)
    goto error;

  map = case_map_from_dict (dict);
  if (map != NULL)
    writer = case_map_create_output_translator (map, writer);
  dict_destroy (dict);

  fh_unref (handle);
  return writer;

 error:
  fh_unref (handle);
  casewriter_destroy (writer);
  dict_destroy (dict);
  case_map_destroy (map);
  return NULL;
}