Exemple #1
0
/* Advances FILE to its next case.  If BY is nonnull, then FILE's is_minimal
   member is updated based on whether the new case's BY values still match
   those in BY. */
static void
advance_file (struct comb_file *file, union value by[])
{
  case_unref (file->data);
  file->data = casereader_read (file->reader);
  if (by)
    file->is_minimal = (file->data != NULL
                        && subcase_equal_cx (&file->by_vars, file->data, by));
}
Exemple #2
0
static bool
read_input_case (struct merge *m, size_t idx)
{
  struct merge_input *i = &m->inputs[idx];

  i->c = casereader_read (i->reader);
  if (i->c)
    return true;
  else
    {
      casereader_destroy (i->reader);
      remove_element (m->inputs, m->input_cnt, sizeof *m->inputs, idx);
      m->input_cnt--;
      return false;
    }
}
Exemple #3
0
/* Reads FILE, which must be of type COMB_TABLE, until it
   encounters a case with BY or greater for its BY variables.
   Returns true if a case with exactly BY for its BY variables
   was found, otherwise false. */
static bool
scan_table (struct comb_file *file, union value by[])
{
  while (file->data != NULL)
    {
      int cmp = subcase_compare_3way_xc (&file->by_vars, by, file->data);
      if (cmp > 0)
        {
          case_unref (file->data);
          file->data = casereader_read (file->reader);
        }
      else
        return cmp == 0;
    }
  return false;
}
Exemple #4
0
/*
  Update IA according to the contents of DICT and CREADER.
  CREADER will be destroyed by this function.
*/
void 
update_assistant (struct import_assistant *ia)
{
  struct sheet_spec_page *ssp = ia->sheet_spec;
  int rows = 0;

  if (ssp->dict)
    {
      struct ccase *c;
      int col;

      ia->column_cnt = dict_get_var_cnt (ssp->dict);
      ia->columns = xcalloc (ia->column_cnt, sizeof (*ia->columns));
      for (col = 0; col < ia->column_cnt ; ++col)
	{
	  const struct variable *var = dict_get_var (ssp->dict, col);
	  ia->columns[col].name = xstrdup (var_get_name (var));
	  ia->columns[col].contents = NULL;
	}

      for (; (c = casereader_read (ssp->reader)) != NULL; case_unref (c))
	{
	  rows++;
	  for (col = 0; col < ia->column_cnt ; ++col)
	    {
	      char *ss;
	      const struct variable *var = dict_get_var (ssp->dict, col);
	      
	      ia->columns[col].contents = xrealloc (ia->columns[col].contents,
						    sizeof (struct substring) * rows);
	      
	      ss = data_out (case_data (c, var), dict_get_encoding (ssp->dict), 
			     var_get_print_format (var));
	      
	      ia->columns[col].contents[rows - 1] = ss_cstr (ss);
	    }
	  
	  if (rows > MAX_PREVIEW_LINES)
	    {
	      case_unref (c);
	      break;
	    }
	}
    }
  
  ia->file.line_cnt = rows;
}
Exemple #5
0
void
order_stats_accumulate_idx (struct order_stats **os, size_t nos,
                            struct casereader *reader,
                            int wt_idx,
                            int val_idx)
{
  struct ccase *cx;
  struct ccase *prev_cx = NULL;
  double prev_value = -DBL_MAX;

  double cc_i = 0;
  double c_i = 0;

  for (; (cx = casereader_read (reader)) != NULL; case_unref (cx))
    {
      const double weight = (wt_idx == -1) ? 1.0 : case_data_idx (cx, wt_idx)->f;
      const double this_value = case_data_idx (cx, val_idx)->f;

      /* The casereader MUST be sorted */
      assert (this_value >= prev_value);

      if ( prev_value == -DBL_MAX || prev_value == this_value)
	c_i += weight;

      if ( prev_value > -DBL_MAX && this_value > prev_value)
	{
	  update_k_values (prev_cx, prev_value, c_i, cc_i, os, nos);
	  c_i = weight;
	}

      case_unref (prev_cx);
      cc_i += weight;
      prev_value = this_value;
      prev_cx = case_ref (cx);
    }

  update_k_values (prev_cx, prev_value, c_i, cc_i, os, nos);
  case_unref (prev_cx);

  casereader_destroy (reader);
}
Exemple #6
0
static int
combine_files (enum comb_command_type command,
               struct lexer *lexer, struct dataset *ds)
{
  struct comb_proc proc;

  bool saw_by = false;
  bool saw_sort = false;
  struct casereader *active_file = NULL;

  char *first_name = NULL;
  char *last_name = NULL;

  struct taint *taint = NULL;

  size_t n_tables = 0;
  size_t allocated_files = 0;

  size_t i;

  proc.files = NULL;
  proc.n_files = 0;
  proc.dict = dict_create (get_default_encoding ());
  proc.output = NULL;
  proc.matcher = NULL;
  subcase_init_empty (&proc.by_vars);
  proc.first = NULL;
  proc.last = NULL;
  proc.buffered_case = NULL;
  proc.prev_BY = NULL;

  dict_set_case_limit (proc.dict, dict_get_case_limit (dataset_dict (ds)));

  lex_match (lexer, T_SLASH);
  for (;;)
    {
      struct comb_file *file;
      enum comb_file_type type;

      if (lex_match_id (lexer, "FILE"))
        type = COMB_FILE;
      else if (command == COMB_MATCH && lex_match_id (lexer, "TABLE"))
        {
          type = COMB_TABLE;
          n_tables++;
        }
      else
        break;
      lex_match (lexer, T_EQUALS);

      if (proc.n_files >= allocated_files)
        proc.files = x2nrealloc (proc.files, &allocated_files,
                                sizeof *proc.files);
      file = &proc.files[proc.n_files++];
      file->type = type;
      subcase_init_empty (&file->by_vars);
      subcase_init_empty (&file->src);
      subcase_init_empty (&file->dst);
      file->mv = NULL;
      file->handle = NULL;
      file->dict = NULL;
      file->reader = NULL;
      file->data = NULL;
      file->is_sorted = true;
      file->in_name = NULL;
      file->in_var = NULL;

      if (lex_match (lexer, T_ASTERISK))
        {
          if (!dataset_has_source (ds))
            {
              msg (SE, _("Cannot specify the active dataset since none "
                         "has been defined."));
              goto error;
            }

          if (proc_make_temporary_transformations_permanent (ds))
            msg (SE, _("This command may not be used after TEMPORARY when "
                       "the active dataset is an input source.  "
                       "Temporary transformations will be made permanent."));

          file->dict = dict_clone (dataset_dict (ds));
        }
      else
        {
          file->handle = fh_parse (lexer, FH_REF_FILE, dataset_session (ds));
          if (file->handle == NULL)
            goto error;

          file->reader = any_reader_open (file->handle, NULL, &file->dict);
          if (file->reader == NULL)
            goto error;
        }

      while (lex_match (lexer, T_SLASH))
        if (lex_match_id (lexer, "RENAME"))
          {
            if (!parse_dict_rename (lexer, file->dict))
              goto error;
          }
        else if (lex_match_id (lexer, "IN"))
          {
            lex_match (lexer, T_EQUALS);
            if (lex_token (lexer) != T_ID)
              {
                lex_error (lexer, NULL);
                goto error;
              }

            if (file->in_name)
              {
                msg (SE, _("Multiple IN subcommands for a single FILE or "
                           "TABLE."));
                goto error;
              }
            file->in_name = xstrdup (lex_tokcstr (lexer));
            lex_get (lexer);
          }
        else if (lex_match_id (lexer, "SORT"))
          {
            file->is_sorted = false;
            saw_sort = true;
          }

      if (!merge_dictionary (proc.dict, file))
        goto error;
    }

  while (lex_token (lexer) != T_ENDCMD)
    {
      if (lex_match (lexer, T_BY))
	{
          const struct variable **by_vars;
          size_t i;
          bool ok;

	  if (saw_by)
	    {
              lex_sbc_only_once ("BY");
	      goto error;
	    }
          saw_by = true;

	  lex_match (lexer, T_EQUALS);
          if (!parse_sort_criteria (lexer, proc.dict, &proc.by_vars,
                                    &by_vars, NULL))
	    goto error;

          ok = true;
          for (i = 0; i < proc.n_files; i++)
            {
              struct comb_file *file = &proc.files[i];
              size_t j;

              for (j = 0; j < subcase_get_n_fields (&proc.by_vars); j++)
                {
                  const char *name = var_get_name (by_vars[j]);
                  struct variable *var = dict_lookup_var (file->dict, name);
                  if (var != NULL)
                    subcase_add_var (&file->by_vars, var,
                                     subcase_get_direction (&proc.by_vars, j));
                  else
                    {
                      if (file->handle != NULL)
                        msg (SE, _("File %s lacks BY variable %s."),
                             fh_get_name (file->handle), name);
                      else
                        msg (SE, _("Active dataset lacks BY variable %s."),
                             name);
                      ok = false;
                    }
                }
              assert (!ok || subcase_conformable (&file->by_vars,
                                                  &proc.files[0].by_vars));
            }
          free (by_vars);

          if (!ok)
            goto error;
	}
      else if (command != COMB_UPDATE && lex_match_id (lexer, "FIRST"))
        {
          if (first_name != NULL)
            {
              lex_sbc_only_once ("FIRST");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);
          if (!lex_force_id (lexer))
            goto error;
          first_name = xstrdup (lex_tokcstr (lexer));
          lex_get (lexer);
        }
      else if (command != COMB_UPDATE && lex_match_id (lexer, "LAST"))
        {
          if (last_name != NULL)
            {
              lex_sbc_only_once ("LAST");
              goto error;
            }

	  lex_match (lexer, T_EQUALS);
          if (!lex_force_id (lexer))
            goto error;
          last_name = xstrdup (lex_tokcstr (lexer));
          lex_get (lexer);
        }
      else if (lex_match_id (lexer, "MAP"))
	{
	  /* FIXME. */
	}
      else if (lex_match_id (lexer, "DROP"))
        {
          if (!parse_dict_drop (lexer, proc.dict))
            goto error;
        }
      else if (lex_match_id (lexer, "KEEP"))
        {
          if (!parse_dict_keep (lexer, proc.dict))
            goto error;
        }
      else
	{
	  lex_error (lexer, NULL);
	  goto error;
	}

      if (!lex_match (lexer, T_SLASH) && lex_token (lexer) != T_ENDCMD)
        {
          lex_end_of_command (lexer);
          goto error;
        }
    }

  if (!saw_by)
    {
      if (command == COMB_UPDATE)
        {
          lex_sbc_missing ("BY");
          goto error;
        }
      if (n_tables)
        {
          msg (SE, _("BY is required when %s is specified."), "TABLE");
          goto error;
        }
      if (saw_sort)
        {
          msg (SE, _("BY is required when %s is specified."), "SORT");
          goto error;
        }
    }

  /* Add IN, FIRST, and LAST variables to master dictionary. */
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      if (!create_flag_var ("IN", file->in_name, proc.dict, &file->in_var))
        goto error;
    }
  if (!create_flag_var ("FIRST", first_name, proc.dict, &proc.first)
      || !create_flag_var ("LAST", last_name, proc.dict, &proc.last))
    goto error;

  dict_delete_scratch_vars (proc.dict);
  dict_compact_values (proc.dict);

  /* Set up mapping from each file's variables to master
     variables. */
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      size_t src_var_cnt = dict_get_var_cnt (file->dict);
      size_t j;

      file->mv = xnmalloc (src_var_cnt, sizeof *file->mv);
      for (j = 0; j < src_var_cnt; j++)
        {
          struct variable *src_var = dict_get_var (file->dict, j);
          struct variable *dst_var = dict_lookup_var (proc.dict,
                                                      var_get_name (src_var));
          if (dst_var != NULL)
            {
              size_t n = subcase_get_n_fields (&file->src);
              file->mv[n] = var_get_missing_values (src_var);
              subcase_add_var (&file->src, src_var, SC_ASCEND);
              subcase_add_var (&file->dst, dst_var, SC_ASCEND);
            }
        }
    }

  proc.output = autopaging_writer_create (dict_get_proto (proc.dict));
  taint = taint_clone (casewriter_get_taint (proc.output));

  /* Set up case matcher. */
  proc.matcher = case_matcher_create ();
  for (i = 0; i < proc.n_files; i++)
    {
      struct comb_file *file = &proc.files[i];
      if (file->reader == NULL)
        {
          if (active_file == NULL)
            {
              proc_discard_output (ds);
              file->reader = active_file = proc_open_filtering (ds, false);
            }
          else
            file->reader = casereader_clone (active_file);
        }
      if (!file->is_sorted)
        file->reader = sort_execute (file->reader, &file->by_vars);
      taint_propagate (casereader_get_taint (file->reader), taint);
      file->data = casereader_read (file->reader);
      if (file->type == COMB_FILE)
        case_matcher_add_input (proc.matcher, &file->by_vars,
                                &file->data, &file->is_minimal);
    }

  if (command == COMB_ADD)
    execute_add_files (&proc);
  else if (command == COMB_MATCH)
    execute_match_files (&proc);
  else if (command == COMB_UPDATE)
    execute_update (&proc);
  else
    NOT_REACHED ();

  case_matcher_destroy (proc.matcher);
  proc.matcher = NULL;
  close_all_comb_files (&proc);
  if (active_file != NULL)
    proc_commit (ds);

  dataset_set_dict (ds, proc.dict);
  dataset_set_source (ds, casewriter_make_reader (proc.output));
  proc.dict = NULL;
  proc.output = NULL;

  free_comb_proc (&proc);

  free (first_name);
  free (last_name);

  return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;

 error:
  if (active_file != NULL)
    proc_commit (ds);
  free_comb_proc (&proc);
  taint_destroy (taint);
  free (first_name);
  free (last_name);
  return CMD_CASCADING_FAILURE;
}
Exemple #7
0
/* Parses and executes the AGGREGATE procedure. */
int
cmd_aggregate (struct lexer *lexer, struct dataset *ds)
{
  struct dictionary *dict = dataset_dict (ds);
  struct agr_proc agr;
  struct file_handle *out_file = NULL;
  struct casereader *input = NULL, *group;
  struct casegrouper *grouper;
  struct casewriter *output = NULL;

  bool copy_documents = false;
  bool presorted = false;
  bool saw_direction;
  bool ok;

  memset(&agr, 0 , sizeof (agr));
  agr.missing = ITEMWISE;
  agr.src_dict = dict;
  subcase_init_empty (&agr.sort);

  /* OUTFILE subcommand must be first. */
  lex_match (lexer, T_SLASH);
  if (!lex_force_match_id (lexer, "OUTFILE"))
    goto error;
  lex_match (lexer, T_EQUALS);
  if (!lex_match (lexer, T_ASTERISK))
    {
      out_file = fh_parse (lexer, FH_REF_FILE, dataset_session (ds));
      if (out_file == NULL)
        goto error;
    }

  if (out_file == NULL && lex_match_id (lexer, "MODE"))
    {
      lex_match (lexer, T_EQUALS);
      if (lex_match_id (lexer, "ADDVARIABLES"))
	{
	  agr.add_variables = true;

	  /* presorted is assumed in ADDVARIABLES mode */
	  presorted = true;
	}
      else if (lex_match_id (lexer, "REPLACE"))
	{
	  agr.add_variables = false;
	}
      else
	goto error;
    }

  if ( agr.add_variables )
    agr.dict = dict_clone (dict);
  else
    agr.dict = dict_create (dict_get_encoding (dict));

  dict_set_label (agr.dict, dict_get_label (dict));
  dict_set_documents (agr.dict, dict_get_documents (dict));

  /* Read most of the subcommands. */
  for (;;)
    {
      lex_match (lexer, T_SLASH);

      if (lex_match_id (lexer, "MISSING"))
	{
	  lex_match (lexer, T_EQUALS);
	  if (!lex_match_id (lexer, "COLUMNWISE"))
	    {
	      lex_error_expecting (lexer, "COLUMNWISE", NULL);
              goto error;
	    }
	  agr.missing = COLUMNWISE;
	}
      else if (lex_match_id (lexer, "DOCUMENT"))
        copy_documents = true;
      else if (lex_match_id (lexer, "PRESORTED"))
        presorted = true;
      else if (lex_force_match_id (lexer, "BREAK"))
	{
          int i;

	  lex_match (lexer, T_EQUALS);
          if (!parse_sort_criteria (lexer, dict, &agr.sort, &agr.break_vars,
                                    &saw_direction))
            goto error;
          agr.break_var_cnt = subcase_get_n_fields (&agr.sort);

	  if  (! agr.add_variables)
	    for (i = 0; i < agr.break_var_cnt; i++)
	      dict_clone_var_assert (agr.dict, agr.break_vars[i]);

          /* BREAK must follow the options. */
          break;
	}
      else
        goto error;

    }
  if (presorted && saw_direction)
    msg (SW, _("When PRESORTED is specified, specifying sorting directions "
               "with (A) or (D) has no effect.  Output data will be sorted "
               "the same way as the input data."));

  /* Read in the aggregate functions. */
  lex_match (lexer, T_SLASH);
  if (!parse_aggregate_functions (lexer, dict, &agr))
    goto error;

  /* Delete documents. */
  if (!copy_documents)
    dict_clear_documents (agr.dict);

  /* Cancel SPLIT FILE. */
  dict_set_split_vars (agr.dict, NULL, 0);

  /* Initialize. */
  agr.case_cnt = 0;

  if (out_file == NULL)
    {
      /* The active dataset will be replaced by the aggregated data,
         so TEMPORARY is moot. */
      proc_cancel_temporary_transformations (ds);
      proc_discard_output (ds);
      output = autopaging_writer_create (dict_get_proto (agr.dict));
    }
  else
    {
      output = any_writer_open (out_file, agr.dict);
      if (output == NULL)
        goto error;
    }

  input = proc_open (ds);
  if (!subcase_is_empty (&agr.sort) && !presorted)
    {
      input = sort_execute (input, &agr.sort);
      subcase_clear (&agr.sort);
    }

  for (grouper = casegrouper_create_vars (input, agr.break_vars,
                                          agr.break_var_cnt);
       casegrouper_get_next_group (grouper, &group);
       casereader_destroy (group))
    {
      struct casereader *placeholder = NULL;
      struct ccase *c = casereader_peek (group, 0);

      if (c == NULL)
        {
          casereader_destroy (group);
          continue;
        }

      initialize_aggregate_info (&agr);

      if ( agr.add_variables )
	placeholder = casereader_clone (group);

      {
	struct ccase *cg;
	for (; (cg = casereader_read (group)) != NULL; case_unref (cg))
	  accumulate_aggregate_info (&agr, cg);
      }


      if  (agr.add_variables)
	{
	  struct ccase *cg;
	  for (; (cg = casereader_read (placeholder)) != NULL; case_unref (cg))
	    dump_aggregate_info (&agr, output, cg);

	  casereader_destroy (placeholder);
	}
      else
	{
	  dump_aggregate_info (&agr, output, c);
	}
      case_unref (c);
    }
  if (!casegrouper_destroy (grouper))
    goto error;

  if (!proc_commit (ds))
    {
      input = NULL;
      goto error;
    }
  input = NULL;

  if (out_file == NULL)
    {
      struct casereader *next_input = casewriter_make_reader (output);
      if (next_input == NULL)
        goto error;

      dataset_set_dict (ds, agr.dict);
      dataset_set_source (ds, next_input);
      agr.dict = NULL;
    }
  else
    {
      ok = casewriter_destroy (output);
      output = NULL;
      if (!ok)
        goto error;
    }

  agr_destroy (&agr);
  fh_unref (out_file);
  return CMD_SUCCESS;

error:
  if (input != NULL)
    proc_commit (ds);
  casewriter_destroy (output);
  agr_destroy (&agr);
  fh_unref (out_file);
  return CMD_CASCADING_FAILURE;
}
Exemple #8
0
static void
do_reliability (struct casereader *input, struct dataset *ds,
		const struct reliability *rel)
{
  int i;
  int si;
  struct ccase *c;
  casenumber n_missing ;
  casenumber n_valid = 0;


  for (si = 0 ; si < rel->n_sc; ++si)
    {
      struct cronbach *s = &rel->sc[si];

      s->m = xzalloc (sizeof (s->m) * s->n_items);
      s->total = moments1_create (MOMENT_VARIANCE);

      for (i = 0 ; i < s->n_items ; ++i )
	s->m[i] = moments1_create (MOMENT_VARIANCE);
    }

  input = casereader_create_filter_missing (input,
					    rel->variables,
					    rel->n_variables,
					    rel->exclude,
					    &n_missing,
					    NULL);

  for (si = 0 ; si < rel->n_sc; ++si)
    {
      struct cronbach *s = &rel->sc[si];


      s->totals_idx = caseproto_get_n_widths (casereader_get_proto (input));
      input =
	casereader_create_append_numeric (input, append_sum,
					  s, NULL);
    }

  for (; (c = casereader_read (input)) != NULL; case_unref (c))
    {
      double weight = 1.0;
      n_valid ++;

      for (si = 0; si < rel->n_sc; ++si)
	{
	  struct cronbach *s = &rel->sc[si];

	  for (i = 0 ; i < s->n_items ; ++i )
	    moments1_add (s->m[i], case_data (c, s->items[i])->f, weight);

	  moments1_add (s->total, case_data_idx (c, s->totals_idx)->f, weight);
	}
    }
  casereader_destroy (input);

  for (si = 0; si < rel->n_sc; ++si)
    {
      struct cronbach *s = &rel->sc[si];

      s->sum_of_variances = 0;
      for (i = 0 ; i < s->n_items ; ++i )
	{
	  double weight, mean, variance;
	  moments1_calculate (s->m[i], &weight, &mean, &variance, NULL, NULL);

	  s->sum_of_variances += variance;
	}

      moments1_calculate (s->total, NULL, NULL, &s->variance_of_sums,
			  NULL, NULL);

      s->alpha =
	alpha (s->n_items, s->sum_of_variances, s->variance_of_sums);
    }

  text_item_submit (text_item_create_format (TEXT_ITEM_PARAGRAPH, _("Scale: %s"),
                                             ds_cstr (&rel->scale_name)));

  case_processing_summary (n_valid, n_missing, dataset_dict (ds));
}
Exemple #9
0
void
sign_execute (const struct dataset *ds,
		  struct casereader *input,
		  enum mv_class exclude,
		  const struct npar_test *test,
		  bool exact UNUSED,
		  double timer UNUSED)
{
  int i;
  bool warn = true;
  const struct dictionary *dict = dataset_dict (ds);
  const struct two_sample_test *t2s = UP_CAST (test, const struct two_sample_test, parent);
  struct ccase *c;

  struct sign_test_params *stp = xcalloc (t2s->n_pairs, sizeof *stp);

  struct casereader *r = input;

  for (; (c = casereader_read (r)) != NULL; case_unref (c))
    {
      const double weight = dict_get_case_weight (dict, c, &warn);

      for (i = 0 ; i < t2s->n_pairs; ++i )
	{
	  variable_pair *vp = &t2s->pairs[i];
	  const union value *value0 = case_data (c, (*vp)[0]);
	  const union value *value1 = case_data (c, (*vp)[1]);
	  const double diff = value0->f - value1->f;

	  if (var_is_value_missing ((*vp)[0], value0, exclude))
	    continue;

	  if (var_is_value_missing ((*vp)[1], value1, exclude))
	    continue;

	  if ( diff > 0)
	    stp[i].pos += weight;
	  else if (diff < 0)
	    stp[i].neg += weight;
	  else
	    stp[i].ties += weight;
	}
    }

  casereader_destroy (r);

  for (i = 0 ; i < t2s->n_pairs; ++i )
    {
      int r = MIN (stp[i].pos, stp[i].neg);
      stp[i].one_tailed_sig = gsl_cdf_binomial_P (r,
						  0.5,
						  stp[i].pos + stp[i].neg);

      stp[i].point_prob = gsl_ran_binomial_pdf (r, 0.5,
						stp[i].pos + stp[i].neg);
    }

  output_frequency_table (t2s, stp, dict);

  output_statistics_table (t2s, stp);

  free (stp);
}