Example #1
0
/* Computes the set difference "a minus b" of two natsets.

   PRE: a and b are natsets (strictly increasing ivecs of natural numbers).
   PRE: b is a subset of a

   POST: returns a newly made natset c such that C intersect B = empty
                                                 C union B     = A

       (i.e. n is in result if and only if n is in
             A but not in B)

   If b is found not to be a subset of a, the problem is reported through
   my_error() rather than by writing outside the result vector.

   This function is useful if you have a set of rows (record numbers)
   to use as a test set (in b) and you have all the rows for the test
   and train set in a, and you want to get the set of remaining
   rows to use as a training set. */
ivec *mk_ivec_set_difference(ivec *a,ivec *b)
{
  int a_size = ivec_size(a);
  int b_size = ivec_size(b);
  int c_size = a_size - b_size;
  int a_ptr;
  int b_ptr = 0;
  int c_ptr = 0;
  ivec *c;

  /* A subset can never have more members than its superset. Checking
     here also avoids asking mk_ivec() for a negative size. */
  if ( c_size < 0 )
    my_error("mk_ivec_set_difference: b not a subset of a");

  c = mk_ivec(c_size);

  /* Both inputs are sorted, so one merge-style pass is enough: b_ptr only
     advances when the current element of b has been matched in a. */
  for ( a_ptr = 0 ; a_ptr < a_size ; a_ptr++ )
  {
    int a_val = ivec_ref(a,a_ptr);

    if ( b_ptr < b_size && ivec_ref(b,b_ptr) == a_val )
      b_ptr += 1;
    else if ( b_ptr < b_size && ivec_ref(b,b_ptr) < a_val )
    {
      /* b holds a value smaller than a_val that was never seen in a. */
      my_error("mk_ivec_set_difference: b not a subset of a");
    }
    else if ( c_ptr >= c_size )
    {
      /* c has exactly a_size - b_size slots, so running out of room means
         at least one element of b was never matched in a. */
      my_error("mk_ivec_set_difference: b not a subset of a");
    }
    else
    {
      ivec_set(c,c_ptr,a_val);
      c_ptr += 1;
    }
  }

  return c;
}
Example #2
0
/* Splits a set of rows into a test set of "num_test_rows" rows and a
   training set holding the remaining rows
   (num_train_and_test - num_test_rows of them). The split is obtained
   by shuffling the rows with a fixed seed (12345) and taking the first
   num_test_rows shuffled entries as the test set.

   PRE: train_and_test_rows is a natset (a strictly increasing set of
        natural numbers), in which case num_rows is ignored,

        OR train_and_test_rows is NULL, meaning "use all rows in the
        set { 0 , 1 , ... num_rows-1 }".

   POST: *r_train_rows is the training set
         *r_test_rows is the test set.
         (Note that the union of these two is the original set of rows).
         Both are newly made ivecs, sorted before returning.

   The random number generator is re-seeded on exit with a value that
   was drawn from it on entry.
*/
void make_train_and_test_rows(ivec *train_and_test_rows,
			      int num_rows,int num_test_rows,
			      ivec **r_train_rows,ivec **r_test_rows)
{
  /* Drawn before the fixed seed is installed, so it can be used to
     re-seed the generator afterwards. */
  int restore_seed = int_random(300000);
  ivec *shuffled;
  int total;
  int num_train;
  int k;

  if ( train_and_test_rows == NULL )
    shuffled = mk_identity_ivec(num_rows);
  else
    shuffled = mk_copy_ivec(train_and_test_rows);

  total = ivec_size(shuffled);
  num_train = total - num_test_rows;

  *r_train_rows = mk_ivec(num_train);
  *r_test_rows = mk_ivec(num_test_rows);

  am_srand(12345);
  shuffle_ivec(shuffled);

  /* The head of the shuffled rows becomes the test set ... */
  for ( k = 0 ; k < num_test_rows ; k++ )
  {
    int row = ivec_ref(shuffled,k);
    ivec_set(*r_test_rows,k,row);
  }

  /* ... and everything after it becomes the training set. */
  for ( k = 0 ; k < num_train ; k++ )
  {
    int row = ivec_ref(shuffled,k + num_test_rows);
    ivec_set(*r_train_rows,k,row);
  }

  free_ivec(shuffled);
  am_srand(restore_seed);

  /* Sort each result in place. */
  ivec_sort(*r_train_rows,*r_train_rows);
  ivec_sort(*r_test_rows,*r_test_rows);
}
Example #3
0
/* If you've got a dataset where you are training and testing
   using the set of rows specified in "train_and_test_rows" and you
   wish to do k-fold cross-validation (k == num_folds) then what
   subset of train_and_test_rows should you use as the train set for the
   fold_num'th fold? And which should be the test set?
   This function tells you.
   It uses a deterministic shuffle (fixed seed 12345) and then selects
   a contiguous slice of the shuffled rows as the test set.

   PRE: train_and_test_rows is a natset (a strictly increasing set of
        natural numbers), in which case num_rows is ignored,

        OR train_and_test_rows is NULL, meaning "use all rows in the
        set { 0 , 1 , ... num_rows-1 }".

        0 <= fold_num < num_folds.

   POST: *r_train_rows is the fold_num'th training set
         *r_test_rows is the fold_num'th test set.
         (Note that the union of these two is the original set of rows).
         Both are newly made ivecs, sorted before returning.

    Note that if you called this function repeatedly with the same number
    of folds, but you varied fold_num from 0 up to num_folds-1 on each
    call, then the union of all the resulting test_sets would contain the
    same set as train_and_test_rows. But all pairs of resulting test_sets
    would have an empty intersection.

    The random number generator is re-seeded on exit with a value that
    was drawn from it on entry.
*/
void make_kfold_rows(ivec *train_and_test_rows,int num_rows,int num_folds,int fold_num,
                     ivec **r_train_rows,ivec **r_test_rows)
{
  int save_seed = int_random(300000);
  ivec *srows = (train_and_test_rows==NULL) ? mk_identity_ivec(num_rows) :
                                              mk_copy_ivec(train_and_test_rows);
  int srows_size = ivec_size(srows);
  /* Fold boundaries are taken from the number of rows actually in play
     (srows_size), because num_rows is ignored when train_and_test_rows
     is supplied. The product is formed in double so that it cannot
     overflow int for large inputs. */
  int start_i = (int) floor(fold_num * (double) srows_size / num_folds);
  int end_i = (int) floor((fold_num+1) * (double) srows_size / num_folds);
  int test_size = end_i - start_i;
  int i;

  *r_train_rows = mk_ivec(srows_size - test_size);
  *r_test_rows = mk_ivec(test_size);
  am_srand(12345);
  shuffle_ivec(srows);

  /* Shuffled positions [start_i,end_i) form the test set; positions
     before and after that slice are packed, in order, into the
     training set. */
  for ( i = 0 ; i < srows_size ; i++ )
  {
    int row = ivec_ref(srows,i);

    if ( i < start_i )
      ivec_set(*r_train_rows,i,row);
    else if ( i < end_i )
      ivec_set(*r_test_rows,i - start_i,row);
    else
      ivec_set(*r_train_rows,i - test_size,row);
  }

  free_ivec(srows);
  am_srand(save_seed);

  /* Sort each result in place. */
  ivec_sort(*r_train_rows,*r_train_rows);
  ivec_sort(*r_test_rows,*r_test_rows);
}
Example #4
0
/* Counts how many elements of iv are equal to val and returns that
   count (0 when there is no match). */
int count_in_ivec( ivec *iv, int val)
{
  int matches = 0;
  int idx = 0;

  while ( idx < ivec_size( iv) )
  {
    if ( ivec_ref( iv, idx) == val )
      matches++;
    ++idx;
  }

  return matches;
}
Example #5
0
/* Makes a new dym out of selected rows of x. The i'th row of the result
   is a copy of row "ivec_ref(rows,i)" of x, so the result has
   "ivec_size(rows)" rows and dym_cols(x) columns. */
dym *mk_dym_from_subset_of_rows(dym *x,ivec *rows)
{
  int n = ivec_size(rows);
  dym *subset = mk_dym(n,dym_cols(x));
  int dest = 0;

  while ( dest < n )
  {
    /* Pull the source row out as a temporary dyv, copy it into place,
       then release the temporary. */
    dyv *row_copy = mk_dyv_from_dym_row(x,ivec_ref(rows,dest));
    copy_dyv_to_dym_row(row_copy,subset,dest);
    free_dyv(row_copy);
    dest++;
  }

  return subset;
}
Example #6
0
/* Smoke test for the ivec_t (int vector) and svec_t (string vector)
   containers. For each container it pushes a run of values, pops one,
   verifies the remaining contents through both the raw data pointer and
   the *_at accessor, then resizes / shrinks the vector and prints its
   state after each step.

   Returns 0 when every step completes; a failed check trips assert(). */
int main(void) {
	enum { BUF_LEN = 1000, NUM_INTS = 10000, NUM_STRS = 100 };
	char buf[BUF_LEN];
	int i;

	ivec_t* vec = new_ivec(100);
	for (i = 0; i < NUM_INTS; ++i) {
		/* Do the push outside assert(): with NDEBUG defined the assert
		   expression is not evaluated, which would silently skip it. */
		int pushed = ivec_push_back(vec, i) ? 1 : 0;
		assert(pushed);
		(void)pushed;
	}

	printf("size = %u, empty = %d, capacity = %u\n", ivec_size(vec), ivec_empty(vec), ivec_capacity(vec));
	printf("popback = %d\n", ivec_pop_back(vec));
	printf("size = %u, empty = %d, capacity = %u\n", ivec_size(vec), ivec_empty(vec), ivec_capacity(vec));

	/* One element was popped, so NUM_INTS - 1 values remain to check. */
	int* d = ivec_data(vec);
	for (i = 0; i < NUM_INTS - 1; ++i) {
		assert(*(d+i) == i && i == ivec_at(vec, i));
	}

	printf("resize to 10, and shrink_to_fit\n");
	ivec_resize_default(vec, 10);
	ivec_shrink_to_fit(vec);
	printf("size = %u, empty = %d, capacity = %u\n", ivec_size(vec), ivec_empty(vec), ivec_capacity(vec));

	ivec_dump(buf, BUF_LEN, vec);
	printf("%s\n", buf);

	/* The resize is its own statement rather than a stray extra printf
	   argument with no matching conversion specifier. */
	printf("resize to 15\n");
	ivec_resize_default(vec, 15);
	printf("size = %u, empty = %d, capacity = %u\n", ivec_size(vec), ivec_empty(vec), ivec_capacity(vec));
	ivec_dump(buf, BUF_LEN, vec);
	printf("%s\n", buf);

	ivec_destroy(vec);

	svec_t* svec = new_svec(100);
	for (i = 0; i < NUM_STRS; ++i) {
		snprintf(buf, BUF_LEN, "%d", i);
		/* Same as above: keep the side effect out of assert(). */
		int pushed = svec_push_back(svec, buf) ? 1 : 0;
		assert(pushed);
		(void)pushed;
	}

	printf("size = %u, empty = %d, capacity = %u\n", svec_size(svec), svec_empty(svec), svec_capacity(svec));
	/* The popped string is released here with free(). */
	char* ret = svec_pop_back(svec);
	printf("popback = %s\n", ret);
	free(ret);
	printf("size = %u, empty = %d, capacity = %u\n", svec_size(svec), svec_empty(svec), svec_capacity(svec));

	/* One element was popped, so NUM_STRS - 1 strings remain to check. */
	char** s = svec_data(svec);
	for (i = 0; i < NUM_STRS - 1; ++i) {
		snprintf(buf, BUF_LEN, "%d", i);
		assert(!strcmp(*(s+i), buf) && !strcmp(buf, svec_at(svec, i)));
	}

	printf("resize to 10, and shrink to fit\n");
	svec_resize_default(svec, 10);
	svec_shrink_to_fit(svec);
	printf("size = %u, empty = %d, capacity = %u\n", svec_size(svec), svec_empty(svec), svec_capacity(svec));

	svec_dump(buf, BUF_LEN, svec);
	printf("%s\n", buf);

	printf("resize to 15\n");
	svec_resize_default(svec, 15);
	printf("size = %u, empty = %d, capacity = %u\n", svec_size(svec), svec_empty(svec), svec_capacity(svec));
	svec_dump(buf, BUF_LEN, svec);
	printf("%s\n", buf);

	svec_destroy(svec);

	return 0;
}