Esempio n. 1
0
/* Exercise a reduction handle created with flags == 0 (no flush policy).
 *
 * 100 items are appended to batch 0.  The assertions expect that op.forward
 * and op.reduce are never invoked and that op.sink is invoked once per item.
 */
void test_nopolicy (flux_t *h)
{
    flux_reduce_t *r;
    int n = 0;
    int failures = 0;

    clear_counts ();

    r = flux_reduce_create (h, reduce_ops, 0., NULL, 0);
    ok (r != NULL,
        "nopolicy: flux_reduce_create works");

    /* Feed 100 items into batch 0, counting any failed appends. */
    while (n < 100) {
        if (flux_reduce_append (r, xstrdup ("hi"), 0) < 0)
            failures++;
        n++;
    }
    ok (failures == 0,
        "nopolicy: flux_reduce_append added 100 items in batch 0");

    /* Check the callback counters. */
    cmp_ok (forward_calls, "==", 0,
        "nopolicy: op.forward not called as we are rank 0");
    cmp_ok (reduce_calls, "==", 0,
        "nopolicy: op.reduce not called as we have no flush policy");
    cmp_ok (sink_calls, "==", 100,
        "nopolicy: op.sink called 100 times");
    cmp_ok (sink_items, "==", 100,
        "nopolicy: op.sink processed 100 items");

    flux_reduce_destroy (r);
}
Esempio n. 2
0
/*
 * Return to the initial state: calls reset_cbuff() followed by
 * clear_counts().  When built with DEBUG defined, dump_cbuff() is
 * called afterwards.
 */
void reset(void)
{
    reset_cbuff();
    clear_counts();

#ifdef DEBUG
    dump_cbuff();
#endif
}
/*
 * Read string pairs from stdin, one pair per line, and hand each pair to
 * add_string_pair().  Once input is exhausted, clear_counts() and
 * initial_align() are called.
 *
 * How a line is split depends on g_input_format:
 *   INPUT_FORMAT_L2P  - two whitespace-separated words; each word is
 *                       limited to 1023 characters by the sscanf width.
 *   INPUT_FORMAT_NEWS - the first two tokens delimited by tab or newline.
 * Lines that do not yield two fields are skipped.  For any other value of
 * g_input_format every line is read and ignored.
 */
void read_stringpairs() {
	char *line = NULL;      /* allocated and grown by getline() */
	size_t capacity = 0;    /* defined starting size to pair with line == NULL */
	char str1[1024], str2[1024];

	while (getline(&line, &capacity, stdin) != -1) {
		if (g_input_format == INPUT_FORMAT_L2P) {
			if (sscanf(line, "%1023s %1023s", str1, str2) == 2)
				add_string_pair(str1, str2);
		} else if (g_input_format == INPUT_FORMAT_NEWS) {
			char *token1 = strtok(line, "\t\n");
			char *token2 = strtok(NULL, "\t\n");
			if (token1 != NULL && token2 != NULL)
				add_string_pair(token1, token2);
		}
	}

	/*
	 * Release the getline() buffer (it must be freed even when getline()
	 * fails).  The buffer was already overwritten on every iteration, so
	 * add_string_pair() cannot be relying on it staying alive.
	 */
	free(line);

	clear_counts();
	initial_align();
}
Esempio n. 4
0
/* Exercise a reduction handle created with FLUX_REDUCE_HWMFLUSH.
 *
 * The test walks through batches 0..4 in order, checking the reduce/sink
 * callback counters and the value reported for FLUX_REDUCE_OPT_HWM after
 * each step.  Steps depend on the state left by the preceding ones.
 */
void test_hwm (flux_t *h)
{
    flux_reduce_t *r;
    int n, failures, rc;
    unsigned int hwm;

    clear_counts ();

    r = flux_reduce_create (h, reduce_ops, 0., NULL, FLUX_REDUCE_HWMFLUSH);
    ok (r != NULL,
        "hwm: flux_reduce_create works");

    rc = flux_reduce_opt_get (r, FLUX_REDUCE_OPT_HWM, &hwm, sizeof (hwm));
    ok (rc == 0 && hwm == 0,
        "hwm: hwm is initially zero");

    /* Batch 0: the training batch.
     * Expected to behave as if no policy were set.
     */
    failures = 0;
    n = 0;
    while (n < 100) {
        if (flux_reduce_append (r, xstrdup ("hi"), 0) < 0)
            failures++;
        n++;
    }
    ok (failures == 0,
        "hwm.0: flux_reduce_append added 100 items");
    cmp_ok (reduce_calls, "==", 0,
        "hwm.0: op.reduce not called (training)");
    cmp_ok (sink_calls, "==", 100,
        "hwm.0: op.sink called 100 times");
    cmp_ok (sink_items, "==", 100,
        "hwm.0: op.sink processed 100 items");

    clear_counts ();

    /* Batch 1: a hwm is now in effect.
     * Append 99 items, i.e. one fewer than the hwm.
     */
    failures = 0;
    n = 0;
    while (n < 99) {
        if (flux_reduce_append (r, xstrdup ("hi"), 1) < 0)
            failures++;
        n++;
    }
    ok (failures == 0,
        "hwm.1: flux_reduce_append added 99 items");
    rc = flux_reduce_opt_get (r, FLUX_REDUCE_OPT_HWM, &hwm, sizeof (hwm));
    ok (rc == 0 && hwm == 100,
        "hwm.0: hwm is 100");
    cmp_ok (reduce_calls, "==", 98,
        "hwm.1: op.reduce called 98 times");
    cmp_ok (sink_calls, "==", 0,
        "hwm.1: op.sink not called yet");

    /* Complete batch 1 with its final item; the whole batch should be
     * sinked in one call.
     */
    rc = flux_reduce_append (r, xstrdup ("hi"), 1);
    ok (rc == 0,
        "hwm.1: flux_reduce_append added 1 item");
    cmp_ok (reduce_calls, "==", 99,
        "hwm.1: op.reduce called");
    cmp_ok (sink_calls, "==", 1,
        "hwm.1: op.sink called 1 time");
    cmp_ok (sink_items, "==", 100,
        "hwm.1: op.sink handled 100 items");
    rc = flux_reduce_opt_get (r, FLUX_REDUCE_OPT_HWM, &hwm, sizeof (hwm));
    ok (rc == 0 && hwm == 100,
        "hwm.1: hwm is 100");

    clear_counts ();

    /* Straggler handling.
     * Open batch 2 with one item, then append a late item for batch 1.
     * The late item is expected to be sinked right away and to bump the
     * previous hwm from 100 to 101.
     */
    rc = flux_reduce_append (r, xstrdup ("hi"), 2);
    ok (rc == 0,
        "hwm.2: flux_reduce_append added 1 item");
    cmp_ok (reduce_calls, "==", 0,
        "hwm.2: op.reduce not called");
    cmp_ok (sink_calls, "==", 0,
        "hwm.2: op.sink not called");
    rc = flux_reduce_append (r, xstrdup ("hi"), 1);
    ok (rc == 0,
        "hwm.1: flux_reduce_append added 1 straggler");
    cmp_ok (reduce_calls, "==", 0,
        "hwm.1: op.reduce not called");
    cmp_ok (sink_calls, "==", 1,
        "hwm.1: op.sink called 1 time");
    cmp_ok (sink_items, "==", 1,
        "hwm.1: op.sink handled 1 item");
    rc = flux_reduce_opt_get (r, FLUX_REDUCE_OPT_HWM, &hwm, sizeof (hwm));
    ok (rc == 0 && hwm == 101,
        "hwm.1: hwm is 101");

    /* Discard the straggler's sink so it is not counted against batch 2. */
    sink_calls = 0;
    sink_items = 0;

    /* One batch 2 item is already queued.
     * 99 more leaves the batch one item short of the 101 hwm.
     */
    failures = 0;
    n = 0;
    while (n < 99) {
        if (flux_reduce_append (r, xstrdup ("hi"), 2) < 0)
            failures++;
        n++;
    }
    ok (failures == 0,
        "hwm.2: flux_reduce_append added 99 items");
    cmp_ok (reduce_calls, "==", 99,
        "hwm.2: op.reduce called 99 times");
    cmp_ok (sink_calls, "==", 0,
        "hwm.2: op.sink not called yet");
    rc = flux_reduce_append (r, xstrdup ("hi"), 2);
    ok (rc == 0,
        "hwm.2: flux_reduce_append added 1 item");
    cmp_ok (sink_calls, "==", 1,
        "hwm.2: op.sink called 1 time");
    cmp_ok (sink_items, "==", 101,
        "hwm.2: op.sink handled 101 items");
    rc = flux_reduce_opt_get (r, FLUX_REDUCE_OPT_HWM, &hwm, sizeof (hwm));
    ok (rc == 0 && hwm == 101,
        "hwm.2: hwm is 101");

    clear_counts ();

    /* Explicitly set the hwm to 10, then append 20 items to batch 3.
     * The first 10 are reduced; the remaining 10 are each flushed
     * immediately.  Finally one batch 4 item is appended and the hwm is
     * checked to still read 10.
     */
    hwm = 10;
    rc = flux_reduce_opt_set (r, FLUX_REDUCE_OPT_HWM, &hwm, sizeof (hwm));
    ok (rc == 0,
        "hwm.3: hwm set to 10");
    failures = 0;
    n = 0;
    while (n < 20) {
        if (flux_reduce_append (r, xstrdup ("hi"), 3) < 0)
            failures++;
        n++;
    }
    ok (failures == 0,
        "hwm.3: flux_reduce_append added 20 items");
    cmp_ok (reduce_calls, "==", 9,
        "hwm.3: op.reduce called 9 times");
    cmp_ok (sink_calls, "==", 11,
        "hwm.3: op.sink called 11 times");
    cmp_ok (sink_items, "==", 20,
        "hwm.3: op.sink handled 20 items");
    rc = flux_reduce_append (r, xstrdup ("hi"), 4);
    ok (rc == 0,
        "hwm.4: flux_reduce_append added one item");
    hwm = 0; /* make sure the value checked below comes from opt_get */
    rc = flux_reduce_opt_get (r, FLUX_REDUCE_OPT_HWM, &hwm, sizeof (hwm));
    ok (rc == 0 && hwm == 10,
        "hwm.4: hwm is still 10");

    flux_reduce_destroy (r);
}
Esempio n. 5
0
/* Exercise a reduction handle created with FLUX_REDUCE_TIMEDFLUSH and a
 * timeout of 0.1.
 *
 * Items are appended before the reactor runs, then the reactor is run so
 * the timeout handler can fire.  The assertions check the reduce/sink
 * callback counters at each stage.  Bails out of the test program if the
 * handle cannot be created.
 */
void test_timed (flux_t *h)
{
    flux_reduce_t *r;
    int n, failures, rc;
    double timeout;

    clear_counts ();

    r = flux_reduce_create (h, reduce_ops, 0.1, NULL, FLUX_REDUCE_TIMEDFLUSH);
    ok (r != NULL,
        "timed: flux_reduce_create works");
    if (r == NULL)
        BAIL_OUT();

    rc = flux_reduce_opt_get (r, FLUX_REDUCE_OPT_TIMEOUT, &timeout,
                              sizeof (timeout));
    ok (rc == 0 && timeout == 0.1,
        "timed: flux_reduce_opt_get TIMEOUT returned timeout");

    /* Batch 0: 100 appends while the reactor is not yet running.
     * Each append after the first triggers a reduction; nothing is
     * expected to reach the sink yet.
     */
    failures = 0;
    n = 0;
    while (n < 100) {
        if (flux_reduce_append (r, xstrdup ("hi"), 0) < 0)
            failures++;
        n++;
    }
    ok (failures == 0,
        "timed.0: flux_reduce_append added 100 items");
    cmp_ok (reduce_calls, "==", 99,
        "timed.0: op.reduce called 99 times");
    cmp_ok (sink_calls, "==", 0,
        "timed.0: op.sink called 0 times");

    /* Run the reactor to give the timeout handler a chance to fire.
     * One firing is expected, sinking every item in a single call.
     */
    rc = flux_reactor_run (flux_get_reactor (h), 0);
    ok (rc == 0,
        "timed.0: reactor completed normally");
    cmp_ok (sink_calls, "==", 1,
        "timed.0: op.sink called 1 time");
    cmp_ok (sink_items, "==", 100,
        "timed.0: op.sink processed 100 items");

    clear_counts ();

    /* A further batch 0 item is expected to be flushed immediately. */
    rc = flux_reduce_append (r, xstrdup ("hi"), 0);
    ok (rc == 0,
        "timed.0: flux_reduce_append added 1 more item");
    cmp_ok (reduce_calls, "==", 0,
        "timed.0: op.reduce not called");
    cmp_ok (sink_calls, "==", 1,
        "timed.0: op.sink called 1 time");
    cmp_ok (sink_items, "==", 1,
        "timed.0: op.sink processed 1 items");

    clear_counts ();

    /* Batch 1: 100 appends, expected to mirror the first batch. */
    failures = 0;
    n = 0;
    while (n < 100) {
        if (flux_reduce_append (r, xstrdup ("hi"), 1) < 0)
            failures++;
        n++;
    }
    ok (failures == 0,
        "timed.1: flux_reduce_append added 100 items");
    cmp_ok (reduce_calls, "==", 99,
        "timed.1: op.reduce called 99 times");
    cmp_ok (sink_calls, "==", 0,
        "timed.1: op.sink called 0 times");

    /* Run the reactor again; one timeout firing should sink the batch
     * in a single call.
     */
    rc = flux_reactor_run (flux_get_reactor (h), 0);
    ok (rc == 0,
        "timed.1: reactor completed normally");
    cmp_ok (sink_calls, "==", 1,
        "timed.1: op.sink called 1 time");
    cmp_ok (sink_items, "==", 100,
        "timed.1: op.sink processed 100 items");

    flux_reduce_destroy (r);
}
Esempio n. 6
0
uint8_t GLZAformat(size_t insize, uint8_t * inbuf, size_t * outsize_ptr, uint8_t ** outbuf) {
  const uint32_t CHARS_TO_WRITE = 0x40000;
  uint8_t this_char, prev_char, next_char, user_cap_encoded, user_cap_lock_encoded, user_delta_encoded, stride;
  uint8_t *in_char_ptr, *end_char_ptr, *out_char_ptr;
  uint32_t i, j, k;
  uint32_t num_AZ, num_az_pre_AZ, num_az_post_AZ, num_spaces;
  uint32_t order_1_counts[0x100][0x100];
  uint32_t symbol_counts[0x100];
  double order_1_entropy, best_stride_entropy, saved_entropy[4];

  // format byte: B0: cap encoded, B3:B1 = stride (0 - 4), B5:B4 = log2 delta length (0 - 2), B6: little endian


  user_cap_encoded = 0;
  user_cap_lock_encoded = 0;
  user_delta_encoded = 0;

  *outbuf = (uint8_t *)malloc(2 * insize + 1);
  if (*outbuf == 0)
    return(0);

  end_char_ptr = inbuf + insize;
  num_AZ = 0;
  num_az_pre_AZ = 0;
  num_az_post_AZ = 0;
  num_spaces = 0;

  if (insize > 4) {
    in_char_ptr = inbuf;
    this_char = *in_char_ptr++;
    if (this_char == 0x20)
      num_spaces++;
    if ((this_char >= 'A') && (this_char <= 'Z')) {
      num_AZ++;
      next_char = *in_char_ptr;
      if (((next_char >= 'a') && (next_char <= 'z')) || ((next_char >= 'A') && (next_char <= 'Z')))
        num_az_post_AZ++;
    }

    while (in_char_ptr != end_char_ptr) {
      this_char = *in_char_ptr++;
      if (this_char == 0x20)
        num_spaces++;
      if ((this_char >= 'A') && (this_char <= 'Z')) {
        num_AZ++;
        prev_char = *(in_char_ptr - 2);
        next_char = *in_char_ptr;
        if (((next_char >= 'a') && (next_char <= 'z')) || ((next_char >= 'A') && (next_char <= 'Z')))
          num_az_post_AZ++;
        if (((prev_char >= 'a') && (prev_char <= 'z')) || ((prev_char >= 'A') && (prev_char <= 'Z')))
          num_az_pre_AZ++;
      }
    }
  }

  out_char_ptr = *outbuf;

  if (((num_AZ && (4 * num_az_post_AZ > num_AZ) && (num_az_post_AZ > num_az_pre_AZ)
      && (num_spaces > insize / 50)) && (user_cap_encoded != 1)) || (user_cap_encoded == 2)) {
#ifdef PRINTON
    fprintf(stderr,"Converting textual data\n");
#endif
    *out_char_ptr++ = 1;
    in_char_ptr = inbuf;
    while (in_char_ptr != end_char_ptr) {
      if ((*in_char_ptr >= 'A') && (*in_char_ptr <= 'Z')) {
        if (((*(in_char_ptr + 1) >= 'A') && (*(in_char_ptr + 1) <= 'Z') && (user_cap_lock_encoded != 1))
            && ((*(in_char_ptr + 2) < 'a') || (*(in_char_ptr + 2) > 'z'))) {
          *out_char_ptr++ = 'B';
          *out_char_ptr++ = *in_char_ptr++ + ('a' - 'A');
          *out_char_ptr++ = *in_char_ptr++ + ('a' - 'A');
          while ((*in_char_ptr >= 'A') && (*in_char_ptr <= 'Z'))
            *out_char_ptr++ = *in_char_ptr++ + ('a' - 'A');
          if ((*in_char_ptr >= 'a') && (*in_char_ptr <= 'z'))
            *out_char_ptr++ = 'C';
        }
        else {
          *out_char_ptr++ = 'C';
          *out_char_ptr++ = *in_char_ptr++ + ('a' - 'A');
        }
      }
      else if (*in_char_ptr >= 0xFE) {
        *out_char_ptr++ = *in_char_ptr++;
        *out_char_ptr++ = 0xFF;
      }
      else if (*in_char_ptr == 0xA) {
        in_char_ptr++;
        *out_char_ptr++ = 0xA;
        *out_char_ptr++ = ' ';
      }
      else
        *out_char_ptr++ = *in_char_ptr++;
    }
  }
  else if ((user_delta_encoded != 1) && (insize > 4)) {
    clear_counts(symbol_counts, order_1_counts);
    for (i = 0 ; i < insize - 1 ; i++) {
      symbol_counts[inbuf[i]]++;
      order_1_counts[inbuf[i]][inbuf[i+1]]++;
    }
    symbol_counts[inbuf[insize-1]]++;
    order_1_counts[inbuf[insize-1]][0x80]++;
    order_1_entropy = calculate_order_1_entropy(symbol_counts, order_1_counts);
    best_stride_entropy = order_1_entropy;
    stride = 0;

    for (k = 1 ; k <= 100 ; k++) {
      if (insize <= (size_t)k)
        break;
      clear_counts(symbol_counts, order_1_counts);
      if ((k == 2) | (k == 4)) {
        for (i = 0 ; i < k  ; i++) {
          symbol_counts[inbuf[i]]++;
          order_1_counts[inbuf[i]][0xFF & (inbuf[i+k] - inbuf[i])]++;
        }
        for (i = k ; i < (uint32_t)insize - k ; i++) {
          symbol_counts[0xFF & (inbuf[i] - inbuf[i-k])]++;
          order_1_counts[0xFF & (inbuf[i] - inbuf[i-k])][0xFF & (inbuf[i+k] - inbuf[i])]++;
        }
        for (i = (uint32_t)insize - k ; i < insize ; i++) {
          symbol_counts[0xFF & (inbuf[i] - inbuf[i-k])]++;
          order_1_counts[0xFF & (inbuf[i] - inbuf[i-k])][0x80]++;
        }
        order_1_entropy = calculate_order_1_entropy(symbol_counts, order_1_counts);
        if ((order_1_entropy < 0.95 * best_stride_entropy) || ((stride != 0) && (order_1_entropy < best_stride_entropy))) {
          stride = k;
          best_stride_entropy = order_1_entropy;
        }
      }
      else {
        for (i = 0 ; i < k - 1 ; i++) {
          symbol_counts[inbuf[i]]++;
          order_1_counts[inbuf[i]][inbuf[i+1]]++;
        }
        symbol_counts[inbuf[k-1]]++;
        order_1_counts[inbuf[k-1]][0xFF & (inbuf[k]-inbuf[0])]++;
        uint8_t failed_test = 0;
        i = k;
        if (insize > 100000) {
          uint32_t initial_test_size = 100000 + ((insize - 100000) >> 3);
          while (i < initial_test_size) {
            symbol_counts[0xFF & (inbuf[i] - inbuf[i-k])]++;
            order_1_counts[0xFF & (inbuf[i] - inbuf[i-k])][0xFF & (inbuf[i+1] - inbuf[i+1-k])]++;
            i++;
          }
          order_1_entropy = calculate_order_1_entropy(symbol_counts, order_1_counts);
          if (order_1_entropy >= 1.05 * best_stride_entropy * (double)initial_test_size / (double)insize)
            failed_test = 1;
        }
        if (failed_test == 0) {
          while (i < insize - 1) {
            symbol_counts[0xFF & (inbuf[i] - inbuf[i-k])]++;
            order_1_counts[0xFF & (inbuf[i] - inbuf[i-k])][0xFF & (inbuf[i+1] - inbuf[i+1-k])]++;
            i++;
          }
          symbol_counts[0xFF & (inbuf[insize-1] - inbuf[insize-1-k])]++;
          order_1_counts[0xFF & (inbuf[insize-1] - inbuf[insize-1-k])][0x80]++;
          order_1_entropy = calculate_order_1_entropy(symbol_counts, order_1_counts);
          if ((order_1_entropy < 0.95 * best_stride_entropy) || ((stride != 0) && (order_1_entropy < best_stride_entropy))) {
            stride = k;
            best_stride_entropy = order_1_entropy;
          }
        }
      }
    }