예제 #1
0
/*
 * Run the zero-copy-read benchmark: invoke vecsum_zcr_loop once per pass,
 * rewinding the file to offset 0 after each pass.
 *
 * The zero-copy options are configured to skip checksums and to use no
 * ByteBufferPool.
 *
 * @param ldata     libhdfs state; ldata->fs and ldata->file are used for
 *                  the rewind.
 * @param opts      benchmark options; opts->passes is the number of passes.
 *
 * @return          0 on success.  ENOMEM if the options object cannot be
 *                  allocated, the saved errno if configuring the options or
 *                  rewinding the file fails, or the nonzero code returned
 *                  by vecsum_zcr_loop.
 */
static int vecsum_zcr(struct libhdfs_data *ldata,
        const struct options *opts)
{
    int ret, pass;
    struct hadoopRzOptions *zopts = NULL;

    zopts = hadoopRzOptionsAlloc();
    if (!zopts) {
        fprintf(stderr, "hadoopRzOptionsAlloc failed.\n");
        ret = ENOMEM;
        goto done;
    }
    if (hadoopRzOptionsSetSkipChecksum(zopts, 1)) {
        /* Save errno first: perror may overwrite it. */
        ret = errno;
        perror("hadoopRzOptionsSetSkipChecksum failed: ");
        goto done;
    }
    if (hadoopRzOptionsSetByteBufferPool(zopts, NULL)) {
        ret = errno;
        perror("hadoopRzOptionsSetByteBufferPool failed: ");
        goto done;
    }
    for (pass = 0; pass < opts->passes; ++pass) {
        ret = vecsum_zcr_loop(pass, ldata, zopts, opts);
        if (ret) {
            fprintf(stderr, "vecsum_zcr_loop pass %d failed "
                "with error %d\n", pass, ret);
            goto done;
        }
        /*
         * Rewind for the next pass.  A failed seek must not be ignored:
         * the following pass would start from the wrong offset.
         */
        if (hdfsSeek(ldata->fs, ldata->file, 0)) {
            /* Never report success when the seek failed. */
            ret = errno ? errno : EIO;
            perror("hdfsSeek failed: ");
            goto done;
        }
    }
    ret = 0;
done:
    if (zopts)
        hadoopRzOptionsFree(zopts);
    return ret;
}
예제 #2
0
/*
 * Exercise hadoopReadZero against a test file and verify the data, buffer
 * lengths, file position and read statistics after each step.
 *
 * The steps run strictly in sequence; each one depends on the file position
 * and statistics left behind by the previous read:
 *   1. two half-block reads covering block 0,
 *   2. a SMALL_READ_LEN read from the start of block 1,
 *   3. reads with 'skip checksums' cleared and no ByteBufferPool, which are
 *      expected to fail with EPROTONOSUPPORT,
 *   4. a full-block read with a ByteBufferPool set,
 *   5. a zero-length read,
 *   6. a read after seeking to TEST_ZEROCOPY_FILE_LEN.
 *
 * NOTE(review): the EXPECT_* macros are defined elsewhere; presumably they
 * make this function return nonzero as soon as a check fails -- confirm.
 *
 * @param fs        the filesystem handle to read through.
 * @param fileName  path of the test file to open.
 *
 * @return          0 once every step has passed.
 */
static int doTestZeroCopyReads(hdfsFS fs, const char *fileName)
{
    hdfsFile file = NULL;
    struct hadoopRzOptions *opts = NULL;
    struct hadoopRzBuffer *buffer = NULL;
    /* Expected contents of the block currently being compared against;
     * released with free() before being replaced. */
    uint8_t *block;

    file = hdfsOpenFile(fs, fileName, O_RDONLY, 0, 0, 0);
    EXPECT_NONNULL(file);
    opts = hadoopRzOptionsAlloc();
    EXPECT_NONNULL(opts);
    EXPECT_ZERO(hadoopRzOptionsSetSkipChecksum(opts, 1));
    /* haven't read anything yet */
    EXPECT_ZERO(expectFileStats(file, 0LL, 0LL, 0LL, 0LL));
    /* presumably the reference data for block 0 -- confirm against
     * getZeroCopyBlockData */
    block = getZeroCopyBlockData(0);
    EXPECT_NONNULL(block);
    /* first read is half of a block. */
    buffer = hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2);
    EXPECT_NONNULL(buffer);
    EXPECT_INT_EQ(TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2,
          hadoopRzBufferLength(buffer));
    EXPECT_ZERO(memcmp(hadoopRzBufferGet(buffer), block,
          TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2));
    hadoopRzBufferFree(file, buffer);
    /* read the next half of the block */
    buffer = hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2);
    EXPECT_NONNULL(buffer);
    EXPECT_INT_EQ(TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2,
          hadoopRzBufferLength(buffer));
    /* compare against the second half of the reference block */
    EXPECT_ZERO(memcmp(hadoopRzBufferGet(buffer),
          block + (TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2),
          TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2));
    hadoopRzBufferFree(file, buffer);
    free(block);
    /* one full block has been consumed; all four statistics are expected to
     * equal the block size */
    EXPECT_ZERO(expectFileStats(file, TEST_ZEROCOPY_FULL_BLOCK_SIZE, 
              TEST_ZEROCOPY_FULL_BLOCK_SIZE,
              TEST_ZEROCOPY_FULL_BLOCK_SIZE,
              TEST_ZEROCOPY_FULL_BLOCK_SIZE));
    /* Now let's read just a few bytes. */
    buffer = hadoopReadZero(file, opts, SMALL_READ_LEN);
    EXPECT_NONNULL(buffer);
    EXPECT_INT_EQ(SMALL_READ_LEN, hadoopRzBufferLength(buffer));
    /* this reference block stays allocated: it is also used to check the
     * full-block read further down */
    block = getZeroCopyBlockData(1);
    EXPECT_NONNULL(block);
    EXPECT_ZERO(memcmp(block, hadoopRzBufferGet(buffer), SMALL_READ_LEN));
    hadoopRzBufferFree(file, buffer);
    /* the file position must have advanced by exactly the bytes read so far */
    EXPECT_INT64_EQ(
          (int64_t)TEST_ZEROCOPY_FULL_BLOCK_SIZE + (int64_t)SMALL_READ_LEN,
          hdfsTell(fs, file));
    EXPECT_ZERO(expectFileStats(file,
          TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN,
          TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN,
          TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN,
          TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN));

    /* Clear 'skip checksums' and test that we can't do zero-copy reads any
     * more.  Since there is no ByteBufferPool set, we should fail with
     * EPROTONOSUPPORT.
     */
    EXPECT_ZERO(hadoopRzOptionsSetSkipChecksum(opts, 0));
    EXPECT_NULL(hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE));
    EXPECT_INT_EQ(EPROTONOSUPPORT, errno);

    /* Verify that setting a NULL ByteBufferPool class works.  The read is
     * still expected to fail in the same way as above. */
    EXPECT_ZERO(hadoopRzOptionsSetByteBufferPool(opts, NULL));
    EXPECT_ZERO(hadoopRzOptionsSetSkipChecksum(opts, 0));
    EXPECT_NULL(hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE));
    EXPECT_INT_EQ(EPROTONOSUPPORT, errno);

    /* Now set a ByteBufferPool and try again.  It should succeed this time. */
    EXPECT_ZERO(hadoopRzOptionsSetByteBufferPool(opts,
          ELASTIC_BYTE_BUFFER_POOL_CLASS));
    buffer = hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE);
    EXPECT_NONNULL(buffer);
    EXPECT_INT_EQ(TEST_ZEROCOPY_FULL_BLOCK_SIZE, hadoopRzBufferLength(buffer));
    /* The first three statistics grow by a full block while the fourth keeps
     * its previous value.  NOTE(review): the fourth is presumably the
     * zero-copy byte counter, i.e. this read was served through the pool
     * rather than zero-copy -- confirm against expectFileStats. */
    EXPECT_ZERO(expectFileStats(file,
          (2 * TEST_ZEROCOPY_FULL_BLOCK_SIZE) + SMALL_READ_LEN,
          (2 * TEST_ZEROCOPY_FULL_BLOCK_SIZE) + SMALL_READ_LEN,
          (2 * TEST_ZEROCOPY_FULL_BLOCK_SIZE) + SMALL_READ_LEN,
          TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN));
    /* The read started SMALL_READ_LEN bytes into block 1, so its head must
     * match the remainder of block 1 ... */
    EXPECT_ZERO(memcmp(block + SMALL_READ_LEN, hadoopRzBufferGet(buffer),
        TEST_ZEROCOPY_FULL_BLOCK_SIZE - SMALL_READ_LEN));
    free(block);
    /* ... and its last SMALL_READ_LEN bytes must match the start of
     * block 2. */
    block = getZeroCopyBlockData(2);
    EXPECT_NONNULL(block);
    EXPECT_ZERO(memcmp(block, (uint8_t*)hadoopRzBufferGet(buffer) +
        (TEST_ZEROCOPY_FULL_BLOCK_SIZE - SMALL_READ_LEN), SMALL_READ_LEN));
    hadoopRzBufferFree(file, buffer);

    /* Check the result of a zero-length read: a buffer object with a
     * non-NULL data pointer and length 0 is expected. */
    buffer = hadoopReadZero(file, opts, 0);
    EXPECT_NONNULL(buffer);
    EXPECT_NONNULL(hadoopRzBufferGet(buffer));
    EXPECT_INT_EQ(0, hadoopRzBufferLength(buffer));
    hadoopRzBufferFree(file, buffer);

    /* Check the result of reading past EOF: a buffer object is still
     * returned, but its data pointer is expected to be NULL. */
    EXPECT_INT_EQ(0, hdfsSeek(fs, file, TEST_ZEROCOPY_FILE_LEN));
    buffer = hadoopReadZero(file, opts, 1);
    EXPECT_NONNULL(buffer);
    EXPECT_NULL(hadoopRzBufferGet(buffer));
    hadoopRzBufferFree(file, buffer);

    /* Cleanup */
    free(block);
    hadoopRzOptionsFree(opts);
    EXPECT_ZERO(hdfsCloseFile(fs, file));
    return 0;
}
예제 #3
0
/*
 * Benchmark driver: sums the doubles of a file repeatedly using one of four
 * access methods and reports the time taken by every batch of passes.
 *
 * Arguments: <filename> <num_iters> <method>, where method is one of
 *   m - copy the file once into an mlock'd, aligned in-memory buffer
 *   r - re-read the local file with read() on every pass
 *   z - libhdfs zero-copy reads
 *   h - plain libhdfs reads (not implemented; aborts)
 *
 * Passes run in batches of print_iters, so the number of passes actually
 * executed is num_iters rounded up to a multiple of print_iters.
 *
 * Returns 0 on success, -1 if a local read fails inside the benchmark loop;
 * every other failure terminates the process via exit() or abort().
 */
int main(int argc, char*argv[]) {
  struct hadoopRzOptions *zopts = NULL;
  struct hadoopRzBuffer *rzbuf = NULL;
  if (argc < 4) {
    usage();
    return 1;  // only reached if usage() returns; argv[1..3] are not valid
  }
  char* filename = argv[1];
  int num_iters = atoi(argv[2]);
  char method = *argv[3];
  // strchr() also matches the terminating NUL, so an empty method string
  // has to be rejected explicitly.
  if (method == '\0' || NULL == strchr("mrzh", method)) {
    usage();
    return 1;
  }

  int ret;

  void* aligned = NULL;
  // If local mem, copy file into a local mlock'd aligned buffer
  if (method == 'm') {
    printf("Creating %d of aligned data...\n", size);
    aligned = memalign(32, size);
    if (aligned == NULL) {
      perror("memalign");
      exit(3);
    }
    // Read the specified file in buffer
    int fd = open(filename, O_RDONLY);
    if (fd < 0) {
      perror("open");
      exit(-1);
    }
    int total_bytes = 0;
    while (total_bytes < size) {
      ssize_t bytes = read(fd, (char*)aligned + total_bytes,
                           size - total_bytes);
      if (bytes == -1) {
        perror("read");
        exit(-1);
      }
      if (bytes == 0) {
        // EOF before the buffer is full: without this check the loop would
        // spin forever on a file shorter than size.
        fprintf(stderr, "read: unexpected end of file after %d bytes\n",
                total_bytes);
        exit(-1);
      }
      total_bytes += (int)bytes;
    }
    close(fd);

    printf("Attempting mlock of buffer\n");
    ret = mlock(aligned, size);
    if (ret != 0) {
      perror("mlock");
      exit(2);
    }
  }

  printf("Summing output %d times...\n", num_iters);
  int i, j, k, l;
  // Copy data into this intermediate buffer
  const int buffer_size = (8*1024*1024);
  void *temp_buffer;
  ret = posix_memalign(&temp_buffer, 32, buffer_size);
  if (ret != 0) {
    printf("error in posix_memalign\n");
    exit(ret);
  }
  // This is for loop unrolling (unroll 4 times)
  __m128d* tempd = memalign(32, 4 * sizeof(*tempd));
  struct timespec start, end;
  if (tempd == NULL) {
    perror("memalign");
    exit(3);
  }
  const int print_iters = 10;
  double end_sum = 0;

  hdfsFS fs = NULL;
  if (method == 'h' || method == 'z') {
    struct hdfsBuilder *builder = hdfsNewBuilder();
    if (builder == NULL) {
      printf("Could not allocate hdfs builder!\n");
      exit(-1);
    }

    hdfsBuilderSetNameNode(builder, "default");
    hdfsBuilderConfSetStr(builder, "dfs.client.read.shortcircuit.skip.checksum",
                          "true");
    fs = hdfsBuilderConnect(builder);
    if (fs == NULL) {
      printf("Could not connect to default namenode!\n");
      exit(-1);
    }
  }

  for (i=0; i<num_iters; i+=print_iters) {
    gettime(&start);
    // Running sum for this batch; must start at zero, otherwise the result
    // is built on an indeterminate value.
    __m128d sum = _mm_setzero_pd();
    for (j=0; j<print_iters; j++) {
      int offset = 0;
      int fd = 0;
      hdfsFile hfile = NULL;

      if (method == 'r') {
        fd = open(filename, O_RDONLY);
        if (fd < 0) {
          perror("open");
          exit(-1);
        }
      }
      // hdfs zerocopy read
      else if (method == 'z') {
        zopts = hadoopRzOptionsAlloc();
        if (!zopts) abort();
        if (hadoopRzOptionsSetSkipChecksum(zopts, 1)) abort();
        if (hadoopRzOptionsSetByteBufferPool(zopts, NULL)) abort();
        hfile = hdfsOpenFile(fs, filename, O_RDONLY, 0, 0, 0);
      }
      // hdfs normal read
      else if (method == 'h') {
        hfile = hdfsOpenFile(fs, filename, O_RDONLY, 0, 0, 0);
      }
      if ((method == 'z' || method == 'h') && hfile == NULL) {
        fprintf(stderr, "Could not open %s via hdfs\n", filename);
        exit(-1);
      }

      // Each iteration, process the buffer once
      for (k=0; k<size; k+=buffer_size) {
        // Set this with varying methods!
        const double* buffer = NULL;

        // Local file read
        if (method == 'r') {
          // do read
          int total_bytes = 0;
          while (total_bytes < buffer_size) {
            ssize_t bytes = read(fd, (char*)temp_buffer + total_bytes,
                                 buffer_size - total_bytes);
            if (bytes < 0) {
              printf("Error on read\n");
              return -1;
            }
            if (bytes == 0) {
              // EOF mid-chunk would otherwise loop forever.
              printf("Error on read: unexpected end of file\n");
              return -1;
            }
            total_bytes += (int)bytes;
          }
          buffer = (const double*)temp_buffer;
        }
        // Local memory read
        else if (method == 'm') {
          buffer = (const double*)((char*)aligned + offset);
        }
        // hdfs zerocopy read
        else if (method == 'z') {
          int len;
          rzbuf = hadoopReadZero(hfile, zopts, buffer_size);
          if (!rzbuf) abort();
          buffer = hadoopRzBufferGet(rzbuf);
          if (!buffer) abort();
          len = hadoopRzBufferLength(rzbuf);
          // The summing loop below always consumes a full chunk.
          if (len < buffer_size) abort();
        }
        // hdfs normal read
        else if (method == 'h') {
          // Not supported: hdfsReadFully still needs to be implemented, so
          // there is nothing to fill temp_buffer with.
          abort();
        }

        offset += buffer_size;

        // Unroll the loop a bit: each step consumes 8 doubles (64 bytes) as
        // four packed pairs.
        const double* a_ptr = &(buffer[0]);
        const double* b_ptr = &(buffer[2]);
        const double* c_ptr = &(buffer[4]);
        const double* d_ptr = &(buffer[6]);
        for (l=0; l<buffer_size; l+=64) {
          tempd[0] = _mm_load_pd(a_ptr);
          tempd[1] = _mm_load_pd(b_ptr);
          tempd[2] = _mm_load_pd(c_ptr);
          tempd[3] = _mm_load_pd(d_ptr);
          sum = _mm_add_pd(sum, tempd[0]);
          sum = _mm_add_pd(sum, tempd[1]);
          sum = _mm_add_pd(sum, tempd[2]);
          sum = _mm_add_pd(sum, tempd[3]);
          a_ptr += 8;
          b_ptr += 8;
          c_ptr += 8;
          d_ptr += 8;
        }
        if (method == 'z') {
          hadoopRzBufferFree(hfile, rzbuf);
        }
      }
      // Local file read
      if (method == 'r') {
        close(fd);
      }
      // hdfs zerocopy read
      // hdfs normal read
      else if (method == 'z' || method == 'h') {
        hdfsCloseFile(fs, hfile);
        if (zopts) {
          // Options are allocated per pass; release them per pass as well.
          hadoopRzOptionsFree(zopts);
          zopts = NULL;
        }
      }
      printf("iter %d complete\n", j);
    }
    gettime(&end);
    print_duration(&start, &end, (long)size*print_iters);
    // Force the compiler to actually generate above code
    double lanes[2];
    _mm_storeu_pd(lanes, sum);
    end_sum += lanes[0] + lanes[1];
  }
  if (method == 'z' || method == 'h') {
    hdfsDisconnect(fs);
  }
  free(tempd);
  free(temp_buffer);
  free(aligned);
  printf("%f\n", end_sum);
  return 0;
}