/**
 * Connect to the NameNode of a single-NameNode MiniDFS cluster.
 *
 * @param cl        The mini-cluster to connect to.
 * @param fs        (out param) The connected filesystem, on success.
 * @param username  Optional user name to connect as (may be NULL).
 *
 * @return          0 on success; a negative error code otherwise.
 */
static int hdfsSingleNameNodeConnect(struct NativeMiniDfsCluster *cl, hdfsFS *fs,
                                     const char *username)
{
    int ret;
    int port;
    hdfsFS hdfs;
    struct hdfsBuilder *bld;

    /* FIX: keep the port in an int.  tPort is an unsigned 16-bit type, so
     * casting the return value before the check made 'port < 0' unreachable
     * and silently turned errors into bogus port numbers. */
    port = nmdGetNameNodePort(cl);
    if (port < 0) {
        fprintf(stderr, "hdfsSingleNameNodeConnect: nmdGetNameNodePort "
                "returned error %d\n", port);
        return port;
    }
    bld = hdfsNewBuilder();
    if (!bld)
        return -ENOMEM;
    hdfsBuilderSetForceNewInstance(bld);
    hdfsBuilderSetNameNode(bld, "localhost");
    hdfsBuilderSetNameNodePort(bld, (tPort)port);
    /* Set both the deprecated and current names of the block-size key. */
    hdfsBuilderConfSetStr(bld, "dfs.block.size",
                          TO_STR(TLH_DEFAULT_BLOCK_SIZE));
    hdfsBuilderConfSetStr(bld, "dfs.blocksize",
                          TO_STR(TLH_DEFAULT_BLOCK_SIZE));
    if (username) {
        hdfsBuilderSetUserName(bld, username);
    }
    /* hdfsBuilderConnect frees the builder whether or not it succeeds,
     * so no hdfsFreeBuilder() is needed on either path. */
    hdfs = hdfsBuilderConnect(bld);
    if (!hdfs) {
        ret = -errno;
        return ret;
    }
    *fs = hdfs;
    return 0;
}
Example #2
0
/**
 * Create the libhdfs test data: connect to the namenode at
 * opts->rpc_address, make sure opts->path exists with the expected length
 * (re-creating the file if it is missing or the wrong size), and open it
 * for reading.
 *
 * @param opts  Test options (RPC address, file path, expected length).
 *
 * @return      The newly allocated test data on success; NULL on failure.
 *              The caller owns the result and frees it with
 *              libhdfs_data_free().
 */
static struct libhdfs_data *libhdfs_data_create(const struct options *opts)
{
    struct libhdfs_data *ldata = NULL;
    struct hdfsBuilder *builder = NULL;
    hdfsFileInfo *pinfo = NULL;

    ldata = calloc(1, sizeof(*ldata));
    if (!ldata) {
        fprintf(stderr, "Failed to allocate libhdfs test data.\n");
        goto error;
    }
    builder = hdfsNewBuilder();
    if (!builder) {
        fprintf(stderr, "Failed to create builder.\n");
        goto error;
    }
    hdfsBuilderSetNameNode(builder, opts->rpc_address);
    /* Skip checksums so short-circuit reads are always possible. */
    hdfsBuilderConfSetStr(builder,
        "dfs.client.read.shortcircuit.skip.checksum", "true");
    /* hdfsBuilderConnect frees the builder whether or not it succeeds. */
    ldata->fs = hdfsBuilderConnect(builder);
    if (!ldata->fs) {
        fprintf(stderr, "Could not connect to default namenode!\n");
        goto error;
    }
    pinfo = hdfsGetPathInfo(ldata->fs, opts->path);
    if (!pinfo) {
        int err = errno;  /* save errno before fprintf can clobber it */
        fprintf(stderr, "hdfsGetPathInfo(%s) failed: error %d (%s).  "
                "Attempting to re-create file.\n",
            opts->path, err, strerror(err));
        if (libhdfs_data_create_file(ldata, opts))
            goto error;
    } else if (pinfo->mSize != opts->length) {
        fprintf(stderr, "hdfsGetPathInfo(%s) failed: length was %lld, "
                "but we want length %lld.  Attempting to re-create file.\n",
                opts->path, (long long)pinfo->mSize, (long long)opts->length);
        if (libhdfs_data_create_file(ldata, opts))
            goto error;
    }
    ldata->file = hdfsOpenFile(ldata->fs, opts->path, O_RDONLY, 0, 0, 0);
    if (!ldata->file) {
        int err = errno;
        fprintf(stderr, "hdfsOpenFile(%s) failed: error %d (%s)\n",
            opts->path, err, strerror(err));
        goto error;
    }
    ldata->length = opts->length;
    /* FIX: pinfo was previously leaked on the success path; it was only
     * freed under the error label. */
    if (pinfo)
        hdfsFreeFileInfo(pinfo, 1);
    return ldata;

error:
    if (pinfo)
        hdfsFreeFileInfo(pinfo, 1);
    if (ldata)
        libhdfs_data_free(ldata);
    return NULL;
}
/**
 * Test libhdfs zero-copy reads against a single-process native mini-cluster.
 *
 * Brings up a NativeMiniDfsCluster with short-circuit reads configured,
 * connects with checksum skipping enabled (so mmap-based zero-copy reads
 * are always possible), creates a test file, runs the zero-copy read
 * checks, and tears the cluster down.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE otherwise.  The EXPECT_*
 * macros (defined elsewhere in this file) return from main on failure.
 */
int main(void)
{
    int port;
    struct NativeMiniDfsConf conf = {
        1, /* doFormat */
        0, /* webhdfsEnabled */
        0, /* namenodeHttpPort */
        1, /* configureShortCircuit */
    };
    /* NOTE(review): presumably filled in by createZeroCopyTestFile below —
     * confirm against its definition. */
    char testFileName[TEST_FILE_NAME_LENGTH];
    hdfsFS fs;
    struct NativeMiniDfsCluster* cl;
    struct hdfsBuilder *bld;

    cl = nmdCreate(&conf);
    EXPECT_NONNULL(cl);
    EXPECT_ZERO(nmdWaitClusterUp(cl));
    port = nmdGetNameNodePort(cl);
    if (port < 0) {
        fprintf(stderr, "TEST_ERROR: test_zerocopy: "
                "nmdGetNameNodePort returned error %d\n", port);
        return EXIT_FAILURE;
    }
    bld = hdfsNewBuilder();
    EXPECT_NONNULL(bld);
    /* Copy the mini-cluster's configuration (including the port) into the
     * builder. */
    EXPECT_ZERO(nmdConfigureHdfsBuilder(cl, bld));
    hdfsBuilderSetForceNewInstance(bld);
    hdfsBuilderConfSetStr(bld, "dfs.block.size",
                          TO_STR(TEST_ZEROCOPY_FULL_BLOCK_SIZE));
    /* ensure that we'll always get our mmaps */
    hdfsBuilderConfSetStr(bld, "dfs.client.read.shortcircuit.skip.checksum",
                          "true");
    fs = hdfsBuilderConnect(bld);
    EXPECT_NONNULL(fs);
    EXPECT_ZERO(createZeroCopyTestFile(fs, testFileName,
          TEST_FILE_NAME_LENGTH));
    EXPECT_ZERO(doTestZeroCopyReads(fs, testFileName));
    EXPECT_ZERO(hdfsDisconnect(fs));
    EXPECT_ZERO(nmdShutdown(cl));
    nmdFree(cl);
    fprintf(stderr, "TEST_SUCCESS\n"); 
    return EXIT_SUCCESS;
}
/**
 * hdfs_get: read an entire HDFS file, discarding the contents.
 *
 * usage: hdfs_get <name node address> <name node port> <input file>
 *
 * Returns 0 on success, 1 on any error.
 */
int main(int argc, char* argv[]) {
  if (argc < 4) {
    printf("usage: hdfs_get <name node address> <name node port> <input file>\n");
    return 1;
  }
  // Sleep for 100ms.
  usleep(100 * 1000);
  struct hdfsBuilder* hdfs_builder = hdfsNewBuilder();
  if (!hdfs_builder) {
    printf("Could not create HDFS builder\n");
    return 1;
  }
  hdfsBuilderSetNameNode(hdfs_builder, argv[1]);
  int port = atoi(argv[2]);
  hdfsBuilderSetNameNodePort(hdfs_builder, port);
  hdfsBuilderConfSetStr(hdfs_builder, "dfs.client.read.shortcircuit", "false");
  // FIX: hdfsBuilderConnect() frees the builder whether or not it succeeds,
  // so the hdfsFreeBuilder() call that used to follow was a double free.
  hdfsFS fs = hdfsBuilderConnect(hdfs_builder);
  if (!fs) {
    printf("Could not connect to HDFS\n");
    return 1;
  }

  hdfsFile file_in = hdfsOpenFile(fs, argv[3], O_RDONLY, 0, 0, 0);
  if (!file_in) {
    // FIX: a failed open used to fall straight into hdfsRead(NULL).
    printf("Could not open file %s: %s\n", argv[3], hdfsGetLastError());
    hdfsDisconnect(fs);
    return 1;
  }
  char buffer[1048576];
  int done = 0;
  do {
    // FIX: pass the array itself (decays to char*), not its address,
    // and size it with sizeof instead of repeating the magic number.
    done = hdfsRead(fs, file_in, buffer, sizeof(buffer));
  } while (done > 0);  // hdfsRead returns 0 at EOF, negative on error
  if (done < 0) {
    printf("Failed to read file: %s\n", hdfsGetLastError());
    // FIX: close the file and connection on the error path too.
    hdfsCloseFile(fs, file_in);
    hdfsDisconnect(fs);
    return 1;
  }

  hdfsCloseFile(fs, file_in);
  hdfsDisconnect(fs);
  return 0;
}
/**
 * Micro-benchmark: repeatedly sum a file's contents as packed doubles using
 * one of four read methods, selected by argv[3]:
 *   'm' - file pre-copied into an mlock'd aligned buffer (pure memory read)
 *   'r' - ordinary local read(2) into an intermediate buffer
 *   'z' - libhdfs zero-copy (mmap) reads
 *   'h' - libhdfs normal reads (not implemented; aborts)
 *
 * usage: <prog> <filename> <num_iters> <m|r|z|h>
 *
 * NOTE(review): 'size', 'usage', 'gettime' and 'print_duration' are defined
 * elsewhere in this file; 'size' is assumed to be the file length in bytes
 * and a multiple of buffer_size — confirm against their definitions.
 */
int main(int argc, char*argv[]) {
  struct hadoopRzOptions *zopts = NULL;
  struct hadoopRzBuffer *rzbuf = NULL;
  if (argc < 4) {
    usage();
  }
  char* filename = argv[1];
  int num_iters = atoi(argv[2]);
  char method = *argv[3];
  if (NULL == strchr("mrzh", method)) {
    usage();
  }

  int ret;

  void* aligned = NULL;
  // If local mem, copy file into a local mlock'd aligned buffer
  if (method == 'm') {
    printf("Creating %d of aligned data...\n", size);
    aligned = memalign(32, size);
    if (aligned == NULL) {
      perror("memalign");
      exit(3);
    }
    // Read the specified file in buffer
    int fd = open(filename, O_RDONLY);
    if (fd < 0) {
      // FIX: open() was previously unchecked; read(-1, ...) would fail.
      perror("open");
      exit(-1);
    }
    int total_bytes = 0;
    while (total_bytes < size) {
      int bytes = read(fd, (char *)aligned + total_bytes, size - total_bytes);
      if (bytes == -1) {
        perror("read");
        exit(-1);
      }
      if (bytes == 0) {
        // FIX: EOF before 'size' bytes used to spin in this loop forever.
        fprintf(stderr, "premature EOF reading %s\n", filename);
        exit(-1);
      }
      total_bytes += bytes;
    }
    close(fd);  // FIX: descriptor was previously leaked

    printf("Attempting mlock of buffer\n");
    ret = mlock(aligned, size);
    if (ret != 0) {
      perror("mlock");
      exit(2);
    }
  }

  printf("Summing output %d times...\n", num_iters);
  int i, j, k, l;
  // Copy data into this intermediate buffer
  const int buffer_size = (8*1024*1024);
  void *temp_buffer;
  ret = posix_memalign(&temp_buffer, 32, buffer_size);
  if (ret != 0) {
    printf("error in posix_memalign\n");
    exit(ret);
  }
  // This is for loop unrolling (unroll 4 times)
  __m128d* tempd = memalign(32, 16*4);
  struct timespec start, end;
  if (tempd == NULL) {
    perror("memalign");
    exit(3);
  }
  const int print_iters = 10;
  double end_sum = 0;

  hdfsFS fs = NULL;
  if (method == 'h' || method == 'z') {
    struct hdfsBuilder *builder = hdfsNewBuilder();
    if (!builder) {
      // FIX: hdfsNewBuilder() was previously unchecked.
      printf("Could not create HDFS builder!\n");
      exit(-1);
    }
    hdfsBuilderSetNameNode(builder, "default");
    hdfsBuilderConfSetStr(builder, "dfs.client.read.shortcircuit.skip.checksum",
                          "true");
    fs = hdfsBuilderConnect(builder);
    if (fs == NULL) {
      printf("Could not connect to default namenode!\n");
      exit(-1);
    }
  }
  // FIX: allocate the zero-copy options once up front, instead of leaking a
  // fresh allocation on every inner-loop iteration.
  if (method == 'z') {
    zopts = hadoopRzOptionsAlloc();
    if (!zopts) abort();
    if (hadoopRzOptionsSetSkipChecksum(zopts, 1)) abort();
    if (hadoopRzOptionsSetByteBufferPool(zopts, NULL)) abort();
  }

  for (i=0; i<num_iters; i+=print_iters) {
    gettime(&start);
    // FIX: 'sum' was previously read uninitialized (undefined behavior);
    // start each timed batch from zero.
    __m128d sum = _mm_setzero_pd();
    // Number of packed doubles we've processed
    for (j=0; j<print_iters; j++) {
      int offset = 0;
      int fd = 0;
      hdfsFile file = NULL;

      if (method == 'r') {
        fd = open(filename, O_RDONLY);
        if (fd < 0) {
          // FIX: open() was previously unchecked.
          perror("open");
          exit(-1);
        }
      }
      // hdfs zerocopy read
      else if (method == 'z') {
        file = hdfsOpenFile(fs, filename, O_RDONLY, 0, 0, 0);
        if (!file) abort();  // FIX: was unchecked
      }
      // hdfs normal read
      else if (method == 'h') {
        file = hdfsOpenFile(fs, filename, O_RDONLY, 0, 0, 0);
        if (!file) abort();  // FIX: was unchecked
      }

      // Each iteration, process the buffer once
      for (k=0; k<size; k+=buffer_size) {
        // Set this with varying methods!
        const double* buffer = NULL;

        // Local file read
        if (method == 'r') {
          // do read
          int total_bytes = 0;
          while (total_bytes < buffer_size) {
            int bytes = read(fd, (char *)temp_buffer + total_bytes,
                             buffer_size - total_bytes);
            if (bytes < 0) {
              printf("Error on read\n");
              return -1;
            }
            if (bytes == 0) {
              // FIX: EOF used to spin in this loop forever as well.
              printf("Premature EOF on read\n");
              return -1;
            }
            total_bytes += bytes;
          }
          buffer = (double*)temp_buffer;
        }
        // Local memory read
        else if (method == 'm') {
          buffer = (const double*)((const char *)aligned + offset);
        }
        // hdfs zerocopy read
        else if (method == 'z') {
          int len;
          rzbuf = hadoopReadZero(file, zopts, buffer_size);
          if (!rzbuf) abort();
          buffer = hadoopRzBufferGet(rzbuf);
          if (!buffer) abort();
          len = hadoopRzBufferLength(rzbuf);
          if (len < buffer_size) abort();
        }
        // hdfs normal read
        else if (method == 'h') {
          abort(); // need to implement hdfsReadFully
          //ret = hdfsReadFully(fs, file, temp_buffer, buffer_size);
          if (ret == -1) {
            printf("Error: hdfsReadFully errored\n");
            exit(-1);
          }
          buffer = temp_buffer;
        }

        offset += buffer_size;

        // Unroll the loop a bit: each step loads 4 packed-double pairs
        // (8 doubles == 64 bytes), so 'l' advances by 64 bytes while each
        // pointer advances by 8 doubles.
        const double* a_ptr = &(buffer[0]);
        const double* b_ptr = &(buffer[2]);
        const double* c_ptr = &(buffer[4]);
        const double* d_ptr = &(buffer[6]);
        for (l=0; l<buffer_size; l+=64) {
          tempd[0] = _mm_load_pd(a_ptr);
          tempd[1] = _mm_load_pd(b_ptr);
          tempd[2] = _mm_load_pd(c_ptr);
          tempd[3] = _mm_load_pd(d_ptr);
          sum = _mm_add_pd(sum, tempd[0]);
          sum = _mm_add_pd(sum, tempd[1]);
          sum = _mm_add_pd(sum, tempd[2]);
          sum = _mm_add_pd(sum, tempd[3]);
          a_ptr += 8;
          b_ptr += 8;
          c_ptr += 8;
          d_ptr += 8;
        }
        if (method == 'z') {
          hadoopRzBufferFree(file, rzbuf);
        }
      }
      // Local file read
      if (method == 'r') {
        close(fd);
      }
      // hdfs zerocopy read
      // hdfs normal read
      else if (method == 'z' || method == 'h') {
        hdfsCloseFile(fs, file);
      }
      printf("iter %d complete\n", j);
    }
    gettime(&end);
    print_duration(&start, &end, (long)size*print_iters);
    // Force the compiler to actually generate above code
    double* unpack = (double*)&sum;
    double final = unpack[0] + unpack[1];
    end_sum += final;
  }
  if (method == 'z' || method == 'h') {
    hdfsDisconnect(fs);
  }
  // FIX: release the working buffers, which were previously leaked.
  free(temp_buffer);
  free(tempd);
  free(aligned);
  printf("%f\n", end_sum);
  return 0;
}