static int hdfsSingleNameNodeConnect(struct NativeMiniDfsCluster *cl,
                                     hdfsFS *fs, const char *username)
{
    int ret;
    int port;
    hdfsFS hdfs;
    struct hdfsBuilder *bld;

    /* Keep the port as an int: tPort is unsigned, so a (tPort)-cast error
     * code from nmdGetNameNodePort() could never compare < 0. */
    port = nmdGetNameNodePort(cl);
    if (port < 0) {
        fprintf(stderr, "hdfsSingleNameNodeConnect: nmdGetNameNodePort "
                "returned error %d\n", port);
        return port;
    }
    bld = hdfsNewBuilder();
    if (!bld)
        return -ENOMEM;
    hdfsBuilderSetForceNewInstance(bld);
    hdfsBuilderSetNameNode(bld, "localhost");
    hdfsBuilderSetNameNodePort(bld, (tPort)port);
    hdfsBuilderConfSetStr(bld, "dfs.block.size",
                          TO_STR(TLH_DEFAULT_BLOCK_SIZE));
    hdfsBuilderConfSetStr(bld, "dfs.blocksize",
                          TO_STR(TLH_DEFAULT_BLOCK_SIZE));
    if (username) {
        hdfsBuilderSetUserName(bld, username);
    }
    /* hdfsBuilderConnect frees the builder, whether or not it succeeds. */
    hdfs = hdfsBuilderConnect(bld);
    if (!hdfs) {
        ret = -errno;
        return ret;
    }
    *fs = hdfs;
    return 0;
}
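/*
 * TO_STR() above is assumed to be the usual two-level C stringification
 * idiom, so that TLH_DEFAULT_BLOCK_SIZE is macro-expanded before being
 * turned into the string literal hdfsBuilderConfSetStr() expects.
 * A minimal sketch (an assumption, not necessarily the original macro):
 */
#define VAL_TO_STR(x) #x
#define TO_STR(x) VAL_TO_STR(x)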
static struct libhdfs_data *libhdfs_data_create(const struct options *opts)
{
    struct libhdfs_data *ldata = NULL;
    struct hdfsBuilder *builder = NULL;
    hdfsFileInfo *pinfo = NULL;

    ldata = calloc(1, sizeof(struct libhdfs_data));
    if (!ldata) {
        fprintf(stderr, "Failed to allocate libhdfs test data.\n");
        goto error;
    }
    builder = hdfsNewBuilder();
    if (!builder) {
        fprintf(stderr, "Failed to create builder.\n");
        goto error;
    }
    hdfsBuilderSetNameNode(builder, opts->rpc_address);
    hdfsBuilderConfSetStr(builder,
        "dfs.client.read.shortcircuit.skip.checksum", "true");
    /* hdfsBuilderConnect consumes the builder, even on failure. */
    ldata->fs = hdfsBuilderConnect(builder);
    if (!ldata->fs) {
        fprintf(stderr, "Could not connect to default namenode!\n");
        goto error;
    }
    pinfo = hdfsGetPathInfo(ldata->fs, opts->path);
    if (!pinfo) {
        int err = errno;
        fprintf(stderr, "hdfsGetPathInfo(%s) failed: error %d (%s). "
                "Attempting to re-create file.\n",
                opts->path, err, strerror(err));
        if (libhdfs_data_create_file(ldata, opts))
            goto error;
    } else if (pinfo->mSize != opts->length) {
        /* hdfsGetPathInfo succeeded here; the file merely has the wrong
         * length, so the message should not claim a failure. */
        fprintf(stderr, "hdfsGetPathInfo(%s): file length was %lld, "
                "but we want length %lld. Attempting to re-create file.\n",
                opts->path, (long long)pinfo->mSize, (long long)opts->length);
        if (libhdfs_data_create_file(ldata, opts))
            goto error;
    }
    ldata->file = hdfsOpenFile(ldata->fs, opts->path, O_RDONLY, 0, 0, 0);
    if (!ldata->file) {
        int err = errno;
        fprintf(stderr, "hdfsOpenFile(%s) failed: error %d (%s)\n",
                opts->path, err, strerror(err));
        goto error;
    }
    ldata->length = opts->length;
    if (pinfo)
        hdfsFreeFileInfo(pinfo, 1);   /* free path info on success, too */
    return ldata;

error:
    if (pinfo)
        hdfsFreeFileInfo(pinfo, 1);
    if (ldata)
        libhdfs_data_free(ldata);
    return NULL;
}
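/*
 * libhdfs_data_free() and libhdfs_data_create_file() are referenced above
 * but not shown.  A minimal sketch of the cleanup helper, assuming the
 * struct holds only the fields used above (fs, file, length); the body is
 * an assumption, not the original implementation:
 */
static void libhdfs_data_free(struct libhdfs_data *ldata)
{
    if (ldata->file)
        hdfsCloseFile(ldata->fs, ldata->file);
    if (ldata->fs)
        hdfsDisconnect(ldata->fs);
    free(ldata);
}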
hdfsFS hdfsConnectAsUser(const char* nn, tPort port, const char *user)
{
    struct hdfsBuilder* bld = hdfsNewBuilder();
    if (!bld) {
        return NULL;
    }
    hdfsBuilderSetNameNode(bld, nn);
    hdfsBuilderSetNameNodePort(bld, port);
    hdfsBuilderSetUserName(bld, user);
    return hdfsBuilderConnect(bld);
}
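/*
 * A minimal caller sketch for hdfsConnectAsUser().  The NameNode address,
 * port, and user name below are placeholders, not values from the code
 * above:
 */
#include <stdio.h>
#include <errno.h>
#include "hdfs.h"

int connect_example(void)
{
    hdfsFS fs = hdfsConnectAsUser("localhost", 8020, "hdfsuser");
    if (!fs) {
        /* hdfsBuilderConnect() sets errno on failure. */
        fprintf(stderr, "connect failed: errno %d\n", errno);
        return -1;
    }
    /* ... use fs ... */
    hdfsDisconnect(fs);
    return 0;
}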
FileManagerHdfs::FileManagerHdfs(const string &storage_path)
    : FileManager(storage_path) {
  DEBUG_ASSERT(hdfs_namenode_port_dummy);
  DEBUG_ASSERT(hdfs_num_replications_dummy);

  struct hdfsBuilder *builder = hdfsNewBuilder();
  hdfsBuilderSetNameNode(builder, FLAGS_hdfs_namenode_host.c_str());
  hdfsBuilderSetNameNodePort(builder, FLAGS_hdfs_namenode_port);
  // hdfsBuilderConnect releases builder.
  hdfs_ = hdfsBuilderConnect(builder);
  DEBUG_ASSERT(hdfs_ != nullptr);
}
hdfsFS hdfsConnectAsUserNewInstance(const char* host, tPort port,
                                    const char *user)
{
    struct hdfsBuilder *bld = hdfsNewBuilder();
    if (!bld)
        return NULL;
    hdfsBuilderSetNameNode(bld, host);
    hdfsBuilderSetNameNodePort(bld, port);
    hdfsBuilderSetUserName(bld, user);
    hdfsBuilderSetForceNewInstance(bld);
    return hdfsBuilderConnect(bld);
}
hdfsFS Hdfs3FindConnection(const std::string& hostport) {
    std::unique_lock<std::mutex> lock(s_hdfs_mutex);

    auto it = s_hdfs_map.find(hostport);
    if (it != s_hdfs_map.end())
        return it->second;

    // split host:port
    std::vector<std::string> splitted = common::Split(hostport, ':', 2);
    uint16_t port;

    if (splitted.size() == 1) {
        port = 8020;
    }
    else {
        if (!common::from_str<uint16_t>(splitted[1], port))
            die("Could not parse port in host:port \"" << hostport << "\"");
    }

    // split user@host
    std::vector<std::string> user_split = common::Split(splitted[0], '@', 2);
    const char* host, * user;

    if (user_split.size() == 1) {
        host = user_split[0].c_str();
        user = nullptr;
    }
    else {
        user = user_split[0].c_str();
        host = user_split[1].c_str();
    }

    hdfsBuilder* builder = hdfsNewBuilder();
    hdfsBuilderSetNameNode(builder, host);
    hdfsBuilderSetNameNodePort(builder, port);
    if (user)
        hdfsBuilderSetUserName(builder, user);

    hdfsFS hdfs = hdfsBuilderConnect(builder);
    if (!hdfs)
        die("Could not connect to HDFS server \"" << hostport << "\""
            ": " << hdfsGetLastError());

    s_hdfs_map[hostport] = hdfs;
    return hdfs;
}
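// A usage sketch for Hdfs3FindConnection().  The "user@host:port" value is
// a placeholder; per the parsing above, the port defaults to 8020 when
// omitted, and the user part is optional:
void Hdfs3UsageExample() {
    hdfsFS fs = Hdfs3FindConnection("alice@namenode:8020");
    // die() above aborts on connection failure, so fs is valid here.
    if (hdfsExists(fs, "/") == 0) {
        // connected: root path exists
    }
}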
/**
 * Test that we can write a file with libhdfs and then read it back
 */
int main(void)
{
    int port;
    struct NativeMiniDfsConf conf = {
        1, /* doFormat */
        0, /* webhdfsEnabled */
        0, /* namenodeHttpPort */
        1, /* configureShortCircuit */
    };
    char testFileName[TEST_FILE_NAME_LENGTH];
    hdfsFS fs;
    struct NativeMiniDfsCluster* cl;
    struct hdfsBuilder *bld;

    cl = nmdCreate(&conf);
    EXPECT_NONNULL(cl);
    EXPECT_ZERO(nmdWaitClusterUp(cl));
    port = nmdGetNameNodePort(cl);
    if (port < 0) {
        fprintf(stderr, "TEST_ERROR: test_zerocopy: "
                "nmdGetNameNodePort returned error %d\n", port);
        return EXIT_FAILURE;
    }
    bld = hdfsNewBuilder();
    EXPECT_NONNULL(bld);
    EXPECT_ZERO(nmdConfigureHdfsBuilder(cl, bld));
    hdfsBuilderSetForceNewInstance(bld);
    hdfsBuilderConfSetStr(bld, "dfs.block.size",
                          TO_STR(TEST_ZEROCOPY_FULL_BLOCK_SIZE));
    /* ensure that we'll always get our mmaps */
    hdfsBuilderConfSetStr(bld, "dfs.client.read.shortcircuit.skip.checksum",
                          "true");
    fs = hdfsBuilderConnect(bld);
    EXPECT_NONNULL(fs);
    EXPECT_ZERO(createZeroCopyTestFile(fs, testFileName,
                                       TEST_FILE_NAME_LENGTH));
    EXPECT_ZERO(doTestZeroCopyReads(fs, testFileName));
    EXPECT_ZERO(hdfsDisconnect(fs));
    EXPECT_ZERO(nmdShutdown(cl));
    nmdFree(cl);
    fprintf(stderr, "TEST_SUCCESS\n");
    return EXIT_SUCCESS;
}
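/*
 * EXPECT_ZERO() and EXPECT_NONNULL() come from the test harness and are not
 * shown here.  A minimal sketch of equivalent macros (an assumption, not
 * the harness's actual definitions; the early return only works inside a
 * function returning int, such as main() above):
 */
#define EXPECT_ZERO(x) \
    do { \
        int rc_ = (x); \
        if (rc_) { \
            fprintf(stderr, "TEST_ERROR: %s:%d: %s returned %d\n", \
                    __FILE__, __LINE__, #x, rc_); \
            return EXIT_FAILURE; \
        } \
    } while (0)

#define EXPECT_NONNULL(x) \
    do { \
        if ((x) == NULL) { \
            fprintf(stderr, "TEST_ERROR: %s:%d: %s was NULL\n", \
                    __FILE__, __LINE__, #x); \
            return EXIT_FAILURE; \
        } \
    } while (0)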
int main(int argc, char* argv[])
{
    if (argc < 4) {
        printf("usage: hdfs_get <name node address> <name node port> <input file>\n");
        return 1;
    }

    // Sleep for 100ms.
    usleep(100 * 1000);

    struct hdfsBuilder* hdfs_builder = hdfsNewBuilder();
    if (!hdfs_builder) {
        printf("Could not create HDFS builder\n");
        return 1;
    }
    hdfsBuilderSetNameNode(hdfs_builder, argv[1]);
    int port = atoi(argv[2]);
    hdfsBuilderSetNameNodePort(hdfs_builder, port);
    hdfsBuilderConfSetStr(hdfs_builder, "dfs.client.read.shortcircuit", "false");

    hdfsFS fs = hdfsBuilderConnect(hdfs_builder);
    // With libhdfs3 the builder must be freed explicitly after connecting;
    // Hadoop's libhdfs frees it inside hdfsBuilderConnect() instead.
    hdfsFreeBuilder(hdfs_builder);
    if (!fs) {
        printf("Could not connect to HDFS\n");
        return 1;
    }

    hdfsFile file_in = hdfsOpenFile(fs, argv[3], O_RDONLY, 0, 0, 0);
    if (!file_in) {
        printf("Could not open file %s: %s\n", argv[3], hdfsGetLastError());
        return 1;
    }

    char buffer[1048576];
    int done = 0;
    do {
        done = hdfsRead(fs, file_in, buffer, sizeof(buffer));
    } while (done > 0);
    if (done < 0) {
        printf("Failed to read file: %s\n", hdfsGetLastError());
        return 1;
    }

    hdfsCloseFile(fs, file_in);
    hdfsDisconnect(fs);
    return 0;
}
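/*
 * Besides the sequential loop above, libhdfs also supports positional reads
 * via hdfsPread(), which does not advance the file's read pointer.  A
 * minimal sketch (the helper name and the zero offset are arbitrary):
 */
static int read_first_chunk(hdfsFS fs, hdfsFile file)
{
    char chunk[4096];
    tSize n = hdfsPread(fs, file, /*position=*/0, chunk, sizeof(chunk));
    if (n < 0) {
        printf("hdfsPread failed: %s\n", hdfsGetLastError());
        return -1;
    }
    return (int)n;
}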
int main(int argc, char* argv[])
{
    struct hadoopRzOptions *zopts = NULL;
    struct hadoopRzBuffer *rzbuf = NULL;
    if (argc < 4) {
        usage();
    }
    char* filename = argv[1];
    int num_iters = atoi(argv[2]);
    char method = *argv[3];
    if (NULL == strchr("mrzh", method)) {
        usage();
    }
    int ret;
    void* aligned = NULL;

    // If local mem, copy file into a local mlock'd aligned buffer.
    // ("size" is assumed to be the input file size, defined elsewhere.)
    if (method == 'm') {
        printf("Creating %d bytes of aligned data...\n", size);
        aligned = memalign(32, size);
        if (aligned == NULL) {
            perror("memalign");
            exit(3);
        }
        // Read the specified file into the buffer
        int fd = open(filename, O_RDONLY);
        int total_bytes = 0;
        while (total_bytes < size) {
            int bytes = read(fd, (char*)aligned + total_bytes,
                             size - total_bytes);
            if (bytes == -1) {
                perror("read");
                exit(-1);
            }
            total_bytes += bytes;
        }
        printf("Attempting mlock of buffer\n");
        ret = mlock(aligned, size);
        if (ret != 0) {
            perror("mlock");
            exit(2);
        }
    }

    printf("Summing output %d times...\n", num_iters);
    int i, j, k, l;

    // Copy data into this intermediate buffer
    const int buffer_size = (8*1024*1024);
    void *temp_buffer;
    ret = posix_memalign(&temp_buffer, 32, buffer_size);
    if (ret != 0) {
        printf("error in posix_memalign\n");
        exit(ret);
    }

    // This is for loop unrolling (unroll 4 times)
    __m128d* tempd = memalign(32, 16*4);
    struct timespec start, end;
    if (tempd == NULL) {
        perror("memalign");
        exit(3);
    }

    const int print_iters = 10;
    double end_sum = 0;
    hdfsFS fs = NULL;
    if (method == 'h' || method == 'z') {
        struct hdfsBuilder *builder = hdfsNewBuilder();
        hdfsBuilderSetNameNode(builder, "default");
        hdfsBuilderConfSetStr(builder,
            "dfs.client.read.shortcircuit.skip.checksum", "true");
        fs = hdfsBuilderConnect(builder);
        if (fs == NULL) {
            printf("Could not connect to default namenode!\n");
            exit(-1);
        }
    }

    for (i = 0; i < num_iters; i += print_iters) {
        gettime(&start);
        // Accumulator of packed doubles; start at zero rather than
        // reading an uninitialized register.
        __m128d sum = _mm_setzero_pd();
        for (j = 0; j < print_iters; j++) {
            int offset = 0;
            int fd = 0;
            hdfsFile hdfsFile = NULL;
            if (method == 'r') {
                fd = open(filename, O_RDONLY);
            }
            // hdfs zerocopy read
            else if (method == 'z') {
                zopts = hadoopRzOptionsAlloc();
                if (!zopts)
                    abort();
                if (hadoopRzOptionsSetSkipChecksum(zopts, 1))
                    abort();
                if (hadoopRzOptionsSetByteBufferPool(zopts, NULL))
                    abort();
                hdfsFile = hdfsOpenFile(fs, filename, O_RDONLY, 0, 0, 0);
            }
            // hdfs normal read
            else if (method == 'h') {
                hdfsFile = hdfsOpenFile(fs, filename, O_RDONLY, 0, 0, 0);
            }
            // Each iteration, process the buffer once
            for (k = 0; k < size; k += buffer_size) {
                // Set this with varying methods!
                const double* buffer = NULL;
                // Local file read
                if (method == 'r') {
                    // do read
                    int total_bytes = 0;
                    while (total_bytes < buffer_size) {
                        int bytes = read(fd, (char*)temp_buffer + total_bytes,
                                         buffer_size - total_bytes);
                        if (bytes < 0) {
                            printf("Error on read\n");
                            return -1;
                        }
                        total_bytes += bytes;
                    }
                    buffer = (double*)temp_buffer;
                }
                // Local memory read
                else if (method == 'm') {
                    buffer = (double*)((char*)aligned + offset);
                }
                // hdfs zerocopy read
                else if (method == 'z') {
                    int len;
                    rzbuf = hadoopReadZero(hdfsFile, zopts, buffer_size);
                    if (!rzbuf)
                        abort();
                    buffer = hadoopRzBufferGet(rzbuf);
                    if (!buffer)
                        abort();
                    len = hadoopRzBufferLength(rzbuf);
                    if (len < buffer_size)
                        abort();
                }
                // hdfs normal read
                else if (method == 'h') {
                    abort(); // need to implement hdfsReadFully
                    //ret = hdfsReadFully(fs, hdfsFile, temp_buffer, buffer_size);
                    if (ret == -1) {
                        printf("Error: hdfsReadFully errored\n");
                        exit(-1);
                    }
                    buffer = temp_buffer;
                }
                offset += buffer_size;

                // Unroll the loop a bit
                const double* a_ptr = &(buffer[0]);
                const double* b_ptr = &(buffer[2]);
                const double* c_ptr = &(buffer[4]);
                const double* d_ptr = &(buffer[6]);
                for (l = 0; l < buffer_size; l += 64) {
                    tempd[0] = _mm_load_pd(a_ptr);
                    tempd[1] = _mm_load_pd(b_ptr);
                    tempd[2] = _mm_load_pd(c_ptr);
                    tempd[3] = _mm_load_pd(d_ptr);
                    sum = _mm_add_pd(sum, tempd[0]);
                    sum = _mm_add_pd(sum, tempd[1]);
                    sum = _mm_add_pd(sum, tempd[2]);
                    sum = _mm_add_pd(sum, tempd[3]);
                    a_ptr += 8;
                    b_ptr += 8;
                    c_ptr += 8;
                    d_ptr += 8;
                }
                if (method == 'z') {
                    hadoopRzBufferFree(hdfsFile, rzbuf);
                }
            }
            // Local file read
            if (method == 'r') {
                close(fd);
            }
            // hdfs zerocopy read or hdfs normal read
            else if (method == 'z' || method == 'h') {
                hdfsCloseFile(fs, hdfsFile);
            }
            printf("iter %d complete\n", j);
        }
        gettime(&end);
        print_duration(&start, &end, (long)size * print_iters);

        // Force the compiler to actually generate the code above
        double* unpack = (double*)&sum;
        double final = unpack[0] + unpack[1];
        end_sum += final;
    }
    if (method == 'z' || method == 'h') {
        hdfsDisconnect(fs);
    }
    printf("%f\n", end_sum);
    return 0;
}
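/*
 * The 'h' branch above aborts because hdfsReadFully is not implemented.
 * A minimal sketch of such a helper, looping plain hdfsRead() until the
 * requested byte count has arrived (an assumption, not a real libhdfs
 * API: hdfsRead() only guarantees partial reads).  It returns 0 on
 * success and -1 on error, matching the "ret == -1" check above:
 */
static int hdfsReadFully(hdfsFS fs, hdfsFile file, void *buffer, int length)
{
    int total = 0;
    while (total < length) {
        tSize n = hdfsRead(fs, file, (char*)buffer + total, length - total);
        if (n < 0)
            return -1;   /* read error; libhdfs sets errno */
        if (n == 0)
            break;       /* unexpected EOF before length bytes */
        total += n;
    }
    return total == length ? 0 : -1;
}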