int main(int argc, char **argv)
{
    const char* rfile;
    tSize fileTotalSize, bufferSize, curSize, totalReadSize;
    hdfsFS fs;
    hdfsFile readFile;
    char *buffer = NULL;

    if (argc != 4) {
        fprintf(stderr, "Usage: test_libwebhdfs_read"
                " <filename> <filesize> <buffersize>\n");
        exit(1);
    }

    fs = hdfsConnect("localhost", 50070);
    if (!fs) {
        fprintf(stderr, "Oops! Failed to connect to hdfs!\n");
        exit(1);
    }

    rfile = argv[1];
    fileTotalSize = strtoul(argv[2], NULL, 10);
    bufferSize = strtoul(argv[3], NULL, 10);

    readFile = hdfsOpenFile(fs, rfile, O_RDONLY, bufferSize, 0, 0);
    if (!readFile) {
        fprintf(stderr, "Failed to open %s for reading!\n", rfile);
        exit(1);
    }

    // buffer for the data read from the file
    buffer = malloc(sizeof(char) * bufferSize);
    if (buffer == NULL) {
        fprintf(stderr, "Failed to allocate buffer.\n");
        exit(1);
    }

    // read from the file; a full buffer means there may be more data
    curSize = bufferSize;
    totalReadSize = 0;
    for (; (curSize = hdfsRead(fs, readFile, buffer, bufferSize)) == bufferSize;) {
        totalReadSize += curSize;
    }
    if (curSize > 0) {
        // count the final short read, but not an EOF (0) or error (-1)
        totalReadSize += curSize;
    }

    fprintf(stderr, "size of the file: %d; reading size: %d\n",
            fileTotalSize, totalReadSize);

    free(buffer);
    hdfsCloseFile(fs, readFile);
    hdfsDisconnect(fs);
    return 0;
}
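/* A minimal sketch (not part of any sample above) of a safer read loop than
 * the `curSize == bufferSize` pattern several of these snippets share: it
 * treats 0 as EOF and -1 as an error instead of folding the final return
 * value into the byte count.  The helper name is illustrative. */
#include <hdfs.h>

static tOffset read_to_eof(hdfsFS fs, hdfsFile f, char *buf, tSize bufSize)
{
    tOffset total = 0;
    for (;;) {
        tSize n = hdfsRead(fs, f, buf, bufSize);
        if (n > 0)
            total += n;      /* got data; keep reading */
        else if (n == 0)
            return total;    /* clean EOF */
        else
            return -1;       /* read error; libhdfs sets errno */
    }
}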
MaprOutputCodedBlockFile::~MaprOutputCodedBlockFile() {
  //LOG(INFO) << "MaprOutputCodedBlockFile::~MaprOutputCodedBlockFile()";

  // Force the stream destructors to run first so any buffered data gets
  // written before the resources needed for that write are released.
  output_stream_.reset(NULL);
  copying_output_stream_.reset(NULL);

  CHECK_EQ(hdfsFlush(fs_, file_), 0);
  //LOG(INFO) << "closing file: " << file_;
  CHECK_EQ(hdfsCloseFile(fs_, file_), 0);
  //LOG(INFO) << "disconnecting fs: " << fs_;
  CHECK_EQ(hdfsDisconnect(fs_), 0);
}
void HdfsFile::close()
{
    int ret = 0;
    if (m_file != 0) {
        ret = hdfsCloseFile(m_fs, m_file);
        m_file = 0;
    }

    if (IDBLogger::isEnabled())
        IDBLogger::logNoArg(m_fname, this, "close", ret);
}
int main(int argc, char **argv)
{
    if (argc != 3) {
        fprintf(stderr, "Usage: hdfs_read <filename> <buffersize>\n");
        exit(-1);
    }

    hdfsFS fs = hdfsConnect("default", 0);
    if (!fs) {
        fprintf(stderr, "Oops! Failed to connect to hdfs!\n");
        exit(-2);
    }

    const char* rfile = argv[1];
    tSize bufferSize = strtoul(argv[2], NULL, 10);

    hdfsFile readFile = hdfsOpenFile(fs, rfile, O_RDONLY, bufferSize, 0, 0);
    if (!readFile) {
        fprintf(stderr, "Failed to open %s for reading!\n", rfile);
        exit(-3);
    }

    // local destination file (same name as the HDFS path)
    FILE *outf = fopen(rfile, "wb");
    if (outf == NULL) {
        printf("FILEIO error %d\n", errno);
        exit(-4);
    }

    // buffer for the data read from the file
    char* buffer = malloc(sizeof(char) * bufferSize);
    if (buffer == NULL) {
        return -5;
    }

    // read from the file; stop on the first short read, EOF, or error
    tSize curSize = bufferSize;
    for (; curSize == bufferSize;) {
        curSize = hdfsRead(fs, readFile, (void*)buffer, curSize);
        if (curSize <= 0)  // don't pass an EOF (0) or error (-1) to fwrite
            break;
        fwrite((void *)buffer, sizeof(char), curSize, outf);
    }

    fclose(outf);
    free(buffer);
    hdfsCloseFile(fs, readFile);
    hdfsDisconnect(fs);
    return 0;
}
/**
 * For now implement truncate here and only for size == 0.
 * Weak implementation in that we just delete the file and
 * then re-create it, but don't set the user, group, and times to the old
 * file's metadata.
 */
int dfs_truncate(const char *path, off_t size)
{
    struct hdfsConn *conn = NULL;
    hdfsFS fs;
    dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data;

    TRACE1("truncate", path)

    assert(path);
    assert('/' == *path);
    assert(dfs);

    if (size != 0) {
        return 0;
    }

    int ret = dfs_unlink(path);
    if (ret != 0) {
        return ret;
    }

    ret = fuseConnectAsThreadUid(&conn);
    if (ret) {
        fprintf(stderr, "fuseConnectAsThreadUid: failed to open a libhdfs "
                "connection! error %d.\n", ret);
        ret = -EIO;
        goto cleanup;
    }
    fs = hdfsConnGetFs(conn);

    int flags = O_WRONLY | O_CREAT;
    hdfsFile file;
    if ((file = (hdfsFile)hdfsOpenFile(fs, path, flags, 0, 0, 0)) == NULL) {
        ERROR("Could not open file %s", path);
        ret = -EIO;
        goto cleanup;
    }

    if (hdfsCloseFile(fs, file) != 0) {
        ERROR("Could not close file %s", path);
        ret = -EIO;
        goto cleanup;
    }

cleanup:
    if (conn) {
        hdfsConnRelease(conn);
    }
    return ret;
}
qioerr hdfs_close(void* fl, void* fs)
{
    int got = 0;
    qioerr err_out = 0;

    STARTING_SLOW_SYSCALL;
    got = hdfsCloseFile(to_hdfs_fs(fs)->hfs, to_hdfs_file(fl)->file);
    if (got == -1)
        err_out = qio_mkerror_errno();
    DONE_SLOW_SYSCALL;

    DO_RELEASE((to_hdfs_fs(fs)), hdfs_disconnect_and_free);
    return err_out;
}
bool HdfsConnector::closeFiles() {
    bool success = true;
    for (size_t i = 0; i < file_handles_.size(); i++) {
        for (size_t j = 0; j < file_handles_[i].size(); j++) {
            // track every close result, not just the last one
            if (hdfsCloseFile(fs, file_handles_[i][j]) != 0)
                success = false;
        }
        file_handles_[i].clear();
    }
    file_handles_.clear();
    return success;
}
int HdfsFile::close()
{
    int ret = 0;
    int savedErrno = EINVAL;  // corresponds to m_file == 0

    if (m_file != 0) {
        ret = hdfsCloseFile(m_fs, m_file);
        savedErrno = errno;
        m_file = 0;
    }

    if (IDBLogger::isEnabled())
        IDBLogger::logNoArg(m_fname, this, "close", ret);

    errno = savedErrno;
    return ret;
}
int main(int argc, char **argv)
{
    hdfsFS fs;
    char* rfile;
    int bufferSize;
    hdfsFile readFile;
    char* buffer;
    int curSize;

    if (argc != 4) {
        fprintf(stderr, "Usage: hdfs_read <filename> <filesize> <buffersize>\n");
        exit(-1);
    }

    fs = hdfsConnect("default", 0);
    if (!fs) {
        fprintf(stderr, "Oops! Failed to connect to hdfs!\n");
        exit(-1);
    }

    rfile = argv[1];
    // argv[2], the file size, is accepted but not used by this test
    bufferSize = strtoul(argv[3], NULL, 10);

    readFile = hdfsOpenFile(fs, rfile, O_RDONLY, bufferSize, 0, 0);
    if (!readFile) {
        fprintf(stderr, "Failed to open %s for reading!\n", rfile);
        exit(-2);
    }

    /* buffer for the data read from the file */
    buffer = malloc(sizeof(char) * bufferSize);
    if (buffer == NULL) {
        return -2;
    }

    /* read from the file until the first short read, EOF, or error */
    curSize = bufferSize;
    for (; curSize == bufferSize;) {
        curSize = hdfsRead(fs, readFile, (void*)buffer, curSize);
    }

    free(buffer);
    hdfsCloseFile(fs, readFile);
    hdfsDisconnect(fs);
    return 0;
}
/**
 * For now implement truncate here and only for size == 0.
 * Weak implementation in that we just delete the file and
 * then re-create it, but don't set the user, group, and times to the old
 * file's metadata.
 */
int dfs_truncate(const char *path, off_t size)
{
    TRACE1("truncate", path)

    dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data;

    assert(path);
    assert('/' == *path);
    assert(dfs);

    if (size != 0) {
        return -ENOTSUP;
    }

    int ret = dfs_unlink(path);
    if (ret != 0) {
        return ret;
    }

    hdfsFS userFS = doConnectAsUser(dfs->nn_hostname, dfs->nn_port);
    if (userFS == NULL) {
        ERROR("Could not connect");
        ret = -EIO;
        goto cleanup;
    }

    int flags = O_WRONLY | O_CREAT;
    hdfsFile file;
    if ((file = (hdfsFile)hdfsOpenFile(userFS, path, flags, 0, 0, 0)) == NULL) {
        ERROR("Could not open file %s", path);
        ret = -EIO;
        goto cleanup;
    }

    if (hdfsCloseFile(userFS, file) != 0) {
        ERROR("Could not close file %s", path);
        ret = -EIO;
        goto cleanup;
    }

cleanup:
    if (doDisconnect(userFS)) {
        ret = -EIO;
    }
    return ret;
}
int main(int argc, char **argv)
{
    hdfsFS fs = hdfsConnect("default", 0);
    const char* writePath = "/tmp/testfile.txt";
    hdfsFile writeFile = hdfsOpenFile(fs, writePath, O_WRONLY|O_CREAT, 0, 0, 0);
    if (!writeFile) {
        fprintf(stderr, "Failed to open %s for writing!\n", writePath);
        exit(-1);
    }
    char* buffer = "Hello, World!";
    tSize num_written_bytes = hdfsWrite(fs, writeFile, (void*)buffer,
                                        strlen(buffer)+1);
    if (hdfsFlush(fs, writeFile)) {
        fprintf(stderr, "Failed to 'flush' %s\n", writePath);
        exit(-1);
    }
    hdfsCloseFile(fs, writeFile);
}
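/* A hedged, hardened variant of the canonical write example above: the same
 * flow, but it checks the connect and write results and releases the
 * connection before exiting.  The path and payload are the same illustrative
 * values, not anything mandated by libhdfs. */
#include <hdfs.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>

int main(void)
{
    hdfsFS fs = hdfsConnect("default", 0);
    if (!fs) {
        fprintf(stderr, "Failed to connect to hdfs!\n");
        exit(-1);
    }
    const char* writePath = "/tmp/testfile.txt";
    hdfsFile writeFile = hdfsOpenFile(fs, writePath, O_WRONLY|O_CREAT, 0, 0, 0);
    if (!writeFile) {
        fprintf(stderr, "Failed to open %s for writing!\n", writePath);
        exit(-1);
    }
    const char* buffer = "Hello, World!";
    tSize len = (tSize)strlen(buffer) + 1;
    if (hdfsWrite(fs, writeFile, (void*)buffer, len) != len) {
        fprintf(stderr, "Failed to write %s\n", writePath);
        exit(-1);
    }
    if (hdfsFlush(fs, writeFile)) {
        fprintf(stderr, "Failed to 'flush' %s\n", writePath);
        exit(-1);
    }
    hdfsCloseFile(fs, writeFile);
    hdfsDisconnect(fs);
    return 0;
}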
void ClaimsHDFS::claimsRead() {
    hdfsFS fs;
    hdfsFile fd;
    string filename = "/home/casa/data/kmeans_data.txt";

    fs = hdfsConnect("10.11.1.174", 9000);
    fd = hdfsOpenFile(fs, filename.c_str(), O_RDONLY, 0, 0, 0);
    if (!fd) {
        cout << "failed to open hdfs file!!!" << endl;
        return;  // don't read from an invalid handle
    }

    char array[73];
    tSize bytes = hdfsRead(fs, fd, array, 72);
    array[bytes > 0 ? bytes : 0] = '\0';  // null-terminate before printing
    cout << "string is: " << array << endl;

    hdfsCloseFile(fs, fd);
    hdfsDisconnect(fs);
}
static int libhdfs_data_create_file(struct libhdfs_data *ldata,
                                    const struct options *opts)
{
    int ret;
    double *chunk = NULL;
    long long offset = 0;

    ldata->file = hdfsOpenFile(ldata->fs, opts->path, O_WRONLY, 0, 1, 0);
    if (!ldata->file) {
        ret = errno;
        fprintf(stderr, "libhdfs_data_create_file: hdfsOpenFile(%s, "
                "O_WRONLY) failed: error %d (%s)\n", opts->path, ret,
                strerror(ret));
        goto done;
    }
    ret = test_file_chunk_setup(&chunk);
    if (ret)
        goto done;
    while (offset < opts->length) {
        ret = hdfsWrite(ldata->fs, ldata->file, chunk, VECSUM_CHUNK_SIZE);
        if (ret < 0) {
            ret = errno;
            fprintf(stderr, "libhdfs_data_create_file: got error %d (%s) at "
                    "offset %lld of %s\n", ret, strerror(ret),
                    offset, opts->path);
            goto done;
        } else if (ret < VECSUM_CHUNK_SIZE) {
            fprintf(stderr, "libhdfs_data_create_file: got short write "
                    "of %d at offset %lld of %s\n", ret, offset, opts->path);
            ret = EIO;  // report an error code, not the short byte count
            goto done;
        }
        offset += VECSUM_CHUNK_SIZE;
    }
    ret = 0;
done:
    free(chunk);
    if (ldata->file) {
        if (hdfsCloseFile(ldata->fs, ldata->file)) {
            fprintf(stderr, "libhdfs_data_create_file: hdfsCloseFile error.\n");
            ret = EIO;
        }
        ldata->file = NULL;
    }
    return ret;
}
int libhdfsconnector::streamInFile(const char * rfile, int bufferSize)
{
    if (!fs) {
        fprintf(stderr, "Could not connect to hdfs\n");
        return RETURN_FAILURE;
    }

    unsigned long fileTotalSize = 0;
    hdfsFileInfo *fileInfo = NULL;
    if ((fileInfo = hdfsGetPathInfo(fs, rfile)) != NULL) {
        fileTotalSize = fileInfo->mSize;
        hdfsFreeFileInfo(fileInfo, 1);
    } else {
        fprintf(stderr, "Error: hdfsGetPathInfo for %s - FAILED!\n", rfile);
        return RETURN_FAILURE;
    }

    hdfsFile readFile = hdfsOpenFile(fs, rfile, O_RDONLY, bufferSize, 0, 0);
    if (!readFile) {
        fprintf(stderr, "Failed to open %s for reading!\n", rfile);
        return RETURN_FAILURE;
    }

    unsigned char buff[bufferSize + 1];
    buff[bufferSize] = '\0';

    for (unsigned long bytes_read = 0; bytes_read < fileTotalSize;) {
        tSize read_length = hdfsRead(fs, readFile, buff, bufferSize);
        if (read_length <= 0)  // bail out on EOF or error instead of spinning
            break;
        bytes_read += read_length;
        for (tSize i = 0; i < read_length; i++)
            fprintf(stdout, "%c", buff[i]);
    }

    hdfsCloseFile(fs, readFile);
    return 0;
}
bool FileManagerHdfs::readBlockOrBlob(const block_id block,
                                      void *buffer,
                                      const size_t length) {
  DEBUG_ASSERT(buffer);
  DEBUG_ASSERT(length % kSlotSizeBytes == 0);

  string filename(blockFilename(block));

  hdfsFile file_handle = hdfsOpenFile(hdfs_,
                                      filename.c_str(),
                                      O_RDONLY,
                                      kSlotSizeBytes,
                                      FLAGS_hdfs_num_replications,
                                      kSlotSizeBytes);
  if (file_handle == nullptr) {
    LOG_WARNING("Failed to open file " << filename
                << " with error: " << strerror(errno));
    return false;
  }

  size_t bytes_total = 0;
  while (bytes_total < length) {
    tSize bytes = hdfsRead(hdfs_,
                           file_handle,
                           static_cast<char*>(buffer) + bytes_total,
                           length - bytes_total);
    if (bytes > 0) {
      bytes_total += bytes;
    } else if (bytes == -1) {
      if (errno != EINTR) {
        LOG_WARNING("Failed to read file " << filename
                    << " with error: " << strerror(errno));
        break;
      }
    } else {
      LOG_WARNING("Failed to read file " << filename
                  << " since EOF was reached unexpectedly");
      break;
    }
  }

  if (hdfsCloseFile(hdfs_, file_handle) != 0) {
    LOG_WARNING("Failed to close file " << filename
                << " with error: " << strerror(errno));
  }

  return (bytes_total == length);
}
static int createZeroCopyTestFile(hdfsFS fs, char *testFileName,
                                  size_t testFileNameLen)
{
    int blockIdx, blockLen;
    hdfsFile file;
    uint8_t *data;

    snprintf(testFileName, testFileNameLen, "/zeroCopyTestFile.%d.%d",
             getpid(), rand());
    file = hdfsOpenFile(fs, testFileName, O_WRONLY, 0, 1,
                        TEST_ZEROCOPY_FULL_BLOCK_SIZE);
    EXPECT_NONNULL(file);
    for (blockIdx = 0; blockIdx < TEST_ZEROCOPY_NUM_BLOCKS; blockIdx++) {
        blockLen = getZeroCopyBlockLen(blockIdx);
        data = getZeroCopyBlockData(blockIdx);
        EXPECT_NONNULL(data);
        EXPECT_INT_EQ(blockLen, hdfsWrite(fs, file, data, blockLen));
        free(data);  // release each generated block once written
    }
    EXPECT_ZERO(hdfsCloseFile(fs, file));
    return 0;
}
bool FileManagerHdfs::writeBlockOrBlob(const block_id block,
                                       const void *buffer,
                                       const size_t length) {
  DEBUG_ASSERT(buffer);
  DEBUG_ASSERT(length % kSlotSizeBytes == 0);

  string filename(blockFilename(block));

  hdfsFile file_handle = hdfsOpenFile(hdfs_,
                                      filename.c_str(),
                                      O_WRONLY,
                                      kSlotSizeBytes,
                                      FLAGS_hdfs_num_replications,
                                      kSlotSizeBytes);
  if (file_handle == nullptr) {
    LOG_WARNING("Failed to open file " << filename
                << " with error: " << strerror(errno));
    return false;
  }

  size_t bytes_total = 0;
  while (bytes_total < length) {
    tSize bytes = hdfsWrite(hdfs_,
                            file_handle,
                            static_cast<const char*>(buffer) + bytes_total,
                            length - bytes_total);
    if (bytes > 0) {
      bytes_total += bytes;
    } else if (bytes == -1) {
      LOG_WARNING("Failed to write file " << filename
                  << " with error: " << strerror(errno));
      break;
    }
  }

  if (hdfsSync(hdfs_, file_handle) != 0) {
    LOG_WARNING("Failed to sync file " << filename
                << " with error: " << strerror(errno));
  }

  if (hdfsCloseFile(hdfs_, file_handle) != 0) {
    LOG_WARNING("Failed to close file " << filename
                << " with error: " << strerror(errno));
  }

  return (bytes_total == length);
}
int main(int argc, char* argv[])
{
    if (argc < 4) {
        printf("usage: hdfs_get <name node address> <name node port> <input file>\n");
        return 1;
    }

    // Sleep for 100ms.
    usleep(100 * 1000);

    struct hdfsBuilder* hdfs_builder = hdfsNewBuilder();
    if (!hdfs_builder) {
        printf("Could not create HDFS builder\n");
        return 1;
    }
    hdfsBuilderSetNameNode(hdfs_builder, argv[1]);
    int port = atoi(argv[2]);
    hdfsBuilderSetNameNodePort(hdfs_builder, port);
    hdfsBuilderConfSetStr(hdfs_builder, "dfs.client.read.shortcircuit", "false");

    hdfsFS fs = hdfsBuilderConnect(hdfs_builder);
    hdfsFreeBuilder(hdfs_builder);
    if (!fs) {
        printf("Could not connect to HDFS\n");
        return 1;
    }

    hdfsFile file_in = hdfsOpenFile(fs, argv[3], O_RDONLY, 0, 0, 0);
    if (!file_in) {  // guard against a failed open before reading
        printf("Failed to open file: %s\n", hdfsGetLastError());
        return 1;
    }

    char buffer[1048576];
    int done = 0;
    do {
        done = hdfsRead(fs, file_in, buffer, 1048576);
    } while (done > 0);
    if (done < 0) {
        printf("Failed to read file: %s\n", hdfsGetLastError());
        return 1;
    }

    hdfsCloseFile(fs, file_in);
    hdfsDisconnect(fs);
    return 0;
}
int libhdfsconnector::streamFlatFileOffset(const char * filename,
        unsigned long seekPos, unsigned long readlen,
        unsigned long bufferSize, int maxretries)
{
    hdfsFile readFile = hdfsOpenFile(fs, filename, O_RDONLY, 0, 0, 0);
    if (!readFile) {
        fprintf(stderr, "Failed to open %s for reading!\n", filename);
        return EXIT_FAILURE;
    }

    if (hdfsSeek(fs, readFile, seekPos)) {
        fprintf(stderr, "Failed to seek %s for reading!\n", filename);
        return EXIT_FAILURE;
    }

    unsigned char buffer[bufferSize + 1];
    unsigned long currentPos = seekPos;

    fprintf(stderr, "\n--Start piping: %ld--\n", currentPos);

    unsigned long bytesLeft = readlen;
    while (hdfsAvailable(fs, readFile) && bytesLeft > 0) {
        tSize num_read_bytes = hdfsRead(fs, readFile, buffer,
                bytesLeft < bufferSize ? bytesLeft : bufferSize);
        if (num_read_bytes <= 0)
            break;
        bytesLeft -= num_read_bytes;
        for (tSize i = 0; i < num_read_bytes; i++, currentPos++)
            fprintf(stdout, "%c", buffer[i]);
    }

    fprintf(stderr, "\n--Stop Streaming: %ld--\n", currentPos);

    hdfsCloseFile(fs, readFile);
    return EXIT_SUCCESS;
}
// Caller takes ownership of returned object and must delete it when done
google::protobuf::io::CodedInputStream*
MaprInputCodedBlockFile::CreateCodedStream(uint64 position, uint64 length) {
  CHECK(is_open_);

  // Seek to requested position (relative to start of file).
  CHECK_LT(position, size_);
  CHECK_LE(position + length, size_);

  // Starting with MapR V2.1.1, sometimes seek fails and the hdfs connection
  // must be reset in order to try again. Not sure why this transient error
  // happens with MapR V2.1.1 but not earlier versions.
  bool success = false;
  for (int i = 0; i < 10; ++i) {
    if (hdfsSeek(fs_, file_, position) == 0) {  // hdfsSeek returns 0 on success
      success = true;
      break;
    }
    //LOG(INFO) << "seek attempt failed: " << i;
    //LOG(INFO) << "path:" << path_ << "\n position: " << position
    //          << "\n length: " << length << "\n size: " << size_;
    CHECK_EQ(hdfsCloseFile(fs_, file_), 0);
    CHECK_EQ(hdfsDisconnect(fs_), 0);
    std::string host = "default";
    fs_ = hdfsConnect(host.c_str(), 0);  // use default config file settings
    CHECK(fs_) << "error connecting to maprfs";
    file_ = hdfsOpenFile(fs_, path_.c_str(), O_RDONLY, 0, 0, 0);
    CHECK(file_ != NULL);
    sleep(2 * i);
  }
  CHECK(success);

  // Create a coded stream (hold it in a scoped ptr to manage deleting).
  // The limiting stream references the copying stream, so the old limiting
  // stream must be destroyed before the copying stream it wraps.
  limiting_stream_.reset(NULL);
  copying_stream_.reset(new google::protobuf::io::CopyingInputStreamAdaptor(
      copying_input_stream_.get()));
  limiting_stream_.reset(new google::protobuf::io::LimitingInputStream(
      copying_stream_.get(), length));
  return new google::protobuf::io::CodedInputStream(limiting_stream_.get());
}
boost::string_ref HDFSFileSplitter::fetch_block(bool is_next) {
    int nbytes = 0;

    if (is_next) {
        // directly read the next block using the current file
        nbytes = hdfsRead(fs_, file_, data_, hdfs_block_size);
        if (nbytes == 0)
            return "";
        if (nbytes == -1) {
            throw base::HuskyException("read next block error!");
        }
    } else {
        // ask the master for a new block
        BinStream question;
        question << url_ << husky::Context::get_param("hostname");
        BinStream answer = husky::Context::get_coordinator()->ask_master(
            question, husky::TYPE_HDFS_BLK_REQ);
        std::string fn;
        answer >> fn;
        answer >> offset_;
        if (fn == "") {
            // no more files
            return "";
        }

        if (file_ != NULL) {
            int rc = hdfsCloseFile(fs_, file_);
            assert(rc == 0);
            // note that the handle is freed inside hdfsCloseFile
            file_ = NULL;
        }

        // read the new block
        nbytes = read_block(fn);
    }
    return boost::string_ref(data_, nbytes);
}
static int doTestZeroCopyReads(hdfsFS fs, const char *fileName)
{
    hdfsFile file = NULL;
    struct hadoopRzOptions *opts = NULL;
    struct hadoopRzBuffer *buffer = NULL;
    uint8_t *block;

    file = hdfsOpenFile(fs, fileName, O_RDONLY, 0, 0, 0);
    EXPECT_NONNULL(file);
    opts = hadoopRzOptionsAlloc();
    EXPECT_NONNULL(opts);
    EXPECT_ZERO(hadoopRzOptionsSetSkipChecksum(opts, 1));
    /* haven't read anything yet */
    EXPECT_ZERO(expectFileStats(file, 0LL, 0LL, 0LL, 0LL));
    block = getZeroCopyBlockData(0);
    EXPECT_NONNULL(block);
    /* first read is half of a block. */
    buffer = hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2);
    EXPECT_NONNULL(buffer);
    EXPECT_INT_EQ(TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2,
                  hadoopRzBufferLength(buffer));
    EXPECT_ZERO(memcmp(hadoopRzBufferGet(buffer), block,
                       TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2));
    hadoopRzBufferFree(file, buffer);
    /* read the next half of the block */
    buffer = hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2);
    EXPECT_NONNULL(buffer);
    EXPECT_INT_EQ(TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2,
                  hadoopRzBufferLength(buffer));
    EXPECT_ZERO(memcmp(hadoopRzBufferGet(buffer),
                       block + (TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2),
                       TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2));
    hadoopRzBufferFree(file, buffer);
    free(block);
    EXPECT_ZERO(expectFileStats(file, TEST_ZEROCOPY_FULL_BLOCK_SIZE,
                                TEST_ZEROCOPY_FULL_BLOCK_SIZE,
                                TEST_ZEROCOPY_FULL_BLOCK_SIZE,
                                TEST_ZEROCOPY_FULL_BLOCK_SIZE));
    /* Now let's read just a few bytes. */
    buffer = hadoopReadZero(file, opts, SMALL_READ_LEN);
    EXPECT_NONNULL(buffer);
    EXPECT_INT_EQ(SMALL_READ_LEN, hadoopRzBufferLength(buffer));
    block = getZeroCopyBlockData(1);
    EXPECT_NONNULL(block);
    EXPECT_ZERO(memcmp(block, hadoopRzBufferGet(buffer), SMALL_READ_LEN));
    hadoopRzBufferFree(file, buffer);
    EXPECT_INT64_EQ(
        (int64_t)TEST_ZEROCOPY_FULL_BLOCK_SIZE + (int64_t)SMALL_READ_LEN,
        hdfsTell(fs, file));
    EXPECT_ZERO(expectFileStats(file,
                    TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN,
                    TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN,
                    TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN,
                    TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN));

    /* Clear 'skip checksums' and test that we can't do zero-copy reads any
     * more.  Since there is no ByteBufferPool set, we should fail with
     * EPROTONOSUPPORT.
     */
    EXPECT_ZERO(hadoopRzOptionsSetSkipChecksum(opts, 0));
    EXPECT_NULL(hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE));
    EXPECT_INT_EQ(EPROTONOSUPPORT, errno);

    /* Verify that setting a NULL ByteBufferPool class works. */
    EXPECT_ZERO(hadoopRzOptionsSetByteBufferPool(opts, NULL));
    EXPECT_ZERO(hadoopRzOptionsSetSkipChecksum(opts, 0));
    EXPECT_NULL(hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE));
    EXPECT_INT_EQ(EPROTONOSUPPORT, errno);

    /* Now set a ByteBufferPool and try again.  It should succeed this time. */
    EXPECT_ZERO(hadoopRzOptionsSetByteBufferPool(opts,
                    ELASTIC_BYTE_BUFFER_POOL_CLASS));
    buffer = hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE);
    EXPECT_NONNULL(buffer);
    EXPECT_INT_EQ(TEST_ZEROCOPY_FULL_BLOCK_SIZE, hadoopRzBufferLength(buffer));
    EXPECT_ZERO(expectFileStats(file,
                    (2 * TEST_ZEROCOPY_FULL_BLOCK_SIZE) + SMALL_READ_LEN,
                    (2 * TEST_ZEROCOPY_FULL_BLOCK_SIZE) + SMALL_READ_LEN,
                    (2 * TEST_ZEROCOPY_FULL_BLOCK_SIZE) + SMALL_READ_LEN,
                    TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN));
    EXPECT_ZERO(memcmp(block + SMALL_READ_LEN, hadoopRzBufferGet(buffer),
                       TEST_ZEROCOPY_FULL_BLOCK_SIZE - SMALL_READ_LEN));
    free(block);
    block = getZeroCopyBlockData(2);
    EXPECT_NONNULL(block);
    EXPECT_ZERO(memcmp(block, (uint8_t*)hadoopRzBufferGet(buffer) +
                       (TEST_ZEROCOPY_FULL_BLOCK_SIZE - SMALL_READ_LEN),
                       SMALL_READ_LEN));
    hadoopRzBufferFree(file, buffer);

    /* Check the result of a zero-length read. */
    buffer = hadoopReadZero(file, opts, 0);
    EXPECT_NONNULL(buffer);
    EXPECT_NONNULL(hadoopRzBufferGet(buffer));
    EXPECT_INT_EQ(0, hadoopRzBufferLength(buffer));
    hadoopRzBufferFree(file, buffer);

    /* Check the result of reading past EOF */
    EXPECT_INT_EQ(0, hdfsSeek(fs, file, TEST_ZEROCOPY_FILE_LEN));
    buffer = hadoopReadZero(file, opts, 1);
    EXPECT_NONNULL(buffer);
    EXPECT_NULL(hadoopRzBufferGet(buffer));
    hadoopRzBufferFree(file, buffer);

    /* Cleanup */
    free(block);
    hadoopRzOptionsFree(opts);
    EXPECT_ZERO(hdfsCloseFile(fs, file));
    return 0;
}
int uploadFile(const char *path)
{
    hdfsFS fs = hdfsConnect("default", 0);
    hdfsFile fd_w;
    int fd;
    unsigned long size, i;
    struct stat fd_s;
    char *buf_r;
    const char *filename;

    /* Get the file size */
    if ((fd = open(path, O_RDONLY)) < 0) {
        perror("Open file failed");
        exit(1);
    }
    if (fstat(fd, &fd_s) < 0) {
        perror("Get file stat failed");
        exit(1);
    }
    size = fd_s.st_size;

    /* Open the file at hdfs */
    filename = strrchr(path, '/');
    if (!filename) {
        filename = path;
    } else {
        filename += 1;
    }
    fd_w = hdfsOpenFile(fs, filename, O_WRONLY|O_CREAT, 0, 0, 0);
    if (!fd_w) {
        perror("Failed to create file to upload");
        exit(1);
    }

    /* Write the file one mmap'ed block at a time */
    for (i = 0; i + UPLOAD_BLOCK < size; i += UPLOAD_BLOCK) {
        buf_r = (char *)mmap(NULL, UPLOAD_BLOCK, PROT_READ, MAP_SHARED, fd, i);
        if (buf_r == MAP_FAILED) {
            perror("Failed to map memory");
            exit(1);
        }
        hdfsWrite(fs, fd_w, (void*)buf_r, UPLOAD_BLOCK);
        if (hdfsFlush(fs, fd_w)) {
            perror("Failed to flush");
            exit(1);
        }
        if ((munmap(buf_r, UPLOAD_BLOCK)) < 0) {
            perror("memory unmap error");
            exit(1);
        }
    }

    /* Write the remainder (skipped when size == 0) */
    if (size) {
        buf_r = (char *)mmap(NULL, size - i, PROT_READ, MAP_SHARED, fd, i);
        if (buf_r == MAP_FAILED) {
            perror("Failed to map memory");
            exit(1);
        }
        hdfsWrite(fs, fd_w, (void*)buf_r, size - i);
        if (hdfsFlush(fs, fd_w)) {
            perror("Failed to flush");
            exit(1);
        }
        if ((munmap(buf_r, size - i)) < 0) {
            perror("memory unmap error");
            exit(1);
        }
    }

    /* Upload end */
    close(fd);
    hdfsCloseFile(fs, fd_w);
    hdfsDisconnect(fs);
    return 0;
}
static int doTestHdfsOperations(struct tlhThreadInfo *ti, hdfsFS fs)
{
    char prefix[256], tmp[256];
    hdfsFile file;
    int ret, expected;
    hdfsFileInfo *fileInfo;

    snprintf(prefix, sizeof(prefix), "/tlhData%04d", ti->threadIdx);

    if (hdfsExists(fs, prefix) == 0) {
        EXPECT_ZERO(hdfsDelete(fs, prefix, 1));
    }
    EXPECT_ZERO(hdfsCreateDirectory(fs, prefix));
    snprintf(tmp, sizeof(tmp), "%s/file", prefix);

    /* The file does not exist yet, so opening it for reading must fail. */
    EXPECT_NULL(hdfsOpenFile(fs, tmp, O_RDONLY, 0, 0, 0));

    file = hdfsOpenFile(fs, tmp, O_WRONLY, 0, 0, 0);
    EXPECT_NONNULL(file);

    /* TODO: implement writeFully and use it here */
    expected = (int)strlen(prefix);
    ret = hdfsWrite(fs, file, prefix, expected);
    if (ret < 0) {
        ret = errno;
        fprintf(stderr, "hdfsWrite failed and set errno %d\n", ret);
        return ret;
    }
    if (ret != expected) {
        fprintf(stderr, "hdfsWrite was supposed to write %d bytes, but "
                "it wrote %d\n", expected, ret);
        return EIO;
    }
    EXPECT_ZERO(hdfsFlush(fs, file));
    EXPECT_ZERO(hdfsCloseFile(fs, file));

    /* Let's re-open the file for reading */
    file = hdfsOpenFile(fs, tmp, O_RDONLY, 0, 0, 0);
    EXPECT_NONNULL(file);

    /* TODO: implement readFully and use it here */
    ret = hdfsRead(fs, file, tmp, sizeof(tmp));
    if (ret < 0) {
        ret = errno;
        fprintf(stderr, "hdfsRead failed and set errno %d\n", ret);
        return ret;
    }
    if (ret != expected) {
        fprintf(stderr, "hdfsRead was supposed to read %d bytes, but "
                "it read %d\n", expected, ret);
        return EIO;
    }
    EXPECT_ZERO(memcmp(prefix, tmp, expected));
    EXPECT_ZERO(hdfsCloseFile(fs, file));

    snprintf(tmp, sizeof(tmp), "%s/file", prefix);
    EXPECT_NONZERO(hdfsChown(fs, tmp, NULL, NULL));
    EXPECT_ZERO(hdfsChown(fs, tmp, NULL, "doop"));
    fileInfo = hdfsGetPathInfo(fs, tmp);
    EXPECT_NONNULL(fileInfo);
    EXPECT_ZERO(strcmp("doop", fileInfo->mGroup));
    hdfsFreeFileInfo(fileInfo, 1);

    EXPECT_ZERO(hdfsChown(fs, tmp, "ha", "doop2"));
    fileInfo = hdfsGetPathInfo(fs, tmp);
    EXPECT_NONNULL(fileInfo);
    EXPECT_ZERO(strcmp("ha", fileInfo->mOwner));
    EXPECT_ZERO(strcmp("doop2", fileInfo->mGroup));
    hdfsFreeFileInfo(fileInfo, 1);

    EXPECT_ZERO(hdfsChown(fs, tmp, "ha2", NULL));
    fileInfo = hdfsGetPathInfo(fs, tmp);
    EXPECT_NONNULL(fileInfo);
    EXPECT_ZERO(strcmp("ha2", fileInfo->mOwner));
    EXPECT_ZERO(strcmp("doop2", fileInfo->mGroup));
    hdfsFreeFileInfo(fileInfo, 1);

    EXPECT_ZERO(hdfsDelete(fs, prefix, 1));
    return 0;
}
/**
 * hdfsCopy - Copy file from one filesystem to another.
 *
 * @param srcFS The handle to source filesystem.
 * @param src The path of source file.
 * @param dstFS The handle to destination filesystem.
 * @param dst The path of destination file.
 * @return Returns 0 on success, -1 on error.
 */
int hdfsCopy(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst)
{
    char *block = NULL, *src_abs, *dst_abs;
    hdfsFileInfo *srcinfo = NULL;
    int res = -1;
    hdfsFile a = NULL, b = NULL;
    tOffset twritten = 0;

    src_abs = _makeabs(srcFS, src);
    dst_abs = _makeabs(dstFS, dst);

    if (hdfsExists(srcFS, src_abs) == -1) {
        ERR(ENOENT, "'%s' doesn't exist on srcFS", src_abs);
        goto out;
    }

    srcinfo = hdfsGetPathInfo(srcFS, src_abs);
    if (!srcinfo) {
        ERR(errno, "hdfsGetPathInfo failed");
        goto out;
    }
    if (srcinfo->mKind == kObjectKindDirectory) {
        ERR(ENOTSUP, "hdfsCopy can't do directories right now");
        goto out;
    }

    a = hdfsOpenFile(srcFS, src_abs, O_RDONLY, 0, 0, 0);
    if (!a) {
        ERR(errno, "hdfsOpenFile failed");
        goto out;
    }
    b = hdfsOpenFile(dstFS, dst_abs, O_WRONLY, 0, DEFAULT_REPLICATION,
                     DEFAULT_BLOCK_SIZE);
    if (!b) {
        ERR(errno, "hdfsOpenFile failed");
        goto out;
    }

    block = malloc(DEFAULT_BLOCK_SIZE);
    assert(block);

    while (twritten < srcinfo->mSize) {
        tSize toread, read, written;
        toread = _imin(DEFAULT_BLOCK_SIZE, srcinfo->mSize - twritten);
        read = hdfsRead(srcFS, a, block, toread);
        if (read == -1) {
            ERR(errno, "hdfsRead failed");
            goto out;
        }
        written = hdfsWrite(dstFS, b, block, read);
        if (written == -1) {
            ERR(errno, "hdfsWrite failed");
            goto out;
        }
        assert(written == read);
        twritten += written;
    }

    res = 0;
out:
    if (a)
        hdfsCloseFile(srcFS, a);
    if (b)
        hdfsCloseFile(dstFS, b);
    if (src_abs != src)
        free(src_abs);
    if (dst_abs != dst)
        free(dst_abs);
    if (block)
        free(block);
    if (srcinfo)
        hdfsFreeFileInfo(srcinfo, 1);
    return res;
}
int hdfs_file_close(struct back_storage *storage, bs_file_t file)
{
    //HLOG_DEBUG("hdfs -- enter func %s", __func__);
    hdfsFile hfile = (hdfsFile)file;
    //HLOG_DEBUG("hdfs -- leave func %s", __func__);
    return hdfsCloseFile((hdfsFS)storage->fs_handler, hfile);
}
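/* A minimal sketch of the matching open-side wrapper, assuming the same
 * back_storage layout (fs_handler holding the hdfsFS) and that bs_file_t is
 * an opaque alias for hdfsFile; the function name and flags handling are
 * illustrative, not part of the original code. */
bs_file_t hdfs_file_open(struct back_storage *storage, const char *path, int flags)
{
    /* pass 0 for buffer size, replication, and block size to take defaults */
    hdfsFile hfile = hdfsOpenFile((hdfsFS)storage->fs_handler, path, flags, 0, 0, 0);
    return (bs_file_t)hfile; /* NULL on failure, mirroring hdfsOpenFile */
}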
static int doTestHdfsOperations(struct tlhThreadInfo *ti, hdfsFS fs,
                                const struct tlhPaths *paths)
{
    char tmp[4096];
    hdfsFile file;
    int ret, expected, numEntries;
    hdfsFileInfo *fileInfo;
    struct hdfsReadStatistics *readStats = NULL;

    if (hdfsExists(fs, paths->prefix) == 0) {
        EXPECT_ZERO(hdfsDelete(fs, paths->prefix, 1));
    }
    EXPECT_ZERO(hdfsCreateDirectory(fs, paths->prefix));

    EXPECT_ZERO(doTestGetDefaultBlockSize(fs, paths->prefix));

    /* There should be no entry in the directory. */
    errno = EACCES; // see if errno is set to 0 on success
    EXPECT_NULL_WITH_ERRNO(hdfsListDirectory(fs, paths->prefix, &numEntries), 0);
    if (numEntries != 0) {
        fprintf(stderr, "hdfsListDirectory set numEntries to "
                "%d on empty directory.\n", numEntries);
    }

    /* There should not be any file to open for reading. */
    EXPECT_NULL(hdfsOpenFile(fs, paths->file1, O_RDONLY, 0, 0, 0));

    /* hdfsOpenFile should not accept mode = 3 */
    EXPECT_NULL(hdfsOpenFile(fs, paths->file1, 3, 0, 0, 0));

    file = hdfsOpenFile(fs, paths->file1, O_WRONLY, 0, 0, 0);
    EXPECT_NONNULL(file);

    /* TODO: implement writeFully and use it here */
    expected = (int)strlen(paths->prefix);
    ret = hdfsWrite(fs, file, paths->prefix, expected);
    if (ret < 0) {
        ret = errno;
        fprintf(stderr, "hdfsWrite failed and set errno %d\n", ret);
        return ret;
    }
    if (ret != expected) {
        fprintf(stderr, "hdfsWrite was supposed to write %d bytes, but "
                "it wrote %d\n", expected, ret);
        return EIO;
    }
    EXPECT_ZERO(hdfsFlush(fs, file));
    EXPECT_ZERO(hdfsHSync(fs, file));
    EXPECT_ZERO(hdfsCloseFile(fs, file));

    /* There should be 1 entry in the directory. */
    EXPECT_NONNULL(hdfsListDirectory(fs, paths->prefix, &numEntries));
    if (numEntries != 1) {
        fprintf(stderr, "hdfsListDirectory set numEntries to "
                "%d on directory containing 1 file.\n", numEntries);
    }

    /* Let's re-open the file for reading */
    file = hdfsOpenFile(fs, paths->file1, O_RDONLY, 0, 0, 0);
    EXPECT_NONNULL(file);

    EXPECT_ZERO(hdfsFileGetReadStatistics(file, &readStats));
    errno = 0;
    EXPECT_UINT64_EQ(UINT64_C(0), readStats->totalBytesRead);
    EXPECT_UINT64_EQ(UINT64_C(0), readStats->totalLocalBytesRead);
    EXPECT_UINT64_EQ(UINT64_C(0), readStats->totalShortCircuitBytesRead);
    hdfsFileFreeReadStatistics(readStats);

    /* TODO: implement readFully and use it here */
    ret = hdfsRead(fs, file, tmp, sizeof(tmp));
    if (ret < 0) {
        ret = errno;
        fprintf(stderr, "hdfsRead failed and set errno %d\n", ret);
        return ret;
    }
    if (ret != expected) {
        fprintf(stderr, "hdfsRead was supposed to read %d bytes, but "
                "it read %d\n", expected, ret);
        return EIO;
    }

    EXPECT_ZERO(hdfsFileGetReadStatistics(file, &readStats));
    errno = 0;
    EXPECT_UINT64_EQ((uint64_t)expected, readStats->totalBytesRead);
    hdfsFileFreeReadStatistics(readStats);
    EXPECT_ZERO(hdfsFileClearReadStatistics(file));
    EXPECT_ZERO(hdfsFileGetReadStatistics(file, &readStats));
    EXPECT_UINT64_EQ((uint64_t)0, readStats->totalBytesRead);
    hdfsFileFreeReadStatistics(readStats);

    EXPECT_ZERO(memcmp(paths->prefix, tmp, expected));
    EXPECT_ZERO(hdfsCloseFile(fs, file));

    // TODO: Non-recursive delete should fail?
    //EXPECT_NONZERO(hdfsDelete(fs, prefix, 0));
    EXPECT_ZERO(hdfsCopy(fs, paths->file1, fs, paths->file2));

    EXPECT_ZERO(hdfsChown(fs, paths->file2, NULL, NULL));
    EXPECT_ZERO(hdfsChown(fs, paths->file2, NULL, "doop"));
    fileInfo = hdfsGetPathInfo(fs, paths->file2);
    EXPECT_NONNULL(fileInfo);
    EXPECT_ZERO(strcmp("doop", fileInfo->mGroup));
    EXPECT_ZERO(hdfsFileIsEncrypted(fileInfo));
    hdfsFreeFileInfo(fileInfo, 1);

    EXPECT_ZERO(hdfsChown(fs, paths->file2, "ha", "doop2"));
    fileInfo = hdfsGetPathInfo(fs, paths->file2);
    EXPECT_NONNULL(fileInfo);
    EXPECT_ZERO(strcmp("ha", fileInfo->mOwner));
    EXPECT_ZERO(strcmp("doop2", fileInfo->mGroup));
    hdfsFreeFileInfo(fileInfo, 1);

    EXPECT_ZERO(hdfsChown(fs, paths->file2, "ha2", NULL));
    fileInfo = hdfsGetPathInfo(fs, paths->file2);
    EXPECT_NONNULL(fileInfo);
    EXPECT_ZERO(strcmp("ha2", fileInfo->mOwner));
    EXPECT_ZERO(strcmp("doop2", fileInfo->mGroup));
    hdfsFreeFileInfo(fileInfo, 1);

    snprintf(tmp, sizeof(tmp), "%s/nonexistent-file-name", paths->prefix);
    EXPECT_NEGATIVE_ONE_WITH_ERRNO(hdfsChown(fs, tmp, "ha3", NULL), ENOENT);
    return 0;
}
int dfs_open(const char *path, struct fuse_file_info *fi)
{
    hdfsFS fs = NULL;
    dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data;
    dfs_fh *fh = NULL;
    int mutexInit = 0, ret;

    TRACE1("open", path)

    // check params and the context var
    assert(path);
    assert('/' == *path);
    assert(dfs);

    // 0x8000 is always passed in and hadoop doesn't like it, so killing it here
    // bugbug figure out what this flag is and report problem to Hadoop JIRA
    int flags = (fi->flags & 0x7FFF);

    // retrieve dfs specific data
    fh = (dfs_fh*)calloc(1, sizeof(dfs_fh));
    if (!fh) {
        ERROR("Malloc of new file handle failed");
        ret = -EIO;
        goto error;
    }
    ret = fuseConnectAsThreadUid(&fh->conn);
    if (ret) {
        fprintf(stderr, "fuseConnectAsThreadUid: failed to open a libhdfs "
                "connection! error %d.\n", ret);
        ret = -EIO;
        goto error;
    }
    fs = hdfsConnGetFs(fh->conn);

    if (flags & O_RDWR) {
        hdfsFileInfo *info = hdfsGetPathInfo(fs, path);
        if (info == NULL) {
            // File does not exist (maybe?); interpret it as a O_WRONLY.
            // If the actual error was something else, we'll get it again when
            // we try to open the file.
            flags ^= O_RDWR;
            flags |= O_WRONLY;
        } else {
            // File exists; open this as read only.
            flags ^= O_RDWR;
            flags |= O_RDONLY;
        }
    }

    if ((fh->hdfsFH = hdfsOpenFile(fs, path, flags, 0, 0, 0)) == NULL) {
        ERROR("Could not open file %s (errno=%d)", path, errno);
        if (errno == 0 || errno == EINTERNAL) {
            ret = -EIO;
            goto error;
        }
        ret = -errno;
        goto error;
    }

    ret = pthread_mutex_init(&fh->mutex, NULL);
    if (ret) {
        fprintf(stderr, "dfs_open: error initializing mutex: error %d\n", ret);
        ret = -EIO;
        goto error;
    }
    mutexInit = 1;

    if (fi->flags & O_WRONLY || fi->flags & O_CREAT) {
        fh->buf = NULL;
    } else {
        assert(dfs->rdbuffer_size > 0);
        fh->buf = (char*)malloc(dfs->rdbuffer_size * sizeof(char));
        if (NULL == fh->buf) {
            ERROR("Could not allocate memory for a read for file %s\n", path);
            ret = -EIO;
            goto error;
        }
        fh->buffersStartOffset = 0;
        fh->bufferSize = 0;
    }
    fi->fh = (uint64_t)fh;
    return 0;

error:
    if (fh) {
        if (mutexInit) {
            pthread_mutex_destroy(&fh->mutex);
        }
        free(fh->buf);
        if (fh->hdfsFH) {
            hdfsCloseFile(fs, fh->hdfsFH);
        }
        if (fh->conn) {
            hdfsConnRelease(fh->conn);
        }
        free(fh);
    }
    return ret;
}
int main(int argc, char **argv)
{
    hdfsFS fs = hdfsConnect("default", 0);
    if (!fs) {
        fprintf(stderr, "Oops! Failed to connect to hdfs!\n");
        exit(-1);
    }

    hdfsFS lfs = hdfsConnect(NULL, 0);
    if (!lfs) {
        fprintf(stderr, "Oops! Failed to connect to 'local' hdfs!\n");
        exit(-1);
    }

    const char* writePath = "/tmp/testfile.txt";

    { //Write tests
        hdfsFile writeFile = hdfsOpenFile(fs, writePath, O_WRONLY|O_CREAT, 0, 0, 0);
        if (!writeFile) {
            fprintf(stderr, "Failed to open %s for writing!\n", writePath);
            exit(-1);
        }
        fprintf(stderr, "Opened %s for writing successfully...\n", writePath);

        char* buffer = "Hello, World!";
        tSize num_written_bytes = hdfsWrite(fs, writeFile, (void*)buffer,
                                            strlen(buffer)+1);
        fprintf(stderr, "Wrote %d bytes\n", num_written_bytes);

        tOffset currentPos = -1;
        if ((currentPos = hdfsTell(fs, writeFile)) == -1) {
            fprintf(stderr, "Failed to get current file position correctly! "
                    "Got %ld!\n", currentPos);
            exit(-1);
        }
        fprintf(stderr, "Current position: %ld\n", currentPos);

        if (hdfsFlush(fs, writeFile)) {
            fprintf(stderr, "Failed to 'flush' %s\n", writePath);
            exit(-1);
        }
        fprintf(stderr, "Flushed %s successfully!\n", writePath);

        hdfsCloseFile(fs, writeFile);
    }

    { //Read tests
        const char* readPath = "/tmp/testfile.txt";
        int exists = hdfsExists(fs, readPath);
        if (exists) {
            fprintf(stderr, "Failed to validate existence of %s\n", readPath);
            exit(-1);
        }

        hdfsFile readFile = hdfsOpenFile(fs, readPath, O_RDONLY, 0, 0, 0);
        if (!readFile) {
            fprintf(stderr, "Failed to open %s for reading!\n", readPath);
            exit(-1);
        }

        fprintf(stderr, "hdfsAvailable: %d\n", hdfsAvailable(fs, readFile));

        tOffset seekPos = 1;
        if (hdfsSeek(fs, readFile, seekPos)) {
            fprintf(stderr, "Failed to seek %s for reading!\n", readPath);
            exit(-1);
        }

        tOffset currentPos = -1;
        if ((currentPos = hdfsTell(fs, readFile)) != seekPos) {
            fprintf(stderr, "Failed to get current file position correctly! "
                    "Got %ld!\n", currentPos);
            exit(-1);
        }
        fprintf(stderr, "Current position: %ld\n", currentPos);

        static char buffer[32];
        tSize num_read_bytes = hdfsRead(fs, readFile, (void*)buffer,
                                        sizeof(buffer));
        fprintf(stderr, "Read following %d bytes:\n%s\n",
                num_read_bytes, buffer);

        num_read_bytes = hdfsPread(fs, readFile, 0, (void*)buffer,
                                   sizeof(buffer));
        fprintf(stderr, "Read following %d bytes:\n%s\n",
                num_read_bytes, buffer);

        hdfsCloseFile(fs, readFile);
    }

    int totalResult = 0;
    int result = 0;

    { //Generic file-system operations
        const char* srcPath = "/tmp/testfile.txt";
        const char* dstPath = "/tmp/testfile2.txt";

        fprintf(stderr, "hdfsCopy(remote-local): %s\n",
                ((result = hdfsCopy(fs, srcPath, lfs, srcPath)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsCopy(remote-remote): %s\n",
                ((result = hdfsCopy(fs, srcPath, fs, dstPath)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsMove(local-local): %s\n",
                ((result = hdfsMove(lfs, srcPath, lfs, dstPath)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsMove(remote-local): %s\n",
                ((result = hdfsMove(fs, srcPath, lfs, srcPath)) ? "Failed!" : "Success!"));
        totalResult += result;

        fprintf(stderr, "hdfsRename: %s\n",
                ((result = hdfsRename(fs, dstPath, srcPath)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsCopy(remote-remote): %s\n",
                ((result = hdfsCopy(fs, srcPath, fs, dstPath)) ? "Failed!" : "Success!"));
        totalResult += result;

        const char* slashTmp = "/tmp";
        const char* newDirectory = "/tmp/newdir";
        fprintf(stderr, "hdfsCreateDirectory: %s\n",
                ((result = hdfsCreateDirectory(fs, newDirectory)) ? "Failed!" : "Success!"));
        totalResult += result;

        fprintf(stderr, "hdfsSetReplication: %s\n",
                ((result = hdfsSetReplication(fs, srcPath, 2)) ? "Failed!" : "Success!"));
        totalResult += result;

        char buffer[256];
        const char *resp;
        fprintf(stderr, "hdfsGetWorkingDirectory: %s\n",
                ((resp = hdfsGetWorkingDirectory(fs, buffer, sizeof(buffer))) ? buffer : "Failed!"));
        totalResult += (resp ? 0 : 1);
        fprintf(stderr, "hdfsSetWorkingDirectory: %s\n",
                ((result = hdfsSetWorkingDirectory(fs, slashTmp)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsGetWorkingDirectory: %s\n",
                ((resp = hdfsGetWorkingDirectory(fs, buffer, sizeof(buffer))) ? buffer : "Failed!"));
        totalResult += (resp ? 0 : 1);

        fprintf(stderr, "hdfsGetDefaultBlockSize: %ld\n", hdfsGetDefaultBlockSize(fs));
        fprintf(stderr, "hdfsGetCapacity: %ld\n", hdfsGetCapacity(fs));
        fprintf(stderr, "hdfsGetUsed: %ld\n", hdfsGetUsed(fs));

        hdfsFileInfo *fileInfo = NULL;
        if ((fileInfo = hdfsGetPathInfo(fs, slashTmp)) != NULL) {
            fprintf(stderr, "hdfsGetPathInfo - SUCCESS!\n");
            fprintf(stderr, "Name: %s, ", fileInfo->mName);
            fprintf(stderr, "Type: %c, ", (char)(fileInfo->mKind));
            fprintf(stderr, "Replication: %d, ", fileInfo->mReplication);
            fprintf(stderr, "BlockSize: %ld, ", fileInfo->mBlockSize);
            fprintf(stderr, "Size: %ld, ", fileInfo->mSize);
            fprintf(stderr, "LastMod: %s", ctime(&fileInfo->mLastMod));
            fprintf(stderr, "Owner: %s, ", fileInfo->mOwner);
            fprintf(stderr, "Group: %s, ", fileInfo->mGroup);
            char permissions[10];
            permission_disp(fileInfo->mPermissions, permissions);
            fprintf(stderr, "Permissions: %d (%s)\n",
                    fileInfo->mPermissions, permissions);
            hdfsFreeFileInfo(fileInfo, 1);
        } else {
            totalResult++;
            fprintf(stderr, "waah! hdfsGetPathInfo for %s - FAILED!\n", slashTmp);
        }

        hdfsFileInfo *fileList = 0;
        int numEntries = 0;
        if ((fileList = hdfsListDirectory(fs, slashTmp, &numEntries)) != NULL) {
            int i = 0;
            for (i = 0; i < numEntries; ++i) {
                fprintf(stderr, "Name: %s, ", fileList[i].mName);
                fprintf(stderr, "Type: %c, ", (char)fileList[i].mKind);
                fprintf(stderr, "Replication: %d, ", fileList[i].mReplication);
                fprintf(stderr, "BlockSize: %ld, ", fileList[i].mBlockSize);
                fprintf(stderr, "Size: %ld, ", fileList[i].mSize);
                fprintf(stderr, "LastMod: %s", ctime(&fileList[i].mLastMod));
                fprintf(stderr, "Owner: %s, ", fileList[i].mOwner);
                fprintf(stderr, "Group: %s, ", fileList[i].mGroup);
                char permissions[10];
                permission_disp(fileList[i].mPermissions, permissions);
                fprintf(stderr, "Permissions: %d (%s)\n",
                        fileList[i].mPermissions, permissions);
            }
            hdfsFreeFileInfo(fileList, numEntries);
        } else {
            if (errno) {
                totalResult++;
                fprintf(stderr, "waah! hdfsListDirectory - FAILED!\n");
            } else {
                fprintf(stderr, "Empty directory!\n");
            }
        }

        char*** hosts = hdfsGetHosts(fs, srcPath, 0, 1);
        if (hosts) {
            fprintf(stderr, "hdfsGetHosts - SUCCESS! ... \n");
            int i = 0;
            while (hosts[i]) {
                int j = 0;
                while (hosts[i][j]) {
                    fprintf(stderr, "\thosts[%d][%d] - %s\n", i, j, hosts[i][j]);
                    ++j;
                }
                ++i;
            }
        } else {
            totalResult++;
            fprintf(stderr, "waah! hdfsGetHosts - FAILED!\n");
        }

        char *newOwner = "root";
        // /tmp is chmod'ed to 777 further down, so that when we later connect
        // as a different user we can still write to it
        short newPerm = 0666;

        // chown write
        fprintf(stderr, "hdfsChown: %s\n",
                ((result = hdfsChown(fs, writePath, NULL, "users")) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsChown: %s\n",
                ((result = hdfsChown(fs, writePath, newOwner, NULL)) ? "Failed!" : "Success!"));
        totalResult += result;

        // chmod write
        fprintf(stderr, "hdfsChmod: %s\n",
                ((result = hdfsChmod(fs, writePath, newPerm)) ? "Failed!" : "Success!"));
        totalResult += result;

        sleep(2);
        tTime newMtime = time(NULL);
        tTime newAtime = time(NULL);

        // utime write
        fprintf(stderr, "hdfsUtime: %s\n",
                ((result = hdfsUtime(fs, writePath, newMtime, newAtime)) ? "Failed!" : "Success!"));
        totalResult += result;

        // chown/chmod/utime read
        hdfsFileInfo *finfo = hdfsGetPathInfo(fs, writePath);
        fprintf(stderr, "hdfsChown read: %s\n",
                ((result = (strcmp(finfo->mOwner, newOwner) != 0)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsChmod read: %s\n",
                ((result = (finfo->mPermissions != newPerm)) ? "Failed!" : "Success!"));
        totalResult += result;

        // will later use /tmp/ as a different user so enable it
        fprintf(stderr, "hdfsChmod: %s\n",
                ((result = hdfsChmod(fs, "/tmp/", 0777)) ? "Failed!" : "Success!"));
        totalResult += result;

        fprintf(stderr, "newMTime=%ld\n", newMtime);
        fprintf(stderr, "curMTime=%ld\n", finfo->mLastMod);

        fprintf(stderr, "hdfsUtime read (mtime): %s\n",
                ((result = (finfo->mLastMod != newMtime)) ? "Failed!" : "Success!"));
        totalResult += result;

        // No easy way to turn on access times from hdfs_test right now
        // fprintf(stderr, "hdfsUtime read (atime): %s\n",
        //         ((result = (finfo->mLastAccess != newAtime)) ? "Failed!" : "Success!"));
        // totalResult += result;

        hdfsFreeFileInfo(finfo, 1);

        // Clean up
        fprintf(stderr, "hdfsDelete: %s\n",
                ((result = hdfsDelete(fs, newDirectory)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsDelete: %s\n",
                ((result = hdfsDelete(fs, srcPath)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsDelete: %s\n",
                ((result = hdfsDelete(lfs, srcPath)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsDelete: %s\n",
                ((result = hdfsDelete(lfs, dstPath)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsExists: %s\n",
                ((result = hdfsExists(fs, newDirectory)) ? "Success!" : "Failed!"));
        totalResult += (result ? 0 : 1);
    }

    totalResult += (hdfsDisconnect(fs) != 0);

    {
        // Now test connecting as a specific user.
        // This is only meant to test that we connected as that user, not to
        // test the actual fs user capabilities.  Thus just create a file and
        // check that the owner is correct.
        const char *tuser = "******";
        const char* writePath = "/tmp/usertestfile.txt";
        const char **groups = (const char**)malloc(sizeof(char*) * 2);
        groups[0] = "users";
        groups[1] = "nobody";

        fs = hdfsConnectAsUser("default", 0, tuser, groups, 2);
        if (!fs) {
            fprintf(stderr, "Oops! Failed to connect to hdfs as user %s!\n", tuser);
            exit(-1);
        }

        hdfsFile writeFile = hdfsOpenFile(fs, writePath, O_WRONLY|O_CREAT, 0, 0, 0);
        if (!writeFile) {
            fprintf(stderr, "Failed to open %s for writing!\n", writePath);
            exit(-1);
        }
        fprintf(stderr, "Opened %s for writing successfully...\n", writePath);

        char* buffer = "Hello, World!";
        tSize num_written_bytes = hdfsWrite(fs, writeFile, (void*)buffer,
                                            strlen(buffer)+1);
        fprintf(stderr, "Wrote %d bytes\n", num_written_bytes);

        if (hdfsFlush(fs, writeFile)) {
            fprintf(stderr, "Failed to 'flush' %s\n", writePath);
            exit(-1);
        }
        fprintf(stderr, "Flushed %s successfully!\n", writePath);

        hdfsCloseFile(fs, writeFile);

        hdfsFileInfo *finfo = hdfsGetPathInfo(fs, writePath);
        fprintf(stderr, "hdfs new file user is correct: %s\n",
                ((result = (strcmp(finfo->mOwner, tuser) != 0)) ? "Failed!" : "Success!"));
        totalResult += result;
    }

    totalResult += (hdfsDisconnect(fs) != 0);

    if (totalResult != 0) {
        return -1;
    } else {
        return 0;
    }
}
NABoolean HHDFSFileStats::populate(hdfsFS fs, hdfsFileInfo *fileInfo,
                                   Int32& samples,
                                   NABoolean doEstimation,
                                   char recordTerminator,
                                   NABoolean isSequenceFile)
{
  NABoolean result = TRUE;

  // copy fields from fileInfo
  fileName_       = fileInfo->mName;
  replication_    = (Int32) fileInfo->mReplication;
  totalSize_      = (Int64) fileInfo->mSize;
  blockSize_      = (Int64) fileInfo->mBlockSize;
  modificationTS_ = fileInfo->mLastMod;
  numFiles_       = 1;
  isSequenceFile_ = isSequenceFile;

  Int64 sampleBufferSize = MINOF(blockSize_, 65536);
  NABoolean sortHosts = (CmpCommon::getDefault(HIVE_SORT_HDFS_HOSTS) == DF_ON);

  sampleBufferSize = MINOF(sampleBufferSize, totalSize_/10);

  if (doEstimation && sampleBufferSize > 100) {
    //
    // Open the hdfs file to estimate record length. Read one block at
    // a time searching for <s> instances of record separators. Stop reading
    // when either <s> instances have been found or a partial number of
    // instances have and we have exhausted all data content in the block.
    // We will keep reading if the current block does not contain
    // any instance of the record separator.
    //
    hdfsFile file =
        hdfsOpenFile(fs, fileInfo->mName,
                     O_RDONLY,
                     sampleBufferSize,      // buffer size
                     0,                     // replication, take the default size
                     fileInfo->mBlockSize   // blocksize
                     );

    if (file != NULL) {
      tOffset offset = 0;
      tSize bufLen = sampleBufferSize;
      char* buffer = new (heap_) char[bufLen+1];

      buffer[bufLen] = 0; // extra null at the end to keep strchr()
                          // from running over the buffer

      NABoolean sampleDone = FALSE;

      Int32 totalSamples = 10;
      Int32 totalLen = 0;

      while (!sampleDone) {
        tSize szRead = hdfsPread(fs, file, offset, buffer, bufLen);
        char* pos = NULL;

        if (szRead > 0) {
          //if (isSequenceFile && offset==0 && memcmp(buffer, "SEQ6", 4) == 0)
          //  isSequenceFile_ = TRUE;

          char* start = buffer;
          for (Int32 i = 0; i < totalSamples; i++) {
            if ((pos = strchr(start, recordTerminator))) {
              totalLen += pos - start + 1 + offset;
              samples++;
              start = pos + 1;
              if (start > buffer + bufLen) {
                sampleDone = TRUE;
                break;
              }
            } else
              break;
          }
          if (samples > 0)
            break;
          else
            offset += bufLen;
        } else
          break; // failed to read any bytes; bail out

      }

      NADELETEBASIC(buffer, heap_);

      if (samples > 0) {
        sampledBytes_ += totalLen;
        sampledRows_  += samples;
      }

      hdfsCloseFile(fs, file);
    } else {
      // can not do hdfs open on the file; assume the file is empty
    }
  }

  if (blockSize_) {
    numBlocks_ = totalSize_ / blockSize_;
    if (totalSize_ % blockSize_ > 0)
      numBlocks_++; // partial block at the end
  } else {
    CMPASSERT(blockSize_); // TBD:DIAGS
    result = FALSE;
  }

  if (totalSize_ > 0) {
    blockHosts_ = new(heap_) HostId[replication_*numBlocks_];

    // walk through blocks and record their locations
    tOffset o = 0;
    Int64 blockNum;
    for (blockNum = 0; blockNum < numBlocks_ && result; blockNum++) {
      char*** blockHostNames = hdfsGetHosts(fs, fileInfo->mName, o,
                                            fileInfo->mBlockSize);
      o += blockSize_;

      if (blockHostNames == NULL) {
        CMPASSERT(blockHostNames); // TBD:DIAGS
        result = FALSE;
      } else {
        char **h = *blockHostNames;
        HostId hostId;

        for (Int32 r = 0; r < replication_; r++) {
          if (h[r])
            hostId = HHDFSMasterHostList::getHostNum(h[r]);
          else
            hostId = HHDFSMasterHostList::InvalidHostId;
          blockHosts_[r*numBlocks_+blockNum] = hostId;
        }
        if (sortHosts)
          sortHostArray(blockHosts_, (Int32) numBlocks_,
                        replication_, getFileName());
      }
      hdfsFreeHosts(blockHostNames);
    }
  }
  return result;
}