bool MaprInputCodedBlockFile::Open(std::string uri) {
  CHECK(!is_open_) << "File already open.";
  if (!IsValidUri(uri, "maprfs")) {
    LOG(ERROR) << "failed to validate uri: " << uri;
    return false;
  }
  std::string scheme, path;
  CHECK(ParseUri(uri, &scheme, &path)) << "Invalid uri format: " << uri;
  file_ = hdfsOpenFile(fs_, path.c_str(), O_RDONLY, 0, 0, 0);
  if (file_ == NULL) {
    LOG(ERROR) << "Failed to open file: " << path;
    return false;
  }
  is_open_ = true;
  path_ = path;

  // Cache the file size.
  hdfsFileInfo* info = hdfsGetPathInfo(fs_, path_.c_str());
  CHECK(info);
  size_ = info->mSize;
  hdfsFreeFileInfo(info, 1);
  return true;
}
block_id_counter FileManagerHdfs::getMaxUsedBlockCounter(const block_id_domain block_domain) const {
  int num_files = 0;
  hdfsFileInfo *file_infos = hdfsListDirectory(hdfs_, storage_path_.c_str(), &num_files);
  if (file_infos == nullptr) {
    if (errno != ENOENT) {
      LOG_WARNING("Failed to list file info with error: " << strerror(errno));
    }
    return 0;
  }

  string filename_pattern("/qsblk_");
  filename_pattern.append(ToZeroPaddedString(block_domain, kBlockIdDomainLengthInDigits));
  filename_pattern.append("_%");
  filename_pattern.append(SCNu64);
  filename_pattern.append(".qsb");

  block_id_counter counter_max = 0, counter;
  for (int i = 0; i < num_files; ++i) {
    // NOTE(zuyu): mName looks like
    // "/user/<username>/<storage_path_>/qsblk_<block_domain>_[0-9]*.qsb".
    const char *filename = std::strrchr(file_infos[i].mName, '/');
    if (filename != nullptr &&
        sscanf(filename, filename_pattern.c_str(), &counter) == 1 &&
        counter > counter_max) {
      counter_max = counter;
    }
  }
  hdfsFreeFileInfo(file_infos, num_files);

  return counter_max;
}
off64_t HdfsFile::size() {
  off64_t ret = 0;
  int savedErrno;

  // O_RDONLY is zero, so the access mode must be compared via O_ACCMODE;
  // a bitwise AND against O_RDONLY can never be true.
  if ((m_flags & O_ACCMODE) == O_RDONLY) {
    hdfsFileInfo* fileinfo = hdfsGetPathInfo(m_fs, m_fname.c_str());
    ret = (fileinfo ? fileinfo->mSize : -1);
    if (fileinfo)
      hdfsFreeFileInfo(fileinfo, 1);
  } else {
    // If the file is open for either WRITE or APPEND, the size is always the
    // current file offset, since HDFS can only write at the end.
    ret = tell();
  }

  savedErrno = errno;
  if (IDBLogger::isEnabled())
    IDBLogger::logSize(m_fname, this, ret);
  errno = savedErrno;

  return ret;
}
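Nearly every snippet in this collection repeats the same manual pattern: hdfsGetPathInfo(), read a field, then hdfsFreeFileInfo() on every exit path. For C++ callers, a small RAII guard makes that pattern leak-proof under early returns and exceptions. Below is a minimal sketch assuming only the standard libhdfs calls; the FileInfoGuard name itself is hypothetical, not part of any library here.

#include <hdfs.h>

// Hypothetical RAII helper: frees a single hdfsFileInfo record when the
// guard leaves scope, so no manual hdfsFreeFileInfo() call is needed.
class FileInfoGuard {
 public:
  FileInfoGuard(hdfsFS fs, const char* path)
      : info_(hdfsGetPathInfo(fs, path)) {}
  ~FileInfoGuard() { if (info_) hdfsFreeFileInfo(info_, 1); }
  FileInfoGuard(const FileInfoGuard&) = delete;
  FileInfoGuard& operator=(const FileInfoGuard&) = delete;

  const hdfsFileInfo* get() const { return info_; }  // NULL if the lookup failed

 private:
  hdfsFileInfo* info_;
};

// Usage sketch: a size lookup with no free on any return path.
// off64_t size_of(hdfsFS fs, const char* path) {
//   FileInfoGuard info(fs, path);
//   return info.get() ? info.get()->mSize : -1;
// }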
bool MaprFileSystem::ListDirectory(const std::string& uri, std::vector<std::string>* contents) {
  CHECK(contents);
  contents->clear();
  std::string path = GetUriPathOrDie(uri);
  std::string host = "default";
  hdfsFS fs = hdfsConnect(host.c_str(), 0);  // use default config file settings
  int num_entries = 0;  // initialize: hdfsListDirectory may fail without setting it
  hdfsFileInfo* entries = hdfsListDirectory(fs, path.c_str(), &num_entries);
  hdfsFileInfo* cur_entry = entries;
  for (int i = 0; entries && i < num_entries; ++i) {
    // Sometimes the list-directory call returns paths with the scheme prefix
    // and sometimes it doesn't, so normalize: always output uris with a
    // proper scheme prefix.
    std::string cur_scheme, cur_path, error;
    if (ParseUri(cur_entry->mName, &cur_scheme, &cur_path, &error)) {
      // If it has a scheme prefix, make sure it is maprfs as expected.
      CHECK_EQ(cur_scheme, "maprfs");
    } else {
      // No uri scheme prefix, so assume it is just the path portion.
      cur_path = cur_entry->mName;
    }
    contents->push_back(Uri("maprfs", cur_path));
    cur_entry++;
  }
  if (entries) {
    hdfsFreeFileInfo(entries, num_entries);
  }
  CHECK_EQ(hdfsDisconnect(fs), 0);
  return true;
}
bs_file_info_t *hdfs_list_dir(struct back_storage *storage,
                              const char *dir_path, uint32_t *num_entries) {
    HLOG_DEBUG("hdfs -- enter func %s", __func__);
    char full_path[256];
    build_hdfs_path(full_path, storage->dir, storage->fs_name, dir_path);
    int num;
    hdfsFileInfo *hinfos = hdfsListDirectory((hdfsFS)storage->fs_handler,
                                             full_path, &num);
    if (NULL == hinfos) {
        //HLOG_ERROR("hdfsListDirectory error");
        return NULL;
    }
    hdfsFileInfo *hinfo = hinfos;
    bs_file_info_t *infos =
        (bs_file_info_t *)g_malloc0(sizeof(bs_file_info_t) * 8192);
    if (NULL == infos) {
        //HLOG_ERROR("Allocate Error!");
        hdfsFreeFileInfo(hinfos, num);  /* don't leak the listing on failure */
        return NULL;
    }
    bs_file_info_t *info = infos;
    int i;
    for (i = 0; i < num; i++) {
        char *basename = g_path_get_basename(hinfo->mName);
        strcpy((char *)info->name, basename);
        g_free(basename);  /* g_path_get_basename() allocates; free it */
        info->is_dir = 0;
        info->size = hinfo->mSize;
        info->lmtime = hinfo->mLastMod;
        info++;
        hinfo++;
    }
    hdfsFreeFileInfo(hinfos, num);
    *num_entries = num;
    HLOG_DEBUG("hdfs -- leave func %s", __func__);
    return infos;
}
HDFSChunkReaderIterator::HDFSChunkReaderIterator(const ChunkID& chunk_id,
                                                 unsigned& chunk_size,
                                                 const unsigned& block_size)
    : ChunkReaderIterator(chunk_id, block_size, chunk_size) {
  block_buffer_ = new Block(block_size_);
  fs_ = hdfsConnect(Config::hdfs_master_ip.c_str(), Config::hdfs_master_port);
  hdfs_fd_ = hdfsOpenFile(fs_, chunk_id.partition_id.getName().c_str(),
                          O_RDONLY, 0, 0, 0);
  if (!hdfs_fd_) {
    printf("fails to open HDFS file [%s]\n",
           chunk_id.partition_id.getName().c_str());
    number_of_blocks_ = 0;
  }

  // Offset of this chunk within the file.
  const unsigned start_pos = CHUNK_SIZE * chunk_id_.chunk_off;
  if (hdfsSeek(fs_, hdfs_fd_, start_pos) == -1) {
    printf("fails to set the start offset %u for [%s]\n", start_pos,
           chunk_id.partition_id.getName().c_str());
    number_of_blocks_ = 0;
  }

  // To be refined after communicating with Zhang Lei.
  hdfsFileInfo* file_info = hdfsGetPathInfo(fs_, "/imdb/");
  if (start_pos + CHUNK_SIZE < file_info->mSize) {
    number_of_blocks_ = CHUNK_SIZE / block_size_;
  } else {
    number_of_blocks_ = (file_info->mSize - start_pos) / block_size_;
  }
  hdfsFreeFileInfo(file_info, 1);
}
int HdfsFileSystem::listDirectory(const char* pathname, std::list<std::string>& contents) const {
  // Clear the return list.
  contents.erase(contents.begin(), contents.end());

  // hdfs is not happy if you call list directory on a path that does not exist.
  if (!exists(pathname)) {
    errno = ENOENT;
    return -1;
  }

  int numEntries;
  hdfsFileInfo* fileinfo = hdfsListDirectory(m_fs, pathname, &numEntries);
  for (int i = 0; i < numEntries && fileinfo; ++i) {
    // hdfs returns a fully specified path name but we only want to return
    // paths relative to the directory passed in.
    boost::filesystem::path filepath(fileinfo[i].mName);
    contents.push_back(filepath.filename().c_str());
  }
  if (fileinfo)
    hdfsFreeFileInfo(fileinfo, numEntries);

  return 0;
}
void dir_cpi_impl::sync_is_dir(bool& is_dir, saga::url url) {
  instance_data idata(this);
  is_dir = false;

  saga::url dir_url(idata->location_);
  boost::filesystem::path name(url.get_path(), boost::filesystem::native);
  boost::filesystem::path path(idata->location_.get_path(),
                               boost::filesystem::native);

  if (!name.has_root_path())
    path /= name;
  else
    path = name;

  if (hdfsExists(fs_, path.string().c_str()) == 0) {
    // Check to see if it is a directory.
    hdfsFileInfo* info = hdfsGetPathInfo(fs_, path.string().c_str());
    if (info == NULL) {
      SAGA_ADAPTOR_THROW("file_cpi_impl::init failed", saga::NoSuccess);
    }
    if (info->mKind == kObjectKindDirectory)
      is_dir = true;
    hdfsFreeFileInfo(info, 1);
  }
}
int dfs_getattr(const char *path, struct stat *st) {
  TRACE1("getattr", path)

  // Retrieve dfs-specific data.
  dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data;

  // Check params and the context var.
  assert(dfs);
  assert(path);
  assert(st);

  // If not connected, try to connect; fail out if we can't.
  if (NULL == dfs->fs &&
      NULL == (dfs->fs = hdfsConnect(dfs->nn_hostname, dfs->nn_port))) {
    syslog(LOG_ERR, "ERROR: could not connect to %s:%d %s:%d\n",
           dfs->nn_hostname, dfs->nn_port, __FILE__, __LINE__);
    return -EIO;
  }

  // Call the dfs API to get the actual information.
  hdfsFileInfo *info = hdfsGetPathInfo(dfs->fs, path);
  if (NULL == info) {
    return -ENOENT;
  }
  fill_stat_structure(&info[0], st);

  // Set up hard-link info: for a file it is 1, for a directory it is the
  // number of entries + 2 (for . and ..).
  if (info[0].mKind == kObjectKindDirectory) {
    int numEntries = 0;
    hdfsFileInfo *dirList = hdfsListDirectory(dfs->fs, path, &numEntries);
    if (dirList) {
      hdfsFreeFileInfo(dirList, numEntries);
    }
    st->st_nlink = numEntries + 2;
  } else {
    // Not a directory.
    st->st_nlink = 1;
  }

  // Free the info pointer.
  hdfsFreeFileInfo(info, 1);
  return 0;
}
bool HdfsFileSystem::isDir(const char* pathname) const {
  hdfsFileInfo* fileinfo = hdfsGetPathInfo(m_fs, pathname);
  bool retval = (fileinfo ? fileinfo->mKind == kObjectKindDirectory : false);
  if (fileinfo)
    hdfsFreeFileInfo(fileinfo, 1);
  return retval;
}
static struct libhdfs_data *libhdfs_data_create(const struct options *opts) {
    struct libhdfs_data *ldata = NULL;
    struct hdfsBuilder *builder = NULL;
    hdfsFileInfo *pinfo = NULL;

    ldata = calloc(1, sizeof(struct libhdfs_data));
    if (!ldata) {
        fprintf(stderr, "Failed to allocate libhdfs test data.\n");
        goto error;
    }
    builder = hdfsNewBuilder();
    if (!builder) {
        fprintf(stderr, "Failed to create builder.\n");
        goto error;
    }
    hdfsBuilderSetNameNode(builder, opts->rpc_address);
    hdfsBuilderConfSetStr(builder,
        "dfs.client.read.shortcircuit.skip.checksum", "true");
    ldata->fs = hdfsBuilderConnect(builder);
    if (!ldata->fs) {
        fprintf(stderr, "Could not connect to default namenode!\n");
        goto error;
    }
    pinfo = hdfsGetPathInfo(ldata->fs, opts->path);
    if (!pinfo) {
        int err = errno;
        fprintf(stderr, "hdfsGetPathInfo(%s) failed: error %d (%s). "
                "Attempting to re-create file.\n",
                opts->path, err, strerror(err));
        if (libhdfs_data_create_file(ldata, opts))
            goto error;
    } else if (pinfo->mSize != opts->length) {
        fprintf(stderr, "hdfsGetPathInfo(%s) failed: length was %lld, "
                "but we want length %lld. Attempting to re-create file.\n",
                opts->path, (long long)pinfo->mSize, (long long)opts->length);
        if (libhdfs_data_create_file(ldata, opts))
            goto error;
    }
    ldata->file = hdfsOpenFile(ldata->fs, opts->path, O_RDONLY, 0, 0, 0);
    if (!ldata->file) {
        int err = errno;
        fprintf(stderr, "hdfsOpenFile(%s) failed: error %d (%s)\n",
                opts->path, err, strerror(err));
        goto error;
    }
    ldata->length = opts->length;
    if (pinfo)
        hdfsFreeFileInfo(pinfo, 1);  /* release the path info on success too */
    return ldata;

error:
    if (pinfo)
        hdfsFreeFileInfo(pinfo, 1);
    if (ldata)
        libhdfs_data_free(ldata);
    return NULL;
}
void Hdfs3Glob(const std::string& _path, const GlobType& gtype,
               FileList& filelist) {
    std::string path = _path;
    // crop off hdfs://
    die_unless(common::StartsWith(path, "hdfs://"));
    path = path.substr(7);

    // split uri into host/path
    std::vector<std::string> splitted = common::Split(path, '/', 2);

    hdfsFS fs = Hdfs3FindConnection(splitted[0]);
    std::string hosturi = "hdfs://" + splitted[0];

    // prepend root /
    splitted[1] = "/" + splitted[1];

    // list directory
    int num_entries = 0;
    hdfsFileInfo* list = hdfsListDirectory(
        fs, splitted[1].c_str(), &num_entries);
    if (!list) return;

    for (int i = 0; i < num_entries; ++i) {
        FileInfo fi;

        fi.path = list[i].mName;
        // collapse duplicate leading slashes into one
        while (fi.path.size() >= 2 && fi.path[0] == '/' && fi.path[1] == '/')
            fi.path.erase(fi.path.begin(), fi.path.begin() + 1);
        // prepend host uri
        fi.path = hosturi + fi.path;

        if (list[i].mKind == kObjectKindFile) {
            if (gtype == GlobType::All || gtype == GlobType::File) {
                // strangely, full file name globs return the file with a '/'
                // at the end, so strip it.
                while (fi.path.back() == '/')
                    fi.path.resize(fi.path.size() - 1);
                fi.type = Type::File;
                fi.size = list[i].mSize;
                filelist.emplace_back(fi);
            }
        }
        else if (list[i].mKind == kObjectKindDirectory) {
            if (gtype == GlobType::All || gtype == GlobType::Directory) {
                fi.type = Type::Directory;
                fi.size = list[i].mSize;
                filelist.emplace_back(fi);
            }
        }
    }
    hdfsFreeFileInfo(list, num_entries);
}
int hdfsExists(hdfsFS fs, const char *path) {
    hdfsFileInfo *fileInfo = hdfsGetPathInfoImpl(fs, path, 0);
    if (!fileInfo) {
        // (errno will have been set by hdfsGetPathInfo)
        return -1;
    }
    hdfsFreeFileInfo(fileInfo, 1);
    return 0;
}
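As the implementation above shows, hdfsExists() returns 0 when the path exists and -1 otherwise, with errno carrying the cause. A minimal caller sketch under those conventions; the helper name and messages are illustrative, not from any snippet here.

#include <cerrno>
#include <cstdio>
#include <hdfs.h>

// Sketch: check a path and distinguish "absent" from other failures,
// relying on the 0 / -1 + errno convention shown above.
void check_path(hdfsFS fs, const char* path) {
    if (hdfsExists(fs, path) == 0) {
        printf("%s exists\n", path);
    } else if (errno == ENOENT) {
        printf("%s does not exist\n", path);
    } else {
        printf("stat of %s failed: errno %d\n", path, errno);
    }
}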
NABoolean HHDFSListPartitionStats::populate(hdfsFS fs, const NAString &dir,
                                            Int32 numOfBuckets,
                                            NABoolean doEstimation,
                                            char recordTerminator,
                                            NABoolean isSequenceFile) {
  NABoolean result = TRUE;
  int numFiles = 0;

  // Remember the parameters.
  partitionDir_ = dir;
  defaultBucketIdx_ = (numOfBuckets >= 1) ? numOfBuckets : 0;
  doEstimation_ = doEstimation;
  recordTerminator_ = recordTerminator;
  isSequenceFile_ = isSequenceFile;

  // List all the files in this directory; they all belong to this partition
  // and either belong to a specific bucket or to the default bucket.
  hdfsFileInfo *fileInfos = hdfsListDirectory(fs, dir.data(), &numFiles);

  // Populate partition stats.
  for (int f = 0; f < numFiles && result; f++)
    if (fileInfos[f].mKind == kObjectKindFile) {
      // The default (unbucketed) bucket number is defaultBucketIdx_.
      Int32 bucketNum = determineBucketNum(fileInfos[f].mName);
      HHDFSBucketStats *bucketStats = NULL;

      if (!bucketStatsList_.used(bucketNum)) {
        bucketStats = new (heap_) HHDFSBucketStats(heap_);
        bucketStatsList_.insertAt(bucketNum, bucketStats);
      } else
        bucketStats = bucketStatsList_[bucketNum];

      if (!bucketStats->addFile(fs, &fileInfos[f], doEstimation,
                                recordTerminator, isSequenceFile))
        result = FALSE;
    }

  hdfsFreeFileInfo(fileInfos, numFiles);

  // Aggregate statistics over all buckets.
  for (Int32 b = 0; b <= defaultBucketIdx_; b++)
    if (bucketStatsList_.used(b))
      add(bucketStatsList_[b]);

  return result;
}
inline std::vector<std::string> list_files(const std::string& path) {
  int num_files = 0;
  hdfsFileInfo* hdfs_file_list_ptr =
      hdfsListDirectory(filesystem, path.c_str(), &num_files);
  if (hdfs_file_list_ptr == NULL)
    return std::vector<std::string>();  // listing failed or directory is empty

  // Copy the file list to the string array.
  std::vector<std::string> files(num_files);
  for (int i = 0; i < num_files; ++i)
    files[i] = std::string(hdfs_file_list_ptr[i].mName);

  // Free the file list pointer.
  hdfsFreeFileInfo(hdfs_file_list_ptr, num_files);
  return files;
} // end of list_files
/**
 * call-seq:
 *    hdfs.stat(path) -> file_info
 *
 * Stats the file or directory at the supplied path, returning a
 * Hadoop::DFS::FileInfo object corresponding to it. If this fails, raises a
 * DFSException.
 */
VALUE HDFS_File_System_stat(VALUE self, VALUE path) {
  FSData* data = get_FSData(self);
  hdfsFileInfo* info = hdfsGetPathInfo(data->fs, StringValuePtr(path));
  if (info == NULL) {
    rb_raise(e_dfs_exception, "Failed to stat file %s: %s",
             StringValuePtr(path), get_error(errno));
    return Qnil;
  }
  VALUE file_info = new_HDFS_File_Info(info);
  hdfsFreeFileInfo(info, 1);
  return file_info;
}
time_t HdfsFile::mtime() {
  boost::mutex::scoped_lock lock(m_mutex);

  hdfsFileInfo* fileinfo = hdfsGetPathInfo(m_fs, m_fname.c_str());
  time_t ret = (fileinfo ? fileinfo->mLastMod : -1);
  if (fileinfo)
    hdfsFreeFileInfo(fileinfo, 1);
  return ret;
}
off64_t HdfsFileSystem::size(const char* path) const {
  hdfsFileInfo* fileinfo = hdfsGetPathInfo(m_fs, path);
  off64_t retval = (fileinfo ? fileinfo->mSize : -1);
  if (fileinfo)
    hdfsFreeFileInfo(fileinfo, 1);

  if (IDBLogger::isEnabled())
    IDBLogger::logFSop(HDFS, "fs:size", path, this, retval);
  return retval;
}
int dfs_rmdir(const char *path) {
  struct hdfsConn *conn = NULL;
  hdfsFS fs;
  int ret;
  dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data;
  int numEntries = 0;
  hdfsFileInfo *info = NULL;

  TRACE1("rmdir", path)

  assert(path);
  assert(dfs);
  assert('/' == *path);

  if (is_protected(path)) {
    ERROR("Trying to delete protected directory %s", path);
    ret = -EACCES;
    goto cleanup;
  }

  ret = fuseConnectAsThreadUid(&conn);
  if (ret) {
    fprintf(stderr, "fuseConnectAsThreadUid: failed to open a libhdfs "
            "connection! error %d.\n", ret);
    ret = -EIO;
    goto cleanup;
  }
  fs = hdfsConnGetFs(conn);
  info = hdfsListDirectory(fs, path, &numEntries);
  if (numEntries) {
    ret = -ENOTEMPTY;
    goto cleanup;
  }
  if (hdfsDeleteWithTrash(fs, path, dfs->usetrash)) {
    ERROR("Error trying to delete directory %s", path);
    ret = -EIO;
    goto cleanup;
  }
  ret = 0;

cleanup:
  if (info) {
    hdfsFreeFileInfo(info, numEntries);
  }
  if (conn) {
    hdfsConnRelease(conn);
  }
  return ret;
}
void dir_cpi_impl::sync_remove(saga::impl::void_t& ret, saga::url url, int flags) {
  instance_data idata(this);
  saga::url dir_url(idata->location_);

  boost::filesystem::path src_location(idata->location_.get_path(),
                                       boost::filesystem::native);
  // complete paths
  boost::filesystem::path src_path(url.get_path(), boost::filesystem::native);

  if (!src_path.has_root_path())
    src_location /= src_path;
  else
    src_location = src_path;

  bool is_src_dir = false;
  if (hdfsExists(fs_, src_location.string().c_str()) != 0) {
    SAGA_ADAPTOR_THROW("directory::remove: Can't remove directory: "
                       "Does not exist", saga::DoesNotExist);
  } else {
    hdfsFileInfo* info = hdfsGetPathInfo(fs_, src_location.string().c_str());
    if (info == NULL) {
      SAGA_ADAPTOR_THROW("file_cpi_impl::init failed", saga::NoSuccess);
    }
    is_src_dir = (info->mKind == kObjectKindDirectory);
    hdfsFreeFileInfo(info, 1);
  }

  // A directory may only be removed in recursive mode; files always may.
  if (is_src_dir && saga::name_space::Recursive != flags) {
    SAGA_ADAPTOR_THROW("directory::remove: Can't remove directory. "
                       "Please use recursive mode!", saga::BadParameter);
  }
  saga_hdfs_delete(fs_, src_location.string().c_str());
}
bool MaprFileSystem::IsDirectory(const std::string& uri) {
  std::string path = GetUriPathOrDie(uri);
  std::string host = "default";
  hdfsFS fs = hdfsConnect(host.c_str(), 0);  // use default config file settings
  CHECK(fs);
  hdfsFileInfo* info = hdfsGetPathInfo(fs, path.c_str());
  bool is_directory = false;
  if (info) {
    is_directory = (info->mKind == kObjectKindDirectory);
    hdfsFreeFileInfo(info, 1);
  } else {
    LOG(FATAL) << "uri does not exist: " << uri;
  }
  CHECK_EQ(hdfsDisconnect(fs), 0);
  return is_directory;
}
int libhdfsconnector::streamInFile(const char * rfile, int bufferSize) {
    if (!fs) {
        fprintf(stderr, "Could not connect to hdfs\n");
        return RETURN_FAILURE;
    }

    unsigned long fileTotalSize = 0;
    hdfsFileInfo *fileInfo = NULL;
    if ((fileInfo = hdfsGetPathInfo(fs, rfile)) != NULL) {
        fileTotalSize = fileInfo->mSize;
        hdfsFreeFileInfo(fileInfo, 1);
    } else {
        fprintf(stderr, "Error: hdfsGetPathInfo for %s - FAILED!\n", rfile);
        return RETURN_FAILURE;
    }

    hdfsFile readFile = hdfsOpenFile(fs, rfile, O_RDONLY, bufferSize, 0, 0);
    if (!readFile) {
        fprintf(stderr, "Failed to open %s for reading!\n", rfile);
        return RETURN_FAILURE;
    }

    unsigned char buff[bufferSize + 1];
    buff[bufferSize] = '\0';

    for (unsigned long bytes_read = 0; bytes_read < fileTotalSize;) {
        tSize read_length = hdfsRead(fs, readFile, buff, bufferSize);
        if (read_length <= 0)
            break;  // EOF or read error; avoid looping forever
        bytes_read += read_length;
        for (tSize i = 0; i < read_length; i++)
            fprintf(stdout, "%c", buff[i]);
    }

    hdfsCloseFile(fs, readFile);
    return 0;
}
int dfsList(const char* path) {
    hdfsFS fs = hdfsConnect("default", 0);
    int i, entries;
    hdfsFileInfo *files, *head;

    /* Get the list info */
    files = hdfsListDirectory(fs, path, &entries);
    if (!files) {
        perror("Get directory info");
        exit(-1);
    }
    head = files;

    /* Print the info */
    fprintf(stdout, "%s %-50s %-9s %s\n", "Kind", "Name", "Size", "Replicas");
    for (i = 0; i < entries; i++) {
        const char* unit[] = {" B", "KB", "MB", "GB", "TB", "PB"};
        double size = files->mSize;
        unsigned int u = 0;
        while (size > 1024) {
            u++;
            size /= 1024;
        }
        assert(u < 6);
        fprintf(stdout, "%4c %-50s %-7.2lf%s %8d\n", files->mKind,
                files->mName, size, unit[u], files->mReplication);
        files += 1;
    }

    /* List ends */
    hdfsFreeFileInfo(head, entries);
    hdfsDisconnect(fs);
    return 0;
}
/**
 * call-seq:
 *    hdfs.ls(path) -> file_infos
 *
 * Lists the directory at the supplied path, returning an Array of
 * HDFS::FileInfo objects. If this fails, raises a DFSException.
 */
VALUE HDFS_File_System_ls(VALUE self, VALUE path) {
  FSData* data = get_FSData(self);
  VALUE file_infos = rb_ary_new();
  int num_files = -1;
  hdfsFileInfo* infos = hdfsListDirectory(data->fs, StringValuePtr(path),
                                          &num_files);
  if (infos == NULL && num_files == -1) {
    rb_raise(e_dfs_exception, "Failed to list directory %s: %s",
             StringValuePtr(path), get_error(errno));
    return Qnil;
  }
  int i;
  for (i = 0; i < num_files; i++) {
    hdfsFileInfo* cur_info = infos + i;
    rb_ary_push(file_infos, new_HDFS_File_Info(cur_info));
  }
  hdfsFreeFileInfo(infos, num_files);
  return file_infos;
}
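Note that hdfsListDirectory() returns NULL both on failure and, in some libhdfs versions, for an empty directory; the snippets in this collection disambiguate via errno (as in FileManagerHdfs and hdfs_test below) or via a -1 entry count (as in the Ruby binding above). A hedged C++ wrapper sketch of the errno convention; list_or_empty is a hypothetical name, not a library call.

#include <cerrno>
#include <string>
#include <vector>
#include <hdfs.h>

// Sketch: wrap hdfsListDirectory(), treating NULL with errno == 0 as an
// empty directory and NULL with errno set as a real error.
bool list_or_empty(hdfsFS fs, const char* dir,
                   std::vector<std::string>* names) {
    int n = 0;
    errno = 0;  // clear first: failure is reported as NULL + errno
    hdfsFileInfo* infos = hdfsListDirectory(fs, dir, &n);
    if (!infos)
        return errno == 0;  // empty listing is success; an error is not
    for (int i = 0; i < n; ++i)
        names->push_back(infos[i].mName);
    hdfsFreeFileInfo(infos, n);
    return true;
}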
size_t FileManagerHdfs::numSlots(const block_id block) const {
  string filename(blockFilename(block));

  hdfsFileInfo *file_info = hdfsGetPathInfo(hdfs_, filename.c_str());
  if (file_info == nullptr) {
    if (errno != ENOENT) {
      LOG_WARNING("Failed to get size of file " << filename
                  << " with error: " << strerror(errno));
    }
    return 0;
  }

  size_t file_size = file_info->mSize;
  hdfsFreeFileInfo(file_info, 1);

  if ((file_size % kSlotSizeBytes) != 0) {
    throw CorruptPersistentStorage();
  }

  return file_size / kSlotSizeBytes;
}
FileStatus Hdfs::getFileStatus(string path) {
  hdfsFileInfo* fi = hdfsGetPathInfo(_getFs(), path.data());
  if (fi == NULL) {
    throw ios_base::failure("Error retrieving file status. (" + path + ")");
  }

  FileStatus result;
  result._setAccessTime(fi->mLastAccess);
  result._setBlockSize(fi->mBlockSize);
  result._setModificationTime(fi->mLastMod);
  result._setLen(fi->mSize);
  result._setKind(fi->mKind);
  result._setPath(fi->mName);

  hdfsFreeFileInfo(fi, 1);
  return result;
}
int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos) {
    struct webhdfsFileHandle *wf;
    hdfsFileInfo *fileInfo = NULL;
    int ret = 0;

    if (!fs || !file || (file->type == OUTPUT) || (desiredPos < 0)) {
        ret = EINVAL;
        goto done;
    }
    wf = file->file;
    if (!wf) {
        ret = EINVAL;
        goto done;
    }
    fileInfo = hdfsGetPathInfo(fs, wf->absPath);
    if (!fileInfo) {
        ret = errno;
        goto done;
    }
    if (desiredPos > fileInfo->mSize) {
        fprintf(stderr, "hdfsSeek for %s failed since the desired position %"
                PRId64 " is beyond the size of the file %" PRId64 "\n",
                wf->absPath, desiredPos, fileInfo->mSize);
        ret = ENOTSUP;
        goto done;
    }
    file->offset = desiredPos;

done:
    if (fileInfo) {
        hdfsFreeFileInfo(fileInfo, 1);
    }
    if (ret) {
        errno = ret;
        return -1;
    }
    return 0;
}
std::vector<FileStatus> Hdfs::listStatus(string path, const bool sortByPath) {
  if (exists(path) == false) {
    throw ios_base::failure("Error retrieving status on non-existent path (" +
                            path + ")");
  }

  std::vector<FileStatus> result;
  int numEntries;
  hdfsFileInfo* fis = hdfsListDirectory(_getFs(), path.data(), &numEntries);
  if (fis == NULL) {
    throw ios_base::failure("Error listing directory contents. (" + path + ")");
  }

  result.resize(numEntries);
  for (int i = 0; i < numEntries; i++) {
    hdfsFileInfo* fi = fis + i;
    FileStatus& r = result[i];
    r._setAccessTime(fi->mLastAccess);
    r._setModificationTime(fi->mLastMod);
    r._setLen(fi->mSize);
    r._setKind(fi->mKind);
    r._setPath(fi->mName);
  }
  hdfsFreeFileInfo(fis, numEntries);

  if (sortByPath) {
    sort(result.begin(), result.end(), _fileStatusPathCompare);
  }
  return result;
}
long libhdfsconnector::getFileSize(const char * filename) {
    if (!fs) {
        fprintf(stderr, "Could not connect to hdfs\n");
        return RETURN_FAILURE;
    }

    hdfsFileInfo *fileInfo = NULL;
    if ((fileInfo = hdfsGetPathInfo(fs, filename)) != NULL) {
        long fsize = fileInfo->mSize;
        hdfsFreeFileInfo(fileInfo, 1);
        return fsize;
    } else {
        fprintf(stderr, "Error: hdfsGetPathInfo for %s - FAILED!\n", filename);
        return RETURN_FAILURE;
    }
}
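One caveat with size getters like the one above: mSize is a tOffset (a 64-bit signed integer in hdfs.h), so returning it as long truncates on 32-bit builds. A width-preserving sketch; the file_size_64 helper name is illustrative, not from any snippet here.

#include <cinttypes>
#include <cstdio>
#include <hdfs.h>

// Sketch: return the file size as the full-width tOffset (int64_t)
// instead of narrowing to long; -1 signals failure.
tOffset file_size_64(hdfsFS fs, const char* path) {
    hdfsFileInfo* info = hdfsGetPathInfo(fs, path);
    if (!info)
        return -1;
    tOffset size = info->mSize;
    hdfsFreeFileInfo(info, 1);
    printf("size of %s: %" PRId64 " bytes\n", path, (int64_t)size);
    return size;
}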
int main(int argc, char **argv) {

    hdfsFS fs = hdfsConnect("default", 0);
    if (!fs) {
        fprintf(stderr, "Oops! Failed to connect to hdfs!\n");
        exit(-1);
    }

    hdfsFS lfs = hdfsConnect(NULL, 0);
    if (!lfs) {
        fprintf(stderr, "Oops! Failed to connect to 'local' hdfs!\n");
        exit(-1);
    }

    const char* writePath = "/tmp/testfile.txt";

    { // Write tests
        hdfsFile writeFile = hdfsOpenFile(fs, writePath, O_WRONLY|O_CREAT, 0, 0, 0);
        if (!writeFile) {
            fprintf(stderr, "Failed to open %s for writing!\n", writePath);
            exit(-1);
        }
        fprintf(stderr, "Opened %s for writing successfully...\n", writePath);

        char* buffer = "Hello, World!";
        tSize num_written_bytes = hdfsWrite(fs, writeFile, (void*)buffer, strlen(buffer)+1);
        fprintf(stderr, "Wrote %d bytes\n", num_written_bytes);

        tOffset currentPos = -1;
        if ((currentPos = hdfsTell(fs, writeFile)) == -1) {
            fprintf(stderr, "Failed to get current file position correctly! Got %ld!\n", currentPos);
            exit(-1);
        }
        fprintf(stderr, "Current position: %ld\n", currentPos);

        if (hdfsFlush(fs, writeFile)) {
            fprintf(stderr, "Failed to 'flush' %s\n", writePath);
            exit(-1);
        }
        fprintf(stderr, "Flushed %s successfully!\n", writePath);

        hdfsCloseFile(fs, writeFile);
    }

    { // Read tests
        const char* readPath = "/tmp/testfile.txt";
        int exists = hdfsExists(fs, readPath);
        if (exists) {
            fprintf(stderr, "Failed to validate existence of %s\n", readPath);
            exit(-1);
        }

        hdfsFile readFile = hdfsOpenFile(fs, readPath, O_RDONLY, 0, 0, 0);
        if (!readFile) {
            fprintf(stderr, "Failed to open %s for reading!\n", readPath);
            exit(-1);
        }

        fprintf(stderr, "hdfsAvailable: %d\n", hdfsAvailable(fs, readFile));

        tOffset seekPos = 1;
        if (hdfsSeek(fs, readFile, seekPos)) {
            fprintf(stderr, "Failed to seek %s for reading!\n", readPath);
            exit(-1);
        }

        tOffset currentPos = -1;
        if ((currentPos = hdfsTell(fs, readFile)) != seekPos) {
            fprintf(stderr, "Failed to get current file position correctly! Got %ld!\n", currentPos);
            exit(-1);
        }
        fprintf(stderr, "Current position: %ld\n", currentPos);

        static char buffer[32];
        tSize num_read_bytes = hdfsRead(fs, readFile, (void*)buffer, sizeof(buffer));
        fprintf(stderr, "Read following %d bytes:\n%s\n", num_read_bytes, buffer);

        num_read_bytes = hdfsPread(fs, readFile, 0, (void*)buffer, sizeof(buffer));
        fprintf(stderr, "Read following %d bytes:\n%s\n", num_read_bytes, buffer);

        hdfsCloseFile(fs, readFile);
    }

    int totalResult = 0;
    int result = 0;

    { // Generic file-system operations
        const char* srcPath = "/tmp/testfile.txt";
        const char* dstPath = "/tmp/testfile2.txt";

        fprintf(stderr, "hdfsCopy(remote-local): %s\n", ((result = hdfsCopy(fs, srcPath, lfs, srcPath)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsCopy(remote-remote): %s\n", ((result = hdfsCopy(fs, srcPath, fs, dstPath)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsMove(local-local): %s\n", ((result = hdfsMove(lfs, srcPath, lfs, dstPath)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsMove(remote-local): %s\n", ((result = hdfsMove(fs, srcPath, lfs, srcPath)) ? "Failed!" : "Success!"));
        totalResult += result;

        fprintf(stderr, "hdfsRename: %s\n", ((result = hdfsRename(fs, dstPath, srcPath)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsCopy(remote-remote): %s\n", ((result = hdfsCopy(fs, srcPath, fs, dstPath)) ? "Failed!" : "Success!"));
        totalResult += result;

        const char* slashTmp = "/tmp";
        const char* newDirectory = "/tmp/newdir";
        fprintf(stderr, "hdfsCreateDirectory: %s\n", ((result = hdfsCreateDirectory(fs, newDirectory)) ? "Failed!" : "Success!"));
        totalResult += result;

        fprintf(stderr, "hdfsSetReplication: %s\n", ((result = hdfsSetReplication(fs, srcPath, 2)) ? "Failed!" : "Success!"));
        totalResult += result;

        char buffer[256];
        const char *resp;
        fprintf(stderr, "hdfsGetWorkingDirectory: %s\n", ((resp = hdfsGetWorkingDirectory(fs, buffer, sizeof(buffer))) ? buffer : "Failed!"));
        totalResult += (resp ? 0 : 1);
        fprintf(stderr, "hdfsSetWorkingDirectory: %s\n", ((result = hdfsSetWorkingDirectory(fs, slashTmp)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsGetWorkingDirectory: %s\n", ((resp = hdfsGetWorkingDirectory(fs, buffer, sizeof(buffer))) ? buffer : "Failed!"));
        totalResult += (resp ? 0 : 1);

        fprintf(stderr, "hdfsGetDefaultBlockSize: %ld\n", hdfsGetDefaultBlockSize(fs));
        fprintf(stderr, "hdfsGetCapacity: %ld\n", hdfsGetCapacity(fs));
        fprintf(stderr, "hdfsGetUsed: %ld\n", hdfsGetUsed(fs));

        hdfsFileInfo *fileInfo = NULL;
        if ((fileInfo = hdfsGetPathInfo(fs, slashTmp)) != NULL) {
            fprintf(stderr, "hdfsGetPathInfo - SUCCESS!\n");
            fprintf(stderr, "Name: %s, ", fileInfo->mName);
            fprintf(stderr, "Type: %c, ", (char)(fileInfo->mKind));
            fprintf(stderr, "Replication: %d, ", fileInfo->mReplication);
            fprintf(stderr, "BlockSize: %ld, ", fileInfo->mBlockSize);
            fprintf(stderr, "Size: %ld, ", fileInfo->mSize);
            fprintf(stderr, "LastMod: %s", ctime(&fileInfo->mLastMod));
            fprintf(stderr, "Owner: %s, ", fileInfo->mOwner);
            fprintf(stderr, "Group: %s, ", fileInfo->mGroup);
            char permissions[10];
            permission_disp(fileInfo->mPermissions, permissions);
            fprintf(stderr, "Permissions: %d (%s)\n", fileInfo->mPermissions, permissions);
            hdfsFreeFileInfo(fileInfo, 1);
        } else {
            totalResult++;
            fprintf(stderr, "waah! hdfsGetPathInfo for %s - FAILED!\n", slashTmp);
        }

        hdfsFileInfo *fileList = 0;
        int numEntries = 0;
        if ((fileList = hdfsListDirectory(fs, slashTmp, &numEntries)) != NULL) {
            int i = 0;
            for (i = 0; i < numEntries; ++i) {
                fprintf(stderr, "Name: %s, ", fileList[i].mName);
                fprintf(stderr, "Type: %c, ", (char)fileList[i].mKind);
                fprintf(stderr, "Replication: %d, ", fileList[i].mReplication);
                fprintf(stderr, "BlockSize: %ld, ", fileList[i].mBlockSize);
                fprintf(stderr, "Size: %ld, ", fileList[i].mSize);
                fprintf(stderr, "LastMod: %s", ctime(&fileList[i].mLastMod));
                fprintf(stderr, "Owner: %s, ", fileList[i].mOwner);
                fprintf(stderr, "Group: %s, ", fileList[i].mGroup);
                char permissions[10];
                permission_disp(fileList[i].mPermissions, permissions);
                fprintf(stderr, "Permissions: %d (%s)\n", fileList[i].mPermissions, permissions);
            }
            hdfsFreeFileInfo(fileList, numEntries);
        } else {
            if (errno) {
                totalResult++;
                fprintf(stderr, "waah! hdfsListDirectory - FAILED!\n");
            } else {
                fprintf(stderr, "Empty directory!\n");
            }
        }

        char*** hosts = hdfsGetHosts(fs, srcPath, 0, 1);
        if (hosts) {
            fprintf(stderr, "hdfsGetHosts - SUCCESS! ... \n");
            int i = 0;
            while (hosts[i]) {
                int j = 0;
                while (hosts[i][j]) {
                    fprintf(stderr, "\thosts[%d][%d] - %s\n", i, j, hosts[i][j]);
                    ++j;
                }
                ++i;
            }
        } else {
            totalResult++;
            fprintf(stderr, "waah! hdfsGetHosts - FAILED!\n");
        }

        char *newOwner = "root";
        // setting tmp dir to 777 so later when we connectAsUser nobody, we can write to it
        short newPerm = 0666;

        // chown write
        fprintf(stderr, "hdfsChown: %s\n", ((result = hdfsChown(fs, writePath, NULL, "users")) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsChown: %s\n", ((result = hdfsChown(fs, writePath, newOwner, NULL)) ? "Failed!" : "Success!"));
        totalResult += result;
        // chmod write
        fprintf(stderr, "hdfsChmod: %s\n", ((result = hdfsChmod(fs, writePath, newPerm)) ? "Failed!" : "Success!"));
        totalResult += result;

        sleep(2);
        tTime newMtime = time(NULL);
        tTime newAtime = time(NULL);

        // utime write
        fprintf(stderr, "hdfsUtime: %s\n", ((result = hdfsUtime(fs, writePath, newMtime, newAtime)) ? "Failed!" : "Success!"));
        totalResult += result;

        // chown/chmod/utime read
        hdfsFileInfo *finfo = hdfsGetPathInfo(fs, writePath);

        fprintf(stderr, "hdfsChown read: %s\n", ((result = (strcmp(finfo->mOwner, newOwner) != 0)) ? "Failed!" : "Success!"));
        totalResult += result;

        fprintf(stderr, "hdfsChmod read: %s\n", ((result = (finfo->mPermissions != newPerm)) ? "Failed!" : "Success!"));
        totalResult += result;

        // will later use /tmp/ as a different user, so enable it
        fprintf(stderr, "hdfsChmod: %s\n", ((result = hdfsChmod(fs, "/tmp/", 0777)) ? "Failed!" : "Success!"));
        totalResult += result;

        fprintf(stderr, "newMTime=%ld\n", newMtime);
        fprintf(stderr, "curMTime=%ld\n", finfo->mLastMod);

        fprintf(stderr, "hdfsUtime read (mtime): %s\n", ((result = (finfo->mLastMod != newMtime)) ? "Failed!" : "Success!"));
        totalResult += result;

        // No easy way to turn on access times from hdfs_test right now
        // fprintf(stderr, "hdfsUtime read (atime): %s\n", ((result = (finfo->mLastAccess != newAtime)) ? "Failed!" : "Success!"));
        // totalResult += result;

        hdfsFreeFileInfo(finfo, 1);

        // Clean up
        fprintf(stderr, "hdfsDelete: %s\n", ((result = hdfsDelete(fs, newDirectory)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsDelete: %s\n", ((result = hdfsDelete(fs, srcPath)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsDelete: %s\n", ((result = hdfsDelete(lfs, srcPath)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsDelete: %s\n", ((result = hdfsDelete(lfs, dstPath)) ? "Failed!" : "Success!"));
        totalResult += result;
        fprintf(stderr, "hdfsExists: %s\n", ((result = hdfsExists(fs, newDirectory)) ? "Success!" : "Failed!"));
        totalResult += (result ? 0 : 1);
    }

    totalResult += (hdfsDisconnect(fs) != 0);

    {
        // Now test connecting as a specific user.
        // This is only meant to test that we connected as that user, not to
        // test the actual fs user capabilities. Thus just create a file and
        // check that the owner is correct.
        const char *tuser = "******";
        const char* writePath = "/tmp/usertestfile.txt";
        const char **groups = (const char**)malloc(sizeof(char*) * 2);
        groups[0] = "users";
        groups[1] = "nobody";

        fs = hdfsConnectAsUser("default", 0, tuser, groups, 2);
        if (!fs) {
            fprintf(stderr, "Oops! Failed to connect to hdfs as user %s!\n", tuser);
            exit(-1);
        }

        hdfsFile writeFile = hdfsOpenFile(fs, writePath, O_WRONLY|O_CREAT, 0, 0, 0);
        if (!writeFile) {
            fprintf(stderr, "Failed to open %s for writing!\n", writePath);
            exit(-1);
        }
        fprintf(stderr, "Opened %s for writing successfully...\n", writePath);

        char* buffer = "Hello, World!";
        tSize num_written_bytes = hdfsWrite(fs, writeFile, (void*)buffer, strlen(buffer)+1);
        fprintf(stderr, "Wrote %d bytes\n", num_written_bytes);

        if (hdfsFlush(fs, writeFile)) {
            fprintf(stderr, "Failed to 'flush' %s\n", writePath);
            exit(-1);
        }
        fprintf(stderr, "Flushed %s successfully!\n", writePath);

        hdfsCloseFile(fs, writeFile);

        hdfsFileInfo *finfo = hdfsGetPathInfo(fs, writePath);
        fprintf(stderr, "hdfs new file user is correct: %s\n", ((result = (strcmp(finfo->mOwner, tuser) != 0)) ? "Failed!" : "Success!"));
        totalResult += result;
        hdfsFreeFileInfo(finfo, 1);  // release the info record
    }

    totalResult += (hdfsDisconnect(fs) != 0);

    if (totalResult != 0) {
        return -1;
    } else {
        return 0;
    }
}