//! Open an HDFS file given as an "hdfs://host[:port]/path" URI and return a
//! counted Hdfs3ReadStream restricted to the byte range [range.begin,
//! range.begin + range.size()).
ReadStreamPtr Hdfs3OpenReadStream(
    const std::string& _path, const common::Range& range) {

    // the URI must carry the hdfs:// scheme; strip it off
    die_unless(common::StartsWith(_path, "hdfs://"));
    std::string without_scheme = _path.substr(7);

    // first component is host[:port], the remainder is the file path
    std::vector<std::string> parts = common::Split(without_scheme, '/', 2);
    die_unless(parts.size() == 2);

    // re-attach the leading slash the split consumed
    std::string file_path = "/" + parts[1];

    // fetch (or lazily create) a connection to this namenode
    hdfsFS fs = Hdfs3FindConnection(parts[0]);

    // open read-only; zeros request the library defaults
    hdfsFile file = hdfsOpenFile(
        fs, file_path.c_str(), O_RDONLY,
        /* bufferSize */ 0, /* replication */ 0, /* blocksize */ 0);
    if (!file)
        die("Could not open HDFS file \"" << _path << "\": "
            << hdfsGetLastError());

    return tlx::make_counting<Hdfs3ReadStream>(
        fs, file,
        /* start_byte */ range.begin, /* byte_count */ range.size());
}
//! Return a cached hdfsFS connection for "[user@]host[:port]", creating and
//! caching a new one on first use. Thread-safe via s_hdfs_mutex.
hdfsFS Hdfs3FindConnection(const std::string& hostport) {
    std::unique_lock<std::mutex> lock(s_hdfs_mutex);

    // reuse an existing connection for this exact hostport string
    auto it = s_hdfs_map.find(hostport);
    if (it != s_hdfs_map.end())
        return it->second;

    // parse the optional ":port" suffix; 8020 is the namenode default
    std::vector<std::string> host_port = common::Split(hostport, ':', 2);
    uint16_t port = 8020;
    if (host_port.size() != 1 &&
        !common::from_str<uint16_t>(host_port[1], port)) {
        die("Could not parse port in host:port \"" << hostport << "\"");
    }

    // parse the optional "user@" prefix
    std::vector<std::string> user_host = common::Split(host_port[0], '@', 2);
    bool has_user = (user_host.size() != 1);
    const std::string& host = has_user ? user_host[1] : user_host[0];

    hdfsBuilder* builder = hdfsNewBuilder();
    hdfsBuilderSetNameNode(builder, host.c_str());
    hdfsBuilderSetNameNodePort(builder, port);
    if (has_user)
        hdfsBuilderSetUserName(builder, user_host[0].c_str());

    hdfsFS hdfs = hdfsBuilderConnect(builder);
    if (!hdfs)
        die("Could not connect to HDFS server \"" << hostport << "\": "
            << hdfsGetLastError());

    // remember the connection for later callers
    s_hdfs_map[hostport] = hdfs;
    return hdfs;
}
int main(int argc, char* argv[]) { if (argc < 4) { printf("usage: hdfs_get <name node address> <name node port> <input file>\n"); return 1; } // Sleep for 100ms. usleep(100 * 1000); struct hdfsBuilder* hdfs_builder = hdfsNewBuilder(); if (!hdfs_builder) { printf("Could not create HDFS builder"); return 1; } hdfsBuilderSetNameNode(hdfs_builder, argv[1]); int port = atoi(argv[2]); hdfsBuilderSetNameNodePort(hdfs_builder, port); hdfsBuilderConfSetStr(hdfs_builder, "dfs.client.read.shortcircuit", "false"); hdfsFS fs = hdfsBuilderConnect(hdfs_builder); hdfsFreeBuilder(hdfs_builder); if (!fs) { printf("Could not connect to HDFS"); return 1; } hdfsFile file_in = hdfsOpenFile(fs, argv[3], O_RDONLY, 0, 0, 0); char buffer[1048576]; int done = 0; do { done = hdfsRead(fs, file_in, &buffer, 1048576); } while (done > 0); if (done < 0) { printf("Failed to read file: %s", hdfsGetLastError()); return 1; } hdfsCloseFile(fs, file_in); hdfsDisconnect(fs); return 0; }
/// Flush buffered data for `fout` to HDFS via hdfsSync.
/// Throws (CANNOT_FSYNC) with the libhdfs error text on failure.
void sync()
{
    int result = hdfsSync(fs.get(), fout);
    if (result < 0)
        // BUG FIX: the original message lacked a space after "sync",
        // producing garbled text like "Cannot HDFS synchdfs://...".
        throwFromErrno("Cannot HDFS sync " + hdfs_uri.toString() + " "
            + std::string(hdfsGetLastError()), ErrorCodes::CANNOT_FSYNC);
}
/// Write `size` bytes starting at `start` to the open HDFS file `fout`.
/// Returns the number of bytes actually written (may be fewer than `size`);
/// throws (NETWORK_ERROR) with the libhdfs error text on failure.
int write(const char * start, size_t size)
{
    const int written = hdfsWrite(fs.get(), fout, start, size);
    if (written < 0)
        throw Exception("Fail to write HDFS file: " + hdfs_uri.toString() + " " + std::string(hdfsGetLastError()),
            ErrorCodes::NETWORK_ERROR);
    return written;
}
WriteBufferFromHDFSImpl(const std::string & hdfs_name_) : hdfs_uri(hdfs_name_) , builder(createHDFSBuilder(hdfs_uri)) , fs(createHDFSFS(builder.get())) { auto & path = hdfs_uri.getPath(); fout = hdfsOpenFile(fs.get(), path.c_str(), O_WRONLY, 0, 0, 0); if (fout == nullptr) { throw Exception("Unable to open HDFS file: " + path + " error: " + std::string(hdfsGetLastError()), ErrorCodes::CANNOT_OPEN_FILE); } }
/// Read up to `size` bytes from the open HDFS file `fin` into `start`.
/// Returns the number of bytes read (0 at EOF); throws (NETWORK_ERROR)
/// with the libhdfs error text on failure.
int read(char * start, size_t size)
{
    const int received = hdfsRead(fs.get(), fin, start, size);
    if (received < 0)
        throw Exception("Fail to read HDFS file: " + hdfs_uri.toString() + " " + std::string(hdfsGetLastError()),
            ErrorCodes::NETWORK_ERROR);
    return received;
}