// For the byte range [start_byte, start_byte+len) of 'file' (start_byte == 0
// with len == -1 means the whole file), build an array mapping each HDFS
// block to the locale that owns a replica of it.
//
// On success, *locs is a qio_calloc'd array (owned by the caller) and
// *out_num_blocks is the number of entries filled in. On failure a nonzero
// qioerr is returned; if the failure happens mid-scan, *locs is set to NULL
// and *out_num_blocks to 0.
//
// For each block a replica is picked at random (rand() % replication) and
// its hostname -- stripped by get_locale_name -- is matched against
// locale_array to yield the locale index.
qioerr hdfs_get_owners_for_bytes(qio_file_t* file, hdfs_block_byte_map_t** locs, int* out_num_blocks, char** locale_array, int num_locales, off_t start_byte, off_t len) {
  int i;
  int j = 0;
  int k;
  qioerr err = 0;
  char* tmp;
  int rnd;
  int block_count = 0;
  hdfs_block_byte_map_t* loc = NULL;
  char*** info = NULL;
  hdfsFileInfo* f_info = hdfsGetPathInfo(to_hdfs_fs(file->fs_info)->hfs, to_hdfs_file(file->file_info)->pathnm);

  // BUG FIX: f_info was previously dereferenced (->mSize, ->mReplication,
  // ->mBlockSize) without checking hdfsGetPathInfo for failure.
  CREATE_ERROR((!f_info), err, EREMOTEIO, "Unable to stat file in HDFS", end);

  if (start_byte == 0 && len == -1) // We want the whole thing
    info = hdfsGetHosts(to_hdfs_fs(file->fs_info)->hfs, to_hdfs_file(file->file_info)->pathnm, start_byte, f_info->mSize);
  else
    info = hdfsGetHosts(to_hdfs_fs(file->fs_info)->hfs, to_hdfs_file(file->file_info)->pathnm, start_byte, start_byte + len);

  // BUG FIX: this NULL check used to sit *after* the block-counting loop
  // below, i.e. after 'info' had already been dereferenced.
  CREATE_ERROR((!info), err, EREMOTEIO, "Unable to get host for HDFS", end);

  while (info[block_count] != NULL) { // Get the number of blocks that we have
    block_count++;
  }

  loc = (hdfs_block_byte_map_t*)qio_calloc(sizeof(hdfs_block_byte_map_t), block_count);
  // BUG FIX: the allocation was not checked before loc[i] was written.
  CREATE_ERROR((!loc), err, ENOMEM, "Out of memory in hdfs_get_owners_for_bytes", end);

  for (i = 0; info[i] != NULL; i++) { // Assign block owners
    rnd = rand() % f_info->mReplication; // pick an owner
    if (info[i][rnd]) { // Valid access
      tmp = get_locale_name(info[i][rnd]); // strip off .___
      for (k = 0; k < num_locales; k++) { // Now find the owner
        if (strcmp(tmp, locale_array[k]) == 0) {
          loc[i].locale_id = k; // return locale ID for that name
          break;
        }
      }
      loc[i].start_byte = (off_t)(i*f_info->mBlockSize);
      loc[i].len = (off_t)(f_info->mBlockSize);
      j++;
    } else {
      QIO_GET_CONSTANT_ERROR(err, EINVAL, "Unable to find address for blocks in hdfs_get_owners_for_bytes");
      qio_free(loc);
      *locs = NULL;
      *out_num_blocks = 0;
      goto end;
    }
  }

  *locs = loc;
  *out_num_blocks = j;

end:
  // BUG FIX: f_info was leaked; libhdfs requires hdfsFreeFileInfo.
  if (f_info)
    hdfsFreeFileInfo(f_info, 1);
  return err;
}
/* Close an HDFS file handle and drop this file's reference on the
 * filesystem connection (the connection is torn down by
 * hdfs_disconnect_and_free once the last reference goes away).
 * Returns 0 on success or an errno-derived qioerr if the close failed. */
qioerr hdfs_close(void* fl, void* fs) {
  qioerr err = 0;
  int rc = 0;

  STARTING_SLOW_SYSCALL;
  rc = hdfsCloseFile(to_hdfs_fs(fs)->hfs, to_hdfs_file(fl)->file);
  if (rc == -1)
    err = qio_mkerror_errno();
  DONE_SLOW_SYSCALL;

  DO_RELEASE((to_hdfs_fs(fs)), hdfs_disconnect_and_free);

  return err;
}
// Vectored read from the file's current position. Reads each iovec in turn
// with hdfsRead, accumulating into *num_read_out. Stops early (without
// error) on a short read; returns EEOF if a non-empty request read nothing,
// or an errno-derived error if hdfsRead failed.
qioerr hdfs_readv (void* file, const struct iovec *vector, int count, ssize_t* num_read_out, void* fs) {
  ssize_t got;
  ssize_t got_total;
  qioerr err_out = 0;
  int i;

  STARTING_SLOW_SYSCALL;
  got_total = 0;
  for( i = 0; i < count; i++) {
    got = hdfsRead(to_hdfs_fs(fs)->hfs, to_hdfs_file(file)->file, (void*)vector[i].iov_base, vector[i].iov_len);
    if( got != -1 ) {
      got_total += got;
    } else {
      err_out = qio_mkerror_errno();
      break;
    }
    // BUG FIX: short-read detection compared against
    // sys_iov_total_bytes(&vector[i], i), which measures the wrong span and
    // is 0 bytes on the first iteration. Compare against this iovec's
    // length, matching hdfs_preadv.
    if( got != (ssize_t)vector[i].iov_len ) {
      break;
    }
  }
  // Nothing read on a non-empty request means end-of-file.
  if( err_out == 0 && got_total == 0 && sys_iov_total_bytes(vector, count) != 0 )
    err_out = qio_int_to_err(EEOF);
  *num_read_out = got_total;
  DONE_SLOW_SYSCALL;
  return err_out;
}
// Report which locales own replicas of the first HDFS block covering
// [start_byte, end_byte) of 'file'. On success *loc_names_out points at a
// NULL-terminated array of locale names (ownership of that array passes to
// the caller) and *num_locs_out is its length.
qioerr hdfs_locales_for_range(void* file, off_t start_byte, off_t end_byte, const char*** loc_names_out, int* num_locs_out, void* fs) {
  int i = 0;
  int j = 0;
  char*** info = NULL;

  info = hdfsGetHosts(to_hdfs_fs(fs)->hfs, to_hdfs_file(file)->pathnm, start_byte, end_byte);

  // unable to get hosts for this byte range
  if (!info || !info[0]) {
    *num_locs_out = 0;
    hdfsFreeHosts(info);
    QIO_RETURN_CONSTANT_ERROR(EREMOTEIO, "Unable to get owners for byterange");
  }

  // Strip each hostname of the first block down to a locale name in place.
  while(info[0][i]) {
    info[0][i] = get_locale_name(info[0][i]);
    i++;
  }

  // BUG FIX: 'i' already equals the number of hosts in this NULL-terminated
  // list; reporting i - 1 silently dropped the last owner.
  *num_locs_out = i;
  *loc_names_out = (const char**)info[0];

  // Free the other hosts that we don't need
  for (i = 1; info[i]; i++) {
    for (j = 0; info[i][j]; j++)
      qio_free(info[i][j]);
    qio_free(info[i]);
  }
  // NOTE(review): the outer 'info' array itself is never freed here --
  // looks like a small leak; confirm against hdfsGetHosts/hdfsFreeHosts
  // ownership semantics before changing.

  return 0;
}
// Vectored write at the file's current position. Writes each iovec in turn
// with hdfsWrite, accumulating into *num_written_out. Stops early (without
// error) on a short write; returns an errno-derived error if hdfsWrite
// failed.
qioerr hdfs_writev(void* fl, const struct iovec* iov, int iovcnt, ssize_t* num_written_out, void* fs) {
  ssize_t got;
  ssize_t got_total = 0;
  qioerr err_out = 0;
  int i;

  STARTING_SLOW_SYSCALL;
  for (i = 0; i < iovcnt; i++) {
    got = hdfsWrite(to_hdfs_fs(fs)->hfs, to_hdfs_file(fl)->file, (void*)(iov[i].iov_base), iov[i].iov_len);
    if (got != -1)
      got_total += got;
    else {
      err_out = qio_mkerror_errno();
      break;
    }
    // BUG FIX: short-write detection passed iov[i].iov_len (a byte count)
    // as the iovec *count* to sys_iov_total_bytes. Compare directly against
    // this iovec's length, matching hdfs_preadv.
    if (got != (ssize_t)iov[i].iov_len)
      break;
  }
  *num_written_out = got_total;
  DONE_SLOW_SYSCALL;
  return err_out;
}
// Report the size in bytes of an HDFS file via hdfsGetPathInfo.
// Returns 0 and sets *len_out on success; EREMOTEIO if stat'ing failed.
qioerr hdfs_getlength(void* fl, int64_t* len_out, void* fs) {
  hdfsFileInfo* f_info = NULL;

  f_info = hdfsGetPathInfo(to_hdfs_fs(fs)->hfs, to_hdfs_file(fl)->pathnm);
  if (f_info == NULL)
    QIO_RETURN_CONSTANT_ERROR(EREMOTEIO, "Unable to get length of file in HDFS");

  *len_out = f_info->mSize;

  // BUG FIX: f_info was leaked on the success path; libhdfs requires
  // hdfsFreeFileInfo on the result of hdfsGetPathInfo.
  hdfsFreeFileInfo(f_info, 1);
  return 0;
}
// Open 'path' on the HDFS filesystem 'fs', retaining a reference on the
// connection for the lifetime of the file. On success *fd receives a
// heap-allocated hdfs_file and *flags gains the QIO_FDFLAG_READABLE /
// QIO_FDFLAG_WRITEABLE bits implied by the POSIX access mode.
// 'mode' and 'iohints' are currently unused by this plugin.
qioerr hdfs_open(void** fd, const char* path, int* flags, mode_t mode, qio_hint_t iohints, void* fs) {
  qioerr err_out = 0;
  int rc;
  hdfs_file* fl = (hdfs_file*)qio_calloc(sizeof(hdfs_file), 1);

  // BUG FIX: the allocation was never checked before fl->file was written.
  if (fl == NULL)
    QIO_RETURN_CONSTANT_ERROR(ENOMEM, "Out of memory in hdfs_open");

  STARTING_SLOW_SYSCALL;
  DO_RETAIN(((hdfs_fs*)fs));

  // assert that we connected
  CREATE_ERROR((to_hdfs_fs(fs)->hfs == NULL), err_out, ECONNREFUSED,"Unable to open HDFS file", error);

  fl->file = hdfsOpenFile(to_hdfs_fs(fs)->hfs, path, *flags, 0, 0, 0);

  // Assert that we opened the file
  if (fl->file == NULL) {
    err_out = qio_mkerror_errno();
    goto error;
  }
  DONE_SLOW_SYSCALL;

  // NOTE(review): stores the caller's pointer; presumably the caller keeps
  // 'path' alive for the file's lifetime -- confirm.
  fl->pathnm = path;

  // Translate the POSIX access mode into QIO fd flags.
  // (Simplified: the old "rc = *flags | ~O_ACCMODE; rc &= O_ACCMODE;" is
  // arithmetically identical to masking with O_ACCMODE.)
  rc = *flags & O_ACCMODE;
  if( rc == O_RDONLY ) {
    *flags |= QIO_FDFLAG_READABLE;
  } else if( rc == O_WRONLY ) {
    *flags |= QIO_FDFLAG_WRITEABLE;
  } else if( rc == O_RDWR ) {
    *flags |= QIO_FDFLAG_READABLE;
    *flags |= QIO_FDFLAG_WRITEABLE;
  }

  *fd = fl; // Set fd to fl and return
  return err_out;

error:
  // NOTE(review): the DO_RETAIN above is not released on this path (the
  // caller will not call hdfs_close for a failed open) -- this matches the
  // original behavior; confirm the intended reference-count contract.
  qio_free(fl);
  return err_out;
}
/* Flush buffered writes for an HDFS file via hdfsFlush.
 * Returns 0 on success or an errno-derived qioerr on failure. */
qioerr hdfs_fsync(void* fl, void* fs) {
  qioerr err = 0;
  int rc;

  STARTING_SLOW_SYSCALL;
  rc = hdfsFlush(to_hdfs_fs(fs)->hfs, to_hdfs_file(fl)->file);
  if (rc == -1)
    err = qio_mkerror_errno();
  DONE_SLOW_SYSCALL;

  return err;
}
qioerr hdfs_getpath(void* file, const char** string_out, void* fs) { // Speculatively allocate 128 bytes for the string int sz = 128; int left = 0; char* buf; char* got; qioerr err = 0; const char* host = to_hdfs_fs(fs)->fs_name; int port = to_hdfs_fs(fs)->fs_port; const char* path = to_hdfs_file(file)->pathnm; buf = (char*) qio_malloc(sz); if( !buf ) QIO_GET_CONSTANT_ERROR(err, ENOMEM, "Out of memory in hdfs_getpath"); while (1) { left = snprintf(buf, sz, "hdfs://%s:%d/%s", host, port, path); if (left > -1 && left < sz) { break; } else { // keep looping but with bigger buffer. // We know the size that we need now if n > -1 sz = left > -1 ? left + 1 : 2*sz; got = (char*) qio_realloc(buf, sz); if( ! got ) { qio_free(buf); QIO_GET_CONSTANT_ERROR(err, ENOMEM, "Out of memory in hdfs_getpath"); } } } *string_out = buf; return err; }
// Seek an HDFS file to absolute position 'offset'. Only legal on files
// opened for reading (HDFS restriction).
// NOTE(review): 'whence' is ignored and hdfsSeek is absolute -- callers
// presumably only pass SEEK_SET-style offsets; confirm.
qioerr hdfs_seek(void* fl, off_t offset, int whence, off_t* offset_out, void* fs) {
  off_t got;
  qioerr err_out = 0;

  // We cannot seek unless we are in read mode! (HDFS restriction)
  if (to_hdfs_file(fl)->file->type != INPUT)
    QIO_RETURN_CONSTANT_ERROR(ENOSYS, "Seeking is not supported in write mode in HDFS");

  STARTING_SLOW_SYSCALL;
  got = (off_t)hdfsSeek(to_hdfs_fs(fs)->hfs, to_hdfs_file(fl)->file, offset);
  if( got != (off_t) -1) {
    // BUG FIX: hdfsSeek returns a status code (0 on success), not the new
    // position; the old code reported 0 as the resulting offset. Report the
    // position we seeked to.
    *offset_out = offset;
  } else {
    // BUG FIX: both branches used to do the identical assignment and the
    // failure was silently swallowed; capture the errno-derived error.
    err_out = qio_mkerror_errno();
    *offset_out = got;
  }
  DONE_SLOW_SYSCALL;
  return err_out;
}
qioerr hdfs_getcwd(void* file, const char** path_out, void* fs) { int sz = 128; char* buf = (char*) qio_malloc(sz); qioerr err = 0; if ( !buf ) QIO_GET_CONSTANT_ERROR(err, ENOMEM, "Out of memory in hdfs_getcwd"); // hdfsGetWorkingDirectory will return 0 if buf[] is not large enough // If this happens, grow the buffer and try again while (err == 0 && hdfsGetWorkingDirectory(to_hdfs_fs(fs)->hfs, buf, sz) == 0) { if (errno == ERANGE) { int newSz = 2 * sz; char* newBuf = (char*) qio_realloc(buf, newSz); if (newBuf == 0) { QIO_GET_CONSTANT_ERROR(err, ENOMEM, "Out of memory in hdfs_getcwd"); } else { sz = newSz; buf = newBuf; } } else { // Other error, stop. QIO_GET_CONSTANT_ERROR(err, EREMOTEIO, "Unable to get path to file in HDFS"); } } if (err != 0) { qio_free(buf); buf = 0; } *path_out = buf; return err; }
void hdfs_do_release(void* fs) { DO_RELEASE(to_hdfs_fs(fs), hdfs_disconnect_and_free); }
// Positional vectored read: fill the iovecs with data starting at 'offset'
// without moving the shared file handle's cursor. The non-HDFS3 path uses
// hdfsPread directly. Under HDFS3 (no usable pread) a private connection
// and O_RDONLY handle are opened, seek+read is used, and both are torn down
// afterwards. NOTE(review): that per-call connect/open/close is expensive;
// presumably acceptable because preadv is rare here -- confirm with callers.
// Returns 0 on success, EEOF if a non-empty request read nothing, or an
// errno-derived error; *num_read_out gets the bytes actually read.
qioerr hdfs_preadv (void* file, const struct iovec *vector, int count, off_t offset, ssize_t* num_read_out, void* fs) {
  ssize_t got;
  ssize_t got_total;
  qioerr err_out = 0;
  int i;

  STARTING_SLOW_SYSCALL;
#ifdef HDFS3
  // Snapshot the connection parameters and path so we can open a private
  // handle that is free to seek.
  const hdfs_file orig_hfl = *to_hdfs_file(file);
  const hdfs_fs orig_hfs = *to_hdfs_fs(fs);

  hdfsFS hfs = hdfsConnect(orig_hfs.fs_name, orig_hfs.fs_port);
  hdfsFile hfl = hdfsOpenFile(hfs, orig_hfl.pathnm, O_RDONLY, 0, 0, 0);

  //assert connection
  CREATE_ERROR((hfs == NULL), err_out, ECONNREFUSED, "Unable to read HDFS file", error);

  if(hfl == NULL) {
    err_out = qio_mkerror_errno();
    goto error;
  }
#endif

  err_out = 0;
  got_total = 0;
  for(i = 0; i < count; i++) {
#ifdef HDFS3
    // Emulate pread: seek the private handle, then read.
    hdfsSeek(hfs, hfl, offset+got_total);
    got = hdfsRead(hfs, hfl, (void*)vector[i].iov_base, vector[i].iov_len);
#else
    got = hdfsPread(to_hdfs_fs(fs)->hfs, to_hdfs_file(file)->file, offset + got_total, (void*)vector[i].iov_base, vector[i].iov_len);
#endif
    if( got != -1 ) {
      got_total += got;
    } else {
      err_out = qio_mkerror_errno();
      break;
    }
    // Short read: stop without error; the caller sees the partial count.
    if(got != (ssize_t)vector[i].iov_len ) {
      break;
    }
  }

  // Nothing read on a non-empty request means end-of-file.
  if( err_out == 0 && got_total == 0 && sys_iov_total_bytes(vector, count) != 0 )
    err_out = qio_int_to_err(EEOF);

  *num_read_out = got_total;

#ifdef HDFS3
  // Tear down the private handle and connection.
  // NOTE(review): the 'goto error' paths above jump past this cleanup (and
  // past DONE_SLOW_SYSCALL), so the temporary connection may leak on error.
  got = hdfsCloseFile(hfs, hfl);
  if(got == -1) {
    err_out = qio_mkerror_errno();
  }

  got = hdfsDisconnect(hfs);
  if(got == -1) {
    err_out = qio_mkerror_errno();
  }
#endif
  DONE_SLOW_SYSCALL;
#ifdef HDFS3
error:
#endif
  return err_out;
}