int default_create_and_write(uint64_t oid, const struct siocb *iocb) { char path[PATH_MAX], tmp_path[PATH_MAX]; int flags = prepare_iocb(oid, iocb, true); int ret, fd; uint32_t len = iocb->length; size_t obj_size; sd_debug("%"PRIx64, oid); get_store_path(oid, iocb->ec_index, path); get_store_tmp_path(oid, iocb->ec_index, tmp_path); fd = open(tmp_path, flags, sd_def_fmode); if (fd < 0) { if (errno == EEXIST) { /* * This happens if node membership changes during object * creation; while gateway retries a CREATE request, * recovery process could also recover the object at the * same time. They should try to write the same date, * so it is okay to simply return success here. */ sd_debug("%s exists", tmp_path); return SD_RES_SUCCESS; } sd_err("failed to open %s: %m", tmp_path); return err_to_sderr(path, oid, errno); } obj_size = get_store_objsize(oid); ret = prealloc(fd, obj_size); if (ret < 0) { ret = err_to_sderr(path, oid, errno); goto out; } ret = xpwrite(fd, iocb->buf, len, iocb->offset); if (ret != len) { sd_err("failed to write object. %m"); ret = err_to_sderr(path, oid, errno); goto out; } /* * Modern FS like ext4, xfs defaults to automatic syncing of files after * replace-via-rename and replace-via-truncate operations. So rename * without fsync() is actually safe. */ ret = rename(tmp_path, path); if (ret < 0) { sd_err("failed to rename %s to %s: %m", tmp_path, path); ret = err_to_sderr(path, oid, errno); goto out; } ret = SD_RES_SUCCESS; objlist_cache_insert(oid); out: if (ret != SD_RES_SUCCESS && unlink(tmp_path) != 0) sd_err("failed to unlink %s: %m", tmp_path); close(fd); return ret; }
int default_create_and_write(uint64_t oid, const struct siocb *iocb) { char path[PATH_MAX], tmp_path[PATH_MAX], *dir; int flags = prepare_iocb(oid, iocb, true); int ret, fd; uint32_t len = iocb->length; uint32_t object_size = 0; size_t obj_size; uint64_t offset = iocb->offset; sd_debug("%016"PRIx64, oid); get_store_path(oid, iocb->ec_index, path); get_store_tmp_path(oid, iocb->ec_index, tmp_path); if (uatomic_is_true(&sys->use_journal) && journal_write_store(oid, iocb->buf, iocb->length, iocb->offset, true) != SD_RES_SUCCESS) { sd_err("turn off journaling"); uatomic_set_false(&sys->use_journal); flags |= O_SYNC; sync(); } fd = open(tmp_path, flags, sd_def_fmode); if (fd < 0) { if (errno == EEXIST) { /* * This happens if node membership changes during object * creation; while gateway retries a CREATE request, * recovery process could also recover the object at the * same time. They should try to write the same date, * so it is okay to simply return success here. */ sd_debug("%s exists", tmp_path); return SD_RES_SUCCESS; } sd_err("failed to open %s: %m", tmp_path); return err_to_sderr(path, oid, errno); } obj_size = get_store_objsize(oid); trim_zero_blocks(iocb->buf, &offset, &len); object_size = get_vdi_object_size(oid_to_vid(oid)); if (offset != 0 || len != get_objsize(oid, object_size)) { if (is_sparse_object(oid)) ret = xftruncate(fd, obj_size); else ret = prealloc(fd, obj_size); if (ret < 0) { ret = err_to_sderr(path, oid, errno); goto out; } } ret = xpwrite(fd, iocb->buf, len, offset); if (ret != len) { sd_err("failed to write object. %m"); ret = err_to_sderr(path, oid, errno); goto out; } ret = rename(tmp_path, path); if (ret < 0) { sd_err("failed to rename %s to %s: %m", tmp_path, path); ret = err_to_sderr(path, oid, errno); goto out; } close(fd); if (uatomic_is_true(&sys->use_journal) || sys->nosync == true) { objlist_cache_insert(oid); return SD_RES_SUCCESS; } pstrcpy(tmp_path, sizeof(tmp_path), path); dir = dirname(tmp_path); fd = open(dir, O_DIRECTORY | O_RDONLY); if (fd < 0) { sd_err("failed to open directory %s: %m", dir); return err_to_sderr(path, oid, errno); } if (fsync(fd) != 0) { sd_err("failed to write directory %s: %m", dir); ret = err_to_sderr(path, oid, errno); close(fd); if (unlink(path) != 0) sd_err("failed to unlink %s: %m", path); return ret; } close(fd); objlist_cache_insert(oid); return SD_RES_SUCCESS; out: if (unlink(tmp_path) != 0) sd_err("failed to unlink %s: %m", tmp_path); close(fd); return ret; }