/*
 * Map the file extent @off~*@plen onto an object and record the result in
 * the request.
 *
 * Stores @snapid (little-endian) in the request head, asks
 * ceph_calc_file_object_mapping() for the object number (*@bno) and the
 * extent inside that object, and copies that extent into @op when the
 * opcode carries one.  If the mapping helper reduced *@plen, the caller
 * sees the reduced length on return.  The request's page count and page
 * alignment are derived from the file offset; for a write op the payload
 * length is set to the (possibly reduced) *@plen.
 *
 * @osdc is not referenced by this function.
 */
void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
			  struct ceph_file_layout *layout,
			  u64 snapid,
			  u64 off, u64 *plen, u64 *bno,
			  struct ceph_osd_request *req,
			  struct ceph_osd_req_op *op)
{
	struct ceph_osd_request_head *head = req->r_request->front.iov_base;
	u64 requested_len = *plen;
	u64 obj_off;	/* offset of the extent within the object */
	u64 obj_len;	/* length of the extent within the object */

	head->snapid = cpu_to_le64(snapid);

	/* object extent? */
	ceph_calc_file_object_mapping(layout, off, plen, bno,
				      &obj_off, &obj_len);
	if (*plen < requested_len)
		dout(" skipping last %llu, final file extent %llu~%llu\n",
		     requested_len - *plen, off, *plen);

	if (op_has_extent(op->op)) {
		op->extent.offset = obj_off;
		op->extent.length = obj_len;
	}

	/* page accounting follows the file offset, not the object offset */
	req->r_num_pages = calc_pages_for(off, *plen);
	req->r_page_alignment = off & ~PAGE_MASK;

	if (op->op == CEPH_OSD_OP_WRITE)
		op->payload_len = *plen;

	dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
	     *bno, obj_off, obj_len, req->r_num_pages);
}
/*
 * Work out which object the file extent @off~*@plen lands in and fill in
 * the request to match.  The extent is shortened as needed when it would
 * cross an object boundary; the shortened length is handed back through
 * @plen.
 *
 * Also fills in the osd op that sits in the request message: the object
 * name ("<ino>.<block number>", both in hex), the snapshot id, and the
 * extent within the object.
 */
static void calc_layout(struct ceph_osd_client *osdc,
			struct ceph_vino vino,
			struct ceph_file_layout *layout,
			u64 off, u64 *plen,
			struct ceph_osd_request *req)
{
	struct ceph_osd_request_head *head = req->r_request->front.iov_base;
	/* the op is laid out directly behind the request head */
	struct ceph_osd_op *first_op = (void *)(head + 1);
	u64 requested_len = *plen;
	u64 obj_off, obj_len;	/* extent in object */
	u64 block_no;

	head->snapid = cpu_to_le64(vino.snap);

	/* object extent? */
	ceph_calc_file_object_mapping(layout, off, plen, &block_no,
				      &obj_off, &obj_len);
	if (*plen < requested_len)
		dout(" skipping last %llu, final file extent %llu~%llu\n",
		     requested_len - *plen, off, *plen);

	/* sprintf() returns the number of characters written, i.e. the name length */
	req->r_oid_len = sprintf(req->r_oid, "%llx.%08llx", vino.ino, block_no);

	first_op->extent.offset = cpu_to_le64(obj_off);
	first_op->extent.length = cpu_to_le64(obj_len);
	req->r_num_pages = calc_pages_for(off, *plen);

	dout("calc_layout %s (%d) %llu~%llu (%d pages)\n",
	     req->r_oid, req->r_oid_len, obj_off, obj_len, req->r_num_pages);
}
static void ceph_aio_complete_req(struct ceph_osd_request *req) { int rc = req->r_result; struct inode *inode = req->r_inode; struct ceph_aio_request *aio_req = req->r_priv; struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0); int num_pages = calc_pages_for((u64)osd_data->alignment, osd_data->length); dout("ceph_aio_complete_req %p rc %d bytes %llu\n", inode, rc, osd_data->length); if (rc == -EOLDSNAPC) { struct ceph_aio_work *aio_work; BUG_ON(!aio_req->write); aio_work = kmalloc(sizeof(*aio_work), GFP_NOFS); if (aio_work) { INIT_WORK(&aio_work->work, ceph_aio_retry_work); aio_work->req = req; queue_work(ceph_inode_to_client(inode)->wb_wq, &aio_work->work); return; } rc = -ENOMEM; } else if (!aio_req->write) { if (rc == -ENOENT) rc = 0; if (rc >= 0 && osd_data->length > rc) { int zoff = osd_data->alignment + rc; int zlen = osd_data->length - rc; /* * If read is satisfied by single OSD request, * it can pass EOF. Otherwise read is within * i_size. */ if (aio_req->num_reqs == 1) { loff_t i_size = i_size_read(inode); loff_t endoff = aio_req->iocb->ki_pos + rc; if (endoff < i_size) zlen = min_t(size_t, zlen, i_size - endoff); aio_req->total_len = rc + zlen; } if (zlen > 0) ceph_zero_page_vector_range(zoff, zlen, osd_data->pages); } } ceph_put_page_vector(osd_data->pages, num_pages, !aio_req->write); ceph_osdc_put_request(req); if (rc < 0) cmpxchg(&aio_req->error, 0, rc); ceph_aio_complete(inode, aio_req); return; }
/*
 * Build a new OSD request and its message for the file extent
 * @off~*@plen, calculating the object layout and adjusting *@plen as
 * calc_layout() dictates.
 *
 * The first op carries @opcode together with @truncate_seq and
 * @truncate_size; when @do_sync is set a CEPH_OSD_OP_STARTSYNC op is
 * appended.  The page count and alignment stored in the request come from
 * @page_align rather than from the file offset.
 *
 * Returns the new request on success, NULL if the request could not be
 * allocated, or an ERR_PTR() carrying calc_layout()'s error.  Callers
 * therefore have to check for both NULL and IS_ERR().
 *
 * @num_reply is not referenced by this function.
 */
struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
					       struct ceph_file_layout *layout,
					       struct ceph_vino vino,
					       u64 off, u64 *plen,
					       int opcode, int flags,
					       struct ceph_snap_context *snapc,
					       int do_sync,
					       u32 truncate_seq,
					       u64 truncate_size,
					       struct timespec *mtime,
					       bool use_mempool, int num_reply,
					       int page_align)
{
	struct ceph_osd_req_op ops[3];
	struct ceph_osd_request *req;
	int r;

	ops[0].op = opcode;
	ops[0].extent.truncate_seq = truncate_seq;
	ops[0].extent.truncate_size = truncate_size;
	ops[0].payload_len = 0;

	/* the slot after the last real op gets op == 0 */
	if (do_sync) {
		ops[1].op = CEPH_OSD_OP_STARTSYNC;
		ops[1].payload_len = 0;
		ops[2].op = 0;
	} else
		ops[1].op = 0;

	req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
				      use_mempool,
				      GFP_NOFS, NULL, NULL);
	if (!req)
		return NULL;

	/* calculate max write size */
	r = calc_layout(osdc, vino, layout, off, plen, req, ops);
	if (r < 0) {
		/* drop the request allocated above so it is not leaked */
		ceph_osdc_put_request(req);
		return ERR_PTR(r);
	}

	req->r_file_layout = *layout;  /* keep a copy */

	/* use the caller's alignment in case it differs from the file's */
	req->r_num_pages = calc_pages_for(page_align, *plen);
	req->r_page_alignment = page_align;

	ceph_osdc_build_request(req, off, plen, ops,
				snapc,
				mtime,
				req->r_oid, req->r_oid_len);

	return req;
}
/* * build new request AND message, calculate layout, and adjust file * extent as needed. * * if the file was recently truncated, we include information about its * old and new size so that the object can be updated appropriately. (we * avoid synchronously deleting truncated objects because it's slow.) * * if @do_sync, include a 'startsync' command so that the osd will flush * data quickly. */ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, struct ceph_vino vino, u64 off, u64 *plen, int opcode, int flags, struct ceph_snap_context *snapc, int do_sync, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, bool use_mempool, int num_reply, int page_align) { struct ceph_osd_req_op ops[3]; struct ceph_osd_request *req; ops[0].op = opcode; ops[0].extent.truncate_seq = truncate_seq; ops[0].extent.truncate_size = truncate_size; ops[0].payload_len = 0; if (do_sync) { ops[1].op = CEPH_OSD_OP_STARTSYNC; ops[1].payload_len = 0; ops[2].op = 0; } else ops[1].op = 0; req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, use_mempool, GFP_NOFS, NULL, NULL); if (!req) return NULL; /* calculate max write size */ calc_layout(osdc, vino, layout, off, plen, req, ops); req->r_file_layout = *layout; /* keep a copy */ /* in case it differs from natural (file) alignment that calc_layout filled in for us */ req->r_num_pages = calc_pages_for(page_align, *plen); req->r_page_alignment = page_align; ceph_osdc_build_request(req, off, plen, ops, snapc, mtime, req->r_oid, req->r_oid_len); return req; }
/* * Completely synchronous read and write methods. Direct from __user * buffer to osd, or directly to user pages (if O_DIRECT). * * If the read spans object boundary, just do multiple reads. */ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, int *checkeof) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct page **pages; u64 off = iocb->ki_pos; int num_pages; ssize_t ret; size_t len = iov_iter_count(to); dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len, (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); if (!len) return 0; /* * flush any page cache pages in this range. this * will make concurrent normal and sync io slow, * but it will at least behave sensibly when they are * in sequence. */ ret = filemap_write_and_wait_range(inode->i_mapping, off, off + len); if (ret < 0) return ret; if (unlikely(to->type & ITER_PIPE)) { size_t page_off; ret = iov_iter_get_pages_alloc(to, &pages, len, &page_off); if (ret <= 0) return -ENOMEM; num_pages = DIV_ROUND_UP(ret + page_off, PAGE_SIZE); ret = striped_read(inode, off, ret, pages, num_pages, page_off, checkeof); if (ret > 0) { iov_iter_advance(to, ret); off += ret; } else { iov_iter_advance(to, 0); } ceph_put_page_vector(pages, num_pages, false); } else { num_pages = calc_pages_for(off, len); pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); if (IS_ERR(pages)) return PTR_ERR(pages); ret = striped_read(inode, off, len, pages, num_pages, (off & ~PAGE_MASK), checkeof); if (ret > 0) { int l, k = 0; size_t left = ret; while (left) { size_t page_off = off & ~PAGE_MASK; size_t copy = min_t(size_t, left, PAGE_SIZE - page_off); l = copy_page_to_iter(pages[k++], page_off, copy, to); off += l; left -= l; if (l < copy) break; } } ceph_release_page_vector(pages, num_pages); } if (off > iocb->ki_pos) { ret = off - iocb->ki_pos; iocb->ki_pos = off; } dout("sync_read result %zd\n", ret); return ret; }