static void ceph_aio_complete_req(struct ceph_osd_request *req) { int rc = req->r_result; struct inode *inode = req->r_inode; struct ceph_aio_request *aio_req = req->r_priv; struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0); int num_pages = calc_pages_for((u64)osd_data->alignment, osd_data->length); dout("ceph_aio_complete_req %p rc %d bytes %llu\n", inode, rc, osd_data->length); if (rc == -EOLDSNAPC) { struct ceph_aio_work *aio_work; BUG_ON(!aio_req->write); aio_work = kmalloc(sizeof(*aio_work), GFP_NOFS); if (aio_work) { INIT_WORK(&aio_work->work, ceph_aio_retry_work); aio_work->req = req; queue_work(ceph_inode_to_client(inode)->wb_wq, &aio_work->work); return; } rc = -ENOMEM; } else if (!aio_req->write) { if (rc == -ENOENT) rc = 0; if (rc >= 0 && osd_data->length > rc) { int zoff = osd_data->alignment + rc; int zlen = osd_data->length - rc; /* * If read is satisfied by single OSD request, * it can pass EOF. Otherwise read is within * i_size. */ if (aio_req->num_reqs == 1) { loff_t i_size = i_size_read(inode); loff_t endoff = aio_req->iocb->ki_pos + rc; if (endoff < i_size) zlen = min_t(size_t, zlen, i_size - endoff); aio_req->total_len = rc + zlen; } if (zlen > 0) ceph_zero_page_vector_range(zoff, zlen, osd_data->pages); } } ceph_put_page_vector(osd_data->pages, num_pages, !aio_req->write); ceph_osdc_put_request(req); if (rc < 0) cmpxchg(&aio_req->error, 0, rc); ceph_aio_complete(inode, aio_req); return; }
static int striped_read(struct inode *inode, u64 off, u64 len, struct page **pages, int num_pages, int *checkeof, bool o_direct, unsigned long buf_align) { struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u64 pos, this_len; int io_align, page_align; int left, pages_left; int read; struct page **page_pos; int ret; bool hit_stripe, was_short; pos = off; left = len; page_pos = pages; pages_left = num_pages; read = 0; io_align = off & ~PAGE_MASK; more: if (o_direct) page_align = (pos - io_align + buf_align) & ~PAGE_MASK; else page_align = pos & ~PAGE_MASK; this_len = left; ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), &ci->i_layout, pos, &this_len, ci->i_truncate_seq, ci->i_truncate_size, page_pos, pages_left, page_align); if (ret == -ENOENT) ret = 0; hit_stripe = this_len < left; was_short = ret >= 0 && ret < this_len; dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read, ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); if (ret > 0) { int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT; if (read < pos - off) { dout(" zero gap %llu to %llu\n", off + read, pos); ceph_zero_page_vector_range(page_align + read, pos - off - read, pages); } pos += ret; read = pos - off; left -= ret; page_pos += didpages; pages_left -= didpages; if (left && hit_stripe) goto more; }
void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) { struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; if (inc) atomic64_inc(&mdsc->quotarealms_count); else atomic64_dec(&mdsc->quotarealms_count); }
/* * Read a range of bytes striped over one or more objects. Iterate over * objects we stripe over. (That's not atomic, but good enough for now.) * * If we get a short result from the OSD, check against i_size; we need to * only return a short read to the caller if we hit EOF. */ static int striped_read(struct inode *inode, u64 off, u64 len, struct page **pages, int num_pages, int *checkeof) { struct ceph_client *client = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u64 pos, this_len; int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ int left, pages_left; int read; struct page **page_pos; int ret; bool hit_stripe, was_short; /* * we may need to do multiple reads. not atomic, unfortunately. */ pos = off; left = len; page_pos = pages; pages_left = num_pages; read = 0; more: this_len = left; ret = ceph_osdc_readpages(&client->osdc, ceph_vino(inode), &ci->i_layout, pos, &this_len, ci->i_truncate_seq, ci->i_truncate_size, page_pos, pages_left); hit_stripe = this_len < left; was_short = ret >= 0 && ret < this_len; if (ret == -ENOENT) ret = 0; dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read, ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); if (ret > 0) { int didpages = ((pos & ~PAGE_CACHE_MASK) + ret) >> PAGE_CACHE_SHIFT; if (read < pos - off) { dout(" zero gap %llu to %llu\n", off + read, pos); zero_page_vector_range(page_off + read, pos - off - read, pages); } pos += ret; read = pos - off; left -= ret; page_pos += didpages; pages_left -= didpages; /* hit stripe? */ if (left && hit_stripe) goto more; }
static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) { struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode); struct ceph_monmap *monmap = fsc->client->monc.monmap; struct ceph_statfs st; u64 fsid; int err; dout("statfs\n"); err = ceph_monc_do_statfs(&fsc->client->monc, &st); if (err < 0) return err; /* fill in kstatfs */ buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ /* * express utilization in terms of large blocks to avoid * overflow on 32-bit machines. * * NOTE: for the time being, we make bsize == frsize to humor * not-yet-ancient versions of glibc that are broken. * Someday, we will probably want to report a real block * size... whatever that may mean for a network file system! */ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); buf->f_bfree = (le64_to_cpu(st.kb) - le64_to_cpu(st.kb_used)) >> (CEPH_BLOCK_SHIFT-10); buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; buf->f_namelen = NAME_MAX; /* leave fsid little-endian, regardless of host endianness */ fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); buf->f_fsid.val[0] = fsid & 0xffffffff; buf->f_fsid.val[1] = fsid >> 32; return 0; }
static int ceph_get_name(struct dentry *parent, char *name, struct dentry *child) { struct ceph_mds_client *mdsc; struct ceph_mds_request *req; int err; mdsc = ceph_inode_to_client(d_inode(child))->mdsc; req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME, USE_ANY_MDS); if (IS_ERR(req)) return PTR_ERR(req); inode_lock(d_inode(parent)); req->r_inode = d_inode(child); ihold(d_inode(child)); req->r_ino2 = ceph_vino(d_inode(parent)); req->r_parent = d_inode(parent); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_num_caps = 2; err = ceph_mdsc_do_request(mdsc, NULL, req); inode_unlock(d_inode(parent)); if (!err) { struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; memcpy(name, rinfo->dname, rinfo->dname_len); name[rinfo->dname_len] = 0; dout("get_name %p ino %llx.%llx name %s\n", child, ceph_vinop(d_inode(child)), name); } else { dout("get_name %p ino %llx.%llx err %d\n", child, ceph_vinop(d_inode(child)), err); } ceph_mdsc_put_request(req); return err; }
static int ceph_get_name(struct dentry *parent, char *name, struct dentry *child) { struct ceph_mds_client *mdsc; struct ceph_mds_request *req; int err; mdsc = ceph_inode_to_client(child->d_inode)->mdsc; req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME, USE_ANY_MDS); if (IS_ERR(req)) return PTR_ERR(req); mutex_lock(&parent->d_inode->i_mutex); req->r_inode = child->d_inode; ihold(child->d_inode); req->r_ino2 = ceph_vino(parent->d_inode); req->r_locked_dir = parent->d_inode; req->r_num_caps = 2; err = ceph_mdsc_do_request(mdsc, NULL, req); mutex_unlock(&parent->d_inode->i_mutex); if (!err) { struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; memcpy(name, rinfo->dname, rinfo->dname_len); name[rinfo->dname_len] = 0; dout("get_name %p ino %llx.%llx name %s\n", child, ceph_vinop(child->d_inode), name); } else { dout("get_name %p ino %llx.%llx err %d\n", child, ceph_vinop(child->d_inode), err); } ceph_mdsc_put_request(req); return err; }
static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) { struct ceph_client *client = ceph_inode_to_client(dentry->d_inode); struct ceph_monmap *monmap = client->monc.monmap; struct ceph_statfs st; u64 fsid; int err; dout("statfs\n"); err = ceph_monc_do_statfs(&client->monc, &st); if (err < 0) return err; /* fill in kstatfs */ buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ /* * express utilization in terms of large blocks to avoid * overflow on 32-bit machines. */ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); buf->f_bfree = (le64_to_cpu(st.kb) - le64_to_cpu(st.kb_used)) >> (CEPH_BLOCK_SHIFT-10); buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; buf->f_namelen = NAME_MAX; buf->f_frsize = PAGE_CACHE_SIZE; /* leave fsid little-endian, regardless of host endianness */ fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); buf->f_fsid.val[0] = fsid & 0xffffffff; buf->f_fsid.val[1] = fsid >> 32; return 0; }
/* * Read a range of bytes striped over one or more objects. Iterate over * objects we stripe over. (That's not atomic, but good enough for now.) * * If we get a short result from the OSD, check against i_size; we need to * only return a short read to the caller if we hit EOF. */ static int striped_read(struct inode *inode, u64 off, u64 len, struct page **pages, int num_pages, int *checkeof, bool o_direct, unsigned long buf_align) { struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u64 pos, this_len, left; int io_align, page_align; int pages_left; int read; struct page **page_pos; int ret; bool hit_stripe, was_short; /* * we may need to do multiple reads. not atomic, unfortunately. */ pos = off; left = len; page_pos = pages; pages_left = num_pages; read = 0; io_align = off & ~PAGE_MASK; more: if (o_direct) page_align = (pos - io_align + buf_align) & ~PAGE_MASK; else page_align = pos & ~PAGE_MASK; this_len = left; ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), &ci->i_layout, pos, &this_len, ci->i_truncate_seq, ci->i_truncate_size, page_pos, pages_left, page_align); if (ret == -ENOENT) ret = 0; hit_stripe = this_len < left; was_short = ret >= 0 && ret < this_len; dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read, ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); if (ret >= 0) { int didpages; if (was_short && (pos + ret < inode->i_size)) { u64 tmp = min(this_len - ret, inode->i_size - pos - ret); dout(" zero gap %llu to %llu\n", pos + ret, pos + ret + tmp); ceph_zero_page_vector_range(page_align + read + ret, tmp, pages); ret += tmp; } didpages = (page_align + ret) >> PAGE_CACHE_SHIFT; pos += ret; read = pos - off; left -= ret; page_pos += didpages; pages_left -= didpages; /* hit stripe and need continue*/ if (left && hit_stripe && pos < inode->i_size) goto more; }
static inline bool ceph_has_realms_with_quotas(struct inode *inode) { struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; return atomic64_read(&mdsc->quotarealms_count) > 0; }
if (has_quota || !next) return realm; ceph_get_snap_realm(mdsc, next); ceph_put_snap_realm(mdsc, realm); realm = next; } if (realm) ceph_put_snap_realm(mdsc, realm); return NULL; } bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) { struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc; struct ceph_snap_realm *old_realm, *new_realm; bool is_same; down_read(&mdsc->snap_rwsem); old_realm = get_quota_realm(mdsc, old); new_realm = get_quota_realm(mdsc, new); is_same = (old_realm == new_realm); up_read(&mdsc->snap_rwsem); if (old_realm) ceph_put_snap_realm(mdsc, old_realm); if (new_realm) ceph_put_snap_realm(mdsc, new_realm); return is_same;
/* * Read a range of bytes striped over one or more objects. Iterate over * objects we stripe over. (That's not atomic, but good enough for now.) * * If we get a short result from the OSD, check against i_size; we need to * only return a short read to the caller if we hit EOF. */ static int striped_read(struct inode *inode, u64 off, u64 len, struct page **pages, int num_pages, int *checkeof, bool o_direct, unsigned long buf_align) { struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u64 pos, this_len, left; int io_align, page_align; int pages_left; int read; struct page **page_pos; int ret; bool hit_stripe, was_short; /* * we may need to do multiple reads. not atomic, unfortunately. */ pos = off; left = len; page_pos = pages; pages_left = num_pages; read = 0; io_align = off & ~PAGE_MASK; if (pos + left > inode->i_size) { *checkeof = 1; left = inode->i_size - pos; } more: if (o_direct) page_align = (pos - io_align + buf_align) & ~PAGE_MASK; else page_align = pos & ~PAGE_MASK; this_len = left; ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), &ci->i_layout, pos, &this_len, ci->i_truncate_seq, ci->i_truncate_size, page_pos, pages_left, page_align); if (ret == -ENOENT) ret = 0; hit_stripe = ret >= 0 && this_len < left; was_short = ret >= 0 && ret < this_len; dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read, ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); if (was_short) { /* zero trailing bytes */ dout("zero trailing %llu bytes\n", this_len - ret); ceph_zero_page_vector_range(page_align + ret, this_len - ret, page_pos); } if (ret >= 0) { int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT; pos += this_len; read += this_len; left -= this_len; page_pos += didpages; pages_left -= didpages; }
/* * Read a range of bytes striped over one or more objects. Iterate over * objects we stripe over. (That's not atomic, but good enough for now.) * * If we get a short result from the OSD, check against i_size; we need to * only return a short read to the caller if we hit EOF. */ static int striped_read(struct inode *inode, u64 pos, u64 len, struct page **pages, int num_pages, int page_align, int *checkeof) { struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u64 this_len; loff_t i_size; int page_idx; int ret, read = 0; bool hit_stripe, was_short; /* * we may need to do multiple reads. not atomic, unfortunately. */ more: this_len = len; page_idx = (page_align + read) >> PAGE_SHIFT; ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), &ci->i_layout, pos, &this_len, ci->i_truncate_seq, ci->i_truncate_size, pages + page_idx, num_pages - page_idx, ((page_align + read) & ~PAGE_MASK)); if (ret == -ENOENT) ret = 0; hit_stripe = this_len < len; was_short = ret >= 0 && ret < this_len; dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, len, read, ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); i_size = i_size_read(inode); if (ret >= 0) { if (was_short && (pos + ret < i_size)) { int zlen = min(this_len - ret, i_size - pos - ret); int zoff = page_align + read + ret; dout(" zero gap %llu to %llu\n", pos + ret, pos + ret + zlen); ceph_zero_page_vector_range(zoff, zlen, pages); ret += zlen; } read += ret; pos += ret; len -= ret; /* hit stripe and need continue*/ if (len && hit_stripe && pos < i_size) goto more; } if (read > 0) { ret = read; /* did we bounce off eof? */ if (pos + len > i_size) *checkeof = CHECK_EOF; } dout("striped_read returns %d\n", ret); return ret; }