/*
 * Hand out a message with at least @front_len bytes of front buffer.
 *
 * @pool:      pool to draw from
 * @front_len: required front length; 0 means "use the pool's own
 *             front_len"
 *
 * Order of attempts:
 *  1. If the request is larger than the pool's front_len, complain and
 *     try a one-off ceph_msg_new() of the requested size.
 *  2. For a blocking pool, try a fresh ceph_msg_new() first.
 *  3. Otherwise take the first message off pool->msgs under pool->lock.
 *  4. If the list is empty: a non-blocking pool makes one more
 *     allocation attempt and returns ERR_PTR(-ENOMEM) if that fails; a
 *     blocking pool sleeps in TASK_UNINTERRUPTIBLE on pool->wait and
 *     then retries from step 3.
 *
 * NOTE(review): the emptiness check is made under pool->lock, but the
 * task is only queued on pool->wait after the lock has been dropped.
 * If the waker runs in that window it looks like the wakeup could be
 * missed -- confirm against the code that returns messages to the pool.
 */
struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len)
{
	wait_queue_t wait;
	struct ceph_msg *msg;

	if (front_len && front_len > pool->front_len) {
		pr_err("msgpool_get pool %p need front %d, pool size is %d\n",
		       pool, front_len, pool->front_len);
		WARN_ON(1);

		/*
		 * Pool messages are too small for this request; try to
		 * alloc a fresh message of the requested size.  On failure
		 * we fall through to the normal pool paths below.
		 */
		msg = ceph_msg_new(0, front_len, 0, 0, NULL);
		if (!IS_ERR(msg))
			return msg;
	}

	/* no explicit size requested: use the pool's message size */
	if (!front_len)
		front_len = pool->front_len;

	if (pool->blocking) {
		/* mempool_t behavior; first try to alloc */
		msg = ceph_msg_new(0, front_len, 0, 0, NULL);
		if (!IS_ERR(msg))
			return msg;
	}

	while (1) {
		spin_lock(&pool->lock);
		if (likely(pool->num)) {
			/* common case: take the first preallocated message */
			msg = list_entry(pool->msgs.next, struct ceph_msg,
					 list_head);
			list_del_init(&msg->list_head);
			pool->num--;
			dout("msgpool_get %p got %p, now %d/%d\n", pool, msg,
			     pool->num, pool->min);
			spin_unlock(&pool->lock);
			return msg;
		}
		/* pool is empty; report it while the counters are stable */
		pr_err("msgpool_get %p now %d/%d, %s\n", pool, pool->num,
		       pool->min, pool->blocking ? "waiting" : "may fail");
		spin_unlock(&pool->lock);

		if (!pool->blocking) {
			WARN_ON(1);

			/* maybe we can allocate it now? */
			msg = ceph_msg_new(0, front_len, 0, 0, NULL);
			if (!IS_ERR(msg))
				return msg;

			pr_err("msgpool_get %p empty + alloc failed\n", pool);
			return ERR_PTR(-ENOMEM);
		}

		/* blocking pool: sleep on pool->wait, then retry the loop */
		init_wait(&wait);
		prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
		schedule();
		finish_wait(&pool->wait, &wait);
	}
/* NOTE(review): the function's closing brace is not present in this excerpt. */
/* * Allocate or release as necessary to meet our target pool size. */ static int __fill_msgpool(struct ceph_msgpool *pool) { struct ceph_msg *msg; while (pool->num < pool->min) { dout("fill_msgpool %p %d/%d allocating\n", pool, pool->num, pool->min); spin_unlock(&pool->lock); msg = ceph_msg_new(0, pool->front_len, 0, 0, NULL); spin_lock(&pool->lock); if (IS_ERR(msg)) return PTR_ERR(msg); msg->pool = pool; list_add(&msg->list_head, &pool->msgs); pool->num++; } while (pool->num > pool->min) { msg = list_first_entry(&pool->msgs, struct ceph_msg, list_head); dout("fill_msgpool %p %d/%d releasing %p\n", pool, pool->num, pool->min, msg); list_del_init(&msg->list_head); pool->num--; ceph_msg_kfree(msg); } return 0; }
/*
 * Element allocator with the (gfp_mask, pool_data) callback shape.
 *
 * @gfp_mask: allocation flags, handed straight to ceph_msg_new()
 * @arg:      the owning struct ceph_msgpool
 *
 * Allocates one message whose front is pool->front_len bytes.  Logs an
 * error and returns NULL if the allocation fails.
 */
static void *alloc_fn(gfp_t gfp_mask, void *arg)
{
	struct ceph_msgpool *mp = arg;
	void *msg = ceph_msg_new(0, mp->front_len, gfp_mask);

	if (msg)
		return msg;

	pr_err("msgpool %s alloc failed\n", mp->name);
	return NULL;
}
/*
 * Element allocator with the (gfp_mask, pool_data) callback shape.
 *
 * @gfp_mask: allocation flags, handed straight to ceph_msg_new()
 * @arg:      the owning struct ceph_msgpool
 *
 * Allocates one message with a pool->front_len byte front and tags it
 * with its owning pool via msg->pool.  Returns NULL on failure.
 */
static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
{
	struct ceph_msgpool *mp = arg;
	struct ceph_msg *m = ceph_msg_new(0, mp->front_len, gfp_mask, true);

	if (m) {
		dout("msgpool_alloc %s %p\n", mp->name, m);
		m->pool = mp;	/* remember which pool this message belongs to */
		return m;
	}

	dout("msgpool_alloc %s failed\n", mp->name);
	return NULL;
}
/*
 * Get a message whose front can hold @front_len bytes.
 *
 * Requests that fit within pool->front_len are served from the mempool.
 * A larger request is reported (pr_err + WARN_ON) and satisfied with a
 * one-off ceph_msg_new() of the requested size instead.
 */
struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len)
{
	/* normal case: the pool's messages are big enough */
	if (front_len <= pool->front_len)
		return mempool_alloc(pool->pool, GFP_NOFS);

	pr_err("msgpool_get pool %s need front %d, pool size is %d\n",
	       pool->name, front_len, pool->front_len);
	WARN_ON(1);

	/* try to alloc a fresh message */
	return ceph_msg_new(0, front_len, GFP_NOFS);
}
/*
 * Get a message whose front can hold @front_len bytes.
 *
 * Requests that fit within pool->front_len are served from the mempool.
 * A larger request triggers WARN_ON and is satisfied with a one-off
 * ceph_msg_new() of the requested size instead.
 */
struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len)
{
	struct ceph_msg *m;

	/* normal case: the pool's messages are big enough */
	if (front_len <= pool->front_len) {
		m = mempool_alloc(pool->pool, GFP_NOFS);
		dout("msgpool_get %s %p\n", pool->name, m);
		return m;
	}

	dout("msgpool_get %s need front %d, pool size is %d\n",
	     pool->name, front_len, pool->front_len);
	WARN_ON(1);

	/* try to alloc a fresh message */
	return ceph_msg_new(0, front_len, GFP_NOFS, false);
}
/*
 * Allocate and initialize an OSD request along with its reply message,
 * its request message and, when the ops call for it, a trailing
 * pagelist.
 *
 * @osdc:        owning OSD client
 * @flags:       CEPH_OSD_FLAG_*; a WARN fires if neither READ nor WRITE
 *               is set
 * @snapc:       optional snap context; only used here to size the
 *               request message
 * @ops:         op array, passed to get_num_ops() to obtain the op count
 *               and whether a trail is needed
 * @use_mempool: take the request and both messages from the client's
 *               pools rather than allocating them
 * @gfp_flags:   allocation flags for everything allocated here
 * @pages:       stored in req->r_pages
 * @bio:         with CONFIG_BLOCK, stored in req->r_bio with an extra
 *               reference taken via bio_get()
 *
 * Returns the new request, or NULL if any allocation fails.  Once the
 * request itself exists, partial state is released with
 * ceph_osdc_put_request().
 */
struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
					       int flags,
					       struct ceph_snap_context *snapc,
					       struct ceph_osd_req_op *ops,
					       bool use_mempool,
					       gfp_t gfp_flags,
					       struct page **pages,
					       struct bio *bio)
{
	struct ceph_osd_request *req;
	struct ceph_msg *msg;
	int needs_trail;
	int num_op = get_num_ops(ops, &needs_trail);
	size_t msg_size = sizeof(struct ceph_osd_request_head);

	msg_size += num_op*sizeof(struct ceph_osd_op);

	if (use_mempool) {
		req = mempool_alloc(osdc->req_mempool, gfp_flags);
		/*
		 * gfp_flags comes from the caller, so mempool_alloc() may
		 * return NULL if the mask does not allow sleeping.  Only
		 * clear the object once we know we have one.
		 */
		if (req)
			memset(req, 0, sizeof(*req));
	} else {
		req = kzalloc(sizeof(*req), gfp_flags);
	}
	if (req == NULL)
		return NULL;

	req->r_osdc = osdc;
	req->r_mempool = use_mempool;

	kref_init(&req->r_kref);
	init_completion(&req->r_completion);
	init_completion(&req->r_safe_completion);
	rb_init_node(&req->r_node);
	INIT_LIST_HEAD(&req->r_unsafe_item);
	INIT_LIST_HEAD(&req->r_linger_item);
	INIT_LIST_HEAD(&req->r_linger_osd);
	INIT_LIST_HEAD(&req->r_req_lru_item);
	INIT_LIST_HEAD(&req->r_osd_item);

	req->r_flags = flags;

	WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);

	/* create reply message */
	if (use_mempool)
		msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0);
	else
		msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY,
				   OSD_OPREPLY_FRONT_LEN, gfp_flags, true);
	if (!msg) {
		ceph_osdc_put_request(req);
		return NULL;
	}
	req->r_reply = msg;

	/* allocate space for the trailing data */
	if (needs_trail) {
		req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags);
		if (!req->r_trail) {
			ceph_osdc_put_request(req);
			return NULL;
		}
		ceph_pagelist_init(req->r_trail);
	}

	/* create request message; allow space for oid */
	msg_size += MAX_OBJ_NAME_SIZE;
	if (snapc)
		msg_size += sizeof(u64) * snapc->num_snaps;
	if (use_mempool)
		msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
	else
		msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags, true);
	if (!msg) {
		ceph_osdc_put_request(req);
		return NULL;
	}

	/* start from an all-zero front regardless of where msg came from */
	memset(msg->front.iov_base, 0, msg->front.iov_len);

	req->r_request = msg;
	req->r_pages = pages;
#ifdef CONFIG_BLOCK
	if (bio) {
		req->r_bio = bio;
		bio_get(req->r_bio);
	}
#endif

	return req;
}
/* * build new request AND message, calculate layout, and adjust file * extent as needed. * * if the file was recently truncated, we include information about its * old and new size so that the object can be updated appropriately. (we * avoid synchronously deleting truncated objects because it's slow.) * * if @do_sync, include a 'startsync' command so that the osd will flush * data quickly. */ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, struct ceph_vino vino, u64 off, u64 *plen, int opcode, int flags, struct ceph_snap_context *snapc, int do_sync, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, bool use_mempool, int num_reply) { struct ceph_osd_request *req; struct ceph_msg *msg; struct ceph_osd_request_head *head; struct ceph_osd_op *op; void *p; int num_op = 1 + do_sync; size_t msg_size = sizeof(*head) + num_op*sizeof(*op); int i; if (use_mempool) { req = mempool_alloc(osdc->req_mempool, GFP_NOFS); memset(req, 0, sizeof(*req)); } else { req = kzalloc(sizeof(*req), GFP_NOFS); } if (req == NULL) return NULL; req->r_osdc = osdc; req->r_mempool = use_mempool; kref_init(&req->r_kref); init_completion(&req->r_completion); init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item); req->r_flags = flags; WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); /* create reply message */ if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); else msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, OSD_OPREPLY_FRONT_LEN, GFP_NOFS); if (!msg) { ceph_osdc_put_request(req); return NULL; } req->r_reply = msg; /* create request message; allow space for oid */ msg_size += 40; if (snapc) msg_size += sizeof(u64) * snapc->num_snaps; if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, 0); else msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS); if (!msg) { ceph_osdc_put_request(req); return NULL; } msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); 
memset(msg->front.iov_base, 0, msg->front.iov_len); head = msg->front.iov_base; op = (void *)(head + 1); p = (void *)(op + num_op); req->r_request = msg; req->r_snapc = ceph_get_snap_context(snapc); head->client_inc = cpu_to_le32(1); /* always, for now. */ head->flags = cpu_to_le32(flags); if (flags & CEPH_OSD_FLAG_WRITE) ceph_encode_timespec(&head->mtime, mtime); head->num_ops = cpu_to_le16(num_op); op->op = cpu_to_le16(opcode); /* calculate max write size */ calc_layout(osdc, vino, layout, off, plen, req); req->r_file_layout = *layout; /* keep a copy */ if (flags & CEPH_OSD_FLAG_WRITE) { req->r_request->hdr.data_off = cpu_to_le16(off); req->r_request->hdr.data_len = cpu_to_le32(*plen); op->payload_len = cpu_to_le32(*plen); } op->extent.truncate_size = cpu_to_le64(truncate_size); op->extent.truncate_seq = cpu_to_le32(truncate_seq); /* fill in oid */ head->object_len = cpu_to_le32(req->r_oid_len); memcpy(p, req->r_oid, req->r_oid_len); p += req->r_oid_len; if (do_sync) { op++; op->op = cpu_to_le16(CEPH_OSD_OP_STARTSYNC); } if (snapc) { head->snap_seq = cpu_to_le64(snapc->seq); head->num_snaps = cpu_to_le32(snapc->num_snaps); for (i = 0; i < snapc->num_snaps; i++) { put_unaligned_le64(snapc->snaps[i], p); p += sizeof(u64); } } BUG_ON(p > msg->front.iov_base + msg->front.iov_len); msg_size = p - msg->front.iov_base; msg->front.iov_len = msg_size; msg->hdr.front_len = cpu_to_le32(msg_size); return req; }