/*
 * Re-populate one epoll file from its checkpoint image record.
 *
 * obj  - restored object; obj->o_obj is used as the epoll "struct file"
 * pos  - image position of the epoll record whose header is ebuf
 * ebuf - header of that record; cpt_hdrlen and cpt_next bound the run of
 *        CPT_OBJ_EPOLL_FILE sub-records that is walked here
 * ctx  - checkpoint/restore context
 *
 * Returns 0 on success, -EINVAL when obj is not an eventpoll file, has no
 * private data, or a referenced target file object cannot be found;
 * otherwise the error returned by rst_get_object() or ep_insert().
 */
static int restore_one_epoll(cpt_object_t *obj, loff_t pos,
			     struct cpt_epoll_image *ebuf, cpt_context_t *ctx)
{
	struct file *epfile = obj->o_obj;
	struct eventpoll *ep;
	loff_t end;

	if (epfile->f_op != &eventpoll_fops) {
		eprintk_ctx("bad epoll file\n");
		return -EINVAL;
	}

	ep = epfile->private_data;
	if (unlikely(!ep)) {
		eprintk_ctx("bad epoll device\n");
		return -EINVAL;
	}

	/* Sub-records start right after the header and stop at cpt_next. */
	end = pos + ebuf->cpt_next;
	pos += ebuf->cpt_hdrlen;

	while (pos < end) {
		struct cpt_epoll_file_image rec;
		struct epoll_event ev;
		cpt_object_t *target;
		int rc;

		rc = rst_get_object(CPT_OBJ_EPOLL_FILE, pos, &rec, ctx);
		if (rc)
			return rc;

		target = lookup_cpt_obj_bypos(CPT_OBJ_FILE, rec.cpt_file, ctx);
		if (!target) {
			eprintk_ctx("epoll file not found\n");
			return -EINVAL;
		}

		ev.events = rec.cpt_events;
		ev.data = rec.cpt_data;

		mutex_lock(&ep->mtx);
		rc = ep_insert(ep, &ev, target->o_obj, rec.cpt_fd);
		if (rc == 0) {
			struct epitem *item;

			item = ep_find(ep, target->o_obj, rec.cpt_fd);
			/*
			 * When the record has cpt_ready set, put the new item
			 * on ep->rdllist unless it is already linked there.
			 */
			if (item && rec.cpt_ready) {
				unsigned long irqflags;

				spin_lock_irqsave(&ep->lock, irqflags);
				if (list_empty(&item->rdllink))
					list_add_tail(&item->rdllink,
						      &ep->rdllist);
				spin_unlock_irqrestore(&ep->lock, irqflags);
			}
		}
		mutex_unlock(&ep->mtx);

		if (rc)
			return rc;

		pos += rec.cpt_next;
	}

	return 0;
}
/*
 * epoll_ctl: once an epoll descriptor exists, this is how descriptors are
 * added to, changed on, or removed from it.
 *
 * epfd  - the epoll descriptor
 * op    - EPOLL_CTL_ADD, EPOLL_CTL_MOD or EPOLL_CTL_DEL
 * fd    - the descriptor to be watched
 * event - user-space description of the events of interest; only copied in
 *         when ep_op_has_event(op) is true
 *
 * Returns the result of ep_insert()/ep_remove()/ep_modify(), or:
 *   -EFAULT  the event could not be copied from user space
 *   -EBADF   epfd or fd does not resolve to a file
 *   -EPERM   the target file has no poll operation
 *   -EINVAL  epfd is not an epoll file, epfd and fd are the same file,
 *            or op is not one of the three operations
 *   -EEXIST  ADD of a descriptor that is already registered
 *   -ENOENT  DEL or MOD of a descriptor that is not registered
 */
SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
		struct epoll_event __user *, event)
{
	struct epoll_event kev;
	struct file *epfile, *target;
	struct eventpoll *ep;
	struct epitem *item;
	int rc;

	/* Bring the epoll_event into kernel space when the op carries one. */
	if (ep_op_has_event(op) &&
	    copy_from_user(&kev, event, sizeof(struct epoll_event))) {
		rc = -EFAULT;
		goto out;
	}

	/* The "struct file" backing the epoll descriptor itself. */
	epfile = fget(epfd);
	if (!epfile) {
		rc = -EBADF;
		goto out;
	}

	/* The "struct file" of the descriptor to watch; not to be confused
	 * with the one above. */
	target = fget(fd);
	if (!target) {
		rc = -EBADF;
		goto out_put_epfile;
	}

	/* A file that cannot be polled cannot be watched. */
	if (!target->f_op || !target->f_op->poll) {
		rc = -EPERM;
		goto out_put_target;
	}

	/*
	 * The descriptor handed in as epfd must really be an eventpoll file,
	 * and an epoll file may not be added to itself.
	 */
	if (epfile == target || !is_file_epoll(epfile)) {
		rc = -EINVAL;
		goto out_put_target;
	}

	/* From here on private_data is known to be our eventpoll structure. */
	ep = epfile->private_data;

	/* The operations below may modify the structure, so take the mutex. */
	mutex_lock(&ep->mtx);

	/*
	 * Look the (file, fd) pair up in the RB tree. Because "mtx" is held,
	 * the item returned by ep_find() stays usable until the mutex is
	 * released. A given fd may be registered only once, hence the lookup
	 * before any insertion.
	 */
	item = ep_find(ep, target, fd);

	switch (op) {
	case EPOLL_CTL_ADD:
		if (item) {
			/* Already registered: duplicate add. */
			rc = -EEXIST;
			break;
		}
		/* POLLERR and POLLHUP are always added to the mask. */
		kev.events |= POLLERR | POLLHUP;
		rc = ep_insert(ep, &kev, target, fd);
		break;

	case EPOLL_CTL_DEL:
		if (!item) {
			rc = -ENOENT;
			break;
		}
		rc = ep_remove(ep, item);
		break;

	case EPOLL_CTL_MOD:
		if (!item) {
			rc = -ENOENT;
			break;
		}
		kev.events |= POLLERR | POLLHUP;
		rc = ep_modify(ep, item, &kev);
		break;

	default:
		/* Unknown operation. */
		rc = -EINVAL;
		break;
	}

	mutex_unlock(&ep->mtx);

out_put_target:
	fput(target);
out_put_epfile:
	fput(epfile);
out:
	return rc;
}