/// Hooked gethostbyaddr(): resolves `addr` through co::gethostbyaddr_with_ares.
///
/// The result lives in function-local static storage (a hostent plus a
/// scratch buffer that grows on demand), so the returned pointer is
/// overwritten by the next call.
///
/// @param addr  address to resolve (printed as an IPv4 address in the debug log)
/// @param len   length of the address in bytes
/// @param type  address family
/// @return pointer to the static hostent on success; nullptr on failure, with
///         h_errno set to the resolver's error code (or NO_RECOVERY if the
///         scratch buffer could not be enlarged).
struct hostent *gethostbyaddr(const void *addr, socklen_t len, int type)
{
    Task* tk = g_Scheduler.GetCurrentTask();
    DebugPrint(dbg_hook, "task(%s) hook gethostbyaddr(ip=%s, type=%d). %s coroutine.",
            tk ? tk->DebugInfo() : "nil",
            addr ? inet_ntoa(*(in_addr*)addr) : "nil", type,
            g_Scheduler.IsCoroutine() ? "In" : "Not in");

    static size_t s_buflen = 8192;
    static char *s_buffer = (char*)malloc(s_buflen);
    static hostent h;

    hostent *p = &h;
    int err = 0;
    size_t buflen = s_buflen;
    int res = co::gethostbyaddr_with_ares(addr, len, type, &h, s_buffer, buflen, &p, &err);
    if (res == -1 && buflen > s_buflen) {
        // The resolver reported a larger required size through `buflen`.
        // Grow via a temporary so the old buffer is not lost if realloc fails.
        char *grown = (char*)realloc(s_buffer, buflen);
        if (!grown) {
            h_errno = NO_RECOVERY;
            return nullptr;
        }
        s_buffer = grown;
        s_buflen = buflen;

        // The first call may have reset `p` through the result pointer, so
        // always hand the static hostent itself to the retry.
        p = &h;
        res = co::gethostbyaddr_with_ares(addr, len, type, &h, s_buffer, buflen, &p, &err);
    }

    if (res == 0) {
        return p;
    }

    h_errno = err;
    return nullptr;
}
/// Waits for a wakeup on this block object, giving up after `timeo`.
///
/// Outside a coroutine the wait polls TryBlockWait() every 10ms. Inside a
/// coroutine the current task is marked sys_block with a block timeout and
/// yields to the scheduler.
///
/// @param timeo maximum time to wait
/// @return true if a wakeup was obtained, false if the wait timed out.
bool BlockObject::CoBlockWaitTimed(MininumTimeDurationType timeo)
{
    auto begin = std::chrono::high_resolution_clock::now();
    if (!g_Scheduler.IsCoroutine()) {
        for (;;) {
            // A successful TryBlockWait() has consumed a wakeup, so it must be
            // reported as success rather than as a timeout.
            if (TryBlockWait())
                return true;

            if (std::chrono::duration_cast<MininumTimeDurationType>
                    (std::chrono::high_resolution_clock::now() - begin) >= timeo)
                return false;

            usleep(10 * 1000);
        }
    }

    std::unique_lock<LFLock> lock(lock_);
    if (wakeup_ > 0) {
        DebugPrint(dbg_syncblock, "wait immedaitely done.");
        --wakeup_;
        return true;
    }
    lock.unlock();

    Task* tk = g_Scheduler.GetLocalInfo().current_task;
    tk->block_ = this;
    tk->state_ = TaskState::sys_block;
    ++tk->block_sequence_;
    tk->block_timeout_ = timeo;
    tk->is_block_timeout_ = false;
    DebugPrint(dbg_syncblock, "wait to switch. task(%s)", tk->DebugInfo());
    g_Scheduler.CoYield();

    // is_block_timeout_ was cleared above; it is read back after the task resumes.
    return !tk->is_block_timeout_;
}
static ssize_t read_write_mode(int fd, OriginF fn, const char* hook_fn_name, uint32_t event, int timeout_so, Args && ... args) { Task* tk = g_Scheduler.GetCurrentTask(); DebugPrint(dbg_hook, "task(%s) hook %s. %s coroutine.", tk ? tk->DebugInfo() : "nil", hook_fn_name, g_Scheduler.IsCoroutine() ? "In" : "Not in"); FdCtxPtr fd_ctx = FdManager::getInstance().get_fd_ctx(fd); if (!fd_ctx || fd_ctx->closed()) { errno = EBADF; // 已被close或无效的fd return -1; } if (!fd_ctx->is_socket()) // 非socket, 暂不HOOK. 以保障文件fd读写正常 return fn(fd, std::forward<Args>(args)...); if (fd_ctx->user_nonblock()) return fn(fd, std::forward<Args>(args)...); timeval tv; fd_ctx->get_time_o(timeout_so, &tv); int timeout_ms = tv.tv_sec * 1000 + tv.tv_usec / 1000; auto start = std::chrono::system_clock::now(); retry: ssize_t n = fn(fd, std::forward<Args>(args)...); if (n == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) { int poll_timeout = 0; if (!timeout_ms) poll_timeout = -1; else { int expired = std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::system_clock::now() - start).count(); if (expired > timeout_ms) { errno = EAGAIN; return -1; // 已超时 } // 剩余的等待时间 poll_timeout = timeout_ms - expired; } pollfd pfd; pfd.fd = fd; pfd.events = event; eintr: int triggers = poll(&pfd, 1, poll_timeout); if (-1 == triggers) { if (errno == EINTR) goto eintr; return -1; } else if (0 == triggers) { // poll等待超时 errno = EAGAIN; return -1; } goto retry; // 事件触发 OR epoll惊群效应 } return n; }
struct hostent *gethostbyname2(const char *name, int af) { Task* tk = g_Scheduler.GetCurrentTask(); DebugPrint(dbg_hook, "task(%s) hook gethostbyname2(%s, %d). %s coroutine.", tk ? tk->DebugInfo() : "nil", name ? name : "nil", af, g_Scheduler.IsCoroutine() ? "In" : "Not in"); return co_gethostbyname2(name, af); }
int gethostbyname_r(const char *name, struct hostent *ret_h, char *buf, size_t buflen, struct hostent **result, int *h_errnop) { Task* tk = g_Scheduler.GetCurrentTask(); DebugPrint(dbg_hook, "task(%s) hook gethostbyname_r(name=%s, buflen=%d). %s coroutine.", tk ? tk->DebugInfo() : "nil", name ? name : "nil", (int)buflen, g_Scheduler.IsCoroutine() ? "In" : "Not in"); return co::gethostbyname_with_ares(name, AF_INET, ret_h, buf, buflen, result, h_errnop); }
void SleepWait::CoSwitch(int timeout_ms) { Task *tk = g_Scheduler.GetCurrentTask(); if (!tk) return ; tk->sleep_ms_ = timeout_ms; tk->state_ = TaskState::sleep; DebugPrint(dbg_sleepblock, "task(%s) will sleep %d ms", tk->DebugInfo(), tk->sleep_ms_); g_Scheduler.CoYield(); }
int gethostbyaddr_r(const void *addr, socklen_t len, int type, struct hostent *ret, char *buf, size_t buflen, struct hostent **result, int *h_errnop) { Task* tk = g_Scheduler.GetCurrentTask(); DebugPrint(dbg_hook, "task(%s) hook gethostbyaddr_r(ip=%s, type=%d, buflen=%d). %s coroutine.", tk ? tk->DebugInfo() : "nil", inet_ntoa(*(in_addr*)addr), type, (int)buflen, g_Scheduler.IsCoroutine() ? "In" : "Not in"); return co::gethostbyaddr_with_ares(addr, len, type, ret, buf, buflen, result, h_errnop); }
void Processer::CoYield() { Task *tk = GetCurrentTask(); assert(tk); tk->proc_ = this; DebugPrint(dbg_yield, "yield task(%s) state=%d", tk->DebugInfo(), (int)tk->state_); ++tk->yield_count_; if (!tk->SwapOut()) { fprintf(stderr, "swapcontext error:%s\n", strerror(errno)); ThrowError(eCoErrorCode::ec_yield_failed); } }
/// Yields the task recorded in `info` back to the scheduler context.
///
/// Does nothing when `info` has no current task. Otherwise bumps the task's
/// yield counter, saves its stack and swaps to `info.scheduler`. If
/// swapcontext() fails, the error is printed to stderr and
/// ThrowError(ec_yield_failed) is invoked.
void Processer::Yield(ThreadLocalInfo &info)
{
    Task *current = info.current_task;
    if (current == nullptr) {
        return;
    }

    DebugPrint(dbg_yield, "yield task(%s) state=%d", current->DebugInfo(), current->state_);

    ++current->yield_count_;
    SaveStack(current);

    if (swapcontext(&current->ctx_, &info.scheduler) == 0) {
        return;
    }

    fprintf(stderr, "swapcontext error:%s\n", strerror(errno));
    ThrowError(eCoErrorCode::ec_yield_failed);
}
int connect(int fd, const struct sockaddr *addr, socklen_t addrlen) { Task* tk = g_Scheduler.GetCurrentTask(); DebugPrint(dbg_hook, "task(%s) hook connect. %s coroutine.", tk ? tk->DebugInfo() : "nil", g_Scheduler.IsCoroutine() ? "In" : "Not in"); if (!tk) return connect_f(fd, addr, addrlen); FdCtxPtr fd_ctx = FdManager::getInstance().get_fd_ctx(fd); if (!fd_ctx || fd_ctx->closed()) { errno = EBADF; return -1; } if (fd_ctx->user_nonblock()) return connect_f(fd, addr, addrlen); int n = connect_f(fd, addr, addrlen); if (n == 0) { DebugPrint(dbg_hook, "continue task(%s) connect completed immediately. fd=%d", g_Scheduler.GetCurrentTaskDebugInfo(), fd); return 0; } else if (n != -1 || errno != EINPROGRESS) { return n; } // EINPROGRESS. use poll for wait connect complete. pollfd pfd; pfd.fd = fd; pfd.events = POLLOUT; int poll_res = poll(&pfd, 1, s_connect_timeout); if (poll_res <= 0 || pfd.revents != POLLOUT) { errno = ETIMEDOUT; return -1; } int error = 0; socklen_t len = sizeof(int); if (-1 == getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &len)) return -1; if (!error) return 0; else { errno = error; return -1; } }
unsigned int sleep(unsigned int seconds) { if (!sleep_f) coroutine_hook_init(); Task* tk = g_Scheduler.GetCurrentTask(); DebugPrint(dbg_hook, "task(%s) hook sleep(seconds=%u). %s coroutine.", tk ? tk->DebugInfo() : "nil", seconds, g_Scheduler.IsCoroutine() ? "In" : "Not in"); if (!tk) return sleep_f(seconds); int timeout_ms = seconds * 1000; g_Scheduler.SleepSwitch(timeout_ms); return 0; }
int nanosleep(const struct timespec *req, struct timespec *rem) { if (!nanosleep_f) coroutine_hook_init(); Task* tk = g_Scheduler.GetCurrentTask(); int timeout_ms = req->tv_sec * 1000 + req->tv_nsec / 1000000; DebugPrint(dbg_hook, "task(%s) hook nanosleep(milliseconds=%d). %s coroutine.", tk ? tk->DebugInfo() : "nil", timeout_ms, g_Scheduler.IsCoroutine() ? "In" : "Not in"); if (!tk) return nanosleep_f(req, rem); g_Scheduler.SleepSwitch(timeout_ms); return 0; }
int usleep(useconds_t usec) { if (!usleep_f) coroutine_hook_init(); Task* tk = g_Scheduler.GetCurrentTask(); DebugPrint(dbg_hook, "task(%s) hook usleep(microseconds=%u). %s coroutine.", tk ? tk->DebugInfo() : "nil", usec, g_Scheduler.IsCoroutine() ? "In" : "Not in"); if (!tk) return usleep_f(usec); int timeout_ms = usec / 1000; g_Scheduler.SleepSwitch(timeout_ms); return 0; }
bool BlockObject::Wakeup() { std::unique_lock<LFLock> lock(lock_); Task* tk = wait_queue_.pop(); if (!tk) { if (wakeup_ >= max_wakeup_) { DebugPrint(dbg_syncblock, "wakeup failed."); return false; } ++wakeup_; DebugPrint(dbg_syncblock, "wakeup to %lu.", (long unsigned)wakeup_); return true; } g_Scheduler.AddTaskRunnable(tk); DebugPrint(dbg_syncblock, "wakeup task(%s).", tk->DebugInfo()); return true; }
void IoWait::CoSwitch(std::vector<FdStruct> && fdsts, int timeout_ms) { Task* tk = g_Scheduler.GetCurrentTask(); if (!tk) return ; uint32_t id = ++tk->GetIoWaitData().io_block_id_; tk->state_ = TaskState::io_block; tk->GetIoWaitData().wait_successful_ = 0; tk->GetIoWaitData().io_block_timeout_ = timeout_ms; tk->GetIoWaitData().io_block_timer_.reset(); tk->GetIoWaitData().wait_fds_.swap(fdsts); for (auto &fdst : tk->GetIoWaitData().wait_fds_) { fdst.epoll_ptr.tk = tk; fdst.epoll_ptr.io_block_id = id; } DebugPrint(dbg_ioblock, "task(%s) CoSwitch id=%d, nfds=%d, timeout=%d", tk->DebugInfo(), id, (int)fdsts.size(), timeout_ms); g_Scheduler.CoYield(); }
void BlockObject::CoBlockWait() { if (!g_Scheduler.IsCoroutine()) { while (!TryBlockWait()) usleep(10 * 1000); return ; } std::unique_lock<LFLock> lock(lock_); if (wakeup_ > 0) { DebugPrint(dbg_syncblock, "wait immedaitely done."); --wakeup_; return ; } Task* tk = g_Scheduler.GetLocalInfo().current_task; tk->block_ = this; tk->state_ = TaskState::sys_block; lock.unlock(); DebugPrint(dbg_syncblock, "wait to switch. task(%s)", tk->DebugInfo()); g_Scheduler.Yield(); }
void BlockObject::CoBlockWait() { if (!g_Scheduler.IsCoroutine()) { while (!TryBlockWait()) usleep(10 * 1000); return ; } std::unique_lock<LFLock> lock(lock_); if (wakeup_ > 0) { DebugPrint(dbg_syncblock, "wait immedaitely done."); --wakeup_; return ; } lock.unlock(); Task* tk = g_Scheduler.GetLocalInfo().current_task; tk->block_ = this; tk->state_ = TaskState::sys_block; tk->block_timeout_ = MininumTimeDurationType::zero(); tk->is_block_timeout_ = false; ++ tk->block_sequence_; DebugPrint(dbg_syncblock, "wait to switch. task(%s)", tk->DebugInfo()); g_Scheduler.CoYield(); }
int poll(struct pollfd *fds, nfds_t nfds, int timeout) { if (!poll_f) coroutine_hook_init(); Task* tk = g_Scheduler.GetCurrentTask(); DebugPrint(dbg_hook, "task(%s) hook poll(nfds=%d, timeout=%d). %s coroutine.", tk ? tk->DebugInfo() : "nil", (int)nfds, timeout, g_Scheduler.IsCoroutine() ? "In" : "Not in"); if (!g_Scheduler.IsCoroutine()) return poll_f(fds, nfds, timeout); if (timeout == 0) return poll_f(fds, nfds, timeout); if (nfds == 0) { // co sleep g_Scheduler.SleepSwitch(timeout); return 0; } std::vector<FdStruct> fdsts; for (nfds_t i = 0; i < nfds; ++i) { fdsts.emplace_back(); fdsts.back().fd = fds[i].fd; fdsts.back().event = PollEvent2Epoll(fds[i].events); DebugPrint(dbg_hook, "hook poll task(%s), fd[%d]=%d.", tk->DebugInfo(), (int)i, fds[i].fd); } // add into epoll, and switch other context. g_Scheduler.IOBlockSwitch(std::move(fdsts), timeout); bool is_timeout = false; // 是否超时 if (tk->GetIoWaitData().io_block_timer_) { is_timeout = true; if (g_Scheduler.BlockCancelTimer(tk->GetIoWaitData().io_block_timer_)) { tk->DecrementRef(); // timer use ref. is_timeout = false; } } if (tk->GetIoWaitData().wait_successful_ == 0) { if (is_timeout) return 0; else { // 加入epoll失败 if (timeout > 0) g_Scheduler.SleepSwitch(timeout); return poll_f(fds, nfds, 0); } } int n = 0; for (int i = 0; i < (int)tk->GetIoWaitData().wait_fds_.size(); ++i) { fds[i].revents = EpollEvent2Poll(tk->GetIoWaitData().wait_fds_[i].epoll_ptr.revent); if (fds[i].revents) ++n; } // /// 在一次epoll_wait调用中, 同一个fd可能会被触发多次, 此处不必做严格校验 // if (n != (int)tk->GetIoWaitData().wait_successful_) // { // DebugPrint(dbg_debugger, "task(%s) poll assert. 
n=%d, " // "wait_successful=%d, fds_size=%d", // tk->DebugInfo(), // n, (int)tk->GetIoWaitData().wait_successful_, // (int)tk->GetIoWaitData().wait_fds_.size()); // // for (int i = 0; i < (int)tk->GetIoWaitData().wait_fds_.size(); ++i) // { // fds[i].revents = EpollEvent2Poll(tk->GetIoWaitData().wait_fds_[i].epoll_ptr.revent); // DebugPrint(dbg_debugger, "[%d] epoll_event=%d, poll_event=%d", // i, tk->GetIoWaitData().wait_fds_[i].epoll_ptr.revent, fds[i].revents); // } // } // assert(n == (int)tk->GetIoWaitData().wait_successful_); return n; }
int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout) { if (!select_f) coroutine_hook_init(); int timeout_ms = -1; if (timeout) timeout_ms = timeout->tv_sec * 1000 + timeout->tv_usec / 1000; Task* tk = g_Scheduler.GetCurrentTask(); DebugPrint(dbg_hook, "task(%s) hook select(nfds=%d, rd_set=%p, wr_set=%p, er_set=%p, timeout=%d ms).", tk ? tk->DebugInfo() : "nil", (int)nfds, readfds, writefds, exceptfds, timeout_ms); if (!tk) return select_f(nfds, readfds, writefds, exceptfds, timeout); if (timeout_ms == 0) return select_f(nfds, readfds, writefds, exceptfds, timeout); if (!nfds) { g_Scheduler.SleepSwitch(timeout_ms); return 0; } nfds = std::min<int>(nfds, FD_SETSIZE); // 执行一次非阻塞的select, 检测异常或无效fd. fd_set rfs, wfs, efs; FD_ZERO(&rfs); FD_ZERO(&wfs); FD_ZERO(&efs); if (readfds) rfs = *readfds; if (writefds) wfs = *writefds; if (exceptfds) efs = *exceptfds; timeval zero_tv = {0, 0}; int n = select_f(nfds, (readfds ? &rfs : nullptr), (writefds ? &wfs : nullptr), (exceptfds ? &efs : nullptr), &zero_tv); if (n != 0) { if (readfds) *readfds = rfs; if (writefds) *writefds = wfs; if (exceptfds) *exceptfds = efs; return n; } // ------------------------------------- // convert fd_set to pollfd, and clear 3 fd_set. 
std::pair<fd_set*, uint32_t> sets[3] = { {readfds, POLLIN}, {writefds, POLLOUT}, {exceptfds, 0} }; //static const char* set_names[] = {"readfds", "writefds", "exceptfds"}; std::map<int, int> pfd_map; for (int i = 0; i < 3; ++i) { fd_set* fds = sets[i].first; if (!fds) continue; int event = sets[i].second; for (int fd = 0; fd < nfds; ++fd) { if (FD_ISSET(fd, fds)) { pfd_map[fd] |= event; } } FD_ZERO(fds); } std::vector<pollfd> pfds(pfd_map.size()); int i = 0; for (auto &kv : pfd_map) { pollfd &pfd = pfds[i++]; pfd.fd = kv.first; pfd.events = kv.second; } // ------------------------------------- // ------------------------------------- // poll n = poll(pfds.data(), pfds.size(), timeout_ms); if (n <= 0) return n; // ------------------------------------- // ------------------------------------- // convert pollfd to fd_set. int ret = 0; for (size_t i = 0; i < pfds.size(); ++i) { pollfd &pfd = pfds[i]; if (pfd.events & POLLIN) { if (readfds) { FD_SET(pfd.fd, readfds); ++ret; } } if (pfd.events & POLLOUT) { if (writefds) { FD_SET(pfd.fd, writefds); ++ret; } } if (pfd.events & ~(POLLIN | POLLOUT)) { if (exceptfds) { FD_SET(pfd.fd, exceptfds); ++ret; } } } // ------------------------------------- return ret; }
int poll(struct pollfd *fds, nfds_t nfds, int timeout) { if (!poll_f) coroutine_hook_init(); Task* tk = g_Scheduler.GetCurrentTask(); DebugPrint(dbg_hook, "task(%s) hook poll(nfds=%d, timeout=%d). %s coroutine.", tk ? tk->DebugInfo() : "nil", (int)nfds, timeout, g_Scheduler.IsCoroutine() ? "In" : "Not in"); if (!tk) return poll_f(fds, nfds, timeout); if (timeout == 0) return poll_f(fds, nfds, timeout); // -------------------------------- // 全部是负数fd时, 等价于sleep nfds_t negative_fd_n = 0; for (nfds_t i = 0; i < nfds; ++i) if (fds[i].fd < 0) ++ negative_fd_n; if (nfds == negative_fd_n) { // co sleep g_Scheduler.SleepSwitch(timeout); return 0; } // -------------------------------- // 执行一次非阻塞的poll, 检测异常或无效fd. int res = poll_f(fds, nfds, 0); if (res != 0) return res; // create io-sentry IoSentryPtr io_sentry = MakeShared<IoSentry>(tk, fds, nfds); // add file descriptor into epoll or poll. bool added = false; for (nfds_t i = 0; i < nfds; ++i) { fds[i].revents = 0; // clear revents pollfd & pfd = io_sentry->watch_fds_[i]; if (pfd.fd < 0) continue; FdCtxPtr fd_ctx = FdManager::getInstance().get_fd_ctx(pfd.fd); if (!fd_ctx || fd_ctx->closed()) { // bad file descriptor pfd.revents = POLLNVAL; continue; } if (!fd_ctx->add_into_reactor(pfd.events, io_sentry)) { // TODO: 兼容文件fd pfd.revents = POLLNVAL; continue; } added = true; } if (!added) { errno = 0; return nfds; } // set timer if (timeout > 0) io_sentry->timer_ = g_Scheduler.ExpireAt( std::chrono::milliseconds(timeout), [io_sentry]{ g_Scheduler.GetIoWait().IOBlockTriggered(io_sentry); }); // save io-sentry tk->io_sentry_ = io_sentry; // yield g_Scheduler.GetIoWait().CoSwitch(); // clear task->io_sentry_ reference count tk->io_sentry_.reset(); if (io_sentry->timer_) { g_Scheduler.CancelTimer(io_sentry->timer_); io_sentry->timer_.reset(); } int n = 0; for (nfds_t i = 0; i < nfds; ++i) { fds[i].revents = io_sentry->watch_fds_[i].revents; if (fds[i].revents) ++n; } errno = 0; return n; }
/// Runs one pass over this processer's runnable tasks.
///
/// Takes the whole runnable list, swaps into each task in turn and, once the
/// task swaps back, dispatches on the state it left itself in (kept in the
/// local list, handed to the I/O or sleep waiter, parked on a block object or
/// user wait queue, or released when done). Tasks still in the local list at
/// the end are pushed back onto runnable_list_.
///
/// @param info        thread-local info holding the scheduler context and the
///                    current-task pointer
/// @param done_count  out: number of tasks that finished during this pass
/// @return number of task switches performed in this pass.
uint32_t Processer::Run(ThreadLocalInfo &info, uint32_t &done_count)
{
    info.current_task = NULL;
    done_count = 0;
    uint32_t c = 0;
    SList<Task> slist = runnable_list_.pop_all();
    uint32_t do_count = slist.size();

    DebugPrint(dbg_scheduler, "Run [Proc(%d) do_count:%u] --------------------------", id_, do_count);

    SList<Task>::iterator it = slist.begin();
    // The iterator is advanced inside the switch below (either ++it or via
    // erase), so the for-header only counts switches.
    for (; it != slist.end(); ++c)
    {
        Task* tk = &*it;
        info.current_task = tk;
        tk->state_ = TaskState::runnable;
        DebugPrint(dbg_switch, "enter task(%s)", tk->DebugInfo());
        RestoreStack(tk);
        int ret = swapcontext(&info.scheduler, &tk->ctx_);
        if (ret) {
            fprintf(stderr, "swapcontext error:%s\n", strerror(errno));
            runnable_list_.push(tk);
            ThrowError(eCoErrorCode::ec_swapcontext_failed);
        }
        DebugPrint(dbg_switch, "leave task(%s) state=%d", tk->DebugInfo(), tk->state_);
        info.current_task = NULL;

        switch (tk->state_) {
            case TaskState::runnable:
                // Still runnable: leave it in the local list.
                ++it;
                break;

            case TaskState::io_block:
                it = slist.erase(it);
                g_Scheduler.io_wait_.SchedulerSwitch(tk);
                break;

            case TaskState::sleep:
                it = slist.erase(it);
                g_Scheduler.sleep_wait_.SchedulerSwitch(tk);
                break;

            case TaskState::sys_block:
            case TaskState::user_block:
                {
                    if (tk->block_) {
                        // Blocked on a block object: queue it there, or make
                        // it runnable again if AddWaitTask() refuses it.
                        it = slist.erase(it);
                        if (!tk->block_->AddWaitTask(tk))
                            runnable_list_.push(tk);
                        tk->block_ = NULL;
                    } else {
                        // Blocked on a user wait slot identified by
                        // (user_wait_type_, user_wait_id_).
                        std::unique_lock<LFLock> lock(g_Scheduler.user_wait_lock_);
                        auto &zone = g_Scheduler.user_wait_tasks_[tk->user_wait_type_];
                        auto &wait_pair = zone[tk->user_wait_id_];
                        auto &task_queue = wait_pair.second;
                        if (wait_pair.first) {
                            // A pending count is available: consume one and
                            // keep the task runnable.
                            --wait_pair.first;
                            tk->state_ = TaskState::runnable;
                            ++it;
                        } else {
                            it = slist.erase(it);
                            task_queue.push(tk);
                        }
                        g_Scheduler.ClearWaitPairWithoutLock(tk->user_wait_type_,
                                tk->user_wait_id_, zone, wait_pair);
                    }
                }
                break;

            case TaskState::done:
            default:
                --task_count_;
                ++done_count;
                it = slist.erase(it);
                DebugPrint(dbg_task, "task(%s) done.", tk->DebugInfo());
                if (tk->eptr_) {
                    // The task stored an exception: return the remaining
                    // tasks to the runnable list first, then rethrow it.
                    std::exception_ptr ep = tk->eptr_;
                    runnable_list_.push(slist);
                    tk->DecrementRef();
                    std::rethrow_exception(ep);
                } else
                    tk->DecrementRef();
                break;
        }
    }
    if (do_count)
        runnable_list_.push(slist);

    return c;
}
int connect(int fd, const struct sockaddr *addr, socklen_t addrlen) { Task* tk = g_Scheduler.GetCurrentTask(); DebugPrint(dbg_hook, "task(%s) hook connect. %s coroutine.", tk ? tk->DebugInfo() : "nil", g_Scheduler.IsCoroutine() ? "In" : "Not in"); if (!tk) { return connect_f(fd, addr, addrlen); } else { int flags = fcntl(fd, F_GETFL, 0); if (flags & O_NONBLOCK) return connect_f(fd, addr, addrlen); if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK)) return connect_f(fd, addr, addrlen); int n = connect_f(fd, addr, addrlen); int e = errno; if (n == 0) { fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); DebugPrint(dbg_hook, "continue task(%s) connect completed immediately. fd=%d", g_Scheduler.GetCurrentTaskDebugInfo(), fd); return 0; } else if (n != -1 || errno != EINPROGRESS) { fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); errno = e; return n; } else { // add into epoll, and switch other context. g_Scheduler.IOBlockSwitch(fd, EPOLLOUT, -1); } if (tk->GetIoWaitData().wait_successful_ == 0) { // 添加到epoll中失败了 fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); errno = e; return n; } DebugPrint(dbg_hook, "continue task(%s) connect. fd=%d", g_Scheduler.GetCurrentTaskDebugInfo(), fd); int error = 0; socklen_t len = sizeof(int); if (0 == getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &len)) { if (0 == error) { fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); DebugPrint(dbg_hook, "continue task(%s) connect success async. fd=%d", g_Scheduler.GetCurrentTaskDebugInfo(), fd); return 0; } else { fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); errno = error; return -1; } } e = errno; // errno set by getsockopt. fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); errno = e; return -1; } }
static ssize_t read_write_mode(int fd, OriginF fn, const char* hook_fn_name, uint32_t event, int timeout_so, Args && ... args) { Task* tk = g_Scheduler.GetCurrentTask(); DebugPrint(dbg_hook, "task(%s) hook %s. %s coroutine.", tk ? tk->DebugInfo() : "nil", hook_fn_name, g_Scheduler.IsCoroutine() ? "In" : "Not in"); if (!tk) return fn(fd, std::forward<Args>(args)...); struct stat fd_stat; if (-1 == fstat(fd, &fd_stat)) return fn(fd, std::forward<Args>(args)...); if (!S_ISSOCK(fd_stat.st_mode)) // 不是socket, 不HOOK. return fn(fd, std::forward<Args>(args)...); int flags = fcntl(fd, F_GETFL, 0); if (-1 == flags || (flags & O_NONBLOCK)) return fn(fd, std::forward<Args>(args)...); if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK)) return fn(fd, std::forward<Args>(args)...); DebugPrint(dbg_hook, "task(%s) real hook %s fd=%d", tk->DebugInfo(), hook_fn_name, fd); ssize_t n = fn(fd, std::forward<Args>(args)...); if (n == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) { // get timeout option. int timeout_ms = -1; struct timeval timeout; socklen_t timeout_blen = sizeof(timeout); if (0 == getsockopt(fd, SOL_SOCKET, timeout_so, &timeout, &timeout_blen)) { if (timeout.tv_sec > 0 || timeout.tv_usec > 0) { timeout_ms = timeout.tv_sec * 1000 + timeout.tv_usec / 1000; DebugPrint(dbg_hook, "hook task(%s) %s timeout=%dms. fd=%d", g_Scheduler.GetCurrentTaskDebugInfo(), hook_fn_name, timeout_ms, fd); } } auto start_time = std::chrono::system_clock::now(); retry: // add into epoll, and switch other context. g_Scheduler.IOBlockSwitch(fd, event, timeout_ms); bool is_timeout = false; if (tk->GetIoWaitData().io_block_timer_) { is_timeout = true; if (g_Scheduler.BlockCancelTimer(tk->GetIoWaitData().io_block_timer_)) { is_timeout = false; tk->DecrementRef(); // timer use ref. 
} } if (tk->GetIoWaitData().wait_successful_ == 0) { if (is_timeout) { fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); errno = EAGAIN; return -1; } else { fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); return fn(fd, std::forward<Args>(args)...); } } DebugPrint(dbg_hook, "continue task(%s) %s. fd=%d", g_Scheduler.GetCurrentTaskDebugInfo(), hook_fn_name, fd); n = fn(fd, std::forward<Args>(args)...); if (n == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) { /// epoll误唤醒 if (timeout_ms == -1) // 不超时, 继续重试 goto retry; int delay_ms = std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::system_clock::now() - start_time).count(); if (delay_ms < timeout_ms) { // 还有剩余的超时时间, 重试一次 timeout_ms -= delay_ms; goto retry; } } } else { DebugPrint(dbg_hook, "task(%s) syscall(%s) completed immediately. fd=%d", g_Scheduler.GetCurrentTaskDebugInfo(), hook_fn_name, fd); } int e = errno; fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); errno = e; return n; }
uint32_t Processer::Run(uint32_t &done_count) { ContextScopedGuard guard; (void)guard; done_count = 0; uint32_t c = 0; DebugPrint(dbg_scheduler, "Run [Proc(%d) do_count:%u] --------------------------", id_, (uint32_t)runnable_list_.size()); for (;;) { if (c >= runnable_list_.size()) break; Task *tk = runnable_list_.pop(); if (!tk) break; ++c; current_task_ = tk; DebugPrint(dbg_switch, "enter task(%s)", tk->DebugInfo()); if (!tk->SwapIn()) { fprintf(stderr, "swapcontext error:%s\n", strerror(errno)); current_task_ = nullptr; runnable_list_.erase(tk); tk->DecrementRef(); ThrowError(eCoErrorCode::ec_swapcontext_failed); } DebugPrint(dbg_switch, "leave task(%s) state=%d", tk->DebugInfo(), (int)tk->state_); current_task_ = nullptr; switch (tk->state_) { case TaskState::runnable: runnable_list_.push(tk); break; case TaskState::io_block: g_Scheduler.io_wait_.SchedulerSwitch(tk); break; case TaskState::sleep: g_Scheduler.sleep_wait_.SchedulerSwitch(tk); break; case TaskState::sys_block: assert(tk->block_); if (!tk->block_->AddWaitTask(tk)) runnable_list_.push(tk); break; case TaskState::done: default: ++done_count; DebugPrint(dbg_task, "task(%s) done.", tk->DebugInfo()); if (tk->eptr_) { std::exception_ptr ep = tk->eptr_; tk->DecrementRef(); std::rethrow_exception(ep); } else tk->DecrementRef(); break; } } return c; }
int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout) { if (!select_f) coroutine_hook_init(); int timeout_ms = -1; if (timeout) timeout_ms = timeout->tv_sec * 1000 + timeout->tv_usec / 1000; Task* tk = g_Scheduler.GetCurrentTask(); DebugPrint(dbg_hook, "task(%s) hook select(nfds=%d, rd_set=%p, wr_set=%p, er_set=%p, timeout=%d ms).", tk ? tk->DebugInfo() : "nil", (int)nfds, readfds, writefds, exceptfds, timeout_ms); if (!tk) return select_f(nfds, readfds, writefds, exceptfds, timeout); if (timeout_ms == 0) return select_f(nfds, readfds, writefds, exceptfds, timeout); if (!nfds && !readfds && !writefds && !exceptfds && timeout) { g_Scheduler.SleepSwitch(timeout_ms); return 0; } nfds = std::min<int>(nfds, FD_SETSIZE); std::pair<fd_set*, uint32_t> sets[3] = { {readfds, EPOLLIN | EPOLLERR | EPOLLHUP}, {writefds, EPOLLOUT}, {exceptfds, EPOLLERR | EPOLLHUP} }; static const char* set_names[] = {"readfds", "writefds", "exceptfds"}; std::vector<FdStruct> fdsts; for (int i = 0; i < nfds; ++i) { FdStruct *fdst = NULL; for (int si = 0; si < 3; ++si) { if (!sets[si].first) continue; if (!FD_ISSET(i, sets[si].first)) continue; if (!fdst) { fdsts.emplace_back(); fdst = &fdsts.back(); fdst->fd = i; } fdsts.back().event |= sets[si].second; DebugPrint(dbg_hook, "task(%s) hook select %s(%d)", tk->DebugInfo(), set_names[si], (int)i); } } g_Scheduler.IOBlockSwitch(std::move(fdsts), timeout_ms); bool is_timeout = false; if (tk->GetIoWaitData().io_block_timer_) { is_timeout = true; if (g_Scheduler.BlockCancelTimer(tk->GetIoWaitData().io_block_timer_)) { is_timeout = false; tk->DecrementRef(); // timer use ref. 
} } if (tk->GetIoWaitData().wait_successful_ == 0) { if (is_timeout) { if (readfds) FD_ZERO(readfds); if (writefds) FD_ZERO(writefds); if (exceptfds) FD_ZERO(exceptfds); return 0; } else { if (timeout_ms > 0) g_Scheduler.SleepSwitch(timeout_ms); timeval immedaitely = {0, 0}; return select_f(nfds, readfds, writefds, exceptfds, &immedaitely); } } int n = 0; for (auto &fdst : tk->GetIoWaitData().wait_fds_) { int fd = fdst.fd; for (int si = 0; si < 3; ++si) { if (!sets[si].first) continue; if (!FD_ISSET(fd, sets[si].first)) continue; if (sets[si].second & fdst.epoll_ptr.revent) { ++n; continue; } FD_CLR(fd, sets[si].first); } } return n; }
/// One iteration of the I/O wait loop: collects expired timers, polls the
/// epoll instances, fires timeout callbacks and deletes finished tasks.
///
/// @param enable_block when true, the epoll_wait on the read epoll may block
///                     for epollwait_ms_ (only if no timer expired in this
///                     call); every other epoll_wait uses a zero timeout
/// @return number of epoll events plus number of expired timers; if
///         epoll_lock_ could not be taken, the number of expired timers, or
///         -1 when there were none.
int IoWait::WaitLoop(bool enable_block)
{
    int c = 0;
    // Drain expired timers in batches of up to 128 into timeout_list_.
    for (;;) {
        std::list<CoTimerPtr> timers;
        timer_mgr_.GetExpired(timers, 128);
        if (timers.empty())
            break;

        c += timers.size();
        // The callback is stashed here rather than a Task*, so that
        // block_cancel can actually take effect.
        std::unique_lock<LFLock> lock(timeout_list_lock_);
        timeout_list_.merge(std::move(timers));
    }

    // Only one caller at a time proceeds past this point; others return
    // immediately instead of waiting for the lock.
    std::unique_lock<LFLock> lock(epoll_lock_, std::defer_lock);
    if (!lock.try_lock())
        return c ? c : -1;

    ++loop_index_;
    int epoll_n = 0;
    if (IsEpollCreated())
    {
        // Event buffer allocated once on first use and reused by later calls.
        static epoll_event *evs = new epoll_event[epoll_event_size_];
        for (int epoll_type = 0; epoll_type < 2; ++epoll_type)
        {
retry:
            int timeout = (enable_block && epoll_type == (int)EpollType::read && !c) ? epollwait_ms_ : 0;
            int n = epoll_wait(GetEpoll(epoll_type), evs, epoll_event_size_, timeout);
            if (n == -1) {
                if (errno == EINTR) {
                    goto retry;
                }
                // Any other error: skip this epoll instance.
                continue;
            }

            epoll_n += n;
            DebugPrint(dbg_scheduler, "do epoll(%d) event, n = %d", epoll_type, n);
            for (int i = 0; i < n; ++i)
            {
                EpollPtr* ep = (EpollPtr*)evs[i].data.ptr;
                ep->revent = evs[i].events;
                Task* tk = ep->tk;
                ++tk->GetIoWaitData().wait_successful_;
                // Stash tk and run Cancel only at the end, so that poll and
                // select get a correct count; this guards against the Task
                // being executed by another thread once it has been added to
                // the runnable list.
                epollwait_tasks_.insert(EpollWaitSt{tk, ep->io_block_id});
                DebugPrint(dbg_ioblock, "task(%s) epoll(%s) trigger fd=%d io_block_id(%u) ep(%p) loop_index(%llu)",
                        tk->DebugInfo(), EpollTypeName(epoll_type),
                        ep->fdst->fd, ep->io_block_id, ep, (unsigned long long)loop_index_);
            }
        }

        for (auto &st : epollwait_tasks_)
            Cancel(st.tk, st.id);
        epollwait_tasks_.clear();
    }

    // Take the accumulated timeout callbacks out under the lock, then run
    // them after timeout_list_lock_ has been released.
    std::list<CoTimerPtr> timeout_list;
    {
        std::unique_lock<LFLock> lock(timeout_list_lock_);
        timeout_list_.swap(timeout_list);
    }

    for (auto &cb : timeout_list)
        (*cb)();

    // epoll_wait results may still contain some uncounted Task* pointers, and
    // by the nature of epoll these tasks cannot be counted, so this
    // destruction must be done under the protection of epoll_lock.
    std::vector<SList<Task>> delete_lists;
    Task::PopDeleteList(delete_lists);
    for (auto &delete_list : delete_lists)
        for (auto it = delete_list.begin(); it != delete_list.end();)
        {
            // Advance before deleting so the iterator never refers to a
            // destroyed task.
            Task* tk = &*it++;
            DebugPrint(dbg_task, "task(%s) delete.", tk->DebugInfo());
            delete tk;
        }

    return epoll_n + c;
}