/* * dup2 is not thread safe */ int dup2(int oldfd, int newfd) { struct fd_info *oldfdi, *newfdi; int ret; init_preload(); oldfdi = idm_lookup(&idm, oldfd); if (oldfdi) { if (oldfdi->state == fd_fork_passive) fork_passive(oldfd); else if (oldfdi->state == fd_fork_active) fork_active(oldfd); } newfdi = idm_lookup(&idm, newfd); if (newfdi) { /* newfd cannot have been dup'ed directly */ if (atomic_get(&newfdi->refcnt) > 1) return ERR(EBUSY); close(newfd); } ret = real.dup2(oldfd, newfd); if (!oldfdi || ret != newfd) return ret; newfdi = calloc(1, sizeof *newfdi); if (!newfdi) { close(newfd); return ERR(ENOMEM); } pthread_mutex_lock(&mut); idm_set(&idm, newfd, newfdi); pthread_mutex_unlock(&mut); newfdi->fd = oldfdi->fd; newfdi->type = oldfdi->type; if (oldfdi->dupfd != -1) { newfdi->dupfd = oldfdi->dupfd; oldfdi = idm_lookup(&idm, oldfdi->dupfd); } else { newfdi->dupfd = oldfd; } atomic_init(&newfdi->refcnt); atomic_set(&newfdi->refcnt, 1); atomic_inc(&oldfdi->refcnt); return newfd; }
/*
 * Return the type recorded for descriptor `index` in the fd index map,
 * or fd_normal when the descriptor has no entry.
 */
static inline enum fd_type fd_gett(int index)
{
	struct fd_info *info = idm_lookup(&idm, index);

	if (!info)
		return fd_normal;
	return info->type;
}
/*
 * Return the fork state recorded for descriptor `index` in the fd index
 * map, or fd_ready when the descriptor has no entry.
 */
static inline enum fd_fork_state fd_gets(int index)
{
	struct fd_info *info = idm_lookup(&idm, index);

	if (!info)
		return fd_ready;
	return info->state;
}
/*
 * Translate descriptor `index` to the fd stored in its fd_info entry.
 * Descriptors without an entry map to themselves.
 */
static inline int fd_getd(int index)
{
	struct fd_info *info = idm_lookup(&idm, index);

	if (!info)
		return index;
	return info->fd;
}
/*
 * Resolve `addr` to a connection via the address vector `av`.
 *
 * The AV index is taken from `addr` masked with av->mask. If no connection
 * key is cached for that entry yet, sock_conn_map_match_or_connect() is
 * asked to fill one in. The connection is then fetched with
 * sock_conn_map_lookup_key().
 *
 * Returns the connection, or NULL on failure. errno is set to EINVAL when
 * the index is out of range, no connection map is bound, or the indexed
 * entry is not present in the address index map.
 */
struct sock_conn *sock_av_lookup_addr(struct sock_ep *ep, struct sock_av *av,
				      fi_addr_t addr)
{
	int idx, ret;
	int index = ((uint64_t)addr & av->mask);
	struct sock_av_addr *av_addr;

	if (index >= av->table_hdr->stored || index < 0) {
		SOCK_LOG_ERROR("requested rank is larger than av table\n");
		errno = EINVAL;
		return NULL;
	}

	if (!av->cmap) {
		SOCK_LOG_ERROR("EP with no AV bound\n");
		errno = EINVAL;
		return NULL;
	}

	av_addr = idm_lookup(&av->addr_idm, index);
	if (!av_addr) {
		/*
		 * Without this check the pointer difference below would be
		 * computed from NULL and index far outside av->key.
		 */
		SOCK_LOG_ERROR("requested address not inserted\n");
		errno = EINVAL;
		return NULL;
	}

	/* Position of the entry inside av->table; also indexes av->key. */
	idx = av_addr - &av->table[0];
	if (!av->key[idx]) {
		ret = sock_conn_map_match_or_connect(
			ep, av->domain, av->cmap,
			(struct sockaddr_in*)&av_addr->addr,
			&av->key[idx]);
		if (ret) {
			SOCK_LOG_ERROR("failed to match or connect to addr %"
				       PRIu64 "\n", addr);
			return NULL;
		}
	}
	return sock_conn_map_lookup_key(av->cmap, av->key[idx]);
}
static int sock_av_close(struct fid *fid) { struct sock_av *av; int i; av = container_of(fid, struct sock_av, av_fid.fid); if (atomic_get(&av->ref)) return -FI_EBUSY; for (i=0; i<av->table_hdr->stored; i++) { if(idm_lookup(&av->addr_idm, i)) idm_clear(&av->addr_idm , i); } if (!av->name) free(av->table_hdr); else { shm_unlink(av->name); free(av->name); munmap(av->table_hdr, sizeof(struct sock_av_table_hdr) + av->attr.count * sizeof(struct sock_av_addr)); close(av->shared_fd); } atomic_dec(&av->domain->ref); free(av->key); free(av); return 0; }
/*
 * Interposed close().
 *
 * Untracked descriptors go straight to real.close(). For a tracked
 * descriptor that was duplicated from another one (dupfd != -1), the origin
 * is closed first via a recursive call and any non-zero result is returned
 * as-is. The entry's reference count is then decremented; if atomic_dec()
 * reports a non-zero value the function returns 0 and the entry is kept.
 * Otherwise the entry is removed from the index map, the placeholder
 * descriptor is closed, the underlying fd is closed with rclose() or
 * real.close() depending on its type, and the entry is freed.
 */
int close(int socket)
{
	struct fd_info *info;
	int rc;

	init_preload();
	info = idm_lookup(&idm, socket);
	if (!info)
		return real.close(socket);

	if (info->dupfd != -1) {
		rc = close(info->dupfd);
		if (rc)
			return rc;
	}

	if (atomic_dec(&info->refcnt))
		return 0;

	idm_clear(&idm, socket);
	real.close(socket);
	if (info->type == fd_rsocket)
		rc = rclose(info->fd);
	else
		rc = real.close(info->fd);
	free(info);
	return rc;
}
static int sock_regattr(struct fid_domain *domain, const struct fi_mr_attr *attr, uint64_t flags, struct fid_mr **mr) { struct fi_eq_entry eq_entry; struct sock_domain *dom; struct sock_mr *_mr; uint64_t key; dom = container_of(domain, struct sock_domain, dom_fid); if (!(dom->info.mode & FI_PROV_MR_ATTR) && ((attr->requested_key > IDX_MAX_INDEX) || idm_lookup(&dom->mr_idm, (int) attr->requested_key))) return -FI_ENOKEY; _mr = calloc(1, sizeof(*_mr) + sizeof(_mr->mr_iov) * (attr->iov_count - 1)); if (!_mr) return -FI_ENOMEM; _mr->mr_fid.fid.fclass = FI_CLASS_MR; _mr->mr_fid.fid.context = attr->context; _mr->mr_fid.fid.ops = &sock_mr_fi_ops; _mr->domain = dom; _mr->access = attr->access; _mr->flags = flags; _mr->offset = (flags & FI_MR_OFFSET) ? (uintptr_t) attr->mr_iov[0].iov_base + attr->offset : (uintptr_t) attr->mr_iov[0].iov_base; fastlock_acquire(&dom->lock); key = (dom->info.mode & FI_PROV_MR_ATTR) ? sock_get_mr_key(dom) : (uint16_t) attr->requested_key; if (idm_set(&dom->mr_idm, key, _mr) < 0) goto err; _mr->mr_fid.key = key; _mr->mr_fid.mem_desc = (void *)key; fastlock_release(&dom->lock); _mr->iov_count = attr->iov_count; memcpy(&_mr->mr_iov, attr->mr_iov, sizeof(_mr->mr_iov) * attr->iov_count); *mr = &_mr->mr_fid; atomic_inc(&dom->ref); if (dom->mr_eq) { eq_entry.fid = &domain->fid; eq_entry.context = attr->context; return sock_eq_report_event(dom->mr_eq, FI_MR_COMPLETE, &eq_entry, sizeof(eq_entry), 0); } return 0; err: fastlock_release(&dom->lock); free(_mr); return -errno; }
/*
 * Find the lowest key in [1, IDX_MAX_INDEX) that has no entry in the
 * domain's MR index map. Returns that key, or 0 when every key in the
 * range is taken.
 */
static uint16_t sock_get_mr_key(struct sock_domain *dom)
{
	uint16_t candidate = 1;

	while (candidate < IDX_MAX_INDEX) {
		if (!idm_lookup(&dom->mr_idm, candidate))
			return candidate;
		candidate++;
	}
	return 0;
}
/*
 * Compare the socket addresses stored for two fi_addr_t values in `av`.
 *
 * Each address is reduced to an AV index with av->mask. Returns the
 * memcmp() result over sizeof(struct sockaddr_in) bytes of the two stored
 * addresses (0 when identical), or -1 when either index is out of range or
 * has no entry in the address index map.
 */
int sock_av_compare_addr(struct sock_av *av, fi_addr_t addr1, fi_addr_t addr2)
{
	int index1, index2;
	struct sock_av_addr *av_addr1, *av_addr2;

	index1 = ((uint64_t)addr1 & av->mask);
	index2 = ((uint64_t)addr2 & av->mask);

	if (index1 >= av->table_hdr->stored || index1 < 0 ||
	    index2 >= av->table_hdr->stored || index2 < 0) {
		SOCK_LOG_ERROR("requested rank is larger than av table\n");
		return -1;
	}

	av_addr1 = idm_lookup(&av->addr_idm, index1);
	av_addr2 = idm_lookup(&av->addr_idm, index2);
	if (!av_addr1 || !av_addr2) {
		/* A slot below `stored` may still be empty; do not dereference. */
		SOCK_LOG_ERROR("requested address not inserted\n");
		return -1;
	}

	return memcmp(&av_addr1->addr, &av_addr2->addr,
		      sizeof(struct sockaddr_in));
}
/*
 * Look up descriptor `index` in the fd index map.
 *
 * On a hit, *fd receives the fd stored in the entry and the entry's type is
 * returned. On a miss, *fd receives `index` itself and fd_normal is
 * returned.
 */
static inline enum fd_type fd_get(int index, int *fd)
{
	struct fd_info *info = idm_lookup(&idm, index);

	if (!info) {
		*fd = index;
		return fd_normal;
	}

	*fd = info->fd;
	return info->type;
}
/*
 * Like a plain fd lookup, but first runs fork_passive()/fork_active() on
 * the descriptor when its entry is in the corresponding fork state.
 *
 * On a hit, *fd receives the fd stored in the entry (read after the fork
 * handler ran) and the entry's type is returned. On a miss, *fd receives
 * `index` and fd_normal is returned.
 */
static inline enum fd_type fd_fork_get(int index, int *fd)
{
	struct fd_info *info = idm_lookup(&idm, index);

	if (!info) {
		*fd = index;
		return fd_normal;
	}

	switch (info->state) {
	case fd_fork_passive:
		fork_passive(index);
		break;
	case fd_fork_active:
		fork_active(index);
		break;
	default:
		break;
	}

	*fd = info->fd;
	return info->type;
}
static enum fd_type fd_close(int index, int *fd) { struct fd_info *fdi; enum fd_type type; fdi = idm_lookup(&idm, index); if (fdi) { idm_clear(&idm, index); *fd = fdi->fd; type = fdi->type; real.close(index); free(fdi); } else { *fd = index; type = fd_normal; } return type; }
/*
 * Copy the address stored for `fi_addr` in the address vector into `addr`.
 *
 * At most MIN(*addrlen, av addrlen) bytes are copied; *addrlen is then set
 * to the AV's address length so the caller can detect truncation.
 *
 * Returns 0 on success, or -EINVAL when the masked index is out of range
 * or has no entry in the address index map.
 */
static int sock_av_lookup(struct fid_av *av, fi_addr_t fi_addr, void *addr,
			  size_t *addrlen)
{
	int index;
	struct sock_av *_av;
	struct sock_av_addr *av_addr;

	_av = container_of(av, struct sock_av, av_fid);
	index = ((uint64_t)fi_addr & _av->mask);
	if (index >= _av->table_hdr->stored || index < 0) {
		SOCK_LOG_ERROR("requested address not inserted\n");
		return -EINVAL;
	}

	av_addr = idm_lookup(&_av->addr_idm, index);
	if (!av_addr) {
		/* A slot below `stored` may still be empty; do not dereference. */
		SOCK_LOG_ERROR("requested address not inserted\n");
		return -EINVAL;
	}

	memcpy(addr, &av_addr->addr, MIN(*addrlen, _av->addrlen));
	*addrlen = _av->addrlen;
	return 0;
}
/*
 * Validate an access of `len` bytes at `buf` against the memory region
 * registered under `key` in `domain`.
 *
 * When the region has FI_MR_OFFSET set, `buf` is treated as an offset and
 * mr->offset is added to it first. The access is accepted when it lies
 * entirely inside one of the region's iovecs and every bit in `access` is
 * also set in the region's access mask.
 *
 * Returns the region on success. Returns NULL when no region is registered
 * under `key` (silently) or when no iovec satisfies the check (after
 * logging an error).
 */
struct sock_mr *sock_mr_verify_key(struct sock_domain *domain, uint16_t key,
				   void *buf, size_t len, uint64_t access)
{
	int i;
	struct sock_mr *mr;
	uintptr_t start, base;
	size_t iov_len;

	mr = idm_lookup(&domain->mr_idm, key);
	if (!mr)
		return NULL;

	start = (uintptr_t) buf;
	if (mr->flags & FI_MR_OFFSET)
		start += (uintptr_t) mr->offset;

	for (i = 0; i < mr->iov_count; i++) {
		base = (uintptr_t) mr->mr_iov[i].iov_base;
		iov_len = mr->mr_iov[i].iov_len;

		/*
		 * Compare lengths and offsets rather than end addresses:
		 * start + len can wrap around for a large len and would
		 * then pass a naive "end <= region end" test.
		 */
		if (start >= base && len <= iov_len &&
		    start - base <= iov_len - len) {
			if ((access & mr->access) == access)
				return mr;
		}
	}
	SOCK_LOG_ERROR("MR check failed\n");
	return NULL;
}
/*
 * Establish (or find) the connection from `ep` to the peer identified by
 * `index`.
 *
 * For FI_EP_MSG endpoints the peer is ep->dest_addr; otherwise the address
 * is taken from the endpoint's AV table at `index` masked with the AV mask.
 * If sock_ep_lookup_conn() returns anything other than
 * SOCK_CM_CONN_IN_PROGRESS that value is returned directly. Otherwise a
 * non-blocking TCP connect is attempted, waiting up to 15 seconds for
 * completion; failures are retried after a 10 second sleep until the retry
 * budget taken from sock_conn_retry is used up.
 *
 * On success the new socket is inserted into the connection map. If the
 * slot in ep->av_idm is still SOCK_CM_CONN_IN_PROGRESS it is updated to the
 * new connection; otherwise the value already stored there is returned.
 *
 * Returns the connection, or NULL on failure.
 */
struct sock_conn *sock_ep_connect(struct sock_ep *ep, fi_addr_t index)
{
	int conn_fd = -1, ret;
	int do_retry = sock_conn_retry;
	struct sock_conn *conn, *new_conn;
	uint16_t idx;
	struct sockaddr_in *addr;
	socklen_t lon;
	int valopt = 0;
	struct pollfd poll_fd;

	if (ep->ep_type == FI_EP_MSG) {
		idx = 0;
		addr = ep->dest_addr;
	} else {
		idx = index & ep->av->mask;
		addr = (struct sockaddr_in *)&ep->av->table[idx].addr;
	}

do_connect:
	fastlock_acquire(&ep->cmap.lock);
	conn = sock_ep_lookup_conn(ep, index, addr);
	fastlock_release(&ep->cmap.lock);

	if (conn != SOCK_CM_CONN_IN_PROGRESS)
		return conn;

	conn_fd = socket(AF_INET, SOCK_STREAM, 0);
	if (conn_fd == -1) {
		SOCK_LOG_ERROR("failed to create conn_fd, errno: %d\n", errno);
		errno = FI_EOTHER;
		return NULL;
	}

	ret = fd_set_nonblock(conn_fd);
	if (ret) {
		SOCK_LOG_ERROR("failed to set conn_fd nonblocking, errno: %d\n", errno);
		/* Close first: close() may overwrite errno. */
		close(conn_fd);
		errno = FI_EOTHER;
		return NULL;
	}

	SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr->sin_addr),
		     ntohs(addr->sin_port));
	SOCK_LOG_DBG("Connecting using address:%s\n",
		     inet_ntoa(ep->src_addr->sin_addr));

	ret = connect(conn_fd, (struct sockaddr *) addr, sizeof *addr);
	if (ret < 0) {
		if (errno == EINPROGRESS) {
			poll_fd.fd = conn_fd;
			poll_fd.events = POLLOUT;

			ret = poll(&poll_fd, 1, 15 * 1000);
			if (ret < 0) {
				SOCK_LOG_DBG("poll failed\n");
				goto retry;
			}
			if (ret == 0) {
				/*
				 * Timed out: the connect is still pending and
				 * SO_ERROR would read 0, so the socket must
				 * not be treated as connected.
				 */
				SOCK_LOG_DBG("poll timed out\n");
				goto retry;
			}

			lon = sizeof valopt;
			ret = getsockopt(conn_fd, SOL_SOCKET, SO_ERROR,
					 (void*)(&valopt), &lon);
			if (ret < 0) {
				SOCK_LOG_DBG("getsockopt failed: %d, %d\n",
					     ret, conn_fd);
				goto retry;
			}

			if (valopt) {
				SOCK_LOG_DBG("Error in connection() %d - %s - %d\n",
					     valopt, strerror(valopt), conn_fd);
				SOCK_LOG_DBG("Connecting to: %s:%d\n",
					     inet_ntoa(addr->sin_addr),
					     ntohs(addr->sin_port));
				SOCK_LOG_DBG("Connecting using address:%s\n",
					     inet_ntoa(ep->src_addr->sin_addr));
				goto retry;
			}
			goto out;
		} else {
			SOCK_LOG_DBG("Timeout or error() - %s: %d\n",
				     strerror(errno), conn_fd);
			SOCK_LOG_DBG("Connecting to: %s:%d\n",
				     inet_ntoa(addr->sin_addr),
				     ntohs(addr->sin_port));
			SOCK_LOG_DBG("Connecting using address:%s\n",
				     inet_ntoa(ep->src_addr->sin_addr));
			goto retry;
		}
	} else {
		goto out;
	}

retry:
	do_retry--;
	sleep(10);
	if (!do_retry)
		goto err;

	if (conn_fd != -1) {
		close(conn_fd);
		conn_fd = -1;
	}

	SOCK_LOG_ERROR("Connect error, retrying - %s - %d\n",
		       strerror(errno), conn_fd);
	SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr->sin_addr),
		     ntohs(addr->sin_port));
	SOCK_LOG_DBG("Connecting using address:%s\n",
		     inet_ntoa(ep->src_addr->sin_addr));
	goto do_connect;

out:
	fastlock_acquire(&ep->cmap.lock);
	new_conn = sock_conn_map_insert(ep, addr, conn_fd, 0);
	if (!new_conn) {
		/*
		 * NOTE(review): guards against sock_conn_map_insert()
		 * returning NULL; confirm its failure contract.
		 */
		fastlock_release(&ep->cmap.lock);
		goto err;
	}
	new_conn->av_index = (ep->ep_type == FI_EP_MSG) ?
		FI_ADDR_NOTAVAIL : (fi_addr_t) idx;
	conn = idm_lookup(&ep->av_idm, index);
	if (conn == SOCK_CM_CONN_IN_PROGRESS) {
		idm_set(&ep->av_idm, index, new_conn);
		conn = new_conn;
	}
	fastlock_release(&ep->cmap.lock);
	return conn;

err:
	close(conn_fd);
	return NULL;
}
/*
 * Fetch the memory region stored under `key` in the domain's MR index map.
 * Returns whatever idm_lookup() yields for that key.
 */
struct sock_mr *sock_mr_get_entry(struct sock_domain *domain, uint16_t key)
{
	struct sock_mr *entry;

	entry = (struct sock_mr *) idm_lookup(&domain->mr_idm, key);
	return entry;
}