void *app_glue_create_socket(int family, int type)
{
	struct timeval tv;
	struct socket *sock = NULL;

	if (sock_create_kern(family, type, 0, &sock)) {
		syslog(LOG_ERR, "cannot create socket %s %d\n", __FILE__, __LINE__);
		return NULL;
	}
	tv.tv_sec = -1;
	tv.tv_usec = 0;
	if (sock_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
			    (char *)&tv, sizeof(tv))) {
		syslog(LOG_ERR, "%s %d cannot set notimeout option\n", __FILE__, __LINE__);
	}
	tv.tv_sec = -1;
	tv.tv_usec = 0;
	if (sock_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
			    (char *)&tv, sizeof(tv))) {
		syslog(LOG_ERR, "%s %d cannot set notimeout option\n", __FILE__, __LINE__);
	}
	if (type != SOCK_STREAM) {
		if (sock->sk) {
			sock_reset_flag(sock->sk, SOCK_USE_WRITE_QUEUE);
			sock->sk->sk_data_ready = app_glue_sock_readable;
			sock->sk->sk_write_space = app_glue_sock_write_space;
			app_glue_sock_write_space(sock->sk);
		}
	}
	return sock;
}
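/*
 * Usage sketch (hypothetical caller, not part of the original source):
 * create a kernel UDP socket through the glue layer above and release it
 * with the standard sock_release(). Only app_glue_create_socket() is
 * assumed from this codebase.
 */
static int example_open_udp(void)
{
	struct socket *sock = app_glue_create_socket(AF_INET, SOCK_DGRAM);

	if (!sock)
		return -ENOMEM;	/* creation or option setup failed */

	/* ... exchange data via sock_sendmsg()/sock_recvmsg() ... */

	sock_release(sock);
	return 0;
}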
/*
 * sock_tcp_server()
 * Initialize a TCP server socket. On success, a valid socket descriptor
 * is returned.
 */
int sock_tcp_server(const char *hostname, int port)
{
	int oldfl = 0;
	int sock;

	/* create a socket */
	if ((sock = sock_create("tcp")) < 0)
		net_error("initialize server socket failed\n");

	/* set it to non-blocking operation */
	oldfl = fcntl(sock, F_GETFL, 0);
	if (!(oldfl & O_NONBLOCK))
		fcntl(sock, F_SETFL, oldfl | O_NONBLOCK);

	/* set up for a fast restart to avoid "bind: address in use" errors */
	sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, 1);

	/* bind it to the appropriate port */
	if ((sock_bind(sock, hostname, port)) == -1)
		return -1;

	/* go ahead and listen on the socket */
	if (listen(sock, TCP_BACKLOG) != 0)
		return -1;

	return sock;
}
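/*
 * Hypothetical caller of sock_tcp_server(): wait for a connection on the
 * non-blocking listener with poll(2), then accept it. poll()/accept() are
 * plain POSIX; only sock_tcp_server() comes from this codebase. Note that
 * on Linux the accepted socket does not inherit O_NONBLOCK.
 */
#include <poll.h>
#include <sys/socket.h>

int accept_one_client(const char *host, int port)
{
	struct pollfd pfd;
	int listener = sock_tcp_server(host, port);

	if (listener < 0)
		return -1;

	pfd.fd = listener;
	pfd.events = POLLIN;
	if (poll(&pfd, 1, 5000) <= 0)	/* wait up to 5 s */
		return -1;

	return accept(listener, NULL, NULL);
}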
int ksetsockopt(ksocket_t socket, int level, int optname, void *optval, int optlen)
{
	struct socket *sk;
	int ret;
#ifndef KSOCKET_ADDR_SAFE
	mm_segment_t old_fs;
#endif
	sk = (struct socket *)socket;
#ifndef KSOCKET_ADDR_SAFE
	old_fs = get_fs();
	set_fs(KERNEL_DS);
#endif
	if (level == SOL_SOCKET)
		ret = sock_setsockopt(sk, level, optname, optval, optlen);
	else
		ret = sk->ops->setsockopt(sk, level, optname, optval, optlen);
#ifndef KSOCKET_ADDR_SAFE
	set_fs(old_fs);
#endif
	return ret;
}
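/*
 * Minimal use of ksetsockopt() (sketch; `ks` would come from the matching
 * ksocket() wrapper in this library): enable address reuse before bind.
 */
int ksocket_example_reuseaddr(ksocket_t ks)
{
	int one = 1;

	return ksetsockopt(ks, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
}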
static int unix_setsockopt(struct socket *sock, int level, int optname,
			   char *optval, int optlen)
{
	unix_socket *sk = sock->data;

	if (level != SOL_SOCKET)
		return -EOPNOTSUPP;
	return sock_setsockopt(sk, level, optname, optval, optlen);
}
static int libcfs_sock_create (cfs_socket_t **sockp, int *fatal, __u32 local_ip, int local_port) { struct sockaddr_in locaddr; cfs_socket_t *sock; int option; int optlen; int rc; /* All errors are fatal except bind failure if the port is in use */ *fatal = 1; sock = _MALLOC(sizeof(cfs_socket_t), M_TEMP, M_WAITOK|M_ZERO); if (!sock) { CERROR("Can't allocate cfs_socket.\n"); return -ENOMEM; } *sockp = sock; sock->s_magic = CFS_SOCK_MAGIC; rc = -sock_socket(PF_INET, SOCK_STREAM, 0, libcfs_sock_upcall, sock, &C2B_SOCK(sock)); if (rc != 0) goto out; option = 1; optlen = sizeof(option); rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_REUSEADDR, &option, optlen); if (rc != 0) goto out; /* can't specify a local port without a local IP */ LASSERT (local_ip == 0 || local_port != 0); if (local_ip != 0 || local_port != 0) { bzero (&locaddr, sizeof (locaddr)); locaddr.sin_len = sizeof(struct sockaddr_in); locaddr.sin_family = AF_INET; locaddr.sin_port = htons (local_port); locaddr.sin_addr.s_addr = (local_ip != 0) ? htonl(local_ip) : INADDR_ANY; rc = -sock_bind(C2B_SOCK(sock), (struct sockaddr *)&locaddr); if (rc == -EADDRINUSE) { CDEBUG(D_NET, "Port %d already in use\n", local_port); *fatal = 0; goto out; } if (rc != 0) { CERROR("Error trying to bind to port %d: %d\n", local_port, rc); goto out; } } return 0; out: if (C2B_SOCK(sock) != NULL) sock_close(C2B_SOCK(sock)); FREE(sock, M_TEMP); return rc; }
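/*
 * Sketch of a caller (hypothetical): walk down the privileged port range
 * until libcfs_sock_create() stops reporting the non-fatal EADDRINUSE case.
 */
static int example_bind_reserved(cfs_socket_t **sockp, __u32 local_ip)
{
	int fatal = 0;
	int port;
	int rc = -EADDRINUSE;

	for (port = 1023; port > 512 && rc == -EADDRINUSE; port--)
		rc = libcfs_sock_create(sockp, &fatal, local_ip, port);

	return rc;
}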
static int setup_tcp(void) { struct sockaddr_in saddr = {}; mm_segment_t fs; int buffersize = PAGE_SIZE; int ret; ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &server); if (unlikely(ret < 0)) { DBG("error creating socket"); return ret; } saddr.sin_family = AF_INET; saddr.sin_port = htons(tcp_port); saddr.sin_addr.s_addr = INADDR_ANY; fs = get_fs(); set_fs(KERNEL_DS); ret = sock_setsockopt(server, SOL_SOCKET, SO_SNDBUF, (void *)&buffersize, sizeof(buffersize)); set_fs(fs); if (unlikely(ret < 0)) { DBG("error setting buffsize"); goto out_err; } ret = server->ops->bind(server, (struct sockaddr *)&saddr, sizeof(saddr)); if (unlikely(ret < 0)) { DBG("error binding socket"); goto out_err; } ret = server->ops->listen(server, 1); if (unlikely(ret < 0)) { DBG("error listening on socket"); goto out_err; } ret = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &client); if (ret < 0) { DBG("error creating accept socket"); goto out_err; } out: return ret; out_err: server->ops->shutdown(server, 0); server->ops->release(server); goto out; }
static int compat_sock_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, unsigned int optlen)
{
	if (optname == SO_ATTACH_FILTER)
		return do_set_attach_filter(sock, level, optname, optval, optlen);
	if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)
		return do_set_sock_timeout(sock, level, optname, optval, optlen);
	return sock_setsockopt(sock, level, optname, optval, optlen);
}
int send(struct socket *sock, struct sockaddr_in *addr, void *data, int len)
{
	struct msghdr msg;
	struct iovec iov;
	mm_segment_t oldfs;
	int size = 0;
	u_int32_t space;
	int interface = 1;

	if (sock->sk == NULL)
		return 0;

	if (sock_setsockopt(sock, SOL_SOCKET, SO_BROADCAST,
			    (char *)&interface, sizeof(interface)) < 0) {
		printk(KERN_WARNING "No Broadcast");
	}

	iov.iov_base = (char *)data;
	iov.iov_len = len;

	/* the destination comes from the file-scope my_work item,
	 * not from the unused addr parameter */
	memset(&my_work->addr, 0, sizeof(struct sockaddr));
	memset(&my_work->addr_send, 0, sizeof(struct sockaddr));
	my_work->addr_send.sin_family = AF_INET;
	/* my_work->addr_send.sin_addr.s_addr = htonl(INADDR_SEND); */
	my_work->addr_send.sin_addr.s_addr = in_aton("255.255.255.255");
	/* sock->sk->sk_flags.SOCK_BROADCAST = 1; */
	my_work->addr_send.sin_port = htons(CONNECT_PORT);

	msg.msg_flags = 0;
	msg.msg_name = (void *)&my_work->addr_send;
	msg.msg_namelen = sizeof(struct sockaddr_in);
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	if ((space = sock_wspace(sock->sk)) < len) {
		printk(KERN_INFO "ERROR");
		return -ENOMEM;
	}
	sock_set_flag(my_work->sock_send, SOCK_BROADCAST);
	/* sock->sk->broadcast = 1; */

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	size = sock_sendmsg(sock, &msg, len);
	if (size < 0)
		printk(KERN_WARNING "ERROR SEND MSG:%d:", size);
	set_fs(oldfs);
	/* printk(KERN_INFO MODULE_NAME ":Message Sent from new program"); */

	return size;
}
static int inet_setsockopt(struct socket *sock, int level, int optname,
			   char *optval, int optlen)
{
	struct sock *sk = (struct sock *)sock->data;

	if (level == SOL_SOCKET)
		return sock_setsockopt(sk, level, optname, optval, optlen);
	if (sk->prot->setsockopt == NULL)
		return -EOPNOTSUPP;
	else
		return sk->prot->setsockopt(sk, level, optname, optval, optlen);
}
static int do_set_attach_filter(struct socket *sock, int level, int optname,
				char __user *optval, unsigned int optlen)
{
	struct sock_fprog __user *kfprog;

	kfprog = get_compat_bpf_fprog(optval);
	if (!kfprog)
		return -EFAULT;

	return sock_setsockopt(sock, level, optname, (char __user *)kfprog,
			       sizeof(struct sock_fprog));
}
int sim_sock_setsockopt(struct SimSocket *socket, int level, int optname,
			const void *optval, int optlen)
{
	struct socket *sock = (struct socket *)socket;
	char *coptval = (char *)optval;
	int err;

	if (level == SOL_SOCKET)
		err = sock_setsockopt(sock, level, optname, coptval, optlen);
	else
		err = sock->ops->setsockopt(sock, level, optname, coptval, optlen);
	return err;
}
static int _omni_sock_setsockopt(struct socket *sock, int level, int optname,
				 char *optval, unsigned int optlen)
{
	mm_segment_t oldfs = get_fs();
	char __user *uoptval;
	int err;

	uoptval = (char __user __force *)optval;

	set_fs(KERNEL_DS);
	if (level == SOL_SOCKET)
		err = sock_setsockopt(sock, level, optname, uoptval, optlen);
	else
		err = sock->ops->setsockopt(sock, level, optname, uoptval, optlen);
	set_fs(oldfs);

	return err;
}
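/*
 * Sketch only: pushing a kernel-resident option value through the wrapper
 * above. The get_fs()/set_fs(KERNEL_DS) pair inside _omni_sock_setsockopt()
 * is what lets this kernel pointer survive the __user access checks on
 * pre-5.x kernels.
 */
static int example_nodelay(struct socket *sock)
{
	int one = 1;

	return _omni_sock_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
				     (char *)&one, sizeof(one));
}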
static int do_set_attach_filter(struct socket *sock, int level, int optname,
				char __user *optval, unsigned int optlen)
{
	struct compat_sock_fprog __user *fprog32 =
		(struct compat_sock_fprog __user *)optval;
	struct sock_fprog __user *kfprog =
		compat_alloc_user_space(sizeof(struct sock_fprog));
	compat_uptr_t ptr;
	u16 len;

	if (!access_ok(VERIFY_READ, fprog32, sizeof(*fprog32)) ||
	    !access_ok(VERIFY_WRITE, kfprog, sizeof(struct sock_fprog)) ||
	    __get_user(len, &fprog32->len) ||
	    __get_user(ptr, &fprog32->filter) ||
	    __put_user(len, &kfprog->len) ||
	    __put_user(compat_ptr(ptr), &kfprog->filter))
		return -EFAULT;

	return sock_setsockopt(sock, level, optname, (char __user *)kfprog,
			       sizeof(struct sock_fprog));
}
static int do_set_sock_timeout(struct socket *sock, int level, int optname,
			       char __user *optval, unsigned int optlen)
{
	struct compat_timeval __user *up =
		(struct compat_timeval __user *)optval;
	struct timeval ktime;
	mm_segment_t old_fs;
	int err;

	if (optlen < sizeof(*up))
		return -EINVAL;
	if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
	    __get_user(ktime.tv_sec, &up->tv_sec) ||
	    __get_user(ktime.tv_usec, &up->tv_usec))
		return -EFAULT;

	old_fs = get_fs();
	set_fs(KERNEL_DS);
	err = sock_setsockopt(sock, level, optname, (char *)&ktime, sizeof(ktime));
	set_fs(old_fs);

	return err;
}
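/*
 * For reference, the userspace call that lands in do_set_sock_timeout()
 * when a 32-bit process runs on a 64-bit kernel (plain POSIX, shown only
 * to illustrate the compat path; not part of the original file):
 */
#include <sys/socket.h>
#include <sys/time.h>

int set_recv_timeout(int fd, long seconds)
{
	struct timeval tv = { .tv_sec = seconds, .tv_usec = 0 };

	return setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
}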
int setup_tcp()
{
	struct sockaddr_in saddr;
	int r;
	mm_segment_t fs;
	int buffsize = PAGE_SIZE;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,5)
	r = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &control);
#else
	r = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &control);
#endif
	if (r < 0) {
		DBG("Error creating control socket");
		return r;
	}

	memset(&saddr, 0, sizeof(saddr));
	saddr.sin_family = AF_INET;
	saddr.sin_port = htons(port);
	if (localhostonly)
		saddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	else
		saddr.sin_addr.s_addr = htonl(INADDR_ANY);

	fs = get_fs();
	set_fs(KERNEL_DS);
	/* capture the return value so the check below actually sees it */
	r = sock_setsockopt(control, SOL_SOCKET, SO_SNDBUF,
			    (void *)&buffsize, sizeof(int));
	set_fs(fs);
	if (r < 0) {
		DBG("Error setting buffsize %d", r);
		return r;
	}

	r = control->ops->bind(control, (struct sockaddr *)&saddr, sizeof(saddr));
	if (r < 0) {
		DBG("Error binding control socket");
		return r;
	}

	r = control->ops->listen(control, 1);
	if (r) {
		DBG("Error listening on socket");
		return r;
	}

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,5)
	r = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &accept);
#else
	r = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &accept);
#endif
	if (r < 0) {
		DBG("Error creating accept socket");
		return r;
	}

	r = accept->ops->accept(control, accept, 0);
	if (r < 0) {
		DBG("Error accepting socket");
		return r;
	}

	return 0;
}
int libcfs_sock_read (cfs_socket_t *sock, void *buffer, int nob, int timeout) { size_t rcvlen; int rc; cfs_duration_t to = cfs_time_seconds(timeout); cfs_time_t then; struct timeval tv; LASSERT(nob > 0); for (;;) { struct iovec iov = { .iov_base = buffer, .iov_len = nob }; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_iov = &iov, .msg_iovlen = 1, .msg_control = NULL, .msg_controllen = 0, .msg_flags = 0, }; cfs_duration_usec(to, &tv); rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); if (rc != 0) { CERROR("Can't set socket recv timeout " "%ld.%06d: %d\n", (long)tv.tv_sec, (int)tv.tv_usec, rc); return rc; } then = cfs_time_current(); rc = -sock_receive(C2B_SOCK(sock), &msg, 0, &rcvlen); to -= cfs_time_current() - then; if (rc != 0 && rc != -EWOULDBLOCK) return rc; if (rcvlen == nob) return 0; if (to <= 0) return -EAGAIN; buffer = ((char *)buffer) + rcvlen; nob -= rcvlen; } return 0; } int libcfs_sock_write (cfs_socket_t *sock, void *buffer, int nob, int timeout) { size_t sndlen; int rc; cfs_duration_t to = cfs_time_seconds(timeout); cfs_time_t then; struct timeval tv; LASSERT(nob > 0); for (;;) { struct iovec iov = { .iov_base = buffer, .iov_len = nob }; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_iov = &iov, .msg_iovlen = 1, .msg_control = NULL, .msg_controllen = 0, .msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0, }; if (timeout != 0) { cfs_duration_usec(to, &tv); rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)); if (rc != 0) { CERROR("Can't set socket send timeout " "%ld.%06d: %d\n", (long)tv.tv_sec, (int)tv.tv_usec, rc); return rc; } } then = cfs_time_current(); rc = -sock_send(C2B_SOCK(sock), &msg, ((timeout == 0) ? MSG_DONTWAIT : 0), &sndlen); to -= cfs_time_current() - then; if (rc != 0 && rc != -EWOULDBLOCK) return rc; if (sndlen == nob) return 0; if (to <= 0) return -EAGAIN; buffer = ((char *)buffer) + sndlen; nob -= sndlen; } return 0; } int libcfs_sock_getaddr (cfs_socket_t *sock, int remote, __u32 *ip, int *port) { struct sockaddr_in sin; int rc; if (remote != 0) /* Get remote address */ rc = -sock_getpeername(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin)); else /* Get local address */ rc = -sock_getsockname(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin)); if (rc != 0) { CERROR ("Error %d getting sock %s IP/port\n", rc, remote ? "peer" : "local"); return rc; } if (ip != NULL) *ip = ntohl (sin.sin_addr.s_addr); if (port != NULL) *port = ntohs (sin.sin_port); return 0; } int libcfs_sock_setbuf (cfs_socket_t *sock, int txbufsize, int rxbufsize) { int option; int rc; if (txbufsize != 0) { option = txbufsize; rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF, (char *)&option, sizeof (option)); if (rc != 0) { CERROR ("Can't set send buffer %d: %d\n", option, rc); return (rc); } } if (rxbufsize != 0) { option = rxbufsize; rc = -sock_setsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF, (char *)&option, sizeof (option)); if (rc != 0) { CERROR ("Can't set receive buffer %d: %d\n", option, rc); return (rc); } } return 0; }
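/*
 * Hypothetical sequence over the libcfs helpers above: size both socket
 * buffers, then send a buffer with a 30-second timeout. Only functions
 * defined in this file are used; the buffer sizes are illustrative.
 */
static int example_send(cfs_socket_t *sock, void *msg, int len)
{
	int rc = libcfs_sock_setbuf(sock, 256 * 1024, 256 * 1024);

	if (rc != 0)
		return rc;

	return libcfs_sock_write(sock, msg, len, 30);
}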
int xi_lpx_connect(xi_socket_t *oso, struct sockaddr_lpx daddr, struct sockaddr_lpx saddr, int timeoutsec) { xi_socket_t so = NULL; errno_t error; struct timeval timeout; #ifndef __KPI_SOCKET__ struct sockopt sopt; boolean_t funnel_state; #endif *oso = NULL; error = xi_sock_socket(AF_LPX, SOCK_STREAM, 0, NULL, NULL, &so); if(error) { DebugPrint(1, false, "socreate error %d\n", error); goto bad; } error = xi_sock_bind(so, (struct sockaddr *) &saddr); if(error) { DebugPrint(1, false, "xi_lpx_connect: sobind error\n"); goto bad; } #if 0 DebugPrint(4, false, "xi_lpx_connect to "); for(i=0; i<6; i++) DebugPrint(4, false, "02x ", daddr.slpx_node[i]); #endif error = xi_sock_connect(so, (struct sockaddr *)&daddr, 0); if(error) { DebugPrint(4, false, "soconnect error %d\n", error); goto bad; } #ifndef __KPI_SOCKET__ do { // struct sockaddr_lpx sin; int s; funnel_state = thread_funnel_set(network_flock, TRUE); s = splnet(); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { // IOLog("before sleep\n"); (void) tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, "xiscsicon", 0); // IOLog("after sleep\n"); break; } // IOLog("so->so_error = %d\n", so->so_error); if (so->so_error) { error = so->so_error; so->so_error = 0; splx(s); goto bad; } splx(s); (void) thread_funnel_set(network_flock, FALSE); } while(0); #endif // __KPI_SOCKET__ *oso = so; // Set Read Timeout. timeout.tv_sec = timeoutsec; timeout.tv_usec = 0; #ifdef __KPI_SOCKET__ error = sock_setsockopt( so, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(struct timeval) ); #else sopt.sopt_dir = SOPT_SET; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_RCVTIMEO; sopt.sopt_val = &timeout; sopt.sopt_valsize = sizeof(struct timeval); sopt.sopt_p = current_proc(); funnel_state = thread_funnel_set(network_flock, TRUE); error = sosetopt(so, &sopt); (void) thread_funnel_set(network_flock, FALSE); #endif if(error) { DebugPrint(1, false, "xi_lpx_connect: Can't set Receive Time out. error %d\n", error); goto bad; } return 0; bad: #ifdef __KPI_SOCKET__ #else (void) thread_funnel_set(network_flock, FALSE); #endif xi_lpx_disconnect(so); return error; }
/* * Do a remote procedure call (RPC) and wait for its reply. * If from_p is non-null, then we are doing broadcast, and * the address from whence the response came is saved there. */ int krpc_call( struct sockaddr_in *sa, u_int sotype, u_int prog, u_int vers, u_int func, mbuf_t *data, /* input/output */ struct sockaddr_in *from_p) /* output */ { socket_t so; struct sockaddr_in *sin; mbuf_t m, nam, mhead; struct rpc_call *call; struct rpc_reply *reply; int error, timo, secs; size_t len; static u_int32_t xid = ~0xFF; u_int16_t tport; size_t maxpacket = 1<<16; /* * Validate address family. * Sorry, this is INET specific... */ if (sa->sin_family != AF_INET) return (EAFNOSUPPORT); /* Free at end if not null. */ nam = mhead = NULL; /* * Create socket and set its recieve timeout. */ if ((error = sock_socket(AF_INET, sotype, 0, 0, 0, &so))) goto out1; { struct timeval tv; tv.tv_sec = 1; tv.tv_usec = 0; if ((error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) goto out; } /* * Enable broadcast if necessary. */ if (from_p && (sotype == SOCK_DGRAM)) { int on = 1; if ((error = sock_setsockopt(so, SOL_SOCKET, SO_BROADCAST, &on, sizeof(on)))) goto out; } /* * Bind the local endpoint to a reserved port, * because some NFS servers refuse requests from * non-reserved (non-privileged) ports. */ if ((error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &m))) goto out; sin = mbuf_data(m); bzero(sin, sizeof(*sin)); mbuf_setlen(m, sizeof(*sin)); sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; tport = IPPORT_RESERVED; do { tport--; sin->sin_port = htons(tport); error = sock_bind(so, (struct sockaddr*)sin); } while (error == EADDRINUSE && tport > IPPORT_RESERVED / 2); mbuf_freem(m); m = NULL; if (error) { printf("bind failed\n"); goto out; } /* * Setup socket address for the server. */ if ((error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &nam))) goto out; sin = mbuf_data(nam); mbuf_setlen(nam, sa->sin_len); bcopy((caddr_t)sa, (caddr_t)sin, sa->sin_len); if (sotype == SOCK_STREAM) { struct timeval tv; tv.tv_sec = 60; tv.tv_usec = 0; error = sock_connect(so, mbuf_data(nam), MSG_DONTWAIT); if (error && (error != EINPROGRESS)) goto out; error = sock_connectwait(so, &tv); if (error) { if (error == EINPROGRESS) error = ETIMEDOUT; printf("krpc_call: error waiting for TCP socket connect: %d\n", error); goto out; } } /* * Prepend RPC message header. */ m = *data; *data = NULL; #if DIAGNOSTIC if ((mbuf_flags(m) & MBUF_PKTHDR) == 0) panic("krpc_call: send data w/o pkthdr"); if (mbuf_pkthdr_len(m) < mbuf_len(m)) panic("krpc_call: pkthdr.len not set"); #endif len = sizeof(*call); if (sotype == SOCK_STREAM) len += 4; /* account for RPC record marker */ mhead = m; if ((error = mbuf_prepend(&mhead, len, MBUF_WAITOK))) goto out; if ((error = mbuf_pkthdr_setrcvif(mhead, NULL))) goto out; /* * Fill in the RPC header */ if (sotype == SOCK_STREAM) { /* first, fill in RPC record marker */ u_int32_t *recmark = mbuf_data(mhead); *recmark = htonl(0x80000000 | (mbuf_pkthdr_len(mhead) - 4)); call = (struct rpc_call *)(recmark + 1); } else { call = mbuf_data(mhead); } bzero((caddr_t)call, sizeof(*call)); xid++; call->rp_xid = htonl(xid); /* call->rp_direction = 0; */ call->rp_rpcvers = htonl(2); call->rp_prog = htonl(prog); call->rp_vers = htonl(vers); call->rp_proc = htonl(func); /* call->rp_auth = 0; */ /* call->rp_verf = 0; */ /* * Send it, repeatedly, until a reply is received, * but delay each re-send by an increasing amount. * If the delay hits the maximum, start complaining. 
*/ timo = 0; for (;;) { struct msghdr msg; /* Send RPC request (or re-send). */ if ((error = mbuf_copym(mhead, 0, MBUF_COPYALL, MBUF_WAITOK, &m))) goto out; bzero(&msg, sizeof(msg)); if (sotype == SOCK_STREAM) { msg.msg_name = NULL; msg.msg_namelen = 0; } else { msg.msg_name = mbuf_data(nam); msg.msg_namelen = mbuf_len(nam); } error = sock_sendmbuf(so, &msg, m, 0, 0); if (error) { printf("krpc_call: sosend: %d\n", error); goto out; } m = NULL; /* Determine new timeout. */ if (timo < MAX_RESEND_DELAY) timo++; else printf("RPC timeout for server " IP_FORMAT "\n", IP_LIST(&(sin->sin_addr.s_addr))); /* * Wait for up to timo seconds for a reply. * The socket receive timeout was set to 1 second. */ secs = timo; while (secs > 0) { size_t readlen; if (m) { mbuf_freem(m); m = NULL; } if (sotype == SOCK_STREAM) { int maxretries = 60; struct iovec aio; aio.iov_base = &len; aio.iov_len = sizeof(u_int32_t); bzero(&msg, sizeof(msg)); msg.msg_iov = &aio; msg.msg_iovlen = 1; do { error = sock_receive(so, &msg, MSG_WAITALL, &readlen); if ((error == EWOULDBLOCK) && (--maxretries <= 0)) error = ETIMEDOUT; } while (error == EWOULDBLOCK); if (!error && readlen < aio.iov_len) { /* only log a message if we got a partial word */ if (readlen != 0) printf("short receive (%ld/%ld) from server " IP_FORMAT "\n", readlen, sizeof(u_int32_t), IP_LIST(&(sin->sin_addr.s_addr))); error = EPIPE; } if (error) goto out; len = ntohl(len) & ~0x80000000; /* * This is SERIOUS! We are out of sync with the sender * and forcing a disconnect/reconnect is all I can do. */ if (len > maxpacket) { printf("impossible packet length (%ld) from server " IP_FORMAT "\n", len, IP_LIST(&(sin->sin_addr.s_addr))); error = EFBIG; goto out; } do { readlen = len; error = sock_receivembuf(so, NULL, &m, MSG_WAITALL, &readlen); } while (error == EWOULDBLOCK); if (!error && (len > readlen)) { printf("short receive (%ld/%ld) from server " IP_FORMAT "\n", readlen, len, IP_LIST(&(sin->sin_addr.s_addr))); error = EPIPE; } } else { len = maxpacket; readlen = len; bzero(&msg, sizeof(msg)); msg.msg_name = from_p; msg.msg_namelen = (from_p == NULL) ? 0 : sizeof(*from_p); error = sock_receivembuf(so, &msg, &m, 0, &readlen); } if (error == EWOULDBLOCK) { secs--; continue; } if (error) goto out; len = readlen; /* Does the reply contain at least a header? */ if (len < MIN_REPLY_HDR) continue; if (mbuf_len(m) < MIN_REPLY_HDR) continue; reply = mbuf_data(m); /* Is it the right reply? */ if (reply->rp_direction != htonl(RPC_REPLY)) continue; if (reply->rp_xid != htonl(xid)) continue; /* Was RPC accepted? (authorization OK) */ if (reply->rp_astatus != 0) { error = ntohl(reply->rp_u.rpu_errno); printf("rpc denied, error=%d\n", error); /* convert rpc error to errno */ switch (error) { case RPC_MISMATCH: error = ERPCMISMATCH; break; case RPC_AUTHERR: error = EAUTH; break; } goto out; } if (mbuf_len(m) < REPLY_SIZE) { error = RPC_SYSTEM_ERR; } else { error = ntohl(reply->rp_u.rpu_ok.rp_rstatus); } /* Did the call succeed? */ if (error != 0) { printf("rpc status=%d\n", error); /* convert rpc error to errno */ switch (error) { case RPC_PROGUNAVAIL: error = EPROGUNAVAIL; break; case RPC_PROGMISMATCH: error = EPROGMISMATCH; break; case RPC_PROCUNAVAIL: error = EPROCUNAVAIL; break; case RPC_GARBAGE: error = EINVAL; break; case RPC_SYSTEM_ERR: error = EIO; break; } goto out; } goto gotreply; /* break two levels */ } /* while secs */ } /* forever send/receive */ error = ETIMEDOUT; goto out; gotreply: /* * Pull as much as we can into first mbuf, to make * result buffer contiguous. 
Note that if the entire * result won't fit into one mbuf, you're out of luck. * XXX - Should not rely on making the entire reply * contiguous (fix callers instead). -gwr */ #if DIAGNOSTIC if ((mbuf_flags(m) & MBUF_PKTHDR) == 0) panic("krpc_call: received pkt w/o header?"); #endif len = mbuf_pkthdr_len(m); if (sotype == SOCK_STREAM) len -= 4; /* the RPC record marker was read separately */ if (mbuf_len(m) < len) { if ((error = mbuf_pullup(&m, len))) goto out; reply = mbuf_data(m); } /* * Strip RPC header */ len = sizeof(*reply); if (reply->rp_u.rpu_ok.rp_auth.rp_atype != 0) { len += ntohl(reply->rp_u.rpu_ok.rp_auth.rp_alen); len = (len + 3) & ~3; /* XXX? */ } mbuf_adj(m, len); /* result */ *data = m; out: sock_close(so); out1: if (nam) mbuf_freem(nam); if (mhead) mbuf_freem(mhead); return error; }
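/*
 * Example call (sketch, modeled on the portmapper lookup that typically
 * accompanies krpc_call(); 100000/2/3 are the standard ONC RPC portmap
 * program, version, and GETPORT procedure numbers):
 */
static int example_getport(struct sockaddr_in *server, mbuf_t *request)
{
	return krpc_call(server, SOCK_DGRAM,
	    100000 /* PMAPPROG */, 2 /* PMAPVERS */, 3 /* PMAPPROC_GETPORT */,
	    request, NULL);
}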
int pfq_setsockopt(struct socket *sock, int level, int optname, char __user * optval, #if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)) unsigned #endif int optlen) { struct pfq_sock *so = pfq_sk(sock->sk); struct pfq_rx_opt * ro; struct pfq_tx_opt * to; bool found = true; if (so == NULL) return -EINVAL; ro = &so->rx_opt; to = &so->tx_opt; switch(optname) { case Q_SO_TOGGLE_QUEUE: { int active; if (optlen != sizeof(active)) return -EINVAL; if (copy_from_user(&active, optval, optlen)) return -EFAULT; if (active) { if (!so->mem_addr) { struct pfq_queue_hdr * queue; /* alloc queue memory */ if (pfq_shared_queue_alloc(so, pfq_queue_total_mem(so)) < 0) { return -ENOMEM; } /* so->mem_addr and so->mem_size are correctly configured */ /* initialize queues headers */ queue = (struct pfq_queue_hdr *)so->mem_addr; /* initialize rx queue header */ queue->rx.data = (1L << 24); queue->rx.poll_wait = 0; queue->rx.size = so->rx_opt.size; queue->rx.slot_size = so->rx_opt.slot_size; queue->tx.producer.index = 0; queue->tx.producer.cache = 0; queue->tx.consumer.index = 0; queue->tx.consumer.cache = 0; queue->tx.size_mask = so->tx_opt.size - 1; queue->tx.max_len = so->tx_opt.maxlen; queue->tx.size = so->tx_opt.size; queue->tx.slot_size = so->tx_opt.slot_size; /* update the queues base_addr */ so->rx_opt.base_addr = so->mem_addr + sizeof(struct pfq_queue_hdr); so->tx_opt.base_addr = so->mem_addr + sizeof(struct pfq_queue_hdr) + pfq_queue_mpdb_mem(so); /* commit both the queues */ smp_wmb(); so->rx_opt.queue_ptr = &queue->rx; so->tx_opt.queue_ptr = &queue->tx; pr_devel("[PFQ|%d] queue: rx_size:%d rx_slot_size:%d tx_size:%d tx_slot_size:%d\n", so->id, queue->rx.size, queue->rx.slot_size, queue->tx.size, queue->tx.slot_size); } } else { if (so->tx_opt.thread) { pr_devel("[PFQ|%d] stopping TX thread...\n", so->id); kthread_stop(so->tx_opt.thread); so->tx_opt.thread = NULL; } msleep(Q_GRACE_PERIOD); pfq_shared_queue_free(so); } } break; case Q_SO_GROUP_BIND: { struct pfq_binding bind; if (optlen != sizeof(struct pfq_binding)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, bind.gid, "add binding"); pfq_devmap_update(map_set, bind.if_index, bind.hw_queue, bind.gid); } break; case Q_SO_GROUP_UNBIND: { struct pfq_binding bind; if (optlen != sizeof(struct pfq_binding)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, bind.gid, "remove binding"); pfq_devmap_update(map_reset, bind.if_index, bind.hw_queue, bind.gid); } break; case Q_SO_EGRESS_BIND: { struct pfq_binding info; if (optlen != sizeof(info)) return -EINVAL; if (copy_from_user(&info, optval, optlen)) return -EFAULT; rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), info.if_index)) { rcu_read_unlock(); pr_devel("[PFQ|%d] TX bind: invalid if_index:%d\n", so->id, info.if_index); return -EPERM; } rcu_read_unlock(); if (info.hw_queue < -1) { pr_devel("[PFQ|%d] TX bind: invalid queue:%d\n", so->id, info.hw_queue); return -EPERM; } so->egress_index = info.if_index; so->egress_queue = info.hw_queue; pr_devel("[PFQ|%d] egress bind: if_index:%d hw_queue:%d\n", so->id, so->egress_index, so->egress_queue); } break; case Q_SO_EGRESS_UNBIND: { so->egress_index = 0; so->egress_queue = 0; pr_devel("[PFQ|%d] egress unbind.\n", so->id); } break; case Q_SO_SET_RX_TSTAMP: { int tstamp; if (optlen != sizeof(so->rx_opt.tstamp)) return -EINVAL; if (copy_from_user(&tstamp, optval, optlen)) return -EFAULT; tstamp = tstamp ? 
1 : 0; /* update the timestamp_enabled counter */ atomic_add(tstamp - so->rx_opt.tstamp, &timestamp_enabled); so->rx_opt.tstamp = tstamp; pr_devel("[PFQ|%d] timestamp_enabled counter: %d\n", so->id, atomic_read(&timestamp_enabled)); } break; case Q_SO_SET_RX_CAPLEN: { typeof(so->rx_opt.caplen) caplen; if (optlen != sizeof(caplen)) return -EINVAL; if (copy_from_user(&caplen, optval, optlen)) return -EFAULT; if (caplen > (size_t)cap_len) { pr_devel("[PFQ|%d] invalid caplen:%zu (max: %d)\n", so->id, caplen, cap_len); return -EPERM; } so->rx_opt.caplen = caplen; so->rx_opt.slot_size = MPDB_QUEUE_SLOT_SIZE(so->rx_opt.caplen); pr_devel("[PFQ|%d] caplen:%zu -> slot_size:%zu\n", so->id, so->rx_opt.caplen, so->rx_opt.slot_size); } break; case Q_SO_SET_RX_SLOTS: { typeof(so->rx_opt.size) slots; if (optlen != sizeof(slots)) return -EINVAL; if (copy_from_user(&slots, optval, optlen)) return -EFAULT; if (slots > (size_t)rx_queue_slots) { pr_devel("[PFQ|%d] invalid rx slots:%zu (max: %d)\n", so->id, slots, rx_queue_slots); return -EPERM; } so->rx_opt.size = slots; pr_devel("[PFQ|%d] rx_queue_slots:%zu\n", so->id, so->rx_opt.size); } break; case Q_SO_SET_TX_MAXLEN: { typeof (so->tx_opt.maxlen) maxlen; if (optlen != sizeof(maxlen)) return -EINVAL; if (copy_from_user(&maxlen, optval, optlen)) return -EFAULT; if (maxlen > (size_t)max_len) { pr_devel("[PFQ|%d] invalid maxlen:%zu (max: %d)\n", so->id, maxlen, max_len); return -EPERM; } so->tx_opt.maxlen = maxlen; so->tx_opt.slot_size = SPSC_QUEUE_SLOT_SIZE(so->tx_opt.maxlen); /* max_len: max length */ pr_devel("[PFQ|%d] tx_slot_size:%zu\n", so->id, so->rx_opt.slot_size); } break; case Q_SO_SET_TX_SLOTS: { typeof (so->tx_opt.size) slots; if (optlen != sizeof(slots)) return -EINVAL; if (copy_from_user(&slots, optval, optlen)) return -EFAULT; if (slots & (slots-1)) { pr_devel("[PFQ|%d] tx slots must be a power of two.\n", so->id); return -EINVAL; } if (slots > (size_t)tx_queue_slots) { pr_devel("[PFQ|%d] invalid tx slots:%zu (max: %d)\n", so->id, slots, tx_queue_slots); return -EPERM; } so->tx_opt.size = slots; pr_devel("[PFQ|%d] tx_queue_slots:%zu\n", so->id, so->tx_opt.size); } break; case Q_SO_GROUP_LEAVE: { int gid; if (optlen != sizeof(gid)) return -EINVAL; if (copy_from_user(&gid, optval, optlen)) return -EFAULT; if (pfq_leave_group(gid, so->id) < 0) { return -EFAULT; } pr_devel("[PFQ|%d] leave: gid:%d\n", so->id, gid); } break; case Q_SO_GROUP_FPROG: { struct pfq_fprog fprog; if (optlen != sizeof(fprog)) return -EINVAL; if (copy_from_user(&fprog, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, fprog.gid, "group fprog"); if (fprog.fcode.len > 0) /* set the filter */ { struct sk_filter *filter = pfq_alloc_sk_filter(&fprog.fcode); if (filter == NULL) { pr_devel("[PFQ|%d] fprog error: alloc_sk_filter for gid:%d\n", so->id, fprog.gid); return -EINVAL; } __pfq_set_group_filter(fprog.gid, filter); pr_devel("[PFQ|%d] fprog: gid:%d (fprog len %d bytes)\n", so->id, fprog.gid, fprog.fcode.len); } else /* reset the filter */ { __pfq_set_group_filter(fprog.gid, NULL); pr_devel("[PFQ|%d] fprog: gid:%d (resetting filter)\n", so->id, fprog.gid); } } break; case Q_SO_GROUP_VLAN_FILT_TOGGLE: { struct pfq_vlan_toggle vlan; if (optlen != sizeof(vlan)) return -EINVAL; if (copy_from_user(&vlan, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, vlan.gid, "group vlan filt toggle"); __pfq_toggle_group_vlan_filters(vlan.gid, vlan.toggle); pr_devel("[PFQ|%d] vlan filters %s for gid:%d\n", so->id, (vlan.toggle ?
"enabled" : "disabled"), vlan.gid); } break; case Q_SO_GROUP_VLAN_FILT: { struct pfq_vlan_toggle filt; if (optlen != sizeof(filt)) return -EINVAL; if (copy_from_user(&filt, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, filt.gid, "group vlan filt"); if (filt.vid < -1 || filt.vid > 4094) { pr_devel("[PFQ|%d] vlan_set error: gid:%d invalid vid:%d!\n", so->id, filt.gid, filt.vid); return -EINVAL; } if (!__pfq_vlan_filters_enabled(filt.gid)) { pr_devel("[PFQ|%d] vlan_set error: vlan filters disabled for gid:%d!\n", so->id, filt.gid); return -EPERM; } if (filt.vid == -1) /* any */ { int i; for(i = 1; i < 4095; i++) __pfq_set_group_vlan_filter(filt.gid, filt.toggle, i); } else { __pfq_set_group_vlan_filter(filt.gid, filt.toggle, filt.vid); } pr_devel("[PFQ|%d] vlan_set filter vid %d for gid:%d\n", so->id, filt.vid, filt.gid); } break; case Q_SO_TX_THREAD_BIND: { struct pfq_binding info; if (optlen != sizeof(info)) return -EINVAL; if (copy_from_user(&info, optval, optlen)) return -EFAULT; rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), info.if_index)) { rcu_read_unlock(); pr_devel("[PFQ|%d] TX bind: invalid if_index:%d\n", so->id, info.if_index); return -EPERM; } rcu_read_unlock(); if (info.hw_queue < -1) { pr_devel("[PFQ|%d] TX bind: invalid queue:%d\n", so->id, info.hw_queue); return -EPERM; } to->if_index = info.if_index; to->hw_queue = info.hw_queue; pr_devel("[PFQ|%d] TX bind: if_index:%d hw_queue:%d\n", so->id, to->if_index, to->hw_queue); } break; case Q_SO_TX_THREAD_START: { int cpu; if (to->thread) { pr_devel("[PFQ|%d] TX thread already created on cpu %d!\n", so->id, to->cpu); return -EPERM; } if (to->if_index == -1) { pr_devel("[PFQ|%d] socket TX not bound to any device!\n", so->id); return -EPERM; } if (to->queue_ptr == NULL) { pr_devel("[PFQ|%d] socket not enabled!\n", so->id); return -EPERM; } if (optlen != sizeof(cpu)) return -EINVAL; if (copy_from_user(&cpu, optval, optlen)) return -EFAULT; if (cpu < -1 || (cpu > -1 && !cpu_online(cpu))) { pr_devel("[PFQ|%d] invalid cpu (%d)!\n", so->id, cpu); return -EPERM; } to->cpu = cpu; pr_devel("[PFQ|%d] creating TX thread on cpu %d -> if_index:%d hw_queue:%d\n", so->id, to->cpu, to->if_index, to->hw_queue); to->thread = kthread_create_on_node(pfq_tx_thread, so, to->cpu == -1 ? 
-1 : cpu_to_node(to->cpu), "pfq_tx_%d", so->id); if (IS_ERR(to->thread)) { printk(KERN_INFO "[PFQ] kernel_thread() create failed on cpu %d!\n", to->cpu); return PTR_ERR(to->thread); } if (to->cpu != -1) kthread_bind(to->thread, to->cpu); } break; case Q_SO_TX_THREAD_STOP: { pr_devel("[PFQ|%d] stopping TX thread...\n", so->id); if (!to->thread) { pr_devel("[PFQ|%d] TX thread not running!\n", so->id); return -EPERM; } kthread_stop(to->thread); to->thread = NULL; pr_devel("[PFQ|%d] stop TX thread: done.\n", so->id); } break; case Q_SO_TX_THREAD_WAKEUP: { if (to->if_index == -1) { pr_devel("[PFQ|%d] socket TX not bound to any device!\n", so->id); return -EPERM; } if (!to->thread) { pr_devel("[PFQ|%d] TX thread not running!\n", so->id); return -EPERM; } wake_up_process(to->thread); } break; case Q_SO_TX_QUEUE_FLUSH: { struct net_device *dev; if (to->if_index == -1) { pr_devel("[PFQ|%d] socket TX not bound to any device!\n", so->id); return -EPERM; } if (to->thread && to->thread->state == TASK_RUNNING) { pr_devel("[PFQ|%d] TX thread is running!\n", so->id); return -EPERM; } if (to->queue_ptr == NULL) { pr_devel("[PFQ|%d] socket not enabled!\n", so->id); return -EPERM; } dev = dev_get_by_index(sock_net(&so->sk), to->if_index); if (!dev) { pr_devel("[PFQ|%d] No such device (if_index = %d)\n", so->id, to->if_index); return -EPERM; } pfq_tx_queue_flush(to, dev, get_cpu(), NUMA_NO_NODE); put_cpu(); dev_put(dev); } break; case Q_SO_GROUP_FUNCTION: { struct pfq_group_computation tmp; struct pfq_computation_descr *descr; size_t psize, ucsize; struct pfq_computation_tree *comp; void *context; if (optlen != sizeof(tmp)) return -EINVAL; if (copy_from_user(&tmp, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, tmp.gid, "group computation"); if (copy_from_user(&psize, tmp.prog, sizeof(size_t))) return -EFAULT; pr_devel("[PFQ|%d] computation size: %zu\n", so->id, psize); ucsize = sizeof(size_t) * 2 + psize * sizeof(struct pfq_functional_descr); descr = kmalloc(ucsize, GFP_KERNEL); if (descr == NULL) { pr_devel("[PFQ|%d] computation: out of memory!\n", so->id); return -ENOMEM; } if (copy_from_user(descr, tmp.prog, ucsize)) { pr_devel("[PFQ|%d] computation: copy_from_user error!\n", so->id); kfree(descr); return -EFAULT; } /* print user computation */ pr_devel_computation_descr(descr); /* ensure the correctness of the specified functional computation */ if (pfq_validate_computation_descr(descr) < 0) { pr_devel("[PFQ|%d] invalid expression!\n", so->id); return -EFAULT; } /* allocate context */ context = pfq_context_alloc(descr); if (context == NULL) { pr_devel("[PFQ|%d] context: alloc error!\n", so->id); kfree(descr); return -EFAULT; } /* allocate struct pfq_computation_tree */ comp = pfq_computation_alloc(descr); if (comp == NULL) { pr_devel("[PFQ|%d] computation: alloc error!\n", so->id); kfree(context); kfree(descr); return -EFAULT; } /* link the functional computation */ if (pfq_computation_rtlink(descr, comp, context) < 0) { pr_devel("[PFQ|%d] computation aborted!", so->id); kfree(context); kfree(descr); kfree(comp); return -EPERM; } /* print executable tree data structure */ pr_devel_computation_tree(comp); /* exec init functions */ if (pfq_computation_init(comp) < 0) { pr_devel("[PFQ|%d] computation initialization aborted!", so->id); kfree(context); kfree(descr); kfree(comp); return -EPERM; } /* set the new program */ if (pfq_set_group_prog(tmp.gid, comp, context) < 0) { pr_devel("[PFQ|%d] set group program error!\n", so->id); kfree(context); kfree(descr); kfree(comp); return -EPERM; } 
kfree(descr); return 0; } break; default: { found = false; } break; } return found ? 0 : sock_setsockopt(sock, level, optname, optval, optlen); }
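/*
 * Userspace view of the switch above (sketch): every Q_SO_* case is
 * reached through an ordinary setsockopt() on the PFQ socket, with the
 * option level assumed to be the PF_Q protocol family and the Q_SO_*
 * constants taken from the module's exported headers.
 */
#include <stddef.h>
#include <sys/socket.h>

int pfq_example_set_caplen(int fd, size_t caplen)
{
	return setsockopt(fd, PF_Q, Q_SO_SET_RX_CAPLEN,
			  &caplen, sizeof(caplen));
}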
int pfq_setsockopt(struct socket *sock, int level, int optname, char __user * optval, #if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)) unsigned #endif int optlen) { struct pfq_sock *so = pfq_sk(sock->sk); bool found = true; if (so == NULL) return -EINVAL; switch(optname) { case Q_SO_ENABLE: { unsigned long addr; int err = 0; if (optlen != sizeof(addr)) return -EINVAL; if (copy_from_user(&addr, optval, optlen)) return -EFAULT; err = pfq_shared_queue_enable(so, addr); if (err < 0) { printk(KERN_INFO "[PFQ|%d] enable error!\n", so->id.value); return err; } return 0; } break; case Q_SO_DISABLE: { int err = 0; size_t n; for(n = 0; n < so->tx_opt.num_queues; n++) { if (so->tx_opt.queue[n].task) { pr_devel("[PFQ|%d] stopping Tx[%zu] thread@%p\n", so->id.value, n, so->tx_opt.queue[n].task); kthread_stop(so->tx_opt.queue[n].task); so->tx_opt.queue[n].task = NULL; } } err = pfq_shared_queue_disable(so); if (err < 0) { printk(KERN_INFO "[PFQ|%d] disable error!\n", so->id.value); return err; } } break; case Q_SO_GROUP_BIND: { struct pfq_binding bind; pfq_gid_t gid; if (optlen != sizeof(struct pfq_binding)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; gid.value = bind.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] add bind: gid=%d not joined!\n", so->id.value, bind.gid); return -EACCES; } rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), bind.if_index)) { rcu_read_unlock(); printk(KERN_INFO "[PFQ|%d] bind: invalid if_index=%d!\n", so->id.value, bind.if_index); return -EACCES; } rcu_read_unlock(); pfq_devmap_update(map_set, bind.if_index, bind.hw_queue, gid); } break; case Q_SO_GROUP_UNBIND: { struct pfq_binding bind; pfq_gid_t gid; if (optlen != sizeof(struct pfq_binding)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; gid.value = bind.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] remove bind: gid=%d not joined!\n", so->id.value, bind.gid); return -EACCES; } rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), bind.if_index)) { rcu_read_unlock(); printk(KERN_INFO "[PFQ|%d] unbind: invalid if_index=%d\n", so->id.value, bind.if_index); return -EPERM; } rcu_read_unlock(); pfq_devmap_update(map_reset, bind.if_index, bind.hw_queue, gid); } break; case Q_SO_EGRESS_BIND: { struct pfq_binding info; if (optlen != sizeof(info)) return -EINVAL; if (copy_from_user(&info, optval, optlen)) return -EFAULT; rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), info.if_index)) { rcu_read_unlock(); printk(KERN_INFO "[PFQ|%d] egress bind: invalid if_index=%d\n", so->id.value, info.if_index); return -EPERM; } rcu_read_unlock(); if (info.hw_queue < -1) { printk(KERN_INFO "[PFQ|%d] egress bind: invalid queue=%d\n", so->id.value, info.hw_queue); return -EPERM; } so->egress_type = pfq_endpoint_device; so->egress_index = info.if_index; so->egress_queue = info.hw_queue; pr_devel("[PFQ|%d] egress bind: device if_index=%d hw_queue=%d\n", so->id.value, so->egress_index, so->egress_queue); } break; case Q_SO_EGRESS_UNBIND: { so->egress_type = pfq_endpoint_socket; so->egress_index = 0; so->egress_queue = 0; pr_devel("[PFQ|%d] egress unbind.\n", so->id.value); } break; case Q_SO_SET_RX_TSTAMP: { int tstamp; if (optlen != sizeof(so->rx_opt.tstamp)) return -EINVAL; if (copy_from_user(&tstamp, optval, optlen)) return -EFAULT; tstamp = tstamp ? 
1 : 0; so->rx_opt.tstamp = tstamp; pr_devel("[PFQ|%d] timestamp enabled.\n", so->id.value); } break; case Q_SO_SET_RX_CAPLEN: { typeof(so->rx_opt.caplen) caplen; if (optlen != sizeof(caplen)) return -EINVAL; if (copy_from_user(&caplen, optval, optlen)) return -EFAULT; if (caplen > (size_t)cap_len) { printk(KERN_INFO "[PFQ|%d] invalid caplen=%zu (max %d)\n", so->id.value, caplen, cap_len); return -EPERM; } so->rx_opt.caplen = caplen; so->rx_opt.slot_size = Q_MPDB_QUEUE_SLOT_SIZE(so->rx_opt.caplen); pr_devel("[PFQ|%d] caplen=%zu, slot_size=%zu\n", so->id.value, so->rx_opt.caplen, so->rx_opt.slot_size); } break; case Q_SO_SET_RX_SLOTS: { typeof(so->rx_opt.queue_size) slots; if (optlen != sizeof(slots)) return -EINVAL; if (copy_from_user(&slots, optval, optlen)) return -EFAULT; if (slots > (size_t)max_queue_slots) { printk(KERN_INFO "[PFQ|%d] invalid Rx slots=%zu (max %d)\n", so->id.value, slots, max_queue_slots); return -EPERM; } so->rx_opt.queue_size = slots; pr_devel("[PFQ|%d] rx_queue slots=%zu\n", so->id.value, so->rx_opt.queue_size); } break; case Q_SO_SET_TX_SLOTS: { typeof (so->tx_opt.queue_size) slots; if (optlen != sizeof(slots)) return -EINVAL; if (copy_from_user(&slots, optval, optlen)) return -EFAULT; if (slots > (size_t)max_queue_slots) { printk(KERN_INFO "[PFQ|%d] invalid Tx slots=%zu (max %d)\n", so->id.value, slots, max_queue_slots); return -EPERM; } so->tx_opt.queue_size = slots; pr_devel("[PFQ|%d] tx_queue slots=%zu\n", so->id.value, so->tx_opt.queue_size); } break; case Q_SO_GROUP_LEAVE: { pfq_gid_t gid; if (optlen != sizeof(gid.value)) return -EINVAL; if (copy_from_user(&gid.value, optval, optlen)) return -EFAULT; if (pfq_leave_group(gid, so->id) < 0) return -EFAULT; pr_devel("[PFQ|%d] leave: gid=%d\n", so->id.value, gid.value); } break; case Q_SO_GROUP_FPROG: { struct pfq_fprog fprog; pfq_gid_t gid; if (optlen != sizeof(fprog)) return -EINVAL; if (copy_from_user(&fprog, optval, optlen)) return -EFAULT; gid.value = fprog.gid; if (!pfq_has_joined_group(gid, so->id)) { /* don't set the first and return */ return 0; } if (fprog.fcode.len > 0) { /* set the filter */ struct sk_filter *filter; if (fprog.fcode.len == 1) { /* check for dummey BPF_CLASS == BPF_RET */ if (BPF_CLASS(fprog.fcode.filter[0].code) == BPF_RET) { pr_devel("[PFQ|%d] fprog: BPF_RET optimized out!\n", so->id.value); return 0; } } filter = pfq_alloc_sk_filter(&fprog.fcode); if (filter == NULL) { printk(KERN_INFO "[PFQ|%d] fprog error: alloc_sk_filter for gid=%d\n", so->id.value, fprog.gid); return -EINVAL; } pfq_set_group_filter(gid, filter); pr_devel("[PFQ|%d] fprog: gid=%d (fprog len %d bytes)\n", so->id.value, fprog.gid, fprog.fcode.len); } else { /* reset the filter */ pfq_set_group_filter(gid, NULL); pr_devel("[PFQ|%d] fprog: gid=%d (resetting filter)\n", so->id.value, fprog.gid); } } break; case Q_SO_GROUP_VLAN_FILT_TOGGLE: { struct pfq_vlan_toggle vlan; pfq_gid_t gid; if (optlen != sizeof(vlan)) return -EINVAL; if (copy_from_user(&vlan, optval, optlen)) return -EFAULT; gid.value = vlan.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] vlan filter toggle: gid=%d not joined!\n", so->id.value, vlan.gid); return -EACCES; } pfq_toggle_group_vlan_filters(gid, vlan.toggle); pr_devel("[PFQ|%d] vlan filters %s for gid=%d\n", so->id.value, (vlan.toggle ? 
"enabled" : "disabled"), vlan.gid); } break; case Q_SO_GROUP_VLAN_FILT: { struct pfq_vlan_toggle filt; pfq_gid_t gid; if (optlen != sizeof(filt)) return -EINVAL; if (copy_from_user(&filt, optval, optlen)) return -EFAULT; gid.value = filt.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] vlan filter: gid=%d not joined!\n", so->id.value, filt.gid); return -EACCES; } if (filt.vid < -1 || filt.vid > 4094) { printk(KERN_INFO "[PFQ|%d] vlan error: invalid vid=%d for gid=%d!\n", so->id.value, filt.vid, filt.gid); return -EINVAL; } if (!pfq_vlan_filters_enabled(gid)) { printk(KERN_INFO "[PFQ|%d] vlan error: vlan filters disabled for gid=%d!\n", so->id.value, filt.gid); return -EPERM; } if (filt.vid == -1) { /* any */ int i; for(i = 1; i < 4095; i++) { pfq_set_group_vlan_filter(gid, filt.toggle, i); } } else { pfq_set_group_vlan_filter(gid, filt.toggle, filt.vid); } pr_devel("[PFQ|%d] vlan filter vid %d set for gid=%d\n", so->id.value, filt.vid, filt.gid); } break; case Q_SO_TX_BIND: { struct pfq_binding info; size_t i; if (optlen != sizeof(info)) return -EINVAL; if (copy_from_user(&info, optval, optlen)) return -EFAULT; if (so->tx_opt.num_queues >= Q_MAX_TX_QUEUES) { printk(KERN_INFO "[PFQ|%d] Tx bind: max number of queues exceeded!\n", so->id.value); return -EPERM; } rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), info.if_index)) { rcu_read_unlock(); printk(KERN_INFO "[PFQ|%d] Tx bind: invalid if_index=%d\n", so->id.value, info.if_index); return -EPERM; } rcu_read_unlock(); if (info.hw_queue < -1) { printk(KERN_INFO "[PFQ|%d] Tx bind: invalid queue=%d\n", so->id.value, info.hw_queue); return -EPERM; } i = so->tx_opt.num_queues; if (info.cpu < -1) { printk(KERN_INFO "[PFQ|%d] Tx[%zu] thread: invalid cpu (%d)!\n", so->id.value, i, info.cpu); return -EPERM; } so->tx_opt.queue[i].if_index = info.if_index; so->tx_opt.queue[i].hw_queue = info.hw_queue; so->tx_opt.queue[i].cpu = info.cpu; so->tx_opt.num_queues++; pr_devel("[PFQ|%d] Tx[%zu] bind: if_index=%d hw_queue=%d cpu=%d\n", so->id.value, i, so->tx_opt.queue[i].if_index, so->tx_opt.queue[i].hw_queue, info.cpu); } break; case Q_SO_TX_UNBIND: { size_t n; for(n = 0; n < Q_MAX_TX_QUEUES; ++n) { so->tx_opt.queue[n].if_index = -1; so->tx_opt.queue[n].hw_queue = -1; so->tx_opt.queue[n].cpu = -1; } } break; case Q_SO_TX_FLUSH: { int queue, err = 0; size_t n; if (optlen != sizeof(queue)) return -EINVAL; if (copy_from_user(&queue, optval, optlen)) return -EFAULT; if (pfq_get_tx_queue(&so->tx_opt, 0) == NULL) { printk(KERN_INFO "[PFQ|%d] Tx queue flush: socket not enabled!\n", so->id.value); return -EPERM; } if (queue < -1 || (queue > 0 && queue >= so->tx_opt.num_queues)) { printk(KERN_INFO "[PFQ|%d] Tx queue flush: bad queue %d (num_queue=%zu)!\n", so->id.value, queue, so->tx_opt.num_queues); return -EPERM; } if (queue != -1) { pr_devel("[PFQ|%d] flushing Tx queue %d...\n", so->id.value, queue); return pfq_queue_flush(so, queue); } for(n = 0; n < so->tx_opt.num_queues; n++) { if (pfq_queue_flush(so, n) != 0) { printk(KERN_INFO "[PFQ|%d] Tx[%zu] queue flush: flush error (if_index=%d)!\n", so->id.value, n, so->tx_opt.queue[n].if_index); err = -EPERM; } } if (err) return err; } break; case Q_SO_TX_ASYNC: { int toggle, err = 0; size_t n; if (optlen != sizeof(toggle)) return -EINVAL; if (copy_from_user(&toggle, optval, optlen)) return -EFAULT; if (toggle) { size_t started = 0; if (pfq_get_tx_queue(&so->tx_opt, 0) == NULL) { printk(KERN_INFO "[PFQ|%d] Tx queue flush: socket not enabled!\n", so->id.value); return -EPERM; } /* 
start Tx kernel threads */ for(n = 0; n < Q_MAX_TX_QUEUES; n++) { struct pfq_thread_data *data; int node; if (so->tx_opt.queue[n].if_index == -1) break; if (so->tx_opt.queue[n].cpu == Q_NO_KTHREAD) continue; if (so->tx_opt.queue[n].task) { printk(KERN_INFO "[PFQ|%d] kernel_thread: Tx[%zu] thread already running!\n", so->id.value, n); continue; } data = kmalloc(sizeof(struct pfq_thread_data), GFP_KERNEL); if (!data) { printk(KERN_INFO "[PFQ|%d] kernel_thread: could not allocate thread_data! Failed starting thread on cpu %d!\n", so->id.value, so->tx_opt.queue[n].cpu); err = -EPERM; continue; } data->so = so; data->id = n; node = cpu_online(so->tx_opt.queue[n].cpu) ? cpu_to_node(so->tx_opt.queue[n].cpu) : NUMA_NO_NODE; pr_devel("[PFQ|%d] creating Tx[%zu] thread on cpu %d: if_index=%d hw_queue=%d\n", so->id.value, n, so->tx_opt.queue[n].cpu, so->tx_opt.queue[n].if_index, so->tx_opt.queue[n].hw_queue); so->tx_opt.queue[n].task = kthread_create_on_node(pfq_tx_thread, data, node, "pfq_tx_%d#%zu", so->id.value, n); if (IS_ERR(so->tx_opt.queue[n].task)) { printk(KERN_INFO "[PFQ|%d] kernel_thread: create failed on cpu %d!\n", so->id.value, so->tx_opt.queue[n].cpu); err = PTR_ERR(so->tx_opt.queue[n].task); so->tx_opt.queue[n].task = NULL; kfree (data); continue; } /* bind the thread */ kthread_bind(so->tx_opt.queue[n].task, so->tx_opt.queue[n].cpu); /* start it */ wake_up_process(so->tx_opt.queue[n].task); started++; } if (started == 0) { printk(KERN_INFO "[PFQ|%d] no kernel thread started!\n", so->id.value); err = -EPERM; } } else { /* stop running threads */ for(n = 0; n < so->tx_opt.num_queues; n++) { if (so->tx_opt.queue[n].task) { pr_devel("[PFQ|%d] stopping Tx[%zu] kernel thread@%p\n", so->id.value, n, so->tx_opt.queue[n].task); kthread_stop(so->tx_opt.queue[n].task); so->tx_opt.queue[n].task = NULL; } } } return err; } break; case Q_SO_GROUP_FUNCTION: { struct pfq_computation_descr *descr = NULL; struct pfq_computation_tree *comp = NULL; struct pfq_group_computation tmp; size_t psize, ucsize; void *context = NULL; pfq_gid_t gid; int err = 0; if (optlen != sizeof(tmp)) return -EINVAL; if (copy_from_user(&tmp, optval, optlen)) return -EFAULT; gid.value = tmp.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] group computation: gid=%d not joined!\n", so->id.value, tmp.gid); return -EACCES; } if (copy_from_user(&psize, tmp.prog, sizeof(size_t))) return -EFAULT; pr_devel("[PFQ|%d] computation size: %zu\n", so->id.value, psize); ucsize = sizeof(size_t) * 2 + psize * sizeof(struct pfq_functional_descr); descr = kmalloc(ucsize, GFP_KERNEL); if (descr == NULL) { printk(KERN_INFO "[PFQ|%d] computation: out of memory!\n", so->id.value); return -ENOMEM; } if (copy_from_user(descr, tmp.prog, ucsize)) { printk(KERN_INFO "[PFQ|%d] computation: copy_from_user error!\n", so->id.value); err = -EFAULT; goto error; } /* print user computation */ pr_devel_computation_descr(descr); /* check the correctness of computation */ if (pfq_check_computation_descr(descr) < 0) { printk(KERN_INFO "[PFQ|%d] invalid expression!\n", so->id.value); err = -EFAULT; goto error; } /* allocate context */ context = pfq_context_alloc(descr); if (context == NULL) { printk(KERN_INFO "[PFQ|%d] context: alloc error!\n", so->id.value); err = -EFAULT; goto error; } /* allocate a pfq_computation_tree */ comp = pfq_computation_alloc(descr); if (comp == NULL) { printk(KERN_INFO "[PFQ|%d] computation: alloc error!\n", so->id.value); err = -EFAULT; goto error; } /* link functions of computation */ if 
(pfq_computation_rtlink(descr, comp, context) < 0) { printk(KERN_INFO "[PFQ|%d] computation aborted!", so->id.value); err = -EPERM; goto error; } /* print executable tree data structure */ pr_devel_computation_tree(comp); /* run init functions */ if (pfq_computation_init(comp) < 0) { printk(KERN_INFO "[PFQ|%d] initialization of computation aborted!", so->id.value); pfq_computation_fini(comp); err = -EPERM; goto error; } /* enable functional program */ if (pfq_set_group_prog(gid, comp, context) < 0) { printk(KERN_INFO "[PFQ|%d] set group program error!\n", so->id.value); err = -EPERM; goto error; } kfree(descr); return 0; error: kfree(comp); kfree(context); kfree(descr); return err; } break; default: { found = false; } break; } return found ? 0 : sock_setsockopt(sock, level, optname, optval, optlen); }
/* * System call vectors. Since I (RIB) want to rewrite sockets as streams, * we have this level of indirection. Not a lot of overhead, since more of * the work is done via read/write/select directly. */ asmlinkage int sys_socketcall(int call, unsigned long *args) { int er; switch(call) { case SYS_SOCKET: er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); if(er) return er; return(sock_socket(get_fs_long(args+0), get_fs_long(args+1), get_fs_long(args+2))); case SYS_BIND: er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); if(er) return er; return(sock_bind(get_fs_long(args+0), (struct sockaddr *)get_fs_long(args+1), get_fs_long(args+2))); case SYS_CONNECT: er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); if(er) return er; return(sock_connect(get_fs_long(args+0), (struct sockaddr *)get_fs_long(args+1), get_fs_long(args+2))); case SYS_LISTEN: er=verify_area(VERIFY_READ, args, 2 * sizeof(long)); if(er) return er; return(sock_listen(get_fs_long(args+0), get_fs_long(args+1))); case SYS_ACCEPT: er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); if(er) return er; return(sock_accept(get_fs_long(args+0), (struct sockaddr *)get_fs_long(args+1), (int *)get_fs_long(args+2))); case SYS_GETSOCKNAME: er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); if(er) return er; return(sock_getsockname(get_fs_long(args+0), (struct sockaddr *)get_fs_long(args+1), (int *)get_fs_long(args+2))); case SYS_GETPEERNAME: er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); if(er) return er; return(sock_getpeername(get_fs_long(args+0), (struct sockaddr *)get_fs_long(args+1), (int *)get_fs_long(args+2))); case SYS_SOCKETPAIR: er=verify_area(VERIFY_READ, args, 4 * sizeof(long)); if(er) return er; return(sock_socketpair(get_fs_long(args+0), get_fs_long(args+1), get_fs_long(args+2), (unsigned long *)get_fs_long(args+3))); case SYS_SEND: er=verify_area(VERIFY_READ, args, 4 * sizeof(unsigned long)); if(er) return er; return(sock_send(get_fs_long(args+0), (void *)get_fs_long(args+1), get_fs_long(args+2), get_fs_long(args+3))); case SYS_SENDTO: er=verify_area(VERIFY_READ, args, 6 * sizeof(unsigned long)); if(er) return er; return(sock_sendto(get_fs_long(args+0), (void *)get_fs_long(args+1), get_fs_long(args+2), get_fs_long(args+3), (struct sockaddr *)get_fs_long(args+4), get_fs_long(args+5))); case SYS_RECV: er=verify_area(VERIFY_READ, args, 4 * sizeof(unsigned long)); if(er) return er; return(sock_recv(get_fs_long(args+0), (void *)get_fs_long(args+1), get_fs_long(args+2), get_fs_long(args+3))); case SYS_RECVFROM: er=verify_area(VERIFY_READ, args, 6 * sizeof(unsigned long)); if(er) return er; return(sock_recvfrom(get_fs_long(args+0), (void *)get_fs_long(args+1), get_fs_long(args+2), get_fs_long(args+3), (struct sockaddr *)get_fs_long(args+4), (int *)get_fs_long(args+5))); case SYS_SHUTDOWN: er=verify_area(VERIFY_READ, args, 2* sizeof(unsigned long)); if(er) return er; return(sock_shutdown(get_fs_long(args+0), get_fs_long(args+1))); case SYS_SETSOCKOPT: er=verify_area(VERIFY_READ, args, 5*sizeof(unsigned long)); if(er) return er; return(sock_setsockopt(get_fs_long(args+0), get_fs_long(args+1), get_fs_long(args+2), (char *)get_fs_long(args+3), get_fs_long(args+4))); case SYS_GETSOCKOPT: er=verify_area(VERIFY_READ, args, 5*sizeof(unsigned long)); if(er) return er; return(sock_getsockopt(get_fs_long(args+0), get_fs_long(args+1), get_fs_long(args+2), (char *)get_fs_long(args+3), (int *)get_fs_long(args+4))); default: return(-EINVAL); } }
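/*
 * For illustration (not from the original source): on 32-bit x86 Linux,
 * libc funnels socket(2) through the multiplexer above. The raw form,
 * with SYS_SOCKET coming from <linux/net.h>:
 */
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/net.h>

int raw_socket(int domain, int type, int protocol)
{
	long args[3] = { domain, type, protocol };

	return syscall(SYS_socketcall, SYS_SOCKET, args);
}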
int ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) { socket_t sock = C2B_SOCK(conn->ksnc_sock); size_t sndlen; int nob; int rc; #if SOCKNAL_SINGLE_FRAG_TX struct iovec scratch; struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = tx->tx_niov; #endif struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_iov = scratchiov, .msg_iovlen = niov, .msg_control = NULL, .msg_controllen = 0, .msg_flags = MSG_DONTWAIT }; int i; for (nob = i = 0; i < niov; i++) { scratchiov[i] = tx->tx_iov[i]; nob += scratchiov[i].iov_len; } /* * XXX Liang: * Linux has MSG_MORE, do we have anything to * reduce number of partial TCP segments sent? */ rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen); if (rc == 0) rc = sndlen; return rc; } int ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) { socket_t sock = C2B_SOCK(conn->ksnc_sock); lnet_kiov_t *kiov = tx->tx_kiov; int rc; int nob; size_t sndlen; #if SOCKNAL_SINGLE_FRAG_TX struct iovec scratch; struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = tx->tx_nkiov; #endif struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_iov = scratchiov, .msg_iovlen = niov, .msg_control = NULL, .msg_controllen = 0, .msg_flags = MSG_DONTWAIT }; int i; for (nob = i = 0; i < niov; i++) { scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; nob += scratchiov[i].iov_len = kiov[i].kiov_len; } /* * XXX Liang: * Linux has MSG_MORE, do wen have anyting to * reduce number of partial TCP segments sent? */ rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen); for (i = 0; i < niov; i++) cfs_kunmap(kiov[i].kiov_page); if (rc == 0) rc = sndlen; return rc; } int ksocknal_lib_recv_iov (ksock_conn_t *conn) { #if SOCKNAL_SINGLE_FRAG_RX struct iovec scratch; struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = conn->ksnc_rx_niov; #endif struct iovec *iov = conn->ksnc_rx_iov; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_iov = scratchiov, .msg_iovlen = niov, .msg_control = NULL, .msg_controllen = 0, .msg_flags = 0 }; size_t rcvlen; int nob; int i; int rc; LASSERT (niov > 0); for (nob = i = 0; i < niov; i++) { scratchiov[i] = iov[i]; nob += scratchiov[i].iov_len; } LASSERT (nob <= conn->ksnc_rx_nob_wanted); rc = -sock_receive (C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen); if (rc == 0) rc = rcvlen; return rc; } int ksocknal_lib_recv_kiov (ksock_conn_t *conn) { #if SOCKNAL_SINGLE_FRAG_RX struct iovec scratch; struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = conn->ksnc_rx_nkiov; #endif lnet_kiov_t *kiov = conn->ksnc_rx_kiov; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_iov = scratchiov, .msg_iovlen = niov, .msg_control = NULL, .msg_controllen = 0, .msg_flags = 0 }; int nob; int i; size_t rcvlen; int rc; /* NB we can't trust socket ops to either consume our iovs * or leave them alone. 
*/ for (nob = i = 0; i < niov; i++) { scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; nob += scratchiov[i].iov_len = kiov[i].kiov_len; } LASSERT (nob <= conn->ksnc_rx_nob_wanted); rc = -sock_receive(C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen); for (i = 0; i < niov; i++) cfs_kunmap(kiov[i].kiov_page); if (rc == 0) rc = rcvlen; return (rc); } void ksocknal_lib_eager_ack (ksock_conn_t *conn) { /* XXX Liang: */ } int ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle) { socket_t sock = C2B_SOCK(conn->ksnc_sock); int len; int rc; rc = ksocknal_connsock_addref(conn); if (rc != 0) { LASSERT (conn->ksnc_closing); *txmem = *rxmem = *nagle = 0; return (-ESHUTDOWN); } rc = libcfs_sock_getbuf(conn->ksnc_sock, txmem, rxmem); if (rc == 0) { len = sizeof(*nagle); rc = -sock_getsockopt(sock, IPPROTO_TCP, TCP_NODELAY, nagle, &len); } ksocknal_connsock_decref(conn); if (rc == 0) *nagle = !*nagle; else *txmem = *rxmem = *nagle = 0; return (rc); } int ksocknal_lib_setup_sock (cfs_socket_t *sock) { int rc; int option; int keep_idle; int keep_intvl; int keep_count; int do_keepalive; socket_t so = C2B_SOCK(sock); struct linger linger; /* Ensure this socket aborts active sends immediately when we close * it. */ linger.l_onoff = 0; linger.l_linger = 0; rc = -sock_setsockopt(so, SOL_SOCKET, SO_LINGER, &linger, sizeof(linger)); if (rc != 0) { CERROR ("Can't set SO_LINGER: %d\n", rc); return (rc); } if (!*ksocknal_tunables.ksnd_nagle) { option = 1; rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &option, sizeof(option)); if (rc != 0) { CERROR ("Can't disable nagle: %d\n", rc); return (rc); } } rc = libcfs_sock_setbuf(sock, *ksocknal_tunables.ksnd_tx_buffer_size, *ksocknal_tunables.ksnd_rx_buffer_size); if (rc != 0) { CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n", *ksocknal_tunables.ksnd_tx_buffer_size, *ksocknal_tunables.ksnd_rx_buffer_size, rc); return (rc); } /* snapshot tunables */ keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; keep_count = *ksocknal_tunables.ksnd_keepalive_count; keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); option = (do_keepalive ? 1 : 0); rc = -sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &option, sizeof(option)); if (rc != 0) { CERROR ("Can't set SO_KEEPALIVE: %d\n", rc); return (rc); } if (!do_keepalive) return (rc); rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_KEEPALIVE, &keep_idle, sizeof(keep_idle)); return (rc); } void ksocknal_lib_push_conn(ksock_conn_t *conn) { socket_t sock; int val = 1; int rc; rc = ksocknal_connsock_addref(conn); if (rc != 0) /* being shut down */ return; sock = C2B_SOCK(conn->ksnc_sock); rc = -sock_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)); LASSERT(rc == 0); ksocknal_connsock_decref(conn); return; }
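The XXX comments in the send paths above ask whether Darwin has an analogue of Linux's MSG_MORE for avoiding short TCP segments. For reference, a minimal sketch of how the Linux flag is used: the header send is held back until the final send flushes it. The function name and the two-part message split are illustrative, and short writes are not retried here.

#include <sys/types.h>
#include <sys/socket.h>

/* Sketch: MSG_MORE tells the Linux stack more data follows, so the
 * header below should not go out as a partial segment on its own. */
static ssize_t send_hdr_then_body(int fd, const void *hdr, size_t hlen,
                                  const void *body, size_t blen)
{
        ssize_t rc = send(fd, hdr, hlen, MSG_MORE);  /* hold back */
        if (rc < 0)
                return rc;
        return send(fd, body, blen, 0);              /* final part flushes */
}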
static inline void process_commands() { int ringset_idx; ipaugenblick_cmd_t *cmd; struct rte_mbuf *mbuf; struct socket *sock; char *p; struct sockaddr_in addr; struct sockaddr_in *p_sockaddr; struct rtentry rtentry; int len; cmd = ipaugenblick_dequeue_command_buf(); if(!cmd) return; switch(cmd->cmd) { case IPAUGENBLICK_OPEN_SOCKET_COMMAND: ipaugenblick_log(IPAUGENBLICK_LOG_DEBUG,"open_sock %x %x\n",cmd->u.open_sock.family,cmd->u.open_sock.type); sock = app_glue_create_socket(cmd->u.open_sock.family,cmd->u.open_sock.type); if(sock) { ipaugenblick_log(IPAUGENBLICK_LOG_DEBUG,"setting user data %p\n",sock); socket_satelite_data[cmd->ringset_idx].ringset_idx = cmd->ringset_idx; socket_satelite_data[cmd->ringset_idx].parent_idx = cmd->parent_idx; socket_satelite_data[cmd->ringset_idx].apppid = cmd->u.open_sock.pid; app_glue_set_user_data(sock,(void *)&socket_satelite_data[cmd->ringset_idx]); socket_satelite_data[cmd->ringset_idx].socket = sock; ipaugenblick_log(IPAUGENBLICK_LOG_DEBUG,"%d setting tx_space %d\n",__LINE__,sk_stream_wspace(sock->sk)); user_set_socket_tx_space(&g_ipaugenblick_sockets[socket_satelite_data[cmd->ringset_idx].ringset_idx].tx_space,sk_stream_wspace(sock->sk)); } ipaugenblick_log(IPAUGENBLICK_LOG_DEBUG,"Done\n"); break; case IPAUGENBLICK_SOCKET_CONNECT_BIND_COMMAND: if(socket_satelite_data[cmd->ringset_idx].socket) { if(cmd->u.socket_connect_bind.is_connect) { ipaugenblick_log(IPAUGENBLICK_LOG_DEBUG,"connect %x\n",cmd->ringset_idx); if(app_glue_v4_connect(socket_satelite_data[cmd->ringset_idx].socket, cmd->u.socket_connect_bind.ipaddr, cmd->u.socket_connect_bind.port)) { ipaugenblick_log(IPAUGENBLICK_LOG_ERR,"failed to connect socket\n"); } else { ipaugenblick_log(IPAUGENBLICK_LOG_DEBUG,"socket connected\n"); len = sizeof(addr); inet_getname(socket_satelite_data[cmd->ringset_idx].socket,&addr,&len,0); g_ipaugenblick_sockets[cmd->ringset_idx].local_ipaddr = addr.sin_addr.s_addr; g_ipaugenblick_sockets[cmd->ringset_idx].local_port = addr.sin_port; } } else { ipaugenblick_log(IPAUGENBLICK_LOG_DEBUG,"bind %x\n",cmd->ringset_idx); if(app_glue_v4_bind(socket_satelite_data[cmd->ringset_idx].socket, cmd->u.socket_connect_bind.ipaddr, cmd->u.socket_connect_bind.port)) { ipaugenblick_log(IPAUGENBLICK_LOG_ERR,"cannot bind %x %x\n",cmd->u.socket_connect_bind.ipaddr,cmd->u.socket_connect_bind.port); } } } else { ipaugenblick_log(IPAUGENBLICK_LOG_ERR,"no socket to invoke command!!!\n"); } break; case IPAUGENBLICK_LISTEN_SOCKET_COMMAND: ipaugenblick_log(IPAUGENBLICK_LOG_DEBUG,"listen %x\n",cmd->ringset_idx); if(app_glue_v4_listen(socket_satelite_data[cmd->ringset_idx].socket)) { ipaugenblick_log(IPAUGENBLICK_LOG_ERR,"failed listening\n"); } break; case IPAUGENBLICK_SOCKET_CLOSE_COMMAND: if(socket_satelite_data[cmd->ringset_idx].socket) { // ipaugenblick_log(IPAUGENBLICK_LOG_INFO,"closing socket %d %p\n",cmd->ringset_idx,socket_satelite_data[cmd->ringset_idx].socket); // printf("%s %d %p\n",__FILE__,__LINE__,socket_satelite_data[cmd->ringset_idx].socket); // user_on_transmission_opportunity(socket_satelite_data[cmd->ringset_idx].socket); user_flush_rx_tx((struct socket *)socket_satelite_data[cmd->ringset_idx].socket); app_glue_close_socket((struct socket *)socket_satelite_data[cmd->ringset_idx].socket); socket_satelite_data[cmd->ringset_idx].socket = NULL; socket_satelite_data[cmd->ringset_idx].ringset_idx = -1; socket_satelite_data[cmd->ringset_idx].parent_idx = -1; ipaugenblick_free_socket(cmd->ringset_idx); user_sockets_closed++; } break; case IPAUGENBLICK_SOCKET_TX_KICK_COMMAND:
if(socket_satelite_data[cmd->ringset_idx].socket) { user_kick_tx++; // user_data_available_cbk(socket_satelite_data[cmd->ringset_idx].socket); user_on_transmission_opportunity(socket_satelite_data[cmd->ringset_idx].socket); } break; case IPAUGENBLICK_SOCKET_RX_KICK_COMMAND: if(socket_satelite_data[cmd->ringset_idx].socket) { user_kick_rx++; user_data_available_cbk(socket_satelite_data[cmd->ringset_idx].socket); // user_on_transmission_opportunity(socket_satelite_data[cmd->ringset_idx].socket); } break; case IPAUGENBLICK_SET_SOCKET_RING_COMMAND: //ipaugenblick_log(IPAUGENBLICK_LOG_DEBUG,"%s %d %d %d %p\n",__FILE__,__LINE__,cmd->ringset_idx,cmd->parent_idx,cmd->u.set_socket_ring.socket_descr); socket_satelite_data[cmd->ringset_idx].ringset_idx = cmd->ringset_idx; if(cmd->parent_idx != -1) socket_satelite_data[cmd->ringset_idx].parent_idx = cmd->parent_idx; socket_satelite_data[cmd->ringset_idx].apppid = cmd->u.set_socket_ring.pid; app_glue_set_user_data(cmd->u.set_socket_ring.socket_descr,&socket_satelite_data[cmd->ringset_idx]); socket_satelite_data[cmd->ringset_idx].socket = cmd->u.set_socket_ring.socket_descr; //ipaugenblick_log(IPAUGENBLICK_LOG_DEBUG,"setting tx space: %d connidx %d\n",sk_stream_wspace(socket_satelite_data[cmd->ringset_idx].socket->sk),g_ipaugenblick_sockets[socket_satelite_data[cmd->ringset_idx].ringset_idx].connection_idx); user_set_socket_tx_space(&g_ipaugenblick_sockets[socket_satelite_data[cmd->ringset_idx].ringset_idx].tx_space,sk_stream_wspace(socket_satelite_data[cmd->ringset_idx].socket->sk)); // user_on_transmission_opportunity(socket_satelite_data[cmd->ringset_idx].socket); user_data_available_cbk(socket_satelite_data[cmd->ringset_idx].socket); ipaugenblick_mark_writable(&socket_satelite_data[cmd->ringset_idx]); ipaugenblick_mark_readable(&socket_satelite_data[cmd->ringset_idx]); user_client_app_accepted++; break; case IPAUGENBLICK_SET_SOCKET_SELECT_COMMAND: // ipaugenblick_log(IPAUGENBLICK_LOG_DEBUG,"setting selector %d for socket %d\n",cmd->u.set_socket_select.socket_select,cmd->ringset_idx); socket_satelite_data[cmd->ringset_idx].parent_idx = cmd->u.set_socket_select.socket_select; socket_satelite_data[cmd->ringset_idx].apppid = cmd->u.set_socket_select.pid; user_data_available_cbk(socket_satelite_data[cmd->ringset_idx].socket); ipaugenblick_mark_writable(&socket_satelite_data[cmd->ringset_idx]); ipaugenblick_mark_readable(&socket_satelite_data[cmd->ringset_idx]); break; case IPAUGENBLICK_SOCKET_TX_POOL_EMPTY_COMMAND: if(socket_satelite_data[cmd->ringset_idx].socket) { if(!socket_satelite_data[cmd->ringset_idx].socket->buffers_available_notification_queue_present) { TAILQ_INSERT_TAIL(&buffers_available_notification_socket_list_head,socket_satelite_data[cmd->ringset_idx].socket,buffers_available_notification_queue_entry); socket_satelite_data[cmd->ringset_idx].socket->buffers_available_notification_queue_present = 1; if(socket_satelite_data[cmd->ringset_idx].socket->type == SOCK_DGRAM) user_set_socket_tx_space(&g_ipaugenblick_sockets[socket_satelite_data[cmd->ringset_idx].ringset_idx].tx_space,sk_stream_wspace(socket_satelite_data[cmd->ringset_idx].socket->sk)); } } break; case IPAUGENBLICK_ROUTE_ADD_COMMAND: memset((void *)&rtentry,0,sizeof(rtentry)); rtentry.rt_metric = cmd->u.route.metric; rtentry.rt_flags = RTF_UP|RTF_GATEWAY; p_sockaddr = (struct sockaddr_in *)&rtentry.rt_dst; p_sockaddr->sin_family = AF_INET; p_sockaddr->sin_addr.s_addr = cmd->u.route.dest_ipaddr; p_sockaddr = (struct sockaddr_in *)&rtentry.rt_gateway; p_sockaddr->sin_family = AF_INET; 
p_sockaddr->sin_addr.s_addr = cmd->u.route.next_hop; p_sockaddr = (struct sockaddr_in *)&rtentry.rt_genmask; p_sockaddr->sin_family = AF_INET; p_sockaddr->sin_addr.s_addr = cmd->u.route.dest_mask; if(ip_rt_ioctl(&init_net,SIOCADDRT,&rtentry)) { ipaugenblick_log(IPAUGENBLICK_LOG_ERR,"CANNOT ADD ROUTE ENTRY %x %x %x\n", ((struct sockaddr_in *)&rtentry.rt_dst)->sin_addr.s_addr, ((struct sockaddr_in *)&rtentry.rt_gateway)->sin_addr.s_addr, ((struct sockaddr_in *)&rtentry.rt_genmask)->sin_addr.s_addr); } else { ipaugenblick_log(IPAUGENBLICK_LOG_INFO,"ROUTE ENTRY %x %x %x is added\n", ((struct sockaddr_in *)&rtentry.rt_dst)->sin_addr.s_addr, ((struct sockaddr_in *)&rtentry.rt_gateway)->sin_addr.s_addr, ((struct sockaddr_in *)&rtentry.rt_genmask)->sin_addr.s_addr); } break; case IPAUGENBLICK_ROUTE_DEL_COMMAND: memset((void *)&rtentry,0,sizeof(rtentry)); p_sockaddr = (struct sockaddr_in *)&rtentry.rt_dst; p_sockaddr->sin_family = AF_INET; p_sockaddr->sin_addr.s_addr = cmd->u.route.dest_ipaddr; p_sockaddr = (struct sockaddr_in *)&rtentry.rt_gateway; p_sockaddr->sin_family = AF_INET; p_sockaddr->sin_addr.s_addr = cmd->u.route.next_hop; p_sockaddr = (struct sockaddr_in *)&rtentry.rt_genmask; p_sockaddr->sin_family = AF_INET; p_sockaddr->sin_addr.s_addr = cmd->u.route.dest_mask; if(ip_rt_ioctl(&init_net,SIOCDELRT,&rtentry)) { ipaugenblick_log(IPAUGENBLICK_LOG_ERR,"CANNOT DELETE ROUTE ENTRY %x %x %x\n", ((struct sockaddr_in *)&rtentry.rt_dst)->sin_addr.s_addr, ((struct sockaddr_in *)&rtentry.rt_gateway)->sin_addr.s_addr, ((struct sockaddr_in *)&rtentry.rt_genmask)->sin_addr.s_addr); } else { ipaugenblick_log(IPAUGENBLICK_LOG_INFO,"ROUTE ENTRY %x %x %x is deleted\n", ((struct sockaddr_in *)&rtentry.rt_dst)->sin_addr.s_addr, ((struct sockaddr_in *)&rtentry.rt_gateway)->sin_addr.s_addr, ((struct sockaddr_in *)&rtentry.rt_genmask)->sin_addr.s_addr); } break; case IPAUGENBLICK_CONNECT_CLIENT: if(cmd->ringset_idx >= IPAUGENBLICK_CLIENTS_POOL_SIZE) { break; } if(!ipaugenblick_clients[cmd->ringset_idx].is_busy) { TAILQ_INSERT_TAIL(&ipaugenblick_clients_list_head,&ipaugenblick_clients[cmd->ringset_idx],queue_entry); ipaugenblick_clients[cmd->ringset_idx].is_busy = 1; on_client_connect(cmd->ringset_idx); } break; case IPAUGENBLICK_DISCONNECT_CLIENT: if(cmd->ringset_idx >= IPAUGENBLICK_CLIENTS_POOL_SIZE) { break; } if(ipaugenblick_clients[cmd->ringset_idx].is_busy) { TAILQ_REMOVE(&ipaugenblick_clients_list_head,&ipaugenblick_clients[cmd->ringset_idx],queue_entry); ipaugenblick_clients[cmd->ringset_idx].is_busy = 0; } break; case IPAUGENBLICK_SETSOCKOPT_COMMAND: if(socket_satelite_data[cmd->ringset_idx].socket) { sock_setsockopt(socket_satelite_data[cmd->ringset_idx].socket, cmd->u.setsockopt.level, cmd->u.setsockopt.optname, cmd->u.setsockopt.optval, cmd->u.setsockopt.optlen); } break; case IPAUGENBLICK_SOCKET_SHUTDOWN_COMMAND: if(socket_satelite_data[cmd->ringset_idx].socket) { inet_shutdown(socket_satelite_data[cmd->ringset_idx].socket, cmd->u.socket_shutdown.how); user_sockets_shutdown++; } break; case IPAUGENBLICK_SOCKET_DECLINE_COMMAND: user_flush_rx_tx((struct socket *)cmd->u.socket_decline.socket_descr); app_glue_close_socket((struct socket *)cmd->u.socket_decline.socket_descr); user_sockets_closed++; break; default: ipaugenblick_log(IPAUGENBLICK_LOG_ERR,"unknown cmd %d\n",cmd->cmd); break; } ipaugenblick_free_command_buf(cmd); }
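The route add/delete commands above drive ip_rt_ioctl() with a struct rtentry, which is the same structure a user-space process hands to the classic SIOCADDRT ioctl on an AF_INET socket. A self-contained sketch of that user-space analogue follows; the helper name and addresses are illustrative, and the ioctl needs CAP_NET_ADMIN to succeed.

#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/route.h>

/* Hypothetical user-space mirror of IPAUGENBLICK_ROUTE_ADD_COMMAND:
 * fill a struct rtentry and push it through SIOCADDRT. */
static int add_gateway_route(const char *dst, const char *gw, const char *mask)
{
        struct rtentry rt;
        struct sockaddr_in *sin;
        int rc, fd = socket(AF_INET, SOCK_DGRAM, 0);
        if (fd < 0)
                return -1;
        memset(&rt, 0, sizeof(rt));
        rt.rt_flags = RTF_UP | RTF_GATEWAY;
        sin = (struct sockaddr_in *)&rt.rt_dst;
        sin->sin_family = AF_INET;
        inet_pton(AF_INET, dst, &sin->sin_addr);
        sin = (struct sockaddr_in *)&rt.rt_gateway;
        sin->sin_family = AF_INET;
        inet_pton(AF_INET, gw, &sin->sin_addr);
        sin = (struct sockaddr_in *)&rt.rt_genmask;
        sin->sin_family = AF_INET;
        inet_pton(AF_INET, mask, &sin->sin_addr);
        rc = ioctl(fd, SIOCADDRT, &rt);   /* SIOCDELRT removes it again */
        close(fd);
        return rc;
}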
int pfq_setsockopt(struct socket *sock, int level, int optname, char __user * optval, #if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)) unsigned #endif int optlen) { struct pfq_sock *so = pfq_sk(sock->sk); bool found = true; if (so == NULL) return -EINVAL; switch(optname) { case Q_SO_ENABLE: { unsigned long addr; int err = 0; if (optlen != sizeof(addr)) return -EINVAL; if (copy_from_user(&addr, optval, optlen)) return -EFAULT; err = pfq_shared_queue_enable(so, addr); if (err < 0) { printk(KERN_INFO "[PFQ|%d] enable error!\n", so->id); return err; } return 0; } break; case Q_SO_DISABLE: { int err = 0; pfq_sock_tx_unbind(so); msleep(Q_GRACE_PERIOD); err = pfq_shared_queue_disable(so); if (err < 0) { printk(KERN_INFO "[PFQ|%d] disable error!\n", so->id); return err; } } break; case Q_SO_GROUP_BIND: { struct pfq_binding bind; pfq_gid_t gid; if (optlen != sizeof(struct pfq_binding)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; gid = (__force pfq_gid_t)bind.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] add bind: gid=%d not joined!\n", so->id, bind.gid); return -EACCES; } if (!dev_get_by_index(sock_net(&so->sk), bind.ifindex)) { printk(KERN_INFO "[PFQ|%d] bind: invalid ifindex=%d!\n", so->id, bind.ifindex); return -EACCES; } pfq_devmap_update(map_set, bind.ifindex, bind.qindex, gid); pr_devel("[PFQ|%d] group id=%d bind: device ifindex=%d qindex=%d\n", so->id, bind.gid, bind.ifindex, bind.qindex); } break; case Q_SO_GROUP_UNBIND: { struct pfq_binding bind; pfq_gid_t gid; if (optlen != sizeof(struct pfq_binding)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; gid = (__force pfq_gid_t)bind.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] group id=%d unbind: gid=%d not joined!\n", so->id, gid, bind.gid); return -EACCES; } if (dev_put_by_index(sock_net(&so->sk), bind.ifindex) < 0) { printk(KERN_INFO "[PFQ|%d] group id=%d unbind: invalid ifindex=%d!\n", so->id, gid, bind.ifindex); return -EPERM; } pfq_devmap_update(map_reset, bind.ifindex, bind.qindex, gid); pr_devel("[PFQ|%d] group id=%d unbind: device ifindex=%d qindex=%d\n", so->id, gid, bind.ifindex, bind.qindex); } break; case Q_SO_EGRESS_BIND: { struct pfq_binding bind; if (optlen != sizeof(bind)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; if (!dev_get_by_index(sock_net(&so->sk), bind.ifindex)) { printk(KERN_INFO "[PFQ|%d] egress bind: invalid ifindex=%d\n", so->id, bind.ifindex); return -EPERM; } if (bind.qindex < -1) { printk(KERN_INFO "[PFQ|%d] egress bind: invalid qindex=%d\n", so->id, bind.qindex); return -EPERM; } so->egress_type = pfq_endpoint_device; so->egress_index = bind.ifindex; so->egress_queue = bind.qindex; pr_devel("[PFQ|%d] egress bind: device ifindex=%d qindex=%d\n", so->id, so->egress_index, so->egress_queue); } break; case Q_SO_EGRESS_UNBIND: { if (so->egress_index && dev_put_by_index(sock_net(&so->sk), so->egress_index) < 0) { printk(KERN_INFO "[PFQ|%d] egress bind: invalid if_index=%d\n", so->id, so->egress_index); return -EPERM; } so->egress_type = pfq_endpoint_socket; so->egress_index = 0; so->egress_queue = 0; pr_devel("[PFQ|%d] egress unbind.\n", so->id); } break; case Q_SO_SET_RX_TSTAMP: { int tstamp; if (optlen != sizeof(so->opt.tstamp)) return -EINVAL; if (copy_from_user(&tstamp, optval, optlen)) return -EFAULT; tstamp = tstamp ? 
1 : 0; so->opt.tstamp = tstamp; pr_devel("[PFQ|%d] timestamp enabled.\n", so->id); } break; case Q_SO_SET_RX_CAPLEN: { typeof(so->opt.caplen) caplen; if (optlen != sizeof(caplen)) return -EINVAL; if (copy_from_user(&caplen, optval, optlen)) return -EFAULT; if (caplen > (size_t)capt_slot_size) { printk(KERN_INFO "[PFQ|%d] invalid caplen=%zu (max %d)\n", so->id, caplen, capt_slot_size); return -EPERM; } so->opt.caplen = caplen; so->opt.rx_slot_size = Q_QUEUE_SLOT_SIZE(so->opt.caplen); pr_devel("[PFQ|%d] caplen=%zu, slot_size=%zu\n", so->id, so->opt.caplen, so->opt.rx_slot_size); } break; case Q_SO_SET_RX_SLOTS: { typeof(so->opt.rx_queue_len) slots; if (optlen != sizeof(slots)) return -EINVAL; if (copy_from_user(&slots, optval, optlen)) return -EFAULT; if (slots > Q_MAX_SOCKQUEUE_LEN) { printk(KERN_INFO "[PFQ|%d] invalid Rx slots=%zu (max %d)\n", so->id, slots, Q_MAX_SOCKQUEUE_LEN); return -EPERM; } so->opt.rx_queue_len = slots; pr_devel("[PFQ|%d] rx_queue slots=%zu\n", so->id, so->opt.rx_queue_len); } break; case Q_SO_SET_TX_SLOTS: { typeof (so->opt.tx_queue_len) slots; if (optlen != sizeof(slots)) return -EINVAL; if (copy_from_user(&slots, optval, optlen)) return -EFAULT; if (slots > Q_MAX_SOCKQUEUE_LEN) { printk(KERN_INFO "[PFQ|%d] invalid Tx slots=%zu (max %d)\n", so->id, slots, Q_MAX_SOCKQUEUE_LEN); return -EPERM; } so->opt.tx_queue_len = slots; pr_devel("[PFQ|%d] tx_queue slots=%zu\n", so->id, so->opt.tx_queue_len); } break; case Q_SO_SET_WEIGHT: { int weight; if (optlen != sizeof(so->weight)) return -EINVAL; if (copy_from_user(&weight, optval, optlen)) return -EFAULT; if (weight < 1 || weight > (Q_MAX_SOCK_MASK/Q_MAX_ID)) { printk(KERN_INFO "[PFQ|%d] weight=%d: invalid range (min 1, max %d)\n", so->id, weight, Q_MAX_SOCK_MASK/Q_MAX_ID); return -EPERM; } so->weight = weight; /* invalidate per-cpu sock mask cache */ pfq_invalidate_percpu_eligible_mask(so->id); pr_devel("[PFQ|%d] new weight set to %d.\n", so->id, weight); } break; case Q_SO_GROUP_LEAVE: { pfq_gid_t gid; if (optlen != sizeof(gid)) return -EINVAL; if (copy_from_user(&gid, optval, optlen)) return -EFAULT; if (pfq_leave_group(gid, so->id) < 0) return -EFAULT; pr_devel("[PFQ|%d] group id=%d left.\n", so->id, gid); } break; case Q_SO_GROUP_FPROG: { struct pfq_fprog fprog; pfq_gid_t gid; if (optlen != sizeof(fprog)) return -EINVAL; if (copy_from_user(&fprog, optval, optlen)) return -EFAULT; gid = (__force pfq_gid_t)fprog.gid; if (!pfq_has_joined_group(gid, so->id)) { /* don't set the filter and return */ return 0; } if (fprog.fcode.len > 0) { /* set the filter */ struct sk_filter *filter; if (fprog.fcode.len == 1) { struct sock_filter tmp; /* get the first filter */ if (copy_from_user(&tmp, fprog.fcode.filter, sizeof(tmp))) return -EFAULT; /* check whether the first filter is a dummy BPF_RET */ if (BPF_CLASS(tmp.code) == BPF_RET) { pr_devel("[PFQ|%d] fprog: BPF_RET optimized out!\n", so->id); return 0; } } filter = pfq_alloc_sk_filter(&fprog.fcode); if (filter == NULL) { printk(KERN_INFO "[PFQ|%d] fprog error: alloc_sk_filter for gid=%d\n", so->id, fprog.gid); return -EINVAL; } pfq_set_group_filter(gid, filter); pr_devel("[PFQ|%d] fprog: gid=%d (fprog len %d bytes)\n", so->id, fprog.gid, fprog.fcode.len); } else { /* reset the filter */ pfq_set_group_filter(gid, NULL); pr_devel("[PFQ|%d] fprog: gid=%d (resetting filter)\n", so->id, fprog.gid); } } break; case Q_SO_GROUP_VLAN_FILT_TOGGLE: { struct pfq_vlan_toggle vlan; pfq_gid_t gid; if (optlen != sizeof(vlan)) return -EINVAL; if (copy_from_user(&vlan, optval, optlen)) return
-EFAULT; gid = (__force pfq_gid_t)vlan.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] vlan filter toggle: gid=%d not joined!\n", so->id, vlan.gid); return -EACCES; } pfq_toggle_group_vlan_filters(gid, vlan.toggle); pr_devel("[PFQ|%d] vlan filters %s for gid=%d\n", so->id, (vlan.toggle ? "enabled" : "disabled"), vlan.gid); } break; case Q_SO_GROUP_VLAN_FILT: { struct pfq_vlan_toggle filt; pfq_gid_t gid; if (optlen != sizeof(filt)) return -EINVAL; if (copy_from_user(&filt, optval, optlen)) return -EFAULT; gid = (__force pfq_gid_t)filt.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] vlan filter: gid=%d not joined!\n", so->id, filt.gid); return -EACCES; } if (filt.vid < -1 || filt.vid > 4094) { printk(KERN_INFO "[PFQ|%d] vlan error: invalid vid=%d for gid=%d!\n", so->id, filt.vid, filt.gid); return -EINVAL; } if (!pfq_vlan_filters_enabled(gid)) { printk(KERN_INFO "[PFQ|%d] vlan error: vlan filters disabled for gid=%d!\n", so->id, filt.gid); return -EPERM; } if (filt.vid == -1) { /* any */ int i; for(i = 1; i < 4095; i++) { pfq_set_group_vlan_filter(gid, filt.toggle, i); } } else { pfq_set_group_vlan_filter(gid, filt.toggle, filt.vid); } pr_devel("[PFQ|%d] vlan filter vid %d set for gid=%d\n", so->id, filt.vid, filt.gid); } break; case Q_SO_TX_BIND: { struct pfq_binding bind; struct net_device *dev = NULL; if (optlen != sizeof(bind)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; if (bind.tid < -1) { printk(KERN_INFO "[PFQ|%d] Tx thread: invalid thread index (%d)!\n", so->id, bind.tid); return -EPERM; } if (bind.tid >= 0 && so->opt.tx_num_async_queues >= Q_MAX_TX_QUEUES) { printk(KERN_INFO "[PFQ|%d] Tx thread: max number of sock queues exceeded!\n", so->id); return -EPERM; } if (bind.qindex < -1) { printk(KERN_INFO "[PFQ|%d] Tx thread: invalid hw queue (%d)\n", so->id, bind.qindex); return -EPERM; } /* get device */ if (bind.ifindex != -1 && !(dev = dev_get_by_index(sock_net(&so->sk), bind.ifindex))) { printk(KERN_INFO "[PFQ|%d] Tx thread: invalid ifindex=%d\n", so->id, bind.ifindex); return -EPERM; } /* update the socket queue information */ if (bind.tid >= 0) /* async queues */ { int err = pfq_sock_tx_bind(so, bind.tid, bind.ifindex, bind.qindex, dev); if (err < 0) { if (bind.ifindex != -1) dev_put_by_index(sock_net(&so->sk), bind.ifindex); return err; } pr_devel("[PFQ|%d] Tx[%d] bind: if_index=%d qindex=%d\n", so->id, bind.tid, bind.ifindex, bind.qindex); } else /* sync queue */ { so->opt.txq.def_ifindex = bind.ifindex; so->opt.txq.def_queue = bind.qindex; so->opt.txq.def_dev = dev; pr_devel("[PFQ|%d] Tx bind: if_index=%d qindex=%d\n", so->id, so->opt.txq.def_ifindex, so->opt.txq.def_queue); } } break; case Q_SO_TX_UNBIND: { pfq_sock_tx_unbind(so); } break; case Q_SO_TX_QUEUE: { int queue; if (optlen != sizeof(queue)) return -EINVAL; if (copy_from_user(&queue, optval, optlen)) return -EFAULT; if (pfq_get_tx_queue(&so->opt, -1) == NULL) { printk(KERN_INFO "[PFQ|%d] Tx queue: socket not enabled!\n", so->id); return -EPERM; } if (queue == 0) { /* transmit Tx queue */ atomic_t stop = {0}; pfq_sk_queue_xmit(so, -1, Q_NO_KTHREAD, NUMA_NO_NODE, &stop); return 0; } printk(KERN_INFO "[PFQ|%d] Tx queue: bad queue %d!\n", so->id, queue); return -EPERM; } break; case Q_SO_GROUP_FUNCTION: { struct pfq_lang_computation_descr *descr = NULL; struct pfq_lang_computation_tree *comp = NULL; struct pfq_group_computation tmp; size_t psize, ucsize; void *context = NULL; pfq_gid_t gid; int err = 0; if (optlen != sizeof(tmp)) return 
-EINVAL; if (copy_from_user(&tmp, optval, optlen)) return -EFAULT; gid = (__force pfq_gid_t)tmp.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] group computation: gid=%d not joined!\n", so->id, tmp.gid); return -EACCES; } if (copy_from_user(&psize, tmp.prog, sizeof(size_t))) return -EFAULT; pr_devel("[PFQ|%d] computation size: %zu\n", so->id, psize); ucsize = sizeof(size_t) * 2 + psize * sizeof(struct pfq_lang_functional_descr); descr = kmalloc(ucsize, GFP_KERNEL); if (descr == NULL) { printk(KERN_INFO "[PFQ|%d] computation: out of memory!\n", so->id); return -ENOMEM; } if (copy_from_user(descr, tmp.prog, ucsize)) { printk(KERN_INFO "[PFQ|%d] computation: copy_from_user error!\n", so->id); err = -EFAULT; goto error; } /* print user computation */ pr_devel_computation_descr(descr); /* check the correctness of computation */ if (pfq_lang_check_computation_descr(descr) < 0) { printk(KERN_INFO "[PFQ|%d] invalid expression!\n", so->id); err = -EFAULT; goto error; } /* allocate context */ context = pfq_lang_context_alloc(descr); if (context == NULL) { printk(KERN_INFO "[PFQ|%d] context: alloc error!\n", so->id); err = -EFAULT; goto error; } /* allocate a pfq_lang_computation_tree */ comp = pfq_lang_computation_alloc(descr); if (comp == NULL) { printk(KERN_INFO "[PFQ|%d] computation: alloc error!\n", so->id); err = -EFAULT; goto error; } /* link functions of computation */ if (pfq_lang_computation_rtlink(descr, comp, context) < 0) { printk(KERN_INFO "[PFQ|%d] computation aborted!\n", so->id); err = -EPERM; goto error; } /* print executable tree data structure */ pr_devel_computation_tree(comp); /* run init functions */ if (pfq_lang_computation_init(comp) < 0) { printk(KERN_INFO "[PFQ|%d] initialization of computation aborted!\n", so->id); pfq_lang_computation_destruct(comp); err = -EPERM; goto error; } /* enable functional program */ if (pfq_set_group_prog(gid, comp, context) < 0) { printk(KERN_INFO "[PFQ|%d] set group program error!\n", so->id); err = -EPERM; goto error; } kfree(descr); return 0; error: kfree(comp); kfree(context); kfree(descr); return err; } break; default: { found = false; } break; } return found ? 0 : sock_setsockopt(sock, level, optname, optval, optlen); }
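Unhandled option names fall through to sock_setsockopt() above; the Q_SO_* options themselves are driven from user space with plain setsockopt() on the PFQ socket. A hedged sketch of that caller side, assuming the PFQ kernel header (linux/pf_q.h in the PFQ source tree) provides PF_Q as the option level and the Q_SO_* constants matched by the handler; the helper name and values are illustrative.

#include <stdio.h>
#include <sys/socket.h>
#include <linux/pf_q.h>   /* assumed PFQ header: PF_Q, Q_SO_* */

/* Configure capture length and Rx queue slots on an open PFQ socket. */
static int pfq_configure_rx(int fd, int caplen, int slots)
{
        if (setsockopt(fd, PF_Q, Q_SO_SET_RX_CAPLEN,
                       &caplen, sizeof(caplen)) < 0) {
                perror("Q_SO_SET_RX_CAPLEN");
                return -1;
        }
        if (setsockopt(fd, PF_Q, Q_SO_SET_RX_SLOTS,
                       &slots, sizeof(slots)) < 0) {
                perror("Q_SO_SET_RX_SLOTS");
                return -1;
        }
        return 0;
}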
int ksocknal_lib_setup_sock (struct socket *sock) { mm_segment_t oldmm = get_fs (); int rc; int option; int keep_idle; int keep_intvl; int keep_count; int do_keepalive; struct linger linger; sock->sk->sk_allocation = GFP_NOFS; /* Ensure this socket aborts active sends immediately when we close * it. */ linger.l_onoff = 0; linger.l_linger = 0; set_fs (KERNEL_DS); rc = sock_setsockopt (sock, SOL_SOCKET, SO_LINGER, (char *)&linger, sizeof (linger)); set_fs (oldmm); if (rc != 0) { CERROR ("Can't set SO_LINGER: %d\n", rc); return (rc); } option = -1; set_fs (KERNEL_DS); rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_LINGER2, (char *)&option, sizeof (option)); set_fs (oldmm); if (rc != 0) { CERROR ("Can't set TCP_LINGER2: %d\n", rc); return (rc); } if (!*ksocknal_tunables.ksnd_nagle) { option = 1; set_fs (KERNEL_DS); rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_NODELAY, (char *)&option, sizeof (option)); set_fs (oldmm); if (rc != 0) { CERROR ("Can't disable nagle: %d\n", rc); return (rc); } } rc = libcfs_sock_setbuf(sock, *ksocknal_tunables.ksnd_tx_buffer_size, *ksocknal_tunables.ksnd_rx_buffer_size); if (rc != 0) { CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n", *ksocknal_tunables.ksnd_tx_buffer_size, *ksocknal_tunables.ksnd_rx_buffer_size, rc); return (rc); } /* TCP_BACKOFF_* sockopt tunables unsupported in stock kernels */ /* snapshot tunables */ keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; keep_count = *ksocknal_tunables.ksnd_keepalive_count; keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); option = (do_keepalive ? 1 : 0); set_fs (KERNEL_DS); rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&option, sizeof (option)); set_fs (oldmm); if (rc != 0) { CERROR ("Can't set SO_KEEPALIVE: %d\n", rc); return (rc); } if (!do_keepalive) return (0); set_fs (KERNEL_DS); rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE, (char *)&keep_idle, sizeof (keep_idle)); set_fs (oldmm); if (rc != 0) { CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc); return (rc); } set_fs (KERNEL_DS); rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL, (char *)&keep_intvl, sizeof (keep_intvl)); set_fs (oldmm); if (rc != 0) { CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc); return (rc); } set_fs (KERNEL_DS); rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT, (char *)&keep_count, sizeof (keep_count)); set_fs (oldmm); if (rc != 0) { CERROR ("Can't set TCP_KEEPCNT: %d\n", rc); return (rc); } return (0); }
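The same keepalive tuning is available from user space under identical option names. A minimal sketch mirroring the SO_KEEPALIVE / TCP_KEEPIDLE / TCP_KEEPINTVL / TCP_KEEPCNT sequence above on a connected TCP socket; the helper name is illustrative, and the values are in seconds (idle, interval) and probe counts.

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Enable TCP keepalive with explicit idle/interval/count tunables. */
static int enable_keepalive(int fd, int idle, int intvl, int count)
{
        int on = 1;
        if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)) < 0)
                return -1;
        if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE,
                       &idle, sizeof(idle)) < 0)
                return -1;
        if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL,
                       &intvl, sizeof(intvl)) < 0)
                return -1;
        return setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT,
                          &count, sizeof(count));
}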
int system_cloud_connect(int protocol, const ServerAddress* address, sockaddr* saddrCache) { struct addrinfo* info = nullptr; CloudServerAddressType type = CLOUD_SERVER_ADDRESS_TYPE_NONE; bool clean = true; if (saddrCache && /* protocol == IPPROTO_UDP && */ saddrCache->sa_family != AF_UNSPEC) { char tmphost[INET6_ADDRSTRLEN] = {}; char tmpserv[8] = {}; if (!netdb_getnameinfo(saddrCache, saddrCache->sa_len, tmphost, sizeof(tmphost), tmpserv, sizeof(tmpserv), AI_NUMERICHOST | AI_NUMERICSERV)) { /* There is a cached address, use it, but still pass it to getaddrinfo */ struct addrinfo hints = {}; hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV | AI_ADDRCONFIG; hints.ai_family = saddrCache->sa_family; hints.ai_protocol = protocol; /* FIXME: */ hints.ai_socktype = hints.ai_protocol == IPPROTO_UDP ? SOCK_DGRAM : SOCK_STREAM; if (!netdb_getaddrinfo(tmphost, tmpserv, &hints, &info)) { type = CLOUD_SERVER_ADDRESS_TYPE_CACHED; } } } if (type == CLOUD_SERVER_ADDRESS_TYPE_NONE) { /* Check if we have another address to try from the cached addrinfo list */ if (s_state.addr && s_state.next) { info = s_state.next; type = CLOUD_SERVER_ADDRESS_TYPE_CACHED_ADDRINFO; } } if ((type == CLOUD_SERVER_ADDRESS_TYPE_NONE) && address) { /* Use passed ServerAddress */ switch (address->addr_type) { case IP_ADDRESS: { struct addrinfo hints = {}; hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV | AI_ADDRCONFIG; /* XXX: IPv4-only */ hints.ai_family = AF_INET; hints.ai_protocol = protocol; /* FIXME: */ hints.ai_socktype = hints.ai_protocol == IPPROTO_UDP ? SOCK_DGRAM : SOCK_STREAM; char tmphost[INET_ADDRSTRLEN] = {}; char tmpserv[8] = {}; struct in_addr in = {}; in.s_addr = htonl(address->ip); if (inet_inet_ntop(AF_INET, &in, tmphost, sizeof(tmphost))) { snprintf(tmpserv, sizeof(tmpserv), "%u", address->port); netdb_getaddrinfo(tmphost, tmpserv, &hints, &info); type = CLOUD_SERVER_ADDRESS_TYPE_NEW_ADDRINFO; } break; } case DOMAIN_NAME: { struct addrinfo hints = {}; hints.ai_flags = AI_NUMERICSERV | AI_ADDRCONFIG; hints.ai_protocol = protocol; /* FIXME: */ hints.ai_socktype = hints.ai_protocol == IPPROTO_UDP ? 
SOCK_DGRAM : SOCK_STREAM; char tmphost[sizeof(address->domain) + 32] = {}; char tmpserv[8] = {}; /* FIXME: this should probably be moved into system_cloud_internal */ system_string_interpolate(address->domain, tmphost, sizeof(tmphost), system_interpolate_cloud_server_hostname); snprintf(tmpserv, sizeof(tmpserv), "%u", address->port); LOG(TRACE, "Resolving %s#%s", tmphost, tmpserv); netdb_getaddrinfo(tmphost, tmpserv, &hints, &info); type = CLOUD_SERVER_ADDRESS_TYPE_NEW_ADDRINFO; break; } } } int r = SYSTEM_ERROR_NETWORK; if (info == nullptr) { LOG(ERROR, "Failed to determine server address"); } LOG(TRACE, "Address type: %d", type); for (struct addrinfo* a = info; a != nullptr; a = a->ai_next) { /* Iterate over all the addresses and attempt to connect */ int s = sock_socket(a->ai_family, a->ai_socktype, a->ai_protocol); if (s < 0) { LOG(ERROR, "Cloud socket failed, family=%d, type=%d, protocol=%d, errno=%d", a->ai_family, a->ai_socktype, a->ai_protocol, errno); continue; } LOG(TRACE, "Cloud socket=%d, family=%d, type=%d, protocol=%d", s, a->ai_family, a->ai_socktype, a->ai_protocol); char serverHost[INET6_ADDRSTRLEN] = {}; uint16_t serverPort = 0; switch (a->ai_family) { case AF_INET: { inet_inet_ntop(a->ai_family, &((sockaddr_in*)a->ai_addr)->sin_addr, serverHost, sizeof(serverHost)); serverPort = ntohs(((sockaddr_in*)a->ai_addr)->sin_port); break; } case AF_INET6: { inet_inet_ntop(a->ai_family, &((sockaddr_in6*)a->ai_addr)->sin6_addr, serverHost, sizeof(serverHost)); serverPort = ntohs(((sockaddr_in6*)a->ai_addr)->sin6_port); break; } } LOG(INFO, "Cloud socket=%d, connecting to %s#%u", s, serverHost, serverPort); /* We are using fixed source port only for IPv6 connections */ if (protocol == IPPROTO_UDP && a->ai_family == AF_INET6) { struct sockaddr_storage saddr = {}; saddr.s2_len = sizeof(saddr); saddr.ss_family = a->ai_family; /* NOTE: Always binding to 5684 by default */ switch (a->ai_family) { case AF_INET: { ((sockaddr_in*)&saddr)->sin_port = htons(PORT_COAPS); break; } case AF_INET6: { ((sockaddr_in6*)&saddr)->sin6_port = htons(PORT_COAPS); break; } } const int one = 1; if (sock_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) { LOG(ERROR, "Cloud socket=%d, failed to set SO_REUSEADDR, errno=%d", s, errno); sock_close(s); continue; } /* Bind socket */ if (sock_bind(s, (const struct sockaddr*)&saddr, sizeof(saddr))) { LOG(ERROR, "Cloud socket=%d, failed to bind, errno=%d", s, errno); sock_close(s); continue; } } /* FIXME: timeout for TCP */ /* NOTE: we do this for UDP sockets as well in order to automagically filter * on source address and port */ r = sock_connect(s, a->ai_addr, a->ai_addrlen); if (r) { LOG(ERROR, "Cloud socket=%d, failed to connect to %s#%u, errno=%d", s, serverHost, serverPort, errno); sock_close(s); continue; } LOG(TRACE, "Cloud socket=%d, connected to %s#%u", s, serverHost, serverPort); /* If we got here, we are most likely connected, however keep track of current addrinfo list * in order to try the next address if application layer fails to establish the connection */ if (protocol == IPPROTO_UDP && (type == CLOUD_SERVER_ADDRESS_TYPE_NEW_ADDRINFO || type == CLOUD_SERVER_ADDRESS_TYPE_CACHED_ADDRINFO)) { if (s_state.addr) { /* We are already iterating over a cached addrinfo list */ if (a->ai_next) { s_state.next = a->ai_next; clean = false; } else { info = s_state.addr; s_state.addr = s_state.next = nullptr; } } else { if (a->ai_next) { s_state.addr = info; s_state.next = a->ai_next; clean = false; } } } s_state.socket = s; if
(saddrCache) { memcpy(saddrCache, a->ai_addr, a->ai_addrlen); } unsigned int keepalive = 0; system_cloud_get_inet_family_keepalive(a->ai_family, &keepalive); system_cloud_set_inet_family_keepalive(a->ai_family, keepalive, 1); break; } if (clean) { netdb_freeaddrinfo(info); } return r; }
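The connect loop above follows the standard getaddrinfo() pattern: resolve once, then try each returned address until one connects. A self-contained POSIX sketch of the same pattern, without the firmware-specific address caching; the helper name is illustrative.

#include <netdb.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>

/* Resolve host:serv and connect to the first address that works.
 * Returns a connected fd, or -1 if every candidate failed. */
static int connect_any(const char *host, const char *serv, int socktype)
{
        struct addrinfo hints, *info, *a;
        int fd = -1;
        memset(&hints, 0, sizeof(hints));
        hints.ai_family = AF_UNSPEC;        /* try IPv4 and IPv6 */
        hints.ai_socktype = socktype;       /* SOCK_STREAM or SOCK_DGRAM */
        if (getaddrinfo(host, serv, &hints, &info) != 0)
                return -1;
        for (a = info; a != NULL; a = a->ai_next) {
                fd = socket(a->ai_family, a->ai_socktype, a->ai_protocol);
                if (fd < 0)
                        continue;
                if (connect(fd, a->ai_addr, a->ai_addrlen) == 0)
                        break;              /* connected */
                close(fd);
                fd = -1;
        }
        freeaddrinfo(info);
        return fd;
}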