/* Query the kernel send and/or receive buffer sizes of *sock.
 * Either output pointer may be NULL to skip that query.
 * Returns 0 on success or a negative errno on failure. */
int
libcfs_sock_getbuf (cfs_socket_t *sock, int *txbufsize, int *rxbufsize)
{
        int value;
        int valuelen;
        int err;

        if (txbufsize != NULL) {
                valuelen = sizeof(value);
                /* sock_getsockopt() returns a positive errno; negate to
                 * match the libcfs convention of negative error codes. */
                err = -sock_getsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF,
                                       (char *)&value, &valuelen);
                if (err != 0) {
                        CERROR ("Can't get send buffer size: %d\n", err);
                        return err;
                }
                *txbufsize = value;
        }

        if (rxbufsize != NULL) {
                valuelen = sizeof(value);
                err = -sock_getsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF,
                                        (char *)&value, &valuelen);
                if (err != 0) {
                        CERROR ("Can't get receive buffer size: %d\n", err);
                        return err;
                }
                *rxbufsize = value;
        }

        return 0;
}
/* 32-bit compat entry point for getsockopt at SOL_SOCKET level.
 * Timeout options need struct-size translation (compat_timeval vs
 * native timeval); everything else passes straight through. */
static int compat_sock_getsockopt(struct socket *sock, int level, int optname,
                                  char __user *optval, int __user *optlen)
{
        switch (optname) {
        case SO_RCVTIMEO:
        case SO_SNDTIMEO:
                return do_get_sock_timeout(sock, level, optname,
                                           optval, optlen);
        default:
                return sock_getsockopt(sock, level, optname, optval, optlen);
        }
}
/* Fetch SO_RCVTIMEO/SO_SNDTIMEO as a native struct timeval in kernel
 * space, then copy it back to a 32-bit user process as a struct
 * compat_timeval.  Returns 0 on success or a negative errno. */
static int do_get_sock_timeout(struct socket *sock, int level, int optname,
                               char __user *optval, int __user *optlen)
{
        struct compat_timeval __user *ctv =
                (struct compat_timeval __user *) optval;
        struct timeval tv;
        mm_segment_t seg;
        int ulen;
        int ret;

        if (get_user(ulen, optlen))
                return -EFAULT;
        if (ulen < sizeof(*ctv))
                return -EINVAL;

        /* Temporarily lift the user/kernel address limit so the kernel
         * buffer &tv is acceptable to sock_getsockopt(). */
        ulen = sizeof(tv);
        seg = get_fs();
        set_fs(KERNEL_DS);
        ret = sock_getsockopt(sock, level, optname, (char *) &tv, &ulen);
        set_fs(seg);

        if (ret)
                return ret;

        /* Report the compat size and write both fields back to userspace. */
        if (put_user(sizeof(*ctv), optlen) ||
            !access_ok(VERIFY_WRITE, ctv, sizeof(*ctv)) ||
            __put_user(tv.tv_sec, &ctv->tv_sec) ||
            __put_user(tv.tv_usec, &ctv->tv_usec))
                ret = -EFAULT;

        return ret;
}
/* getsockopt for AF_UNIX sockets: only generic (SOL_SOCKET) options are
 * supported; those are handed to the common socket-level helper. */
static int unix_getsockopt(struct socket *sock, int level, int optname,
                           char *optval, int *optlen)
{
        if (level == SOL_SOCKET) {
                unix_socket *usk = sock->data;
                return sock_getsockopt(usk, level, optname, optval, optlen);
        }
        return -EOPNOTSUPP;
}
/* getsockopt for AF_INET sockets: generic options are handled by the
 * common socket layer; other levels are delegated to the protocol's
 * own handler when it provides one. */
static int inet_getsockopt(struct socket *sock, int level, int optname,
                           char *optval, int *optlen)
{
        struct sock *sk = (struct sock *) sock->data;

        if (level == SOL_SOCKET)
                return sock_getsockopt(sk, level, optname, optval, optlen);

        if (sk->prot->getsockopt == NULL)
                return -EOPNOTSUPP;

        return sk->prot->getsockopt(sk, level, optname, optval, optlen);
}
/* Simulator shim for getsockopt: unwrap the SimSocket handle and
 * dispatch — SOL_SOCKET to the generic layer, anything else to the
 * protocol-specific handler of the underlying socket. */
int sim_sock_getsockopt (struct SimSocket *socket, int level, int optname,
                         void *optval, int *optlen)
{
        struct socket *sock = (struct socket *)socket;

        if (level == SOL_SOCKET)
                return sock_getsockopt(sock, level, optname, optval, optlen);

        return sock->ops->getsockopt(sock, level, optname, optval, optlen);
}
static int _omni_sock_getsockopt( struct socket *sock, int level, int optname, char *optval, int *optlen) { mm_segment_t oldfs = get_fs(); char __user *uoptval; int __user *uoptlen; int err; uoptval = (char __user __force *) optval; uoptlen = (int __user __force *) optlen; set_fs(KERNEL_DS); if (level == SOL_SOCKET) err = sock_getsockopt(sock, level, optname, uoptval, uoptlen); else err = sock->ops->getsockopt(sock, level, optname, uoptval, uoptlen); set_fs(oldfs); return err; }
//process // block/allow, or ask user and put thread to sleep kern_return_t process(void *cookie, socket_t so, const struct sockaddr *to) { //result kern_return_t result = kIOReturnError; //event firewallEvent event = {0}; //rule int action = RULE_STATE_NOT_FOUND; //awake reason int reason = 0; //socket type int socketType = 0; //length of socket type int socketTypeLength = 0; //process name char processName[PATH_MAX] = {0}; //what does rule say? // loop until we have an answer while(true) { //reset bzero(&event, sizeof(event)); //extract action action = ((struct cookieStruct*)cookie)->ruleAction; //get process name proc_selfname(processName, PATH_MAX); //block? if(RULE_STATE_BLOCK == action) { //dbg msg IOLog("LULU: rule says block for %s (pid: %d)\n", processName, proc_selfpid()); //gtfo! result = EPERM; //all done goto bail; } //allow? else if(RULE_STATE_ALLOW == action) { //dbg msg IOLog("LULU: rule says allow for %s (pid: %d)\n", processName, proc_selfpid()); //ok result = kIOReturnSuccess; //all done goto bail; } //not found // ->ask daemon and sleep for response else if(RULE_STATE_NOT_FOUND == action) { //dbg msg IOLog("LULU: no rule found for %s (pid: %d)\n", processName, proc_selfpid()); //zero out bzero(&event, sizeof(firewallEvent)); //set type event.networkOutEvent.type = EVENT_NETWORK_OUT; //add pid event.networkOutEvent.pid = proc_selfpid(); //init length socketTypeLength = sizeof(socketType); //get socket type sock_getsockopt(so, SOL_SOCKET, SO_TYPE, &socketType, &socketTypeLength); //save type event.networkOutEvent.socketType = socketType; //UDP sockets destination socket might be null // so grab via 'getpeername' and save as 'remote addr' if(NULL == to) { //copy into 'remote addr' for user mode if(0 != sock_getpeername(so, (struct sockaddr*)&(event.networkOutEvent.remoteAddress), sizeof(event.networkOutEvent.remoteAddress))) { //err msg IOLog("LULU ERROR: sock_getpeername() failed"); //bail goto bail; } } //copy remote socket for user mode else { 
//add remote (destination) socket addr memcpy(&(event.networkOutEvent.remoteAddress), to, sizeof(event.networkOutEvent.remoteAddress)); } //queue it up sharedDataQueue->enqueue_tail(&event, sizeof(firewallEvent)); //dbg msg IOLog("LULU: queued response to user mode, now going to sleep!\n"); //lock IOLockLock(ruleEventLock); //sleep reason = IOLockSleep(ruleEventLock, &ruleEventLock, THREAD_ABORTSAFE); //TODO: fix panic, think if kext is unloaded (sets ruleEventLock to NULL) this can still wake up? // "Preemption level underflow, possible cause unlocking an unlocked mutex or spinlock" // seems to happen when process is killed or kext unloaded while in the IOLockSleep!? //unlock IOLockUnlock(ruleEventLock); //thread wakeup cuz of signal, etc // ->just bail (process likely exited, etc) if(THREAD_AWAKENED != reason) { //dbg msg IOLog("LULU: thread awoke, but because of %d!\n", reason); //gtfo! result = EPERM; //all done goto bail; } //dbg msg IOLog("LULU: thread awoke, will check/process response\n"); //try get rule action again // ->not found, block, allow, etc ((struct cookieStruct*)(cookie))->ruleAction = queryRule(proc_selfpid()); //loop to (re)process } }//while bail: return result; }
/*
 *	System call vectors. Since I (RIB) want to rewrite sockets as streams,
 *	we have this level of indirection. Not a lot of overhead, since more of
 *	the work is done via read/write/select directly.
 *
 *	'call' selects the socket operation; 'args' points at a user-space
 *	array of longs holding that operation's arguments.  Each case first
 *	verifies that the needed number of longs is readable, then fetches
 *	them one at a time with get_fs_long().
 *
 *	NOTE(review): the argument block is read directly from user memory
 *	(not snapshotted), so a concurrent writer in the same address space
 *	could change values between fetches — presumably accepted for this
 *	era of kernel; confirm if hardening is ever needed.
 */
asmlinkage int sys_socketcall(int call, unsigned long *args)
{
	int er;
	switch(call)
	{
		case SYS_SOCKET:
			/* socket(domain, type, protocol) */
			er=verify_area(VERIFY_READ, args, 3 * sizeof(long));
			if(er)
				return er;
			return(sock_socket(get_fs_long(args+0),
				get_fs_long(args+1),
				get_fs_long(args+2)));
		case SYS_BIND:
			/* bind(fd, addr, addrlen) */
			er=verify_area(VERIFY_READ, args, 3 * sizeof(long));
			if(er)
				return er;
			return(sock_bind(get_fs_long(args+0),
				(struct sockaddr *)get_fs_long(args+1),
				get_fs_long(args+2)));
		case SYS_CONNECT:
			/* connect(fd, addr, addrlen) */
			er=verify_area(VERIFY_READ, args, 3 * sizeof(long));
			if(er)
				return er;
			return(sock_connect(get_fs_long(args+0),
				(struct sockaddr *)get_fs_long(args+1),
				get_fs_long(args+2)));
		case SYS_LISTEN:
			/* listen(fd, backlog) */
			er=verify_area(VERIFY_READ, args, 2 * sizeof(long));
			if(er)
				return er;
			return(sock_listen(get_fs_long(args+0),
				get_fs_long(args+1)));
		case SYS_ACCEPT:
			/* accept(fd, addr, addrlen*) */
			er=verify_area(VERIFY_READ, args, 3 * sizeof(long));
			if(er)
				return er;
			return(sock_accept(get_fs_long(args+0),
				(struct sockaddr *)get_fs_long(args+1),
				(int *)get_fs_long(args+2)));
		case SYS_GETSOCKNAME:
			/* getsockname(fd, addr, addrlen*) */
			er=verify_area(VERIFY_READ, args, 3 * sizeof(long));
			if(er)
				return er;
			return(sock_getsockname(get_fs_long(args+0),
				(struct sockaddr *)get_fs_long(args+1),
				(int *)get_fs_long(args+2)));
		case SYS_GETPEERNAME:
			/* getpeername(fd, addr, addrlen*) */
			er=verify_area(VERIFY_READ, args, 3 * sizeof(long));
			if(er)
				return er;
			return(sock_getpeername(get_fs_long(args+0),
				(struct sockaddr *)get_fs_long(args+1),
				(int *)get_fs_long(args+2)));
		case SYS_SOCKETPAIR:
			/* socketpair(domain, type, protocol, fds[2]) */
			er=verify_area(VERIFY_READ, args, 4 * sizeof(long));
			if(er)
				return er;
			return(sock_socketpair(get_fs_long(args+0),
				get_fs_long(args+1),
				get_fs_long(args+2),
				(unsigned long *)get_fs_long(args+3)));
		case SYS_SEND:
			/* send(fd, buf, len, flags) */
			er=verify_area(VERIFY_READ, args, 4 * sizeof(unsigned long));
			if(er)
				return er;
			return(sock_send(get_fs_long(args+0),
				(void *)get_fs_long(args+1),
				get_fs_long(args+2),
				get_fs_long(args+3)));
		case SYS_SENDTO:
			/* sendto(fd, buf, len, flags, addr, addrlen) */
			er=verify_area(VERIFY_READ, args, 6 * sizeof(unsigned long));
			if(er)
				return er;
			return(sock_sendto(get_fs_long(args+0),
				(void *)get_fs_long(args+1),
				get_fs_long(args+2),
				get_fs_long(args+3),
				(struct sockaddr *)get_fs_long(args+4),
				get_fs_long(args+5)));
		case SYS_RECV:
			/* recv(fd, buf, len, flags) */
			er=verify_area(VERIFY_READ, args, 4 * sizeof(unsigned long));
			if(er)
				return er;
			return(sock_recv(get_fs_long(args+0),
				(void *)get_fs_long(args+1),
				get_fs_long(args+2),
				get_fs_long(args+3)));
		case SYS_RECVFROM:
			/* recvfrom(fd, buf, len, flags, addr, addrlen*) */
			er=verify_area(VERIFY_READ, args, 6 * sizeof(unsigned long));
			if(er)
				return er;
			return(sock_recvfrom(get_fs_long(args+0),
				(void *)get_fs_long(args+1),
				get_fs_long(args+2),
				get_fs_long(args+3),
				(struct sockaddr *)get_fs_long(args+4),
				(int *)get_fs_long(args+5)));
		case SYS_SHUTDOWN:
			/* shutdown(fd, how) */
			er=verify_area(VERIFY_READ, args, 2* sizeof(unsigned long));
			if(er)
				return er;
			return(sock_shutdown(get_fs_long(args+0),
				get_fs_long(args+1)));
		case SYS_SETSOCKOPT:
			/* setsockopt(fd, level, optname, optval, optlen) */
			er=verify_area(VERIFY_READ, args, 5*sizeof(unsigned long));
			if(er)
				return er;
			return(sock_setsockopt(get_fs_long(args+0),
				get_fs_long(args+1),
				get_fs_long(args+2),
				(char *)get_fs_long(args+3),
				get_fs_long(args+4)));
		case SYS_GETSOCKOPT:
			/* getsockopt(fd, level, optname, optval, optlen*) */
			er=verify_area(VERIFY_READ, args, 5*sizeof(unsigned long));
			if(er)
				return er;
			return(sock_getsockopt(get_fs_long(args+0),
				get_fs_long(args+1),
				get_fs_long(args+2),
				(char *)get_fs_long(args+3),
				(int *)get_fs_long(args+4)));
		default:
			/* unknown socketcall number */
			return(-EINVAL);
	}
}
/* Send the plain-iovec fragments of *tx on the connection's socket in a
 * single non-blocking sock_send().  Returns the number of bytes sent on
 * success, or a negative errno on failure. */
int
ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
{
        socket_t        sock = C2B_SOCK(conn->ksnc_sock);
        size_t          sndlen;
        int             nob;
        int             rc;
#if SOCKNAL_SINGLE_FRAG_TX
        struct iovec    scratch;
        struct iovec   *scratchiov = &scratch;
        unsigned int    niov = 1;
#else
        /* per-scheduler scratch array avoids a per-call allocation */
        struct iovec   *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
        unsigned int    niov = tx->tx_niov;
#endif
        struct msghdr msg = {
                .msg_name       = NULL,
                .msg_namelen    = 0,
                .msg_iov        = scratchiov,
                .msg_iovlen     = niov,
                .msg_control    = NULL,
                .msg_controllen = 0,
                .msg_flags      = MSG_DONTWAIT
        };
        int  i;

        /* copy the tx iovs into the scratch array; the socket may modify
         * the iovs it is given, so never hand it tx->tx_iov directly.
         * 'nob' is accumulated here but not otherwise used in this
         * function. */
        for (nob = i = 0; i < niov; i++) {
                scratchiov[i] = tx->tx_iov[i];
                nob += scratchiov[i].iov_len;
        }

        /*
         * XXX Liang:
         * Linux has MSG_MORE, do we have anything to
         * reduce number of partial TCP segments sent?
         */
        /* sock_send() returns a positive errno; negate for libcfs style */
        rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen);
        if (rc == 0)
                rc = sndlen;
        return rc;
}

/* Send the page (kiov) fragments of *tx: map each page, send in one
 * non-blocking call, then unmap.  Returns bytes sent or -errno. */
int
ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
{
        socket_t        sock = C2B_SOCK(conn->ksnc_sock);
        lnet_kiov_t    *kiov = tx->tx_kiov;
        int             rc;
        int             nob;
        size_t          sndlen;
#if SOCKNAL_SINGLE_FRAG_TX
        struct iovec    scratch;
        struct iovec   *scratchiov = &scratch;
        unsigned int    niov = 1;
#else
        struct iovec   *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
        unsigned int    niov = tx->tx_nkiov;
#endif
        struct msghdr msg = {
                .msg_name       = NULL,
                .msg_namelen    = 0,
                .msg_iov        = scratchiov,
                .msg_iovlen     = niov,
                .msg_control    = NULL,
                .msg_controllen = 0,
                .msg_flags      = MSG_DONTWAIT
        };
        int  i;

        /* map each page into kernel address space and build the iovec;
         * every page mapped here must be unmapped after the send */
        for (nob = i = 0; i < niov; i++) {
                scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) +
                                         kiov[i].kiov_offset;
                nob += scratchiov[i].iov_len = kiov[i].kiov_len;
        }

        /*
         * XXX Liang:
         * Linux has MSG_MORE, do we have anything to
         * reduce number of partial TCP segments sent?
         */
        rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen);

        /* unmap regardless of the send outcome */
        for (i = 0; i < niov; i++)
                cfs_kunmap(kiov[i].kiov_page);

        if (rc == 0)
                rc = sndlen;
        return rc;
}

/* Receive into the connection's plain-iovec receive buffers with a
 * single non-blocking sock_receive().  Returns bytes received or
 * -errno. */
int
ksocknal_lib_recv_iov (ksock_conn_t *conn)
{
#if SOCKNAL_SINGLE_FRAG_RX
        struct iovec    scratch;
        struct iovec   *scratchiov = &scratch;
        unsigned int    niov = 1;
#else
        struct iovec   *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
        unsigned int    niov = conn->ksnc_rx_niov;
#endif
        struct iovec   *iov = conn->ksnc_rx_iov;
        struct msghdr msg = {
                .msg_name       = NULL,
                .msg_namelen    = 0,
                .msg_iov        = scratchiov,
                .msg_iovlen     = niov,
                .msg_control    = NULL,
                .msg_controllen = 0,
                .msg_flags      = 0
        };
        size_t  rcvlen;
        int     nob;
        int     i;
        int     rc;

        LASSERT (niov > 0);

        /* copy the rx iovs to scratch; the socket may consume/modify the
         * iovs it is handed */
        for (nob = i = 0; i < niov; i++) {
                scratchiov[i] = iov[i];
                nob += scratchiov[i].iov_len;
        }
        /* never ask for more than the protocol state machine expects */
        LASSERT (nob <= conn->ksnc_rx_nob_wanted);

        rc = -sock_receive (C2B_SOCK(conn->ksnc_sock), &msg,
                            MSG_DONTWAIT, &rcvlen);
        if (rc == 0)
                rc = rcvlen;

        return rc;
}

/* Receive into the connection's page (kiov) receive buffers: map the
 * pages, receive in one non-blocking call, unmap.  Returns bytes
 * received or -errno. */
int
ksocknal_lib_recv_kiov (ksock_conn_t *conn)
{
#if SOCKNAL_SINGLE_FRAG_RX
        struct iovec    scratch;
        struct iovec   *scratchiov = &scratch;
        unsigned int    niov = 1;
#else
        struct iovec   *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
        unsigned int    niov = conn->ksnc_rx_nkiov;
#endif
        lnet_kiov_t    *kiov = conn->ksnc_rx_kiov;
        struct msghdr msg = {
                .msg_name       = NULL,
                .msg_namelen    = 0,
                .msg_iov        = scratchiov,
                .msg_iovlen     = niov,
                .msg_control    = NULL,
                .msg_controllen = 0,
                .msg_flags      = 0
        };
        int     nob;
        int     i;
        size_t  rcvlen;
        int     rc;

        /* NB we can't trust socket ops to either consume our iovs
         * or leave them alone.
         */
        for (nob = i = 0; i < niov; i++) {
                scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) +
                                         kiov[i].kiov_offset;
                nob += scratchiov[i].iov_len = kiov[i].kiov_len;
        }
        LASSERT (nob <= conn->ksnc_rx_nob_wanted);

        rc = -sock_receive(C2B_SOCK(conn->ksnc_sock), &msg,
                           MSG_DONTWAIT, &rcvlen);

        /* unmap regardless of the receive outcome */
        for (i = 0; i < niov; i++)
                cfs_kunmap(kiov[i].kiov_page);

        if (rc == 0)
                rc = rcvlen;

        return (rc);
}

/* Request an immediate ACK from the peer — not implemented on this
 * platform. */
void
ksocknal_lib_eager_ack (ksock_conn_t *conn)
{
        /* XXX Liang: */
}

/* Snapshot the connection's socket buffer sizes and Nagle setting.
 * On success *nagle is 1 when Nagle is ENABLED (i.e. TCP_NODELAY off).
 * Returns 0, or -ESHUTDOWN if the connection is closing, or -errno;
 * on any failure all three outputs are zeroed. */
int
ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem,
                                int *rxmem, int *nagle)
{
        socket_t       sock = C2B_SOCK(conn->ksnc_sock);
        int            len;
        int            rc;

        /* hold a ref so the socket can't be torn down underneath us */
        rc = ksocknal_connsock_addref(conn);
        if (rc != 0) {
                LASSERT (conn->ksnc_closing);
                *txmem = *rxmem = *nagle = 0;
                return (-ESHUTDOWN);
        }
        rc = libcfs_sock_getbuf(conn->ksnc_sock, txmem, rxmem);
        if (rc == 0) {
                len = sizeof(*nagle);
                rc = -sock_getsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
                                      nagle, &len);
        }
        ksocknal_connsock_decref(conn);

        /* TCP_NODELAY reads as 1 when Nagle is disabled; invert so the
         * caller sees "nagle enabled?" */
        if (rc == 0)
                *nagle = !*nagle;
        else
                *txmem = *rxmem = *nagle = 0;

        return (rc);
}

/* Apply the socknal tunables (linger, nagle, buffer sizes, keepalive)
 * to a freshly created socket.  Returns 0 or -errno. */
int
ksocknal_lib_setup_sock (cfs_socket_t *sock)
{
        int             rc;
        int             option;
        int             keep_idle;
        int             keep_intvl;
        int             keep_count;
        int             do_keepalive;
        socket_t        so = C2B_SOCK(sock);
        struct linger   linger;

        /* Ensure this socket aborts active sends immediately when we close
         * it.
         * NOTE(review): l_onoff = 0 actually DISABLES SO_LINGER (close
         * returns immediately, pending data sent in background) — the
         * comment above is inherited; confirm intent before changing. */
        linger.l_onoff = 0;
        linger.l_linger = 0;
        rc = -sock_setsockopt(so, SOL_SOCKET, SO_LINGER,
                              &linger, sizeof(linger));
        if (rc != 0) {
                CERROR ("Can't set SO_LINGER: %d\n", rc);
                return (rc);
        }

        /* disable Nagle if the tunable says so */
        if (!*ksocknal_tunables.ksnd_nagle) {
                option = 1;
                rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY,
                                      &option, sizeof(option));
                if (rc != 0) {
                        CERROR ("Can't disable nagle: %d\n", rc);
                        return (rc);
                }
        }

        rc = libcfs_sock_setbuf(sock,
                                *ksocknal_tunables.ksnd_tx_buffer_size,
                                *ksocknal_tunables.ksnd_rx_buffer_size);
        if (rc != 0) {
                CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
                        *ksocknal_tunables.ksnd_tx_buffer_size,
                        *ksocknal_tunables.ksnd_rx_buffer_size, rc);
                return (rc);
        }

        /* snapshot tunables */
        keep_idle  = *ksocknal_tunables.ksnd_keepalive_idle;
        keep_count = *ksocknal_tunables.ksnd_keepalive_count;
        keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;

        /* keepalive only makes sense when all three tunables are set */
        do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
        option = (do_keepalive ? 1 : 0);

        rc = -sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE,
                              &option, sizeof(option));
        if (rc != 0) {
                CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
                return (rc);
        }

        if (!do_keepalive)
                return (rc);

        /* only the idle time is configurable via TCP_KEEPALIVE here;
         * count/interval tunables have no per-socket knob on this
         * platform — TODO confirm */
        rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_KEEPALIVE,
                              &keep_idle, sizeof(keep_idle));

        return (rc);
}

/* Force any queued data out now by setting TCP_NODELAY.  No-op if the
 * connection is already shutting down. */
void
ksocknal_lib_push_conn(ksock_conn_t *conn)
{
        socket_t        sock;
        int             val = 1;
        int             rc;

        rc = ksocknal_connsock_addref(conn);
        if (rc != 0)            /* being shut down */
                return;
        sock = C2B_SOCK(conn->ksnc_sock);

        rc = -sock_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
                              &val, sizeof(val));
        LASSERT(rc == 0);

        ksocknal_connsock_decref(conn);
        return;
}