/*
 * Map each memory BAR of @pdev, register the vNIC device, and allocate
 * one resource chunk per resource type into vnic->chunks[].
 *
 * Returns 0 on success or a negative errno; on failure every mapping,
 * registration and chunk allocated so far is undone.
 */
static int usnic_vnic_discover_resources(struct pci_dev *pdev,
						struct usnic_vnic *vnic)
{
	enum usnic_vnic_res_type res_type;
	int i;
	int err = 0;

	/* Map every memory BAR; non-memory BARs are skipped entirely. */
	for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
		if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
			continue;
		vnic->bar[i].len = pci_resource_len(pdev, i);
		vnic->bar[i].vaddr = pci_iomap(pdev, i, vnic->bar[i].len);
		if (!vnic->bar[i].vaddr) {
			usnic_err("Cannot memory-map BAR %d, aborting\n",
					i);
			err = -ENODEV;
			goto out_clean_bar;
		}
		vnic->bar[i].bus_addr = pci_resource_start(pdev, i);
	}

	vnic->vdev = vnic_dev_register(NULL, pdev, pdev, vnic->bar,
			ARRAY_SIZE(vnic->bar));
	if (!vnic->vdev) {
		usnic_err("Failed to register device %s\n",
				pci_name(pdev));
		err = -EINVAL;
		goto out_clean_bar;
	}

	/* EOL is a sentinel; valid resource types lie in (EOL, MAX). */
	for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
			res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) {
		err = usnic_vnic_alloc_res_chunk(vnic, res_type,
						&vnic->chunks[res_type]);
		if (err) {
			usnic_err("Failed to alloc res %s with err %d\n",
					usnic_vnic_res_type_to_str(res_type),
					err);
			goto out_clean_chunks;
		}
	}

	return 0;

out_clean_chunks:
	/* Free only the chunks allocated before the failing res_type. */
	for (res_type--; res_type > USNIC_VNIC_RES_TYPE_EOL; res_type--)
		usnic_vnic_free_res_chunk(&vnic->chunks[res_type]);
	vnic_dev_unregister(vnic->vdev);
out_clean_bar:
	for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
		if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
			continue;
		/* First unmapped memory BAR marks where mapping stopped. */
		if (!vnic->bar[i].vaddr)
			break;
		iounmap(vnic->bar[i].vaddr);
	}

	return err;
}
/*
 * Release a previously reserved transport port.  Only
 * USNIC_TRANSPORT_ROCE_CUSTOM ports are tracked in roce_bitmap;
 * any other transport type is a caller bug and is only logged.
 */
void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num)
{
	if (type == USNIC_TRANSPORT_ROCE_CUSTOM) {
		spin_lock(&roce_bitmap_lock);
		if (!port_num) {
			/* Fixed typo: was "Unreserved unvalid port num 0",
			 * now consistent with the message below. */
			usnic_err("Unreserving invalid port num 0 for %s\n",
					usnic_transport_to_str(type));
			goto out_roce_custom;
		}
		if (!test_bit(port_num, roce_bitmap)) {
			usnic_err("Unreserving invalid %hu for %s\n",
					port_num,
					usnic_transport_to_str(type));
			goto out_roce_custom;
		}
		bitmap_clear(roce_bitmap, port_num, 1);
		usnic_dbg("Freeing port %hu for %s\n", port_num,
				usnic_transport_to_str(type));
out_roce_custom:
		spin_unlock(&roce_bitmap_lock);
	} else {
		usnic_err("Freeing invalid port %hu for %d\n",
				port_num, type);
	}
}
/*
 * reserve a port number. if "0" specified, we will try to pick one
 * starting at roce_next_port. roce_next_port will take on the values
 * 1..4096
 *
 * Returns the reserved port number, or 0 on failure.
 */
u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num)
{
	if (type == USNIC_TRANSPORT_ROCE_CUSTOM) {
		spin_lock(&roce_bitmap_lock);
		if (!port_num) {
			unsigned long found;

			found = bitmap_find_next_zero_area(roce_bitmap,
						ROCE_BITMAP_SZ,
						roce_next_port /* start */,
						1 /* nr */,
						0 /* align */);
			/*
			 * bitmap_find_next_zero_area() returns an index >=
			 * the bitmap size when no free bit exists; without
			 * this check bitmap_set() below would write past
			 * the end of the bitmap.
			 */
			if (found >= ROCE_BITMAP_SZ) {
				usnic_err("Failed to allocate port for %s\n",
						usnic_transport_to_str(type));
				spin_unlock(&roce_bitmap_lock);
				goto out_fail;
			}
			port_num = found;
			/* Next search starts one past this port, wrapping
			 * back into 1..4096. */
			roce_next_port = (port_num & 4095) + 1;
		} else if (test_bit(port_num, roce_bitmap)) {
			usnic_err("Failed to allocate port for %s\n",
					usnic_transport_to_str(type));
			spin_unlock(&roce_bitmap_lock);
			goto out_fail;
		}
		bitmap_set(roce_bitmap, port_num, 1);
		spin_unlock(&roce_bitmap_lock);
	} else {
		usnic_err("Failed to allocate port - transport %s unsupported\n",
				usnic_transport_to_str(type));
		goto out_fail;
	}

	usnic_dbg("Allocating port %hu for %s\n", port_num,
			usnic_transport_to_str(type));
	return port_num;

out_fail:
	return 0;
}
struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev) { struct usnic_vnic *vnic; int err = 0; if (!pci_is_enabled(pdev)) { usnic_err("PCI dev %s is disabled\n", pci_name(pdev)); return ERR_PTR(-EINVAL); } vnic = kzalloc(sizeof(*vnic), GFP_KERNEL); if (!vnic) { usnic_err("Failed to alloc vnic for %s - out of memory\n", pci_name(pdev)); return ERR_PTR(-ENOMEM); } spin_lock_init(&vnic->res_lock); err = usnic_vnic_discover_resources(pdev, vnic); if (err) { usnic_err("Failed to discover %s resources with err %d\n", pci_name(pdev), err); goto out_free_vnic; } usnic_dbg("Allocated vnic for %s\n", usnic_vnic_pci_name(vnic)); return vnic; out_free_vnic: kfree(vnic); return ERR_PTR(err); }
/*
 * Issue CMD_DEL_FILTER for @flow and release the flow structure.
 *
 * Always frees @flow and always returns 0: a filter that cannot be
 * deleted in firmware is an unrecoverable error, so failures are
 * logged and success is faked to the caller.
 */
int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow)
{
	u64 args[2];
	int err;

	args[0] = flow->flow_id;
	err = usnic_fwd_devcmd(flow->ufdev, flow->vnic_idx,
				CMD_DEL_FILTER, &args[0], &args[1]);
	if (!err) {
		usnic_dbg("PF %s VF Idx %u Filter: %u FILTER DELETED",
				flow->ufdev->name, flow->vnic_idx,
				flow->flow_id);
	} else if (err == ERR_EINVAL) {
		usnic_dbg("Filter %u already deleted for VF Idx %u pf: %s status: %d",
				flow->flow_id, flow->vnic_idx,
				flow->ufdev->name, err);
	} else {
		usnic_err("PF %s VF Idx %u Filter: %u FILTER DELETE failed with status %d",
				flow->ufdev->name, flow->vnic_idx,
				flow->flow_id, err);
	}

	kfree(flow);
	return 0;
}
/*
 * Disable the RQ/WQ pair @qp_idx on VF @vnic_idx via CMD_QP_DISABLE.
 * Returns the devcmd status (0 on success).
 */
int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx)
{
	struct net_device *netdev = ufdev->netdev;
	u64 arg0 = qp_idx;
	u64 arg1 = CMD_QP_RQWQ;
	int err;

	err = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_DISABLE,
				&arg0, &arg1);
	if (err)
		usnic_err("PF %s VNIC Index %u RQ Index: %u DISABLE Failed with status %d",
				netdev_name(netdev), vnic_idx,
				qp_idx, err);
	else
		usnic_dbg("PF %s VNIC Index %u RQ Index: %u DISABLED",
				netdev_name(netdev), vnic_idx,
				qp_idx);

	return err;
}
/*
 * Issue devcmd @cmd for VF @vnic_idx through the enic proxy with a
 * 1000 ms wait.  Caller must hold ufdev->lock.  Returns the devcmd
 * status (0 on success).
 */
static int usnic_fwd_devcmd_locked(struct usnic_fwd_dev *ufdev, int vnic_idx,
					enum vnic_devcmd_cmd cmd, u64 *a0,
					u64 *a1)
{
	int err;

	lockdep_assert_held(&ufdev->lock);

	err = enic_api_devcmd_proxy_by_index(ufdev->netdev, vnic_idx, cmd,
						a0, a1, 1000);
	if (!err) {
		usnic_dbg("Dev %s vnic idx %u cmd %u success",
				ufdev->name, vnic_idx, cmd);
	} else if (err == ERR_EINVAL && cmd == CMD_DEL_FILTER) {
		/* Deleting an already-deleted filter is debug-worthy only. */
		usnic_dbg("Dev %s vnic idx %u cmd %u already deleted",
				ufdev->name, vnic_idx, cmd);
	} else {
		usnic_err("Dev %s vnic idx %u cmd %u failed with status %d\n",
				ufdev->name, vnic_idx, cmd, err);
	}

	return err;
}
static int usnic_nl_sk_alloc(struct usnic_nl_sk **p_sk, int protocol) { struct usnic_nl_sk *unlsk; NL_HANDLE *nlh; int err; unlsk = calloc(1, sizeof(*unlsk)); if (!unlsk) { usnic_err("Failed to allocate usnic_nl_sk struct\n"); return ENOMEM; } nlh = NL_HANDLE_ALLOC(); if (!nlh) { usnic_err("Failed to allocate nl handle\n"); err = ENOMEM; goto err_free_unlsk; } err = nl_connect(nlh, protocol); if (err < 0) { usnic_err("Failed to connnect netlink route socket error: %s\n", NL_GETERROR(err)); err = EINVAL; goto err_free_nlh; } NL_DISABLE_SEQ_CHECK(nlh); err = usnic_nl_set_rcvsk_timer(nlh); if (err < 0) goto err_close_nlh; unlsk->nlh = nlh; unlsk->seq = time(NULL); *p_sk = unlsk; return 0; err_close_nlh: nl_close(nlh); err_free_nlh: NL_HANDLE_FREE(nlh); err_free_unlsk: free(unlsk); return err; }
struct usnic_vnic_res_chunk * usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type, int cnt, void *owner) { struct usnic_vnic_res_chunk *src, *ret; struct usnic_vnic_res *res; int i; if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner) return ERR_PTR(-EINVAL); ret = kzalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) { usnic_err("Failed to allocate chunk for %s - Out of memory\n", usnic_vnic_pci_name(vnic)); return ERR_PTR(-ENOMEM); } ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC); if (!ret->res) { usnic_err("Failed to allocate resources for %s. Out of memory\n", usnic_vnic_pci_name(vnic)); kfree(ret); return ERR_PTR(-ENOMEM); } spin_lock(&vnic->res_lock); src = &vnic->chunks[type]; for (i = 0; i < src->cnt && ret->cnt < cnt; i++) { res = src->res[i]; if (!res->owner) { src->free_cnt--; res->owner = owner; ret->res[ret->cnt++] = res; } } spin_unlock(&vnic->res_lock); ret->type = type; ret->vnic = vnic; WARN_ON(ret->cnt != cnt); return ret; }
int usnic_rtnl_sk_alloc(struct usnic_rtnl_sk **p_sk) { struct usnic_rtnl_sk *unlsk; struct nl_handle *nlh; int err; unlsk = calloc(1, sizeof(*unlsk)); if (!unlsk) { usnic_err("Failed to allocate usnic_rtnl_sk struct\n"); return -ENOMEM; } nlh = nl_handle_alloc(); if (!nlh) { usnic_err("Failed to allocate nl handle\n"); err = -ENOMEM; goto err_free_unlsk; } err = nl_connect(nlh, NETLINK_ROUTE); if (err < 0) { usnic_err("Failed to connnect netlink route socket\n"); goto err_free_nlh; } nl_disable_sequence_check(nlh); err = nl_set_recv_timeout(nlh); if (err < 0) goto err_close_nlh; unlsk->nlh = nlh; unlsk->seq = time(NULL); *p_sk = unlsk; return 0; err_close_nlh: nl_close(nlh); err_free_nlh: nl_handle_destroy(nlh); err_free_unlsk: free(unlsk); return err; }
int ompi_btl_usnic_rtnl_sk_alloc(struct usnic_rtnl_sk **p_sk) { struct usnic_rtnl_sk *unlsk; struct nl_sock *sock; int err; unlsk = calloc(1, sizeof(*unlsk)); if (!unlsk) { usnic_err("Failed to allocate usnic_rtnl_sk struct\n"); return -ENOMEM; } sock = nl_socket_alloc(); if (!sock) { usnic_err("Failed to allocate nl socket\n"); err = -ENOMEM; goto err_free_unlsk; } err = nl_connect(sock, NETLINK_ROUTE); if (err < 0) { usnic_err("Failed to connnect netlink route socket\n"); goto err_free_sk; } nl_socket_disable_seq_check(sock); err = nl_set_recv_timeout(sock); if (err < 0) goto err_close_nlsk; unlsk->sock = sock; unlsk->seq = time(NULL); *p_sk = unlsk; return 0; err_close_nlsk: nl_close(sock); err_free_sk: nl_socket_free(sock); err_free_unlsk: free(unlsk); return err; }
/*
 * Allocate the ROCE custom-port bitmap.  Bit 0 is permanently set so
 * that port 0 (invalid) can never be handed out.
 * Returns 0 on success or -ENOMEM.
 */
int usnic_transport_init(void)
{
	roce_bitmap = kzalloc(ROCE_BITMAP_SZ, GFP_KERNEL);
	if (!roce_bitmap) {
		/* Added trailing newline, consistent with other log calls. */
		usnic_err("Failed to allocate bit map\n");
		return -ENOMEM;
	}

	/* Do not ever allocate bit 0, hence set it here */
	bitmap_set(roce_bitmap, 0, 1);
	return 0;
}
/*
 * Return 1 when @nlm_hdr is the reply to the request we sent on
 * @unlsk (matching port id and sequence number), 0 otherwise.
 */
static int usnic_is_nlreply_expected(struct usnic_nl_sk *unlsk,
					struct nlmsghdr *nlm_hdr)
{
	uint32_t local_port = nl_socket_get_local_port(unlsk->nlh);

	if (nlm_hdr->nlmsg_pid == local_port &&
	    nlm_hdr->nlmsg_seq == unlsk->seq)
		return 1;

	usnic_err("Not an expected reply msg pid: %u local pid: %u msg seq: %u expected seq: %u\n",
			nlm_hdr->nlmsg_pid, local_port,
			nlm_hdr->nlmsg_seq, unlsk->seq);
	return 0;
}
/*
 * Create the driver's debugfs hierarchy: <debugfs>/DRV_NAME with a
 * "flows" subdirectory and a "build-info" file.  On failure the root
 * pointer is cleared so later debugfs helpers can bail out cleanly.
 */
void usnic_debugfs_init(void)
{
	debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
	if (IS_ERR(debugfs_root)) {
		usnic_err("Failed to create debugfs root dir, check if debugfs is enabled in kernel configuration\n");
		goto out_clear_root;
	}

	flows_dentry = debugfs_create_dir("flows", debugfs_root);
	if (IS_ERR_OR_NULL(flows_dentry)) {
		/* NOTE(review): PTR_ERR() on a NULL flows_dentry logs 0
		 * rather than a real errno - confirm intended. */
		usnic_err("Failed to create debugfs flow dir with err %ld\n",
				PTR_ERR(flows_dentry));
		goto out_free_root;
	}

	debugfs_create_file("build-info", S_IRUGO, debugfs_root,
				NULL, &usnic_debugfs_buildinfo_ops);
	return;

out_free_root:
	debugfs_remove_recursive(debugfs_root);
out_clear_root:
	debugfs_root = NULL;
}
static int usnic_is_nlreply_err(struct nlmsghdr *nlm_hdr) { if (nlm_hdr->nlmsg_type == NLMSG_ERROR) { struct nlmsgerr *e = (struct nlmsgerr *)nlmsg_data(nlm_hdr); if (nlm_hdr->nlmsg_len >= (__u32)NLMSG_SIZE(sizeof(*e))) usnic_strerror(e->error, "Received a netlink error message"); else usnic_err( "Received a truncated netlink error message\n"); return 1; } return 0; }
/* Set a one-second receive timeout on @handle's underlying socket fd. */
static int nl_set_recv_timeout(struct nl_handle *handle)
{
	struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };
	int rc;

	rc = setsockopt(nl_socket_get_fd(handle), SOL_SOCKET, SO_RCVTIMEO,
			(char *)&tv, sizeof(tv));
	if (rc < 0)
		usnic_err("Failed to set SO_RCVTIMEO socket option for nl socket, err %d\n",
				rc);

	return rc;
}
void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow) { if (IS_ERR_OR_NULL(flows_dentry)) return; scnprintf(qp_flow->dentry_name, sizeof(qp_flow->dentry_name), "%u", qp_flow->flow->flow_id); qp_flow->dbgfs_dentry = debugfs_create_file(qp_flow->dentry_name, S_IRUGO, flows_dentry, qp_flow, &flowinfo_ops); if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) { usnic_err("Failed to create dbg fs entry for flow %u\n", qp_flow->flow->flow_id); } }
struct socket *usnic_transport_get_socket(int sock_fd) { struct socket *sock; int err; char buf[25]; /* sockfd_lookup will internally do a fget */ sock = sockfd_lookup(sock_fd, &err); if (!sock) { usnic_err("Unable to lookup socket for fd %d with err %d\n", sock_fd, err); return ERR_PTR(-ENOENT); } usnic_transport_sock_to_str(buf, sizeof(buf), sock); usnic_dbg("Get sock %s\n", buf); return sock; }
/*
 * Return 1 when @nlm_hdr is an NLMSG_ERROR message, 0 otherwise.  When
 * the embedded error is -ECONNREFUSED (kernel temporarily out of
 * resources), arg->retry is set so the caller can re-issue the request.
 */
static int usnic_is_nlreply_err(struct nlmsghdr *nlm_hdr,
					struct usnic_rt_cb_arg *arg)
{
	struct nlmsgerr *e;

	if (nlm_hdr->nlmsg_type != NLMSG_ERROR)
		return 0;

	e = (struct nlmsgerr *)nlmsg_data(nlm_hdr);
	if (nlm_hdr->nlmsg_len < (__u32)NLMSG_SIZE(sizeof(*e))) {
		usnic_err("Received a truncated netlink error message\n");
		return 1;
	}

	usnic_strerror(e->error, "Received a netlink error message");
	/* Sometimes nl_send() succeeds, but the request fails because
	 * the kernel is temporarily out of resources.  In these cases,
	 * the caller should try again. */
	if (e->error == -ECONNREFUSED)
		arg->retry = 1;

	return 1;
}
int ompi_btl_usnic_nl_ip_rt_lookup(struct usnic_rtnl_sk *unlsk, const char *src_ifname, uint32_t src_addr, uint32_t dst_addr, int *metric) { struct nl_msg *nlm; struct rtmsg rmsg; struct nl_lookup_arg arg; int msg_cnt; int err; int oif; oif = if_nametoindex(src_ifname); if (0 == oif) { return errno; } arg.nh_addr = 0; arg.oif = oif; arg.found = 0; arg.replied = 0; arg.unlsk = unlsk; arg.msg_count = msg_cnt = 0; memset(&rmsg, 0, sizeof(rmsg)); rmsg.rtm_family = AF_INET; rmsg.rtm_dst_len = sizeof(dst_addr)*8; rmsg.rtm_src_len = sizeof(src_addr)*8; nlm = nlmsg_alloc_simple(RTM_GETROUTE, 0); nlmsg_append(nlm, &rmsg, sizeof(rmsg), NLMSG_ALIGNTO); nla_put_u32(nlm, RTA_DST, dst_addr); nla_put_u32(nlm, RTA_SRC, src_addr); err = rtnl_send_ack_disable(unlsk, nlm); nlmsg_free(nlm); if (err < 0) { usnic_err("Failed to send rtnl query %s\n", nl_geterror(err)); return err; } err = nl_socket_modify_cb(unlsk->sock, NL_CB_MSG_IN, NL_CB_CUSTOM, rtnl_raw_parse_cb, &arg); if (err != 0) { usnic_err("Failed to setup callback function, error %s\n", nl_geterror(err)); return err; } while (!arg.replied) { err = nl_recvmsgs_default(unlsk->sock); if (err < 0) { /* err will be returned as -NLE_AGAIN if the socket times out */ usnic_err("Failed to receive rtnl query results %s\n", nl_geterror(err)); return err; } } if (arg.found) { if (metric != NULL) { *metric = arg.metric; } return 0; } else { return -1; } }
/*
 * Look up the IPv4 route from @src_addr to @dst_addr restricted to the
 * outgoing interface index @oif.  On success returns 0 and stores the
 * next-hop address in *nh_addr (0 when no RTA_GATEWAY was present);
 * returns a positive errno value on failure, EHOSTUNREACH when no
 * route matched.  A fresh NETLINK_ROUTE socket is allocated and freed
 * for every lookup.
 */
int usnic_nl_rt_lookup(uint32_t src_addr, uint32_t dst_addr, int oif,
			uint32_t *nh_addr)
{
	struct usnic_nl_sk *unlsk;
	struct nl_msg *nlm;
	struct rtmsg rmsg;
	struct usnic_rt_cb_arg arg;
	int err;

	unlsk = NULL;
	err = usnic_nl_sk_alloc(&unlsk, NETLINK_ROUTE);
	if (err)
		return err;

	memset(&rmsg, 0, sizeof(rmsg));
	rmsg.rtm_family = AF_INET;
	rmsg.rtm_dst_len = sizeof(dst_addr) * CHAR_BIT;
	rmsg.rtm_src_len = sizeof(src_addr) * CHAR_BIT;

	nlm = nlmsg_alloc_simple(RTM_GETROUTE, 0);
	if (!nlm) {
		/* NOTE(review): err is 0 here, so NL_GETERROR(err) does
		 * not describe the real failure - log is best-effort. */
		usnic_err("Failed to alloc nl message, %s\n",
				NL_GETERROR(err));
		err = ENOMEM;
		goto out;
	}
	nlmsg_append(nlm, &rmsg, sizeof(rmsg), NLMSG_ALIGNTO);
	nla_put_u32(nlm, RTA_DST, dst_addr);
	nla_put_u32(nlm, RTA_SRC, src_addr);

	err = usnic_nl_send_query(unlsk, nlm, NETLINK_ROUTE, NLM_F_REQUEST);
	nlmsg_free(nlm);
	if (err < 0) {
		usnic_err("Failed to send RTM_GETROUTE query message, error %s\n",
				NL_GETERROR(err));
		err = EINVAL;
		goto out;
	}

	memset(&arg, 0, sizeof(arg));
	arg.oif = oif;
	arg.unlsk = unlsk;
	/* usnic_rt_raw_parse_cb fills in arg.found / arg.nh_addr for
	 * each incoming message. */
	err = nl_socket_modify_cb(unlsk->nlh, NL_CB_MSG_IN, NL_CB_CUSTOM,
					usnic_rt_raw_parse_cb, &arg);
	if (err != 0) {
		usnic_err("Failed to setup callback function, error %s\n",
				NL_GETERROR(err));
		err = EINVAL;
		goto out;
	}

	/* Macro: receive replies; sets err / jumps to out on failure. */
	NL_RECVMSGS(unlsk->nlh, arg, EHOSTUNREACH, err, out);

	if (arg.found) {
		*nh_addr = arg.nh_addr;
		err = 0;
	} else {
		err = EHOSTUNREACH;
	}

out:
	usnic_nl_sk_free(unlsk);
	return err;
}
/*
 * Allocate a forwarding flow: build a filter TLV in DMA-coherent
 * memory, issue CMD_ADD_FILTER for the VF named in @uaction, and
 * return the resulting flow object.
 *
 * Returns the flow on success or an ERR_PTR on failure.  The TLV
 * buffer is only needed for the duration of the devcmd and is always
 * freed before returning (both success and failure fall through
 * out_free_tlv).
 */
struct usnic_fwd_flow*
usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
				struct usnic_filter_action *uaction)
{
	struct filter_tlv *tlv;
	struct pci_dev *pdev;
	struct usnic_fwd_flow *flow;
	uint64_t a0, a1;
	uint64_t tlv_size;
	dma_addr_t tlv_pa;
	int status;

	pdev = ufdev->pdev;
	/* Two TLV headers: one for the filter, one for the action. */
	tlv_size = (2*sizeof(struct filter_tlv) +
		sizeof(struct filter) +
		sizeof(struct filter_action));

	flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
	if (!flow)
		return ERR_PTR(-ENOMEM);

	tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa);
	if (!tlv) {
		usnic_err("Failed to allocate memory\n");
		status = -ENOMEM;
		goto out_free_flow;
	}

	fill_tlv(tlv, filter, &uaction->action);

	spin_lock(&ufdev->lock);
	status = usnic_fwd_dev_ready_locked(ufdev);
	if (status) {
		usnic_err("Forwarding dev %s not ready with status %d\n",
				ufdev->name, status);
		goto out_free_tlv;
	}

	status = validate_filter_locked(ufdev, filter);
	if (status) {
		usnic_err("Failed to validate filter with status %d\n",
				status);
		goto out_free_tlv;
	}

	/* Issue Devcmd */
	a0 = tlv_pa;
	a1 = tlv_size;
	status = usnic_fwd_devcmd_locked(ufdev, uaction->vnic_idx,
					CMD_ADD_FILTER, &a0, &a1);
	if (status) {
		usnic_err("VF %s Filter add failed with status:%d",
				ufdev->name, status);
		status = -EFAULT;
		goto out_free_tlv;
	} else {
		/* On success a0 carries the firmware-assigned filter id. */
		usnic_dbg("VF %s FILTER ID:%llu", ufdev->name, a0);
	}

	flow->flow_id = (uint32_t) a0;
	flow->vnic_idx = uaction->vnic_idx;
	flow->ufdev = ufdev;

out_free_tlv:
	spin_unlock(&ufdev->lock);
	pci_free_consistent(pdev, tlv_size, tlv, tlv_pa);
	if (!status)
		return flow;
out_free_flow:
	kfree(flow);
	return ERR_PTR(status);
}
int opal_btl_usnic_nl_ip_rt_lookup(struct usnic_rtnl_sk *unlsk, const char *src_ifname, uint32_t src_addr, uint32_t dst_addr, int *metric) { struct nl_msg *nlm; struct rtmsg rmsg; struct nl_lookup_arg arg; int msg_cnt; int err; int oif; oif = if_nametoindex(src_ifname); if (0 == oif) { return errno; } arg.nh_addr = 0; arg.oif = oif; arg.found = 0; arg.replied = 0; arg.unlsk = unlsk; arg.msg_count = msg_cnt = 0; memset(&rmsg, 0, sizeof(rmsg)); rmsg.rtm_family = AF_INET; rmsg.rtm_dst_len = sizeof(dst_addr)*8; rmsg.rtm_src_len = sizeof(src_addr)*8; nlm = nlmsg_alloc_simple(RTM_GETROUTE, 0); nlmsg_append(nlm, &rmsg, sizeof(rmsg), NLMSG_ALIGNTO); nla_put_u32(nlm, RTA_DST, dst_addr); nla_put_u32(nlm, RTA_SRC, src_addr); err = rtnl_send_ack_disable(unlsk, nlm); nlmsg_free(nlm); if (err < 0) { usnic_err("Failed to send nl route message to kernel, " "error %s\n", nl_geterror()); return err; } err = nl_socket_modify_cb(unlsk->nlh, NL_CB_MSG_IN, NL_CB_CUSTOM, rtnl_raw_parse_cb, &arg); if (err != 0) { usnic_err("Failed to setup callback function, error %s\n", nl_geterror()); return err; } while (!arg.replied) { err = nl_recvmsgs_default(unlsk->nlh); if (err < 0) { usnic_err("Failed to receive nl route message from " "kernel, error %s\n", nl_geterror()); return err; } /* * the return value of nl_recvmsgs_default does not tell * whether it returns because of successful read or socket * timeout. So we compare msg count before and after the call * to decide if no new message arrives. In such case, * this function needs to terminate to prevent the caller from * blocking forever * NL_CB_MSG_IN traps every received message, so * there should be no premature exit */ if (msg_cnt != arg.msg_count) msg_cnt = arg.msg_count; else break; } if (arg.found) { if (metric != NULL) { *metric = arg.metric; } return 0; } else { return -1; } }
/*
 * libnl NL_CB_MSG_IN callback for usnic_nl_rt_lookup().  Validates
 * that @msg is the RTM_NEWROUTE reply to our query, records the
 * next-hop gateway for the expected outgoing interface in lookup_arg,
 * and returns NL_STOP once a matching reply has been consumed; bogus
 * or unrelated messages are skipped (NL_SKIP).
 */
static int usnic_rt_raw_parse_cb(struct nl_msg *msg, void *arg)
{
	struct usnic_rt_cb_arg *lookup_arg = (struct usnic_rt_cb_arg *)arg;
	struct usnic_nl_sk *unlsk = lookup_arg->unlsk;
	struct nlmsghdr *nlm_hdr = nlmsg_hdr(msg);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX + 1];
	int found = 0;
	int err;

	INC_CB_MSGCNT(lookup_arg);

	/* Ignore replies that do not match our pid/sequence number. */
	if (!usnic_is_nlreply_expected(unlsk, nlm_hdr)) {
		usnic_nlmsg_dump(msg);
		return NL_SKIP;
	}

	/* Error replies may set lookup_arg->retry (see the helper). */
	if (usnic_is_nlreply_err(nlm_hdr, lookup_arg)) {
		usnic_nlmsg_dump(msg);
		return NL_SKIP;
	}

	if (nlm_hdr->nlmsg_type != RTM_NEWROUTE) {
		char buf[128];
		nl_nlmsgtype2str(nlm_hdr->nlmsg_type, buf, sizeof(buf));
		usnic_err("Received an invalid route request reply message type: %s\n",
				buf);
		usnic_nlmsg_dump(msg);
		return NL_SKIP;
	}

	rtm = nlmsg_data(nlm_hdr);
	if (rtm->rtm_family != AF_INET) {
		usnic_err("RTM message contains invalid AF family: %u\n",
				rtm->rtm_family);
		usnic_nlmsg_dump(msg);
		return NL_SKIP;
	}

	err = nlmsg_parse(nlm_hdr, sizeof(struct rtmsg), tb, RTA_MAX,
				route_policy);
	if (err < 0) {
		usnic_err("nlmsg parse error %s\n", NL_GETERROR(err));
		usnic_nlmsg_dump(msg);
		return NL_SKIP;
	}

	/* Only accept a route leaving through the queried interface. */
	if (tb[RTA_OIF]) {
		if (nla_get_u32(tb[RTA_OIF]) == (uint32_t)lookup_arg->oif)
			found = 1;
		else
			usnic_err("Retrieved route has a different outgoing interface %d (expected %d)\n",
					nla_get_u32(tb[RTA_OIF]),
					lookup_arg->oif);
	}

	/* Absent RTA_GATEWAY means directly connected; nh_addr stays 0. */
	if (found && tb[RTA_GATEWAY])
		lookup_arg->nh_addr = nla_get_u32(tb[RTA_GATEWAY]);

	lookup_arg->found = found;

	return NL_STOP;
}
/*
 * libnl NL_CB_MSG_IN callback for the rtnl route lookup.  Resets the
 * per-message bookkeeping in @arg, validates that @msg answers our
 * query, and on an RTM_NEWROUTE reply records the metric and gateway
 * for the expected outgoing interface.
 */
static int rtnl_raw_parse_cb(struct nl_msg *msg, void *arg)
{
	struct nl_lookup_arg *lookup_arg = (struct nl_lookup_arg *)arg;
	struct usnic_rtnl_sk *unlsk = lookup_arg->unlsk;
	struct nlmsghdr *nlm_hdr = nlmsg_hdr(msg);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX + 1];
	int found = 0;
	int err;

#if WANT_DEBUG_MSGS
	nl_msg_dump(msg, stderr);
#endif /* WANT_DEBUG_MSGS */

	/* Start from a clean slate for every message; msg_count lets
	 * the caller detect whether anything arrived at all. */
	lookup_arg->nh_addr = 0;
	lookup_arg->found = 0;
	lookup_arg->replied = 0;
	lookup_arg->msg_count++;

	if (nlm_hdr->nlmsg_pid != nl_socket_get_local_port(unlsk->nlh) ||
			nlm_hdr->nlmsg_seq != unlsk->seq) {
		usnic_err("Not an expected reply msg pid: %u local pid: %u "
				"msg seq: %u expected seq: %u\n",
				nlm_hdr->nlmsg_pid,
				nl_socket_get_local_port(unlsk->nlh),
				nlm_hdr->nlmsg_seq, unlsk->seq);
		return NL_SKIP;
	}
	lookup_arg->replied = 1;

	if (nlm_hdr->nlmsg_type == NLMSG_ERROR) {
		struct nlmsgerr *e = (struct nlmsgerr *)nlmsg_data(nlm_hdr);

		if (nlm_hdr->nlmsg_len >=
				(__u32)nlmsg_msg_size(sizeof(*e))) {
			usnic_err("Received a netlink error message %d\n",
					e->error);
		} else {
			usnic_err("Received a truncated netlink error message\n");
		}
		return NL_STOP;
	}

	if (nlm_hdr->nlmsg_type != RTM_NEWROUTE) {
		usnic_err("Received an invalid route request reply message\n");
		return NL_STOP;
	}

	rtm = nlmsg_data(nlm_hdr);
	if (rtm->rtm_family != AF_INET) {
		usnic_err("RTM message contains invalid AF family\n");
		return NL_STOP;
	}

	init_route_policy(route_policy);
	err = nlmsg_parse(nlm_hdr, sizeof(struct rtmsg), tb, RTA_MAX,
				route_policy);
	if (err < 0) {
		usnic_err("nlmsg parse error %d\n", err);
		return NL_STOP;
	}

	/* Only a route leaving through the queried interface counts. */
	if (tb[RTA_OIF]) {
		if (nla_get_u32(tb[RTA_OIF]) == (uint32_t)lookup_arg->oif)
			found = 1;
		else
			usnic_err("Retrieved route has a different outgoing interface %d (expected %d)\n",
					nla_get_u32(tb[RTA_OIF]),
					lookup_arg->oif);
	}

	if (found && tb[RTA_METRICS]) {
		lookup_arg->metric = (int)nla_get_u32(tb[RTA_METRICS]);
	}

	/* Absent RTA_GATEWAY means directly connected; nh_addr stays 0. */
	if (found && tb[RTA_GATEWAY])
		lookup_arg->nh_addr = nla_get_u32(tb[RTA_GATEWAY]);

	lookup_arg->found = found;

	return NL_STOP;
}
int usnic_nl_rt_lookup(uint32_t src_addr, uint32_t dst_addr, int oif, uint32_t *nh_addr) { struct usnic_nl_sk *unlsk; struct nl_msg *nlm; struct rtmsg rmsg; struct usnic_rt_cb_arg arg; int err; retry: unlsk = NULL; err = usnic_nl_sk_alloc(&unlsk, NETLINK_ROUTE); if (err) return err; memset(&rmsg, 0, sizeof(rmsg)); rmsg.rtm_family = AF_INET; rmsg.rtm_dst_len = sizeof(dst_addr) * CHAR_BIT; rmsg.rtm_src_len = sizeof(src_addr) * CHAR_BIT; nlm = nlmsg_alloc_simple(RTM_GETROUTE, 0); if (!nlm) { usnic_err("Failed to alloc nl message, %s\n", NL_GETERROR(err)); err = ENOMEM; goto out; } nlmsg_append(nlm, &rmsg, sizeof(rmsg), NLMSG_ALIGNTO); nla_put_u32(nlm, RTA_DST, dst_addr); nla_put_u32(nlm, RTA_SRC, src_addr); err = usnic_nl_send_query(unlsk, nlm, NETLINK_ROUTE, NLM_F_REQUEST); nlmsg_free(nlm); if (err < 0) { usnic_err("Failed to send RTM_GETROUTE query message, error %s\n", NL_GETERROR(err)); err = EINVAL; goto out; } memset(&arg, 0, sizeof(arg)); arg.oif = oif; arg.unlsk = unlsk; err = nl_socket_modify_cb(unlsk->nlh, NL_CB_MSG_IN, NL_CB_CUSTOM, usnic_rt_raw_parse_cb, &arg); if (err != 0) { usnic_err("Failed to setup callback function, error %s\n", NL_GETERROR(err)); err = EINVAL; goto out; } /* Sometimes the recvmsg can fail because something is * temporarily out of resources. In this case, delay a little * and try again. */ do { err = 0; NL_RECVMSGS(unlsk->nlh, arg, EAGAIN, err, out); if (err == EAGAIN) { usleep(5); } } while (err == EAGAIN); /* If we got a reply back that indicated that the kernel was * too busy to handle this request, delay a little and try * again. */ if (arg.retry) { usleep(5); goto retry; } if (arg.found) { *nh_addr = arg.nh_addr; err = 0; } else { err = EHOSTUNREACH; } out: usnic_nl_sk_free(unlsk); return err; }