Exemplo n.º 1
0
static mca_mtl_base_module_t*
ompi_mtl_ofi_component_init(bool enable_progress_threads,
                            bool enable_mpi_threads)
{
    int ret, fi_version;
    struct fi_info *hints;
    struct fi_info *providers = NULL, *prov = NULL;
    struct fi_cq_attr cq_attr = {0};
    struct fi_av_attr av_attr = {0};
    char ep_name[FI_NAME_MAX] = {0};
    size_t namelen;

    /**
     * Hints to filter providers
     * See man fi_getinfo for a list of all filters
     * mode:  Select capabilities MTL is prepared to support.
     *        In this case, MTL will pass in context into communication calls
     * ep_type:  reliable datagram operation
     * caps:     Capabilities required from the provider.
     *           Tag matching is specified to implement MPI semantics.
     * msg_order: Guarantee that messages with same tag are ordered.
     */
    hints = fi_allocinfo();
    if (!hints) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: Could not allocate fi_info\n",
                            __FILE__, __LINE__);
        goto error;
    }
    hints->mode               = FI_CONTEXT;
    hints->ep_attr->type      = FI_EP_RDM;      /* Reliable datagram         */
    hints->caps               = FI_TAGGED;      /* Tag matching interface    */
    hints->tx_attr->msg_order = FI_ORDER_SAS;
    hints->rx_attr->msg_order = FI_ORDER_SAS;

    hints->domain_attr->threading        = FI_THREAD_UNSPEC;
    hints->domain_attr->control_progress = FI_PROGRESS_MANUAL;

    /**
     * FI_VERSION provides binary backward and forward compatibility support
     * Specify the version of OFI is coded to, the provider will select struct
     * layouts that are compatible with this version.
     */
    fi_version = FI_VERSION(1, 0);

    /**
     * fi_getinfo:  returns information about fabric  services for reaching a
     * remote node or service.  this does not necessarily allocate resources.
     * Pass NULL for name/service because we want a list of providers supported.
     */
    ret = fi_getinfo(fi_version,    /* OFI version requested                    */
                     NULL,          /* Optional name or fabric to resolve       */
                     NULL,          /* Optional service name or port to request */
                     0ULL,          /* Optional flag                            */
                     hints,        /* In: Hints to filter providers            */
                     &providers);   /* Out: List of matching providers          */
    if (0 != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_getinfo failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * Select a provider from the list returned by fi_getinfo().
     */
    prov = select_ofi_provider(providers);
    if (!prov) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: select_ofi_provider: no provider found\n",
                            __FILE__, __LINE__);
        goto error;
    }


    /**
     * Open fabric
     * The getinfo struct returns a fabric attribute struct that can be used to
     * instantiate the virtual or physical network. This opens a "fabric
     * provider". See man fi_fabric for details.
     */
    ret = fi_fabric(prov->fabric_attr,    /* In:  Fabric attributes             */
                    &ompi_mtl_ofi.fabric, /* Out: Fabric handle                 */
                    NULL);                /* Optional context for fabric events */
    if (0 != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_fabric failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * Create the access domain, which is the physical or virtual network or
     * hardware port/collection of ports.  Returns a domain object that can be
     * used to create endpoints.  See man fi_domain for details.
     */
    ret = fi_domain(ompi_mtl_ofi.fabric,  /* In:  Fabric object                 */
                    prov,                 /* In:  Provider                      */
                    &ompi_mtl_ofi.domain, /* Out: Domain oject                  */
                    NULL);                /* Optional context for domain events */
    if (0 != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_domain failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * Create a transport level communication endpoint.  To use the endpoint,
     * it must be bound to completion counters or event queues and enabled,
     * and the resources consumed by it, such as address vectors, counters,
     * completion queues, etc.
     * see man fi_endpoint for more details.
     */
    ret = fi_endpoint(ompi_mtl_ofi.domain, /* In:  Domain object   */
                      prov,                /* In:  Provider        */
                      &ompi_mtl_ofi.ep,    /* Out: Endpoint object */
                      NULL);               /* Optional context     */
    if (0 != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_endpoint failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * Save the maximum inject size.
     */
    ompi_mtl_ofi.max_inject_size = prov->tx_attr->inject_size;

    /**
     * Create the objects that will be bound to the endpoint.
     * The objects include:
     *     - completion queue for events
     *     - address vector of other endpoint addresses
     *     - dynamic memory-spanning memory region
     */
    cq_attr.format = FI_CQ_FORMAT_TAGGED;
    ret = fi_cq_open(ompi_mtl_ofi.domain, &cq_attr, &ompi_mtl_ofi.cq, NULL);
    if (ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_cq_open failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * The remote fi_addr will be stored in the ofi_endpoint struct.
     * So, we use the AV in "map" mode.
     */
    av_attr.type = FI_AV_MAP;
    ret = fi_av_open(ompi_mtl_ofi.domain, &av_attr, &ompi_mtl_ofi.av, NULL);
    if (ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_av_open failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * Bind the CQ and AV to the endpoint object.
     */
    ret = fi_ep_bind(ompi_mtl_ofi.ep,
                     (fid_t)ompi_mtl_ofi.cq,
                     FI_SEND | FI_RECV);
    if (0 != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_bind CQ-EP failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    ret = fi_ep_bind(ompi_mtl_ofi.ep,
                     (fid_t)ompi_mtl_ofi.av,
                     0);
    if (0 != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_bind AV-EP failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * Enable the endpoint for communication
     * This commits the bind operations.
     */
    ret = fi_enable(ompi_mtl_ofi.ep);
    if (0 != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_enable failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * Free providers info since it's not needed anymore.
     */
    fi_freeinfo(hints);
    hints = NULL;
    fi_freeinfo(providers);
    providers = NULL;

    /**
     * Get our address and publish it with modex.
     */
    namelen = sizeof(ep_name);
    ret = fi_getname((fid_t)ompi_mtl_ofi.ep, &ep_name[0], &namelen);
    if (ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_getname failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    OFI_COMPAT_MODEX_SEND(ret,
                          &mca_mtl_ofi_component.super.mtl_version,
                          &ep_name,
                          namelen);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: modex_send failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    ompi_mtl_ofi.epnamelen = namelen;

    /**
     * Set the ANY_SRC address.
     */
    ompi_mtl_ofi.any_addr = FI_ADDR_UNSPEC;

    /**
     * Activate progress callback.
     */
    ret = opal_progress_register(ompi_mtl_ofi_progress_no_inline);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: opal_progress_register failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    return &ompi_mtl_ofi.base;

error:
    if (providers) {
        (void) fi_freeinfo(providers);
    }
    if (hints) {
        (void) fi_freeinfo(hints);
    }
    if (ompi_mtl_ofi.av) {
        (void) fi_close((fid_t)ompi_mtl_ofi.av);
    }
    if (ompi_mtl_ofi.cq) {
        (void) fi_close((fid_t)ompi_mtl_ofi.cq);
    }
    if (ompi_mtl_ofi.ep) {
        (void) fi_close((fid_t)ompi_mtl_ofi.ep);
    }
    if (ompi_mtl_ofi.domain) {
        (void) fi_close((fid_t)ompi_mtl_ofi.domain);
    }
    if (ompi_mtl_ofi.fabric) {
        (void) fi_close((fid_t)ompi_mtl_ofi.fabric);
    }
    return NULL;
}
Exemplo n.º 2
0
static int common_setup(void)
{
	struct fi_info *fi;
	int ret;

	ret = getaddr(src_addr, port, (struct sockaddr **) &hints.src_addr,
			  (socklen_t *) &hints.src_addrlen);
	if (ret)
		printf("source address error %s\n", gai_strerror(ret));

	ret = fi_getinfo(FI_VERSION(1, 0), dst_addr, port, 0, &hints, &fi);
	if (ret) {
		printf("fi_getinfo %s\n", strerror(-ret));
		goto err0;
	}
	if (fi->ep_attr->max_msg_size) {
		max_msg_size = fi->ep_attr->max_msg_size;
	}

	ret = fi_fabric(fi->fabric_attr, &fab, NULL);
	if (ret) {
		printf("fi_fabric %s\n", fi_strerror(-ret));
		goto err1;
	}
	if (fi->mode & FI_MSG_PREFIX) {
		prefix_len = fi->ep_attr->msg_prefix_size;
	}

	ret = fi_domain(fab, fi, &dom, NULL);
	if (ret) {
		printf("fi_fdomain %s %s\n", fi_strerror(-ret),
			fi->domain_attr->name);
		goto err2;
	}

	if (fi->src_addr != NULL) {
		((struct sockaddr_in *)fi->src_addr)->sin_port = 
			((struct sockaddr_in *)hints.src_addr)->sin_port;

		if (dst_addr == NULL) {
			printf("Local address %s:%d\n",
					inet_ntoa(((struct sockaddr_in *)fi->src_addr)->sin_addr),
					ntohs(((struct sockaddr_in *)fi->src_addr)->sin_port));
		}
	}

	ret = fi_endpoint(dom, fi, &ep, NULL);
	if (ret) {
		printf("fi_endpoint %s\n", fi_strerror(-ret));
		goto err3;
	}

	ret = alloc_ep_res(fi);
	if (ret) {
		printf("alloc_ep_res %s\n", fi_strerror(-ret));
		goto err4;
	}

	ret = bind_ep_res();
	if (ret) {
		printf("bind_ep_res %s\n", fi_strerror(-ret));
		goto err5;
	}

	if (hints.src_addr)
		free(hints.src_addr);
	fi_freeinfo(fi);
	return 0;

err5:
	free_ep_res();
err4:
	fi_close(&ep->fid);
err3:
	fi_close(&dom->fid);
err2:
	fi_close(&fab->fid);
err1:
	fi_freeinfo(fi);
err0:
	if (hints.src_addr)
		free(hints.src_addr);
	return ret;
}
Exemplo n.º 3
0
int do_test(void)
{
	struct fi_cq_msg_entry	comp;
	int			len = msg_len * post_depth;
	int			msg_cnt = num_msgs;
	int			tx_bufs_sent = 0;
	int			ret;
	char			*mp;
	u64			time_elap;
#if SREAD == 0
	int			eagain_cnt = EAGAIN_TRIES;
#endif

	print_trace("in\n");

	if (!ctx.buf) {
		ctx.buf = kmalloc(len, GFP_KERNEL);
		if (!ctx.buf) {
			print_err("kalloc failed!\n");
			return -ENOMEM;
		}

		ret = fi_mr_reg(ctx.domain, ctx.buf, len, 0, 0, 0, 0,
				&ctx.mr, NULL);
		if (ret) {
			print_err("fi_mr_reg returned %d\n", ret);
			kfree(ctx.buf);
			ctx.buf = ERR_PTR(-EFAULT);
			return ret;
		}
	} else if (IS_ERR(ctx.buf))
		return 0;

	print_msg("post_depth %d num_msgs %d msg_len %d SREAD[%d]\n",
		post_depth, num_msgs, msg_len, SREAD);

	print_dbg("ctx.buf %p '%s' len %ld msg_len %d\n",
		ctx.buf, ctx.buf, strlen(ctx.buf)+1, msg_len);

	time_elap = get_jiffies_64();

	for (mp = ctx.buf; msg_cnt > 0 && !kthread_should_stop(); ) {
		int post_cnt, cnt;

		post_cnt = (msg_cnt > post_depth ? post_depth : msg_cnt);

		for (cnt = 0, mp = ctx.buf; cnt < post_cnt;
			cnt++, mp += msg_len) {

			if (verify) {
				sprintf(mp, TEST_MESSAGE, tx_bufs_sent);
				tx_bufs_sent++;
			}

			ret = fi_send(ctx.ep, mp, msg_len, fi_mr_desc(ctx.mr),
					0, mp);
			if (ret) {
				print_err("fi_send returned %d '%s'\n",
					ret, fi_strerror(ret));
				return ret;
			}
			if (kthread_should_stop())
				return -EINTR;
		}

		/* reap completions */
		for (cnt = 0; cnt < post_cnt; cnt++) {
#if SREAD
			ret = fi_cq_sread(ctx.scq, &comp, 1, 0, TIMEOUT);
			if (ret == -ETIMEDOUT) {
				print_msg("%s(ETIMEDOUT) cnt %d post_cnt %d "
					"msg_cnt %d\n", "fi_cq_sread", cnt,
					post_cnt, msg_cnt);
			}
			if (kthread_should_stop())
				return -EINTR;
#else
			do {
				ret = fi_cq_read(ctx.scq, &comp, 1);
				if (ret == 0 || ret == -EAGAIN) {
					if (--eagain_cnt <= 0) {
						dprint(DEBUG_HIGH,
							"%s(resched %d) cnt "
							"%d post_cnt %d\n",
							"fi_cq_read", ret, cnt,
							post_cnt);
						eagain_cnt = EAGAIN_TRIES;
						schedule();
					}
				}
				if (kthread_should_stop())
					return -EINTR;
			} while (ret == 0 || ret == -EAGAIN);

#endif
			if (ret < 0) {
				struct fi_cq_err_entry cqe = { 0 };
				int rc;

				rc = fi_cq_readerr(ctx.scq, &cqe, 0);
				print_err("fi_cq_read returned %d '%s'\n",
					ret, fi_strerror(ret));
				if (rc) {
					char buf[64];

					print_err("fi_cq_readerr() err '%s'(%d)"
						"\n", fi_strerror(cqe.err),
						cqe.err);
					print_err("fi_cq_readerr() prov_err "
						"'%s'(%d)\n",
						fi_cq_strerror(ctx.scq,
							cqe.prov_errno,
							cqe.err_data, buf,
							sizeof(buf)),
						cqe.prov_errno);
				}
				return ret;
			}
			if (!ret)
				print_err("fi_cq_sread no completion? ret %d\n",
					ret);
#if 0
			if ((char *)comp.op_context < (char *)ctx.buf ||
				(char *)comp.op_context >= (char *)
						&ctx.buf[msg_len*post_depth]) {

				print_err("cq.op_context(%p) not in range "
					"[ctx.buf(%p) ... &ctx.buf[%d](%p)]\n",
						(void *)comp.op_context,
						(void *)ctx.buf,
						msg_len,
						(void *)&ctx.buf[msg_len]);
			}
#endif
			if (verify)
				print_msg("Tx '%s'\n",
					(char *) comp.op_context);
		}
		msg_cnt -= post_cnt;
	}
	time_elap = get_jiffies_64() - time_elap;

#define AGIG (1024UL*1024UL*1024UL)
#define AMEG (1024UL*1024UL)
#define AKILO (1024UL)
	{
		struct timeval	tv;
		ulong		rate, rate_mod, bytes, units_of;
		char		units;

		jiffies_to_timeval(time_elap, &tv);

		bytes = (ulong) num_msgs * (ulong) msg_len;

		if (bytes >= AKILO && tv.tv_sec > 0) {
			rate = bytes / tv.tv_sec;
			rate_mod = bytes % tv.tv_sec;
			if (rate >= AGIG) {
				units = 'G';
				units_of = AGIG;
			} else if (rate >= AMEG) {
				units = 'M';
				units_of = AMEG;
			} else {
				units = 'K';
				units_of = AKILO;
			}
			rate /=  units_of;
		} else {
			rate = rate_mod = 0UL;
			units = ' ';
			units_of = 1UL;
		}

		print_info("Tx %d msgs (%lu.%lu%cB) @ ~%lu.%lu %cB/sec (%ld sec %ld "
			"usec)\n",
				num_msgs, (bytes/units_of), (bytes % units_of),
				units, rate, rate_mod, units,
				tv.tv_sec, tv.tv_usec);
	}

	return 0;
}
Exemplo n.º 4
0
static void setup_ofi_active(struct fi_info *info,
                             struct fid_ep **ep)
{
    // Make an EQ
    int ret;
    ret = fi_endpoint(fidev.domain, info, ep, NULL);
    if (0 != ret) {
        error("fi_endpoint failed");
    }

#if WANT_FDS
    // Add the EQ FD to the epoll fd
    static struct epoll_event edt;
    memset(&edt, 0, sizeof(edt));
    edt.events = EPOLLIN;
    edt.data.u32 = 2222;
    ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fidev.eq_fd, &edt);
    if (ret < 0) {
        error("epoll_ctl failed");
    }
#endif

    // Bind the EP to the EQ
    ret = fi_ep_bind(*ep, &fidev.eq->fid, 0);
    if (0 != ret) {
        error("fi_ep_bind(eq) failed");
    }

    // Make a CQ
    struct fi_cq_attr cq_attr;
    memset(&cq_attr, 0, sizeof(cq_attr));
    cq_attr.format = FI_CQ_FORMAT_CONTEXT;
    cq_attr.wait_obj = FI_WAIT_FD;
    cq_attr.size = 32; // JMS POC
    ret = fi_cq_open(fidev.domain, &cq_attr, &ficonn.cq, NULL);
    if (ret != 0) {
        error("fi_cq_open failed");
    }

    // Bind the CQ TX and RX queues to the EQ
    ret = fi_ep_bind(*ep, &ficonn.cq->fid, FI_TRANSMIT);
    if (0 != ret) {
        error("fi_ep_bind(cq tx) failed");
    }
    ret = fi_ep_bind(*ep, &ficonn.cq->fid, FI_RECV);
    if (0 != ret) {
        error("fi_ep_bind(cq rx) failed");
    }

#if WANT_FDS
    // Get the fd associated with this CQ
    ret = fi_control(&(ficonn.cq->fid), FI_GETWAIT, &ficonn.cq_fd);
    if (ret != 0) {
        error("fi_control to get cq fq failed");
    }
#endif

    // Enable the EP!
    ret = fi_enable(*ep);
    if (0 != ret) {
        error("fi_enable failed");
    }

    // Register the buffers (must use different keys for each)
    ret = fi_mr_reg(fidev.domain, send_buffer, sizeof(send_buffer),
                    FI_SEND, 0, (uintptr_t) send_buffer,
                    0, &ficonn.send_mr, NULL);
    if (ret != 0) {
        error("fi_mr_reg(send) failed\n");
    }
    ret = fi_mr_reg(fidev.domain, recv_buffer, sizeof(recv_buffer),
                    FI_RECV, 0, (uintptr_t) recv_buffer,
                    0, &ficonn.recv_mr, NULL);
    if (ret != 0) {
        printf("ERROR: ret=%d, %s\n", ret, fi_strerror(-ret));
        error("fi_mr_reg(recv) failed\n");
    }

}
Exemplo n.º 5
0
static void test_connect_with_accept_blocking_on_eq_fq_CLIENT(void)
{
    int ret;

    printf("CLIENT running\n");

    // Get the server's node (IP addr) and service (port)
    MPI_Recv(ofi_node, sizeof(ofi_node) - 1, MPI_CHAR,
             0, 101, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    MPI_Recv(ofi_service, sizeof(ofi_service) - 1, MPI_CHAR,
             0, 102, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    printf("CLIENT received via MPI: %s / %s\n", ofi_node, ofi_service);

    //setup_ofi(ofi_node, ofi_service);
    setup_ofi(NULL, NULL, 0);

    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    inet_aton(ofi_node, &sin.sin_addr);
    sin.sin_port = htons(atoi(ofi_service));
    printf("CLIENT translated: %s\n", addrstr(&sin));

    setup_ofi_active(fidev.info, &ficonn.ep);

    // Print server addr
    printf("CLIENT connecting to %s\n", addrstr(&sin));

    // Connect!
    printf("Client connecting...\n");
    ret = fi_connect(ficonn.ep,
                     //fidev.info->dest_addr,
                     &sin,
                     (void*) client_data, sizeof(client_data));
    if (ret < 0) {
        error("fi_connect failed");
    }

#if WANT_FDS
    // Now wait for the listen to complete
    int nevents;
    #define NEVENTS 32
    struct epoll_event events[NEVENTS];
    int timeout = 10000;
    while (1) {
        printf("CLIENT blocking on epoll\n");
        nevents = epoll_wait(epoll_fd, events, NEVENTS, timeout);
        if (nevents < 0) {
            if (errno != EINTR) {
                error("client epoll wait failed");
            } else {
                continue;
            }
        } else {
            printf("CLIENT successfully woke up from epoll! %d events\n", nevents);
            for (int i = 0; i < nevents; ++i) {
                if (events[i].data.u32 != 2222) {
                    error("CLIENT unexpected epoll return type");
                }
            }
            // If we got the expected event, then go read from the EQ
            break;
        }
    }
#endif

    // Wait for FI_CONNECTED event
    uint32_t event;
    uint8_t *entry_buffer;
    size_t expected_len = sizeof(struct fi_eq_cm_entry) +
        sizeof(client_data);
    entry_buffer = (uint8_t*) calloc(1, expected_len);
    if (NULL == entry_buffer) {
        error("calloc failed");
    }
    struct fi_eq_cm_entry *entry = (struct fi_eq_cm_entry*) entry_buffer;

    while (1) {
        printf("CLIENT waiting for FI_CONNECTED\n");
#if WANT_FDS
        ret = fi_eq_read(fidev.eq, &event, entry, expected_len, 0);
#else
        ret = fi_eq_sread(fidev.eq, &event, entry, expected_len, -1, 0);
#endif
        if (-FI_EAVAIL == ret) {
            fprintf(stderr, "client fi_eq_sread failed because there's something in the error queue\n");
            char buffer[2048];
            struct fi_eq_err_entry *err_entry = (struct fi_eq_err_entry*) buffer;
            ret = fi_eq_readerr(fidev.eq, err_entry, 0);
            fprintf(stderr, "error code: %d (%s), prov err code: %d (%s)\n", err_entry->err, fi_strerror(err_entry->err), err_entry->prov_errno, fi_strerror(err_entry->prov_errno));
            error("sad panda");
        } else if (ret == -EAGAIN) {
            fprintf(stderr, "CLIENT fi_eq_sread fail got -EAGAIN... trying again...\n");
            sleep(1);
            continue;
        } else if (ret < 0) {
            fprintf(stderr, "SERVER fi_eq_sread fail: %s, ret = %d)\n", fi_strerror(-ret), ret);
            error("client fi_eq_sread failed for some random reason");
        } else if (event != FI_CONNECTED) {
            error("client got some unexpected event");
        } else if (ret != expected_len) {
            error("client got wrong length back from fi_eq_sread");
        }

        uint32_t *d = (uint32_t*) entry->data;
        for (int i = 0; i < (sizeof(server_data) / sizeof(uint32_t)); ++i) {
            if (d[i] != server_data[i]) {
                printf("CLIENT got wrong CM client data: d[%d]=%d, should be %d\n",
                       i, d[i], server_data[i]);
            }
        }

        printf("client got FI_CONNECTED, correct size, and correct data -- yay!\n");
        break;
    }

    printf("CLIENT connecting -- waiting for server before sending\n");
    MPI_Barrier(MPI_COMM_WORLD);

    sleep(1);
    int msg[4] = { 99, 100, 101, 102 };
    int len = sizeof(msg);
    printf("CLIENT sending len of %d\n", len);

    struct fid_mr no_mr;
    struct fid_mr *mr;
    void *send_context = (void*) 0x42;
#if 0
    fi_mr_reg(fidev.domain, msg, len, FI_SEND | FI_RECV,
              0, (uint64_t)(uintptr_t) msg, 0, &mr, NULL);
#else
    // Try using no mr, like fi_msg_pingpong...
    memset(&no_mr, 0, sizeof(no_mr));
    mr = &no_mr;
#endif
    ret = fi_send(ficonn.ep, msg, len,
                  fi_mr_desc(mr), 0, send_context);
    if (ret < 0) {
        printf("fi_Send failed! %d, %s\n", ret, fi_strerror(-ret));
        MPI_Abort(MPI_COMM_WORLD, 37);
    }

    // Wait for send completion
    struct fi_cq_entry cqe;
    while (1) {
        ret = fi_cq_sread(ficonn.cq, &cqe, 1, 0, -1);
        if (cqe.op_context == send_context) {
            printf("CLIENT send completed\n");
            break;
        } else {
            printf("CLIENT got some other completion... continuing\n");
        }
    }

    printf("CLIENT sent -- waiting for server before teardown\n");
    MPI_Barrier(MPI_COMM_WORLD);

    printf("CLIENT tearing down\n");
    fi_close(&(mr->fid));
    teardown_ofi();
}
Exemplo n.º 6
0
/*
 * Set up the progress thread
 */
static void progress_thread(void *args) {
  struct progress_thread_info* pti = args;
  const int id = pti->id;
  const int num_rbufs = 2;
  struct iovec iov[num_rbufs];
  struct fi_msg msg[num_rbufs];
  struct ofi_am_info* dst_buf[num_rbufs];
  const int rbuf_len = 10;
  const size_t rbuf_size = rbuf_len*sizeof(dst_buf[0][0]);
  const int num_cqes = rbuf_len;
  struct fi_cq_data_entry cqes[num_cqes];
  int num_read;
  
  int i;

  for (i = 0; i < num_rbufs; i++) {
    dst_buf[i] = chpl_mem_allocMany(rbuf_len, sizeof(dst_buf[i][0]),
                                    CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0);
    iov[i].iov_base = dst_buf[i];
    iov[i].iov_len = rbuf_size;
    msg[i].msg_iov = &iov[i];
    msg[i].desc = (void **) fi_mr_desc(ofi.mr);
    msg[i].iov_count = 1;
    msg[i].addr = FI_ADDR_UNSPEC;
    msg[i].context = (void *) (uint64_t) i;
    msg[i].data = 0x0;
    OFICHKERR(fi_recvmsg(ofi.am_rx_ep[id], &msg[i], FI_MULTI_RECV));
  }

  // Count this progress thread as running.  The creator thread wants to
  // be released as soon as at least one progress thread is running, so
  // if we're the first, do that.
  if (atomic_fetch_add_uint_least32_t(&progress_thread_count, 1) == 0) {
    CALL_CHECK_ZERO(pthread_mutex_lock(&progress_thread_entEx_cond_mutex));
    CALL_CHECK_ZERO(pthread_cond_signal(&progress_thread_enter_cond));
    CALL_CHECK_ZERO(pthread_mutex_unlock(&progress_thread_entEx_cond_mutex));
  }

  // Wait for events
  while (!atomic_load_bool(&progress_threads_please_exit)) {
    num_read = fi_cq_read(ofi.am_rx_cq[id], cqes, num_cqes);
    if (num_read > 0) {
      for (i = 0; i < num_read; i++) {
        chpl_comm_ofi_am_handler(&cqes[i]);
        // send ack
      }
    } else {
      if (num_read != -FI_EAGAIN) {
        chpl_internal_error(fi_strerror(-num_read));
      }      
    }
  }

  // Un-count this progress thread.  Whoever told us to exit wants to
  // be released once all the progress threads are done, so if we're
  // the last, do that.
  if (atomic_fetch_sub_uint_least32_t(&progress_thread_count, 1) == 1) {
    CALL_CHECK_ZERO(pthread_mutex_lock(&progress_thread_entEx_cond_mutex));
    CALL_CHECK_ZERO(pthread_cond_signal(&progress_thread_exit_cond));
    CALL_CHECK_ZERO(pthread_mutex_unlock(&progress_thread_entEx_cond_mutex));
  }
}
Exemplo n.º 7
0
static int client_connect(void)
{
	struct fi_eq_cm_entry entry;
	uint32_t event;
	struct fi_info *fi;
	ssize_t rd;
	int ret;

	if (src_addr) {
		ret = getaddr(src_addr, NULL, (struct sockaddr **) &hints.src_addr,
			      (socklen_t *) &hints.src_addrlen);
		if (ret)
			printf("source address error %s\n", gai_strerror(ret));
	}

	ret = fi_getinfo(FI_VERSION(1, 0), dst_addr, port, 0, &hints, &fi);
	if (ret) {
		printf("fi_getinfo %s\n", strerror(-ret));
		goto err0;
	}

	ret = fi_fabric(fi->fabric_attr, &fab, NULL);
	if (ret) {
		printf("fi_fabric %s\n", fi_strerror(-ret));
		goto err1;
	}

	ret = fi_domain(fab, fi, &dom, NULL);
	if (ret) {
		printf("fi_domain %s %s\n", fi_strerror(-ret),
			fi->domain_attr->name);
		goto err2;
	}

	ret = fi_endpoint(dom, fi, &ep, NULL);
	if (ret) {
		printf("fi_endpoint %s\n", fi_strerror(-ret));
		goto err3;
	}

	ret = alloc_ep_res(fi);
	if (ret)
		goto err4;

	ret = bind_ep_res();
	if (ret)
		goto err5;

	ret = fi_connect(ep, fi->dest_addr, NULL, 0);
	if (ret) {
		printf("fi_connect %s\n", fi_strerror(-ret));
		goto err5;
	}

	rd = fi_eq_sread(cmeq, &event, &entry, sizeof entry, -1, 0);
	if (rd != sizeof entry) {
		printf("fi_eq_sread %zd %s\n", rd, fi_strerror((int) -rd));
		return (int) rd;
	}

	if (event != FI_COMPLETE || entry.fid != &ep->fid) {
		printf("Unexpected CM event %d fid %p (ep %p)\n",
			event, entry.fid, ep);
		ret = -FI_EOTHER;
		goto err1;
	}

	if (hints.src_addr)
		free(hints.src_addr);
	fi_freeinfo(fi);
	return 0;

err5:
	free_ep_res();
err4:
	fi_close(&ep->fid);
err3:
	fi_close(&dom->fid);
err2:
	fi_close(&fab->fid);
err1:
	fi_freeinfo(fi);
err0:
	if (hints.src_addr)
		free(hints.src_addr);
	return ret;
}
Exemplo n.º 8
0
/* mca_btl_ofi_context_alloc_scalable()
 *
 * This function allocate communication contexts and return the pointer
 * to the first btl context. It also take care of all the bindings needed.
 * USE WITH SCALABLE ENDPOINT ONLY */
mca_btl_ofi_context_t *mca_btl_ofi_context_alloc_scalable(struct fi_info *info,
                                                          struct fid_domain *domain,
                                                          struct fid_ep *sep,
                                                          struct fid_av *av,
                                                          size_t num_contexts)
{
    BTL_VERBOSE(("creating %zu contexts", num_contexts));

    int rc;
    size_t i;
    char *linux_device_name = info->domain_attr->name;

    struct fi_cq_attr cq_attr = {0};
    struct fi_tx_attr tx_attr = {0};
    struct fi_rx_attr rx_attr = {0};

    mca_btl_ofi_context_t *contexts;
    tx_attr.op_flags = FI_DELIVERY_COMPLETE;

    contexts = (mca_btl_ofi_context_t*) calloc(num_contexts, sizeof(*contexts));
    if (NULL == contexts) {
        BTL_VERBOSE(("cannot allocate communication contexts."));
        return NULL;
    }

    /* Don't really need to check, just avoiding compiler warning because
     * BTL_VERBOSE is a no op in performance build and the compiler will
     * complain about unused variable. */
    if (NULL == linux_device_name) {
        BTL_VERBOSE(("linux device name is NULL. This shouldn't happen."));
        goto scalable_fail;
    }

     /* bind AV to endpoint */
    rc = fi_scalable_ep_bind(sep, (fid_t)av, 0);
    if (0 != rc) {
        BTL_VERBOSE(("%s failed fi_scalable_ep_bind with err=%s",
                        linux_device_name,
                        fi_strerror(-rc)
                        ));
        goto scalable_fail;
    }

    for (i=0; i < num_contexts; i++) {
        rc = fi_tx_context(sep, i, &tx_attr, &contexts[i].tx_ctx, NULL);
        if (0 != rc) {
            BTL_VERBOSE(("%s failed fi_tx_context with err=%s",
                            linux_device_name,
                            fi_strerror(-rc)
                            ));
            goto scalable_fail;
        }

        /* We don't actually need a receiving context as we only do one-sided.
         * However, sockets provider will hang if we dont have one. It is
         * also nice to have equal number of tx/rx context. */
        rc = fi_rx_context(sep, i, &rx_attr, &contexts[i].rx_ctx, NULL);
        if (0 != rc) {
            BTL_VERBOSE(("%s failed fi_rx_context with err=%s",
                            linux_device_name,
                            fi_strerror(-rc)
                            ));
            goto scalable_fail;
        }

        /* create CQ */
        cq_attr.format = FI_CQ_FORMAT_CONTEXT;
        cq_attr.wait_obj = FI_WAIT_NONE;
        rc = fi_cq_open(domain, &cq_attr, &contexts[i].cq, NULL);
        if (0 != rc) {
            BTL_VERBOSE(("%s failed fi_cq_open with err=%s",
                            linux_device_name,
                            fi_strerror(-rc)
                            ));
            goto scalable_fail;
        }

        /* bind cq to transmit context */
        uint32_t cq_flags = (FI_TRANSMIT);
        rc = fi_ep_bind(contexts[i].tx_ctx, (fid_t)contexts[i].cq, cq_flags);
        if (0 != rc) {
            BTL_VERBOSE(("%s failed fi_ep_bind with err=%s",
                            linux_device_name,
                            fi_strerror(-rc)
                            ));
            goto scalable_fail;
        }

        /* enable the context. */
        rc = fi_enable(contexts[i].tx_ctx);
        if (0 != rc) {
            BTL_VERBOSE(("%s failed fi_enable with err=%s",
                            linux_device_name,
                            fi_strerror(-rc)
                            ));
            goto scalable_fail;
        }

        rc = fi_enable(contexts[i].rx_ctx);
        if (0 != rc) {
            BTL_VERBOSE(("%s failed fi_enable with err=%s",
                            linux_device_name,
                            fi_strerror(-rc)
                            ));
            goto scalable_fail;
        }

        /* initialize completion freelist. */
        rc = ofi_comp_list_init(&contexts[i].comp_list);
        if (rc != OPAL_SUCCESS) {
            goto scalable_fail;
        }

         /* assign the id */
        contexts[i].context_id = i;
    }

    return contexts;

scalable_fail:
    /* close and free */
    for(i=0; i < num_contexts; i++) {
        mca_btl_ofi_context_finalize(&contexts[i], true);
    }
    free(contexts);

    return NULL;
}
Exemplo n.º 9
0
/* mca_btl_ofi_context_alloc_normal()
 *
 * This function will allocate an ofi_context, map the endpoint to tx/rx context,
 * bind CQ,AV to the endpoint and initialize all the structure.
 * USE WITH NORMAL ENDPOINT ONLY */
mca_btl_ofi_context_t *mca_btl_ofi_context_alloc_normal(struct fi_info *info,
                                                        struct fid_domain *domain,
                                                        struct fid_ep *ep,
                                                        struct fid_av *av)
{
    int rc;
    uint32_t cq_flags = FI_TRANSMIT;
    char *linux_device_name = info->domain_attr->name;

    struct fi_cq_attr cq_attr = {0};

    mca_btl_ofi_context_t *context;

    context = (mca_btl_ofi_context_t*) calloc(1, sizeof(*context));
    if (NULL == context) {
        BTL_VERBOSE(("cannot allocate context"));
        return NULL;
    }

    /* Don't really need to check, just avoiding compiler warning because
     * BTL_VERBOSE is a no op in performance build and the compiler will
     * complain about unused variable. */
    if (NULL == linux_device_name) {
        BTL_VERBOSE(("linux device name is NULL. This shouldn't happen."));
        goto single_fail;
    }

    cq_attr.format = FI_CQ_FORMAT_CONTEXT;
    cq_attr.wait_obj = FI_WAIT_NONE;
    rc = fi_cq_open(domain, &cq_attr, &context->cq, NULL);
    if (0 != rc) {
        BTL_VERBOSE(("%s failed fi_cq_open with err=%s",
                        linux_device_name,
                        fi_strerror(-rc)
                        ));
        goto single_fail;
    }

    rc = fi_ep_bind(ep, (fid_t)av, 0);
    if (0 != rc) {
        BTL_VERBOSE(("%s failed fi_ep_bind with err=%s",
                        linux_device_name,
                        fi_strerror(-rc)
                        ));
        goto single_fail;
    }

    rc = fi_ep_bind(ep, (fid_t)context->cq, cq_flags);
    if (0 != rc) {
        BTL_VERBOSE(("%s failed fi_scalable_ep_bind with err=%s",
                        linux_device_name,
                        fi_strerror(-rc)
                        ));
        goto single_fail;
    }

    rc = ofi_comp_list_init(&context->comp_list);
    if (rc != OPAL_SUCCESS) {
        goto single_fail;
    }

    context->tx_ctx = ep;
    context->rx_ctx = ep;
    context->context_id = 0;

    return context;

single_fail:
    mca_btl_ofi_context_finalize(context, false);
    return NULL;
}
Exemplo n.º 10
0
int _gnix_cm_nic_alloc(struct gnix_fid_domain *domain,
		       struct fi_info *info,
		       uint32_t cdm_id,
			   struct gnix_auth_key *auth_key,
		       struct gnix_cm_nic **cm_nic_ptr)
{
	int ret = FI_SUCCESS;
	struct gnix_cm_nic *cm_nic = NULL;
	gnix_hashtable_attr_t gnix_ht_attr = {0};
	uint32_t name_type = GNIX_EPN_TYPE_UNBOUND;
	struct gnix_nic_attr nic_attr = {0};
	struct gnix_ep_name ep_name;
	struct gnix_dgram_hndl_attr dgram_hndl_attr = {0};
	struct gnix_dgram_hndl_attr *dgram_hndl_attr_ptr = NULL;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	*cm_nic_ptr = NULL;

	/*
	 * if app has specified a src_addr in the info
	 * argument and length matches that for gnix_ep_name
	 * we must allocate a cm_nic, otherwise we first
	 * check to see if there is a cm_nic already for this domain
	 * and just use it.
	 */

	if (info->src_addr) {
		/*TODO (optimization): strchr to name_type and strtol */
		_gnix_get_ep_name(info->src_addr, 0, &ep_name, domain);
		name_type = ep_name.name_type;
	}

	GNIX_INFO(FI_LOG_EP_CTRL, "creating cm_nic for %u/0x%x/%u\n",
			auth_key->ptag, auth_key->cookie, cdm_id);

	cm_nic = (struct gnix_cm_nic *)calloc(1, sizeof(*cm_nic));
	if (cm_nic == NULL) {
		ret = -FI_ENOMEM;
		goto err;
	}

	/*
	 * we have to force allocation of a new nic since we want
	 * an a particulard cdm id
	 */
	nic_attr.must_alloc = true;
	nic_attr.use_cdm_id = true;
	nic_attr.cdm_id = cdm_id;
	nic_attr.auth_key = auth_key;

	ret = gnix_nic_alloc(domain, &nic_attr, &cm_nic->nic);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "gnix_nic_alloc returned %s\n",
			  fi_strerror(-ret));
		goto err;
	}

	cm_nic->my_name.gnix_addr.cdm_id = cdm_id;
	cm_nic->ptag = auth_key->ptag;
	cm_nic->my_name.cookie = auth_key->cookie;
	cm_nic->my_name.gnix_addr.device_addr = cm_nic->nic->device_addr;
	cm_nic->domain = domain;
	cm_nic->ctrl_progress = domain->control_progress;
	cm_nic->my_name.name_type = name_type;
	cm_nic->poll_cnt = 0;
	fastlock_init(&cm_nic->wq_lock);
	dlist_init(&cm_nic->cm_nic_wq);

	/*
	 * prep the cm nic's dgram component
	 */
	if (domain->control_progress == FI_PROGRESS_AUTO) {
		dgram_hndl_attr.timeout_needed = __gnix_cm_nic_timeout_needed;
		dgram_hndl_attr.timeout_progress = __gnix_cm_nic_timeout_progress;
		dgram_hndl_attr.timeout_data = (void *)cm_nic;
		dgram_hndl_attr.timeout = domain->params.dgram_progress_timeout;
		dgram_hndl_attr_ptr = &dgram_hndl_attr;
	};

	ret = _gnix_dgram_hndl_alloc(cm_nic,
				     domain->control_progress,
				     dgram_hndl_attr_ptr,
				     &cm_nic->dgram_hndl);
	if (ret != FI_SUCCESS)
		goto err;

	/*
	 * allocate hash table for translating ep addresses
	 * to ep's.
	 * This table will not be large - how many FI_EP_RDM ep's
	 * will an app create using one domain?, nor in the critical path
	 * so just use defaults.
	 */
	cm_nic->addr_to_ep_ht = calloc(1, sizeof(struct gnix_hashtable));
	if (cm_nic->addr_to_ep_ht == NULL)
		goto err;

	gnix_ht_attr.ht_initial_size = 64;
	gnix_ht_attr.ht_maximum_size = 1024;
	gnix_ht_attr.ht_increase_step = 2;
	gnix_ht_attr.ht_increase_type = GNIX_HT_INCREASE_MULT;
	gnix_ht_attr.ht_collision_thresh = 500;
	gnix_ht_attr.ht_hash_seed = 0xdeadbeefbeefdead;
	gnix_ht_attr.ht_internal_locking = 1;
	gnix_ht_attr.destructor = NULL;

	ret = _gnix_ht_init(cm_nic->addr_to_ep_ht, &gnix_ht_attr);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "gnix_ht_init returned %s\n",
			  fi_strerror(-ret));
		goto err;
	}

	_gnix_ref_init(&cm_nic->ref_cnt, 1, __cm_nic_destruct);

	*cm_nic_ptr = cm_nic;

	pthread_mutex_lock(&gnix_cm_nic_list_lock);
	dlist_insert_tail(&cm_nic->cm_nic_list, &gnix_cm_nic_list);
	pthread_mutex_unlock(&gnix_cm_nic_list_lock);

	return ret;

err:
	if (cm_nic->dgram_hndl)
		_gnix_dgram_hndl_free(cm_nic->dgram_hndl);

	if (cm_nic->nic)
		_gnix_nic_free(cm_nic->nic);

	if (cm_nic->addr_to_ep_ht) {
		_gnix_ht_destroy(cm_nic->addr_to_ep_ht);
		free(cm_nic->addr_to_ep_ht);
	}

	if (cm_nic != NULL)
		free(cm_nic);

	return ret;
}
Exemplo n.º 11
0
static int __process_datagram(struct gnix_datagram *dgram,
				 struct gnix_address peer_address,
				 gni_post_state_t state)
{
	int ret = FI_SUCCESS;
	struct gnix_cm_nic *cm_nic = NULL;
	uint8_t in_tag = 0, out_tag = 0;
	char rcv_buf[GNIX_CM_NIC_MAX_MSG_SIZE];

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	cm_nic = (struct gnix_cm_nic *)dgram->cache;
	if (cm_nic == NULL) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			"process_datagram, null cache\n");
		goto err;
	}

	if (state != GNI_POST_COMPLETED) {
		ret = __process_dgram_w_error(cm_nic,
					      dgram,
					      peer_address,
					      state);
		GNIX_WARN(FI_LOG_EP_CTRL,
			"process_datagram bad post state %d\n", state);
		goto err;
	}

	__dgram_get_in_tag(dgram, &in_tag);
	if ((in_tag != GNIX_CM_NIC_BND_TAG) &&
		(in_tag != GNIX_CM_NIC_WC_TAG)) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			"datagram with unknown in tag %d\n", in_tag);
		goto err;
	}

	 __dgram_unpack_out_tag(dgram, &out_tag);
	if ((out_tag != GNIX_CM_NIC_BND_TAG) &&
		(out_tag != GNIX_CM_NIC_WC_TAG)) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			"datagram with unknown out tag %d\n", out_tag);
		goto err;
	}

	/*
	 * if out buf actually has data, call consumer's
	 * receive callback
	 */

	if (out_tag == GNIX_CM_NIC_BND_TAG) {
		_gnix_dgram_unpack_buf(dgram,
					GNIX_DGRAM_OUT_BUF,
					rcv_buf,
					GNIX_CM_NIC_MAX_MSG_SIZE);
		ret = cm_nic->rcv_cb_fn(cm_nic,
					rcv_buf,
					peer_address);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_CTRL,
				"cm_nic->rcv_cb_fn returned %s\n",
				fi_strerror(-ret));
			goto err;
		}

		ret = _gnix_cm_nic_progress(cm_nic);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "_gnix_cm_nic_progress returned %s\n",
				  fi_strerror(-ret));
	}

	/*
	 * if we are processing a WC datagram, repost, otherwise
	 * just put back on the freelist.
	 */
	if (in_tag == GNIX_CM_NIC_WC_TAG) {
		dgram->callback_fn = __process_datagram;
		dgram->cache = cm_nic;
		 __dgram_set_tag(dgram, in_tag);
		ret = _gnix_dgram_wc_post(dgram);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_CTRL,
				"_gnix_dgram_wc_post returned %s\n",
				fi_strerror(-ret));
			goto err;
		}
	} else {
		ret  = _gnix_dgram_free(dgram);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
				"_gnix_dgram_free returned %s\n",
				fi_strerror(-ret));
	}

	return ret;

err:
	if (in_tag == GNIX_CM_NIC_BND_TAG)
		_gnix_dgram_free(dgram);
	return ret;
}
Exemplo n.º 12
0
int _gnix_cm_nic_send(struct gnix_cm_nic *cm_nic,
		      char *sbuf, size_t len,
		      struct gnix_address target_addr)
{
	int ret = FI_SUCCESS;
	struct gnix_datagram *dgram = NULL;
	ssize_t  __attribute__((unused)) plen;
	uint8_t tag;
	struct gnix_work_req *work_req;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	if ((cm_nic == NULL) || (sbuf == NULL))
		return -FI_EINVAL;

	if (len > GNI_DATAGRAM_MAXSIZE)
		return -FI_ENOSPC;

	ret = _gnix_dgram_alloc(cm_nic->dgram_hndl,
				GNIX_DGRAM_BND,
				&dgram);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "_gnix_dgram_alloc returned %s\n",
			  fi_strerror(-ret));
		goto exit;
	}

	dgram->target_addr = target_addr;
	dgram->callback_fn = __process_datagram;
	dgram->cache = cm_nic;

	tag = GNIX_CM_NIC_BND_TAG;
	 __dgram_set_tag(dgram, tag);

	plen = _gnix_dgram_pack_buf(dgram, GNIX_DGRAM_IN_BUF,
				   sbuf, len);
	assert (plen == len);

	/* If connecting with the same CM NIC, skip datagram exchange.  The
	 * caller could be holding an endpoint lock, so schedule connection
	 * completion for later. */
	if (GNIX_ADDR_EQUAL(target_addr, cm_nic->my_name.gnix_addr)) {
		char tmp_buf[GNIX_CM_NIC_MAX_MSG_SIZE];

		/* Pack output buffer with input data. */
		_gnix_dgram_unpack_buf(dgram, GNIX_DGRAM_IN_BUF, tmp_buf,
				       GNIX_CM_NIC_MAX_MSG_SIZE);
		_gnix_dgram_pack_buf(dgram, GNIX_DGRAM_OUT_BUF, tmp_buf,
				       GNIX_CM_NIC_MAX_MSG_SIZE);

		work_req = calloc(1, sizeof(*work_req));
		if (work_req == NULL) {
			_gnix_dgram_free(dgram);
			return -FI_ENOMEM;
		}

		work_req->progress_fn = __gnix_cm_nic_intra_progress_fn;
		work_req->data = dgram;
		work_req->completer_fn = NULL;

		fastlock_acquire(&cm_nic->wq_lock);
		dlist_insert_before(&work_req->list, &cm_nic->cm_nic_wq);
		fastlock_release(&cm_nic->wq_lock);

		GNIX_INFO(FI_LOG_EP_CTRL, "Initiated intra-CM NIC connect\n");
	} else {
		ret = _gnix_dgram_bnd_post(dgram);
		if (ret == -FI_EBUSY) {
			ret = -FI_EAGAIN;
			_gnix_dgram_free(dgram);
		}
	}

exit:
	return ret;
}
Exemplo n.º 13
0
int _gnix_cm_nic_progress(void *arg)
{
	struct gnix_cm_nic *cm_nic = (struct gnix_cm_nic *)arg;
	int ret = FI_SUCCESS;
	int complete;
	struct gnix_work_req *p = NULL;

	/*
	 * if we're doing FI_PROGRESS_MANUAL,
	 * see what's going on inside kgni's datagram
	 * box...
	 */

	if (cm_nic->ctrl_progress == FI_PROGRESS_MANUAL) {
		++cm_nic->poll_cnt;
		if (((cm_nic->poll_cnt % 512) == 0)  ||
			!dlist_empty(&cm_nic->cm_nic_wq)) {
			ret = _gnix_dgram_poll(cm_nic->dgram_hndl,
						  GNIX_DGRAM_NOBLOCK);
			if (ret != FI_SUCCESS) {
				GNIX_WARN(FI_LOG_EP_CTRL,
					"_gnix_dgram_poll returned %s\n",
					  fi_strerror(-ret));
					goto err;
			}
		}
	}

	/*
	 * do a quick check if queue doesn't have anything yet,
	 * don't need this to be atomic
	 */

check_again:
	if (dlist_empty(&cm_nic->cm_nic_wq))
		return ret;

	/*
	 * okay, stuff to do, lock work queue,
	 * dequeue head, unlock, process work element,
	 * if it doesn't compete, put back at the tail
	 * of the queue.
	 */

	fastlock_acquire(&cm_nic->wq_lock);
	p = dlist_first_entry(&cm_nic->cm_nic_wq, struct gnix_work_req,
			      list);
	if (p == NULL) {
		fastlock_release(&cm_nic->wq_lock);
		return ret;
	}

	dlist_remove_init(&p->list);
	fastlock_release(&cm_nic->wq_lock);

	assert(p->progress_fn);

	ret = p->progress_fn(p->data, &complete);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "dgram prog fn returned %s\n",
			  fi_strerror(-ret));
	}

	if (complete == 1) {
		if (p->completer_fn) {
			ret = p->completer_fn(p->completer_data);
			free(p);
			if (ret != FI_SUCCESS) {
				GNIX_WARN(FI_LOG_EP_CTRL,
					  "dgram completer fn returned %s\n",
					  fi_strerror(-ret));
				goto err;
			}
		} else {
			free(p);
		}
		goto check_again;
	} else {
		fastlock_acquire(&cm_nic->wq_lock);
		dlist_insert_before(&p->list, &cm_nic->cm_nic_wq);
		fastlock_release(&cm_nic->wq_lock);
	}

err:
	return ret;
}
Exemplo n.º 14
0
std::string fi_error_to_string(int err_code) {
    return fi_strerror(err_code >= 0 ? err_code : -err_code);
}
Exemplo n.º 15
0
static int __gnix_vc_hndl_conn_req(struct gnix_cm_nic *cm_nic,
				   char *msg_buffer,
				   struct gnix_address src_cm_nic_addr)
{
	int ret = FI_SUCCESS;
	gni_return_t __attribute__((unused)) status;
	struct gnix_fid_ep *ep = NULL;
	gnix_ht_key_t *key_ptr;
	struct gnix_av_addr_entry entry;
	struct gnix_address src_addr, target_addr;
	struct gnix_vc *vc = NULL;
	struct gnix_vc *vc_try = NULL;
	struct gnix_work_req *work_req;
	int src_vc_id;
	gni_smsg_attr_t src_smsg_attr;
	uint64_t src_vc_ptr;
	struct wq_hndl_conn_req *data = NULL;

	ssize_t __attribute__((unused)) len;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	/*
	 * unpack the message
	 */

	__gnix_vc_unpack_conn_req(msg_buffer,
				  &target_addr,
				  &src_addr,
				  &src_vc_id,
				  &src_vc_ptr,
				  &src_smsg_attr);


	GNIX_DEBUG(FI_LOG_EP_CTRL,
		"conn req rx: (From Aries addr 0x%x Id %d to Aries 0x%x Id %d src vc 0x%lx )\n",
		 src_addr.device_addr,
		 src_addr.cdm_id,
		 target_addr.device_addr,
		 target_addr.cdm_id,
		 src_vc_ptr);

	/*
	 * lookup the ep from the addr_to_ep_ht using the target_addr
	 * in the datagram
	 */

	key_ptr = (gnix_ht_key_t *)&target_addr;

	ep = (struct gnix_fid_ep *)_gnix_ht_lookup(cm_nic->addr_to_ep_ht,
						   *key_ptr);
	if (ep == NULL) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_ht_lookup addr_to_ep failed\n");
		ret = -FI_ENOENT;
		goto err;
	}

	/*
	 * look to see if there is a VC already for the
	 * address of the connecting EP.
	 */

	key_ptr = (gnix_ht_key_t *)&src_addr;

	fastlock_acquire(&ep->vc_ht_lock);
	vc = (struct gnix_vc *)_gnix_ht_lookup(ep->vc_ht,
					       *key_ptr);

	/*
 	 * if there is no corresponding vc in the hash,
 	 * or there is an entry and its not in connecting state
 	 * go down the conn req ack route.
 	 */

	if ((vc == NULL)  ||
	    (vc->conn_state == GNIX_VC_CONN_NONE)) {
		if (vc == NULL) {
			entry.gnix_addr = src_addr;
			entry.cm_nic_cdm_id = src_cm_nic_addr.cdm_id;
			ret = _gnix_vc_alloc(ep,
					     &entry,
					     &vc_try);
			if (ret != FI_SUCCESS) {
				GNIX_WARN(FI_LOG_EP_CTRL,
					  "_gnix_vc_alloc returned %s\n",
					  fi_strerror(-ret));
				goto err;
			}

			vc_try->conn_state = GNIX_VC_CONNECTING;
			ret = _gnix_ht_insert(ep->vc_ht,
					      *key_ptr,
					      vc_try);
			if (likely(ret == FI_SUCCESS)) {
				vc = vc_try;
				vc->modes |= GNIX_VC_MODE_IN_HT;
			} else if (ret == -FI_ENOSPC) {
				_gnix_vc_destroy(vc_try);
			} else {
				GNIX_WARN(FI_LOG_EP_DATA,
				  "_gnix_ht_insert returned %s\n",
				   fi_strerror(-ret));
				goto err;
			}
		} else
			vc->conn_state = GNIX_VC_CONNECTING;

		/*
		 * prepare a work request to
		 * initiate an request response
		 */

		work_req = calloc(1, sizeof(*work_req));
		if (work_req == NULL) {
			ret = -FI_ENOMEM;
			goto err;
		}

		data = calloc(1, sizeof(struct wq_hndl_conn_req));
		if (data == NULL) {
			ret = -FI_ENOMEM;
			goto err;
		}
		memcpy(&data->src_smsg_attr,
		       &src_smsg_attr,
		       sizeof(src_smsg_attr));
		data->vc = vc;
		data->src_vc_id = src_vc_id;
		data->src_vc_ptr = src_vc_ptr;

		work_req->progress_fn = __gnix_vc_conn_ack_prog_fn;
		work_req->data = data;
		work_req->completer_fn = __gnix_vc_conn_ack_comp_fn;
		work_req->completer_data = data;

		/*
		 * add the work request to the tail of the
		 * cm_nic's work queue, progress the cm_nic.
		 */


		fastlock_acquire(&cm_nic->wq_lock);
		dlist_insert_before(&work_req->list, &cm_nic->cm_nic_wq);
		fastlock_release(&cm_nic->wq_lock);

		fastlock_release(&ep->vc_ht_lock);

		_gnix_vc_schedule(vc);
		ret = _gnix_cm_nic_progress(cm_nic);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
				"_gnix_cm_nic_progress returned %s\n",
				fi_strerror(-ret));

	} else {

		/*
		 * we can only be in connecting state if we
		 * reach here.  We have all the informatinon,
		 * and the other side will get the information
		 * at some point, so go ahead and build SMSG connection.
		 */
		if (vc->conn_state != GNIX_VC_CONNECTING) {
			GNIX_WARN(FI_LOG_EP_CTRL,
				 "vc %p not in connecting state nor in cm wq\n",
				  vc, vc->conn_state);
			ret = -FI_EINVAL;
			goto err;
		}

		ret = __gnix_vc_smsg_init(vc, src_vc_id,
					  &src_smsg_attr);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "_gnix_vc_smsg_init returned %s\n",
				  fi_strerror(-ret));
			goto err;
		}

		vc->conn_state = GNIX_VC_CONNECTED;
		GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connected\n",
			vc);

		fastlock_release(&ep->vc_ht_lock);

		ret = _gnix_vc_schedule(vc);
		ret = _gnix_cm_nic_progress(cm_nic);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
				"_gnix_cm_nic_progress returned %s\n",
				fi_strerror(-ret));
	}
err:
	return ret;
}
Exemplo n.º 16
0
int main(int argc, char **argv)
{
	int op, ret, ndoms = 3, neps = 3, i, j;

	hints = fi_allocinfo();
	if (!hints) {
		exit(1);
	}

	while ((op = getopt(argc, argv, "f:p:d:D:n:d:e:h")) != -1) {
		switch (op) {
		case 'f':
			hints->fabric_attr->name = strdup(optarg);
			break;
		case 'p':
			hints->fabric_attr->prov_name = strdup(optarg);
			break;
		case 'd':
			ndoms = atoi(optarg);
			break;
		case 'e':
			neps = atoi(optarg);
			break;
		case 'h':
		default:
			printf("usage: %s\n", argv[0]);
			printf("\t[-f fabric_name]\n");
			printf("\t[-p provider_name]\n");
			printf("\t[-d ndomains]\n");
			printf("\t[-e neps]\n");
			exit(1);
		}
	}

	hints->mode = ~0;

	ret = fi_getinfo(FI_VERSION(1, 0), NULL, 0, 0, hints, &fi);
	if (ret != 0) {
		printf("fi_getinfo %s\n", fi_strerror(-ret));
		exit(1);
	}

	ret = fi_fabric(fi->fabric_attr, &fabric, NULL);
	if (ret != 0) {
		printf("fi_fabric %s\n", fi_strerror(-ret));
		exit(1);
	}

	domains = (struct fid_domain **)malloc(ndoms * sizeof(struct fid_domain *));
	assert(domains);
	eps = (struct fid_ep **)malloc(neps * ndoms * sizeof(struct fid_ep *));
	assert(eps);

	for (i = 0; i < ndoms; i++) {
		ret = fi_domain(fabric, fi, &domains[i], NULL);
		if (ret != 0) {
			printf("fi_domain %s\n", fi_strerror(-ret));
			exit(1);
		}

		for (j = 0; j < neps; j++) {
			int idx = (i * neps) + j;
			ret = fi_endpoint(domains[i], fi, &eps[idx], NULL);
			if (ret != 0) {
				printf("[%d:%d] ]fi_endpoint %s\n", i, j, fi_strerror(-ret));
				exit(1);
			}
		}
	}

	for (i = 0; i < ndoms; i++) {
		for (j = 0; j < neps; j++) {
			int idx = (i * neps) + j;
			ret = fi_close(&eps[idx]->fid);
			if (ret != 0) {
				printf("Error %d closing ep: %s\n", ret, fi_strerror(-ret));
				exit(1);
			}
		}

		ret = fi_close(&domains[i]->fid);
		if (ret != 0) {
			printf("Error %d closing domain: %s\n", ret, fi_strerror(-ret));
			exit(1);
		}
	}

	free(eps);
	free(domains);

	ret = fi_close(&fabric->fid);
	if (ret != 0) {
		printf("Error %d closing fabric: %s\n", ret, fi_strerror(-ret));
		exit(1);
	}

	fi_freeinfo(fi);
	fi_freeinfo(hints);

	return ret;
}
Exemplo n.º 17
0
static int __gnix_vc_conn_ack_prog_fn(void *data, int *complete_ptr)
{
	int ret = FI_SUCCESS;
	int complete = 0;
	struct wq_hndl_conn_req *work_req_data;
	struct gnix_vc *vc;
	struct gnix_mbox *mbox = NULL;
	gni_smsg_attr_t smsg_mbox_attr;
	struct gnix_fid_ep *ep = NULL;
	struct gnix_fid_domain *dom = NULL;
	struct gnix_cm_nic *cm_nic = NULL;
	char sbuf[GNIX_CM_NIC_MAX_MSG_SIZE] = {0};

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");


	work_req_data = (struct wq_hndl_conn_req *)data;

	vc = work_req_data->vc;
	if (vc == NULL)
		return -FI_EINVAL;

	ep = vc->ep;
	if (ep == NULL)
		return -FI_EINVAL;

	dom = ep->domain;
	if (dom == NULL)
		return -FI_EINVAL;

	cm_nic = ep->cm_nic;
	if (cm_nic == NULL)
		return -FI_EINVAL;

	fastlock_acquire(&ep->vc_ht_lock);

	/*
	 * we may have already been moved to connecting or
	 * connected, if so early exit.
	 */
	if(vc->conn_state == GNIX_VC_CONNECTED) {
		complete = 1;
		goto exit;
	}

	/*
	 * first see if we still need a mailbox
	 */

	if (vc->smsg_mbox == NULL) {
		ret = _gnix_mbox_alloc(ep->nic->mbox_hndl,
				       &mbox);
		if (ret == FI_SUCCESS)
			vc->smsg_mbox = mbox;
		else
			goto exit;
	}

	mbox = vc->smsg_mbox;

	/*
	 * prep the smsg_mbox_attr
¬        */

	smsg_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
	smsg_mbox_attr.msg_buffer = mbox->base;
	smsg_mbox_attr.buff_size =  ep->nic->mem_per_mbox;
	smsg_mbox_attr.mem_hndl = *mbox->memory_handle;
	smsg_mbox_attr.mbox_offset = (uint64_t)mbox->offset;
	smsg_mbox_attr.mbox_maxcredit = dom->params.mbox_maxcredit;
	smsg_mbox_attr.msg_maxsize = dom->params.mbox_msg_maxsize;

	/*
	 * serialize the resp message in the buffer
	 */

	__gnix_vc_pack_conn_resp(sbuf,
				 work_req_data->src_vc_ptr,
				 (uint64_t)vc,
				 vc->vc_id,
				 &smsg_mbox_attr);

	/*
	 * try to send the message, if it succeeds,
	 * initialize mailbox and move vc to connected
	 * state.
	 */

	ret = _gnix_cm_nic_send(cm_nic,
				sbuf,
				GNIX_CM_NIC_MAX_MSG_SIZE,
				vc->peer_cm_nic_addr);
	if (ret == FI_SUCCESS) {
		ret = __gnix_vc_smsg_init(vc,
					  work_req_data->src_vc_id,
					  &work_req_data->src_smsg_attr);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "_gnix_vc_smsg_init returned %s\n",
				  fi_strerror(-ret));
			goto exit;
		}
		complete = 1;
		vc->conn_state = GNIX_VC_CONNECTED;
		GNIX_DEBUG(FI_LOG_EP_CTRL,
			   "moving vc %p to connected\n",vc);
	} else if (ret == -FI_EAGAIN) {
		ret = _gnix_vc_schedule(vc);
		ret = FI_SUCCESS;
	} else
		assert(0);

exit:
	fastlock_release(&ep->vc_ht_lock);

	*complete_ptr = complete;
	return ret;
}
Exemplo n.º 18
0
static int
usdf_getinfo(uint32_t version, const char *node, const char *service,
	       uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
	struct usdf_usnic_info *dp;
	struct usdf_dev_entry *dep;
	struct usd_device_attrs *dap;
	struct fi_info *fi_first;
	struct fi_info *fi_last;
	struct addrinfo *ai;
	struct sockaddr_in *src;
	struct sockaddr_in *dest;
	enum fi_ep_type ep_type;
	int metric;
	int d;
	int ret;

	USDF_TRACE("\n");

	fi_first = NULL;
	fi_last = NULL;
	ai = NULL;
	src = NULL;
	dest = NULL;

	/*
	 * Get and cache usNIC device info
	 */
	if (__usdf_devinfo == NULL) {
		ret = usdf_get_devinfo();
		if (ret != 0) {
			USDF_WARN("failed to usdf_get_devinfo, ret=%d (%s)\n",
					ret, fi_strerror(-ret));
			if (ret == -FI_ENODEV)
				ret = -FI_ENODATA;
			goto fail;
		}
	}
	dp = __usdf_devinfo;

	if (node != NULL || service != NULL) {
		ret = getaddrinfo(node, service, NULL, &ai);
		if (ret != 0) {
			USDF_DBG("getaddrinfo failed, likely bad node/service specified (%s:%s)\n",
				node, service);
			ret = -errno;
			goto fail;
		}
		if (flags & FI_SOURCE) {
			src = (struct sockaddr_in *)ai->ai_addr;
		} else {
			dest = (struct sockaddr_in *)ai->ai_addr;
		}
	}
	if (hints != NULL) {
		if (dest == NULL && hints->dest_addr != NULL) {
			dest = hints->dest_addr;
		}
		if (src == NULL && hints->src_addr != NULL) {
			src = hints->src_addr;
		}
	}

	for (d = 0; d < dp->uu_num_devs; ++d) {
		dep = &dp->uu_info[d];
		dap = &dep->ue_dattr;

		/* skip this device if it has some problem */
		if (!dep->ue_dev_ok) {
			USDF_DBG("skipping %s/%s\n", dap->uda_devname,
				dap->uda_ifname);
			continue;
		}

		/* See if dest is reachable from this device */
		if (dest != NULL && dest->sin_addr.s_addr != INADDR_ANY) {
			ret = usdf_get_distance(dap,
					dest->sin_addr.s_addr, &metric);
			if (ret != 0) {
				goto fail;
			}
			if (metric == -1) {
				USDF_DBG("dest %s unreachable from %s/%s, skipping\n",
					inet_ntoa(dest->sin_addr),
					dap->uda_devname, dap->uda_ifname);
				continue;
			}
		}

		/* Does this device match requested attributes? */
		if (hints != NULL) {
			ret = usdf_validate_hints(hints, dap);
			if (ret != 0) {
				USDF_DBG("hints do not match for %s/%s, skipping\n",
					dap->uda_devname, dap->uda_ifname);
				continue;
			}

			ep_type = hints->ep_attr ? hints->ep_attr->type :
				  FI_EP_UNSPEC;
		} else {
			ep_type = FI_EP_UNSPEC;
		}

		if (ep_type == FI_EP_DGRAM || ep_type == FI_EP_UNSPEC) {
			ret = usdf_fill_info_dgram(version, hints, src, dest,
					dap, &fi_first, &fi_last);
			if (ret != 0 && ret != -FI_ENODATA) {
				goto fail;
			}
		}

		if (ep_type == FI_EP_MSG || ep_type == FI_EP_UNSPEC) {
			ret = usdf_fill_info_msg(hints, src, dest, dap,
					&fi_first, &fi_last);
			if (ret != 0 && ret != -FI_ENODATA) {
				goto fail;
			}
		}

		if (ep_type == FI_EP_RDM || ep_type == FI_EP_UNSPEC) {
			ret = usdf_fill_info_rdm(hints, src, dest, dap,
					&fi_first, &fi_last);
			if (ret != 0 && ret != -FI_ENODATA) {
				goto fail;
			}
		}
	}

	if (fi_first != NULL) {
		*info = fi_first;
		ret = 0;
	} else {
		ret = -FI_ENODATA;
	}

fail:
	if (ret != 0) {
		fi_freeinfo(fi_first);
	}
	if (ai != NULL) {
		freeaddrinfo(ai);
	}
	if (ret != 0) {
		USDF_INFO("returning %d (%s)\n", ret, fi_strerror(-ret));
	}
	return ret;
}
Exemplo n.º 19
0
static int server_connect(void)
{
	struct fi_eq_cm_entry entry;
	uint32_t event;
	struct fi_info *info = NULL;
	ssize_t rd;
	int ret;

	rd = fi_eq_sread(cmeq, &event, &entry, sizeof entry, -1, 0);
	if (rd != sizeof entry) {
		printf("fi_eq_sread %zd %s\n", rd, fi_strerror((int) -rd));
		return (int) rd;
	}

	if (event != FI_CONNREQ) {
		printf("Unexpected CM event %d\n", event);
		ret = -FI_EOTHER;
		goto err1;
	}

	info = entry.info;
	ret = fi_domain(fab, info, &dom, NULL);
	if (ret) {
		printf("fi_domain %s\n", fi_strerror(-ret));
		goto err1;
	}

	ret = fi_endpoint(dom, info, &ep, NULL);
	if (ret) {
		printf("fi_endpoint for req %s\n", fi_strerror(-ret));
		goto err1;
	}

	ret = alloc_ep_res(info);
	if (ret)
		 goto err2;

	ret = bind_ep_res();
	if (ret)
		goto err3;

	ret = fi_accept(ep, NULL, 0);
	if (ret) {
		printf("fi_accept %s\n", fi_strerror(-ret));
		goto err3;
	}

	rd = fi_eq_sread(cmeq, &event, &entry, sizeof entry, -1, 0);
	if (rd != sizeof entry) {
		printf("fi_eq_sread %zd %s\n", rd, fi_strerror((int) -rd));
		goto err3;
	}

	if (event != FI_COMPLETE || entry.fid != &ep->fid) {
		printf("Unexpected CM event %d fid %p (ep %p)\n",
			event, entry.fid, ep);
		ret = -FI_EOTHER;
		goto err3;
	}

	fi_freeinfo(info);
	return 0;

err3:
	free_ep_res();
err2:
	fi_close(&ep->fid);
err1:
	fi_reject(pep, info->connreq, NULL, 0);
	fi_freeinfo(info);
	return ret;
}
Exemplo n.º 20
0
static int
usdf_fabric_open(struct fi_fabric_attr *fattrp, struct fid_fabric **fabric,
	       void *context)
{
	struct fid_fabric *ff;
	struct usdf_fabric *fp;
	struct usdf_usnic_info *dp;
	struct usdf_dev_entry *dep;
	struct epoll_event ev;
	struct sockaddr_in sin;
	int ret;
	int d;

	USDF_TRACE("\n");

	/* Make sure this fabric exists */
	dp = __usdf_devinfo;
	for (d = 0; d < dp->uu_num_devs; ++d) {
		dep = &dp->uu_info[d];
		if (dep->ue_dev_ok &&
			strcmp(fattrp->name, dep->ue_dattr.uda_devname) == 0) {
			break;
		}
	}
	if (d >= dp->uu_num_devs) {
		USDF_INFO("device \"%s\" does not exit, returning -FI_ENODEV\n",
				fattrp->name);
		return -FI_ENODEV;
	}

	fp = calloc(1, sizeof(*fp));
	if (fp == NULL) {
		USDF_INFO("unable to allocate memory for fabric\n");
		return -FI_ENOMEM;
	}
	fp->fab_epollfd = -1;
	fp->fab_arp_sockfd = -1;
	LIST_INIT(&fp->fab_domain_list);

	fp->fab_attr.fabric = fab_utof(fp);
	fp->fab_attr.name = strdup(fattrp->name);
	fp->fab_attr.prov_name = strdup(USDF_PROV_NAME);
	fp->fab_attr.prov_version = USDF_PROV_VERSION;
	if (fp->fab_attr.name == NULL ||
			fp->fab_attr.prov_name == NULL) {
		ret = -FI_ENOMEM;
		goto fail;
	}

	fp->fab_fid.fid.fclass = FI_CLASS_FABRIC;
	fp->fab_fid.fid.context = context;
	fp->fab_fid.fid.ops = &usdf_fi_ops;
	fp->fab_fid.ops = &usdf_ops_fabric;

	fp->fab_dev_attrs = &dep->ue_dattr;

	fp->fab_epollfd = epoll_create(1024);
	if (fp->fab_epollfd == -1) {
		ret = -errno;
		USDF_INFO("unable to allocate epoll fd\n");
		goto fail;
	}

	fp->fab_eventfd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE);
	if (fp->fab_eventfd == -1) {
		ret = -errno;
		USDF_INFO("unable to allocate event fd\n");
		goto fail;
	}
	fp->fab_poll_item.pi_rtn = usdf_fabric_progression_cb;
	fp->fab_poll_item.pi_context = fp;
	ev.events = EPOLLIN;
	ev.data.ptr = &fp->fab_poll_item;
	ret = epoll_ctl(fp->fab_epollfd, EPOLL_CTL_ADD, fp->fab_eventfd, &ev);
	if (ret == -1) {
		ret = -errno;
		USDF_INFO("unable to EPOLL_CTL_ADD\n");
		goto fail;
	}

	/* initialize timer subsystem */
	ret = usdf_timer_init(fp);
	if (ret != 0) {
		USDF_INFO("unable to initialize timer\n");
		goto fail;
	}

	ret = pthread_create(&fp->fab_thread, NULL,
			usdf_fabric_progression_thread, fp);
	if (ret != 0) {
		ret = -ret;
		USDF_INFO("unable to create progress thread\n");
		goto fail;
	}

	/* create and bind socket for ARP resolution */
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = fp->fab_dev_attrs->uda_ipaddr_be;
	fp->fab_arp_sockfd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fp->fab_arp_sockfd == -1) {
		USDF_INFO("unable to create socket\n");
		goto fail;
	}
	ret = bind(fp->fab_arp_sockfd, (struct sockaddr *) &sin, sizeof(sin));
	if (ret == -1) {
		ret = -errno;
		goto fail;
	}

	atomic_initialize(&fp->fab_refcnt, 0);
	fattrp->fabric = fab_utof(fp);
	fattrp->prov_version = USDF_PROV_VERSION;
	*fabric = fab_utof(fp);
	USDF_INFO("successfully opened %s/%s\n", fattrp->name,
			fp->fab_dev_attrs->uda_ifname);
	return 0;

fail:
	free(fp->fab_attr.name);
	free(fp->fab_attr.prov_name);
	ff = fab_utof(fp);
	usdf_fabric_close(&ff->fid);
	USDF_DBG("returning %d (%s)\n", ret, fi_strerror(-ret));
	return ret;
}
Exemplo n.º 21
0
static int rxd_ep_bind(struct fid *ep_fid, struct fid *bfid, uint64_t flags)
{
	struct rxd_ep *ep;
	struct rxd_av *av;
	struct util_cq *cq;
	struct util_cntr *cntr;
	int ret = 0;

	ep = container_of(ep_fid, struct rxd_ep, util_ep.ep_fid.fid);
	switch (bfid->fclass) {
	case FI_CLASS_AV:
		av = container_of(bfid, struct rxd_av, util_av.av_fid.fid);
		ret = ofi_ep_bind_av(&ep->util_ep, &av->util_av);
		if (ret)
			return ret;

		ret = fi_ep_bind(ep->dg_ep, &av->dg_av->fid, flags);
		if (ret)
			return ret;
		break;
	case FI_CLASS_CQ:
		cq = container_of(bfid, struct util_cq, cq_fid.fid);

		ret = ofi_ep_bind_cq(&ep->util_ep, cq, flags);
		if (ret)
			return ret;

		if (!ep->dg_cq) {
			ret = rxd_dg_cq_open(ep, cq->wait ? FI_WAIT_FD : FI_WAIT_NONE);
			if (ret)
				return ret;
		}

		if (cq->wait)
			ret = rxd_ep_wait_fd_add(ep, cq->wait);
		break;
	case FI_CLASS_EQ:
		break;
	case FI_CLASS_CNTR:
		cntr = container_of(bfid, struct util_cntr, cntr_fid.fid);

		ret = ofi_ep_bind_cntr(&ep->util_ep, cntr, flags);
		if (ret)
			return ret;

		if (!ep->dg_cq) {
			ret = rxd_dg_cq_open(ep, cntr->wait ? FI_WAIT_FD : FI_WAIT_NONE);
		} else if (!ep->dg_cq_fd && cntr->wait) {
			/* Reopen CQ with WAIT fd set */
			ret = fi_close(&ep->dg_cq->fid);
			if (ret) {
				FI_WARN(&rxd_prov, FI_LOG_EP_CTRL,
					"Unable to close dg CQ: %s\n",
					fi_strerror(-ret));
				return ret;
			}

			ep->dg_cq = NULL;
			ret = rxd_dg_cq_open(ep, FI_WAIT_FD);
		}
		if (ret)
			return ret;

		if (cntr->wait)
			ret = rxd_ep_wait_fd_add(ep, cntr->wait);
		break;
	default:
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL,
			"invalid fid class\n");
		ret = -FI_EINVAL;
		break;
	}
	return ret;
}
Exemplo n.º 22
0
/*
 * this function is intended to be invoked as an argument to pthread_create,
 */
static void *_gnix_dgram_prog_thread_fn(void *the_arg)
{
	int ret = FI_SUCCESS, prev_state;
	struct gnix_dgram_hndl *the_hndl = (struct gnix_dgram_hndl *)the_arg;
	sigset_t  sigmask;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	/*
	 * temporarily disable cancelability while we set up
	 * some stuff
	 */

	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &prev_state);

	/*
	 * help out Cray core-spec, say we're not an app thread
	 * and can be run on core-spec cpus.
	 */

	ret = _gnix_task_is_not_app();
	if (ret)
		GNIX_WARN(FI_LOG_EP_CTRL,
		"_gnix_task_is_not_app call returned %d\n", ret);

	/*
	 * block all signals, don't want this thread to catch
	 * signals that may be for app threads
	 */

	memset(&sigmask, 0, sizeof(sigset_t));
	ret = sigfillset(&sigmask);
	if (ret) {
		GNIX_WARN(FI_LOG_EP_CTRL,
		"sigfillset call returned %d\n", ret);
	} else {

		ret = pthread_sigmask(SIG_SETMASK,
					&sigmask, NULL);
		if (ret)
			GNIX_WARN(FI_LOG_EP_CTRL,
			"pthread_sigmask call returned %d\n", ret);
	}

	/*
	 * okay now we're ready to be cancelable.
	 */

	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &prev_state);

	pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

retry:
	ret = _gnix_dgram_poll(the_hndl, GNIX_DGRAM_BLOCK);
	if ((ret == -FI_ETIMEDOUT) || (ret == FI_SUCCESS))
		goto retry;

	GNIX_WARN(FI_LOG_EP_CTRL,
		"_gnix_dgram_poll returned %s\n", fi_strerror(-ret));

	/*
	 * TODO: need to be able to enqueue events on to the
	 * ep associated with the cm_nic.
	 */
	return NULL;
}
Exemplo n.º 23
0
static void test_connect_with_accept_blocking_on_eq_fq_SERVER(void)
{
    int ret;

    printf("SERVER running\n");

    setup_ofi(NULL, NULL, FI_SOURCE);

#if WANT_FDS
    // Add the EQ FD to the epoll fd
    static struct epoll_event edt;
    memset(&edt, 0, sizeof(edt));
    edt.events = EPOLLIN;
    edt.data.u32 = 2222;
    ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fidev.eq_fd, &edt);
    if (ret < 0) {
        error("server epoll_ctl failed");
    }
#endif

    // Make a PEP
    ret = fi_passive_ep(fidev.fabric, fidev.info, &fidev.pep, NULL);
    if (0 != ret) {
        error("fi_passive_ep failed");
    }

#if WANT_FIXED_PORT
    size_t ss = sizeof(sin);
    ret = fi_getname(&(fidev.pep->fid), &sin, &ss);
    if (0 != ret) {
        error("fi_setname failed");
    }
    sin.sin_port = htons(listen_port);

    // Bind the PEP to listen on a specific port
    ret = fi_setname(&(fidev.pep->fid), &sin, sizeof(sin));
    if (0 != ret) {
        error("fi_setname failed");
    }
#endif

    // Bind the EQ to the PEP
    ret = fi_pep_bind(fidev.pep, &fidev.eq->fid, 0);
    if (0 != ret) {
        error("fi_pep_bind failed");
    }

    // Listen
    ret = fi_listen(fidev.pep);
    if (0 != ret) {
        error("fi_listen failed");
    }

    // Get the actual address of this PEP
    struct sockaddr_in sinout;
    size_t s = sizeof(sinout);
    ret = fi_getname(&(fidev.pep->fid), &sinout, &s);
    if (0 != ret) {
        error("fi_setname failed");
    }
    sin.sin_family = sinout.sin_family;
    sin.sin_addr = sinout.sin_addr;
    sin.sin_port = sinout.sin_port;

    // Print server addr
    printf("SERVER listening on %s\n", addrstr(&sin));

    // Send our node (IP addr) and service (port) to the client
    snprintf(ofi_node, sizeof(ofi_node) - 1, "%s",
             inet_ntoa(sin.sin_addr));
    snprintf(ofi_service, sizeof(ofi_service) - 1, "%d",
             ntohs(sin.sin_port));
    MPI_Send(ofi_node, sizeof(ofi_node) - 1, MPI_CHAR,
             1, 101, MPI_COMM_WORLD);
    MPI_Send(ofi_service, sizeof(ofi_service) - 1, MPI_CHAR,
             1, 102, MPI_COMM_WORLD);
    printf("SERVER sent via MPI to client: %s / %s\n", ofi_node, ofi_service);

#if WANT_FDS
    // Now wait for the listen to complete
    int nevents;
    #define NEVENTS 32
    struct epoll_event events[NEVENTS];
    int timeout = 10000;
    while (1) {
        printf("SERVER blocking on epoll\n");
        nevents = epoll_wait(epoll_fd, events, NEVENTS, timeout);
        if (nevents < 0) {
            if (errno != EINTR) {
                error("server epoll wait failed");
            } else {
                continue;
            }
        } else {
            printf("SERVER successfully woke up from epoll! %d events\n", nevents);
            for (int i = 0; i < nevents; ++i) {
                if (events[i].data.u32 != 2222) {
                    error("server unexpected epoll return type");
                }
            }
            // If we got the expected event, then go read from the EQ
            break;
        }
    }
#endif

    // Wait for the FI_CONNREQ event
    uint32_t event;
    uint8_t *entry_buffer;
    size_t expected_len = sizeof(struct fi_eq_cm_entry) +
        sizeof(client_data);
    entry_buffer = (uint8_t*) calloc(1, expected_len);
    if (NULL == entry_buffer) {
        error("calloc failed");
    }
    struct fi_eq_cm_entry *entry = (struct fi_eq_cm_entry*) entry_buffer;

    while (1) {
        printf("SERVER waiting for FI_CONNREQ\n");
#if WANT_FDS
        ret = fi_eq_read(fidev.eq, &event, entry, expected_len, 0);
#else
        ret = fi_eq_sread(fidev.eq, &event, entry, expected_len, -1, 0);
#endif
        if (-FI_EAVAIL == ret) {
            printf("server fi_eq_sread failed because there's something in the error queue\n");
            char buffer[2048];
            struct fi_eq_err_entry *err_entry = (struct fi_eq_err_entry*) buffer;
            ret = fi_eq_readerr(fidev.eq, err_entry, 0);
            printf("error code: %d (%s), prov err code: %d (%s)\n", err_entry->err, fi_strerror(err_entry->err), err_entry->prov_errno, fi_strerror(err_entry->prov_errno));
            error("sad panda");
        } else if (-EAGAIN == ret) {
            fprintf(stderr, "SERVER fi_eq_sread fail got -EAGAIN... trying again...\n");
            sleep(1);
            continue;
        } else if (ret < 0) {
            fprintf(stderr, "SERVER fi_eq_sread fail: %s (FI_EAVAIL = %d, -ret = %d)\n", fi_strerror(-ret), FI_EAVAIL, -ret);
            error("SERVER fi_eq_sread failed for some random reason");
        } else if (event != FI_CONNREQ) {
            error("SERVER got some unexpected event");
        } else if (ret != expected_len) {
            error("SERVER got wrong length back from fi_eq_sread");
        }

        uint32_t *d = (uint32_t*) entry->data;
        for (int i = 0; i < (sizeof(client_data) / sizeof(uint32_t)); ++i) {
            if (d[i] != client_data[i]) {
                printf("SERVER got wrong CM client data: d[%d]=%d, should be %d\n",
                       i, d[i], client_data[i]);
            }
        }

        printf("SERVER got FI_CONNREQ, correct size, and correct data -- yay!\n");
        break;
    }

    // Silly logistics: setup_ofi_active adds the fd to the epoll set.
    // But we already added it.  So for simplicity, just remove it
    // here so that setup_ofi_active() can re-add it.
#if WANT_FDS
    // Remove the EQ FD from the epoll fd
    ret = epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fidev.eq_fd, &edt);
    if (ret < 0) {
        error("server epoll_ctl DEL failed");
    }
#endif

    // Make an active endpoint
    setup_ofi_active(entry->info, &ficonn.ep);

    // Accept the incoming connection
    ret = fi_accept(ficonn.ep, (void*) server_data, sizeof(server_data));
    if (ret != 0) {
        printf("fi_accept: ret=%d, %s\n", ret, fi_strerror(-ret));
        error("SERVER fi_accept failed\n");
    }

    // Need to read and get a FI_CONNECTED event
    while (1) {
        printf("SERVER waiting for FI_CONNECTED\n");
#if WANT_FDS
        ret = fi_eq_read(fidev.eq, &event, entry, expected_len, 0);
#else
        ret = fi_eq_sread(fidev.eq, &event, entry, expected_len, -1, 0);
#endif
        if (-FI_EAVAIL == ret) {
            printf("server fi_eq_sread failed because there's something in the error queue\n");
            char buffer[2048];
            struct fi_eq_err_entry *err_entry = (struct fi_eq_err_entry*) buffer;
            ret = fi_eq_readerr(fidev.eq, err_entry, 0);
            printf("error code: %d (%s), prov err code: %d (%s)\n", err_entry->err, fi_strerror(err_entry->err), err_entry->prov_errno, fi_strerror(err_entry->prov_errno));
            error("sad panda");
        } else if (-EAGAIN == ret) {
            fprintf(stderr, "SERVER fi_eq_sread fail got -EAGAIN... trying again...\n");
            sleep(1);
            continue;
        } else if (ret < 0) {
            fprintf(stderr, "SERVER fi_eq_sread fail: %s (FI_EAVAIL = %d, -ret = %d)\n", fi_strerror(-ret), FI_EAVAIL, -ret);
            error("SERVER fi_eq_sread failed for some random reason");
        } else if (event != FI_CONNECTED) {
            error("SERVER got some unexpected event");
        }

        printf("SERVER got FI_CONNECTED -- yay!\n");
        break;
    }

    // Post a recv buffer for the client to send
    int msg[4] = { 0 };
    int len = sizeof(msg);
    printf("SERVER receiving len of %d\n", len);

    struct fid_mr no_mr;
    struct fid_mr *mr;
    void *recv_context = (void*) 0x17;
#if 0
    fi_mr_reg(fidev.domain, msg, len, FI_SEND | FI_RECV,
              0, (uint64_t)(uintptr_t) msg, 0, &mr, NULL);
#else
    // Try using no mr, like fi_msg_pingpong...
    memset(&no_mr, 0, sizeof(no_mr));
    mr = &no_mr;
#endif
    ret = fi_recv(ficonn.ep, msg, len,
                  fi_mr_desc(mr), 0, recv_context);
    if (ret < 0) {
        printf("fi_recv failed! %d, %s\n", ret, fi_strerror(-ret));
        MPI_Abort(MPI_COMM_WORLD, 37);
    }

    sleep(1);
    printf("SERVER posted receive -- waiting for client to send\n");
    MPI_Barrier(MPI_COMM_WORLD);

    // Wait for receive completion
    struct fi_cq_entry cqe;
    while (1) {
        ret = fi_cq_sread(ficonn.cq, &cqe, 1, 0, -1);
        if (cqe.op_context == recv_context) {
            printf("SERVER receive completed\n");
            break;
        } else {
            printf("SERVER got some other completion... continuing\n");
        }
    }

    printf("SERVER finished -- waiting for client before teardown\n");
    MPI_Barrier(MPI_COMM_WORLD);

    printf("SERVER tearing down\n");
    fi_close(&(mr->fid));
    teardown_ofi();
}
Exemplo n.º 24
0
/* Destroy an unconnected VC.  More Support is needed to shutdown and destroy
 * an active VC. */
int _gnix_vc_destroy(struct gnix_vc *vc)
{
	int ret = FI_SUCCESS;
	struct gnix_nic *nic = NULL;
	gni_return_t status;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	if (vc->ep == NULL) {
		GNIX_WARN(FI_LOG_EP_CTRL, "ep null\n");
		return -FI_EINVAL;
	}

	nic = vc->ep->nic;
	if (nic == NULL) {
		GNIX_WARN(FI_LOG_EP_CTRL, "ep nic null for vc %p\n", vc);
		return -FI_EINVAL;
	}

	/*
	 * move vc state to terminating
	 */

	vc->conn_state = GNIX_VC_CONN_TERMINATING;

	/*
	 * try to unbind the gni_ep if non-NULL.
	 * If there are SMSG or PostFMA/RDMA outstanding
	 * wait here for them to complete
	 */

	if (vc->gni_ep != NULL) {
		while (status == GNI_RC_NOT_DONE) {

			fastlock_acquire(&nic->lock);
			status = GNI_EpUnbind(vc->gni_ep);
			fastlock_release(&nic->lock);

			if ((status != GNI_RC_NOT_DONE) &&
				(status != GNI_RC_SUCCESS)) {
				GNIX_WARN(FI_LOG_EP_CTRL,
					"GNI_EpUnBind returned %s\n",
					  gni_err_str[status]);
				break;
			}

			if (status == GNI_RC_NOT_DONE)
				_gnix_nic_progress(nic);
		}
		fastlock_acquire(&nic->lock);
		status = GNI_EpDestroy(vc->gni_ep);
		fastlock_release(&nic->lock);
		if (status != GNI_RC_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
				"GNI_EpDestroy returned %s\n",
				  gni_err_str[status]);
	}

	/*
	 * if the vc is in a nic's work queue, remove it
	 */
	__gnix_vc_cancel(vc);

	/*
	 * We may eventually want to check the state of the VC, if we
	 * implement true VC shutdown.

	if ((vc->conn_state != GNIX_VC_CONN_NONE)
		&& (vc->conn_state != GNIX_VC_CONN_TERMINATED)) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			      "vc conn state  %d\n",
			       vc->conn_state);
		GNIX_WARN(FI_LOG_EP_CTRL, "vc conn state error\n");
		return -FI_EBUSY;
	}
	 */

	/*
	 * if send_q not empty, return -FI_EBUSY
	 * Note for FI_EP_MSG type eps, this behavior
	 * may not be correct for handling fi_shutdown.
	 */

	if (!slist_empty(&vc->tx_queue)) {
		GNIX_WARN(FI_LOG_EP_CTRL, "vc sendqueue not empty\n");
		return -FI_EBUSY;
	}

	fastlock_destroy(&vc->tx_queue_lock);

	if (vc->smsg_mbox != NULL) {
		ret = _gnix_mbox_free(vc->smsg_mbox);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
			      "_gnix_mbox_free returned %s\n",
			      fi_strerror(-ret));
		vc->smsg_mbox = NULL;
	}

	if (vc->dgram != NULL) {
		ret = _gnix_dgram_free(vc->dgram);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
			      "_gnix_dgram_free returned %s\n",
			      fi_strerror(-ret));
		vc->dgram = NULL;
	}

	ret = _gnix_nic_free_rem_id(nic, vc->vc_id);
	if (ret != FI_SUCCESS)
		GNIX_WARN(FI_LOG_EP_CTRL,
		      "__gnix_vc_free_id returned %s\n",
		      fi_strerror(-ret));

	_gnix_free_bitmap(&vc->flags);

	free(vc);

	return ret;
}
Exemplo n.º 25
0
int ADD_SUFFIX(MPID_nem_ofi_iprobe_impl)(struct MPIDI_VC *vc,
                             int source,
                             int tag,
                             MPIR_Comm * comm,
                             int context_offset,
                             int *flag, MPI_Status * status, MPIR_Request ** rreq_ptr)
{
    int ret, mpi_errno = MPI_SUCCESS;
    fi_addr_t remote_proc = 0;
    uint64_t match_bits, mask_bits;
    size_t len;
    MPIR_Request rreq_s, *rreq;

    BEGIN_FUNC(FCNAME);
    if (rreq_ptr) {
        MPIDI_CH3I_NM_OFI_RC(MPID_nem_ofi_create_req(&rreq, 1));
        rreq->kind = MPIR_REQUEST_KIND__RECV;

        *rreq_ptr = rreq;
        rreq->comm = comm;
        rreq->dev.match.parts.rank = source;
        rreq->dev.match.parts.tag = tag;
        rreq->dev.match.parts.context_id = comm->context_id;
        MPIR_Comm_add_ref(comm);
    }
    else {
        rreq = &rreq_s;
        rreq->dev.OnDataAvail = NULL;
    }

    REQ_OFI(rreq)->pack_buffer    = NULL;
    REQ_OFI(rreq)->event_callback = ADD_SUFFIX(peek_callback);
    REQ_OFI(rreq)->match_state    = PEEK_INIT;
    OFI_ADDR_INIT(source, vc, remote_proc);
#if API_SET == API_SET_1
    match_bits = init_recvtag(&mask_bits, comm->context_id + context_offset, source, tag);
#elif API_SET == API_SET_2
    match_bits = init_recvtag_2(&mask_bits, comm->context_id + context_offset, tag);
#endif

    /* ------------------------------------------------------------------------- */
    /* fi_recvmsg with FI_PEEK:                                                  */
    /* Initiate a search for a match in the hardware or software queue.          */
    /* The search can complete immediately with -ENOMSG.                         */
    /* I successful, libfabric will enqueue a context entry into the completion  */
    /* queue to make the search nonblocking.  This code will poll until the      */
    /* entry is enqueued.                                                        */
    /* ------------------------------------------------------------------------- */
    msg_tagged_t msg;
    uint64_t     msgflags = FI_PEEK;
    msg.msg_iov   = NULL;
    msg.desc      = NULL;
    msg.iov_count = 0;
    msg.addr      = remote_proc;
    msg.tag       = match_bits;
    msg.ignore    = mask_bits;
    msg.context   = (void *) &(REQ_OFI(rreq)->ofi_context);
    msg.data      = 0;
    if(*flag == CLAIM_PEEK)
      msgflags|=FI_CLAIM;
    ret = fi_trecvmsg(gl_data.endpoint,&msg,msgflags);
    if(ret == -ENOMSG) {
      if (rreq_ptr) {
        MPIR_Request_free(rreq);
        *rreq_ptr = NULL;
        *flag = 0;
      }
      MPID_nem_ofi_poll(MPID_NONBLOCKING_POLL);
      goto fn_exit;
    }
    MPIR_ERR_CHKANDJUMP4((ret < 0), mpi_errno, MPI_ERR_OTHER,
                         "**ofi_peek", "**ofi_peek %s %d %s %s",
                         __SHORT_FILE__, __LINE__, FCNAME, fi_strerror(-ret));

    while (PEEK_INIT == REQ_OFI(rreq)->match_state)
        MPID_nem_ofi_poll(MPID_BLOCKING_POLL);

    if (PEEK_NOT_FOUND == REQ_OFI(rreq)->match_state) {
        if (rreq_ptr) {
            MPIR_Request_free(rreq);
            *rreq_ptr = NULL;
            *flag = 0;
        }
        MPID_nem_ofi_poll(MPID_NONBLOCKING_POLL);
        goto fn_exit;
    }

    if (status != MPI_STATUS_IGNORE)
        *status = rreq->status;

    if (rreq_ptr)
        MPIR_Request_add_ref(rreq);
    *flag = 1;
    END_FUNC_RC(FCNAME);
}
Exemplo n.º 26
0
/*
 * connect to self, since we use a lock here
 * the only case we need to deal with is one
 * vc connect request with the peer vc not yet
 * being set up
 */
static int __gnix_vc_connect_to_same_cm_nic(struct gnix_vc *vc)
{
	int ret = FI_SUCCESS;
	struct gnix_fid_domain *dom = NULL;
	struct gnix_fid_ep *ep = NULL;
	struct gnix_fid_ep *ep_peer = NULL;
	struct gnix_cm_nic *cm_nic = NULL;
	struct gnix_mbox *mbox = NULL, *mbox_peer = NULL;
	struct gnix_vc *vc_peer;
	gni_smsg_attr_t smsg_mbox_attr;
	gni_smsg_attr_t smsg_mbox_attr_peer;
	gnix_ht_key_t *key_ptr;
	struct gnix_av_addr_entry entry;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	ep = vc->ep;
	if (ep == NULL)
		return -FI_EINVAL;

	cm_nic = ep->cm_nic;
	if (cm_nic == NULL) {
		ret = -FI_EINVAL;
		goto exit;
	}

	dom = ep->domain;
	if (dom == NULL) {
		ret = -FI_EINVAL;
		goto exit;
	}

	fastlock_acquire(&ep->vc_ht_lock);
	if ((vc->conn_state == GNIX_VC_CONNECTING) ||
	    (vc->conn_state == GNIX_VC_CONNECTED)) {
		fastlock_release(&ep->vc_ht_lock);
		return FI_SUCCESS;
	} else
		vc->conn_state = GNIX_VC_CONNECTING;

	fastlock_release(&ep->vc_ht_lock);

	GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connecting\n", vc);


	if (vc->smsg_mbox == NULL) {
		ret = _gnix_mbox_alloc(vc->ep->nic->mbox_hndl,
				       &mbox);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_DATA,
				  "_gnix_mbox_alloc returned %s\n",
				  fi_strerror(-ret));
			goto exit;
		}
		vc->smsg_mbox = mbox;
	} else
		mbox = vc->smsg_mbox;

	smsg_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
	smsg_mbox_attr.msg_buffer = mbox->base;
	smsg_mbox_attr.buff_size =  vc->ep->nic->mem_per_mbox;
	smsg_mbox_attr.mem_hndl = *mbox->memory_handle;
	smsg_mbox_attr.mbox_offset = (uint64_t)mbox->offset;
	smsg_mbox_attr.mbox_maxcredit = dom->params.mbox_maxcredit;
	smsg_mbox_attr.msg_maxsize = dom->params.mbox_msg_maxsize;

	key_ptr = (gnix_ht_key_t *)&vc->peer_addr;
	ep_peer = (struct gnix_fid_ep *)_gnix_ht_lookup(cm_nic->addr_to_ep_ht,
						   *key_ptr);
	if (ep_peer == NULL) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_ht_lookup addr_to_ep failed\n");
		ret = -FI_ENOENT;
		goto exit;
	}

	key_ptr = (gnix_ht_key_t *)&ep->my_name.gnix_addr;

	fastlock_acquire(&ep_peer->vc_ht_lock);
	vc_peer = (struct gnix_vc *)_gnix_ht_lookup(ep_peer->vc_ht,
						   *key_ptr);

	/*
	 * handle the special case of connecting to self
	 */

	if (vc_peer == vc) {
		ret = __gnix_vc_smsg_init(vc, vc->vc_id, &smsg_mbox_attr);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_DATA,
				  "_gnix_vc_smsg_init returned %s\n",
				  fi_strerror(-ret));
			goto exit_w_lock;
		}
		vc->conn_state = GNIX_VC_CONNECTED;
		GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connected\n",
			   vc);
		goto exit_w_lock;
	}

	if ((vc_peer != NULL) &&
	    (vc_peer->conn_state != GNIX_VC_CONN_NONE)) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "_gnix_vc_connect self, vc_peer in inconsistent state\n");
		ret = -FI_ENOSPC;
		goto exit_w_lock;
	}

	if (vc_peer == NULL) {
		entry.gnix_addr = ep->my_name.gnix_addr;
		entry.cm_nic_cdm_id = ep->my_name.cm_nic_cdm_id;
		ret = _gnix_vc_alloc(ep_peer,
				     &entry,
				     &vc_peer);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_CTRL,
				"_gnix_vc_alloc returned %s\n",
				fi_strerror(-ret));
			goto exit_w_lock;
		}

		ret = _gnix_ht_insert(ep_peer->vc_ht,
				      *key_ptr,
				      vc_peer);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "_gnix_ht_insert returned %s\n",
				  fi_strerror(-ret));
			goto exit_w_lock;
		}
		vc_peer->modes |= GNIX_VC_MODE_IN_HT;
	}

	vc_peer->conn_state = GNIX_VC_CONNECTING;
	GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connecting\n",
		   vc_peer);

	if (vc_peer->smsg_mbox == NULL) {
		ret = _gnix_mbox_alloc(vc_peer->ep->nic->mbox_hndl,
				       &mbox_peer);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_DATA,
				  "_gnix_mbox_alloc returned %s\n",
				  fi_strerror(-ret));
			goto exit_w_lock;
		}
		vc_peer->smsg_mbox = mbox_peer;
	} else
		mbox_peer = vc_peer->smsg_mbox;

	smsg_mbox_attr_peer.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
	smsg_mbox_attr_peer.msg_buffer = mbox_peer->base;
	smsg_mbox_attr_peer.buff_size =  vc_peer->ep->nic->mem_per_mbox;
	smsg_mbox_attr_peer.mem_hndl = *mbox_peer->memory_handle;
	smsg_mbox_attr_peer.mbox_offset = (uint64_t)mbox_peer->offset;
	smsg_mbox_attr_peer.mbox_maxcredit = dom->params.mbox_maxcredit;
	smsg_mbox_attr_peer.msg_maxsize = dom->params.mbox_msg_maxsize;

	ret = __gnix_vc_smsg_init(vc, vc_peer->vc_id, &smsg_mbox_attr_peer);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_vc_smsg_init returned %s\n",
			  fi_strerror(-ret));
		goto exit_w_lock;
	}

	ret = __gnix_vc_smsg_init(vc_peer, vc->vc_id, &smsg_mbox_attr);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_vc_smsg_init returned %s\n",
			  fi_strerror(-ret));
		goto exit_w_lock;
	}

	vc->conn_state = GNIX_VC_CONNECTED;
	GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connected\n",
		   vc);
	vc_peer->conn_state = GNIX_VC_CONNECTED;
	GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connected\n",
		   vc_peer);

exit_w_lock:
	fastlock_release(&ep_peer->vc_ht_lock);
exit:
	return ret;
}
Exemplo n.º 27
0
static int run(void)
{
	char *node, *service;
	uint64_t flags;
	int ret;
	size_t i;

	ret = ft_read_addr_opts(&node, &service, hints, &flags, &opts);
	if (ret)
		return ret;

	ret = opts.dst_addr ? client_setup() : server_setup();
	if (ret) {
		fprintf(stderr, "error: %s\n", fi_strerror(-ret));
		return ret;
	}

	/* Leave extra space for invalid size test */
	cm_data = calloc(1, cm_data_size + 1);
	if (!cm_data)
		return -FI_ENOMEM;

	entry = calloc(1, sizeof(*entry) + cm_data_size);
	if (!entry)
		return -FI_ENOMEM;

	if (opts.dst_addr) {
		ret = ft_sock_connect(opts.dst_addr, sock_service);
		if (ret)
			goto err2;
	} else {
		ret = ft_sock_listen(sock_service);
		if (ret)
			goto err2;
		ret = ft_sock_accept();
		if (ret)
			goto err1;
	}

	for (i = 1; i <= cm_data_size; i <<= 1) {
		printf("trying with data size: %zu\n", i);

		if (opts.dst_addr)
			ret = client(i);
		else
			ret = server(i);

		if (ret)
			goto err1;

		ret = ft_sock_sync(0);
		if (ret)
			goto err1;
	}

	/* Despite server not being setup to handle this, the client should fail
	 * with -FI_EINVAL since this exceeds its max data size.
	 */
	if (opts.dst_addr) {
		printf("trying with data size exceeding maximum: %zu\n",
				cm_data_size + 1);
		/* Don't call client since it produces an error message. */
		ret = client_connect(cm_data_size + 1);
		if (ret != -FI_EINVAL) {
			FT_ERR("expected -FI_EINVAL, got: [%d]:%s\n", ret,
				fi_strerror(-ret));
		} else {
			ret = FI_SUCCESS;
		}
	}

err1:
	ft_sock_shutdown(sock);
err2:
	free(entry);
	return ret;
}
Exemplo n.º 28
0
static int __gnix_vc_hndl_conn_resp(struct gnix_cm_nic *cm_nic,
				    char *msg_buffer,
				    struct gnix_address src_cm_nic_addr)
{
	int ret = FI_SUCCESS;
	int peer_id;
	struct gnix_vc *vc = NULL;
	uint64_t peer_vc_addr;
	struct gnix_fid_ep *ep;
	gni_smsg_attr_t peer_smsg_attr;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	/*
	 * unpack the message
	 */

	__gnix_vc_unpack_resp(msg_buffer,
			      (uint64_t *)&vc,
			      &peer_vc_addr,
			      &peer_id,
			      &peer_smsg_attr);

	GNIX_DEBUG(FI_LOG_EP_CTRL,
		"resp rx: (From Aries 0x%x Id %d src vc %p peer vc addr 0x%lx)\n",
		 src_cm_nic_addr.device_addr,
		 src_cm_nic_addr.cdm_id,
		 vc,
		 peer_vc_addr);

	ep = vc->ep;
	assert(ep != NULL);

	fastlock_acquire(&ep->vc_ht_lock);

	/*
	 * at this point vc should be in connecting state
	 */
	if (vc->conn_state != GNIX_VC_CONNECTING) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "vc %p not in connecting state, rather %d\n",
			  vc, vc->conn_state);
		ret = -FI_EINVAL;
		goto err;
	}

	/*
	 * build the SMSG connection
	 */

	ret = __gnix_vc_smsg_init(vc, peer_id, &peer_smsg_attr);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			"__gnix_vc_smsg_init returned %s\n",
			fi_strerror(-ret));
		goto err;
	}

	/*
	 * transition the VC to connected
	 * put in to the nic's work queue for
	 * further processing
	 */

	vc->conn_state = GNIX_VC_CONNECTED;
	GNIX_DEBUG(FI_LOG_EP_CTRL,
		   " moving vc %p to state connected\n",vc);

	fastlock_release(&ep->vc_ht_lock);

	ret = _gnix_vc_schedule(vc);

	return ret;
err:
	vc->conn_state = GNIX_VC_CONN_ERROR;
	fastlock_release(&ep->vc_ht_lock);
	return ret;
}
Exemplo n.º 29
0
int create_connection(void)
{
	struct fi_info		*prov;
	struct fi_eq_cm_entry	entry;
	struct fi_info		*info;
	ssize_t			n;
	uint32_t		event;
	int			ret = -1;

	print_trace("in\n");

	memset(&ctx, 0, sizeof(ctx));

	if (match_provider(&prov))
		goto err1;

	if (client_connect(prov, &ctx))
		goto err2;

	dprint(DEBUG_CONNECT, "Waiting for Server to connect\n");

	n = fi_eq_sread(ctx.eq, &event, &entry, sizeof(entry), CTIMEOUT, 0);
	if (n < sizeof(entry)) {
		struct fi_eq_err_entry eqe;
		int rc;

		print_err("fi_eq_sread '%s'(%d)\n", fi_strerror(n), (int) n);
		rc = fi_eq_readerr(ctx.eq, &eqe, 0);
		if (rc)
			print_err("fi_eq_readerr() returns %d '%s'\n",
				rc, fi_strerror(rc));
		else {
			char buf[64];

			print_err("fi_eq_readerr() prov_err '%s'(%d)\n",
				fi_eq_strerror(ctx.eq, eqe.prov_errno,
					eqe.err_data, buf, sizeof(buf)),
				eqe.prov_errno);
			print_err("fi_eq_readerr() err '%s'(%d)\n",
					fi_strerror(eqe.err), eqe.err);
		}

		return (int) n;
	}

	if (event != FI_CONNECTED) {
		print_err("unexpected event %d\n", event);
		return -FI_EOTHER;
	}

	/* same context specified in fi_endpoint()? */
	if (entry.fid->context != CONTEXT) {
		print_err("entry.fid->context %lx != %lx\n",
			(ulong)entry.fid->context, (ulong)CONTEXT);
	}

	info = entry.info;

	dprint(DEBUG_CONNECT, "*** Client Connected\n");

	dprint(DEBUG_CONNECT, "Client private data(len %ld): '%s'\n",
		(n - sizeof(entry)), entry.data);

	return 0;
err2:
	client_disconnect(&ctx);
	fi_freeinfo(prov);
err1:
	return ret;
}
Exemplo n.º 30
0
static int rxd_av_insert(struct fid_av *av_fid, const void *addr, size_t count,
			fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	struct rxd_av *av;
	int i = 0, index, ret = 0, success_cnt = 0, lookup = 1;
	uint64_t dg_fiaddr;

	av = container_of(av_fid, struct rxd_av, util_av.av_fid);
	fastlock_acquire(&av->util_av.lock);
	if (!av->dg_addrlen) {
		ret = rxd_av_set_addrlen(av, addr);
		if (ret)
			goto out;
		/* Skip lookups if this is the first insertion call.  */
		lookup = 0;
	}

	for (; i < count; i++, addr = (uint8_t *) addr + av->dg_addrlen) {
		ret = lookup ? rxd_av_dg_reverse_lookup(av, i, addr, &dg_fiaddr) :
				-FI_ENODATA;
		if (ret) {
			ret = fi_av_insert(av->dg_av, addr, 1, &dg_fiaddr,
					   flags, context);
			if (ret != 1)
				break;
		}

		ret = ofi_av_insert_addr(&av->util_av, &dg_fiaddr, dg_fiaddr, &index);
		if (ret)
			break;

		success_cnt++;
		if (fi_addr)
			fi_addr[i] = index;
	}

	if (ret) {
		FI_WARN(&rxd_prov, FI_LOG_AV,
			"failed to insert address %d: %d (%s)\n",
			i, -ret, fi_strerror(-ret));
		if (av->util_av.eq)
			ofi_av_write_event(&av->util_av, i, -ret, context);
		if (fi_addr)
			fi_addr[i] = FI_ADDR_NOTAVAIL;
		i++;
	}
out:
	av->dg_av_used += success_cnt;
	fastlock_release(&av->util_av.lock);

	for (; i < count; i++) {
		if (av->util_av.eq)
			ofi_av_write_event(&av->util_av, i, FI_ECANCELED, context);
		if (fi_addr)
			fi_addr[i] = FI_ADDR_NOTAVAIL;
	}

	if (av->util_av.eq) {
		ofi_av_write_event(&av->util_av, success_cnt, 0, context);
		return 0;
	}

	return success_cnt;
}