Exemplo n.º 1
0
static int
check_eq_readerr(struct fid_eq *eq, fid_t fid, void *context, int index)
{
	int ret;
	struct fi_eq_err_entry err_entry;

	ret = fi_eq_readerr(eq, &err_entry, 0);
	if (ret != sizeof(err_entry)) {
		sprintf(err_buf, "fi_eq_readerr ret = %d, %s", ret,
				(ret < 0) ? fi_strerror(-ret) : "unknown");
		return -1;
	}
	if (err_entry.fid != fid) {
		sprintf(err_buf, "fi_eq_readerr fid = %p, should be %p",
				err_entry.fid, fid);
		return -1;
	}
	if (err_entry.context != context) {
		sprintf(err_buf, "fi_eq_readerr fid = %p, should be %p",
				err_entry.context, context);
		return -1;
	}
	if (err_entry.data != index) {
		sprintf(err_buf, "fi_eq_readerr index = %ld, should be %d",
				err_entry.data, index);
		return -1;
	}
	if (err_entry.err <= 0) {
		sprintf(err_buf, "fi_eq_readerr err = %d, should be > 0",
				err_entry.err);
		return -1;
	}
	return 0;
}
Exemplo n.º 2
0
static int client_expect_reject(size_t paramlen)
{
	uint32_t event;
	int ret;

	ret = client_connect(paramlen);
	if (ret) {
		FT_PRINTERR("fi_connect", ret);
		return ret;
	}

	ret = fi_eq_sread(eq, &event, entry, sizeof(*entry), -1, 0);
	if (ret != -FI_EAVAIL) {
		FT_PROCESS_EQ_ERR(ret, eq, "fi_eq_sread", "connect");
		return ret;
	}

	ret = fi_eq_readerr(eq, &err_entry, 0);
	if (ret != sizeof(err_entry)) {
		FT_EQ_ERR(eq, err_entry, NULL, 0);
		return err_entry.err;
	}

	if (err_entry.err != FI_ECONNREFUSED)
		return err_entry.err;

	/* Check data on FI_ECONNREFUSED error event. */
	return ft_check_buf(err_entry.err_data, err_entry.err_data_size);
}
Exemplo n.º 3
0
static ssize_t hook_eq_readerr(struct fid_eq *eq, struct fi_eq_err_entry *buf,
			       uint64_t flags)
{
	struct hook_eq *myeq = container_of(eq, struct hook_eq, eq);
	ssize_t ret;

	ret = fi_eq_readerr(myeq->heq, buf, flags);
	if (ret > 0)
		buf->fid = buf->fid->context;

	return ret;
}
Exemplo n.º 4
0
void eq_readerr(struct fid_eq *eq, const char *eq_str)
{
	struct fi_eq_err_entry eq_err;
	int rd;

	rd = fi_eq_readerr(eq, &eq_err, 0);
	if (rd != sizeof(eq_err)) {
		FT_PRINTERR("fi_eq_readerr", rd);
	} else {
		FT_EQ_ERR(eq, eq_err, NULL, 0);
	}
}
Exemplo n.º 5
0
int ft_eq_readerr(void)
{
	struct fi_eq_err_entry err;
	ssize_t ret;

	ret = fi_eq_readerr(eq, &err, 0);
	if (ret != sizeof(err)) {
		FT_PRINTERR("fi_eq_readerr", ret);
		return ret;
	} else {
		fprintf(stderr, "Error event %d %s\n",
			err.err, fi_strerror(err.err));
		return err.err;
	}
}
Exemplo n.º 6
0
void eq_readerr(struct fid_eq *eq, char *eq_str)
{
    struct fi_eq_err_entry eq_err;
    const char *err_str;
    int rd;

    rd = fi_eq_readerr(eq, &eq_err, 0);
    if (rd != sizeof(eq_err)) {
        FT_PRINTERR("fi_eq_readerr", rd);
    } else {
        err_str = fi_eq_strerror(eq, eq_err.prov_errno, eq_err.err_data, NULL, 0);
        fprintf(stderr, "%s: %d %s\n", eq_str, eq_err.err,
                fi_strerror(eq_err.err));
        fprintf(stderr, "%s: prov_err: %s (%d)\n", eq_str, err_str,
                eq_err.prov_errno);
    }
}
Exemplo n.º 7
0
std::string get_eq_error_string(fid_eq* eq, ssize_t ec) {
    fi_eq_err_entry entry = {};
    std::stringstream error{};

    if (ec < 0) {
        ec = -ec;
    }

    if (ec != FI_EAVAIL) {
        error << "fi_eq_sread error: " << fi_error_to_string(int(ec)) << "(" << ec << ") ";
    }

    auto rc = fi_eq_readerr(eq, &entry, 0);
    if (rc < 0) {
        error << "fi_eq_readerr error: " << fi_error_to_string(int(rc)) << "(" << rc << ")";
    }
    else {
        error << "fi_eq_readerr provider_error: " <<
                fi_eq_strerror(eq, entry.prov_errno, entry.err_data, nullptr, 0)
                << "(" << entry.prov_errno << ") error: " << fi_error_to_string(entry.err);
    }

    return error.str();
}
Exemplo n.º 8
0
int create_connection(void)
{
	struct fi_info		*prov;
	struct fi_eq_cm_entry	entry;
	struct fi_info		*info;
	ssize_t			n;
	uint32_t		event;
	int			ret = -1;

	print_trace("in\n");

	memset(&ctx, 0, sizeof(ctx));

	if (match_provider(&prov))
		goto err1;

	if (client_connect(prov, &ctx))
		goto err2;

	dprint(DEBUG_CONNECT, "Waiting for Server to connect\n");

	n = fi_eq_sread(ctx.eq, &event, &entry, sizeof(entry), CTIMEOUT, 0);
	if (n < sizeof(entry)) {
		struct fi_eq_err_entry eqe;
		int rc;

		print_err("fi_eq_sread '%s'(%d)\n", fi_strerror(n), (int) n);
		rc = fi_eq_readerr(ctx.eq, &eqe, 0);
		if (rc)
			print_err("fi_eq_readerr() returns %d '%s'\n",
				rc, fi_strerror(rc));
		else {
			char buf[64];

			print_err("fi_eq_readerr() prov_err '%s'(%d)\n",
				fi_eq_strerror(ctx.eq, eqe.prov_errno,
					eqe.err_data, buf, sizeof(buf)),
				eqe.prov_errno);
			print_err("fi_eq_readerr() err '%s'(%d)\n",
					fi_strerror(eqe.err), eqe.err);
		}

		return (int) n;
	}

	if (event != FI_CONNECTED) {
		print_err("unexpected event %d\n", event);
		return -FI_EOTHER;
	}

	/* same context specified in fi_endpoint()? */
	if (entry.fid->context != CONTEXT) {
		print_err("entry.fid->context %lx != %lx\n",
			(ulong)entry.fid->context, (ulong)CONTEXT);
	}

	info = entry.info;

	dprint(DEBUG_CONNECT, "*** Client Connected\n");

	dprint(DEBUG_CONNECT, "Client private data(len %ld): '%s'\n",
		(n - sizeof(entry)), entry.data);

	return 0;
err2:
	client_disconnect(&ctx);
	fi_freeinfo(prov);
err1:
	return ret;
}
Exemplo n.º 9
0
static void test_connect_with_accept_blocking_on_eq_fq_CLIENT(void)
{
    int ret;

    printf("CLIENT running\n");

    // Get the server's node (IP addr) and service (port)
    MPI_Recv(ofi_node, sizeof(ofi_node) - 1, MPI_CHAR,
             0, 101, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    MPI_Recv(ofi_service, sizeof(ofi_service) - 1, MPI_CHAR,
             0, 102, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    printf("CLIENT received via MPI: %s / %s\n", ofi_node, ofi_service);

    //setup_ofi(ofi_node, ofi_service);
    setup_ofi(NULL, NULL, 0);

    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    inet_aton(ofi_node, &sin.sin_addr);
    sin.sin_port = htons(atoi(ofi_service));
    printf("CLIENT translated: %s\n", addrstr(&sin));

    setup_ofi_active(fidev.info, &ficonn.ep);

    // Print server addr
    printf("CLIENT connecting to %s\n", addrstr(&sin));

    // Connect!
    printf("Client connecting...\n");
    ret = fi_connect(ficonn.ep,
                     //fidev.info->dest_addr,
                     &sin,
                     (void*) client_data, sizeof(client_data));
    if (ret < 0) {
        error("fi_connect failed");
    }

#if WANT_FDS
    // Now wait for the listen to complete
    int nevents;
    #define NEVENTS 32
    struct epoll_event events[NEVENTS];
    int timeout = 10000;
    while (1) {
        printf("CLIENT blocking on epoll\n");
        nevents = epoll_wait(epoll_fd, events, NEVENTS, timeout);
        if (nevents < 0) {
            if (errno != EINTR) {
                error("client epoll wait failed");
            } else {
                continue;
            }
        } else {
            printf("CLIENT successfully woke up from epoll! %d events\n", nevents);
            for (int i = 0; i < nevents; ++i) {
                if (events[i].data.u32 != 2222) {
                    error("CLIENT unexpected epoll return type");
                }
            }
            // If we got the expected event, then go read from the EQ
            break;
        }
    }
#endif

    // Wait for FI_CONNECTED event
    uint32_t event;
    uint8_t *entry_buffer;
    size_t expected_len = sizeof(struct fi_eq_cm_entry) +
        sizeof(client_data);
    entry_buffer = (uint8_t*) calloc(1, expected_len);
    if (NULL == entry_buffer) {
        error("calloc failed");
    }
    struct fi_eq_cm_entry *entry = (struct fi_eq_cm_entry*) entry_buffer;

    while (1) {
        printf("CLIENT waiting for FI_CONNECTED\n");
#if WANT_FDS
        ret = fi_eq_read(fidev.eq, &event, entry, expected_len, 0);
#else
        ret = fi_eq_sread(fidev.eq, &event, entry, expected_len, -1, 0);
#endif
        if (-FI_EAVAIL == ret) {
            fprintf(stderr, "client fi_eq_sread failed because there's something in the error queue\n");
            char buffer[2048];
            struct fi_eq_err_entry *err_entry = (struct fi_eq_err_entry*) buffer;
            ret = fi_eq_readerr(fidev.eq, err_entry, 0);
            fprintf(stderr, "error code: %d (%s), prov err code: %d (%s)\n", err_entry->err, fi_strerror(err_entry->err), err_entry->prov_errno, fi_strerror(err_entry->prov_errno));
            error("sad panda");
        } else if (ret == -EAGAIN) {
            fprintf(stderr, "CLIENT fi_eq_sread fail got -EAGAIN... trying again...\n");
            sleep(1);
            continue;
        } else if (ret < 0) {
            fprintf(stderr, "SERVER fi_eq_sread fail: %s, ret = %d)\n", fi_strerror(-ret), ret);
            error("client fi_eq_sread failed for some random reason");
        } else if (event != FI_CONNECTED) {
            error("client got some unexpected event");
        } else if (ret != expected_len) {
            error("client got wrong length back from fi_eq_sread");
        }

        uint32_t *d = (uint32_t*) entry->data;
        for (int i = 0; i < (sizeof(server_data) / sizeof(uint32_t)); ++i) {
            if (d[i] != server_data[i]) {
                printf("CLIENT got wrong CM client data: d[%d]=%d, should be %d\n",
                       i, d[i], server_data[i]);
            }
        }

        printf("client got FI_CONNECTED, correct size, and correct data -- yay!\n");
        break;
    }

    printf("CLIENT connecting -- waiting for server before sending\n");
    MPI_Barrier(MPI_COMM_WORLD);

    sleep(1);
    int msg[4] = { 99, 100, 101, 102 };
    int len = sizeof(msg);
    printf("CLIENT sending len of %d\n", len);

    struct fid_mr no_mr;
    struct fid_mr *mr;
    void *send_context = (void*) 0x42;
#if 0
    fi_mr_reg(fidev.domain, msg, len, FI_SEND | FI_RECV,
              0, (uint64_t)(uintptr_t) msg, 0, &mr, NULL);
#else
    // Try using no mr, like fi_msg_pingpong...
    memset(&no_mr, 0, sizeof(no_mr));
    mr = &no_mr;
#endif
    ret = fi_send(ficonn.ep, msg, len,
                  fi_mr_desc(mr), 0, send_context);
    if (ret < 0) {
        printf("fi_Send failed! %d, %s\n", ret, fi_strerror(-ret));
        MPI_Abort(MPI_COMM_WORLD, 37);
    }

    // Wait for send completion
    struct fi_cq_entry cqe;
    while (1) {
        ret = fi_cq_sread(ficonn.cq, &cqe, 1, 0, -1);
        if (cqe.op_context == send_context) {
            printf("CLIENT send completed\n");
            break;
        } else {
            printf("CLIENT got some other completion... continuing\n");
        }
    }

    printf("CLIENT sent -- waiting for server before teardown\n");
    MPI_Barrier(MPI_COMM_WORLD);

    printf("CLIENT tearing down\n");
    fi_close(&(mr->fid));
    teardown_ofi();
}
Exemplo n.º 10
0
static void test_connect_with_accept_blocking_on_eq_fq_SERVER(void)
{
    int ret;

    printf("SERVER running\n");

    setup_ofi(NULL, NULL, FI_SOURCE);

#if WANT_FDS
    // Add the EQ FD to the epoll fd
    static struct epoll_event edt;
    memset(&edt, 0, sizeof(edt));
    edt.events = EPOLLIN;
    edt.data.u32 = 2222;
    ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fidev.eq_fd, &edt);
    if (ret < 0) {
        error("server epoll_ctl failed");
    }
#endif

    // Make a PEP
    ret = fi_passive_ep(fidev.fabric, fidev.info, &fidev.pep, NULL);
    if (0 != ret) {
        error("fi_passive_ep failed");
    }

#if WANT_FIXED_PORT
    size_t ss = sizeof(sin);
    ret = fi_getname(&(fidev.pep->fid), &sin, &ss);
    if (0 != ret) {
        error("fi_setname failed");
    }
    sin.sin_port = htons(listen_port);

    // Bind the PEP to listen on a specific port
    ret = fi_setname(&(fidev.pep->fid), &sin, sizeof(sin));
    if (0 != ret) {
        error("fi_setname failed");
    }
#endif

    // Bind the EQ to the PEP
    ret = fi_pep_bind(fidev.pep, &fidev.eq->fid, 0);
    if (0 != ret) {
        error("fi_pep_bind failed");
    }

    // Listen
    ret = fi_listen(fidev.pep);
    if (0 != ret) {
        error("fi_listen failed");
    }

    // Get the actual address of this PEP
    struct sockaddr_in sinout;
    size_t s = sizeof(sinout);
    ret = fi_getname(&(fidev.pep->fid), &sinout, &s);
    if (0 != ret) {
        error("fi_setname failed");
    }
    sin.sin_family = sinout.sin_family;
    sin.sin_addr = sinout.sin_addr;
    sin.sin_port = sinout.sin_port;

    // Print server addr
    printf("SERVER listening on %s\n", addrstr(&sin));

    // Send our node (IP addr) and service (port) to the client
    snprintf(ofi_node, sizeof(ofi_node) - 1, "%s",
             inet_ntoa(sin.sin_addr));
    snprintf(ofi_service, sizeof(ofi_service) - 1, "%d",
             ntohs(sin.sin_port));
    MPI_Send(ofi_node, sizeof(ofi_node) - 1, MPI_CHAR,
             1, 101, MPI_COMM_WORLD);
    MPI_Send(ofi_service, sizeof(ofi_service) - 1, MPI_CHAR,
             1, 102, MPI_COMM_WORLD);
    printf("SERVER sent via MPI to client: %s / %s\n", ofi_node, ofi_service);

#if WANT_FDS
    // Now wait for the listen to complete
    int nevents;
    #define NEVENTS 32
    struct epoll_event events[NEVENTS];
    int timeout = 10000;
    while (1) {
        printf("SERVER blocking on epoll\n");
        nevents = epoll_wait(epoll_fd, events, NEVENTS, timeout);
        if (nevents < 0) {
            if (errno != EINTR) {
                error("server epoll wait failed");
            } else {
                continue;
            }
        } else {
            printf("SERVER successfully woke up from epoll! %d events\n", nevents);
            for (int i = 0; i < nevents; ++i) {
                if (events[i].data.u32 != 2222) {
                    error("server unexpected epoll return type");
                }
            }
            // If we got the expected event, then go read from the EQ
            break;
        }
    }
#endif

    // Wait for the FI_CONNREQ event
    uint32_t event;
    uint8_t *entry_buffer;
    size_t expected_len = sizeof(struct fi_eq_cm_entry) +
        sizeof(client_data);
    entry_buffer = (uint8_t*) calloc(1, expected_len);
    if (NULL == entry_buffer) {
        error("calloc failed");
    }
    struct fi_eq_cm_entry *entry = (struct fi_eq_cm_entry*) entry_buffer;

    while (1) {
        printf("SERVER waiting for FI_CONNREQ\n");
#if WANT_FDS
        ret = fi_eq_read(fidev.eq, &event, entry, expected_len, 0);
#else
        ret = fi_eq_sread(fidev.eq, &event, entry, expected_len, -1, 0);
#endif
        if (-FI_EAVAIL == ret) {
            printf("server fi_eq_sread failed because there's something in the error queue\n");
            char buffer[2048];
            struct fi_eq_err_entry *err_entry = (struct fi_eq_err_entry*) buffer;
            ret = fi_eq_readerr(fidev.eq, err_entry, 0);
            printf("error code: %d (%s), prov err code: %d (%s)\n", err_entry->err, fi_strerror(err_entry->err), err_entry->prov_errno, fi_strerror(err_entry->prov_errno));
            error("sad panda");
        } else if (-EAGAIN == ret) {
            fprintf(stderr, "SERVER fi_eq_sread fail got -EAGAIN... trying again...\n");
            sleep(1);
            continue;
        } else if (ret < 0) {
            fprintf(stderr, "SERVER fi_eq_sread fail: %s (FI_EAVAIL = %d, -ret = %d)\n", fi_strerror(-ret), FI_EAVAIL, -ret);
            error("SERVER fi_eq_sread failed for some random reason");
        } else if (event != FI_CONNREQ) {
            error("SERVER got some unexpected event");
        } else if (ret != expected_len) {
            error("SERVER got wrong length back from fi_eq_sread");
        }

        uint32_t *d = (uint32_t*) entry->data;
        for (int i = 0; i < (sizeof(client_data) / sizeof(uint32_t)); ++i) {
            if (d[i] != client_data[i]) {
                printf("SERVER got wrong CM client data: d[%d]=%d, should be %d\n",
                       i, d[i], client_data[i]);
            }
        }

        printf("SERVER got FI_CONNREQ, correct size, and correct data -- yay!\n");
        break;
    }

    // Silly logistics: setup_ofi_active adds the fd to the epoll set.
    // But we already added it.  So for simplicity, just remove it
    // here so that setup_ofi_active() can re-add it.
#if WANT_FDS
    // Remove the EQ FD from the epoll fd
    ret = epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fidev.eq_fd, &edt);
    if (ret < 0) {
        error("server epoll_ctl DEL failed");
    }
#endif

    // Make an active endpoint
    setup_ofi_active(entry->info, &ficonn.ep);

    // Accept the incoming connection
    ret = fi_accept(ficonn.ep, (void*) server_data, sizeof(server_data));
    if (ret != 0) {
        printf("fi_accept: ret=%d, %s\n", ret, fi_strerror(-ret));
        error("SERVER fi_accept failed\n");
    }

    // Need to read and get a FI_CONNECTED event
    while (1) {
        printf("SERVER waiting for FI_CONNECTED\n");
#if WANT_FDS
        ret = fi_eq_read(fidev.eq, &event, entry, expected_len, 0);
#else
        ret = fi_eq_sread(fidev.eq, &event, entry, expected_len, -1, 0);
#endif
        if (-FI_EAVAIL == ret) {
            printf("server fi_eq_sread failed because there's something in the error queue\n");
            char buffer[2048];
            struct fi_eq_err_entry *err_entry = (struct fi_eq_err_entry*) buffer;
            ret = fi_eq_readerr(fidev.eq, err_entry, 0);
            printf("error code: %d (%s), prov err code: %d (%s)\n", err_entry->err, fi_strerror(err_entry->err), err_entry->prov_errno, fi_strerror(err_entry->prov_errno));
            error("sad panda");
        } else if (-EAGAIN == ret) {
            fprintf(stderr, "SERVER fi_eq_sread fail got -EAGAIN... trying again...\n");
            sleep(1);
            continue;
        } else if (ret < 0) {
            fprintf(stderr, "SERVER fi_eq_sread fail: %s (FI_EAVAIL = %d, -ret = %d)\n", fi_strerror(-ret), FI_EAVAIL, -ret);
            error("SERVER fi_eq_sread failed for some random reason");
        } else if (event != FI_CONNECTED) {
            error("SERVER got some unexpected event");
        }

        printf("SERVER got FI_CONNECTED -- yay!\n");
        break;
    }

    // Post a recv buffer for the client to send
    int msg[4] = { 0 };
    int len = sizeof(msg);
    printf("SERVER receiving len of %d\n", len);

    struct fid_mr no_mr;
    struct fid_mr *mr;
    void *recv_context = (void*) 0x17;
#if 0
    fi_mr_reg(fidev.domain, msg, len, FI_SEND | FI_RECV,
              0, (uint64_t)(uintptr_t) msg, 0, &mr, NULL);
#else
    // Try using no mr, like fi_msg_pingpong...
    memset(&no_mr, 0, sizeof(no_mr));
    mr = &no_mr;
#endif
    ret = fi_recv(ficonn.ep, msg, len,
                  fi_mr_desc(mr), 0, recv_context);
    if (ret < 0) {
        printf("fi_recv failed! %d, %s\n", ret, fi_strerror(-ret));
        MPI_Abort(MPI_COMM_WORLD, 37);
    }

    sleep(1);
    printf("SERVER posted receive -- waiting for client to send\n");
    MPI_Barrier(MPI_COMM_WORLD);

    // Wait for receive completion
    struct fi_cq_entry cqe;
    while (1) {
        ret = fi_cq_sread(ficonn.cq, &cqe, 1, 0, -1);
        if (cqe.op_context == recv_context) {
            printf("SERVER receive completed\n");
            break;
        } else {
            printf("SERVER got some other completion... continuing\n");
        }
    }

    printf("SERVER finished -- waiting for client before teardown\n");
    MPI_Barrier(MPI_COMM_WORLD);

    printf("SERVER tearing down\n");
    fi_close(&(mr->fid));
    teardown_ofi();
}