Exemple #1
0
static ucs_status_t uct_perf_create_pd(ucx_perf_context_t *perf)
{
    uct_pd_resource_desc_t *pd_resources;
    uct_tl_resource_desc_t *tl_resources;
    unsigned i, num_pd_resources;
    unsigned j, num_tl_resources;
    ucs_status_t status;
    uct_pd_h pd;
    uct_pd_config_t *pd_config;

    status = uct_query_pd_resources(&pd_resources, &num_pd_resources);
    if (status != UCS_OK) {
        goto out;
    }

    for (i = 0; i < num_pd_resources; ++i) {
        status = uct_pd_config_read(pd_resources[i].pd_name, NULL, NULL, &pd_config);
        if (status != UCS_OK) {
            goto out_release_pd_resources;
        }

        status = uct_pd_open(pd_resources[i].pd_name, pd_config, &pd);
        uct_config_release(pd_config);
        if (status != UCS_OK) {
            goto out_release_pd_resources;
        }

        status = uct_pd_query_tl_resources(pd, &tl_resources, &num_tl_resources);
        if (status != UCS_OK) {
            uct_pd_close(pd);
            goto out_release_pd_resources;
        }

        for (j = 0; j < num_tl_resources; ++j) {
            if (!strcmp(perf->params.uct.tl_name,  tl_resources[j].tl_name) &&
                !strcmp(perf->params.uct.dev_name, tl_resources[j].dev_name))
            {
                uct_release_tl_resource_list(tl_resources);
                perf->uct.pd = pd;
                status = UCS_OK;
                goto out_release_pd_resources;
            }
        }

        uct_pd_close(pd);
        uct_release_tl_resource_list(tl_resources);
    }

    ucs_error("Cannot use transport %s on device %s", perf->params.uct.tl_name,
              perf->params.uct.dev_name);
    status = UCS_ERR_NO_DEVICE;

out_release_pd_resources:
    uct_release_pd_resource_list(pd_resources);
out:
    return status;
}
Exemple #2
0
static void uct_perf_cleanup(ucx_perf_context_t *perf)
{
    uct_perf_test_cleanup_endpoints(perf);
    uct_perf_test_free_mem(perf);
    uct_iface_close(perf->uct.iface);
    uct_pd_close(perf->uct.pd);
    uct_worker_destroy(perf->uct.worker);
    ucs_async_context_cleanup(&perf->uct.async);
}
Exemple #3
0
static void ucp_free_resources(ucp_context_t *context)
{
    ucp_rsc_index_t i;

    ucs_free(context->tl_rscs);
    for (i = 0; i < context->num_pds; ++i) {
        if (context->pds[i] != NULL) {
            uct_pd_close(context->pds[i]);
        }
    }
    ucs_free(context->pd_attrs);
    ucs_free(context->pds);
    ucs_free(context->pd_rscs);
}
Exemple #4
0
/* Device and transport to be used are determined by minimum latency */
static ucs_status_t dev_tl_lookup()
{
	int i;
	int j;
	uint64_t min_latency = UINT64_MAX;
	int pd_index = -1;
	int tl_index = -1;
	ucs_status_t status;
	uct_pd_resource_desc_t *pd_resources; /* Protection domain resource descriptor */
	uct_tl_resource_desc_t *tl_resources; /*Communication resource descriptor */
	unsigned num_pd_resources; /* Number of protected domain */
	unsigned num_tl_resources; /* Number of transport resources resource objects created */

	status = uct_query_pd_resources(&pd_resources, &num_pd_resources);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to query for protected domain resources.\n");	
		goto out1;
	}

	/* Iterate through protected domain resources */
	for (i = 0; i < num_pd_resources; ++i) {
		status = uct_pd_open(pd_resources[i].pd_name, &pd);
		if (UCS_OK != status) {
			fprintf(stderr, "Failed to open protected domain.\n"); fflush(stderr);
			goto release1;
		}

		status = uct_pd_query_tl_resources(pd, &tl_resources, &num_tl_resources);	
		if (UCS_OK != status) {
			fprintf(stderr, "Failed to query transport resources.\n"); fflush(stderr);
			uct_pd_close(pd);
			goto release1;
		}
		
		/* Go through each available transport resource for a particular protected domain 
		 * and keep track of the fastest latency */
		for (j = 0; j < num_tl_resources; ++j) {
			status = resource_supported(tl_resources[j].dev_name, tl_resources[j].tl_name, 1);	
			if (UCS_OK == status) {
				if (tl_resources[j].latency < min_latency) {
					min_latency = tl_resources[j].latency;
					pd_index = i;
					tl_index = j;
				}
			}
		}

		uct_release_tl_resource_list(tl_resources);
		uct_pd_close(pd);
	}

	/* Check if any valid device/transport found */
	if ((-1 == pd_index) || (-1 == tl_index)) {
		uct_release_pd_resource_list(pd_resources);
		return UCS_ERR_UNSUPPORTED;
	}

	/* IMPORTANT: Certain functions that operate on an interface rely on a pointer to the protection domain that created it */
	/* Reopen new protection domain and */
	status = uct_pd_open(pd_resources[pd_index].pd_name, &pd);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to open final protected domain.\n"); fflush(stderr);
		goto release1;
	}

	/* Open new tranport resources */
	status = uct_pd_query_tl_resources(pd, &tl_resources, &num_tl_resources);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to query final transport resources.\n"); fflush(stderr);
		uct_pd_close(pd);
		goto release1;
	}

	/* Call resource_supported() again to set the interface */
	status = resource_supported(tl_resources[tl_index].dev_name, tl_resources[tl_index].tl_name, 0);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to initialize final interface.\n"); fflush(stderr);
		uct_pd_close(pd);
		return status;	
	}

	printf("Using %s with %s.\n", tl_resources[tl_index].dev_name, tl_resources[tl_index].tl_name);fflush(stdout);

	uct_release_tl_resource_list(tl_resources);
release1:
	uct_release_pd_resource_list(pd_resources);
out1:
	return status;
}
Exemple #5
0
int main(int argc, char **argv)
{
	/* MPI is initially used to swap the endpoint and interface addresses so each
	 * process has knowledge of the others. */
	MPI_Status mpi_status;
	int partner;
	int size;
	struct sockaddr *ep_addr; /* Endpoint address */
	struct sockaddr *iface_addr; /* Interface address */
	ucs_status_t status; /* status codes for UCS */
	ucs_thread_mode_t thread_mode = UCS_THREAD_MODE_SINGLE; /* Specifies thread sharing mode of an object */
	uct_ep_h ep; /* Remote endpoint */
	void *arg;

	MPI_Init(NULL, NULL);
	MPI_Comm_size(MPI_COMM_WORLD, &size);
	if (size < 2) {
		fprintf(stderr, "Failed to create enough mpi processes.\n");fflush(stderr);	
		return 1;
	}
	
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	if (0 == rank) { 
		partner = 1; 
	} else if (1 == rank) { 
		partner = 0; 
	} else { 
		MPI_Finalize(); 
		return 0; 
	}

	/* Initialize context */
	status = ucs_async_context_init(&async, UCS_ASYNC_MODE_THREAD);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to init async context.\n");fflush(stderr);
		goto out;
	}	 

	/* Create a worker object */ 
	status = uct_worker_create(&async, thread_mode, &worker);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to create worker.\n");fflush(stderr);
		goto out_cleanup_async;
	}	 

	/* The device and tranport names are determined by latency */
	status = dev_tl_lookup();
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to find supported device and transport\n");fflush(stderr);
		goto out_destroy_worker;
	}

	iface_addr = calloc(1, iface_attr.iface_addr_len);
	ep_addr = calloc(1, iface_attr.ep_addr_len);
	if ((NULL == iface_addr) || (NULL == ep_addr)) { 
		goto out_destroy_iface;
	}

	/* Get interface address */
	status = uct_iface_get_address(iface, iface_addr);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to get interface address.\n");fflush(stderr);
		goto out_free;
	}	 
	
	if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
		/* Create new endpoint */
		status = uct_ep_create(iface, &ep);	
		if (UCS_OK != status) {
			fprintf(stderr, "Failed to create endpoint.\n");fflush(stderr);
			goto out_free;
		}	 	
		/* Get endpoint address */
		status = uct_ep_get_address(ep, ep_addr);	
		if (UCS_OK != status) {
			fprintf(stderr, "Failed to get endpoint address.\n");fflush(stderr);
			goto out_free_ep;
		}	 	
	}

	/* Communicate interface and endpoint addresses to corresponding process */
	MPI_Send(iface_addr, iface_attr.iface_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD);
	MPI_Recv(iface_addr, iface_attr.iface_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD, &mpi_status);
	MPI_Send(ep_addr, iface_attr.ep_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD);
	MPI_Recv(ep_addr, iface_attr.ep_addr_len, MPI_BYTE, partner, 0, MPI_COMM_WORLD, &mpi_status);

	if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
		/* Connect endpoint to a remote endpoint */
		status = uct_ep_connect_to_ep(ep, ep_addr);
	} else if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
		/* Create an endpoint which is connected to a remote interface */
		status = uct_ep_create_connected(iface, iface_addr, &ep);
	} else status = UCS_ERR_UNSUPPORTED;
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to connect endpoint\n");fflush(stderr);
		goto out_free_ep;
	}

	uint8_t id = 0; /* Tag for active message */
	/*Set active message handler */
	status = uct_iface_set_am_handler(iface, id, hello_world, arg);
	if (UCS_OK != status) {
		fprintf(stderr, "Failed to set callback.\n");fflush(stderr);
		goto out_free_ep;
	}	 	
	
	if (0 == rank) {
		uint64_t header;
		char payload[8];
		unsigned length = sizeof(payload);
		/* Send active message to remote endpoint */
		status = uct_ep_am_short(ep, id, header, payload, length);  		
	} else if (1 == rank) {
		while (holder) { 
			/* Explicitly progress any outstanding active message requests */
			uct_worker_progress(worker);
		}
	}

out_free_ep:
	uct_ep_destroy(ep);
out_free:
	free(iface_addr);
	free(ep_addr);
out_destroy_iface:
	uct_iface_close(iface);
	uct_pd_close(pd);
out_destroy_worker:
	uct_worker_destroy(worker);
out_cleanup_async:
	ucs_async_context_cleanup(&async);
out:
	MPI_Finalize();
	return 0;
}
Exemple #6
0
static ucs_status_t ucp_fill_resources(ucp_context_h context,
                                       const ucp_config_t *config)
{
    unsigned num_tl_resources;
    unsigned num_pd_resources;
    uct_pd_resource_desc_t *pd_rscs;
    ucs_status_t status;
    ucp_rsc_index_t i;
    unsigned pd_index;
    uct_pd_h pd;

    /* if we got here then num_resources > 0.
     * if the user's device list is empty, there is no match */
    if (0 == config->devices.count) {
        ucs_error("The device list is empty. Please specify the devices you would like to use "
                  "or omit the UCX_DEVICES so that the default will be used.");
        status = UCS_ERR_NO_ELEM;
        goto err;
    }

    /* if we got here then num_resources > 0.
     * if the user's tls list is empty, there is no match */
    if (0 == config->tls.count) {
        ucs_error("The TLs list is empty. Please specify the transports you would like to use "
                  "or omit the UCX_TLS so that the default will be used.");
        status = UCS_ERR_NO_ELEM;
        goto err;
    }

    /* List protection domain resources */
    status = uct_query_pd_resources(&pd_rscs, &num_pd_resources);
    if (status != UCS_OK) {
        goto err;
    }

    /* Error check: Make sure there is at least one PD */
    if (num_pd_resources == 0) {
        ucs_error("No pd resources found");
        status = UCS_ERR_NO_DEVICE;
        goto err_release_pd_resources;
    }

    if (num_pd_resources >= UCP_MAX_PDS) {
        ucs_error("Only up to %ld PDs are supported", UCP_MAX_PDS);
        status = UCS_ERR_EXCEEDS_LIMIT;
        goto err_release_pd_resources;
    }

    context->num_pds  = 0;
    context->pd_rscs  = NULL;
    context->pds      = NULL;
    context->pd_attrs = NULL;
    context->num_tls  = 0;
    context->tl_rscs  = NULL;

    /* Allocate array of PD resources we would actually use */
    context->pd_rscs = ucs_calloc(num_pd_resources, sizeof(*context->pd_rscs),
                                  "ucp_pd_resources");
    if (context->pd_rscs == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_context_resources;
    }

    /* Allocate array of protection domains */
    context->pds = ucs_calloc(num_pd_resources, sizeof(*context->pds), "ucp_pds");
    if (context->pds == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_context_resources;
    }

    /* Allocate array of protection domains attributes */
    context->pd_attrs = ucs_calloc(num_pd_resources, sizeof(*context->pd_attrs),
                                   "ucp_pd_attrs");
    if (context->pd_attrs == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_context_resources;
    }

    /* Open all protection domains, keep only those which have at least one TL
     * resources selected on them.
     */
    pd_index = 0;
    for (i = 0; i < num_pd_resources; ++i) {
        status = uct_pd_open(pd_rscs[i].pd_name, &pd);
        if (status != UCS_OK) {
            goto err_free_context_resources;
        }

        context->pd_rscs[pd_index] = pd_rscs[i];
        context->pds[pd_index]     = pd;

        /* Save PD attributes */
        status = uct_pd_query(pd, &context->pd_attrs[pd_index]);
        if (status != UCS_OK) {
            goto err_free_context_resources;
        }

        /* Add communication resources of each PD */
        status = ucp_add_tl_resources(context, pd, pd_index, config,
                                      &num_tl_resources);
        if (status != UCS_OK) {
            goto err_free_context_resources;
        }

        /* If the PD does not have transport resources, don't use it */
        if (num_tl_resources > 0) {
            ++pd_index;
            ++context->num_pds;
        } else {
            ucs_debug("closing pd %s because it has no selected transport resources",
                      pd_rscs[i].pd_name);
            uct_pd_close(pd);
        }
    }

    /* Error check: Make sure there is at least one transport */
    if (0 == context->num_tls) {
        ucs_error("There are no available resources matching the configured criteria");
        status = UCS_ERR_NO_DEVICE;
        goto err_free_context_resources;
    }

    /* Error check: Make sure there are no too many transports */
    if (context->num_tls >= UCP_MAX_TLS) {
        ucs_error("Exceeded resources limit (%u requested, up to %d are supported)",
                  context->num_tls, UCP_MAX_TLS);
        status = UCS_ERR_EXCEEDS_LIMIT;
        goto err_free_context_resources;
    }

    uct_release_pd_resource_list(pd_rscs);
    return UCS_OK;

err_free_context_resources:
    ucp_free_resources(context);
err_release_pd_resources:
    uct_release_pd_resource_list(pd_rscs);
err:
    return status;
}
Exemple #7
0
static ucs_status_t uct_perf_setup(ucx_perf_context_t *perf, ucx_perf_params_t *params)
{
    uct_iface_config_t *iface_config;
    ucs_status_t status;

    status = ucs_async_context_init(&perf->uct.async, params->async_mode);
    if (status != UCS_OK) {
        goto out;
    }

    status = uct_worker_create(&perf->uct.async, params->thread_mode,
                               &perf->uct.worker);
    if (status != UCS_OK) {
        goto out_cleanup_async;
    }

    status = uct_perf_create_pd(perf);
    if (status != UCS_OK) {
        goto out_destroy_worker;
    }

    status = uct_iface_config_read(params->uct.tl_name, NULL, NULL, &iface_config);
    if (status != UCS_OK) {
        goto out_destroy_pd;
    }

    status = uct_iface_open(perf->uct.pd, perf->uct.worker, params->uct.tl_name,
                            params->uct.dev_name, 0, iface_config, &perf->uct.iface);
    uct_config_release(iface_config);
    if (status != UCS_OK) {
        ucs_error("Failed to open iface: %s", ucs_status_string(status));
        goto out_destroy_pd;
    }

    status = uct_perf_test_check_capabilities(params, perf->uct.iface);
    if (status != UCS_OK) {
        goto out_iface_close;
    }

    status = uct_perf_test_alloc_mem(perf, params);
    if (status != UCS_OK) {
        goto out_iface_close;
    }

    status = uct_perf_test_setup_endpoints(perf);
    if (status != UCS_OK) {
        ucs_error("Failed to setup endpoints: %s", ucs_status_string(status));
        goto out_free_mem;
    }

    return UCS_OK;

out_free_mem:
    uct_perf_test_free_mem(perf);
out_iface_close:
    uct_iface_close(perf->uct.iface);
out_destroy_pd:
    uct_pd_close(perf->uct.pd);
out_destroy_worker:
    uct_worker_destroy(perf->uct.worker);
out_cleanup_async:
    ucs_async_context_cleanup(&perf->uct.async);
out:
    return status;
}