Example 1
static
int pspsm_open_endpoint(void)
{
	psm_error_t ret;

	/* Open the process-wide PSM endpoint exactly once; subsequent
	 * calls are no-ops that report success. */
	if (!pspsm_ep){
		struct psm_ep_open_opts opts;

		/* Start from PSM's defaults for all endpoint options. */
		ret = psm_ep_open_opts_get_defaults(&opts);
		if (ret != PSM_OK) goto err;

		ret = psm_ep_open(pspsm_uuid.as_uuid, &opts,
				  &pspsm_ep, &pspsm_epid);
		if (ret != PSM_OK) goto err;

		sendbuf = malloc(pscom.env.readahead);
		if (!sendbuf) {
			/* BUG FIX: the malloc result was previously unchecked,
			 * so an OOM left sendbuf NULL while still returning
			 * success. Report the failure instead. */
			pspsm_err("malloc of the send buffer failed");
			pspsm_dprint(1, "pspsm_open_endpoint: %s", pspsm_err_str);
			return -1;
		}

		pspsm_dprint(2, "pspsm_open_endpoint: OK");
	}
	return 0;

 err:
	pspsm_err(psm_error_get_string(ret));
	pspsm_dprint(1, "pspsm_open_endpoint: %s", pspsm_err_str);
	return -1;
}
Example 2
int try_to_open_psm_endpoint(psm_ep_t *ep, // output endpoint handle
			     psm_epid_t *epid, // output endpoint identifier
			     int unit)
{
  struct psm_ep_open_opts epopts;
  psm_uuid_t job_uuid;
  // Let PSM assign its default values to the endpoint options.
  psm_ep_open_opts_get_defaults(&epopts);
  // We want a stricter timeout and a specific unit
  epopts.timeout = 15*1e9;  // 15 second timeout (the field is in nanoseconds)
  epopts.unit = unit; // We want a specific unit, -1 would let PSM
  // choose the unit for us.
  // We've already set affinity, don't let PSM do so if it wants to.
  if (epopts.affinity == PSM_EP_OPEN_AFFINITY_SET)
    epopts.affinity = PSM_EP_OPEN_AFFINITY_SKIP;
  // ENDPOINT_UUID is set to the same value in the environment of all the
  // processes that wish to communicate over PSM and was generated by
  // the process spawning utility
  /* c = getenv("ENDPOINT_UUID"); */
  /* if (c && *c) */
  /*   implementor_string_to_16byte_packing(c, job_uuid); */
  /* else { */
  /*   fprintf(stderr, "can't find UUID for endpoint\n"); */
  /*   return -1; */
  /* } */

  //psm_uuid_generate(job_uuid);
  // NOTE(review): placeholder UUID — every byte is set to 1 instead of
  // using getenv()/psm_uuid_generate() as in the commented-out code above.
  // All processes using this function get the same UUID, which is the
  // requirement for them to connect to each other; confirm this is only
  // intended for testing.
   memset(&job_uuid, 1, sizeof(psm_uuid_t));
  

  /* int i; */
  /* for (i = 0; i < 16; i++) { */
  /*   printf("%lu\n", job_uuid[i]); */
  /* }   */
  // Debug aid: print the first UUID byte.
  printf("%u\n", job_uuid[0]);

  // Assume we don't want to handle errors here.
  psm_ep_open(job_uuid, &epopts, ep, epid);
  return 1;
}
Example 3
int try_open_endpoint_and_initialize_mq(psm_ep_t *ep, // endpoint handle
					psm_epid_t *epid, // unique endpoint ID
					psm_uuid_t job_uuid, // unique job uuid, for ep_open
					psm_mq_t *mq, // MQ handle initialized on success
					uint64_t communicator_bits) // Where we store our communicator or
// context bits in the 64-bit tag.
{
  psm_error_t err;

  // Simplified open, see psm_ep_open documentation for more info
  err = psm_ep_open(job_uuid,
		    NULL, // no options
		    ep, epid);
  // BUG FIX: the return values of psm_ep_open and psm_mq_init were
  // previously ignored, so psm_mq_init could run on an unopened endpoint
  // and the function always reported success. Return 0 on failure so
  // callers that test for the documented success value (1) see the error.
  if (err != PSM_OK)
    return 0;

  // We initialize a matched queue by telling PSM the bits that are
  // order-significant in the tag.  Point-to-point ordering will not be
  // maintained between senders where the communicator bits are not the
  // same.
  err = psm_mq_init(*ep,
		    communicator_bits,
		    NULL, // no other MQ options
		    0,    // 0 options passed
		    mq);  // newly initialized matched Queue
  if (err != PSM_OK)
    return 0;

  return 1;
}
Example 4
/* Initialize the PSM MTL module: derive the 16-byte job UUID from the
 * ORTE-preconditioned key, open the PSM endpoint and matched queue, and
 * publish the endpoint id via the modex.  Returns OMPI_SUCCESS or
 * OMPI_ERROR. */
int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) { 
    psm_error_t err;
    psm_ep_t	ep; /* endpoint handle */
    psm_mq_t	mq;
    psm_epid_t	epid; /* unique lid+port identifier */
    psm_uuid_t  unique_job_key;
    struct psm_ep_open_opts ep_opt;
    unsigned long long *uu = (unsigned long long *) unique_job_key;
    char *generated_key;
    char env_string[256];
    
    generated_key = getenv("OMPI_MCA_orte_precondition_transports");
    memset(uu, 0, sizeof(psm_uuid_t));
    
    /* The key must be exactly "%016llx-%016llx" (16+1+16 = 33 chars);
     * its two halves fill the 16-byte PSM job UUID. */
    if (!generated_key || (strlen(generated_key) != 33) ||
        sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2)
    {
      opal_show_help("help-mtl-psm.txt",
		     "no uuid present", true,
		     generated_key ? "could not be parsed from" :
		     "not present in", ompi_process_info.nodename);
      return OMPI_ERROR;
      
    }

    /* Handle our own errors for opening endpoints */
    /* NOTE(review): ompi_mtl_psm.ep has not been assigned yet at this
     * point (it is set only after psm_ep_open below) — presumably the
     * handler registration applies globally or to the default value;
     * confirm against the psm_error_register_handler contract. */
    psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler);

    /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM can allocate hardware
     * contexts correctly.
     */
    /* setenv(..., 0): do not overwrite values the user already exported. */
    snprintf(env_string, sizeof(env_string), "%d", local_rank);
    setenv("MPI_LOCALRANKID", env_string, 0);
    snprintf(env_string, sizeof(env_string), "%d", num_local_procs);
    setenv("MPI_LOCALNRANKS", env_string, 0);
    
    /* Setup the endpoint options. */
    bzero((void*) &ep_opt, sizeof(ep_opt));
    ep_opt.timeout = ompi_mtl_psm.connect_timeout * 1e9; /* seconds -> ns */
    ep_opt.unit = ompi_mtl_psm.ib_unit;
    ep_opt.affinity = PSM_EP_OPEN_AFFINITY_SKIP; /* do not let PSM set affinity */
    ep_opt.shm_mbytes = -1; /* Choose PSM defaults */
    ep_opt.sendbufs_num = -1; /* Choose PSM defaults */

    /* Option fields below only exist from the given PSM versions on. */
#if PSM_VERNO >= 0x0101   
    ep_opt.network_pkey = ompi_mtl_psm.ib_pkey;
#endif
    
#if PSM_VERNO >= 0x0107
    ep_opt.port = ompi_mtl_psm.ib_port;
    ep_opt.outsl = ompi_mtl_psm.ib_service_level;
#endif

#if PSM_VERNO >= 0x010d
    ep_opt.service_id = ompi_mtl_psm.ib_service_id;
    ep_opt.path_res_type = ompi_mtl_psm.path_res_type;
#endif

    /* Open PSM endpoint */
    err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid);
    if (err) {
      opal_show_help("help-mtl-psm.txt",
		     "unable to open endpoint", true,
		     psm_error_get_string(err));
      return OMPI_ERROR;
    }

    /* Future errors are handled by the default error handler */
    psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_DEFAULT);
    
    /* Initialize the matched queue; the upper 16 tag bits are
     * order-significant (context/communicator bits). */
    err = psm_mq_init(ep, 
		      0xffff000000000000ULL, 
		      NULL,
		      0,
		      &mq);
    if (err) {
      opal_show_help("help-mtl-psm.txt",
		     "psm init", true,
		     psm_error_get_string(err));
      return OMPI_ERROR;
    }

    /* Publish the handles in the module-global state. */
    ompi_mtl_psm.ep   = ep;
    ompi_mtl_psm.epid = epid;
    ompi_mtl_psm.mq   = mq;

    /* Exchange our epid with the other ranks via the modex. */
    if (OMPI_SUCCESS != 
	ompi_modex_send( &mca_mtl_psm_component.super.mtl_version, 
                             &ompi_mtl_psm.epid, 
			     sizeof(psm_epid_t))) {
	opal_output(0, "Open MPI couldn't send PSM epid to head node process"); 
	return OMPI_ERROR;
    }

    /* register the psm progress function */
    opal_progress_register(ompi_mtl_psm_progress);
        
    return OMPI_SUCCESS;
}
Example 5
static int psmx_domain_init(struct psmx_fid_domain *domain,
			    struct psmx_src_name *src_addr)
{
	struct psmx_fid_fabric *fabric = domain->fabric;
	struct psm_ep_open_opts opts;
	int err;

	psm_ep_open_opts_get_defaults(&opts);

	FI_INFO(&psmx_prov, FI_LOG_CORE,
		"uuid: %s\n", psmx_uuid_to_string(fabric->uuid));

	if (src_addr) {
		opts.unit = src_addr->unit;
		opts.port = src_addr->port;
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"ep_open_opts: unit=%d port=%u\n", opts.unit, opts.port);
	}

	err = psm_ep_open(fabric->uuid, &opts,
			  &domain->psm_ep, &domain->psm_epid);
	if (err != PSM_OK) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"psm_ep_open returns %d, errno=%d\n", err, errno);
		err = psmx_errno(err);
		goto err_out;
	}

	FI_INFO(&psmx_prov, FI_LOG_CORE,
		"epid: 0x%016lx\n", domain->psm_epid);

	err = psm_mq_init(domain->psm_ep, PSM_MQ_ORDERMASK_ALL,
			  NULL, 0, &domain->psm_mq);
	if (err != PSM_OK) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"psm_mq_init returns %d, errno=%d\n", err, errno);
		err = psmx_errno(err);
		goto err_out_close_ep;
	}

	err = fastlock_init(&domain->mr_lock);
	if (err) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"fastlock_init(mr_lock) returns %d\n", err);
		goto err_out_finalize_mq;
	}

	domain->mr_map = rbtNew(&psmx_key_compare);
	if (!domain->mr_map) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"rbtNew failed\n");
		goto err_out_destroy_mr_lock;
	}

	domain->mr_reserved_key = 1;
	
	err = fastlock_init(&domain->poll_lock);
	if (err) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"fastlock_init(poll_lock) returns %d\n", err);
		goto err_out_delete_mr_map;
	}

	/* Set active domain before psmx_domain_enable_ep() installs the
	 * AM handlers to ensure that psmx_active_fabric->active_domain
	 * is always non-NULL inside the handlers. Notice that the vlaue
	 * active_domain becomes NULL again only when the domain is closed.
	 * At that time the AM handlers are gone with the PSM endpoint.
	 */
	fabric->active_domain = domain;

	if (psmx_domain_enable_ep(domain, NULL) < 0)
		goto err_out_reset_active_domain;

	if (domain->progress_thread_enabled)
		psmx_domain_start_progress(domain);

	return 0;

err_out_reset_active_domain:
	fabric->active_domain = NULL;
	fastlock_destroy(&domain->poll_lock);

err_out_delete_mr_map:
	rbtDelete(domain->mr_map);

err_out_destroy_mr_lock:
	fastlock_destroy(&domain->mr_lock);

err_out_finalize_mq:
	psm_mq_finalize(domain->psm_mq);

err_out_close_ep:
	if (psm_ep_close(domain->psm_ep, PSM_EP_CLOSE_GRACEFUL,
			 (int64_t) psmx_env.timeout * 1000000000LL) != PSM_OK)
		psm_ep_close(domain->psm_ep, PSM_EP_CLOSE_FORCE, 0);

err_out:
	return err;
}