Beispiel #1
0
/*
 * Restrict the caller's CPU affinity to the mask that
 * gaspi_get_affinity_mask() reports for `socket`.
 *
 * socket: socket index; values of 4 and above are rejected.
 *
 * Returns GASPI_SUCCESS once sched_setaffinity() succeeded and GASPI_ERROR
 * if the index is out of range, the mask lookup fails or the affinity
 * cannot be applied.
 */
gaspi_return_t
pgaspi_set_socket_affinity (const gaspi_uchar socket)
{
  cpu_set_t cpus;

  if (!(socket < 4))
    {
      gaspi_print_error("GPI-2 only allows up to a maximum of 4 NUMA sockets");
      return GASPI_ERROR;
    }

  if (gaspi_get_affinity_mask (socket, &cpus) < 0)
    {
      gaspi_print_error ("Failed to get affinity mask");
      return GASPI_ERROR;
    }

  /* A pid of 0 applies the mask to the caller itself. */
  if (0 != sched_setaffinity (0, sizeof (cpus), &cpus))
    {
      gaspi_print_error ("Failed to set affinity");
      return GASPI_ERROR;
    }

  return GASPI_SUCCESS;
}
Beispiel #2
0
/*
 * Announce one local segment to a remote rank over its SN socket and wait
 * for the peer's answer.
 *
 * rank: index into glb_gaspi_ctx.sockfd selecting the connection to use.
 * arg:  must point to the gaspi_segment_id_t of the segment to announce.
 *
 * Returns 0 if the peer replied with 0; -1 if the header could not be
 * written completely, the reply could not be read completely, or the peer
 * replied with a non-zero value.
 */
static inline int
_gaspi_sn_segment_register_command(const gaspi_rank_t rank, void * arg)
{
  gaspi_segment_id_t segment_id = * (gaspi_segment_id_t *) arg;

  gaspi_cd_header cdh;
  memset(&cdh, 0, sizeof(gaspi_cd_header));

  cdh.op_len = 0; /* in-place */
  cdh.op = GASPI_SN_SEG_REGISTER;
  cdh.rank = glb_gaspi_ctx.rank;
  cdh.seg_id = segment_id;
  cdh.rkey = glb_gaspi_ctx.rrmd[segment_id][glb_gaspi_ctx.rank].rkey;
  cdh.addr = glb_gaspi_ctx.rrmd[segment_id][glb_gaspi_ctx.rank].addr;
  cdh.size = glb_gaspi_ctx.rrmd[segment_id][glb_gaspi_ctx.rank].size;

#ifdef GPI2_CUDA
  cdh.host_rkey = glb_gaspi_ctx.rrmd[segment_id][glb_gaspi_ctx.rank].host_rkey;
  cdh.host_addr = glb_gaspi_ctx.rrmd[segment_id][glb_gaspi_ctx.rank].host_addr;
#endif

  ssize_t ret = gaspi_sn_writen(glb_gaspi_ctx.sockfd[rank], &cdh, sizeof(gaspi_cd_header));
  if(ret != sizeof(gaspi_cd_header))
    {
      gaspi_print_error("Failed to write to rank %u (args: %d %p %lu)",
                        rank,
                        glb_gaspi_ctx.sockfd[rank],
                        &cdh,
                        sizeof(gaspi_cd_header));

      return -1;
    }

  /* Pre-set to "failed" so only an explicit 0 from the peer counts as success. */
  int result = 1;
  ssize_t rret = gaspi_sn_readn(glb_gaspi_ctx.sockfd[rank], &result, sizeof(int));
  if( rret != sizeof(int) )
    {
      /* Report the receive buffer that was handed to the read, not the
         address of the byte counter. */
      gaspi_print_error("Failed to read from rank %u (args: %d %p %lu)",
                        rank,
                        glb_gaspi_ctx.sockfd[rank],
                        &result,
                        sizeof(int));
      return -1;
    }

  /* Registration failed on the remote side */
  if( result != 0)
    return -1;

  return 0;
}
Beispiel #3
0
/* TODO: deal with timeout */
/*
 * Pass the topology from rank 0 to all other ranks.
 *
 * Rank 0 is the root. Every other rank waits for exactly one incoming
 * topology, at the lowest set bit of its root-relative rank, and then
 * forwards the topology to the ranks at distances mask/2, mask/4, ..., 1
 * whose relative rank is still below ctx->tnc.
 *
 * ctx:        context providing rank, tnc and the data used by the
 *             receive/send helpers.
 * timeout_ms: handed to gaspi_sn_send_topology() only.
 *
 * Returns 0 on success and -1 as soon as a receive or send fails.
 */
int
gaspi_sn_broadcast_topology(gaspi_context *ctx, const gaspi_timeout_t timeout_ms)
{
  int mask = 0x1;
  int relative_rank;
  int dst;
  const int root = 0;

  relative_rank = (ctx->rank >= root) ? ctx->rank - root : ctx->rank - root + ctx->tnc;

  /* Receive phase: stop at the first bit set in the relative rank. The
     root has no bit set and simply runs the mask past tnc. */
  while(mask <= ctx->tnc)
    {
      if(relative_rank & mask)
        {
          if(gaspi_sn_recv_topology(ctx) != 0)
            {
              gaspi_print_error("Failed to receive topology.");
              return -1;
            }
          break;
        }
      mask <<=1;
    }
  mask >>=1;

  /* Send phase: walk the remaining lower bits downwards. */
  while (mask > 0)
    {
      if(relative_rank + mask < ctx->tnc)
        {
          dst = ctx->rank + mask;

          if(dst >= ctx->tnc)
            dst -= ctx->tnc;

          if(gaspi_sn_send_topology(ctx, dst, timeout_ms) != 0)
            {
              gaspi_print_error("Failed to send topology to %d", dst);
              return -1;
            }
        }
      mask >>=1;
    }

  return 0;
}
Beispiel #4
0
/*
 * Open a TCP connection to host `hn` on `port` and apply the default
 * socket options.
 *
 * Returns the connected descriptor on success, -2 if socket creation hit
 * EMFILE and _gaspi_check_ofile_limit() did not return 0, and -1 on every
 * other failure (socket creation, name lookup, connect, socket options).
 * No descriptor is left open on failure.
 */
static int
gaspi_sn_connect2port_intern(const char *hn, const unsigned short port)
{
  int ret;
  int sockfd = -1;

  struct sockaddr_in Host;
  struct hostent *serverData;

  sockfd = socket ( AF_INET, SOCK_STREAM, 0 );
  if( -1 == sockfd )
    {
      /* at least deal with open files limit */
      int errsv = errno;
      if(errsv == EMFILE)
        {
          if( 0 == _gaspi_check_ofile_limit() )
            {
              sockfd = socket(AF_INET,SOCK_STREAM,0);
              if(sockfd == -1)
                return -1;
            }
          else
            return -2;
        }
      else
        return -1;
    }

  /* Start from an all-zero address so that no indeterminate bytes
     (e.g. the padding member) are handed to connect(). */
  memset(&Host, 0, sizeof(Host));
  Host.sin_family = AF_INET;
  Host.sin_port = htons(port);

  if((serverData = gethostbyname(hn)) == NULL)
    {
      close(sockfd);
      return -1;
    }

  /* Only copy an address that really is IPv4 and fits into sin_addr. */
  if( serverData->h_addrtype != AF_INET ||
      serverData->h_length < 0 ||
      (size_t) serverData->h_length > sizeof(Host.sin_addr) )
    {
      close(sockfd);
      return -1;
    }

  memcpy(&Host.sin_addr, serverData->h_addr, serverData->h_length);

  /* TODO: we need to be able to distinguish between an initialization
     connection attempt and a connection attempt during run-time where
     the remote node is gone (FT) */
  ret = connect( sockfd, (struct sockaddr *) &Host, sizeof(Host) );
  if( 0 != ret )
    {
      close( sockfd );
      return -1;
    }

  if( 0 != gaspi_sn_set_default_opts(sockfd) )
    {
      gaspi_print_error("Failed to set options on socket");
      close(sockfd);
      return -1;
    }

  return sockfd;
}
Beispiel #5
0
/*
 * Enable SO_REUSEADDR and TCP_NODELAY on `sockfd`, in that order.
 *
 * Returns 0 when both options were set and -1 as soon as one
 * setsockopt() call reports a negative result.
 */
int
gaspi_sn_set_default_opts(int sockfd)
{
  const struct
  {
    int level;
    int name;
  } wanted[] =
    {
      { SOL_SOCKET,  SO_REUSEADDR },
      { IPPROTO_TCP, TCP_NODELAY  }
    };

  int enable = 1;
  unsigned int k;

  for(k = 0; k < sizeof(wanted) / sizeof(wanted[0]); k++)
    {
      if(setsockopt(sockfd, wanted[k].level, wanted[k].name, &enable, sizeof(enable)) < 0)
        {
          gaspi_print_error("Failed to set options on socket");
          return -1;
        }
    }

  return 0;
}
Beispiel #6
0
/*
 * Poll the send completion queue of `queue` until the number of
 * completions recorded in *counter at entry has been collected.
 *
 * queue:      index into glb_gaspi_ctx_ib.scqC.
 * counter:    in: number of completions to wait for; it is decremented
 *             for every completion that was actually polled.
 * timeout_ms: compared against the time elapsed since entry each time
 *             the queue is found empty.
 *
 * Returns GASPI_SUCCESS when all completions succeeded, GASPI_TIMEOUT when
 * the elapsed time exceeds timeout_ms while the queue is empty, and
 * GASPI_ERROR when polling fails or a completion carries an error status
 * (in the latter case the peer named by wr_id is marked
 * GASPI_STATE_CORRUPT for this queue).
 */
gaspi_return_t
pgaspi_dev_wait (const gaspi_queue_id_t queue,
                 int * counter,
                 const gaspi_timeout_t timeout_ms)
{

  int ne = 0, i;
  struct ibv_wc wc;

  const int nr = *counter;
  const gaspi_cycles_t s0 = gaspi_get_cycles ();

  for (i = 0; i < nr; i++)
    {
      do
        {
          ne = ibv_poll_cq (glb_gaspi_ctx_ib.scqC[queue], 1, &wc);

          if (ne == 0)
            {
              const gaspi_cycles_t s1 = gaspi_get_cycles ();
              const gaspi_cycles_t tdelta = s1 - s0;

              const float ms = (float) tdelta * glb_gaspi_ctx.cycles_to_msecs;
              if (ms > timeout_ms)
                {
                  return GASPI_TIMEOUT;
                }
            }
        }
      while (ne == 0);

      /* A negative result means the poll itself failed: `wc` was not
         filled in, so neither its status nor its wr_id may be used, and
         the counter must not be touched. */
      if (ne < 0)
        {
          gaspi_print_error("Failed to poll completion queue of queue %d", queue);
          return GASPI_ERROR;
        }

      /* Only real completions reduce the number of outstanding requests. */
      *counter -= ne;

      if (wc.status != IBV_WC_SUCCESS)
        {
          gaspi_print_error("Failed request to %lu. Queue %d might be broken %s",
                            wc.wr_id, queue, ibv_wc_status_str(wc.status) );

          glb_gaspi_ctx.qp_state_vec[queue][wc.wr_id] = GASPI_STATE_CORRUPT;

          return GASPI_ERROR;
        }
    }
#ifdef GPI2_CUDA
  /* Release every CUDA event slot of this queue. */
  int j,k;
  for(k = 0;k < glb_gaspi_ctx.gpu_count; k++)
    {
      for(j = 0; j < GASPI_CUDA_EVENTS; j++)
        gpus[k].events[queue][j].ib_use = 0;
    }

#endif

  return GASPI_SUCCESS;
}
Beispiel #7
0
/*
 * Send a GASPI_SN_GRP_CONNECT request for one group to `rank` and read
 * the peer's gaspi_rc_mseg answer into the group's rrcd slot for that rank.
 *
 * rank: peer to talk to; also the index into sockfd and rrcd.
 * arg:  must point to the gaspi_group_t of the group concerned.
 *
 * Returns 0 on success, -1 on a short write or a short read.
 */
static inline int
_gaspi_sn_group_connect(const gaspi_rank_t rank, void *arg)
{
  int i = (int) rank;
  gaspi_group_t group = *(gaspi_group_t *) arg;
  gaspi_group_ctx *group_to_commit = &(glb_gaspi_group_ctx[group]);

  gaspi_cd_header cdh;
  memset(&cdh, 0, sizeof(gaspi_cd_header));

  cdh.op_len = sizeof(gaspi_rc_mseg);
  cdh.op = GASPI_SN_GRP_CONNECT;
  cdh.rank = glb_gaspi_ctx.rank;
  cdh.ret = group; /* the group id travels in the `ret` field */

  ssize_t ret = gaspi_sn_writen(glb_gaspi_ctx.sockfd[i], &cdh, sizeof(gaspi_cd_header));
  if( ret != sizeof(gaspi_cd_header) )
    {
      gaspi_print_error("Failed to write to %d (%ld %d %p %lu)",
                        i,
                        ret,
                        glb_gaspi_ctx.sockfd[i],
                        &cdh,
                        sizeof(gaspi_cd_header));
      return -1;
    }

  ssize_t rret = gaspi_sn_readn(glb_gaspi_ctx.sockfd[i], &group_to_commit->rrcd[i], sizeof(gaspi_rc_mseg));
  if( rret != sizeof(gaspi_rc_mseg) )
    {
      /* Log the result of the read that failed, not of the earlier write. */
      gaspi_print_error("Failed to read from %d (%ld %d %p %lu)",
                        i,
                        rret,
                        glb_gaspi_ctx.sockfd[i],
                        &group_to_commit->rrcd[i],
                        sizeof(gaspi_rc_mseg));

      return -1;
    }

  return 0;
}
Beispiel #8
0
/*
 * Exchange connection data with `rank`: send a GASPI_SN_CONNECT header
 * followed by the local connection record, then read the remote record.
 *
 * Nothing is sent when pgaspi_dev_get_sizeof_rc() reports a size of 0.
 *
 * Returns 0 on success (or when there is nothing to exchange) and -1 on
 * any short write or short read.
 */
static inline int
_gaspi_sn_connect_command(const gaspi_rank_t rank)
{
  const int i = (int) rank;

  gaspi_cd_header hdr;
  memset(&hdr, 0, sizeof(gaspi_cd_header));

  const size_t payload_len = pgaspi_dev_get_sizeof_rc();
  hdr.op_len = (int) payload_len;
  hdr.op = GASPI_SN_CONNECT;
  hdr.rank = glb_gaspi_ctx.rank;

  /* Nothing to exchange for this device. */
  if( !(payload_len > 0) )
    {
      return 0;
    }

  ssize_t sent = gaspi_sn_writen(glb_gaspi_ctx.sockfd[i], &hdr, sizeof(gaspi_cd_header));
  if( sent != sizeof(gaspi_cd_header) )
    {
      gaspi_print_error("Failed to write to %d", i);
      return -1;
    }

  sent = gaspi_sn_writen(glb_gaspi_ctx.sockfd[i], pgaspi_dev_get_lrcd(i), payload_len);
  if( sent != (ssize_t) payload_len )
    {
      gaspi_print_error("Failed to write to %d", i);
      return -1;
    }

  ssize_t received = gaspi_sn_readn(glb_gaspi_ctx.sockfd[i], pgaspi_dev_get_rrcd(i), payload_len);
  if( received != (ssize_t) payload_len )
    {
      gaspi_print_error("Failed to read from %d", i);
      return -1;
    }

  return 0;
}
Beispiel #9
0
/*
 * Store the GPU count kept in the global context into *gpus.
 *
 * Returns GASPI_SUCCESS on success and GASPI_ERROR when the context's
 * use_gpus flag is 0. The verify macros run first.
 */
gaspi_return_t
gaspi_number_of_GPUs(gaspi_gpu_num *gpus)
{
  gaspi_verify_init("gaspi_number_of_GPUs");
  gaspi_verify_null_ptr(gpus);

  if( glb_gaspi_ctx.use_gpus != 0 )
    {
      *gpus = glb_gaspi_ctx.gpu_count;
      return GASPI_SUCCESS;
    }

  gaspi_print_error("GPUs are not initialized.");
  return GASPI_ERROR;
}
Beispiel #10
0
/*
 * Store the GPU count kept in the global context into *num_gpus.
 *
 * Returns GASPI_SUCCESS on success and GASPI_ERROR when the context's
 * use_gpus flag is 0. The verify macros run first.
 */
gaspi_return_t
gaspi_gpu_number(gaspi_number_t* num_gpus)
{
  gaspi_verify_init("gaspi_gpu_number");
  gaspi_verify_null_ptr(num_gpus);
  gaspi_context_t const * const gctx = &glb_gaspi_ctx;

  if( gctx->use_gpus != 0 )
    {
      *num_gpus = gctx->gpu_count;
      return GASPI_SUCCESS;
    }

  gaspi_print_error("GPUs are not initialized.");
  return GASPI_ERROR;
}
Beispiel #11
0
/*
 * Look up the NUMA node of a CUDA device by reading the device's
 * `numa_node` entry under /sys/bus/pci/devices.
 *
 * cudevice: CUDA device handle passed to cuDeviceGetAttribute().
 *
 * Returns the value read from sysfs, or -1 if an attribute query fails,
 * the sysfs path does not fit the local buffer, the file cannot be opened
 * or no number can be parsed from it. errno is set to ENOSYS only when
 * the PCI domain query fails.
 */
static int
_gaspi_find_GPU_numa_node(int cudevice)
{
  CUresult cres;
  int domain, bus, dev;
  char path[128];
  FILE *sysfile = NULL;

  /* Used as-is when the PCI domain attribute is not available. */
  domain = 0;

#ifdef CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID
  cres = cuDeviceGetAttribute(&domain, CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, cudevice);
  if( CUDA_SUCCESS != cres )
    {
      errno = ENOSYS;
      return -1;
    }
#endif

  cres = cuDeviceGetAttribute(&bus, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, cudevice);
  if( CUDA_SUCCESS != cres )
    {
      return -1;
    }

  cres = cuDeviceGetAttribute(&dev, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, cudevice);
  if( CUDA_SUCCESS != cres )
    {
      return -1;
    }

  /* Bounded formatting; %x expects unsigned arguments. */
  const int plen = snprintf(path, sizeof(path),
                            "/sys/bus/pci/devices/%04x:%02x:%02x.0/numa_node",
                            (unsigned int) domain,
                            (unsigned int) bus,
                            (unsigned int) dev);
  if( plen < 0 || (size_t) plen >= sizeof(path) )
    {
      return -1;
    }

  sysfile = fopen(path, "r");
  if( !sysfile )
    {
      gaspi_print_error("Failed to open %s.", path);
      return -1;
    }

  /* NOTE(review): the field width of 1 parses a single character only, so
     multi-digit node ids would be cut short - confirm this is intended. */
  int numa_node = -1;
  if( fscanf (sysfile, "%1d", &numa_node) != 1 )
    {
      numa_node = -1;
    }
  fclose(sysfile);

  return numa_node;
}
Beispiel #12
0
/* TODO: Not clear to me why we need this function */
/*
 * Copy the device id of each of the first gpu_count entries of the global
 * `gpus` table into gpu_ids[0..gpu_count-1].
 *
 * Returns GASPI_SUCCESS on success and GASPI_ERROR when the context's
 * use_gpus flag is 0. The verify macros run first.
 */
gaspi_return_t
gaspi_GPU_ids(gaspi_gpu_t *gpu_ids)
{
  gaspi_verify_init("gaspi_GPU_ids");
  gaspi_verify_null_ptr(gpu_ids);

  if( glb_gaspi_ctx.use_gpus == 0 )
    {
      gaspi_print_error("GPUs are not initialized.");
      return GASPI_ERROR;
    }

  int slot = 0;
  while( slot < glb_gaspi_ctx.gpu_count )
    {
      gpu_ids[slot] = gpus[slot].device_id;
      slot++;
    }

  return GASPI_SUCCESS;
}
Beispiel #13
0
/*
 * Report the local socket stored in the global context, but only when the
 * environment variable GASPI_SET_NUMA_SOCKET parses (via atoi) to 1.
 *
 * socket: receives glb_gaspi_ctx.localSocket on success.
 *
 * Returns GASPI_SUCCESS on success and GASPI_ERR_ENV when the variable is
 * unset or does not evaluate to 1.
 */
gaspi_return_t
pgaspi_numa_socket(gaspi_uchar * const socket)
{
  const char * const env_val = getenv ("GASPI_SET_NUMA_SOCKET");

  if( env_val == NULL || atoi(env_val) != 1 )
    {
      gaspi_print_error("NUMA was not enabled (-N option of gaspi_run)");

      return GASPI_ERR_ENV;
    }

  *socket = (gaspi_uchar) glb_gaspi_ctx.localSocket;

  return GASPI_SUCCESS;
}
Beispiel #14
0
/*
 * Shut the process's GASPI state down: signal the SN thread, close and
 * release the per-rank sockets, remove the temporary machine file (MPI
 * builds, rank 0 only) and run pgaspi_cleanup_core().
 *
 * timeout: upper bound for acquiring the global context lock.
 *
 * Returns GASPI_TIMEOUT if the lock could not be taken, GASPI_ERROR if
 * pgaspi_cleanup_core() fails and GASPI_SUCCESS otherwise. The lock is
 * released on both of the latter paths.
 */
gaspi_return_t
pgaspi_proc_term (const gaspi_timeout_t timeout)
{
  gaspi_return_t eret = GASPI_SUCCESS;
  int i;

  gaspi_verify_init("gaspi_proc_term");

  if(lock_gaspi_tout (&glb_gaspi_ctx_lock, timeout))
    return GASPI_TIMEOUT;

  pthread_kill(glb_gaspi_ctx.snt, SIGSTKFLT);

  if(glb_gaspi_ctx.sockfd != NULL)
    {
      for(i = 0;i < glb_gaspi_ctx.tnc; i++)
        {
          /* Only touch descriptors that are actually open; unused slots
             hold a negative value. */
          if(glb_gaspi_ctx.sockfd[i] > 0)
            {
              shutdown(glb_gaspi_ctx.sockfd[i],2);
              close(glb_gaspi_ctx.sockfd[i]);
            }
        }

      free(glb_gaspi_ctx.sockfd);
      /* pgaspi_proc_init() frees this pointer again before re-allocating
         it, so it must not be left dangling. */
      glb_gaspi_ctx.sockfd = NULL;
    }

#ifdef GPI2_WITH_MPI
  if(glb_gaspi_ctx.rank == 0)
    {
      if(remove(glb_gaspi_ctx.mfile) < 0)
        {
          gaspi_print_error("Failed to remove tmp file (%s)", glb_gaspi_ctx.mfile);
        }
    }
#endif

  if(pgaspi_cleanup_core() != GASPI_SUCCESS)
    eret = GASPI_ERROR;

  unlock_gaspi (&glb_gaspi_ctx_lock);
  return eret;
}
Beispiel #15
0
/*
 * Report the CPU frequency through *cpu_mhz.
 *
 * The value cached in the global context is used once GASPI is
 * initialized; before that it is queried with gaspi_get_cpufreq().
 *
 * Returns GASPI_SUCCESS, or GASPI_ERROR when the resulting value is 0.
 */
gaspi_return_t
pgaspi_cpu_frequency (gaspi_float * const cpu_mhz)
{
  gaspi_verify_null_ptr(cpu_mhz);

  if (glb_gaspi_init)
    *cpu_mhz = glb_gaspi_ctx.mhz;
  else
    *cpu_mhz = gaspi_get_cpufreq ();

  if (*cpu_mhz != 0.0f)
    return GASPI_SUCCESS;

  gaspi_print_error ("Failed to get CPU frequency");
  return GASPI_ERROR;
}
Beispiel #16
0
/*
 * Read the NUMA node of the InfiniBand device in use from its
 * /sys/class/infiniband/<name>/device/numa_node entry.
 *
 * Returns the value read from sysfs, or -1 if the device name is not
 * available, the path does not fit the local buffer, the file cannot be
 * opened or no number can be parsed from it.
 */
static int
_gaspi_find_GPU_ib_numa_node()
{
  char path[128];
  /* Defined result even when nothing can be parsed below. */
  int numa_node = -1;
  FILE *sysfile = NULL;

  const char *dev_name = ibv_get_device_name(glb_gaspi_ctx_ib.ib_dev);
  if (dev_name == NULL)
    {
      gaspi_print_error("Failed to get device name.");
      return -1;
    }

  const int plen = snprintf(path, sizeof(path),
                            "/sys/class/infiniband/%s/device/numa_node",
                            dev_name);
  if (plen < 0 || (size_t) plen >= sizeof(path))
    {
      return -1;
    }

  sysfile = fopen(path, "r");
  if (!sysfile)
    {
      gaspi_print_error("Failed to open %s.", path);
      return -1;
    }

  /* NOTE(review): the field width of 1 parses a single character only, so
     multi-digit node ids would be cut short - confirm this is intended. */
  if (fscanf (sysfile, "%1d", &numa_node) != 1)
    {
      numa_node = -1;
    }
  fclose(sysfile);

  return numa_node;
}
Beispiel #17
0
/*
 * Send a GASPI_SN_PROC_KILL command to another rank.
 *
 * rank:       target rank; must differ from the caller's own rank.
 * timeout_ms: used both for taking the global context lock and for the
 *             SN command itself.
 *
 * Returns GASPI_ERR_INV_RANK when `rank` is the caller's rank,
 * GASPI_TIMEOUT when the lock cannot be taken, and otherwise whatever
 * gaspi_sn_command() returns.
 */
gaspi_return_t
pgaspi_proc_kill (const gaspi_rank_t rank,const gaspi_timeout_t timeout_ms)
{
  gaspi_return_t kill_status = GASPI_ERROR;

  gaspi_verify_init("gaspi_proc_kill");
  gaspi_verify_rank(rank);

  if( glb_gaspi_ctx.rank == rank )
    {
      gaspi_print_error("Invalid rank to kill");
      return GASPI_ERR_INV_RANK;
    }

  if(lock_gaspi_tout(&glb_gaspi_ctx_lock, timeout_ms))
    {
      return GASPI_TIMEOUT;
    }

  kill_status = gaspi_sn_command(GASPI_SN_PROC_KILL, rank, timeout_ms, NULL);
  unlock_gaspi(&glb_gaspi_ctx_lock);

  return kill_status;
}
Beispiel #18
0
/* TODO: Do we really need this function or at least make it part of
   the GPU interface and allow clients to use it? */
/*
 * Copy the device id of each of the first gpu_count entries of the global
 * `gpus` table into gpu_ids[0..gpu_count-1].
 *
 * Returns GASPI_SUCCESS on success and GASPI_ERROR when the context's
 * use_gpus flag is 0. The verify macros run first.
 */
gaspi_return_t
gaspi_gpu_ids(gaspi_gpu_id_t* gpu_ids)
{
  gaspi_verify_init("gaspi_gpu_ids");
  gaspi_verify_null_ptr(gpu_ids);

  gaspi_context_t const * const gctx = &glb_gaspi_ctx;

  if( gctx->use_gpus == 0 )
    {
      gaspi_print_error("GPUs are not found/initialized.");
      return GASPI_ERROR;
    }

  int slot = 0;
  while( slot < gctx->gpu_count )
    {
      gpu_ids[slot] = gpus[slot].device_id;
      slot++;
    }

  return GASPI_SUCCESS;
}
Beispiel #19
0
/*
 * Send a header-only SN command `op` to `rank`.
 *
 * The header carries op_len 1, the target rank and the total node count
 * from the global context. No reply is read.
 *
 * Returns 0 when the complete header was written, -1 otherwise.
 */
static inline int
_gaspi_sn_single_command(const gaspi_rank_t rank, const enum gaspi_sn_ops op)
{
  gaspi_cd_header hdr;

  /* memset (rather than field-wise init) also clears padding bytes. */
  memset(&hdr, 0, sizeof(gaspi_cd_header));
  hdr.op_len = 1;
  hdr.op = op;
  hdr.rank = rank;
  hdr.tnc = glb_gaspi_ctx.tnc;

  ssize_t written = gaspi_sn_writen(glb_gaspi_ctx.sockfd[rank], &hdr, sizeof(gaspi_cd_header));
  if( written == sizeof(gaspi_cd_header) )
    {
      return 0;
    }

  gaspi_print_error("Failed to write to %u  (%d %p %lu)",
                    rank,
                    glb_gaspi_ctx.sockfd[rank], &hdr, sizeof(gaspi_cd_header));
  return -1;
}
Beispiel #20
0
/*
 * Make sure a connection to `rank` exists in glb_gaspi_ctx.sockfd[rank],
 * retrying gaspi_sn_connect2port() until it succeeds or the timeout
 * expires. Nothing is done if the slot is not -1 at entry.
 *
 * rank:       peer to connect to.
 * timeout_ms: handed to every connect attempt and also compared against
 *             the wall-clock time spent in this function.
 *
 * Returns GASPI_SUCCESS once the slot holds a descriptor,
 * GASPI_ERR_EMFILE if a connect attempt returns -2, and GASPI_TIMEOUT if
 * more than timeout_ms milliseconds passed without a successful attempt.
 * On both failure paths the slot is left at -1 so that a later call
 * starts a fresh attempt.
 */
gaspi_return_t
gaspi_sn_connect_to_rank(const gaspi_rank_t rank, gaspi_timeout_t timeout_ms)
{
  struct timeb t0, t1;
  ftime(&t0);

#ifdef DEBUG
  if( strcmp(gaspi_get_hn(rank), "") == 0 )
    {
      gaspi_print_error("Failed to obtain hostname for rank %u", rank);
      return GASPI_ERROR;
    }
#endif

  /* TODO: introduce backoff delay? */
  while(glb_gaspi_ctx.sockfd[rank] == -1)
    {
      const int fd =
        gaspi_sn_connect2port(gaspi_get_hn(rank),
                              glb_gaspi_cfg.sn_port + glb_gaspi_ctx.poff[rank],
                              timeout_ms);

      /* Do not publish the -2 error code in the descriptor table: the
         loop condition above only recognizes -1 as "not connected", so a
         stored -2 would make the next call report success without a
         usable descriptor. */
      if( -2 == fd )
        return GASPI_ERR_EMFILE;

      if( -1 == fd )
        {
          ftime(&t1);
          const unsigned int delta_ms = (t1.time - t0.time) * 1000 + (t1.millitm - t0.millitm);

          if(delta_ms > timeout_ms)
            return GASPI_TIMEOUT;

          continue;
        }

      glb_gaspi_ctx.sockfd[rank] = fd;
    }

  return GASPI_SUCCESS;
}
Beispiel #21
0
gaspi_return_t
pgaspi_proc_init (const gaspi_timeout_t timeout_ms)
{
  gaspi_return_t eret = GASPI_ERROR;
  int i;
  const int num_queues = (int) glb_gaspi_cfg.queue_num;

  if(lock_gaspi_tout (&glb_gaspi_ctx_lock, timeout_ms))
    return GASPI_TIMEOUT;

  if(glb_gaspi_sn_init == 0)
    {
      glb_gaspi_ctx.lockPS.lock = 0;
      glb_gaspi_ctx.lockPR.lock = 0;
    
      for (i = 0; i < num_queues; i++)
	glb_gaspi_ctx.lockC[i].lock = 0;

      memset (&glb_gaspi_ctx, 0, sizeof (gaspi_context));

      struct utsname mbuf;
      if (uname (&mbuf) == 0)
	{
	  snprintf (glb_gaspi_ctx.mtyp, 64, "%s", mbuf.machine);
	}

      //timing
      glb_gaspi_ctx.mhz = gaspi_get_cpufreq ();
      if (glb_gaspi_ctx.mhz == 0.0f)
	{
	  gaspi_print_error ("Failed to get CPU frequency");
	  goto errL;
	}
  
      glb_gaspi_ctx.cycles_to_msecs = 1.0f / (glb_gaspi_ctx.mhz * 1000.0f);
    
      //handle environment  
      if(gaspi_handle_env(&glb_gaspi_ctx))
	{
	  gaspi_print_error("Failed to handle environment");
	  eret = GASPI_ERR_ENV;
	  goto errL;
	}
  
      //start sn_backend
      if(pthread_create(&glb_gaspi_ctx.snt, NULL, gaspi_sn_backend, NULL) != 0)
	{
	  gaspi_print_error("Failed to create SN thread");
	  goto errL;
	}
    
      glb_gaspi_sn_init = 1;

    }//glb_gaspi_sn_init

  
  if(glb_gaspi_ctx.procType == MASTER_PROC)
    {
      if(glb_gaspi_dev_init == 0)
	{
	  if(access (glb_gaspi_ctx.mfile, R_OK) == -1)
	    {
	      gaspi_print_error ("Incorrect permissions of machinefile");
	      eret = GASPI_ERR_ENV;
	      goto errL;
	    }
	  
	  //read hostnames
	  char *line = NULL;
	  size_t len = 0;
	  int read;
	  
	  FILE *fp = fopen (glb_gaspi_ctx.mfile, "r");
	  if (fp == NULL)
	    {
	      gaspi_print_error("Failed to open machinefile");
	      eret = GASPI_ERR_ENV;
	      goto errL;
	    }

	  glb_gaspi_ctx.tnc = 0;
	  
	  while ((read = getline (&line, &len, fp)) != -1)
	    {
	      
	      //we assume a single hostname per line
	      if ((read < 2) || (read > 64))
		continue;
	      glb_gaspi_ctx.tnc++;
	      
	      if (glb_gaspi_ctx.tnc >= GASPI_MAX_NODES)
		break;
	    }
	  
	  rewind (fp);
	  
	  free (glb_gaspi_ctx.hn_poff);
	  
	  glb_gaspi_ctx.hn_poff = (char *) calloc (glb_gaspi_ctx.tnc, 65);
	  if(glb_gaspi_ctx.hn_poff == NULL)
	    {
	      gaspi_print_error("Debug: Failed to allocate memory");
	      goto errL;
	    }
	  
	  glb_gaspi_ctx.poff = glb_gaspi_ctx.hn_poff + glb_gaspi_ctx.tnc * 64;
        
	  int id = 0;
	  while((read = getline (&line, &len, fp)) != -1)
	    {
	      //we assume a single hostname per line
	      if((read < 2) || (read >= 64)) continue;
	      
	      int inList = 0;
	      
	      for(i = 0; i < id; i++)
		{
		  //already in list ?
		  //TODO: 64? 63? Magic numbers -> just get cacheline from system or define as such
		  const int hnlen = MAX (strlen (glb_gaspi_ctx.hn_poff + i * 64), MIN (strlen (line) - 1, 63));
		  if(strncmp (glb_gaspi_ctx.hn_poff + i * 64, line, hnlen) == 0)
		    {
		      inList++;
		    }
		}
	      
	      glb_gaspi_ctx.poff[id] = inList;
	      
	      strncpy (glb_gaspi_ctx.hn_poff + id * 64, line, MIN (read - 1, 63));
	      id++; 
	      
	      if(id >= GASPI_MAX_NODES)
		break;
	    }
  
	  fclose (fp);
	  
	  free (line);
	  
	  //master
	  glb_gaspi_ctx.rank = 0;
	  
	  free(glb_gaspi_ctx.sockfd);
  
	  glb_gaspi_ctx.sockfd = (int *) malloc (glb_gaspi_ctx.tnc * sizeof (int));
	  if(glb_gaspi_ctx.sockfd == NULL)
	    {
	      gaspi_print_error("Failed to allocate memory");
	      eret = GASPI_ERR_MEMALLOC;
	      goto errL;
	    }
	  
	  for(i = 0; i < glb_gaspi_ctx.tnc; i++) 
	    glb_gaspi_ctx.sockfd[i] = -1;

	}//glb_gaspi_dev_init
    }//MASTER_PROC
  else if(glb_gaspi_ctx.procType != WORKER_PROC)
    {
      gaspi_print_error ("Invalid node type (GASPI_TYPE)");
      eret = GASPI_ERR_ENV;
      goto errL;
    }

  if( 0 != gaspi_sn_broadcast_topology(&glb_gaspi_ctx, GASPI_BLOCK) )
    {
      gaspi_print_error("Failed topology broadcast");
      eret = GASPI_ERROR;
      goto errL;
    }
  
  if( (eret = pgaspi_init_core()) != GASPI_SUCCESS )
    {
      goto errL;
    }

  /* Unleash SN thread */
  __sync_fetch_and_add( &gaspi_master_topo_data, 1);

  gaspi_init_collectives();

  glb_gaspi_init = 1;

  unlock_gaspi (&glb_gaspi_ctx_lock);

  if(glb_gaspi_cfg.build_infrastructure)
    {
      /* configuration tells us to pre-connect */
      if( GASPI_TOPOLOGY_STATIC == glb_gaspi_cfg.build_infrastructure )
	{
	  for(i = glb_gaspi_ctx.rank; i >= 0; i--)
	    {
	      if( (eret = pgaspi_connect((gaspi_rank_t) i, timeout_ms)) != GASPI_SUCCESS )
		{
		  goto errL;
		}
	    }
	}

      eret = pgaspi_group_all_local_create(timeout_ms);
      if(eret == GASPI_SUCCESS)
	{
	  eret = gaspi_barrier(GASPI_GROUP_ALL, timeout_ms);
	}
      else
	{
	  gaspi_print_error("Failed to create GASPI_GROUP_ALL.");
	}
    }
  else /* dont build_infrastructure */
    {
      /* just reserve GASPI_GROUP_ALL */
      glb_gaspi_ctx.group_cnt = 1;
      glb_gaspi_group_ctx[GASPI_GROUP_ALL].id = -2;//disable
      eret = GASPI_SUCCESS;
    }
  
#ifdef GPI2_CUDA
  /* init GPU counts */
  glb_gaspi_ctx.use_gpus = 0;
  glb_gaspi_ctx.gpu_count = 0;
#endif

  return eret;

 errL:
  unlock_gaspi (&glb_gaspi_ctx_lock);

  return eret;
}
Beispiel #22
0
/*
 * One-time setup of the core state: group contexts, the internal
 * page-aligned buffer (nsrc), the remote segment table, the endpoint
 * connection table, the device layer and the per-queue state vectors.
 *
 * Returns -1 if the device is already initialized, GASPI_ERROR if the page
 * size cannot be determined, GASPI_ERR_MEMALLOC on allocation failure,
 * GASPI_ERR_DEVICE if the device layer fails, GASPI_SUCCESS otherwise.
 */
static gaspi_return_t
pgaspi_init_core()
{
  int i;

  if (glb_gaspi_dev_init)
    {
      return -1;
    }

  /* Group contexts: wipe everything, then reset each entry and its locks. */
  memset (&glb_gaspi_group_ctx, 0, GASPI_MAX_GROUPS * sizeof (gaspi_group_ctx));

  i = 0;
  while (i < GASPI_MAX_GROUPS)
    {
      GASPI_RESET_GROUP(glb_gaspi_group_ctx, i);
      glb_gaspi_group_ctx[i].gl.lock = 0;
      glb_gaspi_group_ctx[i].del.lock = 0;
      i++;
    }

  /* change/override num of queues at large scale */
  if (glb_gaspi_ctx.tnc > 1000)
    {
      if (glb_gaspi_cfg.queue_num > 1)
        {
          if(glb_gaspi_ctx.rank == 0)
            {
              gaspi_printf("Warning: setting number of queues to 1\n");
            }
          glb_gaspi_cfg.queue_num = 1;
        }
    }

  /* Create internal memory space */
  const unsigned int size = NOTIFY_OFFSET + sizeof(gaspi_atomic_value_t);
  const long page_size = sysconf (_SC_PAGESIZE);

  if(page_size < 0)
    {
      gaspi_print_error ("Failed to get system's page size.");
      return GASPI_ERROR;
    }

  glb_gaspi_ctx.nsrc.size = size;

  if(0 != posix_memalign ((void **) &glb_gaspi_ctx.nsrc.ptr, page_size, size))
    {
      gaspi_print_error ("Memory allocation (posix_memalign) failed");
      return GASPI_ERR_MEMALLOC;
    }

  memset(glb_gaspi_ctx.nsrc.buf, 0, size);

  /* No remote segment descriptors are known yet. */
  i = 0;
  while(i < GASPI_MAX_MSEGS)
    {
      glb_gaspi_ctx.rrmd[i] = NULL;
      i++;
    }

  glb_gaspi_ctx.ep_conn = (gaspi_endpoint_conn_t *) calloc(glb_gaspi_ctx.tnc, sizeof(gaspi_endpoint_conn_t));
  if (NULL == glb_gaspi_ctx.ep_conn)
    {
      return GASPI_ERR_MEMALLOC;
    }

  if(pgaspi_dev_init_core(&glb_gaspi_cfg) != 0)
    {
      return GASPI_ERR_DEVICE;
    }

  /* One state byte per rank for each of the GASPI_MAX_QP + 3 vectors. */
  i = 0;
  while(i < GASPI_MAX_QP + 3)
    {
      glb_gaspi_ctx.qp_state_vec[i] = (unsigned char *) calloc (glb_gaspi_ctx.tnc, sizeof(unsigned char));
      if(glb_gaspi_ctx.qp_state_vec[i] == NULL)
        {
          return GASPI_ERR_MEMALLOC;
        }
      i++;
    }

  glb_gaspi_dev_init = 1;

  return GASPI_SUCCESS;
}
Beispiel #23
0
/*
 * Atomically add `val_add` to the 8-byte value at `offset` in segment
 * `segment_id` of `rank` and return the previous value in *val_old.
 *
 * The operation is posted on the group queue pair of `rank`; afterwards
 * all outstanding group completions are collected before the old value is
 * read from the local group buffer.
 *
 * offset:     must be a multiple of 8.
 * timeout_ms: upper bound for acquiring the group lock.
 *
 * Returns GASPI_SUCCESS on success, GASPI_TIMEOUT if the lock could not be
 * taken and GASPI_ERROR on an unaligned offset, a failed post or a failed
 * completion.
 */
gaspi_return_t
pgaspi_atomic_fetch_add (const gaspi_segment_id_t segment_id,
                        const gaspi_offset_t offset, const gaspi_rank_t rank,
                        const gaspi_atomic_value_t val_add,
                        gaspi_atomic_value_t * const val_old,
                        const gaspi_timeout_t timeout_ms)
{
#ifdef DEBUG
  if (glb_gaspi_ctx_ib.rrmd[segment_id] == NULL)
    {
      gaspi_printf("Debug: Invalid segment (gaspi_atomic_fetch_add)\n");    
      return GASPI_ERROR;
    }
  
  if( rank >= glb_gaspi_ctx.tnc)
    {
      gaspi_printf("Debug: Invalid rank (gaspi_atomic_fetch_add)\n");    
      return GASPI_ERROR;
    }
  
  if( offset > glb_gaspi_ctx_ib.rrmd[segment_id][rank].size)
    {
      gaspi_printf("Debug: Invalid offsets (gaspi_atomic_fetch_add)\n");    
      return GASPI_ERROR;
    }

  if( val_old == NULL)
    {
      gaspi_printf("Debug: Invalid pointer in parameter val_old (gaspi_atomic_fetch_add)\n");    
      return GASPI_ERROR;
    }
#endif

  struct ibv_send_wr *bad_wr;
  struct ibv_sge slist;
  struct ibv_send_wr swr;
  int i;

  if (offset & 0x7)
    {
      gaspi_print_error("Unaligned offset");
      return GASPI_ERROR;
    }

  /* Without the lock the shared group buffer and completion counter must
     not be touched, and the unlock below would release a lock this call
     never held. */
  if (lock_gaspi_tout (&glb_gaspi_group_ib[0].gl, timeout_ms))
    return GASPI_TIMEOUT;

  slist.addr = (uintptr_t) (glb_gaspi_group_ib[0].buf + NEXT_OFFSET);
  slist.length = 8;
  slist.lkey = glb_gaspi_group_ib[0].mr->lkey;

  swr.wr.atomic.remote_addr =
    glb_gaspi_ctx_ib.rrmd[segment_id][rank].addr + NOTIFY_OFFSET + offset;

  swr.wr.atomic.rkey = glb_gaspi_ctx_ib.rrmd[segment_id][rank].rkey;
  swr.wr.atomic.compare_add = val_add;

  swr.wr_id = rank;
  swr.sg_list = &slist;
  swr.num_sge = 1;
  swr.opcode = IBV_WR_ATOMIC_FETCH_AND_ADD;
  swr.send_flags = IBV_SEND_SIGNALED;
  swr.next = NULL;

  if (ibv_post_send (glb_gaspi_ctx_ib.qpGroups[rank], &swr, &bad_wr))
    {
      glb_gaspi_ctx.qp_state_vec[GASPI_COLL_QP][rank] = 1;
      unlock_gaspi (&glb_gaspi_group_ib[0].gl);
      return GASPI_ERROR;
    }

  glb_gaspi_ctx_ib.ne_count_grp++;


  int ne = 0;
  for (i = 0; i < glb_gaspi_ctx_ib.ne_count_grp; i++)
    {
      do
        {
          ne =
            ibv_poll_cq (glb_gaspi_ctx_ib.scqGroups, 1,
                         glb_gaspi_ctx_ib.wc_grp_send);

        }
      while (ne == 0);

      /* Each poll asks for a single completion, which is written to the
         first element of wc_grp_send; that is the entry to inspect,
         whatever the loop index is. */
      if ((ne < 0)
          || (glb_gaspi_ctx_ib.wc_grp_send[0].status != IBV_WC_SUCCESS))
        {
          glb_gaspi_ctx.
            qp_state_vec[GASPI_COLL_QP][glb_gaspi_ctx_ib.wc_grp_send[0].
                                        wr_id] = 1;
          unlock_gaspi (&glb_gaspi_group_ib[0].gl);
          return GASPI_ERROR;
        }
    }

  glb_gaspi_ctx_ib.ne_count_grp = 0;
  *val_old =
    *((gaspi_atomic_value_t *) (glb_gaspi_group_ib[0].buf + NEXT_OFFSET));

  unlock_gaspi (&glb_gaspi_group_ib[0].gl);
  return GASPI_SUCCESS;

}
Beispiel #24
0
/*
 * Run one SN operation against `rank`: connect, dispatch to the handler
 * for `op`, then close the connection and reset the socket slot to -1.
 *
 * op:         operation to perform; unknown values are reported and fail.
 * rank:       target rank.
 * timeout_ms: used for connecting and passed to the group-check handler.
 * arg:        operation-specific argument forwarded to the handler.
 *
 * Returns the connect error if connecting fails. Otherwise a handler
 * result of 0 maps to GASPI_SUCCESS, 1 to GASPI_TIMEOUT and anything else
 * to GASPI_ERROR.
 */
gaspi_return_t
gaspi_sn_command(const enum gaspi_sn_ops op, const gaspi_rank_t rank, const gaspi_timeout_t timeout_ms, void * arg)
{
  const int i = (int) rank;
  int handler_rc = -1;

  gaspi_return_t status = gaspi_sn_connect_to_rank(rank, timeout_ms);
  if( GASPI_SUCCESS != status )
    {
      return status;
    }

  switch(op)
    {
    case GASPI_SN_CONNECT:
      handler_rc = _gaspi_sn_connect_command(rank);
      break;

    case GASPI_SN_PROC_PING:
    case GASPI_SN_PROC_KILL:
      handler_rc = _gaspi_sn_single_command(rank, op);
      break;

    case GASPI_SN_SEG_REGISTER:
      handler_rc = _gaspi_sn_segment_register_command(rank, arg);
      break;

    case GASPI_SN_GRP_CHECK:
      handler_rc = _gaspi_sn_group_check(rank, timeout_ms, arg);
      break;

    case GASPI_SN_GRP_CONNECT:
      handler_rc = _gaspi_sn_group_connect(rank, arg);
      break;

    default:
      gaspi_print_error("Unknown SN op");
      break;
    }

  /* Translate the handler result into a GASPI return code. */
  if( handler_rc == 0 )
    {
      status = GASPI_SUCCESS;
    }
  else if( handler_rc == 1 )
    {
      status = GASPI_TIMEOUT;
    }
  else
    {
      status = GASPI_ERROR;
    }

  /* The connection is used for a single command only. */
  if( 0 != gaspi_sn_close(glb_gaspi_ctx.sockfd[i]) )
    {
      gaspi_print_error("Failed to close socket to %d", i);
    }

  glb_gaspi_ctx.sockfd[i] = -1;

  return status;
}
Beispiel #25
0
/*
 * Body of the SN thread: answer datagram commands arriving on the socket
 * created by gaspi_setup_dg_socket().
 *
 * Each valid packet (correct size and magic) is checked against the
 * hostnames of the topology; packets from other hosts (or hosts that
 * cannot be resolved) are answered with ret = -1. Commands from known
 * hosts:
 *   1 - acknowledge and end this thread
 *   2 - acknowledge only
 *   3 - acknowledge and terminate the process with exit(-1)
 *   4 - run gaspi_seg_reg_sn() and send its result back
 *
 * arg is not used. The function always returns NULL.
 */
void *
gaspi_sn_thread (void *arg)
{
  gaspi_sn_packet snp;
  struct sockaddr_in cliAddr;

  fd_set rfds;
  int i, ret;


  const int dsock = gaspi_setup_dg_socket ();
  if (dsock == -1)
    {
      gaspi_print_error ("Failed to setup create SN thread socket");
      return NULL;
    }

  if (__sync_fetch_and_add (&glb_gaspi_sn_init, 1) != 0)
    gaspi_print_error ("Failed SN init");

  int local_fd =
    gaspi_listen2port (GASPI_INT_PORT + glb_gaspi_ctx.localSocket,
                       GASPI_BLOCK);
  if (local_fd < 0)
    {
      gaspi_print_error ("Failed to initialize SN thread");
      return NULL;
    }

  while (1)
    {

      FD_ZERO (&rfds);
      FD_SET (dsock, &rfds);

      const int selret = select (FD_SETSIZE, &rfds, NULL, NULL, NULL);
      if (selret <= 0)
        {
          continue;
        }

      if (FD_ISSET (dsock, &rfds))
        {
          /* recvfrom() updates the length, so it must be a writable
             socklen_t and not a const int behind a cast. */
          socklen_t cliLen = sizeof (cliAddr);
          const int rlen =
            recvfrom (dsock, &snp, sizeof (gaspi_sn_packet), MSG_WAITALL,
                      (struct sockaddr *) &cliAddr, &cliLen);
          if ((rlen != sizeof (gaspi_sn_packet))
              || (snp.magic != GASPI_SNP_MAGIC))
            goto checkL;

          char hn[128];
          int hn_found = 0;
          const char *fhn = NULL;

          /* Only look at hn when the lookup filled it in; strtok() yields
             NULL for a name without any non-'.' character. */
          if (getnameinfo ((struct sockaddr *) &cliAddr, cliLen, hn,
                           sizeof (hn), NULL, 0, NI_NOFQDN) == 0)
            {
              fhn = strtok (hn, ".");
            }

          if (fhn != NULL)
            {
              for (i = 0; i < glb_gaspi_ctx.tnc; i++)
                {
                  if (strncmp ((glb_gaspi_ctx.hn + i * 64), fhn, 64) == 0)
                    {
                      hn_found = 1;
                      break;
                    }
                  if (strncmp ("localhost", fhn, 64) == 0)
                    {
                      hn_found = 1;
                      break;
                    }
                }
            }

          if (!hn_found)
            {
              snp.ret = -1;
              ret =
                sendto (dsock, &snp, sizeof (gaspi_sn_packet), MSG_WAITALL,
                        (struct sockaddr *) &cliAddr, sizeof (cliAddr));
              if (ret != sizeof (gaspi_sn_packet))
                {
                  gaspi_print_error ("Hostname not part of machinefile");
                }
              goto checkL;
            }


          if (snp.magic == GASPI_SNP_MAGIC)
            {

              switch (snp.cmd)
                {

                case 1:
                  snp.ret = 0;
                  ret =
                    sendto (dsock, &snp, sizeof (gaspi_sn_packet),
                            MSG_WAITALL, (struct sockaddr *) &cliAddr,
                            sizeof (cliAddr));
                  if (ret != sizeof (gaspi_sn_packet))
                    {
                      gaspi_print_error ("SN thread failed to send cmd 1");
                    }
                  return NULL;
                case 2:
                  snp.ret = 0;
                  ret =
                    sendto (dsock, &snp, sizeof (gaspi_sn_packet),
                            MSG_WAITALL, (struct sockaddr *) &cliAddr,
                            sizeof (cliAddr));
                  if (ret != sizeof (gaspi_sn_packet))
                    {
                      gaspi_print_error ("SN thread failed to send cmd 2");
                    }
                  break;
                case 3:
                  snp.ret = 0;
                  ret =
                    sendto (dsock, &snp, sizeof (gaspi_sn_packet),
                            MSG_WAITALL, (struct sockaddr *) &cliAddr,
                            sizeof (cliAddr));
                  if (ret != sizeof (gaspi_sn_packet))
                    {
                      gaspi_print_error ("SN thread failed to send cmd 3");
                    }
                  exit (-1);
                case 4:
                  snp.ret = gaspi_seg_reg_sn (snp);
                  ret =
                    sendto (dsock, &snp, sizeof (gaspi_sn_packet),
                            MSG_WAITALL, (struct sockaddr *) &cliAddr,
                            sizeof (cliAddr));
                  if (ret != sizeof (gaspi_sn_packet))
                    {
                      gaspi_print_error ("SN thread failed to send cmd 4");
                    }
                  break;
                default:
                  break;
                };              //switch
            }                   //if
        }                       //if(dsock...

    checkL:
      continue;

    }                           //while(1)

  return NULL;
}
Beispiel #26
0
/*
 * Detect the usable CUDA devices and set up the per-device resources.
 *
 * A device is selected when its compute capability major version is at
 * least 3 and its NUMA node equals the one reported by
 * _gaspi_find_dev_numa_node(). For every selected device one non-blocking
 * stream per queue and GASPI_CUDA_EVENTS events per queue are created and
 * stored in the global `gpus` table.
 *
 * Returns GASPI_SUCCESS on success, GASPI_ERR_DEVICE on any CUDA failure
 * or when no CUDA device exists, GASPI_ERROR when no device qualifies and
 * GASPI_ERR_MEMALLOC when the table cannot be allocated. On failure after
 * the allocation the table is released again and `gpus` is reset to NULL.
 */
gaspi_return_t
gaspi_gpu_init(void)
{
  gaspi_context_t * const gctx = &glb_gaspi_ctx;
  int deviceCount;
  cudaError_t cuda_error_id = cudaGetDeviceCount(&deviceCount);
  if( cuda_error_id != cudaSuccess )
    {
      gaspi_print_error("Failed cudaGetDeviceCount." );
      return GASPI_ERR_DEVICE;
    }

  if( deviceCount <= 0 )
    {
      gaspi_print_error("No CUDA capable devices found.");
      return GASPI_ERR_DEVICE;
    }

  const int ib_numa_node = _gaspi_find_dev_numa_node();

  int device_id = 0;
  int gaspi_devices = 0;
  int direct_devices[GPI2_GPU_MAX_DIRECT_DEVS];
  struct cudaDeviceProp deviceProp;
  for(device_id = 0; device_id < deviceCount; device_id++)
    {
      //TODO: possibly add functionality to show properties structure
      cuda_error_id = cudaGetDeviceProperties(&deviceProp, device_id);
      if( cuda_error_id != cudaSuccess)
        {
          return GASPI_ERR_DEVICE;
        }

      if( deviceProp.major >= 3 ) /* TODO: magic number */
        {
          cuda_error_id = cudaSetDevice(device_id);
          if( cuda_error_id != cudaSuccess )
            {
              return GASPI_ERR_DEVICE;
            }

          if( ib_numa_node == _gaspi_find_GPU_numa_node(device_id) )
            {
              /* NOTE(review): this bound leaves the last slot of
                 direct_devices unused - confirm whether that is intended. */
              if( gaspi_devices < GPI2_GPU_MAX_DIRECT_DEVS - 1 )
                {
                  direct_devices[gaspi_devices] = device_id;
                  gaspi_devices++;
                }
            }
        }
    }

  if( 0 == gaspi_devices )
    {
      gaspi_print_error("No GPU Direct RDMA capable devices on the correct NUMA-socket were found.");
      return GASPI_ERROR;
    }

  gpus = (gaspi_gpu_t*) malloc(sizeof(gaspi_gpu_t) * gaspi_devices);
  if( gpus == NULL )
    {
      gaspi_print_error("Failed to allocate memory.");
      return GASPI_ERR_MEMALLOC;
    }

  int i, j, k;
  for(k = 0 ; k < gaspi_devices; k++)
    {
      cuda_error_id = cudaSetDevice(direct_devices[k]);
      if( cuda_error_id != cudaSuccess )
        {
          goto errL;
        }

      for(i = 0; i < GASPI_MAX_QP; i++)
        {
          for(j = 0; j < GASPI_CUDA_EVENTS; j++)
            {
              cuda_error_id = cudaEventCreateWithFlags(&gpus[k].events[i][j].event, cudaEventDisableTiming);
              if( cuda_error_id != cudaSuccess )
                {
                  goto errL;
                }
            }

          /* Create each stream exactly once. Creating a default stream
             first and then a non-blocking one into the same handle would
             overwrite, and thereby leak, the first stream. */
          cuda_error_id = cudaStreamCreateWithFlags(&gpus[k].streams[i], cudaStreamNonBlocking);
          if( cuda_error_id != cudaSuccess )
            {
              goto errL;
            }

        }

      gpus[k].device_id = direct_devices[k];
    }

  gctx->gpu_count = gaspi_devices;
  gctx->use_gpus = 1;

  return GASPI_SUCCESS;

 errL:
  /* Do not keep a half-initialized table around. */
  free(gpus);
  gpus = NULL;

  return GASPI_ERR_DEVICE;
}
Beispiel #27
0
/*
 * Send the topology to peer `i` over a connection opened just for this
 * purpose, and close that connection again before returning.
 *
 * Two messages are written back to back:
 *   1. a gaspi_cd_header with op = GASPI_SN_TOPOLOGY, rank = i,
 *      tnc = ctx->tnc and op_len = ctx->tnc * 65;
 *   2. ctx->tnc * 65 bytes starting at ctx->hn_poff.
 *
 * ctx        - context providing sockfd[], poff[], hn_poff and tnc.
 * i          - peer index; selects the host name (gaspi_get_hn), the port
 *              offset (ctx->poff[i]) and the ctx->sockfd[] slot.
 * timeout_ms - forwarded unchanged to gaspi_sn_connect2port().
 *
 * Returns 0 on success, -1 on any failure (connect, socket options, short
 * write, or a failing close). After a successful connect, ctx->sockfd[i] is
 * always reset to -1 before returning, so the slot never keeps the number
 * of a descriptor that has already been closed.
 */
static int
gaspi_sn_send_topology(gaspi_context *ctx, const int i, const gaspi_timeout_t timeout_ms)
{
  if( (ctx->sockfd[i] =
       gaspi_sn_connect2port(gaspi_get_hn(i),
                             (glb_gaspi_cfg.sn_port + 64 + ctx->poff[i]),
                             timeout_ms)) < 0)
    {
      gaspi_print_error("Failed to connect to %d", i);
      return -1;
    }

  if( 0 != gaspi_sn_set_default_opts(ctx->sockfd[i]) )
    {
      gaspi_print_error("Failed to opts");
      close(ctx->sockfd[i]);
      /* do not leave the number of a closed descriptor in the context */
      ctx->sockfd[i] = -1;
      return -1;
    }

  gaspi_cd_header cdh;
  memset(&cdh, 0, sizeof(gaspi_cd_header));

  cdh.op_len = ctx->tnc * 65; //TODO: 65 is magic
  cdh.op = GASPI_SN_TOPOLOGY;
  cdh.rank = i;
  cdh.tnc = ctx->tnc;

  int retval = 0;
  size_t len = sizeof(gaspi_cd_header);
  void * ptr = &cdh;
  int sockfd = ctx->sockfd[i];

  if (sockfd <= 0 )
    {
      gaspi_print_error("Wrong fd %d %d", i, ctx->sockfd[i] );
      retval = -1;
      goto endL;
    }

  /* first the header announcing the payload ... */
  if ( gaspi_sn_writen( sockfd, ptr, len)  != len )
    {
      gaspi_print_error("Failed to send topology header to %d.", i);
      retval = -1;
      goto endL;
    }

  /* ... then the de facto topology */
  ptr = ctx->hn_poff;
  len = ctx->tnc * 65;

  if ( gaspi_sn_writen( sockfd, ptr, len)  != len )
    {
      gaspi_print_error("Failed to send topology command to %d.", i);
      retval = -1;
      goto endL;
    }

 endL:
  /* the connection is single-use: forget it in the context and close it */
  ctx->sockfd[i] = -1;
  if(gaspi_sn_close( sockfd ) != 0)
    retval = -1;

  return retval;
}
Beispiel #28
0
/*
 * Post an InfiniBand atomic compare-and-swap to `rank` on the group queue
 * pair and wait until every outstanding group send request has completed.
 *
 * segment_id - segment whose remote registration (address, rkey) is used.
 * offset     - byte offset added to the remote segment address (on top of
 *              NOTIFY_OFFSET).
 * rank       - target rank; also stored as the work request id.
 * comparator - value the remote location is compared against.
 * val_new    - value written remotely if the comparison succeeds.
 *
 * The local scatter entry is glb_gaspi_ctx.nsrc.buf + NOTIFY_OFFSET,
 * sizeof(gaspi_atomic_value_t) bytes long.
 *
 * Returns GASPI_SUCCESS once all glb_gaspi_ctx.ne_count_grp completions
 * were reaped successfully (the counter is then reset to 0). Returns
 * GASPI_ERROR if posting fails, if polling the completion queue fails, or
 * if a completion reports an error status; in those cases the matching
 * entry of qp_state_vec[GASPI_COLL_QP] is set to GASPI_STATE_CORRUPT.
 */
gaspi_return_t
pgaspi_dev_atomic_compare_swap (const gaspi_segment_id_t segment_id,
				const gaspi_offset_t offset,
				const gaspi_rank_t rank,
				const gaspi_atomic_value_t comparator,
				const gaspi_atomic_value_t val_new)
{
  struct ibv_send_wr *bad_wr;
  struct ibv_sge slist;
  struct ibv_send_wr swr;
  int i;

  slist.addr = (uintptr_t) (glb_gaspi_ctx.nsrc.buf + NOTIFY_OFFSET);
  slist.length = sizeof(gaspi_atomic_value_t);
  slist.lkey = ((struct ibv_mr *) glb_gaspi_ctx.nsrc.mr)->lkey;

  swr.wr.atomic.remote_addr =
    glb_gaspi_ctx.rrmd[segment_id][rank].addr + NOTIFY_OFFSET + offset;

  swr.wr.atomic.rkey = glb_gaspi_ctx.rrmd[segment_id][rank].rkey;
  swr.wr.atomic.compare_add = comparator;
  swr.wr.atomic.swap = val_new;

  swr.wr_id = rank;
  swr.sg_list = &slist;
  swr.num_sge = 1;
  swr.opcode = IBV_WR_ATOMIC_CMP_AND_SWP;
  swr.send_flags = IBV_SEND_SIGNALED;
  swr.next = NULL;

  if (ibv_post_send (glb_gaspi_ctx_ib.qpGroups[rank], &swr, &bad_wr))
    {
      glb_gaspi_ctx.qp_state_vec[GASPI_COLL_QP][rank] = GASPI_STATE_CORRUPT;

      return GASPI_ERROR;
    }

  glb_gaspi_ctx.ne_count_grp++;

  for (i = 0; i < glb_gaspi_ctx.ne_count_grp; i++)
    {
      int ne = 0;

      /* busy-poll for exactly one completion */
      do
	{
	  ne = ibv_poll_cq (glb_gaspi_ctx_ib.scqGroups, 1,
			    glb_gaspi_ctx_ib.wc_grp_send);
	}
      while (ne == 0);

      if (ne < 0)
	{
	  /* The poll itself failed: no work completion was written, so its
	   * wr_id/status must not be interpreted. Blame the rank of the
	   * request that was just posted. */
	  glb_gaspi_ctx.qp_state_vec[GASPI_COLL_QP][rank] = GASPI_STATE_CORRUPT;

	  gaspi_print_error("Failed to poll completion queue (request to %u)", rank);

	  return GASPI_ERROR;
	}

      /* With num_entries == 1 the completion is always stored in entry 0,
       * independent of how many completions have been reaped so far. */
      if (glb_gaspi_ctx_ib.wc_grp_send[0].status != IBV_WC_SUCCESS)
	{
	  glb_gaspi_ctx.qp_state_vec[GASPI_COLL_QP][glb_gaspi_ctx_ib.wc_grp_send[0].wr_id] = GASPI_STATE_CORRUPT;

	  gaspi_print_error("Failed request to %lu : %s",
			    glb_gaspi_ctx_ib.wc_grp_send[0].wr_id,
			    ibv_wc_status_str(glb_gaspi_ctx_ib.wc_grp_send[0].status));

	  return GASPI_ERROR;
	}
    }

  glb_gaspi_ctx.ne_count_grp = 0;

  return GASPI_SUCCESS;
}
Beispiel #29
0
/*
 * Thread entry point of the socket (SN) backend.
 *
 * Once gaspi_master_topo_data is non-zero, a non-blocking TCP listening
 * socket is opened on port glb_gaspi_cfg.sn_port + glb_gaspi_ctx.localSocket
 * and registered with an epoll instance. The function then loops forever
 * over epoll_wait():
 *
 *   - events flagged EPOLLERR/EPOLLHUP (or neither readable nor writable)
 *     get their descriptor shut down and closed and their header freed;
 *   - a readable listening socket yields one accept(); the new connection
 *     is made non-blocking and registered, expecting a gaspi_cd_header;
 *   - a readable connection is read until the expected number of bytes
 *     (mgmt->blen) has arrived, then the request is dispatched:
 *       GASPI_SN_PROC_KILL    -> _exit(-1)
 *       GASPI_SN_CONNECT      -> read op_len more bytes into the remote
 *                                connection data of cdh.rank, create and
 *                                connect the endpoint, reply with the local
 *                                connection data
 *       GASPI_SN_PROC_PING    -> nothing, wait for the next header
 *       GASPI_SN_GRP_CHECK    -> reply with {group, tnc, cs, ret} where cs
 *                                is the XOR of the group's ranks
 *       GASPI_SN_GRP_CONNECT  -> reply with this rank's group rrcd entry
 *       GASPI_SN_SEG_REGISTER -> reply with the int result of
 *                                gaspi_sn_segment_register()
 *     A read/write failure or EOF closes that connection only.
 *
 * arg is unused.
 *
 * Returns NULL. The function only returns on a fatal error, after setting
 * gaspi_sn_status = GASPI_SN_STATE_ERROR and gaspi_sn_err (GASPI_ERR_SN_PORT
 * for a failed bind, GASPI_ERROR otherwise).
 */
void *gaspi_sn_backend(void *arg)
{
  int esock, lsock, n, i;
  struct epoll_event ev;
  struct epoll_event *ret_ev;
  gaspi_mgmt_header *ev_mgmt, *mgmt;

  signal(SIGSTKFLT, gaspi_sn_cleanup);
  signal(SIGPIPE, SIG_IGN);

  /* spin until gaspi_master_topo_data becomes non-zero */
  while(gaspi_master_topo_data == 0)
    gaspi_delay();

  lsock = socket(AF_INET, SOCK_STREAM, 0);
  if(lsock < 0)
    {
      gaspi_print_error("Failed to create socket");
      gaspi_sn_status = GASPI_SN_STATE_ERROR;
      gaspi_sn_err = GASPI_ERROR;
      return NULL;
    }

  if( 0 != gaspi_sn_set_default_opts(lsock) )
    {
      gaspi_print_error("Failed to modify socket");
      gaspi_sn_status = GASPI_SN_STATE_ERROR;
      gaspi_sn_err = GASPI_ERROR;
      close(lsock);
      return NULL;
    }

  signal(SIGPIPE, SIG_IGN);

  struct sockaddr_in listeningAddress;
  /* zero the whole structure so the padding bytes are defined for bind() */
  memset(&listeningAddress, 0, sizeof(listeningAddress));
  listeningAddress.sin_family = AF_INET;
  listeningAddress.sin_port = htons((glb_gaspi_cfg.sn_port + glb_gaspi_ctx.localSocket));
  listeningAddress.sin_addr.s_addr = htonl(INADDR_ANY);

  if(bind(lsock, (struct sockaddr*)(&listeningAddress), sizeof(listeningAddress)) < 0)
    {
      gaspi_print_error("Failed to bind socket (port %d)",
                        glb_gaspi_cfg.sn_port + glb_gaspi_ctx.localSocket);

      gaspi_sn_status = GASPI_SN_STATE_ERROR;
      gaspi_sn_err = GASPI_ERR_SN_PORT;
      close(lsock);
      return NULL;
    }

  if ( 0 != gaspi_sn_set_non_blocking(lsock) )
    {
      gaspi_print_error("Failed to set socket");
      gaspi_sn_status = GASPI_SN_STATE_ERROR;
      gaspi_sn_err = GASPI_ERROR;
      close(lsock);
      return NULL;
    }

  if(listen(lsock, SOMAXCONN) < 0)
    {
      gaspi_print_error("Failed to listen on socket");
      gaspi_sn_status = GASPI_SN_STATE_ERROR;
      gaspi_sn_err = GASPI_ERROR;
      close(lsock);
      return NULL;
    }

  esock = epoll_create(GASPI_EPOLL_CREATE);
  if(esock < 0)
    {
      gaspi_print_error("Failed to create IO event facility");
      gaspi_sn_status = GASPI_SN_STATE_ERROR;
      gaspi_sn_err = GASPI_ERROR;
      close(lsock);
      return NULL;
    }

  /* add lsock to epoll instance */
  ev.data.ptr = malloc( sizeof(gaspi_mgmt_header) );
  if(ev.data.ptr == NULL)
    {
      gaspi_print_error("Failed to allocate memory");
      gaspi_sn_status = GASPI_SN_STATE_ERROR;
      gaspi_sn_err = GASPI_ERROR;
      close(esock);
      close(lsock);
      return NULL;
    }

  ev_mgmt = ev.data.ptr;
  ev_mgmt->fd = lsock;
  ev.events = EPOLLIN;

  if(epoll_ctl(esock, EPOLL_CTL_ADD, lsock, &ev) < 0)
    {
      gaspi_print_error("Failed to modify IO event facility");
      gaspi_sn_status = GASPI_SN_STATE_ERROR;
      gaspi_sn_err = GASPI_ERROR;
      free(ev_mgmt);
      close(esock);
      close(lsock);
      return NULL;
    }

  ret_ev = calloc(GASPI_EPOLL_MAX_EVENTS, sizeof(ev));
  if(ret_ev == NULL)
    {
      gaspi_print_error("Failed to allocate memory");
      gaspi_sn_status = GASPI_SN_STATE_ERROR;
      gaspi_sn_err = GASPI_ERROR;
      free(ev_mgmt);
      close(esock);
      close(lsock);
      return NULL;
    }

  /* main events loop */
  while(1)
    {
      n = epoll_wait(esock,ret_ev, GASPI_EPOLL_MAX_EVENTS, -1);

      /* loop over all triggered events */
      for( i = 0; i < n; i++ )
        {
          mgmt = ret_ev[i].data.ptr;

          if( (ret_ev[i].events & EPOLLERR)  || (ret_ev[i].events & EPOLLHUP)  ||
              !((ret_ev[i].events & EPOLLIN) || (ret_ev[i].events & EPOLLOUT )) )
            {
              /* an error has occured on this fd. close it => removed from event list. */
              gaspi_print_error( "Erroneous event." );
              shutdown(mgmt->fd, SHUT_RDWR);
              close(mgmt->fd);
              free(mgmt);
              continue;
            }
          else if(mgmt->fd == lsock)
            {
              /* process all new connections */
              struct sockaddr in_addr;
              socklen_t in_len = sizeof(in_addr);
              int nsock = accept( lsock, &in_addr, &in_len );

              if(nsock < 0)
                {
                  if( (errno == EAGAIN) || (errno == EWOULDBLOCK) )
                    {
                      /* we have processed incoming connection */
                      break;
                    }
                  else
                    {
                      /* at least check/fix open files limit */
                      int errsv = errno;
                      if(errsv == EMFILE)
                        {
                          if( 0 == _gaspi_check_ofile_limit() )
                            {
                              nsock = accept( lsock, &in_addr, &in_len );
                            }
                        }

                      /* still erroneous? => makes no sense to continue */
                      if(nsock < 0)
                        {
                          gaspi_print_error( "Failed to accept connection." );
                          gaspi_sn_status = GASPI_SN_STATE_ERROR;
                          gaspi_sn_err = GASPI_ERROR;
                          close(lsock);
                          return NULL;
                        }
                    }
                }

              /* new socket */
              if( 0 != gaspi_sn_set_non_blocking( nsock ) )
                {
                  gaspi_print_error( "Failed to set socket options." );
                  gaspi_sn_status = GASPI_SN_STATE_ERROR;
                  gaspi_sn_err = GASPI_ERROR;
                  close(nsock);
                  return NULL;
                }

              /* add nsock */
              ev.data.ptr = malloc( sizeof(gaspi_mgmt_header) );
              if(ev.data.ptr == NULL)
                {
                  gaspi_print_error("Failed to allocate memory.");
                  gaspi_sn_status = GASPI_SN_STATE_ERROR;
                  gaspi_sn_err = GASPI_ERROR;
                  close(nsock);
                  return NULL;
                }

              /* a fresh connection starts out expecting a full header */
              ev_mgmt = ev.data.ptr;
              ev_mgmt->fd = nsock;
              ev_mgmt->blen = sizeof(gaspi_cd_header);
              ev_mgmt->bdone = 0;
              ev_mgmt->op = GASPI_SN_HEADER;
              ev.events = EPOLLIN ; /* read only */

              if(epoll_ctl( esock, EPOLL_CTL_ADD, nsock, &ev ) < 0)
                {
                  gaspi_print_error("Failed to modify IO event facility");
                  gaspi_sn_status = GASPI_SN_STATE_ERROR;
                  gaspi_sn_err = GASPI_ERROR;
                  close(nsock);
                  return NULL;
                }

              continue;
            }/* if new connection(s) */
          else
            {
              /* read or write ops */
              int io_err = 0;

              if( ret_ev[i].events & EPOLLIN )
                {
                  while( 1 )
                    {
                      int rcount = 0;
                      int rsize = mgmt->blen - mgmt->bdone;
                      char *ptr = NULL;

                      if( mgmt->op == GASPI_SN_HEADER )
                        {
                          /* TODO: is it valid? */
                          ptr = (char *) &mgmt->cdh;
                          rcount = read( mgmt->fd, ptr + mgmt->bdone, rsize );
                        }
                      else if( mgmt->op == GASPI_SN_CONNECT )
                        {
                          while( !glb_gaspi_dev_init )
                            gaspi_delay();

                          ptr = pgaspi_dev_get_rrcd(mgmt->cdh.rank);
                          rcount = read( mgmt->fd, ptr + mgmt->bdone, rsize );
                        }

                      /* errno==EAGAIN => we have read all data */
                      int errsv = errno;
                      if(rcount < 0)
                        {
                          if (errsv == ECONNRESET || errsv == ENOTCONN)
                            {
                              gaspi_print_error(" Failed to read (op %d)", mgmt->op);
                            }

                          /* only a "would block" result is not an error;
                           * both codes must be excluded (&&, not ||) */
                          if(errsv != EAGAIN && errsv != EWOULDBLOCK)
                            {
                              gaspi_print_error(" Failed to read (op %d).", mgmt->op);
                              io_err = 1;
                            }
                          break;
                        }
                      else if(rcount == 0) /* the remote side has closed the connection */
                        {
                          io_err = 1;
                          break;
                        }
                      else
                        {
                          mgmt->bdone += rcount;

                          /* read all data? */
                          if(mgmt->bdone == mgmt->blen)
                            {
                              /* we got header, what do we have to do ? */
                              if(mgmt->op == GASPI_SN_HEADER)
                                {
                                  if(mgmt->cdh.op == GASPI_SN_PROC_KILL)
                                    {
                                      _exit(-1);
                                    }
                                  else if(mgmt->cdh.op == GASPI_SN_CONNECT)
                                    {
                                      /* next: read op_len bytes of connection data */
                                      GASPI_SN_RESET_EVENT( mgmt, mgmt->cdh.op_len, mgmt->cdh.op );
                                    }
                                  else if(mgmt->cdh.op == GASPI_SN_PROC_PING)
                                    {
                                      GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER );
                                    }
                                  else if(mgmt->cdh.op == GASPI_SN_GRP_CHECK)
                                    {
                                      struct{gaspi_group_t group;int tnc, cs, ret;} gb;
                                      memset(&gb, 0, sizeof(gb));

                                      gb.ret = -1;
                                      gb.cs = 0;

                                      /* for this op the header's rank field carries the group */
                                      const int group = mgmt->cdh.rank;
                                      const int tnc = mgmt->cdh.tnc;

                                      lock_gaspi_tout (&glb_gaspi_group_ctx[group].del, GASPI_BLOCK);
                                      if(glb_gaspi_group_ctx[group].id >= 0)
                                        {
                                          if(glb_gaspi_group_ctx[group].tnc == tnc)
                                            {
                                              int r;
                                              gb.ret = 0;
                                              gb.tnc = tnc;

                                              /* checksum: XOR over all ranks of the group */
                                              for(r = 0; r < tnc; r++)
                                                {
                                                  if( NULL != glb_gaspi_group_ctx[group].rank_grp )
                                                    gb.cs ^= glb_gaspi_group_ctx[group].rank_grp[r];
                                                }
                                            }
                                        }
                                      unlock_gaspi (&glb_gaspi_group_ctx[group].del);

                                      /* signed compare, so that a -1 result is seen as failure */
                                      if(gaspi_sn_writen( mgmt->fd, &gb, sizeof(gb) ) != (ssize_t) sizeof(gb) )
                                        {
                                          gaspi_print_error("Failed response to group check.");
                                          io_err = 1;
                                          break;
                                        }

                                      GASPI_SN_RESET_EVENT(mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER );
                                    }
                                  else if(mgmt->cdh.op == GASPI_SN_GRP_CONNECT)
                                    {
                                      while( !glb_gaspi_dev_init ||
                                             ( glb_gaspi_group_ctx[mgmt->cdh.ret].id == -1) )
                                        gaspi_delay();

                                      /* TODO: check the pointer */
                                      if(gaspi_sn_writen( mgmt->fd,
                                                          &glb_gaspi_group_ctx[mgmt->cdh.ret].rrcd[glb_gaspi_ctx.rank],
                                                          sizeof(gaspi_rc_mseg) ) != (ssize_t) sizeof(gaspi_rc_mseg) )
                                        {
                                          gaspi_print_error("Failed to connect group.");
                                          io_err = 1;
                                          break;
                                        }

                                      GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER );
                                    }
                                  else if(mgmt->cdh.op == GASPI_SN_SEG_REGISTER)
                                    {
                                      int rret = gaspi_sn_segment_register(mgmt->cdh);

                                      /* write back result of registration */
                                      if(gaspi_sn_writen( mgmt->fd, &rret, sizeof(int) ) != (ssize_t) sizeof(int) )
                                        {
                                          gaspi_print_error("Failed response to segment register.");
                                          io_err = 1;
                                          break;
                                        }

                                      GASPI_SN_RESET_EVENT(mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER );
                                    }
                                }/* !header */
                              else if(mgmt->op == GASPI_SN_CONNECT)
                                {
                                  /* TODO: to remove */
                                  while( !glb_gaspi_dev_init )
                                    gaspi_delay();

                                  const size_t len = pgaspi_dev_get_sizeof_rc();
                                  char *lrcd = NULL;

                                  gaspi_return_t eret = pgaspi_create_endpoint_to(mgmt->cdh.rank, GASPI_BLOCK);
                                  if( eret == GASPI_SUCCESS )
                                    {
                                      eret = pgaspi_connect_endpoint_to(mgmt->cdh.rank, GASPI_BLOCK);
                                      if( eret == GASPI_SUCCESS)
                                        {
                                          lrcd = pgaspi_dev_get_lrcd(mgmt->cdh.rank);
                                        }
                                    }

                                  if( eret != GASPI_SUCCESS )
                                    {
                                      /* We set io_err, connection is closed and remote peer reads EOF */
                                      io_err = 1;
                                    }
                                  else
                                    {
                                      if( NULL != lrcd )
                                        {
                                          /* the whole len bytes must go out, not just sizeof(len) */
                                          if( gaspi_sn_writen( mgmt->fd, lrcd, len ) != (ssize_t) len )
                                            {
                                              gaspi_print_error("Failed response to connection request from %u.", mgmt->cdh.rank);
                                              io_err = 1;
                                            }
                                        }
                                    }

                                  GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER );
                                }
                              else
                                {
                                  gaspi_print_error("Received unknown SN operation");
                                  GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER );
                                }

                              break;
                            } /* if all data */
                        }/* else */
                    }/* while(1) read */
                }/* read in */

              /* drop the connection (and its header) after any I/O failure */
              if( io_err )
                {
                  shutdown(mgmt->fd, SHUT_RDWR);
                  close(mgmt->fd);
                  free(mgmt);
                }
            }
        } /* for each event */
    }/* event loop while(1) */

  return NULL;
}
Beispiel #30
0
/*
 * Ask rank `rank` whether its view of a group matches ours, retrying until
 * it does or until the timeout expires.
 *
 * A GASPI_SN_GRP_CHECK header is sent carrying the group (in the rank
 * field), the group size (tnc) and our checksum (in the ret field). The
 * peer answers with a struct group_desc. The answer is accepted when its
 * ret field is not negative and its checksum equals ours; otherwise the
 * request is repeated after gaspi_thread_sleep(250).
 *
 * rank       - peer to query; indexes glb_gaspi_ctx.sockfd[].
 * timeout_ms - give up once more than this many milliseconds have passed
 *              since the function was entered (checked after each
 *              mismatching answer).
 * arg        - pointer to the local struct group_desc (group, tnc, cs).
 *
 * Returns 0 when the peer's answer matches, 1 on timeout and -1 when
 * writing the request or reading the answer fails.
 */
static inline int
_gaspi_sn_group_check(const gaspi_rank_t rank, gaspi_timeout_t timeout_ms, void *arg)
{
  struct group_desc *gb = (struct group_desc *) arg;
  struct group_desc rem_gb;

  int i = (int) rank;

  struct timeb t0, t1;
  ftime(&t0);

  gaspi_cd_header cdh;
  memset(&cdh, 0, sizeof(gaspi_cd_header));

  cdh.op_len = sizeof (*gb);
  cdh.op = GASPI_SN_GRP_CHECK;
  cdh.rank = gb->group;
  cdh.tnc = gb->tnc;
  cdh.ret = gb->cs;

  do
    {
      memset(&rem_gb, 0, sizeof(rem_gb));

      ssize_t ret = gaspi_sn_writen(glb_gaspi_ctx.sockfd[i], &cdh, sizeof(gaspi_cd_header));
      if(ret != sizeof(gaspi_cd_header) )
        {
          gaspi_print_error("Failed to write (%d %p %lu)",
                            glb_gaspi_ctx.sockfd[i], &cdh, sizeof(gaspi_cd_header));
          return -1;
        }

      ssize_t rret = gaspi_sn_readn(glb_gaspi_ctx.sockfd[i], &rem_gb, sizeof(rem_gb));
      if( rret != sizeof(rem_gb) )
        {
          gaspi_print_error("Failed to read (%d %p %lu)",
                            glb_gaspi_ctx.sockfd[i],&rem_gb,sizeof(rem_gb));
          return -1;
        }

      if((rem_gb.ret < 0) || (gb->cs != rem_gb.cs))
        {
          /* no match (yet): give up on timeout, otherwise back off and retry */
          ftime(&t1);
          const unsigned int delta_ms = (t1.time - t0.time) * 1000 + (t1.millitm - t0.millitm);
          if(delta_ms > timeout_ms)
            {
              return 1;
            }

          /* report the sleep's own result, not the byte count of the write */
          const int sleep_ret = gaspi_thread_sleep(250);
          if(sleep_ret < 0)
            {
              gaspi_printf("gaspi_thread_sleep Error %d: (%s)\n", sleep_ret, (char*)strerror(errno));
            }
        }
      else
        {
          break;
        }
    }while(1);

  return 0;
}