Example #1
0
/*
 * Keep trying to connect to host `hn` on `port` until one attempt yields a
 * socket or more than `timeout_ms` milliseconds have elapsed since the
 * first attempt.
 *
 * Returns the connected socket descriptor, or -1 once the time budget is
 * exceeded. The elapsed time is checked after every attempt, so a socket
 * obtained by an attempt that finishes past the deadline is shut down,
 * closed and reported as a timeout. Before a successful return the SIGPIPE
 * disposition is set to SIG_IGN.
 */
int
gaspi_sn_connect2port(const char *hn, const unsigned short port, const unsigned long timeout_ms)
{
  struct timeb start, now;
  int fd = -1;

  ftime(&start);

  do
    {
      fd = gaspi_sn_connect2port_intern(hn, port);

      ftime(&now);
      const unsigned int elapsed_ms = (now.time - start.time) * 1000 + (now.millitm - start.millitm);

      if( elapsed_ms > timeout_ms )
        {
          /* Out of time: drop a late connection, if any, and give up. */
          if( fd != -1 )
            {
              shutdown( fd, SHUT_RDWR );
              close( fd );
            }

          return -1;
        }

      gaspi_delay();
    }
  while( fd == -1 );

  signal(SIGPIPE, SIG_IGN);

  return fd;
}
Example #2
0
/*
 * Wait until one of the notifications with ids in
 * [notification_begin, notification_begin + num) of the local segment
 * `segment_id_local` holds a non-zero value.
 *
 * Parameters:
 *   segment_id_local    segment whose notification slots are polled
 *   notification_begin  first notification id of the range
 *   num                 number of consecutive ids to check
 *   first_id            out: id of the first non-zero slot found
 *   timeout_ms          GASPI_BLOCK polls until a slot is set, GASPI_TEST
 *                       checks the range exactly once, any other value is
 *                       a time budget in milliseconds
 *
 * Returns GASPI_SUCCESS when a slot is set (or when num == 0, in which
 * case *first_id is left untouched), GASPI_TIMEOUT when nothing was set
 * within the allowed time, and GASPI_ERR_INV_NUM in DEBUG builds when
 * num >= GASPI_MAX_NOTIFICATION. The gaspi_verify_* macros are defined
 * elsewhere and may return early on their own.
 *
 * A notification that is found is always reported as success: the timeout
 * is only evaluated after a scan that found nothing.
 */
gaspi_return_t
pgaspi_notify_waitsome (const gaspi_segment_id_t segment_id_local,
			const gaspi_notification_id_t notification_begin,
			const gaspi_number_t num,
			gaspi_notification_id_t * const first_id,
			const gaspi_timeout_t timeout_ms)
{
  gaspi_verify_init("gaspi_notify_waitsome");
  gaspi_verify_segment(segment_id_local);
  gaspi_verify_null_ptr(glb_gaspi_ctx.rrmd[segment_id_local]);
  gaspi_verify_null_ptr(first_id);

#ifdef DEBUG
  if( num >= GASPI_MAX_NOTIFICATION)
    return GASPI_ERR_INV_NUM;
#endif

  volatile unsigned char *segPtr;
  gaspi_notification_id_t n;

  if(num == 0)
    return GASPI_SUCCESS;

#ifdef GPI2_CUDA
  if(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].cudaDevId >=0 )
    {
      segPtr =  (volatile unsigned char*)glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].host_addr;
    }
  else
#endif

    segPtr = (volatile unsigned char *) glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].addr;

  /* The notification slots are read as unsigned ints from the segment base. */
  volatile unsigned int *p = (volatile unsigned int *) segPtr;

  /* GASPI_BLOCK takes precedence over GASPI_TEST, as in the original checks. */
  const int blocking = (timeout_ms == GASPI_BLOCK);
  const int test_only = !blocking && (timeout_ms == GASPI_TEST);

  /* The start time is only needed when a real time budget applies. */
  const gaspi_cycles_t s0 = (blocking || test_only) ? 0 : gaspi_get_cycles ();

  for (;;)
    {
      /* NOTE(review): assumes notification_begin + num stays within the
	 range of gaspi_notification_id_t; otherwise n would wrap and this
	 scan would not terminate -- confirm against callers. */
      for (n = notification_begin; n < (notification_begin + num); n++)
	{
	  if (p[n])
	    {
	      *first_id = n;
	      return GASPI_SUCCESS;
	    }
	}

      if (test_only)
	return GASPI_TIMEOUT;

      if (!blocking)
	{
	  const gaspi_cycles_t s1 = gaspi_get_cycles ();
	  const gaspi_cycles_t tdelta = s1 - s0;

	  const float ms = (float) tdelta * glb_gaspi_ctx.cycles_to_msecs;
	  if (ms > timeout_ms)
	    {
	      return GASPI_TIMEOUT;
	    }
	}

      gaspi_delay ();
    }
}
/*
 * Synchronise the threads of this process and, through thread 0, run
 * gaspi_barrier() on group `g`.
 *
 * The function alternates between two phases selected by
 * __gaspiThreadsMode, each with its own arrival array
 * (__gaspiThreadsCount0/1) and release flag (__gaspiThreadsFlag0/1):
 *   - every thread with a non-zero id marks its arrival and spins until
 *     the release flag of the current phase is set;
 *   - thread 0 clears the arrival array of the other phase, waits until
 *     threads 1..MAX-1 have arrived, calls gaspi_barrier(g, timeout_ms),
 *     then switches the phase, clears the other phase's release flag and
 *     sets the current one.
 *
 * Both phases perform the group barrier, so every call synchronises
 * across the group and honours `g` and `timeout_ms`.
 *
 * Returns GASPI_SUCCESS, or on thread 0 the error returned by
 * gaspi_barrier(). In the error case the phase is not switched and the
 * other threads are not released; they keep spinning until thread 0 calls
 * this function again and the barrier succeeds.
 */
gaspi_return_t
gaspi_threads_sync_all(const gaspi_group_t g, const gaspi_timeout_t timeout_ms)
{
    int i;
    gaspi_return_t ret;
    const int ID = __gaspi_thread_tid;
    const int MAX = __gaspi_thread_tnc;

    if(__gaspiThreadsMode == 0)
    {

        if(ID == 0)
        {
            /* Prepare the arrival slots of the next phase. */
            for(i = 0; i < MAX; i++)
                __gaspiThreadsCount1[i]=0;

            /* Wait for all other threads to arrive in this phase. */
            for(i = 1; i < MAX; i++)
            {
                while(__gaspiThreadsCount0[i] == 0)
                    gaspi_delay();
            }

            ret = gaspi_barrier(g, timeout_ms);
            if( ret != GASPI_SUCCESS)
            {
                return ret;
            }

            /* Switch phase, re-arm the other flag, then release the waiters. */
            __gaspiThreadsMode  = 1;
            __gaspiThreadsFlag1 = 0;
            __gaspiThreadsFlag0 = 1;
        }
        else
        {
            __gaspiThreadsCount0[ID] = 1;
            while(__gaspiThreadsFlag0 == 0)
                gaspi_delay();
        }
    }
    else
    {

        if(ID == 0)
        {
            /* Prepare the arrival slots of the next phase. */
            for(i = 0; i < MAX; i++)
                __gaspiThreadsCount0[i]=0;

            /* Wait for all other threads to arrive in this phase. */
            for(i = 1; i < MAX; i++)
            {
                while(__gaspiThreadsCount1[i] == 0)
                    gaspi_delay();
            }

            /* Same group barrier as in the other phase. */
            ret = gaspi_barrier(g, timeout_ms);
            if( ret != GASPI_SUCCESS)
            {
                return ret;
            }

            /* Switch phase, re-arm the other flag, then release the waiters. */
            __gaspiThreadsMode  = 0;
            __gaspiThreadsFlag0 = 0;
            __gaspiThreadsFlag1 = 1;
        }
        else
        {
            __gaspiThreadsCount1[ID] = 1;
            while(__gaspiThreadsFlag1 == 0)
                gaspi_delay();
        }
    }

    return GASPI_SUCCESS;
}
/*
 * Barrier among the threads of this process, with no timeout (TODO: add one?).
 *
 * Two phases alternate, selected by __gaspiThreadsMode. In each phase the
 * threads with a non-zero id mark their arrival in that phase's counter
 * array and spin on that phase's flag. Thread 0 resets the counters of the
 * other phase, waits for threads 1..tnc-1 to arrive, flips the phase,
 * clears the other phase's flag and finally raises the current one to let
 * the waiters go.
 *
 * Original author's note: do not spread the threads over NUMA sockets,
 * this can get slow.
 */
void
gaspi_threads_sync(void)
{
    const int tid = __gaspi_thread_tid;
    const int nthreads = __gaspi_thread_tnc;
    int t;

    if(__gaspiThreadsMode != 0)
    {
        /* Phase 1: arrivals in Count1, release through Flag1. */
        if(tid != 0)
        {
            __gaspiThreadsCount1[tid] = 1;
            while(__gaspiThreadsFlag1 == 0)
            {
                gaspi_delay();
            }
            return;
        }

        t = 0;
        while(t < nthreads)
        {
            __gaspiThreadsCount0[t] = 0;
            t++;
        }

        t = 1;
        while(t < nthreads)
        {
            while(__gaspiThreadsCount1[t] == 0)
            {
                gaspi_delay();
            }
            t++;
        }

        __gaspiThreadsMode  = 0;
        __gaspiThreadsFlag0 = 0;
        __gaspiThreadsFlag1 = 1;
        return;
    }

    /* Phase 0: arrivals in Count0, release through Flag0. */
    if(tid != 0)
    {
        __gaspiThreadsCount0[tid] = 1;
        while(__gaspiThreadsFlag0 == 0)
        {
            gaspi_delay();
        }
        return;
    }

    t = 0;
    while(t < nthreads)
    {
        __gaspiThreadsCount1[t] = 0;
        t++;
    }

    t = 1;
    while(t < nthreads)
    {
        while(__gaspiThreadsCount0[t] == 0)
        {
            gaspi_delay();
        }
        t++;
    }

    __gaspiThreadsMode  = 1;
    __gaspiThreadsFlag1 = 0;
    __gaspiThreadsFlag0 = 1;
}
Example #5
0
/*
 * Thread routine of the socket-based management ("SN") backend.
 *
 * After gaspi_master_topo_data becomes non-zero it opens a non-blocking
 * TCP listening socket on port sn_port + localSocket, registers it with an
 * epoll instance and then serves events forever:
 *   - new connections are accepted, made non-blocking and registered with
 *     a freshly allocated gaspi_mgmt_header that starts in the
 *     GASPI_SN_HEADER state (a gaspi_cd_header is expected first);
 *   - readable connections are drained until the expected number of bytes
 *     (blen) is complete, then the request is dispatched on the header's
 *     op field (kill, connect, ping, group check, group connect, segment
 *     register);
 *   - a connection that fails, is closed by the peer or sends an unknown
 *     header op is shut down, closed and its bookkeeping struct freed.
 *
 * The argument is unused. The function only returns (NULL) on a fatal
 * error, after setting gaspi_sn_err and gaspi_sn_status and releasing the
 * listening socket, the epoll instance and the event buffer. Bookkeeping
 * structs of connections that are still registered at that point are not
 * tracked here and are not released.
 */
void *gaspi_sn_backend(void *arg)
{
  int esock = -1, lsock = -1, n, i;
  struct epoll_event ev;
  struct epoll_event *ret_ev = NULL;
  gaspi_mgmt_header *ev_mgmt, *mgmt;
  gaspi_mgmt_header *lsock_mgmt = NULL; /* bookkeeping of the listening socket */

  signal(SIGSTKFLT, gaspi_sn_cleanup);
  signal(SIGPIPE, SIG_IGN);

  while(gaspi_master_topo_data == 0)
    gaspi_delay();

  lsock = socket(AF_INET, SOCK_STREAM, 0);
  if(lsock < 0)
    {
      gaspi_print_error("Failed to create socket");
      gaspi_sn_err = GASPI_ERROR;
      goto fail;
    }

  if( 0 != gaspi_sn_set_default_opts(lsock) )
    {
      gaspi_print_error("Failed to modify socket");
      gaspi_sn_err = GASPI_ERROR;
      goto fail;
    }

  struct sockaddr_in listeningAddress;
  /* Zero the whole structure so that padding (sin_zero) is well defined. */
  memset(&listeningAddress, 0, sizeof(listeningAddress));
  listeningAddress.sin_family = AF_INET;
  listeningAddress.sin_port = htons((glb_gaspi_cfg.sn_port + glb_gaspi_ctx.localSocket));
  listeningAddress.sin_addr.s_addr = htonl(INADDR_ANY);

  if(bind(lsock, (struct sockaddr*)(&listeningAddress), sizeof(listeningAddress)) < 0)
    {
      gaspi_print_error("Failed to bind socket (port %d)",
			glb_gaspi_cfg.sn_port + glb_gaspi_ctx.localSocket);

      gaspi_sn_err = GASPI_ERR_SN_PORT;
      goto fail;
    }

  if ( 0 != gaspi_sn_set_non_blocking(lsock) )
    {
      gaspi_print_error("Failed to set socket");
      gaspi_sn_err = GASPI_ERROR;
      goto fail;
    }

  if(listen(lsock, SOMAXCONN) < 0)
    {
      gaspi_print_error("Failed to listen on socket");
      gaspi_sn_err = GASPI_ERROR;
      goto fail;
    }

  esock = epoll_create(GASPI_EPOLL_CREATE);
  if(esock < 0)
    {
      gaspi_print_error("Failed to create IO event facility");
      gaspi_sn_err = GASPI_ERROR;
      goto fail;
    }

  /* add lsock to epoll instance */
  lsock_mgmt = malloc( sizeof(gaspi_mgmt_header) );
  if(lsock_mgmt == NULL)
    {
      gaspi_print_error("Failed to allocate memory");
      gaspi_sn_err = GASPI_ERROR;
      goto fail;
    }

  lsock_mgmt->fd = lsock;
  ev.data.ptr = lsock_mgmt;
  ev.events = EPOLLIN;

  if(epoll_ctl(esock, EPOLL_CTL_ADD, lsock, &ev) < 0)
    {
      gaspi_print_error("Failed to modify IO event facility");
      gaspi_sn_err = GASPI_ERROR;
      goto fail;
    }

  ret_ev = calloc(GASPI_EPOLL_MAX_EVENTS, sizeof(ev));
  if(ret_ev == NULL)
    {
      gaspi_print_error("Failed to allocate memory");
      gaspi_sn_err = GASPI_ERROR;
      goto fail;
    }

  /* main events loop */
  while(1)
    {
      /* A negative result (e.g. an interrupted wait) simply yields no
	 iterations of the loop below and the wait is retried. */
      n = epoll_wait(esock,ret_ev, GASPI_EPOLL_MAX_EVENTS, -1);

      /* loop over all triggered events */
      for( i = 0; i < n; i++ )
	{
	  mgmt = ret_ev[i].data.ptr;

	  if( (ret_ev[i].events & EPOLLERR)  || (ret_ev[i].events & EPOLLHUP)  ||
	      !((ret_ev[i].events & EPOLLIN) || (ret_ev[i].events & EPOLLOUT )) )
	    {
	      /* an error has occured on this fd. close it => removed from event list. */
	      gaspi_print_error( "Erroneous event." );
	      shutdown(mgmt->fd, SHUT_RDWR);
	      close(mgmt->fd);

	      if( mgmt == lsock_mgmt )
		{
		  /* The listening socket itself is gone: make sure the
		     fatal-error cleanup does not release it a second time. */
		  lsock = -1;
		  lsock_mgmt = NULL;
		}

	      free(mgmt);
	      continue;
	    }
	  else if(mgmt->fd == lsock)
	    {
	      /* process a new connection */
	      struct sockaddr in_addr;
	      socklen_t in_len = sizeof(in_addr);
	      int nsock = accept( lsock, &in_addr, &in_len );

	      if(nsock < 0)
		{
		  if( (errno == EAGAIN) || (errno == EWOULDBLOCK) )
		    {
		      /* Nothing left to accept. Keep handling the other
			 ready descriptors of this batch instead of
			 abandoning them. */
		      continue;
		    }
		  else
		    {
		      /* at least check/fix open files limit */
		      int errsv = errno;
		      if(errsv == EMFILE)
			{
			  if( 0 == _gaspi_check_ofile_limit() )
			    {
			      nsock = accept( lsock, &in_addr, &in_len );
			    }
			}

		      /* still erroneous? => makes no sense to continue */
		      if(nsock < 0)
			{
			  gaspi_print_error( "Failed to accept connection." );
			  gaspi_sn_err = GASPI_ERROR;
			  goto fail;
			}
		    }
		}

	      /* new socket */
	      if( 0 != gaspi_sn_set_non_blocking( nsock ) )
		{
		  gaspi_print_error( "Failed to set socket options." );
		  gaspi_sn_err = GASPI_ERROR;
		  close(nsock);
		  goto fail;
		}

	      /* add nsock */
	      ev_mgmt = malloc( sizeof(gaspi_mgmt_header) );
	      if(ev_mgmt == NULL)
		{
		  gaspi_print_error("Failed to allocate memory.");
		  gaspi_sn_err = GASPI_ERROR;
		  close(nsock);
		  goto fail;
		}

	      /* A new connection starts by sending a gaspi_cd_header. */
	      ev_mgmt->fd = nsock;
	      ev_mgmt->blen = sizeof(gaspi_cd_header);
	      ev_mgmt->bdone = 0;
	      ev_mgmt->op = GASPI_SN_HEADER;
	      ev.data.ptr = ev_mgmt;
	      ev.events = EPOLLIN ; /* read only */

	      if(epoll_ctl( esock, EPOLL_CTL_ADD, nsock, &ev ) < 0)
		{
		  gaspi_print_error("Failed to modify IO event facility");
		  gaspi_sn_err = GASPI_ERROR;
		  free(ev_mgmt);
		  close(nsock);
		  goto fail;
		}

	      continue;
	    }/* if new connection(s) */
	  else
	    {
	      /* read or write ops */
	      int io_err = 0;

	      if( ret_ev[i].events & EPOLLIN )
		{
		  while( 1 )
		    {
		      int rcount = 0;
		      int rsize = mgmt->blen - mgmt->bdone;
		      char *ptr = NULL;

		      if( mgmt->op == GASPI_SN_HEADER )
			{
			  /* TODO: is it valid? */
			  ptr = (char *) &mgmt->cdh;
			  rcount = read( mgmt->fd, ptr + mgmt->bdone, rsize );
			}
		      else if( mgmt->op == GASPI_SN_CONNECT )
			{
			  while( !glb_gaspi_dev_init )
			    gaspi_delay();

			  /* NOTE(review): cdh.rank comes from the peer and is
			     used unchecked -- confirm that it is validated. */
			  ptr = pgaspi_dev_get_rrcd(mgmt->cdh.rank);
			  rcount = read( mgmt->fd, ptr + mgmt->bdone, rsize );
			}

		      if(rcount < 0)
			{
			  /* errno==EAGAIN => we have read all data */
			  const int errsv = errno;

			  if (errsv == ECONNRESET || errsv == ENOTCONN)
			    {
			      gaspi_print_error(" Failed to read (op %d)", mgmt->op);
			    }

			  /* Only "no data right now" is benign; both spellings
			     must be excluded for this to be a real error. */
			  if(errsv != EAGAIN && errsv != EWOULDBLOCK)
			    {
			      gaspi_print_error(" Failed to read (op %d).", mgmt->op);
			      io_err = 1;
			    }
			  break;
			}
		      else if(rcount == 0) /* the remote side has closed the connection */
			{
			  io_err = 1;
			  break;
			}
		      else
			{
			  mgmt->bdone += rcount;

			  /* read all data? */
			  if(mgmt->bdone == mgmt->blen)
			    {
			      /* we got header, what do we have to do ? */
			      if(mgmt->op == GASPI_SN_HEADER)
				{
				  if(mgmt->cdh.op == GASPI_SN_PROC_KILL)
				    {
				      _exit(-1);
				    }
				  else if(mgmt->cdh.op == GASPI_SN_CONNECT)
				    {
				      /* The payload of op_len bytes follows. */
				      GASPI_SN_RESET_EVENT( mgmt, mgmt->cdh.op_len, mgmt->cdh.op );
				    }
				  else if(mgmt->cdh.op == GASPI_SN_PROC_PING)
				    {
				      GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER );
				    }
				  else if(mgmt->cdh.op == GASPI_SN_GRP_CHECK)
				    {
				      struct{gaspi_group_t group;int tnc, cs, ret;} gb;
				      memset(&gb, 0, sizeof(gb));

				      gb.ret = -1;
				      gb.cs = 0;

				      /* NOTE(review): the group index comes from
					 the peer and is used unchecked -- confirm
					 that it is validated elsewhere. */
				      const int group = mgmt->cdh.rank;
				      const int tnc = mgmt->cdh.tnc;

				      lock_gaspi_tout (&glb_gaspi_group_ctx[group].del, GASPI_BLOCK);
				      if(glb_gaspi_group_ctx[group].id >= 0)
					{
					  if(glb_gaspi_group_ctx[group].tnc == tnc)
					    {
					      int r;
					      gb.ret = 0;
					      gb.tnc = tnc;

					      /* XOR of the member ranks serves as checksum. */
					      for(r = 0; r < tnc; r++)
						{
						  if( NULL != glb_gaspi_group_ctx[group].rank_grp )
						    gb.cs ^= glb_gaspi_group_ctx[group].rank_grp[r];
						}
					    }
					}
				      unlock_gaspi (&glb_gaspi_group_ctx[group].del);

				      /* A negative result is checked explicitly: compared
					 directly against a size_t it would look like a
					 huge count and go unnoticed. */
				      const ssize_t wr = gaspi_sn_writen( mgmt->fd, &gb, sizeof(gb) );
				      if( wr < 0 || (size_t) wr < sizeof(gb) )
					{
					  gaspi_print_error("Failed response to group check.");
					  io_err = 1;
					  break;
					}

				      GASPI_SN_RESET_EVENT(mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER );
				    }
				  else if(mgmt->cdh.op == GASPI_SN_GRP_CONNECT)
				    {
				      /* NOTE(review): cdh.ret is used as group
					 index without a bounds check -- confirm. */
				      while( !glb_gaspi_dev_init ||
					     ( glb_gaspi_group_ctx[mgmt->cdh.ret].id == -1) )
					gaspi_delay();

				      /* TODO: check the pointer */
				      const ssize_t wr = gaspi_sn_writen( mgmt->fd,
									  &glb_gaspi_group_ctx[mgmt->cdh.ret].rrcd[glb_gaspi_ctx.rank],
									  sizeof(gaspi_rc_mseg) );
				      if( wr < 0 || (size_t) wr < sizeof(gaspi_rc_mseg) )
					{
					  gaspi_print_error("Failed to connect group.");
					  io_err = 1;
					  break;
					}

				      GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER );
				    }
				  else if(mgmt->cdh.op == GASPI_SN_SEG_REGISTER)
				    {
				      int rret = gaspi_sn_segment_register(mgmt->cdh);

				      /* write back result of registration */
				      const ssize_t wr = gaspi_sn_writen( mgmt->fd, &rret, sizeof(int) );
				      if( wr < 0 || (size_t) wr < sizeof(int) )
					{
					  gaspi_print_error("Failed response to segment register.");
					  io_err = 1;
					  break;
					}

				      GASPI_SN_RESET_EVENT(mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER );
				    }
				  else
				    {
				      /* Unknown request: the stream cannot be
					 interpreted any further, so drop the
					 connection instead of leaving it wedged
					 with a completed header. */
				      gaspi_print_error("Received unknown SN operation");
				      io_err = 1;
				    }
				}/* !header */
			      else if(mgmt->op == GASPI_SN_CONNECT)
				{
				  /* TODO: to remove */
				  while( !glb_gaspi_dev_init )
				    gaspi_delay();

				  const size_t len = pgaspi_dev_get_sizeof_rc();
				  char *lrcd = NULL;

				  gaspi_return_t eret = pgaspi_create_endpoint_to(mgmt->cdh.rank, GASPI_BLOCK);
				  if( eret == GASPI_SUCCESS )
				    {
				      eret = pgaspi_connect_endpoint_to(mgmt->cdh.rank, GASPI_BLOCK);
				      if( eret == GASPI_SUCCESS)
					{
					  lrcd = pgaspi_dev_get_lrcd(mgmt->cdh.rank);
					}
				    }

				  if( eret != GASPI_SUCCESS )
				    {
				      /* We set io_err, connection is closed and remote peer reads EOF */
				      io_err = 1;
				    }
				  else
				    {
				      if( NULL != lrcd )
					{
					  /* The whole descriptor (len bytes) must go out. */
					  const ssize_t wr = gaspi_sn_writen( mgmt->fd, lrcd, len );
					  if( wr < 0 || (size_t) wr < len )
					    {
					      gaspi_print_error("Failed response to connection request from %u.", mgmt->cdh.rank);
					      io_err = 1;
					    }
					}
				    }

				  GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER );
				}
			      else
				{
				  gaspi_print_error("Received unknown SN operation");
				  GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER );
				}

			      break;
			    } /* if all data */
			}/* else */
		    }/* while(1) read */
		}/* read in */

	      if( io_err )
		{
		  shutdown(mgmt->fd, SHUT_RDWR);
		  close(mgmt->fd);
		  free(mgmt);
		}
	    }
	} /* for each event */
    }/* event loop while(1) */

 fail:
  /* Fatal error: gaspi_sn_err has been set by the failing step. Publish
     the error state and release everything this thread still owns. */
  gaspi_sn_status = GASPI_SN_STATE_ERROR;

  free(ret_ev);
  free(lsock_mgmt);

  if(esock >= 0)
    close(esock);

  if(lsock >= 0)
    close(lsock);

  return NULL;
}