Ejemplo n.º 1
0
/*
 * Post a list of `num` write requests to `rank`, followed by a notification,
 * on communication queue `queue`.
 *
 * The per-element arrays (segment_id_local, offset_local, segment_id_remote,
 * offset_remote, size) are forwarded unchanged to
 * pgaspi_dev_write_list_notify(); each must hold at least `num` entries.
 *
 * Returns:
 *   GASPI_ERR_INV_NUM        if num is 0.
 *   GASPI_ERR_INV_NOTIF_VAL  if notification_value is 0.
 *   GASPI_TIMEOUT            if the queue lock could not be taken
 *                            (lock_gaspi_tout() returned non-zero).
 *   the result of pgaspi_connect() if the endpoint had to be connected and
 *   that failed, otherwise the result of pgaspi_dev_write_list_notify().
 *
 * In DEBUG builds the gaspi_verify_* checks run first; they are defined
 * elsewhere and presumably return early on invalid arguments (TODO confirm).
 */
gaspi_return_t
pgaspi_write_list_notify (const gaspi_number_t num,
			  gaspi_segment_id_t * const segment_id_local,
			  gaspi_offset_t * const offset_local,
			  const gaspi_rank_t rank,
			  gaspi_segment_id_t * const segment_id_remote,
			  gaspi_offset_t * const offset_remote,
			  gaspi_size_t * const size,
			  const gaspi_segment_id_t segment_id_notification,
			  const gaspi_notification_id_t notification_id,
			  const gaspi_notification_t notification_value,
			  const gaspi_queue_id_t queue,
			  const gaspi_timeout_t timeout_ms)
{
  if(num == 0)
    return GASPI_ERR_INV_NUM;

  if(notification_value == 0)
    return GASPI_ERR_INV_NOTIF_VAL;

#ifdef DEBUG
  gaspi_verify_init("gaspi_write_list_notify");
  gaspi_verify_queue(queue);

  gaspi_number_t n;
  for(n = 0; n < num; n++)
    {
      gaspi_verify_local_off(offset_local[n], segment_id_local[n]);
      gaspi_verify_remote_off(offset_remote[n], segment_id_remote[n], rank);
      gaspi_verify_comm_size(size[n], segment_id_local[n], segment_id_remote[n], rank, GASPI_MAX_TSIZE_C);
    }

#endif

  gaspi_return_t eret = GASPI_ERROR;

  if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms))
    return GASPI_TIMEOUT;

  /* Establish the connection on first use of this endpoint. */
  if( GASPI_ENDPOINT_DISCONNECTED == glb_gaspi_ctx.ep_conn[rank].cstat )
    {
      eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms);
      if ( eret != GASPI_SUCCESS)
	{
	  goto endL;
	}
    }

  /* NOTE(review): `size` is reinterpreted as unsigned int *; this is only
     correct if gaspi_size_t has the same width as unsigned int - confirm
     against the device-layer prototype. */
  eret = pgaspi_dev_write_list_notify(num,
				      segment_id_local, offset_local, rank,
				      segment_id_remote, offset_remote, (unsigned int *)size,
				      segment_id_notification, notification_id, notification_value,
				      queue);
  if( eret != GASPI_SUCCESS )
    {
      /* The device layer reported a failure: do not account for requests
	 as outstanding on this queue. */
      goto endL;
    }

  /* Presumably one outstanding request per list element plus one for the
     notification. */
  glb_gaspi_ctx.ne_count_c[queue] += (int) (num + 1);

 endL:
  unlock_gaspi (&glb_gaspi_ctx.lockC[queue]);
  return eret;
}
Ejemplo n.º 2
0
/*
 * GPU variant of write-with-notification: validates the arguments, takes the
 * lock of communication queue `queue`, connects to `rank` if the endpoint is
 * still disconnected and hands the request to pgaspi_dev_gpu_write_notify().
 *
 * Returns GASPI_ERR_INV_NOTIF_VAL for a zero notification value,
 * GASPI_TIMEOUT if the queue lock could not be taken, the pgaspi_connect()
 * result if connecting failed, otherwise the device-layer result.
 * Queue-state marking and statistics counters are not updated here.
 */
gaspi_return_t
pgaspi_gpu_write_notify(const gaspi_segment_id_t segment_id_local,
			const gaspi_offset_t offset_local,
			const gaspi_rank_t rank,
			const gaspi_segment_id_t segment_id_remote,
			const gaspi_offset_t offset_remote,
			const gaspi_size_t size,
			const gaspi_notification_id_t notification_id,
			const gaspi_notification_t notification_value,
			const gaspi_queue_id_t queue,
			const gaspi_timeout_t timeout_ms)
{
  gaspi_verify_init("gaspi_gpu_write_notify");
  gaspi_verify_local_off(offset_local, segment_id_local, size);
  gaspi_verify_remote_off(offset_remote, segment_id_remote, rank, size);
  gaspi_verify_queue(queue);
  gaspi_verify_comm_size(size, segment_id_local, segment_id_remote, rank, GASPI_MAX_TSIZE_C);

  if( 0 == notification_value )
    {
      gaspi_printf("Zero is not allowed as notification value.");
      return GASPI_ERR_INV_NOTIF_VAL;
    }

  gaspi_context_t * const ctx = &glb_gaspi_ctx;

  if( lock_gaspi_tout (&ctx->lockC[queue], timeout_ms) )
    {
      return GASPI_TIMEOUT;
    }

  /* From here on the queue lock is held; every path below releases it. */
  gaspi_return_t ret = GASPI_SUCCESS;

  if( GASPI_ENDPOINT_DISCONNECTED == ctx->ep_conn[rank].cstat )
    {
      ret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms);
    }

  if( GASPI_SUCCESS == ret )
    {
      ret = pgaspi_dev_gpu_write_notify(segment_id_local, offset_local, rank,
					segment_id_remote, offset_remote, size,
					notification_id, notification_value,
					queue, timeout_ms);
    }

  unlock_gaspi (&ctx->lockC[queue]);
  return ret;
}
Ejemplo n.º 3
0
/*
 * Post a write to `rank` followed by a notification on communication queue
 * `queue`.
 *
 * Returns:
 *   GASPI_ERR_INV_NOTIF_VAL  if notification_value is 0.
 *   GASPI_TIMEOUT            if the queue lock could not be taken
 *                            (lock_gaspi_tout() returned non-zero).
 *   the result of pgaspi_connect() if the endpoint had to be connected and
 *   that failed, otherwise the result of pgaspi_dev_write_notify().
 *
 * The gaspi_verify_* checks are defined elsewhere and presumably return
 * early on invalid arguments (TODO confirm).
 */
gaspi_return_t
pgaspi_write_notify (const gaspi_segment_id_t segment_id_local,
		     const gaspi_offset_t offset_local,
		     const gaspi_rank_t rank,
		     const gaspi_segment_id_t segment_id_remote,
		     const gaspi_offset_t offset_remote,
		     const gaspi_size_t size,
		     const gaspi_notification_id_t notification_id,
		     const gaspi_notification_t notification_value,
		     const gaspi_queue_id_t queue,
		     const gaspi_timeout_t timeout_ms)
{
  gaspi_verify_init("gaspi_write_notify");
  gaspi_verify_local_off(offset_local, segment_id_local);
  gaspi_verify_remote_off(offset_remote, segment_id_remote, rank);
  gaspi_verify_queue(queue);
  gaspi_verify_comm_size(size, segment_id_local, segment_id_remote, rank, GASPI_MAX_TSIZE_C);

  if(notification_value == 0)
    return GASPI_ERR_INV_NOTIF_VAL;

  gaspi_return_t eret = GASPI_ERROR;

  if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms))
    return GASPI_TIMEOUT;

  /* Establish the connection on first use of this endpoint. */
  if( GASPI_ENDPOINT_DISCONNECTED == glb_gaspi_ctx.ep_conn[rank].cstat )
    {
      eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms);
      if ( eret != GASPI_SUCCESS)
	{
	  goto endL;
	}
    }

  eret = pgaspi_dev_write_notify(segment_id_local, offset_local, rank,
				 segment_id_remote, offset_remote, size,
				 notification_id, notification_value,
				 queue);
  if( eret != GASPI_SUCCESS )
    {
      /* The device layer reported a failure: do not account for requests
	 as outstanding on this queue. */
      goto endL;
    }

  /* Presumably one outstanding request for the write and one for the
     notification. */
  glb_gaspi_ctx.ne_count_c[queue] += 2;

 endL:
  unlock_gaspi (&glb_gaspi_ctx.lockC[queue]);
  return eret;
}
Ejemplo n.º 4
0
/*
 * Post a notification (id `notification_id`, value `notification_value`) for
 * segment `segment_id_remote` on `rank`, using communication queue `queue`.
 *
 * Returns:
 *   GASPI_ERR_INV_NOTIF_VAL  if notification_value is 0.
 *   GASPI_TIMEOUT            if the queue lock could not be taken
 *                            (lock_gaspi_tout() returned non-zero).
 *   the result of pgaspi_connect() if the endpoint had to be connected and
 *   that failed, otherwise the result of pgaspi_dev_notify().
 *
 * The gaspi_verify_* checks are defined elsewhere and presumably return
 * early on invalid arguments (TODO confirm).
 */
gaspi_return_t
pgaspi_notify (const gaspi_segment_id_t segment_id_remote,
	       const gaspi_rank_t rank,
	       const gaspi_notification_id_t notification_id,
	       const gaspi_notification_t notification_value,
	       const gaspi_queue_id_t queue,
	       const gaspi_timeout_t timeout_ms)
{
  gaspi_verify_init("gaspi_notify");
  gaspi_verify_segment(segment_id_remote);
  gaspi_verify_null_ptr(glb_gaspi_ctx.rrmd[segment_id_remote]);
  gaspi_verify_rank(rank);
  gaspi_verify_queue(queue);

  if(notification_value == 0)
    return GASPI_ERR_INV_NOTIF_VAL;

  gaspi_return_t eret = GASPI_ERROR;

  if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms))
    return GASPI_TIMEOUT;

  /* Establish the connection on first use of this endpoint. */
  if( GASPI_ENDPOINT_DISCONNECTED == glb_gaspi_ctx.ep_conn[rank].cstat )
    {
      eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms);
      if ( eret != GASPI_SUCCESS)
	{
	  goto endL;
	}
    }

  eret = pgaspi_dev_notify(segment_id_remote, rank,
			   notification_id, notification_value,
			   queue);
  if( eret != GASPI_SUCCESS )
    {
      /* The device layer reported a failure: do not account for a request
	 as outstanding on this queue. */
      goto endL;
    }

  glb_gaspi_ctx.ne_count_c[queue]++;

 endL:
  unlock_gaspi (&glb_gaspi_ctx.lockC[queue]);
  return eret;
}
Ejemplo n.º 5
0
/*
 * Passive send of `size` bytes located at (segment_id_local, offset_local)
 * to `rank`.
 *
 * Serialised through the passive-send lock (lockPS). Connects to `rank` if
 * the endpoint is still disconnected, then forwards the request to
 * pgaspi_dev_passive_send(). When the device layer returns GASPI_ERROR the
 * passive queue state for `rank` is set to GASPI_STATE_CORRUPT.
 *
 * Returns GASPI_TIMEOUT if the lock could not be taken, the pgaspi_connect()
 * result if connecting failed, otherwise the device-layer result.
 */
gaspi_return_t
pgaspi_passive_send (const gaspi_segment_id_t segment_id_local,
		     const gaspi_offset_t offset_local,
		     const gaspi_rank_t rank,
		     const gaspi_size_t size,
		     const gaspi_timeout_t timeout_ms)
{
  gaspi_verify_init("gaspi_passive_send");
  gaspi_verify_local_off(offset_local, segment_id_local, size);
  gaspi_verify_comm_size(size, segment_id_local,
			 segment_id_local, glb_gaspi_ctx.rank, GASPI_MAX_TSIZE_P);
  gaspi_verify_rank(rank);

  if( lock_gaspi_tout (&glb_gaspi_ctx.lockPS, timeout_ms) )
    return GASPI_TIMEOUT;

  /* From here on lockPS is held; every path below releases it. */
  gaspi_return_t ret = GASPI_SUCCESS;

  if( GASPI_ENDPOINT_DISCONNECTED == glb_gaspi_ctx.ep_conn[rank].cstat )
    ret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms);

  if( GASPI_SUCCESS == ret )
    {
      ret = pgaspi_dev_passive_send(segment_id_local, offset_local, rank,
				    size, glb_gaspi_ctx.ne_count_p, timeout_ms);

      if( GASPI_ERROR == ret )
	glb_gaspi_ctx.qp_state_vec[GASPI_PASSIVE_QP][rank] = GASPI_STATE_CORRUPT;
    }

  unlock_gaspi (&glb_gaspi_ctx.lockPS);
  return ret;
}
Ejemplo n.º 6
0
/*
 * Post a read request towards `rank` on communication queue `queue`; the
 * local/remote segment ids, offsets and `size` are forwarded to
 * pgaspi_dev_read().
 *
 * Returns:
 *   GASPI_TIMEOUT  if the queue lock could not be taken
 *                  (lock_gaspi_tout() returned non-zero).
 *   the result of pgaspi_connect() if the endpoint had to be connected and
 *   that failed, otherwise the result of pgaspi_dev_read().
 *
 * The gaspi_verify_* checks are defined elsewhere and presumably return
 * early on invalid arguments (TODO confirm).
 */
gaspi_return_t
pgaspi_read (const gaspi_segment_id_t segment_id_local,
	     const gaspi_offset_t offset_local,
	     const gaspi_rank_t rank,
	     const gaspi_segment_id_t segment_id_remote,
	     const gaspi_offset_t offset_remote,
	     const gaspi_size_t size,
	     const gaspi_queue_id_t queue,
	     const gaspi_timeout_t timeout_ms)
{
  gaspi_verify_init("gaspi_read");
  gaspi_verify_local_off(offset_local, segment_id_local);
  gaspi_verify_remote_off(offset_remote, segment_id_remote, rank);
  gaspi_verify_queue(queue);
  gaspi_verify_comm_size(size, segment_id_local, segment_id_remote, rank, GASPI_MAX_TSIZE_C);

  gaspi_return_t eret = GASPI_ERROR;

  if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms))
    return GASPI_TIMEOUT;

  /* Establish the connection on first use of this endpoint. */
  if( GASPI_ENDPOINT_DISCONNECTED == glb_gaspi_ctx.ep_conn[rank].cstat )
    {
      eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms);
      if ( eret != GASPI_SUCCESS)
	{
	  goto endL;
	}
    }

  /* The size is narrowed to unsigned int for the device layer. */
  eret = pgaspi_dev_read(segment_id_local, offset_local, rank,
			 segment_id_remote,offset_remote, (unsigned int) size,
			 queue);
  if( eret != GASPI_SUCCESS )
    {
      /* The device layer reported a failure: do not account for a request
	 as outstanding on this queue. */
      goto endL;
    }

  glb_gaspi_ctx.ne_count_c[queue]++;

 endL:
  unlock_gaspi (&glb_gaspi_ctx.lockC[queue]);
  return eret;
}
Ejemplo n.º 7
0
gaspi_return_t
pgaspi_proc_init (const gaspi_timeout_t timeout_ms)
{
  gaspi_return_t eret = GASPI_ERROR;
  int i;
  const int num_queues = (int) glb_gaspi_cfg.queue_num;

  if(lock_gaspi_tout (&glb_gaspi_ctx_lock, timeout_ms))
    return GASPI_TIMEOUT;

  if(glb_gaspi_sn_init == 0)
    {
      glb_gaspi_ctx.lockPS.lock = 0;
      glb_gaspi_ctx.lockPR.lock = 0;
    
      for (i = 0; i < num_queues; i++)
	glb_gaspi_ctx.lockC[i].lock = 0;

      memset (&glb_gaspi_ctx, 0, sizeof (gaspi_context));

      struct utsname mbuf;
      if (uname (&mbuf) == 0)
	{
	  snprintf (glb_gaspi_ctx.mtyp, 64, "%s", mbuf.machine);
	}

      //timing
      glb_gaspi_ctx.mhz = gaspi_get_cpufreq ();
      if (glb_gaspi_ctx.mhz == 0.0f)
	{
	  gaspi_print_error ("Failed to get CPU frequency");
	  goto errL;
	}
  
      glb_gaspi_ctx.cycles_to_msecs = 1.0f / (glb_gaspi_ctx.mhz * 1000.0f);
    
      //handle environment  
      if(gaspi_handle_env(&glb_gaspi_ctx))
	{
	  gaspi_print_error("Failed to handle environment");
	  eret = GASPI_ERR_ENV;
	  goto errL;
	}
  
      //start sn_backend
      if(pthread_create(&glb_gaspi_ctx.snt, NULL, gaspi_sn_backend, NULL) != 0)
	{
	  gaspi_print_error("Failed to create SN thread");
	  goto errL;
	}
    
      glb_gaspi_sn_init = 1;

    }//glb_gaspi_sn_init

  
  if(glb_gaspi_ctx.procType == MASTER_PROC)
    {
      if(glb_gaspi_dev_init == 0)
	{
	  if(access (glb_gaspi_ctx.mfile, R_OK) == -1)
	    {
	      gaspi_print_error ("Incorrect permissions of machinefile");
	      eret = GASPI_ERR_ENV;
	      goto errL;
	    }
	  
	  //read hostnames
	  char *line = NULL;
	  size_t len = 0;
	  int read;
	  
	  FILE *fp = fopen (glb_gaspi_ctx.mfile, "r");
	  if (fp == NULL)
	    {
	      gaspi_print_error("Failed to open machinefile");
	      eret = GASPI_ERR_ENV;
	      goto errL;
	    }

	  glb_gaspi_ctx.tnc = 0;
	  
	  while ((read = getline (&line, &len, fp)) != -1)
	    {
	      
	      //we assume a single hostname per line
	      if ((read < 2) || (read > 64))
		continue;
	      glb_gaspi_ctx.tnc++;
	      
	      if (glb_gaspi_ctx.tnc >= GASPI_MAX_NODES)
		break;
	    }
	  
	  rewind (fp);
	  
	  free (glb_gaspi_ctx.hn_poff);
	  
	  glb_gaspi_ctx.hn_poff = (char *) calloc (glb_gaspi_ctx.tnc, 65);
	  if(glb_gaspi_ctx.hn_poff == NULL)
	    {
	      gaspi_print_error("Debug: Failed to allocate memory");
	      goto errL;
	    }
	  
	  glb_gaspi_ctx.poff = glb_gaspi_ctx.hn_poff + glb_gaspi_ctx.tnc * 64;
        
	  int id = 0;
	  while((read = getline (&line, &len, fp)) != -1)
	    {
	      //we assume a single hostname per line
	      if((read < 2) || (read >= 64)) continue;
	      
	      int inList = 0;
	      
	      for(i = 0; i < id; i++)
		{
		  //already in list ?
		  //TODO: 64? 63? Magic numbers -> just get cacheline from system or define as such
		  const int hnlen = MAX (strlen (glb_gaspi_ctx.hn_poff + i * 64), MIN (strlen (line) - 1, 63));
		  if(strncmp (glb_gaspi_ctx.hn_poff + i * 64, line, hnlen) == 0)
		    {
		      inList++;
		    }
		}
	      
	      glb_gaspi_ctx.poff[id] = inList;
	      
	      strncpy (glb_gaspi_ctx.hn_poff + id * 64, line, MIN (read - 1, 63));
	      id++; 
	      
	      if(id >= GASPI_MAX_NODES)
		break;
	    }
  
	  fclose (fp);
	  
	  free (line);
	  
	  //master
	  glb_gaspi_ctx.rank = 0;
	  
	  free(glb_gaspi_ctx.sockfd);
  
	  glb_gaspi_ctx.sockfd = (int *) malloc (glb_gaspi_ctx.tnc * sizeof (int));
	  if(glb_gaspi_ctx.sockfd == NULL)
	    {
	      gaspi_print_error("Failed to allocate memory");
	      eret = GASPI_ERR_MEMALLOC;
	      goto errL;
	    }
	  
	  for(i = 0; i < glb_gaspi_ctx.tnc; i++) 
	    glb_gaspi_ctx.sockfd[i] = -1;

	}//glb_gaspi_dev_init
    }//MASTER_PROC
  else if(glb_gaspi_ctx.procType != WORKER_PROC)
    {
      gaspi_print_error ("Invalid node type (GASPI_TYPE)");
      eret = GASPI_ERR_ENV;
      goto errL;
    }

  if( 0 != gaspi_sn_broadcast_topology(&glb_gaspi_ctx, GASPI_BLOCK) )
    {
      gaspi_print_error("Failed topology broadcast");
      eret = GASPI_ERROR;
      goto errL;
    }
  
  if( (eret = pgaspi_init_core()) != GASPI_SUCCESS )
    {
      goto errL;
    }

  /* Unleash SN thread */
  __sync_fetch_and_add( &gaspi_master_topo_data, 1);

  gaspi_init_collectives();

  glb_gaspi_init = 1;

  unlock_gaspi (&glb_gaspi_ctx_lock);

  if(glb_gaspi_cfg.build_infrastructure)
    {
      /* configuration tells us to pre-connect */
      if( GASPI_TOPOLOGY_STATIC == glb_gaspi_cfg.build_infrastructure )
	{
	  for(i = glb_gaspi_ctx.rank; i >= 0; i--)
	    {
	      if( (eret = pgaspi_connect((gaspi_rank_t) i, timeout_ms)) != GASPI_SUCCESS )
		{
		  goto errL;
		}
	    }
	}

      eret = pgaspi_group_all_local_create(timeout_ms);
      if(eret == GASPI_SUCCESS)
	{
	  eret = gaspi_barrier(GASPI_GROUP_ALL, timeout_ms);
	}
      else
	{
	  gaspi_print_error("Failed to create GASPI_GROUP_ALL.");
	}
    }
  else /* dont build_infrastructure */
    {
      /* just reserve GASPI_GROUP_ALL */
      glb_gaspi_ctx.group_cnt = 1;
      glb_gaspi_group_ctx[GASPI_GROUP_ALL].id = -2;//disable
      eret = GASPI_SUCCESS;
    }
  
#ifdef GPI2_CUDA
  /* init GPU counts */
  glb_gaspi_ctx.use_gpus = 0;
  glb_gaspi_ctx.gpu_count = 0;
#endif

  return eret;

 errL:
  unlock_gaspi (&glb_gaspi_ctx_lock);

  return eret;
}