Beispiel #1
0
static inline int
MPIDI_Get_use_pami_rget(pami_context_t context, MPIDI_Win_request * req)
{
  int use_typed_rdma = 0;

  if (!req->target.dt.contig || !req->origin.dt.contig) {
    use_typed_rdma = 0;
    if (MPIDI_Process.typed_onesided == 1)
      use_typed_rdma = 1;
  }

  if (use_typed_rdma) {
    pami_result_t rc;
    pami_rget_typed_t params;
    /* params need to zero out to avoid passing garbage to PAMI */
    params=zero_rget_typed_parms;

    params.rma.dest=req->dest;
    params.rma.hints.buffer_registered = PAMI_HINT_ENABLE;
    params.rma.hints.use_rdma          = PAMI_HINT_ENABLE;
    params.rma.bytes   = req->target.dt.size;
    params.rma.cookie  = req;
    params.rma.done_fn = MPIDI_Win_DoneCB;
    params.rdma.local.mr=&req->origin.memregion;
    params.rdma.remote.mr=&req->win->mpid.info[req->target.rank].memregion;
    params.rdma.remote.offset= req->offset;
    params.rdma.local.offset  = req->state.local_offset;

    params.type.local = *(pami_type_t *)(req->origin.dt.pointer->device_datatype);
    params.type.remote = *(pami_type_t *)(req->target.dt.pointer->device_datatype);


    rc = PAMI_Rget_typed(context, &params);
    MPID_assert(rc == PAMI_SUCCESS);
  }
  else {
  pami_result_t rc;
  pami_rget_simple_t  params;

  params=zero_rget_parms;

  params.rma.dest=req->dest;
  params.rma.hints.buffer_registered = PAMI_HINT_ENABLE;
  params.rma.hints.use_rdma          = PAMI_HINT_ENABLE;
  params.rma.bytes   = 0;
  params.rma.cookie  = req;
  params.rma.done_fn = MPIDI_Win_DoneCB;
  params.rdma.local.mr=&req->origin.memregion;
  params.rdma.remote.mr=&req->win->mpid.info[req->target.rank].memregion;
  params.rdma.remote.offset= req->offset;

  struct MPIDI_Win_sync* sync = &req->win->mpid.sync;
  TRACE_ERR("Start       index=%u/%d  l-addr=%p  r-base=%p  r-offset=%zu (sync->started=%u  sync->complete=%u)\n",
	    req->state.index, req->target.dt.num_contig, req->buffer, req->win->mpid.info[req->target.rank].base_addr, req->offset, sync->started, sync->complete);
  while (req->state.index < req->target.dt.num_contig) {
    if (sync->started > sync->complete + MPIDI_Process.rma_pending)
      {
	TRACE_ERR("Bailing out;  index=%u/%d  sync->started=%u  sync->complete=%u\n",
		  req->state.index, req->target.dt.num_contig, sync->started, sync->complete);
	return PAMI_EAGAIN;
      }
    ++sync->started;

    params.rma.bytes          =                       req->target.dt.map[req->state.index].DLOOP_VECTOR_LEN;
    params.rdma.remote.offset = req->offset + (size_t)req->target.dt.map[req->state.index].DLOOP_VECTOR_BUF;
    params.rdma.local.offset  = req->state.local_offset;

#ifdef TRACE_ON
    unsigned* buf = (unsigned*)(req->buffer + params.rdma.local.offset);
#endif
    TRACE_ERR("  Sub     index=%u  bytes=%zu  l-offset=%zu  r-offset=%zu  buf=%p  *(int*)buf=0x%08x\n", req->state.index, params.rma.bytes, params.rdma.local.offset, params.rdma.remote.offset, buf, *buf);

    /** sync->total will be updated with every RMA and the complete
	will not change till that RMA has completed. In the meanwhile
	the rest of the RMAs will have memory leaks */
    if (req->target.dt.num_contig - req->state.index == 1) {
          rc = PAMI_Rget(context, &params);
          MPID_assert(rc == PAMI_SUCCESS);
          return PAMI_SUCCESS;
      } else {
          rc = PAMI_Rget(context, &params);
          MPID_assert(rc == PAMI_SUCCESS);
          req->state.local_offset += params.rma.bytes;
          ++req->state.index;
      }
  }
  }
  return PAMI_SUCCESS;
}
Beispiel #2
0
int main (int argc, char ** argv)
{
  pami_client_t client;
  pami_context_t context[2];
  pami_task_t task_id;
  size_t num_tasks = 0;
  size_t ncontexts = 0;
  size_t errors = 0;

  pami_result_t result = PAMI_ERROR;



  pami_type_t subtype;
  pami_type_t compound_type;
  pami_type_t simple_type;

  info_t exchange[MAX_TASKS];
  double data[BUFFERSIZE];
  volatile unsigned ready;

  { /* init */


    ready = 0;

    unsigned i;

    for (i = 0; i < MAX_TASKS; i++)
      exchange[i].active = 0;

    for (i = 0; i < BUFFERSIZE; i++)
      data[i] = E;


    result = PAMI_Client_create ("TEST", &client, NULL, 0);

    if (result != PAMI_SUCCESS)
      {
        fprintf (stderr, "Error. Unable to create pami client. result = %d\n", result);
        return 1;
      }

    pami_configuration_t configuration;

    configuration.name = PAMI_CLIENT_TASK_ID;
    result = PAMI_Client_query(client, &configuration, 1);

    if (result != PAMI_SUCCESS)
      {
        fprintf (stderr, "Error. Unable query configuration (%d). result = %d\n", configuration.name, result);
        return 1;
      }

    task_id = configuration.value.intval;
    /*fprintf (stderr, "My task id = %d\n", task_id);*/

    configuration.name = PAMI_CLIENT_NUM_TASKS;
    result = PAMI_Client_query(client, &configuration, 1);

    if (result != PAMI_SUCCESS)
      {
        fprintf (stderr, "Error. Unable query configuration (%d). result = %d\n", configuration.name, result);
        return 1;
      }

    num_tasks = configuration.value.intval;

    /*if (task_id == 0)
      fprintf (stderr, "Number of tasks = %zu\n", num_tasks);*/

    if ((num_tasks < 2) || (num_tasks > MAX_TASKS))
      {
        fprintf (stderr, "Error. This test requires 2-%d tasks. Number of tasks in this job: %zu\n", MAX_TASKS, num_tasks);
        return 1;
      }

    if (task_id == num_tasks - 1)
      {
        for (i = 0; i < 320; i++)
          data[i] = PI;
      }

    configuration.name = PAMI_CLIENT_NUM_CONTEXTS;
    result = PAMI_Client_query(client, &configuration, 1);

    if (result != PAMI_SUCCESS)
      {
        fprintf (stderr, "Error. Unable query configuration (%d). result = %d\n", configuration.name, result);
        return 1;
      }

    ncontexts = (configuration.value.intval < 2) ? 1 : 2;

    /*if (task_id == 0)
      fprintf (stderr, "maximum contexts = %zu, number of contexts used in this test = %zu\n", configuration.value.intval, ncontexts);*/

    result = PAMI_Context_createv(client, NULL, 0, context, ncontexts);

    if (result != PAMI_SUCCESS)
      {
        fprintf (stderr, "Error. Unable to create pami context(s). result = %d\n", result);
        return 1;
      }


    pami_dispatch_hint_t options = {};

    for (i = 0; i < ncontexts; i++)
      {
        pami_dispatch_callback_function fn;

        fn.p2p = dispatch_exchange;
        result = PAMI_Dispatch_set (context[i],
                                    EXCHANGE_DISPATCH_ID,
                                    fn,
                                    (void *) exchange,
                                    options);

        if (result != PAMI_SUCCESS)
          {
            fprintf (stderr, "Error. Unable register pami 'exchange' dispatch. result = %d\n", result);
            return 1;
          }

        fn.p2p = dispatch_notify;
        result = PAMI_Dispatch_set (context[i],
                                    NOTIFY_DISPATCH_ID,
                                    fn,
                                    (void *) & ready,
                                    options);

        if (result != PAMI_SUCCESS)
          {
            fprintf (stderr, "Error. Unable register pami 'notify' dispatch. result = %d\n", result);
            return 1;
          }
      }


    /* ***********************************
     * Create the pami types
     ************************************/

    /* This compound noncontiguous type is composed of one double, skips a double,
     * two doubles, skips a double, three doubles, skips a double, five doubles,
     * skips a double, six doubles, skips a double, seven doubles, skips a double,
     * eight doubles, then skips two doubles.
     *
     * This results in a type with 32 doubles that is 40 doubles
     * 'wide'.
     */
    PAMI_Type_create (&subtype);
    PAMI_Type_add_simple (subtype,
                          sizeof(double),      /* bytes */
                          0,                   /* offset */
                          1,                   /* count */
                          sizeof(double) * 2); /* stride */
    PAMI_Type_add_simple (subtype,
                          sizeof(double) * 2,  /* bytes */
                          0,                   /* offset */
                          1,                   /* count */
                          sizeof(double) * 3); /* stride */
    PAMI_Type_add_simple (subtype,
                          sizeof(double) * 3,  /* bytes */
                          0,                   /* offset */
                          1,                   /* count */
                          sizeof(double) * 4); /* stride */
    PAMI_Type_add_simple (subtype,
                          sizeof(double) * 5,  /* bytes */
                          0,                   /* offset */
                          1,                   /* count */
                          sizeof(double) * 6); /* stride */
    PAMI_Type_add_simple (subtype,
                          sizeof(double) * 6,  /* bytes */
                          0,                   /* offset */
                          1,                   /* count */
                          sizeof(double) * 7); /* stride */
    PAMI_Type_add_simple (subtype,
                          sizeof(double) * 7,  /* bytes */
                          0,                   /* offset */
                          1,                   /* count */
                          sizeof(double) * 8);/* stride */
    PAMI_Type_add_simple (subtype,
                          sizeof(double) * 8,  /* bytes */
                          0,                   /* offset */
                          1,                   /* count */
                          sizeof(double) * 10);/* stride */
    PAMI_Type_complete (subtype, sizeof(double));

    /* This noncontiguous type is composed of the above compound type, repeated
     * ten times with no stride.
     *
     * This results in a type with 320 doubles that is 400 doubles
     * 'wide'.
     */
    PAMI_Type_create (&compound_type);
    PAMI_Type_add_typed (compound_type,
                         subtype,                  /* subtype */
                         0,                        /* offset */
                         10,                       /* count */
                         sizeof(double) * 32);     /* stride */
    PAMI_Type_complete (compound_type, sizeof(double));


    /* This simple noncontiguous type is composed of eight contiguous doubles,
     * then skips a _single_ double, repeated 40 times.
     *
     * This results in a type with 320 doubles that is 360 doubles 'wide'.
     */
    PAMI_Type_create (&simple_type);
    PAMI_Type_add_simple (simple_type,
                          sizeof(double) * 8,  /* bytes */
                          0,                   /* offset */
                          40,                  /* count */
                          sizeof(double) * 9); /* stride */
    PAMI_Type_complete (simple_type, sizeof(double));


    /* Create a memory region for the local data buffer. */
    size_t bytes;
    result = PAMI_Memregion_create (context[0], (void *) data, BUFFERSIZE,
                                    &bytes, &exchange[task_id].mr);

    if (result != PAMI_SUCCESS)
      {
        fprintf (stderr, "Error. Unable to create memory region. result = %d\n", result);
        return 1;
      }
    else if (bytes < BUFFERSIZE)
      {
        fprintf (stderr, "Error. Unable to create memory region of a large enough size. result = %d\n", result);
        return 1;
      }

    /* Broadcast the memory region to all tasks - including self. */
    for (i = 0; i < num_tasks; i++)
      {
        pami_send_immediate_t parameters;
        parameters.dispatch        = EXCHANGE_DISPATCH_ID;
        parameters.header.iov_base = (void *) & bytes;
        parameters.header.iov_len  = sizeof(size_t);
        parameters.data.iov_base   = (void *) & exchange[task_id].mr;
        parameters.data.iov_len    = sizeof(pami_memregion_t);
        PAMI_Endpoint_create (client, i, 0, &parameters.dest);

        result = PAMI_Send_immediate (context[0], &parameters);
      }

    /* Advance until all memory regions have been received. */
    for (i = 0; i < num_tasks; i++)
      {
        while (exchange[i].active == 0)
          PAMI_Context_advance (context[0], 100);
      }


  } /* init done */


  pami_send_immediate_t notify;
  notify.dispatch        = NOTIFY_DISPATCH_ID;
  notify.header.iov_base = NULL;
  notify.header.iov_len  = 0;
  notify.data.iov_base   = NULL;
  notify.data.iov_len    = 0;

  volatile size_t active = 1;

  pami_rget_typed_t parameters;
  parameters.rma.hints          = (pami_send_hint_t) {0};
  parameters.rma.cookie         = (void *) & active;
  parameters.rma.done_fn        = decrement;
  parameters.rma.bytes          = 320 * sizeof(double);

  if (task_id == 0)
    {
      fprintf (stdout, "PAMI_Rget('typed') functional test %s\n", (ncontexts < 2) ? "" : "[crosstalk]");
      fprintf (stdout, "\n");

      parameters.rdma.local.mr      = &exchange[0].mr;
      parameters.rdma.remote.mr     = &exchange[num_tasks - 1].mr;
      PAMI_Endpoint_create (client, num_tasks - 1, ncontexts - 1, &parameters.rma.dest);

      PAMI_Endpoint_create (client, num_tasks - 1, ncontexts - 1, &notify.dest);
    }
  else
    {
      parameters.rdma.local.mr      = &exchange[num_tasks - 1].mr;
      parameters.rdma.remote.mr     = &exchange[0].mr;
      PAMI_Endpoint_create (client, 0, 0, &parameters.rma.dest);

      PAMI_Endpoint_create (client, 0, 0, &notify.dest);
    }


  /* ******************************************************************** */

  /* contiguous -> contiguous transfer test                               */

  /* ******************************************************************** */
  if (task_id == 0)
    {
      parameters.rdma.local.offset  = 0;
      parameters.rdma.remote.offset = 0;
      parameters.type.local         = PAMI_TYPE_DOUBLE;
      parameters.type.remote        = PAMI_TYPE_DOUBLE;

      active = 1;
      PAMI_Rget_typed (context[0], &parameters);

      while (active > 0)
        PAMI_Context_advance (context[0], 100);

      /* Notify the remote task that the data has been transfered. */
      PAMI_Send_immediate (context[0], &notify);
    }
  else if (task_id == num_tasks - 1)
    {
      /* Wait for notification that the data has been transfered. */
      while (ready == 0)
        PAMI_Context_advance (context[ncontexts - 1], 100);

      ready = 0;
    }

  /* ******************************************************************** */

  /* contiguous -> non-contiguous transfer test                               */

  /* ******************************************************************** */
  if (task_id == num_tasks - 1)
    {
      parameters.rdma.local.offset  = 4 * 1024;
      parameters.rdma.remote.offset = 0;
      parameters.type.local         = simple_type;
      parameters.type.remote        = PAMI_TYPE_DOUBLE;

      active = 1;
      PAMI_Rget_typed (context[ncontexts - 1], &parameters);

      while (active > 0)
        PAMI_Context_advance (context[ncontexts - 1], 100);

      /* Notify the remote task that the data has been transfered. */
      PAMI_Send_immediate (context[ncontexts - 1], &notify);
    }
  else if (task_id == 0)
    {
      /* Wait for notification that the data has been transfered. */
      while (ready == 0)
        PAMI_Context_advance (context[0], 100);

      ready = 0;
    }

  /* ******************************************************************** */

  /* non-contiguous -> non-contiguous transfer test                       */

  /* ******************************************************************** */
  if (task_id == 0)
    {
      parameters.rdma.local.offset  = 4 * 1024;
      parameters.rdma.remote.offset = 4 * 1024;
      parameters.type.local         = compound_type;
      parameters.type.remote        = simple_type;

      active = 1;
      PAMI_Rget_typed (context[0], &parameters);

      while (active > 0)
        PAMI_Context_advance (context[0], 100);

      /* Notify the remote task that the data has been transfered. */
      PAMI_Send_immediate (context[0], &notify);
    }
  else if (task_id == num_tasks - 1)
    {
      /* Wait for notification that the data has been transfered. */
      while (ready == 0)
        PAMI_Context_advance (context[ncontexts - 1], 100);

      ready = 0;
    }

  /* ******************************************************************** */

  /* non-contiguous -> contiguous transfer test                           */

  /* ******************************************************************** */
  if (task_id == num_tasks - 1)
    {
      parameters.rdma.local.offset  = 8 * 1024;
      parameters.rdma.remote.offset = 4 * 1024;
      parameters.type.local         = PAMI_TYPE_DOUBLE;
      parameters.type.remote        = compound_type;

      active = 1;
      PAMI_Rget_typed (context[ncontexts - 1], &parameters);

      while (active > 0)
        PAMI_Context_advance (context[ncontexts - 1], 100);

      /* Notify the remote task that the data has been transfered. */
      PAMI_Send_immediate (context[ncontexts - 1], &notify);
    }
  else if (task_id == 0)
    {
      /* Wait for notification that the data has been transfered. */
      while (ready == 0)
        PAMI_Context_advance (context[0], 100);

      ready = 0;
    }


  /* ******************************************************************** */

  /* VERIFY data buffers                                                  */

  /* ******************************************************************** */
  if (task_id == num_tasks - 1)
    {
      if (task_id == 0)
        {
          unsigned i = 0;

          for (; i < 320; i++)
            {
              if (data[i] != PI)
                {
                  errors++;
                  fprintf (stderr, "Error. data[%d] != %g ..... (%g)\n", i, PI, data[i]);
                }
            }

          for (; i < 512; i++)
            {
              if (data[i] != E)
                {
                  errors++;
                  fprintf (stderr, "Error. data[%d] != %g ..... (%g)\n", i, E, data[i]);
                }
            }

          unsigned j = 0;

          for (; j < 40; j++)
            {
              unsigned n = 0;

              for (; n < 8; n++)
                {
                  if (data[i] != PI)
                    {
                      errors++;
                      fprintf (stderr, "Error. data[%d] != %g ..... (%g)\n", i, PI, data[i]);
                    }

                  i++;
                }

              if (data[i] != E)
                {
                  errors++;
                  fprintf (stderr, "Error. data[%d] != %g ..... (%g)\n", i, E, data[i]);
                }

              i++;
            }

          for (; i < 1024; i++)
            {
              if (data[i] != E)
                {
                  errors++;
                  fprintf (stderr, "Error. data[%d] != %g ..... (%g)\n", i, E, data[i]);
                }
            }


          for (; i < 1024 + 320; i++)
            {
              if (data[i] != PI)
                {
                  errors++;
                  fprintf (stderr, "Error. data[%d] != %g ..... (%g)\n", i, PI, data[i]);
                }
            }

          for (; i < BUFFERSIZE; i++)
            {
              if (data[i] != E)
                {
                  errors++;
                  fprintf (stderr, "Error. data[%d] != %g ..... (%g)\n", i, E, data[i]);
                }
            }
        }
    }

  { /* cleanup */
    result = PAMI_Context_destroyv(context, ncontexts);

    if (result != PAMI_SUCCESS)
      {
        fprintf (stderr, "Error. Unable to destroy pami context. result = %d\n", result);
        return 1;
      }

    result = PAMI_Client_destroy(&client);

    if (result != PAMI_SUCCESS)
      {
        fprintf (stderr, "Error. Unable to destroy pami client. result = %d\n", result);
        return 1;
      }

    if (task_id == num_tasks - 1)
      {
        if (errors)
          fprintf (stdout, "Test completed with errors (%zu)\n", errors);
        else
          fprintf (stdout, "Test completed with success\n");
      }

  } /* cleanup done */

  return (errors != 0);
};