Exemple #1
0
int GpuArray_fromdata(GpuArray *a, gpudata *data, size_t offset, int typecode,
                      unsigned int nd, const size_t *dims,
                      const ssize_t *strides, int writeable) {
  if (gpuarray_get_type(typecode)->typecode != typecode)
    return GA_VALUE_ERROR;
  assert(data != NULL);
  a->data = data;
  gpudata_retain(a->data);
  a->nd = nd;
  a->offset = offset;
  a->typecode = typecode;
  a->dimensions = calloc(nd, sizeof(size_t));
  a->strides = calloc(nd, sizeof(ssize_t));
  a->flags = (writeable ? GA_WRITEABLE : 0);
  if (a->dimensions == NULL || a->strides == NULL) {
    GpuArray_clear(a);
    return GA_MEMORY_ERROR;
  }
  memcpy(a->dimensions, dims, nd*sizeof(size_t));
  memcpy(a->strides, strides, nd*sizeof(ssize_t));

  GpuArray_fix_flags(a);

  return GA_NO_ERROR;
}
Exemple #2
0
int GpuArray_is_aligned(const GpuArray *a) {
  size_t align = gpuarray_get_type(a->typecode)->align;
  unsigned int i;

  if (a->offset % align != 0)
    return 0;

  for (i = 0; i < a->nd; i++) {
    if (a->strides[i] % align != 0) return 0;
  }
  return 1;
}
static int   maxandargmaxCheckargs              (maxandargmax_ctx*  ctx){
	int i;

	/**
	 * We initialize certain parts of the context.
	 */

	ctx->ret           = GA_NO_ERROR;
	ctx->axisList      = NULL;
	ctx->gpuCtx        = NULL;

	ctx->dstMaxType    = ctx->dstArgmaxType = NULL;
	ctx->ndh           = 0;
	ctx->sourceCode    = NULL;

	ctx->hwAxisList[0] = ctx->hwAxisList[1] = ctx->hwAxisList[2] = 0;
	ctx->blockSize [0] = ctx->blockSize [1] = ctx->blockSize [2] = 1;
	ctx->gridSize  [0] = ctx->gridSize  [1] = ctx->gridSize  [2] = 1;
	ctx->chunkSize [0] = ctx->chunkSize [1] = ctx->chunkSize [2] = 1;

	ctx->srcStepsGD    = ctx->srcSizeGD     = ctx->chunkSizeGD   =
	ctx->dstMaxStepsGD = ctx->dstArgmaxStepsGD = NULL;


	/* Insane src or reduxLen? */
	if(!ctx->dstMax || !ctx->dstArgmax || !ctx->src || ctx->src->nd == 0 ||
	    ctx->reduxLen == 0 || ctx->reduxLen > (int)ctx->src->nd){
		return ctx->ret=GA_INVALID_ERROR;
	}

	/* Insane or duplicate list entry? */
	for(i=0;i<ctx->reduxLen;i++){
		if(ctx->reduxList[i] <  0                            ||
		   ctx->reduxList[i] >= (int)ctx->src->nd            ||
		   axisInSet(ctx->reduxList[i], ctx->reduxList, i, 0)){
			return ctx->ret=GA_INVALID_ERROR;
		}
	}

	/* Unknown type? */
	ctx->dstMaxType    = gpuarray_get_type(ctx->src->typecode)->cluda_name;
	ctx->dstArgmaxType = gpuarray_get_type(GA_SSIZE)          ->cluda_name;
	if(!ctx->dstMaxType || !ctx->dstArgmaxType){
		return ctx->ret=GA_INVALID_ERROR;
	}

	/* GPU context non-existent? */
	ctx->gpuCtx        = GpuArray_context(ctx->src);
	if(!ctx->gpuCtx){
		return ctx->ret=GA_INVALID_ERROR;
	}


	/**
	 * We initialize some more parts of the context, using the guarantees
	 * we now have about the sanity of the arguments.
	 */

	ctx->nds = ctx->src->nd;
	ctx->ndr = ctx->reduxLen;
	ctx->ndd = ctx->nds - ctx->ndr;

	return ctx->ret;
}
Exemple #4
0
size_t gpuarray_get_elsize(int typecode) {
  return gpuarray_get_type(typecode)->size;
}
Exemple #5
0
static int gen_take1_kernel(GpuKernel *k, gpucontext *ctx, char **err_str,
                            GpuArray *a, const GpuArray *v,
                            const GpuArray *ind, int addr32) {
  strb sb = STRB_STATIC_INIT;
  int *atypes;
  char *sz, *ssz;
  unsigned int i, i2;
  unsigned int nargs, apos;
  int flags = GA_USE_CLUDA;
  int res;

  nargs = 9 + 2 * v->nd;

  atypes = calloc(nargs, sizeof(int));
  if (atypes == NULL)
    return GA_MEMORY_ERROR;

  if (addr32) {
    sz = "ga_uint";
    ssz = "ga_int";
  } else {
    sz = "ga_size";
    ssz = "ga_ssize";
  }

  apos = 0;
  strb_appendf(&sb, "KERNEL void take1(GLOBAL_MEM %s *r, ga_size r_off, "
               "GLOBAL_MEM const %s *v, ga_size v_off,",
               gpuarray_get_type(a->typecode)->cluda_name,
               gpuarray_get_type(v->typecode)->cluda_name);
  atypes[apos++] = GA_BUFFER;
  atypes[apos++] = GA_SIZE;
  atypes[apos++] = GA_BUFFER;
  atypes[apos++] = GA_SIZE;
  for (i = 0; i < v->nd; i++) {
    strb_appendf(&sb, " ga_ssize s%u, ga_size d%u,", i, i);
    atypes[apos++] = GA_SSIZE;
    atypes[apos++] = GA_SIZE;
  }
  strb_appendf(&sb, " GLOBAL_MEM const %s *ind, ga_size i_off, "
               "ga_size n0, ga_size n1, GLOBAL_MEM int* err) {\n",
               gpuarray_get_type(ind->typecode)->cluda_name);
  atypes[apos++] = GA_BUFFER;
  atypes[apos++] = GA_SIZE;
  atypes[apos++] = GA_SIZE;
  atypes[apos++] = GA_SIZE;
  atypes[apos++] = GA_BUFFER;
  assert(apos == nargs);
  strb_appendf(&sb, "  const %s idx0 = LDIM_0 * GID_0 + LID_0;\n"
               "  const %s numThreads0 = LDIM_0 * GDIM_0;\n"
               "  const %s idx1 = LDIM_1 * GID_1 + LID_1;\n"
               "  const %s numThreads1 = LDIM_1 * GDIM_1;\n"
               "  %s i0, i1;\n", sz, sz, sz, sz, sz);
  strb_appends(&sb, "  if (idx0 >= n0 || idx1 >= n1) return;\n");
  strb_appendf(&sb, "  r = (GLOBAL_MEM %s *)(((GLOBAL_MEM char *)r) + r_off);\n"
               "  ind = (GLOBAL_MEM %s *)(((GLOBAL_MEM char *)ind) + i_off);\n",
               gpuarray_get_type(a->typecode)->cluda_name,
               gpuarray_get_type(ind->typecode)->cluda_name);
  strb_appendf(&sb, "  for (i0 = idx0; i0 < n0; i0 += numThreads0) {\n"
               "    %s ii0 = ind[i0];\n"
               "    %s pos0 = v_off;\n"
               "    if (ii0 < 0) ii0 += d0;\n"
               "    if ((ii0 < 0) || (ii0 >= d0)) {\n"
               "      *err = -1;\n"
               "      continue;\n"
               "    }\n"
               "    pos0 += ii0 * (%s)s0;\n"
               "    for (i1 = idx1; i1 < n1; i1 += numThreads1) {\n"
               "      %s p = pos0;\n", ssz, sz, sz, sz);
  if (v->nd > 1) {
    strb_appendf(&sb, "      %s pos, ii = i1;\n", sz);
    for (i2 = v->nd; i2 > 1; i2--) {
      i = i2 - 1;
      if (i > 1)
        strb_appendf(&sb, "      pos = ii %% (%s)d%u;\n"
                     "      ii /= (%s)d%u;\n", sz, i, sz, i);
      else
        strb_appends(&sb, "      pos = ii;\n");
      strb_appendf(&sb, "      p += pos * (%s)s%u;\n", ssz, i);
    }
  }
  strb_appendf(&sb, "      r[i0*((%s)n1) + i1] = *((GLOBAL_MEM %s *)(((GLOBAL_MEM char *)v) + p));\n",
               sz, gpuarray_get_type(v->typecode)->cluda_name);
  strb_appends(&sb, "    }\n"
               "  }\n"
               "}\n");
  if (strb_error(&sb)) {
    res = GA_MEMORY_ERROR;
    goto bail;
  }
  flags |= gpuarray_type_flags(a->typecode, v->typecode, GA_BYTE, -1);
  res = GpuKernel_init(k, ctx, 1, (const char **)&sb.s, &sb.l, "take1",
                       nargs, atypes, flags, err_str);
bail:
  free(atypes);
  strb_clear(&sb);
  return res;
}