Exemplo n.º 1
0
void gpukernel_source_with_line_numbers(unsigned int count, const char **news, size_t *newl,
                                        strb *src) {
  assert(src != NULL);
  unsigned int section, line, i, j;
  size_t len;

  line=1;  // start the line counter at 1
  for(section=0; section<count; section++) {
    len = (newl == NULL)?0:newl[section];
    if(len<=0) // If either newl==NULL, or has length zero for this section, use strlen() to determine
      len=strlen(news[section]);

    i=0; // position of line-starts within news[section]
    while(i<len) {
      strb_appendf(src, "%04d\t", line);

      for(j=i; j<len && news[section][j] != '\n'; j++); // look for next line-end
      strb_appendn(src, news[section]+i, (j-i));
      strb_appendc(src, '\n');

      i = j+1;  // Character after the newline
      line++;
    }
  }
}
Exemplo n.º 2
0
static gpukernel *cuda_newkernel(void *c, unsigned int count,
                                 const char **strings, const size_t *lengths,
                                 const char *fname, unsigned int argcount,
                                 const int *types, int flags, int *ret,
                                 char **err_str) {
    cuda_context *ctx = (cuda_context *)c;
    strb sb = STRB_STATIC_INIT;
    char *bin, *log = NULL;
    srckey k, *ak;
    binval *av;
    gpukernel *res;
    size_t bin_len = 0, log_len = 0;
    CUdevice dev;
    unsigned int i;
    int ptx_mode = 0;
    int binary_mode = 0;
    int major, minor;

    if (count == 0) FAIL(NULL, GA_VALUE_ERROR);

    if (flags & GA_USE_OPENCL)
      FAIL(NULL, GA_DEVSUP_ERROR);

    if (flags & GA_USE_BINARY) {
      // GA_USE_BINARY is exclusive
      if (flags & ~GA_USE_BINARY)
        FAIL(NULL, GA_INVALID_ERROR);
      // We need the length for binary data and there is only one blob.
      if (count != 1 || lengths == NULL || lengths[0] == 0)
        FAIL(NULL, GA_VALUE_ERROR);
    }

    cuda_enter(ctx);

    ctx->err = cuCtxGetDevice(&dev);
    if (ctx->err != CUDA_SUCCESS) {
      cuda_exit(ctx);
      FAIL(NULL, GA_IMPL_ERROR);
    }
    ctx->err = cuDeviceComputeCapability(&major, &minor, dev);
    if (ctx->err != CUDA_SUCCESS) {
      cuda_exit(ctx);
      FAIL(NULL, GA_IMPL_ERROR);
    }

    // GA_USE_CLUDA is done later
    // GA_USE_SMALL will always work
    if (flags & GA_USE_DOUBLE) {
      if (major < 1 || (major == 1 && minor < 3)) {
        cuda_exit(ctx);
        FAIL(NULL, GA_DEVSUP_ERROR);
      }
    }
    if (flags & GA_USE_COMPLEX) {
      // just for now since it is most likely broken
      cuda_exit(ctx);
      FAIL(NULL, GA_DEVSUP_ERROR);
    }
    // GA_USE_HALF should always work

    if (flags & GA_USE_PTX) {
      ptx_mode = 1;
    } else if (flags & GA_USE_BINARY) {
      binary_mode = 1;
    }

    if (binary_mode) {
      bin = memdup(strings[0], lengths[0]);
      bin_len = lengths[0];
      if (bin == NULL) {
        cuda_exit(ctx);
        FAIL(NULL, GA_MEMORY_ERROR);
      }
    } else {
      if (flags & GA_USE_CLUDA) {
        strb_appends(&sb, CUDA_PREAMBLE);
      }

      if (lengths == NULL) {
        for (i = 0; i < count; i++)
        strb_appends(&sb, strings[i]);
      } else {
        for (i = 0; i < count; i++) {
          if (lengths[i] == 0)
            strb_appends(&sb, strings[i]);
          else
            strb_appendn(&sb, strings[i], lengths[i]);
        }
      }

      strb_append0(&sb);

      if (strb_error(&sb)) {
        strb_clear(&sb);
        cuda_exit(ctx);
        return NULL;
      }

      if (ptx_mode) {
        bin = sb.s;
        bin_len = sb.l;
      } else {
        bin = NULL;
        if (compile_cache != NULL) {
          k.src = sb.s;
          k.len = sb.l;
          memcpy(k.arch, ctx->bin_id, BIN_ID_LEN);
          av = cache_get(compile_cache, &k);
          if (av != NULL) {
            bin = memdup(av->bin, av->len);
            bin_len = av->len;
          }
        }
        if (bin == NULL) {
          bin = call_compiler(sb.s, sb.l, ctx->bin_id, &bin_len,
                              &log, &log_len, ret);
        }
        if (bin == NULL) {
          if (err_str != NULL) {
            strb debug_msg = STRB_STATIC_INIT;

            // We're substituting debug_msg for a string with this first line:
            strb_appends(&debug_msg, "CUDA kernel build failure ::\n");

            /* Delete the final NUL */
            sb.l--;
            gpukernel_source_with_line_numbers(1, (const char **)&sb.s,
                                               &sb.l, &debug_msg);

            if (log != NULL) {
              strb_appends(&debug_msg, "\nCompiler log:\n");
              strb_appendn(&debug_msg, log, log_len);
              free(log);
            }
            *err_str = strb_cstr(&debug_msg);
            // *err_str will be free()d by the caller (see docs in kernel.h)
          }
          strb_clear(&sb);
          cuda_exit(ctx);
          return NULL;
        }
        if (compile_cache == NULL)
          compile_cache = cache_twoq(16, 16, 16, 8, src_eq, src_hash, src_free,
                                     bin_free);

        if (compile_cache != NULL) {
          ak = malloc(sizeof(*ak));
          av = malloc(sizeof(*av));
          if (ak == NULL || av == NULL) {
            free(ak);
            free(av);
            goto done;
          }
          ak->src = memdup(sb.s, sb.l);
          if (ak->src == NULL) {
            free(ak);
            free(av);
            goto done;
          }
          ak->len = sb.l;
          memmove(ak->arch, ctx->bin_id, BIN_ID_LEN);
          av->len = bin_len;
          av->bin = memdup(bin, bin_len);
          if (av->bin == NULL) {
            src_free(ak);
            free(av);
            goto done;
          }
          cache_add(compile_cache, ak, av);
        }
      done:
        strb_clear(&sb);
      }
    }

    res = calloc(1, sizeof(*res));
    if (res == NULL) {
      free(bin);
      cuda_exit(ctx);
      FAIL(NULL, GA_SYS_ERROR);
    }

    res->bin_sz = bin_len;
    res->bin = bin;

    res->refcnt = 1;
    res->argcount = argcount;
    res->types = calloc(argcount, sizeof(int));
    if (res->types == NULL) {
      _cuda_freekernel(res);
      cuda_exit(ctx);
      FAIL(NULL, GA_MEMORY_ERROR);
    }
    memcpy(res->types, types, argcount*sizeof(int));
    res->args = calloc(argcount, sizeof(void *));
    if (res->args == NULL) {
      _cuda_freekernel(res);
      cuda_exit(ctx);
      FAIL(NULL, GA_MEMORY_ERROR);
    }

    ctx->err = cuModuleLoadData(&res->m, bin);

    if (ctx->err != CUDA_SUCCESS) {
      _cuda_freekernel(res);
      cuda_exit(ctx);
      FAIL(NULL, GA_IMPL_ERROR);
    }

    ctx->err = cuModuleGetFunction(&res->k, res->m, fname);
    if (ctx->err != CUDA_SUCCESS) {
      _cuda_freekernel(res);
      cuda_exit(ctx);
      FAIL(NULL, GA_IMPL_ERROR);
    }

    res->ctx = ctx;
    ctx->refcnt++;
    cuda_exit(ctx);
    TAG_KER(res);
    return res;
}