// Add a wrapper for the named method that actually points to the trampoline // function so that the function will call back to the interpreter to actually // execute the method. Object repl_addmethod(Object self, int nparts, int *argcv, Object *args, int flags) { Object o = args[0]; Object objname = args[1]; gc_root(objname); Object methname = args[2]; gc_root(methname); int pos = integerfromAny(args[3]); Object(*func)(Object, int, int*, Object*, int); func = (Object(*)(Object, int, int*, Object*, int))trampoline; Method *m = add_Method(o->class, cstringfromString(methname), func); m->flags &= ~MFLAG_REALSELFONLY; m->flags |= MFLAG_REALSELFALSO; m->pos = pos; Object callinfo = alloc_List(); gc_root(callinfo); // ugly workaround gc_pause(); int largcv[] = {1}; callmethod(callinfo, "push", 1, largcv, &objname); callmethod(callinfo, "push", 1, largcv, &methname); gc_unpause(); adddatum2(o, callinfo, pos); return alloc_none(); }
// Store a number into one element of a CUDA float array.
//
// self is the struct CudaFloatArray being written, argv[0] is the
// zero-based element index and argv[1] is an object whose data holds a
// double; the value is narrowed to float before being stored.
// An index outside [0, size) is reported through raiseError instead of
// writing outside the array. Always returns the none object.
Object cuda_FloatArray_at_put(Object self, int nparts, int *argcv,
        Object *argv, int flags)
{
    struct CudaFloatArray *a = (struct CudaFloatArray *)self;
    int n = integerfromAny(argv[0]);

    if (n < 0 || n >= a->size) {
        raiseError("CUDA FloatArray index out of bounds");
        // NOTE(review): raiseError presumably does not return; this return
        // only guards the store below in case it does.
        return alloc_none();
    }

    a->data[n] = (float)*((double *)argv[1]->data);
    return alloc_none();
}
// Return the next space-delimited token at or after *cursor, terminating it
// in place and advancing *cursor past it; returns NULL when none remains.
// Tokenises like strtok(s, " ") but keeps no hidden static state.
static char *cuda_next_token(char **cursor)
{
    char *tok = *cursor + strspn(*cursor, " ");
    if (*tok == '\0') {
        *cursor = tok;
        return NULL;
    }
    char *end = tok + strcspn(tok, " ");
    if (*end != '\0') {
        *end++ = '\0';
    }
    *cursor = end;
    return tok;
}

// Load a CUDA module and launch one kernel from it with the given arguments.
//
// Argument layout (argcv[0] is the number of kernel arguments, nargs):
//   argv[0 .. nargs-1]  kernel arguments
//   argv[nargs]         descriptor string "<module path> <type> <type> ..."
//   argv[nargs+1 ..]    blockWidth, blockHeight, gridWidth, gridHeight
//
// Each type token selects how the matching argument is passed:
//   "f*..."  struct CudaFloatArray, copied to the device before the launch
//            and copied back afterwards
//   "f..."   double-valued object, passed as a float
//   "i..."   integer, passed as an int
//
// The kernel name is "block" followed by the module path with its first
// strlen("_cuda/") characters removed, cut at the first '.'.
//
// Errors are reported through raiseError / errcheck.
// NOTE(review): both are presumably non-returning; the early returns below
// only guard against the case where raiseError does return.
// Always returns the none object.
Object cuda_using_do_blockWidth_blockHeight_gridWidth_gridHeight(Object self,
        int nparts, int *argcv, Object *argv, int flags)
{
    enum arg_kind { ARG_FLOAT_ARRAY, ARG_FLOAT, ARG_INT };

    // A failure of cuInit or cuDeviceGetCount leaves deviceCount at 0, which
    // is reported by the check below, so their results are not inspected.
    int deviceCount = 0;
    cuInit(0);
    cuDeviceGetCount(&deviceCount);
    if (deviceCount == 0) {
        raiseError("No CUDA devices found");
        return alloc_none();
    }

    const int nargs = argcv[0];

    // "do" through "gridHeight" only have one argument each
    int argOffset = nargs + 1;
    int blockDimX = integerfromAny(argv[argOffset++]);
    int blockDimY = integerfromAny(argv[argOffset++]);
    int gridDimX = integerfromAny(argv[argOffset++]);
    int gridDimY = integerfromAny(argv[argOffset++]);

    // Tokenise a private copy of the descriptor; tokens stay valid for the
    // rest of the function because they point into argStr.
    const char *spec = grcstring(argv[nargs]);
    char argStr[strlen(spec) + 1];
    strcpy(argStr, spec);
    char *cursor = argStr;

    const char *modulePath = cuda_next_token(&cursor);
    if (modulePath == NULL || strlen(modulePath) < strlen("_cuda/")) {
        raiseError("CUDA block descriptor does not start with a module path");
        return alloc_none();
    }

    // Kernel name: "block" + path without its leading directory part, up to
    // (not including) the first '.'.
    const char *stem = modulePath + strlen("_cuda/");
    const size_t stemLen = strcspn(stem, ".");
    char name[sizeof "block" + stemLen];
    snprintf(name, sizeof name, "block%.*s", (int)stemLen, stem);

    // A variable-length array must have at least one element, even when the
    // kernel takes no arguments.
    const int slots = nargs > 0 ? nargs : 1;
    enum arg_kind kinds[slots];
    CUdeviceptr dps[slots];
    float floats[slots];
    int ints[slots];
    void *args[slots];

    // Classify every argument before any CUDA resource is created, so a
    // malformed descriptor is rejected without leaving anything allocated.
    for (int i = 0; i < nargs; i++) {
        const char *argType = cuda_next_token(&cursor);
        if (argType == NULL) {
            raiseError("CUDA block descriptor has fewer types than arguments");
            return alloc_none();
        }
        if (argType[0] == 'f' && argType[1] == '*') {
            kinds[i] = ARG_FLOAT_ARRAY;
        } else if (argType[0] == 'f') {
            kinds[i] = ARG_FLOAT;
        } else if (argType[0] == 'i') {
            kinds[i] = ARG_INT;
        } else {
            // Fail
            char buf[256];
            snprintf(buf, sizeof buf,
                    "CUDA argument cannot be coerced. This shouldn't happen. "
                    "Argument string: %s\n", argType);
            raiseError(buf);
            return alloc_none();
        }
    }

    CUdevice cuDevice;
    CUcontext cuContext;
    CUmodule cuModule;
    CUfunction cuFunc;
    errcheck(cuDeviceGet(&cuDevice, 0));
    errcheck(cuCtxCreate(&cuContext, 0, cuDevice));
    errcheck(cuModuleLoad(&cuModule, modulePath));

    // Marshal arguments: args[i] points at the storage holding the value
    // that is handed to the kernel for parameter i.
    for (int i = 0; i < nargs; i++) {
        switch (kinds[i]) {
        case ARG_FLOAT_ARRAY: {
            struct CudaFloatArray *a = (struct CudaFloatArray *)argv[i];
            errcheck(cuMemAlloc(&dps[i], a->size * sizeof(float)));
            // NOTE(review): &a->data is kept from the original; it is the
            // element storage only if data is an array member - confirm.
            errcheck(cuMemcpyHtoD(dps[i], &a->data,
                        a->size * sizeof(float)));
            args[i] = &dps[i];
            break;
        }
        case ARG_FLOAT:
            floats[i] = (float)*((double *)(argv[i]->data));
            args[i] = &floats[i];
            break;
        default: // ARG_INT
            ints[i] = integerfromAny(argv[i]);
            args[i] = &ints[i];
            break;
        }
    }

    errcheck(cuModuleGetFunction(&cuFunc, cuModule, name));
    errcheck(cuLaunchKernel(cuFunc, gridDimX, gridDimY, 1,
                blockDimX, blockDimY, 1, 0, NULL, args, NULL));

    // Only float arrays own a device buffer; scalar arguments have no
    // dps[i] entry and must not be copied back or freed.
    for (int i = 0; i < nargs; i++) {
        if (kinds[i] != ARG_FLOAT_ARRAY) {
            continue;
        }
        struct CudaFloatArray *a = (struct CudaFloatArray *)argv[i];
        errcheck(cuMemcpyDtoH(&a->data, dps[i], a->size * sizeof(float)));
        cuMemFree(dps[i]);
    }

    // Release the per-call module and context (best effort, like cuMemFree).
    cuModuleUnload(cuModule);
    cuCtxDestroy(cuContext);
    return alloc_none();
}
// Read one element of a CUDA float array.
//
// self is the struct CudaFloatArray being read and argv[0] is the
// zero-based element index. The element is returned wrapped by
// alloc_Float64. An index outside [0, size) is reported through raiseError
// instead of reading outside the array.
Object cuda_FloatArray_at(Object self, int nparts, int *argcv, Object *argv,
        int flags)
{
    struct CudaFloatArray *a = (struct CudaFloatArray *)self;
    int n = integerfromAny(argv[0]);

    if (n < 0 || n >= a->size) {
        raiseError("CUDA FloatArray index out of bounds");
        // NOTE(review): raiseError presumably does not return; this return
        // only guards the read below in case it does.
        return alloc_none();
    }

    return alloc_Float64(a->data[n]);
}
// Create a CUDA float array.
//
// argv[0] is converted to an int and handed to alloc_CudaFloatArray
// (presumably the element count - confirm against that allocator); the
// allocator's result is returned unchanged.
Object cuda_floatArray(Object self, int nparts, int *argcv, Object *argv,
        int flags)
{
    const int length = integerfromAny(argv[0]);
    Object array = alloc_CudaFloatArray(length);
    return array;
}
// Create a fresh user object for the REPL.
//
// args[0] and args[1] are converted to ints and passed, in that order, as
// the method count and field count of alloc_userobj; the new object is
// returned unchanged.
Object repl_createobject(Object self, int nparts, int *argcv, Object *args,
        int flags)
{
    const int method_slots = integerfromAny(args[0]);
    const int field_slots = integerfromAny(args[1]);
    Object created = alloc_userobj(method_slots, field_slots);
    return created;
}