示例#1
0
nsresult dpoCContext::InitContext(cl_platform_id platform)
{
	cl_int err_code;
	cl_device_id *devices;
	size_t cb;

	cl_context_properties context_properties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform, NULL};
	
	context = clCreateContextFromType(context_properties, CL_DEVICE_TYPE_CPU, ReportCLError, this, &err_code);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("InitContext", err_code);
		return NS_ERROR_NOT_AVAILABLE;
	}

	err_code = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("InitContext", err_code);
		return NS_ERROR_NOT_AVAILABLE;
	}

	devices = (cl_device_id *)nsMemory::Alloc(sizeof(cl_device_id)*cb);
	if (devices == NULL) {
		DEBUG_LOG_STATUS("InitContext", "Cannot allocate device list");
		return NS_ERROR_OUT_OF_MEMORY;
	}

	err_code = clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("InitContext", err_code);
		nsMemory::Free(devices);
		return NS_ERROR_NOT_AVAILABLE;
	}

	cmdQueue = clCreateCommandQueue(context, devices[0], 
#ifdef CLPROFILE 
		CL_QUEUE_PROFILING_ENABLE |
#endif /* CLPROFILE */
#ifdef OUTOFORDERQUEUE
		CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
#endif /* OUTOFORDERQUEUE */
		0,
		&err_code);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("InitContext", err_code);
		nsMemory::Free(devices);
		return NS_ERROR_NOT_AVAILABLE;
	}

	DEBUG_LOG_STATUS("InitContext", "queue is " << cmdQueue);

	nsMemory::Free(devices);

	kernelFailureMem = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int), NULL, &err_code);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("InitContext", err_code);
		return NS_ERROR_NOT_AVAILABLE;
	}

	return NS_OK;
}
示例#2
0
/* [implicit_jscontext] dpoIData allocateData (in jsval templ, [optional] in uint32_t length); */
NS_IMETHODIMP dpoCContext::AllocateData(const jsval & templ, uint32_t length, JSContext *cx, dpoIData **_retval)
{
	cl_int err_code;
	nsresult result;
	JSObject *tArray;
	size_t bytePerElements;
	nsCOMPtr<dpoCData> data;

	result = ExtractArray( templ, &tArray, cx);
	if (NS_FAILED(result)) {
		return result;
	}

	data = new dpoCData( this);
	if (data == NULL) {
		DEBUG_LOG_STATUS("AllocateData", "Cannot create new dpoCData object");
		return NS_ERROR_OUT_OF_MEMORY;
	}
	
	if (length == 0) {
		DEBUG_LOG_STATUS("AllocateData", "size not provided, assuming template's size");
		length = JS_GetTypedArrayLength(tArray);
	}

	bytePerElements = JS_GetTypedArrayByteLength(tArray) / JS_GetTypedArrayLength(tArray);

	DEBUG_LOG_STATUS("AllocateData", "length " << length << " bytePerElements " << bytePerElements);

#ifdef PREALLOCATE_IN_JS_HEAP
	JSObject *jsArray;
	if (NS_FAILED(CreateAlignedTA(JS_GetTypedArrayType(tArray, cx), length, &jsArray, cx))) {
		return NS_ERROR_NOT_AVAILABLE;
	}
	if (!jsArray) {
		DEBUG_LOG_STATUS("AllocateData", "Cannot create typed array");
		return NS_ERROR_OUT_OF_MEMORY;
	}

	cl_mem memObj = CreateBuffer( CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, 
                                  JS_GetTypedArrayByteLength(jsArray, cx), GetPointerFromTA(jsArray, cx), &err_code);
#else /* PREALLOCATE_IN_JS_HEAP */
	JSObject *jsArray =  nullptr;
	cl_mem memObj = CreateBuffer(cx, CL_MEM_READ_WRITE, length * bytePerElements, NULL, &err_code);
#endif /* PREALLOCATE_IN_JS_HEAP */
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("AllocateData", err_code);
		return NS_ERROR_NOT_AVAILABLE;
	}

	result = data->InitCData(cx, cmdQueue, memObj, JS_GetArrayBufferViewType(tArray), length, length * bytePerElements, jsArray);

	if (NS_SUCCEEDED(result)) {
		data.forget((dpoCData **) _retval);
	}

    return result;
}
示例#3
0
/* [implicit_jscontext] dpoIData allocateData2 (in dpoIData templ, [optional] in uint32_t length); */
NS_IMETHODIMP dpoCContext::AllocateData2(dpoIData *templ, uint32_t length, JSContext *cx, dpoIData **_retval) 
{
	// this cast is only safe as long as no other implementations of the dpoIData interface exist
	dpoCData *cData = (dpoCData *) templ;
	cl_int err_code;
	nsresult result;
	size_t bytePerElements;
	nsCOMPtr<dpoCData> data;
#ifdef PREALLOCATE_IN_JS_HEAP
	jsval jsBuffer;
#endif /* PREALLOCATE_IN_JS_HEAP */

	data = new dpoCData( this);
	if (data == NULL) {
		DEBUG_LOG_STATUS("AllocateData2", "Cannot create new dpoCData object");
		return NS_ERROR_OUT_OF_MEMORY;
	}
	
	if (length == 0) {
		DEBUG_LOG_STATUS("AllocateData2", "length not provided, assuming template's size");
		length = cData->GetLength();
	}

	bytePerElements = cData->GetSize() / cData->GetLength();

	DEBUG_LOG_STATUS("AllocateData2", "length " << length << " bytePerElements " << bytePerElements);

#ifdef PREALLOCATE_IN_JS_HEAP
	JSObject *jsArray;
	if (NS_FAILED(CreateAlignedTA(cData->GetType(), length, &jsArray, cx))) {
		return NS_ERROR_NOT_AVAILABLE;
	}
	if (!jsArray) {
		DEBUG_LOG_STATUS("AllocateData2", "Cannot create typed array");
		return NS_ERROR_OUT_OF_MEMORY;
	}

	cl_mem memObj = CreateBuffer(CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, 
                                 JS_GetTypedArrayByteLength(jsArray, cx), JS_GetArrayBufferViewData(jsArray, cx), &err_code);
#else /* PREALLOCATE_IN_JS_HEAP */
	JSObject *jsArray = NULL;
	cl_mem memObj = CreateBuffer(cx, CL_MEM_READ_WRITE, length * bytePerElements, NULL, &err_code);
#endif /* PREALLOCATE_IN_JS_HEAP */
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("AllocateData2", err_code);
		return NS_ERROR_NOT_AVAILABLE;
	}

	result = data->InitCData(cx, cmdQueue, memObj, cData->GetType(), length, length * bytePerElements, jsArray);

	if (NS_SUCCEEDED(result)) {
		data.forget((dpoCData **) _retval);
	}
		
    return result;
}
示例#4
0
void dpoCContext::RecordEndOfRoundTrip(dpoIContext *parent) {
	dpoCContext *self = (dpoCContext *) parent;
	if (self->wrt_exec_start.QuadPart == -1) {
		DEBUG_LOG_STATUS("RecordEndOfRoundTrip", "no previous start data");
		return;
	}
	if (!QueryPerformanceCounter(&(self->wrt_exec_end))) {
		DEBUG_LOG_STATUS("RecordEndOfRoundTrip", "querying performance counter failed");
		self->wrt_exec_start.QuadPart = -1;
		self->wrt_exec_end.QuadPart = -1;
	}
}
示例#5
0
template<> Uint8ClampedArray* CData::getValue<Uint8ClampedArray>()
{
    cl_int err_code;
#ifdef PREALLOCATE_IN_JS_HEAP
    void* mem;
#endif // PREALLOCATE_IN_JS_HEAP

    if (m_theUint8ClampedArray.get()) {
#ifdef PREALLOCATE_IN_JS_HEAP
        if (false && !m_isMapped) {
            DEBUG_LOG_STATUS("getValue", "memory is " << m_theUint8ClampedArray.get());
            void* mem = clEnqueueMapBuffer(m_queue, m_memObj, CL_TRUE, CL_MAP_READ, 0, m_size, 0, 0, 0, &err_code);

            if (err_code != CL_SUCCESS) {
                DEBUG_LOG_ERROR("getValue", err_code);
                return 0;
            }
#ifndef DEBUG_OFF
            if (mem != m_theUint8ClampedArray->data())
                DEBUG_LOG_STATUS("getValue", "EnqueueMap returned wrong pointer");
#endif // DEBUG_OFF
            m_isMapped = true;
        }
#endif // PREALLOCATE_IN_JS_HEAP
        return m_theUint8ClampedArray.get();
    } else {
#ifdef INCREMENTAL_MEM_RELEASE
        checkFree();
#endif // INCREMENTAL_MEM_RELEASE

        if (m_parent->createAlignedTA<Uint8ClampedArray, unsigned char>(m_type, m_length, m_theUint8ClampedArray) != RT_OK)
            return 0;

        if (!m_theUint8ClampedArray) {
            DEBUG_LOG_STATUS("getValue", "Cannot create typed array");
            return 0;
        }

        err_code = enqueueReadBuffer(m_size, m_theUint8ClampedArray->data());

        if (err_code != CL_SUCCESS) {
            DEBUG_LOG_ERROR("getValue", err_code);
            m_theUint8ClampedArray.clear();
            return 0;
        }

        DEBUG_LOG_STATUS("getValue", "materialized typed array");

        return m_theUint8ClampedArray.get();
    }
}
示例#6
0
void dpoCContext::RecordBeginOfRoundTrip(dpoIContext *parent) {
	dpoCContext *self = (dpoCContext *) parent;
	if (!QueryPerformanceCounter(&(self->wrt_exec_start))) {
		DEBUG_LOG_STATUS("RecordBeginOfRoundTrip", "querying performance counter failed");
		self->wrt_exec_start.QuadPart = -1;
	}
}
示例#7
0
template<> unsigned CData::initCData<Uint8ClampedArray>(cl_command_queue aQueue, cl_mem aMemObj, ArrayBufferView::ViewType aType, unsigned aLength, unsigned aSize, PassRefPtr<Uint8ClampedArray> anArray)
{
    cl_int err_code;

    m_type = aType;
    m_length = aLength;
    m_size = aSize;
    m_memObj = aMemObj;

    if (anArray.get())
        m_theUint8ClampedArray = anArray;
    else
        m_theUint8ClampedArray.clear();

    DEBUG_LOG_STATUS("initCData", "queue is " << aQueue << " buffer is " << aMemObj);

    err_code = clRetainCommandQueue(m_queue);
    if (err_code != CL_SUCCESS) {
        DEBUG_LOG_ERROR("initCData", err_code);
        // We should really fail here but a bug in the whatif OpenCL
        // makes the above retain operation always fail.
        m_isRetained = false;
    } else
        m_isRetained = true;
    m_queue = aQueue;

    return RT_OK;
}
示例#8
0
void CL_CALLBACK dpoCContext::CollectTimings( cl_event event, cl_int status, void *data)
{
	cl_int result;
	dpoCContext *instance = (dpoCContext *) data;
	
	DEBUG_LOG_STATUS("CollectTimings", "enquiring for runtimes...");

	result = clGetEventProfilingInfo( event, CL_PROFILING_COMMAND_START, sizeof (cl_ulong), &(instance->clp_exec_start), NULL);
	if (result != CL_SUCCESS) {
		DEBUG_LOG_ERROR("CollectTimings", result);
		instance->clp_exec_start = 0;
	}

	result = clGetEventProfilingInfo( event, CL_PROFILING_COMMAND_END, sizeof (cl_ulong), &(instance->clp_exec_end), NULL);
	if (result != CL_SUCCESS) {
		DEBUG_LOG_ERROR("CollectTimings", result);
		instance->clp_exec_end = 0;
	}

	DEBUG_LOG_STATUS("CollectTimings", "Collected start " << instance->clp_exec_start << " and end " << instance->clp_exec_end);
}
示例#9
0
CData::~CData()
{
    DEBUG_LOG_DESTROY("CData", this);
    if (m_memObj) {
#ifdef INCREMENTAL_MEM_RELEASE
        deferFree(m_memObj);
#else // INCREMENTAL_MEM_RELEASE
        clReleaseMemObject(m_memObj);
#endif // INCREMENTAL_MEM_RELEASE
    }
    if (m_queue && m_isRetained) {
        DEBUG_LOG_STATUS("~CData", "releasing queue object");
        clReleaseCommandQueue(m_queue);
    }
    m_parent = 0;
}
示例#10
0
/* void setArgument (in PRUint32 number, in dpoIData argument); */
NS_IMETHODIMP dpoCKernel::SetArgument(PRUint32 number, dpoIData *argument)
{
    cl_int err_code;
    cl_mem buffer;

    /* skip internal arguments */
    number = number + DPO_NUMBER_OF_ARTIFICIAL_ARGS;

    buffer = ((dpoCData *) argument)->GetContainedBuffer();
    DEBUG_LOG_STATUS("SetArgument", "buffer is " << buffer);

    err_code = clSetKernelArg(kernel, number, sizeof(cl_mem), &buffer);

    if (err_code != CL_SUCCESS) {
        DEBUG_LOG_ERROR("SetArgument", err_code);
        return NS_ERROR_INVALID_ARG;
    }

    return NS_OK;
}
示例#11
0
/* readonly attribute uint64_t lastRoundTripTime; */
NS_IMETHODIMP dpoCContext::GetLastRoundTripTime(uint64_t *_retval) 
{
#ifdef WINDOWS_ROUNDTRIP
	if ((wrt_exec_start.QuadPart == -1) || (wrt_exec_end.QuadPart == -1)) {
		*_retval = 0;
		return NS_ERROR_NOT_AVAILABLE;
	} else {
		LARGE_INTEGER freq;
		if (!QueryPerformanceFrequency(&freq)) {
			DEBUG_LOG_STATUS("GetLastRoundTrupTime", "cannot read performance counter frequency.");
			return NS_ERROR_NOT_AVAILABLE;
		}
		double diff = (double) (wrt_exec_end.QuadPart - wrt_exec_start.QuadPart);
		double time = diff / (double) freq.QuadPart * 1000000000;
		*_retval = (uint64_t) time;
		return NS_OK;
	}
#else /* WINDOWS_ROUNDTRIP */
	return NS_ERROR_NOT_IMPLEMENTED;
#endif /* WINDOWS_ROUNDTRIP */
}
示例#12
0
/* void setScalarArgument (in PRUint32 number, in jsval argument); */
NS_IMETHODIMP dpoCKernel::SetScalarArgument(PRUint32 number, const jsval & argument,
        const jsval & isInteger, const jsval & highPrecision)
{
    cl_int err_code;
    bool isIntegerB;
    bool isHighPrecisionB;

    /* skip internal arguments */
    number = number + DPO_NUMBER_OF_ARTIFICIAL_ARGS;

    if (!JSVAL_IS_BOOLEAN(isInteger)) {
        DEBUG_LOG_STATUS("SetScalarArgument", "illegal isInteger argument.");

        return NS_ERROR_INVALID_ARG;
    }
    isIntegerB = JSVAL_TO_BOOLEAN(isInteger);

    if (!JSVAL_IS_BOOLEAN(highPrecision)) {
        DEBUG_LOG_STATUS("SetScalarArgument", "illegal highPrecision argument.");

        return NS_ERROR_INVALID_ARG;
    }
    isHighPrecisionB = JSVAL_TO_BOOLEAN(highPrecision);

    if (!JSVAL_IS_NUMBER(argument)) {
        DEBUG_LOG_STATUS("SetScalarArgument", "illegal number argument.");

        return NS_ERROR_INVALID_ARG;
    }

    if (JSVAL_IS_INT(argument)) {
        int value = JSVAL_TO_INT(argument);
        DEBUG_LOG_STATUS("SetScalarArgument", "(JSVAL_IS_INT(argument)) isIntegerB: " << isIntegerB  << " isHighPrecisionB " << isHighPrecisionB);

        if (isIntegerB) {
            DEBUG_LOG_STATUS("SetScalarArgument", "(JSVAL_IS_INT(argument)) setting integer argument " << number << " to integer value " << value);
            cl_int intVal = (cl_int) value;
            err_code = clSetKernelArg(kernel, number, sizeof(cl_int), &intVal);
        } else if (isHighPrecisionB) {
            DEBUG_LOG_STATUS("SetScalarArgument", "setting double argument " << number << " to integer value " << value);
            cl_double doubleVal = (cl_double) value;
            err_code = clSetKernelArg(kernel, number, sizeof(cl_double), &doubleVal);
        } else {
            DEBUG_LOG_STATUS("SetScalarArgument", "setting float argument " << number << " to integer value " << value);
            cl_float floatVal = (cl_float) value;
            err_code = clSetKernelArg(kernel, number, sizeof(cl_float), &floatVal);
        }

        if (err_code != CL_SUCCESS) {
            DEBUG_LOG_ERROR("SetScalarArgument", err_code);
            return NS_ERROR_NOT_AVAILABLE;
        }
    } else if (JSVAL_IS_DOUBLE(argument)) {
        double value = JSVAL_TO_DOUBLE(argument);
        DEBUG_LOG_STATUS("SetScalarArgument", "(JSVAL_IS_DOUBLE(argument)) isIntegerB: " << isIntegerB  << " isHighPrecisionB " << isHighPrecisionB);

        if (isIntegerB) {
            DEBUG_LOG_STATUS("SetScalarArgument", "setting int formal argument " << number << " using double value " << value);
            cl_int intVal = (cl_int) value;
            err_code = clSetKernelArg(kernel, number, sizeof(cl_int), &intVal);
        } else if (isHighPrecisionB) {
            DEBUG_LOG_STATUS("SetScalarArgument", "setting double formal argument " << number << " using double value " << value);
            cl_double doubleVal = (cl_double) value;
            err_code = clSetKernelArg(kernel, number, sizeof(cl_double), &doubleVal);
        } else {
            DEBUG_LOG_STATUS("SetScalarArgument", "setting float formal argument " << number << " using double value " << value);
            cl_float floatVal = (cl_float) value;
            err_code = clSetKernelArg(kernel, number, sizeof(cl_float), &floatVal);
        }

        if (err_code != CL_SUCCESS) {
            DEBUG_LOG_ERROR("SetScalarArgument", err_code);
            return NS_ERROR_NOT_AVAILABLE;
        }
    } else {
        DEBUG_LOG_STATUS("SetScalarArgument", "illegal number argument.");

        return NS_ERROR_INVALID_ARG;
    }

    return NS_OK;
}
示例#13
0
void OCLUtil::Init() {
  cl_int error = 0;   // Used to handle error codes
  cl_uint numberOfPlatforms;
  cl_uint nplatforms;
  cl_device_id device;
  size_t length;

  createContextSuccess = false;
  createCommandQueueSuccess = false;
  openclModule = 0;

  // Load OpenCL library
  openclModule = LoadLibrary(TEXT("OpenCL.dll"));
  if (!openclModule) {
      DEBUG_LOG_ERROR("Init", "Load OpenCL.dll failed.");
      return;
  }

  // Initialize function entries
#define INITIALIZE_FUNCTION_ENTRY(name) checkFunction(__##name = (name##Function) GetProcAddress(openclModule, #name));
  OPENCL_FUNCTION_LIST(INITIALIZE_FUNCTION_ENTRY)
#undef INITIALIZE_FUNCTION_ENTRY

  openclFlag = true;
  // Platform
  error = clGetPlatformIDs( 0, 0, &nplatforms);
  if (error != CL_SUCCESS) {
      DEBUG_LOG_ERROR("Init", "Get platform number error: " << error);
      return;
  }
  cl_platform_id* m_platforms = new cl_platform_id[nplatforms];
  error = clGetPlatformIDs(nplatforms, m_platforms, &numberOfPlatforms);
  if (error != CL_SUCCESS) {
      DEBUG_LOG_ERROR("Init", "Get platform id error: " << error);
      delete [] m_platforms;
      return;
  }

  const cl_uint maxNameLength = 256;
  char name[maxNameLength];
  for (cl_uint i = 0; i < numberOfPlatforms; i++) {
      error = clGetPlatformInfo(m_platforms[i], CL_PLATFORM_NAME, maxNameLength * sizeof(char), name, 0);
      if (error != CL_SUCCESS) {
          DEBUG_LOG_ERROR("Init", "Get platform name error: " << error);
      } else {
          if (!strcmp(name, "Intel(R) OpenCL") || !strcmp(name, "AMD Accelerated Parallel Processing")) {
              platform_ = m_platforms[i];
              break;
          }
      }
  }
  delete [] m_platforms;
  if (!platform_) {
      DEBUG_LOG_ERROR("Init", "Find Intel or AMD platform failed.");
      return;
  }

  // Version
  char* temp;
  error = getPlatformPropertyHelper(CL_PLATFORM_VERSION, temp);
  if (error != CL_SUCCESS) {
      DEBUG_LOG_ERROR("Init", "Get platform version error: " << error);
  } else {
      version_ =  std::string(temp);
      delete [] temp;
  }
  // Name
  error = getPlatformPropertyHelper(CL_PLATFORM_NAME, temp);
  if (error != CL_SUCCESS) {
      DEBUG_LOG_ERROR("Init", "Get platform name error: " << error);
  } else {
      name_ = std::string(temp);
      delete [] temp;
  }
  // Vendor
  error = getPlatformPropertyHelper(CL_PLATFORM_VENDOR, temp);
  if (error != CL_SUCCESS) {
      DEBUG_LOG_ERROR("Init", "Get platform vendor error: " << error);
  } else {
      vendor_ = std::string(temp);
      delete [] temp;
  }
  // Profile
  error = getPlatformPropertyHelper(CL_PLATFORM_PROFILE, temp);
  if (error != CL_SUCCESS) {
      DEBUG_LOG_ERROR("Init", "Get platform profile error: " << error);
  } else {
      profile_ = std::string(temp);
      delete [] temp;
  }
  // Platform Extensions
  error = getPlatformPropertyHelper(CL_PLATFORM_EXTENSIONS, temp);
  if (error != CL_SUCCESS) {
      DEBUG_LOG_ERROR("Init", "Get platform extension error: " << error);
  } else {
      platformExtensions_ = std::string(temp);
      delete [] temp;
  }

  // Number of Device
  cl_uint number;
  error = clGetDeviceIDs(platform_, CL_DEVICE_TYPE_ALL, 0, 0, &number);
  if (error != CL_SUCCESS) {
     DEBUG_LOG_ERROR("Init", "Get device number error: " << error);
  } else {
     numberOfDevices_ = number;
  }

  // Context
  cl_context_properties context_properties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform_, 0};
  context_ = clCreateContextFromType(context_properties, CL_DEVICE_TYPE_CPU, &reportCLError, this, &error);
  if (error != CL_SUCCESS) {
      DEBUG_LOG_ERROR("Init", "Create context error: " << error);
      return;
  }
  createContextSuccess = true;

  // Device
  size_t cb;
  error = clGetContextInfo(context_, CL_CONTEXT_DEVICES, 0, 0, &cb);
  if (error != CL_SUCCESS) {
      DEBUG_LOG_ERROR("Init", "Get context device number error: " << error);
      return;
  }
  cl_device_id* devices = (cl_device_id*)malloc(sizeof(cl_device_id) * cb);
  if (!devices) {
      DEBUG_LOG_STATUS("Init", "Cannot allocate device list");
      return;
  }
  error = clGetContextInfo(context_, CL_CONTEXT_DEVICES, cb, devices, 0);
  if (error != CL_SUCCESS) {
      DEBUG_LOG_ERROR("Init", "Get context device info error: " << error);
      free(devices);
      return;
  }

  // Command Queue
  queue_ = clCreateCommandQueue(context_, devices[0],
#ifdef CLPROFILE
      CL_QUEUE_PROFILING_ENABLE |
#endif // CLPROFILE
#ifdef OUTOFORDERQUEUE
      CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
#endif // OUTOFORDERQUEUE
      0,
      &error);
  if (error != CL_SUCCESS) {
      DEBUG_LOG_ERROR("Init", "Create command queue error: " << error);
      free(devices);
      return;
  }
  DEBUG_LOG_STATUS("Init", "queue is " << queue_);
  createCommandQueueSuccess = true;

  error = clGetDeviceInfo(devices[0], CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(alignmentSize_), &alignmentSize_, 0);
  if (error != CL_SUCCESS) {
      // We can tolerate this, simply do not align.
      alignmentSize_ = 8;
  }
  // We use byte, not bits.
  if (alignmentSize_ % 8) {
      // They align on sub-byte borders? Odd architecture this must be. Give up.
      alignmentSize_ = 1;
  } else {
      alignmentSize_ = alignmentSize_ / 8;
  }

  // Device Extensions
  error = clGetCommandQueueInfo(queue_, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, 0);
  if (error != CL_SUCCESS) {
      DEBUG_LOG_ERROR("Init", "Get command queue device error: " << error);
  } else {
      error = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, 0, &length);
      if (error == CL_SUCCESS) {
          temp = new char[length+1];
          error = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, length, temp, 0);
          deviceExtensions_ = std::string(temp);
          delete [] temp;
      } else {
          DEBUG_LOG_ERROR("Init", "Get device extension error: " << error);
      }
  }
  temp = 0;

  free(devices);
}
示例#14
0
/* [implicit_jscontext] dpoIData mapData (in jsval source); */
NS_IMETHODIMP dpoCContext::MapData(const jsval & source, JSContext *cx, dpoIData **_retval)
{
  cl_int err_code;
  nsresult result;
  JSObject *tArray;
  nsCOMPtr<dpoCData> data;

  result = ExtractArray( source, &tArray, cx);
  if (NS_SUCCEEDED(result)) {
    // we have a typed array
    data = new dpoCData( this);
    if (data == NULL) {
      DEBUG_LOG_STATUS("MapData", "Cannot create new dpoCData object");
      return NS_ERROR_OUT_OF_MEMORY;
    }

    // USE_HOST_PTR is save as the CData object will keep the associated typed array alive as long as the
    // memory buffer lives
    cl_mem_flags flags = CL_MEM_READ_ONLY;
    void *tArrayBuffer = NULL;
    size_t arrayByteLength = JS_GetTypedArrayByteLength(tArray, cx);
    if(arrayByteLength == 0) {
        arrayByteLength = 1;
    }
    else {
        tArrayBuffer = GetPointerFromTA(tArray, cx);
        flags |= CL_MEM_USE_HOST_PTR;
    }

    cl_mem memObj = CreateBuffer(flags, arrayByteLength, tArrayBuffer , &err_code);
    if (err_code != CL_SUCCESS) {
      DEBUG_LOG_ERROR("MapData", err_code);
      return NS_ERROR_NOT_AVAILABLE;
    }

    result = data->InitCData(cx, cmdQueue, memObj, JS_GetTypedArrayType(tArray, cx), JS_GetTypedArrayLength(tArray, cx), 
        JS_GetTypedArrayByteLength(tArray, cx), tArray);

#ifdef SUPPORT_MAPPING_ARRAYS
  } else if (JSVAL_IS_OBJECT(source)) {
    // maybe it is a regular array. 
    //
    // WARNING: We map a pointer to the actual array here. All this works on CPU only
    //          and only of the OpenCL compiler knows what to do! For the current Intel OpenCL SDK
    //          this works but your milage may vary.
    const jsval *elems = UnsafeDenseArrayElements(cx, JSVAL_TO_OBJECT(source));
    if (elems != NULL) {
      data = new dpoCData( this);
      if (data == NULL) {
        DEBUG_LOG_STATUS("MapData", "Cannot create new dpoCData object");
        return NS_ERROR_OUT_OF_MEMORY;
      }
	  cl_mem memObj = CreateBuffer(CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, sizeof(double *), &elems, &err_code);
      if (err_code != CL_SUCCESS) {
        DEBUG_LOG_ERROR("MapData", err_code);
        return NS_ERROR_NOT_AVAILABLE;
      }
      result = data->InitCData(cx, cmdQueue, memObj, 0 /* bogus type */, 1, sizeof(double *), JSVAL_TO_OBJECT(source));
#ifndef DEBUG_OFF
    } else {
        DEBUG_LOG_STATUS("MapData", "No elements returned!");
#endif /* DEBUG_OFF */
    }
#endif /* SUPPORT_MAPPING_ARRAYS */
  }

  if (NS_SUCCEEDED(result)) {
    data.forget((dpoCData **)_retval);
  }

  return result;
}
示例#15
0
/* dpoIKernel compileKernel (in AString source, in AString kernelName, [optional] in AString options); */
NS_IMETHODIMP dpoCContext::CompileKernel(const nsAString & source, const nsAString & kernelName, const nsAString & options, dpoIKernel **_retval)
{
	cl_program program;
	cl_kernel kernel;
	cl_int err_code, err_code2;
	cl_uint numDevices;
	cl_device_id *devices = NULL;
	size_t actual;
	char *sourceStr, *optionsStr, *kernelNameStr;
	nsCOMPtr<dpoCKernel> ret;
	nsresult result;

	sourceStr = ToNewUTF8String(source);
	DEBUG_LOG_STATUS("CompileKernel", "Source: " << sourceStr);
	program = clCreateProgramWithSource(context, 1, (const char**)&sourceStr, NULL, &err_code);
	nsMemory::Free(sourceStr);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("CompileKernel", err_code);
		return NS_ERROR_ILLEGAL_VALUE;
	}

	optionsStr = ToNewUTF8String(options);
	err_code = clBuildProgram(program, 0, NULL, optionsStr, NULL, NULL);
	nsMemory::Free(optionsStr);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("CompileKernel", err_code);
	}
		
	err_code2 = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numDevices, NULL);
	if (err_code2 != CL_SUCCESS) {
		DEBUG_LOG_ERROR("CompileKernel", err_code2);
		goto FAIL;
	} 

	devices = (cl_device_id *) nsMemory::Alloc(numDevices * sizeof(cl_device_id));
	err_code2 = clGetProgramInfo(program, CL_PROGRAM_DEVICES, numDevices * sizeof(cl_device_id), devices, NULL);
	if (err_code2 != CL_SUCCESS) {
		DEBUG_LOG_ERROR("CompileKernel", err_code);
		goto FAIL;
	} 
	err_code2 = clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &actual);
	if (actual > buildLogSize) {
		if (buildLog != NULL) {
			nsMemory::Free(buildLog);
		}
		buildLog = (char *) nsMemory::Alloc(actual * sizeof(char));
		if (buildLog == NULL) {
			DEBUG_LOG_STATUS("CompileKernel", "Cannot allocate buildLog");
			buildLogSize = 0;
			goto DONE;
		}
		buildLogSize = actual;
		err_code2 = clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &actual);
	}
			
	if (err_code2 != CL_SUCCESS) {
		DEBUG_LOG_ERROR("CompileKernel", err_code);
		goto FAIL;
	}

	DEBUG_LOG_STATUS("CompileKernel", "buildLog: " << buildLog);
	goto DONE;

FAIL:
	if (buildLog != NULL) {
		nsMemory::Free(buildLog);
		buildLog = NULL;
		buildLogSize = 0;
	}

DONE:
	if (devices != NULL) {
		nsMemory::Free(devices);
	}
	
	kernelNameStr = ToNewUTF8String(kernelName);
	kernel = clCreateKernel(program, kernelNameStr, &err_code);
	nsMemory::Free( kernelNameStr);
	clReleaseProgram(program);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("CompileKernel", err_code);
		return NS_ERROR_NOT_AVAILABLE;
	}

	ret = new dpoCKernel(this);
	if (ret == NULL) {
		clReleaseKernel(kernel);
		DEBUG_LOG_STATUS("CompileKernel", "Cannot create new dpoCKernel object");
		return NS_ERROR_OUT_OF_MEMORY;
	}

	/* all kernels share the single buffer for the failure code */
	result = ret->InitKernel(cmdQueue, kernel, kernelFailureMem);
	if (NS_FAILED(result)) {
		clReleaseKernel(kernel);
		return result;
	}

	ret.forget((dpoCKernel **)_retval);
	
	return NS_OK;
}
示例#16
0
nsresult dpoCContext::InitContext(cl_platform_id platform)
{
	cl_int err_code;
	cl_device_id *devices;
	size_t cb;

#ifdef INCREMENTAL_MEM_RELEASE
	defer_list = (cl_mem *)nsMemory::Alloc(DEFER_LIST_LENGTH * sizeof(cl_mem));
	defer_pos = 0;
	defer_max = DEFER_LIST_LENGTH;
#endif /* INCREMENTAL_MEM_RELEASE */

	cl_context_properties context_properties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform, NULL};
	
	context = clCreateContextFromType(context_properties, CL_DEVICE_TYPE_CPU, ReportCLError, this, &err_code);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("InitContext", err_code);
		return NS_ERROR_NOT_AVAILABLE;
	}

	err_code = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("InitContext", err_code);
		return NS_ERROR_NOT_AVAILABLE;
	}

	devices = (cl_device_id *)nsMemory::Alloc(sizeof(cl_device_id)*cb);
	if (devices == NULL) {
		DEBUG_LOG_STATUS("InitContext", "Cannot allocate device list");
		return NS_ERROR_OUT_OF_MEMORY;
	}

	err_code = clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("InitContext", err_code);
		nsMemory::Free(devices);
		return NS_ERROR_NOT_AVAILABLE;
	}

	cmdQueue = clCreateCommandQueue(context, devices[0], 
#ifdef CLPROFILE 
		CL_QUEUE_PROFILING_ENABLE |
#endif /* CLPROFILE */
#ifdef OUTOFORDERQUEUE
		CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
#endif /* OUTOFORDERQUEUE */
		0,
		&err_code);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("InitContext", err_code);
		nsMemory::Free(devices);
		return NS_ERROR_NOT_AVAILABLE;
	}
	
	DEBUG_LOG_STATUS("InitContext", "queue is " << cmdQueue);

	err_code = clGetDeviceInfo(devices[0], CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(alignment_size), &alignment_size, NULL);
	if (err_code != CL_SUCCESS) {
		/* we can tolerate this, simply do not align */
		alignment_size = 8;
	} 
	/* we use byte, not bits */
	if (alignment_size % 8) {
		/* they align on sub-byte borders? Odd architecture this must be. Give up */
		alignment_size = 1;
	} else {
		alignment_size = alignment_size / 8;
	}

	nsMemory::Free(devices);

	kernelFailureMem = CreateBuffer(CL_MEM_READ_WRITE, sizeof(int), NULL, &err_code);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("InitContext", err_code);
		return NS_ERROR_NOT_AVAILABLE;
	}

	return NS_OK;
}
示例#17
0
/* PRUint32 run (in PRUint32 rank, [array, size_is (rank)] in PRUint32 shape, [array, size_is (rank), optional] in PRUint32 tile); */
NS_IMETHODIMP dpoCKernel::Run(PRUint32 rank, PRUint32 *shape, PRUint32 *tile, PRUint32 *_retval)
{
    cl_int err_code;
    cl_event runEvent, readEvent, writeEvent;
    size_t *global_work_size;
    size_t *local_work_size;
    const int zero = 0;

    DEBUG_LOG_STATUS("Run", "preparing execution of kernel");

    if (sizeof(size_t) == sizeof(PRUint32)) {
        global_work_size = (size_t *) shape;
    } else {
        global_work_size = (size_t *) nsMemory::Alloc(rank * sizeof(size_t));
        if (global_work_size == NULL) {
            DEBUG_LOG_STATUS("Run", "allocation of global_work_size failed");
            return NS_ERROR_OUT_OF_MEMORY;
        }
        for (int cnt = 0; cnt < rank; cnt++) {
            global_work_size[cnt] = shape[cnt];
        }
    }

#ifdef USE_LOCAL_WORKSIZE
    if (tile == NULL) {
        local_work_size = NULL;
    } else {
        if ((sizeof(size_t) == sizeof(PRUint32))) {
            local_work_size = (size_t *) tile;
        } else {
            local_work_size = (size_t *) nsMemory::Alloc(rank * sizeof(size_t));
            if (local_work_size == NULL) {
                DEBUG_LOG_STATUS("Run", "allocation of local_work_size failed");
                return NS_ERROR_OUT_OF_MEMORY;
            }
            for (int cnt = 0; cnt < rank; cnt++) {
                local_work_size[cnt] = (size_t) tile[cnt];
            }
        }
    }
#else /* USE_LOCAL_WORKSIZE */
    local_work_size = NULL;
#endif /* USE_LOCAL_WORKSIZE */

    DEBUG_LOG_STATUS("Run", "setting failure code to 0");

    err_code = clEnqueueWriteBuffer(cmdQueue, failureMem, CL_FALSE, 0, sizeof(int), &zero, 0, NULL, &writeEvent);
    if (err_code != CL_SUCCESS) {
        DEBUG_LOG_ERROR("Run", err_code);
        return NS_ERROR_ABORT;
    }

    DEBUG_LOG_STATUS("Run", "enqueing execution of kernel");

#ifdef WINDOWS_ROUNDTRIP
    dpoCContext::RecordBeginOfRoundTrip(parent);
#endif /* WINDOWS_ROUNDTRIP */

    err_code = clEnqueueNDRangeKernel(cmdQueue, kernel, rank, NULL, global_work_size, NULL, 1, &writeEvent, &runEvent);
    if (err_code != CL_SUCCESS) {
        DEBUG_LOG_ERROR("Run", err_code);
        return NS_ERROR_ABORT;
    }

    DEBUG_LOG_STATUS("Run", "reading failure code");

    err_code = clEnqueueReadBuffer(cmdQueue, failureMem, CL_FALSE, 0, sizeof(int), _retval, 1, &runEvent, &readEvent);
    if (err_code != CL_SUCCESS) {
        DEBUG_LOG_ERROR("Run", err_code);
        return NS_ERROR_ABORT;
    }

    DEBUG_LOG_STATUS("Run", "waiting for execution to finish");

    // For now we always wait for the run to complete.
    // In the long run, we may want to interleave this with JS execution and only sync on result read.
    err_code = clWaitForEvents( 1, &readEvent);

    DEBUG_LOG_STATUS("Run", "first event fired");

    if (err_code != CL_SUCCESS) {
        DEBUG_LOG_ERROR("Run", err_code);
        return NS_ERROR_ABORT;
    }
#ifdef WINDOWS_ROUNDTRIP
    dpoCContext::RecordEndOfRoundTrip(parent);
#endif /* WINDOWS_ROUNDTRIP */

#ifdef CLPROFILE
#ifdef CLPROFILE_ASYNC
    err_code = clSetEventCallback( readEvent, CL_COMPLETE, &dpoCContext::CollectTimings, parent);

    DEBUG_LOG_STATUS("Run", "second event fired");
    if (err_code != CL_SUCCESS) {
        DEBUG_LOG_ERROR("Run", err_code);
        return NS_ERROR_ABORT;
    }
#else /* CLPROFILE_ASYNC */
    dpoCContext::CollectTimings(readEvent,CL_COMPLETE,parent);
#endif /* CLPROFILE_ASYNC */
#endif /* CLPROFILE */

    DEBUG_LOG_STATUS("Run", "execution completed successfully, start cleanup");

    if (global_work_size != (size_t *) shape) {
        nsMemory::Free(global_work_size);
    }
#ifdef USE_LOCAL_WORKSIZE
    if (local_work_size != (size_t *) tile) {
        nsMemory::Free(local_work_size);
    }
#endif /* USE_LOCAL_WORKSIZE */

    err_code = clReleaseEvent(readEvent);
    err_code = clReleaseEvent(runEvent);
    err_code = clReleaseEvent(writeEvent);

    if (err_code != CL_SUCCESS) {
        DEBUG_LOG_ERROR("Run", err_code);
        return NS_ERROR_ABORT;
    }

    DEBUG_LOG_STATUS("Run", "cleanup complete");

    return NS_OK;
}