Example #1
BLARGG_EXPORT int fex_err_code( fex_err_t err )
{
	int code = err_code( err );
	return (code >= 0 ? code : fex_err_generic);
}
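
A minimal caller sketch (not part of the File_Extractor sources) showing how the numeric code above might be combined with fex_err_details() from Example #15. The header name fex.h and the convention that a NULL fex_err_t means success are assumptions here.

/* Hypothetical usage sketch: report a fex error with its numeric code and
 * detail string.  Assumes fex.h declares fex_err_t, fex_err_code() and
 * fex_err_details(), and that NULL means "no error". */
#include <stdio.h>
#include "fex.h"

static void report_fex_error( fex_err_t err )
{
	if ( err != NULL )
		fprintf( stderr, "fex error %d: %s\n",
				fex_err_code( err ), fex_err_details( err ) );
}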
Example #2
int main(int argc, char** argv)
{
	if (argc != 2)
	{
		std::cout << "Usage: ./pi_vocl num\n"
		          << "\twhere num = 1, 4 or 8\n";
		return EXIT_FAILURE;
	}

	int vector_size = atoi(argv[1]);

	// Define some vector size specific constants
	unsigned int ITERS, WGS;
	if (vector_size == 1)
	{
		ITERS = 262144;
		WGS = 8;
	}
	else if (vector_size == 4)
	{
		ITERS = 262144 / 4;
		WGS = 32;
	}
	else if (vector_size == 8)
	{
		ITERS = 262144 / 8;
		WGS = 64;
	}
	else
	{
		std::cerr << "Invalid vector size\n";
		return EXIT_FAILURE;
	}

	// Set some default values:
	// Default number of steps (updated later to device preferable)
	unsigned int in_nsteps = INSTEPS;
	// Default number of iterations
	unsigned int niters = ITERS;
	unsigned int work_group_size = WGS;

	try
	{
		// Create context, queue and build program
		cl::Context context(DEVICE);
		cl::CommandQueue queue(context);
		cl::Program program(context, util::loadProgram("../pi_vocl.cl"), true);
		cl::Kernel kernel;

		// Now that we know the size of the work_groups, we can set the number of work
		// groups, the actual number of steps, and the step size
		unsigned int nwork_groups = in_nsteps/(work_group_size*niters);

		// Get the max work group size for the kernel pi on our device
		unsigned int max_size;
        std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
		if (vector_size == 1)
		{
			kernel = cl::Kernel(program, "pi");
			max_size = kernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(devices[0]);
		}
		else if (vector_size == 4)
		{
			kernel = cl::Kernel(program, "pi_vec4");
			max_size = kernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(devices[0]);
		}
		else if (vector_size == 8)
		{
			kernel = cl::Kernel(program, "pi_vec8");
			max_size = kernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(devices[0]);
		}

		if (max_size > work_group_size)
		{
			work_group_size = max_size;
			nwork_groups = in_nsteps/(work_group_size*niters);
		}

		if (nwork_groups < 1)
		{
			nwork_groups = devices[0].getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
			work_group_size = in_nsteps/(nwork_groups*niters);
		}

		unsigned int nsteps = work_group_size * niters * nwork_groups;
		float step_size = 1.0f / (float) nsteps;

		// Vector to hold partial sum
		std::vector<float> h_psum(nwork_groups);

		std::cout << nwork_groups << " work groups of size " << work_group_size << ".\n"
		          << nsteps << " Integration steps\n";

        cl::Buffer d_partial_sums(context, CL_MEM_WRITE_ONLY, sizeof(float) * nwork_groups);

        // Start the timer
        util::Timer timer;

        // Execute the kernel over the entire range of our 1D input data set
        // using the maximum number of work group items for this device
        cl::NDRange global(nwork_groups * work_group_size);
        cl::NDRange local(work_group_size);

        kernel.setArg(0, niters);
        kernel.setArg(1, step_size);
        cl::LocalSpaceArg localmem = cl::Local(sizeof(float) * work_group_size);
        kernel.setArg(2, localmem);
        kernel.setArg(3, d_partial_sums);
        queue.enqueueNDRangeKernel(kernel, cl::NullRange, global, local);

        cl::copy(queue, d_partial_sums, h_psum.begin(), h_psum.end());

        // Complete the sum and compute the final integral value
        float pi_res = 0.0;
        for (std::vector<float>::iterator x = h_psum.begin(); x != h_psum.end(); x++)
            pi_res += *x;
        pi_res *= step_size;

        // Stop the timer
		double rtime = static_cast<double>(timer.getTimeMilliseconds()) / 1000.;
        std::cout << "The calculation ran in " << rtime << " seconds\n"
                  << " pi = " << pi_res << " for " << nsteps << " steps\n";

        return EXIT_SUCCESS;


	}
	catch (cl::Error err)
	{
		std::cout << "Exception\n";
		std::cerr
            << "ERROR: "
            << err.what()
            << "("
            << err_code(err.err())
            << ")"
            << std::endl;
        return EXIT_FAILURE;
	}
}
Example #3
File: LdmProxy.c  Project: funnelfiasco/LDM
/*
 * Returns a new instance of an LDM proxy. Can take a while because it
 * establishes a connection to the LDM.
 *
 * Arguments:
 *      host            Identifier of the host on which an LDM server is
 *                      running.
 *      instance        Pointer to a pointer to the new instance. "*instance"
 *                      is set upon successful return.
 * Returns:
 *      0               Success. "*instance" is set.
 *      LP_SYSTEM       System error. "log_start()" called.
 *      LP_TIMEDOUT     Connection attempt timed-out. "log_start()" called.
 *      LP_HOSTUNREACH  Host is unreachable. "log_start()" called.
 *      LP_RPC_ERROR    RPC error. "log_start()" called.
 *      LP_LDM_ERROR    LDM error. "log_start()" called.
 */
LdmProxyStatus
lp_new(
    const char* const   host,
    LdmProxy** const    instance)
{
    LdmProxyStatus      status = 0;     /* success */
    size_t              nbytes = sizeof(LdmProxy);
    LdmProxy*           proxy = (LdmProxy*)malloc(nbytes);

    if (NULL == proxy) {
        log_serror("Couldn't allocate %lu bytes for new LdmProxy", nbytes);
        status = LP_SYSTEM;
    }
    else {
        proxy->host = strdup(host);

        if (NULL == proxy->host) {
            LOG_SERROR1("Couldn't duplicate string \"%s\"", host);
            status = LP_SYSTEM;
        }
        else {
            CLIENT*         clnt = NULL;
            ErrorObj*       error = ldm_clnttcp_create_vers(host, LDM_PORT, 6,
                    &clnt, NULL, NULL);

            if (!error) {
                proxy->version = 6;
                proxy->hiya = my_hiya_6;
                proxy->send = my_send_6;
                proxy->flush = my_flush_6;
            }
            else if (LDM_CLNT_BAD_VERSION == err_code(error)) {
                /* Couldn't connect due to protocol version. */
                err_free(error);

                error = ldm_clnttcp_create_vers(host, LDM_PORT, 5,
                        &clnt, NULL, NULL);

                if (!error) {
                    proxy->version = 5;
                    proxy->hiya = my_hiya_5;
                    proxy->send = my_send_5;
                    proxy->flush = my_flush_5;
                }
            }

            if (error) {
                LOG_START1("%s", err_message(error));
                status = convertStatus(error);  /* convert before freeing "error" */
                err_free(error);
                free(proxy->host);
            }
            else {
                proxy->clnt = clnt;
                proxy->rpcTimeout = rpcTimeout;
            }
        }                                       /* "proxy->host" allocated */

        if (LP_OK == status) {
            *instance = proxy;
        }
        else {
            free(proxy);
        }
    }                                           /* "proxy" allocated */

    return status;
}
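
A minimal caller sketch based on the contract documented above for lp_new(): LP_OK (0) means "*instance" was set, and any other LdmProxyStatus means failure with log_start() already called. The cleanup routine lp_free() is an assumption about the surrounding LDM API, not something shown in this example.

/* Hypothetical caller sketch for lp_new(); lp_free() is assumed. */
static int connect_to_ldm(
    const char* const   host)
{
    LdmProxy*       proxy = NULL;
    LdmProxyStatus  status = lp_new(host, &proxy);

    if (LP_OK != status) {
        /* LP_TIMEDOUT, LP_HOSTUNREACH, etc.; lp_new() already called
         * log_start(), so the caller only decides how to react. */
        return -1;
    }

    /* ... use "proxy" (its hiya/send/flush entry points) ... */

    lp_free(proxy);                 /* assumed cleanup routine */
    return 0;
}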
Example #4
int main(void)
{
    std::vector<float> h_a(LENGTH);                // a vector 
    std::vector<float> h_b(LENGTH);                // b vector 	
    std::vector<float> h_c(LENGTH, 0xdeadbeef);    // c = a + b, from compute device

    cl::Buffer d_a;                        // device memory used for the input  a vector
    cl::Buffer d_b;                        // device memory used for the input  b vector
    cl::Buffer d_c;                       // device memory used for the output c vector

    // Fill vectors a and b with random float values
    int count = LENGTH;
    for(int i = 0; i < count; i++)
    {
        h_a[i]  = rand() / (float)RAND_MAX;
        h_b[i]  = rand() / (float)RAND_MAX;
    }

    try 
    {
    	// Create a context
        cl::Context context(DEVICE);

        // Load in kernel source, creating a program object for the context

        cl::Program program(context, util::loadProgram("vadd.cl"), true);

        // Get the command queue
        cl::CommandQueue queue(context);

        // Create the kernel functor
 
        cl::make_kernel<cl::Buffer, cl::Buffer, cl::Buffer, int> vadd(program, "vadd");

        d_a   = cl::Buffer(context, h_a.begin(), h_a.end(), true);
        d_b   = cl::Buffer(context, h_b.begin(), h_b.end(), true);

        d_c  = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * LENGTH);

        util::Timer timer;

        vadd(
            cl::EnqueueArgs(
                queue,
                cl::NDRange(count)), 
            d_a,
            d_b,
            d_c,
            count);

        queue.finish();

        double rtime = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0;
        printf("\nThe kernels ran in %lf seconds\n", rtime);

        cl::copy(queue, d_c, h_c.begin(), h_c.end());

        // Test the results
        int correct = 0;
        float tmp;
        for(int i = 0; i < count; i++) {
            tmp = h_a[i] + h_b[i]; // expected value for d_c[i]
            tmp -= h_c[i];                      // compute errors
            if(tmp*tmp < TOL*TOL) {      // correct if square deviation is less 
                correct++;                         //  than tolerance squared
            }
            else {

                printf(
                    " tmp %f h_a %f h_b %f  h_c %f \n",
                    tmp, 
                    h_a[i], 
                    h_b[i], 
                    h_c[i]);
            }
        }

        // summarize results
        printf(
            "vector add to find C = A+B:  %d out of %d results were correct.\n", 
            correct, 
            count);
    }
    catch (cl::Error err) {
        std::cout << "Exception\n";
        std::cerr
            << "ERROR: "
            << err.what()
            << "("
            << err_code(err.err())
            << ")"
            << std::endl;
    }
}
Example #5
int main(int argc, char** argv)
{
    cl_int          err;               // error code returned from OpenCL calls

    size_t dataSize = sizeof(float) * LENGTH;
    float*       h_a = (float *)malloc(dataSize);       // a vector
    float*       h_b = (float *)malloc(dataSize);       // b vector
    float*       h_c = (float *)malloc(dataSize);       // c vector (result)
    float*       h_d = (float *)malloc(dataSize);       // d vector (result)
    float*       h_e = (float *)malloc(dataSize);       // e vector
    float*       h_f = (float *)malloc(dataSize);       // f vector (result)
    float*       h_g = (float *)malloc(dataSize);       // g vector
    unsigned int correct;           // number of correct results

    size_t global;                  // global domain size

    cl_device_id     device_id = NULL;     // compute device id
    cl_context       context;       // compute context
    cl_command_queue commands;      // compute command queue
    cl_program       program;       // compute program
    cl_kernel        ko_vadd;       // compute kernel

    cl_mem d_a;                     // device memory used for the input  a vector
    cl_mem d_b;                     // device memory used for the input  b vector
    cl_mem d_c;                     // device memory used for the output c vector
    cl_mem d_d;                     // device memory used for the output d vector
    cl_mem d_e;                     // device memory used for the input e vector
    cl_mem d_f;                     // device memory used for the output f vector
    cl_mem d_g;                     // device memory used for the input g vector

    // Fill vectors a and b with random float values
    int i = 0;
    for(i = 0; i < LENGTH; i++){
        h_a[i] = rand() / (float)RAND_MAX;
        h_b[i] = rand() / (float)RAND_MAX;
        h_e[i] = rand() / (float)RAND_MAX;
        h_g[i] = rand() / (float)RAND_MAX;
    }

    // Set up platform and GPU device

    cl_uint numPlatforms;

    // Find number of platforms
    err = clGetPlatformIDs(0, NULL, &numPlatforms);
    checkError(err, "Finding platforms");
    if (numPlatforms == 0)
    {
        printf("Found 0 platforms!\n");
        return EXIT_FAILURE;
    }

    // Get all platforms
    cl_platform_id Platform[numPlatforms];
    err = clGetPlatformIDs(numPlatforms, Platform, NULL);
    checkError(err, "Getting platforms");

    // Secure a GPU
    for (i = 0; i < numPlatforms; i++)
    {
        err = clGetDeviceIDs(Platform[i], DEVICE, 1, &device_id, NULL);
        if (err == CL_SUCCESS)
        {
            break;
        }
    }

    if (device_id == NULL)
        checkError(err, "Getting device");

    err = output_device_info(device_id);
    checkError(err, "Outputting device info");
  
    // Create a compute context 
    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
    checkError(err, "Creating context");

    // Create a command queue
    commands = clCreateCommandQueue(context, device_id, 0, &err);
    checkError(err, "Creating command queue");

    // Create the compute program from the source buffer
    program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
    checkError(err, "Creating program");

    // Build the program  
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        size_t len;
        char buffer[2048];

        printf("Error: Failed to build program executable!\n%s\n", err_code(err));
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        printf("%s\n", buffer);
        return EXIT_FAILURE;
    }

    // Create the compute kernel from the program 
    ko_vadd = clCreateKernel(program, "vadd", &err);
    checkError(err, "Creating kernel");

    // Create the input (a, b, e, g) arrays in device memory
    // NB: we copy the host pointers here too
    d_a  = clCreateBuffer(context,  CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,  dataSize, h_a, &err);
    checkError(err, "Creating buffer d_a");
    d_b  = clCreateBuffer(context,  CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,  dataSize, h_b, &err);
    checkError(err, "Creating buffer d_b");
    d_e  = clCreateBuffer(context,  CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,  dataSize, h_e, &err);
    checkError(err, "Creating buffer d_e");
    d_g  = clCreateBuffer(context,  CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,  dataSize, h_g, &err);
    checkError(err, "Creating buffer d_g");
    
    // Create the output arrays in device memory
    d_c  = clCreateBuffer(context,  CL_MEM_READ_WRITE, dataSize, NULL, &err);
    checkError(err, "Creating buffer d_c");
    d_d  = clCreateBuffer(context,  CL_MEM_READ_WRITE, dataSize, NULL, &err);
    checkError(err, "Creating buffer d_d");
    d_f  = clCreateBuffer(context,  CL_MEM_WRITE_ONLY, dataSize, NULL, &err);
    checkError(err, "Creating buffer d_f"); 

    const int count = LENGTH;

    // Enqueue kernel - first time
    // Set the arguments to our compute kernel
    err  = clSetKernelArg(ko_vadd, 0, sizeof(cl_mem), &d_a);
    err |= clSetKernelArg(ko_vadd, 1, sizeof(cl_mem), &d_b);
    err |= clSetKernelArg(ko_vadd, 2, sizeof(cl_mem), &d_c);
    err |= clSetKernelArg(ko_vadd, 3, sizeof(unsigned int), &count);
    checkError(err, "Setting kernel arguments"); 
	
    // Execute the kernel over the entire range of our 1d input data set
    // letting the OpenCL runtime choose the work-group size
    global = count;
    err = clEnqueueNDRangeKernel(commands, ko_vadd, 1, NULL, &global, NULL, 0, NULL, NULL);
    checkError(err, "Enqueueing kernel 1st time");

    // Enqueue kernel - second time
    // Set different arguments to our compute kernel
    err  = clSetKernelArg(ko_vadd, 0, sizeof(cl_mem), &d_e);
    err |= clSetKernelArg(ko_vadd, 1, sizeof(cl_mem), &d_c);
    err |= clSetKernelArg(ko_vadd, 2, sizeof(cl_mem), &d_d);
    checkError(err, "Setting kernel arguments");
    
    // Enqueue the kernel again    
    err = clEnqueueNDRangeKernel(commands, ko_vadd, 1, NULL, &global, NULL, 0, NULL, NULL);
    checkError(err, "Enqueueing kernel 2nd time");

    // Enqueue kernel - third time
    // Set different (again) arguments to our compute kernel
    err  = clSetKernelArg(ko_vadd, 0, sizeof(cl_mem), &d_g);
    err |= clSetKernelArg(ko_vadd, 1, sizeof(cl_mem), &d_d);
    err |= clSetKernelArg(ko_vadd, 2, sizeof(cl_mem), &d_f);
    checkError(err, "Setting kernel arguments");

    // Enqueue the kernel again    
    err = clEnqueueNDRangeKernel(commands, ko_vadd, 1, NULL, &global, NULL, 0, NULL, NULL);
    checkError(err, "Enqueueing kernel 3rd time");

    // Read back the result from the compute device
    err = clEnqueueReadBuffer( commands, d_f, CL_TRUE, 0, sizeof(float) * count, h_f, 0, NULL, NULL );  
    checkError(err, "Reading back d_f");
    
    // Test the results
    correct = 0;
    float tmp;
    
    for(i = 0; i < count; i++)
    {
        tmp = h_a[i] + h_b[i] + h_e[i] + h_g[i];     // assign element i of a+b+e+g to tmp
        tmp -= h_f[i];                               // compute deviation of expected and output result
        if(tmp*tmp < TOL*TOL)                        // correct if square deviation is less than tolerance squared
            correct++;
        else {
            printf(" tmp %f h_a %f h_b %f h_e %f h_g %f h_f %f\n",tmp, h_a[i], h_b[i], h_e[i], h_g[i], h_f[i]);
        }
    }

    // summarize results
    printf("C = A+B+E+G:  %d out of %d results were correct.\n", correct, count);

    // cleanup then shutdown
    clReleaseMemObject(d_a);
    clReleaseMemObject(d_b);
    clReleaseMemObject(d_c);
    clReleaseMemObject(d_d);
    clReleaseMemObject(d_e);
    clReleaseMemObject(d_f);
    clReleaseMemObject(d_g);
    clReleaseProgram(program);
    clReleaseKernel(ko_vadd);
    clReleaseCommandQueue(commands);
    clReleaseContext(context);

    free(h_a);
    free(h_b);
    free(h_c);
    free(h_d);
    free(h_e);
    free(h_f);
    free(h_g);

    return 0;
}
Example #6
File: dusb_cmd.c  Project: TC01/tilibs
static int err_code_pkt(DUSBVirtualPacket *pkt)
{
	return err_code((((uint16_t)pkt->data[0]) << 8) | pkt->data[1]);
}
Example #7
File: ldm_clnt.c  Project: funnelfiasco/LDM
/*
 * Attempts to connect to an upstream LDM using a range of LDM versions.  The
 * versions are tried, in order, from highest to lowest.  This function returns
 * on the first successful attempt.  If the host is unknown or the RPC call
 * times-out, then the version-loop is prematurely terminated and this function
 * returns immediately.
 *
 * The client is responsible for freeing the client resources set by this
 * function on success.  Calls exitIfDone() after potentially lengthy
 * operations.
 *
 * Arguments:
 *   upName                The name of the upstream LDM host.
 *   port                  The port on which to connect.
 *   version               Program version.
 *   *client               Pointer to CLIENT structure. Set on success.
 *   *socket               The socket used for the connection.  May be NULL.
 *   *upAddr               The IP address of the upstream LDM host.  Set on
 *                         success.  May be NULL.
 * Returns:
 *    NULL                 Success.  "*client", "*socket", and "*upAddr" are
 *                         set.
 *   !NULL                 Error. "*client" is not set. err_code(RETURN_VALUE):
 *       LDM_CLNT_UNKNOWN_HOST         Unknown upstream host.
 *       LDM_CLNT_TIMED_OUT            Call to upstream host timed-out.
 *       LDM_CLNT_BAD_VERSION          Upstream LDM isn't given version.
 *       LDM_CLNT_NO_CONNECT           Other connection-related error.
 *       LDM_CLNT_SYSTEM_ERROR         A fatal system-error occurred.
 */
ErrorObj*
ldm_clnttcp_create_vers(
    const char* const            upName,
    const unsigned               port,
    unsigned const               version,
    CLIENT** const               client,
    int* const                   socket,
    struct sockaddr_in*          upAddr)
{
    ErrorObj*           error;
    struct sockaddr_in  addr;

    assert(upName != NULL);
    assert(client != NULL);

    /*
     * Get the IP address of the upstream LDM.  This is a potentially
     * lengthy operation.
     */
    (void)exitIfDone(0);
    error = ldm_clnt_addr(upName, &addr);

    if (error) {
        error = ERR_NEW1(LDM_CLNT_UNKNOWN_HOST, error, 
            "Couldn't get IP address of host %s", upName);
    }
    else {
        int                     sock;
        int                     errCode;
        CLIENT*                 clnt = NULL;

        /*
         * Connect to the remote port.  This is a potentially lengthy
         * operation.
         */
        (void)exitIfDone(0);
        error = ldm_clnt_tcp_create(&addr, version, port, &clnt, &sock);

        if (error) {
            errCode = err_code(error);

            if (LDM_CLNT_NO_CONNECT != errCode) {
                error =
                    ERR_NEW3(errCode, error, 
                        "Couldn't connect to LDM %d on %s "
                            "using port %d",
                        version, upName, port);
            }
            else {
                err_log_and_free(
                    ERR_NEW3(0, error, 
                        "Couldn't connect to LDM %d on %s using port "
                            "%d",
                        version, upName, port),
                    ERR_INFO);

                /*
                 * Connect using the portmapper.  This is a
                 * potentially lengthy operation.
                 */
                (void)exitIfDone(0);
                error = ldm_clnt_tcp_create(&addr, version, 0, &clnt, &sock);

                if (error) {
                    error =
                        ERR_NEW2(err_code(error), error, 
                            "Couldn't connect to LDM on %s "
                            "using either port %d or portmapper",
                            upName, port);
                }                       /* portmapper failure */
            }                           /* non-fatal port failure */
        }                               /* port failure */
        else {
            assert(!error);
            /*
             * Success.  Set the return arguments.
             */
            *client = clnt;

            if (socket)
                *socket = sock;
            if (upAddr)
                *upAddr = addr;
        }                                   /* clnt != NULL */
    }                                       /* got upstream IP address */

    return error;
}
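
An illustrative caller sketch for the version handling described in the comment above: try protocol 6 first and branch on err_code() of the returned ErrorObj, exactly as lp_new() in Example #3 does. This is a sketch of the documented contract, not the library's own retry policy; releasing the CLIENT with the standard ONC RPC call clnt_destroy() is an assumption about the caller's cleanup.

/* Illustrative caller sketch: branch on the documented err_code() values. */
static void try_connect(
    const char* const   host,
    const unsigned      port)
{
    CLIENT*   clnt  = NULL;
    ErrorObj* error = ldm_clnttcp_create_vers(host, port, 6, &clnt, NULL, NULL);

    if (!error) {
        /* Success: "clnt" is ready for RPC calls. */
        clnt_destroy(clnt);                 /* caller-side cleanup (assumed) */
    }
    else if (LDM_CLNT_BAD_VERSION == err_code(error)) {
        /* The upstream LDM doesn't speak version 6; a caller could retry
         * with version 5 here, as lp_new() does in Example #3. */
        err_free(error);
    }
    else {
        /* LDM_CLNT_UNKNOWN_HOST, LDM_CLNT_TIMED_OUT, ... */
        err_free(error);
    }
}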
Example #8
void TransformExport::Export()
{
	ModelExporter & modelExporter = ModelExporter::GetExporter();

	modelExporter.ResetTimeline();


	MStatus stat;
	MItDependencyNodes itDep(MFn::kTransform,&stat);


	while (!itDep.isDone()) 
	{
		MObject obj = itDep.item();
		MFnTransform transform(obj, &stat);
		err_code(stat);
		
		MString cmd = MString("reference -q -f ") + transform.name();
		MString file_id;
		stat = MGlobal::executeCommand( cmd, file_id );
		if( stat == MS::kSuccess ) 
		{ 
			itDep.next();
			continue;
		}

		MString transformName = transform.name(&stat);
		err_code(stat);
	
		unsigned int parentCount = transform.parentCount(&stat);
		err_code(stat);
		
		unsigned int childCount = transform.childCount(&stat);
		err_code(stat);

		bool doExport = false;
		if (childCount == 0)
		{
			doExport = true; // Tip JOINTS
		}
	
		for (unsigned int child = 0 ; child < childCount ; child++)
		{
			MObject childObj = transform.child(child,&stat);
			err_code(stat);

			MFn::Type childType = childObj.apiType();

			if ( modelExporter.CheckChildType(childType) )				
			{
				doExport = true;
				break;
			}
		}

		if (!doExport)
		{
			itDep.next();
			continue;
		}

		TransformData * pTransformData = new TransformData();
		pTransformData->name = transformName.asChar();

		MObject parentObj = transform.parent(0,&stat);
		MFn::Type parentType = parentObj.apiType();

		if ( modelExporter.CheckParentType(parentType) )
		{
			MFnDagNode parentDagNode(parentObj, &stat);
			MString parentName = parentDagNode.name(&stat);
		
			if (parentName.length() > 0)
				pTransformData->parentName = parentName.asChar();
		}

		if (parentType != MFn::kJoint && obj.apiType() == MFn::kJoint)
		{
			modelExporter.mSkeletonRoot = transform.name().asChar();
		}

		MVector translate = transform.getTranslation(MSpace::kTransform, &stat);
		err_code(stat);

		MVector pivot = transform.rotatePivotTranslation(MSpace::kTransform, &stat);
		err_code(stat);

		pTransformData->tx = (float)translate.x;
		pTransformData->ty = (float)translate.y;
		pTransformData->tz = (float)translate.z;

		pTransformData->px = (float)pivot.x;
		pTransformData->py = (float)pivot.y;
		pTransformData->pz = (float)pivot.z;

		//double rx,ry,rz,rw;
		MQuaternion quat;
		stat = transform.getRotation(quat);
		err_code(stat);

		if (transform.object().hasFn(MFn::kJoint))
		{
			err_code(stat);

			MFnIkJoint joint(transform.object(), &stat);
			err_code(stat);

			MQuaternion RO;
			MQuaternion R;
			MQuaternion JO;
			MQuaternion IS; // We dont have time for this.

			stat = joint.getScaleOrientation(RO);
			err_code(stat);
			stat = joint.getRotation(R); 
			err_code(stat);
			stat = joint.getOrientation(JO); 
			err_code(stat);
						
			quat = RO*R*JO;
		}

		//stat = transform.getRotationQuaternion(rx,ry,rz,rw, MSpace::kTransform);
		//err_code(stat);

		pTransformData->rx = -(float)quat.x;
		pTransformData->ry = -(float)quat.y;
		pTransformData->rz = -(float)quat.z;
		pTransformData->rw = (float)quat.w;

		double scale[3];
		stat = transform.getScale(scale);
		err_code(stat);

		pTransformData->sx = (float)scale[0];
		pTransformData->sy = (float)scale[1];
		pTransformData->sz = (float)scale[2];
		
		pTransformData->index = modelExporter.mTransformCount;
		modelExporter.mSceneTransforms.push_back(pTransformData);

	
		modelExporter.mSceneTransformsTable[transformName.asChar()] = modelExporter.mTransformCount;
		modelExporter.mTransformCount++;
		
		stat = itDep.next();
		err_code(stat);
	}

	
	vector<TransformData*>::iterator nodesIter = modelExporter.mSceneTransforms.begin();

	while (nodesIter != modelExporter.mSceneTransforms.end())
	{
		TransformData * pTransformData = *nodesIter;
		if (pTransformData->parentName.length() > 0)
			pTransformData->parent = modelExporter.mSceneTransformsTable[pTransformData->parentName];
		nodesIter++;
	}
	
	WriteTransforms();
}
Example #9
int main(void) {
  std::vector<float> h_a(count);  // a vector
  std::vector<float> h_b(count);  // b vector
  std::vector<float> h_c(count, 0xdeadbeef);  // c = a + b

  cl::Buffer d_a;  // device memory used for the input  a vector
  cl::Buffer d_b;  // device memory used for the input  b vector
  cl::Buffer d_c;  // device memory used for the output c vector

  // Fill vectors a and b with random float values
  for (size_t i = 0; i < count; i++) {
    h_a[i] = rand_r(&seed) / static_cast<float>(UINT32_MAX);
    h_b[i] = rand_r(&seed) / static_cast<float>(UINT32_MAX);
  }

  try {
    // Create a context
    cl::Context context(DEVICE);

    // Load in kernel source, creating a program object for the context

    cl::Program program(context, util::loadProgram("vadd.cl"), true);

    // Get the command queue
    cl::CommandQueue queue(context);

    // Create the kernel functor
    auto vadd = cl::make_kernel<cl::Buffer, cl::Buffer,
                                cl::Buffer, int>(program, "vadd");

    d_a = cl::Buffer(context, begin(h_a), end(h_a), true);
    d_b = cl::Buffer(context, begin(h_b), end(h_b), true);
    d_c = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count);

    util::Timer timer;
    vadd(cl::EnqueueArgs(queue, cl::NDRange(count)),
         d_a, d_b, d_c, static_cast<int>(count));

    queue.finish();

    double rtime = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0;
    printf("\nThe kernels ran in %lf seconds\n", rtime);

    cl::copy(queue, d_c, begin(h_c), end(h_c));  // NOLINT

    // Test the results
    int correct = 0;
    float tmp;

    for (size_t i = 0; i < count; i++) {
      tmp = h_a[i] + h_b[i];  // expected value for d_c[i]
      tmp -= h_c[i];  // compute errors
      if (tmp * tmp < tolerance * tolerance)
        correct++;
      else
        printf(" tmp %f h_a %f h_b %f  h_c %f \n", tmp, h_a[i], h_b[i], h_c[i]);
    }

    // summarize results
    printf("vector add to find C = A+B:  %d out of %zu results were correct.\n",
           correct, count);
  } catch (cl::Error err) {
    std::cout << "Exception\n";
    std::cerr << "ERROR: " << err.what() << "(" << err_code(err.err()) << ")\n";
  }
}
Example #10
int main(void)
{
    float *h_psum;              // vector to hold partial sum
    int in_nsteps = INSTEPS;    // default number of steps (updated later to device preferable)
    int niters = ITERS;         // number of iterations
    int nsteps;
    float step_size;
    size_t nwork_groups;
    size_t max_size, work_group_size = 8;
    float pi_res;

    cl_mem d_partial_sums;

    char *kernelsource = getKernelSource("../pi_ocl.cl");             // Kernel source

    cl_int err;
    cl_device_id     device_id = NULL;     // compute device id
    cl_context       context;       // compute context
    cl_command_queue commands;      // compute command queue
    cl_program       program;       // compute program
    cl_kernel        kernel_pi;     // compute kernel

    // Set up OpenCL context, queue, kernel, etc.
    cl_uint numPlatforms;
    // Find number of platforms
    err = clGetPlatformIDs(0, NULL, &numPlatforms);
    if (err != CL_SUCCESS || numPlatforms <= 0)
    {
        printf("Error: Failed to find a platform!\n%s\n",err_code(err));
        return EXIT_FAILURE;
    }
    // Get all platforms
    cl_platform_id Platform[numPlatforms];
    err = clGetPlatformIDs(numPlatforms, Platform, NULL);
    if (err != CL_SUCCESS || numPlatforms <= 0)
    {
        printf("Error: Failed to get the platform!\n%s\n",err_code(err));
        return EXIT_FAILURE;
    }
    // Secure a device
    for (int i = 0; i < numPlatforms; i++)
    {
        err = clGetDeviceIDs(Platform[i], DEVICE, 1, &device_id, NULL);
        if (err == CL_SUCCESS)
            break;
    }
    if (device_id == NULL)
    {
        printf("Error: Failed to create a device group!\n%s\n",err_code(err));
        return EXIT_FAILURE;
    }
    // Output information
    err = output_device_info(device_id);
    // Create a compute context
    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
    if (!context)
    {
        printf("Error: Failed to create a compute context!\n%s\n", err_code(err));
        return EXIT_FAILURE;
    }
    // Create a command queue
    commands = clCreateCommandQueue(context, device_id, 0, &err);
    if (!commands)
    {
        printf("Error: Failed to create a command queue!\n%s\n", err_code(err));
        return EXIT_FAILURE;
    }
    // Create the compute program from the source buffer
    program = clCreateProgramWithSource(context, 1, (const char **) & kernelsource, NULL, &err);
    if (!program)
    {
        printf("Error: Failed to create compute program!\n%s\n", err_code(err));
        return EXIT_FAILURE;
    }
    // Build the program  
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        size_t len;
        char buffer[2048];

        printf("Error: Failed to build program executable!\n%s\n", err_code(err));
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        printf("%s\n", buffer);
        return EXIT_FAILURE;
    }
    // Create the compute kernel from the program 
    kernel_pi = clCreateKernel(program, "pi", &err);
    if (!kernel_pi || err != CL_SUCCESS)
    {
        printf("Error: Failed to create compute kernel!\n%s\n", err_code(err));
        return EXIT_FAILURE;
    }

    // Find kernel work-group size
    err = clGetKernelWorkGroupInfo (kernel_pi, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &work_group_size, NULL);
    if (err != CL_SUCCESS)
    {
        printf("Error: Failed to get kernel work-group info\n%s\n", err_code(err));
        return EXIT_FAILURE;
    }
    // Now that we know the size of the work-groups, we can set the number of
    // work-groups, the actual number of steps, and the step size
    nwork_groups = in_nsteps/(work_group_size*niters);

    if (nwork_groups < 1)
    {
        err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(size_t), &nwork_groups, NULL);
        work_group_size = in_nsteps / (nwork_groups * niters);
    }

    nsteps = work_group_size * niters * nwork_groups;
    step_size = 1.0f/(float)nsteps;
    h_psum = calloc(nwork_groups, sizeof(float));
    if (!h_psum)
    {
        printf("Error: could not allocate host memory for h_psum\n");
        return EXIT_FAILURE;
    }

    printf(" %ld work-groups of size %ld. %d Integration steps\n",
            nwork_groups,
            work_group_size,
            nsteps);

    d_partial_sums = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * nwork_groups, NULL, &err);
    if (err != CL_SUCCESS)
    {
        printf("Error: Failed to create buffer\n%s\n", err_code(err));
        return EXIT_FAILURE;
    }

    // Set kernel arguments
    err  = clSetKernelArg(kernel_pi, 0, sizeof(int), &niters);
    err |= clSetKernelArg(kernel_pi, 1, sizeof(float), &step_size);
    err |= clSetKernelArg(kernel_pi, 2, sizeof(float) * work_group_size, NULL);
    err |= clSetKernelArg(kernel_pi, 3, sizeof(cl_mem), &d_partial_sums);
    if (err != CL_SUCCESS)
    {
        printf("Error: Failed to set kernel arguments!\n");
        return EXIT_FAILURE;
    }

    // Execute the kernel over the entire range of our 1D input data set
    // using the maximum number of work items for this device
    size_t global = nwork_groups * work_group_size;
    size_t local = work_group_size;
    double rtime = wtime();
    err = clEnqueueNDRangeKernel(
        commands,
        kernel_pi,
        1, NULL,
        &global,
        &local,
        0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        printf("Error: Failed to execute kernel\n%s\n", err_code(err));
        return EXIT_FAILURE;
    }


    err = clEnqueueReadBuffer(
        commands,
        d_partial_sums,
        CL_TRUE,
        0,
        sizeof(float) * nwork_groups,
        h_psum,
        0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        printf("Error: Failed to read buffer\n%s\n", err_code(err));
        return EXIT_FAILURE;
    }

    // complete the sum and compute the final integral value on the host
    pi_res = 0.0f;
    for (unsigned int i = 0; i < nwork_groups; i++)
    {
        pi_res += h_psum[i];
    }
    pi_res *= step_size;

    rtime = wtime() - rtime;

    printf("\nThe calculation ran in %lf seconds\n", rtime);
    printf(" pi = %f for %d steps\n", pi_res, nsteps);

    // clean up
    clReleaseMemObject(d_partial_sums);
    clReleaseProgram(program);
    clReleaseKernel(kernel_pi);
    clReleaseCommandQueue(commands);
    clReleaseContext(context);
    free(kernelsource);
    free(h_psum);
}
Example #11
File: host.c  Project: rcgbac/SphereDecoder
int main(int argc, char *argv[])
{
    // Declare variables - y', L
    // Load from file? Declare within host?
	Type y[K];
	Type L[K*K];
	Type R[K];
	Type m[K];
	Complex Xml[K];
    int check_result;
	
	cl_mem input_y;
	cl_mem input_L;
	cl_mem input_R;
	cl_mem output_m;
	cl_mem output_xml;

    // OpenCL-specific variables
    cl_device_id        device_id;
    cl_platform_id      platform_id;
    cl_context          context;
    cl_command_queue    commands;
    cl_program          program;

    cl_kernel SDkernel;
    cl_int dev_type;

    cl_int error;
    cl_int err;
    cl_event event;

    FILE *kernelFile;
    char *kernelSRC;
    long kernelSize;
	
	size_t global[2];
	size_t local[2];

    if (argc < 2)
    {
        printf("\nError - must specify the kernel source file\n");
        return EXIT_FAILURE;
    }

    // Read the kernel source named on the command line into kernelSRC
    kernelFile = fopen(argv[1], "r");
    if (!kernelFile)
    {
        printf("\nError - could not open kernel file %s\n", argv[1]);
        return EXIT_FAILURE;
    }
    fseek(kernelFile, 0, SEEK_END);
    kernelSize = ftell(kernelFile);
    rewind(kernelFile);
    kernelSRC = (char *)calloc(kernelSize + 1, 1);
    if (fread(kernelSRC, 1, kernelSize, kernelFile) != (size_t)kernelSize)
    {
        printf("\nError - could not read kernel file %s\n", argv[1]);
        return EXIT_FAILURE;
    }
    fclose(kernelFile);

    //--------------------------------------------------------------------------------
    // Create a context, queue and device.
    //--------------------------------------------------------------------------------

    cl_uint numPlatforms;
    // Find number of platforms
    err = clGetPlatformIDs(0, NULL, &numPlatforms);
    if (err != CL_SUCCESS || numPlatforms <= 0)
    {
        printf("Error: Failed to find a platform!\n%s\n",err_code(err));
        return EXIT_FAILURE;
    }
    // Get all platforms
    cl_platform_id Platform[numPlatforms];
    err = clGetPlatformIDs(numPlatforms, Platform, NULL);
    if (err != CL_SUCCESS || numPlatforms <= 0)
    {
        printf("Error: Failed to get the platform!\n%s\n",err_code(err));
        return EXIT_FAILURE;
    }
    // Secure a device
    for (int i = 0; i < numPlatforms; i++)
    {
        err = clGetDeviceIDs(Platform[i], DEVICE, 1, &device_id, NULL);
        if (err == CL_SUCCESS)
            break;
    }
    if (device_id == NULL)
    {
        printf("Error: Failed to create a device group!\n%s\n",err_code(err));
        return EXIT_FAILURE;
    }

    // Create a compute context
    context = clCreateContext(0, 1, &device_id, NULL, NULL, &error);
    if (!context)
    {
        printf("Error: Failed to create a compute context!\n%s\n", err_code(error));
        return EXIT_FAILURE;
    }
    // Create a command queue
    commands = clCreateCommandQueue(context, device_id, 0, &error);
    if (!commands)
    {
        printf("Error: Failed to create a command queue!\n%s\n", err_code(error));
        return EXIT_FAILURE;
    }


    // Create buffers for each argument of the kernel (y, L, R^2, m and Xml);
    // y and L are transferred to the device later with clEnqueueWriteBuffer
    input_y = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(Type) * K, NULL, &err);
    input_L = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(Type) * K * K, NULL, &err);
    input_R = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(Type) * K, NULL, &err);
    output_m = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(Type) * K, NULL, &err);
    output_xml = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(Complex) * K, NULL, &err);
    if (err != CL_SUCCESS)
    {
        printf("Error: failed to create buffer\n%s\n", err_code(err));
        return EXIT_FAILURE;
    }

    // Create the compute program from the source buffer
    program = clCreateProgramWithSource(context, 1, (const char **)&kernelSRC, NULL, &err);
    if (err != CL_SUCCESS)
    {
        printf("Error: could not create program\n%s\n", err_code(err));
        return EXIT_FAILURE;
    }
    // Build the program
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        size_t len;
        char buffer[2048];

        printf("Error: Failed to build program executable!\n%s\n", err_code(err));
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        printf("%s\n", buffer);
        return EXIT_FAILURE;
    }

    // Create the compute kernel from the program
    SDkernel = clCreateKernel(program, "SD", &err);
    if (!SDkernel || err != CL_SUCCESS)
    {
        printf("Error: Failed to create compute kernel!\n%s\n", err_code(err));
        return EXIT_FAILURE;
    }

	err = clEnqueueWriteBuffer(commands, input_y, CL_TRUE, 0, sizeof(Type)*K, y, 0, NULL, NULL);
	err = clEnqueueWriteBuffer(commands, input_L, CL_TRUE, 0, sizeof(Type)*K*K, L, 0, NULL, NULL);
	if(err != CL_SUCCESS)
	{
		printf("Error: could not write buffer\nError code %d\n", err);
		return EXIT_FAILURE;
	}
	
	err  = clSetKernelArg(SDkernel, 0, sizeof(cl_mem), &input_y);
	err |= clSetKernelArg(SDkernel, 1, sizeof(cl_mem), &input_L);
	err |= clSetKernelArg(SDkernel, 2, sizeof(cl_mem), &input_R);
	err |= clSetKernelArg(SDkernel, 3, sizeof(cl_mem), &output_m);
	err |= clSetKernelArg(SDkernel, 4, sizeof(cl_mem), &output_xml);
	if(err != CL_SUCCESS)
	{
		printf("Error: could not set kernel arguments\nError code %d\n", err);
		return EXIT_FAILURE;
	}
	
	global[0] = K;
	global[1] = K;
	local[0] = K;
	local[1] = 1;
	
	
	err = clEnqueueNDRangeKernel(commands,
								 SDkernel,
								 2,
								 NULL,
								 (size_t*)&global,
								 (size_t*)&local,
								 0,
								 NULL,
								 &event);
	if(err != CL_SUCCESS)
	{
		printf("Error: could not set ND range\nError code %d\n", err);
		return EXIT_FAILURE;
	}			

	clEnqueueReadBuffer(commands, output_m, CL_TRUE, 0, sizeof(Type)*K, m, 0, NULL, NULL);
	clEnqueueReadBuffer(commands, output_xml, CL_TRUE, 0, sizeof(Complex)*K, Xml, 0, NULL, NULL);
	
	clReleaseMemObject(input_y);
	clReleaseMemObject(input_L);
	clReleaseMemObject(input_R);
	clReleaseMemObject(output_m);
	clReleaseMemObject(output_xml);
	clReleaseProgram(program);
	clReleaseKernel(SDkernel);
	clReleaseCommandQueue(commands);
	clReleaseContext(context);
	
    return EXIT_SUCCESS;
}
Example #12
int main(void)
{
    std::vector<float> h_a(LENGTH);                // a vector 
    std::vector<float> h_b(LENGTH);                // b vector 	
    std::vector<float> h_c (LENGTH);               // c vector
    std::vector<float> h_r (LENGTH, 0xdeadbeef);   // d vector (result)

    cl::Buffer d_a;                       // device memory used for the input  a vector
    cl::Buffer d_b;                       // device memory used for the input  b vector
    cl::Buffer d_c;                       // device memory used for the input c vector
    cl::Buffer d_r;                       // device memory used for the output r vector

    // Fill vectors a and b with random float values
    int count = LENGTH;
    for(int i = 0; i < count; i++)
    {
        h_a[i]  = rand() / (float)RAND_MAX;
        h_b[i]  = rand() / (float)RAND_MAX;
        h_c[i]  = rand() / (float)RAND_MAX;
    }

    try 
    {
    	// Create a context
        cl::Context context(DEVICE);

        // Load in kernel source, creating a program object for the context

        cl::Program program(context, util::loadProgram("vadd_abc.cl"), true);

        // Get the command queue
        cl::CommandQueue queue(context);

        // Create the kernel functor
 
        auto vadd = cl::make_kernel<cl::Buffer, cl::Buffer, cl::Buffer, cl::Buffer, int>(program, "vadd");

        d_a   = cl::Buffer(context, begin(h_a), end(h_a), true);
        d_b   = cl::Buffer(context, begin(h_b), end(h_b), true);
        d_c   = cl::Buffer(context, begin(h_c), end(h_c), true);

        d_r  = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * LENGTH);

        vadd(
            cl::EnqueueArgs(
                queue,
                cl::NDRange(count)), 
            d_a,
            d_b,
            d_c,
            d_r,
            count);

        cl::copy(queue, d_r, begin(h_r), end(h_r));

        // Test the results
        int correct = 0;
        float tmp;
        for(int i = 0; i < count; i++)
        {
            tmp = h_a[i] + h_b[i] + h_c[i];              // assign element i of a+b+c to tmp
            tmp -= h_r[i];                               // compute deviation of expected and output result
            if(tmp*tmp < TOL*TOL)                        // correct if square deviation is less than tolerance squared
                correct++;
            else {
                printf(" tmp %f h_a %f h_b %f h_c %f h_r %f \n",tmp, h_a[i], h_b[i], h_c[i], h_r[i]);
            }
        }
        
        // summarize results
        printf("R = A+B+C:  %d out of %d results were correct.\n", correct, count);
        
    }
    catch (cl::Error err) {
        std::cout << "Exception\n";
        std::cerr
            << "ERROR: "
            << err.what()
            << "("
            << err_code(err.err())
            << ")"
            << std::endl;
    }
}
Example #13
File: main.c  Project: arkaheb258/infineon
int main(void) {
	BOOL_T		err = CO_FALSE; /* error flag */
	UNSIGNED8   temp = 0;
	uword	time_lo = 0, time_hi = 0;

    /*--- hardware initialization e.g SIO, Chip-Selects, ...----------*/
    iniDevice();
	IO_vInit();
	USIC0_vInit();
	USIC1_vInit();
	USIC2_vInit();
#if HW_KPS == 1
	USIC3_vInit();
#endif
	//RTC init
    DAVE_vUnlockProtecReg();
	RTC_vInit();
    NOP();  /* one cycle delay */
    NOP();  /* one cycle delay */
    DAVE_vLockProtecReg();

	lNodeId = (~IO_uwReadPort(P2))&0xFF;
	if (lNodeId > 127) { lNodeId = 127; }
	temp = (~IO_uwReadPort(P10))&0x0F;
	switch(temp){
		case 0: bitRate = 10; break;
		case 1: bitRate = 20; break; 
		case 2: bitRate = 50; break; 
		case 3: bitRate = 125; break; 
		case 4: bitRate = 250; break; 
		case 5: bitRate = 500; break; 
		case 0x9:
		case 0xE:
		case 0xF:
			bitRate = 125;
			if (((~IO_uwReadPort(P2)) & 0xFF) == 0){ lNodeId = 127;	}
		break;
		default: bitRate = 500; break;
	}

    initCan(bitRate);
    init_Library();
    initTimer();
    Start_CAN();
    ENABLE_CPU_INTERRUPTS();
	RTC_vSetTime(0,0);

	sw_init();

	//PRINTF("loop\n");
    while (err == CO_FALSE) {
		FlushMbox();		         /* Do the CANopen job */
		myRTC_ulGetTime(&time_lo, &time_hi);

		// read the internal temperature
		status_wewn[2] = ad7814_read(U2C0, SPI_CS3); 

		sw_loop();

		// status indication on the LEDs

		if (blink(time_lo, ch1_led_st)) {	//St1
			IO_vResetPin(IO_P0_0_LED_ST1);
		} else {
			IO_vSetPin(IO_P0_0_LED_ST1);
		}

		if (blink(time_lo, ch2_led_st)) {	//St2
			IO_vResetPin(IO_P4_1_LED_ST2);
		} else {
			IO_vSetPin(IO_P4_1_LED_ST2);
		}

		if (blink(time_lo, ch1_led_err)) {	//Err1
			IO_vResetPin(IO_P4_2_LED_ERR1);
		} else {
			IO_vSetPin(IO_P4_2_LED_ERR1);
		}

		if (blink(time_lo, ch2_led_err)) {	//Err2
			IO_vResetPin(IO_P4_0_LED_ERR2);
		} else {
			IO_vSetPin(IO_P4_0_LED_ERR2);
		}

// in the err_code() function, add cyclic reporting of several errors (error number as bits in the "dev_led_st" word, not as a value)
		if (err_code(time_lo, dev_led_st)) {	// err code - yellow LED
			IO_vResetPin(IO_P2_8_LED_C);
		} else {
			IO_vSetPin(IO_P2_8_LED_C);
		}

		/* give a chance to finish the loop */
		err = endLoop();
    }
    PRINTF("\nSTOP\n");
    Stop_CAN();
    DISABLE_CPU_INTERRUPTS();
    releaseTimer();
    ResetIntMask();
    deinit_Library();
    return 0; 
}
Example #14
int main(void)
{

    int Mdim, Ndim, Pdim;   // A[N][P], B[P][M], C[N][M]
    int szA, szB, szC;      // Number of elements in each matrix


    double start_time;      // Starting time
    double run_time;        // Timing 
    util::Timer timer;      // Timing

    Ndim = ORDER;
    Pdim = ORDER;
    Mdim = ORDER;

    szA = Ndim * Pdim;
    szB = Pdim * Mdim;
    szC = Ndim * Mdim;

    std::vector<float> h_A(szA); // Host memory for Matrix A
    std::vector<float> h_B(szB); // Host memory for Matrix B
    std::vector<float> h_C(szC); // Host memory for Matrix C

    cl::Buffer d_a, d_b, d_c;   // Matrices in device memory

    initmat(Mdim, Ndim, Pdim, h_A, h_B, h_C);

    timer.reset();

    printf("\n===== Sequential, matrix mult (dot prod), order %d on host CPU ======\n",ORDER);
    for(int i = 0; i < COUNT; i++)
    {
        zero_mat(Ndim, Mdim, h_C);

        start_time = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0;

        seq_mat_mul_sdot(Mdim, Ndim, Pdim, h_A, h_B, h_C);

        run_time  = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0 - start_time;
        results(Mdim, Ndim, Pdim, h_C, run_time);
    }

    try
    {

//--------------------------------------------------------------------------------
// Create a context and queue for DEVICE
//--------------------------------------------------------------------------------

        cl::Context context(DEVICE);
        cl::CommandQueue queue(context);

//--------------------------------------------------------------------------------
// Setup the buffers, initialize matrices, and write them into global memory
//--------------------------------------------------------------------------------

        //  Reset A, B and C matrices (just to play it safe)
        initmat(Mdim, Ndim, Pdim, h_A, h_B, h_C);

        d_a = cl::Buffer(context, begin(h_A), end(h_A), true);

        d_b = cl::Buffer(context, begin(h_B), end(h_B), true);

        d_c = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * szC);

//--------------------------------------------------------------------------------
// OpenCL matrix multiplication ... Naive
//--------------------------------------------------------------------------------

        // Create the compute program from the source buffer
        cl::Program program(context, kernelsource, true);

        // Create the compute kernel from the program
        auto naive_mmul = cl::make_kernel<int, int, int, cl::Buffer, cl::Buffer, cl::Buffer>(program, "mmul");

        printf("\n===== OpenCL, matrix mult, C(i,j) per work item, order %d ======\n",Ndim);

        // Do the multiplication COUNT times
        for (int i = 0; i < COUNT; i++)
        {
            zero_mat(Ndim, Mdim, h_C);

            start_time = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0;

            // Execute the kernel over the entire range of C matrix elements ... computing
            // a dot product for each element of the product matrix.  The local work
            // group size is set to NULL ... so I'm telling the OpenCL runtime to
            // figure out a local work group size for me.
            cl::NDRange global(Ndim, Mdim);
            naive_mmul(cl::EnqueueArgs(queue, global),
                    Mdim, Ndim, Pdim, d_a, d_b, d_c);

            queue.finish();

            run_time  = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0 - start_time;

            cl::copy(queue, d_c, begin(h_C), end(h_C));

            results(Mdim, Ndim, Pdim, h_C, run_time);

        } // end for loop

    } catch (cl::Error err)
    {
        std::cout << "Exception\n";
        std::cerr << "ERROR: "
                  << err.what()
                  << "("
                  << err_code(err.err())
                  << ")"
                  << std::endl;
    }

    return EXIT_SUCCESS;
}
Example #15
BLARGG_EXPORT const char* fex_err_details( fex_err_t err )
{
	// If we don't have error code assigned, return entire string
	return (err_code( err ) >= 0 ? blargg_err_details( err ) : blargg_err_str( err ));
}
Example #16
int main(int argc, char *argv[])
{

    int N;                  // A[N][N], B[N][N], C[N][N]
    int size;               // Number of elements in each matrix


    double start_time;      // Starting time
    double run_time;        // Timing
    util::Timer timer;      // Timing

    N    = ORDER;
    size = N * N;

    std::vector<float> h_A(size); // Host memory for Matrix A
    std::vector<float> h_B(size); // Host memory for Matrix B
    std::vector<float> h_C(size); // Host memory for Matrix C

    cl::Buffer d_a, d_b, d_c;   // Matrices in device memory

//--------------------------------------------------------------------------------
// Create a context and queue
//--------------------------------------------------------------------------------

    try
    {

        cl_uint deviceIndex = 0;
        parseArguments(argc, argv, &deviceIndex);

        // Get list of devices
        std::vector<cl::Device> devices;
        unsigned numDevices = getDeviceList(devices);

        // Check device index in range
        if (deviceIndex >= numDevices)
        {
          std::cout << "Invalid device index (try '--list')\n";
          return EXIT_FAILURE;
        }

        cl::Device device = devices[deviceIndex];

        std::string name;
        getDeviceName(device, name);
        std::cout << "\nUsing OpenCL device: " << name << "\n";

        std::vector<cl::Device> chosen_device;
        chosen_device.push_back(device);
        cl::Context context(chosen_device);
        cl::CommandQueue queue(context, device);

//--------------------------------------------------------------------------------
// Run sequential matmul
//--------------------------------------------------------------------------------


        initmat(N, h_A, h_B, h_C);

        timer.reset();

        printf("\n===== Sequential, matrix mult (dot prod), order %d on host CPU ======\n",N);
        for(int i = 0; i < COUNT; i++)
        {
            zero_mat(N, h_C);

            start_time = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0;

            seq_mat_mul_sdot(N, h_A, h_B, h_C);

            run_time  = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0 - start_time;
            results(N, h_C, run_time);
        }

//--------------------------------------------------------------------------------
// Setup the buffers, initialize matrices, and write them into global memory
//--------------------------------------------------------------------------------

        //  Reset A, B and C matrices (just to play it safe)
        initmat(N, h_A, h_B, h_C);

        d_a = cl::Buffer(context, h_A.begin(), h_A.end(), true);

        d_b = cl::Buffer(context, h_B.begin(), h_B.end(), true);

        d_c = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * size);

//--------------------------------------------------------------------------------
// OpenCL matrix multiplication ... Naive
//--------------------------------------------------------------------------------

        // Create the compute program from the source buffer
        cl::Program program(context, kernelsource, true);

        // Create the compute kernel from the program
        cl::make_kernel<int, cl::Buffer, cl::Buffer, cl::Buffer> naive_mmul(program, "mmul");

        printf("\n===== OpenCL, matrix mult, C(i,j) per work item, order %d ======\n",N);

        // Do the multiplication COUNT times
        for (int i = 0; i < COUNT; i++)
        {
            zero_mat(N, h_C);

            start_time = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0;

            // Execute the kernel over the entire range of C matrix elements ... computing
            // a dot product for each element of the product matrix.  The local work
            // group size is set to NULL ... so I'm telling the OpenCL runtime to
            // figure out a local work group size for me.
            cl::NDRange global(N, N);
            naive_mmul(cl::EnqueueArgs(queue, global),
                    N, d_a, d_b, d_c);

            queue.finish();

            run_time  = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0 - start_time;

            cl::copy(queue, d_c, h_C.begin(), h_C.end());

            results(N, h_C, run_time);

        } // end for loop

    } catch (cl::Error err)
    {
        std::cout << "Exception\n";
        std::cerr << "ERROR: "
                  << err.what()
                  << "("
                  << err_code(err.err())
                  << ")"
                  << std::endl;
    }

    return EXIT_SUCCESS;
}
Example #17
int main(void)
{

  try
  {
    // Discover number of platforms
    std::vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);
    std::cout << "\nNumber of OpenCL platforms: " << platforms.size() << std::endl;

    // Investigate each platform
    std::cout << "\n-------------------------" << std::endl;
    for (std::vector<cl::Platform>::iterator plat = platforms.begin(); plat != platforms.end(); plat++)
    {
      std::string s;
      plat->getInfo(CL_PLATFORM_NAME, &s);
      std::cout << "Platform: " << s << std::endl;

      plat->getInfo(CL_PLATFORM_VENDOR, &s);
      std::cout << "\tVendor:  " << s << std::endl;

      plat->getInfo(CL_PLATFORM_VERSION, &s);
      std::cout << "\tVersion: " << s << std::endl;

      // Discover number of devices
      std::vector<cl::Device> devices;
      plat->getDevices(CL_DEVICE_TYPE_ALL, &devices);
      std::cout << "\n\tNumber of devices: " << devices.size() << std::endl;

      // Investigate each device
      for (std::vector<cl::Device>::iterator dev = devices.begin(); dev != devices.end(); dev++ )
      {
        std::cout << "\t-------------------------" << std::endl;

        dev->getInfo(CL_DEVICE_NAME, &s);
        std::cout << "\t\tName: " << s << std::endl;

        dev->getInfo(CL_DEVICE_OPENCL_C_VERSION, &s);
        std::cout << "\t\tVersion: " << s << std::endl;

        int i;
        dev->getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &i);
        std::cout << "\t\tMax. Compute Units: " << i << std::endl;

        size_t size;
        dev->getInfo(CL_DEVICE_LOCAL_MEM_SIZE, &size);
        std::cout << "\t\tLocal Memory Size: " << size/1024 << " KB" << std::endl;

        dev->getInfo(CL_DEVICE_GLOBAL_MEM_SIZE, &size);
        std::cout << "\t\tGlobal Memory Size: " << size/(1024*1024) << " MB" << std::endl;

        dev->getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &size);
        std::cout << "\t\tMax Alloc Size: " << size/(1024*1024) << " MB" << std::endl;

        dev->getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &size);
        std::cout << "\t\tMax Work-group Total Size: " << size << std::endl;

        std::vector<size_t> d;
        dev->getInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES, &d);
        std::cout << "\t\tMax Work-group Dims: (";
        for (std::vector<size_t>::iterator st = d.begin(); st != d.end(); st++)
          std::cout << *st << " ";
        std::cout << "\x08)" << std::endl;

        std::cout << "\t-------------------------" << std::endl;

      }

      std::cout << "\n-------------------------\n";
    }

  }
  catch (cl::Error err)
  {
    std::cout << "OpenCL Error: " << err.what() << " returned " << err_code(err.err()) << std::endl;
    std::cout << "Check cl.h for error codes." << std::endl;
	system("pause");
	exit(-1);
  }

  system("pause");
  return 0;

}
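For comparison, the same per-dimension limit can be queried through the plain C API used by the later examples. The helper below is only a sketch (print_max_work_item_sizes is a name invented here, not part of any listing): it first asks for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, then reads CL_DEVICE_MAX_WORK_ITEM_SIZES into an array of that length. Error checking is omitted for brevity.

#include <stdio.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif

static void print_max_work_item_sizes(cl_device_id device)
{
    // Number of work-item dimensions the device supports (usually 3)
    cl_uint dims = 0;
    clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                    sizeof(dims), &dims, NULL);

    // Per-dimension maximum work-item counts
    size_t sizes[8];
    if (dims > 8)
        dims = 8;
    clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
                    sizeof(size_t) * dims, sizes, NULL);

    printf("Max Work-item Sizes: (");
    for (cl_uint i = 0; i < dims; i++)
        printf("%zu%s", sizes[i], (i + 1 < dims) ? " " : "");
    printf(")\n");
}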
Example #18
int main(int argc, char** argv)
{
	if (argc != 2)
	{
		printf("Usage: ./pi_vocl num\n");
		printf("\twhere num = 1, 4 or 8\n");
		return EXIT_FAILURE;
	}

	int vector_size = atoi(argv[1]);

    // Define some vector size specific constants
    unsigned int ITERS, WGS;
    if (vector_size == 1)
    {
        ITERS = 262144;
        WGS = 8;
    }
    else if (vector_size == 4)
    {
        ITERS = 262144 / 4;
        WGS = 32;
    }
    else if (vector_size == 8)
    {
        ITERS = 262144 / 8;
        WGS = 64;
    }
    else
    {
        fprintf(stderr, "Invalid vector size\n");
        return EXIT_FAILURE;
    }

    // Set some default values:
    // Default number of steps (updated later to device preferable)
    unsigned int in_nsteps = INSTEPS;
    // Default number of iterations
    unsigned int niters = ITERS;
    unsigned int work_group_size = WGS;

    // Create context, queue and build program
    cl_int err;
    cl_context context;
    cl_device_id device = NULL;
    cl_command_queue queue;
    cl_program program;
    cl_kernel kernel;
    // Find number of platforms
    cl_uint numPlatforms;
    err = clGetPlatformIDs(0, NULL, &numPlatforms);
    if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);
    // Get all platforms
    cl_platform_id platforms[numPlatforms];
    err = clGetPlatformIDs(numPlatforms, platforms, NULL);
    if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);
    // Secure a device
    for (int i = 0; i < numPlatforms; i++)
    {
        err = clGetDeviceIDs(platforms[i], DEVICE, 1, &device, NULL);
        if (err == CL_SUCCESS)
            break;
    }
    if (device == NULL) die(err_code(err), __LINE__, __FILE__);
    // Create a compute context
    context = clCreateContext(0, 1, &device, NULL, NULL, &err);
    if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);
    // Create a command queue
    queue = clCreateCommandQueue(context, device, 0, &err);
    if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);
    // Create the compute program from the source buffer
    char *kernel_source = getKernelSource("../pi_vocl.cl");
    program = clCreateProgramWithSource(context, 1, (const char**)&kernel_source, NULL, &err);
    if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);
    // Build the program
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        size_t len;
        char buffer[2048];
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        printf("%s\n", buffer);
        die(err_code(err), __LINE__, __FILE__);
    }
    if (vector_size == 1)
    {
        kernel = clCreateKernel(program, "pi", &err);
        if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);
    }
    else if (vector_size == 4)
    {
        kernel = clCreateKernel(program, "pi_vec4", &err);
        if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);
    }
    else if (vector_size == 8)
    {
        kernel = clCreateKernel(program, "pi_vec8", &err);
        if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);
    }

    // Now that we know the size of the work_groups, we can set the number of work
    // groups, the actual number of steps, and the step size
    unsigned int nwork_groups = in_nsteps/(work_group_size*niters);

    // Get the max work group size for the kernel pi on our device
    size_t max_size;
    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_size), &max_size, NULL);
    if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);

    if (max_size > work_group_size)
    {
        work_group_size = max_size;
        nwork_groups = in_nsteps/(work_group_size*niters);
    }

    if (nwork_groups < 1)
    {
        err = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nwork_groups), &nwork_groups, NULL);
        if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);
        work_group_size = in_nsteps/(nwork_groups*niters);
    }

    unsigned int nsteps = work_group_size * niters * nwork_groups;
    float step_size = 1.0f / (float) nsteps;

    // Array to hold partial sum
    float *h_psum = (float*)calloc(nwork_groups, sizeof(float));

    printf("%d work groups of size %d.\n", nwork_groups, work_group_size);
    printf(" %u Integration steps\n", nsteps);

    cl_mem d_partial_sums = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * nwork_groups, NULL, &err);
    if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);

    // Execute the kernel over the entire range of our 1D input data set
    // using the maximum number of work group items for this device
    const size_t global = nwork_groups * work_group_size;
    const size_t local = work_group_size;

    err = clSetKernelArg(kernel, 0, sizeof(int), &niters);
    if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);
    err = clSetKernelArg(kernel, 1, sizeof(float), &step_size);
    if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);
    err = clSetKernelArg(kernel, 2, sizeof(float) * work_group_size, NULL);
    if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);
    err = clSetKernelArg(kernel, 3, sizeof(cl_mem), &d_partial_sums);
    if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);

    // Start the timer
    double rtime = wtime();

    err = clEnqueueNDRangeKernel(
        queue, kernel,
        1, NULL, &global, &local,
        0, NULL, NULL);
    if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);

    err = clEnqueueReadBuffer(queue, d_partial_sums, CL_TRUE, 0,
        sizeof(float) * nwork_groups, h_psum, 0, NULL, NULL);
    if (err != CL_SUCCESS) die(err_code(err), __LINE__, __FILE__);

    // complete the sum and compute the final integral value on the host
    float pi_res = 0.0f;
    for (unsigned int i = 0; i < nwork_groups; i++)
    {
        pi_res += h_psum[i];
    }
    pi_res *= step_size;

    rtime = wtime() - rtime;

    printf("\nThe calculation ran in %lf seconds\n", rtime);
    printf(" pi = %f for %u steps\n", pi_res, nsteps);

    free(h_psum);
    free(kernel_source);

    return EXIT_SUCCESS;
}
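The host above assumes each work-group leaves exactly one partial sum in d_partial_sums, and it finishes the job by summing those values and scaling by step_size. The kernels themselves live in ../pi_vocl.cl, which is not part of this listing; as a rough sketch only, the scalar "pi" kernel is assumed to follow the usual pattern of per-work-item accumulation, a local-memory reduction, and a single store per group (the pi_vec4/pi_vec8 variants would do the same with float4/float8 accumulators).

// Sketch of a scalar midpoint-rule kernel matching the host's argument list
// (an assumption -- the real ../pi_vocl.cl is not shown in this listing).
__kernel void pi(const int niters,
                 const float step_size,
                 __local  float *local_sums,
                 __global float *partial_sums)
{
    const int gid   = get_global_id(0);
    const int lid   = get_local_id(0);
    const int lsize = get_local_size(0);

    // Each work-item integrates niters strips of [0,1]
    float accum = 0.0f;
    for (int i = gid * niters; i < (gid + 1) * niters; i++)
    {
        float x = ((float)i + 0.5f) * step_size;
        accum += 4.0f / (1.0f + x * x);
    }
    local_sums[lid] = accum;
    barrier(CLK_LOCAL_MEM_FENCE);

    // Work-item 0 reduces the group's sums and writes one value per group;
    // the host multiplies the grand total by step_size afterwards.
    if (lid == 0)
    {
        float sum = 0.0f;
        for (int i = 0; i < lsize; i++)
            sum += local_sums[i];
        partial_sums[get_group_id(0)] = sum;
    }
}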
Example #19
File: dusb_cmd.c Project: TC01/tilibs
int dusb_dissect_cmd_data(CalcModel model, FILE *f, const uint8_t * data, uint32_t len, uint32_t vtl_size, uint16_t vtl_type)
{
	int ret = dusb_check_cmd_data(model, data, len, vtl_size, vtl_type);
	(void)vtl_size;

	if (ret)
	{
		return ret;
	}

	switch (vtl_type)
	{
		case DUSB_VPKT_PING:
		{
			uint16_t arg1 = (((uint16_t)(data[0])) << 8) | ((uint16_t)(data[1]));
			uint16_t arg2 = (((uint16_t)(data[2])) << 8) | ((uint16_t)(data[3]));
			uint16_t arg3 = (((uint16_t)(data[4])) << 8) | ((uint16_t)(data[5]));
			uint16_t arg4 = (((uint16_t)(data[6])) << 8) | ((uint16_t)(data[7]));
			uint16_t arg5 = (((uint16_t)(data[8])) << 8) | ((uint16_t)(data[9]));
			fprintf(f, "Set mode: { %u, %u, %u, %u, 0x%04X }\n", arg1, arg2, arg3, arg4, arg5);
		}
		break;

		case DUSB_VPKT_PARM_REQ:
		{
			uint16_t npids = (((uint16_t)(data[0])) << 8) | ((uint16_t)(data[1]));
			uint16_t i;

			if (len == 2U + npids * 2)
			{
				data += 2;
				fprintf(f, "Requested %u (%X) parameter IDs:\n", npids, npids);
				for (i = 0; i < npids; i++)
				{
					uint16_t pid = (((uint16_t)(data[0])) << 8) | ((uint16_t)(data[1]));
					data += 2;
					fprintf(f, "\t%04X (%s)\n", pid, dusb_cmd_param_type2name(pid));
				}
				fputc('\n', f);
			}
		}
		break;

		case DUSB_VPKT_PARM_DATA:
		{
			uint16_t nparams = (((uint16_t)(data[0])) << 8) | ((uint16_t)(data[1]));
			uint16_t i;
			uint32_t additional_size = 0;

			if (len >= 2U + 3 * nparams)
			{
				data += 2;
				fprintf(f, "Received %u (%X) parameter values:\n", nparams, nparams);
				for (i = 0; i < nparams; i++)
				{
					uint16_t pid = (((uint16_t)(data[0])) << 8) | ((uint16_t)(data[1]));
					uint8_t ok;
					data += 2;
					ok = !(*data++);
					fprintf(f, "\t%04X (%s): ", pid, dusb_cmd_param_type2name(pid));
					if (ok)
					{
						uint16_t j;
						uint16_t size = (((uint16_t)(data[0])) << 8) | ((uint16_t)(data[1]));

						data += 2;
						additional_size += size + 2;
						if (len < 2U + 3 * nparams + additional_size)
						{
							break;
						}
						fprintf(f, "OK, size %04X\n\t\t", size);
						for (j = 0; j < size;)
						{
							fprintf(f, "%02X ", *data++);
							if (!(++j & 15))
							{
								fprintf(f, "\n\t\t");
							}
						}
						fputc('\n', f);
					}
					else
					{
						fputs("NOK !\n", f);
					}
				}
			}
			// else do nothing.
		}
		break;

		case DUSB_VPKT_PARM_SET:
		{
			uint16_t id = (((uint16_t)(data[0])) << 8) | ((uint16_t)(data[1]));
			uint16_t size = (((uint16_t)(data[2])) << 8) | ((uint16_t)(data[3]));
			uint16_t i;

			data += 4;
			if (len == 4U + size)
			{
				fprintf(f, "Sending value of size %04X for parameter %04X\n\t", size, id);
				for (i = 0; i < size; i++)
				{
					fprintf(f, "%02X ", *data++);
					if (!((i + 1) & 15))
					{
						fprintf(f, "\n\t");
					}
				}
			}
			// else do nothing.
		}
		break;

		case DUSB_VPKT_OS_BEGIN:
		{
			uint32_t size = (((uint32_t)data[7]) << 24) | (((uint32_t)data[8]) << 16) | (((uint32_t)data[9]) << 8) | (((uint32_t)data[10]) << 0);
			fprintf(f, "Size: %lu / %08lX\n", (unsigned long)size, (unsigned long)size);
		}
		break;

		case DUSB_VPKT_OS_ACK:
		{
			uint32_t size = (((uint32_t)data[0]) << 24) | (((uint32_t)data[1]) << 16) | (((uint32_t)data[2]) << 8) | (((uint32_t)data[3]) << 0);
			fprintf(f, "Chunk size: %lu / %08lX\n", (unsigned long)size, (unsigned long)size);
		}
		break;

		case DUSB_VPKT_OS_HEADER:
		case DUSB_VPKT_OS_DATA:
		{
			if (model == CALC_TI83PCE_USB || model == CALC_TI84PCE_USB)
			{
				uint32_t addr = (((uint32_t)data[3]) << 24) | (((uint32_t)data[2]) << 16) | (((uint32_t)data[1]) << 8) | (((uint32_t)data[0]) << 0);
				fprintf(f, "Address: %08lX\n", (unsigned long)addr);
			}
			else if (model != CALC_TI89T_USB)
			{
				uint16_t addr = (((uint16_t)data[0]) << 8) | (((uint32_t)data[1]) << 0);
				fprintf(f, "Address: %04X\tPage: %02X\tFlag: %02X\n", addr, data[2], data[3]);
			}
			// else do nothing.
		}
		break;

		case DUSB_VPKT_DELAY_ACK:
		{
			uint32_t delay = (((uint32_t)data[0]) << 24) | (((uint32_t)data[1]) << 16) | (((uint32_t)data[2]) << 8) | (data[3] << 0);
			fprintf(f, "Delay: %lu\n", (unsigned long)delay);
		}
		break;

		case DUSB_VPKT_ERROR:
		{
			int err = err_code((((uint16_t)data[0]) << 8) | (((uint32_t)data[1]) << 0));
			fprintf(f, "Error code: %u (%04X)\n", err, err);
		}
		break;

		// Nothing to do.
		case DUSB_VPKT_VAR_CNTS:
		case DUSB_VPKT_MODE_SET:
		case DUSB_VPKT_EOT_ACK:
		case DUSB_VPKT_DATA_ACK:
		case DUSB_VPKT_EOT:
		break;

		// TODO
		case DUSB_VPKT_DIR_REQ:
		case DUSB_VPKT_VAR_HDR:
		case DUSB_VPKT_RTS:
		case DUSB_VPKT_VAR_REQ:
		case DUSB_VPKT_MODIF_VAR:
		case DUSB_VPKT_EXECUTE:
		{
			fputs("(no extra dissection performed for now)\n", f);
		}
		break;

		default:
		{
			fputs("(not performing extra dissection on unknown vpkt type)\n", f);
		}
		break;

	}

	return ret;
}
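Every branch of the dissector above rebuilds big-endian integers from the byte stream with the same shift-and-or expressions. Purely as an illustration of that pattern (these helpers are hypothetical and not part of dusb_cmd.c), the extraction boils down to:

#include <stdint.h>

/* Hypothetical helpers showing the big-endian field extraction
 * written inline throughout dusb_dissect_cmd_data(). */
static uint16_t read_be16(const uint8_t *p)
{
    return (uint16_t)(((uint16_t)p[0] << 8) | (uint16_t)p[1]);
}

static uint32_t read_be32(const uint8_t *p)
{
    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
           ((uint32_t)p[2] <<  8) | ((uint32_t)p[3] <<  0);
}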
Example #20
File: pi_ocl.c Project: casertillo/HPC
int main(int argc, char *argv[])
{
    float *h_psum;              // vector to hold partial sum
    int in_nsteps = INSTEPS;    // default number of steps (updated later to device preferable)
    int niters = ITERS;         // number of iterations
    int nsteps;
    float step_size;
    size_t nwork_groups;
    size_t max_size, work_group_size = 8;
    float pi_res;

    cl_mem d_partial_sums;

    char *kernelsource = getKernelSource("../pi_ocl.cl");             // Kernel source

    cl_int err;
    cl_device_id        device;     // compute device id
    cl_context       context;       // compute context
    cl_command_queue commands;      // compute command queue
    cl_program       program;       // compute program
    cl_kernel        kernel_pi;     // compute kernel

    // Set up OpenCL context, queue, kernel, etc.
    cl_uint deviceIndex = 0;
    parseArguments(argc, argv, &deviceIndex);

    // Get list of devices
    cl_device_id devices[MAX_DEVICES];
    unsigned numDevices = getDeviceList(devices);

    // Check device index in range
    if (deviceIndex >= numDevices)
    {
      printf("Invalid device index (try '--list')\n");
      return EXIT_FAILURE;
    }

    device = devices[deviceIndex];

    char name[MAX_INFO_STRING];
    getDeviceName(device, name);
    printf("\nUsing OpenCL device: %s\n", name);



    // Create a compute context
    context = clCreateContext(0, 1, &device, NULL, NULL, &err);
    checkError(err, "Creating context");
    // Create a command queue
    commands = clCreateCommandQueue(context, device, 0, &err);
    checkError(err, "Creating command queue");
    // Create the compute program from the source buffer
    program = clCreateProgramWithSource(context, 1, (const char **) & kernelsource, NULL, &err);
    checkError(err, "Creating program");
    // Build the program  
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        size_t len;
        char buffer[2048];

        printf("Error: Failed to build program executable!\n%s\n", err_code(err));
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        printf("%s\n", buffer);
        return EXIT_FAILURE;
    }
    // Create the compute kernel from the program 
    kernel_pi = clCreateKernel(program, "pi", &err);
    checkError(err, "Creating kernel");

    // Find kernel work-group size
    err = clGetKernelWorkGroupInfo (kernel_pi, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &work_group_size, NULL);
    checkError(err, "Getting kernel work group info");
    // Now that we know the size of the work-groups, we can set the number of
    // work-groups, the actual number of steps, and the step size
    nwork_groups = in_nsteps/(work_group_size*niters);

    if (nwork_groups < 1)
    {
        err = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(size_t), &nwork_groups, NULL);
        checkError(err, "Getting device compute unit info");
        work_group_size = in_nsteps / (nwork_groups * niters);
    }

    nsteps = work_group_size * niters * nwork_groups;
    step_size = 1.0f/(float)nsteps;

    printf("nsteps:%d\n", nsteps);
    printf("niters:%d\n", niters);
    printf("work_group_size:%zd\n", work_group_size);
    printf("n work groups:%ld\n", nwork_groups);
    printf("step_size:%f\n", step_size);

    h_psum = calloc(nwork_groups, sizeof(float));
    if (!h_psum)
    {
        printf("Error: could not allocate host memory for h_psum\n");
        return EXIT_FAILURE;
    }

    printf(" %ld work-groups of size %ld. %d Integration steps\n",
            nwork_groups,
            work_group_size,
            nsteps);

    d_partial_sums = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * nwork_groups, NULL, &err);
    checkError(err, "Creating buffer d_partial_sums");

    // Set kernel arguments
    err  = clSetKernelArg(kernel_pi, 0, sizeof(int), &niters);
    err |= clSetKernelArg(kernel_pi, 1, sizeof(float), &step_size);
    err |= clSetKernelArg(kernel_pi, 2, sizeof(float) * work_group_size, NULL);
    err |= clSetKernelArg(kernel_pi, 3, sizeof(cl_mem), &d_partial_sums);
    checkError(err, "Settin kernel args");

    // Execute the kernel over the entire range of our 1D input data set
    // using the maximum number of work items for this device
    size_t global = nsteps / niters;
    size_t local = work_group_size;
    double rtime = wtime();
    err = clEnqueueNDRangeKernel(
        commands,
        kernel_pi,
        1, NULL,
        &global,
        &local,
        0, NULL, NULL);
    checkError(err, "Enqueueing kernel");

    err = clEnqueueReadBuffer(
        commands,
        d_partial_sums,
        CL_TRUE,
        0,
        sizeof(float) * nwork_groups,
        h_psum,
        0, NULL, NULL);
    checkError(err, "Reading back d_partial_sums");

    // complete the sum and compute the final integral value on the host
    pi_res = 0.0f;
    for (unsigned int i = 0; i < nwork_groups; i++)
    {
        pi_res += h_psum[i];
    }
    pi_res *= step_size;

    rtime = wtime() - rtime;

    printf("\nThe calculation ran in %lf seconds\n", rtime);
    printf(" pi = %f for %d steps\n", pi_res, nsteps);

    // clean up
    clReleaseMemObject(d_partial_sums);
    clReleaseProgram(program);
    clReleaseKernel(kernel_pi);
    clReleaseCommandQueue(commands);
    clReleaseContext(context);
    free(kernelsource);
    free(h_psum);
    return EXIT_SUCCESS;
}
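Both pi hosts end the same way: sum the per-group partial sums, then multiply by step_size. Assuming the kernel accumulates 4/(1 + x_i^2) at the strip midpoints, which is the usual formulation for this exercise, the quantity being computed is the midpoint-rule estimate

\pi = \int_0^1 \frac{4}{1+x^2}\,dx \;\approx\; \text{step\_size} \sum_{i=0}^{n_\text{steps}-1} \frac{4}{1+x_i^2}, \qquad x_i = \left(i + \tfrac{1}{2}\right)\text{step\_size}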
Example #21
int main(int argc, char *argv[])
{
    float *h_A;             // A matrix
    float *h_B;             // B matrix
    float *h_C;             // C = A*B matrix
    int N;                  // A[N][N], B[N][N], C[N][N]
    int size;               // number of elements in each matrix

    cl_mem d_a, d_b, d_c;   // Matrices in device memory

    double start_time;      // Starting time
    double run_time;        // timing data

    char * kernelsource;    // kernel source string

    cl_int err;             // error code returned from OpenCL calls
    cl_device_id     device;        // compute device id 
    cl_context       context;       // compute context
    cl_command_queue commands;      // compute command queue
    cl_program       program;       // compute program
    cl_kernel        kernel;        // compute kernel

    N = ORDER;

    size = N * N;

    h_A = (float *)malloc(size * sizeof(float));
    h_B = (float *)malloc(size * sizeof(float));
    h_C = (float *)malloc(size * sizeof(float));

//--------------------------------------------------------------------------------
// Create a context, queue and device.
//--------------------------------------------------------------------------------

    cl_uint deviceIndex = 0;
    parseArguments(argc, argv, &deviceIndex);

    // Get list of devices
    cl_device_id devices[MAX_DEVICES];
    unsigned numDevices = getDeviceList(devices);

    // Check device index in range
    if (deviceIndex >= numDevices)
    {
      printf("Invalid device index (try '--list')\n");
      return EXIT_FAILURE;
    }

    device = devices[deviceIndex];

    char name[MAX_INFO_STRING];
    getDeviceName(device, name);
    printf("\nUsing OpenCL device: %s\n", name);

    // Create a compute context
    context = clCreateContext(0, 1, &device, NULL, NULL, &err);
    checkError(err, "Creating context");
    // Create a command queue
    commands = clCreateCommandQueue(context, device, 0, &err);
    checkError(err, "Creating command queue");


//--------------------------------------------------------------------------------
// Run sequential version on the host
//--------------------------------------------------------------------------------

    initmat(N, h_A, h_B, h_C);

    printf("\n===== Sequential, matrix mult (dot prod), order %d on host CPU ======\n",ORDER);
    for(int i = 0; i < COUNT; i++)
    {
        zero_mat(N, h_C);
        start_time = wtime();

        seq_mat_mul_sdot(N, h_A, h_B, h_C);

        run_time  = wtime() - start_time;
        results(N, h_C, run_time);
    }

//--------------------------------------------------------------------------------
// Setup the buffers, initialize matrices, and write them into global memory
//--------------------------------------------------------------------------------

    //  Reset A, B and C matrices (just to play it safe)
    initmat(N, h_A, h_B, h_C);

    d_a = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                            sizeof(float) * size, h_A, &err);
    checkError(err, "Creating buffer d_a");
    d_b = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                            sizeof(float) * size, h_B, &err);
    checkError(err, "Creating buffer d_b");
    d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
                            sizeof(float) * size, NULL, &err);
    checkError(err, "Creating buffer d_c");


//--------------------------------------------------------------------------------
// OpenCL matrix multiplication ... Naive
//--------------------------------------------------------------------------------

    kernelsource = getKernelSource("../C_elem.cl");
    // Create the compute program from the source buffer
    program = clCreateProgramWithSource(context, 1, (const char **) & kernelsource, NULL, &err);
    checkError(err, "Creating program with C_elem.cl");
    free(kernelsource);
    // Build the program
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        size_t len;
        char buffer[2048];

        printf("Error: Failed to build program executable!\n%s\n", err_code(err));
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        printf("%s\n", buffer);
        return EXIT_FAILURE;
    }

    // Create the compute kernel from the program
    kernel = clCreateKernel(program, "mmul", &err);
    checkError(err, "Creating kernel from C_elem.cl");

    printf("\n===== OpenCL, matrix mult, C(i,j) per work item, order %d ======\n",N);

    // Do the multiplication COUNT times
    for (int i = 0; i < COUNT; i++)
    {
        zero_mat(N, h_C);

        err =  clSetKernelArg(kernel, 0, sizeof(int),    &N);
        err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_a);
        err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_b);
        err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &d_c);
        checkError(err, "Setting kernel args");

        start_time = wtime();

        // Execute the kernel over the entire range of C matrix elements ... computing
        // a dot product for each element of the product matrix.  The local work
        // group size is set to NULL ... so I'm telling the OpenCL runtime to
        // figure out a local work group size for me.
        const size_t global[2] = {N, N};
        err = clEnqueueNDRangeKernel(
            commands,
            kernel,
            2, NULL,
            global, NULL,
            0, NULL, NULL);
        checkError(err, "Enqueueing kernel");

        err = clFinish(commands);
        checkError(err, "Waiting for kernel to finish");

        run_time = wtime() - start_time;

        err = clEnqueueReadBuffer(
            commands, d_c, CL_TRUE, 0,
            sizeof(float) * size, h_C,
            0, NULL, NULL);
        checkError(err, "Copying back d_c");

        results(N, h_C, run_time);

    } // end for loop

//--------------------------------------------------------------------------------
// OpenCL matrix multiplication ... C row per work item
//--------------------------------------------------------------------------------
    kernelsource = getKernelSource("../C_row.cl");
    // Create the compute program from the source buffer
    program = clCreateProgramWithSource(context, 1, (const char **) & kernelsource, NULL, &err);
    checkError(err, "Creating program with C_row.cl");
    free(kernelsource);
    // Build the program
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        size_t len;
        char buffer[2048];

        printf("Error: Failed to build program executable!\n%s\n", err_code(err));
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        printf("%s\n", buffer);
        return EXIT_FAILURE;
    }

    // Create the compute kernel from the program
    kernel = clCreateKernel(program, "mmul", &err);
    checkError(err, "Creating kernel from C_row.cl");

    printf("\n===== OpenCL, matrix mult, C row per work item, order %d ======\n",N);

    // Do the multiplication COUNT times
    for (int i = 0; i < COUNT; i++)
    {
        zero_mat(N, h_C);

        err =  clSetKernelArg(kernel, 0, sizeof(int),    &N);
        err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_a);
        err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_b);
        err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &d_c);
        checkError(err, "Setting kernel args");

        start_time = wtime();

        // Execute the kernel over the rows of the C matrix ... computing
        // a dot product for each element of the product matrix.
        const size_t global = N;
        err = clEnqueueNDRangeKernel(
            commands,
            kernel,
            1, NULL,
            &global, NULL,
            0, NULL, NULL);
        checkError(err, "Enqueueing kernel");

        err = clFinish(commands);
        checkError(err, "Waiting for kernel to finish");

        run_time = wtime() - start_time;

        err = clEnqueueReadBuffer(
            commands, d_c, CL_TRUE, 0,
            sizeof(float) * size, h_C,
            0, NULL, NULL);
        checkError(err, "Reading back d_c");

        results(N, h_C, run_time);

    } // end for loop


//--------------------------------------------------------------------------------
// OpenCL matrix multiplication ... C row per work item, A row in private memory
//--------------------------------------------------------------------------------
    kernelsource = getKernelSource("../C_row_priv.cl");
    // Create the compute program from the source buffer
    program = clCreateProgramWithSource(context, 1, (const char **) & kernelsource, NULL, &err);
    checkError(err, "Creating program from C_row_priv.cl");
    free(kernelsource);
    // Build the program
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        size_t len;
        char buffer[2048];

        printf("Error: Failed to build program executable!\n%s\n", err_code(err));
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        printf("%s\n", buffer);
        return EXIT_FAILURE;
    }

    // Create the compute kernel from the program
    kernel = clCreateKernel(program, "mmul", &err);
    checkError(err, "Creating kernel from C_row_priv.cl");

    printf("\n===== OpenCL, matrix mult, C row, A row in priv mem, order %d ======\n",N);

    // Do the multiplication COUNT times
    for (int i = 0; i < COUNT; i++)
    {
        zero_mat(N, h_C);

        err =  clSetKernelArg(kernel, 0, sizeof(int),    &N);
        err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_a);
        err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_b);
        err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &d_c);
        checkError(err, "Setting kernel args");

        start_time = wtime();

        // Execute the kernel over the rows of the C matrix ... computing
        // a dot product for each element of the product matrix.
        const size_t global = N;
        const size_t local = ORDER / 16;
        err = clEnqueueNDRangeKernel(
            commands,
            kernel,
            1, NULL,
            &global, &local,
            0, NULL, NULL);
        checkError(err, "Enqueueing kernel");

        err = clFinish(commands);
        checkError(err, "Waiting for kernel to finish");

        run_time = wtime() - start_time;

        err = clEnqueueReadBuffer(
            commands, d_c, CL_TRUE, 0,
            sizeof(float) * size, h_C,
            0, NULL, NULL);
        checkError(err, "Reading back d_c");

        results(N, h_C, run_time);

    } // end for loop


//--------------------------------------------------------------------------------
// Clean up!
//--------------------------------------------------------------------------------

    free(h_A);
    free(h_B);
    free(h_C);
    clReleaseMemObject(d_a);
    clReleaseMemObject(d_b);
    clReleaseMemObject(d_c);
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(commands);
    clReleaseContext(context);

    return EXIT_SUCCESS;
}
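The naive variant above enqueues one work-item per element of C (a 2D range of N x N) and each work-item computes one full dot product. The kernel is built from ../C_elem.cl, which is not included in this listing; a typical kernel of that shape might look like the sketch below (an assumption, not the project's actual source).

// Sketch of a C(i,j)-per-work-item matrix multiply kernel
// (assumed shape only; the real ../C_elem.cl is not shown here).
__kernel void mmul(const int N,
                   __global const float *A,
                   __global const float *B,
                   __global float *C)
{
    const int i = get_global_id(0);   // row of C
    const int j = get_global_id(1);   // column of C

    if (i < N && j < N)
    {
        float acc = 0.0f;
        for (int k = 0; k < N; k++)
            acc += A[i * N + k] * B[k * N + j];
        C[i * N + j] = acc;
    }
}

The C-row and private-memory variants that follow shrink the index space to one work-item per row and keep a row of A in registers, trading some parallelism for less redundant global-memory traffic.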
Example #22
int main(int ac, char *av[])
{
        char myname[HOSTNAMESIZE];
        char *progname = av[0];
        char *logfname;
        unsigned version;
        prod_class_t clss;
        prod_spec spec;
        int seq_start = 0;
        int status;
        ErrorObj* error;
        unsigned remotePort = LDM_PORT;
        
        logfname = "-";
        remote = "localhost";

        (void)set_timestamp(&clss.from);
        clss.to = TS_ENDT;
        clss.psa.psa_len = 1;
        clss.psa.psa_val = &spec;
        spec.feedtype = DEFAULT_FEEDTYPE;
        spec.pattern = ".*";

        {
        extern int optind;
        extern char *optarg;
        int ch;
        int logmask = (LOG_MASK(LOG_ERR) | LOG_MASK(LOG_WARNING) |
            LOG_MASK(LOG_NOTICE));

        while ((ch = getopt(ac, av, "vxl:h:f:P:s:")) != EOF)
                switch (ch) {
                case 'v':
                        logmask |= LOG_MASK(LOG_INFO);
                        break;
                case 'x':
                        logmask |= LOG_MASK(LOG_DEBUG);
                        break;
                case 'l':
                        logfname = optarg;
                        break;
                case 'h':
                        remote = optarg;
                        break;
                case 'f':
                        spec.feedtype = atofeedtypet(optarg);
                        if(spec.feedtype == NONE)
                        {
                            fprintf(stderr, "Unknown feedtype \"%s\"\n", optarg);
                                usage(progname);        
                        }
                        break;
                case 'P': {
                    char*       suffix = "";
                    long        port;

                    errno = 0;
                    port = strtol(optarg, &suffix, 0);

                    if (0 != errno || 0 != *suffix ||
                        0 >= port || 0xffff < port) {

                        (void)fprintf(stderr, "%s: invalid port %s\n",
                             av[0], optarg);
                        usage(av[0]);   
                    }

                    remotePort = (unsigned)port;

                    break;
                }
                case 's':
                        seq_start = atoi(optarg);
                        break;
                case '?':
                        usage(progname);
                        break;
                }

        ac -= optind; av += optind;

        if(ac < 1) usage(progname);
        (void) setulogmask(logmask);
        }

        /*
         * Set up error logging
         */
        (void) openulog(ubasename(progname), LOG_NOTIME, LOG_LDM, logfname);

        /*
         * register exit handler
         */
        if(atexit(cleanup) != 0)
        {
                serror("atexit");
                exit(1);
        }

        /*
         * set up signal handlers
         */
        set_sigactions();

        (void) strcpy(myname, ghostname());

        /*
         * Contact the server.
         */
        error = ldm_clnttcp_create_vers(remote, remotePort, SIX, &clnt,
                NULL, NULL);

        (void)exitIfDone(1);

        if (!error) {
            version = SIX;
            hiya = my_hiya_6;
            send_product = send_product_6;
            nullproc = nullproc_6;
        }
        else if (LDM_CLNT_BAD_VERSION == err_code(error)) {
            err_free(error);

            error = ldm_clnttcp_create_vers(remote, remotePort, FIVE, &clnt,
                    NULL, NULL);

            (void)exitIfDone(1);

            if (!error) {
                version = FIVE;
                hiya = my_hiya_5;
                send_product = send_product_5;
                nullproc = NULL;
            }
        }

        if (error) {
            err_log(error, ERR_FAILURE);
            err_free(error);
            status = 1;
        }
        else {
            udebug("version %u", version);

            status = ldmsend(clnt, &clss, myname, seq_start, ac, av);
        }

        return status != 0; 
}
Example #23
File: main.c Project: whs1787/ffs
int main(int argc, char *argv[])
{
	static const struct option long_opts[] = {
		/* commands */
		{"inject", required_argument, NULL, c_INJECT},
		{"remove", required_argument, NULL, c_REMOVE},
		{"hexdump", required_argument, NULL, c_HEXDUMP},
		/* options */
		{"output", required_argument, NULL, o_OUTPUT},
		/* flags */
		{"force", no_argument, NULL, f_FORCE},
		{"p8", no_argument, NULL, f_P8},
		{"verbose", no_argument, NULL, f_VERBOSE},
		{"help", no_argument, NULL, f_HELP},
		{0, 0, 0, 0}
	};

	static const char *short_opts = "I:R:H:o:fpvh";

	int rc = EXIT_FAILURE;

	if (argc == 1)
		usage(args.short_name, false), exit(rc);

	int opt = 0, idx = 0;
	while ((opt = getopt_long(argc, argv, short_opts, long_opts,
				  &idx)) != -1)
		if (process_argument(&args, opt, optarg) < 0)
			goto error;

	/* getopt_long doesn't know what to do with orphans, */
	/* so we'll scoop them up here, and deal with them later */

	while (optind < argc)
		if (process_option(&args, argv[optind++]) < 0)
			goto error;

	if (args.verbose == f_VERBOSE)
		args_dump(&args);

	if (validate_args(&args) < 0)
		goto error;
	if (process_args(&args) < 0)
		goto error;

	rc = EXIT_SUCCESS;

	if (false) {
		err_t *err;
error:
		err = err_get();
		assert(err != NULL);

		fprintf(stderr, "%s: %s : %s(%d) : (code=%d) %.*s\n",
			program_invocation_short_name,
			err_type_name(err), err_file(err), err_line(err),
			err_code(err), err_size(err), (char *)err_data(err));
	}

	return rc;
}
Example #24
File: Exporter.cpp Project: dovalec/3D
int main(int argc, char * argv[])
{

	MStatus stat;
    
	// initialise the maya library - This basically starts up Maya
	if(MLibrary::initialize(argv[0], true) != MS::kSuccess)
    {
		err_stop("[ERROR] Maya failed to initialise\n");
    }
	if (argc < 2)
	{
		err_stop("[ERROR] No input file specified\n");
	}

	std::string fileName = argv[1];
	cout << "Loading file: " << fileName;
    
	//std::string scale = "0.01";
	std::string scale = "1.0";
	std::string todo = "all";
	std::string target = "tegra"; // or tegra

	if (argc > 2)
	{
		scale = argv[2];
	}

	if (argc > 3)
	{
		todo = argv[3];
	}

	if (argc > 4)
	{
		target = argv[4];
	}

	Globals & globalFlags = Globals::GetGlobals();
	globalFlags.TARGET = target;
	globalFlags.SCALE = (float)atof(scale.c_str());

	char txt[128]={0};
	sprintf(txt,"Scale: %f\n",globalFlags.SCALE);
	err_info(txt);
	

	for (unsigned int i=0;i<fileName.length();i++)
		if (fileName[i] == '\\')
			fileName[i] = '/';

    err_info("Opening file: " + fileName);
	stat = MFileIO::open(fileName.c_str());
	err_code(stat);

	int dot = fileName.rfind(".");
	int slash = fileName.rfind("/");

	string folder = fileName;
	folder.erase(slash, folder.length() - slash);
	
	fileName.erase(dot, fileName.length() - dot);
	fileName.erase(0, slash+1);

	if (todo == "model" || todo == "all")
	{
		ModelExporter & modelExport = ModelExporter::GetExporter();
		modelExport.Export(folder, fileName);
	
		AnimationExport & animationExport = AnimationExport::GetExporter();
		animationExport.Export(folder, fileName);

		UVAnimationExport & uvAnimationExport = UVAnimationExport::GetExporter();
		uvAnimationExport.Export(folder, fileName);
	}

	if (todo == "locator" || todo == "all")
	{
		LocatorExport & locatorExport = LocatorExport::GetExporter();
		locatorExport.Export(folder, fileName);
	}

	if (todo == "ref" || todo == "all")
	{
		RefSceneExport & refExport = RefSceneExport::GetExporter();
		refExport.Export(folder, fileName);
	}


	if (todo == "stop_circle" || todo == "all")
	{
		StopCircleExport & stopCircleExport = StopCircleExport::GetExporter();
		stopCircleExport.Export(folder, fileName);
	}

	if (todo == "launcher" || todo == "all")
	{
		BotLauncherExport & launcherExport = BotLauncherExport::GetExporter();
		launcherExport.Export(folder, fileName);
	}

	if (todo == "curve" || todo == "all")
	{
		CurveExport & curveExport = CurveExport::GetExporter();
		curveExport.Export(folder, fileName);
	}

	if (todo == "wire" || todo == "all")
	{
		WireExport & wireExport = WireExport::GetExporter();
		wireExport.Export(folder, fileName);
	}

	if (todo == "collision_mesh" || todo == "all")
	{
		CollisionMeshExport & collisionMeshExport = CollisionMeshExport::GetExporter();
		collisionMeshExport.Export(folder, fileName);
	}

	if (todo == "graph_mesh" || todo == "all")
	{
		GraphExport & graphExport = GraphExport::GetExporter();
		graphExport.Export(folder, fileName);
	}


	if (todo == "doors" || todo == "all")
	{
		DoorsExport & doorsExport = DoorsExport::GetExporter();
		doorsExport.Export(folder, fileName);
	}

	if (todo == "physics" || todo == "all")
	{
		PhysicsExport & physicsExport = PhysicsExport::GetExporter();
		physicsExport.Export(folder, fileName);
	}

	return 0;
}
Example #25
int main(void)
{
    float *h_psum;					// vector to hold partial sum
    int in_nsteps = INSTEPS;		// default number of steps (updated later to device preferable)
    int niters = ITERS;				// number of iterations
    int nsteps;
    float step_size;
    ::size_t nwork_groups;
    ::size_t max_size, work_group_size = 8;
    float pi_res;

	cl::Buffer d_partial_sums;

	try
	{
        // Create a context
        cl::Context context(DEVICE);

        // Create the program object
		cl::Program program(context, util::loadProgram("pi_ocl.cl"), true);

		// Get the command queue
        cl::CommandQueue queue(context);

        // Create the kernel object for querying information
		cl::Kernel ko_pi(program, "pi");
        // Get the device we are using
        std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
        cl::Device device = devices[0];

        // Get the work group size
		work_group_size = ko_pi.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(device);
        //printf("wgroup_size = %lu\n", work_group_size);

		auto pi = cl::make_kernel<int, float, cl::LocalSpaceArg, cl::Buffer>(program, "pi");

		// Now that we know the size of the work_groups, we can set the number of work
		// groups, the actual number of steps, and the step size
		nwork_groups = in_nsteps/(work_group_size*niters);

		if ( nwork_groups < 1) {
			nwork_groups = 
				device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
			work_group_size=in_nsteps / (nwork_groups*niters);
		}

		nsteps = work_group_size * niters * nwork_groups;
		step_size = 1.0f/static_cast<float>(nsteps);
		std::vector<float> h_psum(nwork_groups);

		printf(
			" %d work groups of size %d.  %d Integration steps\n", 
			(int)nwork_groups, 
			(int)work_group_size,
			nsteps);

		d_partial_sums = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * nwork_groups);

		util::Timer timer;

		// Execute the kernel over the entire range of our 1d input data set
		// using the maximum number of work group items for this device
		pi(
			cl::EnqueueArgs(
				queue,
				cl::NDRange(nwork_groups * work_group_size),
				cl::NDRange(work_group_size)), 
			niters,
			step_size,
			cl::Local(sizeof(float) * work_group_size),
			d_partial_sums);

		cl::copy(queue, d_partial_sums, begin(h_psum), end(h_psum));

		// complete the sum and compute final integral value
		pi_res = 0.0f;
		for (unsigned int i = 0; i< nwork_groups; i++) {
			pi_res += h_psum[i];
		}
		pi_res = pi_res * step_size;
	
		//rtime = wtime() - rtime;
		double rtime = static_cast<double>(timer.getTimeMilliseconds()) / 1000.;
		printf("\nThe calculation ran in %lf seconds\n", rtime);
		printf(" pi = %f for %d steps\n", pi_res, nsteps);

	}
	catch (cl::Error err) {
		std::cout << "Exception\n";
		std::cerr 
            << "ERROR: "
            << err.what()
            << "("
            << err_code(err.err())
           << ")"
           << std::endl;
	}
}
Example #26
int main(void) {
	//###############################################
	//
	// Declare variables for OpenCL
	//
	//###############################################
	int err;               // error code returned from OpenCL calls

	size_t global;                  // global domain size

	cl_device_id device_id = NULL;     // compute device id
	cl_context context;       // compute context
	cl_command_queue commands;      // compute command queue
	cl_program program;       // compute program
	cl_kernel ko_calculate_imagerowdots_iterations;       // compute kernel
	cl_kernel ko_calculate_colorrow;       // compute kernel

	cl_mem d_a;                    // device memory used for the input  a vector
	cl_mem d_b;                    // device memory

	int i;

	//###############################################
	//
	// Set values for mandelbrot
	//
	//###############################################

	//plane section values
	float x_ebene_min = -1;
	float y_ebene_min = -1;
	float x_ebene_max = 2;
	float y_ebene_max = 1;

	//monitor resolution values
	const long x_mon = 640;
	const long y_mon = 480;

	//Iterations
	long itr = 100;

	//abort condition
	float abort_value = 2;

	//Number of images per second
	long fps = 24;

	//video duration in seconds
	long video_duration = 3;

	//zoom speed in percentage
	float reduction = 5;

	//zoom dot
	my_complex_t zoom_dot;

	//###############################################
	//
	// Set up platform and GPU device
	//
	//###############################################

	cl_uint numPlatforms;

	// Find number of platforms
	err = clGetPlatformIDs(0, NULL, &numPlatforms);
	checkError(err, "Finding platforms");
	if (numPlatforms == 0) {
		printf("Found 0 platforms!\n");
		return EXIT_FAILURE;
	}

	// Get all platforms
	cl_platform_id Platform[numPlatforms];
	err = clGetPlatformIDs(numPlatforms, Platform, NULL);
	checkError(err, "Getting platforms");

	// Secure a GPU
	for (i = 0; i < numPlatforms; i++) {
		err = clGetDeviceIDs(Platform[i], DEVICE, 1, &device_id, NULL);
		if (err == CL_SUCCESS) {
			break;
		}
	}

	if (device_id == NULL)
		checkError(err, "Finding a device");

	err = output_device_info(device_id);
	checkError(err, "Printing device output");

	//###############################################
	//
	// Create context, command queue and kernel
	//
	//###############################################

	// Create a compute context
	context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
	checkError(err, "Creating context");

	// Create a command queue
	commands = clCreateCommandQueue(context, device_id, 0, &err);
	checkError(err, "Creating command queue");

	//Read Kernel source
	FILE *fp;
	char *source_str;
	size_t source_size, program_size;

	fp = fopen("./kernel/calculate_iterations.cl", "r");
	if (!fp) {
		printf("Failed to load kernel\n");
		return 1;
	}

	fseek(fp, 0, SEEK_END);
	program_size = ftell(fp);
	rewind(fp);
	source_str = (char*) malloc(program_size + 1);
	source_str[program_size] = '\0';
	fread(source_str, sizeof(char), program_size, fp);
	fclose(fp);

	// Create the compute program from the source buffer
	program = clCreateProgramWithSource(context, 1, (const char **) &source_str,
	NULL, &err);

	checkError(err, "Creating program");

	// Build the program
	err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
	if (err != CL_SUCCESS) {
		size_t len;
		char buffer[2048];

		printf("Error: Failed to build program executable!\n%s\n",
				err_code(err));
		clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
				sizeof(buffer), buffer, &len);
		printf("%s\n", buffer);

		// Determine the size of the log
		size_t log_size;
		clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL,
				&log_size);

		// Allocate memory for the log
		char *log = (char *) malloc(log_size);

		// Get the log
		clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
				log_size, log, NULL);

		// Print the log
		printf("%s\n", log);

		return EXIT_FAILURE;
	}

	// Create the compute kernel from the program
	ko_calculate_imagerowdots_iterations = clCreateKernel(program,
			"calculate_imagerowdots_iterations", &err);
	checkError(err, "Creating kernel");

	// Create the compute kernel from the program
	ko_calculate_colorrow = clCreateKernel(program, "calculate_colorrow", &err);
	checkError(err, "Creating kernel");

	int number_images = 0;
	do {
		//Get memory for image
		long* h_image = (long*) calloc(x_mon * y_mon, sizeof(long));
		unsigned char* h_image_pixel = (unsigned char*) calloc(
				x_mon * y_mon * 3, sizeof(unsigned char));

		//###############################################
		//###############################################
		//
		// Loop to calculate image dot iterations
		//
		//###############################################
		//###############################################

		float y_value = y_ebene_max;
		float delta_y = delta(y_ebene_min, y_ebene_max, y_mon);

		for (int row = 0; row < y_mon; ++row) {
			//###############################################
			//
			// Create and write buffer
			//
			//###############################################

			//Get memory for row
			long* h_image_row = (long*) calloc(x_mon, sizeof(long)); // a vector

			d_a = clCreateBuffer(context, CL_MEM_READ_WRITE,
					sizeof(long) * x_mon,
					NULL, &err);
			checkError(err, "Creating buffer d_a");

			// Write a vector into compute device memory
			err = clEnqueueWriteBuffer(commands, d_a, CL_TRUE, 0,
					sizeof(long) * x_mon, h_image_row, 0, NULL, NULL);
			checkError(err, "Copying h_a to device at d_a");

			//###############################################
			//
			// Set the arguments to our compute kernel
			//
			//###############################################

			err = clSetKernelArg(ko_calculate_imagerowdots_iterations, 0,
					sizeof(float), &x_ebene_min);
			err |= clSetKernelArg(ko_calculate_imagerowdots_iterations, 1,
					sizeof(float), &x_ebene_max);
			err |= clSetKernelArg(ko_calculate_imagerowdots_iterations, 2,
					sizeof(float), &y_value);
			err |= clSetKernelArg(ko_calculate_imagerowdots_iterations, 3,
					sizeof(long), &x_mon);
			err |= clSetKernelArg(ko_calculate_imagerowdots_iterations, 4,
					sizeof(float), &abort_value);
			err |= clSetKernelArg(ko_calculate_imagerowdots_iterations, 5,
					sizeof(long), &itr);
			err |= clSetKernelArg(ko_calculate_imagerowdots_iterations, 6,
					sizeof(cl_mem), &d_a);
			checkError(err, "Setting kernel arguments");

			/*__kernel void calculate_imagerowdots_iterations(const float x_min, const float x_max,
			 const float y_value, const long x_mon, const float abort_value, const long itr,
			 __global long * imagerow)*/

			// Execute the kernel over the entire range of our 1d input data set
			// letting the OpenCL runtime choose the work-group size
			global = x_mon;
			err = clEnqueueNDRangeKernel(commands,
					ko_calculate_imagerowdots_iterations, 1, NULL, &global,
					NULL, 0,
					NULL, NULL);
			checkError(err, "Enqueueing kernel");

			// Wait for the commands to complete
			err = clFinish(commands);
			checkError(err, "Waiting for kernel to finish");

			// Read back the results from the compute device
			err = clEnqueueReadBuffer(commands, d_a, CL_TRUE, 0,
					sizeof(long) * x_mon, h_image_row, 0, NULL, NULL);
			if (err != CL_SUCCESS) {
				printf("Error: Failed to read output array!\n%s\n",
						err_code(err));
				exit(1);
			}

			//reduce y
			y_value -= delta_y;

			// copy row to image
			memcpy(h_image + row * x_mon, h_image_row, sizeof(long) * x_mon);

			free(h_image_row);
		}

//		for (i = 0; i < x_mon * y_mon; ++i) {
//			printf("%ld ", h_image[i]);
//		}
//		fflush(stdout);

		//###############################################
		//###############################################
		//
		// End of loop to calculate image dot iterations
		//
		//###############################################
		//###############################################

		//###############################################
		//###############################################
		//
		// Begin color calculation
		//
		//###############################################
		//###############################################

		for (int row = 0; row < y_mon; ++row) {
			//Get memory for row
			long* h_image_row = (long*) calloc(x_mon, sizeof(long)); // a vector
			memcpy(h_image_row, h_image + row * x_mon, sizeof(long) * x_mon);

			d_a = clCreateBuffer(context, CL_MEM_READ_ONLY,
					sizeof(long) * x_mon,
					NULL, &err);
			checkError(err, "Creating buffer d_a");

			// Write a vector into compute device memory
			err = clEnqueueWriteBuffer(commands, d_a, CL_TRUE, 0,
					sizeof(long) * x_mon, h_image_row, 0, NULL, NULL);
			checkError(err, "Copying h_image_row to device at d_a");

			unsigned char* h_imagepixel_row = (unsigned char*) calloc(x_mon * 3,
					sizeof(unsigned char));     // a vector

			d_b = clCreateBuffer(context, CL_MEM_READ_WRITE,
					sizeof(unsigned char) * x_mon * 3,
					NULL, &err);
			checkError(err, "Creating buffer d_b");

			// Write a vector into compute device memory
			err = clEnqueueWriteBuffer(commands, d_b, CL_TRUE, 0,
					sizeof(unsigned char) * x_mon * 3, h_imagepixel_row, 0,
					NULL, NULL);
			checkError(err, "Copying h_imagepixel_row to device at d_b");

			//###############################################
			//
			// Set the arguments to our compute kernel
			//
			//###############################################

			err = clSetKernelArg(ko_calculate_colorrow, 0, sizeof(long),
					&x_mon);
			err |= clSetKernelArg(ko_calculate_colorrow, 1, sizeof(long), &itr);
			err |= clSetKernelArg(ko_calculate_colorrow, 2, sizeof(cl_mem),
					&d_a);
			err |= clSetKernelArg(ko_calculate_colorrow, 3, sizeof(cl_mem),
					&d_b);
			checkError(err, "Setting kernel arguments");

			/*__kernel void calculate_colorrow(const long width, long itr, long * imagerowvalues,
			 unsigned char * imagerow)*/

			// Execute the kernel over the entire range of our 1d input data set
			// letting the OpenCL runtime choose the work-group size
			global = x_mon;
			err = clEnqueueNDRangeKernel(commands, ko_calculate_colorrow, 1,
			NULL, &global, NULL, 0,
			NULL, NULL);
			checkError(err, "Enqueueing kernel");

			// Wait for the commands to complete
			err = clFinish(commands);
			checkError(err, "Waiting for kernel to finish");

			// Read back the results from the compute device
			err = clEnqueueReadBuffer(commands, d_b, CL_TRUE, 0,
					sizeof(unsigned char) * x_mon * 3, h_imagepixel_row, 0,
					NULL, NULL);
			if (err != CL_SUCCESS) {
				printf("Error: Failed to read output array!\n%s\n",
						err_code(err));
				exit(1);
			}

			memcpy(h_image_pixel + row * x_mon * 3, h_imagepixel_row,
					sizeof(unsigned char) * x_mon * 3);

			free(h_image_row);
			free(h_imagepixel_row);
		}

		if (number_images == 0) {
			zoom_dot = find_dot_to_zoom(x_ebene_min, x_ebene_max, y_ebene_min,
					y_ebene_max, h_image, y_mon, x_mon, itr);
		}

		reduce_plane_section_focus_dot(&x_ebene_min, &x_ebene_max, &y_ebene_min,
				&y_ebene_max, reduction, zoom_dot);


		// save the image
		char filename[50];
		sprintf(filename, "img-%d.bmp", number_images);

		safe_image_to_bmp(x_mon, y_mon, h_image_pixel, filename);

		free(h_image);
		free(h_image_pixel);

		number_images++;
		itr = (long) (itr + itr * reduction / 100);
		printf("%d\n", number_images);
		fflush(stdout);
	} while (number_images < (fps * video_duration));

	//###############################################
	//
	// cleanup then shutdown
	//
	//###############################################

	clReleaseMemObject(d_a);
	clReleaseMemObject(d_b);
	clReleaseProgram(program);
	clReleaseKernel(ko_calculate_imagerowdots_iterations);
	clReleaseKernel(ko_calculate_colorrow);
	clReleaseCommandQueue(commands);
	clReleaseContext(context);
	free(source_str);

	return 0;
}
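One recurring detail worth noting: several listings above dump the build log into a fixed 2048-byte buffer when clBuildProgram fails, which truncates long logs (the Mandelbrot example then queries the size and reads it a second time). A small helper along these lines, where print_build_log is a name invented here for illustration, sizes the allocation first:

#include <stdio.h>
#include <stdlib.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif

/* Hypothetical helper: fetch and print the full build log for one device. */
static void print_build_log(cl_program program, cl_device_id device)
{
    size_t log_size = 0;
    clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                          0, NULL, &log_size);

    char *log = (char *)malloc(log_size + 1);
    if (log == NULL)
        return;

    clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                          log_size, log, NULL);
    log[log_size] = '\0';

    printf("%s\n", log);
    free(log);
}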