示例#1
0
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // create vector of random numbers on the host
    std::vector<int> host_vector(PERF_N);
    std::generate(host_vector.begin(), host_vector.end(), rand_int);

    // create vector on the device and copy the data
    boost::compute::vector<int> device_vector(PERF_N, context);
    boost::compute::copy(
        host_vector.begin(), host_vector.end(), device_vector.begin(), queue
    );

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        boost::compute::reverse(
            device_vector.begin(), device_vector.end(), queue
        );
        queue.finish();
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    return 0;
}
示例#2
0
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // create vector of random numbers on the host
    std::vector<int> host_vector(PERF_N);
    std::generate(host_vector.begin(), host_vector.end(), rand_int);

    int sum = 0;
    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        sum = ParallelSum<int>(&host_vector[0], host_vector.size());
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
    std::cout << "sum: " << sum << std::endl;

    int host_sum = std::accumulate(host_vector.begin(), host_vector.end(), int(0));
    if(sum != host_sum){
        std::cerr << "ERROR: sum (" << sum << ") != (" << host_sum << ")" << std::endl;
        return -1;
    }

    return 0;
}
示例#3
0
int main()
{
    // get default device and setup context
    compute::device device = compute::system::default_device();
    compute::context context(device);
    compute::command_queue queue(context, device);

    // generate random data on the host
    std::vector<float> host_vector(10000);
    std::generate(host_vector.begin(), host_vector.end(), rand);

    // create a vector on the device
    compute::vector<float> device_vector(host_vector.size(), context);

    // transfer data from the host to the device
    compute::copy(
        host_vector.begin(), host_vector.end(), device_vector.begin(), queue
    );

    // calculate the square-root of each element in-place
    compute::transform(
        device_vector.begin(),
        device_vector.end(),
        device_vector.begin(),
        compute::sqrt<float>(),
        queue
    );

    // copy values back to the host
    compute::copy(
        device_vector.begin(), device_vector.end(), host_vector.begin(), queue
    );

    return 0;
}
示例#4
0
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // create vector of random numbers on the host
    std::vector<int> host_vector(PERF_N);
    std::generate(host_vector.begin(), host_vector.end(), rand_int);

    // create vector on the device and copy the data
    boost::compute::vector<int> device_vector(PERF_N, context);
    boost::compute::copy(
        host_vector.begin(),
        host_vector.end(),
        device_vector.begin(),
        queue
    );

    // trying to find element that isn't in vector (worst-case scenario)
    int wanted = rand_int_max + 1;

    // device iterator
    boost::compute::vector<int>::iterator device_result_it;

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        device_result_it = boost::compute::find(device_vector.begin(),
                                                device_vector.end(),
                                                wanted,
                                                queue);
        queue.finish();
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    // verify if found index is correct by comparing it with std::find() result
    size_t host_result_index = std::distance(host_vector.begin(),
                                             std::find(host_vector.begin(),
                                                       host_vector.end(),
                                                       wanted));
    size_t device_result_index = device_result_it.get_index();

    if(device_result_index != host_result_index){
        std::cout << "ERROR: "
                  << "device_result_index (" << device_result_index << ") "
                  << "!= "
                  << "host_result_index (" << host_result_index << ")"
                  << std::endl;
        return -1;
    }

    return 0;
}
示例#5
0
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // create vector of random numbers on the host
    std::vector<int> host_vector(PERF_N);
    std::generate(host_vector.begin(), host_vector.end(), rand_int);

    // trying to find element that isn't in vector (worst-case scenario)
    int wanted = rand_int_max + 1;

    // result
    std::vector<int>::iterator host_result_it;

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        host_result_it = std::find(host_vector.begin(), host_vector.end(), wanted);
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    // verify
    if(host_result_it != host_vector.end()){
        std::cout << "ERROR: "
                  << "host_result_iterator != "
                  << "host_vector.end()"
                  << std::endl;
        return -1;
    }

    return 0;
}
示例#6
0
int main(int argc, char *argv[])
{
    size_t size = 1000;
    if(argc >= 2){
        size = boost::lexical_cast<size_t>(argv[1]);
    }

    std::cout << "size: " << size << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(
        context,
        device,
        boost::compute::command_queue::enable_profiling
    );

    // create vector of random numbers on the host
    std::vector<int> host_vector(size);
    std::generate(host_vector.begin(), host_vector.end(), rand);

    // create vector on the device and copy the data
    boost::compute::vector<int> device_vector(size, context);
    boost::compute::copy(
        host_vector.begin(),
        host_vector.end(),
        device_vector.begin(),
        queue
    );

    // sum vector
    boost::compute::timer t(queue);
    int sum =
        boost::compute::accumulate(device_vector.begin(),
                                   device_vector.end(),
                                   int(0),
                                   queue);
    std::cout << "time: " << t.elapsed() / 1e6 << " ms" << std::endl;

    // verify sum is correct
    int host_sum = std::accumulate(host_vector.begin(),
                                   host_vector.end(),
                                   int(0));
    if(sum != host_sum){
        std::cout << "ERROR: "
                  << "device_sum (" << sum << ") "
                  << "!= "
                  << "host_sum (" << host_sum << ")"
                  << std::endl;
        return -1;
    }

    return 0;
}
示例#7
0
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // create vector of random numbers on the host
    std::vector<int> host_vector(PERF_N);
    std::generate(host_vector.begin(), host_vector.end(), rand_int);

    // create vector on the device and copy the data
    boost::compute::vector<int> device_vector(PERF_N, context);
    boost::compute::copy(
        host_vector.begin(),
        host_vector.end(),
        device_vector.begin(),
        queue
    );

    boost::compute::vector<int>::iterator max = device_vector.begin();
    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        max = boost::compute::max_element(
            device_vector.begin(), device_vector.end(), queue
        );
        queue.finish();
        t.stop();
    }

    int device_max = max.read(queue);
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
    std::cout << "max: " << device_max << std::endl;

    // verify max is correct
    int host_max = *std::max_element(host_vector.begin(), host_vector.end());
    if(device_max != host_max){
        std::cout << "ERROR: "
                  << "device_max (" << device_max << ") "
                  << "!= "
                  << "host_max (" << host_max << ")"
                  << std::endl;
        return -1;
    }

    return 0;
}
示例#8
0
int main(int argc, char *argv[])
{
    size_t size = 1000;
    if(argc >= 2){
        size = boost::lexical_cast<size_t>(argv[1]);
    }

    std::cout << "size: " << size << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(
        context,
        device,
        boost::compute::command_queue::enable_profiling
    );

    // create vector of random numbers on the host
    std::vector<unsigned int> host_vector(size);
    std::generate(host_vector.begin(), host_vector.end(), rand);

    // create vector on the device and copy the data
    boost::compute::vector<unsigned int> device_vector(size, context);
    boost::compute::copy(
        host_vector.begin(),
        host_vector.end(),
        device_vector.begin(),
        queue
    );

    // sort vector
    boost::compute::timer t(queue);
    boost::compute::sort(
        device_vector.begin(),
        device_vector.end(),
        queue
    );
    std::cout << "time: " << t.elapsed() / 1e6 << " ms" << std::endl;

    // verify vector is sorted
    if(!boost::compute::is_sorted(device_vector.begin(),
                                  device_vector.end(),
                                  queue)){
        std::cout << "ERROR: is_sorted() returned false" << std::endl;
        return -1;
    }

    return 0;
}
示例#9
0
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // create vector of random numbers on the host
    std::vector<float> host_vector(PERF_N);
    std::generate(host_vector.begin(), host_vector.end(), rand_float);

    // create vector on the device and copy the data
    boost::compute::vector<float> device_vector(PERF_N, context);
    boost::compute::copy(
        host_vector.begin(),
        host_vector.end(),
        device_vector.begin(),
        queue
    );

    // sort vector
    perf_timer t;
    t.start();
    boost::compute::sort(
        device_vector.begin(),
        device_vector.end(),
        queue
    );
    queue.finish();
    t.stop();
    std::cout << "time: " << t.last_time() / 1e6 << " ms" << std::endl;

    // verify vector is sorted
    if(!boost::compute::is_sorted(device_vector.begin(),
                                  device_vector.end(),
                                  queue)){
        std::cout << "ERROR: is_sorted() returned false" << std::endl;
        return -1;
    }

    return 0;
}
示例#10
0
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // create vector of random numbers on the host
    std::vector<int> host_vector(PERF_N);
    std::generate(host_vector.begin(), host_vector.end(), rand_int);

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        std::reverse(host_vector.begin(), host_vector.end());
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    return 0;
}
示例#11
0
// this example demonstrates how to sort a vector of ints on the GPU
int main()
{
    // fix random number generator : do not give always the same numbers
    srand(time(NULL));
    // create vector of random values on the host
    std::vector<int> host_vector(10);
    std::generate(host_vector.begin(), host_vector.end(), rand_int);

    // print out input vector
    std::cout << "input:  [ ";
    for(size_t i = 0; i < host_vector.size(); i++){
        std::cout << host_vector[i];

        if(i != host_vector.size() - 1){
            std::cout << ", ";
        }
    }
    std::cout << " ]" << std::endl;

    // transfer the values to the device
    compute::vector<int> device_vector = host_vector;

    // sort the values on the device
    compute::sort(device_vector.begin(), device_vector.end());

    // transfer the values back to the host
    compute::copy(device_vector.begin(),
                  device_vector.end(),
                  host_vector.begin());

    // print out the sorted vector
    std::cout << "output: [ ";
    for(size_t i = 0; i < host_vector.size(); i++){
        std::cout << host_vector[i];

        if(i != host_vector.size() - 1){
            std::cout << ", ";
        }
    }
    std::cout << " ]" << std::endl;

    return 0;
}
示例#12
0
int main()
{
    // generate random data on the host
    std::vector<float> host_vector(10000);
    std::generate(host_vector.begin(), host_vector.end(), rand);

    // create a vector on the device and transfer data from the host
    boost::compute::vector<float> device_vector = host_vector;

    // calculate sqrt of each element in-place
    boost::compute::transform(device_vector.begin(),
                              device_vector.end(),
                              device_vector.begin(),
                              boost::compute::sqrt<float>());

    // copy values back to the host
    boost::compute::copy(device_vector.begin(),
                         device_vector.end(),
                         host_vector.begin());

    return 0;
}
示例#13
0
int main()
{
    // create data array on host
    int host_data[] = { 1, 3, 5, 7, 9 };

    // create vector on device
    compute::vector<int> device_vector(5);

    // copy from host to device
    compute::copy(host_data,
                  host_data + 5,
                  device_vector.begin());

    // create vector on host
    std::vector<int> host_vector(5);

    // copy data back to host
    compute::copy(device_vector.begin(),
                  device_vector.end(),
                  host_vector.begin());

    return 0;
}
示例#14
0
	/* note that the lock must be already acquired upon entry.
	 * This is necessary because the caller often peeks at
	 * the header and delegates control to here when it doesn't
	 * specifically handle the current RPC.
	 * The lock must be unlocked before returning */
	void process(RPCReceive& rcv, thread_scoped_lock &lock)
	{
		if(rcv.name == "mem_alloc") {
			MemoryType type;
			network_device_memory mem;
			device_ptr client_pointer;

			rcv.read(mem);
			rcv.read(type);

			lock.unlock();

			client_pointer = mem.device_pointer;

			/* create a memory buffer for the device buffer */
			size_t data_size = mem.memory_size();
			DataVector &data_v = data_vector_insert(client_pointer, data_size);

			if(data_size)
				mem.data_pointer = (device_ptr)&(data_v[0]);
			else
				mem.data_pointer = 0;

			/* perform the allocation on the actual device */
			device->mem_alloc(mem, type);

			/* store a mapping to/from client_pointer and real device pointer */
			pointer_mapping_insert(client_pointer, mem.device_pointer);
		}
		else if(rcv.name == "mem_copy_to") {
			network_device_memory mem;

			rcv.read(mem);
			lock.unlock();

			device_ptr client_pointer = mem.device_pointer;

			DataVector &data_v = data_vector_find(client_pointer);

			size_t data_size = mem.memory_size();

			/* get pointer to memory buffer	for device buffer */
			mem.data_pointer = (device_ptr)&data_v[0];

			/* copy data from network into memory buffer */
			rcv.read_buffer((uint8_t*)mem.data_pointer, data_size);

			/* translate the client pointer to a real device pointer */
			mem.device_pointer = device_ptr_from_client_pointer(client_pointer);

			/* copy the data from the memory buffer to the device buffer */
			device->mem_copy_to(mem);
		}
		else if(rcv.name == "mem_copy_from") {
			network_device_memory mem;
			int y, w, h, elem;

			rcv.read(mem);
			rcv.read(y);
			rcv.read(w);
			rcv.read(h);
			rcv.read(elem);

			device_ptr client_pointer = mem.device_pointer;
			mem.device_pointer = device_ptr_from_client_pointer(client_pointer);

			DataVector &data_v = data_vector_find(client_pointer);

			mem.data_pointer = (device_ptr)&(data_v[0]);

			device->mem_copy_from(mem, y, w, h, elem);

			size_t data_size = mem.memory_size();

			RPCSend snd(socket, &error_func, "mem_copy_from");
			snd.write();
			snd.write_buffer((uint8_t*)mem.data_pointer, data_size);
			lock.unlock();
		}
		else if(rcv.name == "mem_zero") {
			network_device_memory mem;
			
			rcv.read(mem);
			lock.unlock();

			device_ptr client_pointer = mem.device_pointer;
			mem.device_pointer = device_ptr_from_client_pointer(client_pointer);

			DataVector &data_v = data_vector_find(client_pointer);

			mem.data_pointer = (device_ptr)&(data_v[0]);

			device->mem_zero(mem);
		}
		else if(rcv.name == "mem_free") {
			network_device_memory mem;
			device_ptr client_pointer;

			rcv.read(mem);
			lock.unlock();

			client_pointer = mem.device_pointer;

			mem.device_pointer = device_ptr_from_client_pointer_erase(client_pointer);

			device->mem_free(mem);
		}
		else if(rcv.name == "const_copy_to") {
			string name_string;
			size_t size;

			rcv.read(name_string);
			rcv.read(size);

			vector<char> host_vector(size);
			rcv.read_buffer(&host_vector[0], size);
			lock.unlock();

			device->const_copy_to(name_string.c_str(), &host_vector[0], size);
		}
		else if(rcv.name == "tex_alloc") {
			network_device_memory mem;
			string name;
			InterpolationType interpolation;
			bool periodic;
			device_ptr client_pointer;

			rcv.read(name);
			rcv.read(mem);
			rcv.read(interpolation);
			rcv.read(periodic);
			lock.unlock();

			client_pointer = mem.device_pointer;

			size_t data_size = mem.memory_size();

			DataVector &data_v = data_vector_insert(client_pointer, data_size);

			if(data_size)
				mem.data_pointer = (device_ptr)&(data_v[0]);
			else
				mem.data_pointer = 0;

			rcv.read_buffer((uint8_t*)mem.data_pointer, data_size);

			device->tex_alloc(name.c_str(), mem, interpolation, periodic);

			pointer_mapping_insert(client_pointer, mem.device_pointer);
		}
		else if(rcv.name == "tex_free") {
			network_device_memory mem;
			device_ptr client_pointer;

			rcv.read(mem);
			lock.unlock();

			client_pointer = mem.device_pointer;

			mem.device_pointer = device_ptr_from_client_pointer_erase(client_pointer);

			device->tex_free(mem);
		}
		else if(rcv.name == "load_kernels") {
			bool experimental;
			rcv.read(experimental);

			bool result;
			result = device->load_kernels(experimental);
			RPCSend snd(socket, &error_func, "load_kernels");
			snd.add(result);
			snd.write();
			lock.unlock();
		}
		else if(rcv.name == "task_add") {
			DeviceTask task;

			rcv.read(task);
			lock.unlock();

			if(task.buffer)
				task.buffer = device_ptr_from_client_pointer(task.buffer);

			if(task.rgba_half)
				task.rgba_half = device_ptr_from_client_pointer(task.rgba_half);

			if(task.rgba_byte)
				task.rgba_byte = device_ptr_from_client_pointer(task.rgba_byte);

			if(task.shader_input)
				task.shader_input = device_ptr_from_client_pointer(task.shader_input);

			if(task.shader_output)
				task.shader_output = device_ptr_from_client_pointer(task.shader_output);


			task.acquire_tile = function_bind(&DeviceServer::task_acquire_tile, this, _1, _2);
			task.release_tile = function_bind(&DeviceServer::task_release_tile, this, _1);
			task.update_progress_sample = function_bind(&DeviceServer::task_update_progress_sample, this);
			task.update_tile_sample = function_bind(&DeviceServer::task_update_tile_sample, this, _1);
			task.get_cancel = function_bind(&DeviceServer::task_get_cancel, this);

			device->task_add(task);
		}
		else if(rcv.name == "task_wait") {
			lock.unlock();

			blocked_waiting = true;
			device->task_wait();
			blocked_waiting = false;

			lock.lock();
			RPCSend snd(socket, &error_func, "task_wait_done");
			snd.write();
			lock.unlock();
		}
		else if(rcv.name == "task_cancel") {
			lock.unlock();
			device->task_cancel();
		}
		else if(rcv.name == "acquire_tile") {
			AcquireEntry entry;
			entry.name = rcv.name;
			rcv.read(entry.tile);
			acquire_queue.push_back(entry);
			lock.unlock();
		}
		else if(rcv.name == "acquire_tile_none") {
			AcquireEntry entry;
			entry.name = rcv.name;
			acquire_queue.push_back(entry);
			lock.unlock();
		}
		else if(rcv.name == "release_tile") {
			AcquireEntry entry;
			entry.name = rcv.name;
			acquire_queue.push_back(entry);
			lock.unlock();
		}
		else {
			cout << "Error: unexpected RPC receive call \"" + rcv.name + "\"\n";
			lock.unlock();
		}
	}
示例#15
0
	void process(RPCReceive& rcv)
	{
		// fprintf(stderr, "receive process %s\n", rcv.name.c_str());

		if(rcv.name == "mem_alloc") {
			MemoryType type;
			network_device_memory mem;
			device_ptr remote_pointer;

			rcv.read(mem);
			rcv.read(type);

			/* todo: CPU needs mem.data_pointer */

			remote_pointer = mem.device_pointer;

			mem_data[remote_pointer] = vector<uint8_t>();
			mem_data[remote_pointer].resize(mem.memory_size());
			if(mem.memory_size())
				mem.data_pointer = (device_ptr)&(mem_data[remote_pointer][0]);
			else
				mem.data_pointer = 0;

			device->mem_alloc(mem, type);

			ptr_map[remote_pointer] = mem.device_pointer;
			ptr_imap[mem.device_pointer] = remote_pointer;
		}
		else if(rcv.name == "mem_copy_to") {
			network_device_memory mem;

			rcv.read(mem);

			device_ptr remote_pointer = mem.device_pointer;
			mem.data_pointer = (device_ptr)&(mem_data[remote_pointer][0]);

			rcv.read_buffer((uint8_t*)mem.data_pointer, mem.memory_size());

			mem.device_pointer = ptr_map[remote_pointer];

			device->mem_copy_to(mem);
		}
		else if(rcv.name == "mem_copy_from") {
			network_device_memory mem;
			int y, w, h, elem;

			rcv.read(mem);
			rcv.read(y);
			rcv.read(w);
			rcv.read(h);
			rcv.read(elem);

			device_ptr remote_pointer = mem.device_pointer;
			mem.device_pointer = ptr_map[remote_pointer];
			mem.data_pointer = (device_ptr)&(mem_data[remote_pointer][0]);

			device->mem_copy_from(mem, y, w, h, elem);

			RPCSend snd(socket);
			snd.write();
			snd.write_buffer((uint8_t*)mem.data_pointer, mem.memory_size());
		}
		else if(rcv.name == "mem_zero") {
			network_device_memory mem;
			
			rcv.read(mem);
			device_ptr remote_pointer = mem.device_pointer;
			mem.device_pointer = ptr_map[mem.device_pointer];
			mem.data_pointer = (device_ptr)&(mem_data[remote_pointer][0]);

			device->mem_zero(mem);
		}
		else if(rcv.name == "mem_free") {
			network_device_memory mem;
			device_ptr remote_pointer;

			rcv.read(mem);

			remote_pointer = mem.device_pointer;
			mem.device_pointer = ptr_map[mem.device_pointer];
			ptr_map.erase(remote_pointer);
			ptr_imap.erase(mem.device_pointer);
			mem_data.erase(remote_pointer);

			device->mem_free(mem);
		}
		else if(rcv.name == "const_copy_to") {
			string name_string;
			size_t size;

			rcv.read(name_string);
			rcv.read(size);

			vector<char> host_vector(size);
			rcv.read_buffer(&host_vector[0], size);

			device->const_copy_to(name_string.c_str(), &host_vector[0], size);
		}
		else if(rcv.name == "tex_alloc") {
			network_device_memory mem;
			string name;
			bool interpolation;
			bool periodic;
			device_ptr remote_pointer;

			rcv.read(name);
			rcv.read(mem);
			rcv.read(interpolation);
			rcv.read(periodic);

			remote_pointer = mem.device_pointer;

			mem_data[remote_pointer] = vector<uint8_t>();
			mem_data[remote_pointer].resize(mem.memory_size());
			if(mem.memory_size())
				mem.data_pointer = (device_ptr)&(mem_data[remote_pointer][0]);
			else
				mem.data_pointer = 0;

			rcv.read_buffer((uint8_t*)mem.data_pointer, mem.memory_size());

			device->tex_alloc(name.c_str(), mem, interpolation, periodic);

			ptr_map[remote_pointer] = mem.device_pointer;
			ptr_imap[mem.device_pointer] = remote_pointer;
		}
		else if(rcv.name == "tex_free") {
			network_device_memory mem;
			device_ptr remote_pointer;

			rcv.read(mem);

			remote_pointer = mem.device_pointer;
			mem.device_pointer = ptr_map[mem.device_pointer];
			ptr_map.erase(remote_pointer);
			ptr_map.erase(mem.device_pointer);
			mem_data.erase(remote_pointer);

			device->tex_free(mem);
		}
		else if(rcv.name == "task_add") {
			DeviceTask task;

			rcv.read(task);

			if(task.buffer) task.buffer = ptr_map[task.buffer];
			if(task.rgba_byte) task.rgba_byte = ptr_map[task.rgba_byte];
			if(task.rgba_half) task.rgba_half = ptr_map[task.rgba_half];
			if(task.shader_input) task.shader_input = ptr_map[task.shader_input];
			if(task.shader_output) task.shader_output = ptr_map[task.shader_output];

			task.acquire_tile = function_bind(&DeviceServer::task_acquire_tile, this, _1, _2);
			task.release_tile = function_bind(&DeviceServer::task_release_tile, this, _1);
			task.update_progress_sample = function_bind(&DeviceServer::task_update_progress_sample, this);
			task.update_tile_sample = function_bind(&DeviceServer::task_update_tile_sample, this, _1);
			task.get_cancel = function_bind(&DeviceServer::task_get_cancel, this);

			device->task_add(task);
		}
		else if(rcv.name == "task_wait") {
			device->task_wait();

			RPCSend snd(socket, "task_wait_done");
			snd.write();
		}
		else if(rcv.name == "task_cancel") {
			device->task_cancel();
		}
	}
示例#16
0
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);

    std::cout << "size: " << PERF_N << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // create vector of random numbers on the host
    std::vector<int> host_vector(PERF_N);
    std::generate(host_vector.begin(), host_vector.end(), rand_int);

    // create vector on the device and copy the data
    boost::compute::vector<int> device_vector(PERF_N, context);
    boost::compute::vector<int> device_res(PERF_N,context);
    boost::compute::copy(
        host_vector.begin(),
        host_vector.end(),
        device_vector.begin(),
        queue
    );

    // sum vector
    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        boost::compute::copy(
            host_vector.begin(),
            host_vector.end(),
            device_vector.begin(),
            queue
        );

        t.start();
        boost::compute::partial_sum(
            device_vector.begin(),
            device_vector.end(),
            device_res.begin(),
            queue
        );
        queue.finish();
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    // verify sum is correct
    std::partial_sum(
        host_vector.begin(),
        host_vector.end(),
        host_vector.begin()
    );

    int device_sum = device_res.back();
    int host_sum = host_vector.back();

    if(device_sum != host_sum){
        std::cout << "ERROR: "
                  << "device_sum (" << device_sum << ") "
                  << "!= "
                  << "host_sum (" << host_sum << ")"
                  << std::endl;
        return -1;
    }

    return 0;
}