Example #1
void ImageManager::device_update(Device *device,
                                 Scene *scene,
                                 Progress& progress)
{
	if(!need_update) {
		return;
	}

	TaskPool pool;
	for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
		for(size_t slot = 0; slot < images[type].size(); slot++) {
			if(!images[type][slot])
				continue;

			if(images[type][slot]->users == 0) {
				device_free_image(device, (ImageDataType)type, slot);
			}
			else if(images[type][slot]->need_load) {
				if(!osl_texture_system || images[type][slot]->builtin_data)
					pool.push(function_bind(&ImageManager::device_load_image,
					                        this,
					                        device,
					                        scene,
					                        (ImageDataType)type,
					                        slot,
					                        &progress));
			}
		}
	}

	pool.wait_work();

	need_update = false;
}
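device_update frees any image whose user count has dropped to zero, and pushes each remaining load onto a TaskPool via function_bind so the loads run concurrently (images served by the OSL texture system are skipped unless they carry builtin_data). pool.wait_work() blocks until every queued load has finished, and only then is need_update cleared.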
Example #2
	void thread_shader(DeviceTask& task)
	{
		KernelGlobals kg = kernel_globals;

#ifdef WITH_OSL
		OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif

#ifdef WITH_OPTIMIZED_KERNEL
		if(system_cpu_support_optimized()) {
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_optimized_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task_pool.cancelled())
					break;
			}
		}
		else
#endif
		{
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task_pool.cancelled())
					break;
			}
		}

#ifdef WITH_OSL
		OSLShader::thread_free(&kg);
#endif
	}
Example #3
TEST(util_task, basic)
{
  TaskScheduler::init(0);
  TaskPool pool;
  for (int i = 0; i < 100; ++i) {
    pool.push(function_bind(task_run));
  }
  TaskPool::Summary summary;
  pool.wait_work(&summary);
  TaskScheduler::exit();
  EXPECT_EQ(summary.num_tasks_handled, 100);
}
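The test references a free function task_run that the snippet does not show; a minimal stand-in, purely to make the example self-contained (the body is irrelevant here, since the test only checks that all 100 pushed tasks are counted as handled):

static void task_run()
{
  /* Intentionally empty: TaskPool::Summary counts the task as handled
   * regardless of what the body does. */
}

Note that TaskScheduler::init(0) asks the scheduler to choose a thread count automatically, and TaskScheduler::exit() balances it once the pool's work is done.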
Example #4
	~CUDADevice()
	{
		task_pool.stop();

		cuda_push_context();
		cuda_assert(cuCtxDetach(cuContext));
	}
Example #5
 void beginConvertLegacySavegame(String const &sourcePath, String const &gameId)
 {
     LOG_AS("SaveGames");
     LOG_TRACE("Scheduling legacy savegame conversion for %s (gameId:%s)") << sourcePath << gameId;
     Loop::get().audienceForIteration() += this;
     convertSavegameTasks.start(new ConvertSavegameTask(sourcePath, gameId));
 }
Example #6
  void thread_shader(DeviceTask &task)
  {
    KernelGlobals kg = kernel_globals;

#ifdef WITH_OSL
    OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif
    for (int sample = 0; sample < task.num_samples; sample++) {
      for (int x = task.shader_x; x < task.shader_x + task.shader_w; x++)
        shader_kernel()(&kg,
                        (uint4 *)task.shader_input,
                        (float4 *)task.shader_output,
                        task.shader_eval_type,
                        task.shader_filter,
                        x,
                        task.offset,
                        sample);

      if (task.get_cancel() || task_pool.canceled())
        break;

      task.update_progress(NULL);
    }

#ifdef WITH_OSL
    OSLShader::thread_free(&kg);
#endif
  }
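Compare with Example 12, which writes the SIMD dispatch out by hand: here shader_kernel() appears to be a dispatch wrapper (newer Cycles routes this through a KernelFunctions helper) that yields the best kernel variant for the running CPU. The kernel has also grown shader_filter, offset and sample arguments, and progress is reported once per sample.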
Example #7
TaskHandle CreateTask(TaskPool& pool,
                      TaskHandle* parentTask,
                      const DELEGATE_CBK<void, const Task&>& threadedFunction)
{
    Task* freeTask = pool.createTask(parentTask ? parentTask->_task : nullptr,
                                     threadedFunction);
    return TaskHandle(freeTask, &pool);
}
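A minimal usage sketch for CreateTask. TaskPriority::DONT_CARE is a hypothetical value, and the startTask/WaitForAllTasks calls are borrowed from Examples 10 and 26 rather than part of this snippet:

void run_one(TaskPool& pool)
{
    TaskHandle handle = CreateTask(pool,
                                   nullptr,  /* no parent task */
                                   [](const Task& /*task*/) {
                                       /* threaded work goes here */
                                   });
    handle.startTask(TaskPriority::DONT_CARE); /* hypothetical priority value */
    WaitForAllTasks(pool, true, true, false);  /* yield + flush callbacks, see Example 26 */
}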
Example #8
	void task_add(DeviceTask& task)
	{
		/* split task into smaller ones */
		list<DeviceTask> tasks;
		task.split(tasks, TaskScheduler::num_threads());

		foreach(DeviceTask& task, tasks)
			task_pool.push(new CPUDeviceTask(this, task));
	}
Example #9
	void task_add(DeviceTask& task)
	{
		/* split task into smaller ones, more than number of threads for uneven
		 * workloads where some parts of the image render slower than others */
		list<DeviceTask> tasks;
		task.split(tasks, TaskScheduler::num_threads());

		foreach(DeviceTask& task, tasks)
			task_pool.push(new CPUDeviceTask(this, task));
	}
Example #10
void parallel_for(TaskPool& pool, 
                  const DELEGATE_CBK<void, const Task&, U32, U32>& cbk,
                  U32 count,
                  U32 partitionSize,
                  TaskPriority priority,
                  bool noWait,
                  bool useCurrentThread)
{
    if (count > 0) {

        const U32 crtPartitionSize = std::min(partitionSize, count);
        const U32 partitionCount = count / crtPartitionSize;
        const U32 remainder = count % crtPartitionSize;

        U32 adjustedCount = partitionCount;
        if (useCurrentThread) {
            adjustedCount -= 1;
        }

        /* One job per full partition, plus one for the remainder chunk.
         * Direct-initialize: copy-initializing a std::atomic is ill-formed
         * before C++17. */
        std::atomic_uint jobCount(adjustedCount + (remainder > 0 ? 1 : 0));

        for (U32 i = 0; i < adjustedCount; ++i) {
            const U32 start = i * crtPartitionSize;
            const U32 end = start + crtPartitionSize;
            CreateTask(pool,
                       nullptr,
                       [&cbk, &jobCount, start, end](const Task& parentTask) {
                           cbk(parentTask, start, end);
                           jobCount.fetch_sub(1);
                       }).startTask(priority);
        }
        if (remainder > 0) {
            CreateTask(pool,
                       nullptr,
                       [&cbk, &jobCount, count, remainder](const Task& parentTask) {
                           cbk(parentTask, count - remainder, count);
                           jobCount.fetch_sub(1);
                       }).startTask(priority);
        }

        if (useCurrentThread) {
            TaskHandle threadTask = CreateTask(pool, [](const Task& parentTask) {
                ACKNOWLEDGE_UNUSED(parentTask);
            });
            const U32 start = adjustedCount * crtPartitionSize;
            const U32 end = start + crtPartitionSize;
            cbk(*threadTask._task, start, end);
        }
        if (!noWait) {
            while (jobCount.load() > 0) {
                pool.threadWaiting();
            }
        }
    }
}
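A hedged usage sketch for parallel_for, filling a buffer in 256-element partitions. TaskPriority::DONT_CARE is a placeholder value and the TaskPool is assumed to be initialized elsewhere; everything else follows the signature above:

void fill_buffer(TaskPool& pool, std::vector<float>& data)
{
    parallel_for(pool,
                 [&data](const Task& /*parent*/, U32 start, U32 end) {
                     for (U32 i = start; i < end; ++i) {
                         data[i] = i * 0.5f;  /* per-element work */
                     }
                 },
                 static_cast<U32>(data.size()),  /* count */
                 256u,                           /* partitionSize */
                 TaskPriority::DONT_CARE,        /* hypothetical value */
                 false,                          /* noWait: block until done */
                 true);                          /* run one partition on this thread */
}

Passing noWait = false matters: jobCount is a stack variable captured by reference in the task lambdas, so a caller that passes noWait = true must otherwise guarantee the tasks complete before parallel_for's frame is torn down.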
Example #11
void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress& progress)
{
	if(!need_update)
		return;
	
	TaskPool pool;

	for(size_t slot = 0; slot < images.size(); slot++) {
		if(!images[slot])
			continue;

		if(images[slot]->users == 0) {
			device_free_image(device, dscene, slot);
		}
		else if(images[slot]->need_load) {
			if(!osl_texture_system) 
				pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot, &progress));
		}
	}

	for(size_t slot = 0; slot < float_images.size(); slot++) {
		if(!float_images[slot])
			continue;

		if(float_images[slot]->users == 0) {
			device_free_image(device, dscene, slot + TEX_IMAGE_FLOAT_START);
		}
		else if(float_images[slot]->need_load) {
			if(!osl_texture_system) 
				pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot + TEX_IMAGE_FLOAT_START, &progress));
		}
	}

	pool.wait_work();

	if(pack_images)
		device_pack_images(device, dscene, progress);

	need_update = false;
}
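This is an older revision of the updater in Example 1: byte and float images still live in separate arrays (images and float_images, the latter offset by TEX_IMAGE_FLOAT_START), so the free-or-load pass is written out twice, and device_pack_images runs as an explicit post-step after wait_work().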
Example #12
	void thread_shader(DeviceTask& task)
	{
		KernelGlobals kg = kernel_globals;

#ifdef WITH_OSL
		OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif
		void(*shader_kernel)(KernelGlobals*, uint4*, float4*, int, int, int, int);

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
		if(system_cpu_support_avx2())
			shader_kernel = kernel_cpu_avx2_shader;
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
		if(system_cpu_support_avx())
			shader_kernel = kernel_cpu_avx_shader;
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41			
		if(system_cpu_support_sse41())
			shader_kernel = kernel_cpu_sse41_shader;
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
		if(system_cpu_support_sse3())
			shader_kernel = kernel_cpu_sse3_shader;
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
		if(system_cpu_support_sse2())
			shader_kernel = kernel_cpu_sse2_shader;
		else
#endif
			shader_kernel = kernel_cpu_shader;

		for(int sample = 0; sample < task.num_samples; sample++) {
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++)
				shader_kernel(&kg, (uint4*)task.shader_input, (float4*)task.shader_output,
					task.shader_eval_type, x, task.offset, sample);

			if(task.get_cancel() || task_pool.canceled())
				break;

			task.update_progress(NULL);

		}

#ifdef WITH_OSL
		OSLShader::thread_free(&kg);
#endif
	}
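The #ifdef/else cascade selects the widest instruction set supported by both the build and the running CPU, falling back to the plain kernel_cpu_shader. Resolving the function pointer once keeps the per-pixel loop free of feature checks; contrast Example 23, which duplicates the whole loop inside each branch instead.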
Example #13
	void task_add(DeviceTask& task)
	{
		/* split task into smaller ones */
		list<DeviceTask> tasks;

		if(task.type == DeviceTask::SHADER)
			task.split(tasks, TaskScheduler::num_threads(), 256);
		else
			task.split(tasks, TaskScheduler::num_threads());

		foreach(DeviceTask& task, tasks)
			task_pool.push(new CPUDeviceTask(this, task));
	}
Example #14
    void rewrapCache()
    {
        if(cache.isEmpty()) return;

        if(isRewrapping())
        {
            // Cancel an existing rewrap.
            cancelRewrap++;
        }

        // Start a rewrapping task that goes through all the existing entries,
        // starting from the latest entry.
        rewrapPool.start(new RewrapTask(this, cache.size() - 1, contentWidth()));
    }
Example #15
  void task_add(DeviceTask &task)
  {
    /* Load texture info. */
    load_texture_info();

    /* split task into smaller ones */
    list<DeviceTask> tasks;

    if (task.type == DeviceTask::SHADER)
      task.split(tasks, info.cpu_threads, 256);
    else
      task.split(tasks, info.cpu_threads);

    foreach (DeviceTask &task, tasks)
      task_pool.push(new CPUDeviceTask(this, task));
  }
Example #16
 void loopIteration()
 {
     /// @todo Refactor: TaskPool has a signal (or audience) when all tasks are complete.
     /// No need to check on every loop iteration.
     if (convertSavegameTasks.isDone())
     {
         LOG_AS("SaveGames");
         Loop::get().audienceForIteration() -= this;
         try
         {
             // The newly converted savegame(s) should now be somewhere in /home/savegames
             FileSystem::get().root().locate<Folder>("/home/savegames").populate();
         }
         catch (Folder::NotFoundError const &)
         {} // Ignore.
     }
 }
Example #17
  void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
  {
    const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;

    scoped_timer timer(&tile.buffers->render_time);

    Coverage coverage(kg, tile);
    if (use_coverage) {
      coverage.init_path_trace();
    }

    float *render_buffer = (float *)tile.buffer;
    int start_sample = tile.start_sample;
    int end_sample = tile.start_sample + tile.num_samples;

    /* Needed for Embree. */
    SIMD_SET_FLUSH_TO_ZERO;

    for (int sample = start_sample; sample < end_sample; sample++) {
      if (task.get_cancel() || task_pool.canceled()) {
        if (task.need_finish_queue == false)
          break;
      }

      for (int y = tile.y; y < tile.y + tile.h; y++) {
        for (int x = tile.x; x < tile.x + tile.w; x++) {
          if (use_coverage) {
            coverage.init_pixel(x, y);
          }
          path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
        }
      }

      tile.sample = sample + 1;

      task.update_progress(&tile, tile.w * tile.h);
    }
    if (use_coverage) {
      coverage.finalize();
    }
  }
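Note the cancellation policy: a cancel request only breaks out of the sample loop when task.need_finish_queue is false, so tiles that were already handed out can still be rendered to completion. SIMD_SET_FLUSH_TO_ZERO flushes denormals before the loop, which the in-code comment attributes to Embree.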
Example #18
	~OpenCLDevice()
	{
		task_pool.stop();

		if(null_mem)
			clReleaseMemObject(CL_MEM_PTR(null_mem));

		map<string, device_vector<uchar>*>::iterator mt;
		for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) {
			mem_free(*(mt->second));
			delete mt->second;
		}

		if(ckPathTraceKernel)
			clReleaseKernel(ckPathTraceKernel);  
		if(ckFilmConvertKernel)
			clReleaseKernel(ckFilmConvertKernel);  
		if(cpProgram)
			clReleaseProgram(cpProgram);
		if(cqCommandQueue)
			clReleaseCommandQueue(cqCommandQueue);
		if(cxContext)
			clReleaseContext(cxContext);
	}
Example #19
	~CPUDevice()
	{
		task_pool.stop();
	}
Example #20
	void task_cancel()
	{
		task_pool.cancel();
	}
Example #21
	void task_wait()
	{
		task_pool.wait_work();
	}
Example #22
 ~CPUDevice()
 {
   task_pool.stop();
   texture_info.free();
 }
Example #23
	void thread_shader(DeviceTask& task)
	{
		KernelGlobals kg = kernel_globals;

#ifdef WITH_OSL
		OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
		if(system_cpu_support_avx()) {
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task.get_cancel() || task_pool.canceled())
					break;
			}
		}
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41			
		if(system_cpu_support_sse41()) {
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task.get_cancel() || task_pool.canceled())
					break;
			}
		}
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
		if(system_cpu_support_sse3()) {
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task.get_cancel() || task_pool.canceled())
					break;
			}
		}
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
		if(system_cpu_support_sse2()) {
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task.get_cancel() || task_pool.canceled())
					break;
			}
		}
		else
#endif
		{
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task.get_cancel() || task_pool.canceled())
					break;
			}
		}

#ifdef WITH_OSL
		OSLShader::thread_free(&kg);
#endif
	}
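This is the same runtime feature dispatch as Example 12, written the long way: the scan loop is duplicated inside each #ifdef branch, and each variant polls task.get_cancel() and task_pool.canceled() after every pixel.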
Example #24
	void thread_path_trace(DeviceTask& task)
	{
		if(task_pool.canceled()) {
			if(task.need_finish_queue == false)
				return;
		}

		KernelGlobals kg = kernel_globals;

#ifdef WITH_OSL
		OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif

		RenderTile tile;
		
		while(task.acquire_tile(this, tile)) {
			float *render_buffer = (float*)tile.buffer;
			uint *rng_state = (uint*)tile.rng_state;
			int start_sample = tile.start_sample;
			int end_sample = tile.start_sample + tile.num_samples;

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
			if(system_cpu_support_avx()) {
				for(int sample = start_sample; sample < end_sample; sample++) {
					if (task.get_cancel() || task_pool.canceled()) {
						if(task.need_finish_queue == false)
							break;
					}

					for(int y = tile.y; y < tile.y + tile.h; y++) {
						for(int x = tile.x; x < tile.x + tile.w; x++) {
							kernel_cpu_avx_path_trace(&kg, render_buffer, rng_state,
								sample, x, y, tile.offset, tile.stride);
						}
					}

					tile.sample = sample + 1;

					task.update_progress(tile);
				}
			}
			else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41			
			if(system_cpu_support_sse41()) {
				for(int sample = start_sample; sample < end_sample; sample++) {
					if (task.get_cancel() || task_pool.canceled()) {
						if(task.need_finish_queue == false)
							break;
					}

					for(int y = tile.y; y < tile.y + tile.h; y++) {
						for(int x = tile.x; x < tile.x + tile.w; x++) {
							kernel_cpu_sse41_path_trace(&kg, render_buffer, rng_state,
								sample, x, y, tile.offset, tile.stride);
						}
					}

					tile.sample = sample + 1;

					task.update_progress(tile);
				}
			}
			else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
			if(system_cpu_support_sse3()) {
				for(int sample = start_sample; sample < end_sample; sample++) {
					if (task.get_cancel() || task_pool.canceled()) {
						if(task.need_finish_queue == false)
							break;
					}

					for(int y = tile.y; y < tile.y + tile.h; y++) {
						for(int x = tile.x; x < tile.x + tile.w; x++) {
							kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state,
								sample, x, y, tile.offset, tile.stride);
						}
					}

					tile.sample = sample + 1;

					task.update_progress(tile);
				}
			}
			else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
			if(system_cpu_support_sse2()) {
				for(int sample = start_sample; sample < end_sample; sample++) {
					if (task.get_cancel() || task_pool.canceled()) {
						if(task.need_finish_queue == false)
							break;
					}

					for(int y = tile.y; y < tile.y + tile.h; y++) {
						for(int x = tile.x; x < tile.x + tile.w; x++) {
							kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state,
								sample, x, y, tile.offset, tile.stride);
						}
					}

					tile.sample = sample + 1;

					task.update_progress(tile);
				}
			}
			else
#endif
			{
				for(int sample = start_sample; sample < end_sample; sample++) {
					if (task.get_cancel() || task_pool.canceled()) {
						if(task.need_finish_queue == false)
							break;
					}

					for(int y = tile.y; y < tile.y + tile.h; y++) {
						for(int x = tile.x; x < tile.x + tile.w; x++) {
							kernel_cpu_path_trace(&kg, render_buffer, rng_state,
								sample, x, y, tile.offset, tile.stride);
						}
					}

					tile.sample = sample + 1;

					task.update_progress(tile);
				}
			}

			task.release_tile(tile);

			if(task_pool.canceled()) {
				if(task.need_finish_queue == false)
					break;
			}
		}

#ifdef WITH_OSL
		OSLShader::thread_free(&kg);
#endif
	}
Example #25
  void thread_render(DeviceTask &task)
  {
    if (task_pool.canceled()) {
      if (task.need_finish_queue == false)
        return;
    }

    /* allocate buffer for kernel globals */
    device_only_memory<KernelGlobals> kgbuffer(this, "kernel_globals");
    kgbuffer.alloc_to_device(1);

    KernelGlobals *kg = new ((void *)kgbuffer.device_pointer)
        KernelGlobals(thread_kernel_globals_init());

    profiler.add_state(&kg->profiler);

    CPUSplitKernel *split_kernel = NULL;
    if (use_split_kernel) {
      split_kernel = new CPUSplitKernel(this);
      if (!split_kernel->load_kernels(requested_features)) {
        thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
        kgbuffer.free();
        delete split_kernel;
        return;
      }
    }

    RenderTile tile;
    DenoisingTask denoising(this, task);
    denoising.profiler = &kg->profiler;

    while (task.acquire_tile(this, tile)) {
      if (tile.task == RenderTile::PATH_TRACE) {
        if (use_split_kernel) {
          device_only_memory<uchar> void_buffer(this, "void_buffer");
          split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
        }
        else {
          path_trace(task, tile, kg);
        }
      }
      else if (tile.task == RenderTile::DENOISE) {
        denoise(denoising, tile);
        task.update_progress(&tile, tile.w * tile.h);
      }

      task.release_tile(tile);

      if (task_pool.canceled()) {
        if (task.need_finish_queue == false)
          break;
      }
    }

    profiler.remove_state(&kg->profiler);

    thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
    kg->~KernelGlobals();
    kgbuffer.free();
    delete split_kernel;
  }
Example #26
void WaitForAllTasks(TaskPool& pool, bool yield, bool flushCallbacks, bool forceClear) {
    pool.waitForAllTasks(yield, flushCallbacks, forceClear);
}
Example #27
	void task_add(DeviceTask& task)
	{
		task_pool.push(new OpenCLDeviceTask(this, task));
	}
Example #28
 void cancelRewraps()
 {
     cancelRewrap = CancelAllRewraps;
     rewrapPool.waitForDone();
     cancelRewrap = 0;
 }
Example #29
 ~Impl()
 {
     convertSavegameTasks.waitForDone();
 }
Example #30
 bool isRewrapping() const
 {
     return !rewrapPool.isDone();
 }