u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job) { /* Virtual jobs always use the global job counter (or if there are per sub job counters at all) */ if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) { return job->uargs.perf_counter_src0; } /* Use per sub job counter if enabled... */ if (MALI_HW_CORE_NO_COUNTER != job->perf_counter_per_sub_job_src0[sub_job]) { return job->perf_counter_per_sub_job_src0[sub_job]; } /* ...else default to global job counter */ return job->uargs.perf_counter_src0; }
void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual) { u32 relative_address; u32 start_index; u32 nr_of_regs; u32 *frame_registers = mali_pp_job_get_frame_registers(job); u32 *wb0_registers = mali_pp_job_get_wb0_registers(job); u32 *wb1_registers = mali_pp_job_get_wb1_registers(job); u32 *wb2_registers = mali_pp_job_get_wb2_registers(job); u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, sub_job); u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, sub_job); MALI_DEBUG_ASSERT_POINTER(core); /* Write frame registers */ /* * There are two frame registers which are different for each sub job: * 1. The Renderer List Address Register (MALI200_REG_ADDR_FRAME) * 2. The FS Stack Address Register (MALI200_REG_ADDR_STACK) */ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_FRAME, mali_pp_job_get_addr_frame(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_FRAME / sizeof(u32)]); /* For virtual jobs, the stack address shouldn't be broadcast but written individually */ if (!mali_pp_job_is_virtual(job) || restart_virtual) { mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_STACK, mali_pp_job_get_addr_stack(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_STACK / sizeof(u32)]); } /* Write registers between MALI200_REG_ADDR_FRAME and MALI200_REG_ADDR_STACK */ relative_address = MALI200_REG_ADDR_RSW; start_index = MALI200_REG_ADDR_RSW / sizeof(u32); nr_of_regs = (MALI200_REG_ADDR_STACK - MALI200_REG_ADDR_RSW) / sizeof(u32); mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, relative_address, &frame_registers[start_index], nr_of_regs, &mali_frame_registers_reset_values[start_index]); /* MALI200_REG_ADDR_STACK_SIZE */ relative_address = MALI200_REG_ADDR_STACK_SIZE; start_index = MALI200_REG_ADDR_STACK_SIZE / sizeof(u32); mali_hw_core_register_write_relaxed_conditional(&core->hw_core, relative_address, frame_registers[start_index], mali_frame_registers_reset_values[start_index]); /* Skip 2 reserved registers */ /* Write remaining registers */ relative_address = MALI200_REG_ADDR_ORIGIN_OFFSET_X; start_index = MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32); nr_of_regs = MALI_PP_MALI400_NUM_FRAME_REGISTERS - MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32); mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, relative_address, &frame_registers[start_index], nr_of_regs, &mali_frame_registers_reset_values[start_index]); /* Write WBx registers */ if (wb0_registers[0]) { /* M200_WB0_REG_SOURCE_SELECT register */ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB0, wb0_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values); } if (wb1_registers[0]) { /* M200_WB1_REG_SOURCE_SELECT register */ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB1, wb1_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values); } if (wb2_registers[0]) { /* M200_WB2_REG_SOURCE_SELECT register */ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB2, wb2_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values); } if (MALI_HW_CORE_NO_COUNTER != counter_src0) { mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0); mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value); } if (MALI_HW_CORE_NO_COUNTER != counter_src1) { mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1); mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value); } #ifdef CONFIG_MALI400_HEATMAPS_ENABLED if (job->uargs.perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE) { mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERFMON_CONTR, ((job->uargs.tilesx & 0x3FF) << 16) | 1); mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERFMON_BASE, job->uargs.heatmap_mem & 0xFFFFFFF8); } #endif /* CONFIG_MALI400_HEATMAPS_ENABLED */ MALI_DEBUG_PRINT(3, ("Mali PP: Starting job 0x%08X part %u/%u on PP core %s\n", job, sub_job + 1, mali_pp_job_get_sub_job_count(job), core->hw_core.description)); /* Adding barrier to make sure all rester writes are finished */ _mali_osk_write_mem_barrier(); /* This is the command that starts the core. * * Don't actually run the job if PROFILING_SKIP_PP_JOBS are set, just * force core to assert the completion interrupt. */ #if !defined(PROFILING_SKIP_PP_JOBS) mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_START_RENDERING); #else mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_END_OF_FRAME); #endif /* Adding barrier to make sure previous rester writes is finished */ _mali_osk_write_mem_barrier(); }
void mali_pp_job_dma_cmd_prepare(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual, mali_dma_cmd_buf *buf) { u32 relative_address; u32 start_index; u32 nr_of_regs; u32 *frame_registers = mali_pp_job_get_frame_registers(job); u32 *wb0_registers = mali_pp_job_get_wb0_registers(job); u32 *wb1_registers = mali_pp_job_get_wb1_registers(job); u32 *wb2_registers = mali_pp_job_get_wb2_registers(job); u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, sub_job); u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, sub_job); MALI_DEBUG_ASSERT_POINTER(core); /* Write frame registers */ /* * There are two frame registers which are different for each sub job: * 1. The Renderer List Address Register (MALI200_REG_ADDR_FRAME) * 2. The FS Stack Address Register (MALI200_REG_ADDR_STACK) */ mali_dma_write_conditional(buf, &core->hw_core, MALI200_REG_ADDR_FRAME, mali_pp_job_get_addr_frame(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_FRAME / sizeof(u32)]); /* For virtual jobs, the stack address shouldn't be broadcast but written individually */ if (!mali_pp_job_is_virtual(job) || restart_virtual) { mali_dma_write_conditional(buf, &core->hw_core, MALI200_REG_ADDR_STACK, mali_pp_job_get_addr_stack(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_STACK / sizeof(u32)]); } /* Write registers between MALI200_REG_ADDR_FRAME and MALI200_REG_ADDR_STACK */ relative_address = MALI200_REG_ADDR_RSW; start_index = MALI200_REG_ADDR_RSW / sizeof(u32); nr_of_regs = (MALI200_REG_ADDR_STACK - MALI200_REG_ADDR_RSW) / sizeof(u32); mali_dma_write_array_conditional(buf, &core->hw_core, relative_address, &frame_registers[start_index], nr_of_regs, &mali_frame_registers_reset_values[start_index]); /* MALI200_REG_ADDR_STACK_SIZE */ relative_address = MALI200_REG_ADDR_STACK_SIZE; start_index = MALI200_REG_ADDR_STACK_SIZE / sizeof(u32); mali_dma_write_conditional(buf, &core->hw_core, relative_address, frame_registers[start_index], mali_frame_registers_reset_values[start_index]); /* Skip 2 reserved registers */ /* Write remaining registers */ relative_address = MALI200_REG_ADDR_ORIGIN_OFFSET_X; start_index = MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32); nr_of_regs = MALI_PP_MALI400_NUM_FRAME_REGISTERS - MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32); mali_dma_write_array_conditional(buf, &core->hw_core, relative_address, &frame_registers[start_index], nr_of_regs, &mali_frame_registers_reset_values[start_index]); /* Write WBx registers */ if (wb0_registers[0]) { /* M200_WB0_REG_SOURCE_SELECT register */ mali_dma_write_array_conditional(buf, &core->hw_core, MALI200_REG_ADDR_WB0, wb0_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values); } if (wb1_registers[0]) { /* M200_WB1_REG_SOURCE_SELECT register */ mali_dma_write_array_conditional(buf, &core->hw_core, MALI200_REG_ADDR_WB1, wb1_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values); } if (wb2_registers[0]) { /* M200_WB2_REG_SOURCE_SELECT register */ mali_dma_write_array_conditional(buf, &core->hw_core, MALI200_REG_ADDR_WB2, wb2_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values); } if (MALI_HW_CORE_NO_COUNTER != counter_src0) { mali_dma_write(buf, &core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0); mali_dma_write_conditional(buf, &core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value); } if (MALI_HW_CORE_NO_COUNTER != counter_src1) { mali_dma_write(buf, &core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1); mali_dma_write_conditional(buf, &core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value); } /* This is the command that starts the core. */ mali_dma_write(buf, &core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_START_RENDERING); }
void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual) { u32 num_frame_registers; u32 relative_address; u32 start_index; u32 nr_of_regs; u32 *frame_registers = mali_pp_job_get_frame_registers(job); u32 *wb0_registers = mali_pp_job_get_wb0_registers(job); u32 *wb1_registers = mali_pp_job_get_wb1_registers(job); u32 *wb2_registers = mali_pp_job_get_wb2_registers(job); core->counter_src0_used = mali_pp_job_get_perf_counter_src0(job); core->counter_src1_used = mali_pp_job_get_perf_counter_src1(job); MALI_DEBUG_ASSERT_POINTER(core); /* Write frame registers */ num_frame_registers = (_MALI_PRODUCT_ID_MALI200 == mali_kernel_core_get_product_id()) ? MALI_PP_MALI200_NUM_FRAME_REGISTERS : MALI_PP_MALI400_NUM_FRAME_REGISTERS; /* * There are two frame registers which are different for each sub job: * 1. The Renderer List Address Register (MALI200_REG_ADDR_FRAME) * 2. The FS Stack Address Register (MALI200_REG_ADDR_STACK) */ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_FRAME, mali_pp_job_get_addr_frame(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_FRAME / sizeof(u32)]); /* For virtual jobs, the stack address shouldn't be broadcast but written individually */ if (!mali_pp_job_is_virtual(job) || restart_virtual) { mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_STACK, mali_pp_job_get_addr_stack(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_STACK / sizeof(u32)]); } /* Write registers between MALI200_REG_ADDR_FRAME and MALI200_REG_ADDR_STACK */ relative_address = MALI200_REG_ADDR_RSW; start_index = MALI200_REG_ADDR_RSW / sizeof(u32); nr_of_regs = (MALI200_REG_ADDR_STACK - MALI200_REG_ADDR_RSW) / sizeof(u32); mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, relative_address, &frame_registers[start_index], nr_of_regs, &mali_frame_registers_reset_values[start_index]); /* MALI200_REG_ADDR_STACK_SIZE */ relative_address = MALI200_REG_ADDR_STACK_SIZE; start_index = MALI200_REG_ADDR_STACK_SIZE / sizeof(u32); mali_hw_core_register_write_relaxed_conditional(&core->hw_core, relative_address, frame_registers[start_index], mali_frame_registers_reset_values[start_index]); /* Skip 2 reserved registers */ /* Write remaining registers */ relative_address = MALI200_REG_ADDR_ORIGIN_OFFSET_X; start_index = MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32); nr_of_regs = num_frame_registers - MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32); mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, relative_address, &frame_registers[start_index], nr_of_regs, &mali_frame_registers_reset_values[start_index]); /* Write WBx registers */ if (wb0_registers[0]) /* M200_WB0_REG_SOURCE_SELECT register */ { mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB0, wb0_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values); } if (wb1_registers[0]) /* M200_WB1_REG_SOURCE_SELECT register */ { mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB1, wb1_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values); } if (wb2_registers[0]) /* M200_WB2_REG_SOURCE_SELECT register */ { mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB2, wb2_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values); } if (MALI_HW_CORE_NO_COUNTER != core->counter_src0_used) { mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC, core->counter_src0_used); mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value); } if (MALI_HW_CORE_NO_COUNTER != core->counter_src1_used) { mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC, core->counter_src1_used); mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value); } MALI_DEBUG_PRINT(3, ("Mali PP: Starting job 0x%08X part %u/%u on PP core %s\n", job, sub_job + 1, mali_pp_job_get_sub_job_count(job), core->hw_core.description)); /* Adding barrier to make sure all rester writes are finished */ _mali_osk_write_mem_barrier(); /* This is the command that starts the core. */ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_START_RENDERING); /* Adding barrier to make sure previous rester writes is finished */ _mali_osk_write_mem_barrier(); }
struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session, _mali_uk_pp_start_job_s *uargs, u32 id) { struct mali_pp_job *job; u32 perf_counter_flag; job = _mali_osk_calloc(1, sizeof(struct mali_pp_job)); if (NULL != job) { if (0 != _mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_pp_start_job_s))) { goto fail; } if (job->uargs.num_cores > _MALI_PP_MAX_SUB_JOBS) { MALI_PRINT_ERROR(("Mali PP job: Too many sub jobs specified in job object\n")); goto fail; } if (!mali_pp_job_use_no_notification(job)) { job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_FINISHED, sizeof(_mali_uk_pp_job_finished_s)); if (NULL == job->finished_notification) goto fail; } perf_counter_flag = mali_pp_job_get_perf_counter_flag(job); /* case when no counters came from user space * so pass the debugfs / DS-5 provided global ones to the job object */ if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) || (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) { u32 sub_job_count = _mali_osk_atomic_read(&pp_counter_per_sub_job_count); /* These counters apply for all virtual jobs, and where no per sub job counter is specified */ job->uargs.perf_counter_src0 = pp_counter_src0; job->uargs.perf_counter_src1 = pp_counter_src1; /* We only copy the per sub job array if it is enabled with at least one counter */ if (0 < sub_job_count) { job->perf_counter_per_sub_job_count = sub_job_count; _mali_osk_memcpy(job->perf_counter_per_sub_job_src0, pp_counter_per_sub_job_src0, sizeof(pp_counter_per_sub_job_src0)); _mali_osk_memcpy(job->perf_counter_per_sub_job_src1, pp_counter_per_sub_job_src1, sizeof(pp_counter_per_sub_job_src1)); } } _mali_osk_list_init(&job->list); job->session = session; _mali_osk_list_init(&job->session_list); job->id = id; job->sub_jobs_num = job->uargs.num_cores ? job->uargs.num_cores : 1; job->pid = _mali_osk_get_pid(); job->tid = _mali_osk_get_tid(); job->num_memory_cookies = job->uargs.num_memory_cookies; if (job->num_memory_cookies > 0) { u32 size; if (job->uargs.num_memory_cookies > session->descriptor_mapping->current_nr_mappings) { MALI_PRINT_ERROR(("Mali PP job: Too many memory cookies specified in job object\n")); goto fail; } size = sizeof(*job->uargs.memory_cookies) * job->num_memory_cookies; job->memory_cookies = _mali_osk_malloc(size); if (NULL == job->memory_cookies) { MALI_PRINT_ERROR(("Mali PP job: Failed to allocate %d bytes of memory cookies!\n", size)); goto fail; } if (0 != _mali_osk_copy_from_user(job->memory_cookies, job->uargs.memory_cookies, size)) { MALI_PRINT_ERROR(("Mali PP job: Failed to copy %d bytes of memory cookies from user!\n", size)); goto fail; } #if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) job->num_dma_bufs = job->num_memory_cookies; job->dma_bufs = _mali_osk_calloc(job->num_dma_bufs, sizeof(struct mali_dma_buf_attachment *)); if (NULL == job->dma_bufs) { MALI_PRINT_ERROR(("Mali PP job: Failed to allocate dma_bufs array!\n")); goto fail; } #endif } /* Prepare DMA command buffer to start job, if it is virtual. */ if (mali_pp_job_is_virtual(job)) { struct mali_pp_core *core; _mali_osk_errcode_t err = mali_dma_get_cmd_buf(&job->dma_cmd_buf); if (_MALI_OSK_ERR_OK != err) { MALI_PRINT_ERROR(("Mali PP job: Failed to allocate DMA command buffer\n")); goto fail; } core = mali_pp_scheduler_get_virtual_pp(); MALI_DEBUG_ASSERT_POINTER(core); mali_pp_job_dma_cmd_prepare(core, job, 0, MALI_FALSE, &job->dma_cmd_buf); } if (_MALI_OSK_ERR_OK != mali_pp_job_check(job)) { /* Not a valid job. */ goto fail; } mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_PP, NULL, job); mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence)); return job; } fail: if (NULL != job) { mali_pp_job_delete(job); } return NULL; }