void ProblemDescription::prepareRun( bool subsample) { debug_status( "ProblemDescription", "prepareRun", "start"); if ( subsample ){ trajectory.subsample(); } //if the factory is not empty of constraints, // get all of the constraints for the current resolution if ( !factory.empty() ){ factory.getAll( trajectory.N() ); } //are we going to use the goalset on this iteration? // If we are subsampling, then even if there is a goalset // to use, do not use it. bool use_goalset = goalset && !subsample; if ( use_goalset ){ //do not subsample if doing goalset run. trajectory.startGoalset(); //add the goal constraint to the constraints vector. factory.addGoalset( goalset ); } //are we doing covariant on the current // optimization? Do not do covariant optimization // if we are subsampling doing_covariant = is_covariant && !subsample; metric.initialize( trajectory.fullN(), trajectory.getObjectiveType(), false, use_goalset ); if ( subsample ){ subsampled_metric.initialize( trajectory.N(), trajectory.getObjectiveType(), true); } //prepare the gradient for a run at this resolution smoothness_function.prepareRun( trajectory, metric); //are we doing covariant optimization at this stage of optimiation? // do not do covariant optimization if there is subsampling if ( doing_covariant ){ trajectory.getCovariantTrajectory(metric, covariant_trajectory); } debug_status( "ProblemDescription", "prepareRun", "end"); }
static void evg_cf_engine_fetch(struct evg_compute_unit_t *compute_unit) { struct evg_ndrange_t *ndrange = evg_gpu->ndrange; struct evg_wavefront_t *wavefront; char str[MAX_LONG_STRING_SIZE]; char str_trimmed[MAX_LONG_STRING_SIZE]; struct evg_inst_t *inst; struct evg_uop_t *uop; struct evg_work_item_uop_t *work_item_uop; struct evg_work_item_t *work_item; int work_item_id; /* Schedule wavefront */ wavefront = evg_compute_unit_schedule(compute_unit); if (!wavefront) return; /* Emulate CF instruction */ evg_wavefront_execute(wavefront); inst = &wavefront->cf_inst; /* Create uop */ uop = evg_uop_create(); uop->wavefront = wavefront; uop->work_group = wavefront->work_group; uop->compute_unit = compute_unit; uop->id_in_compute_unit = compute_unit->gpu_uop_id_counter++; uop->alu_clause_trigger = wavefront->clause_kind == EVG_CLAUSE_ALU; uop->tex_clause_trigger = wavefront->clause_kind == EVG_CLAUSE_TEX; uop->no_clause_trigger = wavefront->clause_kind == EVG_CLAUSE_CF; uop->last = DOUBLE_LINKED_LIST_MEMBER(wavefront->work_group, finished, wavefront); uop->wavefront_last = uop->last && uop->no_clause_trigger; uop->global_mem_read = wavefront->global_mem_read; uop->global_mem_write = wavefront->global_mem_write; uop->active_mask_update = wavefront->active_mask_update; uop->active_mask_push = wavefront->active_mask_push; uop->active_mask_pop = wavefront->active_mask_pop; uop->active_mask_stack_top = wavefront->stack_top; uop->vliw_slots = 1; /* If debugging active mask, store active state for work-items */ if (debug_status(evg_stack_debug_category)) evg_uop_save_active_mask(uop); /* If instruction is a global memory write, record addresses */ if (uop->global_mem_write) { assert((inst->info->flags & EVG_INST_FLAG_MEM_WRITE)); EVG_FOREACH_WORK_ITEM_IN_WAVEFRONT(wavefront, work_item_id) { work_item = ndrange->work_items[work_item_id]; work_item_uop = &uop->work_item_uop[work_item->id_in_wavefront]; work_item_uop->global_mem_access_addr = work_item->global_mem_access_addr; 
work_item_uop->global_mem_access_size = work_item->global_mem_access_size; }
// single iteration of local smoothing // // precondition: prepareChompIter has been called since the last // time xi was modified void ChompLocalOptimizer::optimize() { debug_status( TAG, "optimize", "start" ); MatX h_t, H_t, P_t, P_t_inv, delta_t; constraint_magnitude = 0; debug_status( TAG, "optimize", "pre-for-loop" ); for (int t=0; t < problem.N(); ++t){ bool is_constrained = problem.evaluateConstraint( h_t, H_t, t ); if ( is_constrained ){ constraint_magnitude = std::max(constraint_magnitude, h_t.lpNorm<Eigen::Infinity>()); P_t_inv = ( H_t*H_t.transpose() ).inverse(); const int M = problem.M(); problem.updateTrajectory( (alpha *(MatX::Identity(M,M) - H_t.transpose()*P_t_inv*H_t ) * g.row(t).transpose() + H_t.transpose()*P_t_inv*h_t).transpose(), t ); } //there are no constraints, so just add the negative gradient // into the trajectory (multiplied by the step size, of course. else { problem.updateTrajectory( alpha * g.row(t), t ); } } debug_status( TAG, "optimize", "end" ); }
void ProblemDescription::endRun() { debug_status( "ProblemDescription", "endRun", "start"); if ( doing_covariant ){ covariant_trajectory.getNonCovariantTrajectory( metric, trajectory ); } if( use_goalset ){ //Restore the trajectory to the non-goalset type trajectory.endGoalset(); //remove the goal constraint, so that it is not deleted along // with the other constraints. factory.removeGoalset(); } if ( trajectory.isSubsampled() ){ trajectory.endSubsample(); } debug_status( "ProblemDescription", "endRun", "end"); }
/* Called before and ALU clause starts for a wavefront */ void evg_isa_alu_clause_start(struct evg_wavefront_t *wavefront) { /* Copy 'active' mask at the top of the stack to 'pred' mask */ bit_map_copy(wavefront->pred, 0, wavefront->active_stack, wavefront->stack_top * wavefront->work_item_count, wavefront->work_item_count); if (debug_status(evg_isa_debug_category)) { evg_isa_debug(" %s:pred=", wavefront->name); bit_map_dump(wavefront->pred, 0, wavefront->work_item_count, debug_file(evg_isa_debug_category)); } /* Flag 'push_before_done' will be set by the first PRED_SET* inst */ wavefront->push_before_done = 0; /* Stats */ wavefront->alu_clause_count++; }
/* FIXME - merge with ctx_execute */ void mips_isa_execute_inst(struct mips_ctx_t *ctx) { // struct mips_regs_t *regs = ctx->regs; ctx->next_ip = ctx->n_next_ip; ctx->n_next_ip += 4; /* Debug */ if (debug_status(mips_isa_inst_debug_category)) { mips_isa_inst_debug("%d %8lld %x: ", ctx->pid, asEmu(mips_emu)->instructions, ctx->regs->pc); mips_inst_debug_dump(&ctx->inst, debug_file(mips_isa_inst_debug_category)); } /* Call instruction emulation function */ // regs->pc = regs->pc + ctx->inst.info->size; if (ctx->inst.info->opcode) mips_isa_inst_func[ctx->inst.info->opcode](ctx); /* Statistics */ mips_inst_freq[ctx->inst.info->opcode]++; /* Debug */ mips_isa_inst_debug("\n"); // if (debug_status(mips_isa_call_debug_category)) // mips_isa_debug_call(ctx); }
/* Switch a context to a new state.
 *
 * The context is first unlinked from every per-state list of the emulator,
 * its state word is normalised, and it is then linked back into the lists
 * matching the resulting state. A reschedule is flagged when anything
 * other than the spec-mode bit changed, and the emulator timer is started
 * or stopped depending on whether any context is left on the running list. */
static void X86ContextUpdateState(X86Context *self, X86ContextState state)
{
	X86Emu *emu = self->emu;
	X86ContextState kept_bits;
	int changed_beyond_specmode;
	char state_str[MAX_STRING_SIZE];

	/* Unlink the context from all of these lists:
	 * running, suspended, zombie, finished */
	if (DOUBLE_LINKED_LIST_MEMBER(emu, running, self))
		DOUBLE_LINKED_LIST_REMOVE(emu, running, self);
	if (DOUBLE_LINKED_LIST_MEMBER(emu, suspended, self))
		DOUBLE_LINKED_LIST_REMOVE(emu, suspended, self);
	if (DOUBLE_LINKED_LIST_MEMBER(emu, zombie, self))
		DOUBLE_LINKED_LIST_REMOVE(emu, zombie, self);
	if (DOUBLE_LINKED_LIST_MEMBER(emu, finished, self))
		DOUBLE_LINKED_LIST_REMOVE(emu, finished, self);

	/* A change in any bit other than 'X86ContextSpecMode' between the
	 * old and the new state requests a reschedule. */
	changed_beyond_specmode =
		((self->state ^ state) & ~X86ContextSpecMode) != 0;
	if (changed_beyond_specmode)
		emu->schedule_signal = 1;

	/* Adopt the new state. A finished or zombie context keeps only that
	 * flag plus the alloc/mapped bits of the requested state. */
	kept_bits = state & (X86ContextAlloc | X86ContextMapped);
	self->state = state;
	if (self->state & X86ContextFinished)
		self->state = X86ContextFinished | kept_bits;
	if (self->state & X86ContextZombie)
		self->state = X86ContextZombie | kept_bits;

	/* The running bit is derived: set only when the context is neither
	 * suspended, finished, zombie nor locked. */
	if (self->state & (X86ContextSuspended | X86ContextFinished
			| X86ContextZombie | X86ContextLocked))
		self->state &= ~X86ContextRunning;
	else
		self->state |= X86ContextRunning;

	/* Link the context into the lists that match its state. */
	if (self->state & X86ContextRunning)
		DOUBLE_LINKED_LIST_INSERT_HEAD(emu, running, self);
	if (self->state & X86ContextZombie)
		DOUBLE_LINKED_LIST_INSERT_HEAD(emu, zombie, self);
	if (self->state & X86ContextFinished)
		DOUBLE_LINKED_LIST_INSERT_HEAD(emu, finished, self);
	if (self->state & X86ContextSuspended)
		DOUBLE_LINKED_LIST_INSERT_HEAD(emu, suspended, self);

	/* Trace the new state (spec-mode-only changes are skipped, they are
	 * too frequent) */
	if (debug_status(x86_context_debug_category) && changed_beyond_specmode)
	{
		str_map_flags(&x86_context_state_map, self->state,
			state_str, sizeof state_str);
		X86ContextDebug("inst %lld: ctx %d changed state to %s\n",
			asEmu(emu)->instructions, self->pid, state_str);
	}

	/* Run the x86 timer only while at least one context is running. */
	if (emu->running_list_count)
		m2s_timer_start(asEmu(emu)->timer);
	else
		m2s_timer_stop(asEmu(emu)->timer);
}
void evg_isa_write_task_commit(struct evg_work_item_t *work_item) { struct linked_list_t *task_list = work_item->write_task_list; struct evg_wavefront_t *wavefront = work_item->wavefront; struct evg_work_group_t *work_group = work_item->work_group; struct evg_isa_write_task_t *wt; struct evg_inst_t *inst; /* Process first tasks of type: * - EVG_ISA_WRITE_TASK_WRITE_DEST * - EVG_ISA_WRITE_TASK_WRITE_LDS */ for (linked_list_head(task_list); !linked_list_is_end(task_list); ) { /* Get task */ wt = linked_list_get(task_list); assert(wt->work_item == work_item); inst = wt->inst; switch (wt->kind) { case EVG_ISA_WRITE_TASK_WRITE_DEST: { if (wt->write_mask) evg_isa_write_gpr(work_item, wt->gpr, wt->rel, wt->chan, wt->value); work_item->pv.elem[wt->inst->alu] = wt->value; /* Debug */ if (evg_isa_debugging()) { evg_isa_debug(" i%d:%s", work_item->id, map_value(&evg_pv_map, wt->inst->alu)); if (wt->write_mask) { evg_isa_debug(","); evg_inst_dump_gpr(wt->gpr, wt->rel, wt->chan, 0, debug_file(evg_isa_debug_category)); } evg_isa_debug("<="); gpu_isa_dest_value_dump(inst, &wt->value, debug_file(evg_isa_debug_category)); } break; } case EVG_ISA_WRITE_TASK_WRITE_LDS: { struct mem_t *local_mem; union evg_reg_t lds_value; local_mem = work_group->local_mem; assert(local_mem); assert(wt->lds_value_size); mem_write(local_mem, wt->lds_addr, wt->lds_value_size, &wt->lds_value); /* Debug */ lds_value.as_uint = wt->lds_value; evg_isa_debug(" i%d:LDS[0x%x]<=(%u,%gf) (%d bytes)", work_item->id, wt->lds_addr, lds_value.as_uint, lds_value.as_float, (int) wt->lds_value_size); break; } default: linked_list_next(task_list); continue; } /* Done with this task */ repos_free_object(evg_isa_write_task_repos, wt); linked_list_remove(task_list); } /* Process PUSH_BEFORE, PRED_SET */ for (linked_list_head(task_list); !linked_list_is_end(task_list); ) { /* Get task */ wt = linked_list_get(task_list); inst = wt->inst; /* Process */ switch (wt->kind) { case EVG_ISA_WRITE_TASK_PUSH_BEFORE: { if 
(!wavefront->push_before_done) evg_wavefront_stack_push(wavefront); wavefront->push_before_done = 1; break; } case EVG_ISA_WRITE_TASK_SET_PRED: { int update_pred = EVG_ALU_WORD1_OP2.update_pred; int update_exec_mask = EVG_ALU_WORD1_OP2.update_exec_mask; assert(inst->info->fmt[1] == EVG_FMT_ALU_WORD1_OP2); if (update_pred) evg_work_item_set_pred(work_item, wt->cond); if (update_exec_mask) evg_work_item_set_active(work_item, wt->cond); /* Debug */ if (debug_status(evg_isa_debug_category)) { if (update_pred && update_exec_mask) evg_isa_debug(" i%d:act/pred<=%d", work_item->id, wt->cond); else if (update_pred) evg_isa_debug(" i%d:pred=%d", work_item->id, wt->cond); else if (update_exec_mask) evg_isa_debug(" i%d:pred=%d", work_item->id, wt->cond); } break; } default: abort(); } /* Done with task */ repos_free_object(evg_isa_write_task_repos, wt); linked_list_remove(task_list); } /* List should be empty */ assert(!linked_list_count(task_list)); }
/* Execute one instruction in the wavefront */ void si_wavefront_execute(struct si_wavefront_t *wavefront) { struct si_ndrange_t *ndrange; struct si_work_group_t *work_group; struct si_work_item_t *work_item; struct si_inst_t *inst; char inst_dump[MAX_INST_STR_SIZE]; unsigned int pc; ndrange = wavefront->ndrange; int work_item_id; /* Get current work-group */ ndrange = wavefront->ndrange; work_group = wavefront->work_group; work_item = NULL; inst = NULL; assert(!DOUBLE_LINKED_LIST_MEMBER(work_group, finished, wavefront)); /* Reset instruction flags */ wavefront->vector_mem_write = 0; wavefront->vector_mem_read = 0; wavefront->scalar_mem_read = 0; wavefront->local_mem_write = 0; wavefront->local_mem_read = 0; wavefront->pred_mask_update = 0; wavefront->mem_wait = 0; wavefront->barrier = 0; assert(!wavefront->finished); /* Grab the next instruction and update the pointer */ wavefront->inst_size = si_inst_decode(wavefront->wavefront_pool, &wavefront->inst, 0); /* Stats */ si_emu->inst_count++; wavefront->emu_inst_count++; wavefront->inst_count++; /* Set the current instruction */ inst = &wavefront->inst; pc = wavefront->wavefront_pool - wavefront->wavefront_pool_start; /*MIAOW start - Print the debug message to stdout, stderr to the open file stream*/ si_isa_debug("\n###%d_%d_%d", kernel_config_count - 1, wavefront->work_group->id, wavefront->id_in_work_group, pc, wavefront->inst_size); /*MIAOW stop*/ /* Execute the current instruction */ switch (inst->info->fmt) { /* Scalar ALU Instructions */ case SI_FMT_SOP1: { /* Dump instruction string when debugging */ if (debug_status(si_isa_debug_category)) { si_inst_dump_sop1(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); si_isa_debug("\n%s", inst_dump); } /* Stats */ si_emu->scalar_alu_inst_count++; wavefront->scalar_alu_inst_count++; /* Only one work item executes the instruction */ work_item = wavefront->scalar_work_item; (*si_isa_inst_func[inst->info->inst])(work_item, inst); if 
(debug_status(si_isa_debug_category)) { si_isa_debug("\n"); } break; } case SI_FMT_SOP2: { /* Dump instruction string when debugging */ if (debug_status(si_isa_debug_category)) { si_inst_dump_sop2(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); si_isa_debug("\n%s", inst_dump); } /* Stats */ si_emu->scalar_alu_inst_count++; wavefront->scalar_alu_inst_count++; /* Only one work item executes the instruction */ work_item = wavefront->scalar_work_item; (*si_isa_inst_func[inst->info->inst])(work_item, inst); //Calling a function pointer in machine.c if (debug_status(si_isa_debug_category)) { si_isa_debug("\n"); } break; } case SI_FMT_SOPP: { /* Dump instruction string when debugging */ if (debug_status(si_isa_debug_category)) { si_inst_dump_sopp(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); si_isa_debug("\n%s", inst_dump); } /* Stats */ if (wavefront->inst.micro_inst.sopp.op > 1 && wavefront->inst.micro_inst.sopp.op < 10) { si_emu->branch_inst_count++; wavefront->branch_inst_count++; } else { si_emu->scalar_alu_inst_count++; wavefront->scalar_alu_inst_count++; } /* Only one work item executes the instruction */ work_item = wavefront->scalar_work_item; (*si_isa_inst_func[inst->info->inst])(work_item, inst); if (debug_status(si_isa_debug_category)) { si_isa_debug("\n"); } break; } case SI_FMT_SOPC: { /* Dump instruction string when debugging */ if (debug_status(si_isa_debug_category)) { si_inst_dump_sopc(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); si_isa_debug("\n%s", inst_dump); } /* Stats */ si_emu->scalar_alu_inst_count++; wavefront->scalar_alu_inst_count++; /* Only one work item executes the instruction */ work_item = wavefront->scalar_work_item; (*si_isa_inst_func[inst->info->inst])(work_item, inst); if (debug_status(si_isa_debug_category)) { si_isa_debug("\n"); } break; } case SI_FMT_SOPK: { /* Dump instruction string when debugging */ 
if (debug_status(si_isa_debug_category)) { si_inst_dump_sopk(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); si_isa_debug("\n%s", inst_dump); } /* Stats */ si_emu->scalar_alu_inst_count++; wavefront->scalar_alu_inst_count++; /* Only one work item executes the instruction */ work_item = wavefront->scalar_work_item; (*si_isa_inst_func[inst->info->inst])(work_item, inst); if (debug_status(si_isa_debug_category)) { si_isa_debug("\n"); } break; } /* Scalar Memory Instructions */ case SI_FMT_SMRD: { /* Dump instruction string when debugging */ if (debug_status(si_isa_debug_category)) { si_inst_dump_smrd(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); si_isa_debug("\n%s", inst_dump); } /* Stats */ si_emu->scalar_mem_inst_count++; wavefront->scalar_mem_inst_count++; /* Only one work item executes the instruction */ work_item = wavefront->scalar_work_item; (*si_isa_inst_func[inst->info->inst])(work_item, inst); if (debug_status(si_isa_debug_category)) { si_isa_debug("\n"); } break; } /* Vector ALU Instructions */ case SI_FMT_VOP2: { /* Dump instruction string when debugging */ if (debug_status(si_isa_debug_category)) { si_inst_dump_vop2(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); si_isa_debug("\n%s", inst_dump); } /* Stats */ si_emu->vector_alu_inst_count++; wavefront->vector_alu_inst_count++; /* Execute the instruction */ SI_FOREACH_WORK_ITEM_IN_WAVEFRONT(wavefront, work_item_id) { work_item = ndrange->work_items[work_item_id]; if(si_wavefront_work_item_active(wavefront, work_item->id_in_wavefront)) (*si_isa_inst_func[inst->info->inst])(work_item, inst); } if (debug_status(si_isa_debug_category)) { si_isa_debug("\n"); } break; } case SI_FMT_VOP1: { /* Dump instruction string when debugging */ if (debug_status(si_isa_debug_category)) { si_inst_dump_vop1(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); 
si_isa_debug("\n%s", inst_dump); } /* Stats */ si_emu->vector_alu_inst_count++; wavefront->vector_alu_inst_count++; if (inst->micro_inst.vop1.op == 2) { /* Instruction ignores execution mask and is only executed on one work item. * Execute on the first active work item from the least significant bit in EXEC. * (if exec is 0, execute work item 0) */ work_item = ndrange->work_items[wavefront->work_item_id_first]; if (si_isa_read_sreg(work_item, SI_EXEC) == 0 && si_isa_read_sreg(work_item, SI_EXEC + 1) == 0) { (*si_isa_inst_func[inst->info->inst])(work_item, inst); } else { SI_FOREACH_WORK_ITEM_IN_WAVEFRONT(wavefront, work_item_id) { work_item = ndrange->work_items[work_item_id]; if(si_wavefront_work_item_active(wavefront, work_item->id_in_wavefront)) { (*si_isa_inst_func[inst->info->inst])(work_item, inst); break; } } } }