uint32_t tgen_generate_debug_chunk(const char * filename, uint32_t x, uint32_t y) { // restore heap mem.restore_heap(saved_heap); cpu.reset_stack(); cpu.push_dword(y); cpu.push_dword(x); get_self() = MANAGER_ADDRESS; add_ret(); generator_func(); // save_chunk_now(); // return 0; // address 405E30 is // void * __thiscall get_sector_chunk_data(__int64 chunk_pos) cpu.push_dword(y); cpu.push_dword(x); get_self() = MANAGER_ADDRESS; add_ret(); sub_405E30(); uint32_t chunk_offset = cpu.reg[EAX]; tgen_dump_mem(filename); return chunk_offset; }
/// CPU backward pass of the pooling layer.
///
/// Zeroes the diff of bottom[0], then invokes the generated backward kernel
/// once per (image, channel) pair, optionally in parallel under OpenMP.
///
/// @param top             top[0] supplies the incoming diff; when top has
///                        more than one entry, top[1] is used as the mask
///                        for MAX pooling.
/// @param propagate_down  Only element 0 is consulted; if false the function
///                        returns without touching bottom.
/// @param bottom          bottom[0] receives the computed diff.
void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  // Nothing to do when no gradient is requested for the bottom blob.
  if (!propagate_down[0]) {
    return;
  }

  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

  // Clear the whole bottom diff before the kernel runs.
  caffe_set(bottom[0]->count(), Dtype(0), bottom_diff);

  // A second top blob, when present, carries the mask.
  const bool use_top_mask = top.size() > 1;

  typename PoolingCodeGeneratorBackward<Dtype>::Callback_t* backward_kernel =
      Backward_code_generator.Get_callback(this, top[0]);

  // The mask pointer is resolved up front: mutable_cpu_data() is not thread
  // safe, so calling it inside the parallel region below would risk a race
  // condition. It stays NULL for every pooling method other than MAX.
  void* mask_ptr = NULL;
  if (this->layer_param_.pooling_param().pool() ==
      PoolingParameter_PoolMethod_MAX) {
    if (use_top_mask) {
      mask_ptr = static_cast<void*>(top[1]->mutable_cpu_data());
    } else {
      mask_ptr = static_cast<void*>(max_idx_.mutable_cpu_data());
    }
  }

  const int num_images = bottom[0]->num();
  const int channels_per_image = bottom[0]->channels();

  // One kernel call per (image, channel) pair. The two loops must remain
  // perfectly nested for collapse(2) to apply.
  // NOTE(review): the (n, n + 1) and (c, c + 1) arguments look like
  // half-open [start, end) ranges -- confirm against Callback_t.
#ifdef _OPENMP
  #pragma omp parallel for collapse(2)
#endif
  for (int n = 0; n < num_images; ++n) {
    for (int c = 0; c < channels_per_image; ++c) {
      backward_kernel(top_diff, bottom_diff,
                      n, n + 1,
                      c, c + 1,
                      use_top_mask, mask_ptr, this);
    }
  }
}
uint32_t tgen_generate_chunk(uint32_t x, uint32_t y) { // restore heap mem.restore_heap(saved_heap); cpu.reset_stack(); cpu.push_dword(y); cpu.push_dword(x); get_self() = MANAGER_ADDRESS; add_ret(); generator_func(); // save_chunk_now(); // return 0; // address 405E30 is // void * __thiscall get_sector_chunk_data(__int64 chunk_pos) cpu.push_dword(y); cpu.push_dword(x); get_self() = MANAGER_ADDRESS; add_ret(); sub_405E30(); return cpu.reg[EAX]; }