Пример #1
0
// Generates the chunk at (x, y), looks up its data through sub_405E30, dumps
// memory via tgen_dump_mem(filename) and returns the value sub_405E30 left in
// EAX.
uint32_t tgen_generate_debug_chunk(const char * filename,
                                   uint32_t x, uint32_t y)
{
    // Both emulated calls below take the same arguments: y is pushed first,
    // then x, the `self` slot is pointed at MANAGER_ADDRESS and add_ret() is
    // invoked (presumably to push the return address -- confirm).
    auto prepare_manager_call = [&]() {
        cpu.push_dword(y);
        cpu.push_dword(x);
        get_self() = MANAGER_ADDRESS;
        add_ret();
    };

    // Start from the saved heap and a fresh stack.
    mem.restore_heap(saved_heap);
    cpu.reset_stack();

    // First call: run the generator for this chunk position.
    prepare_manager_call();
    generator_func();

    // Disabled shortcut kept from the original:
    //     save_chunk_now();
    //     return 0;

    // Second call: address 405E30 is
    //     void * __thiscall get_sector_chunk_data(__int64 chunk_pos)
    prepare_manager_call();
    sub_405E30();

    // Read EAX before dumping memory, exactly as the lookup left it.
    const uint32_t chunk_data_offset = cpu.reg[EAX];
    tgen_dump_mem(filename);

    return chunk_data_offset;
}
Пример #2
0
// Backward pass of the pooling layer on the CPU: clears bottom[0]'s diff and
// invokes the generated backward kernel once per (image, channel) pair.
void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // Nothing to compute when propagation to bottom[0] is switched off.
  if (!propagate_down[0]) {
    return;
  }

  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  // Fill the whole bottom diff with Dtype(0) before the kernel runs.
  caffe_set(bottom[0]->count(), Dtype(0), bottom_diff);

  // A second top blob, when present, is the one used as the mask.
  const bool use_top_mask = top.size() > 1;

  typename PoolingCodeGeneratorBackward<Dtype>::Callback_t* backward_kernel =
      Backward_code_generator.Get_callback(this, top[0]);

  // The mask pointer is fetched up front: mutable_cpu_data is not thread
  // safe, so calling it inside the parallel region would risk a race.
  void* mask = NULL;
  const bool is_max_pooling =
      this->layer_param_.pooling_param().pool() ==
      PoolingParameter_PoolMethod_MAX;
  if (is_max_pooling) {
    if (use_top_mask) {
      mask = static_cast<void*>(top[1]->mutable_cpu_data());
    } else {
      mask = static_cast<void*>(max_idx_.mutable_cpu_data());
    }
  }

  const int num_images = bottom[0]->num();
  const int channels_per_image = bottom[0]->channels();

  // Each kernel call covers the half-open ranges [n, n+1) x [c, c+1), i.e.
  // exactly one image/channel pair. The loops stay perfectly nested so that
  // collapse(2) remains applicable.
#ifdef _OPENMP
  #pragma omp parallel for collapse(2)
#endif
  for (int n = 0; n < num_images; ++n) {
    for (int c = 0; c < channels_per_image; ++c) {
      backward_kernel(top_diff, bottom_diff,
                      n, n + 1,
                      c, c + 1,
                      use_top_mask, mask, this);
    }
  }
}
Пример #3
0
// Generates the chunk at (x, y), then looks up its data through sub_405E30
// and returns the value that call left in EAX.
uint32_t tgen_generate_chunk(uint32_t x, uint32_t y)
{
    // Both emulated calls below take the same arguments: y is pushed first,
    // then x, the `self` slot is pointed at MANAGER_ADDRESS and add_ret() is
    // invoked (presumably to push the return address -- confirm).
    auto prepare_manager_call = [&]() {
        cpu.push_dword(y);
        cpu.push_dword(x);
        get_self() = MANAGER_ADDRESS;
        add_ret();
    };

    // Start from the saved heap and a fresh stack.
    mem.restore_heap(saved_heap);
    cpu.reset_stack();

    // First call: run the generator for this chunk position.
    prepare_manager_call();
    generator_func();

    // Disabled shortcut kept from the original:
    //     save_chunk_now();
    //     return 0;

    // Second call: address 405E30 is
    //     void * __thiscall get_sector_chunk_data(__int64 chunk_pos)
    prepare_manager_call();
    sub_405E30();

    const uint32_t chunk_data_offset = cpu.reg[EAX];
    return chunk_data_offset;
}