void Blob<Dtype>::FromProto(const BlobProto& proto, bool reshape) {
  // Restore this blob's contents -- and, when `reshape` is set, its shape --
  // from a serialized BlobProto.
  //
  // @param proto    source message holding shape, data and (optionally) diff.
  // @param reshape  if true, adopt the proto's shape; if false, require that
  //                 the shapes already match (CHECK-fails otherwise).
  if (reshape) {
    vector<int> shape;
    if (proto.has_num() || proto.has_channels() ||
        proto.has_height() || proto.has_width()) {
      // Using deprecated 4D Blob dimensions --
      // shape is (num, channels, height, width).
      shape.resize(4);
      shape[0] = proto.num();
      shape[1] = proto.channels();
      shape[2] = proto.height();
      shape[3] = proto.width();
    } else {
      shape.resize(proto.shape().dim_size());
      for (int i = 0; i < proto.shape().dim_size(); ++i) {
        shape[i] = proto.shape().dim(i);
      }
    }
    Reshape(shape);
  } else {
    CHECK(ShapeEquals(proto)) << "shape mismatch (reshape not set)";
  }
  // copy data
  // Guard against out-of-range repeated-field access: the original code read
  // proto.data(i) for count_ elements without verifying data_size().
  CHECK_EQ(count_, proto.data_size())
      << "data size mismatch between proto and blob";
  Dtype* data_vec = mutable_cpu_data();
  for (int i = 0; i < count_; ++i) {
    data_vec[i] = proto.data(i);
  }
  if (proto.diff_size() > 0) {
    CHECK_EQ(count_, proto.diff_size())
        << "diff size mismatch between proto and blob";
    Dtype* diff_vec = mutable_cpu_diff();
    for (int i = 0; i < count_; ++i) {
      diff_vec[i] = proto.diff(i);
    }
  }
}
void Blob<Dtype>::scale_data(Dtype scale_factor) {
  // Multiply every element of the data buffer by scale_factor in place,
  // operating on whichever copy (private/MKL layout, CPU, or GPU) currently
  // holds the authoritative values so no extra synchronization is triggered.
  Dtype* data;
  if (!data_) { return; }  // no storage allocated yet -- nothing to scale
  switch (data_->head()) {
  case SyncedMemory::SYNCED_PRV:
  case SyncedMemory::HEAD_AT_PRV:
    // Data lives in the private-layout buffer; scale prv_data_count()
    // elements there (which may differ from count_ for padded layouts --
    // TODO confirm against SyncedMemory docs).
    data = mutable_prv_data();
    caffe_scal(prv_data_count(), scale_factor, data);
    break;
  case SyncedMemory::HEAD_AT_CPU:
    data = mutable_cpu_data();
    caffe_scal(count_, scale_factor, data);
    return;
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
    data = mutable_gpu_data();
    caffe_gpu_scal(count_, scale_factor, data);
    return;
#else
    NO_GPU;  // fatal in CPU_ONLY builds: data claims GPU residency
#endif
  case SyncedMemory::UNINITIALIZED:
    return;  // uninitialized memory: scaling is a no-op
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head();
  }
}
void Blob<Dtype>::FromProto(const BlobProto& proto, const bool init_ps_table) {
  // Restore blob contents from a BlobProto. For GLOBAL-mode blobs backed by
  // a parameter-server (PS) table the proto values are pushed into the table
  // via a diff update; otherwise the data is copied into local CPU memory.
  //
  // @param proto          source message with shape, data and optional diff.
  // @param init_ps_table  when true (and blob is GLOBAL), seed the PS table
  //                       with the proto's values.
  Reshape(proto.num(), proto.channels(), proto.height(), proto.width());
  if (blob_mode_ == BlobProto_BlobMode_GLOBAL) {
    if (init_ps_table) {
      // initialize ps table
      // update values in ps table
      // NOTE(review): the delta (current - proto) is staged as the diff and
      // pushed via UpdatePSTable(); presumably the PS update subtracts the
      // diff so the table ends up holding the proto values -- confirm
      // against UpdatePSTable()'s implementation.
      Dtype* data_vec = ReadPSTable(0);
      for (int i = 0; i < count_; ++i) {
        data_vec[i] = data_vec[i] - proto.data(i);
      }
      diff_->set_cpu_data(data_vec);
      UpdatePSTable();
      //TODO: 2016-6-16
      //// fetch the newest values
      //data_vec = ReadPSTable(0);
      //data_->set_cpu_ps_data(data_vec);
    }
  } else {
    //copy data
    Dtype* data_vec = mutable_cpu_data();
    for (int i = 0; i < count_; ++i) {
      data_vec[i] = proto.data(i);
    }
  }
  if (proto.diff_size() > 0) {
    Dtype* diff_vec = mutable_cpu_diff();
    for (int i = 0; i < count_; ++i) {
      diff_vec[i] = proto.diff(i);
    }
  }
}
void Blob<Dtype>::FixData(int pos, int width) {
  // Apply fixed-point truncation to the CPU data buffer.
  // A non-positive bit width disables quantization entirely.
  if (width > 0) {
    // Only the CPU buffer is truncated here; the GPU copy is kept in sync
    // automatically by the SyncedMemory machinery, so touching gpu_data
    // explicitly is unnecessary.
    TruncData(mutable_cpu_data(), count_, pos, width);
  }
}
void Blob<Dtype>::FromProto(const BlobProto& proto) {
  // Restore this blob (shape, data and optional diff) from a BlobProto
  // serialized with the legacy 4D (num, channels, height, width) layout.
  Reshape(proto.num(), proto.channels(), proto.height(), proto.width());
  // copy data
  // Guard against out-of-range repeated-field access: the original code read
  // proto.data(i) for count_ elements without verifying data_size().
  CHECK_EQ(count_, proto.data_size())
      << "data size mismatch between proto and blob";
  Dtype* data_vec = mutable_cpu_data();
  for (int i = 0; i < count_; ++i) {
    data_vec[i] = proto.data(i);
  }
  if (proto.diff_size() > 0) {
    CHECK_EQ(count_, proto.diff_size())
        << "diff size mismatch between proto and blob";
    Dtype* diff_vec = mutable_cpu_diff();
    for (int i = 0; i < count_; ++i) {
      diff_vec[i] = proto.diff(i);
    }
  }
}
void Blob<Dtype>::scale_data(Dtype scale_factor) {
  // Scale every element of the data buffer by scale_factor in place.
  // Only CPU-resident data is handled by this build.
  if (!data_) {
    return;  // no backing storage yet -- nothing to do
  }
  const auto head = data_->head();
  if (head == SyncedMemory::HEAD_AT_CPU) {
    caffe_scal(count_, scale_factor, mutable_cpu_data());
  } else if (head == SyncedMemory::UNINITIALIZED) {
    // Memory exists but holds no values yet; scaling is a no-op.
  } else {
    LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head();
  }
}
Dtype Blob<Dtype>::asum_data(){
  // Return the L1 norm (sum of absolute values) of the data buffer,
  // computed on whichever device currently owns the data.
  //
  // @return the asum, or 0 for an uninitialized blob.
  switch(data_->head()){
  case SyncedMemory::SyncedHead::HEAD_AT_CPU:
    return dragon_cpu_asum(count_, mutable_cpu_data());
  case SyncedMemory::SyncedHead::HEAD_AT_GPU:
  case SyncedMemory::SyncedHead::SYNCED:
#ifndef CPU_ONLY
    return dragon_gpu_asum(count_, (Dtype*)data_->gpu_data());
#else
    // BUG FIX: previously this case silently fell through and returned 0 in
    // CPU_ONLY builds even though the data claims GPU residency; fail loudly
    // instead of reporting a bogus norm.
    LOG(FATAL) << "Cannot compute GPU asum in a CPU_ONLY build.";
#endif
  case SyncedMemory::SyncedHead::UNINITIALIZED:
    return 0;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head();
  }
  // BUG FIX: the original had no return after the switch, so control could
  // reach the end of a non-void function (undefined behavior / compiler
  // warning). Not reached in practice because LOG(FATAL) aborts.
  return 0;
}
void Blob<Dtype>::FromProtoWithExtraCopy(const BlobProto& proto){
  // Load from a BlobProto whose channel count may be smaller than this
  // blob's: every dimension is taken from the proto except channels, which
  // keeps the blob's current value, and the proto's data/diff are tiled
  // cyclically to fill the (possibly larger) blob.
  // reshape other dimention but remain the same number of channels;
  Reshape(proto.num(), this->channels(), proto.height(), proto.width(), proto.depth());
  // copy data, wrapping around when the blob holds more elements than proto
  Dtype* data_vec = mutable_cpu_data();
  int sourceDataSize =proto.data_size();
  // Guard the modulo below: i % 0 is undefined behavior.
  CHECK_GT(sourceDataSize, 0) << "proto contains no data to copy";
  for (int i = 0; i < count_; ++i) {
    data_vec[i] = proto.data(i%sourceDataSize);
  }
  if (proto.diff_size() > 0) {
    // BUG FIX: the diff loop previously wrapped with sourceDataSize
    // (the DATA length); whenever diff_size() != data_size() that read out
    // of range or wrapped at the wrong period. Wrap by the diff length.
    int sourceDiffSize = proto.diff_size();
    Dtype* diff_vec = mutable_cpu_diff();
    for (int i = 0; i < count_; ++i) {
      diff_vec[i] = proto.diff(i%sourceDiffSize);
    }
  }
}
void Blob<Dtype>::update(){
  // Apply the accumulated gradient: data -= diff (axpy with alpha = -1),
  // executed on whichever device currently owns the data.
  switch(data_->head()){
  case SyncedMemory::SyncedHead::HEAD_AT_CPU:
    dragon_axpy(count_, Dtype(-1), cpu_diff(), mutable_cpu_data());
    break;
  case SyncedMemory::SyncedHead::HEAD_AT_GPU:
  case SyncedMemory::SyncedHead::SYNCED:
#ifndef CPU_ONLY
    dragon_gpu_axpy<Dtype>(count_, Dtype(-1), gpu_diff(), mutable_gpu_data());
#endif
    // NOTE(review): in a CPU_ONLY build this case silently performs no
    // update even though the head claims GPU residency -- confirm this is
    // intended rather than a missing NO_GPU-style fatal error.
    break;
  default:
    // UNINITIALIZED JUST DO NOTHING
    ;
  }
}
void Blob<Dtype>::FromProtoFC2Conv(const BlobProto& proto){
  // Copy weights serialized from a fully-connected layer into this
  // convolution-layer blob. The blob's shape is deliberately left unchanged;
  // the raw element stream is reinterpreted one-to-one, so the total element
  // counts must match exactly.
  // Note that we do not change the shape of taget layer blob (convolution)
  // reshape other dimention but remain the same number of channels;
  //Reshape(proto.num(), this->channels(), proto.height(), proto.width(), proto.depth());
  // copy data
  Dtype* data_vec = mutable_cpu_data();
  size_t proto_count =proto.num()*proto.channels()*proto.height()*proto.width()*proto.depth();
  // FC source and conv target must hold exactly the same number of weights.
  CHECK_EQ(proto_count,count_);
  for (size_t i = 0; i < count_; ++i) {
    data_vec[i] = proto.data(i);
  }
  // LOG(INFO)<<"proto.diff_size= "<<proto.diff_size();
  if (proto.diff_size() > 0) {
    Dtype* diff_vec = mutable_cpu_diff();
    for (size_t i = 0; i < count_; ++i) {
      diff_vec[i] = proto.diff(i);
    }
  }
}
void Blob<Dtype>::FromProto(const BlobProto& proto, bool need_reshape = true){
  // Deserialize shape (optionally), data, and diff from a BlobProto.
  // Data/diff are copied only when present, and their lengths must match
  // the blob's element count exactly.
  if (need_reshape){
    //copy shape
    const int num_axes = proto.shape().dim_size();
    vector<int> shape(num_axes);
    for (int axis = 0; axis < num_axes; ++axis) {
      shape[axis] = proto.shape().dim(axis);
    }
    reshape(shape);
  }
  if (proto.data_size() > 0){
    CHECK_EQ(proto.data_size(), count());
    Dtype* data = mutable_cpu_data();
    for (int idx = 0; idx < count_; ++idx) {
      data[idx] = proto.data(idx);
    }
  }
  if (proto.diff_size() > 0){
    CHECK_EQ(proto.diff_size(), count());
    Dtype* diff = mutable_cpu_diff();
    for (int idx = 0; idx < count_; ++idx) {
      diff[idx] = proto.diff(idx);
    }
  }
}
void Blob<Dtype>::FromProto(const BlobProto& proto) {
  // Restore a 5D (3D-convolution style) blob from a BlobProto, promoting
  // legacy 2D-model protos (depth == 0) to depth 1 so 2D weights can be
  // loaded into 3D models.
  LOG(INFO)<<"FromProto size = "<<proto.num() <<" "<<proto.channels() << " "<<proto.height()<<" "<< proto.width()<<" "<< proto.depth();
  if(proto.depth() ==0) {
    LOG(INFO)<< "proto depth is 0, converting to 1 for 2D models to 3D models...";
    Reshape(proto.num(), proto.channels(), proto.height(), proto.width(), 1);
  }else{
    Reshape(proto.num(), proto.channels(), proto.height(), proto.width(), proto.depth());
  }
  // copy data
  Dtype* data_vec = mutable_cpu_data();
  /* for testing only
  Dtype data_vec_sum=0;
  for (int i = 0; i < count_; ++i) {
    data_vec_sum=data_vec_sum+data_vec[i];
  }
  LOG(INFO)<<"bolb sum value = "<<data_vec_sum;
  */
  for (size_t i = 0; i < count_; ++i) {
    data_vec[i] = proto.data(i);
    //LOG(INFO)<<"proto.data(i) ="<<proto.data(i);
  }
  //LOG(INFO)<<"proto.data[11870779] ="<<proto.data(11870779);
  //sleep(20);
  // LOG(INFO)<<"proto.diff_size= "<<proto.diff_size();
  if (proto.diff_size() > 0) {
    Dtype* diff_vec = mutable_cpu_diff();
    for (size_t i = 0; i < count_; ++i) {
      diff_vec[i] = proto.diff(i);
    }
  }
  //if (Caffe::mode()==Caffe::GPU) {
  //  gpu_data();
  // }
}
void Blob<Dtype>::scale_data(Dtype scale_factor) {
  // Multiply the data buffer by scale_factor in place, dispatching on the
  // current SyncedMemory head and -- for GPU-resident data -- on the device
  // backend (CUDA vs. OpenCL/greentea).
  Dtype* data;
  if (!data_) { return; }  // no backing storage allocated yet
  switch (data_->head()) {
    case SyncedMemory::HEAD_AT_CPU: {
      data = mutable_cpu_data();
      caffe_scal(count_, scale_factor, data);
      return;
    }
    case SyncedMemory::HEAD_AT_GPU:
    case SyncedMemory::SYNCED: {
#ifndef CPU_ONLY
      data = mutable_gpu_data();
      if (device_->backend() == Backend::BACKEND_CUDA) {
#ifdef USE_CUDA
        caffe_gpu_scal(count_, scale_factor, data);
#endif
      } else {
        // OpenCL path: the buffer is an opaque cl_mem handle, offset 0.
#ifdef USE_GREENTEA
        greentea_gpu_scal(device_->id(), count_, scale_factor,
                          (cl_mem) data, 0);
#endif
      }
      return;
#else
      NO_GPU;  // fatal: GPU-resident data in a CPU_ONLY build
#endif
    }
    case SyncedMemory::UNINITIALIZED:
      return;  // nothing to scale
    default:
      LOG(FATAL)<< "Unknown SyncedMemory head state: " << data_->head();
  }
}
// Reconstruct an image using the network.
// The padded input image is split into crop_w x crop_h blocks (to bound
// memory use), each block is run through the Caffe net in batches of
// batch_size, and the outputs are stitched into outMat.
// outputBlockBuf must hold at least output_block_plane_size * batch_size
// floats. Returns eWaifu2xError_OK on success.
// (UseTTA is not referenced anywhere in this function's body.)
Waifu2x::eWaifu2xError cNet::ReconstructImage(const bool UseTTA, const int crop_w, const int crop_h, const int outer_padding, const int batch_size, float *outputBlockBuf, const cv::Mat &inMat, cv::Mat &outMat)
{
	const auto InputHeight = inMat.size().height;
	const auto InputWidth = inMat.size().width;
	const auto InputLine = inMat.step1();

	assert(inMat.channels() == 1 || inMat.channels() == 3);

	const int InputPadding = mNetOffset + outer_padding; // input padding

	const auto NoPaddingInputWidth = InputWidth - InputPadding * 2; // input image width excluding padding
	const auto NoPaddingInputHeight = InputHeight - InputPadding * 2; // input image height excluding padding

	// Output image: the unpadded input size scaled by the net's inner scale.
	cv::Mat outim(NoPaddingInputHeight * mInnerScale, NoPaddingInputWidth * mInnerScale, inMat.type());

	// float *imptr = (float *)im.data;
	float *imptr = (float *)outim.data;

	const auto input_block_width = crop_w + InputPadding * 2; // input block width
	const auto input_block_height = crop_h + InputPadding * 2; // input block height

	const auto output_block_width = input_block_width * mInnerScale - mNetOffset * 2; // output block width
	const auto output_block_height = input_block_height * mInnerScale - mNetOffset * 2; // output block height

	const auto output_crop_block_width = crop_w * mInnerScale; // output block width after cropping
	const auto output_crop_block_height = crop_h * mInnerScale; // output block height after cropping

	const auto output_crop_w = (output_block_width - crop_w * mInnerScale) / 2; // crop size applied to the output
	const auto output_crop_h = (output_block_height - crop_h * mInnerScale) / 2; // crop size applied to the output

	// The caller must have padded the image so it tiles exactly.
	assert(NoPaddingInputWidth % crop_w == 0);
	assert(NoPaddingInputHeight % crop_h == 0);

	try
	{
		auto input_blobs = mNet->input_blobs();

		assert(input_blobs.size() > 0);

		auto input_blob = mNet->input_blobs()[0];

		input_blob->Reshape(batch_size, mInputPlane, input_block_height, input_block_width);

		assert(inMat.channels() == mInputPlane);
		assert(input_blob->shape(1) == mInputPlane);

		const int WidthNum = NoPaddingInputWidth / crop_w;
		const int HeightNum = NoPaddingInputHeight / crop_h;

		const int BlockNum = WidthNum * HeightNum;

		const int input_block_plane_size = input_block_width * input_block_height * mInputPlane;
		const int output_block_plane_size = output_block_width * output_block_height * mInputPlane;

		// The image is reconstructed block by block (to limit memory consumption),
		// processing up to batch_size blocks per forward pass.
		for (int num = 0; num < BlockNum; num += batch_size)
		{
			const int processNum = (BlockNum - num) >= batch_size ? batch_size : BlockNum - num;

			// The final batch may be smaller; shrink the input blob to match.
			if (processNum < batch_size)
				input_blob->Reshape(processNum, mInputPlane, input_block_height, input_block_width);

			for (int n = 0; n < processNum; n++)
			{
				// Block coordinates in the tile grid, then in pixels.
				const int wn = (num + n) % WidthNum;
				const int hn = (num + n) / WidthNum;

				const int w = wn * crop_w;
				const int h = hn * crop_h;

				assert(w + input_block_width <= InputWidth && h + input_block_height <= InputHeight);

				cv::Mat someimg = inMat(cv::Rect(w, h, input_block_width, input_block_height));

				// Flatten the image block into the input blob (HWC -> CHW)
				{
					float *fptr = input_blob->mutable_cpu_data() + (input_block_plane_size * n);
					const float *uptr = (const float *)someimg.data;

					const auto Line = someimg.step1();

					if (someimg.channels() == 1)
					{
						// Single channel: rows can be copied directly; one memcpy
						// when the Mat has no row padding, otherwise row by row.
						if (input_block_width == Line)
							memcpy(fptr, uptr, input_block_width * input_block_height * sizeof(float));
						else
						{
							for (int i = 0; i < input_block_height; i++)
								memcpy(fptr + i * input_block_width, uptr + i * Line, input_block_width * sizeof(float));
						}
					}
					else
					{
						// Multi-channel: transpose interleaved HWC pixels into
						// planar CHW order expected by the blob.
						const auto LinePixel = someimg.step1() / someimg.channels();
						const auto Channel = someimg.channels();
						const auto Width = someimg.size().width;
						const auto Height = someimg.size().height;

						for (int i = 0; i < Height; i++)
						{
							for (int j = 0; j < Width; j++)
							{
								for (int ch = 0; ch < Channel; ch++)
								{
									const size_t IndexSrc = i * someimg.step1() + j * Channel + ch;
									const size_t IndexDst = (ch * Height + i) * Width + j;

									fptr[IndexDst] = uptr[IndexSrc];
								}
							}
						}
					}
				}
			}

			assert(input_blob->count() == input_block_plane_size * processNum);

			// Run the forward computation
			auto out = mNet->Forward();
			auto b = out[0];

			assert(b->count() == output_block_plane_size * processNum);

			const float *ptr = nullptr;

			if (caffe::Caffe::mode() == caffe::Caffe::CPU)
				ptr = b->cpu_data();
			else
				ptr = b->gpu_data();

			caffe::caffe_copy(output_block_plane_size * processNum, ptr, outputBlockBuf);

			for (int n = 0; n < processNum; n++)
			{
				const int wn = (num + n) % WidthNum;
				const int hn = (num + n) / WidthNum;

				const int w = wn * output_crop_block_width;
				const int h = hn * output_crop_block_height;

				const float *fptr = outputBlockBuf + (output_block_plane_size * n);

				const auto Line = outim.step1();

				// Copy the result into the output image, cropping
				// output_crop_w/output_crop_h pixels off each block edge.
				if (outim.channels() == 1)
				{
					for (int i = 0; i < output_crop_block_height; i++)
						memcpy(imptr + (h + i) * Line + w, fptr + (i + output_crop_h) * output_block_width + output_crop_w, output_crop_block_width * sizeof(float));
				}
				else
				{
					// Planar CHW blob output back to interleaved HWC pixels.
					const auto LinePixel = Line / outim.channels();
					const auto Channel = outim.channels();

					for (int i = 0; i < output_crop_block_height; i++)
					{
						for (int j = 0; j < output_crop_block_width; j++)
						{
							for (int ch = 0; ch < Channel; ch++)
							{
								const size_t IndexSrc = (ch * output_block_height + i + output_crop_h) * output_block_width + j + output_crop_w;
								const size_t IndexDst = ((h + i) * LinePixel + (w + j)) * Channel + ch;

								imptr[IndexDst] = fptr[IndexSrc];
							}
						}
					}
				}

				//{
				//	cv::Mat testim(output_block_size, output_block_size, CV_32FC1);
				//	float *p = (float *)testim.data;
				//	for (int i = 0; i < output_block_size; i++)
				//	{
				//		for (int j = 0; j < output_block_size; j++)
				//		{
				//			p[testim.step1() * i + j] = fptr[i * output_block_size + j];
				//		}
				//	}
				//	const int cv_depth = DepthBitToCVDepth(8);
				//	const double max_val = GetValumeMaxFromCVDepth(cv_depth);
				//	const double eps = GetEPS(cv_depth);
				//	cv::Mat write_iamge;
				//	testim.convertTo(write_iamge, cv_depth, max_val, eps);
				//	cv::imwrite("ti.png", write_iamge);
				//	testim.release();
				//}
			}
		}
	}
	catch (...)
	{
		return Waifu2x::eWaifu2xError_FailedProcessCaffe;
	}

	// Clip values to the 0..1 range
	cv::threshold(outim, outim, 1.0, 1.0, cv::THRESH_TRUNC);
	cv::threshold(outim, outim, 0.0, 0.0, cv::THRESH_TOZERO);

	outMat = outim;

	return Waifu2x::eWaifu2xError_OK;
}
void Blob<Dtype>::FixParams(int pos, int width) {
  // Quantize the parameter (data) buffer to a fixed-point representation
  // with the given binary point position and bit width.
  // A non-positive width disables quantization.
  if (width > 0) {
    TruncParams(mutable_cpu_data(), count_, pos, width);
  }
}