/**
 * Runs the noise-reduction and/or upscaling network over a float image.
 *
 * @param isReconstructNoise  when true, one pass of net_noise is applied first.
 * @param isReconstructScale  when true, net_scale is applied
 *                            ceil(log2(scale_ratio)) times.
 * @param cancel_func         optional callback, polled once after the noise
 *                            stage and once after the scale stage; a true
 *                            result aborts with eWaifu2xError_Cancel.
 * @param in                  source image.
 * @param out                 receives the processed image; only assigned on
 *                            success.
 * @return eWaifu2xError_OK on success, eWaifu2xError_Cancel when cancelled, or
 *         the error returned by ReconstructImage().
 */
Waifu2x::eWaifu2xError Waifu2x::ReconstructFloatMat(const bool isReconstructNoise, const bool isReconstructScale, const waifu2xCancelFunc cancel_func, const cv::Mat &in, cv::Mat &out)
{
	// NOTE(review): this copies only the Mat header, so `im` initially shares
	// pixel data with `in` - confirm PaddingImage() does not write in place.
	cv::Mat im(in);
	cv::Size_<int> image_size = im.size();

	// Shared post-processing for every network pass: strip the padding that
	// was added before the pass, then clip the values to the 0..1 range.
	const auto cropAndClamp = [&]()
	{
		// Strip the padding
		im = im(cv::Rect(offset, offset, image_size.width, image_size.height));

		// Clip values to 0..1
		cv::threshold(im, im, 1.0, 1.0, cv::THRESH_TRUNC);
		cv::threshold(im, im, 0.0, 0.0, cv::THRESH_TOZERO);
	};

	if (isReconstructNoise)
	{
		PaddingImage(im, im);

		const Waifu2x::eWaifu2xError ret = ReconstructImage(net_noise, im);
		if (ret != eWaifu2xError_OK)
			return ret;

		cropAndClamp();
	}

	if (cancel_func && cancel_func())
		return eWaifu2xError_Cancel;

	if (isReconstructScale)
	{
		// Number of passes needed to reach (at least) scale_ratio.
		const int scale2 = ceil(log2(scale_ratio));

		for (int i = 0; i < scale2; i++)
		{
			// image_size is handed to the zoom step and reused for the crop
			// below - presumably it is updated to the zoomed size; confirm
			// against Zoom2xAndPaddingImage().
			Zoom2xAndPaddingImage(im, im, image_size);

			const Waifu2x::eWaifu2xError ret = ReconstructImage(net_scale, im);
			if (ret != eWaifu2xError_OK)
				return ret;

			cropAndClamp();
		}
	}

	if (cancel_func && cancel_func())
		return eWaifu2xError_Cancel;

	out = im;

	return eWaifu2xError_OK;
}
/*
 * Decodes the pixel data of `png` into `pixbuf->data`.
 *
 * Only non-interlaced images with 8-bit components are handled; anything else
 * is logged and rejected.
 *
 * Returns true when the image was decoded, false when the format is
 * unsupported.
 */
bool PngDecodeImage(PngT *png, PixBufT *pixbuf) {
  uint32_t bytesPerPixel;
  uint32_t pixelBytes;
  uint32_t inflatedSize;
  uint8_t *scanlines;

  if (png->ihdr.interlace_method != 0) {
    LOG("Interlaced PNG not supported.");
    return false;
  }

  if (png->ihdr.bit_depth != 8) {
    LOG("Non 8-bit components not supported.");
    return false;
  }

  bytesPerPixel = GetPixelWidth(png);
  pixelBytes = png->ihdr.width * png->ihdr.height * bytesPerPixel;
  /* One extra byte per row - presumably the per-scanline filter type byte. */
  inflatedSize = pixelBytes + png->ihdr.height;

  /* NOTE(review): the result of MemNew() is used unchecked - confirm it
   * cannot return NULL. */
  scanlines = MemNew(inflatedSize);

  MergeIDATs(png);

  LOG("Uncompressing the image.");
  /* The first two bytes of the merged IDAT data are skipped - presumably the
   * zlib stream header. */
  Inflate(png->idat.data + 2, scanlines);

  LOG("Decoding pixels.");
  ReconstructImage(pixbuf->data, scanlines,
                   png->ihdr.width, png->ihdr.height, bytesPerPixel);

  MemUnref(scanlines);

  return true;
}
/**
 * Converts one image file: load, preprocess, run the network(s), postprocess
 * and save.
 *
 * @param input_file      image to read.
 * @param output_file     destination path handed to stImage::Save().
 * @param scale_ratio     optional scale factor, forwarded to CalcScaleRatio().
 * @param scale_width     optional target width, forwarded to CalcScaleRatio().
 * @param scale_height    optional target height, forwarded to CalcScaleRatio().
 * @param cancel_func     cancellation callback forwarded to ReconstructImage().
 * @param crop_w          forwarded to ReconstructImage().
 * @param crop_h          forwarded to ReconstructImage().
 * @param output_quality  optional quality setting forwarded to stImage::Save().
 * @param output_depth    forwarded to stImage::Postprocess().
 * @param use_tta         forwarded to ReconstructImage().
 * @param batch_size      forwarded to ReconstructImage().
 * @return eWaifu2xError_NotInitialized when the object is not initialised,
 *         otherwise the first error reported by Load / ReconstructImage /
 *         Save, or eWaifu2xError_OK.
 */
Waifu2x::eWaifu2xError Waifu2x::waifu2x(const boost::filesystem::path &input_file, const boost::filesystem::path &output_file,
	const boost::optional<double> scale_ratio, const boost::optional<int> scale_width, const boost::optional<int> scale_height,
	const waifu2xCancelFunc cancel_func, const int crop_w, const int crop_h,
	const boost::optional<int> output_quality, const int output_depth, const bool use_tta,
	const int batch_size)
{
	if (!mIsInited)
		return Waifu2x::eWaifu2xError_NotInitialized;

	stImage image;
	Waifu2x::eWaifu2xError ret = image.Load(input_file);
	if (ret != Waifu2x::eWaifu2xError_OK)
		return ret;

	image.Preprocess(mInputPlane, mMaxNetOffset);

	// In auto-scale mode denoising is applied only when the loaded image
	// asks for it via RequestDenoise().
	const bool isReconstructNoise = mMode == eWaifu2xModelTypeNoise
		|| mMode == eWaifu2xModelTypeNoiseScale
		|| (mMode == eWaifu2xModelTypeAutoScale && image.RequestDenoise());
	const bool isReconstructScale = mMode == eWaifu2xModelTypeScale
		|| mMode == eWaifu2xModelTypeNoiseScale
		|| mMode == eWaifu2xModelTypeAutoScale;

	// CalcScaleRatio() is always evaluated (as before); its result is ignored
	// when the current mode does not scale.
	const double requestedFactor = CalcScaleRatio(scale_ratio, scale_width, scale_height, image);
	const double Factor = isReconstructScale ? requestedFactor : 1.0;

	ret = ReconstructImage(Factor, crop_w, crop_h, use_tta, batch_size, isReconstructNoise, isReconstructScale, cancel_func, image);
	if (ret != Waifu2x::eWaifu2xError_OK)
		return ret;

	image.Postprocess(mInputPlane, Factor, output_depth);

	return image.Save(output_file, output_quality);
}
/**
 * Command-line entry point.
 *
 * Expected arguments:
 *   argv[1] total_frames        (integer)
 *   argv[2] key_frame_interval  (integer)
 *   argv[3] dir_path
 *   argv[4] search_area         (integer)
 *   argv[5] vErrThreshold       (integer)
 *   argv[6] out_file_path
 *
 * Exits with -1 when fewer than six arguments are supplied. The numeric
 * arguments are parsed with std::stoi, which throws on malformed input.
 *
 * The arguments are currently only parsed; the previous single-sequence
 * experiment is kept below, disabled with `#if 0`, for reference.
 */
int main( int argc, char* argv[]) {

  // Six arguments plus the program name: argv[6] is read below, so argc must
  // be at least 7.
  if(argc < 7) {
    std::cout<< "All File arguments are required!!" << std::endl;
    exit(-1);
  }

  uint32_t total_frames = std::stoi(argv[1], nullptr, 10);
  uint32_t key_frame_interval = std::stoi(argv[2], nullptr, 10);
  std::string dir_path(argv[3]);
  uint32_t search_area = std::stoi(argv[4], nullptr, 10);
  int32_t vErrThreshold = std::stoi(argv[5], nullptr, 10);
  // The output path is a plain string; it takes no radix/position arguments.
  std::string out_file_path(argv[6]);

#if 0
  std::string dir_path(argv[1]);
  uint32_t first_frame_idx = std::stoi(argv[2], nullptr, 10);
  uint32_t frame_count = std::stoi(argv[3], nullptr, 10);
  uint32_t search_area = std::stoi(argv[4], nullptr, 10);
  uint32_t pad_zero = std::stoi(argv[5], nullptr, 10);
  int32_t vErrThreshold = std::stoi(argv[6], nullptr, 10);

  std::vector<std::string> file_paths;
  std::vector< std::unique_ptr<MPTC::DXTImage> > dxt_frames;
  std::stringstream ss;
  std::fstream outfile;
  outfile.open("out.txt", std::ios::out);

  //MPTC::DXTImage dxt_img(img_path, true, 0);
  ss.str("");
  ss << std::setw(pad_zero) << std::setfill('0') << first_frame_idx;
  std::string frame_num_str = ss.str();
  std::string file_path = dir_path + "/" + frame_num_str+ ".png";
  file_paths.push_back(file_path);

  MPTC::DXTImage::SetPattern(static_cast<int32_t>(search_area));

  std::unique_ptr<MPTC::DXTImage> dxt_img(new MPTC::DXTImage(file_path, true, 0, vErrThreshold));
  std::unique_ptr<MPTC::DXTImage> null_dxt(nullptr);

  dxt_frames.push_back(std::move(dxt_img));
  std::cout << "Frame Number:" << 0 << std::endl;
  std::cout << "Before PSNR: " << dxt_frames[0]->PSNR() << std::endl;
  dxt_frames[0]->Reencode(null_dxt, 0);
  std::cout << "After PSNR: " << dxt_frames[0]->PSNR() << std::endl;
  std::cout << "Intra block motion size:" << dxt_frames[0]->_motion_indices.size() << std::endl;

  std::vector<uint8_t> palette = std::move(dxt_frames[0]->Get8BitPalette());

  std::vector<uint64_t> count_palette(256, 0);
  std::vector<uint64_t> count_intra(256, 0);
  std::vector<uint64_t> total_counts(256,0);

  for(auto a : dxt_frames[0]->_intra_motion_indices) {
    count_intra[std::get<0>(a)]++;
    count_intra[std::get<1>(a)]++;
    total_counts[std::get<0>(a)]++;
    total_counts[std::get<1>(a)]++;
  }

  uint64_t Total = std::accumulate(count_intra.begin(), count_intra.end(), 0U);
  double entropy = 0.0;
  for( auto e : count_intra ) {
    if(e!=0) {
      double p = static_cast<double>(e)/static_cast<double>(Total);
      entropy += (-1.0 * p * log2(p));
    }
  }

  std::cout << "Total:" << Total << std::endl;
  std::cout << "Entropy:" << entropy << std::endl;

  //Entropy encode motion indices
  //*****************MAX BYTES *******************
  uint32_t max_bytes = 180000;
  std::vector<uint8_t> compressed_data(max_bytes, 0);

  entropy::Arithmetic_Codec ace(max_bytes, compressed_data.data());
  entropy::Adaptive_Data_Model model(257);
  ace.start_encoder();

  for(auto a : dxt_frames[0]->_motion_indices) {
    ace.encode(std::get<0>(a), model);
    ace.encode(std::get<1>(a), model);
  }

  ace.stop_encoder();
  std::cout << "Compressed motion index bytes:" << ace.get_num_bytes() << std::endl;

  // Entropy encode index mask
  std::vector<uint8_t> compressed_mask(max_bytes, 0);
  entropy::Arithmetic_Codec ace_mask(max_bytes, compressed_mask.data());
  entropy::Adaptive_Bit_Model mask_bit_model;
  ace_mask.start_encoder();
  for(auto a : dxt_frames[0]->_index_mask) {
    ace_mask.encode(a, mask_bit_model);
  }
  ace_mask.stop_encoder();
  std::cout << "Compressed Mask bytes:" << ace_mask.get_num_bytes() << std::endl;

#if 0
  //Entropy Decode
  entropy::Arithmetic_Codec ade(max_bytes, compressed_data.data());
  entropy::Adaptive_Data_Model decode_model(257);
  ade.start_decoder();
  std::vector<std::tuple<uint8_t, uint8_t> > decoded_symbols;

  for(int i = 0; i < dxt_frames[0]->_motion_indices.size(); i++) {
    uint8_t sym1 = ade.decode(decode_model);
    uint8_t sym2 = ade.decode(decode_model);
    decoded_symbols.push_back(std::make_tuple(sym1, sym2));
#ifdef NDEBUG
    auto a = dxt_frames[0]->_motion_indices[i];
    std::cout << sym1 << "-" << std::get<0>(a) << std::endl;
    std::cout << sym2 << "-" << std::get<1>(a) << std::endl;
#endif
    assert(dxt_frames[0]->_motion_indices[i] == decoded_symbols[i]);
  }
  ade.stop_decoder();

  //Entropy decode mask bits
  entropy::Arithmetic_Codec ade_mask(max_bytes,compressed_mask.data());
  entropy::Adaptive_Bit_Model decode_mask_bit_model;
  ade_mask.start_decoder();
  std::vector<uint8_t> decoded_mask;

  for(int i = 0; i < dxt_frames[0]->_motion_indices.size(); i++) {
    uint8_t sym = ade_mask.decode(decode_mask_bit_model);
    decoded_mask.push_back(sym);
#ifndef NDEBUG
    auto a = dxt_frames[0]->_index_mask[i];
    std::cout << static_cast<int>(sym) << " -- " << static_cast<int>(a) << std::endl;
#endif
    assert(sym == dxt_frames[0]->_index_mask[i]);
  }
  ade_mask.stop_decoder();
#endif

  uint32_t total_bits = ace.get_num_bytes() * 8 + dxt_frames[0]->_unique_palette.size() * 32 + ace_mask.get_num_bytes() * 8;
  float total_bytes = static_cast<float>(total_bits)/8;
  outfile << total_bytes+10 << "\t" << dxt_frames[0]->PSNR() << std::endl;
  std::cout << "*****Total bytes****:" << total_bytes << std::endl;
  float bpp = static_cast<float>(total_bits)/(dxt_frames[0]->_width * dxt_frames[0]->_height);
  std::cout << "BPP:" << bpp << "\t" << dxt_frames[0]->PSNR() << std::endl;

  std::unique_ptr<MPTC::RGBAImage> ep1 = std::move(dxt_frames[0]->EndpointOneImage());
  std::vector<uint8_t> ep1_vector = std::move(ep1->Pack());
  stbi_write_png("ep1.png", ep1->Width(), ep1->Height(), 4, ep1->Pack().data(), 4 * ep1->Width());

  std::unique_ptr<MPTC::RGBAImage> ep2 = std::move(dxt_frames[0]->EndpointTwoImage());
  std::vector<uint8_t> ep2_vector = std::move(ep2->Pack());
  stbi_write_png("ep2.png", ep2->Width(), ep2->Height(), 4, ep2->Pack().data(), 4 * ep2->Width());

  std::vector<uint32_t> ep_diff;
  int max_diff = std::numeric_limits<int>::min();
  int min_diff = std::numeric_limits<int>::max();
  std::vector<uint32_t> count_ep(512, 0);

  for(size_t ep_idx = 0; ep_idx < ep1_vector.size(); ep_idx++) {
    if(ep_idx % 4 == 3) continue;
    int diff = static_cast<int>(ep1_vector[ep_idx]) - static_cast<int>(ep2_vector[ep_idx]);
    if(diff > max_diff) max_diff = diff;
    if(diff < min_diff) min_diff = diff;
    ep_diff.push_back(static_cast<uint32_t>(diff + 255));
    count_ep[ep_diff[ep_diff.size() - 1]]++;
  }

  uint64_t Total_ep = std::accumulate(count_ep.begin(), count_ep.end(), 0U);
  double entropy_ep = 0.0;
  for( auto e : count_ep ) {
    if(e!=0) {
      double p = static_cast<double>(e)/static_cast<double>(Total_ep);
      entropy_ep += (-1.0 * p * log2(p));
    }
  }

  // Entropy encode endpoint
  std::vector<uint8_t> compressed_ep(max_bytes, 0);
  entropy::Arithmetic_Codec ace_ep(max_bytes, compressed_ep.data());
  entropy::Adaptive_Data_Model ep_model;
  ace_ep.start_encoder();
  for(auto a :ep_diff) {
    ace_ep.encode(a, mask_bit_model);
  }
  ace_ep.stop_encoder();
  std::cout << "----EndPoint compressed----:" << ace_ep.get_num_bytes() << std::endl;
  std::cout << "Total end point:" << Total_ep << std::endl;
  std::cout << "Entropy end point:" << entropy_ep << std::endl;

  ReconstructImage(dxt_frames, 0);
  std::cout << "PSNR after decompression: " << dxt_frames[0]->PSNR() << std::endl;

  for(uint32_t i = first_frame_idx + 1; i <= first_frame_idx + frame_count; i++) {
    ss.str("");
    ss << std::setw(pad_zero) << std::setfill('0') << i;
    std::string frame_num_str = ss.str();
    std::string file_path = dir_path + "/" + frame_num_str+ ".png";
    std::unique_ptr<MPTC::DXTImage> dxt_img1(new MPTC::DXTImage(file_path, false, search_area, vErrThreshold));
    dxt_frames.push_back(std::move(dxt_img1));
    file_paths.push_back(file_path);
  }

  double combined_bpp = bpp;
  for(size_t i = 1; i < dxt_frames.size(); i++) {
    //*****************MAX BYTES *******************
    std::cout << std::endl << std::endl;
    std::cout << "Frame Number:" << i << std::endl;
    std::cout << "Before PSNR:" << dxt_frames[i]->PSNR() << std::endl;
    dxt_frames[i]->Reencode(dxt_frames[i-1], -1);
    std::cout << "After PSNR:" << dxt_frames[i]->PSNR() << std::endl;
    std::cout << "Total unique indices:" << dxt_frames[i]->_unique_palette.size()<< std::endl;
    std::cout << "Intra block motion size:" << dxt_frames[i]->_intra_motion_indices.size()<<std::endl;
    std::cout << "Inter block motion size:" << dxt_frames[i]->_inter_block_motion_indices.size() << std::endl;
    std::cout << "Inter pixel motion size:" << dxt_frames[i]->_inter_pixel_motion_indices.size() << std::endl;

    uint32_t max_bytes_inter = 180000;
    std::vector<uint8_t> compressed_data_inter(max_bytes_inter, 0);
    entropy::Arithmetic_Codec ace_inter(max_bytes_inter, compressed_data_inter.data());
    entropy::Adaptive_Data_Model model_inter(257);
    ace_inter.start_encoder();
    for(auto a : dxt_frames[i]->_motion_indices) {
      ace_inter.encode(std::get<0>(a), model_inter);
      ace_inter.encode(std::get<1>(a), model_inter);
    }
    ace_inter.stop_encoder();

    // Entropy encode index mask
    std::vector<uint8_t> compressed_mask_inter(max_bytes_inter, 0);
    entropy::Arithmetic_Codec ace_mask_inter(max_bytes_inter, compressed_mask_inter.data());
    entropy::Adaptive_Bit_Model mask_bit_model_inter;
    ace_mask_inter.start_encoder();
    for(auto a : dxt_frames[i]->_index_mask) {
      ace_mask_inter.encode(a, mask_bit_model_inter);
    }
    ace_mask_inter.stop_encoder();

    //Entropy Decode
    entropy::Arithmetic_Codec ade(max_bytes, compressed_data_inter.data());
    entropy::Adaptive_Data_Model decode_model(257);
    ade.start_decoder();
    std::vector<std::tuple<uint8_t, uint8_t> > decoded_symbols;

    for(int ii = 0; ii < dxt_frames[i]->_motion_indices.size(); ii++) {
      uint8_t sym1 = ade.decode(decode_model);
      uint8_t sym2 = ade.decode(decode_model);
      decoded_symbols.push_back(std::make_tuple(sym1, sym2));
#if 0
      auto a = dxt_frames[]->_motion_indices[i];
      std::cout << sym1 << "-" << std::get<0>(a) << std::endl;
      std::cout << sym2 << "-" << std::get<1>(a) << std::endl;
#endif
      assert(dxt_frames[i]->_motion_indices[ii] == decoded_symbols[ii]);
    }
    ade.stop_decoder();

    //Entropy decode mask bits
    entropy::Arithmetic_Codec ade_mask(max_bytes,compressed_mask_inter.data());
    entropy::Adaptive_Bit_Model decode_mask_bit_model;
    ade_mask.start_decoder();
    std::vector<uint8_t> decoded_mask;

    for(int ii = 0; ii < dxt_frames[i]->_index_mask.size(); ii++) {
      uint8_t sym = ade_mask.decode(decode_mask_bit_model);
      decoded_mask.push_back(sym);
#if 0
      auto a = dxt_frames[0]->_index_mask[i];
      std::cout << static_cast<int>(sym) << " -- " << static_cast<int>(a) << std::endl;
#endif
      assert(sym == dxt_frames[i]->_index_mask[ii]);
    }
    ade_mask.stop_decoder();

    std::vector<uint64_t> counts(256,0);
    uint8_t max = std::numeric_limits<uint8_t>::min();
    uint8_t min = std::numeric_limits<uint8_t>::max();

    for(auto a : dxt_frames[i]->_motion_indices) {
      counts[std::get<0>(a)]++;
      counts[std::get<1>(a)]++;
      total_counts[std::get<0>(a)]++;
      total_counts[std::get<1>(a)]++;

      max = std::max(std::max(max, std::get<0>(a)), std::get<1>(a));
      min = std::min(std::min(min, std::get<0>(a)), std::get<1>(a));
    }

    Total = std::accumulate(counts.begin(), counts.end(), 0U);
    entropy = 0.0;
    for( auto e : counts ) {
      if(e!=0) {
        double p = static_cast<double>(e)/static_cast<double>(Total);
        entropy += (-1.0 * p * log2(p));
      }
    }
    std::cout << "Total:" << Total << std::endl;
    std::cout << "Entropy:" << entropy << std::endl;

    total_bits = ace_inter.get_num_bytes() * 8 + 1000 + dxt_frames[0]->_unique_palette.size() * 32 + ace_mask_inter.get_num_bytes() * 8;
    total_bytes = static_cast<float>(total_bits)/8;

    std::cout << "Compressed motion index bytes:" << ace_inter.get_num_bytes() << std::endl;
    std::cout << "Compressed Mask bytes:" << ace_mask_inter.get_num_bytes() << std::endl;
    std::cout << "Total bytes:" << total_bytes << std::endl;
    bpp = static_cast<float>(total_bits)/(dxt_frames[0]->_width * dxt_frames[0]->_height);
    std::cout << "BPP:" << bpp << std::endl;
    combined_bpp += bpp;
  }

  std::cout << std::endl << std::endl;
  std::cout << "Combined BPP:" << combined_bpp << std::endl;
#endif

  return 0;
}
/**
 * Processes an in-memory image and writes the 8-bit result into `dest`.
 *
 * @param factor  requested magnification; the scale network is run
 *                ceil(log2(factor)) times and the result is then resized by
 *                factor / 2^passes.
 * @param source  input pixels, handed to LoadMat() together with width/height.
 * @param dest    output buffer; each output row is written at
 *                `dest + output_width * row`.
 * @param width   input width passed to LoadMat().
 * @param height  input height passed to LoadMat().
 * @return eWaifu2xError_NotInitialized when the object is not initialised or
 *         when input_plane == 1 (this entry point does not handle that
 *         configuration), the error from LoadMat()/ReconstructImage() on
 *         failure, otherwise eWaifu2xError_OK.
 */
Waifu2x::eWaifu2xError Waifu2x::waifu2x(int factor, const uint32_t* source, uint32_t* dest, int width, int height)
{
	Waifu2x::eWaifu2xError ret;

	if (!is_inited)
		return eWaifu2xError_NotInitialized;

	cv::Mat float_image;
	ret = LoadMat(float_image, source, width, height);
	if (ret != eWaifu2xError_OK)
		return ret;

	// Rejected up front, so everything below only deals with the
	// multi-plane path.
	if (input_plane == 1)
		return eWaifu2xError_NotInitialized;

	cv::Mat im;
	{
		std::vector<cv::Mat> planes;
		cv::split(float_image, planes);

		// Drop the alpha plane; it is re-attached after reconstruction.
		if (float_image.channels() == 4)
			planes.resize(3);

		// Convert BGR to RGB (currently disabled)
		//std::swap(planes[0], planes[2]);

		cv::merge(planes, im);
	}

	cv::Size_<int> image_size = im.size();

	// NOTE(review): "auto_scale" enables the noise pass but not the scale
	// pass here - confirm that this is intended.
	const bool isReconstructNoise = mode == "noise" || mode == "noise_scale" || mode == "auto_scale";
	const bool isReconstructScale = mode == "scale" || mode == "noise_scale";

	if (isReconstructNoise)
	{
		PaddingImage(im, im);

		ret = ReconstructImage(net_noise, im);
		if (ret != eWaifu2xError_OK)
			return ret;

		// Strip the padding
		im = im(cv::Rect(offset, offset, image_size.width, image_size.height));
	}

	// Number of 2x passes, and the residual ratio that brings the
	// power-of-two result back to the requested factor.
	const int scale2 = ceil(log2((double)factor));
	const double shrinkRatio = (double)factor / std::pow(2.0, (double)scale2);

	if (isReconstructScale)
	{
		for (int i = 0; i < scale2; i++)
		{
			// image_size is passed to the zoom step and reused for the crop
			// below - presumably updated to the zoomed size; confirm against
			// Zoom2xAndPaddingImage().
			Zoom2xAndPaddingImage(im, im, image_size);

			ret = ReconstructImage(net_scale, im);
			if (ret != eWaifu2xError_OK)
				return ret;

			// Strip the padding
			im = im(cv::Rect(offset, offset, image_size.width, image_size.height));
		}
	}

	cv::Mat process_image;
	{
		std::vector<cv::Mat> planes;
		cv::split(im, planes);

		// Convert RGB back to BGR (currently disabled)
		//std::swap(planes[0], planes[2]);

		cv::merge(planes, process_image);
	}

	cv::Mat alpha;
	if (float_image.channels() == 4)
	{
		std::vector<cv::Mat> planes;
		cv::split(float_image, planes);
		alpha = planes[3];

		cv::resize(alpha, alpha, image_size, 0.0, 0.0, cv::INTER_CUBIC);
	}

	// If there is an alpha channel, append it and remove its influence from
	// the colour planes (divide each colour plane by alpha).
	if (!alpha.empty())
	{
		std::vector<cv::Mat> planes;
		cv::split(process_image, planes);
		process_image.release();

		planes.push_back(alpha);

		cv::Mat w2 = planes[3];

		planes[0] = (planes[0]).mul(1.0 / w2);
		planes[1] = (planes[1]).mul(1.0 / w2);
		planes[2] = (planes[2]).mul(1.0 / w2);

		cv::merge(planes, process_image);
	}

	const cv::Size_<int> ns(image_size.width * shrinkRatio, image_size.height * shrinkRatio);
	if (image_size.width != ns.width || image_size.height != ns.height)
		cv::resize(process_image, process_image, ns, 0.0, 0.0, cv::INTER_LINEAR);

	cv::Mat write_image;
	process_image.convertTo(write_image, CV_8U, 255.0);
	process_image.release();

	// Write to the output array, one row at a time.
	{
		const auto out_width = write_image.size().width;
		const auto stride = write_image.step1();
		// NOTE(review): destination rows advance by out_width uint32_t
		// elements while `stride` bytes are copied per row; these only match
		// for a 4-channel result - confirm what callers expect for 3-channel
		// output.
		for (int i = 0; i < write_image.size().height; i++)
			memcpy(dest + out_width * i, write_image.data + stride * i, stride);
	}

	return eWaifu2xError_OK;
}
/**
 * Processes an in-memory image and writes the result into a caller-supplied
 * buffer.
 *
 * @param factor       magnification; ignored (treated as 1.0) when the current
 *                     mode does not scale.
 * @param source       input pixels, handed to stImage::Load().
 * @param dest         output buffer; row i is written at
 *                     `dest + out_stride * i`.
 * @param width        input width.
 * @param height       input height.
 * @param in_channel   input channel count.
 * @param in_stride    input row pitch, handed to stImage::Load().
 * @param out_channel  output channel count. Supported (in, out) pairs are
 *                     (3,3), (4,4), (3,4), (4,3) and (1,1).
 * @param out_stride   output row pitch in bytes.
 * @param crop_w       forwarded to ReconstructImage().
 * @param crop_h       forwarded to ReconstructImage().
 * @param use_tta      forwarded to ReconstructImage().
 * @param batch_size   forwarded to ReconstructImage().
 * @return eWaifu2xError_NotInitialized when the object is not initialised,
 *         eWaifu2xError_InvalidParameter for an unsupported channel pair, the
 *         error from Load()/ReconstructImage() on failure, otherwise
 *         eWaifu2xError_OK.
 */
Waifu2x::eWaifu2xError Waifu2x::waifu2x(const double factor, const void* source, void* dest, const int width, const int height,
	const int in_channel, const int in_stride, const int out_channel, const int out_stride,
	const int crop_w, const int crop_h, const bool use_tta, const int batch_size)
{
	Waifu2x::eWaifu2xError ret;

	if (!mIsInited)
		return Waifu2x::eWaifu2xError_NotInitialized;

	// Colour conversion applied to the result; -1 means "no conversion"
	// (single-channel in and out).
	int cvrSetting = -1;
	if (in_channel == 3 && out_channel == 3)
		cvrSetting = CV_BGR2RGB;
	else if (in_channel == 4 && out_channel == 4)
		cvrSetting = CV_BGRA2RGBA;
	else if (in_channel == 3 && out_channel == 4)
		cvrSetting = CV_BGR2RGBA;
	else if (in_channel == 4 && out_channel == 3)
		cvrSetting = CV_BGRA2RGB;
	else if (!(in_channel == 1 && out_channel == 1))
		return Waifu2x::eWaifu2xError_InvalidParameter;

	stImage image;
	ret = image.Load(source, width, height, in_channel, in_stride);
	if (ret != Waifu2x::eWaifu2xError_OK)
		return ret;

	image.Preprocess(mInputPlane, mMaxNetOffset);

	const bool isReconstructNoise = mMode == eWaifu2xModelTypeNoise || mMode == eWaifu2xModelTypeNoiseScale;
	const bool isReconstructScale = mMode == eWaifu2xModelTypeScale || mMode == eWaifu2xModelTypeNoiseScale || mMode == eWaifu2xModelTypeAutoScale;

	const double Factor = isReconstructScale ? factor : 1.0;

	// No cancellation callback is available on this entry point.
	ret = ReconstructImage(Factor, crop_w, crop_h, use_tta, batch_size, isReconstructNoise, isReconstructScale, nullptr, image);
	if (ret != Waifu2x::eWaifu2xError_OK)
		return ret;

	image.Postprocess(mInputPlane, Factor, 32);

	cv::Mat out_bgr_image = image.GetEndImage();
	image.Clear();

	cv::Mat out_image;
	if (cvrSetting >= 0)
		cv::cvtColor(out_bgr_image, out_image, cvrSetting); // Convert BGR back to RGB
	else
		out_image = out_bgr_image;
	out_bgr_image.release();

	// Write to the output array, one row at a time.
	{
		const size_t src_step = out_image.step[0];

		// Never read more than one row of pixel data from the source: a
		// caller-side pitch larger than the image row would otherwise read
		// into the next row and, on the last row, past the end of the buffer.
		const size_t row_bytes = static_cast<size_t>(out_image.cols) * out_image.elemSize();
		const size_t requested_bytes = static_cast<size_t>(out_stride);
		const size_t copy_bytes = requested_bytes < row_bytes ? requested_bytes : row_bytes;

		for (int i = 0; i < out_image.rows; i++)
			memcpy((uint8_t *)dest + out_stride * i, out_image.data + src_step * i, copy_bytes);
	}

	return Waifu2x::eWaifu2xError_OK;
}