void File::copyData( const File &src ) { uint64_t N = src.getDarkCount(); uint64_t O = src.m_iOrder; uint64_t i; hsize_t n; hsize_t off[2], Dims[2]; std::cout << "Copying data from " << src.m_name << std::endl; H5::Group dstGroup(openGroup( "dark" )); H5::Group srcGroup(src.openGroup( "dark" )); H5::DataSet dst_pos( dstGroup.openDataSet("position") ); H5::DataSet dst_vel( dstGroup.openDataSet("velocity") ); //H5::DataSet dst_cls( dstGroup.openDataSet("class") ); H5::DataSet src_pos( srcGroup.openDataSet("position") ); H5::DataSet src_vel( srcGroup.openDataSet("velocity") ); //H5::DataSet src_cls( srcGroup.openDataSet("class") ); double *buffer = new double[3*CHUNK_SIZE]; src_pos.getSpace().getSimpleExtentDims(Dims); H5::DataSpace src_spaceDisk(2,Dims); dst_pos.getSpace().getSimpleExtentDims(Dims); H5::DataSpace dst_spaceDisk(2,Dims); for ( i=0; i<N; i+=n ) { n = N-i > CHUNK_SIZE ? CHUNK_SIZE : N-i; Dims[0] = n; Dims[1] = 3; H5::DataSpace spaceMem(2,Dims); Dims[0] = n; Dims[1] = 3; off[0] = off[1] = 0; spaceMem.selectHyperslab(H5S_SELECT_SET,Dims,off); off[0] = i; off[1] = 0; src_spaceDisk.selectHyperslab(H5S_SELECT_SET,Dims,off); src_pos.read( buffer, H5::PredType::NATIVE_DOUBLE, spaceMem, src_spaceDisk ); off[0] = i+O; off[1] = 0; dst_spaceDisk.selectHyperslab(H5S_SELECT_SET,Dims,off); dst_pos.write( buffer, H5::PredType::NATIVE_DOUBLE, spaceMem, dst_spaceDisk ); off[0] = i; off[1] = 0; src_spaceDisk.selectHyperslab(H5S_SELECT_SET,Dims,off); src_vel.read( buffer, H5::PredType::NATIVE_DOUBLE, spaceMem, src_spaceDisk ); off[0] = i+O; off[1] = 0; dst_spaceDisk.selectHyperslab(H5S_SELECT_SET,Dims,off); dst_vel.write( buffer, H5::PredType::NATIVE_DOUBLE, spaceMem, dst_spaceDisk ); } delete [] buffer; }
void BM3D_Basic_Process::CollaborativeFilter(int plane, FLType *ResNum, FLType *ResDen, const FLType *src, const FLType *ref, const PosPairCode &code) const { PCType GroupSize = static_cast<PCType>(code.size()); // When para.GroupSize > 0, limit GroupSize up to para.GroupSize if (d.para.GroupSize > 0 && GroupSize > d.para.GroupSize) { GroupSize = d.para.GroupSize; } // Construct source group guided by matched pos code block_group srcGroup(src, src_stride[plane], code, GroupSize, d.para.BlockSize, d.para.BlockSize); // Initialize retianed coefficients of hard threshold filtering int retainedCoefs = 0; // Apply forward 3D transform to the source group d.f[plane].fp[GroupSize - 1].execute_r2r(srcGroup.data(), srcGroup.data()); // Apply hard-thresholding to the source group auto srcp = srcGroup.data(); auto thrp = d.f[plane].thrTable[GroupSize - 1].get(); const auto upper = srcp + srcGroup.size(); #if defined(__SSE2__) static const ptrdiff_t simd_step = 4; const ptrdiff_t simd_residue = srcGroup.size() % simd_step; const ptrdiff_t simd_width = srcGroup.size() - simd_residue; static const __m128 zero_ps = _mm_setzero_ps(); __m128i cmp_sum = _mm_setzero_si128(); for (const auto upper1 = srcp + simd_width; srcp < upper1; srcp += simd_step, thrp += simd_step) { const __m128 s1 = _mm_load_ps(srcp); const __m128 t1p = _mm_load_ps(thrp); const __m128 t1n = _mm_sub_ps(zero_ps, t1p); const __m128 cmp1 = _mm_cmpgt_ps(s1, t1p); const __m128 cmp2 = _mm_cmplt_ps(s1, t1n); const __m128 cmp = _mm_or_ps(cmp1, cmp2); const __m128 d1 = _mm_and_ps(cmp, s1); _mm_store_ps(srcp, d1); cmp_sum = _mm_sub_epi32(cmp_sum, _mm_castps_si128(cmp)); } alignas(16) int32_t cmp_sum_i32[4]; _mm_store_si128(reinterpret_cast<__m128i *>(cmp_sum_i32), cmp_sum); retainedCoefs += cmp_sum_i32[0] + cmp_sum_i32[1] + cmp_sum_i32[2] + cmp_sum_i32[3]; #endif for (; srcp < upper; ++srcp, ++thrp) { if (*srcp > *thrp || *srcp < -*thrp) { ++retainedCoefs; } else { *srcp = 0; } } // Apply backward 3D transform to the filtered group d.f[plane].bp[GroupSize - 1].execute_r2r(srcGroup.data(), srcGroup.data()); // Calculate weight for the filtered group // Also include the normalization factor to compensate for the amplification introduced in 3D transform FLType denWeight = retainedCoefs < 1 ? 1 : FLType(1) / static_cast<FLType>(retainedCoefs); FLType numWeight = static_cast<FLType>(denWeight / d.f[plane].finalAMP[GroupSize - 1]); // Store the weighted filtered group to the numerator part of the basic estimation // Store the weight to the denominator part of the basic estimation srcGroup.AddTo(ResNum, dst_stride[plane], numWeight); srcGroup.CountTo(ResDen, dst_stride[plane], denWeight); }
void VBM3D_Final_Process::CollaborativeFilter(int plane, const std::vector<FLType *> &ResNum, const std::vector<FLType *> &ResDen, const std::vector<const FLType *> &src, const std::vector<const FLType *> &ref, const Pos3PairCode &code) const { PCType GroupSize = static_cast<PCType>(code.size()); // When para.GroupSize > 0, limit GroupSize up to para.GroupSize if (d.para.GroupSize > 0 && GroupSize > d.para.GroupSize) { GroupSize = d.para.GroupSize; } // Construct source group and reference group guided by matched pos code block_group srcGroup(src, src_stride[plane], code, GroupSize, d.para.BlockSize, d.para.BlockSize); block_group refGroup(ref, ref_stride[plane], code, GroupSize, d.para.BlockSize, d.para.BlockSize); // Initialize L2-norm of Wiener coefficients FLType L2Wiener = 0; // Apply forward 3D transform to the source group and the reference group d.f[plane].fp[GroupSize - 1].execute_r2r(srcGroup.data(), srcGroup.data()); d.f[plane].fp[GroupSize - 1].execute_r2r(refGroup.data(), refGroup.data()); // Apply empirical Wiener filtering to the source group guided by the reference group const FLType sigmaSquare = d.f[plane].wienerSigmaSqr[GroupSize - 1]; auto srcp = srcGroup.data(); auto refp = refGroup.data(); const auto upper = srcp + srcGroup.size(); #if defined(__SSE2__) static const ptrdiff_t simd_step = 4; const ptrdiff_t simd_residue = srcGroup.size() % simd_step; const ptrdiff_t simd_width = srcGroup.size() - simd_residue; const __m128 sgm_sqr = _mm_set_ps1(sigmaSquare); __m128 l2wiener_sum = _mm_setzero_ps(); for (const auto upper1 = srcp + simd_width; srcp < upper1; srcp += simd_step, refp += simd_step) { const __m128 s1 = _mm_load_ps(srcp); const __m128 r1 = _mm_load_ps(refp); const __m128 r1sqr = _mm_mul_ps(r1, r1); const __m128 wiener = _mm_mul_ps(r1sqr, _mm_rcp_ps(_mm_add_ps(r1sqr, sgm_sqr))); const __m128 d1 = _mm_mul_ps(s1, wiener); _mm_store_ps(srcp, d1); l2wiener_sum = _mm_add_ps(l2wiener_sum, _mm_mul_ps(wiener, wiener)); } alignas(16) FLType l2wiener_sum_f32[4]; _mm_store_ps(l2wiener_sum_f32, l2wiener_sum); L2Wiener += l2wiener_sum_f32[0] + l2wiener_sum_f32[1] + l2wiener_sum_f32[2] + l2wiener_sum_f32[3]; #endif for (; srcp < upper; ++srcp, ++refp) { const FLType refSquare = *refp * *refp; const FLType wienerCoef = refSquare / (refSquare + sigmaSquare); *srcp *= wienerCoef; L2Wiener += wienerCoef * wienerCoef; } // Apply backward 3D transform to the filtered group d.f[plane].bp[GroupSize - 1].execute_r2r(srcGroup.data(), srcGroup.data()); // Calculate weight for the filtered group // Also include the normalization factor to compensate for the amplification introduced in 3D transform L2Wiener = Max(FLT_EPSILON, L2Wiener); FLType denWeight = FLType(1) / L2Wiener; FLType numWeight = static_cast<FLType>(denWeight / d.f[plane].finalAMP[GroupSize - 1]); // Store the weighted filtered group to the numerator part of the final estimation // Store the weight to the denominator part of the final estimation srcGroup.AddTo(ResNum, dst_stride[plane], numWeight); srcGroup.CountTo(ResDen, dst_stride[plane], denWeight); }