void Waifu2x_Process_Base::process_core_gray() { FLType *dstYd = nullptr, *srcYd = nullptr; // *refYd = nullptr // Get write/read pointer auto dstY = reinterpret_cast<_Ty *>(vsapi->getWritePtr(dst, 0)); auto srcY = reinterpret_cast<const _Ty *>(vsapi->getReadPtr(src, 0)); // Allocate memory for floating point Y data AlignedMalloc(dstYd, dst_pcount[0]); AlignedMalloc(srcYd, src_pcount[0]); // Convert src and ref from integer Y data to floating point Y data Int2Float(srcYd, srcY, src_height[0], src_width[0], src_stride[0], src_stride[0], false, d.para.full, false); // Execute kernel Kernel(dstYd, srcYd); // Convert dst from floating point Y data to integer Y data Float2Int(dstY, dstYd, dst_height[0], dst_width[0], dst_stride[0], dst_stride[0], false, d.para.full, !isFloat(_Ty)); // Free memory for floating point Y data AlignedFree(dstYd); AlignedFree(srcYd); }
/**
 * Discard the current vertex buffer and allocate a new one sized for
 * `numTris` triangles (three vertices each) of `vertexSizeBytes_` bytes per
 * vertex. The previous contents are not preserved.
 *
 * @param numTris           number of triangles the new buffer must hold
 * @param vertexSizeBytes_  size of one vertex in bytes
 */
void TriangleMesh::ReallocVertexBuffer(int numTris, int vertexSizeBytes_)
{
    AlignedFree(data);

    vertexSizeBytes = vertexSizeBytes_;

    // Compute the byte count in size_t: the original int * int * int product
    // overflows for large meshes (e.g. 30M triangles * 3 * 32 bytes).
    const size_t numBytes =
        static_cast<size_t>(numTris) * 3 * static_cast<size_t>(vertexSizeBytes);

    data = (float*)AlignedMalloc(numBytes, 32);
    numTriangles = numTris;
}
/**
 * Look up the hot tile for a macrotile/attachment pair without loading it.
 *
 * If the tile has no backing buffer yet and `create` is true, a buffer of
 * numSamples * mHotTileSize[attachment] bytes is allocated and the tile is
 * marked HOTTILE_INVALID. If `create` is false and no buffer exists, NULL is
 * returned. `pContext` and `pDC` are not used by this function body.
 *
 * @return pointer to the hot tile, or NULL when it does not exist and
 *         `create` is false
 */
HOTTILE* HotTileMgr::GetHotTileNoLoad(SWR_CONTEXT*                pContext,
                                      DRAW_CONTEXT*               pDC,
                                      uint32_t                    macroID,
                                      SWR_RENDERTARGET_ATTACHMENT attachment,
                                      bool                        create,
                                      uint32_t                    numSamples)
{
    uint32_t tileX, tileY;
    MacroTileMgr::getTileIndices(macroID, tileX, tileY);

    SWR_ASSERT(tileX < KNOB_NUM_HOT_TILES_X);
    SWR_ASSERT(tileY < KNOB_NUM_HOT_TILES_Y);

    HotTileSet& tileSet = mHotTiles[tileX][tileY];
    HOTTILE&    hotTile = tileSet.Attachment[attachment];

    // Already backed by a buffer: nothing to do.
    if (hotTile.pBuffer != NULL)
    {
        return &hotTile;
    }

    // No buffer and the caller did not ask for one to be created.
    if (!create)
    {
        return NULL;
    }

    const uint32_t bufferBytes = numSamples * mHotTileSize[attachment];

    hotTile.pBuffer                = (uint8_t*)AlignedMalloc(bufferBytes, 64);
    hotTile.state                  = HOTTILE_INVALID;
    hotTile.numSamples             = numSamples;
    hotTile.renderTargetArrayIndex = 0;

    return &hotTile;
}
void Waifu2x_Process_Base::process_core_rgb() { FLType *dstYd = nullptr, *dstUd = nullptr, *dstVd = nullptr; FLType *srcYd = nullptr, *srcUd = nullptr, *srcVd = nullptr; // Get write/read pointer auto dstR = reinterpret_cast<_Ty *>(vsapi->getWritePtr(dst, 0)); auto dstG = reinterpret_cast<_Ty *>(vsapi->getWritePtr(dst, 1)); auto dstB = reinterpret_cast<_Ty *>(vsapi->getWritePtr(dst, 2)); auto srcR = reinterpret_cast<const _Ty *>(vsapi->getReadPtr(src, 0)); auto srcG = reinterpret_cast<const _Ty *>(vsapi->getReadPtr(src, 1)); auto srcB = reinterpret_cast<const _Ty *>(vsapi->getReadPtr(src, 2)); // Allocate memory for floating point YUV data AlignedMalloc(dstYd, dst_pcount[0]); if (d.chroma) AlignedMalloc(dstUd, dst_pcount[1]); if (d.chroma) AlignedMalloc(dstVd, dst_pcount[2]); AlignedMalloc(srcYd, src_pcount[0]); AlignedMalloc(srcUd, src_pcount[1]); AlignedMalloc(srcVd, src_pcount[2]); // Convert src and ref from RGB data to floating point YUV data RGB2FloatYUV(srcYd, srcUd, srcVd, srcR, srcG, srcB, src_height[0], src_width[0], src_stride[0], src_stride[0], d.para.matrix, d.para.full, false); // Execute kernel if (d.chroma) { Kernel(dstYd, dstUd, dstVd, srcYd, srcUd, srcVd); } else { Kernel(dstYd, srcYd); dstUd = srcUd; dstVd = srcVd; } // Convert dst from floating point YUV data to RGB data FloatYUV2RGB(dstR, dstG, dstB, dstYd, dstUd, dstVd, dst_height[0], dst_width[0], dst_stride[0], dst_stride[0], d.para.matrix, d.para.full, !isFloat(_Ty)); // Free memory for floating point YUV data AlignedFree(dstYd); if (d.chroma) AlignedFree(dstUd); if (d.chroma) AlignedFree(dstVd); AlignedFree(srcYd); AlignedFree(srcUd); AlignedFree(srcVd); }
/**
 * Append a layer to the network.
 *
 * The first layer is accepted unconditionally. Every later layer must have an
 * input count equal to the previous layer's output count; a zero-filled
 * activation buffer for those inputs is then allocated, stored in the current
 * last slot of `_activations`, and a fresh empty slot is appended.
 *
 * @param in_layer  layer to append (stored as-is; not copied)
 * @return true if the layer was appended; false if its input count does not
 *         match the previous layer's output count or the activation buffer
 *         could not be allocated. Nothing is modified when false is returned.
 */
bool MultilayerPerceptron::AddLayer(Layer* in_layer)
{
    if (!_layers.empty())
    {
        if (_layers.back()->outputs != in_layer->inputs)
        {
            return false;
        }

        // NOTE(review): presumably BlockCount() yields the number of
        // 4-float blocks needed for the inputs — confirm.
        const size_t input_alloc_size = sizeof(float) * BlockCount(in_layer->inputs) * 4;

        float* buffer = (float*)AlignedMalloc(input_alloc_size, 16);
        if (buffer == nullptr)
        {
            // Passing a null pointer to memset is undefined behaviour;
            // report the failure instead.
            return false;
        }
        memset(buffer, 0x00, input_alloc_size);

        _activations.back() = buffer;
        _activations.push_back(nullptr);
    }

    _layers.push_back(in_layer);
    return true;
}
void Waifu2x_Process_Base::process_core_yuv() { FLType *dstYd = nullptr, *dstUd = nullptr, *dstVd = nullptr; FLType *srcYd = nullptr, *srcUd = nullptr, *srcVd = nullptr; // Get write/read pointer auto dstY = reinterpret_cast<_Ty *>(vsapi->getWritePtr(dst, 0)); auto dstU = reinterpret_cast<_Ty *>(vsapi->getWritePtr(dst, 1)); auto dstV = reinterpret_cast<_Ty *>(vsapi->getWritePtr(dst, 2)); auto srcY = reinterpret_cast<const _Ty *>(vsapi->getReadPtr(src, 0)); auto srcU = reinterpret_cast<const _Ty *>(vsapi->getReadPtr(src, 1)); auto srcV = reinterpret_cast<const _Ty *>(vsapi->getReadPtr(src, 2)); // Allocate memory for floating point YUV data AlignedMalloc(dstYd, dst_pcount[0]); if (d.process[1]) AlignedMalloc(dstUd, dst_pcount[1]); if (d.process[2]) AlignedMalloc(dstVd, dst_pcount[2]); AlignedMalloc(srcYd, src_pcount[0]); if (d.process[1]) AlignedMalloc(srcUd, src_pcount[1]); if (d.process[2]) AlignedMalloc(srcVd, src_pcount[2]); // Convert src and ref from integer YUV data to floating point YUV data Int2Float(srcYd, srcY, src_height[0], src_width[0], src_stride[0], src_stride[0], false, d.para.full, false); if (d.process[1]) Int2Float(srcUd, srcU, src_height[1], src_width[1], src_stride[1], src_stride[1], true, d.para.full, false); if (d.process[2]) Int2Float(srcVd, srcV, src_height[2], src_width[2], src_stride[2], src_stride[2], true, d.para.full, false); // Execute kernel if (d.chroma) Kernel(dstYd, dstUd, dstVd, srcYd, srcUd, srcVd); else Kernel(dstYd, srcYd); // Convert dst from floating point YUV data to integer YUV data Float2Int(dstY, dstYd, dst_height[0], dst_width[0], dst_stride[0], dst_stride[0], false, d.para.full, !isFloat(_Ty)); if (d.process[1]) Float2Int(dstU, dstUd, dst_height[1], dst_width[1], dst_stride[1], dst_stride[1], true, d.para.full, !isFloat(_Ty)); if (d.process[2]) Float2Int(dstV, dstVd, dst_height[2], dst_width[2], dst_stride[2], dst_stride[2], true, d.para.full, !isFloat(_Ty)); // Free memory for floating point YUV data 
AlignedFree(dstYd); if (d.process[1]) AlignedFree(dstUd); if (d.process[2]) AlignedFree(dstVd); AlignedFree(srcYd); if (d.process[1]) AlignedFree(srcUd); if (d.process[2]) AlignedFree(srcVd); }
/**
 * Compute the per-level layout of a texture resource and optionally allocate
 * its backing storage.
 *
 * For every mip level this fills in res->row_stride, res->img_stride and
 * res->mip_offsets, and fills res->swr with the surface description. When
 * `allocate` is set, the primary surface is allocated and, for combined
 * depth+stencil formats, a separate R8_UINT stencil surface is described in
 * res->secondary and allocated as well.
 *
 * @param screen    not used by this function body
 * @param res       resource whose layout is computed (res->base is the input)
 * @param allocate  when non-zero, allocate the surface storage
 * @return FALSE if the accumulated size exceeds SWR_MAX_TEXTURE_SIZE,
 *         TRUE otherwise
 */
static boolean
swr_texture_layout(struct swr_screen *screen,
                   struct swr_resource *res,
                   boolean allocate)
{
   struct pipe_resource *pt = &res->base;

   pipe_format fmt = pt->format;
   const struct util_format_description *desc = util_format_description(fmt);

   res->has_depth = util_format_has_depth(desc);
   res->has_stencil = util_format_has_stencil(desc);

   /* Stencil-only resources are laid out as R8_UINT. */
   if (res->has_stencil && !res->has_depth)
      fmt = PIPE_FORMAT_R8_UINT;

   res->swr.width = pt->width0;
   res->swr.height = pt->height0;
   res->swr.depth = pt->depth0;
   res->swr.type = swr_convert_target_type(pt->target);
   res->swr.tileMode = SWR_TILE_NONE;
   res->swr.format = mesa_to_swr_format(fmt);
   /* NOTE(review): Gallium's nr_samples is a sample count, not a log2;
    * (1 << nr_samples) looks suspicious for nr_samples > 0 — confirm. */
   res->swr.numSamples = (1 << pt->nr_samples);

   SWR_FORMAT_INFO finfo = GetFormatInfo(res->swr.format);

   unsigned total_size = 0;
   unsigned width = pt->width0;
   unsigned height = pt->height0;
   unsigned depth = pt->depth0;
   unsigned layers = pt->array_size;

   for (int level = 0; level <= pt->last_level; level++) {
      unsigned alignedWidth, alignedHeight;
      unsigned num_slices;

      /* Render targets and depth/stencil surfaces are padded to macrotile
       * dimensions; everything else uses the exact level size. */
      if (pt->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) {
         alignedWidth = align(width, KNOB_MACROTILE_X_DIM);
         alignedHeight = align(height, KNOB_MACROTILE_Y_DIM);
      } else {
         alignedWidth = width;
         alignedHeight = height;
      }

      if (level == 0) {
         res->alignedWidth = alignedWidth;
         res->alignedHeight = alignedHeight;
      }

      res->row_stride[level] = alignedWidth * finfo.Bpp;
      res->img_stride[level] = res->row_stride[level] * alignedHeight;
      res->mip_offsets[level] = total_size;

      if (pt->target == PIPE_TEXTURE_3D)
         num_slices = depth;
      else if (pt->target == PIPE_TEXTURE_1D_ARRAY
               || pt->target == PIPE_TEXTURE_2D_ARRAY
               || pt->target == PIPE_TEXTURE_CUBE
               || pt->target == PIPE_TEXTURE_CUBE_ARRAY)
         num_slices = layers;
      else
         num_slices = 1;

      total_size += res->img_stride[level] * num_slices;
      if (total_size > SWR_MAX_TEXTURE_SIZE)
         return FALSE;

      width = u_minify(width, 1);
      height = u_minify(height, 1);
      depth = u_minify(depth, 1);
   }

   res->swr.halign = res->alignedWidth;
   res->swr.valign = res->alignedHeight;
   res->swr.pitch = res->row_stride[0];

   if (allocate) {
      res->swr.pBaseAddress = (uint8_t *)AlignedMalloc(total_size, 64);

      if (res->has_depth && res->has_stencil) {
         /* The format must be assigned before it is queried: the original
          * code looked up the format info of a not-yet-initialised
          * res->secondary.format, so the stencil pitch and allocation size
          * were derived from whatever value happened to be there. */
         res->secondary.format = R8_UINT;
         SWR_FORMAT_INFO stencil_info = GetFormatInfo(res->secondary.format);

         res->secondary.width = pt->width0;
         res->secondary.height = pt->height0;
         res->secondary.depth = pt->depth0;
         res->secondary.type = SURFACE_2D;
         res->secondary.tileMode = SWR_TILE_NONE;
         res->secondary.numSamples = (1 << pt->nr_samples);
         res->secondary.pitch = res->alignedWidth * stencil_info.Bpp;

         res->secondary.pBaseAddress = (uint8_t *)AlignedMalloc(
            res->alignedHeight * res->secondary.pitch, 64);
      }
   }

   return TRUE;
}
struct pipe_context * swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) { struct swr_context *ctx = (struct swr_context *) AlignedMalloc(sizeof(struct swr_context), KNOB_SIMD_BYTES); memset(ctx, 0, sizeof(struct swr_context)); swr_screen(p_screen)->pfnSwrGetInterface(ctx->api); ctx->swrDC.pAPI = &ctx->api; ctx->blendJIT = new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>; ctx->max_draws_in_flight = KNOB_MAX_DRAWS_IN_FLIGHT; SWR_CREATECONTEXT_INFO createInfo; memset(&createInfo, 0, sizeof(createInfo)); createInfo.privateStateSize = sizeof(swr_draw_context); createInfo.pfnLoadTile = swr_LoadHotTile; createInfo.pfnStoreTile = swr_StoreHotTile; createInfo.pfnClearTile = swr_StoreHotTileClear; createInfo.pfnUpdateStats = swr_UpdateStats; createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE; SWR_THREADING_INFO threadingInfo {0}; threadingInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS; threadingInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES; threadingInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE; threadingInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE; threadingInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED; // Use non-standard settings for KNL if (swr_screen(p_screen)->is_knl) { if (nullptr == getenv("KNOB_MAX_THREADS_PER_CORE")) threadingInfo.MAX_THREADS_PER_CORE = 2; if (nullptr == getenv("KNOB_MAX_DRAWS_IN_FLIGHT")) { ctx->max_draws_in_flight = 2048; createInfo.MAX_DRAWS_IN_FLIGHT = ctx->max_draws_in_flight; } } createInfo.pThreadInfo = &threadingInfo; ctx->swrContext = ctx->api.pfnSwrCreateContext(&createInfo); ctx->api.pfnSwrInit(); if (ctx->swrContext == NULL) goto fail; ctx->pipe.screen = p_screen; ctx->pipe.destroy = swr_destroy; ctx->pipe.priv = priv; ctx->pipe.create_surface = swr_create_surface; ctx->pipe.surface_destroy = swr_surface_destroy; ctx->pipe.transfer_map = swr_transfer_map; ctx->pipe.transfer_unmap = swr_transfer_unmap; ctx->pipe.transfer_flush_region = swr_transfer_flush_region; 
ctx->pipe.buffer_subdata = u_default_buffer_subdata; ctx->pipe.texture_subdata = u_default_texture_subdata; ctx->pipe.clear_texture = util_clear_texture; ctx->pipe.resource_copy_region = swr_resource_copy; ctx->pipe.render_condition = swr_render_condition; swr_state_init(&ctx->pipe); swr_clear_init(&ctx->pipe); swr_draw_init(&ctx->pipe); swr_query_init(&ctx->pipe); ctx->pipe.stream_uploader = u_upload_create_default(&ctx->pipe); if (!ctx->pipe.stream_uploader) goto fail; ctx->pipe.const_uploader = ctx->pipe.stream_uploader; ctx->pipe.blit = swr_blit; ctx->blitter = util_blitter_create(&ctx->pipe); if (!ctx->blitter) goto fail; swr_init_scratch_buffers(ctx); return &ctx->pipe; fail: /* Should really validate the init steps and fail gracefully */ swr_destroy(&ctx->pipe); return NULL; }
/**
 * Allocate uninitialised storage for `size` objects of type T, aligned as
 * requested by N (passed as the second argument of AlignedMalloc).
 *
 * @param size  number of T elements to allocate room for
 * @return pointer to the allocated storage (never null)
 * @throws std::bad_alloc if the byte count would overflow size_t or the
 *         underlying allocation returns null
 */
T *alloc(size_t size) const
{
    // Guard the multiplication below: without this a huge `size` wraps around
    // and a too-small buffer would be returned.
    if (size > static_cast<size_t>(-1) / sizeof(T))
        throw std::bad_alloc();

    T *p = static_cast<T*>(AlignedMalloc(size * sizeof(T), N));
    if (!p)
        throw std::bad_alloc();
    return p;
}
/**
 * Compute the SWR surface layout for a texture resource and optionally
 * allocate its backing storage.
 *
 * Fills res->swr (dimensions, format, alignment, pitch, qpitch, depth) and
 * res->mip_offsets for every level. When `allocate` is set, the primary
 * surface is allocated and, for combined depth+stencil formats,
 * res->secondary is set up as an R8_UINT copy of the primary description with
 * its own mip offsets and allocation.
 *
 * `screen` is not used by this function body.
 *
 * Returns false if the computed total size exceeds SWR_MAX_TEXTURE_SIZE,
 * true otherwise.
 *
 * NOTE(review): the results of AlignedMalloc are not checked here — confirm
 * that callers cope with a null pBaseAddress.
 */
static bool
swr_texture_layout(struct swr_screen *screen,
                   struct swr_resource *res,
                   boolean allocate)
{
   struct pipe_resource *pt = &res->base;

   pipe_format fmt = pt->format;
   const struct util_format_description *desc = util_format_description(fmt);

   res->has_depth = util_format_has_depth(desc);
   res->has_stencil = util_format_has_stencil(desc);

   /* Stencil-only resources are laid out as R8_UINT. */
   if (res->has_stencil && !res->has_depth)
      fmt = PIPE_FORMAT_R8_UINT;

   /* We always use the SWR layout. For 2D and 3D textures this looks like:
    *
    * |<------- pitch ------->|
    * +=======================+-------
    * |Array 0                |   ^
    * |                       |   |
    * |        Level 0        |   |
    * |                       |   |
    * |                       | qpitch
    * +-----------+-----------+   |
    * |           | L2L2L2L2  |   |
    * |  Level 1  | L3L3      |   |
    * |           | L4        |   v
    * +===========+===========+-------
    * |Array 1                |
    * |                       |
    * |        Level 0        |
    * |                       |
    * |                       |
    * +-----------+-----------+
    * |           | L2L2L2L2  |
    * |  Level 1  | L3L3      |
    * |           | L4        |
    * +===========+===========+
    *
    * The overall width in bytes is known as the pitch, while the overall
    * height in rows is the qpitch. Array slices are laid out logically below
    * one another, qpitch rows apart. For 3D surfaces, the "level" values are
    * just invalid for the higher array numbers (since depth is also
    * minified). 1D and 1D array surfaces are stored effectively the same way,
    * except that pitch never plays into it. All the levels are logically
    * adjacent to each other on the X axis. The qpitch becomes the number of
    * elements between array slices, while the pitch is unused.
    *
    * Each level's sizes are subject to the valign and halign settings of the
    * surface. For compressed formats that swr is unaware of, we will use an
    * appropriately-sized uncompressed format, and scale the widths/heights.
    *
    * This surface is stored inside res->swr. For depth/stencil textures,
    * res->secondary will have an identically-laid-out but R8_UINT-formatted
    * stencil tree. In the Z32F_S8 case, the primary surface still has 64-bpp
    * texels, to simplify map/unmap logic which copies the stencil values
    * in/out.
    */

   res->swr.width = pt->width0;
   res->swr.height = pt->height0;
   res->swr.type = swr_convert_target_type(pt->target);
   res->swr.tileMode = SWR_TILE_NONE;
   res->swr.format = mesa_to_swr_format(fmt);
   /* A sample count of 0 is treated as 1. */
   res->swr.numSamples = std::max(1u, pt->nr_samples);

   /* Render targets and depth/stencil surfaces are aligned to macrotile
    * dimensions; everything else has no extra alignment. */
   if (pt->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) {
      res->swr.halign = KNOB_MACROTILE_X_DIM;
      res->swr.valign = KNOB_MACROTILE_Y_DIM;
   } else {
      res->swr.halign = 1;
      res->swr.valign = 1;
   }

   /* Horizontal alignment scaled by the format's block width. */
   unsigned halign = res->swr.halign * util_format_get_blockwidth(fmt);
   unsigned width = align(pt->width0, halign);
   if (pt->target == PIPE_TEXTURE_1D || pt->target == PIPE_TEXTURE_1D_ARRAY) {
      /* 1D: all levels sit side by side, so sum their aligned widths. */
      for (int level = 1; level <= pt->last_level; level++)
         width += align(u_minify(pt->width0, level), halign);
      res->swr.pitch = util_format_get_blocksize(fmt);
      res->swr.qpitch = util_format_get_nblocksx(fmt, width);
   } else {
      // The pitch is the overall width of the texture in bytes. Most of the
      // time this is the pitch of level 0 since all the other levels fit
      // underneath it. However in some degenerate situations, the width of
      // level1 + level2 may be larger. In that case, we use those
      // widths. This can happen if, e.g. halign is 32, and the width of level
      // 0 is 32 or less. In that case, the aligned levels 1 and 2 will also
      // be 32 each, adding up to 64.
      unsigned valign = res->swr.valign * util_format_get_blockheight(fmt);
      if (pt->last_level > 1) {
         width = std::max<uint32_t>(
            width,
            align(u_minify(pt->width0, 1), halign) +
            align(u_minify(pt->width0, 2), halign));
      }
      res->swr.pitch = util_format_get_stride(fmt, width);

      // The qpitch is controlled by either the height of the second LOD, or
      // the combination of all the later LODs.
      unsigned height = align(pt->height0, valign);
      if (pt->last_level == 1) {
         height += align(u_minify(pt->height0, 1), valign);
      } else if (pt->last_level > 1) {
         unsigned level1 = align(u_minify(pt->height0, 1), valign);
         unsigned level2 = 0;
         for (int level = 2; level <= pt->last_level; level++) {
            level2 += align(u_minify(pt->height0, level), valign);
         }
         height += std::max(level1, level2);
      }
      res->swr.qpitch = util_format_get_nblocksy(fmt, height);
   }

   /* For 3D textures depth is the volume depth; otherwise it is the number
    * of array layers. */
   if (pt->target == PIPE_TEXTURE_3D)
      res->swr.depth = pt->depth0;
   else
      res->swr.depth = pt->array_size;

   // Fix up swr format if necessary so that LOD offset computation works
   if (res->swr.format == (SWR_FORMAT)-1) {
      /* Pick a stand-in format whose block size matches the real one. */
      switch (util_format_get_blocksize(fmt)) {
      default:
         unreachable("Unexpected format block size");
      case 1: res->swr.format = R8_UINT; break;
      case 2: res->swr.format = R16_UINT; break;
      case 4: res->swr.format = R32_UINT; break;
      case 8:
         if (util_format_is_compressed(fmt))
            res->swr.format = BC4_UNORM;
         else
            res->swr.format = R32G32_UINT;
         break;
      case 16:
         if (util_format_is_compressed(fmt))
            res->swr.format = BC5_UNORM;
         else
            res->swr.format = R32G32B32A32_UINT;
         break;
      }
   }

   /* Record the offset of each mip level within the primary surface. */
   for (int level = 0; level <= pt->last_level; level++) {
      res->mip_offsets[level] =
         ComputeSurfaceOffset<false>(0, 0, 0, 0, 0, level, &res->swr);
   }

   /* Widen to size_t before multiplying the three factors. */
   size_t total_size = (size_t)res->swr.depth * res->swr.qpitch *
                       res->swr.pitch;
   if (total_size > SWR_MAX_TEXTURE_SIZE)
      return false;

   if (allocate) {
      res->swr.pBaseAddress = (uint8_t *)AlignedMalloc(total_size, 64);

      if (res->has_depth && res->has_stencil) {
         /* Start from a copy of the primary description, then switch it to
          * R8_UINT and rescale the pitch by the primary format's block size. */
         res->secondary = res->swr;
         res->secondary.format = R8_UINT;
         res->secondary.pitch = res->swr.pitch / util_format_get_blocksize(fmt);

         for (int level = 0; level <= pt->last_level; level++) {
            res->secondary_mip_offsets[level] =
               ComputeSurfaceOffset<false>(0, 0, 0, 0, 0, level, &res->secondary);
         }

         res->secondary.pBaseAddress = (uint8_t *)AlignedMalloc(
            res->secondary.depth * res->secondary.qpitch *
            res->secondary.pitch, 64);
      }
   }

   return true;
}
/**
 * Discard the current vertex buffer and allocate a new one sized for
 * `numTris` triangles, each made of three vertices of three floats.
 * The previous contents are not preserved.
 *
 * @param numTris  number of triangles the new buffer must hold
 */
void TriangleMesh::ReallocVertexBuffer(int numTris)
{
    AlignedFree(data);

    // Widen before multiplying: in the original expression numTris*3*3 was
    // evaluated in int and could overflow before being scaled by sizeof(float).
    const size_t numBytes = static_cast<size_t>(numTris) * 3 * 3 * sizeof(float);

    data = (float*)AlignedMalloc(numBytes, 32);
    numTriangles = numTris;
}
struct pipe_context * swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) { struct swr_context *ctx = (struct swr_context *) AlignedMalloc(sizeof(struct swr_context), KNOB_SIMD_BYTES); memset(ctx, 0, sizeof(struct swr_context)); swr_screen(p_screen)->pfnSwrGetInterface(ctx->api); ctx->swrDC.pAPI = &ctx->api; ctx->blendJIT = new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>; SWR_CREATECONTEXT_INFO createInfo; memset(&createInfo, 0, sizeof(createInfo)); createInfo.privateStateSize = sizeof(swr_draw_context); createInfo.pfnLoadTile = swr_LoadHotTile; createInfo.pfnStoreTile = swr_StoreHotTile; createInfo.pfnClearTile = swr_StoreHotTileClear; createInfo.pfnUpdateStats = swr_UpdateStats; createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE; ctx->swrContext = ctx->api.pfnSwrCreateContext(&createInfo); ctx->api.pfnSwrInit(); if (ctx->swrContext == NULL) goto fail; ctx->pipe.screen = p_screen; ctx->pipe.destroy = swr_destroy; ctx->pipe.priv = priv; ctx->pipe.create_surface = swr_create_surface; ctx->pipe.surface_destroy = swr_surface_destroy; ctx->pipe.transfer_map = swr_transfer_map; ctx->pipe.transfer_unmap = swr_transfer_unmap; ctx->pipe.transfer_flush_region = swr_transfer_flush_region; ctx->pipe.buffer_subdata = u_default_buffer_subdata; ctx->pipe.texture_subdata = u_default_texture_subdata; ctx->pipe.clear_texture = util_clear_texture; ctx->pipe.resource_copy_region = swr_resource_copy; ctx->pipe.render_condition = swr_render_condition; swr_state_init(&ctx->pipe); swr_clear_init(&ctx->pipe); swr_draw_init(&ctx->pipe); swr_query_init(&ctx->pipe); ctx->pipe.stream_uploader = u_upload_create_default(&ctx->pipe); if (!ctx->pipe.stream_uploader) goto fail; ctx->pipe.const_uploader = ctx->pipe.stream_uploader; ctx->pipe.blit = swr_blit; ctx->blitter = util_blitter_create(&ctx->pipe); if (!ctx->blitter) goto fail; swr_init_scratch_buffers(ctx); return &ctx->pipe; fail: /* Should really validate the init steps and fail gracefully */ 
swr_destroy(&ctx->pipe); return NULL; }
/**
 * Build the per-group-size filter data.
 *
 * For every group size g in [1, GroupSize] this plans a forward (REDFT10) and
 * a backward (REDFT01) 3-D real-to-real transform of shape
 * g x BlockSize x BlockSize, and records the amplification factor in
 * finalAMP[g - 1]. Depending on `wiener`, it then fills either
 * wienerSigmaSqr[g - 1] or a threshold table thrTable[g - 1] with one entry
 * per coefficient.
 *
 * @param wiener     selects the wienerSigmaSqr path (true) or the threshold
 *                   table path (false)
 * @param sigma      scale applied to the amplification factor
 * @param GroupSize  largest group size to prepare data for
 * @param BlockSize  edge length of the square block
 * @param lambda     extra threshold multiplier (threshold path only)
 */
BM3D_FilterData::BM3D_FilterData(bool wiener, double sigma, PCType GroupSize,
    PCType BlockSize, double lambda)
    : fp(GroupSize), bp(GroupSize), finalAMP(GroupSize),
    thrTable(wiener ? 0 : GroupSize),
    wienerSigmaSqr(wiener ? GroupSize : 0)
{
    const unsigned int planFlags = FFTW_PATIENT;
    const fftw::r2r_kind forwardKind = FFTW_REDFT10;
    const fftw::r2r_kind backwardKind = FFTW_REDFT01;

    for (PCType depth = 1; depth <= GroupSize; ++depth)
    {
        // Scratch buffer used only while the plans are being created.
        FLType *scratch = nullptr;
        AlignedMalloc(scratch, depth * BlockSize * BlockSize);
        fp[depth - 1].r2r_3d(depth, BlockSize, BlockSize, scratch, scratch,
            forwardKind, forwardKind, forwardKind, planFlags);
        bp[depth - 1].r2r_3d(depth, BlockSize, BlockSize, scratch, scratch,
            backwardKind, backwardKind, backwardKind, planFlags);
        AlignedFree(scratch);

        finalAMP[depth - 1] = 2 * depth * 2 * BlockSize * 2 * BlockSize;
        double forwardAMP = sqrt(finalAMP[depth - 1]);

        if (wiener)
        {
            wienerSigmaSqr[depth - 1] =
                static_cast<FLType>(sigma * forwardAMP * sigma * forwardAMP);
            continue;
        }

        // Threshold selected by how many of the (x, y, z) indices are zero.
        const double thrBase = sigma * lambda * forwardAMP;
        const double thrByZeroCount[4] = {
            thrBase,
            thrBase * sqrt(double(2)),
            thrBase * double(2),
            thrBase * sqrt(double(8))
        };

        FLType *table = nullptr;
        AlignedMalloc(table, depth * BlockSize * BlockSize);
        // The shared pointer takes ownership and frees through AlignedFree.
        thrTable[depth - 1].reset(table, [](FLType *memory)
        {
            AlignedFree(memory);
        });

        FLType *out = table;
        for (PCType z = 0; z < depth; ++z)
        {
            for (PCType y = 0; y < BlockSize; ++y)
            {
                for (PCType x = 0; x < BlockSize; ++x)
                {
                    const int zeroCount = (x == 0) + (y == 0) + (z == 0);
                    *out = static_cast<FLType>(thrByZeroCount[zeroCount]);
                    ++out;
                }
            }
        }
    }
}