C++ (Cpp) SMVP示例 - HotExamples

示例#1

0

显示文件

文件： cg.c 项目： UoB-HPC/TeaLeaf

// Calculates w
void cg_calc_w(
        const int x,
        const int y,
        const int halo_depth,
        double* pw,
        double* p,
        double* w,
        double* kx,
        double* ky)
{
    double pw_temp = 0.0;

#pragma omp target
#pragma omp parallel for reduction(+:pw_temp)
    for(int jj = halo_depth; jj < y-halo_depth; ++jj)
    {
        for(int kk = halo_depth; kk < x-halo_depth; ++kk)
        {
            const int index = kk + jj*x;
            const double smvp = SMVP(p);
            w[index] = smvp;
            pw_temp += w[index]*p[index];
        }
    }

    *pw += pw_temp;
}

示例#2

0

显示文件

文件： ext_cg_kernel.c 项目： UoB-HPC/TeaLeaf-OpenMP4

// Entry point for calculating w
void ext_cg_calc_w_(
        const int* chunk,
        double* p,
        double* w,
        double* kx,
        double* ky,
        double* kz,
        double* pw)
{
    START_PROFILING;

    double pwTemp = 0.0;

#pragma omp target if(_chunk.is_offload) device(_chunk.device_id)
#pragma omp parallel for reduction(+:pwTemp)
    for(int ii = HALO_PAD; ii < _chunk.z-HALO_PAD; ++ii)
    {
        for(int jj = HALO_PAD; jj < _chunk.y-HALO_PAD; ++jj)
        {
            for(int kk = HALO_PAD; kk < _chunk.x-HALO_PAD; ++kk)
            {
                const int index = ii*_chunk.x*_chunk.y+jj*_chunk.x+kk;
                const double smvp = SMVP(p);
                w[index] = smvp;
                pwTemp += w[index]*p[index];
            }
        }
    }

    *pw = pwTemp;

    STOP_PROFILING(__func__);
}

示例#3

0

显示文件

文件： ext_cheby_kernel.cpp 项目： UoB-HPC/TeaLeaf-OpenMP

// Initialises the Chebyshev solver.
void TeaLeafChunk::ChebyInit(
		double* alphas, 
		double* betas,
		const double theta,
		const bool preconditionerOn)
{
	preconditioner = preconditionerOn;
	this->alphas = alphas;
	this->betas = betas;

	START_PROFILING;
#pragma omp parallel for
	for(int jj = HALO_PAD; jj < yCells-HALO_PAD; ++jj)
	{
		for(int kk = HALO_PAD; kk < xCells-HALO_PAD; ++kk)
		{
			const int index = jj*xCells+kk;
			const double smvp = SMVP(u);
			w[index] = smvp;
			r[index] = u0[index]-w[index];
			p[index] = (preconditioner ? mi[index]*r[index] : r[index])/theta;
		}
	}
	STOP_PROFILING("Cheby Init");

	ChebyCalcU();
}

示例#4

0

显示文件

文件： cheby.c 项目： UoB-HPC/TeaLeaf

// The main chebyshev iteration
void cheby_iterate(
        const int x,
        const int y,
        const int halo_depth,
        double alpha,
        double beta,
        double* u,
        double* u0,
        double* p,
        double* r,
        double* w,
        double* kx,
        double* ky)
{
#pragma omp parallel for
    for(int jj = halo_depth; jj < y-halo_depth; ++jj)
    {
        for(int kk = halo_depth; kk < x-halo_depth; ++kk)
        {	
            const int index = kk + jj*x;
            const double smvp = SMVP(u);
            w[index] = smvp;
            r[index] = u0[index]-w[index];
            p[index] = alpha*p[index] + beta*r[index];
        }
    }

    cheby_calc_u(x, y, halo_depth, u, p);
}

示例#5

0

显示文件

文件： ext_ppcg_kernel.hpp 项目： UoB-HPC/TeaLeaf-Kokkos

	KOKKOS_INLINE_FUNCTION
	void operator()(const int index) const 
    {
		KOKKOS_INDICES;

		if(INDEX_IN_INNER_DOMAIN)
		{
			const double smvp = SMVP(sd);
			r[index] -= smvp;
			u[index] += sd[index];
		}
	}

示例#6

0

显示文件

文件： cg.hpp 项目： crtrott/TeaLeaf-OpenSrc

    KOKKOS_INLINE_FUNCTION
        void operator()(const int index, value_type& rro) const
        {
            const int kk = index % x; 
            const int jj = index / x; 

            if(kk >= halo_depth && kk < x - halo_depth &&
                    jj >= halo_depth && jj < y - halo_depth)
            {
                const double smvp = SMVP(u);
                w(index) = smvp;
                r(index) = u(index)-w(index);
                p(index) = r(index);
                rro += r(index)*p(index);
            }
        }

示例#7

0

显示文件

文件： ext_ppcg_kernel.c 项目： UoB-HPC/TeaLeaf-OpenMP4

// Entry point for the main PPCG step.
void ext_ppcg_inner_(
        const int* chunk,
        double* u,
        double* r,
        double* kx,
        double* ky,
        double* sd,
        double* mi,
        double* alphas,
        double* betas,
        int* step,
        int* maxSteps)
{
    START_PROFILING;

    double alpha = alphas[*step];
    double beta = betas[*step];

#pragma omp target if(_chunk.is_offload) device(_chunk.device_id)
#pragma omp parallel for
    for(int jj = HALO_PAD; jj < _chunk.y-HALO_PAD; ++jj)
    {
        for(int kk = HALO_PAD; kk < _chunk.x-HALO_PAD; ++kk)
        {
            const int index = jj*_chunk.x+kk;
            const double smvp = SMVP(sd);
            r[index] -= smvp;
            u[index] += sd[index];
        }
    }

#pragma omp target if(_chunk.is_offload) device(_chunk.device_id)
#pragma omp parallel for
    for(int jj = HALO_PAD; jj < _chunk.y-HALO_PAD; ++jj)
    {
        for(int kk = HALO_PAD; kk < _chunk.x-HALO_PAD; ++kk)
        {
            const int index = jj*_chunk.x+kk;
            sd[index] = alpha*sd[index]+beta* r[index];
        }
    }

    STOP_PROFILING(__func__);
}

示例#8

0

显示文件

文件： ppcg_solver.c 项目： UoB-HPC/TeaLeaf

// The PPCG inner iteration
void ppcg_inner_iteration(
        const int x,
        const int y,
        const int z,
        const int halo_depth,
        double alpha,
        double beta,
        double* vec_u,
        double* vec_r,
        double* vec_kx,
        double* vec_ky,
        double* vec_kz,
        double* vec_sd)
{
#pragma omp target teams distribute
//#pragma omp parallel for
    for(int ii = halo_depth; ii < z-halo_depth; ++ii)
    {
        for(int jj = halo_depth; jj < y-halo_depth; ++jj)
        {
            for(int kk = halo_depth; kk < x-halo_depth; ++kk)
            {
                const int index = ii*x*y+jj*x+kk;
                const double smvp = SMVP(vec_sd);
                vec_r[index] -= smvp;
                vec_u[index] += vec_sd[index];
            }
        }
    }

#pragma omp target teams distribute
//#pragma omp parallel for
    for(int ii = halo_depth; ii < z-halo_depth; ++ii)
    {
        for(int jj = halo_depth; jj < y-halo_depth; ++jj)
        {
            for(int kk = halo_depth; kk < x-halo_depth; ++kk)
            {
                const int index = ii*x*y+jj*x+kk;
                vec_sd[index] = alpha*vec_sd[index] + beta*vec_r[index];
            }
        }
    }
}

示例#9

0

显示文件

文件： cg.hpp 项目： crtrott/TeaLeaf-OpenSrc

    KOKKOS_INLINE_FUNCTION
    void operator()(const team_member& team, value_type& pw) const
    {
        double pw_team = 0.0;
        const int team_offset = (team.league_rank() + halo_depth)*y;

        Kokkos::parallel_reduce(
                Kokkos::TeamThreadRange(team, halo_depth, y-halo_depth),
                [&] (const int &j, double& pw_thread)
        {
            const int index = team_offset + j;
            const double smvp = SMVP(p);
            w(index) = smvp;
            pw_thread += smvp*p(index);
        }, pw_team);

        Kokkos::single(Kokkos::PerTeam(team), [&] ()
        {
            pw += pw_team;
        });
    }

示例#10

0

显示文件

文件： ext_cheby_kernel.cpp 项目： UoB-HPC/TeaLeaf-OpenMP

// The main Chebyshev solver iteration.
void TeaLeafChunk::ChebyIterate(
		const int step)
{
	START_PROFILING;
#pragma omp parallel for
	for(int jj = HALO_PAD; jj < yCells-HALO_PAD; ++jj)
	{
		for(int kk = HALO_PAD; kk < xCells-HALO_PAD; ++kk)
		{	
			const int index = jj*xCells+kk;
			const double smvp = SMVP(u);
			w[index] = smvp;
			r[index] = u0[index]-w[index];
			p[index] = alphas[step]*p[index] + betas[step] *
				(preconditioner ? mi[index]*r[index] : r[index]);
		}
	}
	STOP_PROFILING("Cheby Iterate");

	ChebyCalcU();
}

示例#11

0

显示文件

文件： ext_solver_methods.c 项目： UK-MAC/TeaLeaf_OpenMP4

// Entry point for calculating residual.
void ext_calculate_residual_(
        const int* chunk,
        double* u,
        double* u0,
        double* r,
        double* kx,
        double* ky)
{
    START_PROFILING;

#pragma omp target device(_chunk.device_id)
#pragma omp parallel for
    for(int jj = HALO_PAD; jj < _chunk.y-HALO_PAD; ++jj)
    {
        for(int kk = HALO_PAD; kk < _chunk.x-HALO_PAD; ++kk)
        {
            int index = jj*_chunk.x+kk;
            const double smvp = SMVP(u);
            r[index] = u0[index] - smvp;
        }
    }

    STOP_PROFILING(__func__);
}

示例#12

0

显示文件

文件： cg.c 项目： UoB-HPC/TeaLeaf

// Initialises the CG solver
void cg_init(
        const int x,
        const int y,
        const int halo_depth,
        const int coefficient,
        double rx,
        double ry,
        double* rro,
        double* density,
        double* energy,
        double* u,
        double* p,
        double* r,
        double* w,
        double* kx,
        double* ky)
{
    if(coefficient != CONDUCTIVITY && coefficient != RECIP_CONDUCTIVITY)
    {
        die(__LINE__, __FILE__, "Coefficient %d is not valid.\n", coefficient);
    }

#pragma omp target
#pragma omp parallel for
    for(int jj = 0; jj < y; ++jj)
    {
        for(int kk = 0; kk < x; ++kk)
        {
            const int index = kk + jj*x;
            p[index] = 0.0;
            r[index] = 0.0;
            u[index] = energy[index]*density[index];
        }
    }

#pragma omp target
#pragma omp parallel for
    for(int jj = 1; jj < y-1; ++jj)
    {
        for(int kk = 1; kk < x-1; ++kk)
        {
            const int index = kk + jj*x;
            w[index] = (coefficient == CONDUCTIVITY) 
                ? density[index] : 1.0/density[index];
        }
    }

   
#pragma omp target 
#pragma omp parallel for
    for(int jj = halo_depth; jj < y-1; ++jj)
    {
        for(int kk = halo_depth; kk < x-1; ++kk)
        {
            const int index = kk + jj*x;
            kx[index] = rx*(w[index-1]+w[index]) /
                (2.0*w[index-1]*w[index]);
            ky[index] = ry*(w[index-x]+w[index]) /
                (2.0*w[index-x]*w[index]);
        }
    }

    double rro_temp = 0.0;

#pragma omp target
#pragma omp parallel for reduction(+:rro_temp)
    for(int jj = halo_depth; jj < y-halo_depth; ++jj)
    {
        for(int kk = halo_depth; kk < x-halo_depth; ++kk)
        {
            const int index = kk + jj*x;
            const double smvp = SMVP(u);
            w[index] = smvp;
            r[index] = u[index]-w[index];
            p[index] = r[index];
            rro_temp += r[index]*p[index];
        }
    }

    // Sum locally
    *rro += rro_temp;
}

示例#13

0

显示文件

文件： ext_cg_kernel.c 项目： UoB-HPC/TeaLeaf-OpenMP4

// Entry point for CG initialisation.
void ext_cg_solver_init_(
        const int* chunk,
        double* density,
        double* energy,
        double* u,
        double* p,
        double* r,
        double* mi,
        double* w,
        double* z,
        double* kx,
        double* ky,
        double* kz,
        const int* coefficient,
        const int* preconditioner,
        double* dt,
        double* rx,
        double* ry,
        double* rz,
        double* rro)
{
    START_PROFILING;

    if(*coefficient < CONDUCTIVITY && *coefficient < RECIP_CONDUCTIVITY)
    {
        panic(__LINE__, __FILE__, "Coefficient %d is not valid.\n", *coefficient);
    }

#pragma omp target if(_chunk.is_offload) device(_chunk.device_id)
#pragma omp parallel for
    for(int ii = 0; ii < _chunk.z; ++ii)
    {
        for(int jj = 0; jj < _chunk.y; ++jj)
        {
            for(int kk = 0; kk < _chunk.x; ++kk)
            {
                const int index = ii*_chunk.y*_chunk.x+jj*_chunk.x+kk;
                p[index] = 0.0;
                r[index] = 0.0;
                u[index] = energy[index]*density[index];
            }
        }
    }

#pragma omp target if(_chunk.is_offload) device(_chunk.device_id)
#pragma omp parallel for
    for(int ii = 1; ii < _chunk.z-1; ++ii)
    {
        for(int jj = 1; jj < _chunk.y-1; ++jj)
        {
            for(int kk = 1; kk < _chunk.x-1; ++kk)
            {
                const int index = ii*_chunk.y*_chunk.x+jj*_chunk.x+kk;
                w[index] = (*coefficient == CONDUCTIVITY) ? density[index] : 1.0/density[index];
            }
        }
    }

#pragma omp target if(_chunk.is_offload) device(_chunk.device_id)
#pragma omp parallel for
    for(int ii = HALO_PAD; ii < _chunk.z-1; ++ii)
    {
        for(int jj = HALO_PAD; jj < _chunk.y-1; ++jj)
        {
            for(int kk = HALO_PAD; kk < _chunk.x-1; ++kk)
            {
                const int index = ii*_chunk.y*_chunk.x+jj*_chunk.x+kk;
                kx[index] = *rx*(w[index-1]+w[index])/(2.0*w[index-1]*w[index]);
                ky[index] = *ry*(w[index-_chunk.x]+w[index])/(2.0*w[index-_chunk.x]*w[index]);
                kz[index] = *rz*(w[index-_chunk.page]+w[index])/(2.0*w[index-_chunk.page]*w[index]);
            }
        }
    }

    double rroTemp = 0.0;

#pragma omp target if(_chunk.is_offload) device(_chunk.device_id)
#pragma omp parallel for reduction(+:rroTemp)
    for(int ii = HALO_PAD; ii < _chunk.z-HALO_PAD; ++ii)
    {
        for(int jj = HALO_PAD; jj < _chunk.y-HALO_PAD; ++jj)
        {
            for(int kk = HALO_PAD; kk < _chunk.x-HALO_PAD; ++kk)
            {
                const int index = ii*_chunk.y*_chunk.x+jj*_chunk.x+kk;
                const double smvp = SMVP(u);
                w[index] = smvp;
                r[index] = u[index]-w[index];
                p[index] = r[index];
                rroTemp += r[index]*p[index];
            }
        }
    }

    *rro = rroTemp;

    STOP_PROFILING(__func__);
}