示例#1
0
//
// FIXME: Report correct return value - Needs change in KPRINTF
//
static ssize_t
generator(
   char *buf,
   size_t buflen,
   const struct SubproblemDim *subdims,
   const struct PGranularity *pgran,
   void *extra)
{

	DUMMY_ARGS_USAGE_2(subdims, pgran);
	CLBLASKernExtra *extraFlags = ( CLBLASKernExtra *)extra;
	char tempTemplate[32*1024];

	if ( buf == NULL) // return buffer size
	{
		buflen = (32 * 1024 * sizeof(char));
        return (ssize_t)buflen;
	}

	#ifdef DEBUG_ROTMG
	printf("dataType : %c\n", Prefix[extraFlags->dtype]);
	#endif

    strcpy( tempTemplate, (char*)rotmg_kernel );

	kprintf kobj( Prefix[extraFlags->dtype], 1, false, false);
    kobj.spit((char*)buf, tempTemplate);

    return (32 * 1024 * sizeof(char));
}
示例#2
0
/**
 * Stores a copy of *val under key in this dictionary, replacing any entry
 * that already exists for that key.
 *
 * @param key  C string from which the dictionary key object is built
 *             (constructed with BencodeModeCopy).
 * @param val  Object whose value is copied into the dictionary entry.
 * @return Pointer to the value stored inside the dictionary, or NULL when
 *         this object is not a dictionary or val is NULL.
 */
BencodeObject* BencodeObject::setValueForKey(const char* key, BencodeObject* val) {
	if (_type != BencodeTypeDictionary) {
		return NULL;
	}

	// *val is dereferenced below; reject NULL instead of invoking undefined behavior.
	if (val == NULL) {
		return NULL;
	}

	BencodeObject kobj(key, BencodeModeCopy);

	// Build the new entry BEFORE erasing the old one: val may point at the value
	// currently stored under key, which the erase would destroy.
	BencodeDictStorage::value_type entry(kobj, *val);

	removeValueForKey(key);

	return const_cast<BencodeObject*>(&_dictValue->insert(entry).first->second);
}
示例#3
0
//
// FIXME: Report correct return value - Needs change in KPRINTF
//
static ssize_t
generator(
   char *buf,
   size_t buflen,
   const struct SubproblemDim *subdims,
   const struct PGranularity *pgran,
   void *extra)
{

	DUMMY_ARG_USAGE(subdims);
	size_t BLOCKSIZE  = pgran->wgSize[0];
	char tempTemplate[32*1024];

	if ( buf == NULL) // return buffer size
	{
		buflen = (32 * 1024 * sizeof(char));
        return (ssize_t)buflen;
	}
	CLBLASKernExtra *extraFlags = ( CLBLASKernExtra *)extra;

	#ifdef DEBUG_ASUM
 	printf("ASUM GENERATOR called....\n");
	printf("dataType : %c\n", Prefix[extraFlags->dtype]);
	#endif

    unsigned int vecLenA = extraFlags->vecLenA;

	#ifdef DEBUG_ASUM
	printf("Vector length used : %d\n\n", vecLenA);
	#endif

	bool doVLOAD = false;
	if( extraFlags->flags &  KEXTRA_NO_COPY_VEC_A )
	{
		doVLOAD = true;
		#ifdef DEBUG_ASUM
		printf("DOing VLOAD as Aligned Data Pointer not Availabe\n");
		#endif
	}
	else
	{
		#ifdef DEBUG_ASUM
		printf("Using Aligned Data Pointer \n");
		#endif
	}
    strcpy( tempTemplate, (char*)asum_kernel );
	kprintf kobj( Prefix[extraFlags->dtype], vecLenA, doVLOAD, doVLOAD, BLOCKSIZE);
    kobj.spit((char*)buf, tempTemplate);

    return (32 * 1024 * sizeof(char));
}
示例#4
0
//
// FIXME: Report correct return value - Needs change in KPRINTF
//
static ssize_t
generator(
   char *buf,
   size_t buflen,
   const struct SubproblemDim *subdims,
   const struct PGranularity *pgran,
   void *extra)
{

	size_t BLOCKSIZE  = pgran->wgSize[0];
	char tempTemplate[32*1024];
    SolutionStep *step = container_of(subdims, subdims, SolutionStep);

	if ( buf == NULL) // return buffer size
	{
		buflen = (32 * 1024 * sizeof(char));
        return (ssize_t)buflen;
	}
	CLBLASKernExtra *extraFlags = ( CLBLASKernExtra *)extra;

    unsigned int vecLenA = extraFlags->vecLenA;
	bool doVLOAD = false;

	if( extraFlags->flags &  KEXTRA_NO_COPY_VEC_A )
	{
		doVLOAD = true;
	}
    const char *kernName;

    if(step->args.redctnType == REDUCE_BY_HYPOT) {
            kernName = nrm2_hypot_kernel;
    } else if (step->args.redctnType == REDUCE_BY_SSQ) {
            kernName = nrm2_ssq_kernel;
    } else {
            printf(" Error in selecting kernel!\n");
            return 0;
    }

    strcpy( tempTemplate, kernName );
	kprintf kobj( Prefix[extraFlags->dtype], vecLenA, doVLOAD, doVLOAD, BLOCKSIZE);
    kobj.spit((char*)buf, tempTemplate);

    return (32 * 1024 * sizeof(char));
}
示例#5
0
/*
 * Kernel source generator for the column-major SYMM kernel (SYMM_C_KERNEL).
 *
 * buf     - destination for the generated source; when NULL the function
 *           only reports the buffer size it expects (32K). The function
 *           zero-fills buf up to 32K after the generated text.
 * buflen  - not consulted for bounds checking.
 * subdims - x and y are used to derive the per-work-item tile sizes.
 * pgran   - wgSize[0] is used as the block size.
 * extra   - points to a CLBLASKernExtra; flags, dtype and vecLenA are read.
 *
 * Returns 0 when the request is row-major (handled through the column-major
 * routines by the caller) or when the tile computation would divide by zero;
 * otherwise returns the fixed 32K size.
 */
static ssize_t
generator(
    char *buf,
    size_t buflen,
    const struct SubproblemDim *subdims,
    const struct PGranularity *pgran,
    void *extra)
{
    CLBLASKernExtra *kextra = (CLBLASKernExtra*)extra;
    char tempTemplate[32*1024];
    // 24 chars hold any 64-bit size_t in decimal (20 digits) plus the NUL.
    char itemx[24], itemy[24], width[24], itemy_by_width[24];
    size_t Y, X, BLOCKSIZE, ITEMX, ITEMY;

    // Size query: answer before touching 'extra' so it need not be valid here.
    if (buf == NULL)
    {
        buflen = 32*1024*sizeof(char);
        return (ssize_t)buflen;
    }

    KernelExtraFlags kflags = kextra->flags;
    DataType dtype = kextra->dtype;

    //
    // Row-major is implemented in terms of column major routines
    //
    if ((kflags & KEXTRA_COLUMN_MAJOR) == 0)
    {
        return 0;
    }

    // vecLenA is used as a divisor below.
    if (kextra->vecLenA == 0)
    {
        printf("WARNING: SYMM- generator: subdim and blocksize in-compatible.\n");
        return 0;
    }
    kprintf kobj(Prefix[dtype], kextra->vecLenA, true, true);

    BLOCKSIZE = pgran->wgSize[0];
#ifdef DEBUG_SYMM
    printf("SYMM- generator(): Blocksize passed = %" SPREFIX "u, subdimy = %" SPREFIX "u, subdimx = %" SPREFIX "u, veclen = %u \n", BLOCKSIZE, subdims->y, subdims->x, kextra->vecLenA);
#endif

    // Largest power-of-two width (<= 16) such that Y * vecLenA fits in subdims->y.
    Y = 16;
    while (Y*(kextra->vecLenA) > subdims->y)
    {
        Y /= 2;
    }

    // Y reaches 0 when subdims->y < vecLenA; X is 0 when BLOCKSIZE < Y.
    // Both are used as divisors, so refuse to generate instead of faulting.
    if (Y == 0)
    {
        printf("WARNING: SYMM- generator: subdim and blocksize in-compatible.\n");
        return 0;
    }
    X = BLOCKSIZE/Y;
    if (X == 0)
    {
        printf("WARNING: SYMM- generator: subdim and blocksize in-compatible.\n");
        return 0;
    }

    ITEMY = (subdims->y) / Y;
    ITEMX = (subdims->x) / X;
    if (ITEMX == 0)
    {
        ITEMX = 1;
    }

    // Non-fatal: generation continues after the warning.
    if ((BLOCKSIZE % Y) || ((subdims->y) % Y) || ((subdims->x)%X) || (ITEMY % kextra->vecLenA))
    {
        printf("WARNING: SYMM- generator: subdim and blocksize in-compatible.\n");
    }

    sprintf(width, "%" SPREFIX "u", Y);
    sprintf(itemy, "%" SPREFIX "u", ITEMY);
    sprintf(itemx, "%" SPREFIX "u", ITEMX);
    sprintf(itemy_by_width, "%" SPREFIX "u", (size_t) ITEMY/kextra->vecLenA);

    kobj.put("%WIDTH", width);
    kobj.put("%ITEMX", itemx);
    kobj.put("%ITEMY", itemy);
    kobj.put("%ITEMY_BY_V", itemy_by_width);
#ifdef DEBUG_SYMM
    printf("ColMajor SYMM - WIDTH = %s, ITEMX = %s, ITEMY = %s\n", width, itemx, itemy);
#endif

    strcpy(tempTemplate, SYMM_C_KERNEL);
    kobj.spit(buf, tempTemplate);
#ifdef DEBUG_SYMM
    printf("Kernel = \n%s\n", buf);
#endif

    // Zero everything after the terminating NUL up to the 32K mark.
    size_t tail = strlen(buf) + 1;
    if (tail < 32*1024)
    {
        memset(buf + tail, 0, 32*1024 - tail);
    }
    return 32*1024*sizeof(char);
}
示例#6
0
void BencodeObject::removeValueForKey(const char* key) {
	BencodeObject kobj(key, BencodeModeCopy);
	_dictValue->erase(kobj);
}
示例#7
0
//
// FIXME: Report correct return value when "buf" is NULL - Needs change in KPRINTF
// FIXME: Return correct return value when "buf" is NON NULL - Needs change in KPRINTF
// FIXME: "buflen" check needs to be more accurate. Relies on above changes to KPRINTF
//
static ssize_t
generator(
   char *buf,
   size_t buflen,
   const struct SubproblemDim *subdims,
   const struct PGranularity *pgran,
   void *extra)
{
	CLBLASKernExtra *extraFlags = ( CLBLASKernExtra *)extra;
    unsigned int vecLenA = extraFlags->vecLenA;
	char tempTemplate[32*1024];
	char TARGETROWS_S[10], NLOOPS_S[10], TARGETWIDTH_S[10];
	size_t TARGETROWS, NLOOPS, TARGETWIDTH;
	char TARGETHEIGHT_S[10], BLOCKSIZE_S[10], TRIANGLE_HEIGHT_S[10];
	size_t TARGETHEIGHT;
	bool doVLOAD = false;
	int BLOCKSIZE = pgran->wgSize[0] * pgran->wgSize[1];  // [1] will always be 1 since we are a 1D implementation

	if (buf == NULL) // PENDING: Return correct buffer size
	{
		return (32 * 1024 * sizeof(char));
	}
	if (buflen > 32*1024)
	{
		#ifdef DEBUG_TRSV_GEMV
		printf("TRSV GEMV: generator(): WARNING: Returning 0 as buflen is > 32K\n");
		#endif
		return 0;
	}

	if( extraFlags->flags &  KEXTRA_NO_COPY_VEC_A )
	{
		doVLOAD = true;
		#ifdef DEBUG_TRSV_GEMV
		printf("DOing VLOAD as Aligned Data Pointer not Availabe\n");
		#endif
	}
	else
	{
		#ifdef DEBUG_TRSV_GEMV
			printf("Using Aligned Data Pointer .........................\n");
		#endif
	}
	kprintf kobj( Prefix[extraFlags->dtype], vecLenA, doVLOAD);

	#ifdef DEBUG_TRSV_GEMV
 	printf("TRSV GEMV GENERATOR called....\n");
	#endif

	clblasUplo uplo   = ( extraFlags->flags & KEXTRA_UPPER_TRIANG) ? clblasUpper : clblasLower;
	clblasOrder order = ( extraFlags->flags & KEXTRA_COLUMN_MAJOR) ? clblasColumnMajor: clblasRowMajor;
	clblasTranspose trans =
	(extraFlags->flags & KEXTRA_TRANS_A) ? clblasTrans : (( extraFlags->flags & KEXTRA_CONJUGATE_A) ? clblasConjTrans: clblasNoTrans);
	bool unit = (((extraFlags->flags) & KEXTRA_UNIT_DIAGONAL) != 0);

	// unity and doConj handled in setKernelArgs
    if ( order == clblasRowMajor )
    {
        order = clblasColumnMajor;
        if ( trans == clblasNoTrans)
        {
            trans = clblasTrans;
        }
        else if ( trans == clblasTrans )
        {
            trans = clblasNoTrans;
        }
        else // clblasConjTrans
        {
            trans = clblasNoTrans;
        }
		uplo = ( uplo == clblasUpper)? clblasLower : clblasUpper;
    }

	//
	// Check Feasibility and then generate the code.
	//
	if ( trans != clblasNoTrans)
	{
		if (isTransposeFeasible(subdims->y, BLOCKSIZE, vecLenA, TARGETHEIGHT) == false)
		{
			return 0;
		}
        sprintf( TARGETHEIGHT_S, "%" SPREFIX "u", TARGETHEIGHT );
	    sprintf( BLOCKSIZE_S, "%d", BLOCKSIZE );
        sprintf( TRIANGLE_HEIGHT_S, "%" SPREFIX "u", subdims->y );

		kobj.put("%TARGET_HEIGHT", TARGETHEIGHT_S);
		kobj.put("%BLOCKSIZE", BLOCKSIZE_S);
		kobj.put("%TRIANGLE_HEIGHT", TRIANGLE_HEIGHT_S);
		( uplo == clblasLower )?
		    		(strcpy(tempTemplate, (char*)trsv_CLT_ComputeRectangle_kernel)) :
					(strcpy(tempTemplate, (char*)trsv_CUT_ComputeRectangle_kernel));

	}
	else // No-Transpose cases...
	{
		if (isNoTransposeFeasible(subdims->y, BLOCKSIZE, vecLenA, TARGETROWS, TARGETWIDTH, NLOOPS) == false)
		{
			return 0;
		}
        sprintf( TARGETROWS_S, "%" SPREFIX "u", TARGETROWS );
	    sprintf( TARGETWIDTH_S, "%" SPREFIX "u", TARGETWIDTH );
        sprintf( NLOOPS_S, "%" SPREFIX "u", NLOOPS );
		kobj.put("%TARGET_ROWS", TARGETROWS_S);
		kobj.put("%TARGET_WIDTH", TARGETWIDTH_S);
		kobj.put("%NLOOPS", NLOOPS_S);
		if (unit)
		{
			( uplo == clblasLower )?
		    (strcpy(tempTemplate, (char*)trsv_CL_ComputeRectangle_kernel)) : (strcpy(tempTemplate, (char*)trsv_CU_ComputeRectangle_kernel));
		} else {
			( uplo == clblasLower )?
		    (strcpy(tempTemplate, (char*)trsv_CL_ComputeRectangle_NonUnity_kernel)) : (strcpy(tempTemplate, (char*)trsv_CU_ComputeRectangle_NonUnity_kernel));
		}
	}

	#ifdef DEBUG_TRSV_GEMV
	printf("dataType : %c\n", Prefix[extraFlags->dtype]);
	#endif

	// FIXME: VECTORSIZE HARD CODED
	// FIXME : SetKernelArgs.. sends offa, offx, and lda should be received as uint

	#ifdef DEBUG_TRSV_GEMV
	printf("Vector length used : %d\n\n", vecLenA);
	#endif

    kobj.spit((char*)buf, tempTemplate);
	return (32 * 1024 * sizeof(char));
}
示例#8
0
//
// FIXME: Report correct return value - Needs change in KPRINTF
//
static ssize_t
generator(
   char *buf,
   size_t buflen,
   const struct SubproblemDim *subdims,
   const struct PGranularity *pgran,
   void *extra)
{
	int BLOCKSIZE  = pgran->wgSize[0];
	char tempTemplate[64*1024];
	char targetRows[10], blockSize[10];

	if ( buf == NULL) // return buffer size
	{
		buflen = (64 * 1024 * sizeof(char));
		return (ssize_t)buflen;
	}
	CLBLASKernExtra *extraFlags = ( CLBLASKernExtra *)extra;

	#ifdef DEBUG_HER2
 	printf("HER2 GENERATOR called....\n");
	#endif

	clblasUplo uplo   = ( extraFlags->flags & KEXTRA_UPPER_TRIANG) ? clblasUpper : clblasLower;

	if ((subdims->y % extraFlags->vecLenA) != 0)
	{
		printf("WARNING: HER2: generator: TARGETROWS must be divisible by Vector Length\n");
		return 0;
	}

	size_t TARGETROWS = 0;
	( uplo == clblasLower )?
		     (strcpy(tempTemplate, (char*)syr2_her2_CL_kernel)) : (strcpy(tempTemplate, (char*)syr2_her2_CU_kernel));

	TARGETROWS = subdims->y;
	if ((BLOCKSIZE % TARGETROWS) != 0)
	{
		printf("WARNING: HER2: generator: Invalid Block Size\n");
		return 0;
	}

	#ifdef DEBUG_HER2
	printf("dataType : %c\n", Prefix[extraFlags->dtype]);
	#endif

	// FIXME: VECTORSIZE HARD CODED
	// FIXME : SetKernelArgs.. sends offa, offx, and lda should be received as uint
    unsigned int vecLenA = extraFlags->vecLenA;

	#ifdef DEBUG_HER2
	printf("Vector length used : %d\n\n", vecLenA);
	#endif

	bool doVLOAD = false;
	if( extraFlags->flags &  KEXTRA_NO_COPY_VEC_A )
	{
		doVLOAD = true;
		#ifdef DEBUG_HER2
			printf("DOing VLOAD as Aligned Data Pointer not Availabe\n");
		#endif
	}
	else
	{
		#ifdef DEBUG_HER2
			printf("Using Aligned Data Pointer .........................\n");
		#endif
	}
	kprintf kobj( Prefix[extraFlags->dtype], vecLenA, doVLOAD, doVLOAD);

	sprintf( targetRows, "%" SPREFIX "u", TARGETROWS );
	sprintf( blockSize, "%d", BLOCKSIZE );

	#ifdef DEBUG_HER2
    printf("TARGET ROWS = %s\n", targetRows);
    printf("BLOCK SIZE = %s\n", blockSize);
	#endif

    kobj.put("%TARGET_ROWS", (const char *)targetRows);
    kobj.put("%BLOCKSIZE", (const char *) blockSize);
    kobj.spit((char*)buf, tempTemplate);

	return (64 * 1024 * sizeof(char));
    // return 0;//(ret < 0) ? -EOVERFLOW : ret;
}
示例#9
0
//
// FIXME: Report correct return value - Needs change in KPRINTF
//
/*
 * Kernel source generator for the TRMV kernels (trmv_CL / trmv_CU for the
 * no-transpose case, trmv_CLT / trmv_CUT for the transpose cases).
 *
 * A row-major request is first re-expressed as column-major: the triangle
 * flips and transpose <-> no-transpose are swapped (conjugate-transpose also
 * becomes no-transpose).
 *
 * buf     - destination for the generated source; when NULL the function
 *           only reports the buffer size it expects (64K).
 * buflen  - not consulted for bounds checking (see the FIXME above).
 * subdims - y is the sub-problem height used to derive TARGET_ROWS.
 * pgran   - wgSize[0] is the block size.
 * extra   - points to a CLBLASKernExtra; flags, vecLenA and dtype are read.
 *
 * Returns 0 (after printing a warning) when the sub-problem size / vector
 * length / block size combination is unusable; otherwise the fixed 64K size.
 */
static ssize_t
generator(
   char *buf,
   size_t buflen,
   const struct SubproblemDim *subdims,
   const struct PGranularity *pgran,
   void *extra)
{

	size_t BLOCKSIZE  = pgran->wgSize[0];
	// Staging buffer sized to match the 64K this function reports to callers.
	char tempTemplate[64*1024];
	// 24 chars hold any 64-bit size_t in decimal (20 digits) plus the NUL.
	char targetRows[24], blockSize[24];

	if ( buf == NULL) // return buffer size
	{
		buflen = (64 * 1024 * sizeof(char));
        return (ssize_t)buflen;
	}
	CLBLASKernExtra *extraFlags = ( CLBLASKernExtra *)extra;

	#ifdef DEBUG_TRMV
 	printf("TRMV GENERATOR called....\n");
	#endif

	if((( extraFlags->flags &  KEXTRA_TRANS_A) || ( extraFlags ->flags & KEXTRA_CONJUGATE_A )))
	{
		#ifdef DEBUG_TRMV
		printf("A is trans or CONJ-TRANS\n");
		#endif
	}
	else
	{
		#ifdef DEBUG_TRMV
		printf("A is noTrans...\n");
		#endif
	}

	clblasUplo uplo   = ( extraFlags->flags & KEXTRA_UPPER_TRIANG) ? clblasUpper : clblasLower;
	clblasOrder order = ( extraFlags->flags & KEXTRA_COLUMN_MAJOR) ? clblasColumnMajor: clblasRowMajor;
	clblasTranspose trans = ( extraFlags->flags & KEXTRA_TRANS_A) ? clblasTrans : (( extraFlags->flags & KEXTRA_CONJUGATE_A) ? clblasConjTrans: clblasNoTrans);

	// unity and doConj handled in setKernelArgs
    if ( order == clblasRowMajor )
    {
        order = clblasColumnMajor;
        if ( trans == clblasNoTrans)
        {
            trans = clblasTrans;
        }
        else if ( trans == clblasTrans )
        {
            trans = clblasNoTrans;
        }
        else // clblasConjTrans
        {
            trans = clblasNoTrans;
        }

		uplo = ( uplo == clblasUpper)? clblasLower : clblasUpper;
    }

	// Both values are used as divisors below; a zero would fault.
	if ((extraFlags->vecLenA == 0) || (subdims->y == 0))
	{
		printf("WARNING: TRMV: generator: Invalid subproblem size or Vector Length\n");
		return 0;
	}

	if ((subdims->y % extraFlags->vecLenA) != 0)
	{
		printf("WARNING: TRMV: generator: TARGETROWS must be divisible by Vector Length\n");
		return 0;
	}

	size_t TARGETROWS = 0;
	if ( trans == clblasNoTrans)
	{
		#ifdef DEBUG_TRMV
		printf("clblasNoTrans....%s\n",	( uplo == clblasLower )?"LOWER":"UPPER");
		#endif

		( uplo == clblasLower )?
		    		(strcpy(tempTemplate, (char*)trmv_CL_kernel)) : (strcpy(tempTemplate, (char*)trmv_CU_kernel));

		TARGETROWS = subdims->y;
		if ((BLOCKSIZE % TARGETROWS) != 0)
		{
			printf("WARNING: TRMV: generator: Invalid Block Size\n");
			return 0;
		}
	}
	else // Transpose cases...
	{
		#ifdef DEBUG_TRMV
		printf("clblasTrans....%s\n",	( uplo == clblasLower )?"LOWER":"UPPER");
		#endif

		( uplo == clblasLower )?
		    		(strcpy(tempTemplate, (char*)trmv_CLT_kernel)) : (strcpy(tempTemplate, (char*)trmv_CUT_kernel));

		// subdims->y / vecLenA is non-zero here: y is a non-zero multiple of vecLenA.
		if ((BLOCKSIZE % (subdims->y / extraFlags->vecLenA)) != 0)
		{
			printf("WARNING: TRMV: generator: Invalid Block Size\n");
			return 0;
		}
		TARGETROWS = BLOCKSIZE/(subdims->y / extraFlags->vecLenA);
	}

	#ifdef DEBUG_TRMV
	printf("dataType : %c\n", Prefix[extraFlags->dtype]);
	#endif

	// FIXME: VECTORSIZE HARD CODED
	// FIXME : SetKernelArgs.. sends offa, offx, and lda should be received as uint
    unsigned int vecLenA = extraFlags->vecLenA;

	#ifdef DEBUG_TRMV
	printf("Vector length used : %u\n\n", vecLenA);
	#endif

	bool doVLOAD = false;
	if( extraFlags->flags &  KEXTRA_NO_COPY_VEC_A )
	{
		doVLOAD = true;
		#ifdef DEBUG_TRMV
			printf("DOing VLOAD as Aligned Data Pointer not Availabe\n");
		#endif
	}
	else
	{
		#ifdef DEBUG_TRMV
			printf("Using Aligned Data Pointer .........................\n");
		#endif
	}
	kprintf kobj( Prefix[extraFlags->dtype], vecLenA, doVLOAD);

    sprintf( targetRows, "%" SPREFIX "u", TARGETROWS );
	sprintf( blockSize, "%" SPREFIX "u", BLOCKSIZE );

	#ifdef DEBUG_TRMV
    printf("TARGET ROWS = %s\n", targetRows);
    printf("BLOCK SIZE = %s\n", blockSize);
	#endif

    kobj.put("%TARGET_ROWS", (const char *)targetRows);
    kobj.put("%BLOCKSIZE", (const char *) blockSize);
    kobj.spit((char*)buf, tempTemplate);

	return (64 * 1024 * sizeof(char));
}
示例#10
0
// Writes one titled results table: for every frequency, the norm, the norm of
// the inverse (condition number divided by norm) and the condition number.
static void WriteNormCondTable(std::ostream& out, const char* title,
                               const std::vector<double>& freqs,
                               const std::vector<double>& norms,
                               const std::vector<double>& conds)
{
    out << title << std::endl;

    for (std::vector<double>::size_type j=0; j<freqs.size(); j++) {
        out << "k=" << freqs[j] << " Norm: " << norms[j] << " Norm of Inverse: " << conds[j]/norms[j] << " Condition Nr.: " << conds[j] <<  std::endl;
    }
}

// Computes, for a fixed list of wavenumbers, the L2 norm and condition number
// of the single layer, double layer and two combined-layer operators on the
// kite curve, and writes the results to "kitenormcond10.txt".
//
// With BEM2DMPI defined the computation runs on a BLACS process grid and only
// the root process writes the results.
//
// Returns 0 on success and 1 when the output file cannot be opened. Under
// BEM2DMPI the process exits early with status 1 if no BLACS context could be
// created and with status 0 if the process is not part of the grid.
int main(int argc, char** argv)
{

    int ppw=10;     // Point per wavelength
    std::string filename="kitenormcond10.txt";


    std::vector<double> freqs;
    freqs.push_back(5);
    freqs.push_back(10);
    freqs.push_back(20);
    freqs.push_back(40);
    freqs.push_back(80);
    freqs.push_back(160);
    freqs.push_back(320);
    freqs.push_back(640);



    std::vector<double> norm_sl(freqs.size());
    std::vector<double> norm_dl(freqs.size());
    std::vector<double> norm_combined1(freqs.size());
    std::vector<double> norm_combined2(freqs.size());
    std::vector<double> cond_sl(freqs.size());
    std::vector<double> cond_dl(freqs.size());
    std::vector<double> cond_combined1(freqs.size());
    std::vector<double> cond_combined2(freqs.size());



    // clock() reports processor time used by this process, not wall-clock time.
    clock_t start, finish;
    double time;
    start=clock();


#ifdef BEM2DMPI
    MPI_Init(&argc, &argv);


    int nprow=4; // Number of rows in process grid
    int npcol=2; // Number of columns in process grid
    int mb=24;  // Row Block size
    int nb=24;  // Column Block size
    bem2d::BlacsSystem* b=bem2d::BlacsSystem::Initialize(nprow,npcol,mb,nb);

    // Exit if Context could not be created or process does not belong to context

    if (!b) {
        std::cout <<  "Could not create Blacs context" << std::endl;
        MPI_Finalize();
        exit(1);
    }
    if ((b->get_myrow()==-1)&&(b->get_mycol()==-1)) {
        MPI_Finalize();
        exit(0);
    }
#endif

    for (std::vector<double>::size_type j=0; j<freqs.size(); j++) {

        bem2d::freqtype k= {(double)freqs[j],0};
        double eta1=k.re; // Coupling between conj. double and single layer pot.
        double eta2=cbrt(k.re*k.re); // Second coupling choice: k^(2/3)
        bem2d::pCurve kobj(new bem2d::Kite);
        // Number of elements: ppw points per wavelength along the curve.
        int n=(int)(kobj->Length()*k.re*ppw/2.0/bem2d::PI);
        bem2d::AnalyticCurve kite(n,kobj);
        bem2d::pGeometry pgeom=kite.GetGeometry();

        bem2d::PolBasis::AddBasis(0,pgeom); // Add constant basis functions


        // Discretize the single and double layer potential

        bem2d::SingleLayer sl(k);
        bem2d::ConjDoubleLayer cdl(k);
        bem2d::DoubleLayer dl(k);

        bem2d::QuadOption quadopts;

        quadopts.L=3;
        quadopts.N=5;
        quadopts.sigma=0.15;

#ifdef BEM2DMPI
        if (b->IsRoot()) {
            std::cout << "Discretize Kernels with n=" << n << std::endl;
        }
#else
        std::cout << "Discretize Kernels with n=" << n << std::endl;
#endif



        bem2d::Matrix dsl=*(DiscreteKernel(*pgeom,quadopts,sl));
        bem2d::Matrix ddl=(*DiscreteKernel(*pgeom,quadopts,dl));
        bem2d::Matrix dcdl=*(DiscreteKernel(*pgeom,quadopts,cdl));
        bem2d::Matrix Id=*(EvalIdent(*pgeom, quadopts));
        bem2d::Matrix combined1=Id+2.0*dcdl-bem2d::complex(0,2.0)*eta1*dsl;
        bem2d::Matrix combined2=Id+2.0*dcdl-bem2d::complex(0,2.0)*eta2*dsl;

        dsl=2.0*bem2d::ChangeBasis(dsl,Id);
        ddl=2.0*bem2d::ChangeBasis(ddl,Id);
        dcdl=2.0*bem2d::ChangeBasis(dcdl,Id);
        combined1=bem2d::ChangeBasis(combined1,Id);
        combined2=bem2d::ChangeBasis(combined2,Id);

#ifdef BEM2DMPI
        if (b->IsRoot()) {
            std::cout << "Compute norms and condition numbers" << std::endl;
        }
#else
        std::cout << "Compute norms and condition numbers" << std::endl;
#endif


        bem2d::L2NormCond(dsl,norm_sl[j],cond_sl[j]);
        bem2d::L2NormCond(ddl,norm_dl[j],cond_dl[j]);
        bem2d::L2NormCond(combined1,norm_combined1[j],cond_combined1[j]);
        bem2d::L2NormCond(combined2,norm_combined2[j],cond_combined2[j]);

    }
    finish=clock();
    time=(double(finish)-double(start))/CLOCKS_PER_SEC/60;

    int status=0;

#ifdef BEM2DMPI
    if (b->IsRoot()) {
#endif

        std::ofstream out(filename.c_str());
        if (!out) {
            // Report the failure instead of silently discarding the results.
            std::cerr << "Could not open " << filename << " for writing" << std::endl;
            status=1;
        } else {
            WriteNormCondTable(out, "Single Layer", freqs, norm_sl, cond_sl);
            WriteNormCondTable(out, "Double Layer", freqs, norm_dl, cond_dl);
            WriteNormCondTable(out, "Combined Layer eta=k", freqs, norm_combined1, cond_combined1);
            WriteNormCondTable(out, "Combined Layer eta=k^(2/3)", freqs, norm_combined2, cond_combined2);

            out << "Overall time (minutes): " << time << std::endl;
            out.close();
        }
        std::cout << "Overall time (minutes): " << time << std::endl;
#ifdef BEM2DMPI
    }
#endif


#ifdef BEM2DMPI
    bem2d::BlacsSystem::Release();
    MPI_Finalize();
#endif

    return status;
}
示例#11
0
static ssize_t
generator(
   char *buf,
   size_t buflen,
   const struct SubproblemDim *subdims,
   const struct PGranularity *pgran,
   void *extra)
{

	size_t BLOCKSIZE  = pgran->wgSize[0];
	size_t H = subdims->x;
	char tempTemplate[64*1024];
	char def_target_rows[10], def_h[10];

    SolutionStep *step = container_of( pgran , pgran, SolutionStep);    // NOTE: using container_of() to get pigFuncID
    CLBlasKargs* kargs = (CLBlasKargs*) &(step->args);


	if ( buf == NULL) // return buffer size
	{
		buflen = (64 * 1024 * sizeof(char));
        return (ssize_t)buflen;
	}

	CLBLASKernExtra *extraFlags = ( CLBLASKernExtra *)extra;

	//clblasUplo uplo   = ( extraFlags->flags & KEXTRA_UPPER_TRIANG) ? clblasUpper : clblasLower;
	clblasOrder order = ( extraFlags->flags & KEXTRA_COLUMN_MAJOR) ? clblasColumnMajor: clblasRowMajor;
	clblasTranspose trans = ( extraFlags->flags & KEXTRA_TRANS_A) ? clblasTrans : (( extraFlags->flags & KEXTRA_CONJUGATE_A) ? clblasConjTrans: clblasNoTrans);

    if ( order == clblasColumnMajor )
    {
        order = clblasRowMajor;
        if ( trans == clblasNoTrans)
        {
            trans = clblasTrans;
        }
        else if ( trans == clblasTrans )
        {
            trans = clblasNoTrans;
        }
        else // clblasConjTrans
        {
            trans = clblasNoTrans;
        }
    }
    if( (kargs->pigFuncID == CLBLAS_SBMV) || (kargs->pigFuncID == CLBLAS_HBMV) )    // Only NT kernel is used
    {
        trans = clblasNoTrans;
    }

	if ((BLOCKSIZE % H) != 0)
    {
		printf("WARNING: GBMV: generator: Invalid Block Size\n");
		return 0;
	}
	size_t TARGET_ROWS =  BLOCKSIZE / H;

	if ( trans == clblasNoTrans)
	{
		strcpy(tempTemplate, (char*)gbmv_RNT_kernel);
	}
	else // Transpose cases...
	{
        strcpy(tempTemplate, (char*)gbmv_RT_kernel);;
	}

    unsigned int vecLenA = extraFlags->vecLenA;

	bool doVLOAD = false;       // Always scalar load for banded matrices
	kprintf kobj( Prefix[extraFlags->dtype], vecLenA, doVLOAD);

    sprintf( def_target_rows, "%d", (int)TARGET_ROWS );
	sprintf( def_h, "%d", (int)H );

	#ifdef DEBUG_GBMV
	    printf("GBMV GENERATOR called....\n");
	    if((( extraFlags->flags &  KEXTRA_TRANS_A) || ( extraFlags ->flags & KEXTRA_CONJUGATE_A )))
	    {
	        printf("A is trans or CONJ-TRANS\n");
	    }
	    else
	    {
	        printf("A is noTrans...\n");
	    }
        printf("TARGET ROWS = %s\n", def_target_rows);
        printf("H = %s\n", def_h);
        printf("dataType : %c\n", Prefix[extraFlags->dtype]);
	#endif

    kobj.put("%DEF_H", (const char *)def_h);
    kobj.put("%DEF_TARGET_ROWS", (const char *)def_target_rows);
    kobj.spit((char*)buf, tempTemplate);

	return (64 * 1024 * sizeof(char));
}