コード例 #1
0
ファイル: genmat.c プロジェクト: toothbrush/bsp-cg
/*
 * Check that the matrix given as nz coordinate triples (i[c], j[c], v[c])
 * is strictly diagonally dominant by rows: for every row 0..N-1 the
 * absolute value of the diagonal entry must exceed the sum of the absolute
 * values of the off-diagonal entries stored for that row.
 *
 * If a diagonal position occurs more than once, the last stored value is
 * the one that is tested. On the first violating row a diagnostic is
 * written to stderr and the program terminates with exit status 5.
 *
 * N is not declared in this function; it comes from the enclosing scope.
 */
void checkStrictDiagonallyDominant(int* i, int* j, double* v, int nz)
{
    double *offdiag_sum = vecallocd(N);
    double *diag_entry = vecallocd(N);
    int row;
    int k;

    /* start every row with an empty sum and a zero diagonal */
    for (row = 0; row < N; row++) {
        offdiag_sum[row] = 0;
        diag_entry[row] = 0;
    }

    /* single sweep over the triples: record diagonals, accumulate the rest */
    for (k = 0; k < nz; k++) {
        const int r = i[k];

        if (r != j[k]) {
            offdiag_sum[r] += fabs(v[k]);
            continue;
        }
        diag_entry[r] = v[k];
    }

    /* compare each diagonal against the accumulated row sum */
    for (row = 0; row < N; row++) {
        if (fabs(diag_entry[row]) > offdiag_sum[row])
            continue;

        fprintf(stderr, "PROBLEM: diagonal > rowtotal doesn't hold: \n"
                        "    diagonals[%d] = %lf\n"
                        "    rowtotal[%d]  = %lf\n",
                        row, fabs(diag_entry[row]),
                        row, offdiag_sum[row]);
        fprintf(stderr, "increase mu? sometimes just running again is enough.\n");
        exit(5);
    }

    free(offdiag_sum);
    free(diag_entry);
}
コード例 #2
0
ファイル: bspinprod.c プロジェクト: pkrusche/bsponmpi
double bspip(int p, int s, int n, double *x, double *y){
    /* Compute inner product of vectors x and y of length n>=0.

       p is the number of processors and s the number of the calling
       processor. Each processor sums the products of its first
       nloc(p,s,n) local components, puts that partial sum into slot s
       of a registered p-element array on every processor, and after
       the synchronisation adds up all p slots. Every processor
       therefore returns the same total. */

    int nloc(int p, int s, int n);
    double inprod, *Inprod, alpha;
    int i, t, nl;
  
    Inprod= vecallocd(p); bsp_push_reg(Inprod,p*SZDBL);
    bsp_sync();

    /* The local length does not change inside the loop, so compute it once
       instead of calling nloc in every loop test. */
    nl= nloc(p,s,n);

    inprod= 0.0;
    for (i=0; i<nl; i++){
        inprod += x[i]*y[i];
    }
    /* Broadcast the local partial sum to slot s on all processors. */
    for (t=0; t<p; t++){
        bsp_put(t,&inprod,Inprod,s*SZDBL,SZDBL);
    }
    bsp_sync();

    alpha= 0.0;
    for (t=0; t<p; t++){
        alpha += Inprod[t];
    }
    bsp_pop_reg(Inprod); vecfreed(Inprod);

    return alpha;

} /* end bspip */
コード例 #3
0
ファイル: bspfft2d.c プロジェクト: Heliosmaster/bspfft2d
 void bspredistr(double *x, int i, int length, int M, int N, int s, int t,
 int c0, int c1,char rev, int *rho_p, double *pm, int col){

   /* Redistribute the complex vector x (stored as interleaved re/im
      doubles) by packing strided packets and putting them into pm on
      the destination processors.
        col = 0 means that we are considering proc rows
        col = 1 means that we are considering proc columns
      When rev is nonzero, rho_p[t] is used in place of t to derive the
      position of this processor in the old distribution.
      i selects the block of `length` complex entries inside pm that
      the packets are written to. */

   const int stride = c1/c0;
   const int packet_len = MAX(length/stride,1);
   const int packet_count = length/packet_len;
   double *packet = vecallocd(2*packet_len);

   /* rho_p is only consulted in the reversed case */
   const int src_t = rev ? rho_p[t] : t;
   const int j0 = src_t%c0;
   const int j2 = src_t/c0;

   int j;
   for(j=0; j<packet_count; j++){
     const int jglob = j2*c0*length + j*c0 + j0;
     const int local_proc = (jglob/(c1*length))*c1 + jglob%c1;
     int destproc;
     int destindex;
     int r;

     /* translate the one-dimensional processor number to a global one */
     if (col == 0) {
       destproc = s+M*local_proc;
     } else {
       destproc = N*s+local_proc;
     }

     /* gather every stride-th complex entry, starting at j */
     for(r=0; r<packet_len; r++){
       const int src = j+r*stride;
       packet[2*r] = x[2*src];
       packet[2*r+1] = x[2*src+1];
     }

     /*
      * the first term of the sum is because we don't really know
      * the address of a[i] in the destproc, so we start from the
      * beginning of a and jump
      */
     destindex = i*length + (jglob%(c1*length))/c1;

     bsp_put(destproc,packet,pm,destindex*2*SZDBL,packet_len*2*SZDBL);
   }
   vecfreed(packet);

 } /* end bspredistr */
コード例 #4
0
/*
 * Matlab (MEX) entry point.
 *
 * prhs[0] is the input matrix and prhs[1] a priority vector with m+n
 * entries in 1-based numbering. The matrix is reordered by increasing row,
 * split with overpaint(), and the two resulting matrices are returned as
 * plhs[0] (the Ac part) and plhs[1] (the Ar part).
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
  /* Matlab matrix -> Mondriaan sparse matrix */
  struct sparsematrix* input = ConvertMatlabToMondriaan(prhs[0]);

  /* number of entries expected in the priority vector */
  int nprio = input->m + input->n;

  /* raw doubles of the priority vector; this buffer is allocated by
     Matlab and must NOT be freed here */
  double* prioRaw = mxGetPr(prhs[1]);

  /* explicit double -> long conversion */
  long* prio = double_array_to_long(prioRaw, nprio);

  /* shift the row/column numbering down by one to get C indices */
  int idx = 0;
  while (idx < nprio) {
    prio[idx] -= 1;
    idx++;
  }

  /* the matrix may come without numerical values: supply zeros */
  if (input->ReValue == NULL) {
    int e;
    input->ReValue = vecallocd(input->NrNzElts);
    for (e = 0; e < input->NrNzElts; e++) {
      input->ReValue[e] = 0.0;
    }
  }

  /* Matlab stores by ascending columns; switch to ascending rows */
  struct sparsematrixplus sorted = reorder_row_incr(input);
  struct sparsematrix rowOrdered = sorted.matrix;

  /* perform the split */
  struct twomatrices parts = overpaint(&rowOrdered, prio);

  /* hand the two parts back to Matlab */
  struct sparsematrix firstOut = parts.Ac;
  struct sparsematrix secondOut = parts.Ar;
  plhs[0] = ConvertMondriaanToMatlab(&firstOut);
  plhs[1] = ConvertMondriaanToMatlab(&secondOut);

  /* release everything that was allocated on the C side */
  vecfreel(prio);
  MMDeleteSparseMatrix(&rowOrdered);
  MMDeleteSparseMatrix(&parts.Ar);
  MMDeleteSparseMatrix(&parts.Ac);
  vecfreel(sorted.perm);
  MMDeleteSparseMatrix(input);
}
コード例 #5
0
void bspinprod(){
    
    double bspip(int p, int s, int n, double *x, double *y);
    int nloc(int p, int s, int n);
    double *x, alpha, time0, time1;
    int p, s, n, nl, i, iglob;
    
    bsp_begin(P);
    p= bsp_nprocs(); /* p = number of processors obtained */ 
    s= bsp_pid();    /* s = processor number */ 
    if (s==0){
        printf("Please enter n:\n"); fflush(stdout);
        scanf("%d",&n);
        if(n<0)
            bsp_abort("Error in input: n is negative");
    }
    bsp_push_reg(&n,SZINT);
    bsp_sync();

    bsp_get(0,&n,0,&n,SZINT);
    bsp_sync();
    bsp_pop_reg(&n);

    nl= nloc(p,s,n);
    x= vecallocd(nl);
    for (i=0; i<nl; i++){
        iglob= i*p+s;
        x[i]= iglob+1;
    }
    bsp_sync(); 
    time0=bsp_time();

    alpha= bspip(p,s,n,x,x);
    bsp_sync();  
    time1=bsp_time();

    printf("Processor %d: sum of squares up to %d*%d is %.lf\n",
            s,n,n,alpha); fflush(stdout);
    if (s==0){
        printf("This took only %.6lf seconds.\n", time1-time0);
        fflush(stdout);
    }

    vecfreed(x);
    bsp_end();

} /* end bspinprod */
コード例 #6
0
ファイル: utils.c プロジェクト: Heliosmaster/master_thesis
/*
 * Reorder the nonzeros of a matrix so that their column indices are in
 * ascending order.
 *
 * Returns a sparsematrixplus whose `matrix` member is a newly allocated
 * copy of the nonzeros (i, j, ReValue) in the new order and whose `perm`
 * member is the index vector returned by CSortVec: entry l of the output
 * is entry perm[l] of the input. The input matrix is not modified.
 * matrix->ReValue is read unconditionally, so it must not be NULL.
 */
struct sparsematrixplus reorder_col_incr(struct sparsematrix* matrix){

	/* allocating memory */
	long length = matrix->NrNzElts;
	long* I = vecallocl(length);
	long* J = vecallocl(length);
	double* Val = vecallocd(length);

	/* indices are long, like length and the entries of the permutation,
	 * so that large matrices do not truncate */
	long k,l;

	/* creating a temporary array for storing the values to be sorted (columns) */
	long* tempArray = vecallocl(length);
	for(k=0;k<length;k++) tempArray[k] = matrix->j[k];

	/* sorting tempArray with Counting Sort and getting back the permutation indices */
	long* indices = CSortVec(tempArray,length,matrix->n);

	/* creation of the vectors of the permuted rows, columns, value */
	for(l=0;l<length;l++){
		k = indices[l];
		I[l] = matrix->i[k];
		J[l] = matrix->j[k];
		Val[l] = matrix->ReValue[k];
	}

	/* creating the matrix part of the output */
	struct sparsematrix newmatrix;
	MMSparseMatrixInit(&newmatrix);

	newmatrix.m = matrix->m;
	newmatrix.n = matrix->n;
	newmatrix.i = I;
	newmatrix.j = J;
	newmatrix.ReValue = Val;
	newmatrix.NrNzElts = length;

	/* removing the temporary array */
	vecfreel(tempArray);

	/* creating the final output; the caller owns newmatrix's arrays and perm */
	struct sparsematrixplus output;
	output.matrix = newmatrix;
	output.perm = indices;
	return output;
}
コード例 #7
0
/*
 * Small driver: reads a fixed matrix file, builds the identity priority
 * vector 0..m+n-1, reorders the matrix by increasing row and runs
 * overpaint() on it. Returns 0 on success and 1 when the matrix file
 * cannot be opened or read.
 */
int main(){

	/* reading the matrix from file */
	FILE* File;
	struct sparsematrix matrix;
	File = fopen("../../matrices/cre_b.mtx", "r");
	/* File = fopen("../../matrices/tbdlinux.mtx", "r"); */
	if (File == NULL) {
		/* nothing to read from and nothing to close */
		printf("Unable to read input matrix!\n");
		return 1;
	}
	if (!MMReadSparseMatrix(File, &matrix)) {
		/* the matrix was not read, so it must not be used below */
		printf("Unable to read input matrix!\n");
		fclose(File);
		return 1;
	}
	fclose(File);

	/* creating explicitly a particular priority vector vec */
	int m = matrix.m;
	int n = matrix.n;

	/* the matrix may come without numerical values: supply zeros */
	if(matrix.ReValue == NULL){
		matrix.ReValue = vecallocd(matrix.NrNzElts);
		int j;
		for(j=0;j<matrix.NrNzElts;j++) matrix.ReValue[j] = 0.0;
	}

	/*long* vec = random_permutation(m+n); */
	int i;
	long* vec = vecallocl(m+n);
	for(i=0;i<m+n;i++) vec[i]=i;

	/* explicit computation of Ar and Ac with the overpaint method */
	struct sparsematrixplus m2plus = reorder_row_incr(&matrix);
	struct sparsematrix matrix2 = m2plus.matrix;
	struct twomatrices two = overpaint(&matrix2,vec);

	printf("---------------\n");
	/*print_matrix(two.Ar);
		print_matrix(two.Ac); */

	vecfreel(vec);
	vecfreel(m2plus.perm);

	MMDeleteSparseMatrix(&matrix);
	MMDeleteSparseMatrix(&matrix2);
	MMDeleteSparseMatrix(&two.Ar);
	MMDeleteSparseMatrix(&two.Ac);
	return 0;
}
コード例 #8
0
ファイル: utils.c プロジェクト: Heliosmaster/master_thesis
/* from partitioned matrix, obtaining subpart
 * id = 1 or 2
 */
struct sparsematrix assignMatrix(struct sparsematrix* matrix, int id){
	struct sparsematrix output;
	MMSparseMatrixInit(&output);

	output.m = matrix->m;
	output.n = matrix->n;
	output.NrNzElts = matrix->Pstart[id]-matrix->Pstart[id-1];

	output.i = vecallocl(output.NrNzElts);
	output.j = vecallocl(output.NrNzElts);
	output.ReValue = vecallocd(output.NrNzElts);

	int start = matrix->Pstart[id-1];
	int k;
	for(k=0;k<output.NrNzElts;k++){
		output.i[k] = matrix->i[start+k];
		output.j[k] = matrix->j[start+k];
		output.ReValue[k] = 1.0*id;
	}
	return output;
}
コード例 #9
0
ファイル: utils.c プロジェクト: Heliosmaster/master_thesis
/*
 * Turn a two-way partitioned matrix into a single matrix whose values
 * encode the part: nonzeros of part 1 get value 1.0, nonzeros of part 2
 * get value 2.0. The result is returned ordered by reorder_row_incr();
 * the caller owns its arrays.
 */
struct sparsematrix partition_to_matrix(struct sparsematrix* A){
	struct sparsematrix A1 = assignMatrix(A,1);
	struct sparsematrix A2 = assignMatrix(A,2);

	struct sparsematrix B;
	MMSparseMatrixInit(&B);

	B.m = A->m;
	B.n = A->n;
	/*
	 * Size B by the number of entries that are actually written below.
	 * Using A->NrNzElts instead would leave an uninitialised tail (or
	 * overrun the arrays) whenever the two parts do not add up to it.
	 */
	B.NrNzElts = A1.NrNzElts + A2.NrNzElts;

	B.i = vecallocl(B.NrNzElts);
	B.j = vecallocl(B.NrNzElts);
	B.ReValue = vecallocd(B.NrNzElts);

	long index_B = 0, i;
	/* part 1 first ... */
	for(i=0;i<A1.NrNzElts;i++){
		B.i[index_B] = A1.i[i];
		B.j[index_B] = A1.j[i];
		B.ReValue[index_B++] = 1.0;
	}
	/* ... then part 2 directly behind it */
	for(i=0;i<A2.NrNzElts;i++){
		B.i[index_B] = A2.i[i];
		B.j[index_B] = A2.j[i];
		B.ReValue[index_B++] = 2.0;
	}

	MMDeleteSparseMatrix(&A1);
	MMDeleteSparseMatrix(&A2);

	/* only the reordered copy is returned; B and the permutation are released */
	struct sparsematrixplus plus = reorder_row_incr(&B);
	MMDeleteSparseMatrix(&B);
	vecfreel(plus.perm);
	return plus.matrix;

}
コード例 #10
0
ファイル: bspbench.c プロジェクト: wf34/parallel
void bspbench(){
    /*
     * BSP benchmark. First the computing rate r (flop/s) is measured with
     * DAXPY operations on vectors of length n = 1, 2, 4, ... up to MAXN.
     * Then the time of an h-relation is measured for h = 0..MAXH, and
     * processor 0 fits the parameters g and l with leastsquares() and
     * prints them.
     */
    void leastsquares(int h0, int h1, double *t, double *g, double *l);
    int p, s, s1, iter, i, n, h, destproc[MAXH], destindex[MAXH];
    double alpha, beta, x[MAXN], y[MAXN], z[MAXN], src[MAXH], *dest,
           time0, time1, time, *Time, mintime, maxtime,
           nflops, r, g0, l0, g, l, t[MAXH+1];

    /**** Determine p ****/
    bsp_begin(P);
    p= bsp_nprocs(); /* p = number of processors obtained */
    s= bsp_pid();    /* s = processor number */
    Time= vecallocd(p); bsp_push_reg(Time,p*SZDBL);
    dest= vecallocd(2*MAXH+p); bsp_push_reg(dest,(2*MAXH+p)*SZDBL);
    bsp_sync();

    /**** Determine r ****/
    /* r is only assigned when a measurement has a positive minimum time;
       give it a defined value in case that never happens, because it is
       read unconditionally further down. */
    r= 0.0;
    for (n=1; n <= MAXN; n *= 2){
        /* Initialize scalars and vectors */
        alpha= 1.0/3.0;
        beta= 4.0/9.0;
        for (i=0; i<n; i++){
            z[i]= y[i]= x[i]= (double)i;
        }
        /* Measure time of 2*NITERS DAXPY operations of length n */
        time0=bsp_time();
        for (iter=0; iter<NITERS; iter++){
            for (i=0; i<n; i++)
                y[i] += alpha*x[i];
            for (i=0; i<n; i++)
                z[i] -= beta*x[i];
        }
        time1= bsp_time();
        time= time1-time0;
        bsp_put(0,&time,Time,s*SZDBL,SZDBL);
        bsp_sync();

        /* Processor 0 determines minimum, maximum, average
           computing rate */
        if (s==0){
            mintime= maxtime= Time[0];
            for(s1=1; s1<p; s1++){
                mintime= MIN(mintime,Time[s1]);
                maxtime= MAX(maxtime,Time[s1]);
            }
            if (mintime>0.0){
                /* Compute r = average computing rate in flop/s */
                nflops= 4*NITERS*n;
                r= 0.0;
                for(s1=0; s1<p; s1++)
                    r += nflops/Time[s1];
                r /= p;
                printf("n= %5d min= %7.3lf max= %7.3lf av= %7.3lf Mflop/s ",
                       n, nflops/(maxtime*MEGA),nflops/
                       (mintime*MEGA), r/MEGA);
                fflush(stdout);
                /* Output for fooling benchmark-detecting compilers */
                printf(" fool=%7.1lf\n",y[n-1]+z[n-1]);
            } else {
                printf("minimum time is 0\n");
            }
            /* flushed after either branch */
            fflush(stdout);
        }
    }

    /**** Determine g and l ****/
    for (h=0; h<=MAXH; h++){
        /* Initialize communication pattern */
        for (i=0; i<h; i++){
            src[i]= (double)i;
            if (p==1){
                destproc[i]=0;
                destindex[i]=i;
            } else {
                /* destination processor is one of the p-1 others */
                destproc[i]= (s+1 + i%(p-1)) %p;
                /* destination index is in my own part of dest */
                destindex[i]= s + (i/(p-1))*p;
            }
        }
        /* Measure time of NITERS h-relations */
        bsp_sync();
        time0= bsp_time();
        for (iter=0; iter<NITERS; iter++){
            for (i=0; i<h; i++)
                bsp_put(destproc[i],&src[i],dest,destindex[i]*SZDBL,
                        SZDBL);
            bsp_sync();
        }
        time1= bsp_time();
        time= time1-time0;
        /* Compute time of one h-relation */
        if (s==0){
            t[h]= (time*r)/NITERS;
            printf("Time of %5d-relation= %lf sec= %8.0lf flops\n",
                   h, time/NITERS, t[h]); fflush(stdout);
        }
    }

    if (s==0){
        printf("size of double = %d bytes\n",(int)SZDBL);
        leastsquares(0,p,t,&g0,&l0);
        printf("Range h=0 to p : g= %.1lf, l= %.1lf\n",g0,l0);
        leastsquares(p,MAXH,t,&g,&l);
        printf("Range h=p to HMAX: g= %.1lf, l= %.1lf\n",g,l);
        printf("The bottom line for this BSP computer is:\n");
        printf("p= %d, r= %.3lf Mflop/s, g= %.1lf, l= %.1lf\n",
               p,r/MEGA,g,l);
        fflush(stdout);
    }
    bsp_pop_reg(dest); vecfreed(dest);
    bsp_pop_reg(Time); vecfreed(Time);
    bsp_end();
} /* end bspbench */
コード例 #11
0
ファイル: bspfft_test.cpp プロジェクト: haampie/BSPLib
void bspfft_test()
{
    void bspfft( double * x, int n, int p, int s, int sign, double * w0,
                 double * w, double * tw, int *rho_np, int *rho_p );
    void bspfft_init( int n, int p, int s, double * w0,
                      double * w, double * tw, int *rho_np, int *rho_p );
    int k1_init( int n, int p );

    int p, s, n, q, np, k1, j, jglob, it, *rho_np, *rho_p;
    double time0, time1, time2, ffttime, nflops,
           max_error, error_re, error_im, error,
           *Error, *x, *w0, *w, *tw;

    bsp_begin( P );
    p = bsp_nprocs();
    s = bsp_pid();

    bsp_push_reg( &n, SZINT );
    Error = vecallocd( p );
    bsp_push_reg( Error, p * SZDBL );
    bsp_sync();

    if ( s == 0 )
    {
        printf( "Please enter length n: \n" );

#ifdef _WIN32
        scanf_s( "%d", &n );
#else
        scanf( "%d", &n );
#endif

        if ( n < 2 * p )
        {
            bsp_abort( "Error in input: n < 2p" );
        }

        for ( q = 1; q < p; q++ )
        {
            bsp_put( q, &n, &n, 0, SZINT );
        }
    }

    bsp_sync();

    if ( s == 0 )
    {
        printf( "FFT of vector of length %d using %d processors\n", n, p );
        printf( "performing %d forward and %d backward transforms\n",
                NITERS, NITERS );
    }

    /* Allocate, register,  and initialize vectors */
    np = n / p;
    x = vecallocd( 2 * np );
    bsp_push_reg( x, 2 * np * SZDBL );
    k1 = k1_init( n, p );
    w0 = vecallocd( k1 );
    w =  vecallocd( np );
    tw = vecallocd( 2 * np + p );
    rho_np = vecalloci( np );
    rho_p =  vecalloci( p );

    for ( j = 0; j < np; j++ )
    {
        jglob = j * p + s;
        x[2 * j] = ( double )jglob;
        x[2 * j + 1] = 1.0;
    }

    bsp_sync();
    time0 = bsp_time();

    /* Initialize the weight and bit reversal tables */
    for ( it = 0; it < NITERS; it++ )
    {
        bspfft_init( n, p, s, w0, w, tw, rho_np, rho_p );
    }

    bsp_sync();
    time1 = bsp_time();

    /* Perform the FFTs */
    for ( it = 0; it < NITERS; it++ )
    {
        bspfft( x, n, p, s, 1, w0, w, tw, rho_np, rho_p );
        bspfft( x, n, p, s, -1, w0, w, tw, rho_np, rho_p );
    }

    bsp_sync();
    time2 = bsp_time();

    /* Compute the accuracy */
    max_error = 0.0;

    for ( j = 0; j < np; j++ )
    {
        jglob = j * p + s;
        error_re = fabs( x[2 * j] - ( double )jglob );
        error_im = fabs( x[2 * j + 1] - 1.0 );
        error = sqrt( error_re * error_re + error_im * error_im );

        if ( error > max_error )
        {
            max_error = error;
        }
    }

    bsp_put( 0, &max_error, Error, s * SZDBL, SZDBL );
    bsp_sync();

    if ( s == 0 )
    {
        max_error = 0.0;

        for ( q = 0; q < p; q++ )
        {
            if ( Error[q] > max_error )
            {
                max_error = Error[q];
            }
        }
    }

    for ( j = 0; j < NPRINT && j < np; j++ )
    {
        jglob = j * p + s;
        printf( "proc=%d j=%d Re= %f Im= %f \n", s, jglob, x[2 * j], x[2 * j + 1] );
    }

    fflush( stdout );
    bsp_sync();

    if ( s == 0 )
    {
        printf( "Time per initialization = %lf sec \n",
                ( time1 - time0 ) / NITERS );
        ffttime = ( time2 - time1 ) / ( 2.0 * NITERS );
        printf( "Time per FFT = %lf sec \n", ffttime );
        nflops = 5 * n * log( ( double )n ) / log( 2.0 ) + 2 * n;
        printf( "Computing rate in FFT = %lf Mflop/s \n",
                nflops / ( MEGA * ffttime ) );
        printf( "Absolute error= %e \n", max_error );
        printf( "Relative error= %e \n\n", max_error / n );
    }


    bsp_pop_reg( x );
    bsp_pop_reg( Error );
    bsp_pop_reg( &n );
    bsp_sync();

    vecfreei( rho_p );
    vecfreei( rho_np );
    vecfreed( tw );
    vecfreed( w );
    vecfreed( w0 );
    vecfreed( x );
    vecfreed( Error );
    bsp_end();

} /* end bspfft_test */
コード例 #12
0
ファイル: utils.c プロジェクト: Heliosmaster/master_thesis
/*
 * method that splits the two parts of A which have value "first"
 * and value "second", assigning them respectively to Ar and Ac
 */
struct twomatrices split_matrix(struct sparsematrix* A, double first, double second){

	int k;

	/* initialization of the counters */
	int max1=0;
	int max2=0;

	/* initial sweep of the matrix to see how long should be the vectors*/
	for(k=0;k<A->NrNzElts;k++) 
		(A->ReValue[k] == second) ? max2++ : max1++;

	/* initialization of the vectors */
	long *i1 = vecallocl(max1);
	long *j1 = vecallocl(max1);
	double *v1 = vecallocd(max1);
	double *c1 = vecallocd(max1);

	long *i2 = vecallocl(max2);
	long *j2 = vecallocl(max2);
	double *v2 = vecallocd(max2);
	double *c2 = vecallocd(max2);


	/* population of the vectors */
	int index1=0;
	int index2=0;
	for(k=0;k<(A->NrNzElts);k++){
		if (A->ReValue[k] == second ){
			i2[index2] = A->i[k];
			j2[index2] = A->j[k];
			v2[index2] = second;
			c2[index2] = 0.0;
			index2++;
		}
		else {
			i1[index1] = A->i[k];
			j1[index1] = A->j[k];
			v1[index1] = first;
			c1[index1] = 0.0;
			index1++;
		}
	}


	/* construction of the output */
	struct sparsematrix A1, A2;
	MMSparseMatrixInit(&A1);
	MMSparseMatrixInit(&A2); 
	A1.m = A->m;
	A1.n = A->n;
	A1.NrNzElts = max1;
	A1.i = i1;
	A1.j = j1;
	A1.ReValue = v1;
	A1.ImValue = c1;

	A2.NrNzElts = max2;
	A2.m = A->m;
	A2.n = A->n;
	A2.i = i2;
	A2.j = j2;
	A2.ReValue = v2;
	A2.ImValue = c2;

	struct twomatrices output;
	output.Ar = A1;
	output.Ac = A2;

	return output;
}
コード例 #13
0
ファイル: genmat.c プロジェクト: toothbrush/bsp-cg
/*
 * Generate a random sparse N x N matrix that is symmetric and strictly
 * diagonally dominant, plus a random right-hand-side vector, and write
 * both out via outputMondriaanMatrix / outputMathematicaMatrix.
 *
 * Usage: prog N [mu] [sparsity]
 *   N        matrix size (required)
 *   mu       scalar passed to addDiagonal (default 2.5)
 *   sparsity targeted nonzero density (default 0.2)
 *
 * Exit codes used here: -1 usage, -2 unparsable argument, 44 out of
 * memory, 666 generated more entries than the arrays can hold.
 */
int main (int argc, char** argv) {

    // aim for a nonzero density given by sparsity:
    sparsity = 0.2; // nz = sparsity*100% of the size of the matrix

    /*
     * we say 'aim' here, since of course initially exactly
     *    nz = sparsity * N^2
     * nonzeroes will be generated at random spots, but because
     * the matrix must be symmetric and diagonally positive, the
     * actual number of nonzeroes will probably not be exactly
     * the projected number.
     */

    // read the desired size of the matrix from command line
    if (argc < 2) {
        printf("Usage: %s N [mu] [sparsity]\n", argv[0]);
        exit(-1);
    }

    if(sscanf(argv[1], "%d", &N) != 1) {
        printf("couldn't read command-line argument for N. must be an integer.\n");
        exit(-2);
    }
    double mu;
    mu = 2.5; //default scalar for making matrix diagonal-dominant

    // maybe the user supplied a different mu
    if(argc > 2 && sscanf(argv[2], "%lf", &mu) != 1) {
        exit(-2);
    }
    // maybe the user supplied a different sparsity
    if(argc > 3 && sscanf(argv[3], "%lf", &sparsity) != 1) {
        exit(-2);
    }

    int nz = sparsity*N*N;
    int* xs;
    int* ys;
    double* vals;

    fprintf(stderr,"Generating matrix. N=%d, density=%lf, target nz=%d, ",
            N, sparsity, nz);
    fprintf(stderr, "mu = %lf\n", mu);

    // seed the random generator.
    srandom((unsigned)time(NULL));

    xs = vecalloci(nz);
    ys = vecalloci(nz);
    vals = vecallocd(nz);

    bool* diag_done;
    diag_done = malloc(N*sizeof(bool));
    if(diag_done == NULL) {
        printf("out of memory!");
        exit(44);
    }

    int i;
    for(i = 0; i<N; i++) {
        diag_done[i] = false;
    }

    int nz_generated;

    int x,y;
    nz_generated = 0;
    double fake_transpose;
    x=0;y=0;
    while(x<N && y<N) { //don't escape matrix bounds.

        if (nz_generated % 1000000 == 0) {
            fprintf(stderr,"progress: %f%%\r", (double)nz_generated/(double)(nz/2.0)*100.0);
        }

        // Both branches below store at index nz_generated, so the arrays
        // (nz entries each) are full as soon as nz_generated == nz.
        // this should NEVER happen, although all that's
        // stopping it from happening is our ran() being
        // well-behaved...........
        if(x<=y && nz_generated >= nz) {
            printf("EEK! something went wrong!!\n");
            exit(666);
        }

        if(x==y) {
            //diagonal, so always generate.

            xs[nz_generated]=x;
            ys[nz_generated]=y;
            vals[nz_generated]=ran()*2.0-1.0;
            diag_done[x] = true;

#ifdef DEBUG
            fprintf(stderr,"generated A[//][%d]=%lf\n"
                                                      , ys[nz_generated]
                                                      , vals[nz_generated]);
#endif

            nz_generated++;

        } else {
            // not a diagonal. only add if in
            // lower triangular half
            if(x<y) {

                xs[nz_generated]= x;
                ys[nz_generated]= y;
                // simulate the distribution of values which
                // would occur if we do A+A^T afterwards.
                if (ran() < sparsity) {
                    fake_transpose    = ran()*2.0-1.0;
                    vals[nz_generated]= ran()*2.0-1.0 + fake_transpose;
                } else {
                    vals[nz_generated]= ran()*2.0-1.0;
                }
#ifdef DEBUG
            fprintf(stderr,"generated A[%d][%d]=%lf\n", xs[nz_generated]
                                                      , ys[nz_generated]
                                                      , vals[nz_generated]);
#endif
                nz_generated++;


            }
        }
        // jump ahead by a random distance, wrapping into the next row(s)
        x += 1/sparsity * (ran() + 0.5);
        if( x >= N ) {
            y += x/N;
            x  = x%N;
        }

    }

    fprintf(stderr, "generated initial randoms\n");

    int diagonals_present = 0;
    for(i=0; i<nz_generated; i++) {
        if(xs[i]==ys[i])
            diagonals_present++;
    }

    fprintf(stderr,"generated %d nzeros, array was %d big.\n", nz_generated, nz);


#ifdef DEBUG
    fprintf(stderr, "found %d diagonal(s), still need %d more.\n", diagonals_present, (N-diagonals_present));
#endif

    // add the missing diagonals, and add mu to each diagonal.
    int newsize = nz_generated + (N - diagonals_present);
    fprintf(stderr,"reallocating values array to %lluM \n", (unsigned long long)(SZDBL+2*SZINT)*newsize/1048576);
    int* diag_i;
    int* diag_j;
    double* diag_val;

    diag_i   = realloc(xs  ,SZINT*newsize);
    diag_j   = realloc(ys  ,SZINT*newsize);
    diag_val = realloc(vals,SZDBL*newsize);

    if(diag_i == NULL ||
            diag_j == NULL ||
            diag_val == NULL)
    {
        printf("out of memory!");
        exit(44);
    }

    addDiagonal(mu, diag_i, diag_j, diag_val, nz_generated, newsize, diag_done);
    nz_generated=newsize;
#ifdef DEBUG
    for(i=0;i<newsize;i++) {
        fprintf(stderr,"after addDiagonal A[%d][%d]=%lf\n", diag_i[i],diag_j[i], diag_val[i]);
    }

    fprintf(stderr, "Going to make symmetric now... (nz_generated = %d)\n", nz_generated);
#endif

    // now we explicitly fill the array with the
    // upper triangle values

    // things must be symmetric, but they aren't, yet
    // ... here's a good place to do the transposing thing.

    newsize = nz_generated * 2 - N; //number of real nonzeros, don't
                                    // count diagonals twice.
    int *new_i;
    int *new_j;
    double *new_v;

    new_i = realloc(diag_i  ,SZINT*newsize);
    new_j = realloc(diag_j  ,SZINT*newsize);
    new_v = realloc(diag_val,SZDBL*newsize);

    // all three results must be checked (new_j used to be skipped)
    if(new_i == NULL ||
            new_j == NULL ||
            new_v == NULL)
    {
        printf("out of memory (2)!");
        exit(44);
    }
    diag_i = new_i;
    diag_j = new_j;
    diag_val = new_v;
    addTranspose(newsize,diag_i,diag_j,diag_val,
                              nz_generated);

#ifdef DEBUG
    for(i=0;i<newsize;i++)
        // to make diags stand out.
        if(diag_i[i]==diag_j[i])
            fprintf(stderr,"after transpose A[%d][%d]=%lf \\\\\n", diag_i[i],diag_j[i], diag_val[i]);
        else
            fprintf(stderr,"after transpose A[%d][%d]=%lf\n", diag_i[i],diag_j[i], diag_val[i]);
#endif

    checkStrictDiagonallyDominant(diag_i,diag_j,diag_val, newsize);

    // now quickly generate a test-vector to solve against:

    double *vec = vecallocd(N);
    for(i=0;i<N;i++)
        vec[i]=ran();

    fprintf(stderr,"Left with %d nonzeroes; nonzero density = %lf (desired=%lf)\n", newsize, newsize/((double)N*N), sparsity);
    fprintf(stderr,"========== OUTPUTTING ... ==========\n");

    outputMondriaanMatrix(newsize, diag_i, diag_j, diag_val, vec);
    outputMathematicaMatrix(newsize, diag_i, diag_j, diag_val, vec);

    free(diag_done);
    free(vec);
    free(diag_i);
    free(diag_j);
    free(diag_val);

    return 0;
}
コード例 #14
0
void bspbench(){
    /*
     * BSP benchmark for one component described by tnode: the processes
     * are pinned to the indices of tnode's sons, the computing rate r is
     * measured with DAXPY operations, the time of an h-relation is
     * measured for h = 0..MAXH, and processor 0 fits and prints g and l.
     */
    void leastsquares(int h0, int h1, double *t, double *g, double *l);
    int p, s, s1, iter, i, n, h, destproc[MAXH], destindex[MAXH];
    double alpha, beta, x[MAXN], y[MAXN], z[MAXN], src[MAXH], *dest,
           time0, time1, time, *Time, mintime, maxtime,
           nflops, r, g0, l0, g, l, t[MAXH+1]; 
  
    /* NOTE(review): pin has room for 100 entries; nothing here checks
       that tnode->length stays within that - confirm against callers. */
    size_t pin[100];

    // Determine p 
    // start: new code for pinning
    for (i=0; i< tnode->length; i++) pin[i] = tnode->sons[i]->index;
    mcbsp_set_pinning( pin, tnode->length );
    bsp_begin(tnode->length);
    // end: new code for pinning

    p= bsp_nprocs(); // p = number of processors obtained 
    s= bsp_pid();    // s = processor number

    Time= vecallocd(p); bsp_push_reg(Time,p*SZDBL);
    dest= vecallocd(2*(MAXH+p)); bsp_push_reg(dest,(2*(MAXH+p))*SZDBL);
    bsp_sync();

    // Determine r 
    // r is only assigned when a measurement has a positive minimum time;
    // give it a defined value in case that never happens, because it is
    // read unconditionally further down.
    r= 0.0;

    for (n=1; n < MAXN; n *= 2){
        // Initialize scalars and vectors 
        alpha= 1.0/3.0;
        beta= 4.0/9.0;
        for (i=0; i<n; i++){
            z[i]= y[i]= x[i]= (double)i;
        }
        // Measure time of 2*NITERS DAXPY operations of length n 
        time0=bsp_time();
        for (iter=0; iter<NITERS; iter++){
            for (i=0; i<n; i++)
                y[i] += alpha*x[i];
            for (i=0; i<n; i++)
                z[i] -= beta*x[i];
        }
        time1= bsp_time(); 
        time= time1-time0; 
        bsp_put(0,&time,Time,s*SZDBL,SZDBL);
        bsp_sync();

        // Processor 0 determines minimum, maximum, average computing rate 
        if (s==0){
            mintime= maxtime= Time[0];
            for(s1=1; s1<p; s1++){
                mintime= MIN(mintime,Time[s1]);
                maxtime= MAX(maxtime,Time[s1]);
            }
            if (mintime>0.0){
                // Compute r = average computing rate in flop/s 
                nflops= 4*NITERS*n;
                r= 0.0;
                for(s1=0; s1<p; s1++)
                    r += nflops/Time[s1];
                r /= p; 

                // Output for fooling benchmark-detecting compilers 
                printf( "", y[n-1]+z[n-1] );
            } 
        }
    }

    // Determine g and l 
    for (h=0; h<=MAXH; h++){
        // Initialize communication pattern (once; the original ran this
        // identical loop twice in a row)
        for (i=0; i<h; i++){
            src[i]= (double)i;
            if (p==1){
                destproc[i]=0;
                destindex[i]=i;
            } else {
                // destination processor is one of the p-1 others 
                destproc[i]= (s+1 + i%(p-1)) %p;
                // destination index is in my own part of dest 
                destindex[i]= s + (i/(p-1))*p;
            }
        }

        // Measure time of NITERS h-relations 
        bsp_sync(); 

        time0= bsp_time(); 
        for (iter=0; iter<NITERS; iter++){
            for (i=0; i<h; i++) {
                bsp_put(destproc[i],  &src[i] , dest              , destindex[i]*SZDBL, SZDBL);
            }

            bsp_sync(); 
        }

        time1= bsp_time();
        time= time1-time0;

        // Compute time of one h-relation 
        if (s==0){
            t[h]= (time*r)/NITERS;
//#define SEHLOC_BENCH_VERBOSE
#ifdef SEHLOC_BENCH_VERBOSE
            char strnodes[256];
            size_t used = 0;
            strnodes[0] = '\0';
            // Append " <index>" for every son. Writing at an offset avoids
            // passing strnodes as both destination and source, and the
            // remaining space bounds every write.
            for (i=0; i<tnode->length; i++) {
                int w = snprintf(strnodes+used, sizeof strnodes - used,
                                 " %d", tnode->sons[i]->index);
                if (w < 0 || (size_t)w >= sizeof strnodes - used) break;
                used += (size_t)w;
            }
            printf("SEH# Level%d %5d %lf %8.0lf\n", tnode->level, h, time/NITERS, t[h]); fflush(stdout);
#endif
        }
    }

    if (s==0){
        leastsquares(0,p,t,&g0,&l0); 
        printf("Range h=0 to p   : g= %.1lf, l= %.1lf\n",g0,l0);
        leastsquares(p,MAXH,t,&g,&l);
        // fall back to twice the short-range g when the fit is not positive
        g=(g>0)? g: g0*2;
        printf("Range h=p to HMAX: g= %.1lf, l= %.1lf\n",g,l);
        printf("The bottom line for this MultiBSP component is:\n");
        printf("<p= %d, r= %.3lf Mflop/s, g= %.1lf, l= %.1lf>\n",
               p,r/MEGA,g,l);
        fflush(stdout);
    }
    bsp_pop_reg(dest); vecfreed(dest);
    bsp_pop_reg(Time); vecfreed(Time);
    bsp_end();

} /* end bspbench */
コード例 #15
0
/*
* function that assigns the nonzeros of matrix either to Ar or Ac
*/
struct twomatrices localview(struct sparsematrix* matrix){

  /* dividing between A1 and A2 */
  struct twomatrices A = split_matrix(matrix,1.0,2.0);

  struct sparsematrix* A1 = &(A.Ar);
  struct sparsematrix* A2 = &(A.Ac);

  /* explicit saving of m,n for brevity */
  int m = matrix->m;
  int n = matrix->n;

  /* 
  * building the bookkeeping vectors
  * nzXr = nonzeros in the rows of AX
  * nzXc = nonzeros in the columns of AX
  * nzr,nzc = nonzeros in row/col of matrix
  */
  long* nz1r = nnz(A1->i, A1->NrNzElts, m);
  long* nz2r = nnz(A2->i, A2->NrNzElts, m);
  long* nzr = nnz(matrix->i, matrix->NrNzElts, m);

  long* nz1c = nnz(A1->j, A1->NrNzElts, n);
  long* nz2c = nnz(A2->j, A2->NrNzElts, n);
  long* nzc = nnz(matrix->j, matrix->NrNzElts, n);

  /* storing the number of nonzeros that have to be assigned */
  int len = matrix->NrNzElts;
  
  /* 
  * initialization of the new vectors to be populated
  * assuming everything is assigned to one and the other stays empty
  * the max size is matrix.NrNzElts (len)
  */
  long* ir = vecallocl(len);
  long* jr = vecallocl(len);
  long* ic = vecallocl(len);
  long* jc = vecallocl(len);

  /* counters for filling of ir,jr and ic,jc */
  int index_r = 0;
  int index_c = 0;
  
  int i,j,k;
  k = 0;
  while(len>0){

    /* TODO k randomly chosen between 0 and len */
    /* k = randi(len); */

    /* computing explicitly row and column of the k-th element of the matrix */
    i = matrix->i[k];
    j = matrix->j[k];

    /* computing whether i,j are split */
    int rowsplit = (nz1r[i] && nz2r[i]);
    int colsplit = (nz1c[j] && nz2c[j]);

    /* actual assignment of the nonzero */
    if (!xor(rowsplit,colsplit)){
      if (nzr[i]<nzc[j]){
        ir[index_r] = i;
        jr[index_r] = j;
        index_r++;
      } else {
        ic[index_c] = i;
        jc[index_c] = j;
        index_c++;
      }
    } else {
      if (rowsplit) {
        ic[index_c] = i;
        jc[index_c] = j;
        index_c++;
      } else {
        ir[index_r] = i;
        jr[index_r] = j;
        index_r++;
      }
    }
    /*
    * putting the last element that could be chosen instead of the
    * k-th one, and we reduce the interval for randi by 1
    */
     /* matrix.i[k] = matrix.i[len-1];
    matrix.j[k] = matrix.j[len-1]; */
    k++;
    len--;
  }

  /* creation of vectors of the right size */
  long* ir_n = vecallocl(index_r);
  long* jr_n = vecallocl(index_r);
  long* ic_n = vecallocl(index_c);
  long* jc_n = vecallocl(index_c);

  /* copying only the filled part */
  memcpy(ir_n,ir,index_r*SZLONG);
  memcpy(jr_n,jr,index_r*SZLONG);
  memcpy(ic_n,ic,index_c*SZLONG);
  memcpy(jc_n,jc,index_c*SZLONG);

  /* creating the (dummy) values for the nonzeros */
  double* val_r = vecallocd(index_r);
  double* valc_r = vecallocd(index_r);
  double* val_c = vecallocd(index_c);
  double* valc_c = vecallocd(index_c);

  for(k=0;k<index_r;k++){
    val_r[k] = 1.0;
    valc_r[k] = 0.0;
  }
  for(k=0;k<index_c;k++){
    val_c[k] = 1.0;
    valc_c[k] = 0.0;
  }

  /* explicit creation of the final matrices */
  struct sparsematrix Ar;
  MMSparseMatrixInit(&Ar);
  Ar.NrNzElts = index_r;
  Ar.m = m;
  Ar.n = n;
  Ar.i = ir_n;
  Ar.j = jr_n;
  Ar.ReValue = val_r;
  Ar.ImValue = valc_r;

  struct sparsematrix Ac;
  MMSparseMatrixInit(&Ac);
  Ac.NrNzElts = index_c;
  Ac.i = ic_n;
  Ac.j = jc_n;
  Ac.m = m;
  Ac.n = n;
  Ac.ReValue = val_c;
  Ac.ImValue = valc_c;
  
  /* freeing memory from unnecessary arrays */
  vecfreel(ir);
  vecfreel(jr);
  vecfreel(ic);
  vecfreel(jc);

  vecfreel(nz1c);
  vecfreel(nz2c);
  vecfreel(nzc);

  vecfreel(nz1r);
  vecfreel(nz2r);
  vecfreel(nzr);

  MMDeleteSparseMatrix(A1);
  MMDeleteSparseMatrix(A2);

  /* explicit construction of the output */
  struct twomatrices output;

  output.Ar = Ar;
  output.Ac = Ac;

  return output;
}