Example #1
0
double bspip(int p, int s, int n, double *x, double *y){
    /* Compute inner product of vectors x and y of length n>=0.

       p = number of processors, s = number of the calling processor.
       x, y point to the local vector parts; exactly nloc(p,s,n) leading
       elements of each are read.

       Every processor sends its local partial sum to slot s of the
       Inprod array on all p processors, then adds the p slots in the
       same order (t = 0..p-1), so each processor returns the same value.

       Collective: the function calls bsp_sync() twice and must therefore
       be called by all processors. */

    int nloc(int p, int s, int n);
    double inprod, *Inprod, alpha;
    int i, t, nl;
  
    /* Registration only becomes effective at the next bsp_sync() */
    Inprod= vecallocd(p); bsp_push_reg(Inprod,p*SZDBL);
    bsp_sync();

    /* The local length is loop-invariant: compute it once instead of
       calling nloc() in the loop condition on every iteration */
    nl= nloc(p,s,n);
    inprod= 0.0;
    for (i=0; i<nl; i++){
        inprod += x[i]*y[i];
    }
    /* Broadcast the local partial sum into slot s on every processor */
    for (t=0; t<p; t++){
        bsp_put(t,&inprod,Inprod,s*SZDBL,SZDBL);
    }
    bsp_sync();

    /* All partial sums have arrived; reduce them locally */
    alpha= 0.0;
    for (t=0; t<p; t++){
        alpha += Inprod[t];
    }
    bsp_pop_reg(Inprod); vecfreed(Inprod);

    return alpha;

} /* end bspip */
Example #2
0
void bspinprod(){
    
    double bspip(int p, int s, int n, double *x, double *y);
    int nloc(int p, int s, int n);
    double *x, alpha, time0, time1;
    int p, s, n, nl, i, iglob;
    
    bsp_begin(P);
    p= bsp_nprocs(); /* p = number of processors obtained */ 
    s= bsp_pid();    /* s = processor number */ 
    if (s==0){
        printf("Please enter n:\n"); fflush(stdout);
        scanf("%d",&n);
        if(n<0)
            bsp_abort("Error in input: n is negative");
    }
    bsp_push_reg(&n,SZINT);
    bsp_sync();

    bsp_get(0,&n,0,&n,SZINT);
    bsp_sync();
    bsp_pop_reg(&n);

    nl= nloc(p,s,n);
    x= vecallocd(nl);
    for (i=0; i<nl; i++){
        iglob= i*p+s;
        x[i]= iglob+1;
    }
    bsp_sync(); 
    time0=bsp_time();

    alpha= bspip(p,s,n,x,x);
    bsp_sync();  
    time1=bsp_time();

    printf("Processor %d: sum of squares up to %d*%d is %.lf\n",
            s,n,n,alpha); fflush(stdout);
    if (s==0){
        printf("This took only %.6lf seconds.\n", time1-time0);
        fflush(stdout);
    }

    vecfreed(x);
    bsp_end();

} /* end bspinprod */
Example #3
0
 void bspfft1d_init(int n1, int N, int s, int t, double *w0, double *w, double *tw,
 int *rho_np, int *rho_p){

   /* Parallel set-up routine that fills every table used by the FFT:
        rho_np, rho_p - filled by bitrev_init for sizes nloc(N,t,n1) and N
        w0, w         - filled by ufft_init for sizes k1 and nloc(N,t,n1)
        tw            - one block of 2*nloc(N,t,n1) doubles per value of c
                        visited below, filled by twiddle_init
      The parameter s is not used by this routine. */

   int local_len, first_c, block, c;

   local_len = nloc(N, t, n1);
   bitrev_init(local_len, rho_np);
   bitrev_init(N, rho_p);

   first_c = k1_init(n1, N, local_len);
   ufft_init(first_c, w0);
   ufft_init(local_len, w);

   /* c runs through first_c, first_c*local_len, ... while c <= N;
      each step appends one twiddle block to tw */
   block = 0;
   c = first_c;
   while (c <= N){
     double alpha = (t % c) / (double)c;

     twiddle_init(local_len, alpha, rho_np, tw + 2*block*local_len);
     block++;
     c *= local_len;
   }

 } /* end bspfft1d_init */
Example #4
0
/*
 * Discovery callback for the OSF backend.
 *
 * Creates one HWLOC_OBJ_NODE object per RAD reported by rad_get_num(),
 * fills in its cpuset and memory attributes, builds an
 * nbnodes x nbnodes distance matrix and hands it to hwloc_distances_set(),
 * then adds the PU level and the "Backend"="OSF" info.
 *
 * Returns 0 without doing anything when the root object already has a
 * cpuset, and 1 after performing the discovery.
 */
static int
hwloc_look_osf(struct hwloc_backend *backend)
{
    struct hwloc_topology *topology = backend->topology;
    cpu_cursor_t cursor;
    unsigned nbnodes;
    radid_t radid, radid2;
    radset_t radset, radset2;
    cpuid_t cpuid;
    cpuset_t cpuset;
    struct hwloc_obj *obj;
    unsigned distance;

    if (topology->levels[0][0]->cpuset)
        /* somebody discovered things */
        return 0;

    hwloc_alloc_obj_cpusets(topology->levels[0][0]);

    nbnodes = rad_get_num();

    cpusetcreate(&cpuset);
    radsetcreate(&radset);
    radsetcreate(&radset2);
    {
        /* NOTE(review): none of these three allocations is checked for NULL.
         * They are not freed in this function; presumably
         * hwloc_distances_set() takes ownership - TODO confirm. */
        hwloc_obj_t *nodes = calloc(nbnodes, sizeof(hwloc_obj_t));
        unsigned *indexes = calloc(nbnodes, sizeof(unsigned));
        float *distances = calloc(nbnodes*nbnodes, sizeof(float));
        unsigned nfound;
        numa_attr_t attr;

        /* attr is set up once; radset itself is refilled with the current
         * radid on every iteration below (presumably radset_t is a handle,
         * so attr sees the updated contents - TODO confirm). */
        attr.nattr_type = R_RAD;
        attr.nattr_descr.rd_radset = radset;
        attr.nattr_flags = 0;

        for (radid = 0; radid < (radid_t) nbnodes; radid++) {
            rademptyset(radset);
            radaddset(radset, radid);
            cpuemptyset(cpuset);
            if (rad_get_cpus(radid, cpuset)==-1) {
                fprintf(stderr,"rad_get_cpus(%d) failed: %s\n",radid,strerror(errno));
                /* NOTE(review): on this path nodes[radid], indexes[radid] and
                 * row radid of distances keep their calloc'ed zero values,
                 * yet nbnodes entries are still passed on below. */
                continue;
            }

            indexes[radid] = radid;
            nodes[radid] = obj = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, radid);
            obj->cpuset = hwloc_bitmap_alloc();
            obj->memory.local_memory = rad_get_physmem(radid) * hwloc_getpagesize();
            obj->memory.page_types_len = 2;
            /* NOTE(review): malloc result is used without a NULL check. */
            obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
            memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
            obj->memory.page_types[0].size = hwloc_getpagesize();
            /* Without HAVE__SC_LARGE_PAGESIZE the second entry keeps size 0
             * from the memset above. */
#ifdef HAVE__SC_LARGE_PAGESIZE
            obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif

            /* Copy every CPU of the OS cpuset into the hwloc bitmap. */
            cursor = SET_CURSOR_INIT;
            while((cpuid = cpu_foreach(cpuset, 0, &cursor)) != CPU_NONE)
                hwloc_bitmap_set(obj->cpuset, cpuid);

            hwloc_debug_1arg_bitmap("node %d has cpuset %s\n",
                                    radid, obj->cpuset);

            hwloc_insert_object_by_cpuset(topology, obj);

            /* Fill row radid of the distance matrix: start every entry at
             * RAD_DIST_REMOTE, then walk the distance values upwards and
             * give each node the first (smallest) distance at which it
             * shows up in radset2. */
            nfound = 0;
            for (radid2 = 0; radid2 < (radid_t) nbnodes; radid2++)
                distances[radid*nbnodes+radid2] = RAD_DIST_REMOTE;
            for (distance = RAD_DIST_LOCAL; distance < RAD_DIST_REMOTE; distance++) {
                attr.nattr_distance = distance;
                /* get set of NUMA nodes at distance <= DISTANCE */
                /* (nloc here takes (&attr, radset2); presumably the OS NUMA
                 * library call - TODO confirm) */
                if (nloc(&attr, radset2)) {
                    fprintf(stderr,"nloc failed: %s\n", strerror(errno));
                    continue;
                }
                cursor = SET_CURSOR_INIT;
                while ((radid2 = rad_foreach(radset2, 0, &cursor)) != RAD_NONE) {
                    /* Only overwrite entries that are still at the initial
                     * RAD_DIST_REMOTE value, so the first hit wins. */
                    if (distances[radid*nbnodes+radid2] == RAD_DIST_REMOTE) {
                        distances[radid*nbnodes+radid2] = (float) distance;
                        nfound++;
                    }
                }
                if (nfound == nbnodes)
                    /* Finished finding distances, no need to go up to RAD_DIST_REMOTE */
                    break;
            }
        }

        hwloc_distances_set(topology, HWLOC_OBJ_NODE, nbnodes, indexes, nodes, distances, 0 /* OS cannot force */);
    }
    radsetdestroy(&radset2);
    radsetdestroy(&radset);
    cpusetdestroy(&cpuset);

    /* add PU objects */
    hwloc_setup_pu_level(topology, hwloc_fallback_nbprocessors(topology));

    hwloc_obj_add_info(topology->levels[0][0], "Backend", "OSF");
    if (topology->is_thissystem)
        hwloc_add_uname_info(topology);
    return 1;
}
Example #5
0
/*
 * SPMD body: all-pairs shortest paths by repeated min-plus squaring of an
 * N x N distance matrix, distributed cyclically over a pm x pn processor
 * grid.
 *
 * Distribution: processor (s,t) with pid = s + t*pm owns element (i,j)
 * iff i % pm == s and j % pn == t, stored row-major at local index
 * (i/pm)*nlc + j/pn. A square grid pm == pn is used when the number of
 * processors is a perfect square; otherwise the code falls back to a
 * 1 x p grid (column-cyclic).
 *
 * The value 1000 is used as the "no path" sentinel, as in the sample
 * matrix below. N, gen_graph, compare_int, nloc and the vecalloci /
 * vecfreei helpers are defined outside this function.
 */
void mainloop(){

//int init[N*N] = {0,3,8,1000,-4, 1000,0,1000,1,7,1000,4,0,1000,1000,
//2,1000,-5,0,1000,1000,1000,1000,6,0};
   
int nlr,nlc,s,t,i,j,k,li,lsize,tsize0, tsize1,tempp,tempoff,rpos,cpos, 
*lpart,*linter,*gindx,*lcol,*lrow,*lsrow, *lscol, *ltrow, *ltcol, *temp;

int* init = gen_graph(N, 0.05);  

bsp_begin(bsp_nprocs());

/**********Initialization SuperStep 0***************/

/* Compute 2D processor numbering from 1D numbering.
   A pm x pm grid is only valid when the processor count is a perfect
   square; testing pm*pm (rather than p/pm == pm) also rejects counts such
   as 5 or 10. Otherwise fall back to a simple 1D cyclic distribution. */
int nprocs = bsp_nprocs();
int pm = sqrt(nprocs);
int pn;
if ( pm*pm != nprocs ){
	pn = nprocs;
	pm = 1;
	t = bsp_pid();
	s = 0;
  
}else{
	pn = pm;
	s= bsp_pid()%pm;  /* 0 <= s < pm */
	t= bsp_pid()/pn;  /* 0 <= t < pn */
}

nlr=  nloc(pm,s,N); /* number of local rows */
nlc=  nloc(pn,t,N); /* number of local columns */

lsize = nlr*nlc;						  //interpret 2D size to array size
lpart = vecalloci(lsize);				  //Initialize local part of processor s
linter = vecalloci(lsize);				  //Intermediate array used for the matrix "multiplication"
gindx = vecalloci(lsize);				  //Array to store the global indices of the local elements
lcol  = vecalloci(lsize);				  //Array to store the global column index
lrow  = vecalloci(lsize);				  //Array to store the global row index
bsp_push_reg(lpart,lsize*SZINT);

//Distribute the Data.
//Both the row and the column must belong to this processor; filtering on
//the column alone would write N*nlc elements into buffers of nlr*nlc.
li=0;
for ( i= 0; i < N; i++){
	if ((i % pm) != s){
		continue;
	}
	for ( j= 0; j < N; j++){
		if ((j % pn) == t){
			lpart[li] = init[N*i+j];
			lrow[li] = i;
			lcol[li] = j;
			gindx[li] = N*i+j;
			li++;	
		}
	}
}

vecfreei(init);//out of the shared space

tsize0 = tsize1 =lsize;
temp = lrow;

//find unique global rows for processor s (in-place removal of duplicates)
for(i=0;i<tsize0;i++){
    for(j=0;j<tsize0;j++){
         if(i==j){
             continue;
         }
         else if(*(temp+i)==*(temp+j)){
             k=j;
             tsize0--;
             while(k < tsize0){
                 *(temp+k)=*(temp+k+1);
                 k++;
             }
              j=0;
         }
    }
}
temp = lcol;

//find unique global column for processor s (in-place removal of duplicates)
for(i=0;i<tsize1;i++){
    for(j=0;j<tsize1;j++){
         if(i==j){
             continue;
         }
         else if(*(temp+i)==*(temp+j)){
             k=j;
             tsize1--;
             while(k < tsize1){
                 *(temp+k)=*(temp+k+1);
                 k++;
             }
              j=0;
         }
    }
}


//keep unique global rows and columns in arrays
//initialize arrays to hold the elements of those rows and columns(ltcol, ltrow)
lscol  = vecalloci(tsize1); 
lsrow  = vecalloci(tsize0);
ltcol  = vecalloci(N*tsize1);
ltrow  = vecalloci(N*tsize0);

for(i=0;i < tsize0;i++){
    lsrow[i] = lrow[i];
  }
for(i=0;i < tsize1;i++){
    lscol[i] = lcol[i];
  }


vecfreei(lcol);//not needed from this point on
vecfreei(lrow);//we use lscol, lsrow, ltrow, ltcol

//sort arrays (required by the bsearch lookups below)
qsort (lsrow, tsize0, sizeof(int), compare_int);
qsort (lscol, tsize1, sizeof(int), compare_int);
bsp_sync();
/**********End Initialization SuperStep 0***************/

double time0= bsp_time();
/*********Repeated Squaring loop start*************/
j=1;
while ((N-1) > j) {

/*************Comm. SuperStep j0*************/
/* Owner and local offset of a global element (r,c) follow directly from
   the distribution: pid = (r % pm) + (c % pn)*pm and
   offset = (r / pm) * (local columns of the owner) + c / pn. */

//fetch every needed global column: ltcol[N*i+k] = A(k, lscol[i])
for(i=0;i < tsize1;i++){
	int col = lscol[i];
	int tc = col % pn;            /* processor column owning col */
	int onlc = nloc(pn,tc,N);     /* local row length on the owner */
	for(k=0; k<N;k++){
		tempp = (k % pm) + tc*pm;
		tempoff = (k / pm)*onlc + col/pn;
		bsp_get(tempp, &lpart[0],tempoff*SZINT, &ltcol[N*i+k],SZINT);
	} 
}

//fetch every needed global row: ltrow[N*i+k] = A(lsrow[i], k)
for(i=0;i < tsize0;i++){
	int row = lsrow[i];
	int sr = row % pm;            /* processor row owning row */
	for(k=0; k<N;k++){
		int tc = k % pn;
		tempp = sr + tc*pm;
		tempoff = (row / pm)*nloc(pn,tc,N) + k/pn;
		bsp_get(tempp, &lpart[0],tempoff*SZINT, &ltrow[N*i+k],SZINT);
	} 
}
bsp_sync();
/*************End Comm. SuperStep j0*************/

/*************Comp. SuperStep j1*************/
for ( i=0; i<lsize; i++) {
  
	int gcol = gindx[i] % N; //get global col indx of current element
	int grow = gindx[i]/N;	 //get global row indx of current element

	//find appropriate indx of the global rows and columns to perform "multiplication".
	//The element size must be that of an int, not of the pointer; the
	//tables were built from these very indices, so the lookups succeed.
	int *rp = bsearch (&grow, lsrow, tsize0, sizeof (*lsrow),compare_int);
	rpos = rp - lsrow;

	int *cp = bsearch (&gcol, lscol, tsize1, sizeof (*lscol),compare_int);
	cpos = cp - lscol;

	linter[i]=1000;//initialize with the "no path" sentinel

	//this is where the update is done (min-plus product of row and column)
	for(k=0;k<N;k++){
		int cand = ltrow[N*rpos + k]+ltcol[N*cpos + k];
		if (cand < linter[i]){
			linter[i] = cand;
		}
	}

}

memcpy(lpart,linter,lsize*SZINT);
j = 2*j;
bsp_sync();
/*************End Comp. SuperStep j1*************/

}
/*********Repeated Squaring loop end*************/
double time1= bsp_time();
bsp_sync();
/*********display matrices and time*********/
if(bsp_pid()==0){
	printf( " \n Block Cyclic Distr  calculation of APSP took: %f seconds \n", time1-time0 ); 
}
printf("\n ");

//clean up
bsp_pop_reg(lpart);
vecfreei(lpart);
vecfreei(linter);
vecfreei(lscol);
vecfreei(lsrow);
vecfreei(ltcol);
vecfreei(ltrow);
vecfreei(gindx);

bsp_end();   
}
Example #6
0
/*
 * SPMD body: all-pairs shortest paths by repeated min-plus squaring of an
 * N x N distance matrix with a block-row distribution.
 *
 * Processor g owns nloc(p,g,N) consecutive global rows; the blocks follow
 * each other in processor order, so the first row of processor g is the
 * sum of the block sizes of processors 0..g-1. For every global column lj
 * the column is gathered from all processors into lcol and used to update
 * column lj of the local rows.
 *
 * The value 1000 is used as the "no path" sentinel, as in the sample
 * matrix below. N, gen_graph, nloc and the vecalloci / vecfreei helpers
 * are defined outside this function.
 */
void mainloop(){

//int init[N*N] = {0,3,8,1000,-4, 1000,0,1000,1,7,1000,4,0,1000,1000,
//2,1000,-5,0,1000,1000,1000,1000,6,0};

int i,j,v,t,lsize,*lsize_m,*lrow,*lcol, *linter,*startrow_m;
int li,lj,lk,startrow, endrow,g;

int* init = gen_graph(N, 0.05);  

bsp_begin(bsp_nprocs());


/**********Initialization***************/

/*******Comp. Superstep 0******/

lsize = nloc(bsp_nprocs(),bsp_pid(), N); //Get the number of rows of processor s
lrow = vecalloci(lsize*N);				 //The main storing array of processor s
lcol = vecalloci(N);					 //array to hold the column for the matrix squaring
startrow_m = vecalloci(bsp_nprocs());    //array to hold all processors starting global row
lsize_m = vecalloci(bsp_nprocs());		 //array to hold the number of rows of all processors
linter = vecalloci(lsize*N);			 //Intermediate array used for the matrix "multiplication"

bsp_push_reg(startrow_m,bsp_nprocs()*SZINT);
bsp_push_reg(lsize_m,bsp_nprocs()*SZINT);
bsp_push_reg(lrow,lsize*N*SZINT);

/****Get the first and last global row of processor s***/
/* The block sizes differ between processors when N is not a multiple of
   the processor count, so pid*lsize would give overlapping or missing
   rows. Sum the sizes of all preceding blocks instead; this is what the
   column gather below (blocks concatenated in processor order) relies on. */
startrow = 0;
for(g=0; g<bsp_pid(); g++){
	startrow += nloc(bsp_nprocs(),g,N);
}
endrow = startrow + lsize;



//Distribute Data, according row block distribution
li=0;
for ( i= startrow; i < endrow; i++) {
	lj=0;
	 for(j=0; j < N; j++) {	
   		lrow[N*li+lj] = init[N*i+j];
		lj++;
   	 } 
 li++;
}
vecfreei(init); //out of the shared environment

//initialize arrays
for ( i=0; i<bsp_nprocs(); i++) {
			startrow_m[i] = 0;
			lsize_m[i] = 0;
}

bsp_sync();
/*******End Comp. Superstep 0******/


/*********Comm. Superstep 1********/
//Communicate the global starting rows and block sizes of all processors
for(g=0; g<bsp_nprocs();g++){
	bsp_put(g,&startrow,&startrow_m[0],bsp_pid()*SZINT,SZINT);
	bsp_put(g,&lsize,&lsize_m[0],bsp_pid()*SZINT,SZINT);
}
/*********End Comm. Superstep 1*****/
bsp_sync();
/**********End Initialization***************/

double time0= bsp_time();
/*********Repeated Squaring loop start*************/
j=1;
while ((N-1) > j) {
 
		/****Comp. Superstep j0****/ 
		//initialize arrays
		for ( i=0; i<N*lsize; i++) {
			linter[i] = 1000;
		}
		for ( i=0; i<N; i++) {
			lcol[i] = 0;
		}
		bsp_sync();
		/****End Comp. Superstep j0****/ 
	   		
        	for ( lj=0; lj < N; lj++) {
				/***Comm. SuperStep jlj0*******/
				//get global column lj: local row v of processor g lands
				//at lcol[t], t counting rows in processor order
				t=0;
				for(g=0; g < bsp_nprocs();g++){
				  for(v=0; v<lsize_m[g]; v++){				
					bsp_get(g,&lrow[0],(lj+v*N)*SZINT,&lcol[t],SZINT);
					t++;
				  }
				}
				bsp_sync();
				/***End Comm. SuperStep jlj0***/
				/***Comp. SuperStep jlj1*******/
				//update the values that use global column lj
				for ( li = 0; li < lsize; li++){
					for ( lk=0; lk < N; lk++) {
						int cand = lrow[N*li+lk]+lcol[lk];
						if (cand < linter[N*li+lj]){
							linter[N*li+lj] = cand;
						}
					} 
        		}
				bsp_sync();
				/***End Comp. SuperStep jlj1***/
    		}
 		/****Comp. Superstep j1****/ 
		memcpy(lrow,linter,N*lsize*SZINT);
  		j=2*j;
		bsp_sync();
		/****End Comp. Superstep j1****/ 
}
/*********Repeated Squaring loop end*************/
double time1= bsp_time();
bsp_sync();
/*********display matrices and time*********/
if(bsp_pid()==0){
	printf( " \n Block Row Distr (need to know basis) calculation of APSP took: %f seconds \n", time1-time0 ); 
}


//Clean up
bsp_pop_reg(startrow_m);
bsp_pop_reg(lsize_m);
bsp_pop_reg(lrow);


vecfreei(lrow);
vecfreei(lcol);
vecfreei(startrow_m);
vecfreei(lsize_m);
vecfreei(linter);

bsp_end();   
}