/** \brief Clone the RTDB creating a new file Take the filename from the specified RTDB and create a new filename with the specified suffix. Then copy the current RTDB to the new file. \param handle [Input] the RTDB handle \param suffix [Input] the suffix for the new RTDB file \return the status of the file copy operation */ int rtdb_clone(const int handle, const char *suffix) { int status; #ifdef GAGROUPS int me = GA_Nodeid(); #else me = GA_Nodeid(); #endif if (!verify_parallel_access()) return 0; if (handle < 0 || handle >= MAX_RTDB) { (void) fprintf(stderr, "rtdb_clone: handle out of range %d\n", handle); (void) fflush(stderr); return 0; } if (par_mode[handle] == INACTIVE) { (void) fprintf(stderr, "rtdb_clone: handle not active %d\n",handle); (void) fflush(stderr); return 0; } if (parallel_mode != par_mode[handle]) { (void) fprintf(stderr, "rtdb_clone: mode of open and copy mismatch\n"); (void) fflush(stderr); return 0; } if (parallel_mode == RTDB_SEQ_MODE || me == 0) status = rtdb_seq_copy(handle, suffix); if (parallel_mode == RTDB_PAR_MODE) rtdb_broadcast(TYPE_RTDB_STATUS, MT_INT, 1, (void *) &status); return status; }
int main(int argc, char **argv) { int status, me; int max_arrays = 10; double max_sz = 1e8, max_disk = 1e10, max_mem = 1e6; int stack = 120000, heap = 3200000; int numfiles, numioprocs; int total, nproc; MP_INIT(argc,argv); GA_Initialize(); me = GA_Nodeid(); nproc = GA_Nnodes(); total = pow(SIZE,NDIM)*sizeof(double); if (!GA_Uses_ma()) { if (GA_Nodeid() == 0) { printf("GA is not using MA\n"); } stack = 100000; heap = (int)(2.2*(float)(total)); } if (MA_init(MT_F_DBL, stack, heap) ) { if (DRA_Init(max_arrays, max_sz, max_disk, max_mem) != 0) GA_Error("DRA_Init failed: ",0); if (USER_CONFIG == 0) { numfiles = -1; numioprocs = -1; } else if (USER_CONFIG == 1) { numfiles = 1; numioprocs = GA_Cluster_nnodes(); } else if (USER_CONFIG == 2) { numfiles = GA_Cluster_nnodes(); numioprocs = GA_Cluster_nnodes(); } else { numfiles = 1; numioprocs = 1; } if (me==0) { printf("Disk resident arrays configured as:\n"); printf(" Number of files: %d\n",numfiles); printf(" Number of I/O processors: %d\n",numioprocs); } DRA_Set_default_config(numfiles,numioprocs); if (me == 0) printf("\n"); if (me == 0) printf("TESTING PERFORMANCE OF DISK ARRAYS\n"); if (me == 0) printf("\n"); test_io_dbl(); status = DRA_Terminate(); GA_Terminate(); } else { printf("MA_init failed\n"); } if(me == 0) printf("all done ...\n"); MP_FINALIZE(); return 0; }
PetscErrorCode testCreate2D() { int ga; DA da; DALocalInfo info; Vec vec; PetscErrorCode ierr; PetscFunctionBegin; int d1 = 1453, d2 = 1451; ierr = DACreate2d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR, d1,d2,PETSC_DECIDE,PETSC_DECIDE,1,1,0,0, &da); CHKERRQ(ierr); ierr = DAGetLocalInfo(da,&info); CHKERRQ(ierr); ierr = DACreateGlobalArray( da, &ga, &vec); CHKERRQ(ierr); PetscReal **v; ierr = DAVecGetArray(da,vec,&v); CHKERRQ(ierr); int xe = info.xs+info.xm, ye = info.ys+info.ym; for (int j = info.ys; j < ye; ++j) { for (int i = info.xs; i < xe; ++i) { v[j][i] = 1.*i + d1 * j; } } ierr = DAVecRestoreArray(da,vec,&v); CHKERRQ(ierr); PetscPrintf(PETSC_COMM_WORLD,"Updated local portion with DAVec\n"); PetscBarrier(0); { double *da_ptr; VecGetArray(vec, &da_ptr); double *ptr; int low[2],hi[2],ld; NGA_Distribution(ga,GA_Nodeid(),low,hi); NGA_Access(ga,low,hi,&ptr,&ld); printf("[%d] ga:%p\tda:%p\tdiff:%p\n", GA_Nodeid(), ptr, da_ptr, (ptr-da_ptr) ); NGA_Release_update(ga,low,hi); } int lo[2],ld; double val; for (int j = 0; j < d2; ++j) { for (int i = 0; i < d1; ++i) { lo[0] = j; lo[1] = i; NGA_Get(ga,lo,lo,&val,&ld); if( PetscAbs( i + d1*j - val) > .1 ) printf("."); // printf("[%d] (%3.0f,%3.0f)\n", GA_Nodeid(), 1.*i + d1*j, val); } } GA_Print_stats(); ierr = VecDestroy(vec); CHKERRQ(ierr); GA_Destroy(ga); PetscFunctionReturn(0); }
int nw_inp_from_string(Integer rtdb, const char *input) { char filename[30]; FILE *file; #if defined(USE_FCD) || defined(CRAY_T3E) || defined(WIN32) _fcd fstring; #else char fstring[255]; #endif int status; const char base[] = "temp"; const char ending[] = ".nw"; int number ; // This is bad, not 100% sure to be unique, since could be subgroup if (GA_Pgroup_get_world() != GA_Pgroup_get_default()) { number = (int) util_sgroup_mygroup_() ; } else { number = 0 ; } sprintf(filename, "%s%d%s", base,number,ending); if (GA_Nodeid() == 0) { if (!(file = fopen(filename,"w"))) { GA_Error("nw_inp_from_string: failed to open temp.nw\n",0); } if (fwrite(input, 1, strlen(input), file) != strlen(input)) { GA_Error("nw_inp_from_string: failed to write to temp.nw\n",0); } if (fwrite("\n", 1, 1, file) != 1) { GA_Error("nw_inp_from_string: failed to write to temp.nw\n",0); } (void) fclose(file); } #if defined(CRAY_T3E) fstring = _cptofcd(filename, strlen(filename)); status = nw_inp_from_file_(&rtdb, fstring); #elif defined(WIN32) fstring.string = filename; fstring.len = strlen(filename); status = nw_inp_from_file_(&rtdb, fstring); #elif defined(USE_FCD) #error Do something about _fcd #else status = nw_inp_from_file_(&rtdb, filename, strlen(filename)); #endif if (GA_Nodeid() == 0) (void) unlink(filename); return status; }
void verify(int g_a, int g_b, int g_c, int *lo, int *hi, int *ld, int N) { double rchk, alpha=1.0, beta=0.0; int g_chk, me=GA_Nodeid(); g_chk = GA_Duplicate(g_a, "array Check"); if(!g_chk) GA_Error("duplicate failed",NDIMS); GA_Sync(); GA_Dgemm('n', 'n', N, N, N, 1.0, g_a, g_b, 0.0, g_chk); GA_Sync(); alpha=1.0, beta=-1.0; GA_Add(&alpha, g_c, &beta, g_chk, g_chk); rchk = GA_Ddot(g_chk, g_chk); if (me==0) { printf("Normed difference in matrices: %12.4e\n", rchk); if(rchk < -TOLERANCE || rchk > TOLERANCE) GA_Error("Matrix multiply verify failed",0); else printf("Matrix Mutiply OK\n"); } GA_Destroy(g_chk); }
void FATR ga_antisymmetrize_(Integer *g_a) { DoublePrecision alpha = 0.5; int i, me = GA_Nodeid(); extern void * FATR ga_malloc(Integer nelem, int type, char *name); extern void FATR ga_free(void *ptr); void FATR gai_subtr(int *lo, int *hi, void *a, void *b, DoublePrecision alpha, int type, Integer nelem, int ndim); int alo[GA_MAX_DIM], ahi[GA_MAX_DIM], lda[GA_MAX_DIM]; int blo[GA_MAX_DIM], bhi[GA_MAX_DIM], ldb[GA_MAX_DIM]; int ndim, dims[GA_MAX_DIM], type; Integer nelem=1; Logical have_data; void *a_ptr, *b_ptr; GA_Sync(); NGA_Inquire((int)(*g_a), &type, &ndim, dims); if (dims[0] != dims[1]) GA_Error("ga_sym: can only sym square matrix", 0L); /* Find the local distribution */ NGA_Distribution((int)(*g_a), me, alo, ahi); have_data = ahi[0]>=0; for(i=1; i<ndim; i++) have_data = have_data && ahi[i]>=0; if(have_data) { NGA_Access((int)(*g_a), alo, ahi, &a_ptr, lda); for(i=0; i<ndim; i++) nelem *= ahi[i]-alo[i] +1; b_ptr = (void *) ga_malloc(nelem, MT_C_DBL, "v"); for(i=2; i<ndim; i++) {bhi[i]=ahi[i]; blo[i]=alo[i]; } /* switch rows and cols */ blo[1]=alo[0]; bhi[1]=ahi[0]; blo[0]=alo[1]; bhi[0]=ahi[1]; for (i=0; i < ndim-1; i++) ldb[i] = bhi[i+1] - blo[i+1] + 1; NGA_Get((int)(*g_a), blo, bhi, b_ptr, ldb); } GA_Sync(); if(have_data) { gai_subtr(alo, ahi, a_ptr, b_ptr, alpha, type, nelem, ndim); NGA_Release_update((int)(*g_a), alo, ahi); ga_free(b_ptr); } GA_Sync(); }
/*server signals all clients to terminate*/ void signal_termination(int server_id, int msg_src_mpi) { int i, v=TERM_CLIENT; const int rank = GA_Nodeid(); const int size = GA_Nnodes(); const int default_grp = ga_pgroup_get_default_(); /* assert(server_id == rank); /\*call with your id, not someone else's*\/ */ assert(server_id == SVR); /*Only server may invoke this method*/ #ifdef LEADER_BCAST { int *pid_list = (int *)alloca(size*sizeof(int)); int root = GA_Pgroup_absolute_id(default_grp, SVR); /* int src = GA_Pgroup_absolute_id(default_grp, rank); */ for(i=0; i<size; i++) { pid_list[i] = GA_Pgroup_absolute_id(default_grp,i); } broadcast(size,pid_list,root,msg_src_mpi,&v,sizeof(int)); } #else for(i=0; i<size; i++) { if(i != SVR) { MPI_Send(&v, 1, MPI_INT, GA_Pgroup_absolute_id(default_grp,i), SIGNAL_TAG, MPI_COMM_WORLD); } } #endif }
int linux_printaff_(){ mypid=getpid(); #ifdef MPI MPI_Comm_rank(MPI_COMM_WORLD,&myrank); #else myrank=GA_Nodeid(); #endif CPU_ZERO(&mycpuid); if (sched_getaffinity(mypid, sizeof(mycpuid), &mycpuid) < 0) { perror("sched_getaffinity"); return -1; } for (i = 0; i < MXCPUS; i++){ if (CPU_ISSET(i, &mycpuid)) { caff[numaff]=i; numaff+=1; } } if(numaff>0) { printf("rank %d pid %d bind to %d CPUs:", myrank, (int) mypid, numaff); for (i = 0; i < numaff; i++){ printf(" %i ", caff[i]); } printf(" \n"); fflush(stdout); } return 0; }
int main(int argc, char **argv) { int heap=300000, stack=300000; int me, nprocs, i; double start, end; /* Initialize Message Passing library */ MPI_Init(&argc, &argv); /* Initialize GA */ GA_Initialize(); /* Initialize Memory Allocator (MA) */ if(! MA_init(C_DBL, stack, heap) ) GA_Error("MA_init failed",stack+heap); me = GA_Nodeid(); nprocs = GA_Nnodes(); if(me==0) { printf("\nUsing %d processes\n\n", nprocs); fflush(stdout); } start = MPI_Wtime(); for(i =0; i< 1000; i++) { TRANSPOSE1D(); } end = MPI_Wtime(); if(me==0) printf(" Time=%2.5e secs\n\n",end-start); if(me==0)printf("\nTerminating ..\n"); GA_Terminate(); MPI_Finalize(); }
/** \brief Delete the data associated with a key from the RTDB \param handle [Input] the RTDB handle \param name [Input] the key \return the status of the delete operation */ int rtdb_delete(const int handle, const char *name) { int status; #ifdef GAGROUPS int me = GA_Nodeid(); #endif if (!verify_parallel_access()) return 0; if (handle < 0 || handle >= MAX_RTDB) { (void) fprintf(stderr, "rtdb_delete: handle out of range %d\n", handle); (void) fflush(stderr); return 0; } if (par_mode[handle] == INACTIVE) { (void) fprintf(stderr, "rtdb_delete: handle not active %d\n",handle); (void) fflush(stderr); return 0; } if (par_mode[handle] == RTDB_SEQ_MODE && parallel_mode == RTDB_PAR_MODE) { (void) fprintf(stderr, "rtdb_delete: seq. open and par. delete\n"); (void) fflush(stderr); return 0; } if (parallel_mode == RTDB_SEQ_MODE || me == 0) status = rtdb_seq_delete(handle, name); if (parallel_mode == RTDB_PAR_MODE) rtdb_broadcast(TYPE_RTDB_STATUS, MT_INT, 1, (void *) &status); return status; }
int main(int argc, char **argv) { TEST_SETUP; int shape_idx=0, type_idx=0, dist_idx=0; int return_code=0; for (shape_idx=0; shape_idx < NUM_SHAPES; ++shape_idx) { for (type_idx=0; type_idx < NUM_TYPES; ++type_idx) { for (dist_idx=0; dist_idx < NUM_DISTS; ++dist_idx) { if (0 == GA_Nodeid()) { printf("%s\t%s\t%s\n", SHAPE_NAMES[shape_idx], TYPE_NAMES[type_idx], DIST_NAMES[dist_idx] ); } GA_Sync(); return_code = test(shape_idx, type_idx, dist_idx); if (0 != return_code) { break; } } if (0 != return_code) { break; } } if (0 != return_code) { break; } } TEST_TEARDOWN; return return_code; }
int main(int argc, char** argv) { int nprocs,myid,nprocssq; int dims[2],chunk[2]; int i,j,k; int stack = 100000, heap = 100000; MPI_Init(&argc,&argv); GA_Initialize(); MA_init(C_DBL,stack,heap); nprocssq = GA_Nnodes(); nprocs = sqrt(nprocssq); myid = GA_Nodeid(); dims[0] = N; dims[1] = N; chunk[0] = N/nprocs; chunk[1] = N/nprocs; int g_a = NGA_Create(C_DBL,2,dims,"Array A",chunk); int lo[2],hi[2]; NGA_Distribution(g_a,myid,lo,hi); int ld[1] = {N/nprocs}; void *ptr; double *local; printf("Myid = %d, lo = [%d,%d] , hi = [%d,%d] , ld = %d \n",myid,lo[0],lo[1],hi[0],hi[1],ld[0]); NGA_Access(g_a,lo,hi,&ptr,ld); local = (double*) ptr; printf("Myid = %d , local[0][0] = %f\n",*local); GA_Sync(); GA_Destroy(g_a); GA_Terminate(); MPI_Finalize(); return 0; }
Integer util_gnxtval_(Integer *val) { if(*val > 0) { if(!initialized) ga_error("nxtval: not yet initialized", 0L); return (Integer) NGA_Read_inc(g_T, &subscript, 1); } else if(*val==0) { int n = 1; initialized=1; /* create task array */ g_T = NGA_Create(C_LONG, 1, &n,"Atomic Task", NULL); /* Initialize the task array */ if(GA_Nodeid()==0) { int lo=0, hi=0; NGA_Put (g_T, &lo, &hi, &initval, &hi); initval=0; } GA_Sync(); return 0; } else if (*val < 0) { GA_Destroy(g_T); initialized=0; initval=0; return 0;} ga_error("nxtval: invalid value passed", 0L); return -1; }
void rtdb_print_usage() { #ifdef USE_HDBM #ifdef GAGROUPS int me = GA_Nodeid(); #endif if (me == 0) hdbm_print_usage(); #endif }
int main(int argc, char **argv) { int size_dst = 15; int g_a = 0; int I_NEG_ONE = -1; long L_NEG_ONE = -1; long long LL_NEG_ONE = -1; int FIVE = 5; int TEN = 10; int lo; int hi; int *ptr; int i; MP_INIT(argc,argv); GA_INIT(argc,argv); for (i=0; i<3; ++i) { if (0 == i) { g_a = NGA_Create(C_INT, 1, &size_dst, "dst", NULL); GA_Fill(g_a, &I_NEG_ONE); } else if (1 == i) { g_a = NGA_Create(C_LONG, 1, &size_dst, "dst", NULL); GA_Fill(g_a, &L_NEG_ONE); } else if (2 == i) { g_a = NGA_Create(C_LONGLONG, 1, &size_dst, "dst", NULL); GA_Fill(g_a, &LL_NEG_ONE); } GA_Sync(); GA_Print(g_a); NGA_Print_patch(g_a, &FIVE, &TEN, 0); NGA_Print_patch(g_a, &FIVE, &TEN, 1); NGA_Distribution(g_a, GA_Nodeid(), &lo, &hi); NGA_Access(g_a, &lo, &hi, &ptr, NULL); printf("[%d] (%d)=%d\n", GA_Nodeid(), lo, *ptr); NGA_Release(g_a, &lo, &hi); } GA_Terminate(); MP_FINALIZE(); exit(EXIT_SUCCESS); }
void do_work() { int i; int me = GA_Nodeid(); test(C_FLOAT); test(C_DBL); test(C_DCPL); test(C_SCPL); /* */ if(me == 0) printf("\n\n"); }
int util_mic_get_device_() { int count; if (!offload_master_()) { fprintf(stdout, "%02d: need to be offload master\n", GA_Nodeid()); GA_Error("util_mic_get_device error", 0L); } count = util_cgetppn()/util_mic_get_num_devices_(); if (count > 0) { count=util_my_smp_index()/util_nwc_ranks_per_device_(); } return count; }
/** \brief Open an RTDB stored on a given file The RTDB is stored on a file. Before the RTDB can be accessed in the program the file needs to be opened. To access the RTDB a handle is associated with with the file and this handle is used in the actual access routines. \param filename [Input] the name of the file holding the RTDB \param mode [Input] the initial access mode of the RTDB \param handle [Output] the RTBD handle \return The return value of the file open command */ int rtdb_open(const char *filename, const char *mode, int *handle) { int status; #ifdef GAGROUPS int me = GA_Nodeid(); #else me = GA_Nodeid(); #endif if (!verify_parallel_access()) return 0; if (parallel_mode == RTDB_SEQ_MODE || me == 0) status = rtdb_seq_open(filename, mode, handle); if (parallel_mode == RTDB_PAR_MODE) { rtdb_broadcast(TYPE_RTDB_HANDLE, MT_INT, 1, (void *) handle); rtdb_broadcast(TYPE_RTDB_STATUS, MT_INT, 1, (void *) &status); } if (status) par_mode[*handle] = parallel_mode; return status; }
void FATR util_mic_set_affinity_() { char affinity[BUFSZ]; char num_threads[BUFSZ]; int pos; int micdev; int nprocs; int ranks_per_dev; int rank_on_dev; int nthreads; int ppn; int ranks_per_device=util_getenv_nwc_ranks_per_device_(); if (ranks_per_device == 0) { return ; } else if (ranks_per_device < 0){ ranks_per_device = RANKS_PER_DEVICE; } pos=snprintf(affinity, BUFSZ, "KMP_PLACE_THREADS="); micdev=util_mic_get_device_(); ppn=util_cgetppn(); #pragma offload target(mic:micdev) out(nprocs) { /* do one offload to query the coprocessor for the number of cores */ nprocs = ((int) sysconf(_SC_NPROCESSORS_ONLN) / 4) - 1; } rank_on_dev = util_my_smp_index() % util_nwc_ranks_per_device_(); nthreads = nprocs / ranks_per_device * DEFAULT_OFFLOAD_THREAD_MULTIPLIER; pos+=snprintf(affinity+pos, BUFSZ-pos, "%dc,%dt,%do", nprocs / ranks_per_device, DEFAULT_OFFLOAD_THREAD_MULTIPLIER, rank_on_dev * (nprocs / ranks_per_device)); snprintf(num_threads, BUFSZ, "OMP_NUM_THREADS=%d", nthreads); printf("%02d: micdev=%d nprocs=%d rank_on_dev=%d ranks_per_device=%d affinity='%s' pos=%d\n", GA_Nodeid(), micdev, nprocs, rank_on_dev, ranks_per_device, affinity, pos); fflush(stdout); #pragma offload target(mic:micdev) in(affinity) in(num_threads) { /* set the affinity masks and the number of offloaded OpenMP threads */ kmp_set_defaults("KMP_AFFINITY=compact"); kmp_set_defaults(affinity); kmp_set_defaults(num_threads); } }
static int verify_parallel_access() /* Return true if access mode / processor values are sensible */ { #ifdef GAGROUPS int me = GA_Nodeid(); #endif if ((parallel_mode == RTDB_SEQ_MODE) && (me != 0)) { (void) fflush(stdout); (void) fprintf(stderr,"rtdb: sequential access only possible for process 0\n"); (void) fflush(stderr); return 0; } else return 1; }
/** \brief Retrieve data from the RTDB Retrieve the data associated with the specified key from the RTDB and return it in the provided array. \param handle [Input] the RTDB handle \param name [Input] the key for the data \param ma_type [Input] the type of the data specified by one of the MA data types (see mafdecls.fh) \param nelem [Input] the number of elements of the specified type \param array [Output] the actual data retrieved \return the status of the read operation */ int rtdb_get(const int handle, const char *name, const int ma_type, const int nelem, void *array) { int status; #ifdef GAGROUPS int me = GA_Nodeid(); #endif if (!verify_parallel_access()) return 0; if (handle < 0 || handle >= MAX_RTDB) { (void) fprintf(stderr, "rtdb_get: handle out of range %d\n", handle); (void) fflush(stderr); return 0; } if (par_mode[handle] == INACTIVE) { (void) fprintf(stderr, "rtdb_get: handle not active %d\n",handle); (void) fflush(stderr); return 0; } if (par_mode[handle] == RTDB_SEQ_MODE && parallel_mode == RTDB_PAR_MODE) { (void) fprintf(stderr, "rtdb_get: seq. open and par. get\n"); (void) fflush(stderr); return 0; } if (parallel_mode == RTDB_SEQ_MODE || me == 0) status = rtdb_seq_get(handle, name, ma_type, nelem, array); /* Implicit assumption that all processes call this with nelem greater than or equal to value used on process 0 */ if (parallel_mode == RTDB_PAR_MODE) { rtdb_broadcast(TYPE_RTDB_STATUS, MT_INT, 1, (void *) &status); if (status) { rtdb_broadcast(TYPE_RTDB_NELEM, MT_INT, 1, (void *) &nelem); rtdb_broadcast(TYPE_RTDB_ARRAY, ma_type, nelem, (void *) array); } } return status; }
int main(int argc, char **argv) { int proc, nprocs; int M, N, K; /* */ int blockX_len, blockY_len; int heap=3000000, stack=3000000; if (argc == 6) { M = atoi(argv[1]); N = atoi(argv[2]); K = atoi(argv[3]); blockX_len = atoi(argv[4]); blockY_len = atoi(argv[5]); } else { printf("Please enter ./a.out <M> <N> <K> <BLOCK-X-LEN> <BLOCK-Y-LEN>"); exit(-1); } MPI_Init(&argc, &argv); GA_Initialize(); if(! MA_init(C_DBL, stack, heap) ) GA_Error("MA_init failed",stack+heap); proc = GA_Nodeid(); nprocs = GA_Nnodes(); if(proc == 0) { printf("Using %d processes\n", nprocs); fflush(stdout); } matrix_multiply(M, N, K, blockX_len, blockY_len); if(proc == 0) printf("\nTerminating ..\n"); GA_Terminate(); MPI_Finalize(); return 0; }
int rtdb_next(const int handle, const int namelen, char *name) { int status; #ifdef GAGROUPS int me = GA_Nodeid(); #endif if (!verify_parallel_access()) return 0; if (handle < 0 || handle >= MAX_RTDB) { (void) fprintf(stderr, "rtdb_next: handle out of range %d\n", handle); (void) fflush(stderr); return 0; } if (par_mode[handle] == INACTIVE) { (void) fprintf(stderr, "rtdb_next: handle not active %d\n",handle); (void) fflush(stderr); return 0; } if (par_mode[handle] == RTDB_SEQ_MODE && parallel_mode == RTDB_PAR_MODE) { (void) fprintf(stderr, "rtdb_next: seq. open and par. next\n"); (void) fflush(stderr); return 0; } if (parallel_mode == RTDB_SEQ_MODE || me == 0) status = rtdb_seq_next(handle, namelen, name); if (parallel_mode == RTDB_PAR_MODE) { rtdb_broadcast(TYPE_RTDB_STATUS, MT_INT, 1, (void *) &status); if (status) { int len; if (me == 0)len = strlen(name)+1; rtdb_broadcast(TYPE_RTDB_LEN, MT_INT, 1, (void *) &len); rtdb_broadcast(TYPE_RTDB_NAME, MT_CHAR, len, (void *) name); } } return status; }
int rtdb_get_info(const int handle, const char *name, int *ma_type, int *nelem, char date[26]) { int status; #ifdef GAGROUPS int me = GA_Nodeid(); #endif if (!verify_parallel_access()) return 0; if (handle < 0 || handle >= MAX_RTDB) { (void) fprintf(stderr, "rtdb_get_info: handle out of range %d\n", handle); (void) fflush(stderr); return 0; } if (par_mode[handle] == INACTIVE) { (void) fprintf(stderr, "rtdb_get_info: handle not active %d\n",handle); (void) fflush(stderr); return 0; } if (par_mode[handle] == RTDB_SEQ_MODE && parallel_mode == RTDB_PAR_MODE) { (void) fprintf(stderr, "rtdb_get_info: seq. open and par. get\n"); (void) fflush(stderr); return 0; } if (parallel_mode == RTDB_SEQ_MODE || me == 0) status = rtdb_seq_get_info(handle, name, ma_type, nelem, date); if (parallel_mode == RTDB_PAR_MODE) { rtdb_broadcast(TYPE_RTDB_STATUS, MT_INT, 1, (void *) &status); if (status) { rtdb_broadcast(TYPE_RTDB_NELEM, MT_INT, 1, (void *) nelem); rtdb_broadcast(TYPE_RTDB_TYPE, MT_INT, 1, (void *) ma_type); rtdb_broadcast(TYPE_RTDB_DATE, MT_CHAR, 26, (void *) date); } } return status; }
int main(int argc, char **argv) { int heap=300000, stack=300000; int me, nprocs; /* Step1: Initialize Message Passing library */ #ifdef MPI MPI_Init(&argc, &argv); /* initialize MPI */ #else PBEGIN_(argc, argv); /* initialize TCGMSG */ #endif /* Step2: Initialize GA */ /* ### intialize the GA library */ GA_Initialize(); /* Step3: Initialize Memory Allocator (MA) */ if(! MA_init(C_DBL, stack, heap) ) GA_Error("MA_init failed",stack+heap); /* ### assign the local processor ID to the int variable "me" * ### and the total number of processors to the int variable * ### "nprocs" */ me = GA_Nodeid(); nprocs = GA_Nnodes(); if(me==0) { printf("\nUsing %d processes\n\n", nprocs); fflush(stdout); } TRANSPOSE1D(); if(me==0)printf("\nTerminating ..\n"); /* ### terminate the GA library */ GA_Terminate(); #ifdef MPI MPI_Finalize(); #else PEND_(); #endif }
/** * Shut down GOSS communication */ void gridpack::goss::GOSSUtils::terminateGOSS() { if (GA_Nodeid()==0) { char topic[128]; #ifndef GOSS_DEBUG Connection *connection; Session *session; Destination *destination; MessageProducer *producer; std::auto_ptr<ActiveMQConnectionFactory> connectionFactory(new ActiveMQConnectionFactory(p_URI)) ; // Create a Connection connection = connectionFactory->createConnection(p_username, p_passwd); connection->start(); // Create a Session session = connection->createSession(Session::AUTO_ACKNOWLEDGE); // Create the destination (Topic or Queue) sprintf(topic,"topic/goss/gridpack/close_goss"); destination = session->createTopic(topic); // Create a MessageProducer from the Session to the Topic producer = session->createProducer(destination); producer->setDeliveryMode(DeliveryMode::NON_PERSISTENT); // Send final message indicating that channel is being close std::string buf = "Closing GOSS"; std::auto_ptr<TextMessage> end_message(session->createTextMessage(buf)); producer->send(end_message.get()); if (connection) delete connection; if (session) delete session; if (destination) delete destination; if (producer) delete producer; #endif } }
void plist_init(proc_list_t *plist) { int i, ctr; const int me = GA_Nodeid(); const int nproc = GA_Nnodes(); const int default_grp = ga_pgroup_get_default_(); const char *pname = "plist_init"; /* fprintf(stderr, "%d:: 1 %s\n", me,pname); */ plist->buf = (proc_t *)malloc((nproc-1)*sizeof(proc_t)); assert(plist->buf != NULL); for(i=0, ctr=0; i<nproc; i++) { if(i != SVR) { plist->buf[ctr].procid = GA_Pgroup_absolute_id(default_grp,i); plist->buf[ctr].next = &plist->buf[ctr+1]; ctr+=1; } } /* fprintf(stderr, "%d:: 2 %s\n", me,pname); */ plist->buf[nproc-2].next = NULL; plist->idle = &plist->buf[0]; plist->nidle = nproc-1; }
// ------------------------------------------------------------- // MPIComm2GApgroup // ------------------------------------------------------------- static PetscErrorCode MPIComm2GApgroup(MPI_Comm comm, int *pGrpHandle) { PetscErrorCode ierr = 0; int nproc; int me, myGlobalRank; int *proclist; int p; ierr = MPI_Comm_size(comm, &nproc); CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &me); CHKERRQ(ierr); myGlobalRank = GA_Nodeid(); ierr = PetscMalloc(nproc*sizeof(int), &proclist); CHKERRQ(ierr); for (p = 0; p < nproc; ++p) { proclist[p] = 0; } proclist[me] = myGlobalRank; ierr = MPI_Allreduce(MPI_IN_PLACE, &proclist[0], nproc, MPI_INT, MPI_SUM, comm); CHKERRQ(ierr); *pGrpHandle = GA_Pgroup_create(&proclist[0], nproc); ierr = PetscFree(proclist); CHKERRQ(ierr); return ierr; }
int main(int argc, char **argv) { Integer heap=9000000, stack=9000000; int me, nproc; DoublePrecision time; MP_INIT(argc,argv); GA_INIT(argc,argv); /* initialize GA */ nproc = GA_Nnodes(); me = GA_Nodeid(); if(me==0) printf("Using %d processes\n\n",nproc); if (me==0) printf ("Matrix size is %d X %d\n",N,N); #ifdef USE_REGULAR if (me == 0) printf("\nUsing regular data distribution\n\n"); #endif #ifdef USE_SIMPLE_CYCLIC if (me == 0) printf("\nUsing simple block-cyclic data distribution\n\n"); #endif #ifdef USE_SCALAPACK if (me == 0) printf("\nUsing ScaLAPACK data distribution\n\n"); #endif #ifdef USE_TILED if (me == 0) printf("\nUsing tiled data distribution\n\n"); #endif if(!MA_init((Integer)MT_F_DBL, stack/nproc, heap/nproc)) GA_Error("MA_init failed bytes= %d",stack+heap); #ifdef PERMUTE { int i, *list = (int*)malloc(nproc*sizeof(int)); if(!list)GA_Error("malloc failed",nproc); for(i=0; i<nproc;i++)list[i]=nproc-1-i; GA_Register_proclist(list, nproc); free(list); } #endif if(GA_Uses_fapi())GA_Error("Program runs with C API only",1); time = MP_TIMER(); do_work(); /* printf("%d: Total Time = %lf\n", me, MP_TIMER()-time); printf("%d: GEMM Total Time = %lf\n", me, gTime); */ if(me==0)printf("\nSuccess\n\n"); GA_Terminate(); MP_FINALIZE(); return 0; }
void test(int data_type) { int me=GA_Nodeid(); int nproc = GA_Nnodes(); int g_a, g_b, g_c; int ndim = 2; int dims[2]={N,N}; int lo[2]={0,0}; int hi[2]={N-1,N-1}; int block_size[2]={NB,NB-1}; int proc_grid[2]; int i,j,l,k,m,n, ld; double alpha_dbl = 1.0, beta_dbl = 0.0; double dzero = 0.0; double ddiff; float alpha_flt = 1.0, beta_flt = 0.0; float fzero = 0.0; float fdiff; float ftmp; double dtmp; SingleComplex ctmp; DoubleComplex ztmp; DoubleComplex alpha_dcpl = {1.0, 0.0} , beta_dcpl = {0.0, 0.0}; DoubleComplex zzero = {0.0,0.0}; DoubleComplex zdiff; SingleComplex alpha_scpl = {1.0, 0.0} , beta_scpl = {0.0, 0.0}; SingleComplex czero = {0.0,0.0}; SingleComplex cdiff; void *alpha=NULL, *beta=NULL; void *abuf=NULL, *bbuf=NULL, *cbuf=NULL, *c_ptr=NULL; switch (data_type) { case C_FLOAT: alpha = (void *)&alpha_flt; beta = (void *)&beta_flt; abuf = (void*)malloc(N*N*sizeof(float)); bbuf = (void*)malloc(N*N*sizeof(float)); cbuf = (void*)malloc(N*N*sizeof(float)); if(me==0) printf("Single Precision: Testing GA_Sgemm,NGA_Matmul_patch for %d-Dimension", ndim); break; case C_DBL: alpha = (void *)&alpha_dbl; beta = (void *)&beta_dbl; abuf = (void*)malloc(N*N*sizeof(double)); bbuf = (void*)malloc(N*N*sizeof(double)); cbuf = (void*)malloc(N*N*sizeof(double)); if(me==0) printf("Double Precision: Testing GA_Dgemm,NGA_Matmul_patch for %d-Dimension", ndim); break; case C_DCPL: alpha = (void *)&alpha_dcpl; beta = (void *)&beta_dcpl; abuf = (void*)malloc(N*N*sizeof(DoubleComplex)); bbuf = (void*)malloc(N*N*sizeof(DoubleComplex)); cbuf = (void*)malloc(N*N*sizeof(DoubleComplex)); if(me==0) printf("Double Complex: Testing GA_Zgemm,NGA_Matmul_patch for %d-Dimension", ndim); break; case C_SCPL: alpha = (void *)&alpha_scpl; beta = (void *)&beta_scpl; abuf = (void*)malloc(N*N*sizeof(SingleComplex)); bbuf = (void*)malloc(N*N*sizeof(SingleComplex)); cbuf = (void*)malloc(N*N*sizeof(SingleComplex)); if(me==0) printf("Single Complex: Testing GA_Cgemm,NGA_Matmul_patch for %d-Dimension", ndim); break; default: GA_Error("wrong data type", data_type); } if (me==0) printf("\nCreate A, B, C\n"); #ifdef USE_REGULAR g_a = NGA_Create(data_type, ndim, dims, "array A", NULL); #endif #ifdef USE_SIMPLE_CYCLIC g_a = NGA_Create_handle(); NGA_Set_data(g_a,ndim,dims,data_type); NGA_Set_array_name(g_a,"array A"); NGA_Set_block_cyclic(g_a,block_size); if (!GA_Allocate(g_a)) { GA_Error("Failed: create: g_a",40); } #endif #ifdef USE_SCALAPACK g_a = NGA_Create_handle(); NGA_Set_data(g_a,ndim,dims,data_type); NGA_Set_array_name(g_a,"array A"); grid_factor(nproc,&i,&j); proc_grid[0] = i; proc_grid[1] = j; NGA_Set_block_cyclic_proc_grid(g_a,block_size,proc_grid); if (!GA_Allocate(g_a)) { GA_Error("Failed: create: g_a",40); } #endif #ifdef USE_TILED g_a = NGA_Create_handle(); NGA_Set_data(g_a,ndim,dims,data_type); NGA_Set_array_name(g_a,"array A"); grid_factor(nproc,&i,&j); proc_grid[0] = i; proc_grid[1] = j; NGA_Set_tiled_proc_grid(g_a,block_size,proc_grid); if (!GA_Allocate(g_a)) { GA_Error("Failed: create: g_a",40); } #endif g_b = GA_Duplicate(g_a, "array B"); g_c = GA_Duplicate(g_a, "array C"); if(!g_a || !g_b || !g_c) GA_Error("Create failed: a, b or c",1); ld = N; if (me==0) printf("\nInitialize A\n"); /* Set up matrix A */ if (me == 0) { for (i=0; i<N; i++) { for (j=0; j<N; j++) { switch (data_type) { case C_FLOAT: ((float*)abuf)[i*N+j] = (float)(i*N+j); break; case C_DBL: ((double*)abuf)[i*N+j] = (double)(i*N+j); break; case C_DCPL: ((DoubleComplex*)abuf)[i*N+j].real = (double)(i*N+j); ((DoubleComplex*)abuf)[i*N+j].imag = 1.0; break; case C_SCPL: ((SingleComplex*)abuf)[i*N+j].real = (float)(i*N+j); ((SingleComplex*)abuf)[i*N+j].imag = 1.0; break; default: GA_Error("wrong data type", data_type); } } } NGA_Put(g_a,lo,hi,abuf,&ld); } GA_Sync(); if (me==0) printf("\nInitialize B\n"); /* Set up matrix B */ if (me == 0) { for (i=0; i<N; i++) { for (j=0; j<N; j++) { switch (data_type) { case C_FLOAT: ((float*)bbuf)[i*N+j] = (float)(j*N+i); break; case C_DBL: ((double*)bbuf)[i*N+j] = (double)(j*N+i); break; case C_DCPL: ((DoubleComplex*)bbuf)[i*N+j].real = (double)(j*N+i); ((DoubleComplex*)bbuf)[i*N+j].imag = 1.0; break; case C_SCPL: ((SingleComplex*)bbuf)[i*N+j].real = (float)(j*N+i); ((SingleComplex*)bbuf)[i*N+j].imag = 1.0; break; default: GA_Error("wrong data type", data_type); } } } NGA_Put(g_b,lo,hi,bbuf,&ld); } GA_Sync(); if (me==0) printf("\nPerform matrix multiply\n"); switch (data_type) { case C_FLOAT: NGA_Matmul_patch('N','N',&alpha_flt,&beta_flt,g_a,lo,hi, g_b,lo,hi,g_c,lo,hi); break; case C_DBL: NGA_Matmul_patch('N','N',&alpha_dbl,&beta_dbl,g_a,lo,hi, g_b,lo,hi,g_c,lo,hi); break; case C_SCPL: NGA_Matmul_patch('N','N',&alpha_scpl,&beta_scpl,g_a,lo,hi, g_b,lo,hi,g_c,lo,hi); break; case C_DCPL: NGA_Matmul_patch('N','N',&alpha_dcpl,&beta_dcpl,g_a,lo,hi, g_b,lo,hi,g_c,lo,hi); break; default: GA_Error("wrong data type", data_type); } GA_Sync(); #if 0 if (me==0) printf("\nCheck answer\n"); /* GA_Print(g_a); if (me == 0) printf("\n\n\n\n"); GA_Print(g_b); if (me == 0) printf("\n\n\n\n"); GA_Print(g_c); */ /* Check answer */ NGA_Get(g_a,lo,hi,abuf,&ld); NGA_Get(g_b,lo,hi,bbuf,&ld); for (i=0; i<N; i++) { for (j=0; j<N; j++) { switch (data_type) { case C_FLOAT: ((float*)cbuf)[i*N+j] = fzero; break; case C_DBL: ((double*)cbuf)[i*N+j] = dzero; break; case C_DCPL: ((DoubleComplex*)cbuf)[i*N+j] = zzero; break; case C_SCPL: ((SingleComplex*)cbuf)[i*N+j] = czero; break; default: GA_Error("wrong data type", data_type); } for (k=0; k<N; k++) { switch (data_type) { case C_FLOAT: ((float*)cbuf)[i*N+j] += ((float*)abuf)[i*N+k] *((float*)bbuf)[k*N+j]; break; case C_DBL: ((double*)cbuf)[i*N+j] += ((double*)abuf)[i*N+k] *((double*)bbuf)[k*N+j]; break; case C_DCPL: ((DoubleComplex*)cbuf)[i*N+j].real += (((DoubleComplex*)abuf)[i*N+k].real *((DoubleComplex*)bbuf)[k*N+j].real -(((DoubleComplex*)abuf)[i*N+k].imag *((DoubleComplex*)bbuf)[k*N+j].imag)); ((DoubleComplex*)cbuf)[i*N+j].imag += (((DoubleComplex*)abuf)[i*N+k].real *((DoubleComplex*)bbuf)[k*N+j].imag +(((DoubleComplex*)abuf)[i*N+k].imag *((DoubleComplex*)bbuf)[k*N+j].real)); break; case C_SCPL: ((SingleComplex*)cbuf)[i*N+j].real += (((SingleComplex*)abuf)[i*N+k].real *((SingleComplex*)bbuf)[k*N+j].real -(((SingleComplex*)abuf)[i*N+k].imag *((SingleComplex*)bbuf)[k*N+j].imag)); ((SingleComplex*)cbuf)[i*N+j].imag += (((SingleComplex*)abuf)[i*N+k].real *((SingleComplex*)bbuf)[k*N+j].imag +(((SingleComplex*)abuf)[i*N+k].imag *((SingleComplex*)bbuf)[k*N+j].real)); break; default: GA_Error("wrong data type", data_type); } } } } GA_Sync(); if (me == 0) { NGA_Get(g_c,lo,hi,abuf,&ld); for (i=0; i<N; i++) { for (j=0; j<N; j++) { switch (data_type) { case C_FLOAT: fdiff = ((float*)abuf)[i*N+j]-((float*)cbuf)[i*N+j]; if (((float*)abuf)[i*N+j] != 0.0) { fdiff /= ((float*)abuf)[i*N+j]; } if (fabs(fdiff) > TOLERANCE) { printf("p[%d] [%d,%d] Actual: %f Expected: %f\n",me,i,j, ((float*)abuf)[i*N+j],((float*)cbuf)[i*N+j]); } break; case C_DBL: ddiff = ((double*)abuf)[i*N+j]-((double*)cbuf)[i*N+j]; if (((double*)abuf)[i*N+j] != 0.0) { ddiff /= ((double*)abuf)[i*N+j]; } if (fabs(ddiff) > TOLERANCE) { printf("p[%d] [%d,%d] Actual: %f Expected: %f\n",me,i,j, ((double*)abuf)[i*N+j],((double*)cbuf)[i*N+j]); } break; case C_DCPL: zdiff.real = ((DoubleComplex*)abuf)[i*N+j].real -((DoubleComplex*)cbuf)[i*N+j].real; zdiff.imag = ((DoubleComplex*)abuf)[i*N+j].imag -((DoubleComplex*)cbuf)[i*N+j].imag; if (((DoubleComplex*)abuf)[i*N+j].real != 0.0 || ((DoubleComplex*)abuf)[i*N+j].imag != 0.0) { ztmp = ((DoubleComplex*)abuf)[i*N+j]; ddiff = sqrt((zdiff.real*zdiff.real+zdiff.imag*zdiff.imag) /(ztmp.real*ztmp.real+ztmp.imag*ztmp.imag)); } else { ddiff = sqrt(zdiff.real*zdiff.real+zdiff.imag*zdiff.imag); } if (fabs(ddiff) > TOLERANCE) { printf("p[%d] [%d,%d] Actual: (%f,%f) Expected: (%f,%f)\n",me,i,j, ((DoubleComplex*)abuf)[i*N+j].real, ((DoubleComplex*)abuf)[i*N+j].imag, ((DoubleComplex*)cbuf)[i*N+j].real, ((DoubleComplex*)cbuf)[i*N+j].imag); } break; case C_SCPL: cdiff.real = ((SingleComplex*)abuf)[i*N+j].real -((SingleComplex*)cbuf)[i*N+j].real; cdiff.imag = ((SingleComplex*)abuf)[i*N+j].imag -((SingleComplex*)cbuf)[i*N+j].imag; if (((SingleComplex*)abuf)[i*N+j].real != 0.0 || ((SingleComplex*)abuf)[i*N+j].imag != 0.0) { ctmp = ((SingleComplex*)abuf)[i*N+j]; fdiff = sqrt((cdiff.real*cdiff.real+cdiff.imag*cdiff.imag) /(ctmp.real*ctmp.real+ctmp.imag*ctmp.imag)); } else { fdiff = sqrt(cdiff.real*cdiff.real+cdiff.imag*cdiff.imag); } if (fabs(fdiff) > TOLERANCE) { printf("p[%d] [%d,%d] Actual: (%f,%f) Expected: (%f,%f)\n",me,i,j, ((SingleComplex*)abuf)[i*N+j].real, ((SingleComplex*)abuf)[i*N+j].imag, ((SingleComplex*)cbuf)[i*N+j].real, ((SingleComplex*)cbuf)[i*N+j].imag); } break; default: GA_Error("wrong data type", data_type); } } } } GA_Sync(); /* copy cbuf back to g_a */ if (me == 0) { NGA_Put(g_a,lo,hi,cbuf,&ld); } GA_Sync(); /* Get norm of g_a */ switch (data_type) { case C_FLOAT: ftmp = GA_Fdot(g_a,g_a); break; case C_DBL: dtmp = GA_Ddot(g_a,g_a); break; case C_DCPL: ztmp = GA_Zdot(g_a,g_a); break; case C_SCPL: ctmp = GA_Cdot(g_a,g_a); break; default: GA_Error("wrong data type", data_type); } /* subtract C from A and put the results in B */ beta_flt = -1.0; beta_dbl = -1.0; beta_scpl.real = -1.0; beta_dcpl.real = -1.0; GA_Zero(g_b); GA_Add(alpha,g_a,beta,g_c,g_b); /* evaluate the norm of the difference between the two matrices */ switch (data_type) { case C_FLOAT: fdiff = GA_Fdot(g_b, g_b); if (ftmp != 0.0) { fdiff /= ftmp; } if(fabs(fdiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(fdiff), TOLERANCE); GA_Error("GA_Sgemm Failed", 1); } else if (me == 0) { printf("\nGA_Sgemm OK\n\n"); } break; case C_DBL: ddiff = GA_Ddot(g_b, g_b); if (dtmp != 0.0) { ddiff /= dtmp; } if(fabs(ddiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(ddiff), TOLERANCE); GA_Error("GA_Dgemm Failed", 1); } else if (me == 0) { printf("\nGA_Dgemm OK\n\n"); } break; case C_DCPL: zdiff = GA_Zdot(g_b, g_b); if (ztmp.real != 0.0 || ztmp.imag != 0.0) { ddiff = sqrt((zdiff.real*zdiff.real+zdiff.imag*zdiff.imag) /(ztmp.real*ztmp.real+ztmp.imag*ztmp.imag)); } else { ddiff = sqrt(zdiff.real*zdiff.real+zdiff.imag*zdiff.imag); } if(fabs(ddiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(zdiff.real), TOLERANCE); GA_Error("GA_Zgemm Failed", 1); } else if (me == 0) { printf("\nGA_Zgemm OK\n\n"); } break; case C_SCPL: cdiff = GA_Cdot(g_b, g_b); if (ctmp.real != 0.0 || ctmp.imag != 0.0) { fdiff = sqrt((cdiff.real*cdiff.real+cdiff.imag*cdiff.imag) /(ctmp.real*ctmp.real+ctmp.imag*ctmp.imag)); } else { fdiff = sqrt(cdiff.real*cdiff.real+cdiff.imag*cdiff.imag); } if(fabs(fdiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(cdiff.real), TOLERANCE); GA_Error("GA_Cgemm Failed", 1); } else if (me == 0) { printf("\nGA_Cgemm OK\n\n"); } break; default: GA_Error("wrong data type", data_type); } #endif free(abuf); free(bbuf); free(cbuf); switch (data_type) { case C_FLOAT: abuf = (void*)malloc(N*N*sizeof(float)/4); bbuf = (void*)malloc(N*N*sizeof(float)/4); cbuf = (void*)malloc(N*N*sizeof(float)/4); break; case C_DBL: abuf = (void*)malloc(N*N*sizeof(double)/4); bbuf = (void*)malloc(N*N*sizeof(double)/4); cbuf = (void*)malloc(N*N*sizeof(double)/4); break; case C_DCPL: abuf = (void*)malloc(N*N*sizeof(DoubleComplex)/4); bbuf = (void*)malloc(N*N*sizeof(DoubleComplex)/4); cbuf = (void*)malloc(N*N*sizeof(DoubleComplex)/4); break; case C_SCPL: abuf = (void*)malloc(N*N*sizeof(SingleComplex)/4); bbuf = (void*)malloc(N*N*sizeof(SingleComplex)/4); cbuf = (void*)malloc(N*N*sizeof(SingleComplex)/4); break; default: GA_Error("wrong data type", data_type); } /* Test multiply on a fraction of matrix. Start by reinitializing * A and B */ GA_Zero(g_a); GA_Zero(g_b); GA_Zero(g_c); if (me==0) printf("\nTest patch multiply\n"); lo[0] = N/4; lo[1] = N/4; hi[0] = 3*N/4-1; hi[1] = 3*N/4-1; ld = N/2; /* Set up matrix A */ if (me==0) printf("\nInitialize A\n"); if (me == 0) { for (i=N/4; i<3*N/4; i++) { for (j=N/4; j<3*N/4; j++) { switch (data_type) { case C_FLOAT: ((float*)abuf)[(i-N/4)*N/2+(j-N/4)] = (float)(i*N+j); break; case C_DBL: ((double*)abuf)[(i-N/4)*N/2+(j-N/4)] = (double)(i*N+j); break; case C_DCPL: ((DoubleComplex*)abuf)[(i-N/4)*N/2+(j-N/4)].real = (double)(i*N+j); ((DoubleComplex*)abuf)[(i-N/4)*N/2+(j-N/4)].imag = 1.0; break; case C_SCPL: ((SingleComplex*)abuf)[(i-N/4)*N/2+(j-N/4)].real = (float)(i*N+j); ((SingleComplex*)abuf)[(i-N/4)*N/2+(j-N/4)].imag = 1.0; break; default: GA_Error("wrong data type", data_type); } } } NGA_Put(g_a,lo,hi,abuf,&ld); } GA_Sync(); if (me==0) printf("\nInitialize B\n"); /* Set up matrix B */ if (me == 0) { for (i=N/4; i<3*N/4; i++) { for (j=N/4; j<3*N/4; j++) { switch (data_type) { case C_FLOAT: ((float*)bbuf)[(i-N/4)*N/2+(j-N/4)] = (float)(j*N+i); break; case C_DBL: ((double*)bbuf)[(i-N/4)*N/2+(j-N/4)] = (double)(j*N+i); break; case C_DCPL: ((DoubleComplex*)bbuf)[(i-N/4)*N/2+(j-N/4)].real = (double)(j*N+i); ((DoubleComplex*)bbuf)[(i-N/4)*N/2+(j-N/4)].imag = 1.0; break; case C_SCPL: ((SingleComplex*)bbuf)[(i-N/4)*N/2+(j-N/4)].real = (float)(j*N+i); ((SingleComplex*)bbuf)[(i-N/4)*N/2+(j-N/4)].imag = 1.0; break; default: GA_Error("wrong data type", data_type); } } } NGA_Put(g_b,lo,hi,bbuf,&ld); } GA_Sync(); beta_flt = 0.0; beta_dbl = 0.0; beta_scpl.real = 0.0; beta_dcpl.real = 0.0; if (me==0) printf("\nPerform matrix multiply on sub-blocks\n"); switch (data_type) { case C_FLOAT: NGA_Matmul_patch('N','N',&alpha_flt,&beta_flt,g_a,lo,hi, g_b,lo,hi,g_c,lo,hi); break; case C_DBL: NGA_Matmul_patch('N','N',&alpha_dbl,&beta_dbl,g_a,lo,hi, g_b,lo,hi,g_c,lo,hi); break; case C_SCPL: NGA_Matmul_patch('N','N',&alpha_scpl,&beta_scpl,g_a,lo,hi, g_b,lo,hi,g_c,lo,hi); break; case C_DCPL: NGA_Matmul_patch('N','N',&alpha_dcpl,&beta_dcpl,g_a,lo,hi, g_b,lo,hi,g_c,lo,hi); break; default: GA_Error("wrong data type", data_type); } GA_Sync(); #if 0 if (0) { /* if (data_type != C_SCPL && data_type != C_DCPL) { */ if (me==0) printf("\nCheck answer\n"); /* Multiply buffers by hand */ if (me == 0) { for (i=0; i<N/2; i++) { for (j=0; j<N/2; j++) { switch (data_type) { case C_FLOAT: ((float*)cbuf)[i*N/2+j] = fzero; break; case C_DBL: ((double*)cbuf)[i*N/2+j] = dzero; break; case C_DCPL: ((DoubleComplex*)cbuf)[i*N/2+j] = zzero; break; case C_SCPL: ((SingleComplex*)cbuf)[i*N/2+j] = czero; break; default: GA_Error("wrong data type", data_type); } for (k=0; k<N/2; k++) { switch (data_type) { case C_FLOAT: ((float*)cbuf)[i*N/2+j] += ((float*)abuf)[i*N/2+k] *((float*)bbuf)[k*N/2+j]; break; case C_DBL: ((double*)cbuf)[i*N/2+j] += ((double*)abuf)[i*N/2+k] *((double*)bbuf)[k*N/2+j]; break; case C_DCPL: ((DoubleComplex*)cbuf)[i*N/2+j].real += (((DoubleComplex*)abuf)[i*N/2+k].real *((DoubleComplex*)bbuf)[k*N/2+j].real -(((DoubleComplex*)abuf)[i*N/2+k].imag *((DoubleComplex*)bbuf)[k*N/2+j].imag)); ((DoubleComplex*)cbuf)[i*N/2+j].imag += (((DoubleComplex*)abuf)[i*N/2+k].real *((DoubleComplex*)bbuf)[k*N/2+j].imag +(((DoubleComplex*)abuf)[i*N/2+k].imag *((DoubleComplex*)bbuf)[k*N/2+j].real)); break; case C_SCPL: ((SingleComplex*)cbuf)[i*N/2+j].real += (((SingleComplex*)abuf)[i*N/2+k].real *((SingleComplex*)bbuf)[k*N/2+j].real -(((SingleComplex*)abuf)[i*N/2+k].imag *((SingleComplex*)bbuf)[k*N/2+j].imag)); ((SingleComplex*)cbuf)[i*N/2+j].imag += (((SingleComplex*)abuf)[i*N/2+k].real *((SingleComplex*)bbuf)[k*N/2+j].imag +(((SingleComplex*)abuf)[i*N/2+k].imag *((SingleComplex*)bbuf)[k*N/2+j].real)); break; default: GA_Error("wrong data type", data_type); } } } } NGA_Put(g_a,lo,hi,cbuf,&ld); } if (me == 0) printf("\n\n\n\n"); /* Get norm of g_a */ switch (data_type) { case C_FLOAT: ftmp = NGA_Fdot_patch(g_a,'N',lo,hi,g_a,'N',lo,hi); break; case C_DBL: dtmp = NGA_Ddot_patch(g_a,'N',lo,hi,g_a,'N',lo,hi); break; case C_DCPL: ztmp = NGA_Zdot_patch(g_a,'N',lo,hi,g_a,'N',lo,hi); break; case C_SCPL: ctmp = NGA_Cdot_patch(g_a,'N',lo,hi,g_a,'N',lo,hi); break; default: GA_Error("wrong data type", data_type); } /* subtract C from A and put the results in B */ beta_flt = -1.0; beta_dbl = -1.0; beta_scpl.real = -1.0; beta_dcpl.real = -1.0; NGA_Zero_patch(g_b,lo,hi); NGA_Add_patch(alpha,g_a,lo,hi,beta,g_c,lo,hi,g_b,lo,hi); /* evaluate the norm of the difference between the two matrices */ switch (data_type) { case C_FLOAT: fdiff = NGA_Fdot_patch(g_b,'N',lo,hi,g_b,'N',lo,hi); if (ftmp != 0.0) { fdiff /= ftmp; } if(fabs(fdiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(fdiff), TOLERANCE); GA_Error("GA_Sgemm Failed", 1); } else if (me == 0) { printf("\nGA_Sgemm OK\n\n"); } break; case C_DBL: ddiff = NGA_Ddot_patch(g_b,'N',lo,hi,g_b,'N',lo,hi); if (dtmp != 0.0) { ddiff /= dtmp; } if(fabs(ddiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(ddiff), TOLERANCE); GA_Error("GA_Dgemm Failed", 1); } else if (me == 0) { printf("\nGA_Dgemm OK\n\n"); } break; case C_DCPL: zdiff = NGA_Zdot_patch(g_b,'N',lo,hi,g_b,'N',lo,hi); if (ztmp.real != 0.0 || ztmp.imag != 0.0) { ddiff = sqrt((zdiff.real*zdiff.real+zdiff.imag*zdiff.imag) /(ztmp.real*ztmp.real+ztmp.imag*ztmp.imag)); } else { ddiff = sqrt(zdiff.real*zdiff.real+zdiff.imag*zdiff.imag); } if(fabs(ddiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(zdiff.real), TOLERANCE); GA_Error("GA_Zgemm Failed", 1); } else if (me == 0) { printf("\nGA_Zgemm OK\n\n"); } break; case C_SCPL: cdiff = NGA_Cdot_patch(g_b,'N',lo,hi,g_b,'N',lo,hi); if (ctmp.real != 0.0 || ctmp.imag != 0.0) { fdiff = sqrt((cdiff.real*cdiff.real+cdiff.imag*cdiff.imag) /(ctmp.real*ctmp.real+ctmp.imag*ctmp.imag)); } else { fdiff = sqrt(cdiff.real*cdiff.real+cdiff.imag*cdiff.imag); } if(fabs(fdiff) > TOLERANCE) { printf("\nabs(result) = %f > %f\n", fabsf(cdiff.real), TOLERANCE); GA_Error("GA_Cgemm Failed", 1); } else if (me == 0) { printf("\nGA_Cgemm OK\n\n"); } break; default: GA_Error("wrong data type", data_type); } } #endif free(abuf); free(bbuf); free(cbuf); GA_Destroy(g_a); GA_Destroy(g_b); GA_Destroy(g_c); }