/****************************************************
 * write_binary_contraction_data_3d
 *
 * Writes a 3-dimensional contraction field to an open LIME record and
 * accumulates the DML checksum of the written data.
 *
 * For every site (x,y,z) of the local LX x LY x LZ volume (taken at
 * index 0 of the first g_ipt dimension) the N components found at
 * offset _GWI(mu, site, LX*LY*LZ) in s are gathered, two consecutive
 * doubles per component (presumably real and imaginary part), and
 * written as one contiguous chunk of 2*N words.  On hosts for which
 * big_endian() is false the words are byte swapped before writing.
 *
 * s          - source field
 * limewriter - LIME writer the data are appended to
 * prec       - 32: convert to single precision before writing,
 *              anything else: write doubles
 * N          - number of components per site (must be positive)
 * ans        - checksum; re-initialised here and updated per site
 *
 * Returns 0 on success, 1 if compiled with MPI (not supported) or if
 * the work buffers cannot be allocated, -2 if a LIME write fails.
 * Only the process with g_cart_id == 0 writes.
 ****************************************************/
int write_binary_contraction_data_3d(double * const s, LimeWriter * limewriter, const int prec, const int N, DML_Checksum * ans)
{
#ifdef MPI
  fprintf(stderr, "[write_binary_contraction_data_3d] No mpi version.\n");
  return(1);
#else
  int x, y, z, i=0, mu, status=0;
  int retval = 0;
  double *tmp = NULL;
  float *tmp2 = NULL;
  n_uint64_t bytes, site_bytes;
  DML_SiteRank rank;
  int words_bigendian = big_endian();
  unsigned int VOL3 = LX*LY*LZ;

  DML_checksum_init(ans);

  tmp  = (double*)malloc(2*N*sizeof(double));
  tmp2 = (float*)malloc(2*N*sizeof(float));
  if(tmp == NULL || tmp2 == NULL) {
    fprintf(stderr, "[write_binary_contraction_data_3d] Error, could not allocate write buffers.\n");
    retval = 1;
    goto cleanup;
  }

  /* number of bytes written per lattice site */
  if(prec == 32) site_bytes = (n_uint64_t)2*N*sizeof(float);
  else           site_bytes = (n_uint64_t)2*N*sizeof(double);

  if(g_cart_id==0) {
    for(x = 0; x < LX; x++) {
    for(y = 0; y < LY; y++) {
    for(z = 0; z < LZ; z++) {
      /* Rank should be computed by proc 0 only */
      rank = (DML_SiteRank) (( x * LY + y)*LZ + z);

      /* gather the N components of this site into the write buffer */
      for(mu=0; mu<N; mu++) {
        i = _GWI(mu, g_ipt[0][x][y][z], VOL3);
        if(!words_bigendian) {
          if(prec == 32) {
            byte_swap_assign_double2single( (tmp2+2*mu), (s + i), 2);
          } else {
            byte_swap_assign( (tmp+2*mu), (s + i), 2);
          }
        } else {
          if(prec == 32) {
            double2single((float*)(tmp2+2*mu), (s + i), 2);
          } else {
            tmp[2*mu  ] = s[i  ];
            tmp[2*mu+1] = s[i+1];
          }
        }
      }

      /* the byte count is an in/out argument of limeWriteRecordData,
       * so it is set afresh for every site */
      bytes = site_bytes;
      if(prec == 32) {
        DML_checksum_accum(ans,rank,(char *) tmp2,2*N*sizeof(float));
        status = limeWriteRecordData((void*)tmp2, &bytes, limewriter);
      } else {
        status = limeWriteRecordData((void*)tmp, &bytes, limewriter);
        DML_checksum_accum(ans,rank,(char *) tmp, 2*N*sizeof(double));
      }
      if(status < 0) {
        fprintf(stderr, "[write_binary_contraction_data_3d] LIME write error occurred with status = %d\n", status);
        retval = -2;
        goto cleanup;
      }
    }}}
  }

cleanup:
  free(tmp2);
  free(tmp);
  return(retval);
#endif
}
/***********************************************************************
 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
 *
 * This file is part of tmLQCD.
 *
 * tmLQCD is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * tmLQCD is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
 ***********************************************************************/

#include "ranlux.ih"

#ifdef HAVE_LIBLEMON
/*
 * Lemon variant of write_binary_ranlux_data.
 *
 * This is a placeholder: the only active statement is "return 0;".
 * Nothing is written through lemonwriter, the state array s is not read
 * and the checksum is neither initialised nor updated.  The disabled
 * body below was copied from the su3_vector writer (see the names in its
 * messages) and refers to identifiers such as prec, t and L that are not
 * declared in this function, so it cannot simply be re-enabled.
 *
 * NOTE(review): callers receive 0 although no data was written.  The
 * Lime variant of this function takes an additional length_total
 * argument which this signature lacks.
 */
// this cannot work like this...
int write_binary_ranlux_data(int * const s, LemonWriter * lemonwriter, DML_Checksum *checksum)
{
//   int x, y, z, t, i = 0, xG, yG, zG, tG, status = 0;
//   int latticeSize[] = {1, g_nproc_x*LX, g_nproc_y*LY, g_nproc_z*LZ};
//   int scidacMapping[] = {0, 3, 2, 1};
//   unsigned long bufoffset = 0;
//   char *filebuffer = NULL;
//   uint64_t bytes;
//   DML_SiteRank rank;
//   double tick = 0, tock = 0;
//   char measure[64];
//
//   DML_checksum_init(checksum);
//   bytes = (uint64_t)sizeof(su3_vector);
//   if (prec == 32) {
//     bytes /= 2;
//   }
//   if((void*)(filebuffer = malloc(VOLUME * bytes)) == NULL) {
//     fprintf (stderr, "malloc errno in write_binary_su3_vector_data_parallel: %d\n", errno);
//     fflush(stderr);
//     errno = 0;
//     /* do we need to abort here? */
//     return 1;
//   }
//
//   tG = g_proc_coords[0]*T;
//   zG = g_proc_coords[3]*LZ;
//   yG = g_proc_coords[2]*LY;
//   xG = g_proc_coords[1]*LX;
//   for(z = 0; z < LZ; z++) {
//     for(y = 0; y < LY; y++) {
//       for(x = 0; x < LX; x++) {
//         rank = (DML_SiteRank) (((zG + z)*L + yG + y)*L + xG + x);
//         i = g_ipt[t][x][y][z];
//
//         if (prec == 32)
//           be_to_cpu_assign_double2single((float*)(filebuffer + bufoffset), (double*)(s + i), sizeof(su3_vector) / 8);
//         else
//           be_to_cpu_assign((double*)(filebuffer + bufoffset), (double*)(s + i), sizeof(su3_vector) / 8);
//         DML_checksum_accum(checksum, rank, (char*) filebuffer + bufoffset, bytes);
//         bufoffset += bytes;
//       }
//     }
//   }
// }
//
// if (g_debug_level > 0) {
//   MPI_Barrier(g_cart_grid);
//   tick = MPI_Wtime();
// }
//
// status = lemonWriteLatticeParallelMapped(lemonwriter, filebuffer, bytes, latticeSize, scidacMapping);
//
// if (status != LEMON_SUCCESS)
// {
//   free(filebuffer);
//   fprintf(stderr, "LEMON write error occurred with status = %d, while in write_binary_su3_vector_data_l (su3_vector_write_binary.c)!\n", status);
//   return(-2);
// }
//
// if (g_debug_level > 0) {
//   MPI_Barrier(g_cart_grid);
//   tock = MPI_Wtime();
//
//   if (g_cart_id == 0) {
//     engineering(measure, latticeSize[0] * latticeSize[1] * latticeSize[2] * latticeSize[3] * bytes, "b");
//     fprintf(stdout, "# Time spent writing %s ", measure);
//     engineering(measure, tock - tick, "s");
//     fprintf(stdout, "was %s.\n", measure);
//     engineering(measure, latticeSize[0] * latticeSize[1] * latticeSize[2] * latticeSize[3] * bytes / (tock - tick), "b/s");
//     fprintf(stdout, "# Writing speed: %s", measure);
//     engineering(measure, latticeSize[0] * latticeSize[1] * latticeSize[2] * latticeSize[3] * bytes / (g_nproc * (tock - tick)), "b/s");
//     fprintf(stdout, " (%s per MPI process).\n", measure);
//     fflush(stdout);
//   }
// }
//
// lemonWriterCloseRecord(lemonwriter);
//
// DML_global_xor(&checksum->suma);
// DML_global_xor(&checksum->sumb);
//
// free(filebuffer);
  return 0;
}

#else
/* HAVE_LIBLEMON */
/*
 * Lime variant: appends the first length_total integers of state to the
 * currently open LIME record, one int per write call, in host
 * representation (no byte-order conversion is applied here).  Each word
 * is folded into the DML checksum with its array index as the rank.
 *
 * Only the process with g_cart_id == 0 does any work; all others return
 * immediately.  The checksum is not initialised by this function.
 *
 * Returns 0.  A negative status from limeWriteRecordData terminates the
 * program (after MPI_Abort when compiled with MPI).
 */
int write_binary_ranlux_data(int * const state, LimeWriter * limewriter, DML_Checksum * checksum, int const length_total)
{
  int idx = 0;
  int word;
  int rc = 0;
  n_uint64_t nbytes = sizeof(int);

  /* only the root process of the Cartesian grid writes */
  if (g_cart_id != 0)
    return (0);

  while (idx < length_total)
  {
    word = state[idx];
    DML_checksum_accum(checksum, (uint32_t) idx, (char*) &word, sizeof(int));
    rc = limeWriteRecordData((int *) &word, &nbytes, limewriter);

    if (rc < 0)
    {
      fprintf(stderr, "LIME write error occurred with status = %d, while in write_binary_ranlux_data (ranlux_write_binary.c)!\n", rc);
#ifdef MPI
      MPI_Abort(MPI_COMM_WORLD, 1);
      MPI_Finalize();
#endif
      exit(500);
    }

    ++idx;
  }

  return (0);
}
/*
 * Lemon (parallel I/O) writer for the gauge field.
 *
 * Every process packs its local T x LX x LY x LZ sub-lattice into one
 * buffer, site by site with x running fastest, and hands it to
 * lemonWriteLatticeParallelMapped.  For each site the four links are
 * stored in direction order 1, 2, 3, 0 and passed through
 * be_to_cpu_assign (or the double->single variant for prec == 32).
 * The DML checksum is accumulated per site with the global
 * lexicographic site rank and finally combined across processes with
 * DML_global_xor.
 *
 * lemonwriter - open Lemon writer
 * prec        - 32 for single precision output, anything else for double
 * checksum    - re-initialised here; holds the global checksum on success
 *
 * Returns 0 on success, 1 if the buffer cannot be allocated, -2 on a
 * Lemon write error or a failed checksum reduction.
 */
int write_binary_gauge_data(LemonWriter * lemonwriter, const int prec, DML_Checksum * checksum)
{
  int x, xG, y, yG, z, zG, t, tG, status = 0;
  int idx;
  su3 tmp3[4];
  /* global lattice extents in the order T, X, Y, Z */
  int latticeSize[] = {T_global, g_nproc_x*LX, g_nproc_y*LY, g_nproc_z*LZ};
  int scidacMapping[] = {0, 3, 2, 1};
  unsigned long bufoffset;
  char * filebuffer = NULL;
  uint64_t bytes;
  uint64_t total_bytes;
  double tick = 0, tock = 0;
  char measure[64];
  DML_SiteRank rank;

  DML_checksum_init(checksum);
  /* bytes per site: four su3 matrices, halved for single precision */
  bytes = (uint64_t)sizeof(su3) * (prec == 32 ? 2 : 4);
  bufoffset = 0;
  if((void*)(filebuffer = (char*)malloc(bytes * VOLUME)) == NULL) {
    fprintf (stderr, "malloc errno in write_binary_gauge_data_parallel: %d\n",errno);
    fflush(stderr);
    errno = 0;
    return 1;
  }

  if (g_debug_level > 0) {
    MPI_Barrier(g_cart_grid);
    tick = MPI_Wtime();
  }

  /* global coordinates of the origin of the local sub-lattice */
  tG = g_proc_coords[0]*T;
  zG = g_proc_coords[3]*LZ;
  yG = g_proc_coords[2]*LY;
  xG = g_proc_coords[1]*LX;
  for(t = 0; t < T; t++) {
    for(z = 0; z < LZ; z++) {
      for(y = 0; y < LY; y++) {
        for(x = 0; x < LX; x++) {
          /* Lexicographic global site rank, x fastest.  The global extent
           * of each direction is used so that lattices with unequal
           * spatial extents get the same ranks as in the Lime writer. */
          rank = (DML_SiteRank) ((((tG + t)*latticeSize[3] + zG + z)*latticeSize[2] + yG + y)*latticeSize[1] + xG + x);
          idx = g_ipt[t][x][y][z];
          /* reorder the links of this site to direction order 1,2,3,0 */
          memcpy(&tmp3[0], &g_gauge_field[idx][1], sizeof(su3));
          memcpy(&tmp3[1], &g_gauge_field[idx][2], sizeof(su3));
          memcpy(&tmp3[2], &g_gauge_field[idx][3], sizeof(su3));
          memcpy(&tmp3[3], &g_gauge_field[idx][0], sizeof(su3));
          if(prec == 32)
            be_to_cpu_assign_double2single(filebuffer + bufoffset, tmp3, 4*sizeof(su3)/8);
          else
            be_to_cpu_assign(filebuffer + bufoffset, tmp3, 4*sizeof(su3)/8);
          DML_checksum_accum(checksum, rank, (char*) filebuffer + bufoffset, bytes);
          bufoffset += bytes;
        }
      }
    }
  }

  status = lemonWriteLatticeParallelMapped(lemonwriter, filebuffer, bytes, latticeSize, scidacMapping);

  if (status != LEMON_SUCCESS)
  {
    free(filebuffer);
    fprintf(stderr, "LEMON write error occurred with status = %d, while writing in gauge_write_binary.c!\n", status);
    return(-2);
  }

  if (g_debug_level > 0) {
    MPI_Barrier(g_cart_grid);
    tock = MPI_Wtime();

    if (g_cart_id == 0) {
      /* widen before multiplying: the product of the four extents can
       * exceed the range of int on large lattices */
      total_bytes = (uint64_t)latticeSize[0] * (uint64_t)latticeSize[1] * (uint64_t)latticeSize[2] * (uint64_t)latticeSize[3] * bytes;
      engineering(measure, total_bytes, "b");
      fprintf(stdout, "# Time spent writing %s ", measure);
      engineering(measure, tock - tick, "s");
      fprintf(stdout, "was %s.\n", measure);
      engineering(measure, total_bytes / (tock - tick), "b/s");
      fprintf(stdout, "# Writing speed: %s", measure);
      engineering(measure, total_bytes / (g_nproc * (tock - tick)), "b/s");
      fprintf(stdout, " (%s per MPI process).\n", measure);
      fflush(stdout);
    }
  }

  lemonWriterCloseRecord(lemonwriter);

  free(filebuffer);

  /* combine the per-process checksums */
  status = DML_global_xor(&checksum->suma);
  if (status != MPI_SUCCESS) {
    fprintf(stderr, "DML Checksum accumulation error occurred with status = %d, while writing in gauge_write_binary.c!\n", status);
    return(-2);
  }
  status = DML_global_xor(&checksum->sumb);
  if (status != MPI_SUCCESS) {
    fprintf(stderr, "DML Checksum accumulation error occurred with status = %d, while writing in gauge_write_binary.c!\n", status);
    return(-2);
  }

  return 0;
}
/*
 * Lime (serial I/O) writer for the gauge field.
 *
 * All processes walk the global lattice in the order t, z, y, x (x
 * fastest).  For every site the owning process is determined via
 * MPI_Cart_rank; the owner packs its four links in direction order
 * 1, 2, 3, 0, converts them with be_to_cpu_assign (or the double->single
 * variant for prec == 32) and, unless it is process 0 itself, sends them
 * to process 0.  Process 0 accumulates the DML checksum with the global
 * lexicographic site rank and appends the data to the LIME record.
 * The message tag counts the sites within one x-row and a barrier is
 * issued after every row.  Without TM_USE_MPI only the local branch
 * is compiled.
 *
 * limewriter - open LIME writer (used on process 0 only)
 * prec       - 32 for single precision output, anything else for double
 * checksum   - re-initialised here; only process 0 accumulates into it
 *
 * Returns 0.  A negative status from limeWriteRecordData terminates the
 * program on process 0 (after MPI_Abort when compiled with TM_USE_MPI).
 */
int write_binary_gauge_data(LimeWriter * limewriter, const int prec, DML_Checksum * checksum)
{
  int x, X, y, Y, z, Z, tt, t0, tag=0, id=0, status=0;
  /* global lattice extents in the order T, X, Y, Z */
  int latticeSize[] = {T_global, g_nproc_x*LX, g_nproc_y*LY, g_nproc_z*LZ};
  su3 tmp[4];
  su3 tmp3[4];
  float tmp2[72];
  int coords[4];
  n_uint64_t bytes;
  n_uint64_t total_bytes;
  DML_SiteRank rank;
  double tick = 0, tock = 0;
  char measure[64];
#ifdef TM_USE_MPI
  MPI_Status mpi_status;
#endif

  DML_checksum_init(checksum);

  if (g_debug_level > 0) {
#ifdef TM_USE_MPI
    MPI_Barrier(g_cart_grid);
#endif
    tick = gettime();
  }

  /* bytes per site: four su3 matrices, halved for single precision */
  if(prec == 32) bytes = (n_uint64_t)2*sizeof(su3);
  else bytes = (n_uint64_t)4*sizeof(su3);

  /* lower-case / t0 are global coordinates, upper-case / tt the
   * corresponding coordinates relative to the local sub-lattice */
  for(t0 = 0; t0 < T*g_nproc_t; t0++) {
    tt = t0 - g_proc_coords[0]*T;
    coords[0] = t0 / T;
    for(z = 0; z < LZ*g_nproc_z; z++) {
      Z = z - g_proc_coords[3]*LZ;
      coords[3] = z / LZ;
      for(y = 0; y < LY*g_nproc_y; y++) {
        tag = 0;
        Y = y - g_proc_coords[2]*LY;
        coords[2] = y / LY;
        for(x = 0; x < LX*g_nproc_x; x++) {
          X = x - g_proc_coords[1]*LX;
          coords[1] = x / LX;
#ifdef TM_USE_MPI
          /* id of the process owning the current site */
          MPI_Cart_rank(g_cart_grid, coords, &id);
#endif
          if(g_cart_id == 0) {
            /* Rank should be computed by proc 0 only */
            rank = (DML_SiteRank) (((t0*LZ*g_nproc_z + z)*LY*g_nproc_y + y)*LX*g_nproc_x + x);
            if(g_cart_id == id) {
              /* site is local to process 0: pack and write directly */
              memcpy(&tmp3[0], &g_gauge_field[ g_ipt[tt][X][Y][Z] ][1], sizeof(su3));
              memcpy(&tmp3[1], &g_gauge_field[ g_ipt[tt][X][Y][Z] ][2], sizeof(su3));
              memcpy(&tmp3[2], &g_gauge_field[ g_ipt[tt][X][Y][Z] ][3], sizeof(su3));
              memcpy(&tmp3[3], &g_gauge_field[ g_ipt[tt][X][Y][Z] ][0], sizeof(su3));
              if(prec == 32) {
                be_to_cpu_assign_double2single(tmp2, tmp3, 4*sizeof(su3)/8);
                DML_checksum_accum(checksum, rank, (char*) tmp2, 4*sizeof(su3)/2);
                status = limeWriteRecordData((void*)&tmp2, &bytes, limewriter);
              }
              else {
                be_to_cpu_assign(tmp, tmp3, 4*sizeof(su3)/8);
                DML_checksum_accum(checksum, rank, (char*) tmp, 4*sizeof(su3));
                status = limeWriteRecordData((void*)&tmp, &bytes, limewriter);
              }
            }
#ifdef TM_USE_MPI
            else {
              /* site lives elsewhere: receive the already converted data */
              if(prec == 32) {
                MPI_Recv(tmp2, 4*sizeof(su3)/8, MPI_FLOAT, id, tag, g_cart_grid, &mpi_status);
                DML_checksum_accum(checksum, rank, (char*) tmp2, 4*sizeof(su3)/2);
                status = limeWriteRecordData((void*)&tmp2, &bytes, limewriter);
              }
              else {
                MPI_Recv(tmp, 4*sizeof(su3)/8, MPI_DOUBLE, id, tag, g_cart_grid, &mpi_status);
                DML_checksum_accum(checksum, rank, (char*) tmp, 4*sizeof(su3));
                status = limeWriteRecordData((void*)&tmp, &bytes, limewriter);
              }
            }
#endif
            if(status < 0 ) {
              fprintf(stderr, "LIME write error occurred with status = %d, while writing in gauge_write_binary.c!\n", status);
              /* global coordinates first (t0 is the global time), local ones in parentheses */
              fprintf(stderr, "x %d, y %d, z %d, t %d (%d,%d,%d,%d)\n",x,y,z,t0,X,Y,Z,tt);
              fprintf(stderr, "id = %d, bytes = %lu, size = %d\n", g_cart_id, (unsigned long)bytes, (int)(4*sizeof(su3)/8));
#ifdef TM_USE_MPI
              MPI_Abort(MPI_COMM_WORLD, 1);
              MPI_Finalize();
#endif
              exit(500);
            }
          }
#ifdef TM_USE_MPI
          else {
            if(g_cart_id == id){
              /* this process owns the site: pack, convert and send to process 0 */
              memcpy(&tmp3[0], &g_gauge_field[ g_ipt[tt][X][Y][Z] ][1], sizeof(su3));
              memcpy(&tmp3[1], &g_gauge_field[ g_ipt[tt][X][Y][Z] ][2], sizeof(su3));
              memcpy(&tmp3[2], &g_gauge_field[ g_ipt[tt][X][Y][Z] ][3], sizeof(su3));
              memcpy(&tmp3[3], &g_gauge_field[ g_ipt[tt][X][Y][Z] ][0], sizeof(su3));
              if(prec == 32) {
                be_to_cpu_assign_double2single(tmp2, tmp3, 4*sizeof(su3)/8);
                MPI_Send((void*) tmp2, 4*sizeof(su3)/8, MPI_FLOAT, 0, tag, g_cart_grid);
              }
              else {
                be_to_cpu_assign(tmp, tmp3, 4*sizeof(su3)/8);
                MPI_Send((void*) tmp, 4*sizeof(su3)/8, MPI_DOUBLE, 0, tag, g_cart_grid);
              }
            }
          }
#endif
          tag++;
        }
#ifdef TM_USE_MPI
        MPI_Barrier(g_cart_grid);
#endif
      }
    }
  }

  if (g_debug_level > 0) {
#ifdef TM_USE_MPI
    MPI_Barrier(g_cart_grid);
#endif
    tock = gettime();

    if (g_cart_id == 0) {
      /* widen before multiplying: the product of the four extents can
       * exceed the range of int on large lattices */
      total_bytes = (n_uint64_t)latticeSize[0] * (n_uint64_t)latticeSize[1] * (n_uint64_t)latticeSize[2] * (n_uint64_t)latticeSize[3] * bytes;
      engineering(measure, total_bytes, "b");
      fprintf(stdout, "# Time spent writing %s ", measure);
      engineering(measure, tock-tick, "s");
      fprintf(stdout, "was %s.\n", measure);
      engineering(measure, total_bytes / (tock-tick), "b/s");
      fprintf(stdout, "# Writing speed: %s", measure);
      engineering(measure, total_bytes / (g_nproc * (tock-tick)), "b/s");
      fprintf(stdout, " (%s per MPI process).\n", measure);
    }
  }

  return(0);
}
/*
 * Reads a 3-dimensional contraction field from an open LIME record and
 * accumulates the DML checksum of the data as read from file.
 *
 * For every site (x,y,z) of the local LX x LY x LZ volume (taken at
 * index 0 of the first g_ipt dimension) one chunk of 2*N words is read
 * and scattered, two consecutive doubles per component, to offset
 * _GWI(mu, site, LX*LY*LZ) in s.  On hosts for which big_endian() is
 * false the words are byte swapped while being copied.
 *
 * s          - destination field
 * limereader - LIME reader positioned at the binary data
 * prec       - 32: file holds single precision words,
 *              anything else: file holds doubles
 * N          - number of components per site
 * ans        - checksum; re-initialised here and updated per site
 *
 * Returns 0 on success, 1 if compiled with MPI (not supported), -1 if a
 * read reports a negative status other than LIME_EOR.  The program is
 * terminated if the work buffers cannot be allocated.
 */
int read_binary_contraction_data_3d(double * const s, LimeReader * limereader, const int prec, const int N, DML_Checksum *ans)
{
#ifdef MPI
  fprintf(stderr, "[read_binary_contraction_data_3d] No mpi version.\n");
  return(1);
#else
  int status=0, mu;
  int retval = 0;
  n_uint64_t bytes, site_bytes, ix;
  double *tmp;
  DML_SiteRank rank;
  float *tmp2;
  int x, y, z;
  int words_bigendian = big_endian();
  unsigned int VOL3 = LX * LY * LZ;

  DML_checksum_init(ans);
  rank = (DML_SiteRank) 0;

  if( (tmp = (double*)malloc(2*N*sizeof(double))) == (double*)NULL ) {
    exit(500);
  }
  if( (tmp2 = (float*)malloc(2*N*sizeof(float))) == (float*)NULL ) {
    exit(501);
  }

  /* number of bytes stored per lattice site */
  if(prec == 32) site_bytes = 2*N*sizeof(float);
  else           site_bytes = 2*N*sizeof(double);

  for(x = 0; x < LX; x++){
  for(y = 0; y < LY; y++){
  for(z = 0; z < LZ; z++){
    ix = g_ipt[0][x][y][z];
    rank = (DML_SiteRank) (( LXstart + x)*(LY*g_nproc_y) + LYstart + y)*LZ + z;

    /* the byte count is an in/out argument of limeReaderReadData,
     * so it is set afresh for every site */
    bytes = site_bytes;
    if(prec == 32) {
      status = limeReaderReadData(tmp2, &bytes, limereader);
    } else {
      status = limeReaderReadData(tmp, &bytes, limereader);
    }
    /* bail out before the buffer contents are used, releasing the buffers */
    if(status < 0 && status != LIME_EOR) {
      retval = -1;
      goto cleanup;
    }
    if(prec == 32) {
      DML_checksum_accum(ans,rank,(char *) tmp2, bytes);
    } else {
      DML_checksum_accum(ans,rank,(char *) tmp, bytes);
    }

    /* scatter the N components of this site into s */
    for(mu=0; mu<N; mu++) {
      if(!words_bigendian) {
        if(prec == 32) {
          byte_swap_assign_single2double(s + _GWI(mu,ix,VOL3), (float*)(tmp2+2*mu), 2);
        } else {
          /* tmp holds doubles; pass it without a float* cast, as the writer does */
          byte_swap_assign(s + _GWI(mu,ix,VOL3), (tmp+2*mu), 2);
        }
      } else {  /* words_bigendian true */
        if(prec == 32) {
          single2double(s + _GWI(mu,ix,VOL3), (float*)(tmp2+2*mu), 2);
        } else {
          s[_GWI(mu, ix,VOL3)  ] = tmp[2*mu  ];
          s[_GWI(mu, ix,VOL3)+1] = tmp[2*mu+1];
        }
      }
    }
  }}}

  if(g_cart_id == 0) printf("\n# [read_binary_contraction_data] The final checksum is %#lx %#lx\n", (unsigned long)(*ans).suma, (unsigned long)(*ans).sumb);

cleanup:
  free(tmp2);
  free(tmp);
  return(retval);
#endif
}