void testSimpleReadWrite() { loadStore(); unsigned char *b = (unsigned char *) malloc( sizeof(unsigned char) * 1024 * 1024); f_write(17, b, 5 * 1024, 34); c_read(17, b, 5 * 1024, 34); CPPUNIT_ASSERT(sbdi_fsync(sbdi, SIV_KEYS) == SBDI_SUCCESS); closeStore(); loadStore(); c_read(17, b, 5 * 1024, 34); closeStore(); deleteStore(); free(b); }
void testRandomAccess() { loadStore(); const int BLK_SIZE = 1 * 1024; const int BLKS = 2048; unsigned char *b = (unsigned char *) malloc( sizeof(unsigned char) * BLK_SIZE); CPPUNIT_ASSERT(b); //==== Sequential Setup Phase ==== for (int i = 0; i < BLKS; ++i) { f_write(i % 256, b, BLK_SIZE, i * BLK_SIZE); } for (int i = 0; i < BLKS; ++i) { c_read(i % 256, b, BLK_SIZE, i * BLK_SIZE); } //==== Random Write Test ==== std::vector<int> idxs; for (int i = 0; i < BLKS; ++i) { idxs.push_back(i); } std::random_shuffle(idxs.begin(), idxs.end()); for (int i = 0; i < BLKS; ++i) { f_write(i % 256, b, BLK_SIZE, idxs.at(i) * BLK_SIZE); } for (int i = 0; i < BLKS; ++i) { c_read(i % 256, b, BLK_SIZE, idxs.at(i) * BLK_SIZE); } closeStore(); deleteStore(); free(b); }
void testParameterChecks() { loadStore(); ssize_t rd = 0; off_t of = 0; uint8_t d[2] = { 42, 23 }; ASS_ERR_ILL_PAR(sbdi_read(NULL, NULL, NULL, SBDI_SIZE_MAX + 1)); ASS_ERR_ILL_PAR(sbdi_read(&rd, NULL, NULL, SBDI_SIZE_MAX + 1)); ASS_ERR_ILL_PAR(sbdi_read(&rd, sbdi, NULL, SBDI_SIZE_MAX + 1)); ASS_ERR_ILL_PAR(sbdi_read(&rd, sbdi, d, SBDI_SIZE_MAX + 1)); ASS_SUC(sbdi_read(&rd, sbdi, d, 1)); CPPUNIT_ASSERT(rd == 0 && d[0] == 42); ASS_ERR_ILL_PAR(sbdi_write(NULL, NULL, NULL, SBDI_SIZE_MAX + 1)); ASS_ERR_ILL_PAR(sbdi_write(&rd, NULL, NULL, SBDI_SIZE_MAX + 1)); ASS_ERR_ILL_PAR(sbdi_write(&rd, sbdi, NULL, SBDI_SIZE_MAX + 1)); ASS_ERR_ILL_PAR(sbdi_write(&rd, sbdi, d, SBDI_SIZE_MAX + 1)); ASS_SUC(sbdi_write(&rd, sbdi, d, 1)); ASS_SUC(sbdi_lseek(&of, sbdi, 0, SBDI_SEEK_SET)); ASS_SUC(sbdi_read(&rd, sbdi, d, 1)); CPPUNIT_ASSERT(rd == 1 && d[0] == 42); ASS_ERR_ILL_PAR(sbdi_lseek(NULL, NULL, SBDI_SIZE_MAX + 1, SBDI_SEEK_SET)); ASS_ERR_ILL_PAR(sbdi_lseek(&of, NULL, SBDI_SIZE_MAX + 1, SBDI_SEEK_SET)); ASS_ERR_ILL_PAR(sbdi_lseek(NULL, sbdi, SBDI_SIZE_MAX + 1, SBDI_SEEK_SET)); ASS_ERR_ILL_PAR(sbdi_lseek(&of, sbdi, SBDI_SIZE_MAX + 1, SBDI_SEEK_SET)); ASS_SUC(sbdi_lseek(&of, sbdi, SBDI_SIZE_MAX-1, SBDI_SEEK_SET)); // NOTE: This check fails on 32 bit systems, because the overflow detection is triggered ASS_SUC(sbdi_write(&rd, sbdi, d, 1)); ASS_SUC(sbdi_lseek(&of, sbdi, SBDI_SIZE_MAX-1, SBDI_SEEK_SET)); ASS_SUC(sbdi_read(&rd, sbdi, d, 1)); CPPUNIT_ASSERT(rd == 1 && d[0] == 42); ASS_SUC(sbdi_lseek(&of, sbdi, SBDI_SIZE_MAX-1, SBDI_SEEK_SET)); ASS_ERR_ILL_PAR(sbdi_lseek(&of, sbdi, 1, SBDI_SEEK_CUR)); ASS_SUC(sbdi_lseek(&of, sbdi, 0, SBDI_SEEK_CUR)); ASS_SUC(sbdi_write(&rd, sbdi, d, 2)); CPPUNIT_ASSERT(rd == 1); ASS_ERR_ILL_PAR(sbdi_lseek(&of, sbdi, 1, SBDI_SEEK_END)); ASS_ERR_ILL_PAR(sbdi_lseek(&of, sbdi, 0, SBDI_SEEK_END)); closeStore(); deleteStore(); }
void testReadWriteUpdate() { loadStore(); closeStore(); deleteStore(); }
/* Test program * created 23/09/2014 * author Alex Bombrun * * icc -O1 -o eigen.exe lapackReadStore.c mpiutil.c normals.c matrixBlockStore.c -mkl * ./eigen.exe 4 4 * */ int main(int argc, char **argv) { FILE* store; FILE* scaStore; int N , M; int i, j; int n_blocks; int scalapack_size; int NB, MB; int i_block, j_block; int dim[4]; double * mat; // local matrix block use for reading double * matK; // the kernel matrix ng x 6 int t, t_block; const char* profileG_file_name= "./data/NormalsG/profile.txt"; const char* store_location = "./data/ReducedNormals"; const char* scaStore_location ="./data/CholeskyReducedNormals"; const char* kernel_file_name ="./data/kernel.txt"; int mp; // number of rows in the processor grid int mla; // number of rows in the local array int mb; // number of rows in a block int np; // number of columns in the processor grid int nla; // number of columns in the local array int nb; // number of columns in a block int mype,npe; // rank and total number of process int idescal[9]; // matrix descriptors double *la; // matrix values: al is the local array int idescaal[9]; double *laa; int idescbl[9]; double *lb; double normb; int idescxl[9]; double *lx; double normx; int idesck1l[9]; double *lk1; double normk1; int idesczl[9]; // matrix descriptors double *lz; // matrix values: al is the local array double *w; int ierr; // error output int mp_ret, np_ret, myrow, mycol; // to store grid info int zero=0; // value used for the descriptor initialization int one=1; // value used for the descriptor initialization int m,n; // matrix A dimensions double norm, cond; double *work = NULL; double * work2 = NULL; int *iwork = NULL; int lwork, liwork; float ll,mm,cr,cc; int ii,jj,pr,pc,h,g; // ii,jj coordinates of local array element int rsrc=0,csrc=0; // assume that 0,0 element should be stored in the 0,0 process int n_b = 1; int index; int icon; // scalapack cblacs context char normJob, jobz, uplo, trans, notrans, diag; double MPIt1, MPIt2, MPIelapsed; jobz= 'N'; uplo='U'; Cblacs_pinfo( &mype, &npe ); if (argc == 3) { //printf("%s %s %s\n", argv[0], argv[1], argv[2]); n_blocks= (int) strtol(argv[1], NULL, 10); scalapack_size= (int) strtol(argv[2], NULL, 10); } else { printf("Usage: expect 2 integers \n"); printf(" 1 : the number of diagonal blocks \n"); printf(" 2 : scalapack number to define block size (assume n is divisible by sqrt(p) and that n/sqrt(p) is divisible by this number)\n"); exit( -1); } printf("%d/%d: read store\n",mype,npe); N = getNumberOfLine(profileG_file_name); // the dimension of the matrix; M = N; // square matrix m=M; //mla*mp; n=N; //nla*np; np = isqrt(npe); // assume that the number of process is a square mp = np; // square grid mla = m/mp; // assume that the matrix dimension if a multiple of the process grid dimension nla = n/np; mb = mla/scalapack_size; // assume that the dimension of the matrix is a multiple of the number of the number of diagonal blocks nb = nla/scalapack_size; // init CBLACS Cblacs_get( -1, 0, &icon ); Cblacs_gridinit( &icon,"c", mp, np ); Cblacs_gridinfo( icon, &mp_ret, &np_ret, &myrow, &mycol); // read blocks and set the reduced normal matrix in scalapack grid // allocate local matrix la=malloc(sizeof(double)*mla*nla); printf("%d/%d: full matrix (%d,%d), local matrix (%d,%d), processor grid (%d,%d), block (%d,%d) \n", mype, npe, m, n, mla, nla, np, mp, mb, nb); for(i_block=0;i_block<n_blocks;i_block++){ printf("%d/%d: process store block %d \n", mype, npe, i_block); readStore(&store,i_block,store_location); t_block = 0; while(readNextBlockDimension(dim,store)!=-1) { // loop B over all block tasks j_block = mpi_get_diag_block_id(i_block, t_block, n_blocks); mat = malloc((dim[1]-dim[0])*(dim[3]-dim[2]) * sizeof(double)); readNextBlock(dim[0],dim[1],dim[2],dim[3],mat,store); // printf("%d/%d: read block (%d,%d) with global indices (%d,%d,%d,%d) \n",mype, npe, i_block,j_block,dim[0],dim[1],dim[2],dim[3]); NB = dim[1]-dim[0]; MB = dim[3]-dim[2]; for(i = dim[0];i<dim[1];i++){ for(j = dim[2];j<dim[3];j++){ //matA[i*M+j] = mat[(i-dim[0])*MB+(j-dim[2])]; // finding out which pe gets this i,j element cr = (float)( i/mb ); h = rsrc+(int)(cr); pr = h%np; cc = (float)( j/mb ); g = csrc+(int)(cc); pc = g%mp; // check if process should get this element if (myrow == pr && mycol==pc){ // ii = x + l*mb // jj = y + m*nb ll = (float)( ( i/(np*mb) ) ); // thinks seems to be mixed up does not matter as long as the matrix, the block and the grid is symmetric mm = (float)( ( j/(mp*nb) ) ); ii = i%mb + (int)(ll)*mb; jj = j%nb + (int)(mm)*nb; index=jj*mla+ii; // seems to be the transpose !? //if(index<0) printf("%d/%d: negative index (%d,%d) \n",mype,npe,i,j); //if(index>=mla*nla) printf("%d/%d: too large index (%d,%d) \n",mype,npe,i,j); la[index] = mat[(i-dim[0])*MB+(j-dim[2])]; } } } // transpose if(j_block != i_block){ for(i = dim[0];i<dim[1];i++){ for(j = dim[2];j<dim[3];j++){ //matA[j*M+i] = mat[(i-dim[0])*MB+(j-dim[2])]; // finding out which pe gets this j,i element cr = (float)( j/mb ); h = rsrc+(int)(cr); pr = h%np; cc = (float)( i/mb ); g = csrc+(int)(cc); pc = g%mp; // check if process should get this element if (myrow == pr && mycol==pc){ // ii = x + l*mb // jj = y + m*nb ll = (float)( ( j/(np*mb) ) ); // thinks seems to be mixed up does not matter as long as the matrix, the block and the grid is symmetric mm = (float)( ( i/(mp*nb) ) ); ii = j%mb + (int)(ll)*mb; jj = i%nb + (int)(mm)*nb; index=jj*mla+ii; // seems to be the transpose !? //if(index<0) printf("%d/%d: negative index (%d,%d) \n",mype,npe,i,j); //if(index>=mla*nla) printf("%d/%d: too large index (%d,%d) \n",mype,npe,i,j); la[index] = mat[(i-dim[0])*MB+(j-dim[2])]; } } } } free(mat); t_block++; } closeStore(store); } printf("%d/%d: finished scaterring the matrix \n",mype,npe); // read the kernel matrix printf("%d/%d: set kernel \n",mype,npe); matK = malloc(N * 6* sizeof(double)); readMatrixDouble(matK,kernel_file_name); // set k1 lk1 = calloc(sizeof(double),mla); for(i = 0; i<N; i++) { // finding out which pe gets i element cr = (float)( i/mb ); h = rsrc+(int)(cr); pr = h%np; // check if process should get this element if (myrow == pr) { // ii = x + l*mb ll = (float)( ( i/(np*mb) ) ); // thinks seems to be mixed up does not matter as long as the matrix, the block and the grid is symmetric ii = i%mb + (int)(ll)*mb; lk1[ii] = matK[N*0+i]; } } printf("%d/%d: finished scaterring the kernel vector \n",mype,npe); printf("%d/%d: start computing \n",mype,npe); // set the matrix descriptor ierr=0; descinit_(idescal, &m, &n , &mb, &nb , &zero, &zero, &icon, &mla, &ierr); // processor grip id start at 0 if (mype==0) saveMatrixDescriptor(idescal, scaStore_location); ierr=0; descinit_(idescaal, &m, &n , &mb, &nb , &zero, &zero, &icon, &mla, &ierr); // processor grip id start at 0 ierr=0; descinit_(idescbl, &m, &one , &mb, &nb , &zero, &zero, &icon, &nla, &ierr); // processor grip id start at 0 lb = calloc(sizeof(double),mla); ierr=0; // set x descinit_(idescxl, &n, &one , &mb, &nb , &zero, &zero, &icon, &nla, &ierr); // processor grip id start at 0 lx = calloc(sizeof(double),mla); for(i=0;i<mla;i++){ lx[i] = 1.0/m; } pddot_(&n,&normx,lx,&one,&one,idescxl,&one,lx,&one,&one,idescxl,&one); // normx <- x'x if (mype==0) printf("%d/%d: normx2 %E \n",mype,npe,normx); ierr=0; // set k1 descinit_(idesck1l, &n, &one , &mb, &nb , &zero, &zero, &icon, &nla, &ierr); // processor grip id start at 0 pddot_(&n,&normk1,lk1,&one,&one,idesck1l,&one,lk1,&one,&one,idesck1l,&one); // normx <- x'x if (mype==0) printf("%d/%d: normk1 square %E \n",mype,npe,normk1); ierr=0; // set b double alpha =1.0; double beta =0.0; notrans = 'N'; pdgemv_(¬rans,&m,&n,&alpha,la,&one,&one,idescal,lk1,&one,&one,idesck1l,&one,&beta,lb,&one,&one,idescbl,&one); // b <- A k1 pddot_(&n,&normb,lb,&one,&one,idescbl,&one,lb,&one,&one,idescbl,&one); // norm <- b'b if (mype==0) printf("%d/%d: is kernel, normb square %E \n",mype,npe,normb); ierr=0; // set b alpha =1.0; beta =0.0; notrans = 'N'; pdgemv_(¬rans,&m,&n,&alpha,la,&one,&one,idescal,lx,&one,&one,idescxl,&one,&beta,lb,&one,&one,idescbl,&one); // b <- A x pddot_(&n,&normb,lb,&one,&one,idescbl,&one,lb,&one,&one,idescbl,&one); // norm <- b'b if (mype==0) printf("%d/%d: normb2 %E \n",mype,npe,normb); // set aa printf("%d/%d: start setting aa with k1 x k1\' \n",mype,npe); alpha = 1.0; beta = 1.0; trans = 'T'; notrans = 'N'; // laa=malloc(sizeof(double)*mla*nla); // for debugging //pdgemm(transa, transb, // m, n, k, // alpha, a, ia , ja , desca, // b, ib , jb ,descb, // beta, c, ic , jc , descc) // A = k1 (Nx1) // B = k1 (Nx1) // C = aa (MxN) ... M=N // C <- C + A x B' pdgemm_(¬rans, &trans, &N, &N, &one, &alpha, lk1, &one, &one, idesck1l, lk1, &one, &one, idesck1l, &beta, la, &one, &one, idescal ); for(j = 1; j<6; j++) { for(i = 0; i<N; i++) { // finding out which pe gets i element cr = (float)( i/mb ); h = rsrc+(int)(cr); pr = h%np; // check if process should get this element if (myrow == pr) { // ii = x + l*mb ll = (float)( ( i/(np*mb) ) ); // thinks seems to be mixed up does not matter as long as the matrix, the block and the grid is symmetric ii = i%mb + (int)(ll)*mb; lk1[ii] = matK[N*j+i]; } } pdgemm_(¬rans, &trans, &N, &N, &one, &alpha, lk1, &one, &one, idesck1l, lk1, &one, &one, idesck1l, &beta, la, &one, &one, idescal ); } ierr = 0; // compute norm 1 of the reduced normal matrix /* DO NOT WORK lwork = 2*mla+2*nla; work = malloc(sizeof(double)*lwork); normJob = '1'; norm = pdlansy_(&normJob, &uplo, &n, la, &one, &one, idescal, work); // matrix index start at one printf("%d/%d: norm %f \n",mype,npe,norm); free(work); */ ierr = 0; // compute the cholesky decomposition printf("%d/%d: start computing cholesky factor\n",mype,npe); pdpotrf_(&uplo,&n,la,&one,&one,idescal,&ierr); printf("%d/%d: finish computing cholesky factor\n",mype,npe); openScalapackStore(&scaStore,myrow,mycol,scaStore_location); saveLocalMatrix(la,nla,mla,scaStore); double test=0.0; for(i=0;i<nla*mla;i++){ test += la[i]*la[i]; } printf("%d/%d: finished computing cholesky, test=%f \n",mype,npe,test); ierr =0; // assume x and b set // assume cholesky decomposition // compute the soluation A x = b diag = 'N'; printf("%d/%d: start solving\n",mype,npe); //pdpptrs_(&uplo, &trans , &diag , &n , &one , la , &one , &one , idescal , lb , &one , &one , idescbl , &ierr); // solve triangular system //pdtrtrs (&uplo, &trans , &diag , &n , &n , la , &one , &one , idescal , lb , &one , &one , idescbl , &ierr); pdpotrs_(&uplo, &n , &one , la , &one , &one , idescal , lb , &one , &one , idescbl , &ierr); // b<- A-1 b alpha = -1.0; normb=0; pdaxpy_(&n,&alpha,lx,&one,&one,idescxl,&one,lb,&one,&one,idescbl,&one); // b<-b-x pddot_(&n,&normb,lb,&one,&one,idescbl,&one,lb,&one,&one,idescbl,&one); // norm <- b'b if (mype==0) printf("%d/%d: finish solving, norm2(sol-true) %E \n",mype,npe,normb); ierr = 0; /* // compute the eigen values jobz= 'N'; uplo='U'; // with N z is ignored descinit_(idesczl, &m, &n , &mb, &nb , &zero, &zero, &icon, &mla, &ierr); lz = malloc(sizeof(double)*mla*nla); w = malloc(sizeof(double)*m); lwork = -1; work = malloc(sizeof(double)*2); pdsyev_( &jobz, &uplo, &n, la, &one, &one, idescal, w, lz, &one, &one, idesczl, work, &lwork, &ierr); // only compute lwork //pdsyev_( &jobz, &uplo, &n, A, &ione, &ione, descA, W, Z, &ione, &ione, descZ, work, &lwork, &info ); lwork= (int) work[0]; free(work); work = (double *)calloc(lwork,sizeof(double)) ; //MPIt1 = MPI_Wtime(); pdsyev_( &jobz, &uplo, &n, la, &one, &one, idescal, w, lz, &one, &one, idesczl, work, &lwork, &ierr); // compute the eigen values //MPIt2 = MPI_Wtime(); //MPIelapsed=MPIt2-MPIt1; if (mype == 0) { saveMatrix(n,w,"eigenvalues.txt"); //printf("%d/%d: finished job in %8.2fs\n",mype,npe,MPIelapsed); // not working } */ ierr = 0; // compute the conditioner number assume that the norm and the cholesky decomposition have been computed /* DO NOT WORK lwork = 2*mla+3*nla; printf("%d/%d: lwork=%d @%p\n",mype,npe,lwork,&lwork); work2 = malloc(sizeof(double)*lwork); liwork = 2*mla+3*nla; iwork = malloc(sizeof(int)*liwork); pdpocon_(&uplo,&n,la,&one,&one,idescal,&norm,&cond,work2,&lwork,iwork,&liwork,&ierr); printf("%d/%d: condition number %f \n",mype,npe,cond); */ free(la); Cblacs_gridexit(icon); Cblacs_exit( 0 ); return 0; }