/*
 * Reads records of three integers -- `before`, `after` and `cost_with_tax` --
 * until a record whose third value is 0 (or until input runs out).
 *
 * For each record, every split of `cost_with_tax` into two positive
 * tax-included prices is tried. For each part a tax-excluded price is
 * recovered with iceil(price * 100, 100 + before) (iceil is defined elsewhere
 * in the file; presumably a ceiling division -- confirm). A split is kept
 * only if re-applying the `before` rate (with truncating division) to both
 * recovered prices reproduces `cost_with_tax` exactly. Among the kept splits
 * the largest total under the `after` rate is printed, or 0 if none is kept.
 */
int main()
{
    int before, after;
    int cost_with_tax;

    /* Require all three conversions to succeed: without this check an EOF or
     * malformed record would leave the variables stale/uninitialized and the
     * loop would never terminate. */
    while (scanf("%d%d%d", &before, &after, &cost_with_tax) == 3 && cost_with_tax) {
        int ma = 0, x;
        int cost1_with_tax, cost1_without_tax;
        int cost2_with_tax, cost2_without_tax;

        /* Only the first half is scanned; the second item is the remainder,
         * so every unordered pair is visited once. */
        for (cost1_with_tax = 1; cost1_with_tax <= cost_with_tax / 2; cost1_with_tax++) {
            cost1_without_tax = iceil(cost1_with_tax * 100, 100 + before);
            cost2_with_tax = cost_with_tax - cost1_with_tax;
            cost2_without_tax = iceil(cost2_with_tax * 100, 100 + before);

            /* Re-apply the old rate; discard splits that do not round-trip. */
            x = cost1_without_tax * (100 + before) / 100
              + cost2_without_tax * (100 + before) / 100;
            if (x != cost_with_tax) continue;

            /* Author's note: the sample I/O passes, but this part still feels
             * like an unproven (possibly incorrect) solution. */
            x = cost1_without_tax * (100 + after) / 100
              + cost2_without_tax * (100 + after) / 100;
            if (ma < x) ma = x;
        }
        printf("%d\n", ma);
    }
    return 0;
}
/*
 * Allocate and initialise a ring-buffer stream object.
 *
 * Validation performed, in order:
 *   - `out` and `device` must be non-NULL;
 *   - `pagesize` and `packetsize` must be non-zero;
 *   - `packetsize` must be a multiple of `pagesize`;
 *   - `begin` and `end` must be multiples of `pagesize`;
 *   - `address` must lie in [begin, end].
 *
 * On success the new object is stored in `*out`. Its `address` field is
 * iceil(address, pagesize) (presumably `address` rounded up to a page
 * boundary -- confirm against iceil's definition) and `skip` is the
 * difference between that value and the requested address.
 *
 * Returns DC_STATUS_SUCCESS, DC_STATUS_INVALIDARGS on a failed check, or
 * DC_STATUS_NOMEMORY if the allocation fails.
 */
dc_status_t dc_rbstream_new (dc_rbstream_t **out, dc_device_t *device, unsigned int pagesize, unsigned int packetsize, unsigned int begin, unsigned int end, unsigned int address)
{
	dc_rbstream_t *stream = NULL;
	unsigned int aligned = 0;

	if (!out || !device)
		return DC_STATUS_INVALIDARGS;

	// A zero size would make the modulo checks below divide by zero.
	if (!pagesize || !packetsize) {
		ERROR (device->context, "Zero length page or packet size!");
		return DC_STATUS_INVALIDARGS;
	}

	// A packet must consist of whole pages.
	if ((packetsize % pagesize) != 0) {
		ERROR (device->context, "Packet size not a multiple of the page size!");
		return DC_STATUS_INVALIDARGS;
	}

	// Both ends of the ringbuffer must sit on a page boundary.
	if ((begin % pagesize) != 0 || (end % pagesize) != 0) {
		ERROR (device->context, "Ringbuffer not aligned to the page size!");
		return DC_STATUS_INVALIDARGS;
	}

	// The start address may be anywhere in [begin, end].
	if (address < begin || address > end) {
		ERROR (device->context, "Address outside the ringbuffer!");
		return DC_STATUS_INVALIDARGS;
	}

	// The object is allocated with packetsize extra bytes following it.
	stream = (dc_rbstream_t *) malloc (sizeof (*stream) + packetsize);
	if (!stream) {
		ERROR (device->context, "Failed to allocate memory.");
		return DC_STATUS_NOMEMORY;
	}

	aligned = iceil (address, pagesize);

	stream->device = device;
	stream->pagesize = pagesize;
	stream->packetsize = packetsize;
	stream->begin = begin;
	stream->end = end;
	stream->address = aligned;
	stream->available = 0;
	stream->skip = aligned - address;

	*out = stream;

	return DC_STATUS_SUCCESS;
}
// -------------------------------------------------------------------------------------------------------------------- bool SimpleSolution::Init(const std::vector<RotationCubeProblem::Vertex>& _vertices, const std::vector<RotationCubeProblem::Index>& _indices, size_t _objectCount) { mObjectCount = _objectCount; mIndexCount = _indices.size(); // Program const char* kUniformNames[] = { "gTex", nullptr }; mProgram = CreateProgramT("cubes_gl_simple_vs.glsl", "cubes_gl_simple_fs.glsl", kUniformNames, &mUniformLocation); if (mProgram == 0) { console::warn("Unable to initialize solution '%s', shader compilation/linking failed.", GetName().c_str()); return false; } glGenVertexArrays(1, &mVertexArrayObject); glBindVertexArray(mVertexArrayObject); GLuint UB0 = glGetUniformBlockIndex(mProgram, "UB0"); glUniformBlockBinding(mProgram, UB0, 0); GLuint UB1 = glGetUniformBlockIndex(mProgram, "UB1"); glUniformBlockBinding(mProgram, UB1, 1); GLint uniformBufferOffsetAlignment = 0; glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniformBufferOffsetAlignment); mMatrixStride = iceil(sizeof(Matrix), uniformBufferOffsetAlignment); glGenBuffers(1, &mVertexBuffer); glGenBuffers(1, &mIndexBuffer); glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer); BufferData(GL_ARRAY_BUFFER, _vertices, GL_STATIC_DRAW); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer); BufferData(GL_ELEMENT_ARRAY_BUFFER, _indices, GL_STATIC_DRAW); glGenBuffers(1, &mUniformBuffer0); glGenBuffers(1, &mUniformBuffer1); glGenVertexArrays(1, &mVAO); glBindVertexArray(mVAO); mStorage.resize(mMatrixStride); return glGetError() == GL_NO_ERROR; }
/*
 * Driver for the parallel symmetric tridiagonal eigensolver.
 *
 * jobz[0]  (case-insensitive): 'N' eigenvalues only, 'V' eigenvalues and
 *          eigenvectors, 'C' only count the (maximal) number of local
 *          eigenpairs and return it in *nzp.
 * range[0] (case-insensitive): 'A' all, 'V' those in (*vl, *vu],
 *          'I' those with index *il..*iu (1-based, validated against n).
 * np       pointer to the matrix dimension n (must be > 0).
 * D, E     diagonal / off-diagonal of the tridiagonal matrix (presumably of
 *          length n each -- the callee writes E[n-1]; confirm with callers).
 * tryracp  in: non-zero requests a test for high relative accuracy;
 *          out: set to 0 when that path is not taken.
 * comm     MPI communicator; it is duplicated internally and the duplicate
 *          is released before returning.
 * nzp, offsetp  out: number of local eigenvalues and their offset.
 * W, Z, ldz, Zsupp  output arrays / leading dimension handed to the
 *          computational routines.
 *
 * Returns 1 for invalid arguments or when MPI is not initialised / already
 * finalised, otherwise 0 (or the result of handle_small_cases for small n).
 *
 * Changes relative to the previous revision:
 *  - the copies Dcopy/E2copy made for the high-relative-accuracy refinement
 *    are now released on every return path (they used to leak on the
 *    count-only and eigenvalues-only paths);
 *  - a PMR_NUM_THREADS value that parses to less than one is clamped to one;
 *  - the count-only test uses logical && instead of bitwise &.
 */
int pmrrr (char *jobz, char *range, int *np, double *D, double *E,
           double *vl, double *vu, int *il, int *iu, int *tryracp,
           MPI_Comm comm, int *nzp, int *offsetp, double *W, double *Z,
           int *ldz, int *Zsupp)
{
  /* Input parameter */
  int n = *np;
  bool onlyW  = toupper(jobz[0]) == 'N';
  bool wantZ  = toupper(jobz[0]) == 'V';
  bool cntval = toupper(jobz[0]) == 'C';
  bool alleig = toupper(range[0]) == 'A';
  bool valeig = toupper(range[0]) == 'V';
  bool indeig = toupper(range[0]) == 'I';

  /* Check input parameters */
  if(!(onlyW || wantZ || cntval)) return 1;
  if(!(alleig || valeig || indeig)) return 1;
  if(n <= 0) return 1;
  if (valeig) {
    if(*vu<=*vl) return 1;
  } else if (indeig) {
    if (*il<1 || *il>n || *iu<*il || *iu>n) return 1;
  }

  /* MPI & multithreading info */
  int is_init, is_final;
  MPI_Initialized(&is_init);
  MPI_Finalized(&is_final);
  if (is_init!=1 || is_final==1) {
    fprintf(stderr, "ERROR: MPI is not active! (init=%d, final=%d) \n",
            is_init, is_final);
    return 1;
  }
  MPI_Comm comm_dup;
  MPI_Comm_dup(comm, &comm_dup);
  int nproc, pid, thread_support;
  MPI_Comm_size(comm_dup, &nproc);
  MPI_Comm_rank(comm_dup, &pid);
  MPI_Query_thread(&thread_support);

  int nthreads;
  if ( !(thread_support == MPI_THREAD_MULTIPLE ||
         thread_support == MPI_THREAD_FUNNELED) ) {
    /* Disable multithreading; note: to support multithreading with
     * MPI_THREAD_SERIALIZED the code must be changed slightly; this
     * is not supported at the moment */
    nthreads = 1;
  } else {
    char *ompvar = getenv("PMR_NUM_THREADS");
    if (ompvar == NULL) {
      nthreads = DEFAULT_NUM_THREADS;
    } else {
      nthreads = atoi(ompvar);
      /* atoi yields 0 for non-numeric text and may yield a negative value;
       * a thread count below one is meaningless, so fall back to one. */
      if (nthreads < 1) nthreads = 1;
    }
  }

#if defined(MVAPICH2_VERSION)
  if (nthreads>1) {
    int mv2_affinity=1;
    char *mv2_string = getenv("MV2_ENABLE_AFFINITY");
    if (mv2_string != NULL)
      mv2_affinity = atoi(mv2_string);
    if (mv2_affinity!=0) {
      nthreads = 1;
      if (pid==0) {
        fprintf(stderr, "WARNING: PMRRR incurs a significant performance penalty when multithreaded with MVAPICH2 with affinity enabled. The number of threads has been reduced to one; please rerun with MV2_ENABLE_AFFINITY=0 or PMR_NUM_THREADS=1 in the future.\n");
        fflush(stderr);
      }
    }
  }
#endif

  /* If only maximal number of local eigenvectors are queried
   * return if possible here */
  *nzp     = 0;
  *offsetp = 0;
  if (cntval) {
    if ( alleig || n < DSTEMR_IF_SMALLER ) {
      *nzp = iceil(n,nproc);
      MPI_Comm_free(&comm_dup);
      return 0;
    } else if (indeig) {
      *nzp = iceil(*iu-*il+1,nproc);
      MPI_Comm_free(&comm_dup);
      return 0;
    }
  }

  /* Check if computation should be done by multiple processes */
  int info;
  if (n < DSTEMR_IF_SMALLER) {
    info = handle_small_cases(jobz, range, np, D, E, vl, vu, il, iu, tryracp,
                              comm, nzp, offsetp, W, Z, ldz, Zsupp);
    MPI_Comm_free(&comm_dup);
    return info;
  }

  /* Allocate memory */
  double *Werr = (double*)malloc(n*sizeof(double));
  assert(Werr!=NULL);
  double *Wgap = (double*)malloc(n*sizeof(double));
  assert(Wgap!=NULL);
  double *gersch = (double*)malloc(2*n*sizeof(double));
  assert(gersch!=NULL);
  int *iblock = (int*)calloc(n,sizeof(int));
  assert(iblock!=NULL);
  int *iproc = (int*)malloc(n*sizeof(int));
  assert(iproc!=NULL);
  int *Windex = (int*)malloc(n*sizeof(int));
  assert(Windex!=NULL);
  int *isplit = (int*)malloc(n*sizeof(int));
  assert(isplit!=NULL);
  int *Zindex = (int*)malloc(n*sizeof(int));
  assert(Zindex!=NULL);
  proc_t *procinfo = (proc_t*)malloc(sizeof(proc_t));
  assert(procinfo!=NULL);
  in_t *Dstruct = (in_t*)malloc(sizeof(in_t));
  assert(Dstruct!=NULL);
  val_t *Wstruct = (val_t*)malloc(sizeof(val_t));
  assert(Wstruct!=NULL);
  vec_t *Zstruct = (vec_t*)malloc(sizeof(vec_t));
  assert(Zstruct!=NULL);
  tol_t *tolstruct = (tol_t*)malloc(sizeof(tol_t));
  assert(tolstruct!=NULL);

  /* Bundle variables into a structures */
  procinfo->pid            = pid;
  procinfo->nproc          = nproc;
  procinfo->comm           = comm_dup;
  procinfo->nthreads       = nthreads;
  procinfo->thread_support = thread_support;

  Dstruct->n      = n;
  Dstruct->D      = D;
  Dstruct->E      = E;
  Dstruct->isplit = isplit;

  Wstruct->n      = n;
  Wstruct->vl     = vl;
  Wstruct->vu     = vu;
  Wstruct->il     = il;
  Wstruct->iu     = iu;
  Wstruct->W      = W;
  Wstruct->Werr   = Werr;
  Wstruct->Wgap   = Wgap;
  Wstruct->Windex = Windex;
  Wstruct->iblock = iblock;
  Wstruct->iproc  = iproc;
  Wstruct->gersch = gersch;

  Zstruct->ldz    = *ldz;
  Zstruct->nz     = 0;
  Zstruct->Z      = Z;
  Zstruct->Zsupp  = Zsupp;
  Zstruct->Zindex = Zindex;

  /* Scale matrix to allowable range, returns 1.0 if not scaled */
  double scale = scale_matrix(Dstruct, Wstruct, valeig);

  /* Test if matrix warrants more expensive computations which
   * guarantees high relative accuracy */
  if (*tryracp)
    odrrr(&n, D, E, &info); /* 0 - rel acc */
  else
    info = -1;

  int i;
  /* NULL-initialised so that they can be released unconditionally on every
   * exit path (free(NULL) is a no-op). */
  double *Dcopy = NULL, *E2copy = NULL;
  if (info == 0) {
    /* This case is extremely rare in practice */
    tolstruct->split = DBL_EPSILON;
    /* Copy original data needed for refinement later */
    Dcopy = (double*)malloc(n*sizeof(double));
    assert(Dcopy!=NULL);
    memcpy(Dcopy, D, n*sizeof(double));
    E2copy = (double*)malloc(n*sizeof(double));
    assert(E2copy!=NULL);
    for (i=0; i<n-1; i++)
      E2copy[i] = E[i]*E[i];
  } else {
    /* Neg. threshold forces old splitting criterion */
    tolstruct->split = -DBL_EPSILON;
    *tryracp = 0;
  }

  if (!wantZ) {
    /* Compute eigenvalues to full precision */
    tolstruct->rtol1 = 4.0 * DBL_EPSILON;
    tolstruct->rtol2 = 4.0 * DBL_EPSILON;
  } else {
    /* Do not compute to full accuracy first, but refine later */
    tolstruct->rtol1 = sqrt(DBL_EPSILON);
    tolstruct->rtol1 = fmin(1e-2*MIN_RELGAP, tolstruct->rtol1);
    tolstruct->rtol2 = sqrt(DBL_EPSILON)*5.0E-3;
    tolstruct->rtol2 = fmin(5e-6*MIN_RELGAP, tolstruct->rtol2);
    tolstruct->rtol2 = fmax(4.0 * DBL_EPSILON, tolstruct->rtol2);
  }

  /* Compute all eigenvalues: sorted by block */
  info = plarre(procinfo,jobz,range,Dstruct,Wstruct,tolstruct,nzp,offsetp);
  assert(info == 0);

  /* If just number of local eigenvectors are queried */
  if (cntval && valeig) {
    clean_up(comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex,
             isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct,
             tolstruct);
    free(Dcopy);
    free(E2copy);
    return 0;
  }

  /* If only eigenvalues are to be computed */
  if (!wantZ) {

    /* Refine to high relative with respect to input T */
    if (*tryracp) {
      info = refine_to_highrac (procinfo, jobz, Dcopy, E2copy,
                                Dstruct, nzp, Wstruct, tolstruct);
      assert(info == 0);
    }

    /* Sort eigenvalues */
    qsort(W, n, sizeof(double), cmp);

    /* Only keep subset ifirst:ilast */
    int ifirst, ilast, isize;
    int iil = *il;
    int iiu = *iu;
    int ifirst_tmp=iil;
    for (i=0; i<nproc; i++) {
      /* Even split; the first (count % nproc) ranks get one extra value. */
      int chunk  = (iiu-iil+1)/nproc + (i < (iiu-iil+1)%nproc);
      int ilast_tmp;
      if (i == nproc-1) {
        ilast_tmp = iiu;
      } else {
        ilast_tmp = ifirst_tmp + chunk - 1;
        ilast_tmp = imin(ilast_tmp, iiu);
      }
      if (i == pid) {
        ifirst    = ifirst_tmp;
        ilast     = ilast_tmp;
        isize     = ilast - ifirst + 1;
        *offsetp = ifirst - iil;
        *nzp      = isize;
      }
      ifirst_tmp = ilast_tmp + 1;
      ifirst_tmp = imin(ifirst_tmp, iiu + 1);
    }
    if (isize > 0) {
      memmove(W, &W[ifirst-1], *nzp * sizeof(double));
    }

    /* If matrix was scaled, rescale eigenvalues */
    invscale_eigenvalues(Wstruct, scale, *nzp);

    clean_up (comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex,
              isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct,
              tolstruct);
    free(Dcopy);
    free(E2copy);

    return 0;
  } /* end of only eigenvalues to compute */

  /* Compute eigenvectors */
  info = plarrv(procinfo, Dstruct, Wstruct, Zstruct, tolstruct,
                nzp, offsetp);
  assert(info == 0);

  /* Refine to high relative with respect to input matrix */
  if (*tryracp) {
    info = refine_to_highrac(procinfo, jobz, Dcopy, E2copy,
                             Dstruct, nzp, Wstruct, tolstruct);
    assert(info == 0);
  }

  /* If matrix was scaled, rescale eigenvalues */
  invscale_eigenvalues(Wstruct, scale, n);

  /* Make the first nz elements of W contains the eigenvalues
   * associated to the process */
  int j, im=0;
  for (j=0; j<n; j++) {
    if (iproc[j] == pid) {
      W[im]      = W[j];
      Windex[im] = Windex[j];
      Zindex[im] = Zindex[j];
      im++;
    }
  }

  clean_up(comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex,
           isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct,
           tolstruct);
  free(Dcopy);
  free(E2copy);

  return 0;
} /* end pmrrr */
/*
 * Wrapper used for small matrices: the problem is solved by odstmr (per the
 * original comment, LAPACK's DSTEMR) with the arguments passed through, and
 * the m resulting eigenpairs are then divided among the processes of `comm`
 * in contiguous chunks of iceil(m, nproc).
 *
 * On return *myfirstp holds the index of this process's first eigenvalue,
 * *nzp the number it owns; its eigenvalues are moved to the front of W and,
 * when jobz is 'V', its eigenvectors are copied into Z using leading
 * dimension *ldzp.
 *
 * Returns 1 if jobz[0] is none of 'N', 'V', 'C' (case-insensitive),
 * otherwise 0.
 */
static int handle_small_cases (char *jobz, char *range, int *np, double *D,
                               double *E, double *vlp, double *vup, int *ilp,
                               int *iup, int *tryracp, MPI_Comm comm,
                               int *nzp, int *myfirstp, double *W, double *Z,
                               int *ldzp, int *Zsupp)
{
  bool cntval = toupper(jobz[0]) == 'C';
  bool onlyW  = toupper(jobz[0]) == 'N';
  bool wantZ  = toupper(jobz[0]) == 'V';
  bool indeig = toupper(range[0]) == 'I';
  int n       = *np;
  int ldz_tmp = *np;
  int ldz     = *ldzp;

  int nproc, pid;
  MPI_Comm_size(comm, &nproc);
  MPI_Comm_rank(comm, &pid);

  int lwork, liwork;
  /* Initialised so that a well-defined (null) pointer is handed to odstmr
   * when no eigenvectors are requested, and so it can be freed
   * unconditionally. */
  double *Z_tmp = NULL;
  if (onlyW) {
    lwork  = 12*n;
    liwork =  8*n;
  } else if (cntval) {
    lwork  = 18*n;
    liwork = 10*n;
  } else if (wantZ) {
    lwork  = 18*n;
    liwork = 10*n;
    int itmp;
    if (indeig)
      itmp = *iup-*ilp+1;
    else
      itmp = n;
    /* Size computed in size_t to avoid int overflow of n*itmp. */
    Z_tmp = (double*)malloc((size_t)n*itmp*sizeof(double));
    assert(Z_tmp!=NULL);
  } else {
    return 1;
  }

  double *work = (double*)malloc(lwork*sizeof(double));
  assert(work != NULL);
  int *iwork = (int*)malloc(liwork*sizeof(int));
  assert(iwork!=NULL);

  if (cntval) {
    /* Note: at the moment, jobz="C" should never get here, since
     * it is blocked before. */
    int m, info, MINUSONE=-1;
    double cnt;
    odstmr("V", "V", np, D, E, vlp, vup, ilp, iup, &m, W, &cnt,
           &ldz_tmp, &MINUSONE, Zsupp, tryracp, work, &lwork, iwork,
           &liwork, &info);
    assert(info == 0);

    *nzp = (int) ceil(cnt/nproc);
    free(work);
    free(iwork);
    return 0;
  }

  int m, info;
  odstmr(jobz, range, np, D, E, vlp, vup, ilp, iup, &m, W, Z_tmp,
         &ldz_tmp, np, Zsupp, tryracp, work, &lwork, iwork, &liwork,
         &info);
  assert(info == 0);

  /* Contiguous block distribution; trailing processes may get nothing. */
  int chunk   = iceil(m,nproc);
  int myfirst = imin(pid * chunk, m);
  int mylast  = imin((pid+1)*chunk - 1, m - 1);
  int mysize  = mylast - myfirst + 1;

  if (mysize > 0) {
    memmove(W, &W[myfirst], mysize*sizeof(double));
    if (wantZ) {
      if (ldz == ldz_tmp) {
        /* copy everything in one chunk */
        memcpy(Z, &Z_tmp[myfirst*ldz_tmp], n*mysize*sizeof(double));
      } else {
        /* copy each vector separately */
        int i;
        for (i=0; i<mysize; i++)
          memcpy(&Z[i*ldz], &Z_tmp[(myfirst+i)*ldz_tmp],
                 n*sizeof(double));
      }
    } /* if (wantZ) */
  }

  *myfirstp = myfirst;
  *nzp      = mysize;

  free(Z_tmp);
  free(work);
  free(iwork);

  return 0;
}
/*
 * Routine to compute eigenvalues.
 *
 * Steps visible in this function:
 *  1. squares the off-diagonal entries into E2 and builds the Gerschgorin
 *     intervals in gersch[2*i], gersch[2*i+1] (note: E[n-1] is overwritten
 *     with 0.0 in the caller's array);
 *  2. stores the pivot threshold in tolstruct->pivmin and the spectral
 *     diameter estimate in Dstruct->spdiam, then calls dlarra to obtain the
 *     splitting points (Dstruct->nsplit, isplit);
 *  3. converts the request to an index range *il..*iu: for 'A' it is 1..n,
 *     for 'V' it is derived from the counts dlaebz returns for *vl and *vu;
 *     for 'A' and 'I' the values *vl / *vu are overwritten with the
 *     Gerschgorin bounds;
 *  4. if jobz is 'C' and range is 'V', only *nzp = iceil(*iu-*il+1, nproc)
 *     is set and the routine returns;
 *  5. otherwise the index range (1..n when eigenvectors are wanted,
 *     *il..*iu otherwise) is divided among the processes; *myfirstp and *nzp
 *     receive this process's offset and count, and eigval_subset_proc
 *     computes its share;
 *  6. when eigenvectors are wanted the results are gathered on all
 *     processes, regrouped by block and the gaps recomputed; otherwise the
 *     block shift E[bl_end] is added to each local eigenvalue.
 *
 * Returns 1 if range[0] is not one of A/V/I (either case), otherwise 0.
 * Failures of the called routines and allocation failures trigger assert().
 */
int plarre(proc_t *procinfo, char *jobz, char *range, in_t *Dstruct,
           val_t *Wstruct, tol_t *tolstruct, int *nzp, int *myfirstp)
{
  /* input variables */
  int pid = procinfo->pid;
  int nproc = procinfo->nproc;
  bool wantZ = (jobz[0] == 'V' || jobz[0] == 'v');
  bool cntval = (jobz[0] == 'C' || jobz[0] == 'c');
  int n = Dstruct->n;
  double *restrict D = Dstruct->D;
  double *restrict E = Dstruct->E;
  int *restrict isplit = Dstruct->isplit;
  double *vl = Wstruct->vl;
  double *vu = Wstruct->vu;
  int *il = Wstruct->il;
  int *iu = Wstruct->iu;
  double *restrict W = Wstruct->W;
  double *restrict Werr = Wstruct->Werr;
  double *restrict Wgap = Wstruct->Wgap;
  int *restrict Windex = Wstruct->Windex;
  int *restrict iblock = Wstruct->iblock;
  double *restrict gersch = Wstruct->gersch;

  /* constants */
  int IZERO = 0, IONE = 1;
  double DZERO = 0.0;

  /* work space */
  double *E2, *work;
  int *iwork;

  /* compute Gerschgorin disks and spectral diameter */
  double gl, gu, bl_gu, eold, emax, eabs;

  /* compute splitting points */
  int bl_begin, bl_end;

  /* distribute work among processes */
  int ifirst, ilast, ifirst_tmp, ilast_tmp;
  int chunk, isize, iil, iiu;

  /* gather results */
  int *rcount, *rdispl;

  /* others */
  int info, i, j, im, idummy, ind;
  double tmp1, dummy;
  enum range_enum {allrng=1, valrng=2, indrng=3} irange;
  double intervals[2];
  int negcounts[2];
  double sigma;

  /* Decode the range selector; anything else is an argument error. */
  if (range[0] == 'A' || range[0] == 'a') {
    irange = allrng;
  } else if (range[0] == 'V' || range[0] == 'v') {
    irange = valrng;
  } else if (range[0] == 'I' || range[0] == 'i') {
    irange = indrng;
  } else {
    return(1);
  }

  /* allocate work space */
  E2 = (double *) malloc( n * sizeof(double) );
  assert(E2 != NULL);
  work = (double *) malloc( 4*n * sizeof(double) );
  assert(work != NULL);
  iwork = (int *) malloc( 3*n * sizeof(int) );
  assert(iwork != NULL);
  rcount = (int *) malloc( nproc * sizeof(int) );
  assert(rcount != NULL);
  rdispl = (int *) malloc( nproc * sizeof(int) );
  assert(rdispl != NULL);

  /* Compute square of off-diagonal elements */
  for (i=0; i<n-1; i++) {
    E2[i] = E[i]*E[i];
  }

  /* compute Gerschgorin disks and spectral diameter */
  gl = D[0];
  gu = D[0];
  eold = 0.0;
  emax = 0.0;
  E[n-1] = 0.0; /* the last off-diagonal slot is used as a zero sentinel */
  for (i=0; i<n; i++) {
    eabs = fabs(E[i]);
    if (eabs >= emax) emax = eabs;
    tmp1 = eabs + eold;
    gersch[2*i] = D[i] - tmp1;
    gl = fmin(gl, gersch[2*i]);
    gersch[2*i+1] = D[i] + tmp1;
    gu = fmax(gu, gersch[2*i+1]);
    eold = eabs;
  }

  /* min. pivot allowed in the Sturm sequence of T */
  tolstruct->pivmin = DBL_MIN * fmax(1.0, emax*emax);

  /* estimate of spectral diameter */
  Dstruct->spdiam = gu - gl;

  /* compute splitting points with threshold "split" */
  LAPACK(dlarra) (&n, D, E, E2, &tolstruct->split, &Dstruct->spdiam,
                  &Dstruct->nsplit, isplit, &info);
  assert(info == 0);

  /* For 'A' and 'I' the value bounds become the Gerschgorin bounds. */
  if (irange == allrng || irange == indrng) {
    *vl = gl;
    *vu = gu;
  }

  /* set eigenvalue indices in case of all or subset by value has
   * to be computed; thereby convert all problem to subset by index
   * computation */
  if (irange == allrng) {
    *il = 1;
    *iu = n;
  } else if (irange == valrng) {
    intervals[0] = *vl;
    intervals[1] = *vu;

    /* find negcount at boundaries 'vl' and 'vu';
     * needs work of dim(n) and iwork of dim(n) */
    LAPACK(dlaebz) (&IONE, &IZERO, &n, &IONE, &IONE, &IZERO, &DZERO, &DZERO,
                    &tolstruct->pivmin, D, E, E2, &idummy, intervals, &dummy,
                    &idummy, negcounts, work, iwork, &info);
    assert(info == 0);

    /* update negcounts of whole matrix with negcounts found for block */
    *il = negcounts[0] + 1;
    *iu = negcounts[1];
  }

  if (cntval && irange == valrng) {
    /* clean up and return */
    *nzp = iceil(*iu-*il+1, nproc);
    clean_up_plarre(E2, work, iwork, rcount, rdispl);
    return(0);
  }

  /* in case only eigenvalues are desired compute eigenvalues
   * "il" to "iu"; otherwise compute all */
  if (wantZ) {
    iil = 1;
    iiu = n;
  } else {
    iil = *il;
    iiu = *iu;
  }

  /* each process computes a subset of the eigenvalues;
   * the first (count % nproc) processes receive one extra value and the
   * per-process counts/displacements are recorded for the gather below */
  ifirst_tmp = iil;
  for (i=0; i<nproc; i++) {
    chunk = (iiu-iil+1)/nproc + (i < (iiu-iil+1)%nproc);
    if (i == nproc-1) {
      ilast_tmp = iiu;
    } else {
      ilast_tmp = ifirst_tmp + chunk - 1;
      ilast_tmp = imin(ilast_tmp, iiu);
    }
    if (i == pid) {
      ifirst = ifirst_tmp;
      ilast = ilast_tmp;
      isize = ilast - ifirst + 1;
      *myfirstp = ifirst - iil;;
      *nzp = isize;
    }
    rcount[i] = ilast_tmp - ifirst_tmp + 1;
    rdispl[i] = ifirst_tmp - iil;
    ifirst_tmp = ilast_tmp + 1;
    ifirst_tmp = imin(ifirst_tmp, iiu + 1);
  }

  /* compute eigenvalues assigned to process */
  if (isize != 0) {
    info = eigval_subset_proc(procinfo, range, Dstruct, E2, ifirst, ilast,
                              tolstruct, Wstruct, work, iwork);
    assert(info == 0);
  }

  if (wantZ) {
    /* communicate results: the local part is staged in work/iwork because
     * the gather writes into the same array it was read from */
    memcpy(work, W, isize * sizeof(double) );
    MPI_Allgatherv(work, isize, MPI_DOUBLE, W, rcount, rdispl,
                   MPI_DOUBLE, procinfo->comm);
    memcpy(work, Werr, isize * sizeof(double) );
    MPI_Allgatherv(work, isize, MPI_DOUBLE, Werr, rcount, rdispl,
                   MPI_DOUBLE, procinfo->comm);
    memcpy(iwork, Windex, isize * sizeof(int) );
    MPI_Allgatherv(iwork, isize, MPI_INT, Windex, rcount, rdispl,
                   MPI_INT, procinfo->comm);
    memcpy(iwork, iblock, isize * sizeof(int) );
    MPI_Allgatherv(iwork, isize, MPI_INT, iblock, rcount, rdispl,
                   MPI_INT, procinfo->comm);

    /* sort by block: snapshot everything, then write back block by block
     * (relative order inside a block is preserved) */
    memcpy(&work[0], W, n*sizeof(double));
    memcpy(&work[n], Werr, n*sizeof(double));
    memcpy(&iwork[0], Windex, n*sizeof(int));
    memcpy(&iwork[n], iblock, n*sizeof(int));
    im = 0;
    for (i=1; i<=Dstruct->nsplit; i++) {
      for (j=0; j<n; j++) {
        if (iwork[j+n] == i) { /* iblock == i */
          W[im] = work[j];
          Werr[im] = work[j+n];
          Windex[im] = iwork[j];
          iblock[im] = iwork[j+n];
          im++;
        }
      }
    }

    /* recompute gap of blocks */
    bl_begin = 0;
    for (i=0; i < Dstruct->nsplit; i++) {
      bl_end = isplit[i] - 1;
      sigma = E[bl_end];

      /* find outer bounds GU for block used for last gap */
      bl_gu = D[bl_begin];
      for (j = bl_begin; j <= bl_end; j++) {
        bl_gu = fmax(bl_gu, gersch[2*j+1]);
      }

      /* recompute gaps within the blocks */
      for (j = bl_begin; j < bl_end; j++) {
        Wgap[j] = fmax(0.0, (W[j+1] - Werr[j+1]) - (W[j] + Werr[j]) );
      }
      Wgap[bl_end] = fmax(0.0, (bl_gu - sigma) - (W[bl_end] + Werr[bl_end]) );

      bl_begin = bl_end + 1;
    } /* end i */

  } else {
    /* compute UNSHIFTED eigenvalues: add back the shift stored in E at the
     * end of the block each local eigenvalue belongs to */
    for (i=0; i < isize; i++) {
      ind = iblock[i] - 1;
      bl_end = isplit[ind] - 1;
      sigma = E[bl_end];
      W[i] += sigma;
    }
  } /* if wantZ */

  /* free memory */
  clean_up_plarre(E2, work, iwork, rcount, rdispl);

  return(0);
}
/*
 * Refine eigenvalues with respect to new rrr.
 *
 * Refines the eigenvalues with global indices rf_begin..rf_end of cluster
 * `cl` by bisection (odrrb) against the representation in RRR, then writes
 * the refined values plus the shift L[bl_size-1] into Wstruct->W.
 *
 * If the range is larger than this thread's share (own_part), the leading
 * part is cut into num_tasks refinement tasks that are pushed onto
 * workQ->r_queue; this thread refines the remainder itself, helps draining
 * the queue, waits for the created tasks and finally recomputes the gaps at
 * the task boundaries. Otherwise the whole range is refined directly.
 *
 * work / iwork are scratch arrays handed to odrrb and PMR_process_r_task.
 * Always returns 0; a non-zero info from odrrb triggers assert().
 */
static inline int refine_eigvals (cluster_t *cl, int rf_begin, int rf_end,
                                  int tid, proc_t *procinfo, rrr_t *RRR,
                                  val_t *Wstruct, vec_t *Zstruct,
                                  tol_t *tolstruct, counter_t *num_left,
                                  workQ_t *workQ, double *work, int *iwork)
{
  int rf_size = rf_end-rf_begin+1;
  int bl_begin = cl->bl_begin;
  int bl_end = cl->bl_end;
  int bl_size = bl_end - bl_begin + 1;
  double bl_spdiam = cl->bl_spdiam;

  double *restrict D = RRR->D;
  double *restrict L = RRR->L;
  double *restrict DLL = RRR->DLL;

  double *restrict W = Wstruct->W;
  double *restrict Werr = Wstruct->Werr;
  double *restrict Wgap = Wstruct->Wgap;
  int *restrict Windex = Wstruct->Windex;
  double *restrict Wshifted = Wstruct->Wshifted;

  double pivmin = tolstruct->pivmin;
  double rtol1 = tolstruct->rtol1;
  double rtol2 = tolstruct->rtol2;

  /* Determine if refinement should be split into tasks */
  int left = PMR_get_counter_value(num_left);
  int nz = Zstruct->nz;
  int nthreads = procinfo->nthreads;
  /* NOTE(review): both expressions divide by nthreads; this assumes
   * procinfo->nthreads > 0 -- confirm the caller guarantees it. */
  int MIN_REFINE_CHUNK = fmax(2,nz/(4*nthreads));
  int own_part = (int)fmax(ceil((double)left/nthreads),MIN_REFINE_CHUNK);

  int offset, i, p, q;
  double savegap;
  task_t *task;

  if (own_part < rf_size) {
    int others_part = rf_size - own_part;
    /* presumably iceil is a ceiling division, which makes this >= 1 here
     * because own_part < rf_size */
    int num_tasks = iceil(rf_size, own_part) - 1; /* >1 */
    int chunk = others_part/num_tasks; /* floor */

    int ts_begin=rf_begin, ts_end;
    p = Windex[rf_begin];
    for (i=0; i<num_tasks; i++) {
      ts_end = ts_begin + chunk - 1;
      q = p + chunk - 1;

      task = PMR_create_r_task (ts_begin, ts_end, D, DLL, p, q, bl_size,
                                bl_spdiam, tid);

      /* An empty task is not queued; its semaphore is posted right away so
       * that the wait loop below still sees num_tasks posts. */
      if (ts_begin <= ts_end)
        PMR_insert_task_at_back(workQ->r_queue, task);
      else
        PMR_refine_sem_post(task->data); /* case chunk=0 */

      ts_begin = ts_end + 1;
      p = q + 1;
    }
    /* The remainder [ts_begin, rf_end] is refined by this thread. */
    ts_end = rf_end;
    q = Windex[rf_end];
    offset = Windex[ts_begin] - 1;

    /* Call bisection routine to refine the values */
    if (ts_begin <= ts_end) {
      int info;
      odrrb (&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset,
             &Wshifted[ts_begin], &Wgap[ts_begin], &Werr[ts_begin],
             work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
      assert( info == 0 );
    }

    /* Empty "all" r-queue refine tasks before waiting */
    int num_iter = PMR_get_num_tasks(workQ->r_queue);
    for (i=0; i<num_iter; i++) {
      task = PMR_remove_task_at_front(workQ->r_queue);
      if (task != NULL) {
        if (task->flag == REFINE_TASK_FLAG) {
          PMR_process_r_task ((refine_t*)task->data, procinfo, Wstruct,
                              tolstruct, work, iwork);
          free(task);
        } else {
          /* not a refine task: put it back for someone else */
          PMR_insert_task_at_back(workQ->r_queue, task);
        }
      } /* if task */
    } /* end for i */

    /* Barrier: wait until all created tasks finished */
    /* NOTE(review): `task` was reassigned by the drain loop above, so at
     * this point it may be NULL, may have just been passed to free(), or
     * may refer to an unrelated task that was re-queued; task->data is
     * nevertheless dereferenced below. Confirm how PMR_refine_sem_wait /
     * PMR_refine_sem_destroy locate the semaphore and whether this is safe. */
    int count = num_tasks;
    while (count > 0) {
      while ( PMR_refine_sem_wait(task->data) != 0 ) { };
      count--;
    }
    PMR_refine_sem_destroy(task->data);

    /* Edit right gap at splitting point */
    ts_begin = rf_begin;
    for (i=0; i<num_tasks; i++) {
      ts_end = ts_begin + chunk - 1;

      Wgap[ts_end] = fmax(0.0, Wshifted[ts_end + 1] - Werr[ts_end + 1]
                               - Wshifted[ts_end] - Werr[ts_end]);

      ts_begin = ts_end + 1;
    }

  } else {
    /* Refinement of cluster without creating tasks */

    /* 'p' and 'q' are local (within block) indices of
     * the first/last eigenvalue of the cluster */
    p = Windex[rf_begin];
    q = Windex[rf_end];

    offset = Windex[rf_begin] - 1; /* = p - 1 */

    /* For a single eigenvalue the gap is zeroed for the call and restored
     * afterwards. */
    if (p == q) {
      savegap = Wgap[rf_begin];
      Wgap[rf_begin] = 0.0;
    }

    /* Bisection routine to refine the values */
    int info;
    odrrb (&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset,
           &Wshifted[rf_begin], &Wgap[rf_begin], &Werr[rf_begin],
           work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
    assert( info == 0 );

    if (p == q)
      Wgap[rf_begin] = savegap;

  } /* end refine with or without creating tasks */

  /* refined eigenvalues with all shifts applied in W */
  double sigma = L[bl_size-1];
  for (i=rf_begin; i<=rf_end; i++)
    W[i] = Wshifted[i] + sigma;

  return 0;
} /* end refine_eigvals */
/**
   Returns the transpose of a ztilt gradient operator that converts the OPDs
   defined on xloc to subapertures defined on saloc.

   For every subaperture the points of xloc that fall inside it are collected
   (through xloc's map), a 3x3 matrix is built from them by loc_mcc_ptt and
   inverted in place, and columns 1 and 2 of the inverse give the x- and
   y-weights of each point. The x and y operators are assembled as two sparse
   matrices with one column per subaperture and concatenated with dspcat.

   \param xloc   grid the OPD is defined on (its map is created here)
   \param amp    optional per-point amplitude on xloc; may be NULL
   \param saloc  subaperture coordinates
   \param saorc  1: saloc is the subaperture origin (lower left corner),
                 0: saloc is the subaperture center
   \param scale  scaling applied to the subaperture coordinates and size
   \param dispx,dispy  displacement added before mapping onto xloc
*/
dsp * mkzt(loc_t* xloc, double *amp, loc_t *saloc, int saorc, double scale, double dispx, double dispy)
{
    long nsa=saloc->nloc;
    double dsa=saloc->dx;
    /* inverse grid spacing of xloc, without and with the scale factor */
    double dx1=1./xloc->dx;
    double dy1=1./xloc->dy;
    double dx2=scale*dx1;
    double dy2=scale*dy1;

    loc_create_map(xloc);
    map_t *map=xloc->map;

    /* displacement in units of xloc grid points, measured from the map
       origin; shifted by half a subaperture when saloc holds origins */
    dispx=(dispx-map->ox+saorc*dsa*0.5*scale)*dx1;
    dispy=(dispy-map->oy+saorc*dsa*0.5*scale)*dy1;

    /* half subaperture size in grid points and the resulting upper bound on
       the number of points inside one subaperture */
    double dsa2=dsa*0.5*dx2;
    long nmax=(dsa2*2+2)*(dsa2*2+2);

    /* scratch storage for the points of a single subaperture */
    long *ind=mycalloc(nmax,long);
    loc_t *sloc=mycalloc(1,loc_t);
    sloc->dx=xloc->dx;
    sloc->dy=xloc->dy;
    sloc->locx=mycalloc(nmax,double);
    sloc->locy=mycalloc(nmax,double);
    double *amploc=NULL;
    if(amp){
	amploc=mycalloc(nmax,double);
    }

    dsp*zax=dspnew(xloc->nloc,nsa,xloc->nloc);
    dsp*zay=dspnew(xloc->nloc,nsa,xloc->nloc);
    long xcount=0;
    long ycount=0;
    spint *xpp=zax->p;
    spint *xpi=zax->i;
    double *xpx=zax->x;
    spint *ypp=zay->p;
    spint *ypi=zay->i;
    double *ypx=zay->x;
    const double *locx=xloc->locx;
    const double *locy=xloc->locy;
    double *slocx=sloc->locx;
    double *slocy=sloc->locy;

    for(int sa=0; sa<nsa; sa++){
	/*center of subaperture when mapped onto XLOC*/
	double scx=saloc->locx[sa]*dx2+dispx;
	double scy=saloc->locy[sa]*dy2+dispy;

	/*index window covered by this subaperture*/
	const int iy0=iceil(scy-dsa2);
	const int iy1=ifloor(scy+dsa2);
	const int ix0=iceil(scx-dsa2);
	const int ix1=ifloor(scx+dsa2);

	/*find points that belong to this subaperture. */
	int npts=0;
	for(int iy=iy0; iy<iy1; iy++){
	    for(int ix=ix0; ix<ix1; ix++){
		int hit=loc_map_get(map, ix, iy);
		if(hit<=0){
		    continue;/*no xloc point at this map cell*/
		}
		hit--;/*map entries are stored off by one*/
		ind[npts]=hit;
		slocx[npts]=locx[hit];
		slocy[npts]=locy[hit];
		if(amp){
		    amploc[npts]=amp[hit];
		}
		npts++;
	    }
	}
	/*locwrite(sloc,"sloc_isa%d",isa); */
	/*writedbl(amploc,count,1,"amploc_isa%d",isa); */
	sloc->nloc=npts;
	dmat *mcc=loc_mcc_ptt(sloc,amploc);
	/*writebin(mcc,"mcc_isa%d",isa); */
	dinvspd_inplace(mcc);
	/*writebin(mcc,"imcc_isa%d",isa); */

	/*column start for this subaperture*/
	xpp[sa]=xcount;
	ypp[sa]=ycount;
	for(int k=0; k<npts; k++){
	    double xx=IND(mcc,0,1)+IND(mcc,1,1)*slocx[k]+IND(mcc,2,1)*slocy[k];
	    double yy=IND(mcc,0,2)+IND(mcc,1,2)*slocx[k]+IND(mcc,2,2)*slocy[k];
	    if(amp){
		xx*=amploc[k];
		yy*=amploc[k];
	    }
	    xpi[xcount]=ind[k];
	    xpx[xcount]=xx;
	    xcount++;
	    ypi[ycount]=ind[k];
	    ypx[ycount]=yy;
	    ycount++;
	}
	dfree(mcc);
    }
    /*close the last column*/
    xpp[nsa]=xcount;
    ypp[nsa]=ycount;

    locfree(sloc);
    free(ind);

    /*shrink to the number of entries actually written and join x and y*/
    dspsetnzmax(zax,xcount);
    dspsetnzmax(zay,ycount);
    dsp*ZAT=dspcat(zax,zay,1);
    dspfree(zax);
    dspfree(zay);
    if(amp) free(amploc);
    return ZAT;
}
/*
 * Routine to compute eigenvalues.
 *
 * Steps visible in this function:
 *  1. squares the off-diagonal entries into E2 and builds the Gerschgorin
 *     intervals in gersch[2*i], gersch[2*i+1] (note: E[n-1] is overwritten
 *     with 0.0 in the caller's array);
 *  2. stores the pivot threshold in tolstruct->pivmin and the spectral
 *     diameter estimate in Dstruct->spdiam, then calls odrra to obtain the
 *     splitting points (Dstruct->nsplit, isplit);
 *  3. converts the request to an index range *il..*iu: 1..n for 'A', or
 *     derived from the counts odebz returns for *vl and *vu for 'V'; for
 *     'A' and 'I' the values *vl / *vu are overwritten with the Gerschgorin
 *     bounds;
 *  4. if jobz is 'C' and range is 'V', only *nzp = iceil(*iu-*il+1, nproc)
 *     is set and the routine returns;
 *  5. otherwise each unreduced block is processed in turn: 1x1 blocks are
 *     handled directly; for larger blocks the eigenvalues are divided among
 *     the processes, approximated, a root representation is computed, the
 *     values are refined, gathered on all processes, sorted within the block
 *     and their gaps recomputed. When eigenvectors are not wanted the block
 *     shift E[bl_end] is added back.
 *
 * *offsetp and *nzp are overwritten for every block larger than 1x1, so on
 * return they describe the distribution of the last such block.
 *
 * Returns 1 if range[0] is not one of A/V/I (either case), otherwise 0.
 * Failures of the called routines and allocation failures trigger assert().
 *
 * Fix relative to the previous revision: for a 1x1 block Werr was assigned
 * twice and Wgap was never written, leaving Wgap[bl_begin] uninitialised
 * (the gap recomputation further down is skipped by the `continue`). The
 * second assignment now sets Wgap[bl_begin] = 0.0, matching what LAPACK's
 * DLARRE does for 1x1 blocks.
 */
int plarre(proc_t *procinfo, char *jobz, char *range, in_t *Dstruct,
           val_t *Wstruct, tol_t *tolstruct, int *nzp, int *offsetp)
{
  /* input variables */
  int nproc = procinfo->nproc;
  bool wantZ = (jobz[0] == 'V' || jobz[0] == 'v');
  bool cntval = (jobz[0] == 'C' || jobz[0] == 'c');
  int n = Dstruct->n;
  double *restrict D = Dstruct->D;
  double *restrict E = Dstruct->E;
  int *restrict isplit = Dstruct->isplit;
  double *vl = Wstruct->vl;
  double *vu = Wstruct->vu;
  int *il = Wstruct->il;
  int *iu = Wstruct->iu;
  double *restrict W = Wstruct->W;
  double *restrict Werr = Wstruct->Werr;
  double *restrict Wgap = Wstruct->Wgap;
  int *restrict Windex = Wstruct->Windex;
  int *restrict iblock = Wstruct->iblock;
  double *restrict gersch = Wstruct->gersch;

  /* constants */
  int IZERO = 0, IONE = 1;
  double DZERO = 0.0;

  /* work space */
  double *E2;
  double *work;
  int *iwork;

  /* compute Gerschgorin disks and spectral diameter */
  double gl, gu, eold, emax, eabs;

  /* compute splitting points */
  int bl_begin, bl_end, bl_size;

  /* distribute work among processes */
  int ifirst, ilast, ifirst_tmp, ilast_tmp;
  int chunk, isize, iil, iiu;

  /* gather results */
  int *rcount, *rdispl;

  /* others */
  int info, i, j, jbl, idummy;
  double tmp1, dummy;
  bool sorted;
  enum range_enum {allrng=1, valrng=2, indrng=3} irange;
  double intervals[2];
  int negcounts[2];
  double sigma;

  /* Decode the range selector; anything else is an argument error. */
  if (range[0] == 'A' || range[0] == 'a') {
    irange = allrng;
  } else if (range[0] == 'V' || range[0] == 'v') {
    irange = valrng;
  } else if (range[0] == 'I' || range[0] == 'i') {
    irange = indrng;
  } else {
    return 1;
  }

  /* allocate work space */
  E2 = (double *) malloc( n * sizeof(double) );
  assert(E2 != NULL);
  work = (double *) malloc( 4*n * sizeof(double) );
  assert(work != NULL);
  iwork = (int *) malloc( 3*n * sizeof(int) );
  assert(iwork != NULL);
  rcount = (int *) malloc( nproc * sizeof(int) );
  assert(rcount != NULL);
  rdispl = (int *) malloc( nproc * sizeof(int) );
  assert(rdispl != NULL);

  /* Compute square of off-diagonal elements */
  for (i=0; i<n-1; i++) {
    E2[i] = E[i]*E[i];
  }

  /* compute Gerschgorin disks and spectral diameter */
  gl = D[0];
  gu = D[0];
  eold = 0.0;
  emax = 0.0;
  E[n-1] = 0.0; /* the last off-diagonal slot is used as a zero sentinel */
  for (i=0; i<n; i++) {
    eabs = fabs(E[i]);
    if (eabs >= emax) emax = eabs;
    tmp1 = eabs + eold;
    gersch[2*i] = D[i] - tmp1;
    gl = fmin(gl, gersch[2*i]);
    gersch[2*i+1] = D[i] + tmp1;
    gu = fmax(gu, gersch[2*i+1]);
    eold = eabs;
  }

  /* min. pivot allowed in the Sturm sequence of T */
  tolstruct->pivmin = DBL_MIN * fmax(1.0, emax*emax);

  /* estimate of spectral diameter */
  Dstruct->spdiam = gu - gl;

  /* compute splitting points with threshold "split" */
  odrra(&n, D, E, E2, &tolstruct->split, &Dstruct->spdiam,
        &Dstruct->nsplit, isplit, &info);
  assert(info == 0);

  /* For 'A' and 'I' the value bounds become the Gerschgorin bounds. */
  if (irange == allrng || irange == indrng) {
    *vl = gl;
    *vu = gu;
  }

  /* set eigenvalue indices in case of all or subset by value has
   * to be computed; thereby convert all problem to subset by index
   * computation */
  if (irange == allrng) {
    *il = 1;
    *iu = n;
  } else if (irange == valrng) {
    intervals[0] = *vl;
    intervals[1] = *vu;

    /* find negcount at boundaries 'vl' and 'vu';
     * needs work of dim(n) and iwork of dim(n) */
    odebz(&IONE, &IZERO, &n, &IONE, &IONE, &IZERO, &DZERO, &DZERO,
          &tolstruct->pivmin, D, E, E2, &idummy, intervals, &dummy,
          &idummy, negcounts, work, iwork, &info);
    assert(info == 0);

    /* update negcounts of whole matrix with negcounts found for block */
    *il = negcounts[0] + 1;
    *iu = negcounts[1];
  }

  if (cntval && irange == valrng) {
    /* clean up and return */
    *nzp = iceil(*iu-*il+1, nproc);
    clean_up_plarre(E2, work, iwork, rcount, rdispl);
    return 0;
  }

  /* loop over unreduced blocks */
  bl_begin = 0;
  for (jbl=0; jbl<Dstruct->nsplit; jbl++) {

    bl_end = isplit[jbl] - 1;
    bl_size = bl_end - bl_begin + 1;

    /* deal with 1x1 block immediately */
    if (bl_size == 1) {
      E[bl_end] = 0.0;
      W[bl_begin] = D[bl_begin];
      Werr[bl_begin] = 0.0;
      /* was a duplicated Werr assignment; the gap of a 1x1 block must be
       * defined because the recomputation below is skipped */
      Wgap[bl_begin] = 0.0;
      iblock[bl_begin] = jbl + 1;
      Windex[bl_begin] = 1;
      bl_begin = bl_end + 1;
      continue;
    }

    /* Index range of block */
    iil = 1;
    iiu = bl_size;

    /* each process computes a subset of the eigenvalues of the block;
     * the first (count % nproc) processes receive one extra value */
    ifirst_tmp = iil;
    for (i=0; i<nproc; i++) {
      chunk = (iiu-iil+1)/nproc + (i < (iiu-iil+1)%nproc);
      if (i == nproc-1) {
        ilast_tmp = iiu;
      } else {
        ilast_tmp = ifirst_tmp + chunk - 1;
        ilast_tmp = imin(ilast_tmp, iiu);
      }
      if (i == procinfo->pid) {
        ifirst = ifirst_tmp;
        ilast = ilast_tmp;
        isize = ilast - ifirst + 1;
        *offsetp = ifirst - iil;
        *nzp = isize;
      }
      rcount[i] = ilast_tmp - ifirst_tmp + 1;
      rdispl[i] = ifirst_tmp - iil;
      ifirst_tmp = ilast_tmp + 1;
      ifirst_tmp = imin(ifirst_tmp, iiu + 1);
    }

    /* approximate eigenvalues of input assigned to process */
    if (isize != 0) {
      info = eigval_approx_proc(procinfo, ifirst, ilast, bl_size,
                                &D[bl_begin], &E[bl_begin], &E2[bl_begin],
                                &Windex[bl_begin], &iblock[bl_begin],
                                &gersch[2*bl_begin], tolstruct,
                                &W[bl_begin], &Werr[bl_begin],
                                &Wgap[bl_begin], work, iwork);
      assert(info == 0);
    }

    /* compute root representation of block (done by every process, also
     * those without eigenvalues of this block) */
    info = eigval_root_proc(procinfo, ifirst, ilast, bl_size,
                            &D[bl_begin], &E[bl_begin], &E2[bl_begin],
                            &Windex[bl_begin], &iblock[bl_begin],
                            &gersch[2*bl_begin], tolstruct,
                            &W[bl_begin], &Werr[bl_begin],
                            &Wgap[bl_begin], work, iwork);
    assert(info == 0);

    /* refine eigenvalues assigned to process w.r.t root */
    if (isize != 0) {
      info = eigval_refine_proc(procinfo, ifirst, ilast, bl_size,
                                &D[bl_begin], &E[bl_begin], &E2[bl_begin],
                                &Windex[bl_begin], &iblock[bl_begin],
                                &gersch[2*bl_begin], tolstruct,
                                &W[bl_begin], &Werr[bl_begin],
                                &Wgap[bl_begin], work, iwork);
      assert(info == 0);
    }

    /* gather the block's results on all processes; the local part is
     * staged in work/iwork because the gather writes into the same array */
    memcpy(work, &W[bl_begin], isize * sizeof(double) );
    MPI_Allgatherv(work, isize, MPI_DOUBLE, &W[bl_begin], rcount, rdispl,
                   MPI_DOUBLE, procinfo->comm);
    memcpy(work, &Werr[bl_begin], isize * sizeof(double) );
    MPI_Allgatherv(work, isize, MPI_DOUBLE, &Werr[bl_begin], rcount, rdispl,
                   MPI_DOUBLE, procinfo->comm);
    memcpy(iwork, &Windex[bl_begin], isize * sizeof(int) );
    MPI_Allgatherv(iwork, isize, MPI_INT, &Windex[bl_begin], rcount, rdispl,
                   MPI_INT, procinfo->comm);

    /* Ensure that within block eigenvalues sorted (bubble sort; W and Werr
     * are swapped together) */
    sorted = false;
    while (sorted == false) {
      sorted = true;
      for (j=bl_begin; j < bl_end; j++) {
        if (W[j+1] < W[j]) {
          sorted = false;
          tmp1 = W[j];
          W[j] = W[j+1];
          W[j+1] = tmp1;
          tmp1 = Werr[j];
          Werr[j] = Werr[j+1];
          Werr[j+1] = tmp1;
        }
      }
    }

    /* Set indices index correctly */
    for (j=bl_begin; j <= bl_end; j++)
      iblock[j] = jbl + 1;

    /* Recompute gaps within the blocks */
    for (j = bl_begin; j < bl_end; j++) {
      Wgap[j] = fmax(0.0, (W[j+1] - Werr[j+1]) - (W[j] + Werr[j]) );
    }
    sigma = E[bl_end];
    Wgap[bl_end] = fmax(0.0, (gu - sigma) - (W[bl_end] + Werr[bl_end]) );

    /* Compute UNSHIFTED eigenvalues */
    if (!wantZ) {
      sigma = E[bl_end];
      for (i=bl_begin; i<=bl_end; i++) {
        W[i] += sigma;
      }
    }

    /* Proceed with next block */
    bl_begin = bl_end + 1;
  }
  /* end of loop over unreduced blocks */

  /* free memory */
  clean_up_plarre(E2, work, iwork, rcount, rdispl);

  return 0;
}
// Advances the player entity described by `data` by `deltaTimeIn`, resolving
// collisions against the physics objects of the surrounding blocks.
//
// The frame is split into `count` equal sub-steps. Within each sub-step the
// remaining time is consumed collision by collision: the earliest collision in
// a 3 x 5 x 3 block neighbourhood is found, the entity is moved to it, and the
// loop repeats until the remaining displacement is negligible.
//
// Parameters:
//   data        - entity state; position, velocity, acceleration and
//                 data.entity->age are updated in place. May be cleared at the
//                 end (see the final check).
//   world       - used to obtain the block iterator at the entity's position.
//   deltaTimeIn - total time to simulate for this call.
void EntityPlayer::onMove(EntityData & data, shared_ptr<World> world, float deltaTimeIn) const
{
    // NOTE(review): presumably makes sure data.entity is populated, since it is
    // dereferenced below -- confirm against getEntity().
    getEntity(data, world);
    assert(data.extraData);
    auto eData = dynamic_pointer_cast<ExtraData>(data.extraData);
    assert(eData);
    // Reset the driving terms: no change of acceleration, and gravity unless flying.
    data.deltaAcceleration = VectorF(0);
    data.acceleration = gravityVector;
    if(eData->flying)
        data.acceleration = VectorF(0);
    // Number of sub-steps; grows with deltaTimeIn * abs(velocity) in units of 0.5.
    // NOTE(review): abs() presumably yields the vector magnitude and iceil() a
    // ceiling-to-int -- confirm against their definitions.
    int count = iceil(deltaTimeIn * abs(data.velocity) / 0.5 + 1);
    // Block iterator at the starting position. It is not reassigned anywhere
    // below, so the neighbourhood scan stays centred on this block for the
    // whole call.
    BlockIterator bi = world->get((PositionI)data.position);
    data.entity->acceleration = data.acceleration;
    data.entity->deltaAcceleration = data.deltaAcceleration;
    // Collision box for the player; re-initialised before every collision search.
    auto pphysicsObject = make_shared<PhysicsBox>((VectorF)data.position + physicsOffset(), physicsExtents(), data.velocity, data.entity->acceleration, data.entity->deltaAcceleration, data.position.d, physicsProperties(), -physicsOffset());
    PhysicsBox & physicsObject = *pphysicsObject;
    for(int step = 0; step < count; step++)
    {
        // Time budget of this sub-step; reduced as collisions consume it.
        float deltaTime = deltaTimeIn / count;
        data.entity->age += deltaTime;
        // Counts consecutive valid collisions whose time is below eps
        // (reset by any valid collision with time >= eps).
        int zeroCount = 0;
        // Keep going while the displacement still possible in the remaining
        // time (deltaTime * speed) exceeds eps.
        while(deltaTime * deltaTime * absSquared(data.velocity) > eps * eps)
        {
            // NOTE(review): `supported` is assigned below but never read in this function.
            bool supported = false;
            // Default outcome when nothing is hit: free motion for the whole
            // deltaTime, integrating velocity, acceleration and deltaAcceleration
            // (p + v*t + a*t^2/2 + j*t^3/6 and v + a*t + j*t^2/2).
            // NOTE(review): the argument order is presumably (newPosition,
            // newVelocity, collisionNormal, time), matching the members used
            // below -- confirm against PhysicsCollision.
            PhysicsCollision firstCollision(data.position + deltaTime * data.velocity + deltaTime * deltaTime * 0.5f * data.entity->acceleration + deltaTime * deltaTime * deltaTime * (1 / 6.0f) * data.entity->deltaAcceleration, data.velocity + deltaTime * data.entity->acceleration + deltaTime * deltaTime * 0.5f * data.entity->deltaAcceleration, VectorF(0), deltaTime);
            physicsObject.reInit((VectorF)data.position + physicsOffset(), physicsExtents(), data.velocity, data.entity->acceleration, data.entity->deltaAcceleration);
            // Scan the neighbourhood of bi: 3 blocks in x and z, 5 in y.
            for(int dx = -1; dx <= 1; dx++)
            {
                for(int dy = -2; dy <= 2; dy++)
                {
                    for(int dz = -1; dz <= 1; dz++)
                    {
                        BlockIterator curBI = bi;
                        curBI += VectorI(dx, dy, dz);
                        shared_ptr<PhysicsObject> otherObject;
                        if(curBI.get().good())
                            otherObject = curBI.get().desc->getPhysicsObject(curBI.position());
                        else
                            // Block is not good(): substitute an infinite-mass box
                            // centred on the cell with half-extent 0.5, i.e. one
                            // that fills the whole cell.
                            otherObject = static_pointer_cast<PhysicsObject>(make_shared<PhysicsBox>((VectorI)curBI.position() + VectorF(0.5),
                                          VectorF(0.5), VectorF(0), VectorF(0), VectorF(0), curBI.position().d, PhysicsProperties(PhysicsProperties::INFINITE_MASS, 1, 0)));
                        assert(otherObject);
                        {
                            // True when the other object's box covers the entire unit cell.
                            bool filled = false;
                            // Only assigned (and only read) when `filled` is true.
                            float newY;
                            switch(otherObject->type())
                            {
                            case PhysicsObject::Type::Box:
                            {
                                // NOTE(review): the dynamic_cast result is dereferenced
                                // without a null check; relies on type() == Box
                                // implying a PhysicsBox.
                                const PhysicsBox * pbox = dynamic_cast<const PhysicsBox *>(otherObject.get());
                                VectorF min = pbox->center - pbox->extents;
                                VectorF max = pbox->center + pbox->extents;
                                if(min.x <= curBI.position().x && max.x >= curBI.position().x + 1 && min.y <= curBI.position().y && max.y >= curBI.position().y + 1 && min.z <= curBI.position().z && max.z >= curBI.position().z + 1)
                                {
                                    // Y at which the player would rest on top of this box.
                                    newY = max.y + physicsObject.extents.y - physicsOffset().y;
                                    filled = true;
                                }
                                VectorF temp;
                                // Overlaps when shifted down by 10 * eps but not at the
                                // unshifted position.
                                if(isBoxCollision(pbox->center, pbox->extents, physicsObject.center - VectorF(0, eps * 10, 0) + physicsOffset(), physicsObject.extents, temp) && !isBoxCollision(pbox->center, pbox->extents, physicsObject.center + physicsOffset(), physicsObject.extents, temp))
                                {
                                    supported = true;
                                }
                                break;
                            }
                            case PhysicsObject::Type::None:
                                break;
                            }
                            // The centre cell (offset 0,0,0) is completely filled and at
                            // least two consecutive near-zero-time collisions have
                            // occurred: force an immediate collision that places the
                            // player on top of the box with zero velocity and an
                            // upward normal.
                            if(filled && zeroCount >= 2 && dx == 0 && dy == 0 && dz == 0)
                            {
                                firstCollision.time = 0;
                                firstCollision.newPosition = data.position;
                                firstCollision.newPosition.y = newY;
                                firstCollision.newVelocity = VectorF(0);
                                firstCollision.collisionNormal = VectorF(0, 1, 0);
                                // The switch is already closed, so this leaves only the
                                // innermost (dz) loop; the dy and dx loops continue.
                                // It also skips collide() for this cell.
                                break;
                            }
                        }
                        PhysicsCollision collision = physicsObject.collide(otherObject, deltaTime);
                        if(collision.valid)
                        {
                            if(collision.time < eps)
                            {
                                // Ignore further near-zero-time collisions once more
                                // than 25 have been counted in a row.
                                if(zeroCount > 25)
                                    collision.valid = false;
                                else
                                    zeroCount++;
                            }
                            else
                                zeroCount = 0;
                        }
                        // Keep the earliest valid collision.
                        if(collision.valid && collision.time < firstCollision.time)
                            firstCollision = collision;
                    }
                }
            }
            // Consume the time up to the chosen collision (or the whole remainder
            // if nothing was hit) and commit the resulting state.
            deltaTime -= firstCollision.time;
            // Offset the new position along the collision normal by
            // eps * (2 + abs(newVelocity)).
            data.setPosition(firstCollision.newPosition + eps * (2 + abs(firstCollision.newVelocity)) * firstCollision.collisionNormal);
            data.setVelocity(firstCollision.newVelocity);
            data.setAcceleration(data.entity->acceleration + data.entity->deltaAcceleration * firstCollision.time);
        }
    }
    // Clear the entity data when there is no client or the client is no longer valid.
    if(eData->pclient == nullptr || !isClientValid(*eData->pclient))
    {
        data.clear();
        return;
    }
}