void mergesort(int* ar, int len){ if (len < 2) return; int* ar1=ar; int* ar2=ar+ len/2; int* ar2or = ar2; mergesort(ar1, len/2); mergesort(ar2, ceildiv(len,2)); if (*ar2 > *(ar2-1)) return; /* la, la-la... */ int* arsub = malloc(len*sizeof(int)); int* p = arsub; char broken = 0; for (int i=1; i<len; i++, p++){ if (compar(ar1,ar2)>0 && ar2-ar2or<ceildiv(len,2)) { *p = *ar2; ar2++; } else { *p = *ar1; ar1++; if (ar1 == ar2or) { len = i; broken = 1; break; } } } if (!broken) *p = ar1 == ar2or ? *ar2 : *ar1; memcpy(ar, arsub, len*sizeof(int)); free(arsub); }
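/* Every snippet in this collection leans on a ceildiv(a, b) helper. A minimal
   sketch for non-negative operands (an assumption; each project above ships its
   own definition): */
static inline int ceildiv(int num, int den) {
    return (num + den - 1) / den;  /* rounds up: ceildiv(7, 2) == 4, ceildiv(6, 2) == 3 */
}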
FieldData::FieldData(const GridInfo& gInfo, string category, int nElem, int nDoublesPerElem, bool onGpu) : nElem(nElem), scale(1.), gInfo(gInfo), isReal(nDoublesPerElem==1) { int nDoubles = nElem * nDoublesPerElem; int nComplex = ceildiv(nDoubles,2); memInit(category, nComplex, onGpu); }
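/* Why ceildiv in the constructor above (an illustrative sketch, not the
   project's code): nElem * nDoublesPerElem doubles get packed into complex
   (two-double) slots, so an odd count must round up or the last double would
   lose its storage. */
#include <cassert>
static int ceildiv(int a, int b) { return (a + b - 1) / b; }
int main() {
    int nDoubles = 7;                     // e.g. nElem = 7, nDoublesPerElem = 1
    int nComplex = ceildiv(nDoubles, 2);  // 4 complex slots = 8 doubles
    assert(2 * nComplex >= nDoubles);     // the packing never truncates
    return 0;
}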
void select_keys(int *keys, int fd, off_t size, int k){ int a,i,j,random1,random2; off_t le_random,random_integer; int *samples; a = (int) ceil(log(k) / log(2)); /* oversampling factor: about log2(k) samples per key */ samples = (int*)malloc(sizeof(int)*((a+1)*k)); keys[0] = INT_MIN; keys[k] = INT_MAX; srand((unsigned)time(0)); for(i=0;i < (a+1)*k ;i++){ random1 = rand(); random2 = rand(); le_random = random1; le_random = le_random << 32 | random2; random_integer = le_random % (size/B); lseek64(fd, sizeof(int)*random_integer, SEEK_SET); if((j = read(fd, (void *)&(samples[i]),sizeof(int)))== -1){ printf("fail4\n"); exit(1); } results.io_acc++; results.io_rand++; } lseek64(fd, 0, SEEK_SET); results.io_rand++; alpha_quicksort(samples,(a+1)*k); for(i=1;i < k;i++){ keys[i] = samples[(a+1)*i]; } free(samples); }
static err_t rfb_accept(void *arg, struct tcp_pcb *pcb, err_t err) { struct rfb_state *state; char * blockbuf; LWIP_UNUSED_ARG(arg); LWIP_UNUSED_ARG(err); state = (struct rfb_state *)mem_malloc(sizeof(struct rfb_state)); if (!state) { outputf("rfb_accept: out of memory\n"); return ERR_MEM; } memset(state, 0, sizeof(struct rfb_state)); blockbuf = mem_malloc(ceildiv(fb->curmode.xres, SCREEN_CHUNKS_X) * ceildiv(fb->curmode.yres, SCREEN_CHUNKS_Y) * 4); if (!blockbuf) { outputf("rfb_accept: out of memory allocating blockbuf\n"); mem_free(state); return ERR_MEM; } state->blockbuf = blockbuf; state->state = ST_BEGIN; state->send_state = SST_IDLE; /* XXX: update_server_info() should be called from the 64ms timer, and deal * with screen resizes appropriately. */ update_server_info(); tcp_arg(pcb, state); tcp_recv(pcb, rfb_recv); tcp_sent(pcb, rfb_sent); tcp_poll(pcb, rfb_poll, 1); /* tcp_err(pcb, rfb_err); */ tcp_write(pcb, "RFB 003.008\n", 12, 0); tcp_output(pcb); return ERR_OK; }
void diagMatrix::set(int iStart, int iStep, int iStop, const diagMatrix& m) { myassert(iStart>=0 && iStart<nRows()); myassert(iStop>iStart && iStop<=nRows()); myassert(iStep>0); int iDelta = ceildiv(iStop-iStart, iStep); myassert(iDelta==m.nRows()); for(int i=0; i<iDelta; i++) at(i*iStep+iStart) = m[i]; }
diagMatrix diagMatrix::operator()(int iStart, int iStep, int iStop) const { myassert(iStart>=0 && iStart<nRows()); myassert(iStop>iStart && iStop<=nRows()); myassert(iStep>0); int iDelta = ceildiv(iStop-iStart, iStep); diagMatrix ret(iDelta); for(int i=0; i<iDelta; i++) ret[i] = at(i*iStep+iStart); return ret; }
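/* The slice length used by diagMatrix::set and operator() above: a half-open
   strided range [iStart, iStop) with step iStep visits exactly
   ceildiv(iStop - iStart, iStep) indices. A self-contained check: */
#include <cassert>
static int ceildiv(int a, int b) { return (a + b - 1) / b; }
int main() {
    int iStart = 2, iStop = 11, iStep = 3, count = 0;
    for (int i = iStart; i < iStop; i += iStep) ++count;  // visits 2, 5, 8
    assert(count == ceildiv(iStop - iStart, iStep));      // 3 == ceildiv(9, 3)
    return 0;
}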
void CopyToTextureOpenGL::copy(void *tex, int width, int height, const void *dataptr, int data_num, DataConversion conv) { int w = width; int h = ceildiv(data_num, width); dataptr = getDataPointer(dataptr, data_num, width*height, conv, false); glBindTexture(GL_TEXTURE_2D, (GLuint)(size_t)tex); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_FLOAT, dataptr); glBindTexture(GL_TEXTURE_2D, 0); }
int li_combo_ticks(t_env const *e, int const *route_stack, int const stacki) { size_t datas[5]; build_datas(e, route_stack, stacki, datas); if (NUM_ANTS < MIN_ANTS) return (INT_MAX); return (ceildiv(NUM_ANTS - MIN_ANTS, NUM_ROUTES) + MAX_DIFF + MIN_LEN); }
void li_combo_ticks_decomposed(t_env const *e, size_t phases[3]) { size_t datas[5]; build_datas(e, e->best_combo.routes, e->best_combo.num_routes - 1, datas); P1_FULL_SENDIND = ceildiv(NUM_ANTS - MIN_ANTS, NUM_ROUTES); P2_PARTIAL_SENDING = MAX_DIFF; P3_SENDING_DONE = MIN_LEN; return ; }
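/* A worked evaluation of the tick formula above, with made-up numbers (the
   NUM_*, MIN_*, and MAX_* names are macros over datas[], defined elsewhere in
   the project): NUM_ANTS = 10, MIN_ANTS = 4, NUM_ROUTES = 3, MAX_DIFF = 2,
   MIN_LEN = 5 gives ceildiv(10 - 4, 3) + 2 + 5 = 2 + 7 = 9 ticks. */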
void matrix::set(int iStart, int iStep, int iStop, int jStart, int jStep, int jStop, const matrix& m)
{
    myassert(iStart>=0 && iStart<nr);
    myassert(iStop>iStart && iStop<=nr);
    myassert(iStep>0);
    myassert(jStart>=0 && jStart<nc);
    myassert(jStop>jStart && jStop<=nc);
    myassert(jStep>0);
    int iDelta = ceildiv(iStop-iStart, iStep), jDelta = ceildiv(jStop-jStart, jStep);
    myassert(iDelta==m.nr);
    myassert(jDelta==m.nc);
    const complex* mData = m.dataPref();
    complex* thisData = this->dataPref();
    #ifdef GPU_ENABLED
    matrixSubSet_gpu(nr, iStart,iStep,iDelta, jStart,jStep,jDelta, mData, thisData);
    #else
    for(int i=0; i<iDelta; i++)
        for(int j=0; j<jDelta; j++)
            thisData[this->index(i*iStep+iStart, j*jStep+jStart)] = mData[m.index(i,j)];
    #endif
}
uint64_t getTimeTickDelta(const uint64_t* intervals, int len, uint64_t span) { Q_ASSERT(len > 0); uint64_t idealWidth = ceildiv(span, (uint64_t) TIMEAXIS_MAXTICKS); const uint64_t* tickptr = std::lower_bound(intervals, intervals + len, idealWidth); if (tickptr == intervals + len) { tickptr--; } return *tickptr; }
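/* How getTimeTickDelta picks an interval (a standalone sketch with a made-up
   table; the real NANOTICK_INTERVALS etc. live elsewhere): the smallest table
   entry at or above ceildiv(span, maxticks) keeps the tick count <= maxticks. */
#include <algorithm>
#include <cassert>
#include <cstdint>
static uint64_t ceildiv(uint64_t a, uint64_t b) { return (a + b - 1) / b; }
int main() {
    const uint64_t intervals[] = {1, 2, 5, 10, 20, 50};  // hypothetical table
    uint64_t span = 37, maxticks = 8;
    uint64_t ideal = ceildiv(span, maxticks);            // 5
    const uint64_t* tick = std::lower_bound(intervals, intervals + 6, ideal);
    assert(*tick == 5 && span / *tick <= maxticks);      // 7 ticks of width 5
    return 0;
}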
matrix matrix::operator()(int iStart, int iStep, int iStop, int jStart, int jStep, int jStop) const
{
    if(iStart==0 && iStep==1 && iStop==nr && jStart==0 && jStep==1 && jStop==nc)
        return *this; //faster to copy matrix for this special case
    myassert(iStart>=0 && iStart<nr);
    myassert(iStop>iStart && iStop<=nr);
    myassert(iStep>0);
    myassert(jStart>=0 && jStart<nc);
    myassert(jStop>jStart && jStop<=nc);
    myassert(jStep>0);
    int iDelta = ceildiv(iStop-iStart, iStep), jDelta = ceildiv(jStop-jStart, jStep);
    matrix ret(iDelta,jDelta, isGpuEnabled());
    complex* retData = ret.dataPref();
    const complex* thisData = this->dataPref();
    #ifdef GPU_ENABLED
    matrixSubGet_gpu(nr, iStart,iStep,iDelta, jStart,jStep,jDelta, thisData, retData);
    #else
    for(int i=0; i<iDelta; i++)
        for(int j=0; j<jDelta; j++)
            retData[ret.index(i,j)] = thisData[this->index(i*iStep+iStart, j*jStep+jStart)];
    #endif
    return ret;
}
//! Fallback conversion that will at least work for ASCII characters
static void to_utf8_fallback(const std::string & from, std::string & to, boost::uint32_t codepage) {
    size_t skip = get_encoding_size(codepage);
    to.clear();
    to.reserve(ceildiv(from.size(), skip));
    for(size_t i = 0; i < from.size(); i += skip) {
        if((unsigned char)from[i] <= 127) {
            // copy ASCII characters
            to.push_back(from[i]);
        } else {
            // replace everything else with underscores
            to.push_back(replacement_char);
        }
    }
}
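/* The reserve above sizes the output at one char per skip-byte code unit;
   ceildiv covers a trailing partial unit. A quick check of that count: */
#include <cassert>
#include <cstddef>
static size_t ceildiv(size_t a, size_t b) { return (a + b - 1) / b; }
int main() {
    size_t n = 7, skip = 2, iters = 0;
    for (size_t i = 0; i < n; i += skip) ++iters;  // i = 0, 2, 4, 6
    assert(iters == ceildiv(n, skip));             // 4 == ceildiv(7, 2)
    return 0;
}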
ScalarFieldTilde PCM::getFullCore() const
{
    switch(fsp.pcmVariant)
    {
        case PCM_SaLSA:
        case PCM_CANDLE:
        {
            ScalarFieldTilde nFullCore, SG(ScalarFieldTildeData::alloc(gInfo, isGpuEnabled()));
            for(unsigned iSp=0; iSp<atpos.size(); iSp++)
            {
                //Create GPU-friendly copy of atom positions:
                int nAtoms = atpos[iSp].size();
                matrix atposTemp(3, ceildiv(nAtoms,2));
                memcpy(atposTemp.data(), atpos[iSp].data(), sizeof(vector3<>)*nAtoms);
                //Compute structure factor and accumulate contribution from this species:
                callPref(getSG)(gInfo.S, nAtoms, (const vector3<>*)atposTemp.dataPref(), 1./gInfo.detR, SG->dataPref());
                nFullCore += e.iInfo.species[iSp]->ZfullCore * SG;
            }
            return nFullCore;
        }
        default:
            return 0; //no full-core
    }
}
/** Purpose ------- ZHEEVDX_GPU computes selected eigenvalues and, optionally, eigenvectors of a complex Hermitian matrix A. Eigenvalues and eigenvectors can be selected by specifying either a range of values or a range of indices for the desired eigenvalues. If eigenvectors are desired, it uses a divide and conquer algorithm. The divide and conquer algorithm makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. Arguments --------- @param[in] jobz magma_vec_t - = MagmaNoVec: Compute eigenvalues only; - = MagmaVec: Compute eigenvalues and eigenvectors. @param[in] range magma_range_t - = MagmaRangeAll: all eigenvalues will be found. - = MagmaRangeV: all eigenvalues in the half-open interval (VL,VU] will be found. - = MagmaRangeI: the IL-th through IU-th eigenvalues will be found. @param[in] uplo magma_uplo_t - = MagmaUpper: Upper triangle of A is stored; - = MagmaLower: Lower triangle of A is stored. @param[in] n INTEGER The order of the matrix A. N >= 0. @param[in,out] dA COMPLEX_16 array on the GPU, dimension (LDDA, N). On entry, the Hermitian matrix A. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If UPLO = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A. On exit, if JOBZ = MagmaVec, then if INFO = 0, the first mout columns of A contain the required orthonormal eigenvectors of the matrix A. If JOBZ = MagmaNoVec, then on exit the lower triangle (if UPLO=MagmaLower) or the upper triangle (if UPLO=MagmaUpper) of A, including the diagonal, is destroyed. @param[in] ldda INTEGER The leading dimension of the array DA. LDDA >= max(1,N). @param[in] vl DOUBLE PRECISION @param[in] vu DOUBLE PRECISION If RANGE=MagmaRangeV, the lower and upper bounds of the interval to be searched for eigenvalues. VL < VU. Not referenced if RANGE = MagmaRangeAll or MagmaRangeI. @param[in] il INTEGER @param[in] iu INTEGER If RANGE=MagmaRangeI, the indices (in ascending order) of the smallest and largest eigenvalues to be returned. 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. Not referenced if RANGE = MagmaRangeAll or MagmaRangeV. @param[out] mout INTEGER The total number of eigenvalues found. 0 <= MOUT <= N. If RANGE = MagmaRangeAll, MOUT = N, and if RANGE = MagmaRangeI, MOUT = IU-IL+1. @param[out] w DOUBLE PRECISION array, dimension (N) If INFO = 0, the required mout eigenvalues in ascending order. @param wA (workspace) COMPLEX_16 array, dimension (LDWA, N) @param[in] ldwa INTEGER The leading dimension of the array wA. LDWA >= max(1,N). @param[out] work (workspace) COMPLEX_16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. @param[in] lwork INTEGER The length of the array WORK. If N <= 1, LWORK >= 1. If JOBZ = MagmaNoVec and N > 1, LWORK >= N + N*NB. If JOBZ = MagmaVec and N > 1, LWORK >= max( N + N*NB, 2*N + N**2 ). NB can be obtained through magma_get_zhetrd_nb(N).
\n If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal sizes of the WORK, RWORK and IWORK arrays, returns these values as the first entries of the WORK, RWORK and IWORK arrays, and no error message related to LWORK or LRWORK or LIWORK is issued by XERBLA. @param[out] rwork (workspace) DOUBLE PRECISION array, dimension (LRWORK) On exit, if INFO = 0, RWORK[0] returns the optimal LRWORK. @param[in] lrwork INTEGER The dimension of the array RWORK. If N <= 1, LRWORK >= 1. If JOBZ = MagmaNoVec and N > 1, LRWORK >= N. If JOBZ = MagmaVec and N > 1, LRWORK >= 1 + 5*N + 2*N**2. \n If LRWORK = -1, then a workspace query is assumed; the routine only calculates the optimal sizes of the WORK, RWORK and IWORK arrays, returns these values as the first entries of the WORK, RWORK and IWORK arrays, and no error message related to LWORK or LRWORK or LIWORK is issued by XERBLA. @param[out] iwork (workspace) INTEGER array, dimension (MAX(1,LIWORK)) On exit, if INFO = 0, IWORK[0] returns the optimal LIWORK. @param[in] liwork INTEGER The dimension of the array IWORK. If N <= 1, LIWORK >= 1. If JOBZ = MagmaNoVec and N > 1, LIWORK >= 1. If JOBZ = MagmaVec and N > 1, LIWORK >= 3 + 5*N. \n If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal sizes of the WORK, RWORK and IWORK arrays, returns these values as the first entries of the WORK, RWORK and IWORK arrays, and no error message related to LWORK or LRWORK or LIWORK is issued by XERBLA. @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i and JOBZ = MagmaNoVec, then the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero; if INFO = i and JOBZ = MagmaVec, then the algorithm failed to compute an eigenvalue while working on the submatrix lying in rows and columns INFO/(N+1) through mod(INFO,N+1). Further Details --------------- Based on contributions by Jeff Rutter, Computer Science Division, University of California at Berkeley, USA Modified description of INFO. Sven, 16 Feb 05. @ingroup magma_zheev_driver ********************************************************************/
extern "C" magma_int_t
magma_zheevdx_gpu(
    magma_vec_t jobz, magma_range_t range, magma_uplo_t uplo,
    magma_int_t n,
    magmaDoubleComplex_ptr dA, magma_int_t ldda,
    double vl, double vu, magma_int_t il, magma_int_t iu,
    magma_int_t *mout, double *w,
    magmaDoubleComplex *wA, magma_int_t ldwa,
    magmaDoubleComplex *work, magma_int_t lwork,
#ifdef COMPLEX
    double *rwork, magma_int_t lrwork,
#endif
    magma_int_t *iwork, magma_int_t liwork,
    magma_int_t *info)
{
    const char* uplo_ = lapack_uplo_const( uplo );
    const char* jobz_ = lapack_vec_const( jobz );
    magma_int_t ione = 1;

    double d__1;
    double eps;
    magma_int_t inde;
    double anrm;
    magma_int_t imax;
    double rmin, rmax;
    double sigma;
    magma_int_t iinfo, lwmin;
    magma_int_t lower;
    magma_int_t llrwk;
    magma_int_t wantz;
    //magma_int_t indwk2;
    magma_int_t iscale;
    double safmin;
    double bignum;
    magma_int_t indtau;
    magma_int_t indrwk, indwrk, liwmin;
    magma_int_t lrwmin, llwork;
    double smlnum;
    magma_int_t lquery;
    magma_int_t alleig, valeig, indeig;

    magmaDouble_ptr dwork;
    magmaDoubleComplex_ptr dC;
    magma_int_t lddc = ldda;

    wantz  = (jobz == MagmaVec);
    lower  = (uplo == MagmaLower);
    alleig = (range == MagmaRangeAll);
    valeig = (range == MagmaRangeV);
    indeig = (range == MagmaRangeI);
    lquery = (lwork == -1 || lrwork == -1 || liwork == -1);

    *info = 0;
    if (! (wantz || (jobz == MagmaNoVec))) {
        *info = -1;
    } else if (! (alleig || valeig || indeig)) {
        *info = -2;
    } else if (! (lower || (uplo == MagmaUpper))) {
        *info = -3;
    } else if (n < 0) {
        *info = -4;
    } else if (ldda < max(1,n)) {
        *info = -6;
    } else if (ldwa < max(1,n)) {
        *info = -14;
    } else {
        if (valeig) {
            if (n > 0 && vu <= vl) {
                *info = -8;
            }
        } else if (indeig) {
            if (il < 1 || il > max(1,n)) {
                *info = -9;
            } else if (iu < min(n,il) || iu > n) {
                *info = -10;
            }
        }
    }

    magma_int_t nb = magma_get_zhetrd_nb( n );
    if ( n <= 1 ) {
        lwmin  = 1;
        lrwmin = 1;
        liwmin = 1;
    }
    else if ( wantz ) {
        lwmin  = max( n + n*nb, 2*n + n*n );
        lrwmin = 1 + 5*n + 2*n*n;
        liwmin = 3 + 5*n;
    }
    else {
        lwmin  = n + n*nb;
        lrwmin = n;
        liwmin = 1;
    }

    // multiply by 1+eps (in Double!) to ensure length gets rounded up,
    // if it cannot be exactly represented in floating point.
    real_Double_t one_eps = 1. + lapackf77_dlamch("Epsilon");
    work[0]  = MAGMA_Z_MAKE( lwmin * one_eps, 0 );
    rwork[0] = lrwmin * one_eps;
    iwork[0] = liwmin;

    if ((lwork < lwmin) && !lquery) {
        *info = -16;
    } else if ((lrwork < lrwmin) && ! lquery) {
        *info = -18;
    } else if ((liwork < liwmin) && ! lquery) {
        *info = -20;
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery) {
        return *info;
    }

    /* If matrix is very small, then just call LAPACK on CPU, no need for GPU */
    if (n <= 128) {
        magma_int_t lda = n;
        magmaDoubleComplex *A;
        magma_zmalloc_cpu( &A, lda*n );
        magma_zgetmatrix( n, n, dA, ldda, A, lda );
        lapackf77_zheevd( jobz_, uplo_, &n,
                          A, &lda,
                          w,
                          work, &lwork,
                          rwork, &lrwork,
                          iwork, &liwork,
                          info );
        magma_zsetmatrix( n, n, A, lda, dA, ldda );
        magma_free_cpu( A );
        *mout = n;
        return *info;
    }

    magma_queue_t stream;
    magma_queue_create( &stream );

    // dC and dwork are never used together, so use one buffer for both;
    // unfortunately they're different types (complex and double).
    // (this is easier in dsyevd_gpu where everything is double.)
    // zhetrd2_gpu requires ldda*ceildiv(n,64) + 2*ldda*nb, in double-complex.
    // zunmtr_gpu requires lddc*n, in double-complex.
    // zlanhe requires n, in double.
    magma_int_t ldwork = max( ldda*ceildiv(n,64) + 2*ldda*nb, lddc*n );
    magma_int_t ldwork_real = max( ldwork*2, n );
    if ( wantz ) {
        // zstedx requires 3n^2/2, in double
        ldwork_real = max( ldwork_real, 3*n*(n/2 + 1) );
    }
    if (MAGMA_SUCCESS != magma_dmalloc( &dwork, ldwork_real )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    dC = (magmaDoubleComplex*) dwork;

    /* Get machine constants. */
    safmin = lapackf77_dlamch("Safe minimum");
    eps    = lapackf77_dlamch("Precision");
    smlnum = safmin / eps;
    bignum = 1. / smlnum;
    rmin   = magma_dsqrt( smlnum );
    rmax   = magma_dsqrt( bignum );

    /* Scale matrix to allowable range, if necessary. */
    anrm = magmablas_zlanhe( MagmaMaxNorm, uplo, n, dA, ldda, dwork );
    iscale = 0;
    sigma  = 1;
    if (anrm > 0. && anrm < rmin) {
        iscale = 1;
        sigma = rmin / anrm;
    }
    else if (anrm > rmax) {
        iscale = 1;
        sigma = rmax / anrm;
    }
    if (iscale == 1) {
        magmablas_zlascl( uplo, 0, 0, 1., sigma, n, n, dA, ldda, info );
    }

    /* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */
    // zhetrd rwork: e (n)
    // zstedx rwork: e (n) + llrwk (1 + 4*N + 2*N**2)  ==>  1 + 5n + 2n^2
    inde   = 0;
    indrwk = inde + n;
    llrwk  = lrwork - indrwk;

    // zhetrd work: tau (n) + llwork (n*nb)  ==>  n + n*nb
    // zstedx work: tau (n) + z (n^2)
    // zunmtr work: tau (n) + z (n^2) + llwrk2 (n or n*nb)  ==>  2n + n^2, or n + n*nb + n^2
    indtau = 0;
    indwrk = indtau + n;
    //indwk2 = indwrk + n*n;
    llwork = lwork - indwrk;
    //llwrk2 = lwork - indwk2;

    magma_timer_t time=0;
    timer_start( time );

#ifdef FAST_HEMV
    magma_zhetrd2_gpu( uplo, n, dA, ldda, w, &rwork[inde],
                       &work[indtau], wA, ldwa, &work[indwrk], llwork,
                       dC, ldwork, &iinfo );
#else
    magma_zhetrd_gpu ( uplo, n, dA, ldda, w, &rwork[inde],
                       &work[indtau], wA, ldwa, &work[indwrk], llwork,
                       &iinfo );
#endif

    timer_stop( time );
    timer_printf( "time zhetrd_gpu = %6.2f\n", time );

    /* For eigenvalues only, call DSTERF. For eigenvectors, first call
       ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
       tridiagonal matrix, then call ZUNMTR to multiply it to the Householder
       transformations represented as Householder vectors in A. */
    if (! wantz) {
        lapackf77_dsterf( &n, w, &rwork[inde], info );
        magma_dmove_eig( range, n, w, &il, &iu, vl, vu, mout );
    }
    else {
        timer_start( time );

        magma_zstedx( range, n, vl, vu, il, iu, w, &rwork[inde],
                      &work[indwrk], n, &rwork[indrwk],
                      llrwk, iwork, liwork, dwork, info );

        timer_stop( time );
        timer_printf( "time zstedx = %6.2f\n", time );
        timer_start( time );

        magma_dmove_eig( range, n, w, &il, &iu, vl, vu, mout );

        magma_zsetmatrix( n, *mout, &work[indwrk + n * (il-1) ], n, dC, lddc );

        magma_zunmtr_gpu( MagmaLeft, uplo, MagmaNoTrans, n, *mout, dA, ldda, &work[indtau],
                          dC, lddc, wA, ldwa, &iinfo );

        magma_zcopymatrix( n, *mout, dC, lddc, dA, ldda );

        timer_stop( time );
        timer_printf( "time zunmtr_gpu + copy = %6.2f\n", time );
    }

    /* If matrix was scaled, then rescale eigenvalues appropriately. */
    if (iscale == 1) {
        if (*info == 0) {
            imax = n;
        } else {
            imax = *info - 1;
        }
        d__1 = 1. / sigma;
        blasf77_dscal( &imax, &d__1, w, &ione );
    }

    work[0]  = MAGMA_Z_MAKE( lwmin * one_eps, 0 );  // round up
    rwork[0] = lrwmin * one_eps;
    iwork[0] = liwmin;

    magma_queue_destroy( stream );
    magma_free( dwork );

    return *info;
} /* magma_zheevdx_gpu */
static void EXPORT_in(struct crabql *crabql, msgpack_object *o, UT_string *s, int nargs) {
    ERROR_ASSERT(nargs == 3);
    if (o[2].type == MSGPACK_OBJECT_ARRAY) {
        msgpack_object *p = o[2].via.array.ptr;
        uts_printf_concat(s, "bs64((int64_t) ");
        crabql_generate_code(crabql, &o[1], s);
        uts_printf_concat(s, ", \"");
        size_t len = o[2].via.array.size;
        int64_t *nums = slab_alloc(len * 8);
        for (size_t i = 0; i < len; i++) {
            ERROR_ASSERT(p[i].type == MSGPACK_OBJECT_POSITIVE_INTEGER || p[i].type == MSGPACK_OBJECT_NEGATIVE_INTEGER);
            if (p[i].type == MSGPACK_OBJECT_POSITIVE_INTEGER)
                ERROR_ASSERT(p[i].via.u64 < INT64_MAX);
            nums[i] = p[i].via.i64;
        }
        qsort(nums, len, 8, cmp64);
        // \x01\x00\x00\x00\x00\x00\x00\x00
        char *binstr = dump_binstrescape((const char *) nums, len * sizeof(nums[0]));
        utstring_bincpy(s, binstr, strlen(binstr));
        slab_free(nums);
        slab_free(binstr);
        uts_printf_concat(s, "\", (size_t) %zu)", o[2].via.array.size);
    }
    else if (o[2].type == MSGPACK_OBJECT_RAW) {
        char *bucket_name;
        int64_t table_id;
        if (parse_bucket_and_table(o[2].via.raw.ptr, o[2].via.raw.size, &bucket_name, &table_id) == 0) {
            struct bucket *bucket = bucket_get(bucket_name, CAN_RETURN_NULL);
            free(bucket_name);
            if (bucket) {
                struct table *table = bucket_get_table(bucket, table_id, CAN_RETURN_NULL);
                if (table) {
                    lock_table(crabql, table);
                    struct schema *schema = table->schema;
                    if (schema) {
                        struct field *field = schema_field_get_primary(schema);
                        size_t datasize = ceildiv(schema->nbits, 8);
                        uts_printf_concat(s, "bsintable(");
                        crabql_generate_code(crabql, &o[1], s);
                        uts_printf_concat(s, ", (size_t) %pULL, (size_t) %pULL, (size_t) %zu, (size_t) %zu)", table->data, field, datasize, table->len);
                    }
                    else {
                        log_warn("schema is NULL, return 0.");
                        uts_printf_concat(s, "0");
                    }
                }
                else {
                    uts_printf_concat(s, "0");
                }
            }
            else {
                log_warn("bucket is NULL, return 0.");
                uts_printf_concat(s, "0");
            }
        }
        else {
            log_warn("cannot parse bucket and table");
            crabql->error = 34;
        }
    }
}
void s_samplesort(int fd, off_t size,char *base_name, int t){ struct queue_buf *buff; struct queue_buf **file_buff; int *files, *sizes, *keys; int i,j,cur,r,ret,ret1; char name_file[500]; int k; k = t * min(ceildiv(size,M),ceildiv(M,B)); r = 0; buff = qb_new(M); files = (int *)malloc(sizeof(int)*k); sizes = (int *)malloc(sizeof(int)*k); keys = (int *)malloc(sizeof(int)*(k+1)); file_buff = (struct queue_buf **)malloc(sizeof(struct queue_buf *)*k); select_keys(keys,fd,size,k); for(i = 0; i < k; i++){ file_buff[i] = qb_new(B); sizes[i] = 0; sprintf(name_file, "%s_%d", base_name,i); if((files[i] = open(name_file, O_RDWR | O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO))== -1){ printf("%s_%d fail1\n", base_name,i); perror("here"); exit(1); } results.io_rand++; } j=1; while(1){ if((ret1 = qb_refill(buff,fd))== -1){ printf("%s_%d fail2\n", base_name,i); perror("here"); exit(1); } results.io_rand++; results.io_acc+=ceildiv(buff->n_elems,B); if (qb_empty(buff)) break; while(!qb_empty(buff)){ cur = qb_dequeue(buff); i = bucket(cur,keys,k+1); qb_enqueue(file_buff[i], cur); if(qb_full(file_buff[i])){ results.io_rand++; results.io_acc+=ceildiv(file_buff[i]->n_elems,B); sizes[i] += file_buff[i]->n_elems; if((ret = qb_flush(file_buff[i],files[i]))== -1){ printf("%s_%d fail3\n", base_name,i); perror("here"); exit(1); } } } } close(fd); remove(curr_name); /* Leftovers may remain in the buffers; everything must be flushed */ for (i = 0; i < k; i++) { if (!qb_empty(file_buff[i])) { results.io_rand++; results.io_acc+=ceildiv(file_buff[i]->n_elems,B); sizes[i] += file_buff[i]->n_elems; qb_flush(file_buff[i], files[i]); } close(files[i]); qb_free(file_buff[i]); } free(file_buff); free(keys); for(i = 0; i < k; i++){ sprintf(name_file, "%s_%d", base_name,i); if(sizes[i] == 0){ remove(name_file); } else if(sizes[i] <= M){ if((files[i] = open(name_file, O_RDWR | O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO))== -1){ printf("%s_%d fail1\n", base_name,i); perror("here"); exit(1); } if((ret1 = qb_refill(buff,files[i]))== -1){ printf("%s_%d fail2\n", base_name,i); perror("here"); exit(1); } results.io_rand++; results.io_acc+=ceildiv(buff->n_elems,B); quicksort(buff); lseek64(files[i],0,SEEK_SET); results.io_rand++; qb_flush(buff,files[i]); results.io_rand++; results.io_acc+=ceildiv(buff->n_elems,B); close(files[i]); remove(name_file); } } qb_free(buff); for (i = 0; i < k; i++) { sprintf(name_file, "%s_%d", base_name,i); if (sizes[i] > M) { if((files[i] = open(name_file, O_RDWR | O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO))== -1){ printf("%s_%d fail1\n",base_name, i); perror("here"); exit(1); } results.io_rand++; curr_name= name_file; s_samplesort(files[i],sizes[i],name_file,t); } } free(sizes); free(files); }
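/* s_samplesort relies on a bucket(cur, keys, n) helper that is not shown here.
   A plausible sketch (an assumption, not the original): with keys[] sorted and
   keys[0] = INT_MIN, keys[k] = INT_MAX as sentinels, the bucket of cur is the
   index of the last key <= cur, found by binary search. */
static int bucket(int cur, const int *keys, int n) {
    int lo = 0, hi = n - 1;   /* invariant: keys[lo] <= cur < keys[hi] */
    while (hi - lo > 1) {
        int mid = lo + (hi - lo) / 2;
        if (keys[mid] <= cur)
            lo = mid;
        else
            hi = mid;
    }
    return lo;                /* in [0, n-2], i.e. one of the k partitions */
}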
/** Purpose ------- ZHETRD reduces a complex Hermitian matrix A to real symmetric tridiagonal form T by an orthogonal similarity transformation: Q**H * A * Q = T. Arguments --------- @param[in] uplo magma_uplo_t - = MagmaUpper: Upper triangle of A is stored; - = MagmaLower: Lower triangle of A is stored. @param[in] n INTEGER The order of the matrix A. N >= 0. @param[in,out] A COMPLEX_16 array, dimension (LDA,N) On entry, the Hermitian matrix A. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if UPLO = MagmaUpper, the diagonal and first superdiagonal of A are overwritten by the corresponding elements of the tridiagonal matrix T, and the elements above the first superdiagonal, with the array TAU, represent the orthogonal matrix Q as a product of elementary reflectors; if UPLO = MagmaLower, the diagonal and first subdiagonal of A are overwritten by the corresponding elements of the tridiagonal matrix T, and the elements below the first subdiagonal, with the array TAU, represent the orthogonal matrix Q as a product of elementary reflectors. See Further Details. @param[in] lda INTEGER The leading dimension of the array A. LDA >= max(1,N). @param[out] d DOUBLE PRECISION array, dimension (N) The diagonal elements of the tridiagonal matrix T: D(i) = A(i,i). @param[out] e DOUBLE PRECISION array, dimension (N-1) The off-diagonal elements of the tridiagonal matrix T: E(i) = A(i,i+1) if UPLO = MagmaUpper, E(i) = A(i+1,i) if UPLO = MagmaLower. @param[out] tau COMPLEX_16 array, dimension (N-1) The scalar factors of the elementary reflectors (see Further Details). @param[out] work (workspace) COMPLEX_16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. @param[in] lwork INTEGER The dimension of the array WORK. LWORK >= N*NB, where NB is the optimal blocksize given by magma_get_zhetrd_nb(). \n If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value Further Details --------------- If UPLO = MagmaUpper, the matrix Q is represented as a product of elementary reflectors Q = H(n-1) . . . H(2) H(1). Each H(i) has the form H(i) = I - tau * v * v' where tau is a complex scalar, and v is a complex vector with v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in A(1:i-1,i+1), and tau in TAU(i). If UPLO = MagmaLower, the matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(n-1). Each H(i) has the form H(i) = I - tau * v * v' where tau is a complex scalar, and v is a complex vector with v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), and tau in TAU(i). The contents of A on exit are illustrated by the following examples with n = 5: if UPLO = MagmaUpper: if UPLO = MagmaLower: ( d e v2 v3 v4 ) ( d ) ( d e v3 v4 ) ( e d ) ( d e v4 ) ( v1 e d ) ( d e ) ( v1 v2 e d ) ( d ) ( v1 v2 v3 e d ) where d and e denote diagonal and off-diagonal elements of T, and vi denotes an element of the vector defining H(i).
@ingroup magma_zheev_comp ********************************************************************/
extern "C" magma_int_t
magma_zhetrd(
    magma_uplo_t uplo, magma_int_t n,
    magmaDoubleComplex *A, magma_int_t lda,
    double *d, double *e, magmaDoubleComplex *tau,
    magmaDoubleComplex *work, magma_int_t lwork,
    magma_int_t *info)
{
#define  A(i_, j_) ( A + (i_) + (j_)*lda )
#define dA(i_, j_) (dA + (i_) + (j_)*ldda)

    const char* uplo_ = lapack_uplo_const( uplo );

    magma_int_t ldda = roundup( n, 32 );
    magma_int_t nb = magma_get_zhetrd_nb( n );

    const magmaDoubleComplex c_zero    = MAGMA_Z_ZERO;
    const magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
    const magmaDoubleComplex c_one     = MAGMA_Z_ONE;
    const double             d_one     = MAGMA_D_ONE;

    magma_int_t kk, nx;
    magma_int_t i, j, i_n;
    magma_int_t iinfo;
    magma_int_t ldw, lddw, lwkopt;
    magma_int_t lquery;

    *info = 0;
    int upper = (uplo == MagmaUpper);
    lquery = (lwork == -1);
    if (! upper && uplo != MagmaLower) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (lda < max(1,n)) {
        *info = -4;
    } else if (lwork < nb*n && ! lquery) {
        *info = -9;
    }

    /* Determine the block size. */
    ldw = n;
    lddw = ldda;
    lwkopt = n * nb;
    if (*info == 0) {
        work[0] = MAGMA_Z_MAKE( lwkopt, 0 );
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery)
        return *info;

    /* Quick return if possible */
    if (n == 0) {
        work[0] = c_one;
        return *info;
    }

    magmaDoubleComplex *dA;
#ifdef FAST_HEMV
    magma_int_t ldwork2 = ldda*ceildiv(n,64);
#else
    magma_int_t ldwork2 = 0;
#endif
    if (MAGMA_SUCCESS != magma_zmalloc( &dA, ldda*n + 2*lddw*nb + ldwork2 )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }

    magmaDoubleComplex *dwork = dA + ldda*n;
#ifdef FAST_HEMV
    magmaDoubleComplex *dwork2 = dwork + 2*lddw*nb;
#endif

    //if (n < 2048)
    //    nx = n;
    //else
    //    nx = 512;
    nx = min( 128, n );  // nx <= n is required

    // clear out dwork in case it has NANs (used as y in zhemv)
    // rest of dwork (used as work in magmablas_zhemv) doesn't need to be cleared
    magmablas_zlaset( MagmaFull, n, nb, c_zero, c_zero, dwork, lddw );

    if (upper) {
        /* Copy the matrix to the GPU */
        magma_zsetmatrix( n, n, A(0, 0), lda, dA(0, 0), ldda );

        /* Reduce the upper triangle of A.
           Columns 1:kk are handled by the unblocked method. */
        kk = n - (n - nx + nb - 1) / nb * nb;

        for (i = n - nb; i >= kk; i -= nb) {
            /* Reduce columns i:i+nb-1 to tridiagonal form and form the
               matrix W which is needed to update the unreduced part of
               the matrix */

            /* Get the current panel (no need for the 1st iteration) */
            if (i != n-nb)
                magma_zgetmatrix( i+nb, nb, dA(0, i), ldda, A(0, i), lda );

#ifdef FAST_HEMV
            magma_zlatrd2( uplo, i+nb, nb, A(0, 0), lda, e, tau,
                           work, ldw, dA(0, 0), ldda, dwork, lddw,
                           dwork2, ldwork2 );
#else
            magma_zlatrd( uplo, i+nb, nb, A(0, 0), lda, e, tau,
                          work, ldw, dA(0, 0), ldda, dwork, lddw );
#endif

            /* Update the unreduced submatrix A(0:i-2,0:i-2), using an
               update of the form:  A := A - V*W' - W*V' */
            magma_zsetmatrix( i + nb, nb, work, ldw, dwork, lddw );

            magma_zher2k( uplo, MagmaNoTrans, i, nb, c_neg_one,
                          dA(0, i), ldda, dwork, lddw,
                          d_one, dA(0, 0), ldda );

            /* Copy superdiagonal elements back into A, and diagonal
               elements into D */
            for (j = i; j < i+nb; ++j) {
                *A(j-1,j) = MAGMA_Z_MAKE( e[j - 1], 0 );
                d[j] = MAGMA_Z_REAL( *A(j, j) );
            }
        }

        magma_zgetmatrix( kk, kk, dA(0, 0), ldda, A(0, 0), lda );

        /* Use CPU code to reduce the last or only block */
        lapackf77_zhetrd( uplo_, &kk, A(0, 0), &lda, d, e, tau, work, &lwork, &iinfo );
    }
    else {
        /* Copy the matrix to the GPU */
        if (1 <= n-nx)
            magma_zsetmatrix( n, n, A(0,0), lda, dA(0,0), ldda );

        /* Reduce the lower triangle of A */
        for (i = 0; i < n-nx; i += nb) {
            /* Reduce columns i:i+nb-1 to tridiagonal form and form the
               matrix W which is needed to update the unreduced part of
               the matrix */

            /* Get the current panel (no need for the 1st iteration) */
            if (i != 0)
                magma_zgetmatrix( n-i, nb, dA(i, i), ldda, A(i, i), lda );

#ifdef FAST_HEMV
            magma_zlatrd2( uplo, n-i, nb, A(i, i), lda, &e[i], &tau[i],
                           work, ldw, dA(i, i), ldda, dwork, lddw,
                           dwork2, ldwork2 );
#else
            magma_zlatrd( uplo, n-i, nb, A(i, i), lda, &e[i], &tau[i],
                          work, ldw, dA(i, i), ldda, dwork, lddw );
#endif

            /* Update the unreduced submatrix A(i+ib:n,i+ib:n), using
               an update of the form:  A := A - V*W' - W*V' */
            magma_zsetmatrix( n-i, nb, work, ldw, dwork, lddw );

            magma_zher2k( MagmaLower, MagmaNoTrans, n-i-nb, nb, c_neg_one,
                          dA(i+nb, i), ldda, &dwork[nb], lddw,
                          d_one, dA(i+nb, i+nb), ldda );

            /* Copy subdiagonal elements back into A, and diagonal
               elements into D */
            for (j = i; j < i+nb; ++j) {
                *A(j+1,j) = MAGMA_Z_MAKE( e[j], 0 );
                d[j] = MAGMA_Z_REAL( *A(j, j) );
            }
        }

        /* Use CPU code to reduce the last or only block */
        if (1 <= n-nx)
            magma_zgetmatrix( n-i, n-i, dA(i, i), ldda, A(i, i), lda );

        i_n = n-i;
        lapackf77_zhetrd( uplo_, &i_n, A(i, i), &lda, &d[i], &e[i], &tau[i],
                          work, &lwork, &iinfo );
    }

    magma_free( dA );

    work[0] = MAGMA_Z_MAKE( lwkopt, 0 );

    return *info;
} /* magma_zhetrd */
static void send_fsm(struct tcp_pcb *pcb, struct rfb_state *state) { struct update_header hdr; int bytes_left; int totaldim; err_t err; while(1) { switch (state->send_state) { case SST_IDLE: /* Nothing to do */ if (state->update_requested) { outputf("RFB send: update requested"); state->update_requested = 0; state->chunk_actually_sent = 0; state->send_state = SST_HEADER; } else { return; } /* FALL THROUGH to SST_HEADER */ case SST_HEADER: /* Calculate the width and height for this chunk, remembering * that if SCREEN_CHUNKS_[XY] do not evenly divide the width and * height, we may need to have shorter chunks at the edge of * the screen. */ state->chunk_width = ceildiv(fb->curmode.xres, SCREEN_CHUNKS_X); state->chunk_xpos = state->chunk_width * state->chunk_xnum; totaldim = state->chunk_width * (state->chunk_xnum + 1); if (totaldim > fb->curmode.xres) { state->chunk_width -= (totaldim - fb->curmode.xres); } state->chunk_height = ceildiv(fb->curmode.yres, SCREEN_CHUNKS_Y); state->chunk_ypos = state->chunk_height * state->chunk_ynum; totaldim = state->chunk_height * (state->chunk_ynum + 1); if (totaldim > fb->curmode.yres) { state->chunk_height -= (totaldim - fb->curmode.yres); } /* Do we _actually_ need to send this chunk? */ if (fb->checksum_rect) { state->chunk_checksum = fb->checksum_rect(state->chunk_xpos, state->chunk_ypos, state->chunk_width, state->chunk_height); if (state->chunk_checksum == state->checksums[state->chunk_xnum][state->chunk_ynum]) { if (advance_chunk(state)) return; continue; } /* Checksum gets set in data block, AFTER the data has been sent. */ } state->chunk_actually_sent = 1; /* Send a header */ hdr.msgtype = 0; state->chunk_bytes_sent = 0; hdr.nrects = htons(1); hdr.xpos = htons(state->chunk_xpos); hdr.ypos = htons(state->chunk_ypos); hdr.width = htons(state->chunk_width); hdr.height= htons(state->chunk_height); hdr.enctype = htonl(0); err = tcp_write(pcb, &hdr, sizeof(hdr), TCP_WRITE_FLAG_COPY); if (err != ERR_OK) { if (err != ERR_MEM) outputf("RFB: header send error %d", err); /* Try again later. */ return; } state->send_state = SST_DATA; /* Snag the data. */ fb->copy_pixels(state->blockbuf, state->chunk_xpos, state->chunk_ypos, state->chunk_width, state->chunk_height); /* FALL THROUGH to SST_DATA */ case SST_DATA: bytes_left = 4 * state->chunk_width * state->chunk_height - state->chunk_bytes_sent; if (bytes_left == 0) { state->send_state = SST_HEADER; state->checksums[state->chunk_xnum][state->chunk_ynum] = state->chunk_checksum; if (advance_chunk(state)) return; break; } /* That's enough. */ if (bytes_left > 1400) { bytes_left = 1400; } err = tcp_write(pcb, state->blockbuf + state->chunk_bytes_sent, bytes_left, TCP_WRITE_FLAG_COPY); if (err == ERR_OK) { state->chunk_bytes_sent += bytes_left; } else { if (err != ERR_MEM) outputf("RFB: send error %d", err); return; } if (tcp_sndbuf(pcb) == 0) { return; } } } if (tcp_output(pcb) != ERR_OK) outputf("RFB: tcp_output bailed in send_fsm?"); }
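/* The chunk geometry in send_fsm: ceildiv makes the per-chunk width large
   enough that SCREEN_CHUNKS_X chunks always cover the full screen, and the
   clamp shortens only the final chunk that would run past the edge (assuming
   chunks <= xres, which the original also relies on). A standalone check with
   illustrative values (the real xres comes from fb->curmode): */
#include <assert.h>
static int ceildiv(int a, int b) { return (a + b - 1) / b; }
int main(void) {
    int xres = 1366, chunks = 16;
    int w = ceildiv(xres, chunks);            /* 86 */
    int covered = 0;
    for (int x = 0; x < chunks; x++) {
        int cw = w;
        if (w * (x + 1) > xres)               /* clamp the chunk at the edge */
            cw -= w * (x + 1) - xres;
        covered += cw;
    }
    assert(covered == xres);                  /* 15 * 86 + 76 == 1366 */
    return 0;
}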
QVector<struct timetick> TimeAxis::getTicks() { uint64_t delta = (uint64_t) (this->domainHi - this->domainLo); uint64_t deltatick; Timescale granularity; /* First find deltatick. In the case of months and years, which are * of variable length, just find the number of months or years. */ if (delta < MAX_NANOTICK * TIMEAXIS_MAXTICKS) { deltatick = getTimeTickDelta(NANOTICK_INTERVALS, NANOTICK_INTERVALS_LEN, delta); granularity = Timescale::NANOSECOND; } else if (delta < MAX_MILLITICK * TIMEAXIS_MAXTICKS) { deltatick = getTimeTickDelta(MILLITICK_INTERVALS, MILLITICK_INTERVALS_LEN, delta); granularity = Timescale::MILLISECOND; } else if (delta < MAX_SECTICK * TIMEAXIS_MAXTICKS) { deltatick = getTimeTickDelta(SECTICK_INTERVALS, SECTICK_INTERVALS_LEN, delta); granularity = Timescale::SECOND; } else if (delta < MAX_MINUTETICK * TIMEAXIS_MAXTICKS) { deltatick = getTimeTickDelta(MINUTETICK_INTERVALS, MINUTETICK_INTERVALS_LEN, delta); granularity = Timescale::MINUTE; } else if (delta < MAX_HOURTICK * TIMEAXIS_MAXTICKS) { deltatick = getTimeTickDelta(HOURTICK_INTERVALS, HOURTICK_INTERVALS_LEN, delta); granularity = Timescale::HOUR; } else if (delta < MAX_DAYTICK * TIMEAXIS_MAXTICKS) { deltatick = getTimeTickDelta(DAYTICK_INTERVALS, DAYTICK_INTERVALS_LEN, delta); granularity = Timescale::DAY; } else if (delta < MAX_MONTHTICK * TIMEAXIS_MAXTICKS) { deltatick = getTimeTickDelta(MONTHTICK_INTERVALS, MONTHTICK_INTERVALS_LEN, delta); granularity = Timescale::MONTH; } else { deltatick = getTimeTickDelta(YEARTICK_INTERVALS, YEARTICK_INTERVALS_LEN, delta); granularity = Timescale::YEAR; } QVector<struct timetick> ticks; int64_t domainLoMSecs = this->domainLo / MILLISECOND_NS; int64_t domainLoNSecs = this->domainLo % MILLISECOND_NS; if (domainLoNSecs < 0) { domainLoMSecs -= 1; domainLoNSecs += MILLISECOND_NS; } int64_t starttime, prevstart; switch(granularity) { case Timescale::YEAR: { int yeardelta = (int) (deltatick / YEAR_NS); Q_ASSERT((deltatick % YEAR_NS) == 0); QDateTime date = QDateTime::fromMSecsSinceEpoch(domainLoMSecs - 1, this->tz); int curryear = ceildiv(date.date().year(), yeardelta) * yeardelta; date.setDate(QDate(curryear, 1, 1)); date.setTime(QTime()); if (domainLoMSecs == date.toMSecsSinceEpoch() && domainLoNSecs > 0) { date = date.addYears(yeardelta); } /* TODO: this multiplication might overflow */ starttime = date.toMSecsSinceEpoch() * MILLISECOND_NS; prevstart = starttime; /* This is the lowest granularity, so we need to check for overflow. */ while (starttime <= this->domainHi && starttime >= prevstart) { ticks.append({ starttime, getTimeTickLabel(starttime, date, granularity, !this->promoteTicks) }); date = date.addYears(yeardelta); prevstart = starttime; starttime = date.toMSecsSinceEpoch() * MILLISECOND_NS; } break; } case Timescale::MONTH: { int monthdelta = (int) (deltatick / MONTH_NS); Q_ASSERT((deltatick % MONTH_NS) == 0); QDateTime date = QDateTime::fromMSecsSinceEpoch(domainLoMSecs - 1, this->tz); date.setTime(QTime()); /* currmonth is an int from 0 to 11. 
*/
            int currmonth = ceildiv(date.date().month() - 1, monthdelta) * monthdelta;
            int curryear = date.date().year() + (currmonth / 12);
            currmonth %= 12;
            date.setDate(QDate(curryear, currmonth + 1, 1));
            date.setTime(QTime());
            if (domainLoMSecs == date.toMSecsSinceEpoch() && domainLoNSecs > 0) {
                date = date.addMonths(monthdelta);
            }
            starttime = date.toMSecsSinceEpoch() * (int64_t) MILLISECOND_NS;
            prevstart = starttime;
            while (starttime <= this->domainHi && starttime >= prevstart) {
                ticks.append({ starttime, getTimeTickLabel(starttime, date, granularity, !this->promoteTicks) });
                date = date.addMonths(monthdelta);
                prevstart = starttime;
                starttime = date.toMSecsSinceEpoch() * (int64_t) MILLISECOND_NS;
            }
            break;
        }
        default:
            starttime = ceildiv(this->domainLo, (int64_t) deltatick) * deltatick;
            if (granularity == Timescale::DAY || granularity == Timescale::HOUR) {
                /* I'm assuming that the timezone offset is never in smaller granularity than minutes. */
                QDateTime d = QDateTime::fromMSecsSinceEpoch(ceildiv(starttime, MILLISECOND_NS), this->tz);
                starttime -= SECOND_NS * (int64_t) d.offsetFromUtc();
                while (starttime > this->domainLo) {
                    starttime -= deltatick;
                }
                while (starttime < this->domainLo) {
                    starttime += deltatick;
                }
            }
            prevstart = starttime;
            while (starttime <= this->domainHi && starttime >= prevstart) {
                // Add the tick to ticks
                QDateTime date = QDateTime::fromMSecsSinceEpoch(ceildiv(starttime, MILLISECOND_NS), this->tz);
                ticks.append({ starttime, getTimeTickLabel(starttime, date, granularity, !this->promoteTicks) });
                prevstart = starttime;
                starttime += deltatick;
            }
            break;
    }
    return ticks;
}
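/* A caveat on the tick alignment above (an observation, not from the source):
   starttime = ceildiv(domainLo, deltatick) * deltatick is meant to be the first
   multiple of deltatick at or after domainLo, so for negative timestamps
   (before the epoch) ceildiv must round toward +infinity. The common
   (a + b - 1) / b trick truncates toward zero in C++ and misbehaves for
   negative a; a sign-safe sketch: */
#include <cassert>
#include <cstdint>
static int64_t ceildiv_signed(int64_t a, int64_t b) {
    // assumes b > 0; rounds the quotient toward +infinity for any sign of a
    int64_t q = a / b, r = a % b;
    return q + (r > 0 ? 1 : 0);
}
int main() {
    assert(ceildiv_signed(7, 2) == 4);
    assert(ceildiv_signed(-7, 2) == -3);  // -3 * 2 == -6 >= -7
    return 0;
}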
/** Purpose ------- DLATRD2 reduces NB rows and columns of a real symmetric matrix A to symmetric tridiagonal form by an orthogonal similarity transformation Q' * A * Q, and returns the matrices V and W which are needed to apply the transformation to the unreduced part of A. If UPLO = MagmaUpper, DLATRD reduces the last NB rows and columns of a matrix, of which the upper triangle is supplied; if UPLO = MagmaLower, DLATRD reduces the first NB rows and columns of a matrix, of which the lower triangle is supplied. This is an auxiliary routine called by DSYTRD2_GPU. It uses an accelerated HEMV that needs extra memory. Arguments --------- @param[in] uplo magma_uplo_t Specifies whether the upper or lower triangular part of the symmetric matrix A is stored: - = MagmaUpper: Upper triangular - = MagmaLower: Lower triangular @param[in] n INTEGER The order of the matrix A. @param[in] nb INTEGER The number of rows and columns to be reduced. @param[in,out] A DOUBLE_PRECISION array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = MagmaUpper, the leading n-by-n upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = MagmaLower, the leading n-by-n lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit: - if UPLO = MagmaUpper, the last NB columns have been reduced to tridiagonal form, with the diagonal elements overwriting the diagonal elements of A; the elements above the diagonal with the array TAU, represent the orthogonal matrix Q as a product of elementary reflectors; - if UPLO = MagmaLower, the first NB columns have been reduced to tridiagonal form, with the diagonal elements overwriting the diagonal elements of A; the elements below the diagonal with the array TAU, represent the orthogonal matrix Q as a product of elementary reflectors. See Further Details. @param[in] lda INTEGER The leading dimension of the array A. LDA >= max(1,N). @param[out] e DOUBLE_PRECISION array, dimension (N-1) If UPLO = MagmaUpper, E(n-nb:n-1) contains the superdiagonal elements of the last NB columns of the reduced matrix; if UPLO = MagmaLower, E(1:nb) contains the subdiagonal elements of the first NB columns of the reduced matrix. @param[out] tau DOUBLE_PRECISION array, dimension (N-1) The scalar factors of the elementary reflectors, stored in TAU(n-nb:n-1) if UPLO = MagmaUpper, and in TAU(1:nb) if UPLO = MagmaLower. See Further Details. @param[out] W DOUBLE_PRECISION array, dimension (LDW,NB) The n-by-nb matrix W required to update the unreduced part of A. @param[in] ldw INTEGER The leading dimension of the array W. LDW >= max(1,N). @param dA TODO: dimension (ldda, n) ?? @param ldda TODO: ldda >= n ?? @param dW TODO: dimension (lddw, 2*nb) ?? @param lddw TODO: lddw >= n ?? @param dwork TODO: dimension (ldwork) ?? @param ldwork TODO: ldwork >= ceil(n/64)*ldda ?? Further Details --------------- If UPLO = MagmaUpper, the matrix Q is represented as a product of elementary reflectors Q = H(n) H(n-1) . . . H(n-nb+1). Each H(i) has the form H(i) = I - tau * v * v' where tau is a real scalar, and v is a real vector with v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), and tau in TAU(i-1). If UPLO = MagmaLower, the matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(nb).
Each H(i) has the form H(i) = I - tau * v * v' where tau is a real scalar, and v is a real vector with v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), and tau in TAU(i). The elements of the vectors v together form the n-by-nb matrix V which is needed, with W, to apply the transformation to the unreduced part of the matrix, using a symmetric rank-2k update of the form: A := A - V*W' - W*V'. The contents of A on exit are illustrated by the following examples with n = 5 and nb = 2: if UPLO = MagmaUpper: if UPLO = MagmaLower: ( a a a v4 v5 ) ( d ) ( a a v4 v5 ) ( 1 d ) ( a 1 v5 ) ( v1 1 a ) ( d 1 ) ( v1 v2 a a ) ( d ) ( v1 v2 a a a ) where d denotes a diagonal element of the reduced matrix, a denotes an element of the original matrix that is unchanged, and vi denotes an element of the vector defining H(i). @ingroup magma_dsyev_aux ********************************************************************/
extern "C" magma_int_t
magma_dlatrd2(
    magma_uplo_t uplo, magma_int_t n, magma_int_t nb,
    double *A,  magma_int_t lda,
    double *e, double *tau,
    double *W,  magma_int_t ldw,
    magmaDouble_ptr dA, magma_int_t ldda,
    magmaDouble_ptr dW, magma_int_t lddw,
    magmaDouble_ptr dwork, magma_int_t ldwork)
{
#define A(i_, j_) (A + (i_) + (j_)*lda)
#define W(i_, j_) (W + (i_) + (j_)*ldw)
#define dA(i_, j_) (dA + (i_) + (j_)*ldda)
#define dW(i_, j_) (dW + (i_) + (j_)*lddw)

    const double c_neg_one = MAGMA_D_NEG_ONE;
    const double c_one     = MAGMA_D_ONE;
    const double c_zero    = MAGMA_D_ZERO;
    const magma_int_t ione = 1;

    double alpha, value;
    magma_int_t i, i_n, i_1, iw;

    /* Check arguments */
    magma_int_t info = 0;
    if ( uplo != MagmaLower && uplo != MagmaUpper ) {
        info = -1;
    } else if ( n < 0 ) {
        info = -2;
    } else if ( nb < 1 ) {
        info = -3;
    } else if ( lda < max(1,n) ) {
        info = -5;
    } else if ( ldw < max(1,n) ) {
        info = -9;
    } else if ( ldda < max(1,n) ) {
        info = -11;
    } else if ( lddw < max(1,n) ) {
        info = -13;
    } else if ( ldwork < ldda*ceildiv(n,64) ) {
        info = -15;
    }

    if (info != 0) {
        magma_xerbla( __func__, -(info) );
        return info;
    }

    /* Quick return if possible */
    if (n == 0) {
        return info;
    }

    magma_queue_t stream;
    magma_queue_create( &stream );

    double *f;
    magma_dmalloc_cpu( &f, n );
    if ( f == NULL ) {
        info = MAGMA_ERR_HOST_ALLOC;
        return info;
    }

    if (uplo == MagmaUpper) {
        /* Reduce last NB columns of upper triangle */
        for (i = n-1; i >= n - nb; --i) {
            i_1 = i + 1;
            i_n = n - i - 1;

            iw = i - n + nb;
            if (i < n-1) {
                /* Update A(1:i,i) */
#if defined(PRECISION_z) || defined(PRECISION_c)
                lapackf77_dlacgv( &i_n, W(i, iw+1), &ldw );
#endif
                blasf77_dgemv( "No transpose", &i_1, &i_n, &c_neg_one,
                               A(0, i+1), &lda,
                               W(i, iw+1), &ldw, &c_one,
                               A(0, i), &ione );
#if defined(PRECISION_z) || defined(PRECISION_c)
                lapackf77_dlacgv( &i_n, W(i, iw+1), &ldw );
                lapackf77_dlacgv( &i_n, A(i, i+1), &lda );
#endif
                blasf77_dgemv( "No transpose", &i_1, &i_n, &c_neg_one,
                               W(0, iw+1), &ldw,
                               A(i, i+1), &lda, &c_one,
                               A(0, i), &ione );
#if defined(PRECISION_z) || defined(PRECISION_c)
                lapackf77_dlacgv( &i_n, A(i, i+1), &lda );
#endif
            }
            if (i > 0) {
                /* Generate elementary reflector H(i) to annihilate A(1:i-2,i) */
                alpha = *A(i-1, i);
                lapackf77_dlarfg( &i, &alpha, A(0, i), &ione, &tau[i - 1] );

                e[i-1] = MAGMA_D_REAL( alpha );
                *A(i-1,i) = MAGMA_D_ONE;

                /* Compute W(1:i-1,i) */
                // 1. Send the block reflector  A(0:n-i-1,i) to the GPU
                magma_dsetvector_async( i, A(0, i), 1, dA(0, i), 1, stream );

                magmablas_dsymv_work( MagmaUpper, i, c_one, dA(0, 0), ldda,
                                      dA(0, i), ione, c_zero, dW(0, iw), ione,
                                      dwork, ldwork, stream );

                // 2. Start getting the result back (asynchronously)
                magma_dgetmatrix_async( i, 1,
                                        dW(0, iw), lddw,
                                        W(0, iw),  ldw, stream );

                if (i < n-1) {
                    blasf77_dgemv( MagmaConjTransStr, &i, &i_n, &c_one,
                                   W(0, iw+1), &ldw,
                                   A(0, i), &ione, &c_zero,
                                   W(i+1, iw), &ione );
                }

                // 3. Here we need dsymv result W(0, iw)
                magma_queue_sync( stream );

                if (i < n-1) {
                    blasf77_dgemv( "No transpose", &i, &i_n, &c_neg_one,
                                   A(0, i+1), &lda,
                                   W(i+1, iw), &ione, &c_one,
                                   W(0, iw), &ione );

                    blasf77_dgemv( MagmaConjTransStr, &i, &i_n, &c_one,
                                   A(0, i+1), &lda,
                                   A(0, i), &ione, &c_zero,
                                   W(i+1, iw), &ione );

                    blasf77_dgemv( "No transpose", &i, &i_n, &c_neg_one,
                                   W(0, iw+1), &ldw,
                                   W(i+1, iw), &ione, &c_one,
                                   W(0, iw), &ione );
                }

                blasf77_dscal( &i, &tau[i - 1], W(0, iw), &ione );

                value = magma_cblas_ddot( i, W(0,iw), ione, A(0,i), ione );
                alpha = tau[i - 1] * -0.5f * value;
                blasf77_daxpy( &i, &alpha, A(0, i), &ione,
                               W(0, iw), &ione );
            }
        }
    }
    else {
        /* Reduce first NB columns of lower triangle */
        for (i = 0; i < nb; ++i) {
            /* Update A(i:n,i) */
            i_n = n - i;
#if defined(PRECISION_z) || defined(PRECISION_c)
            lapackf77_dlacgv( &i, W(i, 0), &ldw );
#endif
            blasf77_dgemv( "No transpose", &i_n, &i, &c_neg_one,
                           A(i, 0), &lda,
                           W(i, 0), &ldw, &c_one,
                           A(i, i), &ione );
#if defined(PRECISION_z) || defined(PRECISION_c)
            lapackf77_dlacgv( &i, W(i, 0), &ldw );
            lapackf77_dlacgv( &i, A(i, 0), &lda );
#endif
            blasf77_dgemv( "No transpose", &i_n, &i, &c_neg_one,
                           W(i, 0), &ldw,
                           A(i, 0), &lda, &c_one,
                           A(i, i), &ione );
#if defined(PRECISION_z) || defined(PRECISION_c)
            lapackf77_dlacgv( &i, A(i, 0), &lda );
#endif

            if (i < n-1) {
                /* Generate elementary reflector H(i) to annihilate A(i+2:n,i) */
                i_n = n - i - 1;
                alpha = *A(i+1, i);
                lapackf77_dlarfg( &i_n, &alpha, A(min(i+2,n-1), i), &ione, &tau[i] );
                e[i] = MAGMA_D_REAL( alpha );
                *A(i+1,i) = MAGMA_D_ONE;

                /* Compute W(i+1:n,i) */
                // 1. Send the block reflector  A(i+1:n,i) to the GPU
                magma_dsetvector_async( i_n, A(i+1, i), 1, dA(i+1, i), 1, stream );

                magmablas_dsymv_work( MagmaLower, i_n, c_one, dA(i+1, i+1), ldda,
                                      dA(i+1, i), ione, c_zero, dW(i+1, i), ione,
                                      dwork, ldwork, stream );

                // 2. Start getting the result back (asynchronously)
                magma_dgetmatrix_async( i_n, 1,
                                        dW(i+1, i), lddw,
                                        W(i+1, i),  ldw, stream );

                blasf77_dgemv( MagmaConjTransStr, &i_n, &i, &c_one,
                               W(i+1, 0), &ldw,
                               A(i+1, i), &ione, &c_zero,
                               W(0, i), &ione );

                blasf77_dgemv( "No transpose", &i_n, &i, &c_neg_one,
                               A(i+1, 0), &lda,
                               W(0, i), &ione, &c_zero,
                               f, &ione );

                blasf77_dgemv( MagmaConjTransStr, &i_n, &i, &c_one,
                               A(i+1, 0), &lda,
                               A(i+1, i), &ione, &c_zero,
                               W(0, i), &ione );

                // 3. Here we need dsymv result W(i+1, i)
                magma_queue_sync( stream );

                if (i != 0)
                    blasf77_daxpy( &i_n, &c_one, f, &ione, W(i+1, i), &ione );

                blasf77_dgemv( "No transpose", &i_n, &i, &c_neg_one,
                               W(i+1, 0), &ldw,
                               W(0, i), &ione, &c_one,
                               W(i+1, i), &ione );
                blasf77_dscal( &i_n, &tau[i], W(i+1,i), &ione );

                value = magma_cblas_ddot( i_n, W(i+1,i), ione, A(i+1,i), ione );
                alpha = tau[i] * -0.5f * value;
                blasf77_daxpy( &i_n, &alpha, A(i+1, i), &ione, W(i+1,i), &ione );
            }
        }
    }

    magma_free_cpu( f );
    magma_queue_destroy( stream );

    return info;
} /* magma_dlatrd */
int main(int argc, char **argv) { FILE *f; char *src, *src_name; char *dest, S1, S2, S3; int len; j2k_image_t img; j2k_cp_t cp; int w, wr, wrr, h, hr, hrr, max; int i, image_type = -1, compno, pad, j; int adjust; jp2_struct_t *jp2_struct; if (argc < 3) { fprintf(stderr, "usage: %s j2k-file image-file [-reduce n]\n", argv[0]); return 1; } f = fopen(argv[1], "rb"); if (!f) { fprintf(stderr, "failed to open %s for reading\n", argv[1]); return 1; } dest = argv[2]; cp.reduce_on = 0; cp.reduce_value = 0; /* OPTION REDUCE IS ACTIVE */ if (argc == 5) { if (strcmp(argv[3], "-reduce")) { fprintf(stderr, "usage: options " "-reduce n" " where n is the factor of reduction [%s]\n", argv[3]); return 1; } cp.reduce_on = 1; sscanf(argv[4], "%d", &cp.reduce_value); } while (*dest) { dest++; } dest--; S3 = *dest; dest--; S2 = *dest; dest--; S1 = *dest; if ((S1 == 'p' && S2 == 'g' && S3 == 'x') || (S1 == 'P' && S2 == 'G' && S3 == 'X')) { image_type = 0; dest--; *dest = '\0'; } if ((S1 == 'p' && S2 == 'n' && S3 == 'm') || (S1 == 'P' && S2 == 'N' && S3 == 'M') || (S1 == 'p' && S2 == 'g' && S3 == 'm') || (S1 == 'P' && S2 == 'G' && S3 == 'M') || (S1 == 'P' && S2 == 'P' && S3 == 'M') || (S1 == 'p' && S2 == 'p' && S3 == 'm')) { image_type = 1; } if ((S1 == 'b' && S2 == 'm' && S3 == 'p') || (S1 == 'B' && S2 == 'M' && S3 == 'P')) { image_type = 2; } if (image_type == -1) { fprintf(stderr, "!! Unrecognized format for outfile : %c%c%c [accept only *.pnm, *.pgm, *.ppm, *.pgx or *.bmp] !!\n\n", S1, S2, S3); return 1; } fseek(f, 0, SEEK_END); len = ftell(f); fseek(f, 0, SEEK_SET); src = (char *) malloc(len); fread(src, 1, len, f); fclose(f); src_name = argv[1]; while (*src_name) { src_name++; } src_name--; S3 = *src_name; src_name--; S2 = *src_name; src_name--; S1 = *src_name; /* J2K format */ if ((S1 == 'j' && S2 == '2' && S3 == 'k') || (S1 == 'J' && S2 == '2' && S3 == 'K') || (S1 == 'j' && S2 == '2' && S3 == 'c') || (S1 == 'J' && S2 == '2' && S3 == 'C')) { if (!j2k_decode(src, len, &img, &cp)) { fprintf(stderr, "j2k_to_image: failed to decode image!\n"); return 1; } } /* JP2 format */ else if ((S1 == 'j' && S2 == 'p' && S3 == '2') || (S1 == 'J' && S2 == 'P' && S3 == '2')) { jp2_struct = (jp2_struct_t *) malloc(sizeof(jp2_struct_t)); jp2_struct->image = &img; if (jp2_decode(src, len, jp2_struct, &cp)) { fprintf(stderr, "j2k_to_image: failed to decode image!\n"); return 1; } /* Insert code here if you want to create actions on jp2_struct before deleting it */ free(jp2_struct); } /* JPT format */ else if ((S1 == 'j' && S2 == 'p' && S3 == 't') || (S1 == 'J' && S2 == 'P' && S3 == 'T')) { if (!j2k_decode_jpt_stream(src, len, &img, &cp)) { fprintf(stderr, "j2k_to_image: failed to decode image!\n"); return 1; } } /* otherwise : error */ else { fprintf(stderr, "j2k_to_image : Unknown format image *.%c%c%c [only *.j2k, *.jp2, *.jpc or *.jpt]!!
\n", S1, S2, S3); return 1; } free(src); /* ------------------ CREATE OUT IMAGE WITH THE RIGHT FORMAT ----------------------- */ /* ---------------------------- / */ /* / / */ /* / FORMAT : PNM, PGM or PPM / */ /* / / */ /* ---------------------------- / */ switch (image_type) { case 1: /* PNM PGM PPM */ if (img.numcomps == 3 && img.comps[0].dx == img.comps[1].dx && img.comps[1].dx == img.comps[2].dx && img.comps[0].dy == img.comps[1].dy && img.comps[1].dy == img.comps[2].dy && img.comps[0].prec == img.comps[1].prec && img.comps[1].prec == img.comps[2].prec) { f = fopen(argv[2], "wb"); w = ceildiv(img.x1 - img.x0, img.comps[0].dx); // wr = ceildiv(int_ceildivpow2(img.x1 - img.x0,img.factor),img.comps[0].dx); wr = img.comps[0].w; wrr = int_ceildivpow2(img.comps[0].w, img.comps[0].factor); h = ceildiv(img.y1 - img.y0, img.comps[0].dy); // hr = ceildiv(int_ceildivpow2(img.y1 - img.y0,img.factor), img.comps[0].dy); hr = img.comps[0].h; hrr = int_ceildivpow2(img.comps[0].h, img.comps[0].factor); max = img.comps[0].prec > 8 ? 255 : (1 << img.comps[0].prec) - 1; img.comps[0].x0 = int_ceildivpow2(img.comps[0].x0 - int_ceildiv(img.x0, img.comps[0].dx), img.comps[0].factor); img.comps[0].y0 = int_ceildivpow2(img.comps[0].y0 - int_ceildiv(img.y0, img.comps[0].dy), img.comps[0].factor); fprintf(f, "P6\n# %d %d %d %d %d\n%d %d\n%d\n", cp.tcps[cp.tileno[0]].tccps[0].numresolutions, w, h, img.comps[0].x0, img.comps[0].y0, wrr, hrr, max); adjust = img.comps[0].prec > 8 ? img.comps[0].prec - 8 : 0; for (i = 0; i < wrr * hrr; i++) { char r, g, b; r = img.comps[0].data[i / wrr * wr + i % wrr]; r += (img.comps[0].sgnd ? 1 << (img.comps[0].prec - 1) : 0); r = r >> adjust; g = img.comps[1].data[i / wrr * wr + i % wrr]; g += (img.comps[1].sgnd ? 1 << (img.comps[1].prec - 1) : 0); g = g >> adjust; b = img.comps[2].data[i / wrr * wr + i % wrr]; b += (img.comps[2].sgnd ? 1 << (img.comps[2].prec - 1) : 0); b = b >> adjust; fprintf(f, "%c%c%c", r, g, b); } free(img.comps[0].data); free(img.comps[1].data); free(img.comps[2].data); fclose(f); } else { for (compno = 0; compno < img.numcomps; compno++) {