/*
 * Overwrite the 32-bit immediate of the smashable cmpq located at `inst`.
 *
 * The instruction must satisfy Alignment::SmashCmpq (enforced with
 * always_assert); the immediate lives kSmashCmpqImmOff bytes past `inst`.
 */
void smashCmpq(TCA inst, uint32_t imm) {
  always_assert(is_aligned(inst, Alignment::SmashCmpq));

  // Address of the immediate operand inside the instruction bytes.
  auto const immAddr = reinterpret_cast<uint32_t*>(inst + kSmashCmpqImmOff);
  *immAddr = imm;
}
bool PSVirtualSpace::is_aligned(size_t value) const { return is_aligned(value, alignment()); }
int ChainingBlockDevice::init() { int err; uint32_t val = core_util_atomic_incr_u32(&_init_ref_count, 1); if (val != 1) { return BD_ERROR_OK; } _read_size = 0; _program_size = 0; _erase_size = 0; _erase_value = -1; _size = 0; // Initialize children block devices, find all sizes and // assert that block sizes are similar. We can't do this in // the constructor since some block devices may need to be // initialized before they know their block size/count for (size_t i = 0; i < _bd_count; i++) { err = _bds[i]->init(); if (err) { goto fail; } bd_size_t read = _bds[i]->get_read_size(); if (i == 0 || (read >= _read_size && is_aligned(read, _read_size))) { _read_size = read; } else { MBED_ASSERT(_read_size > read && is_aligned(_read_size, read)); } bd_size_t program = _bds[i]->get_program_size(); if (i == 0 || (program >= _program_size && is_aligned(program, _program_size))) { _program_size = program; } else { MBED_ASSERT(_program_size > program && is_aligned(_program_size, program)); } bd_size_t erase = _bds[i]->get_erase_size(); if (i == 0 || (erase >= _erase_size && is_aligned(erase, _erase_size))) { _erase_size = erase; } else { MBED_ASSERT(_erase_size > erase && is_aligned(_erase_size, erase)); } int value = _bds[i]->get_erase_value(); if (i == 0 || value == _erase_value) { _erase_value = value; } else { _erase_value = -1; } _size += _bds[i]->size(); } _is_initialized = true; return BD_ERROR_OK; fail: _is_initialized = false; _init_ref_count = 0; return err; }
/**
 * Read a rectangle of pixels from the current draw buffer into client memory.
 *
 * @param data        destination buffer; must be non-NULL and pass is_aligned()
 * @param dataStride  byte distance between consecutive destination rows
 * @param dataFormat  destination pixel format; must be a supported format
 * @param sx, sy      origin of the source rectangle in the draw buffer
 * @param width       requested width in pixels, must be > 0
 * @param height      requested height in pixels, must be > 0
 *
 * Errors are reported through vg_set_error(): an unsupported format raises
 * VG_UNSUPPORTED_IMAGE_FORMAT_ERROR; a bad pointer or a non-positive size
 * raises VG_ILLEGAL_ARGUMENT_ERROR.
 *
 * The rectangle is clipped against the draw buffer. Pixels that fall outside
 * it are skipped, and the destination is left untouched for those positions.
 */
void vegaReadPixels(void * data, VGint dataStride,
                    VGImageFormat dataFormat,
                    VGint sx, VGint sy,
                    VGint width, VGint height)
{
   struct vg_context *ctx = vg_current_context();
   struct pipe_context *pipe = ctx->pipe;
   struct st_framebuffer *stfb = ctx->draw_buffer;
   struct st_renderbuffer *strb = stfb->strb;

   VGfloat temp[VEGA_MAX_IMAGE_WIDTH][4];
   VGfloat *df = (VGfloat*)temp;
   VGint i;
   VGubyte *dst = (VGubyte *)data;
   VGint xoffset = 0, yoffset = 0;

   if (!supported_image_format(dataFormat)) {
      vg_set_error(ctx, VG_UNSUPPORTED_IMAGE_FORMAT_ERROR);
      return;
   }
   if (!data || !is_aligned(data)) {
      vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
      return;
   }
   if (width <= 0 || height <= 0) {
      vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
      return;
   }

   /* Clip against the left edge: skip the columns that lie outside and
    * remember how far into each destination row the first pixel goes. */
   if (sx < 0) {
      xoffset = -sx;
      xoffset *= _vega_size_for_format(dataFormat);
      width += sx;
      sx = 0;
   }
   /* Same for the rows that lie before the first source row. */
   if (sy < 0) {
      yoffset = -sy;
      yoffset *= dataStride;
      height += sy;
      sy = 0;
   }

   /* Clip each axis on its own. Resetting both extents whenever either one
    * overflows can enlarge the other beyond what the caller requested and
    * write past the end of the destination buffer. */
   if (sx + width > stfb->width)
      width = stfb->width - sx;
   if (sy + height > stfb->height)
      height = stfb->height - sy;

   /* nothing to read (also covers rectangles entirely left of / above
    * the buffer, which the negative-origin clipping turns non-positive) */
   if (width <= 0 || height <= 0)
      return;

   {
      VGint y = (stfb->height - sy) - 1, yStep = -1;
      struct pipe_transfer *transfer;
      void *map;

      map = pipe_transfer_map(pipe, strb->texture,  0, 0,
                              PIPE_TRANSFER_READ,
                              0, 0, sx + width, stfb->height - sy,
                              &transfer);

      /* Bail out rather than read through a failed mapping (assumes a NULL
       * return signals failure -- TODO confirm against the gallium helper). */
      if (!map)
         return;

      /* Do a row at a time to flip image data vertically */
      for (i = 0; i < height; i++) {
#if 0
         debug_printf("%d-%d  == %d\n", sy, height, y);
#endif
         pipe_get_tile_rgba(transfer, map, sx, y, width, 1, df);
         y += yStep;
         _vega_pack_rgba_span_float(ctx, width, temp, dataFormat,
                                    dst + yoffset + xoffset);
         dst += dataStride;
      }

      pipe->transfer_unmap(pipe, transfer);
   }
}
int lbfgs( int n, lbfgsfloatval_t *x, lbfgsfloatval_t *ptr_fx, lbfgs_evaluate_t proc_evaluate, lbfgs_progress_t proc_progress, void *instance, lbfgs_parameter_t *_param ) { int ret; int i, j, k, ls, end, bound; lbfgsfloatval_t step; /* Constant parameters and their default values. */ lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam; const int m = param.m; lbfgsfloatval_t *xp = NULL; lbfgsfloatval_t *g = NULL, *gp = NULL, *pg = NULL; lbfgsfloatval_t *d = NULL, *w = NULL, *pf = NULL; iteration_data_t *lm = NULL, *it = NULL; lbfgsfloatval_t ys, yy; lbfgsfloatval_t xnorm, gnorm, beta; lbfgsfloatval_t fx = 0.; lbfgsfloatval_t rate = 0.; line_search_proc linesearch = line_search_morethuente; /* Construct a callback data. */ callback_data_t cd; cd.n = n; cd.instance = instance; cd.proc_evaluate = proc_evaluate; cd.proc_progress = proc_progress; #if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) /* Round out the number of variables. */ n = round_out_variables(n); #endif/*defined(USE_SSE)*/ /* Check the input parameters for errors. */ if (n <= 0) { return LBFGSERR_INVALID_N; } #if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) if (n % 8 != 0) { return LBFGSERR_INVALID_N_SSE; } if (!is_aligned(x, 16)) { return LBFGSERR_INVALID_X_SSE; } #endif/*defined(USE_SSE)*/ if (param.epsilon < 0.) { return LBFGSERR_INVALID_EPSILON; } if (param.past < 0) { return LBFGSERR_INVALID_TESTPERIOD; } if (param.delta < 0.) { return LBFGSERR_INVALID_DELTA; } if (param.min_step < 0.) { return LBFGSERR_INVALID_MINSTEP; } if (param.max_step < param.min_step) { return LBFGSERR_INVALID_MAXSTEP; } if (param.ftol < 0.) { return LBFGSERR_INVALID_FTOL; } if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE || param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) { if (param.wolfe <= param.ftol || 1. <= param.wolfe) { return LBFGSERR_INVALID_WOLFE; } } if (param.gtol < 0.) { return LBFGSERR_INVALID_GTOL; } if (param.xtol < 0.) 
{ return LBFGSERR_INVALID_XTOL; } if (param.max_linesearch <= 0) { return LBFGSERR_INVALID_MAXLINESEARCH; } if (param.orthantwise_c < 0.) { return LBFGSERR_INVALID_ORTHANTWISE; } if (param.orthantwise_start < 0 || n < param.orthantwise_start) { return LBFGSERR_INVALID_ORTHANTWISE_START; } if (param.orthantwise_end < 0) { param.orthantwise_end = n; } if (n < param.orthantwise_end) { return LBFGSERR_INVALID_ORTHANTWISE_END; } if (param.orthantwise_c != 0.) { switch (param.linesearch) { case LBFGS_LINESEARCH_BACKTRACKING: linesearch = line_search_backtracking_owlqn; break; default: /* Only the backtracking method is available. */ return LBFGSERR_INVALID_LINESEARCH; } } else { switch (param.linesearch) { case LBFGS_LINESEARCH_MORETHUENTE: linesearch = line_search_morethuente; break; case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO: case LBFGS_LINESEARCH_BACKTRACKING_WOLFE: case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE: linesearch = line_search_backtracking; break; default: return LBFGSERR_INVALID_LINESEARCH; } } /* Allocate working space. */ xp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); g = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); gp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); d = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); w = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } if (param.orthantwise_c != 0.) { /* Allocate working space for OW-LQN. */ pg = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); if (pg == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } } /* Allocate limited memory storage. */ lm = (iteration_data_t*)vecalloc(m * sizeof(iteration_data_t)); if (lm == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } /* Initialize the limited memory. 
*/ for (i = 0;i < m;++i) { it = &lm[i]; it->alpha = 0; it->ys = 0; it->s = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); it->y = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); if (it->s == NULL || it->y == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } } /* Allocate an array for storing previous values of the objective function. */ if (0 < param.past) { pf = (lbfgsfloatval_t*)vecalloc(param.past * sizeof(lbfgsfloatval_t)); } /* Evaluate the function value and its gradient. */ fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0); if (0. != param.orthantwise_c) { /* Compute the L1 norm of the variable and add it to the object value. */ xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end); fx += xnorm * param.orthantwise_c; owlqn_pseudo_gradient( pg, x, g, n, param.orthantwise_c, param.orthantwise_start, param.orthantwise_end ); } /* Store the initial value of the objective function. */ if (pf != NULL) { pf[0] = fx; } /* Compute the direction; we assume the initial hessian matrix H_0 as the identity matrix. */ if (param.orthantwise_c == 0.) { vecncpy(d, g, n); } else { vecncpy(d, pg, n); } /* Make sure that the initial variables are not a minimizer. */ vec2norm(&xnorm, x, n); if (param.orthantwise_c == 0.) { vec2norm(&gnorm, g, n); } else { vec2norm(&gnorm, pg, n); } if (xnorm < 1.0) xnorm = 1.0; if (gnorm / xnorm <= param.epsilon) { ret = LBFGS_ALREADY_MINIMIZED; goto lbfgs_exit; } /* Compute the initial step: step = 1.0 / sqrt(vecdot(d, d, n)) */ vec2norminv(&step, d, n); k = 1; end = 0; for (;;) { /* Store the current position and gradient vectors. */ veccpy(xp, x, n); veccpy(gp, g, n); /* Search for an optimal step. */ if (param.orthantwise_c == 0.) 
{ ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, ¶m); } else { ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, ¶m); owlqn_pseudo_gradient( pg, x, g, n, param.orthantwise_c, param.orthantwise_start, param.orthantwise_end ); } if (ls < 0) { /* Revert to the previous point. */ veccpy(x, xp, n); veccpy(g, gp, n); ret = ls; goto lbfgs_exit; } /* Compute x and g norms. */ vec2norm(&xnorm, x, n); if (param.orthantwise_c == 0.) { vec2norm(&gnorm, g, n); } else { vec2norm(&gnorm, pg, n); } /* Report the progress. */ if (cd.proc_progress) { if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) { goto lbfgs_exit; } } /* Convergence test. The criterion is given by the following formula: |g(x)| / \max(1, |x|) < \epsilon */ if (xnorm < 1.0) xnorm = 1.0; if (gnorm / xnorm <= param.epsilon) { /* Convergence. */ ret = LBFGS_SUCCESS; break; } /* Test for stopping criterion. The criterion is given by the following formula: (f(past_x) - f(x)) / f(x) < \delta */ if (pf != NULL) { /* We don't test the stopping criterion while k < past. */ if (param.past <= k) { /* Compute the relative improvement from the past. */ rate = (pf[k % param.past] - fx) / fx; /* The stopping criterion. */ if (rate < param.delta) { ret = LBFGS_STOP; break; } } /* Store the current value of the objective function. */ pf[k % param.past] = fx; } if (param.max_iterations != 0 && param.max_iterations < k+1) { /* Maximum number of iterations. */ ret = LBFGSERR_MAXIMUMITERATION; break; } /* Update vectors s and y: s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}. y_{k+1} = g_{k+1} - g_{k}. */ it = &lm[end]; vecdiff(it->s, x, xp, n); vecdiff(it->y, g, gp, n); /* Compute scalars ys and yy: ys = y^t \cdot s = 1 / \rho. yy = y^t \cdot y. Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor). */ vecdot(&ys, it->y, it->s, n); vecdot(&yy, it->y, it->y, n); it->ys = ys; /* Recursive formula to compute dir = -(H \cdot g). 
This is described in page 779 of: Jorge Nocedal. Updating Quasi-Newton Matrices with Limited Storage. Mathematics of Computation, Vol. 35, No. 151, pp. 773--782, 1980. */ bound = (m <= k) ? m : k; ++k; end = (end + 1) % m; /* Compute the steepest direction. */ if (param.orthantwise_c == 0.) { /* Compute the negative of gradients. */ vecncpy(d, g, n); } else { vecncpy(d, pg, n); } j = end; for (i = 0;i < bound;++i) { j = (j + m - 1) % m; /* if (--j == -1) j = m-1; */ it = &lm[j]; /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */ vecdot(&it->alpha, it->s, d, n); it->alpha /= it->ys; /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */ vecadd(d, it->y, -it->alpha, n); } vecscale(d, ys / yy, n); for (i = 0;i < bound;++i) { it = &lm[j]; /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */ vecdot(&beta, it->y, d, n); beta /= it->ys; /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */ vecadd(d, it->s, it->alpha - beta, n); j = (j + 1) % m; /* if (++j == m) j = 0; */ } /* Constrain the search direction for orthant-wise updates. */ if (param.orthantwise_c != 0.) { for (i = param.orthantwise_start;i < param.orthantwise_end;++i) { if (d[i] * pg[i] >= 0) { d[i] = 0; } } } /* Now the search direction d is ready. We try step = 1 first. */ step = 1.0; } lbfgs_exit: /* Return the final value of the objective function. */ if (ptr_fx != NULL) { *ptr_fx = fx; } vecfree(pf); /* Free memory blocks used by this function. */ if (lm != NULL) { for (i = 0;i < m;++i) { vecfree(lm[i].s); vecfree(lm[i].y); } vecfree(lm); } vecfree(pg); vecfree(w); vecfree(d); vecfree(gp); vecfree(g); vecfree(xp); return ret; }
void * allocate(size_t i_size, size_t i_alignment, size_t i_alignment_offset) { if (i_alignment == 0 || ((i_alignment & (i_alignment - 1))) != 0) { throw std::exception(); } size_t total_size = ((i_size + m_page_mask) / m_page_size) * m_page_size; for(;;) { uintptr_t remaining_size = total_size; if (remaining_size < i_size) { total_size += m_page_size; continue; } remaining_size -= i_size; remaining_size += i_alignment_offset; remaining_size &= ~static_cast<uintptr_t>(i_alignment - 1); if (remaining_size < i_alignment_offset) { total_size += m_page_size; continue; } remaining_size -= i_alignment_offset; if (remaining_size < sizeof(AllocationHeader)) { total_size += m_page_size; continue; } break; } void * pages = VirtualAlloc(NULL, total_size + m_page_size, MEM_RESERVE, PAGE_NOACCESS ); if (pages == nullptr) { throw std::bad_alloc(); } pages = VirtualAlloc(pages, total_size, MEM_COMMIT, PAGE_READWRITE); if (pages == nullptr) { throw std::bad_alloc(); } uintptr_t address = reinterpret_cast<uintptr_t>(pages) + total_size; address -= i_size; address += i_alignment_offset; address &= ~static_cast<uintptr_t>(i_alignment - 1); address -= i_alignment_offset; void * block = reinterpret_cast<void*>(address); assert(is_aligned(block, i_alignment, i_alignment_offset)); auto header = static_cast<AllocationHeader*>(pages); assert(header == get_header(block)); header->m_block = block; header->m_size = i_size; header->m_alignment = i_alignment; header->m_alignment_offset = i_alignment_offset; header->m_whole_size = total_size + m_page_size; header->m_progressive = s_next_proressive++; assert(!IsBadWritePtr(block, i_size)); return block; }
/**
 * Read a rectangle of pixels from the current draw buffer into client memory.
 *
 * @param data        destination buffer; must be non-NULL and pass is_aligned()
 * @param dataStride  byte distance between consecutive destination rows
 * @param dataFormat  destination pixel format; must be a supported format
 * @param sx, sy      origin of the source rectangle
 * @param width       requested width in pixels, must be > 0
 * @param height      requested height in pixels, must be > 0
 *
 * Errors are reported through vg_set_error(): an unsupported format raises
 * VG_UNSUPPORTED_IMAGE_FORMAT_ERROR; a bad pointer or a non-positive size
 * raises VG_ILLEGAL_ARGUMENT_ERROR. Rows are read bottom-up so the image is
 * flipped vertically on the way out.
 */
void vegaReadPixels(void * data, VGint dataStride,
                    VGImageFormat dataFormat,
                    VGint sx, VGint sy,
                    VGint width, VGint height)
{
   struct vg_context *ctx = vg_current_context();
   struct pipe_context *pipe = ctx->pipe;

   struct st_framebuffer *stfb = ctx->draw_buffer;
   struct st_renderbuffer *strb = stfb->strb;
   struct pipe_framebuffer_state *fb = &ctx->state.g3d.fb;

   VGfloat temp[VEGA_MAX_IMAGE_WIDTH][4];
   VGfloat *df = (VGfloat*)temp;
   VGint y = (fb->height - sy) - 1, yStep = -1;
   VGint i;
   VGubyte *dst = (VGubyte *)data;
   VGint xoffset = 0, yoffset = 0;

   if (!supported_image_format(dataFormat)) {
      vg_set_error(ctx, VG_UNSUPPORTED_IMAGE_FORMAT_ERROR);
      return;
   }
   if (!data || !is_aligned(data)) {
      vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
      return;
   }
   if (width <= 0 || height <= 0) {
      vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
      return;
   }

   /* make sure rendering has completed */
   pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);

   /* Clip against the left edge: skip the columns that lie outside and
    * remember how far into each destination row the first pixel goes. */
   if (sx < 0) {
      xoffset = -sx;
      xoffset *= _vega_size_for_format(dataFormat);
      width += sx;
      sx = 0;
   }
   /* Same for rows; the starting row depends on sy and is recomputed. */
   if (sy < 0) {
      yoffset = -sy;
      height += sy;
      sy = 0;
      y = (fb->height - sy) - 1;
      yoffset *= dataStride;
   }

   /* The clipping above can leave nothing to read; a non-positive extent
    * must not reach the transfer request below. */
   if (width <= 0 || height <= 0)
      return;

   {
      struct pipe_transfer *transfer;

      /* NOTE(review): the transfer box is (0, 0, width, height) while rows
       * are fetched at (sx, y); the rectangle is also never clipped against
       * the framebuffer's right/bottom edge -- confirm this is intended. */
      transfer = pipe_get_transfer(pipe, strb->texture,  0, 0, 0,
                                   PIPE_TRANSFER_READ,
                                   0, 0, width, height);

      /* Do a row at a time to flip image data vertically */
      for (i = 0; i < height; i++) {
#if 0
         debug_printf("%d-%d  == %d\n", sy, height, y);
#endif
         pipe_get_tile_rgba(pipe, transfer, sx, y, width, 1, df);
         y += yStep;
         _vega_pack_rgba_span_float(ctx, width, temp, dataFormat,
                                    dst + yoffset + xoffset);
         dst += dataStride;
      }

      pipe->transfer_destroy(pipe, transfer);
   }
}
// Pointer flavour of is_aligned: converts the address to an integer and
// defers to the size_t overload.
bool PSVirtualSpace::is_aligned(char* value) const {
  return is_aligned(reinterpret_cast<size_t>(value));
}
/*
 * Finish the hash: append the padding and the message length in bits, then,
 * if _output is non-NULL, store the digest big-endian. 64 bytes are written
 * when ctx->bIs512 is set, 48 bytes otherwise.
 *
 * When the build neither allows unaligned access nor sets
 * SHA2_GENERIC_DO_NOT_BUILD_ALIGNED, an output pointer that is not aligned
 * to ARCH_WORD_64 is served through an aligned scratch buffer and memcpy.
 */
void sha512_final(void *_output, sha512_ctx *ctx)
{
	unsigned char *output = (unsigned char *)_output;
	unsigned char *dst;
	ARCH_WORD_32 tail, pad_len;
	ARCH_WORD_64 bit_count;
	union {
		ARCH_WORD_64 wlen[2];
		unsigned char mlen[16];	// need aligned on sparc
	} len_block;
#if !(ARCH_ALLOWS_UNALIGNED == 1 || SHA2_GENERIC_DO_NOT_BUILD_ALIGNED == 1)
	union {
		ARCH_WORD_64 x[8];
		unsigned char c[64];
	} scratch;
#endif

	/* 128-bit length field: upper word zero, lower word = length in bits. */
	bit_count = (ctx->total << 3);
	len_block.wlen[0] = 0;
	OUTBE64(bit_count, len_block.mlen, 8);

	/* Pad so that the 16 length bytes end on a 128-byte block boundary. */
	tail = ctx->total & 0x7F;
	if (tail < 112)
		pad_len = 112 - tail;
	else
		pad_len = 240 - tail;

	sha512_update(ctx, (unsigned char *) padding, pad_len);
	sha512_update(ctx, len_block.mlen, 16);

	if (!output)
		return;

	// Forcing SHA2_GENERIC_DO_NOT_BUILD_ALIGNED == 1 builds the direct-store
	// path even on systems that require alignment, skipping the alignment
	// check. It IS faster (about 2.5%), and once the formats hand in
	// properly aligned buffers the check is not needed any more.
#if ARCH_ALLOWS_UNALIGNED == 1 || SHA2_GENERIC_DO_NOT_BUILD_ALIGNED == 1
	dst = output;
#else
	dst = is_aligned(output, sizeof(ARCH_WORD_64)) ? output : scratch.c;
#endif

	OUTBE64(ctx->h[0], dst,  0);
	OUTBE64(ctx->h[1], dst,  8);
	OUTBE64(ctx->h[2], dst, 16);
	OUTBE64(ctx->h[3], dst, 24);
	OUTBE64(ctx->h[4], dst, 32);
	OUTBE64(ctx->h[5], dst, 40);
	if (ctx->bIs512) {
		OUTBE64(ctx->h[6], dst, 48);
		OUTBE64(ctx->h[7], dst, 56);
	}

	/* Only reached with dst != output when the scratch buffer was used. */
	if (dst != output) {
		if (ctx->bIs512)
			memcpy(output, dst, 64);
		else
			memcpy(output, dst, 48);
	}
}
char* reckless::detail::thread_input_buffer::allocate_input_frame(std::size_t size) { // Conceptually, we have the invariant that // pinput_start_ <= pinput_end_, // and the memory area after pinput_end is free for us to use for // allocating a frame. However, the fact that it's a circular buffer means // that: // // * The area after pinput_end is actually non-contiguous, wraps around // at the end of the buffer and ends at pinput_start. // // * Except, when pinput_end itself has fallen over the right edge and we // have the case pinput_end <= pinput_start. Then the *used* memory is // non-contiguous, and the free memory is contiguous (it still starts at // pinput_end and ends at pinput_start modulo circular buffer size). // // (This is easier to understand by drawing it on a paper than by reading // the comment text). auto mask = frame_alignment_mask(); size = (size + mask) & ~mask; // We can't write a frame that is larger than the entire capacity of the // input buffer. If you hit this assert then you either need to write a // smaller log entry, or you need to make the input buffer larger. assert(size < size_); while(true) { auto pinput_end = pinput_end_; // FIXME these asserts should / can be enabled again? assert(static_cast<std::size_t>(pinput_end - buffer_start()) < size_); assert(is_aligned(pinput_end)); // Even if we get an "old" value for pinput_start_ here, that's OK // because other threads will never cause the amount of available // buffer space to shrink. So either there is enough buffer space and // we're done, or there isn't and we'll wait for an input-consumption // event which creates a full memory barrier and hence gives us an // updated value for pinput_start_. So memory_order_relaxed should be // fine here. auto pinput_start = pinput_start_.load(std::memory_order_relaxed); std::ptrdiff_t free = pinput_start - pinput_end; if(free > 0) { // Free space is contiguous. // Technically, there is enough room if size == free. 
But the // problem with using the free space in this situation is that when // we increase pinput_end_ by size, we end up with pinput_start_ == // pinput_end_. Now, given that state, how do we know if the buffer // is completely filled or empty? So, it's easier to just check for // size < free instead of size <= free, and pretend we're out // of space if size == free. Same situation applies in the else // clause below. if(likely(static_cast<std::ptrdiff_t>(size) < free)) { pinput_end_ = advance_frame_pointer(pinput_end, size); return pinput_end; } else { // Not enough room. Wait for the output thread to consume some // input. wait_input_consumed(); } } else { // Free space is non-contiguous. // TODO should we use an end pointer instead of a size_? std::size_t free1 = size_ - (pinput_end - buffer_start()); if(likely(size < free1)) { // There's enough room in the first segment. pinput_end_ = advance_frame_pointer(pinput_end, size); return pinput_end; } else { std::size_t free2 = pinput_start - buffer_start(); if(likely(size < free2)) { // We don't have enough room for a continuous input frame // in the first segment (at the end of the circular // buffer), but there is enough room in the second segment // (at the beginning of the buffer). To instruct the output // thread to skip ahead to the second segment, we need to // put a marker value at the current position. We're // supposed to be guaranteed enough room for the wraparound // marker because frame alignment is at least the size of // the marker. *reinterpret_cast<formatter_dispatch_function_t**>(pinput_end_) = WRAPAROUND_MARKER; pinput_end_ = advance_frame_pointer(buffer_start(), size); return buffer_start(); } else { // Not enough room. Wait for the output thread to consume // some input. wait_input_consumed(); } } } } }
/**
 * Apply a single-channel lookup-table filter from src to dst.
 *
 * @param dst                  destination image handle
 * @param src                  source image handle
 * @param lookupTable          256 packed entries; must be non-NULL and pass
 *                             is_aligned()
 * @param sourceChannel        one of VG_RED, VG_GREEN, VG_BLUE, VG_ALPHA
 * @param outputLinear         not referenced by this implementation
 * @param outputPremultiplied  not referenced by this implementation
 *
 * Errors are reported through vg_set_error(): VG_BAD_HANDLE_ERROR for an
 * invalid handle; VG_ILLEGAL_ARGUMENT_ERROR for a bad table pointer, an
 * unknown channel, or overlapping images.
 */
void vegaLookupSingle(VGImage dst, VGImage src,
                      const VGuint * lookupTable,
                      VGImageChannel sourceChannel,
                      VGboolean outputLinear,
                      VGboolean outputPremultiplied)
{
   struct vg_context *ctx = vg_current_context();
   struct vg_image *d, *s;
   struct pipe_sampler_view *lut_texture_view;
   VGfloat buffer[4];
   struct filter_info info;
   VGuint color_data[256];
   VGint i;

   if (dst == VG_INVALID_HANDLE || src == VG_INVALID_HANDLE) {
      vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
      return;
   }

   if (!lookupTable || !is_aligned(lookupTable)) {
      vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
      return;
   }

   if (sourceChannel != VG_RED && sourceChannel != VG_GREEN &&
       sourceChannel != VG_BLUE && sourceChannel != VG_ALPHA) {
      vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
      return;
   }

   d = (struct vg_image*)dst;
   s = (struct vg_image*)src;

   if (vg_image_overlaps(d, s)) {
      vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
      return;
   }

   vg_validate_state(ctx);

   /* Repack every table entry from R,G,B,A (most significant byte first)
    * to B,G,R,A for the lookup texture. */
   for (i = 0; i < 256; ++i) {
      VGuint rgba = lookupTable[i];
      VGubyte blue, green, red, alpha;
      red   = (rgba & 0xff000000)>>24;
      green = (rgba & 0x00ff0000)>>16;
      blue  = (rgba & 0x0000ff00)>> 8;
      alpha = (rgba & 0x000000ff)>> 0;
      /* Widen before shifting: a VGubyte promotes to int, and shifting a
       * value >= 0x80 left by 24 would overflow a signed int. */
      color_data[i] = (VGuint)blue << 24 | (VGuint)green << 16 |
                      (VGuint)red << 8 | (VGuint)alpha;
   }
   lut_texture_view = create_texture_1d_view(ctx, color_data, 256);

   buffer[0] = 0.f;
   buffer[1] = 0.f;
   buffer[2] = 1.f;
   buffer[3] = 1.f;

   info.dst = d;
   info.src = s;
   info.setup_shader = &setup_lookup_single;
   /* The channel selector travels to the shader setup inside the pointer. */
   info.user_data = (void*)sourceChannel;
   info.const_buffer = buffer;
   info.const_buffer_len = 4 * sizeof(VGfloat);
   info.tiling_mode = VG_TILE_PAD;
   info.extra_texture_view = lut_texture_view;

   execute_filter(ctx, &info);

   /* Drop the reference to the temporary lookup texture view. */
   pipe_sampler_view_reference(&lut_texture_view, NULL);
}