Ejemplo n.º 1
0
void smashCmpq(TCA inst, uint32_t imm) {
    always_assert(is_aligned(inst, Alignment::SmashCmpq));
    *reinterpret_cast<uint32_t*>(inst + kSmashCmpqImmOff) = imm;
}
bool PSVirtualSpace::is_aligned(size_t value) const {
    return is_aligned(value, alignment());
}
Ejemplo n.º 3
0
int ChainingBlockDevice::init()
{
    int err;
    uint32_t val = core_util_atomic_incr_u32(&_init_ref_count, 1);

    if (val != 1) {
        return BD_ERROR_OK;
    }

    _read_size = 0;
    _program_size = 0;
    _erase_size = 0;
    _erase_value = -1;
    _size = 0;

    // Initialize children block devices, find all sizes and
    // assert that block sizes are similar. We can't do this in
    // the constructor since some block devices may need to be
    // initialized before they know their block size/count
    for (size_t i = 0; i < _bd_count; i++) {
        err = _bds[i]->init();
        if (err) {
            goto fail;
        }

        bd_size_t read = _bds[i]->get_read_size();
        if (i == 0 || (read >= _read_size && is_aligned(read, _read_size))) {
            _read_size = read;
        } else {
            MBED_ASSERT(_read_size > read && is_aligned(_read_size, read));
        }

        bd_size_t program = _bds[i]->get_program_size();
        if (i == 0 || (program >= _program_size && is_aligned(program, _program_size))) {
            _program_size = program;
        } else {
            MBED_ASSERT(_program_size > program && is_aligned(_program_size, program));
        }

        bd_size_t erase = _bds[i]->get_erase_size();
        if (i == 0 || (erase >= _erase_size && is_aligned(erase, _erase_size))) {
            _erase_size = erase;
        } else {
            MBED_ASSERT(_erase_size > erase && is_aligned(_erase_size, erase));
        }

        int value = _bds[i]->get_erase_value();
        if (i == 0 || value == _erase_value) {
            _erase_value = value;
        } else {
            _erase_value = -1;
        }

        _size += _bds[i]->size();
    }

    _is_initialized = true;
    return BD_ERROR_OK;

fail:
    _is_initialized = false;
    _init_ref_count = 0;
    return err;
}
Ejemplo n.º 4
0
void vegaReadPixels(void * data, VGint dataStride,
                    VGImageFormat dataFormat,
                    VGint sx, VGint sy,
                    VGint width, VGint height)
{
   struct vg_context *ctx = vg_current_context();
   struct pipe_context *pipe = ctx->pipe;

   struct st_framebuffer *stfb = ctx->draw_buffer;
   struct st_renderbuffer *strb = stfb->strb;

   VGfloat temp[VEGA_MAX_IMAGE_WIDTH][4];
   VGfloat *df = (VGfloat*)temp;
   VGint i;
   VGubyte *dst = (VGubyte *)data;
   VGint xoffset = 0, yoffset = 0;

   if (!supported_image_format(dataFormat)) {
      vg_set_error(ctx, VG_UNSUPPORTED_IMAGE_FORMAT_ERROR);
      return;
   }
   if (!data || !is_aligned(data)) {
      vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
      return;
   }
   if (width <= 0 || height <= 0) {
      vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
      return;
   }

   if (sx < 0) {
      xoffset = -sx;
      xoffset *= _vega_size_for_format(dataFormat);
      width += sx;
      sx = 0;
   }
   if (sy < 0) {
      yoffset = -sy;
      yoffset *= dataStride;
      height += sy;
      sy = 0;
   }

   if (sx + width > stfb->width || sy + height > stfb->height) {
      width = stfb->width - sx;
      height = stfb->height - sy;
      /* nothing to read */
      if (width <= 0 || height <= 0)
         return;
   }

   {
      VGint y = (stfb->height - sy) - 1, yStep = -1;
      struct pipe_transfer *transfer;
      void *map;

      map = pipe_transfer_map(pipe, strb->texture,  0, 0,
                              PIPE_TRANSFER_READ,
                              0, 0, sx + width, stfb->height - sy,
                              &transfer);

      /* Do a row at a time to flip image data vertically */
      for (i = 0; i < height; i++) {
#if 0
         debug_printf("%d-%d  == %d\n", sy, height, y);
#endif
         pipe_get_tile_rgba(transfer, map, sx, y, width, 1, df);
         y += yStep;
         _vega_pack_rgba_span_float(ctx, width, temp, dataFormat,
                                    dst + yoffset + xoffset);
         dst += dataStride;
      }

      pipe->transfer_unmap(pipe, transfer);
   }
}
Ejemplo n.º 5
0
int lbfgs(
    int n,
    lbfgsfloatval_t *x,
    lbfgsfloatval_t *ptr_fx,
    lbfgs_evaluate_t proc_evaluate,
    lbfgs_progress_t proc_progress,
    void *instance,
    lbfgs_parameter_t *_param
    )
{
    int ret;
    int i, j, k, ls, end, bound;
    lbfgsfloatval_t step;

    /* Constant parameters and their default values. */
    lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam;
    const int m = param.m;

    lbfgsfloatval_t *xp = NULL;
    lbfgsfloatval_t *g = NULL, *gp = NULL, *pg = NULL;
    lbfgsfloatval_t *d = NULL, *w = NULL, *pf = NULL;
    iteration_data_t *lm = NULL, *it = NULL;
    lbfgsfloatval_t ys, yy;
    lbfgsfloatval_t xnorm, gnorm, beta;
    lbfgsfloatval_t fx = 0.;
    lbfgsfloatval_t rate = 0.;
    line_search_proc linesearch = line_search_morethuente;

    /* Construct a callback data. */
    callback_data_t cd;
    cd.n = n;
    cd.instance = instance;
    cd.proc_evaluate = proc_evaluate;
    cd.proc_progress = proc_progress;

#if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
    /* Round out the number of variables. */
    n = round_out_variables(n);
#endif/*defined(USE_SSE)*/

    /* Check the input parameters for errors. */
    if (n <= 0) {
        return LBFGSERR_INVALID_N;
    }
#if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
    if (n % 8 != 0) {
        return LBFGSERR_INVALID_N_SSE;
    }
    if (!is_aligned(x, 16)) {
        return LBFGSERR_INVALID_X_SSE;
    }
#endif/*defined(USE_SSE)*/
    if (param.epsilon < 0.) {
        return LBFGSERR_INVALID_EPSILON;
    }
    if (param.past < 0) {
        return LBFGSERR_INVALID_TESTPERIOD;
    }
    if (param.delta < 0.) {
        return LBFGSERR_INVALID_DELTA;
    }
    if (param.min_step < 0.) {
        return LBFGSERR_INVALID_MINSTEP;
    }
    if (param.max_step < param.min_step) {
        return LBFGSERR_INVALID_MAXSTEP;
    }
    if (param.ftol < 0.) {
        return LBFGSERR_INVALID_FTOL;
    }
    if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE ||
        param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
        if (param.wolfe <= param.ftol || 1. <= param.wolfe) {
            return LBFGSERR_INVALID_WOLFE;
        }
    }
    if (param.gtol < 0.) {
        return LBFGSERR_INVALID_GTOL;
    }
    if (param.xtol < 0.) {
        return LBFGSERR_INVALID_XTOL;
    }
    if (param.max_linesearch <= 0) {
        return LBFGSERR_INVALID_MAXLINESEARCH;
    }
    if (param.orthantwise_c < 0.) {
        return LBFGSERR_INVALID_ORTHANTWISE;
    }
    if (param.orthantwise_start < 0 || n < param.orthantwise_start) {
        return LBFGSERR_INVALID_ORTHANTWISE_START;
    }
    if (param.orthantwise_end < 0) {
        param.orthantwise_end = n;
    }
    if (n < param.orthantwise_end) {
        return LBFGSERR_INVALID_ORTHANTWISE_END;
    }
    if (param.orthantwise_c != 0.) {
        switch (param.linesearch) {
        case LBFGS_LINESEARCH_BACKTRACKING:
            linesearch = line_search_backtracking_owlqn;
            break;
        default:
            /* Only the backtracking method is available. */
            return LBFGSERR_INVALID_LINESEARCH;
        }
    } else {
        switch (param.linesearch) {
        case LBFGS_LINESEARCH_MORETHUENTE:
            linesearch = line_search_morethuente;
            break;
        case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO:
        case LBFGS_LINESEARCH_BACKTRACKING_WOLFE:
        case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE:
            linesearch = line_search_backtracking;
            break;
        default:
            return LBFGSERR_INVALID_LINESEARCH;
        }
    }

    /* Allocate working space. */
    xp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    g = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    gp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    d = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    w = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) {
        ret = LBFGSERR_OUTOFMEMORY;
        goto lbfgs_exit;
    }

    if (param.orthantwise_c != 0.) {
        /* Allocate working space for OW-LQN. */
        pg = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
        if (pg == NULL) {
            ret = LBFGSERR_OUTOFMEMORY;
            goto lbfgs_exit;
        }
    }

    /* Allocate limited memory storage. */
    lm = (iteration_data_t*)vecalloc(m * sizeof(iteration_data_t));
    if (lm == NULL) {
        ret = LBFGSERR_OUTOFMEMORY;
        goto lbfgs_exit;
    }

    /* Initialize the limited memory. */
    for (i = 0;i < m;++i) {
        it = &lm[i];
        it->alpha = 0;
        it->ys = 0;
        it->s = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
        it->y = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
        if (it->s == NULL || it->y == NULL) {
            ret = LBFGSERR_OUTOFMEMORY;
            goto lbfgs_exit;
        }
    }

    /* Allocate an array for storing previous values of the objective function. */
    if (0 < param.past) {
        pf = (lbfgsfloatval_t*)vecalloc(param.past * sizeof(lbfgsfloatval_t));
    }

    /* Evaluate the function value and its gradient. */
    fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0);
    if (0. != param.orthantwise_c) {
        /* Compute the L1 norm of the variable and add it to the object value. */
        xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end);
        fx += xnorm * param.orthantwise_c;
        owlqn_pseudo_gradient(
            pg, x, g, n,
            param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
            );
    }

    /* Store the initial value of the objective function. */
    if (pf != NULL) {
        pf[0] = fx;
    }

    /*
        Compute the direction;
        we assume the initial hessian matrix H_0 as the identity matrix.
     */
    if (param.orthantwise_c == 0.) {
        vecncpy(d, g, n);
    } else {
        vecncpy(d, pg, n);
    }

    /*
       Make sure that the initial variables are not a minimizer.
     */
    vec2norm(&xnorm, x, n);
    if (param.orthantwise_c == 0.) {
        vec2norm(&gnorm, g, n);
    } else {
        vec2norm(&gnorm, pg, n);
    }
    if (xnorm < 1.0) xnorm = 1.0;
    if (gnorm / xnorm <= param.epsilon) {
        ret = LBFGS_ALREADY_MINIMIZED;
        goto lbfgs_exit;
    }

    /* Compute the initial step:
        step = 1.0 / sqrt(vecdot(d, d, n))
     */
    vec2norminv(&step, d, n);

    k = 1;
    end = 0;
    for (;;) {
        /* Store the current position and gradient vectors. */
        veccpy(xp, x, n);
        veccpy(gp, g, n);

        /* Search for an optimal step. */
        if (param.orthantwise_c == 0.) {
            ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, &param);
        } else {
            ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, &param);
            owlqn_pseudo_gradient(
                pg, x, g, n,
                param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
                );
        }
        if (ls < 0) {
            /* Revert to the previous point. */
            veccpy(x, xp, n);
            veccpy(g, gp, n);
            ret = ls;
            goto lbfgs_exit;
        }

        /* Compute x and g norms. */
        vec2norm(&xnorm, x, n);
        if (param.orthantwise_c == 0.) {
            vec2norm(&gnorm, g, n);
        } else {
            vec2norm(&gnorm, pg, n);
        }

        /* Report the progress. */
        if (cd.proc_progress) {
            if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) {
                goto lbfgs_exit;
            }
        }

        /*
            Convergence test.
            The criterion is given by the following formula:
                |g(x)| / \max(1, |x|) < \epsilon
         */
        if (xnorm < 1.0) xnorm = 1.0;
        if (gnorm / xnorm <= param.epsilon) {
            /* Convergence. */
            ret = LBFGS_SUCCESS;
            break;
        }

        /*
            Test for stopping criterion.
            The criterion is given by the following formula:
                (f(past_x) - f(x)) / f(x) < \delta
         */
        if (pf != NULL) {
            /* We don't test the stopping criterion while k < past. */
            if (param.past <= k) {
                /* Compute the relative improvement from the past. */
                rate = (pf[k % param.past] - fx) / fx;

                /* The stopping criterion. */
                if (rate < param.delta) {
                    ret = LBFGS_STOP;
                    break;
                }
            }

            /* Store the current value of the objective function. */
            pf[k % param.past] = fx;
        }

        if (param.max_iterations != 0 && param.max_iterations < k+1) {
            /* Maximum number of iterations. */
            ret = LBFGSERR_MAXIMUMITERATION;
            break;
        }

        /*
            Update vectors s and y:
                s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}.
                y_{k+1} = g_{k+1} - g_{k}.
         */
        it = &lm[end];
        vecdiff(it->s, x, xp, n);
        vecdiff(it->y, g, gp, n);

        /*
            Compute scalars ys and yy:
                ys = y^t \cdot s = 1 / \rho.
                yy = y^t \cdot y.
            Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor).
         */
        vecdot(&ys, it->y, it->s, n);
        vecdot(&yy, it->y, it->y, n);
        it->ys = ys;

        /*
            Recursive formula to compute dir = -(H \cdot g).
                This is described in page 779 of:
                Jorge Nocedal.
                Updating Quasi-Newton Matrices with Limited Storage.
                Mathematics of Computation, Vol. 35, No. 151,
                pp. 773--782, 1980.
         */
        bound = (m <= k) ? m : k;
        ++k;
        end = (end + 1) % m;

        /* Compute the steepest direction. */
        if (param.orthantwise_c == 0.) {
            /* Compute the negative of gradients. */
            vecncpy(d, g, n);
        } else {
            vecncpy(d, pg, n);
        }

        j = end;
        for (i = 0;i < bound;++i) {
            j = (j + m - 1) % m;    /* if (--j == -1) j = m-1; */
            it = &lm[j];
            /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */
            vecdot(&it->alpha, it->s, d, n);
            it->alpha /= it->ys;
            /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */
            vecadd(d, it->y, -it->alpha, n);
        }

        vecscale(d, ys / yy, n);

        for (i = 0;i < bound;++i) {
            it = &lm[j];
            /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */
            vecdot(&beta, it->y, d, n);
            beta /= it->ys;
            /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */
            vecadd(d, it->s, it->alpha - beta, n);
            j = (j + 1) % m;        /* if (++j == m) j = 0; */
        }

        /*
            Constrain the search direction for orthant-wise updates.
         */
        if (param.orthantwise_c != 0.) {
            for (i = param.orthantwise_start;i < param.orthantwise_end;++i) {
                if (d[i] * pg[i] >= 0) {
                    d[i] = 0;
                }
            }
        }

        /*
            Now the search direction d is ready. We try step = 1 first.
         */
        step = 1.0;
    }

lbfgs_exit:
    /* Return the final value of the objective function. */
    if (ptr_fx != NULL) {
        *ptr_fx = fx;
    }

    vecfree(pf);

    /* Free memory blocks used by this function. */
    if (lm != NULL) {
        for (i = 0;i < m;++i) {
            vecfree(lm[i].s);
            vecfree(lm[i].y);
        }
        vecfree(lm);
    }
    vecfree(pg);
    vecfree(w);
    vecfree(d);
    vecfree(gp);
    vecfree(g);
    vecfree(xp);

    return ret;
}
Ejemplo n.º 6
0
		void * allocate(size_t i_size, size_t i_alignment, size_t i_alignment_offset)
		{
			if (i_alignment == 0 || ((i_alignment & (i_alignment - 1))) != 0)
			{
				throw std::exception();
			}

			size_t total_size = ((i_size + m_page_mask) / m_page_size) * m_page_size;

			for(;;)
			{
				uintptr_t remaining_size = total_size;
				if (remaining_size < i_size)
				{
					total_size += m_page_size;
					continue;
				}
				remaining_size -= i_size;

				remaining_size += i_alignment_offset;
				remaining_size &= ~static_cast<uintptr_t>(i_alignment - 1);

				if (remaining_size < i_alignment_offset)
				{
					total_size += m_page_size;
					continue;
				}

				remaining_size -= i_alignment_offset;

				if (remaining_size < sizeof(AllocationHeader))
				{
					total_size += m_page_size;
					continue;
				}

				break;
			}

			void * pages = VirtualAlloc(NULL, total_size + m_page_size, MEM_RESERVE, PAGE_NOACCESS );
			if (pages == nullptr)
			{
				throw std::bad_alloc();
			}

			pages = VirtualAlloc(pages, total_size, MEM_COMMIT, PAGE_READWRITE);
			if (pages == nullptr)
			{
				throw std::bad_alloc();
			}

			uintptr_t address = reinterpret_cast<uintptr_t>(pages) + total_size;
			address -= i_size;
			address += i_alignment_offset;
			address &= ~static_cast<uintptr_t>(i_alignment - 1);
			address -= i_alignment_offset;
			
			void * block = reinterpret_cast<void*>(address);
			assert(is_aligned(block, i_alignment, i_alignment_offset));

			auto header = static_cast<AllocationHeader*>(pages);
			assert(header == get_header(block));
			header->m_block = block;
			header->m_size = i_size;
			header->m_alignment = i_alignment;
			header->m_alignment_offset = i_alignment_offset;
			header->m_whole_size = total_size + m_page_size;
			header->m_progressive = s_next_proressive++;
			
			assert(!IsBadWritePtr(block, i_size));

			return block;
		}
Ejemplo n.º 7
0
void vegaReadPixels(void * data, VGint dataStride,
                    VGImageFormat dataFormat,
                    VGint sx, VGint sy,
                    VGint width, VGint height)
{
   struct vg_context *ctx = vg_current_context();
   struct pipe_context *pipe = ctx->pipe;

   struct st_framebuffer *stfb = ctx->draw_buffer;
   struct st_renderbuffer *strb = stfb->strb;
   struct pipe_framebuffer_state *fb = &ctx->state.g3d.fb;

   VGfloat temp[VEGA_MAX_IMAGE_WIDTH][4];
   VGfloat *df = (VGfloat*)temp;
   VGint y = (fb->height - sy) - 1, yStep = -1;
   VGint i;
   VGubyte *dst = (VGubyte *)data;
   VGint xoffset = 0, yoffset = 0;

   if (!supported_image_format(dataFormat)) {
      vg_set_error(ctx, VG_UNSUPPORTED_IMAGE_FORMAT_ERROR);
      return;
   }
   if (!data || !is_aligned(data)) {
      vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
      return;
   }
   if (width <= 0 || height <= 0) {
      vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
      return;
   }

   /* make sure rendering has completed */
   pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
   if (sx < 0) {
      xoffset = -sx;
      xoffset *= _vega_size_for_format(dataFormat);
      width += sx;
      sx = 0;
   }
   if (sy < 0) {
      yoffset = -sy;
      height += sy;
      sy = 0;
      y = (fb->height - sy) - 1;
      yoffset *= dataStride;
   }

   {
      struct pipe_transfer *transfer;

      transfer = pipe_get_transfer(pipe, strb->texture,  0, 0, 0,
				   PIPE_TRANSFER_READ,
				   0, 0, width, height);

      /* Do a row at a time to flip image data vertically */
      for (i = 0; i < height; i++) {
#if 0
         debug_printf("%d-%d  == %d\n", sy, height, y);
#endif
         pipe_get_tile_rgba(pipe, transfer, sx, y, width, 1, df);
         y += yStep;
         _vega_pack_rgba_span_float(ctx, width, temp, dataFormat,
                                    dst + yoffset + xoffset);
         dst += dataStride;
      }

      pipe->transfer_destroy(pipe, transfer);
   }
}
Ejemplo n.º 8
0
bool PSVirtualSpace::is_aligned(char* value) const {
  return is_aligned((size_t)value);
}
Ejemplo n.º 9
0
void sha512_final(void *_output, sha512_ctx *ctx)
{
	ARCH_WORD_32 last, padcnt;
	ARCH_WORD_64 bits;
	union {
		ARCH_WORD_64 wlen[2];
		unsigned char mlen[16];  // need aligned on sparc
	} m;
	unsigned char *output = (unsigned char *)_output;

	bits = (ctx->total <<  3);
	m.wlen[0] = 0;
	OUTBE64(bits, m.mlen, 8);

	last = ctx->total & 0x7F;
	padcnt = (last < 112) ? (112 - last) : (240 - last);

	sha512_update(ctx, (unsigned char *) padding, padcnt);
	sha512_update(ctx, m.mlen, 16);

	if (!output) return;

	// the SHA2_GENERIC_DO_NOT_BUILD_ALIGNED == 1 is to force build on
	// required aligned systems without doing the alignment checking.
	// it IS faster (about 2.5%), and once the data is properly aligned
	// in the formats, the alignment checking is nore needed any more.
#if ARCH_ALLOWS_UNALIGNED == 1 || SHA2_GENERIC_DO_NOT_BUILD_ALIGNED == 1
	OUTBE64(ctx->h[0], output,  0);
	OUTBE64(ctx->h[1], output,  8);
	OUTBE64(ctx->h[2], output, 16);
	OUTBE64(ctx->h[3], output, 24);
	OUTBE64(ctx->h[4], output, 32);
	OUTBE64(ctx->h[5], output, 40);
	if(ctx->bIs512) {
		OUTBE64( ctx->h[6], output, 48 );
		OUTBE64( ctx->h[7], output, 56 );
	}
#else
	if (is_aligned(output,sizeof(ARCH_WORD_64))) {
		OUTBE64(ctx->h[0], output,  0);
		OUTBE64(ctx->h[1], output,  8);
		OUTBE64(ctx->h[2], output, 16);
		OUTBE64(ctx->h[3], output, 24);
		OUTBE64(ctx->h[4], output, 32);
		OUTBE64(ctx->h[5], output, 40);
		if(ctx->bIs512) {
			OUTBE64( ctx->h[6], output, 48 );
			OUTBE64( ctx->h[7], output, 56 );
		}
	} else {
		union {
			ARCH_WORD_64 x[8];
			unsigned char c[64];
		} m;
		unsigned char *tmp = m.c;
		OUTBE64(ctx->h[0], tmp,  0);
		OUTBE64(ctx->h[1], tmp,  8);
		OUTBE64(ctx->h[2], tmp, 16);
		OUTBE64(ctx->h[3], tmp, 24);
		OUTBE64(ctx->h[4], tmp, 32);
		OUTBE64(ctx->h[5], tmp, 40);
		if(ctx->bIs512) {
			OUTBE64(ctx->h[6], tmp, 48);
			OUTBE64(ctx->h[7], tmp, 56);
			memcpy(output, tmp, 64);
		} else
			memcpy(output, tmp, 48);
	}
#endif
}
Ejemplo n.º 10
0
char* reckless::detail::thread_input_buffer::allocate_input_frame(std::size_t size)
{
    // Conceptually, we have the invariant that
    //   pinput_start_ <= pinput_end_,
    // and the memory area after pinput_end is free for us to use for
    // allocating a frame. However, the fact that it's a circular buffer means
    // that:
    // 
    // * The area after pinput_end is actually non-contiguous, wraps around
    //   at the end of the buffer and ends at pinput_start.
    //   
    // * Except, when pinput_end itself has fallen over the right edge and we
    //   have the case pinput_end <= pinput_start. Then the *used* memory is
    //   non-contiguous, and the free memory is contiguous (it still starts at
    //   pinput_end and ends at pinput_start modulo circular buffer size).
    //   
    // (This is easier to understand by drawing it on a paper than by reading
    // the comment text).
    auto mask = frame_alignment_mask();
    size = (size + mask) & ~mask;

    // We can't write a frame that is larger than the entire capacity of the
    // input buffer. If you hit this assert then you either need to write a
    // smaller log entry, or you need to make the input buffer larger.
    assert(size < size_);
 
    while(true) {
        auto pinput_end = pinput_end_;
        // FIXME these asserts should / can be enabled again?
        assert(static_cast<std::size_t>(pinput_end - buffer_start()) < size_);
        assert(is_aligned(pinput_end));

        // Even if we get an "old" value for pinput_start_ here, that's OK
        // because other threads will never cause the amount of available
        // buffer space to shrink. So either there is enough buffer space and
        // we're done, or there isn't and we'll wait for an input-consumption
        // event which creates a full memory barrier and hence gives us an
        // updated value for pinput_start_. So memory_order_relaxed should be
        // fine here.
        auto pinput_start = pinput_start_.load(std::memory_order_relaxed);
        std::ptrdiff_t free = pinput_start - pinput_end;
        if(free > 0) {
            // Free space is contiguous.
            // Technically, there is enough room if size == free. But the
            // problem with using the free space in this situation is that when
            // we increase pinput_end_ by size, we end up with pinput_start_ ==
            // pinput_end_. Now, given that state, how do we know if the buffer
            // is completely filled or empty? So, it's easier to just check for
            // size < free instead of size <= free, and pretend we're out
            // of space if size == free. Same situation applies in the else
            // clause below.
            if(likely(static_cast<std::ptrdiff_t>(size) < free)) {
                pinput_end_ = advance_frame_pointer(pinput_end, size);
                return pinput_end;
            } else {
                // Not enough room. Wait for the output thread to consume some
                // input.
                wait_input_consumed();
            }
        } else {
            // Free space is non-contiguous.
            // TODO should we use an end pointer instead of a size_?
            std::size_t free1 = size_ - (pinput_end - buffer_start());
            if(likely(size < free1)) {
                // There's enough room in the first segment.
                pinput_end_ = advance_frame_pointer(pinput_end, size);
                return pinput_end;
            } else {
                std::size_t free2 = pinput_start - buffer_start();
                if(likely(size < free2)) {
                    // We don't have enough room for a continuous input frame
                    // in the first segment (at the end of the circular
                    // buffer), but there is enough room in the second segment
                    // (at the beginning of the buffer). To instruct the output
                    // thread to skip ahead to the second segment, we need to
                    // put a marker value at the current position. We're
                    // supposed to be guaranteed enough room for the wraparound
                    // marker because frame alignment is at least the size of
                    // the marker.
                    *reinterpret_cast<formatter_dispatch_function_t**>(pinput_end_) =
                        WRAPAROUND_MARKER;
                    pinput_end_ = advance_frame_pointer(buffer_start(), size);
                    return buffer_start();
                } else {
                    // Not enough room. Wait for the output thread to consume
                    // some input.
                    wait_input_consumed();
                }
            }
        }
    }
}
Ejemplo n.º 11
0
void vegaLookupSingle(VGImage dst, VGImage src,
                      const VGuint * lookupTable,
                      VGImageChannel sourceChannel,
                      VGboolean outputLinear,
                      VGboolean outputPremultiplied)
{
   struct vg_context *ctx = vg_current_context();
   struct vg_image *d, *s;
   struct pipe_sampler_view *lut_texture_view;
   VGfloat buffer[4];
   struct filter_info info;
   VGuint color_data[256];
   VGint i;

   if (dst == VG_INVALID_HANDLE || src == VG_INVALID_HANDLE) {
      vg_set_error(ctx, VG_BAD_HANDLE_ERROR);
      return;
   }

   if (!lookupTable || !is_aligned(lookupTable)) {
      vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
      return;
   }

   if (sourceChannel != VG_RED && sourceChannel != VG_GREEN &&
       sourceChannel != VG_BLUE && sourceChannel != VG_ALPHA) {
      vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
      return;
   }

   d = (struct vg_image*)dst;
   s = (struct vg_image*)src;

   if (vg_image_overlaps(d, s)) {
      vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR);
      return;
   }

   vg_validate_state(ctx);

   for (i = 0; i < 256; ++i) {
      VGuint rgba = lookupTable[i];
      VGubyte blue, green, red, alpha;
      red   = (rgba & 0xff000000)>>24;
      green = (rgba & 0x00ff0000)>>16;
      blue  = (rgba & 0x0000ff00)>> 8;
      alpha = (rgba & 0x000000ff)>> 0;
      color_data[i] = blue << 24 | green << 16 |
                      red  <<  8 | alpha;
   }
   lut_texture_view = create_texture_1d_view(ctx, color_data, 256);

   buffer[0] = 0.f;
   buffer[1] = 0.f;
   buffer[2] = 1.f;
   buffer[3] = 1.f;

   info.dst = d;
   info.src = s;
   info.setup_shader = &setup_lookup_single;
   info.user_data = (void*)sourceChannel;
   info.const_buffer = buffer;
   info.const_buffer_len = 4 * sizeof(VGfloat);
   info.tiling_mode = VG_TILE_PAD;
   info.extra_texture_view = lut_texture_view;

   execute_filter(ctx, &info);

   pipe_sampler_view_reference(&lut_texture_view, NULL);
}