Esempio n. 1
0
// High bit-depth A64 blend with a vertical mask, plain C version.
//
// The three 8-bit pointers are converted with CONVERT_TO_SHORTPTR and then
// accessed as 16-bit samples. A single mask value is read per row (mask[row])
// and applied to every sample of that row through AOM_BLEND_A64. bd is only
// examined by an assertion.
void aom_highbd_blend_a64_vmask_c(uint8_t *dst_8, uint32_t dst_stride,
                                  const uint8_t *src0_8, uint32_t src0_stride,
                                  const uint8_t *src1_8, uint32_t src1_stride,
                                  const uint8_t *mask, int h, int w, int bd) {
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
  const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
  const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
  int row, col;
  (void)bd;

  // In-place operation is only allowed when the strides agree.
  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  assert(bd == 8 || bd == 10 || bd == 12);

  for (row = 0; row < h; ++row) {
    // Row offsets and the row's alpha are constant across the inner loop.
    const uint32_t dst_off = row * dst_stride;
    const uint32_t src0_off = row * src0_stride;
    const uint32_t src1_off = row * src1_stride;
    const int alpha = mask[row];

    for (col = 0; col < w; ++col) {
      dst[dst_off + col] =
          AOM_BLEND_A64(alpha, src0[src0_off + col], src1[src1_off + col]);
    }
  }
}
Esempio n. 2
0
// Entry point for the 8-bit vertical-mask A64 blend: selects a width-specific
// kernel from a small table and forwards every argument to it unchanged.
void aom_blend_a64_vmask_sse4_1(uint8_t *dst, uint32_t dst_stride,
                                const uint8_t *src0, uint32_t src0_stride,
                                const uint8_t *src1, uint32_t src1_stride,
                                const uint8_t *mask, int w, int h) {
  typedef void (*vmask_kernel)(uint8_t * dst, uint32_t dst_stride,
                               const uint8_t *src0, uint32_t src0_stride,
                               const uint8_t *src1, uint32_t src1_stride,
                               const uint8_t *mask, int w, int h);

  // Indexed by (w & 0xf). For a power-of-two width that value is 0 (any
  // multiple of 16), 1, 2, 4 or 8; the remaining slots are never selected.
  static const vmask_kernel kernels[9] = {
    blend_a64_vmask_w16n_sse4_1,  // [0] w % 16 == 0
    aom_blend_a64_vmask_c,        // [1] w == 1
    aom_blend_a64_vmask_c,        // [2] w == 2
    NULL,                         // [3] unused
    blend_a64_vmask_w4_sse4_1,    // [4] w == 4
    NULL,                         // [5] unused
    NULL,                         // [6] unused
    NULL,                         // [7] unused
    blend_a64_vmask_w8_sse4_1,    // [8] w == 8
  };

  const int slot = w & 0xf;

  // In-place operation is only allowed when the strides agree.
  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  kernels[slot](dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, w,
                h);
}
Esempio n. 3
0
/*
 * Builds the 64-bit mask ~(block_size - 1), i.e. all bits set except those
 * below block_size. When IS_POWER_OF_TWO rejects block_size,
 * ThrowHere(ARGUMENT_ERROR) is invoked first.
 */
uint64_t AlignmentMask(uint32_t block_size)
{
    uint64_t low_bits;

    if (!IS_POWER_OF_TWO(block_size)) {
        ThrowHere(ARGUMENT_ERROR);
    }

    /* Widen before subtracting so the complement covers all 64 bits. */
    low_bits = (uint64_t) block_size - 1;
    return ~low_bits;
}
Esempio n. 4
0
// Entry point for the high bit-depth vertical-mask A64 blend. Blocks whose
// width or height is not a multiple of 4 go to the C implementation; all
// others are dispatched to a kernel chosen by bit depth and width.
void aom_highbd_blend_a64_vmask_sse4_1(
    uint8_t *dst_8, uint32_t dst_stride, const uint8_t *src0_8,
    uint32_t src0_stride, const uint8_t *src1_8, uint32_t src1_stride,
    const uint8_t *mask, int w, int h, int bd) {
  typedef void (*hbd_vmask_kernel)(uint16_t * dst, uint32_t dst_stride,
                                   const uint16_t *src0, uint32_t src0_stride,
                                   const uint16_t *src1, uint32_t src1_stride,
                                   const uint8_t *mask, int w, int h);

  // First index: 0 for bd 8/10, 1 for bd 12.
  // Second index: 0 when w % 8 == 0, 1 when w == 4.
  static const hbd_vmask_kernel kernels[2][2] = {
    { blend_a64_vmask_b10_w8n_sse4_1, blend_a64_vmask_b10_w4_sse4_1 },
    { blend_a64_vmask_b12_w8n_sse4_1, blend_a64_vmask_b12_w4_sse4_1 }
  };

  const int bd_idx = (bd == 12);
  const int w_idx = (w >> 2) & 1;

  // In-place operation is only allowed when the strides agree.
  assert(IMPLIES(src0_8 == dst_8, src0_stride == dst_stride));
  assert(IMPLIES(src1_8 == dst_8, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  assert(bd == 8 || bd == 10 || bd == 12);

  // With power-of-two dimensions, a non-zero low two bits of (h | w) means
  // w <= 2 or h <= 2.
  if (UNLIKELY((h | w) & 3)) {
    aom_highbd_blend_a64_vmask_c(dst_8, dst_stride, src0_8, src0_stride, src1_8,
                                 src1_stride, mask, w, h, bd);
  } else {
    kernels[bd_idx][w_idx](CONVERT_TO_SHORTPTR(dst_8), dst_stride,
                           CONVERT_TO_SHORTPTR(src0_8), src0_stride,
                           CONVERT_TO_SHORTPTR(src1_8), src1_stride, mask, w,
                           h);
  }
}
Esempio n. 5
0
// Resizes an aligned allocation by forwarding to GnExternalAlignedRealloc.
// eEventType, bProvideAccurateSizeOnDeallocate and stSizeCurrent are not
// used by this implementation.
void* GnStandardAllocator::Reallocate(void* pvMemory, gsize& stSizeInBytes,
	gsize& stAlignment, GnMemoryEventType eEventType, bool bProvideAccurateSizeOnDeallocate,
	gsize stSizeCurrent)
{
	GnAssert(IS_POWER_OF_TWO(stAlignment));

	// A zero-size request is the deallocation case, which the allocation
	// functions are expected to have handled before reaching this point.
	GnAssert(stSizeInBytes != 0);

	void* pvResized = GnExternalAlignedRealloc(pvMemory, stSizeInBytes, stAlignment);
	return pvResized;
}
Esempio n. 6
0
// 8-bit A64 blend with a vertical mask, plain C version.
//
// A single mask value is read per row (mask[row]) and applied to every pixel
// of that row through AOM_BLEND_A64.
void aom_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride,
                           const uint8_t *src0, uint32_t src0_stride,
                           const uint8_t *src1, uint32_t src1_stride,
                           const uint8_t *mask, int h, int w) {
  int row, col;

  // In-place operation is only allowed when the strides agree.
  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  for (row = 0; row < h; ++row) {
    // Row offsets and the row's alpha are constant across the inner loop.
    const uint32_t dst_off = row * dst_stride;
    const uint32_t src0_off = row * src0_stride;
    const uint32_t src1_off = row * src1_stride;
    const int alpha = mask[row];

    for (col = 0; col < w; ++col) {
      dst[dst_off + col] =
          AOM_BLEND_A64(alpha, src0[src0_off + col], src1[src1_off + col]);
    }
  }
}
Esempio n. 7
0
// Sums, over a width x height block, the rounded absolute difference between
// wsrc[n] and pre[n] * mask[n], handling eight samples per loop iteration.
//
// pre8 is converted with CONVERT_TO_SHORTPTR and read as 16-bit samples with
// a row pitch of pre_stride; wsrc and mask are read as one contiguous run of
// width * height 32-bit values. The asserts require width to be a power of
// two and at least 8.
//
// NOTE(review): xx_roundn_epu32(v, 12) and xx_hsum_epi32_si32() are defined
// elsewhere; presumably a rounding right shift by 12 bits and a horizontal
// sum of the four 32-bit lanes -- confirm against their definitions.
static INLINE unsigned int hbd_obmc_sad_w8n(const uint8_t *pre8,
                                            const int pre_stride,
                                            const int32_t *wsrc,
                                            const int32_t *mask,
                                            const int width, const int height) {
  const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
  // Gap between the end of one row of pre and the start of the next.
  const int pre_step = pre_stride - width;
  // Running sample index; it is never reset at row boundaries.
  int n = 0;
  __m128i v_sad_d = _mm_setzero_si128();

  assert(width >= 8);
  assert(IS_POWER_OF_TWO(width));

  do {
    // Samples n+4..n+7 (suffix 1) and n..n+3 (suffix 0).
    const __m128i v_p1_w = xx_loadl_64(pre + n + 4);
    const __m128i v_m1_d = xx_load_128(mask + n + 4);
    const __m128i v_w1_d = xx_load_128(wsrc + n + 4);
    const __m128i v_p0_w = xx_loadl_64(pre + n);
    const __m128i v_m0_d = xx_load_128(mask + n);
    const __m128i v_w0_d = xx_load_128(wsrc + n);

    // Zero-extend the four 16-bit pre samples of each half to 32 bits.
    const __m128i v_p0_d = _mm_cvtepu16_epi32(v_p0_w);
    const __m128i v_p1_d = _mm_cvtepu16_epi32(v_p1_w);

    // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
    // boundaries. We use pmaddwd, as it has lower latency on Haswell
    // than pmulld but produces the same result with these inputs.
    const __m128i v_pm0_d = _mm_madd_epi16(v_p0_d, v_m0_d);
    const __m128i v_pm1_d = _mm_madd_epi16(v_p1_d, v_m1_d);

    // |wsrc - pre * mask| per 32-bit lane.
    const __m128i v_diff0_d = _mm_sub_epi32(v_w0_d, v_pm0_d);
    const __m128i v_diff1_d = _mm_sub_epi32(v_w1_d, v_pm1_d);
    const __m128i v_absdiff0_d = _mm_abs_epi32(v_diff0_d);
    const __m128i v_absdiff1_d = _mm_abs_epi32(v_diff1_d);

    // Rounded absolute difference
    const __m128i v_rad0_d = xx_roundn_epu32(v_absdiff0_d, 12);
    const __m128i v_rad1_d = xx_roundn_epu32(v_absdiff1_d, 12);

    // Accumulate both halves into the four running lane sums.
    v_sad_d = _mm_add_epi32(v_sad_d, v_rad0_d);
    v_sad_d = _mm_add_epi32(v_sad_d, v_rad1_d);

    n += 8;

    // n keeps counting across rows, so at the end of each row pre is advanced
    // by the row gap; pre + n then addresses the start of the next row.
    if (n % width == 0) pre += pre_step;
  } while (n < width * height);

  return xx_hsum_epi32_si32(v_sad_d);
}
Esempio n. 8
0
/*
 * Initializes a FIFO control structure over a caller-supplied buffer.
 *
 * Returns NRF_ERROR_NULL when p_buf is NULL, NRF_ERROR_INVALID_LENGTH when
 * buf_size is rejected by IS_POWER_OF_TWO, and NRF_SUCCESS otherwise. On
 * success the read and write positions are reset to 0 and buf_size - 1 is
 * stored as the index mask. p_fifo itself is not checked.
 */
uint32_t app_fifo_init(app_fifo_t * p_fifo, uint8_t * p_buf, uint16_t buf_size)
{
    uint32_t err_code = NRF_SUCCESS;

    if (p_buf == NULL)
    {
        // No backing storage supplied.
        err_code = NRF_ERROR_NULL;
    }
    else if (!IS_POWER_OF_TWO(buf_size))
    {
        // The size is later used as a mask, so it must be a power of two.
        err_code = NRF_ERROR_INVALID_LENGTH;
    }
    else
    {
        p_fifo->p_buf         = p_buf;
        p_fifo->buf_size_mask = buf_size - 1;
        p_fifo->read_pos      = 0;
        p_fifo->write_pos     = 0;
    }

    return err_code;
}
Esempio n. 9
0
/*
 * Allocates a buffer laid out as hdr_size + size + ftr_size bytes such that
 * the address hdr_size bytes into the returned buffer is a multiple of
 * alignment.
 *
 * Returns NULL when alignment fails IS_POWER_OF_TWO, when the padded size
 * computation wraps, or when bget() fails. Requests with
 * alignment <= SizeQuant are delegated to raw_malloc(). Otherwise an
 * over-sized buffer is obtained from bget(), and the unused front and tail
 * are handed back with brel_before() and brel_after().
 */
static void *raw_memalign(size_t hdr_size, size_t ftr_size, size_t alignment,
		size_t size)
{
	size_t s;
	uintptr_t b;

	raw_malloc_validate_pools();

	if (!IS_POWER_OF_TWO(alignment))
		return NULL;

	/*
	 * Normal malloc with headers always returns something SizeQuant
	 * aligned.
	 */
	if (alignment <= SizeQuant)
		return raw_malloc(hdr_size, ftr_size, size);

	/*
	 * Over-allocate: room for the payload, one full alignment step, and
	 * a SizeQ + struct bhead allowance for the part released in front.
	 */
	s = hdr_size + ftr_size + alignment + size +
	    SizeQ + sizeof(struct bhead);

	/* Check wrapping */
	if (s < alignment || s < size)
		return NULL;

	b = (uintptr_t)bget(s);
	if (!b)
		goto out;

	if ((b + hdr_size) & (alignment - 1)) {
		/*
		 * Returned buffer is not aligned as requested if the
		 * hdr_size is added. Find an offset into the buffer
		 * that is far enough in to the buffer to be able to free
		 * what's in front.
		 */
		uintptr_t p;

		/*
		 * Find the point where the buffer including supplied
		 * header size should start.
		 */
		p = b + hdr_size + alignment;
		p &= ~(alignment - 1);
		p -= hdr_size;
		/*
		 * If the gap in front is smaller than SizeQ + struct bhead,
		 * move one more alignment step forward.
		 */
		if ((p - b) < (SizeQ + sizeof(struct bhead)))
			p += alignment;
		assert((p + hdr_size + ftr_size + size) <= (b + s));

		/* Free the front part of the buffer */
		brel_before((void *)b, (void *)p);

		/* Set the new start of the buffer */
		b = p;
	}

	/*
	 * Since b is now aligned, release what we don't need at the end of
	 * the buffer.
	 */
	brel_after((void *)b, hdr_size + ftr_size + size);
out:
	/* Also reached with b == 0 when bget() failed. */
	raw_malloc_return_hook((void *)b, size);

	return (void *)b;
}
Esempio n. 10
0
// Allocates stSizeInBytes bytes with the requested alignment by forwarding to
// GnExternalAlignedMalloc. eEventType and bProvideAccurateSizeOnDeallocate
// are not used by this implementation.
void* GnStandardAllocator::Allocate(gsize& stSizeInBytes, gsize& stAlignment,
	GnMemoryEventType eEventType, bool bProvideAccurateSizeOnDeallocate)
{
	GnAssert(IS_POWER_OF_TWO(stAlignment));

	void* pvBlock = GnExternalAlignedMalloc(stSizeInBytes, stAlignment);
	return pvBlock;
}
Esempio n. 11
0
void MainWindow::startBench()
{
    int rangeMin = ui->rangeMinSB->value();
    int rangeMax = ui->rangeMaxSB->value();

    Q_ASSERT(IS_POWER_OF_TWO(rangeMin));
    Q_ASSERT(IS_POWER_OF_TWO(rangeMax));

    int sizeCount = (int)(log2(rangeMax) - log2(rangeMin) + 1);
    if (sizeCount <= 0)
        return;

    int iterations = ui->benchIterRB->value();
    float fourierCount = iterations * (sizeCount + 2);
    float progressStep = 100.0 / fourierCount;
    float progressCounter;

    FT::FTType algorithm = (FT::FTType)ui->benchFtCombo->currentIndex();

    QString input = ui->benchInputLine->text();
    Q_ASSERT(FImage::isRectCode(input));

    ui->benchResultView->clear();
    progressCounter = 0.0;
    m_progress->setValue(progressCounter);

    for (int size = rangeMin; size <= rangeMax; size = qNextPowerOfTwo(size)) {
        QVector<int> results;
        FImage rectangle = FImage::rectangle(input, QSize(size, size));

        FT *fourierWarmUp = FT::createFT(algorithm, &rectangle);
        fourierWarmUp->bench();
        delete fourierWarmUp;
        progressCounter += progressStep;
        m_progress->setValue(progressCounter);

        for (int i = 0; i < iterations; ++i) {
            FT *fourier = FT::createFT(algorithm, &rectangle);
            results.append(fourier->bench());
            delete fourier;

            progressCounter += progressStep;
            m_progress->setValue(progressCounter);
        }

        int result = 0;
        if (ui->benchMinRB->isChecked())
            result = *std::min_element(results.begin(), results.end());
        else if (ui->benchMaxRB->isChecked())
            result = *std::max_element(results.begin(), results.end());
        else if (ui->benchMeanRB->isChecked()) {
            float sum = 0.0;
            Q_FOREACH (int r, results)
                sum += (float)r;

            result = qRound(sum / (float)results.count());
        }

        QStringList benchSum;
        benchSum.append(QStringLiteral("%1").arg(rectangle.id()).leftJustified(28, ' '));
        benchSum.append(QString::number(size).rightJustified(4, ' '));
        benchSum.append(QStringLiteral("%1 ms").arg(QString::number(result).rightJustified(4, ' ')));

        QStringList resultList;
        Q_FOREACH (int r, results)
            resultList.append(QString::number(r).rightJustified(4, ' '));

        ui->benchResultView->append(QStringLiteral("%1\t%2").arg(benchSum.join(" ")).arg(resultList.join(" ")));

        progressCounter += progressStep;
        m_progress->setValue(progressCounter);
    }
Esempio n. 12
0
#define debug_print(...) tf_printf(__VA_ARGS__)
#else
#define debug_print(...) ((void)0)
#endif

/*
 * Evaluates to non-zero when x has at most one bit set. Note that x == 0 also
 * satisfies this test, and that x is evaluated twice.
 */
#define IS_POWER_OF_TWO(x)	(((x) & ((x) - 1)) == 0)

/*
 * The virtual address space size must be a power of two (as set in TCR.T0SZ).
 * As we start the initial lookup at level 1, it must also be between 2 GB and
 * 512 GB (with the virtual address size therefore 31 to 39 bits). See section
 * D4.2.5 in the ARMv8-A Architecture Reference Manual (DDI 0487A.i) for more
 * information.
 */
CASSERT(ADDR_SPACE_SIZE >= (1ull << 31) && ADDR_SPACE_SIZE <= (1ull << 39) &&
	IS_POWER_OF_TWO(ADDR_SPACE_SIZE), assert_valid_addr_space_size);

/*
 * All-ones unsigned long value.
 * NOTE(review): presumably marks a descriptor that has not been assigned
 * yet -- confirm against its users. The expansion is not parenthesized.
 */
#define UNSET_DESC	~0ul

/* Entry count of the level 1 table: ADDR_SPACE_SIZE >> L1_XLAT_ADDRESS_SHIFT. */
#define NUM_L1_ENTRIES (ADDR_SPACE_SIZE >> L1_XLAT_ADDRESS_SHIFT)

/* Level 1 table, aligned to its own size in bytes. */
static uint64_t l1_xlation_table[NUM_L1_ENTRIES]
__aligned(NUM_L1_ENTRIES * sizeof(uint64_t));

/*
 * MAX_XLAT_TABLES tables of XLAT_TABLE_ENTRIES entries each, aligned to
 * XLAT_TABLE_SIZE and placed in the "xlat_table" section.
 */
static uint64_t xlat_tables[MAX_XLAT_TABLES][XLAT_TABLE_ENTRIES]
__aligned(XLAT_TABLE_SIZE) __section("xlat_table");

/*
 * NOTE(review): the users of the following variables are not visible here.
 * Presumably next_xlat indexes the next unused entry of xlat_tables, max_pa
 * and max_va track the highest mapped physical/virtual addresses, and
 * tcr_ps_bits holds a physical-size field value for TCR -- confirm.
 */
static unsigned next_xlat;
static unsigned long max_pa;
static unsigned long max_va;
static unsigned long tcr_ps_bits;
Esempio n. 13
0
/*
 * Parses the tone generator command line and starts playback.
 *
 * argv[1] names a wave table from g_wave_tables, argv[2] is the tone
 * frequency in Hz and the optional argv[3] is the volume attenuation in dB
 * (defaults to "0"). Card, device, rate, channels, sample size, period
 * geometry, channel mask and duration are taken from at_config.
 *
 * Returns 1 on any usage or validation error, otherwise the value returned
 * by inner_main().
 */
int tone_generator_main(const struct audio_tool_config *at_config, int argc, char* argv[])
{
	struct tone_generator_config config = {
		.card = 0,
		.device = 0,
		.chan_mask = ~0,
	};
	struct pcm_config pcm_config;
	/* table is only assigned when a wave table name matches. */
	struct wave_table *ptr, *table = NULL;
	struct wave_scale wave_scale;
	double freq;
	char *arg_wave_type, *arg_freq, *arg_voldb;
	double tmp;
	int i;

	for (i = 0; i < argc; i++) {
		printf("ARGV %d :: %s\n", i, argv[i]);
	}
	printf("DUration : %d\n", at_config->duration);

	if ((argc < 3) || (argc > 4)) {
		usage();
		return 1;
	}

	if (check_wave_tables())
		return 1;

	arg_wave_type = argv[1];
	arg_freq = argv[2];
	if (argc > 3)
		arg_voldb = argv[3];
	else
		arg_voldb = "0";

	/* Set sane defaults */
	memset(&pcm_config, 0, sizeof(struct pcm_config));
	switch (at_config->bits) {
	case 8: pcm_config.format = PCM_FORMAT_S8; break;
	case 16: pcm_config.format = PCM_FORMAT_S16_LE; break;
	case 24: pcm_config.format = PCM_FORMAT_S24_LE; break;
	case 32: pcm_config.format = PCM_FORMAT_S32_LE; break;
	default:
		assert(0);
		/*
		 * With NDEBUG the assert compiles away; do not carry on with
		 * a zeroed format that disagrees with config.bits.
		 */
		fprintf(stderr, "Unsupported sample size\n");
		return 1;
	}

	config.device = at_config->device;
	config.card = at_config->card;
	pcm_config.period_size = at_config->period_size;
	pcm_config.period_count = at_config->num_periods;
	pcm_config.rate = at_config->rate;
	pcm_config.channels = at_config->channels;
	config.chan_mask = at_config->channel_mask;
	/* Duration is multiplied by the sample rate before being stored. */
	config.duration = at_config->duration * pcm_config.rate;
	config.bits = at_config->bits;

	/* g_wave_tables is terminated by an entry whose name is NULL. */
	for (ptr = g_wave_tables ; ptr->name ; ++ptr) {
		if (strcmp(arg_wave_type, ptr->name) == 0) {
			table = ptr;
			assert( IS_POWER_OF_TWO(table->length) );
			assert( table->mask == table->length - 1 );
			break;
		}
	}
	if (ptr->name == NULL) {
		fprintf(stderr, "Invalid wave_type parameter\n");
		return 1;
	}

	tmp = atof(arg_freq);
	if (tmp < 10.0) {
		fprintf(stderr, "Error: frequency must be > 10Hz\n");
		return 1;
	}
	freq = tmp;

	tmp = atof(arg_voldb);
	if (tmp < 0 ) {
		fprintf(stderr, "Volume attenuation must be greater than 0 dB FS\n");
		return 1;
	}
	/* Convert db to fraction */
	tmp = -tmp;
	tmp = pow(10.0, tmp/10.0);
	config.volume = (unsigned short) (tmp * ((double)USHRT_MAX));

	/*
	 * Samples per period of the tone, split into an integer part
	 * (length) and a fractional part scaled by 0xFFF (sub).
	 */
	tmp = ((double)pcm_config.rate) / freq;
	wave_scale.length = tmp;
	tmp = (tmp - wave_scale.length) * 0xFFF;
	wave_scale.sub = tmp;
	wave_scale.sub_den = 0xFFF;
	wave_scale.sub_shift = 12;

	/* This restriction prevents overflows in render()
	 */
	{
		uint16_t bits = 0;
		while ((1<<bits) < table->length) ++bits;
		if (wave_scale.sub_shift + bits > 24) {
			fprintf(stderr, "bits(wave_scale) + bits(table.length) "
				" must be less than or equal to 24\n");
			return 1;
		}
	}

	memcpy(&config.pcm_config, &pcm_config, sizeof(pcm_config));
	memcpy(&config.wave_scale, &wave_scale, sizeof(wave_scale));
	config.wave_table = table;

	return inner_main(config);
}
Esempio n. 14
0
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
#include "sdk_common.h"
#if NRF_MODULE_ENABLED(NRF_LOG)
#include "app_util.h"
#include "app_util_platform.h"
#include "nrf_log.h"
#include "nrf_log_internal.h"
#include "nrf_log_backend.h"
#include "nrf_log_ctrl.h"
#include <string.h>

#if NRF_LOG_DEFERRED
STATIC_ASSERT((NRF_LOG_DEFERRED_BUFSIZE == 0) || IS_POWER_OF_TWO(NRF_LOG_DEFERRED_BUFSIZE));
#else
#define NRF_LOG_DEFERRED_BUFSIZE 1
#endif

/**
 * @brief An internal control block of the logger
 *
 * @note The circular buffer uses indexes that are never cleared, together
 * with a mask. This means the logger may break when the indexes overflow;
 * however, this is quite unlikely. At a rate of 1000 log entries with
 * 2 parameters each per second, such a situation would occur after 12 days.
 */
typedef struct
{
    uint32_t                  wr_idx;          // Current write index (never reset)
Esempio n. 15
0
/*
 * Maps an atom key to a slot index by masking its hash code with
 * capacity - 1. The capacity is asserted to be a power of two.
 */
static int ht_hashpos(ht_atom_t ht, atom key){
  assert(IS_POWER_OF_TWO(ht->capacity));
  return (ht->capacity - 1) & key->hashcode;
}
Esempio n. 16
0
/*
 * Maps a 64-bit integer key to a slot index: the key is first converted to
 * int and then masked with capacity - 1. The capacity is asserted to be a
 * power of two.
 */
static int ht_int_hashpos(ht_int_t ht, uint64_t key){
  assert(IS_POWER_OF_TWO(ht->capacity));
  return (ht->capacity - 1) & (int)key;
}