/*
 * C implementation of the high bit-depth vertical-mask blend.
 *
 * dst_8, src0_8 and src1_8 are converted with CONVERT_TO_SHORTPTR and then
 * accessed as 16-bit samples. For each of the h rows, one value is read from
 * mask[row] and used for all w samples of that row:
 *   dst = AOM_BLEND_A64(mask[row], src0, src1)
 *
 * Either source may be the destination itself, provided the strides match
 * (asserted). h and w must be powers of two >= 1 and bd one of 8, 10 or 12
 * (asserted); bd is otherwise unused.
 */
void aom_highbd_blend_a64_vmask_c(uint8_t *dst_8, uint32_t dst_stride,
                                  const uint8_t *src0_8, uint32_t src0_stride,
                                  const uint8_t *src1_8, uint32_t src1_stride,
                                  const uint8_t *mask, int h, int w, int bd) {
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
  const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
  const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
  int row, col;

  (void)bd;

  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  assert(bd == 8 || bd == 10 || bd == 12);

  for (row = 0; row < h; ++row) {
    /* One mask value per row. */
    const int alpha = mask[row];
    /* Start-of-row offsets into each plane. */
    const uint32_t dst_off = row * dst_stride;
    const uint32_t src0_off = row * src0_stride;
    const uint32_t src1_off = row * src1_stride;

    for (col = 0; col < w; ++col) {
      dst[dst_off + col] =
          AOM_BLEND_A64(alpha, src0[src0_off + col], src1[src1_off + col]);
    }
  }
}
void aom_blend_a64_vmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h) { typedef void (*blend_fn)(uint8_t * dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h); // Dimension: width_index static const blend_fn blend[9] = { blend_a64_vmask_w16n_sse4_1, // w % 16 == 0 aom_blend_a64_vmask_c, // w == 1 aom_blend_a64_vmask_c, // w == 2 NULL, // INVALID blend_a64_vmask_w4_sse4_1, // w == 4 NULL, // INVALID NULL, // INVALID NULL, // INVALID blend_a64_vmask_w8_sse4_1, // w == 8 }; assert(IMPLIES(src0 == dst, src0_stride == dst_stride)); assert(IMPLIES(src1 == dst, src1_stride == dst_stride)); assert(h >= 1); assert(w >= 1); assert(IS_POWER_OF_TWO(h)); assert(IS_POWER_OF_TWO(w)); blend[w & 0xf](dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, w, h); }
/*
 * Builds a 64-bit mask that clears the low bits below block_size.
 *
 * ThrowHere(ARGUMENT_ERROR) is invoked when IS_POWER_OF_TWO(block_size)
 * is false. The result is the bitwise complement of (block_size - 1),
 * with block_size widened to 64 bits first.
 */
uint64_t AlignmentMask(uint32_t block_size) {
    uint64_t low_bits;

    if (!IS_POWER_OF_TWO(block_size)) {
        ThrowHere(ARGUMENT_ERROR);
    }

    /* Widen before subtracting so the complement covers all 64 bits. */
    low_bits = ((uint64_t) block_size) - 1;
    return ~low_bits;
}
void aom_highbd_blend_a64_vmask_sse4_1( uint8_t *dst_8, uint32_t dst_stride, const uint8_t *src0_8, uint32_t src0_stride, const uint8_t *src1_8, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd) { typedef void (*blend_fn)(uint16_t * dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h); // Dimensions are: bd_index X width_index static const blend_fn blend[2][2] = { { // bd == 8 or 10 blend_a64_vmask_b10_w8n_sse4_1, // w % 8 == 0 blend_a64_vmask_b10_w4_sse4_1, // w == 4 }, { // bd == 12 blend_a64_vmask_b12_w8n_sse4_1, // w % 8 == 0 blend_a64_vmask_b12_w4_sse4_1, // w == 4 } }; assert(IMPLIES(src0_8 == dst_8, src0_stride == dst_stride)); assert(IMPLIES(src1_8 == dst_8, src1_stride == dst_stride)); assert(h >= 1); assert(w >= 1); assert(IS_POWER_OF_TWO(h)); assert(IS_POWER_OF_TWO(w)); assert(bd == 8 || bd == 10 || bd == 12); if (UNLIKELY((h | w) & 3)) { // if (w <= 2 || h <= 2) aom_highbd_blend_a64_vmask_c(dst_8, dst_stride, src0_8, src0_stride, src1_8, src1_stride, mask, w, h, bd); } else { uint16_t *const dst = CONVERT_TO_SHORTPTR(dst_8); const uint16_t *const src0 = CONVERT_TO_SHORTPTR(src0_8); const uint16_t *const src1 = CONVERT_TO_SHORTPTR(src1_8); blend[bd == 12][(w >> 2) & 1](dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, w, h); } }
void* GnStandardAllocator::Reallocate(void* pvMemory, gsize& stSizeInBytes, gsize& stAlignment, GnMemoryEventType eEventType, bool bProvideAccurateSizeOnDeallocate, gsize stSizeCurrent) { GnAssert(IS_POWER_OF_TWO(stAlignment)); // The deallocation case should have been caught by us before in // the allocation functions. GnAssert(stSizeInBytes != 0); return GnExternalAlignedRealloc(pvMemory, stSizeInBytes, stAlignment); }
/*
 * C implementation of the 8-bit vertical-mask blend.
 *
 * For each of the h rows, one value is read from mask[row] and used for all
 * w samples of that row:
 *   dst = AOM_BLEND_A64(mask[row], src0, src1)
 *
 * Either source may be the destination itself, provided the strides match
 * (asserted). h and w must be powers of two >= 1 (asserted).
 */
void aom_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride,
                           const uint8_t *src0, uint32_t src0_stride,
                           const uint8_t *src1, uint32_t src1_stride,
                           const uint8_t *mask, int h, int w) {
  int row, col;

  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  for (row = 0; row < h; ++row) {
    /* One mask value per row. */
    const int alpha = mask[row];
    /* Start-of-row offsets into each plane. */
    const uint32_t dst_off = row * dst_stride;
    const uint32_t src0_off = row * src0_stride;
    const uint32_t src1_off = row * src1_stride;

    for (col = 0; col < w; ++col) {
      dst[dst_off + col] =
          AOM_BLEND_A64(alpha, src0[src0_off + col], src1[src1_off + col]);
    }
  }
}
/*
 * High bit-depth OBMC SAD for widths that are a power of two >= 8 (asserted).
 *
 * wsrc and mask are read linearly, eight 32-bit entries per iteration, over
 * width * height entries. pre is read eight 16-bit samples at a time and is
 * advanced by (pre_stride - width) each time a full row of `width` samples
 * has been consumed. For every sample the value |wsrc - pre * mask| is
 * rounded with xx_roundn_epu32(., 12) and accumulated; the horizontal sum of
 * the accumulator is returned.
 */
static INLINE unsigned int hbd_obmc_sad_w8n(const uint8_t *pre8,
                                            const int pre_stride,
                                            const int32_t *wsrc,
                                            const int32_t *mask,
                                            const int width,
                                            const int height) {
  const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
  // Distance from the end of one consumed row to the start of the next.
  const int pre_step = pre_stride - width;
  const int total = width * height;
  __m128i v_acc_d = _mm_setzero_si128();
  int pos = 0;

  assert(width >= 8);
  assert(IS_POWER_OF_TWO(width));

  do {
    // First four samples of this group of eight.
    const __m128i v_pre_lo_w = xx_loadl_64(pre + pos);
    const __m128i v_mask_lo_d = xx_load_128(mask + pos);
    const __m128i v_wsrc_lo_d = xx_load_128(wsrc + pos);

    // Second four samples.
    const __m128i v_pre_hi_w = xx_loadl_64(pre + pos + 4);
    const __m128i v_mask_hi_d = xx_load_128(mask + pos + 4);
    const __m128i v_wsrc_hi_d = xx_load_128(wsrc + pos + 4);

    // Widen the 16-bit predictor samples to 32-bit lanes.
    const __m128i v_pre_lo_d = _mm_cvtepu16_epi32(v_pre_lo_w);
    const __m128i v_pre_hi_d = _mm_cvtepu16_epi32(v_pre_hi_w);

    // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
    // boundaries. We use pmaddwd, as it has lower latency on Haswell
    // than pmulld but produces the same result with these inputs.
    const __m128i v_prod_lo_d = _mm_madd_epi16(v_pre_lo_d, v_mask_lo_d);
    const __m128i v_prod_hi_d = _mm_madd_epi16(v_pre_hi_d, v_mask_hi_d);

    const __m128i v_abs_lo_d =
        _mm_abs_epi32(_mm_sub_epi32(v_wsrc_lo_d, v_prod_lo_d));
    const __m128i v_abs_hi_d =
        _mm_abs_epi32(_mm_sub_epi32(v_wsrc_hi_d, v_prod_hi_d));

    // Rounded absolute difference
    const __m128i v_rnd_lo_d = xx_roundn_epu32(v_abs_lo_d, 12);
    const __m128i v_rnd_hi_d = xx_roundn_epu32(v_abs_hi_d, 12);

    // 32-bit lane adds wrap, so combining the halves first gives the same sum.
    v_acc_d = _mm_add_epi32(v_acc_d, _mm_add_epi32(v_rnd_lo_d, v_rnd_hi_d));

    pos += 8;

    // At the end of a row, skip the padding in the predictor buffer.
    if (pos % width == 0) pre += pre_step;
  } while (pos < total);

  return xx_hsum_epi32_si32(v_acc_d);
}
/*
 * Initializes a FIFO control block over a caller-supplied buffer.
 *
 * Returns NRF_ERROR_NULL when p_buf is NULL and NRF_ERROR_INVALID_LENGTH
 * when IS_POWER_OF_TWO(buf_size) is false. Otherwise stores the buffer
 * pointer, sets the index mask to buf_size - 1, resets both positions to
 * zero and returns NRF_SUCCESS.
 */
uint32_t app_fifo_init(app_fifo_t * p_fifo, uint8_t * p_buf, uint16_t buf_size)
{
    uint32_t status = NRF_SUCCESS;

    if (p_buf == NULL)
    {
        // A backing buffer is mandatory.
        status = NRF_ERROR_NULL;
    }
    else if (!IS_POWER_OF_TWO(buf_size))
    {
        // The size is turned into an index mask below, so it must be a
        // power of two.
        status = NRF_ERROR_INVALID_LENGTH;
    }
    else
    {
        p_fifo->p_buf         = p_buf;
        p_fifo->buf_size_mask = buf_size - 1;
        p_fifo->read_pos      = 0;
        p_fifo->write_pos     = 0;
    }

    return status;
}
/*
 * Allocates a buffer such that the address (result + hdr_size) is aligned
 * to @alignment, with room for hdr_size + ftr_size + size bytes.
 *
 * Returns NULL when @alignment is not a power of two or when the padded
 * request size fails the wrap-around check. Requests with
 * alignment <= SizeQuant are forwarded to raw_malloc(). Otherwise an
 * over-sized buffer is obtained from bget() and the unused front and tail
 * parts are handed back with brel_before()/brel_after(). When bget() fails,
 * raw_malloc_return_hook() is still invoked (with a NULL buffer) and NULL is
 * returned.
 */
static void *raw_memalign(size_t hdr_size, size_t ftr_size, size_t alignment,
			  size_t size)
{
	size_t s;
	uintptr_t b;

	raw_malloc_validate_pools();

	if (!IS_POWER_OF_TWO(alignment))
		return NULL;

	/*
	 * Normal malloc with headers always returns something SizeQuant
	 * aligned.
	 */
	if (alignment <= SizeQuant)
		return raw_malloc(hdr_size, ftr_size, size);

	/*
	 * Over-allocate: payload plus header/footer, plus one full alignment
	 * step and SizeQ + sizeof(struct bhead) of slack so the start can be
	 * moved forward and the skipped front part released.
	 */
	s = hdr_size + ftr_size + alignment + size +
	    SizeQ + sizeof(struct bhead);

	/* Check wrapping */
	if (s < alignment || s < size)
		return NULL;

	b = (uintptr_t)bget(s);
	if (!b)
		goto out;

	if ((b + hdr_size) & (alignment - 1)) {
		/*
		 * Returned buffer is not aligned as requested if the
		 * hdr_size is added. Find an offset into the buffer
		 * that is far enough in to the buffer to be able to free
		 * what's in front.
		 */
		uintptr_t p;

		/*
		 * Find the point where the buffer including supplied
		 * header size should start.
		 */
		p = b + hdr_size + alignment;
		p &= ~(alignment - 1);
		p -= hdr_size;
		/*
		 * The gap in front is smaller than SizeQ plus a block header:
		 * move one more alignment step forward.
		 */
		if ((p - b) < (SizeQ + sizeof(struct bhead)))
			p += alignment;
		assert((p + hdr_size + ftr_size + size) <= (b + s));

		/* Free the front part of the buffer */
		brel_before((void *)b, (void *)p);

		/* Set the new start of the buffer */
		b = p;
	}

	/*
	 * Since b is now aligned, release what we don't need at the end of
	 * the buffer.
	 */
	brel_after((void *)b, hdr_size + ftr_size + size);
out:
	/* Reached on both success and bget() failure (b == 0). */
	raw_malloc_return_hook((void *)b, size);

	return (void *)b;
}
// Allocates a block by delegating to GnExternalAlignedMalloc.
//
// Asserts that stAlignment is a power of two. eEventType and
// bProvideAccurateSizeOnDeallocate are not used by this implementation.
void* GnStandardAllocator::Allocate(gsize& stSizeInBytes, gsize& stAlignment,
    GnMemoryEventType eEventType, bool bProvideAccurateSizeOnDeallocate)
{
    GnAssert(IS_POWER_OF_TWO(stAlignment));

    void* pvBlock = GnExternalAlignedMalloc(stSizeInBytes, stAlignment);
    return pvBlock;
}
// Runs the benchmark described by the bench widgets and appends one result
// line per image size to the result view.
//
// Sizes run from rangeMin to rangeMax, stepping with qNextPowerOfTwo(). For
// each size an image is built from the input text, one warm-up run is
// executed, and then `iterations` measured runs are collected. The reported
// value is the min, max or rounded mean of the measurements, depending on
// which radio button is checked (0 if none is).
void MainWindow::startBench() {
    int rangeMin = ui->rangeMinSB->value();
    int rangeMax = ui->rangeMaxSB->value();
    Q_ASSERT(IS_POWER_OF_TWO(rangeMin));
    Q_ASSERT(IS_POWER_OF_TWO(rangeMax));
    // Number of sizes derived from the difference of the base-2 logarithms.
    int sizeCount = (int)(log2(rangeMax) - log2(rangeMin) + 1);
    if (sizeCount <= 0) return;
    int iterations = ui->benchIterRB->value();
    // Divisor used to scale the progress bar to 100.
    float fourierCount = iterations * (sizeCount + 2);
    float progressStep = 100.0 / fourierCount;
    float progressCounter;
    FT::FTType algorithm = (FT::FTType)ui->benchFtCombo->currentIndex();
    QString input = ui->benchInputLine->text();
    Q_ASSERT(FImage::isRectCode(input));
    ui->benchResultView->clear();
    progressCounter = 0.0;
    m_progress->setValue(progressCounter);
    for (int size = rangeMin; size <= rangeMax; size = qNextPowerOfTwo(size)) {
        QVector<int> results;
        FImage rectangle = FImage::rectangle(input, QSize(size, size));
        // Warm-up run: its result is discarded.
        FT *fourierWarmUp = FT::createFT(algorithm, &rectangle);
        fourierWarmUp->bench();
        delete fourierWarmUp;
        progressCounter += progressStep;
        m_progress->setValue(progressCounter);
        // Measured runs: a fresh FT object per iteration.
        for (int i = 0; i < iterations; ++i) {
            FT *fourier = FT::createFT(algorithm, &rectangle);
            results.append(fourier->bench());
            delete fourier;
            progressCounter += progressStep;
            m_progress->setValue(progressCounter);
        }
        // Reduce the measurements to the single figure selected in the UI.
        int result = 0;
        if (ui->benchMinRB->isChecked())
            result = *std::min_element(results.begin(), results.end());
        else if (ui->benchMaxRB->isChecked())
            result = *std::max_element(results.begin(), results.end());
        else if (ui->benchMeanRB->isChecked()) {
            float sum = 0.0;
            Q_FOREACH (int r, results) sum += (float)r;
            result = qRound(sum / (float)results.count());
        }
        // Summary columns: image id, size, reduced result.
        QStringList benchSum;
        benchSum.append(QStringLiteral("%1").arg(rectangle.id()).leftJustified(28, ' '));
        benchSum.append(QString::number(size).rightJustified(4, ' '));
        benchSum.append(QStringLiteral("%1 ms").arg(QString::number(result).rightJustified(4, ' ')));
        // Individual measurements, listed after a tab.
        QStringList resultList;
        Q_FOREACH (int r, results) resultList.append(QString::number(r).rightJustified(4, ' '));
        ui->benchResultView->append(QStringLiteral("%1\t%2").arg(benchSum.join(" ")).arg(resultList.join(" ")));
        progressCounter += progressStep;
        m_progress->setValue(progressCounter);
    }
/* Debug variant: forwards to tf_printf(). */
#define debug_print(...) tf_printf(__VA_ARGS__)
#else
/* Non-debug variant: expands to a no-op expression. */
#define debug_print(...) ((void)0)
#endif

/*
 * True when x has at most one bit set. Note that this also evaluates to
 * true for x == 0.
 */
#define IS_POWER_OF_TWO(x) (((x) & ((x) - 1)) == 0)

/*
 * The virtual address space size must be a power of two (as set in TCR.T0SZ).
 * As we start the initial lookup at level 1, it must also be between 2 GB and
 * 512 GB (with the virtual address size therefore 31 to 39 bits). See section
 * D4.2.5 in the ARMv8-A Architecture Reference Manual (DDI 0487A.i) for more
 * information.
 */
CASSERT(ADDR_SPACE_SIZE >= (1ull << 31) && ADDR_SPACE_SIZE <= (1ull << 39) &&
	IS_POWER_OF_TWO(ADDR_SPACE_SIZE), assert_valid_addr_space_size);

/* All-ones marker value. */
#define UNSET_DESC	~0ul

/* Number of level 1 entries: ADDR_SPACE_SIZE shifted by the L1 address shift. */
#define NUM_L1_ENTRIES (ADDR_SPACE_SIZE >> L1_XLAT_ADDRESS_SHIFT)

/* Level 1 table, aligned to its own size in bytes. */
static uint64_t l1_xlation_table[NUM_L1_ENTRIES]
		__aligned(NUM_L1_ENTRIES * sizeof(uint64_t));

/* Pool of further tables, each aligned to XLAT_TABLE_SIZE, in "xlat_table". */
static uint64_t xlat_tables[MAX_XLAT_TABLES][XLAT_TABLE_ENTRIES]
		__aligned(XLAT_TABLE_SIZE) __section("xlat_table");

/* NOTE(review): presumably the index of the next unused xlat_tables entry. */
static unsigned next_xlat;
/* NOTE(review): presumably the highest physical/virtual addresses mapped. */
static unsigned long max_pa;
static unsigned long max_va;
/* NOTE(review): presumably the TCR physical-size field value -- confirm. */
static unsigned long tcr_ps_bits;
int tone_generator_main(const struct audio_tool_config *at_config, int argc, char* argv[]) { struct tone_generator_config config = { .card = 0, .device = 0, .chan_mask = ~0, }; int i; for ( i = 0; i < argc; i++) { printf("ARGV %d :: %s\n", i, argv[i]); } printf("DUration : %d\n", at_config->duration); struct pcm_config pcm_config; struct wave_table *ptr, *table; struct wave_scale wave_scale; double freq; char *arg_wave_type, *arg_freq, *arg_voldb; double tmp; if ((argc < 3) || (argc > 4)) { usage(); return 1; } if (check_wave_tables()) return 1; arg_wave_type = argv[1]; arg_freq = argv[2]; if (argc > 3) arg_voldb = argv[3]; else arg_voldb = "0"; /* Set sane defaults */ memset(&pcm_config, 0, sizeof(struct pcm_config)); switch (at_config->bits) { case 8: pcm_config.format = PCM_FORMAT_S8; break; case 16: pcm_config.format = PCM_FORMAT_S16_LE; break; case 24: pcm_config.format = PCM_FORMAT_S24_LE; break; case 32: pcm_config.format = PCM_FORMAT_S32_LE; break; default: assert(0); } config.device = at_config->device; config.card = at_config->card; pcm_config.period_size = at_config->period_size; pcm_config.period_count = at_config->num_periods; pcm_config.rate = at_config->rate; pcm_config.channels = at_config->channels; config.chan_mask = at_config->channel_mask; config.duration = at_config->duration * pcm_config.rate; config.bits = at_config->bits; for (ptr = g_wave_tables ; ptr->name ; ++ptr) { if (strcmp(arg_wave_type, ptr->name) == 0) { table = ptr; assert( IS_POWER_OF_TWO(table->length) ); assert( table->mask == table->length - 1 ); break; } } if (ptr->name == 0) { fprintf(stderr, "Invalied wave_type parameter\n"); return 1; } tmp = atof(arg_freq); if (tmp < 10.0) { fprintf(stderr, "Error: frequency must be > 10Hz\n"); return 1; } freq = tmp; tmp = atof(arg_voldb); if (tmp < 0 ) { fprintf(stderr, "Volume attenuation must be greater than 0 dB FS\n"); return 1; } /* Convert db to fraction */ tmp = -tmp; tmp = pow(10.0, tmp/10.0); config.volume = (unsigned short) 
(tmp * ((double)USHRT_MAX)); tmp = ((double)pcm_config.rate) / freq; wave_scale.length = tmp; tmp = (tmp - wave_scale.length) * 0xFFF; wave_scale.sub = tmp; wave_scale.sub_den = 0xFFF; wave_scale.sub_shift = 12; /* This restriction prevents overflows in render() */ { uint16_t bits = 0; while ((1<<bits) < table->length) ++bits; if (wave_scale.sub_shift + bits > 24) { fprintf(stderr, "bits(wave_scale) + bits(table.length) " " must be less than or equal to 24\n"); return 1; } } memcpy(&config.pcm_config, &pcm_config, sizeof(pcm_config)); memcpy(&config.wave_scale, &wave_scale, sizeof(wave_scale)); config.wave_table = table; return inner_main(config); return 0; }
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include "sdk_common.h" #if NRF_MODULE_ENABLED(NRF_LOG) #include "app_util.h" #include "app_util_platform.h" #include "nrf_log.h" #include "nrf_log_internal.h" #include "nrf_log_backend.h" #include "nrf_log_ctrl.h" #include <string.h> #if NRF_LOG_DEFERRED STATIC_ASSERT((NRF_LOG_DEFERRED_BUFSIZE == 0) || IS_POWER_OF_TWO(NRF_LOG_DEFERRED_BUFSIZE)); #else #define NRF_LOG_DEFERRED_BUFSIZE 1 #endif /** * @brief An internal control block of the logger * * @note Circular buffer is using never cleared indexes and a mask. It means * that logger may break when the indexes overflow. However, it is quite unlikely. * With rate of 1000 log entries with 2 parameters per second such situation * would happen after 12 days. */ typedef struct { uint32_t wr_idx; // Current write index (never reset)
/*
 * Maps an atom key to a slot index by masking key->hashcode with
 * (ht->capacity - 1). The capacity must be a power of two (asserted).
 */
static int ht_hashpos(ht_atom_t ht, atom key)
{
    assert(IS_POWER_OF_TWO(ht->capacity));

    /* With a power-of-two capacity, capacity - 1 is a low-bit mask. */
    return (ht->capacity - 1) & key->hashcode;
}
/*
 * Maps a 64-bit integer key to a slot index: the key is first converted to
 * int, then masked with (ht->capacity - 1). The capacity must be a power of
 * two (asserted).
 */
static int ht_int_hashpos(ht_int_t ht, uint64_t key)
{
    const int narrowed = (int)key;

    assert(IS_POWER_OF_TWO(ht->capacity));

    /* With a power-of-two capacity, capacity - 1 is a low-bit mask. */
    return narrowed & (ht->capacity - 1);
}