/* Returns the SIMD feature mask to hand to a resampler backend.
 *
 * When built with RARCH_INTERNAL the mask comes straight from
 * rarch_get_cpu_features(). Otherwise it is obtained through the
 * perf_get_cpu_features_cb callback, and 0 (no features) is reported
 * when that callback has not been set. */
resampler_simd_mask_t resampler_get_cpu_features(void)
{
#ifdef RARCH_INTERNAL
   return rarch_get_cpu_features();
#else
   /* no features if interface isn't implemented */
   if (!perf_get_cpu_features_cb)
      return 0;

   return perf_get_cpu_features_cb();
#endif
}
/* Selects the sample-conversion routines used on ARM builds.
 *
 * Only does anything when HAVE_NEON is defined: the CPU is queried once
 * and both conversion function pointers are set to the NEON variants if
 * the NEON bit is reported, or to the plain C variants otherwise. */
void audio_convert_init_simd(void)
{
#ifdef HAVE_NEON
   struct rarch_cpu_features cpu;

   rarch_get_cpu_features(&cpu);

   if (cpu.simd & RARCH_SIMD_NEON)
   {
      audio_convert_s16_to_float_arm = audio_convert_s16_to_float_neon;
      audio_convert_float_to_s16_arm = audio_convert_float_to_s16_neon;
   }
   else
   {
      audio_convert_s16_to_float_arm = audio_convert_s16_to_float_C;
      audio_convert_float_to_s16_arm = audio_convert_float_to_s16_C;
   }
#endif
}
/* Loads every library named in list and appends the usable ones to dsp.
 *
 * A library is skipped (and closed again, if it was opened) when it
 * cannot be loaded, does not export "dspfilter_get_implementation",
 * yields no implementation for the current CPU feature mask, or reports
 * an api_version other than DSPFILTER_API_VERSION.
 *
 * Returns false only when growing dsp->plugs fails; entries appended
 * before the failure stay in dsp. Returns true otherwise. */
static bool append_plugs(rarch_dsp_filter_t *dsp, struct string_list *list)
{
   unsigned idx;
   dspfilter_simd_mask_t simd = rarch_get_cpu_features();

   for (idx = 0; idx < list->size; idx++)
   {
      dspfilter_get_implementation_t get_impl;
      const struct dspfilter_implementation *impl = NULL;
      struct rarch_dsp_plug *grown                = NULL;
      dylib_t lib = dylib_load(list->elems[idx].data);

      if (!lib)
         continue;

      get_impl = (dspfilter_get_implementation_t)
         dylib_proc(lib, "dspfilter_get_implementation");

      /* impl stays NULL when the entry point is missing. */
      if (get_impl)
         impl = get_impl(simd);

      /* Missing symbol, no implementation, or ABI mismatch: not a plug. */
      if (!impl || impl->api_version != DSPFILTER_API_VERSION)
      {
         dylib_close(lib);
         continue;
      }

      grown = (struct rarch_dsp_plug*)
         realloc(dsp->plugs, sizeof(*dsp->plugs) * (dsp->num_plugs + 1));
      if (!grown)
      {
         dylib_close(lib);
         return false;
      }

      /* Found plug. */
      grown[dsp->num_plugs].lib  = lib;
      grown[dsp->num_plugs].impl = impl;
      dsp->plugs                 = grown;
      dsp->num_plugs++;
   }

   return true;
}
/* Initializes the resampler backend pointed to by *backend.
 *
 * The backend's init() is called with resampler_config, bw_ratio and the
 * current CPU feature mask; its result is stored in *re.
 * Returns true when init() produced a non-NULL handle, false otherwise. */
static bool resampler_append_plugs(void **re,
      const rarch_resampler_t **backend, double bw_ratio)
{
   resampler_simd_mask_t simd = rarch_get_cpu_features();
   void *handle = (*backend)->init(&resampler_config, bw_ratio, simd);

   *re = handle;
   return handle ? true : false;
}
/* validate_cpu_features: * * Validates CPU features for given processor architecture. * * Make sure we haven't compiled for something we cannot run. * Ideally, code would get swapped out depending on CPU support, * but this will do for now. */ static void validate_cpu_features(void) { uint64_t cpu = rarch_get_cpu_features(); (void)cpu; #ifdef __SSE__ if (!(cpu & RETRO_SIMD_SSE)) FAIL_CPU("SSE"); #endif #ifdef __SSE2__ if (!(cpu & RETRO_SIMD_SSE2)) FAIL_CPU("SSE2"); #endif #ifdef __AVX__ if (!(cpu & RETRO_SIMD_AVX)) FAIL_CPU("AVX"); #endif }
/* Fills dsp with the statically linked DSP plugs.
 *
 * One slot is allocated per entry of dsp_plugs_builtin, and each entry is
 * asked for its implementation using the current CPU feature mask.
 * list is not used by this variant.
 *
 * Returns false when the allocation fails or when any builtin entry
 * yields no implementation; in the latter case dsp->plugs and
 * dsp->num_plugs have already been set. Returns true otherwise. */
static bool append_plugs(rarch_dsp_filter_t *dsp, struct string_list *list)
{
   unsigned idx;
   const size_t builtin_count = ARRAY_SIZE(dsp_plugs_builtin);
   dspfilter_simd_mask_t simd = rarch_get_cpu_features();

   (void)list;

   dsp->plugs = (struct rarch_dsp_plug*)
      calloc(builtin_count, sizeof(*dsp->plugs));
   if (!dsp->plugs)
      return false;

   dsp->num_plugs = builtin_count;

   for (idx = 0; idx < builtin_count; idx++)
   {
      const struct dspfilter_implementation *impl =
         dsp_plugs_builtin[idx](simd);

      dsp->plugs[idx].impl = impl;
      if (!impl)
         return false;
   }

   return true;
}
const char *rarch_softfilter_get_name(void *data) { (void)data; #ifdef HAVE_FILTERS_BUILTIN unsigned cpu_features; const struct softfilter_implementation *impl; softfilter_get_implementation_t cb = (softfilter_get_implementation_t)softfilter_get_implementation_from_idx(g_settings.video.filter_idx); if (cb) { cpu_features = rarch_get_cpu_features(); impl = (const struct softfilter_implementation *)cb(cpu_features); if (impl) return impl->ident; } return NULL; #else rarch_softfilter_t *filt = (rarch_softfilter_t*)data; if (!filt || !filt->impl) return NULL; return filt->impl->ident; #endif }
rarch_softfilter_t *rarch_softfilter_new(const char *filter_path, unsigned threads, enum retro_pixel_format in_pixel_format, unsigned max_width, unsigned max_height) { unsigned i, cpu_features, output_fmts, input_fmts, input_fmt; softfilter_get_implementation_t cb; i = 0; (void)i; (void)filter_path; rarch_softfilter_t *filt = (rarch_softfilter_t*)calloc(1, sizeof(*filt)); if (!filt) return NULL; cb = NULL; #if defined(HAVE_FILTERS_BUILTIN) cb = (softfilter_get_implementation_t)softfilter_get_implementation_from_idx(g_settings.video.filter_idx); #elif defined(HAVE_DYLIB) filt->lib = dylib_load(filter_path); if (!filt->lib) goto error; cb = (softfilter_get_implementation_t)dylib_proc(filt->lib, "softfilter_get_implementation"); #endif if (!cb) { RARCH_ERR("Couldn't find softfilter symbol.\n"); goto error; } cpu_features = rarch_get_cpu_features(); filt->impl = cb(cpu_features); if (!filt->impl) goto error; RARCH_LOG("Loaded softfilter \"%s\".\n", filt->impl->ident); if (filt->impl->api_version != SOFTFILTER_API_VERSION) { RARCH_ERR("Softfilter ABI mismatch.\n"); goto error; } // Simple assumptions. filt->pix_fmt = in_pixel_format; input_fmts = filt->impl->query_input_formats(); switch (in_pixel_format) { case RETRO_PIXEL_FORMAT_XRGB8888: input_fmt = SOFTFILTER_FMT_XRGB8888; break; case RETRO_PIXEL_FORMAT_RGB565: input_fmt = SOFTFILTER_FMT_RGB565; break; default: goto error; } if (!(input_fmt & input_fmts)) { RARCH_ERR("Softfilter does not support input format.\n"); goto error; } output_fmts = filt->impl->query_output_formats(input_fmt); if (output_fmts & input_fmt) // If we have a match of input/output formats, use that. 
filt->out_pix_fmt = in_pixel_format; else if (output_fmts & SOFTFILTER_FMT_XRGB8888) filt->out_pix_fmt = RETRO_PIXEL_FORMAT_XRGB8888; else if (output_fmts & SOFTFILTER_FMT_RGB565) filt->out_pix_fmt = RETRO_PIXEL_FORMAT_RGB565; else { RARCH_ERR("Did not find suitable output format for softfilter.\n"); goto error; } filt->max_width = max_width; filt->max_height = max_height; filt->impl_data = filt->impl->create(input_fmt, input_fmt, max_width, max_height, threads != RARCH_SOFTFILTER_THREADS_AUTO ? threads : rarch_get_cpu_cores(), cpu_features); if (!filt->impl_data) { RARCH_ERR("Failed to create softfilter state.\n"); goto error; } threads = filt->impl->query_num_threads(filt->impl_data); if (!threads) { RARCH_ERR("Invalid number of threads.\n"); goto error; } RARCH_LOG("Using %u threads for softfilter.\n", threads); filt->packets = (struct softfilter_work_packet*)calloc(threads, sizeof(*filt->packets)); if (!filt->packets) { RARCH_ERR("Failed to allocate softfilter packets.\n"); goto error; } #ifdef HAVE_THREADS filt->thread_data = (struct filter_thread_data*)calloc(threads, sizeof(*filt->thread_data)); if (!filt->thread_data) goto error; filt->threads = threads; for (i = 0; i < threads; i++) { filt->thread_data[i].userdata = filt->impl_data; filt->thread_data[i].done = true; filt->thread_data[i].lock = slock_new(); if (!filt->thread_data[i].lock) goto error; filt->thread_data[i].cond = scond_new(); if (!filt->thread_data[i].cond) goto error; filt->thread_data[i].thread = sthread_create(filter_thread_loop, &filt->thread_data[i]); if (!filt->thread_data[i].thread) goto error; } #endif return filt; error: rarch_softfilter_free(filt); return NULL; }
static void *resampler_sinc_new(double bandwidth_mod) { rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)calloc(1, sizeof(*re)); if (!re) return NULL; memset(re, 0, sizeof(*re)); re->taps = TAPS; double cutoff = CUTOFF; // Downsampling, must lower cutoff, and extend number of taps accordingly to keep same stopband attenuation. if (bandwidth_mod < 1.0) { cutoff *= bandwidth_mod; re->taps = (unsigned)ceil(re->taps / bandwidth_mod); } // Be SIMD-friendly. #if (defined(__AVX__) && ENABLE_AVX) || defined(HAVE_NEON) re->taps = (re->taps + 7) & ~7; #else re->taps = (re->taps + 3) & ~3; #endif size_t phase_elems = (1 << PHASE_BITS) * re->taps; #if SINC_COEFF_LERP phase_elems *= 2; #endif size_t elems = phase_elems + 4 * re->taps; re->main_buffer = (float*)aligned_alloc__(128, sizeof(float) * elems); if (!re->main_buffer) goto error; re->phase_table = re->main_buffer; re->buffer_l = re->main_buffer + phase_elems; re->buffer_r = re->buffer_l + 2 * re->taps; init_sinc_table(re, cutoff, re->phase_table, 1 << PHASE_BITS, re->taps, SINC_COEFF_LERP); #if defined(__AVX__) && ENABLE_AVX RARCH_LOG("Sinc resampler [AVX]\n"); #elif defined(__SSE__) RARCH_LOG("Sinc resampler [SSE]\n"); #elif defined(HAVE_NEON) struct rarch_cpu_features cpu; rarch_get_cpu_features(&cpu); process_sinc_func = cpu.simd & RARCH_SIMD_NEON ? process_sinc_neon : process_sinc_C; RARCH_LOG("Sinc resampler [%s]\n", cpu.simd & RARCH_SIMD_NEON ? "NEON" : "C"); #else RARCH_LOG("Sinc resampler [C]\n"); #endif RARCH_LOG("SINC params (%u phase bits, %u taps).\n", PHASE_BITS, re->taps); return re; error: resampler_sinc_free(re); return NULL; }
int rarch_info_get_capabilities(enum rarch_capabilities type, char *s, size_t len) { switch (type) { case RARCH_CAPABILITIES_CPU: { uint64_t cpu = rarch_get_cpu_features(); if (cpu & RETRO_SIMD_MMX) strlcat(s, "MMX ", len); if (cpu & RETRO_SIMD_MMXEXT) strlcat(s, "MMXEXT ", len); if (cpu & RETRO_SIMD_SSE) strlcat(s, "SSE1 ", len); if (cpu & RETRO_SIMD_SSE2) strlcat(s, "SSE2 ", len); if (cpu & RETRO_SIMD_SSE3) strlcat(s, "SSE3 ", len); if (cpu & RETRO_SIMD_SSSE3) strlcat(s, "SSSE3 ", len); if (cpu & RETRO_SIMD_SSE4) strlcat(s, "SSE4 ", len); if (cpu & RETRO_SIMD_SSE42) strlcat(s, "SSE4.2 ", len); if (cpu & RETRO_SIMD_AVX) strlcat(s, "AVX ", len); if (cpu & RETRO_SIMD_AVX2) strlcat(s, "AVX2 ", len); if (cpu & RETRO_SIMD_VFPU) strlcat(s, "VFPU ", len); if (cpu & RETRO_SIMD_NEON) strlcat(s, "NEON ", len); if (cpu & RETRO_SIMD_PS) strlcat(s, "PS ", len); if (cpu & RETRO_SIMD_AES) strlcat(s, "AES ", len); if (cpu & RETRO_SIMD_VMX) strlcat(s, "VMX ", len); if (cpu & RETRO_SIMD_VMX128) strlcat(s, "VMX128 ", len); } break; case RARCH_CAPABILITIES_COMPILER: #if defined(_MSC_VER) snprintf(s, len, "Compiler: MSVC (%d) %u-bit", _MSC_VER, (unsigned) (CHAR_BIT * sizeof(size_t))); #elif defined(__SNC__) snprintf(s, len, "Compiler: SNC (%d) %u-bit", __SN_VER__, (unsigned)(CHAR_BIT * sizeof(size_t))); #elif defined(_WIN32) && defined(__GNUC__) snprintf(s, len, "Compiler: MinGW (%d.%d.%d) %u-bit", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__, (unsigned) (CHAR_BIT * sizeof(size_t))); #elif defined(__clang__) snprintf(s, len, "Compiler: Clang/LLVM (%s) %u-bit", __clang_version__, (unsigned)(CHAR_BIT * sizeof(size_t))); #elif defined(__GNUC__) snprintf(s, len, "Compiler: GCC (%d.%d.%d) %u-bit", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__, (unsigned) (CHAR_BIT * sizeof(size_t))); #else snprintf(s, len, "Unknown compiler %u-bit", (unsigned)(CHAR_BIT * sizeof(size_t))); #endif break; default: case RARCH_CAPABILITIES_NONE: break; } return 0; }