int main(int argc, char **argv) { int i; #define LEN 1024 FFTSample *ref = av_malloc_array(LEN, sizeof(*ref)); FFTSample *data = av_malloc_array(LEN, sizeof(*data)); RDFTContext *rdft_context = av_rdft_init(10, DFT_R2C); RDFTContext *irdft_context = av_rdft_init(10, IDFT_C2R); if (!ref || !data || !rdft_context || !irdft_context) return 2; for (i=0; i<LEN; i++) { ref[i] = data[i] = i*456 + 123 + i*i; } av_rdft_calc(rdft_context, data); av_rdft_calc(irdft_context, data); for (i=0; i<LEN; i++) { if (fabs(ref[i] - data[i]/LEN*2) > 1) { fprintf(stderr, "Failed at %d (%f %f)\n", i, ref[i], data[i]/LEN*2); return 1; } } av_rdft_end(rdft_context); av_rdft_end(irdft_context); av_free(data); av_free(ref); return 0; }
/*
 * Output-link configuration: create one inverse RDFT context per output
 * channel, fill the per-channel output gain table and allocate the output
 * and overlap frames.
 *
 * Returns 0 on success or AVERROR(ENOMEM) if any allocation fails.
 */
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AudioSurroundContext *s = ctx->priv;
    int idx;
    int center;
    int lfe;

    s->irdft = av_calloc(outlink->channels, sizeof(*s->irdft));
    if (!s->irdft)
        return AVERROR(ENOMEM);

    /* one complex-to-real transform per output channel */
    for (idx = 0; idx < outlink->channels; idx++) {
        if (!(s->irdft[idx] = av_rdft_init(ff_log2(s->buf_size), IDFT_C2R)))
            return AVERROR(ENOMEM);
    }

    s->nb_out_channels = outlink->channels;
    s->output_levels   = av_malloc_array(s->nb_out_channels,
                                         sizeof(*s->output_levels));
    if (!s->output_levels)
        return AVERROR(ENOMEM);

    /* every channel starts at the global output level ... */
    for (idx = 0; idx < s->nb_out_channels; idx++)
        s->output_levels[idx] = s->level_out;

    /* ... then front-center and LFE, when present, get their extra gain */
    center = av_get_channel_layout_channel_index(outlink->channel_layout,
                                                 AV_CH_FRONT_CENTER);
    if (center >= 0)
        s->output_levels[center] *= s->fc_out;

    lfe = av_get_channel_layout_channel_index(outlink->channel_layout,
                                              AV_CH_LOW_FREQUENCY);
    if (lfe >= 0)
        s->output_levels[lfe] *= s->lfe_out;

    s->output         = ff_get_audio_buffer(outlink, s->buf_size * 2);
    s->overlap_buffer = ff_get_audio_buffer(outlink, s->buf_size * 2);
    if (!s->output || !s->overlap_buffer)
        return AVERROR(ENOMEM);

    return 0;
}
/**
 * Register caller-owned capture buffers on the global player state.
 *
 * When pcm is non-NULL a WaveBuffer is created for it.  When pcm, left and
 * right are all non-NULL an FFTBuffer with a real-to-complex RDFT context
 * is created as well.
 *
 * @param pcm      destination for captured PCM bytes, or NULL
 * @param left     destination for left-channel FFT data, or NULL
 * @param right    destination for right-channel FFT data, or NULL
 * @param max_size size of pcm in bytes; must be non-zero
 * @return the FFT window size when the FFT buffer was set up, 0 when only
 *         the wave buffer (or nothing) was set up, -1 on invalid size or
 *         when an allocation / RDFT initialisation fails.  On failure the
 *         global state is left untouched.
 */
int DLL_EXPORT FFMPEG_API setDataCaptureBuffer(uint8_t *pcm, float *left, float *right, unsigned int max_size)
{
    const int want_fft = pcm && left && right;
    WaveBuffer *wave = NULL;
    FFTBuffer *fft = NULL;
    uint8_t *tmp_buffer = NULL;
    int nbits = 0;

    if (max_size == 0) {
        return -1;
    }
    if (!pcm) {
        return 0;
    }

    if (want_fft) {
        // max_size is uint8_t type data and we assume we have 2 channels.
        size_t frames = max_size / (sizeof(int16_t) * 2);

        // Fewer than one stereo frame: log2(0) would be undefined.
        if (frames == 0) {
            return -1;
        }
        // Integer floor(log2(frames)).
        while (frames > 1) {
            frames >>= 1;
            nbits++;
        }
    }

    // Do every fallible step before touching the global state so that a
    // failure can be undone with plain free().
    wave = (WaveBuffer*)malloc(sizeof(WaveBuffer));
    tmp_buffer = (uint8_t*)malloc(max_size);
    if (!wave || !tmp_buffer) {
        goto fail;
    }

    if (want_fft) {
        fft = (FFTBuffer*)malloc(sizeof(FFTBuffer));
        if (!fft) {
            goto fail;
        }
        // Init FFT
        fft->ctx = av_rdft_init(nbits, DFT_R2C);
        if (!fft->ctx) {
            goto fail;
        }
    }

    wave->out_buffer = pcm;
    wave->tmp_buffer = tmp_buffer;
    wave->max_size = max_size;
    wave->index = 0;
    wave->mutex = SDL_CreateMutex();
    state->wave = wave;

    if (!want_fft) {
        return 0;
    }

    fft->left_buffer = left;
    fft->right_buffer = right;
    fft->max_size = max_size;
    fft->nbits = nbits;
    fft->window_size = 1 << fft->nbits;
    fft->mutex = SDL_CreateMutex();
    state->fft = fft;

    return fft->window_size;

fail:
    free(fft);
    free(tmp_buffer);
    free(wave);
    return -1;
}
/*
 * Set up an RDFT context of order nbits for the transform type trans.
 *
 * With AVFFT enabled a new context is obtained from av_rdft_init() and
 * stored in *r (NULL-checking is left to the caller).  Otherwise
 * ff_rdft_init() is run on the context *r already points to; its return
 * value is not inspected here.
 */
static inline void rdft_init(RDFTContext **r, int nbits,
                             enum RDFTransformType trans)
{
#if AVFFT
    *r = av_rdft_init(nbits, trans);
#else
    ff_rdft_init(*r, nbits, trans);
#endif
}
// Allocates the window and input buffers (frame_size samples each), fills
// the window with Hamming coefficients scaled by 1 / INT16_MAX, and creates
// a real-to-complex RDFT context whose order is floor(log2(frame_size)).
FFTLib::FFTLib(size_t frame_size) : m_frame_size(frame_size)
{
	const size_t buffer_bytes = sizeof(FFTSample) * frame_size;

	m_window = (FFTSample *) av_malloc(buffer_bytes);
	m_input = (FFTSample *) av_malloc(buffer_bytes);
	PrepareHammingWindow(m_window, m_window + frame_size, 1.0 / INT16_MAX);

	// Index of the highest set bit of frame_size (-1 when frame_size is 0).
	int bits = -1;
	for (size_t remaining = frame_size; remaining != 0; remaining >>= 1) {
		++bits;
	}

	m_rdft_ctx = av_rdft_init(bits, DFT_R2C);
}
/*
 * Initialise one partition segment: per-channel forward/inverse RDFT
 * contexts, per-channel bookkeeping arrays, size fields derived from
 * part_size, and the audio work buffers.
 *
 * Returns 0 on success or AVERROR(ENOMEM) as soon as an allocation fails;
 * anything allocated up to that point is left in seg.
 */
static int init_segment(AVFilterContext *ctx, AudioFIRSegment *seg,
                        int offset, int nb_partitions, int part_size)
{
    AudioFIRContext *s = ctx->priv;

    seg->rdft  = av_calloc(ctx->inputs[0]->channels, sizeof(*seg->rdft));
    seg->irdft = av_calloc(ctx->inputs[0]->channels, sizeof(*seg->irdft));
    if (!(seg->rdft && seg->irdft))
        return AVERROR(ENOMEM);

    /* geometry of the segment */
    seg->fft_length    = part_size * 2 + 1;
    seg->part_size     = part_size;
    seg->block_size    = FFALIGN(seg->fft_length, 32);
    seg->coeff_size    = FFALIGN(seg->part_size + 1, 32);
    seg->nb_partitions = nb_partitions;
    seg->input_size    = offset + s->min_part_size;
    seg->input_offset  = offset;

    seg->part_index    = av_calloc(ctx->inputs[0]->channels, sizeof(*seg->part_index));
    seg->output_offset = av_calloc(ctx->inputs[0]->channels, sizeof(*seg->output_offset));
    if (!(seg->part_index && seg->output_offset))
        return AVERROR(ENOMEM);

    /* a forward and an inverse transform of 2 * part_size points per channel */
    for (int c = 0; c < ctx->inputs[0]->channels; c++) {
        seg->rdft[c]  = av_rdft_init(av_log2(2 * part_size), DFT_R2C);
        seg->irdft[c] = av_rdft_init(av_log2(2 * part_size), IDFT_C2R);
        if (!(seg->rdft[c] && seg->irdft[c]))
            return AVERROR(ENOMEM);
    }

    /* work buffers; coeff is shaped after input 1, the rest after input 0 */
    seg->sum    = ff_get_audio_buffer(ctx->inputs[0], seg->fft_length);
    seg->block  = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions * seg->block_size);
    seg->buffer = ff_get_audio_buffer(ctx->inputs[0], seg->part_size);
    seg->coeff  = ff_get_audio_buffer(ctx->inputs[1], seg->nb_partitions * seg->coeff_size * 2);
    seg->input  = ff_get_audio_buffer(ctx->inputs[0], seg->input_size);
    seg->output = ff_get_audio_buffer(ctx->inputs[0], seg->part_size);
    if (!(seg->buffer && seg->sum && seg->block &&
          seg->coeff && seg->input && seg->output))
        return AVERROR(ENOMEM);

    return 0;
}
// Creates a fresh RDFT context for a transform of fftSize points.
// |trans| is forwarded to av_rdft_init() as an RDFTransformType.
// A new context is created on every call; nothing is cached here.
RDFTContext* FFTFrame::contextForSize(unsigned fftSize, int trans)
{
    // FIXME: This is not optimal. Ideally, FFTFrames of the same size would share a context,
    // but FFmpeg's RDFT keeps a scratch buffer inside the context, so contexts are not thread-safe.
    // Sharing them on a per-thread basis would be one way to improve this.
    ASSERT(fftSize);

    const int pow2size = static_cast<int>(log2(fftSize));
    ASSERT(pow2size < kMaxFFTPow2Size);

    return av_rdft_init(pow2size, static_cast<RDFTransformType>(trans));
}
/*
 * Input-link configuration: create one forward RDFT context per input
 * channel, fill the per-channel input gain table, allocate the input frame
 * and the sample FIFO, and derive the low/high cut positions.
 *
 * Returns 0 on success or AVERROR(ENOMEM) if any allocation fails.
 */
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    AudioSurroundContext *s = ctx->priv;
    int idx;
    int center;
    int lfe;

    s->rdft = av_calloc(inlink->channels, sizeof(*s->rdft));
    if (!s->rdft)
        return AVERROR(ENOMEM);

    /* one real-to-complex transform per input channel */
    for (idx = 0; idx < inlink->channels; idx++) {
        if (!(s->rdft[idx] = av_rdft_init(ff_log2(s->buf_size), DFT_R2C)))
            return AVERROR(ENOMEM);
    }

    s->nb_in_channels = inlink->channels;
    s->input_levels   = av_malloc_array(s->nb_in_channels,
                                        sizeof(*s->input_levels));
    if (!s->input_levels)
        return AVERROR(ENOMEM);

    /* every channel starts at the global input level ... */
    for (idx = 0; idx < s->nb_in_channels; idx++)
        s->input_levels[idx] = s->level_in;

    /* ... then front-center and LFE, when present, get their extra gain */
    center = av_get_channel_layout_channel_index(inlink->channel_layout,
                                                 AV_CH_FRONT_CENTER);
    if (center >= 0)
        s->input_levels[center] *= s->fc_in;

    lfe = av_get_channel_layout_channel_index(inlink->channel_layout,
                                              AV_CH_LOW_FREQUENCY);
    if (lfe >= 0)
        s->input_levels[lfe] *= s->lfe_in;

    s->input = ff_get_audio_buffer(inlink, s->buf_size * 2);
    if (!s->input)
        return AVERROR(ENOMEM);

    s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->buf_size);
    if (!s->fifo)
        return AVERROR(ENOMEM);

    /*
     * Cut frequencies expressed relative to half the sample rate and scaled
     * by buf_size / 2 (presumably a spectrum bin position).
     */
    s->lowcut  = 1.f * s->lowcutf  / (inlink->sample_rate * 0.5) * (s->buf_size / 2);
    s->highcut = 1.f * s->highcutf / (inlink->sample_rate * 0.5) * (s->buf_size / 2);

    return 0;
}
int main() { int N = 1000000; // 1M int nbits = 11; int input_size = 1 << nbits; int output_size = (1 << (nbits - 1)) + 1; float *input = malloc(input_size * sizeof(float)); float *output = malloc(output_size * sizeof(float)); struct RDFTContext *cx = av_rdft_init(nbits, DFT_R2C); float f = M_PI; for (int i = 0; i < input_size; ++i) { f = floorf(f * M_PI); input[i] = f; } for (int k = 0; k < N; k++ ) { av_rdft_calc(cx, input); } av_rdft_end(cx); return 0; }
/**
 * Prepare filter for processing audio data of given format,
 * sample rate and number of channels.
 *
 * Picks a power-of-two window of at least sample_rate / 24 samples,
 * (re)allocates every buffer that depends on it, recreates both rDFT
 * contexts and rebuilds the Hann window table.
 *
 * @return 0 on success, AVERROR(ENOMEM) if an rDFT context cannot be
 *         created (buffers are released first).  REALLOC_OR_FAIL may also
 *         leave the function early; see its definition.
 */
static int yae_reset(ATempoContext *atempo,
                     enum AVSampleFormat format,
                     int sample_rate,
                     int channels)
{
    const int sample_size = av_get_bytes_per_sample(format);
    uint32_t bits = 0;
    uint32_t rounded;
    int i;

    atempo->format   = format;
    atempo->channels = channels;
    atempo->stride   = sample_size * channels;

    // start from a segment window of 1/24 s worth of samples ...
    atempo->window = sample_rate / 24;

    // ... and round it up to the next power of two if it is not one already:
    bits    = av_log2(atempo->window);
    rounded = 1 << bits;
    av_assert0(rounded <= atempo->window);

    if (rounded < atempo->window) {
        atempo->window = rounded * 2;
        bits++;
    }

    // audio fragment buffers (raw samples and transform data):
    REALLOC_OR_FAIL(atempo->frag[0].data, atempo->window * atempo->stride);
    REALLOC_OR_FAIL(atempo->frag[1].data, atempo->window * atempo->stride);
    REALLOC_OR_FAIL(atempo->frag[0].xdat, atempo->window * sizeof(FFTComplex));
    REALLOC_OR_FAIL(atempo->frag[1].xdat, atempo->window * sizeof(FFTComplex));

    // drop any previous rDFT contexts before creating new ones:
    av_rdft_end(atempo->real_to_complex);
    atempo->real_to_complex = NULL;

    av_rdft_end(atempo->complex_to_real);
    atempo->complex_to_real = NULL;

    // the transforms are one order larger than the window:
    atempo->real_to_complex = av_rdft_init(bits + 1, DFT_R2C);
    if (!atempo->real_to_complex) {
        yae_release_buffers(atempo);
        return AVERROR(ENOMEM);
    }

    atempo->complex_to_real = av_rdft_init(bits + 1, IDFT_C2R);
    if (!atempo->complex_to_real) {
        yae_release_buffers(atempo);
        return AVERROR(ENOMEM);
    }

    REALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(FFTComplex));

    // the ring buffer holds three windows:
    atempo->ring = atempo->window * 3;
    REALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride);

    // Hann window table, one coefficient per window sample:
    REALLOC_OR_FAIL(atempo->hann, atempo->window * sizeof(float));

    for (i = 0; i < atempo->window; i++) {
        double t = (double)i / (double)(atempo->window - 1);
        atempo->hann[i] = (float)(0.5 * (1.0 - cos(2.0 * M_PI * t)));
    }

    yae_clear(atempo);
    return 0;
}
float freq_sort(struct song song) { float hann_window[WIN_SIZE]; int Samples; FFTSample *spectre_moy; float tab_bandes[5]; float tab_sum; int nFrames; int d, iFrame; size_t i; FFTSample* x; RDFTContext* fft; float freq = 0; float pas_freq; FILE *file1; FILE *file2; if (debug) { } float peak; float resnum_freq = 0; peak=0; for(i = 0; i < WIN_SIZE; ++i) hann_window[i] = .5f - .5f*cos(2*M_PI*i/(WIN_SIZE-1)); spectre_moy = (FFTSample*)av_malloc((WIN_SIZE*sizeof(FFTSample))); for(i = 0; i <= WIN_SIZE/2; ++i) spectre_moy[i] = 0.0f; for(i = 0; i < 5;++i) tab_bandes[i] = 0.0f; Samples = song.nSamples; Samples /= song.channels; // Only one channel if(Samples%WIN_SIZE > 0) Samples -= Samples%WIN_SIZE; nFrames = Samples/WIN_SIZE; x = (FFTSample*)av_malloc(WIN_SIZE*sizeof(FFTSample)); fft = av_rdft_init(WIN_BITS, DFT_R2C); for(i=0, iFrame = 0; iFrame < nFrames; i+=song.channels*WIN_SIZE, iFrame++) { if (song.nb_bytes_per_sample == 2) { for(d = 0; d < WIN_SIZE; d++) x[d] = (float)((((int16_t*)song.sample_array)[i+2*d] + ((int16_t*)song.sample_array)[i+2*d+1])/2)*hann_window[d]; } else if (song.nb_bytes_per_sample == 4) { for(d = 0; d < WIN_SIZE; d++) x[d] = (float)(( ((int32_t*)song.sample_array)[i+2*d] + ((int32_t*)song.sample_array)[i+2*d+1] ) / 2)*hann_window[d]; } av_rdft_calc(fft, x); for(d = 1; d < WIN_SIZE/2; ++d) { // 1? float re = x[d*2]; float im = x[d*2+1]; float raw = re*re + im*im; spectre_moy[d] += raw; } spectre_moy[0] = x[0]*x[0]; } for(d=1;d<=WIN_SIZE/2;++d) { spectre_moy[d] /= WIN_SIZE; spectre_moy[d] = sqrt(spectre_moy[d]); peak = spectre_moy[d] < peak ? 
peak : spectre_moy[d]; } for(d=1;d<=WIN_SIZE/2;++d) { float x = spectre_moy[d]/peak; spectre_moy[d] = 20*log10(x)-3; } pas_freq = song.sample_rate/WIN_SIZE; if (debug) { file1 = fopen("file_freq1.txt", "w"); file2 = fopen("file_freq2.txt", "w"); for(d=1;d<WIN_SIZE/2;++d) { freq += pas_freq; fprintf(file1, "%f\n", freq); fprintf(file2, "%f\n", spectre_moy[d]); break; } } tab_bandes[0] = (spectre_moy[1]+spectre_moy[2])/2; tab_bandes[1] = (spectre_moy[3]+spectre_moy[4])/2; for(i = 5; i <= 30; ++i) tab_bandes[2] += spectre_moy[i]; tab_bandes[2] /= (29 - 4); for(i = 31; i <= 59; ++i) tab_bandes[3] += spectre_moy[i]; tab_bandes[3] /= (58-30); for(i = 60; i <= 117; ++i) tab_bandes[4] += spectre_moy[i]; tab_bandes[4] /= (116 - 59); tab_sum = tab_bandes[4] + tab_bandes[3] + tab_bandes[2] - tab_bandes[0] - tab_bandes[1]; if (tab_sum > -66.1) resnum_freq = 2; else if (tab_sum > -68.) resnum_freq = 1; else if (tab_sum > -71) resnum_freq = -1; else resnum_freq = -2; resnum_freq = ((float)1/3)*tab_sum + ((float)68/3); if (debug) { printf("\n"); printf("-> Freq debug\n"); printf("Low frequencies: %f\n", tab_bandes[0]); printf("Mid-low frequencices: %f\n", tab_bandes[1]); printf("Mid frequencies: %f\n", tab_bandes[2]); // Marche bien pour Combichrist (?) (27.1 = no strict) TODO printf("Mid-high frequencies: %f\n", tab_bandes[3]); printf("High frequencies: %f\n", tab_bandes[4]); printf("Criterion: Loud > -66.1 > -68 > -71 > Calm\n"); printf("Sum: %f\n", tab_sum); printf("Freq result: %f\n", resnum_freq); } //resnum_freq = -2*(tab_sum + 68.0f)/(tab_sum - 68.0f); return (resnum_freq); }
/*
 * Create an inverse (IDFT_C2R) RDFT context for length len.
 * The transform order passed on is log2(len) rounded to the nearest integer.
 * Returns whatever av_rdft_init() returns.
 */
static void *backward_setup(int len)
{
    const int nbits = (int)(log(len)/log(2)+.5);

    return av_rdft_init(nbits, IDFT_C2R);
}
/*
 * Create a forward (DFT_R2C) RDFT context for length len.
 * The transform order passed on is log2(len) rounded to the nearest integer.
 * Returns whatever av_rdft_init() returns.
 */
static void *forward_setup(int len)
{
    const int nbits = (int)(log(len)/log(2)+.5);

    return av_rdft_init(nbits, DFT_R2C);
}
float bl_frequency_sort(struct bl_song const * const song) { // FFT transform context RDFTContext* fft; // Hann window values float hann_window[WINDOW_SIZE]; // Number of frames, that is number of juxtaposed windows in the music int n_frames; // Complex DFT of input FFTSample* x; // Hold FFT power spectrum FFTSample *power_spectrum; // Power maximum value float peak = 0; // Array containing frequency mean of different bands float bands[5]; // Weighted sum of frequency bands float bands_sum; // Initialize Hann window for(int i = 0; i < WINDOW_SIZE; ++i) { hann_window[i] = .5f * (1.0f - cos(2 * M_PI * i / (WINDOW_SIZE - 1))); } // Initialize band array for(int i = 0; i < 5; ++i) { bands[i] = 0.0f; } // Get the number of frames in one channel n_frames = floor((song->nSamples / song->channels) / WINDOW_SIZE); // Allocate memory for x vector x = (FFTSample*)av_malloc(WINDOW_SIZE * sizeof(FFTSample)); // Zero-initialize power spectrum power_spectrum = (FFTSample*) av_malloc((WINDOW_SIZE * sizeof(FFTSample)) / 2 + 1*sizeof(FFTSample)); for(int i = 0; i <= WINDOW_SIZE / 2; ++i) { // 2 factor due to x's complex nature and power_spectrum's real nature. 
power_spectrum[i] = 0.0f; } // Initialize fft fft = av_rdft_init(WIN_BITS, DFT_R2C); for(int i = 0; i < n_frames * WINDOW_SIZE * song->channels; i += song->channels * WINDOW_SIZE) { if(2 == song->channels) { // Stereo sound for(int d = 0; d < WINDOW_SIZE; ++d) { x[d] = (float)((((int16_t*)song->sample_array)[i+2*d] + ((int16_t*)song->sample_array)[i+2*d+1])/2) * hann_window[d]; } } else { // Mono sound for(int d = 0; d < WINDOW_SIZE; ++d) { x[d] = (float)(((int16_t*)song->sample_array)[i+d])*hann_window[d]; } } // Compute FFT av_rdft_calc(fft, x); // Fill-in power spectrum power_spectrum[0] = x[0] * x[0]; // Ignore x[1] due to ffmpeg's fft specifity for(int d = 1; d < WINDOW_SIZE / 2; ++d) { float re = x[d * 2]; float im = x[d * 2 + 1]; float raw = (re * re) + (im * im); power_spectrum[d] += raw; } } // Normalize it and compute real power in dB for(int d = 1; d <= WINDOW_SIZE / 2; ++d) { power_spectrum[d] = sqrt(power_spectrum[d] / WINDOW_SIZE); // Get power spectrum peak peak = fmax(power_spectrum[d], peak); } // Compute power spectrum in dB with 3dB attenuation for(int d = 1; d <= WINDOW_SIZE / 2; ++d) { power_spectrum[d] = 20 * log10(power_spectrum[d] / peak) - 3; } // Sum power in frequency bands // Arbitrary separation in frequency bands bands[0] = (power_spectrum[1] + power_spectrum[2]) / 2; bands[1] = (power_spectrum[3] + power_spectrum[4]) / 2; for(int i = LOW_INF; i <= LOW_SUP; ++i) { bands[2] += power_spectrum[i]; } bands[2] /= (LOW_SUP - LOW_INF); for(int i = LOW_SUP + 1; i <= HIGH_INF; ++i) { bands[3] += power_spectrum[i]; } bands[3] /= (HIGH_INF - (LOW_SUP + 1)); for(int i = HIGH_INF + 1; i <= HIGH_SUP; ++i) { bands[4] += power_spectrum[i]; } bands[4] /= (HIGH_SUP - (HIGH_INF + 1)); bands_sum = bands[4] + bands[3] + bands[2] - bands[0] - bands[1]; // Clean everything av_free(x); av_free(power_spectrum); av_rdft_end(fft); // Return final score, weighted by coefficients in order to have -4 for a panel of calm songs, // and 4 for a panel of loud songs. 
(only useful if you want an absolute « Loud » and « Calm » result return ((1. / 3.) * bands_sum + 68. / 3.); }