/
my_psycho.c
302 lines (246 loc) · 8.71 KB
/
my_psycho.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
/*---------------------------------------------------------------------
* Psychoacoustic model
*
* Tonal masking based on "Algorithm for extraction of pitch and pitch
* salience from complex tonal signals", Terhardt et al.
*
* Non-tonal masking taken from Ambikairajah
*--------------------------------------------------------------------*/
#include <math.h>
#include <complex.h>
#include <stdio.h>
#include <stdlib.h>
#include <sndfile.h>
#include "watermark.h"
#include "my_psycho.h"
#include "w_array_ops.h"
double sample_rate = 0;
double cb_limits[25] = { 20, 100, 200, 300, 400, 510, 630, 770,
920, 1080, 1270, 1480, 1720, 2000, 2320, 2700,
3150, 3700, 4400, 5300, 6400, 7700, 9500,
12000, 15500};
double min_amplitude = 0;
void set_sample_rate(double s)
{
sample_rate = s;
}
double hz_to_bark(double hz)
{/*{{{*/
// according to terhardt
return 13 * atan(.00076*hz ) + 3.5*atan( (hz/7500.0)*(hz/7500.0) );
// according to Prof Ambikairajah...
//if(hz > 1500)
// return 8.7 + 14.2*log10(hz);
//return 13.0 * atan(.76 * hz);
}/*}}}*/
int get_critband(double hz)
{ //{{{
// band 0 doesn't exist, and should be cut off
int i;
for(i = 0; i < 25; i++)
if(hz <= cb_limits[i]) return i;
return i;
} //}}}
// Returns the frequency of the component at index i, in the complex buffer
// from the fft
double get_freq(int i)
{ //{{{
return (double)i * sample_rate / 1024;
} //}}}
// Returns the closest index in the frequency buffer to freq
int get_i(double freq)
{ //{{{
int i = (int)(freq * 1024 / SAMPLE_RATE + .5);
if(i < 0) i = 0;
if(i >= FREQ_LEN) i = FREQ_LEN-1;
return i;
} //}}}
// Returns an estimate of the power of the amplitude of the frequency component
// in dB. because you need a reference amplitude, I use the minimum amplitude
// in the current frequency frame. Not sure how sound this is.
// NOTE: IF CALLED BEFORE get_amp_dB, will generate runtime exception for
// dividing by 0
double get_dB(const complex a)
{ //{{{
return 20 * log10(pow_elem(a) / min_amplitude);
} //}}}
// amp_dB is set to the amplitudes of freq_buf, in dB
void get_amp_dB(const complex *freq_buf, double *amp_dB)
{ //{{{
min_amplitude = MIN_AMPLITUDE; // global
for(int i = 0; i < FREQ_LEN; i++)
if(pow_elem(freq_buf[i]) < min_amplitude)
min_amplitude = pow_elem(freq_buf[i]);
for(int i = 0; i < FREQ_LEN; i++)
amp_dB[i] = get_dB(freq_buf[i]);
} //}}}
// Terhardt describes the following method of determining whether a component
// is tonal. This implementation assumes amp_db is indexed such that the
// component to be examined is at index 0
// TODO test me
int is_tonal(const double *amp_dB)
{ // {{{
double L_i = amp_dB[0]; // name used in Terhardt
// L_(i-1) < L_i >= L_(i+1)
if(!(amp_dB[-1] < L_i)) return 0;
if(!(amp_dB[1] <= L_i)) return 0;
// L_i - L_j >= 7
for(int j = -3; j <= 3; j++){
if(-1 <= j && j <= 1) continue;
if(!(L_i - amp_dB[j] >= 7)) return 0;
}
return 1;
} // }}}
// iterates through freq_buf, storing the indices of the tonal components in
// indices. returns the number of tonal components. indices is sorted.
// TODO test me
int get_tonal_freqs(const double *amp_dB, int **indices)
{ //{{{
// this is the maximum amount of tonal frequencies. Not actually dynamically
// sizing it now, but leaving open the possibility.
*indices = (int *)malloc(sizeof(int) * (FREQ_LEN/7));
int i_i = 0; // i_i = indices index
// tonal calculations look at i-3...i+3, also don't want nyquist/dc freqs
for(int i = 4; i < FREQ_LEN-4; i++){
if(is_tonal(&_dB[i])){
(*indices)[i_i++] = i;
}
}
// TODO REMOVE
printf("there are %d tonal components\n", i_i);
// END
return i_i;
} //}}}
// generates the sum of L_Ev components in Terhardt (4)
// see also (5), (7)
double get_inter_mask(double *amp_dB, int *tonal_indices, int num_tonal, int u)
{ //{{{
// TODO only go through TONAL INDICES
double f_u = get_freq(u);
double z_u = hz_to_bark(f_u);
double sum = 0;
// calculate the masking effect from other frequencies by using a linear
// approximation of the masking curve
for(int i = 1; i < num_tonal; i++){
int v = tonal_indices[i];
double L_v = amp_dB[v];
double f_v = get_freq(v);
double z_v = hz_to_bark(f_v);
double s; // slope of masking curve is different for above/below v
if(v < u)
s = -24.0 - (230.0/f_v) + 0.2*L_v; // slope of masking curve above masker
else if(v > u)
s = 27; // slope below masker
else continue;
double exp = (L_v - s * (z_v - z_u))/20.0;
sum += pow(10,exp);
}
return sum*sum;
} //}}}
double get_intra_mask(complex *freq_buf, int f, int cb)
{ //{{{
int bandwidth = 0; // the number of frequencies within the critical band
while((f+bandwidth < FREQ_LEN) && get_freq(f+bandwidth) <= cb_limits[cb])
bandwidth++;
// get tonal components
for(int i = 0; i < bandwidth; i++) ;
} //}}}
// equation (8) in terhardt
// TODO test me
double get_quiet_thresh(double f)
{ //{{{
return 3.64*pow(f/1000,-0.8)
- 6.5*exp((-0.6*pow(f/(1000-3.3),2)))
+ .001*pow(f/1000,4);
} //}}}
// returns the masking effect of the noise (non-tonal) components around u,
// I_Nu
double get_noise_mask(double *amp_dB, int *tonal_indices, int num_tonal, int u)
{ //{{{
// Sum all the frequency components within .5 barks of u, skipping the five
// central samples of each tonal component (if i is tonal, no i-2...i+2)
double u_hz = get_freq(u);
double u_bark = hz_to_bark(u_hz);
double cb_width = cb_limits[(int)(u_bark+1)]; // upper bound on freq width
int lo = get_i(u_hz - (cb_width/2));
while(u_bark - hz_to_bark(get_freq(lo)) > .5) lo++;
int hi = get_i(u_hz + (cb_width/2));
while(hz_to_bark(get_freq(hi)) - u_bark > .5) hi--;
int i_i = 0;
double sum = 0;
for(int i = lo; i <= hi; i++){
// ensure tonal_indices[i_i] >= i-2 now
while(i+i < num_tonal && tonal_indices[i_i] < i-2){
i_i++;
}
// no tonal components above i-2
if(i_i >= num_tonal){
sum += amp_dB[i];
continue;
}
if(tonal_indices[i_i] > i+2){
sum += amp_dB[i];
}
// otherwise tonal_indices[i_i] <= i+2, and we're in a tonal component
}
return sum;
} //}}}
// see equation (12) in terhardt
double get_pitch_weight(double spl_excess, double freq)
{ //{{{
if(spl_excess < 0) return 0;
// first part represents the effect of spl_excess - doesn't increase much
// after around 20db of excess.
// second part represents frequency
return (1 - exp((0-spl_excess)/15)) *
pow(1 + 0.07 * pow((freq/(0.7*1000) - (0.7*1000)/freq),2), -1/2);
} //}}}
int get_pitch_saliencies(complex *buffer, int **indices, double **weights)
{ //{{{
// CALCULATION OF SIMULTANEOUS MASKING COEFFICIENTS
// calculate tonal simultaneous masking
// see "Algorithm for extraction of pitch and pitch salience from
// complex tonal signals", by Terhardt, Stoil, Seewann
double amp_dB[FREQ_LEN]; // the amplitude of each freq component in dB
get_amp_dB(buffer,amp_dB);
int *tonal_indices = NULL;
int num_tonal = get_tonal_freqs(amp_dB, &tonal_indices);
// iterate through tonal frequency components, and calculate the sound
// pressure level excess of each one.
double *pitch_weights = (double *)malloc(num_tonal * sizeof(double));
double intra_mask = 0;
for(int j = 0; j < num_tonal; j++){
int i = tonal_indices[j];
// calculate frequency and critical band of the component
// effect of other frequencies on spl_excess
double inter_mask = get_inter_mask(amp_dB, tonal_indices, num_tonal, i);
printf("freq = %lf, inter_mask = %lf, ", get_freq(i), inter_mask);
// effect of non-tonal components on spl_excess.
double noise_mask = get_noise_mask(amp_dB, tonal_indices, num_tonal, i);
printf("noise_mask = %lf, ", noise_mask);
// effect of the threshold of quiet - even in silence a frequency of this
double freq = get_freq(i);
double q_mask = get_quiet_thresh(freq);
q_mask = pow(10,q_mask/10);
printf("q_mask = %lf, ", q_mask);
// see equation (4) of terhardt
double spl_excess = amp_dB[i] - (10*log(inter_mask + noise_mask + q_mask));
printf("SPL excess = %lf\n", spl_excess);
// "the extent to which each tonal component contributes to the entire tonal
// percept is considered to depend on (1) the SPL excess and (2) the
// frequency of the component." See eqn (12)
pitch_weights[j] = get_pitch_weight(spl_excess, get_freq(i));
}
*indices = tonal_indices;
*weights = pitch_weights;
// Temporal masking: ~1:25 Ambikairajah
// Jesteadt et al. : M_f(t,m) = a*(b-log_10 dt)(L(t,m)-c)
// a, b, c are parameters that can be derived from psychoacoustic data
// a is based on the slope of the time course of masking
// b = log_10(forward masking duratioin = 200)
// Masking threshold = (TM^P + SN^P)^{1/P},
// where TM = temporal masking, SM = simultaneous masking
// often P = 5 seems to work pretty well
// TODO change return value
return num_tonal;
} //}}}