/*
 * Encode one input frame with the floating-point encoder.
 *
 * The frame goes through voice activity detection, the compile-time selectable
 * high-pass and bandwidth-transition filters, pitch / noise-shape / prediction
 * analysis, gain processing, LBRR (redundancy) encoding and noise shaping
 * quantization. Its parameters are then appended to the range coder stream.
 * Once nFramesInPayloadBuf * FRAME_LENGTH_MS reaches PacketSize_ms the stream
 * is terminated and copied to pCode, followed by a buffered LBRR payload when
 * one is flagged for this packet and still fits.
 *
 * Returns 0 on success, SKP_SILK_ENC_PAYLOAD_BUF_TOO_SHORT when the finished
 * payload needs more than *pnBytesOut bytes (the payload is dropped), or
 * SKP_SILK_ENC_INTERNAL_ERROR when the range coder error flag is set.
 * *pnBytesOut is set to 0 whenever nothing is written to pCode.
 */
SKP_int SKP_Silk_encode_frame_FLP(
    SKP_Silk_encoder_state_FLP      *psEnc,         /* I/O  Encoder state FLP                                   */
    SKP_uint8                       *pCode,         /* O    Payload                                             */
    SKP_int16                       *pnBytesOut,    /* I/O  Payload bytes; in: capacity of pCode, out: used     */
    const SKP_int16                 *pIn            /* I    Input speech frame                                  */
)
{
    SKP_Silk_encoder_control_FLP ctrl;
    SKP_int          i, payloadBytes, status = 0;
    SKP_int          fecIdx, terminator;
    SKP_int16        fecBytes;
    SKP_int16        hpBuf[ MAX_FRAME_LENGTH ];                         /* input after high-pass stage          */
    SKP_int16        hpLpBuf[ MAX_FRAME_LENGTH ];                       /* input after transition low-pass      */
    SKP_float        shapedBuf[ MAX_FRAME_LENGTH ];                     /* prefilter output fed to the NSQ      */
    SKP_float        pitchRes[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];   /* pitch LPC residual                   */
    SKP_uint8        fecPayload[ MAX_ARITHM_BYTES ];                    /* LBRR payload produced for this frame */
    SKP_float        *curFrame, *pitchResFrame, *newSamples;
    const SKP_uint16 *terminatorCDF;

    /* Seed comes from the two low bits of the frame counter, which is advanced here */
    ctrl.sCmn.Seed = psEnc->sCmn.frameCounter++ & 3;

    /* Working pointers, both offset by one frame length into their buffers */
    curFrame      = psEnc->x_buf + psEnc->sCmn.frame_length;    /* frame to encode            */
    pitchResFrame = pitchRes     + psEnc->sCmn.frame_length;    /* residual of the same frame */

    /* Voice activity detection on the raw input */
    SKP_Silk_VAD_FLP( psEnc, &ctrl, pIn );

    /* Stage 1: variable high-pass filter, or a plain copy when compiled out */
#if HIGH_PASS_INPUT
    SKP_Silk_HP_variable_cutoff_FLP( psEnc, &ctrl, hpBuf, pIn );
#else
    SKP_memcpy( hpBuf, pIn, psEnc->sCmn.frame_length * sizeof( SKP_int16 ) );
#endif

    /* Stage 2: low-pass for smooth bandwidth transitions, or a plain copy when compiled out */
#if SWITCH_TRANSITION_FILTERING
    SKP_Silk_LP_variable_cutoff( &psEnc->sCmn.sLP, hpLpBuf, hpBuf, psEnc->sCmn.frame_length );
#else
    SKP_memcpy( hpLpBuf, hpBuf, psEnc->sCmn.frame_length * sizeof( SKP_int16 ) );
#endif

    /* Convert the filtered frame to float and place it LA_SHAPE_MS * fs_kHz samples past curFrame */
    newSamples = curFrame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz;
    SKP_short2float_array( newSamples, hpLpBuf, psEnc->sCmn.frame_length );

    /* Perturb eight evenly spaced samples by +/-1e-6 (sign pattern + + - - + + - -)  */
    /* so that denormalized floats do not drive up the CPU load                       */
    for( i = 0; i < 8; i++ ) {
        newSamples[ i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f;
    }

    /* Analysis chain; the order of these calls is significant */
    SKP_Silk_find_pitch_lags_FLP(      psEnc, &ctrl, pitchRes,      curFrame );   /* pitch lags, initial LPC analysis */
    SKP_Silk_noise_shape_analysis_FLP( psEnc, &ctrl, pitchResFrame, curFrame );   /* noise shape analysis             */
    SKP_Silk_prefilter_FLP(            psEnc, &ctrl, shapedBuf,     curFrame );   /* prefilter for the noise shaper   */
    SKP_Silk_find_pred_coefs_FLP(      psEnc, &ctrl, pitchRes );                  /* LPC + LTP coefficients           */
    SKP_Silk_process_gains_FLP(        psEnc, &ctrl );                            /* gain processing                  */

    /* Low bitrate redundant encoding; fecBytes carries capacity in, used size out */
    fecBytes = MAX_ARITHM_BYTES;
    SKP_Silk_LBRR_encode_FLP( psEnc, &ctrl, fecPayload, &fecBytes, shapedBuf );

    /* Noise shaping quantization of the main stream */
    SKP_Silk_NSQ_wrapper_FLP( psEnc, &ctrl, shapedBuf, psEnc->sCmn.q, 0 );

    /* Derive the VAD flag and DTX state from the speech activity level */
    if( psEnc->speech_activity < SPEECH_ACTIVITY_DTX_THRES ) {
        psEnc->sCmn.vadFlag = NO_VOICE_ACTIVITY;
        psEnc->sCmn.noSpeechCounter++;
        if( psEnc->sCmn.noSpeechCounter > NO_SPEECH_FRAMES_BEFORE_DTX ) {
            psEnc->sCmn.inDTX = 1;
        }
        /* After MAX_CONSECUTIVE_DTX further frames, leave DTX and rewind the counter */
        if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NO_SPEECH_FRAMES_BEFORE_DTX ) {
            psEnc->sCmn.noSpeechCounter = NO_SPEECH_FRAMES_BEFORE_DTX;
            psEnc->sCmn.inDTX           = 0;
        }
    } else {
        psEnc->sCmn.noSpeechCounter = 0;
        psEnc->sCmn.inDTX           = 0;
        psEnc->sCmn.vadFlag         = VOICE_ACTIVITY;
    }

    /* A fresh payload starts with a fresh range coder */
    if( psEnc->sCmn.nFramesInPayloadBuf == 0 ) {
        SKP_Silk_range_enc_init( &psEnc->sCmn.sRC );
        psEnc->sCmn.nBytesInPayloadBuf = 0;
    }

    /* Write this frame's parameters into the range coder stream */
    SKP_Silk_encode_parameters( &psEnc->sCmn, &ctrl.sCmn, &psEnc->sCmn.sRC, psEnc->sCmn.q );
    terminatorCDF = SKP_Silk_FrameTermination_CDF;

    /* Slide the input buffer back by one frame for the next call */
    SKP_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
        ( psEnc->sCmn.frame_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( SKP_float ) );

    /* State carried over to the next frame */
    psEnc->sCmn.prev_sigtype            = ctrl.sCmn.sigtype;
    psEnc->sCmn.prevLag                 = ctrl.sCmn.pitchL[ NB_SUBFR - 1 ];
    psEnc->sCmn.first_frame_after_reset = 0;

    /* On a range coder error the payload buffer is cleared, otherwise this frame is counted */
    psEnc->sCmn.nFramesInPayloadBuf = psEnc->sCmn.sRC.error ? 0 : psEnc->sCmn.nFramesInPayloadBuf + 1;

    if( psEnc->sCmn.nFramesInPayloadBuf * FRAME_LENGTH_MS >= psEnc->sCmn.PacketSize_ms ) {
        /* Packet is complete: choose the terminator and which LBRR slot, if any, rides along. */
        /* A PLUS2 request in the oldest slot takes precedence over a PLUS1 request in the     */
        /* slot that follows it.                                                               */
        if( psEnc->sCmn.LBRR_buffer[ psEnc->sCmn.oldest_LBRR_idx ].usage == SKP_SILK_ADD_LBRR_TO_PLUS2 ) {
            terminator = SKP_SILK_LBRR_VER2;
            fecIdx     = psEnc->sCmn.oldest_LBRR_idx;
        } else {
            fecIdx = ( psEnc->sCmn.oldest_LBRR_idx + 1 ) & LBRR_IDX_MASK;
            if( psEnc->sCmn.LBRR_buffer[ fecIdx ].usage == SKP_SILK_ADD_LBRR_TO_PLUS1 ) {
                terminator = SKP_SILK_LBRR_VER1;
            } else {
                terminator = SKP_SILK_LAST_FRAME;
            }
        }

        /* Append the frame termination symbol to the stream */
        SKP_Silk_range_encoder( &psEnc->sCmn.sRC, terminator, terminatorCDF );

        /* Size of the main payload */
        SKP_Silk_range_coder_get_length( &psEnc->sCmn.sRC, &payloadBytes );

        if( *pnBytesOut < payloadBytes ) {
            /* Caller's buffer is too small: drop the payload and report it */
            *pnBytesOut  = 0;
            payloadBytes = 0;
            status       = SKP_SILK_ENC_PAYLOAD_BUF_TOO_SHORT;
        } else {
            /* Finish the range coder stream and hand the bytes to the caller */
            SKP_Silk_range_enc_wrap_up( &psEnc->sCmn.sRC );
            SKP_memcpy( pCode, psEnc->sCmn.sRC.buffer, payloadBytes * sizeof( SKP_uint8 ) );

            /* Attach the selected LBRR payload only if the terminator asks for it and it fits */
            if( terminator > SKP_SILK_MORE_FRAMES &&
                    *pnBytesOut >= payloadBytes + psEnc->sCmn.LBRR_buffer[ fecIdx ].nBytes ) {
                SKP_memcpy( &pCode[ payloadBytes ], psEnc->sCmn.LBRR_buffer[ fecIdx ].payload,
                    psEnc->sCmn.LBRR_buffer[ fecIdx ].nBytes * sizeof( SKP_uint8 ) );
                payloadBytes += psEnc->sCmn.LBRR_buffer[ fecIdx ].nBytes;
            }
            *pnBytesOut = payloadBytes;

            /* Store this frame's LBRR data in the oldest slot, then advance the ring index */
            SKP_memcpy( psEnc->sCmn.LBRR_buffer[ psEnc->sCmn.oldest_LBRR_idx ].payload, fecPayload,
                fecBytes * sizeof( SKP_uint8 ) );
            psEnc->sCmn.LBRR_buffer[ psEnc->sCmn.oldest_LBRR_idx ].nBytes = fecBytes;
            /* How this LBRR data is to be used later is decided by the encoder control */
            psEnc->sCmn.LBRR_buffer[ psEnc->sCmn.oldest_LBRR_idx ].usage  = ctrl.sCmn.LBRR_usage;
            psEnc->sCmn.oldest_LBRR_idx = ( ( psEnc->sCmn.oldest_LBRR_idx + 1 ) & LBRR_IDX_MASK );
        }

        /* Either way, the payload buffer now holds no frames */
        psEnc->sCmn.nFramesInPayloadBuf = 0;
    } else {
        /* Packet not complete yet: nothing is output on this call */
        *pnBytesOut = 0;

        /* Signal in the stream that more frames follow */
        terminator = SKP_SILK_MORE_FRAMES;
        SKP_Silk_range_encoder( &psEnc->sCmn.sRC, terminator, terminatorCDF );

        /* Size of the stream accumulated so far */
        SKP_Silk_range_coder_get_length( &psEnc->sCmn.sRC, &payloadBytes );
    }

    /* A range coder error overrides any earlier status */
    if( psEnc->sCmn.sRC.error ) {
        status = SKP_SILK_ENC_INTERNAL_ERROR;
    }

    /* Model of the milliseconds queued in the channel when TargetRate is exceeded: */
    /* add the time the new bytes take at TargetRate_bps, drain one frame duration, */
    /* and clamp to the range [0, 100]                                              */
    psEnc->BufferedInChannel_ms += ( 8.0f * 1000.0f * ( payloadBytes - psEnc->sCmn.nBytesInPayloadBuf ) ) / psEnc->sCmn.TargetRate_bps;
    psEnc->BufferedInChannel_ms -= FRAME_LENGTH_MS;
    psEnc->BufferedInChannel_ms  = SKP_LIMIT_float( psEnc->BufferedInChannel_ms, 0.0f, 100.0f );
    psEnc->sCmn.nBytesInPayloadBuf = payloadBytes;

    /* Accumulate (with saturation) the active speech time used by the SWB detector */
    if( psEnc->speech_activity > WB_DETECT_ACTIVE_SPEECH_LEVEL_THRES ) {
        psEnc->sCmn.sSWBdetect.ActiveSpeech_ms = SKP_ADD_POS_SAT32( psEnc->sCmn.sSWBdetect.ActiveSpeech_ms, FRAME_LENGTH_MS );
    }

    return( status );
}
/*
 * Adaptive high-pass filter for the encoder input.
 *
 * While the previous frame was voiced, the log2 of its pitch frequency
 * (derived from prevLag) is tracked by a first smoother whose step is scaled
 * by speech activity; a second smoother then follows the first on every call.
 * The smoothed value is converted to Hertz, clamped to
 * [VARIABLE_HP_MIN_FREQ, VARIABLE_HP_MAX_FREQ], stored in
 * psEncCtrl->pitch_freq_low_Hz, and used to derive the Q28 coefficients of a
 * 2nd order ARMA section that is applied to the frame_length samples of `in`.
 */
void SKP_Silk_HP_variable_cutoff_FLP(
    SKP_Silk_encoder_state_FLP      *psEnc,         /* I/O  Encoder state FLP                       */
    SKP_Silk_encoder_control_FLP    *psEncCtrl,     /* I/O  Encoder control FLP                     */
    SKP_int16                       *out,           /* O    High-pass filtered output signal        */
    const SKP_int16                 *in             /* I    Input signal                            */
)
{
    SKP_float lagFreqHz, logFreq, q0, step, gain, cutoffRad, radius;
    SKP_int32 numQ28[ 3 ], denQ28[ 2 ];

    /* --- Track the low end of the pitch frequency range (voiced history only) --- */
    if( psEnc->sCmn.prev_sigtype == SIG_TYPE_VOICED ) {
        /* Pitch frequency of the previous frame, moved to the log2 domain */
        lagFreqHz = 1e3f * psEnc->sCmn.fs_kHz / psEnc->sCmn.prevLag;
        logFreq   = SKP_Silk_log2( lagFreqHz );

        /* Quality of the lowest band pulls the estimate towards log2( VARIABLE_HP_MIN_FREQ ) */
        /* (weight quality^2), then an offset of 0.5 * ( 0.6 - quality ) is added             */
        q0 = psEncCtrl->input_quality_bands[ 0 ];
        logFreq -= q0 * q0 * ( logFreq - SKP_Silk_log2( VARIABLE_HP_MIN_FREQ ) );
        logFreq += 0.5f * ( 0.6f - q0 );

        /* Distance to the first smoother; downward steps count three times as much, */
        /* so the smoother follows something close to the minimum                    */
        step = logFreq - psEnc->variable_HP_smth1;
        if( step < 0.0 ) {
            step *= 3.0f;
        }

        /* Bound the step so outliers have limited impact */
        step = SKP_LIMIT_float( step, -VARIABLE_HP_MAX_DELTA_FREQ, VARIABLE_HP_MAX_DELTA_FREQ );

        /* First smoother: update gain proportional to speech activity */
        gain = VARIABLE_HP_SMTH_COEF1 * psEnc->speech_activity;
        psEnc->variable_HP_smth1 += gain * step;
    }

    /* Second smoother runs on every call, voiced or not */
    psEnc->variable_HP_smth2 += VARIABLE_HP_SMTH_COEF2 * ( psEnc->variable_HP_smth1 - psEnc->variable_HP_smth2 );

    /* Back from log2 scale to Hertz, then restrict to the permitted range */
    psEncCtrl->pitch_freq_low_Hz = ( SKP_float )pow( 2.0f, psEnc->variable_HP_smth2 );
    psEncCtrl->pitch_freq_low_Hz = SKP_LIMIT_float( psEncCtrl->pitch_freq_low_Hz, VARIABLE_HP_MIN_FREQ, VARIABLE_HP_MAX_FREQ );

    /* --- Filter design --- */
    /* Cut-off in radians: 0.45 * 2 * pi * f_low / fs */
    cutoffRad = ( SKP_float )( 0.45f * 2.0f * 3.14159265359 * psEncCtrl->pitch_freq_low_Hz / ( 1e3f * psEnc->sCmn.fs_kHz ) );

    /* 2nd order ARMA section:                      */
    /*   b = r * [1; -2; 1]                         */
    /*   a = [1; -2 * r * (1 - 0.5 * Fc^2); r^2]    */
    radius = 1.0f - 0.92f * cutoffRad;

    numQ28[ 0 ] = SKP_float2int( ( 1 << 28 ) * radius );
    numQ28[ 1 ] = SKP_float2int( ( 1 << 28 ) * -2.0f * radius );
    numQ28[ 2 ] = numQ28[ 0 ];

    denQ28[ 0 ] = SKP_float2int( ( 1 << 28 ) * -2.0f * radius * ( 1.0f - 0.5f * cutoffRad * cutoffRad ) );
    denQ28[ 1 ] = SKP_float2int( ( 1 << 28 ) * radius * radius );

    /* --- Apply the filter; its state lives in psEnc->sCmn.In_HP_State --- */
    SKP_Silk_biquad_alt( in, numQ28, denQ28, psEnc->sCmn.In_HP_State, out, psEnc->sCmn.frame_length );
}