/*
 * Entry point of the SILK test decoder.
 *
 * Usage as implemented below:
 *   argv[1]            input bitstream file
 *   argv[2]            output speech file (16-bit samples)
 *   -loss <percent>    simulated packet-loss probability (compared as loss_prob / 100)
 *   -Fs_API <Hz>       requested output sample rate; 24000 is used when left at 0
 *   -quiet             suppress the banner and the progress counter
 *
 * The visible part of the function parses the arguments, checks the file header,
 * creates the decoder, pre-fills a simulated jitter buffer with MAX_LBRR_DELAY
 * packets and then runs the per-packet loop: read a packet, optionally drop it,
 * decode (or conceal / recover via LBRR) the oldest buffered packet, write the
 * decoded samples and shift the buffer by one packet.
 *
 * Every error path calls exit( 0 ), so the process exit status does not signal failure.
 *
 * NOTE(review): this excerpt stops at the closing brace of the main while-loop; the
 * remainder of main() (and its closing brace) is not visible here.
 */
int main( int argc, char* argv[] )
{
    unsigned long tottime, starttime;
    double filetime;                    /* not used in the visible part of main() */
    size_t counter;
    SKP_int32 args, totPackets, i, k;   /* k is not used in the visible part of main() */
    SKP_int16 ret, len, tot_len;
    SKP_int16 nBytes;
    /* Jitter buffer: room for ( MAX_LBRR_DELAY + 1 ) packets stored back to back */
    SKP_uint8 payload[ MAX_BYTES_PER_FRAME * MAX_INPUT_FRAMES * ( MAX_LBRR_DELAY + 1 ) ];
    SKP_uint8 *payloadEnd = NULL, *payloadToDec = NULL;
    SKP_uint8 FECpayload[ MAX_BYTES_PER_FRAME * MAX_INPUT_FRAMES ], *payloadPtr;
    SKP_int16 nBytesFEC;
    /* Size of each buffered packet; entry 0 is the oldest, 0 bytes marks a lost packet */
    SKP_int16 nBytesPerPacket[ MAX_LBRR_DELAY + 1 ], totBytes;
    SKP_int16 out[ ( ( FRAME_LENGTH_MS * MAX_API_FS_KHZ ) << 1 ) * MAX_INPUT_FRAMES ], *outPtr;
    char speechOutFileName[ 150 ], bitInFileName[ 150 ];
    FILE *bitInFile, *speechOutFile;
    SKP_int32 packetSize_ms=0, API_Fs_Hz = 0;
    SKP_int32 decSizeBytes;
    void *psDec;
    SKP_float loss_prob;
    SKP_int32 frames, lost, quiet;
    SKP_SILK_SDK_DecControlStruct DecControl;

    /* Both file names are mandatory */
    if( argc < 3 ) {
        print_usage( argv );
        exit( 0 );
    }

    /* default settings */
    quiet = 0;
    loss_prob = 0.0f;

    /* get arguments */
    /* NOTE(review): strcpy() does not bound the copy; an argument longer than 149
       characters overflows the 150-byte name buffers. */
    args = 1;
    strcpy( bitInFileName, argv[ args ] );
    args++;
    strcpy( speechOutFileName, argv[ args ] );
    args++;
    while( args < argc ) {
        /* NOTE(review): "-loss" and "-Fs_API" read argv[ args + 1 ] without checking
           that args + 1 < argc, and the sscanf() results are not checked. */
        if( SKP_STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-loss" ) == 0 ) {
            sscanf( argv[ args + 1 ], "%f", &loss_prob );
            args += 2;
        } else if( SKP_STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-Fs_API" ) == 0 ) {
            sscanf( argv[ args + 1 ], "%d", &API_Fs_Hz );
            args += 2;
        } else if( SKP_STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-quiet" ) == 0 ) {
            quiet = 1;
            args++;
        } else {
            printf( "Error: unrecognized setting: %s\n\n", argv[ args ] );
            print_usage( argv );
            exit( 0 );
        }
    }

    if( !quiet ) {
        printf("********** Silk Decoder (Fixed Point) v %s ********************\n", SKP_Silk_SDK_get_version());
        printf("********** Compiled for %d bit cpu *******************************\n", (int)sizeof(void*) * 8 );
        printf( "Input: %s\n", bitInFileName );
        printf( "Output: %s\n", speechOutFileName );
    }

    /* Open files */
    bitInFile = fopen( bitInFileName, "rb" );
    if( bitInFile == NULL ) {
        printf( "Error: could not open input file %s\n", bitInFileName );
        exit( 0 );
    }

    /* Check Silk header */
    {
        char header_buf[ 50 ];
        /* One byte is consumed from the file before the header text is read */
        fread(header_buf, sizeof(char), 1, bitInFile);
        /* NOTE(review): as the literal appears here (an empty string), strlen( "" ) is 0,
           so the byte just read is overwritten with '\0' and the strcmp() below always
           reports equality, i.e. only the "#!SILK_V3" branch can run. Confirm whether a
           non-printable character was lost from these two literals. */
        header_buf[ strlen( "" ) ] = '\0'; /* Terminate with a null character */
        if( strcmp( header_buf, "" ) != 0 ) {
            /* Header text expected without the leading '#' */
            counter = fread( header_buf, sizeof( char ), strlen( "!SILK_V3" ), bitInFile );
            header_buf[ strlen( "!SILK_V3" ) ] = '\0'; /* Terminate with a null character */
            if( strcmp( header_buf, "!SILK_V3" ) != 0 ) {
                /* Non-equal strings */
                printf( "Error: Wrong Header %s\n", header_buf );
                exit( 0 );
            }
        } else {
            /* Full "#!SILK_V3" header text expected */
            counter = fread( header_buf, sizeof( char ), strlen( "#!SILK_V3" ), bitInFile );
            header_buf[ strlen( "#!SILK_V3" ) ] = '\0'; /* Terminate with a null character */
            if( strcmp( header_buf, "#!SILK_V3" ) != 0 ) {
                /* Non-equal strings */
                printf( "Error: Wrong Header %s\n", header_buf );
                exit( 0 );
            }
        }
    }

    speechOutFile = fopen( speechOutFileName, "wb" );
    if( speechOutFile == NULL ) {
        printf( "Error: could not open output file %s\n", speechOutFileName );
        exit( 0 );
    }

    /* Set the sampling rate that is requested for the output (24000 when not given) */
    if( API_Fs_Hz == 0 ) {
        DecControl.API_sampleRate = 24000;
    } else {
        DecControl.API_sampleRate = API_Fs_Hz;
    }

    /* Initialize to one frame per packet, for proper concealment before first packet arrives */
    DecControl.framesPerPacket = 1;

    /* Create decoder */
    ret = SKP_Silk_SDK_Get_Decoder_Size( &decSizeBytes );
    if( ret ) {
        printf( "\nSKP_Silk_SDK_Get_Decoder_Size returned %d", ret );
    }
    /* NOTE(review): the malloc() result is passed on without a NULL check */
    psDec = malloc( decSizeBytes );

    /* Reset decoder */
    ret = SKP_Silk_SDK_InitDecoder( psDec );
    if( ret ) {
        printf( "\nSKP_Silk_InitDecoder returned %d", ret );
    }

    totPackets = 0;
    tottime = 0;
    payloadEnd = payload;

    /* Simulate the jitter buffer holding MAX_LBRR_DELAY packets */
    for( i = 0; i < MAX_LBRR_DELAY; i++ ) {
        /* Read payload size */
        /* NOTE(review): unlike the main loop below, neither the fread() result nor the
           sign of nBytes is checked here before nBytes is used as a read length, and
           nBytes is never compared against the remaining space in payload[]. */
        counter = fread( &nBytes, sizeof( SKP_int16 ), 1, bitInFile );
#ifdef _SYSTEM_IS_BIG_ENDIAN
        swap_endian( &nBytes, 1 );
#endif
        /* Read payload */
        counter = fread( payloadEnd, sizeof( SKP_uint8 ), nBytes, bitInFile );
        if( ( SKP_int16 )counter < nBytes ) {
            /* Short read: stop pre-filling */
            break;
        }
        nBytesPerPacket[ i ] = nBytes;
        payloadEnd += nBytes;
        totPackets++;
    }

    /* Main loop: one iteration per packet read from the file */
    while( 1 ) {
        /* Read payload size */
        counter = fread( &nBytes, sizeof( SKP_int16 ), 1, bitInFile );
#ifdef _SYSTEM_IS_BIG_ENDIAN
        swap_endian( &nBytes, 1 );
#endif
        /* Stop on a negative size or when no size field could be read */
        if( nBytes < 0 || counter < 1 ) {
            break;
        }

        /* Read payload; it is appended after the packets already buffered */
        counter = fread( payloadEnd, sizeof( SKP_uint8 ), nBytes, bitInFile );
        if( ( SKP_int16 )counter < nBytes ) {
            break;
        }

        /* Simulate losses */
        /* rand_seed is not declared in this function; presumably a file-scope variable */
        rand_seed = SKP_RAND( rand_seed );
        /* The packet is kept when the scaled random value is >= loss_prob / 100 and
           at least one payload byte was read; otherwise it is recorded as 0 bytes (lost) */
        if( ( ( ( float )( ( rand_seed >> 16 ) + ( 1 << 15 ) ) ) / 65535.0f >= ( loss_prob / 100.0f ) ) && ( counter > 0 ) ) {
            nBytesPerPacket[ MAX_LBRR_DELAY ] = nBytes;
            payloadEnd += nBytes;
        } else {
            nBytesPerPacket[ MAX_LBRR_DELAY ] = 0;
        }

        if( nBytesPerPacket[ 0 ] == 0 ) {
            /* Indicate lost packet */
            lost = 1;

            /* Packet loss. Search after FEC in next packets. Should be done in the jitter buffer */
            /* The oldest packet occupies 0 bytes, so payload also marks the start of packet 1;
               payloadPtr is advanced by each later packet's size as the search proceeds */
            payloadPtr = payload;
            for( i = 0; i < MAX_LBRR_DELAY; i++ ) {
                if( nBytesPerPacket[ i + 1 ] > 0 ) {
                    starttime = GetHighResolutionTime();
                    SKP_Silk_SDK_search_for_LBRR( payloadPtr, nBytesPerPacket[ i + 1 ], ( i + 1 ), FECpayload, &nBytesFEC );
                    tottime += GetHighResolutionTime() - starttime;
                    if( nBytesFEC > 0 ) {
                        /* Redundant data found: decode it as if the packet had arrived */
                        payloadToDec = FECpayload;
                        nBytes = nBytesFEC;
                        lost = 0;
                        break;
                    }
                }
                payloadPtr += nBytesPerPacket[ i + 1 ];
            }
        } else {
            /* Oldest packet is present: decode it from the front of the buffer */
            lost = 0;
            nBytes = nBytesPerPacket[ 0 ];
            payloadToDec = payload;
        }

        /* Silk decoder */
        outPtr = out;
        tot_len = 0;
        starttime = GetHighResolutionTime();

        if( lost == 0 ) {
            /* No Loss: Decode all frames in the packet */
            frames = 0;
            do {
                /* Decode 20 ms */
                ret = SKP_Silk_SDK_Decode( psDec, &DecControl, 0, payloadToDec, nBytes, outPtr, &len );
                if( ret ) {
                    printf( "\nSKP_Silk_SDK_Decode returned %d", ret );
                }

                frames++;
                outPtr += len;
                tot_len += len;
                if( frames > MAX_INPUT_FRAMES ) {
                    /* Hack for corrupt stream that could generate too many frames */
                    /* Output gathered so far for this packet is discarded */
                    outPtr = out;
                    tot_len = 0;
                    frames = 0;
                }
                /* Until last 20 ms frame of packet has been decoded */
            } while( DecControl.moreInternalDecoderFrames );
        } else {
            /* Loss: Decode enough frames to cover one packet duration */
            for( i = 0; i < DecControl.framesPerPacket; i++ ) {
                /* Generate 20 ms */
                ret = SKP_Silk_SDK_Decode( psDec, &DecControl, 1, payloadToDec, nBytes, outPtr, &len );
                if( ret ) {
                    printf( "\nSKP_Silk_Decode returned %d", ret );
                }
                outPtr += len;
                tot_len += len;
            }
        }

        /* Duration of the decoded packet; not read again in the visible part of main() */
        packetSize_ms = tot_len / ( DecControl.API_sampleRate / 1000 );
        tottime += GetHighResolutionTime() - starttime;
        totPackets++;

        /* Write output to file */
#ifdef _SYSTEM_IS_BIG_ENDIAN
        swap_endian( out, tot_len );
#endif
        /* NOTE(review): the fwrite() result is not checked */
        fwrite( out, sizeof( SKP_int16 ), tot_len, speechOutFile );

        /* Update buffer: drop the oldest packet and shift the remaining ones to the front */
        totBytes = 0;
        for( i = 0; i < MAX_LBRR_DELAY; i++ ) {
            totBytes += nBytesPerPacket[ i + 1 ];
        }
        SKP_memmove( payload, &payload[ nBytesPerPacket[ 0 ] ], totBytes * sizeof( SKP_uint8 ) );
        payloadEnd -= nBytesPerPacket[ 0 ];
        SKP_memmove( nBytesPerPacket, &nBytesPerPacket[ 1 ], MAX_LBRR_DELAY * sizeof( SKP_int16 ) );

        if( !quiet ) {
            fprintf( stderr, "\rPackets decoded: %d", totPackets );
        }
    }
/*
 * Encode one input frame with the floating-point encoder.
 *
 * The frame is analysed, quantized and range coded into the encoder's internal
 * payload buffer. Once the buffered frames cover PacketSize_ms the payload is
 * terminated, copied to pCode (optionally followed by stored LBRR data) and its
 * size is written to *pnBytesOut; otherwise *pnBytesOut is set to 0.
 *
 * Returns 0, SKP_SILK_ENC_PAYLOAD_BUF_TOO_SHORT when the finished payload does not
 * fit in the *pnBytesOut bytes offered by the caller, or SKP_SILK_ENC_INTERNAL_ERROR
 * when the range coder reports an error.
 */
SKP_int SKP_Silk_encode_frame_FLP(
    SKP_Silk_encoder_state_FLP      *psEnc,         /* I/O  Encoder state FLP                           */
    SKP_uint8                       *pCode,         /* O    Payload                                     */
    SKP_int16                       *pnBytesOut,    /* I/O  Number of payload bytes;                    */
                                                    /*      input: max length; output: used             */
    const SKP_int16                 *pIn            /* I    Input speech frame                          */
)
{
    SKP_Silk_encoder_control_FLP sEncCtrl;
    SKP_int     ret = 0;
    SKP_int     seg, payloadLen, lbrrSlot, termSymbol;
    SKP_float   *frameStart, *pitchResFrame, *newSamples;
    SKP_int16   hpIn[     MAX_FRAME_LENGTH ];
    SKP_int16   hpLpIn[   MAX_FRAME_LENGTH ];
    SKP_float   shapedIn[ MAX_FRAME_LENGTH ];
    SKP_float   pitchRes[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];

    /* Redundant (LBRR) payload produced for this frame */
    SKP_uint8   redundantPayload[ MAX_ARITHM_BYTES ];
    SKP_int16   redundantLen;

    /* Seed cycles through 0..3 with the frame counter */
    sEncCtrl.sCmn.Seed = psEnc->sCmn.frameCounter++ & 3;

    /* Pointers aligned with the start of the frame to encode */
    frameStart    = psEnc->x_buf + psEnc->sCmn.frame_length;   /* frame inside the input buffer   */
    pitchResFrame = pitchRes     + psEnc->sCmn.frame_length;   /* frame inside the pitch residual */

    /* Voice activity detection */
    SKP_Silk_VAD_FLP( psEnc, &sEncCtrl, pIn );

    /* High-pass filtering of the input signal */
#if HIGH_PASS_INPUT
    SKP_Silk_HP_variable_cutoff_FLP( psEnc, &sEncCtrl, hpIn, pIn );
#else
    SKP_memcpy( hpIn, pIn, psEnc->sCmn.frame_length * sizeof( SKP_int16 ) );
#endif

    /* Low-pass filtering that smooths bandwidth transitions */
#if SWITCH_TRANSITION_FILTERING
    SKP_Silk_LP_variable_cutoff( &psEnc->sCmn.sLP, hpLpIn, hpIn, psEnc->sCmn.frame_length );
#else
    SKP_memcpy( hpLpIn, hpIn, psEnc->sCmn.frame_length * sizeof( SKP_int16 ) );
#endif

    /* Place the new frame in the input buffer, after the shaping look-ahead */
    newSamples = frameStart + LA_SHAPE_MS * psEnc->sCmn.fs_kHz;
    SKP_short2float_array( newSamples, hpLpIn, psEnc->sCmn.frame_length );

    /* Add a tiny signal at eight evenly spaced positions to avoid high CPU load
       from denormalized floating point numbers */
    for( seg = 0; seg < 8; seg++ ) {
        newSamples[ seg * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( seg & 2 ) ) * 1e-6f;
    }

    /* Analysis chain: pitch lags, noise shaping, prefilter, prediction coefficients, gains */
    SKP_Silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, pitchRes, frameStart );
    SKP_Silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, pitchResFrame, frameStart );
    SKP_Silk_prefilter_FLP( psEnc, &sEncCtrl, shapedIn, frameStart );
    SKP_Silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, pitchRes );
    SKP_Silk_process_gains_FLP( psEnc, &sEncCtrl );

    /* Low bitrate redundant encoding */
    redundantLen = MAX_ARITHM_BYTES;
    SKP_Silk_LBRR_encode_FLP( psEnc, &sEncCtrl, redundantPayload, &redundantLen, shapedIn );

    /* Noise shaping quantization */
    SKP_Silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, shapedIn, psEnc->sCmn.q, 0 );

    /* Convert speech activity into VAD and DTX flags */
    if( !( psEnc->speech_activity < SPEECH_ACTIVITY_DTX_THRES ) ) {
        /* Active speech: leave DTX and restart the silence counter */
        psEnc->sCmn.noSpeechCounter = 0;
        psEnc->sCmn.inDTX           = 0;
        psEnc->sCmn.vadFlag         = VOICE_ACTIVITY;
    } else {
        psEnc->sCmn.vadFlag = NO_VOICE_ACTIVITY;
        psEnc->sCmn.noSpeechCounter++;
        if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NO_SPEECH_FRAMES_BEFORE_DTX ) {
            /* DTX has lasted long enough: leave it and rewind the counter */
            psEnc->sCmn.noSpeechCounter = NO_SPEECH_FRAMES_BEFORE_DTX;
            psEnc->sCmn.inDTX           = 0;
        } else if( psEnc->sCmn.noSpeechCounter > NO_SPEECH_FRAMES_BEFORE_DTX ) {
            psEnc->sCmn.inDTX = 1;
        }
    }

    /* A fresh payload starts with a freshly initialized range coder */
    if( psEnc->sCmn.nFramesInPayloadBuf == 0 ) {
        SKP_Silk_range_enc_init( &psEnc->sCmn.sRC );
        psEnc->sCmn.nBytesInPayloadBuf = 0;
    }

    /* Encode parameters */
    SKP_Silk_encode_parameters( &psEnc->sCmn, &sEncCtrl.sCmn, &psEnc->sCmn.sRC, psEnc->sCmn.q );

    /* Shift the input buffer by one frame */
    SKP_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
        ( psEnc->sCmn.frame_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( SKP_float ) );

    /* Parameters needed for next frame */
    psEnc->sCmn.prev_sigtype            = sEncCtrl.sCmn.sigtype;
    psEnc->sCmn.prevLag                 = sEncCtrl.sCmn.pitchL[ NB_SUBFR - 1 ];
    psEnc->sCmn.first_frame_after_reset = 0;

    if( !psEnc->sCmn.sRC.error ) {
        psEnc->sCmn.nFramesInPayloadBuf++;
    } else {
        /* Range coder error: clear payload buffer */
        psEnc->sCmn.nFramesInPayloadBuf = 0;
    }

    if( psEnc->sCmn.nFramesInPayloadBuf * FRAME_LENGTH_MS < psEnc->sCmn.PacketSize_ms ) {
        /* Packet not complete yet: no payload this time */
        *pnBytesOut = 0;

        /* Encode that more frames follow */
        termSymbol = SKP_SILK_MORE_FRAMES;
        SKP_Silk_range_encoder( &psEnc->sCmn.sRC, termSymbol, SKP_Silk_FrameTermination_CDF );

        /* Payload length so far */
        SKP_Silk_range_coder_get_length( &psEnc->sCmn.sRC, &payloadLen );
    } else {
        /* Packet complete: pick the frame terminator, which also tells whether
           (and which) stored LBRR data is appended */
        if( psEnc->sCmn.LBRR_buffer[ psEnc->sCmn.oldest_LBRR_idx ].usage == SKP_SILK_ADD_LBRR_TO_PLUS2 ) {
            termSymbol = SKP_SILK_LBRR_VER2;
            lbrrSlot   = psEnc->sCmn.oldest_LBRR_idx;
        } else {
            lbrrSlot = ( psEnc->sCmn.oldest_LBRR_idx + 1 ) & LBRR_IDX_MASK;
            if( psEnc->sCmn.LBRR_buffer[ lbrrSlot ].usage == SKP_SILK_ADD_LBRR_TO_PLUS1 ) {
                termSymbol = SKP_SILK_LBRR_VER1;
            } else {
                termSymbol = SKP_SILK_LAST_FRAME;
            }
        }

        /* Add the frame termination info to stream */
        SKP_Silk_range_encoder( &psEnc->sCmn.sRC, termSymbol, SKP_Silk_FrameTermination_CDF );

        /* Payload length so far */
        SKP_Silk_range_coder_get_length( &psEnc->sCmn.sRC, &payloadLen );

        if( *pnBytesOut < payloadLen ) {
            /* Not enough space in the caller's buffer: payload is discarded */
            *pnBytesOut = 0;
            payloadLen  = 0;
            ret = SKP_SILK_ENC_PAYLOAD_BUF_TOO_SHORT;
        } else {
            SKP_Silk_range_enc_wrap_up( &psEnc->sCmn.sRC );
            SKP_memcpy( pCode, psEnc->sCmn.sRC.buffer, payloadLen * sizeof( SKP_uint8 ) );

            /* Append the stored redundant payload when signalled and when it fits */
            if( termSymbol > SKP_SILK_MORE_FRAMES &&
                    *pnBytesOut >= payloadLen + psEnc->sCmn.LBRR_buffer[ lbrrSlot ].nBytes ) {
                SKP_memcpy( &pCode[ payloadLen ], psEnc->sCmn.LBRR_buffer[ lbrrSlot ].payload,
                    psEnc->sCmn.LBRR_buffer[ lbrrSlot ].nBytes * sizeof( SKP_uint8 ) );
                payloadLen += psEnc->sCmn.LBRR_buffer[ lbrrSlot ].nBytes;
            }
            *pnBytesOut = payloadLen;

            /* Store this frame's redundant payload in the oldest slot, then advance the slot index */
            SKP_memcpy( psEnc->sCmn.LBRR_buffer[ psEnc->sCmn.oldest_LBRR_idx ].payload, redundantPayload,
                redundantLen * sizeof( SKP_uint8 ) );
            psEnc->sCmn.LBRR_buffer[ psEnc->sCmn.oldest_LBRR_idx ].nBytes = redundantLen;
            /* Records how this redundant payload should be used later */
            psEnc->sCmn.LBRR_buffer[ psEnc->sCmn.oldest_LBRR_idx ].usage = sEncCtrl.sCmn.LBRR_usage;
            psEnc->sCmn.oldest_LBRR_idx = ( ( psEnc->sCmn.oldest_LBRR_idx + 1 ) & LBRR_IDX_MASK );
        }

        /* Reset the number of frames in payload buffer */
        psEnc->sCmn.nFramesInPayloadBuf = 0;
    }

    /* Check for arithmetic coder errors */
    if( psEnc->sCmn.sRC.error ) {
        ret = SKP_SILK_ENC_INTERNAL_ERROR;
    }

    /* Simulate number of ms buffered in channel because of exceeding TargetRate */
    psEnc->BufferedInChannel_ms   += ( 8.0f * 1000.0f * ( payloadLen - psEnc->sCmn.nBytesInPayloadBuf ) ) / psEnc->sCmn.TargetRate_bps;
    psEnc->BufferedInChannel_ms   -= FRAME_LENGTH_MS;
    psEnc->BufferedInChannel_ms    = SKP_LIMIT_float( psEnc->BufferedInChannel_ms, 0.0f, 100.0f );
    psEnc->sCmn.nBytesInPayloadBuf = payloadLen;

    /* Accumulate (with saturation) the amount of active speech seen */
    if( psEnc->speech_activity > WB_DETECT_ACTIVE_SPEECH_LEVEL_THRES ) {
        psEnc->sCmn.sSWBdetect.ActiveSpeech_ms = SKP_ADD_POS_SAT32( psEnc->sCmn.sSWBdetect.ActiveSpeech_ms, FRAME_LENGTH_MS );
    }

    return( ret );
}
/*
 * Comfort noise generation.
 *
 * While frames arrive (lossCnt == 0) and are flagged NO_VOICE_ACTIVITY, the smoothed
 * NLSFs, the excitation history and the smoothed gain of the CNG state are updated.
 * While frames are being lost (lossCnt != 0), comfort noise is synthesized from that
 * state and added, with 16-bit saturation, to signal[]. Otherwise the CNG synthesis
 * filter state is cleared.
 */
void SKP_Silk_CNG(
    SKP_Silk_decoder_state      *psDec,             /* I/O  Decoder state                               */
    SKP_Silk_decoder_control    *psDecCtrl,         /* I/O  Decoder control                             */
    SKP_int16                   signal[],           /* I/O  Signal                                      */
    SKP_int                     length              /* I    Length of residual                          */
)
{
    SKP_Silk_CNG_struct *psCNG = &psDec->sCNG;
    SKP_int16 noise[ MAX_FRAME_LENGTH ];
    SKP_int16 A_Q12[ MAX_LPC_ORDER ];
    SKP_int32 mixed, peakGain_Q16;
    SKP_int   n, loudest;
    const SKP_int32 unityGain_Q26 = ( SKP_int32 )1 << 26;  /* 1.0 */

    /* A change of sampling rate invalidates the CNG state */
    if( psDec->fs_kHz != psCNG->fs_kHz ) {
        SKP_Silk_CNG_Reset( psDec );
        psCNG->fs_kHz = psDec->fs_kHz;
    }

    /* Update the CNG parameters from received frames without voice activity */
    if( psDec->lossCnt == 0 && psDec->vadFlag == NO_VOICE_ACTIVITY ) {
        /* Smooth the NLSFs towards those of the last frame */
        for( n = 0; n < psDec->LPC_order; n++ ) {
            psCNG->CNG_smth_NLSF_Q15[ n ] += SKP_SMULWB( psDec->prevNLSF_Q15[ n ] - psCNG->CNG_smth_NLSF_Q15[ n ], CNG_NLSF_SMTH_Q16 );
        }

        /* Locate the subframe with the highest gain (the first one wins on ties) */
        loudest      = 0;
        peakGain_Q16 = 0;
        for( n = 0; n < NB_SUBFR; n++ ) {
            if( psDecCtrl->Gains_Q16[ n ] > peakGain_Q16 ) {
                peakGain_Q16 = psDecCtrl->Gains_Q16[ n ];
                loudest      = n;
            }
        }

        /* Push the excitation of that subframe onto the front of the excitation history */
        SKP_memmove( &psCNG->CNG_exc_buf_Q10[ psDec->subfr_length ], psCNG->CNG_exc_buf_Q10,
            ( NB_SUBFR - 1 ) * psDec->subfr_length * sizeof( SKP_int32 ) );
        SKP_memcpy( psCNG->CNG_exc_buf_Q10, &psDec->exc_Q10[ loudest * psDec->subfr_length ],
            psDec->subfr_length * sizeof( SKP_int32 ) );

        /* Smooth the gain over all subframes */
        for( n = 0; n < NB_SUBFR; n++ ) {
            psCNG->CNG_smth_Gain_Q16 += SKP_SMULWB( psDecCtrl->Gains_Q16[ n ] - psCNG->CNG_smth_Gain_Q16, CNG_GAIN_SMTH_Q16 );
        }
    }

    /* Nothing is mixed in unless frames are being lost; just clear the synthesis memory */
    if( psDec->lossCnt == 0 ) {
        SKP_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order * sizeof( SKP_int32 ) );
        return;
    }

    /* Generate CNG excitation */
    SKP_Silk_CNG_exc( noise, psCNG->CNG_exc_buf_Q10, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed );

    /* Convert CNG NLSFs to filter coefficients */
    SKP_Silk_NLSF2A_stable( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );

    /* Synthesis filtering, in place, with a dedicated routine for order 16 */
    if( psDec->LPC_order == 16 ) {
        SKP_Silk_LPC_synthesis_order16( noise, A_Q12, unityGain_Q26, psCNG->CNG_synth_state, noise, length );
    } else {
        SKP_Silk_LPC_synthesis_filter( noise, A_Q12, unityGain_Q26, psCNG->CNG_synth_state, noise, length, psDec->LPC_order );
    }

    /* Mix with signal, saturating to 16 bits */
    for( n = 0; n < length; n++ ) {
        mixed = signal[ n ] + noise[ n ];
        signal[ n ] = SKP_SAT16( mixed );
    }
}
/*
 * Encode one input frame with the fixed-point encoder.
 *
 * The frame is analysed, quantized and range coded into the encoder's internal
 * payload buffer. Once the buffered frames cover PacketSize_ms the payload is
 * terminated, copied to pCode (optionally followed by stored LBRR data) and its
 * size is written to *pnBytesOut; otherwise *pnBytesOut is set to 0.
 *
 * Return value: the value returned by SKP_Silk_VAD_GetSA_Q8(), unless it is replaced
 * by SKP_SILK_ENC_PAYLOAD_BUF_TOO_SHORT (finished payload larger than the *pnBytesOut
 * bytes offered by the caller) or SKP_SILK_ENC_INTERNAL_ERROR (range coder error).
 */
int SKP_Silk_encode_frame_FIX(SKP_Silk_encoder_state_FIX * psEnc,	/* I/O  Pointer to Silk FIX encoder state           */
			      uint8_t * pCode,	/* O    Pointer to payload                          */
			      int16_t * pnBytesOut,	/* I/O  Pointer to number of payload bytes          */
			      /* input: max length; output: used                  */
			      const int16_t * pIn	/* I    Pointer to input speech frame               */
    )
{
	SKP_Silk_encoder_control_FIX sEncCtrl;
	int i, nBytes, ret = 0;
	int16_t *x_frame, *res_pitch_frame;
	int16_t xfw[MAX_FRAME_LENGTH];
	int16_t pIn_HP[MAX_FRAME_LENGTH];
	int16_t res_pitch[2 * MAX_FRAME_LENGTH + LA_PITCH_MAX];
	int LBRR_idx, frame_terminator, SNR_dB_Q7;
	const uint16_t *FrameTermination_CDF;

	/* Low bitrate redundancy parameters */
	uint8_t LBRRpayload[MAX_ARITHM_BYTES];
	int16_t nBytesLBRR;

	/* Seed cycles through 0..3 with the frame counter */
	sEncCtrl.sCmn.Seed = psEnc->sCmn.frameCounter++ & 3;

	/**************************************************************/
	/* Setup Input Pointers, and insert frame in input buffer     */
	/**************************************************************/
	x_frame = psEnc->x_buf + psEnc->sCmn.frame_length;	/* start of frame to encode */
	res_pitch_frame = res_pitch + psEnc->sCmn.frame_length;	/* start of pitch LPC residual frame */

	/****************************/
	/* Voice Activity Detection */
	/****************************/
	ret = SKP_Silk_VAD_GetSA_Q8(&psEnc->sCmn.sVAD, &psEnc->speech_activity_Q8,
				    &SNR_dB_Q7, sEncCtrl.input_quality_bands_Q15,
				    &sEncCtrl.input_tilt_Q15, pIn,
				    psEnc->sCmn.frame_length);

	/*******************************************/
	/* High-pass filtering of the input signal */
	/*******************************************/
#if HIGH_PASS_INPUT
	/* Variable high-pass filter */
	SKP_Silk_HP_variable_cutoff_FIX(psEnc, &sEncCtrl, pIn_HP, pIn);
#else
	SKP_memcpy(pIn_HP, pIn, psEnc->sCmn.frame_length * sizeof(int16_t));
#endif

	/* The filtered frame is written into the input buffer after the shaping look-ahead */
#if SWITCH_TRANSITION_FILTERING
	/* Ensure smooth bandwidth transitions */
	SKP_Silk_LP_variable_cutoff(&psEnc->sCmn.sLP,
				    x_frame + psEnc->sCmn.la_shape, pIn_HP,
				    psEnc->sCmn.frame_length);
#else
	SKP_memcpy(x_frame + psEnc->sCmn.la_shape, pIn_HP,
		   psEnc->sCmn.frame_length * sizeof(int16_t));
#endif

	/*****************************************/
	/* Find pitch lags, initial LPC analysis */
	/*****************************************/
	SKP_Silk_find_pitch_lags_FIX(psEnc, &sEncCtrl, res_pitch, x_frame);

	/************************/
	/* Noise shape analysis */
	/************************/
	SKP_Silk_noise_shape_analysis_FIX(psEnc, &sEncCtrl, res_pitch_frame, x_frame);

	/*****************************************/
	/* Prefiltering for noise shaper         */
	/*****************************************/
	SKP_Silk_prefilter_FIX(psEnc, &sEncCtrl, xfw, x_frame);

	/***************************************************/
	/* Find linear prediction coefficients (LPC + LTP) */
	/***************************************************/
	SKP_Silk_find_pred_coefs_FIX(psEnc, &sEncCtrl, res_pitch);

	/****************************************/
	/* Process gains                        */
	/****************************************/
	SKP_Silk_process_gains_FIX(psEnc, &sEncCtrl);

	/* Remember per-frame signal type and quantizer offset type; the BIT_STREAM_V4
	   path below reads them back when it codes the pulses of all buffered frames */
	psEnc->sCmn.sigtype[psEnc->sCmn.nFramesInPayloadBuf] = sEncCtrl.sCmn.sigtype;
	psEnc->sCmn.QuantOffsetType[psEnc->sCmn.nFramesInPayloadBuf] = sEncCtrl.sCmn.QuantOffsetType;

	/****************************************/
	/* Low Bitrate Redundant Encoding       */
	/****************************************/
	nBytesLBRR = MAX_ARITHM_BYTES;
	SKP_Silk_LBRR_encode_FIX(psEnc, &sEncCtrl, LBRRpayload, &nBytesLBRR, xfw);

	/*****************************************/
	/* Noise shaping quantization            */
	/*****************************************/
	/* The quantized signal of this frame is stored at this frame's slot inside q[] */
	psEnc->NoiseShapingQuantizer(&psEnc->sCmn, &sEncCtrl.sCmn, &psEnc->sNSQ, xfw,
				     &psEnc->sCmn.q[psEnc->sCmn.nFramesInPayloadBuf * psEnc->sCmn.frame_length],
				     sEncCtrl.sCmn.NLSFInterpCoef_Q2,
				     sEncCtrl.PredCoef_Q12[0], sEncCtrl.LTPCoef_Q14,
				     sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
				     sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14,
				     sEncCtrl.Gains_Q16, sEncCtrl.Lambda_Q10,
				     sEncCtrl.LTP_scale_Q14);

	/**************************************************/
	/* Convert speech activity into VAD and DTX flags */
	/**************************************************/
	if (psEnc->speech_activity_Q8 < SPEECH_ACTIVITY_DTX_THRES_Q8) {
		psEnc->sCmn.vadFlag = NO_VOICE_ACTIVITY;
		psEnc->sCmn.noSpeechCounter++;
		if (psEnc->sCmn.noSpeechCounter > NO_SPEECH_FRAMES_BEFORE_DTX) {
			psEnc->sCmn.inDTX = 1;
		}
		if (psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX) {
			/* DTX has lasted long enough: leave it and restart the counter */
			psEnc->sCmn.noSpeechCounter = 0;
			psEnc->sCmn.inDTX = 0;
		}
	} else {
		psEnc->sCmn.noSpeechCounter = 0;
		psEnc->sCmn.inDTX = 0;
		psEnc->sCmn.vadFlag = VOICE_ACTIVITY;
	}

	/****************************************/
	/* Initialize arithmetic coder          */
	/****************************************/
	if (psEnc->sCmn.nFramesInPayloadBuf == 0) {
		SKP_Silk_range_enc_init(&psEnc->sCmn.sRC);
		psEnc->sCmn.nBytesInPayloadBuf = 0;
	}

	/****************************************/
	/* Encode Parameters                    */
	/****************************************/
	if (psEnc->sCmn.bitstream_v == BIT_STREAM_V4) {
		SKP_Silk_encode_parameters_v4(&psEnc->sCmn, &sEncCtrl.sCmn, &psEnc->sCmn.sRC);
		FrameTermination_CDF = SKP_Silk_FrameTermination_v4_CDF;
	} else {
		SKP_Silk_encode_parameters(&psEnc->sCmn, &sEncCtrl.sCmn, &psEnc->sCmn.sRC,
					   &psEnc->sCmn.q[psEnc->sCmn.nFramesInPayloadBuf * psEnc->sCmn.frame_length]);
		FrameTermination_CDF = SKP_Silk_FrameTermination_CDF;
	}

	/****************************************/
	/* Update Buffers and State             */
	/****************************************/
	/* Update Input buffer */
	SKP_memmove(psEnc->x_buf, &psEnc->x_buf[psEnc->sCmn.frame_length],
		    (psEnc->sCmn.frame_length + psEnc->sCmn.la_shape) * sizeof(int16_t));

	/* parameters needed for next frame */
	psEnc->sCmn.prev_sigtype = sEncCtrl.sCmn.sigtype;
	psEnc->sCmn.prevLag = sEncCtrl.sCmn.pitchL[NB_SUBFR - 1];
	psEnc->sCmn.first_frame_after_reset = 0;

	if (psEnc->sCmn.sRC.error) {
		/* encoder returned error: clear payload buffer */
		psEnc->sCmn.nFramesInPayloadBuf = 0;
	} else {
		psEnc->sCmn.nFramesInPayloadBuf++;
	}

	/****************************************/
	/* finalize payload and copy to output  */
	/****************************************/
	if (psEnc->sCmn.nFramesInPayloadBuf * FRAME_LENGTH_MS >= psEnc->sCmn.PacketSize_ms) {

		LBRR_idx = (psEnc->sCmn.oldest_LBRR_idx + 1) & LBRR_IDX_MASK;

		/* Check if FEC information should be added */
		frame_terminator = SKP_SILK_LAST_FRAME;
		if (psEnc->sCmn.LBRR_buffer[LBRR_idx].usage == SKP_SILK_ADD_LBRR_TO_PLUS1) {
			frame_terminator = SKP_SILK_LBRR_VER1;
		}
		if (psEnc->sCmn.LBRR_buffer[psEnc->sCmn.oldest_LBRR_idx].usage == SKP_SILK_ADD_LBRR_TO_PLUS2) {
			frame_terminator = SKP_SILK_LBRR_VER2;
			LBRR_idx = psEnc->sCmn.oldest_LBRR_idx;
		}

		/* Add the frame termination info to stream */
		SKP_Silk_range_encoder(&psEnc->sCmn.sRC, frame_terminator, FrameTermination_CDF);

		if (psEnc->sCmn.bitstream_v == BIT_STREAM_V4) {
			/* Code excitation signal of every buffered frame */
			for (i = 0; i < psEnc->sCmn.nFramesInPayloadBuf; i++) {
				SKP_Silk_encode_pulses(&psEnc->sCmn.sRC,
						       psEnc->sCmn.sigtype[i],
						       psEnc->sCmn.QuantOffsetType[i],
						       &psEnc->sCmn.q[i * psEnc->sCmn.frame_length],
						       psEnc->sCmn.frame_length);
			}
		}

		/* payload length so far */
		SKP_Silk_range_coder_get_length(&psEnc->sCmn.sRC, &nBytes);

		/* check that there is enough space in external output buffer, and move data */
		if (*pnBytesOut >= nBytes) {
			SKP_Silk_range_enc_wrap_up(&psEnc->sCmn.sRC);
			SKP_memcpy(pCode, psEnc->sCmn.sRC.buffer, nBytes * sizeof(uint8_t));

			if (frame_terminator > SKP_SILK_MORE_FRAMES &&
			    *pnBytesOut >= nBytes + psEnc->sCmn.LBRR_buffer[LBRR_idx].nBytes) {
				/* Get old packet and add to payload. */
				SKP_memcpy(&pCode[nBytes],
					   psEnc->sCmn.LBRR_buffer[LBRR_idx].payload,
					   psEnc->sCmn.LBRR_buffer[LBRR_idx].nBytes * sizeof(uint8_t));
				nBytes += psEnc->sCmn.LBRR_buffer[LBRR_idx].nBytes;
			}
			*pnBytesOut = nBytes;

			/* Update FEC buffer */
			SKP_memcpy(psEnc->sCmn.LBRR_buffer[psEnc->sCmn.oldest_LBRR_idx].payload,
				   LBRRpayload, nBytesLBRR * sizeof(uint8_t));
			psEnc->sCmn.LBRR_buffer[psEnc->sCmn.oldest_LBRR_idx].nBytes = nBytesLBRR;
			/* This line describes how FEC should be used */
			psEnc->sCmn.LBRR_buffer[psEnc->sCmn.oldest_LBRR_idx].usage = sEncCtrl.sCmn.LBRR_usage;
			psEnc->sCmn.oldest_LBRR_idx = (psEnc->sCmn.oldest_LBRR_idx + 1) & LBRR_IDX_MASK;

			/* Reset number of frames in payload buffer */
			psEnc->sCmn.nFramesInPayloadBuf = 0;
		} else {
			/* Not enough space: Payload will be discarded */
			*pnBytesOut = 0;
			nBytes = 0;
			psEnc->sCmn.nFramesInPayloadBuf = 0;
			ret = SKP_SILK_ENC_PAYLOAD_BUF_TOO_SHORT;
		}
	} else {
		/* no payload this time */
		*pnBytesOut = 0;

		/* Encode that more frames follows */
		frame_terminator = SKP_SILK_MORE_FRAMES;
		SKP_Silk_range_encoder(&psEnc->sCmn.sRC, frame_terminator, FrameTermination_CDF);

		/* payload length so far */
		SKP_Silk_range_coder_get_length(&psEnc->sCmn.sRC, &nBytes);

		/* Take into account the q signal that isn't in the bitstream yet.
		   After a range coder error nFramesInPayloadBuf has been reset to 0, so
		   there is no "latest frame" slot; indexing q[] with
		   (0 - 1) * frame_length would read before the start of the array. */
		if (psEnc->sCmn.bitstream_v == BIT_STREAM_V4 &&
		    psEnc->sCmn.nFramesInPayloadBuf > 0) {
			nBytes += SKP_Silk_pulses_to_bytes(&psEnc->sCmn,
							   &psEnc->sCmn.q[(psEnc->sCmn.nFramesInPayloadBuf - 1) * psEnc->sCmn.frame_length]);
		}
	}

	/* Check for arithmetic coder errors */
	if (psEnc->sCmn.sRC.error) {
		ret = SKP_SILK_ENC_INTERNAL_ERROR;
	}

	/* simulate number of ms buffered in channel because of exceeding TargetRate */
	/* The asserts check that the 32-bit product below cannot overflow and that the divisor is positive */
	assert((8 * 1000 * ((int64_t) nBytes - (int64_t) psEnc->sCmn.nBytesInPayloadBuf)) ==
	       SKP_SAT32(8 * 1000 * ((int64_t) nBytes - (int64_t) psEnc->sCmn.nBytesInPayloadBuf)));
	assert(psEnc->sCmn.TargetRate_bps > 0);
	psEnc->BufferedInChannel_ms +=
	    SKP_DIV32(8 * 1000 * (nBytes - psEnc->sCmn.nBytesInPayloadBuf), psEnc->sCmn.TargetRate_bps);
	psEnc->BufferedInChannel_ms -= FRAME_LENGTH_MS;
	psEnc->BufferedInChannel_ms = SKP_LIMIT(psEnc->BufferedInChannel_ms, 0, 100);
	psEnc->sCmn.nBytesInPayloadBuf = nBytes;

	/* Accumulate (with saturation) the amount of active speech seen */
	if (psEnc->speech_activity_Q8 > WB_DETECT_ACTIVE_SPEECH_LEVEL_THRES_Q8) {
		psEnc->sCmn.sSWBdetect.ActiveSpeech_ms =
		    SKP_ADD_POS_SAT32(psEnc->sCmn.sSWBdetect.ActiveSpeech_ms, FRAME_LENGTH_MS);
	}

	return (ret);
}
SKP_int SKP_Silk_decode_frame( SKP_Silk_decoder_state *psDec, /* I/O Pointer to Silk decoder state */ ec_dec *psRangeDec, /* I/O Compressor data structure */ SKP_int16 pOut[], /* O Pointer to output speech frame */ SKP_int32 *pN, /* O Pointer to size of output frame */ const SKP_int nBytes, /* I Payload length */ SKP_int lostFlag /* I 0: no loss, 1 loss, 2 decode fec */ ) { SKP_Silk_decoder_control sDecCtrl; SKP_int i, L, mv_len, ret = 0; SKP_int8 flags; SKP_int32 LBRR_symbol; SKP_int pulses[ MAX_FRAME_LENGTH ]; TIC(DECODE_FRAME) L = psDec->frame_length; sDecCtrl.LTP_scale_Q14 = 0; /* Safety checks */ SKP_assert( L > 0 && L <= MAX_FRAME_LENGTH ); /********************************************/ /* Decode Frame if packet is not lost */ /********************************************/ if( lostFlag != PACKET_LOST && psDec->nFramesDecoded == 0 ) { /* First decoder call for this payload */ /* Decode VAD flags and LBRR flag */ flags = SKP_RSHIFT( psRangeDec->buf[ 0 ], 7 - psDec->nFramesPerPacket ) & ( SKP_LSHIFT( 1, psDec->nFramesPerPacket + 1 ) - 1 ); psDec->LBRR_flag = flags & 1; for( i = psDec->nFramesPerPacket - 1; i >= 0 ; i-- ) { flags = SKP_RSHIFT( flags, 1 ); psDec->VAD_flags[ i ] = flags & 1; } for( i = 0; i < psDec->nFramesPerPacket + 1; i++ ) { ec_dec_icdf( psRangeDec, SKP_Silk_uniform2_iCDF, 8 ); } /* Decode LBRR flags */ SKP_memset( psDec->LBRR_flags, 0, sizeof( psDec->LBRR_flags ) ); if( psDec->LBRR_flag ) { if( psDec->nFramesPerPacket == 1 ) { psDec->LBRR_flags[ 0 ] = 1; } else { LBRR_symbol = ec_dec_icdf( psRangeDec, SKP_Silk_LBRR_flags_iCDF_ptr[ psDec->nFramesPerPacket - 2 ], 8 ) + 1; for( i = 0; i < psDec->nFramesPerPacket; i++ ) { psDec->LBRR_flags[ i ] = SKP_RSHIFT( LBRR_symbol, i ) & 1; } } } if( lostFlag == DECODE_NORMAL ) { /* Regular decoding: skip all LBRR data */ for( i = 0; i < psDec->nFramesPerPacket; i++ ) { if( psDec->LBRR_flags[ i ] ) { SKP_Silk_decode_indices( psDec, psRangeDec, i, 1 ); SKP_Silk_decode_pulses( psRangeDec, pulses, 
psDec->indices.signalType, psDec->indices.quantOffsetType, psDec->frame_length ); } } } } if( lostFlag == DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 0 ) { /* Treat absent LBRR data as lost frame */ lostFlag = PACKET_LOST; psDec->nFramesDecoded++; } if( lostFlag != PACKET_LOST ) { /*********************************************/ /* Decode quantization indices of side info */ /*********************************************/ TIC(decode_indices) SKP_Silk_decode_indices( psDec, psRangeDec, psDec->nFramesDecoded, lostFlag ); TOC(decode_indices) /*********************************************/ /* Decode quantization indices of excitation */ /*********************************************/ TIC(decode_pulses) SKP_Silk_decode_pulses( psRangeDec, pulses, psDec->indices.signalType, psDec->indices.quantOffsetType, psDec->frame_length ); TOC(decode_pulses) /********************************************/ /* Decode parameters and pulse signal */ /********************************************/ TIC(decode_params) SKP_Silk_decode_parameters( psDec, &sDecCtrl ); TOC(decode_params) /* Update length. Sampling frequency may have changed */ L = psDec->frame_length; /********************************************************/ /* Run inverse NSQ */ /********************************************************/ TIC(decode_core) SKP_Silk_decode_core( psDec, &sDecCtrl, pOut, pulses ); TOC(decode_core) /********************************************************/ /* Update PLC state */ /********************************************************/ SKP_Silk_PLC( psDec, &sDecCtrl, pOut, L, 0 ); psDec->lossCnt = 0; psDec->prevSignalType = psDec->indices.signalType; SKP_assert( psDec->prevSignalType >= 0 && psDec->prevSignalType <= 2 ); /* A frame has been decoded without errors */ psDec->first_frame_after_reset = 0; psDec->nFramesDecoded++; } else { /* Handle packet loss by extrapolation */ SKP_Silk_PLC( psDec, &sDecCtrl, pOut, L, 1 ); } /*************************/ /* Update output buffer. 
*/ /*************************/ SKP_assert( psDec->ltp_mem_length >= psDec->frame_length ); mv_len = psDec->ltp_mem_length - psDec->frame_length; SKP_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(SKP_int16) ); SKP_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( SKP_int16 ) ); /****************************************************************/ /* Ensure smooth connection of extrapolated and good frames */ /****************************************************************/ SKP_Silk_PLC_glue_frames( psDec, &sDecCtrl, pOut, L ); /************************************************/ /* Comfort noise generation / estimation */ /************************************************/ SKP_Silk_CNG( psDec, &sDecCtrl, pOut, L ); /********************************************/ /* HP filter output */ /********************************************/ TIC(HP_out) SKP_Silk_biquad_alt( pOut, psDec->HP_B, psDec->HP_A, psDec->HPState, pOut, L ); TOC(HP_out) /* Update some decoder state variables */ psDec->lagPrev = sDecCtrl.pitchL[ psDec->nb_subfr - 1 ]; /********************************************/ /* set output frame length */ /********************************************/ *pN = ( SKP_int16 )L; TOC(DECODE_FRAME) return ret; }