/*
 * Pull whatever audio the ALSA helper has buffered for this video object and
 * hand it to the active audio sink.
 *
 * vobj     - video file object; its audio, audio_data, async_audio_encoding
 *            and vorbis state fields are read here.
 * audiopos - current audio position, passed through to the sink.
 *
 * Returns the position reported by write_pcm() or encode_vorbis(), or
 * audiopos unchanged when audio is disabled for vobj, when no samples were
 * available, or when the file is built without HAVE_ALSA.
 */
static double fetch_and_process_audio( ucil_theora_video_file_object_t *vobj, double audiopos )
{
#if HAVE_ALSA
   if( vobj->audio )
   {
      signed char *pcm;
      int sample_count;

      /* Keep calling the fill routine until it reports zero. */
      while( ucil_alsa_fill_audio_buffer( vobj->audio_data ) )
      {
      }

      sample_count = ucil_alsa_get_audio_buffer( vobj->audio_data, (short**)&pcm );
      if( sample_count )
      {
         if( !vobj->async_audio_encoding )
         {
            /* Synchronous path: encode straight into the output file. */
            audiopos = encode_vorbis( vobj->f, &vobj->vd, &vobj->vo, &vobj->vb,
                                      vobj->audio_rate, audiopos, pcm, sample_count );
         }
         else
         {
            /* Asynchronous path: only store the raw PCM for now. */
            audiopos = write_pcm( vobj, audiopos, pcm, sample_count );
         }
      }
   }
#endif
   return audiopos;
}
int main(int argc, char *argv[]) { if (argc == 3) { video_filename = argv[1]; pcm_filaname = argv[2]; } else { printf("Missing input video file or output file.\n"); return 1; } int err; av_register_all(); AVFormatContext * fmt_ctx = NULL; err = avformat_open_input( &fmt_ctx, video_filename, NULL, NULL ); assert( err == 0 ); err = avformat_find_stream_info( fmt_ctx, NULL ); assert( err >= 0 ); av_dump_format( fmt_ctx, 0, argv[1], 0 ); // int const video_idx = get_stream_idx( fmt_ctx, AVMEDIA_TYPE_VIDEO ); int const audio_idx = get_stream_idx( fmt_ctx, AVMEDIA_TYPE_AUDIO ); assert(audio_idx >= 0); // AVCodecContext * video_codec_ctx = new_codec_ctx( fmt_ctx, video_idx ); AVCodecContext * audio_codec_ctx = new_codec_ctx( fmt_ctx, audio_idx ); int is_frame_finish; static AVPacket pkt; static uint8_t *pkt_data = NULL; static int pkt_size = 0; static AVFrame frame; AVFrame *decoded_frame = av_frame_alloc(); assert( decoded_frame ); while ( true ) { if ( av_read_frame( fmt_ctx, &pkt ) < 0 ) { break; } if ( pkt.stream_index == audio_idx ) { pkt_data = pkt.data; pkt_size = pkt.size; while ( pkt_size > 0 ) { int const byte_consumed = avcodec_decode_audio4( audio_codec_ctx, decoded_frame, &is_frame_finish, &pkt ); ASSERT_OR_ERRMSG(byte_consumed >= 0, "Errors when decode audio.\n"); pkt_data += byte_consumed; pkt_size -= byte_consumed; assert(pkt_size >= 0); if ( is_frame_finish ) { int const data_size = av_samples_get_buffer_size( NULL, audio_codec_ctx->channels, decoded_frame->nb_samples, audio_codec_ctx->sample_fmt, 1); if ( av_sample_fmt_is_planar(audio_codec_ctx->sample_fmt)) { uint8_t *buf = malloc(data_size); interleave(decoded_frame->data, buf, audio_codec_ctx->channels, audio_codec_ctx->sample_fmt, data_size); write_pcm( buf, data_size, pcm_filaname ); free(buf); } else { write_pcm( decoded_frame->data[0], data_size, pcm_filaname ); } } } if (pkt.data) { av_free_packet( &pkt ); } } else { av_free_packet( &pkt ); } } avcodec_close(audio_codec_ctx); 
avformat_close_input(&fmt_ctx); printf("audio decode done\n"); return 0; }
static void ci_pcmbuf_insert(const void *ch1, const void *ch2, int count) { num_output_samples += count; if (use_dsp) { struct dsp_buffer src; src.remcount = count; src.pin[0] = ch1; src.pin[1] = ch2; src.proc_mask = 0; while (1) { int out_count = MAX(count, 512); int16_t buf[2 * out_count]; struct dsp_buffer dst; dst.remcount = 0; dst.p16out = buf; dst.bufcount = out_count; dsp_process(ci.dsp, &src, &dst); if (dst.remcount > 0) { if (mode == MODE_WRITE) write_pcm(buf, dst.remcount); else if (mode == MODE_PLAY) playback_pcm(buf, dst.remcount); } else if (src.remcount <= 0) { break; } } } else { /* Convert to 32-bit interleaved. */ count *= format.channels; int i; int32_t buf[count]; if (format.depth > 16) { if (format.stereo_mode == STEREO_NONINTERLEAVED) { for (i = 0; i < count; i += 2) { buf[i+0] = ((int32_t*)ch1)[i/2]; buf[i+1] = ((int32_t*)ch2)[i/2]; } } else { memcpy(buf, ch1, sizeof(buf)); } } else { if (format.stereo_mode == STEREO_NONINTERLEAVED) { for (i = 0; i < count; i += 2) { buf[i+0] = ((int16_t*)ch1)[i/2]; buf[i+1] = ((int16_t*)ch2)[i/2]; } } else { for (i = 0; i < count; i++) { buf[i] = ((int16_t*)ch1)[i]; } } } if (mode == MODE_WRITE) write_pcm_raw(buf, count); } perform_config(); }
void get_word(int flag) { unsigned int size = 0, n = 0, i = 0; char ans[500]; int frame_n; void *buffer = NULL; frame *frames = NULL; mfcc_frame *mfcc_frames = NULL; voice_signal *signals = NULL; printf("\nFale agora e pressione enter\n"); capture_start(NULL); getchar(); getchar(); capture_stop(&size, &buffer); n = split(buffer, size / 2, &signals); for (i = 0; i < n; i++) { frame_n = make_frames_hamming(signals[i].buffer, signals[i].number, &frames); mfcc_frames = malloc(sizeof(mfcc_frame) * frame_n); mfcc_features(frames, frame_n, mfcc_frames); if (!flag) { record(NULL, signals[i].buffer, signals[i].number * 2); printf("Escreva a palavra: "); scanf("%s", (char *)ans); if (!(ans[0] == 'x' && ans[1] == '\0')) { new_word(mfcc_frames, frame_n, (char *)ans); chdir("waves"); char *path = malloc(strlen(ans) + 5); char *ext = ".wav"; memcpy(path, ans, strlen(ans)); memcpy(path + strlen(ans), ext, 5); write_pcm(signals[i].buffer, signals[i].number * 2, path); free(path); chdir(".."); } } else { /*laço de reconhecimento*/ puts("analisando dados..."); word *words = malloc(sizeof(word)); double best = 1e10; char *name = NULL; void **word_adresses; unsigned int n = 0, i = 0, count = 0; if ((n = get_list(words))) { word_adresses = malloc(n * sizeof(void *)); while (words != NULL) { double now = compare(mfcc_frames, frame_n, words->frames, words->n); word_adresses[count++] = words; if (now < best) { best = now; name = words->name; } words = words->next; } for (i = 0; i < count; i++) free(word_adresses[i]); free(word_adresses); } if (best < 3.5) printf("%f %s", best, name); } free(mfcc_frames); free(frames); } if (flag) printf("\n"); free(buffer); }