Beispiel #1
0
/*
 * Decode one compressed audio packet and hand the converted samples back
 * through pinfo.
 *
 * Input:   pinfo->inptr / pinfo->inlen describe the compressed packet.
 * Output:  pinfo->outptr receives a copy of the converted frame and
 *          pinfo->outlen its length in bytes. pinfo->outptr is grown with
 *          realloc() when pinfo->outsize is too small, and pinfo->outsize is
 *          updated accordingly. pinfo->outlen is set to 0 whenever no samples
 *          are produced after a successful decode call.
 *
 * Returns: the value returned by avcodec_decode_audio4() (negative on decode
 *          failure; per FFmpeg's documentation, otherwise the number of bytes
 *          consumed from the packet), or -1 when the output buffer cannot be
 *          grown.
 *
 * NOTE(review): an older comment on this function described the result as
 * "1 get one frame, 0 failed, -1 err"; that does not match what the code
 * returns - confirm what callers expect.
 *
 * `frame` and `avctxp` are not declared in this function; they are assumed to
 * be the decoder's frame and codec context defined elsewhere in the file.
 */
int ffmpeg_adec_decode(ad_wrapper_t *wrapper, adec_ctrl_t *pinfo)
{
    int got_samples = 0;
    int ret = 0;
    int data_size = 0;
    int out_bytes = 0;
    AVFrame frame_tmp;
    AVPacket pkt;
    dtaudio_decoder_t *decoder = (dtaudio_decoder_t *)wrapper->parent;

    memset(&pkt, 0, sizeof(AVPacket));
    pkt.data = pinfo->inptr;
    pkt.size = pinfo->inlen;
    pkt.side_data_elems = 0;
    memset(&frame_tmp, 0, sizeof(AVFrame));
    memset(frame, 0, sizeof(AVFrame));

    // Only dump leading bytes that are really inside the packet.
    if (pkt.data != NULL && pkt.size >= 2) {
        dt_debug(TAG, "start decode size:%d %02x %02x \n", pkt.size, pkt.data[0],
                 pkt.data[1]);
    }
    ret = avcodec_decode_audio4(avctxp, frame, &got_samples, &pkt);
    if (pkt.data != NULL && pkt.size >= 4) {
        dt_debug(TAG, "start decode size:%d %02x %02x %02x %02x \n", pkt.size,
                 pkt.data[0], pkt.data[1], pkt.data[2], pkt.data[3]);
    }
    if (ret < 0) {
        dt_error(TAG, "decode failed ret:%d \n", ret);
        goto EXIT;
    }

    if (!got_samples) {         // decoder accepted the packet but produced no frame
        dt_error(TAG, "get no samples out \n");
        pinfo->outlen = 0;
        goto EXIT;
    }

    // frame->linesize is passed as the output argument, so linesize[0] is
    // rewritten by this call before the frame is handed to audio_convert().
    data_size = av_samples_get_buffer_size(frame->linesize, avctxp->channels,
                                           frame->nb_samples, avctxp->sample_fmt, 1);
    if (data_size <= 0) {
        dt_error(TAG, "data_size invalid: size:%d outlen:%d \n", data_size,
                 pinfo->outlen);
        pinfo->outlen = 0;
        goto EXIT;
    }

    audio_convert(decoder, &frame_tmp, frame);
    out_bytes = frame_tmp.linesize[0];

    // frame_tmp was zeroed above, so an empty data[0]/linesize[0] here means
    // the conversion left nothing to copy.
    if (frame_tmp.data[0] == NULL || out_bytes <= 0) {
        dt_error(TAG, "audio convert produced no data \n");
        pinfo->outlen = 0;
        goto EXIT;
    }

    // Out frame too large: grow the output buffer to twice the frame size.
    if (pinfo->outsize < out_bytes) {
        int new_size = out_bytes * 2;
        void *grown = realloc(pinfo->outptr, new_size);

        if (grown == NULL) {
            // The old buffer is still valid and still owned by pinfo.
            dt_error(TAG, "realloc out buf failed size:%d \n", new_size);
            pinfo->outlen = 0;
            ret = -1;
            goto EXIT;
        }
        pinfo->outptr = grown;
        pinfo->outsize = new_size;
    }

    memcpy(pinfo->outptr, frame_tmp.data[0], out_bytes);
    pinfo->outlen = out_bytes;

EXIT:
    // The converted buffer is released here on every path; free(NULL) is a no-op.
    free(frame_tmp.data[0]);
    return ret;
}
Beispiel #2
0
/*
 * Spectrogram viewer entry point.
 *
 * Initializes the microphone and SDL video, prints the analysis parameters,
 * then loops: read one time slice from the microphone, convert it to
 * DOUBLE_REAL, optionally apply the compiled-in window function, and draw it
 * at column xx (which advances by X_STRETCH and wraps before WIDTH).
 *
 * Keys: SPACE toggles capturing, BACKSPACE saves the captured buffer,
 * RETURN toggles windowing, any other key (or a quit event) ends the loop.
 *
 * Returns 0 on normal exit, 1 when SDL initialization, the video-info query
 * or the video-mode switch fails.
 *
 * WIDTH, HEIGHT, fourier, capturing, captured, captured_slices and windowing
 * are not declared here; they are defined elsewhere in the file.
 */
int main(int argc, char* argv[])
{
    SDL_Surface *screen;
    SDL_Event event;
    const SDL_VideoInfo *info;
    int keypress = 0;
    int flags = 0;
    int xx = 0;
    Audio *audio_input = NULL;

    (void)argc;
    (void)argv;

    printf("Spectrogram\n");

      // Initialize
    microphone_init();

    if (SDL_Init(SDL_INIT_VIDEO) < 0 ) return 1;

    info   = SDL_GetVideoInfo();
    if (info == NULL) {
        printf("Unable to get video info: %s\n", SDL_GetError());
          // SDL_Init succeeded above, so shut SDL down before bailing out
        SDL_Quit();
        return 1;
    }
    WIDTH  = info->current_w;
    HEIGHT = info->current_h;

#ifdef FORCE_WIDTH
    WIDTH  = (FORCE_WIDTH);
#endif
#ifdef FORCE_HEIGHT
    HEIGHT  = (FORCE_HEIGHT);
#endif

    printf("Milliseconds per slice:    %i\n", MS_IN_TIME_SLICE);
    printf("Maximum frequency:         %.0f\n", FREQ_MULT*(HEIGHT-1));
    printf("Rate of minimum frequency: %f\n", frequency( (((double)1) * FREQ_MULT) ) );
    printf("Rate of maximum frequency: %f\n", frequency( (((double)(HEIGHT-1)) * FREQ_MULT) ) );
    printf("Samples per slice:         %ld\n", SAMPLES_IN_TIME_SLICE );
    printf("Revolutions per slice:     %.2f .. %.2f\n", (SAMPLES_IN_TIME_SLICE*frequency( (((double)1) * FREQ_MULT) ) ),  (SAMPLES_IN_TIME_SLICE*frequency( (((double)(HEIGHT-1)) * FREQ_MULT) ) ) );
    printf("Resolution:                %ix%i\n", WIDTH, HEIGHT);

#ifndef NO_FULLSCREEN
    flags |= SDL_FULLSCREEN;
#endif
  // NO_HWSUFACE is the historical (misspelled) switch; the correctly spelled
  // NO_HWSURFACE is accepted as well so either one disables the flag.
#if !defined(NO_HWSUFACE) && !defined(NO_HWSURFACE)
    flags |= SDL_HWSURFACE;
#endif

    if (!(screen = SDL_SetVideoMode(WIDTH, HEIGHT, DEPTH, flags)))
    {
        SDL_Quit();
        return 1;
    }

    fourier = audio_make_buffer( (HEIGHT/Y_STRETCH), DOUBLE_REAL );

    while(!keypress)
    {

          // Drop the previous slice (NULL on the first pass) and start a fresh one
        audio_free( audio_input );
        audio_input = audio_make_buffer( SAMPLES_IN_TIME_SLICE, MIC_FORMAT );

          // Read in a slice of audio
          // NOTE(review): the returned sample count was stored but never used;
          // confirm whether a short read needs handling.
        (void)microphone_read( SAMPLES_IN_TIME_SLICE, audio_input );

        audio_convert( audio_input, DOUBLE_REAL );

        if (windowing == 1) {
#ifdef BARTLETT_WINDOW
            bartlett_window( audio_input );
#endif

#ifdef BLACKMAN_HARRIS_WINDOW
            blackman_harris_window( audio_input );
#endif

#ifdef HANN_WINDOW
            hann_window( audio_input );
#endif
        }

        DrawScreen( screen, audio_input, xx );

          // Advance the drawing column, wrapping before it runs off the screen
        xx += X_STRETCH;
        if (xx+X_STRETCH >= WIDTH) {
            xx = 0;
        }

        while(SDL_PollEvent(&event))
        {
            switch (event.type)
            {
                case SDL_QUIT:
                    keypress = 1;
                    break;
                case SDL_KEYDOWN:
                    if ( event.key.keysym.sym == SDLK_SPACE ) {
                        capturing = !capturing;

                          // If we just switched to capturing, reinitialize the capturing slice
                        if (capturing) {
                            audio_free( captured );
                            captured = audio_make_buffer( (HEIGHT/Y_STRETCH), DOUBLE_REAL );
                              // Zero the audio by amplifying it by 0.0
                            audio_scale( captured, 0.0 );
                            captured_slices = 0;
                        } else if (captured_slices > 0) {
                              // captured_slices is only reset to 0 in this function,
                              // so skip the averaging rather than divide by zero
                            audio_scale( captured, 1.0 / ((Real)captured_slices) );
                        }
                    } else if ( event.key.keysym.sym == SDLK_BACKSPACE ) {
                        audio_save( captured, "phons/captured.fourier" );
                    } else if ( event.key.keysym.sym == SDLK_RETURN ) {
                        windowing = !windowing;
                    } else {
                          // Any other key ends the main loop
                        keypress = 1;
                    }
                    break;
                default:
                    break;
            }
        }
    }

      // Release the last slice that was read before shutting down
    audio_free( audio_input );

    SDL_Quit();

    return 0;
}
Beispiel #3
0
/*
 * Tiny Speech Recognizer entry point.
 *
 * Initializes the microphone, then loops forever: allocate a fresh buffer,
 * read one time slice, convert it to DOUBLE_REAL, extract its features and
 * pretty-print them when above_threshold() accepts them. With
 * SUB_RUNNING_AVERAGE defined, a running average of the features is
 * subtracted from each slice; with SQUARE_SUPPRESS defined, the features are
 * passed through square_suppress() first.
 *
 * The loop has no exit condition, so the cleanup after it is never reached.
 */
int main(int argc, char **argv) {
    Features features;
    Audio    *slice = NULL;
    size_t   samples_read;

#ifdef SUB_RUNNING_AVERAGE
    Features average;
    Features weighted_current;
    zero_features( &average );
#endif

    printf("Tiny Speech Recognizer\n");

    // Bring up the audio input
    microphone_init();

    for (;;) {

        // Discard the previous slice (NULL on the first pass) and allocate a new one
        audio_free( slice );
        slice = audio_make_buffer( SAMPLES_IN_TIME_SLICE, MIC_FORMAT );

        // Fill the buffer from the microphone and switch to the analysis format
        samples_read = microphone_read( SAMPLES_IN_TIME_SLICE, slice );
        audio_convert( slice, DOUBLE_REAL );

        features = features_extract( slice );

#ifdef SUB_RUNNING_AVERAGE
        // Save the current features scaled by (1 - tau) before they are modified
        copy_scaled_features( &weighted_current, &features, ((Real)1.0-(Real)RUNNING_AVERAGE_TAU) );
        // Remove the running average from the current features
        subtract_features( &features, &average );
        // Decay the running average by tau ...
        scale_features( &average, (Real)RUNNING_AVERAGE_TAU );
        // ... and fold the saved, scaled copy into it
        add_features( &average, &weighted_current );
#endif

#ifdef SQUARE_SUPPRESS
        square_suppress( &features );
#endif

        if (above_threshold( &features )) {
            features_pretty( features );
        }

    }

    // Clean up (not reached: the loop above never terminates)
    microphone_deinit();

    return 0;

}