Example #1
0
int main (int argc, const char *argv[])
{
    FILE *pOutputSmsFile; 
    SMS_Data smsData;
    SMS_Header smsHeader;

    float pSoundData[SMS_MAX_WINDOW];
    SMS_SndHeader soundHeader;

    char *pChInputSoundFile = NULL, *pChOutputSmsFile = NULL;
    int verbose = 0;
    int iDoAnalysis = 1;
    int iFrame = 0;
    long iStatus = 0, iSample = 0, sizeNewData = 0;

    int optc;   /* switch */
    poptContext pc;

    SMS_AnalParams analParams;
    sms_initAnalParams(&analParams);    /* initialize arguments to defaults*/

    struct poptOption options[] =
    {
        {"verbose", 'v', POPT_ARG_NONE, &verbose, 0, 
            "verbose mode", 0},
        {"debug", 'd', POPT_ARG_INT, &analParams.iDebugMode, 0, 
            "debug mode (0)", "int"},
        {"format", 'f', POPT_ARG_INT, &analParams.iFormat, 0, 
            "analysis format (0, harmonic)", "int"},
        {"sound-type", 'q', POPT_ARG_INT, &analParams.iSoundType, 0, 
            "sound type (0, phrase)", "int"},
        {"direction", 'x', POPT_ARG_INT, &analParams.iAnalysisDirection, 0, 
            "analysis direction (0, forward)", "int"},
        /* STFT Parameters: */
        {"window-size", 's', POPT_ARG_FLOAT, &analParams.fSizeWindow, 0, 
            "size of the window in f0 periods (3.5)", "float"},
        {"window-type", 'i', POPT_ARG_INT, &analParams.iWindowType, 0, 
            "window type (1, blackman harris 70 dB)", "int"},
        {"frame-rate", 'r', POPT_ARG_INT, &analParams.iFrameRate, 0, 
            "frame rate in hertz (300)", "int"},
        /* Peak Detection Parameters */
        {"highest-freq", 'j', POPT_ARG_FLOAT, &analParams.fHighestFreq, 0, 
            "highest frequency to look for peaks (12000hz)", "float"},
        {"min-peak-mag", 'k', POPT_ARG_FLOAT, &analParams.fMinPeakMag, 0, 
            "minimum peak magnitude (0 normalized dB, which corresponds to -100dB)", "float"}, /*\todo check this doc*/
        /* Harmonic Detection Parameters */
        {"ref-harmonic", 'y', POPT_ARG_INT, &analParams.iRefHarmonic, 0, 
            "reference harmonic number in series (1)", "int"},
        {"min-ref-harm-mag", 'm', POPT_ARG_FLOAT, &analParams.fMinPeakMag, 0, 
            "minimum reference harmonic magnitude (30 normalized dB)", "float"}, /*\todo check this doc*/
        {"min-ref-harm-mag", 'z', POPT_ARG_FLOAT, &analParams.fRefHarmMagDiffFromMax, 0, 
            " maximum dB difference between the harmonic used for reference and the maximum peak (default 30)", "float"}, /*\todo check this doc*/
        {"default-fund", 'u', POPT_ARG_FLOAT, &analParams.fDefaultFundamental, 0, 
            "default fundamental frequency (hz), used to set initial window size, or window size for entire sound if inharmonic (default 100)", "float"}, 
        {"lowest-fund", 'l', POPT_ARG_FLOAT, &analParams.fDefaultFundamental, 0, 
            "lowest fundamental frequency(hz), or frequency in inharmonic analysis, to search for (default 50)", "float"}, 
        {"highest-fund", 'h', POPT_ARG_FLOAT, &analParams.fDefaultFundamental, 0, 
            "highest fundamental frequency to search for, has no effect on inharmonic analysis (default 1000)", "float"}, 
        /* Peak Continuation parameters */
        {"guides", 'n', POPT_ARG_INT, &analParams.nGuides, 0, 
            "number of guides to use in partial tracking (default 100)", "int"},
        {"tracks", 'p', POPT_ARG_INT, &analParams.nTracks, 0, 
            "number of output partial tracks (default 60)", "int"},
        {"freq-deviation", 'w', POPT_ARG_FLOAT, &analParams.fFreqDeviation, 0, 
            "maximum permitted frequency deviation from guide frequency (default .45)", "float"}, 
        {"peak-cont-guide", 't', POPT_ARG_FLOAT, &analParams.fPeakContToGuide, 0, 
            "contribution of the frequency of the previous peak of a given trajectory to the current guide frequency value (default .4).", "float"}, 
        {"fund-cont-guide", 'o', POPT_ARG_FLOAT, &analParams.fFundContToGuide, 0, 
            "contribution of the fundamental frequency of the previous peak of a given trajectory to the current guide frequency value (default .5).", "float"}, 
        /* Track Cleaning parameters:\n" */
        {"clean-track", 'g', POPT_ARG_INT, &analParams.iCleanTracks, 0, 
            "turn on/off track cleaning (default is on, 1)", "int"}, 
        {"min-track-length", 'a', POPT_ARG_FLOAT, &analParams.iMinTrackLength, 0, 
            "minimum track length in seconds (0.1)", "float"}, 
        {"max-sleeping-time", 'b', POPT_ARG_FLOAT, &analParams.iMaxSleepingTime, 0, 
            "maximum time a frame can sleep in seconds (0.1)", "float"},  /* this doc is horrible */
        /* Stochastic Analysis parameters */
        {"stochastic", 'e', POPT_ARG_INT, &analParams.iStochasticType, 0, 
            "turn on/off stochastic analysis (default is on, 1)", "int"}, 
        {"stoch-coeff", 'c', POPT_ARG_INT, &analParams.nStochasticCoeff, 0, 
            "number of stochastic coefficients in approximation (default 128)", "int"}, 
        /* spectral enveloping parameters */
        {"se",0, POPT_ARG_INT, &analParams.specEnvParams.iType, 0, 
            "spectral enveloping type (0, off)", "int"},
        {"co", 0, POPT_ARG_INT, &analParams.specEnvParams.iOrder, 0, 
            "discrete cepstrum order (25)", "int"},
        {"la", 0, POPT_ARG_FLOAT, &analParams.specEnvParams.fLambda, 0, 
            "lambda, regularizing coefficient (0.00001)", "float"}, 
        {"an", 0, POPT_ARG_NONE, &analParams.specEnvParams.iAnchor, 0, 
            "turn on anchoring of spectral envelope endpoints", 0}, 
        {"mef", 0, POPT_ARG_INT, &analParams.specEnvParams.iMaxFreq, 0, 
            "maximum envelope frequency (default is highest-freq", "int"}, 
        POPT_AUTOHELP
            POPT_TABLEEND
    };


    pc = poptGetContext("smsAnal", argc, argv, options, 0);
    poptSetOtherOptionHelp(pc, help_header_text);

    while ((optc = poptGetNextOpt(pc)) > 0) {
        switch (optc) {
            /* specific arguments are handled here */
            case 'v':
                verbose = 1;
            default:
                ;
        }
    }
    if (optc < -1) 
    {
        /* an error occurred during option processing */
        printf("%s: %s\n",
                poptBadOption(pc, POPT_BADOPTION_NOALIAS),
                poptStrerror(optc));
        return 1;
    }
    if (argc < 3)
    {
        poptPrintUsage(pc,stderr,0);
        return 1;
    }
    pChInputSoundFile = (char *) poptGetArg(pc);
    pChOutputSmsFile = (char *) poptGetArg(pc);
    /* parsing done */


    /* open input sound */
    if (sms_openSF(pChInputSoundFile, &soundHeader))
    {
        printf("error in sms_openSF: %s \n", sms_errorString());
        exit(EXIT_FAILURE);
    }       

    /* initialize everything */
    sms_init();
    /* TODO NExt: go from here through all the functions that need to look at specEnvParams */
    sms_initAnalysis (&analParams, &soundHeader);

    sms_fillHeader (&smsHeader, &analParams, "smsAnal");
    sms_writeHeader (pChOutputSmsFile, &smsHeader, &pOutputSmsFile);

    /* allocate output SMS record */
    sms_allocFrameH (&smsHeader, &smsData);

    /* perform analysis */
    if (analParams.iAnalysisDirection == SMS_DIR_REV)
        iSample = soundHeader.nSamples;

    if (analParams.iDebugMode == SMS_DBG_SYNC)
        sms_createDebugFile (&analParams);

    if(verbose)
    {
        printf("\n===sound file info===\n");
        printf("samples: %d, samplerate: %d, seconds: %f \n", soundHeader.nSamples, soundHeader.iSamplingRate, 
                soundHeader.nSamples / (float)soundHeader.iSamplingRate);
        printf("number of channels: %d, read channel: %d \n", soundHeader.channelCount, soundHeader.iReadChannel); 
        printf("\n===analysis parameters===\n");
        printf("sizeHop: %d, nFrames: %d \n", analParams.sizeHop, analParams.nFrames);
        /* \todo: print analysis window type (by name) here */
        if(analParams.specEnvParams.iType != SMS_ENV_NONE)
        {
            printf("\n===spectral envelope parameters===\n");
            if(analParams.specEnvParams.iType == SMS_ENV_CEP)
                printf("type: cepstral coefficients, ");
            else if(analParams.specEnvParams.iType == SMS_ENV_FBINS)
                printf("type: frequency bins, ");
            else
                printf("warning: unknown spectral envelope type! \n\n ");
            printf("order: %d, lambda: %f, max frequency: %d \n", analParams.specEnvParams.iOrder,
                    analParams.specEnvParams.fLambda, analParams.specEnvParams.iMaxFreq);
        }
        printf("\n===header info string===\n %s", smsHeader.pChTextCharacters);
        printf("\n\ndoing analysis now:\n");
    }

    while(iDoAnalysis > 0)
    {
        if (analParams.iAnalysisDirection == SMS_DIR_REV)
        {
            if ((iSample - analParams.sizeNextRead) >= 0)
                sizeNewData = analParams.sizeNextRead;
            else
                sizeNewData = iSample;
            iSample -= sizeNewData;
        }
        else
        {
            iSample += sizeNewData;
            if((iSample + analParams.sizeNextRead) < soundHeader.nSamples)
                sizeNewData = analParams.sizeNextRead;
            else
                sizeNewData = soundHeader.nSamples - iSample;
        }
        /* get one frame of sound */
        if (sms_getSound(&soundHeader, sizeNewData, pSoundData, iSample, &analParams))
        {
            printf("error: could not read sound frame %d\n", iFrame);
            printf("error message in sms_getSound: %s \n", sms_errorString());
            break;
        }
        /* perform analysis of one frame of sound */
        iStatus = sms_analyze (sizeNewData, pSoundData, &smsData,
                &analParams);

        /* if there is an output SMS record, write it */
        if (iStatus == 1)
        {
            sms_writeFrame (pOutputSmsFile, &smsHeader, &smsData);
            if(sms_errorCheck())
            {
                printf("error: could not write sms frame %d:\n", iFrame);
                printf("error message in sms_writeFrame: %s \n", sms_errorString());
                break;
            }
            if(verbose)
            {
                /*                                 if (iFrame % 10 == 0) */
                /*                                         printf ("frame: %d, %.2f \n", iFrame, iFrame / (float) smsHeader.iFrameRate); */
                if (iFrame % 10 == 0)
                    printf ("%.2f ", iFrame / (float) smsHeader.iFrameRate);

            }
            iFrame++;
        }
        else if (iStatus == -1) /* done */
        {
            iDoAnalysis = 0;
            smsHeader.nFrames = iFrame;
        }
    }


    smsHeader.fResidualPerc = analParams.fResidualAccumPerc / iFrame;
    if(verbose)
    {
        printf("\n");
        printf("residual percentage: %f \n", smsHeader.fResidualPerc);
    }                
    if(smsHeader.nFrames != analParams.nFrames && verbose)
        printf("warning: wrong number of analyzed frames: analParams: %d, smsHeader: %d \n", 
                analParams.nFrames, smsHeader.nFrames); 
    /* write an close output files */
    sms_writeFile (pOutputSmsFile, &smsHeader);
    if (analParams.iDebugMode == SMS_DBG_SYNC)
        sms_writeDebugFile ();

    printf("wrote %d analysis frames to %s\n", iFrame, pChOutputSmsFile);

    /* cleanup */
    sms_freeFrame(&smsData);
    sms_freeAnalysis(&analParams);
    sms_free();
    return 0;   
}
Example #2
0
/*! \brief main function to perform the SMS analysis on a single frame
 *
 * The input is a section of the sound, the output is the SMS data
 *
 * \param sizeWaveform	     size of input waveform data
 * \param pWaveform	     pointer to input waveform data
 * \param pSmsData          pointer to output SMS data
 * \param pAnalParams   pointer to analysis parameters
 * \return \todo sort out return meanings
 */
int sms_analyze (int sizeWaveform, sfloat *pWaveform, SMS_Data *pSmsData, SMS_AnalParams *pAnalParams)
{
	static int sizeWindow = 0;      /* size of current analysis window */ //RTE ?: shouldn't this just be initilalized outside?

	int iCurrentFrame = pAnalParams->iMaxDelayFrames - 1;  /* frame # of current frame */
	int delayFrames = pAnalParams->minGoodFrames + pAnalParams->analDelay;
	int i, iError, iExtraSamples;              /* samples used for next analysis frame */
	sfloat fRefFundamental = 0;   /* reference fundamental for current frame */
        SMS_AnalFrame *pTmpAnalFrame;

	/* clear SMS output */
	sms_clearFrame (pSmsData);
  
	/* set initial analysis-window size */
	if (sizeWindow == 0)
		sizeWindow = pAnalParams->iDefaultSizeWindow;
  
	/* fill the input sound buffer and perform pre-emphasis */
	if (sizeWaveform > 0)
		sms_fillSoundBuffer (sizeWaveform, pWaveform, pAnalParams);
    
	/* move analysis data one frame back */
	pTmpAnalFrame = pAnalParams->ppFrames[0];
	for(i = 1; i < pAnalParams->iMaxDelayFrames; i++)
		pAnalParams->ppFrames[i-1] = pAnalParams->ppFrames[i];
	pAnalParams->ppFrames[pAnalParams->iMaxDelayFrames-1] = pTmpAnalFrame;


	/* initialize the current frame */
	sms_initFrame (iCurrentFrame, pAnalParams, sizeWindow);
        if(sms_errorCheck())
        {
                printf("error in init frame: %s \n", sms_errorString());
                return(-1);
        }
  
	/* if right data in the sound buffer do analysis */
	if (pAnalParams->ppFrames[iCurrentFrame]->iStatus == SMS_FRAME_READY)
	{
		sfloat fAvgDev = sms_fundDeviation( pAnalParams, iCurrentFrame - 1);

		/* if single note use the default fundamental as reference */
		if (pAnalParams->iSoundType == SMS_SOUND_TYPE_NOTE)
			fRefFundamental = pAnalParams->fDefaultFundamental;
		/* if sound is stable use the last fundamental as a reference */
		else if (fAvgDev != -1 && fAvgDev <= pAnalParams->maxDeviation)
			fRefFundamental = pAnalParams->ppFrames[iCurrentFrame - 1]->fFundamental;
		else
			fRefFundamental = 0;

		/* compute spectrum, find peaks, and find fundamental of frame */
		sms_analyzeFrame (iCurrentFrame, pAnalParams, fRefFundamental);

		/* set the size of the next analysis window */
		if (pAnalParams->ppFrames[iCurrentFrame]->fFundamental > 0 &&
		    pAnalParams->iSoundType != SMS_SOUND_TYPE_NOTE)
			sizeWindow = sms_sizeNextWindow (iCurrentFrame, pAnalParams);
      
		/* figure out how much needs to be read next time */
		iExtraSamples =
			(pAnalParams->soundBuffer.iMarker + pAnalParams->soundBuffer.sizeBuffer) -
			(pAnalParams->ppFrames[iCurrentFrame]->iFrameSample + pAnalParams->sizeHop);
/*                 printf("iMarker: %d, sizeBuffer: %d, iFrameSample %d, sizeHop: %d \n",  */
/*                        pAnalParams->soundBuffer.iMarker, pAnalParams->soundBuffer.sizeBuffer, */
/*                        pAnalParams->ppFrames[iCurrentFrame]->iFrameSample, pAnalParams->sizeHop); */

		pAnalParams->sizeNextRead = MAX (0, (sizeWindow+1)/2 - iExtraSamples);
/*                 printf("pAnalParams -> sizeNextRead: %d, sizeWindow: %d, iExtraSamples: %d \n", */
/*                        pAnalParams->sizeNextRead, sizeWindow, iExtraSamples); */
		/* check again the previous frames and recompute if necessary */
                /*! \todo when deviation is really off, this function returns -1, yet it
                  isn't used.. is it being recomputed ?? */
		ReAnalyzeFrame (iCurrentFrame, pAnalParams);
	}
  
	/* incorporate the peaks into the corresponding tracks */
	/* This is done after a pAnalParams->iMaxDelayFrames delay  */
	if (pAnalParams->ppFrames[iCurrentFrame - delayFrames]->fFundamental > 0 ||
	    ((pAnalParams->iFormat == SMS_FORMAT_IH ||
	      pAnalParams->iFormat == SMS_FORMAT_IHP) &&
	     pAnalParams->ppFrames[iCurrentFrame - delayFrames]->nPeaks > 0))
		sms_peakContinuation (iCurrentFrame - delayFrames, pAnalParams);

	/* fill gaps and delete short tracks */
	if (pAnalParams->iCleanTracks > 0 &&
	    pAnalParams->ppFrames[iCurrentFrame - delayFrames]->iStatus != SMS_FRAME_EMPTY)
		sms_cleanTracks (iCurrentFrame - delayFrames, pAnalParams);

	/* do stochastic analysis */
	if (pAnalParams->iStochasticType != SMS_STOC_NONE)
	{
		/* synthesize deterministic signal */
		if (pAnalParams->ppFrames[1]->iStatus != SMS_FRAME_EMPTY &&
		    pAnalParams->ppFrames[1]->iStatus != SMS_FRAME_END)
		{
			/* shift synthesis buffer */
			memcpy ( pAnalParams->synthBuffer.pFBuffer,
                                 pAnalParams->synthBuffer.pFBuffer+pAnalParams->sizeHop,
			        sizeof(sfloat) * pAnalParams->sizeHop);
			memset (pAnalParams->synthBuffer.pFBuffer+pAnalParams->sizeHop,
                                0, sizeof(sfloat) * pAnalParams->sizeHop);
      
			/* get deterministic signal with phase  */
			sms_sineSynthFrame (&pAnalParams->ppFrames[1]->deterministic,
			                pAnalParams->synthBuffer.pFBuffer+pAnalParams->sizeHop,
			                pAnalParams->sizeHop, &pAnalParams->prevFrame,
			                pAnalParams->iSamplingRate);
		}
  
		/* perform stochastic analysis after 1 frame of the     */
		/* deterministic synthesis because it needs two frames  */
		if (pAnalParams->ppFrames[0]->iStatus != SMS_FRAME_EMPTY &&
		    pAnalParams->ppFrames[0]->iStatus != SMS_FRAME_END)
		
                {
			int sizeResidual = pAnalParams->sizeHop * 2;
			int iSoundLoc = pAnalParams->ppFrames[0]->iFrameSample - pAnalParams->sizeHop;
			sfloat *pOriginal = &(pAnalParams->soundBuffer.pFBuffer[iSoundLoc -
			                                       pAnalParams->soundBuffer.iMarker]);
			sfloat *pFResidual;

                        static sfloat *pWindow;
                        static int sizeWindowArray = 0;

			int sizeData =
				MIN (pAnalParams->soundBuffer.sizeBuffer -
				      (iSoundLoc - pAnalParams->soundBuffer.iMarker),
				     sizeResidual);
			if ((pFResidual = (sfloat *) calloc (sizeResidual, sizeof(float)))
			    == NULL)
			{
                                sms_error("sms_analyze: error allocating memory for pFResidual");
                                return -1;
                        }
                        if (sizeWindowArray != sizeData)
                        {
                                if(sizeWindowArray != 0) free(pWindow);
                                if((pWindow = (sfloat *) calloc(sizeData, sizeof(float))) == NULL)
                                {
                                        sms_error("sms_analyze: error allocating memory for pWindow");
                                        return -1;
                                }
                                sms_getWindow( sizeData, pWindow, SMS_WIN_HAMMING);
                                sms_scaleWindow( sizeData, pWindow);
                                sizeWindowArray = sizeData;
                        }

			/* obtain residual sound from original and synthesized sounds.  accumulate the residual percentage.*/
                        pAnalParams->fResidualAccumPerc += sms_residual (sizeData,
                                      pAnalParams->synthBuffer.pFBuffer,
                                      pOriginal,
                                      pFResidual,
                                      pWindow);


                        if (pAnalParams->iStochasticType == SMS_STOC_APPROX)
                        {
                                /* filter residual with a high pass filter (it solves some problems) */
                                sms_filterHighPass (sizeData, pFResidual, pAnalParams->iSamplingRate);
                                
                                /* approximate residual */
                                sms_stocAnalysis (sizeData, pFResidual, pWindow, pSmsData);
                        }
                        else if  (pAnalParams->iStochasticType == SMS_STOC_IFFT)
                        {
                                int sizeMag = sms_power2(sizeData >> 1);
                                sms_spectrum (sizeData, pFResidual, pWindow, sizeMag, pSmsData->pFStocCoeff, 
                                        pSmsData->pResPhase);
                        }

			/* get sharper transitions in deterministic representation */
                        /* \todo why is this done in the stochastic analysis space? */
                        sms_scaleDet (pAnalParams->synthBuffer.pFBuffer, pOriginal,
                                      pAnalParams->ppFrames[0]->deterministic.pFSinAmp,
                                      pAnalParams, pSmsData->nTracks);
      
			pAnalParams->ppFrames[0]->iStatus = SMS_FRAME_DONE;

			free ((char *) pFResidual); /* \todo get rid of this free, manage memory the same as spectrum functions */
		}