예제 #1
0
//Trains the model from a single time series example.
//
//The example is split into consecutive windows of downsampleFactor samples; each window becomes one
//state. For every state and dimension the window mean is stored in b and (optionally) the window
//standard deviation in sigmaStates. The transition matrix a starts uniform and is constrained for
//left-right models; pi is uniform (ergodic) or fixed to state 0 (left-right).
//
//@param trainingData: the time series used to build the model
//@return true if the model was built; false if downsampleFactor is zero or the time series is too
//        short to produce at least one state
//@throws a C-string (const char*) if modelType is neither HMM_ERGODIC nor HMM_LEFTRIGHT
bool ContinuousHiddenMarkovModel::train_(TimeSeriesClassificationSample &trainingData){
    
    //Clear any previous models
    clear();
    
    //A zero downsample factor would make the division below undefined, so the model can't be trained
    if( downsampleFactor == 0 ){
        return false;
    }
    
    //The number of states is the number of complete downsample windows in the training sample
    timeseriesLength = trainingData.getLength();
    numStates = (unsigned int)floor((double)(timeseriesLength/downsampleFactor));
    
    //A sample shorter than one downsample window would give a model with no states,
    //so fail here instead of flagging an empty model as trained
    if( numStates == 0 ){
        return false;
    }
    
    numInputDimensions = trainingData.getNumDimensions();
    classLabel = trainingData.getClassLabel();
    
    //Every transition starts with the same probability, 1/numStates
    a.resize(numStates, numStates);
    for(unsigned int i=0; i<numStates; i++){
        for(unsigned int j=0; j<numStates; j++){
            a[i][j] = 1.0/numStates;
        }
    }
    
    //b is the downsampled training sample: the mean of each window, per dimension
    b.resize(numStates, numInputDimensions);
    
    unsigned int index = 0;
    Float norm = 0;
    for(unsigned int j=0; j<numInputDimensions; j++){
        index = 0;
        for(unsigned int i=0; i<numStates; i++){
            norm = 0;
            b[i][j] = 0;
            for(unsigned int k=0; k<downsampleFactor; k++){
                if( index < trainingData.getLength() ){
                    b[i][j] += trainingData[index++][j];
                    norm += 1;
                }
            }
            if( norm > 1 ){
                b[i][j] /= norm;
            }
        }
    }
    
    //Estimate pi
    pi.resize(numStates);
    
    switch( modelType ){
        case(HMM_ERGODIC):
            for(UINT i=0; i<numStates; i++){
                pi[i] = 1.0/numStates;
            }
            break;
        case(HMM_LEFTRIGHT):
            //Set the state transitions constraints: state i may only move to states i..i+delta,
            //then renormalize each row so it still sums to 1
            for(UINT i=0; i<numStates; i++){
                norm = 0;
                for(UINT j=0; j<numStates; j++){
                    if((j<i) || (j>i+delta)) a[i][j] = 0.0;
                    norm += a[i][j];
                }
                if( norm > 0 ){
                    for(UINT j=0; j<numStates; j++){
                        a[i][j] /= norm;
                    }
                }
            }
            
            //Set pi to start in state 0
            for(UINT i=0; i<numStates; i++){
                pi[i] = i==0 ? 1 : 0;
            }
            break;
        default:
            //Nothing after the throw can run, so no return is needed here
            throw("HMM_ERROR: Unkown model type!");
    }
    
    //Setup sigma for each state
    sigmaStates.resize( numStates, numInputDimensions );
    
    if( autoEstimateSigma ){
        
        //Estimate the standard dev for each dimension, for each state.
        //b already holds the mean of every window (computed above over exactly the same windows),
        //so it is reused here instead of computing the means a second time
        for(unsigned int j=0; j<numInputDimensions; j++){
            index = 0;
            for(unsigned int i=0; i<numStates; i++){
                norm = 0;
                sigmaStates[i][j] = 0;
                for(unsigned int k=0; k<downsampleFactor; k++){
                    if( index < trainingData.getLength() ){
                        //Advance index in its own statement: if SQR is a function-like macro, passing
                        //index++ inside its argument would increment the index twice per term
                        const Float deviation = trainingData[index][j] - b[i][j];
                        index++;
                        sigmaStates[i][j] += deviation * deviation;
                        norm += 1;
                    }
                }
                if( norm > 1 ){
                    sigmaStates[i][j] = sqrt( 1.0/norm * sigmaStates[i][j] );
                }
                
                //Never let the estimate drop below the configured sigma
                if( sigmaStates[i][j] < sigma ){
                    sigmaStates[i][j] = sigma;
                }
            }
        }
        
    }else{
        sigmaStates.setAllValues(sigma);
    }
    
    //Setup the observation buffer for prediction
    observationSequence.resize( timeseriesLength, VectorFloat(numInputDimensions,0) );
    obsSequence.resize(timeseriesLength,numInputDimensions);
    estimatedStates.resize( numStates );
    
    //Finally, flag that the model was trained
    trained = true;
    
    return true;
}
//Trims the low-energy start and end of a time series, in place.
//
//The energy of sample i is the mean absolute difference between sample i and sample i-1 across all
//dimensions, normalized so that the largest energy is 1. The series is cut to the range
//[firstIndex, lastIndex), where firstIndex/lastIndex are the first/last samples whose normalized
//energy exceeds trimThreshold. The trim is only applied if the percentage of samples removed does
//not exceed maximumTrimPercentage.
//
//@param timeSeries: the time series to trim; replaced with the trimmed data on success
//@return true if the time series was trimmed; false if it is empty, has no dimensions, has no energy,
//        has fewer than two samples above the threshold, or the trim would remove too much data
bool TimeSeriesClassificationSampleTrimmer::trimTimeSeries(TimeSeriesClassificationSample &timeSeries) {

    const UINT M = timeSeries.getLength();
    const UINT N = timeSeries.getNumDimensions();

    if( M == 0 ) {
        warningLog << "trimTimeSeries(TimeSeriesClassificationSample &timeSeries) - can't trim data, the length of the input time series is 0!" << endl;
        return false;
    }

    if( N == 0 ) {
        warningLog << "trimTimeSeries(TimeSeriesClassificationSample &timeSeries) - can't trim data, the number of dimensions in the input time series is 0!" << endl;
        return false;
    }

    //Compute the energy of the time series (x[0] has no predecessor, so it stays at 0)
    double maxValue = 0;
    VectorDouble x(M,0);

    for(UINT i=1; i<M; i++) {
        for(UINT j=0; j<N; j++) {
            x[i] += fabs(timeSeries[i][j]-timeSeries[i-1][j]);
        }
        x[i] /= N;
        if( x[i] > maxValue ) maxValue = x[i];
    }

    //Without any energy the normalization below would divide by zero, and no sample could pass the threshold
    if( !(maxValue > 0) ) {
        warningLog << "trimTimeSeries(TimeSeriesClassificationSample &timeSeries) - can't trim data, the time series has no energy!" << endl;
        return false;
    }

    //Normalize x so that the maximum energy has a value of 1
    //At the same time search for the first time x[i] passes the trim threshold
    //An index of 0 doubles as the "not found" marker, as the search starts at 1
    UINT firstIndex = 0;
    for(UINT i=1; i<M; i++) {
        x[i] /= maxValue;

        if( x[i] > trimThreshold && firstIndex == 0 ) {
            firstIndex = i;
        }
    }

    //Search for the last time x[i] passes the trim threshold, looking only after the first index
    UINT lastIndex = 0;
    for(UINT i=M-1; i>firstIndex; i--) {
        if( x[i] > trimThreshold ) {
            lastIndex = i;
            break;
        }
    }

    //If no sample passed the threshold then neither index was found
    if( firstIndex == 0 ) {
        warningLog << "Failed to find either the first index or the last index!" << endl;
        return false;
    }

    //Only one sample passed the threshold, so there is no range to trim to
    if( lastIndex == 0 ) {
        warningLog << "Failed to find the last index!" << endl;
        return false;
    }

    //Compute how long the new time series would be if we trimmed it
    //lastIndex is always greater than firstIndex here, so newM is at least 1
    UINT newM = lastIndex-firstIndex;
    double trimPercentage = (double(newM) / double(M)) * 100.0;

    if( 100 - trimPercentage <= maximumTrimPercentage ) {

        //Copy the samples in [firstIndex, lastIndex) into the new time series
        MatrixDouble newTimeSeries(newM,N);
        UINT index = 0;
        for(UINT i=firstIndex; i<lastIndex; i++) {
            for(UINT j=0; j<N; j++) {
                newTimeSeries[index][j] = timeSeries[i][j];
            }
            index++;
        }

        timeSeries.setTrainingSample(timeSeries.getClassLabel(), newTimeSeries);
        return true;
    }

    warningLog << "Maximum Trim Percentage Excedded, Can't Trim Sample!"
               << " Original Timeseries Length: " << M << " Trimmed Timeseries Length: " << newM
               << " Percentage: " << (100-trimPercentage) << " MaximumTrimPercentage: " << maximumTrimPercentage << endl;
    return false;
}