示例#1
0
// Smooths a sequence by convolving it with the kernel returned by
// formGaussKernel(2*support + 1).
//
// The input is padded on both sides with `support` mirrored samples, so the
// result has exactly as many elements as rawData.
//
// rawData  samples to smooth
// support  half-width of the kernel window, in samples
//
// Returns the smoothed samples. If support is negative or larger than
// rawData.size() the mirrored padding cannot be built from the input, and an
// unmodified copy of rawData is returned instead.
vector<T> smoothenData(const vector<T> &rawData, int32 support)
{
    const int32 sampleCount = static_cast<int32>(rawData.size());

    // The padding below reads `support` samples from each end of rawData;
    // without this check the iterator arithmetic would leave the input range.
    if (support < 0 || support > sampleCount)
    {
        return rawData;
    }

    vector<float> gaussKernel = formGaussKernel(2*support + 1);

    // Padded working copy laid out as [mirrored head | rawData | mirrored tail]
    vector<T> tempData(rawData.size() + 2*support);
    std::copy(rawData.begin(), rawData.end(), tempData.begin() + support);
    std::reverse_copy(rawData.begin(), rawData.begin() + support, tempData.begin());
    std::reverse_copy(rawData.end() - support, rawData.end(), tempData.end() - support);

    vector<T> smoothData(rawData.size());
    for (int32 i = 0; i < sampleCount; ++i)
    {
        // Output sample i is centred on tempData[i + support], so its window
        // starts at tempData[i] and spans the whole kernel.
        T tempVec = initVal<T>();
        for (int32 j = 0; j <= 2*support; ++j)
        {
            tempVec += tempData[i + j] * gaussKernel[j];
        }
        smoothData[i] = tempVec;
    }
    return smoothData;
}
示例#2
0
// Classifies a time series by comparing it against every trained DTW template.
//
// The input is optionally scaled, z-normalised, smoothed and offset (depending
// on useScaling, useZNormalisation, useSmoothing and offsetUsingFirstSample)
// and then passed to computeDistance() once per template.
//
// On success the following members are updated: classDistances,
// classLikelihoods, bestDistance, maxLikelihood, distanceMatrices, warpPaths
// and predictedClassLabel. With null rejection enabled, predictedClassLabel is
// set to GRT_DEFAULT_NULL_CLASS_LABEL when the active rejectionMode rejects the
// best match.
//
// Returns false (after writing to errorLog) if the model is not trained, the
// number of input columns differs from numFeatures, there are no templates to
// compare against, or rejectionMode is unknown; returns true otherwise.
bool DTW::predict(MatrixDouble inputTimeSeries){

    if( !trained ){
        errorLog << "predict(Matrix<double> &inputTimeSeries) - The DTW templates have not been trained!" << endl;
        return false;
    }

    if( classLikelihoods.size() != numTemplates ) classLikelihoods.resize(numTemplates);
    if( classDistances.size() != numTemplates ) classDistances.resize(numTemplates);

    //Reset the results of any previous prediction
    predictedClassLabel = 0;
    maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE;
    for(UINT k=0; k<classLikelihoods.size(); k++){
        classLikelihoods[k] = 0;
        classDistances[k] = DEFAULT_NULL_LIKELIHOOD_VALUE;
    }

    if( numFeatures != inputTimeSeries.getNumCols() ){
        errorLog << "predict(Matrix<double> &inputTimeSeries) - The number of features in the model (" << numFeatures << ") do not match that of the input time series (" << inputTimeSeries.getNumCols() << ")" << endl;
        return false;
    }

    //The search for the closest template below starts from classDistances[0], so at least one template is required
    if( numTemplates == 0 ){
        errorLog << "predict(Matrix<double> &inputTimeSeries) - There are no DTW templates to compare the input time series against!" << endl;
        return false;
    }

    //Perform any preprocessing if required; timeSeriesPtr always points at the most recently processed version
    MatrixDouble *timeSeriesPtr = &inputTimeSeries;
    MatrixDouble processedTimeSeries;
    MatrixDouble tempMatrix;
    if( useScaling ){
        scaleData(*timeSeriesPtr,processedTimeSeries);
        timeSeriesPtr = &processedTimeSeries;
    }

    //Normalize the data if needed
    //NOTE(review): when scaling is also enabled, the input and output of znormData are the same matrix - confirm znormData supports that
    if( useZNormalisation ){
        znormData(*timeSeriesPtr,processedTimeSeries);
        timeSeriesPtr = &processedTimeSeries;
    }

    //Smooth the data if required
    if( useSmoothing ){
        smoothData(*timeSeriesPtr,smoothingFactor,tempMatrix);
        timeSeriesPtr = &tempMatrix;
    }

    //Offset the timeseries if required
    if( offsetUsingFirstSample ){
        offsetTimeseries( *timeSeriesPtr );
    }

    //Make the prediction by finding the closest template
    double sum = 0;
    if( distanceMatrices.size() != numTemplates ) distanceMatrices.resize( numTemplates );
    if( warpPaths.size() != numTemplates ) warpPaths.resize( numTemplates );

    //Test the timeSeries against all the templates in the timeSeries buffer
    for(UINT k=0; k<numTemplates; k++){
        //Perform DTW
        classDistances[k] = computeDistance(templatesBuffer[k].timeSeries,*timeSeriesPtr,distanceMatrices[k],warpPaths[k]);
        classLikelihoods[k] = classDistances[k];
        sum += classLikelihoods[k];
    }

    //See which gave the min distance
    UINT closestTemplateIndex = 0;
    bestDistance = classDistances[0];
    for(UINT k=1; k<numTemplates; k++){
        if( classDistances[k] < bestDistance ){
            bestDistance = classDistances[k];
            closestTemplateIndex = k;
        }
    }

    //Normalize the class likelihoods and check which class has the maximum likelihood
    UINT maxLikelihoodIndex = 0;
    maxLikelihood = 0;
    if( sum != 0.0 ){
        for(UINT k=0; k<numTemplates; k++){
            classLikelihoods[k] = (sum-classLikelihoods[k])/sum;
            if( classLikelihoods[k] > maxLikelihood ){
                maxLikelihood = classLikelihoods[k];
                maxLikelihoodIndex = k;
            }
        }
    }
    //else: the distances sum to zero, so dividing by the sum would only produce NaNs;
    //the likelihoods keep the (zero) distances copied above and maxLikelihood stays 0

    if( useNullRejection ){

        switch( rejectionMode ){
            case TEMPLATE_THRESHOLDS:
                if( bestDistance <= nullRejectionThresholds[ closestTemplateIndex ] ) predictedClassLabel = templatesBuffer[ closestTemplateIndex ].classLabel;
                else predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
                break;
            case CLASS_LIKELIHOODS:
                if( maxLikelihood >= 0.99 )  predictedClassLabel = templatesBuffer[ maxLikelihoodIndex ].classLabel;
                else predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
                break;
            case THRESHOLDS_AND_LIKELIHOODS:
                if( bestDistance <= nullRejectionThresholds[ closestTemplateIndex ] && maxLikelihood >= 0.99 )
                    predictedClassLabel = templatesBuffer[ closestTemplateIndex ].classLabel;
                else predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
                break;
            default:
                errorLog << "predict(Matrix<double> &timeSeries) - Unknown RejectionMode!" << endl;
                return false;
        }

    }else predictedClassLabel = templatesBuffer[ closestTemplateIndex ].classLabel;

    return true;
}
示例#3
0
// Picks the example that best represents one class and records its statistics.
//
// Every example in trainingData is (optionally) smoothed and offset, then
// compared with every other example through computeDistance(). The example
// with the smallest average distance to the others is reported in bestIndex.
//
// trainingData  the examples of a single class
// dtwTemplate   receives averageTemplateLength (mean example length) and, when
//               there are more than two examples, trainingMu (the best average
//               distance) and trainingSigma (spread of the distances from the
//               best example to the others); with fewer examples both are set
//               to 0 and a warning is logged
// bestIndex     receives the index of the selected example
//
// Returns false if trainingData holds no examples, true otherwise.
bool DTW::train_NDDTW(LabelledTimeSeriesClassificationData &trainingData,DTWTemplate &dtwTemplate,UINT &bestIndex){

    const UINT numExamples = trainingData.getNumSamples();

    bestIndex = 0;
    dtwTemplate.averageTemplateLength = 0;

    //Nothing can be selected from an empty set; the averages below would also divide by zero
    if( numExamples == 0 ){
        errorLog << "train_NDDTW(LabelledTimeSeriesClassificationData &trainingData,DTWTemplate &dtwTemplate,UINT &bestIndex) - There are no examples in the training data!" << endl;
        return false;
    }

    //Preprocess every example exactly once, instead of once for every pair it takes part in
    vector< MatrixDouble > timeSeries(numExamples);
    for(UINT m=0; m<numExamples; m++){
        dtwTemplate.averageTemplateLength += trainingData[m].getLength();

        //Smooth the data if required
        if( useSmoothing ) smoothData(trainingData[m].getData(),smoothingFactor,timeSeries[m]);
        else timeSeries[m] = trainingData[m].getData();

        if( offsetUsingFirstSample ){
            offsetTimeseries(timeSeries[m]);
        }
    }

    //results[m] accumulates the distance from example m to every other example
    VectorDouble results(numExamples,0.0);
    MatrixDouble distanceResults(numExamples,numExamples);

    for(UINT m=0; m<numExamples; m++){
        for(UINT n=0; n<numExamples; n++){
            if( m == n ){
                distanceResults[m][n] = 0; //The distance is zero because the two timeseries are the same
                continue;
            }

            //Compute the distance between the two time series
            MatrixDouble distanceMatrix(timeSeries[m].getNumRows(),timeSeries[n].getNumRows());
            vector< IndexDist > warpPath;
            double dist = computeDistance(timeSeries[m],timeSeries[n],distanceMatrix,warpPath);

            trainingLog << "Template: " << m << " Timeseries: " << n << " Dist: " << dist << endl;

            //Update the results values
            distanceResults[m][n] = dist;
            results[m] += dist;
        }
    }

    //Turn the sums into averages; with a single example there are no pairs to average over
    if( numExamples > 1 ){
        for(UINT m=0; m<numExamples; m++) results[m] /= (numExamples-1);
    }

    //Find the best average result, this is the result with the minimum value
    double bestAverage = results[0];
    for(UINT m=1; m<numExamples; m++){
        if( results[m] < bestAverage ){
            bestAverage = results[m];
            bestIndex = m;
        }
    }

    if( numExamples > 2 ){

        //Work out the threshold value for the best template
        dtwTemplate.trainingMu = results[bestIndex];
        dtwTemplate.trainingSigma = 0.0;

        for(UINT n=0; n<numExamples; n++){
            if(n!=bestIndex){
                dtwTemplate.trainingSigma += SQR( distanceResults[ bestIndex ][n] - dtwTemplate.trainingMu );
            }
        }
        //numExamples-1 distances contribute to the sum, hence numExamples-2 in the divisor
        dtwTemplate.trainingSigma = sqrt( dtwTemplate.trainingSigma / double(numExamples-2) );
    }else{
        warningLog << "_train_NDDTW(LabelledTimeSeriesClassificationData &trainingData,DTWTemplate &dtwTemplate,UINT &bestIndex - There are not enough examples to compute the trainingMu and trainingSigma for the template for class " << dtwTemplate.classLabel << endl;
        dtwTemplate.trainingMu = 0.0;
        dtwTemplate.trainingSigma = 0.0;
    }

    //Set the average length of the training examples
    dtwTemplate.averageTemplateLength = (UINT) (dtwTemplate.averageTemplateLength/double(numExamples));

    trainingLog << "AverageTemplateLength: " << dtwTemplate.averageTemplateLength << endl;

    //Flag that the training was successfull
    return true;
}
示例#4
0
////////////////////////// TRAINING FUNCTIONS //////////////////////////
// Trains one DTW template per class from the labelled time series.
//
// Steps: discard any previous model, optionally trim the samples
// (trimTrainingData), optionally scale / z-normalise the data, then for every
// class pick the example that best represents it (train_NDDTW) and store it,
// smoothed and/or offset if enabled, in templatesBuffer. Finally the null
// rejection thresholds are recomputed and the buffers used for prediction are
// resized.
//
// labelledTrainingData is taken by value, so the caller's dataset is never
// modified by the trimming, scaling or normalisation done here.
//
// Returns false (after writing to errorLog) if there are no samples left to
// train on, a class has no examples, or train_NDDTW fails; true otherwise.
// `trained` is only set once every template has been built.
bool DTW::train(LabelledTimeSeriesClassificationData labelledTrainingData){

    UINT bestIndex = 0;

    //Cleanup Memory
    templatesBuffer.clear();
    classLabels.clear();
    trained = false;
    continuousInputDataBuffer.clear();

    if( trimTrainingData ){
        LabelledTimeSeriesClassificationSampleTrimmer timeSeriesTrimmer(trimThreshold,maximumTrimPercentage);
        LabelledTimeSeriesClassificationData tempData;
        tempData.setNumDimensions( labelledTrainingData.getNumDimensions() );

        for(UINT i=0; i<labelledTrainingData.getNumSamples(); i++){
            if( timeSeriesTrimmer.trimTimeSeries( labelledTrainingData[i] ) ){
                tempData.addSample(labelledTrainingData[i].getClassLabel(), labelledTrainingData[i].getData());
            }else{
                trainingLog << "Removing training sample " << i << " from the dataset as it could not be trimmed!" << endl;
            }
        }
        //Overwrite the original training data with the trimmed dataset
        labelledTrainingData = tempData;
    }

    if( labelledTrainingData.getNumSamples() == 0 ){
        errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't train model as there are no samples in training data!" << endl;
        return false;
    }

    //Assign
    numClasses = labelledTrainingData.getNumClasses();
    numTemplates = labelledTrainingData.getNumClasses();
    numFeatures = labelledTrainingData.getNumDimensions();
    templatesBuffer.resize( numClasses );
    classLabels.resize( numClasses );
    nullRejectionThresholds.resize( numClasses );
    averageTemplateLength = 0;

    //The parameter was passed by value, so it already is a private copy that can be scaled or znormed in place
    LabelledTimeSeriesClassificationData &trainingData = labelledTrainingData;

    //Perform any scaling or normalisation (the ranges are recorded before the data is changed)
    rangesBuffer = trainingData.getRanges();
    if( useScaling ) scaleData( trainingData );
    if( useZNormalisation ) znormData( trainingData );

    //For each class, run a one-to-one DTW and find the template the best describes the data
    for(UINT k=0; k<numTemplates; k++){
        //Get the class label for the kth class
        UINT classLabel = trainingData.getClassTracker()[k].classLabel;
        LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classLabel );
        UINT numExamples = classData.getNumSamples();
        bestIndex = 0;

        //Set the class label of this template
        templatesBuffer[k].classLabel = classLabel;

        //Set the kth class label
        classLabels[k] = classLabel;

        trainingLog << "Training Template: " << k << " Class: " << classLabel << endl;

        //Check to make sure we actually have some training examples
        if(numExamples<1){
            errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can not train model: Num of Example is < 1! Class: " << classLabel << endl;
            return false;
        }

        if(numExamples==1){//If we have just one training example then we have to use it as the template
            bestIndex = 0;

            //train_NDDTW is skipped here, so fill in the template statistics it would otherwise have set;
            //without the length this class would be missing from the overall averageTemplateLength
            templatesBuffer[k].averageTemplateLength = classData[0].getLength();
            templatesBuffer[k].trainingMu = 0.0;
            templatesBuffer[k].trainingSigma = 0.0;

            nullRejectionThresholds[k] = 0.0;//TODO-We need a better way of calculating this!
            warningLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't compute reject thresholds for class " << classLabel << " as there is only 1 training example" << endl;
        }else{
            //Search for the best training example for this class
            if( !train_NDDTW(classData,templatesBuffer[k],bestIndex) ){
                errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Failed to train template for class with label: " << classLabel << endl;
                return false;
            }
        }

        //Add the template with the best index to the buffer
        if( useSmoothing ){
            //Training using Smoothing: store the smoothed version of the best example
            smoothData(classData[ bestIndex ].getData(),smoothingFactor,templatesBuffer[k].timeSeries);
        }else{
            //Standard Training: store the best example as it is
            templatesBuffer[k].timeSeries = classData[bestIndex].getData();
        }

        if( offsetUsingFirstSample ){
            offsetTimeseries( templatesBuffer[k].timeSeries );
        }

        //Add the average length of the training examples for this template to the overall averageTemplateLength
        averageTemplateLength += templatesBuffer[k].averageTemplateLength;
    }

    //Flag that the models have been trained
    trained = true;
    averageTemplateLength = (UINT) (averageTemplateLength/double(numTemplates));

    //Recompute the null rejection thresholds
    recomputeNullRejectionThresholds();

    //Resize the prediction results to make sure it is setup for realtime prediction
    continuousInputDataBuffer.clear();
    continuousInputDataBuffer.resize(averageTemplateLength,vector<double>(numFeatures,0));
    classLikelihoods.resize(numTemplates,DEFAULT_NULL_LIKELIHOOD_VALUE);
    classDistances.resize(numTemplates,0);
    predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
    maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE;

    //Training complete
    return true;
}
  // Detects the elution peaks of one mass trace and appends every peak that
  // passes the enabled filters to single_mtraces.
  //
  // The trace is smoothed in place (smoothData) and its local extrema are
  // located (findLocalExtrema):
  //  - no maximum:        nothing is appended
  //  - exactly one:       mt itself is filtered and, if accepted, appended
  //  - several maxima:    mt is cut at the local minima (plus its last data
  //                       point); each chunk becomes its own MassTrace,
  //                       labelled "<label of mt>.<chunk number>", and is
  //                       filtered and appended independently
  //
  // Filters: if pw_filtering_ == "fixed", the FWHM returned by estimateFWHM(true)
  // must lie within [min_fwhm_, max_fwhm_]; if mt_snr_filtering_ is set,
  // computeApexSNR() must be at least chrom_peak_snr_.
  //
  // mt              trace to analyse; modified (smoothed intensities are added)
  // single_mtraces  output container; appended to inside an OpenMP critical
  //                 section when compiled with OpenMP
  void ElutionPeakDetection::detectElutionPeaks_(MassTrace& mt, std::vector<MassTrace>& single_mtraces)
  {
    // window size in scans: chrom_fwhm_ divided by the average MS1 cycle time, rounded up
    double scan_time(mt.getAverageMS1CycleTime());
    Size win_size = std::ceil(chrom_fwhm_ / scan_time);

    // add smoothed data (original data is still accessible)
    smoothData(mt, static_cast<Int>(win_size));

    std::vector<Size> maxes, mins;
    findLocalExtrema(mt, win_size / 2, maxes, mins);

    // no maximum at all: nothing to report
    if (maxes.empty())
    {
      return;
    }

    // if only one maximum exists: finished!
    if (maxes.size() == 1)
    {
      bool pw_ok = true;
      bool snr_ok = true;

      // check mass trace filter criteria (if enabled)
      if (pw_filtering_ == "fixed")
      {
        double act_fwhm(mt.estimateFWHM(true));

        if (act_fwhm < min_fwhm_ || act_fwhm > max_fwhm_)
        {
          pw_ok = false;
        }
      }

      if (mt_snr_filtering_)
      {
        if (computeApexSNR(mt) < chrom_peak_snr_)
        {
          snr_ok = false;
        }
      }

      if (pw_ok && snr_ok)
      {
        mt.updateSmoothedMaxRT();

        // with "fixed" filtering the FWHM has already been estimated above
        if (pw_filtering_ != "fixed")
        {
          mt.estimateFWHM(true);
        }

        // NOTE: a minimum/maximum trace length check used to be applied here and is currently disabled
#ifdef _OPENMP
#pragma omp critical (OPENMS_ElutionPeakDetection_mtraces)
#endif
        single_mtraces.push_back(mt);
      }

      return;
    }

    // several maxima: split mt into sub-traces
    MassTrace::const_iterator cp_it = mt.begin();
    Size last_idx(0);

    // fetch the smoothed intensities once instead of once per copied data point
    const std::vector<double>& smoothed_ints = mt.getSmoothedIntensities();

    // add last data point as last minimum (to grep the last chunk of the MT)
    mins.push_back(mt.getSize() - 1);

    for (Size min_idx = 0; min_idx < mins.size(); ++min_idx)
    {
      // copy sub-trace between cp_it and split point (the split point itself is included)
      std::vector<PeakType> tmp_mt;
      std::vector<double> smoothed_tmp;

      while (last_idx <= mins[min_idx])
      {
        tmp_mt.push_back(*cp_it);
        smoothed_tmp.push_back(smoothed_ints[last_idx]);
        ++cp_it;
        ++last_idx;
      }

      MassTrace new_mt(tmp_mt);

      // copy smoothed int's
      new_mt.setSmoothedIntensities(smoothed_tmp);

      // check filter criteria
      bool pw_ok = true;
      bool snr_ok = true;

      // check mass trace filter criteria (if enabled)
      if (pw_filtering_ == "fixed")
      {
        double act_fwhm(new_mt.estimateFWHM(true));

        if (act_fwhm < min_fwhm_ || act_fwhm > max_fwhm_)
        {
          pw_ok = false;
        }
      }

      if (mt_snr_filtering_)
      {
        // the SNR has to be judged on the sub-trace itself, not on the unsplit parent trace
        if (computeApexSNR(new_mt) < chrom_peak_snr_)
        {
          snr_ok = false;
        }
      }

      if (pw_ok && snr_ok)
      {
        // set label of sub-trace
        new_mt.setLabel(mt.getLabel() + "." + String(min_idx + 1));
        new_mt.updateSmoothedMaxRT();
        new_mt.updateWeightedMeanMZ();
        new_mt.updateWeightedMZsd();

        // with "fixed" filtering the FWHM has already been estimated above
        if (pw_filtering_ != "fixed")
        {
          new_mt.estimateFWHM(true);
        }

        // NOTE: a minimum/maximum trace length check used to be applied here and is currently disabled
#ifdef _OPENMP
#pragma omp critical (OPENMS_ElutionPeakDetection_mtraces)
#endif
        single_mtraces.push_back(new_mt);
      }
    }

    return;
  }