Exemplo n.º 1
0
/**
 * Trains a single weak LUT machine.
 *
 * The second dimension of trainingFeatures indexes the features (its extent is
 * used as the feature count); the second dimension of lossGradient indexes the
 * outputs. For every (feature, output) pair a weighted histogram is built and
 * the negated sum of absolute histogram entries is stored as the loss. The
 * feature(s) selected by bestIndex() on those losses are then used to fill the
 * look-up tables with +1 / -1 depending on the sign of the histogram entries.
 *
 * NOTE(review): weightedHistogram() is expected to (re)fill the member
 * _gradientHistogram on every call -- confirm against its definition.
 *
 * @param trainingFeatures  quantized feature values of the training set
 * @param lossGradient      loss gradient, one column per output
 * @return a newly allocated LUTMachine holding copies of the LUTs and of the
 *         selected feature indices
 */
boost::shared_ptr<bob::learn::boosting::LUTMachine> bob::learn::boosting::LUTTrainer::train(const blitz::Array<uint16_t,2>& trainingFeatures, const blitz::Array<double,2>& lossGradient) const{
  const int numFeatures = trainingFeatures.extent(1);
  const int numOutputs = m_numberOfOutputs;
  _lossSum.resize(numFeatures, m_numberOfOutputs);

  // Loss of every feature for every output: the negated L1 norm of the gradient histogram.
  // Iteration runs from the highest index down to 0.
  for (int feature = numFeatures - 1; feature >= 0; --feature){
    for (int output = numOutputs - 1; output >= 0; --output){
      weightedHistogram(trainingFeatures(blitz::Range::all(), feature), lossGradient(blitz::Range::all(), output));
      _lossSum(feature, output) = - blitz::sum(blitz::abs(_gradientHistogram));
    }
  }

  // Pick the feature index (or indices) that bestIndex() selects on the loss
  if (m_selectionType != independent){
    // 'shared' selection: accumulate the loss over all outputs and use the
    // single best feature for every output
    blitz::secondIndex j;
    const blitz::Array<double,1> sum(blitz::sum(_lossSum, j));
    _selectedIndices = bestIndex(sum);
  } else {
    // 'independent' selection: each output gets the feature that is best for
    // its own dimension of the loss
    for (int output = numOutputs - 1; output >= 0; --output){
      _selectedIndices(output) = bestIndex(_lossSum(blitz::Range::all(), output));
    }
  }

  // Build the look-up table of each output from the histogram of its selected feature
  const int numBins = m_maximumFeatureValue;
  for (int output = numOutputs - 1; output >= 0; --output){
    const int chosenFeature = _selectedIndices(output);
    weightedHistogram(trainingFeatures(blitz::Range::all(), chosenFeature), lossGradient(blitz::Range::all(), output));

    for (int bin = numBins - 1; bin >= 0; --bin){
      // +1 for strictly positive histogram entries, -1 otherwise
      _luts(bin, output) = (_gradientHistogram(bin) > 0) ? 1. : -1.;
    }
  }

  // Wrap copies of the tables and indices into a new weak machine
  boost::shared_ptr<LUTMachine> weakMachine(new LUTMachine(_luts.copy(), _selectedIndices.copy()));
  return weakMachine;

}
Exemplo n.º 2
0
/// Find best NFA and number of inliers wrt square error threshold in e.
///
/// Candidate inlier counts k = SizeSample()+1 .. e.size() are scanned in
/// order; the scan stops at the first entry whose error exceeds maxThreshold.
/// For each k the value
///   loge0 + logalpha*(k-SizeSample()) + logc_n[k] + logc_k[k]
/// is evaluated and the candidate with the smallest value is kept.
///
/// \param e            per-point errors with their side.
///                     NOTE(review): presumably sorted by increasing error,
///                     since the scan stops at the first one above
///                     maxThreshold -- confirm against the caller.
/// \param loge0        constant term added to every candidate.
/// \param maxThreshold largest error still considered.
/// \param logc_n       table indexed by k; must hold at least e.size()+1 entries.
/// \param logc_k       table indexed by k; must hold at least e.size()+1 entries.
/// \return the best candidate as ErrorIndex(value, k, side); if no candidate
///         was evaluated, ErrorIndex(+infinity, SizeSample(), 0).
OrsaModel::ErrorIndex OrsaModel::bestNFA(const std::vector<ErrorIndex>& e,
                                         double loge0,
                                         double maxThreshold,
                                         const std::vector<float> &logc_n,
                                         const std::vector<float> &logc_k) const
{
  const int startIndex = SizeSample();
  const double multError = (DistToPoint()? 1.0: 0.5);
  // An error of exactly 0 would give log10(0) = -infinity, i.e. an NFA of
  // -infinity that no other candidate could ever beat. Offsetting by the
  // smallest normalized float keeps the logarithm finite while leaving any
  // error of practical magnitude numerically unchanged.
  const double tinyError = std::numeric_limits<float>::min();

  ErrorIndex bestIndex(std::numeric_limits<double>::infinity(),
                       startIndex,
                       0);
  const int n = static_cast<int>( e.size() );
  for(int k=startIndex+1; k<=n && e[k-1].error<=maxThreshold; ++k) {
    const double logalpha = logalpha0_[e[k-1].side]
                          + multError*log10(e[k-1].error + tinyError);
    ErrorIndex index(loge0+logalpha*static_cast<double>(k-startIndex)+logc_n[k]+logc_k[k],
                     k, e[k-1].side);
    if(index.error < bestIndex.error)
      bestIndex = index;
  }
  return bestIndex;
}
/// Find best NFA and its index wrt square error threshold in e.
///
/// Walks k from startIndex+1 up to e.size(), stopping early at the first
/// entry whose error (e[k-1].first) is above maxThreshold, and returns the
/// (value, k) pair with the smallest value of
///   loge0 + logalpha*(k-startIndex) + logc_n[k] + logc_k[k].
/// When no k is evaluated the result is (+infinity, startIndex).
/// logc_n and logc_k are indexed by k, so they need e.size()+1 entries.
static ErrorIndex bestNFA(
  int startIndex, //number of point required for estimation
  double logalpha0,
  const std::vector<ErrorIndex>& e,
  double loge0,
  double maxThreshold,
  const std::vector<float> &logc_n,
  const std::vector<float> &logc_k,
  double multError = 1.0)
{
  const size_t count = e.size();
  ErrorIndex best(std::numeric_limits<double>::infinity(), startIndex);

  size_t k = startIndex + 1;
  while (k <= count && e[k-1].first <= maxThreshold) {
    // The tiny offset keeps the logarithm finite for a zero error.
    const double logalpha =
      logalpha0 + multError * log10(e[k-1].first + std::numeric_limits<float>::min());

    const ErrorIndex candidate(
      loge0 + logalpha * (double)(k-startIndex) + logc_n[k] + logc_k[k],
      k);

    if (candidate.first < best.first) {
      best = candidate;
    }
    ++k;
  }
  return best;
}
Exemplo n.º 4
0
/**
 * Assigns every sequence to the OTU in which it has the largest tau.
 *
 * For OTU i the entries cumCount[i] .. cumCount[i]+otuFreq[i]-1 are visited;
 * each entry k refers to sequence anI[k] and to the weight tau[anP[k]]. A
 * sequence is (re)assigned whenever a strictly larger tau than its current
 * best (initially 0) is found.
 *
 * @param otuData     out: per sequence, the index of its best OTU, or -1 if no
 *                    tau greater than 0 was seen; its size defines the number
 *                    of sequences
 * @param percentage  out: per sequence, 1 - best tau; must hold at least
 *                    otuData.size() elements
 * @param cumCount    start offset of every OTU; its size defines the number of OTUs
 * @param tau         weights, looked up through anP
 * @param otuFreq     number of entries of every OTU
 * @param anP         maps an entry to its position in tau
 * @param anI         maps an entry to its sequence index
 * @return always 0, including when the run is interrupted via m->control_pressed
 *         (the outputs are then left partially filled or untouched)
 *
 * A caught std::exception is reported through m->errorOut and terminates the
 * process with exit(1).
 */
int seqNoise::setUpOTUData(vector<int>& otuData, vector<double>& percentage, vector<int> cumCount, vector<double> tau, vector<int> otuFreq, vector<int> anP, vector<int> anI){
	try {

		const int numOTUs = cumCount.size();
		const int numSeqs = otuData.size();
		
		vector<double> bestTau(numSeqs, 0);
		// OTU indices are integers; keeping them in an int vector avoids the
		// implicit double -> int narrowing when they are copied into otuData.
		vector<int> bestIndex(numSeqs, -1);
		
		for(int i=0;i<numOTUs;i++){
			if (m->control_pressed) { return 0; }
			for(int j=0;j<otuFreq[i];j++){
				
				const int index1 = cumCount[i] + j;
				const double thisTau = tau[anP[index1]];
				const int index2 = anI[index1];
				
				// strict comparison: on ties the OTU found first is kept
				if(thisTau > bestTau[index2]){
					bestTau[index2] = thisTau;
					bestIndex[index2] = i;
				}
			}		
		}
		
		for(int i=0;i<numSeqs;i++){
			if (m->control_pressed) { return 0; }
			otuData[i] = bestIndex[i];
			percentage[i] = 1 - bestTau[i];
		}
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "seqNoise", "setUpOTUData");
		exit(1);
	}
}
  void nw_helper(std::vector<float>& M, std::vector<float>& Iref, std::vector<float>& Iread, 
		 std::vector<int>& traceM, std::vector<int>& traceIref, std::vector<int>& traceIread,
		 const std::string& refseq, const std::string& readseq, 
		 std::string& refseq_al, std::string& readseq_al, 
		 float* score, std::vector<BamTools::CigarOp>& cigar_list){
    int L1 = refseq.length();
    int L2 = readseq.length();
    cigar_list.clear();

    // Various variables used in the matrix calculations
    int ref_base, read_base, oindex, nindex;
    float s1, s2, s3;
    int c;

    // Fill in the 3 matrices using dynamic programming
    for (int i = 1; i <= L2; i++){
      for (int j = 1; j <= L1; j++){
	nindex    = i*(L1+1)+j;
	ref_base  = base_to_int(refseq[j-1]);
	read_base = base_to_int(readseq[i-1]);

	// Update M matrix (examine (i-1, j-1))
	oindex          = (i-1)*(L1+1)+(j-1);
	s1              = M[oindex];
	s2              = Iref[oindex];
	s3              = Iread[oindex];
	M[nindex]       = bestIndex(s1, s2, s3, &c) + s[ref_base][read_base];
	traceM[nindex]  = c;

	// Update Iref matrix (examine (i,j-1))
	oindex             = i*(L1+1) + (j-1);
	s1                 = M[oindex]     - GAPOPEN;
	s2                 = Iref[oindex]  - GAPEXTEND;
	s3                 = Iread[oindex] - GAPOPEN;
	Iref[nindex]       = bestIndex(s1, s2, s3, &c);
	traceIref[nindex]  = c;

	// Update Iread matrix (examine (i-1,j))
	oindex              = (i-1)*(L1+1) + j;
	s1                  = M[oindex]     - GAPOPEN;
	s2                  = Iref[oindex]  - GAPOPEN;
	s3                  = Iread[oindex] - GAPEXTEND;
	Iread[nindex]       = bestIndex(s1, s2, s3, &c);
	traceIread[nindex]  = c;
      }
    }
  
    //Find the best ending point for the alignment
    float best_val;
    int best_col, best_type;
    findOptimalStop(L1, L2, M, Iref, Iread, best_val, best_col, best_type);
  
    // Store the optimal alignment score
    *score = best_val;
  
    std::stringstream refseq_ss, readseq_ss, cigar_ss;
  
    // Handle trailing gaps
    for(int i = L1; i > best_col; i--){
      refseq_ss  << refseq.at(i-1);
      readseq_ss << "-";
    }

    // Traceback the optimal alignment
    int best_row = L2;
    std::string raw_cigar;
    int index;
    while (best_row > 0){
      index = best_row*(L1+1) + best_col;
      if (best_type == 0){
	// M
	refseq_ss  << refseq.at(best_col-1);
	readseq_ss << readseq.at(best_row-1);
	cigar_ss   << "M";
	best_type   = traceM[index];
	best_row--;
	best_col--;
      } 
      else if (best_type == 1){
	//Iref
	refseq_ss  << refseq.at(best_col-1);
	readseq_ss << "-";
	cigar_ss   << "D";
	best_type   = traceIref[index];
	best_col--;
      } 
      else if (best_type == 2){
	// Iread
	refseq_ss  << "-";
	readseq_ss << readseq.at(best_row-1);
	cigar_ss   << "I";
	best_type   = traceIread[index];
	best_row--;
      } 
      else
	PrintMessageDieOnError("Invalid matrix type in Needleman-Wunsch alignment", ERROR);
    }

    // Handle leading gaps
    for (int i = best_col; i > 0; i--){
      refseq_ss  << refseq.at(i-1);
      readseq_ss << "-";
    }
  
    // Order alignment front to back
    refseq_al  = refseq_ss.str();
    readseq_al = readseq_ss.str();
    raw_cigar  = cigar_ss.str();
    reverse(refseq_al.begin(),  refseq_al.end());
    reverse(readseq_al.begin(), readseq_al.end());
    reverse(raw_cigar.begin(),  raw_cigar.end());

    // Simplify cigar string
    char cigar_char = raw_cigar[0];
    int  num        = 1;
    char new_cigar_char;
    for(unsigned int i = 1; i < raw_cigar.length(); i++){
      new_cigar_char = raw_cigar[i];
      if (new_cigar_char != cigar_char){
	cigar_list.push_back(BamTools::CigarOp(cigar_char, num));
	num = 1;
	cigar_char = new_cigar_char;
      }
      else
	num += 1;
    }
    cigar_list.push_back(BamTools::CigarOp(cigar_char, num));
    if (cigar_list.back().Type == 'I')
      cigar_list.back().Type = 'S';
  }