boost::shared_ptr<bob::learn::boosting::LUTMachine> bob::learn::boosting::LUTTrainer::train(const blitz::Array<uint16_t,2>& trainingFeatures, const blitz::Array<double,2>& lossGradient) const{
  int featureLength = trainingFeatures.extent(1);
  _lossSum.resize(featureLength, m_numberOfOutputs);

  // Compute the sum of the gradient based on the feature values, i.e., the loss associated with each feature index
  for (int featureIndex = featureLength; featureIndex--;){
    for (int outputIndex = m_numberOfOutputs; outputIndex--;){
      weightedHistogram(trainingFeatures(blitz::Range::all(),featureIndex), lossGradient(blitz::Range::all(), outputIndex));
      _lossSum(featureIndex,outputIndex) = - blitz::sum(blitz::abs(_gradientHistogram));
    }
  }

  // Select the most discriminative index (or indices) for classification, i.e., the one that minimizes the loss,
  // and compute the sum of the gradient for that index
  if (m_selectionType == independent){
    // independent feature selection is used if all output dimensions may use different features;
    // each selected feature minimizes one dimension of the loss function
    for (int outputIndex = m_numberOfOutputs; outputIndex--;){
      _selectedIndices(outputIndex) = bestIndex(_lossSum(blitz::Range::all(),outputIndex));
    }
  } else {
    // for 'shared' feature selection the loss function is summed over all output dimensions and
    // the feature that minimizes this cumulative loss is used for all outputs
    blitz::secondIndex j;
    const blitz::Array<double,1> sum(blitz::sum(_lossSum, j));
    _selectedIndices = bestIndex(sum);
  }

  // compute the look-up-tables for the best index
  for (int outputIndex = m_numberOfOutputs; outputIndex--;){
    int selectedIndex = _selectedIndices(outputIndex);
    weightedHistogram(trainingFeatures(blitz::Range::all(), selectedIndex), lossGradient(blitz::Range::all(), outputIndex));

    for (int lutIndex = m_maximumFeatureValue; lutIndex--;){
      _luts(lutIndex, outputIndex) = (_gradientHistogram(lutIndex) > 0) * 2. - 1.;
    }
  }

  // create new weak machine
  return boost::shared_ptr<LUTMachine>(new LUTMachine(_luts.copy(), _selectedIndices.copy()));
}
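// NOTE (editor): the train() snippet above relies on two LUTTrainer helpers that are not shown.
// The following is a minimal sketch of their presumed behaviour, inferred only from how they are
// called above; the member names (_gradientHistogram, m_maximumFeatureValue) come from the snippet,
// but the bodies and exact signatures are assumptions, not the library's actual implementation.
void bob::learn::boosting::LUTTrainer::weightedHistogram(const blitz::Array<uint16_t,1>& features, const blitz::Array<double,1>& weights) const{
  // Accumulate the per-sample gradient weights into one histogram bin per feature value (assumed)
  _gradientHistogram = 0.;
  for (int sampleIndex = features.extent(0); sampleIndex--;){
    _gradientHistogram(features(sampleIndex)) += weights(sampleIndex);
  }
}

int32_t bob::learn::boosting::LUTTrainer::bestIndex(const blitz::Array<double,1>& loss) const{
  // Return the feature index with the smallest (most negative) summed loss (assumed)
  return blitz::minIndex(loss)(0);
}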
/// Find best NFA and number of inliers wrt square error threshold in e.
OrsaModel::ErrorIndex OrsaModel::bestNFA(const std::vector<ErrorIndex>& e,
                                         double loge0,
                                         double maxThreshold,
                                         const std::vector<float> &logc_n,
                                         const std::vector<float> &logc_k) const
{
  const int startIndex = SizeSample();
  const double multError = (DistToPoint()? 1.0: 0.5);

  ErrorIndex bestIndex(std::numeric_limits<double>::infinity(), startIndex, 0);
  const int n = static_cast<int>( e.size() );
  for(int k=startIndex+1; k<=n && e[k-1].error<=maxThreshold; ++k) {
    double logalpha = logalpha0_[e[k-1].side] + multError*log10(e[k-1].error);
    ErrorIndex index(loge0 + logalpha*(double)(k-startIndex) + logc_n[k] + logc_k[k], k, e[k-1].side);
    if(index.error < bestIndex.error)
      bestIndex = index;
  }
  return bestIndex;
}
/// Find best NFA and its index wrt square error threshold in e.
static ErrorIndex bestNFA(int startIndex, // number of points required for estimation
                          double logalpha0,
                          const std::vector<ErrorIndex>& e,
                          double loge0,
                          double maxThreshold,
                          const std::vector<float> &logc_n,
                          const std::vector<float> &logc_k,
                          double multError = 1.0)
{
  ErrorIndex bestIndex(std::numeric_limits<double>::infinity(), startIndex);
  const size_t n = e.size();
  for(size_t k=startIndex+1; k<=n && e[k-1].first<=maxThreshold; ++k) {
    double logalpha = logalpha0 + multError * log10(e[k-1].first + std::numeric_limits<float>::min());
    ErrorIndex index(loge0 + logalpha * (double)(k-startIndex) + logc_n[k] + logc_k[k], k);
    if(index.first < bestIndex.first)
      bestIndex = index;
  }
  return bestIndex;
}
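// NOTE (editor): the two bestNFA variants above evaluate the same a-contrario score. For k
// putative inliers out of n correspondences, with a model estimated from startIndex points,
// the quantity minimized is (in log10):
//   log10 NFA(k) = loge0 + (k - startIndex) * (logalpha0 + multError * log10(err_k))
//                + logc_n[k] + logc_k[k]
// where err_k is the k-th smallest error in e and logc_n/logc_k presumably cache the log10
// binomial terms of the ORSA formulation. The standalone helper below merely restates that
// formula for one value of k; it is illustrative and not part of either snippet.
static double logNFA(int startIndex, int k, double loge0, double logalpha0,
                     double multError, double error,
                     const std::vector<float>& logc_n, const std::vector<float>& logc_k){
  // log10 of the expected number of false alarms for a model supported by k points
  double logalpha = logalpha0 + multError * log10(error);
  return loge0 + logalpha * (double)(k - startIndex) + logc_n[k] + logc_k[k];
}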
int seqNoise::setUpOTUData(vector<int>& otuData, vector<double>& percentage, vector<int> cumCount, vector<double> tau, vector<int> otuFreq, vector<int> anP, vector<int> anI){
  try {
    int numOTUs = cumCount.size();
    int numSeqs = otuData.size();

    vector<double> bestTau(numSeqs, 0);
    vector<double> bestIndex(numSeqs, -1);

    // For every (OTU, sequence) pair, keep the largest tau seen per sequence and the OTU that produced it
    for(int i=0;i<numOTUs;i++){
      if (m->control_pressed) { return 0; }
      for(int j=0;j<otuFreq[i];j++){
        int index1 = cumCount[i] + j;
        double thisTau = tau[anP[index1]];
        int index2 = anI[index1];

        if(thisTau > bestTau[index2]){
          bestTau[index2] = thisTau;
          bestIndex[index2] = i;
        }
      }
    }

    // Assign each sequence to its best OTU and store the complement of its best tau
    for(int i=0;i<numSeqs;i++){
      if (m->control_pressed) { return 0; }
      otuData[i] = bestIndex[i];
      percentage[i] = 1 - bestTau[i];
    }
    return 0;
  }
  catch(exception& e) {
    m->errorOut(e, "seqNoise", "setUpOTUData");
    exit(1);
  }
}
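// NOTE (editor): stripped of the mothur scaffolding (the m->control_pressed checks and error
// handling), the core of setUpOTUData is an argmax per sequence: every (OTU, sequence) pair
// carries a responsibility tau, and each sequence is assigned to the OTU with the largest tau.
// The sketch below is illustrative only; it replaces the cumCount/anP/anI indirection above
// with hypothetical flat per-pair inputs.
#include <vector>
#include <algorithm>

static void assignBestOTU(const std::vector<int>& seqOfPair,     // sequence id of each pair
                          const std::vector<int>& otuOfPair,     // OTU id of each pair
                          const std::vector<double>& tauOfPair,  // responsibility of each pair
                          std::vector<int>& otuData,             // out: best OTU per sequence
                          std::vector<double>& percentage){      // out: 1 - best tau per sequence
  std::vector<double> bestTau(otuData.size(), 0.0);
  std::fill(otuData.begin(), otuData.end(), -1);
  for (size_t p = 0; p < seqOfPair.size(); ++p){
    if (tauOfPair[p] > bestTau[seqOfPair[p]]){
      bestTau[seqOfPair[p]] = tauOfPair[p];
      otuData[seqOfPair[p]] = otuOfPair[p];
    }
  }
  for (size_t i = 0; i < otuData.size(); ++i)
    percentage[i] = 1.0 - bestTau[i];
}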
void nw_helper(std::vector<float>& M, std::vector<float>& Iref, std::vector<float>& Iread,
               std::vector<int>& traceM, std::vector<int>& traceIref, std::vector<int>& traceIread,
               const std::string& refseq, const std::string& readseq,
               std::string& refseq_al, std::string& readseq_al,
               float* score, std::vector<BamTools::CigarOp>& cigar_list){
  int L1 = refseq.length();
  int L2 = readseq.length();
  cigar_list.clear();

  // Various variables used in the matrix calculations
  int ref_base, read_base, oindex, nindex;
  float s1, s2, s3;
  int c;

  // Fill in the 3 matrices using dynamic programming
  for (int i = 1; i <= L2; i++){
    for (int j = 1; j <= L1; j++){
      nindex    = i*(L1+1)+j;
      ref_base  = base_to_int(refseq[j-1]);
      read_base = base_to_int(readseq[i-1]);

      // Update M matrix (examine (i-1, j-1))
      oindex = (i-1)*(L1+1)+(j-1);
      s1 = M[oindex];
      s2 = Iref[oindex];
      s3 = Iread[oindex];
      M[nindex]      = bestIndex(s1, s2, s3, &c) + s[ref_base][read_base];
      traceM[nindex] = c;

      // Update Iref matrix (examine (i, j-1))
      oindex = i*(L1+1) + (j-1);
      s1 = M[oindex]     - GAPOPEN;
      s2 = Iref[oindex]  - GAPEXTEND;
      s3 = Iread[oindex] - GAPOPEN;
      Iref[nindex]      = bestIndex(s1, s2, s3, &c);
      traceIref[nindex] = c;

      // Update Iread matrix (examine (i-1, j))
      oindex = (i-1)*(L1+1) + j;
      s1 = M[oindex]     - GAPOPEN;
      s2 = Iref[oindex]  - GAPOPEN;
      s3 = Iread[oindex] - GAPEXTEND;
      Iread[nindex]      = bestIndex(s1, s2, s3, &c);
      traceIread[nindex] = c;
    }
  }

  // Find the best ending point for the alignment
  float best_val;
  int best_col, best_type;
  findOptimalStop(L1, L2, M, Iref, Iread, best_val, best_col, best_type);

  // Store the optimal alignment score
  *score = best_val;

  std::stringstream refseq_ss, readseq_ss, cigar_ss;

  // Handle trailing gaps
  for(int i = L1; i > best_col; i--){
    refseq_ss  << refseq.at(i-1);
    readseq_ss << "-";
  }

  // Traceback the optimal alignment
  int best_row = L2;
  std::string raw_cigar;
  int index;
  while (best_row > 0){
    index = best_row*(L1+1) + best_col;
    if (best_type == 0){        // M
      refseq_ss  << refseq.at(best_col-1);
      readseq_ss << readseq.at(best_row-1);
      cigar_ss   << "M";
      best_type = traceM[index];
      best_row--;
      best_col--;
    }
    else if (best_type == 1){   // Iref
      refseq_ss  << refseq.at(best_col-1);
      readseq_ss << "-";
      cigar_ss   << "D";
      best_type = traceIref[index];
      best_col--;
    }
    else if (best_type == 2){   // Iread
      refseq_ss  << "-";
      readseq_ss << readseq.at(best_row-1);
      cigar_ss   << "I";
      best_type = traceIread[index];
      best_row--;
    }
    else
      PrintMessageDieOnError("Invalid matrix type in Needleman-Wunsch alignment", ERROR);
  }

  // Handle leading gaps
  for (int i = best_col; i > 0; i--){
    refseq_ss  << refseq.at(i-1);
    readseq_ss << "-";
  }

  // Order alignment front to back
  refseq_al  = refseq_ss.str();
  readseq_al = readseq_ss.str();
  raw_cigar  = cigar_ss.str();
  reverse(refseq_al.begin(),  refseq_al.end());
  reverse(readseq_al.begin(), readseq_al.end());
  reverse(raw_cigar.begin(),  raw_cigar.end());

  // Simplify cigar string
  char cigar_char = raw_cigar[0];
  int num = 1;
  char new_cigar_char;
  for(unsigned int i = 1; i < raw_cigar.length(); i++){
    new_cigar_char = raw_cigar[i];
    if (new_cigar_char != cigar_char){
      cigar_list.push_back(BamTools::CigarOp(cigar_char, num));
      num = 1;
      cigar_char = new_cigar_char;
    }
    else
      num += 1;
  }
  cigar_list.push_back(BamTools::CigarOp(cigar_char, num));

  if (cigar_list.back().Type == 'I')
    cigar_list.back().Type = 'S';
}
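// NOTE (editor): nw_helper depends on a bestIndex overload that is not shown above. From the way
// it is called, it returns the largest of the three candidate scores and reports which matrix it
// came from (0 = M, 1 = Iref, 2 = Iread) through the out-parameter; the body below is an
// assumption consistent with that usage, not the original implementation.
static float bestIndex(float s1, float s2, float s3, int* c){
  // Default to the match/mismatch matrix M
  float best = s1;
  *c = 0;
  if (s2 > best){ best = s2; *c = 1; }  // gap in the read (deletion, Iref)
  if (s3 > best){ best = s3; *c = 2; }  // gap in the reference (insertion, Iread)
  return best;
}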