// Pre-decrement: steps the wrapped iterator `it` back one position via the
// project helper prior() — presumably bounded by range_start so it cannot
// move before the range (TODO confirm against prior()'s contract).
// Returns *this so decrements can be chained.
string_iterator& operator -- () { prior(it, range_start); return *this; }
// Driver: loads a bitmap grid map and evidence trajectories, builds
// obstacle-based position features, then replays each test trajectory
// through a destination Predictor, writing per-tick belief images
// (../compare/pp*.bmp) and per-tick log-loss statistics
// (../output/pppredict*.dat).
int main(int argc, char **argv) {
   // --- Command-line options -------------------------------------------
   OptionParser opts;
   string mapFile, evidFile;
   int factor;   // down-sampling factor for the feature grid
   opts.addOption(new StringOption("map",
         "--map <filename> : map file",
         "../input/grid.bmp", mapFile, false));
   opts.addOption(new StringOption("evidence",
         "--evidence <filename> : evidence file",
         "", evidFile, true));
   opts.addOption(new IntOption("factor",
         "--factor <int> : scaling factor",
         1, factor, true));
   opts.parse(argc,argv);

   // --- Colour constants for the BMP visualizations --------------------
   // (magenta/cyan/yellow/initialColor/currentColor are declared but unused
   //  in this build — kept for the commented-out rendering paths.)
   JetColorMap jet;
   RGBTRIPLE black = {0,0,0};
   RGBTRIPLE white = {255,255,255};
   RGBTRIPLE red;
   red.R = 255;
   red.G = 0;
   red.B = 0;
   RGBTRIPLE blue;
   blue.R = 0;
   blue.G = 0;
   blue.B = 255;
   RGBTRIPLE green;
   green.R = 0;
   green.G = 255;
   green.B = 0;
   RGBTRIPLE initialColor;
   initialColor.R = 111;
   initialColor.G = 49;
   initialColor.B = 152;
   // initialColor.G = 152;
   // initialColor.B = 49;
   RGBTRIPLE currentColor;
   currentColor.R = 181;
   currentColor.G = 165;
   currentColor.B = 213;
   // currentColor.G = 213;
   // currentColor.B = 165;
   RGBTRIPLE magenta;
   magenta.R = 255;
   magenta.G = 0;
   magenta.B = 255;
   RGBTRIPLE cyan;
   cyan.R = 0;
   cyan.G = 255;
   cyan.B = 255;
   RGBTRIPLE yellow;
   yellow.R = 255;
   yellow.G = 255;
   yellow.B = 0;

   // --- Load map (black pixels = obstacles) and evidence ---------------
   BMPFile bmpFile(mapFile);
   Grid grid(bmpFile, black);
   Evidence testSet(evidFile, grid, factor);
   /* if (1) {
      evid.split(trainSet, testSet, 0.8);
   }else{
      evid.deterministicsplit(trainSet, testSet);
   }*/

#if 0
   // Disabled baseline: Markov-model evaluation over the test set.
   cout << "Creating Markov Model"<<endl;
   MarkovModel markmodel(grid, trainSet);
   double totalObj = 0.0;
   for (int i=0; i < testSet.size(); i++) {
      vector<pair<int, int> > path = testSet.at(i);
      cout << "Calling eval"<<endl;
      double obj = markmodel.eval(path);
      cout << "OBJ: "<<i<<" "<<obj<<endl;
      totalObj += obj;
   }
   cout << "TOTAL OBJ: "<<totalObj<<endl;
   cout << "AVERAGE OBJ: "<<totalObj/testSet.size()<<endl;
   return 0;
#endif

   // --- Feature construction: constant + obstacle + 4 obstacle blurs ---
   vector<PosFeature> features;
   cout << "Constant Feature"<<endl;
   ConstantFeature constFeat(grid);
   features.push_back(constFeat);
   cout << "Obstacle Feature"<<endl;
   ObstacleFeature obsFeat(grid);
   features.push_back(obsFeat);
   for (int i=1; i < 5; i++) {
      cout << "Blur Feature "<<i<<endl;
      ObstacleBlurFeature blurFeat(grid, 5*i);
      features.push_back(blurFeat);
   }
   cout << "Creating feature array"<<endl;
   FeatureArray featArray2(features);
   cout << "Creating lower resolution feature array"<<endl;
   FeatureArray featArray(featArray2, factor);
   pair<int, int> dims = grid.dims();
   pair<int, int> lowDims((int)ceil((float)dims.first/factor),
         (int)ceil((float)dims.second/factor));

   // --- Hand-tuned reward weights, one per feature (6 total) -----------
   vector<double> weights(features.size(), -0.0);
   weights.at(1) = -6.2;
   //for (int i=2; i < weights.size(); i++)
   //   weights.at(i) = -1.0;
   weights.at(0) = -2.23;//-2.23
   weights.at(2) = -0.35;
   weights.at(3) = -2.73;
   weights.at(4) = -0.92;
   weights.at(5) = -0.26;
   Parameters params(weights);
   OrderedWaveInferenceEngine engine(InferenceEngine::GRID8);

   // Uniform (zero log-) prior over destinations; the commented block was
   // a learned radius-weighted prior.
   vector<vector<double> > prior(dims.first,vector<double> (dims.second,0.0));
   /*
   double divide = 1.0;
   vector<double> radiusWeight;
   for (int i=0; i < 20; i++) {
      radiusWeight.push_back(1.0/divide);
      divide*=2;
   }
   generatePrior(grid, trainSet, priorOrig, radiusWeight, factor);
   reducePrior(priorOrig, prior, factor);
   */
   vector<vector<vector<double> > > partition, backpartition;
   int time0 = time(0);   // unused timing marker
   BMPFile gridView(dims.first, dims.second);
   RewardMap rewards(featArray, params);
   vector<double> sums(params.size(),0.00001);   // unused
   vector<vector<double> > occupancy;
   Predictor predictor(grid, rewards, engine);
   predictor.setPrior(prior);
   cout << testSet.size() <<" Examples"<<endl;

   // --- Replay each trajectory, predicting at 0.4s ticks ---------------
   for (int i=0; i < testSet.size(); i++) {
      int index = 0;
      vector<pair<int, int> > traj = testSet.at(i);
      vector<double> times = testSet.getTimes(i);
      pair<int, int> initial = traj.front();
      pair<int,int> & botinGrid = testSet.at_bot(i);      // robot cell (grid coords)
      pair<double,double>& botinPoint = testSet.at_rbot(i); // unused here
      pair<double,double>& end = testSet.at_raw(i).back();  // true endpoint (real coords)
      predictor.setStart(initial);
      double thresh = -20.0;   // unused
      double startTime = times.front();
      char buf[1024];
      sprintf(buf, "../output/pppredict%03d.dat", i);
      ofstream file(buf);
      for (double tick = startTime; index < traj.size(); tick+=0.4) {
         // Advance `index` to the first observation at or after `tick`.
         for ( ; index < traj.size() && times.at(index) < tick; index++);
         if (index == traj.size() ) break;
         cout << "Evidence: "<<i<<" timestep: "<<tick
            <<" index: "<<index<<endl;
         predictor.predict(traj.at(index), occupancy);
         cout << "SIZE: "<<prior.size()<<endl;
         vector<vector<double> > pos = predictor.getPosterior();

         // Render current posterior + partial trajectory to a BMP frame.
         gridView.addBelief(pos, -30.0, 0.0,jet);
         grid.addObstacles(gridView, black);
         gridView.addLabel(botinGrid,green);
         vector<pair<int, int> > subTraj;
         subTraj.insert(subTraj.end(), traj.begin(), traj.begin()+index);
         gridView.addVector(subTraj, red, factor);
         sprintf(buf, "../compare/pp%03d-%03f.bmp", i, tick-startTime);
         gridView.write(buf);
         //pair<double,double> values = predictor.check(traj.back());

         // Accumulated reward of the observed prefix vs. the normalizer.
         double cost = 0.0;
         for(int itr = 0;itr<index;itr++)
            cost +=rewards.at(traj[itr].first,traj[itr].second);
         cout<<i<<" Normalizer: "<<predictor.getNormalizer(traj.back())<<
            " path cost: "<<cost<<" Probability: "<<cost+predictor.getNormalizer(traj.back())<<endl;
         vector<vector<vector<double> > > timeOcc = predictor.getTimeOccupancy();   // unused
         vector<vector<double > > posterior = predictor.getPosterior();

         // Argmax of the posterior = predicted destination cell.
         double maxV = -HUGE_VAL;
         pair<int,int> predestGrid;
         pair<double,double> predestPoint;
         for (int ii=0; ii< dims.first; ii++) {
            for (int jj=0; jj < dims.second; jj++) {
               if(posterior[ii][jj]>maxV){
                  predestGrid.first = ii;
                  predestGrid.second = jj;
               }
               maxV = max(maxV, posterior.at(ii).at(jj));
            }
         }
         predestPoint = grid.grid2Real(predestGrid.first,predestGrid.second);
         // Euclidean error between predicted and actual endpoints.
         double dist = sqrt((end.first-predestPoint.first)*(end.first-predestPoint.first)
               +(end.second-predestPoint.second)*(end.second-predestPoint.second));
         double logloss = entropy(posterior);
         cout<<"final belief: "<<posterior.at(traj.back().first).at(traj.back().second)
            <<" max: "<<maxV
            <<" logloss: "<<logloss<<endl;
         cout<<botinGrid.first<<" "<<botinGrid.second
            <<" "<<predestGrid.first<<" "<<predestGrid.second<<endl;
         file<<tick-startTime
            <<" "<<logloss
            <<" "<<posterior.at(botinGrid.first).at(botinGrid.second)
            <<" "<<posterior.at(traj.back().first).at(traj.back().second)
            <<" "<<maxV<<" "<<dist<<endl;
      }
      file.close();
   }
}
// Tests whether the straight-line drawing of g given by `drawing` (one grid
// position per vertex) is free of edge crossings. Each edge contributes two
// events (one per endpoint); events are stably bucket-sorted by x then y and
// swept left to right. Edges currently "open" live in an ordered map keyed by
// their endpoint coordinates, and each edge is checked for intersection
// against its immediate neighbours in that order via intersects().
// (Template parameters Graph/GridPositionMap/VertexIndexMap are declared
// outside this view.)
bool is_straight_line_drawing(const Graph& g,
                              GridPositionMap drawing,
                              VertexIndexMap vm
                              )
{
   typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
   typedef typename graph_traits<Graph>::vertex_iterator vertex_iterator_t;
   typedef typename graph_traits<Graph>::edge_descriptor edge_t;
   typedef typename graph_traits<Graph>::edge_iterator edge_iterator_t;
   typedef typename graph_traits<Graph>::edges_size_type e_size_t;
   typedef typename graph_traits<Graph>::vertices_size_type v_size_t;

   typedef std::size_t x_coord_t;
   typedef std::size_t y_coord_t;
   // Event: an edge plus the (x,y) of one of its endpoints.
   typedef boost::tuple<edge_t, x_coord_t, y_coord_t> edge_event_t;
   typedef typename std::vector< edge_event_t > edge_event_queue_t;

   // Active-edge key: both endpoint y's then both x's, so map order groups
   // coincident segments together.
   typedef tuple<y_coord_t, y_coord_t, x_coord_t, x_coord_t> active_map_key_t;
   typedef edge_t active_map_value_t;
   typedef std::map< active_map_key_t, active_map_value_t > active_map_t;
   typedef typename active_map_t::iterator active_map_iterator_t;

   edge_event_queue_t edge_event_queue;
   active_map_t active_edges;

   // Build the event queue: two endpoint events per edge.
   edge_iterator_t ei, ei_end;
   for(tie(ei,ei_end) = edges(g); ei != ei_end; ++ei)
   {
      edge_t e(*ei);
      vertex_t s(source(e,g));
      vertex_t t(target(e,g));
      edge_event_queue.push_back
         (make_tuple(e,
                     static_cast<std::size_t>(drawing[s].x),
                     static_cast<std::size_t>(drawing[s].y)
                     )
          );
      edge_event_queue.push_back
         (make_tuple(e,
                     static_cast<std::size_t>(drawing[t].x),
                     static_cast<std::size_t>(drawing[t].y)
                     )
          );
   }

   // Order by edge_event_queue by first, then second coordinate
   // (bucket_sort is a stable sort.)
   bucket_sort(edge_event_queue.begin(),
               edge_event_queue.end(),
               property_map_tuple_adaptor<edge_event_t, 2>()
               );
   bucket_sort(edge_event_queue.begin(),
               edge_event_queue.end(),
               property_map_tuple_adaptor<edge_event_t, 1>()
               );

   typedef typename edge_event_queue_t::iterator event_queue_iterator_t;
   event_queue_iterator_t itr_end = edge_event_queue.end();
   for(event_queue_iterator_t itr = edge_event_queue.begin();
       itr != itr_end; ++itr
       )
   {
      edge_t e(get<0>(*itr));
      vertex_t source_v(source(e,g));
      vertex_t target_v(target(e,g));
      // Normalize so source_v is the lower-y endpoint.
      if (drawing[source_v].y > drawing[target_v].y)
         std::swap(source_v, target_v);

      active_map_key_t key(get(drawing, source_v).y,
                           get(drawing, target_v).y,
                           get(drawing, source_v).x,
                           get(drawing, target_v).x
                           );

      active_map_iterator_t a_itr = active_edges.find(key);
      if (a_itr == active_edges.end())
      {
         // First endpoint event: open the edge.
         active_edges[key] = e;
      }
      else
      {
         // Second endpoint event: check against the neighbours in the
         // active order, then close the edge.
         active_map_iterator_t before, after;
         if (a_itr == active_edges.begin())
            before = active_edges.end();
         else
            before = prior(a_itr);
         after = boost::next(a_itr);

         if (before != active_edges.end())
         {
            edge_t f = before->second;
            vertex_t e_source(source(e,g));
            vertex_t e_target(target(e,g));
            vertex_t f_source(source(f,g));
            vertex_t f_target(target(f,g));
            if (intersects(drawing[e_source].x, drawing[e_source].y,
                           drawing[e_target].x, drawing[e_target].y,
                           drawing[f_source].x, drawing[f_source].y,
                           drawing[f_target].x, drawing[f_target].y
                           )
                )
               return false;
         }

         if (after != active_edges.end())
         {
            edge_t f = after->second;
            vertex_t e_source(source(e,g));
            vertex_t e_target(target(e,g));
            vertex_t f_source(source(f,g));
            vertex_t f_target(target(f,g));
            if (intersects(drawing[e_source].x, drawing[e_source].y,
                           drawing[e_target].x, drawing[e_target].y,
                           drawing[f_source].x, drawing[f_source].y,
                           drawing[f_target].x, drawing[f_target].y
                           )
                )
               return false;
         }

         active_edges.erase(a_itr);
      }
   }

   return true;
}
// Converts an infix expression string into prefix notation.
// Tokenization and reordering are delegated to postfix(); this routine then
// folds the postfix token stream with an operand stack, emitting
// "op left right" strings. is_number()/prior() (project helpers) classify
// each token by its first character.
std::vector<std::string> prefix(std::string infix_str){
    const std::vector<std::string> tokens = postfix(infix_str);
    std::vector<std::string> operands;
    for (std::size_t pos = 0; pos < tokens.size(); ++pos) {
        const std::string& tok = tokens[pos];
        if (is_number(prior(tok[0]))) {
            // Operand token: push as-is.
            operands.push_back(tok);
        } else {
            // Operator token: combine the two most recent operands,
            // operator first (prefix order).
            std::string right = operands.back();
            operands.pop_back();
            std::string left = operands.back();
            operands.pop_back();
            operands.push_back(tok + " " + left + " " + right);
        }
    }
    return operands;
}
//-- Draws a new 2-D particle for ABC: picks an old particle by
//-- roulette-wheel selection on its weight, perturbs it with a Gaussian
//-- kernel built from the 2x2 covariance `cov` (conditional decomposition),
//-- and repeats until the prior accepts the proposal.
//-- WARNING: d-dependent — only the 2-parameter case is implemented.
void generateParam(peak_param *peak, particle_arr *oldPart, particle_t *newPa, prior_fct *prior)
{
  if (newPa->d != 2) {
    printf("Need to implement in ABC.c/generateParam\n");
    exit(1);
  }

  gsl_rng *engine         = peak->generator;
  gsl_matrix *cov         = oldPart->cov;
  particle_t **candidates = oldPart->array;
  particle_t *picked;
  double draw, shift1, shift2, condVar;
  int k;

  do {
    //-- Roulette-wheel selection: walk the weight mass until `draw` is spent.
    k      = 0;
    picked = candidates[0];
    draw   = gsl_ran_flat(engine, 0.0, 1.0);
    while (draw > picked->weight) {
      draw -= picked->weight;
      k++;
      picked = candidates[k];
    }

    //-- Sample from the bivariate Gaussian centred on the picked particle:
    //-- x1 ~ N(0, cov00); x2 | x1 via the conditional variance/mean.
    shift1  = gsl_ran_gaussian(engine, sqrt(gsl_matrix_get(cov, 0, 0)));
    condVar = gsl_matrix_get(cov, 1, 1) - SQ(gsl_matrix_get(cov, 1, 0)) / gsl_matrix_get(cov, 0, 0);
    shift2  = gsl_ran_gaussian(engine, sqrt(condVar));
    shift2 += picked->param[1] + shift1 * gsl_matrix_get(cov, 1, 0) / gsl_matrix_get(cov, 0, 0);
    shift1 += picked->param[0];

    newPa->param[0] = shift1;
    newPa->param[1] = shift2;
  } while (!prior(newPa)); //-- Reject proposals outside the prior support.

  return;
}
char* TSolver::convtopol() //converts symbol-function string to polish record {if (S==NULL || !strlen(S)) seterr(E_VOID); char* r; if (Err!=E_NO) {ERROR: if (r!=NULL) free(r); free(S); return S=NULL; } int i,j=0; int p; int SL=strlen(S); r=(char*)calloc(SL*2+2,sizeof(char)); r[0]='\0'; cst_clear; for (i=0;S[i]!='\0';i++) {if (isnumc(S[i]) || isconst(S[i]) || (S[i]=='-' && (minusE || minusN))) {r[j++]=S[i]; continue; } if (S[i]=='!') {addspc(); r[j++]=S[i]; addspc(); continue; } p=prior(S[i]); {if (S[i]==')') {addspc(); while ((!cst_free) && cst_end!='(') {r[j++]=cpop(); r[j++]=' '; } cpop(); if ((!cst_free) && isfn(cst_end)) {r[j++]=cpop(); r[j++]=' '; } continue; } if (S[i]==']') {addspc(); while ((!cst_free) && cst_end!='[') {r[j++]=cpop(); r[j++]=' '; } cpop(); r[j++]=f_abs; r[j++]=' '; continue; } if ((((!cst_free) && (p>=prior(cst_end)) && (prior(cst_end)>0)&&cst_end!='_'&&S[i]!='_') || S[i]==',')) {addspc(); while ((!cst_free) && p>=prior(cst_end) && prior(cst_end)>0) {r[j++]=cpop(); r[j++]=' '; } if (S[i]==',') continue; } cpush(S[i]); if (j>0) addspc(); } } if (Err!=E_NO) goto ERROR; if (r[j-1]!=' ') r[j++]=' '; while (!cst_free) {r[j++]=cpop(); r[j++]=' '; } if (r[j-1]!=' ') r[j++]=' '; r[j]='\0'; free(S); S=strdbl(r); free(r); #ifdef debug printf("%s\n",S); #endif poled=1; return S; }
// Fits the temporal Fourier model for one nodal parameter: builds the basis
// matrix P over all frames, forms the regularized normal equations
// A = P'P + S, solves A*x = P'(lambda - P*prior) for the update, and
// returns prior + x. Frames flagged in framesWithDataPoints are up-weighted
// by CAP_WEIGHT_GP on both sides of the system.
std::vector<double> TimeSmoother::FitModel(int parameterIndex,
                                           const std::vector<double>& dataPoints,
                                           const std::vector<int>& framesWithDataPoints) const
{
    // 1. Project data points (from each frame) to model to get corresponding xi
    // Here the data points are the nodal parameters at each frame and linearly map to xi

    // 2. Construct P
    FourierBasis basis;
    int numRows = dataPoints.size();
    gmm::dense_matrix<double> P(numRows, NUMBER_OF_PARAMETERS);
    // std::cout << "\n\n\nnumRows = " << numRows << '\n';

    for (int i = 0; i < numRows; i++)
    {
        // Frame index maps linearly onto the model coordinate xi in [0,1).
        double xiDouble[1];
        xiDouble[0] = MapToXi(static_cast<double>(i) / numRows); //REVISE design
        // std::cout << "dataPoint(" << i << ") = " << dataPoints[i] << ", xi = "<< xiDouble[0] <<'\n';
        double psi[NUMBER_OF_PARAMETERS];
        basis.Evaluate(psi, xiDouble);
        for (int columnIndex = 0; columnIndex < NUMBER_OF_PARAMETERS; columnIndex++)
        {
            P(i, columnIndex) = psi[columnIndex];
            if (framesWithDataPoints[i])
            {
                // Extra weight for frames that actually carry data.
                P(i, columnIndex) *= CAP_WEIGHT_GP; //TEST
            }
        }
    }
    // std::cout << "P = " << P << std::endl;

    // 3. Construct A
    // Note that G is the identity matrix. StS is read in from file.
    gmm::dense_matrix<double> A(NUMBER_OF_PARAMETERS, NUMBER_OF_PARAMETERS),
                              temp(NUMBER_OF_PARAMETERS, NUMBER_OF_PARAMETERS);
    gmm::mult(gmm::transposed(P), P, temp);
    gmm::add(pImpl->S, temp, A);
    // std::cout << "A = " << A << std::endl;

    // 4. Construct rhs
    // NOTE(review): the literal 11 looks like it duplicates
    // NUMBER_OF_PARAMETERS — confirm and replace if so.
    std::vector<double> prior(11), p(numRows), rhs(11);
    for (int i = 0; i < 11; i++)
    {
        prior[i] = pImpl->Priors(i, parameterIndex);
    }
    // std::cout << "prior: " << prior << std::endl;
    gmm::mult(P, prior, p);
    // std::transform(dataPoints.begin(), dataPoints.end(), p.begin(), p.begin(), std::minus<double>()); //TEST
    // p[0] *= 10; //more weight for frame 0

    // Apply the same per-frame weighting to the data before forming the
    // residual (lambda - P*prior).
    std::vector<double> dataLambda = dataPoints;
    for (unsigned int i = 0; i < dataLambda.size(); i++)
    {
        if (framesWithDataPoints[i])
        {
            dataLambda[i] *= CAP_WEIGHT_GP;
        }
    }
    std::transform(dataLambda.begin(), dataLambda.end(), p.begin(), p.begin(), std::minus<double>());
    gmm::mult(transposed(P), p, rhs);
    // std::cout << "rhs: " << rhs << std::endl;

    // 5. Solve normal equation (direct solver)
    std::vector<double> x(gmm::mat_nrows(A));
    gmm::lu_solve(A, x, rhs);
#ifndef NDEBUG
    // std::cout << "delta x (" << parameterIndex << ") " << x << std::endl;
#endif

    // Solution is a delta on the prior: return prior + x.
    std::transform(x.begin(), x.end(), prior.begin(), x.begin(), std::plus<double>());
    return x;
}
// Objective/gradient evaluation for IRL training with velocity-augmented
// states: for each evidence trajectory, builds per-robot blurred features,
// runs forward-backward inference, and accumulates the negative
// log-likelihood (norm - cost) and the feature-count gradient.
// On return, `params` is overwritten with the gradient (caller convention)
// and the negated objective is returned.
//
// Fix vs. original: the parameter declaration was corrupted by an HTML
// entity ("vector<double> ¶ms", i.e. "&para" + "ms"); restored to
// "vector<double> &params", which the body uses throughout.
double trajOptimizerplus::eval(vector<double> &params) {
   cout << "IN EVAL "<<itrcount++<<" "<<params.size()<<endl;
   for (int i=0; i < params.size(); i++)
      cout << "PARAMS IN: "<<i<<" "<<params.at(i)<<endl;

   int factor = evidence.getFactor();
   pair<int, int> dims = grid.dims();
   int v_dim = seqFeat.num_V();   // number of discretized velocity levels
   /*
   pair<int, int> lowDims((int)ceil((float)dims.first/factor),
         (int)ceil((float)dims.second/factor));
    */
   // Log-space destination prior over (x, y, v); -HUGE_VAL = impossible.
   vector<vector<vector<double> > > prior(dims.first,
         vector<vector<double> >(dims.second, vector<double> (v_dim,-HUGE_VAL)));

   double obj = 0.0;
   vector<double> gradient(params.size(), 0.0);
   vector<vector<vector<double> > > occupancy;
   vector<vector<double> > layerOccupancy;
   layerOccupancy.resize(dims.first,vector<double>(dims.second,-HUGE_VAL));
   vector<double> modelFeats, pathFeats;

   for (int i=0; i < evidence.size(); i++) {
      for (int j=0; j < params.size(); j++) {
         cout << " "<<j<<" "<<params.at(j);
      }
      cout<<endl;
      cout << "Evidence #"<<i<<endl;
      vector<pair<int, int> >& trajectory = evidence.at(i);
      vector<double>& velocityseq = evidence.at_v(i);
      pair<int,int>& bot = evidence.at_bot(i);

      // Robot-local blurred features, rebuilt per evidence trajectory and
      // popped again afterwards so posFeatures stays unchanged.
      for (int r=1; r <= NUMROBFEAT; r++) {
         cout << "Adding Robot Feature "<<r<<endl;
         RobotLocalBlurFeature robblurFeat(grid,bot,10*r);
         // RobotGlobalFeature robFeat(grid,bot);
         posFeatures.push_back(robblurFeat);
      }
      cout << " Creating feature array"<<endl;
      FeatureArray featArray2(posFeatures);
      FeatureArray featArray(featArray2, factor);
      for (int rr=1; rr<= NUMROBFEAT; rr++)
         posFeatures.pop_back();

      // Split the flat parameter vector into position-feature weights and
      // sequence-feature weights.
      vector<double> p_weights,s_weights;
      int itr = 0;
      for (; itr<featArray.size(); itr++)
         p_weights.push_back(params[itr]);
      for (; itr<params.size(); itr++)
         s_weights.push_back(params[itr]);
      //cout<<"Params"<<endl;
      Parameters p_parameters(p_weights), s_parameters(s_weights);
      /*
      cout<<featArray.size()<<endl;
      cout<<params.size()<<endl;
      cout<<p_weights.size()<<endl;
      cout<<s_weights.size()<<endl;
      cout<<p_parameters.size()<<endl;
      cout<<s_parameters.size()<<endl;
       */
      //cout<<"Reward"<<endl;
      RewardMap rewards(featArray,seqFeat,p_parameters,s_parameters);
      DisSeqPredictor predictor(grid, rewards, engine);

      // Sum of rewards along the demonstrated trajectory.
      double cost = 0.0;
      //cout<< trajectory.size()<<endl;
      for (int j=0; j < trajectory.size(); j++) {
         //cout<<j<<" "<<trajectory.at(j).first<<" "<< trajectory.at(j).second<< " "<< seqFeat.getFeat(velocityseq.at(j))<<endl;
         cost+=rewards.at(trajectory.at(j).first, trajectory.at(j).second,
               seqFeat.getFeat(velocityseq.at(j)));
      }

      State initial(trajectory.front(),seqFeat.getFeat(velocityseq.front()));
      State destination(trajectory.back(),seqFeat.getFeat(velocityseq.back()));
      //for (int k=0;k<v_dim;k++)
      // Open the destination in the prior for this trajectory only.
      prior.at(destination.x()).at(destination.y()).at(destination.disV) = 0.0;

      cout << "Initial: "<<initial.x()<<" "<<initial.y()<<" "<<initial.disV<<endl;
      cout << "Destination: "<<destination.x()<<" "
         <<destination.y()<<" "<<destination.disV<<endl;

      predictor.setStart(initial);
      predictor.setPrior(prior);

      // norm = log partition value V(s_1 -> G).
      double norm = predictor.forwardBackwardInference(initial, occupancy);

      // Dump one occupancy image per velocity layer for inspection.
      for (int l=0; l<v_dim; l++) {
         BMPFile gridView(dims.first, dims.second);
         for (int x= 0; x<dims.first; x++) {
            for(int y=0; y<dims.second; y++) {
               layerOccupancy.at(x).at(y) = occupancy.at(x).at(y).at(l);
            }
         }
         char buf[1024];
         /*
         RobotGlobalFeature robblurFeat(grid,bot);
         gridView.addBelief(robblurFeat.getMap(), 0.0, 25, white, red);
         gridView.addVector(trajectory, blue, factor);
         gridView.addLabel(bot,green);
         sprintf(buf, "../figures/feat%04d_%d.bmp",i,l);
         gridView.write(buf);
          */
         gridView.addBelief(layerOccupancy, -300.0, 5.0, white, red);
         //grid.addObstacles(gridView, black);
         gridView.addLabel(bot,green);
         gridView.addVector(trajectory, blue, factor);
         sprintf(buf, "../figures/train%04d_%d.bmp",i,l);
         gridView.write(buf);
      }
      /*
      for (int i=0; i < occupancy.size(); i++)
         for (int j=0; j < occupancy.at(i).size(); j++)
            if (occupancy.at(i).at(j) > -10)
               cout << i <<" "<<j<<" "<<occupancy.at(i).at(j)<<endl;
       */

      // Expected (model) vs. empirical (path) feature counts.
      // NOTE(review): modelFeats/pathFeats are reused across iterations —
      // assumes featureCounts() resets them; confirm.
      featArray.featureCounts(occupancy, modelFeats);
      featArray.featureCounts(trajectory, pathFeats);
      seqFeat.featureCounts_vec(occupancy,modelFeats);
      seqFeat.featureCounts_vec(velocityseq,pathFeats);

      for (int k=0; k < params.size(); k++) {
         double diff = pathFeats.at(k) - modelFeats.at(k);
         gradient.at(k) -= diff;
         cout <<" Gradient ("<< k << " -grad: "<< gradient.at(k)
            <<" -path: "<< pathFeats.at(k)<<" -model: "<< modelFeats.at(k)<<")";
      }
      cout<<endl;
      cout << "OBJ: "<<cost-norm<< " "<<cost<<" "<<norm<<endl;
      obj += (cost - norm);
      /* obj is the path probability
       * cost is the sum of rewards: sum f(s,a)
       * norm is V(s_1->G), since here s_T = G, V(s_T->G) = 0*/

      // Close the destination again for the next trajectory.
      prior.at(destination.x()).at(destination.y()).at(destination.disV) = -HUGE_VAL;
   }
   cout << "RETURN OBJ: "<<-obj<<endl;
   // Caller convention: gradient is returned through `params`.
   params = gradient;
   return -obj;
}
// [ref] ${MOCAPY_HOME}/examples/discrete_hmm_with_prior.cpp
// Demonstrates training a 2-state discrete HMM in Mocapy++ where the
// observation node carries a pseudo-count prior: samples N sequences from a
// hand-specified "target" DBN, then fits a "model" DBN with Gibbs-sampling
// EM, checkpointing whenever the log-likelihood improves.
void hmm_with_discrete_and_prior()
{
    // in Mocapy++, so far only the discrete node supports the use of a prior.
    // [ref] Mocapy++ manual, pp. 15.

#if 1
    // Fixed seed for reproducible runs.
    mocapy::mocapy_seed((uint)5556574);
#else
    mocapy::mocapy_seed((uint)std::time(NULL));
#endif

    // number of trainining sequences
    const int N = 100;
    // sequence lengths
    const int T = 100;
    // Gibbs sampling parameters
    int MCMC_BURN_IN = 10;

    //---------------------------------------------------------------
    // HMM hidden and observed node sizes
    const uint H_SIZE = 2;
    const uint O_SIZE = 2;
    const bool init_random = false;

    // Conditional probability tables for the data-generating DBN.
    mocapy::CPD th0_cpd;
    th0_cpd.set_shape(2);
    th0_cpd.set_values(mocapy::vec(0.1, 0.9));
    mocapy::CPD th1_cpd;
    th1_cpd.set_shape(2, 2);
    th1_cpd.set_values(mocapy::vec(0.95, 0.05, 0.1, 0.9));
    mocapy::CPD to_cpd;
    to_cpd.set_shape(2, 2);
    to_cpd.set_values(mocapy::vec(0.1, 0.9, 0.8, 0.2));

    // The target DBN (This DBN generates the data)
    mocapy::Node *th0 = mocapy::NodeFactory::new_discrete_node(H_SIZE, "th0", init_random, th0_cpd);
    mocapy::Node *th1 = mocapy::NodeFactory::new_discrete_node(H_SIZE, "th1", init_random, th1_cpd);
    mocapy::Node *to = mocapy::NodeFactory::new_discrete_node(O_SIZE, "to", init_random, to_cpd);

    mocapy::DBN tdbn;
    tdbn.set_slices(mocapy::vec(th0, to), mocapy::vec(th1, to));
    tdbn.add_intra("th0", "to");
    tdbn.add_inter("th0", "th1");
    tdbn.construct();

    //---------------------------------------------------------------
    // The model DBN (this DBN will be trained)
    // For mh0, get the CPD from th0 and fix parameters
    mocapy::Node *mh0 = mocapy::NodeFactory::new_discrete_node(H_SIZE, "mh0", init_random, mocapy::CPD(), th0, true );
    mocapy::Node *mh1 = mocapy::NodeFactory::new_discrete_node(H_SIZE, "mh1", init_random);
    mocapy::Node *mo = mocapy::NodeFactory::new_discrete_node(O_SIZE, "mo", init_random);

    // a pseudo count prior for the discrete nodes
    // [ref] Mocapy++ manual, pp. 24.
    // NOTE(review): `prior` is a stack local handed to the node by address;
    // it must outlive all training below (it does here, but beware when
    // refactoring).
    mocapy::MDArray<double> pcounts;
    pcounts.set_shape(O_SIZE, H_SIZE);
    pcounts.set(0, 1, 1000);
    mocapy::PseudoCountPrior prior(pcounts);
    ((mocapy::DiscreteNode *)mo)->get_densities()->set_prior(&prior);

    mocapy::DBN mdbn;
    mdbn.set_slices(mocapy::vec(mh0, mo), mocapy::vec(mh1, mo));
    mdbn.add_intra("mh0", "mo");
    mdbn.add_inter("mh0", "mh1");
    mdbn.construct();

    std::cout << "*** TARGET ***" << std::endl;
    std::cout << *th0 << std::endl;
    std::cout << *th1 << std::endl;
    std::cout << *to << std::endl;

    std::cout << "*** MODEL ***" << std::endl;
    std::cout << *mh0 << std::endl;
    std::cout << *mh1 << std::endl;
    std::cout << *mo << std::endl;

    //---------------------------------------------------------------
    std::vector<mocapy::Sequence> seq_list;
    std::vector<mocapy::MDArray<mocapy::eMISMASK> > mismask_list;

    std::cout << "Generating data" << std::endl;

    // Hidden states are unobserved, outputs observed, for every timestep.
    mocapy::MDArray<mocapy::eMISMASK> mismask;
    mismask.repeat(T, mocapy::vec(mocapy::MOCAPY_HIDDEN, mocapy::MOCAPY_OBSERVED));

    // Generate the data
    double sum_LL(0);
    for (int i = 0; i < N; ++i)
    {
        std::pair<mocapy::Sequence, double> seq_ll = tdbn.sample_sequence(T);
        sum_LL += seq_ll.second;
        seq_list.push_back(seq_ll.first);
        mismask_list.push_back(mismask);
    }
    std::cout << "Average LL: " << sum_LL/N << std::endl;

    //---------------------------------------------------------------
    mocapy::GibbsRandom mcmc = mocapy::GibbsRandom(&mdbn);
    mocapy::EMEngine em = mocapy::EMEngine(&mdbn, &mcmc, &seq_list, &mismask_list);

    std::cout << "Starting EM loop" << std::endl;
    double bestLL = -1000;
    uint it_no_improvement(0);
    uint i(0);

    // Start EM loop: run until 100 consecutive iterations without a new
    // best log-likelihood; save the model on every improvement.
    while (it_no_improvement < 100)
    {
        em.do_E_step(1, MCMC_BURN_IN, true);

        const double ll = em.get_loglik();

        std::cout << "LL= " << ll;

        if (ll > bestLL)
        {
            std::cout << " * saving model *" << std::endl;
            mdbn.save("./data/probabilistic_graphical_model/mocapy/discrete_hmm_with_prior.dbn");
            bestLL = ll;
            it_no_improvement = 0;
        }
        else
        {
            ++it_no_improvement;
            std::cout << std::endl;
        }

        ++i;
        em.do_M_step();
    }

    std::cout << "DONE" << std::endl;

    //---------------------------------------------------------------
    // Reload the best checkpoint and print both networks for comparison.
    mdbn.load("./data/probabilistic_graphical_model/mocapy/discrete_hmm_with_prior.dbn");

    std::cout << "*** TARGET ***" << std::endl;
    std::cout << "th0: \n" << *th0 << std::endl;
    std::cout << "th1: \n" << *th1 << std::endl;
    std::cout << "to: \n" << *to << std::endl;

    std::cout << "*** MODEL ***" << std::endl;
    std::cout << "mh0: \n" << *mh0 << std::endl;
    std::cout << "mh1: \n" << *mh1 << std::endl;
    std::cout << "mo: \n" << *mo << std::endl;

    //---------------------------------------------------------------
    delete th0;
    delete th1;
    delete to;
    delete mh0;
    delete mh1;
    delete mo;
}
// Computes a canonical ordering of the vertices of a triangulated planar
// graph g (with rotation system `embedding`), writing vertices to the
// output iterator `ordering`. Vertices become READY_TO_BE_PROCESSED when
// their processed neighbours form a contiguous arc in the embedding; the
// status counters track the number of such arcs. Loops and parallel edges
// are explicitly skipped. (Template parameters are declared outside this
// view; prior()/next() are the Boost iterator helpers.)
void planar_canonical_ordering(const Graph& g,
                               PlanarEmbedding embedding,
                               OutputIterator ordering,
                               VertexIndexMap vm)
{
    typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
    typedef typename graph_traits<Graph>::edge_descriptor edge_t;
    typedef typename graph_traits<Graph>::vertex_iterator vertex_iterator_t;
    typedef typename graph_traits<Graph>::adjacency_iterator adjacency_iterator_t;
    typedef typename std::pair<vertex_t, vertex_t> vertex_pair_t;
    typedef typename property_traits<PlanarEmbedding>::value_type embedding_value_t;
    typedef typename embedding_value_t::const_iterator embedding_iterator_t;

    typedef iterator_property_map
        <typename std::vector<vertex_t>::iterator, VertexIndexMap>
        vertex_to_vertex_map_t;

    typedef iterator_property_map
        <typename std::vector<std::size_t>::iterator, VertexIndexMap>
        vertex_to_size_t_map_t;

    // Status values; counts above ONE_NEIGHBOR_PROCESSED are interpreted as
    // (number of processed arcs) + READY_TO_BE_PROCESSED - 1.
    enum {PROCESSED, UNPROCESSED, ONE_NEIGHBOR_PROCESSED, READY_TO_BE_PROCESSED};

    std::vector<vertex_t> processed_neighbor_vector(num_vertices(g));
    vertex_to_vertex_map_t processed_neighbor
        (processed_neighbor_vector.begin(), vm);

    std::vector<std::size_t> status_vector(num_vertices(g), UNPROCESSED);
    vertex_to_size_t_map_t status(status_vector.begin(), vm);

    std::list<vertex_t> ready_to_be_processed;

    // Seed with an arbitrary vertex and one of its (non-self-loop) neighbours.
    vertex_t first_vertex = *vertices(g).first;
    vertex_t second_vertex;
    adjacency_iterator_t ai, ai_end;
    for(tie(ai,ai_end) = adjacent_vertices(first_vertex,g); ai != ai_end; ++ai)
    {
        if (*ai == first_vertex)
            continue;
        second_vertex = *ai;
        break;
    }

    ready_to_be_processed.push_back(first_vertex);
    status[first_vertex] = READY_TO_BE_PROCESSED;
    ready_to_be_processed.push_back(second_vertex);
    status[second_vertex] = READY_TO_BE_PROCESSED;

    while(!ready_to_be_processed.empty())
    {
        vertex_t u = ready_to_be_processed.front();
        ready_to_be_processed.pop_front();

        // A vertex may be queued more than once; only process it while
        // still marked ready (second_vertex is always allowed through).
        if (status[u] != READY_TO_BE_PROCESSED && u != second_vertex)
            continue;

        embedding_iterator_t ei, ei_start, ei_end;
        embedding_iterator_t next_edge_itr, prior_edge_itr;

        ei_start = embedding[u].begin();
        ei_end = embedding[u].end();
        // Start with the last non-self-loop edge as the "prior" edge.
        prior_edge_itr = prior(ei_end);
        while(source(*prior_edge_itr, g) == target(*prior_edge_itr,g))
            prior_edge_itr = prior(prior_edge_itr);

        for(ei = ei_start; ei != ei_end; ++ei)
        {
            edge_t e(*ei); // e = (u,v)
            next_edge_itr = next(ei) == ei_end ? ei_start : next(ei);
            vertex_t v = source(e,g) == u ? target(e,g) : source(e,g);

            vertex_t prior_vertex = source(*prior_edge_itr, g) == u ?
                target(*prior_edge_itr, g) : source(*prior_edge_itr, g);
            vertex_t next_vertex = source(*next_edge_itr, g) == u ?
                target(*next_edge_itr, g) : source(*next_edge_itr, g);

            // Need prior_vertex, u, v, and next_vertex to all be
            // distinct. This is possible, since the input graph is
            // triangulated. It'll be true all the time in a simple
            // graph, but loops and parallel edges cause some complications.
            if (prior_vertex == v || prior_vertex == u)
            {
                prior_edge_itr = ei;
                continue;
            }

            //Skip any self-loops
            if (u == v)
                continue;

            // Move next_edge_itr (and next_vertex) forwards
            // past any loops or parallel edges
            while (next_vertex == v || next_vertex == u)
            {
                next_edge_itr = next(next_edge_itr) == ei_end ?
                    ei_start : next(next_edge_itr);
                next_vertex = source(*next_edge_itr, g) == u ?
                    target(*next_edge_itr, g) : source(*next_edge_itr, g);
            }

            if (status[v] == UNPROCESSED)
            {
                status[v] = ONE_NEIGHBOR_PROCESSED;
                processed_neighbor[v] = u;
            }
            else if (status[v] == ONE_NEIGHBOR_PROCESSED)
            {
                vertex_t x = processed_neighbor[v];
                //are edges (v,u) and (v,x) adjacent in the planar
                //embedding? if so, set status[v] = 1. otherwise, set
                //status[v] = 2.
                if ((next_vertex == x &&
                     !(first_vertex == u && second_vertex == x)
                     )
                    ||
                    (prior_vertex == x &&
                     !(first_vertex == x && second_vertex == u)
                     )
                    )
                {
                    status[v] = READY_TO_BE_PROCESSED;
                }
                else
                {
                    status[v] = READY_TO_BE_PROCESSED + 1;
                }
            }
            else if (status[v] > ONE_NEIGHBOR_PROCESSED)
            {
                //check the two edges before and after (v,u) in the planar
                //embedding, and update status[v] accordingly
                bool processed_before = false;
                if (status[prior_vertex] == PROCESSED)
                    processed_before = true;

                bool processed_after = false;
                if (status[next_vertex] == PROCESSED)
                    processed_after = true;

                if (!processed_before && !processed_after)
                    ++status[v];
                else if (processed_before && processed_after)
                    --status[v];
            }

            if (status[v] == READY_TO_BE_PROCESSED)
                ready_to_be_processed.push_back(v);

            prior_edge_itr = ei;
        }

        status[u] = PROCESSED;
        *ordering = u;
        ++ordering;
    }
}
double trajectoryOptimizer::eval(vector<double> ¶ms, vector<double> &gradient) { cout << "IN EVAL "<<params.size()<<endl; for (int i=0; i < params.size(); i++) cout << "PARAMS IN: "<<i<<" "<<params.at(i)<<endl; int factor = evidence.getFactor(); // cout << "FACTOR: "<<factor<<endl; FeatureArray featArray2(features); FeatureArray featArray(featArray2, factor); //cout<<"Dims featarray "<<featArray.dims().first<<" "<<featArray.dims().second<<endl; Parameters parameters(params); //cout << "Calculating rewards"<<endl; RewardMap rewards(featArray, parameters); pair<int, int> dims = grid.dims(); BMPFile gridView(dims.first, dims.second); pair<int, int> lowDims((int)ceil((float)dims.first/factor), (int)ceil((float)dims.second/factor)); //cout << "Computing prior"<<endl; vector<vector<double> > prior(lowDims.first, vector<double>(lowDims.second, -HUGE_VAL)); double obj = 0.0; gradient.clear(); gradient.resize(params.size(), 0.0); for (int i=0; i < evidence.size(); i++) { Predictor predictor(grid, rewards, engine); cout << "Evidence #"<<i<<endl; vector<pair<int, int> > trajectory = evidence.at(i); double cost = 0.0; for (int j=0; j < trajectory.size(); j++){ double temp = rewards.at(trajectory.at(j).first, trajectory.at(j).second); cost += temp; } pair<int, int> initial = trajectory.front(); pair<int, int> destination = trajectory.back(); prior.at(destination.first).at(destination.second) = 0.0; #if 0 cout << "Initial: "<<initial.first<<" "<<initial.second<<endl; cout << "Destination: "<<destination.first<<" " <<destination.second<<endl; #endif predictor.setStart(initial); predictor.setPrior(prior); vector<vector<double> > occupancy; double norm = predictor.predict(initial, occupancy); gridView.addBelief(occupancy, -300.0, 0.0, white, red); gridView.addVector(trajectory, blue, factor); char buf[1024]; sprintf(buf, "../figures/train%04d.bmp", i); gridView.write(buf); vector<double> modelFeats, pathFeats; //cout << "Computing feature counts"<<endl; /* for (int i=0; i < 
occupancy.size(); i++) for (int j=0; j < occupancy.at(i).size(); j++) if (occupancy.at(i).at(j) > -10) cout << i <<" "<<j<<" "<<occupancy.at(i).at(j)<<endl; */ featArray.featureCounts(occupancy, modelFeats); featArray.featureCounts(trajectory, pathFeats); cout << "GRADIENT"<<endl; for (int k=0; k < params.size(); k++) { double diff = pathFeats.at(k) - modelFeats.at(k); gradient.at(k) -= diff; cout << k << ": " << gradient.at(k) << " " << pathFeats.at(k) << " " << modelFeats.at(k) <<endl; } cout << "OBJ: "<<cost-norm<<endl; cout << " "<<cost<<" "<<norm<<endl; obj += (cost - norm); prior.at(destination.first).at(destination.second) = -HUGE_VAL; } cout << "RETURN OBJ: "<<-obj<<endl; return -obj; }
void MvnConjVarSampler::draw() { Ptr<MvnSuf> suf = model()->suf(); model()->set_siginv(MvnVarSampler::draw_precision( rng(), suf->n() - 1, suf->center_sumsq(suf->ybar()), *prior())); }
/*
 * fnIMIS: Incremental Mixture Importance Sampling driver (cf. the
 * "stage 2c of Raftery & Bao" note below; mirrors the IMIS R package's
 * diagonalized-prior-covariance variant).
 *
 * InitSamples    : number of initial draws from the prior.
 * StepSamples    : draws added per IMIS iteration from the newest
 *                  multivariate-normal mixture component.
 * FinalResamples : number of posterior resamples written out.
 * MaxIter        : maximum IMIS iterations.
 * NumParam       : parameter-vector dimension.
 * rng_seed       : GSL RNG seed.
 * runName        : prefix for "<runName>Diagnostics.txt" and
 *                  "<runName>Resample.txt".
 *
 * Relies on externally-defined helpers: sample_prior, prior, likelihood,
 * GetMahalanobis_diag, cmp_dst, covariance_weighted, GenerateRandMVnorm,
 * GetMVNpdf, walker_ProbSampleReplace, and struct dst.
 */
void fnIMIS(const size_t InitSamples, const size_t StepSamples, const size_t FinalResamples, const size_t MaxIter, const size_t NumParam, unsigned long int rng_seed, const char * runName) {

  // Declare and configure GSL RNG
  gsl_rng * rng;
  const gsl_rng_type * T;
  gsl_rng_env_setup();
  T = gsl_rng_default;
  rng = gsl_rng_alloc (T);
  gsl_rng_set(rng, rng_seed);

  // Output file names are runName + fixed suffixes (VLA-sized buffers).
  char strDiagnosticsFile[strlen(runName) + 15 +1];
  char strResampleFile[strlen(runName) + 12 +1];
  strcpy(strDiagnosticsFile, runName); strcat(strDiagnosticsFile, "Diagnostics.txt");
  strcpy(strResampleFile, runName); strcat(strResampleFile, "Resample.txt");
  // NOTE(review): fopen result is not checked; a bad runName path would
  // crash on the first fprintf.
  FILE * diagnostics_file = fopen(strDiagnosticsFile, "w");
  // NOTE(review): %zu expects size_t but rng_seed is unsigned long int;
  // harmless on LP64 but formally mismatched -- should be %lu.
  fprintf(diagnostics_file, "Seeded RNG: %zu\n", rng_seed);
  fprintf(diagnostics_file, "Running IMIS. InitSamples: %zu, StepSamples: %zu, FinalResamples %zu, MaxIter %zu\n", InitSamples, StepSamples, FinalResamples, MaxIter);

  // Setup IMIS arrays -- all sized for the worst case of MaxIter full steps.
  gsl_matrix * Xmat = gsl_matrix_alloc(InitSamples + StepSamples*MaxIter, NumParam);
  double * prior_all = (double*) malloc(sizeof(double) * (InitSamples + StepSamples*MaxIter));
  double * likelihood_all = (double*) malloc(sizeof(double) * (InitSamples + StepSamples*MaxIter));
  double * imp_weight_denom = (double*) malloc(sizeof(double) * (InitSamples + StepSamples*MaxIter));  // proportional to q(k) in stage 2c of Raftery & Bao
  double * gaussian_sum = (double*) calloc(InitSamples + StepSamples*MaxIter, sizeof(double));     // sum of mixture distribution for mode
  struct dst * distance = (struct dst *) malloc(sizeof(struct dst) * (InitSamples + StepSamples*MaxIter)); // Mahalanobis distance to most recent mode
  double * imp_weights = (double*) malloc(sizeof(double) * (InitSamples + StepSamples*MaxIter));
  double * tmp_MVNpdf = (double*) malloc(sizeof(double) * (InitSamples + StepSamples*MaxIter));

  gsl_matrix * nearestX = gsl_matrix_alloc(StepSamples, NumParam);
  // NOTE(review): center_all is a stack VLA of MaxIter*NumParam doubles;
  // large MaxIter/NumParam could overflow the stack.
  double center_all[MaxIter][NumParam];
  gsl_matrix * sigmaChol_all[MaxIter];
  gsl_matrix * sigmaInv_all[MaxIter];

  // Initial prior samples
  sample_prior(rng, InitSamples, Xmat);

  // Calculate prior covariance
  double prior_invCov_diag[NumParam];
  /*
    The paper describing the algorithm uses the full prior covariance matrix.
    This follows the code in the IMIS R package and diagonalizes the prior
    covariance matrix to ensure invertibility.
  */
  for(size_t i = 0; i < NumParam; i++){
    gsl_vector_view tmpCol = gsl_matrix_subcolumn(Xmat, i, 0, InitSamples);
    prior_invCov_diag[i] = gsl_stats_variance(tmpCol.vector.data, tmpCol.vector.stride, InitSamples);
    prior_invCov_diag[i] = 1.0/prior_invCov_diag[i];
  }

  // IMIS steps
  fprintf(diagnostics_file, "Step Var(w_i) MargLik Unique Max(w_i) ESS Time\n");
  printf("Step Var(w_i) MargLik Unique Max(w_i) ESS Time\n");
  time_t time1, time2;
  time(&time1);
  size_t imisStep = 0, numImisSamples;
  for(imisStep = 0;  imisStep < MaxIter; imisStep++){
    numImisSamples = (InitSamples + imisStep*StepSamples);

    // Evaluate prior and likelihood
    if(imisStep == 0){ // initial stage: score every initial sample
      #pragma omp parallel for
      for(size_t i = 0; i < numImisSamples; i++){
        gsl_vector_const_view theta = gsl_matrix_const_row(Xmat, i);
        prior_all[i] = prior(&theta.vector);
        likelihood_all[i] = likelihood(&theta.vector);
      }
    } else {  // imisStep > 0: only score the StepSamples newest draws
      #pragma omp parallel for
      for(size_t i = InitSamples + (imisStep-1)*StepSamples; i < numImisSamples; i++){
        gsl_vector_const_view theta = gsl_matrix_const_row(Xmat, i);
        prior_all[i] = prior(&theta.vector);
        likelihood_all[i] = likelihood(&theta.vector);
      }
    }

    // Determine importance weights, find current maximum, calculate monitoring criteria
    #pragma omp parallel for
    for(size_t i = 0; i < numImisSamples; i++){
      imp_weight_denom[i] = (InitSamples*prior_all[i] + StepSamples*gaussian_sum[i])/(InitSamples + StepSamples * imisStep);
      imp_weights[i] = (prior_all[i] > 0)?likelihood_all[i]*prior_all[i]/imp_weight_denom[i]:0;
    }

    double sumWeights = 0.0;
    for(size_t i = 0; i < numImisSamples; i++){
      sumWeights += imp_weights[i];
    }

    double maxWeight = 0.0, varImpW = 0.0, entropy = 0.0, expectedUnique = 0.0, effSampSize = 0.0, margLik;
    // NOTE(review): maxW_idx is only assigned if some weight exceeds 0; if
    // every weight is exactly 0 it is read uninitialized below.
    size_t maxW_idx;
    #pragma omp parallel for reduction(+: varImpW, entropy, expectedUnique, effSampSize)
    for(size_t i = 0; i < numImisSamples; i++){
      imp_weights[i] /= sumWeights;
      varImpW += pow(numImisSamples * imp_weights[i] - 1.0, 2.0);
      // NOTE(review): log(0) = -inf here for zero-weight samples, making
      // entropy NaN via 0*-inf; entropy is computed but never reported.
      entropy += imp_weights[i] * log(imp_weights[i]);
      expectedUnique += (1.0 - pow((1.0 - imp_weights[i]), FinalResamples));
      effSampSize += pow(imp_weights[i], 2.0);
    }
    for(size_t i = 0; i < numImisSamples; i++){
      if(imp_weights[i] > maxWeight){
        maxW_idx = i;
        maxWeight = imp_weights[i];
      }
    }
    // Record the highest-weight sample as the new mixture-component center.
    for(size_t i = 0; i < NumParam; i++)
      center_all[imisStep][i] = gsl_matrix_get(Xmat, maxW_idx, i);

    varImpW /= numImisSamples;
    entropy = -entropy / log(numImisSamples);
    effSampSize = 1.0/effSampSize;
    margLik = log(sumWeights/numImisSamples);

    fprintf(diagnostics_file, "%4zu %8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n", imisStep, varImpW, margLik, expectedUnique, maxWeight, effSampSize, difftime(time(&time2), time1));
    printf("%4zu %8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n", imisStep, varImpW, margLik, expectedUnique, maxWeight, effSampSize, difftime(time(&time2), time1));
    time1 = time2;

    // Check for convergence
    if(expectedUnique > FinalResamples*(1.0 - exp(-1.0))){
      break;
    }

    // Calculate Mahalanobis distance to current mode
    GetMahalanobis_diag(Xmat, center_all[imisStep],  prior_invCov_diag, numImisSamples, NumParam, distance);

    // Find StepSamples nearest points
    // (Note: this was a major bottleneck when InitSamples and StepResamples are large. qsort substantially outperformed GSL sort options.)
    qsort(distance, numImisSamples, sizeof(struct dst), cmp_dst);
    #pragma omp parallel for
    for(size_t i = 0; i < StepSamples; i++){
      gsl_vector_const_view tmpX = gsl_matrix_const_row(Xmat, distance[i].idx);
      gsl_matrix_set_row(nearestX, i, &tmpX.vector);
    }

    // Calculate weighted covariance of nearestX

    // (a) Calculate weights for nearest points 1...StepSamples
    double weightsCov[StepSamples];
    #pragma omp parallel for
    for(size_t i = 0; i < StepSamples; i++){
      weightsCov[i] = 0.5*(imp_weights[distance[i].idx] + 1.0/numImisSamples); // cov_wt function will normalize the weights
    }

    // (b) Calculate weighted covariance
    sigmaChol_all[imisStep] = gsl_matrix_alloc(NumParam, NumParam);
    covariance_weighted(nearestX, weightsCov, StepSamples, center_all[imisStep], NumParam, sigmaChol_all[imisStep]);

    // (c) Do Cholesky decomposition and inverse of covariance matrix
    gsl_linalg_cholesky_decomp(sigmaChol_all[imisStep]);
    for(size_t j = 0; j < NumParam; j++) // Note: GSL outputs a symmetric matrix rather than lower tri, so have to set upper tri to zero
      for(size_t k = j+1; k < NumParam; k++)
        gsl_matrix_set(sigmaChol_all[imisStep], j, k, 0.0);
    sigmaInv_all[imisStep] = gsl_matrix_alloc(NumParam, NumParam);
    gsl_matrix_memcpy(sigmaInv_all[imisStep], sigmaChol_all[imisStep]);
    gsl_linalg_cholesky_invert(sigmaInv_all[imisStep]);

    // Sample new inputs from the freshly-fit multivariate normal, appended
    // in place as the next StepSamples rows of Xmat.
    gsl_matrix_view newSamples = gsl_matrix_submatrix(Xmat, numImisSamples, 0, StepSamples, NumParam);
    GenerateRandMVnorm(rng, StepSamples, center_all[imisStep], sigmaChol_all[imisStep], NumParam, &newSamples.matrix);

    // Evaluate sampling probability from mixture distribution
    // (a) For newly sampled points, sum over all previous centers
    for(size_t pastStep = 0; pastStep < imisStep; pastStep++){
      GetMVNpdf(&newSamples.matrix, center_all[pastStep], sigmaInv_all[pastStep], sigmaChol_all[pastStep], StepSamples, NumParam, tmp_MVNpdf);
      #pragma omp parallel for
      for(size_t i = 0; i < StepSamples; i++)
        gaussian_sum[numImisSamples + i] += tmp_MVNpdf[i];
    }
    // (b) For all points, add weight for most recent center
    gsl_matrix_const_view Xmat_curr = gsl_matrix_const_submatrix(Xmat, 0, 0, numImisSamples + StepSamples, NumParam);
    GetMVNpdf(&Xmat_curr.matrix, center_all[imisStep], sigmaInv_all[imisStep], sigmaChol_all[imisStep], numImisSamples + StepSamples, NumParam, tmp_MVNpdf);
    #pragma omp parallel for
    for(size_t i = 0; i < numImisSamples + StepSamples; i++)
      gaussian_sum[i] += tmp_MVNpdf[i];
  } // loop over imisStep

  //// FINISHED IMIS ROUTINE
  fclose(diagnostics_file);

  // Resample posterior outputs
  int resampleIdx[FinalResamples];
  walker_ProbSampleReplace(rng, numImisSamples, imp_weights, FinalResamples, resampleIdx); // Note: Random sampling routine used in R sample() function.

  // Print results
  FILE * resample_file = fopen(strResampleFile, "w");
  for(size_t i = 0; i < FinalResamples; i++){
    for(size_t j = 0; j < NumParam; j++)
      fprintf(resample_file, "%.15e\t", gsl_matrix_get(Xmat, resampleIdx[i], j));
    // NOTE(review): this theta view is unused -- leftover from debugging?
    gsl_vector_const_view theta = gsl_matrix_const_row(Xmat, resampleIdx[i]);
    fprintf(resample_file, "\n");
  }
  fclose(resample_file);

  /*
  // This outputs Xmat (parameter matrix), centers, and covariance matrices to files for debugging
  FILE * Xmat_file = fopen("Xmat.txt", "w");
  for(size_t i = 0; i < numImisSamples; i++){
    for(size_t j = 0; j < NumParam; j++)
      fprintf(Xmat_file, "%.15e\t", gsl_matrix_get(Xmat, i, j));
    fprintf(Xmat_file, "%e\t%e\t%e\t%e\t%e\t\n", prior_all[i], likelihood_all[i], imp_weights[i], gaussian_sum[i], distance[i]);
  }
  fclose(Xmat_file);
  FILE * centers_file = fopen("centers.txt", "w");
  for(size_t i = 0; i < imisStep; i++){
    for(size_t j = 0; j < NumParam; j++)
      fprintf(centers_file, "%f\t", center_all[i][j]);
    fprintf(centers_file, "\n");
  }
  fclose(centers_file);
  FILE * sigmaInv_file = fopen("sigmaInv.txt", "w");
  for(size_t i = 0; i < imisStep; i++){
    for(size_t j = 0; j < NumParam; j++)
      for(size_t k = 0; k < NumParam; k++)
        fprintf(sigmaInv_file, "%f\t", gsl_matrix_get(sigmaInv_all[i], j, k));
    fprintf(sigmaInv_file, "\n");
  }
  fclose(sigmaInv_file);
  */

  // free memory allocated by IMIS
  for(size_t i = 0; i < imisStep; i++){
    gsl_matrix_free(sigmaChol_all[i]);
    gsl_matrix_free(sigmaInv_all[i]);
  }
  // release RNG
  gsl_rng_free(rng);
  gsl_matrix_free(Xmat);
  gsl_matrix_free(nearestX);
  free(prior_all);
  free(likelihood_all);
  free(imp_weight_denom);
  free(gaussian_sum);
  free(distance);
  free(imp_weights);
  free(tmp_MVNpdf);

  return;
}
string_iterator operator -- (int) { string_iterator temp = *this; prior(it, range_start); return temp; }
//------------------------------------------------------------------ ofUTF8Ptr ofUTF8::prior(const ofUTF8String& input, ofUTF8Ptr iter) { prior(iter, endPtr(input)); return iter; }
RINGING_END_ANON_NAMESPACE // Returns false if the touch is false bool prover::add_row( const row &r ) { // This function is quite complicated to avoid doing more than one // O( ln N ) operation on the multimap. The equal_range function call // will be O( ln N ); the insert function ought to be O(1) because a // sensible hint is supplied (although the C++ standard doesn't require // the hint to be used). The number of identical elements is also // calculated from the range, rather than doing a O( ln N ) call to // multimap::count. typedef pair< mmap::iterator, mmap::iterator > range; range rng = m.equal_range(r); size_t n = distance( rng.first, rng.second ) + 1; // effecively m.count(r) mmap::iterator i( m.insert( rng.first == m.begin() ? m.begin() : prior( rng.first ), mmap::value_type( r, ++lineno ) ) ); if ( n > 1 ) ++dups; if ( (int) n > max_occurs ) { is_true = false; if ( fi ) { for ( failinfo::iterator j = fi->begin(), e = fi->end(); j != e; ++j) if ( j->_row == r ) { j->_lines.push_back( i->second ); return false; } linedetail l; // The C++ standard doesn't make any guarantee about where i // is in relation to rng.first and rng.second -- it may fall in // the range [rng.first, rng.second), but equally, it might get // inserted immediately before rng.first. If we don't detect i // in this range, we explicitly add it by hand afterwards. l._row = r; bool added_i = false; { for ( mmap::iterator j = rng.first; j != rng.second; ++j ) { if ( j == i ) added_i = true; l._lines.push_back( j->second ); } } if (!added_i) l._lines.push_back( i->second ); fi->push_back( l ); return false; } } return is_true; }
// Calls a somatic SNV at one position from paired normal/tumor pileups.
//
// normal_epi / tumor_epi        : tier-1 pileup info for each sample.
// *_epi_t2_ptr                  : optional tier-2 pileups (NULL = no tier 2);
//                                 when present, results from both tiers are
//                                 computed and the more conservative is kept.
// sgt (out)                     : genotype/quality results; sgt.is_snv is the
//                                 final yes/no call.
void somatic_snv_caller_strand_grid::
position_somatic_snv_call(const extended_pos_info& normal_epi,
                          const extended_pos_info& tumor_epi,
                          const extended_pos_info* normal_epi_t2_ptr,
                          const extended_pos_info* tumor_epi_t2_ptr,
                          somatic_snv_genotype_grid& sgt) const {

    static const bool is_always_test(false);

    {
        const snp_pos_info& normal_pi(normal_epi.pi);
        const snp_pos_info& tumor_pi(tumor_epi.pi);

        // No call possible at ambiguous reference positions.
        if(normal_pi.ref_base=='N') return;
        sgt.ref_gt=base_to_id(normal_pi.ref_base);

        // check that a non-reference call meeting quality criteria even
        // exists:
        if(not is_always_test) {
            if(is_spi_allref(normal_pi,sgt.ref_gt) and is_spi_allref(tumor_pi,sgt.ref_gt)) return;
        }
    }

    // strawman model treats normal and tumor as independent, so
    // calculate separate lhoods:
    blt_float_t normal_lhood[DIGT_SGRID::SIZE];
    blt_float_t tumor_lhood[DIGT_SGRID::SIZE];

    const bool is_tier2(NULL != normal_epi_t2_ptr);

    static const unsigned n_tier(2);
    result_set tier_rs[n_tier];
    for(unsigned i(0); i<n_tier; ++i) {
        const bool is_include_tier2(i==1);
        if(is_include_tier2) {
            if(! is_tier2) continue;
            // Tier 2 is skipped (zeroed) when tier 1 already called nothing.
            if(tier_rs[0].snv_qphred==0) {
                tier_rs[1].snv_qphred=0;
                continue;
            }
        }

        // get likelihood of each genotype
        //
        static const bool is_normal_het_bias(false);
        static const blt_float_t normal_het_bias(0.0);
        static const bool is_tumor_het_bias(false);
        static const blt_float_t tumor_het_bias(0.0);
        const extended_pos_info& nepi(is_include_tier2 ? *normal_epi_t2_ptr : normal_epi );
        const extended_pos_info& tepi(is_include_tier2 ? *tumor_epi_t2_ptr : tumor_epi );
        // Likelihoods are laid out in three contiguous segments of the
        // lhood arrays: diploid genotypes, het-grid states (from DIGT::SIZE),
        // then strand-grid states (from DIGT_SGRID::PRESTRAND_SIZE).
        get_diploid_gt_lhood_spi(_opt,nepi.pi,is_normal_het_bias,normal_het_bias,normal_lhood);
        get_diploid_gt_lhood_spi(_opt,tepi.pi,is_tumor_het_bias,tumor_het_bias,tumor_lhood);
        get_diploid_het_grid_lhood_spi(nepi.pi,normal_lhood+DIGT::SIZE);
        get_diploid_het_grid_lhood_spi(tepi.pi,tumor_lhood+DIGT::SIZE);
        get_diploid_strand_grid_lhood_spi(nepi.pi,sgt.ref_gt,normal_lhood+DIGT_SGRID::PRESTRAND_SIZE);
        get_diploid_strand_grid_lhood_spi(tepi.pi,sgt.ref_gt,tumor_lhood+DIGT_SGRID::PRESTRAND_SIZE);

        // genomic site results:
        calculate_result_set_grid(normal_lhood,
                                  tumor_lhood,
                                  get_prior_set(sgt.ref_gt),
                                  _ln_som_match,_ln_som_mismatch,
                                  sgt.ref_gt,
                                  tier_rs[i]);

#if 0
#ifdef ENABLE_POLY
        // polymorphic site results:
        assert(0); // still needs to be adapted for 2-tier system:
        calculate_result_set(normal_lhood,tumor_lhood,
                             lnprior_polymorphic(sgt.ref_gt),sgt.ref_gt,sgt.poly);
#else
        sgt.poly.snv_qphred = 0;
#endif
#endif

#ifdef SOMATIC_DEBUG
        if((i==0) && (tier_rs[i].snv_qphred > 0)) {
            const somatic_snv_caller_strand_grid::prior_set& pset(get_prior_set(sgt.ref_gt));
            const blt_float_t lnmatch(_ln_som_match);
            const blt_float_t lnmismatch(_ln_som_mismatch);

            log_os << "DUMP ON\n";
            log_os << "tier1_qphred: " << tier_rs[0].snv_qphred << "\n";

            // instead of dumping the entire distribution, we sort the lhood,prior,and prob to print out the N top values of each:
            std::vector<double> lhood(DDIGT_SGRID::SIZE);
            std::vector<double> prior(DDIGT_SGRID::SIZE);
            std::vector<double> post(DDIGT_SGRID::SIZE);

            // first get raw lhood:
            //
            for(unsigned ngt(0); ngt<DIGT_SGRID::PRESTRAND_SIZE; ++ngt) {
                for(unsigned tgt(0); tgt<DIGT_SGRID::PRESTRAND_SIZE; ++tgt) {
                    const unsigned dgt(DDIGT_SGRID::get_state(ngt,tgt));
                    // unorm takes the role of the normal prior for the somatic case:
                    //            static const blt_float_t unorm(std::log(static_cast<blt_float_t>(DIGT_SGRID::PRESTRAND_SIZE)));

                    //blt_float_t prior;
                    //if(tgt==ngt) { prior=pset.normal[ngt]+lnmatch; }
                    //else { prior=pset.somatic_marginal[ngt]+lnmismatch; }
                    blt_float_t pr;
                    if(tgt==ngt) { pr=pset.normal[ngt]+lnmatch; }
                    else { pr=pset.somatic_marginal[ngt]+lnmismatch; }
                    prior[dgt] = pr;
                    lhood[dgt] = normal_lhood[ngt]+tumor_lhood[tgt];
                    post[dgt] = lhood[dgt] + prior[dgt];
                }
            }

            // Single-strand noise states: diagonal (ngt==tgt) only.
            for(unsigned gt(DIGT_SGRID::PRESTRAND_SIZE); gt<DIGT_SGRID::SIZE; ++gt) {
                const unsigned dgt(DDIGT_SGRID::get_state(gt,gt));
                lhood[dgt] = normal_lhood[gt]+tumor_lhood[gt];
                prior[dgt] = pset.normal[gt]+lnmatch;
                post[dgt] = lhood[dgt] + prior[dgt];
            }

            std::vector<double> lhood2(lhood);
            sort_n_dump("lhood_prior",lhood,prior,sgt.ref_gt);
            sort_n_dump("post_lhood",post,lhood2,sgt.ref_gt);

            log_os << "DUMP OFF\n";
        }
#endif
    }

    // No call unless every computed tier produced a nonzero quality.
    if((tier_rs[0].snv_qphred==0) ||
       (is_tier2 && (tier_rs[1].snv_qphred==0))) return;

    // Keep the more conservative (lower-quality) tier for each score.
    sgt.snv_tier=0;
    sgt.snv_from_ntype_tier=0;
    if(is_tier2) {
        if(tier_rs[0].snv_qphred > tier_rs[1].snv_qphred) {
            sgt.snv_tier=1;
        }
        if(tier_rs[0].snv_from_ntype_qphred > tier_rs[1].snv_from_ntype_qphred) {
            sgt.snv_from_ntype_tier=1;
        }
    }

    sgt.rs=tier_rs[sgt.snv_from_ntype_tier];

    if(is_tier2 && (tier_rs[0].ntype != tier_rs[1].ntype)) {
        // catch NTYPE conflict states:
        sgt.rs.ntype = NTYPE::CONFLICT;
        sgt.rs.snv_from_ntype_qphred = 0;
    } else {
        // classify NTYPE:
        //
        // convert diploid genotype into more limited ntype set:
        //
        if       (sgt.rs.ntype==sgt.ref_gt) { sgt.rs.ntype=NTYPE::REF; }
        else if(DIGT::is_het(sgt.rs.ntype)) { sgt.rs.ntype=NTYPE::HET; }
        else                                { sgt.rs.ntype=NTYPE::HOM; }
    }

    sgt.rs.snv_qphred = tier_rs[sgt.snv_tier].snv_qphred;
    sgt.is_snv=((sgt.rs.snv_qphred != 0));
}
// Given the likelihood, go through the final computations to get the
// posterior and derived values.
//
// normal_lhood/tumor_lhood : per-genotype log-likelihoods (strand-grid layout).
// pset                     : log-prior tables for this reference genotype.
// lnmatch/lnmismatch       : log prior prob. of tumor matching / not matching
//                            the normal genotype.
// rs (out)                 : snv_qphred, snv_from_ntype_qphred, ntype, max_gt.
//
static
void
calculate_result_set_grid(const blt_float_t* normal_lhood,
                          const blt_float_t* tumor_lhood,
                          const somatic_snv_caller_strand_grid::prior_set& pset,
                          const blt_float_t lnmatch,
                          const blt_float_t lnmismatch,
                          const unsigned /*ref_gt*/,
                          result_set& rs) {

    // a piece transplanted from 1150 to make a formal correction to
    // the priors which should have a low-impact on the results.
    // the prior below is incomplete
#ifdef DEBUG_ALTERNATE_PRIOR
    static const double neginf(-std::numeric_limits<double>::infinity());
    std::vector<double> prior(DDIGT_SGRID::SIZE);
    std::fill(prior.begin(),prior.end(),neginf);

    // this zero'd code is incomplete and abandoned for now...:
#if 0
    for(unsigned ngt(0); ngt<DIGT_SGRID::PRESTRAND_SIZE; ++ngt) {
        double base_prior(neginf);
        const bool is_noise(ngt>=STAR_DIINDEL::SIZE);
        if(is_noise) {
            base_prior=pset.normal[ngt];
        } else {
            base_prior=pset.nonoise[ngt];
        }
        for(unsigned tgt(0); tgt<DIGT_SGRID::PRESTRAND_SIZE; ++tgt) {
            const blt_float_t tgt_prior_mod( (tgt==ngt) ? lnmatch : lnmismatch );
            const unsigned dgt(DDIGT_SGRID::get_state(ngt,tgt));
            prior[dgt] = normal_genomic_lnprior[ngt]+tgt_prior_mod;
        }
    }

    for(unsigned gt(DIGT_SGRID::PRESTRAND_SIZE); gt<DIGT_SGRID::SIZE; ++gt) {
        const unsigned dgt(DDIGT_SGRID::get_state(gt,gt));
        prior[dgt] = normal_genomic_lnprior[gt]+lnmatch;
    }
#endif

    check_ln_distro(prior.begin(),
                    prior.end(),
                    "somatic snv full prior");
#endif

    // intentionally use higher float res for this structure:
    std::vector<double> pprob(DDIGT_SGRID::SIZE);

    // mult by prior distro to get unnormalized pprob for states in
    // the regular grid model:
    //
    for(unsigned ngt(0); ngt<DIGT_SGRID::PRESTRAND_SIZE; ++ngt) {
        for(unsigned tgt(0); tgt<DIGT_SGRID::PRESTRAND_SIZE; ++tgt) {
            const unsigned dgt(DDIGT_SGRID::get_state(ngt,tgt));
#if 0
            // the trusty old way...:
            const blt_float_t tgt_prior_mod( (tgt==ngt) ? lnmatch : lnmismatch );
            pprob[dgt] = normal_lhood[ngt]+tumor_lhood[tgt]+pset.normal[ngt]+tgt_prior_mod;
#else
            // unorm takes the role of the normal prior for the somatic case:
            //            static const blt_float_t unorm(std::log(static_cast<blt_float_t>(DIGT_SGRID::PRESTRAND_SIZE)));
            blt_float_t prior;
            if(tgt==ngt) { prior=pset.normal[ngt]+lnmatch; }
            else { prior=pset.somatic_marginal[ngt]+lnmismatch; }
            pprob[dgt] = normal_lhood[ngt]+tumor_lhood[tgt]+prior;
#endif
        }
    }

    // Now add the single-strand noise states. note that these states
    // are unique in that we don't look for mixtures of somatic
    // variation with these noise states, b/c single-strand
    // observations can almost exclusively be ruled out as noise:
    //
    for(unsigned gt(DIGT_SGRID::PRESTRAND_SIZE); gt<DIGT_SGRID::SIZE; ++gt) {
        const unsigned dgt(DDIGT_SGRID::get_state(gt,gt));
        pprob[dgt] = normal_lhood[gt]+tumor_lhood[gt]+pset.normal[gt]+lnmatch;
    }

    opt_normalize_ln_distro(pprob.begin(),pprob.end(),DDIGT_SGRID::is_nonsom.val.begin(),rs.max_gt);
    //normalize_ln_distro(pprob.begin(),pprob.end(),rs.max_gt);

    // P(no somatic variant) = sum of diagonal (normal==tumor) states.
    double nonsomatic_sum(0);
    for(unsigned gt(0); gt<DIGT_SGRID::SIZE; ++gt) {
        nonsomatic_sum += pprob[DDIGT_SGRID::get_state(gt,gt)];
    }
    rs.snv_qphred=error_prob_to_qphred(nonsomatic_sum);

    if(0==rs.snv_qphred) return;

#if 0
    // alternate way to calculate the joint:
    //
    double min_not_somfrom_sum(0);
    for(unsigned dgt(0); dgt<DIGT::SIZE; ++dgt) {
        double not_somfrom_sum(nonsomatic_sum);

        for(unsigned ngt(0); ngt<DIGT_SGRID::PRESTRAND_SIZE; ++ngt) {
            // we're looking for the joint prob when state dgt is true
            // in the normal, so skip this as a normal state here:
            //
            if(dgt==ngt) continue;

            for(unsigned tgt(0); tgt<DIGT_SGRID::PRESTRAND_SIZE; ++tgt) {
                // we've already started from the nonsomatic som, so we can skip the equal states:
                //
                if(ngt==tgt) continue;

                not_somfrom_sum += pprob[DDIGT_SGRID::get_state(ngt,tgt)];
            }
        }

        // NOTE(review): "!_somfrom_sum" below looks like a typo for
        // "not_somfrom_sum" (and the comparison direction deserves a
        // second look) -- this branch is disabled by #if 0, so it has no
        // runtime effect, but fix before ever re-enabling.
        if((dgt==0) || (!_somfrom_sum<min_not_somfrom_sum)) {
            min_not_somfrom_sum=not_somfrom_sum;
            rs.snv_from_ntype_qphred=error_prob_to_qphred(not_somfrom_sum);
            rs.ntype=dgt;
        }
    }
#endif

#if 0
    // reset max_gt to the most likely state excluding normal noise states:
    //
    rs.max_gt=0;
    for(unsigned dgt(0); dgt<DIGT::SIZE; ++dgt) {
        for(unsigned tgt(0); tgt<DIGT_SGRID::PRESTRAND_SIZE; ++tgt) {
            const unsigned xgt(DDIGT_SGRID::get_state(dgt,tgt));
            if(pprob[xgt] > pprob[rs.max_gt]) rs.max_gt=xgt;
        }
    }
#endif

    // Calculate normal distribution alone so that we can classify this call:
    //
    // Polymorphic prior is used because in this situation we want to
    // be conservative about the reference classification --
    // ie. conditioned on only looking at putative somatic sites, we
    // require evidence to show that the normal is in fact reference
    // and not simply an unsampled copy of the somatic variation.
    //
    std::vector<double> normal_pprob(DIGT_SGRID::PRESTRAND_SIZE);
    for(unsigned ngt(0); ngt<DIGT_SGRID::PRESTRAND_SIZE; ++ngt) {
        normal_pprob[ngt] = normal_lhood[ngt]+pset.normal_poly[ngt];
    }

    unsigned max_norm_gt(0);
    normalize_ln_distro(normal_pprob.begin(),normal_pprob.end(),max_norm_gt);

    // find the probability of max_norm_gt:
    const double ngt_prob(prob_comp(normal_pprob.begin(),normal_pprob.end(),max_norm_gt));

    // (1-(1-a)(1-b)) -> a+b-(ab)
    double not_somfrom_sum(nonsomatic_sum+ngt_prob-(nonsomatic_sum*ngt_prob));
    rs.snv_from_ntype_qphred=error_prob_to_qphred(not_somfrom_sum);
    rs.ntype=max_norm_gt;
}
double getL(double *L1) { int a,b,c,d,i,r; double L,la,la1,mu,mu1,d1,d2; L=prior(pa,L1); for(r=0; r<nr; r++) { a=res[r][0]; b=res[r][1]; c=res[r][2]; d=res[r][3]; la=fn(al[a]-be[b]+hh[0]); la1=fn1(al[a]-be[b]+hh[0]); //la2=fn2(al[a]-be[b]+hh[0]); mu=fn(al[b]-be[a]-hh[1]); mu1=fn1(al[b]-be[a]-hh[1]); //mu2=fn2(al[b]-be[a]-hh[1]); L+=-la+c*log(la)-lfac[c]-mu+d*log(mu)-lfac[d]; //L+=-log(phg[c])-log(pag[d]); //if(it==5000)printf("%12g --> %d\n%12 --> %d\n",la,c,mu,d); L1[a]+=(-1+c/la)*la1; L1[b]+=(-1+d/mu)*mu1; L1[nt+a]-=(-1+d/mu)*mu1; L1[nt+b]-=(-1+c/la)*la1; L1[2*nt]+=(-1+c/la)*la1; L1[2*nt+1]-=(-1+d/mu)*mu1; } d1=d2=0; for(i=0; i<nt; i++) { d1+=L1[i]; d2+=L1[nt+i]; } for(i=0; i<nt; i++) { L1[i]-=d1/nt; L1[nt+i]-=d2/nt; } return L; }
typename enable_if<is_interval_map<Type>, bool>::type contains(const Type& super, const typename Type::segment_type& sub_segment) { typedef typename Type::interval_type interval_type; typedef typename Type::const_iterator const_iterator; interval_type sub_interval = sub_segment.first; if(icl::is_empty(sub_interval)) return true; std::pair<const_iterator, const_iterator> exterior = super.equal_range(sub_interval); if(exterior.first == exterior.second) return false; const_iterator last_overlap = prior(exterior.second); if(!(sub_segment.second == exterior.first->second) ) return false; return icl::contains(hull(exterior.first->first, last_overlap->first), sub_interval) && Interval_Map::is_joinable(super, exterior.first, last_overlap); }