/**
 * Recursively sums terms of the form (-1)^(n-1) * 4/n, stepping the index
 * from n to 2*n + 1 after every term, until a term's magnitude falls below
 * the given tolerance.
 *
 * @param tolerance  Stop threshold. The first term with |term| < tolerance
 *                   ends the recursion and is still part of the sum. Must be
 *                   greater than zero, otherwise the base case is never hit.
 * @param n          Index of the first term. Must be non-zero.
 * @return           Sum of every visited term, including the terminating one.
 */
double calcPhi(double tolerance, int n) {
  // Same sign as pow(-1, n - 1) for integer n, without the transcendental call.
  const double sign = ((n - 1) % 2 == 0) ? 1.0 : -1.0;

  // Floating-point division: the integer form 4 / n truncates to 0 for n > 4.
  const double val = sign * (4.0 / n);

  // fabs keeps the comparison in double; a bare abs may pick the int overload.
  if (fabs(val) < tolerance)
    return val;

  // The accumulated value has to be returned; computing it and discarding it
  // lets control run off the end of a non-void function.
  return val + calcPhi(tolerance, 2 * n + 1);
}
/**
 * Writes the triangle fan that caps the sky dome into the supplied streams.
 *
 * One apex vertex at (0, RADIUS, 0) is emitted first, followed by a ring of
 * getNumSlices() vertices at the polar angle calcPhi(1). The index stream
 * receives one triangle (0, i, i+1) per adjacent pair of ring vertices and a
 * final triangle (0, num_slices, 1) that closes the ring.
 *
 * @param vertices   Output stream of vertex positions.
 * @param texCoords  Output stream of planar UVs, advanced in step with vertices.
 * @param indices    Output stream of triangle indices.
 */
void LLVOWLSky::buildFanBuffer(LLStrider<LLVector3> & vertices,
							   LLStrider<LLVector2> & texCoords,
							   LLStrider<U16> & indices)
{
	const F32 RADIUS = LLWLParamManager::getInstance()->getDomeRadius();

	U32 i, num_slices;
	F32 phi0, theta, x0, y0, z0;

	// paranoia checking for SL-55986/SL-55833
	U32 count_verts = 0;
	U32 count_indices = 0;

	// apex
	*vertices++		= LLVector3(0.f, RADIUS, 0.f);
	*texCoords++	= LLVector2(0.5f, 0.5f);
	++count_verts;

	num_slices = getNumSlices();

	// and fan in a circle around the apex
	phi0 = calcPhi(1);
	for(i = 0; i < num_slices; ++i)
	{
		theta = 2.f * F_PI * float(i) / float(num_slices);

		// standard transformation from  spherical to
		// rectangular coordinates
		x0 = sin(phi0) * cos(theta);
		y0 = cos(phi0);
		z0 = sin(phi0) * sin(theta);

		*vertices++		= LLVector3(x0 * RADIUS, y0 * RADIUS, z0 * RADIUS);
		// generate planar uv coordinates
		// note: x and z are transposed in order for things to animate
		// correctly in the global coordinate system where +x is east and
		// +y is north
		*texCoords++	= LLVector2((-z0 + 1.f) / 2.f, (-x0 + 1.f) / 2.f);
		// every emitted vertex must be counted or the assert below cannot hold
		++count_verts;

		// a triangle needs two ring vertices, so nothing is emitted for the
		// first one; all three index writes belong to the same guard
		if (i > 0)
		{
			*indices++ = 0;
			*indices++ = i;
			*indices++ = i+1;
			count_indices += 3;
		}
	}

	// the last vertex of the last triangle should wrap around to
	// the beginning
	*indices++ = 0;
	*indices++ = num_slices;
	*indices++ = 1;
	count_indices += 3;

	// paranoia checking for SL-55986/SL-55833
	llassert(getFanNumVerts() == count_verts);
	llassert(getFanNumIndices() == count_indices);
}
// Searches for a 3x3 transformation that aligns scene S with model M by random sampling.
//
// For each trial a model index is drawn from idxMValid and paired with scene indices in a
// window of +/- span around it. The orientation difference phiM[idx] - phiS[i] gives the
// rotation, and the translation is chosen so that scene point i lands on model point idx.
// The candidate is applied to a control set picked from S and rated; the best candidate seen
// so far is kept in TBest under an OpenMP critical section.
//
// Parameters as used in the visible code:
//   M, maskM    model points (columns 0/1 are read as x/y) and their validity mask
//   NM          model normals, passed to calcPhi together with maskM
//   S, maskS    scene points and their validity mask
//   phiMax      rotation limit; clamped to at most M_PI * 0.5 before use
//   transMax    not referenced anywhere in the visible code
//   resolution  angular step used to derive span; must exceed 1e-6
// Returns TBest, which is still the freshly constructed Matrix(3, 3) on the error paths.
//
// NOTE(review): this listing appears to have lost its block braces and a number of lines
// (several `if` conditions, `else` partners and the `#endif` for `#ifndef DEBUG`). The notes
// below describe only what is visible; confirm against the complete source.
obvious::Matrix RandomNormalMatching::match(obvious::Matrix* M,
    const bool* maskM,
    obvious::Matrix* NM,
    obvious::Matrix* S,
    const bool* maskS,
    double phiMax,
    const double transMax,
    const double resolution)
  obvious::Matrix TBest(3, 3);

  const int pointsInM = M->getRows();
  const int pointsInS = S->getRows();

  if(pointsInM != pointsInS)
    LOGMSG(DBG_ERROR, "Model and scene need to be of same size, size of M: " << pointsInM << ", size of S: " << pointsInS);
    return TBest;

  if(pointsInM < 3)
    LOGMSG(DBG_ERROR, "Model and scene contain too less points, size of M: " << pointsInM << ", size of S: " << pointsInS);
    return TBest;

  // ----------------- Model ------------------
  obvious::Matrix* NMpca = new Matrix(pointsInM, 2); // Normals for model
  double* phiM           = new double[pointsInM];    // Orientation of model points
  bool* maskMpca         = new bool[pointsInM];      // Validity mask of model points

  memcpy(maskMpca, maskM, pointsInM*sizeof(bool));

  // NOTE(review): the condition that selects supplied normals (NM) over PCA-estimated ones
  // is not visible; the `else` below has no matching `if` in this listing.
    calcPhi(NM, maskM, phiM);
  else // if normals are not supplied
    calcNormals(M, NMpca, maskM, maskMpca, _pcaSearchRange/2);
    calcPhi(NMpca, maskMpca, phiM);
  vector<unsigned int> idxMValid = extractSamples(M, maskMpca, _pcaSearchRange/2);

  initKDTree(M, idxMValid);
  // -------------------------------------------

  // ----------------- Scene -------------------
  obvious::Matrix* NSpca = new Matrix(pointsInS, 2); // Normals for scene
  double* phiS           = new double[pointsInS];    // Orientation of scene points
  bool* maskSpca         = new bool[pointsInS];      // Validity mask of scene points
  memcpy(maskSpca, maskS, pointsInS*sizeof(bool));

  // Determine number of valid samples in local scene neighborhood
  // only from these points a valid orientation is computable
  unsigned int validPoints = 0;
  for(int i=0; i<pointsInS; i++)
    if(maskSpca[i]) validPoints++;

  // Probability of point masking
  // NOTE(review): validPoints can be 0 here, making the division yield inf; the indentation of
  // the subsampleMask call suggests a guard that is not visible in this listing.
  double probability = 180.0/(double)validPoints;
    subsampleMask(maskSpca, pointsInS, probability);

  calcNormals(S, NSpca, maskS, maskSpca, _pcaSearchRange/2);
  calcPhi(NSpca, maskSpca, phiS);

  vector<unsigned int> idxSValid = extractSamples(S, maskSpca, _pcaSearchRange/2);
  // -------------------------------------------

  // --------------- Control set ---------------
  vector<unsigned int> idxControl;  //represents the indices of points used for Control in S.
  obvious::Matrix* Control = pickControlSet(S, idxSValid, idxControl);
  // NControl has idxControl.size() rows while the loop below runs to Control->getCols();
  // presumably both are the same count, TODO confirm.
  // NOTE(review): no `delete NControl` is visible in this function.
  obvious::Matrix* NControl = new obvious::Matrix(idxControl.size(), 2);
  for(unsigned int i=0; i<Control->getCols(); i++)
    (*NControl)(i, 0) = (*NSpca)(idxControl[i], 0);
    (*NControl)(i, 1) = (*NSpca)(idxControl[i], 1);
  unsigned int pointsInC = Control->getCols();
  unsigned int cntMatchThresh = pointsInC / 3; // TODO: Determine meaningful parameter
  double* phiControl = new double[pointsInC];  // Orientation of control points
  calcPhi(NControl, NULL, phiControl);
  // -------------------------------------------//

  // Determine frustum, i.e., direction of leftmost and rightmost model point
  // NOTE(review): front()/back() are read before the size checks further down, so an empty
  // idxMValid would be dereferenced here.
  double thetaBoundMin = atan2((*M)(idxMValid.front(),1), (*M)(idxMValid.front(),0)); // real bounding
  double thetaBoundMax = atan2((*M)(idxMValid.back(),1),  (*M)(idxMValid.back(),0));  // real bounding

  LOGMSG(DBG_DEBUG, "Valid points in scene: " << idxSValid.size() << ", valid points in model: " << idxMValid.size() << ", Control set: " << Control->getCols());
  LOGMSG(DBG_DEBUG, "Model phi min:: " << rad2deg(thetaBoundMin) << ", Model phi max: " << rad2deg(thetaBoundMax));

  // NOTE(review): the early returns below (and the resolution check further down) leave
  // without releasing NMpca, NSpca, phiM, phiS, phiControl, maskMpca, maskSpca and Control.
  if(idxSValid.size() < 3)
    LOGMSG(DBG_ERROR, "Too less valid points in scene, matchable size: " << idxSValid.size());
    return TBest;

  if(idxMValid.size() < 3)
    LOGMSG(DBG_ERROR, "Too less valid points in model, matchable size: " << idxMValid.size());
    return TBest;

  // Check for maximum meaningful trials
  // Each trial removes one entry from a copy of idxMValid, so trials must not exceed its size;
  // the condition guarding the assignment below is not visible in this listing.
  unsigned int trials = _trials;
    trials = idxMValid.size();

    _trace->setModel(M, idxMValid);
    _trace->setScene(S, idxSValid);

  // Calculate search "radius", i.e., maximum difference in polar indices because of rotation
  phiMax = min(phiMax, M_PI * 0.5);
  int span;
  if(resolution > 1e-6)
    span = floor(phiMax / resolution);
    if(span > (int)pointsInM) span = (int)pointsInM;
    LOGMSG(DBG_ERROR, "Resolution not properly set: resolution = " << resolution);
    return TBest;

  srand (time(NULL));

  // Rating of the best candidate found so far; updated only inside the critical section below.
  double       bestRatio = 0.0;
  unsigned int bestCnt   = 0;
  double       bestErr   = 1e12;

#ifndef DEBUG
  // trace is only possible for single threaded execution
    LOGMSG(DBG_WARN, "Configured single-threaded execution due to application of trace module");

  //Timer t;
  vector<unsigned int> idxTrials = idxMValid;
#pragma omp parallel
    bool* maskControl        = new bool[pointsInC];
    double* thetaControl     = new double[pointsInC];

#pragma omp for
    for(unsigned int trial = 0; trial < trials; trial++)

      int idx;
#pragma omp critical
        const int randIdx = rand() % (idxTrials.size());
        idx               = idxTrials[randIdx];

        // remove chosen element to avoid picking same index a second time
        idxTrials.erase(idxTrials.begin() + randIdx);

      // leftmost scene point
      const int iMin = max(idx-span, _pcaSearchRange/2);
      // rightmost scene point
      const int iMax = min(idx+span, pointsInS-_pcaSearchRange/2);

      for(int i=iMin; i<iMax; i++)

          // Rotation candidate: orientation difference wrapped into [-pi, pi]
          double phi              = phiM[idx] - phiS[i];
          if(phi>M_PI)       phi -= 2.0*M_PI;
          else if(phi<-M_PI) phi += 2.0*M_PI;

          if(fabs(phi) < phiMax)
            obvious::Matrix T = obvious::MatrixFactory::TransformationMatrix33(phi, 0, 0);

            // Calculate translation
            const double sx = (*S)(i,0);
            const double sy = (*S)(i,1);
            T(0, 2) = (*M)(idx,0) - (T(0, 0) * sx + T(0, 1) * sy);
            T(1, 2) = (*M)(idx,1) - (T(1, 0) * sx + T(1, 1) * sy);

            // Transform control set
            obvious::Matrix STemp = T * (*Control);
            unsigned int pointsInControl = STemp.getCols();

            // Determine number of control points in field of view
            // NOTE(review): maxCntMatch is never incremented in the visible code although it is
            // the divisor of `ratio` below; both maskControl assignments also run unconditionally
            // as written, so an `else` and a counter update are presumably missing.
            unsigned int maxCntMatch = 0;
            for(unsigned int j=0; j<pointsInControl; j++)
              thetaControl[j] = atan2(STemp(1, j), STemp(0, j));
              if(thetaControl[j]>thetaBoundMax || thetaControl[j]<thetaBoundMin)
                maskControl[j] = false;
                maskControl[j] = true;

            // Determine how many nearest neighbors (model <-> scene) are close enough
            unsigned int cntMatch = 0;
            flann::Matrix<int> indices(new int[1], 1, 1);
            flann::Matrix<double> dists(new double[1], 1, 1);
            double errSum = 0;
            //double scoreSum = 0.0;

            // NOTE(review): idxQuery, distConsensus and err are each declared twice below and
            // thetaMin is not declared in this function; these look like alternative branches
            // of preprocessor conditionals whose directives are not visible here.
            for(unsigned int s = 0; s < pointsInControl; s++)
              // clip points outside of model frustum

                // find nearest neighbor of control point
                double q[2];
                q[0] = STemp(0, s);
                q[1] = STemp(1, s);
                flann::Matrix<double> query(q, 1, 2);
                flann::SearchParams p(-1, 0.0);
                _index->knnSearch(query, indices, dists, 1, p);
                const int idxQuery = idxMValid[indices[0][0]];
                double distConsensus   = dists[0][0];
                // speeded-up NN search through back projection
                const int idxQuery = round((thetaControl[s]-thetaMin) / resolution);

                if(!maskM[idxQuery]) continue;

                double distX = (*M)(idxQuery, 0) - STemp(0, s);
                double distY = (*M)(idxQuery, 1) - STemp(1, s);
                double distConsensus  = distX*distX + distY*distY;

                // Experimental idea: rate matching results additionally with normal consensus
                // consensus score is in range [0, 1] -> perfect match = 0
                double normalConsensus = (1.0 - cos(phiM[idxQuery] - phiControl[s] - phi))/2.0;
                // Normalized error (weight distance and normal consensus)
                double err = distConsensus*_scaleDistance + normalConsensus*_scaleOrientation;
                double err = distConsensus*_scaleDistance;

                errSum += err;

            delete[] indices.ptr();
            delete[] dists.ptr();

            if(cntMatch <= cntMatchThresh)

            // Experimental rating
            double ratio = (double)cntMatch / (double) maxCntMatch;

#pragma omp critical
              // Rating from Markus Kuehn
              // NOTE(review): in similarityCondition the closing parenthesis of fabs() encloses
              // the comparison, so fabs is applied to a boolean rather than to (ratio-bestRatio);
              // presumably fabs(ratio-bestRatio) < equalThres was intended - confirm.
              double equalThres = 1e-5;
              bool rateCondition = ((ratio-bestRatio) > equalThres) && (cntMatch > bestCnt);
              bool similarityCondition = fabs( (ratio-bestRatio) < equalThres ) && (cntMatch == bestCnt) && errSum < bestErr;
              bool goodMatch = rateCondition ||similarityCondition;

                bestRatio = ratio;
                bestCnt = cntMatch;
                bestErr = errSum;
                TBest = T;


              //trace is only possible for single threaded execution
              vector<unsigned int> idxM;
              vector<unsigned int> idxS;
              _trace->addAssignment(M, idxM, S, idxS, &STemp, errSum, trial);

          }// if phiMax
        } // if maskS
      } // for i
    } // for trials

    // NOTE(review): thetaControl is allocated next to maskControl above but no matching
    // delete[] is visible.
    delete [] maskControl;

  } // OMP

  //cout << "elapsed: " << t.elapsed() << endl;

  delete NMpca;
  delete NSpca;
  delete [] phiM;
  delete [] phiS;
  delete [] phiControl;
  delete [] maskMpca;
  delete [] maskSpca;

  delete Control;

  return TBest;
/**
 * Writes one batch of sky dome stacks into the supplied streams as a single
 * triangle strip.
 *
 * Vertex rings are generated for stacks begin_stack+1 through end_stack+1
 * (inclusive), each with getNumSlices() vertices at the polar angle
 * calcPhi(stack). The ring at num_stacks-2 is lowered by 2048 units and the
 * ring at num_stacks-1 is additionally collapsed onto the vertical axis.
 * The index stream then stitches consecutive rings together; indices are
 * relative to the first ring written by this call.
 *
 * @param begin_stack  Index of the first stack in this batch.
 * @param end_stack    Index of the last stack in this batch; asserted to be
 *                     no larger than getNumStacks().
 * @param vertices     Output stream of vertex positions.
 * @param texCoords    Output stream of planar UVs, one per vertex.
 * @param indices      Output stream of triangle strip indices.
 */
void LLVOWLSky::buildStripsBuffer(U32 begin_stack, U32 end_stack,
								  LLStrider<LLVector3> & vertices,
								  LLStrider<LLVector2> & texCoords,
								  LLStrider<U16> & indices)
{
	const F32 RADIUS = LLWLParamManager::getInstance()->getDomeRadius();

	U32 i, j, num_slices, num_stacks;
	F32 phi0, theta, x0, y0, z0;

	// paranoia checking for SL-55986/SL-55833
	// (nothing in this function reads the counters; they are kept up to date
	// so they can be inspected or asserted on while debugging)
	U32 count_verts = 0;
	U32 count_indices = 0;

	num_slices = getNumSlices();
	num_stacks = getNumStacks();

	llassert(end_stack <= num_stacks);

	// stacks are iterated one-indexed since phi(0) was handled by the fan above
	for(i = begin_stack + 1; i <= end_stack+1; ++i)
	{
		phi0 = calcPhi(i);

		for(j = 0; j < num_slices; ++j)
		{
			theta = F_TWO_PI * (float(j) / float(num_slices));

			// standard transformation from  spherical to
			// rectangular coordinates
			x0 = sin(phi0) * cos(theta);
			y0 = cos(phi0);
			z0 = sin(phi0) * sin(theta);

			// exactly one vertex per (stack, slice): the three cases are
			// mutually exclusive so the vertex and UV streams stay in step
			if (i == num_stacks-2)
			{
				*vertices++ = LLVector3(x0*RADIUS, y0*RADIUS-1024.f*2.f, z0*RADIUS);
			}
			else if (i == num_stacks-1)
			{
				*vertices++ = LLVector3(0, y0*RADIUS-1024.f*2.f, 0);
			}
			else
			{
				*vertices++		= LLVector3(x0 * RADIUS, y0 * RADIUS, z0 * RADIUS);
			}
			++count_verts;

			// generate planar uv coordinates
			// note: x and z are transposed in order for things to animate
			// correctly in the global coordinate system where +x is east and
			// +y is north
			*texCoords++	= LLVector2((-z0 + 1.f) / 2.f, (-x0 + 1.f) / 2.f);
		}
	}

	//build triangle strip...
	*indices++ = 0 ;
	count_indices++ ;
	S32 k = 0 ;
	for(i = 1; i <= end_stack - begin_stack; ++i)
	{
		// step down to ring i at the current slice
		*indices++ = i * num_slices + k ;
		count_indices++ ;

		k = (k+1) % num_slices ;
		for(j = 0; j < num_slices ; ++j)
		{
			// alternate between the upper ring (i-1) and the lower ring (i)
			*indices++ = (i-1) * num_slices + k ;
			*indices++ = i * num_slices + k ;

			count_indices += 2 ;

			k = (k+1) % num_slices ;
		}

		// step back one slice, wrapping around the ring
		if((--k) < 0)
		{
			k = num_slices - 1 ;
		}

		*indices++ = i * num_slices + k ;
		count_indices++ ;
	}
}
/*
 * buildTree: builds a tree over N taxa by repeatedly joining two nodes into
 * a newly created node until three remain, then attaches those three to a
 * root node created with createRootNode().
 *
 *   N      number of taxa; used as the size of the work matrices/vectors and
 *          as the number of leaf nodes created
 *   Dorig  original distance matrix; copied into mD via setMM()
 *   taxon  taxon names; taxon[i] becomes nodes[i]->name
 *
 * Declared to return RootNodeT *; presumably `root` is returned, but no
 * return statement is visible in this listing - TODO confirm.
 *
 * Besides its locals the function assigns s, deltaB, delta2B, oldDeltaB, mS,
 * mDelB, mDel2B, nodes, ta, tb, tc and warnFlag without declaring them; the
 * text below says such matrices are globally defined in weighbor.h.
 *
 * NOTE(review): this listing has lost its block braces, the openers of most
 * of its comments (their text now appears as bare lines) and a number of
 * statements (for example loop headers over k and j, and any update of
 * Nleft / Nnext inside the main loop). Confirm against the complete source
 * before relying on the notes added here.
 */
RootNodeT *
buildTree(int N, MatrixT Dorig, char **taxon)
     Initialize main variables 
  int i;
  RootNodeT *root;
  int Nleft, Nnext; /* number of Nodes left to be joined 
		       and the next index to be used */
  MatrixT b=matrix(N);      /* the $b_{i;j}$ matrix (eq 0.7) */ 
  /* $q(i)$ array: value which minimizes $R(i,q(i),j)\,\forall j\ne i,q(i)$ */
  int *q;
  int *q2;                  /* Second best value */ 
  VectorT R=vector(N);      /* $R(i,q(i))$ (eq 0.10) */
  VectorT LLR=vector(N);    /* $R(i,q(i),q2(i))$ */
  VectorT Zscore=vector(N); /* $z(i,q(i))$ */
    This auxilary matrices are globally defined in \|weighbor.h| we do
    this to make it simplier so we do not always have to pass these
    around. Note that the need to be visible here as we will be
    calling \|calcR| later in this function and \|calcR| needs these
  s       = matrix(N);      /* $s_{ij}$ eq 0.9 */
  deltaB  = matrix(N);      /* $\Delta b_{ij}$ eq 0.8 */
  delta2B = matrix(N);      /* $\Delta^2 b_{ij}$ */
    oldDeltaB = matrix(N);
     This will hold this orignal $N$ distances plus any distances from
     the $N-3$ internal nodes. Note we do not care about the root node
     so $N-3$ and not $N-2$
    This is the renormalization vector $c_i$ (eq 0.39) and matrix
    $c_{i;j}$ (eq 0.43 ver0.2.5); again it must
    be large enough to hold both the original and the new joined taxa
    N.B. \|vector| sets all elements to zero.
    This matrices hold the previous iterations values of $s_{ij}$,
    $\Delta b_{ij}$, etc. They are used to speed up the next
    iterations calcultions of these quantities.
  mS     = matrix(2*N-3);
  mDelB  = matrix(2*N-3);
  mDel2B = matrix(2*N-3);
    Init \|mS| to -1 to keep track of which entries have not yet been
    computed.  */
  /* NOTE(review): j is declared but the inner loop header over j is not
     visible in this listing. */
  for(i=0;i<2*N-3;++i) {
    int j;
      mS[i][j] = -1.0;
    Make a copy of the original distance matrix; embed it in the
    larger matrix which will hold the new distance from the added
    internal nodes of the tree.
  setMM(N, Dorig, mD);
    Allocate and initialize the \|q|, \|q2| and \|nodes| array. $2N-3$
    nodes to hold both the original and the added nodes.
  q = (int *)malloc(N*sizeof(int));
  if(!q) printError("build::buildTree:out of memory-q\n");
  q2 = (int *)malloc(N*sizeof(int));
  if(!q2) printError("build::buildTree:out of memory-q2\n");
  nodes = (NodeT **)malloc( (2*N-3)*sizeof(NodeT *));
  if(!nodes) printError("build::buildTree:out of memory-nodes");
  /* One leaf node per input taxon; the node keeps the caller's name pointer
     (no copy is made here). */
  for(i=0;i<N;++i) {
    nodes[i] = createNode();
    nodes[i]->name = taxon[i];
    nodes[i]->ind  = i;
  Nleft = N;
  Nnext = N;
    \section{Loop until 3 taxa left}
    While we have more than 3 nodes left do the neighbor joining algorithm. 
    Each pass of the algorithm will join 2 nodes replacing them with one.
  /* NOTE(review): no statement that changes Nleft or Nnext is visible inside
     this loop; presumably they are updated at the end of each pass. */
  while(Nleft>3) {
    int j, k, ip, ip2;
    double minR=0.0, min2R=0.0;
    NodeT *newNode, *tmpNode;
    double sigma_inf_i, sigma_inf_ip, sigma_inf_rat;
    double sig_r, sig_l;
    int jj, jjmin;
    double LLRp=0, tR, tmp;
    /* \subsection{Calculate Residual} */
    calc_q(Nleft, q, R, b, q2, LLR, Zscore);
	fprintf(outfile, "q[%d]=%d R(%d,%d)=%g\n",
		k, q[k], k, q[k], R[k]); 
      Find $i$ than minimizes $R(i,q(i))$. With the constraint that
      $q(q(i))=i$ first if no pair found then find the best $i$
      without this constraint.
      Note: the \|checkQQI| flag determines if we will use the
      $q(q(i))=i$ constraint.
      Note: j will hold the next best pair
    i = -1;
    j = -1;
    if(checkQQI) { 
	if(q[q[k]]==k) {
	  if(R[k]<minR || i==-1) {
		      "ij=%d,%d k=%d q[k]=%d minR = %.16g R[k] = %.16g\n",
		      i,j,k, q[k], minR, R[k]);
	    j = i;
	    min2R = minR;
	    i = k;
	    minR = R[k];
	  else if(R[k]>minR && (R[k]<min2R || j==-1) ) {
	    j = k;
	    min2R = R[k];
    if(i==-1) { /* No pair had $q(q(i))=i$ */
      if(R[0]<R[1]) {
	i = 0;
	minR = R[0];
	j = 1;
	min2R = R[1];
      } else {
	i = 1;
	minR = R[1];
	j = 0;
	min2R = R[0];
	if(R[k]<minR) {
	  j = i;
	  min2R = minR;
	  i = k;
	  minR = R[k];
	else if(R[k] < min2R && R[k] > minR) {
	  j = k;
	  min2R = R[k];
      if(checkQQI && printLevel>1)
	fprintf(outfile, "No pair with q[q[i]]==i ");
	if(q[q[i]]!=i && printLevel>1)
		  "The pair does not satisfy q[q[i]]==i (checking is off)"
    /* i is the chosen node, ip its partner q[i]; ip2 remembers the runner-up
       index j for the diagnostic output further down. */
    ip = q[i];
    ip2 = j;
      If the extended tournament option is set (-e) then run two 
      more tournaments for (i,q[i]) to see who really wins. 
    if(extendedTourn) {
      double minR1=0, minR2=0, tmpR, oldR=R[i];
      int jmin=-1, jpmin=-1;
	 First fine the j the minimizes R(i,j)
	if(j!=i && j!=q[i]) {
	    tmpR = calcR2(Nleft, i, j, q2[i], b);
	    tmpR = calcR2(Nleft, i, j, q[i], b);
	  if(tmpR<minR1 || jmin==-1)
	      jmin = j;
	 and now the $j'$ that minimizes $R(j',q[i])$
	if(j!=i && j!=q[i]) {
	    tmpR = calcR2(Nleft, j, q[i], q2[i], b);
	    tmpR = calcR2(Nleft, j, q[i], i, b);
	  if(tmpR<minR2 || jpmin==-1) {
	    jpmin = j;
	Now fnd which of the three is the smallest
      if(minR1<minR2 && minR1<R[i]) {
	ip = jmin;
		  "Extended Tournament New Winner(A): (%d, %d) R=%g\n",
		  i, ip, minR1);
      else if(minR2<minR1 && minR2<R[i]) {
	i = jpmin;
		  "Extended Tournament New Winner(B): (%d, %d) R=%g\n",
		  i, ip, minR2);
	fprintf(outfile, "R=%g, R1=%g, R2=%g\n", oldR, minR1, minR2);
      Find the $jj$ that minimizes $R(q(i),i,jj)$ and then print out 
      the LLR and LLR' values.
      if(jj!=i && jj!=ip 
	 && (((tR=calcR(Nleft, ip, jj, i))<LLRp) || jjmin==-1)) {
	jjmin = jj;
	LLRp = tR;
    LLRp *= 0.5;
    if( (LLR[i]<1e-6) && (LLRp<1e-6) ) {
      if(!warnFlag) {
		"warning: tie scores encountered; topology may depend on sequence order!\n");
	warnFlag = True;
      if(printLevel>1) {
		"warning: tie scores encountered; topology may depend on sequence order!\n");
	fprintf(outfile, "taxon %s and taxon %s\n\n",
		nodes[i]->name, nodes[ip]->name);
    if(printLevel>0) {
	      "\nJoin taxon %s to taxon %s (%s next best choice)\n",
	      nodes[i]->name, nodes[ip]->name, nodes[q2[i]]->name);
      fprintf(outfile, "     p-value = %g\n", 
	      DMAX(1.0/(exp(LLR[i])+1.0), 1.0/(exp(LLRp)+1.0)));
      if(printLevel>1) {
	fprintf(outfile,"\nJoin taxon %s to taxon %s; R=%g\n", 
		nodes[i]->name, nodes[ip]->name, minR);
	if(ip2!=-1 && ip2!=i && ip2!=ip)
	  fprintf(outfile, "Second best pair (%s, %s); R=%g\n",
		  nodes[ip2]->name, nodes[q[ip2]]->name, min2R);
	  fprintf(outfile, "No second best pair\n");
       Note due to the way we shuffle around nodes after joining:
       i->Nnext, New->i, ip<->Nleft-1, if ip is less than i and
       i=Nleft-1 then the new node will be in position ip not i!!
       But tc (the global that is suppose to point to the position
       of the new node for calcb) is set to i so this will screw us
       up. The simpliest solution is to make sure i<ip; swap if they
       are not.
    /* NOTE(review): the statements that perform the swap through tt are not
       visible in this listing. */
    if(ip<i) {
      int tt;
      Need to calculate the new branch lengths $\bar b_{i;i'}$ and
      $\bar b_{i';i}$, eq. 0.19.
      Note if the z-score is negative then we calculate $\phi$ eq
      (0.26) and use it to renormalize $d_{i,i'}$ and recompute 
      $b_{i;i'}$ and $b_{i';i}$.
    if(Zscore[i]<0.0) {
      double phi_iip, dBar_iip;
      phi_iip = calcPhi(Nleft, i, ip);
	fprintf(outfile, "Renormalizing z[%d,%d] = %g\n", i, ip, Zscore[i]);
      if(phi_iip>0) {
	dBar_iip = D(i,ip)-phi_iip;
	  fprintf(outfile, "phi=%g dBar_iip=%g\n", phi_iip, dBar_iip);
	/* renormalize the b's */
	if( dBar_iip >= fabs(deltaB[i][ip]) )
	  b[i][ip] = (deltaB[i][ip] + dBar_iip)/2.0;
	else if( dBar_iip < -deltaB[i][ip] )
	  b[i][ip] = 0.0;
	  b[i][ip] = dBar_iip;
	if( dBar_iip >= fabs(deltaB[ip][i]) )
	  b[ip][i] = (deltaB[ip][i] + dBar_iip)/2.0;
	else if( dBar_iip < -deltaB[ip][i] )
	  b[ip][i] = 0.0;
	  b[ip][i] = dBar_iip;
    /* Branch lengths of the two joined nodes; a negative length is clamped to
       zero and the partner then receives the full distance D(i,ip). */
    nodes[i ]->rho = b[i][ip];
    nodes[ip]->rho = b[ip][i];
    if(nodes[i ]->rho < 0.0)  {
		"WARNING: Negative branch length %g set to zero\n", 
		nodes[i ]->rho);
      nodes[i ]->rho = 0.0;
      nodes[ip]->rho = D(i,ip);
    else if(nodes[ip]->rho < 0.0) {
		"WARNING: Negative branch length %g set to zero\n", 
      nodes[ip]->rho = 0.0;
      nodes[i ]->rho = D(i,ip);
    if(printLevel>3) {
      fprintf(outfile, "\\bar b_[%d%d] = %g b_[%d%d]=%g\n",
	      i, ip, nodes[i]->rho, i, ip, b[i][ip]);
      fprintf(outfile, "\\bar b_[%d%d] = %g b_[%d%d]=%g\n\n",
	      ip, i, nodes[ip]->rho, ip, i, b[ip][i]);
    /* The joined pair becomes the children of a new internal node, which
       takes index Nnext and reuses node i's name pointer. */
    newNode = createNode();
    newNode->ind = Nnext;
    newNode->child_r = nodes[i];
    newNode->child_l = nodes[ip];
    newNode->name = nodes[i]->name;
    nodes[Nnext] = newNode;
      Calculate $\sigma^2_\infty(i\bar\imath)$ (eq. 0.27) for each
      of the joined taxa.
    sigma_inf_i  = 0.0;
    sigma_inf_ip = 0.0;
    for(j=0;j<Nleft;++j)  {
      if(j!=i && j!=ip) {
	  += sigma_na(DMAX(b[i][ip],MINB)+C(i), 
		      DMAX(D(i,j)-b[i][ip],MINB)+C(j) );
	  += sigma_na(DMAX(b[ip][i],MINB)+C(ip), 
		      DMAX(D(ip,j)-b[ip][i],MINB)+C(j) );
      Add \|EPSILON| here to make the following formulae a bit simplier
    sigma_inf_i  += EPSILON;
    sigma_inf_ip += EPSILON;
      Calculate the new distances from eq. 0.24
      d_{\bar\imath k} = {{(d_{ik}-b_{i;i'}+\phi_i)/\sigma^2_\infty(i\bar\imath)+
      {1\over\sigma^2_\infty(i'\bar\imath)} +
      $i=$ \|newNode->child_r->ind|,\hfill\break
      $i'=$ \|newNode->child_l->ind|,\hfill\break
      $b_{i;i'}=$ \|newNode->child_r->rho|,\hfill\break
      $b_{i';i}=$ \|newNode->child_l->rho|
      Also calcuate the renormalization terms $c_{i;j}$ (eq 0.43 ver0.2.5)
      and $c_i$
    for(j=0;j<Nleft;++j)  {
      if(j!=i && j!=ip) {
	/* $1/\sigma^2_\infty(i\bar\imath)+1/\sigma^2_\infty(i'\bar\imath)$ */
	double norm = 
	  1.0/( 1.0/sigma_inf_i + 1.0/sigma_inf_ip);
	  First calcuate the new distances
	D(Nnext,j) = D(j,Nnext) = 
	  norm *
	  D(Nnext,j) = D(j,Nnext) = 0.0;
    D(Nnext,Nnext) = 0.0;
      And now the new renormalization quantity $c_{\bar\imath}$
      N.B. eq 0.30 has been rewritten from
      {1\over{{1\over X}+{1\over Y}}}
      {XY\over X+Y}
      which is better behaved numerically when $X$ or $Y$ is
      small (and cheeper since it only has one division).
    sig_r = sigma2t(C(i)+DMAX(RHO(newNode->child_r), MINB));
    sig_l = sigma2t(C(ip)+DMAX(RHO(newNode->child_l), MINB));
    if(sigma_inf_i+sigma_inf_ip>0.0) {
      if(sigma_inf_i+sigma_inf_ip < .9*sqrt(DBL_MAX) && /* no overflow */
         sigma_inf_i+sigma_inf_ip > .9*sqrt(DBL_MIN))   /* no underflow */
	    (sig_r*SQR(sigma_inf_ip)+ sig_l*SQR(sigma_inf_i))
      else if(sigma_inf_ip > sigma_inf_i)       /* to avoid over/underflow */
	  sigma_inf_rat = sigma_inf_i / sigma_inf_ip;
	  tmp = 
	  sigma_inf_rat = sigma_inf_ip / sigma_inf_i;
	  tmp = 
      C(Nnext) = sigma2tinv( tmp  );
      C(Nnext) = sigma2tinv(0.0);
    /*      if(!
	    "RHO_R=%g C(i=%d)=%g sig_r=%g\nRHO_L=%g C(ip=%d)=%g sig_l=%g -- %g\n",
	    Nnext, C(Nnext),
	    RHO(newNode->child_r), i, C(i), sig_r,
	    RHO(newNode->child_l), ip, C(ip), sig_l,
      Swap $i$ node to the empty node at the end of the list and
      place the new node in position $i$ */
    nodes[Nnext] = nodes[i];
    nodes[i] = newNode;
      Swap the $ip$ node and the last node on the list this moves
      $ip$ to the end. When we decrease \|Nleft| by one there will be
      on less node and the two joined nodes $i$ and $ip$ will now be
      after then end (\|Nleft|) of the list
    tmpNode = nodes[ip];
    nodes[ip] = nodes[Nleft-1];
    nodes[Nleft-1] = tmpNode;
      In the new node set the child indecies to the
      new indexes of the the joined nodes. This info
      will be used by \|sigma2_3| in the renormalization
      Set up the \|ta|, \|tb| and \|tc| node array indices.  \|ta|
      and \|tb| point to the two taxa that where just joined, and
      \|tc| points to the newly created taxon.
      These globals will be used in the next call to \|calcb|.
    ta = Nnext;
    tb = Nleft - 1;
    tc = i;
       Print out the values of the various variables
    if(printLevel>2) {
      int a, b;
      fprintf(outfile, "\nReduced d_ij=\n");
	    fprintf(outfile,"%7.4g ", D(a,b));
    if(printLevel>3) {
      int a, b;
      for(a=0;a<Nnext;++a) {
	  fprintf(outfile,"%7.4g ", mD[a][b]);
      fprintf(outfile, "c_i = ");
      for(a=0;a<Nleft;++a) {
	fprintf(outfile,"%7.4g ", C(a));
      for(a=0;a<Nnext;++a) {
	fprintf(outfile,"%7.4g ", vC[a]);
      fprintf(outfile, "\n");
    \section{Final three taxa}
    Now there are just three taxa left. They will join to the root
    node of our tree. Find their branch lengths (which we can do
    exactly) and set up the root node to be passed back on return from
    this functin.
  /* The three remaining nodes sit at positions 0..2 of the nodes array and
     become the left, middle and right children of the root. */
  root = createRootNode();
  if(!root) printError("build::buildTree:out of memory-root");
  root->child_l = nodes[0];
  root->child_m = nodes[1];
  root->child_r = nodes[2];
    Now get the root branch lengths. We can solve this exactly since
    we have three equations and three unknows. The equations to solve
    \rho_0+\rho_1 = d_{01},
    \rho_0+\rho_2 = d_{02},
    \rho_1+\rho_2 = d_{12}
    And the solution is:
    \rho_0={1 \over 2}\left(d_{01}+d_{02}-d_{12}\right),
    \rho_1={1 \over 2}\left(d_{01}-d_{02}+d_{12}\right),
    \rho_2={1 \over 2}\left(-d_{01}+d_{02}+d_{12}\right)
  root->child_l->rho = 0.5*( D(0,1)+D(0,2)-D(1,2));
  root->child_m->rho = 0.5*( D(0,1)-D(0,2)+D(1,2));
  root->child_r->rho = 0.5*(-D(0,1)+D(0,2)+D(1,2));
  /* check for negative lengths and set to zero if found and decrease
     the other each by half the the negative length (note + a neg
     number is a decrease) */
  /* NOTE(review): the statements that reset the negative length itself to
     zero are not visible in this listing. */
  if(root->child_l->rho < 0.0) {
    root->child_m->rho += 0.5*root->child_l->rho;
    root->child_r->rho += 0.5*root->child_l->rho;
  if(root->child_m->rho < 0.0) { 
    root->child_l->rho += 0.5*root->child_m->rho;
    root->child_r->rho += 0.5*root->child_m->rho;
  if(root->child_r->rho < 0.0) {
    root->child_l->rho += 0.5*root->child_r->rho;
    root->child_m->rho += 0.5*root->child_r->rho;
    Clean up
// Searches for a 3x3 transformation that aligns scene S with a TSD grid by random sampling.
//
// Candidate transformations are generated as in a normal-based matcher: a model index is drawn
// from idxMValid, paired with scene indices within +/- span, the rotation is taken from the
// orientation difference phiM[idx] - phiS[i] and the translation maps scene point i onto model
// point idx. Each candidate is pre-multiplied by TSensor, applied to a control set picked
// from S, and rated by multiplying one probability factor per control point, derived from the
// value _grid.interpolateBilinear() reports at that point. The candidate with the highest
// product is kept.
//
// Parameters as used in the visible code:
//   TSensor     transformation multiplied onto each candidate before the control set is rated
//   M, maskM    model points (columns 0/1 are read as x/y) and their validity mask
//   NM          model normals, passed to calcPhi together with maskM
//   S, maskS    scene points and their validity mask
//   phiMax      rotation limit; clamped to at most M_PI * 0.5 before use
//   transMax    not referenced anywhere in the visible code
//   resolution  angular step used to derive span; must exceed 1e-6
// Returns TBest, which is still the freshly constructed Matrix(3, 3) on the error paths.
//
// NOTE(review): this listing appears to have lost its block braces and a number of lines
// (several `if` conditions, `else` partners and `#endif` directives). The notes below describe
// only what is visible; confirm against the complete source.
obvious::Matrix TSD_PDFMatching::match( const obvious::Matrix TSensor,
                                        const obvious::Matrix* M,
                                        const bool* maskM,
                                        const obvious::Matrix* NM,
                                        const obvious::Matrix* S,
                                        const bool* maskS,
                                        double phiMax,
                                        const double transMax,
                                        const double resolution)
  obvious::Matrix TBest(3, 3);

  const int pointsInM = M->getRows();
  const int pointsInS = S->getRows();

  if(pointsInM != pointsInS)
    LOGMSG(DBG_ERROR, "Model and scene need to be of same size, size of M: " << pointsInM << ", size of S: " << pointsInS);
    return TBest;

  if(pointsInM < 3)
    LOGMSG(DBG_ERROR, "Model and scene contain too less points, size of M: " << pointsInM << ", size of S: " << pointsInS);
    return TBest;

  // ----------------- Model ------------------
  obvious::Matrix* NMpca = new Matrix(pointsInM, 2);  // Normals for model
  double* phiM = new double[pointsInM];    // Orientation of model points
  bool* maskMpca = new bool[pointsInM];      // Validity mask of model points

  memcpy(maskMpca, maskM, pointsInM * sizeof(bool));

  // NOTE(review): the condition that selects supplied normals (NM) over PCA-estimated ones
  // is not visible; the `else` below has no matching `if` in this listing.
    calcPhi(NM, maskM, phiM);
  else  // if normals are not supplied
    calcNormals(M, NMpca, maskM, maskMpca, _pcaSearchRange/2);
    calcPhi(NMpca, maskMpca, phiM);
  vector<unsigned int> idxMValid = extractSamples(M, maskMpca, _pcaSearchRange / 2);

  // -------------------------------------------

  // ----------------- Scene -------------------
  obvious::Matrix* NSpca = new Matrix(pointsInS, 2);  // Normals for scene
  double* phiS = new double[pointsInS];    // Orientation of scene points
  bool* maskSpca = new bool[pointsInS];      // Validity mask of scene points
  memcpy(maskSpca, maskS, pointsInS * sizeof(bool));

  // Determine number of valid samples in local scene neighborhood
  // only from these points a valid orientation is computable
  // NOTE(review): the body of the counting loop is not visible, so validPoints stays 0 as
  // written and the division below would yield inf.
  unsigned int validPoints = 0;
  for(int i = 0; i < pointsInS; i++)

  // Probability of point masking
  double probability = 180.0 / (double)validPoints;
  if(probability < 0.99)
    subsampleMask(maskSpca, pointsInS, probability);

  calcNormals(S, NSpca, maskS, maskSpca, _pcaSearchRange/2);
  calcPhi(NSpca, maskSpca, phiS);

  vector<unsigned int> idxSValid = extractSamples(S, maskSpca, _pcaSearchRange / 2);
  // -------------------------------------------

  // --------------- Control set ---------------
  vector<unsigned int> idxControl;  //represents the indices of points used for Control in S.
  obvious::Matrix* Control = pickControlSet(S, idxSValid, idxControl);
  // NControl has idxControl.size() rows while the loop below runs to Control->getCols();
  // presumably both are the same count, TODO confirm.
  // NOTE(review): no `delete NControl` is visible in this function.
  obvious::Matrix* NControl = new obvious::Matrix(idxControl.size(), 2);
  for(unsigned int i = 0; i < Control->getCols(); i++)
    (*NControl)(i, 0) = (*NSpca)(idxControl[i], 0);
    (*NControl)(i, 1) = (*NSpca)(idxControl[i], 1);
  unsigned int pointsInC = Control->getCols();
  double* phiControl = new double[pointsInC];  // Orientation of control points
  calcPhi(NControl, NULL, phiControl);
  // -------------------------------------------//

  // Determine frustum, i.e., direction of leftmost and rightmost model point
  //double thetaMin = -((double)pointsInM - 1.0) / 2.0 * resolution;  // theoretical bounding
  // NOTE(review): front()/back() are read before the size checks further down, so an empty
  // idxMValid would be dereferenced here. The two bounds are only logged in the visible code.
  double thetaBoundMin = atan2((*M)(idxMValid.front(), 1), (*M)(idxMValid.front(), 0));  // real bounding
  double thetaBoundMax = atan2((*M)(idxMValid.back(), 1), (*M)(idxMValid.back(), 0));  // real bounding

  LOGMSG(DBG_DEBUG, "Valid points in scene: " << idxSValid.size() << ", valid points in model: " << idxMValid.size() << ", Control set: " << Control->getCols());
  LOGMSG(DBG_DEBUG, "Model phi min:: " << rad2deg(thetaBoundMin) << ", Model phi max: " << rad2deg(thetaBoundMax));

  // NOTE(review): the early returns below (and the resolution check further down) leave
  // without releasing NMpca, NSpca, phiM, phiS, phiControl, maskMpca, maskSpca and Control.
  if(idxSValid.size() < 3)
    LOGMSG(DBG_ERROR, "Too less valid points in scene, matchable size: " << idxSValid.size());
    return TBest;

  if(idxMValid.size() < 3)
    LOGMSG(DBG_ERROR, "Too less valid points in model, matchable size: " << idxMValid.size());
    return TBest;

  // Check for maximum meaningful trials
  // Each trial removes one entry from a copy of idxMValid, so trials is capped at its size.
  unsigned int trials = _trials;
  if(idxMValid.size() < _trials)
    trials = idxMValid.size();

    _trace->setModel(M, idxMValid);
    _trace->setScene(S, idxSValid);

  // Calculate search "radius", i.e., maximum difference in polar indices because of rotation
  phiMax = min(phiMax, M_PI * 0.5);
  int span;
  if(resolution > 1e-6)
    span = floor(phiMax / resolution);
    if(span > (int)pointsInM)
      span = (int)pointsInM;
    LOGMSG(DBG_ERROR, "Resolution not properly set: resolution = " << resolution);
    return TBest;


  // Highest rating seen so far; updated only inside the critical section below.
  // NOTE(review): rand() is used in the trial loop but no srand() call is visible in this
  // function.
  double bestProb = 0.0;

#ifndef DEBUG
  // trace is only possible for single threaded execution
    LOGMSG(DBG_WARN, "Configured single-threaded execution due to application of trace module");

  //Timer t;
  vector<unsigned int> idxTrials = idxMValid;

  // NOTE(review): maskControl is allocated and released but not otherwise used in the
  // visible code.
  bool* maskControl = new bool[pointsInC];

#pragma omp parallel for
  for(unsigned int trial = 0; trial < trials; trial++)

    int idx;
#pragma omp critical
      const int randIdx = rand() % (idxTrials.size());
      idx = idxTrials[randIdx];

      // remove chosen element to avoid picking same index a second time
      idxTrials.erase(idxTrials.begin() + randIdx);

    // leftmost scene point
    const int iMin = max(idx - span, _pcaSearchRange / 2);
    // rightmost scene point
    const int iMax = min(idx + span, pointsInS - _pcaSearchRange / 2);

    for(int i = iMin; i < iMax; i++)

        // Rotation candidate: orientation difference wrapped into [-pi, pi]
        double phi = phiM[idx] - phiS[i];
        if(phi > M_PI)
          phi -= 2.0 * M_PI;
        else if(phi < -M_PI)
          phi += 2.0 * M_PI;

        if(fabs(phi) < phiMax)
          obvious::Matrix T = obvious::MatrixFactory::TransformationMatrix33(phi, 0, 0);

          // Calculate translation
          const double sx = (*S)(i, 0);
          const double sy = (*S)(i, 1);
          T(0, 2) = (*M)(idx, 0) - (T(0, 0) * sx + T(0, 1) * sy);
          T(1, 2) = (*M)(idx, 1) - (T(1, 0) * sx + T(1, 1) * sy);

          obvious::Matrix TMap = TSensor * T;

          // Transform control set
          obvious::Matrix STemp = TMap * (*Control);
          unsigned int pointsInControl = STemp.getCols();

          // Rating Daniel Ammon & Tobias Fink
          std::vector<double> probOfAllScans;  // vector for probabilities of single scans in one measurement
          double probOfActualMeasurement = 1.0;

          // NOTE(review): both multiplications below run unconditionally as written; presumably
          // one belongs to the interpolation-success branch and the other to an `else` that is
          // not visible. probOfAllScans is never filled in the visible code.
          for (unsigned int s = 0; s < pointsInControl; s++)	// whole control set
            obfloat coord[2];
            coord[0] = STemp(0, s);
            coord[1] = STemp(1, s);

            // todo: magic numbers 0.05 / 0.95
            obfloat tsd;
            if( !_grid.interpolateBilinear(coord, &tsd) )
              // rating function: clipped probability --> avoid prob of 0
              // multiply all probabilities for probability of whole scan
              probOfActualMeasurement *= (1.0 - (1.0 - _zrand) * fabs(tsd));
              probOfActualMeasurement *= _zrand;
          }  // whole control set

#pragma omp critical
          // update T and bestProb if better than last iteration
          // NOTE(review): the control set was rated after applying TMap (= TSensor * T), but the
          // result stored here is T, not TMap - confirm that callers expect the sensor-relative
          // transformation.
          if(probOfActualMeasurement > bestProb)
            TBest = T;
            bestProb = probOfActualMeasurement;

#ifndef DEBUG
              //trace is only possible for single threaded execution
              vector<unsigned int> idxM;
              vector<unsigned int> idxS;
              _trace->addAssignment(M, idxM, S, idxS, &STemp, 10 * probOfActualMeasurement, trial);
        } // if(fabs(phi) < phiMax)
      } // if(maskSpca[i])
    }  // for i
  }  // for trials

  //cout << "elapsed: " << t.elapsed() << endl;

  delete [] maskControl;
  delete    NMpca;
  delete    NSpca;
  delete [] phiM;
  delete [] phiS;
  delete [] phiControl;
  delete [] maskMpca;
  delete [] maskSpca;

  delete Control;

  return TBest;