/**
 * Compute the H1 norm of the Fourier-Bessel coefficients A_mn:
 *
 *   h1 = [ Sum_{m,n} ( (ell^2 * lambda_{m,n}^2 / r_0^2) + 1 ) * |A_mn|^2 ]^{1/2}
 *
 * where lambda_{m,n} is the n-th positive zero of the Bessel function J_|m|.
 *
 * Row i of the coefficient matrices corresponds to m = i - mmax and column j
 * to n = j + 1, so both matrices are read as ((2*mmax+1) x nmax).
 *
 * @arg mmax - largest angular moment stored in the coefficient matrices
 * @arg nmax - number of radial moments stored in the coefficient matrices
 * @arg AmnRe - real parts of the coeffs
 * @arg AmnIm - imaginary parts of the coeffs
 * @return the H1 norm defined above
 */
double compute_h1(int mmax, int nmax, gsl_matrix* AmnRe, gsl_matrix* AmnIm)
{
  /* this is the cutoff radius you should check this if you adjust the xmin */
  const double rzero = 1.0;
  const double ell = 1.0;
  /* Group the denominator explicitly: writing `/ rzero*rzero` would divide by
   * rzero and then multiply by it again, leaving the term unscaled. */
  const double scale = (ell * ell) / (rzero * rzero);

  double htemp = 0.0;
  int i, j;

  /* loop over all the coeffs */
  for(i = 0; i < (2*mmax+1); i++){
    /* angular index of this row and the matching (non-negative) Bessel order */
    const int m = i - mmax;
    const double order = (m < 0) ? -m : m;

    for(j = 0; j < nmax; j++){
      /* the |Amn|^2 term */
      const double re = gsl_matrix_get(AmnRe, i, j);
      const double im = gsl_matrix_get(AmnIm, i, j);
      const double mod = re*re + im*im;

      /* the appropriate bessel fn zero (zeros are counted from 1) */
      const double lam = gsl_sf_bessel_zero_Jnu(order, j + 1);

      htemp += (lam*lam*scale + 1.0) * mod;
    }
  }
  return(sqrt(htemp));
}
/**
 * Compute the fourier decomposition of array to -m:m in angular components and 1:nmax in radial components
 * 
 * Note that the gridding scheme used here is defined on [-1..1] x [-1..1], the cmx and cmy specified here should
 * be given in these units. The function "compute_com" defined below can be used to do this.
 * 
 * @arg mmax - largest angular moment computed
 * @arg nmax - largest radial moment computed
 * @arg array - 2d matrix (npts x npts) of energy density in the event
 * @arg npts - number of points in in the array
 * @arg AmnReal - 2d matrix ((2*mmax+1) x nmax), filled with Real parts of the coeffs on return
 * @arg AmnIm - 2d matrix ((2*mmax+1) x nmax), filled with Im parts of the coeffs on return
 * @arg cmx - x location of the CM of the event
 * @arg cmy - y location of the CM of the event
 */ 
void compute_amn(int mmax, int nmax, gsl_matrix *array, int npts, gsl_matrix* AmnReal, gsl_matrix* AmnIm, double cmx, double cmy)
{
  int i,j,k,l;
  int nm, nn;
  int mtemp, ntemp;
  double dx;// = 2/((double)npts-1);
  double dxy;// = 4/pow(((double)npts-1),2.0);
  double xv, yv;
  double coeff = 0;
  double phiMod, phiRe, phiIm;
  double ftemp;
  double AmnRealAcc, AmnImAcc;
  // for compensated summation
  double alphaRe, alphaIm;
  double epsRe, epsIm;

  double rzero = fabs(xmin);
  
  dx = 2*rzero/((double)npts-1);
  dxy = 4*pow(rzero,2.0)/pow(((double)npts-1),2.0);
  //printf("# xmin: %lf dx: %lf dxy: %lf\n", xmin, dx, dxy);
  
  gsl_vector *xvec = gsl_vector_alloc(npts);
  gsl_matrix * rMat = gsl_matrix_alloc(npts, npts);
  gsl_matrix * thMat = gsl_matrix_alloc(npts, npts);
  gsl_matrix *lamMat = NULL;

  gsl_matrix_set_zero(rMat);
  gsl_matrix_set_zero(thMat);
  gsl_vector_set_zero(xvec);

  nm = 2*mmax+1;
  nn = nmax;

  lamMat = gsl_matrix_alloc(nm, nn);

  // fill in r and Theta matrices
  for(i = 0; i < npts; i++)
    gsl_vector_set(xvec ,i, xmin + dx*i);
  
  for(i = 0; i < npts; i++){      
    xv = gsl_vector_get(xvec, i);
    for(j = 0; j < npts;j ++){
      yv = gsl_vector_get(xvec, j);
      gsl_matrix_set(rMat, i, j, sqrt((xv-cmx)*(xv-cmx) + (yv-cmy)*(yv-cmy)));
      gsl_matrix_set(thMat, i, j, atan2((yv-cmy), (xv-cmx)));
    }
  }

  // fill in lambda matrix
  for(i=0; i < nm; i++){
    for(j = 0; j < nn; j++){
      ntemp = j + 1;
      mtemp = -1.0 * mmax + i;
      gsl_matrix_set(lamMat, i, j, gsl_sf_bessel_zero_Jnu(fabs(mtemp), ntemp));
    }
  }
  
  for(i = 0; i < nm; i++){
    for(j = 0; j < nn; j++){
      AmnImAcc = 0.0;
      AmnRealAcc = 0.0;
      epsRe = 0.0;
      epsIm = 0.0;
      
      ntemp = j + 1;
      mtemp = -1.0*mmax + i;
      // note that we have to scale the coeff by rzero, then the system is properly scale invariant
      coeff = pow(rzero,2)*sqrt(M_PI)*gsl_sf_bessel_Jn(fabs(mtemp)+1, gsl_matrix_get(lamMat, i, j));
      
      // now loop over the grid, a lot
      for(k = 0; k < npts; k++){
        for(l = 0; l < npts; l++){
          phiMod = gsl_sf_bessel_Jn(mtemp, gsl_matrix_get(lamMat, i, j)*gsl_matrix_get(rMat, k, l)/rzero) / coeff;
          phiRe = phiMod * cos(mtemp*gsl_matrix_get(thMat, k, l));
          phiIm = phiMod * sin(mtemp*gsl_matrix_get(thMat, k, l));
          ftemp = gsl_matrix_get(array, k, l);

          /* kahan compensated summation (http://en.wikipedia.org/wiki/Kahan_summation_algorithm)
           * we're adding up a lot of little numbers here
           * this trick keeps accumulation errors from, well, accumulating
           */
          alphaRe = AmnRealAcc;
          epsRe += ftemp * phiRe;
          AmnRealAcc = alphaRe + epsRe;
          epsRe += (alphaRe - AmnRealAcc);

          alphaIm = AmnImAcc;
          epsIm += ftemp * phiIm;
          AmnImAcc = alphaIm + epsIm;
          epsIm += (alphaIm - AmnImAcc);
        }
      }
      // and save the coeffs
      gsl_matrix_set(AmnReal, i, j, AmnRealAcc*dxy);
      gsl_matrix_set(AmnIm, i, j, -1.0 * AmnImAcc*dxy);
    }
  }

  gsl_matrix_free(rMat);
  gsl_matrix_free(thMat);
  gsl_vector_free(xvec);
  gsl_matrix_free(lamMat);
}
/*
 * Evaluates the truncated double series
 *
 *   8 / (pi a^2) * Sum_{n>=1} sin(n theta) / n
 *                * Sum_{m>=1} exp(-D t alpha_mn^2) J_n(r alpha_mn) J_n(r0 alpha_mn)
 *                             / (J_{n-1}(a alpha_mn) - J_{n+1}(a alpha_mn))^2
 *
 * where a * alpha_mn is the m-th zero of J_n, and r0, a and D come from
 * getr0(), geta() and getD().
 *
 * Each sum stops as soon as its relative-size test drops below CUTOFF, or
 * after 99 terms; hitting the term limit prints a warning to std::cout.
 */
const Real GreensFunction2DAbs::p_int_theta_second(const Real r,
                                                   const Real theta,
                                                   const Real t) const
{
    const Real initial_r(this->getr0());
    const Real boundary(this->geta());
    const Real neg_Dt(-1e0 * this->getD() * t);

    const Integer inner_limit(100);
    const Integer outer_limit(100);
    const Real tolerance(CUTOFF);

    Real total(0e0);
    bool outer_converged(false);

    for(Integer order(1); order < outer_limit; ++order)
    {
        const Real order_real(static_cast<double>(order));
        const int order_int(static_cast<int>(order));

        Real partial(0e0);
        bool inner_converged(false);

        for(Integer root(1); root < inner_limit; ++root)
        {
            // a * alpha_mn is the root-th zero of J_order
            const Real zero_mn(gsl_sf_bessel_zero_Jnu(order_real, root));
            const Real alpha(zero_mn / boundary);

            const Real J_at_r(gsl_sf_bessel_Jn(order_int, r * alpha));
            const Real J_at_r0(gsl_sf_bessel_Jn(order_int, initial_r * alpha));
            const Real J_below(gsl_sf_bessel_Jn(order_int - 1, zero_mn)); // J_n-1(a alpha_mn)
            const Real J_above(gsl_sf_bessel_Jn(order_int + 1, zero_mn)); // J_n+1(a alpha_mn)

            const Real decay(std::exp(alpha * alpha * neg_Dt));
            const Real radial(J_at_r * J_at_r0);
            const Real diff(J_below - J_above);

            const Real contribution(decay * radial / (diff * diff));
            partial += contribution;

            if(fabs(contribution / partial) < tolerance)
            {
                inner_converged = true;
                break;
            }
        }
        if(!inner_converged)
            std::cout << "warning: use term over num_in_term_use" << std::endl;

        const Real term(partial * std::sin(order_real * theta) / order_real);
        total += term;

        /* The sine factor is deliberately left out of this test: when
         * n * theta is a multiple of pi the term itself vanishes, which
         * would look like convergence. Since |sin| <= 1, the size of
         * (partial / n) is what bounds the term.                        */
        if(fabs(partial / (order_real * total)) < tolerance)
        {
            outer_converged = true;
            break;
        }
    }
    if(!outer_converged)
        std::cout << "warning: use term over num_out_term_use" << std::endl;

    return (8e0 * total / (M_PI * boundary * boundary));
}