Esempio n. 1
0
/*
 * F77-named entry point (symbol produced by the F77_FUNC_ macro) that
 * forwards to rfftwnd_complex_to_real().
 *
 * Every scalar argument, and the plan itself, arrives as a pointer and is
 * dereferenced here; the `in` and `out` data pointers are passed through
 * unchanged.
 */
void F77_FUNC_(rfftwnd_f77_complex_to_real,RFFTWND_F77_COMPLEX_TO_REAL)
(fftwnd_plan *p, int *howmany, fftw_complex *in, int *istride, int *idist,
 fftw_real *out, int *ostride, int *odist)
{
     const int n_transforms = *howmany;
     const int in_stride = *istride;
     const int in_dist = *idist;
     const int out_stride = *ostride;
     const int out_dist = *odist;

     rfftwnd_complex_to_real(*p, n_transforms,
			     in, in_stride, in_dist,
			     out, out_stride, out_dist);
}
Esempio n. 2
0
/*
 * Class:     jfftw_real_nd_Plan
 * Method:    transform
 * Signature: (I[DII[DII)V
 *
 * Runs the native rfftwnd plan whose pointer is stored in the Java object's
 * byte[] field "plan", reading from `in` and writing to `out`.
 *
 * howmany/istride/idist/ostride/odist are forwarded unchanged to
 * rfftwnd_real_to_complex() or rfftwnd_complex_to_real(), depending on the
 * direction recorded in the plan.
 *
 * Throws java.lang.IndexOutOfBoundsException (and performs no transform)
 * when the length of `in` or `out` differs from
 * (howmany - 1) * dist + <size of one data set in doubles>.
 * Returns early, leaving the JVM's pending exception in place, when one of
 * the Java arrays cannot be pinned/copied.
 */
JNIEXPORT void JNICALL Java_jfftw_real_nd_Plan_transform__I_3DII_3DII( JNIEnv *env, jobject obj, jint howmany, jdoubleArray in, jint istride, jint idist, jdoubleArray out, jint ostride, jint odist )
{
	jdouble *cin, *cout;
	int i;
	int is_r2c;
	int need_lock;
	int length = 1;   /* product of plans[i]->n: presumably the real sample count */
	int clength = 2;  /* 2 * product of n[i]: presumably doubles in the complex array */
	rfftwnd_plan plan;

	jclass clazz = (*env)->GetObjectClass( env, obj );
	jfieldID id = (*env)->GetFieldID( env, clazz, "plan", "[B" );
	jbyteArray arr = (jbyteArray)(*env)->GetObjectField( env, obj, id );
	/* GetByteArrayElements returns jbyte*, not unsigned char* */
	jbyte *carr = (*env)->GetByteArrayElements( env, arr, 0 );
	if( carr == NULL )
	{
		/* the JVM has already raised OutOfMemoryError */
		return;
	}
	plan = *(rfftwnd_plan*)carr;
	is_r2c = ( plan->dir == FFTW_REAL_TO_COMPLEX );

	for( i = 0; i < plan->rank; ++i ) length *= plan->plans[i]->n;
	for( i = 0; i < plan->rank; ++i ) clength *= plan->n[i];

	/*
	 * A single input check that mirrors the output check below.  The
	 * original had two stacked `if`s here, so the error was raised only
	 * when BOTH the howmany-aware and the howmany-unaware comparison
	 * failed.
	 */
	if( (howmany - 1) * idist + (is_r2c?length:clength) != (*env)->GetArrayLength( env, in ) )
	{
		(*env)->ThrowNew( env, (*env)->FindClass( env, "java/lang/IndexOutOfBoundsException" ), "the Plan was created for a different length (in)" );
		(*env)->ReleaseByteArrayElements( env, arr, carr, 0 );
		return;
	}
	if( (howmany - 1) * odist + (is_r2c?clength:length) != (*env)->GetArrayLength( env, out ) )
	{
		(*env)->ThrowNew( env, (*env)->FindClass( env, "java/lang/IndexOutOfBoundsException" ), "the Plan was created for a different length (out)" );
		(*env)->ReleaseByteArrayElements( env, arr, carr, 0 );
		return;
	}

	cin = (*env)->GetDoubleArrayElements( env, in, 0 );
	if( cin == NULL )
	{
		(*env)->ReleaseByteArrayElements( env, arr, carr, 0 );
		return;
	}
	cout = (*env)->GetDoubleArrayElements( env, out, 0 );
	if( cout == NULL )
	{
		/* nothing was written through cin, so no copy-back is needed */
		(*env)->ReleaseDoubleArrayElements( env, in, cin, JNI_ABORT );
		(*env)->ReleaseByteArrayElements( env, arr, carr, 0 );
		return;
	}

	/*
	 * Serialize on the Java object unless the plan carries
	 * FFTW_THREADSAFE.  `!` binds tighter than `&`, so the original
	 * `! flags & FFTW_THREADSAFE` tested `(!flags) & FFTW_THREADSAFE`.
	 * The decision is computed once so MonitorEnter and MonitorExit are
	 * always paired.
	 */
	need_lock = ( plan->rank > 0 && !( plan->plans[0]->flags & FFTW_THREADSAFE ) );

	if( need_lock )
	{
		// synchronization
		(*env)->MonitorEnter( env, obj );
	}
	if( is_r2c )
	{
		rfftwnd_real_to_complex( plan, howmany, cin, istride, idist, (fftw_complex*)cout, ostride, odist );
	}
	else
	{
		rfftwnd_complex_to_real( plan, howmany, (fftw_complex*)cin, istride, idist, cout, ostride, odist );
	}
	if( need_lock )
	{
		// synchronization
		(*env)->MonitorExit( env, obj );
	}

	(*env)->ReleaseByteArrayElements( env, arr, carr, 0 );
	(*env)->ReleaseDoubleArrayElements( env, in, cin, 0 );
	(*env)->ReleaseDoubleArrayElements( env, out, cout, 0 );
}
Esempio n. 3
0
/*
 * Applies the plan p->p_fft, in place, to each of the
 * p->p_transpose->local_nx slabs held in local_data.
 *
 * With a single field all slabs are handled by one call (howmany =
 * local_nx, stride 1).  With several fields one call is issued per slab,
 * each transforming n_fields data sets with stride n_fields and distance 1.
 * The direction (real->complex or complex->real) is taken from the plan.
 */
static void other_dims_aux(rfftwnd_mpi_plan p,
			  int n_fields, fftw_real *local_data)
{
     rfftwnd_plan plan = p->p_fft;
     const int n_slabs = p->p_transpose->local_nx;
     /* number of complex elements in one slab of one field */
     const int slab_len = plan->n[0] * plan->n_after[0];
     const int forward = (plan->dir == FFTW_REAL_TO_COMPLEX);
     int slab;

     if (n_fields <= 1) {
	  /* one field: let a single call walk over every slab */
	  if (forward)
	       rfftwnd_real_to_complex(plan, n_slabs,
				       local_data, 1, 2 * slab_len,
				       NULL, 0, 0);
	  else
	       rfftwnd_complex_to_real(plan, n_slabs,
				       (fftw_complex *) local_data,
				       1, slab_len,
				       NULL, 0, 0);
	  return;
     }

     /* several fields: one call per slab */
     if (forward) {
	  for (slab = 0; slab < n_slabs; ++slab) {
	       /* offsets are counted in reals, hence the factor 2 */
	       fftw_real *base = local_data
		    + (2 * slab_len * n_fields) * slab;

	       rfftwnd_real_to_complex(plan, n_fields, base,
				       n_fields, 1, NULL, 0, 0);
	  }
     } else {
	  for (slab = 0; slab < n_slabs; ++slab) {
	       fftw_complex *base = ((fftw_complex *) local_data)
		    + (slab_len * n_fields) * slab;

	       rfftwnd_complex_to_real(plan, n_fields, base,
				       n_fields, 1, NULL, 0, 0);
	  }
     }
}
Esempio n. 4
0
/*
 * Rewrites the global boundary arrays Uxb and Uzb in place.
 *
 * Steps, all operating on the shared scratch buffer CT (also viewed as the
 * real array RT):
 *   1. zero CT and copy the Fourier coefficients of Uxb/Uzb into planes
 *      CT[0]/CT[1], leaving a gap of unused rows in the middle of the z
 *      range (presumably 3/2-rule zero padding -- TODO confirm);
 *   2. set a single coefficient of plane CT[2] to 1;
 *   3. inverse-transform planes 0..2 to real space;
 *   4. multiply planes 0 and 1 pointwise by plane 2;
 *   5. forward-transform planes 0..2, scale the retained modes by `norm`,
 *      and copy planes 0 and 1 back into Uxb/Uzb;
 *   6. replace every (z, x) != (0, 0) entry of Uxb/Uzb by the combinations
 *      c1/c2 described in the comment above the final loop.
 *
 * The input rows z = Nz/2 are never copied in either direction.
 *
 * Returns NO_ERR.
 */
int increBoundary(void)
{
    /* External Variables */
    extern int Nx, Nz;
    extern fftw_complex ***CT;  /* 6-by-(3Nz/2)-by-(3*Nx/4+1) */
    extern mcomplex **Uxb, **Uzb;
    extern fftw_plan pf1, pf2;
    extern rfftwnd_plan pr1, pr2;
    extern double *Kx, *Kz;

    int x, i, z, idx;
    double norm, tmp1, tmp2, tmp3;
    fftw_real *RT;              /* real to complex transform */
    /* Both output pointers stay NULL; presumably the plans are in-place
       so the output arguments are ignored -- TODO confirm. */
    fftw_complex *fout = NULL;
    fftw_real *rout = NULL;

    /* number of reals in one plane of CT when viewed through RT */
    idx = (3 * Nz / 2) * (3 * Nx / 2 + 2);
    RT = (fftw_real *) CT[0][0];
    /* scale factor applied after the inverse/forward transform pair */
    norm = 1.0 / ((3. * Nx / 2.) * (3. * Nz / 2.));

    memset(CT[0][0], 0,
           MAXT * (3 * Nz / 2) * (3 * Nx / 4 + 1) * sizeof(fftw_complex));

    /* store Uxb hat and Uzb hat and w hat on CT for inverse FFT */
    for (z = 0; z < Nz / 2; ++z) {
        /* CT[0] stores the data of Uxb, CT[1] stores the data of Uzb */
        memcpy(CT[0][z], Uxb[z], (Nx / 2) * sizeof(fftw_complex));
        memcpy(CT[1][z], Uzb[z], (Nx / 2) * sizeof(fftw_complex));
        /*      for(x=0; x<Nx/2; ++x)
           {
           Re(CT[2][z][x])=1.0;
           Im(CT[2][z][x])=0.;
           } */
    }

    /* upper half of the z range goes to the end of the CT plane; row
       z = Nz/2 is skipped */
    for (z = Nz / 2 + 1; z < Nz; ++z) {
        memcpy(CT[0][z + Nz / 2], Uxb[z], (Nx / 2) * sizeof(fftw_complex));
        memcpy(CT[1][z + Nz / 2], Uzb[z], (Nx / 2) * sizeof(fftw_complex));
        /*for(x=0; x<Nx/2; ++x)
           {
           Re(CT[2][z+Nz/2][x])=1.0;
           Im(CT[2][z+Nz/2][x])=0.;
           }
         */
    }

    /* plane 2 holds one non-zero coefficient: real part of mode (z=1, x=1) */
    Re(CT[2][1][1]) = 1.;
    //Re(CT[2][3*Nz/2-1][0])=1.;
    //Re(CT[2][0][0])=1.;

    //  Re(CT[2][0][0])=1.;


    /* inverse Fourier transform */
    for (i = 0; i < 3; ++i) {
        /* Each column of CT[i] */
        fftw(pf1, Nx / 2, CT[i][0], 3 * Nx / 4 + 1, 1, fout, -1, -1);

        /* Each row of CT[i] */
        rfftwnd_complex_to_real(pr1, 3 * Nz / 2, CT[i][0], 1,
                                3 * Nx / 4 + 1, rout, -1, -1);
    }

    /* compute (dux)*(w.n) and (duz)*(w.n) */
    /* planes 0 and 1 are each multiplied pointwise by plane 2 (offset
       2 * idx in RT); the two trailing reals of every row are not touched */
    for (z = 0; z < (3 * Nz / 2); ++z) {
        for (x = 0; x < 3 * Nx / 2; ++x) {
            RT[(z * (3 * Nx / 2 + 2) + x)] =
                RT[(z * (3 * Nx / 2 + 2) + x)] * RT[2 * idx +
                                                    (z * (3 * Nx / 2 + 2) +
                                                     x)];
            RT[idx + (z * (3 * Nx / 2 + 2) + x)] =
                RT[idx + (z * (3 * Nx / 2 + 2) + x)] * RT[2 * idx +
                                                          (z *
                                                           (3 * Nx / 2 +
                                                            2) + x)];
        }
    }


    /* Fourier transform to get Uxb hats and Uzb hats. */
    for (i = 0; i < 3; ++i) {

        /* Each row of RT[i] */
        rfftwnd_real_to_complex(pr2, 3 * Nz / 2, RT + (i * idx), 1,
                                3 * Nx / 2 + 2, fout, -1, -1);

        /* Each column of CT[i] */
        fftw(pf2, Nx / 2, CT[i][0], 3 * Nx / 4 + 1, 1, fout, -1, -1);

        /* constant of FFT */
        /* only the rows that are copied back below are rescaled */
        for (z = 0; z < Nz / 2; ++z) {
            for (x = 0; x < Nx / 2; ++x) {
                Re(CT[i][z][x]) = norm * Re(CT[i][z][x]);
                Im(CT[i][z][x]) = norm * Im(CT[i][z][x]);
            }
        }

        for (z = Nz + 1; z < 3 * Nz / 2; ++z) {
            for (x = 0; x < Nx / 2; ++x) {
                Re(CT[i][z][x]) = norm * Re(CT[i][z][x]);
                Im(CT[i][z][x]) = norm * Im(CT[i][z][x]);
            }
        }
    }

    /* put data back in arrays Uxb and Uzb */
    /* zero first, so the row z = Nz/2 (never written below) ends up 0 */
    memset(Uxb[0], 0, Nz * (Nx / 2) * sizeof(mcomplex));
    memset(Uzb[0], 0, Nz * (Nx / 2) * sizeof(mcomplex));
    for (z = 0; z < Nz / 2; ++z) {
        memcpy(Uxb[z], CT[0][z], Nx / 2 * sizeof(fftw_complex));
        memcpy(Uzb[z], CT[1][z], Nx / 2 * sizeof(fftw_complex));
    }
    for (z = Nz + 1; z < 3 * Nz / 2; ++z) {
        memcpy(Uxb[z - Nz / 2], CT[0][z], Nx / 2 * sizeof(fftw_complex));
        memcpy(Uzb[z - Nz / 2], CT[1][z], Nx / 2 * sizeof(fftw_complex));
    }

    /* further computation to get c1, c2, c3, c4 as in the note; the results
       are rewritten in Uxb and Uzb:
       c1=g hat=iKz*Uxb-iKx*Uzb;-------rewritten in Uxb
       c2=du_y=-iKx*Uxb-iKz*Uzb;-------rewritten in Uzb
       c3=U=Uxb(0,0);           -------rewritten in Uxb[0][0]
       c4=Uzb(0,0)              -------rewritten in Uzb[0][0] */

    for (z = 0; z < Nz; ++z) {
        for (x = 0; x < Nx / 2; ++x) {
            /* skip the (0,0) mode, which is left as copied above */
            if (z * z + x * x > 0) {
                /* temporaries hold the new values until all old values
                   of Uxb/Uzb at this point have been read */
                tmp1 = -Kz[z] * Im(Uxb[z][x]) + Kx[x] * Im(Uzb[z][x]);
                tmp2 = Kz[z] * Re(Uxb[z][x]) - Kx[x] * Re(Uzb[z][x]);
                tmp3 = Kx[x] * Im(Uxb[z][x]) + Kz[z] * Im(Uzb[z][x]);

                Im(Uzb[z][x]) =
                    -Kx[x] * Re(Uxb[z][x]) - Kz[z] * Re(Uzb[z][x]);
                Re(Uzb[z][x]) = tmp3;
                Re(Uxb[z][x]) = tmp1;
                Im(Uxb[z][x]) = tmp2;
            }
        }
    }


    return (NO_ERR);
}
Esempio n. 5
0
/*
 * Convenience form of rfftwnd_complex_to_real(): performs exactly one
 * transform of `in` into `out` with every stride and distance set to 1.
 */
void rfftwnd_one_complex_to_real(fftwnd_plan p,
				 fftw_complex *in, fftw_real *out)
{
     enum { ONE_TRANSFORM = 1, UNIT_STRIDE = 1, UNIT_DIST = 1 };

     rfftwnd_complex_to_real(p, ONE_TRANSFORM,
			     in, UNIT_STRIDE, UNIT_DIST,
			     out, UNIT_STRIDE, UNIT_DIST);
}
/*
 * Benchmarks one in-place real multi-dimensional transform.
 *
 * sz        rank and dimensions of the transform
 * dir       FFTW_REAL_TO_COMPLEX or the opposite direction
 * flags     extra planner flags; FFTW_IN_PLACE is always added
 * specific  non-zero to plan with rfftwnd_create_plan_specific() on the
 *           benchmark buffer, zero to use rfftwnd_create_plan()
 *
 * Prints planner time and the plan at verbosity 2, and the per-transform
 * timing figures at verbosity 1.
 */
void test_speed_nd_aux(struct size sz,
		       fftw_direction dir, int flags, int specific)
{
     fftw_real *data;
     fftwnd_plan plan;
     fftw_time t_start, t_stop;
     double elapsed;
     int d, npoints, plan_flags;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;
     plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

     /* all leading dimensions, then the last one padded by two reals */
     npoints = 1;
     d = 0;
     while (d < sz.rank - 1) {
	  npoints *= sz.narray[d];
	  ++d;
     }
     npoints *= (sz.narray[d] + 2);

     data = (fftw_real *) fftw_malloc(npoints * howmany_fields * sizeof *data);

     t_start = fftw_get_time();
     if (specific)
	  plan = rfftwnd_create_plan_specific(sz.rank, sz.narray, dir,
					      plan_flags,
					      data, howmany_fields, 0, 1);
     else
	  plan = rfftwnd_create_plan(sz.rank, sz.narray, dir, plan_flags);
     t_stop = fftw_get_time();
     CHECK(plan != NULL, "can't create plan");

     elapsed = fftw_time_to_sec(fftw_time_diff(t_stop, t_start));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", elapsed));

     WHEN_VERBOSE(2, printf("\n"));
     WHEN_VERBOSE(2, (rfftwnd_print_plan(plan)));
     WHEN_VERBOSE(2, printf("\n"));

     /* the timing macro stores its result in `elapsed` */
     if (dir != FFTW_REAL_TO_COMPLEX) {
	  FFTW_TIME_FFT(rfftwnd_complex_to_real(plan, howmany_fields,
						(fftw_complex *) data,
						howmany_fields, 1,
						0, 0, 0),
			data, npoints * howmany_fields, elapsed);
     } else {
	  FFTW_TIME_FFT(rfftwnd_real_to_complex(plan, howmany_fields,
						data, howmany_fields, 1,
						0, 0, 0),
			data, npoints * howmany_fields, elapsed);
     }

     rfftwnd_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(elapsed)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(elapsed / npoints)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5/2 (N log2 N) / (t in microseconds)"
			" = %f\n", 0.5 * howmany_fields * mflops(elapsed, npoints)));

     fftw_free(data);

     WHEN_VERBOSE(1, printf("\n"));
}
/*
 * Correctness test for in-place real n-dimensional transforms.
 *
 * rank, n         dimensions of the transform under test
 * validated_plan  complex fftwnd plan used to compute the reference result
 * alternate_api   non-zero: use the rfftw2d_ / rfftw3d_ constructors when
 *                 rank is 2 or 3
 * specific        non-zero: use the *_create_plan_specific constructors
 *
 * For every stride from 1 to MAX_STRIDE the routine runs a real->complex
 * transform, compares it with the reference, runs the complex->real
 * transform on the result, scales by 1/N and compares with the original
 * input.  Failures are reported through CHECK.
 */
void testnd_in_place(int rank, int *n, fftwnd_plan validated_plan,
		     int alternate_api, int specific)
{
     int istride, ostride, howmany;
     int N, dim, i, j, k;
     int nc, nhc, nr;
     fftw_real *in1, *out3;
     fftw_complex *in2, *out1, *out2;
     fftwnd_plan p, ip;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     /* randomly exercise the thread-safe planner flag as well */
     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     /* N  = total number of real points
        nr = size of the last dimension
        nc = product of all other dimensions
        nhc = nr / 2 + 1, the row length used for the complex output */
     N = nc = nr = nhc = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];
     if (rank > 0) {
	  nr = n[rank - 1];
	  nc = N / nr;
	  nhc = nr / 2 + 1;
     }
     /* one buffer serves as real input, complex output and real output */
     in1 = (fftw_real *) fftw_malloc(2 * nhc * nc * MAX_STRIDE * sizeof(fftw_real));
     out3 = in1;
     out1 = (fftw_complex *) in1;
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     /* choose one of the four plan-construction APIs */
     if (alternate_api && specific && (rank == 2 || rank == 3)) {
	  if (rank == 2) {
	       p = rfftw2d_create_plan_specific(n[0], n[1],
					     FFTW_REAL_TO_COMPLEX, flags,
						in1, MAX_STRIDE, 0, 0);
	       ip = rfftw2d_create_plan_specific(n[0], n[1],
					     FFTW_COMPLEX_TO_REAL, flags,
						 in1, MAX_STRIDE, 0, 0);
	  } else {
	       p = rfftw3d_create_plan_specific(n[0], n[1], n[2],
					     FFTW_REAL_TO_COMPLEX, flags,
						in1, MAX_STRIDE, 0, 0);
	       ip = rfftw3d_create_plan_specific(n[0], n[1], n[2],
					     FFTW_COMPLEX_TO_REAL, flags,
						 in1, MAX_STRIDE, 0, 0);
	  }
     } else if (specific) {
	  p = rfftwnd_create_plan_specific(rank, n, FFTW_REAL_TO_COMPLEX,
					   flags,
				       in1, MAX_STRIDE, in1, MAX_STRIDE);
	  ip = rfftwnd_create_plan_specific(rank, n, FFTW_COMPLEX_TO_REAL,
					    flags,
				       in1, MAX_STRIDE, in1, MAX_STRIDE);
     } else if (alternate_api && (rank == 2 || rank == 3)) {
	  if (rank == 2) {
	       p = rfftw2d_create_plan(n[0], n[1], FFTW_REAL_TO_COMPLEX,
				       flags);
	       ip = rfftw2d_create_plan(n[0], n[1], FFTW_COMPLEX_TO_REAL,
					flags);
	  } else {
	       p = rfftw3d_create_plan(n[0], n[1], n[2], FFTW_REAL_TO_COMPLEX,
				       flags);
	       ip = rfftw3d_create_plan(n[0], n[1], n[2], FFTW_COMPLEX_TO_REAL,
					flags);
	  }
     } else {
	  p = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
	  ip = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
     }

     CHECK(p != NULL && ip != NULL, "can't create plan");

     /* clear the whole shared buffer, padding included */
     for (i = 0; i < nc * nhc * 2 * MAX_STRIDE; ++i)
	  out3[i] = 0;

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
	  /* generate random inputs */
	  /* in2 gets the data as complex numbers with zero imaginary part;
	     in1 gets istride interleaved copies, each row padded to
	     2 * nhc reals */
	  for (i = 0; i < nc; ++i)
	       for (j = 0; j < nr; ++j) {
		    c_re(in2[i * nr + j]) = DRAND();
		    c_im(in2[i * nr + j]) = 0.0;
		    for (k = 0; k < istride; ++k)
			 in1[(i * nhc * 2 + j) * istride + k]
			     = c_re(in2[i * nr + j]);
	       }

	  /* reference result from the validated complex transform */
	  fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

	  /* in place: one transform per interleaved copy, same stride
	     on input and output */
	  howmany = ostride = istride;

	  WHEN_VERBOSE(2, printf("\n    testing in-place stride %d...",
				 istride));

	  /* the single-transform entry point is tried only for the
	     unit-stride case, and then only on a coin flip */
	  if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
	       rfftwnd_real_to_complex(p, howmany, in1, istride, 1,
				       out1, ostride, 1);
	  else
	       rfftwnd_one_real_to_complex(p, in1, NULL);

	  /* compare the first nhc entries of every row with the reference */
	  for (i = 0; i < nc; ++i)
	       for (k = 0; k < howmany; ++k)
		    CHECK(compute_error_complex(out1 + i * nhc * ostride + k,
						ostride,
						out2 + i * nr, 1,
						nhc) < TOLERANCE,
			  "in-place (r2c): wrong answer");

	  if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
	       rfftwnd_complex_to_real(ip, howmany, out1, ostride, 1,
				       out3, istride, 1);
	  else
	       rfftwnd_one_complex_to_real(ip, out1, NULL);

	  /* scale the round-trip result by 1/N before comparing */
	  for (i = 0; i < nc * nhc * 2 * istride; ++i)
	       out3[i] *= 1.0 / N;

	  /* compare against the real parts stored in in2 (stride 2) */
	  for (i = 0; i < nc; ++i)
	       for (k = 0; k < howmany; ++k)
		    CHECK(compute_error(out3 + i * nhc * 2 * istride + k,
					istride,
					(fftw_real *) (in2 + i * nr), 2,
					nr) < TOLERANCE,
			  "in-place (c2r): wrong answer (check 2)");
     }

     rfftwnd_destroy_plan(p);
     rfftwnd_destroy_plan(ip);

     /* out1 and out3 alias in1, so only three buffers are freed */
     fftw_free(out2);
     fftw_free(in2);
     fftw_free(in1);
}
/*
 * Correctness test for out-of-place real n-dimensional transforms.
 *
 * rank, n         dimensions of the transform under test
 * validated_plan  complex fftwnd plan used to compute the reference result
 *
 * For every (istride, ostride) pair in 1..MAX_STRIDE the routine runs a
 * real->complex transform, compares it with the reference, runs the
 * complex->real transform back, scales by 1/N and compares with the
 * original input.  Failures are reported through CHECK.
 */
void testnd_out_of_place(int rank, int *n, fftwnd_plan validated_plan)
{
     int istride, ostride;
     int N, dim, i, j, k;
     int nc, nhc, nr;
     fftw_real *in1, *out3;
     fftw_complex *in2, *out1, *out2;
     fftwnd_plan p, ip;
     int flags = measure_flag | wisdom_flag;

     /* randomly exercise the thread-safe planner flag as well */
     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     /* N  = total number of real points
        nr = size of the last dimension
        nc = product of all other dimensions
        nhc = nr / 2 + 1, the row length used for the complex output */
     N = nc = nr = nhc = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];
     if (rank > 0) {
	  nr = n[rank - 1];
	  nc = N / nr;
	  nhc = nr / 2 + 1;
     }
     /* in1: real input, out1: complex result, out3: round-trip result;
        in2/out2: input and output of the reference complex transform */
     in1 = (fftw_real *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_real));
     out3 = (fftw_real *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_real));
     out1 = (fftw_complex *) fftw_malloc(nhc * nc * MAX_STRIDE
					 * sizeof(fftw_complex));
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     p = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
     ip = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
     CHECK(p != NULL && ip != NULL, "can't create plan");

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
	  /* generate random inputs */
	  /* in2 gets the data as complex numbers with zero imaginary part;
	     in1 gets istride interleaved copies of the same values */
	  for (i = 0; i < nc; ++i)
	       for (j = 0; j < nr; ++j) {
		    c_re(in2[i * nr + j]) = DRAND();
		    c_im(in2[i * nr + j]) = 0.0;
		    for (k = 0; k < istride; ++k)
			 in1[(i * nr + j) * istride + k]
			     = c_re(in2[i * nr + j]);
	       }
	  for (i = 0; i < N * istride; ++i)
	       out3[i] = 0.0;

	  /* reference result from the validated complex transform */
	  fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

	  for (ostride = 1; ostride <= MAX_STRIDE; ++ostride) {
	       /* transform only as many interleaved data sets as both
	          strides can hold */
	       int howmany = (istride < ostride) ? istride : ostride;

	       WHEN_VERBOSE(2, printf("\n    testing stride %d/%d...",
				      istride, ostride));

	       /* the single-transform entry point is tried only for the
	          unit-stride case, and then only on a coin flip */
	       if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
		    rfftwnd_real_to_complex(p, howmany, in1, istride, 1,
					    out1, ostride, 1);
	       else
		    rfftwnd_one_real_to_complex(p, in1, out1);

	       /* compare the first nhc entries of every row with the
	          reference */
	       for (i = 0; i < nc; ++i)
		    for (k = 0; k < howmany; ++k)
			 CHECK(compute_error_complex(out1 + i * nhc * ostride + k,
						     ostride,
						     out2 + i * nr, 1,
						     nhc) < TOLERANCE,
			       "out-of-place (r2c): wrong answer");

	       if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
		    rfftwnd_complex_to_real(ip, howmany, out1, ostride, 1,
					    out3, istride, 1);
	       else
		    rfftwnd_one_complex_to_real(ip, out1, out3);

	       /* scale the round-trip result by 1/N before comparing */
	       for (i = 0; i < N * istride; ++i)
		    out3[i] *= 1.0 / N;

	       /* when every interleaved copy was transformed, the whole
	          output buffer must match the whole input buffer */
	       if (istride == howmany)
		    CHECK(compute_error(out3, 1, in1, 1, N * istride)
			< TOLERANCE, "out-of-place (c2r): wrong answer");
	       /* per-row comparison against the real parts stored in in2 */
	       for (i = 0; i < nc; ++i)
		    for (k = 0; k < howmany; ++k)
			 CHECK(compute_error(out3 + i * nr * istride + k,
					     istride,
					 (fftw_real *) (in2 + i * nr), 2,
					     nr) < TOLERANCE,
			   "out-of-place (c2r): wrong answer (check 2)");
	  }
     }

     rfftwnd_destroy_plan(p);
     rfftwnd_destroy_plan(ip);

     fftw_free(out3);
     fftw_free(out2);
     fftw_free(in2);
     fftw_free(out1);
     fftw_free(in1);
}
Esempio n. 9
0
/*
 * Accumulates and periodically writes flow statistics.
 *
 * n  step counter; the expensive part runs only when (n + 1) is a
 *    multiple of 100.
 *
 * On every call the global accumulators tau and itau are increased by
 * Re(GUxb[0][0]) and Re(ishear).  On every 100th call the routine
 *   - writes tau/100 and itau/100 to fp7/fp8 and resets both,
 *   - increments nums,
 *   - transforms GUxb/GUzb to real space and writes two rows (z = 5 and
 *     z = 20) to fp2 and fp11,
 *   - for every y in [0, qpts): transforms the three components of U and
 *     IU plus U[.][DXEL] to real space, averages first and second moments
 *     over the (z, x) plane, adds them to the global array `us`, and
 *     writes the values for y == 5 and y == 40 to fp3..fp6,
 *   - writes the W-weighted sums over y to fp9 (U) and fp10 (IU).
 *
 * The scratch buffer CT (also viewed as the real array RT) is overwritten.
 * Returns NO_ERR.
 */
int comp_stat(int n)
{
 /* External Variables */
  extern int Nx, Nz, qpts, nums;
  extern mcomplex ****U,****IU, **GUxb,**GUzb, ishear;
  extern double **uu, **us, *W;
  extern FILE *fp, *fp2, *fp3, *fp4, *fp5, *fp6, *fp7, *fp8, *fp9, *fp10, *fp11;
    extern fftw_complex ***CT;
    extern fftw_plan pf1, pf2;
    extern fftw_plan Ipf1, Ipf2;
    extern rfftwnd_plan pr1, pr2;
    extern double *Kx, *Kz, **K2;
    extern double re, dt, tau, itau;
    int  x,i,j,y, z, idx;
    fftw_real *RT;     /* real to complex transform */
    /* Both output pointers stay NULL; presumably the plans are in-place
       so the output arguments are ignored -- TODO confirm. */
    fftw_complex *fout = NULL;
    fftw_real *rout = NULL;
    extern mcomplex ****U,****IU;
    double norm; 
   

    /* number of reals in one plane of CT when viewed through RT */
    idx = (3*Nz/2)*(3*Nx/2+2);
    RT = (fftw_real *)CT[0][0];
    norm = 1.0 / ((3.*Nx/2.)*(3.*Nz/2.));
    /* plane averages for the current y (u*: from U, iu*: from IU) */
    double u1, u2, u3, u1u2, u1u3, u2u3, u1y;
    double iu1, iu2, iu3, iu1u2, iu1u3, iu2u3, u1u1, u2u2, u3u3, iu1u1, iu2u2, iu3u3;
    /* W-weighted sums of the plane averages over y */
    double au1, au2, au3, au1u2, au1u3, au2u3, au1u1, au2u2, au3u3;
    double iau1, iau2, iau3, iau1u2, iau1u3, iau2u3, iau1u1, iau2u2, iau3u3;

    /* running sums, averaged and reset every 100 calls below */
    tau+=Re(GUxb[0][0]);
    itau+=Re(ishear);
    /* for(j=0; j< qpts; j++)
      {
	uu[0][j] += Re(U[0][XEL][j][0]);
	uu[1][j] += Re(U[0][YEL][j][0]);
	uu[2][j] += Re(U[0][ZEL][j][0]);


	uu[3][j] += Re(IU[0][XEL][j][0]);
	uu[4][j] += Re(IU[0][YEL][j][0]);
	uu[5][j] += Re(IU[0][ZEL][j][0]);
	}*/

    if( (n+1) % 100 ==0)
      {
	/* record wall shear stress for state and incremental state */
	fprintf(fp7, "%f\n", tau/100.);
	tau=0;
	fprintf(fp8, "%f\n", itau/100.);
	itau=0;

	au1=0; au2=0; au3=0; au1u2=0; au2u3=0; au1u3=0; au1u1=0; au2u2=0; au3u3=0;
	iau1=0; iau2=0; iau3=0; iau1u2=0; iau2u3=0; iau1u3=0; iau1u1=0; iau2u2=0; iau3u3=0;

		nums=nums+1;
		/*fprintf(fp, "the time averge %d, %d\n", (n+1)-50, (n+1));
	for(j=0; j< qpts; j++)
	  {
	    fprintf(fp, "%d, %f %f %f  \n", j, uu[0][j]/50., uu[1][j]/50., uu[2][j]/50.);
	  }

	fprintf(fp2, "the time averge %d, %d\n", (n+1)-50, (n+1));
	for(j=0; j< qpts; j++)
	  {
	    fprintf(fp2, "%d, %f %f %f  \n", j, uu[3][j]/50., uu[4][j]/50., uu[5][j]/50.);
	    }

	    memset(uu[0], 0, 6*(qpts)*sizeof(double));*/

	/* load GUxb/GUzb into planes 0 and 1 of the zeroed scratch buffer;
	   row z = Nz/2 is not copied and the upper half is shifted by Nz/2 */
	memset(CT[0][0], 0,  MAXTT*(3*Nz/2)*(3*Nx/4+1)*sizeof(fftw_complex));
	for (z = 0; z < Nz/2; ++z)
	  {
	    memcpy(CT[0][z], GUxb[z], (Nx/2)*sizeof(fftw_complex));
	    memcpy(CT[1][z], GUzb[z], (Nx/2)*sizeof(fftw_complex));
	  }
	for (z = Nz/2+1; z < Nz; ++z)
	  {
	    memcpy(CT[0][z+Nz/2], GUxb[z], (Nx/2)*sizeof(fftw_complex));
	    memcpy(CT[1][z+Nz/2], GUzb[z], (Nx/2)*sizeof(fftw_complex));
	  }

	for (i = 0; i < 2; ++i)
	      {
		/* Each column of CT[i] */
		fftw(pf1, Nx/2, CT[i][0], 3*Nx/4+1, 1, fout, -1, -1);

		/* Each row of CT[i] */
		rfftwnd_complex_to_real(pr1, 3*Nz/2, CT[i][0], 1, 3*Nx/4+1, 
					rout, -1, -1);
	      }
	/* dump the first Nx/2 real-space values of row z = 5 (both planes) */
	z=5;
	for(x=0; x< Nx/2; x++)
	  {
	    fprintf(fp2, "  %f  %f \n", RT[(z*(3*Nx/2+2)+x)], RT[idx+(z*(3*Nx/2+2)+x)]);
	  }

	/* same for row z = 20 */
	z=20;
	for(x=0; x< Nx/2; x++)
	  {
	    fprintf(fp11, "  %f  %f \n", RT[(z*(3*Nx/2+2)+x)], RT[idx+(z*(3*Nx/2+2)+x)]);
	  }

	for (y = 0; y < qpts; ++y)
	  {
	    memset(CT[0][0], 0, MAXTT*(3*Nz/2)*(3*Nx/4+1)*sizeof(fftw_complex));

	    /* planes 0..2: components of U; planes 3..5: components of IU */
	    for (i = 0; i < 3; ++i)
	      {
		for (z = 0; z < Nz/2; ++z)
		  {
		    memcpy(CT[i][z], U[z][i][y], (Nx/2)*sizeof(fftw_complex));
		    memcpy(CT[i+3][z], IU[z][i][y], (Nx/2)*sizeof(fftw_complex));
		  }
		for (z = Nz/2+1; z < Nz; ++z)
		  {
		    memcpy(CT[i][z+Nz/2], U[z][i][y], (Nx/2)*sizeof(fftw_complex));
		    memcpy(CT[i+3][z+Nz/2], IU[z][i][y], (Nx/2)*sizeof(fftw_complex));
		  }
	      }
	    /* plane 6: the DXEL entry of U */
	    for (z = 0; z < Nz/2; ++z)
		  {
		    memcpy(CT[6][z], U[z][DXEL][y], (Nx/2)*sizeof(fftw_complex));
		  }
		for (z = Nz/2+1; z < Nz; ++z)
		  {
		    memcpy(CT[6][z+Nz/2], U[z][DXEL][y], (Nx/2)*sizeof(fftw_complex));
		  }
	    /* bring all seven planes to real space */
	    for (i = 0; i < 7; ++i)
	      {
		/* Each column of CT[i] */
		fftw(pf1, Nx/2, CT[i][0], 3*Nx/4+1, 1, fout, -1, -1);

		/* Each row of CT[i] */
		rfftwnd_complex_to_real(pr1, 3*Nz/2, CT[i][0], 1, 3*Nx/4+1, 
					rout, -1, -1);
	      }

	    u1=0;
	    u2=0;
	    u3=0;
	    u1y=0;
	    u1u2=0;
	    u1u3=0;
	    u2u3=0;
	    u1u1=0;
	    u2u2=0;
	    u3u3=0;
	    /* sum first and second moments of U over the (z, x) plane; the
	       two trailing reals of every row are skipped */
	    for (z = 0; z < (3*Nz/2); ++z)
	      {
		for (x = 0; x < 3*Nx/2; ++x)
		  {
		    u1 += RT[XEL*idx+(z*(3*Nx/2+2)+x)];
		    u2 += RT[YEL*idx+(z*(3*Nx/2+2)+x)];
		    u3 += RT[ZEL*idx+(z*(3*Nx/2+2)+x)];
		    u1y+=RT[6*idx+(z*(3*Nx/2+2)+x)];
		    u1u2 += RT[XEL*idx+(z*(3*Nx/2+2)+x)]* RT[YEL*idx+(z*(3*Nx/2+2)+x)];
		    u1u3 += RT[XEL*idx+(z*(3*Nx/2+2)+x)]* RT[ZEL*idx+(z*(3*Nx/2+2)+x)];
		    u2u3 += RT[YEL*idx+(z*(3*Nx/2+2)+x)]* RT[ZEL*idx+(z*(3*Nx/2+2)+x)];
		    u1u1 += RT[XEL*idx+(z*(3*Nx/2+2)+x)]* RT[XEL*idx+(z*(3*Nx/2+2)+x)];
		    u2u2 += RT[YEL*idx+(z*(3*Nx/2+2)+x)]* RT[YEL*idx+(z*(3*Nx/2+2)+x)];
		    u3u3 += RT[ZEL*idx+(z*(3*Nx/2+2)+x)]* RT[ZEL*idx+(z*(3*Nx/2+2)+x)];
		  }
	      }

	    /* turn the sums into plane averages */
	    u1=u1/(3*Nz/2)/(3*Nx/2);
	    u2=u2/(3*Nz/2)/(3*Nx/2);
	    u3=u3/(3*Nz/2)/(3*Nx/2);
	    u1y=u1y/(3*Nz/2)/(3*Nx/2);
	    u1u2=u1u2/(3*Nz/2)/(3*Nx/2);
	    u1u3=u1u3/(3*Nz/2)/(3*Nx/2);
	    u2u3=u2u3/(3*Nz/2)/(3*Nx/2);
	    u1u1=u1u1/(3*Nz/2)/(3*Nx/2);
	    u2u2=u2u2/(3*Nz/2)/(3*Nx/2);
	    u3u3=u3u3/(3*Nz/2)/(3*Nx/2);

	    /* accumulate into the global statistics rows for U */
	    us[0][y]+=u1;
	    us[1][y]+=u2;
	    us[2][y]+=u3;
	    us[3][y]+=u1u2;
	    us[4][y]+=u1u3;
	    us[5][y]+=u2u3;
	    us[12][y]+=u1u1;
	    us[13][y]+=u2u2;
	    us[14][y]+=u3u3;
	    us[18][y]+=u1y;
	    // fprintf(fp, "%d, %f %f %f  %f %f  %f %f %f %f\n", y, u1, u2, u3, u1u2, u1u3, u2u3, u1u1, u2u2, u3u3);

	     /* time series at two fixed y indices; the last column is
	        half the sum of the three diagonal second moments */
	     if(y==5)
	      {
		fprintf(fp3, " %f %f %f  %f %f  %f  %f  %f  %f %f\n", u1, u2, u3, u1u2, u1u3, u2u3, u1u1, u2u2, u3u3, 0.5*(u1u1+u2u2+u3u3));
	      }

	    if(y==40)
	      {
		fprintf(fp4, " %f %f %f  %f %f  %f  %f  %f  %f %f \n", u1, u2, u3, u1u2, u1u3, u2u3, u1u1, u2u2, u3u3, 0.5*(u1u1+u2u2+u3u3));
		}

	    iu1=0;
	    iu2=0;
	    iu3=0;
	    iu1u2=0;
	    iu1u3=0;
	    iu2u3=0;
	    iu1u1=0;
	    iu2u2=0;
	    iu3u3=0;
	    /* same moments for IU, stored three planes further on */
	    for (z = 0; z < (3*Nz/2); ++z)
	      {
		for (x = 0; x < 3*Nx/2; ++x)
		  {
		    iu1 += RT[(XEL+3)*idx+(z*(3*Nx/2+2)+x)];
		    iu2 += RT[(YEL+3)*idx+(z*(3*Nx/2+2)+x)];
		    iu3 += RT[(ZEL+3)*idx+(z*(3*Nx/2+2)+x)];
		    iu1u2 += RT[(XEL+3)*idx+(z*(3*Nx/2+2)+x)]* RT[(YEL+3)*idx+(z*(3*Nx/2+2)+x)];
		    iu1u3 += RT[(XEL+3)*idx+(z*(3*Nx/2+2)+x)]* RT[(ZEL+3)*idx+(z*(3*Nx/2+2)+x)];
		    iu2u3 += RT[(YEL+3)*idx+(z*(3*Nx/2+2)+x)]* RT[(ZEL+3)*idx+(z*(3*Nx/2+2)+x)];
		    iu1u1 += RT[(XEL+3)*idx+(z*(3*Nx/2+2)+x)]* RT[(XEL+3)*idx+(z*(3*Nx/2+2)+x)];
		    iu2u2 += RT[(YEL+3)*idx+(z*(3*Nx/2+2)+x)]* RT[(YEL+3)*idx+(z*(3*Nx/2+2)+x)];
		    iu3u3 += RT[(ZEL+3)*idx+(z*(3*Nx/2+2)+x)]* RT[(ZEL+3)*idx+(z*(3*Nx/2+2)+x)];
		  }
	      }

	    iu1=iu1/(3*Nz/2)/(3*Nx/2);
	    iu2=iu2/(3*Nz/2)/(3*Nx/2);
	    iu3=iu3/(3*Nz/2)/(3*Nx/2);
	    iu1u2=iu1u2/(3*Nz/2)/(3*Nx/2);
	    iu1u3=iu1u3/(3*Nz/2)/(3*Nx/2);
	    iu2u3=iu2u3/(3*Nz/2)/(3*Nx/2);
	    iu1u1=iu1u1/(3*Nz/2)/(3*Nx/2);
	    iu2u2=iu2u2/(3*Nz/2)/(3*Nx/2);
	    iu3u3=iu3u3/(3*Nz/2)/(3*Nx/2);

	    /* accumulate into the global statistics rows for IU */
	    us[6][y]+=iu1;
	    us[7][y]+=iu2;
	    us[8][y]+=iu3;
	    us[9][y]+=iu1u2;
	    us[10][y]+=iu1u3;
	    us[11][y]+=iu2u3;
	    us[15][y]+=iu1u1;
	    us[16][y]+=iu2u2;
	    us[17][y]+=iu3u3;
	    //fprintf(fp2, "%d, %f %f %f  %f %f  %f %f  %f  %f \n", y, iu1, iu2, iu3, iu1u2, iu1u3, iu2u3, iu1u1, iu2u2, iu3u3);

	     if(y==5)
	      {
		fprintf(fp5, " %f %f %f  %f %f  %f %f  %f  %f  %f \n", iu1, iu2, iu3, iu1u2, iu1u3, iu2u3, iu1u1, iu2u2, iu3u3, 0.5*(iu1u1+iu2u2+iu3u3));
	      }
	    if(y==40)
	      {
		fprintf(fp6, " %f %f %f  %f %f  %f %f  %f  %f  %f\n", iu1, iu2, iu3, iu1u2, iu1u3, iu2u3, iu1u1, iu2u2, iu3u3,0.5*(iu1u1+iu2u2+iu3u3) );
		}

	    /* W-weighted sums over y (W presumably holds quadrature
	       weights -- TODO confirm) */
	    au1+=u1*W[y];     au2+=u2*W[y]; 	    au3+=u3*W[y];
	    au1u2+=u1u2*W[y];  au2u3+=u2u3*W[y];  au1u3+=u1u3*W[y];
	    au1u1+=u1u1*W[y];  au2u2+=u2u2*W[y];  au3u3+=u3u3*W[y];

	    iau1+=iu1*W[y];     iau2+=iu2*W[y]; 	    iau3+=iu3*W[y];
	    iau1u2+=iu1u2*W[y];  iau2u3+=iu2u3*W[y];  iau1u3+=iu1u3*W[y];
	    iau1u1+=iu1u1*W[y];  iau2u2+=iu2u2*W[y];  iau3u3+=iu3u3*W[y];
	  }
	fprintf(fp9, " %f %f %f  %f %f  %f  %f  %f  %f \n", au1, au2, au3, au1u2, au1u3, au2u3, au1u1, au2u2, au3u3);
	fprintf(fp10, " %f %f %f  %f %f  %f %f  %f  %f \n",  iau1, iau2, iau3, iau1u2, iau1u3, iau2u3, iau1u1, iau2u2, iau3u3);
      }

 return(NO_ERR);
}