示例#1
0
// Read the sensor status register into *result.
// On a putByte() failure *result is set to 0xFF and putByte()'s code is
// returned.  With CRC_ENA defined, the checksum byte sent by the sensor is
// bit-reversed and compared with the locally computed CRC; on mismatch
// *result is set to 0xFF and S_Err_CRC is returned.  Returns 0 on success.
uint8_t Sensirion::readSR(uint8_t *result) {
#ifdef CRC_ENA
  // Seed the running CRC from the bit-reversed, masked cached status register
  _crc = bitrev(_stat_reg & SR_MASK);
#endif
  startTransmission();

  const uint8_t sendError = putByte(STAT_REG_R);
  if (sendError != 0) {
    *result = 0xFF;
    return sendError;
  }

#ifdef CRC_ENA
  // The command byte and the register value both feed the CRC
  calcCRC(STAT_REG_R, &_crc);
  *result = getByte(ACK);
  calcCRC(*result, &_crc);

  // Last byte on the wire is the sensor's checksum (compared bit-reversed)
  uint8_t checksum = getByte(noACK);
  checksum = bitrev(checksum);
  if (checksum != _crc) {
    *result = 0xFF;
    return S_Err_CRC;
  }
#else
  *result = getByte(noACK);
#endif
  return 0;
}
示例#2
0
/*
 * One-time setup of the filter work buffers.
 *
 *  - zeroes the PING/PONG input buffers and their output buffers for the
 *    first FFT_LENGTH - (ORDER - 1) entries,
 *  - loads the ORDER coefficients B[] into h[] (zero padded to FFT_LENGTH)
 *    and clears V, pingU and pongU,
 *  - clears the ORDER - 1 entries of Z,
 *  - fills the twiddle table w[] and the index tables iw[] / ix[],
 *  - transforms h[] in place with cfftr2_dit() and reorders it.
 *
 * Uses the file-scope loop counter n.
 */
void init(void)
{
	/* Input/output double buffers */
	for(n = 0; n < (FFT_LENGTH - (ORDER - 1)); n++)
	{
		PING[n].re = 0;
		PING[n].im = 0;

		PONG[n].re = 0;
		PONG[n].im = 0;

		outputPING[n] = 0;
		outputPONG[n] = 0;
	}

	/* Full-length complex buffers */
	for(n = 0; n < FFT_LENGTH; n++)
	{
		/* Filter taps go in the first ORDER slots, the rest is zero padding */
		if(n >= ORDER)
		{
			h[n].re = 0;
			h[n].im = 0;
		}
		else
		{
			h[n].re = B[n];
			h[n].im = 0;
		}
		V[n].re = 0;
		V[n].im = 0;

		pingU[n].re = 0;
		pingU[n].im = 0;

		pongU[n].re = 0;
		/* Fixed: this used to write pongU[0].im on every pass, so the
		 * imaginary part of every element but the first was never cleared. */
		pongU[n].im = 0;
	}

	for(n = 0; n < ORDER - 1; n++){
		Z[n].re = 0;
		Z[n].im = 0;
	}

	// compute first N/2 twiddle factors
	for(n=0;n<FFT_LENGTH/RADIX;n++){
		w[n].re = cos(DELTA*n);
		/* NOTE(review): the original comment called this the "negative imag
		 * component"; that only holds if DELTA is negative - confirm. */
		w[n].im = sin(DELTA*n);
	}

	/* Index tables start out filled with -1 before digitrev_index() runs */
	for (n=0; n <FFT_LENGTH/2; n++)
	  iw[n] = -1;

	for (n=0; n<FFT_LENGTH; n++)
	  ix[n] = -1;

	/* Reorder the twiddle table using the index table built for it */
	digitrev_index(iw, FFT_LENGTH/RADIX, RADIX);
	bitrev(w, iw, FFT_LENGTH/RADIX);

	/* Transform the zero-padded filter taps in place, then reorder them */
	cfftr2_dit(h, w, FFT_LENGTH);
	digitrev_index(ix, FFT_LENGTH/RADIX, RADIX);
	bitrev(h, ix, FFT_LENGTH/RADIX);
}
/*
 * Dump the key matrix: a column header followed by one line per row showing
 * the row number in hex and the bit-reversed row value in 8-digit binary.
 */
void matrix_print(void)
{
    uint8_t row = 0;

    print("\nr/c 01234567\n");
    while (row < matrix_rows()) {
        xprintf("%02X: %08b\n", row, bitrev(matrix_get_row(row)));
        row++;
    }
}
示例#4
0
/*
 * Dump the key matrix: a column header followed by one line per row showing
 * the row number in hex, the bit-reversed row value in binary and, when
 * MATRIX_HAS_GHOST is defined, a " <ghost" marker for rows flagged by
 * matrix_has_ghost_in_row().
 *
 * The header width, the binary field width and the bit-reversal helper
 * (bitrev / bitrev16 / bitrev32) are selected at compile time from
 * MATRIX_COLS.
 *
 * NOTE(review): there is no branch for MATRIX_COLS > 32; in that case no
 * xprintf( opener is emitted and the argument lines below will not compile.
 */
void matrix_print(void)
{
    /* Column header sized to the matrix width */
#if (MATRIX_COLS <= 8)
    print("r/c 01234567\n");
#elif (MATRIX_COLS <= 16)
    print("r/c 0123456789ABCDEF\n");
#elif (MATRIX_COLS <= 32)
    print("r/c 0123456789ABCDEF0123456789ABCDEF\n");
#endif

    for (uint8_t row = 0; row < MATRIX_ROWS; row++) {

        /* First half of the xprintf call: format string, row and row bits */
#if (MATRIX_COLS <= 8)
        xprintf("%02X: %08b%s\n", row, bitrev(matrix_get_row(row)),
#elif (MATRIX_COLS <= 16)
        xprintf("%02X: %016b%s\n", row, bitrev16(matrix_get_row(row)),
#elif (MATRIX_COLS <= 32)
        xprintf("%02X: %032b%s\n", row, bitrev32(matrix_get_row(row)),
#endif
        /* Second half: the %s argument (ghost marker or empty string) */
#ifdef MATRIX_HAS_GHOST
        matrix_has_ghost_in_row(row) ?  " <ghost" : ""
#else
        ""
#endif
        );
    }
}
示例#5
0
/*
 * Inverse transform of 2^power points.
 *
 * src_re / src_im are handed to bitrev(); the work is then done on the
 * file-level buffers buf_re / buf_im (presumably filled by bitrev() - confirm):
 * the imaginary buffer is negated, the size-specific kernel selected by the
 * defsprdx / loopcall macros runs, both buffers are scaled by 1/N, and the
 * result is copied to dst_re / dst_im.
 *
 * NOTE(review): only one conjugation (before the transform) is visible here;
 * confirm whether the kernel or the caller accounts for the second one.
 *
 * k and i are kept declared because the defsprdx / loopcall macro bodies are
 * not visible here and may reference them.
 */
void eefft_ifft(float* dst_re, float* dst_im, float* src_re, float* src_im,
    int power)
{
    int k, i, N;
    float rcp;
    size_t bytes;

    N = pow(2, power);
    rcp = 1.0 / N;
    /* Size of one output array; sizeof(float) replaces the hard-coded 4. */
    bytes = (size_t)N * sizeof(float);

    bitrev(src_re, src_im, power);
    
    for(i = 0; i < N; i ++)
        buf_im[i] *= -1;
    
    switch(power)
    {
        defsprdx(1);
        defsprdx(2);
        defsprdx(3);
        loopcall(defsprdx);
        default:
        break;
    }
    
    /* Normalise by the transform length */
    for(i = 0; i < N; i ++)
    {
        buf_re[i] *= rcp;
        buf_im[i] *= rcp;
    }
    
    memcpy(dst_re, buf_re, bytes);
    memcpy(dst_im, buf_im, bytes);
}
示例#6
0
 // In-place iterative radix-2 transform of the n-element array a
 // (n and bn are defined outside this function; bn is the number of
 // butterfly levels).  Elements are first permuted with bitrev(), then
 // combined level by level using the root W(2^level, inv).
 void dft(comp *a,bool inv) {
     // Permutation pass: swap each index with its bitrev() partner once
     for (int idx = 0; idx < n; ++idx) {
         int partner = bitrev(idx);
         if (partner > idx) std::swap(a[partner], a[idx]);
     }

     comp root, twiddle, even, odd, next;
     for (int level = 1; level <= bn; ++level) {
         const int span = 1 << level;
         const int half = 1 << (level - 1);
         root = W(span, inv);
         twiddle = comp(1, 0);

         for (int k = 0; k < half; ++k) {
             // Same twiddle for offset k in every block of this level
             for (int base = 0; base < n; base += span) {
                 const int lo = base + k;
                 const int hi = lo + half;

                 even = a[lo];
                 odd.x = twiddle.x * a[hi].x - twiddle.y * a[hi].y;
                 odd.y = twiddle.x * a[hi].y + twiddle.y * a[hi].x;

                 a[lo].x = even.x + odd.x;
                 a[lo].y = even.y + odd.y;
                 a[hi].x = even.x - odd.x;
                 a[hi].y = even.y - odd.y;
             }

             // Advance the twiddle: twiddle *= root
             next.x = twiddle.x * root.x - twiddle.y * root.y;
             next.y = twiddle.x * root.y + twiddle.y * root.x;
             twiddle = next;
         }
     }
 }
示例#7
0
// Start a measurement.  cmd == TEMP selects MEAS_TEMP, anything else
// selects MEAS_HUMI.
// Non-blocking (block == false): the result pointer is saved in _presult
// and 0 is returned as soon as the command has been sent.
// Blocking: polls the data pin every 3 ms, at most 240 times (720 ms), then
// reads the value through getResult().
// Returns putByte()'s error code, S_Err_TO on timeout, or whatever
// getResult() returns.
uint8_t Sensirion::meas(uint8_t cmd, uint16_t *result, bool block) {
#ifdef CRC_ENA
  // Seed the running CRC from the bit-reversed, masked cached status register
  _crc = bitrev(_stat_reg & SR_MASK);
#endif
  startTransmission();

  cmd = (cmd == TEMP) ? MEAS_TEMP : MEAS_HUMI;
  const uint8_t sendError = putByte(cmd);
  if (sendError != 0)
    return sendError;
#ifdef CRC_ENA
  // The command byte is part of the CRC
  calcCRC(cmd, &_crc);
#endif

  if (!block) {
    // Caller collects the value later; remember where to put it
    _presult = result;
    return 0;
  }

  // Wait while the data line reads high, giving up after 240 polls
  uint8_t pollsLeft = 240;
  while (digitalRead(_pinData)) {
    if (--pollsLeft == 0)
      return S_Err_TO;              // Error: Timeout
    delay(3);
  }
  return getResult(result);
}
示例#8
0
文件: main.c 项目: xwaynec/eplab
/* permutes the array using a bit-reversal permutation */ 
void permute_bitrev(int n, int *A_re, int *A_im) 
{ 
	int idata i;
	int idata bri;
	int idata log2n;
	int idata t_re;
	int idata t_im;

	log2n = log_2(n); 

	for (i=0; i<n; i++)
	{
		bri  = bitrev(i, log2n);

		/* skip already swapped elements */
		if (bri <= i) continue;

		t_re = A_re[i];
		t_im = A_im[i];
		A_re[i]= A_re[bri];
		A_im[i]= A_im[bri];
		A_re[bri]= t_re;
		A_im[bri]= t_im;
	}  
} 
示例#9
0
/*
 * Runs the forward transform pipeline in place on f (length N, defined
 * outside this function): scramble, forward butterflies, bit reversal,
 * forward sums, then scales only the first element by INVROOT2.
 * Presumably the unscaled forward cosine transform - confirm against callers.
 * The steps are order dependent; each one consumes the previous one's output.
 */
static void fct_noscale(double *f)
{
  scramble(f,N);          /* initial reordering of the input */
  fwd_butterflies(f);
  bitrev(f,N);            /* bit-reversal reordering between the two stages */
  fwd_sums(f);
  f[0] *= INVROOT2;       /* only the first term gets the INVROOT2 factor */
}
示例#10
0
/*
 * Runs the inverse transform pipeline in place on f (length N, defined
 * outside this function): scales the first element by INVROOT2, then applies
 * inverse sums, bit reversal, inverse butterflies and unscramble.
 * Presumably the unscaled inverse cosine transform - confirm against callers.
 * The steps are order dependent; each one consumes the previous one's output.
 */
static void ifct_noscale(double *f)
{
  f[0] *= INVROOT2;       /* only the first term gets the INVROOT2 factor */
  inv_sums(f);
  bitrev(f,N);            /* bit-reversal reordering between the two stages */
  inv_butterflies(f);
  unscramble(f,N);        /* final reordering of the output */
}
示例#11
0
文件: tree.c 项目: llevin/sigproc
/*
 * In-place staged pairwise summation over outbuf.
 *
 * outbuf is treated as nchn consecutive rows of ndat1 = nsamp + 2*nchn
 * floats, where nsamp = mlen/nchn - 2*nchn.  round(log2(nchn)) stages are
 * run; at each stage the rows are grouped in blocks of 2, 4, 8, ... rows and
 * the rows selected by bitrev(pair, stage) / bitrev(pair + 1, stage) are
 * combined over npts = nsamp + nchn samples with sample offsets delay and
 * delay + 1, where delay counts the pairs already processed in the block.
 */
void taylor_flt(float outbuf[], int mlen, int nchn)
{
  int   bitrev(int, int);
  int   nsamp, npts, ndat1, nstages;
  int   stage, block, nblocks, sec, pair, base, delay;
  int   row_a, row_b, row_a_end;
  float sum;

  nsamp   = (mlen / nchn) - (2 * nchn);
  npts    = nsamp + nchn;
  ndat1   = nsamp + 2 * nchn;
  nstages = (int)(log((float)nchn) / 0.6931471 + 0.5);

  block = 1;
  for (stage = 1; stage <= nstages; stage++) {
    block  *= 2;                 /* rows per block at this stage */
    nblocks = nchn / block;

    for (sec = 0; sec < nblocks; sec++) {
      base  = sec * block;
      delay = 0;

      for (pair = 0; pair < block - 1; pair += 2) {
        /* start offsets of the two rows being combined */
        row_a     = (bitrev(pair, stage) + base) * ndat1;
        row_b     = (bitrev(pair + 1, stage) + base) * ndat1;
        row_a_end = row_a + npts;

        while (row_a < row_a_end) {
          /* both sums read the old outbuf[row_a], so it is written last */
          sum           = outbuf[row_a] + outbuf[row_b + delay];
          outbuf[row_b] = outbuf[row_a] + outbuf[row_b + delay + 1];
          outbuf[row_a] = sum;
          row_a++;
          row_b++;
        }
        delay++;
      }
    }
  }
}
示例#12
0
/*
 * Prints bitrev(i) for i = 0..0xFF as C array initialiser text, eight
 * entries per line, each line prefixed with a comment giving the index of
 * its first entry.
 */
int
main(void)
{
	int value = 0;

	while (value <= 0xFF) {
		const int column = value & 7;

		/* first entry of a line: emit the index comment */
		if (column == 0)
			printf("/* 0x%02X */", value);

		printf(" 0x%02X,", bitrev(value));

		/* eighth entry: finish the line */
		if (column == 7)
			printf("\n");

		value++;
	}
	return 0;
}
示例#13
0
/*
 * Reorders f (len elements) in place: bit-reverses the whole array, then
 * each half separately, and finally swaps elements of the upper half
 * pairwise from its two ends inward, len/4 swaps in total.
 */
static void scramble(double *f,int len){
  const int half = len >> 1;
  int hi = len - 1;
  int lo = half;
  int swaps_left;

  bitrev(f, len);
  bitrev(f, half);
  bitrev(f + half, half);

  for(swaps_left = len >> 2; swaps_left > 0; swaps_left--){
    SWAP(f[hi], f[lo]);
    hi--;
    lo++;
  }
}
示例#14
0
/*
 * Forward transform of 2^power points.
 *
 * src_re / src_im are handed to bitrev(); the size-specific kernel selected
 * by the defsprdx / loopcall macros then runs on the file-level buffers
 * buf_re / buf_im (presumably filled by bitrev() - confirm), and the result
 * is copied to dst_re / dst_im.
 *
 * k and i are kept declared because the defsprdx / loopcall macro bodies are
 * not visible here and may reference them.
 */
void eefft_fft(float* dst_re, float* dst_im, float* src_re, float* src_im,
    int power)
{
    int k, i;
    size_t bytes;

    /* Size of one output array; computed once, and sizeof(float) replaces
     * the hard-coded 4. */
    bytes = (size_t)pow(2, power) * sizeof(float);

    bitrev(src_re, src_im, power);
    
    switch(power)
    {
        defsprdx(1);
        defsprdx(2);
        defsprdx(3);
        loopcall(defsprdx);
        default:
        break;
    }
    memcpy(dst_re, buf_re, bytes);
    memcpy(dst_im, buf_im, bytes);
}
示例#15
0
/* Fills W_re / W_im with the first n/2 roots of unity, stored in
 * bit-reversed order: W[bitrev(i,log2n-1)] = e^(2*pi*i/n).
 * n and log2n are defined outside this function; n should be a power of 2.
 * Note: the table is bit-reversal permuted because fft(..) goes faster that
 *       way; see that function for why 'i' is treated as a (log2n-1) bit
 *       number.
 */
void compute_W(float *W_re, float *W_im)
{
  int k;

  for (k = 0; k < (n/2); k++)
  {
    const int slot = bitrev(k, log2n-1);
    const double theta = ((float)k*2.0*M_PI)/((float)n);

    W_re[slot] = cos(theta);
    W_im[slot] = sin(theta);
  }
  #ifdef COMMENT_ONLY
  /* debug dump of the table in storage order */
  for (k = 0; k < (n/2); k++)
  {
    printf("(%g\t%g)\n", W_re[k], W_im[k]);
  }
  #endif
}
示例#16
0
// In-place radix-2 FFT over array[0 .. fft_size-1].
// Runs ilog2(fft_size) stages; at each stage the array is split into groups
// whose size halves from stage to stage, every group uses one twiddle
// factor whose angle comes from the bit-reversed group counter, and the top
// and bottom halves of the group are combined pairwise.  The array is
// passed through ComplexBitReverse() at the end.
void FftDitNiso( complex *array,
                 int fft_size)
{
  complex twiddle;
  complex product;
  const int num_stages = ilog2(fft_size);
  int half_span = fft_size;

  for (int stage = 1; stage <= num_stages; stage++) {
    // span = N / 2**(stage-1), half_span = N / 2**stage
    const int span = half_span;
    half_span /= 2;

    int group_index = 0;
    for (int base = 0; base < fft_size; base += span) {
      const double angle =
          (TWO_PI*bitrev(group_index, num_stages-1))/fft_size;
      twiddle = complex(cos(angle), -sin(angle));

      const int top_end = base + half_span;
      for (int top = base; top < top_end; top++) {
        const int bottom = top + half_span;
        product = array[bottom] * twiddle;
        array[bottom] = array[top] - product;
        array[top] += product;
      }

      group_index++;
    }
  }

  ComplexBitReverse(array, fft_size);
}
示例#17
0
/* Fills W_re / W_im with the first n/2 roots of unity, stored in
 * bit-reversed order: W[bitrev(i,log2n-1)] = e^(2*pi*i/n).
 * n should be a power of 2.
 * Note: the table is bit-reversal permuted because fft(..) goes faster that
 *       way; see that function for why 'i' is treated as a (log2n-1) bit
 *       number.
 */
void compute_W(int n, double *W_re, double *W_im)
{
  const int bits = log_2(n);
  int k;

  for (k = 0; k < (n/2); k++)
  {
    const int slot = bitrev(k, bits-1);
    const double theta = ((double)k*2.0*M_PI)/((double)n);

    W_re[slot] = cos(theta);
    W_im[slot] = sin(theta);
  }
  #ifdef COMMENT_ONLY
  /* debug dump of the table in storage order */
  for (k = 0; k < (n/2); k++)
  {
    printf("(%g\t%g)\n", W_re[k], W_im[k]);
  }
  #endif
}
示例#18
0
文件: main.c 项目: xwaynec/eplab
/* W will contain roots of unity so that W[bitrev(i,log2n-1)] = e^(2*pi*i/n)
 * n should be a power of 2
 * Note: W is bit-reversal permuted because fft(..) goes faster if this is done.
 *       see that function for more details on why we treat 'i' as a (log2n-1) bit number.
 *
 * Integer variant: n, the tables and the locals are int, declared with the
 * idata qualifier.
 *
 * NOTE(review): as written, the angle (i*2*3)/(n) is evaluated in integer
 * arithmetic (pi replaced by 3, quotient truncated), and the cos()/sin()
 * results are truncated again when stored into the int tables, so the
 * entries can only be -1, 0 or 1.  A proper fix needs a fixed-point scale
 * agreed with the code that consumes W_re / W_im - confirm with the callers.
 */
void compute_W(int idata n, int idata W_re[], int idata W_im[])
{
	int idata i;
	int idata br;
	int log2n = log_2(n);

	for (i=0; i<(n/2); i++)
	{
		/* table slot for root i */
		br = bitrev(i,log2n-1); 
		W_re[br] = cos((i*2*3)/(n));  
		W_im[br] = sin((i*2*3)/(n));  
	}
	/* disabled debug dump of the table */
//#ifdef COMMENT_ONLY 
//	for (i=0;i<(n/2);i++)
//	{ 
//		br = i; //bitrev(i,log2n-1); 
//		printf("(%g\t%g)\n", W_re[br], W_im[br]);
//	}  
//#endif 
}
示例#19
0
// Read a 16-bit measurement from the sensor, high byte first, into *result.
// With CRC_ENA defined, both data bytes are folded into the running CRC and
// the following checksum byte is bit-reversed and compared with it; on
// mismatch *result is set to 0xFFFF and S_Err_CRC is returned.
// Returns 0 on success.
uint8_t Sensirion::getResult(uint16_t *result) {
#ifdef CRC_ENA
  const uint8_t highByte = getByte(ACK);
  calcCRC(highByte, &_crc);
  const uint8_t lowByte = getByte(ACK);
  calcCRC(lowByte, &_crc);

  // Widen before shifting so the shift is done on a 16-bit unsigned value
  uint16_t word = highByte;
  word = (word << 8) | lowByte;
  *result = word;

  // Last byte on the wire is the sensor's checksum (compared bit-reversed)
  uint8_t checksum = getByte(noACK);
  checksum = bitrev(checksum);
  if (checksum != _crc) {
    *result = 0xFFFF;
    return S_Err_CRC;
  }
#else
  uint16_t word = getByte(ACK);
  word = (word << 8) | getByte(noACK);
  *result = word;
#endif
  return 0;
}
示例#20
0
/*
 * Reorders the n-element arrays A_re / A_im in place so that element i
 * trades places with element bitrev(i, log_2(n)).
 */
void permute_bitrev(int n, double *A_re, double *A_im)
{
  const int bits = log_2(n);
  int idx;

  for (idx = 0; idx < n; idx++)
  {
      const int rev = bitrev(idx, bits);

      /* swap each pair only once, when visiting its lower index */
      if (rev > idx)
      {
          const double save_re = A_re[idx];
          const double save_im = A_im[idx];

          A_re[idx] = A_re[rev];
          A_im[idx] = A_im[rev];
          A_re[rev] = save_re;
          A_im[rev] = save_im;
      }
  }
}
示例#21
0
/*
 * Naive out-of-place bit-reversal copy: Y[bitrev(i, lgn)] = X[i] for the n
 * elements of X.  The first and last elements are copied straight across.
 * nbits is not used.  Prints a notice that this routine is untuned on every
 * call.
 */
void incachebitrev(float *X, float *Y, int nbits, int n, int lgn)
{
    int src, dst;

    /* Notice to the user that this bit-reverse is a placeholder */
    printf("This code is not at all tuned and not part of COBRA.\n");  
    printf("In fact this is a very naive implementation.\n");
    printf("We suggest putting in your own in-cache bit-reversal here ");
    printf("for robust performance.\n");
    printf("This code can be found in the bottom of file bitrev.c.\n");
    printf("These printfs can be removed with no ill effect.\n");

    Y[0] = X[0];
    Y[n-1] = X[n-1];

    src = 1;
    while (src < n-1)
    {
        dst = bitrev(src, lgn);
        /* handle each pair once, from its lower index */
        if (dst >= src)
        {
            Y[src] = X[dst];
            Y[dst] = X[src];
        }
        src++;
    }
}
示例#22
0
文件: b512.c 项目: melizalab/aplot
void bitrev512( float x[], int length )
{
	register float temp;	switch (length) {
	    case 512:
		temp= x[2]; x[2] = x[512]; x[512] = temp;
		temp= x[3]; x[3] = x[513]; x[513] = temp;
		temp= x[4]; x[4] = x[256]; x[256] = temp;
		temp= x[5]; x[5] = x[257]; x[257] = temp;
		temp= x[6]; x[6] = x[768]; x[768] = temp;
		temp= x[7]; x[7] = x[769]; x[769] = temp;
		temp= x[8]; x[8] = x[128]; x[128] = temp;
		temp= x[9]; x[9] = x[129]; x[129] = temp;
		temp= x[10]; x[10] = x[640]; x[640] = temp;
		temp= x[11]; x[11] = x[641]; x[641] = temp;
		temp= x[12]; x[12] = x[384]; x[384] = temp;
		temp= x[13]; x[13] = x[385]; x[385] = temp;
		temp= x[14]; x[14] = x[896]; x[896] = temp;
		temp= x[15]; x[15] = x[897]; x[897] = temp;
		temp= x[16]; x[16] = x[64]; x[64] = temp;
		temp= x[17]; x[17] = x[65]; x[65] = temp;
		temp= x[18]; x[18] = x[576]; x[576] = temp;
		temp= x[19]; x[19] = x[577]; x[577] = temp;
		temp= x[20]; x[20] = x[320]; x[320] = temp;
		temp= x[21]; x[21] = x[321]; x[321] = temp;
		temp= x[22]; x[22] = x[832]; x[832] = temp;
		temp= x[23]; x[23] = x[833]; x[833] = temp;
		temp= x[24]; x[24] = x[192]; x[192] = temp;
		temp= x[25]; x[25] = x[193]; x[193] = temp;
		temp= x[26]; x[26] = x[704]; x[704] = temp;
		temp= x[27]; x[27] = x[705]; x[705] = temp;
		temp= x[28]; x[28] = x[448]; x[448] = temp;
		temp= x[29]; x[29] = x[449]; x[449] = temp;
		temp= x[30]; x[30] = x[960]; x[960] = temp;
		temp= x[31]; x[31] = x[961]; x[961] = temp;
		temp= x[34]; x[34] = x[544]; x[544] = temp;
		temp= x[35]; x[35] = x[545]; x[545] = temp;
		temp= x[36]; x[36] = x[288]; x[288] = temp;
		temp= x[37]; x[37] = x[289]; x[289] = temp;
		temp= x[38]; x[38] = x[800]; x[800] = temp;
		temp= x[39]; x[39] = x[801]; x[801] = temp;
		temp= x[40]; x[40] = x[160]; x[160] = temp;
		temp= x[41]; x[41] = x[161]; x[161] = temp;
		temp= x[42]; x[42] = x[672]; x[672] = temp;
		temp= x[43]; x[43] = x[673]; x[673] = temp;
		temp= x[44]; x[44] = x[416]; x[416] = temp;
		temp= x[45]; x[45] = x[417]; x[417] = temp;
		temp= x[46]; x[46] = x[928]; x[928] = temp;
		temp= x[47]; x[47] = x[929]; x[929] = temp;
		temp= x[48]; x[48] = x[96]; x[96] = temp;
		temp= x[49]; x[49] = x[97]; x[97] = temp;
		temp= x[50]; x[50] = x[608]; x[608] = temp;
		temp= x[51]; x[51] = x[609]; x[609] = temp;
		temp= x[52]; x[52] = x[352]; x[352] = temp;
		temp= x[53]; x[53] = x[353]; x[353] = temp;
		temp= x[54]; x[54] = x[864]; x[864] = temp;
		temp= x[55]; x[55] = x[865]; x[865] = temp;
		temp= x[56]; x[56] = x[224]; x[224] = temp;
		temp= x[57]; x[57] = x[225]; x[225] = temp;
		temp= x[58]; x[58] = x[736]; x[736] = temp;
		temp= x[59]; x[59] = x[737]; x[737] = temp;
		temp= x[60]; x[60] = x[480]; x[480] = temp;
		temp= x[61]; x[61] = x[481]; x[481] = temp;
		temp= x[62]; x[62] = x[992]; x[992] = temp;
		temp= x[63]; x[63] = x[993]; x[993] = temp;
		temp= x[66]; x[66] = x[528]; x[528] = temp;
		temp= x[67]; x[67] = x[529]; x[529] = temp;
		temp= x[68]; x[68] = x[272]; x[272] = temp;
		temp= x[69]; x[69] = x[273]; x[273] = temp;
		temp= x[70]; x[70] = x[784]; x[784] = temp;
		temp= x[71]; x[71] = x[785]; x[785] = temp;
		temp= x[72]; x[72] = x[144]; x[144] = temp;
		temp= x[73]; x[73] = x[145]; x[145] = temp;
		temp= x[74]; x[74] = x[656]; x[656] = temp;
		temp= x[75]; x[75] = x[657]; x[657] = temp;
		temp= x[76]; x[76] = x[400]; x[400] = temp;
		temp= x[77]; x[77] = x[401]; x[401] = temp;
		temp= x[78]; x[78] = x[912]; x[912] = temp;
		temp= x[79]; x[79] = x[913]; x[913] = temp;
		temp= x[82]; x[82] = x[592]; x[592] = temp;
		temp= x[83]; x[83] = x[593]; x[593] = temp;
		temp= x[84]; x[84] = x[336]; x[336] = temp;
		temp= x[85]; x[85] = x[337]; x[337] = temp;
		temp= x[86]; x[86] = x[848]; x[848] = temp;
		temp= x[87]; x[87] = x[849]; x[849] = temp;
		temp= x[88]; x[88] = x[208]; x[208] = temp;
		temp= x[89]; x[89] = x[209]; x[209] = temp;
		temp= x[90]; x[90] = x[720]; x[720] = temp;
		temp= x[91]; x[91] = x[721]; x[721] = temp;
		temp= x[92]; x[92] = x[464]; x[464] = temp;
		temp= x[93]; x[93] = x[465]; x[465] = temp;
		temp= x[94]; x[94] = x[976]; x[976] = temp;
		temp= x[95]; x[95] = x[977]; x[977] = temp;
		temp= x[98]; x[98] = x[560]; x[560] = temp;
		temp= x[99]; x[99] = x[561]; x[561] = temp;
		temp= x[100]; x[100] = x[304]; x[304] = temp;
		temp= x[101]; x[101] = x[305]; x[305] = temp;
		temp= x[102]; x[102] = x[816]; x[816] = temp;
		temp= x[103]; x[103] = x[817]; x[817] = temp;
		temp= x[104]; x[104] = x[176]; x[176] = temp;
		temp= x[105]; x[105] = x[177]; x[177] = temp;
		temp= x[106]; x[106] = x[688]; x[688] = temp;
		temp= x[107]; x[107] = x[689]; x[689] = temp;
		temp= x[108]; x[108] = x[432]; x[432] = temp;
		temp= x[109]; x[109] = x[433]; x[433] = temp;
		temp= x[110]; x[110] = x[944]; x[944] = temp;
		temp= x[111]; x[111] = x[945]; x[945] = temp;
		temp= x[114]; x[114] = x[624]; x[624] = temp;
		temp= x[115]; x[115] = x[625]; x[625] = temp;
		temp= x[116]; x[116] = x[368]; x[368] = temp;
		temp= x[117]; x[117] = x[369]; x[369] = temp;
		temp= x[118]; x[118] = x[880]; x[880] = temp;
		temp= x[119]; x[119] = x[881]; x[881] = temp;
		temp= x[120]; x[120] = x[240]; x[240] = temp;
		temp= x[121]; x[121] = x[241]; x[241] = temp;
		temp= x[122]; x[122] = x[752]; x[752] = temp;
		temp= x[123]; x[123] = x[753]; x[753] = temp;
		temp= x[124]; x[124] = x[496]; x[496] = temp;
		temp= x[125]; x[125] = x[497]; x[497] = temp;
		temp= x[126]; x[126] = x[1008]; x[1008] = temp;
		temp= x[127]; x[127] = x[1009]; x[1009] = temp;
		temp= x[130]; x[130] = x[520]; x[520] = temp;
		temp= x[131]; x[131] = x[521]; x[521] = temp;
		temp= x[132]; x[132] = x[264]; x[264] = temp;
		temp= x[133]; x[133] = x[265]; x[265] = temp;
		temp= x[134]; x[134] = x[776]; x[776] = temp;
		temp= x[135]; x[135] = x[777]; x[777] = temp;
		temp= x[138]; x[138] = x[648]; x[648] = temp;
		temp= x[139]; x[139] = x[649]; x[649] = temp;
		temp= x[140]; x[140] = x[392]; x[392] = temp;
		temp= x[141]; x[141] = x[393]; x[393] = temp;
		temp= x[142]; x[142] = x[904]; x[904] = temp;
		temp= x[143]; x[143] = x[905]; x[905] = temp;
		temp= x[146]; x[146] = x[584]; x[584] = temp;
		temp= x[147]; x[147] = x[585]; x[585] = temp;
		temp= x[148]; x[148] = x[328]; x[328] = temp;
		temp= x[149]; x[149] = x[329]; x[329] = temp;
		temp= x[150]; x[150] = x[840]; x[840] = temp;
		temp= x[151]; x[151] = x[841]; x[841] = temp;
		temp= x[152]; x[152] = x[200]; x[200] = temp;
		temp= x[153]; x[153] = x[201]; x[201] = temp;
		temp= x[154]; x[154] = x[712]; x[712] = temp;
		temp= x[155]; x[155] = x[713]; x[713] = temp;
		temp= x[156]; x[156] = x[456]; x[456] = temp;
		temp= x[157]; x[157] = x[457]; x[457] = temp;
		temp= x[158]; x[158] = x[968]; x[968] = temp;
		temp= x[159]; x[159] = x[969]; x[969] = temp;
		temp= x[162]; x[162] = x[552]; x[552] = temp;
		temp= x[163]; x[163] = x[553]; x[553] = temp;
		temp= x[164]; x[164] = x[296]; x[296] = temp;
		temp= x[165]; x[165] = x[297]; x[297] = temp;
		temp= x[166]; x[166] = x[808]; x[808] = temp;
		temp= x[167]; x[167] = x[809]; x[809] = temp;
		temp= x[170]; x[170] = x[680]; x[680] = temp;
		temp= x[171]; x[171] = x[681]; x[681] = temp;
		temp= x[172]; x[172] = x[424]; x[424] = temp;
		temp= x[173]; x[173] = x[425]; x[425] = temp;
		temp= x[174]; x[174] = x[936]; x[936] = temp;
		temp= x[175]; x[175] = x[937]; x[937] = temp;
		temp= x[178]; x[178] = x[616]; x[616] = temp;
		temp= x[179]; x[179] = x[617]; x[617] = temp;
		temp= x[180]; x[180] = x[360]; x[360] = temp;
		temp= x[181]; x[181] = x[361]; x[361] = temp;
		temp= x[182]; x[182] = x[872]; x[872] = temp;
		temp= x[183]; x[183] = x[873]; x[873] = temp;
		temp= x[184]; x[184] = x[232]; x[232] = temp;
		temp= x[185]; x[185] = x[233]; x[233] = temp;
		temp= x[186]; x[186] = x[744]; x[744] = temp;
		temp= x[187]; x[187] = x[745]; x[745] = temp;
		temp= x[188]; x[188] = x[488]; x[488] = temp;
		temp= x[189]; x[189] = x[489]; x[489] = temp;
		temp= x[190]; x[190] = x[1000]; x[1000] = temp;
		temp= x[191]; x[191] = x[1001]; x[1001] = temp;
		temp= x[194]; x[194] = x[536]; x[536] = temp;
		temp= x[195]; x[195] = x[537]; x[537] = temp;
		temp= x[196]; x[196] = x[280]; x[280] = temp;
		temp= x[197]; x[197] = x[281]; x[281] = temp;
		temp= x[198]; x[198] = x[792]; x[792] = temp;
		temp= x[199]; x[199] = x[793]; x[793] = temp;
		temp= x[202]; x[202] = x[664]; x[664] = temp;
		temp= x[203]; x[203] = x[665]; x[665] = temp;
		temp= x[204]; x[204] = x[408]; x[408] = temp;
		temp= x[205]; x[205] = x[409]; x[409] = temp;
		temp= x[206]; x[206] = x[920]; x[920] = temp;
		temp= x[207]; x[207] = x[921]; x[921] = temp;
		temp= x[210]; x[210] = x[600]; x[600] = temp;
		temp= x[211]; x[211] = x[601]; x[601] = temp;
		temp= x[212]; x[212] = x[344]; x[344] = temp;
		temp= x[213]; x[213] = x[345]; x[345] = temp;
		temp= x[214]; x[214] = x[856]; x[856] = temp;
		temp= x[215]; x[215] = x[857]; x[857] = temp;
		temp= x[218]; x[218] = x[728]; x[728] = temp;
		temp= x[219]; x[219] = x[729]; x[729] = temp;
		temp= x[220]; x[220] = x[472]; x[472] = temp;
		temp= x[221]; x[221] = x[473]; x[473] = temp;
		temp= x[222]; x[222] = x[984]; x[984] = temp;
		temp= x[223]; x[223] = x[985]; x[985] = temp;
		temp= x[226]; x[226] = x[568]; x[568] = temp;
		temp= x[227]; x[227] = x[569]; x[569] = temp;
		temp= x[228]; x[228] = x[312]; x[312] = temp;
		temp= x[229]; x[229] = x[313]; x[313] = temp;
		temp= x[230]; x[230] = x[824]; x[824] = temp;
		temp= x[231]; x[231] = x[825]; x[825] = temp;
		temp= x[234]; x[234] = x[696]; x[696] = temp;
		temp= x[235]; x[235] = x[697]; x[697] = temp;
		temp= x[236]; x[236] = x[440]; x[440] = temp;
		temp= x[237]; x[237] = x[441]; x[441] = temp;
		temp= x[238]; x[238] = x[952]; x[952] = temp;
		temp= x[239]; x[239] = x[953]; x[953] = temp;
		temp= x[242]; x[242] = x[632]; x[632] = temp;
		temp= x[243]; x[243] = x[633]; x[633] = temp;
		temp= x[244]; x[244] = x[376]; x[376] = temp;
		temp= x[245]; x[245] = x[377]; x[377] = temp;
		temp= x[246]; x[246] = x[888]; x[888] = temp;
		temp= x[247]; x[247] = x[889]; x[889] = temp;
		temp= x[250]; x[250] = x[760]; x[760] = temp;
		temp= x[251]; x[251] = x[761]; x[761] = temp;
		temp= x[252]; x[252] = x[504]; x[504] = temp;
		temp= x[253]; x[253] = x[505]; x[505] = temp;
		temp= x[254]; x[254] = x[1016]; x[1016] = temp;
		temp= x[255]; x[255] = x[1017]; x[1017] = temp;
		temp= x[258]; x[258] = x[516]; x[516] = temp;
		temp= x[259]; x[259] = x[517]; x[517] = temp;
		temp= x[262]; x[262] = x[772]; x[772] = temp;
		temp= x[263]; x[263] = x[773]; x[773] = temp;
		temp= x[266]; x[266] = x[644]; x[644] = temp;
		temp= x[267]; x[267] = x[645]; x[645] = temp;
		temp= x[268]; x[268] = x[388]; x[388] = temp;
		temp= x[269]; x[269] = x[389]; x[389] = temp;
		temp= x[270]; x[270] = x[900]; x[900] = temp;
		temp= x[271]; x[271] = x[901]; x[901] = temp;
		temp= x[274]; x[274] = x[580]; x[580] = temp;
		temp= x[275]; x[275] = x[581]; x[581] = temp;
		temp= x[276]; x[276] = x[324]; x[324] = temp;
		temp= x[277]; x[277] = x[325]; x[325] = temp;
		temp= x[278]; x[278] = x[836]; x[836] = temp;
		temp= x[279]; x[279] = x[837]; x[837] = temp;
		temp= x[282]; x[282] = x[708]; x[708] = temp;
		temp= x[283]; x[283] = x[709]; x[709] = temp;
		temp= x[284]; x[284] = x[452]; x[452] = temp;
		temp= x[285]; x[285] = x[453]; x[453] = temp;
		temp= x[286]; x[286] = x[964]; x[964] = temp;
		temp= x[287]; x[287] = x[965]; x[965] = temp;
		temp= x[290]; x[290] = x[548]; x[548] = temp;
		temp= x[291]; x[291] = x[549]; x[549] = temp;
		temp= x[294]; x[294] = x[804]; x[804] = temp;
		temp= x[295]; x[295] = x[805]; x[805] = temp;
		temp= x[298]; x[298] = x[676]; x[676] = temp;
		temp= x[299]; x[299] = x[677]; x[677] = temp;
		temp= x[300]; x[300] = x[420]; x[420] = temp;
		temp= x[301]; x[301] = x[421]; x[421] = temp;
		temp= x[302]; x[302] = x[932]; x[932] = temp;
		temp= x[303]; x[303] = x[933]; x[933] = temp;
		temp= x[306]; x[306] = x[612]; x[612] = temp;
		temp= x[307]; x[307] = x[613]; x[613] = temp;
		temp= x[308]; x[308] = x[356]; x[356] = temp;
		temp= x[309]; x[309] = x[357]; x[357] = temp;
		temp= x[310]; x[310] = x[868]; x[868] = temp;
		temp= x[311]; x[311] = x[869]; x[869] = temp;
		temp= x[314]; x[314] = x[740]; x[740] = temp;
		temp= x[315]; x[315] = x[741]; x[741] = temp;
		temp= x[316]; x[316] = x[484]; x[484] = temp;
		temp= x[317]; x[317] = x[485]; x[485] = temp;
		temp= x[318]; x[318] = x[996]; x[996] = temp;
		temp= x[319]; x[319] = x[997]; x[997] = temp;
		temp= x[322]; x[322] = x[532]; x[532] = temp;
		temp= x[323]; x[323] = x[533]; x[533] = temp;
		temp= x[326]; x[326] = x[788]; x[788] = temp;
		temp= x[327]; x[327] = x[789]; x[789] = temp;
		temp= x[330]; x[330] = x[660]; x[660] = temp;
		temp= x[331]; x[331] = x[661]; x[661] = temp;
		temp= x[332]; x[332] = x[404]; x[404] = temp;
		temp= x[333]; x[333] = x[405]; x[405] = temp;
		temp= x[334]; x[334] = x[916]; x[916] = temp;
		temp= x[335]; x[335] = x[917]; x[917] = temp;
		temp= x[338]; x[338] = x[596]; x[596] = temp;
		temp= x[339]; x[339] = x[597]; x[597] = temp;
		temp= x[342]; x[342] = x[852]; x[852] = temp;
		temp= x[343]; x[343] = x[853]; x[853] = temp;
		temp= x[346]; x[346] = x[724]; x[724] = temp;
		temp= x[347]; x[347] = x[725]; x[725] = temp;
		temp= x[348]; x[348] = x[468]; x[468] = temp;
		temp= x[349]; x[349] = x[469]; x[469] = temp;
		temp= x[350]; x[350] = x[980]; x[980] = temp;
		temp= x[351]; x[351] = x[981]; x[981] = temp;
		temp= x[354]; x[354] = x[564]; x[564] = temp;
		temp= x[355]; x[355] = x[565]; x[565] = temp;
		temp= x[358]; x[358] = x[820]; x[820] = temp;
		temp= x[359]; x[359] = x[821]; x[821] = temp;
		temp= x[362]; x[362] = x[692]; x[692] = temp;
		temp= x[363]; x[363] = x[693]; x[693] = temp;
		temp= x[364]; x[364] = x[436]; x[436] = temp;
		temp= x[365]; x[365] = x[437]; x[437] = temp;
		temp= x[366]; x[366] = x[948]; x[948] = temp;
		temp= x[367]; x[367] = x[949]; x[949] = temp;
		temp= x[370]; x[370] = x[628]; x[628] = temp;
		temp= x[371]; x[371] = x[629]; x[629] = temp;
		temp= x[374]; x[374] = x[884]; x[884] = temp;
		temp= x[375]; x[375] = x[885]; x[885] = temp;
		temp= x[378]; x[378] = x[756]; x[756] = temp;
		temp= x[379]; x[379] = x[757]; x[757] = temp;
		temp= x[380]; x[380] = x[500]; x[500] = temp;
		temp= x[381]; x[381] = x[501]; x[501] = temp;
		temp= x[382]; x[382] = x[1012]; x[1012] = temp;
		temp= x[383]; x[383] = x[1013]; x[1013] = temp;
		temp= x[386]; x[386] = x[524]; x[524] = temp;
		temp= x[387]; x[387] = x[525]; x[525] = temp;
		temp= x[390]; x[390] = x[780]; x[780] = temp;
		temp= x[391]; x[391] = x[781]; x[781] = temp;
		temp= x[394]; x[394] = x[652]; x[652] = temp;
		temp= x[395]; x[395] = x[653]; x[653] = temp;
		temp= x[398]; x[398] = x[908]; x[908] = temp;
		temp= x[399]; x[399] = x[909]; x[909] = temp;
		temp= x[402]; x[402] = x[588]; x[588] = temp;
		temp= x[403]; x[403] = x[589]; x[589] = temp;
		temp= x[406]; x[406] = x[844]; x[844] = temp;
		temp= x[407]; x[407] = x[845]; x[845] = temp;
		temp= x[410]; x[410] = x[716]; x[716] = temp;
		temp= x[411]; x[411] = x[717]; x[717] = temp;
		temp= x[412]; x[412] = x[460]; x[460] = temp;
		temp= x[413]; x[413] = x[461]; x[461] = temp;
		temp= x[414]; x[414] = x[972]; x[972] = temp;
		temp= x[415]; x[415] = x[973]; x[973] = temp;
		temp= x[418]; x[418] = x[556]; x[556] = temp;
		temp= x[419]; x[419] = x[557]; x[557] = temp;
		temp= x[422]; x[422] = x[812]; x[812] = temp;
		temp= x[423]; x[423] = x[813]; x[813] = temp;
		temp= x[426]; x[426] = x[684]; x[684] = temp;
		temp= x[427]; x[427] = x[685]; x[685] = temp;
		temp= x[430]; x[430] = x[940]; x[940] = temp;
		temp= x[431]; x[431] = x[941]; x[941] = temp;
		temp= x[434]; x[434] = x[620]; x[620] = temp;
		temp= x[435]; x[435] = x[621]; x[621] = temp;
		temp= x[438]; x[438] = x[876]; x[876] = temp;
		temp= x[439]; x[439] = x[877]; x[877] = temp;
		temp= x[442]; x[442] = x[748]; x[748] = temp;
		temp= x[443]; x[443] = x[749]; x[749] = temp;
		temp= x[444]; x[444] = x[492]; x[492] = temp;
		temp= x[445]; x[445] = x[493]; x[493] = temp;
		temp= x[446]; x[446] = x[1004]; x[1004] = temp;
		temp= x[447]; x[447] = x[1005]; x[1005] = temp;
		temp= x[450]; x[450] = x[540]; x[540] = temp;
		temp= x[451]; x[451] = x[541]; x[541] = temp;
		temp= x[454]; x[454] = x[796]; x[796] = temp;
		temp= x[455]; x[455] = x[797]; x[797] = temp;
		temp= x[458]; x[458] = x[668]; x[668] = temp;
		temp= x[459]; x[459] = x[669]; x[669] = temp;
		temp= x[462]; x[462] = x[924]; x[924] = temp;
		temp= x[463]; x[463] = x[925]; x[925] = temp;
		temp= x[466]; x[466] = x[604]; x[604] = temp;
		temp= x[467]; x[467] = x[605]; x[605] = temp;
		temp= x[470]; x[470] = x[860]; x[860] = temp;
		temp= x[471]; x[471] = x[861]; x[861] = temp;
		temp= x[474]; x[474] = x[732]; x[732] = temp;
		temp= x[475]; x[475] = x[733]; x[733] = temp;
		temp= x[478]; x[478] = x[988]; x[988] = temp;
		temp= x[479]; x[479] = x[989]; x[989] = temp;
		temp= x[482]; x[482] = x[572]; x[572] = temp;
		temp= x[483]; x[483] = x[573]; x[573] = temp;
		temp= x[486]; x[486] = x[828]; x[828] = temp;
		temp= x[487]; x[487] = x[829]; x[829] = temp;
		temp= x[490]; x[490] = x[700]; x[700] = temp;
		temp= x[491]; x[491] = x[701]; x[701] = temp;
		temp= x[494]; x[494] = x[956]; x[956] = temp;
		temp= x[495]; x[495] = x[957]; x[957] = temp;
		temp= x[498]; x[498] = x[636]; x[636] = temp;
		temp= x[499]; x[499] = x[637]; x[637] = temp;
		temp= x[502]; x[502] = x[892]; x[892] = temp;
		temp= x[503]; x[503] = x[893]; x[893] = temp;
		temp= x[506]; x[506] = x[764]; x[764] = temp;
		temp= x[507]; x[507] = x[765]; x[765] = temp;
		temp= x[510]; x[510] = x[1020]; x[1020] = temp;
		temp= x[511]; x[511] = x[1021]; x[1021] = temp;
		temp= x[518]; x[518] = x[770]; x[770] = temp;
		temp= x[519]; x[519] = x[771]; x[771] = temp;
		temp= x[522]; x[522] = x[642]; x[642] = temp;
		temp= x[523]; x[523] = x[643]; x[643] = temp;
		temp= x[526]; x[526] = x[898]; x[898] = temp;
		temp= x[527]; x[527] = x[899]; x[899] = temp;
		temp= x[530]; x[530] = x[578]; x[578] = temp;
		temp= x[531]; x[531] = x[579]; x[579] = temp;
		temp= x[534]; x[534] = x[834]; x[834] = temp;
		temp= x[535]; x[535] = x[835]; x[835] = temp;
		temp= x[538]; x[538] = x[706]; x[706] = temp;
		temp= x[539]; x[539] = x[707]; x[707] = temp;
		temp= x[542]; x[542] = x[962]; x[962] = temp;
		temp= x[543]; x[543] = x[963]; x[963] = temp;
		temp= x[550]; x[550] = x[802]; x[802] = temp;
		temp= x[551]; x[551] = x[803]; x[803] = temp;
		temp= x[554]; x[554] = x[674]; x[674] = temp;
		temp= x[555]; x[555] = x[675]; x[675] = temp;
		temp= x[558]; x[558] = x[930]; x[930] = temp;
		temp= x[559]; x[559] = x[931]; x[931] = temp;
		temp= x[562]; x[562] = x[610]; x[610] = temp;
		temp= x[563]; x[563] = x[611]; x[611] = temp;
		temp= x[566]; x[566] = x[866]; x[866] = temp;
		temp= x[567]; x[567] = x[867]; x[867] = temp;
		temp= x[570]; x[570] = x[738]; x[738] = temp;
		temp= x[571]; x[571] = x[739]; x[739] = temp;
		temp= x[574]; x[574] = x[994]; x[994] = temp;
		temp= x[575]; x[575] = x[995]; x[995] = temp;
		temp= x[582]; x[582] = x[786]; x[786] = temp;
		temp= x[583]; x[583] = x[787]; x[787] = temp;
		temp= x[586]; x[586] = x[658]; x[658] = temp;
		temp= x[587]; x[587] = x[659]; x[659] = temp;
		temp= x[590]; x[590] = x[914]; x[914] = temp;
		temp= x[591]; x[591] = x[915]; x[915] = temp;
		temp= x[598]; x[598] = x[850]; x[850] = temp;
		temp= x[599]; x[599] = x[851]; x[851] = temp;
		temp= x[602]; x[602] = x[722]; x[722] = temp;
		temp= x[603]; x[603] = x[723]; x[723] = temp;
		temp= x[606]; x[606] = x[978]; x[978] = temp;
		temp= x[607]; x[607] = x[979]; x[979] = temp;
		temp= x[614]; x[614] = x[818]; x[818] = temp;
		temp= x[615]; x[615] = x[819]; x[819] = temp;
		temp= x[618]; x[618] = x[690]; x[690] = temp;
		temp= x[619]; x[619] = x[691]; x[691] = temp;
		temp= x[622]; x[622] = x[946]; x[946] = temp;
		temp= x[623]; x[623] = x[947]; x[947] = temp;
		temp= x[630]; x[630] = x[882]; x[882] = temp;
		temp= x[631]; x[631] = x[883]; x[883] = temp;
		temp= x[634]; x[634] = x[754]; x[754] = temp;
		temp= x[635]; x[635] = x[755]; x[755] = temp;
		temp= x[638]; x[638] = x[1010]; x[1010] = temp;
		temp= x[639]; x[639] = x[1011]; x[1011] = temp;
		temp= x[646]; x[646] = x[778]; x[778] = temp;
		temp= x[647]; x[647] = x[779]; x[779] = temp;
		temp= x[654]; x[654] = x[906]; x[906] = temp;
		temp= x[655]; x[655] = x[907]; x[907] = temp;
		temp= x[662]; x[662] = x[842]; x[842] = temp;
		temp= x[663]; x[663] = x[843]; x[843] = temp;
		temp= x[666]; x[666] = x[714]; x[714] = temp;
		temp= x[667]; x[667] = x[715]; x[715] = temp;
		temp= x[670]; x[670] = x[970]; x[970] = temp;
		temp= x[671]; x[671] = x[971]; x[971] = temp;
		temp= x[678]; x[678] = x[810]; x[810] = temp;
		temp= x[679]; x[679] = x[811]; x[811] = temp;
		temp= x[686]; x[686] = x[938]; x[938] = temp;
		temp= x[687]; x[687] = x[939]; x[939] = temp;
		temp= x[694]; x[694] = x[874]; x[874] = temp;
		temp= x[695]; x[695] = x[875]; x[875] = temp;
		temp= x[698]; x[698] = x[746]; x[746] = temp;
		temp= x[699]; x[699] = x[747]; x[747] = temp;
		temp= x[702]; x[702] = x[1002]; x[1002] = temp;
		temp= x[703]; x[703] = x[1003]; x[1003] = temp;
		temp= x[710]; x[710] = x[794]; x[794] = temp;
		temp= x[711]; x[711] = x[795]; x[795] = temp;
		temp= x[718]; x[718] = x[922]; x[922] = temp;
		temp= x[719]; x[719] = x[923]; x[923] = temp;
		temp= x[726]; x[726] = x[858]; x[858] = temp;
		temp= x[727]; x[727] = x[859]; x[859] = temp;
		temp= x[734]; x[734] = x[986]; x[986] = temp;
		temp= x[735]; x[735] = x[987]; x[987] = temp;
		temp= x[742]; x[742] = x[826]; x[826] = temp;
		temp= x[743]; x[743] = x[827]; x[827] = temp;
		temp= x[750]; x[750] = x[954]; x[954] = temp;
		temp= x[751]; x[751] = x[955]; x[955] = temp;
		temp= x[758]; x[758] = x[890]; x[890] = temp;
		temp= x[759]; x[759] = x[891]; x[891] = temp;
		temp= x[766]; x[766] = x[1018]; x[1018] = temp;
		temp= x[767]; x[767] = x[1019]; x[1019] = temp;
		temp= x[782]; x[782] = x[902]; x[902] = temp;
		temp= x[783]; x[783] = x[903]; x[903] = temp;
		temp= x[790]; x[790] = x[838]; x[838] = temp;
		temp= x[791]; x[791] = x[839]; x[839] = temp;
		temp= x[798]; x[798] = x[966]; x[966] = temp;
		temp= x[799]; x[799] = x[967]; x[967] = temp;
		temp= x[814]; x[814] = x[934]; x[934] = temp;
		temp= x[815]; x[815] = x[935]; x[935] = temp;
		temp= x[822]; x[822] = x[870]; x[870] = temp;
		temp= x[823]; x[823] = x[871]; x[871] = temp;
		temp= x[830]; x[830] = x[998]; x[998] = temp;
		temp= x[831]; x[831] = x[999]; x[999] = temp;
		temp= x[846]; x[846] = x[918]; x[918] = temp;
		temp= x[847]; x[847] = x[919]; x[919] = temp;
		temp= x[862]; x[862] = x[982]; x[982] = temp;
		temp= x[863]; x[863] = x[983]; x[983] = temp;
		temp= x[878]; x[878] = x[950]; x[950] = temp;
		temp= x[879]; x[879] = x[951]; x[951] = temp;
		temp= x[894]; x[894] = x[1014]; x[1014] = temp;
		temp= x[895]; x[895] = x[1015]; x[1015] = temp;
		temp= x[926]; x[926] = x[974]; x[974] = temp;
		temp= x[927]; x[927] = x[975]; x[975] = temp;
		temp= x[958]; x[958] = x[1006]; x[1006] = temp;
		temp= x[959]; x[959] = x[1007]; x[1007] = temp;
		break;
	    case 256:
		temp= x[2]; x[2] = x[256]; x[256] = temp;
		temp= x[3]; x[3] = x[257]; x[257] = temp;
		temp= x[4]; x[4] = x[128]; x[128] = temp;
		temp= x[5]; x[5] = x[129]; x[129] = temp;
		temp= x[6]; x[6] = x[384]; x[384] = temp;
		temp= x[7]; x[7] = x[385]; x[385] = temp;
		temp= x[8]; x[8] = x[64]; x[64] = temp;
		temp= x[9]; x[9] = x[65]; x[65] = temp;
		temp= x[10]; x[10] = x[320]; x[320] = temp;
		temp= x[11]; x[11] = x[321]; x[321] = temp;
		temp= x[12]; x[12] = x[192]; x[192] = temp;
		temp= x[13]; x[13] = x[193]; x[193] = temp;
		temp= x[14]; x[14] = x[448]; x[448] = temp;
		temp= x[15]; x[15] = x[449]; x[449] = temp;
		temp= x[16]; x[16] = x[32]; x[32] = temp;
		temp= x[17]; x[17] = x[33]; x[33] = temp;
		temp= x[18]; x[18] = x[288]; x[288] = temp;
		temp= x[19]; x[19] = x[289]; x[289] = temp;
		temp= x[20]; x[20] = x[160]; x[160] = temp;
		temp= x[21]; x[21] = x[161]; x[161] = temp;
		temp= x[22]; x[22] = x[416]; x[416] = temp;
		temp= x[23]; x[23] = x[417]; x[417] = temp;
		temp= x[24]; x[24] = x[96]; x[96] = temp;
		temp= x[25]; x[25] = x[97]; x[97] = temp;
		temp= x[26]; x[26] = x[352]; x[352] = temp;
		temp= x[27]; x[27] = x[353]; x[353] = temp;
		temp= x[28]; x[28] = x[224]; x[224] = temp;
		temp= x[29]; x[29] = x[225]; x[225] = temp;
		temp= x[30]; x[30] = x[480]; x[480] = temp;
		temp= x[31]; x[31] = x[481]; x[481] = temp;
		temp= x[34]; x[34] = x[272]; x[272] = temp;
		temp= x[35]; x[35] = x[273]; x[273] = temp;
		temp= x[36]; x[36] = x[144]; x[144] = temp;
		temp= x[37]; x[37] = x[145]; x[145] = temp;
		temp= x[38]; x[38] = x[400]; x[400] = temp;
		temp= x[39]; x[39] = x[401]; x[401] = temp;
		temp= x[40]; x[40] = x[80]; x[80] = temp;
		temp= x[41]; x[41] = x[81]; x[81] = temp;
		temp= x[42]; x[42] = x[336]; x[336] = temp;
		temp= x[43]; x[43] = x[337]; x[337] = temp;
		temp= x[44]; x[44] = x[208]; x[208] = temp;
		temp= x[45]; x[45] = x[209]; x[209] = temp;
		temp= x[46]; x[46] = x[464]; x[464] = temp;
		temp= x[47]; x[47] = x[465]; x[465] = temp;
		temp= x[50]; x[50] = x[304]; x[304] = temp;
		temp= x[51]; x[51] = x[305]; x[305] = temp;
		temp= x[52]; x[52] = x[176]; x[176] = temp;
		temp= x[53]; x[53] = x[177]; x[177] = temp;
		temp= x[54]; x[54] = x[432]; x[432] = temp;
		temp= x[55]; x[55] = x[433]; x[433] = temp;
		temp= x[56]; x[56] = x[112]; x[112] = temp;
		temp= x[57]; x[57] = x[113]; x[113] = temp;
		temp= x[58]; x[58] = x[368]; x[368] = temp;
		temp= x[59]; x[59] = x[369]; x[369] = temp;
		temp= x[60]; x[60] = x[240]; x[240] = temp;
		temp= x[61]; x[61] = x[241]; x[241] = temp;
		temp= x[62]; x[62] = x[496]; x[496] = temp;
		temp= x[63]; x[63] = x[497]; x[497] = temp;
		temp= x[66]; x[66] = x[264]; x[264] = temp;
		temp= x[67]; x[67] = x[265]; x[265] = temp;
		temp= x[68]; x[68] = x[136]; x[136] = temp;
		temp= x[69]; x[69] = x[137]; x[137] = temp;
		temp= x[70]; x[70] = x[392]; x[392] = temp;
		temp= x[71]; x[71] = x[393]; x[393] = temp;
		temp= x[74]; x[74] = x[328]; x[328] = temp;
		temp= x[75]; x[75] = x[329]; x[329] = temp;
		temp= x[76]; x[76] = x[200]; x[200] = temp;
		temp= x[77]; x[77] = x[201]; x[201] = temp;
		temp= x[78]; x[78] = x[456]; x[456] = temp;
		temp= x[79]; x[79] = x[457]; x[457] = temp;
		temp= x[82]; x[82] = x[296]; x[296] = temp;
		temp= x[83]; x[83] = x[297]; x[297] = temp;
		temp= x[84]; x[84] = x[168]; x[168] = temp;
		temp= x[85]; x[85] = x[169]; x[169] = temp;
		temp= x[86]; x[86] = x[424]; x[424] = temp;
		temp= x[87]; x[87] = x[425]; x[425] = temp;
		temp= x[88]; x[88] = x[104]; x[104] = temp;
		temp= x[89]; x[89] = x[105]; x[105] = temp;
		temp= x[90]; x[90] = x[360]; x[360] = temp;
		temp= x[91]; x[91] = x[361]; x[361] = temp;
		temp= x[92]; x[92] = x[232]; x[232] = temp;
		temp= x[93]; x[93] = x[233]; x[233] = temp;
		temp= x[94]; x[94] = x[488]; x[488] = temp;
		temp= x[95]; x[95] = x[489]; x[489] = temp;
		temp= x[98]; x[98] = x[280]; x[280] = temp;
		temp= x[99]; x[99] = x[281]; x[281] = temp;
		temp= x[100]; x[100] = x[152]; x[152] = temp;
		temp= x[101]; x[101] = x[153]; x[153] = temp;
		temp= x[102]; x[102] = x[408]; x[408] = temp;
		temp= x[103]; x[103] = x[409]; x[409] = temp;
		temp= x[106]; x[106] = x[344]; x[344] = temp;
		temp= x[107]; x[107] = x[345]; x[345] = temp;
		temp= x[108]; x[108] = x[216]; x[216] = temp;
		temp= x[109]; x[109] = x[217]; x[217] = temp;
		temp= x[110]; x[110] = x[472]; x[472] = temp;
		temp= x[111]; x[111] = x[473]; x[473] = temp;
		temp= x[114]; x[114] = x[312]; x[312] = temp;
		temp= x[115]; x[115] = x[313]; x[313] = temp;
		temp= x[116]; x[116] = x[184]; x[184] = temp;
		temp= x[117]; x[117] = x[185]; x[185] = temp;
		temp= x[118]; x[118] = x[440]; x[440] = temp;
		temp= x[119]; x[119] = x[441]; x[441] = temp;
		temp= x[122]; x[122] = x[376]; x[376] = temp;
		temp= x[123]; x[123] = x[377]; x[377] = temp;
		temp= x[124]; x[124] = x[248]; x[248] = temp;
		temp= x[125]; x[125] = x[249]; x[249] = temp;
		temp= x[126]; x[126] = x[504]; x[504] = temp;
		temp= x[127]; x[127] = x[505]; x[505] = temp;
		temp= x[130]; x[130] = x[260]; x[260] = temp;
		temp= x[131]; x[131] = x[261]; x[261] = temp;
		temp= x[134]; x[134] = x[388]; x[388] = temp;
		temp= x[135]; x[135] = x[389]; x[389] = temp;
		temp= x[138]; x[138] = x[324]; x[324] = temp;
		temp= x[139]; x[139] = x[325]; x[325] = temp;
		temp= x[140]; x[140] = x[196]; x[196] = temp;
		temp= x[141]; x[141] = x[197]; x[197] = temp;
		temp= x[142]; x[142] = x[452]; x[452] = temp;
		temp= x[143]; x[143] = x[453]; x[453] = temp;
		temp= x[146]; x[146] = x[292]; x[292] = temp;
		temp= x[147]; x[147] = x[293]; x[293] = temp;
		temp= x[148]; x[148] = x[164]; x[164] = temp;
		temp= x[149]; x[149] = x[165]; x[165] = temp;
		temp= x[150]; x[150] = x[420]; x[420] = temp;
		temp= x[151]; x[151] = x[421]; x[421] = temp;
		temp= x[154]; x[154] = x[356]; x[356] = temp;
		temp= x[155]; x[155] = x[357]; x[357] = temp;
		temp= x[156]; x[156] = x[228]; x[228] = temp;
		temp= x[157]; x[157] = x[229]; x[229] = temp;
		temp= x[158]; x[158] = x[484]; x[484] = temp;
		temp= x[159]; x[159] = x[485]; x[485] = temp;
		temp= x[162]; x[162] = x[276]; x[276] = temp;
		temp= x[163]; x[163] = x[277]; x[277] = temp;
		temp= x[166]; x[166] = x[404]; x[404] = temp;
		temp= x[167]; x[167] = x[405]; x[405] = temp;
		temp= x[170]; x[170] = x[340]; x[340] = temp;
		temp= x[171]; x[171] = x[341]; x[341] = temp;
		temp= x[172]; x[172] = x[212]; x[212] = temp;
		temp= x[173]; x[173] = x[213]; x[213] = temp;
		temp= x[174]; x[174] = x[468]; x[468] = temp;
		temp= x[175]; x[175] = x[469]; x[469] = temp;
		temp= x[178]; x[178] = x[308]; x[308] = temp;
		temp= x[179]; x[179] = x[309]; x[309] = temp;
		temp= x[182]; x[182] = x[436]; x[436] = temp;
		temp= x[183]; x[183] = x[437]; x[437] = temp;
		temp= x[186]; x[186] = x[372]; x[372] = temp;
		temp= x[187]; x[187] = x[373]; x[373] = temp;
		temp= x[188]; x[188] = x[244]; x[244] = temp;
		temp= x[189]; x[189] = x[245]; x[245] = temp;
		temp= x[190]; x[190] = x[500]; x[500] = temp;
		temp= x[191]; x[191] = x[501]; x[501] = temp;
		temp= x[194]; x[194] = x[268]; x[268] = temp;
		temp= x[195]; x[195] = x[269]; x[269] = temp;
		temp= x[198]; x[198] = x[396]; x[396] = temp;
		temp= x[199]; x[199] = x[397]; x[397] = temp;
		temp= x[202]; x[202] = x[332]; x[332] = temp;
		temp= x[203]; x[203] = x[333]; x[333] = temp;
		temp= x[206]; x[206] = x[460]; x[460] = temp;
		temp= x[207]; x[207] = x[461]; x[461] = temp;
		temp= x[210]; x[210] = x[300]; x[300] = temp;
		temp= x[211]; x[211] = x[301]; x[301] = temp;
		temp= x[214]; x[214] = x[428]; x[428] = temp;
		temp= x[215]; x[215] = x[429]; x[429] = temp;
		temp= x[218]; x[218] = x[364]; x[364] = temp;
		temp= x[219]; x[219] = x[365]; x[365] = temp;
		temp= x[220]; x[220] = x[236]; x[236] = temp;
		temp= x[221]; x[221] = x[237]; x[237] = temp;
		temp= x[222]; x[222] = x[492]; x[492] = temp;
		temp= x[223]; x[223] = x[493]; x[493] = temp;
		temp= x[226]; x[226] = x[284]; x[284] = temp;
		temp= x[227]; x[227] = x[285]; x[285] = temp;
		temp= x[230]; x[230] = x[412]; x[412] = temp;
		temp= x[231]; x[231] = x[413]; x[413] = temp;
		temp= x[234]; x[234] = x[348]; x[348] = temp;
		temp= x[235]; x[235] = x[349]; x[349] = temp;
		temp= x[238]; x[238] = x[476]; x[476] = temp;
		temp= x[239]; x[239] = x[477]; x[477] = temp;
		temp= x[242]; x[242] = x[316]; x[316] = temp;
		temp= x[243]; x[243] = x[317]; x[317] = temp;
		temp= x[246]; x[246] = x[444]; x[444] = temp;
		temp= x[247]; x[247] = x[445]; x[445] = temp;
		temp= x[250]; x[250] = x[380]; x[380] = temp;
		temp= x[251]; x[251] = x[381]; x[381] = temp;
		temp= x[254]; x[254] = x[508]; x[508] = temp;
		temp= x[255]; x[255] = x[509]; x[509] = temp;
		temp= x[262]; x[262] = x[386]; x[386] = temp;
		temp= x[263]; x[263] = x[387]; x[387] = temp;
		temp= x[266]; x[266] = x[322]; x[322] = temp;
		temp= x[267]; x[267] = x[323]; x[323] = temp;
		temp= x[270]; x[270] = x[450]; x[450] = temp;
		temp= x[271]; x[271] = x[451]; x[451] = temp;
		temp= x[274]; x[274] = x[290]; x[290] = temp;
		temp= x[275]; x[275] = x[291]; x[291] = temp;
		temp= x[278]; x[278] = x[418]; x[418] = temp;
		temp= x[279]; x[279] = x[419]; x[419] = temp;
		temp= x[282]; x[282] = x[354]; x[354] = temp;
		temp= x[283]; x[283] = x[355]; x[355] = temp;
		temp= x[286]; x[286] = x[482]; x[482] = temp;
		temp= x[287]; x[287] = x[483]; x[483] = temp;
		temp= x[294]; x[294] = x[402]; x[402] = temp;
		temp= x[295]; x[295] = x[403]; x[403] = temp;
		temp= x[298]; x[298] = x[338]; x[338] = temp;
		temp= x[299]; x[299] = x[339]; x[339] = temp;
		temp= x[302]; x[302] = x[466]; x[466] = temp;
		temp= x[303]; x[303] = x[467]; x[467] = temp;
		temp= x[310]; x[310] = x[434]; x[434] = temp;
		temp= x[311]; x[311] = x[435]; x[435] = temp;
		temp= x[314]; x[314] = x[370]; x[370] = temp;
		temp= x[315]; x[315] = x[371]; x[371] = temp;
		temp= x[318]; x[318] = x[498]; x[498] = temp;
		temp= x[319]; x[319] = x[499]; x[499] = temp;
		temp= x[326]; x[326] = x[394]; x[394] = temp;
		temp= x[327]; x[327] = x[395]; x[395] = temp;
		temp= x[334]; x[334] = x[458]; x[458] = temp;
		temp= x[335]; x[335] = x[459]; x[459] = temp;
		temp= x[342]; x[342] = x[426]; x[426] = temp;
		temp= x[343]; x[343] = x[427]; x[427] = temp;
		temp= x[346]; x[346] = x[362]; x[362] = temp;
		temp= x[347]; x[347] = x[363]; x[363] = temp;
		temp= x[350]; x[350] = x[490]; x[490] = temp;
		temp= x[351]; x[351] = x[491]; x[491] = temp;
		temp= x[358]; x[358] = x[410]; x[410] = temp;
		temp= x[359]; x[359] = x[411]; x[411] = temp;
		temp= x[366]; x[366] = x[474]; x[474] = temp;
		temp= x[367]; x[367] = x[475]; x[475] = temp;
		temp= x[374]; x[374] = x[442]; x[442] = temp;
		temp= x[375]; x[375] = x[443]; x[443] = temp;
		temp= x[382]; x[382] = x[506]; x[506] = temp;
		temp= x[383]; x[383] = x[507]; x[507] = temp;
		temp= x[398]; x[398] = x[454]; x[454] = temp;
		temp= x[399]; x[399] = x[455]; x[455] = temp;
		temp= x[406]; x[406] = x[422]; x[422] = temp;
		temp= x[407]; x[407] = x[423]; x[423] = temp;
		temp= x[414]; x[414] = x[486]; x[486] = temp;
		temp= x[415]; x[415] = x[487]; x[487] = temp;
		temp= x[430]; x[430] = x[470]; x[470] = temp;
		temp= x[431]; x[431] = x[471]; x[471] = temp;
		temp= x[446]; x[446] = x[502]; x[502] = temp;
		temp= x[447]; x[447] = x[503]; x[503] = temp;
		temp= x[478]; x[478] = x[494]; x[494] = temp;
		temp= x[479]; x[479] = x[495]; x[495] = temp;
		break;
	    case 128:
		temp= x[2]; x[2] = x[128]; x[128] = temp;
		temp= x[3]; x[3] = x[129]; x[129] = temp;
		temp= x[4]; x[4] = x[64]; x[64] = temp;
		temp= x[5]; x[5] = x[65]; x[65] = temp;
		temp= x[6]; x[6] = x[192]; x[192] = temp;
		temp= x[7]; x[7] = x[193]; x[193] = temp;
		temp= x[8]; x[8] = x[32]; x[32] = temp;
		temp= x[9]; x[9] = x[33]; x[33] = temp;
		temp= x[10]; x[10] = x[160]; x[160] = temp;
		temp= x[11]; x[11] = x[161]; x[161] = temp;
		temp= x[12]; x[12] = x[96]; x[96] = temp;
		temp= x[13]; x[13] = x[97]; x[97] = temp;
		temp= x[14]; x[14] = x[224]; x[224] = temp;
		temp= x[15]; x[15] = x[225]; x[225] = temp;
		temp= x[18]; x[18] = x[144]; x[144] = temp;
		temp= x[19]; x[19] = x[145]; x[145] = temp;
		temp= x[20]; x[20] = x[80]; x[80] = temp;
		temp= x[21]; x[21] = x[81]; x[81] = temp;
		temp= x[22]; x[22] = x[208]; x[208] = temp;
		temp= x[23]; x[23] = x[209]; x[209] = temp;
		temp= x[24]; x[24] = x[48]; x[48] = temp;
		temp= x[25]; x[25] = x[49]; x[49] = temp;
		temp= x[26]; x[26] = x[176]; x[176] = temp;
		temp= x[27]; x[27] = x[177]; x[177] = temp;
		temp= x[28]; x[28] = x[112]; x[112] = temp;
		temp= x[29]; x[29] = x[113]; x[113] = temp;
		temp= x[30]; x[30] = x[240]; x[240] = temp;
		temp= x[31]; x[31] = x[241]; x[241] = temp;
		temp= x[34]; x[34] = x[136]; x[136] = temp;
		temp= x[35]; x[35] = x[137]; x[137] = temp;
		temp= x[36]; x[36] = x[72]; x[72] = temp;
		temp= x[37]; x[37] = x[73]; x[73] = temp;
		temp= x[38]; x[38] = x[200]; x[200] = temp;
		temp= x[39]; x[39] = x[201]; x[201] = temp;
		temp= x[42]; x[42] = x[168]; x[168] = temp;
		temp= x[43]; x[43] = x[169]; x[169] = temp;
		temp= x[44]; x[44] = x[104]; x[104] = temp;
		temp= x[45]; x[45] = x[105]; x[105] = temp;
		temp= x[46]; x[46] = x[232]; x[232] = temp;
		temp= x[47]; x[47] = x[233]; x[233] = temp;
		temp= x[50]; x[50] = x[152]; x[152] = temp;
		temp= x[51]; x[51] = x[153]; x[153] = temp;
		temp= x[52]; x[52] = x[88]; x[88] = temp;
		temp= x[53]; x[53] = x[89]; x[89] = temp;
		temp= x[54]; x[54] = x[216]; x[216] = temp;
		temp= x[55]; x[55] = x[217]; x[217] = temp;
		temp= x[58]; x[58] = x[184]; x[184] = temp;
		temp= x[59]; x[59] = x[185]; x[185] = temp;
		temp= x[60]; x[60] = x[120]; x[120] = temp;
		temp= x[61]; x[61] = x[121]; x[121] = temp;
		temp= x[62]; x[62] = x[248]; x[248] = temp;
		temp= x[63]; x[63] = x[249]; x[249] = temp;
		temp= x[66]; x[66] = x[132]; x[132] = temp;
		temp= x[67]; x[67] = x[133]; x[133] = temp;
		temp= x[70]; x[70] = x[196]; x[196] = temp;
		temp= x[71]; x[71] = x[197]; x[197] = temp;
		temp= x[74]; x[74] = x[164]; x[164] = temp;
		temp= x[75]; x[75] = x[165]; x[165] = temp;
		temp= x[76]; x[76] = x[100]; x[100] = temp;
		temp= x[77]; x[77] = x[101]; x[101] = temp;
		temp= x[78]; x[78] = x[228]; x[228] = temp;
		temp= x[79]; x[79] = x[229]; x[229] = temp;
		temp= x[82]; x[82] = x[148]; x[148] = temp;
		temp= x[83]; x[83] = x[149]; x[149] = temp;
		temp= x[86]; x[86] = x[212]; x[212] = temp;
		temp= x[87]; x[87] = x[213]; x[213] = temp;
		temp= x[90]; x[90] = x[180]; x[180] = temp;
		temp= x[91]; x[91] = x[181]; x[181] = temp;
		temp= x[92]; x[92] = x[116]; x[116] = temp;
		temp= x[93]; x[93] = x[117]; x[117] = temp;
		temp= x[94]; x[94] = x[244]; x[244] = temp;
		temp= x[95]; x[95] = x[245]; x[245] = temp;
		temp= x[98]; x[98] = x[140]; x[140] = temp;
		temp= x[99]; x[99] = x[141]; x[141] = temp;
		temp= x[102]; x[102] = x[204]; x[204] = temp;
		temp= x[103]; x[103] = x[205]; x[205] = temp;
		temp= x[106]; x[106] = x[172]; x[172] = temp;
		temp= x[107]; x[107] = x[173]; x[173] = temp;
		temp= x[110]; x[110] = x[236]; x[236] = temp;
		temp= x[111]; x[111] = x[237]; x[237] = temp;
		temp= x[114]; x[114] = x[156]; x[156] = temp;
		temp= x[115]; x[115] = x[157]; x[157] = temp;
		temp= x[118]; x[118] = x[220]; x[220] = temp;
		temp= x[119]; x[119] = x[221]; x[221] = temp;
		temp= x[122]; x[122] = x[188]; x[188] = temp;
		temp= x[123]; x[123] = x[189]; x[189] = temp;
		temp= x[126]; x[126] = x[252]; x[252] = temp;
		temp= x[127]; x[127] = x[253]; x[253] = temp;
		temp= x[134]; x[134] = x[194]; x[194] = temp;
		temp= x[135]; x[135] = x[195]; x[195] = temp;
		temp= x[138]; x[138] = x[162]; x[162] = temp;
		temp= x[139]; x[139] = x[163]; x[163] = temp;
		temp= x[142]; x[142] = x[226]; x[226] = temp;
		temp= x[143]; x[143] = x[227]; x[227] = temp;
		temp= x[150]; x[150] = x[210]; x[210] = temp;
		temp= x[151]; x[151] = x[211]; x[211] = temp;
		temp= x[154]; x[154] = x[178]; x[178] = temp;
		temp= x[155]; x[155] = x[179]; x[179] = temp;
		temp= x[158]; x[158] = x[242]; x[242] = temp;
		temp= x[159]; x[159] = x[243]; x[243] = temp;
		temp= x[166]; x[166] = x[202]; x[202] = temp;
		temp= x[167]; x[167] = x[203]; x[203] = temp;
		temp= x[174]; x[174] = x[234]; x[234] = temp;
		temp= x[175]; x[175] = x[235]; x[235] = temp;
		temp= x[182]; x[182] = x[218]; x[218] = temp;
		temp= x[183]; x[183] = x[219]; x[219] = temp;
		temp= x[190]; x[190] = x[250]; x[250] = temp;
		temp= x[191]; x[191] = x[251]; x[251] = temp;
		temp= x[206]; x[206] = x[230]; x[230] = temp;
		temp= x[207]; x[207] = x[231]; x[231] = temp;
		temp= x[222]; x[222] = x[246]; x[246] = temp;
		temp= x[223]; x[223] = x[247]; x[247] = temp;
		break;
	    case 64:
		temp= x[2]; x[2] = x[64]; x[64] = temp;
		temp= x[3]; x[3] = x[65]; x[65] = temp;
		temp= x[4]; x[4] = x[32]; x[32] = temp;
		temp= x[5]; x[5] = x[33]; x[33] = temp;
		temp= x[6]; x[6] = x[96]; x[96] = temp;
		temp= x[7]; x[7] = x[97]; x[97] = temp;
		temp= x[8]; x[8] = x[16]; x[16] = temp;
		temp= x[9]; x[9] = x[17]; x[17] = temp;
		temp= x[10]; x[10] = x[80]; x[80] = temp;
		temp= x[11]; x[11] = x[81]; x[81] = temp;
		temp= x[12]; x[12] = x[48]; x[48] = temp;
		temp= x[13]; x[13] = x[49]; x[49] = temp;
		temp= x[14]; x[14] = x[112]; x[112] = temp;
		temp= x[15]; x[15] = x[113]; x[113] = temp;
		temp= x[18]; x[18] = x[72]; x[72] = temp;
		temp= x[19]; x[19] = x[73]; x[73] = temp;
		temp= x[20]; x[20] = x[40]; x[40] = temp;
		temp= x[21]; x[21] = x[41]; x[41] = temp;
		temp= x[22]; x[22] = x[104]; x[104] = temp;
		temp= x[23]; x[23] = x[105]; x[105] = temp;
		temp= x[26]; x[26] = x[88]; x[88] = temp;
		temp= x[27]; x[27] = x[89]; x[89] = temp;
		temp= x[28]; x[28] = x[56]; x[56] = temp;
		temp= x[29]; x[29] = x[57]; x[57] = temp;
		temp= x[30]; x[30] = x[120]; x[120] = temp;
		temp= x[31]; x[31] = x[121]; x[121] = temp;
		temp= x[34]; x[34] = x[68]; x[68] = temp;
		temp= x[35]; x[35] = x[69]; x[69] = temp;
		temp= x[38]; x[38] = x[100]; x[100] = temp;
		temp= x[39]; x[39] = x[101]; x[101] = temp;
		temp= x[42]; x[42] = x[84]; x[84] = temp;
		temp= x[43]; x[43] = x[85]; x[85] = temp;
		temp= x[44]; x[44] = x[52]; x[52] = temp;
		temp= x[45]; x[45] = x[53]; x[53] = temp;
		temp= x[46]; x[46] = x[116]; x[116] = temp;
		temp= x[47]; x[47] = x[117]; x[117] = temp;
		temp= x[50]; x[50] = x[76]; x[76] = temp;
		temp= x[51]; x[51] = x[77]; x[77] = temp;
		temp= x[54]; x[54] = x[108]; x[108] = temp;
		temp= x[55]; x[55] = x[109]; x[109] = temp;
		temp= x[58]; x[58] = x[92]; x[92] = temp;
		temp= x[59]; x[59] = x[93]; x[93] = temp;
		temp= x[62]; x[62] = x[124]; x[124] = temp;
		temp= x[63]; x[63] = x[125]; x[125] = temp;
		temp= x[70]; x[70] = x[98]; x[98] = temp;
		temp= x[71]; x[71] = x[99]; x[99] = temp;
		temp= x[74]; x[74] = x[82]; x[82] = temp;
		temp= x[75]; x[75] = x[83]; x[83] = temp;
		temp= x[78]; x[78] = x[114]; x[114] = temp;
		temp= x[79]; x[79] = x[115]; x[115] = temp;
		temp= x[86]; x[86] = x[106]; x[106] = temp;
		temp= x[87]; x[87] = x[107]; x[107] = temp;
		temp= x[94]; x[94] = x[122]; x[122] = temp;
		temp= x[95]; x[95] = x[123]; x[123] = temp;
		temp= x[110]; x[110] = x[118]; x[118] = temp;
		temp= x[111]; x[111] = x[119]; x[119] = temp;
		break;
	    case 32:
		temp= x[2]; x[2] = x[32]; x[32] = temp;
		temp= x[3]; x[3] = x[33]; x[33] = temp;
		temp= x[4]; x[4] = x[16]; x[16] = temp;
		temp= x[5]; x[5] = x[17]; x[17] = temp;
		temp= x[6]; x[6] = x[48]; x[48] = temp;
		temp= x[7]; x[7] = x[49]; x[49] = temp;
		temp= x[10]; x[10] = x[40]; x[40] = temp;
		temp= x[11]; x[11] = x[41]; x[41] = temp;
		temp= x[12]; x[12] = x[24]; x[24] = temp;
		temp= x[13]; x[13] = x[25]; x[25] = temp;
		temp= x[14]; x[14] = x[56]; x[56] = temp;
		temp= x[15]; x[15] = x[57]; x[57] = temp;
		temp= x[18]; x[18] = x[36]; x[36] = temp;
		temp= x[19]; x[19] = x[37]; x[37] = temp;
		temp= x[22]; x[22] = x[52]; x[52] = temp;
		temp= x[23]; x[23] = x[53]; x[53] = temp;
		temp= x[26]; x[26] = x[44]; x[44] = temp;
		temp= x[27]; x[27] = x[45]; x[45] = temp;
		temp= x[30]; x[30] = x[60]; x[60] = temp;
		temp= x[31]; x[31] = x[61]; x[61] = temp;
		temp= x[38]; x[38] = x[50]; x[50] = temp;
		temp= x[39]; x[39] = x[51]; x[51] = temp;
		temp= x[46]; x[46] = x[58]; x[58] = temp;
		temp= x[47]; x[47] = x[59]; x[59] = temp;
		break;
	    case 16:
		temp= x[2]; x[2] = x[16]; x[16] = temp;
		temp= x[3]; x[3] = x[17]; x[17] = temp;
		temp= x[4]; x[4] = x[8]; x[8] = temp;
		temp= x[5]; x[5] = x[9]; x[9] = temp;
		temp= x[6]; x[6] = x[24]; x[24] = temp;
		temp= x[7]; x[7] = x[25]; x[25] = temp;
		temp= x[10]; x[10] = x[20]; x[20] = temp;
		temp= x[11]; x[11] = x[21]; x[21] = temp;
		temp= x[14]; x[14] = x[28]; x[28] = temp;
		temp= x[15]; x[15] = x[29]; x[29] = temp;
		temp= x[22]; x[22] = x[26]; x[26] = temp;
		temp= x[23]; x[23] = x[27]; x[27] = temp;
		break;
	    case 8:
		temp= x[2]; x[2] = x[8]; x[8] = temp;
		temp= x[3]; x[3] = x[9]; x[9] = temp;
		temp= x[6]; x[6] = x[12]; x[12] = temp;
		temp= x[7]; x[7] = x[13]; x[13] = temp;
		break;
	    case 4:
		temp= x[2]; x[2] = x[4]; x[4] = temp;
		temp= x[3]; x[3] = x[5]; x[5] = temp;
		break;
	    case 2:
		break;
	    default:
		bitrev( x, length );
	}
}
示例#23
0
/*
 * Probe for the MACE ethernet controller and set up its net_device.
 *
 * The argument is ignored. Only the first call does any work; every
 * later call returns -ENODEV immediately.
 *
 * Returns:
 *   0        on success
 *   -ENODEV  if the probe already ran, macintosh_config->ether_type is
 *            not MAC_ETHER_MACE, or the PROM checksum is not 0xFF
 *   -ENOMEM  if init_etherdev() or either ring buffer allocation fails
 */
int mace68k_probe(struct net_device *unused)
{
	int j;
	static int once=0;
	struct mace68k_data *mp;
	unsigned char *addr;
	struct net_device *dev;
	unsigned char checksum = 0;
	
	/*
	 *	There can be only one...
	 */
	 
	if (once) return -ENODEV;
	
	once = 1;

	if (macintosh_config->ether_type != MAC_ETHER_MACE) return -ENODEV;

	printk("MACE ethernet should be present ");
	
	dev = init_etherdev(0, PRIV_BYTES);
	if(dev==NULL)
	{
		printk("no free memory.\n");
		return -ENOMEM;
	}		
	mp = (struct mace68k_data *) dev->priv;
	dev->base_addr = (u32)MACE_BASE;
	mp->mace = (volatile struct mace *) MACE_BASE;
	
	printk("at 0x%p", mp->mace);
	
	/*
	 *	16K RX ring and 4K TX ring should do nicely
	 *	(RX is an order-2 allocation, TX is a single page).
	 */

	mp->rx_ring=(void *)__get_free_pages(GFP_KERNEL, 2);
	mp->tx_ring=(void *)__get_free_page(GFP_KERNEL);
	
	printk(".");
	
	if(mp->tx_ring==NULL || mp->rx_ring==NULL)
	{
		/*
		 * Give back whichever ring was obtained. The RX ring must be
		 * released with the same order (2) it was allocated with.
		 */
		if(mp->tx_ring)
			free_page((u32)mp->tx_ring);
		if(mp->rx_ring)
			free_pages((u32)mp->rx_ring, 2);
		/*
		 * NOTE(review): dev from init_etherdev() is not released on
		 * this path - confirm the correct teardown for this kernel.
		 */
		printk("\nNo memory for ring buffers.\n");
		return -ENOMEM;
	}

	/* We want the receive data to be uncached. We dont care about the
	   byte reading order */

	printk(".");	
	kernel_set_cachemode((void *)mp->rx_ring, 16384, IOMAP_NOCACHE_NONSER);	
	
	printk(".");	
	/* The transmit buffer needs to be write through */
	kernel_set_cachemode((void *)mp->tx_ring, 4096, IOMAP_WRITETHROUGH);

	printk(" Ok\n");	
	dev->irq = IRQ_MAC_MACE;
	printk(KERN_INFO "%s: MACE at", dev->name);

	/*
	 *	The PROM contains 8 bytes which total 0xFF when XOR'd
	 *	together. Due to the usual peculiar apple brain damage
	 *	the bytes are spaced out in a strange boundary and the
	 * 	bits are reversed.
	 *
	 *	Each byte is read at a stride of 16 (addr[j<<4]) and passed
	 *	through bitrev(). The first 6 bytes become the device
	 *	address; all 8 are folded into the checksum.
	 */

	addr = (void *)MACE_PROM;
		 
	for (j = 0; j < 6; ++j)
	{
		u8 v=bitrev(addr[j<<4]);
		checksum^=v;
		dev->dev_addr[j] = v;
		printk("%c%.2x", (j ? ':' : ' '), dev->dev_addr[j]);
	}
	for (; j < 8; ++j)
	{
		checksum^=bitrev(addr[j<<4]);
	}
	
	if(checksum!=0xFF)
	{
		/*
		 * NOTE(review): neither the ring buffers nor dev are released
		 * here. The rings already had their cache mode changed above,
		 * so freeing them as-is may not be safe - confirm before
		 * adding cleanup.
		 */
		printk(" (invalid checksum)\n");
		return -ENODEV;
	}		
	printk("\n");

	memset(&mp->stats, 0, sizeof(mp->stats));
	init_timer(&mp->tx_timeout);
	mp->timeout_active = 0;

	/* Hook up the driver entry points. */
	dev->open = mace68k_open;
	dev->stop = mace68k_close;
	dev->hard_start_xmit = mace68k_xmit_start;
	dev->get_stats = mace68k_stats;
	dev->set_multicast_list = mace68k_set_multicast;
	dev->set_mac_address = mace68k_set_address;

	ether_setup(dev);

	mp = (struct mace68k_data *) dev->priv;
	mp->maccc = ENXMT | ENRCV;
	mp->dma_intr = IRQ_MAC_MACE_DMA;

	/* Write 0x9000 then 0x0400 to both PSC ethernet DMA control registers. */
	psc_write_word(PSC_ENETWR_CTL, 0x9000);
	psc_write_word(PSC_ENETRD_CTL, 0x9000);
	psc_write_word(PSC_ENETWR_CTL, 0x0400);
	psc_write_word(PSC_ENETRD_CTL, 0x0400);

	/* apple's driver doesn't seem to do this */
	/* except at driver shutdown time...      */
#if 0
	mace68k_dma_off(dev);
#endif

	return 0;
}
示例#24
0
void ntt_transform(poly out, const poly o)
{ 
  int s, pos = 0, offset;
  __m256d vt,vo0,vo10,vo11,vo20,vo21,vo22,vo23,vc,vp,vpinv,neg2,neg4;
  __m256d vx0,vx1,vx2,vx3,vx4,vx5,vx6,vx7;
  
  vpinv = _mm256_set_pd(PARAM_APPROX_P_INVERSE, PARAM_APPROX_P_INVERSE, PARAM_APPROX_P_INVERSE, PARAM_APPROX_P_INVERSE);
  vp    = _mm256_set_pd(8383489., 8383489., 8383489., 8383489.);

  bitrev(out);

  vo10 = _mm256_load_pd(o+pos);
  vo20 = _mm256_load_pd(o+pos+4);
  neg2 = _mm256_load_pd(_neg2);
  neg4 = _mm256_load_pd(_neg4);
                                  
  // m = 2, m = 4, m = 8 (3 levels merged)
  for(s = 0; s<POLY_DEG; s+=8)
  {
    // No multiplication with omega required, respective value is 1
    vx0 = _mm256_load_pd(out+s);
    vt = _mm256_mul_pd(vx0,neg2);
    vx0 = _mm256_hadd_pd(vx0,vt);

    vx1 = _mm256_load_pd(out+s+4);
    vt = _mm256_mul_pd(vx1,neg2);
    vx1 = _mm256_hadd_pd(vx1,vt);

    vx0 = _mm256_mul_pd(vx0, vo10);
    vc = _mm256_mul_pd(vx0, vpinv);
    vc = _mm256_round_pd(vc,0x08);
    vc = _mm256_mul_pd(vc, vp);
    vx0 = _mm256_sub_pd(vx0,vc);
    vt = _mm256_permute2f128_pd (vx0, vx0, 0x01); // now contains x2,x3,x0,x1
    vx0 = _mm256_mul_pd(vx0, neg4);
    vx0 = _mm256_add_pd(vx0, vt);

    vx1 = _mm256_mul_pd(vx1, vo10);
    vc = _mm256_mul_pd(vx1, vpinv);
    vc = _mm256_round_pd(vc,0x08);
    vc = _mm256_mul_pd(vc, vp);
    vx1 = _mm256_sub_pd(vx1,vc);
    vt = _mm256_permute2f128_pd (vx1, vx1, 0x01); // now contains x2,x3,x0,x1
    vx1 = _mm256_mul_pd(vx1, neg4);
    vx1 = _mm256_add_pd(vx1, vt);

    vt = _mm256_mul_pd(vx1, vo20);
    vc = _mm256_mul_pd(vt, vpinv);
    vc = _mm256_round_pd(vc,0x08);
    vc = _mm256_mul_pd(vc, vp);
    vt = _mm256_sub_pd(vt,vc);
    vx1 = _mm256_sub_pd(vx0, vt);
    _mm256_store_pd(out+s+4, vx1);

    vx0 = _mm256_add_pd(vx0, vt);
    _mm256_store_pd(out+s+0, vx0);
  }
  
  pos += 8;

// m = 16, m = 32, m = 64 (3 levels merged)
  for(offset = 0; offset < 8; offset+=4)
  {
    vo0 = _mm256_load_pd(o+pos+offset);
    vo10 = _mm256_load_pd(o+pos+offset+8);
    vo11 = _mm256_load_pd(o+pos+offset+16);

    for(s = 0; s<POLY_DEG; s+=64)
    {
      vx1 = _mm256_load_pd(out+offset+s+8);
      vt = _mm256_mul_pd(vx1, vo0);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      vx0 = _mm256_load_pd(out+offset+s+0);
      vx1 = _mm256_sub_pd(vx0, vt);
      //  _mm256_store_pd(out+offset+s+8, vx1);
      vx0 = _mm256_add_pd(vx0, vt);
      //  _mm256_store_pd(out+offset+s+0, vx0);

      vx3 = _mm256_load_pd(out+offset+s+24);
      vt = _mm256_mul_pd(vx3, vo0);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      vx2 = _mm256_load_pd(out+offset+s+16);
      vx3 = _mm256_sub_pd(vx2, vt);
      //  _mm256_store_pd(out+offset+s+24, vx3);
      vx2 = _mm256_add_pd(vx2, vt);
      //  _mm256_store_pd(out+offset+s+16, vx2);

      vx5 = _mm256_load_pd(out+offset+s+40);
      vt = _mm256_mul_pd(vx5, vo0);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      vx4 = _mm256_load_pd(out+offset+s+32);
      vx5 = _mm256_sub_pd(vx4, vt);
      //  _mm256_store_pd(out+offset+s+40, vx5);
      vx4 = _mm256_add_pd(vx4, vt);
      //  _mm256_store_pd(out+offset+s+32, vx4);

      vx7 = _mm256_load_pd(out+offset+s+56);
      vt = _mm256_mul_pd(vx7, vo0);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      vx6 = _mm256_load_pd(out+offset+s+48);
      vx7 = _mm256_sub_pd(vx6, vt);
      //  _mm256_store_pd(out+offset+s+56, vx7);
      vx6 = _mm256_add_pd(vx6, vt);
      //  _mm256_store_pd(out+offset+s+48, vx6);


      //  vx2 = _mm256_load_pd(out+offset+s+16);
      vt = _mm256_mul_pd(vx2, vo10);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //  vx0 = _mm256_load_pd(out+offset+s+0);
      vx2 = _mm256_sub_pd(vx0, vt);
      //  _mm256_store_pd(out+offset+s+16, vx2);
      vx0 = _mm256_add_pd(vx0, vt);
      //  _mm256_store_pd(out+offset+s+0, vx0);

      //  vx6 = _mm256_load_pd(out+offset+s+48);
      vt = _mm256_mul_pd(vx6, vo10);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //  vx4 = _mm256_load_pd(out+offset+s+32);
      vx6 = _mm256_sub_pd(vx4, vt);
      //  _mm256_store_pd(out+offset+s+48, vx6);
      vx4 = _mm256_add_pd(vx4, vt);
      //  _mm256_store_pd(out+offset+s+32, vx4);


      //  vx3 = _mm256_load_pd(out+offset+s+24);
      vt = _mm256_mul_pd(vx3, vo11);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //  vx1 = _mm256_load_pd(out+offset+s+8);
      vx3 = _mm256_sub_pd(vx1, vt);
      //  _mm256_store_pd(out+offset+s+24, vx3);
      vx1 = _mm256_add_pd(vx1, vt);
      //  _mm256_store_pd(out+offset+s+8, vx1);

      //  vx7 = _mm256_load_pd(out+offset+s+56);
      vt = _mm256_mul_pd(vx7, vo11);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //  vx5 = _mm256_load_pd(out+offset+s+40);
      vx7 = _mm256_sub_pd(vx5, vt);
      //  _mm256_store_pd(out+offset+s+56, vx7);
      vx5 = _mm256_add_pd(vx5, vt);
      //  _mm256_store_pd(out+offset+s+40, vx5);



      //  vx4 = _mm256_load_pd(out+offset+s+32);
    vo20 = _mm256_load_pd(o+pos+offset+24);
      vt = _mm256_mul_pd(vx4, vo20);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //  vx0 = _mm256_load_pd(out+offset+s+0);
      vx4 = _mm256_sub_pd(vx0, vt);
      _mm256_store_pd(out+offset+s+32, vx4);
      vx0 = _mm256_add_pd(vx0, vt);
      _mm256_store_pd(out+offset+s+0, vx0);

      //  vx5 = _mm256_load_pd(out+offset+s+40);
    vo21 = _mm256_load_pd(o+pos+offset+32);
      vt = _mm256_mul_pd(vx5, vo21);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //  vx1 = _mm256_load_pd(out+offset+s+8);
      vx5 = _mm256_sub_pd(vx1, vt);
      _mm256_store_pd(out+offset+s+40, vx5);
      vx1 = _mm256_add_pd(vx1, vt);
      _mm256_store_pd(out+offset+s+8, vx1);

      //  vx6 = _mm256_load_pd(out+offset+s+48);
    vo22 = _mm256_load_pd(o+pos+offset+40);
      vt = _mm256_mul_pd(vx6, vo22);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //  vx2 = _mm256_load_pd(out+offset+s+16);
      vx6 = _mm256_sub_pd(vx2, vt);
      _mm256_store_pd(out+offset+s+48, vx6);
      vx2 = _mm256_add_pd(vx2, vt);
      _mm256_store_pd(out+offset+s+16, vx2);

      //  vx7 = _mm256_load_pd(out+offset+s+56);
    vo23 = _mm256_load_pd(o+pos+offset+48);
      vt = _mm256_mul_pd(vx7, vo23);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //  vx3 = _mm256_load_pd(out+offset+s+24);
      vx7 = _mm256_sub_pd(vx3, vt);
      _mm256_store_pd(out+offset+s+56, vx7);
      vx3 = _mm256_add_pd(vx3, vt);
      _mm256_store_pd(out+offset+s+24, vx3);
    }
  }


  pos += 56;

  // m = 128, m=256, m=512 (3 levels merged)
  for(offset=0;offset<64;offset+=4)
  {
    vo0 = _mm256_load_pd(o+pos+offset);
    vo10 = _mm256_load_pd(o+pos+offset+64);
    vo11 = _mm256_load_pd(o+pos+offset+128);

    for(s = 0; s<POLY_DEG; s+=512)
    {
      vx1 = _mm256_load_pd(out+offset+s+64);
      vt = _mm256_mul_pd(vx1, vo0);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      vx0 = _mm256_load_pd(out+offset+s+0);
      vx1 = _mm256_sub_pd(vx0, vt);
      //_mm256_store_pd(out+offset+s+64, vx1);
      vx0 = _mm256_add_pd(vx0, vt);
      //_mm256_store_pd(out+offset+s+0, vx0);

      vx3 = _mm256_load_pd(out+offset+s+192);
      vt = _mm256_mul_pd(vx3, vo0);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      vx2 = _mm256_load_pd(out+offset+s+128);
      vx3 = _mm256_sub_pd(vx2, vt);
      //_mm256_store_pd(out+offset+s+192, vx3);
      vx2 = _mm256_add_pd(vx2, vt);
      //_mm256_store_pd(out+offset+s+128, vx2);

      vx5 = _mm256_load_pd(out+offset+s+320);
      vt = _mm256_mul_pd(vx5, vo0);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      vx4 = _mm256_load_pd(out+offset+s+256);
      vx5 = _mm256_sub_pd(vx4, vt);
      //_mm256_store_pd(out+offset+s+320, vx5);
      vx4 = _mm256_add_pd(vx4, vt);
      //_mm256_store_pd(out+offset+s+256, vx4);

      vx7 = _mm256_load_pd(out+offset+s+448);
      vt = _mm256_mul_pd(vx7, vo0);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      vx6 = _mm256_load_pd(out+offset+s+384);
      vx7 = _mm256_sub_pd(vx6, vt);
      //_mm256_store_pd(out+offset+s+448, vx7);
      vx6 = _mm256_add_pd(vx6, vt);
      //_mm256_store_pd(out+offset+s+384, vx6);

    

      //vx2 = _mm256_load_pd(out+offset+s+128);
      vt = _mm256_mul_pd(vx2, vo10);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //vx0 = _mm256_load_pd(out+offset+s+0);
      vx2 = _mm256_sub_pd(vx0, vt);
      //_mm256_store_pd(out+offset+s+128, vx2);
      vx0 = _mm256_add_pd(vx0, vt);
      //_mm256_store_pd(out+offset+s+0, vx0);

      //vx3 = _mm256_load_pd(out+offset+s+192);
      vt = _mm256_mul_pd(vx3, vo11);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //vx1 = _mm256_load_pd(out+offset+s+64);
      vx3 = _mm256_sub_pd(vx1, vt);
      //_mm256_store_pd(out+offset+s+192, vx3);
      vx1 = _mm256_add_pd(vx1, vt);
      //_mm256_store_pd(out+offset+s+64, vx1);

      //vx6 = _mm256_load_pd(out+offset+s+384);
      vt = _mm256_mul_pd(vx6, vo10);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //vx4 = _mm256_load_pd(out+offset+s+256);
      vx6 = _mm256_sub_pd(vx4, vt);
      //_mm256_store_pd(out+offset+s+384, vx6);
      vx4 = _mm256_add_pd(vx4, vt);
      //_mm256_store_pd(out+offset+s+256, vx4);

      //vx7 = _mm256_load_pd(out+offset+s+448);
      vt = _mm256_mul_pd(vx7, vo11);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //vx5 = _mm256_load_pd(out+offset+s+320);
      vx7 = _mm256_sub_pd(vx5, vt);
      //_mm256_store_pd(out+offset+s+448, vx7);
      vx5 = _mm256_add_pd(vx5, vt);
      //_mm256_store_pd(out+offset+s+320, vx5);


    
      //vx4 = _mm256_load_pd(out+offset+s+256);
    vo20 = _mm256_load_pd(o+pos+offset+192);
      vt = _mm256_mul_pd(vx4, vo20);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //vx0 = _mm256_load_pd(out+offset+s+0);
      vx4 = _mm256_sub_pd(vx0, vt);
      _mm256_store_pd(out+offset+s+256, vx4);
      vx0 = _mm256_add_pd(vx0, vt);
      _mm256_store_pd(out+offset+s+0, vx0);

      //vx5 = _mm256_load_pd(out+offset+s+320);
    vo21 = _mm256_load_pd(o+pos+offset+256);
      vt = _mm256_mul_pd(vx5, vo21);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //vx1 = _mm256_load_pd(out+offset+s+64);
      vx5 = _mm256_sub_pd(vx1, vt);
      _mm256_store_pd(out+offset+s+320, vx5);
      vx1 = _mm256_add_pd(vx1, vt);
      _mm256_store_pd(out+offset+s+64, vx1);

      //vx6 = _mm256_load_pd(out+offset+s+384);
    vo22 = _mm256_load_pd(o+pos+offset+320);
      vt = _mm256_mul_pd(vx6, vo22);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //vx2 = _mm256_load_pd(out+offset+s+128);
      vx6 = _mm256_sub_pd(vx2, vt);
      _mm256_store_pd(out+offset+s+384, vx6);
      vx2 = _mm256_add_pd(vx2, vt);
      _mm256_store_pd(out+offset+s+128, vx2);

      //vx7 = _mm256_load_pd(out+offset+s+448);
    vo23 = _mm256_load_pd(o+pos+offset+384);
      vt = _mm256_mul_pd(vx7, vo23);
      vc = _mm256_mul_pd(vt, vpinv);
      vc = _mm256_round_pd(vc,0x08);
      vc = _mm256_mul_pd(vc, vp);
      vt = _mm256_sub_pd(vt,vc);
      //vx3 = _mm256_load_pd(out+offset+s+192);
      vx7 = _mm256_sub_pd(vx3, vt);
      _mm256_store_pd(out+offset+s+448, vx7);

      vx3 = _mm256_add_pd(vx3, vt);
      _mm256_store_pd(out+offset+s+192, vx3);
    }
  }
}
示例#25
0
文件: fft.cpp 项目: sunpho84/nissa
  // Perform the 1d Fourier transform along direction mu (mu selects the communicator and the sizes).
  //  out       : result, loc_size[mu]*ncpp complex numbers
  //  in        : input,  loc_size[mu]*ncpp complex numbers
  //  ncpp      : number of complex numbers stored per local point along mu
  //  sign      : sign put in the exponent of the twiddle factors
  //  normalize : if 1, the result is divided by glb_size[mu]
  // The fft consists of four steps:
  //  1) data is rearranged across the ranks (bit reversal of the block index)
  //  2) a plain ft is done inside each block (not needed if data length is a power of 2)
  //  3) the (Danielson-)Lanczos lemma is applied on the local data
  //  4) the (Danielson-)Lanczos lemma is applied on non-local data (if needed)
  // All the work is done by the master thread only.
  void fft1d(complex *out,complex *in,int ncpp,int mu,double sign,int normalize)
  {
    GET_THREAD_ID();
    
    //allocate the buffer used to receive/reorder data
    complex *buf=nissa_malloc("buf",loc_size[mu]*ncpp,complex);
    
    if(IS_MASTER_THREAD)
      {
	int log2glb_nblk=find_max_pow2(glb_size[mu]); //log2 of the number of blocks (presumably the power of 2 dividing glb_size[mu], see find_max_pow2)
	int glb_nblk=1<<log2glb_nblk;                 //global number of blocks (a power of 2)
	int blk_size=glb_size[mu]/glb_nblk;           //block size (1 when glb_size[mu] is a power of 2)
	int loc_nblk=glb_nblk/nrank_dir[mu];          //number of local blocks
	
	//check that the number of ranks in this direction is a power of 2
	int r=nrank_dir[mu];
	while(r>1)
	  {
	    if(r%2!=0) crash("Error, FFT implemented only for power of 2 grids! Ask sunpho to adapt it to a more general case if you really need!");
	    r/=2;
	  }
	
	////////////////////////////// first part /////////////////////////////////
	
	//reorder data across the various rank: glb_iel_in=iel_blk_out*glb_nblk+glb_iblk_out_rev
	//each local element issues at most one send and one receive, hence 2*loc_size[mu] requests
	int nrequest0=0,nexp_request0=loc_size[mu]*2;
	MPI_Request request0[nexp_request0]; //runtime-sized array (compiler extension in C++)
	for(int loc_iblk_in=0;loc_iblk_in<loc_nblk;loc_iblk_in++)
	  for(int iel_blk_in=0;iel_blk_in<blk_size;iel_blk_in++)
	    {
	      //find the full index
	      int loc_iel_in=loc_iblk_in*blk_size+iel_blk_in;
	      int glb_iel_in=glb_coord_of_loclx[0][mu]+loc_iel_in;
	      int glb_iblk_in=rank_coord[mu]*loc_nblk+loc_iblk_in;
	      
	      //look at the output: the block index is the bit reversal of glb_iel_in%glb_nblk
	      int iel_blk_out=glb_iel_in/glb_nblk;
	      int glb_iblk_out=bitrev(glb_iel_in-iel_blk_out*glb_nblk,log2glb_nblk);
	      int rank_coord_out=glb_iblk_out/loc_nblk;
	      int loc_iblk_out=glb_iblk_out-rank_coord_out*loc_nblk;
	      int loc_iel_out=loc_iblk_out*blk_size+iel_blk_out;
	      
	      //now, if the destination is local, put it on place
	      if(rank_coord_out==rank_coord[mu]) memcpy(buf+ncpp*loc_iel_out,in+ncpp*loc_iel_in,sizeof(complex)*ncpp);
	      else //send the data to the destination, tagging with the element index on the destination
		MPI_Isend((void*)(in+loc_iel_in*ncpp),ncpp*2,MPI_DOUBLE,rank_coord_out,1241+loc_iel_out,
			  line_comm[mu],&request0[nrequest0++]);
	      
	      //if necessary receive data: glb_iel_send=iel_blk_in*glb_nblk+glb_iblk_in_rev
	      //NOTE(review): the send branch compares with rank_coord[mu], this one with line_rank[mu]; presumably they coincide - confirm
	      int glb_iel_send=iel_blk_in*glb_nblk+bitrev(glb_iblk_in,log2glb_nblk);
	      int rank_coord_send=glb_iel_send/loc_size[mu];
	      if(rank_coord_send!=line_rank[mu])
		MPI_Irecv((void*)(buf+loc_iel_in*ncpp),ncpp*2,MPI_DOUBLE,rank_coord_send,1241+loc_iel_in,
			  line_comm[mu],&request0[nrequest0++]);
	    }
	
	//wait for scramble to finish
	MPI_Status status0[nexp_request0];
	if(nrequest0>0) MPI_Waitall(nrequest0,request0,status0);
	
	/////////////////////////// second part ////////////////////////////////////
	
	//perform the block fourier transform if needed (blocks of size 1 are just copied)
	if(blk_size==1) memcpy(out,buf,loc_size[mu]*ncpp*sizeof(complex));
	else
	  {
	    //initialize the output, since the transform accumulates into it
	    memset(out,0,loc_size[mu]*ncpp*sizeof(complex));
	    
	    //loop over local blocks
	    for(int loc_iblk=0;loc_iblk<loc_nblk;loc_iblk++)
	      {
		//take initial position of the output and input block
		complex *in_blk=buf+loc_iblk*blk_size*ncpp;
		complex *out_blk=out+loc_iblk*blk_size*ncpp;
		
		//loop over out elements of out block (direct O(blk_size^2) transform)
		for(int iel_out=0;iel_out<blk_size;iel_out++)
		  {
		    //take initial position of out local elements
		    complex *out_pad=out_blk+iel_out*ncpp;
		    
		    //incrementing factor: wpr=cos(theta)-1, wpi=sin(theta)
		    double theta=iel_out*sign*2*M_PI/blk_size;
		    double wtemp=sin(0.5*theta);
		    double wpr=-2*wtemp*wtemp;
		    double wpi=sin(theta);
		    
		    //fourier factor, starting from 1
		    double wr=1;
		    double wi=0;
		    
		    //loop over elements of in blocks
		    for(int iel_in=0;iel_in<blk_size;iel_in++)
		      {
			//take initial position of in local elements
			complex *in_pad=in_blk+iel_in*ncpp;
			
			//loop over local elements: out+=(wr+i*wi)*in
			for(int ic=0;ic<ncpp;ic++)
			  {
			    out_pad[ic][0]+=wr*in_pad[ic][0]-wi*in_pad[ic][1];
			    out_pad[ic][1]+=wr*in_pad[ic][1]+wi*in_pad[ic][0];
			  }
			
			//increment the twiddle: multiply (wr+i*wi) by exp(i*theta)
			wtemp=wr;
			wr+=wr*wpr-   wi*wpi;
			wi+=wi*wpr+wtemp*wpi;
		      }
		  }
	      }
	  }
	
	/////////////////////////////  third part ////////////////////////////
	
	//now perform the lanczos procedure up to when it does not need communications
	//delta is the distance between the two addends, doubling at each level
	for(int delta=blk_size;delta<loc_size[mu];delta*=2)
	  {
	    //incrementing factor: wpr=cos(theta)-1, wpi=sin(theta)
	    double theta=sign*2*M_PI/(2*delta);
	    double wtemp=sin(0.5*theta);
	    double wpr=-2*wtemp*wtemp;
	    double wpi=sin(theta);
	    
	    //fourier coefficient
	    double wr=1;
	    double wi=0;
	    
	    //loop over the delta length (each m will correspond to increasing twiddle)
	    for(int m=0;m<delta;m++)
	      {
		//loop on the first addend
		for(int i=m;i<loc_size[mu];i+=2*delta)
		  {
		    //second addend
		    int j=i+delta;
		    
		    //site data multiplication: butterfly out[i],out[j] <- out[i]+w*out[j],out[i]-w*out[j]
		    for(int ic=0;ic<ncpp;ic++)
		      {
			double tempr=wr*out[j*ncpp+ic][0]-wi*out[j*ncpp+ic][1];
			double tempi=wr*out[j*ncpp+ic][1]+wi*out[j*ncpp+ic][0];
			
			out[j*ncpp+ic][0]=out[i*ncpp+ic][0]-tempr;
			out[j*ncpp+ic][1]=out[i*ncpp+ic][1]-tempi;
			
			out[i*ncpp+ic][0]+=tempr;
			out[i*ncpp+ic][1]+=tempi;
		      }
		  }
		//advance the twiddle by exp(i*theta)
		wtemp=wr;
		wr+=wr*wpr-   wi*wpi;
		wi+=wi*wpr+wtemp*wpi;
	      }
	  }
	
	///////////////////////////////////// fourth part //////////////////////////////
	
	//now perform the lanczos procedure up to the end, exchanging whole local chunks between paired ranks
	for(int delta_rank=1;delta_rank<nrank_dir[mu];delta_rank*=2) //this is rank width of delta
	  {
	    int delta=delta_rank*loc_size[mu];    //block extent
	    int idelta=rank_coord[mu]/delta_rank; //identify the delta of the current rank
	    
	    //find if the current rank holding the first or second block
	    complex *first,*second;
	    MPI_Request request2[2];
	    MPI_Status status2[2];
	    
	    //in both cases the partner data lands in buf and the local data stays in out;
	    //messages are tagged with 113+rank of the receiver
	    if(idelta%2==0) //first: so it has to receive second block and send first
	      {
		first=out;
		second=buf;
		
		MPI_Irecv((void*)buf,2*ncpp*loc_size[mu],MPI_DOUBLE,line_rank[mu]+delta_rank,113+line_rank[mu],
			  line_comm[mu],&(request2[0]));
		MPI_Isend((void*)out,2*ncpp*loc_size[mu],MPI_DOUBLE,line_rank[mu]+delta_rank,113+line_rank[mu]+delta_rank,
			  line_comm[mu],&(request2[1]));
	      }
	    else           //second: so it has to receive first block and send second
	      {
		first=buf;
		second=out;
		
		MPI_Irecv((void*)buf,2*ncpp*loc_size[mu],MPI_DOUBLE,line_rank[mu]-delta_rank,113+line_rank[mu],
			  line_comm[mu],&(request2[0]));
		MPI_Isend((void*)out,2*ncpp*loc_size[mu],MPI_DOUBLE,line_rank[mu]-delta_rank,113+line_rank[mu]-delta_rank,
			  line_comm[mu],&(request2[1]));
	      }
	    
	    //incrementing factor: wpr=cos(theta)-1, wpi=sin(theta)
	    double theta=sign*2*M_PI/(2*delta);
	    double wtemp=sin(0.5*theta);
	    double wpr=-2*wtemp*wtemp;
	    double wpi=sin(theta);
	    
	    int rank_pos_delta=rank_coord[mu]%delta_rank;  //position of the rank inside the delta
	    int pos_delta=rank_pos_delta*loc_size[mu];     //starting coord of local delta inside the delta
	    
	    //fourier coefficient, starting from the twiddle of the first local element
	    double wr=cos(pos_delta*theta);
	    double wi=sin(pos_delta*theta);
	    
	    //wait for communications to finish: out must not be modified while it is being sent
	    MPI_Waitall(2,request2,status2);
	    
	    //loop over the delta length (each m will correspond to increasing twiddle)
	    for(int loc_m=0;loc_m<loc_size[mu];loc_m++)
	      {
		//site data multiplication: only the half of the butterfly living in out is computed
		for(int ic=0;ic<ncpp;ic++)
		  {
		    double tempr=wr*second[loc_m*ncpp+ic][0]-wi*second[loc_m*ncpp+ic][1];
		    double tempi=wr*second[loc_m*ncpp+ic][1]+wi*second[loc_m*ncpp+ic][0];
		    
		    if(idelta%2==0)
		      {
			first[loc_m*ncpp+ic][0]+=tempr;
			first[loc_m*ncpp+ic][1]+=tempi;
		      }
		    else
		      {
			second[loc_m*ncpp+ic][0]=first[loc_m*ncpp+ic][0]-tempr;
			second[loc_m*ncpp+ic][1]=first[loc_m*ncpp+ic][1]-tempi;
		      }
		  }
		//advance the twiddle by exp(i*theta)
		wtemp=wr;
		wr+=wr*wpr-   wi*wpi;
		wi+=wi*wpr+wtemp*wpi;
	      }
	  }
	
	//divide by the global size if asked
	if(normalize==1)
	  for(int i=0;i<loc_size[mu]*ncpp;i++)
	    for(int ri=0;ri<2;ri++) out[i][ri]/=glb_size[mu];
      }
    
    nissa_free(buf);
  }
示例#26
0
文件: fft.c 项目: kapteyn-astro/gipsy
/*
 * Reads one map row (y coordinate `y`, lpt points starting at x = lmin) from
 * each of the nset input sets with readxy; the data of set n is stored at
 * buf[n*lpt].
 */
static	void	inpmap_getrow( fchar set[] ,	/* input set names */
                               fint  sub[] ,	/* subsets */
                               fint  nset ,	/* number of sets */
                               fint  lmin ,	/* left most x map coordinate */
                               fint  y ,	/* y map coordinate of the row */
                               fint  lpt ,	/* number of points in X */
                               float *buf )	/* receives nset * lpt values */
{
   fint		n;

   for ( n = 0; n < nset; n++ ) {
      readxy( set[n], sub[n], lmin, y, buf + n * lpt, lpt, 1 );
   }
}

/*
 * Loads the input map row by row, transforms each row in X where the
 * transform type asks for it, and writes the rows to the scratch file
 * (putrow) at the bit-reversed row position. What is done depends on the
 * file-level FORM setting:
 *   FORM == -1  rows are handled in pairs (ri, mpt/2 - ri): both are
 *               converted with vector(), transformed with fftx() and
 *               fixed up with fxrl() before being stored;
 *   FORM ==  0  raw rows are read in passes of as many rows as fit in the
 *               buffer and stored two input rows per scratch row;
 *   FORM ==  1  every row is converted, transformed and stored.
 */
static	void	inpmap( fint  lpt ,	/* number of points in X */
                        fint  mpt ,	/* number of points in Y */
                        fint  lmin ,	/* left most x map coordinate */
                        fint  mmin ,	/* lower most y map coordinate */
                        fchar set[] ,	/* input set name */
                        fint  sub[] ,	/* subsets */
                        fint  nset ,	/* number of sets */
                        fint  modi )	/* input mode, amp./pha. or cos./sin. */
{
   float	rbuf[2*SIZE], cbuf[2*SIZE];	/* row buffers */
   fint		wor;				/* bit reversed row number */

   if ( FORM == -1 ) {			/* complex ---FFT---> real */
      fint	tabstep = POWTAB[ MAXP2 - LOGP2Y - 1 ];	/* table offset factor */
      fint	half = mpt / 2;		/* number of rows to load */
      fint	npairs = half / 2 + 1;
      float	fr = -0.5 * SIGN;	/* fix-up factors */
      float	fi = -0.5;
      fint	lo;

      for ( lo = 0; lo < npairs; lo++ ) {
         fint	hi = half - lo;		/* partner row of lo */
         fint	itab = lo * tabstep;	/* table position */
         float	zr = 0.5 - fr * SINTAB[itab];
         float	zi =       fi * COSTAB[itab];
         fint	ylo, yhi;

         /* read both rows of the pair, for every set */
         ylo = shiftr( mmin, mpt, lo );
         inpmap_getrow( set, sub, nset, lmin, ylo, lpt, cbuf );
         yhi = shiftr( mmin, mpt, hi );
         inpmap_getrow( set, sub, nset, lmin, yhi, lpt, rbuf );

         vector( cbuf, lpt, modi, 1 );
         vector( rbuf, lpt, modi, 1 );
         fftx( cbuf );			/* transform both rows */
         fftx( rbuf );
         /* fixup for complex to real transform */
         fxrl( zr, zi, cbuf, rbuf, lo, hi, lpt );

         /* store in scratch file; the partner row only when it is a
            different row and lo is not the first row */
         wor = bitrev( lo, LOGP2Y );
         putrow( cbuf, 2 * lpt, wor );
         if ( ( lo != hi ) && ( lo != 0 ) ) {
            wor = bitrev( hi, LOGP2Y );
            putrow( rbuf, 2 * lpt, wor );
         }
      }
   } else if ( FORM == 0 ) {		/* real ---FFT---> complex */
      fint	rs = MIN( mpt, ( 2 * SIZE ) / lpt );	/* rows in one pass */
      fint	r;

      for ( r = 0; r < mpt; r += rs ) {	/* one pass per rs rows */
         fint	done = 0;		/* rows read so far in this pass */
         fint	row;

         /* only the first set/subset is read here; a pass may need more
            than one read because of the row shift done by shiftr */
         do {
            fint	m = shiftr( mmin, mpt, r + done );
            fint	cnt = MIN( rs - done, mpt + mmin - m );

            readxy( set[0], sub[0], lmin, m, &cbuf[done * lpt], lpt, cnt );
            done = done + cnt;
         } while ( done != rs );

         /* two consecutive input rows make up one scratch row */
         for ( row = r; row < ( r + rs ); row += 2 ) {
            wor = bitrev( row / 2, LOGP2Y );
            putrow( &cbuf[( row - r ) * lpt], 2 * lpt, wor );
         }
      }
   } else if ( FORM == 1 ) {		/* complex ---FFT---> complex */
      fint	row;

      for ( row = 0; row < mpt; row++ ) {
         fint	m = shiftr( mmin, mpt, row );	/* row to get from disk */

         inpmap_getrow( set, sub, nset, lmin, m, lpt, cbuf );
         vector( cbuf, lpt, modi, 1 );
         fftx( cbuf );
         wor = bitrev( row, LOGP2Y );
         putrow( cbuf, 2 * lpt, wor );
      }
   }
}
示例#27
0
文件: tree.c 项目: llevin/sigproc
main(int argc, char** argv)
{
  int dum_int, iread, iw, indx, ITStart, namelen, HdrSize,
    NTOT, NSkip, NPtsToRead, SkiByte, log2NTOT, NDM,
    NOPFiles, MAX, NTSampInRead, i, NSampInRead, BytePerFrame,
    NByteInRead, ITOffset, NBitChan, *ibrev, NRead, IFiles, FOld,
    FNew, FSwitch, KeepTrack, NTReadOld, NTReadNew, NReadOld,
    NReadNew, OPFileSize, IOffset, SkipByte, oldper, newper ,NT_Files;

  long long TotalTrack;

  float  FMin, FMax, FCen, *Inbuf, *Outbuf, timecount;

  char  *filename, *filefull, *parfile;

  FILE  *fpar, *Fout[8192], *Fin;

  if (argc <= 1) 
    tree_help();
  else 
    tree_parms(argc, argv);

  filename   = (char *) calloc(120, sizeof(char));
  filefull   = (char *) calloc(120, sizeof(char));
  parfile    = (char *) calloc(120, sizeof(char));

  ITStart = 0; NT_Files = 0;

  /* To read a sample header from first time file */

  strcpy(filename, unfname);
  namelen    = strlen(filename);

  printf("First filename   : %s\n", filename);
  if ((fpar     = fopen(filename, "rb")) == NULL) {
    printf("ERROR opening file %s.\n", filename);
    exit(0);
  }
  HdrSize  = read_header(fpar);
  fclose(fpar);

  FMin     = fch1;
  FMax     = FMin + (float)(nchans - 1) * foff;
  FCen     = (FMin + FMax) / 2.0;

  printf("No. of frequency channels        : %d\n",nchans);
  printf("Beginning radio frequency        : %f MHz\n", FMin - (foff / 2.0));
  printf("Ending radio frequency           : %f MHz\n", FMax + (foff / 2.0));
  printf("Centre frq. of the whole band    : %f MHz\n", FCen);
  printf("\n");
  printf("input sampling interval       : %f sec\n", tsamp);

  NTOT = 0;
  strcpy(filename, unfname);
  namelen    = strlen(filename);
  if ((fpar       = fopen(filename, "rb")) == NULL) {
    printf("ERROR opening %s.\n", filename);
  }

  HdrSize    = read_header(fpar);
  fclose(fpar);

  NT_Files = (int)nsamples(filename, HdrSize, nbits, nifs, nchans);

  NTOT       += NT_Files;
  printf("File %5d   : %s with %d samples\n", i, filename, NT_Files);

  NSkip      = (int)(TSkip / tsamp);
  if (NSkip > NT_Files) {
    printf("Initial Skip-length longer than the first file!\n");
    exit(0);
  }

  NPtsToRead = (int)(TRead / tsamp);
  if (NPtsToRead==0) {
    TRead=NT_Files*tsamp;
    NPtsToRead=NT_Files;
  }
  if (NPtsToRead > NTOT) NPtsToRead = NTOT;
  SkipByte   = (NSkip * nbits * nchans / 8);

  printf("\n");
  printf("This data set contains %d number of samples\n", NTOT);
  printf("Number of bits per sample          : %d\n", nbits);
  printf("Time length to skip at the begin.  : %f\n", TSkip);
  printf("Bytes to skip at the beginning     : %d\n", SkipByte);
  printf("No. of time samples to skip        : %d\n", NSkip);
  printf("Time length to read after skipping : %f\n", TRead);
  printf("Samples to read after skipping     : %d\n", NPtsToRead);

  NTOT  -= NSkip;
  printf("Time samples after initial skip    : %d\n", NTOT);

  if (NPtsToRead > NTOT) {
    NPtsToRead  = NTOT;
    printf("Too many samples to read. Truncated to %d samples\n", NTOT);
  } else
    NTOT  = NPtsToRead;

  log2NTOT = (int)(log((double)NTOT) / log((double)2.0));
  NTOT     = (1 << log2NTOT);
  printf("After truncating to lower 2^n      : %d\n", NTOT);

  if (DMMin < 0) DMMin = 0;
  if (DMMax >= nchans) DMMax = (nchans - 1);
  if ((DMMin == 0) && (DMMax == 0)) {
    DMMin = 0; DMMax = (nchans - 1);
  } else if ((DMMin != 0) && (DMMax == 0)) 
    DMMax = (nchans - 1);
  printf("\n");
  printf("Minimum DM index = %d   Maximum = %d\n", DMMin, DMMax);
  DMMinv=(tsamp/8.3e3)*(double)(DMMin)*pow((FMax+FMin)/2.,3.)/fabs(FMax-FMin);
  DMMaxv=(tsamp/8.3e3)*(double)(DMMax)*pow((FMax+FMin)/2.,3.)/fabs(FMax-FMin);
  printf("Minimum DM value = %f   Maximum = %f\n", DMMinv,DMMaxv);
  NDM        = (DMMax - DMMin + 1);

  NOPFiles   = NDM;
  OPFileSize = sizeof(float) * NTOT;

  printf("\n");
  printf("Total number of output files    : %d\n", NOPFiles);
  printf("Size of each output file        : %d bytes\n", OPFileSize);

  for (i=0; i<NOPFiles; i++) {
    strcpy(filefull,unfname);
    sprintf(&filefull[strlen(filefull)-4], ".DM%.4d.tim", (i+DMMin));
    if((Fout[i] = fopen(filefull,"wb")) == NULL) {
      printf("ERROR opening output file %s.\n", filefull);
      exit(0);
    } else {
      nbands=1;
      nobits=32;
      refdm=(tsamp/8.3e3)*(double)(i+DMMin)*pow((FMax+FMin)/2.,3.)/
	fabs(FMax-FMin);
      output=Fout[i];
      dedisperse_header();
    }
  }
  printf("\n");

  /*
  strcpy(parfile, "\0"); strcpy(parfile, unfname);
  strncat(parfile, ".par", 4);
  fpar = fopen(parfile, "w");
  fprintf(fpar, "%s\n", unfname);
  fprintf(fpar, "%d  %d  %10.8f \n ", NTOT, nchans, tsamp);
  fprintf(fpar, "%d  %d  %d  %d\n", DMMin, DMMax, NDM, NOPFiles);
  fprintf(fpar, "%d\n", nchans);
  fprintf(fpar, "%f %f\n", FMin, FMax);
  fclose(fpar);
  */

  MAX          = (1 << 12);
  if (MAX > NTOT) MAX = NTOT;

  NTSampInRead = (MAX + (2 * nchans));
  NSampInRead  = (nchans * NTSampInRead);
  BytePerFrame = (nchans * nbits / 8);
  NByteInRead  = (NTSampInRead * BytePerFrame);

  ITOffset     = 2 * nchans;
  IOffset      = (-ITOffset * BytePerFrame);

  NBitChan = (int)(log((double)nchans) / 0.6931471);

  printf("Time samples to read in one read      : %d\n", NTSampInRead);
  printf("Bytes to read in one read             : %d bytes\n", NByteInRead);
  printf("Offset counter for each read          : %d time samples\n", IOffset);
  printf("\n");
  printf("No of bits to represent all channels  : %d\n", NBitChan);

  Inbuf    = (float *) calloc(NSampInRead, sizeof(float));
  Outbuf   = (float *) calloc(NSampInRead, sizeof(float));
  ibrev    = (int *)   calloc(nchans, sizeof(int));

  for (i=0; i<nchans; i++) {
    ibrev[i] = bitrev(i, NBitChan);
  }

  NRead    = (int)(NTOT / MAX);
  printf("Total number of reads in the run       : %d\n", NRead);

  IFiles   = 0;
  FOld     = 0;
  FNew     = 0;
  FSwitch  = 1;

  strcpy(filename, unfname);  namelen    = strlen(filename);
  printf("First filename   : %s\n", filename);
  if ((Fin = fopen(filename, "rb")) == NULL) {
    printf("ERROR opening file %s.\n", filename);
    exit(0);
  }
  HdrSize  = read_header(Fin);
  printf("Going into the main loop\n");

  fseek(Fin, SkipByte, SEEK_CUR);
  KeepTrack = (NT_Files - NSkip);
  TotalTrack = 0;

  newper   = 0.0;
  oldper   = newper;
  printf("\n\n");
  for (iread=0; iread<NRead; iread++) {

    newper = 100.0 * (((float)iread / (float)NRead));
    if (newper > oldper) {
      timecount = ((float)iread * NTSampInRead * tsamp);
      printf("\rProcessed : %3d%%    Current file : %s    Time from beg : %8.2f sec", 
	     (int)newper, filename, timecount); fflush(stdout);
      oldper = newper;
    }

    KeepTrack  -= NTSampInRead;
    TotalTrack += NTSampInRead;

    if (KeepTrack > 0) {
      read_block(Fin, nbits, Inbuf, NSampInRead);
      fseek(Fin, IOffset, SEEK_CUR);
      KeepTrack  += ITOffset;
      TotalTrack -= ITOffset;
    }

    if (KeepTrack > 0) FSwitch = 1;

    memset(&Outbuf[0], 0, (sizeof(float) * NSampInRead));

    AxisSwap(Inbuf, Outbuf, nchans, NTSampInRead);

    if (FlipSwitch == 0)  {
      printf("BEFORE..."); fflush(stdout);
      FlipBand(Outbuf, nchans, NTSampInRead);
      printf("AFTER!\n");
    }

    taylor_flt(Outbuf, NSampInRead, nchans);

    for (iw=DMMin; iw<=DMMax; iw++) {
      indx  = (ibrev[iw] * NTSampInRead);
      fwrite(&Outbuf[indx], sizeof(float), MAX, Fout[iw-DMMin]);
    }
  }

  newper    = 100.0 * ((float)iread / NRead);
  timecount = ((float)iread * NTSampInRead * tsamp);
  printf("\rProcessed : %3d%%    Current file : %s    Time from beg : %8.2f sec", 
	 (int)newper, filename, timecount); fflush(stdout);
  printf("\n\n");

  for (i=0; i<NOPFiles; i++) fclose(Fout[i]);

  exit(0);
}
示例#28
0
/*
 * Blocked, buffer-staged permutation copy from X into Y.
 *
 * buffer : scratch area used as a SIZE x SIZE tile of floats
 *          (presumably SIZE == 1 << LOGSIZE -- confirm against the macros).
 * X      : source array.
 * Y      : destination array.
 * nbits  : number of outer blocks processed; also fed to logn() to obtain
 *          the width used when bit-reversing the block index.
 * n      : only forwarded to incachebitrev() on the small-input path.
 * lgn    : compared against 2*LOGSIZE to pick the code path, and used to
 *          derive the row stride (1 << (lgn - LOGSIZE)).
 *
 * When lgn < 2*LOGSIZE the whole job is handed to incachebitrev().
 */
void bit_rev(float *buffer, float *X, float *Y, int nbits, int n, int lgn)
{
    float v0, v1, v2, v3;
    int blk, row, col, k;
    int highBits, rowStride, revBlkOffset;
    float *src, *tile, *dst, *stage;

    /* Small inputs: delegate entirely to the in-cache variant. */
    if (lgn < 2*LOGSIZE)
    {
        incachebitrev(X, Y, nbits, n, lgn);
        return;
    }

    highBits  = lgn - LOGSIZE;
    rowStride = 1 << highBits;

    for (blk = 0; blk < nbits; blk++)
    {
        /* Destination offset: the bit-reversed block index, scaled by SIZE. */
        revBlkOffset = bitrev(blk, logn(nbits)) << LOGSIZE;

        /*
         * Stage 1: gather SIZE rows of SIZE floats from X (rows are
         * rowStride apart) into the scratch tile.  Row `row` of the input
         * lands in tile row bittable[row].  Each group of four values is
         * loaded into scalars before being stored, which sidesteps aliasing
         * concerns and helps hide load latency.
         */
        src = &X[blk << LOGSIZE];
        for (row = 0; row < SIZE; row++)
        {
            tile = &buffer[bittable[row] << LOGSIZE];
            for (col = 0; col < SIZE; col += 4)
            {
                v0 = src[col];
                v1 = src[col+1];
                v2 = src[col+2];
                v3 = src[col+3];
                tile[col]   = v0;
                tile[col+1] = v1;
                tile[col+2] = v2;
                tile[col+3] = v3;
            }
            src += rowStride;
        }

        /*
         * Stage 2: scatter the tile to Y.  Column `col` of the tile is read
         * with stride SIZE and written contiguously at the offset built from
         * bittable[col] and the reversed block index.  The strided reads hit
         * the scratch tile, which is expected to still be cache-resident.
         * Unrolled eight elements per pass.
         */
        for (col = 0; col < SIZE; col++)
        {
            dst   = &Y[(bittable[col] << highBits) | revBlkOffset];
            stage = &buffer[col];

            for (k = 0; k < SIZE; k += 8)
            {
                dst[0] = stage[0];
                dst[1] = stage[SIZE];
                dst[2] = stage[2*SIZE];
                dst[3] = stage[3*SIZE];
                dst[4] = stage[4*SIZE];
                dst[5] = stage[5*SIZE];
                dst[6] = stage[6*SIZE];
                dst[7] = stage[7*SIZE];
                dst   += 8;
                stage += (SIZE*8);
            }
        }
    }
}
示例#29
0
/*
 * Probe for the on-board MACE ethernet controller and set up its
 * net_device.
 *
 * The argument is ignored.  Only the first call can succeed: a static flag
 * makes every later call return -ENODEV.
 *
 * Returns 0 on success, -ENODEV when the machine does not report a MACE
 * (or the PROM checksum is wrong, or a probe already ran), and -ENOMEM when
 * init_etherdev() yields no device.
 */
int mace_probe(struct net_device *unused)
{
	static int found = 0;
	struct net_device *dev;
	struct mace_data *mp;
	unsigned char *prom;
	unsigned char xor_sum = 0;
	int idx;

	if (found)
		return -ENODEV;
	if (macintosh_config->ether_type != MAC_ETHER_MACE)
		return -ENODEV;

	/* Latch immediately so subsequent device probes do not "find" it again. */
	found = 1;

	dev = init_etherdev(0, PRIV_BYTES);
	if (!dev)
		return -ENOMEM;

	mp = (struct mace_data *) dev->priv;

	dev->base_addr = (u32)MACE_BASE;
	dev->irq = IRQ_MAC_MACE;

	mp->mace = (volatile struct mace *) MACE_BASE;
	mp->dma_intr = IRQ_MAC_MACE_DMA;

	/*
	 * The address PROM exposes 8 bytes, one every 16 byte offsets, each
	 * stored bit-reversed.  XOR-ing all eight (after un-reversing) must
	 * give 0xFF.  The first six are the station address.
	 */
	prom = (unsigned char *)MACE_PROM;

	for (idx = 0; idx < 8; ++idx) {
		u8 byte = bitrev(prom[idx << 4]);

		xor_sum ^= byte;
		if (idx < 6)
			dev->dev_addr[idx] = byte;
	}

	/* NOTE(review): dev is not released on this path -- confirm intended. */
	if (xor_sum != 0xFF)
		return -ENODEV;

	memset(&mp->stats, 0, sizeof(mp->stats));

	/* Hook up the driver entry points. */
	dev->open		= mace_open;
	dev->stop		= mace_close;
	dev->hard_start_xmit	= mace_xmit_start;
	dev->tx_timeout		= mace_tx_timeout;
	dev->watchdog_timeo	= TX_TIMEOUT;
	dev->get_stats		= mace_stats;
	dev->set_multicast_list	= mace_set_multicast;
	dev->set_mac_address	= mace_set_address;

	ether_setup(dev);

	/* Announce the interface: first octet, then the remaining five. */
	printk(KERN_INFO "%s: 68K MACE, hardware address %.2X", dev->name, dev->dev_addr[0]);
	for (idx = 1; idx < 6; idx++)
		printk(":%.2X", dev->dev_addr[idx]);
	printk("\n");

	return 0;
}
示例#30
0
文件: cd.c 项目: j0sh/thesis
/*
 * Map an index from natural ordering to sequency ordering.
 *
 * Two steps: Gray-code the index, then bit-reverse that code across
 * `bits` bits.
 */
static int nat2seq(int n, int bits)
{
    int gray = gc(n);

    return bitrev(gray, bits);
}