コード例 #1
0
ファイル: Sensirion.cpp プロジェクト: shday/myarduino
// Read the sensor status register into *result.
// Returns 0 on success, the putByte() error code if sending the read command
// failed, or S_Err_CRC (CRC_ENA builds only) when the received checksum does
// not match the locally computed one.  On any failure *result is set to 0xFF.
uint8_t Sensirion::readSR(uint8_t *result) {
#ifdef CRC_ENA
  // Seed the running CRC from the bit-reversed, masked status register copy
  _crc = bitrev(_stat_reg & SR_MASK);
#endif
  startTransmission();
  const uint8_t sendError = putByte(STAT_REG_R);
  if (sendError != 0) {
    *result = 0xFF;
    return sendError;
  }
#ifdef CRC_ENA
  // The command byte is part of the checksum, followed by the data byte
  calcCRC(STAT_REG_R, &_crc);
  const uint8_t status = getByte(ACK);
  *result = status;
  calcCRC(status, &_crc);
  // The checksum byte is compared after bit reversal
  uint8_t crcByte = getByte(noACK);
  crcByte = bitrev(crcByte);
  if (crcByte != _crc) {
    *result = 0xFF;
    return S_Err_CRC;
  }
  return 0;
#else
  *result = getByte(noACK);
  return 0;
#endif
}
コード例 #2
0
/*
 * Reset every working buffer used by the filter/FFT code, load the filter
 * coefficients B into h (zero-padded to FFT_LENGTH), build the twiddle table
 * w, and transform h with cfftr2_dit followed by a bit-reversal reorder.
 * Uses the file-scope loop counter n and file-scope buffers throughout.
 */
void init(void)
{
	/* Clear the ping/pong input blocks and their matching output buffers. */
	for(n = 0; n < (FFT_LENGTH - (ORDER - 1)); n++)
	{
		PING[n].re = 0;
		PING[n].im = 0;

		PONG[n].re = 0;
		PONG[n].im = 0;

		outputPING[n] = 0;
		outputPONG[n] = 0;
	}

	for(n = 0; n < FFT_LENGTH; n++)
	{
		/* First ORDER entries of h come from B; the rest are zero padding. */
		h[n].re = (n < ORDER) ? B[n] : 0;
		h[n].im = 0;

		V[n].re = 0;
		V[n].im = 0;

		pingU[n].re = 0;
		pingU[n].im = 0;

		pongU[n].re = 0;
		/* Index with n (not 0) so every imaginary part is cleared. */
		pongU[n].im = 0;
	}

	/* Clear the ORDER-1 entries of Z. */
	for(n = 0; n < ORDER - 1; n++)
	{
		Z[n].re = 0;
		Z[n].im = 0;
	}

	/* Compute the first FFT_LENGTH/RADIX twiddle factors.
	 * NOTE(review): the original comment called the imaginary part
	 * "negative"; that only holds if DELTA itself is negative - confirm. */
	for(n = 0; n < FFT_LENGTH/RADIX; n++)
	{
		w[n].re = cos(DELTA*n);
		w[n].im = sin(DELTA*n);
	}

	/* Index tables are filled with -1 before digitrev_index populates them. */
	for(n = 0; n < FFT_LENGTH/2; n++)
		iw[n] = -1;

	for(n = 0; n < FFT_LENGTH; n++)
		ix[n] = -1;

	/* Reorder the twiddle table using the generated index table. */
	digitrev_index(iw, FFT_LENGTH/RADIX, RADIX);
	bitrev(w, iw, FFT_LENGTH/RADIX);

	/* Transform the padded coefficients, then reorder the result. */
	cfftr2_dit(h, w, FFT_LENGTH);
	digitrev_index(ix, FFT_LENGTH/RADIX, RADIX);
	bitrev(h, ix, FFT_LENGTH/RADIX);
}
コード例 #3
0
/*
 * Print a column-index header, then one line per matrix row: the row number
 * as two hex digits followed by the bit-reversed row value formatted with
 * xprintf's %08b conversion.
 */
void matrix_print(void)
{
    uint8_t current_row = 0;

    print("\nr/c 01234567\n");
    /* matrix_rows() is re-evaluated on every pass, as in a for-loop test. */
    while (current_row < matrix_rows()) {
        xprintf("%02X: %08b\n", current_row, bitrev(matrix_get_row(current_row)));
        current_row++;
    }
}
}
コード例 #4
0
ファイル: matrix.c プロジェクト: 0tsuki/qmk_firmware
/*
 * Print the key matrix: a column-index header, then one line per row with the
 * row number in hex, the row value passed through bitrev/bitrev16/bitrev32,
 * and an optional " <ghost" marker.  Header text, format width and the
 * bit-reversal helper are all selected at compile time from MATRIX_COLS.
 *
 * NOTE(review): there is no branch for MATRIX_COLS > 32; in that case no
 * header is printed and no "xprintf(" opener is emitted for the argument
 * lines below, so the function would not compile.
 */
void matrix_print(void)
{
    /* Column header sized to the matrix width. */
#if (MATRIX_COLS <= 8)
    print("r/c 01234567\n");
#elif (MATRIX_COLS <= 16)
    print("r/c 0123456789ABCDEF\n");
#elif (MATRIX_COLS <= 32)
    print("r/c 0123456789ABCDEF0123456789ABCDEF\n");
#endif

    for (uint8_t row = 0; row < MATRIX_ROWS; row++) {

        /* Exactly one of these lines opens the xprintf call; the trailing
         * %s argument and the closing parenthesis follow below. */
#if (MATRIX_COLS <= 8)
        xprintf("%02X: %08b%s\n", row, bitrev(matrix_get_row(row)),
#elif (MATRIX_COLS <= 16)
        xprintf("%02X: %016b%s\n", row, bitrev16(matrix_get_row(row)),
#elif (MATRIX_COLS <= 32)
        xprintf("%02X: %032b%s\n", row, bitrev32(matrix_get_row(row)),
#endif
        /* Last argument: ghost marker when ghost detection is compiled in,
         * otherwise an empty string. */
#ifdef MATRIX_HAS_GHOST
        matrix_has_ghost_in_row(row) ?  " <ghost" : ""
#else
        ""
#endif
        );
    }
}
コード例 #5
0
ファイル: ee-fft.c プロジェクト: Sleepwalking/IB-EE-FFT
/*
 * Inverse transform of 2^power points.
 *
 * Steps, as visible here: bitrev() is called on the source arrays, the
 * imaginary work buffer buf_im is negated, a kernel is selected by `power`
 * through the defsprdx/loopcall macros, both work buffers are scaled by 1/N,
 * and the results are copied from buf_re/buf_im into dst_re/dst_im.
 *
 * dst_re, dst_im: output arrays, each must hold at least 2^power floats.
 * src_re, src_im: input arrays handed to bitrev() (presumably copied into
 *                 buf_re/buf_im in bit-reversed order - confirm).
 * power:          log2 of the transform length.
 *
 * NOTE(review): the usual conjugate/FFT/conjugate identity negates the
 * imaginary part again after the forward kernel; no second negation appears
 * here - confirm against the kernels generated by defsprdx.
 */
void eefft_ifft(float* dst_re, float* dst_im, float* src_re, float* src_im,
    int power)
{
    int k, i, N;
    float rcp;
    size_t nbytes;

    N = pow(2, power);
    rcp = 1.0 / N;
    /* Byte count derived from the element type rather than a literal 4. */
    nbytes = (size_t)N * sizeof(*dst_re);
    bitrev(src_re, src_im, power);
    
    for(i = 0; i < N; i ++)
        buf_im[i] *= -1;
    
    /* Dispatch to the kernel for this size; unknown sizes fall through. */
    switch(power)
    {
        defsprdx(1);
        defsprdx(2);
        defsprdx(3);
        loopcall(defsprdx);
        default:
        break;
    }
    
    /* Apply the 1/N normalisation. */
    for(i = 0; i < N; i ++)
    {
        buf_re[i] *= rcp;
        buf_im[i] *= rcp;
    }
    
    memcpy(dst_re, buf_re, nbytes);
    memcpy(dst_im, buf_im, nbytes);
}
コード例 #6
0
ファイル: FFT.cpp プロジェクト: RunningTime/algorithm
 // In-place iterative transform of a[0..n-1] (n and bn come from the
 // enclosing scope; presumably n == 1 << bn - confirm).  Elements are first
 // reordered by bitrev(), then bn butterfly passes are applied, doubling the
 // butterfly span each pass.  `inv` is only forwarded to W(), which supplies
 // the per-pass rotation factor.
 void dft(comp *a,bool inv) {
     // Reorder: swap each index with its bit-reversed partner exactly once.
     for (int idx = 0; idx < n; ++idx) {
         const int partner = bitrev(idx);
         if (partner > idx) std::swap(a[partner], a[idx]);
     }

     for (int level = 1; level <= bn; ++level) {
         const int span = 1 << level;
         const int half = 1 << (level - 1);
         comp root = W(span, inv);
         comp twiddle = comp(1, 0);

         for (int k = 0; k < half; ++k) {
             // Same twiddle for offset k in every group of this pass.
             for (int base = 0; base < n; base += span) {
                 const int lo = base | k;
                 const int hi = base | k | half;
                 comp top = a[lo];
                 comp rotated;
                 rotated.x = twiddle.x * a[hi].x - twiddle.y * a[hi].y;
                 rotated.y = twiddle.x * a[hi].y + twiddle.y * a[hi].x;
                 a[lo].x = top.x + rotated.x;
                 a[lo].y = top.y + rotated.y;
                 a[hi].x = top.x - rotated.x;
                 a[hi].y = top.y - rotated.y;
             }
             // Advance the twiddle by one step: twiddle *= root.
             comp advanced;
             advanced.x = twiddle.x * root.x - twiddle.y * root.y;
             advanced.y = twiddle.x * root.y + twiddle.y * root.x;
             twiddle = advanced;
         }
     }
 }
コード例 #7
0
ファイル: Sensirion.cpp プロジェクト: shday/myarduino
// Start a measurement.  cmd == TEMP selects MEAS_TEMP, anything else selects
// MEAS_HUMI.  In non-blocking mode the result pointer is stored in _presult
// and 0 is returned immediately; in blocking mode the data pin is polled
// until it reads low and the value is then fetched via getResult().
// Returns 0 on success, the putByte() error code, S_Err_TO on timeout, or
// whatever getResult() reports.
uint8_t Sensirion::meas(uint8_t cmd, uint16_t *result, bool block) {
#ifdef CRC_ENA
  // Seed the running CRC from the bit-reversed, masked status register copy
  _crc = bitrev(_stat_reg & SR_MASK);
#endif
  startTransmission();
  cmd = (cmd == TEMP) ? MEAS_TEMP : MEAS_HUMI;
  const uint8_t sendError = putByte(cmd);
  if (sendError != 0)
    return sendError;
#ifdef CRC_ENA
  calcCRC(cmd, &_crc);              // Command byte is part of the checksum
#endif
  // Non-blocking: remember where the result should go and return
  if (!block) {
    _presult = result;
    return 0;
  }
  // Blocking: poll every 3 ms while the data pin reads high; give up when
  // the 240-step countdown reaches zero (roughly 720 ms)
  for (uint8_t pollsLeft = 240; digitalRead(_pinData); delay(3)) {
    if (--pollsLeft == 0)
      return S_Err_TO;              // Error: Timeout
  }
  return getResult(result);
}
コード例 #8
0
ファイル: main.c プロジェクト: xwaynec/eplab
/* permutes the array using a bit-reversal permutation */ 
void permute_bitrev(int n, int *A_re, int *A_im) 
{ 
	int idata i;
	int idata bri;
	int idata log2n;
	int idata t_re;
	int idata t_im;

	log2n = log_2(n); 

	for (i=0; i<n; i++)
	{
		bri  = bitrev(i, log2n);

		/* skip already swapped elements */
		if (bri <= i) continue;

		t_re = A_re[i];
		t_im = A_im[i];
		A_re[i]= A_re[bri];
		A_im[i]= A_im[bri];
		A_re[bri]= t_re;
		A_im[bri]= t_im;
	}  
} 
コード例 #9
0
ファイル: dct.c プロジェクト: faywong/watermarking
/*
 * Forward transform pipeline applied in place to f (length N from file
 * scope): scramble, forward butterflies, bit-reversal reorder, forward sums,
 * and finally f[0] is multiplied by INVROOT2.
 * Presumably an unscaled forward fast cosine transform (name-based - confirm).
 * The stage order is significant; ifct_noscale applies the inverse-named
 * stages in the opposite order.
 */
static void fct_noscale(double *f)
{
  scramble(f,N);
  fwd_butterflies(f);
  bitrev(f,N);
  fwd_sums(f);
  /* Only the first coefficient receives the INVROOT2 factor. */
  f[0] *= INVROOT2;
}
コード例 #10
0
ファイル: dct.c プロジェクト: faywong/watermarking
/*
 * Inverse transform pipeline applied in place to f (length N from file
 * scope): f[0] is multiplied by INVROOT2, then inverse sums, bit-reversal
 * reorder, inverse butterflies and unscramble are run in that order.
 * Presumably an unscaled inverse fast cosine transform (name-based - confirm).
 * This is the stage sequence of fct_noscale in reverse.
 */
static void ifct_noscale(double *f)
{
  /* Only the first coefficient receives the INVROOT2 factor. */
  f[0] *= INVROOT2;
  inv_sums(f);
  bitrev(f,N);
  inv_butterflies(f);
  unscramble(f,N);
}
コード例 #11
0
ファイル: tree.c プロジェクト: llevin/sigproc
/*
 * In-place staged pairwise summation over outbuf.
 *
 * outbuf is treated as nchn consecutive rows of (nsamp + 2*nchn) floats,
 * where nsamp = mlen/nchn - 2*nchn.  For each of log2(nchn) stages the rows
 * are grouped into blocks of 2, 4, 8, ... rows; inside a block, rows are
 * combined in pairs selected through bitrev().  For pair number d the first
 * row receives row1[t] + row2[t + d] and the second row receives
 * row1[t] + row2[t + d + 1], for the first (nsamp + nchn) samples.
 *
 * nchn is presumably a power of two (the stage count is a rounded log2).
 */
void taylor_flt(float outbuf[], int mlen, int nchn)
{
  int   bitrev(int, int);
  float sum_first;
  int   nsamp, npts, row_len, nstages;
  int   stage, block_rows, n_blocks, block, pair;
  int   first, second, first_end, row_base, shift;

  nsamp   = (mlen / nchn) - (2 * nchn);
  npts    = nsamp + nchn;
  row_len = nsamp + 2 * nchn;
  nstages = (int)(log((float)nchn) / 0.6931471 + 0.5);

  block_rows = 1;
  for (stage = 1; stage <= nstages; stage++) {
    block_rows *= 2;
    n_blocks = nchn / block_rows;

    for (block = 0; block < n_blocks; block++) {
      row_base = block * block_rows;

      /* shift counts the pairs handled so far within this block */
      for (pair = 0, shift = 0; pair < block_rows - 1; pair += 2, shift++) {
        first     = (bitrev(pair, stage) + row_base) * row_len;
        second    = (bitrev(pair + 1, stage) + row_base) * row_len;
        first_end = npts + first;

        for (; first < first_end; first++, second++) {
          /* Compute the first-row sum before either row is overwritten. */
          sum_first      = outbuf[first] + outbuf[second + shift];
          outbuf[second] = outbuf[first] + outbuf[second + shift + 1];
          outbuf[first]  = sum_first;
        }
      }
    }
  }
}
コード例 #12
0
ファイル: hwaddr.c プロジェクト: AhmadTux/DragonFlyBSD
/*
 * Emit a 256-entry table of bitrev(i) for i = 0x00..0xFF as C initializer
 * text: eight entries per line, each line prefixed with a comment giving the
 * index of its first entry.
 */
int
main(void)
{
	int value = 0;

	while (value <= 0xFF) {
		const int column = value & 7;

		/* Start of a row: label it with the first index. */
		if (column == 0)
			printf("/* 0x%02X */", value);
		printf(" 0x%02X,", bitrev(value));
		/* Eighth entry closes the row. */
		if (column == 7)
			printf("\n");
		value++;
	}
	return 0;
}
コード例 #13
0
ファイル: dct.c プロジェクト: faywong/watermarking
/*
 * Reorder f (len elements) in place: bit-reverse the whole array, bit-reverse
 * each half separately, then reverse the order of the upper half by swapping
 * its ends inward (len/4 swaps).
 */
static void scramble(double *f,int len){
  const int half_len = len >> 1;
  const int swap_count = len >> 2;
  int upper, lower, done;

  bitrev(f, len);
  bitrev(&f[0], half_len);
  bitrev(&f[half_len], half_len);

  /* Walk inward from both ends of the upper half. */
  for (done = 0, upper = len - 1, lower = half_len;
       done < swap_count;
       done++, upper--, lower++) {
    SWAP(f[upper], f[lower]);
  }
}
コード例 #14
0
ファイル: ee-fft.c プロジェクト: Sleepwalking/IB-EE-FFT
/*
 * Forward transform of 2^power points.
 *
 * bitrev() is called on the source arrays, a kernel is selected by `power`
 * through the defsprdx/loopcall macros, and the work buffers buf_re/buf_im
 * are then copied into dst_re/dst_im.
 *
 * dst_re, dst_im: output arrays, each must hold at least 2^power floats.
 * src_re, src_im: input arrays handed to bitrev() (presumably copied into
 *                 buf_re/buf_im in bit-reversed order - confirm).
 * power:          log2 of the transform length.
 */
void eefft_fft(float* dst_re, float* dst_im, float* src_re, float* src_im,
    int power)
{
    int k, i;
    size_t nbytes;

    /* Computed once, from the element type rather than a literal 4. */
    nbytes = (size_t)pow(2, power) * sizeof(*dst_re);
    bitrev(src_re, src_im, power);
    
    /* Dispatch to the kernel for this size; unknown sizes fall through. */
    switch(power)
    {
        defsprdx(1);
        defsprdx(2);
        defsprdx(3);
        loopcall(defsprdx);
        default:
        break;
    }
    memcpy(dst_re, buf_re, nbytes);
    memcpy(dst_im, buf_im, nbytes);
}
コード例 #15
0
ファイル: fftref.c プロジェクト: SirAbedi/streamit
/* Fill W_re/W_im with the first n/2 roots of unity, stored in bit-reversed
 * order: W[bitrev(i, log2n-1)] = e^(2*pi*i/n) for i in [0, n/2).
 * n and log2n are taken from file scope; n should be a power of 2.
 * The index i is treated as a (log2n-1)-bit number because only n/2 entries
 * are stored; the table is pre-permuted for the benefit of fft(..).
 */
void compute_W(float *W_re, float *W_im)
{
  int k;

  for (k = 0; k < (n/2); k++)
  {
    const int slot = bitrev(k, log2n-1);
    const double angle = ((float)k*2.0*M_PI)/((float)n);

    W_re[slot] = cos(angle);
    W_im[slot] = sin(angle);
  }
  #ifdef COMMENT_ONLY 
  /* Debug dump of the table in storage order. */
  for (k = 0; k < (n/2); k++)
  { 
    printf("(%g\t%g)\n", W_re[k], W_im[k]);
  }  
  #endif 
}
コード例 #16
0
ファイル: dit_niso.cpp プロジェクト: fparaggio/PuggleLinux
// In-place FFT over array[0..fft_size-1].  Runs ilog2(fft_size) butterfly
// passes directly on the data as given, halving the butterfly distance each
// pass, and applies ComplexBitReverse() to the array afterwards.  Every group
// in a pass uses one twiddle factor whose angle comes from the bit-reversed
// group number.
void FftDitNiso( complex *array,
                 int fft_size)
{
  complex twiddle;
  complex product;
  double angle;
  const int num_stages = ilog2(fft_size);
  int half_span = fft_size;

  for (int stage = 1; stage <= num_stages; stage++)
    {
      // span = N / 2^(stage-1); half_span = N / 2^stage
      const int span = half_span;
      half_span /= 2;

      int group_index = 0;
      for (int group_start = 0; group_start < fft_size;
           group_start += span, group_index++)
        {
          angle = (TWO_PI*bitrev(group_index, num_stages-1))/fft_size;
          twiddle = complex(cos(angle), -sin(angle));

          const int group_mid = group_start + half_span;
          for (int upper = group_start; upper < group_mid; upper++)
            {
              const int lower = upper + half_span;
              product = array[lower] * twiddle;
              array[lower] = array[upper] - product;
              array[upper] += product;
            }
        }
    }

  ComplexBitReverse(array, fft_size);

  return;
}
コード例 #17
0
ファイル: driver.c プロジェクト: czlilac/SIMD-benchmarks
/* Fill W_re/W_im with the first n/2 roots of unity, stored in bit-reversed
 * order: W[bitrev(i, log2n-1)] = e^(2*pi*i/n) for i in [0, n/2).
 * n should be a power of 2.
 * The index i is treated as a (log2n-1)-bit number because only n/2 entries
 * are stored; the table is pre-permuted for the benefit of fft(..).
 */
void compute_W(int n, double *W_re, double *W_im)
{
  const int index_bits = log_2(n) - 1;
  int k;

  for (k = 0; k < (n/2); k++)
  {
    const int slot = bitrev(k, index_bits);
    const double angle = ((double)k*2.0*M_PI)/((double)n);

    W_re[slot] = cos(angle);
    W_im[slot] = sin(angle);
  }
  #ifdef COMMENT_ONLY 
  /* Debug dump of the table in storage order. */
  for (k = 0; k < (n/2); k++)
  { 
    printf("(%g\t%g)\n", W_re[k], W_im[k]);
  }  
  #endif 
}
コード例 #18
0
ファイル: main.c プロジェクト: xwaynec/eplab
/* Intended contract: W[bitrev(i,log2n-1)] = e^(2*pi*i/n) for i in [0, n/2),
 * with n a power of 2.  The table is stored bit-reversal permuted for the
 * benefit of fft(..), and i is treated as a (log2n-1)-bit number.
 *
 * NOTE(review): the code below does not match that contract.  The angle is
 * computed in integer arithmetic with pi replaced by 3 ((i*2*3)/n, integer
 * division), and the cos/sin results are stored into int arrays, so they are
 * truncated and most entries end up as 0.  A fixed-point scale factor is
 * probably missing - confirm what fft(..) expects before changing this.
 */
void compute_W(int idata n, int idata W_re[], int idata W_im[])
{
	int idata k;
	int idata slot;
	int index_bits = log_2(n) - 1;

	k = 0;
	while (k < (n/2))
	{
		slot = bitrev(k, index_bits);
		W_re[slot] = cos((k*2*3)/(n));
		W_im[slot] = sin((k*2*3)/(n));
		k++;
	}
//#ifdef COMMENT_ONLY 
//	for (i=0;i<(n/2);i++)
//	{ 
//		br = i; //bitrev(i,log2n-1); 
//		printf("(%g\t%g)\n", W_re[br], W_im[br]);
//	}  
//#endif 
}
コード例 #19
0
ファイル: Sensirion.cpp プロジェクト: shday/myarduino
// Fetch a 16-bit measurement from the sensor, high byte first, into *result.
// With CRC_ENA both data bytes are folded into the running _crc and a third
// byte is read and compared (after bit reversal) against it; on mismatch
// *result is set to 0xFFFF and S_Err_CRC is returned.  Returns 0 otherwise.
uint8_t Sensirion::getResult(uint16_t *result) {
#ifdef CRC_ENA
  const uint8_t highByte = getByte(ACK);
  calcCRC(highByte, &_crc);
  const uint8_t lowByte = getByte(ACK);
  calcCRC(lowByte, &_crc);
  *result = (static_cast<uint16_t>(highByte) << 8) | lowByte;
  // Last byte is the checksum; do not acknowledge it
  uint8_t crcByte = getByte(noACK);
  crcByte = bitrev(crcByte);
  if (crcByte != _crc) {
    *result = 0xFFFF;
    return S_Err_CRC;
  }
#else
  // No checksum wanted: skip the ACK after the low byte
  const uint16_t highByte = getByte(ACK);
  *result = (highByte << 8) | getByte(noACK);
#endif
  return 0;
}
コード例 #20
0
ファイル: driver.c プロジェクト: czlilac/SIMD-benchmarks
/*
 * Reorder A_re and A_im in place with a bit-reversal permutation: element i
 * is exchanged with element bitrev(i, log_2(n)).  Both arrays are permuted
 * identically.
 */
void permute_bitrev(int n, double *A_re, double *A_im) 
{ 
  const int index_bits = log_2(n);
  int idx;

  for (idx = 0; idx < n; idx++)
  {
      const int partner = bitrev(idx, index_bits);

      /* Swap only from the lower index so each pair is exchanged once;
       * self-paired indices need no work. */
      if (partner > idx)
      {
          const double saved_re = A_re[idx];
          const double saved_im = A_im[idx];

          A_re[idx] = A_re[partner];
          A_im[idx] = A_im[partner];
          A_re[partner] = saved_re;
          A_im[partner] = saved_im;
      }
  }  
} 
コード例 #21
0
ファイル: bitrev.c プロジェクト: kangsu/BitReversal
/*
 * Out-of-place bit-reversal copy: writes Y[i] = X[bitrev(i, lgn)] for the n
 * elements of X.  Each (i, bitrev(i)) pair is filled in one step from the
 * lower index.  Indices 0 and n-1 are copied straight across (presumably
 * because they are their own reversals when n == 1 << lgn - confirm).
 *
 * X:     source array, n elements.
 * Y:     destination array, n elements.
 * nbits: not used by this implementation.
 * n:     element count; nothing is copied when n <= 0.
 * lgn:   bit width handed to bitrev().
 *
 * The warning text is printed on every call, as before.
 */
void incachebitrev(float *X, float *Y, int nbits, int n, int lgn)
{
    int i, bitrevi;

    (void)nbits;

/**** Printfs warning user that this bitreverse algorithm is not tuned *****/

    printf("This code is not at all tuned and not part of COBRA.\n");  
    printf("In fact this is a very naive implementation.\n");
    printf("We suggest putting in your own in-cache bit-reversal here ");
    printf("for robust performance.\n");
    printf("This code can be found in the bottom of file bitrev.c.\n");
    printf("These printfs can be removed with no ill effect.\n");

    /* Guard: with n <= 0 the X[n-1] / Y[n-1] accesses would be out of bounds. */
    if (n <= 0)
        return;

    Y[0] = X[0];
    Y[n-1] = X[n-1];
    for(i = 1; i < n-1; i++)
    {
        bitrevi = bitrev(i, lgn);
        /* The pair was already written when the smaller index was visited. */
        if(bitrevi < i) continue;
        Y[i] = X[bitrevi];
        Y[bitrevi] = X[i];
    }
    return;
}
コード例 #22
0
ファイル: b512.c プロジェクト: melizalab/aplot
void bitrev512( float x[], int length )
{
	register float temp;	switch (length) {
	    case 512:
		temp= x[2]; x[2] = x[512]; x[512] = temp;
		temp= x[3]; x[3] = x[513]; x[513] = temp;
		temp= x[4]; x[4] = x[256]; x[256] = temp;
		temp= x[5]; x[5] = x[257]; x[257] = temp;
		temp= x[6]; x[6] = x[768]; x[768] = temp;
		temp= x[7]; x[7] = x[769]; x[769] = temp;
		temp= x[8]; x[8] = x[128]; x[128] = temp;
		temp= x[9]; x[9] = x[129]; x[129] = temp;
		temp= x[10]; x[10] = x[640]; x[640] = temp;
		temp= x[11]; x[11] = x[641]; x[641] = temp;
		temp= x[12]; x[12] = x[384]; x[384] = temp;
		temp= x[13]; x[13] = x[385]; x[385] = temp;
		temp= x[14]; x[14] = x[896]; x[896] = temp;
		temp= x[15]; x[15] = x[897]; x[897] = temp;
		temp= x[16]; x[16] = x[64]; x[64] = temp;
		temp= x[17]; x[17] = x[65]; x[65] = temp;
		temp= x[18]; x[18] = x[576]; x[576] = temp;
		temp= x[19]; x[19] = x[577]; x[577] = temp;
		temp= x[20]; x[20] = x[320]; x[320] = temp;
		temp= x[21]; x[21] = x[321]; x[321] = temp;
		temp= x[22]; x[22] = x[832]; x[832] = temp;
		temp= x[23]; x[23] = x[833]; x[833] = temp;
		temp= x[24]; x[24] = x[192]; x[192] = temp;
		temp= x[25]; x[25] = x[193]; x[193] = temp;
		temp= x[26]; x[26] = x[704]; x[704] = temp;
		temp= x[27]; x[27] = x[705]; x[705] = temp;
		temp= x[28]; x[28] = x[448]; x[448] = temp;
		temp= x[29]; x[29] = x[449]; x[449] = temp;
		temp= x[30]; x[30] = x[960]; x[960] = temp;
		temp= x[31]; x[31] = x[961]; x[961] = temp;
		temp= x[34]; x[34] = x[544]; x[544] = temp;
		temp= x[35]; x[35] = x[545]; x[545] = temp;
		temp= x[36]; x[36] = x[288]; x[288] = temp;
		temp= x[37]; x[37] = x[289]; x[289] = temp;
		temp= x[38]; x[38] = x[800]; x[800] = temp;
		temp= x[39]; x[39] = x[801]; x[801] = temp;
		temp= x[40]; x[40] = x[160]; x[160] = temp;
		temp= x[41]; x[41] = x[161]; x[161] = temp;
		temp= x[42]; x[42] = x[672]; x[672] = temp;
		temp= x[43]; x[43] = x[673]; x[673] = temp;
		temp= x[44]; x[44] = x[416]; x[416] = temp;
		temp= x[45]; x[45] = x[417]; x[417] = temp;
		temp= x[46]; x[46] = x[928]; x[928] = temp;
		temp= x[47]; x[47] = x[929]; x[929] = temp;
		temp= x[48]; x[48] = x[96]; x[96] = temp;
		temp= x[49]; x[49] = x[97]; x[97] = temp;
		temp= x[50]; x[50] = x[608]; x[608] = temp;
		temp= x[51]; x[51] = x[609]; x[609] = temp;
		temp= x[52]; x[52] = x[352]; x[352] = temp;
		temp= x[53]; x[53] = x[353]; x[353] = temp;
		temp= x[54]; x[54] = x[864]; x[864] = temp;
		temp= x[55]; x[55] = x[865]; x[865] = temp;
		temp= x[56]; x[56] = x[224]; x[224] = temp;
		temp= x[57]; x[57] = x[225]; x[225] = temp;
		temp= x[58]; x[58] = x[736]; x[736] = temp;
		temp= x[59]; x[59] = x[737]; x[737] = temp;
		temp= x[60]; x[60] = x[480]; x[480] = temp;
		temp= x[61]; x[61] = x[481]; x[481] = temp;
		temp= x[62]; x[62] = x[992]; x[992] = temp;
		temp= x[63]; x[63] = x[993]; x[993] = temp;
		temp= x[66]; x[66] = x[528]; x[528] = temp;
		temp= x[67]; x[67] = x[529]; x[529] = temp;
		temp= x[68]; x[68] = x[272]; x[272] = temp;
		temp= x[69]; x[69] = x[273]; x[273] = temp;
		temp= x[70]; x[70] = x[784]; x[784] = temp;
		temp= x[71]; x[71] = x[785]; x[785] = temp;
		temp= x[72]; x[72] = x[144]; x[144] = temp;
		temp= x[73]; x[73] = x[145]; x[145] = temp;
		temp= x[74]; x[74] = x[656]; x[656] = temp;
		temp= x[75]; x[75] = x[657]; x[657] = temp;
		temp= x[76]; x[76] = x[400]; x[400] = temp;
		temp= x[77]; x[77] = x[401]; x[401] = temp;
		temp= x[78]; x[78] = x[912]; x[912] = temp;
		temp= x[79]; x[79] = x[913]; x[913] = temp;
		temp= x[82]; x[82] = x[592]; x[592] = temp;
		temp= x[83]; x[83] = x[593]; x[593] = temp;
		temp= x[84]; x[84] = x[336]; x[336] = temp;
		temp= x[85]; x[85] = x[337]; x[337] = temp;
		temp= x[86]; x[86] = x[848]; x[848] = temp;
		temp= x[87]; x[87] = x[849]; x[849] = temp;
		temp= x[88]; x[88] = x[208]; x[208] = temp;
		temp= x[89]; x[89] = x[209]; x[209] = temp;
		temp= x[90]; x[90] = x[720]; x[720] = temp;
		temp= x[91]; x[91] = x[721]; x[721] = temp;
		temp= x[92]; x[92] = x[464]; x[464] = temp;
		temp= x[93]; x[93] = x[465]; x[465] = temp;
		temp= x[94]; x[94] = x[976]; x[976] = temp;
		temp= x[95]; x[95] = x[977]; x[977] = temp;
		temp= x[98]; x[98] = x[560]; x[560] = temp;
		temp= x[99]; x[99] = x[561]; x[561] = temp;
		temp= x[100]; x[100] = x[304]; x[304] = temp;
		temp= x[101]; x[101] = x[305]; x[305] = temp;
		temp= x[102]; x[102] = x[816]; x[816] = temp;
		temp= x[103]; x[103] = x[817]; x[817] = temp;
		temp= x[104]; x[104] = x[176]; x[176] = temp;
		temp= x[105]; x[105] = x[177]; x[177] = temp;
		temp= x[106]; x[106] = x[688]; x[688] = temp;
		temp= x[107]; x[107] = x[689]; x[689] = temp;
		temp= x[108]; x[108] = x[432]; x[432] = temp;
		temp= x[109]; x[109] = x[433]; x[433] = temp;
		temp= x[110]; x[110] = x[944]; x[944] = temp;
		temp= x[111]; x[111] = x[945]; x[945] = temp;
		temp= x[114]; x[114] = x[624]; x[624] = temp;
		temp= x[115]; x[115] = x[625]; x[625] = temp;
		temp= x[116]; x[116] = x[368]; x[368] = temp;
		temp= x[117]; x[117] = x[369]; x[369] = temp;
		temp= x[118]; x[118] = x[880]; x[880] = temp;
		temp= x[119]; x[119] = x[881]; x[881] = temp;
		temp= x[120]; x[120] = x[240]; x[240] = temp;
		temp= x[121]; x[121] = x[241]; x[241] = temp;
		temp= x[122]; x[122] = x[752]; x[752] = temp;
		temp= x[123]; x[123] = x[753]; x[753] = temp;
		temp= x[124]; x[124] = x[496]; x[496] = temp;
		temp= x[125]; x[125] = x[497]; x[497] = temp;
		temp= x[126]; x[126] = x[1008]; x[1008] = temp;
		temp= x[127]; x[127] = x[1009]; x[1009] = temp;
		temp= x[130]; x[130] = x[520]; x[520] = temp;
		temp= x[131]; x[131] = x[521]; x[521] = temp;
		temp= x[132]; x[132] = x[264]; x[264] = temp;
		temp= x[133]; x[133] = x[265]; x[265] = temp;
		temp= x[134]; x[134] = x[776]; x[776] = temp;
		temp= x[135]; x[135] = x[777]; x[777] = temp;
		temp= x[138]; x[138] = x[648]; x[648] = temp;
		temp= x[139]; x[139] = x[649]; x[649] = temp;
		temp= x[140]; x[140] = x[392]; x[392] = temp;
		temp= x[141]; x[141] = x[393]; x[393] = temp;
		temp= x[142]; x[142] = x[904]; x[904] = temp;
		temp= x[143]; x[143] = x[905]; x[905] = temp;
		temp= x[146]; x[146] = x[584]; x[584] = temp;
		temp= x[147]; x[147] = x[585]; x[585] = temp;
		temp= x[148]; x[148] = x[328]; x[328] = temp;
		temp= x[149]; x[149] = x[329]; x[329] = temp;
		temp= x[150]; x[150] = x[840]; x[840] = temp;
		temp= x[151]; x[151] = x[841]; x[841] = temp;
		temp= x[152]; x[152] = x[200]; x[200] = temp;
		temp= x[153]; x[153] = x[201]; x[201] = temp;
		temp= x[154]; x[154] = x[712]; x[712] = temp;
		temp= x[155]; x[155] = x[713]; x[713] = temp;
		temp= x[156]; x[156] = x[456]; x[456] = temp;
		temp= x[157]; x[157] = x[457]; x[457] = temp;
		temp= x[158]; x[158] = x[968]; x[968] = temp;
		temp= x[159]; x[159] = x[969]; x[969] = temp;
		temp= x[162]; x[162] = x[552]; x[552] = temp;
		temp= x[163]; x[163] = x[553]; x[553] = temp;
		temp= x[164]; x[164] = x[296]; x[296] = temp;
		temp= x[165]; x[165] = x[297]; x[297] = temp;
		temp= x[166]; x[166] = x[808]; x[808] = temp;
		temp= x[167]; x[167] = x[809]; x[809] = temp;
		temp= x[170]; x[170] = x[680]; x[680] = temp;
		temp= x[171]; x[171] = x[681]; x[681] = temp;
		temp= x[172]; x[172] = x[424]; x[424] = temp;
		temp= x[173]; x[173] = x[425]; x[425] = temp;
		temp= x[174]; x[174] = x[936]; x[936] = temp;
		temp= x[175]; x[175] = x[937]; x[937] = temp;
		temp= x[178]; x[178] = x[616]; x[616] = temp;
		temp= x[179]; x[179] = x[617]; x[617] = temp;
		temp= x[180]; x[180] = x[360]; x[360] = temp;
		temp= x[181]; x[181] = x[361]; x[361] = temp;
		temp= x[182]; x[182] = x[872]; x[872] = temp;
		temp= x[183]; x[183] = x[873]; x[873] = temp;
		temp= x[184]; x[184] = x[232]; x[232] = temp;
		temp= x[185]; x[185] = x[233]; x[233] = temp;
		temp= x[186]; x[186] = x[744]; x[744] = temp;
		temp= x[187]; x[187] = x[745]; x[745] = temp;
		temp= x[188]; x[188] = x[488]; x[488] = temp;
		temp= x[189]; x[189] = x[489]; x[489] = temp;
		temp= x[190]; x[190] = x[1000]; x[1000] = temp;
		temp= x[191]; x[191] = x[1001]; x[1001] = temp;
		temp= x[194]; x[194] = x[536]; x[536] = temp;
		temp= x[195]; x[195] = x[537]; x[537] = temp;
		temp= x[196]; x[196] = x[280]; x[280] = temp;
		temp= x[197]; x[197] = x[281]; x[281] = temp;
		temp= x[198]; x[198] = x[792]; x[792] = temp;
		temp= x[199]; x[199] = x[793]; x[793] = temp;
		temp= x[202]; x[202] = x[664]; x[664] = temp;
		temp= x[203]; x[203] = x[665]; x[665] = temp;
		temp= x[204]; x[204] = x[408]; x[408] = temp;
		temp= x[205]; x[205] = x[409]; x[409] = temp;
		temp= x[206]; x[206] = x[920]; x[920] = temp;
		temp= x[207]; x[207] = x[921]; x[921] = temp;
		temp= x[210]; x[210] = x[600]; x[600] = temp;
		temp= x[211]; x[211] = x[601]; x[601] = temp;
		temp= x[212]; x[212] = x[344]; x[344] = temp;
		temp= x[213]; x[213] = x[345]; x[345] = temp;
		temp= x[214]; x[214] = x[856]; x[856] = temp;
		temp= x[215]; x[215] = x[857]; x[857] = temp;
		temp= x[218]; x[218] = x[728]; x[728] = temp;
		temp= x[219]; x[219] = x[729]; x[729] = temp;
		temp= x[220]; x[220] = x[472]; x[472] = temp;
		temp= x[221]; x[221] = x[473]; x[473] = temp;
		temp= x[222]; x[222] = x[984]; x[984] = temp;
		temp= x[223]; x[223] = x[985]; x[985] = temp;
		temp= x[226]; x[226] = x[568]; x[568] = temp;
		temp= x[227]; x[227] = x[569]; x[569] = temp;
		temp= x[228]; x[228] = x[312]; x[312] = temp;
		temp= x[229]; x[229] = x[313]; x[313] = temp;
		temp= x[230]; x[230] = x[824]; x[824] = temp;
		temp= x[231]; x[231] = x[825]; x[825] = temp;
		temp= x[234]; x[234] = x[696]; x[696] = temp;
		temp= x[235]; x[235] = x[697]; x[697] = temp;
		temp= x[236]; x[236] = x[440]; x[440] = temp;
		temp= x[237]; x[237] = x[441]; x[441] = temp;
		temp= x[238]; x[238] = x[952]; x[952] = temp;
		temp= x[239]; x[239] = x[953]; x[953] = temp;
		temp= x[242]; x[242] = x[632]; x[632] = temp;
		temp= x[243]; x[243] = x[633]; x[633] = temp;
		temp= x[244]; x[244] = x[376]; x[376] = temp;
		temp= x[245]; x[245] = x[377]; x[377] = temp;
		temp= x[246]; x[246] = x[888]; x[888] = temp;
		temp= x[247]; x[247] = x[889]; x[889] = temp;
		temp= x[250]; x[250] = x[760]; x[760] = temp;
		temp= x[251]; x[251] = x[761]; x[761] = temp;
		temp= x[252]; x[252] = x[504]; x[504] = temp;
		temp= x[253]; x[253] = x[505]; x[505] = temp;
		temp= x[254]; x[254] = x[1016]; x[1016] = temp;
		temp= x[255]; x[255] = x[1017]; x[1017] = temp;
		temp= x[258]; x[258] = x[516]; x[516] = temp;
		temp= x[259]; x[259] = x[517]; x[517] = temp;
		temp= x[262]; x[262] = x[772]; x[772] = temp;
		temp= x[263]; x[263] = x[773]; x[773] = temp;
		temp= x[266]; x[266] = x[644]; x[644] = temp;
		temp= x[267]; x[267] = x[645]; x[645] = temp;
		temp= x[268]; x[268] = x[388]; x[388] = temp;
		temp= x[269]; x[269] = x[389]; x[389] = temp;
		temp= x[270]; x[270] = x[900]; x[900] = temp;
		temp= x[271]; x[271] = x[901]; x[901] = temp;
		temp= x[274]; x[274] = x[580]; x[580] = temp;
		temp= x[275]; x[275] = x[581]; x[581] = temp;
		temp= x[276]; x[276] = x[324]; x[324] = temp;
		temp= x[277]; x[277] = x[325]; x[325] = temp;
		temp= x[278]; x[278] = x[836]; x[836] = temp;
		temp= x[279]; x[279] = x[837]; x[837] = temp;
		temp= x[282]; x[282] = x[708]; x[708] = temp;
		temp= x[283]; x[283] = x[709]; x[709] = temp;
		temp= x[284]; x[284] = x[452]; x[452] = temp;
		temp= x[285]; x[285] = x[453]; x[453] = temp;
		temp= x[286]; x[286] = x[964]; x[964] = temp;
		temp= x[287]; x[287] = x[965]; x[965] = temp;
		temp= x[290]; x[290] = x[548]; x[548] = temp;
		temp= x[291]; x[291] = x[549]; x[549] = temp;
		temp= x[294]; x[294] = x[804]; x[804] = temp;
		temp= x[295]; x[295] = x[805]; x[805] = temp;
		temp= x[298]; x[298] = x[676]; x[676] = temp;
		temp= x[299]; x[299] = x[677]; x[677] = temp;
		temp= x[300]; x[300] = x[420]; x[420] = temp;
		temp= x[301]; x[301] = x[421]; x[421] = temp;
		temp= x[302]; x[302] = x[932]; x[932] = temp;
		temp= x[303]; x[303] = x[933]; x[933] = temp;
		temp= x[306]; x[306] = x[612]; x[612] = temp;
		temp= x[307]; x[307] = x[613]; x[613] = temp;
		temp= x[308]; x[308] = x[356]; x[356] = temp;
		temp= x[309]; x[309] = x[357]; x[357] = temp;
		temp= x[310]; x[310] = x[868]; x[868] = temp;
		temp= x[311]; x[311] = x[869]; x[869] = temp;
		temp= x[314]; x[314] = x[740]; x[740] = temp;
		temp= x[315]; x[315] = x[741]; x[741] = temp;
		temp= x[316]; x[316] = x[484]; x[484] = temp;
		temp= x[317]; x[317] = x[485]; x[485] = temp;
		temp= x[318]; x[318] = x[996]; x[996] = temp;
		temp= x[319]; x[319] = x[997]; x[997] = temp;
		temp= x[322]; x[322] = x[532]; x[532] = temp;
		temp= x[323]; x[323] = x[533]; x[533] = temp;
		temp= x[326]; x[326] = x[788]; x[788] = temp;
		temp= x[327]; x[327] = x[789]; x[789] = temp;
		temp= x[330]; x[330] = x[660]; x[660] = temp;
		temp= x[331]; x[331] = x[661]; x[661] = temp;
		temp= x[332]; x[332] = x[404]; x[404] = temp;
		temp= x[333]; x[333] = x[405]; x[405] = temp;
		temp= x[334]; x[334] = x[916]; x[916] = temp;
		temp= x[335]; x[335] = x[917]; x[917] = temp;
		temp= x[338]; x[338] = x[596]; x[596] = temp;
		temp= x[339]; x[339] = x[597]; x[597] = temp;
		temp= x[342]; x[342] = x[852]; x[852] = temp;
		temp= x[343]; x[343] = x[853]; x[853] = temp;
		temp= x[346]; x[346] = x[724]; x[724] = temp;
		temp= x[347]; x[347] = x[725]; x[725] = temp;
		temp= x[348]; x[348] = x[468]; x[468] = temp;
		temp= x[349]; x[349] = x[469]; x[469] = temp;
		temp= x[350]; x[350] = x[980]; x[980] = temp;
		temp= x[351]; x[351] = x[981]; x[981] = temp;
		temp= x[354]; x[354] = x[564]; x[564] = temp;
		temp= x[355]; x[355] = x[565]; x[565] = temp;
		temp= x[358]; x[358] = x[820]; x[820] = temp;
		temp= x[359]; x[359] = x[821]; x[821] = temp;
		temp= x[362]; x[362] = x[692]; x[692] = temp;
		temp= x[363]; x[363] = x[693]; x[693] = temp;
		temp= x[364]; x[364] = x[436]; x[436] = temp;
		temp= x[365]; x[365] = x[437]; x[437] = temp;
		temp= x[366]; x[366] = x[948]; x[948] = temp;
		temp= x[367]; x[367] = x[949]; x[949] = temp;
		temp= x[370]; x[370] = x[628]; x[628] = temp;
		temp= x[371]; x[371] = x[629]; x[629] = temp;
		temp= x[374]; x[374] = x[884]; x[884] = temp;
		temp= x[375]; x[375] = x[885]; x[885] = temp;
		temp= x[378]; x[378] = x[756]; x[756] = temp;
		temp= x[379]; x[379] = x[757]; x[757] = temp;
		temp= x[380]; x[380] = x[500]; x[500] = temp;
		temp= x[381]; x[381] = x[501]; x[501] = temp;
		temp= x[382]; x[382] = x[1012]; x[1012] = temp;
		temp= x[383]; x[383] = x[1013]; x[1013] = temp;
		temp= x[386]; x[386] = x[524]; x[524] = temp;
		temp= x[387]; x[387] = x[525]; x[525] = temp;
		temp= x[390]; x[390] = x[780]; x[780] = temp;
		temp= x[391]; x[391] = x[781]; x[781] = temp;
		temp= x[394]; x[394] = x[652]; x[652] = temp;
		temp= x[395]; x[395] = x[653]; x[653] = temp;
		temp= x[398]; x[398] = x[908]; x[908] = temp;
		temp= x[399]; x[399] = x[909]; x[909] = temp;
		temp= x[402]; x[402] = x[588]; x[588] = temp;
		temp= x[403]; x[403] = x[589]; x[589] = temp;
		temp= x[406]; x[406] = x[844]; x[844] = temp;
		temp= x[407]; x[407] = x[845]; x[845] = temp;
		temp= x[410]; x[410] = x[716]; x[716] = temp;
		temp= x[411]; x[411] = x[717]; x[717] = temp;
		temp= x[412]; x[412] = x[460]; x[460] = temp;
		temp= x[413]; x[413] = x[461]; x[461] = temp;
		temp= x[414]; x[414] = x[972]; x[972] = temp;
		temp= x[415]; x[415] = x[973]; x[973] = temp;
		temp= x[418]; x[418] = x[556]; x[556] = temp;
		temp= x[419]; x[419] = x[557]; x[557] = temp;
		temp= x[422]; x[422] = x[812]; x[812] = temp;
		temp= x[423]; x[423] = x[813]; x[813] = temp;
		temp= x[426]; x[426] = x[684]; x[684] = temp;
		temp= x[427]; x[427] = x[685]; x[685] = temp;
		temp= x[430]; x[430] = x[940]; x[940] = temp;
		temp= x[431]; x[431] = x[941]; x[941] = temp;
		temp= x[434]; x[434] = x[620]; x[620] = temp;
		temp= x[435]; x[435] = x[621]; x[621] = temp;
		temp= x[438]; x[438] = x[876]; x[876] = temp;
		temp= x[439]; x[439] = x[877]; x[877] = temp;
		temp= x[442]; x[442] = x[748]; x[748] = temp;
		temp= x[443]; x[443] = x[749]; x[749] = temp;
		temp= x[444]; x[444] = x[492]; x[492] = temp;
		temp= x[445]; x[445] = x[493]; x[493] = temp;
		temp= x[446]; x[446] = x[1004]; x[1004] = temp;
		temp= x[447]; x[447] = x[1005]; x[1005] = temp;
		temp= x[450]; x[450] = x[540]; x[540] = temp;
		temp= x[451]; x[451] = x[541]; x[541] = temp;
		temp= x[454]; x[454] = x[796]; x[796] = temp;
		temp= x[455]; x[455] = x[797]; x[797] = temp;
		temp= x[458]; x[458] = x[668]; x[668] = temp;
		temp= x[459]; x[459] = x[669]; x[669] = temp;
		temp= x[462]; x[462] = x[924]; x[924] = temp;
		temp= x[463]; x[463] = x[925]; x[925] = temp;
		temp= x[466]; x[466] = x[604]; x[604] = temp;
		temp= x[467]; x[467] = x[605]; x[605] = temp;
		temp= x[470]; x[470] = x[860]; x[860] = temp;
		temp= x[471]; x[471] = x[861]; x[861] = temp;
		temp= x[474]; x[474] = x[732]; x[732] = temp;
		temp= x[475]; x[475] = x[733]; x[733] = temp;
		temp= x[478]; x[478] = x[988]; x[988] = temp;
		temp= x[479]; x[479] = x[989]; x[989] = temp;
		temp= x[482]; x[482] = x[572]; x[572] = temp;
		temp= x[483]; x[483] = x[573]; x[573] = temp;
		temp= x[486]; x[486] = x[828]; x[828] = temp;
		temp= x[487]; x[487] = x[829]; x[829] = temp;
		temp= x[490]; x[490] = x[700]; x[700] = temp;
		temp= x[491]; x[491] = x[701]; x[701] = temp;
		temp= x[494]; x[494] = x[956]; x[956] = temp;
		temp= x[495]; x[495] = x[957]; x[957] = temp;
		temp= x[498]; x[498] = x[636]; x[636] = temp;
		temp= x[499]; x[499] = x[637]; x[637] = temp;
		temp= x[502]; x[502] = x[892]; x[892] = temp;
		temp= x[503]; x[503] = x[893]; x[893] = temp;
		temp= x[506]; x[506] = x[764]; x[764] = temp;
		temp= x[507]; x[507] = x[765]; x[765] = temp;
		temp= x[510]; x[510] = x[1020]; x[1020] = temp;
		temp= x[511]; x[511] = x[1021]; x[1021] = temp;
		temp= x[518]; x[518] = x[770]; x[770] = temp;
		temp= x[519]; x[519] = x[771]; x[771] = temp;
		temp= x[522]; x[522] = x[642]; x[642] = temp;
		temp= x[523]; x[523] = x[643]; x[643] = temp;
		temp= x[526]; x[526] = x[898]; x[898] = temp;
		temp= x[527]; x[527] = x[899]; x[899] = temp;
		temp= x[530]; x[530] = x[578]; x[578] = temp;
		temp= x[531]; x[531] = x[579]; x[579] = temp;
		temp= x[534]; x[534] = x[834]; x[834] = temp;
		temp= x[535]; x[535] = x[835]; x[835] = temp;
		temp= x[538]; x[538] = x[706]; x[706] = temp;
		temp= x[539]; x[539] = x[707]; x[707] = temp;
		temp= x[542]; x[542] = x[962]; x[962] = temp;
		temp= x[543]; x[543] = x[963]; x[963] = temp;
		temp= x[550]; x[550] = x[802]; x[802] = temp;
		temp= x[551]; x[551] = x[803]; x[803] = temp;
		temp= x[554]; x[554] = x[674]; x[674] = temp;
		temp= x[555]; x[555] = x[675]; x[675] = temp;
		temp= x[558]; x[558] = x[930]; x[930] = temp;
		temp= x[559]; x[559] = x[931]; x[931] = temp;
		temp= x[562]; x[562] = x[610]; x[610] = temp;
		temp= x[563]; x[563] = x[611]; x[611] = temp;
		temp= x[566]; x[566] = x[866]; x[866] = temp;
		temp= x[567]; x[567] = x[867]; x[867] = temp;
		temp= x[570]; x[570] = x[738]; x[738] = temp;
		temp= x[571]; x[571] = x[739]; x[739] = temp;
		temp= x[574]; x[574] = x[994]; x[994] = temp;
		temp= x[575]; x[575] = x[995]; x[995] = temp;
		temp= x[582]; x[582] = x[786]; x[786] = temp;
		temp= x[583]; x[583] = x[787]; x[787] = temp;
		temp= x[586]; x[586] = x[658]; x[658] = temp;
		temp= x[587]; x[587] = x[659]; x[659] = temp;
		temp= x[590]; x[590] = x[914]; x[914] = temp;
		temp= x[591]; x[591] = x[915]; x[915] = temp;
		temp= x[598]; x[598] = x[850]; x[850] = temp;
		temp= x[599]; x[599] = x[851]; x[851] = temp;
		temp= x[602]; x[602] = x[722]; x[722] = temp;
		temp= x[603]; x[603] = x[723]; x[723] = temp;
		temp= x[606]; x[606] = x[978]; x[978] = temp;
		temp= x[607]; x[607] = x[979]; x[979] = temp;
		temp= x[614]; x[614] = x[818]; x[818] = temp;
		temp= x[615]; x[615] = x[819]; x[819] = temp;
		temp= x[618]; x[618] = x[690]; x[690] = temp;
		temp= x[619]; x[619] = x[691]; x[691] = temp;
		temp= x[622]; x[622] = x[946]; x[946] = temp;
		temp= x[623]; x[623] = x[947]; x[947] = temp;
		temp= x[630]; x[630] = x[882]; x[882] = temp;
		temp= x[631]; x[631] = x[883]; x[883] = temp;
		temp= x[634]; x[634] = x[754]; x[754] = temp;
		temp= x[635]; x[635] = x[755]; x[755] = temp;
		temp= x[638]; x[638] = x[1010]; x[1010] = temp;
		temp= x[639]; x[639] = x[1011]; x[1011] = temp;
		temp= x[646]; x[646] = x[778]; x[778] = temp;
		temp= x[647]; x[647] = x[779]; x[779] = temp;
		temp= x[654]; x[654] = x[906]; x[906] = temp;
		temp= x[655]; x[655] = x[907]; x[907] = temp;
		temp= x[662]; x[662] = x[842]; x[842] = temp;
		temp= x[663]; x[663] = x[843]; x[843] = temp;
		temp= x[666]; x[666] = x[714]; x[714] = temp;
		temp= x[667]; x[667] = x[715]; x[715] = temp;
		temp= x[670]; x[670] = x[970]; x[970] = temp;
		temp= x[671]; x[671] = x[971]; x[971] = temp;
		temp= x[678]; x[678] = x[810]; x[810] = temp;
		temp= x[679]; x[679] = x[811]; x[811] = temp;
		temp= x[686]; x[686] = x[938]; x[938] = temp;
		temp= x[687]; x[687] = x[939]; x[939] = temp;
		temp= x[694]; x[694] = x[874]; x[874] = temp;
		temp= x[695]; x[695] = x[875]; x[875] = temp;
		temp= x[698]; x[698] = x[746]; x[746] = temp;
		temp= x[699]; x[699] = x[747]; x[747] = temp;
		temp= x[702]; x[702] = x[1002]; x[1002] = temp;
		temp= x[703]; x[703] = x[1003]; x[1003] = temp;
		temp= x[710]; x[710] = x[794]; x[794] = temp;
		temp= x[711]; x[711] = x[795]; x[795] = temp;
		temp= x[718]; x[718] = x[922]; x[922] = temp;
		temp= x[719]; x[719] = x[923]; x[923] = temp;
		temp= x[726]; x[726] = x[858]; x[858] = temp;
		temp= x[727]; x[727] = x[859]; x[859] = temp;
		temp= x[734]; x[734] = x[986]; x[986] = temp;
		temp= x[735]; x[735] = x[987]; x[987] = temp;
		temp= x[742]; x[742] = x[826]; x[826] = temp;
		temp= x[743]; x[743] = x[827]; x[827] = temp;
		temp= x[750]; x[750] = x[954]; x[954] = temp;
		temp= x[751]; x[751] = x[955]; x[955] = temp;
		temp= x[758]; x[758] = x[890]; x[890] = temp;
		temp= x[759]; x[759] = x[891]; x[891] = temp;
		temp= x[766]; x[766] = x[1018]; x[1018] = temp;
		temp= x[767]; x[767] = x[1019]; x[1019] = temp;
		temp= x[782]; x[782] = x[902]; x[902] = temp;
		temp= x[783]; x[783] = x[903]; x[903] = temp;
		temp= x[790]; x[790] = x[838]; x[838] = temp;
		temp= x[791]; x[791] = x[839]; x[839] = temp;
		temp= x[798]; x[798] = x[966]; x[966] = temp;
		temp= x[799]; x[799] = x[967]; x[967] = temp;
		temp= x[814]; x[814] = x[934]; x[934] = temp;
		temp= x[815]; x[815] = x[935]; x[935] = temp;
		temp= x[822]; x[822] = x[870]; x[870] = temp;
		temp= x[823]; x[823] = x[871]; x[871] = temp;
		temp= x[830]; x[830] = x[998]; x[998] = temp;
		temp= x[831]; x[831] = x[999]; x[999] = temp;
		temp= x[846]; x[846] = x[918]; x[918] = temp;
		temp= x[847]; x[847] = x[919]; x[919] = temp;
		temp= x[862]; x[862] = x[982]; x[982] = temp;
		temp= x[863]; x[863] = x[983]; x[983] = temp;
		temp= x[878]; x[878] = x[950]; x[950] = temp;
		temp= x[879]; x[879] = x[951]; x[951] = temp;
		temp= x[894]; x[894] = x[1014]; x[1014] = temp;
		temp= x[895]; x[895] = x[1015]; x[1015] = temp;
		temp= x[926]; x[926] = x[974]; x[974] = temp;
		temp= x[927]; x[927] = x[975]; x[975] = temp;
		temp= x[958]; x[958] = x[1006]; x[1006] = temp;
		temp= x[959]; x[959] = x[1007]; x[1007] = temp;
		break;
	    case 256:
		temp= x[2]; x[2] = x[256]; x[256] = temp;
		temp= x[3]; x[3] = x[257]; x[257] = temp;
		temp= x[4]; x[4] = x[128]; x[128] = temp;
		temp= x[5]; x[5] = x[129]; x[129] = temp;
		temp= x[6]; x[6] = x[384]; x[384] = temp;
		temp= x[7]; x[7] = x[385]; x[385] = temp;
		temp= x[8]; x[8] = x[64]; x[64] = temp;
		temp= x[9]; x[9] = x[65]; x[65] = temp;
		temp= x[10]; x[10] = x[320]; x[320] = temp;
		temp= x[11]; x[11] = x[321]; x[321] = temp;
		temp= x[12]; x[12] = x[192]; x[192] = temp;
		temp= x[13]; x[13] = x[193]; x[193] = temp;
		temp= x[14]; x[14] = x[448]; x[448] = temp;
		temp= x[15]; x[15] = x[449]; x[449] = temp;
		temp= x[16]; x[16] = x[32]; x[32] = temp;
		temp= x[17]; x[17] = x[33]; x[33] = temp;
		temp= x[18]; x[18] = x[288]; x[288] = temp;
		temp= x[19]; x[19] = x[289]; x[289] = temp;
		temp= x[20]; x[20] = x[160]; x[160] = temp;
		temp= x[21]; x[21] = x[161]; x[161] = temp;
		temp= x[22]; x[22] = x[416]; x[416] = temp;
		temp= x[23]; x[23] = x[417]; x[417] = temp;
		temp= x[24]; x[24] = x[96]; x[96] = temp;
		temp= x[25]; x[25] = x[97]; x[97] = temp;
		temp= x[26]; x[26] = x[352]; x[352] = temp;
		temp= x[27]; x[27] = x[353]; x[353] = temp;
		temp= x[28]; x[28] = x[224]; x[224] = temp;
		temp= x[29]; x[29] = x[225]; x[225] = temp;
		temp= x[30]; x[30] = x[480]; x[480] = temp;
		temp= x[31]; x[31] = x[481]; x[481] = temp;
		temp= x[34]; x[34] = x[272]; x[272] = temp;
		temp= x[35]; x[35] = x[273]; x[273] = temp;
		temp= x[36]; x[36] = x[144]; x[144] = temp;
		temp= x[37]; x[37] = x[145]; x[145] = temp;
		temp= x[38]; x[38] = x[400]; x[400] = temp;
		temp= x[39]; x[39] = x[401]; x[401] = temp;
		temp= x[40]; x[40] = x[80]; x[80] = temp;
		temp= x[41]; x[41] = x[81]; x[81] = temp;
		temp= x[42]; x[42] = x[336]; x[336] = temp;
		temp= x[43]; x[43] = x[337]; x[337] = temp;
		temp= x[44]; x[44] = x[208]; x[208] = temp;
		temp= x[45]; x[45] = x[209]; x[209] = temp;
		temp= x[46]; x[46] = x[464]; x[464] = temp;
		temp= x[47]; x[47] = x[465]; x[465] = temp;
		temp= x[50]; x[50] = x[304]; x[304] = temp;
		temp= x[51]; x[51] = x[305]; x[305] = temp;
		temp= x[52]; x[52] = x[176]; x[176] = temp;
		temp= x[53]; x[53] = x[177]; x[177] = temp;
		temp= x[54]; x[54] = x[432]; x[432] = temp;
		temp= x[55]; x[55] = x[433]; x[433] = temp;
		temp= x[56]; x[56] = x[112]; x[112] = temp;
		temp= x[57]; x[57] = x[113]; x[113] = temp;
		temp= x[58]; x[58] = x[368]; x[368] = temp;
		temp= x[59]; x[59] = x[369]; x[369] = temp;
		temp= x[60]; x[60] = x[240]; x[240] = temp;
		temp= x[61]; x[61] = x[241]; x[241] = temp;
		temp= x[62]; x[62] = x[496]; x[496] = temp;
		temp= x[63]; x[63] = x[497]; x[497] = temp;
		temp= x[66]; x[66] = x[264]; x[264] = temp;
		temp= x[67]; x[67] = x[265]; x[265] = temp;
		temp= x[68]; x[68] = x[136]; x[136] = temp;
		temp= x[69]; x[69] = x[137]; x[137] = temp;
		temp= x[70]; x[70] = x[392]; x[392] = temp;
		temp= x[71]; x[71] = x[393]; x[393] = temp;
		temp= x[74]; x[74] = x[328]; x[328] = temp;
		temp= x[75]; x[75] = x[329]; x[329] = temp;
		temp= x[76]; x[76] = x[200]; x[200] = temp;
		temp= x[77]; x[77] = x[201]; x[201] = temp;
		temp= x[78]; x[78] = x[456]; x[456] = temp;
		temp= x[79]; x[79] = x[457]; x[457] = temp;
		temp= x[82]; x[82] = x[296]; x[296] = temp;
		temp= x[83]; x[83] = x[297]; x[297] = temp;
		temp= x[84]; x[84] = x[168]; x[168] = temp;
		temp= x[85]; x[85] = x[169]; x[169] = temp;
		temp= x[86]; x[86] = x[424]; x[424] = temp;
		temp= x[87]; x[87] = x[425]; x[425] = temp;
		temp= x[88]; x[88] = x[104]; x[104] = temp;
		temp= x[89]; x[89] = x[105]; x[105] = temp;
		temp= x[90]; x[90] = x[360]; x[360] = temp;
		temp= x[91]; x[91] = x[361]; x[361] = temp;
		temp= x[92]; x[92] = x[232]; x[232] = temp;
		temp= x[93]; x[93] = x[233]; x[233] = temp;
		temp= x[94]; x[94] = x[488]; x[488] = temp;
		temp= x[95]; x[95] = x[489]; x[489] = temp;
		temp= x[98]; x[98] = x[280]; x[280] = temp;
		temp= x[99]; x[99] = x[281]; x[281] = temp;
		temp= x[100]; x[100] = x[152]; x[152] = temp;
		temp= x[101]; x[101] = x[153]; x[153] = temp;
		temp= x[102]; x[102] = x[408]; x[408] = temp;
		temp= x[103]; x[103] = x[409]; x[409] = temp;
		temp= x[106]; x[106] = x[344]; x[344] = temp;
		temp= x[107]; x[107] = x[345]; x[345] = temp;
		temp= x[108]; x[108] = x[216]; x[216] = temp;
		temp= x[109]; x[109] = x[217]; x[217] = temp;
		temp= x[110]; x[110] = x[472]; x[472] = temp;
		temp= x[111]; x[111] = x[473]; x[473] = temp;
		temp= x[114]; x[114] = x[312]; x[312] = temp;
		temp= x[115]; x[115] = x[313]; x[313] = temp;
		temp= x[116]; x[116] = x[184]; x[184] = temp;
		temp= x[117]; x[117] = x[185]; x[185] = temp;
		temp= x[118]; x[118] = x[440]; x[440] = temp;
		temp= x[119]; x[119] = x[441]; x[441] = temp;
		temp= x[122]; x[122] = x[376]; x[376] = temp;
		temp= x[123]; x[123] = x[377]; x[377] = temp;
		temp= x[124]; x[124] = x[248]; x[248] = temp;
		temp= x[125]; x[125] = x[249]; x[249] = temp;
		temp= x[126]; x[126] = x[504]; x[504] = temp;
		temp= x[127]; x[127] = x[505]; x[505] = temp;
		temp= x[130]; x[130] = x[260]; x[260] = temp;
		temp= x[131]; x[131] = x[261]; x[261] = temp;
		temp= x[134]; x[134] = x[388]; x[388] = temp;
		temp= x[135]; x[135] = x[389]; x[389] = temp;
		temp= x[138]; x[138] = x[324]; x[324] = temp;
		temp= x[139]; x[139] = x[325]; x[325] = temp;
		temp= x[140]; x[140] = x[196]; x[196] = temp;
		temp= x[141]; x[141] = x[197]; x[197] = temp;
		temp= x[142]; x[142] = x[452]; x[452] = temp;
		temp= x[143]; x[143] = x[453]; x[453] = temp;
		temp= x[146]; x[146] = x[292]; x[292] = temp;
		temp= x[147]; x[147] = x[293]; x[293] = temp;
		temp= x[148]; x[148] = x[164]; x[164] = temp;
		temp= x[149]; x[149] = x[165]; x[165] = temp;
		temp= x[150]; x[150] = x[420]; x[420] = temp;
		temp= x[151]; x[151] = x[421]; x[421] = temp;
		temp= x[154]; x[154] = x[356]; x[356] = temp;
		temp= x[155]; x[155] = x[357]; x[357] = temp;
		temp= x[156]; x[156] = x[228]; x[228] = temp;
		temp= x[157]; x[157] = x[229]; x[229] = temp;
		temp= x[158]; x[158] = x[484]; x[484] = temp;
		temp= x[159]; x[159] = x[485]; x[485] = temp;
		temp= x[162]; x[162] = x[276]; x[276] = temp;
		temp= x[163]; x[163] = x[277]; x[277] = temp;
		temp= x[166]; x[166] = x[404]; x[404] = temp;
		temp= x[167]; x[167] = x[405]; x[405] = temp;
		temp= x[170]; x[170] = x[340]; x[340] = temp;
		temp= x[171]; x[171] = x[341]; x[341] = temp;
		temp= x[172]; x[172] = x[212]; x[212] = temp;
		temp= x[173]; x[173] = x[213]; x[213] = temp;
		temp= x[174]; x[174] = x[468]; x[468] = temp;
		temp= x[175]; x[175] = x[469]; x[469] = temp;
		temp= x[178]; x[178] = x[308]; x[308] = temp;
		temp= x[179]; x[179] = x[309]; x[309] = temp;
		temp= x[182]; x[182] = x[436]; x[436] = temp;
		temp= x[183]; x[183] = x[437]; x[437] = temp;
		temp= x[186]; x[186] = x[372]; x[372] = temp;
		temp= x[187]; x[187] = x[373]; x[373] = temp;
		temp= x[188]; x[188] = x[244]; x[244] = temp;
		temp= x[189]; x[189] = x[245]; x[245] = temp;
		temp= x[190]; x[190] = x[500]; x[500] = temp;
		temp= x[191]; x[191] = x[501]; x[501] = temp;
		temp= x[194]; x[194] = x[268]; x[268] = temp;
		temp= x[195]; x[195] = x[269]; x[269] = temp;
		temp= x[198]; x[198] = x[396]; x[396] = temp;
		temp= x[199]; x[199] = x[397]; x[397] = temp;
		temp= x[202]; x[202] = x[332]; x[332] = temp;
		temp= x[203]; x[203] = x[333]; x[333] = temp;
		temp= x[206]; x[206] = x[460]; x[460] = temp;
		temp= x[207]; x[207] = x[461]; x[461] = temp;
		temp= x[210]; x[210] = x[300]; x[300] = temp;
		temp= x[211]; x[211] = x[301]; x[301] = temp;
		temp= x[214]; x[214] = x[428]; x[428] = temp;
		temp= x[215]; x[215] = x[429]; x[429] = temp;
		temp= x[218]; x[218] = x[364]; x[364] = temp;
		temp= x[219]; x[219] = x[365]; x[365] = temp;
		temp= x[220]; x[220] = x[236]; x[236] = temp;
		temp= x[221]; x[221] = x[237]; x[237] = temp;
		temp= x[222]; x[222] = x[492]; x[492] = temp;
		temp= x[223]; x[223] = x[493]; x[493] = temp;
		temp= x[226]; x[226] = x[284]; x[284] = temp;
		temp= x[227]; x[227] = x[285]; x[285] = temp;
		temp= x[230]; x[230] = x[412]; x[412] = temp;
		temp= x[231]; x[231] = x[413]; x[413] = temp;
		temp= x[234]; x[234] = x[348]; x[348] = temp;
		temp= x[235]; x[235] = x[349]; x[349] = temp;
		temp= x[238]; x[238] = x[476]; x[476] = temp;
		temp= x[239]; x[239] = x[477]; x[477] = temp;
		temp= x[242]; x[242] = x[316]; x[316] = temp;
		temp= x[243]; x[243] = x[317]; x[317] = temp;
		temp= x[246]; x[246] = x[444]; x[444] = temp;
		temp= x[247]; x[247] = x[445]; x[445] = temp;
		temp= x[250]; x[250] = x[380]; x[380] = temp;
		temp= x[251]; x[251] = x[381]; x[381] = temp;
		temp= x[254]; x[254] = x[508]; x[508] = temp;
		temp= x[255]; x[255] = x[509]; x[509] = temp;
		temp= x[262]; x[262] = x[386]; x[386] = temp;
		temp= x[263]; x[263] = x[387]; x[387] = temp;
		temp= x[266]; x[266] = x[322]; x[322] = temp;
		temp= x[267]; x[267] = x[323]; x[323] = temp;
		temp= x[270]; x[270] = x[450]; x[450] = temp;
		temp= x[271]; x[271] = x[451]; x[451] = temp;
		temp= x[274]; x[274] = x[290]; x[290] = temp;
		temp= x[275]; x[275] = x[291]; x[291] = temp;
		temp= x[278]; x[278] = x[418]; x[418] = temp;
		temp= x[279]; x[279] = x[419]; x[419] = temp;
		temp= x[282]; x[282] = x[354]; x[354] = temp;
		temp= x[283]; x[283] = x[355]; x[355] = temp;
		temp= x[286]; x[286] = x[482]; x[482] = temp;
		temp= x[287]; x[287] = x[483]; x[483] = temp;
		temp= x[294]; x[294] = x[402]; x[402] = temp;
		temp= x[295]; x[295] = x[403]; x[403] = temp;
		temp= x[298]; x[298] = x[338]; x[338] = temp;
		temp= x[299]; x[299] = x[339]; x[339] = temp;
		temp= x[302]; x[302] = x[466]; x[466] = temp;
		temp= x[303]; x[303] = x[467]; x[467] = temp;
		temp= x[310]; x[310] = x[434]; x[434] = temp;
		temp= x[311]; x[311] = x[435]; x[435] = temp;
		temp= x[314]; x[314] = x[370]; x[370] = temp;
		temp= x[315]; x[315] = x[371]; x[371] = temp;
		temp= x[318]; x[318] = x[498]; x[498] = temp;
		temp= x[319]; x[319] = x[499]; x[499] = temp;
		temp= x[326]; x[326] = x[394]; x[394] = temp;
		temp= x[327]; x[327] = x[395]; x[395] = temp;
		temp= x[334]; x[334] = x[458]; x[458] = temp;
		temp= x[335]; x[335] = x[459]; x[459] = temp;
		temp= x[342]; x[342] = x[426]; x[426] = temp;
		temp= x[343]; x[343] = x[427]; x[427] = temp;
		temp= x[346]; x[346] = x[362]; x[362] = temp;
		temp= x[347]; x[347] = x[363]; x[363] = temp;
		temp= x[350]; x[350] = x[490]; x[490] = temp;
		temp= x[351]; x[351] = x[491]; x[491] = temp;
		temp= x[358]; x[358] = x[410]; x[410] = temp;
		temp= x[359]; x[359] = x[411]; x[411] = temp;
		temp= x[366]; x[366] = x[474]; x[474] = temp;
		temp= x[367]; x[367] = x[475]; x[475] = temp;
		temp= x[374]; x[374] = x[442]; x[442] = temp;
		temp= x[375]; x[375] = x[443]; x[443] = temp;
		temp= x[382]; x[382] = x[506]; x[506] = temp;
		temp= x[383]; x[383] = x[507]; x[507] = temp;
		temp= x[398]; x[398] = x[454]; x[454] = temp;
		temp= x[399]; x[399] = x[455]; x[455] = temp;
		temp= x[406]; x[406] = x[422]; x[422] = temp;
		temp= x[407]; x[407] = x[423]; x[423] = temp;
		temp= x[414]; x[414] = x[486]; x[486] = temp;
		temp= x[415]; x[415] = x[487]; x[487] = temp;
		temp= x[430]; x[430] = x[470]; x[470] = temp;
		temp= x[431]; x[431] = x[471]; x[471] = temp;
		temp= x[446]; x[446] = x[502]; x[502] = temp;
		temp= x[447]; x[447] = x[503]; x[503] = temp;
		temp= x[478]; x[478] = x[494]; x[494] = temp;
		temp= x[479]; x[479] = x[495]; x[495] = temp;
		break;
	    case 128:
		temp= x[2]; x[2] = x[128]; x[128] = temp;
		temp= x[3]; x[3] = x[129]; x[129] = temp;
		temp= x[4]; x[4] = x[64]; x[64] = temp;
		temp= x[5]; x[5] = x[65]; x[65] = temp;
		temp= x[6]; x[6] = x[192]; x[192] = temp;
		temp= x[7]; x[7] = x[193]; x[193] = temp;
		temp= x[8]; x[8] = x[32]; x[32] = temp;
		temp= x[9]; x[9] = x[33]; x[33] = temp;
		temp= x[10]; x[10] = x[160]; x[160] = temp;
		temp= x[11]; x[11] = x[161]; x[161] = temp;
		temp= x[12]; x[12] = x[96]; x[96] = temp;
		temp= x[13]; x[13] = x[97]; x[97] = temp;
		temp= x[14]; x[14] = x[224]; x[224] = temp;
		temp= x[15]; x[15] = x[225]; x[225] = temp;
		temp= x[18]; x[18] = x[144]; x[144] = temp;
		temp= x[19]; x[19] = x[145]; x[145] = temp;
		temp= x[20]; x[20] = x[80]; x[80] = temp;
		temp= x[21]; x[21] = x[81]; x[81] = temp;
		temp= x[22]; x[22] = x[208]; x[208] = temp;
		temp= x[23]; x[23] = x[209]; x[209] = temp;
		temp= x[24]; x[24] = x[48]; x[48] = temp;
		temp= x[25]; x[25] = x[49]; x[49] = temp;
		temp= x[26]; x[26] = x[176]; x[176] = temp;
		temp= x[27]; x[27] = x[177]; x[177] = temp;
		temp= x[28]; x[28] = x[112]; x[112] = temp;
		temp= x[29]; x[29] = x[113]; x[113] = temp;
		temp= x[30]; x[30] = x[240]; x[240] = temp;
		temp= x[31]; x[31] = x[241]; x[241] = temp;
		temp= x[34]; x[34] = x[136]; x[136] = temp;
		temp= x[35]; x[35] = x[137]; x[137] = temp;
		temp= x[36]; x[36] = x[72]; x[72] = temp;
		temp= x[37]; x[37] = x[73]; x[73] = temp;
		temp= x[38]; x[38] = x[200]; x[200] = temp;
		temp= x[39]; x[39] = x[201]; x[201] = temp;
		temp= x[42]; x[42] = x[168]; x[168] = temp;
		temp= x[43]; x[43] = x[169]; x[169] = temp;
		temp= x[44]; x[44] = x[104]; x[104] = temp;
		temp= x[45]; x[45] = x[105]; x[105] = temp;
		temp= x[46]; x[46] = x[232]; x[232] = temp;
		temp= x[47]; x[47] = x[233]; x[233] = temp;
		temp= x[50]; x[50] = x[152]; x[152] = temp;
		temp= x[51]; x[51] = x[153]; x[153] = temp;
		temp= x[52]; x[52] = x[88]; x[88] = temp;
		temp= x[53]; x[53] = x[89]; x[89] = temp;
		temp= x[54]; x[54] = x[216]; x[216] = temp;
		temp= x[55]; x[55] = x[217]; x[217] = temp;
		temp= x[58]; x[58] = x[184]; x[184] = temp;
		temp= x[59]; x[59] = x[185]; x[185] = temp;
		temp= x[60]; x[60] = x[120]; x[120] = temp;
		temp= x[61]; x[61] = x[121]; x[121] = temp;
		temp= x[62]; x[62] = x[248]; x[248] = temp;
		temp= x[63]; x[63] = x[249]; x[249] = temp;
		temp= x[66]; x[66] = x[132]; x[132] = temp;
		temp= x[67]; x[67] = x[133]; x[133] = temp;
		temp= x[70]; x[70] = x[196]; x[196] = temp;
		temp= x[71]; x[71] = x[197]; x[197] = temp;
		temp= x[74]; x[74] = x[164]; x[164] = temp;
		temp= x[75]; x[75] = x[165]; x[165] = temp;
		temp= x[76]; x[76] = x[100]; x[100] = temp;
		temp= x[77]; x[77] = x[101]; x[101] = temp;
		temp= x[78]; x[78] = x[228]; x[228] = temp;
		temp= x[79]; x[79] = x[229]; x[229] = temp;
		temp= x[82]; x[82] = x[148]; x[148] = temp;
		temp= x[83]; x[83] = x[149]; x[149] = temp;
		temp= x[86]; x[86] = x[212]; x[212] = temp;
		temp= x[87]; x[87] = x[213]; x[213] = temp;
		temp= x[90]; x[90] = x[180]; x[180] = temp;
		temp= x[91]; x[91] = x[181]; x[181] = temp;
		temp= x[92]; x[92] = x[116]; x[116] = temp;
		temp= x[93]; x[93] = x[117]; x[117] = temp;
		temp= x[94]; x[94] = x[244]; x[244] = temp;
		temp= x[95]; x[95] = x[245]; x[245] = temp;
		temp= x[98]; x[98] = x[140]; x[140] = temp;
		temp= x[99]; x[99] = x[141]; x[141] = temp;
		temp= x[102]; x[102] = x[204]; x[204] = temp;
		temp= x[103]; x[103] = x[205]; x[205] = temp;
		temp= x[106]; x[106] = x[172]; x[172] = temp;
		temp= x[107]; x[107] = x[173]; x[173] = temp;
		temp= x[110]; x[110] = x[236]; x[236] = temp;
		temp= x[111]; x[111] = x[237]; x[237] = temp;
		temp= x[114]; x[114] = x[156]; x[156] = temp;
		temp= x[115]; x[115] = x[157]; x[157] = temp;
		temp= x[118]; x[118] = x[220]; x[220] = temp;
		temp= x[119]; x[119] = x[221]; x[221] = temp;
		temp= x[122]; x[122] = x[188]; x[188] = temp;
		temp= x[123]; x[123] = x[189]; x[189] = temp;
		temp= x[126]; x[126] = x[252]; x[252] = temp;
		temp= x[127]; x[127] = x[253]; x[253] = temp;
		temp= x[134]; x[134] = x[194]; x[194] = temp;
		temp= x[135]; x[135] = x[195]; x[195] = temp;
		temp= x[138]; x[138] = x[162]; x[162] = temp;
		temp= x[139]; x[139] = x[163]; x[163] = temp;
		temp= x[142]; x[142] = x[226]; x[226] = temp;
		temp= x[143]; x[143] = x[227]; x[227] = temp;
		temp= x[150]; x[150] = x[210]; x[210] = temp;
		temp= x[151]; x[151] = x[211]; x[211] = temp;
		temp= x[154]; x[154] = x[178]; x[178] = temp;
		temp= x[155]; x[155] = x[179]; x[179] = temp;
		temp= x[158]; x[158] = x[242]; x[242] = temp;
		temp= x[159]; x[159] = x[243]; x[243] = temp;
		temp= x[166]; x[166] = x[202]; x[202] = temp;
		temp= x[167]; x[167] = x[203]; x[203] = temp;
		temp= x[174]; x[174] = x[234]; x[234] = temp;
		temp= x[175]; x[175] = x[235]; x[235] = temp;
		temp= x[182]; x[182] = x[218]; x[218] = temp;
		temp= x[183]; x[183] = x[219]; x[219] = temp;
		temp= x[190]; x[190] = x[250]; x[250] = temp;
		temp= x[191]; x[191] = x[251]; x[251] = temp;
		temp= x[206]; x[206] = x[230]; x[230] = temp;
		temp= x[207]; x[207] = x[231]; x[231] = temp;
		temp= x[222]; x[222] = x[246]; x[246] = temp;
		temp= x[223]; x[223] = x[247]; x[247] = temp;
		break;
	    case 64:
		temp= x[2]; x[2] = x[64]; x[64] = temp;
		temp= x[3]; x[3] = x[65]; x[65] = temp;
		temp= x[4]; x[4] = x[32]; x[32] = temp;
		temp= x[5]; x[5] = x[33]; x[33] = temp;
		temp= x[6]; x[6] = x[96]; x[96] = temp;
		temp= x[7]; x[7] = x[97]; x[97] = temp;
		temp= x[8]; x[8] = x[16]; x[16] = temp;
		temp= x[9]; x[9] = x[17]; x[17] = temp;
		temp= x[10]; x[10] = x[80]; x[80] = temp;
		temp= x[11]; x[11] = x[81]; x[81] = temp;
		temp= x[12]; x[12] = x[48]; x[48] = temp;
		temp= x[13]; x[13] = x[49]; x[49] = temp;
		temp= x[14]; x[14] = x[112]; x[112] = temp;
		temp= x[15]; x[15] = x[113]; x[113] = temp;
		temp= x[18]; x[18] = x[72]; x[72] = temp;
		temp= x[19]; x[19] = x[73]; x[73] = temp;
		temp= x[20]; x[20] = x[40]; x[40] = temp;
		temp= x[21]; x[21] = x[41]; x[41] = temp;
		temp= x[22]; x[22] = x[104]; x[104] = temp;
		temp= x[23]; x[23] = x[105]; x[105] = temp;
		temp= x[26]; x[26] = x[88]; x[88] = temp;
		temp= x[27]; x[27] = x[89]; x[89] = temp;
		temp= x[28]; x[28] = x[56]; x[56] = temp;
		temp= x[29]; x[29] = x[57]; x[57] = temp;
		temp= x[30]; x[30] = x[120]; x[120] = temp;
		temp= x[31]; x[31] = x[121]; x[121] = temp;
		temp= x[34]; x[34] = x[68]; x[68] = temp;
		temp= x[35]; x[35] = x[69]; x[69] = temp;
		temp= x[38]; x[38] = x[100]; x[100] = temp;
		temp= x[39]; x[39] = x[101]; x[101] = temp;
		temp= x[42]; x[42] = x[84]; x[84] = temp;
		temp= x[43]; x[43] = x[85]; x[85] = temp;
		temp= x[44]; x[44] = x[52]; x[52] = temp;
		temp= x[45]; x[45] = x[53]; x[53] = temp;
		temp= x[46]; x[46] = x[116]; x[116] = temp;
		temp= x[47]; x[47] = x[117]; x[117] = temp;
		temp= x[50]; x[50] = x[76]; x[76] = temp;
		temp= x[51]; x[51] = x[77]; x[77] = temp;
		temp= x[54]; x[54] = x[108]; x[108] = temp;
		temp= x[55]; x[55] = x[109]; x[109] = temp;
		temp= x[58]; x[58] = x[92]; x[92] = temp;
		temp= x[59]; x[59] = x[93]; x[93] = temp;
		temp= x[62]; x[62] = x[124]; x[124] = temp;
		temp= x[63]; x[63] = x[125]; x[125] = temp;
		temp= x[70]; x[70] = x[98]; x[98] = temp;
		temp= x[71]; x[71] = x[99]; x[99] = temp;
		temp= x[74]; x[74] = x[82]; x[82] = temp;
		temp= x[75]; x[75] = x[83]; x[83] = temp;
		temp= x[78]; x[78] = x[114]; x[114] = temp;
		temp= x[79]; x[79] = x[115]; x[115] = temp;
		temp= x[86]; x[86] = x[106]; x[106] = temp;
		temp= x[87]; x[87] = x[107]; x[107] = temp;
		temp= x[94]; x[94] = x[122]; x[122] = temp;
		temp= x[95]; x[95] = x[123]; x[123] = temp;
		temp= x[110]; x[110] = x[118]; x[118] = temp;
		temp= x[111]; x[111] = x[119]; x[119] = temp;
		break;
	    case 32:
		temp= x[2]; x[2] = x[32]; x[32] = temp;
		temp= x[3]; x[3] = x[33]; x[33] = temp;
		temp= x[4]; x[4] = x[16]; x[16] = temp;
		temp= x[5]; x[5] = x[17]; x[17] = temp;
		temp= x[6]; x[6] = x[48]; x[48] = temp;
		temp= x[7]; x[7] = x[49]; x[49] = temp;
		temp= x[10]; x[10] = x[40]; x[40] = temp;
		temp= x[11]; x[11] = x[41]; x[41] = temp;
		temp= x[12]; x[12] = x[24]; x[24] = temp;
		temp= x[13]; x[13] = x[25]; x[25] = temp;
		temp= x[14]; x[14] = x[56]; x[56] = temp;
		temp= x[15]; x[15] = x[57]; x[57] = temp;
		temp= x[18]; x[18] = x[36]; x[36] = temp;
		temp= x[19]; x[19] = x[37]; x[37] = temp;
		temp= x[22]; x[22] = x[52]; x[52] = temp;
		temp= x[23]; x[23] = x[53]; x[53] = temp;
		temp= x[26]; x[26] = x[44]; x[44] = temp;
		temp= x[27]; x[27] = x[45]; x[45] = temp;
		temp= x[30]; x[30] = x[60]; x[60] = temp;
		temp= x[31]; x[31] = x[61]; x[61] = temp;
		temp= x[38]; x[38] = x[50]; x[50] = temp;
		temp= x[39]; x[39] = x[51]; x[51] = temp;
		temp= x[46]; x[46] = x[58]; x[58] = temp;
		temp= x[47]; x[47] = x[59]; x[59] = temp;
		break;
	    case 16:
		temp= x[2]; x[2] = x[16]; x[16] = temp;
		temp= x[3]; x[3] = x[17]; x[17] = temp;
		temp= x[4]; x[4] = x[8]; x[8] = temp;
		temp= x[5]; x[5] = x[9]; x[9] = temp;
		temp= x[6]; x[6] = x[24]; x[24] = temp;
		temp= x[7]; x[7] = x[25]; x[25] = temp;
		temp= x[10]; x[10] = x[20]; x[20] = temp;
		temp= x[11]; x[11] = x[21]; x[21] = temp;
		temp= x[14]; x[14] = x[28]; x[28] = temp;
		temp= x[15]; x[15] = x[29]; x[29] = temp;
		temp= x[22]; x[22] = x[26]; x[26] = temp;
		temp= x[23]; x[23] = x[27]; x[27] = temp;
		break;
	    case 8:
		temp= x[2]; x[2] = x[8]; x[8] = temp;
		temp= x[3]; x[3] = x[9]; x[9] = temp;
		temp= x[6]; x[6] = x[12]; x[12] = temp;
		temp= x[7]; x[7] = x[13]; x[13] = temp;
		break;
	    case 4:
		temp= x[2]; x[2] = x[4]; x[4] = temp;
		temp= x[3]; x[3] = x[5]; x[5] = temp;
		break;
	    case 2:
		break;
	    default:
		bitrev( x, length );
	}
}
コード例 #23
0
ファイル: macmace.c プロジェクト: dmgerman/original
/*
 * mace68k_probe - detect and set up the (single) MACE ethernet device.
 *
 * @unused: never referenced by this function.
 *
 * The probe only ever runs once; every later call returns -ENODEV.
 * It allocates an ethernet device, a 16K receive ring and a 4K transmit
 * ring, adjusts the cache mode of both rings, reads the hardware address
 * out of the PROM (validating its checksum), fills in the net_device
 * callbacks and finally programs the PSC ethernet control registers.
 *
 * Returns 0 on success, -ENODEV if the probe already ran, the machine does
 * not report a MACE ethernet, or the PROM checksum is wrong, and -ENOMEM if
 * the device or its ring buffers cannot be allocated.
 */
int mace68k_probe(struct net_device *unused)
{
	int j;
	static int once=0;	/* set on first call; guards against re-probing */
	struct mace68k_data *mp;
	unsigned char *addr;
	struct net_device *dev;
	unsigned char checksum = 0;
	
	/*
	 *	There can be only one...
	 */
	 
	if (once) return -ENODEV;
	
	once = 1;

	if (macintosh_config->ether_type != MAC_ETHER_MACE) return -ENODEV;

	printk("MACE ethernet should be present ");
	
	dev = init_etherdev(0, PRIV_BYTES);
	if(dev==NULL)
	{
		printk("no free memory.\n");
		return -ENOMEM;
	}		
	mp = (struct mace68k_data *) dev->priv;
	dev->base_addr = (u32)MACE_BASE;
	mp->mace = (volatile struct mace *) MACE_BASE;
	
	printk("at 0x%p", mp->mace);
	
	/*
	 *	16K RX ring and 4K TX ring should do nicely
	 *	(RX is an order-2 allocation, TX is a single page).
	 */

	mp->rx_ring=(void *)__get_free_pages(GFP_KERNEL, 2);
	mp->tx_ring=(void *)__get_free_page(GFP_KERNEL);
	
	printk(".");
	
	if(mp->tx_ring==NULL || mp->rx_ring==NULL)
	{
		/*
		 * One of the two allocations may have succeeded; release
		 * whichever did so that nothing is leaked.  The RX ring was
		 * obtained by address with order 2, so it must be returned
		 * with free_pages() and the same order (__free_pages() takes
		 * a struct page *, not an address).
		 */
		if(mp->tx_ring)
			free_page((unsigned long)mp->tx_ring);
		if(mp->rx_ring)
			free_pages((unsigned long)mp->rx_ring, 2);
		/* NOTE(review): dev from init_etherdev() is not released on
		   this path - confirm the right teardown for this kernel. */
		printk("\nNo memory for ring buffers.\n");
		return -ENOMEM;
	}

	/* We want the receive data to be uncached. We don't care about the
	   byte reading order */

	printk(".");	
	kernel_set_cachemode((void *)mp->rx_ring, 16384, IOMAP_NOCACHE_NONSER);	
	
	printk(".");	
	/* The transmit buffer needs to be write through */
	kernel_set_cachemode((void *)mp->tx_ring, 4096, IOMAP_WRITETHROUGH);

	printk(" Ok\n");	
	dev->irq = IRQ_MAC_MACE;
	printk(KERN_INFO "%s: MACE at", dev->name);

	/*
	 *	The PROM contains 8 bytes which total 0xFF when XOR'd
	 *	together. The bytes are spaced 16 apart in the PROM
	 *	window and each one is stored with its bits reversed.
	 *	The first 6 bytes are the hardware address; the
	 *	remaining 2 only contribute to the checksum.
	 */

	addr = (void *)MACE_PROM;
		 
	for (j = 0; j < 6; ++j)
	{
		u8 v=bitrev(addr[j<<4]);
		checksum^=v;
		dev->dev_addr[j] = v;
		printk("%c%.2x", (j ? ':' : ' '), dev->dev_addr[j]);
	}
	for (; j < 8; ++j)
	{
		checksum^=bitrev(addr[j<<4]);
	}
	
	if(checksum!=0xFF)
	{
		/* NOTE(review): the ring buffers and dev are still allocated
		   when bailing out here; their cache mode has already been
		   changed above, so confirm how they should be torn down. */
		printk(" (invalid checksum)\n");
		return -ENODEV;
	}		
	printk("\n");

	memset(&mp->stats, 0, sizeof(mp->stats));
	init_timer(&mp->tx_timeout);
	mp->timeout_active = 0;

	/* Hook up the driver entry points. */
	dev->open = mace68k_open;
	dev->stop = mace68k_close;
	dev->hard_start_xmit = mace68k_xmit_start;
	dev->get_stats = mace68k_stats;
	dev->set_multicast_list = mace68k_set_multicast;
	dev->set_mac_address = mace68k_set_address;

	ether_setup(dev);

	mp = (struct mace68k_data *) dev->priv;
	mp->maccc = ENXMT | ENRCV;
	mp->dma_intr = IRQ_MAC_MACE_DMA;

	/* Program the PSC ethernet write/read control registers.
	   NOTE(review): presumably this resets both DMA channels - confirm
	   the meaning of 0x9000 / 0x0400 against the PSC documentation. */
	psc_write_word(PSC_ENETWR_CTL, 0x9000);
	psc_write_word(PSC_ENETRD_CTL, 0x9000);
	psc_write_word(PSC_ENETWR_CTL, 0x0400);
	psc_write_word(PSC_ENETRD_CTL, 0x0400);
                                        	
	/* apple's driver doesn't seem to do this */
	/* except at driver shutdown time...      */
#if 0
	mace68k_dma_off(dev);
#endif

	return 0;
}
コード例 #24
0
/* Lane-wise reduction modulo p: returns x - round(x * pinv) * p, where every
 * lane of vpinv holds an approximation of 1/p and every lane of vp holds p.
 * The rounding immediate 0x08 selects round-to-nearest with floating-point
 * exceptions suppressed. */
static inline __m256d ntt_avx_reduce(__m256d x, __m256d vpinv, __m256d vp)
{
  __m256d q = _mm256_mul_pd(x, vpinv);
  q = _mm256_round_pd(q, 0x08);
  q = _mm256_mul_pd(q, vp);
  return _mm256_sub_pd(x, q);
}

/* Multiplies x by the twiddle factors w lane-wise and reduces the product mod p. */
static inline __m256d ntt_avx_mulred(__m256d x, __m256d w, __m256d vpinv, __m256d vp)
{
  return ntt_avx_reduce(_mm256_mul_pd(x, w), vpinv, vp);
}

/* One butterfly on two vectors: t = b*w mod p; b <- a - t; a <- a + t. */
static inline void ntt_avx_butterfly(__m256d *a, __m256d *b, __m256d w,
                                     __m256d vpinv, __m256d vp)
{
  const __m256d t = ntt_avx_mulred(*b, w, vpinv, vp);
  *b = _mm256_sub_pd(*a, t);
  *a = _mm256_add_pd(*a, t);
}

/* Levels m = 2 and m = 4 carried out inside a single 4-lane vector.
 * neg2/neg4 are loaded from tables defined elsewhere (presumably +/-1 sign
 * masks -- confirm against _neg2/_neg4); w holds the m = 4 twiddles. */
static inline __m256d ntt_avx_levels_2_4(__m256d x, __m256d neg2, __m256d neg4,
                                         __m256d w, __m256d vpinv, __m256d vp)
{
  __m256d t;

  /* m = 2: no multiplication with omega required, respective value is 1 */
  t = _mm256_mul_pd(x, neg2);
  x = _mm256_hadd_pd(x, t);

  /* m = 4 */
  x = ntt_avx_mulred(x, w, vpinv, vp);
  t = _mm256_permute2f128_pd(x, x, 0x01); /* 128-bit halves swapped: x2,x3,x0,x1 */
  x = _mm256_mul_pd(x, neg4);
  return _mm256_add_pd(x, t);
}

/* Three merged butterfly levels on groups of eight vectors that lie `stride`
 * doubles apart in out.
 *
 * o+pos must hold seven consecutive twiddle tables of `stride` doubles each:
 * one for the first level, two for the second and four for the third.
 * `stride` must be a multiple of 4; all addresses are accessed with aligned
 * 256-bit loads/stores. */
static void ntt_avx_three_levels(poly out, const poly o, int pos, int stride,
                                 __m256d vpinv, __m256d vp)
{
  int offset, s;
  __m256d w0, w10, w11, w2;
  __m256d x0, x1, x2, x3, x4, x5, x6, x7;

  for(offset = 0; offset < stride; offset += 4)
  {
    w0  = _mm256_load_pd(o+pos+offset);
    w10 = _mm256_load_pd(o+pos+offset+stride);
    w11 = _mm256_load_pd(o+pos+offset+2*stride);

    for(s = 0; s < POLY_DEG; s += 8*stride)
    {
      x0 = _mm256_load_pd(out+offset+s);
      x1 = _mm256_load_pd(out+offset+s+stride);
      x2 = _mm256_load_pd(out+offset+s+2*stride);
      x3 = _mm256_load_pd(out+offset+s+3*stride);
      x4 = _mm256_load_pd(out+offset+s+4*stride);
      x5 = _mm256_load_pd(out+offset+s+5*stride);
      x6 = _mm256_load_pd(out+offset+s+6*stride);
      x7 = _mm256_load_pd(out+offset+s+7*stride);

      /* first level: neighbouring vectors, one shared twiddle table */
      ntt_avx_butterfly(&x0, &x1, w0, vpinv, vp);
      ntt_avx_butterfly(&x2, &x3, w0, vpinv, vp);
      ntt_avx_butterfly(&x4, &x5, w0, vpinv, vp);
      ntt_avx_butterfly(&x6, &x7, w0, vpinv, vp);

      /* second level: distance two, two twiddle tables */
      ntt_avx_butterfly(&x0, &x2, w10, vpinv, vp);
      ntt_avx_butterfly(&x4, &x6, w10, vpinv, vp);
      ntt_avx_butterfly(&x1, &x3, w11, vpinv, vp);
      ntt_avx_butterfly(&x5, &x7, w11, vpinv, vp);

      /* third level: distance four, four twiddle tables. The twiddle loads
         stay interleaved with the stores, exactly as in the unrolled
         original, so the memory access order is unchanged. */
      w2 = _mm256_load_pd(o+pos+offset+3*stride);
      ntt_avx_butterfly(&x0, &x4, w2, vpinv, vp);
      _mm256_store_pd(out+offset+s+4*stride, x4);
      _mm256_store_pd(out+offset+s, x0);

      w2 = _mm256_load_pd(o+pos+offset+4*stride);
      ntt_avx_butterfly(&x1, &x5, w2, vpinv, vp);
      _mm256_store_pd(out+offset+s+5*stride, x5);
      _mm256_store_pd(out+offset+s+stride, x1);

      w2 = _mm256_load_pd(o+pos+offset+5*stride);
      ntt_avx_butterfly(&x2, &x6, w2, vpinv, vp);
      _mm256_store_pd(out+offset+s+6*stride, x6);
      _mm256_store_pd(out+offset+s+2*stride, x2);

      w2 = _mm256_load_pd(o+pos+offset+6*stride);
      ntt_avx_butterfly(&x3, &x7, w2, vpinv, vp);
      _mm256_store_pd(out+offset+s+7*stride, x7);
      _mm256_store_pd(out+offset+s+3*stride, x3);
    }
  }
}

/* In-place number-theoretic transform of `out` modulo p = 8383489 using AVX
 * double-precision arithmetic.
 *
 * out : coefficient vector of POLY_DEG doubles; reordered by bitrev() and
 *       then overwritten with the transform.
 * o   : twiddle-factor tables, consumed in this order: 8 doubles for the
 *       m = 4 and m = 8 levels, 56 doubles for m = 16..64, then the tables
 *       for m = 128..512.
 *
 * The nine levels are processed three at a time so each group of
 * coefficients is loaded and stored once per pass. The last pass advances
 * by 512 coefficients per iteration (presumably POLY_DEG == 512 -- confirm). */
void ntt_transform(poly out, const poly o)
{
  int s, pos = 0;
  __m256d vp, vpinv, neg2, neg4, w_m4, w_m8, lo, hi, t;

  vpinv = _mm256_set1_pd(PARAM_APPROX_P_INVERSE);
  vp    = _mm256_set1_pd(8383489.);

  bitrev(out);

  w_m4 = _mm256_load_pd(o+pos);
  w_m8 = _mm256_load_pd(o+pos+4);
  neg2 = _mm256_load_pd(_neg2);
  neg4 = _mm256_load_pd(_neg4);

  // m = 2, m = 4, m = 8 (3 levels merged)
  for(s = 0; s < POLY_DEG; s += 8)
  {
    lo = ntt_avx_levels_2_4(_mm256_load_pd(out+s),   neg2, neg4, w_m4, vpinv, vp);
    hi = ntt_avx_levels_2_4(_mm256_load_pd(out+s+4), neg2, neg4, w_m4, vpinv, vp);

    /* m = 8: butterfly between the two 4-lane halves */
    t = ntt_avx_mulred(hi, w_m8, vpinv, vp);
    _mm256_store_pd(out+s+4, _mm256_sub_pd(lo, t));
    _mm256_store_pd(out+s+0, _mm256_add_pd(lo, t));
  }

  pos += 8;

  // m = 16, m = 32, m = 64 (3 levels merged)
  ntt_avx_three_levels(out, o, pos, 8, vpinv, vp);

  pos += 56;

  // m = 128, m=256, m=512 (3 levels merged)
  ntt_avx_three_levels(out, o, pos, 64, vpinv, vp);
}
コード例 #25
0
ファイル: fft.cpp プロジェクト: sunpho84/nissa
  // Perform the 1d Fourier transform of "in" along direction mu, writing the result in "out"
  // (mu selects the line communicator and the global/local sizes).
  //
  //  out       - output buffer, loc_size[mu]*ncpp complex numbers
  //  in        - input buffer, same layout (read only)
  //  ncpp      - number of complex numbers stored for each point along mu
  //  mu        - direction to transform
  //  sign      - sign of the exponent (multiplies every phase 2*pi/n)
  //  normalize - if 1, divide the result by glb_size[mu]
  //
  // All the work is done by the master thread only; buf is a scratch area of the same size of out.
  //
  // The fft consists of four steps:
  //  1) data is rearranged across the ranks (bit reversal of the block index)
  //  2) ft is done on the odd length blocks (not needed if data length is a power of 2)
  //  3) (Danielson-)Lanczos lemma is implemented on the local data
  //  4) (Danielson-)Lanczos lemma is applied on non-local data (if needed)
  void fft1d(complex *out,complex *in,int ncpp,int mu,double sign,int normalize)
  {
    GET_THREAD_ID();
    
    //allocate the buffer to send data
    complex *buf=nissa_malloc("buf",loc_size[mu]*ncpp,complex);
    
    if(IS_MASTER_THREAD)
      {
	int log2glb_nblk=find_max_pow2(glb_size[mu]); //presumably the number of factors 2 contained in glb_size[mu] - TODO confirm
	int glb_nblk=1<<log2glb_nblk;                 //global number of blocks (a power of 2)
	int blk_size=glb_size[mu]/glb_nblk;           //block size (what remains of glb_size[mu] once the powers of 2 are removed)
	int loc_nblk=glb_nblk/nrank_dir[mu];          //number of local blocks
	
	//check that the number of ranks in the direction is a power of 2
	int r=nrank_dir[mu];
	while(r>1)
	  {
	    if(r%2!=0) crash("Error, FFT implemented only for power of 2 grids! Ask sunpho to adapt it to a more general case if you really need!");
	    r/=2;
	  }
	
	////////////////////////////// first part /////////////////////////////////
	
	//reorder data across the various rank: glb_iel_in=iel_blk_out*glb_nblk+glb_iblk_out_rev
	//each local element can originate at most one send and one receive, hence 2*loc_size[mu] requests
	int nrequest0=0,nexp_request0=loc_size[mu]*2;
	MPI_Request request0[nexp_request0];
	for(int loc_iblk_in=0;loc_iblk_in<loc_nblk;loc_iblk_in++)
	  for(int iel_blk_in=0;iel_blk_in<blk_size;iel_blk_in++)
	    {
	      //find the full index
	      int loc_iel_in=loc_iblk_in*blk_size+iel_blk_in;
	      int glb_iel_in=glb_coord_of_loclx[0][mu]+loc_iel_in;
	      int glb_iblk_in=rank_coord[mu]*loc_nblk+loc_iblk_in;
	      
	      //look at the output
	      int iel_blk_out=glb_iel_in/glb_nblk;
	      int glb_iblk_out=bitrev(glb_iel_in-iel_blk_out*glb_nblk,log2glb_nblk);
	      int rank_coord_out=glb_iblk_out/loc_nblk;
	      int loc_iblk_out=glb_iblk_out-rank_coord_out*loc_nblk;
	      int loc_iel_out=loc_iblk_out*blk_size+iel_blk_out;
	      
	      //now, if the destination is local, put it on place
	      if(rank_coord_out==rank_coord[mu]) memcpy(buf+ncpp*loc_iel_out,in+ncpp*loc_iel_in,sizeof(complex)*ncpp);
	      else //send the data to the destination (the tag encodes the local position on the destination)
		MPI_Isend((void*)(in+loc_iel_in*ncpp),ncpp*2,MPI_DOUBLE,rank_coord_out,1241+loc_iel_out,
			  line_comm[mu],&request0[nrequest0++]);
	      
	      //if necessary receive data: glb_iel_send=iel_blk_in*glb_nblk+glb_iblk_in_rev
	      //(the tag is the local position where data is stored, matching the one used by the sender)
	      int glb_iel_send=iel_blk_in*glb_nblk+bitrev(glb_iblk_in,log2glb_nblk);
	      int rank_coord_send=glb_iel_send/loc_size[mu];
	      if(rank_coord_send!=line_rank[mu])
		MPI_Irecv((void*)(buf+loc_iel_in*ncpp),ncpp*2,MPI_DOUBLE,rank_coord_send,1241+loc_iel_in,
			  line_comm[mu],&request0[nrequest0++]);
	    }
	
	//wait for scramble to finish
	MPI_Status status0[nexp_request0];
	if(nrequest0>0) MPI_Waitall(nrequest0,request0,status0);
	
	/////////////////////////// second part ////////////////////////////////////
	
	//perform the block fourier transform if needed (plain O(blk_size^2) sum inside each block)
	if(blk_size==1) memcpy(out,buf,loc_size[mu]*ncpp*sizeof(complex));
	else
	  {
	    //initialize the output
	    memset(out,0,loc_size[mu]*ncpp*sizeof(complex));
	    
	    //loop over local blocks
	    for(int loc_iblk=0;loc_iblk<loc_nblk;loc_iblk++)
	      {
		//take initial position of the output and input block
		complex *in_blk=buf+loc_iblk*blk_size*ncpp;
		complex *out_blk=out+loc_iblk*blk_size*ncpp;
		
		//loop over out elements of out block
		for(int iel_out=0;iel_out<blk_size;iel_out++)
		  {
		    //take initial position of out local elements
		    complex *out_pad=out_blk+iel_out*ncpp;
		    
		    //incrementing factor: (wpr,wpi)=exp(i*theta)-1, used to advance the twiddle by recurrence
		    double theta=iel_out*sign*2*M_PI/blk_size;
		    double wtemp=sin(0.5*theta);
		    double wpr=-2*wtemp*wtemp;
		    double wpi=sin(theta);
		    
		    //fourier factor
		    double wr=1;
		    double wi=0;
		    
		    //loop over elements of in blocks
		    for(int iel_in=0;iel_in<blk_size;iel_in++)
		      {
			//take initial position of in local elements
			complex *in_pad=in_blk+iel_in*ncpp;
			
			//loop over local elements: out+=(wr+i*wi)*in
			for(int ic=0;ic<ncpp;ic++)
			  {
			    out_pad[ic][0]+=wr*in_pad[ic][0]-wi*in_pad[ic][1];
			    out_pad[ic][1]+=wr*in_pad[ic][1]+wi*in_pad[ic][0];
			  }
			
			//increment the twiddle
			wtemp=wr;
			wr+=wr*wpr-   wi*wpi;
			wi+=wi*wpr+wtemp*wpi;
		      }
		  }
	      }
	  }
	
	/////////////////////////////  third part ////////////////////////////
	
	//now perform the lanczos procedure up to when it does not need communications
	//(delta is the distance between the two addends, doubled at each stage)
	for(int delta=blk_size;delta<loc_size[mu];delta*=2)
	  {
	    //incrementing factor
	    double theta=sign*2*M_PI/(2*delta);
	    double wtemp=sin(0.5*theta);
	    double wpr=-2*wtemp*wtemp;
	    double wpi=sin(theta);
	    
	    //fourier coefficient
	    double wr=1;
	    double wi=0;
	    
	    //loop over the delta length (each m will correspond to increasing twiddle)
	    for(int m=0;m<delta;m++)
	      {
		//loop on the first addend
		for(int i=m;i<loc_size[mu];i+=2*delta)
		  {
		    //second addend
		    int j=i+delta;
		    
		    //site data multiplication: butterfly (i,j) -> (i+w*j,i-w*j)
		    for(int ic=0;ic<ncpp;ic++)
		      {
			double tempr=wr*out[j*ncpp+ic][0]-wi*out[j*ncpp+ic][1];
			double tempi=wr*out[j*ncpp+ic][1]+wi*out[j*ncpp+ic][0];
			
			out[j*ncpp+ic][0]=out[i*ncpp+ic][0]-tempr;
			out[j*ncpp+ic][1]=out[i*ncpp+ic][1]-tempi;
			
			out[i*ncpp+ic][0]+=tempr;
			out[i*ncpp+ic][1]+=tempi;
		      }
		  }
		wtemp=wr;
		wr+=wr*wpr-   wi*wpi;
		wi+=wi*wpr+wtemp*wpi;
	      }
	  }
	
	///////////////////////////////////// fourth part //////////////////////////////
	
	//now perform the lanczos procedure up to the end
	//each rank exchanges its whole local data with the partner rank at distance delta_rank
	for(int delta_rank=1;delta_rank<nrank_dir[mu];delta_rank*=2) //this is rank width of delta
	  {
	    int delta=delta_rank*loc_size[mu];    //block extent
	    int idelta=rank_coord[mu]/delta_rank; //identify the delta of the current rank
	    
	    //find if the current rank holding the first or second block
	    complex *first,*second;
	    MPI_Request request2[2];
	    MPI_Status status2[2];
	    
	    if(idelta%2==0) //first: so it has to receive second block and send first
	      {
		first=out;
		second=buf;
		
		//receive tag is built on own line rank, send tag on the line rank of the partner
		MPI_Irecv((void*)buf,2*ncpp*loc_size[mu],MPI_DOUBLE,line_rank[mu]+delta_rank,113+line_rank[mu],
			  line_comm[mu],&(request2[0]));
		MPI_Isend((void*)out,2*ncpp*loc_size[mu],MPI_DOUBLE,line_rank[mu]+delta_rank,113+line_rank[mu]+delta_rank,
			  line_comm[mu],&(request2[1]));
	      }
	    else           //second: so it has to receive first block and send second
	      {
		first=buf;
		second=out;
		
		MPI_Irecv((void*)buf,2*ncpp*loc_size[mu],MPI_DOUBLE,line_rank[mu]-delta_rank,113+line_rank[mu],
			  line_comm[mu],&(request2[0]));
		MPI_Isend((void*)out,2*ncpp*loc_size[mu],MPI_DOUBLE,line_rank[mu]-delta_rank,113+line_rank[mu]-delta_rank,
			  line_comm[mu],&(request2[1]));
	      }
	    
	    //incrementing factor
	    double theta=sign*2*M_PI/(2*delta);
	    double wtemp=sin(0.5*theta);
	    double wpr=-2*wtemp*wtemp;
	    double wpi=sin(theta);
	    
	    int rank_pos_delta=rank_coord[mu]%delta_rank;  //position of the rank inside the delta
	    int pos_delta=rank_pos_delta*loc_size[mu];     //starting coord of local delta inside the delta
	    
	    //fourier coefficient (starts from the twiddle of the first local point, not from 1)
	    double wr=cos(pos_delta*theta);
	    double wi=sin(pos_delta*theta);
	    
	    //wait for communications to finish (out must not be modified before the send is complete)
	    MPI_Waitall(2,request2,status2);
	    
	    //loop over the delta length (each m will correspond to increasing twiddle)
	    for(int loc_m=0;loc_m<loc_size[mu];loc_m++)
	      {
		//site data multiplication
		for(int ic=0;ic<ncpp;ic++)
		  {
		    double tempr=wr*second[loc_m*ncpp+ic][0]-wi*second[loc_m*ncpp+ic][1];
		    double tempi=wr*second[loc_m*ncpp+ic][1]+wi*second[loc_m*ncpp+ic][0];
		    
		    //in both cases the result is written in out: first+w*second or first-w*second
		    if(idelta%2==0)
		      {
			first[loc_m*ncpp+ic][0]+=tempr;
			first[loc_m*ncpp+ic][1]+=tempi;
		      }
		    else
		      {
			second[loc_m*ncpp+ic][0]=first[loc_m*ncpp+ic][0]-tempr;
			second[loc_m*ncpp+ic][1]=first[loc_m*ncpp+ic][1]-tempi;
		      }
		  }
		wtemp=wr;
		wr+=wr*wpr-   wi*wpi;
		wi+=wi*wpr+wtemp*wpi;
	      }
	  }
	
	//divide by the global size if asked
	if(normalize==1)
	  for(int i=0;i<loc_size[mu]*ncpp;i++)
	    for(int ri=0;ri<2;ri++) out[i][ri]/=glb_size[mu];
      }
    
    nissa_free(buf);
  }
コード例 #26
0
ファイル: fft.c プロジェクト: kapteyn-astro/gipsy
/*
 * inpmap loads the input map row by row, runs the X-direction transform on
 * each row where the transform type requires it, and stores the rows in the
 * scratch file at bit-reversed row positions (putrow), ready for the
 * transform in Y. What is done depends on FORM:
 *   -1  complex -> real : rows are handled in pairs (ri, mpt/2 - ri)
 *    0  real -> complex : rows are read in bulk and stored two at a time
 *    1  complex -> complex : rows are handled one by one
 */
static	void	inpmap( fint  lpt ,	/* number of points in X */
                        fint  mpt ,	/* number of points in Y */
                        fint  lmin ,	/* left most x map coordinate */
                        fint  mmin ,	/* lower most y map coordinate */
                        fchar set[] ,	/* input set name */
                        fint  sub[] ,	/* subsets */
                        fint  nset ,	/* number of sets */
                        fint  modi )	/* input mode, amp./pha. or cos./sin. */
{
   float	mirror[2*SIZE], rows[2*SIZE];	/* row buffers */
   fint		n, dst;

   if ( FORM == -1 ) {			/* complex ---FFT---> real */
      fint	step  = POWTAB[ MAXP2 - LOGP2Y - 1 ];	/* table offset factor */
      fint	half  = mpt / 2;			/* number of rows to load */
      fint	nload = half / 2 + 1;
      float	fr = -0.5 * SIGN;			/* calculate factors */
      float	fi = -0.5;
      fint	lo;

      for ( lo = 0; lo < nload; lo++ ) {	/* loop to load data */
         fint	hi   = half - lo;		/* partner row */
         fint	itab = lo * step;		/* table position */
         float	zr   = 0.5 - fr * SINTAB[itab];
         float	zi   =       fi * COSTAB[itab];
         fint	ylo, yhi;

         ylo = shiftr( mmin, mpt, lo );
         for ( n = 0; n < nset; n++ ) {	/* one stretch of lpt points per set */
            readxy( set[n], sub[n], lmin, ylo, &rows[n * lpt], lpt, 1 );
         }
         yhi = shiftr( mmin, mpt, hi );
         for ( n = 0; n < nset; n++ ) {
            readxy( set[n], sub[n], lmin, yhi, &mirror[n * lpt], lpt, 1 );
         }
         vector( rows, lpt, modi, 1 );
         vector( mirror, lpt, modi, 1 );
         fftx( rows );			/* do the FFT */
         fftx( mirror );
         /* fixup for complex to real transform */
         fxrl( zr, zi, rows, mirror, lo, hi, lpt );
         /* store in scratch file */
         dst = bitrev( lo, LOGP2Y );
         putrow( rows, 2 * lpt, dst );
         /* the partner row is skipped when it coincides with lo or when lo is 0 */
         if ( !( ( lo == hi ) || ( lo == 0 ) ) ) {
            dst = bitrev( hi, LOGP2Y );
            putrow( mirror, 2 * lpt, dst );
         }
      }
      return;
   }

   if ( FORM == 0 ) {			/* real ---FFT---> complex */
      fint	chunk = MIN( mpt, ( 2 * SIZE ) / lpt );	/* number of rows in one pass */
      fint	base;

      for ( base = 0; base < mpt; base += chunk ) {	/* loop to load data */
         fint	done = 0;			/* rows of this pass already read */
         fint	row;

         for ( ; ; ) {			/* only the first set/subset is read */
            fint	y     = shiftr( mmin, mpt, base + done );
            fint	count = MIN( chunk - done, mpt + mmin - y );

            readxy( set[0], sub[0], lmin, y, &rows[done * lpt], lpt, count );
            done = done + count;
            if ( done == chunk ) break;
         }
         /* two consecutive real rows are stored as one scratch row */
         for ( row = base; row < ( base + chunk ); row += 2 ) {
            dst = bitrev( row / 2, LOGP2Y );
            putrow( &rows[( row - base ) * lpt], 2 * lpt, dst );
         }
      }
      return;
   }

   if ( FORM == 1 ) {			/* complex ---FFT---> complex */
      fint	row;

      for ( row = 0; row < mpt; row++ ) {
         fint	y = shiftr( mmin, mpt, row );	/* get data from disk */

         for ( n = 0; n < nset; n++ ) {
            readxy( set[n], sub[n], lmin, y, &rows[n * lpt], lpt, 1 );
         }
         vector( rows, lpt, modi, 1 );
         fftx( rows );
         dst = bitrev( row, LOGP2Y );
         putrow( rows, 2 * lpt, dst );
      }
   }
}
コード例 #27
0
ファイル: tree.c プロジェクト: llevin/sigproc
main(int argc, char** argv)
{
  int dum_int, iread, iw, indx, ITStart, namelen, HdrSize,
    NTOT, NSkip, NPtsToRead, SkiByte, log2NTOT, NDM,
    NOPFiles, MAX, NTSampInRead, i, NSampInRead, BytePerFrame,
    NByteInRead, ITOffset, NBitChan, *ibrev, NRead, IFiles, FOld,
    FNew, FSwitch, KeepTrack, NTReadOld, NTReadNew, NReadOld,
    NReadNew, OPFileSize, IOffset, SkipByte, oldper, newper ,NT_Files;

  long long TotalTrack;

  float  FMin, FMax, FCen, *Inbuf, *Outbuf, timecount;

  char  *filename, *filefull, *parfile;

  FILE  *fpar, *Fout[8192], *Fin;

  if (argc <= 1) 
    tree_help();
  else 
    tree_parms(argc, argv);

  filename   = (char *) calloc(120, sizeof(char));
  filefull   = (char *) calloc(120, sizeof(char));
  parfile    = (char *) calloc(120, sizeof(char));

  ITStart = 0; NT_Files = 0;

  /* To read a sample header from first time file */

  strcpy(filename, unfname);
  namelen    = strlen(filename);

  printf("First filename   : %s\n", filename);
  if ((fpar     = fopen(filename, "rb")) == NULL) {
    printf("ERROR opening file %s.\n", filename);
    exit(0);
  }
  HdrSize  = read_header(fpar);
  fclose(fpar);

  FMin     = fch1;
  FMax     = FMin + (float)(nchans - 1) * foff;
  FCen     = (FMin + FMax) / 2.0;

  printf("No. of frequency channels        : %d\n",nchans);
  printf("Beginning radio frequency        : %f MHz\n", FMin - (foff / 2.0));
  printf("Ending radio frequency           : %f MHz\n", FMax + (foff / 2.0));
  printf("Centre frq. of the whole band    : %f MHz\n", FCen);
  printf("\n");
  printf("input sampling interval       : %f sec\n", tsamp);

  NTOT = 0;
  strcpy(filename, unfname);
  namelen    = strlen(filename);
  if ((fpar       = fopen(filename, "rb")) == NULL) {
    printf("ERROR opening %s.\n", filename);
  }

  HdrSize    = read_header(fpar);
  fclose(fpar);

  NT_Files = (int)nsamples(filename, HdrSize, nbits, nifs, nchans);

  NTOT       += NT_Files;
  printf("File %5d   : %s with %d samples\n", i, filename, NT_Files);

  NSkip      = (int)(TSkip / tsamp);
  if (NSkip > NT_Files) {
    printf("Initial Skip-length longer than the first file!\n");
    exit(0);
  }

  NPtsToRead = (int)(TRead / tsamp);
  if (NPtsToRead==0) {
    TRead=NT_Files*tsamp;
    NPtsToRead=NT_Files;
  }
  if (NPtsToRead > NTOT) NPtsToRead = NTOT;
  SkipByte   = (NSkip * nbits * nchans / 8);

  printf("\n");
  printf("This data set contains %d number of samples\n", NTOT);
  printf("Number of bits per sample          : %d\n", nbits);
  printf("Time length to skip at the begin.  : %f\n", TSkip);
  printf("Bytes to skip at the beginning     : %d\n", SkipByte);
  printf("No. of time samples to skip        : %d\n", NSkip);
  printf("Time length to read after skipping : %f\n", TRead);
  printf("Samples to read after skipping     : %d\n", NPtsToRead);

  NTOT  -= NSkip;
  printf("Time samples after initial skip    : %d\n", NTOT);

  if (NPtsToRead > NTOT) {
    NPtsToRead  = NTOT;
    printf("Too many samples to read. Truncated to %d samples\n", NTOT);
  } else
    NTOT  = NPtsToRead;

  log2NTOT = (int)(log((double)NTOT) / log((double)2.0));
  NTOT     = (1 << log2NTOT);
  printf("After truncating to lower 2^n      : %d\n", NTOT);

  if (DMMin < 0) DMMin = 0;
  if (DMMax >= nchans) DMMax = (nchans - 1);
  if ((DMMin == 0) && (DMMax == 0)) {
    DMMin = 0; DMMax = (nchans - 1);
  } else if ((DMMin != 0) && (DMMax == 0)) 
    DMMax = (nchans - 1);
  printf("\n");
  printf("Minimum DM index = %d   Maximum = %d\n", DMMin, DMMax);
  DMMinv=(tsamp/8.3e3)*(double)(DMMin)*pow((FMax+FMin)/2.,3.)/fabs(FMax-FMin);
  DMMaxv=(tsamp/8.3e3)*(double)(DMMax)*pow((FMax+FMin)/2.,3.)/fabs(FMax-FMin);
  printf("Minimum DM value = %f   Maximum = %f\n", DMMinv,DMMaxv);
  NDM        = (DMMax - DMMin + 1);

  NOPFiles   = NDM;
  OPFileSize = sizeof(float) * NTOT;

  printf("\n");
  printf("Total number of output files    : %d\n", NOPFiles);
  printf("Size of each output file        : %d bytes\n", OPFileSize);

  for (i=0; i<NOPFiles; i++) {
    strcpy(filefull,unfname);
    sprintf(&filefull[strlen(filefull)-4], ".DM%.4d.tim", (i+DMMin));
    if((Fout[i] = fopen(filefull,"wb")) == NULL) {
      printf("ERROR opening output file %s.\n", filefull);
      exit(0);
    } else {
      nbands=1;
      nobits=32;
      refdm=(tsamp/8.3e3)*(double)(i+DMMin)*pow((FMax+FMin)/2.,3.)/
	fabs(FMax-FMin);
      output=Fout[i];
      dedisperse_header();
    }
  }
  printf("\n");

  /*
  strcpy(parfile, "\0"); strcpy(parfile, unfname);
  strncat(parfile, ".par", 4);
  fpar = fopen(parfile, "w");
  fprintf(fpar, "%s\n", unfname);
  fprintf(fpar, "%d  %d  %10.8f \n ", NTOT, nchans, tsamp);
  fprintf(fpar, "%d  %d  %d  %d\n", DMMin, DMMax, NDM, NOPFiles);
  fprintf(fpar, "%d\n", nchans);
  fprintf(fpar, "%f %f\n", FMin, FMax);
  fclose(fpar);
  */

  MAX          = (1 << 12);
  if (MAX > NTOT) MAX = NTOT;

  NTSampInRead = (MAX + (2 * nchans));
  NSampInRead  = (nchans * NTSampInRead);
  BytePerFrame = (nchans * nbits / 8);
  NByteInRead  = (NTSampInRead * BytePerFrame);

  ITOffset     = 2 * nchans;
  IOffset      = (-ITOffset * BytePerFrame);

  NBitChan = (int)(log((double)nchans) / 0.6931471);

  printf("Time samples to read in one read      : %d\n", NTSampInRead);
  printf("Bytes to read in one read             : %d bytes\n", NByteInRead);
  printf("Offset counter for each read          : %d time samples\n", IOffset);
  printf("\n");
  printf("No of bits to represent all channels  : %d\n", NBitChan);

  Inbuf    = (float *) calloc(NSampInRead, sizeof(float));
  Outbuf   = (float *) calloc(NSampInRead, sizeof(float));
  ibrev    = (int *)   calloc(nchans, sizeof(int));

  for (i=0; i<nchans; i++) {
    ibrev[i] = bitrev(i, NBitChan);
  }

  NRead    = (int)(NTOT / MAX);
  printf("Total number of reads in the run       : %d\n", NRead);

  IFiles   = 0;
  FOld     = 0;
  FNew     = 0;
  FSwitch  = 1;

  strcpy(filename, unfname);  namelen    = strlen(filename);
  printf("First filename   : %s\n", filename);
  if ((Fin = fopen(filename, "rb")) == NULL) {
    printf("ERROR opening file %s.\n", filename);
    exit(0);
  }
  HdrSize  = read_header(Fin);
  printf("Going into the main loop\n");

  fseek(Fin, SkipByte, SEEK_CUR);
  KeepTrack = (NT_Files - NSkip);
  TotalTrack = 0;

  newper   = 0.0;
  oldper   = newper;
  printf("\n\n");
  for (iread=0; iread<NRead; iread++) {

    newper = 100.0 * (((float)iread / (float)NRead));
    if (newper > oldper) {
      timecount = ((float)iread * NTSampInRead * tsamp);
      printf("\rProcessed : %3d%%    Current file : %s    Time from beg : %8.2f sec", 
	     (int)newper, filename, timecount); fflush(stdout);
      oldper = newper;
    }

    KeepTrack  -= NTSampInRead;
    TotalTrack += NTSampInRead;

    if (KeepTrack > 0) {
      read_block(Fin, nbits, Inbuf, NSampInRead);
      fseek(Fin, IOffset, SEEK_CUR);
      KeepTrack  += ITOffset;
      TotalTrack -= ITOffset;
    }

    if (KeepTrack > 0) FSwitch = 1;

    memset(&Outbuf[0], 0, (sizeof(float) * NSampInRead));

    AxisSwap(Inbuf, Outbuf, nchans, NTSampInRead);

    if (FlipSwitch == 0)  {
      printf("BEFORE..."); fflush(stdout);
      FlipBand(Outbuf, nchans, NTSampInRead);
      printf("AFTER!\n");
    }

    taylor_flt(Outbuf, NSampInRead, nchans);

    for (iw=DMMin; iw<=DMMax; iw++) {
      indx  = (ibrev[iw] * NTSampInRead);
      fwrite(&Outbuf[indx], sizeof(float), MAX, Fout[iw-DMMin]);
    }
  }

  newper    = 100.0 * ((float)iread / NRead);
  timecount = ((float)iread * NTSampInRead * tsamp);
  printf("\rProcessed : %3d%%    Current file : %s    Time from beg : %8.2f sec", 
	 (int)newper, filename, timecount); fflush(stdout);
  printf("\n\n");

  for (i=0; i<NOPFiles; i++) fclose(Fout[i]);

  exit(0);
}
コード例 #28
0
ファイル: bitrev.c プロジェクト: kangsu/BitReversal
/*
 * Bit-reversal reordering of X into Y, staged through a scratch buffer.
 *
 * If lgn < 2*LOGSIZE the whole job is delegated to incachebitrev() and
 * `buffer` is never touched.  Otherwise, for each tile index in [0, nbits):
 *   1. SIZE rows of SIZE floats are gathered from X (rows are 1 << (lgn-LOGSIZE)
 *      floats apart) and stored in `buffer` at the row position given by
 *      bittable[].
 *   2. The staged tile is read back column-wise and scattered into Y at the
 *      offset (bittable[col] << (lgn-LOGSIZE)) | (reversed tile index << LOGSIZE).
 *
 * buffer : scratch area for one staged tile
 *          (presumably SIZE == 1 << LOGSIZE, i.e. SIZE*SIZE floats - confirm)
 * X      : input array
 * Y      : output array
 * nbits  : number of tiles walked by the outer loop; logn(nbits) is the bit
 *          width passed to bitrev()
 * n      : only forwarded to incachebitrev()
 * lgn    : selects the small-case path and determines the X row stride
 */
void bit_rev(float *buffer, float *X, float *Y, int nbits, int n, int lgn)
{
    float v0, v1, v2, v3;
    int tile, row, col, k;
    int hi_shift, x_stride;
    int row_rev, tile_rev, tile_rev_shifted;
    float *src, *stage, *dst;

    /* Small case: handled entirely by the helper. */
    if (lgn < 2*LOGSIZE)
    {
        incachebitrev(X, Y, nbits, n, lgn);
        return;
    }

    hi_shift = lgn-LOGSIZE;
    x_stride = 1 << hi_shift;

    for (tile = 0; tile < nbits; tile++)
    {
        tile_rev = bitrev(tile, logn(nbits));
        tile_rev_shifted = tile_rev << LOGSIZE;

        /*
         * Phase 1: X -> buffer.  Each source row is copied four floats at a
         * time; all four loads are issued before any store so the compiler
         * does not have to assume the stores alias the pending loads.
         */
        src = X + (tile << LOGSIZE);
        for (row = 0; row < SIZE; row++)
        {
            row_rev = bittable[row];
            stage = buffer + (row_rev << LOGSIZE);

            for (col = 0; col < SIZE; col += 4)
            {
                v0 = src[col];
                v1 = src[col+1];
                v2 = src[col+2];
                v3 = src[col+3];
                stage[col] = v0;
                stage[col+1] = v1;
                stage[col+2] = v2;
                stage[col+3] = v3;
            }

            src += x_stride;
        }

        /*
         * Phase 2: buffer -> Y.  One column of the staged tile is walked per
         * iteration, eight rows at a time (stride SIZE inside the buffer),
         * and written to consecutive floats of the output.
         */
        for (col = 0; col < SIZE; col++)
        {
            dst = Y + ((bittable[col] << (hi_shift)) | tile_rev_shifted);
            stage = buffer + col;

            for (k = 0; k < SIZE; k += 8)
            {
                dst[k] = stage[0];
                dst[k+1] = stage[SIZE];
                dst[k+2] = stage[2*SIZE];
                dst[k+3] = stage[3*SIZE];
                dst[k+4] = stage[4*SIZE];
                dst[k+5] = stage[5*SIZE];
                dst[k+6] = stage[6*SIZE];
                dst[k+7] = stage[7*SIZE];

                stage += (SIZE*8);
            }
        }
    }
}
コード例 #29
0
ファイル: macmace.c プロジェクト: JBTech/ralink_rt5350
int mace_probe(struct net_device *unused)
{
	int j;
	struct mace_data *mp;
	unsigned char *addr;
	struct net_device *dev;
	unsigned char checksum = 0;
	static int found = 0;
	
	if (found || macintosh_config->ether_type != MAC_ETHER_MACE) return -ENODEV;

	found = 1;	/* prevent 'finding' one on every device probe */

	dev = init_etherdev(0, PRIV_BYTES);
	if (!dev) return -ENOMEM;

	mp = (struct mace_data *) dev->priv;
	dev->base_addr = (u32)MACE_BASE;
	mp->mace = (volatile struct mace *) MACE_BASE;
	
	dev->irq = IRQ_MAC_MACE;
	mp->dma_intr = IRQ_MAC_MACE_DMA;

	/*
	 * The PROM contains 8 bytes which total 0xFF when XOR'd
	 * together. Due to the usual peculiar apple brain damage
	 * the bytes are spaced out in a strange boundary and the
	 * bits are reversed.
	 */

	addr = (void *)MACE_PROM;
		 
	for (j = 0; j < 6; ++j) {
		u8 v=bitrev(addr[j<<4]);
		checksum ^= v;
		dev->dev_addr[j] = v;
	}
	for (; j < 8; ++j) {
		checksum ^= bitrev(addr[j<<4]);
	}
	
	if (checksum != 0xFF) return -ENODEV;

	memset(&mp->stats, 0, sizeof(mp->stats));

	dev->open		= mace_open;
	dev->stop		= mace_close;
	dev->hard_start_xmit	= mace_xmit_start;
	dev->tx_timeout		= mace_tx_timeout;
	dev->watchdog_timeo	= TX_TIMEOUT;
	dev->get_stats		= mace_stats;
	dev->set_multicast_list	= mace_set_multicast;
	dev->set_mac_address	= mace_set_address;

	ether_setup(dev);

	printk(KERN_INFO "%s: 68K MACE, hardware address %.2X", dev->name, dev->dev_addr[0]);
	for (j = 1 ; j < 6 ; j++) printk(":%.2X", dev->dev_addr[j]);
	printk("\n");

	return 0;
}
コード例 #30
0
ファイル: cd.c プロジェクト: j0sh/thesis
/*
 * Map an index from natural ordering to sequency ordering.
 *
 * Two steps: Gray-code the index with gc(), then reverse the low `bits`
 * bits of that code with bitrev().
 */
static int nat2seq(int n, int bits)
{
    const int gray = gc(n);

    return bitrev(gray, bits);
}