// Read status register uint8_t Sensirion::readSR(uint8_t *result) { uint8_t val; uint8_t error = 0; #ifdef CRC_ENA _crc = bitrev(_stat_reg & SR_MASK); // Initialize CRC calculation #endif startTransmission(); if (error = putByte(STAT_REG_R)) { *result = 0xFF; return error; } #ifdef CRC_ENA calcCRC(STAT_REG_R, &_crc); // Include command byte in CRC calculation *result = getByte(ACK); calcCRC(*result, &_crc); val = getByte(noACK); val = bitrev(val); if (val != _crc) { *result = 0xFF; error = S_Err_CRC; } #else *result = getByte(noACK); #endif return error; }
void init(void) { for(n = 0; n < (FFT_LENGTH - (ORDER - 1)); n++) { PING[n].re = 0; PING[n].im = 0; PONG[n].re = 0; PONG[n].im = 0; outputPING[n] = 0; outputPONG[n] = 0; } for(n = 0; n < FFT_LENGTH; n++) { if(n >= ORDER) { h[n].re = 0; h[n].im = 0; } else { h[n].re = B[n]; h[n].im = 0; } V[n].re = 0; V[n].im = 0; pingU[n].re = 0; pingU[n].im = 0; pongU[n].re = 0; pongU[0].im = 0; } for(n = 0; n < ORDER - 1; n++){ Z[n].re = 0; Z[n].im = 0; } // compute first N/2 twiddle factors for(n=0;n<FFT_LENGTH/RADIX;n++){ w[n].re = cos(DELTA*n); w[n].im = sin(DELTA*n); // negative imag component } for (n=0; n <FFT_LENGTH/2; n++) iw[n] = -1; for (n=0; n<FFT_LENGTH; n++) ix[n] = -1; digitrev_index(iw, FFT_LENGTH/RADIX, RADIX); bitrev(w, iw, FFT_LENGTH/RADIX); cfftr2_dit(h, w, FFT_LENGTH); digitrev_index(ix, FFT_LENGTH/RADIX, RADIX); bitrev(h, ix, FFT_LENGTH/RADIX); }
void matrix_print(void) { print("\nr/c 01234567\n"); for (uint8_t row = 0; row < matrix_rows(); row++) { xprintf("%02X: %08b\n", row, bitrev(matrix_get_row(row))); } }
void matrix_print(void) { #if (MATRIX_COLS <= 8) print("r/c 01234567\n"); #elif (MATRIX_COLS <= 16) print("r/c 0123456789ABCDEF\n"); #elif (MATRIX_COLS <= 32) print("r/c 0123456789ABCDEF0123456789ABCDEF\n"); #endif for (uint8_t row = 0; row < MATRIX_ROWS; row++) { #if (MATRIX_COLS <= 8) xprintf("%02X: %08b%s\n", row, bitrev(matrix_get_row(row)), #elif (MATRIX_COLS <= 16) xprintf("%02X: %016b%s\n", row, bitrev16(matrix_get_row(row)), #elif (MATRIX_COLS <= 32) xprintf("%02X: %032b%s\n", row, bitrev32(matrix_get_row(row)), #endif #ifdef MATRIX_HAS_GHOST matrix_has_ghost_in_row(row) ? " <ghost" : "" #else "" #endif ); } }
void eefft_ifft(float* dst_re, float* dst_im, float* src_re, float* src_im, int power) { int k, i, N; float rcp; N = pow(2, power); rcp = 1.0 / N; bitrev(src_re, src_im, power); for(i = 0; i < N; i ++) buf_im[i] *= -1; switch(power) { defsprdx(1); defsprdx(2); defsprdx(3); loopcall(defsprdx); default: break; } for(i = 0; i < N; i ++) { buf_re[i] *= rcp; buf_im[i] *= rcp; } memcpy(dst_re, buf_re, pow(2, power) * 4); memcpy(dst_im, buf_im, pow(2, power) * 4); }
void dft(comp *a,bool inv) { int step, to; comp w, wi, A, B; for (int i=0; i<n; ++i) { to = bitrev(i); if (to > i) std::swap(a[to], a[i]); } for (int i=1; i<=bn; ++i) { wi = W(1<<i, inv); w = comp(1, 0); step = 1 << (i-1); for (int k=0; k<step; ++k) { for (int j=0; j<n; j+=1<<i) { int t = j | k, d = j|k|step; A = a[t]; B.x = w.x * a[d].x - w.y * a[d].y; B.y = w.x * a[d].y + w.y * a[d].x; a[t].x = A.x + B.x, a[t].y = A.y + B.y; a[d].x = A.x - B.x, a[d].y = A.y - B.y; } comp tmp; tmp.x = w.x * wi.x - w.y * wi.y; tmp.y = w.x * wi.y + w.y * wi.x; w = tmp; } } }
// Initiate measurement. If blocking, wait for result uint8_t Sensirion::meas(uint8_t cmd, uint16_t *result, bool block) { uint8_t error, i; #ifdef CRC_ENA _crc = bitrev(_stat_reg & SR_MASK); // Initialize CRC calculation #endif startTransmission(); if (cmd == TEMP) cmd = MEAS_TEMP; else cmd = MEAS_HUMI; if (error = putByte(cmd)) return error; #ifdef CRC_ENA calcCRC(cmd, &_crc); // Include command byte in CRC calculation #endif // If non-blocking, save pointer to result and return if (!block) { _presult = result; return 0; } // Otherwise, wait for measurement to complete with 720ms timeout i = 240; while (digitalRead(_pinData)) { i--; if (i == 0) return S_Err_TO; // Error: Timeout delay(3); } error = getResult(result); return error; }
/* permutes the array using a bit-reversal permutation */ void permute_bitrev(int n, int *A_re, int *A_im) { int idata i; int idata bri; int idata log2n; int idata t_re; int idata t_im; log2n = log_2(n); for (i=0; i<n; i++) { bri = bitrev(i, log2n); /* skip already swapped elements */ if (bri <= i) continue; t_re = A_re[i]; t_im = A_im[i]; A_re[i]= A_re[bri]; A_im[i]= A_im[bri]; A_re[bri]= t_re; A_im[bri]= t_im; } }
static void fct_noscale(double *f) { scramble(f,N); fwd_butterflies(f); bitrev(f,N); fwd_sums(f); f[0] *= INVROOT2; }
static void ifct_noscale(double *f) { f[0] *= INVROOT2; inv_sums(f); bitrev(f,N); inv_butterflies(f); unscramble(f,N); }
void taylor_flt(float outbuf[], int mlen, int nchn) { float itemp; int nsamp,npts,ndat1,nstages,nmem,nmem2,nsec1,nfin, i; int istages,isec,ipair,ioff1,i1,i2,koff,ndelay,ndelay2; int bitrev(int, int); /* ====================================================================== */ nsamp = ((mlen/nchn) - (2*nchn)); npts = (nsamp + nchn); ndat1 = (nsamp + 2 * nchn); nstages = (int)(log((float)nchn) / 0.6931471 + 0.5); nmem = 1; for (istages=0; istages<nstages; istages++) { nmem *= 2; nsec1 = (nchn/nmem); nmem2 = (nmem - 2); for (isec=0; isec<nsec1; isec++) { ndelay = -1; koff = (isec * nmem); for (ipair=0; ipair<(nmem2+1); ipair += 2) { ioff1 = (bitrev(ipair,istages+1)+koff) * ndat1; i2 = (bitrev(ipair+1,istages+1) + koff) * ndat1; ndelay++; ndelay2 = (ndelay + 1); nfin = (npts + ioff1); for (i1=ioff1; i1<nfin; i1++) { itemp = (outbuf[i1] + outbuf[i2+ndelay]); outbuf[i2] = (outbuf[i1] + outbuf[i2+ndelay2]); outbuf[i1] = itemp; i2++; } } } } return; }
int main(void) { int i; for (i = 0; i <= 0xFF; i++) { if ((i & 7) == 0) printf("/* 0x%02X */", i); printf(" 0x%02X,", bitrev(i)); if ((i & 7) == 7) printf("\n"); } return 0; }
static void scramble(double *f,int len){ int i,ii1,ii2; bitrev(f,len); bitrev(&f[0], len>>1); bitrev(&f[len>>1], len>>1); ii1=len-1; ii2=len>>1; for(i=0; i<(len>>2); i++){ SWAP(f[ii1], f[ii2]); ii1--; ii2++; } }
void eefft_fft(float* dst_re, float* dst_im, float* src_re, float* src_im, int power) { int k, i; bitrev(src_re, src_im, power); switch(power) { defsprdx(1); defsprdx(2); defsprdx(3); loopcall(defsprdx); default: break; } memcpy(dst_re, buf_re, pow(2, power) * 4); memcpy(dst_im, buf_im, pow(2, power) * 4); }
/* W will contain roots of unity so that W[bitrev(i,log2n-1)] = e^(2*pi*i/n) * n should be a power of 2 * Note: W is bit-reversal permuted because fft(..) goes faster if this is * done. see that function for more details on why we treat 'i' as a * (log2n-1) bit number. */ void compute_W(float *W_re, float *W_im) { int i, br; for (i=0; i<(n/2); i++) { br = bitrev(i,log2n-1); W_re[br] = cos(((float)i*2.0*M_PI)/((float)n)); W_im[br] = sin(((float)i*2.0*M_PI)/((float)n)); } #ifdef COMMENT_ONLY for (i=0;i<(n/2);i++) { br = i; //bitrev(i,log2n-1); printf("(%g\t%g)\n", W_re[br], W_im[br]); } #endif }
void FftDitNiso( complex *array, int fft_size) { double trig_arg; int log2_size; complex twiddle; complex temp; int pts_in_lft_grp, pts_in_rgt_grp; int stage, grp_pos, grp_cntr; int top_node, bot_node; log2_size = ilog2(fft_size); pts_in_rgt_grp = fft_size; for( stage=1; stage <=log2_size; stage++) { grp_cntr = -1; pts_in_lft_grp = pts_in_rgt_grp; // set pts_in_left_dft = N/(2**(stage-1)) pts_in_rgt_grp /= 2; // set pts_in_right_dft = N/(2**stage) for( grp_pos =0; grp_pos < fft_size; grp_pos += pts_in_lft_grp) { grp_cntr++; trig_arg = (TWO_PI*bitrev(grp_cntr, log2_size-1))/fft_size; twiddle = complex(cos(trig_arg), -sin(trig_arg)); for( top_node = grp_pos; top_node < grp_pos+pts_in_rgt_grp; top_node++) { bot_node = top_node + pts_in_rgt_grp; temp = array[bot_node] * twiddle; array[bot_node] = array[top_node] - temp; array[top_node] += temp; } // end of loop over top_node } // end of loop over grp_pos } // end of loop over stage ComplexBitReverse(array, fft_size); return; }
/* W will contain roots of unity so that W[bitrev(i,log2n-1)] = e^(2*pi*i/n) * n should be a power of 2 * Note: W is bit-reversal permuted because fft(..) goes faster if this is done. * see that function for more details on why we treat 'i' as a (log2n-1) bit number. */ void compute_W(int n, double *W_re, double *W_im) { int i, br; int log2n = log_2(n); for (i=0; i<(n/2); i++) { br = bitrev(i,log2n-1); W_re[br] = cos(((double)i*2.0*M_PI)/((double)n)); W_im[br] = sin(((double)i*2.0*M_PI)/((double)n)); } #ifdef COMMENT_ONLY for (i=0;i<(n/2);i++) { br = i; //bitrev(i,log2n-1); printf("(%g\t%g)\n", W_re[br], W_im[br]); } #endif }
/* W will contain roots of unity so that W[bitrev(i,log2n-1)] = e^(2*pi*i/n) * n should be a power of 2 * Note: W is bit-reversal permuted because fft(..) goes faster if this is done. * see that function for more details on why we treat 'i' as a (log2n-1) bit number. */ void compute_W(int idata n, int idata W_re[], int idata W_im[]) { int idata i; int idata br; int log2n = log_2(n); for (i=0; i<(n/2); i++) { br = bitrev(i,log2n-1); W_re[br] = cos((i*2*3)/(n)); W_im[br] = sin((i*2*3)/(n)); } //#ifdef COMMENT_ONLY // for (i=0;i<(n/2);i++) // { // br = i; //bitrev(i,log2n-1); // printf("(%g\t%g)\n", W_re[br], W_im[br]); // } //#endif }
// Get measurement result from sensor (plus CRC, if enabled) uint8_t Sensirion::getResult(uint16_t *result) { uint8_t val; #ifdef CRC_ENA val = getByte(ACK); calcCRC(val, &_crc); *result = val; val = getByte(ACK); calcCRC(val, &_crc); *result = (*result << 8) | val; val = getByte(noACK); val = bitrev(val); if (val != _crc) { *result = 0xFFFF; return S_Err_CRC; } #else *result = getByte(ACK); *result = (*result << 8) | getByte(noACK); #endif return 0; }
/* permutes the array using a bit-reversal permutation */ void permute_bitrev(int n, double *A_re, double *A_im) { int i, bri, log2n; double t_re, t_im; log2n = log_2(n); for (i=0; i<n; i++) { bri = bitrev(i, log2n); /* skip already swapped elements */ if (bri <= i) continue; t_re = A_re[i]; t_im = A_im[i]; A_re[i]= A_re[bri]; A_im[i]= A_im[bri]; A_re[bri]= t_re; A_im[bri]= t_im; } }
void incachebitrev(float *X, float *Y, int nbits, int n, int lgn) { int i, bitrevi; /**** Printfs warning user that this bitreverse algorithm is not tuned *****/ printf("This code is not at all tuned and not part of COBRA.\n"); printf("In fact this is a very naive implementation.\n"); printf("We suggest putting in your own in-cache bit-reversal here "); printf("for robust performance.\n"); printf("This code can be found in the bottom of file bitrev.c.\n"); printf("These printfs can be removed with no ill effect.\n"); Y[0] = X[0]; Y[n-1] = X[n-1]; for(i = 1; i < n-1; i++) { bitrevi = bitrev(i, lgn); if(bitrevi < i) continue; Y[i] = X[bitrevi]; Y[bitrevi] = X[i]; } return; }
void bitrev512( float x[], int length ) { register float temp; switch (length) { case 512: temp= x[2]; x[2] = x[512]; x[512] = temp; temp= x[3]; x[3] = x[513]; x[513] = temp; temp= x[4]; x[4] = x[256]; x[256] = temp; temp= x[5]; x[5] = x[257]; x[257] = temp; temp= x[6]; x[6] = x[768]; x[768] = temp; temp= x[7]; x[7] = x[769]; x[769] = temp; temp= x[8]; x[8] = x[128]; x[128] = temp; temp= x[9]; x[9] = x[129]; x[129] = temp; temp= x[10]; x[10] = x[640]; x[640] = temp; temp= x[11]; x[11] = x[641]; x[641] = temp; temp= x[12]; x[12] = x[384]; x[384] = temp; temp= x[13]; x[13] = x[385]; x[385] = temp; temp= x[14]; x[14] = x[896]; x[896] = temp; temp= x[15]; x[15] = x[897]; x[897] = temp; temp= x[16]; x[16] = x[64]; x[64] = temp; temp= x[17]; x[17] = x[65]; x[65] = temp; temp= x[18]; x[18] = x[576]; x[576] = temp; temp= x[19]; x[19] = x[577]; x[577] = temp; temp= x[20]; x[20] = x[320]; x[320] = temp; temp= x[21]; x[21] = x[321]; x[321] = temp; temp= x[22]; x[22] = x[832]; x[832] = temp; temp= x[23]; x[23] = x[833]; x[833] = temp; temp= x[24]; x[24] = x[192]; x[192] = temp; temp= x[25]; x[25] = x[193]; x[193] = temp; temp= x[26]; x[26] = x[704]; x[704] = temp; temp= x[27]; x[27] = x[705]; x[705] = temp; temp= x[28]; x[28] = x[448]; x[448] = temp; temp= x[29]; x[29] = x[449]; x[449] = temp; temp= x[30]; x[30] = x[960]; x[960] = temp; temp= x[31]; x[31] = x[961]; x[961] = temp; temp= x[34]; x[34] = x[544]; x[544] = temp; temp= x[35]; x[35] = x[545]; x[545] = temp; temp= x[36]; x[36] = x[288]; x[288] = temp; temp= x[37]; x[37] = x[289]; x[289] = temp; temp= x[38]; x[38] = x[800]; x[800] = temp; temp= x[39]; x[39] = x[801]; x[801] = temp; temp= x[40]; x[40] = x[160]; x[160] = temp; temp= x[41]; x[41] = x[161]; x[161] = temp; temp= x[42]; x[42] = x[672]; x[672] = temp; temp= x[43]; x[43] = x[673]; x[673] = temp; temp= x[44]; x[44] = x[416]; x[416] = temp; temp= x[45]; x[45] = x[417]; x[417] = temp; temp= x[46]; x[46] = x[928]; x[928] = temp; temp= x[47]; x[47] = x[929]; x[929] = temp; temp= x[48]; x[48] = x[96]; x[96] = temp; temp= x[49]; x[49] = x[97]; x[97] = temp; temp= x[50]; x[50] = x[608]; x[608] = temp; temp= x[51]; x[51] = x[609]; x[609] = temp; temp= x[52]; x[52] = x[352]; x[352] = temp; temp= x[53]; x[53] = x[353]; x[353] = temp; temp= x[54]; x[54] = x[864]; x[864] = temp; temp= x[55]; x[55] = x[865]; x[865] = temp; temp= x[56]; x[56] = x[224]; x[224] = temp; temp= x[57]; x[57] = x[225]; x[225] = temp; temp= x[58]; x[58] = x[736]; x[736] = temp; temp= x[59]; x[59] = x[737]; x[737] = temp; temp= x[60]; x[60] = x[480]; x[480] = temp; temp= x[61]; x[61] = x[481]; x[481] = temp; temp= x[62]; x[62] = x[992]; x[992] = temp; temp= x[63]; x[63] = x[993]; x[993] = temp; temp= x[66]; x[66] = x[528]; x[528] = temp; temp= x[67]; x[67] = x[529]; x[529] = temp; temp= x[68]; x[68] = x[272]; x[272] = temp; temp= x[69]; x[69] = x[273]; x[273] = temp; temp= x[70]; x[70] = x[784]; x[784] = temp; temp= x[71]; x[71] = x[785]; x[785] = temp; temp= x[72]; x[72] = x[144]; x[144] = temp; temp= x[73]; x[73] = x[145]; x[145] = temp; temp= x[74]; x[74] = x[656]; x[656] = temp; temp= x[75]; x[75] = x[657]; x[657] = temp; temp= x[76]; x[76] = x[400]; x[400] = temp; temp= x[77]; x[77] = x[401]; x[401] = temp; temp= x[78]; x[78] = x[912]; x[912] = temp; temp= x[79]; x[79] = x[913]; x[913] = temp; temp= x[82]; x[82] = x[592]; x[592] = temp; temp= x[83]; x[83] = x[593]; x[593] = temp; temp= x[84]; x[84] = x[336]; x[336] = temp; temp= x[85]; x[85] = x[337]; x[337] = temp; temp= x[86]; x[86] = x[848]; x[848] = temp; temp= x[87]; x[87] = x[849]; x[849] = temp; temp= x[88]; x[88] = x[208]; x[208] = temp; temp= x[89]; x[89] = x[209]; x[209] = temp; temp= x[90]; x[90] = x[720]; x[720] = temp; temp= x[91]; x[91] = x[721]; x[721] = temp; temp= x[92]; x[92] = x[464]; x[464] = temp; temp= x[93]; x[93] = x[465]; x[465] = temp; temp= x[94]; x[94] = x[976]; x[976] = temp; temp= x[95]; x[95] = x[977]; x[977] = temp; temp= x[98]; x[98] = x[560]; x[560] = temp; temp= x[99]; x[99] = x[561]; x[561] = temp; temp= x[100]; x[100] = x[304]; x[304] = temp; temp= x[101]; x[101] = x[305]; x[305] = temp; temp= x[102]; x[102] = x[816]; x[816] = temp; temp= x[103]; x[103] = x[817]; x[817] = temp; temp= x[104]; x[104] = x[176]; x[176] = temp; temp= x[105]; x[105] = x[177]; x[177] = temp; temp= x[106]; x[106] = x[688]; x[688] = temp; temp= x[107]; x[107] = x[689]; x[689] = temp; temp= x[108]; x[108] = x[432]; x[432] = temp; temp= x[109]; x[109] = x[433]; x[433] = temp; temp= x[110]; x[110] = x[944]; x[944] = temp; temp= x[111]; x[111] = x[945]; x[945] = temp; temp= x[114]; x[114] = x[624]; x[624] = temp; temp= x[115]; x[115] = x[625]; x[625] = temp; temp= x[116]; x[116] = x[368]; x[368] = temp; temp= x[117]; x[117] = x[369]; x[369] = temp; temp= x[118]; x[118] = x[880]; x[880] = temp; temp= x[119]; x[119] = x[881]; x[881] = temp; temp= x[120]; x[120] = x[240]; x[240] = temp; temp= x[121]; x[121] = x[241]; x[241] = temp; temp= x[122]; x[122] = x[752]; x[752] = temp; temp= x[123]; x[123] = x[753]; x[753] = temp; temp= x[124]; x[124] = x[496]; x[496] = temp; temp= x[125]; x[125] = x[497]; x[497] = temp; temp= x[126]; x[126] = x[1008]; x[1008] = temp; temp= x[127]; x[127] = x[1009]; x[1009] = temp; temp= x[130]; x[130] = x[520]; x[520] = temp; temp= x[131]; x[131] = x[521]; x[521] = temp; temp= x[132]; x[132] = x[264]; x[264] = temp; temp= x[133]; x[133] = x[265]; x[265] = temp; temp= x[134]; x[134] = x[776]; x[776] = temp; temp= x[135]; x[135] = x[777]; x[777] = temp; temp= x[138]; x[138] = x[648]; x[648] = temp; temp= x[139]; x[139] = x[649]; x[649] = temp; temp= x[140]; x[140] = x[392]; x[392] = temp; temp= x[141]; x[141] = x[393]; x[393] = temp; temp= x[142]; x[142] = x[904]; x[904] = temp; temp= x[143]; x[143] = x[905]; x[905] = temp; temp= x[146]; x[146] = x[584]; x[584] = temp; temp= x[147]; x[147] = x[585]; x[585] = temp; temp= x[148]; x[148] = x[328]; x[328] = temp; temp= x[149]; x[149] = x[329]; x[329] = temp; temp= x[150]; x[150] = x[840]; x[840] = temp; temp= x[151]; x[151] = x[841]; x[841] = temp; temp= x[152]; x[152] = x[200]; x[200] = temp; temp= x[153]; x[153] = x[201]; x[201] = temp; temp= x[154]; x[154] = x[712]; x[712] = temp; temp= x[155]; x[155] = x[713]; x[713] = temp; temp= x[156]; x[156] = x[456]; x[456] = temp; temp= x[157]; x[157] = x[457]; x[457] = temp; temp= x[158]; x[158] = x[968]; x[968] = temp; temp= x[159]; x[159] = x[969]; x[969] = temp; temp= x[162]; x[162] = x[552]; x[552] = temp; temp= x[163]; x[163] = x[553]; x[553] = temp; temp= x[164]; x[164] = x[296]; x[296] = temp; temp= x[165]; x[165] = x[297]; x[297] = temp; temp= x[166]; x[166] = x[808]; x[808] = temp; temp= x[167]; x[167] = x[809]; x[809] = temp; temp= x[170]; x[170] = x[680]; x[680] = temp; temp= x[171]; x[171] = x[681]; x[681] = temp; temp= x[172]; x[172] = x[424]; x[424] = temp; temp= x[173]; x[173] = x[425]; x[425] = temp; temp= x[174]; x[174] = x[936]; x[936] = temp; temp= x[175]; x[175] = x[937]; x[937] = temp; temp= x[178]; x[178] = x[616]; x[616] = temp; temp= x[179]; x[179] = x[617]; x[617] = temp; temp= x[180]; x[180] = x[360]; x[360] = temp; temp= x[181]; x[181] = x[361]; x[361] = temp; temp= x[182]; x[182] = x[872]; x[872] = temp; temp= x[183]; x[183] = x[873]; x[873] = temp; temp= x[184]; x[184] = x[232]; x[232] = temp; temp= x[185]; x[185] = x[233]; x[233] = temp; temp= x[186]; x[186] = x[744]; x[744] = temp; temp= x[187]; x[187] = x[745]; x[745] = temp; temp= x[188]; x[188] = x[488]; x[488] = temp; temp= x[189]; x[189] = x[489]; x[489] = temp; temp= x[190]; x[190] = x[1000]; x[1000] = temp; temp= x[191]; x[191] = x[1001]; x[1001] = temp; temp= x[194]; x[194] = x[536]; x[536] = temp; temp= x[195]; x[195] = x[537]; x[537] = temp; temp= x[196]; x[196] = x[280]; x[280] = temp; temp= x[197]; x[197] = x[281]; x[281] = temp; temp= x[198]; x[198] = x[792]; x[792] = temp; temp= x[199]; x[199] = x[793]; x[793] = temp; temp= x[202]; x[202] = x[664]; x[664] = temp; temp= x[203]; x[203] = x[665]; x[665] = temp; temp= x[204]; x[204] = x[408]; x[408] = temp; temp= x[205]; x[205] = x[409]; x[409] = temp; temp= x[206]; x[206] = x[920]; x[920] = temp; temp= x[207]; x[207] = x[921]; x[921] = temp; temp= x[210]; x[210] = x[600]; x[600] = temp; temp= x[211]; x[211] = x[601]; x[601] = temp; temp= x[212]; x[212] = x[344]; x[344] = temp; temp= x[213]; x[213] = x[345]; x[345] = temp; temp= x[214]; x[214] = x[856]; x[856] = temp; temp= x[215]; x[215] = x[857]; x[857] = temp; temp= x[218]; x[218] = x[728]; x[728] = temp; temp= x[219]; x[219] = x[729]; x[729] = temp; temp= x[220]; x[220] = x[472]; x[472] = temp; temp= x[221]; x[221] = x[473]; x[473] = temp; temp= x[222]; x[222] = x[984]; x[984] = temp; temp= x[223]; x[223] = x[985]; x[985] = temp; temp= x[226]; x[226] = x[568]; x[568] = temp; temp= x[227]; x[227] = x[569]; x[569] = temp; temp= x[228]; x[228] = x[312]; x[312] = temp; temp= x[229]; x[229] = x[313]; x[313] = temp; temp= x[230]; x[230] = x[824]; x[824] = temp; temp= x[231]; x[231] = x[825]; x[825] = temp; temp= x[234]; x[234] = x[696]; x[696] = temp; temp= x[235]; x[235] = x[697]; x[697] = temp; temp= x[236]; x[236] = x[440]; x[440] = temp; temp= x[237]; x[237] = x[441]; x[441] = temp; temp= x[238]; x[238] = x[952]; x[952] = temp; temp= x[239]; x[239] = x[953]; x[953] = temp; temp= x[242]; x[242] = x[632]; x[632] = temp; temp= x[243]; x[243] = x[633]; x[633] = temp; temp= x[244]; x[244] = x[376]; x[376] = temp; temp= x[245]; x[245] = x[377]; x[377] = temp; temp= x[246]; x[246] = x[888]; x[888] = temp; temp= x[247]; x[247] = x[889]; x[889] = temp; temp= x[250]; x[250] = x[760]; x[760] = temp; temp= x[251]; x[251] = x[761]; x[761] = temp; temp= x[252]; x[252] = x[504]; x[504] = temp; temp= x[253]; x[253] = x[505]; x[505] = temp; temp= x[254]; x[254] = x[1016]; x[1016] = temp; temp= x[255]; x[255] = x[1017]; x[1017] = temp; temp= x[258]; x[258] = x[516]; x[516] = temp; temp= x[259]; x[259] = x[517]; x[517] = temp; temp= x[262]; x[262] = x[772]; x[772] = temp; temp= x[263]; x[263] = x[773]; x[773] = temp; temp= x[266]; x[266] = x[644]; x[644] = temp; temp= x[267]; x[267] = x[645]; x[645] = temp; temp= x[268]; x[268] = x[388]; x[388] = temp; temp= x[269]; x[269] = x[389]; x[389] = temp; temp= x[270]; x[270] = x[900]; x[900] = temp; temp= x[271]; x[271] = x[901]; x[901] = temp; temp= x[274]; x[274] = x[580]; x[580] = temp; temp= x[275]; x[275] = x[581]; x[581] = temp; temp= x[276]; x[276] = x[324]; x[324] = temp; temp= x[277]; x[277] = x[325]; x[325] = temp; temp= x[278]; x[278] = x[836]; x[836] = temp; temp= x[279]; x[279] = x[837]; x[837] = temp; temp= x[282]; x[282] = x[708]; x[708] = temp; temp= x[283]; x[283] = x[709]; x[709] = temp; temp= x[284]; x[284] = x[452]; x[452] = temp; temp= x[285]; x[285] = x[453]; x[453] = temp; temp= x[286]; x[286] = x[964]; x[964] = temp; temp= x[287]; x[287] = x[965]; x[965] = temp; temp= x[290]; x[290] = x[548]; x[548] = temp; temp= x[291]; x[291] = x[549]; x[549] = temp; temp= x[294]; x[294] = x[804]; x[804] = temp; temp= x[295]; x[295] = x[805]; x[805] = temp; temp= x[298]; x[298] = x[676]; x[676] = temp; temp= x[299]; x[299] = x[677]; x[677] = temp; temp= x[300]; x[300] = x[420]; x[420] = temp; temp= x[301]; x[301] = x[421]; x[421] = temp; temp= x[302]; x[302] = x[932]; x[932] = temp; temp= x[303]; x[303] = x[933]; x[933] = temp; temp= x[306]; x[306] = x[612]; x[612] = temp; temp= x[307]; x[307] = x[613]; x[613] = temp; temp= x[308]; x[308] = x[356]; x[356] = temp; temp= x[309]; x[309] = x[357]; x[357] = temp; temp= x[310]; x[310] = x[868]; x[868] = temp; temp= x[311]; x[311] = x[869]; x[869] = temp; temp= x[314]; x[314] = x[740]; x[740] = temp; temp= x[315]; x[315] = x[741]; x[741] = temp; temp= x[316]; x[316] = x[484]; x[484] = temp; temp= x[317]; x[317] = x[485]; x[485] = temp; temp= x[318]; x[318] = x[996]; x[996] = temp; temp= x[319]; x[319] = x[997]; x[997] = temp; temp= x[322]; x[322] = x[532]; x[532] = temp; temp= x[323]; x[323] = x[533]; x[533] = temp; temp= x[326]; x[326] = x[788]; x[788] = temp; temp= x[327]; x[327] = x[789]; x[789] = temp; temp= x[330]; x[330] = x[660]; x[660] = temp; temp= x[331]; x[331] = x[661]; x[661] = temp; temp= x[332]; x[332] = x[404]; x[404] = temp; temp= x[333]; x[333] = x[405]; x[405] = temp; temp= x[334]; x[334] = x[916]; x[916] = temp; temp= x[335]; x[335] = x[917]; x[917] = temp; temp= x[338]; x[338] = x[596]; x[596] = temp; temp= x[339]; x[339] = x[597]; x[597] = temp; temp= x[342]; x[342] = x[852]; x[852] = temp; temp= x[343]; x[343] = x[853]; x[853] = temp; temp= x[346]; x[346] = x[724]; x[724] = temp; temp= x[347]; x[347] = x[725]; x[725] = temp; temp= x[348]; x[348] = x[468]; x[468] = temp; temp= x[349]; x[349] = x[469]; x[469] = temp; temp= x[350]; x[350] = x[980]; x[980] = temp; temp= x[351]; x[351] = x[981]; x[981] = temp; temp= x[354]; x[354] = x[564]; x[564] = temp; temp= x[355]; x[355] = x[565]; x[565] = temp; temp= x[358]; x[358] = x[820]; x[820] = temp; temp= x[359]; x[359] = x[821]; x[821] = temp; temp= x[362]; x[362] = x[692]; x[692] = temp; temp= x[363]; x[363] = x[693]; x[693] = temp; temp= x[364]; x[364] = x[436]; x[436] = temp; temp= x[365]; x[365] = x[437]; x[437] = temp; temp= x[366]; x[366] = x[948]; x[948] = temp; temp= x[367]; x[367] = x[949]; x[949] = temp; temp= x[370]; x[370] = x[628]; x[628] = temp; temp= x[371]; x[371] = x[629]; x[629] = temp; temp= x[374]; x[374] = x[884]; x[884] = temp; temp= x[375]; x[375] = x[885]; x[885] = temp; temp= x[378]; x[378] = x[756]; x[756] = temp; temp= x[379]; x[379] = x[757]; x[757] = temp; temp= x[380]; x[380] = x[500]; x[500] = temp; temp= x[381]; x[381] = x[501]; x[501] = temp; temp= x[382]; x[382] = x[1012]; x[1012] = temp; temp= x[383]; x[383] = x[1013]; x[1013] = temp; temp= x[386]; x[386] = x[524]; x[524] = temp; temp= x[387]; x[387] = x[525]; x[525] = temp; temp= x[390]; x[390] = x[780]; x[780] = temp; temp= x[391]; x[391] = x[781]; x[781] = temp; temp= x[394]; x[394] = x[652]; x[652] = temp; temp= x[395]; x[395] = x[653]; x[653] = temp; temp= x[398]; x[398] = x[908]; x[908] = temp; temp= x[399]; x[399] = x[909]; x[909] = temp; temp= x[402]; x[402] = x[588]; x[588] = temp; temp= x[403]; x[403] = x[589]; x[589] = temp; temp= x[406]; x[406] = x[844]; x[844] = temp; temp= x[407]; x[407] = x[845]; x[845] = temp; temp= x[410]; x[410] = x[716]; x[716] = temp; temp= x[411]; x[411] = x[717]; x[717] = temp; temp= x[412]; x[412] = x[460]; x[460] = temp; temp= x[413]; x[413] = x[461]; x[461] = temp; temp= x[414]; x[414] = x[972]; x[972] = temp; temp= x[415]; x[415] = x[973]; x[973] = temp; temp= x[418]; x[418] = x[556]; x[556] = temp; temp= x[419]; x[419] = x[557]; x[557] = temp; temp= x[422]; x[422] = x[812]; x[812] = temp; temp= x[423]; x[423] = x[813]; x[813] = temp; temp= x[426]; x[426] = x[684]; x[684] = temp; temp= x[427]; x[427] = x[685]; x[685] = temp; temp= x[430]; x[430] = x[940]; x[940] = temp; temp= x[431]; x[431] = x[941]; x[941] = temp; temp= x[434]; x[434] = x[620]; x[620] = temp; temp= x[435]; x[435] = x[621]; x[621] = temp; temp= x[438]; x[438] = x[876]; x[876] = temp; temp= x[439]; x[439] = x[877]; x[877] = temp; temp= x[442]; x[442] = x[748]; x[748] = temp; temp= x[443]; x[443] = x[749]; x[749] = temp; temp= x[444]; x[444] = x[492]; x[492] = temp; temp= x[445]; x[445] = x[493]; x[493] = temp; temp= x[446]; x[446] = x[1004]; x[1004] = temp; temp= x[447]; x[447] = x[1005]; x[1005] = temp; temp= x[450]; x[450] = x[540]; x[540] = temp; temp= x[451]; x[451] = x[541]; x[541] = temp; temp= x[454]; x[454] = x[796]; x[796] = temp; temp= x[455]; x[455] = x[797]; x[797] = temp; temp= x[458]; x[458] = x[668]; x[668] = temp; temp= x[459]; x[459] = x[669]; x[669] = temp; temp= x[462]; x[462] = x[924]; x[924] = temp; temp= x[463]; x[463] = x[925]; x[925] = temp; temp= x[466]; x[466] = x[604]; x[604] = temp; temp= x[467]; x[467] = x[605]; x[605] = temp; temp= x[470]; x[470] = x[860]; x[860] = temp; temp= x[471]; x[471] = x[861]; x[861] = temp; temp= x[474]; x[474] = x[732]; x[732] = temp; temp= x[475]; x[475] = x[733]; x[733] = temp; temp= x[478]; x[478] = x[988]; x[988] = temp; temp= x[479]; x[479] = x[989]; x[989] = temp; temp= x[482]; x[482] = x[572]; x[572] = temp; temp= x[483]; x[483] = x[573]; x[573] = temp; temp= x[486]; x[486] = x[828]; x[828] = temp; temp= x[487]; x[487] = x[829]; x[829] = temp; temp= x[490]; x[490] = x[700]; x[700] = temp; temp= x[491]; x[491] = x[701]; x[701] = temp; temp= x[494]; x[494] = x[956]; x[956] = temp; temp= x[495]; x[495] = x[957]; x[957] = temp; temp= x[498]; x[498] = x[636]; x[636] = temp; temp= x[499]; x[499] = x[637]; x[637] = temp; temp= x[502]; x[502] = x[892]; x[892] = temp; temp= x[503]; x[503] = x[893]; x[893] = temp; temp= x[506]; x[506] = x[764]; x[764] = temp; temp= x[507]; x[507] = x[765]; x[765] = temp; temp= x[510]; x[510] = x[1020]; x[1020] = temp; temp= x[511]; x[511] = x[1021]; x[1021] = temp; temp= x[518]; x[518] = x[770]; x[770] = temp; temp= x[519]; x[519] = x[771]; x[771] = temp; temp= x[522]; x[522] = x[642]; x[642] = temp; temp= x[523]; x[523] = x[643]; x[643] = temp; temp= x[526]; x[526] = x[898]; x[898] = temp; temp= x[527]; x[527] = x[899]; x[899] = temp; temp= x[530]; x[530] = x[578]; x[578] = temp; temp= x[531]; x[531] = x[579]; x[579] = temp; temp= x[534]; x[534] = x[834]; x[834] = temp; temp= x[535]; x[535] = x[835]; x[835] = temp; temp= x[538]; x[538] = x[706]; x[706] = temp; temp= x[539]; x[539] = x[707]; x[707] = temp; temp= x[542]; x[542] = x[962]; x[962] = temp; temp= x[543]; x[543] = x[963]; x[963] = temp; temp= x[550]; x[550] = x[802]; x[802] = temp; temp= x[551]; x[551] = x[803]; x[803] = temp; temp= x[554]; x[554] = x[674]; x[674] = temp; temp= x[555]; x[555] = x[675]; x[675] = temp; temp= x[558]; x[558] = x[930]; x[930] = temp; temp= x[559]; x[559] = x[931]; x[931] = temp; temp= x[562]; x[562] = x[610]; x[610] = temp; temp= x[563]; x[563] = x[611]; x[611] = temp; temp= x[566]; x[566] = x[866]; x[866] = temp; temp= x[567]; x[567] = x[867]; x[867] = temp; temp= x[570]; x[570] = x[738]; x[738] = temp; temp= x[571]; x[571] = x[739]; x[739] = temp; temp= x[574]; x[574] = x[994]; x[994] = temp; temp= x[575]; x[575] = x[995]; x[995] = temp; temp= x[582]; x[582] = x[786]; x[786] = temp; temp= x[583]; x[583] = x[787]; x[787] = temp; temp= x[586]; x[586] = x[658]; x[658] = temp; temp= x[587]; x[587] = x[659]; x[659] = temp; temp= x[590]; x[590] = x[914]; x[914] = temp; temp= x[591]; x[591] = x[915]; x[915] = temp; temp= x[598]; x[598] = x[850]; x[850] = temp; temp= x[599]; x[599] = x[851]; x[851] = temp; temp= x[602]; x[602] = x[722]; x[722] = temp; temp= x[603]; x[603] = x[723]; x[723] = temp; temp= x[606]; x[606] = x[978]; x[978] = temp; temp= x[607]; x[607] = x[979]; x[979] = temp; temp= x[614]; x[614] = x[818]; x[818] = temp; temp= x[615]; x[615] = x[819]; x[819] = temp; temp= x[618]; x[618] = x[690]; x[690] = temp; temp= x[619]; x[619] = x[691]; x[691] = temp; temp= x[622]; x[622] = x[946]; x[946] = temp; temp= x[623]; x[623] = x[947]; x[947] = temp; temp= x[630]; x[630] = x[882]; x[882] = temp; temp= x[631]; x[631] = x[883]; x[883] = temp; temp= x[634]; x[634] = x[754]; x[754] = temp; temp= x[635]; x[635] = x[755]; x[755] = temp; temp= x[638]; x[638] = x[1010]; x[1010] = temp; temp= x[639]; x[639] = x[1011]; x[1011] = temp; temp= x[646]; x[646] = x[778]; x[778] = temp; temp= x[647]; x[647] = x[779]; x[779] = temp; temp= x[654]; x[654] = x[906]; x[906] = temp; temp= x[655]; x[655] = x[907]; x[907] = temp; temp= x[662]; x[662] = x[842]; x[842] = temp; temp= x[663]; x[663] = x[843]; x[843] = temp; temp= x[666]; x[666] = x[714]; x[714] = temp; temp= x[667]; x[667] = x[715]; x[715] = temp; temp= x[670]; x[670] = x[970]; x[970] = temp; temp= x[671]; x[671] = x[971]; x[971] = temp; temp= x[678]; x[678] = x[810]; x[810] = temp; temp= x[679]; x[679] = x[811]; x[811] = temp; temp= x[686]; x[686] = x[938]; x[938] = temp; temp= x[687]; x[687] = x[939]; x[939] = temp; temp= x[694]; x[694] = x[874]; x[874] = temp; temp= x[695]; x[695] = x[875]; x[875] = temp; temp= x[698]; x[698] = x[746]; x[746] = temp; temp= x[699]; x[699] = x[747]; x[747] = temp; temp= x[702]; x[702] = x[1002]; x[1002] = temp; temp= x[703]; x[703] = x[1003]; x[1003] = temp; temp= x[710]; x[710] = x[794]; x[794] = temp; temp= x[711]; x[711] = x[795]; x[795] = temp; temp= x[718]; x[718] = x[922]; x[922] = temp; temp= x[719]; x[719] = x[923]; x[923] = temp; temp= x[726]; x[726] = x[858]; x[858] = temp; temp= x[727]; x[727] = x[859]; x[859] = temp; temp= x[734]; x[734] = x[986]; x[986] = temp; temp= x[735]; x[735] = x[987]; x[987] = temp; temp= x[742]; x[742] = x[826]; x[826] = temp; temp= x[743]; x[743] = x[827]; x[827] = temp; temp= x[750]; x[750] = x[954]; x[954] = temp; temp= x[751]; x[751] = x[955]; x[955] = temp; temp= x[758]; x[758] = x[890]; x[890] = temp; temp= x[759]; x[759] = x[891]; x[891] = temp; temp= x[766]; x[766] = x[1018]; x[1018] = temp; temp= x[767]; x[767] = x[1019]; x[1019] = temp; temp= x[782]; x[782] = x[902]; x[902] = temp; temp= x[783]; x[783] = x[903]; x[903] = temp; temp= x[790]; x[790] = x[838]; x[838] = temp; temp= x[791]; x[791] = x[839]; x[839] = temp; temp= x[798]; x[798] = x[966]; x[966] = temp; temp= x[799]; x[799] = x[967]; x[967] = temp; temp= x[814]; x[814] = x[934]; x[934] = temp; temp= x[815]; x[815] = x[935]; x[935] = temp; temp= x[822]; x[822] = x[870]; x[870] = temp; temp= x[823]; x[823] = x[871]; x[871] = temp; temp= x[830]; x[830] = x[998]; x[998] = temp; temp= x[831]; x[831] = x[999]; x[999] = temp; temp= x[846]; x[846] = x[918]; x[918] = temp; temp= x[847]; x[847] = x[919]; x[919] = temp; temp= x[862]; x[862] = x[982]; x[982] = temp; temp= x[863]; x[863] = x[983]; x[983] = temp; temp= x[878]; x[878] = x[950]; x[950] = temp; temp= x[879]; x[879] = x[951]; x[951] = temp; temp= x[894]; x[894] = x[1014]; x[1014] = temp; temp= x[895]; x[895] = x[1015]; x[1015] = temp; temp= x[926]; x[926] = x[974]; x[974] = temp; temp= x[927]; x[927] = x[975]; x[975] = temp; temp= x[958]; x[958] = x[1006]; x[1006] = temp; temp= x[959]; x[959] = x[1007]; x[1007] = temp; break; case 256: temp= x[2]; x[2] = x[256]; x[256] = temp; temp= x[3]; x[3] = x[257]; x[257] = temp; temp= x[4]; x[4] = x[128]; x[128] = temp; temp= x[5]; x[5] = x[129]; x[129] = temp; temp= x[6]; x[6] = x[384]; x[384] = temp; temp= x[7]; x[7] = x[385]; x[385] = temp; temp= x[8]; x[8] = x[64]; x[64] = temp; temp= x[9]; x[9] = x[65]; x[65] = temp; temp= x[10]; x[10] = x[320]; x[320] = temp; temp= x[11]; x[11] = x[321]; x[321] = temp; temp= x[12]; x[12] = x[192]; x[192] = temp; temp= x[13]; x[13] = x[193]; x[193] = temp; temp= x[14]; x[14] = x[448]; x[448] = temp; temp= x[15]; x[15] = x[449]; x[449] = temp; temp= x[16]; x[16] = x[32]; x[32] = temp; temp= x[17]; x[17] = x[33]; x[33] = temp; temp= x[18]; x[18] = x[288]; x[288] = temp; temp= x[19]; x[19] = x[289]; x[289] = temp; temp= x[20]; x[20] = x[160]; x[160] = temp; temp= x[21]; x[21] = x[161]; x[161] = temp; temp= x[22]; x[22] = x[416]; x[416] = temp; temp= x[23]; x[23] = x[417]; x[417] = temp; temp= x[24]; x[24] = x[96]; x[96] = temp; temp= x[25]; x[25] = x[97]; x[97] = temp; temp= x[26]; x[26] = x[352]; x[352] = temp; temp= x[27]; x[27] = x[353]; x[353] = temp; temp= x[28]; x[28] = x[224]; x[224] = temp; temp= x[29]; x[29] = x[225]; x[225] = temp; temp= x[30]; x[30] = x[480]; x[480] = temp; temp= x[31]; x[31] = x[481]; x[481] = temp; temp= x[34]; x[34] = x[272]; x[272] = temp; temp= x[35]; x[35] = x[273]; x[273] = temp; temp= x[36]; x[36] = x[144]; x[144] = temp; temp= x[37]; x[37] = x[145]; x[145] = temp; temp= x[38]; x[38] = x[400]; x[400] = temp; temp= x[39]; x[39] = x[401]; x[401] = temp; temp= x[40]; x[40] = x[80]; x[80] = temp; temp= x[41]; x[41] = x[81]; x[81] = temp; temp= x[42]; x[42] = x[336]; x[336] = temp; temp= x[43]; x[43] = x[337]; x[337] = temp; temp= x[44]; x[44] = x[208]; x[208] = temp; temp= x[45]; x[45] = x[209]; x[209] = temp; temp= x[46]; x[46] = x[464]; x[464] = temp; temp= x[47]; x[47] = x[465]; x[465] = temp; temp= x[50]; x[50] = x[304]; x[304] = temp; temp= x[51]; x[51] = x[305]; x[305] = temp; temp= x[52]; x[52] = x[176]; x[176] = temp; temp= x[53]; x[53] = x[177]; x[177] = temp; temp= x[54]; x[54] = x[432]; x[432] = temp; temp= x[55]; x[55] = x[433]; x[433] = temp; temp= x[56]; x[56] = x[112]; x[112] = temp; temp= x[57]; x[57] = x[113]; x[113] = temp; temp= x[58]; x[58] = x[368]; x[368] = temp; temp= x[59]; x[59] = x[369]; x[369] = temp; temp= x[60]; x[60] = x[240]; x[240] = temp; temp= x[61]; x[61] = x[241]; x[241] = temp; temp= x[62]; x[62] = x[496]; x[496] = temp; temp= x[63]; x[63] = x[497]; x[497] = temp; temp= x[66]; x[66] = x[264]; x[264] = temp; temp= x[67]; x[67] = x[265]; x[265] = temp; temp= x[68]; x[68] = x[136]; x[136] = temp; temp= x[69]; x[69] = x[137]; x[137] = temp; temp= x[70]; x[70] = x[392]; x[392] = temp; temp= x[71]; x[71] = x[393]; x[393] = temp; temp= x[74]; x[74] = x[328]; x[328] = temp; temp= x[75]; x[75] = x[329]; x[329] = temp; temp= x[76]; x[76] = x[200]; x[200] = temp; temp= x[77]; x[77] = x[201]; x[201] = temp; temp= x[78]; x[78] = x[456]; x[456] = temp; temp= x[79]; x[79] = x[457]; x[457] = temp; temp= x[82]; x[82] = x[296]; x[296] = temp; temp= x[83]; x[83] = x[297]; x[297] = temp; temp= x[84]; x[84] = x[168]; x[168] = temp; temp= x[85]; x[85] = x[169]; x[169] = temp; temp= x[86]; x[86] = x[424]; x[424] = temp; temp= x[87]; x[87] = x[425]; x[425] = temp; temp= x[88]; x[88] = x[104]; x[104] = temp; temp= x[89]; x[89] = x[105]; x[105] = temp; temp= x[90]; x[90] = x[360]; x[360] = temp; temp= x[91]; x[91] = x[361]; x[361] = temp; temp= x[92]; x[92] = x[232]; x[232] = temp; temp= x[93]; x[93] = x[233]; x[233] = temp; temp= x[94]; x[94] = x[488]; x[488] = temp; temp= x[95]; x[95] = x[489]; x[489] = temp; temp= x[98]; x[98] = x[280]; x[280] = temp; temp= x[99]; x[99] = x[281]; x[281] = temp; temp= x[100]; x[100] = x[152]; x[152] = temp; temp= x[101]; x[101] = x[153]; x[153] = temp; temp= x[102]; x[102] = x[408]; x[408] = temp; temp= x[103]; x[103] = x[409]; x[409] = temp; temp= x[106]; x[106] = x[344]; x[344] = temp; temp= x[107]; x[107] = x[345]; x[345] = temp; temp= x[108]; x[108] = x[216]; x[216] = temp; temp= x[109]; x[109] = x[217]; x[217] = temp; temp= x[110]; x[110] = x[472]; x[472] = temp; temp= x[111]; x[111] = x[473]; x[473] = temp; temp= x[114]; x[114] = x[312]; x[312] = temp; temp= x[115]; x[115] = x[313]; x[313] = temp; temp= x[116]; x[116] = x[184]; x[184] = temp; temp= x[117]; x[117] = x[185]; x[185] = temp; temp= x[118]; x[118] = x[440]; x[440] = temp; temp= x[119]; x[119] = x[441]; x[441] = temp; temp= x[122]; x[122] = x[376]; x[376] = temp; temp= x[123]; x[123] = x[377]; x[377] = temp; temp= x[124]; x[124] = x[248]; x[248] = temp; temp= x[125]; x[125] = x[249]; x[249] = temp; temp= x[126]; x[126] = x[504]; x[504] = temp; temp= x[127]; x[127] = x[505]; x[505] = temp; temp= x[130]; x[130] = x[260]; x[260] = temp; temp= x[131]; x[131] = x[261]; x[261] = temp; temp= x[134]; x[134] = x[388]; x[388] = temp; temp= x[135]; x[135] = x[389]; x[389] = temp; temp= x[138]; x[138] = x[324]; x[324] = temp; temp= x[139]; x[139] = x[325]; x[325] = temp; temp= x[140]; x[140] = x[196]; x[196] = temp; temp= x[141]; x[141] = x[197]; x[197] = temp; temp= x[142]; x[142] = x[452]; x[452] = temp; temp= x[143]; x[143] = x[453]; x[453] = temp; temp= x[146]; x[146] = x[292]; x[292] = temp; temp= x[147]; x[147] = x[293]; x[293] = temp; temp= x[148]; x[148] = x[164]; x[164] = temp; temp= x[149]; x[149] = x[165]; x[165] = temp; temp= x[150]; x[150] = x[420]; x[420] = temp; temp= x[151]; x[151] = x[421]; x[421] = temp; temp= x[154]; x[154] = x[356]; x[356] = temp; temp= x[155]; x[155] = x[357]; x[357] = temp; temp= x[156]; x[156] = x[228]; x[228] = temp; temp= x[157]; x[157] = x[229]; x[229] = temp; temp= x[158]; x[158] = x[484]; x[484] = temp; temp= x[159]; x[159] = x[485]; x[485] = temp; temp= x[162]; x[162] = x[276]; x[276] = temp; temp= x[163]; x[163] = x[277]; x[277] = temp; temp= x[166]; x[166] = x[404]; x[404] = temp; temp= x[167]; x[167] = x[405]; x[405] = temp; temp= x[170]; x[170] = x[340]; x[340] = temp; temp= x[171]; x[171] = x[341]; x[341] = temp; temp= x[172]; x[172] = x[212]; x[212] = temp; temp= x[173]; x[173] = x[213]; x[213] = temp; temp= x[174]; x[174] = x[468]; x[468] = temp; temp= x[175]; x[175] = x[469]; x[469] = temp; temp= x[178]; x[178] = x[308]; x[308] = temp; temp= x[179]; x[179] = x[309]; x[309] = temp; temp= x[182]; x[182] = x[436]; x[436] = temp; temp= x[183]; x[183] = x[437]; x[437] = temp; temp= x[186]; x[186] = x[372]; x[372] = temp; temp= x[187]; x[187] = x[373]; x[373] = temp; temp= x[188]; x[188] = x[244]; x[244] = temp; temp= x[189]; x[189] = x[245]; x[245] = temp; temp= x[190]; x[190] = x[500]; x[500] = temp; temp= x[191]; x[191] = x[501]; x[501] = temp; temp= x[194]; x[194] = x[268]; x[268] = temp; temp= x[195]; x[195] = x[269]; x[269] = temp; temp= x[198]; x[198] = x[396]; x[396] = temp; temp= x[199]; x[199] = x[397]; x[397] = temp; temp= x[202]; x[202] = x[332]; x[332] = temp; temp= x[203]; x[203] = x[333]; x[333] = temp; temp= x[206]; x[206] = x[460]; x[460] = temp; temp= x[207]; x[207] = x[461]; x[461] = temp; temp= x[210]; x[210] = x[300]; x[300] = temp; temp= x[211]; x[211] = x[301]; x[301] = temp; temp= x[214]; x[214] = x[428]; x[428] = temp; temp= x[215]; x[215] = x[429]; x[429] = temp; temp= x[218]; x[218] = x[364]; x[364] = temp; temp= x[219]; x[219] = x[365]; x[365] = temp; temp= x[220]; x[220] = x[236]; x[236] = temp; temp= x[221]; x[221] = x[237]; x[237] = temp; temp= x[222]; x[222] = x[492]; x[492] = temp; temp= x[223]; x[223] = x[493]; x[493] = temp; temp= x[226]; x[226] = x[284]; x[284] = temp; temp= x[227]; x[227] = x[285]; x[285] = temp; temp= x[230]; x[230] = x[412]; x[412] = temp; temp= x[231]; x[231] = x[413]; x[413] = temp; temp= x[234]; x[234] = x[348]; x[348] = temp; temp= x[235]; x[235] = x[349]; x[349] = temp; temp= x[238]; x[238] = x[476]; x[476] = temp; temp= x[239]; x[239] = x[477]; x[477] = temp; temp= x[242]; x[242] = x[316]; x[316] = temp; temp= x[243]; x[243] = x[317]; x[317] = temp; temp= x[246]; x[246] = x[444]; x[444] = temp; temp= x[247]; x[247] = x[445]; x[445] = temp; temp= x[250]; x[250] = x[380]; x[380] = temp; temp= x[251]; x[251] = x[381]; x[381] = temp; temp= x[254]; x[254] = x[508]; x[508] = temp; temp= x[255]; x[255] = x[509]; x[509] = temp; temp= x[262]; x[262] = x[386]; x[386] = temp; temp= x[263]; x[263] = x[387]; x[387] = temp; temp= x[266]; x[266] = x[322]; x[322] = temp; temp= x[267]; x[267] = x[323]; x[323] = temp; temp= x[270]; x[270] = x[450]; x[450] = temp; temp= x[271]; x[271] = x[451]; x[451] = temp; temp= x[274]; x[274] = x[290]; x[290] = temp; temp= x[275]; x[275] = x[291]; x[291] = temp; temp= x[278]; x[278] = x[418]; x[418] = temp; temp= x[279]; x[279] = x[419]; x[419] = temp; temp= x[282]; x[282] = x[354]; x[354] = temp; temp= x[283]; x[283] = x[355]; x[355] = temp; temp= x[286]; x[286] = x[482]; x[482] = temp; temp= x[287]; x[287] = x[483]; x[483] = temp; temp= x[294]; x[294] = x[402]; x[402] = temp; temp= x[295]; x[295] = x[403]; x[403] = temp; temp= x[298]; x[298] = x[338]; x[338] = temp; temp= x[299]; x[299] = x[339]; x[339] = temp; temp= x[302]; x[302] = x[466]; x[466] = temp; temp= x[303]; x[303] = x[467]; x[467] = temp; temp= x[310]; x[310] = x[434]; x[434] = temp; temp= x[311]; x[311] = x[435]; x[435] = temp; temp= x[314]; x[314] = x[370]; x[370] = temp; temp= x[315]; x[315] = x[371]; x[371] = temp; temp= x[318]; x[318] = x[498]; x[498] = temp; temp= x[319]; x[319] = x[499]; x[499] = temp; temp= x[326]; x[326] = x[394]; x[394] = temp; temp= x[327]; x[327] = x[395]; x[395] = temp; temp= x[334]; x[334] = x[458]; x[458] = temp; temp= x[335]; x[335] = x[459]; x[459] = temp; temp= x[342]; x[342] = x[426]; x[426] = temp; temp= x[343]; x[343] = x[427]; x[427] = temp; temp= x[346]; x[346] = x[362]; x[362] = temp; temp= x[347]; x[347] = x[363]; x[363] = temp; temp= x[350]; x[350] = x[490]; x[490] = temp; temp= x[351]; x[351] = x[491]; x[491] = temp; temp= x[358]; x[358] = x[410]; x[410] = temp; temp= x[359]; x[359] = x[411]; x[411] = temp; temp= x[366]; x[366] = x[474]; x[474] = temp; temp= x[367]; x[367] = x[475]; x[475] = temp; temp= x[374]; x[374] = x[442]; x[442] = temp; temp= x[375]; x[375] = x[443]; x[443] = temp; temp= x[382]; x[382] = x[506]; x[506] = temp; temp= x[383]; x[383] = x[507]; x[507] = temp; temp= x[398]; x[398] = x[454]; x[454] = temp; temp= x[399]; x[399] = x[455]; x[455] = temp; temp= x[406]; x[406] = x[422]; x[422] = temp; temp= x[407]; x[407] = x[423]; x[423] = temp; temp= x[414]; x[414] = x[486]; x[486] = temp; temp= x[415]; x[415] = x[487]; x[487] = temp; temp= x[430]; x[430] = x[470]; x[470] = temp; temp= x[431]; x[431] = x[471]; x[471] = temp; temp= x[446]; x[446] = x[502]; x[502] = temp; temp= x[447]; x[447] = x[503]; x[503] = temp; temp= x[478]; x[478] = x[494]; x[494] = temp; temp= x[479]; x[479] = x[495]; x[495] = temp; break; case 128: temp= x[2]; x[2] = x[128]; x[128] = temp; temp= x[3]; x[3] = x[129]; x[129] = temp; temp= x[4]; x[4] = x[64]; x[64] = temp; temp= x[5]; x[5] = x[65]; x[65] = temp; temp= x[6]; x[6] = x[192]; x[192] = temp; temp= x[7]; x[7] = x[193]; x[193] = temp; temp= x[8]; x[8] = x[32]; x[32] = temp; temp= x[9]; x[9] = x[33]; x[33] = temp; temp= x[10]; x[10] = x[160]; x[160] = temp; temp= x[11]; x[11] = x[161]; x[161] = temp; temp= x[12]; x[12] = x[96]; x[96] = temp; temp= x[13]; x[13] = x[97]; x[97] = temp; temp= x[14]; x[14] = x[224]; x[224] = temp; temp= x[15]; x[15] = x[225]; x[225] = temp; temp= x[18]; x[18] = x[144]; x[144] = temp; temp= x[19]; x[19] = x[145]; x[145] = temp; temp= x[20]; x[20] = x[80]; x[80] = temp; temp= x[21]; x[21] = x[81]; x[81] = temp; temp= x[22]; x[22] = x[208]; x[208] = temp; temp= x[23]; x[23] = x[209]; x[209] = temp; temp= x[24]; x[24] = x[48]; x[48] = temp; temp= x[25]; x[25] = x[49]; x[49] = temp; temp= x[26]; x[26] = x[176]; x[176] = temp; temp= x[27]; x[27] = x[177]; x[177] = temp; temp= x[28]; x[28] = x[112]; x[112] = temp; temp= x[29]; x[29] = x[113]; x[113] = temp; temp= x[30]; x[30] = x[240]; x[240] = temp; temp= x[31]; x[31] = x[241]; x[241] = temp; temp= x[34]; x[34] = x[136]; x[136] = temp; temp= x[35]; x[35] = x[137]; x[137] = temp; temp= x[36]; x[36] = x[72]; x[72] = temp; temp= x[37]; x[37] = x[73]; x[73] = temp; temp= x[38]; x[38] = x[200]; x[200] = temp; temp= x[39]; x[39] = x[201]; x[201] = temp; temp= x[42]; x[42] = x[168]; x[168] = temp; temp= x[43]; x[43] = x[169]; x[169] = temp; temp= x[44]; x[44] = x[104]; x[104] = temp; temp= x[45]; x[45] = x[105]; x[105] = temp; temp= x[46]; x[46] = x[232]; x[232] = temp; temp= x[47]; x[47] = x[233]; x[233] = temp; temp= x[50]; x[50] = x[152]; x[152] = temp; temp= x[51]; x[51] = x[153]; x[153] = temp; temp= x[52]; x[52] = x[88]; x[88] = temp; temp= x[53]; x[53] = x[89]; x[89] = temp; temp= x[54]; x[54] = x[216]; x[216] = temp; temp= x[55]; x[55] = x[217]; x[217] = temp; temp= x[58]; x[58] = x[184]; x[184] = temp; temp= x[59]; x[59] = x[185]; x[185] = temp; temp= x[60]; x[60] = x[120]; x[120] = temp; temp= x[61]; x[61] = x[121]; x[121] = temp; temp= x[62]; x[62] = x[248]; x[248] = temp; temp= x[63]; x[63] = x[249]; x[249] = temp; temp= x[66]; x[66] = x[132]; x[132] = temp; temp= x[67]; x[67] = x[133]; x[133] = temp; temp= x[70]; x[70] = x[196]; x[196] = temp; temp= x[71]; x[71] = x[197]; x[197] = temp; temp= x[74]; x[74] = x[164]; x[164] = temp; temp= x[75]; x[75] = x[165]; x[165] = temp; temp= x[76]; x[76] = x[100]; x[100] = temp; temp= x[77]; x[77] = x[101]; x[101] = temp; temp= x[78]; x[78] = x[228]; x[228] = temp; temp= x[79]; x[79] = x[229]; x[229] = temp; temp= x[82]; x[82] = x[148]; x[148] = temp; temp= x[83]; x[83] = x[149]; x[149] = temp; temp= x[86]; x[86] = x[212]; x[212] = temp; temp= x[87]; x[87] = x[213]; x[213] = temp; temp= x[90]; x[90] = x[180]; x[180] = temp; temp= x[91]; x[91] = x[181]; x[181] = temp; temp= x[92]; x[92] = x[116]; x[116] = temp; temp= x[93]; x[93] = x[117]; x[117] = temp; temp= x[94]; x[94] = x[244]; x[244] = temp; temp= x[95]; x[95] = x[245]; x[245] = temp; temp= x[98]; x[98] = x[140]; x[140] = temp; temp= x[99]; x[99] = x[141]; x[141] = temp; temp= x[102]; x[102] = x[204]; x[204] = temp; temp= x[103]; x[103] = x[205]; x[205] = temp; temp= x[106]; x[106] = x[172]; x[172] = temp; temp= x[107]; x[107] = x[173]; x[173] = temp; temp= x[110]; x[110] = x[236]; x[236] = temp; temp= x[111]; x[111] = x[237]; x[237] = temp; temp= x[114]; x[114] = x[156]; x[156] = temp; temp= x[115]; x[115] = x[157]; x[157] = temp; temp= x[118]; x[118] = x[220]; x[220] = temp; temp= x[119]; x[119] = x[221]; x[221] = temp; temp= x[122]; x[122] = x[188]; x[188] = temp; temp= x[123]; x[123] = x[189]; x[189] = temp; temp= x[126]; x[126] = x[252]; x[252] = temp; temp= x[127]; x[127] = x[253]; x[253] = temp; temp= x[134]; x[134] = x[194]; x[194] = temp; temp= x[135]; x[135] = x[195]; x[195] = temp; temp= x[138]; x[138] = x[162]; x[162] = temp; temp= x[139]; x[139] = x[163]; x[163] = temp; temp= x[142]; x[142] = x[226]; x[226] = temp; temp= x[143]; x[143] = x[227]; x[227] = temp; temp= x[150]; x[150] = x[210]; x[210] = temp; temp= x[151]; x[151] = x[211]; x[211] = temp; temp= x[154]; x[154] = x[178]; x[178] = temp; temp= x[155]; x[155] = x[179]; x[179] = temp; temp= x[158]; x[158] = x[242]; x[242] = temp; temp= x[159]; x[159] = x[243]; x[243] = temp; temp= x[166]; x[166] = x[202]; x[202] = temp; temp= x[167]; x[167] = x[203]; x[203] = temp; temp= x[174]; x[174] = x[234]; x[234] = temp; temp= x[175]; x[175] = x[235]; x[235] = temp; temp= x[182]; x[182] = x[218]; x[218] = temp; temp= x[183]; x[183] = x[219]; x[219] = temp; temp= x[190]; x[190] = x[250]; x[250] = temp; temp= x[191]; x[191] = x[251]; x[251] = temp; temp= x[206]; x[206] = x[230]; x[230] = temp; temp= x[207]; x[207] = x[231]; x[231] = temp; temp= x[222]; x[222] = x[246]; x[246] = temp; temp= x[223]; x[223] = x[247]; x[247] = temp; break; case 64: temp= x[2]; x[2] = x[64]; x[64] = temp; temp= x[3]; x[3] = x[65]; x[65] = temp; temp= x[4]; x[4] = x[32]; x[32] = temp; temp= x[5]; x[5] = x[33]; x[33] = temp; temp= x[6]; x[6] = x[96]; x[96] = temp; temp= x[7]; x[7] = x[97]; x[97] = temp; temp= x[8]; x[8] = x[16]; x[16] = temp; temp= x[9]; x[9] = x[17]; x[17] = temp; temp= x[10]; x[10] = x[80]; x[80] = temp; temp= x[11]; x[11] = x[81]; x[81] = temp; temp= x[12]; x[12] = x[48]; x[48] = temp; temp= x[13]; x[13] = x[49]; x[49] = temp; temp= x[14]; x[14] = x[112]; x[112] = temp; temp= x[15]; x[15] = x[113]; x[113] = temp; temp= x[18]; x[18] = x[72]; x[72] = temp; temp= x[19]; x[19] = x[73]; x[73] = temp; temp= x[20]; x[20] = x[40]; x[40] = temp; temp= x[21]; x[21] = x[41]; x[41] = temp; temp= x[22]; x[22] = x[104]; x[104] = temp; temp= x[23]; x[23] = x[105]; x[105] = temp; temp= x[26]; x[26] = x[88]; x[88] = temp; temp= x[27]; x[27] = x[89]; x[89] = temp; temp= x[28]; x[28] = x[56]; x[56] = temp; temp= x[29]; x[29] = x[57]; x[57] = temp; temp= x[30]; x[30] = x[120]; x[120] = temp; temp= x[31]; x[31] = x[121]; x[121] = temp; temp= x[34]; x[34] = x[68]; x[68] = temp; temp= x[35]; x[35] = x[69]; x[69] = temp; temp= x[38]; x[38] = x[100]; x[100] = temp; temp= x[39]; x[39] = x[101]; x[101] = temp; temp= x[42]; x[42] = x[84]; x[84] = temp; temp= x[43]; x[43] = x[85]; x[85] = temp; temp= x[44]; x[44] = x[52]; x[52] = temp; temp= x[45]; x[45] = x[53]; x[53] = temp; temp= x[46]; x[46] = x[116]; x[116] = temp; temp= x[47]; x[47] = x[117]; x[117] = temp; temp= x[50]; x[50] = x[76]; x[76] = temp; temp= x[51]; x[51] = x[77]; x[77] = temp; temp= x[54]; x[54] = x[108]; x[108] = temp; temp= x[55]; x[55] = x[109]; x[109] = temp; temp= x[58]; x[58] = x[92]; x[92] = temp; temp= x[59]; x[59] = x[93]; x[93] = temp; temp= x[62]; x[62] = x[124]; x[124] = temp; temp= x[63]; x[63] = x[125]; x[125] = temp; temp= x[70]; x[70] = x[98]; x[98] = temp; temp= x[71]; x[71] = x[99]; x[99] = temp; temp= x[74]; x[74] = x[82]; x[82] = temp; temp= x[75]; x[75] = x[83]; x[83] = temp; temp= x[78]; x[78] = x[114]; x[114] = temp; temp= x[79]; x[79] = x[115]; x[115] = temp; temp= x[86]; x[86] = x[106]; x[106] = temp; temp= x[87]; x[87] = x[107]; x[107] = temp; temp= x[94]; x[94] = x[122]; x[122] = temp; temp= x[95]; x[95] = x[123]; x[123] = temp; temp= x[110]; x[110] = x[118]; x[118] = temp; temp= x[111]; x[111] = x[119]; x[119] = temp; break; case 32: temp= x[2]; x[2] = x[32]; x[32] = temp; temp= x[3]; x[3] = x[33]; x[33] = temp; temp= x[4]; x[4] = x[16]; x[16] = temp; temp= x[5]; x[5] = x[17]; x[17] = temp; temp= x[6]; x[6] = x[48]; x[48] = temp; temp= x[7]; x[7] = x[49]; x[49] = temp; temp= x[10]; x[10] = x[40]; x[40] = temp; temp= x[11]; x[11] = x[41]; x[41] = temp; temp= x[12]; x[12] = x[24]; x[24] = temp; temp= x[13]; x[13] = x[25]; x[25] = temp; temp= x[14]; x[14] = x[56]; x[56] = temp; temp= x[15]; x[15] = x[57]; x[57] = temp; temp= x[18]; x[18] = x[36]; x[36] = temp; temp= x[19]; x[19] = x[37]; x[37] = temp; temp= x[22]; x[22] = x[52]; x[52] = temp; temp= x[23]; x[23] = x[53]; x[53] = temp; temp= x[26]; x[26] = x[44]; x[44] = temp; temp= x[27]; x[27] = x[45]; x[45] = temp; temp= x[30]; x[30] = x[60]; x[60] = temp; temp= x[31]; x[31] = x[61]; x[61] = temp; temp= x[38]; x[38] = x[50]; x[50] = temp; temp= x[39]; x[39] = x[51]; x[51] = temp; temp= x[46]; x[46] = x[58]; x[58] = temp; temp= x[47]; x[47] = x[59]; x[59] = temp; break; case 16: temp= x[2]; x[2] = x[16]; x[16] = temp; temp= x[3]; x[3] = x[17]; x[17] = temp; temp= x[4]; x[4] = x[8]; x[8] = temp; temp= x[5]; x[5] = x[9]; x[9] = temp; temp= x[6]; x[6] = x[24]; x[24] = temp; temp= x[7]; x[7] = x[25]; x[25] = temp; temp= x[10]; x[10] = x[20]; x[20] = temp; temp= x[11]; x[11] = x[21]; x[21] = temp; temp= x[14]; x[14] = x[28]; x[28] = temp; temp= x[15]; x[15] = x[29]; x[29] = temp; temp= x[22]; x[22] = x[26]; x[26] = temp; temp= x[23]; x[23] = x[27]; x[27] = temp; break; case 8: temp= x[2]; x[2] = x[8]; x[8] = temp; temp= x[3]; x[3] = x[9]; x[9] = temp; temp= x[6]; x[6] = x[12]; x[12] = temp; temp= x[7]; x[7] = x[13]; x[13] = temp; break; case 4: temp= x[2]; x[2] = x[4]; x[4] = temp; temp= x[3]; x[3] = x[5]; x[5] = temp; break; case 2: break; default: bitrev( x, length ); } }
int mace68k_probe(struct net_device *unused) { int j; static int once=0; struct mace68k_data *mp; unsigned char *addr; struct net_device *dev; unsigned char checksum = 0; /* * There can be only one... */ if (once) return -ENODEV; once = 1; if (macintosh_config->ether_type != MAC_ETHER_MACE) return -ENODEV; printk("MACE ethernet should be present "); dev = init_etherdev(0, PRIV_BYTES); if(dev==NULL) { printk("no free memory.\n"); return -ENOMEM; } mp = (struct mace68k_data *) dev->priv; dev->base_addr = (u32)MACE_BASE; mp->mace = (volatile struct mace *) MACE_BASE; printk("at 0x%p", mp->mace); /* * 16K RX ring and 4K TX ring should do nicely */ mp->rx_ring=(void *)__get_free_pages(GFP_KERNEL, 2); mp->tx_ring=(void *)__get_free_page(GFP_KERNEL); printk("."); if(mp->tx_ring==NULL || mp->rx_ring==NULL) { if(mp->tx_ring) free_page((u32)mp->tx_ring); // if(mp->rx_ring) // __free_pages(mp->rx_ring,2); printk("\nNo memory for ring buffers.\n"); return -ENOMEM; } /* We want the receive data to be uncached. We dont care about the byte reading order */ printk("."); kernel_set_cachemode((void *)mp->rx_ring, 16384, IOMAP_NOCACHE_NONSER); printk("."); /* The transmit buffer needs to be write through */ kernel_set_cachemode((void *)mp->tx_ring, 4096, IOMAP_WRITETHROUGH); printk(" Ok\n"); dev->irq = IRQ_MAC_MACE; printk(KERN_INFO "%s: MACE at", dev->name); /* * The PROM contains 8 bytes which total 0xFF when XOR'd * together. Due to the usual peculiar apple brain damage * the bytes are spaced out in a strange boundary and the * bits are reversed. */ addr = (void *)MACE_PROM; for (j = 0; j < 6; ++j) { u8 v=bitrev(addr[j<<4]); checksum^=v; dev->dev_addr[j] = v; printk("%c%.2x", (j ? ':' : ' '), dev->dev_addr[j]); } for (; j < 8; ++j) { checksum^=bitrev(addr[j<<4]); } if(checksum!=0xFF) { printk(" (invalid checksum)\n"); return -ENODEV; } printk("\n"); memset(&mp->stats, 0, sizeof(mp->stats)); init_timer(&mp->tx_timeout); mp->timeout_active = 0; dev->open = mace68k_open; dev->stop = mace68k_close; dev->hard_start_xmit = mace68k_xmit_start; dev->get_stats = mace68k_stats; dev->set_multicast_list = mace68k_set_multicast; dev->set_mac_address = mace68k_set_address; ether_setup(dev); mp = (struct mace68k_data *) dev->priv; mp->maccc = ENXMT | ENRCV; mp->dma_intr = IRQ_MAC_MACE_DMA; psc_write_word(PSC_ENETWR_CTL, 0x9000); psc_write_word(PSC_ENETRD_CTL, 0x9000); psc_write_word(PSC_ENETWR_CTL, 0x0400); psc_write_word(PSC_ENETRD_CTL, 0x0400); /* apple's driver doesn't seem to do this */ /* except at driver shutdown time... */ #if 0 mace68k_dma_off(dev); #endif return 0; }
void ntt_transform(poly out, const poly o) { int s, pos = 0, offset; __m256d vt,vo0,vo10,vo11,vo20,vo21,vo22,vo23,vc,vp,vpinv,neg2,neg4; __m256d vx0,vx1,vx2,vx3,vx4,vx5,vx6,vx7; vpinv = _mm256_set_pd(PARAM_APPROX_P_INVERSE, PARAM_APPROX_P_INVERSE, PARAM_APPROX_P_INVERSE, PARAM_APPROX_P_INVERSE); vp = _mm256_set_pd(8383489., 8383489., 8383489., 8383489.); bitrev(out); vo10 = _mm256_load_pd(o+pos); vo20 = _mm256_load_pd(o+pos+4); neg2 = _mm256_load_pd(_neg2); neg4 = _mm256_load_pd(_neg4); // m = 2, m = 4, m = 8 (3 levels merged) for(s = 0; s<POLY_DEG; s+=8) { // No multiplication with omega required, respective value is 1 vx0 = _mm256_load_pd(out+s); vt = _mm256_mul_pd(vx0,neg2); vx0 = _mm256_hadd_pd(vx0,vt); vx1 = _mm256_load_pd(out+s+4); vt = _mm256_mul_pd(vx1,neg2); vx1 = _mm256_hadd_pd(vx1,vt); vx0 = _mm256_mul_pd(vx0, vo10); vc = _mm256_mul_pd(vx0, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vx0 = _mm256_sub_pd(vx0,vc); vt = _mm256_permute2f128_pd (vx0, vx0, 0x01); // now contains x2,x3,x0,x1 vx0 = _mm256_mul_pd(vx0, neg4); vx0 = _mm256_add_pd(vx0, vt); vx1 = _mm256_mul_pd(vx1, vo10); vc = _mm256_mul_pd(vx1, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vx1 = _mm256_sub_pd(vx1,vc); vt = _mm256_permute2f128_pd (vx1, vx1, 0x01); // now contains x2,x3,x0,x1 vx1 = _mm256_mul_pd(vx1, neg4); vx1 = _mm256_add_pd(vx1, vt); vt = _mm256_mul_pd(vx1, vo20); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); vx1 = _mm256_sub_pd(vx0, vt); _mm256_store_pd(out+s+4, vx1); vx0 = _mm256_add_pd(vx0, vt); _mm256_store_pd(out+s+0, vx0); } pos += 8; // m = 16, m = 32, m = 64 (3 levels merged) for(offset = 0; offset < 8; offset+=4) { vo0 = _mm256_load_pd(o+pos+offset); vo10 = _mm256_load_pd(o+pos+offset+8); vo11 = _mm256_load_pd(o+pos+offset+16); for(s = 0; s<POLY_DEG; s+=64) { vx1 = _mm256_load_pd(out+offset+s+8); vt = _mm256_mul_pd(vx1, vo0); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); vx0 = _mm256_load_pd(out+offset+s+0); vx1 = _mm256_sub_pd(vx0, vt); // _mm256_store_pd(out+offset+s+8, vx1); vx0 = _mm256_add_pd(vx0, vt); // _mm256_store_pd(out+offset+s+0, vx0); vx3 = _mm256_load_pd(out+offset+s+24); vt = _mm256_mul_pd(vx3, vo0); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); vx2 = _mm256_load_pd(out+offset+s+16); vx3 = _mm256_sub_pd(vx2, vt); // _mm256_store_pd(out+offset+s+24, vx3); vx2 = _mm256_add_pd(vx2, vt); // _mm256_store_pd(out+offset+s+16, vx2); vx5 = _mm256_load_pd(out+offset+s+40); vt = _mm256_mul_pd(vx5, vo0); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); vx4 = _mm256_load_pd(out+offset+s+32); vx5 = _mm256_sub_pd(vx4, vt); // _mm256_store_pd(out+offset+s+40, vx5); vx4 = _mm256_add_pd(vx4, vt); // _mm256_store_pd(out+offset+s+32, vx4); vx7 = _mm256_load_pd(out+offset+s+56); vt = _mm256_mul_pd(vx7, vo0); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); vx6 = _mm256_load_pd(out+offset+s+48); vx7 = _mm256_sub_pd(vx6, vt); // _mm256_store_pd(out+offset+s+56, vx7); vx6 = _mm256_add_pd(vx6, vt); // _mm256_store_pd(out+offset+s+48, vx6); // vx2 = _mm256_load_pd(out+offset+s+16); vt = _mm256_mul_pd(vx2, vo10); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); // vx0 = _mm256_load_pd(out+offset+s+0); vx2 = _mm256_sub_pd(vx0, vt); // _mm256_store_pd(out+offset+s+16, vx2); vx0 = _mm256_add_pd(vx0, vt); // _mm256_store_pd(out+offset+s+0, vx0); // vx6 = _mm256_load_pd(out+offset+s+48); vt = _mm256_mul_pd(vx6, vo10); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); // vx4 = _mm256_load_pd(out+offset+s+32); vx6 = _mm256_sub_pd(vx4, vt); // _mm256_store_pd(out+offset+s+48, vx6); vx4 = _mm256_add_pd(vx4, vt); // _mm256_store_pd(out+offset+s+32, vx4); // vx3 = _mm256_load_pd(out+offset+s+24); vt = _mm256_mul_pd(vx3, vo11); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); // vx1 = _mm256_load_pd(out+offset+s+8); vx3 = _mm256_sub_pd(vx1, vt); // _mm256_store_pd(out+offset+s+24, vx3); vx1 = _mm256_add_pd(vx1, vt); // _mm256_store_pd(out+offset+s+8, vx1); // vx7 = _mm256_load_pd(out+offset+s+56); vt = _mm256_mul_pd(vx7, vo11); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); // vx5 = _mm256_load_pd(out+offset+s+40); vx7 = _mm256_sub_pd(vx5, vt); // _mm256_store_pd(out+offset+s+56, vx7); vx5 = _mm256_add_pd(vx5, vt); // _mm256_store_pd(out+offset+s+40, vx5); // vx4 = _mm256_load_pd(out+offset+s+32); vo20 = _mm256_load_pd(o+pos+offset+24); vt = _mm256_mul_pd(vx4, vo20); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); // vx0 = _mm256_load_pd(out+offset+s+0); vx4 = _mm256_sub_pd(vx0, vt); _mm256_store_pd(out+offset+s+32, vx4); vx0 = _mm256_add_pd(vx0, vt); _mm256_store_pd(out+offset+s+0, vx0); // vx5 = _mm256_load_pd(out+offset+s+40); vo21 = _mm256_load_pd(o+pos+offset+32); vt = _mm256_mul_pd(vx5, vo21); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); // vx1 = _mm256_load_pd(out+offset+s+8); vx5 = _mm256_sub_pd(vx1, vt); _mm256_store_pd(out+offset+s+40, vx5); vx1 = _mm256_add_pd(vx1, vt); _mm256_store_pd(out+offset+s+8, vx1); // vx6 = _mm256_load_pd(out+offset+s+48); vo22 = _mm256_load_pd(o+pos+offset+40); vt = _mm256_mul_pd(vx6, vo22); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); // vx2 = _mm256_load_pd(out+offset+s+16); vx6 = _mm256_sub_pd(vx2, vt); _mm256_store_pd(out+offset+s+48, vx6); vx2 = _mm256_add_pd(vx2, vt); _mm256_store_pd(out+offset+s+16, vx2); // vx7 = _mm256_load_pd(out+offset+s+56); vo23 = _mm256_load_pd(o+pos+offset+48); vt = _mm256_mul_pd(vx7, vo23); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); // vx3 = _mm256_load_pd(out+offset+s+24); vx7 = _mm256_sub_pd(vx3, vt); _mm256_store_pd(out+offset+s+56, vx7); vx3 = _mm256_add_pd(vx3, vt); _mm256_store_pd(out+offset+s+24, vx3); } } pos += 56; // m = 128, m=256, m=512 (3 levels merged) for(offset=0;offset<64;offset+=4) { vo0 = _mm256_load_pd(o+pos+offset); vo10 = _mm256_load_pd(o+pos+offset+64); vo11 = _mm256_load_pd(o+pos+offset+128); for(s = 0; s<POLY_DEG; s+=512) { vx1 = _mm256_load_pd(out+offset+s+64); vt = _mm256_mul_pd(vx1, vo0); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); vx0 = _mm256_load_pd(out+offset+s+0); vx1 = _mm256_sub_pd(vx0, vt); //_mm256_store_pd(out+offset+s+64, vx1); vx0 = _mm256_add_pd(vx0, vt); //_mm256_store_pd(out+offset+s+0, vx0); vx3 = _mm256_load_pd(out+offset+s+192); vt = _mm256_mul_pd(vx3, vo0); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); vx2 = _mm256_load_pd(out+offset+s+128); vx3 = _mm256_sub_pd(vx2, vt); //_mm256_store_pd(out+offset+s+192, vx3); vx2 = _mm256_add_pd(vx2, vt); //_mm256_store_pd(out+offset+s+128, vx2); vx5 = _mm256_load_pd(out+offset+s+320); vt = _mm256_mul_pd(vx5, vo0); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); vx4 = _mm256_load_pd(out+offset+s+256); vx5 = _mm256_sub_pd(vx4, vt); //_mm256_store_pd(out+offset+s+320, vx5); vx4 = _mm256_add_pd(vx4, vt); //_mm256_store_pd(out+offset+s+256, vx4); vx7 = _mm256_load_pd(out+offset+s+448); vt = _mm256_mul_pd(vx7, vo0); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); vx6 = _mm256_load_pd(out+offset+s+384); vx7 = _mm256_sub_pd(vx6, vt); //_mm256_store_pd(out+offset+s+448, vx7); vx6 = _mm256_add_pd(vx6, vt); //_mm256_store_pd(out+offset+s+384, vx6); //vx2 = _mm256_load_pd(out+offset+s+128); vt = _mm256_mul_pd(vx2, vo10); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); //vx0 = _mm256_load_pd(out+offset+s+0); vx2 = _mm256_sub_pd(vx0, vt); //_mm256_store_pd(out+offset+s+128, vx2); vx0 = _mm256_add_pd(vx0, vt); //_mm256_store_pd(out+offset+s+0, vx0); //vx3 = _mm256_load_pd(out+offset+s+192); vt = _mm256_mul_pd(vx3, vo11); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); //vx1 = _mm256_load_pd(out+offset+s+64); vx3 = _mm256_sub_pd(vx1, vt); //_mm256_store_pd(out+offset+s+192, vx3); vx1 = _mm256_add_pd(vx1, vt); //_mm256_store_pd(out+offset+s+64, vx1); //vx6 = _mm256_load_pd(out+offset+s+384); vt = _mm256_mul_pd(vx6, vo10); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); //vx4 = _mm256_load_pd(out+offset+s+256); vx6 = _mm256_sub_pd(vx4, vt); //_mm256_store_pd(out+offset+s+384, vx6); vx4 = _mm256_add_pd(vx4, vt); //_mm256_store_pd(out+offset+s+256, vx4); //vx7 = _mm256_load_pd(out+offset+s+448); vt = _mm256_mul_pd(vx7, vo11); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); //vx5 = _mm256_load_pd(out+offset+s+320); vx7 = _mm256_sub_pd(vx5, vt); //_mm256_store_pd(out+offset+s+448, vx7); vx5 = _mm256_add_pd(vx5, vt); //_mm256_store_pd(out+offset+s+320, vx5); //vx4 = _mm256_load_pd(out+offset+s+256); vo20 = _mm256_load_pd(o+pos+offset+192); vt = _mm256_mul_pd(vx4, vo20); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); //vx0 = _mm256_load_pd(out+offset+s+0); vx4 = _mm256_sub_pd(vx0, vt); _mm256_store_pd(out+offset+s+256, vx4); vx0 = _mm256_add_pd(vx0, vt); _mm256_store_pd(out+offset+s+0, vx0); //vx5 = _mm256_load_pd(out+offset+s+320); vo21 = _mm256_load_pd(o+pos+offset+256); vt = _mm256_mul_pd(vx5, vo21); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); //vx1 = _mm256_load_pd(out+offset+s+64); vx5 = _mm256_sub_pd(vx1, vt); _mm256_store_pd(out+offset+s+320, vx5); vx1 = _mm256_add_pd(vx1, vt); _mm256_store_pd(out+offset+s+64, vx1); //vx6 = _mm256_load_pd(out+offset+s+384); vo22 = _mm256_load_pd(o+pos+offset+320); vt = _mm256_mul_pd(vx6, vo22); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); //vx2 = _mm256_load_pd(out+offset+s+128); vx6 = _mm256_sub_pd(vx2, vt); _mm256_store_pd(out+offset+s+384, vx6); vx2 = _mm256_add_pd(vx2, vt); _mm256_store_pd(out+offset+s+128, vx2); //vx7 = _mm256_load_pd(out+offset+s+448); vo23 = _mm256_load_pd(o+pos+offset+384); vt = _mm256_mul_pd(vx7, vo23); vc = _mm256_mul_pd(vt, vpinv); vc = _mm256_round_pd(vc,0x08); vc = _mm256_mul_pd(vc, vp); vt = _mm256_sub_pd(vt,vc); //vx3 = _mm256_load_pd(out+offset+s+192); vx7 = _mm256_sub_pd(vx3, vt); _mm256_store_pd(out+offset+s+448, vx7); vx3 = _mm256_add_pd(vx3, vt); _mm256_store_pd(out+offset+s+192, vx3); } } }
// Perform the 1d transform (mu is used to choose the communicator and n) // The fft consist of four step: // 1) data of is rearranged across the ranks // 2) ft is done on the odd length blocks (not needed if data length is a power of 2) // 3) Lanczos lemma is implemented on the local data // 4) Lanczos lemma is applied on non-local data (if needed) void fft1d(complex *out,complex *in,int ncpp,int mu,double sign,int normalize) { GET_THREAD_ID(); //allocate the buffer to send data complex *buf=nissa_malloc("buf",loc_size[mu]*ncpp,complex); if(IS_MASTER_THREAD) { int log2glb_nblk=find_max_pow2(glb_size[mu]); //number of powers of 2 contained in n int glb_nblk=1<<log2glb_nblk; //remaining odd block int blk_size=glb_size[mu]/glb_nblk; //block size int loc_nblk=glb_nblk/nrank_dir[mu]; //number of local blocks //check if the loc_size is a multiple of block_size int r=nrank_dir[mu]; while(r>1) { if(r%2!=0) crash("Error, FFT implemented only for power of 2 grids! Ask sunpho to adapt it to a more general case if you really need!"); r/=2; } ////////////////////////////// first part ///////////////////////////////// //reorder data across the various rank: glb_iel_in=iel_blk_out*glb_nblk+glb_iblk_out_rev int nrequest0=0,nexp_request0=loc_size[mu]*2; MPI_Request request0[nexp_request0]; for(int loc_iblk_in=0;loc_iblk_in<loc_nblk;loc_iblk_in++) for(int iel_blk_in=0;iel_blk_in<blk_size;iel_blk_in++) { //find the full index int loc_iel_in=loc_iblk_in*blk_size+iel_blk_in; int glb_iel_in=glb_coord_of_loclx[0][mu]+loc_iel_in; int glb_iblk_in=rank_coord[mu]*loc_nblk+loc_iblk_in; //look at the output int iel_blk_out=glb_iel_in/glb_nblk; int glb_iblk_out=bitrev(glb_iel_in-iel_blk_out*glb_nblk,log2glb_nblk); int rank_coord_out=glb_iblk_out/loc_nblk; int loc_iblk_out=glb_iblk_out-rank_coord_out*loc_nblk; int loc_iel_out=loc_iblk_out*blk_size+iel_blk_out; //now, if the destination is local, put it on place if(rank_coord_out==rank_coord[mu]) memcpy(buf+ncpp*loc_iel_out,in+ncpp*loc_iel_in,sizeof(complex)*ncpp); else //send the data to the destination MPI_Isend((void*)(in+loc_iel_in*ncpp),ncpp*2,MPI_DOUBLE,rank_coord_out,1241+loc_iel_out, line_comm[mu],&request0[nrequest0++]); //if necessary receive data: glb_iel_send=iel_blk_in*glb_nblk+glb_iblk_in_rev int glb_iel_send=iel_blk_in*glb_nblk+bitrev(glb_iblk_in,log2glb_nblk); int rank_coord_send=glb_iel_send/loc_size[mu]; if(rank_coord_send!=line_rank[mu]) MPI_Irecv((void*)(buf+loc_iel_in*ncpp),ncpp*2,MPI_DOUBLE,rank_coord_send,1241+loc_iel_in, line_comm[mu],&request0[nrequest0++]); } //wait for scramble to finish MPI_Status status0[nexp_request0]; if(nrequest0>0) MPI_Waitall(nrequest0,request0,status0); /////////////////////////// second part //////////////////////////////////// //perform the block fourier transform if needed if(blk_size==1) memcpy(out,buf,loc_size[mu]*ncpp*sizeof(complex)); else { //initialize the output memset(out,0,loc_size[mu]*ncpp*sizeof(complex)); //loop over local blocks for(int loc_iblk=0;loc_iblk<loc_nblk;loc_iblk++) { //take initial position of the output and input block complex *in_blk=buf+loc_iblk*blk_size*ncpp; complex *out_blk=out+loc_iblk*blk_size*ncpp; //loop over out elements of out block for(int iel_out=0;iel_out<blk_size;iel_out++) { //take initial position of out local elements complex *out_pad=out_blk+iel_out*ncpp; //incrementing factor double theta=iel_out*sign*2*M_PI/blk_size; double wtemp=sin(0.5*theta); double wpr=-2*wtemp*wtemp; double wpi=sin(theta); //fourier factor double wr=1; double wi=0; //loop over elements of in blocks for(int iel_in=0;iel_in<blk_size;iel_in++) { //take initial position of in local elements complex *in_pad=in_blk+iel_in*ncpp; //loop over local elements for(int ic=0;ic<ncpp;ic++) { out_pad[ic][0]+=wr*in_pad[ic][0]-wi*in_pad[ic][1]; out_pad[ic][1]+=wr*in_pad[ic][1]+wi*in_pad[ic][0]; } //increment the twiddle wtemp=wr; wr+=wr*wpr- wi*wpi; wi+=wi*wpr+wtemp*wpi; } } } } ///////////////////////////// third part //////////////////////////// //now perform the lanczos procedure up to when it does not need communications for(int delta=blk_size;delta<loc_size[mu];delta*=2) { //incrementing factor double theta=sign*2*M_PI/(2*delta); double wtemp=sin(0.5*theta); double wpr=-2*wtemp*wtemp; double wpi=sin(theta); //fourier coefficient double wr=1; double wi=0; //loop over the delta length (each m will correspond to increasing twiddle) for(int m=0;m<delta;m++) { //loop on the first addend for(int i=m;i<loc_size[mu];i+=2*delta) { //second addend int j=i+delta; //site data multiplication for(int ic=0;ic<ncpp;ic++) { double tempr=wr*out[j*ncpp+ic][0]-wi*out[j*ncpp+ic][1]; double tempi=wr*out[j*ncpp+ic][1]+wi*out[j*ncpp+ic][0]; out[j*ncpp+ic][0]=out[i*ncpp+ic][0]-tempr; out[j*ncpp+ic][1]=out[i*ncpp+ic][1]-tempi; out[i*ncpp+ic][0]+=tempr; out[i*ncpp+ic][1]+=tempi; } } wtemp=wr; wr+=wr*wpr- wi*wpi; wi+=wi*wpr+wtemp*wpi; } } ///////////////////////////////////// fourth part ////////////////////////////// //now perform the lanczos procedure up to the end for(int delta_rank=1;delta_rank<nrank_dir[mu];delta_rank*=2) //this is rank width of delta { int delta=delta_rank*loc_size[mu]; //block extent int idelta=rank_coord[mu]/delta_rank; //identify the delta of the current rank //find if the current rank holding the first or second block complex *first,*second; MPI_Request request2[2]; MPI_Status status2[2]; if(idelta%2==0) //first: so it has to receive second block and send first { first=out; second=buf; MPI_Irecv((void*)buf,2*ncpp*loc_size[mu],MPI_DOUBLE,line_rank[mu]+delta_rank,113+line_rank[mu], line_comm[mu],&(request2[0])); MPI_Isend((void*)out,2*ncpp*loc_size[mu],MPI_DOUBLE,line_rank[mu]+delta_rank,113+line_rank[mu]+delta_rank, line_comm[mu],&(request2[1])); } else //second: so it has to receive first block and send second { first=buf; second=out; MPI_Irecv((void*)buf,2*ncpp*loc_size[mu],MPI_DOUBLE,line_rank[mu]-delta_rank,113+line_rank[mu], line_comm[mu],&(request2[0])); MPI_Isend((void*)out,2*ncpp*loc_size[mu],MPI_DOUBLE,line_rank[mu]-delta_rank,113+line_rank[mu]-delta_rank, line_comm[mu],&(request2[1])); } //incrementing factor double theta=sign*2*M_PI/(2*delta); double wtemp=sin(0.5*theta); double wpr=-2*wtemp*wtemp; double wpi=sin(theta); int rank_pos_delta=rank_coord[mu]%delta_rank; //position of the rank inside the delta int pos_delta=rank_pos_delta*loc_size[mu]; //starting coord of local delta inside the delta //fourier coefficient double wr=cos(pos_delta*theta); double wi=sin(pos_delta*theta); //wait for communications to finish MPI_Waitall(2,request2,status2); //loop over the delta length (each m will correspond to increasing twiddle) for(int loc_m=0;loc_m<loc_size[mu];loc_m++) { //site data multiplication for(int ic=0;ic<ncpp;ic++) { double tempr=wr*second[loc_m*ncpp+ic][0]-wi*second[loc_m*ncpp+ic][1]; double tempi=wr*second[loc_m*ncpp+ic][1]+wi*second[loc_m*ncpp+ic][0]; if(idelta%2==0) { first[loc_m*ncpp+ic][0]+=tempr; first[loc_m*ncpp+ic][1]+=tempi; } else { second[loc_m*ncpp+ic][0]=first[loc_m*ncpp+ic][0]-tempr; second[loc_m*ncpp+ic][1]=first[loc_m*ncpp+ic][1]-tempi; } } wtemp=wr; wr+=wr*wpr- wi*wpi; wi+=wi*wpr+wtemp*wpi; } } if(normalize==1) for(int i=0;i<loc_size[mu]*ncpp;i++) for(int ri=0;ri<2;ri++) out[i][ri]/=glb_size[mu]; } nissa_free(buf); }
static void inpmap( fint lpt , /* number of points in X */ fint mpt , /* number of points in Y */ fint lmin , /* left most x map coordinate */ fint mmin , /* lower most y map coordinate */ fchar set[] , /* input set name */ fint sub[] , /* subsets */ fint nset , /* number of sets */ fint modi ) /* input mode, amp./pha. or cos./sin. */ { fint i, itab, k, m, md, mi, mj, mph, mphh, ms; fint q, r, ri, rj, row, rs, wor; float rbuf[2*SIZE], cbuf[2*SIZE]; float fr, fi, zr, zi; if ( FORM == -1 ) { /* complex ---FFT---> real */ q = POWTAB[ MAXP2 - LOGP2Y - 1 ]; /* table offset factor */ mph = mpt / 2; /* number of rows to load */ mphh = mph / 2 + 1; fr = -0.5 * SIGN; /* calculate factors */ fi = -0.5; for ( ri = 0 ; ri < mphh; ri++ ) { /* loop to load data */ rj = mph - ri; itab = ri * q; /* get table position */ zr = 0.5 - fr * SINTAB[itab]; zi = fi * COSTAB[itab]; mi = shiftr( mmin, mpt, ri ); for ( k = 0; k < nset; k++ ) { /* get data */ i= k * lpt; readxy( set[k], sub[k], lmin, mi, &cbuf[i], lpt, 1 ); } mj = shiftr( mmin, mpt, rj ); for ( k = 0; k < nset; k++ ) { /* get data */ i = k * lpt; readxy( set[k], sub[k], lmin, mj, &rbuf[i], lpt, 1 ); } vector( cbuf, lpt, modi, 1 ); vector( rbuf, lpt, modi, 1 ); fftx( cbuf ); /* do the FFT */ fftx( rbuf ); /* fixup for complex to real transform */ fxrl( zr, zi, cbuf, rbuf, ri, rj, lpt ); /* store in scratch file */ wor = bitrev( ri, LOGP2Y ); putrow( cbuf, 2 * lpt, wor ); if ( ( ri != rj ) && ( ri != 0 ) ) { wor = bitrev( rj, LOGP2Y ); putrow( rbuf, 2 * lpt, wor ); } } } else if ( FORM == 0 ) { /* real ---FFT---> complex */ rs = MIN( mpt, ( 2 * SIZE ) / lpt ); /* number of rows in one pass */ k = 0; /* subset number */ for ( r = 0; r < mpt; r += rs ) {/* loop to load data */ md = 0; do { m = shiftr( mmin, mpt, r + md ); ms = MIN( rs - md, mpt + mmin - m ); i = md * lpt; readxy( set[k], sub[k], lmin, m, &cbuf[i], lpt, ms ); md = md + ms; } while ( md != rs ); for ( row = r; row < ( r + rs ); row += 2 ) { i = ( row - r ) * lpt; wor = bitrev( row / 2, LOGP2Y ); putrow( &cbuf[i], 2 * lpt, wor ); } } } else if ( FORM == 1 ) { /* complex ---FFT---> complex */ for ( row = 0; row < mpt; row++ ) { m = shiftr( mmin, mpt, row ); /* get data from disk */ for ( k = 0; k < nset; k++ ) { i = k * lpt; readxy( set[k], sub[k], lmin, m, &cbuf[i], lpt, 1 ); } vector( cbuf, lpt, modi, 1 ); fftx( cbuf ); wor = bitrev( row, LOGP2Y ); putrow( cbuf, 2 * lpt, wor ); } } }
main(int argc, char** argv) { int dum_int, iread, iw, indx, ITStart, namelen, HdrSize, NTOT, NSkip, NPtsToRead, SkiByte, log2NTOT, NDM, NOPFiles, MAX, NTSampInRead, i, NSampInRead, BytePerFrame, NByteInRead, ITOffset, NBitChan, *ibrev, NRead, IFiles, FOld, FNew, FSwitch, KeepTrack, NTReadOld, NTReadNew, NReadOld, NReadNew, OPFileSize, IOffset, SkipByte, oldper, newper ,NT_Files; long long TotalTrack; float FMin, FMax, FCen, *Inbuf, *Outbuf, timecount; char *filename, *filefull, *parfile; FILE *fpar, *Fout[8192], *Fin; if (argc <= 1) tree_help(); else tree_parms(argc, argv); filename = (char *) calloc(120, sizeof(char)); filefull = (char *) calloc(120, sizeof(char)); parfile = (char *) calloc(120, sizeof(char)); ITStart = 0; NT_Files = 0; /* To read a sample header from first time file */ strcpy(filename, unfname); namelen = strlen(filename); printf("First filename : %s\n", filename); if ((fpar = fopen(filename, "rb")) == NULL) { printf("ERROR opening file %s.\n", filename); exit(0); } HdrSize = read_header(fpar); fclose(fpar); FMin = fch1; FMax = FMin + (float)(nchans - 1) * foff; FCen = (FMin + FMax) / 2.0; printf("No. of frequency channels : %d\n",nchans); printf("Beginning radio frequency : %f MHz\n", FMin - (foff / 2.0)); printf("Ending radio frequency : %f MHz\n", FMax + (foff / 2.0)); printf("Centre frq. of the whole band : %f MHz\n", FCen); printf("\n"); printf("input sampling interval : %f sec\n", tsamp); NTOT = 0; strcpy(filename, unfname); namelen = strlen(filename); if ((fpar = fopen(filename, "rb")) == NULL) { printf("ERROR opening %s.\n", filename); } HdrSize = read_header(fpar); fclose(fpar); NT_Files = (int)nsamples(filename, HdrSize, nbits, nifs, nchans); NTOT += NT_Files; printf("File %5d : %s with %d samples\n", i, filename, NT_Files); NSkip = (int)(TSkip / tsamp); if (NSkip > NT_Files) { printf("Initial Skip-length longer than the first file!\n"); exit(0); } NPtsToRead = (int)(TRead / tsamp); if (NPtsToRead==0) { TRead=NT_Files*tsamp; NPtsToRead=NT_Files; } if (NPtsToRead > NTOT) NPtsToRead = NTOT; SkipByte = (NSkip * nbits * nchans / 8); printf("\n"); printf("This data set contains %d number of samples\n", NTOT); printf("Number of bits per sample : %d\n", nbits); printf("Time length to skip at the begin. : %f\n", TSkip); printf("Bytes to skip at the beginning : %d\n", SkipByte); printf("No. of time samples to skip : %d\n", NSkip); printf("Time length to read after skipping : %f\n", TRead); printf("Samples to read after skipping : %d\n", NPtsToRead); NTOT -= NSkip; printf("Time samples after initial skip : %d\n", NTOT); if (NPtsToRead > NTOT) { NPtsToRead = NTOT; printf("Too many samples to read. Truncated to %d samples\n", NTOT); } else NTOT = NPtsToRead; log2NTOT = (int)(log((double)NTOT) / log((double)2.0)); NTOT = (1 << log2NTOT); printf("After truncating to lower 2^n : %d\n", NTOT); if (DMMin < 0) DMMin = 0; if (DMMax >= nchans) DMMax = (nchans - 1); if ((DMMin == 0) && (DMMax == 0)) { DMMin = 0; DMMax = (nchans - 1); } else if ((DMMin != 0) && (DMMax == 0)) DMMax = (nchans - 1); printf("\n"); printf("Minimum DM index = %d Maximum = %d\n", DMMin, DMMax); DMMinv=(tsamp/8.3e3)*(double)(DMMin)*pow((FMax+FMin)/2.,3.)/fabs(FMax-FMin); DMMaxv=(tsamp/8.3e3)*(double)(DMMax)*pow((FMax+FMin)/2.,3.)/fabs(FMax-FMin); printf("Minimum DM value = %f Maximum = %f\n", DMMinv,DMMaxv); NDM = (DMMax - DMMin + 1); NOPFiles = NDM; OPFileSize = sizeof(float) * NTOT; printf("\n"); printf("Total number of output files : %d\n", NOPFiles); printf("Size of each output file : %d bytes\n", OPFileSize); for (i=0; i<NOPFiles; i++) { strcpy(filefull,unfname); sprintf(&filefull[strlen(filefull)-4], ".DM%.4d.tim", (i+DMMin)); if((Fout[i] = fopen(filefull,"wb")) == NULL) { printf("ERROR opening output file %s.\n", filefull); exit(0); } else { nbands=1; nobits=32; refdm=(tsamp/8.3e3)*(double)(i+DMMin)*pow((FMax+FMin)/2.,3.)/ fabs(FMax-FMin); output=Fout[i]; dedisperse_header(); } } printf("\n"); /* strcpy(parfile, "\0"); strcpy(parfile, unfname); strncat(parfile, ".par", 4); fpar = fopen(parfile, "w"); fprintf(fpar, "%s\n", unfname); fprintf(fpar, "%d %d %10.8f \n ", NTOT, nchans, tsamp); fprintf(fpar, "%d %d %d %d\n", DMMin, DMMax, NDM, NOPFiles); fprintf(fpar, "%d\n", nchans); fprintf(fpar, "%f %f\n", FMin, FMax); fclose(fpar); */ MAX = (1 << 12); if (MAX > NTOT) MAX = NTOT; NTSampInRead = (MAX + (2 * nchans)); NSampInRead = (nchans * NTSampInRead); BytePerFrame = (nchans * nbits / 8); NByteInRead = (NTSampInRead * BytePerFrame); ITOffset = 2 * nchans; IOffset = (-ITOffset * BytePerFrame); NBitChan = (int)(log((double)nchans) / 0.6931471); printf("Time samples to read in one read : %d\n", NTSampInRead); printf("Bytes to read in one read : %d bytes\n", NByteInRead); printf("Offset counter for each read : %d time samples\n", IOffset); printf("\n"); printf("No of bits to represent all channels : %d\n", NBitChan); Inbuf = (float *) calloc(NSampInRead, sizeof(float)); Outbuf = (float *) calloc(NSampInRead, sizeof(float)); ibrev = (int *) calloc(nchans, sizeof(int)); for (i=0; i<nchans; i++) { ibrev[i] = bitrev(i, NBitChan); } NRead = (int)(NTOT / MAX); printf("Total number of reads in the run : %d\n", NRead); IFiles = 0; FOld = 0; FNew = 0; FSwitch = 1; strcpy(filename, unfname); namelen = strlen(filename); printf("First filename : %s\n", filename); if ((Fin = fopen(filename, "rb")) == NULL) { printf("ERROR opening file %s.\n", filename); exit(0); } HdrSize = read_header(Fin); printf("Going into the main loop\n"); fseek(Fin, SkipByte, SEEK_CUR); KeepTrack = (NT_Files - NSkip); TotalTrack = 0; newper = 0.0; oldper = newper; printf("\n\n"); for (iread=0; iread<NRead; iread++) { newper = 100.0 * (((float)iread / (float)NRead)); if (newper > oldper) { timecount = ((float)iread * NTSampInRead * tsamp); printf("\rProcessed : %3d%% Current file : %s Time from beg : %8.2f sec", (int)newper, filename, timecount); fflush(stdout); oldper = newper; } KeepTrack -= NTSampInRead; TotalTrack += NTSampInRead; if (KeepTrack > 0) { read_block(Fin, nbits, Inbuf, NSampInRead); fseek(Fin, IOffset, SEEK_CUR); KeepTrack += ITOffset; TotalTrack -= ITOffset; } if (KeepTrack > 0) FSwitch = 1; memset(&Outbuf[0], 0, (sizeof(float) * NSampInRead)); AxisSwap(Inbuf, Outbuf, nchans, NTSampInRead); if (FlipSwitch == 0) { printf("BEFORE..."); fflush(stdout); FlipBand(Outbuf, nchans, NTSampInRead); printf("AFTER!\n"); } taylor_flt(Outbuf, NSampInRead, nchans); for (iw=DMMin; iw<=DMMax; iw++) { indx = (ibrev[iw] * NTSampInRead); fwrite(&Outbuf[indx], sizeof(float), MAX, Fout[iw-DMMin]); } } newper = 100.0 * ((float)iread / NRead); timecount = ((float)iread * NTSampInRead * tsamp); printf("\rProcessed : %3d%% Current file : %s Time from beg : %8.2f sec", (int)newper, filename, timecount); fflush(stdout); printf("\n\n"); for (i=0; i<NOPFiles; i++) fclose(Fout[i]); exit(0); }
void bit_rev(float *buffer, float *X, float *Y, int nbits, int n, int lgn) { float ftemp1, ftemp2, ftemp3, ftemp4; int a, b, c, lgnminus; int aprime, bprime, bshift, bprimeshift, lgnstep; float *Xt1, *Xt2, *Ypoint, *Xpoint; if(lgn < 2*LOGSIZE) { incachebitrev(X, Y, nbits, n, lgn); return; } lgnminus = lgn-LOGSIZE; lgnstep = 1 << lgnminus; for(b = 0; b < nbits; b++) { bprime = bitrev(b, logn(nbits)); bshift = b << LOGSIZE; bprimeshift = bprime << LOGSIZE; Xt1 = &X[bshift]; for(a = 0; a < SIZE; a++, Xt1 += lgnstep) { aprime = bittable[a]; Xt2 = &buffer[aprime << LOGSIZE]; /****************************************************************** This part coming up is where we move the data into the buffer. We unrolled 4 times and load data into scalars to avoid some alias problems and to mask latency. ******************************************************************/ for(c = 0; c < SIZE; c += 4) { ftemp1 = Xt1[c]; ftemp2 = Xt1[c+1]; ftemp3 = Xt1[c+2]; ftemp4 = Xt1[c+3]; Xt2[c] = ftemp1; Xt2[c+1] = ftemp2; Xt2[c+2] = ftemp3; Xt2[c+3] = ftemp4; } } for(c = 0; c < SIZE; c++) { Ypoint = &Y[(bittable[c] << (lgnminus)) | bprimeshift]; Xpoint = &buffer[c]; /*************************************************************** Here we move the data from the buffer which should be in cache to the output array. Notice that the addressing on the array buffer is surprisingly simple and despite the fact that we aren't making unit strides is irrelevant since it is all in cache. Also this loop is unrolled 8 times. ******************************************************************/ for(aprime = 0; aprime < SIZE; aprime += 8, Xpoint += (SIZE*8)) { Ypoint[aprime] = Xpoint[0]; Ypoint[aprime+1] = Xpoint[SIZE]; Ypoint[aprime+2] = Xpoint[2*SIZE]; Ypoint[aprime+3] = Xpoint[3*SIZE]; Ypoint[aprime+4] = Xpoint[4*SIZE]; Ypoint[aprime+5] = Xpoint[5*SIZE]; Ypoint[aprime+6] = Xpoint[6*SIZE]; Ypoint[aprime+7] = Xpoint[7*SIZE]; } } } }
int mace_probe(struct net_device *unused) { int j; struct mace_data *mp; unsigned char *addr; struct net_device *dev; unsigned char checksum = 0; static int found = 0; if (found || macintosh_config->ether_type != MAC_ETHER_MACE) return -ENODEV; found = 1; /* prevent 'finding' one on every device probe */ dev = init_etherdev(0, PRIV_BYTES); if (!dev) return -ENOMEM; mp = (struct mace_data *) dev->priv; dev->base_addr = (u32)MACE_BASE; mp->mace = (volatile struct mace *) MACE_BASE; dev->irq = IRQ_MAC_MACE; mp->dma_intr = IRQ_MAC_MACE_DMA; /* * The PROM contains 8 bytes which total 0xFF when XOR'd * together. Due to the usual peculiar apple brain damage * the bytes are spaced out in a strange boundary and the * bits are reversed. */ addr = (void *)MACE_PROM; for (j = 0; j < 6; ++j) { u8 v=bitrev(addr[j<<4]); checksum ^= v; dev->dev_addr[j] = v; } for (; j < 8; ++j) { checksum ^= bitrev(addr[j<<4]); } if (checksum != 0xFF) return -ENODEV; memset(&mp->stats, 0, sizeof(mp->stats)); dev->open = mace_open; dev->stop = mace_close; dev->hard_start_xmit = mace_xmit_start; dev->tx_timeout = mace_tx_timeout; dev->watchdog_timeo = TX_TIMEOUT; dev->get_stats = mace_stats; dev->set_multicast_list = mace_set_multicast; dev->set_mac_address = mace_set_address; ether_setup(dev); printk(KERN_INFO "%s: 68K MACE, hardware address %.2X", dev->name, dev->dev_addr[0]); for (j = 1 ; j < 6 ; j++) printk(":%.2X", dev->dev_addr[j]); printk("\n"); return 0; }
static int nat2seq(int n, int bits) { // go from natural ordering to sequency ordering: // first take gray code encoding, then reverse those bits return bitrev(gc(n), bits); }