void ProxySurface::apply_xform() { if(!_patch) return; // apply the transform to the vertices of the proxy mesh assert(_proxy_mesh); PIXEL o = _patch->get_old_sample_center(); PIXEL n = _patch->get_sample_center(); VEXEL z = _patch->get_z(); CBvert_list& verts = _proxy_mesh->verts(); // vertices of the proxy mesh PIXEL_list pixels = get_pixels(verts, this); // their old pixel locations // compute and apply the transform to each vertex: for (int i=0; i<verts.num(); i++) { set_pix(verts[i], this, n + cmult(pixels[i] - o, z)); } //cache _uv_orig, _uv_u_pt, _uv_v_pt so that we can grow new quads _o = n + cmult(_o - o, z); _u_o = n + cmult(_u_o - o, z); _v_o = n + cmult(_v_o - o, z); // take care of bizness: _proxy_mesh->changed(); //BMESH::VERT_POSITIONS_CHANGED }
/*
 * Differentially encode one frame of bits onto NC DQPSK carriers plus
 * one BPSK pilot carrier.
 *
 *   tx_symbols       out: NC+1 symbols (index NC is the pilot)
 *   prev_tx_symbols  in:  the NC+1 symbols sent in the previous frame
 *   tx_bits          in:  2*NC bits, (msb, lsb) per carrier
 *   pilot_bit        in/out: current pilot bit; toggled on every call
 *
 * Bit pair -> multiplier applied to the previous symbol:
 *   (0,0) -> 1    (0,1) -> j    (1,0) -> -1    (1,1) -> -j
 * A bit value other than 0 or 1 leaves tx_symbols[c] untouched.
 */
void bits_to_dqpsk_symbols(COMP tx_symbols[], COMP prev_tx_symbols[], int tx_bits[], int *pilot_bit)
{
    COMP j = {0.0,1.0};
    int  carrier;

    for (carrier = 0; carrier < NC; carrier++) {
        const int msb = tx_bits[2*carrier];
        const int lsb = tx_bits[2*carrier+1];

        if (msb == 0 && lsb == 0)
            tx_symbols[carrier] = prev_tx_symbols[carrier];
        else if (msb == 0 && lsb == 1)
            tx_symbols[carrier] = cmult(j, prev_tx_symbols[carrier]);
        else if (msb == 1 && lsb == 0)
            tx_symbols[carrier] = cneg(prev_tx_symbols[carrier]);
        else if (msb == 1 && lsb == 1)
            tx_symbols[carrier] = cmult(cneg(j), prev_tx_symbols[carrier]);
    }

    /* +1 -1 +1 -1 BPSK sync carrier, once filtered becomes (roughly)
       two spectral lines at +/- Rs/2.  The pilot bit flips each call. */
    if (*pilot_bit) {
        tx_symbols[NC] = cneg(prev_tx_symbols[NC]);
        *pilot_bit = 0;
    }
    else {
        tx_symbols[NC] = prev_tx_symbols[NC];
        *pilot_bit = 1;
    }
}
/*
 * Demodulate NC DQPSK carriers and the DBPSK pilot into bits.
 *
 *   rx_bits           out: 2*NC bits, (msb, lsb) per carrier
 *   sync_bit          out: 1 when the pilot phase difference has a
 *                          negative real part, else 0
 *   phase_difference  out: NC+1 values, current symbol times the
 *                          conjugate of the previous one, each
 *                          multiplied by exp(j*pi/4)
 *   prev_rx_symbols   in:  NC+1 symbols of the previous frame
 *   rx_symbols        in:  NC+1 symbols of the current frame
 *
 * Returns the fine frequency error estimate, taken from the imaginary
 * part of the (un-rotated) pilot phase difference.
 */
float qpsk_to_bits(int rx_bits[], int *sync_bit, COMP phase_difference[], COMP prev_rx_symbols[], COMP rx_symbols[])
{
    COMP  rot45;
    COMP  diff;
    COMP  pilot;
    int   carrier;
    int   msb = 0, lsb = 0;
    float ferr;

    rot45.real = cosf(PI/4.0);
    rot45.imag = sinf(PI/4.0);

    /* The extra 45 degree rotation lets the real and imaginary axes act
       as the decision boundaries. */
    for (carrier = 0; carrier < NC; carrier++) {
        diff = cmult(rx_symbols[carrier], cconj(prev_rx_symbols[carrier]));
        phase_difference[carrier] = cmult(diff, rot45);
    }

    /* Map each quadrant back onto a bit pair.  msb/lsb deliberately
       persist across iterations: if neither comparison holds (NaN) the
       previous pair is reused. */
    for (carrier = 0; carrier < NC; carrier++) {
        diff = phase_difference[carrier];

        if (diff.real >= 0) {
            if (diff.imag >= 0)     { msb = 0; lsb = 0; }
            else if (diff.imag < 0) { msb = 1; lsb = 1; }
        }
        else if (diff.real < 0) {
            if (diff.imag >= 0)     { msb = 0; lsb = 1; }
            else if (diff.imag < 0) { msb = 1; lsb = 0; }
        }

        rx_bits[2*carrier]   = msb;
        rx_bits[2*carrier+1] = lsb;
    }

    /* DBPSK encoded sync bit and fine frequency offset estimate */
    pilot = cmult(rx_symbols[NC], cconj(prev_rx_symbols[NC]));
    if (pilot.real < 0) {
        *sync_bit = 1;
        ferr = pilot.imag;
    }
    else {
        *sync_bit = 0;
        ferr = -pilot.imag;
    }

    /* The pilot gets the same pi/4 rotation as the data carriers so it
       is consistent for snr_update and the scatter diagram. */
    phase_difference[NC] = cmult(pilot, rot45);

    return ferr;
}
/*
 * Coarse frequency offset estimate from the pilot carrier.
 *
 *   f       modem state: pilot look-up table and its two read indexes,
 *           both pilot baseband histories, LPF memories and FFT config
 *   rx_fdm  nin received samples
 *   nin     number of new samples, at most M+M/P (asserted)
 *
 * The received samples are multiplied by the conjugate of a locally
 * generated pilot at two different time shifts; lpf_peak_pick() is run
 * on each history and the offset belonging to the larger peak is
 * returned.
 */
float rx_est_freq_offset(struct FDMDV *f, COMP rx_fdm[], int nin)
{
    COMP  pilot[M+M/P];
    COMP  prev_pilot[M+M/P];
    float foff1, foff2;
    float max1, max2;
    int   n, tail;

    assert(nin <= M+M/P);

    /* Fetch nin samples of each local pilot; both read indexes wrap at
       the end of the 4*M entry table. */
    for (n = 0; n < nin; n++) {
        pilot[n] = f->pilot_lut[f->pilot_lut_index];
        f->pilot_lut_index++;
        if (f->pilot_lut_index >= 4*M)
            f->pilot_lut_index = 0;

        prev_pilot[n] = f->pilot_lut[f->prev_pilot_lut_index];
        f->prev_pilot_lut_index++;
        if (f->prev_pilot_lut_index >= 4*M)
            f->prev_pilot_lut_index = 0;
    }

    /* Slide both histories down by nin to make room at the end. */
    for (n = 0; n < NPILOTBASEBAND-nin; n++) {
        f->pilot_baseband1[n] = f->pilot_baseband1[n+nin];
        f->pilot_baseband2[n] = f->pilot_baseband2[n+nin];
    }

    /* Down convert the new samples with each local pilot.  The peak is
       sensitive to the time shift between received and local pilot,
       hence the two versions. */
    tail = NPILOTBASEBAND-nin;
    for (n = 0; n < nin; n++) {
        f->pilot_baseband1[tail+n] = cmult(rx_fdm[n], cconj(pilot[n]));
        f->pilot_baseband2[tail+n] = cmult(rx_fdm[n], cconj(prev_pilot[n]));
    }

    lpf_peak_pick(&foff1, &max1, f->pilot_baseband1, f->pilot_lpf1, f->fft_pilot_cfg, f->S1, nin);
    lpf_peak_pick(&foff2, &max2, f->pilot_baseband2, f->pilot_lpf2, f->fft_pilot_cfg, f->S2, nin);

    return (max1 > max2) ? foff1 : foff2;
}
/*
 * In-place radix-2 butterfly passes over in[0..N-1], using the twiddle
 * table tableW.
 *
 * The outer loop doubles the group count each pass (1, 2, 4, ... < N)
 * while the half-size of a group halves, starting from N/2.  Group
 * `grp` covers indexes [2*half*grp, 2*half*(grp+1)) and uses
 * tableW[grp] as its single twiddle factor.
 *
 * Returns the number of floating point operations performed, counted
 * as 10 per butterfly.
 */
int fft_exec (int N, complex * in)
{
  unsigned int half = N;
  unsigned int groups, grp, pos, upper, lower;
  complex w, p;
  int flops = 0;

  for (groups = 1; groups < N; groups *= 2) {
    half >>= 1;
    for (grp = 0; grp < groups; grp++) {
      w = tableW[grp];
      for (pos = 0; pos < half; pos++) {
        upper = 2 * half * grp + pos;
        lower = upper + half;
        cmult (p, w, in[lower]);          /* 6 flop */
        csub (in[lower], in[upper], p);   /* 2 flop */
        cadd (in[upper], in[upper], p);   /* 2 flop */
        flops += 10;
      }
    }
  }
  return flops;
}
/*
 * In-place radix-2 butterfly passes over in[0..N-1], using the twiddle
 * table tableW.
 *
 *   N   number of points
 *   in  N complex samples, overwritten with the result
 *
 * The outer loop doubles the group count each pass (i = 1, 2, 4, ... < N)
 * while the half-size n of a group halves, starting from N/2.  Group k
 * starts at index r = 2*n*k, pairs element a = r + j with element
 * b = a + n, and uses tableW[k] as its single twiddle factor.
 */
void fft_exec (int N, complex * in)
{
  unsigned int n = N;
  unsigned int a, b, i, j, k, r, s;
  complex w, p;

  for (i = 1; i < N; i = i * 2) {
    n = n >> 1;
    for (k = 0; k < i; k++) {
      w = tableW[k];
      r = 2 * n * k;          /* first index of the upper half of group k */
      s = n * (1 + 2 * k);    /* first index of the lower half of group k */
      for (j = 0; j < n; j++) {
        /* Was `j + r/0`: an integer division by zero, which is
           undefined behaviour.  The upper butterfly input is simply
           the j-th element of the group's upper half. */
        a = j + r;
        b = j + s;
        cmult (p, w, in[b]);        /* 6 flop */
        csub (in[b], in[a], p);     /* 2 flop */
        cadd (in[a], in[a], p);     /* 2 flop */
      }
    }
  }
}
/*
 * Scalar multiplication: mypublic = secret * basepoint.
 *
 *   mypublic   out: contracted (byte) form of the result
 *   secret     in:  scalar bytes, handed to cmult() exactly as given
 *                   (no bit masking is done here -- presumably the
 *                   caller has already clamped it; confirm)
 *   basepoint  in:  byte form of the point to multiply
 *
 * cmult() yields a pair (x, z); the value written out is
 * x * crecip(z), presumably the affine x coordinate x/z.
 */
void curve25519_donna(u8 *mypublic, const u8 *secret, const u8 *basepoint)
{
  felem base[10];        /* expanded base point                 */
  felem res_x[10];       /* x output of cmult                   */
  felem res_z[10];       /* z output of cmult, later the product */
  felem z_recip[10];     /* crecip(res_z)                        */

  fexpand(base, basepoint);
  cmult(res_x, res_z, secret, base);
  crecip(z_recip, res_z);
  fmul(res_z, res_x, z_recip);
  fcontract(mypublic, res_z);
}
/*
 * Frequency shift nin samples of rx_fdm by -foff Hz.
 *
 *   rx_fdm_fcorr     out: nin shifted samples
 *   rx_fdm           in:  nin input samples
 *   foff             in:  frequency offset in Hz
 *   foff_rect        out: per-sample phasor exp(j*2*pi*foff/FS)
 *   foff_phase_rect  in/out: running oscillator phasor; multiplied by
 *                    conj(foff_rect) once per sample and renormalised
 *                    to unit magnitude before returning
 *   nin              in:  number of samples to process
 */
void CODEC2_WIN32SUPPORT fdmdv_freq_shift(COMP rx_fdm_fcorr[], COMP rx_fdm[], float foff, COMP *foff_rect, COMP *foff_phase_rect, int nin)
{
    int   i;
    float mag;

    foff_rect->real = cosf(2.0*PI*foff/FS);
    foff_rect->imag = sinf(2.0*PI*foff/FS);

    for(i=0; i<nin; i++) {
        *foff_phase_rect = cmult(*foff_phase_rect, cconj(*foff_rect));
        rx_fdm_fcorr[i] = cmult(rx_fdm[i], *foff_phase_rect);
    }

    /* Normalise the digital oscillator, as its magnitude can drift over
       time.  The magnitude must be sampled once, before either
       component is modified: dividing .real first and then calling
       cabsolute() again for .imag would scale the two components by
       different amounts and leave a phasor that is not unit length. */
    mag = cabsolute(*foff_phase_rect);
    foff_phase_rect->real /= mag;
    foff_phase_rect->imag /= mag;
}
/*
 * Convert a grid easting/northing pair to longitude/latitude.
 *
 *   e, n   in:  easting and northing
 *   ln     out: longitude, computed as ln0/rad2deg + imag(z1)
 *   lt     out: latitude, computed as (lt0 + series)/rad2deg
 *
 * Uses the file-level constants n0, e0, a, ln0, lt0, rad2deg and the
 * coefficient tables cfb1[0..5], cfb2[0..5] (complex) and cfl[0..8].
 *
 * NOTE(review): the comments below assume cmult(r,x,y), cadd(r,x,y) and
 * cscale(r,x,s) store x*y, x+y and s*x in *r and return r -- confirm
 * against their definitions.
 */
void nzmg_geod( double e, double n, double *ln, double *lt )
{
   complex z0, z1, zn, zd, tmp1, tmp2;
   double sum,tmp;
   int i, it;

   /* Normalised grid coordinate: northing offset in the real part,
      easting offset in the imaginary part. */
   z0.real = (n-n0)/a;
   z0.imag = (e-e0)/a;

   /* First approximation: Horner evaluation of
      z1 = z0 * (cfb2[0] + cfb2[1]*z0 + ... + cfb2[5]*z0^5). */
   z1.real = cfb2[5].real;
   z1.imag = cfb2[5].imag;
   for (i=5; i--; ) cadd(&z1, cmult(&z1, &z1, &z0), cfb2+i );
   cmult(&z1,&z1,&z0);

   /* Two refinement passes.  Each pass replaces z1 by zn/zd with
        zn = z0 + sum_{k=1..5} k     * cfb1[k] * z1^(k+1)
        zd =      sum_{k=0..5} (k+1) * cfb1[k] * z1^k
      which has the form of a Newton step for
      sum_k cfb1[k]*z1^(k+1) = z0. */
   for(it=2; it--; ) {
      cscale( &zn, cfb1+5, 5.0);
      cscale( &zd, cfb1+5, 6.0);
      for (i=4; i; i--) {
         cadd( &zn, cmult(&tmp1, &zn, &z1), cscale(&tmp2, cfb1+i, (double) i));
         cadd( &zd, cmult(&tmp1, &zd, &z1), cscale(&tmp2, cfb1+i, (double) (i+1)));
      }
      /* numerator: multiply the Horner result by z1 twice, then add z0 */
      cadd( &zn, &z0, cmult( &zn, cmult( &zn, &zn, &z1), &z1));
      /* denominator: one more Horner step, ending on cfb1[0] */
      cadd( &zd, cfb1, cmult( &zd, &zd, &z1 ));
      cdiv( &z1, &zn, &zd );
   }

   *ln = ln0/rad2deg + z1.imag;

   /* Latitude: tmp * (cfl[0] + cfl[1]*tmp + ... + cfl[8]*tmp^8),
      divided by 3600.0e-5, added to lt0. */
   tmp = z1.real;
   sum = cfl[8];
   for (i=8; i--;) sum = sum*tmp + cfl[i];
   sum *= tmp/3600.0e-5;
   *lt = (lt0+sum)/rad2deg;
}
/*
 * Frequency-division multiplex: up-convert every carrier's baseband
 * samples and sum them into one block of M output samples.
 *
 *   tx_fdm       out: M complex output samples
 *   Nc           in:  number of data carriers (the pilot is carrier Nc)
 *   tx_baseband  in:  M baseband samples for each of the carriers
 *   phase_tx     in/out: Nc+1 oscillator phasors, advanced by M samples
 *                and renormalised to unit magnitude
 *   freq         in:  Nc+1 per-sample rotation phasors
 */
void fdm_upconvert(COMP tx_fdm[], int Nc, COMP tx_baseband[NC+1][M], COMP phase_tx[], COMP freq[])
{
    int   i,c;
    COMP  two = {2.0, 0.0};
    COMP  pilot;
    float mag;

    for(i=0; i<M; i++) {
        tx_fdm[i].real = 0.0;
        tx_fdm[i].imag = 0.0;
    }

    /* Data carriers: the Nc/2 tones below the centre frequency followed
       by the Nc/2 tones above it.  Both halves are processed the same
       way, in ascending carrier order. */
    for (c=0; c<Nc; c++)
        for (i=0; i<M; i++) {
            phase_tx[c] = cmult(phase_tx[c], freq[c]);
            tx_fdm[i] = cadd(tx_fdm[i], cmult(tx_baseband[c][i], phase_tx[c]));
        }

    /* add centre pilot tone, at twice the amplitude of a data carrier */
    c = Nc;
    for (i=0; i<M; i++) {
        phase_tx[c] = cmult(phase_tx[c], freq[c]);
        pilot = cmult(cmult(two, tx_baseband[c][i]), phase_tx[c]);
        tx_fdm[i] = cadd(tx_fdm[i], pilot);
    }

    /*
      Scale such that total Carrier power C of real(tx_fdm) = Nc.  This
      excludes the power of the pilot tone.
      We return the complex (single sided) signal to make frequency
      shifting for the purpose of testing easier
    */
    for (i=0; i<M; i++)
        tx_fdm[i] = cmult(two, tx_fdm[i]);

    /* Normalise the digital oscillators, as their magnitude can drift
       over time.  The magnitude is sampled once per oscillator, before
       either component is changed; calling cabsolute() again after
       .real had been divided would scale .imag by a different amount
       and leave a phasor that is not unit length. */
    for (c=0; c<Nc+1; c++) {
        mag = cabsolute(phase_tx[c]);
        phase_tx[c].real /= mag;
        phase_tx[c].imag /= mag;
    }
}
/*
 * Complex division: *cr = *c1 / *c2.  Returns cr.
 *
 * Computed as c1 * (conj(c2) / |c2|^2).  The reciprocal is built in a
 * local before cmult() runs, so *c2 is not read again afterwards.
 * No check is made for c2 == 0.
 */
static complex *cdiv(complex *cr, complex *c1, complex *c2)
{
   const double norm2 = (c2->real*c2->real + c2->imag*c2->imag);
   complex recip;

   recip.real =  c2->real/norm2;
   recip.imag = -c2->imag/norm2;

   cmult( cr, c1, &recip );
   return cr;
}
void geod_nzmg( double ln, double lt, double *e, double *n ) { double sum; int i; complex z0,z1; lt = (lt*rad2deg - lt0) * 3600.0e-5; sum = cfi[9];; for (i = 9; i--;) sum = sum*lt+cfi[i]; sum *= lt; z1.real = sum; z1.imag = ln-ln0/rad2deg; z0.real = cfb1[5].real; z0.imag = cfb1[5].imag; for ( i=5; i--;) cadd(&z0,cmult(&z0,&z0,&z1),cfb1+i); cmult(&z0,&z0,&z1); *n = n0+z0.real*a; *e = e0+z0.imag*a; }
/*
 * Differentially encode one frame of bits onto Nc DQPSK carriers plus
 * one BPSK pilot carrier.
 *
 *   tx_symbols        out: Nc+1 symbols (index Nc is the pilot)
 *   Nc                in:  number of data carriers
 *   prev_tx_symbols   in:  the Nc+1 symbols sent in the previous frame
 *   tx_bits           in:  2*Nc bits, (msb, lsb) per carrier
 *   pilot_bit         in/out: current pilot bit; toggled on every call
 *   old_qpsk_mapping  in:  non-zero selects the legacy (suboptimal)
 *                          V0.91 FreeDV mapping
 *
 * Bit pair -> multiplier applied to the previous symbol:
 *             current   legacy
 *   (0,0)       1         1
 *   (0,1)       j         j
 *   (1,0)      -j        -1
 *   (1,1)      -1        -j
 * A bit value other than 0 or 1 leaves tx_symbols[c] untouched.
 */
void bits_to_dqpsk_symbols(COMP tx_symbols[], int Nc, COMP prev_tx_symbols[], int tx_bits[], int *pilot_bit, int old_qpsk_mapping)
{
    COMP j = {0.0,1.0};
    int  carrier;

    for (carrier = 0; carrier < Nc; carrier++) {
        const int msb = tx_bits[2*carrier];
        const int lsb = tx_bits[2*carrier+1];
        int half_turn;

        if (msb == 0) {
            if (lsb == 0)
                tx_symbols[carrier] = prev_tx_symbols[carrier];
            else if (lsb == 1)
                tx_symbols[carrier] = cmult(j, prev_tx_symbols[carrier]);
        }
        else if (msb == 1 && (lsb == 0 || lsb == 1)) {
            /* The two mappings differ only in which of (1,0) / (1,1)
               is the 180 degree step and which is the -90 degree one. */
            half_turn = (lsb == 0) ? (old_qpsk_mapping != 0)
                                   : (old_qpsk_mapping == 0);
            if (half_turn)
                tx_symbols[carrier] = cneg(prev_tx_symbols[carrier]);
            else
                tx_symbols[carrier] = cmult(cneg(j), prev_tx_symbols[carrier]);
        }
    }

    /* +1 -1 +1 -1 BPSK sync carrier, once filtered becomes (roughly)
       two spectral lines at +/- Rs/2.  The pilot bit flips each call. */
    if (*pilot_bit) {
        tx_symbols[Nc] = cneg(prev_tx_symbols[Nc]);
        *pilot_bit = 0;
    }
    else {
        tx_symbols[Nc] = prev_tx_symbols[Nc];
        *pilot_bit = 1;
    }
}
/*
 * Pulse-shape one symbol per carrier into M baseband samples.
 *
 *   tx_baseband       out: M filtered samples for each of NC+1 carriers
 *   tx_symbols        in:  NC+1 symbols for this frame
 *   tx_filter_memory  in/out: the last NSYM (gain-scaled) symbols of
 *                     each carrier; the new symbol enters at index
 *                     NSYM-1 and the memory is shifted down by one,
 *                     with a zero in the last slot, before returning
 *
 * Only one filter tap in every M lines up with a stored symbol, so each
 * output sample needs NSYM multiplies (polyphase form) with the
 * gt_alpha5_root coefficients.
 */
void tx_filter(COMP tx_baseband[NC+1][M], COMP tx_symbols[], COMP tx_filter_memory[NC+1][NSYM])
{
    COMP  gain;
    float acc_re, acc_im;
    int   carrier, n, tap, coeff;

    gain.real = sqrtf(2.0)/2.0;
    gain.imag = 0.0;

    /* enter the new, scaled symbols at the end of the memory */
    for (carrier = 0; carrier < NC+1; carrier++)
        tx_filter_memory[carrier][NSYM-1] = cmult(tx_symbols[carrier], gain);

    /* Output sample n uses coefficients M-n-1, 2M-n-1, ...; the real
       and imaginary parts are filtered independently. */
    for (n = 0; n < M; n++) {
        for (carrier = 0; carrier < NC+1; carrier++) {
            acc_re = 0.0;
            acc_im = 0.0;
            for (tap = 0, coeff = M-n-1; tap < NSYM; tap++, coeff += M) {
                acc_re += M * tx_filter_memory[carrier][tap].real * gt_alpha5_root[coeff];
                acc_im += M * tx_filter_memory[carrier][tap].imag * gt_alpha5_root[coeff];
            }
            tx_baseband[carrier][n].real = acc_re;
            tx_baseband[carrier][n].imag = acc_im;
        }
    }

    /* shift memory, inserting zeros at end */
    for (carrier = 0; carrier < NC+1; carrier++) {
        for (tap = 0; tap < NSYM-1; tap++)
            tx_filter_memory[carrier][tap] = tx_filter_memory[carrier][tap+1];
        tx_filter_memory[carrier][NSYM-1].real = 0.0;
        tx_filter_memory[carrier][NSYM-1].imag = 0.0;
    }
}
/*
 * Integer power of a complex number: returns c^n.
 *
 *   c  base
 *   n  exponent; n >= 0 is supported
 *
 * n == 0 returns 1 + 0i (previously the base itself was returned,
 * which is c^1, not c^0).  n >= 1 uses n-1 successive multiplications.
 * Negative exponents are not supported: as before, c is returned
 * unchanged for them.
 *
 * NOTE(review): the n == 0 branch assumes Complex exposes its parts as
 * .re and .im -- confirm against the type definition.
 */
Complex cpowi(Complex c,int n)
{
   int i;
   Complex result;

   if (n == 0) {
      result.re = 1.0;
      result.im = 0.0;
      return(result);
   }

   result = c;
   for (i=1; i<n; ++i)
      result = cmult(result,c);

   return(result);
}
/*
 * Invert a 2x2 complex matrix.
 *
 *   a  in:  four elements, a[0] a[1] / a[2] a[3]
 *   b  out: four elements of the inverse
 *
 * With det = a[0]*a[3] - a[1]*a[2] and inv = 1/det the result is
 *     b[0] =  inv*a[3]    b[1] = -inv*a[1]
 *     b[2] = -inv*a[2]    b[3] =  inv*a[0]
 * No check is made for a zero determinant.  b must not overlap a:
 * b[0] is written before a[0] is read for b[3].
 *
 * NOTE(review): assumes cmult(x,y,out) / cadd(x,y,out) / cdev(x,y,out)
 * store x*y, x+y and x/y in *out -- confirm against their definitions.
 *
 * An unused local that mixed real and imaginary parts into a
 * meaningless "determinant" (and was never read) has been removed.
 */
void cinv22(COMPLEX *a,COMPLEX *b){
	COMPLEX res1,res2,res3,res4,one;

	/* res3 = a[0]*a[3] - a[1]*a[2]  (the determinant) */
	cmult((a),(a+3),&res1);
	cmult((a+1),(a+2),&res2);
	res2.re *= -1;
	res2.im *= -1;
	cadd(&res1,&res2,&res3);

	/* res4 = 1 / determinant */
	one.re = 1;
	one.im = 0;
	cdev(&one,&res3,&res4);

	/* adjugate scaled by 1/det; the off-diagonal terms are negated */
	cmult(&res4,a+3,b);
	cmult(&res4,a+1,b+1);
	(b+1)->re *= -1;
	(b+1)->im *= -1;
	cmult(&res4,a+2,b+2);
	(b+2)->re *= -1;
	(b+2)->im *= -1;
	cmult(&res4,a,b+3);
}
/*
 * Scalar multiplication: mypublic = clamp(secret) * basepoint.
 *
 *   mypublic   out: contracted (byte) form of the result
 *   secret     in:  32 scalar bytes; not modified
 *   basepoint  in:  byte form of the point to multiply
 *
 * A private copy of the scalar is masked before use: the low three
 * bits of byte 0 are cleared, the top bit of byte 31 is cleared and
 * bit 6 of byte 31 is set.  cmult() yields a pair (x, z); the value
 * written out is x * crecip(z).  Always returns 0.
 */
int crypto_scalarmult(u8 *mypublic, const u8 *secret, const u8 *basepoint)
{
  felem base[5], res_x[5], res_z[5], z_recip[5];
  unsigned char scalar[32];
  int idx;

  idx = 0;
  while (idx < 32) {
    scalar[idx] = secret[idx];
    ++idx;
  }
  scalar[0]  &= 248;
  scalar[31] &= 127;
  scalar[31] |= 64;

  fexpand(base, basepoint);
  cmult(res_x, res_z, scalar, base);
  crecip(z_recip, res_z);
  fmul(res_z, res_x, z_recip);
  fcontract(mypublic, res_z);
  return 0;
}
/*
 * Inner product of two complex vectors with the left operand
 * conjugated: returns sum over i of conj(lhs[i]) * rhs[i].
 *
 *   lhs, rhs  vectors of `length` elements
 *   length    element count; a value <= 0 yields 0 + 0i
 */
static complex
complex_inner_product(
    const complex *lhs,
    const complex *rhs,
    int length)
{
    complex total;
    int idx = 0;

    total.im = 0.0f;
    total.re = total.im;

    while (idx < length)
    {
        const complex term = cmult(cconj(lhs[idx]), rhs[idx]);
        total.re += term.re;
        total.im += term.im;
        ++idx;
    }

    return total; // ACCEPT_PERMIT
}
/*
 * Generate M samples of the filtered, up-converted DBPSK pilot.
 *
 *   pilot_fdm   out: M complex samples
 *   bit         in/out: pilot bit, toggled on every call
 *   symbol      in/out: current pilot symbol; negated when *bit is set
 *   filter_mem  in/out: NFILTER samples of filter memory, shifted down
 *               by M with zeros appended before returning
 *   phase       in/out: oscillator phasor, advanced by M samples
 *   freq        in:  per-sample rotation phasor
 */
void generate_pilot_fdm(COMP *pilot_fdm, int *bit, float *symbol, float *filter_mem, COMP *phase, COMP *freq)
{
    float tx_baseband[M];
    int   n, tap, coeff;

    /* +1 -1 +1 -1 DBPSK sync carrier, once filtered becomes (roughly)
       two spectral lines at +/- RS/2 */
    if (*bit) {
        *symbol = -*symbol;
        *bit = 0;
    }
    else {
        *bit = 1;
    }

    /* filter DPSK symbol to create M baseband samples; only every M-th
       memory slot, starting at M-1, takes part */
    filter_mem[NFILTER-1] = (sqrtf(2)/2) * *symbol;
    for (n = 0; n < M; n++) {
        tx_baseband[n] = 0.0;
        for (tap = M-1, coeff = M-n-1; tap < NFILTER; tap += M, coeff += M)
            tx_baseband[n] += M * filter_mem[tap] * gt_alpha5_root[coeff];
    }

    /* shift memory, inserting zeros at end */
    for (n = 0; n < NFILTER-M; n++)
        filter_mem[n] = filter_mem[n+M];
    for (n = NFILTER-M; n < NFILTER; n++)
        filter_mem[n] = 0.0;

    /* upconvert: advance the oscillator, then scale both components */
    for (n = 0; n < M; n++) {
        *phase = cmult(*phase, *freq);
        pilot_fdm[n].real = sqrtf(2)*2*tx_baseband[n] * phase->real;
        pilot_fdm[n].imag = sqrtf(2)*2*tx_baseband[n] * phase->imag;
    }
}
/*
 * Complex matrix product p3 = p1 * p2, all matrices stored row-major.
 *
 *   p1  in:  m x n matrix
 *   p2  in:  n x p matrix
 *   p3  out: m x p matrix
 *
 * The caller must have allocated storage for all three matrices.
 */
void CMatMult(COMPLEX *p1,COMPLEX *p2,COMPLEX *p3,int m,int n,int p){
	int row,col,inner;
	COMPLEX acc;

	for( row = 0 ; row < m ; row++ ){
		for( col = 0 ; col < p ; col++ ){
			acc.re = 0;
			acc.im = 0;
			for( inner = 0 ; inner < n ; inner++ ){
				COMPLEX term;
				/* copy the running sum so cadd() never reads
				   and writes the same object */
				COMPLEX before = acc;
				cmult(&p1[row*n+inner],&p2[inner*p+col],&term);
				cadd(&before,&term,&acc);
			}
			p3[row*p+col] = acc;
		}
	}
}
/*
 * Compute one normalisation scalar per steering vector.
 *
 *   gamma             out: N_STEERING scalars
 *   adaptive_weights  in:  weight vectors, indexed
 *                          [dop_index][range_block][sv][i]
 *   steering_vectors  in:  N_STEERING vectors of N_CHAN*TDOF elements
 *   range_block,
 *   dop_index         in:  select which weight set is used
 *
 * For each steering vector the inner product
 * sum_i conj(w[i]) * v[i] is formed and gamma is set to the reciprocal
 * of its magnitude, or to 1 when that magnitude is not positive.
 */
static void
compute_gamma_weights(
    APPROX float gamma[N_STEERING],
    complex (* const adaptive_weights)[N_BLOCKS][N_STEERING][N_CHAN*TDOF],
    complex (* const steering_vectors)[N_CHAN*TDOF],
    int range_block,
    int dop_index)
{
    int elem, sv;
    complex dot;

    for (sv = 0; sv < N_STEERING; ++sv)
    {
        dot.im = 0.0f;
        dot.re = dot.im;
        for (elem = 0; elem < N_CHAN*TDOF; ++elem)
        {
            const complex term = cmult(
                cconj(adaptive_weights[dop_index][range_block][sv][elem]),
                steering_vectors[sv][elem]);
            dot.re += term.re;
            dot.im += term.im;
        }

        /*
         * In exact arithmetic the inner product is a real positive
         * scalar, so its imaginary part would be zero.  With limited
         * precision that may not hold, hence the magnitude is used.
         * gamma is a normalisation scalar, i.e. the inverse of the
         * computed inner product w*v.
         */
        gamma[sv] = sqrt(dot.re*dot.re + dot.im*dot.im);
        gamma[sv] = ENDORSE(gamma[sv] > 0) ? 1.0f / gamma[sv] : 1.0f;
    }
}
/*
 * Search a codebook of phase vectors for the best match to vec.
 *
 *   cb       in:  codebook, e entries of d elements each (entry j
 *                 starts at cb[j*d])
 *   vec      in:  d-element target vector; elements that are exactly
 *                 0+0j are skipped
 *   weights  in:  d per-element weights
 *   d        in:  vector dimension
 *   e        in:  number of codebook entries
 *   se       in/out: the unweighted squared phase error of the chosen
 *                 entry is added to *se
 *
 * The entry with the smallest weighted squared phase error is chosen
 * (the first one wins on ties).  Returns its index; 0 when no entry
 * scores below the initial 1E32.
 */
static int vq_phase(COMP cb[], COMP vec[], float weights[], int d, int e, float *se)
{
    float sq_err;            /* unweighted error of the current entry */
    float metric;            /* weighted error of the current entry   */
    float best_err;          /* unweighted error of the best entry    */
    float best_metric;       /* weighted error of the best entry      */
    int   best_idx = 0;      /* best index so far                     */
    int   entry, el;
    COMP  rot;
    float dphase;

    best_err = 1E32;
    best_metric = best_err;

    for (entry = 0; entry < e; entry++) {
        sq_err = 0.0;
        metric = 0.0;

        for (el = 0; el < d; el++) {
            if ((vec[el].real == 0.0) && (vec[el].imag == 0.0))
                continue;    /* nothing to compare against */

            /* phase of cb * conj(vec) is the phase difference */
            rot = cmult(cb[entry*d+el], cconj(vec[el]));
            dphase = atan2(rot.imag, rot.real);
            sq_err += dphase*dphase;
            metric += weights[el]*weights[el]*dphase*dphase;
        }

        if (metric < best_metric) {
            best_metric = metric;
            best_err = sq_err;
            best_idx = entry;
        }
    }

    *se += best_err;

    return(best_idx);
}
/*
 * Frequency-division demultiplex: down-convert the received signal to
 * baseband once for each of the NC+1 carriers.
 *
 *   rx_baseband  out: nin baseband samples for each carrier
 *   rx_fdm       in:  nin received samples
 *   phase_rx     in/out: NC+1 oscillator phasors, advanced by nin
 *                samples and renormalised to unit magnitude
 *   freq         in:  NC+1 per-sample rotation phasors
 *   nin          in:  number of samples, at most M+M/P (asserted)
 */
void fdm_downconvert(COMP rx_baseband[NC+1][M+M/P], COMP rx_fdm[], COMP phase_rx[], COMP freq[], int nin)
{
    int   i,c;
    float mag;

    /* maximum number of input samples to demod */
    assert(nin <= (M+M/P));

    /* The NC/2 tones below the centre frequency, the NC/2 tones above
       it and the centre pilot tone (c == NC) are all handled the same
       way; carriers are independent of each other. */
    for (c=0; c<NC+1; c++)
        for (i=0; i<nin; i++) {
            phase_rx[c] = cmult(phase_rx[c], freq[c]);
            rx_baseband[c][i] = cmult(rx_fdm[i], cconj(phase_rx[c]));
        }

    /* Normalise the digital oscillators, as their magnitude can drift
       over time.  The magnitude is sampled once per oscillator, before
       either component is changed; calling cabsolute() again after
       .real had been divided would scale .imag by a different amount
       and leave a phasor that is not unit length. */
    for (c=0; c<NC+1; c++) {
        mag = cabsolute(phase_rx[c]);
        phase_rx[c].real /= mag;
        phase_rx[c].imag /= mag;
    }
}
/*
 * Factorise every covariance matrix, one per (Doppler bin, range
 * block) pair.
 *
 *   cholesky_factors  out: receives a copy of `covariance`, which is
 *                     then factorised in place
 *   covariance        in:  N_DOP x N_BLOCKS matrices, each
 *                     (N_CHAN*TDOF) x (N_CHAN*TDOF)
 *
 * On return the upper triangle (diagonal included) of each matrix
 * holds the factor and the strictly lower triangle holds the conjugate
 * of the upper one.  The diagonal is not checked for positivity (the
 * assert below is disabled).
 */
static void cholesky_factorization(
    complex cholesky_factors[N_DOP][N_BLOCKS][N_CHAN*TDOF][N_CHAN*TDOF],
    complex (* const covariance)[N_BLOCKS][N_CHAN*TDOF][N_CHAN*TDOF])
{
    int k, dop, block;
    APPROX int i, j;
    complex (* R)[N_BLOCKS][N_CHAN*TDOF][N_CHAN*TDOF] = NULL;
    APPROX float Rkk_inv, Rkk_inv_sqrt;

    /*
     * cholesky_factors is a working buffer used to factorize the
     * covariance matrices in-place.  We copy the covariance matrices
     * into cholesky_factors and give cholesky_factors the convenient
     * name R for a more succinct inner loop below.
     */
    memcpy(cholesky_factors, covariance, sizeof(complex)*N_DOP*N_BLOCKS*N_CHAN*TDOF*N_CHAN*TDOF);
    R = cholesky_factors;

    for (dop = 0; dop < N_DOP; ++dop)
    {
        for (block = 0; block < N_BLOCKS; ++block)
        {
            /*
             * The following Cholesky factorization notation is based
             * upon the presentation in "Numerical Linear Algebra" by
             * Trefethen and Bau, SIAM, 1997.
             */
            for (k = 0; k < N_CHAN*TDOF; ++k)
            {
                /*
                 * Hermitian positive definite matrices are assumed, but
                 * for safety we check that the diagonal is always positive.
                 */
                //assert(R[dop][block][k][k].re > 0);

                /* Diagonal entries are real-valued. */
                Rkk_inv = 1.0f / R[dop][block][k][k].re;
                Rkk_inv_sqrt = sqrt(Rkk_inv);

                /* Subtract row k's contribution from the trailing
                   upper triangle: R[j][i] -= R[k][i]*conj(R[k][j])/R[k][k]
                   for all i >= j > k. */
                for (j = k+1; ENDORSE(j < N_CHAN*TDOF); ++j)
                {
                    const complex Rkj_conj = cconj(R[dop][block][k][j]);
                    for (i = j; ENDORSE(i < N_CHAN*TDOF); ++i)
                    {
                        const complex Rki_Rkj_conj = cmult(
                            R[dop][block][k][i], Rkj_conj);
                        R[dop][block][j][i].re -= Rki_Rkj_conj.re * Rkk_inv;
                        R[dop][block][j][i].im -= Rki_Rkj_conj.im * Rkk_inv;
                    }
                }

                /* Scale row k (from the diagonal rightwards) by
                   1/sqrt(R[k][k]).  This must come after the update
                   above, which still needs the unscaled row. */
                for (i = k; ENDORSE(i < N_CHAN*TDOF); ++i)
                {
                    R[dop][block][k][i].re *= Rkk_inv_sqrt;
                    R[dop][block][k][i].im *= Rkk_inv_sqrt;
                }
            }

            /*
             * Copy the conjugate of the upper triangular portion of R
             * into the lower triangular portion. This is not required
             * for correctness, but can help with testing and validation
             * (e.g., correctness metrics calculated over all elements
             * will not be "diluted" by trivially correct zeros in the
             * lower diagonal region).
             */
            for (i = 0; ENDORSE(i < N_CHAN*TDOF); ++i)
            {
                for (j = i+1; ENDORSE(j < N_CHAN*TDOF); ++j)
                {
                    const complex x = R[dop][block][i][j]; // ACCEPT_PERMIT
                    R[dop][block][j][i].re = x.re;
                    R[dop][block][j][i].im = -1.0f * x.im;
                }
            }
        }
    }
}
/*
 * Build the coefficient arrays of every coarser level from the next
 * finer one, working from level levels-2 down to level 0.
 *
 * Level l is a size x size grid of boxes with size = 2^(l+1); box
 * (i,j) owns PMAX coefficients stored at Level_Ptr[l] +
 * (i + j*size)*PMAX.  Its four children on the finer level sit at
 * (2i, 2j), (2i+1, 2j), (2i, 2j+1) and (2i+1, 2j+1) of a grid that is
 * 2*size wide.  Each child adds
 *     C(p,p1) * dz^(p-p1) * child[p1],   0 <= p1 <= p < PMAX
 * to parent coefficient p, where dz is (+-distX/size/4, +-distY/size/4)
 * with the signs depending on which child it is.
 * (C(p,p1) is presumably the binomial coefficient -- confirm.)
 *
 * With MULTIPROC defined, each process only computes the boxes that
 * sort_cpu() assigns to its rank, and every process then broadcasts
 * its share of the level to all the others.
 */
void Advance_Coeffs(int levels)
{
   int i,j,p,p1,l,size;
   Complex *Coeff_Array,*Coeff_Array_Finer,tmpz,dz[PMAX+1];
   /* only used by the MULTIPROC broadcast at the end of each level */
   int cpu,cpu_k,cpu_m,cpu_n,lower_limit,upper_limit;
#ifdef MULTIPROC
   int rank,total_processes;
#endif

   /* These need to be explicitly determined here for some reason.*/
   /* Check into this later.  Without explicitly finding rank and
      total_processes, they get wolloped for some reason */
#ifdef MULTIPROC
   MPI_Comm_size(MPI_COMM_WORLD, &total_processes);
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#endif

   for (l=levels-2; l>=0; --l)
     {
        Coeff_Array = Level_Ptr[l];
        Coeff_Array_Finer = Level_Ptr[l+1];

        /* boxes per side on this level: 2^(l+1) */
        size = ldexp(1.0,l+1);

        /* Wipe the current layer */
        tmpz.re = 0.0;
        tmpz.im = 0.0;

        for (i=0; i<size; ++i)
          for (j=0; j<size; ++j)
            for (p=0; p<PMAX; ++p)
              *(Coeff_Array+(i+j*size)*PMAX+p) = tmpz;

        /* dz[p] holds dz[1]^p; dz[0] is the same for all children */
        dz[0].re = 1.0;
        dz[0].im = 0.0;

        /* Lower left children */
        dz[1].re = -distX/size/4.0;
        dz[1].im = -distY/size/4.0;

        /* powers by splitting the exponent: p = p/2 + p/2 for even p,
           p = p/2 + (p/2 + 1) for odd p */
        for (p=2; p<=PMAX; ++p)
          {
             if (p % 2 == 0)
               dz[p] = cmult(dz[p/2],dz[p/2]);
             else
               dz[p] = cmult(dz[p/2],dz[(p/2)+1]);
          }

        for (i=0; i<size; ++i)
          for (j=0; j<size; ++j)
#ifdef MULTIPROC
            /* Split up the work. */
            if (sort_cpu(i+j*size,size*size,total_processes) == rank)
              {
#endif
                 for (p=0; p<PMAX; ++p)
                   for (p1=0; p1<=p; ++p1)
                     {
                        tmpz = cmult(dz[p-p1], *(Coeff_Array_Finer+(2*i+2*j*2*size)*PMAX+p1));
                        (*(Coeff_Array+(i+j*size)*PMAX+p)).re += tmpz.re*C(p,p1);
                        (*(Coeff_Array+(i+j*size)*PMAX+p)).im += tmpz.im*C(p,p1);
                     }
#ifdef MULTIPROC
              }
#endif

        /* Lower right children */
        dz[1].re = distX/size/4.0;
        dz[1].im = -distY/size/4.0;

        for (p=2; p<=PMAX; ++p)
          {
             if (p % 2 == 0)
               dz[p] = cmult(dz[p/2],dz[p/2]);
             else
               dz[p] = cmult(dz[p/2],dz[(p/2)+1]);
          }

        for (i=0; i<size; ++i)
          for (j=0; j<size; ++j)
#ifdef MULTIPROC
            /* Split up the work. */
            if (sort_cpu(i+j*size,size*size,total_processes) == rank)
              {
#endif
                 for (p=0; p<PMAX; ++p)
                   for (p1=0; p1<=p; ++p1)
                     {
                        tmpz = cmult(dz[p-p1], *(Coeff_Array_Finer+(2*i+1+2*j*2*size)*PMAX+p1));
                        (*(Coeff_Array+(i+j*size)*PMAX+p)).re += tmpz.re*C(p,p1);
                        (*(Coeff_Array+(i+j*size)*PMAX+p)).im += tmpz.im*C(p,p1);
                     }
#ifdef MULTIPROC
              }
#endif

        /* Upper left children */
        dz[1].re = -distX/size/4.0;
        dz[1].im = distY/size/4.0;

        for (p=2; p<=PMAX; ++p)
          {
             if (p % 2 == 0)
               dz[p] = cmult(dz[p/2],dz[p/2]);
             else
               dz[p] = cmult(dz[p/2],dz[(p/2)+1]);
          }

        for (i=0; i<size; ++i)
          for (j=0; j<size; ++j)
#ifdef MULTIPROC
            /* Split up the work. */
            if (sort_cpu(i+j*size,size*size,total_processes) == rank)
              {
#endif
                 for (p=0; p<PMAX; ++p)
                   for (p1=0; p1<=p; ++p1)
                     {
                        tmpz = cmult(dz[p-p1], *(Coeff_Array_Finer+(2*i+(2*j+1)*2*size)*PMAX+p1));
                        (*(Coeff_Array+(i+j*size)*PMAX+p)).re += tmpz.re*C(p,p1);
                        (*(Coeff_Array+(i+j*size)*PMAX+p)).im += tmpz.im*C(p,p1);
                     }
#ifdef MULTIPROC
              }
#endif

        /* Upper right children */
        dz[1].re = distX/size/4.0;
        dz[1].im = distY/size/4.0;

        for (p=2; p<=PMAX; ++p)
          {
             if (p % 2 == 0)
               dz[p] = cmult(dz[p/2],dz[p/2]);
             else
               dz[p] = cmult(dz[p/2],dz[(p/2)+1]);
          }

        for (i=0; i<size; ++i)
          for (j=0; j<size; ++j)
#ifdef MULTIPROC
            /* Split up the work. */
            if (sort_cpu(i+j*size,size*size,total_processes) == rank)
              {
#endif
                 for (p=0; p<PMAX; ++p)
                   for (p1=0; p1<=p; ++p1)
                     {
                        tmpz = cmult(dz[p-p1], *(Coeff_Array_Finer+(2*i+1+(2*j+1)*2*size)*PMAX+p1));
                        (*(Coeff_Array+(i+j*size)*PMAX+p)).re += tmpz.re*C(p,p1);
                        (*(Coeff_Array+(i+j*size)*PMAX+p)).im += tmpz.im*C(p,p1);
                     }
#ifdef MULTIPROC
              }
#endif

#ifdef MULTIPROC
        /* Share the level: the size*size boxes are cut into one
           contiguous range per process, the first cpu_k ranges being
           one box longer (cpu_n) than the rest (cpu_m), and every
           process broadcasts its range.
           NOTE(review): this layout has to match what sort_cpu()
           assigns above -- confirm. */
        cpu_k = (size*size) % total_processes;
        cpu_m = (size*size) / total_processes;
        cpu_n = cpu_m + (cpu_k != 0);

        for (cpu=0; cpu<total_processes; ++cpu)
          {
             if (cpu< cpu_k)
               {
                  lower_limit = cpu*cpu_n;
                  upper_limit = (cpu+1)*cpu_n;
               }
             else
               {
                  lower_limit = cpu_k*cpu_n+(cpu - cpu_k)*cpu_m;
                  upper_limit = cpu_k*cpu_n+(cpu+1 - cpu_k)*cpu_m;
               }

             /* The pointer is of Complex type but we are sending it as an
                MPI_DOUBLE so the length is twice the displacement
                increment. */
             if (upper_limit != lower_limit)
               MPI_Bcast(Coeff_Array+lower_limit*PMAX, (upper_limit-lower_limit)*2*PMAX, MPI_DOUBLE, cpu, MPI_COMM_WORLD);
          }
#endif
     }
}
/*
 * MEX entry point: symmetrised split-step propagation of a sampled
 * field.
 *
 * Called with a single string argument it only changes FFTW planning
 * state: "-savewisdom", "-forgetwisdom", "-loadwisdom", "-patient",
 * "-exhaustive", "-measure" or "-estimate".
 *
 * Otherwise at least seven inputs are required:
 *   prhs[0]  u0        input field (real or complex), nt samples
 *   prhs[1]  dt        time step
 *   prhs[2]  dz        propagation step size
 *   prhs[3]  nz        number of steps (rounded)
 *   prhs[4]  alpha     scalar, or one value per sample
 *   prhs[5]  beta      polynomial coefficients, or one phase value per
 *                      sample when it has exactly nt elements
 *   prhs[6]  gamma     nonlinearity coefficient
 *   prhs[7]  traman    optional, default 0
 *   prhs[8]  toptical  optional, default 0
 *   prhs[9]  maxiter   optional, default 4
 *   prhs[10] tol       optional, default 1e-5
 * An empty optional argument selects its default.
 *
 * plhs[0] receives the nt x 1 complex field after nz steps.
 *
 * The working arrays u0, u1, uv, uhalf, ufft, halfstep, the FFT plans
 * p1, ip1, p2, ip2, the length nt and the planner flag `method` are
 * file-level state, set up by sspropc_initialize_data() and released by
 * sspropc_destroy_data().
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
  REAL scale;                   /* scale factor (not used in this function) */
  REAL dt;                      /* time step */
  REAL dz;                      /* propagation stepsize */
  int nz;                       /* number of z steps to take */
  int nalpha;                   /* number of alpha coefs */
  double* alphap;               /* alpha(w) array, if applicable */
  int nbeta;                    /* number of beta coefs */
  double* beta;                 /* dispersion polynomial coefs */
  REAL gamma;                   /* nonlinearity coefficient */
  REAL traman = 0;              /* Raman response time */
  REAL toptical = 0;            /* Optical cycle time = lambda/c */
  int maxiter = 4;              /* max number of iterations */
  REAL tol = 1e-5;              /* convergence tolerance */
  REAL* w;                      /* vector of angular frequencies */

  int iz,ii,jj;                 /* loop counters */
  REAL phase, alpha,
    wii, fii;                   /* temporary variables */
  COMPLEX
    nlp,                        /* nonlinear phase */
    *ua, *ub, *uc;              /* samples of u at three adjacent times */
  char argstr[100];             /* string argument */

  /* A single argument is an option string, not a propagation request. */
  if (nrhs == 1) {
    if (mxGetString(prhs[0],argstr,100))
      mexErrMsgTxt("Unrecognized option.");

    if (!strcmp(argstr,"-savewisdom")) {
      sspropc_save_wisdom();
    }
    else if (!strcmp(argstr,"-forgetwisdom")) {
      FORGET_WISDOM();
    }
    else if (!strcmp(argstr,"-loadwisdom")) {
      sspropc_load_wisdom();
    }
    else if (!strcmp(argstr,"-patient")) {
      method = FFTW_PATIENT;
    }
    else if (!strcmp(argstr,"-exhaustive")) {
      method = FFTW_EXHAUSTIVE;
    }
    else if (!strcmp(argstr,"-measure")) {
      method = FFTW_MEASURE;
    }
    else if (!strcmp(argstr,"-estimate")) {
      method = FFTW_ESTIMATE;
    }
    else
      mexErrMsgTxt("Unrecognized option.");
    return;
  }

  if (nrhs < 7)
    mexErrMsgTxt("Not enough input arguments provided.");
  if (nlhs > 1)
    mexErrMsgTxt("Too many output arguments.");

  /* the element count of the input field is passed on here */
  sspropc_initialize_data(mxGetNumberOfElements(prhs[0]));

  /* parse input arguments */
  dt = (REAL) mxGetScalar(prhs[1]);
  dz = (REAL) mxGetScalar(prhs[2]);
  nz = round(mxGetScalar(prhs[3]));
  nalpha = mxGetNumberOfElements(prhs[4]);
  alphap = mxGetPr(prhs[4]);
  beta = mxGetPr(prhs[5]);
  nbeta = mxGetNumberOfElements(prhs[5]);
  gamma = (REAL) mxGetScalar(prhs[6]);
  if (nrhs > 7)
    traman = (mxIsEmpty(prhs[7])) ? 0 : (REAL) mxGetScalar(prhs[7]);
  if (nrhs > 8)
    toptical = (mxIsEmpty(prhs[8])) ? 0 : (REAL) mxGetScalar(prhs[8]);
  if (nrhs > 9)
    maxiter = (mxIsEmpty(prhs[9])) ? 4 : round(mxGetScalar(prhs[9]));
  if (nrhs > 10)
    tol = (mxIsEmpty(prhs[10])) ? 1e-5 : (REAL) mxGetScalar(prhs[10]);

  if ((nalpha != 1) && (nalpha != nt))
    mexErrMsgTxt("Invalid vector length (alpha).");

  /* compute vector of angular frequency components */
  /* MATLAB equivalent:  w = wspace(tv); */
  /* first half: non-negative frequencies; second half: shifted down by
     2*pi/dt to become the negative frequencies */
  w = (REAL*)mxMalloc(sizeof(REAL)*nt);
  for (ii = 0; ii <= (nt-1)/2; ii++) {
    w[ii] = 2*pi*ii/(dt*nt);
  }
  for (; ii < nt; ii++) {
    w[ii] = 2*pi*ii/(dt*nt) - 2*pi/dt;
  }

  /* compute halfstep and initialize u0 and u1 */
  for (jj = 0; jj < nt; jj++) {
    /* phase = sum_ii beta[ii] * w^ii / ii!  (fii tracks ii!, wii tracks
       w^ii), unless beta already holds one phase value per sample */
    if (nbeta != nt)
      for (ii = 0, phase = 0, fii = 1, wii = 1;
           ii < nbeta;
           ii++, fii*=ii, wii*=w[jj])
        phase += wii*((REAL)beta[ii])/fii;
    else
      phase = (REAL)beta[jj];
    alpha = (nalpha == nt) ? (REAL)alphap[jj] : (REAL)alphap[0];
    /* halfstep = exp(-alpha*dz/4) * exp(-j*phase*dz/2) */
    halfstep[jj][0] = +exp(-alpha*dz/4)*cos(phase*dz/2);
    halfstep[jj][1] = -exp(-alpha*dz/4)*sin(phase*dz/2);
    u0[jj][0] = (REAL) mxGetPr(prhs[0])[jj];
    u0[jj][1] = mxIsComplex(prhs[0]) ? (REAL)(mxGetPi(prhs[0])[jj]) : 0.0;
    u1[jj][0] = u0[jj][0];
    u1[jj][1] = u0[jj][1];
  }

  mxFree(w);                    /* free w vector */

  mexPrintf("Performing split-step iterations ... ");

  EXECUTE(p1);                  /* ufft = fft(u0) */
  for (iz = 0; iz < nz; iz++) {
    cmult(uhalf,halfstep,ufft); /* uhalf = halfstep.*ufft */
    EXECUTE(ip1);               /* uhalf = nt*ifft(uhalf) */
    /* iterate on u1 until it stops changing or maxiter is reached */
    for (ii = 0; ii < maxiter; ii++) {
      if ((traman == 0.0) && (toptical == 0)) {
        /* plain case: rotate uhalf by -phase, where phase is
           gamma*(|u0|^2 + |u1|^2)*dz/2; the division by nt undoes the
           scaling of the unnormalised inverse FFT */
        for (jj = 0; jj < nt; jj++) {
          phase = gamma*(u0[jj][0]*u0[jj][0] +
                         u0[jj][1]*u0[jj][1] +
                         u1[jj][0]*u1[jj][0] +
                         u1[jj][1]*u1[jj][1])*dz/2;
          uv[jj][0] = (uhalf[jj][0]*cos(phase) +
                       uhalf[jj][1]*sin(phase))/nt;
          uv[jj][1] = (-uhalf[jj][0]*sin(phase) +
                       uhalf[jj][1]*cos(phase))/nt;
        }
      } else {
        /* general case: nlp[0] / nlp[1] are built from centred
           differences over the neighbouring samples ua (previous) and
           uc (next), summed over u0 and u1.  The first sample wraps
           round to the last one for its "previous" neighbour. */
        jj = 0;
        ua = &u0[nt-1]; ub = &u0[jj]; uc = &u0[jj+1];
        nlp[1] = -toptical*(abs2(uc) - abs2(ua) +
                            prodr(ub,uc) - prodr(ub,ua))/(4*pi*dt);
        nlp[0] = abs2(ub)
          - traman*(abs2(uc) - abs2(ua))/(2*dt)
          + toptical*(prodi(ub,uc) - prodi(ub,ua))/(4*pi*dt);

        ua = &u1[nt-1]; ub = &u1[jj]; uc = &u1[jj+1];
        nlp[1] += -toptical*(abs2(uc) - abs2(ua) +
                             prodr(ub,uc) - prodr(ub,ua))/(4*pi*dt);
        nlp[0] += abs2(ub)
          - traman*(abs2(uc) - abs2(ua))/(2*dt)
          + toptical*(prodi(ub,uc) - prodi(ub,ua))/(4*pi*dt);

        nlp[0] *= gamma*dz/2;
        nlp[1] *= gamma*dz/2;

        /* rotate by -nlp[0] and scale by exp(nlp[1]) */
        uv[jj][0] = (uhalf[jj][0]*cos(nlp[0])*exp(+nlp[1]) +
                     uhalf[jj][1]*sin(nlp[0])*exp(+nlp[1]))/nt;
        uv[jj][1] = (-uhalf[jj][0]*sin(nlp[0])*exp(+nlp[1]) +
                     uhalf[jj][1]*cos(nlp[0])*exp(+nlp[1]))/nt;

        /* interior samples */
        for (jj = 1; jj < nt-1; jj++) {
          ua = &u0[jj-1]; ub = &u0[jj]; uc = &u0[jj+1];
          nlp[1] = -toptical*(abs2(uc) - abs2(ua) +
                              prodr(ub,uc) - prodr(ub,ua))/(4*pi*dt);
          nlp[0] = abs2(ub)
            - traman*(abs2(uc) - abs2(ua))/(2*dt)
            + toptical*(prodi(ub,uc) - prodi(ub,ua))/(4*pi*dt);

          ua = &u1[jj-1]; ub = &u1[jj]; uc = &u1[jj+1];
          nlp[1] += -toptical*(abs2(uc) - abs2(ua) +
                               prodr(ub,uc) - prodr(ub,ua))/(4*pi*dt);
          nlp[0] += abs2(ub)
            - traman*(abs2(uc) - abs2(ua))/(2*dt)
            + toptical*(prodi(ub,uc) - prodi(ub,ua))/(4*pi*dt);

          nlp[0] *= gamma*dz/2;
          nlp[1] *= gamma*dz/2;

          uv[jj][0] = (uhalf[jj][0]*cos(nlp[0])*exp(+nlp[1]) +
                       uhalf[jj][1]*sin(nlp[0])*exp(+nlp[1]))/nt;
          uv[jj][1] = (-uhalf[jj][0]*sin(nlp[0])*exp(+nlp[1]) +
                       uhalf[jj][1]*cos(nlp[0])*exp(+nlp[1]))/nt;
        }

        /* we now handle the endpoint where jj = nt-1; its "next"
           neighbour wraps round to sample 0 */
        ua = &u0[jj-1]; ub = &u0[jj]; uc = &u0[0];
        nlp[1] = -toptical*(abs2(uc) - abs2(ua) +
                            prodr(ub,uc) - prodr(ub,ua))/(4*pi*dt);
        nlp[0] = abs2(ub)
          - traman*(abs2(uc) - abs2(ua))/(2*dt)
          + toptical*(prodi(ub,uc) - prodi(ub,ua))/(4*pi*dt);

        ua = &u1[jj-1]; ub = &u1[jj]; uc = &u1[0];
        nlp[1] += -toptical*(abs2(uc) - abs2(ua) +
                             prodr(ub,uc) - prodr(ub,ua))/(4*pi*dt);
        nlp[0] += abs2(ub)
          - traman*(abs2(uc) - abs2(ua))/(2*dt)
          + toptical*(prodi(ub,uc) - prodi(ub,ua))/(4*pi*dt);

        nlp[0] *= gamma*dz/2;
        nlp[1] *= gamma*dz/2;

        uv[jj][0] = (uhalf[jj][0]*cos(nlp[0])*exp(+nlp[1]) +
                     uhalf[jj][1]*sin(nlp[0])*exp(+nlp[1]))/nt;
        uv[jj][1] = (-uhalf[jj][0]*sin(nlp[0])*exp(+nlp[1]) +
                     uhalf[jj][1]*cos(nlp[0])*exp(+nlp[1]))/nt;
      }

      EXECUTE(p2);              /* uv = fft(uv) */
      cmult(ufft,uv,halfstep);  /* ufft = uv.*halfstep */
      EXECUTE(ip2);             /* uv = nt*ifft(ufft) */
      if (ssconverged(uv,u1,tol)) {   /* test for convergence */
        cscale(u1,uv,1.0/nt);         /* u1 = uv/nt; */
        break;                        /* exit from ii loop */
      } else {
        cscale(u1,uv,1.0/nt);         /* u1 = uv/nt; */
      }
    }
    /* ii only reaches maxiter when the loop ended without a break */
    if (ii == maxiter)
      mexWarnMsgTxt("Failed to converge.");

    cscale(u0,u1,1);            /* u0 = u1 */
  }
  mexPrintf("done.\n");

  /* allocate space for returned vector */
  plhs[0] = mxCreateDoubleMatrix(nt,1,mxCOMPLEX);
  for (jj = 0; jj < nt; jj++) {
    mxGetPr(plhs[0])[jj] = (double) u1[jj][0];   /* fill return vector */
    mxGetPi(plhs[0])[jj] = (double) u1[jj][1];   /* with u1 */
  }

  sspropc_destroy_data();
}
/*
 * Evaluate the helicity-dependent abbreviations F*, Pair*, Sub* and
 * Opt25 that are declared in "vars.h".
 *
 * NOTE(review): this looks like machine-generated code (spinor chains
 * combined into a common-subexpression table) -- presumably it should
 * be regenerated rather than edited by hand; confirm.
 *
 * Assignment order matters: each Sub* only refers to values assigned
 * above it.  The four values computed last (Sub14662, Sub14664,
 * Sub14666, Sub14660) are not read again inside this function.
 */
void abbr0h()
{
#include "vars.h"
#include "inline.h"

  /* spinor chains */
  F32 = ChainV2(Spinor(1,2),0,ec(5),k(3),1,Spinor(4,2));
  F34 = -1*(ChainB2(Spinor(1,2),0,ec(5),k(4),0,Spinor(3,1)));
  F9 = -1*(ChainV0(Spinor(4,2),0,0,Spinor(3,1)));
  F13 = ChainB0(Spinor(4,2),0,0,Spinor(2,1));
  F12 = ChainB0(Spinor(4,2),0,0,Spinor(3,1));
  F35 = ChainV1(Spinor(1,2),0,ec(5),0,Spinor(3,1));
  F3 = ChainV1(Spinor(1,2),0,ec(5),1,Spinor(4,2));
  F33 = ChainB1(Spinor(1,2),0,ec(5),0,Spinor(3,1));
  F6 = -1*(ChainB1(Spinor(1,2),0,ec(5),1,Spinor(4,2)));
  F30 = -1*(ChainB1(Spinor(1,2),0,k(3),0,Spinor(2,1)));
  F8 = ChainV1(Spinor(4,2),0,ec(5),0,Spinor(2,1));
  F17 = ChainV1(Spinor(4,2),0,k(2),0,Spinor(3,1));
  F4 = ChainB1(Spinor(4,2),0,ec(5),0,Spinor(2,1));
  F22 = -1*(ChainB1(Spinor(4,2),0,k(1),0,Spinor(3,1)));
  F18 = -1*(ChainB1(Spinor(4,2),0,k(2),0,Spinor(3,1)));
  F27 = ChainV2(Spinor(1,2),0,ec(5),k(2),1,Spinor(4,2));
  F36 = -1*(ChainV2(Spinor(1,2),0,ec(5),k(4),0,Spinor(3,1)));
  F28 = -1*(ChainB2(Spinor(1,2),0,ec(5),k(2),1,Spinor(4,2)));
  F31 = -1*(ChainB2(Spinor(1,2),0,ec(5),k(3),1,Spinor(4,2)));
  F23 = ChainV2(Spinor(4,2),0,ec(5),k(1),0,Spinor(2,1));
  F7 = -1*(ChainV0(Spinor(1,2),0,0,Spinor(3,1)));
  F15 = ChainV0(Spinor(1,2),0,1,Spinor(4,2));
  F1 = -1*(ChainB0(Spinor(1,2),0,0,Spinor(3,1)));
  F25 = -1*(ChainB0(Spinor(1,2),0,1,Spinor(4,2)));
  F5 = -1*(ChainV0(Spinor(2,1),1,0,Spinor(3,1)));
  F2 = ChainB0(Spinor(2,1),1,0,Spinor(3,1));
  F24 = ChainV0(Spinor(4,2),0,0,Spinor(2,1));
  F10 = ChainV1(Spinor(1,2),0,ec(5),0,Spinor(2,1));
  F29 = ChainV1(Spinor(1,2),0,k(3),0,Spinor(2,1));
  F19 = ChainV1(Spinor(1,2),0,k(4),0,Spinor(2,1));
  F11 = -1*(ChainB1(Spinor(1,2),0,ec(5),0,Spinor(2,1)));
  F26 = -1*(ChainB1(Spinor(1,2),0,k(4),0,Spinor(2,1)));
  F20 = ChainV1(Spinor(4,2),0,ec(5),0,Spinor(3,1));
  F16 = ChainV1(Spinor(4,2),0,k(1),0,Spinor(3,1));
  F21 = -1*(ChainB1(Spinor(4,2),0,ec(5),0,Spinor(3,1)));
  F14 = ChainB2(Spinor(4,2),0,ec(5),k(1),0,Spinor(2,1));

  /* products of ec(5) with k(1) and k(2) */
  Pair1 = Pair(ec(5),k(1));
  Pair2 = Pair(ec(5),k(2));

  /* intermediate combinations of the chains above */
  Sub436 = cmult(F10,F16+F17)+cmult(F2,-F27-cmult(2*(F15),Pair1));
  Sub3721 = -1*(cmult(F11,F16+F17))+cmult(F1,-F23+cmult(2*(F24),Pair1+Pair2));
  Sub18 = cmult(F2,F3)+cmult(F10,-F12+F9);
  Sub437 = cmult(F11,F18+F22)+cmult(F5,-F28-cmult(2*(F25),Pair1));
  Sub1408 = cmult(F10,F18+F22)-cmult(F7,-F14+cmult(2*(F13),Pair1+Pair2));
  Sub14 = cmult(F24,F33)+cmult(F11,F12-F9);
  Sub32 = cmult(F19,F20)+Sub436;
  Sub29 = -1*(cmult(F20,F26))+Sub3721;
  Sub4 = cmult(F10,F17)+cmult(F2,-F32-MT*F3);
  Sub3 = cmult(F11,F17)+cmult(F24,-F34+MT*(F33));
  Sub22 = (1/(SW2))*((2*(SW2))*(Sub14)+(1-2*(SW2))*(Sub18));
  Sub35 = (1/(SW2))*((2*(SW2))*(Sub29)+(1-2*(SW2))*(Sub32));
  Sub31 = -1*(cmult(F7,F8))+Sub18;
  Sub28 = cmult(F1,F4)-cmult(F5,F6)+cmult(F11,-F12+F9);
  Sub33 = cmult(F21,F26)+Sub437;
  Sub2949 = cmult(F1,F23)+cmult(F2,-F27+cmult(2*(F15),Pair2));
  Sub9 = -1*(cmult(F20,F29))+cmult(F2,-F27+cmult(2*(F15),Pair2));
  Sub15 = cmult(F11,F16)+cmult(F20,F26+F30)-cmult(F24,-F34+cmult(2*F1,Pair1+Pair2));
  Sub30 = cmult(F19,F21)+Sub1408;
  Sub9998 = cmult(F2,F32)+cmult(F24,F34)+cmult(F13,F36)+cmult(F31,F5);
  Sub19 = cmult(F10,F16)+cmult(F20,F19+F29)+cmult(F2,F32-cmult(2*(F15),Pair1+Pair2));
  Opt25 = -1*(cmult(F21,F29))+cmult(F20,-F29-F30)-cmult(F21,F30);
  Sub20 = cmult(F5,F6)+cmult(F11,F12-F9);
  Sub16 = cmult(F13,F35)+cmult(F10,-F12+F9);
  Sub6 = cmult(F10,F18)+cmult(F13,-F36+MT*(F35));
  Sub8561 = cmult(F20,F26)-cmult(cmult(2*F1,F24),Pair1+Pair2);
  Sub14151 = cmult(F11,F22)-cmult(cmult(2*(F25),F5),Pair1+Pair2);
  Sub14146 = cmult(F10,F22)-cmult(cmult(2*(F13),F7),Pair1+Pair2);
  Sub11 = (1/(SW2))*((1-2*(SW2))*(cmult(F2,F3))+(SW2)*(cmult(-2*F1,F4)+cmult(2*F5,F6))+(-1+2*(SW2))*(cmult(F7,F8)));
  Sub34 = (1/(SW2))*((2*(SW2))*(Sub28)+(-1+2*(SW2))*(Sub31));
  Sub5 = (1/(SW2))*((2*(SW2))*(Sub3)+(-1+2*(SW2))*(Sub4));
  Sub12 = (1/(SW2))*((SW2)*(cmult(-2*F1,F23)+cmult(2*(F20),F30))+(1-2*(SW2))*(Sub9));
  Sub1070 = cmult(F11,F16+F17+F18+F22)+cmult(F1,F23)-cmult(F28,F5);
  Sub917 = cmult(F10,F16+F17+F18+F22)-cmult(F2,F27)+cmult(F14,F7);
  Sub7 = -1*(cmult(F11,F18))+cmult(F5,F31+MT*F6);
  Sub10 = cmult(F21,F30)+cmult(F5,F28-cmult(2*(F25),Pair2));
  Sub8567 = -1*(cmult(F19,F21))+cmult(cmult(2*(F13),F7),Pair1+Pair2);
  Sub21 = cmult(F11,F22)+cmult(F21,F26+F30)-cmult(F5,-F31+cmult(2*(F25),Pair1+Pair2));
  Sub23 = (1/(SW2))*((-1+2*(SW2))*(Sub19)+(SW2)*(2*(Sub15)-MT*(Sub22)));
  Sub753 = (-1*4+8*(SW2))*(Sub30)+(8*(SW2))*(Sub33)+(3-4*(SW2))*(Sub35);
  Sub14061 = -1*(cmult(F2,F3))+cmult(F24,F33)+cmult(F13,F35)-cmult(F5,F6);
  Sub612 = cmult(F10+F11,F17+F18)-Sub9998;
  Sub566 = cmult(F20,-F29-F30)+Sub2949;
  Sub569 = -1*(cmult(F21,F29))+cmult(F14,F7)-Sub10;
  Sub17 = cmult(F21,F19+F29)+cmult(F13,F36)+Sub14146;
  Sub14237 = Sub9998-MT*(Sub14061);
  Sub24 = (4-8*(SW2))*(Sub17)-(8*(SW2))*(Sub21)+(3-4*(SW2))*(Sub23)+MT*((-4+8*(SW2))*(Sub16)-(8*(SW2))*(Sub20));
  Sub8 = (3-4*(SW2))*(Sub5)+(4-8*(SW2))*(Sub6)+(8*(SW2))*(Sub7);
  Sub13 = (4-8*(SW2))*(cmult(F21,F29))+(-4+8*(SW2))*(cmult(F14,F7))-(8*(SW2))*(Sub10)+(3*MT)*(Sub11)+(3-4*(SW2))*(Sub12);
  Sub36 = -1*(Sub753)+(3*MT)*(Sub34);
  Sub26 = Sub612+MT*(Sub14061);
  Sub2 = Sub566+Sub569;
  Sub1 = cmult(F21,F26)+cmult(F20,F19+F26)-2*(cmult(cmult(F15,F2)+cmult(F1,F24)+cmult(F25,F5),Pair1)+cmult(cmult(F1,F24),Pair2))+Sub1070-Sub8567+Sub917;
  Sub27 = cmult(F10+F11,F16)+cmult(F19,F20+F21)+cmult(F21,F26)-Opt25-cmult(cmult(2*(F15),F2),Pair1+Pair2)+Sub14146+Sub14151+Sub14237+Sub8561;

  /* final combinations: each divides one term by S (or S34) and
     another by -MZ2+S (or -MZ2+S34) */
  Sub14662 = (Sub13)/(-MZ2+S)+(8*(CW2))*((Sub2)/S);
  Sub14664 = (8*(Sub1))/S-(1/(CW2))*((Sub36)/(-MZ2+S));
  Sub14666 = (8*(Sub27))/(S34)-(1/(CW2))*((Sub24)/(-MZ2+S34));
  Sub14660 = (8*(Sub26))/(S34)-(1/(CW2))*((Sub8)/(-MZ2+S34));
}
float qpsk_to_bits(int rx_bits[], int *sync_bit, int Nc, COMP phase_difference[], COMP prev_rx_symbols[], COMP rx_symbols[], int old_qpsk_mapping) { int c; COMP pi_on_4; COMP d; int msb=0, lsb=0; float ferr, norm; pi_on_4.real = cos(PI/4.0); pi_on_4.imag = sin(PI/4.0); /* Extra 45 degree clockwise lets us use real and imag axis as decision boundaries. "norm" makes sure the phase subtraction from the previous symbol doesn't affect the amplitude, which leads to sensible scatter plots */ for(c=0; c<Nc; c++) { norm = 1.0/(cabsolute(prev_rx_symbols[c])+1E-6); phase_difference[c] = cmult(cmult(rx_symbols[c], fcmult(norm,cconj(prev_rx_symbols[c]))), pi_on_4); } /* map (Nc,1) DQPSK symbols back into an (1,Nc*Nb) array of bits */ for (c=0; c<Nc; c++) { d = phase_difference[c]; if ((d.real >= 0) && (d.imag >= 0)) { msb = 0; lsb = 0; } if ((d.real < 0) && (d.imag >= 0)) { msb = 0; lsb = 1; } if ((d.real < 0) && (d.imag < 0)) { if (old_qpsk_mapping) { msb = 1; lsb = 0; } else { msb = 1; lsb = 1; } } if ((d.real >= 0) && (d.imag < 0)) { if (old_qpsk_mapping) { msb = 1; lsb = 1; } else { msb = 1; lsb = 0; } } rx_bits[2*c] = msb; rx_bits[2*c+1] = lsb; } /* Extract DBPSK encoded Sync bit and fine freq offset estimate */ norm = 1.0/(cabsolute(prev_rx_symbols[Nc])+1E-6); phase_difference[Nc] = cmult(rx_symbols[Nc], fcmult(norm, cconj(prev_rx_symbols[Nc]))); if (phase_difference[Nc].real < 0) { *sync_bit = 1; ferr = phase_difference[Nc].imag; } else { *sync_bit = 0; ferr = -phase_difference[Nc].imag; } /* pilot carrier gets an extra pi/4 rotation to make it consistent with other carriers, as we need it for snr_update and scatter diagram */ phase_difference[Nc] = cmult(phase_difference[Nc], pi_on_4); return ferr; }
float rx_est_timing(COMP rx_symbols[], COMP rx_filt[NC+1][P+1], COMP rx_baseband[NC+1][M+M/P], COMP rx_filter_mem_timing[NC+1][NT*P], float env[], COMP rx_baseband_mem_timing[NC+1][NFILTERTIMING], int nin) { int c,i,j,k; int adjust, s; COMP x, phase, freq; float rx_timing; /* nin adjust -------------------------------- 120 -1 (one less rate P sample) 160 0 (nominal) 200 1 (one more rate P sample) */ adjust = P - nin*P/M; /* update buffer of NT rate P filtered symbols */ for(c=0; c<NC+1; c++) for(i=0,j=P-adjust; i<(NT-1)*P+adjust; i++,j++) rx_filter_mem_timing[c][i] = rx_filter_mem_timing[c][j]; for(c=0; c<NC+1; c++) for(i=(NT-1)*P+adjust,j=0; i<NT*P; i++,j++) rx_filter_mem_timing[c][i] = rx_filt[c][j]; /* sum envelopes of all carriers */ for(i=0; i<NT*P; i++) { env[i] = 0.0; for(c=0; c<NC+1; c++) env[i] += cabsolute(rx_filter_mem_timing[c][i]); } /* The envelope has a frequency component at the symbol rate. The phase of this frequency component indicates the timing. So work out single DFT at frequency 2*pi/P */ x.real = 0.0; x.imag = 0.0; freq.real = cosf(2*PI/P); freq.imag = sinf(2*PI/P); phase.real = 1.0; phase.imag = 0.0; for(i=0; i<NT*P; i++) { x = cadd(x, fcmult(env[i], phase)); phase = cmult(phase, freq); } /* Map phase to estimated optimum timing instant at rate M. The M/4 part was adjusted by experiment, I know not why.... 
*/ rx_timing = atan2f(x.imag, x.real)*M/(2*PI) + M/4; if (rx_timing > M) rx_timing -= M; if (rx_timing < -M) rx_timing += M; /* rx_filt_mem_timing contains M + Nfilter + M samples of the baseband signal at rate M this enables us to resample the filtered rx symbol with M sample precision once we have rx_timing */ for(c=0; c<NC+1; c++) for(i=0,j=nin; i<NFILTERTIMING-nin; i++,j++) rx_baseband_mem_timing[c][i] = rx_baseband_mem_timing[c][j]; for(c=0; c<NC+1; c++) for(i=NFILTERTIMING-nin,j=0; i<NFILTERTIMING; i++,j++) rx_baseband_mem_timing[c][i] = rx_baseband[c][j]; /* rx filter to get symbol for each carrier at estimated optimum timing instant. We use rate M filter memory to get fine timing resolution. */ s = round(rx_timing) + M; for(c=0; c<NC+1; c++) { rx_symbols[c].real = 0.0; rx_symbols[c].imag = 0.0; for(k=s,j=0; k<s+NFILTER; k++,j++) rx_symbols[c] = cadd(rx_symbols[c], fcmult(gt_alpha5_root[j], rx_baseband_mem_timing[c][k])); } return rx_timing; }
static void forward_and_back_substitution( complex adaptive_weights[N_DOP][N_BLOCKS][N_STEERING][N_CHAN*TDOF], complex (* const cholesky_factors)[N_BLOCKS][N_CHAN*TDOF][N_CHAN*TDOF], complex (* const steering_vectors)[N_CHAN*TDOF]) { /* * We are solving the system R*Rx = b where upper triangular matrix R * is the result of Cholesky factorization. To do so, we first apply * forward substitution to solve R*y = b for y and then apply back * substitution to solve Rx = y for x. In this case, b and x correspond * to the steering vectors and adaptive weights, respectively. */ complex (* R)[N_BLOCKS][N_CHAN*TDOF][N_CHAN*TDOF] = cholesky_factors; complex (* x)[N_BLOCKS][N_STEERING][N_CHAN*TDOF] = adaptive_weights; complex (* b)[N_CHAN*TDOF] = steering_vectors; int dop, block, sv, i, k; APPROX int j; complex accum; for (dop = 0; dop < N_DOP; ++dop) { for (block = 0; block < N_BLOCKS; ++block) { for (sv = 0; sv < N_STEERING; ++sv) { /* First apply forward substitution */ for (i = 0; i < N_CHAN*TDOF; ++i) { APPROX const float Rii_inv = 1.0f / R[dop][block][i][i].re; accum.re = accum.im = 0.0f; for (j = 0; ENDORSE(j < i); ++j) { /* * Use the conjugate of the upper triangular entries * of R as the lower triangular entries. */ const complex prod = cmult( cconj(R[dop][block][j][i]), x[dop][block][sv][j]); accum.re += prod.re; accum.im += prod.im; } x[dop][block][sv][i].re = (b[sv][i].re - accum.re) * Rii_inv; x[dop][block][sv][i].im = (b[sv][i].im - accum.im) * Rii_inv; } /* And now apply back substitution */ for (j = N_CHAN*TDOF-1; ENDORSE(j >= 0); --j) { APPROX const float Rjj_inv = 1.0f / R[dop][block][j][j].re; accum.re = accum.im = 0.0f; for (k = ENDORSE(j+1); k < N_CHAN*TDOF; ++k) { const complex prod = cmult( R[dop][block][j][k], x[dop][block][sv][k]); accum.re += prod.re; accum.im += prod.im; } x[dop][block][sv][j].re = (x[dop][block][sv][j].re - accum.re) * Rjj_inv; x[dop][block][sv][j].im = (x[dop][block][sv][j].im - accum.im) * Rjj_inv; } } } } }