/*
 * Number of entries stored in row `row` of the transposed (zero-stripped)
 * table for order m and bandwidth bw.
 *
 * Rows at or beyond bw hold nothing.  For even m the size follows
 * directly from m (rows up to m) or from the row index (rows past m).
 * For odd m the last row is empty; every other row takes the even-order
 * size of (row + 1, m - 1), and rows below m additionally lose one entry
 * when the row index is odd.
 */
int Transpose_RowSize(int row, int m, int bw)
{
    int shifted_row, even_order, size;

    if (row >= bw)
        return 0;

    if ((m % 2) == 0)
        return (row <= m) ? ((bw - m) / 2) : (((bw - row - 1) / 2) + 1);

    /* odd order from here on */
    if (row == (bw - 1))
        return 0;

    /* even-order formula evaluated at (row + 1, m - 1); row + 1 < bw here */
    shifted_row = row + 1;
    even_order = m - 1;
    if (shifted_row <= even_order)
        size = (bw - even_order) / 2;
    else
        size = ((bw - shifted_row - 1) / 2) + 1;

    if (row < m)
        size -= (row % 2);

    return size;
}
/*
 * Fill `result` with the transpose of the zero-stripped table
 * `cos_pml_table` for order m and bandwidth bw.
 *
 * The input table has had all zeroes stripped out, and when m is odd it
 * is really a Gml table, which shifts the indexing slightly.  The
 * transposed table holds the same number of non-zero entries; its rows
 * are written back to back into `result`, with row lengths given by
 * Transpose_RowSize().
 */
void Transpose_CosPmlTableGen(int bw, int m, double *cos_pml_table, double *result)
{
    double *out_row = result;
    int row;

    /* special case m == bw - 1: the table is copied across verbatim */
    if (m == bw - 1) {
        memcpy(result, cos_pml_table, sizeof(double) * TableSize(m, bw));
        return;
    }

    for (row = 0; row < bw; row++) {
        const double *src;
        int count, step, skip, k;

        /* for odd m the last row is all zeroes, so there is nothing left to do */
        if (row == (bw - 1) && (m % 2))
            return;

        count = Transpose_RowSize(row, m, bw);

        if (row <= m) {
            /* starting element depends on the parity of the row */
            if ((row % 2) == 0)
                src = cos_pml_table + (row / 2);
            else
                src = cos_pml_table + (m / 2) + 1 + (row / 2);

            step = m + 2 - (m % 2) + (row % 2);
        } else {
            /*
             * Past row m the first value loaded is the highest-degree
             * coefficient of P(m,row): add up the source row sizes through
             * that degree, then step back one because the sum points one
             * element too far.
             */
            const int last_degree = ((m % 2) == 0) ? row : row + 1;
            int past_end = 0;
            int degree;

            for (degree = m; degree <= last_degree; degree++)
                past_end += RowSize(m, degree);

            src = cos_pml_table + (past_end - 1);
            step = row + 2;
        }

        /*
         * Walk down a column of the source table: the jump between
         * consecutive values starts at `step` and grows by 2 every time.
         */
        skip = 0;
        for (k = 0; k < count; k++) {
            out_row[k] = src[skip];
            skip += step;
            step += 2;
        }

        out_row += count;
    }
}
/*
 * Number of entries stored in row `row` of the transposed (zero-stripped)
 * table for order m and bandwidth bw, handling both even and odd bw.
 *
 * For each parity of m there is a "base" formula (the one for m == 1 or
 * m == 0) that depends only on row and bw.  It applies when m is that
 * base order, or once the row has reached m - 1 (odd m) or m (even m).
 * Rows below that threshold use a formula that depends on m instead.
 */
int Transpose_RowSize(int row, int m, int bw)
{
    const int bw_is_odd = (bw % 2) != 0;

    if ((m % 2) != 0) {
        /* odd order: the base formula is the same for either parity of bw */
        if (m == 1 || row >= m - 1)
            return (bw - row) / 2;

        if (bw_is_odd)
            return (bw - m + 1) / 2;

        return (bw - m + 1) / 2 - (row % 2);
    }

    /* even order */
    if (m == 0 || row >= m) {
        if (bw_is_odd)
            return (bw - row) / 2 + ((row + 1) % 2);

        return (bw - row) / 2 + (row % 2);
    }

    if (bw_is_odd)
        return (bw - m) / 2 + ((row + 1) % 2);

    return (bw - m) / 2;
}
/*
 * Inverse transform for order m and bandwidth bw.
 *
 * Builds a cosine series of length bw in `workspace` by taking, for each
 * row of the transposed zero-stripped table `trans_cos_pml_table`, the
 * dot product of that row with every second entry of `coeffs`.  The
 * series is then scaled and handed to the r2r plan `*fplan` (the
 * original notes describe this as an inverse DCT evaluating the series
 * at 2*bw Chebyshev nodes), which writes into `result`.  For odd m the
 * output is finally multiplied pointwise by `sin_values`.
 *
 * The first 2*bw doubles of both `workspace` and `result` are zeroed on
 * entry.
 */
void InvSemiNaiveReduced(double *coeffs, int bw, int m, double *result, double *trans_cos_pml_table, double *sin_values, double *workspace, fftw_plan *fplan )
{
    double *cos_series = workspace;
    const double *table_row = trans_cos_pml_table;
    double scale;
    int row, k;

    /* for paranoia, start from zeroed arrays */
    memset(cos_series, 0, sizeof(double) * 2 * bw);
    memset(result, 0, sizeof(double) * 2 * bw);

    /*
     * One cosine coefficient per table row.  Zeroes have been stripped
     * from the table, so each row has its own length and its own
     * starting offset into coeffs.
     */
    for (row = 0; row < bw; row++) {
        const double *src;
        double acc0 = 0.0, acc1 = 0.0, acc2 = 0.0, acc3 = 0.0;
        int len, head;

        /* for odd m the last coefficient is zero and the loop is done */
        if (row == (bw - 1) && (m % 2)) {
            cos_series[bw - 1] = 0.0;
            break;
        }

        len = Transpose_RowSize(row, m, bw);

        if (row > m)
            src = coeffs + (row - m) + (m % 2);
        else
            src = coeffs + (row % 2);

        /* leftover elements first, so the unrolled loop runs in fours */
        head = len % 4;
        for (k = 0; k < head; ++k)
            acc0 += src[2 * k] * table_row[k];

        /* four independent partial sums, combined once per row */
        for ( ; k < len; k += 4) {
            acc0 += src[2 * k] * table_row[k];
            acc1 += src[2 * (k + 1)] * table_row[k + 1];
            acc2 += src[2 * (k + 2)] * table_row[k + 2];
            acc3 += src[2 * (k + 3)] * table_row[k + 3];
        }

        cos_series[row] = acc0 + acc1 + acc2 + acc3;
        table_row += len;
    }

    /* scale the coefficients before the transform; entry 0 has its own factor */
    scale = 0.5 / sqrt((double) bw);
    for (k = 1; k < 2 * bw; k++)
        cos_series[k] *= scale;
    cos_series[0] /= sqrt(2. * ((double) bw));

    /* run the plan on these specific input/output arrays */
    fftw_execute_r2r(*fplan, cos_series, result);

    /* odd m: multiply by the supplied sine values */
    if (m % 2) {
        for (k = 0; k < (2 * bw); k++)
            result[k] *= sin_values[k];
    }
}