inline int vec_fir_tiler(vbx_mm_t *output, vbx_mm_t *input, vbx_mm_t *coeffs, int sample_size, int num_taps) { typedef vbx_mm_t vbx_sp_t; //use 1/8 of scratchpad, only really need 1/4, but lets be safe int chunk_size = vbx_sp_getfree()>>3; //divide by sizeof vbx_sp_t chunk_size >>= (sizeof(vbx_sp_t)==sizeof(vbx_word_t)?2: sizeof(vbx_sp_t)==sizeof(vbx_half_t)?1:0); // Note: chunksize is the size of the input chunk, so the output // chunk is chunk_size - num_taps. if( chunk_size==0 ){ return VBW_ERROR_SP_ALLOC_FAILED; } VBX::Vector<vbx_sp_t> v_coeffs(num_taps); v_coeffs.dma_read(coeffs); VBX::Prefetcher<vbx_sp_t> input_dbl_buf(1,chunk_size+num_taps,input,input+sample_size,chunk_size); input_dbl_buf.fetch(); //if the entire sample ifts in the scratchpad, do that. if(chunk_size>sample_size-num_taps){ //do in sp fir filter VBX::Vector<vbx_sp_t>& v_in=input_dbl_buf[0]; VBX::Vector<vbx_sp_t> v_out(sample_size-num_taps); vec_fir(v_out,v_in,v_coeffs); v_out.dma_write(output); vbx_sync(); return VBW_SUCCESS; } VBX::Vector<vbx_sp_t> v_out(chunk_size); int num_chunks=(sample_size + chunk_size/2)/chunk_size; for(int chunk=0;chunk<num_chunks;chunk++){ input_dbl_buf.fetch(); VBX::Vector<vbx_sp_t>& v_in=input_dbl_buf[0]; vec_fir(v_out,v_in,v_coeffs); v_out[0 upto v_in.size-num_taps].dma_write(output+chunk*chunk_size); } vbx_sync(); return VBW_SUCCESS; }
// Builds a vector_range for `in_range` on top of this object's own view
// (member range `r` applied to the vector held by `shptr`).
// NOTE(review): in_range is presumably interpreted relative to that view
// rather than to the underlying vector -- confirm against ViennaCL.
viennacl::vector_range<viennacl::vector_base<T> > range(viennacl::range in_range){
    typedef viennacl::vector_range<viennacl::vector_base<T> > view_type;

    // View described by the member range `r`.
    view_type own_view(*shptr.get(), r);

    // Requested sub-range, constructed from the view above.
    view_type requested(own_view, in_range);
    return requested;
}
void test(const int n, const int lb, const int ub, bool dumpfull) { // Generate a special-band-matrix mfull With lb lower diagonals, ub upper // diagonals and the elements of the highest upper diagonal extended across // each row MAT *mfull = m_get(n, n); //m_rand(mfull); randlist(mfull->base, (mfull->n)*(mfull->n)); double **me = mfull->me; for(int i = 0; i < n; i++) { for(int j = 0; j < i - lb; j++) me[i][j] = 0.0; for(int j = i+ub+1; j < n; j++) me[i][j] = me[i][i+ub]; } // Copy matrix mfull to a compactly stored version // mcmpct // First lb columns padding for later use // Next lb columns for lower diagonals // Next column for diagonal // Next ub columns for upper diagonals // as the highest upper diagonal of the same row const int mm = 2*lb+ub+1; double mcmpct[n][mm]; zero(&mcmpct[0][0], n*mm); for(int i = 0; i < n; i++) for(int j = MAX(i-lb, 0); j < MIN(i+ub+1, n); j++) mcmpct[i][j-i+lb+lb] = me[i][j]; // Replace unused values with NAN to be sure they aren't used for(int k = 0; k < n; k++) for(int i = 0; i < lb; i++) mcmpct[k][i] = NAN; for(int k = 0; k < lb; k++) for(int i = 0; i < lb-k; i++) mcmpct[k][i+lb] = NAN; for(int k=n-1; k >= n-ub; k--) for(int i = n-1-k+1+lb; i < mm; i++) mcmpct[k][i+lb] = NAN; // Generate start vector x1 for test VEC *x1 = v_get(n); randlist(x1->ve, n); // Calculate mfull*x1 = dfull VEC *dfull = v_get(n); mv_mlt(mfull, x1, dfull); // Calculate mcmpct*x1 = dcmpct double dcmpct[n]; bdspecLUmlt(&mcmpct[0][0], n, lb, ub, x1->ve, dcmpct); if(dumpfull) { printf("Vector x (random values)\n"); printf("========================\n"); v_out(x1->ve, n); printf("Matrix A (random values)\n"); printf("========================\n"); printf("Full NxN Meschach Matrix\n"); m_out(mfull->base, n, n); printf("Compact bdspec Array\n"); m_out(&mcmpct[0][0], n, mm); printf("Vector d = A*x\n"); printf("==============\n"); printf("Calculated from Full Meschach Matrix:\n"); v_out(dfull->ve, n); printf("Calculated from Compact bdspec Array:\n"); v_out(dcmpct, 
n); printf("L2 norm of difference between Meschach and bdspec calculations of d\n"); } double ddiff = v_diff(dfull->ve, dcmpct, n); printf("d diff=%6.0E ", ddiff); if(ddiff*ddiff > DBL_EPSILON*v_normsq(dfull->ve, n)) printf("FAIL,"); else printf("PASS,"); PERM *p = px_get(n); LUfactor(mfull, p); int indx[n]; bdspecLUfactormeschscale(&mcmpct[0][0], n, lb, ub, indx); VEC *yfull = v_get(n); catchall(LUsolve(mfull, p, dfull, yfull), printf("--matrix singular--:\n")); double ycmpct[n]; for(int i = 0; i < n; i++) ycmpct[i] = dcmpct[i]; bdspecLUsolve(&mcmpct[0][0], n, lb, ub, indx, ycmpct); if(dumpfull) { printf("\n\n"); printf("LU Factorization\n"); printf("================\n"); printf("Meschach LU Array\n"); m_out(mfull->base, n, n); printf("Compact bdspec LU Array\n"); m_out(&mcmpct[0][0], n, mm); printf("Permutation\n"); printf("===========\n"); printf("Meschach permutation vector\n"); p_out((int *) p->pe, n); printf("bdspec indx vector\n"); p_out(indx, n); printf("A*y = d Solved for y\n"); printf("====================\n"); printf("Meschach result\n"); v_out(yfull->ve, n); printf("bdspec result\n"); v_out(ycmpct, n); printf("L2 norm of difference between Meschach and bdspec calculations of y:\n"); } double ydiff = v_diff(yfull->ve, ycmpct, n); printf("y diff=%6.0E ", ydiff); if(ydiff*ydiff > DBL_EPSILON*v_normsq(yfull->ve, n)) printf("FAIL,"); else printf("PASS,"); if(dumpfull) { printf("\n\n"); printf("L2 norm of error = y-x\n"); printf("======================\n"); } double x1normsq = v_normsq(x1->ve, n); double mescherr = v_diff(yfull->ve, x1->ve, n); printf("mesch err=%6.0E ", mescherr); if(mescherr*mescherr > DBL_EPSILON*x1normsq) printf("FAIL,"); else printf("PASS,"); double bdspecerr = v_diff(ycmpct, x1->ve, n); printf("bdspec err=%6.0E ", bdspecerr); if(bdspecerr*bdspecerr > DBL_EPSILON*x1normsq) printf("FAIL "); else printf("PASS "); if(dumpfull) { printf("\n\n"); } fflush(stdout); }