int main(){vsip_init((void*)0); { vsip_cscalar_d alpha = vsip_cmplx_d(alpha_r,alpha_i); vsip_cscalar_d beta = vsip_cmplx_d(beta_r,beta_i); void VU_cmprint_d(vsip_cmview_d*); void VU_cmfill_d(vsip_cmview_d*, vsip_cscalar_d); vsip_cmview_d *A = vsip_cmcreate_d(M,N,VSIP_ROW,0), *C = vsip_cmcreate_d(N,M,VSIP_ROW,0); int row, col,i; for(row=0;row<M;row++) for(col=0;col<N;col++) vsip_cmput_d(A,row,col,vsip_cmplx_d(col,-row - sqrt(col*row))); printf("\n A input \n"); VU_cmprint_d(A); VU_cmfill_d(C,vsip_cmplx_d(0,0)); printf("\n C input \n"); VU_cmprint_d(C); printf("alpha= %f %+fi, beta= %f %+fi,\n", alpha_r,alpha_i, beta_r,beta_i); { vsip_mat_op OpA = VSIP_MAT_HERM; printf("OpA %i\n",OpA); for(i=0; i<L; i++){ vsip_cgems_d(alpha,A,OpA,beta,C); printf("C number %i\n",i); VU_cmprint_d(C); } } vsip_cmalldestroy_d(A); vsip_cmalldestroy_d(C); }vsip_finalize((void*)0);return 0; }
/*
 * Run the complex FIR filter object `fir` over the input view `xc`,
 * writing the (decimated) results into `yc`.
 *
 * fir : filter object; its fields p, s (saved-state view) and the state
 *       view's length/offset are updated by this call.
 * xc  : input view (only read; a local copy of the view struct is edited).
 * yc  : output view (written through a local copy of the view struct).
 *
 * Returns k, the number of values stored into the output.
 */
int vsip_cfirflt_d(
        vsip_cfir_d *fir,
        const vsip_cvview_d *xc,
        const vsip_cvview_d *yc)
{
    vsip_length nout,k;
    /* local copies of the view structs so offset/length can be changed
       without touching the caller's views or the kernel view fir->h */
    vsip_cvview_d xx = *(xc),
                  yy = *(yc);
    vsip_cvview_d H1 = *(fir->h),
                  H2 = *(fir->h);
    vsip_cvview_d *x=&xx,*y=&yy;
    vsip_cvview_d *h1=&H1,*h2=&H2;
    vsip_offset oinc;
    /* distance the input window advances per output: D input elements */
    oinc = (vsip_offset)((vsip_stride)fir->D * x->stride);
    /* calculate number of terms in y: ceil((N - p) / D) */
    nout = (fir->N - fir->p);
    nout = ((nout % fir->D) == 0) ? (nout / fir->D ) : (nout / fir->D + 1);
    /* do overlap section: each output is the sum of a dot product against
       the saved state (h1 . s) and one against the leading input samples
       (h2 . x), until the input window alone reaches M elements */
    k = 0;
    x->length = fir->p + 1;
    h1->length = fir->s->length;
    h2->length = x->length;
    /* NOTE(review): the offset is set from an element count, which
       presumably assumes fir->h has offset 0 and unit stride - confirm */
    h2->offset = h1->length;
    while(x->length < fir->M){
        vsip_cscalar_d a = vsip_cvdot_d(h1,fir->s);
        vsip_cscalar_d b = vsip_cvdot_d(h2,x);
        vsip_cvput_d(y,k++,vsip_cmplx_d(a.r + b.r,a.i + b.i));
        /* grow the input window by D and shrink the state window by D */
        x->length += fir->D;
        fir->s->length -= fir->D;
        fir->s->offset += fir->D;
        h1->length = fir->s->length;
        h2->length = x->length;
        h2->offset = h1->length;
    }
    /* clamp the window to exactly M elements, moving its start forward
       by however far the loop overshot */
    x->offset += (x->length - fir->M) * x->stride;
    x->length = fir->M;
    while(k < nout){ /* do the rest of the pieces */
        vsip_cvput_d(y,k++,vsip_cvdot_d(fir->h,x));
        x->offset += oinc;
    }
    {
        /* new p = (p mod D) - (N mod D), wrapped into [0, D) */
        vsip_stride temp_p = (fir->p % fir->D) - (fir->N % fir->D);
        fir->p = ((temp_p < 0) ? (vsip_length)((vsip_stride)fir->D + temp_p) : (vsip_length)temp_p);
    }
    /* reset the state view and size it for the next call */
    fir->s->offset = 0;
    fir->s->length = (fir->state == VSIP_STATE_SAVE) ? fir->M - 1 - fir->p : fir->M -1;
    /* point x at the last s->length elements of the caller's input */
    x->length = fir->s->length;
    /* fix by JMA 31/01/2000, incorrect offset calculation */
    /* x->offset = xc->length - fir->s->length; */
    x->offset = xc->offset + (xc->length - fir->s->length) * xc->stride;
    /* SAVE: keep the input tail as state for the next call */
    if((fir->s->length > 0) && (fir->state == VSIP_STATE_SAVE))
        VI_cvcopy_d_d(x,fir->s);
    /* NO_SAVE: zero the state and restart the decimation phase */
    if(fir->state == VSIP_STATE_NO_SAVE) {
        VI_cvfill_d(vsip_cmplx_d((vsip_scalar_d)0,(vsip_scalar_d)0),fir->s);
        fir->p = 0;
    }
    return k;
}
/*
 * Example driver for vsip_cmprodj_d.
 *
 * Fills an M x P matrix A and a P x N matrix B, calls vsip_cmprodj_d to
 * produce the M x N matrix R, and prints all three matrices.
 */
int main()
{
    vsip_init((void*)0);
    {
        vsip_cmview_d *A = vsip_cmcreate_d(M, P, VSIP_ROW, 0);
        vsip_cmview_d *B = vsip_cmcreate_d(P, N, VSIP_ROW, 0);
        vsip_cmview_d *R = vsip_cmcreate_d(M, N, VSIP_ROW, 0);
        int row, col;

        /* A(row,col) = row + (1 + sqrt(row*col)) i */
        for (row = 0; row < M; row++) {
            for (col = 0; col < P; col++) {
                vsip_cmput_d(A, row, col, vsip_cmplx_d(row, 1 + sqrt(row * col)));
            }
        }
        /* B(row,col) = 1 + (row*col) i */
        for (row = 0; row < P; row++) {
            for (col = 0; col < N; col++) {
                vsip_cmput_d(B, row, col, vsip_cmplx_d(1, row * col));
            }
        }

        printf("\n A input \n");
        vsip_cmprodj_d(A, B, R);

        /* entries in a row are separated by ',' and the last ends in ';' */
        for (row = 0; row < M; row++) {
            for (col = 0; col < P; col++) {
                printf("%5.2f + %5.2fi%s ",
                       vsip_real_d(vsip_cmget_d(A, row, col)),
                       vsip_imag_d(vsip_cmget_d(A, row, col)),
                       (col == P - 1) ? ";" : ",");
            }
            printf("\n");
        }

        printf("\n B input \n");
        for (row = 0; row < P; row++) {
            for (col = 0; col < N; col++) {
                printf("%5.2f + %5.2fi%s ",
                       vsip_real_d(vsip_cmget_d(B, row, col)),
                       vsip_imag_d(vsip_cmget_d(B, row, col)),
                       (col == N - 1) ? ";" : ",");
            }
            printf("\n");
        }

        printf("\n R output = A * conj(B)\n");
        for (row = 0; row < M; row++) {
            for (col = 0; col < N; col++) {
                printf("%5.2f + %5.2fi%s ",
                       vsip_real_d(vsip_cmget_d(R, row, col)),
                       vsip_imag_d(vsip_cmget_d(R, row, col)),
                       (col == N - 1) ? ";" : ",");
            }
            printf("\n");
        }

        vsip_cmalldestroy_d(A);
        vsip_cmalldestroy_d(B);
        vsip_cmalldestroy_d(R);
    }
    vsip_finalize((void*)0);
    return 0;
}
int main() { int i; /* define some data space */ vsip_cvview_d* dataComplex; vsip_cvview_d* dataComplexQuotient; vsip_init((void *)0); dataComplex = vsip_cvcreate_d(L, VSIP_MEM_NONE); dataComplexQuotient = vsip_cvcreate_d(L, VSIP_MEM_NONE); /* put some complex data in dataComplex */ for(i = 0; i < L; i++) vsip_cvput_d(dataComplex,i, vsip_cmplx_d((double)(i * i),(double)(i+1))); /*divide dataComplex by some denom and print the input and output */ vsip_cvrsdiv_d(dataComplex,denom,dataComplexQuotient); for(i=0; i<L; i++) printf("(%7.4f + %7.4fi) / %7.4f) = (%7.4f + %7.4fi)\n", vsip_real_d(vsip_cvget_d(dataComplex,i)), vsip_imag_d(vsip_cvget_d(dataComplex,i)), denom, vsip_real_d(vsip_cvget_d(dataComplexQuotient,i)), vsip_imag_d(vsip_cvget_d(dataComplexQuotient,i))); vsip_cblockdestroy_d(vsip_cvdestroy_d(dataComplex)); vsip_cblockdestroy_d(vsip_cvdestroy_d(dataComplexQuotient)); vsip_finalize((void *)0); return 0; }
/*
 * Compute complex Givens rotation terms for the pair (a, b).
 *
 * Outputs (all written through the pointers):
 *   c : real-valued "cosine" (c->i is always set to 0)
 *   s : complex "sine"
 *   r : resulting value
 *
 * When |a| is exactly zero the result is c = 0, s = 1, r = b.
 * Otherwise the norm sqrt(|a|^2 + |b|^2) is formed with both magnitudes
 * scaled by (|a| + |b|) before squaring.
 */
static void VI_cgivens_d(
        vsip_cscalar_d a,
        vsip_cscalar_d b,
        vsip_cscalar_d *c,
        vsip_cscalar_d *s,
        vsip_cscalar_d *r)
{
    vsip_scalar_d mag_a = vsip_cmag_d(a);
    vsip_scalar_d mag_b = vsip_cmag_d(b);

    c->i = 0.0;

    /* degenerate case: nothing to rotate against */
    if(mag_a == 0.0){
        *r = b;
        c->r = 0.0;
        s->r = 1;
        s->i = 0.0;
        return;
    }

    {
        vsip_scalar_d total = mag_a + mag_b;
        /* unit-magnitude complex number with the phase of a */
        vsip_cscalar_d phase = vsip_cmplx_d(a.r/mag_a, a.i/mag_a);
        vsip_scalar_d total_sq = total * total;
        vsip_scalar_d length = total *
            (vsip_scalar_d)sqrt((mag_a*mag_a)/total_sq + (mag_b * mag_b)/total_sq);

        c->r = mag_a/length;
        s->r = (phase.r * b.r + phase.i * b.i)/length;
        s->i = (-phase.r * b.i + phase.i * b.r)/length;
        r->r = phase.r * length;
        r->i = phase.i * length;
    }
}
int main(){vsip_init((void*)0); { void VU_vfprintyg_d(char*,vsip_vview_d*,char*); void VU_vfreqswapIP_d(vsip_vview_d*); vsip_vview_d* Cw = vsip_vcreate_cheby_d(Nlength,ripple,0); vsip_fft_d *fft = vsip_ccfftip_create_d(Nlength,1.0,VSIP_FFT_FWD,0,0); vsip_cvview_d* FCW = vsip_cvcreate_d(Nlength,0); /*printf("CW = "); VU_vprintm_d("%6.8f ;\n",Cw); */ VU_vfprintyg_d("%6.8f\n",Cw,"Cheby_Window"); vsip_cvfill_d(vsip_cmplx_d(0,0),FCW); { vsip_vview_d *rv = vsip_vrealview_d(FCW); vsip_vcopy_d_d(Cw,rv); vsip_ccfftip_d(fft,FCW); vsip_vcmagsq_d(FCW,rv); { vsip_index ind; vsip_scalar_d max = vsip_vmaxval_d(rv,&ind); vsip_scalar_d min = max/(10e12); vsip_vclip_d(rv,min,max,min,max,rv); } vsip_vlog10_d(rv,rv); vsip_svmul_d(10,rv,rv); VU_vfreqswapIP_d(rv); VU_vfprintyg_d("%6.8f\n",rv,"Cheby_Window_Frequency_Response"); vsip_vdestroy_d(rv); } vsip_fft_destroy_d(fft); vsip_valldestroy_d(Cw); vsip_cvalldestroy_d(FCW); } vsip_finalize((void*)0); return 0; }
/*
 * Example driver for vsip_cmprodh_d.
 *
 * Fills an M x P matrix A and an N x P matrix B, calls vsip_cmprodh_d
 * to produce the M x N matrix R, and prints all three matrices.
 */
int main()
{
    vsip_init((void*)0);
    {
        vsip_cmview_d *A = vsip_cmcreate_d(M, P, VSIP_ROW, 0);
        vsip_cmview_d *B = vsip_cmcreate_d(N, P, VSIP_ROW, 0);
        vsip_cmview_d *R = vsip_cmcreate_d(M, N, VSIP_ROW, 0);
        int row, col;

        /* A(row,col) = row + (1 + sqrt(row*col)) i */
        for (row = 0; row < M; row++) {
            for (col = 0; col < P; col++) {
                vsip_cmput_d(A, row, col, vsip_cmplx_d(row, 1 + sqrt(row * col)));
            }
        }
        /* B(row,col) = 1 + (row*col) i */
        for (row = 0; row < N; row++) {
            for (col = 0; col < P; col++) {
                vsip_cmput_d(B, row, col, vsip_cmplx_d(1, row * col));
            }
        }

        printf("\n A input \n");
        vsip_cmprodh_d(A, B, R);

        for (row = 0; row < M; row++) {
            for (col = 0; col < P; col++) {
                printf(": (%5.2f, %5.2f) ",
                       vsip_real_d(vsip_cmget_d(A, row, col)),
                       vsip_imag_d(vsip_cmget_d(A, row, col)));
            }
            printf(":\n");
        }

        printf("\n B input \n");
        for (row = 0; row < N; row++) {
            for (col = 0; col < P; col++) {
                printf(": (%5.2f, %5.2f) ",
                       vsip_real_d(vsip_cmget_d(B, row, col)),
                       vsip_imag_d(vsip_cmget_d(B, row, col)));
            }
            printf(":\n");
        }

        printf("\n R output \n");
        for (row = 0; row < M; row++) {
            for (col = 0; col < N; col++) {
                printf(": (%5.2f, %5.2f) ",
                       vsip_real_d(vsip_cmget_d(R, row, col)),
                       vsip_imag_d(vsip_cmget_d(R, row, col)));
            }
            printf(":\n");
        }

        vsip_cmalldestroy_d(A);
        vsip_cmalldestroy_d(B);
        vsip_cmalldestroy_d(R);
    }
    vsip_finalize((void*)0);
    return 0;
}
int main(){vsip_init((void*)0); { /* vsip_cmview_d *B = vsip_cmcreate_d(P,N,VSIP_ROW,0);*/ vsip_scalar_d Re[20], Im[20]; vsip_cblock_d *Bblock = vsip_cblockbind_d(Re,Im,20,0); vsip_cmview_d *B = vsip_cmbind_d(Bblock,0,4,5,1,4); vsip_cvview_d *a = vsip_cvcreate_d(P,0), *r = vsip_cvcreate_d(N,0); int i, j; for(i=0;i<P;i++) vsip_cvput_d(a,i,vsip_cmplx_d(i,1 + sqrt(i))); for(i=0;i<P;i++) for(j=0;j<N;j++) vsip_cmput_d(B,i,j,vsip_cmplx_d(1 , i * j)); vsip_cvmprod_d(a,B,r); printf("\n vector input \n v=["); for(i=0;i<P;i++) printf("(%5.2f %+5.2fi) ", vsip_real_d(vsip_cvget_d(a,i)), vsip_imag_d(vsip_cvget_d(a,i))); printf("]\n"); printf("\n B input \n B =[\n"); {for(i=0;i<P;i++) {for(j=0;j<N;j++){ printf(" (%5.2f %+5.2fi) ", vsip_real_d(vsip_cmget_d(B,i,j)), vsip_imag_d(vsip_cmget_d(B,i,j))); } printf(";\n"); } printf("]\n"); } printf("\n vector output \n"); for(i=0;i<N;i++) printf("(%5.2f, %5.2f) ", vsip_real_d(vsip_cvget_d(r,i)), vsip_imag_d(vsip_cvget_d(r,i))); printf("\n"); vsip_cvalldestroy_d(a); vsip_cmalldestroy_d(B); vsip_cvalldestroy_d(r); }vsip_finalize((void*)0);return 0; }
/*
 * Set every element of the complex matrix view Z to 0 + 0i, one
 * vsip_cmput_d call per element, walking row by row.
 */
static void VU_cmfillz_d(vsip_cmview_d* Z)
{
    vsip_length nrows = vsip_cmgetcollength_d(Z);
    vsip_length ncols = vsip_cmgetrowlength_d(Z);
    vsip_stride r = 0;

    while(r < nrows){
        vsip_stride c = 0;
        while(c < ncols){
            vsip_cmput_d(Z, r, c, vsip_cmplx_d(0.0,0.0));
            c++;
        }
        r++;
    }
}
int main() { vsip_init((void*)0); { vsip_cscalar_d alpha = vsip_cmplx_d(alpha_r,alpha_i), beta = vsip_cmplx_d(beta_r,beta_i); void VU_cmprint_d(vsip_cmview_d*); void VU_cmfill_d(vsip_cmview_d*, vsip_cscalar_d); vsip_cmview_d *A = vsip_cmcreate_d(M,N,VSIP_ROW,0), *B = vsip_cmcreate_d(M,P,VSIP_ROW,0), *C = vsip_cmcreate_d(N,P,VSIP_ROW,0); int row, col,i; for(row=0; row<M; row++) for(col=0; col<N; col++) vsip_cmput_d(A,row,col,vsip_cmplx_d(col,-row)); for(row=0; row<M; row++) for(col=0; col<P; col++) vsip_cmput_d(B,row,col,vsip_cmplx_d(row, 2*col)); printf("\n A input \n"); VU_cmprint_d(A); printf("\n B input \n"); VU_cmprint_d(B); VU_cmfill_d(C,vsip_cmplx_d(0,0)); printf("alpha= (%f %+fi), beta= (%f %+fi),\n", vsip_real_d(alpha),vsip_imag_d(alpha), vsip_real_d(beta),vsip_imag_d(beta)); { vsip_mat_op OpA = VSIP_MAT_HERM; vsip_mat_op OpB = VSIP_MAT_NTRANS; printf("OpA %i OpB %i\n",OpA,OpB); for(i=0; i<L; i++) { vsip_cgemp_d(alpha,A,OpA,B,OpB,beta,C); printf("C number %i\n",i); VU_cmprint_d(C); } } vsip_cmalldestroy_d(A); vsip_cmalldestroy_d(B); vsip_cmalldestroy_d(C); } vsip_finalize((void*)0); return 0; }
/*
 * Create an M x M complex identity matrix.
 *
 * Returns the new matrix view, or NULL if either the matrix or the
 * temporary row view could not be created.
 *
 * The matrix is created row-major, and the fill is done through a single
 * vector view on row 0: first stretched to M*M elements to zero the whole
 * matrix, then reshaped to length M with stride M+1 so that it walks the
 * diagonal.
 */
static vsip_cmview_d* VU_cI_d(vsip_length M)
{
    vsip_cmview_d *eye = vsip_cmcreate_d(M, M, VSIP_ROW, VSIP_MEM_NONE);
    vsip_cvview_d *walker;

    if(eye == NULL)
        return (vsip_cmview_d*) NULL;

    walker = vsip_cmrowview_d(eye, 0);
    if(walker == NULL){
        /* NOTE(review): only the view is destroyed on this path; the
           block created by vsip_cmcreate_d looks like it is left
           allocated - confirm */
        vsip_cmdestroy_d(eye);
        return (vsip_cmview_d*) NULL;
    }

    /* zero every element */
    vsip_cvputlength_d(walker, (vsip_length)(M * M));
    vsip_cvfill_d(vsip_cmplx_d(0.0,0.0), walker);

    /* ones down the diagonal */
    vsip_cvputlength_d(walker, M);
    vsip_cvputstride_d(walker, (vsip_stride) (M + 1));
    vsip_cvfill_d(vsip_cmplx_d(1.0,0.0), walker);

    vsip_cvdestroy_d(walker);
    return eye;
}
int main(int argc, char *argv[]){vsip_init((void*)0); { if(argc < 4){ printf("usage\nfirEx length decimation filterFile <indataFile >outputFile\n"); exit(0); } { vsip_length N = (vsip_length)atoi(argv[1]); int D = atoi(argv[2]); vsip_scalar_vi n = 0,k = 0; vsip_scalar_d hvr,hvi; FILE *fh; vsip_cfir_d *fir; vsip_cvview_d *x = vsip_cvcreate_d(N,0), *h,*y; fh = fopen(argv[3],"r"); while(fscanf(fh,"%lf %lf",&hvr, &hvi) == 2) n++; rewind(fh); y = vsip_cvcreate_d(N/D + ((N % D) ? 1:0),0); h = vsip_cvcreate_d(n,0); n=0; while(fscanf(fh,"%lf %lf",&hvr,&hvi) == 2){ vsip_cvput_d(h,n,vsip_cmplx_d(hvr,hvi)); n++; } fclose(fh); /* fir = vsip_cfir_create_d(h,VSIP_SYM_EVEN_LEN_ODD,N,D,STATE,0,0); */ /* fir = vsip_cfir_create_d(h,VSIP_SYM_EVEN_LEN_EVEN,N,D,STATE,0,0); */ fir = vsip_cfir_create_d(h,VSIP_NONSYM,N,D,STATE,0,0); n = VU_cscanstdin_d(x); while(n == N){ k = vsip_cfirflt_d(fir,x,y); VU_cprintstdout_d(y,0,k); n = VU_cscanstdin_d(x); } if(n != 0){ k = vsip_cfirflt_d(fir,x,y); VU_cprintstdout_d(y,0,k); } {/* test getattr */ vsip_cfir_attr attr; vsip_cfir_getattr_d(fir,&attr); printf("kernel %lu\n input %lu\n output %lu\ndecimation %d\n", attr.kernel_len,attr.in_len,attr.out_len, attr.decimation); } vsip_cvalldestroy_d(y); vsip_cvalldestroy_d(x); vsip_cvalldestroy_d(h); vsip_cfir_destroy_d(fir); }vsip_finalize((void*)0);return 1; } }
vsip_cscalar_d (vsip_cvmeanval_d)( const vsip_cvview_d* a) { { /* register */ vsip_length n = a->length; vsip_stride cast = a->block->cstride; vsip_scalar_d *apr = (vsip_scalar_d*) ((a->block->R->array) + cast * a->offset); vsip_scalar_d *api = (vsip_scalar_d*) ((a->block->I->array) + cast * a->offset); vsip_cscalar_d sum = vsip_cmplx_d((vsip_scalar_d)0,(vsip_scalar_d)0); /* register */ vsip_stride ast = (cast * a->stride); while(n-- > 0){ sum.r += *apr; sum.i += *api; apr += ast; api += ast; } sum.r /= a->length; sum.i /= a->length; return sum; } }
/*
 * Complex addition: returns x + y, formed component-wise from the real
 * and imaginary parts of the two arguments.
 */
vsip_cscalar_d (vsip_cadd_d)(
        vsip_cscalar_d x,
        vsip_cscalar_d y)
{
    vsip_cscalar_d total = vsip_cmplx_d(
            vsip_real_d(x) + vsip_real_d(y),
            vsip_imag_d(x) + vsip_imag_d(y));
    return total;
}
/*
 * Complex conjugate: returns a value with the real part of x and the
 * negated imaginary part of x.
 */
vsip_cscalar_d (vsip_conj_d)(vsip_cscalar_d x)
{
    vsip_cscalar_d mirrored = vsip_cmplx_d(vsip_real_d(x), -(vsip_imag_d(x)));
    return mirrored;
}
/*
 * FFT-based 1-D complex correlation of reference `h` with data `x`,
 * written to `y`.
 *
 * cor  : correlation object; supplies the work vectors cor->x and cor->h,
 *        the FFT object cor->fft and the sizes N, m, mn, plus the support
 *        region that selects which part of the result is returned.
 * bias : VSIP_UNBIASED selects the scaled output path; any other value
 *        copies the raw result.
 * h, x : input views (read only).
 * y    : output view; y->length elements are produced.
 *
 * The work vectors cor->x and cor->h are overwritten.
 */
void vsip_ccorrelate1d_d(
        const vsip_ccorr1d_d *cor,
        vsip_bias bias,
        const vsip_cvview_d *h,
        const vsip_cvview_d *x,
        const vsip_cvview_d *y)
{
    /* local copies of the work-vector view structs; changing their
       offset/length selects sub-ranges of the same underlying data */
    vsip_cvview_d xx = *cor->x,
                  hh = *cor->h;
    vsip_cvview_d *xt = &xx,
                  *ht = &hh;

    /* work x: zeros in the leading part, then the data x at the end */
    xt->length = cor->x->length - x->length;
    VI_cvfill_d(vsip_cmplx_d((vsip_scalar_d)0,(vsip_scalar_d)0),xt);
    xt->offset = xt->length;
    xt->length = x->length;
    VI_cvcopy_d_d(x,xt);
    xt->length = cor->x->length;
    xt->offset = 0;

    /* work h: the reference h at the start, zeros in the remainder */
    ht->length = cor->h->length - h->length;
    ht->offset = h->length;
    VI_cvfill_d(vsip_cmplx_d((vsip_scalar_d)0,(vsip_scalar_d)0),ht);
    ht->offset = 0;
    ht->length = h->length;
    VI_cvcopy_d_d(h,ht);

    /* transform both, multiply (vsip_cvjmul_d), conjugate, scale by 1/N
       and transform again with the same FFT object.
       NOTE(review): conjugating before the second pass of cor->fft
       presumably stands in for an inverse transform - confirm */
    vsip_ccfftip_d(cor->fft,cor->h);
    vsip_ccfftip_d(cor->fft,cor->x);
    vsip_cvjmul_d(cor->x,cor->h,cor->x);
    vsip_cvconj_d(cor->x,cor->x);
    vsip_rscvmul_d(1/(vsip_scalar_d)cor->N,cor->x,cor->x);
    vsip_ccfftip_d(cor->fft,cor->x);
    /* vsip_cvconj_d(cor->x,cor->x); */

    /* pick the slice of the work vector for the requested support; in
       every case y->length elements are taken starting at xt->offset */
    switch(cor->support){
        case VSIP_SUPPORT_FULL:
            xt->offset = xt->length - cor->mn;
            xt->length = y->length;
            if(bias == VSIP_UNBIASED){
                VI_cvunbiasfull_d(cor,xt,y);
            } else {
                VI_cvcopy_d_d(xt,y);
            }
            break;
        case VSIP_SUPPORT_SAME:
            xt->offset = xt->length - cor->mn + (cor->m-1)/2;
            xt->length = y->length;
            if(bias == VSIP_UNBIASED){
                VI_cvunbiassame_d(cor,xt,y);
            } else {
                VI_cvcopy_d_d(xt,y);
            }
            break;
        case VSIP_SUPPORT_MIN:
            xt->offset = xt->length - cor->mn + cor->m - 1;
            xt->length = y->length;
            if(bias == VSIP_UNBIASED){
                /* every output is scaled by the same factor 1/m */
                vsip_rscvmul_d(1.0/(vsip_scalar_d)cor->m,xt,y);
            } else {
                VI_cvcopy_d_d(xt,y);
            }
            break;
    }
    return;
}
/*
 * Complex matrix product c = a * b.
 *
 * a, b : input matrix views (read only).
 * c    : output matrix view; it is zero-filled first and then written.
 *
 * The loop bounds are a->col_length (i), c->row_length (j) and
 * a->row_length (k); b's lengths are never read.
 *
 * One of eight hand-written loop nests is selected from the storage
 * order of a, b and c (row_stride <= col_stride is treated as row major).
 */
/* note that matrix products may not be done in place */
void (vsip_cmprod_d)(
        const vsip_cmview_d* a,
        const vsip_cmview_d* b,
        const vsip_cmview_d* c)
{
    /* Note that c => column major, r => row major */
    /* Below decide input matrix majors ccc => 0 ccr=> 1, crc => 2, etc to rrr => 7 */
    /* the resulting method is used to calculate */
    /* the three letters are the majors of a, b, c in that order:
       a contributes bit 2, b bit 1 and c bit 0 */
    unsigned method = (unsigned)(c->row_stride <= c->col_stride) +
                      (unsigned)(b->row_stride <= b->col_stride) * 2 +
                      (unsigned)(a->row_stride <= a->col_stride) * 4;
    /* get the stride info */
    /* view strides scaled by the block's cstride, i.e. in array units */
    vsip_stride a_st_r = a->row_stride * a->block->cstride,
                a_st_c = a->col_stride * a->block->cstride,
                b_st_r = b->row_stride * b->block->cstride,
                b_st_c = b->col_stride * b->block->cstride,
                c_st_r = c->row_stride * c->block->cstride,
                c_st_c = c->col_stride * c->block->cstride;
    /* get the length info */
    /* these are used as countdown loop counters and are reloaded from
       the views after each inner loop finishes */
    vsip_length a_c_l = a->col_length, /* i_size */
                c_r_l = c->row_length, /* j_size */
                a_r_l = a->row_length; /* k_size */
    /* define some scalars used in the calculations below */
    vsip_cscalar_d a_scalar, b_scalar, temp = vsip_cmplx_d(0.0f,0.0f);
    /* get the pointers to the input and output data spaces */
    vsip_scalar_d *ap_r = (a->block->R->array) + a->offset * a->block->cstride,
                  *ap_i = (a->block->I->array) + a->offset * a->block->cstride,
                  *bp_r = (b->block->R->array) + b->offset * b->block->cstride,
                  *bp_i = (b->block->I->array) + b->offset * b->block->cstride,
                  *cp_r = (c->block->R->array) + c->offset * c->block->cstride,
                  *cp_i = (c->block->I->array) + c->offset * c->block->cstride;
    /* some additional pointers to store initial data */
    vsip_scalar_d *ap0_r = ap_r, *ap0_i = ap_i,
                  *bp0_r = bp_r, *bp0_i = bp_i,
                  *cp0_r = cp_r, *cp0_i = cp_i;
    /* second copy of b's origin; cases 4 and 5 advance bp0 and use this
       to rewind it */
    vsip_scalar_d *bp1_r = bp_r, *bp1_i = bp_i;
    /* initialize output matrix to zero */
    VI_cmfill_d(temp,c); /* initialize output matrix to zero */
    /* select multiply routine based on majors of input matrices */
    switch(method){
        case 0 : { /* ccc */
            /* loop order j, k, i: accumulates a(:,k) * b(k,j) into c(:,j) */
            vsip_scalar_d *ap_ik_r, *ap_0k_r = ap0_r,
                          *ap_ik_i, *ap_0k_i = ap0_i;
            vsip_scalar_d *bp_kj_r, *bp_0j_r = bp0_r,
                          *bp_kj_i, *bp_0j_i = bp0_i;
            vsip_scalar_d *cp_ij_r, *cp_0j_r = cp0_r,
                          *cp_ij_i, *cp_0j_i = cp0_i;
            while(c_r_l-- > 0){ /* j */
                cp_ij_r = cp_0j_r; cp_ij_i = cp_0j_i;
                bp_kj_r = bp_0j_r; bp_kj_i = bp_0j_i;
                while(a_r_l-- > 0){ /* k */
                    b_scalar.r = *bp_kj_r; b_scalar.i = *bp_kj_i;
                    bp_kj_r += b_st_c; bp_kj_i += b_st_c;
                    ap_ik_r = ap_0k_r; ap_ik_i = ap_0k_i;
                    while(a_c_l-- > 0){ /* i */
                        *cp_ij_r = *ap_ik_r * b_scalar.r - *ap_ik_i * b_scalar.i + *cp_ij_r;
                        *cp_ij_i = *ap_ik_r * b_scalar.i + *ap_ik_i * b_scalar.r + *cp_ij_i;
                        cp_ij_r += c_st_c; cp_ij_i += c_st_c;
                        ap_ik_r += a_st_c; ap_ik_i += a_st_c;
                    }
                    a_c_l = a->col_length;
                    ap_0k_r += a_st_r; ap_0k_i += a_st_r;
                    cp_ij_r = cp_0j_r; cp_ij_i = cp_0j_i;
                }
                a_r_l = a->row_length;
                cp_0j_r += c_st_r; cp_0j_i += c_st_r;
                bp_0j_r += b_st_r; bp_0j_i += b_st_r;
                ap_0k_r = ap0_r; ap_0k_i = ap0_i;
            }
        } break;
        case 1 : { /* ccr */
            /* loop order i, j, k: one dot product per output element */
            vsip_scalar_d *ap_ik_r, *ap_i0_r = ap0_r,
                          *ap_ik_i, *ap_i0_i = ap0_i;
            vsip_scalar_d *bp_kj_r, *bp_0j_r,
                          *bp_kj_i, *bp_0j_i;
            vsip_scalar_d *cp_ij_r, *cp_i0_r = cp0_r,
                          *cp_ij_i, *cp_i0_i = cp0_i;
            while(a_c_l-- > 0){ /* i */
                cp_ij_r = cp_i0_r; cp_ij_i = cp_i0_i;
                bp_0j_r = bp0_r; bp_0j_i = bp0_i;
                while(c_r_l-- > 0){ /* j */
                    ap_ik_r = ap_i0_r; ap_ik_i = ap_i0_i;
                    temp = vsip_cmplx_d(0.0f,0.0f);
                    bp_kj_r = bp_0j_r; bp_kj_i = bp_0j_i;
                    while(a_r_l-- > 0){ /* k */
                        temp.r += *ap_ik_r * *bp_kj_r - *ap_ik_i * *bp_kj_i;
                        temp.i += *ap_ik_r * *bp_kj_i + *ap_ik_i * *bp_kj_r;
                        ap_ik_r += a_st_r; ap_ik_i += a_st_r;
                        bp_kj_r += b_st_c; bp_kj_i += b_st_c;
                    }
                    a_r_l = a->row_length;
                    *cp_ij_r = temp.r; *cp_ij_i = temp.i;
                    cp_ij_r += c_st_r; cp_ij_i += c_st_r;
                    bp_0j_r += b_st_r; bp_0j_i += b_st_r;
                }
                c_r_l = c->row_length;
                cp_i0_r += c_st_c; cp_i0_i += c_st_c;
                ap_i0_r += a_st_c; ap_i0_i += a_st_c;
            }
        } break;
        case 2 : { /* crc */
            /* loop order j, k, i: accumulates a(:,k) * b(k,j) into c(:,j) */
            vsip_scalar_d *ap_ik_r, *ap_0k_r = ap0_r,
                          *ap_ik_i, *ap_0k_i = ap0_i;
            vsip_scalar_d *bp_kj_r, *bp_0j_r = bp0_r,
                          *bp_kj_i, *bp_0j_i = bp0_i;
            vsip_scalar_d *cp_ij_r, *cp_0j_r = cp0_r,
                          *cp_ij_i, *cp_0j_i = cp0_i;
            while(c_r_l-- > 0){ /* j */
                bp_kj_r = bp_0j_r; bp_kj_i = bp_0j_i;
                while(a_r_l-- > 0){ /* k */
                    b_scalar.r = *bp_kj_r; b_scalar.i = *bp_kj_i;
                    ap_ik_r = ap_0k_r; ap_ik_i = ap_0k_i;
                    cp_ij_r = cp_0j_r; cp_ij_i = cp_0j_i;
                    while(a_c_l-- > 0){ /* i */
                        *cp_ij_r += *ap_ik_r * b_scalar.r - *ap_ik_i * b_scalar.i;
                        *cp_ij_i += *ap_ik_i * b_scalar.r + *ap_ik_r * b_scalar.i;
                        cp_ij_r += c_st_c; cp_ij_i += c_st_c;
                        ap_ik_r += a_st_c; ap_ik_i += a_st_c;
                    }
                    a_c_l = a->col_length;
                    ap_0k_r += a_st_r; ap_0k_i += a_st_r;
                    bp_kj_r += b_st_c; bp_kj_i += b_st_c;
                }
                a_r_l = a->row_length;
                cp_0j_r += c_st_r; cp_0j_i += c_st_r;
                bp_0j_r += b_st_r; bp_0j_i += b_st_r;
                ap_0k_r = ap0_r; ap_0k_i = ap0_i;
            }
        } break;
        case 3 : { /* crr */
            /* loop order i, k, j: accumulates a(i,k) * b(k,:) into c(i,:) */
            vsip_scalar_d *ap_ik_r, *ap_i0_r = ap0_r,
                          *ap_ik_i, *ap_i0_i = ap0_i;
            vsip_scalar_d *bp_kj_r, *bp_k0_r = bp0_r,
                          *bp_kj_i, *bp_k0_i = bp0_i;
            vsip_scalar_d *cp_ij_r, *cp_i0_r = cp0_r,
                          *cp_ij_i, *cp_i0_i = cp0_i;
            while(a_c_l-- > 0){ /* i */
                ap_ik_r = ap_i0_r; ap_ik_i = ap_i0_i;
                while(a_r_l-- > 0){ /* k */
                    a_scalar.r = *ap_ik_r; a_scalar.i = *ap_ik_i;
                    ap_ik_r += a_st_r; ap_ik_i += a_st_r;
                    bp_kj_r = bp_k0_r; bp_kj_i = bp_k0_i;
                    cp_ij_r = cp_i0_r; cp_ij_i = cp_i0_i;
                    while(c_r_l-- > 0){ /* j */
                        *cp_ij_r = *bp_kj_r * a_scalar.r - *bp_kj_i * a_scalar.i + *cp_ij_r;
                        *cp_ij_i = *bp_kj_i * a_scalar.r + *bp_kj_r * a_scalar.i + *cp_ij_i;
                        cp_ij_r += c_st_r; cp_ij_i += c_st_r;
                        bp_kj_r += b_st_r; bp_kj_i += b_st_r;
                    }
                    c_r_l = c->row_length;
                    bp_k0_r += b_st_c; bp_k0_i += b_st_c;
                }a_r_l = a->row_length;
                bp_k0_r = bp0_r; bp_k0_i = bp0_i;
                cp_i0_r += c_st_c; cp_i0_i += c_st_c;
                ap_i0_r += a_st_c; ap_i0_i += a_st_c;
            }
        } break;
        case 4 : { /* rcc */
            /* loop order i, j, k (dot product per element), walking the
               function-level pointers directly; bp0 tracks the current
               column of b and is rewound from bp1 after each row of c */
            while(a_c_l-- > 0){
                while(c_r_l-- > 0){
                    temp = vsip_cmplx_d(0.0f,0.0f);
                    while(a_r_l-- > 0){
                        temp.r += *ap_r * *bp_r - *ap_i * *bp_i;
                        temp.i += *ap_r * *bp_i + *ap_i * *bp_r;
                        ap_r += a_st_r; ap_i += a_st_r;
                        bp_r += b_st_c; bp_i += b_st_c;
                    }
                    *cp_r = temp.r; *cp_i = temp.i;
                    cp_r += c_st_r; cp_i += c_st_r;
                    ap_r = ap0_r; ap_i = ap0_i;
                    bp0_r += b_st_r; bp0_i += b_st_r;
                    bp_r = bp0_r; bp_i = bp0_i;
                    a_r_l = a->row_length;
                }
                ap0_r += a_st_c; ap0_i += a_st_c;
                ap_r = ap0_r; ap_i = ap0_i;
                bp0_r = bp1_r; bp0_i = bp1_i;
                bp_r = bp0_r; bp_i = bp0_i;
                cp0_r += c_st_c; cp0_i += c_st_c;
                cp_r = cp0_r; cp_i = cp0_i;
                c_r_l = c->row_length;
            }
        } break;
        case 5 : { /* rcr */
            /* same loop order and pointer walk as case 4 */
            while(a_c_l-- > 0){
                while(c_r_l-- > 0){
                    temp = vsip_cmplx_d(0.0f,0.0f);
                    while(a_r_l-- > 0){
                        temp.r += *ap_r * *bp_r - *ap_i * *bp_i;
                        temp.i += *ap_r * *bp_i + *ap_i * *bp_r;
                        ap_r += a_st_r; ap_i += a_st_r;
                        bp_r += b_st_c; bp_i += b_st_c;
                    }
                    *cp_r = temp.r; *cp_i = temp.i;
                    cp_r += c_st_r; cp_i += c_st_r;
                    ap_r = ap0_r; ap_i = ap0_i;
                    bp0_r += b_st_r; bp_r = bp0_r;
                    bp0_i += b_st_r; bp_i = bp0_i;
                    a_r_l = a->row_length;
                }
                ap0_r += a_st_c; ap_r = ap0_r;
                ap0_i += a_st_c; ap_i = ap0_i;
                bp0_r = bp1_r; bp_r = bp0_r;
                bp0_i = bp1_i; bp_i = bp0_i;
                cp0_r += c_st_c; cp_r = cp0_r;
                cp0_i += c_st_c; cp_i = cp0_i;
                c_r_l = c->row_length;
            }
        } break;
        case 6 : { /* rrc */
            vsip_scalar_d *ap_ik_r, *ap_i0_r,
                          *ap_ik_i, *ap_i0_i;
            vsip_scalar_d *bp_kj_r, *bp_0j_r = bp0_r,
                          *bp_kj_i, *bp_0j_i = bp0_i;
            vsip_scalar_d *cp_ij_r, *cp_0j_r = cp0_r,
                          *cp_ij_i, *cp_0j_i = cp0_i;
            /* jik */
            while(c_r_l-- > 0){ /* j */
                cp_ij_r = cp_0j_r; cp_ij_i = cp_0j_i;
                ap_i0_r = ap0_r; ap_i0_i = ap0_i;
                while(a_c_l-- > 0){ /* i */
                    temp = vsip_cmplx_d(0.0f, 0.0f);
                    ap_ik_r = ap_i0_r; ap_ik_i = ap_i0_i;
                    bp_kj_r = bp_0j_r; bp_kj_i = bp_0j_i;
                    while(a_r_l-- > 0){ /* k */
                        temp.r += *ap_ik_r * *bp_kj_r - *ap_ik_i * *bp_kj_i;;
                        temp.i += *ap_ik_r * *bp_kj_i + *ap_ik_i * *bp_kj_r;
                        ap_ik_r += a_st_r; ap_ik_i += a_st_r;
                        bp_kj_r += b_st_c; bp_kj_i += b_st_c;
                    }
                    a_r_l = a->row_length;
                    *cp_ij_r = temp.r; *cp_ij_i = temp.i;
                    cp_ij_r +=c_st_c; cp_ij_i +=c_st_c;
                    ap_i0_r += a_st_c; ap_i0_i += a_st_c;
                }
                a_c_l = a->col_length;
                cp_0j_r += c_st_r; cp_0j_i += c_st_r;
                bp_0j_r += b_st_r; bp_0j_i += b_st_r;
            }
        } break;
        case 7 : { /* rrr */
            /* loop order i, k, j: accumulates a(i,k) * b(k,:) into c(i,:) */
            vsip_scalar_d *ap_ik_r, *ap_i0_r = ap0_r,
                          *ap_ik_i, *ap_i0_i = ap0_i;
            vsip_scalar_d *bp_kj_r, *bp_k0_r = bp0_r,
                          *bp_kj_i, *bp_k0_i = bp0_i;
            vsip_scalar_d *cp_ij_r, *cp_i0_r = cp0_r,
                          *cp_ij_i, *cp_i0_i = cp0_i;
            while(a_c_l-- > 0){ /* i */
                ap_ik_r = ap_i0_r; ap_ik_i = ap_i0_i;
                while(a_r_l-- > 0){ /* k */
                    a_scalar.r = *ap_ik_r; a_scalar.i = *ap_ik_i;
                    ap_ik_r += a_st_r; ap_ik_i += a_st_r;
                    bp_kj_r = bp_k0_r; bp_kj_i = bp_k0_i;
                    cp_ij_r = cp_i0_r; cp_ij_i = cp_i0_i;
                    while(c_r_l-- > 0){ /* j */
                        *cp_ij_r = *bp_kj_r * a_scalar.r - *bp_kj_i * a_scalar.i + *cp_ij_r;
                        *cp_ij_i = *bp_kj_r * a_scalar.i + *bp_kj_i * a_scalar.r + *cp_ij_i;
                        cp_ij_r += c_st_r; cp_ij_i += c_st_r;
                        bp_kj_r += b_st_r; bp_kj_i += b_st_r;
                    }
                    c_r_l = c->row_length;
                    bp_k0_r += b_st_c; bp_k0_i += b_st_c;
                }a_r_l = a->row_length;
                bp_k0_r = bp0_r; bp_k0_i = bp0_i;
                cp_i0_r += c_st_c; cp_i0_i += c_st_c;
                ap_i0_r += a_st_c; ap_i0_i += a_st_c;
            }
        }
    }
}
/*
 * Check vsip_ccorrelate1d_d against the reference implementation
 * ref_ccorr_d for one combination of support region, bias and sizes.
 *
 * First the attributes reported by the correlation object are asserted
 * to match the request. Then three rounds are run with different data
 * (constant reference + ramp input, random reference + ramp input, both
 * random); each round asserts that cverror_db_d(out, chk) is below -100.
 *
 * When VERBOSE is set, a failing round first dumps every output/expected
 * pair. The dump prints the index as unsigned long with %lu; it was
 * previously passed to %d, which does not match vsip_index.
 *
 * NOTE(review): corr, rand, ref, in, out and chk are created here and
 * never destroyed before returning - confirm whether that is intended.
 */
void test_ccorr_d(vsip_support_region support,
                  vsip_bias bias,
                  vsip_length ref_size,
                  vsip_length input_size)
{
  vsip_length const n_loop = 3;
  vsip_length const output_size = ref_corr_output_size(support, ref_size, input_size);

  vsip_ccorr1d_d *corr = vsip_ccorr1d_create_d(ref_size, input_size, support, 0, VSIP_ALG_SPACE);

  /* the object must report back exactly what was requested */
  vsip_ccorr1d_attr attr;
  vsip_ccorr1d_getattr_d(corr, &attr);
  test_assert(attr.support == support);
  test_assert(attr.ref_len == ref_size);
  test_assert(attr.data_len == input_size);
  test_assert(attr.lag_len == output_size);

  vsip_randstate *rand = vsip_randcreate(0, 1, 1, VSIP_PRNG);

  vsip_cvview_d *ref = vsip_cvcreate_d(ref_size, VSIP_MEM_NONE);
  vsip_cvview_d *in = vsip_cvcreate_d(input_size, VSIP_MEM_NONE);
  /* out and chk start with different fill values (100 vs 101), so they
     can only agree if both get overwritten */
  vsip_cvview_d *out = vsip_cvcreate_d(output_size, VSIP_MEM_NONE);
  vsip_cvfill_d(vsip_cmplx_d(100,0), out);
  vsip_cvview_d *chk = vsip_cvcreate_d(output_size, VSIP_MEM_NONE);
  vsip_cvfill_d(vsip_cmplx_d(101,0), chk);

  vsip_index loop;
  for (loop=0; loop<n_loop; ++loop)
  {
    if (loop == 0)
    {
      vsip_cvfill_d(vsip_cmplx_d(1,0), ref);
      vsip_cvramp_d(vsip_cmplx_d(0,0), vsip_cmplx_d(1,0), in);
    }
    else if (loop == 1)
    {
      vsip_cvrandu_d(rand, ref);
      vsip_cvramp_d(vsip_cmplx_d(0,0), vsip_cmplx_d(1,0), in);
    }
    else
    {
      vsip_cvrandu_d(rand, ref);
      vsip_cvrandu_d(rand, in);
    }

    vsip_ccorrelate1d_d(corr, bias, ref, in, out);
    ref_ccorr_d(bias, support, ref, in, chk);

    double error = cverror_db_d(out, chk);

#if VERBOSE
    if (error > -100)
    {
      vsip_index i;
      for (i=0; i<output_size; ++i)
      {
        vsip_cscalar_d out_value = vsip_cvget_d(out, i);
        vsip_cscalar_d chk_value = vsip_cvget_d(chk, i);
        printf("%lu : out = (%f, %f), chk = (%f, %f)\n",
               (unsigned long)i,
               out_value.r, out_value.i,
               chk_value.r, chk_value.i);
      }
      printf("error = %f\n", error);
    }
#endif

    test_assert(error < -100);
  }
}
/*
 * Reference (direct, non-FFT) complex correlation used to check the
 * library routine.
 *
 * For every output lag, the part of `in` that lines up with `ref` is
 * copied into a zeroed scratch vector of the reference's length, and the
 * output is vsip_cvjdot_d(ref, scratch). With VSIP_UNBIASED the value is
 * divided by a per-lag scale.
 *
 * The length of `out` must equal ref_corr_output_size(sup, M, N);
 * this is asserted.
 *
 * NOTE(review): the scratch vector is created here and never destroyed
 * before returning - confirm whether that is intended.
 */
void ref_ccorr_d(vsip_bias bias,
                 vsip_support_region sup,
                 vsip_cvview_d const *ref,
                 vsip_cvview_d const *in,
                 vsip_cvview_d const *out)
{
  vsip_length ref_len = vsip_cvgetlength_d(ref);
  vsip_length in_len = vsip_cvgetlength_d(in);
  vsip_length out_len = vsip_cvgetlength_d(out);
  vsip_length expected_len = ref_corr_output_size(sup, ref_len, in_len);
  vsip_stride shift = ref_expected_shift(sup, ref_len);

  assert(expected_len == out_len);

  vsip_cvview_d *window = vsip_cvcreate_d(ref_len, VSIP_MEM_NONE);

  /* compute correlation */
  vsip_index lag;
  for (lag=0; lag<out_len; ++lag)
  {
    vsip_stride start = (vsip_stride)lag + shift;
    double divisor;

    vsip_cvfill_d(vsip_cmplx_d(0,0), window);

    if (start < 0)
    {
      /* window starts before the input: only its tail overlaps */
      vsip_cvview_d *dst = vsip_cvsubview_d(window, -start, ref_len + start);
      vsip_cvview_d *src = vsip_cvsubview_d(in, 0, ref_len + start);
      vsip_cvcopy_d_d(src, dst);
      vsip_cvdestroy_d(dst);
      vsip_cvdestroy_d(src);
      divisor = ref_len + start;
    }
    else if (start + ref_len > in_len)
    {
      /* window runs past the input: only its head overlaps */
      vsip_cvview_d *dst = vsip_cvsubview_d(window, 0, in_len - start);
      vsip_cvview_d *src = vsip_cvsubview_d(in, start, in_len - start);
      vsip_cvcopy_d_d(src, dst);
      vsip_cvdestroy_d(dst);
      vsip_cvdestroy_d(src);
      divisor = in_len - start;
    }
    else
    {
      /* window lies entirely inside the input */
      vsip_cvview_d *src = vsip_cvsubview_d(in, start, ref_len);
      vsip_cvcopy_d_d(src, window);
      vsip_cvdestroy_d(src);
      divisor = ref_len;
    }

#if !VSIP_IMPL_CORR_CORRECT_SAME_SUPPORT_SCALING
    /* SAME support gets its own scale unless the macro above is set */
    if (sup == VSIP_SUPPORT_SAME)
    {
      if (lag < (ref_len/2))
        divisor = lag + (ref_len+1)/2;                /* i + ceil(M/2) */
      else if (lag < in_len - (ref_len/2))
        divisor = ref_len;                            /* M */
      else
        divisor = in_len - 1 + (ref_len+1)/2 - lag;   /* N-1+ceil(M/2)-i */
    }
#endif

    vsip_cscalar_d val = vsip_cvjdot_d(ref, window);
    if (bias == VSIP_UNBIASED)
    {
      val.r /= divisor;
      val.i /= divisor;
    }
    vsip_cvput_d(out, lag, val);
  }
}
/*
 * Real-by-complex multiplication: returns x * y, i.e. both the real and
 * the imaginary part of y multiplied by the real scalar x.
 */
vsip_cscalar_d (vsip_rcmul_d)(
        vsip_scalar_d x,
        vsip_cscalar_d y)
{
    vsip_cscalar_d scaled = vsip_cmplx_d(x * vsip_real_d(y), x * vsip_imag_d(y));
    return scaled;
}
int main(){vsip_init((void*)0); { vsip_cmview_d *Adummy = vsip_cmcreate_d(5*NN,5*NN,VSIP_COL,0); vsip_cmview_d *A = vsip_cmsubview_d(Adummy,3,2,NN,NN); /* vsip_cmview_d *A= vsip_cmcreate_d(NN,NN,VSIP_COL,0); */ vsip_cvview_d *x0 = vsip_cvcreate_d(NN,0); vsip_vview_d *x0_r = vsip_vrealview_d(x0); vsip_vview_d *x0_i = vsip_vimagview_d(x0); vsip_cmview_d *X = vsip_cmcreate_d(NN,3,VSIP_ROW,0); vsip_cmview_d *XT = vsip_cmcreate_d(NN,3,VSIP_COL,0); vsip_cmputrowstride_d(A,2*vsip_cmgetrowstride_d(A)); vsip_cmputcolstride_d(A,3*vsip_cmgetcolstride_d(A)); /* matrix data */ vsip_cmput_d(A,0,0,vsip_cmplx_d(0.5,0.1)); vsip_cmput_d(A,0,1,vsip_cmplx_d(7,0.1)); vsip_cmput_d(A,0,2,vsip_cmplx_d(10,0.1)); vsip_cmput_d(A,0,3,vsip_cmplx_d(12,0.1)); vsip_cmput_d(A,0,4,vsip_cmplx_d(-3,0.1)); vsip_cmput_d(A,0,5,vsip_cmplx_d(0,0.1)); vsip_cmput_d(A,0,6,vsip_cmplx_d(.05,0.1)); vsip_cmput_d(A,1,0,vsip_cmplx_d(2,0.1)); vsip_cmput_d(A,1,1,vsip_cmplx_d(13,0.1)); vsip_cmput_d(A,1,2,vsip_cmplx_d(18,0.1)); vsip_cmput_d(A,1,3,vsip_cmplx_d(6,0.1)); vsip_cmput_d(A,1,4,vsip_cmplx_d(0,0.1)); vsip_cmput_d(A,1,5,vsip_cmplx_d(130,0.1)); vsip_cmput_d(A,1,6,vsip_cmplx_d(8,0.1)); vsip_cmput_d(A,2,0,vsip_cmplx_d(3,0.1)); vsip_cmput_d(A,2,1,vsip_cmplx_d(-9,0.1)); vsip_cmput_d(A,2,2,vsip_cmplx_d(2,0.1)); vsip_cmput_d(A,2,3,vsip_cmplx_d(3,0.2)); vsip_cmput_d(A,2,4,vsip_cmplx_d(2,0.2)); vsip_cmput_d(A,2,5,vsip_cmplx_d(-9,0.2)); vsip_cmput_d(A,2,6,vsip_cmplx_d(6,0.2)); vsip_cmput_d(A,3,0,vsip_cmplx_d(4,0.2)); vsip_cmput_d(A,3,1,vsip_cmplx_d(2,0.2)); vsip_cmput_d(A,3,2,vsip_cmplx_d(2,0.2)); vsip_cmput_d(A,3,3,vsip_cmplx_d(4,0.2)); vsip_cmput_d(A,3,4,vsip_cmplx_d(1,0.2)); vsip_cmput_d(A,3,5,vsip_cmplx_d(2,0.2)); vsip_cmput_d(A,3,6,vsip_cmplx_d(3,0.2)); vsip_cmput_d(A,4,0,vsip_cmplx_d(.2,0.3)); vsip_cmput_d(A,4,1,vsip_cmplx_d(2,0.3)); vsip_cmput_d(A,4,2,vsip_cmplx_d(9,0.3)); vsip_cmput_d(A,4,3,vsip_cmplx_d(4,0.3)); vsip_cmput_d(A,4,4,vsip_cmplx_d(1,0.3)); vsip_cmput_d(A,4,5,vsip_cmplx_d(2,0.3)); 
vsip_cmput_d(A,4,6,vsip_cmplx_d(3,0.3)); vsip_cmput_d(A,5,0,vsip_cmplx_d(.1,0.4)); vsip_cmput_d(A,5,1,vsip_cmplx_d(2,0.4)); vsip_cmput_d(A,5,2,vsip_cmplx_d(.3,0.4)); vsip_cmput_d(A,5,3,vsip_cmplx_d(4,0.4)); vsip_cmput_d(A,5,4,vsip_cmplx_d(1,0.4)); vsip_cmput_d(A,5,5,vsip_cmplx_d(2,0.4)); vsip_cmput_d(A,5,6,vsip_cmplx_d(3,0.4)); vsip_cmput_d(A,6,0,vsip_cmplx_d(.01,0.4)); vsip_cmput_d(A,6,1,vsip_cmplx_d(.2,0.4)); vsip_cmput_d(A,6,2,vsip_cmplx_d(3,0.4)); vsip_cmput_d(A,6,3,vsip_cmplx_d(4,0.4)); vsip_cmput_d(A,6,4,vsip_cmplx_d(1,0.4)); vsip_cmput_d(A,6,5,vsip_cmplx_d(2,0.4)); vsip_cmput_d(A,6,6,vsip_cmplx_d(3,0.4)); { /* were solving for NTRANS Ax = B */ /* use a known X, calculate B using Ax */ int k; vsip_cvview_d *x; vsip_cmview_d *AT = vsip_cmcreate_d(NN,NN,VSIP_ROW,VSIP_MEM_NONE); vsip_length L = vsip_cmgetrowlength_d(X); vsip_cmherm_d(A,AT); printf("A = "); VU_cmprintm_d("7.4",A); printf("AT = "); VU_cmprintm_d("7.4",AT); vsip_vramp_d(1,1,x0_r); vsip_vramp_d(1,-1,x0_i); for(k=0; k<L; k++){ x = vsip_cmcolview_d(X,k); vsip_cmvprod_d(A,x0,x); vsip_rscvmul_d(2.0,x0,x0); vsip_cvdestroy_d(x); } vsip_vramp_d(1,1,x0_r); vsip_vramp_d(1,-1,x0_i); for(k=0; k<L; k++){ x = vsip_cmcolview_d(XT,k); vsip_cmvprod_d(AT,x0,x); VU_cvprintm_d("7.4",x0); vsip_rscvmul_d(2.0,x0,x0); vsip_cvdestroy_d(x); } vsip_cmalldestroy_d(AT); printf("B = "); VU_cmprintm_d("7.4",X); printf("BT = "); VU_cmprintm_d("7.4",XT); { /* then solve to see if we get X back */ vsip_clu_d* luAop = vsip_clud_create_d(NN); if(luAop == NULL) exit(1); vsip_clud_d(luAop,A); { vsip_clu_attr_d attr; vsip_clud_getattr_d(luAop,&attr); printf("lud size %lu\n",attr.n); } vsip_clusol_d(luAop,VSIP_MAT_NTRANS,X); vsip_clusol_d(luAop,VSIP_MAT_HERM,XT); vsip_clud_destroy_d(luAop); } } printf("A\\X = "); VU_cmprintm_d("9.6",X); printf("A'\\X = "); VU_cmprintm_d("9.6",XT); { vsip_vdestroy_d(x0_r);vsip_vdestroy_d(x0_i); vsip_cvalldestroy_d(x0); vsip_cmalldestroy_d(X); vsip_cmalldestroy_d(A); } }vsip_finalize((void*)0);return 1; }
/*
 * Complex outer product:  R(i,j) = alpha * a(i) * conj(b(j))
 * for i < a->length and j < b->length.
 *
 * alpha  complex scale factor
 * a, b   input vector views
 * R      output matrix view; elements are written without any bounds
 *        check, so R must cover a->length rows and b->length columns.
 *
 * When a and b are the same view and alpha has a zero imaginary part the
 * result is Hermitian, so only the diagonal and the upper triangle are
 * computed and each upper value is mirrored (conjugated) into the lower
 * triangle.
 *
 * All pointer steps are kept in the signed vsip_stride type so that views
 * with negative strides step backwards correctly instead of depending on
 * wrap-around of an unsigned offset.
 */
void (vsip_cvouter_d)(
          vsip_cscalar_d alpha,
          const vsip_cvview_d* a,
          const vsip_cvview_d* b,
          const vsip_cmview_d* R)
{
   if((a == b) && (alpha.i == 0)){
      /* covariance matrix with real multiplier */
      vsip_length n = a->length;
      vsip_stride r_cst = R->block->cstride,
                  a_cst = a->block->cstride;
      vsip_stride step_right = r_cst * R->row_stride,    /* next element in a row    */
                  step_down  = r_cst * R->col_stride,    /* next element in a column */
                  step_diag  = step_right + step_down,   /* next diagonal element    */
                  step_a     = a_cst * a->stride;        /* next element of a        */
      vsip_offset r_off = r_cst * R->offset;
      vsip_scalar_d *ai_re = (vsip_scalar_d*)(a->block->R->array + a_cst * a->offset),
                    *ai_im = (vsip_scalar_d*)(a->block->I->array + a_cst * a->offset);
      vsip_scalar_d *diag_re = (vsip_scalar_d*)(R->block->R->array + r_off),
                    *diag_im = (vsip_scalar_d*)(R->block->I->array + r_off);
      vsip_length i,j;
      for(i=0; i<n; i++){
         /* walkers start one step away from the diagonal element (i,i) */
         vsip_scalar_d *low_re = diag_re + step_down,    /* (i+1,i), moves down  */
                       *low_im = diag_im + step_down,
                       *up_re  = diag_re + step_right,   /* (i,i+1), moves right */
                       *up_im  = diag_im + step_right;
         /* alpha * a(i); alpha is purely real in this branch */
         vsip_scalar_d sa_re = *ai_re * alpha.r;
         vsip_scalar_d sa_im = *ai_im * alpha.r;
         vsip_scalar_d *aj_re = ai_re + step_a,
                       *aj_im = ai_im + step_a;
         /* diagonal: alpha * |a(i)|^2, imaginary part is zero */
         *diag_re = (sa_re * *ai_re + sa_im * *ai_im);
         *diag_im = 0;
         diag_re += step_diag;
         diag_im += step_diag;
         for(j=i+1; j<n; j++){
            /* R(i,j) = alpha * a(i) * conj(a(j));  R(j,i) = conj(R(i,j)) */
            vsip_scalar_d v = (sa_re * *aj_re + sa_im * *aj_im);
            *low_re = v;
            *up_re  = v;
            v = (sa_im * *aj_re - sa_re * *aj_im);
            *low_im = -v;
            *up_im  = v;
            low_re += step_down;  low_im += step_down;
            up_re  += step_right; up_im  += step_right;
            aj_re  += step_a;     aj_im  += step_a;
         }
         ai_re += step_a;
         ai_im += step_a;
      }
   } else {
      /* general case: every element is computed */
      vsip_length n = a->length,
                  m = b->length;
      vsip_stride r_cst = R->block->cstride,
                  a_cst = a->block->cstride,
                  b_cst = b->block->cstride;
      vsip_stride step_right = r_cst * R->row_stride,
                  step_down  = r_cst * R->col_stride,
                  step_a     = a_cst * a->stride,
                  step_b     = b_cst * b->stride;
      vsip_offset r_off = r_cst * R->offset,
                  b_off = b_cst * b->offset;
      vsip_scalar_d *ai_re = (vsip_scalar_d*)(a->block->R->array + a_cst * a->offset),
                    *ai_im = (vsip_scalar_d*)(a->block->I->array + a_cst * a->offset);
      /* first element of the current output row */
      vsip_scalar_d *row_re = (vsip_scalar_d*)(R->block->R->array + r_off),
                    *row_im = (vsip_scalar_d*)(R->block->I->array + r_off);
      vsip_length i,j;
      for(i=0; i<n; i++){
         vsip_scalar_d *out_re = row_re,
                       *out_im = row_im,
                       *bj_re  = (vsip_scalar_d*)(b->block->R->array + b_off),
                       *bj_im  = (vsip_scalar_d*)(b->block->I->array + b_off);
         /* alpha * a(i) is constant along the row */
         vsip_cscalar_d sa = vsip_cmul_d(alpha,vsip_cmplx_d(*ai_re,*ai_im));
         for(j=0; j<m; j++){
            /* sa * conj(b(j)) */
            *out_re = (sa.r * *bj_re + sa.i * *bj_im);
            *out_im = (sa.i * *bj_re - sa.r * *bj_im);
            out_re += step_right; bj_re += step_b;
            out_im += step_right; bj_im += step_b;
         }
         ai_re  += step_a;
         ai_im  += step_a;
         row_re += step_down;
         row_im += step_down;
      }
   }
}
/*
 * Draw one complex pseudo-random value from the generator in *state.
 *
 * Six uniform values are drawn, each a 32 bit unsigned integer divided
 * by 2^32.  With t1 the sum of the first three and t2 the sum of the
 * last three, the result is
 *      real = 3 - t2 - t1
 *      imag = t1 - t2
 *
 * A non-zero state->type selects the single linear congruential
 * ("nonportable") generator; zero selects the two-generator ("portable")
 * form.  The generator state in *state is advanced in both cases.
 */
vsip_cscalar_d vsip_crandn_d(
         vsip_randstate *state)
{
   vsip_scalar_d sum[2];   /* sum[0] = t1 (draws 1-3), sum[1] = t2 (draws 4-6) */
   int group, draw;

   if(!state->type){
      /* portable generator */
      for(group = 0; group < 2; group++){
         for(draw = 0; draw < 3; draw++){
            vsip_scalar_ue32 diff;
            vsip_scalar_d u;
            state->X  = state->X  * state->a  + state->c;
            state->X1 = state->X1 * state->a1 + state->c1;
            diff = state->X - state->X1;
            if(state->X1 == state->X2){
               state->X1++;
               state->X2++;
            }
            u = (vsip_scalar_d)diff/4294967296.0;
            /* the first draw of a group starts the sum, the rest add to it */
            if(draw == 0)
               sum[group] = u;
            else
               sum[group] += u;
         }
      }
   } else {
      /* nonportable generator: work on local copies, store X back once */
      vsip_scalar_ue32 mult = state->a,
                       inc  = state->c,
                       seed = state->X;
      for(group = 0; group < 2; group++){
         for(draw = 0; draw < 3; draw++){
            seed = mult * seed + inc;
            if(draw == 0)
               sum[group] = (vsip_scalar_d)seed/4294967296.0;
            else
               sum[group] += (vsip_scalar_d)seed/4294967296.0;
         }
      }
      state->X = seed;
   }
   return vsip_cmplx_d(3 - sum[1] - sum[0], sum[0] - sum[1]);
}
/*
 * Build a complex scalar from a magnitude r and an angle t:
 *      real = r * VSIP_COS_D(t),  imag = r * VSIP_SIN_D(t)
 * VSIP_COS_D / VSIP_SIN_D are macros defined elsewhere (presumably the
 * double precision cosine and sine, so t would be in radians -- confirm).
 */
vsip_cscalar_d vsip_rect_d(
         vsip_scalar_d r,
         vsip_scalar_d t)
{
   vsip_scalar_d re = r * VSIP_COS_D(t);
   vsip_scalar_d im = r * VSIP_SIN_D(t);
   return vsip_cmplx_d(re, im);
}
vsip_cfir_d *vsip_cfir_create_d( const vsip_cvview_d *kernel, vsip_symmetry symm, vsip_length N, vsip_length D, vsip_obj_state state, unsigned int ntimes, vsip_alg_hint hint) { vsip_length klength = 0; vsip_cfir_d *fir = (vsip_cfir_d*)malloc(sizeof(vsip_cfir_d)); if(fir == NULL){ return (vsip_cfir_d*) NULL; } else { switch(symm){ case 0: klength = kernel->length; break; case 1: klength = 2 * kernel->length - 1; break; case 2: klength = 2 * kernel->length; break; } fir->h = vsip_cvcreate_d(klength,VSIP_MEM_NONE); fir->s = vsip_cvcreate_d(klength-1,VSIP_MEM_NONE); } if((fir->h == NULL) | (fir->s == NULL)){ vsip_cfir_destroy_d(fir); return (vsip_cfir_d*) NULL; } else { fir->N = N; fir->M = klength; fir->D = D; fir->p = 0; fir->ntimes = ntimes; fir->symm = symm; fir->hint = hint; switch(symm){ case 0:{ fir->h->offset = fir->h->length - 1; fir->h->stride = -1; VI_cvcopy_d_d(kernel,fir->h); } break; case 1: { fir->h->length = kernel->length; vsip_cvconj_d(kernel,fir->h); fir->h->offset = klength - 1; fir->h->stride = -1; VI_cvcopy_d_d(kernel,fir->h); fir->h->length = klength; } break; case 2: { fir->h->length = kernel->length; vsip_cvconj_d(kernel,fir->h); fir->h->offset = klength - 1; fir->h->stride = -1; VI_cvcopy_d_d(kernel,fir->h); fir->h->length = klength; } break; } fir->state = state; fir->h->offset = 0; fir->h->stride = 1; VI_cvfill_d(vsip_cmplx_d((vsip_scalar_d)0,(vsip_scalar_d)0),fir->s); return fir; } }