static PetscErrorCode gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, PetscInt step) { PetscInt *num, *map, **reduce; PetscScalar *base; PetscFunctionBegin; num = gs->num_local_reduce; reduce = gs->local_reduce; while ((map = *reduce)) { base = vals + map[0] * step; /* wall */ if (*num == 2) { num++; reduce++; rvec_add (base,vals+map[1]*step,step); rvec_copy(vals+map[1]*step,base,step); } /* corner shared by three elements */ else if (*num == 3) { num++; reduce++; rvec_add (base,vals+map[1]*step,step); rvec_add (base,vals+map[2]*step,step); rvec_copy(vals+map[2]*step,base,step); rvec_copy(vals+map[1]*step,base,step); } /* corner shared by four elements */ else if (*num == 4) { num++; reduce++; rvec_add (base,vals+map[1]*step,step); rvec_add (base,vals+map[2]*step,step); rvec_add (base,vals+map[3]*step,step); rvec_copy(vals+map[3]*step,base,step); rvec_copy(vals+map[2]*step,base,step); rvec_copy(vals+map[1]*step,base,step); } /* general case ... odd geoms ... 3D */ else { num++; while (*++map >= 0) {rvec_add (base,vals+*map*step,step);} map = *reduce; while (*++map >= 0) {rvec_copy(vals+*map*step,base,step);} reduce++; } } PetscFunctionReturn(0); }
/** @brief ハウスホルダー・ベクトルへの変換. @details h[0]=0; ...; h[k-1]=0; h[k]=-s*xi; h[k+1]=x[k+1]; ...; h[n-1]=x[n-1]; @param[in] h 初期化済みのベクトル.サイズはn. @param[in] x 初期化済みのベクトル.サイズはn. @param[in] n ベクトルのサイズ. @param[in] k 第k要素が基準. @param[out] h ハウスホルダー・ベクトル. */ void rhouseholder_vec(int n, int k, rmulti **h, rmulti *alpha, rmulti **x) { int p0,p1,prec; rmulti *eta=NULL,*zeta=NULL,*xi=NULL,*axk=NULL; // allocate p0=rget_prec(alpha); p1=rvec_get_prec_max(n,h); prec=MAX2(p0,p1); eta=rallocate_prec(prec); zeta=rallocate_prec(prec); xi=rallocate_prec(prec); axk=rallocate_prec(prec); //----------- norm rvec_sum_pow2(xi,n-k-1,&x[k+1]); // xi=sum(abs(x((k+1):end)).^2); rmul(axk,x[k],x[k]); // axk=|x[k]|^2 radd(eta,axk,xi); // eta=|x[k]|^2+... rsqrt(axk,axk); // axk=|x[k]| rsqrt(eta,eta); // eta=sqrt(|x[k]|^2+...) if(req_d(eta,0)){rsub(xi,eta,axk);} // xi=eta-|x(k)| else{ // xi=xi/(|x(k)|+eta) radd(zeta,axk,eta); rdiv(xi,xi,zeta); } //----------- h rvec_set_zeros(k,h); rvec_copy(n-k-1,&h[k+1],&x[k+1]); // h((k+1):end)=x((k+1):end); if(ris_zero(x[k])){ rcopy(h[k],xi); rneg(h[k],h[k]); // h[k]=-xi }else{ rdiv(zeta,xi,axk); rneg(zeta,zeta); // zeta=-xi/axk; rmul(h[k],x[k],zeta); // h[k]=zeta*x[k]; } //----------- alpha if(req_d(xi,0) || req_d(eta,0)){ rset_d(alpha,0); }else{ rmul(alpha,xi,eta); // alpha=1/(xi*eta) rinv(alpha,alpha); } // free eta=rfree(eta); zeta=rfree(zeta); xi=rfree(xi); axk=rfree(axk); }
/** @brief ハウスホルダー変換によるQR分解の変則版. @param[in] n ベクトルxのサイズ.ベクトルhのサイズ. @param[in] k0 Hの最初のハウスホルダー・ベクトルの基準は第k0要素. @param[in] nH 配列Alphaのサイズ.行列Hの列の個数. @param[in] k 第k要素が基準. @param[in] H ハウスホルダー・ベクトルの格納用の行列.サイズは(n,nH). @param[in] Alpha ハウスホルダー・ベクトルの規格化定数の格納用の配列.サイズはnH. @param[in] x 変換されるx. @param[out] h 生成されたハウスホルダー・ベクトル. @param[out] alpha 生成されたハウスホルダー・ベクトルの規格化定数. */ void rhouseholder(int n, int k0, int nH, int k, rmulti **h, rmulti *alpha, rmulti **H, int LDH, rmulti **Alpha, rmulti **x) { int j,l,p0,p1,prec; rmulti *value=NULL,**R=NULL; // allocate p0=rget_prec(alpha); p1=rvec_get_prec_max(n,h); prec=MAX2(p0,p1); value=rallocate_prec(prec); R=rvec_allocate_prec(n,prec); rvec_copy(n,R,x); // R=(I-alpha*h*h')*R=R-alpha*h*(h'*R) for(j=0; j<nH; j++){ // R=R-alpha[j]*(H(:,j)'*R)*H(:,j) l=k0+j; rvec_sum_mul(value,n-l,&MAT(H,l,j,LDH),&R[l]); rmul(value,value,Alpha[j]); rvec_sub_mul_r(n-l,&R[l],&MAT(H,l,j,LDH),value); } rhouseholder_vec(n,k,h,alpha,R); // free value=rfree(value); R=rvec_free(n,R); }
static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, PetscInt step) { PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; PetscInt *pw, *list, *size, **nodes; MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; MPI_Status status; PetscBLASInt i1 = 1,dstep; PetscErrorCode ierr; PetscFunctionBegin; /* strip and load s */ msg_list =list = gs->pair_list; msg_size =size = gs->msg_sizes; msg_nodes=nodes = gs->node_list; iptr=pw = gs->pw_elm_list; dptr1=dptr3 = gs->pw_vals; msg_ids_in = ids_in = gs->msg_ids_in; msg_ids_out = ids_out = gs->msg_ids_out; dptr2 = gs->out; in1=in2 = gs->in; /* post the receives */ /* msg_nodes=nodes; */ do { /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the second one *list and do list++ afterwards */ ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->gs_comm, msg_ids_in);CHKERRQ(ierr); list++;msg_ids_in++; in1 += *size++ *step; } while (*++msg_nodes); msg_nodes=nodes; /* load gs values into in out gs buffers */ while (*iptr >= 0) { rvec_copy(dptr3,in_vals + *iptr*step,step); dptr3+=step; iptr++; } /* load out buffers and post the sends */ while ((iptr = *msg_nodes++)) { dptr3 = dptr2; while (*iptr >= 0) { rvec_copy(dptr2,dptr1 + *iptr*step,step); dptr2+=step; iptr++; } ierr = MPI_Isend(dptr3, *msg_size *step, MPIU_SCALAR, *msg_list, MSGTAG1+my_id, gs->gs_comm, msg_ids_out);CHKERRQ(ierr); msg_size++; msg_list++;msg_ids_out++; } /* tree */ if (gs->max_left_over) {gs_gop_vec_tree_plus(gs,in_vals,step);} /* process the received data */ msg_nodes=nodes; while ((iptr = *nodes++)){ PetscScalar d1 = 1.0; /* Should I check the return value of MPI_Wait() or status? */ /* Can this loop be replaced by a call to MPI_Waitall()? */ ierr = MPI_Wait(ids_in, &status);CHKERRQ(ierr); ids_in++; while (*iptr >= 0) { dstep = PetscBLASIntCast(step); BLASaxpy_(&dstep,&d1,in2,&i1,dptr1 + *iptr*step,&i1); in2+=step; iptr++; } } /* replace vals */ while (*pw >= 0) { rvec_copy(in_vals + *pw*step,dptr1,step); dptr1+=step; pw++; } /* clear isend message handles */ /* This changed for clarity though it could be the same */ while (*msg_nodes++) /* Should I check the return value of MPI_Wait() or status? */ /* Can this loop be replaced by a call to MPI_Waitall()? */ {ierr = MPI_Wait(ids_out, &status);CHKERRQ(ierr);ids_out++;} PetscFunctionReturn(0); }