void VM_VALUE_HASH_IMP_free( VCONTEXT *ctx, VM_VALUE_HASH_IMP *imp) { size_t i, j; for(i = 0; i<imp->buckets_count; i++) { VM_VALUE_HASH_BUCKET *cur, *next; cur = imp->buckets[ i ]; while( cur ) { for(j = 0; j < VM_VALUE_HASH_ENTRIES_PER_BUCKET; j++ ) { if (cur->entry[ j ].hash) { VM_OBJ_HEADER_release( ctx, cur->entry[ j ].key ); VM_OBJ_HEADER_release( ctx, cur->entry[ j ].value ); } } next = cur->next; V_FREE( ctx, cur ); cur = next; } } V_FREE( ctx, imp->buckets ); }
MAT *bifactor(MAT *A, MAT *U, MAT *V) #endif { int k; STATIC VEC *tmp1=VNULL, *tmp2=VNULL, *w=VNULL; Real beta; if ( ! A ) error(E_NULL,"bifactor"); if ( ( U && ( U->m != U->n ) ) || ( V && ( V->m != V->n ) ) ) error(E_SQUARE,"bifactor"); if ( ( U && U->m != A->m ) || ( V && V->m != A->n ) ) error(E_SIZES,"bifactor"); tmp1 = v_resize(tmp1,A->m); tmp2 = v_resize(tmp2,A->n); w = v_resize(w, max(A->m,A->n)); MEM_STAT_REG(tmp1,TYPE_VEC); MEM_STAT_REG(tmp2,TYPE_VEC); MEM_STAT_REG(w, TYPE_VEC); if ( A->m >= A->n ) for ( k = 0; k < A->n; k++ ) { get_col(A,k,tmp1); hhvec(tmp1,k,&beta,tmp1,&(A->me[k][k])); _hhtrcols(A,k,k+1,tmp1,beta,w); if ( U ) _hhtrcols(U,k,0,tmp1,beta,w); if ( k+1 >= A->n ) continue; get_row(A,k,tmp2); hhvec(tmp2,k+1,&beta,tmp2,&(A->me[k][k+1])); hhtrrows(A,k+1,k+1,tmp2,beta); if ( V ) _hhtrcols(V,k+1,0,tmp2,beta,w); } else for ( k = 0; k < A->m; k++ ) { get_row(A,k,tmp2); hhvec(tmp2,k,&beta,tmp2,&(A->me[k][k])); hhtrrows(A,k+1,k,tmp2,beta); if ( V ) _hhtrcols(V,k,0,tmp2,beta,w); if ( k+1 >= A->m ) continue; get_col(A,k,tmp1); hhvec(tmp1,k+1,&beta,tmp1,&(A->me[k+1][k])); _hhtrcols(A,k+1,k+1,tmp1,beta,w); if ( U ) _hhtrcols(U,k+1,0,tmp1,beta,w); } #ifdef THREADSAFE V_FREE(tmp1); V_FREE(tmp2); #endif return A; }
MAT *m_inverse(const MAT *A, MAT *out) { unsigned int i; char MatrixTempBuffer[ 4000 ]; VEC *tmp = VNULL, *tmp2 = VNULL; MAT *A_cp = MNULL; PERM *pivot = PNULL; if ( ! A ) error(E_NULL,"m_inverse"); if ( A->m != A->n ) error(E_SQUARE,"m_inverse"); if ( ! out || out->m < A->m || out->n < A->n ) out = m_resize(out,A->m,A->n); if( SET_MAT_SIZE( A->m, A->n ) < 1000 ) mat_get( &A_cp, (void *)( MatrixTempBuffer + 2000 ), A->m, A->n ); else A_cp = matrix_get( A->m, A->n ); A_cp = m_copy( A, A_cp ); if( SET_VEC_SIZE( A->m ) < 1000 ) { vec_get( &tmp, (void *)MatrixTempBuffer, A->m ); vec_get( &tmp2, (void *)(MatrixTempBuffer + 1000), A->m ); } else { tmp = v_get( A->m ); tmp2 = v_get( A->m ); } if( SET_PERM_SIZE( A->m ) < 1000 ) { perm_get( &pivot, (void *)( MatrixTempBuffer + 3000 ), A->m ); } else { pivot = px_get( A->m ); } LUfactor(A_cp,pivot); //tracecatch_matrix(LUfactor(A_cp,pivot),"m_inverse"); for ( i = 0; i < A->n; i++ ){ v_zero(tmp); tmp->ve[i] = 1.0; LUsolve(A_cp,pivot,tmp,tmp2); //tracecatch_matrix(LUsolve(A_cp,pivot,tmp,tmp2),"m_inverse"); set_col(out,i,tmp2); } if( tmp != (VEC *)(MatrixTempBuffer ) ) // память выделялась, надо освободить V_FREE(tmp); if( tmp2 != (VEC *)(MatrixTempBuffer + 1000) ) // память выделялась, надо освободить V_FREE(tmp2); if( A_cp != (MAT *)(MatrixTempBuffer + 2000) ) // память выделялась, надо освободить M_FREE(A_cp); if( pivot != (PERM *)(MatrixTempBuffer + 3000) ) // память выделялась, надо освободить PX_FREE( pivot ); return out; }
MAT *makeQ(const MAT *QR,const VEC *diag, MAT *Qout) #endif { STATIC VEC *tmp1=VNULL,*tmp2=VNULL; unsigned int i, limit; Real beta, r_ii, tmp_val; int j; limit = min(QR->m,QR->n); if ( ! QR || ! diag ) error(E_NULL,"makeQ"); if ( diag->dim < limit ) error(E_SIZES,"makeQ"); if ( Qout==(MAT *)NULL || Qout->m < QR->m || Qout->n < QR->m ) Qout = m_get(QR->m,QR->m); tmp1 = v_resize(tmp1,QR->m); /* contains basis vec & columns of Q */ tmp2 = v_resize(tmp2,QR->m); /* contains H/holder vectors */ MEM_STAT_REG(tmp1,TYPE_VEC); MEM_STAT_REG(tmp2,TYPE_VEC); for ( i=0; i<QR->m ; i++ ) { /* get i-th column of Q */ /* set up tmp1 as i-th basis vector */ for ( j=0; j<QR->m ; j++ ) tmp1->ve[j] = 0.0; tmp1->ve[i] = 1.0; /* apply H/h transforms in reverse order */ for ( j=limit-1; j>=0; j-- ) { get_col(QR,j,tmp2); r_ii = fabs(tmp2->ve[j]); tmp2->ve[j] = diag->ve[j]; tmp_val = (r_ii*fabs(diag->ve[j])); beta = ( tmp_val == 0.0 ) ? 0.0 : 1.0/tmp_val; /* hhtrvec(tmp2,beta->ve[j],j,tmp1,tmp1); */ hhtrvec(tmp2,beta,j,tmp1,tmp1); } /* insert into Q */ set_col(Qout,i,tmp1); } #ifdef THREADSAFE V_FREE(tmp1); V_FREE(tmp2); #endif return (Qout); }
MAT *makeHQ(MAT *H, VEC *diag, VEC *beta, MAT *Qout) #endif { int i, j, limit; STATIC VEC *tmp1 = VNULL, *tmp2 = VNULL; if ( H==(MAT *)NULL || diag==(VEC *)NULL || beta==(VEC *)NULL ) error(E_NULL,"makeHQ"); limit = H->m - 1; if ( diag->dim < limit || beta->dim < limit ) error(E_SIZES,"makeHQ"); if ( H->m != H->n ) error(E_SQUARE,"makeHQ"); Qout = m_resize(Qout,H->m,H->m); tmp1 = v_resize(tmp1,H->m); tmp2 = v_resize(tmp2,H->m); MEM_STAT_REG(tmp1,TYPE_VEC); MEM_STAT_REG(tmp2,TYPE_VEC); for ( i = 0; i < H->m; i++ ) { /* tmp1 = i'th basis vector */ for ( j = 0; j < H->m; j++ ) /* tmp1->ve[j] = 0.0; */ v_set_val(tmp1,j,0.0); /* tmp1->ve[i] = 1.0; */ v_set_val(tmp1,i,1.0); /* apply H/h transforms in reverse order */ for ( j = limit-1; j >= 0; j-- ) { get_col(H,(unsigned int)j,tmp2); /* tmp2->ve[j+1] = diag->ve[j]; */ v_set_val(tmp2,j+1,v_entry(diag,j)); hhtrvec(tmp2,beta->ve[j],j+1,tmp1,tmp1); } /* insert into Qout */ set_col(Qout,(unsigned int)i,tmp1); } #ifdef THREADSAFE V_FREE(tmp1); V_FREE(tmp2); #endif return (Qout); }
VEC *QRsolve(const MAT *QR, const VEC *diag, const VEC *b, VEC *x) #endif { int limit; STATIC VEC *tmp = VNULL; if ( ! QR || ! diag || ! b ) error(E_NULL,"QRsolve"); limit = min(QR->m,QR->n); if ( diag->dim < limit || b->dim != QR->m ) error(E_SIZES,"QRsolve"); tmp = v_resize(tmp,limit); MEM_STAT_REG(tmp,TYPE_VEC); x = v_resize(x,QR->n); _Qsolve(QR,diag,b,x,tmp); x = Usolve(QR,x,x,0.0); v_resize(x,QR->n); #ifdef THREADSAFE V_FREE(tmp); #endif return x; }
MAT *hhtrcols(MAT *M, unsigned int i0, unsigned int j0, const VEC *hh, double beta) #endif { STATIC VEC *w = VNULL; if ( M == MNULL || hh == VNULL || w == VNULL ) error(E_NULL,"hhtrcols"); if ( M->m != hh->dim ) error(E_SIZES,"hhtrcols"); if ( i0 > M->m || j0 > M->n ) error(E_BOUNDS,"hhtrcols"); if ( ! w || w->dim < M->n ) w = v_resize(w,M->n); MEM_STAT_REG(w,TYPE_VEC); M = _hhtrcols(M,i0,j0,hh,beta,w); #ifdef THREADSAFE V_FREE(w); #endif return M; }
MAT *Hfactor(MAT *A, VEC *diag, VEC *beta) #endif { STATIC VEC *hh = VNULL, *w = VNULL; int k, limit; if ( ! A || ! diag || ! beta ) error(E_NULL,"Hfactor"); if ( diag->dim < A->m - 1 || beta->dim < A->m - 1 ) error(E_SIZES,"Hfactor"); if ( A->m != A->n ) error(E_SQUARE,"Hfactor"); limit = A->m - 1; hh = v_resize(hh,A->m); w = v_resize(w,A->n); MEM_STAT_REG(hh,TYPE_VEC); MEM_STAT_REG(w, TYPE_VEC); for ( k = 0; k < limit; k++ ) { /* compute the Householder vector hh */ get_col(A,(unsigned int)k,hh); /* printf("the %d'th column = "); v_output(hh); */ hhvec(hh,k+1,&beta->ve[k],hh,&A->me[k+1][k]); /* diag->ve[k] = hh->ve[k+1]; */ v_set_val(diag,k,v_entry(hh,k+1)); /* printf("H/h vector = "); v_output(hh); */ /* printf("from the %d'th entry\n",k+1); */ /* printf("beta = %g\n",beta->ve[k]); */ /* apply Householder operation symmetrically to A */ _hhtrcols(A,k+1,k+1,hh,v_entry(beta,k),w); hhtrrows(A,0 ,k+1,hh,v_entry(beta,k)); /* printf("A = "); m_output(A); */ } #ifdef THREADSAFE V_FREE(hh); V_FREE(w); #endif return (A); }
VEC *svd(MAT *A, MAT *U, MAT *V, VEC *d) #endif { STATIC VEC *f=VNULL; int i, limit; MAT *A_tmp; if ( ! A ) error(E_NULL,"svd"); if ( ( U && ( U->m != U->n ) ) || ( V && ( V->m != V->n ) ) ) error(E_SQUARE,"svd"); if ( ( U && U->m != A->m ) || ( V && V->m != A->n ) ) error(E_SIZES,"svd"); A_tmp = m_copy(A,MNULL); if ( U != MNULL ) m_ident(U); if ( V != MNULL ) m_ident(V); limit = min(A_tmp->m,A_tmp->n); d = v_resize(d,limit); f = v_resize(f,limit-1); MEM_STAT_REG(f,TYPE_VEC); bifactor(A_tmp,U,V); if ( A_tmp->m >= A_tmp->n ) for ( i = 0; i < limit; i++ ) { d->ve[i] = A_tmp->me[i][i]; if ( i+1 < limit ) f->ve[i] = A_tmp->me[i][i+1]; } else for ( i = 0; i < limit; i++ ) { d->ve[i] = A_tmp->me[i][i]; if ( i+1 < limit ) f->ve[i] = A_tmp->me[i+1][i]; } if ( A_tmp->m >= A_tmp->n ) bisvd(d,f,U,V); else bisvd(d,f,V,U); M_FREE(A_tmp); #ifdef THREADSAFE V_FREE(f); #endif return d; }
MAT *m_inverse(const MAT *A, MAT *out) #endif { int i; STATIC VEC *tmp = VNULL, *tmp2 = VNULL; STATIC MAT *A_cp = MNULL; STATIC PERM *pivot = PNULL; if ( ! A ) error(E_NULL,"m_inverse"); if ( A->m != A->n ) error(E_SQUARE,"m_inverse"); if ( ! out || out->m < A->m || out->n < A->n ) out = m_resize(out,A->m,A->n); A_cp = m_resize(A_cp,A->m,A->n); A_cp = m_copy(A,A_cp); tmp = v_resize(tmp,A->m); tmp2 = v_resize(tmp2,A->m); pivot = px_resize(pivot,A->m); MEM_STAT_REG(A_cp,TYPE_MAT); MEM_STAT_REG(tmp, TYPE_VEC); MEM_STAT_REG(tmp2,TYPE_VEC); MEM_STAT_REG(pivot,TYPE_PERM); tracecatch(LUfactor(A_cp,pivot),"m_inverse"); for ( i = 0; i < A->n; i++ ) { v_zero(tmp); tmp->ve[i] = 1.0; tracecatch(LUsolve(A_cp,pivot,tmp,tmp2),"m_inverse"); set_col(out,i,tmp2); } #ifdef THREADSAFE V_FREE(tmp); V_FREE(tmp2); M_FREE(A_cp); PX_FREE(pivot); #endif return out; }
MAT *QRfactor(MAT *A, VEC *diag) #endif { unsigned int k,limit; Real beta; STATIC VEC *hh=VNULL, *w=VNULL; if ( ! A || ! diag ) error(E_NULL,"QRfactor"); limit = min(A->m,A->n); if ( diag->dim < limit ) error(E_SIZES,"QRfactor"); hh = v_resize(hh,A->m); w = v_resize(w, A->n); MEM_STAT_REG(hh,TYPE_VEC); MEM_STAT_REG(w, TYPE_VEC); for ( k=0; k<limit; k++ ) { /* get H/holder vector for the k-th column */ get_col(A,k,hh); /* hhvec(hh,k,&beta->ve[k],hh,&A->me[k][k]); */ hhvec(hh,k,&beta,hh,&A->me[k][k]); diag->ve[k] = hh->ve[k]; /* apply H/holder vector to remaining columns */ /* hhtrcols(A,k,k+1,hh,beta->ve[k]); */ _hhtrcols(A,k,k+1,hh,beta,w); } #ifdef THREADSAFE V_FREE(hh); V_FREE(w); #endif return (A); }
double rk4(double t, VEC *x, double h, VEC *Torq) { VEC *v1=VNULL, *v2=VNULL, *v3=VNULL, *v4=VNULL; VEC *temp=VNULL; double step_size=0.5*h; if ( x == VNULL ) error(E_NULL,"rk4"); v1 = v_get(x->dim); v2 = v_get(x->dim); v3 = v_get(x->dim); v4 = v_get(x->dim); temp = v_get(x->dim); Sy_m(x, Torq, v1); v_mltadd(x,v1,step_size,temp); Sy_m(temp, Torq, v2); v_mltadd(x,v2,step_size,temp); Sy_m(temp, Torq, v3); step_size = h; v_mltadd(x, v3, step_size, temp); Sy_m(temp, Torq, v4); temp = v_copy(v1,temp); v_mltadd(temp,v2,2.0,temp); v_mltadd(temp,v3,2.0,temp); v_add(temp,v4,temp); v_mltadd(x,temp,(h/6.0),x); return t+h; V_FREE(v1); V_FREE(v2); V_FREE(v3); V_FREE(v4); V_FREE(temp); }
VM_VALUE_HASH *VM_VALUE_HASH_init( VCONTEXT *ctx, int buckets) { VM_VALUE_HASH * ret; ret = (VM_VALUE_HASH *) V_MALLOC( ctx , sizeof(VM_VALUE_HASH)); if (!ret) { return 0; } if (VM_VALUE_HASH_IMP_init(ctx, &ret->imp, buckets) ) { V_FREE( ctx, ret ); return 0; } ret->base.ref_count = 1; ret->base.type.base_type = VM_HASH; return ret; }
V_EXPORT int VCLOSEDHASH_resize(VCLOSEDHASH *hash, size_t buckets) { void *key,*value; VCLOSEDHASH_HEADER *hdr; VCLOSEDHASH new_hash; if (buckets < hash->elmcount) { return -1; } if (VCLOSEDHASH_init_uniqemap( hash->ctx, &new_hash, hash->keysize, hash->datasize, buckets, hash->hash, hash->compare, hash->rehash )) { return -1; } VCLOSEDHASH_FOREACH(key, void * , value, void *, hash) hdr = ((VCLOSEDHASH_HEADER *) key) - 1; /* add current entry into new table whiles reusing hash value */ if (VCLOSEDHASH_insert_for_resize(&new_hash, hdr->hashvalue, key, value )) { VCLOSEDHASH_free(&new_hash); return -1; } VCLOSEDHASH_FOREACH_END V_FREE( hash->ctx, hash->data ); hash->data = new_hash.data; hash->resize_threshold = new_hash.resize_threshold; hash->elmmaxcount = new_hash.elmmaxcount; return 0; }
static int VFIXEDHEAP_CHUNK_free( VFIXEDHEAP *alloc, void *ptr ) { size_t i; VFIXEDHEAP_CHUNK_INFO *info; for(i=0;i<alloc->info_sizes.elmcount;i++) { info = (VFIXEDHEAP_CHUNK_INFO *) VARR_at( &alloc->info_sizes, i ); if (VUTIL_ptr_in_range(ptr, info->start_buffer, info->eof_buffer)) { V_FREE( alloc->ctx, info->start_buffer ); VARR_delete_at( &alloc->info_sizes, i ); return 0; } } /* error condition - array of big blocks corrupted */ return -1; }
V_EXPORT void VFIXEDHEAP_free( VFIXEDHEAP *alloc ) { size_t i; VFIXEDHEAP_CHUNK_INFO *info; for(i=0;i<alloc->info_sizes.elmcount;i++) { info = (VFIXEDHEAP_CHUNK_INFO *) VARR_at( &alloc->info_sizes, i ); V_FREE( alloc->ctx, info->start_buffer); } VARR_free( &alloc->info_sizes ); alloc->freelist = 0; alloc->current = 0; alloc->num_free = 0; }
static VFIXEDHEAP_CHUNK_INFO * VFIXEDHEAP_CHUNK_init(VFIXEDHEAP *alloc) { VFIXEDHEAP_CHUNK_INFO ret; ret.alloc_data = ret.start_buffer = V_MALLOC(alloc->ctx, alloc->chunk_size ); if (!ret.alloc_data) { return 0; } ret.eof_buffer = ret.alloc_data + alloc->chunk_size; ret.num_free = 0; /* add chunk info to alloc control structure */ if (!VARR_push_back( &alloc->info_sizes, &ret, sizeof(ret) )) { return (VFIXEDHEAP_CHUNK_INFO * ) VARR_at( &alloc->info_sizes, VARR_size(&alloc->info_sizes) - 1 ); } V_FREE(alloc->ctx, ret.alloc_data ); return 0; }
VEC *QRCPsolve(const MAT *QR, const VEC *diag, PERM *pivot, const VEC *b, VEC *x) #endif { STATIC VEC *tmp=VNULL; if ( ! QR || ! diag || ! pivot || ! b ) error(E_NULL,"QRCPsolve"); if ( (QR->m > diag->dim &&QR->n > diag->dim) || QR->n != pivot->size ) error(E_SIZES,"QRCPsolve"); tmp = QRsolve(QR,diag,b,tmp); MEM_STAT_REG(tmp,TYPE_VEC); x = pxinv_vec(pivot,tmp,x); #ifdef THREADSAFE V_FREE(tmp); #endif return x; }
VEC *_Qsolve(const MAT *QR, const VEC *diag, const VEC *b, VEC *x, VEC *tmp) #endif { unsigned int dynamic; int k, limit; Real beta, r_ii, tmp_val; limit = min(QR->m,QR->n); dynamic = FALSE; if ( ! QR || ! diag || ! b ) error(E_NULL,"_Qsolve"); if ( diag->dim < limit || b->dim != QR->m ) error(E_SIZES,"_Qsolve"); x = v_resize(x,QR->m); if ( tmp == VNULL ) dynamic = TRUE; tmp = v_resize(tmp,QR->m); /* apply H/holder transforms in normal order */ x = v_copy(b,x); for ( k = 0 ; k < limit ; k++ ) { get_col(QR,k,tmp); r_ii = fabs(tmp->ve[k]); tmp->ve[k] = diag->ve[k]; tmp_val = (r_ii*fabs(diag->ve[k])); beta = ( tmp_val == 0.0 ) ? 0.0 : 1.0/tmp_val; /* hhtrvec(tmp,beta->ve[k],k,x,x); */ hhtrvec(tmp,beta,k,x,x); } if ( dynamic ) V_FREE(tmp); return (x); }
void VM_OBJECT_destroy(VCONTEXT *ctx, VM_OBJ_HEADER *hdr) { switch( hdr->type.base_type ) { case VM_STRING: VM_VALUE_STRING_free( ctx, (VM_VALUE_STRING *) hdr ); break; case VM_LONG: case VM_DOUBLE: V_FREE( ctx, hdr ); break; case VM_HASH: VM_VALUE_HASH_free( ctx, (VM_VALUE_HASH *) hdr ); break; case VM_ARRAY: VM_VALUE_ARRAY_free( ctx, (VM_VALUE_ARRAY *) hdr ); break; default: ; } }
VEC *iter_mgcr(ITER *ip) #endif { STATIC VEC *As=VNULL, *beta=VNULL, *alpha=VNULL, *z=VNULL; STATIC MAT *N=MNULL, *H=MNULL; VEC *rr, v, s; /* additional pointer and structures */ Real nres; /* norm of a residual */ Real dd; /* coefficient d_i */ int i,j; int done; /* if TRUE then stop the iterative process */ int dim; /* dimension of the problem */ /* ip cannot be NULL */ if (ip == INULL) error(E_NULL,"mgcr"); /* Ax, b and stopping criterion must be given */ if (! ip->Ax || ! ip->b || ! ip->stop_crit) error(E_NULL,"mgcr"); /* at least one direction vector must exist */ if ( ip->k <= 0) error(E_BOUNDS,"mgcr"); /* if the vector x is given then b and x must have the same dimension */ if ( ip->x && ip->x->dim != ip->b->dim) error(E_SIZES,"mgcr"); if (ip->eps <= 0.0) ip->eps = MACHEPS; dim = ip->b->dim; As = v_resize(As,dim); alpha = v_resize(alpha,ip->k); beta = v_resize(beta,ip->k); MEM_STAT_REG(As,TYPE_VEC); MEM_STAT_REG(alpha,TYPE_VEC); MEM_STAT_REG(beta,TYPE_VEC); H = m_resize(H,ip->k,ip->k); N = m_resize(N,ip->k,dim); MEM_STAT_REG(H,TYPE_MAT); MEM_STAT_REG(N,TYPE_MAT); /* if a preconditioner is defined */ if (ip->Bx) { z = v_resize(z,dim); MEM_STAT_REG(z,TYPE_VEC); } /* if x is NULL then it is assumed that x has entries with value zero */ if ( ! ip->x ) { ip->x = v_get(ip->b->dim); ip->shared_x = FALSE; } /* v and s are additional pointers to rows of N */ /* they must have the same dimension as rows of N */ v.dim = v.max_dim = s.dim = s.max_dim = dim; done = FALSE; for (ip->steps = 0; ip->steps < ip->limit; ) { (*ip->Ax)(ip->A_par,ip->x,As); /* As = A*x */ v_sub(ip->b,As,As); /* As = b - A*x */ rr = As; /* rr is an additional pointer */ /* if a preconditioner is defined */ if (ip->Bx) { (*ip->Bx)(ip->B_par,As,z); /* z = B*(b-A*x) */ rr = z; } /* norm of the residual */ nres = v_norm2(rr); dd = nres; /* dd = ||r_i|| */ /* check if the norm of the residual is zero */ if (ip->steps == 0) { /* information for a user */ if (ip->info) (*ip->info)(ip,nres,As,rr); ip->init_res = fabs(nres); } if (nres == 0.0) { /* iterative process is finished */ done = TRUE; break; } /* save this residual in the first row of N */ v.ve = N->me[0]; v_copy(rr,&v); for (i = 0; i < ip->k && ip->steps < ip->limit; i++) { ip->steps++; v.ve = N->me[i]; /* pointer to a row of N (=s_i) */ /* note that we must use here &v, not v */ (*ip->Ax)(ip->A_par,&v,As); rr = As; /* As = A*s_i */ if (ip->Bx) { (*ip->Bx)(ip->B_par,As,z); /* z = B*A*s_i */ rr = z; } if (i < ip->k - 1) { s.ve = N->me[i+1]; /* pointer to a row of N (=s_{i+1}) */ v_copy(rr,&s); /* s_{i+1} = B*A*s_i */ for (j = 0; j <= i-1; j++) { v.ve = N->me[j+1]; /* pointer to a row of N (=s_{j+1}) */ /* beta->ve[j] = in_prod(&v,rr); */ /* beta_{j,i} */ /* modified Gram-Schmidt algorithm */ beta->ve[j] = in_prod(&v,&s); /* beta_{j,i} */ /* s_{i+1} -= beta_{j,i}*s_{j+1} */ v_mltadd(&s,&v,- beta->ve[j],&s); } /* beta_{i,i} = ||s_{i+1}||_2 */ beta->ve[i] = nres = v_norm2(&s); if ( nres <= MACHEPS*ip->init_res) { /* s_{i+1} == 0 */ i--; done = TRUE; break; } sv_mlt(1.0/nres,&s,&s); /* normalize s_{i+1} */ v.ve = N->me[0]; alpha->ve[i] = in_prod(&v,&s); /* alpha_i = (s_0 , s_{i+1}) */ } else { for (j = 0; j <= i-1; j++) { v.ve = N->me[j+1]; /* pointer to a row of N (=s_{j+1}) */ beta->ve[j] = in_prod(&v,rr); /* beta_{j,i} */ } nres = in_prod(rr,rr); /* rr = B*A*s_{k-1} */ for (j = 0; j <= i-1; j++) nres -= beta->ve[j]*beta->ve[j]; if (sqrt(fabs(nres)) <= MACHEPS*ip->init_res) { /* s_k is zero */ i--; done = TRUE; break; } if (nres < 0.0) { /* do restart */ i--; ip->steps--; break; } beta->ve[i] = sqrt(nres); /* beta_{k-1,k-1} */ v.ve = N->me[0]; alpha->ve[i] = in_prod(&v,rr); for (j = 0; j <= i-1; j++) alpha->ve[i] -= beta->ve[j]*alpha->ve[j]; alpha->ve[i] /= beta->ve[i]; /* alpha_{k-1} */ } set_col(H,i,beta); /* other method of computing dd */ /* if (fabs((double)alpha->ve[i]) > dd) { nres = - dd*dd + alpha->ve[i]*alpha->ve[i]; nres = sqrt((double) nres); if (ip->info) (*ip->info)(ip,-nres,VNULL,VNULL); break; } */ /* to avoid overflow/underflow in computing dd */ /* dd *= cos(asin((double)(alpha->ve[i]/dd))); */ nres = alpha->ve[i]/dd; if (fabs(nres-1.0) <= MACHEPS*ip->init_res) dd = 0.0; else { nres = 1.0 - nres*nres; if (nres < 0.0) { nres = sqrt((double) -nres); if (ip->info) (*ip->info)(ip,-dd*nres,VNULL,VNULL); break; } dd *= sqrt((double) nres); } if (ip->info) (*ip->info)(ip,dd,VNULL,VNULL); if ( ip->stop_crit(ip,dd,VNULL,VNULL) ) { /* stopping criterion is satisfied */ done = TRUE; break; } } /* end of for */ if (i >= ip->k) i = ip->k - 1; /* use (i+1) by (i+1) submatrix of H */ H = m_resize(H,i+1,i+1); alpha = v_resize(alpha,i+1); Usolve(H,alpha,alpha,0.0); /* c_i is saved in alpha */ for (j = 0; j <= i; j++) { v.ve = N->me[j]; v_mltadd(ip->x,&v,alpha->ve[j],ip->x); } if (done) break; /* stop the iterative process */ alpha = v_resize(alpha,ip->k); H = m_resize(H,ip->k,ip->k); } /* end of while */ #ifdef THREADSAFE V_FREE(As); V_FREE(beta); V_FREE(alpha); V_FREE(z); M_FREE(N); M_FREE(H); #endif return ip->x; /* return the solution */ }
VEC *iter_gmres(ITER *ip) #endif { STATIC VEC *u=VNULL, *r=VNULL, *rhs = VNULL; STATIC VEC *givs=VNULL, *givc=VNULL, *z = VNULL; STATIC MAT *Q = MNULL, *R = MNULL; VEC *rr, v, v1; /* additional pointers (not real vectors) */ int i,j, done; Real nres; /* Real last_h; */ if (ip == INULL) error(E_NULL,"iter_gmres"); if ( ! ip->Ax || ! ip->b ) error(E_NULL,"iter_gmres"); if ( ! ip->stop_crit ) error(E_NULL,"iter_gmres"); if ( ip->k <= 0 ) error(E_BOUNDS,"iter_gmres"); if (ip->x != VNULL && ip->x->dim != ip->b->dim) error(E_SIZES,"iter_gmres"); if (ip->eps <= 0.0) ip->eps = MACHEPS; r = v_resize(r,ip->k+1); u = v_resize(u,ip->b->dim); rhs = v_resize(rhs,ip->k+1); givs = v_resize(givs,ip->k); /* Givens rotations */ givc = v_resize(givc,ip->k); MEM_STAT_REG(r,TYPE_VEC); MEM_STAT_REG(u,TYPE_VEC); MEM_STAT_REG(rhs,TYPE_VEC); MEM_STAT_REG(givs,TYPE_VEC); MEM_STAT_REG(givc,TYPE_VEC); R = m_resize(R,ip->k+1,ip->k); Q = m_resize(Q,ip->k,ip->b->dim); MEM_STAT_REG(R,TYPE_MAT); MEM_STAT_REG(Q,TYPE_MAT); if (ip->x == VNULL) { /* ip->x == 0 */ ip->x = v_get(ip->b->dim); ip->shared_x = FALSE; } v.dim = v.max_dim = ip->b->dim; /* v and v1 are pointers to rows */ v1.dim = v1.max_dim = ip->b->dim; /* of matrix Q */ if (ip->Bx != (Fun_Ax)NULL) { /* if precondition is defined */ z = v_resize(z,ip->b->dim); MEM_STAT_REG(z,TYPE_VEC); } done = FALSE; for (ip->steps = 0; ip->steps < ip->limit; ) { /* restart */ ip->Ax(ip->A_par,ip->x,u); /* u = A*x */ v_sub(ip->b,u,u); /* u = b - A*x */ rr = u; /* rr is a pointer only */ if (ip->Bx) { (ip->Bx)(ip->B_par,u,z); /* tmp = B*(b-A*x) */ rr = z; } nres = v_norm2(rr); if (ip->steps == 0) { if (ip->info) ip->info(ip,nres,VNULL,VNULL); ip->init_res = nres; } if ( nres == 0.0 ) { done = TRUE; break; } v.ve = Q->me[0]; sv_mlt(1.0/nres,rr,&v); v_zero(r); v_zero(rhs); rhs->ve[0] = nres; for ( i = 0; i < ip->k && ip->steps < ip->limit; i++ ) { ip->steps++; v.ve = Q->me[i]; (ip->Ax)(ip->A_par,&v,u); rr = u; if (ip->Bx) { (ip->Bx)(ip->B_par,u,z); rr = z; } if (i < ip->k - 1) { v1.ve = Q->me[i+1]; v_copy(rr,&v1); for (j = 0; j <= i; j++) { v.ve = Q->me[j]; /* r->ve[j] = in_prod(&v,rr); */ /* modified Gram-Schmidt algorithm */ r->ve[j] = in_prod(&v,&v1); v_mltadd(&v1,&v,-r->ve[j],&v1); } r->ve[i+1] = nres = v_norm2(&v1); if (nres <= MACHEPS*ip->init_res) { for (j = 0; j < i; j++) rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r); set_col(R,i,r); done = TRUE; break; } sv_mlt(1.0/nres,&v1,&v1); } else { /* i == ip->k - 1 */ /* Q->me[ip->k] need not be computed */ for (j = 0; j <= i; j++) { v.ve = Q->me[j]; r->ve[j] = in_prod(&v,rr); } nres = in_prod(rr,rr) - in_prod(r,r); if (sqrt(fabs(nres)) <= MACHEPS*ip->init_res) { for (j = 0; j < i; j++) rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r); set_col(R,i,r); done = TRUE; break; } if (nres < 0.0) { /* do restart */ i--; ip->steps--; break; } r->ve[i+1] = sqrt(nres); } /* QR update */ /* last_h = r->ve[i+1]; */ /* for test only */ for (j = 0; j < i; j++) rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r); givens(r->ve[i],r->ve[i+1],&givc->ve[i],&givs->ve[i]); rot_vec(r,i,i+1,givc->ve[i],givs->ve[i],r); rot_vec(rhs,i,i+1,givc->ve[i],givs->ve[i],rhs); set_col(R,i,r); nres = fabs((double) rhs->ve[i+1]); if (ip->info) ip->info(ip,nres,VNULL,VNULL); if ( ip->stop_crit(ip,nres,VNULL,VNULL) ) { done = TRUE; break; } } /* use ixi submatrix of R */ if (i >= ip->k) i = ip->k - 1; R = m_resize(R,i+1,i+1); rhs = v_resize(rhs,i+1); /* test only */ /* test_gmres(ip,i,Q,R,givc,givs,last_h); */ Usolve(R,rhs,rhs,0.0); /* solve a system: R*x = rhs */ /* new approximation */ for (j = 0; j <= i; j++) { v.ve = Q->me[j]; v_mltadd(ip->x,&v,rhs->ve[j],ip->x); } if (done) break; /* back to old dimensions */ rhs = v_resize(rhs,ip->k+1); R = m_resize(R,ip->k+1,ip->k); } #ifdef THREADSAFE V_FREE(u); V_FREE(r); V_FREE(rhs); V_FREE(givs); V_FREE(givc); V_FREE(z); M_FREE(Q); M_FREE(R); #endif return ip->x; }
VEC *iter_cgs(ITER *ip, VEC *r0) #endif { STATIC VEC *p = VNULL, *q = VNULL, *r = VNULL, *u = VNULL; STATIC VEC *v = VNULL, *z = VNULL; VEC *tmp; Real alpha, beta, nres, rho, old_rho, sigma, inner; if (ip == INULL) error(E_NULL,"iter_cgs"); if (!ip->Ax || !ip->b || !r0) error(E_NULL,"iter_cgs"); if ( ip->x == ip->b ) error(E_INSITU,"iter_cgs"); if (!ip->stop_crit) error(E_NULL,"iter_cgs"); if ( r0->dim != ip->b->dim ) error(E_SIZES,"iter_cgs"); if ( ip->eps <= 0.0 ) ip->eps = MACHEPS; p = v_resize(p,ip->b->dim); q = v_resize(q,ip->b->dim); r = v_resize(r,ip->b->dim); u = v_resize(u,ip->b->dim); v = v_resize(v,ip->b->dim); MEM_STAT_REG(p,TYPE_VEC); MEM_STAT_REG(q,TYPE_VEC); MEM_STAT_REG(r,TYPE_VEC); MEM_STAT_REG(u,TYPE_VEC); MEM_STAT_REG(v,TYPE_VEC); if (ip->Bx) { z = v_resize(z,ip->b->dim); MEM_STAT_REG(z,TYPE_VEC); } if (ip->x != VNULL) { if (ip->x->dim != ip->b->dim) error(E_SIZES,"iter_cgs"); ip->Ax(ip->A_par,ip->x,v); /* v = A*x */ if (ip->Bx) { v_sub(ip->b,v,v); /* v = b - A*x */ (ip->Bx)(ip->B_par,v,r); /* r = B*(b-A*x) */ } else v_sub(ip->b,v,r); /* r = b-A*x */ } else { /* ip->x == 0 */ ip->x = v_get(ip->b->dim); /* x == 0 */ ip->shared_x = FALSE; if (ip->Bx) (ip->Bx)(ip->B_par,ip->b,r); /* r = B*b */ else v_copy(ip->b,r); /* r = b */ } v_zero(p); v_zero(q); old_rho = 1.0; for (ip->steps = 0; ip->steps <= ip->limit; ip->steps++) { inner = in_prod(r,r); nres = sqrt(fabs(inner)); if (ip->steps == 0) ip->init_res = nres; if (ip->info) ip->info(ip,nres,r,VNULL); if ( ip->stop_crit(ip,nres,r,VNULL) ) break; rho = in_prod(r0,r); if ( old_rho == 0.0 ) error(E_BREAKDOWN,"iter_cgs"); beta = rho/old_rho; v_mltadd(r,q,beta,u); v_mltadd(q,p,beta,v); v_mltadd(u,v,beta,p); (ip->Ax)(ip->A_par,p,q); if (ip->Bx) { (ip->Bx)(ip->B_par,q,z); tmp = z; } else tmp = q; sigma = in_prod(r0,tmp); if ( sigma == 0.0 ) error(E_BREAKDOWN,"iter_cgs"); alpha = rho/sigma; v_mltadd(u,tmp,-alpha,q); v_add(u,q,v); (ip->Ax)(ip->A_par,v,u); if (ip->Bx) { (ip->Bx)(ip->B_par,u,z); tmp = z; } else tmp = u; v_mltadd(r,tmp,-alpha,r); v_mltadd(ip->x,v,alpha,ip->x); old_rho = rho; } #ifdef THREADSAFE V_FREE(p); V_FREE(q); V_FREE(r); V_FREE(u); V_FREE(v); V_FREE(z); #endif return ip->x; }
MAT *iter_arnoldi(ITER *ip, Real *h_rem, MAT *Q, MAT *H) #endif { STATIC VEC *u=VNULL, *r=VNULL; VEC v; /* auxiliary vector */ int i,j; Real h_val, c; if (ip == INULL) error(E_NULL,"iter_arnoldi"); if ( ! ip->Ax || ! Q || ! ip->x ) error(E_NULL,"iter_arnoldi"); if ( ip->k <= 0 ) error(E_BOUNDS,"iter_arnoldi"); if ( Q->n != ip->x->dim || Q->m != ip->k ) error(E_SIZES,"iter_arnoldi"); m_zero(Q); H = m_resize(H,ip->k,ip->k); m_zero(H); u = v_resize(u,ip->x->dim); r = v_resize(r,ip->k); MEM_STAT_REG(u,TYPE_VEC); MEM_STAT_REG(r,TYPE_VEC); v.dim = v.max_dim = ip->x->dim; c = v_norm2(ip->x); if ( c <= 0.0) return H; else { v.ve = Q->me[0]; sv_mlt(1.0/c,ip->x,&v); } v_zero(r); for ( i = 0; i < ip->k; i++ ) { v.ve = Q->me[i]; u = (ip->Ax)(ip->A_par,&v,u); for (j = 0; j <= i; j++) { v.ve = Q->me[j]; /* modified Gram-Schmidt */ r->ve[j] = in_prod(&v,u); v_mltadd(u,&v,-r->ve[j],u); } h_val = v_norm2(u); /* if u == 0 then we have an exact subspace */ if ( h_val <= 0.0 ) { *h_rem = h_val; return H; } set_col(H,i,r); if ( i == ip->k-1 ) { *h_rem = h_val; continue; } /* H->me[i+1][i] = h_val; */ m_set_val(H,i+1,i,h_val); v.ve = Q->me[i+1]; sv_mlt(1.0/h_val,u,&v); } #ifdef THREADSAFE V_FREE(u); V_FREE(r); #endif return H; }
MAT *iter_arnoldi_iref(ITER *ip, Real *h_rem, MAT *Q, MAT *H) #endif { STATIC VEC *u=VNULL, *r=VNULL, *s=VNULL, *tmp=VNULL; VEC v; /* auxiliary vector */ int i,j; Real h_val, c; if (ip == INULL) error(E_NULL,"iter_arnoldi_iref"); if ( ! ip->Ax || ! Q || ! ip->x ) error(E_NULL,"iter_arnoldi_iref"); if ( ip->k <= 0 ) error(E_BOUNDS,"iter_arnoldi_iref"); if ( Q->n != ip->x->dim || Q->m != ip->k ) error(E_SIZES,"iter_arnoldi_iref"); m_zero(Q); H = m_resize(H,ip->k,ip->k); m_zero(H); u = v_resize(u,ip->x->dim); r = v_resize(r,ip->k); s = v_resize(s,ip->k); tmp = v_resize(tmp,ip->x->dim); MEM_STAT_REG(u,TYPE_VEC); MEM_STAT_REG(r,TYPE_VEC); MEM_STAT_REG(s,TYPE_VEC); MEM_STAT_REG(tmp,TYPE_VEC); v.dim = v.max_dim = ip->x->dim; c = v_norm2(ip->x); if ( c <= 0.0) return H; else { v.ve = Q->me[0]; sv_mlt(1.0/c,ip->x,&v); } v_zero(r); v_zero(s); for ( i = 0; i < ip->k; i++ ) { v.ve = Q->me[i]; u = (ip->Ax)(ip->A_par,&v,u); for (j = 0; j <= i; j++) { v.ve = Q->me[j]; /* modified Gram-Schmidt */ r->ve[j] = in_prod(&v,u); v_mltadd(u,&v,-r->ve[j],u); } h_val = v_norm2(u); /* if u == 0 then we have an exact subspace */ if ( h_val <= 0.0 ) { *h_rem = h_val; return H; } /* iterative refinement -- ensures near orthogonality */ do { v_zero(tmp); for (j = 0; j <= i; j++) { v.ve = Q->me[j]; s->ve[j] = in_prod(&v,u); v_mltadd(tmp,&v,s->ve[j],tmp); } v_sub(u,tmp,u); v_add(r,s,r); } while ( v_norm2(s) > 0.1*(h_val = v_norm2(u)) ); /* now that u is nearly orthogonal to Q, update H */ set_col(H,i,r); /* check once again if h_val is zero */ if ( h_val <= 0.0 ) { *h_rem = h_val; return H; } if ( i == ip->k-1 ) { *h_rem = h_val; continue; } /* H->me[i+1][i] = h_val; */ m_set_val(H,i+1,i,h_val); v.ve = Q->me[i+1]; sv_mlt(1.0/h_val,u,&v); } #ifdef THREADSAFE V_FREE(u); V_FREE(r); V_FREE(s); V_FREE(tmp); #endif return H; }
VEC *iter_lsqr(ITER *ip) #endif { STATIC VEC *u = VNULL, *v = VNULL, *w = VNULL, *tmp = VNULL; Real alpha, beta, phi, phi_bar; Real rho, rho_bar, rho_max, theta, nres; Real s, c; /* for Givens' rotations */ int m, n; if ( ! ip || ! ip->b || !ip->Ax || !ip->ATx ) error(E_NULL,"iter_lsqr"); if ( ip->x == ip->b ) error(E_INSITU,"iter_lsqr"); if (!ip->stop_crit || !ip->x) error(E_NULL,"iter_lsqr"); if ( ip->eps <= 0.0 ) ip->eps = MACHEPS; m = ip->b->dim; n = ip->x->dim; u = v_resize(u,(unsigned int)m); v = v_resize(v,(unsigned int)n); w = v_resize(w,(unsigned int)n); tmp = v_resize(tmp,(unsigned int)n); MEM_STAT_REG(u,TYPE_VEC); MEM_STAT_REG(v,TYPE_VEC); MEM_STAT_REG(w,TYPE_VEC); MEM_STAT_REG(tmp,TYPE_VEC); if (ip->x != VNULL) { ip->Ax(ip->A_par,ip->x,u); /* u = A*x */ v_sub(ip->b,u,u); /* u = b-A*x */ } else { /* ip->x == 0 */ ip->x = v_get(ip->b->dim); ip->shared_x = FALSE; v_copy(ip->b,u); /* u = b */ } beta = v_norm2(u); if ( beta == 0.0 ) return ip->x; sv_mlt(1.0/beta,u,u); (ip->ATx)(ip->AT_par,u,v); alpha = v_norm2(v); if ( alpha == 0.0 ) return ip->x; sv_mlt(1.0/alpha,v,v); v_copy(v,w); phi_bar = beta; rho_bar = alpha; rho_max = 1.0; for (ip->steps = 0; ip->steps <= ip->limit; ip->steps++) { tmp = v_resize(tmp,m); (ip->Ax)(ip->A_par,v,tmp); v_mltadd(tmp,u,-alpha,u); beta = v_norm2(u); sv_mlt(1.0/beta,u,u); tmp = v_resize(tmp,n); (ip->ATx)(ip->AT_par,u,tmp); v_mltadd(tmp,v,-beta,v); alpha = v_norm2(v); sv_mlt(1.0/alpha,v,v); rho = sqrt(rho_bar*rho_bar+beta*beta); if ( rho > rho_max ) rho_max = rho; c = rho_bar/rho; s = beta/rho; theta = s*alpha; rho_bar = -c*alpha; phi = c*phi_bar; phi_bar = s*phi_bar; /* update ip->x & w */ if ( rho == 0.0 ) error(E_BREAKDOWN,"iter_lsqr"); v_mltadd(ip->x,w,phi/rho,ip->x); v_mltadd(v,w,-theta/rho,w); nres = fabs(phi_bar*alpha*c)*rho_max; if (ip->info) ip->info(ip,nres,w,VNULL); if (ip->steps == 0) ip->init_res = nres; if ( ip->stop_crit(ip,nres,w,VNULL) ) break; } #ifdef THREADSAFE V_FREE(u); V_FREE(v); V_FREE(w); V_FREE(tmp); #endif return ip->x; }
VEC *iter_cgne(ITER *ip) #endif { STATIC VEC *r = VNULL, *p = VNULL, *q = VNULL, *z = VNULL; Real alpha, beta, inner, old_inner, nres; VEC *rr1; /* pointer only */ if (ip == INULL) error(E_NULL,"iter_cgne"); if (!ip->Ax || ! ip->ATx || !ip->b) error(E_NULL,"iter_cgne"); if ( ip->x == ip->b ) error(E_INSITU,"iter_cgne"); if (!ip->stop_crit) error(E_NULL,"iter_cgne"); if ( ip->eps <= 0.0 ) ip->eps = MACHEPS; r = v_resize(r,ip->b->dim); p = v_resize(p,ip->b->dim); q = v_resize(q,ip->b->dim); MEM_STAT_REG(r,TYPE_VEC); MEM_STAT_REG(p,TYPE_VEC); MEM_STAT_REG(q,TYPE_VEC); z = v_resize(z,ip->b->dim); MEM_STAT_REG(z,TYPE_VEC); if (ip->x) { if (ip->x->dim != ip->b->dim) error(E_SIZES,"iter_cgne"); ip->Ax(ip->A_par,ip->x,p); /* p = A*x */ v_sub(ip->b,p,z); /* z = b - A*x */ } else { /* ip->x == 0 */ ip->x = v_get(ip->b->dim); ip->shared_x = FALSE; v_copy(ip->b,z); } rr1 = z; if (ip->Bx) { (ip->Bx)(ip->B_par,rr1,p); rr1 = p; } (ip->ATx)(ip->AT_par,rr1,r); /* r = A^T*B*(b-A*x) */ old_inner = 0.0; for ( ip->steps = 0; ip->steps <= ip->limit; ip->steps++ ) { rr1 = r; if ( ip->Bx ) { (ip->Bx)(ip->B_par,r,z); /* rr = B*r */ rr1 = z; } inner = in_prod(r,rr1); nres = sqrt(fabs(inner)); if (ip->info) ip->info(ip,nres,r,rr1); if (ip->steps == 0) ip->init_res = nres; if ( ip->stop_crit(ip,nres,r,rr1) ) break; if ( ip->steps ) /* if ( ip->steps > 0 ) ... */ { beta = inner/old_inner; p = v_mltadd(rr1,p,beta,p); } else /* if ( ip->steps == 0 ) ... */ { beta = 0.0; p = v_copy(rr1,p); old_inner = 0.0; } (ip->Ax)(ip->A_par,p,q); /* q = A*p */ if (ip->Bx) { (ip->Bx)(ip->B_par,q,z); (ip->ATx)(ip->AT_par,z,q); rr1 = q; /* q = A^T*B*A*p */ } else { (ip->ATx)(ip->AT_par,q,z); /* z = A^T*A*p */ rr1 = z; } alpha = inner/in_prod(rr1,p); v_mltadd(ip->x,p,alpha,ip->x); v_mltadd(r,rr1,-alpha,r); old_inner = inner; } #ifdef THREADSAFE V_FREE(r); V_FREE(p); V_FREE(q); V_FREE(z); #endif return ip->x; }
/* * n_vars is the number of variables to be considered, * d is the data array of variables d[0],...,d[n_vars-1], * pred determines which estimate is required: BLUE, BLUP, or BLP */ void gls(DATA **d /* pointer to DATA array */, int n_vars, /* length of DATA array (to consider) */ enum GLS_WHAT pred, /* what type of prediction is requested */ DPOINT *where, /* prediction location */ double *est /* output: array that holds the predicted values and variances */) { GLM *glm = NULL; /* to be copied to/from d */ static MAT *X0 = MNULL, *C0 = MNULL, *MSPE = MNULL, *CinvC0 = MNULL, *Tmp1 = MNULL, *Tmp2 = MNULL, *Tmp3 = MNULL, *R = MNULL; static VEC *blup = VNULL, *tmpa = VNULL, *tmpb = VNULL; PERM *piv = PNULL; volatile unsigned int i, rows_C; unsigned int j, k, l = 0, row, col, start_i, start_j, start_X, global, one_nbh_empty; VARIOGRAM *v = NULL; static enum GLS_WHAT last_pred = GLS_INIT; /* the initial value */ double c_value, *X_ori; int info; if (d == NULL) { /* clean up */ if (X0 != MNULL) M_FREE(X0); if (C0 != MNULL) M_FREE(C0); if (MSPE != MNULL) M_FREE(MSPE); if (CinvC0 != MNULL) M_FREE(CinvC0); if (Tmp1 != MNULL) M_FREE(Tmp1); if (Tmp2 != MNULL) M_FREE(Tmp2); if (Tmp3 != MNULL) M_FREE(Tmp3); if (R != MNULL) M_FREE(R); if (blup != VNULL) V_FREE(blup); if (tmpa != VNULL) V_FREE(tmpa); if (tmpb != VNULL) V_FREE(tmpb); last_pred = GLS_INIT; return; } if (DEBUG_COV) { printlog("we're at %s X: %g Y: %g Z: %g\n", IS_BLOCK(where) ? "block" : "point", where->x, where->y, where->z); } if (pred != UPDATE) /* it right away: */ last_pred = pred; assert(last_pred != GLS_INIT); if (d[0]->glm == NULL) { /* allocate and initialize: */ glm = new_glm(); d[0]->glm = (void *) glm; } else glm = (GLM *) d[0]->glm; glm->mu0 = v_resize(glm->mu0, n_vars); MSPE = m_resize(MSPE, n_vars, n_vars); if (pred == GLS_BLP || UPDATE_BLP) { X_ori = where->X; for (i = 0; i < n_vars; i++) { /* mu(0) */ glm->mu0->ve[i] = calc_mu(d[i], where); blup = v_copy(glm->mu0, v_resize(blup, glm->mu0->dim)); where->X += d[i]->n_X; /* shift to next x0 entry */ } where->X = X_ori; /* ... and set back */ for (i = 0; i < n_vars; i++) { /* Cij(0,0): */ for (j = 0; j <= i; j++) { v = get_vgm(LTI(d[i]->id,d[j]->id)); ME(MSPE, i, j) = ME(MSPE, j, i) = COVARIANCE0(v, where, where, d[j]->pp_norm2); } } fill_est(NULL, blup, MSPE, n_vars, est); /* in case of empty neighbourhood */ } /* xxx */ /* logprint_variogram(v, 1); */ /* * selection dependent problem dimensions: */ for (i = rows_C = 0, one_nbh_empty = 0; i < n_vars; i++) { rows_C += d[i]->n_sel; if (d[i]->n_sel == 0) one_nbh_empty = 1; } if (rows_C == 0 /* all selection lists empty */ || one_nbh_empty == 1) { /* one selection list empty */ if (pred == GLS_BLP || UPDATE_BLP) debug_result(blup, MSPE, pred); return; } for (i = 0, global = 1; i < n_vars && global; i++) global = (d[i]->sel == d[i]->list && d[i]->n_list == d[i]->n_original && d[i]->n_list == d[i]->n_sel); /* * global things: enter whenever (a) first time, (b) local selections or * (c) the size of the problem grew since the last call (e.g. simulation) */ if (glm->C == NULL || !global || rows_C > glm->C->m) { /* * fill y: */ glm->y = get_y(d, glm->y, n_vars); if (pred != UPDATE) { glm->C = m_resize(glm->C, rows_C, rows_C); if (gl_choleski == 0) /* use LDL' decomposition, allocate piv: */ piv = px_resize(piv, rows_C); m_zero(glm->C); glm->X = get_X(d, glm->X, n_vars); M_DEBUG(glm->X, "X"); glm->CinvX = m_resize(glm->CinvX, rows_C, glm->X->n); glm->XCinvX = m_resize(glm->XCinvX, glm->X->n, glm->X->n); glm->beta = v_resize(glm->beta, glm->X->n); for (i = start_X = start_i = 0; i < n_vars; i++) { /* row var */ /* fill C, mu: */ for (j = start_j = 0; j <= i; j++) { /* col var */ v = get_vgm(LTI(d[i]->id,d[j]->id)); for (k = 0; k < d[i]->n_sel; k++) { /* rows */ row = start_i + k; for (l = 0, col = start_j; col <= row && l < d[j]->n_sel; l++, col++) { if (pred == GLS_BLUP) c_value = GCV(v, d[i]->sel[k], d[j]->sel[l]); else c_value = COVARIANCE(v, d[i]->sel[k], d[j]->sel[l]); /* on the diagonal, if necessary, add measurement error variance */ if (d[i]->colnvariance && i == j && k == l) c_value += d[i]->sel[k]->variance; ME(glm->C, col, row) = c_value; /* fill upper */ if (col != row) ME(glm->C, row, col) = c_value; /* fill all */ } /* for l */ } /* for k */ start_j += d[j]->n_sel; } /* for j */ start_i += d[i]->n_sel; if (d[i]->n_sel > 0) start_X += d[i]->n_X - d[i]->n_merge; } /* for i */ /* if (d[0]->colnvmu) glm->C = convert_vmuC(glm->C, d[0]); */ if (d[0]->variance_fn) { glm->mu = get_mu(glm->mu, glm->y, d, n_vars); convert_C(glm->C, glm->mu, d[0]->variance_fn); } if (DEBUG_COV && pred == GLS_BLUP) printlog("[using generalized covariances: max_val - semivariance()]"); M_DEBUG(glm->C, "Covariances (x_i, x_j) matrix C (upper triangle)"); /* * factorize C: */ CHfactor(glm->C, piv, &info); if (info != 0) { /* singular: */ pr_warning("Covariance matrix singular at location [%g,%g,%g]: skipping...", where->x, where->y, where->z); m_free(glm->C); glm->C = MNULL; /* assure re-entrance if global */ P_FREE(piv); return; } if (piv == NULL) M_DEBUG(glm->C, "glm->C, Choleski decomposed:") else M_DEBUG(glm->C, "glm->C, LDL' decomposed:") } /* if (pred != UPDATE) */
/* LUfactor -- gaussian elimination with scaled partial pivoting -- Note: returns LU matrix which is A */ MAT *LUfactor(MAT *A, PERM *pivot) { unsigned int i, j, m, n; unsigned int k, k_max; int i_max; char MatrixTempBuffer[ 1000 ]; MatrixReal **A_v, *A_piv, *A_row; MatrixReal max1, temp, tiny; VEC *scale = VNULL; if ( A==(MAT *)NULL || pivot==(PERM *)NULL ){ error(E_NULL,"LUfactor"); } if ( pivot->size != A->m ) error(E_SIZES,"LUfactor"); m = A->m; n = A->n; if( SET_VEC_SIZE( A->m ) <1000 ) vec_get( &scale, (void *)MatrixTempBuffer, A->m ); else scale = v_get( A->m ); //MEM_STAT_REG(scale,TYPE_VEC); A_v = A->me; tiny = (MatrixReal)(10.0/HUGE_VAL); /* initialise pivot with identity permutation */ for ( i=0; i<m; i++ ) pivot->pe[i] = i; /* set scale parameters */ for ( i=0; i<m; i++ ) { max1 = 0.0; for ( j=0; j<n; j++ ) { temp = (MatrixReal)fabs(A_v[i][j]); max1 = mat_max(max1,temp); } scale->ve[i] = max1; } /* main loop */ k_max = mat_min(m,n)-1; for ( k=0; k<k_max; k++ ) { /* find best pivot row */ max1 = 0.0; i_max = -1; for ( i=k; i<m; i++ ) if ( fabs(scale->ve[i]) >= tiny*fabs(A_v[i][k]) ) { temp = (MatrixReal)fabs(A_v[i][k])/scale->ve[i]; if ( temp > max1 ) { max1 = temp; i_max = i; } } /* if no pivot then ignore column k... */ if ( i_max == -1 ) { /* set pivot entry A[k][k] exactly to zero, rather than just "small" */ A_v[k][k] = 0.0; continue; } /* do we pivot ? */ if ( i_max != (int)k ) /* yes we do... */ { px_transp(pivot,i_max,k); for ( j=0; j<n; j++ ) { temp = A_v[i_max][j]; A_v[i_max][j] = A_v[k][j]; A_v[k][j] = temp; } } /* row operations */ for ( i=k+1; i<m; i++ ) /* for each row do... */ { /* Note: divide by zero should never happen */ temp = A_v[i][k] = A_v[i][k]/A_v[k][k]; A_piv = &(A_v[k][k+1]); A_row = &(A_v[i][k+1]); if ( k+1 < n ) __mltadd__(A_row,A_piv,-temp,(int)(n-(k+1))); } } if( scale != (VEC *)MatrixTempBuffer ) // память выделялась, надо освободить V_FREE(scale); return A; }
//FIXME: add LOCALFUNC? (OR GLOBAL?) BYTE* RECURSIVE_SUBROUTINE(BYTE t, list *cur_guess, list *guesses, list_node *first_guess) { if ( t == key_length) { unsigned char* key=NULL; // generate key from the cur_guess (push guesses into a matrix and use some matrix solving library?) MAT* A; VEC *x,*b; PERM* pivot; A=m_get(key_length,key_length); b=v_get(key_length); x=v_get(key_length); int c=0,r=0; for(list_node* iter=list_head(cur_guess); iter != NULL && r<key_length; iter=list_node_next(iter)){ for(c=list_node_data_ptr(iter,byte_sum_guess_t)->i1; c <= list_node_data_ptr(iter,byte_sum_guess_t)->i2; ++c){ A->me[r][c]=1; } b->ve[r]=list_node_data_ptr(iter,byte_sum_guess_t)->value; ++r; } //Calculate matrix determinant SQRMATRIX A_det; SQRMATRIX_CreateMatrix(&A_det,key_length); for(r=0;r<key_length;++r){ for(c=0;c<key_length;++c){ A_det.array[r][c]=A->me[r][c]; } } int det; det=SQRMATRIX_CalcDeterminant(&A_det); //TODO: return this later SQRMATRIX_DestroyMatrix(&A_det); if(det==0){//If determinant is 0 continue to next guess ++count_bad_matrix; #ifdef __DEBUG //SQRMATRIX_DisplayMatrix(&A_det); v_output(b); #endif DEBUG_PRINT("Matrix determinant is 0\n"); }else{ ++count_guesses; pivot = px_get(A->m); LUfactor(A,pivot); x=LUsolve(A,pivot,b,VNULL); PX_FREE(pivot); //test key (use our RC4 impl) key=(unsigned char*)malloc(sizeof(unsigned char)*key_length); for(int i=0;i<key_length;++i){ key[i]=x->ve[i]; } int res=rc4_test_key(key); if(res){ printf("MAZAL TOV! we got the right key.\n"); print_key(key); printf("\n"); }else{ printf("Tried key: "); print_key(key); printf("\n"); free(key);key=NULL; } } //release matrix vars M_FREE(A); V_FREE(x); V_FREE(b); return key; } byte_sum_guess_t cur; //list *new_list_head=guesses; //TODO: (later) add a for loop running along the "lambeda_t" values here, for the initial impl we'll try the best guess //for () //{ for(int i=0; i<LAMBDA_T; ++i){ cur = *(list_node_data_ptr(first_guess, byte_sum_guess_t)); list_node *biatch = list_add_head(cur_guess, &cur); BYTE* res=RECURSIVE_SUBROUTINE(t+1, cur_guess, guesses, list_node_next(first_guess)); if(res!=NULL){ return res; } list_del(cur_guess,biatch); first_guess = list_node_next(first_guess); } return NULL; //TODO: do something to find the next guess and link it to the current guess //when I say something I mean find best guess (i.e best weight) of all the guesses that give us new information //(i.e not linearily dependent in our byte values and sums matrix that can be deduced from the cur_guess) //see also the note above (intuition) //IMPORTANT! //explaination how cur_guess is a matrix: each entry in cur_guess contains a list of bytes that are part of the sum and a //guess as to the value of the sum. if this matrix is solvable then solving it should give us a value for each byte of the //key thus the entire key //note: we probably should change cur_guess to a smarter database (for example a (L)x(L+1) matrix as an array?) but we //need to consider that we need to keep the ability to backtrack without making it too expensive //TODO: if weight of the guess is too small -> return FAIL (section 4.6) //These are based on section 4.4 //correct suggestions (this can be done later, basic alg should work without it) //adjust weights (this can be done later, basic alg should work without it) //merge counters (section 4.2), also skip for initial impl? need to check //go to next iteration in the recurtion //RECURSIVE_SUBROUTINE(t+1, cur_guess, ); //} end of for }