/*Computes a Cholesky factorization, with optional diagonal pivoting, of a
   symmetric matrix held in packed upper-triangular storage.
  _rr:    The upper triangle of the matrix, addressed through UT_IDX().
          It is updated in place, one row per accepted pivot, by
           ch_update().
  _pivot: If non-NULL, receives the permutation that was applied: it starts
           as the identity and ch_pivot() swaps entries k and pi whenever row
           and column pi are moved to position k.
          If NULL, no pivoting is done and the call is forwarded to
           cholesky_unpivoted().
  _tol:   A relative tolerance.
          It is multiplied by 40*_n*(_n+1) times the largest diagonal entry
           to obtain the absolute threshold a later pivot must exceed.
  _n:     The dimension of the matrix.
  Return: The number of rows that were factored, i.e., the number of pivots
           accepted before no remaining diagonal entry exceeded the
           threshold.
          This is 0 if no diagonal entry is strictly positive.*/
int cholesky(double *_rr,int *_pivot,double _tol,int _n){
  double akk;
  int    pi;
  int    j;
  int    k;
  if(_pivot==NULL)return cholesky_unpivoted(_rr,_tol,_n);
  /*Find the first pivot element: the largest strictly positive diagonal
     entry.*/
  akk=0;
  pi=-1;
  for(j=0;j<_n;j++){
    if(_rr[UT_IDX(j,j,_n)]>akk){
      pi=j;
      akk=_rr[UT_IDX(j,j,_n)];
    }
  }
  /*Form the scale factor in floating point.
    Evaluated in int, 40*_n*(_n+1) overflows (undefined behavior) once _n
     reaches 7327 when int is 32 bits.
    For smaller _n every intermediate product is exactly representable as a
     double, so the result is unchanged.*/
  _tol*=40.0*_n*((double)_n+1)*akk;
  /*Initialize the pivot list.*/
  for(k=0;k<_n;k++)_pivot[k]=k;
  for(k=0;pi>=0;k++){
    if(pi!=k)ch_pivot(_rr,NULL,_pivot,k,pi,_n);
    ch_update(_rr,sqrt(akk),k,_n);
    /*Find the next pivot element: the largest remaining diagonal entry that
       exceeds the threshold.*/
    akk=_tol;
    pi=-1;
    for(j=k+1;j<_n;j++){
      if(_rr[UT_IDX(j,j,_n)]>akk){
        akk=_rr[UT_IDX(j,j,_n)];
        pi=j;
      }
    }
  }
  return k;
}
/*Computes a Cholesky factorization without pivoting of a symmetric matrix
   held in packed upper-triangular storage.
  _rr:    The upper triangle of the matrix, addressed through UT_IDX().
          It is updated in place, one row at a time, by ch_update().
  _tol:   A relative tolerance.
          It is multiplied by 40*_n*(_n+1) times the largest diagonal entry
           to obtain the absolute threshold each diagonal entry must exceed.
  _n:     The dimension of the matrix.
  Return: The number of leading rows that were factored before a diagonal
           entry failed to exceed the threshold (_n if none failed).*/
static int cholesky_unpivoted(double *_rr,double _tol,int _n){
  double akk;
  int    k;
  /*We derive the tolerance from \cite{High90}.
    Higham reported that akk*50*DBL_EPSILON was always sufficient in his
     numerical experiments on matrices up to 50x50.
    We use the empirical bound of 10 on ||W||_2 observed when pivoting, even
     though we do no pivoting here, so this is optimistic.

    @INPROCEEDINGS{High90,
      author="Nicholas J. Higham",
      title="Chapter 9: Analysis of the {Cholesky} Decomposition of a
       Semi-Definite Matrix",
      editor="Maurice G. Cox and Sven J. Hammarling",
      booktitle="Reliable Numerical Computation",
      publisher="Oxford University Press",
      pages="161--185",
      year=1990
    }*/
  /*Find the largest strictly positive diagonal entry.*/
  akk=0;
  for(k=0;k<_n;k++){
    if(_rr[UT_IDX(k,k,_n)]>akk)akk=_rr[UT_IDX(k,k,_n)];
  }
  /*Form the scale factor in floating point.
    Evaluated in int, 40*_n*(_n+1) overflows (undefined behavior) once _n
     reaches 7327 when int is 32 bits.
    For smaller _n every intermediate product is exactly representable as a
     double, so the result is unchanged.*/
  _tol*=40.0*_n*((double)_n+1)*akk;
  /*Factor rows in order, stopping at the first diagonal entry that does not
     exceed the threshold.*/
  for(k=0;k<_n&&_rr[UT_IDX(k,k,_n)]>_tol;k++){
    ch_update(_rr,sqrt(_rr[UT_IDX(k,k,_n)]),k,_n);
  }
  return k;
}
/*Back substitution.
  Solves, in place, the upper-triangular system formed by the leading
   _k x _k part of _rr: working from the last row up, each _x[row] has the
   contributions of the already-solved entries to its right subtracted and
   is then divided by the diagonal entry.
  _rr: The packed upper-triangular matrix, addressed through UT_IDX().
  _x:  On entry the right-hand side, on exit the solution (first _k entries).
  _k:  The number of rows/columns to solve for.
  _n:  The dimension used for indexing _rr.*/
static void ch_back_sub(const double *_rr,double *_x,int _k,int _n){
  int row;
  row=_k;
  while(row>0){
    int col;
    row--;
    for(col=row+1;col<_k;col++){
      _x[row]-=_rr[UT_IDX(row,col,_n)]*_x[col];
    }
    _x[row]/=_rr[UT_IDX(row,row,_n)];
  }
}
/*Forward substitution.
  Solves, in place, the system whose matrix is the transpose of the leading
   _k x _k part of the upper-triangular _rr: each entry is divided by its
   diagonal element, and its contribution is then eliminated from all later
   entries using row `row` of _rr.
  _rr: The packed upper-triangular matrix, addressed through UT_IDX().
  _x:  On entry the right-hand side, on exit the solution (first _k entries).
  _k:  The number of rows/columns to solve for.
  _n:  The dimension used for indexing _rr.*/
static void ch_fwd_sub(const double *_rr,double *_x,int _k,int _n){
  int row;
  row=0;
  while(row<_k){
    int later;
    _x[row]/=_rr[UT_IDX(row,row,_n)];
    later=row+1;
    while(later<_k){
      _x[later]-=_rr[UT_IDX(row,later,_n)]*_x[row];
      later++;
    }
    row++;
  }
}
/*Expand the factorization to encompass the next row of the input matrix.
  _rr:    The packed upper-triangular matrix, addressed through UT_IDX().
  _alpha: The value to store on the diagonal of row _k; the rest of that row
           is divided by it.
  _k:     The row being added to the factorization.
  _n:     The dimension of the matrix.*/
static void ch_update(double *_rr,double _alpha,int _k,int _n){
  int col;
  int row;
  /*Finish row _k: set the diagonal and scale everything to its right.*/
  _rr[UT_IDX(_k,_k,_n)]=_alpha;
  col=_k+1;
  while(col<_n){
    _rr[UT_IDX(_k,col,_n)]/=_alpha;
    col++;
  }
  /*Subtract the outer product of the scaled row with itself from the
     trailing rows.
    This must run after the whole row has been scaled, since it reads entries
     of row _k to the right of the current one.*/
  for(row=_k+1;row<_n;row++){
    double factor;
    factor=_rr[UT_IDX(_k,row,_n)];
    for(col=row;col<_n;col++){
      _rr[UT_IDX(row,col,_n)]-=factor*_rr[UT_IDX(_k,col,_n)];
    }
  }
}
/*Pivot: swap row and column _i of C^(_k) with row and column _k.
  Note _rr[UT_IDX(_k,_k,_n)] is not set: it is assumed this will be set by
   the caller, and the appropriate value must have already been saved.
  The entry at (_k,_i) is left where it is.
  _rr:    The packed upper-triangular matrix, addressed through UT_IDX().
  _wwt:   If non-NULL, a second array addressed through SLT_IDX() in which
           the first _k entries of rows _i and _k are swapped as well.
  _pivot: The permutation list; entries _k and _i are exchanged.
  _k:     The destination position.
  _i:     The position of the chosen pivot (the loops below are written for
           _k<_i).
  _n:     The dimension of the matrix.*/
static void ch_pivot(double *_rr,double *_wwt,int *_pivot,
 int _k,int _i,int _n){
  double tmp;
  int    ptmp;
  int    idx;
  /*Entries above both rows: exchange columns _k and _i.*/
  for(idx=0;idx<_k;idx++){
    CP_SWAP(_rr[UT_IDX(idx,_k,_n)],_rr[UT_IDX(idx,_i,_n)],tmp);
  }
  /*Entries between the two positions: row _k trades places with column _i.*/
  for(idx=_k+1;idx<_i;idx++){
    CP_SWAP(_rr[UT_IDX(_k,idx,_n)],_rr[UT_IDX(idx,_i,_n)],tmp);
  }
  /*Entries to the right of both columns: exchange rows _k and _i.*/
  for(idx=_i+1;idx<_n;idx++){
    CP_SWAP(_rr[UT_IDX(_k,idx,_n)],_rr[UT_IDX(_i,idx,_n)],tmp);
  }
  /*The old (_k,_k) diagonal entry moves to (_i,_i); the reverse move is left
     to the caller.*/
  _rr[UT_IDX(_i,_i,_n)]=_rr[UT_IDX(_k,_k,_n)];
  if(_wwt!=NULL){
    for(idx=0;idx<_k;idx++){
      CP_SWAP(_wwt[SLT_IDX(_i,idx)],_wwt[SLT_IDX(_k,idx)],tmp);
    }
  }
  /*Record the exchange in the permutation.*/
  CP_SWAP(_pivot[_k],_pivot[_i],ptmp);
}
/*Solves a linear system using a (possibly rank-deficient, possibly pivoted)
   factorization stored in packed upper-triangular form.
  _rr:    The factor, addressed through UT_IDX().
          Presumably the output of cholesky() followed by chdecomp() --
           confirm against callers.
  _pivot: The permutation used when factoring, or NULL if none was used.
  _tau:   One scalar per reflection; only read when _r<_n.
  _x:     Receives the _n-entry solution.
  _b:     The _n-entry right-hand side.
          It may be the same array as _x: without a pivot the copy is done
           with memmove(), and with a pivot _b is fully read before _x is
           written.
  _work:  Scratch space for _n doubles, used only when _pivot is non-NULL.
          If NULL, a buffer of chsolve_worksz(_pivot,_n) doubles is
           allocated for the duration of the call.
  _r:     The number of rows that were factored.
  _n:     The dimension of the system.*/
void chsolve(const double *_rr,const int *_pivot,const double *_tau,double *_x,
 const double *_b,double *_work,int _r,int _n){
  double *y;
  int     row;
  int     col;
  int     idx;
  /*Gather the right-hand side into the working vector.*/
  if(_pivot==NULL){
    memmove(_x,_b,_n*sizeof(*_x));
    y=_x;
  }
  else{
    if(_work!=NULL)y=_work;
    /*NOTE(review): the allocation result is not checked before use.*/
    else y=(double *)malloc(chsolve_worksz(_pivot,_n)*sizeof(*y));
    for(idx=0;idx<_n;idx++)y[idx]=_b[_pivot[idx]];
  }
  /*When rank-deficient, apply the reflections (last row first) before the
     triangular solves.*/
  if(_r<_n){
    row=_r;
    while(row>0){
      double acc;
      row--;
      acc=y[row];
      for(col=_r;col<_n;col++)acc+=y[col]*_rr[UT_IDX(row,col,_n)];
      acc*=_tau[row];
      y[row]=acc-y[row];
      for(col=_r;col<_n;col++)y[col]-=acc*_rr[UT_IDX(row,col,_n)];
    }
  }
  /*Two triangular solves with the leading _r x _r block.*/
  ch_fwd_sub(_rr,y,_r,_n);
  ch_back_sub(_rr,y,_r,_n);
  /*When rank-deficient, clear the trailing entries and apply the reflections
     again in the opposite order (first row first).*/
  if(_r<_n){
    memset(y+_r,0,(_n-_r)*sizeof(*y));
    for(row=0;row<_r;row++){
      double acc;
      acc=-y[row];
      for(col=_r;col<_n;col++)acc+=y[col]*_rr[UT_IDX(row,col,_n)];
      acc*=_tau[row];
      y[row]=-(acc+y[row]);
      for(col=_r;col<_n;col++)y[col]-=acc*_rr[UT_IDX(row,col,_n)];
    }
  }
  /*Scatter the result back through the permutation and release any buffer we
     allocated ourselves.*/
  if(_pivot!=NULL){
    for(idx=0;idx<_n;idx++)_x[_pivot[idx]]=y[idx];
    if(_work==NULL)free(y);
  }
}
/*Householder QR decomposition.
  aat:        n vectors of m entries each, vector k starting at
               aat+k*aat_stride (by the name, presumably the columns of A,
               i.e., A^T stored row by row).
              Overwritten: for each k<min(m,n) with a non-zero norm, entries
               k..m-1 of vector k are replaced by the scaled reflection
               vector, and later vectors hold the reflected data.
  aat_stride: Distance, in doubles, between consecutive vectors of aat.
  d:          Receives the min(m,n) diagonal entries of R.
  qqt:        If non-NULL, receives min(m,n) vectors of m entries each,
               vector k starting at qqt+k*qqt_stride, built by applying the
               stored reflections.
  qqt_stride: Distance, in doubles, between consecutive vectors of qqt.
  rr:         If non-NULL, receives rows 0..min(m,n)-1 of R in packed
               upper-triangular form, addressed through UT_IDX(.,.,n).
  n:          The number of vectors in aat.
  m:          The number of entries in each vector.
  Return: The number of steps whose pivot vector had a non-zero norm.*/
int qrdecomp_hh(double *aat, int aat_stride, double *d,
 double *qqt, int qqt_stride, double *rr, int n, int m) {
  int rank;
  int i;
  int j;
  int k;
  int l;
  rank = 0;
  l = m < n ? m : n;
  for (k = 0; k < l; k++) {
    double *aatk;
    double d2;
    aatk = aat + k*aat_stride;
    d2 = v2norm(aatk + k, m - k);
    if (d2 != 0) {
      double e;
      double s;
      /*Give the norm the sign of the leading entry so that the increment
         below cannot cancel.*/
      if (aatk[k] < 0) d2 = -d2;
      for (i = k; i < m; i++) aatk[i] /= d2;
      e = ++aatk[k];
      /*Apply the reflection to every later vector.*/
      for (j = k + 1; j < n; j++) {
        double *aatj;
        aatj = aat + j*aat_stride;
        s = -vdot(aatk + k, aatj + k, m - k)/e;
        for (i = k; i < m; i++) aatj[i] += s*aatk[i];
      }
      rank++;
    }
    d[k] = -d2;
    if (rr != NULL) {
      /*Write all of row k of R here, whether or not a reflection was
         applied.
        Previously the off-diagonal entries were only stored inside the
         d2 != 0 branch, so a zero pivot vector left them unwritten.
        No later step modifies entry k of any vector, so reading them after
         the update loop yields the same values as storing them inside it.*/
      rr[UT_IDX(k, k, n)] = d[k];
      for (j = k + 1; j < n; j++) {
        rr[UT_IDX(k, j, n)] = aat[j*aat_stride + k];
      }
    }
  }
  /*Uncomment (along with code below for Q) to compute the _unique_
     factorization with the diagonal of R strictly non-negative.
    Unfortunately, this will not match the encoded Q and R in qrt, preventing
     the user from mixing and matching the explicit and implicit
     decompositions.*/
  /*if(rr != NULL) {
    for (k = 0; k < l; k++) {
      if (d[k] < 0) {
        for(j = k; j < n; j++) rr[UT_IDX(k, j, n)] = -rr[UT_IDX(k, j, n)];
      }
    }
  }*/
  if(qqt != NULL) {
    /*Accumulate the reflections from last to first.*/
    for (k = l; k-- > 0;) {
      double *aatk;
      double *qqtj;
      double e;
      aatk = aat + k*aat_stride;
      qqtj = qqt + k*qqt_stride;
      /*Vector k of Q: zero above index k, then the unit vector at k minus
         the stored reflection vector.*/
      memset(qqtj, 0, k*sizeof(*qqtj));
      for (i = k; i < m; i++) qqtj[i] = -aatk[i];
      qqtj[k]++;
      e = aatk[k];
      /*A zero leading entry means no reflection was stored for this step.*/
      if(e != 0)for(j = k + 1; j < l; j++) {
        double s;
        qqtj = qqt + j*qqt_stride;
        s = -vdot(aatk + k, qqtj + k, m - k)/e;
        for (i = k; i < m; i++) qqtj[i] += s*aatk[i];
      }
    }
    /*Uncomment (along with code above for R) to compute the _unique_
       factorization with the diagonal of R strictly non-negative.
      Unfortunately, this will not match the encoded Q and R in qrt,
       preventing the user from mixing and matching the explicit and implicit
       decompositions.*/
    /*for (k = 0; k < l; k++) if(d[k] < 0) {
      double *qqtk;
      qqtk = qqt + k*qqt_stride;
      for (i = 0; i < m; i++) qqtk[i] = -qqtk[i];
    }*/
  }
  return rank;
}
/*Fits the intra-prediction weights for every mode from the statistics held
   in _ctx and prints the results to stdout as C source.
  Two tables are printed: OD_INTRA_PRED_WEIGHTS_<B_SZ>x<B_SZ> (one set of
   weights per mode and per output coefficient, emitted via print_beta()) and
   OD_INTRA_PRED_PROB_<B_SZ>x<B_SZ> (derived from _ctx->freq).
  _ctx: The accumulated statistics.
        This is modified: _ctx->scale[mode] is filled in, _ctx->r_xx[mode] is
         normalized in place, and _ctx->beta[mode][...] receives the fitted
         weights.*/
static void update_intra_xforms(intra_xform_ctx *_ctx){
  int mode;
  int pli;
  /*Update the model for each coefficient in each mode.*/
  printf("/* This file is generated automatically by init_intra_xform */\n");
  printf("#include \"intra.h\"\n");
  printf("\n");
  printf("const double OD_INTRA_PRED_WEIGHTS_%ix%i"
   "[OD_INTRA_NMODES][%i][%i][2*%i][2*%i]={\n",
   B_SZ,B_SZ,B_SZ,B_SZ,B_SZ,B_SZ);
  for(mode=0;mode<OD_INTRA_NMODES;mode++){
    /*The list of predictor indices, each of the form 2*B_SZ*row+col.*/
    int        xi[2*B_SZ*2*B_SZ];
    int        nxi;
    int        i;
    int        j;
    /*double    *r_x;*/
    r_xx_row  *r_xx;
    double    *scale;
    /*r_x=_ctx->r_x[mode];*/
    r_xx=_ctx->r_xx[mode];
    scale=_ctx->scale[mode];
    printf(" {\n");
    /*Take the square root of each diagonal entry as that entry's scale,
       substituting 1 for any scale that is not positive.*/
    for(i=0;i<2*B_SZ*2*B_SZ;i++){
      scale[i]=sqrt(r_xx[i][i]);
      if(scale[i]<=0)scale[i]=1;
    }
    /*Normalize r_xx in place by the product of the two scales.*/
    for(i=0;i<2*B_SZ*2*B_SZ;i++){
      for(j=0;j<2*B_SZ*2*B_SZ;j++){
        r_xx[i][j]/=scale[i]*scale[j];
      }
    }
    /*Build the predictor list in three groups of B_SZ*B_SZ indices:
       rows 0..B_SZ-1 with columns 0..B_SZ-1, rows 0..B_SZ-1 with columns
       B_SZ..2*B_SZ-1, and rows B_SZ..2*B_SZ-1 with columns 0..B_SZ-1.*/
    nxi=0;
    for(j=0;j<B_SZ;j++){
      for(i=0;i<B_SZ;i++){
        xi[nxi]=2*B_SZ*j+i;
        xi[nxi+B_SZ*B_SZ]=2*B_SZ*j+B_SZ+i;
        xi[nxi+2*B_SZ*B_SZ]=2*B_SZ*(B_SZ+j)+i;
        nxi++;
      }
    }
#if 0
    /*Disabled: dump the normalized r_xx for mode 0.*/
    if(mode==0){
      for(i=0;i<2*B_SZ;i++){
        for(j=0;j<2*B_SZ;j++){
          int k;
          int l;
          for(k=0;k<2*B_SZ;k++){
            for(l=0;l<2*B_SZ;l++){
              printf("%0.18G%s",r_xx[2*B_SZ*i+j][2*B_SZ*k+l],2*B_SZ*k+l>=2*B_SZ*2*B_SZ-1?"\n":" ");
            }
          }
        }
      }
    }
#endif
    /*Fit one weight vector for each output position (i,j).*/
    for(i=0;i<B_SZ;i++){
      printf(" {\n");
      for(j=0;j<B_SZ;j++){
        double  xty[2*B_SZ*2*B_SZ];
        double *beta;
        int     xii;
        int     xij;
        int     yi;
        /*Use all three groups of predictors built above.*/
        nxi=3*B_SZ*B_SZ;
#if 0
        /*Include coefficients for the current block*/
        {
          int k;
          int l;
          for(k=0;k<=i;k++){
            for(l=0;l<=j;l++){
              xi[nxi++]=2*B_SZ*(B_SZ+k)+B_SZ+l;
            }
          }
          nxi--;
        }
#endif
        /*The index of the value being predicted: row B_SZ+i, column
           B_SZ+j.*/
        yi=2*B_SZ*(B_SZ+i)+B_SZ+j;
        /*The right-hand side: the r_xx entries pairing each predictor with
           the target.*/
        for(xii=0;xii<nxi;xii++)xty[xii]=r_xx[xi[xii]][yi];
        beta=_ctx->beta[mode][B_SZ*i+j];
        memset(beta,0,2*B_SZ*2*B_SZ*sizeof(*beta));
#if defined(OD_USE_SVD)
        {
          double  xtx[2*2*B_SZ*2*B_SZ][2*B_SZ*2*B_SZ];
          double *xtxp[2*2*B_SZ*2*B_SZ];
          double  s[2*B_SZ*2*B_SZ];
          /*Gather the predictor-by-predictor submatrix of r_xx.*/
          for(xii=0;xii<nxi;xii++){
            for(xij=0;xij<nxi;xij++){
              xtx[xii][xij]=r_xx[xi[xii]][xi[xij]];
            }
          }
          /*svd_pseudoinverse() is handed 2*nxi row pointers.
            NOTE(review): presumably it leaves the pseudo-inverse in xtx,
             given how xtx is read below -- confirm against its definition.*/
          for(xii=0;xii<2*nxi;xii++)xtxp[xii]=xtx[xii];
          svd_pseudoinverse(xtxp,s,nxi,nxi);
          /*beta[yi]=r_x[yi];*/
          for(xii=0;xii<nxi;xii++){
            double beta_i;
            beta_i=0;
            for(xij=0;xij<nxi;xij++)beta_i+=xtx[xij][xii]*xty[xij];
            /*Undo the normalization applied to r_xx.*/
            beta[xi[xii]]=beta_i*scale[yi]/scale[xi[xii]];
            /*beta[yi]-=beta_i*r_x[xi[xii]];*/
          }
        }
#else
        {
          double xtx[UT_SZ(2*B_SZ*2*B_SZ,2*B_SZ*2*B_SZ)];
          double tau[2*B_SZ*2*B_SZ];
          double work[2*B_SZ*2*B_SZ];
          int    pivot[2*B_SZ*2*B_SZ];
          int    rank;
          /*Gather the upper triangle of the predictor-by-predictor submatrix
             of r_xx into packed storage.*/
          for(xii=0;xii<nxi;xii++){
            for(xij=xii;xij<nxi;xij++){
              xtx[UT_IDX(xii,xij,nxi)]=r_xx[xi[xii]][xi[xij]];
            }
          }
          /*Factor with pivoting, then solve with xty as both the right-hand
             side and the output.*/
          rank=cholesky(xtx,pivot,DBL_EPSILON,nxi);
          chdecomp(xtx,tau,rank,nxi);
          chsolve(xtx,pivot,tau,xty,xty,work,rank,nxi);
          for(xii=0;xii<nxi;xii++){
            /*Undo the normalization applied to r_xx.*/
            beta[xi[xii]]=xty[xii]*scale[yi]/scale[xi[xii]];
            /*beta[yi]-=beta_i*r_x[xi[xii]];*/
          }
        }
#endif
        print_beta(mode,i,j,beta);
      }
      printf(" }%s\n",i<B_SZ-1?",":"");
    }
    printf(" }%s\n",mode<OD_INTRA_NMODES-1?",":"");
  }
  printf("};\n\n");
  /*NOTE(review): the printed declaration uses OD_INTRA_NCONTEXTS while the
     loop below runs to NB_CONTEXTS -- confirm the two agree.*/
  printf("const unsigned char OD_INTRA_PRED_PROB_%dx%d[3][OD_INTRA_NMODES][OD_INTRA_NCONTEXTS]={\n",B_SZ,B_SZ);
  for(pli=0;pli<3;pli++)
  {
    int i;
    printf("{");
    for(i=0;i<OD_INTRA_NMODES;i++)
    {
      int j;
      printf("{");
      /*Each entry is freq[..][1]/freq[..][0] scaled by 256 and rounded to
         the nearest integer.
        NOTE(review): nothing here guards against freq[..][0] being zero.*/
      for(j=0;j<NB_CONTEXTS;j++)
        printf("%d, ", (int)floor(.5+256.*_ctx->freq[pli][i][j][1]/(float)_ctx->freq[pli][i][j][0]));
      printf("},\n");
    }
    printf("},\n");
  }
  printf("};\n\n");
}
/*Reduces the first _r rows of a packed upper-triangular matrix so that the
   columns from _r onward are absorbed into Householder reflections.
  _rr:  The matrix, addressed through UT_IDX().
        On exit each row k<_r holds its new diagonal entry at (k,k), updated
         entries in columns k+1.._r-1, and the scaled reflection vector in
         columns _r.._n-1.
  _tau: Receives one scalar per row k<_r.
  _r:   The number of rows to process.
  _n:   The dimension of the matrix.*/
void chdecomp(double *_rr,double *_tau,int _r,int _n){
  int row;
  int prev;
  int col;
  /*See Section 4 of \cite{HL69} for a derivation for a general matrix.
    We ignore the orthogonal matrix Q on the left, since we're already upper
     trapezoidal (and it would cancel with its transpose in the product
     R^T.R).

    @ARTICLE{HL69,
      author="Richard J. Hanson and Charles L. Lawson",
      title="Extensions and Applications of the Householder Algorithm for
       Solving Linear Least Squares",
      journal="Mathematics of Computation",
      volume=23,
      number=108,
      pages="787--812",
      month=Oct,
      year=1969
    }*/
  /*Rows are handled from the last one up.*/
  row=_r;
  while(row>0){
    double diag;
    double norm;
    double scl;
    double sumsq;
    row--;
    /*Apply the Householder reflections from the previous rows (those already
       processed, i.e., the ones below this row), nearest to _r first.*/
    for(prev=_r-1;prev>row;prev--){
      scl=_rr[UT_IDX(row,prev,_n)];
      for(col=_r;col<_n;col++){
        scl+=_rr[UT_IDX(row,col,_n)]*_rr[UT_IDX(prev,col,_n)];
      }
      scl*=_tau[prev];
      /*Note the negative here: we add an extra scale by -1 to the prev'th
         column so that the diagonal entry remains positive.*/
      _rr[UT_IDX(row,prev,_n)]=scl-_rr[UT_IDX(row,prev,_n)];
      for(col=_r;col<_n;col++){
        _rr[UT_IDX(row,col,_n)]-=scl*_rr[UT_IDX(prev,col,_n)];
      }
    }
    /*Compute the reflection which zeros the right part of this row.*/
    diag=_rr[UT_IDX(row,row,_n)];
    /*Find the largest magnitude involved, starting from the diagonal value,
       so the sum of squares can be formed on scaled values.*/
    norm=diag;
    for(col=_r;col<_n;col++){
      double mag;
      mag=fabs(_rr[UT_IDX(row,col,_n)]);
      if(mag>norm)norm=mag;
    }
    scl=1/norm;
    sumsq=(diag*scl)*(diag*scl);
    for(col=_r;col<_n;col++){
      sumsq+=(_rr[UT_IDX(row,col,_n)]*scl)*(_rr[UT_IDX(row,col,_n)]*scl);
    }
    norm*=sqrt(sumsq);
    _tau[row]=diag/norm+1;
    scl=1/(diag+norm);
    /*Store the new diagonal and the scaled reflection vector.*/
    _rr[UT_IDX(row,row,_n)]=norm;
    for(col=_r;col<_n;col++)_rr[UT_IDX(row,col,_n)]*=scl;
  }
}