Exemplo n.º 1
0
CAMLprim value ml_gsl_linalg_cholesky_solve(value CHO, value B, value X)
{
  _DECLARE_MATRIX(CHO);
  _DECLARE_VECTOR2(B, X);
  _CONVERT_MATRIX(CHO);
  _CONVERT_VECTOR2(B, X);
  gsl_linalg_cholesky_solve(&m_CHO, &v_B, &v_X);
  return Val_unit;
}
Exemplo n.º 2
0
// f = (1/2) x^T Ax + b^T x
void prox_quad(gsl_vector *x, const double rho, gsl_matrix *A, gsl_matrix *b) 
{
    gsl_matrix *I = gsl_matrix_alloc(A->size1);
    gsl_matrix_set_identity(I);
    gsl_matrix_scale(I, rho);
    gsl_matrix_add(I, A);

    gsl_vector_scale(x, rho);
    gsl_vector_scale(b, -1);
    gsl_vector_add(b, x);

    gsl_linalg_cholesky_decomp(I);
    gsl_linalg_cholesky_solve(I, b, x);

    gsl_matrix_free(I);
}
Exemplo n.º 3
0
int PoissonGlm::betaEst( unsigned int id, unsigned int iter, double *tol, double th)
{
   gsl_set_error_handler_off();
   int status, isValid;
  // unsigned int j, ngoodobs;
   unsigned int i, step, step1; 
   double wij, zij, eij, mij, yij; //, bij;   
   double dev_old, dev_grad=1.0;
   gsl_vector_view Xwi;
   gsl_matrix *WX, *XwX;
   gsl_vector *z, *Xwz;
   gsl_vector *coef_old = gsl_vector_alloc(nParams);
   gsl_vector_view bj=gsl_matrix_column (Beta, id);

   // Main Loop of IRLS begins
   z = gsl_vector_alloc(nRows);
   WX = gsl_matrix_alloc(nRows, nParams); 
   XwX = gsl_matrix_alloc(nParams, nParams);
   Xwz = gsl_vector_alloc(nParams);
   step=0;
   *tol = 1.0;
   gsl_vector_memcpy (coef_old, &bj.vector);
   while ( step<iter ) {
       for (i=0; i<nRows; i++) { // (y-m)/g'
           yij = gsl_matrix_get(Yref, i, id);
           eij = gsl_matrix_get(Eta, i, id);
           mij = gsl_matrix_get(Mu, i, id);
        //   if (mij<mintol) mij=mintol;
        //   if (mij>maxtol) mij=maxtol;
           zij = eij + (yij-mij)*LinkDash(mij);
           if (Oref!=NULL) 
              zij = zij - gsl_matrix_get(Oref, i, id);
           // wt=sqrt(weifunc);
           wij = sqrt(weifunc(mij, th));
           // W^1/2*z[good]
           gsl_vector_set(z, i, wij*zij); 
           // W^1/2*X[good]
           Xwi = gsl_matrix_row (Xref, i);
           gsl_matrix_set_row (WX, i, &Xwi.vector);
           Xwi = gsl_matrix_row (WX, i);
           gsl_vector_scale(&Xwi.vector, wij); 
       }
       // in glm2, solve WXb=Wz, David suggested not good 
       // So back to solving X'WXb=X'Wz
       gsl_matrix_set_identity (XwX);
       gsl_blas_dsyrk (CblasLower,CblasTrans,1.0,WX,0.0,XwX); 
       status=gsl_linalg_cholesky_decomp(XwX);
       if (status==GSL_EDOM) {
          if (mmRef->warning==TRUE) {
             printf("Warning: singular matrix in betaEst: ");
             gsl_matrix_set_identity (XwX); 
             gsl_blas_dsyrk (CblasLower,CblasTrans,1.0,Xref,0.0,XwX);
          //  displaymatrix(Xref, "Xref");
	  // displaymatrix(XwX, "XX^T");
          //   printf("calc(XX')=%.8f\n", calcDet(XwX)); 
             status=gsl_linalg_cholesky_decomp(XwX);
             if (status==GSL_EDOM)  
                printf("X^TX is singular - check case resampling or input design matrix!\n");
             else {
                for (i=0; i<nRows; i++) {
                   mij = gsl_matrix_get(Mu, i, id);
                   wij = sqrt(weifunc(mij, th));
                   if (wij<mintol) printf("weight[%d, %d]=%.4f is too close to zero\n", i, id, wij);
                }
             } 
             printf("An eps*I is added to the singular matrix.\n");
          }
          gsl_matrix_set_identity (XwX);
          gsl_blas_dsyrk (CblasLower,CblasTrans,1.0,WX,mintol,XwX); 
          gsl_linalg_cholesky_decomp(XwX);
       }
       gsl_blas_dgemv(CblasTrans,1.0,WX,z,0.0,Xwz);
       gsl_linalg_cholesky_solve (XwX, Xwz, &bj.vector);

   // Debug for nan
/*   if (gsl_vector_get(&bj.vector, 1)!=gsl_vector_get(&bj.vector, 1)) {
       displayvector(&bj.vector, "bj");
       displayvector(z, "z");
       gsl_vector_view mj=gsl_matrix_column(Mu, id);
       displayvector(&mj.vector, "mj");
       printf("weight\n");
       for (i=0; i<nRows; i++){
           printf("%.4f ", sqrt(weifunc(mij, th)));
       }
       printf("\n");
       displaymatrix(XwX, "XwX");
       exit(-1);
   }   
*/
       // Given bj, update eta, mu
       dev_old = dev[id];
       isValid=predict(bj, id, th);
       dev_grad=(dev[id]-dev_old)/(ABS(dev[id])+0.1);
       *(tol)=ABS(dev_grad);  

       step1 = 0;
       // If divergent or increasing deviance, half step
       // (step>1) -> (step>0) gives weired results for NBin fit       
       // below works for boundary values, esp BIN fit but not NBin fit
       while ((dev_grad>eps)&(step>1)){
            gsl_vector_add (&bj.vector, coef_old);
            gsl_vector_scale (&bj.vector, 0.5);
       //     dev_old=dev[id];
            isValid=predict(bj, id, th);
            dev_grad=(dev[id]-dev_old)/(ABS(dev[id])+0.1); 
            *tol=ABS(dev_grad);
            if (*tol<eps) break;
            step1++;
            if (step1>10) {
            //   printf("\t Half step stopped at iter %d: gradient=%.8f\n", step1, dev_grad);
               break;
            }
       }
       if (isValid==TRUE) gsl_vector_memcpy (coef_old, &bj.vector);
      
       step++;
       if (*tol<eps) break;
   } 

   gsl_vector_free(z);
   gsl_matrix_free(WX); 
   gsl_matrix_free(XwX); 
   gsl_vector_free(Xwz); 
   gsl_vector_free(coef_old); 

   return step;
}
Exemplo n.º 4
0
 /**
  * C++ version of gsl_linalg_cholesky_solve().
  * @param cholesky A Cholesky decomposition matrix
  * @param b A vector
  * @param x A vector
  * @return Error code on failure
  */
 inline int cholesky_solve( matrix const& cholesky, vector const& b, vector& x ){
   return gsl_linalg_cholesky_solve( cholesky.get(), b.get(), x.get() ); } 
Exemplo n.º 5
0
// Wald Test used in both summary and anova (polymophism)
int GlmTest::GeeWald(glm *Alt, gsl_matrix *LL, gsl_vector *teststat)
{
    gsl_set_error_handler_off();

    unsigned int i, j, l;
    double alpha, result, sum=0;
    unsigned int nP = Alt->nParams;
    unsigned int nDF = LL->size1;
    unsigned int nVars=tm->nVars, nRows=tm->nRows;
    int status;

    gsl_vector *LBeta = gsl_vector_alloc(nVars*nDF);
    gsl_vector_set_zero(LBeta);
    gsl_matrix *w1jX1=gsl_matrix_alloc(nRows, nP);
    gsl_matrix *XwX=gsl_matrix_alloc(nP, nP);
    gsl_matrix *Rl2 = gsl_matrix_alloc(nDF, nP);
    gsl_matrix *IinvN = gsl_matrix_alloc(nDF, nDF);
    gsl_matrix *IinvRl = gsl_matrix_alloc(nVars*nDF, nVars*nDF);
    gsl_vector *tmp = gsl_vector_alloc(nVars*nDF);
    gsl_vector_view tmp2, wj, LBj, bj; //, dj; 
    gsl_matrix_view Rl;

    gsl_matrix_set_zero(IinvRl);
    GrpMat *Z = (GrpMat*)malloc(nVars*sizeof(GrpMat));
    for (j=0; j<nVars; j++){
       Z[j].matrix = gsl_matrix_alloc(nP, nRows);
       // w1jX1 = W^1/2 * X
       wj=gsl_matrix_column(Alt->wHalf, j);
       for (i=0; i<nP; i++)
           gsl_matrix_set_col (w1jX1, i, &wj.vector);
       gsl_matrix_mul_elements (w1jX1, Alt->Xref);

       // LBeta = L*Beta       
       LBj=gsl_vector_subvector(LBeta, j*nDF, nDF);
       bj=gsl_matrix_column(Alt->Beta, j);
       gsl_blas_dgemv(CblasNoTrans,1,LL,&bj.vector,0,&LBj.vector);

       // Z = (X^T W X)^-1 * X^T W^1/2. 
       gsl_matrix_set_identity(XwX);
       gsl_blas_dsyrk (CblasLower,CblasTrans,1.0,w1jX1,0.0,XwX);
       status=gsl_linalg_cholesky_decomp (XwX);
       if (status==GSL_EDOM) {
          if (tm->warning==TRUE) 
             printf("Warning:singular matrix in wald test. An eps*I is added to the singular matrix.\n");
          gsl_matrix_set_identity(XwX);
          gsl_blas_dsyrk(CblasLower,CblasTrans,1.0,w1jX1,eps,XwX);
          gsl_linalg_cholesky_decomp(XwX);
       }
       gsl_linalg_cholesky_invert(XwX);
       gsl_blas_dgemm(CblasNoTrans,CblasTrans,1.0,XwX,w1jX1,0.0, Z[j].matrix);

       gsl_matrix_memcpy(Rl2, LL);
       gsl_blas_dtrmm (CblasRight,CblasLower,CblasNoTrans,CblasNonUnit,1.0,XwX,Rl2); // L*(X'WX)^-1
       gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, Rl2, LL, 0.0, IinvN); // L*(X^T*W*X)^-1*L^T 

       if ( (tm->punit!=NONE) || (tm->corr==IDENTITY) ) {
          status=gsl_linalg_cholesky_decomp (IinvN);
          if (status==GSL_EDOM) {
             if (tm->warning==TRUE) 
             printf("Warning:singular IinvN in wald test.\n");
          }
          tmp2=gsl_vector_subvector(tmp, 0, nDF);
          gsl_linalg_cholesky_solve (IinvN, &LBj.vector, &tmp2.vector);
          gsl_blas_ddot (&LBj.vector, &tmp2.vector, &result);
          gsl_vector_set(teststat, j+1, sqrt(result));
          sum = sum + result;
       }

       if (tm->corr!=IDENTITY) {
          // IinvRl=L*vSandRl*L^T 
          for (l=0; l<=j; l++) {
              Rl=gsl_matrix_submatrix(IinvRl,j*nDF,l*nDF,nDF,nDF);
              alpha = gsl_matrix_get(Rlambda, j, l);
              // borrow XwX space to store vSandRl
              gsl_blas_dgemm(CblasNoTrans,CblasTrans,alpha,Z[j].matrix,Z[l].matrix, 0.0, XwX); 
              // Rl2 = L*vSandRl*L^T
              gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,LL,XwX,0.0,Rl2);
              gsl_blas_dgemm(CblasNoTrans,CblasTrans,1.0,Rl2,LL,0.0,&Rl.matrix);
          } // end l
       }  // end if (tm->corr) 

    } // end for j=1:nVars       

    if ( tm->corr==IDENTITY ) 
        gsl_vector_set(teststat, 0, sqrt(sum));
    else {
        status=gsl_linalg_cholesky_decomp (IinvRl);
        if (status==GSL_EDOM) {
           if (tm->warning==TRUE) 
           printf("Warning:singular matrix in multivariate wald test.\n");
        }
        gsl_linalg_cholesky_solve (IinvRl, LBeta, tmp);
        gsl_blas_ddot (LBeta, tmp, &result);
        gsl_vector_set(teststat, 0, sqrt(result));
    }

    // free memory
    for (j=0; j<nVars; j++) 
        gsl_matrix_free(Z[j].matrix);
    free(Z);
    gsl_vector_free(LBeta);
    gsl_matrix_free(w1jX1);
    gsl_matrix_free(XwX);
    gsl_matrix_free(Rl2);
    gsl_matrix_free(IinvN);
    gsl_matrix_free(IinvRl);
    gsl_vector_free(tmp);

    return SUCCESS;
}
Exemplo n.º 6
0
int GlmTest::GeeScore(gsl_matrix *X1, glm *PtrNull, gsl_vector *teststat)
{
    gsl_set_error_handler_off();

    double result, alpha, sum=0;
    unsigned int i, j, l, nP = X1->size2;
    unsigned int nVars=tm->nVars, nRows=tm->nRows;
    int status;

    gsl_vector *U = gsl_vector_alloc(nVars*nP);
    gsl_matrix *kRlNull = gsl_matrix_alloc(nVars*nP, nVars*nP);
    gsl_matrix_set_zero (kRlNull);
    gsl_matrix *XwX = gsl_matrix_alloc(nP, nP);
    gsl_vector *tmp=gsl_vector_alloc(nVars*nP);
    gsl_vector_view wj, uj, rj, tmp2; //, dj;
    gsl_matrix_view Rl;

    GrpMat *Z = (GrpMat*)malloc(nVars*sizeof(GrpMat));
    for (j=0; j<nVars; j++) {
        Z[j].matrix = gsl_matrix_alloc(nRows, nP);
        // get W^1/2 * X
        wj = gsl_matrix_column (PtrNull->wHalf, j);
        for (i=0; i<nP; i++)
            gsl_matrix_set_col (Z[j].matrix, i, &wj.vector);
        gsl_matrix_mul_elements (Z[j].matrix, X1);

        uj=gsl_vector_subvector(U, j*nP, nP);
        rj=gsl_matrix_column(PtrNull->Res, j);
        gsl_blas_dgemv(CblasTrans, 1, Z[j].matrix, &rj.vector, 0, &uj.vector);

        if ( (tm->punit!=NONE) || (tm->corr==IDENTITY) ) {
           gsl_matrix_set_identity(XwX);
           gsl_blas_dsyrk(CblasLower, CblasTrans, 1, Z[j].matrix, 0, XwX);
           status=gsl_linalg_cholesky_decomp(XwX); 
           if (status==GSL_EDOM) {
              if (tm->warning==TRUE)
                  printf("Warning: singular matrix in score test. An eps*I is added to the singular matrix.\n");
              gsl_matrix_set_identity(XwX);
              gsl_blas_dsyrk(CblasLower,CblasTrans,1,Z[j].matrix,eps,XwX);
              gsl_linalg_cholesky_decomp(XwX); 
           }
           tmp2=gsl_vector_subvector(tmp, 0, nP);
           gsl_linalg_cholesky_solve(XwX, &uj.vector, &tmp2.vector);
           gsl_blas_ddot(&uj.vector, &tmp2.vector, &result);
           gsl_vector_set(teststat, j+1, result);
           sum = sum+result;           
        }

        if ( tm->corr!=IDENTITY) {
            for (l=0; l<=j; l++) { // lower half
                alpha = gsl_matrix_get(Rlambda, j, l);
                Rl=gsl_matrix_submatrix(kRlNull,j*nP,l*nP,nP,nP);
                gsl_blas_dgemm(CblasTrans, CblasNoTrans, alpha, Z[j].matrix, Z[l].matrix, 0, &Rl.matrix);
            }
        }
    } // end for j=1:nVars

    // multivariate test stat   
    if ( tm->corr==IDENTITY ) gsl_vector_set(teststat, 0, sum);
    else {    
        status=gsl_linalg_cholesky_decomp (kRlNull);
        if (status==GSL_EDOM) {
           if (tm->warning==TRUE) 
              printf("Warning:singular kRlNull in multivariate score test.\n");
        }
        gsl_linalg_cholesky_solve (kRlNull, U, tmp);
        gsl_blas_ddot (U, tmp, &result);
        gsl_vector_set(teststat, 0, result);
    }

   // clear memory
    gsl_vector_free(U);
    gsl_vector_free(tmp);
    gsl_matrix_free(XwX);
    gsl_matrix_free(kRlNull);
    for (j=0; j<nVars; j++) gsl_matrix_free(Z[j].matrix);
    free(Z);

    return SUCCESS;

}
Exemplo n.º 7
0
/** From a vector-field dataset, compute the vector-valued weighting factors,
 *  \f$\vec{c}_j\f$. Info is returned in the rbf structure.
 *
 *
 *  \param[in]                       v   -   pointer to an array of position vectors.
 *  \param[in]                       B   -   pointer to array of corresponding field vectors.
 *  \param[in]                       n   -   number of (v, B) pairs defined.
 *  \param[in]                     eps   -   smoothing factor in scalar RBF.
 *  \param[in]      RadialBasisFunction  -   RBF to use. Can be LGM_RBF_GAUSSIAN, LGM_RBF_MULTIQUADRIC
 *
 *  \return  pointer to structure containing info for RBF interpolation. User
 *           is responsible for freeing with Lgm_DFI_RBF_Free().
 *
 *  \author  M. G. Henderson
 *  date    January 24, 2012
 *
 *
 */
Lgm_DFI_RBF_Info *Lgm_DFI_RBF_Init( unsigned long int *I_data, Lgm_Vector *v, Lgm_Vector *B, int n, double eps, int RadialBasisFunction ) {

    int              i, j, ii, jj, p, q, n3, s;
    double           *d, **a, Phi[3][3], val;
    gsl_matrix       *A, *V;
    gsl_vector       *D, *c, *S, *Work;
    Lgm_DFI_RBF_Info *rbf;

    n3 = 3*n;
    A = gsl_matrix_calloc( n3, n3 );
    c = gsl_vector_alloc( n3 );
    D = gsl_vector_calloc( n3 );


    /*
     * Save info needed to do an evaluation.
     */
    rbf = ( Lgm_DFI_RBF_Info *)calloc( 1, sizeof(*rbf) );
    rbf->RadialBasisFunction = RadialBasisFunction;
    rbf->eps = eps;
    rbf->n   = n;
    rbf->n3  = n3;
    LGM_ARRAY_1D( rbf->LookUpKey, n, unsigned long int);
    LGM_ARRAY_1D( rbf->v, n, Lgm_Vector);
    LGM_ARRAY_1D( rbf->c, n, Lgm_Vector);
    for ( i=0; i<n; i++ ) {
        rbf->LookUpKey[i] = I_data[i];
        rbf->v[i] = v[i];
    }
    // This subtraction doesntm seem to work out very well...
//    rbf->Bx0 = B[0].x;
//    rbf->By0 = B[0].y;
//    rbf->Bz0 = B[0].z;

double Bbkg;
for ( Bbkg = 0.0, i=0; i<n; i++ ) Bbkg += B[i].x; rbf->Bx0 = Bbkg/(double)n;
for ( Bbkg = 0.0, i=0; i<n; i++ ) Bbkg += B[i].y; rbf->By0 = Bbkg/(double)n;
for ( Bbkg = 0.0, i=0; i<n; i++ ) Bbkg += B[i].z; rbf->Bz0 = Bbkg/(double)n;
    rbf->Bx0 = 0.0;
    rbf->By0 = 0.0;
    rbf->Bz0 = 0.0;
    
    /*
     * Fill d array. (Subtract off the field at the nearest point v[0] -- See
     * McNally [2011].) We add this field back on later.
     */
    for (i=0; i<n; i++){
        gsl_vector_set( D, 3*i+0, B[i].x - rbf->Bx0 );
        gsl_vector_set( D, 3*i+1, B[i].y - rbf->By0 );
        gsl_vector_set( D, 3*i+2, B[i].z - rbf->Bz0 );
    }


    /*
     *                                             [  row0  ]
     * Fill A matrix. In C, order is A[row][col] = [  row1  ]
     *                                             [  row2  ]
     */
    for ( i=0; i<n; i++ ) { // locate start row for subarray
        ii = 3*i;

        for ( j=0; j<n; j++ ) { // locate start column for subarray
            jj = 3*j;

            // Get Phi( v_i - v_j )
            Lgm_DFI_RBF_Phi( &v[i], &v[j], Phi, rbf );

            for ( p=0; p<3; p++ ){ // subarray row
                for ( q=0; q<3; q++ ){  // subarray column
                    gsl_matrix_set( A, ii+p, jj+q, Phi[p][q] );
                }
            }


        }

    }

    /*
    for (i=0; i<n; i++ ) {
        printf("v%02d = %8g %8g %8g   B%02d = %8g %8g %8g\n", i, v[i].x, v[i].y, v[i].z, i, B[i].x, B[i].y, B[i].z );
    }
    for (i=0; i<n3; i++){
        for (j=0; j<n3; j++){
            printf("%8g ", gsl_matrix_get(A, i, j ) );
        }
        printf("\n");
    }
    */




    /*
     * Now we need to solve the system of equation;
     *
     *      d = ac
     *
     *  for c.
     *
     *  First create gsl_vector and gsl_matrix views of the d and A arrays.
     *  Then compute Cholesky decomposition of the a array. Then solve the
     *  system to get c.
     *
     */
    if ( LGM_DFI_RBF_SOLVER == LGM_CHOLESKY_DECOMP ){
        gsl_linalg_cholesky_decomp( A );
        gsl_linalg_cholesky_solve( A, D, c );
    } else if ( LGM_DFI_RBF_SOLVER == LGM_PLU_DECOMP ){
        gsl_permutation *P = gsl_permutation_alloc( n3 );
        gsl_linalg_LU_decomp( A, P, &s );
        gsl_linalg_LU_solve( A, P, D, c );
        gsl_permutation_free( P );
    } else if ( LGM_DFI_RBF_SOLVER == LGM_SVD ){
        V    = gsl_matrix_calloc( n3, n3 );
        S    = gsl_vector_alloc( n3 );
        Work = gsl_vector_alloc( n3 );
        gsl_linalg_SV_decomp( A, V, S, Work );
        gsl_linalg_SV_solve( A, V, S, D, c );
        gsl_vector_free( Work );
        gsl_vector_free( S );
        gsl_matrix_free( V );
    }

    for (i=0; i<n; i++){
        rbf->c[i].x = gsl_vector_get( c, 3*i+0 );
        rbf->c[i].y = gsl_vector_get( c, 3*i+1 );
        rbf->c[i].z = gsl_vector_get( c, 3*i+2 );
    }


    
    gsl_vector_free( D );
    gsl_vector_free( c );
    gsl_matrix_free( A );

    return( rbf );

}
Exemplo n.º 8
0
int main(int argc, char **argv) {
	const int MAX_ITER  = 50;
	const double RELTOL = 1e-2;
	const double ABSTOL = 1e-4;

	/*
	 * Some bookkeeping variables for MPI. The 'rank' of a process is its numeric id
	 * in the process pool. For example, if we run a program via `mpirun -np 4 foo', then
	 * the process ranks are 0 through 3. Here, N and size are the total number of processes 
	 * running (in this example, 4).
	 */

	int rank;
	int size;

	MPI_Init(&argc, &argv);               // Initialize the MPI execution environment
	MPI_Comm_rank(MPI_COMM_WORLD, &rank); // Determine current running process
	MPI_Comm_size(MPI_COMM_WORLD, &size); // Total number of processes
	double N = (double) size;             // Number of subsystems/slaves for ADMM

	/* Read in local data */

	int skinny;           // A flag indicating whether the matrix A is fat or skinny
	FILE *f;
	int m, n;
	int row, col;
	double entry;

	/*
	 * Subsystem n will look for files called An.dat and bn.dat 
	 * in the current directory; these are its local data and do not need to be
	 * visible to any other processes. Note that
	 * m and n here refer to the dimensions of the *local* coefficient matrix.
	 */

	/* Read A */
	char s[20];
	sprintf(s, "data/A%d.dat", rank + 1);
	printf("[%d] reading %s\n", rank, s);

	f = fopen(s, "r");
	if (f == NULL) {
		printf("[%d] ERROR: %s does not exist, exiting.\n", rank, s);
		exit(EXIT_FAILURE);
	}
	mm_read_mtx_array_size(f, &m, &n);	
	gsl_matrix *A = gsl_matrix_calloc(m, n);
	for (int i = 0; i < m*n; i++) {
		row = i % m;
		col = floor(i/m);
		fscanf(f, "%lf", &entry);
		gsl_matrix_set(A, row, col, entry);
	}
	fclose(f);

	/* Read b */
	sprintf(s, "data/b%d.dat", rank + 1);
	printf("[%d] reading %s\n", rank, s);

	f = fopen(s, "r");
	if (f == NULL) {
		printf("[%d] ERROR: %s does not exist, exiting.\n", rank, s);
		exit(EXIT_FAILURE);
	}
	mm_read_mtx_array_size(f, &m, &n);
	gsl_vector *b = gsl_vector_calloc(m);
	for (int i = 0; i < m; i++) {
		fscanf(f, "%lf", &entry);
		gsl_vector_set(b, i, entry);
	}
	fclose(f);

	m = A->size1;
	n = A->size2;
	skinny = (m >= n);

	/*
	 * These are all variables related to ADMM itself. There are many
	 * more variables than in the Matlab implementation because we also
	 * require vectors and matrices to store various intermediate results.
	 * The naming scheme follows the Matlab version of this solver.
	 */ 

	double rho = 1.0;

	gsl_vector *x      = gsl_vector_calloc(n);
	gsl_vector *u      = gsl_vector_calloc(n);
	gsl_vector *z      = gsl_vector_calloc(n);
	gsl_vector *y      = gsl_vector_calloc(n);
	gsl_vector *r      = gsl_vector_calloc(n);
	gsl_vector *zprev  = gsl_vector_calloc(n);
	gsl_vector *zdiff  = gsl_vector_calloc(n);

	gsl_vector *q      = gsl_vector_calloc(n);
	gsl_vector *w      = gsl_vector_calloc(n);
	gsl_vector *Aq     = gsl_vector_calloc(m);
	gsl_vector *p      = gsl_vector_calloc(m);

	gsl_vector *Atb    = gsl_vector_calloc(n);

	double send[3]; // an array used to aggregate 3 scalars at once
	double recv[3]; // used to receive the results of these aggregations

	double nxstack  = 0;
	double nystack  = 0;
	double prires   = 0;
	double dualres  = 0;
	double eps_pri  = 0;
	double eps_dual = 0;

	/* Precompute and cache factorizations */

	gsl_blas_dgemv(CblasTrans, 1, A, b, 0, Atb); // Atb = A^T b

	/*
	 * The lasso regularization parameter here is just hardcoded
	 * to 0.5 for simplicity. Using the lambda_max heuristic would require 
	 * network communication, since it requires looking at the *global* A^T b.
	 */

	double lambda = 0.5;
	if (rank == 0) {
		printf("using lambda: %.4f\n", lambda);
	}

	gsl_matrix *L;

	/* Use the matrix inversion lemma for efficiency; see section 4.2 of the paper */
	if (skinny) {
		/* L = chol(AtA + rho*I) */
		L = gsl_matrix_calloc(n,n);

		gsl_matrix *AtA = gsl_matrix_calloc(n,n);
		gsl_blas_dsyrk(CblasLower, CblasTrans, 1, A, 0, AtA);

		gsl_matrix *rhoI = gsl_matrix_calloc(n,n);
		gsl_matrix_set_identity(rhoI);
		gsl_matrix_scale(rhoI, rho);

		gsl_matrix_memcpy(L, AtA);
		gsl_matrix_add(L, rhoI);
		gsl_linalg_cholesky_decomp(L);

		gsl_matrix_free(AtA);
		gsl_matrix_free(rhoI);
	} else {
		/* L = chol(I + 1/rho*AAt) */
		L = gsl_matrix_calloc(m,m);

		gsl_matrix *AAt = gsl_matrix_calloc(m,m);
		gsl_blas_dsyrk(CblasLower, CblasNoTrans, 1, A, 0, AAt);
		gsl_matrix_scale(AAt, 1/rho);

		gsl_matrix *eye = gsl_matrix_calloc(m,m);
		gsl_matrix_set_identity(eye);

		gsl_matrix_memcpy(L, AAt);
		gsl_matrix_add(L, eye);
		gsl_linalg_cholesky_decomp(L);

		gsl_matrix_free(AAt);
		gsl_matrix_free(eye);
	}

	/* Main ADMM solver loop */

	int iter = 0;
	if (rank == 0) {
		printf("%3s %10s %10s %10s %10s %10s\n", "#", "r norm", "eps_pri", "s norm", "eps_dual", "objective");		
	}
    double startAllTime, endAllTime;
	startAllTime = MPI_Wtime();
	while (iter < MAX_ITER) {

        /* u-update: u = u + x - z */
		gsl_vector_sub(x, z);
		gsl_vector_add(u, x);

		/* x-update: x = (A^T A + rho I) \ (A^T b + rho z - y) */
		gsl_vector_memcpy(q, z);
		gsl_vector_sub(q, u);
		gsl_vector_scale(q, rho);
		gsl_vector_add(q, Atb);   // q = A^T b + rho*(z - u)


        double tmp, tmpq;
		gsl_blas_ddot(x, x, &tmp);
		gsl_blas_ddot(q, q, &tmpq);

		if (skinny) {
			/* x = U \ (L \ q) */
			gsl_linalg_cholesky_solve(L, q, x);
		} else {
			/* x = q/rho - 1/rho^2 * A^T * (U \ (L \ (A*q))) */
			gsl_blas_dgemv(CblasNoTrans, 1, A, q, 0, Aq);
			gsl_linalg_cholesky_solve(L, Aq, p);
			gsl_blas_dgemv(CblasTrans, 1, A, p, 0, x); /* now x = A^T * (U \ (L \ (A*q)) */
			gsl_vector_scale(x, -1/(rho*rho));
			gsl_vector_scale(q, 1/rho);
			gsl_vector_add(x, q);
		}

		/*
		 * Message-passing: compute the global sum over all processors of the
		 * contents of w and t. Also, update z.
		 */

		gsl_vector_memcpy(w, x);
		gsl_vector_add(w, u);   // w = x + u

		gsl_blas_ddot(r, r, &send[0]); 
		gsl_blas_ddot(x, x, &send[1]);
		gsl_blas_ddot(u, u, &send[2]);
		send[2] /= pow(rho, 2);

		gsl_vector_memcpy(zprev, z);

		// could be reduced to a single Allreduce call by concatenating send to w
		MPI_Allreduce(w->data, z->data,  n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
		MPI_Allreduce(send,    recv,     3, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

		prires  = sqrt(recv[0]);  /* sqrt(sum ||r_i||_2^2) */
		nxstack = sqrt(recv[1]);  /* sqrt(sum ||x_i||_2^2) */
		nystack = sqrt(recv[2]);  /* sqrt(sum ||y_i||_2^2) */

		gsl_vector_scale(z, 1/N);
		soft_threshold(z, lambda/(N*rho));

		/* Termination checks */

		/* dual residual */
		gsl_vector_memcpy(zdiff, z);
		gsl_vector_sub(zdiff, zprev);
		dualres = sqrt(N) * rho * gsl_blas_dnrm2(zdiff); /* ||s^k||_2^2 = N rho^2 ||z - zprev||_2^2 */

		/* compute primal and dual feasibility tolerances */
		eps_pri  = sqrt(n*N)*ABSTOL + RELTOL * fmax(nxstack, sqrt(N)*gsl_blas_dnrm2(z));
		eps_dual = sqrt(n*N)*ABSTOL + RELTOL * nystack;

		if (rank == 0) {
			printf("%3d %10.4f %10.4f %10.4f %10.4f %10.4f\n", iter, 
					prires, eps_pri, dualres, eps_dual, objective(A, b, lambda, z));
		}

		if (prires <= eps_pri && dualres <= eps_dual) {
			break;
		}

		/* Compute residual: r = x - z */
		gsl_vector_memcpy(r, x);
		gsl_vector_sub(r, z);

		iter++;
	}

	/* Have the master write out the results to disk */
	if (rank == 0) { 
        endAllTime = MPI_Wtime();
        printf("Elapsed time is: %lf \n", endAllTime - startAllTime);

		f = fopen("data/solution.dat", "w");
		gsl_vector_fprintf(f, z, "%lf");
		fclose(f);
	}

	MPI_Finalize(); /* Shut down the MPI execution environment */

	/* Clear memory */
	gsl_matrix_free(A);
	gsl_matrix_free(L);
	gsl_vector_free(b);
	gsl_vector_free(x);
	gsl_vector_free(u);
	gsl_vector_free(z);
	gsl_vector_free(y);
	gsl_vector_free(r);
	gsl_vector_free(w);
	gsl_vector_free(zprev);
	gsl_vector_free(zdiff);
	gsl_vector_free(q);
	gsl_vector_free(Aq);
	gsl_vector_free(Atb);
	gsl_vector_free(p);

	return EXIT_SUCCESS;
}
Exemplo n.º 9
0
static void smf1_fit_qui_job( void *job_data, int *status ) {
/*
*  Name:
*     smf1_fit_qui_job

*  Purpose:
*     Calculate I, Q and U for a block of bolometers.

*  Invocation:
*     void smf1_fit_qui_job( void *job_data, int *status )

*  Arguments:
*     job_data = void * (Given)
*        Pointer to the data needed by the job. Should be a pointer to a
*        smfFitQUIJobData structure.
*     status = int * (Given and Returned)
*        Pointer to global status.

*  Description:
*     This routine calculate the I, Q and U values for each bolometer in
*     a block of bolometers. It runs within a thread instigated by
*     smf_fit_qui.

*/

/* Local Variables: */
   AstFrameSet *wcs;          /* WCS FrameSet for current time slice */
   AstMapping *g2s;           /* GRID to SKY mapping */
   AstMapping *s2f;           /* SKY to focal plane mapping */
   const JCMTState *allstates;/* Pointer to array of JCMTState structures */
   const JCMTState *state;    /* JCMTState info for current time slice */
   dim_t b1;                  /* First bolometer index */
   dim_t b2;                  /* Last bolometer index */
   dim_t box_size;            /* NFirst time slice in box */
   dim_t ibolo;               /* Bolometer index */
   dim_t ibox;
   dim_t ncol;
   dim_t nbolo;               /* Total number of bolometers */
   double *dat;               /* Pointer to start of input data values */
   double *din;               /* Pointer to input data array for bolo/time */
   double *ipi;               /* Pointer to output I array */
   double *ipq;               /* Pointer to output Q array */
   double *ipu;               /* Pointer to output U array */
   double *ipv;               /* Pointer to output weights array */
   double *pm;
   double *ps;
   double angle;              /* Phase angle for FFT */
   double angrot;             /* Angle from focal plane X axis to fixed analyser */
   double c1;
   double c2;
   double c4;
   double c8;
   double cosval;             /* Cos of angrot */
   double fit;
   double fx[2];              /* Focal plane X coord at bolometer and northern point*/
   double fy[2];              /* Focal plane Y coord at bolometer and northern point*/
   double gx;                 /* GRID X coord at bolometer */
   double gy;                 /* GRID Y coord at bolometer */
   double matrix[ NPAR*NPAR ];
   double paoff;              /* WPLATE value corresponding to POL_ANG=0.0 */
   double phi;                /* Angle from fixed analyser to effective analyser */
   double res;
   double s1;                 /* Sum of weighted cosine terms */
   double s2;                 /* Sum of weighted sine terms */
   double s4;
   double s8;
   double sinval;             /* Sin of angrot */
   double solution[ NPAR ];
   double sum1;               /* Sum of squared residuals */
   double sums[NSUM];         /* Sum of bolometer values */
   double sx[2];              /* SKY X coord at bolometer and northern point*/
   double sy[2];              /* SKY Y coord at bolometer and northern point*/
   double tr_angle;
   double twophi;
   double vector[ NPAR ];
   double wplate;             /* Angle from fixed analyser to have-wave plate */
   gsl_matrix_view gsl_m;
   gsl_vector_view gsl_b;
   gsl_vector_view gsl_x;
   int ipolcrd;               /* Reference direction for pol_ang */
   int nsum1;
   int pasign;                /* +1 or -1 indicating sense of POL_ANG value */
   smfFitQUIJobData *pdata;   /* Pointer to job data */
   smf_qual_t *qin;           /* Pointer to input quality array for bolo/time */
   smf_qual_t *qua;           /* Pointer to start of input quality values */

/* Check inherited status */
   if( *status != SAI__OK ) return;

/* Begin an AST context */
   astBegin;

/* Create views of the matrix and vector buffers that can be used by GSL. */
   gsl_m = gsl_matrix_view_array( matrix, NPAR, NPAR );
   gsl_b = gsl_vector_view_array( vector, NPAR );
   gsl_x = gsl_vector_view_array( solution, NPAR );

/* Get a pointer to the job data, and then extract its contents into a
   set of local variables. */
   pdata = (smfFitQUIJobData *) job_data;

   b1 = pdata->b1;
   b2 = pdata->b2;
   nbolo = pdata->nbolo;
   ncol = pdata->ncol;

   dat = pdata->dat + b1;
   qua = pdata->qua + b1;
   allstates = pdata->allstates;

   ipi = pdata->ipi ? pdata->ipi + b1 : NULL;
   ipq = pdata->ipq + b1;
   ipu = pdata->ipu + b1;
   ipv = pdata->ipv + b1;

   ipolcrd = pdata->ipolcrd;
   pasign = pdata->pasign;
   paoff = pdata->paoff;
   angrot = pdata->angrot;
   box_size = pdata->box_size;

   wcs = pdata->wcs;
   if( wcs ) {
      astLock( wcs, 0 );

/* Get the mapping from GRID to SKY. */
      g2s = astSimplify( astGetMapping( wcs, AST__BASE, AST__CURRENT ));

/* Get the mapping from SKY to focal plane (x,y) (the index of the FPLANE
   Frame is fixed at 3 by file sc2ast.c). */
      s2f = astSimplify( astGetMapping( wcs, AST__CURRENT, 3 ) );

   } else{
      g2s = s2f = NULL;
   }

/* Check we have something to do. */
   if( b1 < nbolo && *status == SAI__OK ) {

/* Loop round all bolometers to be processed by this thread. */
      for( ibolo = b1; ibolo <= b2; ibolo++,qua++,dat++ ) {

/* If the returned Stokes parameters are to be with respect to Tracking
   North, get the angle from tracking north at the current bolometer to
   focal plane Y, measured positive in the sense of rotation from focal
   plane Y to focal plane X (note this angle may change across the focal
   plane due to focal plane distortion). Otherwise, use zero. */
         if( pdata->wcs ) {

/* Get the grid coords of the current bolometer, and transform them to SKY
   coordinates using the FrameSet. */
            gx = ibolo % ncol + 1;
            gy = ibolo / ncol + 1;
            astTran2( g2s, 1, &gx, &gy, 1, sx, sy );

/* Increment the sky position slightly to the north. */
            sx[ 1 ] = sx[ 0 ];
            sy[ 1 ] = sy[ 0 ] + 1.0E-6;

/* Transform both sky positions into focal plane coords. */
            astTran2( s2f, 2, sx, sy, 1, fx, fy );

/* Get the angle from north to focal plane Y, measured positive in the
   sense of rotation from focal plane Y to focal plane X. */
            if( fx[0] != VAL__BADD && fy[0] != VAL__BADD &&
                fx[1] != VAL__BADD && fy[1] != VAL__BADD ) {
               tr_angle = atan2( fx[0] - fx[1], fy[1] - fy[0] );
            } else {
               tr_angle = VAL__BADD;
            }

         } else {
            tr_angle = 0.0;
         }

/* If the whole bolometer is bad, put bad values into the outputs. */
         if( *qua & SMF__Q_BADB || tr_angle == VAL__BADD ) {
            if( ipi ) *(ipi++) = VAL__BADD;
            *(ipq++) = VAL__BADD;
            *(ipu++) = VAL__BADD;
            *(ipv++) = VAL__BADD;

/* If the bolometer is good, calculate and store the output i, q and u
   values. */
         } else {

/* Initialise pointers to the first input data value, quality value and
   state info to be used in the current fitting box. */
            din = dat;
            qin = qua;
            state = allstates;

/* Form the sums needed to calculate the best fit Q, U and I. This
   involves looping over all input samples that fall within the fitting box
   centred on the current output sample. The 44 sums are stored in the
   "sums" array. Initialise it to hold zeros.  */
            memset( sums, 0, NSUM*sizeof(*sums) );
            for( ibox = 0; ibox <  box_size; ibox++,state++ ) {

/* Get the POL_ANG value for this time slice. */
               angle = state->pol_ang;

/* Check the input sample has not been flagged during cleaning and is
   not bad. */
               if( !( *qin & SMF__Q_FIT ) && *din != VAL__BADD &&
                   angle != VAL__BADD ) {

/* Following SUN/223 (section "Single-beam polarimetry"/"The Polarimeter"),
   get the angle from the fixed analyser to the half-waveplate axis, in radians.
   Positive rotation is from focal plane axis 1 (x) to focal plane axis 2 (y).

   Not sure about the sign of tcs_az/tr_ang at the moment so do not use them
   yet. */
                  wplate = 0.0;
                  if( ipolcrd == 0 ) {
                     wplate = pasign*angle + paoff;

                  } else if( *status == SAI__OK ) {
                     *status = SAI__ERROR;
                     errRepf( "", "smf_fit_qui: currently only POL_CRD = "
                              "FPLANE is supported.", status );
                  }

/*
                     if( ipolcrd == 1 ) {
                        wplate += state->tcs_az_ang;
                     } else if( ipolcrd == 2 ) {
                        wplate += state->tcs_tr_ang;
                     }
*/

/* Get the angle from the fixed analyser to the effective analyser
   position (see SUN/223 again). The effective analyser angle rotates twice
   as fast as the half-wave plate which is why there is a factor of 2 here. */
                  phi = 2*wplate;
                  twophi = 2*phi;

/* Form the trig values needed for the sums. */
                  s8 = sin( 2*twophi );
                  c8 = cos( 2*twophi );
                  s4 = sin( twophi );
                  c4 = cos( twophi );
                  s2 = sin( phi );
                  c2 = cos( phi );
                  s1 = sin( wplate );
                  c1 = cos( wplate );

/* Update the sums. The order of the following lines define the index
   within "sums" at which each sum is stored. */
                  ps = sums;
                  *(ps++) += s4*s4;
                  *(ps++) += s4*c4;
                  *(ps++) += s4*s2;
                  *(ps++) += s4*c2;
                  *(ps++) += s4*s1;
                  *(ps++) += s4*c1;
                  *(ps++) += s4*ibox;
                  *(ps++) += s4;
                  *(ps++) += s4*(*din);

                  *(ps++) += s2*c4;
                  *(ps++) += s2*s2;
                  *(ps++) += s2*c2;
                  *(ps++) += s2*s1;
                  *(ps++) += s2*c1;
                  *(ps++) += s2*ibox;
                  *(ps++) += s2;
                  *(ps++) += s2*(*din);

                  *(ps++) += s1*c4;
                  *(ps++) += s1*c2;
                  *(ps++) += s1*s1;
                  *(ps++) += s1*c1;
                  *(ps++) += s1*ibox;
                  *(ps++) += s1;
                  *(ps++) += s1*(*din);

                  *(ps++) += c4*c4;
                  *(ps++) += c4*c2;
                  *(ps++) += c4*c1;
                  *(ps++) += c4*ibox;
                  *(ps++) += c4;
                  *(ps++) += c4*(*din);

                  *(ps++) += c2*c2;
                  *(ps++) += c2*c1;
                  *(ps++) += c2*ibox;
                  *(ps++) += c2;
                  *(ps++) += c2*(*din);

                  *(ps++) += c1*c1;
                  *(ps++) += c1*ibox;
                  *(ps++) += c1;
                  *(ps++) += c1*(*din);

                  *(ps++) += ibox*ibox;
                  *(ps++) += ibox;
                  *(ps++) += ibox*(*din);

                  *(ps++) += 1.0;
                  *(ps++) += *din;

                  *(ps++) += s4*s8;
                  *(ps++) += s4*c8;

                  *(ps++) += s2*s8;
                  *(ps++) += s2*c8;

                  *(ps++) += s1*s8;
                  *(ps++) += s1*c8;

                  *(ps++) += s8*c4;
                  *(ps++) += s8*c2;
                  *(ps++) += s8*c1;
                  *(ps++) += s8*ibox;
                  *(ps++) += s8;
                  *(ps++) += s8*(*din);
                  *(ps++) += s8*s8;
                  *(ps++) += s8*c8;

                  *(ps++) += c4*c8;

                  *(ps++) += c2*c8;

                  *(ps++) += c1*c8;

                  *(ps++) += c8*ibox;
                  *(ps++) += c8;
                  *(ps++) += c8*(*din);
                  *(ps++) += c8*c8;
               }

               din += nbolo;
               qin += nbolo;

            }

/* Now find the parameters of the best fit. First check that there were
   sufficient good samples in the fitting box. */
            if( sums[42] > 0.8*box_size ) {

/* Copy the sums to the correct elements of the 10x10 matrix. */
               pm = matrix;
               *(pm++) = sums[ 0 ];
               *(pm++) = sums[ 1 ];
               *(pm++) = sums[ 2 ];
               *(pm++) = sums[ 3 ];
               *(pm++) = sums[ 4 ];
               *(pm++) = sums[ 5 ];
               *(pm++) = sums[ 6 ];
               *(pm++) = sums[ 7 ];
               *(pm++) = sums[ 44 ];
               *(pm++) = sums[ 45 ];


               *(pm++) = sums[ 1 ];
               *(pm++) = sums[ 24 ];
               *(pm++) = sums[ 9 ];
               *(pm++) = sums[ 25 ];
               *(pm++) = sums[ 17 ];
               *(pm++) = sums[ 26 ];
               *(pm++) = sums[ 27 ];
               *(pm++) = sums[ 28 ];
               *(pm++) = sums[ 50 ];
               *(pm++) = sums[ 58 ];

               *(pm++) = sums[ 2 ];
               *(pm++) = sums[ 9 ];
               *(pm++) = sums[ 10 ];
               *(pm++) = sums[ 11 ];
               *(pm++) = sums[ 12 ];
               *(pm++) = sums[ 13 ];
               *(pm++) = sums[ 14 ];
               *(pm++) = sums[ 15 ];
               *(pm++) = sums[ 46 ];
               *(pm++) = sums[ 47 ];

               *(pm++) = sums[ 3 ];
               *(pm++) = sums[ 25 ];
               *(pm++) = sums[ 11 ];
               *(pm++) = sums[ 30 ];
               *(pm++) = sums[ 18 ];
               *(pm++) = sums[ 31 ];
               *(pm++) = sums[ 32 ];
               *(pm++) = sums[ 33 ];
               *(pm++) = sums[ 51 ];
               *(pm++) = sums[ 59 ];

               *(pm++) = sums[ 4 ];
               *(pm++) = sums[ 17 ];
               *(pm++) = sums[ 12 ];
               *(pm++) = sums[ 18 ];
               *(pm++) = sums[ 19 ];
               *(pm++) = sums[ 20 ];
               *(pm++) = sums[ 21 ];
               *(pm++) = sums[ 22 ];
               *(pm++) = sums[ 48 ];
               *(pm++) = sums[ 49 ];

               *(pm++) = sums[ 5 ];
               *(pm++) = sums[ 26 ];
               *(pm++) = sums[ 13 ];
               *(pm++) = sums[ 31 ];
               *(pm++) = sums[ 20 ];
               *(pm++) = sums[ 35 ];
               *(pm++) = sums[ 36 ];
               *(pm++) = sums[ 37 ];
               *(pm++) = sums[ 52 ];
               *(pm++) = sums[ 60 ];

               *(pm++) = sums[ 6 ];
               *(pm++) = sums[ 27 ];
               *(pm++) = sums[ 14 ];
               *(pm++) = sums[ 32 ];
               *(pm++) = sums[ 21 ];
               *(pm++) = sums[ 36 ];
               *(pm++) = sums[ 39 ];
               *(pm++) = sums[ 40 ];
               *(pm++) = sums[ 53 ];
               *(pm++) = sums[ 61 ];

               *(pm++) = sums[ 7 ];
               *(pm++) = sums[ 28 ];
               *(pm++) = sums[ 15 ];
               *(pm++) = sums[ 33 ];
               *(pm++) = sums[ 22 ];
               *(pm++) = sums[ 37 ];
               *(pm++) = sums[ 40 ];
               *(pm++) = sums[ 42 ];
               *(pm++) = sums[ 54 ];
               *(pm++) = sums[ 62 ];

               *(pm++) = sums[ 44 ];
               *(pm++) = sums[ 50 ];
               *(pm++) = sums[ 46 ];
               *(pm++) = sums[ 51 ];
               *(pm++) = sums[ 48 ];
               *(pm++) = sums[ 52 ];
               *(pm++) = sums[ 53 ];
               *(pm++) = sums[ 54 ];
               *(pm++) = sums[ 56 ];
               *(pm++) = sums[ 57 ];

               *(pm++) = sums[ 45 ];
               *(pm++) = sums[ 58 ];
               *(pm++) = sums[ 47 ];
               *(pm++) = sums[ 59 ];
               *(pm++) = sums[ 49 ];
               *(pm++) = sums[ 60 ];
               *(pm++) = sums[ 61 ];
               *(pm++) = sums[ 62 ];
               *(pm++) = sums[ 57 ];
               *(pm++) = sums[ 64 ];

/* Copy the remaining sums to the correct elements of the 8 vector. */
               pm = vector;
               *(pm++) = sums[ 8 ];
               *(pm++) = sums[ 29 ];
               *(pm++) = sums[ 16 ];
               *(pm++) = sums[ 34 ];
               *(pm++) = sums[ 23 ];
               *(pm++) = sums[ 38 ];
               *(pm++) = sums[ 41 ];
               *(pm++) = sums[ 43 ];
               *(pm++) = sums[ 55 ];
               *(pm++) = sums[ 63 ];

/* Find the solution to the 10x10 set of linear equations. The matrix is
   symmetric and positive-definite so use Cholesky decomposition.  */
               memset( solution, 0, NPAR*sizeof(*solution) );
               gsl_linalg_cholesky_decomp( &gsl_m.matrix );
               gsl_linalg_cholesky_solve( &gsl_m.matrix, &gsl_b.vector,
                                          &gsl_x.vector );

/* Modify Q and U so they use the requested reference direction, and store in
   the output arrays. */
               cosval = cos( 2*( angrot - tr_angle ) );
               sinval = sin( 2*( angrot - tr_angle ) );
               *(ipq++) = 2*( -solution[ 1 ]*cosval + solution[ 0 ]*sinval );
               *(ipu++) = 2*( -solution[ 1 ]*sinval - solution[ 0 ]*cosval );

/* Store the correspoinding I value. */
               if( ipi ) *(ipi++) = solution[ 6 ]*box_size + 2*solution[ 7 ];

/* Loop over the data again in the same way to calculate the variance of the
   residuals between the above fit and the supplied data. */
               din = dat;
               qin = qua;
               state = allstates;

               sum1 = 0.0;
               nsum1 = 0;

               for( ibox = 0; ibox <  box_size; ibox++,state++ ) {
                  angle = state->pol_ang;

                  if( !( *qin & SMF__Q_FIT ) && *din != VAL__BADD &&
                        angle != VAL__BADD ) {
                     wplate = pasign*angle + paoff;
/*
                        if( ipolcrd == 1 ) {
                           wplate += state->tcs_az_ang;
                        } else if( ipolcrd == 2 ) {
                           wplate += state->tcs_tr_ang;
                        }
*/
                     phi = 2*wplate;
                     twophi = 2*phi;

                     s8 = sin( 2*twophi );
                     c8 = cos( 2*twophi );
                     s4 = sin( twophi );
                     c4 = cos( twophi );
                     s2 = sin( phi );
                     c2 = cos( phi );
                     s1 = sin( wplate );
                     c1 = cos( wplate );

                     fit = solution[0]*s4 +
                           solution[1]*c4 +
                           solution[2]*s2 +
                           solution[3]*c2 +
                           solution[4]*s1 +
                           solution[5]*c1 +
                           solution[6]*ibox +
                           solution[7] +
                           solution[8]*s8 +
                           solution[9]*c8;

                     res = *din - fit;

                     sum1 += res*res;
                     nsum1++;
                  }

                  din += nbolo;
                  qin += nbolo;
               }

/* Calculate the variance of the residuals, and then scale it to get the
   notional variance for the returned Q,. U and I values. The scaling
   factor is determined emprically to get reasonable agreement between these
   notional variances and the noise actually seen in the Q and U values
   for 10 test observations. The reason for storing these as Q/U variances
   rather than as a weights component in the SMURF extension is so that
   makemap can pick them up easily and use them to initialise the NOI
   model, which is used for weighting the bolometer data when forming the
   COM model on the first iteration. */
               *(ipv++) = 0.0253*sum1/nsum1;

/* Store bad values if there were too few good samples in the fitting
   box. */
            } else {
               if( ipi ) *(ipi++) = VAL__BADD;
               *(ipq++) = VAL__BADD;
               *(ipu++) = VAL__BADD;
               *(ipv++) = VAL__BADD;
            }
         }
      }
   }

   if( wcs ) {
      g2s = astAnnul( g2s );
      s2f = astAnnul( s2f );
      astUnlock( wcs, 1 );
   }

/* End the AST context */
   astEnd;
}