void matrix_gram_set( const matrix_type * X , matrix_type * G, bool col) { int G_rows = matrix_get_rows( G ); int G_cols = matrix_get_columns( G ); int X_rows = matrix_get_rows( X ); int X_cols = matrix_get_columns( X ); if (col) { // Calculate X' · X if ((G_rows == G_cols) && (X_cols == G_rows)) matrix_dgemm( G , X , X , true , false , 1 , 0); else util_abort("%s: dimension mismatch \n",__func__); } else { // Calculate X · X' if ((G_rows == G_cols) && (X_rows == G_rows)) matrix_dgemm( G , X , X , false , true , 1 , 0); else util_abort("%s: dimension mismatch \n",__func__); } }
void eigcg_vec_mult(Vector **V,int m, Float *QZ, int n, int f_size_cb) //QZ is saved in column major format. { Float **Vptr=(Float* *)smalloc(m*sizeof(Float *)); for(int i=0;i<m;i++)Vptr[i]=(Float *)V[i]; //implementation 3. unroll. 96.1 seconds ~75Mflops // int BLOCK_SIZE=12; // int xlen=BLOCK_SIZE*n; // Float *x=(Float *)fmalloc(BLOCK_SIZE*n*sizeof(Float)); // for(int block=0;block<f_size_cb/BLOCK_SIZE;block++) // { // std::memset(x,0,xlen*sizeof(Float)); // matrix_dgemm(BLOCK_SIZE,n,m,Vptr,QZ,x); // } // //implementation 4 //reorder QZ to 4x4 blocks Float *aux=(Float*)fmalloc(m*n*sizeof(Float)); Float *pt=aux; int BLOCK_SIZE=4; const int m_cblocks = m / BLOCK_SIZE + (m%BLOCK_SIZE ? 1 : 0); const int n_cblocks = n / BLOCK_SIZE + (n%BLOCK_SIZE ? 1 : 0); for(int c=0;c<n_cblocks;++c)// row direction { const int i=c*BLOCK_SIZE; for(int r=0;r<m_cblocks;++r) //column direction { const int j=r*BLOCK_SIZE; for(int ci=i;(ci<i+BLOCK_SIZE) && (ci<n); ci++) for(int rj=j;(rj<j+BLOCK_SIZE)&&(rj<m);rj++) *(pt++)=QZ[ci*m+rj]; } } BLOCK_SIZE=12; int xlen=BLOCK_SIZE*n; Float *x=(Float *)fmalloc(BLOCK_SIZE*n*sizeof(Float)); for(int block=0;block<f_size_cb/BLOCK_SIZE;block++) { std::memset(x,0,xlen*sizeof(Float)); matrix_dgemm(BLOCK_SIZE,n,m,Vptr,aux,x); } sfree(aux); sfree(x); sfree(Vptr); }
void rml_enkf_common_initA__( matrix_type * A , matrix_type * S , matrix_type * Cd , matrix_type * E , matrix_type * D , double truncation, double lamda, matrix_type * Udr, double * Wdr, matrix_type * VdTr) { int nrobs = matrix_get_rows( S ); int ens_size = matrix_get_columns( S ); double a = lamda + 1; matrix_type *tmp = matrix_alloc (nrobs, ens_size); double nsc = 1/sqrt(ens_size-1); printf("The lamda Value is %5.5f\n",lamda); printf("The Value of Truncation is %4.2f \n",truncation); matrix_subtract_row_mean( S ); /* Shift away the mean in the ensemble predictions*/ matrix_inplace_diag_sqrt(Cd); matrix_dgemm(tmp, Cd, S,false, false, 1.0, 0.0); matrix_scale(tmp, nsc); printf("The Scaling of data matrix completed !\n "); // SVD(S) = Ud * Wd * Vd(T) int nsign = enkf_linalg_svd_truncation(tmp , truncation , -1 , DGESVD_MIN_RETURN , Wdr , Udr , VdTr); /* After this we only work with the reduced dimension matrices */ printf("The number of siginificant ensembles are %d \n ",nsign); matrix_type * X1 = matrix_alloc( nsign, ens_size); matrix_type * X2 = matrix_alloc (nsign, ens_size ); matrix_type * X3 = matrix_alloc (ens_size, ens_size ); // Compute the matrices X1,X2,X3 and dA enkf_linalg_rml_enkfX1(X1, Udr ,D ,Cd ); //X1 = Ud(T)*Cd(-1/2)*D -- D= -(dk-d0) enkf_linalg_rml_enkfX2(X2, Wdr ,X1 ,a, nsign); //X2 = ((a*Ipd)+Wd^2)^-1 * X1 matrix_free(X1); enkf_linalg_rml_enkfX3(X3, VdTr ,Wdr,X2, nsign); //X3 = Vd *Wd*X2 printf("The X3 matrix is computed !\n "); matrix_type *dA1= matrix_alloc (matrix_get_rows(A), ens_size); matrix_type * Dm = matrix_alloc_copy( A ); matrix_subtract_row_mean( Dm ); /* Remove the mean from the ensemble of model parameters*/ matrix_scale(Dm, nsc); enkf_linalg_rml_enkfdA(dA1, Dm, X3); //dA = Dm * X3 matrix_inplace_add(A,dA1); //dA matrix_free(X3); matrix_free(Dm); matrix_free(dA1); }
void matrix_matmul(matrix_type * C, const matrix_type * A , const matrix_type * B) { matrix_dgemm( C , A , B , false , false , 1 , 0); }
void matrix_matmul_with_transpose(matrix_type * C, const matrix_type * A , const matrix_type * B , bool transA , bool transB) { matrix_dgemm( C , A , B , transA , transB , 1 , 0); }
void lars_estimate(lars_type * lars , int max_vars , double max_beta , bool verbose) { int nvars = matrix_get_columns( lars->X ); int nsample = matrix_get_rows( lars->X ); matrix_type * X = matrix_alloc( nsample, nvars ); // Allocate local X and Y variables matrix_type * Y = matrix_alloc( nsample, 1 ); // which will hold the normalized data lars_estimate_init( lars , X , Y); // during the estimation process. { matrix_type * G = matrix_alloc_gram( X , true ); matrix_type * mu = matrix_alloc( nsample , 1 ); matrix_type * C = matrix_alloc( nvars , 1 ); matrix_type * Y_mu = matrix_alloc_copy( Y ); int_vector_type * active_set = int_vector_alloc(0,0); int_vector_type * inactive_set = int_vector_alloc(0,0); int active_size; if ((max_vars <= 0) || (max_vars > nvars)) max_vars = nvars; { int i; for (i=0; i < nvars; i++) int_vector_iset( inactive_set , i , i ); } matrix_set( mu , 0 ); while (true) { double maxC = 0; /* The first step is to calculate the correlations between the covariates, and the current residual. All the currently inactive covariates are searched; the covariate with the greatest correlation with (Y - mu) is selected and added to the active set. */ matrix_sub( Y_mu , Y , mu ); // Y_mu = Y - mu matrix_dgemm( C , X , Y_mu , true , false , 1.0 , 0); // C = X' * Y_mu { int i; int max_set_index = 0; for (i=0; i < int_vector_size( inactive_set ); i++) { int set_index = i; int var_index = int_vector_iget( inactive_set , set_index ); double value = fabs( matrix_iget(C , var_index , 0) ); if (value > maxC) { maxC = value; max_set_index = set_index; } } /* Remove element corresponding to max_set_index from the inactive set and add it to the active set: */ int_vector_append( active_set , int_vector_idel( inactive_set , max_set_index )); } active_size = int_vector_size( active_set ); /* Now we have calculated the correlations between all the covariates and the current residual @Y_mu. The correlations are stored in the matrix @C. The value of the maximum correlation is stored in @maxC. Based on the value of @maxC we have added one new covariate to the model, technically by moving the index from @inactive_set to @active_set. */ /*****************************************************************/ { matrix_type * weights = matrix_alloc( active_size , 1); double scale; /*****************************************************************/ /* This scope should compute and initialize the variables @weights and @scale. */ { matrix_type * subG = matrix_alloc( active_size , active_size ); matrix_type * STS = matrix_alloc( active_size , active_size ); matrix_type * sign_vector = matrix_alloc( active_size , 1); int i , j; /* STS = S' o S where 'o' is the Schur product and S is given by: [ s1 s2 s3 s4 ] S = [ s1 s2 s3 s4 ] [ s1 s2 s3 s4 ] [ s1 s2 s3 s4 ] Where si is the sign of the correlation between (active) variable 'i' and Y_mu. */ for (i=0; i < active_size ; i++) { int vari = int_vector_iget( active_set , i ); double signi = sgn( matrix_iget( C , vari , 0)); matrix_iset( sign_vector , i , 0 , signi ); for (j=0; j < active_size; j++) { int varj = int_vector_iget( active_set , j ); double signj = sgn( matrix_iget( C , varj , 0)); matrix_iset( STS , i , j , signi * signj ); } } // Extract the elements from G corresponding to active indices and // copy to the matrix subG: for (i=0; i < active_size ; i++) { int ii = int_vector_iget( active_set , i ); for (j=0; j < active_size; j++) { int jj = int_vector_iget( active_set , j ); matrix_iset( subG , i , j , matrix_iget(G , ii , jj)); } } // Weights matrix_inplace_mul( subG , STS ); matrix_inv( subG ); { matrix_type * ones = matrix_alloc( active_size , 1 ); matrix_type * GA1 = matrix_alloc( active_size , 1 ); matrix_set( ones , 1.0 ); matrix_matmul( GA1 , subG , ones ); scale = 1.0 / sqrt( matrix_get_column_sum( GA1 , 0 )); matrix_mul( weights , GA1 , sign_vector ); matrix_scale( weights , scale ); matrix_free( GA1 ); matrix_free( ones ); } matrix_free( sign_vector ); matrix_free( subG ); matrix_free( STS ); } /******************************************************************/ /* The variables weight and scale have been calculated, proceed to calculate the step length @gamma. */ { int i; double gamma; { matrix_type * u = matrix_alloc( nsample , 1 ); int j; for (i=0; i < nsample; i++) { double row_sum = 0; for (j =0; j < active_size; j++) row_sum += matrix_iget( X , i , int_vector_iget( active_set , j)) * matrix_iget(weights , j , 0 ); matrix_iset( u , i , 0 , row_sum ); } gamma = maxC / scale; if (active_size < matrix_get_columns( X )) { matrix_type * equi_corr = matrix_alloc( nvars , 1 ); matrix_dgemm( equi_corr , X , u , true , false , 1.0 , 0); // equi_corr = X'·u for (i=0; i < (nvars - active_size); i++) { int var_index = int_vector_iget( inactive_set , i ); double gamma1 = (maxC - matrix_iget(C , var_index , 0 )) / ( scale - matrix_iget( equi_corr , var_index , 0)); double gamma2 = (maxC + matrix_iget(C , var_index , 0 )) / ( scale + matrix_iget( equi_corr , var_index , 0)); if ((gamma1 > 0) && (gamma1 < gamma)) gamma = gamma1; if ((gamma2 > 0) && (gamma2 < gamma)) gamma = gamma2; } matrix_free( equi_corr ); } /* Update the current estimated 'location' mu. */ matrix_scale( u , gamma ); matrix_inplace_add( mu , u ); matrix_free( u ); } /* We have calculated the step length @gamma, and the @weights. Update the @beta matrix. */ for (i=0; i < active_size; i++) matrix_iset( lars->beta , int_vector_iget( active_set , i ) , active_size - 1 , gamma * matrix_iget( weights , i , 0)); if (active_size > 1) for (i=0; i < nvars; i++) matrix_iadd( lars->beta , i , active_size - 1 , matrix_iget( lars->beta , i , active_size - 2)); matrix_free( weights ); } } if (active_size == max_vars) break; if (max_beta > 0) { double beta_norm2 = matrix_get_column_abssum( lars->beta , active_size - 1 ); if (beta_norm2 > max_beta) { // We stop - we will use an interpolation between this beta estimate and // the previous, to ensure that the |beta| = max_beta criteria is satisfied. if (active_size >= 2) { double beta_norm1 = matrix_get_column_abssum( lars->beta , active_size - 2 ); double s = (max_beta - beta_norm1)/(beta_norm2 - beta_norm1); { int j; for (j=0; j < nvars; j++) { double beta1 = matrix_iget( lars->beta , j , active_size - 2 ); double beta2 = matrix_iget( lars->beta , j , active_size - 1 ); matrix_iset( lars->beta , j , active_size - 1 , beta1 + s*(beta2 - beta1)); } } } break; } } } matrix_free( G ); matrix_free( mu ); matrix_free( C ); matrix_free( Y_mu ); int_vector_free( active_set ); int_vector_free( inactive_set ); matrix_resize( lars->beta , nvars , active_size , true ); if (verbose) matrix_pretty_fprint( lars->beta , "beta" , "%12.5f" , stdout ); lars_select_beta( lars , active_size - 1); } matrix_free( X ); matrix_free( Y ); }