/* LU factorization helper (LAPACK dgetrf). Currently only used as a
   'support' function for the matrix_det function. */
static void matrix_dgetrf__( matrix_type * A, int * ipiv, int * info) {
  int rows   = matrix_get_rows( A );
  int cols   = matrix_get_columns( A );
  int stride = matrix_get_column_stride( A );

  dgetrf_( &rows , &cols , matrix_get_data( A ) , &stride , ipiv , info);
}
/**
   Assemble the orthogonal matrix Q from the elementary reflectors
   produced by a QR factorization (LAPACK dorgqr).

   @A               In/out: on entry holds the reflectors, on exit holds Q.
   @tau             The scalar factors of the reflectors.
   @num_reflectors  Length of @tau.

   Fix: both error messages misspelled the routine name
   ("dorgqf"/"dorqf") - they now correctly say "dorgqr".
*/
void matrix_dorgqr(matrix_type * A , double * tau, int num_reflectors) {  /* num_reflectors == length of tau. */
  int lda = matrix_get_column_stride( A );
  int m   = matrix_get_rows( A );
  int n   = matrix_get_columns( A );
  double * work = util_malloc(sizeof * work );
  int worksize;
  int info;

  /* First call with worksize == -1 is a LAPACK workspace query;
     the optimal size is returned in work[0]. */
  worksize = -1;
  dorgqr_(&m , &n , &num_reflectors , matrix_get_data( A ), &lda , tau , work , &worksize , &info);
  if (info != 0)
    util_abort("%s: dorgqr routine failed with info:%d \n",__func__ , info);
  worksize = ( int ) work[0];
  {
    double * tmp = realloc(work , sizeof * work * worksize );
    if (tmp == NULL) {
      /* OK - we could not get the optimal worksize,
         try again with the minimum. */
      worksize = n;
      work = util_realloc(work , sizeof * work * worksize );
    } else
      work = tmp; /* The request for optimal worksize succeeded */
  }

  /* Second call - do the actual computation. */
  dorgqr_(&m , &n , &num_reflectors , matrix_get_data( A ), &lda , tau , work , &worksize , &info);
  if (info != 0)
    util_abort("%s: dorgqr routine failed with info:%d \n",__func__ , info);

  free( work );
}
/* Orthonormalize the columns of @p into @q using the Gram-Schmidt
   process; returns the number of orthonormal columns produced.
   @q must be at least as large as @p in both dimensions. */
int matrix_gram_schmidt_process(matrix_t *q, matrix_t *p) {
  matrix_t *work;
  int rank;

  assert(q);
  assert(p);
  assert(matrix_get_columns(q) >= matrix_get_columns(p));
  assert(matrix_get_rows(q) >= matrix_get_rows(p));

  work = matrix_new_and_copy(p);
  rank = matrix_self_gram_schmidt_process(work, 0, 0, matrix_get_columns(work), matrix_get_rows(work));
  matrix_copy_matrix(q, 0, 0, work, 0, 0, rank, matrix_get_rows(work));
  matrix_destroy(work);

  return rank;
}
/**
   Fill @G with a Gram matrix of @X: G = X'X when @col is true,
   G = XX' otherwise. @G must be square with the matching dimension;
   aborts on dimension mismatch.
*/
void matrix_gram_set( const matrix_type * X , matrix_type * G, bool col) {
  const int G_rows = matrix_get_rows( G );
  const int G_cols = matrix_get_columns( G );
  const int X_rows = matrix_get_rows( X );
  const int X_cols = matrix_get_columns( X );
  bool dims_ok;

  if (col)
    dims_ok = (G_rows == G_cols) && (X_cols == G_rows);
  else
    dims_ok = (G_rows == G_cols) && (X_rows == G_rows);

  if (!dims_ok)
    util_abort("%s: dimension mismatch \n",__func__);

  if (col)
    matrix_dgemm( G , X , X , true  , false , 1 , 0);   /* G = X'X  */
  else
    matrix_dgemm( G , X , X , false , true  , 1 , 0);   /* G = XX'  */
}
/* Gram-Schmidt orthonormalize @p into a freshly allocated matrix which
   the caller owns. When no columns are dropped the working copy is
   returned directly; otherwise a matrix holding only the orthonormal
   columns is allocated. */
matrix_t *matrix_new_and_gram_schmidt_process(matrix_t *p) {
  matrix_t *work;
  matrix_t *result;
  int rank;

  assert(p);

  work = matrix_new_and_copy(p);
  rank = matrix_self_gram_schmidt_process(work, 0, 0, matrix_get_columns(work), matrix_get_rows(work));
  if (rank == matrix_get_columns(work))
    return work;

  result = matrix_new(rank, matrix_get_rows(work), false);
  matrix_copy_matrix(result, 0, 0, work, 0, 0, rank, matrix_get_rows(work));
  matrix_destroy(work);
  return result;
}
/* Complex variant: Gram-Schmidt orthonormalize the imaginary matrix @p
   into a freshly allocated matrix which the caller owns. */
matrix_t *cmatrix_new_and_gram_schmidt_process(matrix_t *p) {
  matrix_t *work;
  matrix_t *result;
  int rank;

  assert(p);
  assert(matrix_is_imaginary(p));

  work = matrix_new_and_copy(p);
  rank = cmatrix_self_gram_schmidt_process(work, 0, 0, matrix_get_columns(work), matrix_get_rows(work));
  if (rank == matrix_get_columns(work))
    return work;

  result = matrix_new(rank, matrix_get_rows(work), true);
  cmatrix_copy_cmatrix(result, 0, 0, work, 0, 0, rank, matrix_get_rows(work));
  matrix_destroy(work);
  return result;
}
/* Select column @beta_index of lars->beta and unscale it into lars->Y0
   via regression_unscale(); lars->beta0 is allocated on first use. */
void lars_select_beta( lars_type * lars , int beta_index) {
  const int nvars = matrix_get_rows( lars->beta );

  if (lars->beta0 == NULL)
    lars->beta0 = matrix_alloc( nvars , 1 );

  {
    matrix_type * beta_vector = matrix_alloc( nvars , 1 );
    matrix_copy_column( beta_vector , lars->beta , 0 , beta_index );
    /* NOTE(review): a previously assigned lars->Y0 is overwritten here
       without being freed - confirm ownership is handled elsewhere. */
    lars->Y0 = regression_unscale( beta_vector , lars->X_norm , lars->X_mean , lars->Y_mean , lars->beta0 );
    matrix_free( beta_vector );
  }
}
/**
   Allocate a copy of @src containing only the columns where @mask is
   true. The mask length must equal the number of columns in @src.
   Caller owns the returned matrix.

   Fix: the abort message reported the row count while the check
   compares the column count - it now reports columns.
*/
matrix_type * matrix_alloc_column_compressed_copy(const matrix_type * src, const bool_vector_type * mask) {
  if (bool_vector_size( mask ) != matrix_get_columns( src ))
    util_abort("%s: size mismatch. Src matrix has %d columns mask has:%d elements\n",
               __func__ , matrix_get_columns( src ) , bool_vector_size( mask ));
  {
    int target_columns = bool_vector_count_equal( mask , true );
    matrix_type * target = matrix_alloc( matrix_get_rows( src ) , target_columns );

    matrix_column_compressed_memcpy( target , src , mask );
    return target;
  }
}
void test_readwrite() { test_work_area_type * test_area = test_work_area_alloc("matrix-test"); { rng_type * rng = rng_alloc(MZRAN , INIT_DEV_URANDOM ); matrix_type * m1 = matrix_alloc(3 , 3); matrix_type * m2 = matrix_alloc(3 , 3); matrix_random_init( m1 , rng ); matrix_assign(m2 , m1); test_assert_true( matrix_equal( m1 , m2 ) ); { FILE * stream = util_fopen("m1" , "w"); matrix_fwrite( m1 , stream ); fclose( stream ); } matrix_random_init( m1 , rng ); test_assert_false( matrix_equal( m1 , m2 ) ); { FILE * stream = util_fopen("m1" , "r"); matrix_free( m1 ); m1 = matrix_alloc(1,1); printf("-----------------------------------------------------------------\n"); matrix_fread( m1 , stream ); test_assert_int_equal( matrix_get_rows(m1) , matrix_get_rows( m2)); test_assert_int_equal( matrix_get_columns(m1) , matrix_get_columns( m2)); util_fseek( stream , 0 , SEEK_SET); { matrix_type * m3 = matrix_fread_alloc( stream ); test_assert_true( matrix_equal( m2 , m3 )); matrix_free( m3 ); } fclose( stream ); } test_assert_true( matrix_equal( m1 , m2 ) ); matrix_free( m2 ); matrix_free( m1 ); rng_free( rng ); } test_work_area_free( test_area ); }
/**
   Scatter the (active-columns-only) ensemble matrix @A into @state,
   which is resized to have one column per ensemble member. Columns of
   @A are placed at the member positions where @ens_mask is true;
   masked-out columns of @state are zeroed.

   BUG FIX: active_index was never incremented, so every active member
   received column 0 of A instead of consecutive columns.
*/
void rml_enkf_common_store_state( matrix_type * state , const matrix_type * A , const bool_vector_type * ens_mask ) {
  matrix_resize( state , matrix_get_rows( A ) , bool_vector_size( ens_mask ) , false);
  {
    const int ens_size = bool_vector_size( ens_mask );
    int active_index = 0;
    for (int iens = 0; iens < ens_size; iens++) {
      if (bool_vector_iget( ens_mask , iens )) {
        matrix_copy_column( state , A , iens , active_index );
        active_index++;
      } else
        matrix_set_const_column( state , iens , 0);
    }
  }
}
/* Allocate and return the Gram matrix of @X: X'X when @col is true,
   XX' otherwise. Caller owns the result. */
matrix_type * matrix_alloc_gram( const matrix_type * X , bool col) {
  const int dim = col ? matrix_get_columns( X ) : matrix_get_rows( X );
  matrix_type * G = matrix_alloc( dim , dim );
  matrix_gram_set( X , G , col);
  return G;
}
/* Dot product of row @row1 of @m1 with column @col2 of @m2; aborts
   unless columns(m1) == rows(m2). */
double matrix_row_column_dot_product(const matrix_type * m1 , int row1 , const matrix_type * m2 , int col2) {
  if (m1->columns != m2->rows)
    util_abort("%s: size mismatch: m1:[%d,%d] m2:[%d,%d] \n",__func__ ,
               matrix_get_rows( m1 ) , matrix_get_columns( m1 ) ,
               matrix_get_rows( m2 ) , matrix_get_columns( m2 ));

  {
    double dot = 0;
    for (int i = 0; i < m1->columns; i++)
      dot += m1->data[ GET_INDEX(m1 , row1 , i) ] * m2->data[ GET_INDEX(m2 , i , col2) ];

    return dot;
  }
}
// Scale rows by the entries in the vector Csc void rml_enkf_common_scaleA(matrix_type *A , const double * Csc, bool invert ){ int nrows = matrix_get_rows(A); if (invert) { for (int i=0; i< nrows ; i++) { double sc= 1/Csc[i]; matrix_scale_row(A, i, sc); } } else { for (int i=0; i< nrows ; i++) { double sc= Csc[i]; matrix_scale_row(A, i, sc); } } }
/**
   Inverse of rml_enkf_common_store_state(): gather the columns of
   @state corresponding to active members (per @ens_mask) into @A,
   which is resized to hold exactly the active columns.

   BUG FIX: active_index was never incremented, so column 0 of A was
   repeatedly overwritten and the remaining columns left uninitialized.
*/
void rml_enkf_common_recover_state( const matrix_type * state , matrix_type * A , const bool_vector_type * ens_mask ) {
  const int ens_size    = bool_vector_size( ens_mask );
  const int active_size = bool_vector_count_equal( ens_mask , true );
  const int rows        = matrix_get_rows( state );

  matrix_resize( A , rows , active_size , false );
  {
    int active_index = 0;
    for (int iens = 0; iens < ens_size; iens++) {
      if (bool_vector_iget( ens_mask , iens )) {
        matrix_copy_column( A , state , active_index , iens );
        active_index++;
      }
    }
  }
}
/* Thin wrapper around BLAS dgemv: y = alpha * op(A) * x + beta * y,
   where op(A) is A' when @transA is true, otherwise A. */
void matrix_dgemv(const matrix_type * A , const double *x , double * y, bool transA , double alpha , double beta) {
  int  m        = matrix_get_rows( A );
  int  n        = matrix_get_columns( A );
  int  lda      = matrix_get_column_stride( A );
  int  incx     = 1;
  int  incy     = 1;
  char transA_c = transA ? 'T' : 'N';

  dgemv_(&transA_c , &m , &n , &alpha , matrix_get_data( A ) , &lda , x , &incx , &beta , y , &incy);
}
/**
   Solve A x = B with LAPACK dgesv(); A is overwritten with its LU
   factors and B is overwritten with the solution. A must be square and
   B must have Fortran (column-major) layout.

   BUG FIX: the pivot array was 'long int *', but the Fortran INTEGER
   array dgesv expects corresponds to C 'int' (4 bytes on common LP64
   ABIs) - the 8-byte elements corrupted the pivot exchange.
*/
void matrix_dgesv(matrix_type * A , matrix_type * B) {
  matrix_lapack_assert_square( A );
  matrix_lapack_assert_fortran_layout( B );
  {
    int n    = matrix_get_rows( A );
    int lda  = matrix_get_column_stride( A );
    int ldb  = matrix_get_column_stride( B );
    int nrhs = matrix_get_columns( B );
    int * ipivot = util_calloc( n , sizeof * ipivot );
    int info;

    dgesv_(&n , &nrhs , matrix_get_data( A ) , &lda , ipivot , matrix_get_data( B ), &ldb , &info);
    if (info != 0)
      util_abort("%s: low level lapack routine: dgesv() failed with info:%d \n",__func__ , info);
    free(ipivot);
  }
}
void test_state() { rng_type * rng = rng_alloc( MZRAN , INIT_DEFAULT ); int ens_size = 10; int active_size = 8; int rows = 100; matrix_type * state = matrix_alloc(1,1); bool_vector_type * ens_mask = bool_vector_alloc(ens_size , false); matrix_type * A = matrix_alloc( rows , active_size); matrix_type * A2 = matrix_alloc( rows, active_size ); matrix_type * A3 = matrix_alloc( 1,1 ); for (int i=0; i < active_size; i++) bool_vector_iset( ens_mask , i + 1 , true ); matrix_random_init(A , rng); rml_enkf_common_store_state( state , A , ens_mask ); test_assert_int_equal( matrix_get_rows( state ) , rows ); test_assert_int_equal( matrix_get_columns( state ) , ens_size ); { int g; int a = 0; for (g=0; g < ens_size; g++) { if (bool_vector_iget( ens_mask , g )) { test_assert_true( matrix_columns_equal( state , g , A , a )); a++; } } } rml_enkf_common_recover_state( state , A2 , ens_mask); rml_enkf_common_recover_state( state , A3 , ens_mask); test_assert_true( matrix_equal( A , A2 )); test_assert_true( matrix_equal( A , A3 )); bool_vector_free( ens_mask ); matrix_free( state ); matrix_free( A ); }
/**
   Run the LARS (Least Angle Regression) estimation loop.

   @lars      Holds the data (X, Y) and receives the estimated beta coefficients.
   @max_vars  Maximum number of covariates to activate; <= 0 means all of them.
   @max_beta  If positive: stop when |beta|_1 exceeds this value and interpolate
              between the last two estimates so that |beta| == max_beta.
   @verbose   Print the beta matrix when the estimation is complete.

   Each iteration activates the covariate most correlated with the current
   residual, computes the equiangular direction and step length, and stores
   one new column in lars->beta.
*/
void lars_estimate(lars_type * lars , int max_vars , double max_beta , bool verbose) {
  int nvars = matrix_get_columns( lars->X );
  int nsample = matrix_get_rows( lars->X );
  matrix_type * X = matrix_alloc( nsample, nvars );    // Allocate local X and Y variables
  matrix_type * Y = matrix_alloc( nsample, 1 );        // which will hold the normalized data
  lars_estimate_init( lars , X , Y);                   // during the estimation process.
  {
    matrix_type * G    = matrix_alloc_gram( X , true );
    matrix_type * mu   = matrix_alloc( nsample , 1 );
    matrix_type * C    = matrix_alloc( nvars , 1 );
    matrix_type * Y_mu = matrix_alloc_copy( Y );
    int_vector_type * active_set   = int_vector_alloc(0,0);
    int_vector_type * inactive_set = int_vector_alloc(0,0);
    int active_size;

    if ((max_vars <= 0) || (max_vars > nvars))
      max_vars = nvars;

    {
      int i;
      for (i=0; i < nvars; i++)
        int_vector_iset( inactive_set , i , i );
    }
    matrix_set( mu , 0 );

    while (true) {
      double maxC = 0;

      /*
        The first step is to calculate the correlations between the
        covariates, and the current residual. All the currently inactive
        covariates are searched; the covariate with the greatest
        correlation with (Y - mu) is selected and added to the active set.
      */
      matrix_sub( Y_mu , Y , mu );                            // Y_mu = Y - mu
      matrix_dgemm( C , X , Y_mu , true , false , 1.0 , 0);   // C = X' * Y_mu
      {
        int i;
        int max_set_index = 0;

        for (i=0; i < int_vector_size( inactive_set ); i++) {
          int    set_index = i;
          int    var_index = int_vector_iget( inactive_set , set_index );
          double value     = fabs( matrix_iget(C , var_index , 0) );
          if (value > maxC) {
            maxC          = value;
            max_set_index = set_index;
          }
        }
        /*
          Remove element corresponding to max_set_index from the
          inactive set and add it to the active set:
        */
        int_vector_append( active_set , int_vector_idel( inactive_set , max_set_index ));
      }
      active_size = int_vector_size( active_set );
      /*
        Now we have calculated the correlations between all the
        covariates and the current residual @Y_mu. The correlations are
        stored in the matrix @C. The value of the maximum correlation is
        stored in @maxC. Based on the value of @maxC we have added one
        new covariate to the model, technically by moving the index from
        @inactive_set to @active_set.
      */
      /*****************************************************************/
      {
        matrix_type * weights = matrix_alloc( active_size , 1);
        double scale;

        /*****************************************************************/
        /* This scope should compute and initialize the variables
           @weights and @scale. */
        {
          matrix_type * subG        = matrix_alloc( active_size , active_size );
          matrix_type * STS         = matrix_alloc( active_size , active_size );
          matrix_type * sign_vector = matrix_alloc( active_size , 1);
          int i , j;

          /*
            STS = S' o S where 'o' is the Schur product and S is given by:

                [ s1 s2 s3 s4 ]
            S = [ s1 s2 s3 s4 ]
                [ s1 s2 s3 s4 ]
                [ s1 s2 s3 s4 ]

            Where si is the sign of the correlation between (active)
            variable 'i' and Y_mu.
          */
          for (i=0; i < active_size ; i++) {
            int vari     = int_vector_iget( active_set , i );
            double signi = sgn( matrix_iget( C , vari , 0));
            matrix_iset( sign_vector , i , 0 , signi );
            for (j=0; j < active_size; j++) {
              int varj     = int_vector_iget( active_set , j );
              double signj = sgn( matrix_iget( C , varj , 0));
              matrix_iset( STS , i , j , signi * signj );
            }
          }

          // Extract the elements from G corresponding to active indices and
          // copy to the matrix subG:
          for (i=0; i < active_size ; i++) {
            int ii = int_vector_iget( active_set , i );
            for (j=0; j < active_size; j++) {
              int jj = int_vector_iget( active_set , j );
              matrix_iset( subG , i , j , matrix_iget(G , ii , jj));
            }
          }

          // Weights
          matrix_inplace_mul( subG , STS );
          matrix_inv( subG );

          {
            matrix_type * ones = matrix_alloc( active_size , 1 );
            matrix_type * GA1  = matrix_alloc( active_size , 1 );
            matrix_set( ones , 1.0 );
            matrix_matmul( GA1 , subG , ones );
            scale = 1.0 / sqrt( matrix_get_column_sum( GA1 , 0 ));
            matrix_mul( weights , GA1 , sign_vector );
            matrix_scale( weights , scale );
            matrix_free( GA1 );
            matrix_free( ones );
          }

          matrix_free( sign_vector );
          matrix_free( subG );
          matrix_free( STS );
        }

        /******************************************************************/
        /* The variables weight and scale have been calculated, proceed
           to calculate the step length @gamma. */
        {
          int i;
          double gamma;

          {
            matrix_type * u = matrix_alloc( nsample , 1 );
            int j;

            /* u = X[:,active] * weights - the equiangular direction. */
            for (i=0; i < nsample; i++) {
              double row_sum = 0;
              for (j =0; j < active_size; j++)
                row_sum += matrix_iget( X , i , int_vector_iget( active_set , j)) * matrix_iget(weights , j , 0 );

              matrix_iset( u , i , 0 , row_sum );
            }

            gamma = maxC / scale;
            if (active_size < matrix_get_columns( X )) {
              matrix_type * equi_corr = matrix_alloc( nvars , 1 );
              matrix_dgemm( equi_corr , X , u , true , false , 1.0 , 0);     // equi_corr = X'·u
              /* The minimal positive gamma over the inactive variables. */
              for (i=0; i < (nvars - active_size); i++) {
                int var_index = int_vector_iget( inactive_set , i );
                double gamma1 = (maxC - matrix_iget(C , var_index , 0 )) / ( scale - matrix_iget( equi_corr , var_index , 0));
                double gamma2 = (maxC + matrix_iget(C , var_index , 0 )) / ( scale + matrix_iget( equi_corr , var_index , 0));

                if ((gamma1 > 0) && (gamma1 < gamma))
                  gamma = gamma1;

                if ((gamma2 > 0) && (gamma2 < gamma))
                  gamma = gamma2;
              }
              matrix_free( equi_corr );
            }
            /* Update the current estimated 'location' mu. */
            matrix_scale( u , gamma );
            matrix_inplace_add( mu , u );
            matrix_free( u );
          }

          /*
            We have calculated the step length @gamma, and the
            @weights. Update the @beta matrix.
          */
          for (i=0; i < active_size; i++)
            matrix_iset( lars->beta , int_vector_iget( active_set , i ) , active_size - 1 , gamma * matrix_iget( weights , i , 0));

          /* Accumulate: column k of beta = column (k-1) + this step. */
          if (active_size > 1)
            for (i=0; i < nvars; i++)
              matrix_iadd( lars->beta , i , active_size - 1 , matrix_iget( lars->beta , i , active_size - 2));

          matrix_free( weights );
        }
      }

      if (active_size == max_vars)
        break;

      if (max_beta > 0) {
        double beta_norm2 = matrix_get_column_abssum( lars->beta , active_size - 1 );
        if (beta_norm2 > max_beta) {
          // We stop - we will use an interpolation between this beta estimate and
          // the previous, to ensure that the |beta| = max_beta criteria is satisfied.
          if (active_size >= 2) {
            double beta_norm1 = matrix_get_column_abssum( lars->beta , active_size - 2 );
            double s = (max_beta - beta_norm1)/(beta_norm2 - beta_norm1);
            {
              int j;
              for (j=0; j < nvars; j++) {
                double beta1 = matrix_iget( lars->beta , j , active_size - 2 );
                double beta2 = matrix_iget( lars->beta , j , active_size - 1 );
                matrix_iset( lars->beta , j , active_size - 1 , beta1 + s*(beta2 - beta1));
              }
            }
          }
          break;
        }
      }
    }
    matrix_free( G );
    matrix_free( mu );
    matrix_free( C );
    matrix_free( Y_mu );
    int_vector_free( active_set );
    int_vector_free( inactive_set );
    matrix_resize( lars->beta , nvars , active_size , true );
    if (verbose)
      matrix_pretty_fprint( lars->beta , "beta" , "%12.5f" , stdout );
    lars_select_beta( lars , active_size - 1);
  }
  matrix_free( X );
  matrix_free( Y );
}
/* Number of samples, i.e. rows of the X matrix in @lars. */
int lars_get_sample( const lars_type * lars ) {
  const int nsample = matrix_get_rows( lars->X );
  return nsample;
}
/**
   Temporarily activate variable @test_var and evaluate the resulting
   cross-validated prediction error over @blocks CV folds.

   The rows are randomly permuted once; for each fold a contiguous block
   of the permuted indices is held out, the model is re-estimated on the
   remaining rows and the squared prediction error is accumulated over
   the held-out rows. The variable is deactivated again before
   returning, but note that the beta estimated from the last fold is
   left in stepwise->beta (the caller must re-estimate).

   Returns the accumulated squared prediction error.
*/
static double stepwise_test_var( stepwise_type * stepwise , int test_var , int blocks) {
  double prediction_error = 0;

  bool_vector_iset( stepwise->active_set , test_var , true );   // Temporarily activate this variable

  {
    int nvar       = matrix_get_columns( stepwise->X0 );
    int nsample    = matrix_get_rows( stepwise->X0 );
    int block_size = nsample / blocks;
    bool_vector_type * active_rows = bool_vector_alloc( nsample, true );

    /*True Cross-Validation: */
    int * randperms = util_calloc( nsample , sizeof * randperms );
    for (int i=0; i < nsample; i++)
      randperms[i] = i;

    /* Randomly perturb ensemble indices */
    rng_shuffle_int( stepwise->rng , randperms , nsample );

    for (int iblock = 0; iblock < blocks; iblock++) {
      int validation_start = iblock * block_size;
      int validation_end   = validation_start + block_size - 1;
      /* The last block absorbs any remainder rows. */
      if (iblock == (blocks - 1))
        validation_end = nsample - 1;

      /*
        Ensure that the active_rows vector has a block consisting of the
        interval [validation_start : validation_end] which is set to
        false, and the remaining part of the vector is set to true.
      */
      {
        bool_vector_set_all(active_rows, true);
        /*
          If blocks == 1 that means all datapoint are used in the
          regression, and then subsequently reused in the R2
          calculation.
        */
        if (blocks > 1) {
          for (int i = validation_start; i <= validation_end; i++) {
            bool_vector_iset( active_rows , randperms[i] , false );
          }
        }
      }

      /*
        Evaluate the prediction error on the validation part of the
        dataset.
      */
      {
        stepwise_estimate__( stepwise , active_rows );
        {
          int irow;
          matrix_type * x_vector = matrix_alloc( 1 , nvar );
          //matrix_type * e_vector = matrix_alloc( 1 , nvar );
          for (irow=validation_start; irow <= validation_end; irow++) {
            matrix_copy_row( x_vector , stepwise->X0 , 0 , randperms[irow]);
            //matrix_copy_row( e_vector , stepwise->E0 , 0 , randperms[irow]);
            {
              double true_value      = matrix_iget( stepwise->Y0 , randperms[irow] , 0 );
              double estimated_value = stepwise_eval__( stepwise , x_vector );
              prediction_error += (true_value - estimated_value) * (true_value - estimated_value);
              //double e_estimated_value = stepwise_eval__( stepwise , e_vector );
              //prediction_error += e_estimated_value*e_estimated_value;
            }
          }
          matrix_free( x_vector );
        }
      }
    }
    free( randperms );
    bool_vector_free( active_rows );
  }

  /*inactivate the test_var-variable after completion*/
  bool_vector_iset( stepwise->active_set , test_var , false );
  return prediction_error;
}
/* Number of samples, i.e. rows of the X0 matrix in @stepwise. */
int stepwise_get_nsample( stepwise_type * stepwise ) {
  const int nsample = matrix_get_rows( stepwise->X0 );
  return nsample;
}
/**
   Thin wrapper around BLAS dgemm: C = alpha * op(A) * op(B) + beta * C,
   where op(X) is X' when the corresponding trans flag is true. Aborts
   on any dimension mismatch between A, B and C.

   BUG FIX: the ldc sanity check was written 'if (!ldc >= ...)' which,
   by operator precedence, evaluates '(!ldc) >= ...' and therefore
   almost never triggered. It is now '!(ldc >= ...)'.
*/
void matrix_dgemm(matrix_type *C , const matrix_type *A , const matrix_type * B , bool transA, bool transB , double alpha , double beta) {
  int m   = matrix_get_rows( C );
  int n   = matrix_get_columns( C );
  int lda = matrix_get_column_stride( A );
  int ldb = matrix_get_column_stride( B );
  int ldc = matrix_get_column_stride( C );
  char transA_c;
  char transB_c;
  int  k , innerA, innerB , outerA , outerB;

  if (transA)
    k = matrix_get_rows( A );
  else
    k = matrix_get_columns( A );

  if (transA) {
    innerA   = matrix_get_rows(A);
    outerA   = matrix_get_columns(A);
    transA_c = 'T';
  } else {
    innerA   = matrix_get_columns(A);
    outerA   = matrix_get_rows(A);
    transA_c = 'N';
  }

  if (transB) {
    innerB   = matrix_get_columns( B );
    outerB   = matrix_get_rows( B );
    transB_c = 'T';
  } else {
    transB_c = 'N';
    innerB   = matrix_get_rows( B );
    outerB   = matrix_get_columns( B );
  }

  /*
    This is the dimension check which must pass:

    --------------------------------------------------
          A   |         B   | Columns(A) = Rows(B)
    Trans(A)  |  Trans(B)   | Rows(A)    = Columns(B)
          A   |  Trans(B)   | Columns(A) = Columns(B)
    Trans(A)  |         B   | Rows(A)    = Rows(B)
    --------------------------------------------------

    --------------------------------------------------
          A   | Rows(A)    = Rows(C)
    Trans(A)  | Columns(A) = Rows(C)
          B   | Columns(B) = Columns(C)
    Trans(B)  | Rows(B)    = Columns(C)
    --------------------------------------------------
  */

  if (innerA != innerB) {
    dgemm_debug(C,A,B,transA , transB);
    util_abort("%s: matrix size mismatch between A and B \n", __func__);
  }

  if (outerA != matrix_get_rows( C )) {
    dgemm_debug(C,A,B,transA , transB);
    printf("outerA:%d  rows(C):%d \n",outerA , matrix_get_rows( C ));
    util_abort("%s: matrix size mismatch between A and C \n",__func__);
  }

  if (outerB != matrix_get_columns( C )) {
    dgemm_debug(C,A,B,transA , transB);
    util_abort("%s: matrix size mismatch between B and C \n",__func__);
  }

  if (!(ldc >= util_int_max(1 , m))) {
    dgemm_debug(C,A,B,transA , transB);
    fprintf(stderr,"Tried to capture blas message: \"** On entry to DGEMM parameter 13 had an illegal value\"\n");
    fprintf(stderr,"m:%d  ldc:%d  ldc should be >= max(1,%d) \n",m,ldc,m);
    util_abort("%s: invalid value for ldc\n",__func__);
  }

  dgemm_(&transA_c ,            //  1
         &transB_c ,            //  2
         &m ,                   //  3
         &n ,                   //  4
         &k ,                   //  5
         &alpha ,               //  6
         matrix_get_data( A ) , //  7
         &lda ,                 //  8
         matrix_get_data( B ) , //  9
         &ldb ,                 // 10
         &beta ,                // 11
         matrix_get_data( C ) , // 12
         &ldc);                 // 13
}
/**
   Forward stepwise variable selection driven by cross-validated
   prediction error.

   Starting from an empty active set, each pass evaluates every inactive
   variable with stepwise_test_var() (@CV_blocks-fold cross validation)
   and activates the variable giving the lowest prediction error. The
   loop stops when the relative improvement is no better than
   @deltaR2_limit, or when all variables are active. The final R2 is
   stored on the stepwise object.
*/
void stepwise_estimate( stepwise_type * stepwise , double deltaR2_limit , int CV_blocks) {
  int nvar        = matrix_get_columns( stepwise->X0 );
  int nsample     = matrix_get_rows( stepwise->X0 );
  double currentR2 = -1;
  bool_vector_type * active_rows = bool_vector_alloc( nsample , true );

  /*Reset beta*/
  for (int i = 0; i < nvar; i++) {
    matrix_iset(stepwise->beta, i , 0 , 0.0);
  }

  bool_vector_set_all( stepwise->active_set , false );

  double MSE_min = 10000000;
  double Prev_MSE_min = MSE_min;
  double minR2 = -1;

  while (true) {
    int best_var = 0;
    Prev_MSE_min = MSE_min;

    /*
      Go through all the inactive variables, and calculate the resulting
      prediction error IF this particular variable is added; keep track
      of the variable which gives the lowest prediction error.
    */
    /* NOTE(review): minR2 is intentionally(?) not reset between passes,
       so best_var only changes when a candidate beats the global
       minimum so far - confirm this is the intended behavior. */
    for (int ivar = 0; ivar < nvar; ivar++) {
      if (!bool_vector_iget( stepwise->active_set , ivar)) {
        double newR2 = stepwise_test_var(stepwise , ivar , CV_blocks);
        if ((minR2 < 0) || (newR2 < minR2)) {
          minR2 = newR2;
          best_var = ivar;
        }
      }
    }

    /*
      If the best relative improvement in prediction error is better
      than @deltaR2_limit, the corresponding variable is added to the
      active set, and we return to repeat the loop one more time.
      Otherwise we just exit.
    */
    {
      MSE_min = minR2;
      double deltaR2 = MSE_min / Prev_MSE_min;
      if (( currentR2 < 0) || deltaR2 < deltaR2_limit) {
        bool_vector_iset( stepwise->active_set , best_var , true );
        currentR2 = minR2;
        bool_vector_set_all(active_rows, true);
        stepwise_estimate__( stepwise , active_rows );
      } else {
        /* The gain in prediction error is so small that we just leave the building. */
        /* NB! Need one final computation of beta (since the test_var
           function does not reset the last tested beta value !) */
        bool_vector_set_all(active_rows, true);
        stepwise_estimate__( stepwise , active_rows );
        break;
      }

      if (bool_vector_count_equal( stepwise->active_set , true) == matrix_get_columns( stepwise->X0 )) {
        stepwise_estimate__( stepwise , active_rows );
        break;   /* All variables are active. */
      }
    }
  }

  stepwise_set_R2(stepwise, currentR2);
  bool_vector_free( active_rows );
}
/* Allocate and return the transpose of @A. Caller owns the result. */
matrix_type * matrix_alloc_transpose( const matrix_type * A) {
  const int rows = matrix_get_columns( A );
  const int cols = matrix_get_rows( A );
  matrix_type * B = matrix_alloc( rows , cols );
  matrix_transpose( A , B );
  return B;
}
/**
   Bootstrap EnKF update of the ensemble matrix @A.

   For every ensemble member, the ensemble columns of A and S are
   resampled (with replacement, using the precomputed index table),
   an update matrix X is computed for the resampled ensemble (via CV
   EnKF or standard EnKF depending on the doCV flag), the update is
   applied to the resampled ensemble, and the column corresponding to
   this member is copied back into @A.
*/
void bootstrap_enkf_updateA(void * module_data ,
                            matrix_type * A ,
                            matrix_type * S ,
                            matrix_type * R ,
                            matrix_type * dObs ,
                            matrix_type * E ,
                            matrix_type * D ) {

  bootstrap_enkf_data_type * bootstrap_data = bootstrap_enkf_data_safe_cast( module_data );
  {
    const int num_cpu_threads = 4;
    int ens_size              = matrix_get_columns( A );
    matrix_type * X           = matrix_alloc( ens_size , ens_size );
    matrix_type * A0          = matrix_alloc_copy( A );
    matrix_type * S_resampled = matrix_alloc_copy( S );
    matrix_type * A_resampled = matrix_alloc( matrix_get_rows(A0) , matrix_get_columns( A0 ));
    int ** iens_resample      = alloc_iens_resample( bootstrap_data->rng , ens_size );
    {
      int ensemble_members_loop;
      for ( ensemble_members_loop = 0; ensemble_members_loop < ens_size; ensemble_members_loop++) {
        int unique_bootstrap_components;
        int ensemble_counter;
        /* Resample A and meas_data. Here we are careful to resample the working copy.*/
        {
          {
            int_vector_type * bootstrap_components = int_vector_alloc( ens_size , 0);
            for (ensemble_counter  = 0; ensemble_counter < ens_size; ensemble_counter++) {
              int random_column = iens_resample[ ensemble_members_loop][ensemble_counter];
              int_vector_iset( bootstrap_components , ensemble_counter , random_column );
              matrix_copy_column( A_resampled , A0 , ensemble_counter , random_column );
              matrix_copy_column( S_resampled , S  , ensemble_counter , random_column );
            }
            int_vector_select_unique( bootstrap_components );
            /* NOTE(review): unique_bootstrap_components is computed but
               never used below - confirm whether it can be dropped. */
            unique_bootstrap_components = int_vector_size( bootstrap_components );
            int_vector_free( bootstrap_components );
          }

          if (bootstrap_data->doCV) {
            const bool_vector_type * ens_mask = NULL;
            cv_enkf_init_update( bootstrap_data->cv_enkf_data , ens_mask , S_resampled , R , dObs , E , D);
            cv_enkf_initX( bootstrap_data->cv_enkf_data , X , A_resampled , S_resampled , R , dObs , E , D);
          } else
            std_enkf_initX(bootstrap_data->std_enkf_data , X , NULL , S_resampled,R, dObs, E,D );

          matrix_inplace_matmul_mt1( A_resampled , X , num_cpu_threads );
          matrix_inplace_add( A_resampled , A0 );
          matrix_copy_column( A , A_resampled, ensemble_members_loop, ensemble_members_loop);
        }
      }
    }

    free_iens_resample( iens_resample , ens_size);
    matrix_free( X );
    matrix_free( S_resampled );
    matrix_free( A_resampled );
    matrix_free( A0 );
  }
}
/* Allocate and return the product A * B. Caller owns the result. */
matrix_type * matrix_alloc_matmul(const matrix_type * A, const matrix_type * B) {
  matrix_type * product = matrix_alloc( matrix_get_rows( A ) , matrix_get_columns( B ));
  matrix_matmul( product , A , B );
  return product;
}
/** Will not respect strides - that is considered low level data layout.
    Returns NULL if the (safe mode) allocation fails. */
static matrix_type * matrix_alloc_copy__( const matrix_type * src , bool safe_mode) {
  matrix_type * copy = matrix_alloc__( matrix_get_rows( src ), matrix_get_columns( src ) , safe_mode);
  if (copy == NULL)
    return NULL;

  matrix_assign(copy , src);
  return copy;
}
/**
   Estimate the regression coefficients for the currently active
   variables (stepwise->active_set) using only the rows selected by
   @active_rows, and scatter the resulting dense beta vector back into
   stepwise->beta at the active variable positions.

   NOTE(review): the return value y_mean is always 0 here - the returned
   double carries no information; confirm whether callers rely on it.
*/
static double stepwise_estimate__( stepwise_type * stepwise , bool_vector_type * active_rows) {
  matrix_type * X;
  matrix_type * E;
  matrix_type * Y;

  double y_mean   = 0;
  int nvar        = matrix_get_columns( stepwise->X0 );
  int nsample     = matrix_get_rows( stepwise->X0 );

  /* Reduce to the active row / active variable counts. */
  nsample = bool_vector_count_equal( active_rows , true );
  nvar = bool_vector_count_equal( stepwise->active_set , true );

  matrix_set( stepwise->beta , 0 ); // It is essential to make sure that old finite values in the beta0 vector do not hang around.

  /*
    Extracting the data used for regression, and storing them in the
    temporary local matrices X and Y. Selecting data is based both on
    which varibles are active (stepwise->active_set) and which rows
    should be used for regression, versus which should be used for
    validation (@active_rows).
  */
  if ((nsample < matrix_get_rows( stepwise->X0 )) || (nvar < matrix_get_columns( stepwise->X0 ))) {
    X = matrix_alloc( nsample , nvar );
    E = matrix_alloc( nsample , nvar );
    Y = matrix_alloc( nsample , 1);

    {
      int icol,irow;   // Running over all values.
      int arow,acol;   // Running over active values.

      arow = 0;
      for (irow = 0; irow < matrix_get_rows( stepwise->X0 ); irow++) {
        if (bool_vector_iget( active_rows , irow )) {
          acol = 0;
          for (icol = 0; icol < matrix_get_columns( stepwise->X0 ); icol++) {
            if (bool_vector_iget( stepwise->active_set , icol )) {
              matrix_iset( X , arow , acol , matrix_iget( stepwise->X0 , irow , icol ));
              matrix_iset( E , arow , acol , matrix_iget( stepwise->E0 , irow , icol ));
              acol++;
            }
          }
          matrix_iset( Y , arow , 0 , matrix_iget( stepwise->Y0 , irow , 0 ));
          arow++;
        }
      }
    }
  } else {
    /* Everything is active - plain copies suffice. */
    X = matrix_alloc_copy( stepwise->X0 );
    E = matrix_alloc_copy( stepwise->E0 );
    Y = matrix_alloc_copy( stepwise->Y0 );
  }

  {
    if (stepwise->X_mean != NULL)
      matrix_free( stepwise->X_mean);

    stepwise->X_mean = matrix_alloc( 1 , nvar );

    if (stepwise->X_norm != NULL)
      matrix_free( stepwise->X_norm);

    stepwise->X_norm = matrix_alloc( 1 , nvar );

    matrix_type * beta = matrix_alloc( nvar , 1);   /* This is the beta vector as estimated from the OLS estimator. */

    regression_augmented_OLS( X , Y , E, beta );

    /*
      In this code block the beta/tmp_beta vector which is dense with
      fewer elements than the full model is scattered into the beta0
      vector which has full size and @nvar elements.
    */
    {
      int ivar,avar;
      avar = 0;
      for (ivar = 0; ivar < matrix_get_columns( stepwise->X0 ); ivar++) {
        if (bool_vector_iget( stepwise->active_set , ivar )) {
          matrix_iset( stepwise->beta , ivar , 0 , matrix_iget( beta , avar , 0));
          avar++;
        }
      }
    }

    matrix_free( beta );
  }

  matrix_free( X );
  matrix_free( E );
  matrix_free( Y );
  return y_mean;
}
/**
   RML-EnKF update step: scales the predictions with Cd^(-1/2),
   computes a truncated SVD of the scaled prediction matrix, builds the
   update matrices X1, X2, X3 and applies the resulting increment dA to
   the parameter ensemble @A in place.

   @Udr/@Wdr/@VdTr receive the truncated SVD factors; @lamda is the
   Levenberg-Marquardt style damping factor; @truncation controls the
   SVD truncation. NOTE(review): the parameter @E is unused here -
   presumably kept for interface compatibility; confirm.

   Fix: the temporary matrix 'tmp' was allocated but never freed (leak).
*/
void rml_enkf_common_initA__( matrix_type * A ,
                              matrix_type * S ,
                              matrix_type * Cd ,
                              matrix_type * E ,
                              matrix_type * D ,
                              double truncation,
                              double lamda,
                              matrix_type * Udr,
                              double * Wdr,
                              matrix_type * VdTr) {

  int nrobs    = matrix_get_rows( S );
  int ens_size = matrix_get_columns( S );
  double a = lamda + 1;
  matrix_type *tmp = matrix_alloc (nrobs, ens_size);
  double nsc = 1/sqrt(ens_size-1);

  printf("The lamda Value is %5.5f\n",lamda);
  printf("The Value of Truncation is %4.2f \n",truncation);

  matrix_subtract_row_mean( S );   /* Shift away the mean in the ensemble predictions*/
  matrix_inplace_diag_sqrt(Cd);
  matrix_dgemm(tmp, Cd, S,false, false, 1.0, 0.0);
  matrix_scale(tmp, nsc);

  printf("The Scaling of data matrix completed !\n ");

  // SVD(S) = Ud * Wd * Vd(T)
  int nsign = enkf_linalg_svd_truncation(tmp , truncation , -1 , DGESVD_MIN_RETURN , Wdr , Udr , VdTr);
  matrix_free(tmp);   /* Fix: previously leaked. */

  /* After this we only work with the reduced dimension matrices */
  printf("The number of siginificant ensembles are %d \n ",nsign);

  matrix_type * X1 = matrix_alloc( nsign, ens_size);
  matrix_type * X2 = matrix_alloc (nsign, ens_size );
  matrix_type * X3 = matrix_alloc (ens_size, ens_size );

  // Compute the matrices X1,X2,X3 and dA
  enkf_linalg_rml_enkfX1(X1, Udr ,D ,Cd );          //X1 = Ud(T)*Cd(-1/2)*D   -- D= -(dk-d0)
  enkf_linalg_rml_enkfX2(X2, Wdr ,X1 ,a, nsign);    //X2 = ((a*Ipd)+Wd^2)^-1  * X1
  matrix_free(X1);
  enkf_linalg_rml_enkfX3(X3, VdTr ,Wdr,X2, nsign);  //X3 = Vd *Wd*X2
  printf("The X3 matrix is computed !\n ");

  matrix_type *dA1 = matrix_alloc (matrix_get_rows(A), ens_size);
  matrix_type * Dm = matrix_alloc_copy( A );

  matrix_subtract_row_mean( Dm );   /* Remove the mean from the ensemble of model parameters*/
  matrix_scale(Dm, nsc);

  enkf_linalg_rml_enkfdA(dA1, Dm, X3);   //dA = Dm * X3
  matrix_inplace_add(A,dA1);             //dA

  matrix_free(X3);
  matrix_free(Dm);
  matrix_free(dA1);
}
/**
   Compute (selected) eigenvalues, and optionally eigenvectors, of the
   symmetric matrix @A using LAPACK dsyevx(). @A is modified by the
   call. Eigenvalues are stored in @eig_values and, when
   @compute_eig_vectors is true, eigenvectors are stored as columns of
   @Z (otherwise @Z may be NULL). Returns the number of eigenvalues
   found.

   NOTE(review): the info return from dsyevx_ is not checked after
   either call - a LAPACK failure goes unreported; confirm whether that
   is acceptable here.
*/
int matrix_dsyevx(bool             compute_eig_vectors ,
                  dsyevx_eig_enum  which_values        , /* DSYEVX | DSYEVX_VALUE_INTERVAL | DSYEVX_INDEX_INTERVAL */
                  dsyevx_uplo_enum uplo,
                  matrix_type    * A ,                   /* The input matrix - is modified by the dsyevx() function. */
                  double VL ,                            /* Lower limit when using DSYEVX_VALUE_INTERVAL */
                  double VU ,                            /* Upper limit when using DSYEVX_VALUE_INTERVAL */
                  int    IL ,                            /* Lower index when using DSYEVX_INDEX_INTERVAL */
                  int    IU ,                            /* Upper index when using DSYEVX_INDEX_INTERVAL */
                  double * eig_values ,                  /* The calcualated eigenvalues */
                  matrix_type * Z ) {                    /* The eigenvectors as columns vectors */

  int lda = matrix_get_column_stride( A );
  int n   = matrix_get_rows( A );
  char jobz;
  char range;
  char uplo_c;

  if (compute_eig_vectors)
    jobz = 'V';
  else
    jobz = 'N';

  switch(which_values) {
  case(DSYEVX_ALL):
    range = 'A';
    break;
  case(DSYEVX_VALUE_INTERVAL):
    range = 'V';
    break;
  case(DSYEVX_INDEX_INTERVAL):
    range = 'I';
    break;
  default:
    util_abort("%s: internal error \n",__func__);
  }

  if (uplo == DSYEVX_AUPPER)
    uplo_c = 'U';
  else if (uplo == DSYEVX_ALOWER)
    uplo_c = 'L';
  else
    util_abort("%s: internal error \n",__func__);

  if (!matrix_is_quadratic( A ))
    util_abort("%s: matrix A must be quadratic \n",__func__);

  {
    int num_eigenvalues , ldz, info , worksize;
    int * ifail = util_calloc( n     , sizeof * ifail );
    int * iwork = util_calloc( 5 * n , sizeof * iwork );
    double * work = util_calloc( 1 , sizeof * work );
    double * z_data;
    double abstol = 0.0;   /* Should possibly be passed in by the caller - using 0 gives the LAPACK default tolerance. */

    if (compute_eig_vectors) {
      ldz    = matrix_get_column_stride( Z );
      z_data = matrix_get_data( Z );
    } else {
      /* In this case we can accept that Z == NULL */
      ldz    = 1;
      z_data = NULL;
    }

    /* First call with worksize == -1 is a LAPACK workspace query; the
       optimal size is returned in work[0]. */
    worksize = -1;
    info     = 0;
    dsyevx_( &jobz,                 /*  1 */
             &range,                /*  2 */
             &uplo_c,               /*  3 */
             &n,                    /*  4 */
             matrix_get_data( A ),  /*  5 */
             &lda ,                 /*  6 */
             &VL ,                  /*  7 */
             &VU ,                  /*  8 */
             &IL ,                  /*  9 */
             &IU ,                  /* 10 */
             &abstol ,              /* 11 */
             &num_eigenvalues ,     /* 12 */
             eig_values ,           /* 13 */
             z_data ,               /* 14 */
             &ldz ,                 /* 15 */
             work ,                 /* 16 */
             &worksize ,            /* 17 */
             iwork ,                /* 18 */
             ifail ,                /* 19 */
             &info);                /* 20 */

    worksize = (int) work[0];
    {
      double * tmp = realloc(work , sizeof * work * worksize );
      if (tmp == NULL) {
        /* OK - we could not get the optimal worksize,
           try again with the minimum (8*n for dsyevx). */
        worksize = 8 * n;
        work = util_realloc(work , sizeof * work * worksize );
      } else
        work = tmp; /* The request for optimal worksize succeeded */
    }

    /* Second call: do the job */
    info = 0;
    dsyevx_( &jobz,
             &range,
             &uplo_c,
             &n,
             matrix_get_data( A ),
             &lda ,
             &VL ,
             &VU ,
             &IL ,
             &IU ,
             &abstol ,
             &num_eigenvalues ,
             eig_values ,
             z_data ,
             &ldz ,
             work ,
             &worksize ,
             iwork ,
             ifail ,
             &info);

    free( ifail );
    free( work );
    free( iwork );
    return num_eigenvalues;
  }
}