/**
 * Flag @report_step for internalization on @node.
 *
 * If the node's implementation type is CONTAINER the request is forwarded,
 * recursively, to every child returned by enkf_config_node_container_iget().
 * For any other implementation type the node's own `internalize` vector gets
 * entry @report_step set to true; the vector is allocated on first use.
 *
 * @param node         Config node to update (container or leaf).
 * @param report_step  Index to set in the internalize vector.
 */
void enkf_config_node_set_internalize(enkf_config_node_type * node, int report_step) {
  if (enkf_config_node_get_impl_type( node ) != CONTAINER) {
    /* Leaf node: create the vector lazily, then mark the step. */
    if (node->internalize == NULL)
      node->internalize = bool_vector_alloc( 0 , false );

    bool_vector_iset( node->internalize , report_step , true );
    return;
  }

  /* Container node: apply the same request to each child in index order. */
  {
    const int num_children = enkf_config_node_container_size( node );
    int child_index = 0;

    while (child_index < num_children) {
      enkf_config_node_type * child = enkf_config_node_container_iget( node , child_index );
      enkf_config_node_set_internalize( child , report_step );
      child_index++;
    }
  }
}
/**
 * Write @select into every entry of @select_target whose corresponding state
 * in @map has at least one bit in common with @select_mask.
 *
 * Only the first min(size of map->state, size of select_target) indices are
 * inspected, so @select_target is never grown by this function.  Entries whose
 * state does not match the mask are left untouched.
 *
 * The map's state vector is read while holding map->rw_lock as a reader.
 * state_map_assert_writable() is called first, even though this block only
 * reads from the map.
 *
 * @param map            State map to read from.
 * @param select_target  Vector updated in place.
 * @param select_mask    Bit mask tested against each state value.
 * @param select         Value stored for matching indices.
 */
static void state_map_select_matching__( state_map_type * map , bool_vector_type * select_target , int select_mask , bool select) {
  state_map_assert_writable(map);

  pthread_rwlock_rdlock( &map->rw_lock );
  {
    const int * states = int_vector_get_ptr( map->state );
    const int   limit  = util_int_min( int_vector_size( map->state ) , bool_vector_size( select_target ));
    int index;

    for (index = 0; index < limit; index++) {
      if ((states[index] & select_mask) == 0)
        continue;                      /* No overlap with the mask - skip. */

      bool_vector_iset( select_target , index , select );
    }
  }
  pthread_rwlock_unlock( &map->rw_lock );
}
/**
 * Load the active mask for @report_step from the enkf_fs filesystem.
 *
 * The function first records @fs as config->read_fs (noting whether it
 * changed).  If the config is not dynamic it then returns immediately: the
 * instance is used as GEN_PARAM and loading a mask is not an option.
 *
 * Otherwise, under config->update_lock:
 *
 *  - A load is attempted when @force_load is true or the stored data size for
 *    @report_step is positive, and only if the currently loaded mask belongs
 *    to a different report step or the filesystem changed.
 *  - The mask is read from the file "<key>_active" for @report_step.
 *  - If that file cannot be opened and no data size is known (negative size),
 *    the function reports a fatal internal error and calls util_abort().
 *  - If that file cannot be opened and the size is known, the mask is reset
 *    and extended to that size by setting its last element to true.
 *    NOTE(review): that every element ends up true, as the info message says,
 *    presumably relies on the mask's default value being true - confirm.
 *  - config->active_report_step is updated to @report_step in all cases.
 *
 * @param config       GEN_DATA configuration whose active_mask is updated.
 * @param fs           Filesystem to read the mask from.
 * @param report_step  Report step to load the mask for.
 * @param force_load   Attempt the load even if no positive data size is
 *                     registered for @report_step.
 */
void gen_data_config_load_active( gen_data_config_type * config , enkf_fs_type * fs, int report_step , bool force_load) {
  bool fs_changed = false;
  if (fs != config->read_fs) {
    config->read_fs = fs;
    fs_changed = true;
  }

  /* This is used as a GEN_PARAM instance - and the loading of mask is not an option. */
  if (!config->dynamic)
    return;

  pthread_mutex_lock( &config->update_lock );
  {
    if ( force_load || (int_vector_iget( config->data_size_vector , report_step ) > 0)) {
      if (config->active_report_step != report_step || fs_changed) {
        char * filename = util_alloc_sprintf("%s_active" , config->key );
        FILE * stream   = enkf_fs_open_excase_tstep_file( fs , filename , report_step);

        if (stream != NULL) {
          bool_vector_fread( config->active_mask , stream );
          fclose( stream );
        } else {
          int gen_data_size = int_vector_safe_iget( config->data_size_vector, report_step );
          if (gen_data_size < 0) {
            fprintf(stderr,"** Fatal internal error in function:%s \n",__func__);
            fprintf(stderr,"\n");
            fprintf(stderr," 1: The active mask file:%s was not found \n",filename);
            fprintf(stderr," 2: The size of the gen_data vectors has not been set\n");
            fprintf(stderr,"\n");
            fprintf(stderr,"We can not create a suitable active_mask. Code should call gen_data_config_has_active_mask()\n\n");
            util_abort("%s: fatal internal error - could not create a suitable active_mask \n",__func__);
          } else {
            fprintf(stdout,"** Info: could not locate active data elements file %s, filling active vector with true all elements active \n",filename);
            bool_vector_reset( config->active_mask );

            /*
              A size of zero (reachable with force_load) means there are no
              elements to activate.  Setting index gen_data_size - 1 == -1
              would be an invalid index, so the freshly reset mask is left
              empty in that case.
            */
            if (gen_data_size > 0)
              bool_vector_iset( config->active_mask, gen_data_size - 1, true);
          }
        }
        free( filename );
      }
    }
    config->active_report_step = report_step;
  }
  pthread_mutex_unlock( &config->update_lock );
}
/**
 * Create the run path and run the forward-init step for ensemble member 0.
 *
 * Step 1: build a temporary mask with entry (ensemble size - 1) set to true,
 *         hand it to enkf_main_create_run_path() for iteration 0, and free it.
 * Step 2: fetch the state of member 0 and the current filesystem, allocate an
 *         ENSEMBLE_EXPERIMENT run_arg for the path "simulations/run0" and call
 *         enkf_state_forward_init() with it.
 *
 * @param enkf_main   Main ensemble object.
 * @param init_file   Not referenced in this function.
 * @param field_node  Not referenced in this function.
 *
 * NOTE(review): the run_arg allocated in step 2 is not released in this
 * function - confirm whether the caller or enkf_state_forward_init() owns it.
 */
void forward_initialize_node(enkf_main_type * enkf_main, const char * init_file, enkf_node_type * field_node) {
  const int          member_index = 0;
  const int          ens_size     = enkf_main_get_ensemble_size( enkf_main );
  bool_vector_type * iactive      = bool_vector_alloc(0, false);
  enkf_state_type  * state;
  enkf_fs_type     * fs;
  run_arg_type     * run_arg;

  /* Step 1: run path creation. */
  bool_vector_iset( iactive , ens_size - 1 , true );
  enkf_main_create_run_path(enkf_main , iactive , 0);
  bool_vector_free(iactive);

  /* Step 2: forward init of the first ensemble member. */
  state   = enkf_main_iget_state( enkf_main , member_index );
  fs      = enkf_main_get_fs(enkf_main);
  run_arg = run_arg_alloc_ENSEMBLE_EXPERIMENT( fs , 0 ,0 , "simulations/run0");
  enkf_state_forward_init( state , run_arg);
}
void test_state() { rng_type * rng = rng_alloc( MZRAN , INIT_DEFAULT ); int ens_size = 10; int active_size = 8; int rows = 100; matrix_type * state = matrix_alloc(1,1); bool_vector_type * ens_mask = bool_vector_alloc(ens_size , false); matrix_type * A = matrix_alloc( rows , active_size); matrix_type * A2 = matrix_alloc( rows, active_size ); matrix_type * A3 = matrix_alloc( 1,1 ); for (int i=0; i < active_size; i++) bool_vector_iset( ens_mask , i + 1 , true ); matrix_random_init(A , rng); rml_enkf_common_store_state( state , A , ens_mask ); test_assert_int_equal( matrix_get_rows( state ) , rows ); test_assert_int_equal( matrix_get_columns( state ) , ens_size ); { int g; int a = 0; for (g=0; g < ens_size; g++) { if (bool_vector_iget( ens_mask , g )) { test_assert_true( matrix_columns_equal( state , g , A , a )); a++; } } } rml_enkf_common_recover_state( state , A2 , ens_mask); rml_enkf_common_recover_state( state , A3 , ens_mask); test_assert_true( matrix_equal( A , A2 )); test_assert_true( matrix_equal( A , A3 )); bool_vector_free( ens_mask ); matrix_free( state ); matrix_free( A ); }
/**
 * Fill chi2[step][iens] for every report step in [step1, step2] (inclusive)
 * and every ensemble member in [iens1, iens2) (upper bound exclusive).
 *
 * For a step where obs_vector->nodes holds no observation node (NULL) all
 * entries are set to 0.  Otherwise a scratch enkf_node is loaded from @fs for
 * each member: on success the entry is the value from obs_vector_chi2__(); on
 * failure the entry is set to 0 and the member is flagged false in @valid.
 *
 * @param obs_vector  Observation vector supplying config node and obs nodes.
 * @param fs          Filesystem to load simulated data from.
 * @param valid       Per-member validity mask; only ever cleared here.
 * @param step1       First report step (inclusive).
 * @param step2       Last report step (inclusive).
 * @param iens1       First ensemble member (inclusive).
 * @param iens2       End of ensemble member range (exclusive).
 * @param load_state  State stored in node_id.state for every load.
 * @param chi2        Output table indexed as chi2[step][iens].
 */
void obs_vector_ensemble_chi2(const obs_vector_type * obs_vector , enkf_fs_type * fs, bool_vector_type * valid , int step1 , int step2 , int iens1 , int iens2 , state_enum load_state , double ** chi2) {
  enkf_node_type * work_node = enkf_node_alloc( obs_vector->config_node );
  node_id_type     node_id;
  int              report_step;

  node_id.state = load_state;

  for (report_step = step1; report_step <= step2; report_step++) {
    const void * obs_node;
    int          member;

    node_id.report_step = report_step;
    obs_node = vector_iget( obs_vector->nodes , report_step );

    for (member = iens1; member < iens2; member++) {
      if (obs_node == NULL) {
        /* No observation at this step: nothing to measure against. */
        chi2[report_step][member] = 0;
        continue;
      }

      node_id.iens = member;
      if (enkf_node_try_load( work_node , fs , node_id )) {
        chi2[report_step][member] = obs_vector_chi2__( obs_vector , report_step , work_node , node_id );
      } else {
        /* Missing data - this member will be marked as invalid in the misfit calculations. */
        chi2[report_step][member] = 0;
        bool_vector_iset( valid , member , false );
      }
    }
  }

  enkf_node_free( work_node );
}
/**
 * Set entry @report_step of config->__load_state to true.
 *
 * @param config       Model configuration to update.
 * @param report_step  Index of the entry to set.
 */
void model_config_set_load_state( model_config_type * config , int report_step) {
  bool_vector_type * load_state = config->__load_state;

  bool_vector_iset( load_state , report_step , true );
}
void model_config_init(model_config_type * model_config , const config_type * config , int ens_size , const ext_joblist_type * joblist , int last_history_restart , const sched_file_type * sched_file , const ecl_sum_type * refcase) { model_config->forward_model = forward_model_alloc( joblist ); model_config_set_refcase( model_config , refcase ); if (config_item_set( config , FORWARD_MODEL_KEY )) { char * config_string = config_alloc_joined_string( config , FORWARD_MODEL_KEY , " "); forward_model_parse_init( model_config->forward_model , config_string ); free(config_string); } if (config_item_set( config , ENKF_SCHED_FILE_KEY)) model_config_set_enkf_sched_file(model_config , config_get_value(config , ENKF_SCHED_FILE_KEY )); if (config_item_set( config, RUNPATH_KEY)) { model_config_add_runpath( model_config , DEFAULT_RUNPATH_KEY , config_get_value(config , RUNPATH_KEY) ); model_config_select_runpath( model_config , DEFAULT_RUNPATH_KEY ); } { history_source_type source_type = DEFAULT_HISTORY_SOURCE; if (config_item_set( config , HISTORY_SOURCE_KEY)) { const char * history_source = config_iget(config , HISTORY_SOURCE_KEY, 0,0); source_type = history_get_source_type( history_source ); } if (!model_config_select_history( model_config , source_type , sched_file , refcase )) if (!model_config_select_history( model_config , DEFAULT_HISTORY_SOURCE , sched_file , refcase )) if (!model_config_select_any_history( model_config , sched_file , refcase)) fprintf(stderr,"** Warning:: Do not have enough information to select a history source \n"); } if (model_config->history != NULL) { int num_restart = history_get_last_restart( model_config->history ); bool_vector_iset( model_config->internalize_state , num_restart - 1 , false ); bool_vector_iset( model_config->__load_state , num_restart - 1 , false ); } /* The full treatment of the SCHEDULE_PREDICTION_FILE keyword is in the ensemble_config file, because the functionality is implemented as (quite) plain GEN_KW instance. 
Here we just check if it is present or not. */ if (config_item_set(config , SCHEDULE_PREDICTION_FILE_KEY)) model_config->has_prediction = true; else model_config->has_prediction = false; if (config_item_set(config , CASE_TABLE_KEY)) model_config_set_case_table( model_config , ens_size , config_iget( config , CASE_TABLE_KEY , 0,0)); if (config_item_set( config , ENSPATH_KEY)) model_config_set_enspath( model_config , config_get_value(config , ENSPATH_KEY)); if (config_item_set( config , JOBNAME_KEY)) model_config_set_jobname_fmt( model_config , config_get_value(config , JOBNAME_KEY)); if (config_item_set( config , RFTPATH_KEY)) model_config_set_rftpath( model_config , config_get_value(config , RFTPATH_KEY)); if (config_item_set( config , DBASE_TYPE_KEY)) model_config_set_dbase_type( model_config , config_get_value(config , DBASE_TYPE_KEY)); if (config_item_set( config , MAX_RESAMPLE_KEY)) model_config_set_max_internal_submit( model_config , config_get_value_as_int( config , MAX_RESAMPLE_KEY )); { const char * export_file_name; if (config_item_set( config , GEN_KW_EXPORT_FILE_KEY)) export_file_name = config_get_value(config, GEN_KW_EXPORT_FILE_KEY); else export_file_name = DEFAULT_GEN_KW_EXPORT_FILE; model_config_set_gen_kw_export_file(model_config, export_file_name); } }
/**
 * Forward stepwise selection of regression variables.
 *
 * Starting from a zeroed first column of beta and an empty active set, each
 * round evaluates every inactive variable with stepwise_test_var() and
 * remembers the one with the lowest prediction error.  The candidate is added
 * to the active set when either nothing has been accepted yet, or the ratio
 * (best error this round) / (best error previous round) is below
 * @deltaR2_limit.  After every decision the model is re-estimated on all
 * rows.  The search stops when a candidate is rejected or when the number of
 * active variables equals the number of columns in X0.
 *
 * The best-error tracker is not reset between rounds, so a variable can only
 * become the candidate if it beats the lowest error seen in any earlier
 * round.  The value finally handed to stepwise_set_R2() is the error recorded
 * at the last acceptance.
 *
 * @param stepwise       Regression object (X0, beta, active_set are used).
 * @param deltaR2_limit  Upper limit on the round-to-round error ratio for a
 *                       variable to be accepted.
 * @param CV_blocks      Number of blocks forwarded to stepwise_test_var().
 */
void stepwise_estimate( stepwise_type * stepwise , double deltaR2_limit , int CV_blocks) {
  const int num_vars    = matrix_get_columns( stepwise->X0 );
  const int num_samples = matrix_get_rows( stepwise->X0 );
  double accepted_error = -1;    /* Negative until the first variable is accepted. */
  bool_vector_type * all_rows = bool_vector_alloc( num_samples , true );

  /* Reset beta and deactivate every variable. */
  for (int row = 0; row < num_vars; row++)
    matrix_iset( stepwise->beta , row , 0 , 0.0 );
  bool_vector_set_all( stepwise->active_set , false );

  double lowest_error = 10000000;
  double best_error   = -1;      /* Negative means "no error recorded yet". */

  for (;;) {
    const double previous_error = lowest_error;
    int candidate = 0;

    /* Try each inactive variable; keep the one with the smallest error. */
    for (int ivar = 0; ivar < num_vars; ivar++) {
      if (bool_vector_iget( stepwise->active_set , ivar ))
        continue;

      const double trial_error = stepwise_test_var( stepwise , ivar , CV_blocks );
      if ((best_error < 0) || (trial_error < best_error)) {
        best_error = trial_error;
        candidate  = ivar;
      }
    }

    lowest_error = best_error;
    const double error_ratio = lowest_error / previous_error;
    const bool   accept      = (accepted_error < 0) || (error_ratio < deltaR2_limit);

    if (accept) {
      bool_vector_iset( stepwise->active_set , candidate , true );
      accepted_error = best_error;
    }

    /*
      A full-data estimate is needed in both cases: stepwise_test_var() does
      not restore beta after its last trial fit.
    */
    bool_vector_set_all( all_rows , true );
    stepwise_estimate__( stepwise , all_rows );

    if (!accept)
      break;   /* The gain in prediction error is too small - stop. */

    if (bool_vector_count_equal( stepwise->active_set , true ) == matrix_get_columns( stepwise->X0 )) {
      stepwise_estimate__( stepwise , all_rows );
      break;   /* All variables are active. */
    }
  }

  stepwise_set_R2( stepwise , accepted_error );
  bool_vector_free( all_rows );
}
/**
 * Cross-validated prediction error obtained when @test_var is temporarily
 * added to the active set.
 *
 * The sample indices 0 .. nsample-1 are shuffled with stepwise->rng and cut
 * into @blocks consecutive folds of nsample / blocks entries; the last fold
 * is extended to include any remainder.  For each fold the model is estimated
 * with the fold's rows deactivated and then evaluated on exactly those rows;
 * the squared differences between Y0 and the prediction are accumulated.
 *
 * With @blocks == 1 no rows are held out: all samples are used for the
 * estimate and then reused for the error sum.
 *
 * @test_var is deactivated again before returning.  The beta values left
 * behind are those of the last fold's estimate.
 *
 * @param stepwise  Regression object (X0, Y0, rng, active_set are used).
 * @param test_var  Index of the variable under test.
 * @param blocks    Number of cross-validation folds; used as a divisor, so
 *                  it must be non-zero.
 * @return          Sum of squared prediction errors over all folds.
 */
static double stepwise_test_var( stepwise_type * stepwise , int test_var , int blocks) {
  double squared_error_sum = 0;

  /* Temporarily activate this variable. */
  bool_vector_iset( stepwise->active_set , test_var , true );

  const int num_vars    = matrix_get_columns( stepwise->X0 );
  const int num_samples = matrix_get_rows( stepwise->X0 );
  const int fold_size   = num_samples / blocks;
  const int last_fold   = blocks - 1;
  bool_vector_type * train_rows = bool_vector_alloc( num_samples, true );

  /* Random permutation of the sample indices. */
  int * order = util_calloc( num_samples , sizeof * order );
  for (int k = 0; k < num_samples; k++)
    order[k] = k;
  rng_shuffle_int( stepwise->rng , order , num_samples );

  for (int fold = 0; fold < blocks; fold++) {
    const int first = fold * fold_size;
    const int last  = (fold == last_fold) ? (num_samples - 1) : (first + fold_size - 1);

    /* Training mask: everything active except this fold's rows. */
    bool_vector_set_all( train_rows , true );
    if (blocks > 1) {
      for (int k = first; k <= last; k++)
        bool_vector_iset( train_rows , order[k] , false );
    }

    stepwise_estimate__( stepwise , train_rows );

    /* Accumulate the squared error over the fold's rows. */
    matrix_type * x_row = matrix_alloc( 1 , num_vars );
    for (int k = first; k <= last; k++) {
      const int sample = order[k];

      matrix_copy_row( x_row , stepwise->X0 , 0 , sample );

      const double true_value      = matrix_iget( stepwise->Y0 , sample , 0 );
      const double estimated_value = stepwise_eval__( stepwise , x_row );
      squared_error_sum += (true_value - estimated_value) * (true_value - estimated_value);
    }
    matrix_free( x_row );
  }

  free( order );
  bool_vector_free( train_rows );

  /* Deactivate the variable again now that the test is complete. */
  bool_vector_iset( stepwise->active_set , test_var , false );
  return squared_error_sum;
}