// check_pair() is called by BOINC code to validate any results arriving // after the canonical result has been chosen. //------------------------------------------------------------ int check_pair(RESULT& new_result, RESULT& canonical, bool& retry) { //------------------------------------------------------------ int retval; SAH_RESULT sah_new, sah_canonical; DB_RESULT db_result; retry=false; // init log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[RESULT#%d] getting new result file %s\n", new_result.id, new_result.name ); retval = get_result_file((RESULT&)new_result, sah_new); if (retval) { log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[RESULT#%d] read/parse of %s FAILED with retval %d\n", new_result.id, new_result.name, retval ); // A directory problem may be transient. if (retval == ERR_OPENDIR) { retry = true; retval = 0; goto return_retval; } else { // a non-transient, non-recoverable error new_result.outcome = RESULT_OUTCOME_VALIDATE_ERROR; new_result.validate_state = VALIDATE_STATE_INVALID; retval = 0; goto return_retval; } } log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[RESULT#%d] getting canonical result file %s\n", canonical.id, canonical.name ); retval = get_result_file((RESULT &)canonical, sah_canonical); if (retval) { log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[RESULT#%d] read/parse of %s FAILED with retval %d\n", canonical.id, canonical.name, retval ); // A directory problem may be transient. if (retval == ERR_OPENDIR) { retry = true; retval = 0; goto return_retval; } else { // a non-transient, non-recoverable error - set new_result to error. new_result.outcome = RESULT_OUTCOME_VALIDATE_ERROR; new_result.validate_state = VALIDATE_STATE_INVALID; retval = 0; goto return_retval; } } if (sah_canonical.weakly_similar(sah_new)) { log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[CANONICAL RESULT#%d (%d signals) and NEW RESULT#%d (%d signals)] ARE weakly similar\n", canonical.id, sah_canonical.num_signals, new_result.id, sah_new.num_signals ); new_result.validate_state = VALIDATE_STATE_VALID; } else { log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[CANONICAL RESULT#%d (%d signals) and NEW RESULT#%d (%d signals)] are NOT weakly similar\n", canonical.id, sah_canonical.num_signals, new_result.id, sah_new.num_signals ); new_result.validate_state = VALIDATE_STATE_INVALID; handle_cuda_notvalid(canonical, new_result); // temporary for cuda debugging } if (new_result.validate_state == VALIDATE_STATE_VALID) { log_cuda_result(new_result, canonical.granted_credit); } retval = 0; return_retval: return(retval); }
// check_set() is called from BOINC code and is passed a vector of all // received results for work unit. check_set() determines the canonical // result and flags each result as to whether it is similar enough to the // canonical result to be given credit. check_set provides BOINC with both // the canonical ID and the amount of credit to be granted to each validated // result. As a matter of policy the validator does not do values checking. // The canonical result could have bad values. The detection and flagging // of this situation is a function of the assimilator. // //------------------------------------------------------------ int check_set( vector<RESULT>& results, WORKUNIT& wu, int& canonicalid, double& granted_credit, bool& retry) { //------------------------------------------------------------ // Note that SAH_RESULT is not the same type as the standard sah // result as it appears in the app and the science backend. Rather // it simply contains a vector of all the signals returned in a // a given result, along with functions used to validate that result // (ie that set of signals). // I should rename the type. jeffc vector<SAH_RESULT> sah_results; //SAH_RESULT s; RESULT r; DB_RESULT db_result; unsigned int i, j, k, good_result_count=0; bool found, err_opendir=false; double max_credit, min_credit, sum; int max_credit_i=-1, min_credit_i=-1, nvalid, retval; vector<bool> bad_result; retry=false; // init log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "check_set() checking %d results\n", results.size() ); // read and parse the result files // for (i=0; i<results.size(); i++) { SAH_RESULT s = {0}; log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[RESULT#%d] getting result file %s\n", results[i].id, results[i].name ); retval = get_result_file(results[i], s); if (retval) { log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[RESULT#%d] read/parse of %s FAILED with retval %d\n", results[i].id, results[i].name, retval ); // A directory problem may be transient. if (retval == ERR_OPENDIR) { retry = true; } else { // a non-transient, non-recoverable error results[i].outcome = RESULT_OUTCOME_VALIDATE_ERROR; results[i].validate_state = VALIDATE_STATE_INVALID; } retval = 0; } else { good_result_count++; } sah_results.push_back(s); // s may be a null result in case of IO error } // If IO errors took us below min_quorum, bail. if (good_result_count < wu.min_quorum) { log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[WU#%d] IO error(s) led to less than quorum results. Will retry WU upon receiving more results.\n", wu.id ); canonicalid = 0; retval = 0; goto return_retval; } // flag results with bad values for (i=0; i<sah_results.size(); i++) { if (!sah_results[i].have_result) continue; if (sah_results[i].bad_values()) { log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[RESULT#%d] has one or more bad values.\n", results[i].id ); bad_result.push_back(true); validate_stats.bad_results++; } } if (results.size() == 1) { validate_single(results, canonicalid, granted_credit); } else { validate_plural(results, sah_results, canonicalid, granted_credit); } for (i=0; i<results.size(); i++) { if (results[i].validate_state == VALIDATE_STATE_VALID) { log_cuda_result(results[i], granted_credit); } } retval = 0; return_retval: return(retval); }
// check_pair() is called by BOINC code to validate any results arriving // after the canonical result has been chosen. //------------------------------------------------------------ int check_pair(RESULT& new_result, RESULT& canonical, bool& retry) { //------------------------------------------------------------ int retval; SAH_RESULT sah_new, sah_canonical; DB_RESULT db_result; retry=false; // init log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[RESULT#%lld] getting new result file %s\n", new_result.id, new_result.name ); retval = get_result_file((RESULT&)new_result, sah_new); if (retval) { log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[RESULT#%lld] read/parse of %s FAILED with retval %d\n", new_result.id, new_result.name, retval ); // Someone is trying to process v7 workunits with v6 if (!sah_new.found_best_autocorr && !(sah_new.is_overflow || canonical.runtime_outlier) && !strcmp(app_name,"setiathome_v7")) { new_result.validate_state = VALIDATE_STATE_INVALID; retval = 0; log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[RESULT#%lld] is from an invalid app_version\n", new_result.id ); goto return_retval; } // A directory problem may be transient. if (retval == ERR_OPENDIR) { retry = true; retval = 0; goto return_retval; } else { // a non-transient, non-recoverable error new_result.outcome = RESULT_OUTCOME_VALIDATE_ERROR; new_result.validate_state = VALIDATE_STATE_INVALID; retval = 0; goto return_retval; } } log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[RESULT#%lld] getting canonical result file %s\n", canonical.id, canonical.name ); retval = get_result_file((RESULT &)canonical, sah_canonical); if (retval) { log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[RESULT#%lld] read/parse of %s FAILED with retval %d\n", canonical.id, canonical.name, retval ); // A directory problem may be transient. if (retval == ERR_OPENDIR) { retry = true; retval = 0; goto return_retval; } else { // a non-transient, non-recoverable error - set new_result to error. new_result.outcome = RESULT_OUTCOME_VALIDATE_ERROR; new_result.validate_state = VALIDATE_STATE_INVALID; retval = 0; goto return_retval; } } if (sah_canonical.weakly_similar(sah_new)) { log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[CANONICAL RESULT#%lld (%d signals) and NEW RESULT#%lld (%d signals)] ARE weakly similar\n", canonical.id, sah_canonical.num_signals, new_result.id, sah_new.num_signals ); new_result.validate_state = VALIDATE_STATE_VALID; } else { log_messages.printf( SCHED_MSG_LOG::MSG_DEBUG, "[CANONICAL RESULT#%lld (%d signals) and NEW RESULT#%lld (%lld signals)] are NOT weakly similar\n", canonical.id, sah_canonical.num_signals, new_result.id, sah_new.num_signals ); new_result.validate_state = VALIDATE_STATE_INVALID; handle_cuda_notvalid(canonical, new_result); // temporary for cuda debugging } if (new_result.validate_state == VALIDATE_STATE_VALID) { log_cuda_result(new_result, canonical.granted_credit); } retval = 0; return_retval: return(retval); }