// Recovery logic: decide what to do in response to // host failures and upload/download completions. // // One way to do this would be to store a bunch of state info // with each node in the file's tree, // and do things by local tree traversal. // // However, it's a lot simpler (for me, the programmer) // to store minimal state info, // and to reconstruct state info using // a top-down tree traversal in response to each event. // Actually we do 2 traversals: // 1) plan phase: // We see whether every node recoverable, // and if so compute its "recovery set": // the set of children from which it can be recovered // with minimal cost (i.e. network traffic). // Decide whether each chunk currently on the server needs to remain. // 2) action phase // Based on the results of phase 1, // decide whether to start upload/download of chunks, // and whether to delete data currently on server // void META_CHUNK::recovery_plan() { vector<DATA_UNIT*> recoverable; vector<DATA_UNIT*> present; unsigned int i; have_unrecoverable_children = false; // make lists of children in various states // for (i=0; i<children.size(); i++) { DATA_UNIT* c = children[i]; c->in_recovery_set = false; c->data_needed = false; c->data_now_present = false; c->recovery_plan(); switch (c->status) { case PRESENT: present.push_back(c); break; case RECOVERABLE: recoverable.push_back(c); break; case UNRECOVERABLE: have_unrecoverable_children = true; break; } } // based on states of children, decide what state we're in // if ((int)(present.size()) >= coding.n) { status = PRESENT; sort(present.begin(), present.end(), compare_cost); present.resize(coding.n); cost = 0; for (i=0; i<present.size(); i++) { DATA_UNIT* c= present[i]; cost += c->cost; c->in_recovery_set = true; } } else if ((int)(present.size() + recoverable.size()) >= coding.n) { status = RECOVERABLE; unsigned int j = coding.n - present.size(); sort(recoverable.begin(), recoverable.end(), compare_cost); cost = 0; for (i=0; i<present.size(); i++) { DATA_UNIT* c= present[i]; c->in_recovery_set = true; } for (i=0; i<j; i++) { DATA_UNIT* c= recoverable[i]; c->in_recovery_set = true; cost += c->cost; } } else { status = UNRECOVERABLE; } }
// Compute min_failures: the smallest # of host failures // that would make this unit unrecoverable. // int META_CHUNK::compute_min_failures() { unsigned int i; for (i=0; i<children.size(); i++) { DATA_UNIT* c = children[i]; c->compute_min_failures(); } // Because of recovery action, // some of our children may have changed status and fault tolerance, // so ours may have changed too. // Recompute them. // vector<DATA_UNIT*> recoverable; vector<DATA_UNIT*> present; for (i=0; i<children.size(); i++) { DATA_UNIT* c = children[i]; switch (c->status) { case PRESENT: present.push_back(c); break; case RECOVERABLE: recoverable.push_back(c); break; } } if ((int)(present.size()) >= coding.n) { status = PRESENT; min_failures = dfile->policy.max_ft; } else if ((int)(present.size() + recoverable.size()) >= coding.n) { status = RECOVERABLE; // our min_failures is the least X such that some X host failures // would make this node unrecoverable // sort(recoverable.begin(), recoverable.end(), compare_min_failures); min_failures = 0; unsigned int k = coding.n - present.size(); // we'd need to recover K recoverable children unsigned int j = recoverable.size() - k + 1; // a loss of J recoverable children would make this impossible // the loss of J recoverable children would make us unrecoverable // Sum the min_failures of the J children with smallest min_failures // for (i=0; i<j; i++) { DATA_UNIT* c = recoverable[i]; if (debug_ft) { printf(" Min failures of %s: %d\n", c->name, c->min_failures); }; min_failures += c->min_failures; } if (debug_ft) { printf(" our min failures: %d\n", min_failures); } } return 0; }
int META_CHUNK::recovery_action(double now) { unsigned int i; int retval; if (data_now_present) { status = PRESENT; } if (debug_status) { printf(" meta chunk %s: status %s have_unrec_children %d\n", name, status_str(status), have_unrecoverable_children ); } for (i=0; i<children.size(); i++) { DATA_UNIT* c = children[i]; if (debug_status) { printf(" child %s status %s in rec set %d\n", c->name, status_str(c->status), c->in_recovery_set ); } switch (status) { case PRESENT: if (c->status == UNRECOVERABLE) { c->data_now_present = true; } break; case RECOVERABLE: if (c->in_recovery_set && have_unrecoverable_children) { c->data_needed = true; } break; case UNRECOVERABLE: break; } retval = c->recovery_action(now); if (retval) return retval; } return 0; }
void META_CHUNK::recovery_action() { unsigned int i; if (data_now_present) { status = PRESENT; } #ifdef DEBUG_RECOVERY printf("meta chunk action %s state %s unrec children %d\n", name, status_str(status), have_unrecoverable_children ); #endif for (i=0; i<children.size(); i++) { DATA_UNIT* c = children[i]; #ifdef DEBUG_RECOVERY printf(" child %s status %s in rec set %d\n", c->name, status_str(c->status), c->in_recovery_set ); #endif switch (status) { case PRESENT: if (c->status == UNRECOVERABLE) { c->data_now_present = true; } break; case RECOVERABLE: if (c->in_recovery_set && have_unrecoverable_children) { c->data_needed = true; } break; case UNRECOVERABLE: break; } c->recovery_action(); } // because of recovery action, some of our children may have changed // status and fault tolerance, source may have changed too. // Recompute them. // vector<DATA_UNIT*> recoverable; vector<DATA_UNIT*> present; for (i=0; i<children.size(); i++) { DATA_UNIT* c = children[i]; switch (c->status) { case PRESENT: present.push_back(c); break; case RECOVERABLE: recoverable.push_back(c); break; } } if ((int)(present.size()) >= coding.n) { status = PRESENT; min_failures = INT_MAX; } else if ((int)(present.size() + recoverable.size()) >= coding.n) { status = RECOVERABLE; // our min_failures is the least X such that some X host failures // would make this node unrecoverable // sort(recoverable.begin(), recoverable.end(), compare_min_failures); min_failures = 0; unsigned int k = coding.n - present.size(); // we'd need to recover K recoverable children unsigned int j = recoverable.size() - k + 1; // a loss of J recoverable children would make this impossible // the loss of J recoverable children would make us unrecoverable // Sum the min_failures of the J children with smallest min_failures // for (i=0; i<j; i++) { DATA_UNIT* c = recoverable[i]; printf(" Min failures of %s: %d\n", c->name, c->min_failures); min_failures += c->min_failures; } printf(" our min failures: %d\n", min_failures); } }