void LogProcess::updated(const string& path) { if (znode + "/replicas" == path) { regroup(); // Reset a watch on the replicas. int ret = zk->getChildren(znode + "/replicas", true, NULL); if (ret != ZOK) { LOG(FATAL) << "Failed to set a watch on '" << znode << "/replicas" << "' in ZooKeeper: " << zk->message(ret); } } else { CHECK(znode + "/coordinators" == path); elect(); // Reset a watch on the coordinators. int ret = zk->getChildren(znode + "/coordinators", true, NULL); if (ret != ZOK) { LOG(FATAL) << "Failed to set a watch on '" << znode << "/replicas" << "' in ZooKeeper: " << zk->message(ret); } } }
// Removes the slave "hostname:port" from the data stored in our znode.
//
// The data is read together with its Stat, the entry (and one adjacent
// comma, if any) is erased from whichever list contains it, and the
// result is written back using the version that was read.
//
// Returns false (after logging a warning) if the znode cannot be read,
// if no matching entry exists, if the data begins with the entry (it
// should begin with a list key), or if the write fails. Returns true
// otherwise.
Future<bool> ZooKeeperSlavesManagerStorage::remove(const string& hostname, uint16_t port)
{
  // TODO(benh): Use ZooKeeperSlavesManagerStorage::parse to clean up code.
  string result;
  Stat stat;

  int ret = zk->get(znode, true, &result, &stat);

  if (ret != ZOK) {
    LOG(WARNING) << "Slaves manager storage failed to get '" << znode
                 << "' in ZooKeeper! (" << zk->message(ret) << ")";
    return false;
  }

  ostringstream out;
  out << hostname << ":" << port;
  const string entry = out.str();

  // Find 'entry' as a *complete* list element: preceded by '=' or ','
  // and followed by ',', '\n', or the end of the data. A plain
  // substring search would also hit longer entries that merely contain
  // it (e.g., "host:80" inside "host:8080" or "myhost:80"), and erasing
  // at such a hit would corrupt the other slave's entry.
  size_t index = result.find(entry);

  while (index != string::npos) {
    if (index == 0) {
      // Nothing can legitimately precede a list key, so data that
      // starts with a host:port pair is malformed.
      LOG(WARNING) << "Bad data in '" << znode << "'";
      return false;
    }

    const size_t end = index + entry.size();
    const char before = result[index - 1];

    const bool startsElement = (before == '=' || before == ',');
    const bool endsElement =
      (end == result.size() || result[end] == ',' || result[end] == '\n');

    if (startsElement && endsElement) {
      break;
    }

    index = result.find(entry, index + 1);
  }

  if (index == string::npos) {
    LOG(WARNING) << "Slaves manager storage could not remove slave "
                 << hostname << ":" << port
                 << " because not currently active or inactive";
    return false;
  }

  const size_t end = index + entry.size();

  if (result[index - 1] == '=') {
    // First element of its list: drop the comma that follows it (if
    // there is one) so the next element becomes the first.
    if (end < result.size() && result[end] == ',') {
      result.erase(index, entry.size() + 1);
    } else {
      result.erase(index, entry.size());
    }
  } else {
    // Not the first element: drop the comma that precedes it.
    result.erase(index - 1, entry.size() + 1);
  }

  // Set the data in the znode.
  ret = zk->set(znode, result, stat.version);

  if (ret != ZOK) {
    LOG(WARNING) << "Slaves manager storage could not remove slave "
                 << hostname << ":" << port
                 << " from '" << znode << "' in ZooKeeper! ("
                 << zk->message(ret) << ")";
    return false;
  }

  return true;
}
// Adds the slave "hostname:port" to the front of the "active=" list
// stored in our znode.
//
// The data is read together with its Stat. If the znode is empty it is
// initialized to "active=<hostname>:<port>\ninactive=\n"; otherwise the
// entry is inserted right after "active=". The result is written back
// using the version that was read.
//
// Returns false (after logging a warning) if the znode cannot be read,
// if the data contains no "active=", or if the write fails. Returns
// true otherwise.
Future<bool> ZooKeeperSlavesManagerStorage::add(const string& hostname, uint16_t port)
{
  // TODO(benh): Use ZooKeeperSlavesManagerStorage::parse to clean up code.
  string result;
  Stat stat;

  int ret = zk->get(znode, true, &result, &stat);

  if (ret != ZOK) {
    LOG(WARNING) << "Slaves manager storage failed to get '" << znode
                 << "' in ZooKeeper! (" << zk->message(ret) << ")";
    return false;
  }

  ostringstream out;
  out << hostname << ":" << port;

  if (result.size() == 0) {
    // Build the initial contents from scratch. (This used to append to
    // 'out', which already held "hostname:port", so the data was
    // written with a stray "hostname:port" in front of "active=".)
    ostringstream fresh;
    fresh << "active=" << out.str() << "\n";
    fresh << "inactive=\n";
    result = fresh.str();
  } else {
    const string active("active=");

    // NOTE(review): this search would also match the tail of
    // "inactive=" if no "active=" precedes it in the data.
    size_t index = result.find(active);

    if (index == string::npos) {
      LOG(WARNING) << "Slaves manager storage found bad data in '" << znode
                   << "', could not find 'active='";
      return false;
    }

    const size_t position = index + active.size();

    // Separate the new entry from existing ones with a comma; an empty
    // list (a newline, or the end of the data, right after "active=")
    // needs no separator.
    if (position < result.size() && result[position] != '\n') {
      out << ",";
    }

    result.insert(position, out.str());
  }

  // Set the data in the znode.
  ret = zk->set(znode, result, stat.version);

  if (ret != ZOK) {
    LOG(WARNING) << "Slaves manager storage could not add slave "
                 << hostname << ":" << port
                 << " to '" << znode << "' in ZooKeeper! ("
                 << zk->message(ret) << ")";
    return false;
  }

  return true;
}
// Reacts to a change notification for 'path'.
//
// For our own znode, the data is re-read and each of the "active=" and
// "inactive=" lists that parses successfully is dispatched to the
// slaves manager (updateActive / updateInactive). Returns true in that
// case, even if a list failed to parse.
//
// Returns false (after logging a warning) for any other path, or if
// the znode cannot be read.
Future<bool> ZooKeeperSlavesManagerStorage::updated(const string& path)
{
  // Changes to any path other than our znode are not expected.
  if (!(path == znode)) {
    LOG(WARNING) << "Slaves manager stoage not expecting changes to path '"
                 << path << "' in ZooKeeper";
    return false;
  }

  LOG(INFO) << "Slaves manager storage found updates in ZooKeeper "
            << "... propogating changes";

  string contents;
  int code = zk->get(znode, true, &contents, NULL);

  if (code != ZOK) {
    LOG(WARNING) << "Slaves manager storage failed to get '" << znode
                 << "' in ZooKeeper! (" << zk->message(code) << ")";
    return false;
  }

  // Turn the znode contents into hostname/port pairs, one list at a
  // time, and hand each successfully parsed list to the slaves manager.
  multihashmap<string, uint16_t> activeSlaves;
  if (parse("active=", contents, &activeSlaves)) {
    process::dispatch(slavesManager,
                      &SlavesManager::updateActive,
                      activeSlaves);
  }

  multihashmap<string, uint16_t> inactiveSlaves;
  if (parse("inactive=", contents, &inactiveSlaves)) {
    process::dispatch(slavesManager,
                      &SlavesManager::updateInactive,
                      inactiveSlaves);
  }

  return true;
}
// Invoked once connected to ZooKeeper: creates every path component of
// our znode (ignoring components that already exist) and then
// delegates to updated(znode), whose result is returned.
//
// Returns false (after logging a warning) as soon as creating a
// component fails with anything other than ZNODEEXISTS.
Future<bool> ZooKeeperSlavesManagerStorage::connected()
{
  int ret;

  static const string delimiter = "/";

  // Assume the znode that was created does not end with a "/".
  CHECK(znode.at(znode.length() - 1) != '/');

  // Create directory path znodes as necessary.
  size_t index = znode.find(delimiter, 0);

  while (index < string::npos) {
    // Get out the prefix to create. When no further delimiter exists,
    // 'index' becomes npos and the prefix is the whole znode, which
    // also ends the loop after this iteration.
    index = znode.find(delimiter, index + 1);
    string prefix = znode.substr(0, index);

    // Create the node (even if it already exists).
    ret = zk->create(prefix, "", ZOO_OPEN_ACL_UNSAFE, 0, NULL);

    if (ret != ZOK && ret != ZNODEEXISTS) {
      // Okay, consider this a failure (maybe we lost our connection
      // to ZooKeeper), log the issue, and let the caller decide
      // whether to try again when ZooKeeper issues get sorted out.
      // Report the prefix that failed rather than the full znode so
      // the message points at the actual culprit.
      LOG(WARNING) << "Slaves manager storage failed to create '" << prefix
                   << "' in ZooKeeper! (" << zk->message(ret) << ")";
      return false;
    }
  }

  // Reconcile what's in the znodes versus what we have in memory
  // (this also puts watches on these znodes).
  return updated(znode);
}
// Reads the current set of replicas from ZooKeeper: lists the children
// of "<znode>/replicas" and, for each child, reads its data and
// inserts it (converted to a UPID) into 'current'.
//
// NOTE(review): this definition appears truncated in this view -- the
// closing brace of the function and any code that fills or consumes
// 'added' / 'removed' are not visible here.
void LogProcess::regroup() {
  vector<string> results;

  // Passes false where the watch-setting calls elsewhere in this file
  // pass true. Failing to list the children is fatal.
  int ret = zk->getChildren(znode + "/replicas", false, &results);

  if (ret != ZOK) {
    LOG(FATAL) << "Failed to get children of '" << znode << "/replicas" << "' in ZooKeeper: " << zk->message(ret);
  }

  set<UPID> current;
  set<UPID> added;
  set<UPID> removed;

  foreach (const string& result, results) {
    string s;

    // NOTE(review): this inner 'ret' shadows the outer one and is never
    // checked, so a failed get still inserts a UPID built from whatever
    // 's' holds (presumably the empty string) -- confirm intent.
    int ret = zk->get(znode + "/replicas/" + result, false, &s, NULL);

    UPID pid = s;
    current.insert(pid);
  }
void LogProcess::connected() { LOG(INFO) << "Log connected to ZooKeeper"; int ret; string result; // Assume the znode that was created does not end with a "/". CHECK(znode.size() == 0 || znode.at(znode.size() - 1) != '/'); // Create directory path znodes as necessary. size_t index = znode.find("/", 0); while (index < string::npos) { // Get out the prefix to create. index = znode.find("/", index + 1); string prefix = znode.substr(0, index); LOG(INFO) << "Log trying to create znode '" << prefix << "' in ZooKeeper"; // Create the node (even if it already exists). ret = zk->create( prefix, "", ZOO_OPEN_ACL_UNSAFE, // ZOO_CREATOR_ALL_ACL, // needs authentication 0, &result); if (ret != ZOK && ret != ZNODEEXISTS) { LOG(FATAL) << "Failed to create '" << prefix << "' in ZooKeeper: " << zk->message(ret); } } // Now create the "replicas" znode. LOG(INFO) << "Log trying to create znode '" << znode << "/replicas" << "' in ZooKeeper"; // Create the node (even if it already exists). ret = zk->create(znode + "/replicas", "", ZOO_OPEN_ACL_UNSAFE, // ZOO_CREATOR_ALL_ACL, // needs authentication 0, &result); if (ret != ZOK && ret != ZNODEEXISTS) { LOG(FATAL) << "Failed to create '" << znode << "/replicas" << "' in ZooKeeper: " << zk->message(ret); } // Now create the "coordinators" znode. LOG(INFO) << "Log trying to create znode '" << znode << "/coordinators" << "' in ZooKeeper"; // Create the node (even if it already exists). ret = zk->create(znode + "/coordinators", "", ZOO_OPEN_ACL_UNSAFE, // ZOO_CREATOR_ALL_ACL, // needs authentication 0, &result); if (ret != ZOK && ret != ZNODEEXISTS) { LOG(FATAL) << "Failed to create '" << znode << "/coordinators" << "' in ZooKeeper: " << zk->message(ret); } // Okay, create our replica, group, and coordinator. replica = new ReplicaProcess(file); spawn(replica); group = new GroupProcess(); spawn(group); coordinator = new Coordinator(quorum, replica, group); // Set a watch on the replicas. 
ret = zk->getChildren(znode + "/replicas", true, NULL); if (ret != ZOK) { LOG(FATAL) << "Failed to set a watch on '" << znode << "/replicas" << "' in ZooKeeper: " << zk->message(ret); } // Set a watch on the coordinators. ret = zk->getChildren(znode + "/coordinators", true, NULL); if (ret != ZOK) { LOG(FATAL) << "Failed to set a watch on '" << znode << "/replicas" << "' in ZooKeeper: " << zk->message(ret); } // Add an ephemeral znode for our replica and coordinator. ret = zk->create(znode + "/replicas/", replica->self(), ZOO_OPEN_ACL_UNSAFE, // ZOO_CREATOR_ALL_ACL, // needs authentication ZOO_SEQUENCE | ZOO_EPHEMERAL, &result); if (ret != ZOK) { LOG(FATAL) << "Failed to create an ephmeral node at '" << znode << "/replica/" << "' in ZooKeeper: " << zk->message(ret); } ret = zk->create(znode + "/coordinators/", "", ZOO_OPEN_ACL_UNSAFE, // ZOO_CREATOR_ALL_ACL, // needs authentication ZOO_SEQUENCE | ZOO_EPHEMERAL, &result); if (ret != ZOK) { LOG(FATAL) << "Failed to create an ephmeral node at '" << znode << "/replica/" << "' in ZooKeeper: " << zk->message(ret); } // Save the sequence id but only grab the basename, e.g., // "/path/to/znode/000000131" => "000000131". result = utils::os::basename(result); try { id = boost::lexical_cast<uint64_t>(result); } catch (boost::bad_lexical_cast&) { LOG(FATAL) << "Failed to convert '" << result << "' into an integer"; } // Run an election! elect(); }