// Fills this endpoint's socket address buffer with an IPv4 address and port.
//
//   ip     - text passed to inet_aton(); when that fails and resolv is true
//            it is handed to DNS_ResolveIpv4() instead
//   port   - port number, stored in network byte order
//   resolv - allow the DNS fallback
//
// Returns true when the address was filled in by inet_aton() or by the DNS
// lookup, false otherwise.
bool Endpoint::Set(const char* ip, int port, bool resolv)
{
    struct sockaddr_in* sa = (struct sockaddr_in*) &saBuffer;

    // Clear the whole structure. sizeof(sa) is only the size of the pointer,
    // which leaves the tail of the sockaddr_in uninitialized.
    memset((char*) sa, 0, sizeof(*sa));
    sa->sin_family = AF_INET;
    sa->sin_port = htons((uint16_t) port);

    if (inet_aton(ip, &sa->sin_addr) != 0)
        return true;

    if (resolv)
    {
        if (!DNS_ResolveIpv4(ip, &sa->sin_addr))
        {
            Log_Trace("DNS resolv failed");
            return false;
        }
        return true;
    }

    Log_Trace("inet_aton() failed");
    return false;
}
// Answers a peer that asked for the value of a given paxosID.
//
// Requests for a paxosID at or beyond our own are ignored. Otherwise the
// accepted value is fetched from the database: a non-empty value is sent back
// as a learn-value message, an empty one is answered with a start-catchup
// message.
void ReplicatedLog::OnRequestChosen(PaxosMessage& imsg)
{
    Buffer          value;
    PaxosMessage    omsg;

#ifdef RLOG_DEBUG_MESSAGES
    Log_Debug("ReplicatedLog::OnRequestChosen, imsg.paxosID = %U, mine = %U",
     imsg.paxosID, GetPaxosID());
#endif

    if (imsg.paxosID >= GetPaxosID())
        return;

    // the node is lagging and needs to catch-up
    context->GetDatabase()->GetAcceptedValue(imsg.paxosID, value);

    if (value.GetLength() > 0)
    {
        // %U is the specifier this function already uses for paxosID in the
        // debug message above; %d did not match the argument width.
        // NOTE(review): nodeID is assumed to be 64-bit as well - confirm.
        Log_Trace("Sending paxosID %U to node %U", imsg.paxosID, imsg.nodeID);
        omsg.LearnValue(imsg.paxosID, MY_NODEID, 0, value);
    }
    else
    {
        Log_Trace("Node requested a paxosID I no longer have");
        omsg.StartCatchup(paxosID, MY_NODEID);
    }

    context->GetTransport()->SendMessage(imsg.nodeID, omsg);
}
void SingleKeyspaceDB::OnExpiryTimer() { uint64_t expiryTime; Cursor cursor; ByteString key; Log_Trace(); table->Iterate(NULL, cursor); kdata.Set("!!t:"); if (!cursor.Start(kdata)) ASSERT_FAIL(); cursor.Close(); if (kdata.length < 2) ASSERT_FAIL(); if (kdata.buffer[0] != '!' || kdata.buffer[1] != '!') ASSERT_FAIL(); ReadExpiryTime(kdata, expiryTime, key); table->Delete(NULL, kdata); table->Delete(NULL, key); WriteExpiryKey(kdata, key); table->Delete(NULL, kdata); Log_Trace("Expiring key: %.*s", key.length, key.buffer); InitExpiryTimer(); }
// Creates the Db handle in the given database's environment and opens it as
// a B-tree, creating the file if needed (DB_CREATE).
//
//   database - owner; its env is used to construct the Db handle
//   name     - file name passed to Db::open()
//   pageSize - page size to set before opening; 0 leaves it untouched
//
// A failed open closes the handle and ends in STOP_FAIL, with a dedicated
// message when a folder with the same name exists.
Table::Table(Database* database, const char *name, int pageSize) :
database(database)
{
    DbTxn*      noTxn = NULL;
    const char* noSubDb = NULL;
    u_int32_t   openFlags = DB_CREATE | DB_AUTO_COMMIT | DB_NOMMAP;
    int         fileMode = 0;

#ifdef DB_READ_UNCOMMITTED
    openFlags |= DB_READ_UNCOMMITTED;
#endif

    db = new Db(database->env, 0);
    if (pageSize != 0)
        db->set_pagesize(pageSize);

    Log_Trace();

    if (db->open(noTxn, name, noSubDb, DB_BTREE, openFlags, fileMode) != 0)
    {
        db->close(0);

        if (IsFolder(name))
        {
            STOP_FAIL(rprintf(
             "Could not create database file '%s' "
             "because a folder '%s' exists",
             name, name), 1);
        }
        STOP_FAIL("Could not open database", 1);
    }

    Log_Trace();
}
bool Endpoint::Set(const char* ip_port, bool resolv) { const char* p; int port; bool ret; Buffer ipbuf; p = ip_port; if (!IsValidEndpoint(ReadBuffer(ip_port))) return false; p = strrchr(ip_port, ':'); if (p == NULL) { Log_Trace("No ':' in host specification"); return false; } ipbuf.Append(ip_port, p - ip_port); ipbuf.NullTerminate(); p++; port = -1; port = atoi(p); if (port < 1 || port > 65535) { Log_Trace("atoi() failed to produce a sensible value"); return false; } ret = Set(ipbuf.GetBuffer(), port, resolv); return ret; }
bool Endpoint::Set(const char* ip_port, bool resolv) { const char* p; int port; bool ret; DynArray<32> ipbuf; p = ip_port; while (*p != '\0' && *p != ':') p++; if (*p == '\0') { Log_Trace("No ':' in host specification"); return false; } ipbuf.Append(ip_port, p - ip_port); ipbuf.Append("", 1); p++; port = -1; port = atoi(p); if (port < 1 || port > 65535) { Log_Trace("atoi() failed to produce a sensible value"); return false; } ret = Set(ipbuf.buffer, port, resolv); return ret; }
void ReplicatedKeyspaceDB::Append() { ByteString bs; KeyspaceOp* op; KeyspaceOp**it; uint64_t expiryTime; Log_Trace(); if (ops.Length() == 0) return; pvalue.length = 0; bs.Set(pvalue); unsigned numAppended = 0; for (it = ops.Head(); it != NULL; it = ops.Next(it)) { op = *it; if (op->appended) ASSERT_FAIL(); if (op->IsExpiry() && op->type != KeyspaceOp::CLEAR_EXPIRIES) { // at this point we have up-to-date info on the expiry time expiryTime = GetExpiryTime(op->key); op->prevExpiryTime = expiryTime; } msg.FromKeyspaceOp(op); if (msg.Write(bs)) { pvalue.length += bs.length; bs.Advance(bs.length); op->appended = true; numAppended++; if (op->IsExpiry()) { // one expiry command per paxos round break; } } else break; } if (pvalue.length > 0) { estimatedLength -= pvalue.length; if (estimatedLength < 0) estimatedLength = 0; RLOG->Append(pvalue); Log_Trace("appending %d ops (length: %d)", numAppended, pvalue.length); } }
// Close callback: resets the connect timeout in the event loop unless it is
// already active.
void TransportTCPWriter::OnClose()
{
    Log_Trace("endpoint = %s", endpoint.ToString());

    if (connectTimeout.IsActive())
        return;

    Log_Trace("reset");
    EventLoop::Reset(&connectTimeout);
}
// Runs a transaction checkpoint on the environment and asserts that it
// succeeded.
void Database::Checkpoint()
{
    int ret;

    Log_Trace("started");

    ret = env->txn_checkpoint(100*1000 /* in kilobytes */, 0, 0);
    // Success is reported as 0. Failures can be negative library codes or
    // positive errno-style values, so testing only "< 0" lets some through.
    if (ret != 0)
        ASSERT_FAIL();

    Log_Trace("finished");
}
// Called when the lease is learned. If this node owns the lease while the
// proposer is neither active nor in multi mode, a dummy value is appended so
// that multi paxos can be enabled.
void ReplicatedLog::OnLearnLease()
{
    Log_Trace("context->IsLeaseOwner() = %s",
     (context->IsLeaseOwner() ? "true" : "false"));
    Log_Trace("!proposer.IsActive() = %s",
     (!proposer.IsActive() ? "true" : "false"));
    Log_Trace("!proposer.state.multi = %s",
     (!proposer.state.multi ? "true" : "false"));

    if (!context->IsLeaseOwner())
        return;

    if (proposer.IsActive() || proposer.state.multi)
        return;

    Log_Trace("Appending dummy to enable MultiPaxos");
    TryAppendDummy();
}
// Master lease expiry callback: fails the queued ops unless this node is
// still master or the async appender is active, then removes the expiry
// timer from the event loop.
void ReplicatedKeyspaceDB::OnMasterLeaseExpired()
{
    Log_Trace("ops.size() = %d", ops.Length());

    if (!(RLOG->IsMaster() || asyncAppenderActive))
        FailKeyspaceOps();

    Log_Trace("ops.size() = %d", ops.Length());

    EventLoop::Remove(&expiryTimer);
}
// Records the chosen value carried by the incoming message: keeps a copy of
// the message, marks the learner state as learned and stores the value.
void PaxosLearner::OnLearnChosen(PaxosMsg& msg_)
{
    Log_Trace();

    msg = msg_;

    state.learned = true;
    state.value.Set(msg.value);

    Log_Trace("+++ Consensus for paxosID = %" PRIu64 " is %.*s +++",
     paxosID, state.value.length, state.value.buffer);
}
void ReplicatedLog::ProcessLearnChosen(uint64_t nodeID, uint64_t runID) { bool ownAppend; Buffer learnedValue; learnedValue.Write(acceptor.state.acceptedValue); #ifdef RLOG_DEBUG_MESSAGES Log_Debug("Round completed for paxosID = %U", paxosID); Log_Trace("+++ Value for paxosID = %U: %B +++", paxosID, &learnedValue); if (context->GetHighestPaxosID() > 0 && paxosID < context->GetHighestPaxosID() && !IsLeaseOwner()) { Log_Debug("Paxos-based catchup, highest seen paxosID is %U, currently at %U", context->GetHighestPaxosID(), paxosID); if (paxosID == (context->GetHighestPaxosID() - 1)) Log_Debug("Paxos-based catchup complete..."); } #endif if (context->GetHighestPaxosID() > 0 && paxosID < (context->GetHighestPaxosID() - 1)) context->GetDatabase()->Commit(); NewPaxosRound(); // increments paxosID, clears proposer, acceptor if (paxosID <= context->GetHighestPaxosID()) RequestChosen(nodeID); ownAppend = proposer.state.multi; if (nodeID == MY_NODEID && runID == REPLICATION_CONFIG->GetRunID() && context->IsLeaseOwner()) { if (!proposer.state.multi) { proposer.state.multi = true; context->OnIsLeader(); } proposer.state.multi = true; Log_Trace("Multi paxos enabled"); } else { proposer.state.multi = false; Log_Trace("Multi paxos disabled"); } if (BUFCMP(&learnedValue, &dummy)) OnAppendComplete(); else context->OnAppend(paxosID - 1, learnedValue, ownAppend); // new convention: QuorumContext::OnAppend() must call // ReplicatedLog::OnAppendComplete() // when it's done! }
// Truncates the table.
//
//   tx - optional transaction; its txn handle is used when given, otherwise
//        NULL is passed to the database
//
// Returns true on success, false when db->truncate() reports an error.
bool Table::Truncate(Transaction* tx)
{
    Log_Trace();

    u_int32_t   count;      // out parameter of truncate(); not used further
    u_int32_t   flags = 0;
    DbTxn*      txn;

    txn = tx ? tx->txn : NULL;

    if (db->truncate(txn, &count, flags) != 0)
    {
        // report the failure to the caller instead of claiming success
        Log_Trace("truncate() failed");
        return false;
    }

    return true;
}
void MessageConnection::OnFlushWrites() { // flushWrites YieldTimer arrived Log_Trace(); TCPConnection::TryFlush(); }
void PLeaseProposer::OnPrepareResponse() { Log_Trace(); if (!state.preparing || msg.proposalID != state.proposalID) return; numReceived++; if (msg.type == PLEASE_PREPARE_REJECTED) numRejected++; else if (msg.type == PLEASE_PREPARE_PREVIOUSLY_ACCEPTED && msg.acceptedProposalID >= state.highestReceivedProposalID) { state.highestReceivedProposalID = msg.acceptedProposalID; state.leaseOwner = msg.leaseOwner; } if (numRejected >= ceil((double)(RCONF->GetNumNodes()) / 2)) { StartPreparing(); return; } // see if we have enough positive replies to advance if ((numReceived - numRejected) >= RCONF->MinMajority()) StartProposing(); }
bool StorageChunkWriter::WriteDataPages() { unsigned i; StorageDataPage* dataPage; for (i = 0; i < file->numDataPages; i++) { if (env->shuttingDown) return false; while (env->yieldThreads) { Log_Trace("Yielding..."); MSleep(YIELD_TIME); } dataPage = file->dataPages[i]; writeBuffer.Clear(); dataPage->Write(writeBuffer); //ASSERT(writeBuffer.GetLength() == dataPage->GetSize()); if (!WriteBuffer()) return false; } return true; }
// Leaves the proposing state and removes the propose timeout from the event
// loop.
void PaxosProposer::StopProposing()
{
    Log_Trace();

    state.proposing = false;

    EventLoop::Remove(&proposeTimeout);
}
// Handles a response to our propose request.
//
// Responses are ignored unless we are proposing and the proposal ID matches.
// The sender's vote is registered as accepted or rejected. When the vote is
// rejected overall, proposing stops and the restart timeout is armed; when it
// is accepted overall, proposing stops and a learn-proposal message is
// broadcast.
void PaxosProposer::OnProposeResponse(PaxosMessage& imsg)
{
    PaxosMessage omsg;

    // NOTE(review): nodeID is assumed to be a 64-bit value, for which this
    // logger's specifier is %U (as used for other 64-bit IDs) - confirm.
    Log_Trace("msg.nodeID = %U", imsg.nodeID);

    if (!state.proposing || imsg.proposalID != state.proposalID)
        return;

    if (imsg.type == PAXOS_PROPOSE_REJECTED)
    {
        Log_Debug("Propose rejected, quorumID: %U", context->GetQuorumID());
        vote->RegisterRejected(imsg.nodeID);
    }
    else
        vote->RegisterAccepted(imsg.nodeID);

    if (vote->IsRejected())
    {
        StopProposing();
        EventLoop::Add(&restartTimeout);
    }
    else if (vote->IsAccepted())
    {
        // a majority have accepted our proposal, we have consensus
        StopProposing();
        omsg.LearnProposal(context->GetPaxosID(), MY_NODEID, state.proposalID);
        BroadcastMessage(omsg);
        state.learnSent = true;
    }
}
// Remembers the target endpoint, sizes the read buffer to twice the
// messaging buffer threshold and starts the TCP connect with the messaging
// connect timeout.
void MessageConnection::Connect(Endpoint& endpoint_)
{
    Log_Trace();

    endpoint = endpoint_;

    readBuffer.Allocate(MESSAGING_BUFFER_THRESHOLD * 2);

    TCPConnection::Connect(endpoint, MESSAGING_CONNECT_TIMEOUT);
}
bool LogCache::Push(uint64_t paxosID, ByteString value, bool commit) { ByteArray<128> buf; Transaction* transaction; Log_Trace("Storing paxosID %" PRIu64 " with length %d", paxosID, value.length); transaction = RLOG->GetTransaction(); if (!transaction->IsActive()) transaction->Begin(); WriteRoundID(buf, paxosID); table->Set(transaction, buf, value); // delete old if ((int64_t)(paxosID - logCacheSize) >= 0) { paxosID -= logCacheSize; WriteRoundID(buf, paxosID); table->Delete(transaction, buf); } if (commit) transaction->Commit(); return true; }
// Adds O_NONBLOCK to the descriptor's file status flags.
//
// Returns false when the descriptor is invalid or either fcntl() call fails,
// true otherwise.
bool Socket::SetNonblocking()
{
    if (fd < 0)
    {
        Log_Trace("SetNonblocking on invalid file descriptor");
        return false;
    }

    // read the current flags so that only O_NONBLOCK is added
    const int currentFlags = fcntl(fd, F_GETFL, 0);
    if (currentFlags < 0)
    {
        Log_Errno();
        return false;
    }

    if (fcntl(fd, F_SETFL, currentFlags | O_NONBLOCK) < 0)
    {
        Log_Errno();
        return false;
    }

    return true;
}
// (Re)arms the expiry timer.
//
// The timer is first removed from the event loop. A cursor is positioned
// using the "!!t:" key; if that fails, or kdata does not start with "!!"
// afterwards, the timer stays disarmed. Otherwise the expiry time decoded
// from kdata is set on the timer and the timer is added to the event loop.
void SingleKeyspaceDB::InitExpiryTimer()
{
    uint64_t    expiryTime;
    Cursor      cursor;
    ByteString  key;
    bool        found;

    Log_Trace();

    EventLoop::Remove(&expiryTimer);

    table->Iterate(NULL, cursor);

    kdata.Set("!!t:");
    found = cursor.Start(kdata);
    // Close the cursor on both paths; returning straight after a failed
    // Start() left it open.
    // NOTE(review): assumes Cursor does not close itself when destroyed - confirm.
    cursor.Close();
    if (!found)
        return;

    if (kdata.length < 2)
        return;

    if (kdata.buffer[0] != '!' || kdata.buffer[1] != '!')
        return;

    ReadExpiryTime(kdata, expiryTime, key);

    expiryTimer.Set(expiryTime);
    EventLoop::Add(&expiryTimer);
}
// Dispatches an incoming paxos message to the handler for its kind.
// A message matching none of the known kinds is an assertion failure.
// When the handler reports the message as processed, the context is notified.
void ReplicatedLog::OnMessage(PaxosMessage& imsg)
{
    Log_Trace();

    bool handled = false;

    if (imsg.type == PAXOS_PREPARE_REQUEST)
    {
        handled = OnPrepareRequest(imsg);
    }
    else if (imsg.IsPrepareResponse())
    {
        handled = OnPrepareResponse(imsg);
    }
    else if (imsg.type == PAXOS_PROPOSE_REQUEST)
    {
        handled = OnProposeRequest(imsg);
    }
    else if (imsg.IsProposeResponse())
    {
        handled = OnProposeResponse(imsg);
    }
    else if (imsg.IsLearn())
    {
        handled = OnLearnChosen(imsg);
    }
    else if (imsg.type == PAXOS_REQUEST_CHOSEN)
    {
        handled = OnRequestChosen(imsg);
    }
    else if (imsg.type == PAXOS_START_CATCHUP)
    {
        handled = OnStartCatchup(imsg);
    }
    else
    {
        ASSERT_FAIL();
    }

    if (handled)
    {
        context->OnMessageProcessed();
    }
}
// Fails every queued op.
//
// Each op is removed from the queue and marked with status false. Ops that
// have a service are handed back through OnComplete(); ops without one must
// be EXPIRE ops and are deleted here. The expiryAdded flag is cleared and the
// queue is asserted to be empty afterwards.
void ReplicatedKeyspaceDB::FailKeyspaceOps()
{
    Log_Trace();

    KeyspaceOp** it = ops.Head();

    while (it != NULL)
    {
        KeyspaceOp* op = *it;

        // Remove() yields the next position, so it doubles as the loop advance
        it = ops.Remove(it);

        op->status = false;

        if (!op->service)
        {
            assert(op->type == KeyspaceOp::EXPIRE);
            delete op;
            continue;
        }

        op->service->OnComplete(op);
    }

    expiryAdded = false;

    if (ops.Length() > 0)
        ASSERT_FAIL();
}
// Issues a zero-length WSASend() on the operation's socket, using the IO
// descriptor's write OVERLAPPED structure, and registers the operation as the
// descriptor's pending write.
//
// Returns false when WSASend() fails with anything other than
// WSA_IO_PENDING; otherwise marks the operation active and returns true.
static bool RequestWriteNotification(IOOperation* ioop)
{
    DWORD   bytesSent;
    WSABUF  emptyBuf;
    IODesc* iod;
    int     sendResult;

//  Log_Trace("fd.index = %d", ioop->fd.index);

    iod = GetIODesc(ioop->fd);

    assert(iod->write == NULL);

    emptyBuf.buf = NULL;
    emptyBuf.len = 0;

    memset(&iod->ovlWrite, 0, sizeof(OVERLAPPED));

    sendResult = WSASend(ioop->fd.sock, &emptyBuf, 1, &bytesSent, 0, &iod->ovlWrite, NULL);
    if (sendResult == SOCKET_ERROR)
    {
        const int lastError = WSAGetLastError();
        if (lastError != WSA_IO_PENDING)
        {
            Log_Trace("ret = %d", lastError);
            return false;
        }
    }

    iod->write = ioop;
    ioop->active = true;

    return true;
}
// Resets the vote and broadcasts the message through the context's
// transport.
void PaxosProposer::BroadcastMessage(PaxosMessage& omsg)
{
    Log_Trace();

    vote->Reset();

    context->GetTransport()->BroadcastMessage(omsg);
}
// Routes a raw message by its protocol prefix.
//
// The message has to start with a one-character protocol tag followed by
// ':'. That two-character prefix is consumed and the remainder is passed to
// the cluster or quorum handler. A short message, an unreadable prefix or an
// unknown tag is an assertion failure.
void ContextTransport::OnMessage(uint64_t nodeID, ReadBuffer msg)
{
    char    protocolTag;
    int     numRead;

    Log_Trace("%R", &msg);

    if (msg.GetLength() < 2)
        ASSERT_FAIL();

    numRead = msg.Readf("%c:", &protocolTag);
    if (numRead < 2)
        ASSERT_FAIL();

    msg.Advance(2);

    if (protocolTag == PROTOCOL_CLUSTER)
    {
        OnClusterMessage(nodeID, msg);
    }
    else if (protocolTag == PROTOCOL_QUORUM)
    {
        OnQuorumMessage(nodeID, msg);
    }
    else
    {
        ASSERT_FAIL();
    }
}
bool AddKq(int ident, short filter, IOOperation* ioop) { int nev; struct kevent ev; struct timespec timeout = { 0, 0 }; if (kq < 0) { Log_Trace("kq < 0"); return false; } EV_SET(&ev, ident, filter, EV_ADD | EV_ONESHOT, 0, 0, ioop); // add our interest in the event nev = kevent(kq, &ev, 1, NULL, 0, &timeout); if (nev < 0) { Log_Errno(); return false; } if (ioop) ioop->active = true; return true; }
// Creates the underlying AF_INET socket: a datagram socket for UDP, a stream
// socket for anything else.
//
// Returns false when a socket already exists or socket() fails. On success
// the protocol is remembered and the listening flag is cleared.
bool Socket::Create(Proto proto_)
{
    if (fd >= 0)
    {
        Log_Trace("Called Create() on existing socket");
        return false;
    }

    const int socketType = (proto_ == UDP) ? SOCK_DGRAM : SOCK_STREAM;

    fd = socket(AF_INET, socketType, 0);
    if (fd < 0)
    {
        Log_Errno();
        return false;
    }

    proto = proto_;
    listening = false;

    // SetReceiveBufferSize(64*KiB);

    return true;
}