static inline void UpdateClock() { static struct timeval tv; uint64_t prevMsec; uint64_t msec; uint64_t diff; prevMsec = clockMsec; gettimeofday(&tv, NULL); msec = (uint64_t)tv.tv_sec * 1000 + tv.tv_usec / 1000 + clockCorrectionMsec; if (prevMsec != 0) { if (prevMsec > msec) { // system clock went backwards diff = prevMsec - msec; Log_Debug("Softclock diff negative, possible system clock adjustment: %U", diff); // update correction with diff, plus some more to make time strictly increasing AtomicExchangeU64(clockCorrectionMsec, clockCorrectionMsec + diff + CLOCK_RESOLUTION); // recalculate msec msec = (uint64_t)tv.tv_sec * 1000 + tv.tv_usec / 1000 + clockCorrectionMsec; } else if (msec - prevMsec > MAX(2 * CLOCK_RESOLUTION, 100)) { // only logging Log_Debug("Softclock diff: %U", msec - prevMsec); } } AtomicExchangeU64(clockMsec, msec); }
void ReplicatedLog::ProcessLearnChosen(uint64_t nodeID, uint64_t runID) { bool ownAppend; Buffer learnedValue; learnedValue.Write(acceptor.state.acceptedValue); #ifdef RLOG_DEBUG_MESSAGES Log_Debug("Round completed for paxosID = %U", paxosID); Log_Trace("+++ Value for paxosID = %U: %B +++", paxosID, &learnedValue); if (context->GetHighestPaxosID() > 0 && paxosID < context->GetHighestPaxosID() && !IsLeaseOwner()) { Log_Debug("Paxos-based catchup, highest seen paxosID is %U, currently at %U", context->GetHighestPaxosID(), paxosID); if (paxosID == (context->GetHighestPaxosID() - 1)) Log_Debug("Paxos-based catchup complete..."); } #endif if (context->GetHighestPaxosID() > 0 && paxosID < (context->GetHighestPaxosID() - 1)) context->GetDatabase()->Commit(); NewPaxosRound(); // increments paxosID, clears proposer, acceptor if (paxosID <= context->GetHighestPaxosID()) RequestChosen(nodeID); ownAppend = proposer.state.multi; if (nodeID == MY_NODEID && runID == REPLICATION_CONFIG->GetRunID() && context->IsLeaseOwner()) { if (!proposer.state.multi) { proposer.state.multi = true; context->OnIsLeader(); } proposer.state.multi = true; Log_Trace("Multi paxos enabled"); } else { proposer.state.multi = false; Log_Trace("Multi paxos disabled"); } if (BUFCMP(&learnedValue, &dummy)) OnAppendComplete(); else context->OnAppend(paxosID - 1, learnedValue, ownAppend); // new convention: QuorumContext::OnAppend() must call // ReplicatedLog::OnAppendComplete() // when it's done! }
void ReplicatedLog::OnLearnChosen(PaxosMessage& imsg) { uint64_t runID; #ifdef RLOG_DEBUG_MESSAGES Log_Debug("OnLearnChosen begin"); #endif if (context->GetDatabase()->IsCommiting()) { #ifdef RLOG_DEBUG_MESSAGES Log_Debug("Database is commiting, dropping Paxos message"); #endif return; } Log_Trace(); if (imsg.paxosID > paxosID) { RequestChosen(imsg.nodeID); // I am lagging and need to catch-up return; } else if (imsg.paxosID < paxosID) return; if (imsg.type == PAXOS_LEARN_VALUE) { runID = imsg.runID; acceptor.state.accepted = true; acceptor.state.acceptedValue.Write(imsg.value); } else if (imsg.type == PAXOS_LEARN_PROPOSAL && acceptor.state.accepted && acceptor.state.acceptedProposalID == imsg.proposalID) { runID = acceptor.state.acceptedRunID; } else { RequestChosen(imsg.nodeID); return; } ProcessLearnChosen(imsg.nodeID, runID); #ifdef RLOG_DEBUG_MESSAGES Log_Debug("OnLearnChosen end"); #endif }
int IRQ_AddHandler(int IRQ, tIRQ_Handler Handler, void *Ptr) { if( IRQ < 0 || IRQ >= N_IRQS-32 ) { return 1; } IRQ += 32; // 32 internal IRQs // - Enable IRQ, clear pending and send to CPU 1 only gpGIC_DistributorBase[GICD_ISENABLER0+IRQ/32] = 1 << (IRQ & (32-1)); ((Uint8*)&gpGIC_DistributorBase[GICD_ITARGETSR0])[IRQ] = 1; gpGIC_DistributorBase[GICD_ICPENDR0+IRQ/32] = 1 << (IRQ & (32-1)); // TODO: Does the GIC need to handle IRQ sharing? if( gaIRQ_Handlers[IRQ] ) { Log_Warning("GIC", "IRQ %i already handled by %p, %p ignored", IRQ, gaIRQ_Handlers[IRQ], Handler); return 2; } gaIRQ_Handlers[IRQ] = Handler; gaIRQ_HandlerData[IRQ] = Ptr; Log_Debug("GIC", "IRQ %i handled by %p(%p)", IRQ, Handler, Ptr); // DEBUG! Trip the interrupt gpGIC_DistributorBase[GICD_ISPENDR0+IRQ/32] = 1 << (IRQ & (32-1)); return 0; }
void GIC_IRQHandler(void) { Uint32 num = gpGIC_InterfaceBase[GICC_IAR]; Log_Debug("GIC", "IRQ 0x%x", num); gaIRQ_Handlers[num]( num, gaIRQ_HandlerData[num] ); gpGIC_InterfaceBase[GICC_EOIR] = num; }
void ReplicatedLog::OnRequestChosen(PaxosMessage& imsg) { Buffer value; PaxosMessage omsg; #ifdef RLOG_DEBUG_MESSAGES Log_Debug("ReplicatedLog::OnRequestChosen, imsg.paxosID = %U, mine = %U", imsg.paxosID, GetPaxosID()); #endif if (imsg.paxosID >= GetPaxosID()) return; // the node is lagging and needs to catch-up context->GetDatabase()->GetAcceptedValue(imsg.paxosID, value); if (value.GetLength() > 0) { Log_Trace("Sending paxosID %d to node %d", imsg.paxosID, imsg.nodeID); omsg.LearnValue(imsg.paxosID, MY_NODEID, 0, value); } else { Log_Trace("Node requested a paxosID I no longer have"); omsg.StartCatchup(paxosID, MY_NODEID); } context->GetTransport()->SendMessage(imsg.nodeID, omsg); }
void Signal::Wait() { DWORD ret; bool stuck; SetWaiting(true); stuck = false; while (true) { DWORD timeout = 10000; //ret = WaitForSingleObject((HANDLE) impl.event, INFINITE); ret = WaitForSingleObject((HANDLE) impl.event, timeout); if (ret == WAIT_FAILED) Log_Errno(); if (ret != WAIT_OBJECT_0 && ret != WAIT_TIMEOUT) Log_Debug("WaitForSingleObject: ret %d", ret); if (ret == WAIT_TIMEOUT) { if (stuck == false) { //Log_Debug("Waiting for long: %p in %U", this, ThreadPool::GetThreadID()); stuck = true; } continue; } break; } SetWaiting(false); }
void PaxosProposer::OnProposeResponse(PaxosMessage& imsg) { PaxosMessage omsg; Log_Trace("msg.nodeID = %u", imsg.nodeID); if (!state.proposing || imsg.proposalID != state.proposalID) return; if (imsg.type == PAXOS_PROPOSE_REJECTED) { Log_Debug("Propose rejected"); vote->RegisterRejected(imsg.nodeID); } else vote->RegisterAccepted(imsg.nodeID); // see if we have enough positive replies to advance if (vote->IsAccepted()) { // a majority have accepted our proposal, we have consensus StopProposing(); omsg.LearnProposal(context->GetPaxosID(), MY_NODEID, state.proposalID); BroadcastMessage(omsg); } else if (vote->IsComplete()) StartPreparing(); }
void Proc_StartUser(Uint Entrypoint, Uint Base, int ArgC, const char **ArgV, int DataSize) { int i; const char **envp; tVAddr delta; // Log_Debug("Proc", "Proc_StartUser: (Entrypoint=%p, Base=%p, ArgC=%i, ArgV=%p, DataSize=0x%x)", // Entrypoint, Base, ArgC, ArgV, DataSize); // Write data to the user's stack Uint32 *usr_sp = (void*)MM_NewUserStack(); if( !usr_sp ) { Log_KernelPanic("Proc", "Creation of user stack failed"); } usr_sp -= (DataSize+3)/4; memcpy(usr_sp, ArgV, DataSize); free(ArgV); // Adjust user's copy of the arguments delta = (tVAddr)usr_sp - (tVAddr)ArgV; ArgV = (void*)usr_sp; for(i = 0; ArgV[i]; i ++) ArgV[i] += delta; envp = &ArgV[i+1]; for(i = 0; envp[i]; i ++) envp[i] += delta; *--usr_sp = (Uint32)envp; *--usr_sp = (Uint32)ArgV; *--usr_sp = (Uint32)ArgC; *--usr_sp = Base; // Drop to user code Log_Debug("Proc", "Proc_int_DropToUser(%p, %p)", Entrypoint, usr_sp); Proc_int_DropToUser(Entrypoint, (Uint32)usr_sp); }
void PaxosProposer::OnProposeResponse(PaxosMessage& imsg) { PaxosMessage omsg; Log_Trace("msg.nodeID = %u", imsg.nodeID); if (!state.proposing || imsg.proposalID != state.proposalID) return; if (imsg.type == PAXOS_PROPOSE_REJECTED) { Log_Debug("Propose rejected, quorumID: %U", context->GetQuorumID()); vote->RegisterRejected(imsg.nodeID); } else vote->RegisterAccepted(imsg.nodeID); if (vote->IsRejected()) { StopProposing(); EventLoop::Add(&restartTimeout); } else if (vote->IsAccepted()) { // a majority have accepted our proposal, we have consensus StopProposing(); omsg.LearnProposal(context->GetPaxosID(), MY_NODEID, state.proposalID); BroadcastMessage(omsg); state.learnSent = true; } }
void ShardMigrationWriter::OnWriteReadyness() { Log_Debug("ShardMigrationWriter::OnWriteReadyness()"); uint64_t bytesBegin; ASSERT(quorumProcessor != NULL); if (!quorumProcessor->IsPrimary() || !shardServer->GetConfigState()->isMigrating || (shardServer->GetConfigState()->isMigrating && (shardServer->GetConfigState()->migrateSrcShardID != srcShardID || shardServer->GetConfigState()->migrateDstShardID != dstShardID || shardServer->GetConfigState()->migrateQuorumID != quorumID))) { Abort(); return; } if (sendFirst) { sendFirst = false; SendFirst(); } else { bytesBegin = bytesSent; while (bytesSent < bytesBegin + SHARD_MIGRATION_WRITER_GRAN) { if (!cursor) break; SendNext(); } } }
void ReplicatedLog::OnStartCatchup(PaxosMessage& imsg) { #ifdef RLOG_DEBUG_MESSAGES Log_Debug("ReplicatedLog::OnStartCatchup"); #endif if (imsg.nodeID == context->GetLeaseOwner()) context->OnStartCatchup(); }
bool ShardLockManager::TryLock(ReadBuffer key, ClientSession* session) { int cmpres; Buffer keyBuffer; ShardLock* lock; ShardLock* newLock; keyBuffer.Write(key); lock = lockTree.Locate(keyBuffer, cmpres); if (!FOUND_IN_TREE(lock, cmpres)) { // not in tree newLock = NewLock(); newLock->key.Write(key); lockTree.InsertAt(newLock, lock, cmpres); lock = newLock; } else if (lock->locked) return false; else { // in tree, not locked // in lock cache list ASSERT(lock->listCacheNode.next != lock); lockCacheList.Remove(lock); // not in lock expiry list ASSERT(lock->listExpiryNode.next == lock); // not in lock pool list ASSERT(lock->listPoolNode.next == lock); } // in tree, not locked lock->locked = true; lock->session = session; numLocked++; lock->expireTime = EventLoop::Now() + lockExpireTime; // add to expiry list lockExpiryList.Append(lock); UpdateExpireLockTimeout(); // in lock tree ASSERT(lock->treeNode.IsInTree()); // not in lock cache list ASSERT(lock->listCacheNode.next == lock); // in lock expiry list ASSERT(lock->listExpiryNode.next != lock); // not in lock pool list ASSERT(lock->listPoolNode.next == lock); Log_Debug("Lock %R acquired.", &key); return true; }
void ReplicatedLog::OnPrepareRequest(PaxosMessage& imsg) { #ifdef RLOG_DEBUG_MESSAGES Log_Debug("ReplicatedLog::OnPrepareRequest"); #endif acceptor.OnPrepareRequest(imsg); OnRequest(imsg); }
// === CODE === int GIC_Install(char **Arguments) { // Initialise Log_Debug("GIC", "Dist: %P, Interface: %P", gGIC_DistributorAddr, gGIC_InterfaceAddr); gpGIC_InterfaceBase = (void*)MM_MapHWPages(gGIC_InterfaceAddr, 1); LOG("gpGIC_InterfaceBase = %p", gpGIC_InterfaceBase); gpGIC_DistributorBase = (void*)MM_MapHWPages(gGIC_DistributorAddr, 1); LOG("gpGIC_DistributorBase = %p", gpGIC_DistributorBase); gpGIC_InterfaceBase[GICC_CTLR] = 0; // Disable CPU interaface LOG("GICC_IAR = %x (CTLR=0)", gpGIC_InterfaceBase[GICC_IAR]); gpGIC_InterfaceBase[GICC_PMR] = 0xFF; // Effectively disable prioritories gpGIC_InterfaceBase[GICC_CTLR] = 1; // Enable CPU gpGIC_DistributorBase[GICD_CTLR] = 1; // Enable Distributor gpIRQHandler = GIC_IRQHandler; __asm__ __volatile__ ("cpsie if"); // Enable IRQs and FIQs #if 0 for( int i = 0; i < N_IRQS/32; i ++ ) { Log_Debug("GIC", "GICD_ISENABLER%i %x = %08x", i, GICD_ISENABLER0 + i, gpGIC_DistributorBase[GICD_ISENABLER0+i]); gpGIC_DistributorBase[GICD_ISENABLER0+i] = 0; } #endif #if 0 // Testing - First 32 actual interrupts enabled gpGIC_DistributorBase[GICD_ISENABLER0+1] = 0xFFFFFFFF; for( int i = 0; i < 32/4; i ++ ) gpGIC_DistributorBase[GICD_ITARGETSR0+8+i] = 0x01010101; #endif // Clear out pending IRQs. gpGIC_InterfaceBase[GICC_EOIR] = gpGIC_InterfaceBase[GICC_IAR]; return MODULE_ERR_OK; }
// === CODE === void PMemMap_DumpBlocks(tPMemMapEnt *map, int NEnts) { for( int i = 0; i < NEnts; i ++ ) { Log_Debug("PMemMap", "%i: %i 0x%02x %08llx+%llx (end %llx)", i, map[i].Type, map[i].NUMADomain, map[i].Start, map[i].Length, map[i].Start + map[i].Length ); } }
void ReplicatedLog::OnProposeResponse(PaxosMessage& imsg) { #ifdef RLOG_DEBUG_MESSAGES Log_Debug("ReplicatedLog::OnProposeResponse"); #endif Log_Trace(); if (imsg.paxosID == paxosID) proposer.OnProposeResponse(imsg); }
void ShardMigrationWriter::Begin(ClusterMessage& request) { ConfigState* configState; ConfigShard* configShard; ConfigShardServer* configShardServer; ASSERT(!isActive); ASSERT(cursor == NULL); configState = shardServer->GetConfigState(); configShard = configState->GetShard(request.srcShardID); ASSERT(configShard != NULL); configShardServer = configState->GetShardServer(request.nodeID); ASSERT(configShardServer != NULL); quorumProcessor = shardServer->GetQuorumProcessor(configShard->quorumID); ASSERT(quorumProcessor != NULL); isActive = true; nodeID = request.nodeID; quorumID = request.quorumID; srcShardID = request.srcShardID; dstShardID = request.dstShardID; bytesTotal = environment->GetSize(QUORUM_DATABASE_DATA_CONTEXT, srcShardID); startTime = NowClock(); CONTEXT_TRANSPORT->AddNode(nodeID, configShardServer->endpoint); Log_Debug("ShardMigrationWriter::Begin() nodeID = %U", nodeID); Log_Debug("ShardMigrationWriter::Begin() quorumID = %U", quorumID); Log_Debug("ShardMigrationWriter::Begin() srcShardID = %U", srcShardID); Log_Debug("ShardMigrationWriter::Begin() dstShardID = %U", dstShardID); Log_Message("Migrating shard %U into quorum %U as shard %U (sending)", srcShardID, quorumID, dstShardID); sendFirst = true; EventLoop::Add(&onTimeout); CONTEXT_TRANSPORT->RegisterWriteReadyness(nodeID, MFUNC(ShardMigrationWriter, OnWriteReadyness)); }
void Signal::Wait() { DWORD ret; SetWaiting(true); ret = WaitForSingleObject((HANDLE) impl.event, INFINITE); if (ret == WAIT_FAILED) Log_Errno(); if (ret != WAIT_OBJECT_0) Log_Debug("WaitForSingleObject: ret %d", ret); SetWaiting(false); }
/** * Detect if a volume is Ext2 formatted */ int Ext2_Detect(int FD) { tExt2_SuperBlock sb; size_t len; len = VFS_ReadAt(FD, 1024, 1024, &sb); if( len != 1024 ) { Log_Debug("Ext2", "_Detect: Read failed? (0x%x != 1024)", len); return 0; } switch(sb.s_magic) { case 0xEF53: return 2; default: Log_Debug("Ext2", "_Detect: s_magic = 0x%x", sb.s_magic); return 0; } }
bool ReplicatedLog::OnPrepareRequest(PaxosMessage& imsg) { #ifdef RLOG_DEBUG_MESSAGES Log_Debug("ReplicatedLog::OnPrepareRequest"); #endif bool processed = acceptor.OnPrepareRequest(imsg); OnRequest(imsg); return processed; }
void ReplicatedLog::TryAppendDummy() { Log_Trace(); if (proposer.IsActive()) return; proposer.SetUseTimeouts(true); Append(dummy); #ifdef RLOG_DEBUG_MESSAGES Log_Debug("Appending DUMMY!"); #endif }
void ReplicatedLog::Append(Buffer& value) { Log_Trace(); if (proposer.IsActive() || proposer.IsLearnSent()) return; context->OnStartProposing(); proposer.Propose(value); #ifdef RLOG_DEBUG_MESSAGES Log_Debug("Proposing for paxosID = %U", GetPaxosID()); #endif }
void ReplicatedLog::RequestChosen(uint64_t nodeID) { PaxosMessage omsg; if (context->IsLeaseOwner() || EventLoop::Now() - lastRequestChosenTime < REQUEST_CHOSEN_TIMEOUT) return; lastRequestChosenTime = EventLoop::Now(); omsg.RequestChosen(GetPaxosID(), MY_NODEID); context->GetTransport()->SendMessage(nodeID, omsg); #ifdef RLOG_DEBUG_MESSAGES Log_Debug("ReplicatedLog::RequestChosen, paxosID = %U, to = %U", GetPaxosID(), nodeID); #endif }
void StorageDeleteFileChunkJob::Execute() { Stopwatch sw; Buffer filename; Log_Message("Deleting file chunk %U from disk", chunk->GetChunkID()); sw.Start(); filename.Write(chunk->GetFilename()); delete chunk; chunk = NULL; StorageFileDeleter::Delete(filename.GetBuffer()); sw.Stop(); Log_Debug("Deleted, elapsed: %U", (uint64_t) sw.Elapsed()); }
void PaxosProposer::OnProposeTimeout() { Log_Debug("OnProposeTimeout, quorumID: %U", context->GetQuorumID()); Log_Trace(); ASSERT(state.proposing); if (context->IsPaxosBlocked()) { EventLoop::Add(&proposeTimeout); return; } if (useTimeouts || vote->IsRejected()) StartPreparing(); else EventLoop::Add(&proposeTimeout); }
void PaxosProposer::OnPrepareResponse(PaxosMessage& imsg) { Log_Trace("msg.nodeID = %u", imsg.nodeID); if (!state.preparing || imsg.proposalID != state.proposalID) return; if (imsg.type == PAXOS_PREPARE_REJECTED) vote->RegisterRejected(imsg.nodeID); else vote->RegisterAccepted(imsg.nodeID); if (imsg.type == PAXOS_PREPARE_REJECTED) { Log_Debug("Prepare rejected, quorumID: %U", context->GetQuorumID()); if (imsg.promisedProposalID > state.highestPromisedProposalID) state.highestPromisedProposalID = imsg.promisedProposalID; } else if (imsg.type == PAXOS_PREPARE_PREVIOUSLY_ACCEPTED && imsg.acceptedProposalID >= state.highestReceivedProposalID) { /* the >= could be replaced by > which would result in less copys * however this would result in complications in multi paxos * in the multi paxos steady state this branch is inactive * it only runs after leader failure * so it's ok */ state.highestReceivedProposalID = imsg.acceptedProposalID; state.proposedRunID = imsg.runID; ASSERT(imsg.value.GetLength() > 0); state.proposedValue.Write(imsg.value); } if (vote->IsRejected()) { StopPreparing(); EventLoop::Add(&restartTimeout); } else if (vote->IsAccepted()) StartProposing(); }
void PaxosProposer::OnRestartTimeout() { Log_Debug("OnRestartTimeout, quorumID: %U", context->GetQuorumID()); Log_Trace(); ASSERT(!state.preparing); ASSERT(!state.proposing); ASSERT(!prepareTimeout.IsActive()); ASSERT(!proposeTimeout.IsActive()); if (context->IsPaxosBlocked()) { EventLoop::Add(&restartTimeout); return; } if (useTimeouts) StartPreparing(); else EventLoop::Add(&restartTimeout); }
void ShardMigrationWriter::SendFirst() { ClusterMessage msg; ReadBuffer key; ReadBuffer value; ASSERT(cursor == NULL); cursor = environment->GetBulkCursor(QUORUM_DATABASE_DATA_CONTEXT, srcShardID); cursor->SetOnBlockShard(MFUNC(ShardMigrationWriter, OnBlockShard)); msg.ShardMigrationBegin(quorumID, srcShardID, dstShardID); CONTEXT_TRANSPORT->SendClusterMessage(nodeID, msg); Log_Debug("ShardMigrationWriter sending BEGIN"); // send first KV kv = cursor->First(); if (kv) SendItem(kv); else SendCommit(); }
void ReplicatedLog::TryAppendDummy() { Log_Trace(); proposer.SetUseTimeouts(true); if (proposer.IsActive() || proposer.IsLearnSent()) { appendDummyNext = true; return; } if (waitingOnAppend) { appendDummyNext = true; Log_Message("TryAppendDummy called, but waitingOnAppend = true"); return; } Append(dummy); #ifdef RLOG_DEBUG_MESSAGES Log_Debug("Appending DUMMY!"); #endif }