/* * Callback routine when the required locks are obtained. * Called from parent context */ static void ctdb_lock_handler(struct tevent_context *ev, struct tevent_fd *tfd, uint16_t flags, void *private_data) { struct lock_context *lock_ctx; char c; bool locked; double t; int id; lock_ctx = talloc_get_type_abort(private_data, struct lock_context); /* cancel the timeout event */ TALLOC_FREE(lock_ctx->ttimer); t = timeval_elapsed(&lock_ctx->start_time); id = lock_bucket_id(t); /* Read the status from the child process */ if (sys_read(lock_ctx->fd[0], &c, 1) != 1) { locked = false; } else { locked = (c == 0 ? true : false); } /* Update statistics */ CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_calls); if (lock_ctx->ctdb_db) { CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_calls); } if (locked) { if (lock_ctx->ctdb_db) { CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.buckets[id]); CTDB_UPDATE_LATENCY(lock_ctx->ctdb, lock_ctx->ctdb_db, lock_type_str[lock_ctx->type], locks.latency, lock_ctx->start_time); CTDB_UPDATE_DB_LATENCY(lock_ctx->ctdb_db, lock_type_str[lock_ctx->type], locks.latency, t); CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.buckets[id]); } } else { CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_failed); if (lock_ctx->ctdb_db) { CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_failed); } } process_callbacks(lock_ctx, locked); }
/* * Lock record / db depending on type */ static struct lock_request *ctdb_lock_internal(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db, TDB_DATA key, uint32_t priority, void (*callback)(void *, bool), void *private_data, enum lock_type type, bool auto_mark) { struct lock_context *lock_ctx = NULL; struct lock_request *request; if (callback == NULL) { DEBUG(DEBUG_WARNING, ("No callback function specified, not locking\n")); return NULL; } #if 0 /* Disable this optimization to ensure first-in-first-out fair * scheduling of lock requests */ /* get a context for this key - search only the pending contexts, * current contexts might in the middle of processing callbacks */ lock_ctx = find_lock_context(ctdb->lock_pending, ctdb_db, key, priority, type); #endif /* No existing context, create one */ if (lock_ctx == NULL) { lock_ctx = talloc_zero(ctdb, struct lock_context); if (lock_ctx == NULL) { DEBUG(DEBUG_ERR, ("Failed to create a new lock context\n")); return NULL; } lock_ctx->type = type; lock_ctx->ctdb = ctdb; lock_ctx->ctdb_db = ctdb_db; lock_ctx->key.dsize = key.dsize; if (key.dsize > 0) { lock_ctx->key.dptr = talloc_memdup(lock_ctx, key.dptr, key.dsize); } else { lock_ctx->key.dptr = NULL; } lock_ctx->priority = priority; lock_ctx->auto_mark = auto_mark; lock_ctx->child = -1; lock_ctx->block_child = -1; DLIST_ADD_END(ctdb->lock_pending, lock_ctx, NULL); ctdb->lock_num_pending++; CTDB_INCREMENT_STAT(ctdb, locks.num_pending); if (ctdb_db) { CTDB_INCREMENT_DB_STAT(ctdb_db, locks.num_pending); } /* Start the timer when we activate the context */ lock_ctx->start_time = timeval_current(); }
/* send a packet to a client */ static int daemon_queue_send(struct ctdb_client *client, struct ctdb_req_header *hdr) { CTDB_INCREMENT_STAT(client->ctdb, client_packets_sent); if (hdr->operation == CTDB_REQ_MESSAGE) { if (ctdb_queue_length(client->queue) > client->ctdb->tunable.max_queue_depth_drop_msg) { DEBUG(DEBUG_ERR,("CTDB_REQ_MESSAGE queue full - killing client connection.\n")); talloc_free(client); return -1; } } return ctdb_queue_send(client->queue, (uint8_t *)hdr, hdr->length); }
/* handle a timeout of a control */ static void ctdb_control_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *private_data) { struct ctdb_control_state *state = talloc_get_type(private_data, struct ctdb_control_state); TALLOC_CTX *tmp_ctx = talloc_new(ev); CTDB_INCREMENT_STAT(state->ctdb, timeouts.control); talloc_steal(tmp_ctx, state); state->callback(state->ctdb, -1, tdb_null, "ctdb_control timed out", state->private_data); talloc_free(tmp_ctx); }
/* called by the transport layer when a packet comes in */ static void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t length) { struct ctdb_req_header *hdr = (struct ctdb_req_header *)data; CTDB_INCREMENT_STAT(ctdb, node_packets_recv); /* up the counter for this source node, so we know its alive */ if (ctdb_validate_pnn(ctdb, hdr->srcnode)) { /* as a special case, redirected calls don't increment the rx_cnt */ if (hdr->operation != CTDB_REQ_CALL || ((struct ctdb_req_call_old *)hdr)->hopcount == 0) { ctdb->nodes[hdr->srcnode]->rx_cnt++; } } ctdb_input_pkt(ctdb, hdr); }
/* called when we need to process a packet. This can be a requeued packet after a lockwait, or a real packet from another node */ void ctdb_input_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) { TALLOC_CTX *tmp_ctx; /* place the packet as a child of the tmp_ctx. We then use talloc_free() below to free it. If any of the calls want to keep it, then they will steal it somewhere else, and the talloc_free() will only free the tmp_ctx */ tmp_ctx = talloc_new(ctdb); talloc_steal(tmp_ctx, hdr); DEBUG(DEBUG_DEBUG,(__location__ " ctdb request %u of type %u length %u from " "node %u to %u\n", hdr->reqid, hdr->operation, hdr->length, hdr->srcnode, hdr->destnode)); switch (hdr->operation) { case CTDB_REQ_CALL: case CTDB_REPLY_CALL: case CTDB_REQ_DMASTER: case CTDB_REPLY_DMASTER: /* we don't allow these calls when banned */ if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_BANNED) { DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u" " request %u" " length %u from node %u to %u while node" " is banned\n", hdr->operation, hdr->reqid, hdr->length, hdr->srcnode, hdr->destnode)); goto done; } /* for ctdb_call inter-node operations verify that the remote node that sent us the call is running in the same generation instance as this node */ if (ctdb->vnn_map->generation != hdr->generation) { DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u" " request %u" " length %u from node %u to %u had an" " invalid generation id:%u while our" " generation id is:%u\n", hdr->operation, hdr->reqid, hdr->length, hdr->srcnode, hdr->destnode, hdr->generation, ctdb->vnn_map->generation)); goto done; } } switch (hdr->operation) { case CTDB_REQ_CALL: CTDB_INCREMENT_STAT(ctdb, node.req_call); ctdb_request_call(ctdb, hdr); break; case CTDB_REPLY_CALL: CTDB_INCREMENT_STAT(ctdb, node.reply_call); ctdb_reply_call(ctdb, hdr); break; case CTDB_REPLY_ERROR: CTDB_INCREMENT_STAT(ctdb, node.reply_error); ctdb_reply_error(ctdb, hdr); break; case CTDB_REQ_DMASTER: CTDB_INCREMENT_STAT(ctdb, 
node.req_dmaster); ctdb_request_dmaster(ctdb, hdr); break; case CTDB_REPLY_DMASTER: CTDB_INCREMENT_STAT(ctdb, node.reply_dmaster); ctdb_reply_dmaster(ctdb, hdr); break; case CTDB_REQ_MESSAGE: CTDB_INCREMENT_STAT(ctdb, node.req_message); ctdb_request_message(ctdb, hdr); break; case CTDB_REQ_CONTROL: CTDB_INCREMENT_STAT(ctdb, node.req_control); ctdb_request_control(ctdb, hdr); break; case CTDB_REPLY_CONTROL: CTDB_INCREMENT_STAT(ctdb, node.reply_control); ctdb_reply_control(ctdb, hdr); break; case CTDB_REQ_KEEPALIVE: CTDB_INCREMENT_STAT(ctdb, keepalive_packets_recv); break; default: DEBUG(DEBUG_CRIT,("%s: Packet with unknown operation %u\n", __location__, hdr->operation)); break; } done: talloc_free(tmp_ctx); }
/*
 * Schedule a new lock child process
 * Set up callback handler and timeout handler
 *
 * Walks ctdb->lock_pending, discarding contexts that have no queued
 * requests, and picks the first context that is eligible to run.  For
 * that context a pipe is created, the lock helper is started with
 * vfork()/execv(), and a timer plus an fd read event are registered
 * before the context is moved from the pending list to the current list.
 *
 * Returns nothing.  On any setup failure the function logs (where the
 * code below does so), undoes what it had set up and returns, leaving
 * the chosen context on the pending list.
 */
static void ctdb_lock_schedule(struct ctdb_context *ctdb)
{
	struct lock_context *lock_ctx, *next_ctx, *active_ctx;
	int ret;
	TALLOC_CTX *tmp_ctx;
	const char *helper = BINDIR "/ctdb_lock_helper";
	/* helper path is resolved on the first call and then reused */
	static const char *prog = NULL;
	char **args;

	if (prog == NULL) {
		const char *t;

		/* CTDB_LOCK_HELPER in the environment overrides the
		 * built-in helper path */
		t = getenv("CTDB_LOCK_HELPER");
		if (t != NULL) {
			prog = talloc_strdup(ctdb, t);
		} else {
			prog = talloc_strdup(ctdb, helper);
		}
		CTDB_NO_MEMORY_VOID(ctdb, prog);
	}

	/* Nothing is waiting to be scheduled */
	if (ctdb->lock_pending == NULL) {
		return;
	}

	/* Find a lock context with requests */
	lock_ctx = ctdb->lock_pending;
	while (lock_ctx != NULL) {
		/* Remember the successor first: lock_ctx may be freed below */
		next_ctx = lock_ctx->next;
		if (! lock_ctx->req_queue) {
			DEBUG(DEBUG_INFO, ("Removing lock context without lock requests\n"));
			DLIST_REMOVE(ctdb->lock_pending, lock_ctx);
			ctdb->lock_num_pending--;
			CTDB_DECREMENT_STAT(ctdb, locks.num_pending);
			if (lock_ctx->ctdb_db) {
				CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_pending);
			}
			talloc_free(lock_ctx);
		} else {
			/* Look for a context on the current list with the
			 * same db, key, priority and type */
			active_ctx = find_lock_context(ctdb->lock_current, lock_ctx->ctdb_db,
						       lock_ctx->key, lock_ctx->priority,
						       lock_ctx->type);
			if (active_ctx == NULL) {
				/* Contexts without a db are always eligible;
				 * db contexts only while the db is below
				 * MAX_LOCK_PROCESSES_PER_DB current locks */
				if (lock_ctx->ctdb_db == NULL ||
				    lock_ctx->ctdb_db->lock_num_current < MAX_LOCK_PROCESSES_PER_DB) {
					/* Found a lock context with lock requests */
					break;
				}
			}

			/* There is already a child waiting for the
			 * same key.  So don't schedule another child
			 * just yet.
			 * (This point is also reached when the per-db
			 * limit checked above has been hit.)
			 */
		}
		lock_ctx = next_ctx;
	}

	/* Every pending context was either removed or not eligible */
	if (lock_ctx == NULL) {
		return;
	}

	lock_ctx->child = -1;
	/* fd[0] stays with the parent, fd[1] is passed to the helper */
	ret = pipe(lock_ctx->fd);
	if (ret != 0) {
		DEBUG(DEBUG_ERR, ("Failed to create pipe in ctdb_lock_schedule\n"));
		return;
	}

	/* Only the read end is marked close-on-exec */
	set_close_on_exec(lock_ctx->fd[0]);

	/* Create data for child process */
	tmp_ctx = talloc_new(lock_ctx);
	if (tmp_ctx == NULL) {
		DEBUG(DEBUG_ERR, ("Failed to allocate memory for helper args\n"));
		close(lock_ctx->fd[0]);
		close(lock_ctx->fd[1]);
		return;
	}

	/* Create arguments for lock helper */
	args = lock_helper_args(tmp_ctx, lock_ctx, lock_ctx->fd[1]);
	if (args == NULL) {
		DEBUG(DEBUG_ERR, ("Failed to create lock helper args\n"));
		close(lock_ctx->fd[0]);
		close(lock_ctx->fd[1]);
		talloc_free(tmp_ctx);
		return;
	}

	/* args were built before this point; the child branch below only
	 * calls execv() and, if that fails, logs and _exit()s */
	lock_ctx->child = vfork();

	if (lock_ctx->child == (pid_t)-1) {
		DEBUG(DEBUG_ERR, ("Failed to create a child in ctdb_lock_schedule\n"));
		close(lock_ctx->fd[0]);
		close(lock_ctx->fd[1]);
		talloc_free(tmp_ctx);
		return;
	}


	/* Child process */
	if (lock_ctx->child == 0) {
		ret = execv(prog, args);
		if (ret < 0) {
			DEBUG(DEBUG_ERR, ("Failed to execute helper %s (%d, %s)\n",
					  prog, errno, strerror(errno)));
		}
		_exit(1);
	}

	/* Parent process */
	ctdb_track_child(ctdb, lock_ctx->child);
	/* The parent only reads, so its copy of the write end is closed */
	close(lock_ctx->fd[1]);

	/* From here on a child exists, so install the destructor */
	talloc_set_destructor(lock_ctx, ctdb_lock_context_destructor);

	talloc_free(tmp_ctx);

	/* Set up timeout handler */
	/* NOTE(review): offset (10, 0) is presumably 10 seconds - confirm */
	lock_ctx->ttimer = tevent_add_timer(ctdb->ev,
					    lock_ctx,
					    timeval_current_ofs(10, 0),
					    ctdb_lock_timeout_handler,
					    (void *)lock_ctx);
	if (lock_ctx->ttimer == NULL) {
		/* Roll back: kill the helper, clear the destructor and
		 * close the read end; the context stays on the pending list */
		ctdb_kill(ctdb, lock_ctx->child, SIGKILL);
		lock_ctx->child = -1;
		talloc_set_destructor(lock_ctx, NULL);
		close(lock_ctx->fd[0]);
		return;
	}

	/* Set up callback */
	lock_ctx->tfd = tevent_add_fd(ctdb->ev,
				      lock_ctx,
				      lock_ctx->fd[0],
				      EVENT_FD_READ,
				      ctdb_lock_handler,
				      (void *)lock_ctx);
	if (lock_ctx->tfd == NULL) {
		/* Same rollback as above, plus dropping the timer */
		TALLOC_FREE(lock_ctx->ttimer);
		ctdb_kill(ctdb, lock_ctx->child, SIGKILL);
		lock_ctx->child = -1;
		talloc_set_destructor(lock_ctx, NULL);
		close(lock_ctx->fd[0]);
		return;
	}
	/* fd[0] is handed over to the fd event from here on */
	tevent_fd_set_auto_close(lock_ctx->tfd);

	/* Move the context from pending to current */
	/* NOTE(review): only the lock_num_pending counter is decremented
	 * here, not the locks.num_pending statistics, and the num_current
	 * counters are only bumped for contexts with a db - presumably the
	 * rest is accounted for elsewhere; confirm */
	DLIST_REMOVE(ctdb->lock_pending, lock_ctx);
	ctdb->lock_num_pending--;
	DLIST_ADD_END(ctdb->lock_current, lock_ctx, NULL);
	if (lock_ctx->ctdb_db) {
		lock_ctx->ctdb_db->lock_num_current++;
		CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_current);
		CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_current);
	}
}