Beispiel #1
0
/*
  send a packet to a node over its ibw connection, or queue it

  If the connection state is IBWC_CONNECTED the packet is handed straight
  to ctdb_ibw_send_pkt(). Otherwise a private copy of the packet is
  appended to the node's queue (cn->queue / cn->queue_last) and the queue
  counter is incremented.

  Returns the result of ctdb_ibw_send_pkt() when sent directly, 0 when the
  packet was queued, and -1 if the node has no connection object or an
  allocation fails.
*/
static int ctdb_ibw_queue_pkt(struct ctdb_node *node, uint8_t *data, uint32_t length)
{
	struct ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node);
	struct ctdb_ibw_msg *p;
	void *copy;

	assert(length>=sizeof(uint32_t));
	assert(cn!=NULL);

	if (cn->conn==NULL) {
		DEBUG(DEBUG_ERR, ("ctdb_ibw_queue_pkt: conn is NULL\n"));
		return -1;
	}

	if (cn->conn->state==IBWC_CONNECTED) {
		return ctdb_ibw_send_pkt(cn->conn, data, length);
	}

	p = talloc_zero(cn, struct ctdb_ibw_msg);
	CTDB_NO_MEMORY(node->ctdb, p);

	copy = talloc_memdup(p, data, length);
	if (copy == NULL) {
		/* p is a child of cn: release it here, otherwise the
		   half-built message stays allocated until cn is freed */
		talloc_free(p);
		CTDB_NO_MEMORY(node->ctdb, copy);
	}
	p->data = copy;
	p->length = length;

	/* append at the tail so queued packets keep their order */
	DLIST_ADD_AFTER(cn->queue, p, cn->queue_last);
	cn->queue_last = p;
	cn->qcnt++;

	return 0;
}
Beispiel #2
0
/**
 * Handle a control request to freeze one database, identified by db_id.
 *
 * Returns -1 if the database id is unknown or the waiter cannot be
 * allocated. Returns 0 immediately when the database is already frozen.
 * Otherwise the freeze is started, the request is parked as a waiter on
 * the database's freeze handle, *async_reply is set to true and 0 is
 * returned.
 */
int32_t ctdb_control_db_freeze(struct ctdb_context *ctdb,
                               struct ctdb_req_control_old *c,
                               uint32_t db_id,
                               bool *async_reply)
{
    struct ctdb_db_freeze_waiter *w;
    struct ctdb_db_context *ctdb_db = find_ctdb_db(ctdb, db_id);

    if (!ctdb_db) {
        DEBUG(DEBUG_ERR, ("Freeze db for unknown dbid 0x%08x\n", db_id));
        return -1;
    }

    if (CTDB_FREEZE_FROZEN == ctdb_db->freeze_mode) {
        DEBUG(DEBUG_ERR, ("Freeze db: %s frozen\n", ctdb_db->db_name));
        return 0;
    }

    ctdb_start_db_freeze(ctdb_db);

    /* park this request on the freeze handle's waiter list */
    w = talloc(ctdb_db->freeze_handle, struct ctdb_db_freeze_waiter);
    CTDB_NO_MEMORY(ctdb, w);

    w->status = -1;
    w->ctdb = ctdb;
    /* the waiter takes ownership of the request packet */
    w->private_data = talloc_steal(w, c);

    talloc_set_destructor(w, ctdb_db_freeze_waiter_destructor);
    DLIST_ADD(ctdb_db->freeze_handle->waiters, w);

    *async_reply = true;
    return 0;
}
Beispiel #3
0
/*
  register a server id
  a serverid that is registered with ctdb will be automatically unregistered
  once the client domain socket disappears.

  indata must carry at least sizeof(struct ctdb_server_id) bytes.

  Returns 0 on success, 1 if no client structure exists for client_id,
  and -1 if indata is too short or memory cannot be allocated.
 */
int32_t ctdb_control_register_server_id(struct ctdb_context *ctdb, 
				 uint32_t client_id,
				 TDB_DATA indata)
{
	struct ctdb_server_id *server_id;
	struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);


	if (client == NULL) {
		DEBUG(DEBUG_ERR,(__location__ " Could not find client parent structure. You can not send this control to a remote node\n"));
		return 1;
	}

	/* the memcpy below reads a full structure from the request payload,
	   so refuse payloads that are missing or too short */
	if (indata.dptr == NULL || indata.dsize < sizeof(struct ctdb_server_id)) {
		DEBUG(DEBUG_ERR,(__location__ " Invalid data size %u for server id registration\n",
				 (unsigned)indata.dsize));
		return -1;
	}

	/* hang the server_id structure off client before storing it in the
	   tree so that it will be automatically destroyed when client
	   is destroyed. 
	   when the structure is free'd it will be automatically
	   removed from the tree
	*/
	server_id = talloc_zero(client, struct ctdb_server_id);
	CTDB_NO_MEMORY(ctdb, server_id);
	memcpy(server_id, indata.dptr, sizeof(struct ctdb_server_id));

	trbt_insertarray32_callback(ctdb->server_ids, SERVER_ID_KEY_SIZE,
		get_server_id_key(server_id), 
		add_server_id_callback, server_id);

	return 0;
}
Beispiel #4
0
/**
 * Start freezing one database and attach a waiter to its freeze handle.
 *
 * private_data must be a struct ctdb_freeze_handle; this is enforced by
 * talloc_get_type_abort(). Returns 0, or -1 if the waiter cannot be
 * allocated.
 */
static int db_freeze(struct ctdb_db_context *ctdb_db, void *private_data)
{
    struct ctdb_db_freeze_waiter *w;
    struct ctdb_freeze_handle *handle;

    handle = talloc_get_type_abort(private_data, struct ctdb_freeze_handle);

    ctdb_start_db_freeze(ctdb_db);

    w = talloc(ctdb_db->freeze_handle, struct ctdb_db_freeze_waiter);
    CTDB_NO_MEMORY(handle->ctdb, w);

    w->status = -1;
    w->private_data = handle;
    w->ctdb = handle->ctdb;
    talloc_set_destructor(w, db_freeze_waiter_destructor);

    if (ctdb_db->freeze_mode != CTDB_FREEZE_FROZEN) {
        /* not frozen yet: wait on the handle */
        DLIST_ADD(ctdb_db->freeze_handle->waiters, w);
        return 0;
    }

    /* Already frozen: record success and free the waiter right away,
     * which runs the destructor registered above. */
    w->status = 0;
    talloc_free(w);
    return 0;
}
Beispiel #5
0
/*
  record the name of the transport to use

  A copy of the string is stored in ctdb->transport, owned by ctdb.
  Returns 0 on success, -1 if the copy cannot be allocated.
*/
int ctdb_set_transport(struct ctdb_context *ctdb,
                       const char *transport)
{
    ctdb->transport = talloc_strdup(ctdb, transport);

    CTDB_NO_MEMORY(ctdb, ctdb->transport);
    return 0;
}
Beispiel #6
0
/*
  run the notification script for an event

  The script configured in ctdb->notification_script must exist and have
  the owner-execute bit set. It is invoked through system() with the
  event name as its single argument.

  Returns the script's exit status (0 on success), or -1 if the script
  cannot be run, system() fails, or the command did not terminate
  normally (for example it was killed by a signal).
*/
static int ctdb_run_notification_script_child(struct ctdb_context *ctdb, const char *event)
{
	struct stat st;
	int ret;
	char *cmd;

	if (stat(ctdb->notification_script, &st) != 0) {
		DEBUG(DEBUG_ERR,("Could not stat notification script %s. Can not send notifications.\n", ctdb->notification_script));
		return -1;
	}
	if (!(st.st_mode & S_IXUSR)) {
		DEBUG(DEBUG_ERR,("Notification script %s is not executable.\n", ctdb->notification_script));
		return -1;
	}

	cmd = talloc_asprintf(ctdb, "%s %s\n", ctdb->notification_script, event);
	CTDB_NO_MEMORY(ctdb, cmd);

	ret = system(cmd);
	/* if the system() call was successful, translate ret into the
	   return code from the command. WEXITSTATUS is only meaningful
	   when the command exited normally; any other termination is
	   reported as a failure rather than being read as exit code 0.
	*/
	if (ret != -1) {
		if (WIFEXITED(ret)) {
			ret = WEXITSTATUS(ret);
		} else {
			ret = -1;
		}
	}
	if (ret != 0) {
		DEBUG(DEBUG_ERR,("Notification script \"%s\" failed with error %d\n", cmd, ret));
	}

	/* cmd was allocated on ctdb; release it once it is no longer needed */
	talloc_free(cmd);

	return ret;
}
Beispiel #7
0
/*
  produce a full talloc memory report and hand it back in outdata

  The report is written to a temporary file, whose final file position is
  taken as the report size, and is then read back into a buffer allocated
  on outdata.

  Returns 0 on success, -1 on any file or allocation error.
 */
int32_t ctdb_dump_memory(struct ctdb_context *ctdb, TDB_DATA *outdata)
{
    long fsize;
    FILE *f = tmpfile();

    if (!f) {
        DEBUG(DEBUG_ERR,(__location__ " Unable to open tmpfile - %s\n", strerror(errno)));
        return -1;
    }

    /* write the report; the file position afterwards is its length */
    talloc_report_full(NULL, f);
    fsize = ftell(f);
    if (-1 == fsize) {
        DEBUG(DEBUG_ERR, (__location__ " Unable to get file size - %s\n",
                          strerror(errno)));
        fclose(f);
        return -1;
    }
    rewind(f);

    outdata->dptr = talloc_size(outdata, fsize);
    if (!outdata->dptr) {
        /* close the file first: CTDB_NO_MEMORY returns from here */
        fclose(f);
        CTDB_NO_MEMORY(ctdb, outdata->dptr);
    }

    /* read the whole report back as one blob */
    outdata->dsize = fread(outdata->dptr, 1, fsize, f);
    fclose(f);

    if (outdata->dsize != fsize) {
        DEBUG(DEBUG_ERR,(__location__ " Unable to read tmpfile\n"));
        return -1;
    }

    return 0;
}
Beispiel #8
0
/*
  write a record to a normal database

  The value stored in the tdb is the ltdb header immediately followed by
  the record data. If the database has a custom store function installed
  (ctdb_ltdb_store_fn) the whole operation is delegated to it.

  Returns the result of tdb_store() (or of the custom store function),
  or -1 if the temporary record buffer cannot be allocated.
*/
int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key, 
		    struct ctdb_ltdb_header *header, TDB_DATA data)
{
	struct ctdb_context *ctdb = ctdb_db->ctdb;
	TDB_DATA rec;
	int ret;
	bool seqnum_suppressed = false;

	if (ctdb_db->ctdb_ltdb_store_fn) {
		return ctdb_db->ctdb_ltdb_store_fn(ctdb_db, key, header, data);
	}

	if (ctdb->flags & CTDB_FLAG_TORTURE) {
		struct ctdb_ltdb_header *h2;
		rec = tdb_fetch(ctdb_db->ltdb->tdb, key);
		h2 = (struct ctdb_ltdb_header *)rec.dptr;
		/* the existing record must hold a complete header before
		   its rsn is read: compare against the header size, not
		   the size of a pointer to it */
		if (rec.dptr && rec.dsize >= sizeof(*h2) && h2->rsn > header->rsn) {
			DEBUG(DEBUG_CRIT,("RSN regression! %llu %llu\n",
				 (unsigned long long)h2->rsn, (unsigned long long)header->rsn));
		}
		if (rec.dptr) free(rec.dptr);
	}

	rec.dsize = sizeof(*header) + data.dsize;
	rec.dptr = talloc_size(ctdb, rec.dsize);
	CTDB_NO_MEMORY(ctdb, rec.dptr);

	memcpy(rec.dptr, header, sizeof(*header));
	/* an empty record may come with a NULL data pointer, which must
	   not be passed to memcpy even for a zero length */
	if (data.dsize != 0) {
		memcpy(rec.dptr + sizeof(*header), data.dptr, data.dsize);
	}

	/* Databases with seqnum updates enabled only get their seqnum
	   changes when/if we modify the data */
	if (ctdb_db->seqnum_update != NULL) {
		TDB_DATA old;
		old = tdb_fetch(ctdb_db->ltdb->tdb, key);

		if ( (old.dsize == rec.dsize)
		&& !memcmp(old.dptr+sizeof(struct ctdb_ltdb_header),
			  rec.dptr+sizeof(struct ctdb_ltdb_header),
			  rec.dsize-sizeof(struct ctdb_ltdb_header)) ) {
			tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_SEQNUM);
			seqnum_suppressed = true;
		}
		if (old.dptr) free(old.dptr);
	}
	ret = tdb_store(ctdb_db->ltdb->tdb, key, rec, TDB_REPLACE);
	if (ret != 0) {
		DEBUG(DEBUG_ERR, (__location__ " Failed to store dynamic data\n"));
	}
	/* restore the seqnum flag if it was switched off for this store */
	if (seqnum_suppressed) {
		tdb_add_flags(ctdb_db->ltdb->tdb, TDB_SEQNUM);
	}

	talloc_free(rec.dptr);

	return ret;
}
Beispiel #9
0
/*
 * Convert a node map into a freshly allocated array of node structures.
 *
 * On success *nodes points to an array of node_map->num node pointers
 * allocated on mem_ctx (each node is a child of the array) and
 * *num_nodes holds the count. Every node is marked disconnected; nodes
 * that are not flagged as deleted have their flags reset to unhealthy.
 *
 * Returns 0 on success, -1 on allocation failure. On failure the partly
 * built array remains attached to mem_ctx.
 */
static int convert_node_map_to_list(struct ctdb_context *ctdb,
                                    TALLOC_CTX *mem_ctx,
                                    struct ctdb_node_map_old *node_map,
                                    struct ctdb_node ***nodes,
                                    uint32_t *num_nodes)
{
    uint32_t i;

    *nodes = talloc_zero_array(mem_ctx,
                               struct ctdb_node *, node_map->num);
    CTDB_NO_MEMORY(ctdb, *nodes);
    *num_nodes = node_map->num;

    for (i = 0; i < node_map->num; i++) {
        struct ctdb_node *node;

        node = talloc_zero(*nodes, struct ctdb_node);
        CTDB_NO_MEMORY(ctdb, node);
        (*nodes)[i] = node;

        node->address = node_map->nodes[i].addr;
        node->name = talloc_asprintf(node, "%s:%u",
                                     ctdb_addr_to_str(&node->address),
                                     ctdb_addr_to_port(&node->address));
        /* the name allocation can fail like any other one */
        CTDB_NO_MEMORY(ctdb, node->name);

        node->flags = node_map->nodes[i].flags;
        if (!(node->flags & NODE_FLAGS_DELETED)) {
            /* existing flags of a non-deleted node are discarded */
            node->flags = NODE_FLAGS_UNHEALTHY;
        }
        node->flags |= NODE_FLAGS_DISCONNECTED;

        /* the pnn is the node's index in the map */
        node->pnn = i;
        node->ctdb = ctdb;
        node->dead_count = 0;
    }

    return 0;
}
Beispiel #10
0
/*
  set up the tcp transport state for a ctdb context

  Allocates a zeroed struct ctdb_tcp on ctdb, installs it as the
  context's private data together with the tcp method table, and
  registers a destructor for it.

  Returns 0 on success, -1 if the allocation fails.
*/
int ctdb_tcp_init(struct ctdb_context *ctdb)
{
    struct ctdb_tcp *ctcp = talloc_zero(ctdb, struct ctdb_tcp);

    CTDB_NO_MEMORY(ctdb, ctcp);

    /* listen_fd starts out as -1 */
    ctcp->ctdb      = ctdb;
    ctcp->listen_fd = -1;

    ctdb->methods      = &ctdb_tcp_methods;
    ctdb->private_data = ctcp;

    talloc_set_destructor(ctcp, tcp_ctcp_destructor);

    return 0;
}
Beispiel #11
0
/*
  setup the local node address

  Parses the address string into a newly allocated ctdb->address and
  builds ctdb->name as "<address>:<port>" from it.

  Returns 0 on success, -1 if the address cannot be parsed or memory
  cannot be allocated.
*/
int ctdb_set_address(struct ctdb_context *ctdb, const char *address)
{
    ctdb->address = talloc(ctdb, ctdb_sock_addr);
    CTDB_NO_MEMORY(ctdb, ctdb->address);

    if (ctdb_parse_address(ctdb, address, ctdb->address) != 0) {
        return -1;
    }

    ctdb->name = talloc_asprintf(ctdb, "%s:%u",
                                 ctdb_addr_to_str(ctdb->address),
                                 ctdb_addr_to_port(ctdb->address));
    /* do not report success with a NULL node name */
    CTDB_NO_MEMORY(ctdb, ctdb->name);

    return 0;
}
Beispiel #12
0
/*
  initialise tcp portion of a ctdb node

  Allocates the per-node tcp state as a child of node, stores it in
  node->private_data and creates the outgoing packet queue for the node.
  The queue is set up with fd -1.

  Returns 0 on success, -1 if the tcp state or the queue cannot be
  created.
*/
static int ctdb_tcp_add_node(struct ctdb_node *node)
{
    struct ctdb_tcp_node *tnode;
    tnode = talloc_zero(node, struct ctdb_tcp_node);
    CTDB_NO_MEMORY(node->ctdb, tnode);

    tnode->fd = -1;
    node->private_data = tnode;
    talloc_set_destructor(tnode, tnode_destructor);

    tnode->out_queue = ctdb_queue_setup(node->ctdb, node, tnode->fd, CTDB_TCP_ALIGNMENT,
                                        ctdb_tcp_tnode_cb, node, "to-node-%s", node->name);
    /* a node without an outgoing queue must not be reported as set up */
    CTDB_NO_MEMORY(node->ctdb, tnode->out_queue);

    return 0;
}
Beispiel #13
0
/*
  read a record from the ltdb and split it into header and body

  When the stored record is too short to contain a header, an initial
  header is generated instead and the body is reported as empty; the
  initial header is also written back if the database is persistent or
  this node is the dmaster. When there is no vnn_map (the client case)
  that situation is an error: the header's dmaster is set to (uint32_t)-1
  and -1 is returned.

  data may be NULL if the caller only wants the header. The body is
  duplicated onto mem_ctx.

  Returns 0 on success, -1 on the client error above or on allocation
  failure.
*/
int ctdb_ltdb_fetch(struct ctdb_db_context *ctdb_db, 
		    TDB_DATA key, struct ctdb_ltdb_header *header, 
		    TALLOC_CTX *mem_ctx, TDB_DATA *data)
{
	struct ctdb_context *ctdb = ctdb_db->ctdb;
	TDB_DATA rec = tdb_fetch(ctdb_db->ltdb->tdb, key);

	if (rec.dsize < sizeof(*header)) {
		/* no usable record: fall back to an initial header */
		free(rec.dptr);

		if (ctdb->vnn_map == NULL) {
			/* called from the client */
			ZERO_STRUCTP(data);
			header->dmaster = (uint32_t)-1;
			return -1;
		}

		ltdb_initial_header(ctdb_db, key, header);
		if (data != NULL) {
			*data = tdb_null;
		}

		if ((ctdb_db->persistent || header->dmaster == ctdb->pnn) &&
		    ctdb_ltdb_store(ctdb_db, key, header, tdb_null) != 0) {
			DEBUG(DEBUG_NOTICE,
			      (__location__ "failed to store initial header\n"));
		}
		return 0;
	}

	*header = *(struct ctdb_ltdb_header *)rec.dptr;

	if (data == NULL) {
		/* caller only wanted the header */
		free(rec.dptr);
		return 0;
	}

	/* everything after the header is the record body */
	data->dsize = rec.dsize - sizeof(struct ctdb_ltdb_header);
	data->dptr = talloc_memdup(mem_ctx,
				   sizeof(struct ctdb_ltdb_header)+rec.dptr,
				   data->dsize);
	free(rec.dptr);
	CTDB_NO_MEMORY(ctdb, data->dptr);

	return 0;
}
Beispiel #14
0
/*
  set or clear the recovery lock file

  Any previously stored name is freed first. Passing file == NULL leaves
  the setting cleared; otherwise a copy of the name is stored on ctdb.

  Returns 0 on success, -1 if the copy cannot be allocated.
*/
int ctdb_set_recovery_lock_file(struct ctdb_context *ctdb, const char *file)
{
    /* drop the old value, if any */
    if (ctdb->recovery_lock_file != NULL) {
        talloc_free(ctdb->recovery_lock_file);
        ctdb->recovery_lock_file = NULL;
    }

    if (file != NULL) {
        ctdb->recovery_lock_file = talloc_strdup(ctdb, file);
        CTDB_NO_MEMORY(ctdb, ctdb->recovery_lock_file);
        return 0;
    }

    DEBUG(DEBUG_ALERT,("Recovery lock file set to \"\". Disabling recovery lock checking\n"));
    return 0;
}
Beispiel #15
0
/*
   report the daemon's uptime information

   Fills a struct ctdb_uptime, allocated on outdata, with the current
   time plus the start and recovery timestamps kept in ctdb, and returns
   it through outdata.

   Returns 0 on success, -1 if the allocation fails.
*/
int32_t ctdb_control_uptime(struct ctdb_context *ctdb, TDB_DATA *outdata)
{
	struct ctdb_uptime *uptime = talloc_zero(outdata, struct ctdb_uptime);

	CTDB_NO_MEMORY(ctdb, uptime);

	/* timestamps recorded by the daemon */
	uptime->last_recovery_finished = ctdb->last_recovery_finished;
	uptime->last_recovery_started  = ctdb->last_recovery_started;
	uptime->ctdbd_start_time       = ctdb->ctdbd_start_time;

	/* and the time right now */
	gettimeofday(&uptime->current_time, NULL);

	outdata->dptr  = (uint8_t *)uptime;
	outdata->dsize = sizeof(struct ctdb_uptime);

	return 0;
}
Beispiel #16
0
/*
  return the ban state of this node

  The reply is a struct ctdb_ban_time allocated on outdata. If
  ctdb->banning_ctx is set its contents are copied into the reply;
  otherwise the reply carries this node's pnn and a time of 0.

  Returns 0 on success, -1 if the allocation fails.
*/
int32_t ctdb_control_get_ban_state(struct ctdb_context *ctdb, TDB_DATA *outdata)
{
	struct ctdb_ban_time *bantime = talloc(outdata, struct ctdb_ban_time);

	CTDB_NO_MEMORY(ctdb, bantime);

	if (ctdb->banning_ctx == NULL) {
		/* no ban information stored */
		bantime->time = 0;
		bantime->pnn = ctdb->pnn;
	} else {
		*bantime = *(struct ctdb_ban_time *)(ctdb->banning_ctx);
	}

	outdata->dsize = sizeof(struct ctdb_ban_time);
	outdata->dptr  = (uint8_t *)bantime;

	return 0;
}
Beispiel #17
0
/*
  read a record from the ltdb and split it into header and body

  When the stored record is too short to contain a header, an initial
  header is generated, written back to the database, and the body is
  reported as empty. When there is no vnn_map (the client case) that
  situation is an error: the header's dmaster is set to (uint32_t)-1 and
  -1 is returned.

  data may be NULL if the caller only wants the header. The body is
  duplicated onto mem_ctx.

  Returns 0 on success, -1 on the client error above or on allocation
  failure.
*/
int ctdb_ltdb_fetch(struct ctdb_db_context *ctdb_db, 
		    TDB_DATA key, struct ctdb_ltdb_header *header, 
		    TALLOC_CTX *mem_ctx, TDB_DATA *data)
{
	struct ctdb_context *ctdb = ctdb_db->ctdb;
	TDB_DATA rec = tdb_fetch(ctdb_db->ltdb->tdb, key);

	if (rec.dsize < sizeof(*header)) {
		TDB_DATA d2;

		/* no usable record: fall back to an initial header */
		free(rec.dptr);

		if (ctdb->vnn_map == NULL) {
			/* called from the client */
			ZERO_STRUCTP(data);
			header->dmaster = (uint32_t)-1;
			return -1;
		}

		ltdb_initial_header(ctdb_db, key, header);

		/* d2 is an empty body, both for the caller and the store */
		ZERO_STRUCT(d2);
		if (data != NULL) {
			*data = d2;
		}
		ctdb_ltdb_store(ctdb_db, key, header, d2);
		return 0;
	}

	*header = *(struct ctdb_ltdb_header *)rec.dptr;

	if (data == NULL) {
		/* caller only wanted the header */
		free(rec.dptr);
		return 0;
	}

	/* everything after the header is the record body */
	data->dsize = rec.dsize - sizeof(struct ctdb_ltdb_header);
	data->dptr = talloc_memdup(mem_ctx,
				   sizeof(struct ctdb_ltdb_header)+rec.dptr,
				   data->dsize);
	free(rec.dptr);
	CTDB_NO_MEMORY(ctdb, data->dptr);

	return 0;
}
Beispiel #18
0
/*
  fill in a ctdb_address from an address string

  The string is copied as-is into address->address (allocated on
  mem_ctx). The port is taken from the "ctdb"/"tcp" entry of the services
  database when one exists, and is CTDB_PORT otherwise.

  Returns 0 on success, -1 if the string copy cannot be allocated.
*/
int ctdb_parse_address(struct ctdb_context *ctdb,
		       TALLOC_CTX *mem_ctx, const char *str,
		       struct ctdb_address *address)
{
	struct servent *se;

	/* look up the service entry for the port */
	setservent(0);
	se = getservbyname("ctdb", "tcp");
	endservent();

	address->address = talloc_strdup(mem_ctx, str);
	CTDB_NO_MEMORY(ctdb, address->address);

	address->port = (se != NULL) ? ntohs(se->s_port) : CTDB_PORT;

	return 0;
}
Beispiel #19
0
/*
  handle a control request to freeze all databases

  The freeze is started first. The function then returns 0 straight away
  if everything is already frozen or the freeze handle reports no
  databases (num_total == 0), and -1 if there is no freeze handle or the
  waiter cannot be allocated. Otherwise the request is parked as a waiter
  on the freeze handle, *async_reply is set to true and 0 is returned.
 */
int32_t ctdb_control_freeze(struct ctdb_context *ctdb,
                            struct ctdb_req_control_old *c, bool *async_reply)
{
    struct ctdb_freeze_waiter *w;

    ctdb_start_freeze(ctdb);

    if (CTDB_FREEZE_FROZEN == ctdb->freeze_mode) {
        DEBUG(DEBUG_ERR, ("Freeze all: frozen\n"));
        /* nothing to wait for */
        return 0;
    }

    if (!ctdb->freeze_handle) {
        DEBUG(DEBUG_ERR,("No freeze lock handle when adding a waiter\n"));
        return -1;
    }

    /* with no databases to freeze there is nothing to wait for either */
    if (0 == ctdb->freeze_handle->num_total) {
        return 0;
    }

    /* park this request on the handle's waiter list */
    w = talloc(ctdb->freeze_handle, struct ctdb_freeze_waiter);
    CTDB_NO_MEMORY(ctdb, w);

    w->status   = -1;
    w->ctdb     = ctdb;
    /* the waiter takes ownership of the request packet */
    w->c        = talloc_steal(w, c);

    talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
    DLIST_ADD(ctdb->freeze_handle->waiters, w);

    /* the reply is not sent from here */
    *async_reply = true;
    return 0;
}
Beispiel #20
0
/*
  send a control message to a node

  destnode is either a node number or one of the CTDB_BROADCAST_*
  targets; broadcasts are only accepted together with
  CTDB_CTRL_FLAG_NOREPLY.

  If the destination is not a valid, connected node the control is not
  sent: unless NOREPLY is set the callback is invoked at once with
  status -1, and 0 is returned.

  Otherwise a control state is allocated (as a child of private_data
  when one is given, else of ctdb), the request packet is built and
  queued, and - unless NOREPLY is set - a timeout timer is armed when
  the control_timeout tunable is non-zero.

  Returns 0 when the control was queued or handled as described above,
  -1 if the transport is down, a broadcast lacks NOREPLY, or memory
  cannot be allocated.
 */
int ctdb_daemon_send_control(struct ctdb_context *ctdb, uint32_t destnode,
			     uint64_t srvid, uint32_t opcode, uint32_t client_id,
			     uint32_t flags,
			     TDB_DATA data,
			     ctdb_control_callback_fn_t callback,
			     void *private_data)
{
	struct ctdb_req_control_old *c;
	struct ctdb_control_state *state;
	size_t len;

	if (ctdb->methods == NULL) {
		DEBUG(DEBUG_INFO,(__location__ " Failed to send control. Transport is DOWN\n"));
		return -1;
	}

	if (((destnode == CTDB_BROADCAST_VNNMAP) || 
	     (destnode == CTDB_BROADCAST_ALL) ||
	     (destnode == CTDB_BROADCAST_CONNECTED)) && 
	    !(flags & CTDB_CTRL_FLAG_NOREPLY)) {
		DEBUG(DEBUG_CRIT,("Attempt to broadcast control without NOREPLY\n"));
		return -1;
	}

	if (destnode != CTDB_BROADCAST_VNNMAP && 
	    destnode != CTDB_BROADCAST_ALL && 
	    destnode != CTDB_BROADCAST_CONNECTED && 
	    (!ctdb_validate_pnn(ctdb, destnode) || 
	     (ctdb->nodes[destnode]->flags & NODE_FLAGS_DISCONNECTED))) {
		if (!(flags & CTDB_CTRL_FLAG_NOREPLY)) {
			callback(ctdb, -1, tdb_null, "ctdb_control to disconnected node", private_data);
		}
		return 0;
	}

	/* the state is made a child of private_data if possible. This means any reply
	   will be discarded if the private_data goes away */
	state = talloc(private_data?private_data:ctdb, struct ctdb_control_state);
	CTDB_NO_MEMORY(ctdb, state);

	state->reqid = reqid_new(ctdb->idr, state);
	state->callback = callback;
	state->private_data = private_data;
	state->ctdb = ctdb;
	state->flags = flags;

	talloc_set_destructor(state, ctdb_control_destructor);

	len = offsetof(struct ctdb_req_control_old, data) + data.dsize;
	c = ctdb_transport_allocate(ctdb, state, CTDB_REQ_CONTROL, len, 
				    struct ctdb_req_control_old);
	if (c == NULL) {
		/* nothing was sent, so no reply will ever arrive for this
		   state: free it now instead of leaving it (and its
		   reqid) attached to its parent */
		talloc_free(state);
		CTDB_NO_MEMORY(ctdb, c);
	}
	talloc_set_name_const(c, "ctdb_req_control packet");

	c->hdr.destnode     = destnode;
	c->hdr.reqid        = state->reqid;
	c->opcode           = opcode;
	c->client_id        = client_id;
	c->flags            = flags;
	c->srvid            = srvid;
	c->datalen          = data.dsize;
	if (data.dsize) {
		memcpy(&c->data[0], data.dptr, data.dsize);
	}

	ctdb_queue_packet(ctdb, &c->hdr);	

	if (flags & CTDB_CTRL_FLAG_NOREPLY) {
		/* no reply expected: the state (and the packet hanging
		   off it) is not needed any more */
		talloc_free(state);
		return 0;
	}

	if (ctdb->tunable.control_timeout) {
		tevent_add_timer(ctdb->ev, state,
				 timeval_current_ofs(ctdb->tunable.control_timeout, 0),
				 ctdb_control_timeout, state);
	}

	talloc_free(c);
	return 0;
}
Beispiel #21
0
/**
 * write a record to a normal database
 *
 * This is the server-variant of the ctdb_ltdb_store function.
 * It contains logic to determine whether a record should be
 * stored or deleted. It also sends SCHEDULE_FOR_DELETION
 * controls to the local ctdb daemon if appropriate.
 *
 * The VACUUM_MIGRATED and AUTOMATIC flags are cleared in *header
 * before the record is written, and header->rsn is incremented when
 * this node is the dmaster of a kept, non-persistent, non-read-only
 * record.
 *
 * Returns the result of tdb_store()/tdb_delete(), or -1 if the
 * temporary record buffer cannot be allocated.
 */
static int ctdb_ltdb_store_server(struct ctdb_db_context *ctdb_db,
				  TDB_DATA key,
				  struct ctdb_ltdb_header *header,
				  TDB_DATA data)
{
	struct ctdb_context *ctdb = ctdb_db->ctdb;
	TDB_DATA rec;
	int ret;
	bool seqnum_suppressed = false;
	bool keep = false;
	bool schedule_for_deletion = false;
	bool remove_from_delete_queue = false;
	uint32_t lmaster;

	if (ctdb->flags & CTDB_FLAG_TORTURE) {
		struct ctdb_ltdb_header *h2;
		rec = tdb_fetch(ctdb_db->ltdb->tdb, key);
		h2 = (struct ctdb_ltdb_header *)rec.dptr;
		/* the existing record must hold a complete header before
		   its rsn is read: compare against the header size, not
		   the size of a pointer to it */
		if (rec.dptr && rec.dsize >= sizeof(*h2) && h2->rsn > header->rsn) {
			DEBUG(DEBUG_CRIT,("RSN regression! %llu %llu\n",
				 (unsigned long long)h2->rsn, (unsigned long long)header->rsn));
		}
		if (rec.dptr) free(rec.dptr);
	}

	if (ctdb->vnn_map == NULL) {
		/*
		 * Called from a client: always store the record
		 * Also don't call ctdb_lmaster since it uses the vnn_map!
		 */
		keep = true;
		goto store;
	}

	lmaster = ctdb_lmaster(ctdb_db->ctdb, &key);

	/*
	 * If we migrate an empty record off to another node
	 * and the record has not been migrated with data,
	 * delete the record instead of storing the empty record.
	 */
	if (data.dsize != 0) {
		keep = true;
	} else if (header->flags & CTDB_REC_RO_FLAGS) {
		keep = true;
	} else if (ctdb_db->persistent) {
		keep = true;
	} else if (header->flags & CTDB_REC_FLAG_AUTOMATIC) {
		/*
		 * The record is not created by the client but
		 * automatically by the ctdb_ltdb_fetch logic that
		 * creates a record with an initial header in the
		 * ltdb before trying to migrate the record from
		 * the current lmaster. Keep it instead of trying
		 * to delete the non-existing record...
		 */
		keep = true;
		schedule_for_deletion = true;
	} else if (header->flags & CTDB_REC_FLAG_MIGRATED_WITH_DATA) {
		keep = true;
	} else if (ctdb_db->ctdb->pnn == lmaster) {
		/*
		 * If we are lmaster, then we usually keep the record.
		 * But if we retrieve the dmaster role by a VACUUM_MIGRATE
		 * and the record is empty and has never been migrated
		 * with data, then we should delete it instead of storing it.
		 * This is part of the vacuuming process.
		 *
		 * The reason that we usually need to store even empty records
		 * on the lmaster is that a client operating directly on the
		 * lmaster (== dmaster) expects the local copy of the record to
		 * exist after successful ctdb migrate call. If the record does
		 * not exist, the client goes into a migrate loop and eventually
		 * fails. So storing the empty record makes sure that we do not
		 * need to change the client code.
		 */
		if (!(header->flags & CTDB_REC_FLAG_VACUUM_MIGRATED)) {
			keep = true;
		} else if (ctdb_db->ctdb->pnn != header->dmaster) {
			keep = true;
		}
	} else if (ctdb_db->ctdb->pnn == header->dmaster) {
		keep = true;
	}

	if (keep) {
		if (!ctdb_db->persistent &&
		    (ctdb_db->ctdb->pnn == header->dmaster) &&
		    !(header->flags & CTDB_REC_RO_FLAGS))
		{
			header->rsn++;

			if (data.dsize == 0) {
				schedule_for_deletion = true;
			}
		}
		remove_from_delete_queue = !schedule_for_deletion;
	}

store:
	/*
	 * The VACUUM_MIGRATED flag is only set temporarily for
	 * the above logic when the record was retrieved by a
	 * VACUUM_MIGRATE call and should not be stored in the
	 * database.
	 *
	 * The VACUUM_MIGRATE call is triggered by a vacuum fetch,
	 * and there are two cases in which the corresponding record
	 * is stored in the local database:
	 * 1. The record has been migrated with data in the past
	 *    (the MIGRATED_WITH_DATA record flag is set).
	 * 2. The record has been filled with data again since it
	 *    had been submitted in the VACUUM_FETCH message to the
	 *    lmaster.
	 * For such records it is important to not store the
	 * VACUUM_MIGRATED flag in the database.
	 */
	header->flags &= ~CTDB_REC_FLAG_VACUUM_MIGRATED;

	/*
	 * Similarly, clear the AUTOMATIC flag which should not enter
	 * the local database copy since this would require client
	 * modifications to clear the flag when the client stores
	 * the record.
	 */
	header->flags &= ~CTDB_REC_FLAG_AUTOMATIC;

	rec.dsize = sizeof(*header) + data.dsize;
	rec.dptr = talloc_size(ctdb, rec.dsize);
	CTDB_NO_MEMORY(ctdb, rec.dptr);

	memcpy(rec.dptr, header, sizeof(*header));
	/* an empty record may come with a NULL data pointer, which must
	   not be passed to memcpy even for a zero length */
	if (data.dsize != 0) {
		memcpy(rec.dptr + sizeof(*header), data.dptr, data.dsize);
	}

	/* Databases with seqnum updates enabled only get their seqnum
	   changes when/if we modify the data */
	if (ctdb_db->seqnum_update != NULL) {
		TDB_DATA old;
		old = tdb_fetch(ctdb_db->ltdb->tdb, key);

		if ( (old.dsize == rec.dsize)
		&& !memcmp(old.dptr+sizeof(struct ctdb_ltdb_header),
			  rec.dptr+sizeof(struct ctdb_ltdb_header),
			  rec.dsize-sizeof(struct ctdb_ltdb_header)) ) {
			tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_SEQNUM);
			seqnum_suppressed = true;
		}
		if (old.dptr) free(old.dptr);
	}

	DEBUG(DEBUG_DEBUG, (__location__ " db[%s]: %s record: hash[0x%08x]\n",
			    ctdb_db->db_name,
			    keep?"storing":"deleting",
			    ctdb_hash(&key)));

	if (keep) {
		ret = tdb_store(ctdb_db->ltdb->tdb, key, rec, TDB_REPLACE);
	} else {
		ret = tdb_delete(ctdb_db->ltdb->tdb, key);
	}

	if (ret != 0) {
		int lvl = DEBUG_ERR;

		/* deleting a record that does not exist is only logged
		   at debug level */
		if (keep == false &&
		    tdb_error(ctdb_db->ltdb->tdb) == TDB_ERR_NOEXIST)
		{
			lvl = DEBUG_DEBUG;
		}

		DEBUG(lvl, (__location__ " db[%s]: Failed to %s record: "
			    "%d - %s\n",
			    ctdb_db->db_name,
			    keep?"store":"delete", ret,
			    tdb_errorstr(ctdb_db->ltdb->tdb)));

		/* the write failed, so do not touch the delete queue */
		schedule_for_deletion = false;
		remove_from_delete_queue = false;
	}
	/* restore the seqnum flag if it was switched off for this store */
	if (seqnum_suppressed) {
		tdb_add_flags(ctdb_db->ltdb->tdb, TDB_SEQNUM);
	}

	talloc_free(rec.dptr);

	if (schedule_for_deletion) {
		int ret2;
		ret2 = ctdb_local_schedule_for_deletion(ctdb_db, header, key);
		if (ret2 != 0) {
			DEBUG(DEBUG_ERR, (__location__ " ctdb_local_schedule_for_deletion failed.\n"));
		}
	}

	if (remove_from_delete_queue) {
		ctdb_local_remove_from_delete_queue(ctdb_db, header, key);
	}

	return ret;
}
Beispiel #22
0
/*
  record the path of the notification script

  A copy of the string is stored in ctdb->notification_script, owned by
  ctdb. Returns 0 on success, -1 if the copy cannot be allocated.
*/
int ctdb_set_notification_script(struct ctdb_context *ctdb,
				 const char *script)
{
	ctdb->notification_script = talloc_strdup(ctdb, script);

	CTDB_NO_MEMORY(ctdb, ctdb->notification_script);

	return 0;
}
Beispiel #23
0
/*
  local version of ctdb_call

  Looks up the registered call function matching call->call_id on the
  database and runs it on a private copy of the record data. The
  header's last-accessor bookkeeping (laccessor/lacount) is updated for
  caller, and the record is written back if the call produced new data
  or the access came from a node other than ctdb->vnn.

  On success call->reply_data and call->status are filled in; reply data
  is moved onto ctdb so that it survives the temporary call context.

  Returns 0 on success, -1 if the call id is unknown, the call function
  or the store fails, or memory cannot be allocated.
*/
int ctdb_call_local(struct ctdb_db_context *ctdb_db, struct ctdb_call *call,
		    struct ctdb_ltdb_header *header, TDB_DATA *data,
		    uint32_t caller)
{
	struct ctdb_call_info *c;
	struct ctdb_registered_call *fn;
	struct ctdb_context *ctdb = ctdb_db->ctdb;
	void *record_copy;
	
	c = talloc(ctdb, struct ctdb_call_info);
	CTDB_NO_MEMORY(ctdb, c);

	record_copy = talloc_memdup(c, data->dptr, data->dsize);
	if (record_copy == NULL) {
		/* c hangs off the long-lived ctdb context, so it has to
		   be released on this error path like on all the others */
		talloc_free(c);
		CTDB_NO_MEMORY(ctdb, record_copy);
	}

	c->key = call->key;
	c->call_data = &call->call_data;
	c->record_data.dptr = record_copy;
	c->record_data.dsize = data->dsize;
	c->new_data = NULL;
	c->reply_data = NULL;
	c->status = 0;

	for (fn=ctdb_db->calls;fn;fn=fn->next) {
		if (fn->id == call->call_id) break;
	}
	if (fn == NULL) {
		ctdb_set_error(ctdb, "Unknown call id %u\n", call->call_id);
		talloc_free(c);
		return -1;
	}

	if (fn->fn(c) != 0) {
		ctdb_set_error(ctdb, "ctdb_call %u failed\n", call->call_id);
		talloc_free(c);
		return -1;
	}

	/* the access count restarts whenever the accessor changes */
	if (header->laccessor != caller) {
		header->lacount = 0;
	}
	header->laccessor = caller;
	header->lacount++;

	/* we need to force the record to be written out if this was a remote access,
	   so that the lacount is updated */
	if (c->new_data == NULL && header->laccessor != ctdb->vnn) {
		c->new_data = &c->record_data;
	}

	if (c->new_data) {
		if (ctdb_ltdb_store(ctdb_db, call->key, header, *c->new_data) != 0) {
			ctdb_set_error(ctdb, "ctdb_call tdb_store failed\n");
			talloc_free(c);
			return -1;
		}
	}

	if (c->reply_data) {
		call->reply_data = *c->reply_data;
		/* move the reply off c before c is freed below */
		talloc_steal(ctdb, call->reply_data.dptr);
		talloc_set_name_const(call->reply_data.dptr, __location__);
	} else {
		call->reply_data.dptr = NULL;
		call->reply_data.dsize = 0;
	}
	call->status = c->status;

	talloc_free(c);

	return 0;
}
Beispiel #24
0
/*
  queue a packet for sending

  When the queue has an alignment, the length is rounded up to it, the
  rounded length is written into the first 32 bits of data and the
  padding bytes are zeroed - so the caller's buffer must have room for
  the rounded length.

  If nothing is queued yet and the queue has a file descriptor, an
  immediate write is attempted (not in torture mode). Whatever could not
  be written is copied into a queue packet and appended to the out
  queue.

  Returns 0 on success - including the case where the write fails hard
  and the queue is marked dead, which is handled through a separate
  event - and -1 if memory cannot be allocated.
*/
int ctdb_queue_send(struct ctdb_queue *queue, uint8_t *data, uint32_t length)
{
	struct ctdb_queue_pkt *pkt;
	uint32_t length2, full_length;
	void *copy;

	if (queue->alignment) {
		/* enforce the length and alignment rules from the tcp packet allocator */
		length2 = (length+(queue->alignment-1)) & ~(queue->alignment-1);
		*(uint32_t *)data = length2;
	} else {
		length2 = length;
	}

	if (length2 != length) {
		memset(data+length, 0, length2-length);
	}

	full_length = length2;
	
	/* if the queue is empty then try an immediate write, avoiding
	   queue overhead. This relies on non-blocking sockets */
	if (queue->out_queue == NULL && queue->fd != -1 &&
	    !(queue->ctdb->flags & CTDB_FLAG_TORTURE)) {
		ssize_t n = write(queue->fd, data, length2);
		if (n == -1 && errno != EAGAIN && errno != EWOULDBLOCK) {
			talloc_free(queue->fde);
			queue->fde = NULL;
			queue->fd = -1;
			tevent_schedule_immediate(queue->im, queue->ctdb->ev,
						  queue_dead, queue);
			/* yes, we report success, as the dead node is 
			   handled via a separate event */
			return 0;
		}
		if (n > 0) {
			/* only the unwritten tail still needs queueing */
			data += n;
			length2 -= n;
		}
		if (length2 == 0) return 0;
	}

	pkt = talloc(queue, struct ctdb_queue_pkt);
	CTDB_NO_MEMORY(queue->ctdb, pkt);

	copy = talloc_memdup(pkt, data, length2);
	if (copy == NULL) {
		/* pkt is a child of the queue: free it here so a failed
		   send does not leave an orphaned packet behind */
		talloc_free(pkt);
		CTDB_NO_MEMORY(queue->ctdb, copy);
	}
	pkt->data = copy;

	pkt->length = length2;
	pkt->full_length = full_length;

	if (queue->out_queue == NULL && queue->fd != -1) {
		EVENT_FD_WRITEABLE(queue->fde);
	}

	DLIST_ADD_END(queue->out_queue, pkt, NULL);

	queue->out_queue_length++;

	/* optionally give the packet a descriptive talloc name */
	if (queue->ctdb->tunable.verbose_memory_names != 0) {
		struct ctdb_req_header *hdr = (struct ctdb_req_header *)pkt->data;
		switch (hdr->operation) {
		case CTDB_REQ_CONTROL: {
			struct ctdb_req_control *c = (struct ctdb_req_control *)hdr;
			talloc_set_name(pkt, "ctdb_queue_pkt: %s control opcode=%u srvid=%llu datalen=%u",
					queue->name, (unsigned)c->opcode, (unsigned long long)c->srvid, (unsigned)c->datalen);
			break;
		}
		case CTDB_REQ_MESSAGE: {
			struct ctdb_req_message *m = (struct ctdb_req_message *)hdr;
			talloc_set_name(pkt, "ctdb_queue_pkt: %s message srvid=%llu datalen=%u",
					queue->name, (unsigned long long)m->srvid, (unsigned)m->datalen);
			break;
		}
		default:
			talloc_set_name(pkt, "ctdb_queue_pkt: %s operation=%u length=%u src=%u dest=%u",
					queue->name, (unsigned)hdr->operation, (unsigned)hdr->length,
					(unsigned)hdr->srcnode, (unsigned)hdr->destnode);
			break;
		}
	}

	return 0;
}
Beispiel #25
0
/*
  main program
*/
int main(int argc, const char *argv[])
{
	struct ctdb_context *ctdb;
	int interactive = 0;

	struct poptOption popt_options[] = {
		POPT_AUTOHELP
		POPT_CTDB_CMDLINE
		{ "interactive", 'i', POPT_ARG_NONE, &interactive, 0, "don't fork", NULL },
		{ "public-addresses", 0, POPT_ARG_STRING, &options.public_address_list, 0, "public address list file", "filename" },
		{ "public-interface", 0, POPT_ARG_STRING, &options.public_interface, 0, "public interface", "interface"},
		{ "event-script-dir", 0, POPT_ARG_STRING, &options.event_script_dir, 0, "event script directory", "dirname" },
		{ "logging", 0, POPT_ARG_STRING, &options.logging, 0, "logging method to be used", NULL },
		{ "nlist", 0, POPT_ARG_STRING, &options.nlist, 0, "node list file", "filename" },
		{ "notification-script", 0, POPT_ARG_STRING, &options.notification_script, 0, "notification script", "filename" },
		{ "listen", 0, POPT_ARG_STRING, &options.myaddress, 0, "address to listen on", "address" },
		{ "transport", 0, POPT_ARG_STRING, &options.transport, 0, "protocol transport", NULL },
		{ "dbdir", 0, POPT_ARG_STRING, &options.db_dir, 0, "directory for the tdb files", NULL },
		{ "dbdir-persistent", 0, POPT_ARG_STRING, &options.db_dir_persistent, 0, "directory for persistent tdb files", NULL },
		{ "dbdir-state", 0, POPT_ARG_STRING, &options.db_dir_state, 0, "directory for internal state tdb files", NULL },
		{ "reclock", 0, POPT_ARG_STRING, &options.recovery_lock, 0, "recovery lock", "lock" },
		{ "pidfile", 0, POPT_ARG_STRING, &ctdbd_pidfile, 0, "location of PID file", "filename" },
		{ "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL },
		{ "nosetsched", 0, POPT_ARG_NONE, &options.nosetsched, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL },
		{ "start-as-disabled", 0, POPT_ARG_NONE, &options.start_as_disabled, 0, "Node starts in disabled state", NULL },
		{ "start-as-stopped", 0, POPT_ARG_NONE, &options.start_as_stopped, 0, "Node starts in stopped state", NULL },
		{ "no-lmaster", 0, POPT_ARG_NONE, &options.no_lmaster, 0, "disable lmaster role on this node", NULL },
		{ "no-recmaster", 0, POPT_ARG_NONE, &options.no_recmaster, 0, "disable recmaster role on this node", NULL },
		{ "script-log-level", 0, POPT_ARG_INT, &options.script_log_level, 0, "log level of event script output", NULL },
		{ "nopublicipcheck", 0, POPT_ARG_NONE, &options.no_publicipcheck, 0, "don't check we have/don't have the correct public ip addresses", NULL },
		{ "max-persistent-check-errors", 0, POPT_ARG_INT,
		  &options.max_persistent_check_errors, 0,
		  "max allowed persistent check errors (default 0)", NULL },
		{ "sloppy-start", 0, POPT_ARG_NONE, &fast_start, 0, "Do not perform full recovery on start", NULL },
		POPT_TABLEEND
	};
	int opt, ret;
	const char **extra_argv;
	int extra_argc = 0;
	poptContext pc;
	struct tevent_context *ev;

	pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);

	while ((opt = poptGetNextOpt(pc)) != -1) {
		switch (opt) {
		default:
			fprintf(stderr, "Invalid option %s: %s\n", 
				poptBadOption(pc, 0), poptStrerror(opt));
			exit(1);
		}
	}

	/* setup the remaining options for the main program to use */
	extra_argv = poptGetArgs(pc);
	if (extra_argv) {
		extra_argv++;
		while (extra_argv[extra_argc]) extra_argc++;
	}

	talloc_enable_null_tracking();

	fault_setup();

	ev = tevent_context_init(NULL);
	if (ev == NULL) {
		DEBUG(DEBUG_ALERT,("tevent_context_init() failed\n"));
		exit(1);
	}
	tevent_loop_allow_nesting(ev);

	ctdb = ctdb_cmdline_init(ev);

	ctdb->start_as_disabled = options.start_as_disabled;
	ctdb->start_as_stopped  = options.start_as_stopped;

	script_log_level = options.script_log_level;

	if (!ctdb_logging_init(ctdb, options.logging)) {
		exit(1);
	}

	DEBUG(DEBUG_NOTICE,("CTDB starting on node\n"));

	gettimeofday(&ctdb->ctdbd_start_time, NULL);
	gettimeofday(&ctdb->last_recovery_started, NULL);
	gettimeofday(&ctdb->last_recovery_finished, NULL);
	ctdb->recovery_mode    = CTDB_RECOVERY_NORMAL;
	ctdb->recovery_master  = (uint32_t)-1;
	ctdb->upcalls          = &ctdb_upcalls;

	if (options.recovery_lock == NULL) {
		DEBUG(DEBUG_WARNING, ("Recovery lock not set\n"));
	}
	ctdb->recovery_lock = options.recovery_lock;

	TALLOC_FREE(ctdb->idr);
	ret = reqid_init(ctdb, 0, &ctdb->idr);;
	if (ret != 0) {
		DEBUG(DEBUG_ALERT, ("reqid_init failed (%s)\n", strerror(ret)));
		exit(1);
	}

	ctdb_tunables_set_defaults(ctdb);

	ret = ctdb_set_transport(ctdb, options.transport);
	if (ret == -1) {
		DEBUG(DEBUG_ALERT,("ctdb_set_transport failed - %s\n", ctdb_errstr(ctdb)));
		exit(1);
	}

	/* tell ctdb what address to listen on */
	if (options.myaddress) {
		ret = ctdb_set_address(ctdb, options.myaddress);
		if (ret == -1) {
			DEBUG(DEBUG_ALERT,("ctdb_set_address failed - %s\n", ctdb_errstr(ctdb)));
			exit(1);
		}
	}

	/* set ctdbd capabilities */
	ctdb->capabilities = CTDB_CAP_DEFAULT;
	if (options.no_lmaster != 0) {
		ctdb->capabilities &= ~CTDB_CAP_LMASTER;
	}
	if (options.no_recmaster != 0) {
		ctdb->capabilities &= ~CTDB_CAP_RECMASTER;
	}

	/* Initialise this node's PNN to the unknown value.  This will
	 * be set to the correct value by either ctdb_add_node() as
	 * part of loading the nodes file or by
	 * ctdb_tcp_listen_automatic() when the transport is
	 * initialised.  At some point we should de-optimise this and
	 * pull it out into ctdb_start_daemon() so it is done clearly
	 * and only in one place.
	 */
	ctdb->pnn = -1;

	/* Default value for CTDB_BASE - don't override */
	setenv("CTDB_BASE", CTDB_ETCDIR, 0);

	/* tell ctdb what nodes are available */
	if (options.nlist != NULL) {
		ctdb->nodes_file = options.nlist;
	} else {
		ctdb->nodes_file =
			talloc_asprintf(ctdb, "%s/nodes", getenv("CTDB_BASE"));
		if (ctdb->nodes_file == NULL) {
			DEBUG(DEBUG_ALERT,(__location__ " Out of memory\n"));
			exit(1);
		}
	}
	ctdb_load_nodes_file(ctdb);

	ctdb->db_directory = options.db_dir;
	mkdir_p_or_die(ctdb->db_directory, 0700);

	ctdb->db_directory_persistent = options.db_dir_persistent;
	mkdir_p_or_die(ctdb->db_directory_persistent, 0700);

	ctdb->db_directory_state = options.db_dir_state;
	mkdir_p_or_die(ctdb->db_directory_state, 0700);

	if (options.public_interface) {
		ctdb->default_public_interface = talloc_strdup(ctdb, options.public_interface);
		CTDB_NO_MEMORY(ctdb, ctdb->default_public_interface);
	}

	if (options.event_script_dir != NULL) {
		ctdb->event_script_dir = options.event_script_dir;
	} else {
		ctdb->event_script_dir = talloc_asprintf(ctdb, "%s/events.d",
							 getenv("CTDB_BASE"));
		if (ctdb->event_script_dir == NULL) {
			DEBUG(DEBUG_ALERT,(__location__ " Out of memory\n"));
			exit(1);
		}
	}

	if (options.notification_script != NULL) {
		ret = ctdb_set_notification_script(ctdb, options.notification_script);
		if (ret == -1) {
			DEBUG(DEBUG_ALERT,("Unable to setup notification script\n"));
			exit(1);
		}
	}

	ctdb->valgrinding = (options.valgrinding == 1);
	ctdb->do_setsched = (options.nosetsched != 1);
	if (ctdb->valgrinding) {
		ctdb->do_setsched = false;
	}

	ctdb->public_addresses_file = options.public_address_list;
	ctdb->do_checkpublicip = (options.no_publicipcheck == 0);

	if (options.max_persistent_check_errors < 0) {
		ctdb->max_persistent_check_errors = 0xFFFFFFFFFFFFFFFFLL;
	} else {
		ctdb->max_persistent_check_errors = (uint64_t)options.max_persistent_check_errors;
	}

	/* start the protocol running (as a child) */
	return ctdb_start_daemon(ctdb, interactive?false:true);
}