Exemple #1
0
/*
  Build the default ltdb header for a record that has no header yet.

  The header is first cleared with ZERO_STRUCTP(), then the lmaster for
  the key is used as both the initial dmaster and the initial laccessor.
*/
static void ltdb_initial_header(struct ctdb_db_context *ctdb_db, 
				TDB_DATA key,
				struct ctdb_ltdb_header *header)
{
	uint32_t lmaster;

	ZERO_STRUCTP(header);

	/* a record without a header starts out owned by its lmaster */
	lmaster = ctdb_lmaster(ctdb_db->ctdb, &key);

	header->dmaster   = lmaster;
	header->laccessor = lmaster;
}
Exemple #2
0
/*
  Build the default ltdb header for a record that has no header yet.

  The header is cleared with ZERO_STRUCTP(), tagged with
  CTDB_REC_FLAG_AUTOMATIC, and its dmaster is set to the lmaster
  computed for the key.
*/
static void ltdb_initial_header(struct ctdb_db_context *ctdb_db, 
				TDB_DATA key,
				struct ctdb_ltdb_header *header)
{
	struct ctdb_context *ctdb = ctdb_db->ctdb;

	ZERO_STRUCTP(header);

	/* mark the header as generated here rather than stored by a client */
	header->flags = CTDB_REC_FLAG_AUTOMATIC;

	/* the lmaster is the first dmaster of the record */
	header->dmaster = ctdb_lmaster(ctdb, &key);
}
Exemple #3
0
/*
  handler for an incoming CTDB_REPLY_REDIRECT packet

  We get this reply after sending a CTDB_REQ_CALL to a node that turned
  out not to be the dmaster for the key. The reply carries a hint
  (c->dmaster) naming the node to try next; the original request is
  re-queued to that node.

  If no outstanding call matches hdr->reqid the packet is ignored.
*/
void ctdb_reply_redirect(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
{
	struct ctdb_reply_redirect *reply = (struct ctdb_reply_redirect *)hdr;
	struct ctdb_call_state *call_state;
	bool redirect_limit_hit;

	call_state = idr_find_type(ctdb->idr, hdr->reqid, struct ctdb_call_state);
	if (call_state == NULL) {
		return;
	}

	/* reparent the reply packet onto the call state */
	talloc_steal(call_state, reply);

	/* exactly when the redirect counter reaches the limit, stop
	   following hints and fall back to the lmaster of the key */
	redirect_limit_hit = (call_state->redirect_count++ == CTDB_MAX_REDIRECT);
	if (redirect_limit_hit) {
		reply->dmaster = ctdb_lmaster(ctdb, &call_state->call.key);
	}

	/* retarget the original request and send it again */
	call_state->node = ctdb->nodes[reply->dmaster];
	call_state->c->hdr.destnode = reply->dmaster;

	ctdb_queue_packet(ctdb, &call_state->c->hdr);
}
Exemple #4
0
/*
  send a dmaster request (give another node the dmaster for a record)

  This is always sent to the lmaster, which ensures that the lmaster
  always knows who the dmaster is. The lmaster will then send a
  CTDB_REPLY_DMASTER to the new dmaster

  ctdb_db - database the record belongs to
  c       - the call request that triggered the hand-over; its reqid,
            db_id and source node are copied into the new packet
  header  - ltdb header of the record; its dmaster field is updated here
  key     - record key, copied into the packet
  data    - record data, copied into the packet after the key

  The local ltdb copy is updated before the packet is queued. If that
  update fails the packet is not sent and ctdb_fatal() is called, which
  matches how ctdb_request_dmaster() treats a failed store.
*/
static void ctdb_call_send_dmaster(struct ctdb_db_context *ctdb_db, 
				   struct ctdb_req_call *c, 
				   struct ctdb_ltdb_header *header,
				   TDB_DATA *key, TDB_DATA *data)
{
	struct ctdb_req_dmaster *r;
	struct ctdb_context *ctdb = ctdb_db->ctdb;
	int len;
	
	/* packet = fixed header part + key bytes + data bytes */
	len = offsetof(struct ctdb_req_dmaster, data) + key->dsize + data->dsize;
	r = ctdb->methods->allocate_pkt(ctdb, len);
	CTDB_NO_MEMORY_FATAL(ctdb, r);
	talloc_set_name_const(r, "send_dmaster packet");
	r->hdr.length    = len;
	r->hdr.ctdb_magic = CTDB_MAGIC;
	r->hdr.ctdb_version = CTDB_VERSION;
	r->hdr.operation = CTDB_REQ_DMASTER;
	r->hdr.destnode  = ctdb_lmaster(ctdb, key);
	r->hdr.srcnode   = ctdb->vnn;
	r->hdr.reqid     = c->hdr.reqid;
	r->db_id         = c->db_id;
	/* the node that issued the call is the requested new dmaster */
	r->dmaster       = c->hdr.srcnode;
	r->keylen        = key->dsize;
	r->datalen       = data->dsize;
	memcpy(&r->data[0], key->dptr, key->dsize);
	memcpy(&r->data[key->dsize], data->dptr, data->dsize);

	/* XXX - probably not necessary when lmaster==dmaster
	   update the ltdb to record the new dmaster */
	header->dmaster = r->hdr.destnode;
	if (ctdb_ltdb_store(ctdb_db, *key, header, *data) != 0) {
		/* do not hand the record over if our own copy could not
		   be updated */
		talloc_free(r);
		ctdb_fatal(ctdb, "ctdb_call_send_dmaster unable to update dmaster");
		return;
	}
	
	ctdb_queue_packet(ctdb, &r->hdr);

	talloc_free(r);
}
Exemple #5
0
/*
  called when a CTDB_REQ_DMASTER packet comes in

  this comes into the lmaster for a record when the current dmaster
  wants to give up the dmaster role and give it to someone else

  ctdb - the ctdb context
  hdr  - the incoming packet; it is reinterpreted as a
         struct ctdb_req_dmaster whose data[] holds the key followed
         by the record data

  Steps:
   1. look up the database named by c->db_id (an error reply is sent
      if it is unknown)
   2. lock and fetch the current record; a return of -2 from the fetch
      means the request was deferred and nothing more is done here
   3. verify that the sender is the current dmaster or the lmaster
   4. store the record with the new dmaster and release the lock
   5. send a CTDB_REPLY_DMASTER carrying the data to the new dmaster

  NOTE(review): c->keylen and c->datalen are taken from the packet
  without being checked against hdr->length - confirm that the
  transport validates them.
*/
void ctdb_request_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
{
	struct ctdb_req_dmaster *c = (struct ctdb_req_dmaster *)hdr;
	struct ctdb_reply_dmaster *r;
	TDB_DATA key, data, data2;
	struct ctdb_ltdb_header header;
	struct ctdb_db_context *ctdb_db;
	int ret, len;
	TALLOC_CTX *tmp_ctx;

	/* key and data are stored back to back in the packet payload */
	key.dptr = c->data;
	key.dsize = c->keylen;
	data.dptr = c->data + c->keylen;
	data.dsize = c->datalen;

	ctdb_db = find_ctdb_db(ctdb, c->db_id);
	if (!ctdb_db) {
		ctdb_send_error(ctdb, hdr, -1,
				"Unknown database in request. db_id==0x%08x",
				c->db_id);
		return;
	}
	
	/* fetch the current record; data2 receives the stored data, which
	   is not used further because the packet carries the new data */
	ret = ctdb_ltdb_lock_fetch_requeue(ctdb_db, key, &header, hdr, &data2,
					   ctdb_recv_raw_pkt, ctdb);
	if (ret == -1) {
		ctdb_fatal(ctdb, "ctdb_req_dmaster failed to fetch record");
		return;
	}
	if (ret == -2) {
		DEBUG(2,(__location__ " deferring ctdb_request_dmaster\n"));
		return;
	}
	
	/* its a protocol error if the sending node is not the current dmaster */
	if (header.dmaster != hdr->srcnode && 
	    hdr->srcnode != ctdb_lmaster(ctdb_db->ctdb, &key)) {
		/* release the record lock taken by the fetch above so it
		   is not left held on this error path */
		ctdb_ltdb_unlock(ctdb_db, key);
		ctdb_fatal(ctdb, "dmaster request from non-master");
		return;
	}
	
	header.dmaster = c->dmaster;
	ret = ctdb_ltdb_store(ctdb_db, key, &header, data);
	ctdb_ltdb_unlock(ctdb_db, key);
	if (ret != 0) {
		ctdb_fatal(ctdb, "ctdb_req_dmaster unable to update dmaster");
		return;
	}

	/* put the packet on a temporary context, allowing us to safely free
	   it below even if ctdb_reply_dmaster() has freed it already */
	tmp_ctx = talloc_new(ctdb);
	/* without this check a failed talloc_new() would parent the packet
	   on the NULL context and talloc_free(NULL) would not release it */
	CTDB_NO_MEMORY_FATAL(ctdb, tmp_ctx);

	/* send the CTDB_REPLY_DMASTER */
	len = offsetof(struct ctdb_reply_dmaster, data) + data.dsize;
	r = ctdb->methods->allocate_pkt(tmp_ctx, len);
	CTDB_NO_MEMORY_FATAL(ctdb, r);

	talloc_set_name_const(r, "reply_dmaster packet");
	r->hdr.length    = len;
	r->hdr.ctdb_magic = CTDB_MAGIC;
	r->hdr.ctdb_version = CTDB_VERSION;
	r->hdr.operation = CTDB_REPLY_DMASTER;
	r->hdr.destnode  = c->dmaster;
	r->hdr.srcnode   = ctdb->vnn;
	r->hdr.reqid     = hdr->reqid;
	r->datalen       = data.dsize;
	memcpy(&r->data[0], data.dptr, data.dsize);

	ctdb_queue_packet(ctdb, &r->hdr);

	talloc_free(tmp_ctx);
}
Exemple #6
0
/**
 * write a record to a normal database
 *
 * This is the server-variant of the ctdb_ltdb_store function.
 * It contains logic to determine whether a record should be
 * stored or deleted. It also sends SCHEDULE_FOR_DELETION
 * controls to the local ctdb daemon if appropriate.
 *
 * @param ctdb_db  database to write to
 * @param key      key of the record
 * @param header   ltdb header to store in front of the data; its rsn may
 *                 be incremented and its VACUUM_MIGRATED and AUTOMATIC
 *                 flags are always cleared by this function
 * @param data     record payload (may be empty)
 *
 * @return the result of tdb_store() / tdb_delete() (0 on success).
 *         Allocation failure is handled by CTDB_NO_MEMORY(), which
 *         presumably returns -1 from this function - verify against
 *         the macro definition.
 */
static int ctdb_ltdb_store_server(struct ctdb_db_context *ctdb_db,
				  TDB_DATA key,
				  struct ctdb_ltdb_header *header,
				  TDB_DATA data)
{
	struct ctdb_context *ctdb = ctdb_db->ctdb;
	TDB_DATA rec;
	int ret;
	bool seqnum_suppressed = false;
	bool keep = false;
	bool schedule_for_deletion = false;
	bool remove_from_delete_queue = false;
	uint32_t lmaster;

	/* torture mode only: warn if the stored record already carries a
	   higher rsn than the one we are about to write */
	if (ctdb->flags & CTDB_FLAG_TORTURE) {
		struct ctdb_ltdb_header *h2;
		rec = tdb_fetch(ctdb_db->ltdb->tdb, key);
		h2 = (struct ctdb_ltdb_header *)rec.dptr;
		/* the stored record must hold a complete header (not merely
		   pointer-size bytes) before h2->rsn may be read */
		if (rec.dptr && rec.dsize >= sizeof(*h2) && h2->rsn > header->rsn) {
			DEBUG(DEBUG_CRIT,("RSN regression! %llu %llu\n",
				 (unsigned long long)h2->rsn, (unsigned long long)header->rsn));
		}
		free(rec.dptr);
	}

	if (ctdb->vnn_map == NULL) {
		/*
		 * Called from a client: always store the record
		 * Also don't call ctdb_lmaster since it uses the vnn_map!
		 */
		keep = true;
		goto store;
	}

	lmaster = ctdb_lmaster(ctdb_db->ctdb, &key);

	/*
	 * If we migrate an empty record off to another node
	 * and the record has not been migrated with data,
	 * delete the record instead of storing the empty record.
	 */
	if (data.dsize != 0) {
		keep = true;
	} else if (header->flags & CTDB_REC_RO_FLAGS) {
		keep = true;
	} else if (ctdb_db->persistent) {
		keep = true;
	} else if (header->flags & CTDB_REC_FLAG_AUTOMATIC) {
		/*
		 * The record is not created by the client but
		 * automatically by the ctdb_ltdb_fetch logic that
		 * creates a record with an initial header in the
		 * ltdb before trying to migrate the record from
		 * the current lmaster. Keep it instead of trying
		 * to delete the non-existing record...
		 */
		keep = true;
		schedule_for_deletion = true;
	} else if (header->flags & CTDB_REC_FLAG_MIGRATED_WITH_DATA) {
		keep = true;
	} else if (ctdb_db->ctdb->pnn == lmaster) {
		/*
		 * If we are lmaster, then we usually keep the record.
		 * But if we retrieve the dmaster role by a VACUUM_MIGRATE
		 * and the record is empty and has never been migrated
		 * with data, then we should delete it instead of storing it.
		 * This is part of the vacuuming process.
		 *
		 * The reason that we usually need to store even empty records
		 * on the lmaster is that a client operating directly on the
		 * lmaster (== dmaster) expects the local copy of the record to
		 * exist after successful ctdb migrate call. If the record does
		 * not exist, the client goes into a migrate loop and eventually
		 * fails. So storing the empty record makes sure that we do not
		 * need to change the client code.
		 */
		if (!(header->flags & CTDB_REC_FLAG_VACUUM_MIGRATED)) {
			keep = true;
		} else if (ctdb_db->ctdb->pnn != header->dmaster) {
			keep = true;
		}
	} else if (ctdb_db->ctdb->pnn == header->dmaster) {
		keep = true;
	}

	if (keep) {
		/* only the dmaster of a non-persistent record without
		   read-only flags bumps the rsn */
		if (!ctdb_db->persistent &&
		    (ctdb_db->ctdb->pnn == header->dmaster) &&
		    !(header->flags & CTDB_REC_RO_FLAGS))
		{
			header->rsn++;

			if (data.dsize == 0) {
				schedule_for_deletion = true;
			}
		}
		remove_from_delete_queue = !schedule_for_deletion;
	}

store:
	/*
	 * The VACUUM_MIGRATED flag is only set temporarily for
	 * the above logic when the record was retrieved by a
	 * VACUUM_MIGRATE call and should not be stored in the
	 * database.
	 *
	 * The VACUUM_MIGRATE call is triggered by a vacuum fetch,
	 * and there are two cases in which the corresponding record
	 * is stored in the local database:
	 * 1. The record has been migrated with data in the past
	 *    (the MIGRATED_WITH_DATA record flag is set).
	 * 2. The record has been filled with data again since it
	 *    had been submitted in the VACUUM_FETCH message to the
	 *    lmaster.
	 * For such records it is important to not store the
	 * VACUUM_MIGRATED flag in the database.
	 */
	header->flags &= ~CTDB_REC_FLAG_VACUUM_MIGRATED;

	/*
	 * Similarly, clear the AUTOMATIC flag which should not enter
	 * the local database copy since this would require client
	 * modifications to clear the flag when the client stores
	 * the record.
	 */
	header->flags &= ~CTDB_REC_FLAG_AUTOMATIC;

	/* the stored blob is the header immediately followed by the data */
	rec.dsize = sizeof(*header) + data.dsize;
	rec.dptr = talloc_size(ctdb, rec.dsize);
	CTDB_NO_MEMORY(ctdb, rec.dptr);

	memcpy(rec.dptr, header, sizeof(*header));
	/* an empty record may come with a NULL data.dptr, which must not
	   be handed to memcpy() even for a zero length */
	if (data.dsize > 0) {
		memcpy(rec.dptr + sizeof(*header), data.dptr, data.dsize);
	}

	/* Databases with seqnum updates enabled only get their seqnum
	   changes when/if we modify the data */
	if (ctdb_db->seqnum_update != NULL) {
		TDB_DATA old;
		old = tdb_fetch(ctdb_db->ltdb->tdb, key);

		/* same size and same payload after the header: temporarily
		   drop TDB_SEQNUM so this write does not bump the seqnum */
		if ( (old.dsize == rec.dsize)
		&& !memcmp(old.dptr+sizeof(struct ctdb_ltdb_header),
			  rec.dptr+sizeof(struct ctdb_ltdb_header),
			  rec.dsize-sizeof(struct ctdb_ltdb_header)) ) {
			tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_SEQNUM);
			seqnum_suppressed = true;
		}
		free(old.dptr);
	}

	DEBUG(DEBUG_DEBUG, (__location__ " db[%s]: %s record: hash[0x%08x]\n",
			    ctdb_db->db_name,
			    keep?"storing":"deleting",
			    ctdb_hash(&key)));

	if (keep) {
		ret = tdb_store(ctdb_db->ltdb->tdb, key, rec, TDB_REPLACE);
	} else {
		ret = tdb_delete(ctdb_db->ltdb->tdb, key);
	}

	if (ret != 0) {
		int lvl = DEBUG_ERR;

		/* deleting a record that does not exist is only logged at
		   debug level */
		if (keep == false &&
		    tdb_error(ctdb_db->ltdb->tdb) == TDB_ERR_NOEXIST)
		{
			lvl = DEBUG_DEBUG;
		}

		DEBUG(lvl, (__location__ " db[%s]: Failed to %s record: "
			    "%d - %s\n",
			    ctdb_db->db_name,
			    keep?"store":"delete", ret,
			    tdb_errorstr(ctdb_db->ltdb->tdb)));

		/* the write failed, so leave the delete queue untouched */
		schedule_for_deletion = false;
		remove_from_delete_queue = false;
	}
	if (seqnum_suppressed) {
		tdb_add_flags(ctdb_db->ltdb->tdb, TDB_SEQNUM);
	}

	talloc_free(rec.dptr);

	if (schedule_for_deletion) {
		int ret2;
		ret2 = ctdb_local_schedule_for_deletion(ctdb_db, header, key);
		if (ret2 != 0) {
			DEBUG(DEBUG_ERR, (__location__ " ctdb_local_schedule_for_deletion failed.\n"));
		}
	}

	if (remove_from_delete_queue) {
		ctdb_local_remove_from_delete_queue(ctdb_db, header, key);
	}

	return ret;
}
/*
  main program

  Parses the command line (the "record"/-r option overrides TESTKEY),
  connects to ctdb as a client, prints the key and its lmaster, attaches
  to "test.tdb", waits until the recovery mode reads 0 and then calls
  fetch_lock_once() in an endless loop.

  TESTKEY and ctdb_db are not declared in this function; presumably
  they are file-scope variables defined elsewhere in this file.

  Exits with status 1 on an invalid option, a failed client connection
  or a failed attach, and with status 10 if the vnnmap cannot be read.
*/
int main(int argc, const char *argv[])
{
	struct ctdb_context *ctdb;
	TDB_DATA key;

	struct poptOption popt_options[] = {
		POPT_AUTOHELP
		POPT_CTDB_CMDLINE
		{ "record",      'r', POPT_ARG_STRING, &TESTKEY, 0, "record", "string" },
		POPT_TABLEEND
	};
	int opt, ret;
	poptContext pc;
	struct event_context *ev;

	pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);

	/* every option in the table has val 0, so any value other than -1
	   returned here is reported as an invalid option */
	while ((opt = poptGetNextOpt(pc)) != -1) {
		switch (opt) {
		default:
			fprintf(stderr, "Invalid option %s: %s\n", 
				poptBadOption(pc, 0), poptStrerror(opt));
			exit(1);
		}
	}

	ev = event_context_init(NULL);

	ctdb = ctdb_cmdline_client(ev, timeval_current_ofs(5, 0));
	if (ctdb == NULL) {
		exit(1);
	}

	key.dptr  = discard_const(TESTKEY);
	key.dsize = strlen(TESTKEY);

	/* ctdb_lmaster() needs the vnn map, so fetch it before printing */
	ret = ctdb_ctrl_getvnnmap(ctdb, timeval_zero(), CTDB_CURRENT_NODE, ctdb, &ctdb->vnn_map);
	if (ret != 0) {
		printf("failed to get vnnmap\n");
		exit(10);
	}
	printf("Record:%s\n", TESTKEY);
	/* ctdb_lmaster() yields an unsigned 32-bit node number */
	printf("Lmaster : %u\n", (unsigned int)ctdb_lmaster(ctdb, &key)); 

	/* attach to a specific database */
	ctdb_db = ctdb_attach(ctdb, timeval_current_ofs(5, 0), "test.tdb", false, 0);
	if (!ctdb_db) {
		printf("ctdb_attach failed - %s\n", ctdb_errstr(ctdb));
		exit(1);
	}

	printf("Waiting for cluster\n");
	while (1) {
		/* recmode keeps its initial value of 1 if the control call
		   does not update it, so the loop simply polls again */
		uint32_t recmode=1;
		ctdb_ctrl_getrecmode(ctdb, ctdb, timeval_zero(), CTDB_CURRENT_NODE, &recmode);
		if (recmode == 0) break;
		event_loop_once(ev);
	}

	while (1) {
		fetch_lock_once(ctdb, ev);
	}

	return 0;
}