Example #1
0
/*
 * Remote handler: truncate a database file inside the current write
 * transaction.
 *
 * rxcall - incoming call, checked with ubik_CheckAuth()
 * atid   - transaction id handed to urecovery_CheckTid()
 * afile  - file argument forwarded to udisk_truncate()
 * alen   - length argument forwarded to udisk_truncate()
 *
 * Returns the ubik_CheckAuth() failure code; USYNC when there is no
 * current transaction (before or after the tid check); UBADTYPE when the
 * current transaction is not a write transaction; otherwise the result of
 * udisk_truncate().
 */
afs_int32
SDISK_Truncate(struct rx_call *rxcall, struct ubik_tid *atid,
               afs_int32 afile, afs_int32 alen)
{
    afs_int32 rc = ubik_CheckAuth(rxcall);

    if (rc) {
        return rc;
    }

    DBHOLD(ubik_dbase);
    if (!ubik_currentTrans) {
        rc = USYNC;
    } else if (ubik_currentTrans->type != UBIK_WRITETRANS) {
        /* only write transactions are expected on this path */
        rc = UBADTYPE;
    } else {
        urecovery_CheckTid(atid, 0);
        /* test again: there may be no current transaction after the tid check */
        if (ubik_currentTrans) {
            rc = udisk_truncate(ubik_currentTrans, afile, alen);
        } else {
            rc = USYNC;
        }
    }
    DBRELE(ubik_dbase);
    return rc;
}
Example #2
0
/*
 * Remote handler: truncate a file of the database selected by index,
 * inside that database's current write transaction.
 *
 * rxcall - incoming call, checked with ubik_CheckAuth()
 * atid   - transaction id handed to urecovery_CheckTid()
 * index  - selects the entry in ubik_currentTrans[] / ubik_dbase[]
 * afile  - file argument forwarded to udisk_truncate()
 * alen   - length argument forwarded to udisk_truncate()
 *
 * Returns the ubik_CheckAuth() failure code; USYNC when there is no
 * current transaction (before or after the tid check); UBADTYPE when it is
 * not a write transaction; ENOENT when ubik_dbase[index] is not set;
 * otherwise the result of udisk_truncate().
 */
afs_int32
SDISK_Truncate(struct rx_call *rxcall, struct ubik_tid *atid, afs_int32 index,
               afs_int32 afile, afs_int32 alen)
{
    afs_int32 rc;
    struct ubik_dbase *db;

    rc = ubik_CheckAuth(rxcall);
    if (rc) {
        return rc;
    }

    /* These checks run before the database is held. */
    if (!ubik_currentTrans[index]) {
        return USYNC;
    }
    if (ubik_currentTrans[index]->type != UBIK_WRITETRANS) {
        /* only write transactions are expected on this path */
        return UBADTYPE;
    }
    if (!ubik_dbase[index]) {
        return ENOENT;
    }

    db = ubik_currentTrans[index]->dbase;
    DBHOLD(db);
    urecovery_CheckTid(atid, index);
    /* test again: there may be no current transaction after the tid check */
    if (ubik_currentTrans[index]) {
        rc = udisk_truncate(ubik_currentTrans[index], afile, alen);
    } else {
        rc = USYNC;
    }
    DBRELE(db);
    return rc;
}
Example #3
0
afs_int32
SDISK_Abort(struct rx_call *rxcall, struct ubik_tid *atid)
{
    afs_int32 code;

    if ((code = ubik_CheckAuth(rxcall))) {
	return code;
    }
    DBHOLD(ubik_dbase);
    if (!ubik_currentTrans) {
	code = USYNC;
	goto done;
    }
    /* sanity check to make sure only write trans appear here  */
    if (ubik_currentTrans->type != UBIK_WRITETRANS) {
	code = UBADTYPE;
	goto done;
    }

    urecovery_CheckTid(atid, 0);
    if (!ubik_currentTrans) {
	code = USYNC;
	goto done;
    }

    code = udisk_abort(ubik_currentTrans);
    /* If the thread is not waiting for lock - ok to end it */
    if (ubik_currentTrans->locktype != LOCKWAIT) {
	udisk_end(ubik_currentTrans);
    }
    ubik_currentTrans = (struct ubik_trans *)0;
done:
    DBRELE(ubik_dbase);
    return code;
}
Example #4
0
/*
 * Remote handler: relabel the database selected by index with a new
 * version, provided the caller's idea of the old version matches
 * ubik_dbVersion[index].
 *
 * rxcall      - incoming call, checked with ubik_CheckAuth()
 * atid        - transaction id handed to urecovery_CheckTid()
 * index       - selects the entry in ubik_currentTrans[] / ubik_dbase[] /
 *               ubik_dbVersion[]
 * oldversionp - version that must equal ubik_dbVersion[index]
 * newversionp - version to write through the database's setlabel hook
 *
 * Returns the ubik_CheckAuth() failure code; USYNC when there is no
 * current transaction or the old version does not match; UBADTYPE when the
 * transaction is not a write transaction; UDEADLOCK when this host is the
 * sync site; ENOENT when ubik_dbase[index] is not set; otherwise the result
 * of the setlabel hook.
 */
afs_int32
SDISK_SetVersion(struct rx_call *rxcall, struct ubik_tid *atid, afs_int32 index,
                 struct ubik_version *oldversionp,
                 struct ubik_version *newversionp)
{
    afs_int32 rc;
    struct ubik_dbase *db;

    rc = ubik_CheckAuth(rxcall);
    if (rc) {
        return rc;
    }

    if (!ubik_currentTrans[index]) {
        return USYNC;
    }
    if (ubik_currentTrans[index]->type != UBIK_WRITETRANS) {
        /* only write transactions are expected on this path */
        return UBADTYPE;
    }
    if (ubeacon_AmSyncSite()) {
        /* the sync site should never receive this call */
        return UDEADLOCK;
    }
    if (!ubik_dbase[index]) {
        return ENOENT;
    }

    db = ubik_currentTrans[index]->dbase;
    DBHOLD(db);
    urecovery_CheckTid(atid, index);
    if (!ubik_currentTrans[index]) {
        rc = USYNC;
        goto unlock;
    }

    /* Refuse to relabel unless the caller's old version matches ours. */
    if (oldversionp->epoch != ubik_dbVersion[index].epoch
        || oldversionp->counter != ubik_dbVersion[index].counter) {
        rc = USYNC;
        goto unlock;
    }

    rc = (*db->setlabel) (db, 0, newversionp);
    if (rc == 0) {
        /* Update the cached versions only after the label was written. */
        db->version = *newversionp;
        ubik_dbVersion[index] = *newversionp;
    }

unlock:
    DBRELE(db);
    return rc;
}
Example #5
0
/*!
 * \brief Write a vector of data
 *
 * Applies each entry of \p io_vector, in order, to the current write
 * transaction of the database selected by \p index.  The bytes for the
 * entries are taken from consecutive regions of \p io_buffer.
 *
 * \param rxcall     incoming call, checked with ubik_CheckAuth()
 * \param atid       transaction id handed to urecovery_CheckTid()
 * \param index      selects the entry in ubik_currentTrans[] / ubik_dbase[]
 * \param io_vector  array of (file, position, length) descriptors
 * \param io_buffer  data bytes consumed by the descriptors
 *
 * \returns the ubik_CheckAuth() failure code; USYNC when there is no
 * current transaction (before or after the tid check); UBADTYPE when it is
 * not a write transaction; ENOENT when ubik_dbase[index] is not set;
 * UINTERNAL when a descriptor has a negative length or would run past the
 * end of \p io_buffer; otherwise the first non-zero udisk_write() result,
 * or 0 when every write succeeded (or the vector is empty).
 */
afs_int32
SDISK_WriteV(struct rx_call *rxcall, struct ubik_tid *atid, afs_int32 index,
	     iovec_wrt *io_vector, iovec_buf *io_buffer)
{
    afs_int32 code, i, offset;
    struct ubik_dbase *dbase;
    struct ubik_iovec *iovec;
    char *iobuf;

    if ((code = ubik_CheckAuth(rxcall))) {
	return code;
    }
    if (!ubik_currentTrans[index]) {
	return USYNC;
    }
    /* sanity check to make sure only write trans appear here */
    if (ubik_currentTrans[index]->type != UBIK_WRITETRANS) {
	return UBADTYPE;
    }

    if (!ubik_dbase[index]) {
	return ENOENT;
    }

    dbase = ubik_currentTrans[index]->dbase;
    DBHOLD(dbase);
    urecovery_CheckTid(atid, index);
    /* test again: there may be no current transaction after the tid check */
    if (!ubik_currentTrans[index]) {
	DBRELE(dbase);
	return USYNC;
    }

    iovec = (struct ubik_iovec *)io_vector->iovec_wrt_val;
    iobuf = (char *)io_buffer->iovec_buf_val;
    for (i = 0, offset = 0; i < io_vector->iovec_wrt_len; i++) {
	/*
	 * Sanity check for going off end of buffer.  The lengths come from
	 * the caller, so reject negative values outright and compare
	 * against the space that is left rather than computing
	 * offset + length, which could overflow.  offset never exceeds
	 * iovec_buf_len because every accepted entry fit in the remainder.
	 */
	if (iovec[i].length < 0
	    || iovec[i].length > io_buffer->iovec_buf_len - offset) {
	    code = UINTERNAL;
	} else {
	    code =
		udisk_write(ubik_currentTrans[index], iovec[i].file, &iobuf[offset],
			    iovec[i].position, iovec[i].length);
	}
	if (code)
	    break;

	offset += iovec[i].length;
    }

    DBRELE(dbase);
    return code;
}
Example #6
0
/*
 * Remote handler: relabel the database with a new version, provided the
 * caller's idea of the old version passes uvote_eq_dbVersion().
 *
 * rxcall      - incoming call, checked with ubik_CheckAuth()
 * atid        - transaction id handed to urecovery_CheckTid()
 * oldversionp - version tested with uvote_eq_dbVersion()
 * newversionp - version to write through the database's setlabel hook
 *
 * Returns the ubik_CheckAuth() failure code; USYNC when there is no
 * current transaction or the old version does not match; UBADTYPE when the
 * transaction is not a write transaction; UDEADLOCK when this host is the
 * sync site; otherwise the result of the setlabel hook.
 */
afs_int32
SDISK_SetVersion(struct rx_call *rxcall, struct ubik_tid *atid,
                 struct ubik_version *oldversionp,
                 struct ubik_version *newversionp)
{
    afs_int32 rc = ubik_CheckAuth(rxcall);

    if (rc) {
        return rc;
    }

    DBHOLD(ubik_dbase);
    if (!ubik_currentTrans) {
        rc = USYNC;
    } else if (ubik_currentTrans->type != UBIK_WRITETRANS) {
        /* only write transactions are expected on this path */
        rc = UBADTYPE;
    } else if (ubeacon_AmSyncSite()) {
        /* the sync site should never receive this call */
        rc = UDEADLOCK;
    } else {
        urecovery_CheckTid(atid, 0);
        if (!ubik_currentTrans) {
            rc = USYNC;
        } else if (!uvote_eq_dbVersion(*oldversionp)) {
            /* caller's old version does not match: refuse to relabel */
            rc = USYNC;
        } else {
            UBIK_VERSION_LOCK;
            rc = (*ubik_dbase->setlabel) (ubik_dbase, 0, newversionp);
            if (rc == 0) {
                /* Update the cached versions only after the label was written. */
                ubik_dbase->version = *newversionp;
                uvote_set_dbVersion(*newversionp);
            }
            UBIK_VERSION_UNLOCK;
        }
    }
    DBRELE(ubik_dbase);
    return rc;
}
Example #7
0
/*
 * Remote handler: take a lock of type atype on behalf of the current write
 * transaction of the database selected by index.
 *
 * afile and apos are not used.  alen must be 1; any other value is
 * rejected with UBADLOCK.
 *
 * Returns the ubik_CheckAuth() failure code; USYNC when there is no
 * current transaction (before or after the tid check) or when the current
 * transaction changed while the lock was being obtained; ENOENT when
 * ubik_dbase[index] is not set; UBADTYPE when the transaction is not a
 * write transaction; UBADLOCK for alen != 1; otherwise the result of
 * ulock_getLock().
 */
afs_int32
SDISK_Lock(struct rx_call *rxcall, struct ubik_tid *atid, afs_int32 index,
           afs_int32 afile, afs_int32 apos, afs_int32 alen, afs_int32 atype)
{
    afs_int32 rc;
    struct ubik_dbase *db;
    struct ubik_trans *startTrans;

    rc = ubik_CheckAuth(rxcall);
    if (rc) {
        return rc;
    }

    if (!ubik_currentTrans[index]) {
        return USYNC;
    }
    if (!ubik_dbase[index]) {
        return ENOENT;
    }
    if (ubik_currentTrans[index]->type != UBIK_WRITETRANS) {
        /* only write transactions are expected on this path */
        return UBADTYPE;
    }
    if (alen != 1) {
        return UBADLOCK;
    }

    db = ubik_currentTrans[index]->dbase;
    DBHOLD(db);
    urecovery_CheckTid(atid, index);
    if (!ubik_currentTrans[index]) {
        rc = USYNC;
        goto release;
    }

    /* Remember which transaction we started with before waiting. */
    startTrans = ubik_currentTrans[index];
    rc = ulock_getLock(startTrans, atype, 1);

    /*
     * The transaction may have been ended or aborted underneath us
     * (urecovery_CheckTid) while we were waiting for the lock.  If the
     * current transaction is no longer the one we started with, finish
     * off the old one here and report the loss of sync.
     */
    if (rc == 0 && ubik_currentTrans[index] != startTrans) {
        udisk_end(startTrans);
        rc = USYNC;
    }

release:
    DBRELE(db);
    return rc;
}
Example #8
0
/*
 * Remote handler: commit the current write transaction of the database
 * selected by index.
 *
 * rxcall - incoming call, checked with ubik_CheckAuth()
 * atid   - transaction id handed to urecovery_CheckTid()
 * index  - selects the entry in ubik_currentTrans[] / ubik_dbase[] /
 *          ubik_dbVersion[]
 *
 * Returns the ubik_CheckAuth() failure code; ENOENT when ubik_dbase[index]
 * is not set; USYNC when there is no current transaction (before or after
 * the tid check); UBADTYPE when it is not a write transaction; otherwise
 * the result of udisk_commit().
 */
afs_int32
SDISK_Commit(struct rx_call *rxcall, struct ubik_tid *atid, afs_int32 index)
{
    afs_int32 rc;
    struct ubik_dbase *db;

    rc = ubik_CheckAuth(rxcall);
    if (rc) {
        return rc;
    }

    if (!ubik_dbase[index]) {
        return ENOENT;
    }
    if (!ubik_currentTrans[index]) {
        return USYNC;
    }
    if (ubik_currentTrans[index]->type != UBIK_WRITETRANS) {
        /* only write transactions are expected on this path */
        return UBADTYPE;
    }

    db = ubik_currentTrans[index]->dbase;

    /* The cache write lock is taken before the database hold and dropped after it. */
    ObtainWriteLock(&db->cache_lock);
    DBHOLD(db);

    urecovery_CheckTid(atid, index);
    if (ubik_currentTrans[index]) {
        rc = udisk_commit(ubik_currentTrans[index]);
        if (rc == 0) {
            /* sync site should now match */
            ubik_dbVersion[index] = ubik_dbase[index]->version;
        }
    } else {
        rc = USYNC;
    }

    DBRELE(db);
    ReleaseWriteLock(&db->cache_lock);
    return rc;
}
Example #9
0
/*
 * The handlers from here on implement remote execution of write
 * transactions: they run on the other servers while a sync site is
 * executing a write transaction.
 *
 * SDISK_Begin starts a new write transaction and stamps it with the
 * transaction id supplied by the caller.
 *
 * rxcall - incoming call, checked with ubik_CheckAuth()
 * atid   - transaction id; handed to urecovery_CheckTid() and copied into
 *          the new transaction
 *
 * Returns the ubik_CheckAuth() failure code, otherwise the result of
 * udisk_begin().
 */
afs_int32
SDISK_Begin(struct rx_call *rxcall, struct ubik_tid *atid)
{
    afs_int32 rc = ubik_CheckAuth(rxcall);

    if (rc) {
        return rc;
    }

    DBHOLD(ubik_dbase);
    urecovery_CheckTid(atid, 1);
    rc = udisk_begin(ubik_dbase, UBIK_WRITETRANS, &ubik_currentTrans);
    if (rc != 0 || !ubik_currentTrans) {
        goto release;
    }

    /* tag the fresh transaction with the caller's transaction id */
    ubik_currentTrans->tid.epoch = atid->epoch;
    ubik_currentTrans->tid.counter = atid->counter;

release:
    DBRELE(ubik_dbase);
    return rc;
}
Example #10
0
/*
 * Remote handler: abort the current write transaction of the database
 * selected by index.
 *
 * rxcall - incoming call, checked with ubik_CheckAuth()
 * atid   - transaction id handed to urecovery_CheckTid()
 * index  - selects the entry in ubik_currentTrans[] / ubik_dbase[]
 *
 * Returns the ubik_CheckAuth() failure code; USYNC when there is no
 * current transaction (before or after the tid check); UBADTYPE when it is
 * not a write transaction; ENOENT when ubik_dbase[index] is not set;
 * otherwise the result of udisk_abort().  On the abort path
 * ubik_currentTrans[index] is always reset to null.
 */
afs_int32
SDISK_Abort(struct rx_call *rxcall, struct ubik_tid *atid, afs_int32 index)
{
    afs_int32 rc;
    struct ubik_dbase *db;

    rc = ubik_CheckAuth(rxcall);
    if (rc) {
        return rc;
    }

    if (!ubik_currentTrans[index]) {
        return USYNC;
    }
    if (ubik_currentTrans[index]->type != UBIK_WRITETRANS) {
        /* only write transactions are expected on this path */
        return UBADTYPE;
    }
    if (!ubik_dbase[index]) {
        return ENOENT;
    }

    db = ubik_currentTrans[index]->dbase;
    DBHOLD(db);
    urecovery_CheckTid(atid, index);
    if (!ubik_currentTrans[index]) {
        rc = USYNC;
        goto release;
    }

    rc = udisk_abort(ubik_currentTrans[index]);
#if defined(UBIK_PAUSE)
    /* With UBIK_PAUSE the transaction is always ended. */
    udisk_end(ubik_currentTrans[index]);
#else
    /* If the thread is not waiting for lock - ok to end it */
    if (ubik_currentTrans[index]->locktype != LOCKWAIT) {
        udisk_end(ubik_currentTrans[index]);
    }
#endif /* UBIK_PAUSE */
    ubik_currentTrans[index] = (struct ubik_trans *)0;

release:
    DBRELE(db);
    return rc;
}
Example #11
0
/*
 * Remote handler: commit the current write transaction.
 *
 * rxcall - incoming call, checked with ubik_CheckAuth()
 * atid   - transaction id handed to urecovery_CheckTid()
 *
 * Returns the ubik_CheckAuth() failure code; USYNC when there is no
 * current transaction (before or after the tid check); UBADTYPE when it is
 * not a write transaction; otherwise the result of udisk_commit().
 */
afs_int32
SDISK_Commit(struct rx_call *rxcall, struct ubik_tid *atid)
{
    afs_int32 rc = ubik_CheckAuth(rxcall);

    if (rc) {
        return rc;
    }

    /* The cache write lock is taken before the database hold and dropped after it. */
    ObtainWriteLock(&ubik_dbase->cache_lock);
    DBHOLD(ubik_dbase);

    if (!ubik_currentTrans) {
        rc = USYNC;
    } else if (ubik_currentTrans->type != UBIK_WRITETRANS) {
        /* only write transactions are expected on this path */
        rc = UBADTYPE;
    } else {
        urecovery_CheckTid(atid, 0);
        if (!ubik_currentTrans) {
            rc = USYNC;
        } else {
            rc = udisk_commit(ubik_currentTrans);
            if (rc == 0) {
                /* sync site should now match */
                uvote_set_dbVersion(ubik_dbase->version);
            }
        }
    }

    DBRELE(ubik_dbase);
    ReleaseWriteLock(&ubik_dbase->cache_lock);
    return rc;
}
Example #12
0
/*
 * The handlers from here on implement remote execution of write
 * transactions: they run on the other servers while a sync site is
 * executing a write transaction.
 *
 * SDISK_Begin starts a new write transaction on the database selected by
 * index and stamps it with the transaction id supplied by the caller.  Any
 * transaction still recorded in ubik_currentTrans[index] after the tid
 * check is dropped first.
 *
 * rxcall - incoming call, checked with ubik_CheckAuth()
 * atid   - transaction id; handed to urecovery_CheckTid() and copied into
 *          the new transaction
 * index  - selects the entry in ubik_currentTrans[] / ubik_dbase[]
 *
 * Returns the ubik_CheckAuth() failure code; ENOENT when ubik_dbase[index]
 * is not set; UNOQUORUM when urecovery_AllBetter() returns 0; otherwise
 * the result of udisk_begin().
 */
afs_int32
SDISK_Begin(struct rx_call *rxcall, struct ubik_tid *atid, afs_int32 index)
{
    afs_int32 rc = ubik_CheckAuth(rxcall);

    if (rc) {
        return rc;
    }
    if (!ubik_dbase[index]) {
        return ENOENT;
    }

    DBHOLD(ubik_dbase[index]);
    if (urecovery_AllBetter(ubik_dbase[index], 0) == 0) {
        rc = UNOQUORUM;
    } else {
        urecovery_CheckTid(atid, index);
        if (ubik_currentTrans[index]) {
#if defined(UBIK_PAUSE)
            /* With UBIK_PAUSE the leftover transaction is always ended. */
            udisk_end(ubik_currentTrans[index]);
#else
            /* If the thread is not waiting for lock - ok to end it */
            if (ubik_currentTrans[index]->locktype != LOCKWAIT) {
                udisk_end(ubik_currentTrans[index]);
            }
#endif /* UBIK_PAUSE */
            ubik_currentTrans[index] = (struct ubik_trans *)0;
        }

        rc = udisk_begin(ubik_dbase[index], UBIK_WRITETRANS, &ubik_currentTrans[index]);
        if (rc == 0 && ubik_currentTrans[index]) {
            /* tag the fresh transaction with the caller's transaction id */
            ubik_currentTrans[index]->tid.epoch = atid->epoch;
            ubik_currentTrans[index]->tid.counter = atid->counter;
        }
    }
    DBRELE(ubik_dbase[index]);
    return rc;
}
Example #13
0
/*!
 * \brief called by the sync site to handle vote beacons; if rxcall is null,
 * this is a local call
 *
 * \param rxcall  incoming call used to identify the sending host; when null
 *                the candidate is this host (ubik_host[0])
 * \param astate  non-zero when the sender claims to be a sync site
 * \param astart  remembered as lastYesClaim when we vote yes
 * \param avers   remembered as ubik_dbVersion when we vote yes
 * \param atid    remembered as ubik_dbTid when we vote yes, and passed to
 *                urecovery_CheckTid()
 *
 * \returns 0 or time when the vote was sent.  It returns 0 if we are
 * not voting for this sync site, or the time we actually voted yes, if
 * non-zero.
 */
afs_int32
SVOTE_Beacon(struct rx_call * rxcall, afs_int32 astate,
	     afs_int32 astart, struct ubik_version * avers,
	     struct ubik_tid * atid)
{
    afs_int32 otherHost;
    afs_int32 now;
    afs_int32 vote;
    struct rx_connection *aconn;
    struct rx_peer *rxp;
    struct ubik_server *ts;
    int isClone = 0;
    char hoststr[16];		/* scratch buffer for afs_inet_ntoa_r() */

    if (rxcall) {		/* caller's host */
	aconn = rx_ConnectionOf(rxcall);
	rxp = rx_PeerOf(aconn);
	otherHost = rx_HostOf(rxp);

	/* get the primary interface address for this host.  */
	/* This is the identifier that ubik uses. */
	otherHost = ubikGetPrimaryInterfaceAddr(otherHost);
	if (!otherHost) {
	    ubik_dprint("Received beacon from unknown host %s\n",
			afs_inet_ntoa_r(rx_HostOf(rxp), hoststr));
	    return 0;		/* I don't know about you: vote no */
	}
	/* look the sender up in the server list to learn whether it is a clone */
	for (ts = ubik_servers; ts; ts = ts->next) {
	    if (ts->addr[0] == otherHost)
		break;
	}
	/* an unlisted sender is only logged; processing continues with isClone == 0 */
	if (!ts)
	    ubik_dprint("Unknown host %x has sent a beacon\n", otherHost);
	if (ts && ts->isClone)
	    isClone = 1;
    } else {
	otherHost = ubik_host[0];	/* this host */
	isClone = amIClone;
    }

    ubik_dprint("Received beacon type %d from host %s\n", astate,
		afs_inet_ntoa_r(otherHost, hoststr));

    /* compute the lowest server we've heard from.  We'll try to only vote for
     * this dude if we don't already have a synchronization site.  Also, don't
     * let a very old lowestHost confuse things forever.  We pick a new
     * lowestHost after BIGTIME seconds to limit the damage if this host
     * actually crashes.  Finally, we also count in this computation: don't
     * pick someone else if we're even better!
     *
     * Note that the test below must be <=, not <, so that we keep refreshing
     * lowestTime.  Otherwise it will look like we haven't heard from
     * lowestHost in a while and another host could slip in.  */


    /* First compute the lowest host we've heard from, whether we want them
     * for a sync site or not.  If we haven't heard from a site in BIGTIME
     * seconds, we ignore its presence in lowestHost: it may have crashed.
     * Note that we don't ever let anyone appear in our lowestHost if we're
     * lower than them, 'cause we know we're up. */
    /* But do not consider clones for lowestHost since they may never become
     * sync site */
    UBIK_VOTE_LOCK;
    now = FT_ApproxTime();	/* close to current time */
    if (!isClone
	&& (ntohl((afs_uint32)otherHost) <= ntohl((afs_uint32)vote_globals.lowestHost)
	    || vote_globals.lowestTime + BIGTIME < now)) {
	vote_globals.lowestTime = now;
	vote_globals.lowestHost = otherHost;
    }
    /* why do we need this next check?  Consider the case where each of two
     * servers decides the other is lowestHost.  Each stops sending beacons
     * 'cause the other is there.  Not obvious that this process terminates:
     * i.e. each guy could restart procedure and again think other side is
     * lowest.  Need to prove: if one guy in the system is lowest and knows
     * he's lowest, these loops don't occur.  because if someone knows he's
     * lowest, he will send out beacons telling others to vote for him. */
    if (!amIClone
	&& (ntohl((afs_uint32) ubik_host[0]) <= ntohl((afs_uint32)vote_globals.lowestHost)
	    || vote_globals.lowestTime + BIGTIME < now)) {
	vote_globals.lowestTime = now;
	vote_globals.lowestHost = ubik_host[0];
    }

    /* tell if we've heard from a sync site recently (even if we're not voting
     * for this dude yet).  After a while, time the guy out. */
    if (astate) {		/* this guy is a sync site */
	vote_globals.syncHost = otherHost;
	vote_globals.syncTime = now;
    } else if (vote_globals.syncTime + BIGTIME < now) {
	if (vote_globals.syncHost) {
	    ubik_dprint
		("Ubik: Lost contact with sync-site %s (NOT in quorum)\n",
		 afs_inet_ntoa_r(vote_globals.syncHost, hoststr));
	}
	vote_globals.syncHost = 0;
    }

    /* decide how to vote */
    vote = 0;			/* start off voting no */

    /* if this guy isn't a sync site, we don't really have to vote for him.
     * We get to apply some heuristics to try to avoid weird oscillation states
     * in the voting procedure. */
    if (astate == 0) {
	/* in here only if this guy doesn't claim to be a sync site */

	/* if the sender is not lowestHost, then lowestHost is also trying for
	 * our votes: just say no. */
	if (ntohl(vote_globals.lowestHost) != ntohl(otherHost)) {
	    goto done_zero;
	}

	/* someone else *is* a sync site, just say no */
	if (vote_globals.syncHost && vote_globals.syncHost != otherHost)
	    goto done_zero;
    } else if (vote_globals.lastYesHost == 0xffffffff && otherHost == ubik_host[0]) {
	/* fast startup if this is the only non-clone */
	int i = 0;		/* counts non-clone servers other than the sender */
	for (ts = ubik_servers; ts; ts = ts->next) {
	    if (ts->addr[0] == otherHost)
		continue;
	    if (!ts->isClone)
		i++;
	}
	if (!i)
	    vote_globals.lastYesHost = otherHost;
    }


    if (isClone)
	goto done_zero;		/* clone never can become sync site */

    /* Don't promise sync site support to more than one host every BIGTIME
     * seconds.  This is the heart of our invariants in this system. */
    if (vote_globals.ubik_lastYesTime + BIGTIME < now || otherHost == vote_globals.lastYesHost) {
	/* this inner test only decides whether to log; the yes vote below is
	 * recorded either way */
	if ((vote_globals.ubik_lastYesTime + BIGTIME < now) || (otherHost != vote_globals.lastYesHost)
	    || (vote_globals.lastYesState != astate)) {
	    /* A new vote or a change in the vote or changed quorum */
	    ubik_dprint("Ubik: vote 'yes' for %s %s\n",
			afs_inet_ntoa_r(otherHost, hoststr),
			(astate ? "(in quorum)" : "(NOT in quorum)"));
	}

	vote = now;		/* vote yes */
	vote_globals.ubik_lastYesTime = now;	/* remember when we voted yes */
	vote_globals.lastYesClaim = astart;	/* remember for computing when sync site expires */
	vote_globals.lastYesHost = otherHost;	/* and who for */
	vote_globals.lastYesState = astate;	/* remember if site is a sync site */
	vote_globals.ubik_dbVersion = *avers;	/* resync value */
	vote_globals.ubik_dbTid = *atid;	/* transaction id, if any, of active trans */
	/* the vote lock is released before the database is held */
	UBIK_VOTE_UNLOCK;
	DBHOLD(ubik_dbase);
	urecovery_CheckTid(atid, 0);	/* check if current write trans needs to be aborted */
	DBRELE(ubik_dbase);
    } else {
	UBIK_VOTE_UNLOCK;
    }
    return vote;
done_zero:
    /* common "vote no" exit for paths that still hold the vote lock */
    UBIK_VOTE_UNLOCK;
    return 0;
}