Exemple #1
0
/**
 * @param aname Volume name.
 * @param acell Cell id.
 * @param agood
 * @param areq Request type.
 * @param locktype Type of lock to be used.
 * @return Volume or NULL if failure.
 */
static struct volume *
afs_NewVolumeByName(char *aname, afs_int32 acell, int agood,
		    struct vrequest *areq, afs_int32 locktype)
{
    afs_int32 code, type = 0;
    struct volume *tv, *tv1;
    struct vldbentry *tve;
    struct nvldbentry *ntve;
    struct uvldbentry *utve;
    struct cell *tcell;
    char *tbuffer, *ve;
    struct afs_conn *tconn;
    struct vrequest treq;
    struct rx_connection *rxconn;

    if (strlen(aname) > VL_MAXNAMELEN)	/* Invalid volume name */
	return NULL;

    tcell = afs_GetCell(acell, READ_LOCK);
    if (!tcell) {
	return NULL;
    }

    /* allow null request if we don't care about ENODEV/ETIMEDOUT distinction */
    if (!areq)
	areq = &treq;


    afs_Trace2(afs_iclSetp, CM_TRACE_GETVOL, ICL_TYPE_STRING, aname,
	       ICL_TYPE_POINTER, aname);
    tbuffer = osi_AllocLargeSpace(AFS_LRALLOCSIZ);
    tve = (struct vldbentry *)(tbuffer + 1024);
    ntve = (struct nvldbentry *)tve;
    utve = (struct uvldbentry *)tve;
    afs_InitReq(&treq, afs_osi_credp);	/* *must* be unauth for vldb */
    do {
	tconn =
	    afs_ConnByMHosts(tcell->cellHosts, tcell->vlport, tcell->cellNum,
			     &treq, SHARED_LOCK, 0, &rxconn);
	if (tconn) {
	    if (tconn->srvr->server->flags & SNO_LHOSTS) {
		type = 0;
		RX_AFS_GUNLOCK();
		code = VL_GetEntryByNameO(rxconn, aname, tve);
		RX_AFS_GLOCK();
	    } else if (tconn->srvr->server->flags & SYES_LHOSTS) {
		type = 1;
		RX_AFS_GUNLOCK();
		code = VL_GetEntryByNameN(rxconn, aname, ntve);
		RX_AFS_GLOCK();
	    } else {
		type = 2;
		RX_AFS_GUNLOCK();
		code = VL_GetEntryByNameU(rxconn, aname, utve);
		RX_AFS_GLOCK();
		if (!(tconn->srvr->server->flags & SVLSRV_UUID)) {
		    if (code == RXGEN_OPCODE) {
			type = 1;
			RX_AFS_GUNLOCK();
			code = VL_GetEntryByNameN(rxconn, aname, ntve);
			RX_AFS_GLOCK();
			if (code == RXGEN_OPCODE) {
			    type = 0;
			    tconn->srvr->server->flags |= SNO_LHOSTS;
			    RX_AFS_GUNLOCK();
			    code = VL_GetEntryByNameO(rxconn, aname, tve);
			    RX_AFS_GLOCK();
			} else if (!code)
			    tconn->srvr->server->flags |= SYES_LHOSTS;
		    } else if (!code)
			tconn->srvr->server->flags |= SVLSRV_UUID;
		}
		lastnvcode = code;
	    }
	} else
	    code = -1;
    } while (afs_Analyze(tconn, rxconn, code, NULL, &treq, -1,	/* no op code for this */
			 SHARED_LOCK, tcell));

    if (code) {
	/* If the client has yet to contact this cell and contact failed due
	 * to network errors, mark the VLDB servers as back up.
	 * That the client tried and failed can be determined from the
	 * fact that there was a downtime incident, but CHasVolRef is not set.
	 */
    /* RT 48959 - unclear if this should really go */
#if 0
	if (areq->networkError && !(tcell->states & CHasVolRef)) {
	    int i;
	    struct server *sp;
	    struct srvAddr *sap;
	    for (i = 0; i < AFS_MAXCELLHOSTS; i++) {
		if ((sp = tcell->cellHosts[i]) == NULL)
		    break;
		for (sap = sp->addr; sap; sap = sap->next_sa)
		    afs_MarkServerUpOrDown(sap, 0);
	    }
	}
#endif
	afs_CopyError(&treq, areq);
	osi_FreeLargeSpace(tbuffer);
	afs_PutCell(tcell, READ_LOCK);
	return NULL;
    }
    /*
     * Check to see if this cell has not yet referenced a volume.  If
     * it hasn't, it's just about to change its status, and we need to mark
     * this fact down. Note that it is remotely possible that afs_SetupVolume
     * could fail and we would still not have a volume reference.
     */
    if (!(tcell->states & CHasVolRef)) {
	tcell->states |= CHasVolRef;
	afs_stats_cmperf.numCellsContacted++;
    }
    /*First time a volume in this cell has been referenced */
    if (type == 2)
	ve = (char *)utve;
    else if (type == 1)
	ve = (char *)ntve;
    else
	ve = (char *)tve;
    tv = afs_SetupVolume(0, aname, ve, tcell, agood, type, &treq);
    if ((agood == 3) && tv && tv->backVol) {
	/*
	 * This means that very soon we'll ask for the BK volume so
	 * we'll prefetch it (well we did already.)
	 */
	tv1 =
	    afs_SetupVolume(tv->backVol, (char *)0, ve, tcell, 0, type, &treq);
	if (tv1) {
	    tv1->refCount--;
	}
    }
    if ((agood >= 2) && tv && tv->roVol) {
	/*
	 * This means that very soon we'll ask for the RO volume so
	 * we'll prefetch it (well we did already.)
	 */
	tv1 = afs_SetupVolume(tv->roVol, NULL, ve, tcell, 0, type, &treq);
	if (tv1) {
	    tv1->refCount--;
	}
    }
    osi_FreeLargeSpace(tbuffer);
    afs_PutCell(tcell, READ_LOCK);
    return tv;

}				/*afs_NewVolumeByName */
/*------------------------------------------------------------------------
 * EXPORTED afs_Analyze
 *
 * Description:
 *	Analyze the outcome of an RPC operation, taking whatever support
 *	actions are necessary.
 *
 * Arguments:
 *	aconn : Ptr to the relevant connection on which the call was made.
 *	acode : The return code experienced by the RPC.
 *	afid  : The FID of the file involved in the action.  This argument
 *		may be null if none was involved.
 *	areq  : The request record associated with this operation.
 *      op    : which RPC we are analyzing.
 *      cellp : pointer to a cell struct.  Must provide either fid or cell.
 *
 * Returns:
 *	Non-zero value if the related RPC operation should be retried,
 *	zero otherwise.
 *
 * Environment:
 *	This routine is typically called in a do-while loop, causing the
 *	embedded RPC operation to be called repeatedly if appropriate
 *	until whatever error condition (if any) is intolerable.
 *
 * Side Effects:
 *	As advertised.
 *
 * NOTE:
 *	The retry return value is used by afs_StoreAllSegments to determine
 *	if this is a temporary or permanent error.
 *------------------------------------------------------------------------*/
int
afs_Analyze(register struct afs_conn *aconn, afs_int32 acode,
	    struct VenusFid *afid, register struct vrequest *areq, int op,
	    afs_int32 locktype, struct cell *cellp)
{
    afs_int32 i;
    struct srvAddr *sa;
    struct server *tsp;
    struct volume *tvp;
    afs_int32 shouldRetry = 0;
    afs_int32 serversleft = 1;
    struct afs_stats_RPCErrors *aerrP;
    afs_int32 markeddown;

 
 
    if (AFS_IS_DISCONNECTED && !AFS_IN_SYNC) {
	/* On reconnection, act as connected. XXX: for now.... */
        /* SXW - This may get very tired after a while. We should try and
	 *       intercept all RPCs before they get here ... */
	/*printf("afs_Analyze: disconnected\n");*/
	afs_FinalizeReq(areq);
	if (aconn) {
	    /* SXW - I suspect that this will _never_ happen - we shouldn't
	     *       get a connection because we're disconnected !!!*/
	    afs_PutConn(aconn, locktype);
	}
	return 0;
    }
    
    AFS_STATCNT(afs_Analyze);
    afs_Trace4(afs_iclSetp, CM_TRACE_ANALYZE, ICL_TYPE_INT32, op,
	       ICL_TYPE_POINTER, aconn, ICL_TYPE_INT32, acode, ICL_TYPE_LONG,
	       areq->uid);

    aerrP = (struct afs_stats_RPCErrors *)0;

    if ((op >= 0) && (op < AFS_STATS_NUM_FS_RPC_OPS))
	aerrP = &(afs_stats_cmfullperf.rpc.fsRPCErrors[op]);

    afs_FinalizeReq(areq);
    if (!aconn && areq->busyCount) {	/* one RPC or more got VBUSY/VRESTARTING */

	tvp = afs_FindVolume(afid, READ_LOCK);
	if (tvp) {
	    afs_warnuser("afs: Waiting for busy volume %u (%s) in cell %s\n",
			 (afid ? afid->Fid.Volume : 0),
			 (tvp->name ? tvp->name : ""),
			 ((tvp->serverHost[0]
			   && tvp->serverHost[0]->cell) ? tvp->serverHost[0]->
			  cell->cellName : ""));

	    for (i = 0; i < MAXHOSTS; i++) {
		if (tvp->status[i] != not_busy && tvp->status[i] != offline) {
		    tvp->status[i] = not_busy;
		}
		if (tvp->status[i] == not_busy)
		    shouldRetry = 1;
	    }
	    afs_PutVolume(tvp, READ_LOCK);
	} else {
	    afs_warnuser("afs: Waiting for busy volume %u\n",
			 (afid ? afid->Fid.Volume : 0));
	}

	if (areq->busyCount > 100) {
	    if (aerrP)
		(aerrP->err_Volume)++;
	    areq->volumeError = VOLBUSY;
	    shouldRetry = 0;
	} else {
	    VSleep(afs_BusyWaitPeriod);	/* poll periodically */
	}
	if (shouldRetry != 0)
	    areq->busyCount++;

	return shouldRetry;	/* should retry */
    }

    if (!aconn || !aconn->srvr) {
	if (!areq->volumeError) {
	    if (aerrP)
		(aerrP->err_Network)++;
	    if (hm_retry_int && !(areq->flags & O_NONBLOCK) &&	/* "hard" mount */
		((afid && afs_IsPrimaryCellNum(afid->Cell))
		 || (cellp && afs_IsPrimaryCell(cellp)))) {
		if (!afid) {
		    afs_warnuser
			("afs: hard-mount waiting for a vlserver to return to service\n");
		    VSleep(hm_retry_int);
		    afs_CheckServers(1, cellp);
		    shouldRetry = 1;
		} else {
		    tvp = afs_FindVolume(afid, READ_LOCK);
		    if (!tvp || (tvp->states & VRO)) {
			shouldRetry = hm_retry_RO;
		    } else {
			shouldRetry = hm_retry_RW;
		    }
		    if (tvp)
			afs_PutVolume(tvp, READ_LOCK);
		    if (shouldRetry) {
			afs_warnuser
			    ("afs: hard-mount waiting for volume %u\n",
			     afid->Fid.Volume);
			VSleep(hm_retry_int);
			afs_CheckServers(1, cellp);
		    }
		}
	    } /* if (hm_retry_int ... */
	    else {
		areq->networkError = 1;
	    }
	}
	return shouldRetry;
    }

    /* Find server associated with this connection. */
    sa = aconn->srvr;
    tsp = sa->server;

    /* Before we do anything with acode, make sure we translate it back to
     * a system error */
    if ((acode & ~0xff) == ERROR_TABLE_BASE_uae)
	acode = et_to_sys_error(acode);

    if (acode == 0) {
	/* If we previously took an error, mark this volume not busy */
	if (areq->volumeError) {
	    tvp = afs_FindVolume(afid, READ_LOCK);
	    if (tvp) {
		for (i = 0; i < MAXHOSTS; i++) {
		    if (tvp->serverHost[i] == tsp) {
			tvp->status[i] = not_busy;
		    }
		}
		afs_PutVolume(tvp, READ_LOCK);
	    }
	}

	afs_PutConn(aconn, locktype);
	return 0;
    }

    /* If network troubles, mark server as having bogued out again. */
    /* VRESTARTING is < 0 because of backward compatibility issues 
     * with 3.4 file servers and older cache managers */
#ifdef AFS_64BIT_CLIENT
    if (acode == -455)
	acode = 455;
#endif /* AFS_64BIT_CLIENT */
    if ((acode < 0) && (acode != VRESTARTING)) {
	if (acode == RX_CALL_TIMEOUT) {
	    serversleft = afs_BlackListOnce(areq, afid, tsp);
	    areq->idleError++;
	    if (serversleft) {
		shouldRetry = 1;
	    } else {
		shouldRetry = 0;
	    }
	    /* By doing this, we avoid ever marking a server down
	     * in an idle timeout case. That's because the server is 
	     * still responding and may only be letting a single vnode
	     * time out. We otherwise risk having the server continually
	     * be marked down, then up, then down again... 
	     */
	    goto out;
	} 
	markeddown = afs_ServerDown(sa);
	ForceNewConnections(sa); /**multi homed clients lock:afs_xsrvAddr? */
	if (aerrP)
	    (aerrP->err_Server)++;
#if 0
	/* retry *once* when the server is timed out in case of NAT */
	if (markeddown && acode == RX_CALL_DEAD) {
	    aconn->forceConnectFS = 1;
	    shouldRetry = 1;
	}
#endif
    }

    if (acode == VBUSY || acode == VRESTARTING) {
	if (acode == VBUSY) {
	    areq->busyCount++;
	    if (aerrP)
		(aerrP->err_VolumeBusies)++;
	} else
	    areq->busyCount = 1;

	tvp = afs_FindVolume(afid, READ_LOCK);
	if (tvp) {
	    for (i = 0; i < MAXHOSTS; i++) {
		if (tvp->serverHost[i] == tsp) {
		    tvp->status[i] = rdwr_busy;	/* can't tell which yet */
		    /* to tell which, have to look at the op code. */
		}
	    }
	    afs_PutVolume(tvp, READ_LOCK);
	} else {
	    afs_warnuser("afs: Waiting for busy volume %u in cell %s\n",
			 (afid ? afid->Fid.Volume : 0), tsp->cell->cellName);
	    VSleep(afs_BusyWaitPeriod);	/* poll periodically */
	}
	shouldRetry = 1;
	acode = 0;
    } else if (acode == VICETOKENDEAD
	       || (acode & ~0xff) == ERROR_TABLE_BASE_RXK) {
	/* any rxkad error is treated as token expiration */
	struct unixuser *tu;
	/*
	 * I'm calling these errors protection errors, since they involve
	 * faulty authentication.
	 */
	if (aerrP)
	    (aerrP->err_Protection)++;

	tu = afs_FindUser(areq->uid, tsp->cell->cellNum, READ_LOCK);
	if (tu) {
	    if (acode == VICETOKENDEAD) {
		aconn->forceConnectFS = 1;
	    } else if (acode == RXKADEXPIRED) {
		aconn->forceConnectFS = 0;	/* don't check until new tokens set */
		aconn->user->states |= UTokensBad;
		afs_warnuser
		    ("afs: Tokens for user of AFS id %d for cell %s have expired\n",
		     tu->vid, aconn->srvr->server->cell->cellName);
	    } else {
		serversleft = afs_BlackListOnce(areq, afid, tsp);
		areq->tokenError++;

		if (serversleft) {
		    afs_warnuser
			("afs: Tokens for user of AFS id %d for cell %s: rxkad error=%d\n",
			 tu->vid, aconn->srvr->server->cell->cellName, acode);
		    shouldRetry = 1;
		} else {
		    areq->tokenError = 0;
		    aconn->forceConnectFS = 0;	/* don't check until new tokens set */
		    aconn->user->states |= UTokensBad;
		    afs_warnuser
			("afs: Tokens for user of AFS id %d for cell %s are discarded (rxkad error=%d)\n",
			 tu->vid, aconn->srvr->server->cell->cellName, acode);
		}
	    }
	    afs_PutUser(tu, READ_LOCK);
	} else {
	    /* The else case shouldn't be possible and should probably be replaced by a panic? */
	    if (acode == VICETOKENDEAD) {
		aconn->forceConnectFS = 1;
	    } else if (acode == RXKADEXPIRED) {
		aconn->forceConnectFS = 0;	/* don't check until new tokens set */
		aconn->user->states |= UTokensBad;
		afs_warnuser
		    ("afs: Tokens for user %d for cell %s have expired\n",
		     areq->uid, aconn->srvr->server->cell->cellName);
	    } else {
		aconn->forceConnectFS = 0;	/* don't check until new tokens set */
		aconn->user->states |= UTokensBad;
		afs_warnuser
		    ("afs: Tokens for user %d for cell %s are discarded (rxkad error = %d)\n",
		     areq->uid, aconn->srvr->server->cell->cellName, acode);
	    }
	}
	shouldRetry = 1;	/* Try again (as root). */
    }
    /* Check for access violation. */
    else if (acode == EACCES) {
	/* should mark access error in non-existent per-user global structure */
	if (aerrP)
	    (aerrP->err_Protection)++;
	areq->accessError = 1;
	if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
	    areq->permWriteError = 1;
	shouldRetry = 0;
    }
    /* check for ubik errors; treat them like crashed servers */
    else if (acode >= ERROR_TABLE_BASE_U && acode < ERROR_TABLE_BASE_U + 255) {
	afs_ServerDown(sa);
	if (aerrP)
	    (aerrP->err_Server)++;
	shouldRetry = 1;	/* retryable (maybe one is working) */
	VSleep(1);		/* just in case */
    }
    /* Check for bad volume data base / missing volume. */
    else if (acode == VSALVAGE || acode == VOFFLINE || acode == VNOVOL
	     || acode == VNOSERVICE || acode == VMOVED) {
	struct cell *tcell;
	int same;

	shouldRetry = 1;
	areq->volumeError = VOLMISSING;
	if (aerrP)
	    (aerrP->err_Volume)++;
	if (afid && (tcell = afs_GetCell(afid->Cell, 0))) {
	    same = VLDB_Same(afid, areq);
	    tvp = afs_FindVolume(afid, READ_LOCK);
	    if (tvp) {
		for (i = 0; i < MAXHOSTS && tvp->serverHost[i]; i++) {
		    if (tvp->serverHost[i] == tsp) {
			if (tvp->status[i] == end_not_busy)
			    tvp->status[i] = offline;
			else
			    tvp->status[i]++;
		    } else if (!same) {
			tvp->status[i] = not_busy;	/* reset the others */
		    }
		}
		afs_PutVolume(tvp, READ_LOCK);
	    }
	}
    } else if (acode >= ERROR_TABLE_BASE_VL && acode <= ERROR_TABLE_BASE_VL + 255) {	/* vlserver errors */
	shouldRetry = 0;
	areq->volumeError = VOLMISSING;
    } else if (acode >= 0) {
	if (aerrP)
	    (aerrP->err_Other)++;
	if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
	    areq->permWriteError = 1;
	shouldRetry = 0;	/* Other random Vice error. */
    } else if (acode == RX_MSGSIZE) {	/* same meaning as EMSGSIZE... */
	VSleep(1);		/* Just a hack for desperate times. */
	if (aerrP)
	    (aerrP->err_Other)++;
	shouldRetry = 1;	/* packet was too big, please retry call */
    }

    if (acode < 0 && acode != RX_MSGSIZE && acode != VRESTARTING) {
	/* If we get here, code < 0 and we have network/Server troubles.
	 * areq->networkError is not set here, since we always
	 * retry in case there is another server.  However, if we find
	 * no connection (aconn == 0) we set the networkError flag.
	 */
	afs_MarkServerUpOrDown(sa, SRVR_ISDOWN);
	if (aerrP)
	    (aerrP->err_Server)++;
	VSleep(1);		/* Just a hack for desperate times. */
	shouldRetry = 1;
    }
out:
    /* now unlock the connection and return */
    afs_PutConn(aconn, locktype);
    return (shouldRetry);
}				/*afs_Analyze */