Esempio n. 1
0
static int give_capabilities (pid_t pid)
{
	cap_t caps = cap_init();
	const unsigned caps_size = 4;
	cap_value_t cap_list[] =
		{ CAP_SETPCAP, CAP_SYS_NICE, CAP_SYS_RESOURCE, CAP_IPC_LOCK} ;

	if (caps == NULL) {
		fprintf (stderr, "jackstart: could not allocate capability working storage\n");
		return -1;
	}
	cap_clear(caps);
	if (capgetp (pid, caps)) {
		fprintf (stderr, "jackstart: could not get capabilities for process %d\n", pid);
		cap_clear(caps);
	}
	cap_set_flag(caps, CAP_EFFECTIVE, caps_size, cap_list , CAP_SET);
	cap_set_flag(caps, CAP_INHERITABLE, caps_size, cap_list , CAP_SET);
	cap_set_flag(caps, CAP_PERMITTED, caps_size, cap_list , CAP_SET);
	if (capsetp (pid, caps)) {
		fprintf (stderr, "jackstart: could not give capabilities: %s\n", strerror (errno));
		cap_free (caps);
		return -1;
	}
	cap_free (caps);
	return 0;
}
Esempio n. 2
0
int cap_bprm_set_security (struct linux_binprm *bprm)
{
	/* Copied from fs/exec.c:prepare_binprm. */

	/* We don't have VFS support for capabilities yet */
	cap_clear (bprm->cap_inheritable);
	cap_clear (bprm->cap_permitted);
	cap_clear (bprm->cap_effective);

	/*  To support inheritance of root-permissions and suid-root
	 *  executables under compatibility mode, we raise all three
	 *  capability sets for the file.
	 *
	 *  If only the real uid is 0, we only raise the inheritable
	 *  and permitted sets of the executable file.
	 */

	if (!issecure (SECURE_NOROOT)) {
		if (bprm->e_uid == 0 || current->uid == 0) {
			cap_set_full (bprm->cap_inheritable);
			cap_set_full (bprm->cap_permitted);
		}
		if (bprm->e_uid == 0)
			cap_set_full (bprm->cap_effective);
	}
	return 0;
}
Esempio n. 3
0
void UnixMurmur::finalcap() {
#ifdef Q_OS_LINUX
	cap_value_t caps[] = {CAP_SYS_RESOURCE};
	struct rlimit r;

	if (! bRoot)
		return;

	if (getrlimit(RLIMIT_RTPRIO, &r) != 0) {
		qCritical("Failed to get priority limits.");
	} else {
		qWarning("Resource limits were %ld %ld", r.rlim_cur, r.rlim_max);
		r.rlim_cur = r.rlim_max = 1;
		if (setrlimit(RLIMIT_RTPRIO, &r) != 0) {
			qCritical("Failed to set priority limits.");
		}
	}

	int ncap = sizeof(caps)/sizeof(cap_value_t);

	cap_t c = cap_init();
	cap_clear(c);
	cap_set_flag(c, CAP_EFFECTIVE, ncap, caps, CAP_SET);
	cap_set_flag(c, CAP_PERMITTED, ncap, caps, CAP_SET);
	if (cap_set_proc(c) != 0) {
		qCritical("Failed to set final capabilities");
	} else {
		qWarning("Successfully dropped capabilities");
	}
	cap_free(c);
#endif
}
Esempio n. 4
0
int limit_capabilities(void)
{
	cap_t cap_p;
	const cap_value_t caps[] = {
		CAP_NET_ADMIN,
		CAP_NET_RAW,
	};
	int i;

	cap_p = cap_init();
	if (!cap_p) {
		perror("cap_get_proc");
		return -1;
	}

	for (i = 0; i < ARRAY_SIZE(caps); i++) {
		if (cap_clear(cap_p) < 0) {
			perror("cap_clear");
			return -1;
		}

		if (cap_set_flag(cap_p, CAP_PERMITTED, ARRAY_SIZE(caps) - i, caps + i, CAP_SET) < 0) {
			perror("cap_set_flag");
			return -1;
		}

		if (cap_set_proc(cap_p) < 0)
			continue;

		break;
	}

	if (i == ARRAY_SIZE(caps)) {
		perror("cap_set_proc");
		if (errno != EPERM)
			return -1;
	}

	if (prctl(PR_SET_KEEPCAPS, 1) < 0) {
		perror("prctl");
		return -1;
	}

	if (setuid(getuid()) < 0) {
		perror("setuid");
		return -1;
	}

	if (prctl(PR_SET_KEEPCAPS, 0) < 0) {
		perror("prctl");
		return -1;
	}

	cap_free(cap_p);

	uid = getuid();
	euid = geteuid();

	return 0;
}
Esempio n. 5
0
/* 
 * cap_emulate_setxuid() fixes the effective / permitted capabilities of
 * a process after a call to setuid, setreuid, or setresuid.
 *
 *  1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
 *  {r,e,s}uid != 0, the permitted and effective capabilities are
 *  cleared.
 *
 *  2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
 *  capabilities of the process are cleared.
 *
 *  3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
 *  capabilities are set to the permitted capabilities.
 *
 *  fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should 
 *  never happen.
 *
 *  -astor 
 *
 * cevans - New behaviour, Oct '99
 * A process may, via prctl(), elect to keep its capabilities when it
 * calls setuid() and switches away from uid==0. Both permitted and
 * effective sets will be retained.
 * Without this change, it was impossible for a daemon to drop only some
 * of its privilege. The call to setuid(!=0) would drop all privileges!
 * Keeping uid 0 is not an option because uid 0 owns too many vital
 * files..
 * Thanks to Olaf Kirch and Peter Benie for spotting this.
 */
static inline void cap_emulate_setxuid (int old_ruid, int old_euid,
					int old_suid)
{
	if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) &&
	    (current->uid != 0 && current->euid != 0 && current->suid != 0) &&
	    !current->keep_capabilities) {
		cap_clear (current->cap_permitted);
		cap_clear (current->cap_effective);
	}
	if (old_euid == 0 && current->euid != 0) {
		cap_clear (current->cap_effective);
	}
	if (old_euid != 0 && current->euid == 0) {
		current->cap_effective = current->cap_permitted;
	}
}
Esempio n. 6
0
static int issueReinit() {
    cap_t caps = cap_init();
    (void)cap_clear(caps);
    (void)cap_set_proc(caps);
    (void)cap_free(caps);

    int sock = TEMP_FAILURE_RETRY(socket_local_client(
        "logd", ANDROID_SOCKET_NAMESPACE_RESERVED, SOCK_STREAM));
    if (sock < 0) return -errno;

    static const char reinitStr[] = "reinit";
    ssize_t ret = TEMP_FAILURE_RETRY(write(sock, reinitStr, sizeof(reinitStr)));
    if (ret < 0) return -errno;

    struct pollfd p;
    memset(&p, 0, sizeof(p));
    p.fd = sock;
    p.events = POLLIN;
    ret = TEMP_FAILURE_RETRY(poll(&p, 1, 1000));
    if (ret < 0) return -errno;
    if ((ret == 0) || !(p.revents & POLLIN)) return -ETIME;

    static const char success[] = "success";
    char buffer[sizeof(success) - 1];
    memset(buffer, 0, sizeof(buffer));
    ret = TEMP_FAILURE_RETRY(read(sock, buffer, sizeof(buffer)));
    if (ret < 0) return -errno;

    return strncmp(buffer, success, sizeof(success) - 1) != 0;
}
Esempio n. 7
0
int avahi_caps_reduce2(void) {
    int ret = 0;
    cap_t caps;
    static cap_value_t cap_values[] = { CAP_SYS_CHROOT };

    /* Reduce our caps to the bare minimum and tell Linux not to keep
     * them across setuid(). This is called after we drop
     * privileges. */
    
    /* No longer retain caps across setuid() */
    if (prctl(PR_SET_KEEPCAPS, 0, 0, 0, 0) < 0) {
        avahi_log_error("prctl(PR_SET_KEEPCAPS) failed: %s", strerror(errno));
        ret = -1;
    }

    caps = cap_init();
    assert(caps);
    cap_clear(caps);

    /* setuid() zeroed our effective caps, let's get them back */
    cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_values, CAP_SET);
    cap_set_flag(caps, CAP_PERMITTED, 1, cap_values, CAP_SET);

    if (cap_set_proc(caps) < 0) {
        avahi_log_error("cap_set_proc() failed: %s", strerror(errno));
        ret = -1;
    }
    cap_free(caps);

    return ret;
}
Esempio n. 8
0
int avahi_caps_reduce(void) {
    int ret = 0;
    cap_t caps;
    static cap_value_t cap_values[] = { CAP_SYS_CHROOT, CAP_SETUID, CAP_SETGID };
    
    /* Let's reduce our caps to the minimum set and tell Linux to keep
     * them across setuid(). This is called before we drop
     * privileges. */
    
    caps = cap_init();
    assert(caps);
    cap_clear(caps);

    cap_set_flag(caps, CAP_EFFECTIVE, 3, cap_values, CAP_SET);
    cap_set_flag(caps, CAP_PERMITTED, 3, cap_values, CAP_SET);
    
    if (cap_set_proc(caps) < 0) {
        avahi_log_error("cap_set_proc() failed: %s", strerror(errno));
        ret = -1;
    }
    cap_free(caps);

    /* Retain capabilities across setuid() */
    if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) < 0) {
        avahi_log_error("prctl(PR_SET_KEEPCAPS) failed: %s", strerror(errno));
        ret = -1;
    }

    return ret;
}
// Setting linux capabilities (permitted, effective and inheritable) for the current process.
// Permitted set indicates the capabilities what could be set for the process.
// Effective set is a subset of permitted set, they are actually effective.
// Inheritable set indicates the capabilities what the children will inherit from the current process.
static bool setCapabilities(cap_value_t* capabilityList, int length)
{
    // Capabilities should be initialized without flags.
    cap_t capabilities = cap_init();
    if (!capabilities) {
        fprintf(stderr, "Failed to initialize process capabilities: %s.\n", strerror(errno));
        return false;
    }

    if (cap_clear(capabilities) == -1) {
        fprintf(stderr, "Failed to clear process capabilities: %s.\n", strerror(errno));
        return false;
    }

    if (capabilityList && length) {
        if (cap_set_flag(capabilities, CAP_EFFECTIVE, length, capabilityList, CAP_SET) == -1
            || cap_set_flag(capabilities, CAP_INHERITABLE, length, capabilityList, CAP_SET) == -1
            || cap_set_flag(capabilities, CAP_PERMITTED, length, capabilityList, CAP_SET) == -1) {
            fprintf(stderr, "Failed to set process capability flags: %s.\n", strerror(errno));
            cap_free(capabilities);
            return false;
        }
    }

    if (cap_set_proc(capabilities) == -1) {
        fprintf(stderr, "Failed to set process capabilities: %s.\n", strerror(errno));
        cap_free(capabilities);
        return false;
    }

    cap_free(capabilities);
    return true;
}
Esempio n. 10
0
static bool setMinimalCapabilities() {
  cap_t cap_d = cap_init();

  if (cap_d != nullptr) {
    cap_value_t cap_list[] = {CAP_NET_BIND_SERVICE, CAP_SYS_RESOURCE,
                              CAP_SYS_NICE};

    cap_clear(cap_d);

    if (cap_set_flag(cap_d, CAP_PERMITTED, 3, cap_list, CAP_SET) < 0 ||
        cap_set_flag(cap_d, CAP_EFFECTIVE, 3, cap_list, CAP_SET) < 0) {
      Logger::Error("cap_set_flag failed");
      return false;
    }

    if (cap_set_proc(cap_d) == -1) {
      Logger::Error("cap_set_proc failed");
      return false;
    }

    if (cap_free(cap_d) == -1) {
      Logger::Error("cap_free failed");
      return false;
    }

    prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
    return true;
  }
  return false;
}
Esempio n. 11
0
void UnixMurmur::initialcap() {
#ifdef Q_OS_LINUX
	cap_value_t caps[] = {CAP_NET_ADMIN, CAP_SETUID, CAP_SETGID, CAP_CHOWN, CAP_SYS_RESOURCE, CAP_DAC_OVERRIDE };

	if (! bRoot)
		return;

	int ncap = sizeof(caps)/sizeof(cap_value_t);

	if (geteuid() != 0)
		ncap--;

	cap_t c = cap_init();
	cap_clear(c);
	cap_set_flag(c, CAP_EFFECTIVE, ncap, caps, CAP_SET);
	cap_set_flag(c, CAP_INHERITABLE, ncap, caps, CAP_SET);
	cap_set_flag(c, CAP_PERMITTED, ncap, caps, CAP_SET);
	if (cap_set_proc(c) != 0) {
		qCritical("Failed to set initial capabilities");
	} else {
		prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0);
	}
	cap_free(c);
#endif
}
Esempio n. 12
0
int krg_cap_prepare_binprm(struct linux_binprm *bprm)
{
	/* The model needs changes with filesystem support ... */
#if 0
	cap_clear(bprm->krg_cap_forced);
	cap_set_full(bprm->krg_cap_permitted);
	cap_set_full(bprm->krg_cap_effective);
#endif /* 0 */
	return 0;
}
Esempio n. 13
0
static int dummy_capget (struct task_struct *target, kernel_cap_t * effective,
			 kernel_cap_t * inheritable, kernel_cap_t * permitted)
{
	if (target->euid == 0) {
		cap_set_full(*permitted);
		cap_set_init_eff(*effective);
	} else {
		cap_clear(*permitted);
		cap_clear(*effective);
	}

	cap_clear(*inheritable);

	if (target->fsuid != 0) {
		*permitted = cap_drop_fs_set(*permitted);
		*effective = cap_drop_fs_set(*effective);
	}
	return 0;
}
Esempio n. 14
0
SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
{
	const struct cred *old_cred;
	struct cred *override_cred;
	struct path path;
	struct inode *inode;
	int res;

	if (mode & ~S_IRWXO)	
		return -EINVAL;

	override_cred = prepare_creds();
	if (!override_cred)
		return -ENOMEM;

	override_cred->fsuid = override_cred->uid;
	override_cred->fsgid = override_cred->gid;

	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
		
		if (override_cred->uid)
			cap_clear(override_cred->cap_effective);
		else
			override_cred->cap_effective =
				override_cred->cap_permitted;
	}

	old_cred = override_creds(override_cred);

	res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
	if (res)
		goto out;

	inode = path.dentry->d_inode;

	if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
		res = -EACCES;
		if (path.mnt->mnt_flags & MNT_NOEXEC)
			goto out_path_release;
	}

	res = inode_permission(inode, mode | MAY_ACCESS);
	
	if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
		goto out_path_release;
	if (__mnt_is_readonly(path.mnt))
		res = -EROFS;

out_path_release:
	path_put(&path);
out:
	revert_creds(old_cred);
	put_cred(override_cred);
	return res;
}
Esempio n. 15
0
/*
 * access() needs to use the real uid/gid, not the effective uid/gid.
 * We do this by temporarily clearing all FS-related capabilities and
 * switching the fsuid/fsgid around to the real ones.
 */
asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
{
	struct nameidata nd;
	int old_fsuid, old_fsgid;
	kernel_cap_t old_cap;
	int res;
	const struct _inode *inode;

	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
		return -EINVAL;

	old_fsuid = current->fsuid;
	old_fsgid = current->fsgid;
	old_cap = current->cap_effective;

	current->fsuid = current->uid;
	current->fsgid = current->gid;

	/*
	 * Clear the capabilities if we switch to a non-root user
	 *
	 * FIXME: There is a race here against sys_capset.  The
	 * capabilities can change yet we will restore the old
	 * value below.  We should hold task_capabilities_lock,
	 * but we cannot because user_path_walk can sleep.
	 */
	if (current->uid)
		cap_clear(current->cap_effective);
	else
		current->cap_effective = current->cap_permitted;
	
	res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
	if (res)
		goto out;

	inode = d_get_inode_ro(nd.dentry);
	res = vfs_permission(inode, &nd, mode);
	/* SuS v2 requires we report a read only fs too */
	if(res || !(mode & S_IWOTH) ||
	   special_file(inode->i_mode))
		goto out_path_release;

	if(IS_RDONLY(inode))
		res = -EROFS;

out_path_release:
	path_release(&nd);
out:
	current->fsuid = old_fsuid;
	current->fsgid = old_fsgid;
	current->cap_effective = old_cap;
	
	return res;
}
Esempio n. 16
0
/*
 * Truncate a file.
 * The calling routines must make sure to update the ctime
 * field and call notify_change.
 *
 * XXX Nobody calls this thing? -DaveM
 * N.B. After this call fhp needs an fh_put
 */
int
nfsd_truncate(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned long size)
{
	struct dentry	*dentry;
	struct inode	*inode;
	struct iattr	newattrs;
	int		err;
	kernel_cap_t	saved_cap;

	err = fh_verify(rqstp, fhp, S_IFREG, MAY_WRITE | MAY_TRUNC);
	if (err)
		goto out;

	dentry = fhp->fh_dentry;
	inode = dentry->d_inode;

	err = get_write_access(inode);
	if (err)
		goto out_nfserr;

	/* Things look sane, lock and do it. */
	fh_lock(fhp);
	DQUOT_INIT(inode);
	newattrs.ia_size = size;
	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
	if (current->fsuid != 0) {
		saved_cap = current->cap_effective;
		cap_clear(current->cap_effective);
	}
	err = notify_change(dentry, &newattrs);
	if (current->fsuid != 0)
		current->cap_effective = saved_cap;
	if (!err) {
		vmtruncate(inode, size);
		if (inode->i_op && inode->i_op->truncate)
			inode->i_op->truncate(inode);
	}
	put_write_access(inode);
	DQUOT_DROP(inode);
	fh_unlock(fhp);
out_nfserr:
	if (err)
		err = nfserrno(-err);
out:
	return err;
}
Esempio n. 17
0
int avahi_caps_drop_all(void) {
    cap_t caps;
    int ret = 0;

    /* Drop all capabilities and turn ourselves into a normal user process */

    caps = cap_init();
    assert(caps);
    cap_clear(caps);
    
    if (cap_set_proc(caps) < 0) {
        avahi_log_error("cap_set_proc() failed: %s", strerror(errno));
        ret = -1;
    }
    cap_free(caps);
    
    return ret;
}
Esempio n. 18
0
File: utils.c Progetto: dtaht/tc-adv
void drop_cap(void)
{
#ifdef HAVE_LIBCAP
	/* don't harmstring root/sudo */
	if (getuid() != 0 && geteuid() != 0) {
		cap_t capabilities;

		capabilities = cap_get_proc();
		if (!capabilities)
			exit(EXIT_FAILURE);
		if (cap_clear(capabilities) != 0)
			exit(EXIT_FAILURE);
		if (cap_set_proc(capabilities) != 0)
			exit(EXIT_FAILURE);
		cap_free(capabilities);
	}
#endif
}
Esempio n. 19
0
static int check_capabilities (void)
{
	cap_t caps = cap_init();
	cap_flag_value_t cap;
	pid_t pid;
	int have_all_caps = 1;

	if (caps == NULL) {
		fprintf (stderr, "jackstart: could not allocate capability working storage\n");
		return 0;
	}
	pid = getpid ();
	cap_clear (caps);
	if (capgetp (pid, caps)) {
		fprintf (stderr, "jackstart: could not get capabilities for process %d\n", pid);
		return 0;
	}
	/* check that we are able to give capabilites to other processes */
	cap_get_flag(caps, CAP_SETPCAP, CAP_EFFECTIVE, &cap);
	if (cap == CAP_CLEAR) {
		have_all_caps = 0;
		goto done;
	}
	/* check that we have the capabilities we want to transfer */
	cap_get_flag(caps, CAP_SYS_NICE, CAP_EFFECTIVE, &cap);
	if (cap == CAP_CLEAR) {
		have_all_caps = 0;
		goto done;
	}
	cap_get_flag(caps, CAP_SYS_RESOURCE, CAP_EFFECTIVE, &cap);
	if (cap == CAP_CLEAR) {
		have_all_caps = 0;
		goto done;
	}
	cap_get_flag(caps, CAP_IPC_LOCK, CAP_EFFECTIVE, &cap);
	if (cap == CAP_CLEAR) {
		have_all_caps = 0;
		goto done;
	}
  done:
	cap_free (caps);
	return have_all_caps;
}
Esempio n. 20
0
/*
 * access() needs to use the real uid/gid, not the effective uid/gid.
 * We do this by temporarily clearing all FS-related capabilities and
 * switching the fsuid/fsgid around to the real ones.
 */
asmlinkage long sys_access(const char * filename, int mode)
{
	struct nameidata nd;
	int old_fsuid, old_fsgid;
	kernel_cap_t old_cap;
	int res;

	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
		return -EINVAL;

	old_fsuid = current->fsuid;
	old_fsgid = current->fsgid;
	old_cap = current->cap_effective;

	current->fsuid = current->uid;
	current->fsgid = current->gid;

	/* Clear the capabilities if we switch to a non-root user */
	if (current->uid)
		cap_clear(current->cap_effective);
	else
		current->cap_effective = current->cap_permitted;

	res = user_path_walk(filename, &nd);
	if (!res) {
		res = permission(nd.dentry->d_inode, mode);
		/* SuS v2 requires we report a read only fs too */
		if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
		   && !special_file(nd.dentry->d_inode->i_mode))
			res = -EROFS;
		path_release(&nd);
	}

	current->fsuid = old_fsuid;
	current->fsgid = old_fsgid;
	current->cap_effective = old_cap;

	return res;
}
Esempio n. 21
0
File: packet.c Progetto: russor/mtr
/*  Drop SUID privileges.  To be used after accquiring raw sockets.  */
static
int drop_elevated_permissions(
    void)
{
#ifdef HAVE_LIBCAP
    cap_t cap;
#endif

    /*  Drop any suid permissions granted  */
    if (setgid(getgid()) || setuid(getuid())) {
        return -1;
    }

    if (geteuid() != getuid() || getegid() != getgid()) {
        return -1;
    }

    /*
       Drop all process capabilities.
       This will revoke anything granted by a commandline 'setcap'
     */
#ifdef HAVE_LIBCAP
    cap = cap_get_proc();
    if (cap == NULL) {
        return -1;
    }
    if (cap_clear(cap)) {
        return -1;
    }
    if (cap_set_proc(cap)) {
        return -1;
    }
#endif

    return 0;
}
void drop_capabilities(void)
{
	/* the capabilities that we *need* in order to operate */
	static cap_value_t suidcaps[] = {
		CAP_CHOWN,
		CAP_KILL,
		CAP_SYS_CHROOT,
		CAP_SETUID,
		CAP_SETGID,
		CAP_NET_BIND_SERVICE,
		/* we may want to open any config/log files */
		CAP_DAC_OVERRIDE
	};
	cap_t caps;

	caps = cap_init();
	cap_clear(caps);
	cap_set_flag(caps, CAP_PERMITTED,
		     N_ELEMENTS(suidcaps), suidcaps, CAP_SET);
	cap_set_flag(caps, CAP_EFFECTIVE,
		     N_ELEMENTS(suidcaps), suidcaps, CAP_SET);
	cap_set_proc(caps);
	cap_free(caps);
}
Esempio n. 23
0
/*
 * access() needs to use the real uid/gid, not the effective uid/gid.
 * We do this by temporarily clearing all FS-related capabilities and
 * switching the fsuid/fsgid around to the real ones.
 */
SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
{
	const struct cred *old_cred;
	struct cred *override_cred;
	struct path path;
	struct inode *inode;
	int res;

	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
		return -EINVAL;

	override_cred = prepare_creds();
	if (!override_cred)
		return -ENOMEM;

	override_cred->fsuid = override_cred->uid;
	override_cred->fsgid = override_cred->gid;

	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
		/* Clear the capabilities if we switch to a non-root user */
		if (override_cred->uid)
			cap_clear(override_cred->cap_effective);
		else
			override_cred->cap_effective =
				override_cred->cap_permitted;
	}

	old_cred = override_creds(override_cred);

	res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
	if (res)
		goto out;

	inode = path.dentry->d_inode;

	if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
		/*
		 * MAY_EXEC on regular files is denied if the fs is mounted
		 * with the "noexec" flag.
		 */
		res = -EACCES;
		if (path.mnt->mnt_flags & MNT_NOEXEC)
			goto out_path_release;
	}

	res = inode_permission(inode, mode | MAY_ACCESS);
	/* SuS v2 requires we report a read only fs too */
	if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
		goto out_path_release;
	/*
	 * This is a rare case where using __mnt_is_readonly()
	 * is OK without a mnt_want/drop_write() pair.  Since
	 * no actual write to the fs is performed here, we do
	 * not need to telegraph to that to anyone.
	 *
	 * By doing this, we accept that this access is
	 * inherently racy and know that the fs may change
	 * state before we even see this result.
	 */
	if (__mnt_is_readonly(path.mnt))
		res = -EROFS;

out_path_release:
	path_put(&path);
out:
	revert_creds(old_cred);
	put_cred(override_cred);
	return res;
}
Esempio n. 24
0
static int drop_privs(bool klogd, bool auditd) {
    // Tricky, if ro.build.type is "eng" then this is true because of the
    // side effect that ro.debuggable == 1 as well, else it is false.
    bool eng =
        __android_logger_property_get_bool("ro.build.type", BOOL_DEFAULT_FALSE);

    struct sched_param param;
    memset(&param, 0, sizeof(param));

    if (set_sched_policy(0, SP_BACKGROUND) < 0) {
        android::prdebug("failed to set background scheduling policy");
        if (!eng) return -1;
    }

    if (sched_setscheduler((pid_t)0, SCHED_BATCH, &param) < 0) {
        android::prdebug("failed to set batch scheduler");
        if (!eng) return -1;
    }

    if (setpriority(PRIO_PROCESS, 0, ANDROID_PRIORITY_BACKGROUND) < 0) {
        android::prdebug("failed to set background cgroup");
        if (!eng) return -1;
    }

    if (!eng && (prctl(PR_SET_DUMPABLE, 0) < 0)) {
        android::prdebug("failed to clear PR_SET_DUMPABLE");
        return -1;
    }

    if (prctl(PR_SET_KEEPCAPS, 1) < 0) {
        android::prdebug("failed to set PR_SET_KEEPCAPS");
        if (!eng) return -1;
    }

    std::unique_ptr<struct _cap_struct, int (*)(void*)> caps(cap_init(),
                                                             cap_free);
    if (cap_clear(caps.get()) < 0) return -1;
    cap_value_t cap_value[] = { CAP_SETGID,  // must be first for below
                                klogd ? CAP_SYSLOG : CAP_SETGID,
                                auditd ? CAP_AUDIT_CONTROL : CAP_SETGID };
    if (cap_set_flag(caps.get(), CAP_PERMITTED, arraysize(cap_value), cap_value,
                     CAP_SET) < 0) {
        return -1;
    }
    if (cap_set_flag(caps.get(), CAP_EFFECTIVE, arraysize(cap_value), cap_value,
                     CAP_SET) < 0) {
        return -1;
    }
    if (cap_set_proc(caps.get()) < 0) {
        android::prdebug(
            "failed to set CAP_SETGID, CAP_SYSLOG or CAP_AUDIT_CONTROL (%d)",
            errno);
        if (!eng) return -1;
    }

    gid_t groups[] = { AID_READPROC };

    if (setgroups(arraysize(groups), groups) == -1) {
        android::prdebug("failed to set AID_READPROC groups");
        if (!eng) return -1;
    }

    if (setgid(AID_LOGD) != 0) {
        android::prdebug("failed to set AID_LOGD gid");
        if (!eng) return -1;
    }

    if (setuid(AID_LOGD) != 0) {
        android::prdebug("failed to set AID_LOGD uid");
        if (!eng) return -1;
    }

    if (cap_set_flag(caps.get(), CAP_PERMITTED, 1, cap_value, CAP_CLEAR) < 0) {
        return -1;
    }
    if (cap_set_flag(caps.get(), CAP_EFFECTIVE, 1, cap_value, CAP_CLEAR) < 0) {
        return -1;
    }
    if (cap_set_proc(caps.get()) < 0) {
        android::prdebug("failed to clear CAP_SETGID (%d)", errno);
        if (!eng) return -1;
    }

    return 0;
}
Esempio n. 25
0
static void* reinit_thread_start(void* /*obj*/) {
    prctl(PR_SET_NAME, "logd.daemon");
    set_sched_policy(0, SP_BACKGROUND);
    setpriority(PRIO_PROCESS, 0, ANDROID_PRIORITY_BACKGROUND);

    // We should drop to AID_LOGD, if we are anything else, we have
    // even lesser privileges and accept our fate.
    gid_t groups[] = {
        AID_SYSTEM,        // search access to /data/system path
        AID_PACKAGE_INFO,  // readonly access to /data/system/packages.list
    };
    if (setgroups(arraysize(groups), groups) == -1) {
        android::prdebug(
            "logd.daemon: failed to set AID_SYSTEM AID_PACKAGE_INFO groups");
    }
    if (setgid(AID_LOGD) != 0) {
        android::prdebug("logd.daemon: failed to set AID_LOGD gid");
    }
    if (setuid(AID_LOGD) != 0) {
        android::prdebug("logd.daemon: failed to set AID_LOGD uid");
    }

    cap_t caps = cap_init();
    (void)cap_clear(caps);
    (void)cap_set_proc(caps);
    (void)cap_free(caps);

    while (reinit_running && !sem_wait(&reinit) && reinit_running) {
        // uidToName Privileged Worker
        if (uid) {
            name = nullptr;

            // if we got the perms wrong above, this would spam if we reported
            // problems with acquisition of an uid name from the packages.
            (void)packagelist_parse(package_list_parser_cb, nullptr);

            uid = 0;
            sem_post(&uidName);
            continue;
        }

        if (fdDmesg >= 0) {
            static const char reinit_message[] = { KMSG_PRIORITY(LOG_INFO),
                                                   'l',
                                                   'o',
                                                   'g',
                                                   'd',
                                                   '.',
                                                   'd',
                                                   'a',
                                                   'e',
                                                   'm',
                                                   'o',
                                                   'n',
                                                   ':',
                                                   ' ',
                                                   'r',
                                                   'e',
                                                   'i',
                                                   'n',
                                                   'i',
                                                   't',
                                                   '\n' };
            write(fdDmesg, reinit_message, sizeof(reinit_message));
        }

        // Anything that reads persist.<property>
        if (logBuf) {
            logBuf->init();
            logBuf->initPrune(nullptr);
        }
        android::ReReadEventLogTags();
    }

    return nullptr;
}
Esempio n. 26
0
int main(void)
{
    uid_t       user;
    cap_value_t root_caps[2] = { CAP_SYS_NICE, CAP_SETUID };
    cap_value_t user_caps[1] = { CAP_SYS_NICE, CAP_SYS_ADMIN };
    cap_t       capabilities;

    /* Get real user ID. */
    //user = getuid();
	user = 1000;

    /* Get full root privileges. Normally being effectively root
     * (see man 7 credentials, User and Group Identifiers, for explanation
     *  for effective versus real identity) is enough, but some security
     * modules restrict actions by processes that are only effectively root.
     * To make sure we don't hit those problems, we switch to root fully. */
    if (setresuid(0, 0, 0)) {
        fprintf(stderr, "Cannot switch to root: %s.\n", strerror(errno));
        return 1;
    }

    /* Create an empty set of capabilities. */
    capabilities = cap_init();

    /* Capabilities have three subsets:
     *      INHERITABLE:    Capabilities permitted after an execv()
     *      EFFECTIVE:      Currently effective capabilities
     *      PERMITTED:      Limiting set for the two above.
     * See man 7 capabilities for details, Thread Capability Sets.
     *
     * We need the following capabilities:
     *      CAP_SYS_NICE    For nice(2), setpriority(2),
     *                      sched_setscheduler(2), sched_setparam(2),
     *                      sched_setaffinity(2), etc.
     *      CAP_SETUID      For setuid(), setresuid()
     * in the last two subsets. We do not need to retain any capabilities
     * over an exec().
    */
    if (cap_set_flag(capabilities, CAP_PERMITTED, sizeof root_caps / sizeof root_caps[0], root_caps, CAP_SET) ||
        cap_set_flag(capabilities, CAP_EFFECTIVE, sizeof root_caps / sizeof root_caps[0], root_caps, CAP_SET)) {
        fprintf(stderr, "Cannot manipulate capability data structure as root: %s.\n", strerror(errno));
        return 1;
    }

    /* Above, we just manipulated the data structure describing the flags,
     * not the capabilities themselves. So, set those capabilities now. */
    if (cap_set_proc(capabilities)) {
        fprintf(stderr, "Cannot set capabilities as root: %s.\n", strerror(errno));
        return 1;
    }

    /* We wish to retain the capabilities across the identity change,
     * so we need to tell the kernel. */
    if (prctl(PR_SET_KEEPCAPS, 1L)) {
        fprintf(stderr, "Cannot keep capabilities after dropping privileges: %s.\n", strerror(errno));
        return 1;
    }

    /* Drop extra privileges (aside from capabilities) by switching
     * to the original real user. */
    if (setresuid(user, user, user)) {
        fprintf(stderr, "Cannot drop root privileges: %s.\n", strerror(errno));
        return 1;
    }

    /* We can still switch to a different user due to having the CAP_SETUID
     * capability. Let's clear the capability set, except for the CAP_SYS_NICE
     * in the permitted and effective sets. */
    if (cap_clear(capabilities)) {
        fprintf(stderr, "Cannot clear capability data structure: %s.\n", strerror(errno));
        return 1;
    }
    if (cap_set_flag(capabilities, CAP_PERMITTED, sizeof user_caps / sizeof user_caps[0], user_caps, CAP_SET) ||
        cap_set_flag(capabilities, CAP_EFFECTIVE, sizeof user_caps / sizeof user_caps[0], user_caps, CAP_SET)) {
        fprintf(stderr, "Cannot manipulate capability data structure as user: %s.\n", strerror(errno));
        return 1;
    }

    /* Apply modified capabilities. */
    if (cap_set_proc(capabilities)) {
        fprintf(stderr, "Cannot set capabilities as user: %s.\n", strerror(errno));
        return 1;
    }

    /*
     * Now we have just the normal user privileges,
     * plus user_caps.
    */

    test_priority("SCHED_OTHER", SCHED_OTHER);
    test_priority("SCHED_FIFO", SCHED_FIFO);
    test_priority("SCHED_RR", SCHED_RR);

	int ret;
	ret = nice(-20);
	printf("nice: %d\n", ret);

	while(1)
	{
		sleep(1);
	}
    return 0;
}
Esempio n. 27
0
int caps_actually_set_test(void)
{
	int whichcap, finalret = 0, ret;
	cap_t fcap, pcap, cap_fullpi;
	cap_value_t capvalue[1];
	int i;

	fcap = cap_init();
	pcap = cap_init();
	if (!fcap || !pcap) {
		perror("cap_init");
		exit(2);
	}

	create_fifo();

	int num_caps;

	for (num_caps = 0;; num_caps++) {
		ret = prctl(PR_CAPBSET_READ, num_caps);
		/*
		 * Break from the loop in this manner to avoid incrementing,
		 * then having to decrement value.
		 */
		if (ret == -1)
			break;
	}

	/* first, try each bit in fP (forced) with fE on and off. */
	for (whichcap = 0; whichcap < num_caps; whichcap++) {
		/*
		 * fP=whichcap, fE=fI=0
		 * pP'=whichcap, pI'=pE'=0
		 */
		capvalue[0] = whichcap;
		cap_clear(fcap);
		cap_set_flag(fcap, CAP_PERMITTED, 1, capvalue, CAP_SET);
		ret = cap_set_file(TSTPATH, fcap);
		if (ret) {
			tst_resm(TINFO, "%d\n", whichcap);
			continue;
		}
		ret = fork_drop_and_exec(DROP_PERMS, fcap);
		if (ret) {
			tst_resm(TINFO,
				 "Failed CAP_PERMITTED=%d CAP_EFFECTIVE=0\n",
				 whichcap);
			if (!finalret)
				finalret = ret;
		}

/* SERGE here */
		/*
		 * fP = fE = whichcap, fI = 0
		 * pP = pE = whichcap, pI = 0
		 */
		cap_clear(fcap);
		cap_set_flag(fcap, CAP_PERMITTED, 1, capvalue, CAP_SET);
		cap_set_flag(fcap, CAP_EFFECTIVE, 1, capvalue, CAP_SET);
		ret = cap_set_file(TSTPATH, fcap);
		if (ret) {
			tst_resm(TINFO, "%d\n", whichcap);
			continue;
		}
		ret = fork_drop_and_exec(DROP_PERMS, fcap);
		if (ret) {
			tst_resm(TINFO,
				 "Failed CAP_PERMITTED=%d CAP_EFFECTIVE=1\n",
				 whichcap);
			if (!finalret)
				finalret = ret;
		}
	}

	cap_free(pcap);
	cap_free(fcap);
	cap_fullpi = cap_init();
	for (i = 0; i < num_caps; i++) {
		capvalue[0] = i;
		cap_set_flag(cap_fullpi, CAP_INHERITABLE, 1, capvalue, CAP_SET);
	}

	/*
	 * For the inheritable tests, we want to make sure pI starts
	 * filled.
	 */
	ret = cap_set_proc(cap_fullpi);
	if (ret)
		tst_resm(TINFO, "Could not fill pI.  pI tests will fail.\n");

	/*
	 * next try each bit in fI
	 * The first two attemps have the bit which is in fI in pI.
	 *     This should result in the bit being in pP'.
	 *     If fE was set then it should also be in pE'.
	 * The last attempt starts with an empty pI.
	 *     This should result in empty capability, as there were
	 *     no bits to be inherited from the original process.
	 */
	for (whichcap = 0; whichcap < num_caps; whichcap++) {
		cap_t cmpcap;
		capvalue[0] = whichcap;

		/*
		 * fI=whichcap, fP=fE=0
		 * pI=full
		 * pI'=full, pP'=whichcap, pE'=0
		 */
		/* fill pI' */
		pcap = cap_dup(cap_fullpi);
		/* pP' = whichcap */
		cap_set_flag(pcap, CAP_PERMITTED, 1, capvalue, CAP_SET);

		/* fI = whichcap */
		fcap = cap_init();
		cap_set_flag(fcap, CAP_INHERITABLE, 1, capvalue, CAP_SET);
		ret = cap_set_file(TSTPATH, fcap);
		if (ret) {
			tst_resm(TINFO, "%d\n", whichcap);
			continue;
		}
		ret = fork_drop_and_exec(KEEP_PERMS, pcap);
		if (ret) {
			tst_resm(TINFO, "Failed with_perms CAP_INHERITABLE=%d "
				 "CAP_EFFECTIVE=0\n", whichcap);
			if (!finalret)
				finalret = ret;
		}

		/*
		 * fI=fE=whichcap, fP=0
		 * pI=full
		 * pI'=full, pP'=whichcap, pE'=whichcap
		 *
		 * Note that only fE and pE' change, so keep prior
		 * fcap and pcap and set those bits.
		 */

		cap_set_flag(fcap, CAP_EFFECTIVE, 1, capvalue, CAP_SET);
		cap_set_flag(pcap, CAP_EFFECTIVE, 1, capvalue, CAP_SET);
		ret = cap_set_file(TSTPATH, fcap);
		if (ret) {
			tst_resm(TINFO, "%d\n", whichcap);
			continue;
		}
		/* The actual result will be a full pI, with
		 * pE and pP containing just whichcap. */
		cmpcap = cap_dup(cap_fullpi);
		cap_set_flag(cmpcap, CAP_PERMITTED, 1, capvalue, CAP_SET);
		cap_set_flag(cmpcap, CAP_EFFECTIVE, 1, capvalue, CAP_SET);
		ret = fork_drop_and_exec(KEEP_PERMS, cmpcap);
		cap_free(cmpcap);
		if (ret) {
			tst_resm(TINFO, "Failed with_perms CAP_INHERITABLE=%d "
				 "CAP_EFFECTIVE=1\n", whichcap);
			if (!finalret)
				finalret = ret;
		}

		/*
		 * fI=fE=whichcap, fP=0  (so fcap is same as before)
		 * pI=0  (achieved using DROP_PERMS)
		 * pI'=pP'=pE'=0
		 */
		cap_clear(pcap);
		ret = fork_drop_and_exec(DROP_PERMS, pcap);
		if (ret) {
			tst_resm(TINFO,
				 "Failed without_perms CAP_INHERITABLE=%d",
				 whichcap);
			if (!finalret)
				finalret = ret;
		}

		cap_free(fcap);
		cap_free(pcap);
	}

	cap_free(cap_fullpi);

	return finalret;
}
Esempio n. 28
0
/*
 * Check for a user's access permissions to this inode.
 */
int
nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc)
{
	struct inode	*inode = dentry->d_inode;
	int		err;
	kernel_cap_t	saved_cap;

	if (acc == MAY_NOP)
		return 0;
#if 0
	dprintk("nfsd: permission 0x%x%s%s%s%s%s mode 0%o%s%s%s\n",
		acc,
		(acc & MAY_READ)?	" read"  : "",
		(acc & MAY_WRITE)?	" write" : "",
		(acc & MAY_EXEC)?	" exec"  : "",
		(acc & MAY_SATTR)?	" sattr" : "",
		(acc & MAY_TRUNC)?	" trunc" : "",
		inode->i_mode,
		IS_IMMUTABLE(inode)?	" immut" : "",
		IS_APPEND(inode)?	" append" : "",
		IS_RDONLY(inode)?	" ro" : "");
	dprintk("      owner %d/%d user %d/%d\n",
		inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
#endif
#ifndef CONFIG_NFSD_SUN
        if (dentry->d_mounts != dentry) {
		return nfserr_perm;
	}
#endif

	if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
		if (EX_RDONLY(exp) || IS_RDONLY(inode))
			return nfserr_rofs;
		if (S_ISDIR(inode->i_mode) && nfsd_iscovered(dentry, exp))
			return nfserr_perm;
		if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
			return nfserr_perm;
	}
	if ((acc & MAY_TRUNC) && IS_APPEND(inode))
		return nfserr_perm;

	/*
	 * The file owner always gets access permission. This is to make
	 * file access work even when the client has done a fchmod(fd, 0).
	 *
	 * However, `cp foo bar' should fail nevertheless when bar is
	 * readonly. A sensible way to do this might be to reject all
	 * attempts to truncate a read-only file, because a creat() call
	 * always implies file truncation.
	 */
	if (inode->i_uid == current->fsuid /* && !(acc & MAY_TRUNC) */)
		return 0;

	if (current->fsuid != 0) {
		saved_cap = current->cap_effective;
		cap_clear(current->cap_effective);
	}

	err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC));

	/* Allow read access to binaries even when mode 111 */
	if (err == -EACCES && S_ISREG(inode->i_mode) && acc == MAY_READ)
		err = permission(inode, MAY_EXEC);

	if (current->fsuid != 0)
		current->cap_effective = saved_cap;

	return err? nfserrno(-err) : 0;
}
Esempio n. 29
0
/*
 * Set various file attributes.
 * N.B. After this call fhp needs an fh_put
 */
int
nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap)
{
	struct dentry	*dentry;
	struct inode	*inode;
	int		accmode = MAY_SATTR;
	int		ftype = 0;
	int		imode;
	int		err;

	if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
		accmode |= MAY_WRITE;
	if (iap->ia_valid & ATTR_SIZE)
		ftype = S_IFREG;

	/* Get inode */
	err = fh_verify(rqstp, fhp, ftype, accmode);
	if (err)
		goto out;

	dentry = fhp->fh_dentry;
	inode = dentry->d_inode;

	err = inode_change_ok(inode, iap);
	if (err)
		goto out_nfserr;

	/* The size case is special... */
	if (iap->ia_valid & ATTR_SIZE) {
if (!S_ISREG(inode->i_mode))
printk("nfsd_setattr: size change??\n");
		if (iap->ia_size < inode->i_size) {
			err = nfsd_permission(fhp->fh_export, dentry, MAY_TRUNC);
			if (err)
				goto out;
		}
		err = get_write_access(inode);
		if (err)
			goto out_nfserr;
		/* N.B. Should we update the inode cache here? */
		inode->i_size = iap->ia_size;
		if (inode->i_op && inode->i_op->truncate)
			inode->i_op->truncate(inode);
		mark_inode_dirty(inode);
		put_write_access(inode);
		iap->ia_valid &= ~ATTR_SIZE;
		iap->ia_valid |= ATTR_MTIME;
		iap->ia_mtime = CURRENT_TIME;
	}

	imode = inode->i_mode;
	if (iap->ia_valid & ATTR_MODE) {
		iap->ia_mode &= S_IALLUGO;
		imode = iap->ia_mode |= (imode & ~S_IALLUGO);
	}

	/* Revoke setuid/setgid bit on chown/chgrp */
	if ((iap->ia_valid & ATTR_UID) && (imode & S_ISUID)
	 && iap->ia_uid != inode->i_uid) {
		iap->ia_valid |= ATTR_MODE;
		iap->ia_mode = imode &= ~S_ISUID;
	}
	if ((iap->ia_valid & ATTR_GID) && (imode & S_ISGID)
	 && iap->ia_gid != inode->i_gid) {
		iap->ia_valid |= ATTR_MODE;
		iap->ia_mode = imode &= ~S_ISGID;
	}

	/* Change the attributes. */
	if (iap->ia_valid) {
		kernel_cap_t	saved_cap = 0;

		iap->ia_valid |= ATTR_CTIME;
		iap->ia_ctime = CURRENT_TIME;
		if (current->fsuid != 0) {
			saved_cap = current->cap_effective;
			cap_clear(current->cap_effective);
		}
		err = notify_change(dentry, iap);
		if (current->fsuid != 0)
			current->cap_effective = saved_cap;
		if (err)
			goto out_nfserr;
		if (EX_ISSYNC(fhp->fh_export))
			write_inode_now(inode);
	}
	err = 0;
out:
	return err;

out_nfserr:
	err = nfserrno(-err);
	goto out;
}
Esempio n. 30
0
/*
 * Write data to a file.
 * The stable flag requests synchronous writes.
 * N.B. After this call fhp needs an fh_put
 */
int
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
				char *buf, unsigned long cnt, int stable)
{
	struct svc_export	*exp;
	struct file		file;
	struct dentry		*dentry;
	struct inode		*inode;
	mm_segment_t		oldfs;
	int			err = 0;
#ifdef CONFIG_QUOTA
	uid_t			saved_euid;
#endif

	if (!cnt)
		goto out;
	err = nfsd_open(rqstp, fhp, S_IFREG, OPEN_WRITE, &file);
	if (err)
		goto out;
	err = nfserr_perm;
	if (!file.f_op->write)
		goto out_close;

	dentry = file.f_dentry;
	inode = dentry->d_inode;
	exp   = fhp->fh_export;

	/*
	 * Request sync writes if
	 *  -	the sync export option has been set, or
	 *  -	the client requested O_SYNC behavior (NFSv3 feature).
	 * When gathered writes have been configured for this volume,
	 * flushing the data to disk is handled separately below.
	 */
	if ((stable || (stable = EX_ISSYNC(exp))) && !EX_WGATHER(exp))
		file.f_flags |= O_SYNC;

	fh_lock(fhp);			/* lock inode */
	file.f_pos = offset;		/* set write offset */

	/* Write the data. */
	oldfs = get_fs(); set_fs(KERNEL_DS);
#ifdef CONFIG_QUOTA
	/* This is for disk quota. */
	saved_euid = current->euid;
	current->euid = current->fsuid;
	err = file.f_op->write(&file, buf, cnt, &file.f_pos);
	current->euid = saved_euid;
#else
	err = file.f_op->write(&file, buf, cnt, &file.f_pos);
#endif
	set_fs(oldfs);

	/* clear setuid/setgid flag after write */
	if (err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) {
		struct iattr	ia;
		kernel_cap_t	saved_cap;

		ia.ia_valid = ATTR_MODE;
		ia.ia_mode  = inode->i_mode & ~(S_ISUID | S_ISGID);
		if (current->fsuid != 0) {
			saved_cap = current->cap_effective;
			cap_clear(current->cap_effective);
		}
		notify_change(dentry, &ia);
		if (current->fsuid != 0)
			current->cap_effective = saved_cap;
	}

	fh_unlock(fhp);			/* unlock inode */

	if (err >= 0 && stable) {
		static unsigned long	last_ino = 0;
		static kdev_t		last_dev = NODEV;

		/*
		 * Gathered writes: If another process is currently
		 * writing to the file, there's a high chance
		 * this is another nfsd (triggered by a bulk write
		 * from a client's biod). Rather than syncing the
		 * file with each write request, we sleep for 10 msec.
		 *
		 * I don't know if this roughly approximates
		 * C. Juszak's idea of gathered writes, but it's a
		 * nice and simple solution (IMHO), and it seems to
		 * work:-)
		 */
		if (EX_WGATHER(exp) && (inode->i_writecount > 1
		 || (last_ino == inode->i_ino && last_dev == inode->i_dev))) {
#if 0
			interruptible_sleep_on_timeout(&inode->i_wait, 10 * HZ / 1000);
#else
			dprintk("nfsd: write defer %d\n", current->pid);
			schedule_timeout((HZ+99)/100);
			dprintk("nfsd: write resume %d\n", current->pid);
#endif
		}

		if (inode->i_state & I_DIRTY) {
			dprintk("nfsd: write sync %d\n", current->pid);
			nfsd_sync(inode, &file);
			write_inode_now(inode);
		}
		wake_up(&inode->i_wait);
		last_ino = inode->i_ino;
		last_dev = inode->i_dev;
	}

	dprintk("nfsd: write complete\n");
	if (err >= 0)
		err = 0;
	else 
		err = nfserrno(-err);
out_close:
	nfsd_close(&file);
out:
	return err;
}