Code Example #1
File: ip_gre.c Project: vps2fast/openvz-kernel
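An ICMP error handler for GRE tunnels: it parses the GRE flag bits to work out the header length, then looks up the affected tunnel under ipgre_lock, held for read, and updates the tunnel's error count and timestamp.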
static void ipgre_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put the GRE key in the third word of the
   GRE header. That makes it impossible to maintain even soft state for
   keyed GRE tunnels with checksums enabled. Tell them "thank you".

   Well, I wonder: rfc1812 was written by a Cisco employee,
   so what the hell makes these idiots break standards established
   by themselves???
 */

	struct iphdr *iph = (struct iphdr *)skb->data;
	__be16	     *p = (__be16*)(skb->data+(iph->ihl<<2));
	int grehlen = (iph->ihl<<2) + 4;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	read_lock(&ipgre_lock);
	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
				flags & GRE_KEY ?
				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
				p[1]);
	if (t == NULL || t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		goto out;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipgre_lock);
	return;
}
Code Example #2
File: br_input.c Project: fgeraci/cs518-sched
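The bridge input path: the frame is passed up, forwarded, or flooded while br->lock is held for read; note that skb->dev->br_port is re-checked once the lock is taken.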
static int br_handle_frame_finish(struct sk_buff *skb)
{
	struct net_bridge *br;
	unsigned char *dest;
	struct net_bridge_fdb_entry *dst;
	struct net_bridge_port *p;
	int passedup;

	dest = skb->mac.ethernet->h_dest;

	p = skb->dev->br_port;
	if (p == NULL)
		goto err_nolock;

	br = p->br;
	read_lock(&br->lock);
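	/* re-check under br->lock: the port may have been detached
	 * while we waited for the lock */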
	if (skb->dev->br_port == NULL)
		goto err;

	passedup = 0;
	if (br->dev.flags & IFF_PROMISC) {
		struct sk_buff *skb2;

		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (skb2 != NULL) {
			passedup = 1;
			br_pass_frame_up(br, skb2);
		}
	}

	if (dest[0] & 1) {
		br_flood_forward(br, skb, !passedup);
		if (!passedup)
			br_pass_frame_up(br, skb);
		goto out;
	}

	dst = br_fdb_get(br, dest);
	if (dst != NULL && dst->is_local) {
		if (!passedup)
			br_pass_frame_up(br, skb);
		else
			kfree_skb(skb);
		br_fdb_put(dst);
		goto out;
	}

	if (dst != NULL) {
		br_forward(dst->dst, skb);
		br_fdb_put(dst);
		goto out;
	}

	br_flood_forward(br, skb, 0);

out:
	read_unlock(&br->lock);
	return 0;

err:
	read_unlock(&br->lock);
err_nolock:
	kfree_skb(skb);
	return 0;
}
Code Example #3
File: exit.c Project: dkhapun/hafala2
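A sys_wait4() implementation instrumented with HW2_DBG trace calls, evidently a course assignment built on a 2.4-era kernel; the child scan runs under tasklist_lock held for read, and the lock is dropped before results are copied to user space.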
asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
{
	int flag, retval;
	DECLARE_WAITQUEUE(wait, current);
	struct task_struct *tsk;
	
	HW2_DBG("*** %s:%d:%s(pid=%d)\n", __FILE__, __LINE__, __FUNCTION__, pid);

	if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
		return -EINVAL;

	HW2_DBG("a");
	add_wait_queue(&current->wait_chldexit,&wait);
	HW2_DBG("a");
repeat:
	HW2_DBG("a");
	flag = 0;
	HW2_DBG("a");
	current->state = TASK_INTERRUPTIBLE;
	HW2_DBG("a");
	read_lock(&tasklist_lock);
	HW2_DBG("a");
	tsk = current;
	HW2_DBG("a");
	do {
		struct task_struct *p;
	HW2_DBG("b");
	 	for (p = tsk->p_cptr ; p ; p = p->p_osptr) {
			if (pid>0) {
				if (p->pid != pid)
					continue;
			} else if (!pid) {
				if (p->pgrp != current->pgrp)
					continue;
			} else if (pid != -1) {
				if (p->pgrp != -pid)
					continue;
			}
			/* Wait for all children (clone and not) if __WALL is set;
			 * otherwise, wait for clone children *only* if __WCLONE is
			 * set; otherwise, wait for non-clone children *only*.  (Note:
			 * A "clone" child here is one that reports to its parent
			 * using a signal other than SIGCHLD.) */
			if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
			    && !(options & __WALL))
				continue;
			flag = 1;
			switch (p->state) {
			case TASK_STOPPED:
				if (!p->exit_code)
					continue;
				if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))
					continue;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; 
				if (!retval && stat_addr) 
					retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
				if (!retval) {
					p->exit_code = 0;
					retval = p->pid;
				}
				goto end_wait4;
			case TASK_ZOMBIE:
				current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
				current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user(p->exit_code, stat_addr);
				if (retval)
					goto end_wait4; 
				retval = p->pid;
				if (p->p_opptr != p->p_pptr) {
					write_lock_irq(&tasklist_lock);
					REMOVE_LINKS(p);
					p->p_pptr = p->p_opptr;
					SET_LINKS(p);
					do_notify_parent(p, SIGCHLD);
					write_unlock_irq(&tasklist_lock);
				} else
					release_task(p);
				goto end_wait4;
			default:
				continue;
			}
		}
	HW2_DBG("c");
		if (options & __WNOTHREAD)
			break;
	HW2_DBG("c");
		tsk = next_thread(tsk);
	HW2_DBG("c");
	} while (tsk != current);
	HW2_DBG("d");
	read_unlock(&tasklist_lock);
	HW2_DBG("d flag=%d\n", flag);
	if (flag) {
		retval = 0;
		if (options & WNOHANG)
			goto end_wait4;
		retval = -ERESTARTSYS;
		if (signal_pending(current))
			goto end_wait4;
	HW2_DBG("e");
		schedule();
	HW2_DBG("f");
		goto repeat;
	}
	HW2_DBG("g");
	retval = -ECHILD;
end_wait4:
	HW2_DBG("h");
	current->state = TASK_RUNNING;
	HW2_DBG("h");
	remove_wait_queue(&current->wait_chldexit,&wait);
	HW2_DBG("h");
	return retval;
}
Code Example #4
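A C++ counterpart using Boost: the getter takes a boost::shared_lock (the read side of a shared_mutex) before returning the flag.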
bool MutableMetric::changed() {
    boost::shared_lock<boost::shared_mutex> read_lock(this->changed_mutex_);
    return this->changed_;
}
Code Example #5
File: sit.c Project: mikuhatsune001/linux2.6.32
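The SIT (IPv6-in-IPv4) tunnel ICMP error handler: it looks up the tunnel under ipip6_lock, held for read, and maintains the tunnel's error-rate state.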
static int ipip6_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	struct iphdr *iph = (struct iphdr*)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	err = -ENOENT;

	read_lock(&ipip6_lock);
	t = ipip6_tunnel_lookup(dev_net(skb->dev),
				skb->dev,
				iph->daddr,
				iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipip6_lock);
	return err;
}
Code Example #6
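A PowerPC sys_ptrace(): tasklist_lock is held for read only long enough to find the target task and take a reference with get_task_struct(), after which the request is serviced without it.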
int sys_ptrace(long request, long pid, long addr, long data)
{
    struct task_struct *child;
    int ret = -EPERM;

    lock_kernel();
    if (request == PTRACE_TRACEME) {
        /* are we already being traced? */
        if (current->ptrace & PT_PTRACED)
            goto out;
        /* set the ptrace bit in the process flags. */
        current->ptrace |= PT_PTRACED;
        ret = 0;
        goto out;
    }
    ret = -ESRCH;
    read_lock(&tasklist_lock);
    child = find_task_by_pid(pid);
    if (child)
        get_task_struct(child);
    read_unlock(&tasklist_lock);
    if (!child)
        goto out;

    ret = -EPERM;
    if (pid == 1)		/* you may not mess with init */
        goto out_tsk;

    if (request == PTRACE_ATTACH) {
        ret = ptrace_attach(child);
        goto out_tsk;
    }

    ret = ptrace_check_attach(child, request == PTRACE_KILL);
    if (ret < 0)
        goto out_tsk;

    switch (request) {
    /* when I and D space are separate, these will need to be fixed. */
    case PTRACE_PEEKTEXT: /* read word at location addr. */
    case PTRACE_PEEKDATA: {
        unsigned long tmp;
        int copied;

        copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
        ret = -EIO;
        if (copied != sizeof(tmp))
            break;
        ret = put_user(tmp,(unsigned long *) data);
        break;
    }

    /* read the word at location addr in the USER area. */
    /* XXX this will need fixing for 64-bit */
    case PTRACE_PEEKUSR: {
        unsigned long index, tmp;

        ret = -EIO;
        /* convert to index and check */
        index = (unsigned long) addr >> 2;
        if ((addr & 3) || index > PT_FPSCR)
            break;

        if (index < PT_FPR0) {
            tmp = get_reg(child, (int) index);
        } else {
            if (child->thread.regs != NULL
                    && child->thread.regs->msr & MSR_FP)
                giveup_fpu(child);
            tmp = ((unsigned long *)child->thread.fpr)[index - PT_FPR0];
        }
        ret = put_user(tmp,(unsigned long *) data);
        break;
    }

    /* If I and D space are separate, this will have to be fixed. */
    case PTRACE_POKETEXT: /* write the word at location addr. */
    case PTRACE_POKEDATA:
        ret = 0;
        if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
            break;
        ret = -EIO;
        break;

    /* write the word at location addr in the USER area */
    /* XXX this will need fixing for 64-bit */
    case PTRACE_POKEUSR: {
        unsigned long index;

        ret = -EIO;
        /* convert to index and check */
        index = (unsigned long) addr >> 2;
        if ((addr & 3) || index > PT_FPSCR)
            break;

        if (index == PT_ORIG_R3)
            break;
        if (index < PT_FPR0) {
            ret = put_reg(child, index, data);
        } else {
            if (child->thread.regs != NULL
                    && child->thread.regs->msr & MSR_FP)
                giveup_fpu(child);
            ((unsigned long *)child->thread.fpr)[index - PT_FPR0] = data;
            ret = 0;
        }
        break;
    }

    case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
    case PTRACE_CONT: { /* restart after signal. */
        ret = -EIO;
        if ((unsigned long) data > _NSIG)
            break;
        if (request == PTRACE_SYSCALL)
            child->ptrace |= PT_TRACESYS;
        else
            child->ptrace &= ~PT_TRACESYS;
        child->exit_code = data;
        /* make sure the single step bit is not set. */
        clear_single_step(child);
        wake_up_process(child);
        ret = 0;
        break;
    }

    /*
     * make the child exit.  Best I can do is send it a sigkill.
     * perhaps it should be put in the status that it wants to
     * exit.
     */
    case PTRACE_KILL: {
        ret = 0;
        if (child->state == TASK_ZOMBIE)	/* already dead */
            break;
        child->exit_code = SIGKILL;
        /* make sure the single step bit is not set. */
        clear_single_step(child);
        wake_up_process(child);
        break;
    }

    case PTRACE_SINGLESTEP: {  /* set the trap flag. */
        ret = -EIO;
        if ((unsigned long) data > _NSIG)
            break;
        child->ptrace &= ~PT_TRACESYS;
        set_single_step(child);
        child->exit_code = data;
        /* give it a chance to run. */
        wake_up_process(child);
        ret = 0;
        break;
    }

    case PTRACE_DETACH:
        ret = ptrace_detach(child, data);
        break;

#ifdef CONFIG_ALTIVEC
    case PTRACE_GETVRREGS:
        /* Get the child altivec register state. */
        if (child->thread.regs->msr & MSR_VEC)
            giveup_altivec(child);
        ret = get_vrregs((unsigned long *)data, child);
        break;

    case PTRACE_SETVRREGS:
        /* Set the child altivec register state. */
        /* this is to clear the MSR_VEC bit to force a reload
         * of register state from memory */
        if (child->thread.regs->msr & MSR_VEC)
            giveup_altivec(child);
        ret = set_vrregs(child, (unsigned long *)data);
        break;
#endif

    default:
        ret = -EIO;
        break;
    }
out_tsk:
    free_task_struct(child);
out:
    unlock_kernel();
    return ret;
}
Code Example #7
File: af_econet.c Project: dmgerman/original
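The Econet sendmsg path: in the AUN-over-UDP branch, the destination IP is derived from the device's primary address, which is read from the in_device address list under idev->lock held for read.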
static int econet_sendmsg(struct socket *sock, struct msghdr *msg, int len,
			  struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ec *saddr=(struct sockaddr_ec *)msg->msg_name;
	struct net_device *dev;
	struct ec_addr addr;
	int err;
	unsigned char port, cb;
	struct sk_buff *skb;
	struct ec_cb *eb;
#ifdef CONFIG_ECONET_NATIVE
	unsigned short proto = 0;
#endif
#ifdef CONFIG_ECONET_AUNUDP
	struct msghdr udpmsg;
	struct iovec iov[msg->msg_iovlen+1];
	struct aunhdr ah;
	struct sockaddr_in udpdest;
	__kernel_size_t size;
	int i;
	mm_segment_t oldfs;
#endif
		
	/*
	 *	Check the flags. 
	 */

	if (msg->msg_flags&~MSG_DONTWAIT) 
		return(-EINVAL);

	/*
	 *	Get and verify the address. 
	 */
	 
	if (saddr == NULL) {
		addr.station = sk->protinfo.af_econet->station;
		addr.net = sk->protinfo.af_econet->net;
		port = sk->protinfo.af_econet->port;
		cb = sk->protinfo.af_econet->cb;
	} else {
		if (msg->msg_namelen < sizeof(struct sockaddr_ec)) 
			return -EINVAL;
		addr.station = saddr->addr.station;
		addr.net = saddr->addr.net;
		port = saddr->port;
		cb = saddr->cb;
	}

	/* Look for a device with the right network number. */
	dev = net2dev_map[addr.net];

	/* If not directly reachable, use some default */
	if (dev == NULL)
	{
		dev = net2dev_map[0];
		/* No interfaces at all? */
		if (dev == NULL)
			return -ENETDOWN;
	}

	if (dev->type == ARPHRD_ECONET)
	{
		/* Real hardware Econet.  We're not worthy etc. */
#ifdef CONFIG_ECONET_NATIVE
		atomic_inc(&dev->refcnt);
		
		skb = sock_alloc_send_skb(sk, len+dev->hard_header_len+15, 0, 
					  msg->msg_flags & MSG_DONTWAIT, &err);
		if (skb==NULL)
			goto out_unlock;
		
		skb_reserve(skb, (dev->hard_header_len+15)&~15);
		skb->nh.raw = skb->data;
		
		eb = (struct ec_cb *)&skb->cb;
		
		eb->cookie = saddr->cookie;
		eb->sec = *saddr;
		eb->sent = ec_tx_done;

		if (dev->hard_header) {
			int res;
			struct ec_framehdr *fh;
			err = -EINVAL;
			res = dev->hard_header(skb, dev, ntohs(proto), 
					       &addr, NULL, len);
			/* Poke in our control byte and
			   port number.  Hack, hack.  */
			fh = (struct ec_framehdr *)(skb->data);
			fh->cb = cb;
			fh->port = port;
			if (sock->type != SOCK_DGRAM) {
				skb->tail = skb->data;
				skb->len = 0;
			} else if (res < 0)
				goto out_free;
		}
		
		/* Copy the data. Returns -EFAULT on error */
		err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
		skb->protocol = proto;
		skb->dev = dev;
		skb->priority = sk->priority;
		if (err)
			goto out_free;
		
		err = -ENETDOWN;
		if (!(dev->flags & IFF_UP))
			goto out_free;
		
		/*
		 *	Now send it
		 */
		
		dev_queue_xmit(skb);
		dev_put(dev);
		return(len);

	out_free:
		kfree_skb(skb);
	out_unlock:
		if (dev)
			dev_put(dev);
#else
		err = -EPROTOTYPE;
#endif
		return err;
	}

#ifdef CONFIG_ECONET_AUNUDP
	/* AUN virtual Econet. */

	if (udpsock == NULL)
		return -ENETDOWN;		/* No socket - can't send */
	
	/* Make up a UDP datagram and hand it off to some higher intellect. */

	memset(&udpdest, 0, sizeof(udpdest));
	udpdest.sin_family = AF_INET;
	udpdest.sin_port = htons(AUN_PORT);

	/* At the moment we use the stupid Acorn scheme of Econet address
	   y.x maps to IP a.b.c.x.  This should be replaced with something
	   more flexible and more aware of subnet masks.  */
	{
		struct in_device *idev = in_dev_get(dev);
		unsigned long network = 0;
		if (idev) {
			read_lock(&idev->lock);
			if (idev->ifa_list)
				network = ntohl(idev->ifa_list->ifa_address) & 
					0xffffff00;		/* !!! */
			read_unlock(&idev->lock);
			in_dev_put(idev);
		}
		udpdest.sin_addr.s_addr = htonl(network | addr.station);
	}

	ah.port = port;
	ah.cb = cb & 0x7f;
	ah.code = 2;		/* magic */
	ah.pad = 0;

	/* tack our header on the front of the iovec */
	size = sizeof(struct aunhdr);
	iov[0].iov_base = (void *)&ah;
	iov[0].iov_len = size;
	for (i = 0; i < msg->msg_iovlen; i++) {
		void *base = msg->msg_iov[i].iov_base;
		size_t len = msg->msg_iov[i].iov_len;
		/* Check it now since we switch to KERNEL_DS later. */
		if ((err = verify_area(VERIFY_READ, base, len)) < 0)
			return err;
		iov[i+1].iov_base = base;
		iov[i+1].iov_len = len;
		size += len;
	}

	/* Get a skbuff (no data, just holds our cb information) */
	if ((skb = sock_alloc_send_skb(sk, 0, 0, 
			     msg->msg_flags & MSG_DONTWAIT, &err)) == NULL)
		return err;

	eb = (struct ec_cb *)&skb->cb;

	eb->cookie = saddr->cookie;
	eb->timeout = (5*HZ);
	eb->start = jiffies;
	ah.handle = aun_seq;
	eb->seq = (aun_seq++);
	eb->sec = *saddr;

	skb_queue_tail(&aun_queue, skb);

	udpmsg.msg_name = (void *)&udpdest;
	udpmsg.msg_namelen = sizeof(udpdest);
	udpmsg.msg_iov = &iov[0];
	udpmsg.msg_iovlen = msg->msg_iovlen + 1;
	udpmsg.msg_control = NULL;
	udpmsg.msg_controllen = 0;
	udpmsg.msg_flags=0;

	oldfs = get_fs(); set_fs(KERNEL_DS);	/* More privs :-) */
	err = sock_sendmsg(udpsock, &udpmsg, size);
	set_fs(oldfs);
#else
	err = -EPROTOTYPE;
#endif
	return err;
}
Code Example #8
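A sysfs store handler for a low-memory-killer/compcache helper: it walks the process list under tasklist_lock, held for read, to find the pid supplied by the framework before swapping that process's pages out.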
/*
 * lmk_state_store() will be called by the framework;
 * the framework sends the pid of the process that needs to be swapped.
 */
static ssize_t lmk_state_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t size)
{
	sscanf(buf, "%d,%d", &lmk_kill_pid, &lmk_kill_ok);

	/* if the screen on, the optimized compcache will stop */
	if (atomic_read(&optimize_comp_on) != 1)
		return size;

	if (lmk_kill_ok == 1) {
		struct task_struct *p;
		struct task_struct *selected = NULL;
		struct sysinfo ramzswap_info = { 0 };

		/*
		 * check the free RAM and swap area,
		 * stop the optimized compcache in cpu idle case;
		 * leave some swap area for using in low memory case
		 */
		si_swapinfo(&ramzswap_info);
		si_meminfo(&ramzswap_info);

		if ((ramzswap_info.freeswap < CHECK_FREE_SWAPSPACE) ||
		    (ramzswap_info.freeram < CHECK_FREE_MEMORY)) {
			lmk_kill_ok = 0;
			return size;
		}

		read_lock(&tasklist_lock);
		for_each_process(p) {
			if ((p->pid == lmk_kill_pid) &&
			    (__task_cred(p)->uid > 10000)) {
				task_lock(p);
				selected = p;
				if (!selected->mm || !selected->signal) {
					task_unlock(p);
					selected = NULL;
					pr_info("idletime compcache: process is being killed\n");
					break;
				}
				else {
#if SWAP_PROCESS_DEBUG_LOG > 0
					pr_info("idletime compcache: swap process pid %d, name %s, task_size %ld\n",
						p->pid, p->comm, get_mm_rss(p->mm));
#endif
				}
				break;
			}
		}
		read_unlock(&tasklist_lock);

		if (selected) {
			struct zone *zone0 = NULL, *zone1 = NULL;
			LIST_HEAD(zone0_page_list);
			LIST_HEAD(zone1_page_list);
			int pages_tofree = 0, pages_freed = 0;

			pages_tofree =
			    shrink_pages(selected->mm, &zone0, &zone0_page_list,
					 &zone1, &zone1_page_list, 0x7FFFFFFF);
			task_unlock(selected);
			pages_freed =
			    swap_pages(zone0, &zone0_page_list, zone1,
				       &zone1_page_list);
			lmk_kill_ok = 0;

		}
	}

	return size;
}
Code Example #9
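The bonding ALB transmit path: bond->lock and bond->ptrlock are both held for read so that neither the slave list nor current_slave can change while a transmit slave is chosen by hashing the destination address.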
int
bond_alb_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct bonding *bond = (struct bonding *) dev->priv;
	struct ethhdr *eth_data = (struct ethhdr *)skb->data;
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct slave *tx_slave = NULL;
	char do_tx_balance = 1;
	int hash_size = 0;
	u32 hash_index = 0;
	u8 *hash_start = NULL;
	u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff};

	if (!IS_UP(dev)) { /* bond down */
		dev_kfree_skb(skb);
		return 0;
	}

	/* make sure that the current_slave and the slaves list do
	 * not change during tx
	 */
	read_lock(&bond->lock);

	if (bond->slave_cnt == 0) {
		/* no suitable interface, frame not sent */
		dev_kfree_skb(skb);
		read_unlock(&bond->lock);
		return 0;
	}

	read_lock(&bond->ptrlock);

	switch (ntohs(skb->protocol)) {
	case ETH_P_IP:
		if ((memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) ||
		    (skb->nh.iph->daddr == 0xffffffff)) {
			do_tx_balance = 0;
			break;
		}
		hash_start = (char*)&(skb->nh.iph->daddr);
		hash_size = 4;
		break;

	case ETH_P_IPV6:
		if (memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) {
			do_tx_balance = 0;
			break;
		}

		hash_start = (char*)&(skb->nh.ipv6h->daddr);
		hash_size = 16;
		break;

	case ETH_P_IPX:
		if (skb->nh.ipxh->ipx_checksum !=
		    __constant_htons(IPX_NO_CHECKSUM)) {
			/* something is wrong with this packet */
			do_tx_balance = 0;
			break;
		}

		if (skb->nh.ipxh->ipx_type !=
		    __constant_htons(IPX_TYPE_NCP)) {
			/* The only protocol worth balancing in
			 * this family since it has an "ARP" like
			 * mechanism
			 */
			do_tx_balance = 0;
			break;
		}

		hash_start = (char*)eth_data->h_dest;
		hash_size = ETH_ALEN;
		break;

	case ETH_P_ARP:
		do_tx_balance = 0;
		if (bond_info->rlb_enabled) {
			tx_slave = rlb_arp_xmit(skb, bond);
		}
		break;

	default:
		do_tx_balance = 0;
		break;
	}

	if (do_tx_balance) {
		hash_index = _simple_hash(hash_start, hash_size);
		tx_slave = tlb_choose_channel(bond, hash_index, skb->len);
	}

	if (!tx_slave) {
		/* unbalanced or unassigned, send through primary */
		tx_slave = bond->current_slave;
		bond_info->unbalanced_load += skb->len;
	}

	if (tx_slave && SLAVE_IS_OK(tx_slave)) {
		skb->dev = tx_slave->dev;
		if (tx_slave != bond->current_slave) {
			memcpy(eth_data->h_source,
				tx_slave->dev->dev_addr,
				ETH_ALEN);
		}
		dev_queue_xmit(skb);
	} else {
		/* no suitable interface, frame not sent */
		if (tx_slave) {
			tlb_clear_slave(bond, tx_slave, 0);
		}
		dev_kfree_skb(skb);
	}

	read_unlock(&bond->ptrlock);
	read_unlock(&bond->lock);
	return 0;
}
Code Example #10
File: itimer.c Project: jameshilliard/20-4-4
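do_getitimer(): for the process-wide ITIMER_VIRTUAL and ITIMER_PROF timers, the per-thread CPU times are summed under tasklist_lock (read) with the sighand siglock also held.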
int do_getitimer(int which, struct itimerval *value)
{
	struct task_struct *tsk = current;
	cputime_t cinterval, cval;

	switch (which) {
	case ITIMER_REAL:
		spin_lock_irq(&tsk->sighand->siglock);
		value->it_value = itimer_get_remtime(&tsk->signal->real_timer);
		value->it_interval =
			ktime_to_timeval(tsk->signal->it_real_incr);
		spin_unlock_irq(&tsk->sighand->siglock);
		break;
	case ITIMER_VIRTUAL:
		read_lock(&tasklist_lock);
		spin_lock_irq(&tsk->sighand->siglock);
		cval = tsk->signal->it_virt_expires;
		cinterval = tsk->signal->it_virt_incr;
		if (!cputime_eq(cval, cputime_zero)) {
			struct task_struct *t = tsk;
			cputime_t utime = tsk->signal->utime;
			do {
				utime = cputime_add(utime, t->utime);
				t = next_thread(t);
			} while (t != tsk);
			if (cputime_le(cval, utime)) { /* about to fire */
				cval = jiffies_to_cputime(1);
			} else {
				cval = cputime_sub(cval, utime);
			}
		}
		spin_unlock_irq(&tsk->sighand->siglock);
		read_unlock(&tasklist_lock);
		cputime_to_timeval(cval, &value->it_value);
		cputime_to_timeval(cinterval, &value->it_interval);
		break;
	case ITIMER_PROF:
		read_lock(&tasklist_lock);
		spin_lock_irq(&tsk->sighand->siglock);
		cval = tsk->signal->it_prof_expires;
		cinterval = tsk->signal->it_prof_incr;
		if (!cputime_eq(cval, cputime_zero)) {
			struct task_struct *t = tsk;
			cputime_t ptime = cputime_add(tsk->signal->utime,
						      tsk->signal->stime);
			do {
				ptime = cputime_add(ptime,
						    cputime_add(t->utime,
								t->stime));
				t = next_thread(t);
			} while (t != tsk);
			if (cputime_le(cval, ptime)) { /* about to fire */
				cval = jiffies_to_cputime(1);
			} else {
				cval = cputime_sub(cval, ptime);
			}
		}
		spin_unlock_irq(&tsk->sighand->siglock);
		read_unlock(&tasklist_lock);
		cputime_to_timeval(cval, &value->it_value);
		cputime_to_timeval(cinterval, &value->it_interval);
		break;
	default:
		return(-EINVAL);
	}
	return 0;
}
Code Example #11
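A low-memory-killer shrinker with an optional multi-victim ENHANCED_LMK_ROUTINE: victims are ranked by oom_adj and RSS while iterating all processes under tasklist_lock, held for read, and are then sent SIGKILL.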
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
{
	struct task_struct *p;
#ifdef ENHANCED_LMK_ROUTINE
	struct task_struct *selected[LOWMEM_DEATHPENDING_DEPTH] = {NULL,};
#else
	struct task_struct *selected = NULL;
#endif
	int rem = 0;
	int tasksize;
	int i;
	int min_adj = OOM_ADJUST_MAX + 1;
#ifdef ENHANCED_LMK_ROUTINE
	int selected_tasksize[LOWMEM_DEATHPENDING_DEPTH] = {0,};
	int selected_oom_adj[LOWMEM_DEATHPENDING_DEPTH] = {OOM_ADJUST_MAX,};
	int all_selected_oom = 0;
#else
	int selected_tasksize = 0;
	int selected_oom_adj;
#endif
	int array_size = ARRAY_SIZE(lowmem_adj);
	int other_free = global_page_state(NR_FREE_PAGES);
	int other_file = global_page_state(NR_FILE_PAGES) -
						global_page_state(NR_SHMEM);

	/*
	 * If we already have a death outstanding, then
	 * bail out right away; indicating to vmscan
	 * that we have nothing further to offer on
	 * this pass.
	 *
	 */
#ifdef ENHANCED_LMK_ROUTINE
	for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) {
		if (lowmem_deathpending[i] &&
			time_before_eq(jiffies, lowmem_deathpending_timeout))
			return 0;
	}
#else
	if (lowmem_deathpending &&
	    time_before_eq(jiffies, lowmem_deathpending_timeout))
		return 0;
#endif

	if (lowmem_adj_size < array_size)
		array_size = lowmem_adj_size;
	if (lowmem_minfree_size < array_size)
		array_size = lowmem_minfree_size;
	for (i = 0; i < array_size; i++) {
		if (other_free < lowmem_minfree[i] &&
		    other_file < lowmem_minfree[i]) {
			min_adj = lowmem_adj[i];
			break;
		}
	}
	if (sc->nr_to_scan > 0)
		lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n",
			     sc->nr_to_scan, sc->gfp_mask, other_free, other_file,
			     min_adj);
	rem = global_page_state(NR_ACTIVE_ANON) +
		global_page_state(NR_ACTIVE_FILE) +
		global_page_state(NR_INACTIVE_ANON) +
		global_page_state(NR_INACTIVE_FILE);
	if (sc->nr_to_scan <= 0 || min_adj == OOM_ADJUST_MAX + 1) {
		lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n",
			     sc->nr_to_scan, sc->gfp_mask, rem);
		return rem;
	}

#ifdef ENHANCED_LMK_ROUTINE
	for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++)
		selected_oom_adj[i] = min_adj;
#else
	selected_oom_adj = min_adj;
#endif

	read_lock(&tasklist_lock);
	for_each_process(p) {
		struct mm_struct *mm;
		struct signal_struct *sig;
		int oom_adj;

		task_lock(p);
		mm = p->mm;
		sig = p->signal;
		if (!mm || !sig) {
			task_unlock(p);
			continue;
		}
		oom_adj = sig->oom_adj;
		if (oom_adj < min_adj) {
			task_unlock(p);
			continue;
		}
		tasksize = get_mm_rss(mm);
		task_unlock(p);
		if (tasksize <= 0)
			continue;

#ifdef ENHANCED_LMK_ROUTINE
		for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) {
			if (all_selected_oom >= LOWMEM_DEATHPENDING_DEPTH) {
				if (oom_adj < selected_oom_adj[i])
					continue;
				if (oom_adj == selected_oom_adj[i] &&
				    tasksize <= selected_tasksize[i])
					continue;
			} else if (selected[i])
				continue;

			selected[i] = p;
			selected_tasksize[i] = tasksize;
			selected_oom_adj[i] = oom_adj;

			if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH)
				all_selected_oom++;

			lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
				p->pid, p->comm, oom_adj, tasksize);

			break;
		}
#else
		if (selected) {
			if (oom_adj < selected_oom_adj)
				continue;
			if (oom_adj == selected_oom_adj &&
			    tasksize <= selected_tasksize)
				continue;
		}
		selected = p;
		selected_tasksize = tasksize;
		selected_oom_adj = oom_adj;
		lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
			     p->pid, p->comm, oom_adj, tasksize);
#endif
	}
#ifdef ENHANCED_LMK_ROUTINE
	for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) {
		if (selected[i]) {
			lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
				selected[i]->pid, selected[i]->comm,
				selected_oom_adj[i], selected_tasksize[i]);
			lowmem_deathpending[i] = selected[i];
			lowmem_deathpending_timeout = jiffies + HZ;
			force_sig(SIGKILL, selected[i]);
			rem -= selected_tasksize[i];
		}
	}
#else
	if (selected) {
		lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
			     selected->pid, selected->comm,
			     selected_oom_adj, selected_tasksize);
		lowmem_deathpending = selected;
		lowmem_deathpending_timeout = jiffies + HZ;
		force_sig(SIGKILL, selected);
		rem -= selected_tasksize;
	}
#endif
	lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n",
		     sc->nr_to_scan, sc->gfp_mask, rem);
	read_unlock(&tasklist_lock);
	return rem;
}
Code Example #12
File: itimer.c Project: jameshilliard/20-4-4
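do_setitimer(): the CPU-time itimers are updated under tasklist_lock (read) plus siglock, while ITIMER_REAL instead retries hrtimer_try_to_cancel() under siglock alone.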
int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
{
	struct task_struct *tsk = current;
	struct hrtimer *timer;
	ktime_t expires;
	cputime_t cval, cinterval, nval, ninterval;

	/*
	 * Validate the timevals in value.
	 *
	 * Note: Although the spec requires that invalid values shall
	 * return -EINVAL, we just fixup the value and print a limited
	 * number of warnings in order not to break users of this
	 * historical misfeature.
	 *
	 * Scheduled for replacement in March 2007
	 */
	check_itimerval(value);

	MARK(kernel_timer_set_itimer, "%d %ld %ld %ld %ld",
			which,
			value->it_interval.tv_sec,
			value->it_interval.tv_usec,
			value->it_value.tv_sec,
			value->it_value.tv_usec);

	switch (which) {
	case ITIMER_REAL:
again:
		spin_lock_irq(&tsk->sighand->siglock);
		timer = &tsk->signal->real_timer;
		if (ovalue) {
			ovalue->it_value = itimer_get_remtime(timer);
			ovalue->it_interval
				= ktime_to_timeval(tsk->signal->it_real_incr);
		}
		/* We are sharing ->siglock with it_real_fn() */
		if (hrtimer_try_to_cancel(timer) < 0) {
			spin_unlock_irq(&tsk->sighand->siglock);
			goto again;
		}
		tsk->signal->it_real_incr =
			timeval_to_ktime(value->it_interval);
		expires = timeval_to_ktime(value->it_value);
		if (expires.tv64 != 0)
			hrtimer_start(timer, expires, HRTIMER_REL);
		spin_unlock_irq(&tsk->sighand->siglock);
		break;
	case ITIMER_VIRTUAL:
		nval = timeval_to_cputime(&value->it_value);
		ninterval = timeval_to_cputime(&value->it_interval);
		read_lock(&tasklist_lock);
		spin_lock_irq(&tsk->sighand->siglock);
		cval = tsk->signal->it_virt_expires;
		cinterval = tsk->signal->it_virt_incr;
		if (!cputime_eq(cval, cputime_zero) ||
		    !cputime_eq(nval, cputime_zero)) {
			if (cputime_gt(nval, cputime_zero))
				nval = cputime_add(nval,
						   jiffies_to_cputime(1));
			set_process_cpu_timer(tsk, CPUCLOCK_VIRT,
					      &nval, &cval);
		}
		tsk->signal->it_virt_expires = nval;
		tsk->signal->it_virt_incr = ninterval;
		spin_unlock_irq(&tsk->sighand->siglock);
		read_unlock(&tasklist_lock);
		if (ovalue) {
			cputime_to_timeval(cval, &ovalue->it_value);
			cputime_to_timeval(cinterval, &ovalue->it_interval);
		}
		break;
	case ITIMER_PROF:
		nval = timeval_to_cputime(&value->it_value);
		ninterval = timeval_to_cputime(&value->it_interval);
		read_lock(&tasklist_lock);
		spin_lock_irq(&tsk->sighand->siglock);
		cval = tsk->signal->it_prof_expires;
		cinterval = tsk->signal->it_prof_incr;
		if (!cputime_eq(cval, cputime_zero) ||
		    !cputime_eq(nval, cputime_zero)) {
			if (cputime_gt(nval, cputime_zero))
				nval = cputime_add(nval,
						   jiffies_to_cputime(1));
			set_process_cpu_timer(tsk, CPUCLOCK_PROF,
					      &nval, &cval);
		}
		tsk->signal->it_prof_expires = nval;
		tsk->signal->it_prof_incr = ninterval;
		spin_unlock_irq(&tsk->sighand->siglock);
		read_unlock(&tasklist_lock);
		if (ovalue) {
			cputime_to_timeval(cval, &ovalue->it_value);
			cputime_to_timeval(cinterval, &ovalue->it_interval);
		}
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
Code Example #13
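The suspend-time task freezer: each pass walks every thread under tasklist_lock, held for read, counting tasks that still refuse to freeze, with exponential backoff between passes and an abort path for pending wakeups.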
static int try_to_freeze_tasks(bool user_only)
{
	struct task_struct *g, *p;
	unsigned long end_time;
	unsigned int todo;
	bool wq_busy = false;
	struct timeval start, end;
	u64 elapsed_msecs64;
	unsigned int elapsed_msecs;
	bool wakeup = false;
	int sleep_usecs = USEC_PER_MSEC;
	char suspend_abort[MAX_SUSPEND_ABORT_LEN];

	do_gettimeofday(&start);

	end_time = jiffies + msecs_to_jiffies(freeze_timeout_msecs);

	if (!user_only)
		freeze_workqueues_begin();

	while (true) {
		todo = 0;
		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			if (p == current || !freeze_task(p))
				continue;

			if (!freezer_should_skip(p))
				todo++;
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);

		if (!user_only) {
			wq_busy = freeze_workqueues_busy();
			todo += wq_busy;
		}

		if (!todo || time_after(jiffies, end_time))
			break;

		if (pm_wakeup_pending()) {
			pm_get_active_wakeup_sources(suspend_abort,
				MAX_SUSPEND_ABORT_LEN);
			log_suspend_abort_reason(suspend_abort);
			wakeup = true;
			break;
		}

		/*
		 * We need to retry, but first give the freezing tasks some
		 * time to enter the refrigerator.  Start with an initial
		 * 1 ms sleep followed by exponential backoff until 8 ms.
		 */
		usleep_range(sleep_usecs / 2, sleep_usecs);
		if (sleep_usecs < 8 * USEC_PER_MSEC)
			sleep_usecs *= 2;
	}

	do_gettimeofday(&end);
	elapsed_msecs64 = timeval_to_ns(&end) - timeval_to_ns(&start);
	do_div(elapsed_msecs64, NSEC_PER_MSEC);
	elapsed_msecs = elapsed_msecs64;

	if (wakeup) {
		printk("\n");
		printk(KERN_ERR "Freezing of tasks aborted after %d.%03d seconds",
		       elapsed_msecs / 1000, elapsed_msecs % 1000);
	} else if (todo) {
		printk("\n");
		printk(KERN_ERR "Freezing of tasks failed after %d.%03d seconds"
		       " (%d tasks refusing to freeze, wq_busy=%d):\n",
		       elapsed_msecs / 1000, elapsed_msecs % 1000,
		       todo - wq_busy, wq_busy);

		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			if (p != current && !freezer_should_skip(p)
			    && freezing(p) && !frozen(p))
				sched_show_task(p);
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);
	} else {
		/* success path; the original snippet was truncated here and
		 * this closing is reconstructed from the mainline function */
		printk("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000,
			elapsed_msecs % 1000);
	}

	return todo ? -EBUSY : 0;
}
Code Example #14
File: ip_gre.c Project: vps2fast/openvz-kernel
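The GRE receive path: after parsing the optional checksum, key, and sequence fields, the tunnel is looked up and fed under ipgre_lock, held for read, with checksum and sequence numbers validated against the tunnel's configuration.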
static int ipgre_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;
	__be16 gre_proto;
	unsigned int len;

	if (!pskb_may_pull(skb, 16))
		goto drop_nolock;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16*)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop_nolock;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32*)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32*)(h + offset));
			offset += 4;
		}
	}

	gre_proto = *(__be16 *)(h + 2);

	read_lock(&ipgre_lock);
	if ((tunnel = ipgre_tunnel_lookup(skb->dev,
					  iph->saddr, iph->daddr, key,
					  gre_proto))) {
		struct net_device_stats *stats = &tunnel->dev->stats;

		secpath_reset(skb);

		skb->protocol = gre_proto;
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
		 */
		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (skb_rtable(skb)->fl.iif == 0)
				goto drop;
			stats->multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			stats->rx_crc_errors++;
			stats->rx_errors++;
			goto drop;
		}
		if (tunnel->parms.i_flags&GRE_SEQ) {
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				stats->rx_fifo_errors++;
				stats->rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}

		len = skb->len;

		/* Warning: All skb pointers will be invalidated! */
		if (tunnel->dev->type == ARPHRD_ETHER) {
			if (!pskb_may_pull(skb, ETH_HLEN)) {
				stats->rx_length_errors++;
				stats->rx_errors++;
				goto drop;
			}

			iph = ip_hdr(skb);
			skb->protocol = eth_type_trans(skb, tunnel->dev);
			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		}

		stats->rx_packets++;
		stats->rx_bytes += len;
		skb->dev = tunnel->dev;
		skb_dst_drop(skb);
		nf_reset(skb);

		skb_reset_network_header(skb);
		ipgre_ecn_decapsulate(iph, skb);

		netif_rx(skb);
		read_unlock(&ipgre_lock);
		return(0);
	}
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	read_unlock(&ipgre_lock);
drop_nolock:
	kfree_skb(skb);
	return(0);
}
Code Example #15
File: inode.c Project: ReneNyffenegger/linux
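ocfs2_iget(): once the inode is loaded, the journal's running (or committing) transaction id is sampled under j_state_lock, held for read, so that later fsync/fdatasync calls know which transaction to wait for.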
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
			 int sysfile_type)
{
	int rc = -ESTALE;
	struct inode *inode = NULL;
	struct super_block *sb = osb->sb;
	struct ocfs2_find_inode_args args;
	journal_t *journal = OCFS2_SB(sb)->journal->j_journal;

	trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
			       sysfile_type);

	/* Ok. By now we've either got the offsets passed to us by the
	 * caller, or we just pulled them off the bh. Lets do some
	 * sanity checks to make sure they're OK. */
	if (blkno == 0) {
		inode = ERR_PTR(-EINVAL);
		mlog_errno(PTR_ERR(inode));
		goto bail;
	}

	args.fi_blkno = blkno;
	args.fi_flags = flags;
	args.fi_ino = ino_from_blkno(sb, blkno);
	args.fi_sysfile_type = sysfile_type;

	inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor,
			     ocfs2_init_locked_inode, &args);
	/* inode was *not* in the inode cache. 2.6.x requires
	 * us to do our own read_inode call and unlock it
	 * afterwards. */
	if (inode == NULL) {
		inode = ERR_PTR(-ENOMEM);
		mlog_errno(PTR_ERR(inode));
		goto bail;
	}
	trace_ocfs2_iget5_locked(inode->i_state);
	if (inode->i_state & I_NEW) {
		rc = ocfs2_read_locked_inode(inode, &args);
		unlock_new_inode(inode);
	}
	if (is_bad_inode(inode)) {
		iput(inode);
		inode = ERR_PTR(rc);
		goto bail;
	}

	/*
	 * Set transaction id's of transactions that have to be committed
	 * to finish f[data]sync. We set them to currently running transaction
	 * as we cannot be sure that the inode or some of its metadata isn't
	 * part of the transaction - the inode could have been reclaimed and
	 * now it is reread from disk.
	 */
	if (journal) {
		transaction_t *transaction;
		tid_t tid;
		struct ocfs2_inode_info *oi = OCFS2_I(inode);

		read_lock(&journal->j_state_lock);
		if (journal->j_running_transaction)
			transaction = journal->j_running_transaction;
		else
			transaction = journal->j_committing_transaction;
		if (transaction)
			tid = transaction->t_tid;
		else
			tid = journal->j_commit_sequence;
		read_unlock(&journal->j_state_lock);
		oi->i_sync_tid = tid;
		oi->i_datasync_tid = tid;
	}

bail:
	if (!IS_ERR(inode)) {
		trace_ocfs2_iget_end(inode, 
			(unsigned long long)OCFS2_I(inode)->ip_blkno);
	}

	return inode;
}
Code Example #16
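The periodic ALB monitor: sending learning packets and rebalancing TX traffic run under bond->ptrlock held for read, while changing the current slave's promiscuity takes the same lock for write.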
void
bond_alb_monitor(struct bonding *bond)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct slave *slave = NULL;

	read_lock(&bond->lock);

	if ((bond->slave_cnt == 0) || !(bond->device->flags & IFF_UP)) {
		bond_info->tx_rebalance_counter = 0;
		bond_info->lp_counter = 0;
		goto out;
	}

	bond_info->tx_rebalance_counter++;
	bond_info->lp_counter++;

	/* send learning packets */
	if (bond_info->lp_counter >= BOND_ALB_LP_TICKS) {
		/* change of current_slave involves swapping of mac addresses.
		 * in order to avoid this swapping from happening while
		 * sending the learning packets, the ptrlock must be held for
		 * read.
		 */
		read_lock(&bond->ptrlock);
		slave = bond_get_first_slave(bond);
		while (slave) {
			alb_send_learning_packets(slave,slave->dev->dev_addr);
			slave = bond_get_next_slave(bond, slave);
		}
		read_unlock(&bond->ptrlock);

		bond_info->lp_counter = 0;
	}

	/* rebalance tx traffic */
	if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) {
		read_lock(&bond->ptrlock);
		slave = bond_get_first_slave(bond);
		while (slave) {
			tlb_clear_slave(bond, slave, 1);
			if (slave == bond->current_slave) {
				SLAVE_TLB_INFO(slave).load =
					bond_info->unbalanced_load /
						BOND_TLB_REBALANCE_INTERVAL;
				bond_info->unbalanced_load = 0;
			}
			slave = bond_get_next_slave(bond, slave);
		}
		read_unlock(&bond->ptrlock);
		bond_info->tx_rebalance_counter = 0;
	}

	/* handle rlb stuff */
	if (bond_info->rlb_enabled) {
		/* the following code changes the promiscuity of
		 * the current_slave. It needs to be locked with a
		 * write lock to protect from other code that also
		 * sets the promiscuity.
		 */
		write_lock(&bond->ptrlock);
		if (bond_info->primary_is_promisc &&
		    (++bond_info->rlb_promisc_timeout_counter >=
			RLB_PROMISC_TIMEOUT)) {

			bond_info->rlb_promisc_timeout_counter = 0;

			/* If the primary was set to promiscuous mode
			 * because a slave was disabled then
			 * it can now leave promiscuous mode.
			 */
			dev_set_promiscuity(bond->current_slave->dev, -1);
			bond_info->primary_is_promisc = 0;
		}
		write_unlock(&bond->ptrlock);

		if (bond_info->rlb_rebalance == 1) {
			bond_info->rlb_rebalance = 0;
			rlb_rebalance(bond);
		}

		/* check if clients need updating */
		if (bond_info->rx_ntt) {
			if (bond_info->rlb_update_delay_counter) {
				--bond_info->rlb_update_delay_counter;
			} else {
				rlb_update_rx_clients(bond);
				if (bond_info->rlb_update_retry_counter) {
					--bond_info->rlb_update_retry_counter;
				} else {
					bond_info->rx_ntt = 0;
				}
			}
		}
	}

out:
	read_unlock(&bond->lock);

	if (bond->device->flags & IFF_UP) {
		/* re-arm the timer */
		mod_timer(&(bond_info->alb_timer),
			jiffies + (HZ/ALB_TIMER_TICKS_PER_SEC));
	}
}
Code Example #17
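An earlier single-victim variant of the low-memory-killer shrinker, still using the old (nr_to_scan, gfp_mask) prototype; the victim scan again runs under tasklist_lock held for read.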
static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask)
{
	struct task_struct *p;
	struct task_struct *selected = NULL;
	int rem = 0;
	int tasksize;
	int i;
	int min_adj = OOM_ADJUST_MAX + 1;
	int selected_tasksize = 0;
	int selected_oom_adj;
	int array_size = ARRAY_SIZE(lowmem_adj);
	int other_free = global_page_state(NR_FREE_PAGES);
	int other_file = global_page_state(NR_FILE_PAGES);

	
	if (lowmem_deathpending)
		return 0;

	if (lowmem_adj_size < array_size)
		array_size = lowmem_adj_size;
	if (lowmem_minfree_size < array_size)
		array_size = lowmem_minfree_size;
	for (i = 0; i < array_size; i++) {
		if (other_free < lowmem_minfree[i] &&
		    other_file < lowmem_minfree[i]) {
			min_adj = lowmem_adj[i];
			break;
		}
	}
	if (nr_to_scan > 0)
		lowmem_print(3, "lowmem_shrink %d, %x, ofree %d %d, ma %d\n",
			     nr_to_scan, gfp_mask, other_free, other_file,
			     min_adj);
	rem = global_page_state(NR_ACTIVE_ANON) +
		global_page_state(NR_ACTIVE_FILE) +
		global_page_state(NR_INACTIVE_ANON) +
		global_page_state(NR_INACTIVE_FILE);
	if (nr_to_scan <= 0 || min_adj == OOM_ADJUST_MAX + 1) {
		lowmem_print(5, "lowmem_shrink %d, %x, return %d\n",
			     nr_to_scan, gfp_mask, rem);
		return rem;
	}
	selected_oom_adj = min_adj;

	read_lock(&tasklist_lock);
	for_each_process(p) {
		struct mm_struct *mm;
		struct signal_struct *sig;
		int oom_adj;

		task_lock(p);
		mm = p->mm;
		sig = p->signal;
		if (!mm || !sig) {
			task_unlock(p);
			continue;
		}
		oom_adj = sig->oom_adj;
		if (oom_adj < min_adj) {
			task_unlock(p);
			continue;
		}
		tasksize = get_mm_rss(mm);
		task_unlock(p);
		if (tasksize <= 0)
			continue;
		if (selected) {
			if (oom_adj < selected_oom_adj)
				continue;
			if (oom_adj == selected_oom_adj &&
			    tasksize <= selected_tasksize)
				continue;
		}
		selected = p;
		selected_tasksize = tasksize;
		selected_oom_adj = oom_adj;
		lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
			     p->pid, p->comm, oom_adj, tasksize);
	}
	if (selected) {
		lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
			     selected->pid, selected->comm,
			     selected_oom_adj, selected_tasksize);
		lowmem_deathpending = selected;
		task_free_register(&task_nb);
		force_sig(SIGKILL, selected);
		rem -= selected_tasksize;
	}
	lowmem_print(4, "lowmem_shrink %d, %x, return %d\n",
		     nr_to_scan, gfp_mask, rem);
	read_unlock(&tasklist_lock);
	return rem;
}
Code Example #18
File: sit.c Project: b3rnik/dsl-n55u-bender
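An older sit.c version of the ICMP error handler from Code Example #5; the #else branch preserves an abandoned "perfect world" ICMPv6 relaying scheme (see the note at the #else below) and is never compiled.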
static int ipip6_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

    /* It is not :-( All the routers (except for Linux) return only
       8 bytes of packet payload. It means that precise relaying of
       ICMP in the real Internet is absolutely infeasible.
     */
    struct iphdr *iph = (struct iphdr*)skb->data;
    int type = skb->h.icmph->type;
    int code = skb->h.icmph->code;
    struct ip_tunnel *t;
    int err;

    switch (type) {
    default:
    case ICMP_PARAMETERPROB:
        return 0;

    case ICMP_DEST_UNREACH:
        switch (code) {
        case ICMP_SR_FAILED:
        case ICMP_PORT_UNREACH:
            /* Impossible event. */
            return 0;
        case ICMP_FRAG_NEEDED:
            /* Soft state for pmtu is maintained by IP core. */
            return 0;
        default:
            /* All others are translated to HOST_UNREACH.
               rfc2003 contains "deep thoughts" about NET_UNREACH,
               I believe they are just ether pollution. --ANK
             */
            break;
        }
        break;
    case ICMP_TIME_EXCEEDED:
        if (code != ICMP_EXC_TTL)
            return 0;
        break;
    }

    err = -ENOENT;

    read_lock(&ipip6_lock);
    t = ipip6_tunnel_lookup(iph->daddr, iph->saddr);
    if (t == NULL || t->parms.iph.daddr == 0)
        goto out;

    err = 0;
    if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
        goto out;

    if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
        t->err_count++;
    else
        t->err_count = 1;
    t->err_time = jiffies;
out:
    read_unlock(&ipip6_lock);
    return err;
#else
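    /* NB: I_WISH_WORLD_WERE_PERFECT is never defined anywhere in the tree,
     * so the branch below is dead code; it still refers to the function's
     * older (skb, dp, len) prototype, which is why "dp" and "len" appear
     * here without declarations. */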
    struct iphdr *iph = (struct iphdr*)dp;
    int hlen = iph->ihl<<2;
    struct ipv6hdr *iph6;
    int type = skb->h.icmph->type;
    int code = skb->h.icmph->code;
    int rel_type = 0;
    int rel_code = 0;
    int rel_info = 0;
    struct sk_buff *skb2;
    struct rt6_info *rt6i;

    if (len < hlen + sizeof(struct ipv6hdr))
        return;
    iph6 = (struct ipv6hdr*)(dp + hlen);

    switch (type) {
    default:
        return;
    case ICMP_PARAMETERPROB:
        if (skb->h.icmph->un.gateway < hlen)
            return;

        /* So... This guy found something strange INSIDE the encapsulated
           packet. Well, he is a fool, but what can we do?
         */
        rel_type = ICMPV6_PARAMPROB;
        rel_info = skb->h.icmph->un.gateway - hlen;
        break;

    case ICMP_DEST_UNREACH:
        switch (code) {
        case ICMP_SR_FAILED:
        case ICMP_PORT_UNREACH:
            /* Impossible event. */
            return;
        case ICMP_FRAG_NEEDED:
            /* Too complicated case ... */
            return;
        default:
            /* All others are translated to HOST_UNREACH.
               rfc2003 contains "deep thoughts" about NET_UNREACH,
               I believe, it is just ether pollution. --ANK
             */
            rel_type = ICMPV6_DEST_UNREACH;
            rel_code = ICMPV6_ADDR_UNREACH;
            break;
        }
        break;
    case ICMP_TIME_EXCEEDED:
        if (code != ICMP_EXC_TTL)
            return;
        rel_type = ICMPV6_TIME_EXCEED;
        rel_code = ICMPV6_EXC_HOPLIMIT;
        break;
    }

    /* Prepare fake skb to feed it to icmpv6_send */
    skb2 = skb_clone(skb, GFP_ATOMIC);
    if (skb2 == NULL)
        return 0;
    dst_release(skb2->dst);
    skb2->dst = NULL;
    skb_pull(skb2, skb->data - (u8*)iph6);
    skb2->nh.raw = skb2->data;

    /* Try to guess incoming interface */
    rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0);
    if (rt6i && rt6i->rt6i_dev) {
        skb2->dev = rt6i->rt6i_dev;

        rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0);

        if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) {
            struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev);
            if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) {
                rel_type = ICMPV6_DEST_UNREACH;
                rel_code = ICMPV6_ADDR_UNREACH;
            }
            icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
        }
    }
    kfree_skb(skb2);
    return 0;
#endif
}
Code Example #19
File: ptrace.c Project: OpenHMR/Open-HMR600
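A sys_ptrace() from an FDPIC architecture (apparently the FRV port, given the PT__END and fdpic loadmap cases): the tasklist_lock read-side section covers only find_task_by_pid() and get_task_struct(), the standard pattern for taking a task reference safely.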
asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
{
	struct task_struct *child;
	unsigned long tmp;
	int ret;

	lock_kernel();
	ret = -EPERM;
	if (request == PTRACE_TRACEME) {
		/* are we already being traced? */
		if (current->ptrace & PT_PTRACED)
			goto out;
		ret = security_ptrace(current->parent, current);
		if (ret)
			goto out;
		/* set the ptrace bit in the process flags. */
		current->ptrace |= PT_PTRACED;
		ret = 0;
		goto out;
	}
	ret = -ESRCH;
	read_lock(&tasklist_lock);
	child = find_task_by_pid(pid);
	if (child)
		get_task_struct(child);
	read_unlock(&tasklist_lock);
	if (!child)
		goto out;

	ret = -EPERM;
	if (pid == 1)		/* you may not mess with init */
		goto out_tsk;

	if (request == PTRACE_ATTACH) {
		ret = ptrace_attach(child);
		goto out_tsk;
	}

	ret = ptrace_check_attach(child, request == PTRACE_KILL);
	if (ret < 0)
		goto out_tsk;

	switch (request) {
		/* when I and D space are separate, these will need to be fixed. */
	case PTRACE_PEEKTEXT: /* read word at location addr. */
	case PTRACE_PEEKDATA: {
		int copied;

		ret = -EIO;
		if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
			break;

		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
		if (copied != sizeof(tmp))
			break;

		ret = put_user(tmp,(unsigned long *) data);
		break;
	}

		/* read the word at location addr in the USER area. */
	case PTRACE_PEEKUSR: {
		tmp = 0;
		ret = -EIO;
		if ((addr & 3) || addr < 0)
			break;

		ret = 0;
		switch (addr >> 2) {
		case 0 ... PT__END - 1:
			tmp = get_reg(child, addr >> 2);
			break;

		case PT__END + 0:
			tmp = child->mm->end_code - child->mm->start_code;
			break;

		case PT__END + 1:
			tmp = child->mm->end_data - child->mm->start_data;
			break;

		case PT__END + 2:
			tmp = child->mm->start_stack - child->mm->start_brk;
			break;

		case PT__END + 3:
			tmp = child->mm->start_code;
			break;

		case PT__END + 4:
			tmp = child->mm->start_stack;
			break;

		default:
			ret = -EIO;
			break;
		}

		if (ret == 0)
			ret = put_user(tmp, (unsigned long *) data);
		break;
	}

		/* when I and D space are separate, this will have to be fixed. */
	case PTRACE_POKETEXT: /* write the word at location addr. */
	case PTRACE_POKEDATA:
		ret = -EIO;
		if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
			break;
		if (access_process_vm(child, addr, &data, sizeof(data), 1) != sizeof(data))
			break;
		ret = 0;
		break;

	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
		ret = -EIO;
		if ((addr & 3) || addr < 0)
			break;

		ret = 0;
		switch (addr >> 2) {
		case 0 ... PT__END-1:
			ret = put_reg(child, addr >> 2, data);
			break;

		default:
			ret = -EIO;
			break;
		}
		break;

	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
	case PTRACE_CONT: /* restart after signal. */
		ret = -EIO;
		if (!valid_signal(data))
			break;
		if (request == PTRACE_SYSCALL)
			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		else
			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		child->exit_code = data;
		ptrace_disable(child);
		wake_up_process(child);
		ret = 0;
		break;

		/* make the child exit.  Best I can do is send it a sigkill.
		 * perhaps it should be put in the status that it wants to
		 * exit.
		 */
	case PTRACE_KILL:
		ret = 0;
		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
			break;
		child->exit_code = SIGKILL;
		clear_tsk_thread_flag(child, TIF_SINGLESTEP);
		ptrace_disable(child);
		wake_up_process(child);
		break;

	case PTRACE_SINGLESTEP:  /* set the trap flag. */
		ret = -EIO;
		if (!valid_signal(data))
			break;
		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		ptrace_enable(child);
		child->exit_code = data;
		wake_up_process(child);
		ret = 0;
		break;

	case PTRACE_DETACH:	/* detach a process that was attached. */
		ret = ptrace_detach(child, data);
		break;

	case PTRACE_GETREGS: { /* Get all integer regs from the child. */
		int i;
		for (i = 0; i < PT__GPEND; i++) {
			tmp = get_reg(child, i);
			if (put_user(tmp, (unsigned long *) data)) {
				ret = -EFAULT;
				break;
			}
			data += sizeof(long);
		}
		ret = 0;
		break;
	}

	case PTRACE_SETREGS: { /* Set all integer regs in the child. */
		int i;
		for (i = 0; i < PT__GPEND; i++) {
			if (get_user(tmp, (unsigned long *) data)) {
				ret = -EFAULT;
				break;
			}
			put_reg(child, i, tmp);
			data += sizeof(long);
		}
		ret = 0;
		break;
	}

	case PTRACE_GETFPREGS: { /* Get the child FP/Media state. */
		ret = 0;
		if (copy_to_user((void *) data,
				 &child->thread.user->f,
				 sizeof(child->thread.user->f)))
			ret = -EFAULT;
		break;
	}

	case PTRACE_SETFPREGS: { /* Set the child FP/Media state. */
		ret = 0;
		if (copy_from_user(&child->thread.user->f,
				   (void *) data,
				   sizeof(child->thread.user->f)))
			ret = -EFAULT;
		break;
	}

	case PTRACE_GETFDPIC:
		tmp = 0;
		switch (addr) {
		case PTRACE_GETFDPIC_EXEC:
			tmp = child->mm->context.exec_fdpic_loadmap;
			break;
		case PTRACE_GETFDPIC_INTERP:
			tmp = child->mm->context.interp_fdpic_loadmap;
			break;
		default:
			break;
		}

		ret = 0;
		if (put_user(tmp, (unsigned long *) data)) {
			ret = -EFAULT;
			break;
		}
		break;

	default:
		ret = -EIO;
		break;
	}
out_tsk:
	put_task_struct(child);
out:
	unlock_kernel();
	return ret;
}
Code Example #20
File: xenbus_client.c Project: CSCLOG/beaglebone
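Xenbus grant-mapping helpers: xenbus_unmap_ring_vfree() scans the vmalloc area list under vmlist_lock, held for read, to recover the vm_struct behind a mapped address.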
/**
 * xenbus_map_ring_valloc
 * @dev: xenbus device
 * @gnt_ref: grant reference
 * @vaddr: pointer to address to be filled out by mapping
 *
 * Based on Rusty Russell's skeleton driver's map_page.
 * Map a page of memory into this domain from another domain's grant table.
 * xenbus_map_ring_valloc allocates a page of virtual address space, maps the
 * page to that address, and sets *vaddr to that address.
 * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
 * or -ENOMEM on error. If an error is returned, device will switch to
 * XenbusStateClosing and the error message will be saved in XenStore.
 */
int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
{
	struct gnttab_map_grant_ref op = {
		.flags = GNTMAP_host_map,
		.ref   = gnt_ref,
		.dom   = dev->otherend_id,
	};
	struct vm_struct *area;

	*vaddr = NULL;

	area = xen_alloc_vm_area(PAGE_SIZE);
	if (!area)
		return -ENOMEM;

	op.host_addr = (unsigned long)area->addr;

	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
		BUG();

	if (op.status != GNTST_okay) {
		xen_free_vm_area(area);
		xenbus_dev_fatal(dev, op.status,
				 "mapping in shared page %d from domain %d",
				 gnt_ref, dev->otherend_id);
		return op.status;
	}

	/* Stuff the handle in an unused field */
	area->phys_addr = (unsigned long)op.handle;

	*vaddr = area->addr;
	return 0;
}
EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
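
Since xenbus_map_ring_valloc() and xenbus_unmap_ring_vfree() are a matched pair, a typical caller looks roughly like the sketch below. This is not from the original file; example_connect_ring, example_disconnect_ring, and gnt_ref are hypothetical names.

/* Hypothetical usage sketch, not part of xenbus_client.c. */
static int example_connect_ring(struct xenbus_device *dev, int gnt_ref,
				void **ring)
{
	int err;

	/* Allocates a page of virtual address space and maps the
	 * other domain's granted page into it. */
	err = xenbus_map_ring_valloc(dev, gnt_ref, ring);
	if (err)
		return err;	/* dev is already in XenbusStateClosing */

	/* ... use the shared ring at *ring ... */
	return 0;
}

static void example_disconnect_ring(struct xenbus_device *dev, void *ring)
{
	/* Unmaps the grant and frees the virtual address space. */
	xenbus_unmap_ring_vfree(dev, ring);
}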


/**
 * xenbus_map_ring
 * @dev: xenbus device
 * @gnt_ref: grant reference
 * @handle: pointer to grant handle to be filled
 * @vaddr: address to be mapped to
 *
 * Map a page of memory into this domain from another domain's grant table.
 * xenbus_map_ring does not allocate the virtual address space (you must do
 * this yourself!). It only maps in the page to the specified address.
 * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
 * or -ENOMEM on error. If an error is returned, device will switch to
 * XenbusStateClosing and the error message will be saved in XenStore.
 */
int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
		    grant_handle_t *handle, void *vaddr)
{
	struct gnttab_map_grant_ref op = {
		.host_addr = (unsigned long)vaddr,
		.flags     = GNTMAP_host_map,
		.ref       = gnt_ref,
		.dom       = dev->otherend_id,
	};

	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
		BUG();

	if (op.status != GNTST_okay) {
		xenbus_dev_fatal(dev, op.status,
				 "mapping in shared page %d from domain %d",
				 gnt_ref, dev->otherend_id);
	} else
		*handle = op.handle;

	return op.status;
}
EXPORT_SYMBOL_GPL(xenbus_map_ring);
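
By contrast, a hedged sketch for the caller-managed variant, assuming the caller already owns the virtual address (for example from a vm area it allocated itself) and keeps the grant handle around for the later unmap (demo_map_at() is a hypothetical helper):

static int demo_map_at(struct xenbus_device *dev, int gnt_ref,
		       void *vaddr, grant_handle_t *handle)
{
	int err = xenbus_map_ring(dev, gnt_ref, handle, vaddr);

	if (err)
		return err;	/* GNTST_* status; already reported via xenbus_dev_fatal() */

	/* ... use the ring; later: xenbus_unmap_ring(dev, *handle, vaddr); */
	return 0;
}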


/**
 * xenbus_unmap_ring_vfree
 * @dev: xenbus device
 * @vaddr: addr to unmap
 *
 * Based on Rusty Russell's skeleton driver's unmap_page.
 * Unmap a page of memory in this domain that was imported from another domain.
 * Use xenbus_unmap_ring_vfree if you mapped in your memory with
 * xenbus_map_ring_valloc (it will free the virtual address space).
 * Returns 0 on success and returns GNTST_* on error
 * (see xen/include/interface/grant_table.h).
 */
int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
{
	struct vm_struct *area;
	struct gnttab_unmap_grant_ref op = {
		.host_addr = (unsigned long)vaddr,
	};

	/* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
	 * method so that we don't have to muck with vmalloc internals here.
	 * We could force the user to hang on to their struct vm_struct from
	 * xenbus_map_ring_valloc, but these 6 lines considerably simplify
	 * this API.
	 */
	read_lock(&vmlist_lock);
	for (area = vmlist; area != NULL; area = area->next) {
		if (area->addr == vaddr)
			break;
	}
	read_unlock(&vmlist_lock);

	if (!area) {
		xenbus_dev_error(dev, -ENOENT,
				 "can't find mapped virtual address %p", vaddr);
		return GNTST_bad_virt_addr;
	}

	op.handle = (grant_handle_t)area->phys_addr;

	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
		BUG();

	if (op.status == GNTST_okay)
		xen_free_vm_area(area);
	else
		xenbus_dev_error(dev, op.status,
				 "unmapping page at handle %d error %d",
				 (int16_t)area->phys_addr, op.status);

	return op.status;
}
EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);


/**
 * xenbus_unmap_ring
 * @dev: xenbus device
 * @handle: grant handle
 * @vaddr: addr to unmap
 *
 * Unmap a page of memory in this domain that was imported from another domain.
 * Returns 0 on success and returns GNTST_* on error
 * (see xen/include/interface/grant_table.h).
 */
int xenbus_unmap_ring(struct xenbus_device *dev,
		      grant_handle_t handle, void *vaddr)
{
	struct gnttab_unmap_grant_ref op = {
		.host_addr = (unsigned long)vaddr,
		.handle    = handle,
	};

	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
		BUG();

	if (op.status != GNTST_okay)
		xenbus_dev_error(dev, op.status,
				 "unmapping page at handle %d error %d",
				 handle, op.status);

	return op.status;
}
EXPORT_SYMBOL_GPL(xenbus_unmap_ring);


/**
 * xenbus_read_driver_state
 * @path: path for driver
 *
 * Return the state of the driver rooted at the given store path, or
 * XenbusStateUnknown if no state can be read.
 */
enum xenbus_state xenbus_read_driver_state(const char *path)
{
	enum xenbus_state result;
	int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
	if (err)
		result = XenbusStateUnknown;

	return result;
}
EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
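
A short usage sketch: a driver's otherend-changed watch might use this helper to poll the peer's state directly (dev->otherend is the peer's XenStore directory; the function name here is an assumption):

static void demo_check_peer(struct xenbus_device *dev)
{
	enum xenbus_state state = xenbus_read_driver_state(dev->otherend);

	/* XenbusStateUnknown is returned when the node cannot be read. */
	if (state == XenbusStateClosing || state == XenbusStateClosed)
		pr_info("%s: peer is shutting down\n", dev->nodename);
}
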
Code example #21
File: ehca_irq.c  Project: MrJwiz/UBER-M
void ehca_process_eq(struct ehca_shca *shca, int is_irq)
{
    struct ehca_eq *eq = &shca->eq;
    struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
    u64 eqe_value, ret;
    int eqe_cnt, i;
    int eq_empty = 0;

    spin_lock(&eq->irq_spinlock);
    if (is_irq) {
        const int max_query_cnt = 100;
        int query_cnt = 0;
        int int_state = 1;
        do {
            int_state = hipz_h_query_int_state(
                            shca->ipz_hca_handle, eq->ist);
            query_cnt++;
            iosync();
        } while (int_state && query_cnt < max_query_cnt);
        if (unlikely(query_cnt == max_query_cnt))
            ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
                     int_state, query_cnt);
    }

    /* read out all eqes */
    eqe_cnt = 0;
    do {
        u32 token;
        eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq);
        if (!eqe_cache[eqe_cnt].eqe)
            break;
        eqe_value = eqe_cache[eqe_cnt].eqe->entry;
        if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
            token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
            read_lock(&ehca_cq_idr_lock);
            eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
            if (eqe_cache[eqe_cnt].cq)
                atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
            read_unlock(&ehca_cq_idr_lock);
            if (!eqe_cache[eqe_cnt].cq) {
                ehca_err(&shca->ib_device,
                         "Invalid eqe for non-existing cq "
                         "token=%x", token);
                continue;
            }
        } else
            eqe_cache[eqe_cnt].cq = NULL;
        eqe_cnt++;
    } while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
    if (!eqe_cnt) {
        if (is_irq)
            ehca_dbg(&shca->ib_device,
                     "No eqe found for irq event");
        goto unlock_irq_spinlock;
    } else if (!is_irq) {
        ret = hipz_h_eoi(eq->ist);
        if (ret != H_SUCCESS)
            ehca_err(&shca->ib_device,
                     "bad return code EOI -rc = %lld\n", ret);
        ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
    }
    if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
        ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
    /* enable irq for new packets */
    for (i = 0; i < eqe_cnt; i++) {
        if (eq->eqe_cache[i].cq)
            reset_eq_pending(eq->eqe_cache[i].cq);
    }
    /* check eq */
    spin_lock(&eq->spinlock);
    eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
    spin_unlock(&eq->spinlock);
    /* call completion handler for cached eqes */
    for (i = 0; i < eqe_cnt; i++)
        if (eq->eqe_cache[i].cq) {
            if (ehca_scaling_code)
                queue_comp_task(eq->eqe_cache[i].cq);
            else {
                struct ehca_cq *cq = eq->eqe_cache[i].cq;
                comp_event_callback(cq);
                if (atomic_dec_and_test(&cq->nr_events))
                    wake_up(&cq->wait_completion);
            }
        } else {
            ehca_dbg(&shca->ib_device, "Got non completion event");
            parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
        }
    /* poll eq if not empty */
    if (eq_empty)
        goto unlock_irq_spinlock;
    do {
        struct ehca_eqe *eqe;
        eqe = ehca_poll_eq(shca, &shca->eq);
        if (!eqe)
            break;
        process_eqe(shca, eqe);
    } while (1);

unlock_irq_spinlock:
    spin_unlock(&eq->irq_spinlock);
}
Code example #22
static int lowmem_shrink(struct shrinker *s, int nr_to_scan, gfp_t gfp_mask)
{
	struct task_struct *p;
	struct task_struct *selected = NULL;
	int rem = 0;
	int tasksize;
	int i;
	int min_adj = OOM_ADJUST_MAX + 1;
	int selected_tasksize = 0;
	int selected_oom_adj;
	int array_size = ARRAY_SIZE(lowmem_adj);
	int other_free = global_page_state(NR_FREE_PAGES);
#ifdef SEC_ADJUST_LMK
	int other_file = global_page_state(NR_INACTIVE_FILE) +
						global_page_state(NR_ACTIVE_FILE);
#else
	int other_file = global_page_state(NR_FILE_PAGES) -
						global_page_state(NR_SHMEM);
#endif

	/*
	 * If we already have a death outstanding, then
	 * bail out right away; indicating to vmscan
	 * that we have nothing further to offer on
	 * this pass.
	 *
	 */
	if (lowmem_deathpending &&
	    time_before_eq(jiffies, lowmem_deathpending_timeout))
		return 0;

	if (lowmem_adj_size < array_size)
		array_size = lowmem_adj_size;
	if (lowmem_minfree_size < array_size)
		array_size = lowmem_minfree_size;
	for (i = 0; i < array_size; i++) {
#ifdef SEC_ADJUST_LMK
		if ((other_free + other_file) < lowmem_minfree[i])
#else
		if (other_free < lowmem_minfree[i] &&
		    other_file < lowmem_minfree[i])
#endif
		{
			min_adj = lowmem_adj[i];
			break;
		}
	}
#ifdef SEC_ADJUST_LMK
	if (min_adj == OOM_ADJUST_MAX + 1)
		return 0;
#endif
	if (nr_to_scan > 0)
		lowmem_print(3, "lowmem_shrink %d, %x, ofree %d %d, ma %d\n",
			     nr_to_scan, gfp_mask, other_free, other_file,
			     min_adj);
	rem = global_page_state(NR_ACTIVE_ANON) +
		global_page_state(NR_ACTIVE_FILE) +
		global_page_state(NR_INACTIVE_ANON) +
		global_page_state(NR_INACTIVE_FILE);
#ifdef SEC_ADJUST_LMK
	if (nr_to_scan <= 0)
#else
	if (nr_to_scan <= 0 || min_adj == OOM_ADJUST_MAX + 1)
#endif
	{
		lowmem_print(5, "lowmem_shrink %d, %x, return %d\n",
			     nr_to_scan, gfp_mask, rem);
		return rem;
	}
	selected_oom_adj = min_adj;

	read_lock(&tasklist_lock);
	for_each_process(p) {
		struct mm_struct *mm;
		struct signal_struct *sig;
		int oom_adj;

		task_lock(p);
		mm = p->mm;
		sig = p->signal;
		if (!mm || !sig) {
			task_unlock(p);
			continue;
		}
		oom_adj = sig->oom_adj;
		if (oom_adj < min_adj) {
			task_unlock(p);
			continue;
		}
		tasksize = get_mm_rss(mm);
		task_unlock(p);
		if (tasksize <= 0)
			continue;
		if (selected) {
			if (oom_adj < selected_oom_adj)
				continue;
			if (oom_adj == selected_oom_adj &&
			    tasksize <= selected_tasksize)
				continue;
		}
		selected = p;
		selected_tasksize = tasksize;
		selected_oom_adj = oom_adj;
		lowmem_print(2, "select %d ,  %d,  %d, \n",
			     p->pid,  oom_adj, tasksize);
	}
	if (selected) {
		lowmem_print(1, "send  to %d ,  %d, %d\n",
			     selected->pid, 
			     selected_oom_adj, selected_tasksize);
		lowmem_deathpending = selected;
		lowmem_deathpending_timeout = jiffies + HZ;
		force_sig(SIGKILL, selected);
		rem -= selected_tasksize;
	}
#ifdef SEC_ADJUST_LMK
	else
		rem = -1;
#endif
	lowmem_print(4, "lowmem_shrink %d, %x, return %d\n",
		     nr_to_scan, gfp_mask, rem);
	read_unlock(&tasklist_lock);
	return rem;
}
Code example #23
File: sit.c  Project: mikuhatsune001/linux2.6.32
static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
				struct ip_tunnel_prl __user *a)
{
	struct ip_tunnel_prl kprl, *kp;
	struct ip_tunnel_prl_entry *prl;
	unsigned int cmax, c = 0, ca, len;
	int ret = 0;

	if (copy_from_user(&kprl, a, sizeof(kprl)))
		return -EFAULT;
	cmax = kprl.datalen / sizeof(kprl);
	if (cmax > 1 && kprl.addr != htonl(INADDR_ANY))
		cmax = 1;

	/* For simple GET or for root users,
	 * we try harder to allocate.
	 */
	kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
		kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
		NULL;

	read_lock(&ipip6_lock);

	ca = t->prl_count < cmax ? t->prl_count : cmax;

	if (!kp) {
		/* We don't try hard to allocate much memory for
		 * non-root users.
		 * For root users, retry allocating enough memory for
		 * the answer.
		 */
		kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC);
		if (!kp) {
			ret = -ENOMEM;
			goto out;
		}
	}

	c = 0;
	for (prl = t->prl; prl; prl = prl->next) {
		if (c >= cmax)
			break;
		if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr)
			continue;
		kp[c].addr = prl->addr;
		kp[c].flags = prl->flags;
		c++;
		if (kprl.addr != htonl(INADDR_ANY))
			break;
	}
out:
	read_unlock(&ipip6_lock);

	len = sizeof(*kp) * c;
	ret = 0;
	if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen))
		ret = -EFAULT;

	kfree(kp);

	return ret;
}
Code example #24
static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
{
	read_lock(&rxrpc_connection_lock);
	return seq_list_start_head(&rxrpc_connections, *_pos);
}
Code example #25
/**
 * Enforce quota; this is called when each operation is declared.
 * qsd_op_end() will then be called later, once all the operations have
 * completed, in order to release/adjust the quota space.
 *
 * \param env   - the environment passed by the caller
 * \param qsd   - is the qsd instance associated with the device in charge of
 *                the operation.
 * \param trans - is the quota transaction information
 * \param qi    - qid & space required by current operation
 * \param flags - if the operation is a write, returns the no-user/no-group
 *                and sync-commit flags to the caller
 *
 * \retval 0            - success
 * \retval -EDQUOT      - out of quota
 * \retval -EINPROGRESS - inform client to retry write
 * \retval -ve          - other appropriate errors
 */
int qsd_op_begin(const struct lu_env *env, struct qsd_instance *qsd,
		 struct lquota_trans *trans, struct lquota_id_info *qi,
		 int *flags)
{
	int	i, rc;
	bool	found = false;
	ENTRY;

	if (unlikely(qsd == NULL))
		RETURN(0);

	/* We don't enforce quota until the qsd_instance is started */
	read_lock(&qsd->qsd_lock);
	if (!qsd->qsd_started) {
		read_unlock(&qsd->qsd_lock);
		RETURN(0);
	}
	read_unlock(&qsd->qsd_lock);

	/* ignore block quota on MDTs, ignore inode quota on OSTs */
	if ((!qsd->qsd_is_md && !qi->lqi_is_blk) ||
	    (qsd->qsd_is_md && qi->lqi_is_blk))
		RETURN(0);

	/* ignore quota enforcement request when:
	 *    - quota isn't enforced for this quota type
	 * or - the user/group is root */
	if (!qsd_type_enabled(qsd, qi->lqi_type) || qi->lqi_id.qid_uid == 0)
		RETURN(0);

	LASSERTF(trans->lqt_id_cnt <= QUOTA_MAX_TRANSIDS, "id_cnt=%d",
		 trans->lqt_id_cnt);
	/* check whether we already allocated a slot for this id */
	for (i = 0; i < trans->lqt_id_cnt; i++) {
		if (qid_equal(qi, &trans->lqt_ids[i])) {
			found = true;
			/* make sure we are not mixing inodes & blocks */
			LASSERT(trans->lqt_ids[i].lqi_is_blk == qi->lqi_is_blk);
			break;
		}
	}

	if (!found) {
		if (unlikely(i >= QUOTA_MAX_TRANSIDS)) {
			CERROR("%s: more than %d qids enforced for a "
			       "transaction?\n", qsd->qsd_svname, i);
			RETURN(-EINVAL);
		}

		/* fill new slot */
		trans->lqt_ids[i].lqi_id     = qi->lqi_id;
		trans->lqt_ids[i].lqi_type   = qi->lqi_type;
		trans->lqt_ids[i].lqi_is_blk = qi->lqi_is_blk;
		trans->lqt_id_cnt++;
	}

	/* manage quota enforcement for this ID */
	rc = qsd_op_begin0(env, qsd->qsd_type_array[qi->lqi_type],
			   &trans->lqt_ids[i], qi->lqi_space, flags);
	RETURN(rc);
}
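
To illustrate the contract described in the comment above, a hedged caller sketch (the helper and its error policy are assumptions; qsd_op_end() is the release half mentioned in the comment):

static int demo_declare_write(const struct lu_env *env,
			      struct qsd_instance *qsd,
			      struct lquota_trans *trans,
			      struct lquota_id_info *qi)
{
	int flags = 0;
	int rc = qsd_op_begin(env, qsd, trans, qi, &flags);

	if (rc == -EDQUOT || rc == -EINPROGRESS)
		return rc;	/* out of quota, or client must retry */

	/* ... perform the change; once it commits, release the
	 * reservation with qsd_op_end(env, qsd, trans). */
	return rc;
}
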
Code example #26
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
{
	struct task_struct *p;
	struct task_struct *selected = NULL;
	int rem = 0;
	int tasksize;
	int i;
	int min_adj = OOM_ADJUST_MAX + 1;
	int selected_tasksize = 0;
	int selected_oom_adj;
	int array_size = ARRAY_SIZE(lowmem_adj);
	int other_free = global_page_state(NR_FREE_PAGES);
	int other_file = global_page_state(NR_FILE_PAGES) -
						global_page_state(NR_SHMEM);
	struct zone *zone;

	if (offlining) {
		/* Discount all free space in the section being offlined */
		for_each_zone(zone) {
			 if (zone_idx(zone) == ZONE_MOVABLE) {
				other_free -= zone_page_state(zone,
						NR_FREE_PAGES);
				lowmem_print(4, "lowmem_shrink discounted "
					"%lu pages in movable zone\n",
					zone_page_state(zone, NR_FREE_PAGES));
			}
		}
	}
	/*
	 * If we already have a death outstanding, then
	 * bail out right away; indicating to vmscan
	 * that we have nothing further to offer on
	 * this pass.
	 *
	 */
	if (lowmem_deathpending &&
	    time_before_eq(jiffies, lowmem_deathpending_timeout))
	{
		return 0;
	}

	if (lowmem_adj_size < array_size)
		array_size = lowmem_adj_size;
	if (lowmem_minfree_size < array_size)
		array_size = lowmem_minfree_size;
	for (i = 0; i < array_size; i++) {
		if (other_free < lowmem_minfree[i] &&
		    other_file < lowmem_minfree[i]) {
			min_adj = lowmem_adj[i];
			break;
		}
	}
	if (sc->nr_to_scan > 0)
		lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n",
			     sc->nr_to_scan, sc->gfp_mask, other_free, other_file,
			     min_adj);
	rem = global_page_state(NR_ACTIVE_ANON) +
		global_page_state(NR_ACTIVE_FILE) +
		global_page_state(NR_INACTIVE_ANON) +
		global_page_state(NR_INACTIVE_FILE);
	if (sc->nr_to_scan <= 0 || min_adj == OOM_ADJUST_MAX + 1) {
		lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n",
			     sc->nr_to_scan, sc->gfp_mask, rem);
		return rem;
	}
	selected_oom_adj = min_adj;

	read_lock(&tasklist_lock);
	for_each_process(p) {
		struct mm_struct *mm;
		struct signal_struct *sig;
		int oom_adj;

		task_lock(p);
		mm = p->mm;
		sig = p->signal;
		if (!mm || !sig) {
			task_unlock(p);
			continue;
		}
		oom_adj = sig->oom_adj;
		if (oom_adj < min_adj) {
			task_unlock(p);
			continue;
		}
		tasksize = get_mm_rss(mm);
		task_unlock(p);
		if (tasksize <= 0)
			continue;
		if (selected) {
			if (oom_adj < selected_oom_adj)
				continue;
			if (oom_adj == selected_oom_adj &&
			    tasksize <= selected_tasksize)
				continue;
		}
		selected = p;
		selected_tasksize = tasksize;
		selected_oom_adj = oom_adj;
		/* p10582 2012/05/23: if a previously chosen victim is still
		 * pending and we picked a different task, reclaim failed
		 * before the timeout; kill the new candidate right away if
		 * it is expendable enough (adj > 5). */
		if (lowmem_deathpending && selected != lowmem_deathpending) {
			if (selected_oom_adj > 5) {
				force_sig(SIGKILL, selected);
				lowmem_print(1, "time out send sigkill to %d (%s), adj %d, size %d ****\n",
					     selected->pid, selected->comm,
					     selected_oom_adj, selected_tasksize);
				selected = NULL;
				continue;
			}
		}
		lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
			     p->pid, p->comm, oom_adj, tasksize);
	}
	if (selected) {
		lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
			     selected->pid, selected->comm,
			     selected_oom_adj, selected_tasksize);
		lowmem_deathpending = selected;
		/* p10582 2012/05/23: reclaim failing before the previous 1 s
		 * timeout caused problems, so the timeout is shortened to
		 * 300 ms. */
		lowmem_deathpending_timeout = jiffies + (3 * HZ / 10);

		force_sig(SIGKILL, selected);
		rem -= selected_tasksize;
	}
	lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n",
		     sc->nr_to_scan, sc->gfp_mask, rem);
	read_unlock(&tasklist_lock);
	return rem;
}
Code example #27
static int try_to_freeze_tasks(bool sig_only)
{
	struct task_struct *g, *p;
	unsigned long end_time;
	unsigned int todo;
	struct timeval start, end;
	u64 elapsed_csecs64;
	unsigned int elapsed_csecs;
	unsigned int wakeup = 0;

	do_gettimeofday(&start);

	end_time = jiffies + TIMEOUT;
	do {
		todo = 0;
		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			if (frozen(p) || !freezeable(p))
				continue;

			if (!freeze_task(p, sig_only))
				continue;

			/*
			 * Now that we've done set_freeze_flag, don't
			 * perturb a task in TASK_STOPPED or TASK_TRACED.
			 * It is "frozen enough".  If the task does wake
			 * up, it will immediately call try_to_freeze.
			 */
			if (!task_is_stopped_or_traced(p) &&
			    !freezer_should_skip(p))
				todo++;
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);
		yield();			/* Yield is okay here */
		if (todo && has_wake_lock(WAKE_LOCK_SUSPEND)) {
			wakeup = 1;
			break;
		}
		if (time_after(jiffies, end_time))
			break;
	} while (todo);

	do_gettimeofday(&end);
	elapsed_csecs64 = timeval_to_ns(&end) - timeval_to_ns(&start);
	do_div(elapsed_csecs64, NSEC_PER_SEC / 100);
	elapsed_csecs = elapsed_csecs64;

	if (todo) {
		/* This does not unfreeze processes that are already frozen
		 * (we have slightly ugly calling convention in that respect,
		 * and caller must call thaw_processes() if something fails),
		 * but it cleans up leftover PF_FREEZE requests.
		 */
		printk("\n");
		printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds "
				"(%d tasks refusing to freeze):\n",
				wakeup ? "aborted" : "failed",
				elapsed_csecs / 100, elapsed_csecs % 100, todo);
		if(!wakeup)
			show_state();
		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			task_lock(p);
			if (freezing(p) && !freezer_should_skip(p) &&
							elapsed_csecs > 100)
				printk(KERN_ERR " %s\n", p->comm);
			cancel_freezing(p);
			task_unlock(p);
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);
	} else {
		printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100,
			elapsed_csecs % 100);
	}

	return todo ? -EBUSY : 0;
}
Code example #28
int br_handle_frame(struct sk_buff *skb)
{
	struct net_bridge *br;
	unsigned char *dest;
	struct net_bridge_port *p;

	dest = skb->mac.ethernet->h_dest;

	p = skb->dev->br_port;
	if (p == NULL)
		goto err_nolock;

	br = p->br;
	read_lock(&br->lock);
	if (skb->dev->br_port == NULL)
		goto err;

	if (!(br->dev.flags & IFF_UP) ||
	    p->state == BR_STATE_DISABLED)
		goto err;

	if (skb->mac.ethernet->h_source[0] & 1)
		goto err;

	if (p->state == BR_STATE_LEARNING ||
	    p->state == BR_STATE_FORWARDING)
		br_fdb_insert(br, p, skb->mac.ethernet->h_source, 0);

	if (br->stp_enabled &&
	    !memcmp(dest, bridge_ula, 5) &&
	    !(dest[5] & 0xF0))
		goto handle_special_frame;

	if (p->state == BR_STATE_FORWARDING) {
		if (br_should_route_hook && br_should_route_hook(&skb)) {
			read_unlock(&br->lock);
			return -1;
		}

		NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
			br_handle_frame_finish);
		read_unlock(&br->lock);
		return 0;
	}

err:
	read_unlock(&br->lock);
err_nolock:
	kfree_skb(skb);
	return 0;

handle_special_frame:
	if (!dest[5]) {
		br_stp_handle_bpdu(skb);
		read_unlock(&br->lock);
		return 0;
	}

	read_unlock(&br->lock);
	kfree_skb(skb);
	return 0;
}
Code example #29
static int android_oom_handler(struct notifier_block *nb,
				      unsigned long val, void *data)
{
	struct task_struct *tsk;
#ifdef MULTIPLE_OOM_KILLER
	struct task_struct *selected[OOM_DEPTH] = {NULL,};
#else
	struct task_struct *selected = NULL;
#endif
	int rem = 0;
	int tasksize;
	int i;
	int min_score_adj = OOM_SCORE_ADJ_MAX + 1;
#ifdef MULTIPLE_OOM_KILLER
	int selected_tasksize[OOM_DEPTH] = {0,};
	int selected_oom_score_adj[OOM_DEPTH] = {OOM_ADJUST_MAX,};
	int all_selected_oom = 0;
	int max_selected_oom_idx = 0;
#else
	int selected_tasksize = 0;
	int selected_oom_score_adj;
#endif
#ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO
	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL/5, 1);
#endif

	unsigned long *freed = data;
#if defined(CONFIG_CMA_PAGE_COUNTING)
	unsigned long nr_cma_free;
	unsigned long nr_cma_inactive_file;
	unsigned long nr_cma_active_file;
	int other_free;
	int other_file;

	nr_cma_free = global_page_state(NR_FREE_CMA_PAGES);
	other_free = global_page_state(NR_FREE_PAGES) - nr_cma_free;

	nr_cma_inactive_file = global_page_state(NR_CMA_INACTIVE_FILE);
	nr_cma_active_file = global_page_state(NR_CMA_ACTIVE_FILE);
	other_file = global_page_state(NR_FILE_PAGES) -
					global_page_state(NR_SHMEM) -
					total_swapcache_pages -
					nr_cma_inactive_file -
					nr_cma_active_file;
#endif

	/* show status */
	pr_warning("%s invoked Android-oom-killer: "
		"oom_adj=%d, oom_score_adj=%d\n",
		current->comm, current->signal->oom_adj,
		current->signal->oom_score_adj);
#ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO
	if (__ratelimit(&oom_rs)) {
		dump_stack();
		show_mem(SHOW_MEM_FILTER_NODES);
		dump_tasks_info();
	}
#endif

	min_score_adj = 0;
#ifdef MULTIPLE_OOM_KILLER
	for (i = 0; i < OOM_DEPTH; i++)
		selected_oom_score_adj[i] = min_score_adj;
#else
	selected_oom_score_adj = min_score_adj;
#endif

	read_lock(&tasklist_lock);
	for_each_process(tsk) {
		struct task_struct *p;
		int oom_score_adj;
#ifdef MULTIPLE_OOM_KILLER
		int is_exist_oom_task = 0;
#endif

		if (tsk->flags & PF_KTHREAD)
			continue;

		p = find_lock_task_mm(tsk);
		if (!p)
			continue;

		oom_score_adj = p->signal->oom_score_adj;
		if (oom_score_adj < min_score_adj) {
			task_unlock(p);
			continue;
		}
		tasksize = get_mm_rss(p->mm);
		task_unlock(p);
		if (tasksize <= 0)
			continue;

		lowmem_print(2, "oom: ------ %d (%s), adj %d, size %d\n",
			     p->pid, p->comm, oom_score_adj, tasksize);
#ifdef MULTIPLE_OOM_KILLER
		if (all_selected_oom < OOM_DEPTH) {
			for (i = 0; i < OOM_DEPTH; i++) {
				if (!selected[i]) {
					is_exist_oom_task = 1;
					max_selected_oom_idx = i;
					break;
				}
			}
		} else if (selected_oom_score_adj[max_selected_oom_idx] < oom_score_adj ||
			(selected_oom_score_adj[max_selected_oom_idx] == oom_score_adj &&
			selected_tasksize[max_selected_oom_idx] < tasksize)) {
			is_exist_oom_task = 1;
		}

		if (is_exist_oom_task) {
			selected[max_selected_oom_idx] = p;
			selected_tasksize[max_selected_oom_idx] = tasksize;
			selected_oom_score_adj[max_selected_oom_idx] = oom_score_adj;

			if (all_selected_oom < OOM_DEPTH)
				all_selected_oom++;

			if (all_selected_oom == OOM_DEPTH) {
				for (i = 0; i < OOM_DEPTH; i++) {
					if (selected_oom_score_adj[i] < selected_oom_score_adj[max_selected_oom_idx])
						max_selected_oom_idx = i;
					else if (selected_oom_score_adj[i] == selected_oom_score_adj[max_selected_oom_idx] &&
						selected_tasksize[i] < selected_tasksize[max_selected_oom_idx])
						max_selected_oom_idx = i;
				}
			}

			lowmem_print(2, "oom: max_selected_oom_idx(%d) select %d (%s), adj %d, \
					size %d, to kill\n",
				max_selected_oom_idx, p->pid, p->comm, oom_score_adj, tasksize);
		}
#else
		if (selected) {
			if (oom_score_adj < selected_oom_score_adj)
				continue;
			if (oom_score_adj == selected_oom_score_adj &&
			    tasksize <= selected_tasksize)
				continue;
		}
		selected = p;
		selected_tasksize = tasksize;
		selected_oom_score_adj = oom_score_adj;
		lowmem_print(2, "oom: select %d (%s), adj %d, size %d, to kill\n",
			     p->pid, p->comm, oom_score_adj, tasksize);
#endif
	}
#ifdef MULTIPLE_OOM_KILLER
	for (i = 0; i < OOM_DEPTH; i++) {
		if (selected[i]) {
#if defined(CONFIG_CMA_PAGE_COUNTING)
			lowmem_print(1, "oom: send sigkill to %d (%s), adj %d, "
				"size %d ofree %d ofile %d "
				"cma_free %lu cma_i_file %lu cma_a_file %lu\n",
				selected[i]->pid, selected[i]->comm,
				selected_oom_score_adj[i],
				selected_tasksize[i],
				other_free, other_file,
				nr_cma_free, nr_cma_inactive_file, nr_cma_active_file);
#else
			lowmem_print(1, "oom: send sigkill to %d (%s), adj %d,\
				     size %d\n",
				     selected[i]->pid, selected[i]->comm,
				     selected_oom_score_adj[i],
				     selected_tasksize[i]);
#endif
			send_sig(SIGKILL, selected[i], 0);
			rem -= selected_tasksize[i];
			*freed += (unsigned long)selected_tasksize[i];
#ifdef OOM_COUNT_READ
			oom_count++;
#endif

		}
	}
#else
	if (selected) {
		lowmem_print(1, "oom: send sigkill to %d (%s), adj %d, size %d\n",
			     selected->pid, selected->comm,
			     selected_oom_score_adj, selected_tasksize);
		send_sig(SIGKILL, selected, 0);
		set_tsk_thread_flag(selected, TIF_MEMDIE);
		rem -= selected_tasksize;
		*freed += (unsigned long)selected_tasksize;
#ifdef OOM_COUNT_READ
		oom_count++;
#endif
	}
#endif
	read_unlock(&tasklist_lock);

	lowmem_print(2, "oom: get memory %lu", *freed);
	return rem;
}
Code example #30
File: fig14.12.c  Project: chenjianlong/books-code
int main (int argc, char *argv[])
{
	int			fd;
	pid_t		pid;
	char		buf[5];
	struct stat	statbuf;

	if (argc != 2) {
		fprintf (stderr, "usage: %s filename\n", argv[0]);
		exit (1);
	}

	if ((fd = open (argv[1], O_RDWR | O_CREAT | O_TRUNC, FILE_MODE)) < 0) {
		err_sys ("open error");
	}
	if (write (fd, "abcdef", 6) != 6) {
		err_sys ("write error");
	}

	/* turn on set-group-ID and turn off group-execute */
	if (fstat (fd, &statbuf) < 0) {
		err_sys ("fstat error");
	}
	if (fchmod (fd, (statbuf.st_mode & ~S_IXGRP) | S_ISGID) < 0) {
		err_sys ("fchmod error");
	}

	TELL_WAIT ();

	if ((pid = fork ()) < 0) {
		err_sys ("fork error");
	} else if (pid > 0) {		/* parent */
		/* write lock entire file */
		if (write_lock (fd, 0, SEEK_SET, 0) < 0) {
			err_sys ("write_lock error");
		}

		TELL_CHILD (pid);

		if (waitpid (pid, NULL, 0) < 0) {
			err_sys ("waitpid error");
		}
	} else {					/* child */
		WAIT_PARENT ();			/* wait for parent to set lock */

		set_fl (fd, O_NONBLOCK);

		/* first let's see what error we get if region is locked */
		if (read_lock (fd, 0, SEEK_SET, 0) != -1) {	/* no wait */
			err_sys ("child: read lock succeeded");
		}
		printf ("read_lock of already-locked region returns %d\n", errno);

		/* now try to read the mandatory locked file */
		if (lseek (fd, 0, SEEK_SET) == -1) {
			err_sys ("lseek error");
		}
		if (read (fd, buf, 2) < 0) {
			err_ret ("read failed (mandatory locking works)");
		} else {
			printf ("read OK (no mandatory locking), buf = %2.2s\n", buf);
		}
	}
	exit (0);
}