This function changed substantially after 2.6.35.
First, let's review a few tunable networking sysctl variables; the code below makes use of them.
netdev_max_backlog
------------------
Maximum number of packets, queued on the INPUT side, when the interface
receives packets faster than kernel can process them.
The input queue is managed by softnet_data->input_pkt_queue. Each input queue has a maximum length given by the global variable netdev_max_backlog, whose value is 300. This means that each CPU can have up to 300 frames in its input queue waiting to be processed, regardless of the number of devices in the system.
The quoted default is from an older kernel; the value is tunable via sysctl, e.g.:
net.core.netdev_max_backlog = 300000
Now a quick peek at where it is used:
int netif_rx(struct sk_buff *skb)
{
	//...
	enqueue_to_backlog(skb, get_cpu(), &qtail);
	//...
}
static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
			      unsigned int *qtail)
{
	//...
	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
		if (skb_queue_len(&sd->input_pkt_queue)) {
enqueue:
			__skb_queue_tail(&sd->input_pkt_queue, skb);
			//...
			return NET_RX_SUCCESS;
		}
		/* queue was empty: schedule the backlog NAPI, then enqueue */
		//...
		goto enqueue;
	}
	sd->dropped++;
	//...
}
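To make the drop logic concrete, here is a minimal userspace sketch (all names are invented; this is not kernel code): a fixed-capacity queue whose enqueue mirrors the check above, counting drops the way sd->dropped does once the backlog limit is exceeded.
#include <stdio.h>

#define MAX_BACKLOG 8                      /* plays netdev_max_backlog */

struct fake_softnet {
	int queue[MAX_BACKLOG + 1];        /* the <= test admits one extra */
	int len;
	unsigned long dropped;
};

/* Mirrors the shape of enqueue_to_backlog(): admit while len <= limit,
 * otherwise count a drop. */
static int fake_enqueue(struct fake_softnet *sd, int pkt)
{
	if (sd->len <= MAX_BACKLOG) {
		sd->queue[sd->len++] = pkt;    /* __skb_queue_tail() */
		return 0;                      /* NET_RX_SUCCESS */
	}
	sd->dropped++;                         /* sd->dropped++ */
	return -1;                             /* NET_RX_DROP */
}

int main(void)
{
	struct fake_softnet sd = { .len = 0, .dropped = 0 };

	for (int i = 0; i < 20; i++)
		fake_enqueue(&sd, i);
	printf("queued=%d dropped=%lu\n", sd.len, sd.dropped);
	return 0;
}
Feeding it 20 packets prints queued=9 dropped=11: everything past the limit only bumps the drop counter, exactly what sd->dropped records in the kernel.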
netdev_budget
----------------------
The maximum total number of packets taken across all devices in one polling cycle (not per device):
Maximum number of packets taken from all interfaces in one polling cycle (NAPI
poll). In one polling cycle interfaces which are registered to polling are
probed in a round-robin manner. The limit of packets in one such probe can be
set per-device via sysfs class/net/<device>/weight .
weight_p
-----------------
The kernel source calls this weight_p, but the sysctl goes by a different name, dev_weight (important enough to remember); it takes effect per CPU and lives under /proc/sys/net/core/:
dev_weight - INTEGER
The maximum number of packets that kernel can handle on a NAPI
interrupt, it's a Per-CPU variable.
Default: 64
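To inspect all three tunables on a live system, you can read them straight out of /proc/sys/net/core/ (equivalently, run sysctl net.core.dev_weight and friends). A small userspace sketch, assuming a Linux box where these proc files exist:
#include <stdio.h>

static void print_tunable(const char *name)
{
	char path[128];
	long val;
	FILE *f;

	snprintf(path, sizeof(path), "/proc/sys/net/core/%s", name);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return;
	}
	if (fscanf(f, "%ld", &val) == 1)
		printf("%-20s = %ld\n", name, val);
	fclose(f);
}

int main(void)
{
	print_tunable("netdev_max_backlog");
	print_tunable("netdev_budget");
	print_tunable("dev_weight");
	return 0;
}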
Let's keep peeking, this time at net_dev_init():
static int __init net_dev_init(void)
{
	//...
	/* runs inside for_each_possible_cpu(i); sd = &per_cpu(softnet_data, i) */
#ifdef CONFIG_RPS
	sd->csd.func = rps_trigger_softirq;
	sd->csd.info = sd;
	sd->csd.flags = 0;
	sd->cpu = i;
#endif
	sd->backlog.poll = process_backlog;
	sd->backlog.weight = weight_p;
	sd->backlog.gro_list = NULL;
	sd->backlog.gro_count = 0;
	//...
}
And surely you remember this part as well:
static int __init net_dev_init(void)
{
	/* the softirq action we registered */
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
}
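If you have never looked at how open_softirq() and __raise_softirq_irqoff() fit together, the idea is simply a function-pointer table plus a pending bitmask. A toy userspace model of that pattern (all fake_* names invented, no concurrency, purely illustrative):
#include <stdio.h>

enum { FAKE_NET_RX_SOFTIRQ, FAKE_NR_SOFTIRQS };

static void (*softirq_vec[FAKE_NR_SOFTIRQS])(void);
static unsigned long pending;

static void fake_open_softirq(int nr, void (*action)(void))
{
	softirq_vec[nr] = action;          /* what open_softirq() does */
}

static void fake_raise_softirq(int nr)
{
	pending |= 1UL << nr;              /* __raise_softirq_irqoff() analogue */
}

static void fake_do_softirq(void)          /* the dispatcher's inner loop */
{
	for (int nr = 0; nr < FAKE_NR_SOFTIRQS; nr++)
		if (pending & (1UL << nr)) {
			pending &= ~(1UL << nr);
			softirq_vec[nr]();
		}
}

static void fake_net_rx_action(void)
{
	printf("net_rx_action runs\n");
}

int main(void)
{
	fake_open_softirq(FAKE_NET_RX_SOFTIRQ, fake_net_rx_action);
	fake_raise_softirq(FAKE_NET_RX_SOFTIRQ);
	fake_do_softirq();
	return 0;
}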
This passage from the original text still holds:
Frames can wait in two places for net_rx_action to process them:
A shared CPU-specific queue
Non-NAPI devices' interrupt handlers, which call netif_rx, place frames into the softnet_data->input_pkt_queue of the CPU on which the interrupt handlers run.
(These are the frames placed on softnet_data->input_pkt_queue by the netif_rx path shown above.)
Device memory
The poll method used by NAPI drivers extracts frames directly from the device
Next, have a look at the flags involved in NAPI scheduling:
enum {
NAPI_STATE_SCHED, /* Poll is scheduled */
NAPI_STATE_DISABLE, /* Disable pending */
NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
};
struct napi_struct synchronization rules
========================================
napi->poll:
Synchronization: NAPI_STATE_SCHED bit in napi->state. Device
driver's dev->close method will invoke napi_disable() on
all NAPI instances which will do a sleeping poll on the
NAPI_STATE_SCHED napi->state bit, waiting for all pending
NAPI activity to cease.
Context: softirq
will be called with interrupts disabled by netconsole.
See it now? When the device driver's dev->close path stops NAPI, napi_disable() sets NAPI_STATE_DISABLE and then sleeps until it can claim NAPI_STATE_SCHED for itself; all of this happens from outside the softirq path. The usual entry point is ndo_stop in struct net_device_ops:
::
* int (*ndo_stop)(struct net_device *dev);
* This function is called when network device transitions to the down
* state.
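That "sleeping poll on the NAPI_STATE_SCHED bit" is easy to model in userspace. Below is a sketch of the napi_disable() handshake using C11 atomics in place of the kernel's set_bit()/test_and_set_bit(); the fake_* names and the usleep() stand-in for msleep() are my own:
#include <stdatomic.h>
#include <unistd.h>

#define SCHED_BIT   (1UL << 0)             /* NAPI_STATE_SCHED */
#define DISABLE_BIT (1UL << 1)             /* NAPI_STATE_DISABLE */

struct fake_napi {
	atomic_ulong state;
};

/* What dev->close ultimately triggers: announce the disable, then sleep
 * until we can claim the SCHED bit ourselves, i.e. until all in-flight
 * polling has ceased (the kernel uses msleep(1) in this loop). */
static void fake_napi_disable(struct fake_napi *n)
{
	atomic_fetch_or(&n->state, DISABLE_BIT);
	while (atomic_fetch_or(&n->state, SCHED_BIT) & SCHED_BIT)
		usleep(1000);
	atomic_fetch_and(&n->state, ~DISABLE_BIT);
}

/* What the poll loop tests before invoking ->poll */
static int fake_napi_scheduled(struct fake_napi *n)
{
	return atomic_load(&n->state) & SCHED_BIT;
}

int main(void)
{
	struct fake_napi n = { .state = 0 }; /* idle: no poll scheduled */

	fake_napi_disable(&n);               /* claims SCHED immediately */
	return fake_napi_scheduled(&n) ? 0 : 1;
}
Once the disabler owns SCHED, no poll can be scheduled until napi_enable() releases it; that is the whole synchronization contract quoted above.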
Now for the main routine, net_rx_action():
static void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = &__get_cpu_var(softnet_data);
unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
void *have;
local_irq_disable();
while (!list_empty(&sd->poll_list)) {
struct napi_struct *n;
int work, weight;
/* If softirq window is exhausted then punt.
 * Allow this to run for 2 jiffies, which allows
 * an average latency of 1.5/HZ.
 */
/*Budget or time window used up: bail out and bump time_squeeze*/
if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
goto softnet_break;
/*Interrupts are enabled again here, but hard-IRQ context only appends to
 *the tail of poll_list while we consume from the head -- yet another way
 *of avoiding a lock*/
local_irq_enable();
/*Grab the first struct napi_struct hanging off this CPU's softnet_data*/
n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
/*Lock this napi_struct for netpoll and record which CPU is polling it*/
have = netpoll_poll_lock(n);
weight = n->weight;
/*Test NAPI_STATE_SCHED to avoid racing with list insertion/removal --
 *somewhat like a while (flag) pthread_cond_wait(); pattern*/
work = 0;
if (test_bit(NAPI_STATE_SCHED, &n->state)) {
work = n->poll(n, weight);
trace_napi_poll(n);
}
WARN_ON_ONCE(work > weight);
/*poll() reports how much work it did; charge that against the budget*/
budget -= work;
local_irq_disable();
/* Drivers must not modify the NAPI state if they
* consume the entire weight. In such cases this code
* still "owns" the NAPI instance and therefore can
* move the instance around on the list at-will.
*/
/*work == weight means the driver consumed its entire quantum and very
 *likely has more packets pending*/
if (unlikely(work == weight)) {
/*NAPI_STATE_DISABLE is set: a napi_disable() is pending*/
if (unlikely(napi_disable_pending(n))) {
local_irq_enable();
/*We are being told to stop, so complete and leave the list*/
napi_complete(n);
local_irq_disable();
} else
/*Still work to do: rotate this napi to the tail of poll_list (round robin)*/
list_move_tail(&n->poll_list, &sd->poll_list);
}
netpoll_poll_unlock(have);
}
out:
/*Examined just below; it only does real work with RPS enabled*/
net_rps_action_and_irq_enable(sd);
#ifdef CONFIG_NET_DMA
/*
* There may not be any more sk_buffs coming right now, so push
* any pending DMA copies to hardware
*/
/*For reference, from the sysctl docs:
 *tcp_dma_copybreak - INTEGER
 *Lower limit, in bytes, of the size of socket reads that will be
 *offloaded to a DMA copy engine, if one is present in the system
 *and CONFIG_NET_DMA is enabled.
 *Default: 4096*/
dma_issue_pending_all();
#endif
return;
softnet_break:
/*Leave the remaining buffers for the next round; NET_RX_SOFTIRQ is re-raised and will run again soon*/
sd->time_squeeze++;
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
goto out;
}
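Putting budget, weight and the list rotation together, here is a toy single-threaded model (all names invented) of the scheduling policy: each device gets at most weight packets per visit, a device that burns its full weight goes back to the tail, and when the budget runs out we count a squeeze exactly like sd->time_squeeze:
#include <stdio.h>

#define NDEV   3
#define WEIGHT 4
#define BUDGET 10

int main(void)
{
	int pending[NDEV] = { 9, 2, 5 };   /* packets waiting per device */
	int list[NDEV]    = { 0, 1, 2 };   /* the poll_list, head first */
	int nlist = NDEV, budget = BUDGET, squeeze = 0;

	while (nlist > 0) {
		if (budget <= 0) {         /* softnet_break: window exhausted */
			squeeze++;         /* sd->time_squeeze++ */
			break;             /* would re-raise NET_RX_SOFTIRQ */
		}
		int d = list[0];
		int work = pending[d] < WEIGHT ? pending[d] : WEIGHT; /* ->poll() */
		pending[d] -= work;
		budget -= work;

		/* shift the head off the list... */
		for (int i = 0; i < nlist - 1; i++)
			list[i] = list[i + 1];
		nlist--;
		if (work == WEIGHT)        /* full weight used: back to the tail */
			list[nlist++] = d;
		/* otherwise the device "called napi_complete()" and left */
	}
	printf("left: %d %d %d  squeeze=%d\n",
	       pending[0], pending[1], pending[2], squeeze);
	return 0;
}
With the numbers above it prints left: 5 0 1  squeeze=1: device 0 still has a backlog because the budget expired first, which is precisely the situation softnet_break handles by re-raising the softirq.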
static void net_rps_action_and_irq_enable(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
struct softnet_data *remsd = sd->rps_ipi_list;
/*Remote softnet_data entries are queued on rps_ipi_list, waiting for an IPI*/
if (remsd) {
sd->rps_ipi_list = NULL;
local_irq_enable();
/* Send pending IPI's to kick RPS processing on remote cpus. */
while (remsd) {
struct softnet_data *next = remsd->rps_ipi_next;
/*Remember sd->cpu, which we stored in net_dev_init()?*/
if (cpu_online(remsd->cpu))
/*csd is the call_single_data member of softnet_data:
 *struct call_single_data csd ____cacheline_aligned_in_smp;
 *(cacheline-aligned for SMP)*/
/*__smp_call_function_single() runs a function on a chosen CPU; following
 *it leads into kernel/smp.c, so just note that the designated CPU ends up
 *executing rps_trigger_softirq*/
__smp_call_function_single(remsd->cpu,
&remsd->csd, 0);
remsd = next;
}
} else
#endif
local_irq_enable();
}
And rps_trigger_softirq() is:
static void rps_trigger_softirq(void *data)
{
struct softnet_data *sd = data;
____napi_schedule(sd, &sd->backlog);
sd->received_rps++;
}
while ____napi_schedule() is simply:
static inline void ____napi_schedule(struct softnet_data *sd,
struct napi_struct *napi)
{
list_add_tail(&napi->poll_list, &sd->poll_list);
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
Amusing, isn't it: the IPI handler just hooks the remote CPU's backlog napi onto its poll_list and raises the very same softirq again.
Now back to that NAPI poll method. Recall, again from net_dev_init():
sd->backlog.poll = process_backlog;
This process_backlog is the generic poll function for the backlog device; drivers with proper NAPI support register their own poll method here instead.
static int process_backlog(struct napi_struct *napi, int quota)
{
int work = 0;
/*Recover the per-CPU softnet_data that embeds this napi_struct*/
struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
#ifdef CONFIG_RPS
/*If rps_ipi_list already holds pending entries, kick those IPIs first*/
if (sd->rps_ipi_list) {
local_irq_disable();
net_rps_action_and_irq_enable(sd);
}
#endif
napi->weight = weight_p;
local_irq_disable();
/*Keep going while there is quota left*/
while (work < quota) {
struct sk_buff *skb;
unsigned int qlen;
/*Dequeue skbs one by one and hand each from L2 up to L3*/
while ((skb = __skb_dequeue(&sd->process_queue))) {
local_irq_enable();
__netif_receive_skb(skb);
local_irq_disable();
/*Bump this CPU's queue-head counter, used by RPS accounting*/
input_queue_head_incr(sd);
/*Check against the quota after every packet*/
if (++work >= quota) {
local_irq_enable();
return work;
}
}
/*Take the lock protecting this CPU's softnet_data->input_pkt_queue*/
rps_lock(sd);
/*Splice whatever has accumulated on input_pkt_queue onto the tail of
 *process_queue, to be handled on the next pass*/
qlen = skb_queue_len(&sd->input_pkt_queue);
if (qlen)
skb_queue_splice_tail_init(&sd->input_pkt_queue,
&sd->process_queue);
/*All queued work fits within the remaining quota*/
if (qlen < quota - work) {
/*Only this CPU touches this napi instance, so clearing NAPI_STATE_SCHED
 *and unlinking it from poll_list are both safe; no smp_mb() needed*/
list_del(&napi->poll_list);
napi->state = 0;
quota = work + qlen;
}
rps_unlock(sd);
}
local_irq_enable();
/*Return how much work was done*/
return work;
}
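The splice trick deserves a closer look: the hot path drains a private process_queue without holding any lock, and the lock is taken only for the brief moment it takes to move input_pkt_queue wholesale onto its tail. A userspace sketch of that pattern (invented names; a pthread mutex stands in for rps_lock):
#include <stdio.h>
#include <pthread.h>

struct node { int id; struct node *next; };
struct queue { struct node *head, *tail; };

static void q_push(struct queue *q, struct node *n)
{
	n->next = NULL;
	if (q->tail)
		q->tail->next = n;
	else
		q->head = n;
	q->tail = n;
}

/* Move everything on src to the tail of dst and reinitialize src,
 * like skb_queue_splice_tail_init(). */
static void q_splice_tail_init(struct queue *src, struct queue *dst)
{
	if (!src->head)
		return;
	if (dst->tail)
		dst->tail->next = src->head;
	else
		dst->head = src->head;
	dst->tail = src->tail;
	src->head = src->tail = NULL;
}

static pthread_mutex_t input_lock = PTHREAD_MUTEX_INITIALIZER;
static struct queue input_q;               /* plays input_pkt_queue */
static struct queue process_q;             /* plays process_queue */

int main(void)
{
	struct node pkts[6];

	/* the "interrupt" side: enqueue under the lock (cf. rps_lock) */
	for (int i = 0; i < 6; i++) {
		pkts[i].id = i;
		pthread_mutex_lock(&input_lock);
		q_push(&input_q, &pkts[i]);
		pthread_mutex_unlock(&input_lock);
	}

	/* the "poll" side: one brief splice under the lock... */
	pthread_mutex_lock(&input_lock);
	q_splice_tail_init(&input_q, &process_q);
	pthread_mutex_unlock(&input_lock);

	/* ...then process everything lock-free */
	for (struct node *n = process_q.head; n; n = n->next)
		printf("processing packet %d\n", n->id); /* __netif_receive_skb() */
	return 0;
}
The splice is O(1) regardless of how many packets arrived, which is why process_backlog can afford to take rps_lock once per refill rather than once per packet.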