`
sunzixun
  • 浏览: 74767 次
  • 性别: Icon_minigender_1
  • 来自: 苏州
社区版块
存档分类
最新评论

<Linux Network 2.6.38> L2-L3

阅读更多

 

__netif_receive_skb 是一个很关键的函数 ,可以看成L2-L3 的分水岭(如果该协议需要到L3的话)

 

net_rx_action 做完了之后基本上 

 

struct sk_buff

{

        //... ... ...

        unsigned short  protocol;

       // ... ... ...

}; 就已经被设置了

 

 

在看 __netif_receive_skb 之前 先看一下这几个东西

 

  这是网络协议解包的主要注册结构体

 

 

/*
 * struct net_protocol - hooks a protocol registers for packet unpacking.
 *
 * Instances are installed into the inet_protos[] table with
 * inet_add_protocol() (see inet_init()).
 *
 * @handler:        receive hook, called with the incoming skb.
 * @err_handler:    error hook; @info carries a 32-bit error detail.
 * @gso_send_check: presumably the segmentation-offload pre-send check
 *                  (GSO) -- confirm against the callers.
 * @gso_segment:    presumably splits @skb into segments for GSO; returns
 *                  an skb pointer.
 * @gro_receive:    presumably the receive-offload (GRO) merge hook; takes
 *                  the list head and the new skb.
 * @gro_complete:   presumably finalises a GRO-merged skb.
 * @no_policy:      one-bit flag.
 * @netns_ok:       one-bit flag.
 */
struct net_protocol {
	int (*handler)(struct sk_buff *skb);
	void (*err_handler)(struct sk_buff *skb, u32 info);
	int (*gso_send_check)(struct sk_buff *skb);
	struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features);
	struct sk_buff **(*gro_receive)(struct sk_buff **head,
					struct sk_buff *skb);
	int (*gro_complete)(struct sk_buff *skb);
	unsigned int no_policy:1;
	unsigned int netns_ok:1;
};

 

 

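协议处理结构会用一个 hash 链表链接起来。注意:挂在下面 ptype_base 哈希表上的是 L2 层的 struct packet_type(例如后文的 ip_packet_type),而上面的 struct net_protocol 则登记在后文的 inet_protos[] 数组中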

 

 

/*
 * Number of buckets in the packet-type hash table, and the mask that
 * reduces a host-order protocol value to a bucket index (its low 4 bits).
 */
#define PTYPE_HASH_SIZE (16)

#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)

 


/*
 * Hash table of packet_type handler lists; the bucket for a handler is
 * ptype_base[ntohs(type) & PTYPE_HASH_MASK].
 */
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;

 

 

kernel 写道
Why 16. Because with 16 the only overlap we get on a hash of the
low nibble of the protocol value is RARP/SNAP/X.25.

 

还有一个可调文件系统接口

 

 

netdev_tstamp_prequeue

----------------------

 

If set to 0, RX packet timestamps can be sampled after RPS processing, when

the target CPU processes packets. It might give some delay on timestamps, but

permit to distribute the load on several cpus.

 

If set to 1 (default), timestamps are sampled as soon as possible, before

queueing.

/proc/sys/net/core/netdev_tstamp_prequeue

 

 

由于这个函数涉及很多特殊协议的处理,vlan实现我也只知道皮毛,这里只简单介绍一下

 

/*
 * __netif_receive_skb - hand a received frame to the registered handlers.
 * @skb: the received buffer; skb->protocol selects the handler list.
 *
 * Abridged walkthrough: the sections marked "//..." are omitted.
 *
 * Every packet_type in the matching ptype_base bucket whose type equals
 * skb->protocol and whose device matches is given the skb. If no handler
 * matches, the skb is freed and counted as dropped.
 *
 * Returns the value of the last handler invoked, or NET_RX_DROP when the
 * frame is claimed by netpoll or no handler matches.
 */
static int __netif_receive_skb(struct sk_buff *skb)
{
    struct packet_type *ptype, *pt_prev;
    rx_handler_func_t *rx_handler;  /* presumably used by the omitted bridge/macvlan step */
    struct net_device *orig_dev;
    struct net_device *master;
    struct net_device *null_or_orig;
    struct net_device *orig_or_bond;
    int ret = NET_RX_DROP;
    __be16 type;

    /*
     * netdev_tstamp_prequeue defaults to 1, meaning the timestamp was
     * already sampled before queueing, so this branch is skipped by
     * default. Only when the sysctl is 0 is the timestamp taken here,
     * which may delay it slightly.
     */
    if (!netdev_tstamp_prequeue)
        net_timestamp_check(skb);
    trace_netif_receive_skb(skb);

    /* If netpoll claims this frame (handled via netpoll_rx), stop here. */
    if (netpoll_receive_skb(skb))
        return NET_RX_DROP;

    /* Record the index of the receiving interface if not already set. */
    if (!skb->skb_iif)
        skb->skb_iif = skb->dev->ifindex;

    /* Decide which device(s) the frame may be delivered on. */
    null_or_orig = NULL;
    orig_dev = skb->dev;
    master = ACCESS_ONCE(orig_dev->master);

    if (skb->deliver_no_wcard)
        null_or_orig = orig_dev;
    else if (master) {
        if (skb_bond_should_drop(skb, master)) {
            skb->deliver_no_wcard = 1;
            null_or_orig = orig_dev; /* deliver only exact match */
        } else
            skb->dev = master;
    }

    /* Reset the header offsets and lengths in preparation for L3. */
    __this_cpu_inc(softnet_data.processed);
    skb_reset_network_header(skb);
    skb_reset_transport_header(skb);
    skb->mac_len = skb->network_header - skb->mac_header;

    /*
     * No handler has matched yet. pt_prev must start out NULL because
     * the delivery loop and the final dispatch below both test it.
     */
    pt_prev = NULL;

    /* Pairs with rcu_read_unlock() at "out"; protects the list walk. */
    rcu_read_lock();

    //...
    /* Handling of the bridge / macvlan case (omitted). */
    //...

    /*
     * Default for the device comparison below. The omitted section may
     * adjust this -- confirm against the full source.
     */
    orig_or_bond = orig_dev;

    /*
     * Main dispatch: walk the hash bucket for this protocol. Delivery is
     * deferred by one step: each new match first delivers to the previous
     * match via deliver_skb(), so the last match can be called directly
     * below.
     */
    type = skb->protocol;
    list_for_each_entry_rcu(ptype,
            &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
        if (ptype->type == type && (ptype->dev == null_or_orig ||
             ptype->dev == skb->dev || ptype->dev == orig_dev ||
             ptype->dev == orig_or_bond)) {
            if (pt_prev)
                ret = deliver_skb(skb, pt_prev, orig_dev);
            pt_prev = ptype;
        }
    }

    if (pt_prev) {
        ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
    } else {
        /* Nobody wants this protocol: count the drop and free the skb. */
        atomic_long_inc(&skb->dev->rx_dropped);
        kfree_skb(skb);
        ret = NET_RX_DROP;
    }

out:    /* presumably also the target of "goto out" in the omitted section */
    rcu_read_unlock();
    return ret;
}
 

 

INET TCP/IP协议实现的linux版本

INET is implemented using the BSD Socket interface as the means of communication with the user level.

 

 

INET里面可以看到 

fs_initcall(inet_init);

 

 

 

include/net/protocol.h:

/* Number of slots in the inet_protos[] table. */
#define MAX_INET_PROTOS 256

/*
 * Table of registered protocol handlers, filled in by inet_add_protocol()
 * (e.g. for IPPROTO_ICMP, IPPROTO_UDP, IPPROTO_TCP in inet_init()).
 */
struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS];

 

 

 

/*
 * inet_init - bring up the IPv4 (PF_INET) stack.
 *
 * Allocates the reserved-port bitmap, registers the TCP/UDP/RAW struct
 * proto instances and the PF_INET socket family, installs the base
 * protocol handlers, initialises the ARP/IP/TCP/UDP/ICMP modules and
 * finally registers ip_packet_type via dev_add_pack().
 *
 * Returns 0 on success. If the bitmap allocation fails it returns
 * -EINVAL; if a proto_register() call fails it returns that call's error
 * after undoing the earlier registrations.
 */
static int __init inet_init(void)
{
	struct inet_protosw *q;
	struct list_head *r;
	int rc = -EINVAL;

	/*
	 * Bitmap of reserved local ports, one bit per port (65536 / 8 bytes).
	 * Per the article it is defined in inet_connection_sock.c as
	 * unsigned long *sysctl_local_reserved_ports.
	 */
	sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
	if (!sysctl_local_reserved_ports)
		goto out;

	/*
	 * Register the socket-layer to transport-layer interfaces
	 * (struct proto). These carry the BSD-style operations
	 * (accept, setsockopt, recvmsg, ...).
	 */
	rc = proto_register(&tcp_prot, 1);
	if (rc)
		goto out_free_reserved_ports;

	rc = proto_register(&udp_prot, 1);
	if (rc)
		goto out_unregister_tcp_proto;

	rc = proto_register(&raw_prot, 1);
	if (rc)
		goto out_unregister_udp_proto;

	/* Register the PF_INET protocol family (struct net_proto_family). */
	(void)sock_register(&inet_family_ops);

#ifdef CONFIG_SYSCTL
	/* sysctl (file-system) interface */
	ip_static_sysctl_init();
#endif

	/*
	 *	Add all the base protocols.
	 *
	 *	Per the article, inet_add_protocol() uses cmpxchg to install
	 *	each protocol into its slot of inet_protos[256].
	 */
	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
#ifdef CONFIG_IP_MULTICAST
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
#endif

	/* Register the socket-side information for inet_create. */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/*
	 *	Set the ARP module up
	 */

	arp_init();

	/*
	 *	Set the IP module up
	 */

	ip_init();

	tcp_v4_init();

	/* Setup TCP slab cache for open requests. */
	tcp_init();

	/* Setup UDP memory threshold */
	udp_init();

	/* Add UDP-Lite (RFC 3828) */
	udplite4_register();

	/*
	 *	Set the ICMP layer up
	 */

	if (icmp_init() < 0)
		panic("Failed to create the ICMP control socket.\n");

	/*
	 *	Initialise the multicast router
	 */
#if defined(CONFIG_IP_MROUTE)
	if (ip_mr_init())
		printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n");
#endif
	/*
	 *	Initialise per-cpu ipv4 mibs
	 */

	if (init_ipv4_mibs())
		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n");

	ipv4_proc_init();

	ipfrag_init();

	/*
	 * Register the IP packet handler. Per the article, dev_add_pack()
	 * uses ptype_head() to pick the list: a ptype_base[] bucket, or
	 * ptype_all when the type is ETH_P_ALL.
	 */
	dev_add_pack(&ip_packet_type);

	rc = 0;
out:
	return rc;
out_unregister_udp_proto:
	proto_unregister(&udp_prot);
out_unregister_tcp_proto:
	proto_unregister(&tcp_prot);
out_free_reserved_ports:
	kfree(sysctl_local_reserved_ports);
	goto out;
}

/*
 * ptype_head - select the list a packet_type belongs on.
 * @pt: the packet handler being looked up.
 *
 * Handlers of type ETH_P_ALL go on ptype_all. Every other handler goes
 * into the ptype_base bucket chosen by the low bits of its host-order
 * protocol value.
 */
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
	const unsigned int bucket = ntohs(pt->type) & PTYPE_HASH_MASK;

	if (pt->type != htons(ETH_P_ALL))
		return &ptype_base[bucket];

	return &ptype_all;
}
 

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics