add pf_ring patches for kernel and libpcap

git-svn-id: svn://svn.openwrt.org/openwrt/trunk/openwrt@2266 3c298f89-4303-0410-b956-a3cf2f4a3e73
author: nbd <nbd@3c298f89-4303-0410-b956-a3cf2f4a3e73> 2005-10-22 22:03:56 +0000
committer: nbd <nbd@3c298f89-4303-0410-b956-a3cf2f4a3e73> 2005-10-22 22:03:56 +0000
commit: 30bf34f05c0f66d9d9221f7ebfe9808024337dd4 (patch)
tree: bfaeed0ba28cc75ed562c61a7516fba097e78e13 /target/linux
parent: 2be847dc58145c4946781ebde7cda49ce0e5f099 (diff)
2 files changed, 11743 insertions, 0 deletions
diff --git a/target/linux/linux-2.4/patches/generic/223-pf_ring.patch b/target/linux/linux-2.4/patches/generic/223-pf_ring.patch
new file mode 100644
index 000000000..1235e1044
--- /dev/null
+++ b/target/linux/linux-2.4/patches/generic/223-pf_ring.patch
@@ -0,0 +1,6444 @@
+diff --unified --recursive --new-file linux-2.4.30/include/linux/ring.h linux-2.4.30-1-686-smp-ring3/include/linux/ring.h
+--- linux-2.4.30/include/linux/ring.h	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.4.30-1-686-smp-ring3/include/linux/ring.h	2005-10-22 23:08:27.388011250 +0200
+@@ -0,0 +1,108 @@
++/*
++ * Definitions for packet ring
++ *
++ * 2004 - Luca Deri <deri@ntop.org>
++ */
++#ifndef __RING_H
++#define __RING_H
++
++
++#define INCLUDE_MAC_INFO
++
++#ifdef INCLUDE_MAC_INFO
++#define SKB_DISPLACEMENT    14 /* Include MAC address information */
++#else
++#define SKB_DISPLACEMENT    0  /* Do NOT include MAC address information */
++#endif
++
++#define RING_MAGIC
++#define RING_MAGIC_VALUE      0x88
++#define RING_FLOWSLOT_VERSION    5
++#define RING_VERSION          "3.0"
++
++#define SO_ADD_TO_CLUSTER        99
++#define SO_REMOVE_FROM_CLUSTER  100
++#define SO_SET_REFLECTOR        101
++
++/* *********************************** */
++
++#ifndef HAVE_PCAP
++struct pcap_pkthdr {
++  struct timeval ts;    /* time stamp */
++  u_int32_t caplen;     /* length of portion present */
++  u_int32_t len;        /* length this packet (off wire) */
++};
++#endif
++
++/* *********************************** */
++
++enum cluster_type {
++  cluster_per_flow = 0,
++  cluster_round_robin
++};
++
++/* *********************************** */
++
++#define RING_MIN_SLOT_SIZE    (60+sizeof(struct pcap_pkthdr))
++#define RING_MAX_SLOT_SIZE    (1514+sizeof(struct pcap_pkthdr))
++
++/* *********************************** */
++
+typedef struct flowSlotInfo {
+  u_int16_t version, sample_rate;
+  u_int32_t tot_slots, slot_len, tot_mem;
+  
+  u_int64_t tot_pkts, tot_lost;
+  u_int64_t tot_insert, tot_read;  
+  u_int16_t insert_idx; /* NOTE(review): 16-bit while tot_slots is 32-bit; presumably a slot index - confirm range */
+  u_int16_t remove_idx; /* NOTE(review): 16-bit while tot_slots is 32-bit; presumably a slot index - confirm range */
+} FlowSlotInfo;
++
++/* *********************************** */
++
+typedef struct flowSlot {
+#ifdef RING_MAGIC
+  u_char     magic;      /* It must always be zero; NOTE(review): RING_MAGIC_VALUE is 0x88 - confirm expected value */
+#endif
+  u_char     slot_state; /* 0=empty, 1=full   */
+  u_char     bucket;     /* bucket[bucketLen] */
+} FlowSlot;
++
++/* *********************************** */
++
++#ifdef __KERNEL__ 
++
++FlowSlotInfo* getRingPtr(void);
++int allocateRing(char *deviceName, u_int numSlots,
++		 u_int bucketLen, u_int sampleRate);
++unsigned int pollRing(struct file *fp, struct poll_table_struct * wait);
++void deallocateRing(void);
++
++/* ************************* */
++
++typedef int (*handle_ring_skb)(struct sk_buff *skb,
++			       u_char recv_packet, u_char real_skb);
++extern handle_ring_skb get_skb_ring_handler(void);
++extern void set_skb_ring_handler(handle_ring_skb the_handler);
++extern void do_skb_ring_handler(struct sk_buff *skb,
++				u_char recv_packet, u_char real_skb);
++
++typedef int (*handle_ring_buffer)(struct net_device *dev, 
++				     char *data, int len);
++extern handle_ring_buffer get_buffer_ring_handler(void);
++extern void set_buffer_ring_handler(handle_ring_buffer the_handler);
++extern int do_buffer_ring_handler(struct net_device *dev,
++				  char *data, int len);
++#endif /* __KERNEL__  */
++
++/* *********************************** */
++
++#define PF_RING          27      /* Packet Ring */
++#define SOCK_RING        PF_RING
++
++/* ioctl() */
++#define SIORINGPOLL      0x8888
++
++/* *********************************** */
++
++#endif /* __RING_H */
+diff --unified --recursive --new-file linux-2.4.30/include/net/sock.h linux-2.4.30-1-686-smp-ring3/include/net/sock.h
+--- linux-2.4.30/include/net/sock.h	2004-11-17 12:54:22.000000000 +0100
++++ linux-2.4.30-1-686-smp-ring3/include/net/sock.h	2005-10-22 23:08:27.976048000 +0200
+@@ -699,6 +699,9 @@
+ #if defined (CONFIG_PACKET) || defined(CONFIG_PACKET_MODULE)
+ 		struct packet_opt	*af_packet;
+ #endif
++#if defined(CONFIG_RING) || defined(CONFIG_RING_MODULE)
++	  struct ring_opt *pf_ring;
++#endif
+ #if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
+ 		x25_cb			*x25;
+ #endif
+diff --unified --recursive --new-file linux-2.4.30/include/net/sock.h.ORG linux-2.4.30-1-686-smp-ring3/include/net/sock.h.ORG
+--- linux-2.4.30/include/net/sock.h.ORG	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.4.30-1-686-smp-ring3/include/net/sock.h.ORG	2005-10-22 23:08:27.940045750 +0200
+@@ -0,0 +1,1400 @@
++/*
++ * INET		An implementation of the TCP/IP protocol suite for the LINUX
++ *		operating system.  INET is implemented using the  BSD Socket
++ *		interface as the means of communication with the user level.
++ *
++ *		Definitions for the AF_INET socket handler.
++ *
++ * Version:	@(#)sock.h	1.0.4	05/13/93
++ *
++ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
++ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
++ *		Corey Minyard <wf-rch!minyard@relay.EU.net>
++ *		Florian La Roche <flla@stud.uni-sb.de>
++ *
++ * Fixes:
++ *		Alan Cox	:	Volatiles in skbuff pointers. See
++ *					skbuff comments. May be overdone,
++ *					better to prove they can be removed
++ *					than the reverse.
++ *		Alan Cox	:	Added a zapped field for tcp to note
++ *					a socket is reset and must stay shut up
++ *		Alan Cox	:	New fields for options
++ *	Pauline Middelink	:	identd support
++ *		Alan Cox	:	Eliminate low level recv/recvfrom
++ *		David S. Miller	:	New socket lookup architecture.
++ *              Steve Whitehouse:       Default routines for sock_ops
++ *
++ *		This program is free software; you can redistribute it and/or
++ *		modify it under the terms of the GNU General Public License
++ *		as published by the Free Software Foundation; either version
++ *		2 of the License, or (at your option) any later version.
++ */
++#ifndef _SOCK_H
++#define _SOCK_H
++
++#include <linux/config.h>
++#include <linux/timer.h>
++#include <linux/cache.h>
++#include <linux/in.h>		/* struct sockaddr_in */
++
++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
++#include <linux/in6.h>		/* struct sockaddr_in6 */
++#include <linux/ipv6.h>		/* dest_cache, inet6_options */
++#include <linux/icmpv6.h>
++#include <net/if_inet6.h>	/* struct ipv6_mc_socklist */
++#endif
++
++#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
++#include <linux/icmp.h>
++#endif
++#include <linux/tcp.h>		/* struct tcphdr */
++#if defined(CONFIG_IP_SCTP) || defined (CONFIG_IP_SCTP_MODULE)
++#include <net/sctp/structs.h>	/* struct sctp_opt */
++#endif
++
++#include <linux/netdevice.h>
++#include <linux/skbuff.h>	/* struct sk_buff */
++#include <net/protocol.h>		/* struct inet_protocol */
++#if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
++#include <net/x25.h>
++#endif
++#if defined(CONFIG_WAN_ROUTER) || defined(CONFIG_WAN_ROUTER_MODULE)
++#include <linux/if_wanpipe.h>
++#endif
++
++#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
++#include <net/ax25.h>
++#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
++#include <net/netrom.h>
++#endif
++#if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
++#include <net/rose.h>
++#endif
++#endif
++
++#if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE)
++#include <linux/if_pppox.h>
++#include <linux/ppp_channel.h>   /* struct ppp_channel */
++#endif
++
++#if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
++#if defined(CONFIG_SPX) || defined(CONFIG_SPX_MODULE)
++#include <net/spx.h>
++#else
++#include <net/ipx.h>
++#endif /* CONFIG_SPX */
++#endif /* CONFIG_IPX */
++
++#if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
++#include <linux/atalk.h>
++#endif
++
++#if defined(CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE)
++#include <net/dn.h>
++#endif
++
++#if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE)
++#include <net/irda/irda.h>
++#endif
++
++#if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE)
++struct atm_vcc;
++#endif
++
++#ifdef CONFIG_FILTER
++#include <linux/filter.h>
++#endif
++
++#include <asm/atomic.h>
++#include <net/dst.h>
++
++
++/* The AF_UNIX specific socket options */
++struct unix_opt {
++	struct unix_address	*addr;
++	struct dentry *		dentry;
++	struct vfsmount *	mnt;
++	struct semaphore	readsem;
++	struct sock *		other;
++	struct sock **		list;
++	struct sock *		gc_tree;
++	atomic_t		inflight;
++	rwlock_t		lock;
++	wait_queue_head_t	peer_wait;
++};
++
++
++/* Once the IPX ncpd patches are in these are going into protinfo. */
++#if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
++struct ipx_opt {
++	ipx_address		dest_addr;
++	ipx_interface		*intrfc;
++	unsigned short		port;
++#ifdef CONFIG_IPX_INTERN
++	unsigned char           node[IPX_NODE_LEN];
++#endif
++	unsigned short		type;
++/* 
++ * To handle special ncp connection-handling sockets for mars_nwe,
++ * the connection number must be stored in the socket.
++ */
++	unsigned short		ipx_ncp_conn;
++};
++#endif
++
++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
++struct ipv6_pinfo {
++	struct in6_addr 	saddr;
++	struct in6_addr 	rcv_saddr;
++	struct in6_addr		daddr;
++	struct in6_addr		*daddr_cache;
++
++	__u32			flow_label;
++	__u32			frag_size;
++	int			hop_limit;
++	int			mcast_hops;
++	int			mcast_oif;
++
++	/* pktoption flags */
++	union {
++		struct {
++			__u8	srcrt:2,
++			        rxinfo:1,
++				rxhlim:1,
++				hopopts:1,
++				dstopts:1,
++                                authhdr:1,
++                                rxflow:1;
++		} bits;
++		__u8		all;
++	} rxopt;
++
++	/* sockopt flags */
++	__u8			mc_loop:1,
++	                        recverr:1,
++	                        sndflow:1,
++	                        pmtudisc:2,
++				ipv6only:1;
++
++	struct ipv6_mc_socklist	*ipv6_mc_list;
++	struct ipv6_ac_socklist	*ipv6_ac_list;
++	struct ipv6_fl_socklist *ipv6_fl_list;
++	__u32			dst_cookie;
++
++	struct ipv6_txoptions	*opt;
++	struct sk_buff		*pktoptions;
++};
++
++struct raw6_opt {
++	__u32			checksum;	/* perform checksum */
++	__u32			offset;		/* checksum offset  */
++
++	struct icmp6_filter	filter;
++};
++
++#define __ipv6_only_sock(sk)	((sk)->net_pinfo.af_inet6.ipv6only)
++#define ipv6_only_sock(sk)	((sk)->family == PF_INET6 && \
++				 (sk)->net_pinfo.af_inet6.ipv6only)
++#else
++#define __ipv6_only_sock(sk)	0
++#define ipv6_only_sock(sk)	0
++#endif /* IPV6 */
++
++#if defined(CONFIG_INET) || defined(CONFIG_INET_MODULE)
++struct raw_opt {
++	struct icmp_filter	filter;
++};
++#endif
++
++#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
++struct inet_opt
++{
++	int			ttl;			/* TTL setting */
++	int			tos;			/* TOS */
++	unsigned	   	cmsg_flags;
++	struct ip_options	*opt;
++	unsigned char		hdrincl;		/* Include headers ? */
++	__u8			mc_ttl;			/* Multicasting TTL */
++	__u8			mc_loop;		/* Loopback */
++	unsigned		recverr : 1,
++				freebind : 1;
++	__u16			id;			/* ID counter for DF pkts */
++	__u8			pmtudisc;
++	int			mc_index;		/* Multicast device index */
++	__u32			mc_addr;
++	struct ip_mc_socklist	*mc_list;		/* Group array */
++};
++#endif
++
++#if defined(CONFIG_PPPOE) || defined (CONFIG_PPPOE_MODULE)
++struct pppoe_opt
++{
++	struct net_device      *dev;	  /* device associated with socket*/
++	struct pppoe_addr	pa;	  /* what this socket is bound to*/
++	struct sockaddr_pppox	relay;	  /* what socket data will be
++					     relayed to (PPPoE relaying) */
++};
++
++struct pppox_opt
++{
++	struct ppp_channel	chan;
++	struct sock		*sk;
++	struct pppox_opt	*next;	  /* for hash table */
++	union {
++		struct pppoe_opt pppoe;
++	} proto;
++};
++#define pppoe_dev	proto.pppoe.dev
++#define pppoe_pa	proto.pppoe.pa
++#define pppoe_relay	proto.pppoe.relay
++#endif
++
++/* This defines a selective acknowledgement block. */
++struct tcp_sack_block {
++	__u32	start_seq;
++	__u32	end_seq;
++};
++
++enum tcp_congestion_algo {
++ 	TCP_RENO=0,
++ 	TCP_VEGAS,
++ 	TCP_WESTWOOD,
++ 	TCP_BIC,
++};
++ 
++struct tcp_opt {
++	int	tcp_header_len;	/* Bytes of tcp header to send		*/
++
++/*
++ *	Header prediction flags
++ *	0x5?10 << 16 + snd_wnd in net byte order
++ */
++	__u32	pred_flags;
++
++/*
++ *	RFC793 variables by their proper names. This means you can
++ *	read the code and the spec side by side (and laugh ...)
++ *	See RFC793 and RFC1122. The RFC writes these in capitals.
++ */
++ 	__u32	rcv_nxt;	/* What we want to receive next 	*/
++ 	__u32	snd_nxt;	/* Next sequence we send		*/
++
++ 	__u32	snd_una;	/* First byte we want an ack for	*/
++ 	__u32	snd_sml;	/* Last byte of the most recently transmitted small packet */
++	__u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
++	__u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */
++
++	/* Delayed ACK control data */
++	struct {
++		__u8	pending;	/* ACK is pending */
++		__u8	quick;		/* Scheduled number of quick acks	*/
++		__u8	pingpong;	/* The session is interactive		*/
++		__u8	blocked;	/* Delayed ACK was blocked by socket lock*/
++		__u32	ato;		/* Predicted tick of soft clock		*/
++		unsigned long timeout;	/* Currently scheduled timeout		*/
++		__u32	lrcvtime;	/* timestamp of last received data packet*/
++		__u16	last_seg_size;	/* Size of last incoming segment	*/
++		__u16	rcv_mss;	/* MSS used for delayed ACK decisions	*/ 
++	} ack;
++
++	/* Data for direct copy to user */
++	struct {
++		struct sk_buff_head	prequeue;
++		struct task_struct	*task;
++		struct iovec		*iov;
++		int			memory;
++		int			len;
++	} ucopy;
++
++	__u32	snd_wl1;	/* Sequence for window update		*/
++	__u32	snd_wnd;	/* The window we expect to receive	*/
++	__u32	max_window;	/* Maximal window ever seen from peer	*/
++	__u32	pmtu_cookie;	/* Last pmtu seen by socket		*/
++	__u16	mss_cache;	/* Cached effective mss, not including SACKS */
++	__u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
++	__u16	ext_header_len;	/* Network protocol overhead (IP/IPv6 options) */
++	__u8	ca_state;	/* State of fast-retransmit machine 	*/
++	__u8	retransmits;	/* Number of unrecovered RTO timeouts.	*/
++
++	__u8	reordering;	/* Packet reordering metric.		*/
++	__u8	queue_shrunk;	/* Write queue has been shrunk recently.*/
++	__u8	defer_accept;	/* User waits for some data after accept() */
++
++/* RTT measurement */
++	__u8	backoff;	/* backoff				*/
++	__u32	srtt;		/* smoothed round trip time << 3		*/
++	__u32	mdev;		/* medium deviation			*/
++	__u32	mdev_max;	/* maximal mdev for the last rtt period	*/
++	__u32	rttvar;		/* smoothed mdev_max			*/
++	__u32	rtt_seq;	/* sequence number to update rttvar	*/
++	__u32	rto;		/* retransmit timeout			*/
++
++	__u32	packets_out;	/* Packets which are "in flight"	*/
++	__u32	left_out;	/* Packets which left the network		*/
++	__u32	retrans_out;	/* Retransmitted packets out		*/
++
++
++/*
++ *	Slow start and congestion control (see also Nagle, and Karn & Partridge)
++ */
++ 	__u32	snd_ssthresh;	/* Slow start size threshold		*/
++ 	__u32	snd_cwnd;	/* Sending congestion window		*/
++ 	__u16	snd_cwnd_cnt;	/* Linear increase counter		*/
++	__u16	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
++	__u32	snd_cwnd_used;
++	__u32	snd_cwnd_stamp;
++
++	/* Two commonly used timers in both sender and receiver paths. */
++	unsigned long		timeout;
++ 	struct timer_list	retransmit_timer;	/* Resend (no ack)	*/
++ 	struct timer_list	delack_timer;		/* Ack delay 		*/
++
++	struct sk_buff_head	out_of_order_queue; /* Out of order segments go here */
++
++	struct tcp_func		*af_specific;	/* Operations which are AF_INET{4,6} specific	*/
++	struct sk_buff		*send_head;	/* Front of stuff to transmit			*/
++	struct page		*sndmsg_page;	/* Cached page for sendmsg			*/
++	u32			sndmsg_off;	/* Cached offset for sendmsg			*/
++
++ 	__u32	rcv_wnd;	/* Current receiver window		*/
++	__u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
++	__u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */
++	__u32	pushed_seq;	/* Last pushed seq, required to talk to windows */
++	__u32	copied_seq;	/* Head of yet unread data		*/
++/*
++ *      Options received (usually on last packet, some only on SYN packets).
++ */
++	char	tstamp_ok,	/* TIMESTAMP seen on SYN packet		*/
++		wscale_ok,	/* Wscale seen on SYN packet		*/
++		sack_ok;	/* SACK seen on SYN packet		*/
++	char	saw_tstamp;	/* Saw TIMESTAMP on last packet		*/
++        __u8	snd_wscale;	/* Window scaling received from sender	*/
++        __u8	rcv_wscale;	/* Window scaling to send to receiver	*/
++	__u8	nonagle;	/* Disable Nagle algorithm?             */
++	__u8	keepalive_probes; /* num of allowed keep alive probes	*/
++
++/*	PAWS/RTTM data	*/
++        __u32	rcv_tsval;	/* Time stamp value             	*/
++        __u32	rcv_tsecr;	/* Time stamp echo reply        	*/
++        __u32	ts_recent;	/* Time stamp to echo next		*/
++        long	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
++
++/*	SACKs data	*/
++	__u16	user_mss;  	/* mss requested by user in ioctl */
++	__u8	dsack;		/* D-SACK is scheduled			*/
++	__u8	eff_sacks;	/* Size of SACK array to send with next packet */
++	struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
++	struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
++
++	__u32	window_clamp;	/* Maximal window to advertise		*/
++	__u32	rcv_ssthresh;	/* Current window clamp			*/
++	__u8	probes_out;	/* unanswered 0 window probes		*/
++	__u8	num_sacks;	/* Number of SACK blocks		*/
++	__u16	advmss;		/* Advertised MSS			*/
++
++	__u8	syn_retries;	/* num of allowed syn retries */
++	__u8	ecn_flags;	/* ECN status bits.			*/
++	__u16	prior_ssthresh; /* ssthresh saved at recovery start	*/
++	__u32	lost_out;	/* Lost packets				*/
++	__u32	sacked_out;	/* SACK'd packets			*/
++	__u32	fackets_out;	/* FACK'd packets			*/
++	__u32	high_seq;	/* snd_nxt at onset of congestion	*/
++
++	__u32	retrans_stamp;	/* Timestamp of the last retransmit,
++				 * also used in SYN-SENT to remember stamp of
++				 * the first SYN. */
++	__u32	undo_marker;	/* tracking retrans started here. */
++	int	undo_retrans;	/* number of undoable retransmissions. */
++	__u32	urg_seq;	/* Seq of received urgent pointer */
++	__u16	urg_data;	/* Saved octet of OOB data and control flags */
++	__u8	pending;	/* Scheduled timer event	*/
++	__u8	urg_mode;	/* In urgent mode		*/
++	__u32	snd_up;		/* Urgent pointer		*/
++
++	/* The syn_wait_lock is necessary only to avoid tcp_get_info having
++	 * to grab the main lock sock while browsing the listening hash
++	 * (otherwise it's deadlock prone).
++	 * This lock is acquired in read mode only from tcp_get_info() and
++	 * it's acquired in write mode _only_ from code that is actively
++	 * changing the syn_wait_queue. All readers that are holding
++	 * the master sock lock don't need to grab this lock in read mode
++	 * too as the syn_wait_queue writes are always protected from
++	 * the main sock lock.
++	 */
++	rwlock_t		syn_wait_lock;
++	struct tcp_listen_opt	*listen_opt;
++
++	/* FIFO of established children */
++	struct open_request	*accept_queue;
++	struct open_request	*accept_queue_tail;
++
++	int			write_pending;	/* A write to socket waits to start. */
++
++	unsigned int		keepalive_time;	  /* time before keep alive takes place */
++	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */
++	int			linger2;
++
++	__u8			adv_cong;    /* Using Vegas, Westwood, or BIC */
++	__u8                    frto_counter; /* Number of new acks after RTO */
++	__u32                   frto_highmark; /* snd_nxt when RTO occurred */
++
++	unsigned long last_synq_overflow; 
++
++/* Receiver side RTT estimation */
++	struct {
++		__u32	rtt;
++		__u32	seq;
++		__u32	time;
++	} rcv_rtt_est;
++
++/* Receiver queue space */
++	struct {
++		int	space;
++		__u32	seq;
++		__u32	time;
++	} rcvq_space;
++
++/* TCP Westwood structure */
++        struct {
++                __u32    bw_ns_est;        /* first bandwidth estimation..not too smoothed 8) */
++                __u32    bw_est;           /* bandwidth estimate */
++                __u32    rtt_win_sx;       /* here starts a new evaluation... */
++                __u32    bk;
++                __u32    snd_una;          /* used for evaluating the number of acked bytes */
++                __u32    cumul_ack;
++                __u32    accounted;
++                __u32    rtt;
++                __u32    rtt_min;          /* minimum observed RTT */
++        } westwood;
++
++/* Vegas variables */
++	struct {
++		__u32	beg_snd_nxt;	/* right edge during last RTT */
++		__u32	beg_snd_una;	/* left edge  during last RTT */
++		__u32	beg_snd_cwnd;	/* saves the size of the cwnd */
++		__u8	doing_vegas_now;/* if true, do vegas for this RTT */
++		__u16	cntRTT;		/* # of RTTs measured within last RTT */
++		__u32	minRTT;		/* min of RTTs measured within last RTT (in usec) */
++		__u32	baseRTT;	/* the min of all Vegas RTT measurements seen (in usec) */
++	} vegas;
++
++	/* BI TCP Parameters */
++	struct {
++		__u32	cnt;		/* increase cwnd by 1 after this number of ACKs */
++		__u32 	last_max_cwnd;	/* last maximum snd_cwnd */
++		__u32	last_cwnd;	/* the last snd_cwnd */
++		__u32   last_stamp;     /* time when updated last_cwnd */
++	} bictcp;
++};
++
++ 	
++/*
++ * This structure really needs to be cleaned up.
++ * Most of it is for TCP, and not used by any of
++ * the other protocols.
++ */
++
++/*
++ * The idea is to start moving to a newer struct gradually
++ * 
++ * IMHO the newer struct should have the following format:
++ * 
++ *	struct sock {
++ *		sockmem [mem, proto, callbacks]
++ *
++ *		union or struct {
++ *			ax25;
++ *		} ll_pinfo;
++ *	
++ *		union {
++ *			ipv4;
++ *			ipv6;
++ *			ipx;
++ *			netrom;
++ *			rose;
++ * 			x25;
++ *		} net_pinfo;
++ *
++ *		union {
++ *			tcp;
++ *			udp;
++ *			spx;
++ *			netrom;
++ *		} tp_pinfo;
++ *
++ *	}
++ *
++ * The idea failed because the IPv6 transition assumes dual IP/IPv6 sockets.
++ * So net_pinfo is really IPv6-only, and protinfo unifies all the other
++ * private areas.
++ */
++
++/* Define this to get the sk->debug debugging facility. */
++#define SOCK_DEBUGGING
++#ifdef SOCK_DEBUGGING
++#define SOCK_DEBUG(sk, msg...) do { if((sk) && ((sk)->debug)) printk(KERN_DEBUG msg); } while (0)
++#else
++#define SOCK_DEBUG(sk, msg...) do { } while (0)
++#endif
++
++/* This is the per-socket lock.  The spinlock provides a synchronization
++ * between user contexts and software interrupt processing, whereas the
++ * mini-semaphore synchronizes multiple users amongst themselves.
++ */
++typedef struct {
++	spinlock_t		slock;
++	unsigned int		users;
++	wait_queue_head_t	wq;
++} socket_lock_t;
++
++#define sock_lock_init(__sk) \
++do {	spin_lock_init(&((__sk)->lock.slock)); \
++	(__sk)->lock.users = 0; \
++	init_waitqueue_head(&((__sk)->lock.wq)); \
++} while(0)
++
++struct sock {
++	/* Socket demultiplex comparisons on incoming packets. */
++	__u32			daddr;		/* Foreign IPv4 addr			*/
++	__u32			rcv_saddr;	/* Bound local IPv4 addr		*/
++	__u16			dport;		/* Destination port			*/
++	unsigned short		num;		/* Local port				*/
++	int			bound_dev_if;	/* Bound device index if != 0		*/
++
++	/* Main hash linkage for various protocol lookup tables. */
++	struct sock		*next;
++	struct sock		**pprev;
++	struct sock		*bind_next;
++	struct sock		**bind_pprev;
++
++	volatile unsigned char	state,		/* Connection state			*/
++				zapped;		/* In ax25 & ipx means not linked	*/
++	__u16			sport;		/* Source port				*/
++
++	unsigned short		family;		/* Address family			*/
++	unsigned char		reuse;		/* SO_REUSEADDR setting			*/
++	unsigned char		shutdown;
++	atomic_t		refcnt;		/* Reference count			*/
++
++	socket_lock_t		lock;		/* Synchronizer...			*/
++	int			rcvbuf;		/* Size of receive buffer in bytes	*/
++
++	wait_queue_head_t	*sleep;		/* Sock wait queue			*/
++	struct dst_entry	*dst_cache;	/* Destination cache			*/
++	rwlock_t		dst_lock;
++	atomic_t		rmem_alloc;	/* Receive queue bytes committed	*/
++	struct sk_buff_head	receive_queue;	/* Incoming packets			*/
++	atomic_t		wmem_alloc;	/* Transmit queue bytes committed	*/
++	struct sk_buff_head	write_queue;	/* Packet sending queue			*/
++	atomic_t		omem_alloc;	/* "o" is "option" or "other" */
++	int			wmem_queued;	/* Persistent queue size */
++	int			forward_alloc;	/* Space allocated forward. */
++	__u32			saddr;		/* Sending source			*/
++	unsigned int		allocation;	/* Allocation mode			*/
++	int			sndbuf;		/* Size of send buffer in bytes		*/
++	struct sock		*prev;
++
++	/* Not all are volatile, but some are, so we might as well say they all are.
++	 * XXX Make this a flag word -DaveM
++	 */
++	volatile char		dead,
++				done,
++				urginline,
++				keepopen,
++				linger,
++				destroy,
++				no_check,
++				broadcast,
++				bsdism;
++	unsigned char		debug;
++	unsigned char		rcvtstamp;
++	unsigned char		use_write_queue;
++	unsigned char		userlocks;
++	/* Hole of 3 bytes. Try to pack. */
++	int			route_caps;
++	int			proc;
++	unsigned long	        lingertime;
++
++	int			hashent;
++	struct sock		*pair;
++
++	/* The backlog queue is special, it is always used with
++	 * the per-socket spinlock held and requires low latency
++	 * access.  Therefore we special case its implementation.
++	 */
++	struct {
++		struct sk_buff *head;
++		struct sk_buff *tail;
++	} backlog;
++
++	rwlock_t		callback_lock;
++
++	/* Error queue, rarely used. */
++	struct sk_buff_head	error_queue;
++
++	struct proto		*prot;
++
++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
++	union {
++		struct ipv6_pinfo	af_inet6;
++	} net_pinfo;
++#endif
++
++	union {
++		struct tcp_opt		af_tcp;
++#if defined(CONFIG_IP_SCTP) || defined (CONFIG_IP_SCTP_MODULE)
++		struct sctp_opt		af_sctp;
++#endif
++#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
++		struct raw_opt		tp_raw4;
++#endif
++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
++		struct raw6_opt		tp_raw;
++#endif /* CONFIG_IPV6 */
++#if defined(CONFIG_SPX) || defined (CONFIG_SPX_MODULE)
++		struct spx_opt		af_spx;
++#endif /* CONFIG_SPX */
++
++	} tp_pinfo;
++
++	int			err, err_soft;	/* Soft holds errors that don't
++						   cause failure but are the cause
++						   of a persistent failure not just
++						   'timed out' */
++	unsigned short		ack_backlog;
++	unsigned short		max_ack_backlog;
++	__u32			priority;
++	unsigned short		type;
++	unsigned char		localroute;	/* Route locally only */
++	unsigned char		protocol;
++	struct ucred		peercred;
++	int			rcvlowat;
++	long			rcvtimeo;
++	long			sndtimeo;
++
++#ifdef CONFIG_FILTER
++	/* Socket Filtering Instructions */
++	struct sk_filter      	*filter;
++#endif /* CONFIG_FILTER */
++
++	/* This is where all the private (optional) areas that don't
++	 * overlap will eventually live. 
++	 */
++	union {
++		void *destruct_hook;
++	  	struct unix_opt	af_unix;
++#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
++		struct inet_opt af_inet;
++#endif
++#if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
++		struct atalk_sock	af_at;
++#endif
++#if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
++		struct ipx_opt		af_ipx;
++#endif
++#if defined (CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE)
++		struct dn_scp           dn;
++#endif
++#if defined (CONFIG_PACKET) || defined(CONFIG_PACKET_MODULE)
++		struct packet_opt	*af_packet;
++#endif
++#if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
++		x25_cb			*x25;
++#endif
++#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
++		ax25_cb			*ax25;
++#endif
++#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
++		nr_cb			*nr;
++#endif
++#if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
++		rose_cb			*rose;
++#endif
++#if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE)
++		struct pppox_opt	*pppox;
++#endif
++		struct netlink_opt	*af_netlink;
++#if defined(CONFIG_ECONET) || defined(CONFIG_ECONET_MODULE)
++		struct econet_opt	*af_econet;
++#endif
++#if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE)
++		struct atm_vcc		*af_atm;
++#endif
++#if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE)
++		struct irda_sock        *irda;
++#endif
++#if defined(CONFIG_WAN_ROUTER) || defined(CONFIG_WAN_ROUTER_MODULE)
++               struct wanpipe_opt      *af_wanpipe;
++#endif
++	} protinfo;  		
++
++
++	/* This part is used for the timeout functions. */
++	struct timer_list	timer;		/* This is the sock cleanup timer. */
++	struct timeval		stamp;
++
++	/* Identd and reporting IO signals */
++	struct socket		*socket;
++
++	/* RPC layer private data */
++	void			*user_data;
++  
++	/* Callbacks */
++	void			(*state_change)(struct sock *sk);
++	void			(*data_ready)(struct sock *sk,int bytes);
++	void			(*write_space)(struct sock *sk);
++	void			(*error_report)(struct sock *sk);
++
++  	int			(*backlog_rcv) (struct sock *sk,
++						struct sk_buff *skb);  
++	void                    (*destruct)(struct sock *sk);
++};
++
++/* The per-socket spinlock must be held here. */
++#define sk_add_backlog(__sk, __skb)			\
++do {	if((__sk)->backlog.tail == NULL) {		\
++		(__sk)->backlog.head =			\
++		     (__sk)->backlog.tail = (__skb);	\
++	} else {					\
++		((__sk)->backlog.tail)->next = (__skb);	\
++		(__sk)->backlog.tail = (__skb);		\
++	}						\
++	(__skb)->next = NULL;				\
++} while(0)
++
++/* IP protocol blocks we attach to sockets.
++ * socket layer -> transport layer interface
++ * transport -> network interface is defined by struct inet_proto
++ */
++struct proto {
++	void			(*close)(struct sock *sk, 
++					long timeout);
++	int			(*connect)(struct sock *sk,
++				        struct sockaddr *uaddr, 
++					int addr_len);
++	int			(*disconnect)(struct sock *sk, int flags);
++
++	struct sock *		(*accept) (struct sock *sk, int flags, int *err);
++
++	int			(*ioctl)(struct sock *sk, int cmd,
++					 unsigned long arg);
++	int			(*init)(struct sock *sk);
++	int			(*destroy)(struct sock *sk);
++	void			(*shutdown)(struct sock *sk, int how);
++	int			(*setsockopt)(struct sock *sk, int level, 
++					int optname, char *optval, int optlen);
++	int			(*getsockopt)(struct sock *sk, int level, 
++					int optname, char *optval, 
++					int *option);  	 
++	int			(*sendmsg)(struct sock *sk, struct msghdr *msg,
++					   int len);
++	int			(*recvmsg)(struct sock *sk, struct msghdr *msg,
++					int len, int noblock, int flags, 
++					int *addr_len);
++	int			(*bind)(struct sock *sk, 
++					struct sockaddr *uaddr, int addr_len);
++
++	int			(*backlog_rcv) (struct sock *sk, 
++						struct sk_buff *skb);
++
++	/* Keeping track of sk's, looking them up, and port selection methods. */
++	void			(*hash)(struct sock *sk);
++	void			(*unhash)(struct sock *sk);
++	int			(*get_port)(struct sock *sk, unsigned short snum);
++
++	char			name[32];
++
++	struct {
++		int inuse;
++		u8  __pad[SMP_CACHE_BYTES - sizeof(int)];
++	} stats[NR_CPUS];
++};
++
++/* Called with local bh disabled */
++static __inline__ void sock_prot_inc_use(struct proto *prot)
++{
++	prot->stats[smp_processor_id()].inuse++;
++}
++
++static __inline__ void sock_prot_dec_use(struct proto *prot)
++{
++	prot->stats[smp_processor_id()].inuse--;
++}
++
++/* About 10 seconds */
++#define SOCK_DESTROY_TIME (10*HZ)
++
++/* Sockets 0-1023 can't be bound to unless you are superuser */
++#define PROT_SOCK	1024
++
++#define SHUTDOWN_MASK	3
++#define RCV_SHUTDOWN	1
++#define SEND_SHUTDOWN	2
++
++#define SOCK_SNDBUF_LOCK	1
++#define SOCK_RCVBUF_LOCK	2
++#define SOCK_BINDADDR_LOCK	4
++#define SOCK_BINDPORT_LOCK	8
++
++
++/* Used by processes to "lock" a socket state, so that
++ * interrupts and bottom half handlers won't change it
++ * from under us. It essentially blocks any incoming
++ * packets, so that we won't get any new data or any
++ * packets that change the state of the socket.
++ *
++ * While locked, BH processing will add new packets to
++ * the backlog queue.  This queue is processed by the
++ * owner of the socket lock right before it is released.
++ *
++ * Since ~2.3.5 it is also exclusive sleep lock serializing
++ * accesses from user process context.
++ */
++extern void __lock_sock(struct sock *sk);
++extern void __release_sock(struct sock *sk);
++#define lock_sock(__sk) \
++do {	spin_lock_bh(&((__sk)->lock.slock)); \
++	if ((__sk)->lock.users != 0) \
++		__lock_sock(__sk); \
++	(__sk)->lock.users = 1; \
++	spin_unlock_bh(&((__sk)->lock.slock)); \
++} while(0)
++
++#define release_sock(__sk) \
++do {	spin_lock_bh(&((__sk)->lock.slock)); \
++	if ((__sk)->backlog.tail != NULL) \
++		__release_sock(__sk); \
++	(__sk)->lock.users = 0; \
++        if (waitqueue_active(&((__sk)->lock.wq))) wake_up(&((__sk)->lock.wq)); \
++	spin_unlock_bh(&((__sk)->lock.slock)); \
++} while(0)
++
++/* BH context may only use the following locking interface. */
++#define bh_lock_sock(__sk)	spin_lock(&((__sk)->lock.slock))
++#define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->lock.slock))
++
++extern struct sock *		sk_alloc(int family, int priority, int zero_it);
++extern void			sk_free(struct sock *sk);
++
++extern struct sk_buff		*sock_wmalloc(struct sock *sk,
++					      unsigned long size, int force,
++					      int priority);
++extern struct sk_buff		*sock_rmalloc(struct sock *sk,
++					      unsigned long size, int force,
++					      int priority);
++extern void			sock_wfree(struct sk_buff *skb);
++extern void			sock_rfree(struct sk_buff *skb);
++
++extern int			sock_setsockopt(struct socket *sock, int level,
++						int op, char *optval,
++						int optlen);
++
++extern int			sock_getsockopt(struct socket *sock, int level,
++						int op, char *optval, 
++						int *optlen);
++extern struct sk_buff 		*sock_alloc_send_skb(struct sock *sk,
++						     unsigned long size,
++						     int noblock,
++						     int *errcode);
++extern struct sk_buff 		*sock_alloc_send_pskb(struct sock *sk,
++						      unsigned long header_len,
++						      unsigned long data_len,
++						      int noblock,
++						      int *errcode);
++extern void *sock_kmalloc(struct sock *sk, int size, int priority);
++extern void sock_kfree_s(struct sock *sk, void *mem, int size);
++
++/*
++ * Functions to fill in entries in struct proto_ops when a protocol
++ * does not implement a particular function.
++ */
++extern int                      sock_no_release(struct socket *);
++extern int                      sock_no_bind(struct socket *, 
++					     struct sockaddr *, int);
++extern int                      sock_no_connect(struct socket *,
++						struct sockaddr *, int, int);
++extern int                      sock_no_socketpair(struct socket *,
++						   struct socket *);
++extern int                      sock_no_accept(struct socket *,
++					       struct socket *, int);
++extern int                      sock_no_getname(struct socket *,
++						struct sockaddr *, int *, int);
++extern unsigned int             sock_no_poll(struct file *, struct socket *,
++					     struct poll_table_struct *);
++extern int                      sock_no_ioctl(struct socket *, unsigned int,
++					      unsigned long);
++extern int			sock_no_listen(struct socket *, int);
++extern int                      sock_no_shutdown(struct socket *, int);
++extern int			sock_no_getsockopt(struct socket *, int , int,
++						   char *, int *);
++extern int			sock_no_setsockopt(struct socket *, int, int,
++						   char *, int);
++extern int 			sock_no_fcntl(struct socket *, 
++					      unsigned int, unsigned long);
++extern int                      sock_no_sendmsg(struct socket *,
++						struct msghdr *, int,
++						struct scm_cookie *);
++extern int                      sock_no_recvmsg(struct socket *,
++						struct msghdr *, int, int,
++						struct scm_cookie *);
++extern int			sock_no_mmap(struct file *file,
++					     struct socket *sock,
++					     struct vm_area_struct *vma);
++extern ssize_t			sock_no_sendpage(struct socket *sock,
++						struct page *page,
++						int offset, size_t size, 
++						int flags);
++
++/*
++ *	Default socket callbacks and setup code
++ */
++ 
++extern void sock_def_destruct(struct sock *);
++
++/* Initialise core socket variables */
++extern void sock_init_data(struct socket *sock, struct sock *sk);
++
++extern void sklist_remove_socket(struct sock **list, struct sock *sk);
++extern void sklist_insert_socket(struct sock **list, struct sock *sk);
++extern void sklist_destroy_socket(struct sock **list, struct sock *sk);
++
++#ifdef CONFIG_FILTER
++
++/**
++ *	sk_filter - run a packet through a socket filter
++ *	@sk: sock associated with &sk_buff
++ *	@skb: buffer to filter
++ *	@needlock: set to 1 if the sock is not locked by caller.
++ *
++ * Run the filter code and then cut skb->data to correct size returned by
++ * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
++ * than pkt_len we keep whole skb->data. This is the socket level
++ * wrapper to sk_run_filter. It returns 0 if the packet should
++ * be accepted or -EPERM if the packet should be tossed.
++ */
++
++static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
++{
++	int err = 0;
++
++	if (sk->filter) {
++		struct sk_filter *filter;
++		
++		if (needlock)
++			bh_lock_sock(sk);
++		
++		filter = sk->filter;
++		if (filter) {
++			int pkt_len = sk_run_filter(skb, filter->insns,
++						    filter->len);
++			if (!pkt_len)
++				err = -EPERM;
++			else
++				skb_trim(skb, pkt_len);
++		}
++
++		if (needlock)
++			bh_unlock_sock(sk);
++	}
++	return err;
++}
++
++/**
++ *	sk_filter_release: Release a socket filter
++ *	@sk: socket
++ *	@fp: filter to remove
++ *
++ *	Remove a filter from a socket and release its resources.
++ */
++ 
++static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
++{
++	unsigned int size = sk_filter_len(fp);
++
++	atomic_sub(size, &sk->omem_alloc);
++
++	if (atomic_dec_and_test(&fp->refcnt))
++		kfree(fp);
++}
++
++static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
++{
++	atomic_inc(&fp->refcnt);
++	atomic_add(sk_filter_len(fp), &sk->omem_alloc);
++}
++
++#else
++
++static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
++{
++	return 0;
++}
++
++#endif /* CONFIG_FILTER */
++
++/*
++ * Socket reference counting postulates.
++ *
++ * * Each user of socket SHOULD hold a reference count.
++ * * Each access point to socket (a hash table bucket, reference from a list,
++ *   running timer, skb in flight) MUST hold a reference count.
++ * * When reference count hits 0, it means it will never increase back.
++ * * When reference count hits 0, it means that no references from
++ *   outside exist to this socket and current process on current CPU
++ *   is last user and may/should destroy this socket.
++ * * sk_free is called from any context: process, BH, IRQ. When
++ *   it is called, socket has no references from outside -> sk_free
++ *   may release descendant resources allocated by the socket, but
++ *   to the time when it is called, socket is NOT referenced by any
++ *   hash tables, lists etc.
++ * * Packets, delivered from outside (from network or from another process)
++ *   and enqueued on receive/error queues SHOULD NOT grab reference count,
++ *   when they sit in queue. Otherwise, packets will leak to hole, when
++ *   socket is looked up by one cpu and unhashing is made by another CPU.
++ *   It is true for udp/raw, netlink (leak to receive and error queues), tcp
++ *   (leak to backlog). Packet socket does all the processing inside
++ *   BR_NETPROTO_LOCK, so that it has not this race condition. UNIX sockets
++ *   use separate SMP lock, so that they are prone too.
++ */
++
++/* Grab socket reference count. This operation is valid only
++   when sk is ALREADY grabbed f.e. it is found in hash table
++   or a list and the lookup is made under lock preventing hash table
++   modifications.
++ */
++
++static inline void sock_hold(struct sock *sk)
++{
++	atomic_inc(&sk->refcnt);
++}
++
++/* Ungrab socket in the context, which assumes that socket refcnt
++   cannot hit zero, f.e. it is true in context of any socketcall.
++ */
++static inline void __sock_put(struct sock *sk)
++{
++	atomic_dec(&sk->refcnt);
++}
++
++/* Ungrab socket and destroy it, if it was the last reference. */
++static inline void sock_put(struct sock *sk)
++{
++	if (atomic_dec_and_test(&sk->refcnt))
++		sk_free(sk);
++}
++
++/* Detach socket from process context.
++ * Announce socket dead, detach it from wait queue and inode.
++ * Note that parent inode held reference count on this struct sock,
++ * we do not release it in this function, because protocol
++ * probably wants some additional cleanups or even continuing
++ * to work with this socket (TCP).
++ */
++static inline void sock_orphan(struct sock *sk)
++{
++	write_lock_bh(&sk->callback_lock);
++	sk->dead = 1;
++	sk->socket = NULL;
++	sk->sleep = NULL;
++	write_unlock_bh(&sk->callback_lock);
++}
++
++static inline void sock_graft(struct sock *sk, struct socket *parent)
++{
++	write_lock_bh(&sk->callback_lock);
++	sk->sleep = &parent->wait;
++	parent->sk = sk;
++	sk->socket = parent;
++	write_unlock_bh(&sk->callback_lock);
++}
++
++static inline int sock_i_uid(struct sock *sk)
++{
++	int uid;
++
++	read_lock(&sk->callback_lock);
++	uid = sk->socket ? sk->socket->inode->i_uid : 0;
++	read_unlock(&sk->callback_lock);
++	return uid;
++}
++
++static inline unsigned long sock_i_ino(struct sock *sk)
++{
++	unsigned long ino;
++
++	read_lock(&sk->callback_lock);
++	ino = sk->socket ? sk->socket->inode->i_ino : 0;
++	read_unlock(&sk->callback_lock);
++	return ino;
++}
++
++static inline struct dst_entry *
++__sk_dst_get(struct sock *sk)
++{
++	return sk->dst_cache;
++}
++
++static inline struct dst_entry *
++sk_dst_get(struct sock *sk)
++{
++	struct dst_entry *dst;
++
++	read_lock(&sk->dst_lock);
++	dst = sk->dst_cache;
++	if (dst)
++		dst_hold(dst);
++	read_unlock(&sk->dst_lock);
++	return dst;
++}
++
++static inline void
++__sk_dst_set(struct sock *sk, struct dst_entry *dst)
++{
++	struct dst_entry *old_dst;
++
++	old_dst = sk->dst_cache;
++	sk->dst_cache = dst;
++	dst_release(old_dst);
++}
++
++static inline void
++sk_dst_set(struct sock *sk, struct dst_entry *dst)
++{
++	write_lock(&sk->dst_lock);
++	__sk_dst_set(sk, dst);
++	write_unlock(&sk->dst_lock);
++}
++
++static inline void
++__sk_dst_reset(struct sock *sk)
++{
++	struct dst_entry *old_dst;
++
++	old_dst = sk->dst_cache;
++	sk->dst_cache = NULL;
++	dst_release(old_dst);
++}
++
++static inline void
++sk_dst_reset(struct sock *sk)
++{
++	write_lock(&sk->dst_lock);
++	__sk_dst_reset(sk);
++	write_unlock(&sk->dst_lock);
++}
++
++static inline struct dst_entry *
++__sk_dst_check(struct sock *sk, u32 cookie)
++{
++	struct dst_entry *dst = sk->dst_cache;
++
++	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
++		sk->dst_cache = NULL;
++		return NULL;
++	}
++
++	return dst;
++}
++
++static inline struct dst_entry *
++sk_dst_check(struct sock *sk, u32 cookie)
++{
++	struct dst_entry *dst = sk_dst_get(sk);
++
++	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
++		sk_dst_reset(sk);
++		return NULL;
++	}
++
++	return dst;
++}
++
++
++/*
++ * 	Queue a received datagram if it will fit. Stream and sequenced
++ *	protocols can't normally use this as they need to fit buffers in
++ *	and play with them.
++ *
++ * 	Inlined as it's very short and called for pretty much every
++ *	packet ever received.
++ */
++
++static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
++{
++	sock_hold(sk);
++	skb->sk = sk;
++	skb->destructor = sock_wfree;
++	atomic_add(skb->truesize, &sk->wmem_alloc);
++}
++
++static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
++{
++	skb->sk = sk;
++	skb->destructor = sock_rfree;
++	atomic_add(skb->truesize, &sk->rmem_alloc);
++}
++
++static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
++{
++	int err = 0;
++	int skb_len;
++
++	/* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
++	   number of warnings when compiling with -W --ANK
++	 */
++	if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	/* It would be deadlock, if sock_queue_rcv_skb is used
++	   with socket lock! We assume that users of this
++	   function are lock free.
++	*/
++	err = sk_filter(sk, skb, 1);
++	if (err)
++		goto out;
++
++	skb->dev = NULL;
++	skb_set_owner_r(skb, sk);
++
++	/* Cache the SKB length before we tack it onto the receive
++	 * queue.  Once it is added it no longer belongs to us and
++	 * may be freed by other threads of control pulling packets
++	 * from the queue.
++	 */
++	skb_len = skb->len;
++
++	skb_queue_tail(&sk->receive_queue, skb);
++	if (!sk->dead)
++		sk->data_ready(sk,skb_len);
++out:
++	return err;
++}
++
++static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
++{
++	int skb_len = skb->len;	/* cached: once queued, the skb may be freed by whoever dequeues it */
++	/* Refuse (-ENOMEM) once rmem_alloc would reach rcvbuf; the (unsigned)
++	   cast of sk->rcvbuf is pointless but reduces -W warnings --ANK */
++	if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
++		return -ENOMEM;
++	skb_set_owner_r(skb, sk);
++	skb_queue_tail(&sk->error_queue,skb);
++	if (!sk->dead)
++		sk->data_ready(sk,skb_len);
++	return 0;
++}
++
++/*
++ *	Recover an error report and clear atomically
++ */
++ 
++static inline int sock_error(struct sock *sk)
++{
++	int err=xchg(&sk->err,0);
++	return -err;
++}
++
++static inline unsigned long sock_wspace(struct sock *sk)
++{
++	int amt = 0;
++
++	if (!(sk->shutdown & SEND_SHUTDOWN)) {
++		amt = sk->sndbuf - atomic_read(&sk->wmem_alloc);
++		if (amt < 0) 
++			amt = 0;
++	}
++	return amt;
++}
++
++static inline void sk_wake_async(struct sock *sk, int how, int band)
++{
++	if (sk->socket && sk->socket->fasync_list)
++		sock_wake_async(sk->socket, how, band);
++}
++
++#define SOCK_MIN_SNDBUF 2048
++#define SOCK_MIN_RCVBUF 256
++
++/*
++ *	Default write policy as shown to user space via poll/select/SIGIO
++ */
++static inline int sock_writeable(struct sock *sk) 
++{
++	return atomic_read(&sk->wmem_alloc) < (sk->sndbuf / 2);
++}
++
++static inline int gfp_any(void)
++{
++	return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
++}
++
++static inline long sock_rcvtimeo(struct sock *sk, int noblock)
++{
++	return noblock ? 0 : sk->rcvtimeo;
++}
++
++static inline long sock_sndtimeo(struct sock *sk, int noblock)
++{
++	return noblock ? 0 : sk->sndtimeo;
++}
++
++static inline int sock_rcvlowat(struct sock *sk, int waitall, int len)
++{
++	return (waitall ? len : min_t(int, sk->rcvlowat, len)) ? : 1;
++}
++
++/* Alas, with timeout socket operations are not restartable.
++ * Compare this to poll().
++ */
++static inline int sock_intr_errno(long timeo)
++{
++	return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
++}
++
++static __inline__ void
++sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
++{
++	if (sk->rcvtstamp)
++		put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(skb->stamp), &skb->stamp);
++	else
++		sk->stamp = skb->stamp;
++}
++
++/* 
++ *	Enable debug/info messages 
++ */
++
++#if 0
++#define NETDEBUG(x)	do { } while (0)
++#else
++#define NETDEBUG(x)	do { x; } while (0)
++#endif
++
++/*
++ * Macros for sleeping on a socket. Use them like this:
++ *
++ * SOCK_SLEEP_PRE(sk)
++ * if (condition)
++ * 	schedule();
++ * SOCK_SLEEP_POST(sk)
++ *
++ */
++
++#define SOCK_SLEEP_PRE(sk) 	{ struct task_struct *tsk = current; \
++				DECLARE_WAITQUEUE(wait, tsk); \
++				tsk->state = TASK_INTERRUPTIBLE; \
++				add_wait_queue((sk)->sleep, &wait); \
++				release_sock(sk);
++
++#define SOCK_SLEEP_POST(sk)	tsk->state = TASK_RUNNING; \
++				remove_wait_queue((sk)->sleep, &wait); \
++				lock_sock(sk); \
++				}
++
++extern __u32 sysctl_wmem_max;
++extern __u32 sysctl_rmem_max;
++
++#endif	/* _SOCK_H */
+diff --unified --recursive --new-file linux-2.4.30/net/Config.in linux-2.4.30-1-686-smp-ring3/net/Config.in
+--- linux-2.4.30/net/Config.in	2005-01-19 15:10:13.000000000 +0100
++++ linux-2.4.30-1-686-smp-ring3/net/Config.in	2005-10-22 23:08:28.028051250 +0200
+@@ -15,6 +15,9 @@
+    bool '  Network packet filtering debugging' CONFIG_NETFILTER_DEBUG
+ fi
+ bool 'Socket Filtering'  CONFIG_FILTER
++if [ "$CONFIG_EXPERIMENTAL" = "y" -a "$CONFIG_FILTER" = "y" ]; then
++    source net/ring/Config.in
++fi
+ tristate 'Unix domain sockets' CONFIG_UNIX
+ bool 'TCP/IP networking' CONFIG_INET
+ if [ "$CONFIG_INET" = "y" ]; then
+diff --unified --recursive --new-file linux-2.4.30/net/Config.in.ORG linux-2.4.30-1-686-smp-ring3/net/Config.in.ORG
+--- linux-2.4.30/net/Config.in.ORG	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.4.30-1-686-smp-ring3/net/Config.in.ORG	2005-10-22 23:08:28.020050750 +0200
+@@ -0,0 +1,107 @@
++#
++# Network configuration
++#
++mainmenu_option next_comment
++comment 'Networking options'
++tristate 'Packet socket' CONFIG_PACKET
++if [ "$CONFIG_PACKET" != "n" ]; then
++   bool '  Packet socket: mmapped IO' CONFIG_PACKET_MMAP
++fi
++
++tristate 'Netlink device emulation' CONFIG_NETLINK_DEV
++
++bool 'Network packet filtering (replaces ipchains)' CONFIG_NETFILTER
++if [ "$CONFIG_NETFILTER" = "y" ]; then
++   bool '  Network packet filtering debugging' CONFIG_NETFILTER_DEBUG
++fi
++bool 'Socket Filtering'  CONFIG_FILTER
++tristate 'Unix domain sockets' CONFIG_UNIX
++bool 'TCP/IP networking' CONFIG_INET
++if [ "$CONFIG_INET" = "y" ]; then
++   source net/ipv4/Config.in
++   if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
++#   IPv6 as module will cause a CRASH if you try to unload it
++      tristate '  The IPv6 protocol (EXPERIMENTAL)' CONFIG_IPV6
++      if [ "$CONFIG_IPV6" != "n" ]; then
++	 source net/ipv6/Config.in
++      fi
++   fi
++   if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
++      source net/khttpd/Config.in
++   fi
++   if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
++      source net/sctp/Config.in
++   fi
++fi
++if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
++   tristate 'Asynchronous Transfer Mode (ATM) (EXPERIMENTAL)' CONFIG_ATM
++   if [ "$CONFIG_ATM" = "y" -o "$CONFIG_ATM" = "m" ]; then
++      if [ "$CONFIG_INET" = "y" ]; then
++	 dep_tristate '  Classical IP over ATM' CONFIG_ATM_CLIP $CONFIG_ATM
++	 if [ "$CONFIG_ATM_CLIP" != "n" ]; then
++	    bool '    Do NOT send ICMP if no neighbour' CONFIG_ATM_CLIP_NO_ICMP
++	 fi
++      fi
++      dep_tristate '  LAN Emulation (LANE) support' CONFIG_ATM_LANE $CONFIG_ATM
++      if [ "$CONFIG_INET" = "y" -a "$CONFIG_ATM_LANE" != "n" ]; then
++	 tristate '    Multi-Protocol Over ATM (MPOA) support' CONFIG_ATM_MPOA
++      fi
++      dep_tristate '  RFC1483/2684 Bridged protocols' CONFIG_ATM_BR2684 $CONFIG_ATM
++      if [ "$CONFIG_ATM_BR2684" != "n" ]; then
++            bool '    Per-VC IP filter kludge' CONFIG_ATM_BR2684_IPFILTER
++      fi
++   fi
++fi
++tristate '802.1Q VLAN Support' CONFIG_VLAN_8021Q
++
++comment ' '
++tristate 'The IPX protocol' CONFIG_IPX
++if [ "$CONFIG_IPX" != "n" ]; then
++   source net/ipx/Config.in
++fi
++
++tristate 'Appletalk protocol support' CONFIG_ATALK
++if [ "$CONFIG_ATALK" != "n" ]; then
++   source drivers/net/appletalk/Config.in
++fi
++
++tristate 'DECnet Support' CONFIG_DECNET
++if [ "$CONFIG_DECNET" != "n" ]; then
++   source net/decnet/Config.in
++fi
++dep_tristate '802.1d Ethernet Bridging' CONFIG_BRIDGE $CONFIG_INET
++if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
++   tristate 'CCITT X.25 Packet Layer (EXPERIMENTAL)' CONFIG_X25
++   tristate 'LAPB Data Link Driver (EXPERIMENTAL)' CONFIG_LAPB
++   bool '802.2 LLC (EXPERIMENTAL)' CONFIG_LLC
++   bool 'Frame Diverter (EXPERIMENTAL)' CONFIG_NET_DIVERT
++#   if [ "$CONFIG_LLC" = "y" ]; then
++#      bool '  Netbeui (EXPERIMENTAL)' CONFIG_NETBEUI
++#   fi
++   if [ "$CONFIG_INET" = "y" ]; then
++      tristate 'Acorn Econet/AUN protocols (EXPERIMENTAL)' CONFIG_ECONET
++      if [ "$CONFIG_ECONET" != "n" ]; then
++	 bool '  AUN over UDP' CONFIG_ECONET_AUNUDP
++	 bool '  Native Econet' CONFIG_ECONET_NATIVE
++      fi
++   fi
++   tristate 'WAN router' CONFIG_WAN_ROUTER
++   bool 'Fast switching (read help!)' CONFIG_NET_FASTROUTE
++   bool 'Forwarding between high speed interfaces' CONFIG_NET_HW_FLOWCONTROL
++fi
++
++mainmenu_option next_comment
++comment 'QoS and/or fair queueing'
++bool 'QoS and/or fair queueing' CONFIG_NET_SCHED
++if [ "$CONFIG_NET_SCHED" = "y" ]; then
++   source net/sched/Config.in
++fi
++#bool 'Network code profiler' CONFIG_NET_PROFILE
++endmenu
++
++mainmenu_option next_comment
++comment 'Network testing'
++dep_tristate 'Packet Generator (USE WITH CAUTION)' CONFIG_NET_PKTGEN $CONFIG_PROC_FS
++endmenu
++
++endmenu
+diff --unified --recursive --new-file linux-2.4.30/net/Makefile linux-2.4.30-1-686-smp-ring3/net/Makefile
+--- linux-2.4.30/net/Makefile	2004-08-08 01:26:06.000000000 +0200
++++ linux-2.4.30-1-686-smp-ring3/net/Makefile	2005-10-22 23:08:27.928045000 +0200
+@@ -7,7 +7,7 @@
+ 
+ O_TARGET :=	network.o
+ 
+-mod-subdirs :=	ipv4/netfilter ipv6/netfilter ipx irda bluetooth atm netlink sched core sctp 802
++mod-subdirs :=	ipv4/netfilter ipv6/netfilter ipx irda bluetooth atm netlink sched core sctp 802 ring
+ export-objs :=	netsyms.o
+ 
+ subdir-y :=	core ethernet
+@@ -46,6 +46,7 @@
+ subdir-$(CONFIG_DECNET)		+= decnet
+ subdir-$(CONFIG_ECONET)		+= econet
+ subdir-$(CONFIG_VLAN_8021Q)           += 8021q
++subdir-$(CONFIG_RING)		+= ring
+ 
+ ifeq ($(CONFIG_NETFILTER),y)
+   mod-subdirs += ipv4/ipvs
+diff --unified --recursive --new-file linux-2.4.30/net/Makefile.ORG linux-2.4.30-1-686-smp-ring3/net/Makefile.ORG
+--- linux-2.4.30/net/Makefile.ORG	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.4.30-1-686-smp-ring3/net/Makefile.ORG	2005-10-22 23:08:27.916044250 +0200
+@@ -0,0 +1,61 @@
++#
++# Makefile for the linux networking.
++#
++# 2 Sep 2000, Christoph Hellwig <hch@infradead.org>
++# Rewritten to use lists instead of if-statements.
++#
++
++O_TARGET :=	network.o
++
++mod-subdirs :=	ipv4/netfilter ipv6/netfilter ipx irda bluetooth atm netlink sched core sctp 802
++export-objs :=	netsyms.o
++
++subdir-y :=	core ethernet
++subdir-m :=	ipv4 # hum?
++
++
++subdir-$(CONFIG_NET)		+= 802 sched netlink
++subdir-$(CONFIG_IPV6)		+= ipv6
++subdir-$(CONFIG_INET)		+= ipv4
++subdir-$(CONFIG_NETFILTER)	+= ipv4/netfilter
++subdir-$(CONFIG_UNIX)		+= unix
++subdir-$(CONFIG_IP_SCTP)	+= sctp
++
++ifneq ($(CONFIG_IPV6),n)
++ifneq ($(CONFIG_IPV6),)
++subdir-$(CONFIG_NETFILTER)	+= ipv6/netfilter
++endif
++endif
++
++subdir-$(CONFIG_KHTTPD)		+= khttpd
++subdir-$(CONFIG_PACKET)		+= packet
++subdir-$(CONFIG_NET_SCHED)	+= sched
++subdir-$(CONFIG_BRIDGE)		+= bridge
++subdir-$(CONFIG_IPX)		+= ipx
++subdir-$(CONFIG_ATALK)		+= appletalk
++subdir-$(CONFIG_WAN_ROUTER)	+= wanrouter
++subdir-$(CONFIG_X25)		+= x25
++subdir-$(CONFIG_LAPB)		+= lapb
++subdir-$(CONFIG_NETROM)		+= netrom
++subdir-$(CONFIG_ROSE)		+= rose
++subdir-$(CONFIG_AX25)		+= ax25
++subdir-$(CONFIG_IRDA)		+= irda
++subdir-$(CONFIG_BLUEZ)		+= bluetooth
++subdir-$(CONFIG_SUNRPC)		+= sunrpc
++subdir-$(CONFIG_ATM)		+= atm
++subdir-$(CONFIG_DECNET)		+= decnet
++subdir-$(CONFIG_ECONET)		+= econet
++subdir-$(CONFIG_VLAN_8021Q)           += 8021q
++
++ifeq ($(CONFIG_NETFILTER),y)
++  mod-subdirs += ipv4/ipvs
++  subdir-$(CONFIG_IP_VS) += ipv4/ipvs
++endif
++
++obj-y	:= socket.o $(join $(subdir-y), $(patsubst %,/%.o,$(notdir $(subdir-y))))
++ifeq ($(CONFIG_NET),y)
++obj-$(CONFIG_MODULES)		+= netsyms.o
++obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
++endif
++
++include $(TOPDIR)/Rules.make
+diff --unified --recursive --new-file linux-2.4.30/net/core/dev.c linux-2.4.30-1-686-smp-ring3/net/core/dev.c
+--- linux-2.4.30/net/core/dev.c	2005-04-04 03:42:20.000000000 +0200
++++ linux-2.4.30-1-686-smp-ring3/net/core/dev.c	2005-10-22 23:08:27.900043250 +0200
+@@ -104,6 +104,56 @@
+ #include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
+ #include <net/iw_handler.h>
+ #endif	/* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
++#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
++
++/* #define RING_DEBUG */
++
++#include <linux/ring.h>
++#include <linux/version.h>
++
++static handle_ring_skb ring_handler = NULL;
++
++handle_ring_skb get_skb_ring_handler(void) { return(ring_handler); } /* current skb hook; NULL until one is set */
++
++void set_skb_ring_handler(handle_ring_skb the_handler) {
++  ring_handler = the_handler; /* plain store; NULL makes the if(ring_handler) checks skip the hook */
++}
++
++void do_skb_ring_handler(struct sk_buff *skb,
++			 u_char recv_packet, u_char real_skb) {
++  /* Forward the packet to the skb hook, if one is registered. */
++  if(!ring_handler) return;
++  ring_handler(skb, recv_packet, real_skb);
++}
++
++/* ******************* */
++
++static handle_ring_buffer buffer_ring_handler = NULL;
++
++handle_ring_buffer get_buffer_ring_handler(void) { return(buffer_ring_handler); } /* current buffer hook; NULL until one is set */
++
++void set_buffer_ring_handler(handle_ring_buffer the_handler) {
++  buffer_ring_handler = the_handler; /* plain store; NULL makes do_buffer_ring_handler() return 0 */
++}
++
++int do_buffer_ring_handler(struct net_device *dev, char *data, int len) {
++  /* 1 = a buffer hook was registered and called, 0 = none registered */
++  if(!buffer_ring_handler)
++    return(0);
++  buffer_ring_handler(dev, data, len);
++  return(1);
++}
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++EXPORT_SYMBOL(get_skb_ring_handler);
++EXPORT_SYMBOL(set_skb_ring_handler);
++EXPORT_SYMBOL(do_skb_ring_handler);
++
++EXPORT_SYMBOL(get_buffer_ring_handler);
++EXPORT_SYMBOL(set_buffer_ring_handler);
++EXPORT_SYMBOL(do_buffer_ring_handler);
++#endif
++
++#endif
+ #ifdef CONFIG_PLIP
+ extern int plip_init(void);
+ #endif
+@@ -1066,6 +1116,10 @@
+ 			return -ENOMEM;
+ 	}
+ 
++#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
++	if(ring_handler) ring_handler(skb, 0, 1);
++#endif /* CONFIG_RING */
++
+ 	/* Grab device queue */
+ 	spin_lock_bh(&dev->queue_lock);
+ 	q = dev->qdisc;
+@@ -1278,6 +1332,13 @@
+ 	struct softnet_data *queue;
+ 	unsigned long flags;
+ 
++#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
++	if(ring_handler && ring_handler(skb, 1, 1)) {
++	  /* The packet has been copied into a ring */
++	  return(NET_RX_SUCCESS);
++	}
++#endif /* CONFIG_RING */
++
+ 	if (skb->stamp.tv_sec == 0)
+ 		do_gettimeofday(&skb->stamp);
+ 
+@@ -1464,6 +1525,13 @@
+ 	int ret = NET_RX_DROP;
+ 	unsigned short type;
+ 
++#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
++	if(ring_handler && ring_handler(skb, 1, 1)) {
++	  /* The packet has been copied into a ring */
++	  return(NET_RX_SUCCESS);
++	}
++#endif /* CONFIG_RING */
++
+ 	if (skb->stamp.tv_sec == 0)
+ 		do_gettimeofday(&skb->stamp);
+ 
+diff --unified --recursive --new-file linux-2.4.30/net/core/dev.c.ORG linux-2.4.30-1-686-smp-ring3/net/core/dev.c.ORG
+--- linux-2.4.30/net/core/dev.c.ORG	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.4.30-1-686-smp-ring3/net/core/dev.c.ORG	2005-10-22 23:08:27.472016500 +0200
+@@ -0,0 +1,2926 @@
++/*
++ * 	NET3	Protocol independent device support routines.
++ *
++ *		This program is free software; you can redistribute it and/or
++ *		modify it under the terms of the GNU General Public License
++ *		as published by the Free Software Foundation; either version
++ *		2 of the License, or (at your option) any later version.
++ *
++ *	Derived from the non IP parts of dev.c 1.0.19
++ * 		Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
++ *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
++ *				Mark Evans, <evansmp@uhura.aston.ac.uk>
++ *
++ *	Additional Authors:
++ *		Florian la Roche <rzsfl@rz.uni-sb.de>
++ *		Alan Cox <gw4pts@gw4pts.ampr.org>
++ *		David Hinds <dahinds@users.sourceforge.net>
++ *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
++ *		Adam Sulmicki <adam@cfar.umd.edu>
++ *              Pekka Riikonen <priikone@poesidon.pspt.fi>
++ *
++ *	Changes:
++ *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set to 2
++ *                                      if register_netdev gets called before
++ *                                      net_dev_init & also removed a few lines
++ *                                      of code in the process.
++ *		Alan Cox	:	device private ioctl copies fields back.
++ *		Alan Cox	:	Transmit queue code does relevant stunts to
++ *					keep the queue safe.
++ *		Alan Cox	:	Fixed double lock.
++ *		Alan Cox	:	Fixed promisc NULL pointer trap
++ *		????????	:	Support the full private ioctl range
++ *		Alan Cox	:	Moved ioctl permission check into drivers
++ *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
++ *		Alan Cox	:	100 backlog just doesn't cut it when
++ *					you start doing multicast video 8)
++ *		Alan Cox	:	Rewrote net_bh and list manager.
++ *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
++ *		Alan Cox	:	Took out transmit every packet pass
++ *					Saved a few bytes in the ioctl handler
++ *		Alan Cox	:	Network driver sets packet type before calling netif_rx. Saves
++ *					a function call a packet.
++ *		Alan Cox	:	Hashed net_bh()
++ *		Richard Kooijman:	Timestamp fixes.
++ *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
++ *		Alan Cox	:	Device lock protection.
++ *		Alan Cox	: 	Fixed nasty side effect of device close changes.
++ *		Rudi Cilibrasi	:	Pass the right thing to set_mac_address()
++ *		Dave Miller	:	32bit quantity for the device lock to make it work out
++ *					on a Sparc.
++ *		Bjorn Ekwall	:	Added KERNELD hack.
++ *		Alan Cox	:	Cleaned up the backlog initialise.
++ *		Craig Metz	:	SIOCGIFCONF fix if space for under
++ *					1 device.
++ *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
++ *					is no device open function.
++ *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
++ *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
++ *		Cyrus Durgin	:	Cleaned for KMOD
++ *		Adam Sulmicki   :	Bug Fix : Network Device Unload
++ *					A network device unload needs to purge
++ *					the backlog queue.
++ *	Paul Rusty Russell	:	SIOCSIFNAME
++ *              Pekka Riikonen  :	Netdev boot-time settings code
++ *              Andrew Morton   :       Make unregister_netdevice wait indefinitely on dev->refcnt
++ * 		J Hadi Salim	:	- Backlog queue sampling
++ *				        - netif_rx() feedback	
++ */
++
++#include <asm/uaccess.h>
++#include <asm/system.h>
++#include <asm/bitops.h>
++#include <linux/config.h>
++#include <linux/types.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/string.h>
++#include <linux/mm.h>
++#include <linux/socket.h>
++#include <linux/sockios.h>
++#include <linux/errno.h>
++#include <linux/interrupt.h>
++#include <linux/if_ether.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/notifier.h>
++#include <linux/skbuff.h>
++#include <linux/brlock.h>
++#include <net/sock.h>
++#include <linux/rtnetlink.h>
++#include <linux/proc_fs.h>
++#include <linux/stat.h>
++#include <linux/if_bridge.h>
++#include <linux/divert.h>
++#include <net/dst.h>
++#include <net/pkt_sched.h>
++#include <net/profile.h>
++#include <net/checksum.h>
++#include <linux/highmem.h>
++#include <linux/init.h>
++#include <linux/kmod.h>
++#include <linux/module.h>
++#if defined(CONFIG_NET_RADIO) || defined(CONFIG_NET_PCMCIA_RADIO)
++#include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
++#include <net/iw_handler.h>
++#endif	/* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
++#ifdef CONFIG_PLIP
++extern int plip_init(void);
++#endif
++
++
++/* This define, if set, will randomly drop a packet when congestion
++ * is more than moderate.  It helps fairness in the multi-interface
++ * case when one of them is a hog, but it kills performance for the
++ * single interface case so it is off now by default.
++ */
++#undef RAND_LIE
++
++/* Setting this will sample the queue lengths and thus congestion
++ * via a timer instead of as each packet is received.
++ */
++#undef OFFLINE_SAMPLE
++
++NET_PROFILE_DEFINE(dev_queue_xmit)
++NET_PROFILE_DEFINE(softnet_process)
++
++const char *if_port_text[] = {
++  "unknown",
++  "BNC",
++  "10baseT",
++  "AUI",
++  "100baseT",
++  "100baseTX",
++  "100baseFX"
++};
++
++/*
++ *	The list of packet types we will receive (as opposed to discard)
++ *	and the routines to invoke.
++ *
++ *	Why 16. Because with 16 the only overlap we get on a hash of the
++ *	low nibble of the protocol value is RARP/SNAP/X.25.
++ *
++ *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
++ *             sure which should go first, but I bet it won't make much
++ *             difference if we are running VLANs.  The good news is that
++ *             this protocol won't be in the list unless compiled in, so
++ *             the average user (w/out VLANs) will not be adversely affected.
++ *             --BLG
++ *
++ *		0800	IP
++ *		8100    802.1Q VLAN
++ *		0001	802.3
++ *		0002	AX.25
++ *		0004	802.2
++ *		8035	RARP
++ *		0005	SNAP
++ *		0805	X.25
++ *		0806	ARP
++ *		8137	IPX
++ *		0009	Localtalk
++ *		86DD	IPv6
++ */
++
++static struct packet_type *ptype_base[16];		/* 16 way hashed list */
++static struct packet_type *ptype_all = NULL;		/* Taps */
++
++#ifdef OFFLINE_SAMPLE
++static void sample_queue(unsigned long dummy);
++static struct timer_list samp_timer = { function: sample_queue };
++#endif
++
++#ifdef CONFIG_HOTPLUG
++static int net_run_sbin_hotplug(struct net_device *dev, char *action);
++#else
++#define net_run_sbin_hotplug(dev, action) ({ 0; })
++#endif
++
++/*
++ *	Our notifier list
++ */
++ 
++static struct notifier_block *netdev_chain=NULL;
++
++/*
++ *	Device drivers call our routines to queue packets here. We empty the
++ *	queue in the local softnet handler.
++ */
++struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned;
++
++#ifdef CONFIG_NET_FASTROUTE
++int netdev_fastroute;
++int netdev_fastroute_obstacles;
++#endif
++
++
++/******************************************************************************************
++
++		Protocol management and registration routines
++
++*******************************************************************************************/
++
++/*
++ *	For efficiency
++ */
++
++int netdev_nit=0;
++
++/*
++ *	Add a protocol ID to the list. Now that the input handler is
++ *	smarter we can dispense with all the messy stuff that used to be
++ *	here.
++ *
++ *	BEWARE!!! Protocol handlers, mangling input packets,
++ *	MUST BE last in hash buckets and checking protocol handlers
++ *	MUST start from promiscuous ptype_all chain in net_bh.
++ *	It is true now, do not change it.
++ *	Explanation follows: if protocol handler, mangling packet, will
++ *	be the first on list, it is not able to sense, that packet
++ *	is cloned and should be copied-on-write, so that it will
++ *	change it and subsequent readers will get broken packet.
++ *							--ANK (980803)
++ */
++
++/**
++ *	dev_add_pack - add packet handler
++ *	@pt: packet type declaration
++ * 
++ *	Add a protocol handler to the networking stack. The passed &packet_type
++ *	is linked into kernel lists and may not be freed until it has been
++ *	removed from the kernel lists.
++ */
++ 
++void dev_add_pack(struct packet_type *pt)
++{
++	int hash;
++
++	br_write_lock_bh(BR_NETPROTO_LOCK);
++
++#ifdef CONFIG_NET_FASTROUTE
++	/* Hack to detect packet socket */
++	if ((pt->data) && ((int)(pt->data)!=1)) {
++		netdev_fastroute_obstacles++;
++		dev_clear_fastroute(pt->dev);
++	}
++#endif
++	if (pt->type == htons(ETH_P_ALL)) {
++		netdev_nit++;
++		pt->next=ptype_all;
++		ptype_all=pt;
++	} else {
++		hash=ntohs(pt->type)&15;
++		pt->next = ptype_base[hash];
++		ptype_base[hash] = pt;
++	}
++	br_write_unlock_bh(BR_NETPROTO_LOCK);
++}
++
++
++/**
++ *	dev_remove_pack	 - remove packet handler
++ *	@pt: packet type declaration
++ * 
++ *	Remove a protocol handler that was previously added to the kernel
++ *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
++ *	from the kernel lists and can be freed or reused once this function
++ *	returns.
++ */
++ 
++void dev_remove_pack(struct packet_type *pt)
++{
++	struct packet_type **pt1;	/* walks the link that points at the current entry */
++
++	br_write_lock_bh(BR_NETPROTO_LOCK);
++
++	if (pt->type == htons(ETH_P_ALL)) {
++		netdev_nit--;
++		pt1=&ptype_all;
++	} else {
++		pt1=&ptype_base[ntohs(pt->type)&15];	/* same hash as dev_add_pack() */
++	}
++
++	for (; (*pt1) != NULL; pt1 = &((*pt1)->next)) {
++		if (pt == (*pt1)) {
++			*pt1 = pt->next;	/* unlink */
++#ifdef CONFIG_NET_FASTROUTE
++			if ((pt->data) && ((int)(pt->data)!=1))	/* same test dev_add_pack() used to count it */
++				netdev_fastroute_obstacles--;
++#endif
++			br_write_unlock_bh(BR_NETPROTO_LOCK);
++			return;
++		}
++	}
++	br_write_unlock_bh(BR_NETPROTO_LOCK);
++	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
++}
++
++/******************************************************************************
++
++		      Device Boot-time Settings Routines
++
++*******************************************************************************/
++
++/* Boot time configuration table */
++static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
++
++/**
++ *	netdev_boot_setup_add	- add new setup entry
++ *	@name: name of the device
++ *	@map: configured settings for the device
++ *
++ *	Adds new setup entry to the dev_boot_setup list.  The function
++ *	returns 0 on error and 1 on success.  This is a generic routine to
++ *	all netdevices.
++ */
++int netdev_boot_setup_add(char *name, struct ifmap *map)
++{
++	struct netdev_boot_setup *s;
++	int i;
++
++	s = dev_boot_setup;
++	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
++		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {	/* first unused slot */
++			memset(s[i].name, 0, sizeof(s[i].name));
++			strncpy(s[i].name, name, sizeof(s[i].name) - 1);	/* bounded; memset above keeps the last byte NUL */
++			memcpy(&s[i].map, map, sizeof(s[i].map));
++			break;
++		}
++	}
++
++	if (i >= NETDEV_BOOT_SETUP_MAX)	/* loop ran off the end: table is full */
++		return 0;
++
++	return 1;
++}
++
++/**
++ *	netdev_boot_setup_check	- check boot time settings
++ *	@dev: the netdevice
++ *
++ * 	Check boot time settings for the device.
++ *	The found settings are set for the device to be used
++ *	later in the device probing.
++ *	Returns 0 if no settings found, 1 if they are.
++ */
++int netdev_boot_setup_check(struct net_device *dev)
++{
++	struct netdev_boot_setup *s;
++	int i;
++
++	s = dev_boot_setup;
++	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
++		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
++		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
++			dev->irq 	= s[i].map.irq;
++			dev->base_addr 	= s[i].map.base_addr;
++			dev->mem_start 	= s[i].map.mem_start;
++			dev->mem_end 	= s[i].map.mem_end;
++			return 1;
++		}
++	}
++	return 0;
++}
++
++/*
++ * Saves at boot time configured settings for any netdevice.
++ */
++int __init netdev_boot_setup(char *str)
++{
++	int ints[5];
++	struct ifmap map;
++
++	str = get_options(str, ARRAY_SIZE(ints), ints);
++	if (!str || !*str)
++		return 0;
++
++	/* Save settings */
++	memset(&map, 0, sizeof(map));
++	if (ints[0] > 0)
++		map.irq = ints[1];
++	if (ints[0] > 1)
++		map.base_addr = ints[2];
++	if (ints[0] > 2)
++		map.mem_start = ints[3];
++	if (ints[0] > 3)
++		map.mem_end = ints[4];
++
++	/* Add new entry to the list */	
++	return netdev_boot_setup_add(str, &map);
++}
++
++__setup("netdev=", netdev_boot_setup);
++
++/*****************************************************************************************
++
++			    Device Interface Subroutines
++
++******************************************************************************************/
++
++/**
++ *	__dev_get_by_name	- find a device by its name 
++ *	@name: name to find
++ *
++ *	Find an interface by name. Must be called under RTNL semaphore
++ *	or @dev_base_lock. If the name is found a pointer to the device
++ *	is returned. If the name is not found then %NULL is returned. The
++ *	reference counters are not incremented so the caller must be
++ *	careful with locks.
++ */
++ 
++
++struct net_device *__dev_get_by_name(const char *name)
++{
++	struct net_device *dev;
++
++	for (dev = dev_base; dev != NULL; dev = dev->next) {
++		if (strncmp(dev->name, name, IFNAMSIZ) == 0)
++			return dev;
++	}
++	return NULL;
++}
++
++/**
++ *	dev_get_by_name		- find a device by its name
++ *	@name: name to find
++ *
++ *	Find an interface by name. This can be called from any 
++ *	context and does its own locking. The returned handle has
++ *	the usage count incremented and the caller must use dev_put() to
++ *	release it when it is no longer needed. %NULL is returned if no
++ *	matching device is found.
++ */
++
++struct net_device *dev_get_by_name(const char *name)
++{
++	struct net_device *dev;
++
++	read_lock(&dev_base_lock);
++	dev = __dev_get_by_name(name);
++	if (dev)
++		dev_hold(dev);
++	read_unlock(&dev_base_lock);
++	return dev;
++}
++
++/* 
++   Return value is changed to int to prevent illegal usage in future.
++   It is still legal to use to check for device existence.
++
++   User should understand, that the result returned by this function
++   is meaningless, if it was not issued under rtnl semaphore.
++ */
++
++/**
++ *	dev_get	-	test if a device exists
++ *	@name:	name to test for
++ *
++ *	Test if a name exists. Returns true if the name is found. In order
++ *	to be sure the name is not allocated or removed during the test the
++ *	caller must hold the rtnl semaphore.
++ *
++ *	This function primarily exists for back compatibility with older
++ *	drivers. 
++ */
++ 
++int dev_get(const char *name)
++{
++	struct net_device *dev;
++
++	read_lock(&dev_base_lock);
++	dev = __dev_get_by_name(name);
++	read_unlock(&dev_base_lock);
++	return dev != NULL;
++}
++
++/**
++ *	__dev_get_by_index - find a device by its ifindex
++ *	@ifindex: index of device
++ *
++ *	Search for an interface by index. Returns %NULL if the device
++ *	is not found or a pointer to the device. The device has not
++ *	had its reference counter increased so the caller must be careful
++ *	about locking. The caller must hold either the RTNL semaphore
++ *	or @dev_base_lock.
++ */
++
++struct net_device * __dev_get_by_index(int ifindex)
++{
++	struct net_device *dev;
++
++	for (dev = dev_base; dev != NULL; dev = dev->next) {
++		if (dev->ifindex == ifindex)
++			return dev;
++	}
++	return NULL;
++}
++
++
++/**
++ *	dev_get_by_index - find a device by its ifindex
++ *	@ifindex: index of device
++ *
++ *	Search for an interface by index. Returns NULL if the device
++ *	is not found or a pointer to the device. The device returned has 
++ *	had a reference added and the pointer is safe until the user calls
++ *	dev_put to indicate they have finished with it.
++ */
++
++struct net_device * dev_get_by_index(int ifindex)
++{
++	struct net_device *dev;
++
++	read_lock(&dev_base_lock);
++	dev = __dev_get_by_index(ifindex);
++	if (dev)
++		dev_hold(dev);
++	read_unlock(&dev_base_lock);
++	return dev;
++}
++
++/**
++ *	dev_getbyhwaddr - find a device by its hardware address
++ *	@type: media type of device
++ *	@ha: hardware address
++ *
++ *	Search for an interface by MAC address. Returns NULL if the device
++ *	is not found or a pointer to the device. The caller must hold the
++ *	rtnl semaphore. The returned device has not had its ref count increased
++ *	and the caller must therefore be careful about locking
++ *
++ *	BUGS:
++ *	If the API was consistent this would be __dev_get_by_hwaddr
++ */
++
++struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
++{
++	struct net_device *dev;
++
++	ASSERT_RTNL();
++
++	for (dev = dev_base; dev != NULL; dev = dev->next) {
++		if (dev->type == type &&
++		    memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
++			return dev;
++	}
++	return NULL;
++}
++
++/**
++ *	dev_get_by_flags - find any device with given flags
++ *	@if_flags: IFF_* values
++ *	@mask: bitmask of bits in if_flags to check
++ *
++ *	Search for any interface with the given flags. Returns NULL if a device
++ *	is not found or a pointer to the device. The device returned has 
++ *	had a reference added and the pointer is safe until the user calls
++ *	dev_put to indicate they have finished with it.
++ */
++
++struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
++{
++	struct net_device *dev;
++
++	read_lock(&dev_base_lock);
++	dev = __dev_get_by_flags(if_flags, mask);
++	if (dev)
++		dev_hold(dev);
++	read_unlock(&dev_base_lock);
++	return dev;
++}
++
++/**
++ *	__dev_get_by_flags - find any device with given flags
++ *	@if_flags: IFF_* values
++ *	@mask: bitmask of bits in if_flags to check
++ *
++ *	Search for any interface with the given flags. Returns NULL if a device
++ *	is not found or a pointer to the device. The caller must hold either
++ *	the RTNL semaphore or @dev_base_lock.
++ */
++
++struct net_device *__dev_get_by_flags(unsigned short if_flags, unsigned short mask)
++{
++	struct net_device *dev;
++
++	for (dev = dev_base; dev != NULL; dev = dev->next) {
++		if (((dev->flags ^ if_flags) & mask) == 0)
++			return dev;
++	}
++	return NULL;
++}
++
++/**
++ *	dev_alloc_name - allocate a name for a device
++ *	@dev: device 
++ *	@name: name format string
++ *
++ *	Passed a format string - eg "lt%d" it will try and find a suitable
++ *	id. Not efficient for many devices, not called a lot. The caller
++ *	must hold the dev_base or rtnl lock while allocating the name and
++ *	adding the device in order to avoid duplicates. Returns the number
++ *	of the unit assigned or a negative errno code.
++ */
++
++int dev_alloc_name(struct net_device *dev, const char *name)
++{
++	int i;
++	char buf[IFNAMSIZ];	/* was 32: sized like dev->name so snprintf() truncates and strcpy() below cannot overflow */
++	char *p;
++
++	/*
++	 * Verify the string as this thing may have come from
++	 * the user.  There must be either one "%d" and no other "%"
++	 * characters, or no "%" characters at all.
++	 */
++	p = strchr(name, '%');
++	if (p && (p[1] != 'd' || strchr(p+2, '%')))
++		return -EINVAL;
++
++	/*
++	 * If you need over 100 please also fix the algorithm...
++	 */
++	for (i = 0; i < 100; i++) {
++		snprintf(buf,sizeof(buf),name,i);
++		if (__dev_get_by_name(buf) == NULL) {	/* first unit number whose name is unused */
++			strcpy(dev->name, buf);
++			return i;
++		}
++	}
++	return -ENFILE;	/* Over 100 of the things .. bail out! */
++}
++
++/**
++ *	dev_alloc - allocate a network device and name
++ *	@name: name format string
++ *	@err: error return pointer
++ *
++ *	Passed a format string, eg. "lt%d", it will allocate a network device
++ *	and space for the name. %NULL is returned if no memory is available.
++ *	If the allocation succeeds then the name is assigned and the 
++ *	device pointer returned. %NULL is returned if the name allocation
++ *	failed. The cause of an error is returned as a negative errno code
++ *	in the variable @err points to.
++ *
++ *	The caller must hold the @dev_base or RTNL locks when doing this in
++ *	order to avoid duplicate name allocations.
++ */
++
++struct net_device *dev_alloc(const char *name, int *err)
++{
++	struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL);
++	if (dev == NULL) {
++		*err = -ENOBUFS;
++		return NULL;
++	}
++	memset(dev, 0, sizeof(struct net_device));
++	*err = dev_alloc_name(dev, name);
++	if (*err < 0) {
++		kfree(dev);
++		return NULL;
++	}
++	return dev;
++}
++
++/**
++ *	netdev_state_change - device changes state
++ *	@dev: device to cause notification
++ *
++ *	Called to indicate a device has changed state. This function calls
++ *	the notifier chains for netdev_chain and sends a NEWLINK message
++ *	to the routing socket.
++ */
++ 
++void netdev_state_change(struct net_device *dev)
++{
++	if (dev->flags&IFF_UP) {
++		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
++		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
++	}
++}
++
++
++#ifdef CONFIG_KMOD
++
++/**
++ *	dev_load 	- load a network module
++ *	@name: name of interface
++ *
++ *	If a network interface is not present and the process has suitable
++ *	privileges this function loads the module. If module loading is not
++ *	available in this kernel then it becomes a nop.
++ */
++
++void dev_load(const char *name)
++{
++	if (!dev_get(name) && capable(CAP_SYS_MODULE))
++		request_module(name);
++}
++
++#else
++
++extern inline void dev_load(const char *unused){;}
++
++#endif
++
++static int default_rebuild_header(struct sk_buff *skb)
++{
++	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!");
++	kfree_skb(skb);
++	return 1;
++}
++
++/**
++ *	dev_open	- prepare an interface for use. 
++ *	@dev:	device to open
++ *
++ *	Takes a device from down to up state. The device's private open
++ *	function is invoked and then the multicast lists are loaded. Finally
++ *	the device is moved into the up state and a %NETDEV_UP message is
++ *	sent to the netdev notifier chain.
++ *
++ *	Calling this function on an active interface is a nop. On a failure
++ *	a negative errno code is returned.
++ */
++ 
++int dev_open(struct net_device *dev)
++{
++	int ret = 0;
++
++	/*
++	 *	Is it already up?
++	 */
++
++	if (dev->flags&IFF_UP)
++		return 0;
++
++	/*
++	 *	Is it even present?
++	 */
++	if (!netif_device_present(dev))
++		return -ENODEV;
++
++	/*
++	 *	Call device private open method
++	 */
++	if (try_inc_mod_count(dev->owner)) {
++		set_bit(__LINK_STATE_START, &dev->state);
++		if (dev->open) {
++			ret = dev->open(dev);
++			if (ret != 0) {
++				clear_bit(__LINK_STATE_START, &dev->state);
++				if (dev->owner)
++					__MOD_DEC_USE_COUNT(dev->owner);
++			}
++		}
++	} else {
++		ret = -ENODEV;
++	}
++
++	/*
++	 *	If it went open OK then:
++	 */
++	 
++	if (ret == 0) 
++	{
++		/*
++		 *	Set the flags.
++		 */
++		dev->flags |= IFF_UP;
++
++		/*
++		 *	Initialize multicasting status 
++		 */
++		dev_mc_upload(dev);
++
++		/*
++		 *	Wakeup transmit queue engine
++		 */
++		dev_activate(dev);
++
++		/*
++		 *	... and announce new interface.
++		 */
++		notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
++	}
++	return(ret);
++}
++
++#ifdef CONFIG_NET_FASTROUTE
++
++static void dev_do_clear_fastroute(struct net_device *dev)
++{
++	if (dev->accept_fastpath) {
++		int i;
++
++		for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) {
++			struct dst_entry *dst;
++
++			write_lock_irq(&dev->fastpath_lock);
++			dst = dev->fastpath[i];
++			dev->fastpath[i] = NULL;
++			write_unlock_irq(&dev->fastpath_lock);
++
++			dst_release(dst);
++		}
++	}
++}
++
++void dev_clear_fastroute(struct net_device *dev)
++{
++	if (dev) {
++		dev_do_clear_fastroute(dev);
++	} else {
++		read_lock(&dev_base_lock);
++		for (dev = dev_base; dev; dev = dev->next)
++			dev_do_clear_fastroute(dev);
++		read_unlock(&dev_base_lock);
++	}
++}
++#endif
++
++/**
++ *	dev_close - shutdown an interface.
++ *	@dev: device to shutdown
++ *
++ *	This function moves an active device into down state. A 
++ *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
++ *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
++ *	chain.
++ */
++ 
++int dev_close(struct net_device *dev)
++{
++	if (!(dev->flags&IFF_UP))
++		return 0;
++
++	/*
++	 *	Tell people we are going down, so that they can
++	 *	prepare for shutdown while the device is still operating.
++	 */
++	notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
++
++	dev_deactivate(dev);
++
++	clear_bit(__LINK_STATE_START, &dev->state);
++
++	/* Synchronize to scheduled poll. We cannot touch poll list,
++	 * it can be even on different cpu. So just clear netif_running(),
++	 * and wait when poll really will happen. Actually, the best place
++	 * for this is inside dev->stop() after device stopped its irq
++	 * engine, but this requires more changes in devices. */
++
++	smp_mb__after_clear_bit(); /* Commit netif_running(). */
++	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
++		/* No hurry. */
++		current->state = TASK_INTERRUPTIBLE;
++		schedule_timeout(1);
++	}
++
++	/*
++	 *	Call the device specific close. This cannot fail.
++	 *	Only if device is UP
++	 *
++	 *	We allow it to be called even after a DETACH hot-plug
++	 *	event.
++	 */
++	 
++	if (dev->stop)
++		dev->stop(dev);
++
++	/*
++	 *	Device is now down.
++	 */
++
++	dev->flags &= ~IFF_UP;
++#ifdef CONFIG_NET_FASTROUTE
++	dev_clear_fastroute(dev);
++#endif
++
++	/*
++	 *	Tell people we are down
++	 */
++	notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
++
++	/*
++	 * Drop the module refcount
++	 */
++	if (dev->owner)
++		__MOD_DEC_USE_COUNT(dev->owner);
++
++	return(0);
++}
++
++
++/*
++ *	Device change register/unregister. These are not inline or static
++ *	as we export them to the world.
++ */
++ 
++/**
++ *	register_netdevice_notifier - register a network notifier block
++ *	@nb: notifier
++ *
++ *	Register a notifier to be called when network device events occur.
++ *	The notifier passed is linked into the kernel structures and must
++ *	not be reused until it has been unregistered. A negative errno code
++ *	is returned on a failure.
++ */
++
++int register_netdevice_notifier(struct notifier_block *nb)
++{
++	return notifier_chain_register(&netdev_chain, nb);
++}
++
++/**
++ *	unregister_netdevice_notifier - unregister a network notifier block
++ *	@nb: notifier
++ *
++ *	Unregister a notifier previously registered by
++ *	register_netdevice_notifier(). The notifier is unlinked from the
++ *	kernel structures and may then be reused. A negative errno code
++ *	is returned on a failure.
++ */
++
++int unregister_netdevice_notifier(struct notifier_block *nb)
++{
++	return notifier_chain_unregister(&netdev_chain,nb);
++}
++
++/*
++ *	Support routine. Sends outgoing frames to any network
++ *	taps currently in use.
++ */
++
++void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
++{
++	struct packet_type *ptype;
++	do_gettimeofday(&skb->stamp);
++
++	br_read_lock(BR_NETPROTO_LOCK);
++	for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next) 
++	{
++		/* Never send packets back to the socket
++		 * they originated from - MvS (miquels@drinkel.ow.org)
++		 */
++		if ((ptype->dev == dev || !ptype->dev) &&
++			((struct sock *)ptype->data != skb->sk))
++		{
++			struct sk_buff *skb2;
++			if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL)
++				break;
++
++			/* skb->nh should be correctly
++			   set by sender, so that the second statement is
++			   just protection against buggy protocols.
++			 */
++			skb2->mac.raw = skb2->data;
++
++			if (skb2->nh.raw < skb2->data || skb2->nh.raw > skb2->tail) {
++				if (net_ratelimit())
++					printk(KERN_CRIT "protocol %04x is buggy, dev %s\n", skb2->protocol, dev->name);
++				skb2->nh.raw = skb2->data;
++			}
++
++			skb2->h.raw = skb2->nh.raw;
++			skb2->pkt_type = PACKET_OUTGOING;
++			ptype->func(skb2, skb->dev, ptype);
++		}
++	}
++	br_read_unlock(BR_NETPROTO_LOCK);
++}
++
++/* Calculate csum in the case, when packet is misrouted.
++ * If it failed by some reason, ignore and send skb with wrong
++ * checksum.
++ */
++struct sk_buff * skb_checksum_help(struct sk_buff *skb)
++{
++	int offset;
++	unsigned int csum;
++
++	offset = skb->h.raw - skb->data;
++	if (offset > (int)skb->len)
++		BUG();
++	csum = skb_checksum(skb, offset, skb->len-offset, 0);
++
++	offset = skb->tail - skb->h.raw;
++	if (offset <= 0)
++		BUG();
++	if (skb->csum+2 > offset)
++		BUG();
++
++	*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
++	skb->ip_summed = CHECKSUM_NONE;
++	return skb;
++}
++
++#ifdef CONFIG_HIGHMEM
++/* Actually, we should eliminate this check as soon as we know, that:
++ * 1. IOMMU is present and allows to map all the memory.
++ * 2. No high memory really exists on this machine.
++ */
++
++static inline int
++illegal_highdma(struct net_device *dev, struct sk_buff *skb)
++{
++	int i;
++
++	if (dev->features&NETIF_F_HIGHDMA)
++		return 0;
++
++	for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
++		if (skb_shinfo(skb)->frags[i].page >= highmem_start_page)
++			return 1;
++
++	return 0;
++}
++#else
++#define illegal_highdma(dev, skb)	(0)
++#endif
++
++/**
++ *	dev_queue_xmit - transmit a buffer
++ *	@skb: buffer to transmit
++ *	
++ *	Queue a buffer for transmission to a network device. The caller must
++ *	have set the device and priority and built the buffer before calling this 
++ *	function. The function can be called from an interrupt.
++ *
++ *	A negative errno code is returned on a failure. A success does not
++ *	guarantee the frame will be transmitted as it may be dropped due
++ *	to congestion or traffic shaping.
++ */
++
++int dev_queue_xmit(struct sk_buff *skb)
++{
++	struct net_device *dev = skb->dev;
++	struct Qdisc  *q;
++
++	if (skb_shinfo(skb)->frag_list &&
++	    !(dev->features&NETIF_F_FRAGLIST) &&
++	    skb_linearize(skb, GFP_ATOMIC) != 0) {
++		kfree_skb(skb);
++		return -ENOMEM;
++	}
++
++	/* Fragmented skb is linearized if device does not support SG,
++	 * or if at least one of fragments is in highmem and device
++	 * does not support DMA from it.
++	 */
++	if (skb_shinfo(skb)->nr_frags &&
++	    (!(dev->features&NETIF_F_SG) || illegal_highdma(dev, skb)) &&
++	    skb_linearize(skb, GFP_ATOMIC) != 0) {
++		kfree_skb(skb);
++		return -ENOMEM;
++	}
++
++	/* If packet is not checksummed and device does not support
++	 * checksumming for this protocol, complete checksumming here.
++	 */
++	if (skb->ip_summed == CHECKSUM_HW &&
++	    (!(dev->features&(NETIF_F_HW_CSUM|NETIF_F_NO_CSUM)) &&
++	     (!(dev->features&NETIF_F_IP_CSUM) ||
++	      skb->protocol != htons(ETH_P_IP)))) {
++		if ((skb = skb_checksum_help(skb)) == NULL)
++			return -ENOMEM;
++	}
++
++	/* Grab device queue */
++	spin_lock_bh(&dev->queue_lock);
++	q = dev->qdisc;
++	if (q->enqueue) {
++		int ret = q->enqueue(skb, q);
++
++		qdisc_run(dev);
++
++		spin_unlock_bh(&dev->queue_lock);
++		return ret == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : ret;
++	}
++
++	/* The device has no queue. Common case for software devices:
++	   loopback, all the sorts of tunnels...
++
++	   Really, it is unlikely that xmit_lock protection is necessary here.
++	   (f.e. loopback and IP tunnels are clean ignoring statistics counters.)
++	   However, it is possible, that they rely on protection
++	   made by us here.
++
++	   Check this and shot the lock. It is not prone from deadlocks.
++	   Either shot noqueue qdisc, it is even simpler 8)
++	 */
++	if (dev->flags&IFF_UP) {
++		int cpu = smp_processor_id();
++
++		if (dev->xmit_lock_owner != cpu) {
++			spin_unlock(&dev->queue_lock);
++			spin_lock(&dev->xmit_lock);
++			dev->xmit_lock_owner = cpu;
++
++			if (!netif_queue_stopped(dev)) {
++				if (netdev_nit)
++					dev_queue_xmit_nit(skb,dev);
++
++				if (dev->hard_start_xmit(skb, dev) == 0) {
++					dev->xmit_lock_owner = -1;
++					spin_unlock_bh(&dev->xmit_lock);
++					return 0;
++				}
++			}
++			dev->xmit_lock_owner = -1;
++			spin_unlock_bh(&dev->xmit_lock);
++			if (net_ratelimit())
++				printk(KERN_CRIT "Virtual device %s asks to queue packet!\n", dev->name);
++			kfree_skb(skb);
++			return -ENETDOWN;
++		} else {
++			/* Recursion is detected! It is possible, unfortunately */
++			if (net_ratelimit())
++				printk(KERN_CRIT "Dead loop on virtual device %s, fix it urgently!\n", dev->name);
++		}
++	}
++	spin_unlock_bh(&dev->queue_lock);
++
++	kfree_skb(skb);
++	return -ENETDOWN;
++}
++
++
++/*=======================================================================
++			Receiver routines
++  =======================================================================*/
++
++int netdev_max_backlog = 300;
++int weight_p = 64;            /* old backlog weight */
++/* These numbers are selected based on intuition and some
++ * experimentation; if you have a more scientific way of doing this
++ * please go ahead and fix things.
++ */
++int no_cong_thresh = 10;
++int no_cong = 20;
++int lo_cong = 100;
++int mod_cong = 290;
++
++struct netif_rx_stats netdev_rx_stat[NR_CPUS];
++
++
++#ifdef CONFIG_NET_HW_FLOWCONTROL
++atomic_t netdev_dropping = ATOMIC_INIT(0);
++static unsigned long netdev_fc_mask = 1;
++unsigned long netdev_fc_xoff = 0;
++spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;
++
++static struct
++{
++	void (*stimul)(struct net_device *);
++	struct net_device *dev;
++} netdev_fc_slots[BITS_PER_LONG];
++
++int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev))
++{
++	int bit = 0;
++	unsigned long flags;
++
++	spin_lock_irqsave(&netdev_fc_lock, flags);
++	if (netdev_fc_mask != ~0UL) {
++		bit = ffz(netdev_fc_mask);
++		netdev_fc_slots[bit].stimul = stimul;
++		netdev_fc_slots[bit].dev = dev;
++		set_bit(bit, &netdev_fc_mask);
++		clear_bit(bit, &netdev_fc_xoff);
++	}
++	spin_unlock_irqrestore(&netdev_fc_lock, flags);
++	return bit;
++}
++
++void netdev_unregister_fc(int bit)
++{
++	unsigned long flags;
++
++	spin_lock_irqsave(&netdev_fc_lock, flags);
++	if (bit > 0) {
++		netdev_fc_slots[bit].stimul = NULL;
++		netdev_fc_slots[bit].dev = NULL;
++		clear_bit(bit, &netdev_fc_mask);
++		clear_bit(bit, &netdev_fc_xoff);
++	}
++	spin_unlock_irqrestore(&netdev_fc_lock, flags);
++}
++
++static void netdev_wakeup(void)	/* call stimul() for every slot flagged in netdev_fc_xoff */
++{
++	unsigned long xoff;
++
++	spin_lock(&netdev_fc_lock);
++	xoff = netdev_fc_xoff;	/* take a private copy of the pending bitmap ... */
++	netdev_fc_xoff = 0;	/* ... and reset the shared one */
++	while (xoff) {
++		int i = ffz(~xoff);	/* first zero of the complement: a set bit of xoff */
++		xoff &= ~(1UL<<i);	/* 1UL: i may reach BITS_PER_LONG-1, an int-wide shift is wrong from bit 31 up */
++		netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
++	}
++	spin_unlock(&netdev_fc_lock);
++}
++#endif
++
++static void get_sample_stats(int cpu)
++{
++#ifdef RAND_LIE
++	unsigned long rd;
++	int rq;
++#endif
++	int blog = softnet_data[cpu].input_pkt_queue.qlen;
++	int avg_blog = softnet_data[cpu].avg_blog;
++
++	avg_blog = (avg_blog >> 1)+ (blog >> 1);
++
++	if (avg_blog > mod_cong) {
++		/* Above moderate congestion levels. */
++		softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
++#ifdef RAND_LIE
++		rd = net_random();
++		rq = rd % netdev_max_backlog;
++		if (rq < avg_blog) /* unlucky bastard */
++			softnet_data[cpu].cng_level = NET_RX_DROP;
++#endif
++	} else if (avg_blog > lo_cong) {
++		softnet_data[cpu].cng_level = NET_RX_CN_MOD;
++#ifdef RAND_LIE
++		rd = net_random();
++		rq = rd % netdev_max_backlog;
++			if (rq < avg_blog) /* unlucky bastard */
++				softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
++#endif
++	} else if (avg_blog > no_cong) 
++		softnet_data[cpu].cng_level = NET_RX_CN_LOW;
++	else  /* no congestion */
++		softnet_data[cpu].cng_level = NET_RX_SUCCESS;
++
++	softnet_data[cpu].avg_blog = avg_blog;
++}
++
++#ifdef OFFLINE_SAMPLE
++static void sample_queue(unsigned long dummy)
++{
++/* 10 ms or 1 ms -- I don't care -- JHS */
++	int next_tick = 1;
++	int cpu = smp_processor_id();
++
++	get_sample_stats(cpu);
++	next_tick += jiffies;
++	mod_timer(&samp_timer, next_tick);
++}
++#endif
++
++
++/**
++ *	netif_rx	-	post buffer to the network code
++ *	@skb: buffer to post
++ *
++ *	This function receives a packet from a device driver and queues it for
++ *	the upper (protocol) levels to process.  It always succeeds. The buffer
++ *	may be dropped during processing for congestion control or by the 
++ *	protocol layers.
++ *      
++ *	return values:
++ *	NET_RX_SUCCESS	(no congestion)           
++ *	NET_RX_CN_LOW     (low congestion) 
++ *	NET_RX_CN_MOD     (moderate congestion)
++ *	NET_RX_CN_HIGH    (high congestion) 
++ *	NET_RX_DROP    (packet was dropped)
++ *      
++ *      
++ */
++
++int netif_rx(struct sk_buff *skb)
++{
++	int this_cpu = smp_processor_id();
++	struct softnet_data *queue;
++	unsigned long flags;
++
++	if (skb->stamp.tv_sec == 0)	/* no timestamp set yet */
++		do_gettimeofday(&skb->stamp);
++
++	/* The code is arranged so that the path is shortest
++	   when the CPU is congested but still operating.
++	 */
++	queue = &softnet_data[this_cpu];
++
++	local_irq_save(flags);	/* per-CPU queue and counters are updated with IRQs off */
++
++	netdev_rx_stat[this_cpu].total++;
++	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
++		if (queue->input_pkt_queue.qlen) {
++			if (queue->throttle)
++				goto drop;
++
++enqueue:
++			dev_hold(skb->dev);	/* released by dev_put() in process_backlog() */
++			__skb_queue_tail(&queue->input_pkt_queue,skb);
++			local_irq_restore(flags);
++#ifndef OFFLINE_SAMPLE
++			get_sample_stats(this_cpu);
++#endif
++			return queue->cng_level;
++		}
++
++		if (queue->throttle) {	/* queue is empty here: leave the throttled state */
++			queue->throttle = 0;
++#ifdef CONFIG_NET_HW_FLOWCONTROL
++			if (atomic_dec_and_test(&netdev_dropping))
++				netdev_wakeup();
++#endif
++		}
++
++		netif_rx_schedule(&queue->blog_dev);	/* empty queue: schedule the backlog device, then enqueue */
++		goto enqueue;
++	}
++
++	if (queue->throttle == 0) {	/* over the backlog limit: enter the throttled state once */
++		queue->throttle = 1;
++		netdev_rx_stat[this_cpu].throttled++;
++#ifdef CONFIG_NET_HW_FLOWCONTROL
++		atomic_inc(&netdev_dropping);
++#endif
++	}
++
++drop:
++	netdev_rx_stat[this_cpu].dropped++;
++	local_irq_restore(flags);
++
++	kfree_skb(skb);
++	return NET_RX_DROP;
++}
++
++/* Deliver skb to an old protocol which is not threaded well
++   or which does not understand shared skbs.
++ */
++static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
++{
++	static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
++	int ret = NET_RX_DROP;
++
++
++	if (!last) {	/* not the final delivery: give this handler its own clone */
++		skb = skb_clone(skb, GFP_ATOMIC);
++		if (skb == NULL)
++			return ret;
++	}
++	if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
++		kfree_skb(skb);	/* could not linearize: drop */
++		return ret;
++	}
++
++	/* The assumption (a correct one) is that old protocols
++	   did not depend on BHs other than NET_BH and TIMER_BH.
++	 */
++
++	/* Emulate NET_BH with a special spinlock */
++	spin_lock(&net_bh_lock);
++
++	/* Disable timers and wait for completion of all timers */
++	tasklet_disable(bh_task_vec+TIMER_BH);
++
++	ret = pt->func(skb, skb->dev, pt);
++
++	tasklet_hi_enable(bh_task_vec+TIMER_BH);
++	spin_unlock(&net_bh_lock);
++	return ret;
++}
++
++static __inline__ void skb_bond(struct sk_buff *skb)
++{
++	struct net_device *dev = skb->dev;
++
++	if (dev->master) {	/* device has a master: record the real device, retarget the skb */
++		skb->real_dev = skb->dev;
++		skb->dev = dev->master;
++	}
++}
++
++static void net_tx_action(struct softirq_action *h)
++{
++	int cpu = smp_processor_id();
++	/* Free this CPU's completion_queue skbs, then run the devices on its output_queue. */
++	if (softnet_data[cpu].completion_queue) {
++		struct sk_buff *clist;
++
++		local_irq_disable();	/* detach the whole list with IRQs off ... */
++		clist = softnet_data[cpu].completion_queue;
++		softnet_data[cpu].completion_queue = NULL;
++		local_irq_enable();	/* ... and free it with IRQs enabled again */
++
++		while (clist != NULL) {
++			struct sk_buff *skb = clist;
++			clist = clist->next;
++
++			BUG_TRAP(atomic_read(&skb->users) == 0);
++			__kfree_skb(skb);
++		}
++	}
++
++	if (softnet_data[cpu].output_queue) {
++		struct net_device *head;
++
++		local_irq_disable();
++		head = softnet_data[cpu].output_queue;
++		softnet_data[cpu].output_queue = NULL;
++		local_irq_enable();
++
++		while (head != NULL) {
++			struct net_device *dev = head;
++			head = head->next_sched;
++
++			smp_mb__before_clear_bit();
++			clear_bit(__LINK_STATE_SCHED, &dev->state);
++
++			if (spin_trylock(&dev->queue_lock)) {
++				qdisc_run(dev);
++				spin_unlock(&dev->queue_lock);
++			} else {
++				netif_schedule(dev);	/* lock is busy: reschedule rather than spin */
++			}
++		}
++	}
++}
++
++
++#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
++void (*br_handle_frame_hook)(struct sk_buff *skb) = NULL;
++#endif
++
++static __inline__ int handle_bridge(struct sk_buff *skb,
++				     struct packet_type *pt_prev)
++{
++	int ret = NET_RX_DROP;
++
++	if (pt_prev) {	/* deliver to the pending handler before handing the skb to the bridge */
++		if (!pt_prev->data)
++			ret = deliver_to_old_ones(pt_prev, skb, 0);
++		else {
++			atomic_inc(&skb->users);
++			ret = pt_prev->func(skb, skb->dev, pt_prev);
++		}
++	}
++
++	br_handle_frame_hook(skb);	/* netif_receive_skb() checks the hook is non-NULL before calling here */
++	return ret;
++}
++
++
++#ifdef CONFIG_NET_DIVERT
++static inline int handle_diverter(struct sk_buff *skb)
++{
++	/* Divert the frame if the device has diversion set up; always returns 0. */
++	if (skb->dev->divert && skb->dev->divert->divert)
++		divert_frame(skb);
++	return 0;
++}
++#endif   /* CONFIG_NET_DIVERT */
++
++int netif_receive_skb(struct sk_buff *skb)
++{
++	struct packet_type *ptype, *pt_prev;
++	int ret = NET_RX_DROP;
++	unsigned short type;
++
++	if (skb->stamp.tv_sec == 0)	/* no timestamp set yet */
++		do_gettimeofday(&skb->stamp);
++
++	skb_bond(skb);	/* retarget the skb to the master device, if there is one */
++
++	netdev_rx_stat[smp_processor_id()].total++;
++
++#ifdef CONFIG_NET_FASTROUTE
++	if (skb->pkt_type == PACKET_FASTROUTE) {
++		netdev_rx_stat[smp_processor_id()].fastroute_deferred_out++;
++		return dev_queue_xmit(skb);
++	}
++#endif
++
++	skb->h.raw = skb->nh.raw = skb->data;
++
++	pt_prev = NULL;	/* delivery lags one handler behind so the last one needs no extra reference */
++	for (ptype = ptype_all; ptype; ptype = ptype->next) {	/* not filtered by protocol */
++		if (!ptype->dev || ptype->dev == skb->dev) {
++			if (pt_prev) {
++				if (!pt_prev->data) {
++					ret = deliver_to_old_ones(pt_prev, skb, 0);
++				} else {
++					atomic_inc(&skb->users);
++					ret = pt_prev->func(skb, skb->dev, pt_prev);
++				}
++			}
++			pt_prev = ptype;
++		}
++	}
++
++#ifdef CONFIG_NET_DIVERT
++	if (skb->dev->divert && skb->dev->divert->divert)
++		ret = handle_diverter(skb);
++#endif /* CONFIG_NET_DIVERT */
++			
++#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
++	if (skb->dev->br_port != NULL && br_handle_frame_hook != NULL &&
++	    skb->pkt_type != PACKET_LOOPBACK) {
++		return handle_bridge(skb, pt_prev);
++	}
++#endif
++
++	type = skb->protocol;	/* the low 4 bits of the host-order value select the ptype_base bucket */
++	for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) {
++		if (ptype->type == type &&
++		    (!ptype->dev || ptype->dev == skb->dev)) {
++			if (pt_prev) {
++				if (!pt_prev->data) {
++					ret = deliver_to_old_ones(pt_prev, skb, 0);
++				} else {
++					atomic_inc(&skb->users);
++					ret = pt_prev->func(skb, skb->dev, pt_prev);
++				}
++			}
++			pt_prev = ptype;
++		}
++	}
++
++	if (pt_prev) {	/* final handler: called without taking another reference */
++		if (!pt_prev->data) {
++			ret = deliver_to_old_ones(pt_prev, skb, 1);
++		} else {
++			ret = pt_prev->func(skb, skb->dev, pt_prev);
++		}
++	} else {
++		kfree_skb(skb);	/* no handler matched */
++		/* Jamal, now you will not be able to escape explaining
++		 * to me how you were going to use this. :-)
++		 */
++		ret = NET_RX_DROP;
++	}
++
++	return ret;
++}
++
++static int process_backlog(struct net_device *backlog_dev, int *budget)
++{
++	int work = 0;
++	int quota = min(backlog_dev->quota, *budget);
++	int this_cpu = smp_processor_id();
++	struct softnet_data *queue = &softnet_data[this_cpu];
++	unsigned long start_time = jiffies;
++
++	for (;;) {
++		struct sk_buff *skb;
++		struct net_device *dev;
++
++		local_irq_disable();
++		skb = __skb_dequeue(&queue->input_pkt_queue);
++		if (skb == NULL)
++			goto job_done;	/* IRQs stay disabled until the end of job_done */
++		local_irq_enable();
++
++		dev = skb->dev;	/* saved first: netif_receive_skb() may retarget or free the skb */
++
++		netif_receive_skb(skb);
++
++		dev_put(dev);	/* pairs with dev_hold() in netif_rx() */
++
++		work++;
++
++		if (work >= quota || jiffies - start_time > 1)
++			break;
++
++#ifdef CONFIG_NET_HW_FLOWCONTROL
++		if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) {
++			queue->throttle = 0;
++			if (atomic_dec_and_test(&netdev_dropping)) {
++				netdev_wakeup();
++				break;
++			}
++		}
++#endif
++	}
++
++	backlog_dev->quota -= work;
++	*budget -= work;
++	return -1;	/* stopped early; net_rx_action() requeues the device on a non-zero return */
++
++job_done:
++	backlog_dev->quota -= work;
++	*budget -= work;
++
++	list_del(&backlog_dev->poll_list);	/* queue drained: take the backlog device off the poll list */
++	smp_mb__before_clear_bit();
++	netif_poll_enable(backlog_dev);
++
++	if (queue->throttle) {
++		queue->throttle = 0;
++#ifdef CONFIG_NET_HW_FLOWCONTROL
++		if (atomic_dec_and_test(&netdev_dropping))
++			netdev_wakeup();
++#endif
++	}
++	local_irq_enable();
++	return 0;
++}
++
++static void net_rx_action(struct softirq_action *h)
++{
++	int this_cpu = smp_processor_id();
++	struct softnet_data *queue = &softnet_data[this_cpu];
++	unsigned long start_time = jiffies;
++	int budget = netdev_max_backlog;	/* shared across all devices polled in this run */
++
++	br_read_lock(BR_NETPROTO_LOCK);
++	local_irq_disable();
++
++	while (!list_empty(&queue->poll_list)) {
++		struct net_device *dev;
++
++		if (budget <= 0 || jiffies - start_time > 1)
++			goto softnet_break;	/* out of budget or time */
++
++		local_irq_enable();
++
++		dev = list_entry(queue->poll_list.next, struct net_device, poll_list);
++
++		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
++			local_irq_disable();
++			list_del(&dev->poll_list);	/* not finished: move the device to the tail */
++			list_add_tail(&dev->poll_list, &queue->poll_list);
++			if (dev->quota < 0)
++				dev->quota += dev->weight;
++			else
++				dev->quota = dev->weight;
++		} else {
++			dev_put(dev);	/* poll() returned 0 */
++			local_irq_disable();
++		}
++	}
++
++	local_irq_enable();
++	br_read_unlock(BR_NETPROTO_LOCK);
++	return;
++
++softnet_break:
++	netdev_rx_stat[this_cpu].time_squeeze++;
++	__cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);	/* re-raise so the remaining devices get polled */
++
++	local_irq_enable();
++	br_read_unlock(BR_NETPROTO_LOCK);
++}
++
++static gifconf_func_t * gifconf_list [NPROTO];
++
++/**
++ *	register_gifconf	-	register a SIOCGIFCONF handler
++ *	@family: Address family (must be below NPROTO)
++ *	@gifconf: Function handler
++ *
++ *	Register protocol dependent address dumping routines. The handler
++ *	that is passed must not be freed or reused until it has been replaced
++ *	by another handler. Returns 0, or -EINVAL if @family is out of range.
++ */
++ 
++int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
++{
++	if (family>=NPROTO)
++		return -EINVAL;
++	gifconf_list[family] = gifconf;	/* replaces any handler stored earlier */
++	return 0;
++}
++
++
++/*
++ *	Map an interface index to its name (SIOCGIFNAME)
++ */
++
++/*
++ *	We need this ioctl for efficient implementation of the
++ *	if_indextoname() function required by the IPv6 API.  Without
++ *	it, we would have to search all the interfaces to find a
++ *	match.  --pb
++ */
++
++static int dev_ifname(struct ifreq *arg)
++{
++	struct net_device *dev;
++	struct ifreq ifr;
++
++	/*
++	 *	Fetch the caller's info block (arg is a user-space pointer).
++	 */
++	
++	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
++		return -EFAULT;
++
++	read_lock(&dev_base_lock);
++	dev = __dev_get_by_index(ifr.ifr_ifindex);
++	if (!dev) {
++		read_unlock(&dev_base_lock);
++		return -ENODEV;
++	}
++
++	strcpy(ifr.ifr_name, dev->name);	/* copied while dev_base_lock is still held */
++	read_unlock(&dev_base_lock);
++
++	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
++		return -EFAULT;
++	return 0;
++}
++
++/*
++ *	Perform a SIOCGIFCONF call. This structure will change
++ *	size eventually, and there is nothing I can do about it.
++ *	Thus we will need a 'compatibility mode'.
++ */
++
++static int dev_ifconf(char *arg)
++{
++	struct ifconf ifc;
++	struct net_device *dev;
++	char *pos;
++	int len;
++	int total;
++	int i;
++
++	/*
++	 *	Fetch the caller's info block (arg is a user-space pointer).
++	 */
++	
++	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
++		return -EFAULT;
++
++	pos = ifc.ifc_buf;
++	len = ifc.ifc_len;
++
++	/*
++	 *	Loop over the interfaces, and call every registered handler for each.
++	 */
++
++	total = 0;
++	for (dev = dev_base; dev != NULL; dev = dev->next) {
++		for (i=0; i<NPROTO; i++) {
++			if (gifconf_list[i]) {
++				int done;
++				if (pos==NULL) {	/* no buffer supplied: handler gets NULL and length 0 */
++					done = gifconf_list[i](dev, NULL, 0);
++				} else {
++					done = gifconf_list[i](dev, pos+total, len-total);
++				}
++				if (done<0) {	/* any negative handler result is reported as -EFAULT */
++					return -EFAULT;
++				}
++				total += done;
++			}
++		}
++  	}
++
++	/*
++	 *	All done.  Write the updated control block back to the caller. 
++	 */
++	ifc.ifc_len = total;
++
++	if (copy_to_user(arg, &ifc, sizeof(struct ifconf)))
++		return -EFAULT; 
++
++	/* 
++	 * 	Both BSD and Solaris return 0 here, so we do too.
++	 */
++	return 0;
++}
++
++/*
++ *	This is invoked by the /proc filesystem handler to display a device
++ *	in detail.
++ */
++
++#ifdef CONFIG_PROC_FS
++
++static int sprintf_stats(char *buffer, struct net_device *dev)
++{
++	struct net_device_stats *stats = (dev->get_stats ? dev->get_stats(dev): NULL);
++	int size;
++	/* Format one device's statistics line into buffer; returns sprintf()'s character count. */
++	if (stats)
++		size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
++ 		   dev->name,
++		   stats->rx_bytes,
++		   stats->rx_packets, stats->rx_errors,
++		   stats->rx_dropped + stats->rx_missed_errors,
++		   stats->rx_fifo_errors,
++		   stats->rx_length_errors + stats->rx_over_errors
++		   + stats->rx_crc_errors + stats->rx_frame_errors,
++		   stats->rx_compressed, stats->multicast,
++		   stats->tx_bytes,
++		   stats->tx_packets, stats->tx_errors, stats->tx_dropped,
++		   stats->tx_fifo_errors, stats->collisions,
++		   stats->tx_carrier_errors + stats->tx_aborted_errors
++		   + stats->tx_window_errors + stats->tx_heartbeat_errors,
++		   stats->tx_compressed);
++	else	/* no get_stats method, or it returned NULL */
++		size = sprintf(buffer, "%6s: No statistics available.\n", dev->name);
++
++	return size;
++}
++
++/*
++ *	Called from the PROCfs module. This now uses the new arbitrary sized /proc/net interface
++ *	to create /proc/net/dev
++ */
++ 
++static int dev_get_info(char *buffer, char **start, off_t offset, int length)
++{
++	int len = 0;	/* characters currently held in buffer */
++	off_t begin = 0;	/* offset within the whole output of buffer[0] */
++	off_t pos = 0;
++	int size;
++	struct net_device *dev;
++
++
++	size = sprintf(buffer, 
++		"Inter-|   Receive                                                |  Transmit\n"
++		" face |bytes    packets errs drop fifo frame compressed multicast|bytes    packets errs drop fifo colls carrier compressed\n");
++	
++	pos += size;
++	len += size;
++	
++
++	read_lock(&dev_base_lock);
++	for (dev = dev_base; dev != NULL; dev = dev->next) {
++		size = sprintf_stats(buffer+len, dev);
++		len += size;
++		pos = begin + len;
++				
++		if (pos < offset) {	/* everything so far lies before the requested window: discard it */
++			len = 0;
++			begin = pos;
++		}
++		if (pos > offset + length)	/* window is filled */
++			break;
++	}
++	read_unlock(&dev_base_lock);
++
++	*start = buffer + (offset - begin);	/* Start of wanted data */
++	len -= (offset - begin);		/* Start slop */
++	if (len > length)
++		len = length;			/* Ending slop */
++	if (len < 0)
++		len = 0;
++	return len;
++}
++
++static int dev_proc_stats(char *buffer, char **start, off_t offset,
++			  int length, int *eof, void *data)
++{
++	int i, lcpu;
++	int len=0;
++	/* Write one line of nine hex netdev_rx_stat counters per CPU, then clip to offset/length. */
++	for (lcpu=0; lcpu<smp_num_cpus; lcpu++) {
++		i = cpu_logical_map(lcpu);
++		len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
++			       netdev_rx_stat[i].total,
++			       netdev_rx_stat[i].dropped,
++			       netdev_rx_stat[i].time_squeeze,
++			       netdev_rx_stat[i].throttled,
++			       netdev_rx_stat[i].fastroute_hit,
++			       netdev_rx_stat[i].fastroute_success,
++			       netdev_rx_stat[i].fastroute_defer,
++			       netdev_rx_stat[i].fastroute_deferred_out,
++#if 0
++			       netdev_rx_stat[i].fastroute_latency_reduction
++#else
++			       netdev_rx_stat[i].cpu_collision
++#endif
++			       );
++	}
++
++	len -= offset;
++
++	if (len > length)
++		len = length;
++	if (len < 0)
++		len = 0;
++
++	*start = buffer + offset;
++	*eof = 1;	/* all of the output is generated in a single call */
++
++	return len;
++}
++
++#endif	/* CONFIG_PROC_FS */
++
++
++/**
++ *	netdev_set_master	-	set up master/slave pair
++ *	@slave: slave device
++ *	@master: new master device
++ *
++ *	Changes the master device of the slave. Pass %NULL to break the
++ *	bonding. The caller must hold the RTNL semaphore. On a failure
++ *	a negative errno code is returned. On success the reference counts
++ *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
++ *	function returns zero.
++ */
++ 
++int netdev_set_master(struct net_device *slave, struct net_device *master)
++{
++	struct net_device *old = slave->master;
++
++	ASSERT_RTNL();
++
++	if (master) {
++		if (old)	/* already enslaved: the old master must be cleared first */
++			return -EBUSY;
++		dev_hold(master);
++	}
++
++	br_write_lock_bh(BR_NETPROTO_LOCK);
++	slave->master = master;
++	br_write_unlock_bh(BR_NETPROTO_LOCK);
++
++	if (old)	/* drop the reference taken when the old master was set */
++		dev_put(old);
++
++	if (master)
++		slave->flags |= IFF_SLAVE;
++	else
++		slave->flags &= ~IFF_SLAVE;
++
++	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
++	return 0;
++}
++
++/**
++ *	dev_set_promiscuity	- update promiscuity count on a device
++ *	@dev: device
++ *	@inc: modifier
++ *
++ *	Add or remove promiscuity from a device. While the count in the device
++ *	remains above zero the interface remains promiscuous. Once it hits zero
++ *	the device reverts back to normal filtering operation. A negative inc
++ *	value is used to drop promiscuity on the device.
++ */
++ 
++void dev_set_promiscuity(struct net_device *dev, int inc)
++{
++	unsigned short old_flags = dev->flags;
++
++	dev->flags |= IFF_PROMISC;	/* set first; cleared again just below if the count reaches zero */
++	if ((dev->promiscuity += inc) == 0)
++		dev->flags &= ~IFF_PROMISC;
++	if (dev->flags^old_flags) {	/* the flags actually changed */
++#ifdef CONFIG_NET_FASTROUTE
++		if (dev->flags&IFF_PROMISC) {
++			netdev_fastroute_obstacles++;
++			dev_clear_fastroute(dev);
++		} else
++			netdev_fastroute_obstacles--;
++#endif
++		dev_mc_upload(dev);
++		printk(KERN_INFO "device %s %s promiscuous mode\n",
++		       dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
++	}
++}
++
++/**
++ *	dev_set_allmulti	- update allmulti count on a device
++ *	@dev: device
++ *	@inc: modifier
++ *
++ *	Add or remove reception of all multicast frames to a device. While the
++ *	count in the device remains above zero the interface keeps receiving
++ *	all multicast frames. Once it hits zero the device reverts back to normal
++ *	filtering operation. A negative @inc value is used to drop the counter
++ *	when releasing a resource needing all multicasts.
++ */
++
++void dev_set_allmulti(struct net_device *dev, int inc)
++{
++	unsigned short old_flags = dev->flags;
++
++	dev->flags |= IFF_ALLMULTI;	/* set first; cleared again just below if the count reaches zero */
++	if ((dev->allmulti += inc) == 0)
++		dev->flags &= ~IFF_ALLMULTI;
++	if (dev->flags^old_flags)	/* the flags actually changed */
++		dev_mc_upload(dev);
++}
++
++int dev_change_flags(struct net_device *dev, unsigned flags)
++{
++	int ret;
++	int old_flags = dev->flags;
++
++	/*
++	 *	Set the flags on our device: take the first group from the caller, keep the second group as is.
++	 */
++
++	dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC|
++			       IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) |
++				       (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI));
++
++	/*
++	 *	Load in the correct multicast list now the flags have changed.
++	 */				
++
++	dev_mc_upload(dev);
++
++	/*
++	 *	Have we downed the interface? We handle IFF_UP ourselves
++	 *	according to user attempts to set it, rather than blindly
++	 *	setting it.
++	 */
++
++	ret = 0;
++	if ((old_flags^flags)&IFF_UP)	/* Bit is different  ? */
++	{
++		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
++
++		if (ret == 0) 
++			dev_mc_upload(dev);
++	}
++
++	if (dev->flags&IFF_UP &&
++	    ((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE)))
++		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
++
++	if ((flags^dev->gflags)&IFF_PROMISC) {	/* the user-requested promiscuous bit changed */
++		int inc = (flags&IFF_PROMISC) ? +1 : -1;
++		dev->gflags ^= IFF_PROMISC;
++		dev_set_promiscuity(dev, inc);
++	}
++
++	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
++	   is important. Some (broken) drivers set IFF_PROMISC, when
++	   IFF_ALLMULTI is requested not asking us and not reporting.
++	 */
++	if ((flags^dev->gflags)&IFF_ALLMULTI) {	/* the user-requested allmulti bit changed */
++		int inc = (flags&IFF_ALLMULTI) ? +1 : -1;
++		dev->gflags ^= IFF_ALLMULTI;
++		dev_set_allmulti(dev, inc);
++	}
++
++	if (old_flags^dev->flags)
++		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags);
++
++	return ret;	/* 0, or the dev_open()/dev_close() result */
++}
++
++/*
++ *	Perform the SIOCxIFxxx call 'cmd' on the device named by ifr->ifr_name.
++ */
++ 
++static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
++{
++	struct net_device *dev;
++	int err;
++
++	if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
++		return -ENODEV;	/* no device with that name */
++
++	switch(cmd) 
++	{
++		case SIOCGIFFLAGS:	/* Get interface flags */
++			ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING))
++				|(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI));
++			if (netif_running(dev) && netif_carrier_ok(dev))
++				ifr->ifr_flags |= IFF_RUNNING;
++			return 0;
++
++		case SIOCSIFFLAGS:	/* Set interface flags */
++			return dev_change_flags(dev, ifr->ifr_flags);
++		
++		case SIOCGIFMETRIC:	/* Get the metric on the interface (currently unused) */
++			ifr->ifr_metric = 0;
++			return 0;
++			
++		case SIOCSIFMETRIC:	/* Set the metric on the interface (currently unused) */
++			return -EOPNOTSUPP;
++	
++		case SIOCGIFMTU:	/* Get the MTU of a device */
++			ifr->ifr_mtu = dev->mtu;
++			return 0;
++	
++		case SIOCSIFMTU:	/* Set the MTU of a device */
++			if (ifr->ifr_mtu == dev->mtu)
++				return 0;
++
++			/*
++			 *	MTU must not be negative.
++			 */
++			 
++			if (ifr->ifr_mtu<0)
++				return -EINVAL;
++
++			if (!netif_device_present(dev))
++				return -ENODEV;
++
++			if (dev->change_mtu)
++				err = dev->change_mtu(dev, ifr->ifr_mtu);
++			else {
++				dev->mtu = ifr->ifr_mtu;
++				err = 0;
++			}
++			if (!err && dev->flags&IFF_UP)
++				notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev);
++			return err;
++
++		case SIOCGIFHWADDR:
++			memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN);
++			ifr->ifr_hwaddr.sa_family=dev->type;
++			return 0;
++				
++		case SIOCSIFHWADDR:
++			if (dev->set_mac_address == NULL)
++				return -EOPNOTSUPP;
++			if (ifr->ifr_hwaddr.sa_family!=dev->type)
++				return -EINVAL;
++			if (!netif_device_present(dev))
++				return -ENODEV;
++			err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
++			if (!err)
++				notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
++			return err;
++			
++		case SIOCSIFHWBROADCAST:
++			if (ifr->ifr_hwaddr.sa_family!=dev->type)
++				return -EINVAL;
++			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN);
++			notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
++			return 0;
++
++		case SIOCGIFMAP:
++			ifr->ifr_map.mem_start=dev->mem_start;
++			ifr->ifr_map.mem_end=dev->mem_end;
++			ifr->ifr_map.base_addr=dev->base_addr;
++			ifr->ifr_map.irq=dev->irq;
++			ifr->ifr_map.dma=dev->dma;
++			ifr->ifr_map.port=dev->if_port;
++			return 0;
++			
++		case SIOCSIFMAP:
++			if (dev->set_config) {
++				if (!netif_device_present(dev))
++					return -ENODEV;
++				return dev->set_config(dev,&ifr->ifr_map);
++			}
++			return -EOPNOTSUPP;
++			
++		case SIOCADDMULTI:
++			if (dev->set_multicast_list == NULL ||
++			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
++				return -EINVAL;
++			if (!netif_device_present(dev))
++				return -ENODEV;
++			dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1);
++			return 0;
++
++		case SIOCDELMULTI:
++			if (dev->set_multicast_list == NULL ||
++			    ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
++				return -EINVAL;
++			if (!netif_device_present(dev))
++				return -ENODEV;
++			dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1);
++			return 0;
++
++		case SIOCGIFINDEX:
++			ifr->ifr_ifindex = dev->ifindex;
++			return 0;
++
++		case SIOCGIFTXQLEN:
++			ifr->ifr_qlen = dev->tx_queue_len;
++			return 0;
++
++		case SIOCSIFTXQLEN:
++			if (ifr->ifr_qlen<0)
++				return -EINVAL;
++			dev->tx_queue_len = ifr->ifr_qlen;
++			return 0;
++
++		case SIOCSIFNAME:
++			if (dev->flags&IFF_UP)
++				return -EBUSY;
++			ifr->ifr_newname[IFNAMSIZ-1] = 0;	/* caller-supplied: terminate before strchr() */
++			if (strchr(ifr->ifr_newname, '%')) {	/* name contains a wildcard */
++				char format[IFNAMSIZ + 1];
++				int ret;
++				memcpy(format, ifr->ifr_newname, IFNAMSIZ);
++				format[IFNAMSIZ-1] = 0;
++				/* Find a free name based on format.
++				 * dev_alloc_name() replaces "%d" with at max
++				 * 2 digits, so no name overflow. - Jean II */
++				ret = dev_alloc_name(dev, format);
++				if (ret < 0)
++					return ret;
++				/* Copy the new name back to caller. */
++				strncpy(ifr->ifr_newname, dev->name, IFNAMSIZ);
++			} else {
++				if (__dev_get_by_name(ifr->ifr_newname))
++					return -EEXIST;
++				memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
++				dev->name[IFNAMSIZ-1] = 0;
++			}
++			notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
++			return 0;
++
++		/*
++		 *	Unknown or private ioctl: the commands listed go to the driver's do_ioctl
++		 */
++
++		default:
++			if ((cmd >= SIOCDEVPRIVATE &&
++			    cmd <= SIOCDEVPRIVATE + 15) ||
++			    cmd == SIOCBONDENSLAVE ||
++			    cmd == SIOCBONDRELEASE ||
++			    cmd == SIOCBONDSETHWADDR ||
++			    cmd == SIOCBONDSLAVEINFOQUERY ||
++			    cmd == SIOCBONDINFOQUERY ||
++			    cmd == SIOCBONDCHANGEACTIVE ||
++			    cmd == SIOCGMIIPHY ||
++			    cmd == SIOCGMIIREG ||
++			    cmd == SIOCSMIIREG ||
++			    cmd == SIOCWANDEV) {
++				if (dev->do_ioctl) {
++					if (!netif_device_present(dev))
++						return -ENODEV;
++					return dev->do_ioctl(dev, ifr, cmd);
++				}
++				return -EOPNOTSUPP;
++			}
++
++	}
++	return -EINVAL;	/* command not handled above */
++}
++
++/*
++ *	This function handles all "interface"-type I/O control requests. The actual
++ *	'doing' part of this is dev_ifsioc above.
++ */
++
++/**
++ *	dev_ioctl	-	network device ioctl
++ *	@cmd: command to issue
++ *	@arg: pointer to a struct ifreq in user space
++ *
++ *	Issue ioctl functions to devices. This is normally called by the
++ *	user space syscall interfaces but can sometimes be useful for 
++ *	other purposes. The return value is the return from the syscall if
++ *	positive or a negative errno code on error.
++ */
++
++int dev_ioctl(unsigned int cmd, void *arg)
++{
++	struct ifreq ifr;
++	int ret;
++	char *colon;
++
++	/* One special case: SIOCGIFCONF takes an ifconf argument
++	   and requires the shared lock, because it sleeps writing
++	   to user space.
++	 */
++	   
++	if (cmd == SIOCGIFCONF) {
++		rtnl_shlock();
++		ret = dev_ifconf((char *) arg);
++		rtnl_shunlock();
++		return ret;
++	}
++	if (cmd == SIOCGIFNAME) {
++		return dev_ifname((struct ifreq *)arg);
++	}
++
++	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
++		return -EFAULT;
++
++	ifr.ifr_name[IFNAMSIZ-1] = 0;	/* only ifr_name is terminated here */
++
++	colon = strchr(ifr.ifr_name, ':');	/* cut the name at ':' for the lookup; restored before copy-back */
++	if (colon)
++		*colon = 0;
++
++	/*
++	 *	See which interface the caller is talking about. 
++	 */
++	 
++	switch(cmd) 
++	{
++		/*
++		 *	These ioctl calls:
++		 *	- can be done by all.
++		 *	- atomic and do not require locking.
++		 *	- return a value
++		 */
++		 
++		case SIOCGIFFLAGS:
++		case SIOCGIFMETRIC:
++		case SIOCGIFMTU:
++		case SIOCGIFHWADDR:
++		case SIOCGIFSLAVE:
++		case SIOCGIFMAP:
++		case SIOCGIFINDEX:
++		case SIOCGIFTXQLEN:
++			dev_load(ifr.ifr_name);
++			read_lock(&dev_base_lock);
++			ret = dev_ifsioc(&ifr, cmd);
++			read_unlock(&dev_base_lock);
++			if (!ret) {
++				if (colon)
++					*colon = ':';
++				if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
++					return -EFAULT;
++			}
++			return ret;
++
++		case SIOCETHTOOL:
++			dev_load(ifr.ifr_name);
++			rtnl_lock();
++			ret = dev_ethtool(&ifr);
++			rtnl_unlock();
++			if (!ret) {
++				if (colon)
++					*colon = ':';
++				if (copy_to_user(arg, &ifr,
++						 sizeof(struct ifreq)))
++					ret = -EFAULT;
++			}
++			return ret;
++
++		/*
++		 *	These ioctl calls:
++		 *	- require superuser power.
++		 *	- require strict serialization.
++		 *	- return a value
++		 */
++		 
++		case SIOCSIFNAME:
++		case SIOCGMIIPHY:
++		case SIOCGMIIREG:
++			if (!capable(CAP_NET_ADMIN))
++				return -EPERM;
++			dev_load(ifr.ifr_name);
++			dev_probe_lock();
++			rtnl_lock();
++			ret = dev_ifsioc(&ifr, cmd);
++			rtnl_unlock();
++			dev_probe_unlock();
++			if (!ret) {
++				if (colon)
++					*colon = ':';
++				if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
++					return -EFAULT;
++			}
++			return ret;
++
++		/*
++		 *	These ioctl calls:
++		 *	- require superuser power.
++		 *	- require strict serialization.
++		 *	- do not return a value
++		 */
++		 
++		case SIOCSIFFLAGS:
++		case SIOCSIFMETRIC:
++		case SIOCSIFMTU:
++		case SIOCSIFMAP:
++		case SIOCSIFHWADDR:
++		case SIOCSIFSLAVE:
++		case SIOCADDMULTI:
++		case SIOCDELMULTI:
++		case SIOCSIFHWBROADCAST:
++		case SIOCSIFTXQLEN:
++		case SIOCSMIIREG:
++		case SIOCBONDENSLAVE:
++		case SIOCBONDRELEASE:
++		case SIOCBONDSETHWADDR:
++		case SIOCBONDSLAVEINFOQUERY:
++		case SIOCBONDINFOQUERY:
++		case SIOCBONDCHANGEACTIVE:
++			if (!capable(CAP_NET_ADMIN))
++				return -EPERM;
++			dev_load(ifr.ifr_name);
++			dev_probe_lock();
++			rtnl_lock();
++			ret = dev_ifsioc(&ifr, cmd);
++			rtnl_unlock();
++			dev_probe_unlock();
++			return ret;	/* nothing is copied back to the caller */
++	
++		case SIOCGIFMEM:
++			/* Get the per device memory space. We can add this but currently
++			   do not support it */
++		case SIOCSIFMEM:
++			/* Set the per device memory buffer space. Not applicable in our case */
++		case SIOCSIFLINK:
++			return -EINVAL;
++
++		/*
++		 *	Unknown or private ioctl.
++		 */	
++		 
++		default:
++			if (cmd == SIOCWANDEV ||
++			    (cmd >= SIOCDEVPRIVATE &&
++			     cmd <= SIOCDEVPRIVATE + 15)) {
++				dev_load(ifr.ifr_name);
++				dev_probe_lock();
++				rtnl_lock();
++				ret = dev_ifsioc(&ifr, cmd);
++				rtnl_unlock();
++				dev_probe_unlock();
++				if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))	/* NOTE: ':' is not restored on this path */
++					return -EFAULT;
++				return ret;
++			}
++#ifdef WIRELESS_EXT
++			/* Take care of Wireless Extensions */
++			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
++				/* If command is `set a parameter', or
++				 * `get the encoding parameters', check if
++				 * the user has the right to do it */
++				if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) {
++					if(!capable(CAP_NET_ADMIN))
++						return -EPERM;
++				}
++				dev_load(ifr.ifr_name);
++				rtnl_lock();
++				/* Follow me in net/core/wireless.c */
++				ret = wireless_process_ioctl(&ifr, cmd);
++				rtnl_unlock();
++				if (!ret && IW_IS_GET(cmd) &&
++				    copy_to_user(arg, &ifr, sizeof(struct ifreq)))
++					return -EFAULT;
++				return ret;
++			}
++#endif	/* WIRELESS_EXT */
++			return -EINVAL;
++	}
++}
++
++
++/**
++ *	dev_new_index	-	allocate an ifindex
++ *
++ *	Returns a suitable unique value for a new device interface
++ *	number.  The caller must hold the rtnl semaphore or the
++ *	dev_base_lock to be sure it remains unique.
++ */
++ 
++int dev_new_index(void)
++{
++	static int ifindex;	/* last value handed out; persists across calls */
++	for (;;) {
++		if (++ifindex <= 0)	/* wrapped or non-positive: restart at 1 */
++			ifindex=1;
++		if (__dev_get_by_index(ifindex) == NULL)	/* not in use by any device */
++			return ifindex;
++	}
++}
++
++static int dev_boot_phase = 1;
++
++/**
++ *	register_netdevice	- register a network device
++ *	@dev: device to register
++ *	
++ *	Take a completed network device structure and add it to the kernel
++ *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
++ *	chain. 0 is returned on success. A negative errno code is returned
++ *	on a failure to set up the device, or if the name is a duplicate.
++ *
++ *	Callers must hold the rtnl semaphore.  See the comment at the
++ *	end of Space.c for details about the locking.  You may want
++ *	register_netdev() instead of this.
++ *
++ *	BUGS:
++ *	The locking appears insufficient to guarantee two parallel registers
++ *	will not get the same name.
++ */
++
++int net_dev_init(void);
++
++int register_netdevice(struct net_device *dev)
++{
++	struct net_device *d, **dp;
++#ifdef CONFIG_NET_DIVERT
++	int ret;
++#endif
++
++	spin_lock_init(&dev->queue_lock);
++	spin_lock_init(&dev->xmit_lock);
++	dev->xmit_lock_owner = -1;
++#ifdef CONFIG_NET_FASTROUTE
++	dev->fastpath_lock=RW_LOCK_UNLOCKED;
++#endif
++
++	if (dev_boot_phase)
++		net_dev_init();
++
++#ifdef CONFIG_NET_DIVERT
++	ret = alloc_divert_blk(dev);
++	if (ret)
++		return ret;
++#endif /* CONFIG_NET_DIVERT */
++	
++	dev->iflink = -1;	/* sentinel: replaced by ifindex below unless init() sets it */
++
++	/* Init, if this function is available */
++	if (dev->init && dev->init(dev) != 0) {
++#ifdef CONFIG_NET_DIVERT
++		free_divert_blk(dev);
++#endif
++		return -EIO;	/* any init() failure is reported as -EIO */
++	}
++
++	dev->ifindex = dev_new_index();
++	if (dev->iflink == -1)
++		dev->iflink = dev->ifindex;
++
++	/* Check for existence, and leave dp pointing at the tail of the chain */
++	for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
++		if (d == dev || strcmp(d->name, dev->name) == 0) {
++#ifdef CONFIG_NET_DIVERT
++			free_divert_blk(dev);
++#endif
++			return -EEXIST;
++		}
++	}
++	
++	/* Fix illegal SG+CSUM combinations. */
++	if ((dev->features & NETIF_F_SG) &&
++	    !(dev->features & (NETIF_F_IP_CSUM |
++			       NETIF_F_NO_CSUM |
++			       NETIF_F_HW_CSUM))) {
++		printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
++		       dev->name);
++		dev->features &= ~NETIF_F_SG;
++	}
++
++	/*
++	 *	nil rebuild_header routine,
++	 *	that should be never called and used as just bug trap.
++	 */
++
++	if (dev->rebuild_header == NULL)
++		dev->rebuild_header = default_rebuild_header;
++
++	/*
++	 *	Default initial state at registry is that the
++	 *	device is present.
++	 */
++
++	set_bit(__LINK_STATE_PRESENT, &dev->state);
++
++	dev->next = NULL;
++	dev_init_scheduler(dev);
++	write_lock_bh(&dev_base_lock);
++	*dp = dev;	/* append at the tail found by the loop above */
++	dev_hold(dev);
++	dev->deadbeaf = 0;
++	write_unlock_bh(&dev_base_lock);
++
++	/* Notify protocols, that a new device appeared. */
++	notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
++
++	net_run_sbin_hotplug(dev, "register");
++
++	return 0;
++}
++
++/**
++ *	netdev_finish_unregister - complete unregistration
++ *	@dev: device
++ *
++ *	Destroy and free a dead device. A value of zero is returned on
++ *	success.
++ */
++ 
++int netdev_finish_unregister(struct net_device *dev)
++{
++	BUG_TRAP(dev->ip_ptr==NULL);
++	BUG_TRAP(dev->ip6_ptr==NULL);
++	BUG_TRAP(dev->dn_ptr==NULL);
++
++	if (!dev->deadbeaf) {	/* deadbeaf not set: log and leave the device alone */
++		printk(KERN_ERR "Freeing alive device %p, %s\n", dev, dev->name);
++		return 0;
++	}
++#ifdef NET_REFCNT_DEBUG
++	printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name,
++	       (dev->features & NETIF_F_DYNALLOC)?"":", old style");
++#endif
++	if (dev->destructor)
++		dev->destructor(dev);
++	if (dev->features & NETIF_F_DYNALLOC)	/* only NETIF_F_DYNALLOC devices are kfree()d here */
++		kfree(dev);
++	return 0;
++}
++
++/**
++ *	unregister_netdevice - remove device from the kernel
++ *	@dev: device
++ *
++ *	This function shuts down a device interface and removes it
++ *	from the kernel tables. On success 0 is returned, on a failure
++ *	a negative errno code is returned.
++ *
++ *	Callers must hold the rtnl semaphore.  See the comment at the
++ *	end of Space.c for details about the locking.  You may want
++ *	unregister_netdev() instead of this.
++ */
++
++int unregister_netdevice(struct net_device *dev)
++{
++	unsigned long now, warning_time;
++	struct net_device *d, **dp;
++
++	/* If device is running, close it first. */
++	if (dev->flags & IFF_UP)
++		dev_close(dev);
++
++	BUG_TRAP(dev->deadbeaf==0);
++	dev->deadbeaf = 1;	/* netdev_finish_unregister() only frees devices marked dead */
++
++	/* And unlink it from device chain. */
++	for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
++		if (d == dev) {
++			write_lock_bh(&dev_base_lock);
++			*dp = d->next;
++			write_unlock_bh(&dev_base_lock);
++			break;
++		}
++	}
++	if (d == NULL) {	/* walked off the end: dev is not on dev_base */
++		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never was registered\n", dev->name, dev);
++		return -ENODEV;	/* NOTE(review): deadbeaf is left set on this path */
++	}
++
++	/* Synchronize to net_rx_action. */
++	br_write_lock_bh(BR_NETPROTO_LOCK);
++	br_write_unlock_bh(BR_NETPROTO_LOCK);
++
++	if (dev_boot_phase == 0) {	/* dev_boot_phase is cleared by net_dev_init() */
++#ifdef CONFIG_NET_FASTROUTE
++		dev_clear_fastroute(dev);
++#endif
++
++		/* Shutdown queueing discipline. */
++		dev_shutdown(dev);
++
++		net_run_sbin_hotplug(dev, "unregister");
++
++		/* Notify protocols, that we are about to destroy
++		   this device. They should clean all the things.
++		 */
++		notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
++
++		/*
++		 *	Flush the multicast chain
++		 */
++		dev_mc_discard(dev);
++	}
++
++	if (dev->uninit)
++		dev->uninit(dev);
++
++	/* Notifier chain MUST detach us from master device. */
++	BUG_TRAP(dev->master==NULL);
++
++#ifdef CONFIG_NET_DIVERT
++	free_divert_blk(dev);
++#endif
++
++	if (dev->features & NETIF_F_DYNALLOC) {
++#ifdef NET_REFCNT_DEBUG
++		if (atomic_read(&dev->refcnt) != 1)
++			printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1);
++#endif
++		dev_put(dev);
++		return 0;
++	}
++
++	/* Last reference is our one */
++	if (atomic_read(&dev->refcnt) == 1) {
++		dev_put(dev);
++		return 0;
++	}
++
++#ifdef NET_REFCNT_DEBUG
++	printk("unregister_netdevice: waiting %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt));
++#endif
++
++	/* EXPLANATION. If dev->refcnt is not now 1 (our own reference)
++	   it means that someone in the kernel still has a reference
++	   to this device and we cannot release it.
++
++	   "New style" devices have destructors, hence we can return from this
++	   function and destructor will do all the work later.  As of kernel 2.4.0
++	   there are very few "New Style" devices.
++
++	   "Old style" devices expect that the device is free of any references
++	   upon exit from this function.
++	   We cannot return from this function until all such references have
++	   fallen away.  This is because the caller of this function will probably
++	   immediately kfree(*dev) and then be unloaded via sys_delete_module.
++
++	   So, we linger until all references fall away.  The duration of the
++	   linger is basically unbounded! It is driven by, for example, the
++	   current setting of sysctl_ipfrag_time.
++
++	   After 1 second, we start to rebroadcast unregister notifications
++	   in hope that careless clients will release the device.
++
++	 */
++
++	now = warning_time = jiffies;
++	while (atomic_read(&dev->refcnt) != 1) {
++		if ((jiffies - now) > 1*HZ) {	/* 'now' is never advanced: true on every pass after 1s */
++			/* Rebroadcast unregister notification */
++			notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
++		}
++		current->state = TASK_INTERRUPTIBLE;
++		schedule_timeout(HZ/4);	/* re-check refcnt after at most HZ/4 jiffies */
++		current->state = TASK_RUNNING;
++		if ((jiffies - warning_time) > 10*HZ) {
++			printk(KERN_EMERG "unregister_netdevice: waiting for %s to "
++					"become free. Usage count = %d\n",
++					dev->name, atomic_read(&dev->refcnt));
++			warning_time = jiffies;
++		}
++	}
++	dev_put(dev);
++	return 0;
++}
++
++
++/*
++ *	Initialize the DEV module. At boot time this walks the device list and
++ *	unhooks any devices that fail to initialise (normally hardware not 
++ *	present) and leaves us with a valid list of present and active devices.
++ *
++ */
++
++extern void net_device_init(void);
++extern void ip_auto_config(void);
++struct proc_dir_entry *proc_net_drivers;
++#ifdef CONFIG_NET_DIVERT
++extern void dv_init(void);
++#endif /* CONFIG_NET_DIVERT */
++
++
++/*
++ *       Callers must hold the rtnl semaphore.  See the comment at the
++ *       end of Space.c for details about the locking.
++ */
++int __init net_dev_init(void)
++{
++	struct net_device *dev, **dp;
++	int i;
++
++	if (!dev_boot_phase)	/* cleared further down, so a repeated call returns here */
++		return 0;
++
++
++#ifdef CONFIG_NET_DIVERT
++	dv_init();
++#endif /* CONFIG_NET_DIVERT */
++	
++	/*
++	 *	Initialise the packet receive queues.
++	 */
++
++	for (i = 0; i < NR_CPUS; i++) {
++		struct softnet_data *queue;
++
++		queue = &softnet_data[i];
++		skb_queue_head_init(&queue->input_pkt_queue);
++		queue->throttle = 0;
++		queue->cng_level = 0;
++		queue->avg_blog = 10; /* arbitrary non-zero */
++		queue->completion_queue = NULL;
++		INIT_LIST_HEAD(&queue->poll_list);
++		set_bit(__LINK_STATE_START, &queue->blog_dev.state);
++		queue->blog_dev.weight = weight_p;
++		queue->blog_dev.poll = process_backlog;
++		atomic_set(&queue->blog_dev.refcnt, 1);
++	}
++
++#ifdef CONFIG_NET_PROFILE
++	net_profile_init();
++	NET_PROFILE_REGISTER(dev_queue_xmit);
++	NET_PROFILE_REGISTER(softnet_process);
++#endif
++
++#ifdef OFFLINE_SAMPLE
++	samp_timer.expires = jiffies + (10 * HZ);
++	add_timer(&samp_timer);
++#endif
++
++	/*
++	 *	Add the devices.
++	 *	If the call to dev->init fails, the dev is removed
++	 *	from the chain disconnecting the device until the
++	 *	next reboot.
++	 *
++	 *	NB At boot phase networking is dead. No locking is required.
++	 *	But we still preserve dev_base_lock for sanity.
++	 */
++
++	dp = &dev_base;
++	while ((dev = *dp) != NULL) {
++		spin_lock_init(&dev->queue_lock);
++		spin_lock_init(&dev->xmit_lock);
++#ifdef CONFIG_NET_FASTROUTE
++		dev->fastpath_lock = RW_LOCK_UNLOCKED;
++#endif
++		dev->xmit_lock_owner = -1;
++		dev->iflink = -1;
++		dev_hold(dev);
++
++		/*
++		 * Allocate name. If the init() fails
++		 * the name will be reissued correctly.
++		 */
++		if (strchr(dev->name, '%'))
++			dev_alloc_name(dev, dev->name);
++
++		/* 
++		 * Check boot time settings for the device.
++		 */
++		netdev_boot_setup_check(dev);
++
++		if (dev->init && dev->init(dev)) {
++			/*
++			 * It failed to come up. It will be unhooked later.
++			 * dev_alloc_name can now advance to next suitable
++			 * name that is checked next.
++			 */
++			dev->deadbeaf = 1;	/* marks dev for the unhook pass below */
++			dp = &dev->next;
++		} else {
++			dp = &dev->next;
++			dev->ifindex = dev_new_index();
++			if (dev->iflink == -1)
++				dev->iflink = dev->ifindex;
++			if (dev->rebuild_header == NULL)
++				dev->rebuild_header = default_rebuild_header;
++			dev_init_scheduler(dev);
++			set_bit(__LINK_STATE_PRESENT, &dev->state);
++		}
++	}
++
++	/*
++	 * Unhook devices that failed to come up
++	 */
++	dp = &dev_base;
++	while ((dev = *dp) != NULL) {
++		if (dev->deadbeaf) {
++			write_lock_bh(&dev_base_lock);
++			*dp = dev->next;
++			write_unlock_bh(&dev_base_lock);
++			dev_put(dev);	/* balances the dev_hold() taken in the first pass */
++		} else {
++			dp = &dev->next;
++		}
++	}
++
++#ifdef CONFIG_PROC_FS
++	proc_net_create("dev", 0, dev_get_info);
++	create_proc_read_entry("net/softnet_stat", 0, 0, dev_proc_stats, NULL);
++	proc_net_drivers = proc_mkdir("net/drivers", 0);
++#ifdef WIRELESS_EXT
++	/* Available in net/core/wireless.c */
++	proc_net_create("wireless", 0, dev_get_wireless_info);
++#endif	/* WIRELESS_EXT */
++#endif	/* CONFIG_PROC_FS */
++
++	dev_boot_phase = 0;	/* from here on unregister_netdevice() runs its full teardown branch */
++
++	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
++	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
++
++	dst_init();
++	dev_mcast_init();
++
++#ifdef CONFIG_NET_SCHED
++	pktsched_init();
++#endif
++	/*
++	 *	Initialise network devices
++	 */
++	 
++	net_device_init();
++
++	return 0;
++}
++
++#ifdef CONFIG_HOTPLUG
++
++/* Notify userspace of a netdevice event by running the helper at
++ * hotplug_path with the single argument "net" and a minimal environment
++ * that carries INTERFACE=<dev->name> and ACTION=<action>.
++ * Returns whatever call_usermodehelper() returns.
++ */
++static int net_run_sbin_hotplug(struct net_device *dev, char *action)
++{
++	char *argv[3], *envp[5], ifname[12 + IFNAMSIZ], action_str[32];
++	int i;
++
++	/* Bounded formatting: an over-long value is truncated, never overrun. */
++	snprintf(ifname, sizeof(ifname), "INTERFACE=%s", dev->name);
++	snprintf(action_str, sizeof(action_str), "ACTION=%s", action);
++	i = 0;
++	argv[i++] = hotplug_path;
++	argv[i++] = "net";
++	argv[i] = NULL;
++
++	i = 0;
++	/* minimal command environment */
++	envp[i++] = "HOME=/";
++	envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
++	envp[i++] = ifname;
++	envp[i++] = action_str;
++	envp[i] = NULL;
++
++	return call_usermodehelper(argv[0], argv, envp);
++}
++#endif
+diff --unified --recursive --new-file linux-2.4.30/net/netsyms.c linux-2.4.30-1-686-smp-ring3/net/netsyms.c
+--- linux-2.4.30/net/netsyms.c	2005-04-04 03:42:20.000000000 +0200
++++ linux-2.4.30-1-686-smp-ring3/net/netsyms.c	2005-10-22 23:08:28.016050500 +0200
+@@ -628,3 +628,18 @@
+ #endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
+ 
+ #endif  /* CONFIG_NET */
++#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
++#include <linux/version.h>
++
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
++#include <linux/ring.h>
++
++EXPORT_SYMBOL(get_skb_ring_handler);
++EXPORT_SYMBOL(set_skb_ring_handler);
++EXPORT_SYMBOL(do_skb_ring_handler);
++EXPORT_SYMBOL(get_buffer_ring_handler);
++EXPORT_SYMBOL(set_buffer_ring_handler);
++EXPORT_SYMBOL(do_buffer_ring_handler);
++#endif
++
++#endif
+diff --unified --recursive --new-file linux-2.4.30/net/ring/Config.in linux-2.4.30-1-686-smp-ring3/net/ring/Config.in
+--- linux-2.4.30/net/ring/Config.in	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.4.30-1-686-smp-ring3/net/ring/Config.in	2005-10-22 23:08:28.048052500 +0200
+@@ -0,0 +1,4 @@
++#
++# PF_RING
++#
++tristate '  PF_RING (EXPERIMENTAL)' CONFIG_RING
+diff --unified --recursive --new-file linux-2.4.30/net/ring/Makefile linux-2.4.30-1-686-smp-ring3/net/ring/Makefile
+--- linux-2.4.30/net/ring/Makefile	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.4.30-1-686-smp-ring3/net/ring/Makefile	2005-10-22 23:08:27.420013250 +0200
+@@ -0,0 +1,16 @@
++#
++# Makefile for the ring driver.
++#
++
++O_TARGET := ring.o
++
++export-objs := ring_packet.o
++
++obj-y := ring_packet.o
++
++ifeq ($(CONFIG_RING),m)
++  obj-m += $(O_TARGET)
++endif
++
++include $(TOPDIR)/Rules.make
++
+diff --unified --recursive --new-file linux-2.4.30/net/ring/ring_packet.c linux-2.4.30-1-686-smp-ring3/net/ring/ring_packet.c
+--- linux-2.4.30/net/ring/ring_packet.c	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.4.30-1-686-smp-ring3/net/ring/ring_packet.c	2005-10-22 23:08:27.440014500 +0200
+@@ -0,0 +1,1623 @@
++/*
++ *
++ * (C) 2004-05 - Luca Deri <deri@ntop.org>
++ *
++ * This code includes patches courtesy of
++ * - Jeff Randall <jrandall@nexvu.com>
++ * - Helmut Manck <helmut.manck@secunet.com>
++ * - Brad Doctor <bdoctor@ps-ax.com>
++ *
++ */
++
++/* FIX: add an entry inside the /proc filesystem */
++
++#include <linux/version.h>
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/socket.h>
++#include <linux/skbuff.h>
++#include <linux/rtnetlink.h>
++#include <linux/in.h>
++#include <linux/in6.h>
++#include <linux/init.h>
++#include <linux/filter.h>
++#include <linux/ring.h>
++#include <linux/ip.h>
++#include <linux/tcp.h>
++#include <linux/udp.h>
++#include <linux/list.h>
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++#include <net/xfrm.h>
++#else
++#include <linux/poll.h>
++#endif
++#include <net/sock.h>
++#include <asm/io.h>   /* needed for virt_to_phys() */
++
++/* #define RING_DEBUG */
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11))
++/* Compatibility shim: hand remap_pfn_range() a page frame number */
++static inline int remap_page_range(struct vm_area_struct *vma,
++				   unsigned long uvaddr, unsigned long paddr,
++				   unsigned long size, pgprot_t prot)
++{
++  unsigned long pfn = paddr >> PAGE_SHIFT;
++  return(remap_pfn_range(vma, uvaddr, pfn, size, prot));
++}
++#endif
++
++/* ************************************************* */
++
++#define CLUSTER_LEN       8
++
++struct ring_cluster {
++  u_short             cluster_id; /* 0 = no cluster */
++  u_short             num_cluster_elements; /* modulus applied by hash_skb() */
++  enum cluster_type   hashing_mode; /* hash_skb() compares it with cluster_round_robin */
++  u_short             hashing_id; /* counter post-incremented by hash_skb() in round-robin mode */
++  struct sock         *sk[CLUSTER_LEN]; /* member sockets, indexed by the hash_skb() result */
++  struct ring_cluster *next;      /* next cluster in ring_cluster_list, NULL = last one */
++};
++
++/* ************************************************* */
++
++struct ring_element {
++  struct list_head  list; /* linkage into ring_table */
++  struct sock      *sk;   /* the ring socket this entry stands for */
++};
++
++/* ************************************************* */
++
++struct ring_opt {
++  struct net_device *ring_netdev; /* skb_ring_handler() only feeds packets whose skb->dev equals this */
++
++  /* Cluster */
++  u_short cluster_id; /* 0 = no cluster */
++
++  /* Reflector */
++  struct net_device *reflector_dev; /* NULL = none; used by the reflector path of add_skb_to_ring() */
++
++  /* Packet buffers */
++  unsigned long order; /* page order of ring_memory, as passed to free_pages() in ring_release() */
++
++  /* Ring Slots */
++  unsigned long ring_memory;
++  FlowSlotInfo *slots_info; /* Basically it points to ring_memory */
++  char *ring_slots;  /* Basically it points to ring_memory
++			+sizeof(FlowSlotInfo) */
++
++  /* Packet Sampling */
++  u_int pktToSample, sample_rate; /* countdown and its reload value, see add_skb_to_ring() */
++
++  /* BPF Filter */
++  struct sk_filter *bpfFilter; /* NULL = no filtering */
++
++  /* Locks */
++  atomic_t num_ring_slots_waiters;
++  wait_queue_head_t ring_slots_waitqueue; /* woken by add_skb_to_ring() */
++  rwlock_t ring_index_lock; /* write-locked around slots_info and sampling updates */
++
++  /* Indexes (Internal) */
++  u_int insert_page_id, insert_slot_id;
++};
++
++/* ************************************************* */
++
++/* List of all ring sockets. */
++static struct list_head ring_table;
++
++/* List of all clusters */
++static struct ring_cluster *ring_cluster_list;
++
++static rwlock_t ring_mgmt_lock = RW_LOCK_UNLOCKED;
++
++/* ********************************** */
++
++/* Forward */
++static struct proto_ops ring_ops;
++
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
++static struct proto ring_proto;
++#endif
++
++static int skb_ring_handler(struct sk_buff *skb, u_char recv_packet,
++			    u_char real_skb);
++static int buffer_ring_handler(struct net_device *dev, char *data, int len);
++static int remove_from_cluster(struct sock *sock, struct ring_opt *pfr);
++
++/* Extern */
++
++/* ********************************** */
++
++/* Defaults */
++static u_int bucket_len = 128, num_slots = 4096, sample_rate = 1,
++  transparent_mode = 0, enable_tx_capture = 0;
++
++MODULE_PARM(bucket_len, "i");
++MODULE_PARM_DESC(bucket_len, "Number of ring buckets");
++MODULE_PARM(num_slots,  "i");
++MODULE_PARM_DESC(num_slots,  "Number of ring slots");
++MODULE_PARM(sample_rate, "i");
++MODULE_PARM_DESC(sample_rate, "Ring packet sample rate");
++MODULE_PARM(transparent_mode, "i");
++MODULE_PARM_DESC(transparent_mode,
++		 "Set to 1 to set transparent mode "
++		 "(slower but backwards compatible)");
++MODULE_PARM(enable_tx_capture, "i");
++MODULE_PARM_DESC(enable_tx_capture, "Set to 1 to capture outgoing packets");
++
++/* ********************************** */
++
++#define MIN_QUEUED_PKTS      64
++#define MAX_QUEUE_LOOPS      64
++
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++#define ring_sk_datatype(__sk) ((struct ring_opt *)__sk)
++#define ring_sk(__sk) ((__sk)->sk_protinfo)
++#else
++#define ring_sk_datatype(a) (a)
++#define ring_sk(__sk) ((__sk)->protinfo.pf_ring)
++#endif
++
++#define _rdtsc() ({ uint64_t x; asm volatile("rdtsc" : "=A" (x)); x; })
++
++/*
++  int dev_queue_xmit(struct sk_buff *skb)
++  skb->dev;
++  struct net_device *dev_get_by_name(const char *name)
++*/
++
++/* ********************************** */
++
++static void ring_sock_destruct(struct sock *sk) {
++  /* Socket destructor installed by ring_create(): frees ring_sk(sk) of a dead socket */
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++  skb_queue_purge(&sk->sk_receive_queue);
++
++  if (!sock_flag(sk, SOCK_DEAD)) { /* still alive: free nothing */
++#if defined(RING_DEBUG)
++    printk("Attempt to release alive ring socket: %p\n", sk);
++#endif
++    return;
++  }
++
++  BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
++  BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
++#else
++
++  BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
++  BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
++
++  if (!sk->dead) { /* still alive: free nothing */
++#if defined(RING_DEBUG)
++    printk("Attempt to release alive ring socket: %p\n", sk);
++#endif
++    return;
++  }
++#endif
++
++  kfree(ring_sk(sk));
++
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
++  MOD_DEC_USE_COUNT; /* pairs with MOD_INC_USE_COUNT in ring_create() */
++#endif
++}
++
++/* ********************************** */
++/*
++ * ring_insert()
++ *
++ * Wrap sk in a newly allocated ring_element and link it at the head
++ * of ring_table. If the allocation fails sk is left unregistered.
++ */
++static inline void ring_insert(struct sock *sk) {
++  struct ring_element *elem;
++
++#if defined(RING_DEBUG)
++  printk("RING: ring_insert()\n");
++#endif
++
++  elem = kmalloc(sizeof(struct ring_element), GFP_ATOMIC);
++  if(elem == NULL) {
++    if (net_ratelimit())
++      printk("RING: could not kmalloc slot!!\n");
++    return;
++  }
++  elem->sk = sk;
++  write_lock_irq(&ring_mgmt_lock);
++  list_add(&elem->list, &ring_table);
++  write_unlock_irq(&ring_mgmt_lock);
++}
++
++/* ********************************** */
++/*
++ * ring_remove()
++ *
++ * Look up the ring_table element that refers to sk, unlink it and
++ * free it. Only the first match is removed; nothing happens when sk
++ * is not in the list.
++ *
++ * The whole walk runs under ring_mgmt_lock so that other lock holders
++ * cannot modify the list between the lookup and the list_del().
++ */
++static inline void ring_remove(struct sock *sk) {
++  struct list_head *ptr;
++  struct ring_element *entry, *found = NULL;
++
++  write_lock_irq(&ring_mgmt_lock);
++  for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
++    entry = list_entry(ptr, struct ring_element, list);
++
++    if(entry->sk == sk) {
++      list_del(ptr);
++      found = entry;
++      break;
++    }
++  }
++  write_unlock_irq(&ring_mgmt_lock);
++
++  kfree(found); /* the element itself, not its list member; NULL is ignored */
++}
++
++/* ********************************** */
++
++/*
++ * num_queued_pkts()
++ *
++ * Return the distance between the insert value and the read counter
++ * of the ring, or 0 when the ring has no slots (ring_slots == NULL).
++ *
++ * Both values are held as u_int32_t and subtracted modulo 2^32, so
++ * the result is also right when the insert value is numerically
++ * smaller than the read counter because it has wrapped.
++ *
++ * NOTE(review): the "insert" side is read from insert_idx although
++ * slots_info also carries a tot_insert counter - confirm which one
++ * is meant to be compared with tot_read.
++ */
++static u_int32_t num_queued_pkts(struct ring_opt *pfr) {
++  u_int32_t tot_insert, tot_read, tot_pkts;
++
++  if(pfr->ring_slots == NULL)
++    return(0);
++
++  tot_insert = pfr->slots_info->insert_idx;
++  tot_read   = pfr->slots_info->tot_read;
++  tot_pkts   = tot_insert-tot_read; /* unsigned: wraps modulo 2^32 */
++
++#if defined(RING_DEBUG)
++  printk("-> num_queued_pkts=%d [tot_insert=%d][tot_read=%d]\n",
++	 tot_pkts, tot_insert, tot_read);
++#endif
++
++  return(tot_pkts);
++}
++
++/* ********************************** */
++
++static inline FlowSlot* get_insert_slot(struct ring_opt *pfr) {
++#if defined(RING_DEBUG)
++  printk("get_insert_slot(%d)\n", pfr->slots_info->insert_idx);
++#endif
++
++  if(pfr->ring_slots == NULL)
++    return(NULL); /* ring_slots unset: no slot to hand out */
++
++  /* Slots lie slot_len bytes apart: address the one at insert_idx */
++  return((FlowSlot*)&(pfr->ring_slots[pfr->slots_info->insert_idx
++				      *pfr->slots_info->slot_len]));
++}
++
++/* ********************************** */
++
++static inline FlowSlot* get_remove_slot(struct ring_opt *pfr) {
++#if defined(RING_DEBUG)
++  printk("get_remove_slot(%d)\n", pfr->slots_info->remove_idx);
++#endif
++
++  if(pfr->ring_slots == NULL)
++    return(NULL); /* ring_slots unset: no slot to hand out */
++
++  return((FlowSlot*)&(pfr->ring_slots[pfr->slots_info->remove_idx*
++				      pfr->slots_info->slot_len]));
++}
++
++/* ********************************** */
++
++/* Offer skb to pfr: it is filtered, sampled, reflected or copied into
++ * the next ring slot; tot_lost counts packets that found no free slot. */
++static void add_skb_to_ring(struct sk_buff *skb,
++			    struct ring_opt *pfr,
++			    u_char recv_packet,
++			    u_char real_skb /* 1=skb 0=faked skb */) {
++  FlowSlot *theSlot;
++  int idx, displ;
++
++  if(recv_packet) {
++    /* Hack for identifying a packet received by the e1000 */
++    if(real_skb) {
++      displ = SKB_DISPLACEMENT; /* skb->data is rewound by this much for filtering and copying */
++    } else
++      displ = 0; /* Received by the e1000 wrapper */
++  } else
++    displ = 0;
++
++  write_lock(&pfr->ring_index_lock);
++  pfr->slots_info->tot_pkts++;
++  write_unlock(&pfr->ring_index_lock);
++
++  /* BPF Filtering (from af_packet.c) */
++  if(pfr->bpfFilter != NULL) {
++    unsigned res = 1, len;
++
++    len = skb->len-skb->data_len; /* NOTE(review): len is computed but not used below */
++
++    write_lock(&pfr->ring_index_lock);
++    skb->data -= displ;
++    res = sk_run_filter(skb, pfr->bpfFilter->insns, pfr->bpfFilter->len);
++    skb->data += displ;
++    write_unlock(&pfr->ring_index_lock);
++
++    if(res == 0) {
++      /* Filter failed */
++#if defined(RING_DEBUG)
++      printk("add_skb_to_ring(skb): Filter failed [len=%d][tot=%llu]"
++	     "[insertIdx=%d][pkt_type=%d][cloned=%d]\n",
++	     (int)skb->len, pfr->slots_info->tot_pkts,
++	     pfr->slots_info->insert_idx,
++	     skb->pkt_type, skb->cloned);
++#endif
++
++      return;
++    }
++  }
++
++  /* Sampling: a packet passes only when pktToSample has reached 0 */
++  if(pfr->sample_rate > 1) {
++    if(pfr->pktToSample == 0) {
++      write_lock(&pfr->ring_index_lock);
++      pfr->pktToSample = pfr->sample_rate;
++      write_unlock(&pfr->ring_index_lock);
++    } else {
++      write_lock(&pfr->ring_index_lock);
++      pfr->pktToSample--;
++      write_unlock(&pfr->ring_index_lock);
++
++#if defined(RING_DEBUG)
++      printk("add_skb_to_ring(skb): sampled packet [len=%d]"
++	     "[tot=%llu][insertIdx=%d][pkt_type=%d][cloned=%d]\n",
++	     (int)skb->len, pfr->slots_info->tot_pkts,
++	     pfr->slots_info->insert_idx,
++	     skb->pkt_type, skb->cloned);
++#endif
++      return;
++    }
++  }
++
++  /* Reflector: every path through this branch returns without storing */
++
++  if((pfr->reflector_dev != NULL)
++     && (!netif_queue_stopped(pfr->reflector_dev))) {
++    int cpu = smp_processor_id();
++
++    /* increase reference counter so that this skb is not freed */
++    atomic_inc(&skb->users);
++
++    skb->data -= displ;
++
++    /* send it */
++    if (pfr->reflector_dev->xmit_lock_owner != cpu) {
++      spin_lock_bh(&pfr->reflector_dev->xmit_lock);
++      pfr->reflector_dev->xmit_lock_owner = cpu;
++      spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
++      /* NOTE(review): xmit_lock is not held while hard_start_xmit() runs */
++      if (pfr->reflector_dev->hard_start_xmit(skb,
++					      pfr->reflector_dev) == 0) {
++        spin_lock_bh(&pfr->reflector_dev->xmit_lock);
++	pfr->reflector_dev->xmit_lock_owner = -1;
++	skb->data += displ;
++	spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
++#if defined(RING_DEBUG)
++	printk("++ hard_start_xmit succeeded\n");
++#endif
++	return; /* OK */
++      }
++
++      spin_lock_bh(&pfr->reflector_dev->xmit_lock);
++      pfr->reflector_dev->xmit_lock_owner = -1;
++      spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
++    }
++    /* NOTE(review): the skb->users reference taken above is not dropped on this path */
++#if defined(RING_DEBUG)
++    printk("++ hard_start_xmit failed\n");
++#endif
++    skb->data += displ;
++    return; /* -ENETDOWN */
++  }
++
++  /* Store: copy the packet into the slot at insert_idx if it is free */
++
++#if defined(RING_DEBUG)
++  printk("add_skb_to_ring(skb) [len=%d][tot=%llu][insertIdx=%d]"
++	 "[pkt_type=%d][cloned=%d]\n",
++	 (int)skb->len, pfr->slots_info->tot_pkts,
++	 pfr->slots_info->insert_idx,
++	 skb->pkt_type, skb->cloned);
++#endif
++
++  idx = pfr->slots_info->insert_idx;
++  theSlot = get_insert_slot(pfr);
++
++  if((theSlot != NULL) && (theSlot->slot_state == 0)) { /* 0 = free; set to 1 below once filled */
++    struct pcap_pkthdr *hdr;
++    unsigned int bucketSpace;
++    char *bucket;
++
++    /* Update Index */
++    idx++;
++
++    if(idx == pfr->slots_info->tot_slots) {
++      write_lock(&pfr->ring_index_lock);
++      pfr->slots_info->insert_idx = 0;
++      write_unlock(&pfr->ring_index_lock);
++    } else {
++      write_lock(&pfr->ring_index_lock);
++      pfr->slots_info->insert_idx = idx;
++      write_unlock(&pfr->ring_index_lock);
++    }
++
++    bucketSpace = pfr->slots_info->slot_len
++#ifdef RING_MAGIC
++      - sizeof(u_char)
++#endif
++      - sizeof(u_char)  /* flowSlot.slot_state */
++      - sizeof(struct pcap_pkthdr)
++      - 1 /* 10 */ /* safe boundary */;
++
++    bucket = &theSlot->bucket;
++    hdr = (struct pcap_pkthdr*)bucket;
++
++    if(skb->stamp.tv_sec == 0) do_gettimeofday(&skb->stamp);
++
++    hdr->ts.tv_sec = skb->stamp.tv_sec, hdr->ts.tv_usec = skb->stamp.tv_usec;
++    hdr->caplen    = skb->len+displ;
++
++    if(hdr->caplen > bucketSpace) /* truncate the capture to what the slot can hold */
++      hdr->caplen = bucketSpace;
++
++    hdr->len = skb->len+displ;
++    memcpy(&bucket[sizeof(struct pcap_pkthdr)],
++	   skb->data-displ, hdr->caplen);
++
++#if defined(RING_DEBUG)
++    {
++      static unsigned int lastLoss = 0;
++
++      if(pfr->slots_info->tot_lost
++	 && (lastLoss != pfr->slots_info->tot_lost)) {
++	printk("add_skb_to_ring(%d): [bucketSpace=%d]"
++	       "[hdr.caplen=%d][skb->len=%d]"
++	       "[pcap_pkthdr=%d][removeIdx=%d]"
++	       "[loss=%lu][page=%u][slot=%u]\n",
++	       idx-1, bucketSpace, hdr->caplen, skb->len,
++	       sizeof(struct pcap_pkthdr),
++	       pfr->slots_info->remove_idx,
++	       (long unsigned int)pfr->slots_info->tot_lost,
++	       pfr->insert_page_id, pfr->insert_slot_id);
++
++	lastLoss = pfr->slots_info->tot_lost;
++      }
++    }
++#endif
++
++    write_lock(&pfr->ring_index_lock);
++    pfr->slots_info->tot_insert++;
++    theSlot->slot_state = 1;
++    write_unlock(&pfr->ring_index_lock);
++  } else {
++    write_lock(&pfr->ring_index_lock);
++    pfr->slots_info->tot_lost++;
++    write_unlock(&pfr->ring_index_lock);
++
++#if defined(RING_DEBUG)
++    printk("add_skb_to_ring(skb): packet lost [loss=%lu]"
++	   "[removeIdx=%u][insertIdx=%u]\n",
++	   (long unsigned int)pfr->slots_info->tot_lost,
++	   pfr->slots_info->remove_idx, pfr->slots_info->insert_idx);
++#endif
++  }
++
++  /* wakeup in case of poll() */
++  if(waitqueue_active(&pfr->ring_slots_waitqueue))
++    wake_up_interruptible(&pfr->ring_slots_waitqueue);
++}
++
++/* ********************************** */
++
++/*
++ * hash_skb()
++ *
++ * Pick the cluster member (an index into cluster_ptr->sk[]) for skb:
++ * a running counter in round-robin mode, otherwise a sum over the
++ * IP addresses, protocol and TCP/UDP ports. The caller must make
++ * sure num_cluster_elements is not zero.
++ */
++static u_int hash_skb(struct ring_cluster *cluster_ptr,
++		      struct sk_buff *skb, u_char recv_packet) {
++  u_int hash;
++
++  if(cluster_ptr->hashing_mode == cluster_round_robin) {
++    hash = cluster_ptr->hashing_id++;
++  } else if(!(skb->len > sizeof(struct iphdr)+sizeof(struct tcphdr))) {
++    /* No room for IP+TCP headers: hash on the length alone */
++    hash = skb->len;
++  } else {
++    /* Per-flow clustering: skb->data+offset is taken as the IP header */
++    int offset = recv_packet ? 0 : SKB_DISPLACEMENT;
++    struct iphdr *iph = (struct iphdr*)(skb->data+offset);
++
++    hash = iph->saddr+iph->daddr+iph->protocol;
++
++    switch(iph->protocol) {
++    case IPPROTO_TCP: {
++      struct tcphdr *th = (struct tcphdr*)(skb->data+offset
++					   +sizeof(struct iphdr));
++      hash += th->source+th->dest;
++      break;
++    }
++    case IPPROTO_UDP: {
++      struct udphdr *uh = (struct udphdr*)(skb->data+offset
++					   +sizeof(struct iphdr));
++      hash += uh->source+uh->dest;
++      break;
++    }
++    }
++  }
++  return(hash % cluster_ptr->num_cluster_elements);
++}
++
++/* ********************************** */
++
++/* Offer skb to every matching unclustered ring socket and to one member
++ * of each cluster. Returns 1 when a ring took it and transparent_mode is
++ * off (a real skb is then freed here), 0 otherwise. */
++static int skb_ring_handler(struct sk_buff *skb,
++			    u_char recv_packet,
++			    u_char real_skb /* 1=skb 0=faked skb */) {
++  struct sock *skElement;
++  int rc = 0;
++  struct list_head *ptr;
++  struct ring_cluster *cluster_ptr;
++
++#ifdef PROFILING
++  uint64_t rdt = _rdtsc(), rdt1, rdt2;
++#endif
++
++  if((!skb) /* Invalid skb */
++     || ((!enable_tx_capture) && (!recv_packet))) {
++    /*
++      An outgoing packet is about to be sent out
++      but we decided not to handle transmitted
++      packets.
++    */
++    return(0);
++  }
++
++#if defined(RING_DEBUG)
++  if(0) {
++    printk("skb_ring_handler() [len=%d][dev=%s]\n", skb->len,
++	   skb->dev->name == NULL ? "<NULL>" : skb->dev->name);
++  }
++#endif
++
++#ifdef PROFILING
++  rdt1 = _rdtsc();
++#endif
++
++  /* [1] Check unclustered sockets (NOTE(review): ring_table is walked without ring_mgmt_lock) */
++  for (ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
++    struct ring_opt *pfr;
++    struct ring_element *entry;
++    entry = list_entry(ptr, struct ring_element, list);
++    read_lock(&ring_mgmt_lock);
++    skElement = entry->sk;
++    pfr = ring_sk(skElement);
++    read_unlock(&ring_mgmt_lock);
++
++    if((pfr != NULL)
++       && (pfr->cluster_id == 0 /* No cluster */)
++       && (pfr->ring_slots != NULL)
++       && (pfr->ring_netdev == skb->dev)) {
++      /* We've found the ring where the packet can be stored */
++      read_lock(&ring_mgmt_lock);
++      add_skb_to_ring(skb, pfr, recv_packet, real_skb);
++      read_unlock(&ring_mgmt_lock);
++
++      rc = 1; /* Ring found: we've done our job */
++    }
++  }
++
++  /* [2] Check socket clusters */
++  cluster_ptr = ring_cluster_list;
++
++  while(cluster_ptr != NULL) {
++    struct ring_opt *pfr;
++    if(cluster_ptr->num_cluster_elements > 0) { /* also keeps hash_skb() from dividing by zero */
++      u_int skb_hash = hash_skb(cluster_ptr, skb, recv_packet);
++
++      read_lock(&ring_mgmt_lock);
++      skElement = cluster_ptr->sk[skb_hash];
++      read_unlock(&ring_mgmt_lock);
++
++      if(skElement != NULL) {
++	pfr = ring_sk(skElement);
++
++	if((pfr != NULL)
++	   && (pfr->ring_slots != NULL)
++	   && (pfr->ring_netdev == skb->dev)) {
++	  /* We've found the ring where the packet can be stored */
++          read_lock(&ring_mgmt_lock);
++	  add_skb_to_ring(skb, pfr, recv_packet, real_skb);
++          read_unlock(&ring_mgmt_lock);
++
++	  rc = 1; /* Ring found: we've done our job */
++	}
++      }
++    }
++
++    cluster_ptr = cluster_ptr->next;
++  }
++
++#ifdef PROFILING
++  rdt1 = _rdtsc()-rdt1;
++#endif
++
++#ifdef PROFILING
++  rdt2 = _rdtsc();
++#endif
++
++  if(transparent_mode) rc = 0; /* always report "not handled"; the skb is then not freed below */
++
++  if((rc != 0) && real_skb)
++    dev_kfree_skb(skb); /* Free the skb */
++
++#ifdef PROFILING
++  rdt2 = _rdtsc()-rdt2;
++  rdt = _rdtsc()-rdt;
++
++#if defined(RING_DEBUG) /* NOTE(review): the printk below uses float arithmetic */
++  printk("# cycles: %d [lock costed %d %d%%][free costed %d %d%%]\n",
++	 (int)rdt, rdt-rdt1,
++	 (int)((float)((rdt-rdt1)*100)/(float)rdt),
++	 rdt2,
++	 (int)((float)(rdt2*100)/(float)rdt));
++#endif
++#endif
++
++  return(rc); /*  0 = packet not handled */
++}
++
++/* ********************************** */
++
++struct sk_buff skb; /* scratch skb shared by all buffer_ring_handler() calls */
++
++/* Feed a raw buffer received on dev to the rings; always returns 0 */
++static int buffer_ring_handler(struct net_device *dev,
++			       char *data, int len) {
++#if defined(RING_DEBUG)
++  printk("buffer_ring_handler: [dev=%s][len=%d]\n",
++	 dev->name == NULL ? "<NULL>" : dev->name, len);
++#endif
++  skb.dev      = dev;
++  skb.len      = len;
++  skb.data     = data;
++  skb.data_len = len;
++  skb.stamp.tv_sec = 0; /* zero makes add_skb_to_ring() take the timestamp */
++  skb_ring_handler(&skb, 1, 0 /* fake skb */);
++  return(0);
++}
++
++/* ********************************** */
++
++/*
++ * ring_create()
++ *
++ * Create a PF_RING socket: needs CAP_NET_ADMIN, SOCK_RAW and protocol
++ * htons(ETH_P_ALL). Returns 0, or -EPERM / -ESOCKTNOSUPPORT /
++ * -EPROTONOSUPPORT for a failed check and -ENOMEM if allocation fails.
++ */
++static int ring_create(struct socket *sock, int protocol) {
++  struct sock *sk;
++  struct ring_opt *pfr;
++  int err;
++
++#if defined(RING_DEBUG)
++  printk("RING: ring_create()\n");
++#endif
++
++  /* Are you root, superuser or so ? */
++  if(!capable(CAP_NET_ADMIN))
++    return -EPERM;
++  if(sock->type != SOCK_RAW)
++    return -ESOCKTNOSUPPORT;
++  if(protocol != htons(ETH_P_ALL))
++    return -EPROTONOSUPPORT;
++
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
++  MOD_INC_USE_COUNT;
++#endif
++
++  err = -ENOMEM;
++  /* sk_alloc() takes different arguments in each kernel generation */
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
++  sk = sk_alloc(PF_RING, GFP_KERNEL, 1);
++#elif (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
++  sk = sk_alloc(PF_RING, GFP_KERNEL, 1, NULL);
++#else
++  // BD: API changed in 2.6.12, ref:
++  // http://svn.clkao.org/svnweb/linux/revision/?rev=28201
++  sk = sk_alloc(PF_RING, GFP_ATOMIC, &ring_proto, 1);
++#endif
++
++  if (sk == NULL)
++    goto out;
++
++  sock->ops = &ring_ops;
++  sock_init_data(sock, sk);
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
++  sk_set_owner(sk, THIS_MODULE);
++#endif
++#endif
++
++  ring_sk(sk) = ring_sk_datatype(kmalloc(sizeof(*pfr), GFP_KERNEL));
++
++  if (!(pfr = ring_sk(sk))) {
++    sk_free(sk);
++    goto out;
++  }
++  memset(pfr, 0, sizeof(*pfr));
++  init_waitqueue_head(&pfr->ring_slots_waitqueue);
++  pfr->ring_index_lock = RW_LOCK_UNLOCKED;
++  atomic_set(&pfr->num_ring_slots_waiters, 0);
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++  sk->sk_family       = PF_RING;
++  sk->sk_destruct     = ring_sock_destruct;
++#else
++  sk->family          = PF_RING;
++  sk->destruct        = ring_sock_destruct;
++  sk->num             = protocol;
++#endif
++
++  ring_insert(sk);
++
++#if defined(RING_DEBUG)
++  printk("RING: ring_create() - created\n");
++#endif
++
++  return(0);
++ out:
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
++  MOD_DEC_USE_COUNT;
++#endif
++  return err;
++}
++
++/* *********************************************** */
++
++/*
++ * Release hook: unlinks the socket, frees the ring memory and the
++ * private ring_opt, then calls sock_put() on the sock. Always returns 0.
++ */
++static int ring_release(struct socket *sock)
++{
++  struct sock *sk = sock->sk;
++  struct ring_opt *pfr;
++
++  if(!sk)
++    return 0;
++  /* Fetch the private data only after sk has been checked against NULL */
++  pfr = ring_sk(sk);
++
++#if defined(RING_DEBUG)
++  printk("RING: called ring_release\n");
++  printk("RING: ring_release entered\n");
++#endif
++
++  ring_remove(sk);
++  sock_orphan(sk);
++  sock->sk = NULL;
++
++  /* Free the ring buffer */
++  if(pfr->ring_memory) {
++    struct page *page, *page_end;
++
++    page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
++    for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
++      ClearPageReserved(page);
++
++    free_pages(pfr->ring_memory, pfr->order);
++  }
++  kfree(pfr);
++  ring_sk(sk) = NULL;
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++  skb_queue_purge(&sk->sk_write_queue);
++#endif
++  sock_put(sk);
++
++#if defined(RING_DEBUG)
++  printk("RING: ring_release leaving\n");
++#endif
++  return 0;
++}
++
++/* ********************************** */
++/*
++ * Create the ring for this socket and bind it to dev (0 = OK, -1 = error)
++ */
++static int packet_ring_bind(struct sock *sk, struct net_device *dev)
++{
++  u_int the_slot_len;
++  u_int32_t tot_mem;
++  struct ring_opt *pfr = ring_sk(sk);
++  struct page *page, *page_end;
++
++  if(!dev) return(-1);
++
++#if defined(RING_DEBUG)
++  printk("RING: packet_ring_bind(%s) called\n", dev->name);
++#endif
++
++  /* **********************************************
++
++  *************************************
++  *                                   *
++  *        FlowSlotInfo               *
++  *                                   *
++  ************************************* <-+
++  *        FlowSlot                   *   |
++  *************************************   |
++  *        FlowSlot                   *   |
++  *************************************   +- num_slots
++  *        FlowSlot                   *   |
++  *************************************   |
++  *        FlowSlot                   *   |
++  ************************************* <-+
++
++  ********************************************** */
++
++  the_slot_len = sizeof(u_char)    /* flowSlot.slot_state */
++    + sizeof(u_short) /* flowSlot.slot_len   */
++    + bucket_len      /* flowSlot.bucket     */;
++
++  tot_mem = sizeof(FlowSlotInfo) + num_slots*the_slot_len;
++
++  /*
++    Calculate the value of the order parameter used later.
++    See http://www.linuxjournal.com/article.php?sid=1133
++  */
++  for(pfr->order = 0;(PAGE_SIZE << pfr->order) < tot_mem; pfr->order++)  ;
++
++  /*
++    We now try to allocate the memory as required. If we fail
++    we try to allocate a smaller amount of memory (hence a
++    smaller ring). Failure is detected on ring_memory and not on
++    the order: order 0 is a valid one-page ring.
++  */
++  while((pfr->ring_memory = __get_free_pages(GFP_ATOMIC, pfr->order)) == 0)
++    if(pfr->order-- == 0)
++      break;
++  if(pfr->ring_memory == 0) {
++#if defined(RING_DEBUG)
++    printk("ERROR: not enough memory\n");
++#endif
++    return(-1);
++  } else {
++#if defined(RING_DEBUG)
++    printk("RING: succesfully allocated %lu KB [tot_mem=%d][order=%ld]\n",
++	   PAGE_SIZE >> (10 - pfr->order), tot_mem, pfr->order);
++#endif
++  }
++
++  tot_mem = PAGE_SIZE << pfr->order;
++  memset((char*)pfr->ring_memory, 0, tot_mem);
++
++  /* Now we need to reserve the pages */
++  page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
++  for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
++    SetPageReserved(page);
++
++  pfr->slots_info = (FlowSlotInfo*)pfr->ring_memory;
++  pfr->ring_slots = (char*)(pfr->ring_memory+sizeof(FlowSlotInfo));
++
++  pfr->slots_info->version     = RING_FLOWSLOT_VERSION;
++  pfr->slots_info->slot_len    = the_slot_len;
++  pfr->slots_info->tot_slots   = (tot_mem-sizeof(FlowSlotInfo))/the_slot_len;
++  pfr->slots_info->tot_mem     = tot_mem;
++  pfr->slots_info->sample_rate = sample_rate;
++
++#if defined(RING_DEBUG)
++  printk("RING: allocated %d slots [slot_len=%d][tot_mem=%u]\n",
++	 pfr->slots_info->tot_slots, pfr->slots_info->slot_len,
++	 pfr->slots_info->tot_mem);
++#endif
++
++#ifdef RING_MAGIC
++  {
++    int i;
++
++    for(i=0; i<pfr->slots_info->tot_slots; i++) {
++      unsigned long idx = i*pfr->slots_info->slot_len;
++      FlowSlot *slot = (FlowSlot*)&pfr->ring_slots[idx];
++      slot->magic = RING_MAGIC_VALUE; slot->slot_state = 0;
++    }
++  }
++#endif
++
++  pfr->insert_page_id = 1, pfr->insert_slot_id = 0;
++
++  /*
++    IMPORTANT
++    Leave this statement here as last one. In fact when
++    the ring_netdev != NULL the socket is ready to be used.
++  */
++  pfr->ring_netdev = dev;
++
++  return(0);
++}
++
++/* ************************************* */
++
++/* Bind to a device: look up the interface named in sa->sa_data and hand
++ * it to packet_ring_bind(). -EINVAL on a bad address or unknown device.
++ */
++static int ring_bind(struct socket *sock,
++		     struct sockaddr *sa, int addr_len)
++{
++  struct net_device *netdev;
++
++#if defined(RING_DEBUG)
++  printk("RING: ring_bind() called\n");
++#endif
++
++  /*
++   *	Only a plain PF_RING sockaddr is accepted
++   */
++  if ((addr_len != sizeof(struct sockaddr)) || (sa->sa_family != PF_RING))
++    return -EINVAL;
++
++  /* Force a terminating zero on the device name */
++  sa->sa_data[sizeof(sa->sa_data)-1] = '\0';
++
++#if defined(RING_DEBUG)
++  printk("RING: searching device %s\n", sa->sa_data);
++#endif
++
++  netdev = __dev_get_by_name(sa->sa_data);
++  if(netdev != NULL)
++    return(packet_ring_bind(sock->sk, netdev));
++
++#if defined(RING_DEBUG)
++  printk("RING: search failed\n");
++#endif
++  return(-EINVAL);
++}
++
++/* ************************************* */
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++
++volatile void* virt_to_kseg(volatile void* address) { /* 2.6 variant: walks the kernel page tables for address */
++  pte_t *pte;
++  pud_t *pud;
++  unsigned long addr = (unsigned long)address;
++		       
++  pud = pud_offset(pgd_offset_k((unsigned long) address), /* kernel pgd entry, then its pud entry */
++		   (unsigned long) address);
++
++  /*
++     High-memory support courtesy of
++     Brad Doctor <bdoctor@ps-ax.com>
++  */
++#if defined(CONFIG_X86_PAE) && (!defined(CONFIG_NOHIGHMEM))
++  pte = pte_offset_map(pmd_offset(pud, addr), addr);
++#else
++  pte = pmd_offset_map(pud, addr);
++#endif
++
++  return((volatile void*)pte_page(*pte)); /* NOTE(review): returns the pte_page() result as-is, with no *_none()/pte_present() checks and no in-page offset -- confirm this is intended */
++}
++
++#else /* 2.4 */
++
++/* Page-table walk adapted from http://www.scs.ch/~frey/linux/memorymap.html */
++volatile void *virt_to_kseg(volatile void *address) /* 2.4 variant: returns 0 when no present page backs address */
++{
++  pgd_t *pgd; pmd_t *pmd; pte_t *ptep, pte;
++  unsigned long va, ret = 0UL;
++
++  va=VMALLOC_VMADDR((unsigned long)address);
++
++  /* get the page directory. Use the kernel memory map. */
++  pgd = pgd_offset_k(va);
++
++  /* check whether we found an entry */
++  if (!pgd_none(*pgd))
++    {
++      /* get the page middle directory */
++      pmd = pmd_offset(pgd, va);
++      /* check whether we found an entry */
++      if (!pmd_none(*pmd))
++	{
++	  /* get a pointer to the page table entry */
++	  ptep = pte_offset(pmd, va);
++	  pte = *ptep;
++	  /* check for a valid page */
++	  if (pte_present(pte))
++	    {
++	      /* get the address the page is referring to */
++	      ret = (unsigned long)page_address(pte_page(pte));
++	      /* add the offset within the page to the page address */
++	      ret |= (va & (PAGE_SIZE -1));
++	    }
++	}
++    }
++  return((volatile void *)ret);
++}
++#endif
++
++/* ************************************* */
++
++/*
++ * mmap() hook: maps the first (vm_end - vm_start) bytes of the ring,
++ * FlowSlotInfo header included, into the caller's address space.
++ * Returns 0, -EINVAL (unbound socket or bad length) or -EAGAIN.
++ */
++static int ring_mmap(struct file *file,
++		     struct socket *sock,
++		     struct vm_area_struct *vma)
++{
++  struct sock *sk = sock->sk;
++  struct ring_opt *pfr = ring_sk(sk);
++  unsigned long size, start;
++  u_int pagesToMap;
++
++#if defined(RING_DEBUG)
++  printk("RING: ring_mmap() called\n");
++#endif
++
++  /* ring_netdev is the last field set by a successful bind */
++  if((pfr->ring_memory == 0) || (pfr->ring_netdev == NULL)) {
++#if defined(RING_DEBUG)
++    printk("RING: ring_mmap() failed: mapping area to an unbound socket\n");
++#endif
++    return -EINVAL;
++  }
++
++  size = (unsigned long)(vma->vm_end-vma->vm_start);
++
++  if(size % PAGE_SIZE) {
++#if defined(RING_DEBUG)
++    printk("RING: ring_mmap() failed: len is not multiple of PAGE_SIZE\n");
++#endif
++    return(-EINVAL);
++  }
++
++  /* if userspace tries to mmap beyond end of our buffer, fail */
++  if(size > pfr->slots_info->tot_mem) {
++#if defined(RING_DEBUG)
++    printk("proc_mmap() failed: area too large [%ld > %d]\n", size, pfr->slots_info->tot_mem);
++#endif
++    return(-EINVAL);
++  }
++
++  pagesToMap = size/PAGE_SIZE;
++
++#if defined(RING_DEBUG)
++  printk("RING: ring_mmap() called. %d pages to map\n", pagesToMap);
++  printk("RING: mmap [slot_len=%d][tot_slots=%d] for ring on device %s\n",
++	 pfr->slots_info->slot_len, pfr->slots_info->tot_slots,
++	 pfr->ring_netdev->name);
++#endif
++
++  /* we do not want to have this area swapped out, lock it */
++  vma->vm_flags |= VM_LOCKED;
++  start = vma->vm_start;
++
++  /* The mapping starts at the ring base, i.e. at the FlowSlotInfo header */
++  if(remap_page_range(
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++		      vma,
++#endif
++		      start,
++		      __pa(pfr->ring_memory),
++		      PAGE_SIZE*pagesToMap, vma->vm_page_prot)) {
++#if defined(RING_DEBUG)
++    printk("remap_page_range() failed\n");
++#endif
++    return(-EAGAIN);
++  }
++
++#if defined(RING_DEBUG)
++  printk("proc_mmap(pagesToMap=%d): success.\n", pagesToMap);
++#endif
++
++  return 0;
++}
++
++/* ************************************* */
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++static int ring_recvmsg(struct kiocb *iocb, struct socket *sock,
++			struct msghdr *msg, size_t len, int flags)
++#else
++  static int ring_recvmsg(struct socket *sock, struct msghdr *msg, int len,
++			  int flags, struct scm_cookie *scm)
++#endif
++{
++  /* Waits for packets (nothing is copied into msg); returns the queue length or -ERESTARTSYS */
++  FlowSlot* slot;
++  struct ring_opt *pfr = ring_sk(sock->sk);
++  u_int32_t queued_pkts, num_loops = 0;
++
++#if defined(RING_DEBUG)
++  printk("ring_recvmsg called\n");
++#endif
++  slot = get_remove_slot(pfr);
++
++  while((queued_pkts = num_queued_pkts(pfr)) < MIN_QUEUED_PKTS) {
++    /* Empty ring: sleep for real (a constant-true condition never blocks) and let signals through */
++    if((queued_pkts == 0)
++       && wait_event_interruptible(pfr->ring_slots_waitqueue,
++				   num_queued_pkts(pfr) > 0))
++      return(-ERESTARTSYS);
++#if defined(RING_DEBUG)
++    if(slot != NULL)
++      printk("-> ring_recvmsg returning %d [queued_pkts=%d][num_loops=%d]\n",
++	     slot->slot_state, queued_pkts, num_loops);
++#endif
++    /* Data is pending: give up after a bounded number of extra rounds */
++    if((queued_pkts > 0) && (num_loops++ > MAX_QUEUE_LOOPS))
++      break;
++  }
++#if defined(RING_DEBUG)
++  if(slot != NULL)
++    printk("ring_recvmsg is returning [queued_pkts=%d][num_loops=%d]\n",
++	   queued_pkts, num_loops);
++#endif
++  return(queued_pkts);
++}
++
++/* ************************************* */
++
++/* poll() hook: POLLIN | POLLRDNORM once the slot from get_remove_slot() is full */
++unsigned int ring_poll(struct file * file,
++		       struct socket *sock, poll_table *wait)
++{
++  FlowSlot* slot;
++  struct ring_opt *pfr = ring_sk(sock->sk);
++
++#if defined(RING_DEBUG)
++  printk("poll called\n");
++#endif
++  slot = get_remove_slot(pfr);
++
++  if((slot != NULL) && (slot->slot_state == 0))
++    poll_wait(file, &pfr->ring_slots_waitqueue, wait);
++
++#if defined(RING_DEBUG)
++  printk("poll returning %d\n", (slot != NULL) ? slot->slot_state : 0);
++#endif
++  /* slot_state is read again: it may have changed while registering */
++  if((slot != NULL) && (slot->slot_state == 1))
++    return(POLLIN | POLLRDNORM);
++  else
++    return(0);
++}
++
++/* ************************************* */
++
++int add_to_cluster_list(struct ring_cluster *el,
++			struct sock *sock) {
++  /* Append sock to cluster el: 0 on success, -1 when the cluster is full */
++  if(el->num_cluster_elements != CLUSTER_LEN) {
++    ring_sk_datatype(ring_sk(sock))->cluster_id = el->cluster_id;
++    el->sk[el->num_cluster_elements++] = sock;
++    return(0);
++  }
++
++  return(-1); /* Cluster full */
++}
++
++/* ************************************* */
++
++int remove_from_cluster_list(struct ring_cluster *el,
++			     struct sock *sock) {
++  int pos, k;
++
++  /* Locate sock among the cluster members */
++  for(pos=0; pos<CLUSTER_LEN; pos++)
++    if(el->sk[pos] == sock)
++      break;
++
++  if(pos == CLUSTER_LEN)
++    return(-1); /* Not found */
++
++  if(--el->num_cluster_elements > 0) {
++    /* Other members remain: close the gap left by the removed entry */
++    for(k=pos+1; k<CLUSTER_LEN; k++)
++      el->sk[k-1] = el->sk[k];
++    el->sk[CLUSTER_LEN-1] = NULL;
++  } else {
++    /* Empty cluster */
++    memset(el->sk, 0, sizeof(el->sk));
++  }
++
++  return(0);
++}
++
++/* ************************************* */
++
++/* Remove sock from its cluster: 0 = done or no cluster, <0 = not found */
++static int remove_from_cluster(struct sock *sock,
++			       struct ring_opt *pfr)
++{
++  struct ring_cluster *el;
++
++#if defined(RING_DEBUG)
++  printk("--> remove_from_cluster(%d)\n", pfr->cluster_id);
++#endif
++
++  if(pfr->cluster_id == 0 /* 0 = No Cluster */)
++    return(0); /* Nothing to do */
++
++  for(el = ring_cluster_list; el != NULL; el = el->next)
++    if(el->cluster_id == pfr->cluster_id) {
++      int rc = remove_from_cluster_list(el, sock);
++      if(rc == 0)
++	pfr->cluster_id = 0; /* Left the cluster: 0 = No Cluster */
++      return(rc);
++    }
++
++  return(-EINVAL); /* Not found */
++}
++
++/* ************************************* */
++
++/*
++ * Put sock into cluster cluster_id, creating the cluster on first use.
++ * ring_setsockopt() calls this with ring_mgmt_lock write-held, hence the
++ * non-sleeping allocation. Returns 0 on success, a negative value on error.
++ */
++static int add_to_cluster(struct sock *sock,
++			  struct ring_opt *pfr,
++			  u_short cluster_id)
++{
++  struct ring_cluster *el;
++
++#if defined(RING_DEBUG)
++  printk("--> add_to_cluster(%d)\n", cluster_id);
++#endif
++
++  if(cluster_id == 0 /* 0 = No Cluster */) return(-EINVAL);
++
++  if(pfr->cluster_id != 0)
++    remove_from_cluster(sock, pfr);
++
++  for(el = ring_cluster_list; el != NULL; el = el->next)
++    if(el->cluster_id == cluster_id)
++      return(add_to_cluster_list(el, sock));
++
++  /* There's no existing cluster. We need to create one */
++  if((el = kmalloc(sizeof(struct ring_cluster), GFP_ATOMIC)) == NULL)
++    return(-ENOMEM);
++
++  el->cluster_id = cluster_id;
++  el->num_cluster_elements = 1;
++  el->hashing_mode = cluster_per_flow; /* Default */
++  el->hashing_id   = 0;
++
++  memset(el->sk, 0, sizeof(el->sk));
++  el->sk[0] = sock;
++  el->next = ring_cluster_list;
++  ring_cluster_list = el;
++  pfr->cluster_id = cluster_id;
++
++  return(0); /* 0 = OK */
++}
++
++/* ************************************* */
++
++/* setsockopt() hook, code taken/inspired from core/sock.c. Options that
++ * are not handled here are passed on to sock_setsockopt().
++ * Returns 0 or a negative errno. */
++static int ring_setsockopt(struct socket *sock,
++			   int level, int optname,
++			   char *optval, int optlen)
++{
++  struct ring_opt *pfr = ring_sk(sock->sk);
++  struct net_device *old_dev, *new_dev;
++  int val, found, ret = 0;
++  u_int cluster_id;
++  char devName[8];
++
++  if((optlen < (int)sizeof(int)) || (pfr == NULL)) /* signed compare: negative lengths are refused */
++    return(-EINVAL);
++
++  if (get_user(val, (int *)optval))
++    return -EFAULT;
++
++  found = 1;
++
++  switch(optname)
++    {
++    case SO_ATTACH_FILTER:
++      ret = -EINVAL;
++      if (optlen == sizeof(struct sock_fprog)) {
++	unsigned int fsize;
++	struct sock_fprog fprog;
++	struct sk_filter *filter, *old_filter;
++
++	/* NOTE: do not call copy_from_user within a held spinlock (e.g.
++	   ring_mgmt_lock): with certain debugging enabled under 2.6.5 this
++	   caused problems, including hard lockups of the machine. */
++	if(copy_from_user(&fprog, optval, sizeof(fprog))) {
++	  ret = -EFAULT;
++	  break;
++	}
++	/* insns[] sits behind the sk_filter header: size the buffer for both */
++	fsize = sizeof(struct sock_filter) * fprog.len;
++	filter = kmalloc(fsize + sizeof(*filter), GFP_KERNEL);
++	if(filter == NULL) {
++	  ret = -ENOMEM;
++	  break;
++	}
++	if(copy_from_user(filter->insns, fprog.filter, fsize)) {
++	  kfree(filter); /* Do not leak the half-built filter */
++	  ret = -EFAULT;
++	  break;
++	}
++	filter->len = fprog.len;
++	if(sk_chk_filter(filter->insns, filter->len) != 0) {
++	  /* Bad filter specified: as before the socket ends up unfiltered */
++	  kfree(filter);
++	  filter = NULL;
++	} else
++	  ret = 0;
++	/* get the lock, swap the filter, release the lock */
++	write_lock(&ring_mgmt_lock);
++	old_filter = pfr->bpfFilter;
++	pfr->bpfFilter = filter;
++	write_unlock(&ring_mgmt_lock);
++	kfree(old_filter); /* Replaced filter, if any; kfree(NULL) is a no-op */
++      }
++      break;
++
++    case SO_DETACH_FILTER:
++      write_lock(&ring_mgmt_lock);
++      if(pfr->bpfFilter != NULL) {
++	kfree(pfr->bpfFilter);
++	pfr->bpfFilter = NULL;
++      } else
++	ret = -ENONET;
++      /* Unlock on both paths: the error path used to leave the lock held */
++      write_unlock(&ring_mgmt_lock);
++      break;
++
++    case SO_ADD_TO_CLUSTER:
++      if (optlen!=sizeof(val))
++	return -EINVAL;
++
++      if (copy_from_user(&cluster_id, optval, sizeof(cluster_id)))
++	return -EFAULT;
++
++      write_lock(&ring_mgmt_lock);
++      ret = add_to_cluster(sock->sk, pfr, cluster_id);
++      write_unlock(&ring_mgmt_lock);
++      break;
++
++    case SO_REMOVE_FROM_CLUSTER:
++      write_lock(&ring_mgmt_lock);
++      ret = remove_from_cluster(sock->sk, pfr);
++      write_unlock(&ring_mgmt_lock);
++      break;
++
++    case SO_SET_REFLECTOR:
++      if(optlen >= (sizeof(devName)-1))
++	return -EINVAL;
++
++      if(optlen > 0) {
++	if(copy_from_user(devName, optval, optlen))
++	  return -EFAULT;
++      }
++
++      devName[optlen] = '\0';
++
++#if defined(RING_DEBUG)
++      printk("+++ SO_SET_REFLECTOR(%s)\n", devName);
++#endif
++      /* dev_get_by_name() takes a reference: drop the one being replaced */
++      new_dev = dev_get_by_name(devName);
++      write_lock(&ring_mgmt_lock);
++      old_dev = pfr->reflector_dev;
++      pfr->reflector_dev = new_dev;
++      write_unlock(&ring_mgmt_lock);
++      if(old_dev != NULL)
++	dev_put(old_dev);
++
++#if defined(RING_DEBUG)
++      if(pfr->reflector_dev != NULL)
++	printk("SO_SET_REFLECTOR(%s): succeded\n", devName);
++      else
++	printk("SO_SET_REFLECTOR(%s): device unknown\n", devName);
++#endif
++      break;
++
++    default:
++      found = 0;
++      break;
++    }
++
++  if(found)
++    return(ret);
++  else
++    return(sock_setsockopt(sock, level, optname, optval, optlen));
++}
++
++/* ************************************* */
++
++/* ioctl() hook: interface ioctls go to dev_ioctl(), the rest is refused */
++static int ring_ioctl(struct socket *sock,
++		      unsigned int cmd, unsigned long arg)
++{
++  switch(cmd)
++    {
++    case SIOCGIFFLAGS:
++    case SIOCSIFFLAGS:
++    case SIOCGIFCONF:
++    case SIOCGIFMETRIC:
++    case SIOCSIFMETRIC:
++    case SIOCGIFMEM:
++    case SIOCSIFMEM:
++    case SIOCGIFMTU:
++    case SIOCSIFMTU:
++    case SIOCSIFLINK:
++    case SIOCGIFHWADDR:
++    case SIOCSIFHWADDR:
++    case SIOCSIFMAP:
++    case SIOCGIFMAP:
++    case SIOCSIFSLAVE:
++    case SIOCGIFSLAVE:
++    case SIOCGIFINDEX:
++    case SIOCGIFNAME:
++    case SIOCGIFCOUNT:
++    case SIOCSIFHWBROADCAST:
++      break; /* supported: forwarded below */
++    default:
++      return -EOPNOTSUPP;
++    }
++
++  return(dev_ioctl(cmd,(void *) arg));
++}
++
++/* ************************************* */
++
++static struct proto_ops ring_ops = { /* Socket operations table that ring_create() installs in sock->ops */
++  .family	=	PF_RING,
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++  .owner	=	THIS_MODULE,
++#endif
++
++  /* Operations that make no sense on ring sockets: generic sock_no_* stubs. */
++  .connect	=	sock_no_connect,
++  .socketpair	=	sock_no_socketpair,
++  .accept	=	sock_no_accept,
++  .getname	=	sock_no_getname,
++  .listen	=	sock_no_listen,
++  .shutdown	=	sock_no_shutdown,
++  .sendpage	=	sock_no_sendpage,
++  .sendmsg	=	sock_no_sendmsg,
++  .getsockopt	=	sock_no_getsockopt,
++
++  /* Now the operations that really occur (all implemented in this file). */
++  .release	=	ring_release,
++  .bind		=	ring_bind,
++  .mmap		=	ring_mmap,
++  .poll		=	ring_poll,
++  .setsockopt	=	ring_setsockopt,
++  .ioctl	=	ring_ioctl,
++  .recvmsg	=	ring_recvmsg,
++};
++
++/* ************************************ */
++
++static struct net_proto_family ring_family_ops = { /* Family descriptor handed to sock_register() by ring_init() */
++  .family	=	PF_RING,
++  .create	=	ring_create, /* called to set up each new PF_RING socket */
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++  .owner	=	THIS_MODULE,
++#endif
++};
++
++// BD: API changed in 2.6.12 (sk_alloc() takes a struct proto), ref:
++// http://svn.clkao.org/svnweb/linux/revision/?rev=28201
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
++static struct proto ring_proto = { /* passed to sk_alloc() by ring_create() on these kernels */
++  .name		=	"PF_RING",
++  .owner	=	THIS_MODULE,
++  .obj_size	=	sizeof(struct sock), /* plain struct sock: the ring_opt is kmalloc'ed separately in ring_create() */
++};
++#endif
++
++/* ************************************ */
++
++/* Module unload: unhook the packet handlers, then free the bookkeeping */
++static void __exit ring_exit(void)
++{
++  struct list_head *ptr, *tmp;
++
++  /* Stop packet delivery first so nothing walks the lists freed below */
++  set_skb_ring_handler(NULL);
++  set_buffer_ring_handler(NULL);
++  sock_unregister(PF_RING);
++
++  /* _safe iteration: ptr points inside the element that is being freed */
++  list_for_each_safe(ptr, tmp, &ring_table) {
++    kfree(list_entry(ptr, struct ring_element, list));
++  }
++
++  while(ring_cluster_list != NULL) {
++    struct ring_cluster *next = ring_cluster_list->next;
++    kfree(ring_cluster_list);
++    ring_cluster_list = next;
++  }
++  printk("PF_RING shut down.\n");
++}
++
++/* ************************************ */
++
++/* Module load: register the PF_RING family and hook the packet handlers */
++static int __init ring_init(void)
++{
++  int rc;
++
++  printk("Welcome to PF_RING %s\n(C) 2004 L.Deri <deri@ntop.org>\n",
++	 RING_VERSION);
++  INIT_LIST_HEAD(&ring_table);
++  ring_cluster_list = NULL;
++
++  /* Without the family there is nothing to hook (or to unregister later) */
++  if((rc = sock_register(&ring_family_ops)) != 0)
++    return(rc);
++  set_skb_ring_handler(skb_ring_handler);
++  set_buffer_ring_handler(buffer_ring_handler);
++  if(get_buffer_ring_handler() != buffer_ring_handler) {
++    printk("PF_RING: set_buffer_ring_handler FAILED\n");
++    set_skb_ring_handler(NULL);
++    set_buffer_ring_handler(NULL);
++    sock_unregister(PF_RING);
++    return -1;
++  }
++
++  printk("PF_RING: bucket length    %d bytes\n", bucket_len);
++  printk("PF_RING: ring slots       %d\n", num_slots);
++  printk("PF_RING: sample rate      %d [1=no sampling]\n", sample_rate);
++  printk("PF_RING: capture TX       %s\n",
++	 enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
++  printk("PF_RING: transparent mode %s\n",
++	 transparent_mode ? "Yes" : "No");
++  printk("PF_RING initialized correctly.\n");
++  return 0;
++}
++
++module_init(ring_init); /* entry point run at module load */
++module_exit(ring_exit); /* exit point run at module unload */
++MODULE_LICENSE("GPL");
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++MODULE_ALIAS_NETPROTO(PF_RING); /* protocol-family alias, 2.6+ only */
++#endif
diff --git a/target/linux/linux-2.6/patches/generic/104-pf_ring.patch b/target/linux/linux-2.6/patches/generic/104-pf_ring.patch
new file mode 100644
index 000000000..759fb2cc9
--- /dev/null
+++ b/target/linux/linux-2.6/patches/generic/104-pf_ring.patch
@@ -0,0 +1,5299 @@
+diff --unified --recursive --new-file linux-2.6.12.5/include/linux/ring.h linux-2.6.12.5-1-686-smp-ring3/include/linux/ring.h
+--- linux-2.6.12.5/include/linux/ring.h	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.12.5-1-686-smp-ring3/include/linux/ring.h	2005-10-22 23:50:44.951445250 +0200
+@@ -0,0 +1,108 @@
++/*
++ * Definitions for packet ring
++ *
++ * 2004 - Luca Deri <deri@ntop.org>
++ */
++#ifndef __RING_H
++#define __RING_H
++
++
++#define INCLUDE_MAC_INFO
++
++#ifdef INCLUDE_MAC_INFO
++#define SKB_DISPLACEMENT    14 /* Include MAC address information */
++#else
++#define SKB_DISPLACEMENT    0  /* Do NOT include MAC address information */
++#endif
++
++#define RING_MAGIC
++#define RING_MAGIC_VALUE      0x88
++#define RING_FLOWSLOT_VERSION    5
++#define RING_VERSION          "3.0"
++
++#define SO_ADD_TO_CLUSTER        99
++#define SO_REMOVE_FROM_CLUSTER  100
++#define SO_SET_REFLECTOR        101
++
++/* *********************************** */
++
++#ifndef HAVE_PCAP /* local definition, used only when HAVE_PCAP is not defined */
++struct pcap_pkthdr {
++  struct timeval ts;    /* time stamp */
++  u_int32_t caplen;     /* length of portion present */
++  u_int32_t len;        /* length of this packet (off wire) */
++};
++#endif
++
++/* *********************************** */
++
++enum cluster_type { /* hashing modes of a socket cluster */
++  cluster_per_flow = 0, /* the default assigned to a newly created cluster */
++  cluster_round_robin
++};
++
++/* *********************************** */
++
++#define RING_MIN_SLOT_SIZE    (60+sizeof(struct pcap_pkthdr))
++#define RING_MAX_SLOT_SIZE    (1514+sizeof(struct pcap_pkthdr))
++
++/* *********************************** */
++
++typedef struct flowSlotInfo { /* Header placed at the start of the ring memory, ahead of the slots */
++  u_int16_t version, sample_rate; /* version is set to RING_FLOWSLOT_VERSION when the ring is created */
++  u_int32_t tot_slots, slot_len, tot_mem; /* number of slots, bytes per slot, bytes in the whole ring */
++  
++  u_int64_t tot_pkts, tot_lost; /* presumably packet/loss counters -- TODO confirm against ring_sock.c */
++  u_int64_t tot_insert, tot_read;  
++  u_int16_t insert_idx;
++  u_int16_t remove_idx;
++} FlowSlotInfo;
++
++/* *********************************** */
++
++typedef struct flowSlot { /* One ring slot; only the first byte of the bucket is declared here */
++#ifdef RING_MAGIC
++  u_char     magic;      /* Sanity marker. NOTE(review): the 2.4 ring setup code stores RING_MAGIC_VALUE here, not zero */
++#endif
++  u_char     slot_state; /* 0=empty, 1=full   */
++  u_char     bucket;     /* bucket[bucketLen] */
++} FlowSlot;
++
++/* *********************************** */
++
++#ifdef __KERNEL__ 
++
++FlowSlotInfo* getRingPtr(void); /* NOTE(review): no definition of these four prototypes is visible in this chunk -- confirm they are still implemented */
++int allocateRing(char *deviceName, u_int numSlots,
++		 u_int bucketLen, u_int sampleRate);
++unsigned int pollRing(struct file *fp, struct poll_table_struct * wait);
++void deallocateRing(void);
++
++/* ***** Hooks exported by net/core/dev.c ***** */
++
++typedef int (*handle_ring_skb)(struct sk_buff *skb, /* hook invoked with an sk_buff */
++			       u_char recv_packet, u_char real_skb);
++extern handle_ring_skb get_skb_ring_handler(void);
++extern void set_skb_ring_handler(handle_ring_skb the_handler);
++extern void do_skb_ring_handler(struct sk_buff *skb,
++				u_char recv_packet, u_char real_skb);
++
++typedef int (*handle_ring_buffer)(struct net_device *dev, /* hook invoked with a raw data buffer */
++				     char *data, int len);
++extern handle_ring_buffer get_buffer_ring_handler(void);
++extern void set_buffer_ring_handler(handle_ring_buffer the_handler);
++extern int do_buffer_ring_handler(struct net_device *dev,
++				  char *data, int len);
++#endif /* __KERNEL__  */
++
++/* *********************************** */
++
++#define PF_RING          27      /* Packet Ring */
++#define SOCK_RING        PF_RING
++
++/* ioctl() */
++#define SIORINGPOLL      0x8888
++
++/* *********************************** */
++
++#endif /* __RING_H */
+diff --unified --recursive --new-file linux-2.6.12.5/net/Kconfig linux-2.6.12.5-1-686-smp-ring3/net/Kconfig
+--- linux-2.6.12.5/net/Kconfig	2005-08-15 02:20:18.000000000 +0200
++++ linux-2.6.12.5-1-686-smp-ring3/net/Kconfig	2005-10-22 23:50:45.535481750 +0200
+@@ -72,6 +72,7 @@
+ 
+ 	  Say Y unless you know what you are doing.
+ 
++source "net/ring/Kconfig"
+ config INET
+ 	bool "TCP/IP networking"
+ 	---help---
+diff --unified --recursive --new-file linux-2.6.12.5/net/Makefile linux-2.6.12.5-1-686-smp-ring3/net/Makefile
+--- linux-2.6.12.5/net/Makefile	2005-08-15 02:20:18.000000000 +0200
++++ linux-2.6.12.5-1-686-smp-ring3/net/Makefile	2005-10-22 23:50:45.491479000 +0200
+@@ -41,6 +41,7 @@
+ obj-$(CONFIG_DECNET)		+= decnet/
+ obj-$(CONFIG_ECONET)		+= econet/
+ obj-$(CONFIG_VLAN_8021Q)	+= 8021q/
++obj-$(CONFIG_RING)		+= ring/
+ obj-$(CONFIG_IP_SCTP)		+= sctp/
+ 
+ ifeq ($(CONFIG_NET),y)
+diff --unified --recursive --new-file linux-2.6.12.5/net/Makefile.ORG linux-2.6.12.5-1-686-smp-ring3/net/Makefile.ORG
+--- linux-2.6.12.5/net/Makefile.ORG	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.12.5-1-686-smp-ring3/net/Makefile.ORG	2005-10-22 23:50:45.483478500 +0200
+@@ -0,0 +1,48 @@
++#
++# Makefile for the linux networking.
++#
++# 2 Sep 2000, Christoph Hellwig <hch@infradead.org>
++# Rewritten to use lists instead of if-statements.
++#
++
++obj-y	:= nonet.o
++
++obj-$(CONFIG_NET)		:= socket.o core/
++
++tmp-$(CONFIG_COMPAT) 		:= compat.o
++obj-$(CONFIG_NET)		+= $(tmp-y)
++
++# LLC has to be linked before the files in net/802/
++obj-$(CONFIG_LLC)		+= llc/
++obj-$(CONFIG_NET)		+= ethernet/ 802/ sched/ netlink/
++obj-$(CONFIG_INET)		+= ipv4/
++obj-$(CONFIG_XFRM)		+= xfrm/
++obj-$(CONFIG_UNIX)		+= unix/
++ifneq ($(CONFIG_IPV6),)
++obj-y				+= ipv6/
++endif
++obj-$(CONFIG_PACKET)		+= packet/
++obj-$(CONFIG_NET_KEY)		+= key/
++obj-$(CONFIG_NET_SCHED)		+= sched/
++obj-$(CONFIG_BRIDGE)		+= bridge/
++obj-$(CONFIG_IPX)		+= ipx/
++obj-$(CONFIG_ATALK)		+= appletalk/
++obj-$(CONFIG_WAN_ROUTER)	+= wanrouter/
++obj-$(CONFIG_X25)		+= x25/
++obj-$(CONFIG_LAPB)		+= lapb/
++obj-$(CONFIG_NETROM)		+= netrom/
++obj-$(CONFIG_ROSE)		+= rose/
++obj-$(CONFIG_AX25)		+= ax25/
++obj-$(CONFIG_IRDA)		+= irda/
++obj-$(CONFIG_BT)		+= bluetooth/
++obj-$(CONFIG_SUNRPC)		+= sunrpc/
++obj-$(CONFIG_RXRPC)		+= rxrpc/
++obj-$(CONFIG_ATM)		+= atm/
++obj-$(CONFIG_DECNET)		+= decnet/
++obj-$(CONFIG_ECONET)		+= econet/
++obj-$(CONFIG_VLAN_8021Q)	+= 8021q/
++obj-$(CONFIG_IP_SCTP)		+= sctp/
++
++ifeq ($(CONFIG_NET),y)
++obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
++endif
+diff --unified --recursive --new-file linux-2.6.12.5/net/core/dev.c linux-2.6.12.5-1-686-smp-ring3/net/core/dev.c
+--- linux-2.6.12.5/net/core/dev.c	2005-08-15 02:20:18.000000000 +0200
++++ linux-2.6.12.5-1-686-smp-ring3/net/core/dev.c	2005-10-22 23:50:45.479478250 +0200
+@@ -115,6 +115,56 @@
+ #endif	/* CONFIG_NET_RADIO */
+ #include <asm/current.h>
+ 
++#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
++
++/* #define RING_DEBUG */
++
++#include <linux/ring.h>
++#include <linux/version.h>
++
++static handle_ring_skb ring_handler = NULL; /* skb hook set through set_skb_ring_handler(); NULL = none */
++
++handle_ring_skb get_skb_ring_handler() { return(ring_handler); } /* current skb hook, possibly NULL */
++
++void set_skb_ring_handler(handle_ring_skb the_handler) { /* install the skb hook; NULL removes it */
++  ring_handler = the_handler;
++}
++
++void do_skb_ring_handler(struct sk_buff *skb, /* call the skb hook if one is installed; its result is discarded */
++			 u_char recv_packet, u_char real_skb) {
++  if(ring_handler)
++    ring_handler(skb, recv_packet, real_skb);
++}
++
++/* ******************* */
++
++static handle_ring_buffer buffer_ring_handler = NULL; /* raw-buffer hook set through set_buffer_ring_handler(); NULL = none */
++
++handle_ring_buffer get_buffer_ring_handler() { return(buffer_ring_handler); } /* current raw-buffer hook, possibly NULL */
++
++void set_buffer_ring_handler(handle_ring_buffer the_handler) { /* install the raw-buffer hook; NULL removes it */
++  buffer_ring_handler = the_handler;
++}
++
++int do_buffer_ring_handler(struct net_device *dev, char *data, int len) {
++  /* 0 when no hook is installed, 1 once the hook has been called */
++  if(!buffer_ring_handler)
++    return(0);
++  buffer_ring_handler(dev, data, len);
++  return(1);
++}
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) /* the exports below are only emitted for 2.6+ kernels */
++EXPORT_SYMBOL(get_skb_ring_handler);
++EXPORT_SYMBOL(set_skb_ring_handler);
++EXPORT_SYMBOL(do_skb_ring_handler);
++
++EXPORT_SYMBOL(get_buffer_ring_handler);
++EXPORT_SYMBOL(set_buffer_ring_handler);
++EXPORT_SYMBOL(do_buffer_ring_handler);
++#endif
++
++#endif
+ /* This define, if set, will randomly drop a packet when congestion
+  * is more than moderate.  It helps fairness in the multi-interface
+  * case when one of them is a hog, but it kills performance for the
+@@ -1293,6 +1343,10 @@
+ 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
+ #endif
+ 	if (q->enqueue) {
++#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
++	if(ring_handler) ring_handler(skb, 0, 1);
++#endif /* CONFIG_RING */
++
+ 		/* Grab device queue */
+ 		spin_lock(&dev->queue_lock);
+ 
+@@ -1509,6 +1563,13 @@
+ 
+ 	preempt_disable();
+ 	err = netif_rx(skb);
++#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
++	if(ring_handler && ring_handler(skb, 1, 1)) {
++	  /* The packet has been copied into a ring */
++	  return(NET_RX_SUCCESS);
++	}
++#endif /* CONFIG_RING */
++
+ 	if (local_softirq_pending())
+ 		do_softirq();
+ 	preempt_enable();
+@@ -1655,6 +1716,13 @@
+ 	int ret = NET_RX_DROP;
+ 	unsigned short type;
+ 
++#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
++	if(ring_handler && ring_handler(skb, 1, 1)) {
++	  /* The packet has been copied into a ring */
++	  return(NET_RX_SUCCESS);
++	}
++#endif /* CONFIG_RING */
++
+ 	/* if we've gotten here through NAPI, check netpoll */
+ 	if (skb->dev->poll && netpoll_rx(skb))
+ 		return NET_RX_DROP;
+diff --unified --recursive --new-file linux-2.6.12.5/net/core/dev.c.ORG linux-2.6.12.5-1-686-smp-ring3/net/core/dev.c.ORG
+--- linux-2.6.12.5/net/core/dev.c.ORG	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.12.5-1-686-smp-ring3/net/core/dev.c.ORG	2005-10-22 23:50:45.203461000 +0200
+@@ -0,0 +1,3385 @@
++/*
++ * 	NET3	Protocol independent device support routines.
++ *
++ *		This program is free software; you can redistribute it and/or
++ *		modify it under the terms of the GNU General Public License
++ *		as published by the Free Software Foundation; either version
++ *		2 of the License, or (at your option) any later version.
++ *
++ *	Derived from the non IP parts of dev.c 1.0.19
++ * 		Authors:	Ross Biro
++ *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
++ *				Mark Evans, <evansmp@uhura.aston.ac.uk>
++ *
++ *	Additional Authors:
++ *		Florian la Roche <rzsfl@rz.uni-sb.de>
++ *		Alan Cox <gw4pts@gw4pts.ampr.org>
++ *		David Hinds <dahinds@users.sourceforge.net>
++ *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
++ *		Adam Sulmicki <adam@cfar.umd.edu>
++ *              Pekka Riikonen <priikone@poesidon.pspt.fi>
++ *
++ *	Changes:
++ *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
++ *              			to 2 if register_netdev gets called
++ *              			before net_dev_init & also removed a
++ *              			few lines of code in the process.
++ *		Alan Cox	:	device private ioctl copies fields back.
++ *		Alan Cox	:	Transmit queue code does relevant
++ *					stunts to keep the queue safe.
++ *		Alan Cox	:	Fixed double lock.
++ *		Alan Cox	:	Fixed promisc NULL pointer trap
++ *		????????	:	Support the full private ioctl range
++ *		Alan Cox	:	Moved ioctl permission check into
++ *					drivers
++ *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
++ *		Alan Cox	:	100 backlog just doesn't cut it when
++ *					you start doing multicast video 8)
++ *		Alan Cox	:	Rewrote net_bh and list manager.
++ *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
++ *		Alan Cox	:	Took out transmit every packet pass
++ *					Saved a few bytes in the ioctl handler
++ *		Alan Cox	:	Network driver sets packet type before
++ *					calling netif_rx. Saves a function
++ *					call a packet.
++ *		Alan Cox	:	Hashed net_bh()
++ *		Richard Kooijman:	Timestamp fixes.
++ *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
++ *		Alan Cox	:	Device lock protection.
++ *		Alan Cox	: 	Fixed nasty side effect of device close
++ *					changes.
++ *		Rudi Cilibrasi	:	Pass the right thing to
++ *					set_mac_address()
++ *		Dave Miller	:	32bit quantity for the device lock to
++ *					make it work out on a Sparc.
++ *		Bjorn Ekwall	:	Added KERNELD hack.
++ *		Alan Cox	:	Cleaned up the backlog initialise.
++ *		Craig Metz	:	SIOCGIFCONF fix if space for under
++ *					1 device.
++ *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
++ *					is no device open function.
++ *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
++ *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
++ *		Cyrus Durgin	:	Cleaned for KMOD
++ *		Adam Sulmicki   :	Bug Fix : Network Device Unload
++ *					A network device unload needs to purge
++ *					the backlog queue.
++ *	Paul Rusty Russell	:	SIOCSIFNAME
++ *              Pekka Riikonen  :	Netdev boot-time settings code
++ *              Andrew Morton   :       Make unregister_netdevice wait
++ *              			indefinitely on dev->refcnt
++ * 		J Hadi Salim	:	- Backlog queue sampling
++ *				        - netif_rx() feedback
++ */
++
++#include <asm/uaccess.h>
++#include <asm/system.h>
++#include <linux/bitops.h>
++#include <linux/config.h>
++#include <linux/cpu.h>
++#include <linux/types.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/string.h>
++#include <linux/mm.h>
++#include <linux/socket.h>
++#include <linux/sockios.h>
++#include <linux/errno.h>
++#include <linux/interrupt.h>
++#include <linux/if_ether.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/notifier.h>
++#include <linux/skbuff.h>
++#include <net/sock.h>
++#include <linux/rtnetlink.h>
++#include <linux/proc_fs.h>
++#include <linux/seq_file.h>
++#include <linux/stat.h>
++#include <linux/if_bridge.h>
++#include <linux/divert.h>
++#include <net/dst.h>
++#include <net/pkt_sched.h>
++#include <net/checksum.h>
++#include <linux/highmem.h>
++#include <linux/init.h>
++#include <linux/kmod.h>
++#include <linux/module.h>
++#include <linux/kallsyms.h>
++#include <linux/netpoll.h>
++#include <linux/rcupdate.h>
++#include <linux/delay.h>
++#ifdef CONFIG_NET_RADIO
++#include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
++#include <net/iw_handler.h>
++#endif	/* CONFIG_NET_RADIO */
++#include <asm/current.h>
++
++/* This define, if set, will randomly drop a packet when congestion
++ * is more than moderate.  It helps fairness in the multi-interface
++ * case when one of them is a hog, but it kills performance for the
++ * single interface case so it is off now by default.
++ */
++#undef RAND_LIE
++
++/* Setting this will sample the queue lengths and thus congestion
++ * via a timer instead of as each packet is received.
++ */
++#undef OFFLINE_SAMPLE
++
++/*
++ *	The list of packet types we will receive (as opposed to discard)
++ *	and the routines to invoke.
++ *
++ *	Why 16. Because with 16 the only overlap we get on a hash of the
++ *	low nibble of the protocol value is RARP/SNAP/X.25.
++ *
++ *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
++ *             sure which should go first, but I bet it won't make much
++ *             difference if we are running VLANs.  The good news is that
++ *             this protocol won't be in the list unless compiled in, so
++ *             the average user (w/out VLANs) will not be adversely affected.
++ *             --BLG
++ *
++ *		0800	IP
++ *		8100    802.1Q VLAN
++ *		0001	802.3
++ *		0002	AX.25
++ *		0004	802.2
++ *		8035	RARP
++ *		0005	SNAP
++ *		0805	X.25
++ *		0806	ARP
++ *		8137	IPX
++ *		0009	Localtalk
++ *		86DD	IPv6
++ */
++
++static DEFINE_SPINLOCK(ptype_lock);
++static struct list_head ptype_base[16];	/* 16 way hashed list */
++static struct list_head ptype_all;		/* Taps */
++
++#ifdef OFFLINE_SAMPLE
++static void sample_queue(unsigned long dummy);
++static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
++#endif
++
++/*
++ * The @dev_base list is protected by @dev_base_lock and the rtnl
++ * semaphore.
++ *
++ * Pure readers hold dev_base_lock for reading.
++ *
++ * Writers must hold the rtnl semaphore while they loop through the
++ * dev_base list, and hold dev_base_lock for writing when they do the
++ * actual updates.  This allows pure readers to access the list even
++ * while a writer is preparing to update it.
++ *
++ * To put it another way, dev_base_lock is held for writing only to
++ * protect against pure readers; the rtnl semaphore provides the
++ * protection against other writers.
++ *
++ * See, for example usages, register_netdevice() and
++ * unregister_netdevice(), which must be called with the rtnl
++ * semaphore held.
++ */
++struct net_device *dev_base;
++static struct net_device **dev_tail = &dev_base;
++DEFINE_RWLOCK(dev_base_lock);
++
++EXPORT_SYMBOL(dev_base);
++EXPORT_SYMBOL(dev_base_lock);
++
++#define NETDEV_HASHBITS	8
++static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
++static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
++
++static inline struct hlist_head *dev_name_hash(const char *name)
++{
++	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
++	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
++}
++
++static inline struct hlist_head *dev_index_hash(int ifindex)
++{
++	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
++}
++
++/*
++ *	Our notifier list
++ */
++
++static struct notifier_block *netdev_chain;
++
++/*
++ *	Device drivers call our routines to queue packets here. We empty the
++ *	queue in the local softnet handler.
++ */
++DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
++
++#ifdef CONFIG_SYSFS
++extern int netdev_sysfs_init(void);
++extern int netdev_register_sysfs(struct net_device *);
++extern void netdev_unregister_sysfs(struct net_device *);
++#else
++#define netdev_sysfs_init()	 	(0)
++#define netdev_register_sysfs(dev)	(0)
++#define	netdev_unregister_sysfs(dev)	do { } while(0)
++#endif
++
++
++/*******************************************************************************
++
++		Protocol management and registration routines
++
++*******************************************************************************/
++
++/*
++ *	For efficiency
++ */
++
++int netdev_nit;
++
++/*
++ *	Add a protocol ID to the list. Now that the input handler is
++ *	smarter we can dispense with all the messy stuff that used to be
++ *	here.
++ *
++ *	BEWARE!!! Protocol handlers, mangling input packets,
++ *	MUST BE last in hash buckets and checking protocol handlers
++ *	MUST start from promiscuous ptype_all chain in net_bh.
++ *	It is true now, do not change it.
++ *	Explanation follows: if protocol handler, mangling packet, will
++ *	be the first on list, it is not able to sense, that packet
++ *	is cloned and should be copied-on-write, so that it will
++ *	change it and subsequent readers will get broken packet.
++ *							--ANK (980803)
++ */
++
++/**
++ *	dev_add_pack - add packet handler
++ *	@pt: packet type declaration
++ *
++ *	Add a protocol handler to the networking stack. The passed &packet_type
++ *	is linked into kernel lists and may not be freed until it has been
++ *	removed from the kernel lists.
++ *
++ *	This call does not sleep therefore it can not 
++ *	guarantee all CPU's that are in middle of receiving packets
++ *	will see the new packet type (until the next received packet).
++ */
++
++void dev_add_pack(struct packet_type *pt)
++{
++	int hash;
++
++	spin_lock_bh(&ptype_lock);
++	if (pt->type == htons(ETH_P_ALL)) {
++		netdev_nit++;
++		list_add_rcu(&pt->list, &ptype_all);
++	} else {
++		hash = ntohs(pt->type) & 15;
++		list_add_rcu(&pt->list, &ptype_base[hash]);
++	}
++	spin_unlock_bh(&ptype_lock);
++}
++
++extern void linkwatch_run_queue(void);
++
++
++
++/**
++ *	__dev_remove_pack	 - remove packet handler
++ *	@pt: packet type declaration
++ *
++ *	Remove a protocol handler that was previously added to the kernel
++ *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
++ *	from the kernel lists, but unlike dev_remove_pack() this function does
++ *	not wait for readers, so it is not yet safe to free or reuse it on
++ *	return.
++ *      The packet type might still be in use by receivers
++ *	and must not be freed until after all the CPU's have gone
++ *	through a quiescent state.
++ */
++void __dev_remove_pack(struct packet_type *pt)
++{
++	struct list_head *head;
++	struct packet_type *pt1;
++
++	spin_lock_bh(&ptype_lock);
++
++	if (pt->type == htons(ETH_P_ALL)) {
++		netdev_nit--;
++		head = &ptype_all;
++	} else
++		head = &ptype_base[ntohs(pt->type) & 15];
++
++	list_for_each_entry(pt1, head, list) {
++		if (pt == pt1) {
++			list_del_rcu(&pt->list);
++			goto out;
++		}
++	}
++
++	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
++out:
++	spin_unlock_bh(&ptype_lock);
++}
++/**
++ *	dev_remove_pack	 - remove packet handler
++ *	@pt: packet type declaration
++ *
++ *	Remove a protocol handler that was previously added to the kernel
++ *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
++ *	from the kernel lists and can be freed or reused once this function
++ *	returns.
++ *
++ *	This call sleeps to guarantee that no CPU is looking at the packet
++ *	type after return.
++ */
++void dev_remove_pack(struct packet_type *pt)
++{
++	__dev_remove_pack(pt);
++	
++	synchronize_net();
++}
++
++/******************************************************************************
++
++		      Device Boot-time Settings Routines
++
++*******************************************************************************/
++
++/* Boot time configuration table */
++static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
++
++/**
++ *	netdev_boot_setup_add	- add new setup entry
++ *	@name: name of the device
++ *	@map: configured settings for the device
++ *
++ *	Adds new setup entry to the dev_boot_setup list.  The function
++ *	returns 0 on error and 1 on success.  This is a generic routine to
++ *	all netdevices.
++ */
++static int netdev_boot_setup_add(char *name, struct ifmap *map)
++{
++	struct netdev_boot_setup *s;
++	int i;
++
++	s = dev_boot_setup;
++	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
++		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
++			memset(s[i].name, 0, sizeof(s[i].name));
++			strcpy(s[i].name, name);
++			memcpy(&s[i].map, map, sizeof(s[i].map));
++			break;
++		}
++	}
++
++	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
++}
++
++/**
++ *	netdev_boot_setup_check	- check boot time settings
++ *	@dev: the netdevice
++ *
++ * 	Check boot time settings for the device.
++ *	The found settings are set for the device to be used
++ *	later in the device probing.
++ *	Returns 0 if no settings found, 1 if they are.
++ */
++int netdev_boot_setup_check(struct net_device *dev)
++{
++	struct netdev_boot_setup *s = dev_boot_setup;
++	int i;
++
++	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
++		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
++		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
++			dev->irq 	= s[i].map.irq;
++			dev->base_addr 	= s[i].map.base_addr;
++			dev->mem_start 	= s[i].map.mem_start;
++			dev->mem_end 	= s[i].map.mem_end;
++			return 1;
++		}
++	}
++	return 0;
++}
++
++
++/**
++ *	netdev_boot_base	- get address from boot time settings
++ *	@prefix: prefix for network device
++ *	@unit: id for network device
++ *
++ * 	Check boot time settings for the base address of device.
++ *	The found settings are set for the device to be used
++ *	later in the device probing.
++ *	Returns 0 if no settings found.
++ */
++unsigned long netdev_boot_base(const char *prefix, int unit)
++{
++	const struct netdev_boot_setup *s = dev_boot_setup;
++	char name[IFNAMSIZ];
++	int i;
++
++	sprintf(name, "%s%d", prefix, unit);
++
++	/*
++	 * If device already registered then return base of 1
++	 * to indicate not to probe for this interface
++	 */
++	if (__dev_get_by_name(name))
++		return 1;
++
++	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
++		if (!strcmp(name, s[i].name))
++			return s[i].map.base_addr;
++	return 0;
++}
++
++/*
++ * Saves at boot time configured settings for any netdevice.
++ */
++int __init netdev_boot_setup(char *str)
++{
++	int ints[5];
++	struct ifmap map;
++
++	str = get_options(str, ARRAY_SIZE(ints), ints);
++	if (!str || !*str)
++		return 0;
++
++	/* Save settings */
++	memset(&map, 0, sizeof(map));
++	if (ints[0] > 0)
++		map.irq = ints[1];
++	if (ints[0] > 1)
++		map.base_addr = ints[2];
++	if (ints[0] > 2)
++		map.mem_start = ints[3];
++	if (ints[0] > 3)
++		map.mem_end = ints[4];
++
++	/* Add new entry to the list */
++	return netdev_boot_setup_add(str, &map);
++}
++
++__setup("netdev=", netdev_boot_setup);
++
++/*******************************************************************************
++
++			    Device Interface Subroutines
++
++*******************************************************************************/
++
++/**
++ *	__dev_get_by_name	- find a device by its name
++ *	@name: name to find
++ *
++ *	Find an interface by name. Must be called under RTNL semaphore
++ *	or @dev_base_lock. If the name is found a pointer to the device
++ *	is returned. If the name is not found then %NULL is returned. The
++ *	reference counters are not incremented so the caller must be
++ *	careful with locks.
++ */
++
++struct net_device *__dev_get_by_name(const char *name)
++{
++	struct hlist_node *p;
++
++	hlist_for_each(p, dev_name_hash(name)) {
++		struct net_device *dev
++			= hlist_entry(p, struct net_device, name_hlist);
++		if (!strncmp(dev->name, name, IFNAMSIZ))
++			return dev;
++	}
++	return NULL;
++}
++
++/**
++ *	dev_get_by_name		- find a device by its name
++ *	@name: name to find
++ *
++ *	Find an interface by name. This can be called from any
++ *	context and does its own locking. The returned handle has
++ *	the usage count incremented and the caller must use dev_put() to
++ *	release it when it is no longer needed. %NULL is returned if no
++ *	matching device is found.
++ */
++
++struct net_device *dev_get_by_name(const char *name)
++{
++	struct net_device *dev;
++
++	read_lock(&dev_base_lock);
++	dev = __dev_get_by_name(name);
++	if (dev)
++		dev_hold(dev);
++	read_unlock(&dev_base_lock);
++	return dev;
++}
++
++/**
++ *	__dev_get_by_index - find a device by its ifindex
++ *	@ifindex: index of device
++ *
++ *	Search for an interface by index. Returns %NULL if the device
++ *	is not found or a pointer to the device. The device has not
++ *	had its reference counter increased so the caller must be careful
++ *	about locking. The caller must hold either the RTNL semaphore
++ *	or @dev_base_lock.
++ */
++
++struct net_device *__dev_get_by_index(int ifindex)
++{
++	struct hlist_node *p;
++
++	hlist_for_each(p, dev_index_hash(ifindex)) {
++		struct net_device *dev
++			= hlist_entry(p, struct net_device, index_hlist);
++		if (dev->ifindex == ifindex)
++			return dev;
++	}
++	return NULL;
++}
++
++
++/**
++ *	dev_get_by_index - find a device by its ifindex
++ *	@ifindex: index of device
++ *
++ *	Search for an interface by index. Returns NULL if the device
++ *	is not found or a pointer to the device. The device returned has
++ *	had a reference added and the pointer is safe until the user calls
++ *	dev_put to indicate they have finished with it.
++ */
++
++struct net_device *dev_get_by_index(int ifindex)
++{
++	struct net_device *dev;
++
++	read_lock(&dev_base_lock);
++	dev = __dev_get_by_index(ifindex);
++	if (dev)
++		dev_hold(dev);
++	read_unlock(&dev_base_lock);
++	return dev;
++}
++
++/**
++ *	dev_getbyhwaddr - find a device by its hardware address
++ *	@type: media type of device
++ *	@ha: hardware address
++ *
++ *	Search for an interface by MAC address. Returns NULL if the device
++ *	is not found or a pointer to the device. The caller must hold the
++ *	rtnl semaphore. The returned device has not had its ref count increased
++ *	and the caller must therefore be careful about locking
++ *
++ *	BUGS:
++ *	If the API was consistent this would be __dev_get_by_hwaddr
++ */
++
++struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
++{
++	struct net_device *dev;
++
++	ASSERT_RTNL();
++
++	for (dev = dev_base; dev; dev = dev->next)
++		if (dev->type == type &&
++		    !memcmp(dev->dev_addr, ha, dev->addr_len))
++			break;
++	return dev;
++}
++
++struct net_device *dev_getfirstbyhwtype(unsigned short type)
++{
++	struct net_device *dev;
++
++	rtnl_lock();
++	for (dev = dev_base; dev; dev = dev->next) {
++		if (dev->type == type) {
++			dev_hold(dev);
++			break;
++		}
++	}
++	rtnl_unlock();
++	return dev;
++}
++
++EXPORT_SYMBOL(dev_getfirstbyhwtype);
++
++/**
++ *	dev_get_by_flags - find any device with given flags
++ *	@if_flags: IFF_* values
++ *	@mask: bitmask of bits in if_flags to check
++ *
++ *	Search for any interface with the given flags. Returns NULL if a device
++ *	is not found or a pointer to the device. The device returned has 
++ *	had a reference added and the pointer is safe until the user calls
++ *	dev_put to indicate they have finished with it.
++ */
++
++struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
++{
++	struct net_device *dev;
++
++	read_lock(&dev_base_lock);
++	for (dev = dev_base; dev != NULL; dev = dev->next) {
++		if (((dev->flags ^ if_flags) & mask) == 0) {
++			dev_hold(dev);
++			break;
++		}
++	}
++	read_unlock(&dev_base_lock);
++	return dev;
++}
++
++/**
++ *	dev_valid_name - check if name is okay for network device
++ *	@name: name string
++ *
++ *	Network device names need to be valid file names to
++ *	allow sysfs to work
++ */
++static int dev_valid_name(const char *name)
++{
++	return !(*name == '\0' 
++		 || !strcmp(name, ".")
++		 || !strcmp(name, "..")
++		 || strchr(name, '/'));
++}
++
++/**
++ *	dev_alloc_name - allocate a name for a device
++ *	@dev: device
++ *	@name: name format string
++ *
++ *	Passed a format string - eg "lt%d" it will try and find a suitable
++ *	id. Not efficient for many devices, not called a lot. The caller
++ *	must hold the dev_base or rtnl lock while allocating the name and
++ *	adding the device in order to avoid duplicates. Returns the number
++ *	of the unit assigned or a negative errno code.
++ */
++
++int dev_alloc_name(struct net_device *dev, const char *name)
++{
++	int i = 0;
++	char buf[IFNAMSIZ];
++	const char *p;
++	const int max_netdevices = 8*PAGE_SIZE;
++	long *inuse;
++	struct net_device *d;
++
++	p = strnchr(name, IFNAMSIZ-1, '%');
++	if (p) {
++		/*
++		 * Verify the string as this thing may have come from
++		 * the user.  There must be exactly one "%d" and no other "%"
++		 * characters.
++		 */
++		if (p[1] != 'd' || strchr(p + 2, '%'))
++			return -EINVAL;
++
++		/* Use one page as a bit array of possible slots */
++		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
++		if (!inuse)
++			return -ENOMEM;
++
++		for (d = dev_base; d; d = d->next) {
++			if (!sscanf(d->name, name, &i))
++				continue;
++			if (i < 0 || i >= max_netdevices)
++				continue;
++
++			/*  avoid cases where sscanf is not exact inverse of printf */
++			snprintf(buf, sizeof(buf), name, i);
++			if (!strncmp(buf, d->name, IFNAMSIZ))
++				set_bit(i, inuse);
++		}
++
++		i = find_first_zero_bit(inuse, max_netdevices);
++		free_page((unsigned long) inuse);
++	}
++
++	snprintf(buf, sizeof(buf), name, i);
++	if (!__dev_get_by_name(buf)) {
++		strlcpy(dev->name, buf, IFNAMSIZ);
++		return i;
++	}
++
++	/* It is possible to run out of possible slots
++	 * when the name is long and there isn't enough space left
++	 * for the digits, or if all bits are used.
++	 */
++	return -ENFILE;
++}
++
++
++/**
++ *	dev_change_name - change name of a device
++ *	@dev: device
++ *	@newname: name (or format string) must be at least IFNAMSIZ
++ *
++ *	Change name of a device; a format string such as "eth%d" can be
++ *	passed for wildcarding.
++ */
++int dev_change_name(struct net_device *dev, char *newname)
++{
++	int err = 0;
++
++	ASSERT_RTNL();
++
++	if (dev->flags & IFF_UP)
++		return -EBUSY;
++
++	if (!dev_valid_name(newname))
++		return -EINVAL;
++
++	if (strchr(newname, '%')) {
++		err = dev_alloc_name(dev, newname);
++		if (err < 0)
++			return err;
++		strcpy(newname, dev->name);
++	}
++	else if (__dev_get_by_name(newname))
++		return -EEXIST;
++	else
++		strlcpy(dev->name, newname, IFNAMSIZ);
++
++	err = class_device_rename(&dev->class_dev, dev->name);
++	if (!err) {
++		hlist_del(&dev->name_hlist);
++		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
++		notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
++	}
++
++	return err;
++}
++
++/**
++ *	netdev_features_change - device changes features
++ *	@dev: device to cause notification
++ *
++ *	Called to indicate a device has changed features.
++ */
++void netdev_features_change(struct net_device *dev)
++{
++	notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
++}
++EXPORT_SYMBOL(netdev_features_change);
++
++/**
++ *	netdev_state_change - device changes state
++ *	@dev: device to cause notification
++ *
++ *	Called to indicate a device has changed state. This function calls
++ *	the notifier chains for netdev_chain and sends a NEWLINK message
++ *	to the routing socket.
++ */
++void netdev_state_change(struct net_device *dev)
++{
++	if (dev->flags & IFF_UP) {
++		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
++		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
++	}
++}
++
++/**
++ *	dev_load 	- load a network module
++ *	@name: name of interface
++ *
++ *	If a network interface is not present and the process has suitable
++ *	privileges this function loads the module. If module loading is not
++ *	available in this kernel then it becomes a nop.
++ */
++
++void dev_load(const char *name)
++{
++	struct net_device *dev;  
++
++	read_lock(&dev_base_lock);
++	dev = __dev_get_by_name(name);
++	read_unlock(&dev_base_lock);
++
++	if (!dev && capable(CAP_SYS_MODULE))
++		request_module("%s", name);
++}
++
++static int default_rebuild_header(struct sk_buff *skb)
++{
++	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
++	       skb->dev ? skb->dev->name : "NULL!!!");
++	kfree_skb(skb);
++	return 1;
++}
++
++
++/**
++ *	dev_open	- prepare an interface for use.
++ *	@dev:	device to open
++ *
++ *	Takes a device from down to up state. The device's private open
++ *	function is invoked and then the multicast lists are loaded. Finally
++ *	the device is moved into the up state and a %NETDEV_UP message is
++ *	sent to the netdev notifier chain.
++ *
++ *	Calling this function on an active interface is a nop. On a failure
++ *	a negative errno code is returned.
++ */
++int dev_open(struct net_device *dev)
++{
++	int ret = 0;
++
++	/*
++	 *	Is it already up?
++	 */
++
++	if (dev->flags & IFF_UP)
++		return 0;
++
++	/*
++	 *	Is it even present?
++	 */
++	if (!netif_device_present(dev))
++		return -ENODEV;
++
++	/*
++	 *	Call device private open method
++	 */
++	set_bit(__LINK_STATE_START, &dev->state);
++	if (dev->open) {
++		ret = dev->open(dev);
++		if (ret)
++			clear_bit(__LINK_STATE_START, &dev->state);
++	}
++
++ 	/*
++	 *	If it went open OK then:
++	 */
++
++	if (!ret) {
++		/*
++		 *	Set the flags.
++		 */
++		dev->flags |= IFF_UP;
++
++		/*
++		 *	Initialize multicasting status
++		 */
++		dev_mc_upload(dev);
++
++		/*
++		 *	Wakeup transmit queue engine
++		 */
++		dev_activate(dev);
++
++		/*
++		 *	... and announce new interface.
++		 */
++		notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
++	}
++	return ret;
++}
++
++/**
++ *	dev_close - shutdown an interface.
++ *	@dev: device to shutdown
++ *
++ *	This function moves an active device into down state. A
++ *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
++ *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
++ *	chain.
++ */
++int dev_close(struct net_device *dev)
++{
++	if (!(dev->flags & IFF_UP))
++		return 0;
++
++	/*
++	 *	Tell people we are going down, so that they can
++	 *	prepare for shutdown while the device is still operating.
++	 */
++	notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
++
++	dev_deactivate(dev);
++
++	clear_bit(__LINK_STATE_START, &dev->state);
++
++	/* Synchronize to scheduled poll. We cannot touch poll list,
++	 * it can be even on different cpu. So just clear netif_running(),
++	 * and wait when poll really will happen. Actually, the best place
++	 * for this is inside dev->stop() after device stopped its irq
++	 * engine, but this requires more changes in devices. */
++
++	smp_mb__after_clear_bit(); /* Commit netif_running(). */
++	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
++		/* No hurry. */
++		current->state = TASK_INTERRUPTIBLE;
++		schedule_timeout(1);
++	}
++
++	/*
++	 *	Call the device specific close. This cannot fail.
++	 *	Only if device is UP
++	 *
++	 *	We allow it to be called even after a DETACH hot-plug
++	 *	event.
++	 */
++	if (dev->stop)
++		dev->stop(dev);
++
++	/*
++	 *	Device is now down.
++	 */
++
++	dev->flags &= ~IFF_UP;
++
++	/*
++	 * Tell people we are down
++	 */
++	notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
++
++	return 0;
++}
++
++
++/*
++ *	Device change register/unregister. These are not inline or static
++ *	as we export them to the world.
++ */
++
++/**
++ *	register_netdevice_notifier - register a network notifier block
++ *	@nb: notifier
++ *
++ *	Register a notifier to be called when network device events occur.
++ *	The notifier passed is linked into the kernel structures and must
++ *	not be reused until it has been unregistered. A negative errno code
++ *	is returned on a failure.
++ *
++ * 	When registered all registration and up events are replayed
++ *	to the new notifier to allow device to have a race free 
++ *	view of the network device list.
++ */
++
++int register_netdevice_notifier(struct notifier_block *nb)
++{
++	struct net_device *dev;
++	int err;
++
++	rtnl_lock();
++	err = notifier_chain_register(&netdev_chain, nb);
++	if (!err) {
++		for (dev = dev_base; dev; dev = dev->next) {
++			nb->notifier_call(nb, NETDEV_REGISTER, dev);
++
++			if (dev->flags & IFF_UP) 
++				nb->notifier_call(nb, NETDEV_UP, dev);
++		}
++	}
++	rtnl_unlock();
++	return err;
++}
++
++/**
++ *	unregister_netdevice_notifier - unregister a network notifier block
++ *	@nb: notifier
++ *
++ *	Unregister a notifier previously registered by
++ *	register_netdevice_notifier(). The notifier is unlinked from the
++ *	kernel structures and may then be reused. A negative errno code
++ *	is returned on a failure.
++ */
++
++int unregister_netdevice_notifier(struct notifier_block *nb)
++{
++	return notifier_chain_unregister(&netdev_chain, nb);
++}
++
++/**
++ *	call_netdevice_notifiers - call all network notifier blocks
++ *      @val: value passed unmodified to notifier function
++ *      @v:   pointer passed unmodified to notifier function
++ *
++ *	Call all network notifier blocks.  Parameters and return value
++ *	are as for notifier_call_chain().
++ */
++
++int call_netdevice_notifiers(unsigned long val, void *v)
++{
++	return notifier_call_chain(&netdev_chain, val, v);
++}
++
++/* When > 0 there are consumers of rx skb time stamps */
++static atomic_t netstamp_needed = ATOMIC_INIT(0);
++
++void net_enable_timestamp(void)
++{
++	atomic_inc(&netstamp_needed);
++}
++
++void net_disable_timestamp(void)
++{
++	atomic_dec(&netstamp_needed);
++}
++
++static inline void net_timestamp(struct timeval *stamp)
++{
++	if (atomic_read(&netstamp_needed))
++		do_gettimeofday(stamp);
++	else {
++		stamp->tv_sec = 0;
++		stamp->tv_usec = 0;
++	}
++}
++
++/*
++ *	Support routine. Sends outgoing frames to any network
++ *	taps currently in use.
++ */
++
++void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
++{
++	struct packet_type *ptype;
++	net_timestamp(&skb->stamp);
++
++	rcu_read_lock();
++	list_for_each_entry_rcu(ptype, &ptype_all, list) {
++		/* Never send packets back to the socket
++		 * they originated from - MvS (miquels@drinkel.ow.org)
++		 */
++		if ((ptype->dev == dev || !ptype->dev) &&
++		    (ptype->af_packet_priv == NULL ||
++		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
++			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
++			if (!skb2)
++				break;
++
++			/* skb->nh should be correctly
++			   set by sender, so that the second statement is
++			   just protection against buggy protocols.
++			 */
++			skb2->mac.raw = skb2->data;
++
++			if (skb2->nh.raw < skb2->data ||
++			    skb2->nh.raw > skb2->tail) {
++				if (net_ratelimit())
++					printk(KERN_CRIT "protocol %04x is "
++					       "buggy, dev %s\n",
++					       skb2->protocol, dev->name);
++				skb2->nh.raw = skb2->data;
++			}
++
++			skb2->h.raw = skb2->nh.raw;
++			skb2->pkt_type = PACKET_OUTGOING;
++			ptype->func(skb2, skb->dev, ptype);
++		}
++	}
++	rcu_read_unlock();
++}
++
++/*
++ * Invalidate hardware checksum when packet is to be mangled, and
++ * complete checksum manually on outgoing path.
++ */
++int skb_checksum_help(struct sk_buff *skb, int inward)
++{
++	unsigned int csum;
++	int ret = 0, offset = skb->h.raw - skb->data;
++
++	if (inward) {
++		skb->ip_summed = CHECKSUM_NONE;
++		goto out;
++	}
++
++	if (skb_cloned(skb)) {
++		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
++		if (ret)
++			goto out;
++	}
++
++	if (offset > (int)skb->len)
++		BUG();
++	csum = skb_checksum(skb, offset, skb->len-offset, 0);
++
++	offset = skb->tail - skb->h.raw;
++	if (offset <= 0)
++		BUG();
++	if (skb->csum + 2 > offset)
++		BUG();
++
++	*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
++	skb->ip_summed = CHECKSUM_NONE;
++out:	
++	return ret;
++}
++
++#ifdef CONFIG_HIGHMEM
++/* Actually, we should eliminate this check as soon as we know, that:
++ * 1. IOMMU is present and allows to map all the memory.
++ * 2. No high memory really exists on this machine.
++ */
++
++static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
++{
++	int i;
++
++	if (dev->features & NETIF_F_HIGHDMA)	/* device advertises HIGHDMA: nothing to check */
++		return 0;
++
++	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
++		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
++			return 1;	/* at least one fragment page is in high memory */
++
++	return 0;
++}
++#else
++#define illegal_highdma(dev, skb)	(0)	/* without CONFIG_HIGHMEM the check is constant 0 */
++#endif
++
++extern void skb_release_data(struct sk_buff *);
++
++/* Keep head the same: replace data. Returns 0, or -ENOMEM if kmalloc fails. */
++int __skb_linearize(struct sk_buff *skb, int gfp_mask)
++{
++	unsigned int size;
++	u8 *data;
++	long offset;
++	struct skb_shared_info *ninfo;
++	int headerlen = skb->data - skb->head;	/* headroom in front of data */
++	int expand = (skb->tail + skb->data_len) - skb->end;	/* bytes by which data_len overruns end */
++
++	if (skb_shared(skb))
++		BUG();
++
++	if (expand <= 0)
++		expand = 0;
++
++	size = skb->end - skb->head + expand;
++	size = SKB_DATA_ALIGN(size);
++	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);	/* shinfo lives at data + size */
++	if (!data)
++		return -ENOMEM;
++
++	/* Copy entire thing, headroom included (copy starts at -headerlen) */
++	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
++		BUG();
++
++	/* Set up shinfo */
++	ninfo = (struct skb_shared_info*)(data + size);
++	atomic_set(&ninfo->dataref, 1);
++	ninfo->tso_size = skb_shinfo(skb)->tso_size;
++	ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
++	ninfo->nr_frags = 0;
++	ninfo->frag_list = NULL;
++
++	/* Offset between the two in bytes */
++	offset = data - skb->head;
++
++	/* Free old data. */
++	skb_release_data(skb);
++
++	skb->head = data;
++	skb->end  = data + size;
++
++	/* Set up new pointers */
++	skb->h.raw   += offset;
++	skb->nh.raw  += offset;
++	skb->mac.raw += offset;
++	skb->tail    += offset;
++	skb->data    += offset;
++
++	/* We are no longer a clone, even if we were. */
++	skb->cloned    = 0;
++
++	skb->tail     += skb->data_len;	/* formerly non-linear bytes now sit before tail */
++	skb->data_len  = 0;
++	return 0;
++}
++
++#define HARD_TX_LOCK(dev, cpu) {	/* does nothing if NETIF_F_LLTX is set */ \
++	if ((dev->features & NETIF_F_LLTX) == 0) {	\
++		spin_lock(&dev->xmit_lock);		\
++		dev->xmit_lock_owner = cpu;	/* compared against the current cpu in dev_queue_xmit() */ \
++	}						\
++}
++
++#define HARD_TX_UNLOCK(dev) {	/* does nothing if NETIF_F_LLTX is set */	\
++	if ((dev->features & NETIF_F_LLTX) == 0) {	\
++		dev->xmit_lock_owner = -1;	/* -1: no owner recorded */	\
++		spin_unlock(&dev->xmit_lock);		\
++	}						\
++}
++
++/**
++ *	dev_queue_xmit - transmit a buffer
++ *	@skb: buffer to transmit
++ *
++ *	Queue a buffer for transmission to a network device. The caller must
++ *	have set the device and priority and built the buffer before calling
++ *	this function. The function can be called from an interrupt.
++ *
++ *	A negative errno code is returned on a failure. A success does not
++ *	guarantee the frame will be transmitted as it may be dropped due
++ *	to congestion or traffic shaping.
++ *
++ * -----------------------------------------------------------------------------------
++ *      I notice this method can also return errors from the queue disciplines,
++ *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
++ *      be positive.
++ *
++ *      Regardless of the return value, the skb is consumed, so it is currently
++ *      difficult to retry a send to this method.  (You can bump the ref count
++ *      before sending to hold a reference for retry if you are careful.)
++ *
++ *      When calling this method, interrupts MUST be enabled.  This is because
++ *      the BH enable code must have IRQs enabled so that it will not deadlock.
++ *          --BLG
++ */
++
++int dev_queue_xmit(struct sk_buff *skb)
++{
++	struct net_device *dev = skb->dev;
++	struct Qdisc *q;
++	int rc = -ENOMEM;	/* returned if linearizing or checksum help fails */
++
++	if (skb_shinfo(skb)->frag_list &&
++	    !(dev->features & NETIF_F_FRAGLIST) &&
++	    __skb_linearize(skb, GFP_ATOMIC))
++		goto out_kfree_skb;
++
++	/* Fragmented skb is linearized if device does not support SG,
++	 * or if at least one of fragments is in highmem and device
++	 * does not support DMA from it.
++	 */
++	if (skb_shinfo(skb)->nr_frags &&
++	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
++	    __skb_linearize(skb, GFP_ATOMIC))
++		goto out_kfree_skb;
++
++	/* If packet is not checksummed and device does not support
++	 * checksumming for this protocol, complete checksumming here.
++	 */
++	if (skb->ip_summed == CHECKSUM_HW &&
++	    (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
++	     (!(dev->features & NETIF_F_IP_CSUM) ||
++	      skb->protocol != htons(ETH_P_IP))))
++	      	if (skb_checksum_help(skb, 0))
++	      		goto out_kfree_skb;
++
++	/* Disable soft irqs for various locks below. Also
++	 * stops preemption for RCU.
++	 */
++	local_bh_disable(); 
++
++	/* Updates of qdisc are serialized by queue_lock.
++	 * The struct Qdisc which is pointed to by qdisc is now a
++	 * rcu structure - it may be accessed without acquiring
++	 * a lock (but the structure may be stale.) The freeing of the
++	 * qdisc will be deferred until it's known that there are no
++	 * more references to it.
++	 *
++	 * If the qdisc has an enqueue function, we still need to 
++	 * hold the queue_lock before calling it, since queue_lock
++	 * also serializes access to the device queue.
++	 */
++
++	q = rcu_dereference(dev->qdisc);
++#ifdef CONFIG_NET_CLS_ACT
++	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
++#endif
++	if (q->enqueue) {
++		/* Grab device queue */
++		spin_lock(&dev->queue_lock);
++
++		rc = q->enqueue(skb, q);
++
++		qdisc_run(dev);
++
++		spin_unlock(&dev->queue_lock);
++		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;	/* BYPASS is reported as SUCCESS */
++		goto out;
++	}
++
++	/* The device has no queue. Common case for software devices:
++	   loopback, all the sorts of tunnels...
++
++	   Really, it is unlikely that xmit_lock protection is necessary here.
++	   (f.e. loopback and IP tunnels are clean ignoring statistics
++	   counters.)
++	   However, it is possible, that they rely on protection
++	   made by us here.
++
++	   Check this and shot the lock. It is not prone from deadlocks.
++	   Either shot noqueue qdisc, it is even simpler 8)
++	 */
++	if (dev->flags & IFF_UP) {
++		int cpu = smp_processor_id(); /* ok because BHs are off */
++
++		if (dev->xmit_lock_owner != cpu) {
++
++			HARD_TX_LOCK(dev, cpu);
++
++			if (!netif_queue_stopped(dev)) {
++				if (netdev_nit)
++					dev_queue_xmit_nit(skb, dev);
++
++				rc = 0;
++				if (!dev->hard_start_xmit(skb, dev)) {	/* 0: skb is not freed here */
++					HARD_TX_UNLOCK(dev);
++					goto out;
++				}
++			}
++			HARD_TX_UNLOCK(dev);
++			if (net_ratelimit())
++				printk(KERN_CRIT "Virtual device %s asks to "
++				       "queue packet!\n", dev->name);
++		} else {
++			/* Recursion is detected! It is possible,
++			 * unfortunately */
++			if (net_ratelimit())
++				printk(KERN_CRIT "Dead loop on virtual device "
++				       "%s, fix it urgently!\n", dev->name);
++		}
++	}
++
++	rc = -ENETDOWN;	/* device down, queue stopped, xmit refused, or recursion */
++	local_bh_enable();
++
++out_kfree_skb:	/* failure exit: BHs are enabled on every path that gets here */
++	kfree_skb(skb);
++	return rc;
++out:	/* exit without freeing skb; BHs are still disabled here */
++	local_bh_enable();
++	return rc;
++}
++
++
++/*=======================================================================
++			Receiver routines
++  =======================================================================*/
++
++int netdev_max_backlog = 300;	/* input queue limit in netif_rx(); budget in net_rx_action() */
++int weight_p = 64;            /* old backlog weight */
++/* These numbers are selected based on intuition and some
++ * experimentation; if you have a more scientific way of doing this
++ * please go ahead and fix things.
++ */
++int no_cong_thresh = 10;
++int no_cong = 20;	/* average backlog above this: NET_RX_CN_LOW */
++int lo_cong = 100;	/* average backlog above this: NET_RX_CN_MOD */
++int mod_cong = 290;	/* average backlog above this: NET_RX_CN_HIGH */
++
++DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
++
++
++static void get_sample_stats(int cpu)	/* refresh avg_blog and cng_level of @cpu's softnet_data */
++{
++#ifdef RAND_LIE
++	unsigned long rd;
++	int rq;
++#endif
++	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
++	int blog = sd->input_pkt_queue.qlen;
++	int avg_blog = sd->avg_blog;
++
++	avg_blog = (avg_blog >> 1) + (blog >> 1);	/* half old average, half current queue length */
++
++	if (avg_blog > mod_cong) {
++		/* Above moderate congestion levels. */
++		sd->cng_level = NET_RX_CN_HIGH;
++#ifdef RAND_LIE
++		rd = net_random();
++		rq = rd % netdev_max_backlog;
++		if (rq < avg_blog) /* unlucky bastard */
++			sd->cng_level = NET_RX_DROP;
++#endif
++	} else if (avg_blog > lo_cong) {
++		sd->cng_level = NET_RX_CN_MOD;
++#ifdef RAND_LIE
++		rd = net_random();
++		rq = rd % netdev_max_backlog;
++			if (rq < avg_blog) /* unlucky bastard */
++				sd->cng_level = NET_RX_CN_HIGH;
++#endif
++	} else if (avg_blog > no_cong)
++		sd->cng_level = NET_RX_CN_LOW;
++	else  /* no congestion */
++		sd->cng_level = NET_RX_SUCCESS;
++
++	sd->avg_blog = avg_blog;
++}
++
++#ifdef OFFLINE_SAMPLE
++static void sample_queue(unsigned long dummy)	/* timer callback: sample, then re-arm samp_timer */
++{
++/* 10 ms or 1 ms -- i don't care -- JHS */
++	int next_tick = 1;
++	int cpu = smp_processor_id();
++
++	get_sample_stats(cpu);
++	next_tick += jiffies;	/* fire again one jiffy from now */
++	mod_timer(&samp_timer, next_tick);
++}
++#endif
++
++
++/**
++ *	netif_rx	-	post buffer to the network code
++ *	@skb: buffer to post
++ *
++ *	This function receives a packet from a device driver and queues it for
++ *	the upper (protocol) levels to process.  It always succeeds. The buffer
++ *	may be dropped during processing for congestion control or by the
++ *	protocol layers.
++ *
++ *	return values:
++ *	NET_RX_SUCCESS	(no congestion)
++ *	NET_RX_CN_LOW   (low congestion)
++ *	NET_RX_CN_MOD   (moderate congestion)
++ *	NET_RX_CN_HIGH  (high congestion)
++ *	NET_RX_DROP     (packet was dropped)
++ *
++ */
++
++int netif_rx(struct sk_buff *skb)
++{
++	int this_cpu;
++	struct softnet_data *queue;
++	unsigned long flags;
++
++	/* if netpoll wants it, pretend we never saw it */
++	if (netpoll_rx(skb))
++		return NET_RX_DROP;
++
++	if (!skb->stamp.tv_sec)	/* no timestamp yet: take one now */
++		net_timestamp(&skb->stamp);
++
++	/*
++	 * The code is rearranged so that the path is the most
++	 * short when CPU is congested, but is still operating.
++	 */
++	local_irq_save(flags);
++	this_cpu = smp_processor_id();
++	queue = &__get_cpu_var(softnet_data);
++
++	__get_cpu_var(netdev_rx_stat).total++;
++	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
++		if (queue->input_pkt_queue.qlen) {	/* backlog already has packets */
++			if (queue->throttle)
++				goto drop;
++
++enqueue:
++			dev_hold(skb->dev);	/* released by dev_put() in process_backlog() */
++			__skb_queue_tail(&queue->input_pkt_queue, skb);
++#ifndef OFFLINE_SAMPLE
++			get_sample_stats(this_cpu);
++#endif
++			local_irq_restore(flags);
++			return queue->cng_level;	/* level as last set by get_sample_stats() */
++		}
++
++		if (queue->throttle)	/* queue is empty: stop throttling */
++			queue->throttle = 0;
++
++		netif_rx_schedule(&queue->backlog_dev);	/* queue was empty: schedule the backlog device */
++		goto enqueue;
++	}
++
++	if (!queue->throttle) {	/* queue above netdev_max_backlog: start throttling */
++		queue->throttle = 1;
++		__get_cpu_var(netdev_rx_stat).throttled++;
++	}
++
++drop:
++	__get_cpu_var(netdev_rx_stat).dropped++;
++	local_irq_restore(flags);
++
++	kfree_skb(skb);
++	return NET_RX_DROP;
++}
++
++int netif_rx_ni(struct sk_buff *skb)	/* netif_rx() followed by a run of pending softirqs */
++{
++	int err;
++
++	preempt_disable();
++	err = netif_rx(skb);
++	if (local_softirq_pending())	/* process pending softirqs right away */
++		do_softirq();
++	preempt_enable();
++
++	return err;	/* the netif_rx() result, unchanged */
++}
++
++EXPORT_SYMBOL(netif_rx_ni);
++
++static __inline__ void skb_bond(struct sk_buff *skb)
++{
++	struct net_device *dev = skb->dev;
++
++	if (dev->master) {	/* device has a master: retarget the skb to it */
++		skb->real_dev = skb->dev;	/* remember the original device */
++		skb->dev = dev->master;
++	}
++}
++
++static void net_tx_action(struct softirq_action *h)	/* frees completed skbs, runs scheduled qdiscs */
++{
++	struct softnet_data *sd = &__get_cpu_var(softnet_data);
++
++	if (sd->completion_queue) {
++		struct sk_buff *clist;
++
++		local_irq_disable();	/* detach the whole list with irqs off */
++		clist = sd->completion_queue;
++		sd->completion_queue = NULL;
++		local_irq_enable();
++
++		while (clist) {
++			struct sk_buff *skb = clist;
++			clist = clist->next;
++
++			BUG_TRAP(!atomic_read(&skb->users));	/* expects the user count to be zero */
++			__kfree_skb(skb);
++		}
++	}
++
++	if (sd->output_queue) {
++		struct net_device *head;
++
++		local_irq_disable();	/* detach the whole list with irqs off */
++		head = sd->output_queue;
++		sd->output_queue = NULL;
++		local_irq_enable();
++
++		while (head) {
++			struct net_device *dev = head;
++			head = head->next_sched;
++
++			smp_mb__before_clear_bit();
++			clear_bit(__LINK_STATE_SCHED, &dev->state);
++
++			if (spin_trylock(&dev->queue_lock)) {
++				qdisc_run(dev);
++				spin_unlock(&dev->queue_lock);
++			} else {
++				netif_schedule(dev);	/* lock busy: schedule the device again */
++			}
++		}
++	}
++}
++
++static __inline__ int deliver_skb(struct sk_buff *skb,
++				  struct packet_type *pt_prev)
++{
++	atomic_inc(&skb->users);	/* take a reference before handing skb to the handler */
++	return pt_prev->func(skb, skb->dev, pt_prev);	/* handler's return value is passed through */
++}
++
++#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
++int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
++struct net_bridge;
++struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
++						unsigned char *addr);
++void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
++
++static __inline__ int handle_bridge(struct sk_buff **pskb,
++				    struct packet_type **pt_prev, int *ret)
++{
++	struct net_bridge_port *port;
++
++	if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
++	    (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
++		return 0;	/* loopback packet or no bridge port on this device */
++
++	if (*pt_prev) {	/* deliver to the pending handler before the hook runs */
++		*ret = deliver_skb(*pskb, *pt_prev);
++		*pt_prev = NULL;
++	} 
++	
++	return br_handle_frame_hook(port, pskb);	/* non-zero makes netif_receive_skb() stop */
++}
++#else
++#define handle_bridge(skb, pt_prev, ret)	(0)
++#endif
++
++#ifdef CONFIG_NET_CLS_ACT
++/* TODO: Maybe we should just force sch_ingress to be compiled in
++ * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
++ * a compare and 2 stores extra right now if we don't have it on
++ * but have CONFIG_NET_CLS_ACT
++ * NOTE: This doesn't stop any functionality; if you don't have
++ * the ingress scheduler, you just can't add policies on ingress.
++ *
++ */
++static int ing_filter(struct sk_buff *skb) 
++{
++	struct Qdisc *q;
++	struct net_device *dev = skb->dev;
++	int result = TC_ACT_OK;	/* returned when the device has no ingress qdisc */
++	
++	if (dev->qdisc_ingress) {
++		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
++		if (MAX_RED_LOOP < ttl++) {	/* compares the old ttl, then increments it */
++			printk("Redir loop detected Dropping packet (%s->%s)\n",
++				skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
++			return TC_ACT_SHOT;
++		}
++
++		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
++
++		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
++		if (NULL == skb->input_dev) {
++			skb->input_dev = skb->dev;
++			printk("ing_filter:  fixed  %s out %s\n",skb->input_dev->name,skb->dev->name);
++		}
++		spin_lock(&dev->ingress_lock);
++		if ((q = dev->qdisc_ingress) != NULL)	/* re-read under ingress_lock */
++			result = q->enqueue(skb, q);
++		spin_unlock(&dev->ingress_lock);
++
++	}
++
++	return result;
++}
++#endif
++
++int netif_receive_skb(struct sk_buff *skb)	/* hand one received skb to the matching handlers */
++{
++	struct packet_type *ptype, *pt_prev;
++	int ret = NET_RX_DROP;
++	unsigned short type;
++
++	/* if we've gotten here through NAPI, check netpoll */
++	if (skb->dev->poll && netpoll_rx(skb))
++		return NET_RX_DROP;
++
++	if (!skb->stamp.tv_sec)
++		net_timestamp(&skb->stamp);
++
++	skb_bond(skb);	/* switches skb->dev to dev->master when one is set */
++
++	__get_cpu_var(netdev_rx_stat).total++;
++
++	skb->h.raw = skb->nh.raw = skb->data;
++	skb->mac_len = skb->nh.raw - skb->mac.raw;
++
++	pt_prev = NULL;
++
++	rcu_read_lock();
++
++#ifdef CONFIG_NET_CLS_ACT
++	if (skb->tc_verd & TC_NCLS) {
++		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
++		goto ncls;
++	}
++#endif
++
++	list_for_each_entry_rcu(ptype, &ptype_all, list) {
++		if (!ptype->dev || ptype->dev == skb->dev) {
++			if (pt_prev) 
++				ret = deliver_skb(skb, pt_prev);
++			pt_prev = ptype;	/* delivery lags one matching handler behind */
++		}
++	}
++
++#ifdef CONFIG_NET_CLS_ACT
++	if (pt_prev) {
++		ret = deliver_skb(skb, pt_prev);
++		pt_prev = NULL; /* no one else should process this after */
++	} else {
++		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
++	}
++
++	ret = ing_filter(skb);
++
++	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
++		kfree_skb(skb);
++		goto out;
++	}
++
++	skb->tc_verd = 0;
++ncls:
++#endif
++
++	handle_diverter(skb);
++
++	if (handle_bridge(&skb, &pt_prev, &ret))
++		goto out;
++
++	type = skb->protocol;
++	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {	/* bucket: low 4 bits of protocol */
++		if (ptype->type == type &&
++		    (!ptype->dev || ptype->dev == skb->dev)) {
++			if (pt_prev) 
++				ret = deliver_skb(skb, pt_prev);
++			pt_prev = ptype;
++		}
++	}
++
++	if (pt_prev) {
++		ret = pt_prev->func(skb, skb->dev, pt_prev);	/* last handler is called without an extra reference */
++	} else {
++		kfree_skb(skb);
++		/* Jamal, now you will not able to escape explaining
++		 * me how you were going to use this. :-)
++		 */
++		ret = NET_RX_DROP;
++	}
++
++out:
++	rcu_read_unlock();
++	return ret;
++}
++
++static int process_backlog(struct net_device *backlog_dev, int *budget)
++{
++	int work = 0;
++	int quota = min(backlog_dev->quota, *budget);	/* most packets to handle in this call */
++	struct softnet_data *queue = &__get_cpu_var(softnet_data);
++	unsigned long start_time = jiffies;
++
++	backlog_dev->weight = weight_p;
++	for (;;) {
++		struct sk_buff *skb;
++		struct net_device *dev;
++
++		local_irq_disable();
++		skb = __skb_dequeue(&queue->input_pkt_queue);
++		if (!skb)
++			goto job_done;
++		local_irq_enable();
++
++		dev = skb->dev;	/* saved before the skb is handed off */
++
++		netif_receive_skb(skb);
++
++		dev_put(dev);	/* pairs with dev_hold() in netif_rx() */
++
++		work++;
++
++		if (work >= quota || jiffies - start_time > 1)	/* quota used up, or more than one jiffy spent */
++			break;
++
++	}
++
++	backlog_dev->quota -= work;
++	*budget -= work;
++	return -1;	/* stopped early; the queue was not seen empty */
++
++job_done:	/* queue empty; reached with local irqs disabled */
++	backlog_dev->quota -= work;
++	*budget -= work;
++
++	list_del(&backlog_dev->poll_list);
++	smp_mb__before_clear_bit();
++	netif_poll_enable(backlog_dev);
++
++	if (queue->throttle)
++		queue->throttle = 0;
++	local_irq_enable();
++	return 0;
++}
++
++static void net_rx_action(struct softirq_action *h)	/* polls the devices on this CPU's poll_list */
++{
++	struct softnet_data *queue = &__get_cpu_var(softnet_data);
++	unsigned long start_time = jiffies;
++	int budget = netdev_max_backlog;	/* passed by pointer to each dev->poll() */
++
++	
++	local_irq_disable();
++
++	while (!list_empty(&queue->poll_list)) {
++		struct net_device *dev;
++
++		if (budget <= 0 || jiffies - start_time > 1)	/* budget gone, or more than one jiffy spent */
++			goto softnet_break;
++
++		local_irq_enable();
++
++		dev = list_entry(queue->poll_list.next,
++				 struct net_device, poll_list);
++		netpoll_poll_lock(dev);
++
++		if (dev->quota <= 0 || dev->poll(dev, &budget)) {	/* no quota, or poll() returned non-zero */
++			netpoll_poll_unlock(dev);
++			local_irq_disable();
++			list_del(&dev->poll_list);
++			list_add_tail(&dev->poll_list, &queue->poll_list);	/* move device to the list tail */
++			if (dev->quota < 0)
++				dev->quota += dev->weight;
++			else
++				dev->quota = dev->weight;
++		} else {
++			netpoll_poll_unlock(dev);
++			dev_put(dev);	/* poll() returned 0: drop a device reference */
++			local_irq_disable();
++		}
++	}
++out:
++	local_irq_enable();
++	return;
++
++softnet_break:	/* count the squeeze and raise NET_RX_SOFTIRQ again */
++	__get_cpu_var(netdev_rx_stat).time_squeeze++;
++	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
++	goto out;
++}
++
++static gifconf_func_t * gifconf_list [NPROTO];	/* one handler slot per address family */
++
++/**
++ *	register_gifconf	-	register a SIOCGIF handler
++ *	@family: Address family
++ *	@gifconf: Function handler
++ *
++ *	Register protocol dependent address dumping routines. The handler
++ *	must not be freed or reused until it has been replaced by another
++ *	handler. Returns 0, or -EINVAL if @family is not below NPROTO.
++ */
++int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
++{
++	if (family >= NPROTO)
++		return -EINVAL;
++	gifconf_list[family] = gifconf;	/* overwrites any previous handler */
++	return 0;
++}
++
++
++/*
++ *	Map an interface index to its name (SIOCGIFNAME)
++ */
++
++/*
++ *	We need this ioctl for efficient implementation of the
++ *	if_indextoname() function required by the IPv6 API.  Without
++ *	it, we would have to search all the interfaces to find a
++ *	match.  --pb
++ */
++
++static int dev_ifname(struct ifreq __user *arg)
++{
++	struct net_device *dev;
++	struct ifreq ifr;
++
++	/*
++	 *	Fetch the caller's info block.
++	 */
++
++	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
++		return -EFAULT;
++
++	read_lock(&dev_base_lock);	/* held across the lookup and the name copy */
++	dev = __dev_get_by_index(ifr.ifr_ifindex);
++	if (!dev) {
++		read_unlock(&dev_base_lock);
++		return -ENODEV;
++	}
++
++	strcpy(ifr.ifr_name, dev->name);
++	read_unlock(&dev_base_lock);
++
++	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))	/* write the whole block back */
++		return -EFAULT;
++	return 0;
++}
++
++/*
++ *	Perform a SIOCGIFCONF call. This structure will change
++ *	size eventually, and there is nothing I can do about it.
++ *	Thus we will need a 'compatibility mode'.
++ */
++
++static int dev_ifconf(char __user *arg)
++{
++	struct ifconf ifc;
++	struct net_device *dev;
++	char __user *pos;
++	int len;
++	int total;
++	int i;
++
++	/*
++	 *	Fetch the caller's info block.
++	 */
++
++	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
++		return -EFAULT;
++
++	pos = ifc.ifc_buf;
++	len = ifc.ifc_len;
++
++	/*
++	 *	Loop over the interfaces, and write an info block for each.
++	 */
++
++	total = 0;
++	for (dev = dev_base; dev; dev = dev->next) {
++		for (i = 0; i < NPROTO; i++) {
++			if (gifconf_list[i]) {
++				int done;
++				if (!pos)	/* no user buffer: handler gets NULL and length 0 */
++					done = gifconf_list[i](dev, NULL, 0);
++				else
++					done = gifconf_list[i](dev, pos + total,
++							       len - total);
++				if (done < 0)	/* any negative handler result becomes -EFAULT */
++					return -EFAULT;
++				total += done;
++			}
++		}
++  	}
++
++	/*
++	 *	All done.  Write the updated control block back to the caller.
++	 */
++	ifc.ifc_len = total;
++
++	/*
++	 * 	Both BSD and Solaris return 0 here, so we do too.
++	 */
++	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
++}
++
++#ifdef CONFIG_PROC_FS
++/*
++ *	This is invoked by the /proc filesystem handler to display a device
++ *	in detail.
++ */
++static __inline__ struct net_device *dev_get_idx(loff_t pos)
++{
++	struct net_device *dev;
++	loff_t i;
++
++	for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);	/* empty body: walk @pos entries */
++
++	return i == pos ? dev : NULL;	/* NULL when the list has fewer than @pos entries */
++}
++
++void *dev_seq_start(struct seq_file *seq, loff_t *pos)
++{
++	read_lock(&dev_base_lock);	/* released in dev_seq_stop() */
++	return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;	/* position 0 is the header token */
++}
++
++void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++	++*pos;
++	return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;	/* header -> first device */
++}
++
++void dev_seq_stop(struct seq_file *seq, void *v)
++{
++	read_unlock(&dev_base_lock);	/* pairs with read_lock() in dev_seq_start() */
++}
++
++static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
++{
++	if (dev->get_stats) {
++		struct net_device_stats *stats = dev->get_stats(dev);
++
++		seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
++				"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
++			   dev->name, stats->rx_bytes, stats->rx_packets,
++			   stats->rx_errors,
++			   stats->rx_dropped + stats->rx_missed_errors,	/* receive "drop" column */
++			   stats->rx_fifo_errors,
++			   stats->rx_length_errors + stats->rx_over_errors +	/* receive "frame" column */
++			     stats->rx_crc_errors + stats->rx_frame_errors,
++			   stats->rx_compressed, stats->multicast,
++			   stats->tx_bytes, stats->tx_packets,
++			   stats->tx_errors, stats->tx_dropped,
++			   stats->tx_fifo_errors, stats->collisions,
++			   stats->tx_carrier_errors +	/* transmit "carrier" column */
++			     stats->tx_aborted_errors +
++			     stats->tx_window_errors +
++			     stats->tx_heartbeat_errors,
++			   stats->tx_compressed);
++	} else
++		seq_printf(seq, "%6s: No statistics available.\n", dev->name);
++}
++
++/*
++ *	Called from the PROCfs module. This now uses the new arbitrary sized
++ *	/proc/net interface to create /proc/net/dev
++ */
++static int dev_seq_show(struct seq_file *seq, void *v)
++{
++	if (v == SEQ_START_TOKEN)	/* first record: print the two header lines */
++		seq_puts(seq, "Inter-|   Receive                            "
++			      "                    |  Transmit\n"
++			      " face |bytes    packets errs drop fifo frame "
++			      "compressed multicast|bytes    packets errs "
++			      "drop fifo colls carrier compressed\n");
++	else
++		dev_seq_printf_stats(seq, v);	/* v is a struct net_device pointer here */
++	return 0;
++}
++
++static struct netif_rx_stats *softnet_get_online(loff_t *pos)
++{
++	struct netif_rx_stats *rc = NULL;	/* stays NULL if *pos reaches NR_CPUS */
++
++	while (*pos < NR_CPUS)
++	       	if (cpu_online(*pos)) {
++			rc = &per_cpu(netdev_rx_stat, *pos);
++			break;
++		} else
++			++*pos;	/* skip CPUs that are not online */
++	return rc;
++}
++
++static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
++{
++	return softnet_get_online(pos);	/* stats of the first online CPU at or after *pos */
++}
++
++static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++	++*pos;	/* step past the CPU just shown */
++	return softnet_get_online(pos);
++}
++
++static void softnet_seq_stop(struct seq_file *seq, void *v)
++{	/* intentionally empty: softnet_seq_start() takes no lock */
++}
++
++static int softnet_seq_show(struct seq_file *seq, void *v)
++{
++	struct netif_rx_stats *s = v;	/* as returned by softnet_get_online() */
++
++	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",	/* nine hex fields per line */
++		   s->total, s->dropped, s->time_squeeze, s->throttled,
++		   s->fastroute_hit, s->fastroute_success, s->fastroute_defer,
++		   s->fastroute_deferred_out,
++#if 0
++		   s->fastroute_latency_reduction
++#else
++		   s->cpu_collision
++#endif
++		  );
++	return 0;
++}
++
++static struct seq_operations dev_seq_ops = {	/* passed to seq_open() by dev_seq_open() */
++	.start = dev_seq_start,
++	.next  = dev_seq_next,
++	.stop  = dev_seq_stop,
++	.show  = dev_seq_show,
++};
++
++static int dev_seq_open(struct inode *inode, struct file *file)
++{
++	return seq_open(file, &dev_seq_ops);	/* returns the seq_open() result */
++}
++
++static struct file_operations dev_seq_fops = {	/* registered as "dev" by dev_proc_init() */
++	.owner	 = THIS_MODULE,
++	.open    = dev_seq_open,
++	.read    = seq_read,
++	.llseek  = seq_lseek,
++	.release = seq_release,
++};
++
++static struct seq_operations softnet_seq_ops = {	/* passed to seq_open() by softnet_seq_open() */
++	.start = softnet_seq_start,
++	.next  = softnet_seq_next,
++	.stop  = softnet_seq_stop,
++	.show  = softnet_seq_show,
++};
++
++static int softnet_seq_open(struct inode *inode, struct file *file)
++{
++	return seq_open(file, &softnet_seq_ops);	/* returns the seq_open() result */
++}
++
++static struct file_operations softnet_seq_fops = {	/* registered as "softnet_stat" by dev_proc_init() */
++	.owner	 = THIS_MODULE,
++	.open    = softnet_seq_open,
++	.read    = seq_read,
++	.llseek  = seq_lseek,
++	.release = seq_release,
++};
++
++#ifdef WIRELESS_EXT
++extern int wireless_proc_init(void);
++#else
++#define wireless_proc_init() 0
++#endif
++
++static int __init dev_proc_init(void)
++{
++	int rc = -ENOMEM;	/* returned for every failure below */
++
++	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
++		goto out;
++	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
++		goto out_dev;
++	if (wireless_proc_init())
++		goto out_softnet;
++	rc = 0;
++out:
++	return rc;
++out_softnet:	/* unwind in reverse order of creation */
++	proc_net_remove("softnet_stat");
++out_dev:
++	proc_net_remove("dev");
++	goto out;
++}
++#else
++#define dev_proc_init() 0
++#endif	/* CONFIG_PROC_FS */
++
++
++/**
++ *	netdev_set_master	-	set up master/slave pair
++ *	@slave: slave device
++ *	@master: new master device
++ *
++ *	Changes the master device of the slave. Pass %NULL to break the
++ *	bonding. The caller must hold the RTNL semaphore. On a failure
++ *	a negative errno code is returned. On success the reference counts
++ *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
++ *	function returns zero.
++ */
++int netdev_set_master(struct net_device *slave, struct net_device *master)
++{
++	struct net_device *old = slave->master;
++
++	ASSERT_RTNL();
++
++	if (master) {
++		if (old)	/* slave already has a master */
++			return -EBUSY;
++		dev_hold(master);	/* dropped below when the master is later cleared */
++	}
++
++	slave->master = master;
++	
++	synchronize_net();
++
++	if (old)	/* only reachable with master == NULL */
++		dev_put(old);
++
++	if (master)
++		slave->flags |= IFF_SLAVE;
++	else
++		slave->flags &= ~IFF_SLAVE;
++
++	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
++	return 0;
++}
++
++/**
++ *	dev_set_promiscuity	- update promiscuity count on a device
++ *	@dev: device
++ *	@inc: modifier
++ *
++ *	Add or remove promiscuity from a device. While the count in the device
++ *	remains above zero the interface remains promiscuous. Once it hits zero
++ *	the device reverts back to normal filtering operation. A negative inc
++ *	value is used to drop promiscuity on the device.
++ */
++void dev_set_promiscuity(struct net_device *dev, int inc)
++{
++	unsigned short old_flags = dev->flags;
++
++	dev->flags |= IFF_PROMISC;	/* set first, cleared again if the count is zero */
++	if ((dev->promiscuity += inc) == 0)
++		dev->flags &= ~IFF_PROMISC;
++	if (dev->flags ^ old_flags) {	/* flag actually changed */
++		dev_mc_upload(dev);
++		printk(KERN_INFO "device %s %s promiscuous mode\n",
++		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
++		       					       "left");
++	}
++}
++
++/**
++ *	dev_set_allmulti	- update allmulti count on a device
++ *	@dev: device
++ *	@inc: modifier
++ *
++ *	Add or remove reception of all multicast frames to a device. While the
++ *	count in the device remains above zero the interface remains listening
++ *	to all multicast frames. Once it hits zero the device reverts to normal
++ *	filtering operation. A negative @inc value is used to drop the counter
++ *	when releasing a resource needing all multicasts.
++ */
++
++void dev_set_allmulti(struct net_device *dev, int inc)
++{
++	unsigned short old_flags = dev->flags;
++
++	dev->flags |= IFF_ALLMULTI;	/* set first, cleared again if the count is zero */
++	if ((dev->allmulti += inc) == 0)
++		dev->flags &= ~IFF_ALLMULTI;
++	if (dev->flags ^ old_flags)	/* flag actually changed */
++		dev_mc_upload(dev);
++}
++
++unsigned dev_get_flags(const struct net_device *dev)
++{
++	unsigned flags;
++
++	flags = (dev->flags & ~(IFF_PROMISC |	/* PROMISC/ALLMULTI come from gflags instead */
++				IFF_ALLMULTI |
++				IFF_RUNNING)) | 
++		(dev->gflags & (IFF_PROMISC |
++				IFF_ALLMULTI));
++
++	if (netif_running(dev) && netif_carrier_ok(dev))	/* IFF_RUNNING is derived, not stored */
++		flags |= IFF_RUNNING;
++
++	return flags;
++}
++
++int dev_change_flags(struct net_device *dev, unsigned flags)
++{
++	int ret;
++	int old_flags = dev->flags;
++
++	/*
++	 *	Take the settable flags from @flags; keep UP/VOLATILE/PROMISC/ALLMULTI.
++	 */
++
++	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
++			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
++			       IFF_AUTOMEDIA)) |
++		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
++				    IFF_ALLMULTI));
++
++	/*
++	 *	Load in the correct multicast list now the flags have changed.
++	 */
++
++	dev_mc_upload(dev);
++
++	/*
++	 *	Have we downed the interface. We handle IFF_UP ourselves
++	 *	according to user attempts to set it, rather than blindly
++	 *	setting it.
++	 */
++
++	ret = 0;
++	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
++		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);	/* was up: close; was down: open */
++
++		if (!ret)
++			dev_mc_upload(dev);
++	}
++
++	if (dev->flags & IFF_UP &&
++	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
++					  IFF_VOLATILE)))
++		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
++
++	if ((flags ^ dev->gflags) & IFF_PROMISC) {	/* requested PROMISC differs from gflags */
++		int inc = (flags & IFF_PROMISC) ? +1 : -1;
++		dev->gflags ^= IFF_PROMISC;
++		dev_set_promiscuity(dev, inc);
++	}
++
++	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
++	   is important. Some (broken) drivers set IFF_PROMISC, when
++	   IFF_ALLMULTI is requested not asking us and not reporting.
++	 */
++	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {	/* requested ALLMULTI differs from gflags */
++		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
++		dev->gflags ^= IFF_ALLMULTI;
++		dev_set_allmulti(dev, inc);
++	}
++
++	if (old_flags ^ dev->flags)	/* announce only if something changed */
++		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
++
++	return ret;	/* 0, or the dev_open()/dev_close() result */
++}
++
++int dev_set_mtu(struct net_device *dev, int new_mtu)
++{
++	int err;
++
++	if (new_mtu == dev->mtu)	/* unchanged: nothing to do, no notification */
++		return 0;
++
++	/*	MTU must not be negative (zero is accepted here).	 */
++	if (new_mtu < 0)
++		return -EINVAL;
++
++	if (!netif_device_present(dev))
++		return -ENODEV;
++
++	err = 0;
++	if (dev->change_mtu)	/* the driver hook, when set, is called instead of the assignment */
++		err = dev->change_mtu(dev, new_mtu);
++	else
++		dev->mtu = new_mtu;
++	if (!err && dev->flags & IFF_UP)	/* notify only on success while the device is up */
++		notifier_call_chain(&netdev_chain,
++				    NETDEV_CHANGEMTU, dev);
++	return err;
++}
++
++int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
++{
++	int err;
++
++	if (!dev->set_mac_address)	/* driver provides no hook */
++		return -EOPNOTSUPP;
++	if (sa->sa_family != dev->type)	/* address family must equal dev->type */
++		return -EINVAL;
++	if (!netif_device_present(dev))
++		return -ENODEV;
++	err = dev->set_mac_address(dev, sa);
++	if (!err)	/* notify listeners only on success */
++		notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
++	return err;
++}
++
++/*
++ *	Perform the SIOCxIFxxx calls.
++ */
++static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
++{
++	int err;	/* only assigned in the default branch; every other case returns directly */
++	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
++
++	if (!dev)
++		return -ENODEV;
++
++	switch (cmd) {
++		case SIOCGIFFLAGS:	/* Get interface flags */
++			ifr->ifr_flags = dev_get_flags(dev);
++			return 0;
++
++		case SIOCSIFFLAGS:	/* Set interface flags */
++			return dev_change_flags(dev, ifr->ifr_flags);
++
++		case SIOCGIFMETRIC:	/* Get the metric on the interface
++					   (currently unused) */
++			ifr->ifr_metric = 0;
++			return 0;
++
++		case SIOCSIFMETRIC:	/* Set the metric on the interface
++					   (currently unused) */
++			return -EOPNOTSUPP;
++
++		case SIOCGIFMTU:	/* Get the MTU of a device */
++			ifr->ifr_mtu = dev->mtu;
++			return 0;
++
++		case SIOCSIFMTU:	/* Set the MTU of a device */
++			return dev_set_mtu(dev, ifr->ifr_mtu);
++
++		case SIOCGIFHWADDR:	/* Get the hardware address */
++			if (!dev->addr_len)	/* zero-length address: report all zeroes */
++				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
++			else
++				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
++				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));	/* copy is capped at sizeof sa_data */
++			ifr->ifr_hwaddr.sa_family = dev->type;
++			return 0;
++
++		case SIOCSIFHWADDR:	/* Set the hardware address */
++			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
++
++		case SIOCSIFHWBROADCAST:	/* Set the broadcast hardware address */
++			if (ifr->ifr_hwaddr.sa_family != dev->type)
++				return -EINVAL;
++			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
++			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
++			notifier_call_chain(&netdev_chain,
++					    NETDEV_CHANGEADDR, dev);
++			return 0;
++
++		case SIOCGIFMAP:	/* Report memory range, base address, irq, dma and port */
++			ifr->ifr_map.mem_start = dev->mem_start;
++			ifr->ifr_map.mem_end   = dev->mem_end;
++			ifr->ifr_map.base_addr = dev->base_addr;
++			ifr->ifr_map.irq       = dev->irq;
++			ifr->ifr_map.dma       = dev->dma;
++			ifr->ifr_map.port      = dev->if_port;
++			return 0;
++
++		case SIOCSIFMAP:	/* Hand the map to the set_config hook, if any */
++			if (dev->set_config) {
++				if (!netif_device_present(dev))
++					return -ENODEV;
++				return dev->set_config(dev, &ifr->ifr_map);
++			}
++			return -EOPNOTSUPP;
++
++		case SIOCADDMULTI:	/* Needs a set_multicast_list hook and an AF_UNSPEC address */
++			if (!dev->set_multicast_list ||
++			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
++				return -EINVAL;
++			if (!netif_device_present(dev))
++				return -ENODEV;
++			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
++					  dev->addr_len, 1);
++
++		case SIOCDELMULTI:	/* Same preconditions as SIOCADDMULTI */
++			if (!dev->set_multicast_list ||
++			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
++				return -EINVAL;
++			if (!netif_device_present(dev))
++				return -ENODEV;
++			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
++					     dev->addr_len, 1);
++
++		case SIOCGIFINDEX:	/* Get the interface index */
++			ifr->ifr_ifindex = dev->ifindex;
++			return 0;
++
++		case SIOCGIFTXQLEN:	/* Get the transmit queue length */
++			ifr->ifr_qlen = dev->tx_queue_len;
++			return 0;
++
++		case SIOCSIFTXQLEN:	/* Set the transmit queue length */
++			if (ifr->ifr_qlen < 0)	/* negative lengths are rejected */
++				return -EINVAL;
++			dev->tx_queue_len = ifr->ifr_qlen;
++			return 0;
++
++		case SIOCSIFNAME:	/* Rename the interface */
++			ifr->ifr_newname[IFNAMSIZ-1] = '\0';	/* force NUL termination before use */
++			return dev_change_name(dev, ifr->ifr_newname);
++
++		/*
++		 *	Unknown or private ioctl
++		 */
++
++		default:
++			if ((cmd >= SIOCDEVPRIVATE &&
++			    cmd <= SIOCDEVPRIVATE + 15) ||
++			    cmd == SIOCBONDENSLAVE ||
++			    cmd == SIOCBONDRELEASE ||
++			    cmd == SIOCBONDSETHWADDR ||
++			    cmd == SIOCBONDSLAVEINFOQUERY ||
++			    cmd == SIOCBONDINFOQUERY ||
++			    cmd == SIOCBONDCHANGEACTIVE ||
++			    cmd == SIOCGMIIPHY ||
++			    cmd == SIOCGMIIREG ||
++			    cmd == SIOCSMIIREG ||
++			    cmd == SIOCBRADDIF ||
++			    cmd == SIOCBRDELIF ||
++			    cmd == SIOCWANDEV) {
++				err = -EOPNOTSUPP;	/* result when the device has no do_ioctl hook */
++				if (dev->do_ioctl) {
++					if (netif_device_present(dev))
++						err = dev->do_ioctl(dev, ifr,
++								    cmd);
++					else
++						err = -ENODEV;
++				}
++			} else
++				err = -EINVAL;	/* command is not in the forwarded set above */
++
++	}
++	return err;
++}
++
++/*
++ *	This function handles all "interface"-type I/O control requests. The actual
++ *	'doing' part of this is dev_ifsioc above.
++ */
++
++/**
++ *	dev_ioctl	-	network device ioctl
++ *	@cmd: command to issue
++ *	@arg: pointer to a struct ifreq in user space
++ *
++ *	Issue ioctl functions to devices. This is normally called by the
++ *	user space syscall interfaces but can sometimes be useful for
++ *	other purposes. The return value is the return from the syscall if
++ *	positive or a negative errno code on error.
++ */
++
++int dev_ioctl(unsigned int cmd, void __user *arg)
++{
++	struct ifreq ifr;	/* kernel-side copy of the caller's request */
++	int ret;
++	char *colon;	/* position of the first ':' in ifr_name, or NULL */
++
++	/* One special case: SIOCGIFCONF takes ifconf argument
++	   and requires shared lock, because it sleeps writing
++	   to user space.
++	 */
++
++	if (cmd == SIOCGIFCONF) {
++		rtnl_shlock();
++		ret = dev_ifconf((char __user *) arg);
++		rtnl_shunlock();
++		return ret;
++	}
++	if (cmd == SIOCGIFNAME)	/* handled on the user pointer directly, before any copy-in */
++		return dev_ifname((struct ifreq __user *)arg);
++
++	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
++		return -EFAULT;
++
++	ifr.ifr_name[IFNAMSIZ-1] = 0;	/* force NUL termination of the user-supplied name */
++
++	colon = strchr(ifr.ifr_name, ':');	/* cut the name at the first ':' for the lookup */
++	if (colon)
++		*colon = 0;
++
++	/*
++	 *	See which interface the caller is talking about.
++	 */
++
++	switch (cmd) {
++		/*
++		 *	These ioctl calls:
++		 *	- can be done by all.
++		 *	- atomic and do not require locking.
++		 *	- return a value
++		 */
++		case SIOCGIFFLAGS:
++		case SIOCGIFMETRIC:
++		case SIOCGIFMTU:
++		case SIOCGIFHWADDR:
++		case SIOCGIFSLAVE:
++		case SIOCGIFMAP:
++		case SIOCGIFINDEX:
++		case SIOCGIFTXQLEN:
++			dev_load(ifr.ifr_name);
++			read_lock(&dev_base_lock);	/* dev_ifsioc runs under the dev_base_lock read lock here */
++			ret = dev_ifsioc(&ifr, cmd);
++			read_unlock(&dev_base_lock);
++			if (!ret) {
++				if (colon)
++					*colon = ':';	/* put the ':' back before copying the request out */
++				if (copy_to_user(arg, &ifr,
++						 sizeof(struct ifreq)))
++					ret = -EFAULT;
++			}
++			return ret;
++
++		case SIOCETHTOOL:
++			dev_load(ifr.ifr_name);
++			rtnl_lock();
++			ret = dev_ethtool(&ifr);
++			rtnl_unlock();
++			if (!ret) {
++				if (colon)
++					*colon = ':';
++				if (copy_to_user(arg, &ifr,
++						 sizeof(struct ifreq)))
++					ret = -EFAULT;
++			}
++			return ret;
++
++		/*
++		 *	These ioctl calls:
++		 *	- require superuser power.
++		 *	- require strict serialization.
++		 *	- return a value
++		 */
++		case SIOCGMIIPHY:
++		case SIOCGMIIREG:
++		case SIOCSIFNAME:
++			if (!capable(CAP_NET_ADMIN))
++				return -EPERM;
++			dev_load(ifr.ifr_name);
++			rtnl_lock();
++			ret = dev_ifsioc(&ifr, cmd);
++			rtnl_unlock();
++			if (!ret) {
++				if (colon)
++					*colon = ':';
++				if (copy_to_user(arg, &ifr,
++						 sizeof(struct ifreq)))
++					ret = -EFAULT;
++			}
++			return ret;
++
++		/*
++		 *	These ioctl calls:
++		 *	- require superuser power.
++		 *	- require strict serialization.
++		 *	- do not return a value
++		 */
++		case SIOCSIFFLAGS:
++		case SIOCSIFMETRIC:
++		case SIOCSIFMTU:
++		case SIOCSIFMAP:
++		case SIOCSIFHWADDR:
++		case SIOCSIFSLAVE:
++		case SIOCADDMULTI:
++		case SIOCDELMULTI:
++		case SIOCSIFHWBROADCAST:
++		case SIOCSIFTXQLEN:
++		case SIOCSMIIREG:
++		case SIOCBONDENSLAVE:
++		case SIOCBONDRELEASE:
++		case SIOCBONDSETHWADDR:
++		case SIOCBONDSLAVEINFOQUERY:
++		case SIOCBONDINFOQUERY:
++		case SIOCBONDCHANGEACTIVE:
++		case SIOCBRADDIF:
++		case SIOCBRDELIF:
++			if (!capable(CAP_NET_ADMIN))
++				return -EPERM;
++			dev_load(ifr.ifr_name);
++			rtnl_lock();
++			ret = dev_ifsioc(&ifr, cmd);
++			rtnl_unlock();
++			return ret;	/* ifr is not copied back to the caller in this group */
++
++		case SIOCGIFMEM:
++			/* Get the per device memory space. We can add this but
++			 * currently do not support it */
++		case SIOCSIFMEM:
++			/* Set the per device memory buffer space.
++			 * Not applicable in our case */
++		case SIOCSIFLINK:
++			return -EINVAL;
++
++		/*
++		 *	Unknown or private ioctl.
++		 */
++		default:
++			if (cmd == SIOCWANDEV ||
++			    (cmd >= SIOCDEVPRIVATE &&
++			     cmd <= SIOCDEVPRIVATE + 15)) {
++				dev_load(ifr.ifr_name);
++				rtnl_lock();
++				ret = dev_ifsioc(&ifr, cmd);
++				rtnl_unlock();
++				if (!ret && copy_to_user(arg, &ifr,	/* NOTE(review): the ':' is not restored before this copy-out */
++							 sizeof(struct ifreq)))
++					ret = -EFAULT;
++				return ret;
++			}
++#ifdef WIRELESS_EXT
++			/* Take care of Wireless Extensions */
++			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
++				/* If command is `set a parameter', or
++				 * `get the encoding parameters', check if
++				 * the user has the right to do it */
++				if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
++					if (!capable(CAP_NET_ADMIN))
++						return -EPERM;
++				}
++				dev_load(ifr.ifr_name);
++				rtnl_lock();
++				/* Follow me in net/core/wireless.c */
++				ret = wireless_process_ioctl(&ifr, cmd);
++				rtnl_unlock();
++				if (IW_IS_GET(cmd) &&	/* copy-out happens for every GET command, whatever ret is */
++				    copy_to_user(arg, &ifr,
++					    	 sizeof(struct ifreq)))
++					ret = -EFAULT;
++				return ret;
++			}
++#endif	/* WIRELESS_EXT */
++			return -EINVAL;
++	}
++}
++
++
++/**
++ *	dev_new_index	-	allocate an ifindex
++ *
++ *	Returns a suitable unique value for a new device interface
++ *	number.  The caller must hold the rtnl semaphore or the
++ *	dev_base_lock to be sure it remains unique.
++ */
++static int dev_new_index(void)
++{
++	static int ifindex;	/* last candidate tried; kept across calls */
++	for (;;) {
++		if (++ifindex <= 0)	/* keep the candidate strictly positive */
++			ifindex = 1;
++		if (!__dev_get_by_index(ifindex))	/* no device uses this index: hand it out */
++			return ifindex;
++	}
++}
++
++static int dev_boot_phase = 1;
++
++/* Delayed registration/unregistration */
++static DEFINE_SPINLOCK(net_todo_list_lock);
++static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
++
++static inline void net_set_todo(struct net_device *dev)	/* queue dev on net_todo_list */
++{
++	spin_lock(&net_todo_list_lock);
++	list_add_tail(&dev->todo_list, &net_todo_list);	/* appended at the tail of the list */
++	spin_unlock(&net_todo_list_lock);
++}
++
++/**
++ *	register_netdevice	- register a network device
++ *	@dev: device to register
++ *
++ *	Take a completed network device structure and add it to the kernel
++ *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
++ *	chain. 0 is returned on success. A negative errno code is returned
++ *	on a failure to set up the device, or if the name is a duplicate.
++ *
++ *	Callers must hold the rtnl semaphore. You may want
++ *	register_netdev() instead of this.
++ *
++ *	BUGS:
++ *	The locking appears insufficient to guarantee two parallel registers
++ *	will not get the same name.
++ */
++
++int register_netdevice(struct net_device *dev)
++{
++	struct hlist_head *head;
++	struct hlist_node *p;
++	int ret;
++
++	BUG_ON(dev_boot_phase);	/* dev_boot_phase is cleared by net_dev_init() */
++	ASSERT_RTNL();
++
++	/* When net_device's are persistent, this will be fatal. */
++	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
++
++	spin_lock_init(&dev->queue_lock);
++	spin_lock_init(&dev->xmit_lock);
++	dev->xmit_lock_owner = -1;
++#ifdef CONFIG_NET_CLS_ACT
++	spin_lock_init(&dev->ingress_lock);
++#endif
++
++	ret = alloc_divert_blk(dev);
++	if (ret)
++		goto out;	/* allocation failed, so skip the free_divert_blk() at out_err */
++
++	dev->iflink = -1;	/* sentinel: replaced by ifindex below if still -1 */
++
++	/* Init, if this function is available */
++	if (dev->init) {
++		ret = dev->init(dev);
++		if (ret) {
++			if (ret > 0)	/* positive results are reported as -EIO */
++				ret = -EIO;
++			goto out_err;
++		}
++	}
++ 
++	if (!dev_valid_name(dev->name)) {
++		ret = -EINVAL;
++		goto out_err;
++	}
++
++	dev->ifindex = dev_new_index();
++	if (dev->iflink == -1)
++		dev->iflink = dev->ifindex;
++
++	/* Check for existence of name */
++	head = dev_name_hash(dev->name);
++	hlist_for_each(p, head) {
++		struct net_device *d
++			= hlist_entry(p, struct net_device, name_hlist);
++		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {	/* same name already in this hash chain */
++			ret = -EEXIST;
++ 			goto out_err;
++		}
++ 	}
++
++	/* Fix illegal SG+CSUM combinations. */
++	if ((dev->features & NETIF_F_SG) &&
++	    !(dev->features & (NETIF_F_IP_CSUM |
++			       NETIF_F_NO_CSUM |
++			       NETIF_F_HW_CSUM))) {
++		printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
++		       dev->name);
++		dev->features &= ~NETIF_F_SG;
++	}
++
++	/* TSO requires that SG is present as well. */
++	if ((dev->features & NETIF_F_TSO) &&
++	    !(dev->features & NETIF_F_SG)) {
++		printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
++		       dev->name);
++		dev->features &= ~NETIF_F_TSO;
++	}
++
++	/*
++	 *	nil rebuild_header routine,
++	 *	that should be never called and used as just bug trap.
++	 */
++
++	if (!dev->rebuild_header)
++		dev->rebuild_header = default_rebuild_header;
++
++	/*
++	 *	Default initial state at registry is that the
++	 *	device is present.
++	 */
++
++	set_bit(__LINK_STATE_PRESENT, &dev->state);
++
++	dev->next = NULL;
++	dev_init_scheduler(dev);
++	write_lock_bh(&dev_base_lock);
++	*dev_tail = dev;	/* append to the device chain ... */
++	dev_tail = &dev->next;	/* ... and advance the tail pointer */
++	hlist_add_head(&dev->name_hlist, head);
++	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
++	dev_hold(dev);
++	dev->reg_state = NETREG_REGISTERING;
++	write_unlock_bh(&dev_base_lock);
++
++	/* Notify protocols, that a new device appeared. */
++	notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
++
++	/* Finish registration after unlock */
++	net_set_todo(dev);	/* netdev_run_todo() handles the NETREG_REGISTERING state */
++	ret = 0;
++
++out:
++	return ret;
++out_err:
++	free_divert_blk(dev);	/* undo alloc_divert_blk() from above */
++	goto out;
++}
++
++/**
++ *	register_netdev	- register a network device
++ *	@dev: device to register
++ *
++ *	Take a completed network device structure and add it to the kernel
++ *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
++ *	chain. 0 is returned on success. A negative errno code is returned
++ *	on a failure to set up the device, or if the name is a duplicate.
++ *
++ *	This is a wrapper around register_netdevice that takes the rtnl semaphore
++ *	and expands the device name if you passed a format string to
++ *	alloc_netdev.
++ */
++int register_netdev(struct net_device *dev)
++{
++	int err;
++
++	rtnl_lock();	/* held across name allocation and register_netdevice() */
++
++	/*
++	 * If the name is a format string the caller wants us to do a
++	 * name allocation.
++	 */
++	if (strchr(dev->name, '%')) {
++		err = dev_alloc_name(dev, dev->name);
++		if (err < 0)
++			goto out;
++	}
++	
++	/*
++	 * Back compatibility hook. Kill this one in 2.5
++	 */
++	if (dev->name[0] == 0 || dev->name[0] == ' ') {	/* empty or blank name: allocate an "eth%d" name */
++		err = dev_alloc_name(dev, "eth%d");
++		if (err < 0)
++			goto out;
++	}
++
++	err = register_netdevice(dev);
++out:
++	rtnl_unlock();	/* common exit: always drops the lock taken above */
++	return err;
++}
++EXPORT_SYMBOL(register_netdev);
++
++/*
++ * netdev_wait_allrefs - wait until all references are gone.
++ *
++ * This is called when unregistering network devices.
++ *
++ * Any protocol or device that holds a reference should register
++ * for netdevice notification, and cleanup and put back the
++ * reference if they receive an UNREGISTER event.
++ * We can get stuck here if buggy protocols don't correctly
++ * call dev_put. 
++ */
++static void netdev_wait_allrefs(struct net_device *dev)
++{
++	unsigned long rebroadcast_time, warning_time;	/* jiffies of the last rebroadcast / last warning */
++
++	rebroadcast_time = warning_time = jiffies;
++	while (atomic_read(&dev->refcnt) != 0) {	/* loops until the reference count reads zero */
++		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {	/* more than 1 * HZ jiffies since the last rebroadcast */
++			rtnl_shlock();
++
++			/* Rebroadcast unregister notification */
++			notifier_call_chain(&netdev_chain,
++					    NETDEV_UNREGISTER, dev);
++
++			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
++				     &dev->state)) {
++				/* We must not have linkwatch events
++				 * pending on unregister. If this
++				 * happens, we simply run the queue
++				 * unscheduled, resulting in a noop
++				 * for this device.
++				 */
++				linkwatch_run_queue();
++			}
++
++			rtnl_shunlock();
++
++			rebroadcast_time = jiffies;
++		}
++
++		msleep(250);	/* sleep between checks of the reference count */
++
++		if (time_after(jiffies, warning_time + 10 * HZ)) {	/* more than 10 * HZ jiffies since the last warning */
++			printk(KERN_EMERG "unregister_netdevice: "
++			       "waiting for %s to become free. Usage "
++			       "count = %d\n",
++			       dev->name, atomic_read(&dev->refcnt));
++			warning_time = jiffies;
++		}
++	}
++}
++
++/* The sequence is:
++ *
++ *	rtnl_lock();
++ *	...
++ *	register_netdevice(x1);
++ *	register_netdevice(x2);
++ *	...
++ *	unregister_netdevice(y1);
++ *	unregister_netdevice(y2);
++ *      ...
++ *	rtnl_unlock();
++ *	free_netdev(y1);
++ *	free_netdev(y2);
++ *
++ * We are invoked by rtnl_unlock() after it drops the semaphore.
++ * This allows us to deal with problems:
++ * 1) We can create/delete sysfs objects which invoke hotplug
++ *    without deadlocking with linkwatch via keventd.
++ * 2) Since we run with the RTNL semaphore not held, we can sleep
++ *    safely in order to wait for the netdev refcnt to drop to zero.
++ */
++static DECLARE_MUTEX(net_todo_run_mutex);
++void netdev_run_todo(void)
++{
++	struct list_head list = LIST_HEAD_INIT(list);	/* private snapshot of net_todo_list */
++	int err;
++
++
++	/* Need to guard against multiple cpu's getting out of order. */
++	down(&net_todo_run_mutex);
++
++	/* Not safe to do outside the semaphore.  We must not return
++	 * until all unregister events invoked by the local processor
++	 * have been completed (either by this todo run, or one on
++	 * another cpu).
++	 */
++	if (list_empty(&net_todo_list))
++		goto out;
++
++	/* Snapshot list, allow later requests */
++	spin_lock(&net_todo_list_lock);
++	list_splice_init(&net_todo_list, &list);
++	spin_unlock(&net_todo_list_lock);
++		
++	while (!list_empty(&list)) {
++		struct net_device *dev
++			= list_entry(list.next, struct net_device, todo_list);
++		list_del(&dev->todo_list);	/* take the first entry off the snapshot */
++
++		switch(dev->reg_state) {
++		case NETREG_REGISTERING:
++			err = netdev_register_sysfs(dev);
++			if (err)
++				printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
++				       dev->name, err);
++			dev->reg_state = NETREG_REGISTERED;	/* set even when sysfs registration failed */
++			break;
++
++		case NETREG_UNREGISTERING:
++			netdev_unregister_sysfs(dev);
++			dev->reg_state = NETREG_UNREGISTERED;
++
++			netdev_wait_allrefs(dev);	/* returns only once dev->refcnt reads zero */
++
++			/* paranoia */
++			BUG_ON(atomic_read(&dev->refcnt));
++			BUG_TRAP(!dev->ip_ptr);
++			BUG_TRAP(!dev->ip6_ptr);
++			BUG_TRAP(!dev->dn_ptr);
++
++
++			/* It must be the very last action, 
++			 * after this 'dev' may point to freed up memory.
++			 */
++			if (dev->destructor)
++				dev->destructor(dev);
++			break;
++
++		default:	/* any other state is only logged */
++			printk(KERN_ERR "network todo '%s' but state %d\n",
++			       dev->name, dev->reg_state);
++			break;
++		}
++	}
++
++out:
++	up(&net_todo_run_mutex);
++}
++
++/**
++ *	alloc_netdev - allocate network device
++ *	@sizeof_priv:	size of private data to allocate space for
++ *	@name:		device name format string
++ *	@setup:		callback to initialize device
++ *
++ *	Allocates a struct net_device with private data area for driver use
++ *	and performs basic initialization.
++ */
++struct net_device *alloc_netdev(int sizeof_priv, const char *name,
++		void (*setup)(struct net_device *))
++{
++	void *p;	/* raw kmalloc() result, before alignment */
++	struct net_device *dev;
++	int alloc_size;
++
++	/* ensure 32-byte alignment of both the device and private area */
++	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
++	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;	/* extra slack so dev can be aligned upward inside p */
++
++	p = kmalloc(alloc_size, GFP_KERNEL);
++	if (!p) {
++		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
++		return NULL;
++	}
++	memset(p, 0, alloc_size);	/* the whole allocation starts zeroed */
++
++	dev = (struct net_device *)
++		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
++	dev->padded = (char *)dev - (char *)p;	/* offset back to p; free_netdev() subtracts it before kfree() */
++
++	if (sizeof_priv)
++		dev->priv = netdev_priv(dev);
++
++	setup(dev);	/* setup is called unconditionally, so it must not be NULL */
++	strcpy(dev->name, name);	/* NOTE(review): unbounded copy; name must fit in dev->name */
++	return dev;
++}
++EXPORT_SYMBOL(alloc_netdev);
++
++/**
++ *	free_netdev - free network device
++ *	@dev: device
++ *
++ *	This function does the last stage of destroying an allocated device 
++ * 	interface. The reference to the device object is released.  
++ *	If this is the last reference then it will be freed.
++ */
++void free_netdev(struct net_device *dev)
++{
++#ifdef CONFIG_SYSFS
++	/*  Compatibility with error handling in drivers */
++	if (dev->reg_state == NETREG_UNINITIALIZED) {	/* still NETREG_UNINITIALIZED: free the memory directly */
++		kfree((char *)dev - dev->padded);
++		return;
++	}
++
++	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
++	dev->reg_state = NETREG_RELEASED;
++
++	/* will free via class release */
++	class_device_put(&dev->class_dev);
++#else
++	kfree((char *)dev - dev->padded);	/* dev - padded is the pointer kmalloc() returned in alloc_netdev() */
++#endif
++}
++ 
++/* Synchronize with packet receive processing. */
++void synchronize_net(void) 
++{
++	might_sleep();	/* this function may block */
++	synchronize_rcu();	/* the actual synchronisation is delegated to RCU */
++}
++
++/**
++ *	unregister_netdevice - remove device from the kernel
++ *	@dev: device
++ *
++ *	This function shuts down a device interface and removes it
++ *	from the kernel tables. On success 0 is returned, on a failure
++ *	a negative errno code is returned.
++ *
++ *	Callers must hold the rtnl semaphore.  You may want
++ *	unregister_netdev() instead of this.
++ */
++
++int unregister_netdevice(struct net_device *dev)
++{
++	struct net_device *d, **dp;	/* cursor and link pointer for walking dev_base */
++
++	BUG_ON(dev_boot_phase);	/* dev_boot_phase is cleared by net_dev_init() */
++	ASSERT_RTNL();
++
++	/* Some devices call without registering for initialization unwind. */
++	if (dev->reg_state == NETREG_UNINITIALIZED) {
++		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
++				  "was registered\n", dev->name, dev);
++		return -ENODEV;
++	}
++
++	BUG_ON(dev->reg_state != NETREG_REGISTERED);
++
++	/* If device is running, close it first. */
++	if (dev->flags & IFF_UP)
++		dev_close(dev);
++
++	/* And unlink it from device chain. */
++	for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
++		if (d == dev) {
++			write_lock_bh(&dev_base_lock);
++			hlist_del(&dev->name_hlist);
++			hlist_del(&dev->index_hlist);
++			if (dev_tail == &dev->next)	/* dev was the last element: move the tail back */
++				dev_tail = dp;
++			*dp = d->next;
++			write_unlock_bh(&dev_base_lock);
++			break;
++		}
++	}
++	if (!d) {	/* loop ran off the end of the chain without finding dev */
++		printk(KERN_ERR "unregister net_device: '%s' not found\n",
++		       dev->name);
++		return -ENODEV;
++	}
++
++	dev->reg_state = NETREG_UNREGISTERING;
++
++	synchronize_net();
++
++	/* Shutdown queueing discipline. */
++	dev_shutdown(dev);
++
++	
++	/* Notify protocols, that we are about to destroy
++	   this device. They should clean all the things.
++	*/
++	notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
++	
++	/*
++	 *	Flush the multicast chain
++	 */
++	dev_mc_discard(dev);
++
++	if (dev->uninit)	/* optional uninit hook */
++		dev->uninit(dev);
++
++	/* Notifier chain MUST detach us from master device. */
++	BUG_TRAP(!dev->master);
++
++	free_divert_blk(dev);
++
++	/* Finish processing unregister after unlock */
++	net_set_todo(dev);	/* netdev_run_todo() handles the NETREG_UNREGISTERING state */
++
++	synchronize_net();
++
++	dev_put(dev);
++	return 0;
++}
++
++/**
++ *	unregister_netdev - remove device from the kernel
++ *	@dev: device
++ *
++ *	This function shuts down a device interface and removes it
++ *	from the kernel tables. Nothing is returned: the result of
++ *	unregister_netdevice() is discarded.
++ *
++ *	This is just a wrapper for unregister_netdevice that takes
++ *	the rtnl semaphore.  In general you want to use this and not
++ *	unregister_netdevice.
++ */
++void unregister_netdev(struct net_device *dev)
++{
++	rtnl_lock();
++	unregister_netdevice(dev);	/* return value intentionally not propagated (function is void) */
++	rtnl_unlock();
++}
++
++EXPORT_SYMBOL(unregister_netdev);
++
++#ifdef CONFIG_HOTPLUG_CPU
++static int dev_cpu_callback(struct notifier_block *nfb,
++			    unsigned long action,
++			    void *ocpu)
++{
++	struct sk_buff **list_skb;
++	struct net_device **list_net;
++	struct sk_buff *skb;
++	unsigned int cpu, oldcpu = (unsigned long)ocpu;	/* ocpu carries the CPU number, not a pointer */
++	struct softnet_data *sd, *oldsd;	/* queues of the current CPU and of oldcpu */
++
++	if (action != CPU_DEAD)	/* every other action is ignored */
++		return NOTIFY_OK;
++
++	local_irq_disable();	/* the queue splicing below runs with local interrupts off */
++	cpu = smp_processor_id();
++	sd = &per_cpu(softnet_data, cpu);
++	oldsd = &per_cpu(softnet_data, oldcpu);
++
++	/* Find end of our completion_queue. */
++	list_skb = &sd->completion_queue;
++	while (*list_skb)
++		list_skb = &(*list_skb)->next;
++	/* Append completion queue from offline CPU. */
++	*list_skb = oldsd->completion_queue;
++	oldsd->completion_queue = NULL;
++
++	/* Find end of our output_queue. */
++	list_net = &sd->output_queue;
++	while (*list_net)
++		list_net = &(*list_net)->next_sched;
++	/* Append output queue from offline CPU. */
++	*list_net = oldsd->output_queue;
++	oldsd->output_queue = NULL;
++
++	raise_softirq_irqoff(NET_TX_SOFTIRQ);	/* raise NET_TX_SOFTIRQ after splicing the queues */
++	local_irq_enable();
++
++	/* Process offline CPU's input_pkt_queue */
++	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
++		netif_rx(skb);	/* each dequeued packet is passed to netif_rx() */
++
++	return NOTIFY_OK;
++}
++#endif /* CONFIG_HOTPLUG_CPU */
++
++
++/*
++ *	Initialize the DEV module. At boot time this walks the device list and
++ *	unhooks any devices that fail to initialise (normally hardware not
++ *	present) and leaves us with a valid list of present and active devices.
++ *
++ */
++
++/*
++ *       This is called single threaded during boot, so no need
++ *       to take the rtnl semaphore.
++ */
++static int __init net_dev_init(void)
++{
++	int i, rc = -ENOMEM;	/* rc stays -ENOMEM on the early "goto out" paths */
++
++	BUG_ON(!dev_boot_phase);	/* must run while dev_boot_phase is still set */
++
++	net_random_init();
++
++	if (dev_proc_init())
++		goto out;
++
++	if (netdev_sysfs_init())
++		goto out;
++
++	INIT_LIST_HEAD(&ptype_all);
++	for (i = 0; i < 16; i++) 
++		INIT_LIST_HEAD(&ptype_base[i]);	/* 16 is hard-coded; presumably the size of ptype_base -- confirm */
++
++	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
++		INIT_HLIST_HEAD(&dev_name_head[i]);
++
++	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
++		INIT_HLIST_HEAD(&dev_index_head[i]);
++
++	/*
++	 *	Initialise the packet receive queues.
++	 */
++
++	for (i = 0; i < NR_CPUS; i++) {	/* one softnet_data per CPU slot */
++		struct softnet_data *queue;
++
++		queue = &per_cpu(softnet_data, i);
++		skb_queue_head_init(&queue->input_pkt_queue);
++		queue->throttle = 0;
++		queue->cng_level = 0;
++		queue->avg_blog = 10; /* arbitrary non-zero */
++		queue->completion_queue = NULL;
++		INIT_LIST_HEAD(&queue->poll_list);
++		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
++		queue->backlog_dev.weight = weight_p;
++		queue->backlog_dev.poll = process_backlog;
++		atomic_set(&queue->backlog_dev.refcnt, 1);
++	}
++
++#ifdef OFFLINE_SAMPLE
++	samp_timer.expires = jiffies + (10 * HZ);
++	add_timer(&samp_timer);
++#endif
++
++	dev_boot_phase = 0;	/* from here on the BUG_ON(dev_boot_phase) checks in (un)register_netdevice() pass */
++
++	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
++	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
++
++	hotcpu_notifier(dev_cpu_callback, 0);
++	dst_init();
++	dev_mcast_init();
++	rc = 0;	/* all steps done: report success */
++out:
++	return rc;
++}
++
++subsys_initcall(net_dev_init);
++
++EXPORT_SYMBOL(__dev_get_by_index);
++EXPORT_SYMBOL(__dev_get_by_name);
++EXPORT_SYMBOL(__dev_remove_pack);
++EXPORT_SYMBOL(__skb_linearize);
++EXPORT_SYMBOL(dev_add_pack);
++EXPORT_SYMBOL(dev_alloc_name);
++EXPORT_SYMBOL(dev_close);
++EXPORT_SYMBOL(dev_get_by_flags);
++EXPORT_SYMBOL(dev_get_by_index);
++EXPORT_SYMBOL(dev_get_by_name);
++EXPORT_SYMBOL(dev_ioctl);
++EXPORT_SYMBOL(dev_open);
++EXPORT_SYMBOL(dev_queue_xmit);
++EXPORT_SYMBOL(dev_remove_pack);
++EXPORT_SYMBOL(dev_set_allmulti);
++EXPORT_SYMBOL(dev_set_promiscuity);
++EXPORT_SYMBOL(dev_change_flags);
++EXPORT_SYMBOL(dev_set_mtu);
++EXPORT_SYMBOL(dev_set_mac_address);
++EXPORT_SYMBOL(free_netdev);
++EXPORT_SYMBOL(netdev_boot_setup_check);
++EXPORT_SYMBOL(netdev_set_master);
++EXPORT_SYMBOL(netdev_state_change);
++EXPORT_SYMBOL(netif_receive_skb);
++EXPORT_SYMBOL(netif_rx);
++EXPORT_SYMBOL(register_gifconf);
++EXPORT_SYMBOL(register_netdevice);
++EXPORT_SYMBOL(register_netdevice_notifier);
++EXPORT_SYMBOL(skb_checksum_help);
++EXPORT_SYMBOL(synchronize_net);
++EXPORT_SYMBOL(unregister_netdevice);
++EXPORT_SYMBOL(unregister_netdevice_notifier);
++EXPORT_SYMBOL(net_enable_timestamp);
++EXPORT_SYMBOL(net_disable_timestamp);
++EXPORT_SYMBOL(dev_get_flags);
++
++#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
++EXPORT_SYMBOL(br_handle_frame_hook);
++EXPORT_SYMBOL(br_fdb_get_hook);
++EXPORT_SYMBOL(br_fdb_put_hook);
++#endif
++
++#ifdef CONFIG_KMOD
++EXPORT_SYMBOL(dev_load);
++#endif
++
++EXPORT_PER_CPU_SYMBOL(softnet_data);
+diff --unified --recursive --new-file linux-2.6.12.5/net/ring/Kconfig linux-2.6.12.5-1-686-smp-ring3/net/ring/Kconfig
+--- linux-2.6.12.5/net/ring/Kconfig	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.12.5-1-686-smp-ring3/net/ring/Kconfig	2005-10-22 23:50:45.539482000 +0200
+@@ -0,0 +1,14 @@
++config RING
++	tristate "PF_RING sockets (EXPERIMENTAL)"
++	depends on EXPERIMENTAL
++	---help---
++	  PF_RING socket family, optimized for packet capture.
++          If a PF_RING socket is bound to an adapter (via the bind() system
++          call), such adapter will be used in read-only mode until the socket
++          is destroyed. Whenever an incoming packet is received from the adapter
++          it will not be passed to upper layers, but instead it is copied to a ring
++          buffer, which in turn is exported to user space applications via mmap.
++          Please refer to http://luca.ntop.org/Ring.pdf for more.
++
++	  Say N unless you know what you are doing.
++
+diff --unified --recursive --new-file linux-2.6.12.5/net/ring/Makefile linux-2.6.12.5-1-686-smp-ring3/net/ring/Makefile
+--- linux-2.6.12.5/net/ring/Makefile	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.12.5-1-686-smp-ring3/net/ring/Makefile	2005-10-22 23:50:45.051451500 +0200
+@@ -0,0 +1,7 @@
++#
++# Makefile for the ring driver.
++#
++
++obj-m += ring.o
++
++ring-objs := ring_packet.o
+diff --unified --recursive --new-file linux-2.6.12.5/net/ring/ring_packet.c linux-2.6.12.5-1-686-smp-ring3/net/ring/ring_packet.c
+--- linux-2.6.12.5/net/ring/ring_packet.c	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.12.5-1-686-smp-ring3/net/ring/ring_packet.c	2005-10-22 23:50:45.159458250 +0200
+@@ -0,0 +1,1592 @@
++/*
++ *
++ * (C) 2004-05 - Luca Deri <deri@ntop.org>
++ *
++ * This code includes patches courtesy of
++ * - Jeff Randall <jrandall@nexvu.com>
++ * - Helmut Manck <helmut.manck@secunet.com>
++ * - Brad Doctor <bdoctor@ps-ax.com>
++ *
++ */
++
++/* FIX: add an entry inside the /proc filesystem */
++
++#include <linux/version.h>
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/socket.h>
++#include <linux/skbuff.h>
++#include <linux/rtnetlink.h>
++#include <linux/in.h>
++#include <linux/in6.h>
++#include <linux/init.h>
++#include <linux/filter.h>
++#include <linux/ring.h>
++#include <linux/ip.h>
++#include <linux/tcp.h>
++#include <linux/udp.h>
++#include <linux/list.h>
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++#include <net/xfrm.h>
++#else
++#include <linux/poll.h>
++#endif
++#include <net/sock.h>
++#include <asm/io.h>   /* needed for virt_to_phys() */
++
++/* #define RING_DEBUG */
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11))
++static inline int remap_page_range(struct vm_area_struct *vma,	/* compat wrapper over remap_pfn_range() for kernels >= 2.6.11 */
++				   unsigned long uvaddr,
++				   unsigned long paddr,
++				   unsigned long size,
++				   pgprot_t prot) {
++  return(remap_pfn_range(vma, uvaddr, paddr >> PAGE_SHIFT,	/* physical address converted to a page frame number */
++			 size, prot));
++}
++#endif
++
++/* ************************************************* */
++
++#define CLUSTER_LEN       8
++
++struct ring_cluster {
++  u_short             cluster_id; /* 0 = no cluster */
++  u_short             num_cluster_elements; /* presumably the number of used sk[] entries -- confirm */
++  enum cluster_type   hashing_mode;
++  u_short             hashing_id;
++  struct sock         *sk[CLUSTER_LEN]; /* at most CLUSTER_LEN sockets per cluster */
++  struct ring_cluster *next;      /* NULL = last element of the cluster */
++};
++
++/* ************************************************* */
++
++struct ring_element {	/* one entry of ring_table; allocated by ring_insert() */
++  struct list_head  list;	/* linkage into ring_table */
++  struct sock      *sk;	/* the socket this entry stands for */
++};
++
++/* ************************************************* */
++
++struct ring_opt {	/* per-socket ring state; reached through ring_sk(sk) */
++  struct net_device *ring_netdev;	/* presumably the device the socket is bound to -- confirm */
++
++  /* Cluster */
++  u_short cluster_id; /* 0 = no cluster */
++
++  /* Reflector */
++  struct net_device *reflector_dev;
++
++  /* Packet buffers */
++  unsigned long order;	/* presumably the page-allocation order of ring_memory -- confirm */
++
++  /* Ring Slots */
++  unsigned long ring_memory;
++  FlowSlotInfo *slots_info; /* Basically it points to ring_memory */
++  char *ring_slots;  /* Basically it points to ring_memory
++			+sizeof(FlowSlotInfo) */
++
++  /* Packet Sampling */
++  u_int pktToSample, sample_rate;
++
++  /* BPF Filter */
++  struct sk_filter *bpfFilter;
++
++  /* Locks */
++  atomic_t num_ring_slots_waiters;
++  wait_queue_head_t ring_slots_waitqueue;
++  rwlock_t ring_index_lock;
++
++  /* Indexes (Internal) */
++  u_int insert_page_id, insert_slot_id;
++};
++
++/* ************************************************* */
++
++/* List of all ring sockets. */
++static struct list_head ring_table;
++
++/* List of all clusters */
++static struct ring_cluster *ring_cluster_list;
++
++static rwlock_t ring_mgmt_lock = RW_LOCK_UNLOCKED;
++
++/* ********************************** */
++
++/* Forward */
++static struct proto_ops ring_ops;
++
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
++static struct proto ring_proto;
++#endif
++
++static int skb_ring_handler(struct sk_buff *skb, u_char recv_packet,
++			    u_char real_skb);
++static int buffer_ring_handler(struct net_device *dev, char *data, int len);
++static int remove_from_cluster(struct sock *sock, struct ring_opt *pfr);
++
++/* Extern */
++
++/* ********************************** */
++
++/* Defaults */
++static u_int bucket_len = 128, num_slots = 4096, sample_rate = 1,
++  transparent_mode = 0, enable_tx_capture = 0;
++
++MODULE_PARM(bucket_len, "i");
++MODULE_PARM_DESC(bucket_len, "Number of ring buckets");
++MODULE_PARM(num_slots,  "i");
++MODULE_PARM_DESC(num_slots,  "Number of ring slots");
++MODULE_PARM(sample_rate, "i");
++MODULE_PARM_DESC(sample_rate, "Ring packet sample rate");
++MODULE_PARM(transparent_mode, "i");
++MODULE_PARM_DESC(transparent_mode,
++		 "Set to 1 to set transparent mode "
++		 "(slower but backwards compatible)");
++MODULE_PARM(enable_tx_capture, "i");
++MODULE_PARM_DESC(enable_tx_capture, "Set to 1 to capture outgoing packets");
++
++/* ********************************** */
++
++#define MIN_QUEUED_PKTS      64
++#define MAX_QUEUE_LOOPS      64
++
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++#define ring_sk_datatype(__sk) ((struct ring_opt *)__sk)
++#define ring_sk(__sk) ((__sk)->sk_protinfo)
++#else
++#define ring_sk_datatype(a) (a)
++#define ring_sk(__sk) ((__sk)->protinfo.pf_ring)
++#endif
++
++/*
++  int dev_queue_xmit(struct sk_buff *skb)
++  skb->dev;
++  struct net_device *dev_get_by_name(const char *name)
++*/
++
++/* ********************************** */
++
++static void ring_sock_destruct(struct sock *sk) {
++  /* sk destructor set by ring_create(): frees the per-socket ring_opt */
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++  skb_queue_purge(&sk->sk_receive_queue);
++  /* A socket that is not yet marked dead is left untouched */
++  if (!sock_flag(sk, SOCK_DEAD)) {
++#if defined(RING_DEBUG)
++    printk("Attempt to release alive ring socket: %p\n", sk);
++#endif
++    return;
++  }
++
++  BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
++  BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
++#else
++
++  BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
++  BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
++
++  if (!sk->dead) {
++#if defined(RING_DEBUG)
++    printk("Attempt to release alive ring socket: %p\n", sk);
++#endif
++    return;
++  }
++#endif
++  /* ring_release() stores NULL here after freeing, so this may be a no-op */
++  kfree(ring_sk(sk));
++  /* 2.4 only: balances the MOD_INC_USE_COUNT done in ring_create() */
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
++  MOD_DEC_USE_COUNT;
++#endif
++}
++
++/* ********************************** */
++/*
++ * ring_insert()
++ *
++ * store the sk in a new element and add it
++ * to the head of the list.
++ */
++static inline void ring_insert(struct sock *sk) {
++  struct ring_element *elem;  /* new list node wrapping sk */
++
++#if defined(RING_DEBUG)
++  printk("RING: ring_insert()\n");
++#endif
++
++  elem = kmalloc(sizeof(struct ring_element), GFP_ATOMIC);
++  if(elem == NULL) {  /* no memory: only log it, sk stays unlisted */
++    if (net_ratelimit())
++      printk("RING: could not kmalloc slot!!\n");
++    return;
++  }
++  elem->sk = sk;
++  write_lock_irq(&ring_mgmt_lock);
++  list_add(&elem->list, &ring_table);
++  write_unlock_irq(&ring_mgmt_lock);
++}
++
++/* ********************************** */
++/*
++ * ring_remove()
++ *
++ * For each of the elements in the list:
++ *  - check if this is the element we want to delete
++ *  - if it is, remove it from the list, and free it.
++ *
++ * stop when we find the one we're looking for (break),
++ * or when we reach the end of the list.
++ */
++static inline void ring_remove(struct sock *sk) {
++  struct list_head *ptr;
++  struct ring_element *entry;
++
++  /* Hold the lock across the walk so the list cannot change under us */
++  write_lock_irq(&ring_mgmt_lock);
++  for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
++    entry = list_entry(ptr, struct ring_element, list);
++    if(entry->sk == sk) {
++      list_del(ptr);
++      /* Free the element that was kmalloc'ed, not its embedded list */
++      kfree(entry);
++      break;
++    }
++  }
++  write_unlock_irq(&ring_mgmt_lock);
++
++}
++
++/* ********************************** */
++
++static u_int32_t num_queued_pkts(struct ring_opt *pfr) {
++  /*
++   * Returns the distance between slots_info->insert_idx and
++   * slots_info->tot_read, or 0 when no ring has been allocated yet.
++   */
++  u_int32_t tot_insert, tot_read, tot_pkts;
++
++  if(pfr->ring_slots == NULL)
++    return(0);
++
++  tot_insert = pfr->slots_info->insert_idx;
++  tot_read   = pfr->slots_info->tot_read;
++
++  if(tot_insert >= tot_read)
++    tot_pkts = tot_insert-tot_read;
++  else {
++    /*
++     * NOTE(review): arithmetic kept unchanged; it yields one less than
++     * a plain modulo-2^32 subtraction would - confirm the intent.
++     */
++    tot_pkts = ((u_int32_t)-1)+tot_insert-tot_read;
++  }
++
++#if defined(RING_DEBUG)
++  /* Must run before the return below, otherwise it is never reached */
++  printk("-> num_queued_pkts=%d [tot_insert=%d][tot_read=%d]\n",
++	   tot_pkts, tot_insert, tot_read);
++#endif
++
++  return(tot_pkts);
++}
++
++/* ********************************** */
++
++static inline FlowSlot* get_insert_slot(struct ring_opt *pfr) {
++  /* Slot at insert_idx, or NULL while no ring is allocated */
++  if(pfr->ring_slots == NULL)
++    return(NULL);
++
++#if defined(RING_DEBUG)
++  /* slots_info is only dereferenced once the ring is known to exist */
++  printk("get_insert_slot(%d)\n", pfr->slots_info->insert_idx);
++#endif
++  return((FlowSlot*)&(pfr->ring_slots[pfr->slots_info->insert_idx
++				      *pfr->slots_info->slot_len]));
++}
++
++/* ********************************** */
++
++static inline FlowSlot* get_remove_slot(struct ring_opt *pfr) {
++  /* Slot at remove_idx, or NULL while no ring is allocated */
++  if(pfr->ring_slots == NULL)
++    return(NULL);
++#if defined(RING_DEBUG)
++  /* slots_info is only dereferenced once the ring is known to exist */
++  printk("get_remove_slot(%d)\n", pfr->slots_info->remove_idx);
++#endif
++  return((FlowSlot*)&(pfr->ring_slots[pfr->slots_info->remove_idx*
++				      pfr->slots_info->slot_len]));
++}
++
++/* ********************************** */
++
++static void add_skb_to_ring(struct sk_buff *skb,
++			    struct ring_opt *pfr,
++			    u_char recv_packet,
++			    u_char real_skb /* 1=skb 0=faked skb */) {
++  FlowSlot *theSlot;
++  int idx, displ;
++  /* Copy one packet into pfr's ring after filter/sampling/reflector steps */
++  if(recv_packet) {
++    /* Hack for identifying a packet received by the e1000 */
++    if(real_skb) {
++      displ = SKB_DISPLACEMENT;
++    } else
++      displ = 0; /* Received by the e1000 wrapper */
++  } else
++    displ = 0;
++  /* Every packet offered to this ring is counted, even if dropped later */
++  write_lock(&pfr->ring_index_lock);
++  pfr->slots_info->tot_pkts++;
++  write_unlock(&pfr->ring_index_lock);
++
++  /* BPF Filtering (from af_packet.c) */
++  if(pfr->bpfFilter != NULL) {
++    unsigned res = 1, len;
++
++    len = skb->len-skb->data_len;
++    /* Rewind data by displ while the filter runs, then restore it */
++    write_lock(&pfr->ring_index_lock);
++    skb->data -= displ;
++    res = sk_run_filter(skb, pfr->bpfFilter->insns, pfr->bpfFilter->len);
++    skb->data += displ;
++    write_unlock(&pfr->ring_index_lock);
++
++    if(res == 0) {
++      /* Filter failed */
++
++#if defined(RING_DEBUG)
++      printk("add_skb_to_ring(skb): Filter failed [len=%d][tot=%llu]"
++	     "[insertIdx=%d][pkt_type=%d][cloned=%d]\n",
++	     (int)skb->len, pfr->slots_info->tot_pkts,
++	     pfr->slots_info->insert_idx,
++	     skb->pkt_type, skb->cloned);
++#endif
++
++      return;
++    }
++  }
++
++  /* ************************** */
++  /* Sampling: store a packet only when the pktToSample countdown is 0 */
++  if(pfr->sample_rate > 1) {
++    if(pfr->pktToSample == 0) {
++      write_lock(&pfr->ring_index_lock);
++      pfr->pktToSample = pfr->sample_rate;
++      write_unlock(&pfr->ring_index_lock);
++    } else {
++      write_lock(&pfr->ring_index_lock);
++      pfr->pktToSample--;
++      write_unlock(&pfr->ring_index_lock);
++
++#if defined(RING_DEBUG)
++      printk("add_skb_to_ring(skb): sampled packet [len=%d]"
++	     "[tot=%llu][insertIdx=%d][pkt_type=%d][cloned=%d]\n",
++	     (int)skb->len, pfr->slots_info->tot_pkts,
++	     pfr->slots_info->insert_idx,
++	     skb->pkt_type, skb->cloned);
++#endif
++      return;
++    }
++  }
++
++  /* ************************************* */
++  /* Reflector set: the packet is sent to that device, never queued here */
++  if((pfr->reflector_dev != NULL)
++     && (!netif_queue_stopped(pfr->reflector_dev))) {
++    int cpu = smp_processor_id();
++
++    /* increase reference counter so that this skb is not freed */
++    atomic_inc(&skb->users);
++
++    skb->data -= displ;
++
++    /* send it */
++    if (pfr->reflector_dev->xmit_lock_owner != cpu) {
++      spin_lock_bh(&pfr->reflector_dev->xmit_lock);
++      pfr->reflector_dev->xmit_lock_owner = cpu;
++      spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
++
++      if (pfr->reflector_dev->hard_start_xmit(skb,
++					      pfr->reflector_dev) == 0) {
++        spin_lock_bh(&pfr->reflector_dev->xmit_lock);
++	pfr->reflector_dev->xmit_lock_owner = -1;
++	skb->data += displ;
++	spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
++#if defined(RING_DEBUG)
++	printk("++ hard_start_xmit succeeded\n");
++#endif
++	return; /* OK */
++      }
++
++      spin_lock_bh(&pfr->reflector_dev->xmit_lock);
++      pfr->reflector_dev->xmit_lock_owner = -1;
++      spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
++    }
++    /* NOTE(review): the skb->users reference taken above is not dropped here */
++#if defined(RING_DEBUG)
++    printk("++ hard_start_xmit failed\n");
++#endif
++    skb->data += displ;
++    return; /* -ENETDOWN */
++  }
++
++  /* ************************************* */
++
++#if defined(RING_DEBUG)
++  printk("add_skb_to_ring(skb) [len=%d][tot=%llu][insertIdx=%d]"
++	 "[pkt_type=%d][cloned=%d]\n",
++	 (int)skb->len, pfr->slots_info->tot_pkts,
++	 pfr->slots_info->insert_idx,
++	 skb->pkt_type, skb->cloned);
++#endif
++
++  idx = pfr->slots_info->insert_idx;
++  theSlot = get_insert_slot(pfr);
++  /* Only a slot whose slot_state is 0 is written; it is set to 1 when filled */
++  if((theSlot != NULL) && (theSlot->slot_state == 0)) {
++    struct pcap_pkthdr *hdr;
++    unsigned int bucketSpace;
++    char *bucket;
++
++    /* Update Index */
++    idx++;
++
++    if(idx == pfr->slots_info->tot_slots) {
++      write_lock(&pfr->ring_index_lock);
++      pfr->slots_info->insert_idx = 0;
++      write_unlock(&pfr->ring_index_lock);
++    } else {
++      write_lock(&pfr->ring_index_lock);
++      pfr->slots_info->insert_idx = idx;
++      write_unlock(&pfr->ring_index_lock);
++    }
++    /* Payload room left in a slot after its header bytes and pcap_pkthdr */
++    bucketSpace = pfr->slots_info->slot_len
++#ifdef RING_MAGIC
++      - sizeof(u_char)
++#endif
++      - sizeof(u_char)  /* flowSlot.slot_state */
++      - sizeof(struct pcap_pkthdr)
++      - 1 /* 10 */ /* safe boundary */;
++
++    bucket = &theSlot->bucket;
++    hdr = (struct pcap_pkthdr*)bucket;
++    /* Packets without a timestamp get the current time */
++    if(skb->stamp.tv_sec == 0) do_gettimeofday(&skb->stamp);
++
++    hdr->ts.tv_sec = skb->stamp.tv_sec, hdr->ts.tv_usec = skb->stamp.tv_usec;
++    hdr->caplen    = skb->len+displ;
++
++    if(hdr->caplen > bucketSpace)
++      hdr->caplen = bucketSpace;
++    /* len keeps the full length; caplen above may have been truncated */
++    hdr->len = skb->len+displ;
++    memcpy(&bucket[sizeof(struct pcap_pkthdr)],
++	   skb->data-displ, hdr->caplen);
++
++#if defined(RING_DEBUG)
++    {
++      static unsigned int lastLoss = 0;
++
++      if(pfr->slots_info->tot_lost
++	 && (lastLoss != pfr->slots_info->tot_lost)) {
++	printk("add_skb_to_ring(%d): [bucketSpace=%d]"
++	       "[hdr.caplen=%d][skb->len=%d]"
++	       "[pcap_pkthdr=%d][removeIdx=%d]"
++	       "[loss=%lu][page=%u][slot=%u]\n",
++	       idx-1, bucketSpace, hdr->caplen, skb->len,
++	       sizeof(struct pcap_pkthdr),
++	       pfr->slots_info->remove_idx,
++	       (long unsigned int)pfr->slots_info->tot_lost,
++	       pfr->insert_page_id, pfr->insert_slot_id);
++
++	lastLoss = pfr->slots_info->tot_lost;
++      }
++    }
++#endif
++
++    write_lock(&pfr->ring_index_lock);
++    pfr->slots_info->tot_insert++;
++    theSlot->slot_state = 1;
++    write_unlock(&pfr->ring_index_lock);
++  } else {
++    write_lock(&pfr->ring_index_lock);
++    pfr->slots_info->tot_lost++;
++    write_unlock(&pfr->ring_index_lock);
++
++#if defined(RING_DEBUG)
++    printk("add_skb_to_ring(skb): packet lost [loss=%lu]"
++	   "[removeIdx=%u][insertIdx=%u]\n",
++	   (long unsigned int)pfr->slots_info->tot_lost,
++	   pfr->slots_info->remove_idx, pfr->slots_info->insert_idx);
++#endif
++  }
++
++  /* wakeup in case of poll() */
++  if(waitqueue_active(&pfr->ring_slots_waitqueue))
++    wake_up_interruptible(&pfr->ring_slots_waitqueue);
++}
++
++/* ********************************** */
++
++static u_int hash_skb(struct ring_cluster *cluster_ptr,
++		      struct sk_buff *skb, u_char recv_packet) {
++  u_int idx;
++  int displ;
++  struct iphdr *ip;
++  /* Pick the cluster member index (0..num_cluster_elements-1) for skb */
++  if(cluster_ptr->hashing_mode == cluster_round_robin) {
++    idx = cluster_ptr->hashing_id++;
++  } else {
++    /* Per-flow clustering */
++    if(skb->len > sizeof(struct iphdr)+sizeof(struct tcphdr)) {
++      if(recv_packet)
++	displ = 0;
++      else
++	displ = SKB_DISPLACEMENT;
++
++      /*
++	skb->data+displ
++
++	Always points to to the IP part of the packet
++      */
++      /* NOTE(review): assumes IPv4 with a 20-byte header - no check here */
++      ip = (struct iphdr*)(skb->data+displ);
++
++      idx = ip->saddr+ip->daddr+ip->protocol;
++      /* TCP and UDP flows additionally mix both port numbers into idx */
++      if(ip->protocol == IPPROTO_TCP) {
++	struct tcphdr *tcp = (struct tcphdr*)(skb->data+displ
++					      +sizeof(struct iphdr));
++	idx += tcp->source+tcp->dest;
++      } else if(ip->protocol == IPPROTO_UDP) {
++	struct udphdr *udp = (struct udphdr*)(skb->data+displ
++					      +sizeof(struct iphdr));
++	idx += udp->source+udp->dest;
++      }
++    } else
++      idx = skb->len;
++  }
++  /* Callers must ensure num_cluster_elements is non-zero (modulo below) */
++  return(idx % cluster_ptr->num_cluster_elements);
++}
++
++/* ********************************** */
++
++static int skb_ring_handler(struct sk_buff *skb,
++			    u_char recv_packet,
++			    u_char real_skb /* 1=skb 0=faked skb */) {
++  struct sock *skElement;
++  int rc = 0;
++  struct list_head *ptr;
++  struct ring_cluster *cluster_ptr;
++  /* Offer skb to every matching ring; returns 1 if handled, otherwise 0 */
++  if((!skb) /* Invalid skb */
++     || ((!enable_tx_capture) && (!recv_packet))) {
++    /*
++      An outgoing packet is about to be sent out
++      but we decided not to handle transmitted
++      packets.
++    */
++    return(0);
++  }
++
++#if defined(RING_DEBUG)
++  if(0) {
++    printk("skb_ring_handler() [len=%d][dev=%s]\n", skb->len,
++	   skb->dev->name == NULL ? "<NULL>" : skb->dev->name);
++  }
++#endif
++
++  /* [1] Check unclustered sockets */
++  for (ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
++    struct ring_opt *pfr;
++    struct ring_element *entry;
++    /* NOTE(review): the list itself is walked without ring_mgmt_lock held */
++    entry = list_entry(ptr, struct ring_element, list);
++
++    read_lock(&ring_mgmt_lock);
++    skElement = entry->sk;
++    pfr = ring_sk(skElement);
++    read_unlock(&ring_mgmt_lock);
++
++    if((pfr != NULL)
++       && (pfr->cluster_id == 0 /* No cluster */)
++       && (pfr->ring_slots != NULL)
++       && (pfr->ring_netdev == skb->dev)) {
++      /* We've found the ring where the packet can be stored */
++      read_lock(&ring_mgmt_lock);
++      add_skb_to_ring(skb, pfr, recv_packet, real_skb);
++      read_unlock(&ring_mgmt_lock);
++
++      rc = 1; /* Ring found: we've done our job */
++    }
++  }
++
++  /* [2] Check socket clusters */
++  cluster_ptr = ring_cluster_list;
++  /* Each non-empty cluster hands the packet to one member chosen by hash */
++  while(cluster_ptr != NULL) {
++    struct ring_opt *pfr;
++
++    if(cluster_ptr->num_cluster_elements > 0) {
++      u_int skb_hash = hash_skb(cluster_ptr, skb, recv_packet);
++
++      read_lock(&ring_mgmt_lock);
++      skElement = cluster_ptr->sk[skb_hash];
++      read_unlock(&ring_mgmt_lock);
++
++      if(skElement != NULL) {
++	pfr = ring_sk(skElement);
++
++	if((pfr != NULL)
++	   && (pfr->ring_slots != NULL)
++	   && (pfr->ring_netdev == skb->dev)) {
++	  /* We've found the ring where the packet can be stored */
++          read_lock(&ring_mgmt_lock);
++	  add_skb_to_ring(skb, pfr, recv_packet, real_skb);
++          read_unlock(&ring_mgmt_lock);
++
++	  rc = 1; /* Ring found: we've done our job */
++	}
++      }
++    }
++
++    cluster_ptr = cluster_ptr->next;
++  }
++  /* Transparent mode: always report "not handled", so skb is not freed here */
++  if(transparent_mode) rc = 0;
++
++  if((rc != 0) && real_skb)
++    dev_kfree_skb(skb); /* Free the skb */
++
++  return(rc); /*  0 = packet not handled */
++}
++
++/* ********************************** */
++
++struct sk_buff skb;
++/* The single file-scope skb above is refilled by every call below */
++static int buffer_ring_handler(struct net_device *dev,
++			       char *data, int len) {
++  /* Wrap a raw buffer in the fake skb and pass it on; always returns 0 */
++#if defined(RING_DEBUG)
++  printk("buffer_ring_handler: [dev=%s][len=%d]\n",
++	 dev->name == NULL ? "<NULL>" : dev->name, len);
++#endif
++
++  skb.dev = dev, skb.len = len, skb.data = data,
++    skb.data_len = len, skb.stamp.tv_sec = 0; /* Calculate the time */
++  /* recv_packet=1, real_skb=0: the handler will not free this skb */
++  skb_ring_handler(&skb, 1, 0 /* fake skb */);
++
++  return(0);
++}
++
++/* ********************************** */
++
++static int ring_create(struct socket *sock, int protocol) {
++  struct sock *sk;
++  struct ring_opt *pfr;
++  int err;
++  /* Create a PF_RING socket: allocate sk plus its zeroed ring_opt */
++#if defined(RING_DEBUG)
++  printk("RING: ring_create()\n");
++#endif
++
++  /* Are you root, superuser or so ? */
++  if(!capable(CAP_NET_ADMIN))
++    return -EPERM;
++
++  if(sock->type != SOCK_RAW)
++    return -ESOCKTNOSUPPORT;
++
++  if(protocol != htons(ETH_P_ALL))
++    return -EPROTONOSUPPORT;
++  /* 2.4 only: pin the module; undone at out: or in ring_sock_destruct() */
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
++  MOD_INC_USE_COUNT;
++#endif
++
++  err = -ENOMEM;
++
++  // BD: -- broke this out to keep it more simple and clear as to what the
++  // options are.
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
++  sk = sk_alloc(PF_RING, GFP_KERNEL, 1, NULL);
++#endif
++#endif
++
++  // BD: API changed in 2.6.12, ref:
++  // http://svn.clkao.org/svnweb/linux/revision/?rev=28201
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
++  sk = sk_alloc(PF_RING, GFP_ATOMIC, &ring_proto, 1);
++#endif
++  /* NOTE(review): no sk_alloc() call is compiled for kernels below 2.6.0 */
++  if (sk == NULL)
++    goto out;
++
++  sock->ops = &ring_ops;
++  sock_init_data(sock, sk);
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
++  sk_set_owner(sk, THIS_MODULE);
++#endif
++#endif
++
++  err = -ENOMEM;
++  ring_sk(sk) = ring_sk_datatype(kmalloc(sizeof(*pfr), GFP_KERNEL));
++  /* On kmalloc failure release sk again and report -ENOMEM */
++  if (!(pfr = ring_sk(sk))) {
++    sk_free(sk);
++    goto out;
++  }
++  memset(pfr, 0, sizeof(*pfr));
++  init_waitqueue_head(&pfr->ring_slots_waitqueue);
++  pfr->ring_index_lock = RW_LOCK_UNLOCKED;
++  atomic_set(&pfr->num_ring_slots_waiters, 0);
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++  sk->sk_family       = PF_RING;
++  sk->sk_destruct     = ring_sock_destruct;
++#else
++  sk->family          = PF_RING;
++  sk->destruct        = ring_sock_destruct;
++  sk->num             = protocol;
++#endif
++  /* Put the socket on ring_table; a failed insert is only logged there */
++  ring_insert(sk);
++
++#if defined(RING_DEBUG)
++  printk("RING: ring_create() - created\n");
++#endif
++
++  return(0);
++ out:
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
++  MOD_DEC_USE_COUNT;
++#endif
++  return err;
++}
++
++/* *********************************************** */
++
++static int ring_release(struct socket *sock)
++{
++  struct sock *sk = sock->sk;
++  struct ring_opt *pfr;
++  /* Tear down a ring socket: unlist it, free its ring and private data */
++  if(!sk)
++    return 0;
++
++  /* Only look at the socket's private data once sk is known valid */
++  pfr = ring_sk(sk);
++
++#if defined(RING_DEBUG)
++  printk("RING: called ring_release\n");
++  printk("RING: ring_release entered\n");
++#endif
++
++  ring_remove(sk);
++
++  sock_orphan(sk);
++  sock->sk = NULL;
++
++  /* Free the ring buffer */
++  if((pfr != NULL) && pfr->ring_memory) {
++    struct page *page, *page_end;
++
++    page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
++    for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
++      ClearPageReserved(page);
++
++    free_pages(pfr->ring_memory, pfr->order);
++  }
++  /* NOTE(review): bpfFilter and cluster membership are not released here */
++  kfree(pfr);
++  ring_sk(sk) = NULL;
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++  skb_queue_purge(&sk->sk_write_queue);
++#endif
++  sock_put(sk);
++
++#if defined(RING_DEBUG)
++  printk("RING: ring_release leaving\n");
++#endif
++
++  return 0;
++}
++
++/* ********************************** */
++/*
++ * We create a ring for this socket and bind it to the specified device
++ */
++static int packet_ring_bind(struct sock *sk, struct net_device *dev)
++{
++  u_int the_slot_len;
++  u_int32_t tot_mem;
++  struct ring_opt *pfr = ring_sk(sk);
++  struct page *page, *page_end;
++  /* Returns 0 on success, -1 when dev is NULL or no pages are available */
++  if(!dev) return(-1);
++
++#if defined(RING_DEBUG)
++  printk("RING: packet_ring_bind(%s) called\n", dev->name);
++#endif
++
++  /* **********************************************
++
++  *************************************
++  *                                   *
++  *        FlowSlotInfo               *
++  *                                   *
++  ************************************* <-+
++  *        FlowSlot                   *   |
++  *************************************   |
++  *        FlowSlot                   *   |
++  *************************************   +- num_slots
++  *        FlowSlot                   *   |
++  *************************************   |
++  *        FlowSlot                   *   |
++  ************************************* <-+
++
++  ********************************************** */
++
++  the_slot_len = sizeof(u_char)    /* flowSlot.slot_state */
++    + sizeof(u_short) /* flowSlot.slot_len   */
++    + bucket_len      /* flowSlot.bucket     */;
++
++  tot_mem = sizeof(FlowSlotInfo) + num_slots*the_slot_len;
++
++  /*
++    Calculate the value of the order parameter used later.
++    See http://www.linuxjournal.com/article.php?sid=1133
++  */
++  for(pfr->order = 0;(PAGE_SIZE << pfr->order) < tot_mem; pfr->order++)  ;
++
++  /*
++    We now try to allocate the memory as required. If we fail
++    we try to allocate a smaller amount or memory (hence a
++    smaller ring).
++  */
++  while((pfr->ring_memory = __get_free_pages(GFP_ATOMIC, pfr->order)) == 0) {
++    if(pfr->order == 0) break; /* even a single page failed: give up */
++    pfr->order--;
++  }
++  if(pfr->ring_memory == 0) { /* test the allocation itself, not the order */
++#if defined(RING_DEBUG)
++    printk("ERROR: not enough memory\n");
++#endif
++    return(-1);
++  } else {
++#if defined(RING_DEBUG)
++    printk("RING: succesfully allocated %lu KB [tot_mem=%d][order=%ld]\n",
++	   PAGE_SIZE >> (10 - pfr->order), tot_mem, pfr->order);
++#endif
++  }
++
++  tot_mem = PAGE_SIZE << pfr->order;
++  memset((char*)pfr->ring_memory, 0, tot_mem);
++
++  /* Now we need to reserve the pages */
++  page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
++  for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
++    SetPageReserved(page);
++
++  pfr->slots_info = (FlowSlotInfo*)pfr->ring_memory;
++  pfr->ring_slots = (char*)(pfr->ring_memory+sizeof(FlowSlotInfo));
++
++  pfr->slots_info->version     = RING_FLOWSLOT_VERSION;
++  pfr->slots_info->slot_len    = the_slot_len;
++  pfr->slots_info->tot_slots   = (tot_mem-sizeof(FlowSlotInfo))/the_slot_len;
++  pfr->slots_info->tot_mem     = tot_mem;
++  pfr->slots_info->sample_rate = sample_rate;
++
++#if defined(RING_DEBUG)
++  printk("RING: allocated %d slots [slot_len=%d][tot_mem=%u]\n",
++	 pfr->slots_info->tot_slots, pfr->slots_info->slot_len,
++	 pfr->slots_info->tot_mem);
++#endif
++
++#ifdef RING_MAGIC
++  {
++    int i;
++
++    for(i=0; i<pfr->slots_info->tot_slots; i++) {
++      unsigned long idx = i*pfr->slots_info->slot_len;
++      FlowSlot *slot = (FlowSlot*)&pfr->ring_slots[idx];
++      slot->magic = RING_MAGIC_VALUE; slot->slot_state = 0;
++    }
++  }
++#endif
++
++  pfr->insert_page_id = 1, pfr->insert_slot_id = 0;
++
++  /*
++    IMPORTANT
++    Leave this statement here as last one. In fact when
++    the ring_netdev != NULL the socket is ready to be used.
++  */
++  pfr->ring_netdev = dev;
++
++  return(0);
++}
++
++/* ************************************* */
++
++/* Bind to a device */
++static int ring_bind(struct socket *sock,
++		     struct sockaddr *sa, int addr_len)
++{
++  struct sock *sk=sock->sk;
++  struct net_device *dev = NULL;
++  /* sa->sa_data carries the device name; the ring is created on bind */
++#if defined(RING_DEBUG)
++  printk("RING: ring_bind() called\n");
++#endif
++
++  /*
++   *	Check legality
++   */
++  if (addr_len != sizeof(struct sockaddr))
++    return -EINVAL;
++  if (sa->sa_family != PF_RING)
++    return -EINVAL;
++
++  /* Safety check: add trailing zero if missing */
++  sa->sa_data[sizeof(sa->sa_data)-1] = '\0';
++
++#if defined(RING_DEBUG)
++  printk("RING: searching device %s\n", sa->sa_data);
++#endif
++  /* Unknown device: -EINVAL; otherwise the packet_ring_bind() result */
++  if((dev = __dev_get_by_name(sa->sa_data)) == NULL) {
++#if defined(RING_DEBUG)
++    printk("RING: search failed\n");
++#endif
++    return(-EINVAL);
++  } else
++    return(packet_ring_bind(sk, dev));
++}
++
++/* ************************************* */
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++/* 2.6 variant: looks address up in the kernel page tables */
++volatile void* virt_to_kseg(volatile void* address) {
++  pte_t *pte;
++  pud_t *pud;
++  unsigned long addr = (unsigned long)address;
++		       
++  pud = pud_offset(pgd_offset_k((unsigned long) address),
++		   (unsigned long) address);
++
++  /*
++     High-memory support courtesy of
++     Brad Doctor <bdoctor@ps-ax.com>
++  */
++#if defined(CONFIG_X86_PAE) && (!defined(CONFIG_NOHIGHMEM))
++  pte = pte_offset_map(pmd_offset(pud, addr), addr);
++#else
++  pte = pmd_offset_map(pud, addr);
++#endif
++  /* NOTE(review): returns pte_page() itself, unlike the 2.4 variant below */
++  return((volatile void*)pte_page(*pte));
++}
++
++#else /* 2.4 */
++
++/* http://www.scs.ch/~frey/linux/memorymap.html */
++volatile void *virt_to_kseg(volatile void *address)
++{
++  pgd_t *pgd; pmd_t *pmd; pte_t *ptep, pte;
++  unsigned long va, ret = 0UL;
++  /* Result stays 0 unless every page-table level below is present */
++  va=VMALLOC_VMADDR((unsigned long)address);
++
++  /* get the page directory. Use the kernel memory map. */
++  pgd = pgd_offset_k(va);
++
++  /* check whether we found an entry */
++  if (!pgd_none(*pgd))
++    {
++      /* get the page middle directory */
++      pmd = pmd_offset(pgd, va);
++      /* check whether we found an entry */
++      if (!pmd_none(*pmd))
++	{
++	  /* get a pointer to the page table entry */
++	  ptep = pte_offset(pmd, va);
++	  pte = *ptep;
++	  /* check for a valid page */
++	  if (pte_present(pte))
++	    {
++	      /* get the address the page is refering to */
++	      ret = (unsigned long)page_address(pte_page(pte));
++	      /* add the offset within the page to the page address */
++	      ret |= (va & (PAGE_SIZE -1));
++	    }
++	}
++    }
++  return((volatile void *)ret);
++}
++#endif
++
++/* ************************************* */
++
++static int ring_mmap(struct file *file,
++		     struct socket *sock,
++		     struct vm_area_struct *vma)
++{
++  struct sock *sk = sock->sk;
++  struct ring_opt *pfr = ring_sk(sk);
++  unsigned long size, start;
++  u_int pagesToMap;
++  /* Map the ring memory of a bound socket into the caller's vma */
++
++#if defined(RING_DEBUG)
++  printk("RING: ring_mmap() called\n");
++#endif
++
++  if(pfr->ring_memory == 0) {
++#if defined(RING_DEBUG)
++    printk("RING: ring_mmap() failed: mapping area to an unbound socket\n");
++#endif
++    return -EINVAL;
++  }
++
++  size = (unsigned long)(vma->vm_end-vma->vm_start);
++
++  if(size % PAGE_SIZE) {
++#if defined(RING_DEBUG)
++    printk("RING: ring_mmap() failed: len is not multiple of PAGE_SIZE\n");
++#endif
++    return(-EINVAL);
++  }
++
++  /* if userspace tries to mmap beyond end of our buffer, fail */
++  if(size > pfr->slots_info->tot_mem) {
++#if defined(RING_DEBUG)
++    printk("proc_mmap() failed: area too large [%ld > %d]\n", size, pfr->slots_info->tot_mem);
++#endif
++    return(-EINVAL);
++  }
++
++  pagesToMap = size/PAGE_SIZE;
++
++#if defined(RING_DEBUG)
++  printk("RING: ring_mmap() called. %d pages to map\n", pagesToMap);
++#endif
++
++#if defined(RING_DEBUG)
++  printk("RING: mmap [slot_len=%d][tot_slots=%d] for ring on device %s\n",
++	 pfr->slots_info->slot_len, pfr->slots_info->tot_slots,
++	 pfr->ring_netdev->name);
++#endif
++
++  /* we do not want to have this area swapped out, lock it */
++  vma->vm_flags |= VM_LOCKED;
++  start = vma->vm_start;
++
++  /* The mapping starts at the ring base, so FlowSlotInfo comes first */
++  /* and the slots follow it (see the remap_page_range() call below). */
++
++  if(remap_page_range(
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++		      vma,
++#endif
++		      start,
++		      __pa(pfr->ring_memory),
++		      PAGE_SIZE*pagesToMap, vma->vm_page_prot)) {
++#if defined(RING_DEBUG)
++    printk("remap_page_range() failed\n");
++#endif
++    return(-EAGAIN);
++  }
++
++#if defined(RING_DEBUG)
++  printk("proc_mmap(pagesToMap=%d): success.\n", pagesToMap);
++#endif
++
++  return 0;
++}
++
++/* ************************************* */
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++static int ring_recvmsg(struct kiocb *iocb, struct socket *sock,
++			struct msghdr *msg, size_t len, int flags)
++#else
++  static int ring_recvmsg(struct socket *sock, struct msghdr *msg, int len,
++			  int flags, struct scm_cookie *scm)
++#endif
++{
++  FlowSlot* slot;
++  struct ring_opt *pfr = ring_sk(sock->sk);
++  u_int32_t queued_pkts, num_loops = 0;
++  /* Copies no data into msg: only returns how many packets are queued */
++#if defined(RING_DEBUG)
++  printk("ring_recvmsg called\n");
++#endif
++
++  slot = get_remove_slot(pfr);
++  /* NOTE(review): with condition 1 the wait below looks like a busy loop */
++  while((queued_pkts = num_queued_pkts(pfr)) < MIN_QUEUED_PKTS) {
++    wait_event_interruptible(pfr->ring_slots_waitqueue, 1);
++
++#if defined(RING_DEBUG)
++    printk("-> ring_recvmsg returning %d [queued_pkts=%d][num_loops=%d]\n",
++	   slot->slot_state, queued_pkts, num_loops);
++#endif
++    /* Loop count only advances while at least one packet is queued */
++    if(queued_pkts > 0) {
++      if(num_loops++ > MAX_QUEUE_LOOPS)
++	break;
++    }
++  }
++
++#if defined(RING_DEBUG)
++  if(slot != NULL)
++    printk("ring_recvmsg is returning [queued_pkts=%d][num_loops=%d]\n",
++	   queued_pkts, num_loops);
++#endif
++
++  return(queued_pkts);
++}
++
++/* ************************************* */
++
++unsigned int ring_poll(struct file * file,
++		       struct socket *sock, poll_table *wait)
++{
++  FlowSlot* slot;
++  struct ring_opt *pfr = ring_sk(sock->sk);
++  /* Readable (POLLIN|POLLRDNORM) once the slot at remove_idx is filled */
++#if defined(RING_DEBUG)
++  printk("poll called\n");
++#endif
++
++  slot = get_remove_slot(pfr);
++
++  if((slot != NULL) && (slot->slot_state == 0))
++    poll_wait(file, &pfr->ring_slots_waitqueue, wait);
++  /* slot is NULL on an unbound socket, so never dereference it blindly */
++#if defined(RING_DEBUG)
++  printk("poll returning %d\n", (slot != NULL) ? slot->slot_state : -1);
++#endif
++
++  if((slot != NULL) && (slot->slot_state == 1))
++    return(POLLIN | POLLRDNORM);
++  else
++    return(0);
++}
++
++/* ************************************* */
++
++int add_to_cluster_list(struct ring_cluster *el,
++			struct sock *sock) {
++  /* Append sock to cluster el; -1 when all CLUSTER_LEN entries are used */
++  if(el->num_cluster_elements != CLUSTER_LEN) {
++    /* Tag the socket with the cluster id, then store it in the next entry */
++    ring_sk_datatype(ring_sk(sock))->cluster_id = el->cluster_id;
++    el->sk[el->num_cluster_elements++] = sock;
++    return(0);
++  }
++  return(-1); /* Cluster full */
++}
++
++/* ************************************* */
++
++int remove_from_cluster_list(struct ring_cluster *el,
++			     struct sock *sock) {
++  int pos, k;
++
++  /* Locate sock inside the fixed-size member array */
++  for(pos=0; pos<CLUSTER_LEN; pos++)
++    if(el->sk[pos] == sock)
++      break;
++
++  if(pos == CLUSTER_LEN)
++    return(-1); /* Not found */
++
++  el->num_cluster_elements--;
++  if(el->num_cluster_elements > 0) {
++    /* Other members remain: close the gap and clear the last entry */
++    for(k=pos; k<CLUSTER_LEN-1; k++)
++      el->sk[k] = el->sk[k+1];
++    el->sk[CLUSTER_LEN-1] = NULL;
++  } else {
++    /* Cluster is now empty: wipe the whole array */
++    memset(el->sk, 0, sizeof(el->sk));
++  }
++  return(0);
++}
++
++/* ************************************* */
++
++static int remove_from_cluster(struct sock *sock,
++			       struct ring_opt *pfr)
++{
++  struct ring_cluster *cur;
++
++#if defined(RING_DEBUG)
++  printk("--> remove_from_cluster(%d)\n", pfr->cluster_id);
++#endif
++
++  /* Sockets that never joined a cluster have nothing to undo */
++  if(pfr->cluster_id == 0 /* 0 = No Cluster */)
++    return(0);
++
++  /* Find the cluster with a matching id and drop sock from it */
++  for(cur = ring_cluster_list; cur != NULL; cur = cur->next) {
++    if(cur->cluster_id != pfr->cluster_id)
++      continue;
++
++    return(remove_from_cluster_list(cur, sock));
++  }
++
++  return(-EINVAL); /* Not found */
++}
++
++/* ************************************* */
++
++static int add_to_cluster(struct sock *sock,
++			  struct ring_opt *pfr,
++			  u_short cluster_id)
++{
++  struct ring_cluster *el;
++  /* Join sock to cluster cluster_id, creating the cluster when missing */
++#if defined(RING_DEBUG)
++  printk("--> add_to_cluster(%d)\n", cluster_id);
++#endif
++
++  if(cluster_id == 0 /* 0 = No Cluster */) return(-EINVAL);
++  /* A socket belongs to one cluster at most: leave the old one first */
++  if(pfr->cluster_id != 0)
++    remove_from_cluster(sock, pfr);
++
++  el = ring_cluster_list;
++
++  while(el != NULL) {
++    if(el->cluster_id == cluster_id) {
++      return(add_to_cluster_list(el, sock));
++    } else
++      el = el->next;
++  }
++
++  /* There's no existing cluster. We need to create one */
++  if((el = kmalloc(sizeof(struct ring_cluster), GFP_ATOMIC)) == NULL)
++    return(-ENOMEM);
++  /* GFP_ATOMIC above: ring_setsockopt() calls us with ring_mgmt_lock held */
++  el->cluster_id = cluster_id;
++  el->num_cluster_elements = 1;
++  el->hashing_mode = cluster_per_flow; /* Default */
++  el->hashing_id   = 0;
++
++  memset(el->sk, 0, sizeof(el->sk));
++  el->sk[0] = sock;
++  el->next = ring_cluster_list;
++  ring_cluster_list = el;
++  pfr->cluster_id = cluster_id;
++
++  return(0); /* 0 = OK */
++}
++
++/* ************************************* */
++
++/*
++ * setsockopt() handler for PF_RING sockets.
++ * Code taken/inspired from core/sock.c
++ *
++ * Options handled here:
++ *   SO_ATTACH_FILTER        install a BPF program (optval: struct sock_fprog)
++ *   SO_DETACH_FILTER        remove the installed BPF program
++ *   SO_ADD_TO_CLUSTER       join a cluster (optval: u_int cluster id)
++ *   SO_REMOVE_FROM_CLUSTER  leave the current cluster
++ *   SO_SET_REFLECTOR        look up a device by name and store it as
++ *                           the ring's reflector_dev
++ * Any other option is forwarded to sock_setsockopt().
++ *
++ * Returns 0 on success or a negative errno value.
++ */
++static int ring_setsockopt(struct socket *sock,
++			   int level, int optname,
++			   char *optval, int optlen)
++{
++  struct ring_opt *pfr = ring_sk(sock->sk);
++  int val, found, ret = 0;
++  u_int cluster_id;
++  char devName[8];
++
++  if((optlen<sizeof(int)) || (pfr == NULL))
++    return(-EINVAL);
++
++  /* optlen covers at least an int (checked above): make sure it is readable */
++  if (get_user(val, (int *)optval))
++    return -EFAULT;
++
++  found = 1;
++
++  switch(optname)
++    {
++    case SO_ATTACH_FILTER:
++      ret = -EINVAL;
++      if (optlen == sizeof(struct sock_fprog)) {
++        unsigned int fsize;
++        struct sock_fprog fprog;
++        struct sk_filter *filter, *old_filter;
++
++        /*
++          NOTE
++
++          Do not call copy_from_user within a held
++          spinlock (e.g. ring_mgmt_lock) as this caused
++          problems when certain debugging was enabled under
++          2.6.5 -- including hard lockups of the machine.
++        */
++        if(copy_from_user(&fprog, optval, sizeof(fprog))) {
++          ret = -EFAULT;
++          break;
++        }
++
++        /* Reject empty, missing or oversized programs before allocating */
++        if((fprog.filter == NULL)
++           || (fprog.len == 0)
++           || (fprog.len > BPF_MAXINSNS))
++          break; /* ret is still -EINVAL */
++
++        /*
++          The instructions live right after the sk_filter header
++          (filter->insns), hence the allocation must cover both.
++        */
++        fsize = sizeof(struct sock_filter) * fprog.len;
++        filter = kmalloc(sizeof(*filter) + fsize, GFP_KERNEL);
++
++        if(filter == NULL) {
++          ret = -ENOMEM;
++          break;
++        }
++
++        if(copy_from_user(filter->insns, fprog.filter, fsize)) {
++          kfree(filter); /* do not leak the half-built filter */
++          ret = -EFAULT;
++          break;
++        }
++
++        filter->len = fprog.len;
++
++        if(sk_chk_filter(filter->insns, filter->len) != 0) {
++          /* Bad filter specified: the filter in place (if any) is kept */
++          kfree(filter);
++          break; /* ret is still -EINVAL */
++        }
++
++        /* get the lock, swap the filter, drop the old one, release the lock */
++        write_lock(&ring_mgmt_lock);
++        old_filter = pfr->bpfFilter;
++        pfr->bpfFilter = filter;
++        if(old_filter != NULL)
++          kfree(old_filter);
++        write_unlock(&ring_mgmt_lock);
++
++        ret = 0;
++      }
++      break;
++
++    case SO_DETACH_FILTER:
++      write_lock(&ring_mgmt_lock);
++      if(pfr->bpfFilter != NULL) {
++        kfree(pfr->bpfFilter);
++        pfr->bpfFilter = NULL;
++      } else
++        ret = -ENONET; /* no filter attached */
++      /* The lock is released on both paths */
++      write_unlock(&ring_mgmt_lock);
++      break;
++
++    case SO_ADD_TO_CLUSTER:
++      if (optlen!=sizeof(val))
++        return -EINVAL;
++
++      if (copy_from_user(&cluster_id, optval, sizeof(cluster_id)))
++        return -EFAULT;
++
++      /* NOTE(review): cluster_id is narrowed to u_short by add_to_cluster() */
++      write_lock(&ring_mgmt_lock);
++      ret = add_to_cluster(sock->sk, pfr, cluster_id);
++      write_unlock(&ring_mgmt_lock);
++      break;
++
++    case SO_REMOVE_FROM_CLUSTER:
++      write_lock(&ring_mgmt_lock);
++      ret = remove_from_cluster(sock->sk, pfr);
++      write_unlock(&ring_mgmt_lock);
++      break;
++
++    case SO_SET_REFLECTOR:
++      /*
++        NOTE(review): together with the sizeof(int) check at the top of
++        this function only names of 4 to 6 bytes get through here --
++        confirm this is intended.
++      */
++      if(optlen >= (sizeof(devName)-1))
++        return -EINVAL;
++
++      if(optlen > 0) {
++        if(copy_from_user(devName, optval, optlen))
++          return -EFAULT;
++      }
++
++      devName[optlen] = '\0';
++
++#if defined(RING_DEBUG)
++      printk("+++ SO_SET_REFLECTOR(%s)\n", devName);
++#endif
++
++      /*
++        NOTE(review): a previously stored reflector_dev is overwritten
++        without being released; if dev_get_by_name() takes a device
++        reference this leaks it -- confirm against the release path.
++      */
++      write_lock(&ring_mgmt_lock);
++      pfr->reflector_dev = dev_get_by_name(devName);
++      write_unlock(&ring_mgmt_lock);
++
++#if defined(RING_DEBUG)
++      if(pfr->reflector_dev != NULL)
++        printk("SO_SET_REFLECTOR(%s): succeded\n", devName);
++      else
++        printk("SO_SET_REFLECTOR(%s): device unknown\n", devName);
++#endif
++      break;
++
++    default:
++      found = 0;
++      break;
++    }
++
++  if(found)
++    return(ret);
++  else
++    return(sock_setsockopt(sock, level, optname, optval, optlen));
++}
++
++/* ************************************* */
++
++/*
++ * ioctl() handler for PF_RING sockets.
++ *
++ * The interface requests listed below are passed unchanged to
++ * dev_ioctl(); every other request is refused with -EOPNOTSUPP.
++ */
++static int ring_ioctl(struct socket *sock,
++		      unsigned int cmd, unsigned long arg)
++{
++  int rc = -EOPNOTSUPP; /* answer for any request not listed below */
++
++  switch(cmd) {
++  case SIOCGIFFLAGS:   case SIOCSIFFLAGS:
++  case SIOCGIFCONF:
++  case SIOCGIFMETRIC:  case SIOCSIFMETRIC:
++  case SIOCGIFMEM:     case SIOCSIFMEM:
++  case SIOCGIFMTU:     case SIOCSIFMTU:
++  case SIOCSIFLINK:
++  case SIOCGIFHWADDR:  case SIOCSIFHWADDR:
++  case SIOCSIFMAP:     case SIOCGIFMAP:
++  case SIOCSIFSLAVE:   case SIOCGIFSLAVE:
++  case SIOCGIFINDEX:
++  case SIOCGIFNAME:
++  case SIOCGIFCOUNT:
++  case SIOCSIFHWBROADCAST:
++    rc = dev_ioctl(cmd, (void *) arg);
++    break;
++
++  default:
++    break;
++  }
++
++  return(rc);
++}
++
++/* ************************************* */
++
++/* Socket operations table installed on every PF_RING socket */
++static struct proto_ops ring_ops = {
++  .family	=	PF_RING,
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++  .owner	=	THIS_MODULE,
++#endif
++
++  /* Entry points implemented by this module */
++  .release	=	ring_release,
++  .bind		=	ring_bind,
++  .mmap		=	ring_mmap,
++  .poll		=	ring_poll,
++  .setsockopt	=	ring_setsockopt,
++  .ioctl	=	ring_ioctl,
++  .recvmsg	=	ring_recvmsg,
++
++  /* Everything else is meaningless on a ring: use the sock_no_* stubs */
++  .accept	=	sock_no_accept,
++  .connect	=	sock_no_connect,
++  .getname	=	sock_no_getname,
++  .getsockopt	=	sock_no_getsockopt,
++  .listen	=	sock_no_listen,
++  .sendmsg	=	sock_no_sendmsg,
++  .sendpage	=	sock_no_sendpage,
++  .shutdown	=	sock_no_shutdown,
++  .socketpair	=	sock_no_socketpair,
++};
++
++/* ************************************ */
++
++/* Protocol family descriptor: ring_init() hands it to sock_register() */
++static struct net_proto_family ring_family_ops = {
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++  .owner	=	THIS_MODULE,
++#endif
++  .create	=	ring_create,
++  .family	=	PF_RING,
++};
++
++/*
++ * BD: API changed in 2.6.12, ref:
++ * http://svn.clkao.org/svnweb/linux/revision/?rev=28201
++ *
++ * Only kernels newer than 2.6.11 get this struct proto definition.
++ */
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
++static struct proto ring_proto = {
++  .obj_size	=	sizeof(struct sock),
++  .owner	=	THIS_MODULE,
++  .name		=	"PF_RING",
++};
++#endif
++
++/* ************************************ */
++
++/*
++ * Module unload: detach the packet handlers, unregister the PF_RING
++ * socket family and release whatever is still linked on ring_table and
++ * ring_cluster_list.
++ */
++static void __exit ring_exit(void)
++{
++  struct list_head *ptr, *next;
++  struct ring_element *entry;
++
++  /* Unhook from the kernel first, then free the tables */
++  set_skb_ring_handler(NULL);
++  set_buffer_ring_handler(NULL);
++  sock_unregister(PF_RING);
++
++  /*
++   * The list node is embedded in the element being freed, so the
++   * successor has to be fetched before kfree().
++   */
++  for(ptr = ring_table.next; ptr != &ring_table; ptr = next) {
++    next  = ptr->next;
++    entry = list_entry(ptr, struct ring_element, list);
++    kfree(entry);
++  }
++  INIT_LIST_HEAD(&ring_table); /* leave no dangling pointers in the head */
++
++  while(ring_cluster_list != NULL) {
++    struct ring_cluster *next_cluster = ring_cluster_list->next;
++    kfree(ring_cluster_list);
++    ring_cluster_list = next_cluster;
++  }
++
++  printk("PF_RING shut down.\n");
++}
++
++/* ************************************ */
++
++/*
++ * Module load: reset the ring bookkeeping, register the PF_RING socket
++ * family and install the skb/buffer ring handlers.
++ *
++ * Returns 0 on success, the sock_register() error if the family cannot
++ * be registered, or -1 (after undoing the registration) when the buffer
++ * ring handler read back is not the one just installed.
++ */
++static int __init ring_init(void)
++{
++  int rc;
++
++  printk("Welcome to PF_RING %s\n(C) 2004 L.Deri <deri@ntop.org>\n",
++	 RING_VERSION);
++
++  INIT_LIST_HEAD(&ring_table);
++  ring_cluster_list = NULL;
++
++  /* Without the socket family there is nothing to hook the handlers to */
++  rc = sock_register(&ring_family_ops);
++  if(rc != 0) {
++    printk("PF_RING: sock_register FAILED (%d)\n", rc);
++    return rc;
++  }
++
++  set_skb_ring_handler(skb_ring_handler);
++  set_buffer_ring_handler(buffer_ring_handler);
++
++  /* Read the handler back to verify that the installation took effect */
++  if(get_buffer_ring_handler() != buffer_ring_handler) {
++    printk("PF_RING: set_buffer_ring_handler FAILED\n");
++
++    set_skb_ring_handler(NULL);
++    set_buffer_ring_handler(NULL);
++    sock_unregister(PF_RING);
++    return -1;
++  }
++
++  printk("PF_RING: bucket length    %d bytes\n", bucket_len);
++  printk("PF_RING: ring slots       %d\n", num_slots);
++  printk("PF_RING: sample rate      %d [1=no sampling]\n", sample_rate);
++  printk("PF_RING: capture TX       %s\n",
++	 enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
++  printk("PF_RING: transparent mode %s\n",
++	 transparent_mode ? "Yes" : "No");
++
++  printk("PF_RING initialized correctly.\n");
++  return 0;
++}
++
++/* Module entry/exit points and licence declaration */
++module_init(ring_init);
++module_exit(ring_exit);
++MODULE_LICENSE("GPL");
++
++/* The net-protocol module alias is only declared on 2.6 and later kernels */
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++MODULE_ALIAS_NETPROTO(PF_RING);
++#endif
author	nbd <nbd@3c298f89-4303-0410-b956-a3cf2f4a3e73>	2005-10-22 22:03:56 +0000
committer	nbd <nbd@3c298f89-4303-0410-b956-a3cf2f4a3e73>	2005-10-22 22:03:56 +0000
commit	30bf34f05c0f66d9d9221f7ebfe9808024337dd4 (patch)
tree	bfaeed0ba28cc75ed562c61a7516fba097e78e13 /target/linux
parent	2be847dc58145c4946781ebde7cda49ce0e5f099 (diff)