diff options
Diffstat (limited to 'openwrt/target/linux/linux-2.6/patches/generic/104-pf_ring.patch')
-rw-r--r-- | openwrt/target/linux/linux-2.6/patches/generic/104-pf_ring.patch | 5299 |
1 files changed, 0 insertions, 5299 deletions
diff --git a/openwrt/target/linux/linux-2.6/patches/generic/104-pf_ring.patch b/openwrt/target/linux/linux-2.6/patches/generic/104-pf_ring.patch deleted file mode 100644 index 759fb2cc9..000000000 --- a/openwrt/target/linux/linux-2.6/patches/generic/104-pf_ring.patch +++ /dev/null @@ -1,5299 +0,0 @@ -diff --unified --recursive --new-file linux-2.6.12.5/include/linux/ring.h linux-2.6.12.5-1-686-smp-ring3/include/linux/ring.h ---- linux-2.6.12.5/include/linux/ring.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.12.5-1-686-smp-ring3/include/linux/ring.h 2005-10-22 23:50:44.951445250 +0200 -@@ -0,0 +1,108 @@ -+/* -+ * Definitions for packet ring -+ * -+ * 2004 - Luca Deri <deri@ntop.org> -+ */ -+#ifndef __RING_H -+#define __RING_H -+ -+ -+#define INCLUDE_MAC_INFO -+ -+#ifdef INCLUDE_MAC_INFO -+#define SKB_DISPLACEMENT 14 /* Include MAC address information */ -+#else -+#define SKB_DISPLACEMENT 0 /* Do NOT include MAC address information */ -+#endif -+ -+#define RING_MAGIC -+#define RING_MAGIC_VALUE 0x88 -+#define RING_FLOWSLOT_VERSION 5 -+#define RING_VERSION "3.0" -+ -+#define SO_ADD_TO_CLUSTER 99 -+#define SO_REMOVE_FROM_CLUSTER 100 -+#define SO_SET_REFLECTOR 101 -+ -+/* *********************************** */ -+ -+#ifndef HAVE_PCAP -+struct pcap_pkthdr { -+ struct timeval ts; /* time stamp */ -+ u_int32_t caplen; /* length of portion present */ -+ u_int32_t len; /* length this packet (off wire) */ -+}; -+#endif -+ -+/* *********************************** */ -+ -+enum cluster_type { -+ cluster_per_flow = 0, -+ cluster_round_robin -+}; -+ -+/* *********************************** */ -+ -+#define RING_MIN_SLOT_SIZE (60+sizeof(struct pcap_pkthdr)) -+#define RING_MAX_SLOT_SIZE (1514+sizeof(struct pcap_pkthdr)) -+ -+/* *********************************** */ -+ -+typedef struct flowSlotInfo { -+ u_int16_t version, sample_rate; -+ u_int32_t tot_slots, slot_len, tot_mem; -+ -+ u_int64_t tot_pkts, tot_lost; -+ u_int64_t tot_insert, tot_read; -+ u_int16_t insert_idx; -+ u_int16_t remove_idx; -+} FlowSlotInfo; -+ -+/* *********************************** */ -+ -+typedef struct flowSlot { -+#ifdef RING_MAGIC -+ u_char magic; /* It must alwasy be zero */ -+#endif -+ u_char slot_state; /* 0=empty, 1=full */ -+ u_char bucket; /* bucket[bucketLen] */ -+} FlowSlot; -+ -+/* *********************************** */ -+ -+#ifdef __KERNEL__ -+ -+FlowSlotInfo* getRingPtr(void); -+int allocateRing(char *deviceName, u_int numSlots, -+ u_int bucketLen, u_int sampleRate); -+unsigned int pollRing(struct file *fp, struct poll_table_struct * wait); -+void deallocateRing(void); -+ -+/* ************************* */ -+ -+typedef int (*handle_ring_skb)(struct sk_buff *skb, -+ u_char recv_packet, u_char real_skb); -+extern handle_ring_skb get_skb_ring_handler(void); -+extern void set_skb_ring_handler(handle_ring_skb the_handler); -+extern void do_skb_ring_handler(struct sk_buff *skb, -+ u_char recv_packet, u_char real_skb); -+ -+typedef int (*handle_ring_buffer)(struct net_device *dev, -+ char *data, int len); -+extern handle_ring_buffer get_buffer_ring_handler(void); -+extern void set_buffer_ring_handler(handle_ring_buffer the_handler); -+extern int do_buffer_ring_handler(struct net_device *dev, -+ char *data, int len); -+#endif /* __KERNEL__ */ -+ -+/* *********************************** */ -+ -+#define PF_RING 27 /* Packet Ring */ -+#define SOCK_RING PF_RING -+ -+/* ioctl() */ -+#define SIORINGPOLL 0x8888 -+ -+/* *********************************** */ -+ -+#endif /* __RING_H */ -diff --unified --recursive --new-file linux-2.6.12.5/net/Kconfig linux-2.6.12.5-1-686-smp-ring3/net/Kconfig ---- linux-2.6.12.5/net/Kconfig 2005-08-15 02:20:18.000000000 +0200 -+++ linux-2.6.12.5-1-686-smp-ring3/net/Kconfig 2005-10-22 23:50:45.535481750 +0200 -@@ -72,6 +72,7 @@ - - Say Y unless you know what you are doing. - -+source "net/ring/Kconfig" - config INET - bool "TCP/IP networking" - ---help--- -diff --unified --recursive --new-file linux-2.6.12.5/net/Makefile linux-2.6.12.5-1-686-smp-ring3/net/Makefile ---- linux-2.6.12.5/net/Makefile 2005-08-15 02:20:18.000000000 +0200 -+++ linux-2.6.12.5-1-686-smp-ring3/net/Makefile 2005-10-22 23:50:45.491479000 +0200 -@@ -41,6 +41,7 @@ - obj-$(CONFIG_DECNET) += decnet/ - obj-$(CONFIG_ECONET) += econet/ - obj-$(CONFIG_VLAN_8021Q) += 8021q/ -+obj-$(CONFIG_RING) += ring/ - obj-$(CONFIG_IP_SCTP) += sctp/ - - ifeq ($(CONFIG_NET),y) -diff --unified --recursive --new-file linux-2.6.12.5/net/Makefile.ORG linux-2.6.12.5-1-686-smp-ring3/net/Makefile.ORG ---- linux-2.6.12.5/net/Makefile.ORG 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.12.5-1-686-smp-ring3/net/Makefile.ORG 2005-10-22 23:50:45.483478500 +0200 -@@ -0,0 +1,48 @@ -+# -+# Makefile for the linux networking. -+# -+# 2 Sep 2000, Christoph Hellwig <hch@infradead.org> -+# Rewritten to use lists instead of if-statements. -+# -+ -+obj-y := nonet.o -+ -+obj-$(CONFIG_NET) := socket.o core/ -+ -+tmp-$(CONFIG_COMPAT) := compat.o -+obj-$(CONFIG_NET) += $(tmp-y) -+ -+# LLC has to be linked before the files in net/802/ -+obj-$(CONFIG_LLC) += llc/ -+obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ -+obj-$(CONFIG_INET) += ipv4/ -+obj-$(CONFIG_XFRM) += xfrm/ -+obj-$(CONFIG_UNIX) += unix/ -+ifneq ($(CONFIG_IPV6),) -+obj-y += ipv6/ -+endif -+obj-$(CONFIG_PACKET) += packet/ -+obj-$(CONFIG_NET_KEY) += key/ -+obj-$(CONFIG_NET_SCHED) += sched/ -+obj-$(CONFIG_BRIDGE) += bridge/ -+obj-$(CONFIG_IPX) += ipx/ -+obj-$(CONFIG_ATALK) += appletalk/ -+obj-$(CONFIG_WAN_ROUTER) += wanrouter/ -+obj-$(CONFIG_X25) += x25/ -+obj-$(CONFIG_LAPB) += lapb/ -+obj-$(CONFIG_NETROM) += netrom/ -+obj-$(CONFIG_ROSE) += rose/ -+obj-$(CONFIG_AX25) += ax25/ -+obj-$(CONFIG_IRDA) += irda/ -+obj-$(CONFIG_BT) += bluetooth/ -+obj-$(CONFIG_SUNRPC) += sunrpc/ -+obj-$(CONFIG_RXRPC) += rxrpc/ -+obj-$(CONFIG_ATM) += atm/ -+obj-$(CONFIG_DECNET) += decnet/ -+obj-$(CONFIG_ECONET) += econet/ -+obj-$(CONFIG_VLAN_8021Q) += 8021q/ -+obj-$(CONFIG_IP_SCTP) += sctp/ -+ -+ifeq ($(CONFIG_NET),y) -+obj-$(CONFIG_SYSCTL) += sysctl_net.o -+endif -diff --unified --recursive --new-file linux-2.6.12.5/net/core/dev.c linux-2.6.12.5-1-686-smp-ring3/net/core/dev.c ---- linux-2.6.12.5/net/core/dev.c 2005-08-15 02:20:18.000000000 +0200 -+++ linux-2.6.12.5-1-686-smp-ring3/net/core/dev.c 2005-10-22 23:50:45.479478250 +0200 -@@ -115,6 +115,56 @@ - #endif /* CONFIG_NET_RADIO */ - #include <asm/current.h> - -+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE) -+ -+/* #define RING_DEBUG */ -+ -+#include <linux/ring.h> -+#include <linux/version.h> -+ -+static handle_ring_skb ring_handler = NULL; -+ -+handle_ring_skb get_skb_ring_handler() { return(ring_handler); } -+ -+void set_skb_ring_handler(handle_ring_skb the_handler) { -+ ring_handler = the_handler; -+} -+ -+void do_skb_ring_handler(struct sk_buff *skb, -+ u_char recv_packet, u_char real_skb) { -+ if(ring_handler) -+ ring_handler(skb, recv_packet, real_skb); -+} -+ -+/* ******************* */ -+ -+static handle_ring_buffer buffer_ring_handler = NULL; -+ -+handle_ring_buffer get_buffer_ring_handler() { return(buffer_ring_handler); } -+ -+void set_buffer_ring_handler(handle_ring_buffer the_handler) { -+ buffer_ring_handler = the_handler; -+} -+ -+int do_buffer_ring_handler(struct net_device *dev, char *data, int len) { -+ if(buffer_ring_handler) { -+ buffer_ring_handler(dev, data, len); -+ return(1); -+ } else -+ return(0); -+} -+ -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) -+EXPORT_SYMBOL(get_skb_ring_handler); -+EXPORT_SYMBOL(set_skb_ring_handler); -+EXPORT_SYMBOL(do_skb_ring_handler); -+ -+EXPORT_SYMBOL(get_buffer_ring_handler); -+EXPORT_SYMBOL(set_buffer_ring_handler); -+EXPORT_SYMBOL(do_buffer_ring_handler); -+#endif -+ -+#endif - /* This define, if set, will randomly drop a packet when congestion - * is more than moderate. It helps fairness in the multi-interface - * case when one of them is a hog, but it kills performance for the -@@ -1293,6 +1343,10 @@ - skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); - #endif - if (q->enqueue) { -+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE) -+ if(ring_handler) ring_handler(skb, 0, 1); -+#endif /* CONFIG_RING */ -+ - /* Grab device queue */ - spin_lock(&dev->queue_lock); - -@@ -1509,6 +1563,13 @@ - - preempt_disable(); - err = netif_rx(skb); -+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE) -+ if(ring_handler && ring_handler(skb, 1, 1)) { -+ /* The packet has been copied into a ring */ -+ return(NET_RX_SUCCESS); -+ } -+#endif /* CONFIG_RING */ -+ - if (local_softirq_pending()) - do_softirq(); - preempt_enable(); -@@ -1655,6 +1716,13 @@ - int ret = NET_RX_DROP; - unsigned short type; - -+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE) -+ if(ring_handler && ring_handler(skb, 1, 1)) { -+ /* The packet has been copied into a ring */ -+ return(NET_RX_SUCCESS); -+ } -+#endif /* CONFIG_RING */ -+ - /* if we've gotten here through NAPI, check netpoll */ - if (skb->dev->poll && netpoll_rx(skb)) - return NET_RX_DROP; -diff --unified --recursive --new-file linux-2.6.12.5/net/core/dev.c.ORG linux-2.6.12.5-1-686-smp-ring3/net/core/dev.c.ORG ---- linux-2.6.12.5/net/core/dev.c.ORG 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.12.5-1-686-smp-ring3/net/core/dev.c.ORG 2005-10-22 23:50:45.203461000 +0200 -@@ -0,0 +1,3385 @@ -+/* -+ * NET3 Protocol independent device support routines. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ * -+ * Derived from the non IP parts of dev.c 1.0.19 -+ * Authors: Ross Biro -+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> -+ * Mark Evans, <evansmp@uhura.aston.ac.uk> -+ * -+ * Additional Authors: -+ * Florian la Roche <rzsfl@rz.uni-sb.de> -+ * Alan Cox <gw4pts@gw4pts.ampr.org> -+ * David Hinds <dahinds@users.sourceforge.net> -+ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> -+ * Adam Sulmicki <adam@cfar.umd.edu> -+ * Pekka Riikonen <priikone@poesidon.pspt.fi> -+ * -+ * Changes: -+ * D.J. Barrow : Fixed bug where dev->refcnt gets set -+ * to 2 if register_netdev gets called -+ * before net_dev_init & also removed a -+ * few lines of code in the process. -+ * Alan Cox : device private ioctl copies fields back. -+ * Alan Cox : Transmit queue code does relevant -+ * stunts to keep the queue safe. -+ * Alan Cox : Fixed double lock. -+ * Alan Cox : Fixed promisc NULL pointer trap -+ * ???????? : Support the full private ioctl range -+ * Alan Cox : Moved ioctl permission check into -+ * drivers -+ * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI -+ * Alan Cox : 100 backlog just doesn't cut it when -+ * you start doing multicast video 8) -+ * Alan Cox : Rewrote net_bh and list manager. -+ * Alan Cox : Fix ETH_P_ALL echoback lengths. -+ * Alan Cox : Took out transmit every packet pass -+ * Saved a few bytes in the ioctl handler -+ * Alan Cox : Network driver sets packet type before -+ * calling netif_rx. Saves a function -+ * call a packet. -+ * Alan Cox : Hashed net_bh() -+ * Richard Kooijman: Timestamp fixes. -+ * Alan Cox : Wrong field in SIOCGIFDSTADDR -+ * Alan Cox : Device lock protection. -+ * Alan Cox : Fixed nasty side effect of device close -+ * changes. -+ * Rudi Cilibrasi : Pass the right thing to -+ * set_mac_address() -+ * Dave Miller : 32bit quantity for the device lock to -+ * make it work out on a Sparc. -+ * Bjorn Ekwall : Added KERNELD hack. -+ * Alan Cox : Cleaned up the backlog initialise. -+ * Craig Metz : SIOCGIFCONF fix if space for under -+ * 1 device. -+ * Thomas Bogendoerfer : Return ENODEV for dev_open, if there -+ * is no device open function. -+ * Andi Kleen : Fix error reporting for SIOCGIFCONF -+ * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF -+ * Cyrus Durgin : Cleaned for KMOD -+ * Adam Sulmicki : Bug Fix : Network Device Unload -+ * A network device unload needs to purge -+ * the backlog queue. -+ * Paul Rusty Russell : SIOCSIFNAME -+ * Pekka Riikonen : Netdev boot-time settings code -+ * Andrew Morton : Make unregister_netdevice wait -+ * indefinitely on dev->refcnt -+ * J Hadi Salim : - Backlog queue sampling -+ * - netif_rx() feedback -+ */ -+ -+#include <asm/uaccess.h> -+#include <asm/system.h> -+#include <linux/bitops.h> -+#include <linux/config.h> -+#include <linux/cpu.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+#include <linux/sched.h> -+#include <linux/string.h> -+#include <linux/mm.h> -+#include <linux/socket.h> -+#include <linux/sockios.h> -+#include <linux/errno.h> -+#include <linux/interrupt.h> -+#include <linux/if_ether.h> -+#include <linux/netdevice.h> -+#include <linux/etherdevice.h> -+#include <linux/notifier.h> -+#include <linux/skbuff.h> -+#include <net/sock.h> -+#include <linux/rtnetlink.h> -+#include <linux/proc_fs.h> -+#include <linux/seq_file.h> -+#include <linux/stat.h> -+#include <linux/if_bridge.h> -+#include <linux/divert.h> -+#include <net/dst.h> -+#include <net/pkt_sched.h> -+#include <net/checksum.h> -+#include <linux/highmem.h> -+#include <linux/init.h> -+#include <linux/kmod.h> -+#include <linux/module.h> -+#include <linux/kallsyms.h> -+#include <linux/netpoll.h> -+#include <linux/rcupdate.h> -+#include <linux/delay.h> -+#ifdef CONFIG_NET_RADIO -+#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */ -+#include <net/iw_handler.h> -+#endif /* CONFIG_NET_RADIO */ -+#include <asm/current.h> -+ -+/* This define, if set, will randomly drop a packet when congestion -+ * is more than moderate. It helps fairness in the multi-interface -+ * case when one of them is a hog, but it kills performance for the -+ * single interface case so it is off now by default. -+ */ -+#undef RAND_LIE -+ -+/* Setting this will sample the queue lengths and thus congestion -+ * via a timer instead of as each packet is received. -+ */ -+#undef OFFLINE_SAMPLE -+ -+/* -+ * The list of packet types we will receive (as opposed to discard) -+ * and the routines to invoke. -+ * -+ * Why 16. Because with 16 the only overlap we get on a hash of the -+ * low nibble of the protocol value is RARP/SNAP/X.25. -+ * -+ * NOTE: That is no longer true with the addition of VLAN tags. Not -+ * sure which should go first, but I bet it won't make much -+ * difference if we are running VLANs. The good news is that -+ * this protocol won't be in the list unless compiled in, so -+ * the average user (w/out VLANs) will not be adversly affected. -+ * --BLG -+ * -+ * 0800 IP -+ * 8100 802.1Q VLAN -+ * 0001 802.3 -+ * 0002 AX.25 -+ * 0004 802.2 -+ * 8035 RARP -+ * 0005 SNAP -+ * 0805 X.25 -+ * 0806 ARP -+ * 8137 IPX -+ * 0009 Localtalk -+ * 86DD IPv6 -+ */ -+ -+static DEFINE_SPINLOCK(ptype_lock); -+static struct list_head ptype_base[16]; /* 16 way hashed list */ -+static struct list_head ptype_all; /* Taps */ -+ -+#ifdef OFFLINE_SAMPLE -+static void sample_queue(unsigned long dummy); -+static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0); -+#endif -+ -+/* -+ * The @dev_base list is protected by @dev_base_lock and the rtln -+ * semaphore. -+ * -+ * Pure readers hold dev_base_lock for reading. -+ * -+ * Writers must hold the rtnl semaphore while they loop through the -+ * dev_base list, and hold dev_base_lock for writing when they do the -+ * actual updates. This allows pure readers to access the list even -+ * while a writer is preparing to update it. -+ * -+ * To put it another way, dev_base_lock is held for writing only to -+ * protect against pure readers; the rtnl semaphore provides the -+ * protection against other writers. -+ * -+ * See, for example usages, register_netdevice() and -+ * unregister_netdevice(), which must be called with the rtnl -+ * semaphore held. -+ */ -+struct net_device *dev_base; -+static struct net_device **dev_tail = &dev_base; -+DEFINE_RWLOCK(dev_base_lock); -+ -+EXPORT_SYMBOL(dev_base); -+EXPORT_SYMBOL(dev_base_lock); -+ -+#define NETDEV_HASHBITS 8 -+static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS]; -+static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS]; -+ -+static inline struct hlist_head *dev_name_hash(const char *name) -+{ -+ unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); -+ return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)]; -+} -+ -+static inline struct hlist_head *dev_index_hash(int ifindex) -+{ -+ return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)]; -+} -+ -+/* -+ * Our notifier list -+ */ -+ -+static struct notifier_block *netdev_chain; -+ -+/* -+ * Device drivers call our routines to queue packets here. We empty the -+ * queue in the local softnet handler. -+ */ -+DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, }; -+ -+#ifdef CONFIG_SYSFS -+extern int netdev_sysfs_init(void); -+extern int netdev_register_sysfs(struct net_device *); -+extern void netdev_unregister_sysfs(struct net_device *); -+#else -+#define netdev_sysfs_init() (0) -+#define netdev_register_sysfs(dev) (0) -+#define netdev_unregister_sysfs(dev) do { } while(0) -+#endif -+ -+ -+/******************************************************************************* -+ -+ Protocol management and registration routines -+ -+*******************************************************************************/ -+ -+/* -+ * For efficiency -+ */ -+ -+int netdev_nit; -+ -+/* -+ * Add a protocol ID to the list. Now that the input handler is -+ * smarter we can dispense with all the messy stuff that used to be -+ * here. -+ * -+ * BEWARE!!! Protocol handlers, mangling input packets, -+ * MUST BE last in hash buckets and checking protocol handlers -+ * MUST start from promiscuous ptype_all chain in net_bh. -+ * It is true now, do not change it. -+ * Explanation follows: if protocol handler, mangling packet, will -+ * be the first on list, it is not able to sense, that packet -+ * is cloned and should be copied-on-write, so that it will -+ * change it and subsequent readers will get broken packet. -+ * --ANK (980803) -+ */ -+ -+/** -+ * dev_add_pack - add packet handler -+ * @pt: packet type declaration -+ * -+ * Add a protocol handler to the networking stack. The passed &packet_type -+ * is linked into kernel lists and may not be freed until it has been -+ * removed from the kernel lists. -+ * -+ * This call does not sleep therefore it can not -+ * guarantee all CPU's that are in middle of receiving packets -+ * will see the new packet type (until the next received packet). -+ */ -+ -+void dev_add_pack(struct packet_type *pt) -+{ -+ int hash; -+ -+ spin_lock_bh(&ptype_lock); -+ if (pt->type == htons(ETH_P_ALL)) { -+ netdev_nit++; -+ list_add_rcu(&pt->list, &ptype_all); -+ } else { -+ hash = ntohs(pt->type) & 15; -+ list_add_rcu(&pt->list, &ptype_base[hash]); -+ } -+ spin_unlock_bh(&ptype_lock); -+} -+ -+extern void linkwatch_run_queue(void); -+ -+ -+ -+/** -+ * __dev_remove_pack - remove packet handler -+ * @pt: packet type declaration -+ * -+ * Remove a protocol handler that was previously added to the kernel -+ * protocol handlers by dev_add_pack(). The passed &packet_type is removed -+ * from the kernel lists and can be freed or reused once this function -+ * returns. -+ * -+ * The packet type might still be in use by receivers -+ * and must not be freed until after all the CPU's have gone -+ * through a quiescent state. -+ */ -+void __dev_remove_pack(struct packet_type *pt) -+{ -+ struct list_head *head; -+ struct packet_type *pt1; -+ -+ spin_lock_bh(&ptype_lock); -+ -+ if (pt->type == htons(ETH_P_ALL)) { -+ netdev_nit--; -+ head = &ptype_all; -+ } else -+ head = &ptype_base[ntohs(pt->type) & 15]; -+ -+ list_for_each_entry(pt1, head, list) { -+ if (pt == pt1) { -+ list_del_rcu(&pt->list); -+ goto out; -+ } -+ } -+ -+ printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); -+out: -+ spin_unlock_bh(&ptype_lock); -+} -+/** -+ * dev_remove_pack - remove packet handler -+ * @pt: packet type declaration -+ * -+ * Remove a protocol handler that was previously added to the kernel -+ * protocol handlers by dev_add_pack(). The passed &packet_type is removed -+ * from the kernel lists and can be freed or reused once this function -+ * returns. -+ * -+ * This call sleeps to guarantee that no CPU is looking at the packet -+ * type after return. -+ */ -+void dev_remove_pack(struct packet_type *pt) -+{ -+ __dev_remove_pack(pt); -+ -+ synchronize_net(); -+} -+ -+/****************************************************************************** -+ -+ Device Boot-time Settings Routines -+ -+*******************************************************************************/ -+ -+/* Boot time configuration table */ -+static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; -+ -+/** -+ * netdev_boot_setup_add - add new setup entry -+ * @name: name of the device -+ * @map: configured settings for the device -+ * -+ * Adds new setup entry to the dev_boot_setup list. The function -+ * returns 0 on error and 1 on success. This is a generic routine to -+ * all netdevices. -+ */ -+static int netdev_boot_setup_add(char *name, struct ifmap *map) -+{ -+ struct netdev_boot_setup *s; -+ int i; -+ -+ s = dev_boot_setup; -+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { -+ if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { -+ memset(s[i].name, 0, sizeof(s[i].name)); -+ strcpy(s[i].name, name); -+ memcpy(&s[i].map, map, sizeof(s[i].map)); -+ break; -+ } -+ } -+ -+ return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; -+} -+ -+/** -+ * netdev_boot_setup_check - check boot time settings -+ * @dev: the netdevice -+ * -+ * Check boot time settings for the device. -+ * The found settings are set for the device to be used -+ * later in the device probing. -+ * Returns 0 if no settings found, 1 if they are. -+ */ -+int netdev_boot_setup_check(struct net_device *dev) -+{ -+ struct netdev_boot_setup *s = dev_boot_setup; -+ int i; -+ -+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { -+ if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && -+ !strncmp(dev->name, s[i].name, strlen(s[i].name))) { -+ dev->irq = s[i].map.irq; -+ dev->base_addr = s[i].map.base_addr; -+ dev->mem_start = s[i].map.mem_start; -+ dev->mem_end = s[i].map.mem_end; -+ return 1; -+ } -+ } -+ return 0; -+} -+ -+ -+/** -+ * netdev_boot_base - get address from boot time settings -+ * @prefix: prefix for network device -+ * @unit: id for network device -+ * -+ * Check boot time settings for the base address of device. -+ * The found settings are set for the device to be used -+ * later in the device probing. -+ * Returns 0 if no settings found. -+ */ -+unsigned long netdev_boot_base(const char *prefix, int unit) -+{ -+ const struct netdev_boot_setup *s = dev_boot_setup; -+ char name[IFNAMSIZ]; -+ int i; -+ -+ sprintf(name, "%s%d", prefix, unit); -+ -+ /* -+ * If device already registered then return base of 1 -+ * to indicate not to probe for this interface -+ */ -+ if (__dev_get_by_name(name)) -+ return 1; -+ -+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) -+ if (!strcmp(name, s[i].name)) -+ return s[i].map.base_addr; -+ return 0; -+} -+ -+/* -+ * Saves at boot time configured settings for any netdevice. -+ */ -+int __init netdev_boot_setup(char *str) -+{ -+ int ints[5]; -+ struct ifmap map; -+ -+ str = get_options(str, ARRAY_SIZE(ints), ints); -+ if (!str || !*str) -+ return 0; -+ -+ /* Save settings */ -+ memset(&map, 0, sizeof(map)); -+ if (ints[0] > 0) -+ map.irq = ints[1]; -+ if (ints[0] > 1) -+ map.base_addr = ints[2]; -+ if (ints[0] > 2) -+ map.mem_start = ints[3]; -+ if (ints[0] > 3) -+ map.mem_end = ints[4]; -+ -+ /* Add new entry to the list */ -+ return netdev_boot_setup_add(str, &map); -+} -+ -+__setup("netdev=", netdev_boot_setup); -+ -+/******************************************************************************* -+ -+ Device Interface Subroutines -+ -+*******************************************************************************/ -+ -+/** -+ * __dev_get_by_name - find a device by its name -+ * @name: name to find -+ * -+ * Find an interface by name. Must be called under RTNL semaphore -+ * or @dev_base_lock. If the name is found a pointer to the device -+ * is returned. If the name is not found then %NULL is returned. The -+ * reference counters are not incremented so the caller must be -+ * careful with locks. -+ */ -+ -+struct net_device *__dev_get_by_name(const char *name) -+{ -+ struct hlist_node *p; -+ -+ hlist_for_each(p, dev_name_hash(name)) { -+ struct net_device *dev -+ = hlist_entry(p, struct net_device, name_hlist); -+ if (!strncmp(dev->name, name, IFNAMSIZ)) -+ return dev; -+ } -+ return NULL; -+} -+ -+/** -+ * dev_get_by_name - find a device by its name -+ * @name: name to find -+ * -+ * Find an interface by name. This can be called from any -+ * context and does its own locking. The returned handle has -+ * the usage count incremented and the caller must use dev_put() to -+ * release it when it is no longer needed. %NULL is returned if no -+ * matching device is found. -+ */ -+ -+struct net_device *dev_get_by_name(const char *name) -+{ -+ struct net_device *dev; -+ -+ read_lock(&dev_base_lock); -+ dev = __dev_get_by_name(name); -+ if (dev) -+ dev_hold(dev); -+ read_unlock(&dev_base_lock); -+ return dev; -+} -+ -+/** -+ * __dev_get_by_index - find a device by its ifindex -+ * @ifindex: index of device -+ * -+ * Search for an interface by index. Returns %NULL if the device -+ * is not found or a pointer to the device. The device has not -+ * had its reference counter increased so the caller must be careful -+ * about locking. The caller must hold either the RTNL semaphore -+ * or @dev_base_lock. -+ */ -+ -+struct net_device *__dev_get_by_index(int ifindex) -+{ -+ struct hlist_node *p; -+ -+ hlist_for_each(p, dev_index_hash(ifindex)) { -+ struct net_device *dev -+ = hlist_entry(p, struct net_device, index_hlist); -+ if (dev->ifindex == ifindex) -+ return dev; -+ } -+ return NULL; -+} -+ -+ -+/** -+ * dev_get_by_index - find a device by its ifindex -+ * @ifindex: index of device -+ * -+ * Search for an interface by index. Returns NULL if the device -+ * is not found or a pointer to the device. The device returned has -+ * had a reference added and the pointer is safe until the user calls -+ * dev_put to indicate they have finished with it. -+ */ -+ -+struct net_device *dev_get_by_index(int ifindex) -+{ -+ struct net_device *dev; -+ -+ read_lock(&dev_base_lock); -+ dev = __dev_get_by_index(ifindex); -+ if (dev) -+ dev_hold(dev); -+ read_unlock(&dev_base_lock); -+ return dev; -+} -+ -+/** -+ * dev_getbyhwaddr - find a device by its hardware address -+ * @type: media type of device -+ * @ha: hardware address -+ * -+ * Search for an interface by MAC address. Returns NULL if the device -+ * is not found or a pointer to the device. The caller must hold the -+ * rtnl semaphore. The returned device has not had its ref count increased -+ * and the caller must therefore be careful about locking -+ * -+ * BUGS: -+ * If the API was consistent this would be __dev_get_by_hwaddr -+ */ -+ -+struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) -+{ -+ struct net_device *dev; -+ -+ ASSERT_RTNL(); -+ -+ for (dev = dev_base; dev; dev = dev->next) -+ if (dev->type == type && -+ !memcmp(dev->dev_addr, ha, dev->addr_len)) -+ break; -+ return dev; -+} -+ -+struct net_device *dev_getfirstbyhwtype(unsigned short type) -+{ -+ struct net_device *dev; -+ -+ rtnl_lock(); -+ for (dev = dev_base; dev; dev = dev->next) { -+ if (dev->type == type) { -+ dev_hold(dev); -+ break; -+ } -+ } -+ rtnl_unlock(); -+ return dev; -+} -+ -+EXPORT_SYMBOL(dev_getfirstbyhwtype); -+ -+/** -+ * dev_get_by_flags - find any device with given flags -+ * @if_flags: IFF_* values -+ * @mask: bitmask of bits in if_flags to check -+ * -+ * Search for any interface with the given flags. Returns NULL if a device -+ * is not found or a pointer to the device. The device returned has -+ * had a reference added and the pointer is safe until the user calls -+ * dev_put to indicate they have finished with it. -+ */ -+ -+struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask) -+{ -+ struct net_device *dev; -+ -+ read_lock(&dev_base_lock); -+ for (dev = dev_base; dev != NULL; dev = dev->next) { -+ if (((dev->flags ^ if_flags) & mask) == 0) { -+ dev_hold(dev); -+ break; -+ } -+ } -+ read_unlock(&dev_base_lock); -+ return dev; -+} -+ -+/** -+ * dev_valid_name - check if name is okay for network device -+ * @name: name string -+ * -+ * Network device names need to be valid file names to -+ * to allow sysfs to work -+ */ -+static int dev_valid_name(const char *name) -+{ -+ return !(*name == '\0' -+ || !strcmp(name, ".") -+ || !strcmp(name, "..") -+ || strchr(name, '/')); -+} -+ -+/** -+ * dev_alloc_name - allocate a name for a device -+ * @dev: device -+ * @name: name format string -+ * -+ * Passed a format string - eg "lt%d" it will try and find a suitable -+ * id. Not efficient for many devices, not called a lot. The caller -+ * must hold the dev_base or rtnl lock while allocating the name and -+ * adding the device in order to avoid duplicates. Returns the number -+ * of the unit assigned or a negative errno code. -+ */ -+ -+int dev_alloc_name(struct net_device *dev, const char *name) -+{ -+ int i = 0; -+ char buf[IFNAMSIZ]; -+ const char *p; -+ const int max_netdevices = 8*PAGE_SIZE; -+ long *inuse; -+ struct net_device *d; -+ -+ p = strnchr(name, IFNAMSIZ-1, '%'); -+ if (p) { -+ /* -+ * Verify the string as this thing may have come from -+ * the user. There must be either one "%d" and no other "%" -+ * characters. -+ */ -+ if (p[1] != 'd' || strchr(p + 2, '%')) -+ return -EINVAL; -+ -+ /* Use one page as a bit array of possible slots */ -+ inuse = (long *) get_zeroed_page(GFP_ATOMIC); -+ if (!inuse) -+ return -ENOMEM; -+ -+ for (d = dev_base; d; d = d->next) { -+ if (!sscanf(d->name, name, &i)) -+ continue; -+ if (i < 0 || i >= max_netdevices) -+ continue; -+ -+ /* avoid cases where sscanf is not exact inverse of printf */ -+ snprintf(buf, sizeof(buf), name, i); -+ if (!strncmp(buf, d->name, IFNAMSIZ)) -+ set_bit(i, inuse); -+ } -+ -+ i = find_first_zero_bit(inuse, max_netdevices); -+ free_page((unsigned long) inuse); -+ } -+ -+ snprintf(buf, sizeof(buf), name, i); -+ if (!__dev_get_by_name(buf)) { -+ strlcpy(dev->name, buf, IFNAMSIZ); -+ return i; -+ } -+ -+ /* It is possible to run out of possible slots -+ * when the name is long and there isn't enough space left -+ * for the digits, or if all bits are used. -+ */ -+ return -ENFILE; -+} -+ -+ -+/** -+ * dev_change_name - change name of a device -+ * @dev: device -+ * @newname: name (or format string) must be at least IFNAMSIZ -+ * -+ * Change name of a device, can pass format strings "eth%d". -+ * for wildcarding. -+ */ -+int dev_change_name(struct net_device *dev, char *newname) -+{ -+ int err = 0; -+ -+ ASSERT_RTNL(); -+ -+ if (dev->flags & IFF_UP) -+ return -EBUSY; -+ -+ if (!dev_valid_name(newname)) -+ return -EINVAL; -+ -+ if (strchr(newname, '%')) { -+ err = dev_alloc_name(dev, newname); -+ if (err < 0) -+ return err; -+ strcpy(newname, dev->name); -+ } -+ else if (__dev_get_by_name(newname)) -+ return -EEXIST; -+ else -+ strlcpy(dev->name, newname, IFNAMSIZ); -+ -+ err = class_device_rename(&dev->class_dev, dev->name); -+ if (!err) { -+ hlist_del(&dev->name_hlist); -+ hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); -+ notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); -+ } -+ -+ return err; -+} -+ -+/** -+ * netdev_features_change - device changes fatures -+ * @dev: device to cause notification -+ * -+ * Called to indicate a device has changed features. -+ */ -+void netdev_features_change(struct net_device *dev) -+{ -+ notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev); -+} -+EXPORT_SYMBOL(netdev_features_change); -+ -+/** -+ * netdev_state_change - device changes state -+ * @dev: device to cause notification -+ * -+ * Called to indicate a device has changed state. This function calls -+ * the notifier chains for netdev_chain and sends a NEWLINK message -+ * to the routing socket. -+ */ -+void netdev_state_change(struct net_device *dev) -+{ -+ if (dev->flags & IFF_UP) { -+ notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); -+ rtmsg_ifinfo(RTM_NEWLINK, dev, 0); -+ } -+} -+ -+/** -+ * dev_load - load a network module -+ * @name: name of interface -+ * -+ * If a network interface is not present and the process has suitable -+ * privileges this function loads the module. If module loading is not -+ * available in this kernel then it becomes a nop. -+ */ -+ -+void dev_load(const char *name) -+{ -+ struct net_device *dev; -+ -+ read_lock(&dev_base_lock); -+ dev = __dev_get_by_name(name); -+ read_unlock(&dev_base_lock); -+ -+ if (!dev && capable(CAP_SYS_MODULE)) -+ request_module("%s", name); -+} -+ -+static int default_rebuild_header(struct sk_buff *skb) -+{ -+ printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", -+ skb->dev ? skb->dev->name : "NULL!!!"); -+ kfree_skb(skb); -+ return 1; -+} -+ -+ -+/** -+ * dev_open - prepare an interface for use. -+ * @dev: device to open -+ * -+ * Takes a device from down to up state. The device's private open -+ * function is invoked and then the multicast lists are loaded. Finally -+ * the device is moved into the up state and a %NETDEV_UP message is -+ * sent to the netdev notifier chain. -+ * -+ * Calling this function on an active interface is a nop. On a failure -+ * a negative errno code is returned. -+ */ -+int dev_open(struct net_device *dev) -+{ -+ int ret = 0; -+ -+ /* -+ * Is it already up? -+ */ -+ -+ if (dev->flags & IFF_UP) -+ return 0; -+ -+ /* -+ * Is it even present? -+ */ -+ if (!netif_device_present(dev)) -+ return -ENODEV; -+ -+ /* -+ * Call device private open method -+ */ -+ set_bit(__LINK_STATE_START, &dev->state); -+ if (dev->open) { -+ ret = dev->open(dev); -+ if (ret) -+ clear_bit(__LINK_STATE_START, &dev->state); -+ } -+ -+ /* -+ * If it went open OK then: -+ */ -+ -+ if (!ret) { -+ /* -+ * Set the flags. -+ */ -+ dev->flags |= IFF_UP; -+ -+ /* -+ * Initialize multicasting status -+ */ -+ dev_mc_upload(dev); -+ -+ /* -+ * Wakeup transmit queue engine -+ */ -+ dev_activate(dev); -+ -+ /* -+ * ... and announce new interface. -+ */ -+ notifier_call_chain(&netdev_chain, NETDEV_UP, dev); -+ } -+ return ret; -+} -+ -+/** -+ * dev_close - shutdown an interface. -+ * @dev: device to shutdown -+ * -+ * This function moves an active device into down state. A -+ * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device -+ * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier -+ * chain. -+ */ -+int dev_close(struct net_device *dev) -+{ -+ if (!(dev->flags & IFF_UP)) -+ return 0; -+ -+ /* -+ * Tell people we are going down, so that they can -+ * prepare to death, when device is still operating. -+ */ -+ notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); -+ -+ dev_deactivate(dev); -+ -+ clear_bit(__LINK_STATE_START, &dev->state); -+ -+ /* Synchronize to scheduled poll. We cannot touch poll list, -+ * it can be even on different cpu. So just clear netif_running(), -+ * and wait when poll really will happen. Actually, the best place -+ * for this is inside dev->stop() after device stopped its irq -+ * engine, but this requires more changes in devices. */ -+ -+ smp_mb__after_clear_bit(); /* Commit netif_running(). */ -+ while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { -+ /* No hurry. */ -+ current->state = TASK_INTERRUPTIBLE; -+ schedule_timeout(1); -+ } -+ -+ /* -+ * Call the device specific close. This cannot fail. -+ * Only if device is UP -+ * -+ * We allow it to be called even after a DETACH hot-plug -+ * event. -+ */ -+ if (dev->stop) -+ dev->stop(dev); -+ -+ /* -+ * Device is now down. -+ */ -+ -+ dev->flags &= ~IFF_UP; -+ -+ /* -+ * Tell people we are down -+ */ -+ notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); -+ -+ return 0; -+} -+ -+ -+/* -+ * Device change register/unregister. These are not inline or static -+ * as we export them to the world. -+ */ -+ -+/** -+ * register_netdevice_notifier - register a network notifier block -+ * @nb: notifier -+ * -+ * Register a notifier to be called when network device events occur. -+ * The notifier passed is linked into the kernel structures and must -+ * not be reused until it has been unregistered. A negative errno code -+ * is returned on a failure. -+ * -+ * When registered all registration and up events are replayed -+ * to the new notifier to allow device to have a race free -+ * view of the network device list. -+ */ -+ -+int register_netdevice_notifier(struct notifier_block *nb) -+{ -+ struct net_device *dev; -+ int err; -+ -+ rtnl_lock(); -+ err = notifier_chain_register(&netdev_chain, nb); -+ if (!err) { -+ for (dev = dev_base; dev; dev = dev->next) { -+ nb->notifier_call(nb, NETDEV_REGISTER, dev); -+ -+ if (dev->flags & IFF_UP) -+ nb->notifier_call(nb, NETDEV_UP, dev); -+ } -+ } -+ rtnl_unlock(); -+ return err; -+} -+ -+/** -+ * unregister_netdevice_notifier - unregister a network notifier block -+ * @nb: notifier -+ * -+ * Unregister a notifier previously registered by -+ * register_netdevice_notifier(). The notifier is unlinked into the -+ * kernel structures and may then be reused. A negative errno code -+ * is returned on a failure. -+ */ -+ -+int unregister_netdevice_notifier(struct notifier_block *nb) -+{ -+ return notifier_chain_unregister(&netdev_chain, nb); -+} -+ -+/** -+ * call_netdevice_notifiers - call all network notifier blocks -+ * @val: value passed unmodified to notifier function -+ * @v: pointer passed unmodified to notifier function -+ * -+ * Call all network notifier blocks. Parameters and return value -+ * are as for notifier_call_chain(). -+ */ -+ -+int call_netdevice_notifiers(unsigned long val, void *v) -+{ -+ return notifier_call_chain(&netdev_chain, val, v); -+} -+ -+/* When > 0 there are consumers of rx skb time stamps */ -+static atomic_t netstamp_needed = ATOMIC_INIT(0); -+ -+void net_enable_timestamp(void) -+{ -+ atomic_inc(&netstamp_needed); -+} -+ -+void net_disable_timestamp(void) -+{ -+ atomic_dec(&netstamp_needed); -+} -+ -+static inline void net_timestamp(struct timeval *stamp) -+{ -+ if (atomic_read(&netstamp_needed)) -+ do_gettimeofday(stamp); -+ else { -+ stamp->tv_sec = 0; -+ stamp->tv_usec = 0; -+ } -+} -+ -+/* -+ * Support routine. Sends outgoing frames to any network -+ * taps currently in use. -+ */ -+ -+void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) -+{ -+ struct packet_type *ptype; -+ net_timestamp(&skb->stamp); -+ -+ rcu_read_lock(); -+ list_for_each_entry_rcu(ptype, &ptype_all, list) { -+ /* Never send packets back to the socket -+ * they originated from - MvS (miquels@drinkel.ow.org) -+ */ -+ if ((ptype->dev == dev || !ptype->dev) && -+ (ptype->af_packet_priv == NULL || -+ (struct sock *)ptype->af_packet_priv != skb->sk)) { -+ struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC); -+ if (!skb2) -+ break; -+ -+ /* skb->nh should be correctly -+ set by sender, so that the second statement is -+ just protection against buggy protocols. -+ */ -+ skb2->mac.raw = skb2->data; -+ -+ if (skb2->nh.raw < skb2->data || -+ skb2->nh.raw > skb2->tail) { -+ if (net_ratelimit()) -+ printk(KERN_CRIT "protocol %04x is " -+ "buggy, dev %s\n", -+ skb2->protocol, dev->name); -+ skb2->nh.raw = skb2->data; -+ } -+ -+ skb2->h.raw = skb2->nh.raw; -+ skb2->pkt_type = PACKET_OUTGOING; -+ ptype->func(skb2, skb->dev, ptype); -+ } -+ } -+ rcu_read_unlock(); -+} -+ -+/* -+ * Invalidate hardware checksum when packet is to be mangled, and -+ * complete checksum manually on outgoing path. -+ */ -+int skb_checksum_help(struct sk_buff *skb, int inward) -+{ -+ unsigned int csum; -+ int ret = 0, offset = skb->h.raw - skb->data; -+ -+ if (inward) { -+ skb->ip_summed = CHECKSUM_NONE; -+ goto out; -+ } -+ -+ if (skb_cloned(skb)) { -+ ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); -+ if (ret) -+ goto out; -+ } -+ -+ if (offset > (int)skb->len) -+ BUG(); -+ csum = skb_checksum(skb, offset, skb->len-offset, 0); -+ -+ offset = skb->tail - skb->h.raw; -+ if (offset <= 0) -+ BUG(); -+ if (skb->csum + 2 > offset) -+ BUG(); -+ -+ *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); -+ skb->ip_summed = CHECKSUM_NONE; -+out: -+ return ret; -+} -+ -+#ifdef CONFIG_HIGHMEM -+/* Actually, we should eliminate this check as soon as we know, that: -+ * 1. IOMMU is present and allows to map all the memory. -+ * 2. No high memory really exists on this machine. -+ */ -+ -+static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) -+{ -+ int i; -+ -+ if (dev->features & NETIF_F_HIGHDMA) -+ return 0; -+ -+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) -+ if (PageHighMem(skb_shinfo(skb)->frags[i].page)) -+ return 1; -+ -+ return 0; -+} -+#else -+#define illegal_highdma(dev, skb) (0) -+#endif -+ -+extern void skb_release_data(struct sk_buff *); -+ -+/* Keep head the same: replace data */ -+int __skb_linearize(struct sk_buff *skb, int gfp_mask) -+{ -+ unsigned int size; -+ u8 *data; -+ long offset; -+ struct skb_shared_info *ninfo; -+ int headerlen = skb->data - skb->head; -+ int expand = (skb->tail + skb->data_len) - skb->end; -+ -+ if (skb_shared(skb)) -+ BUG(); -+ -+ if (expand <= 0) -+ expand = 0; -+ -+ size = skb->end - skb->head + expand; -+ size = SKB_DATA_ALIGN(size); -+ data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); -+ if (!data) -+ return -ENOMEM; -+ -+ /* Copy entire thing */ -+ if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len)) -+ BUG(); -+ -+ /* Set up shinfo */ -+ ninfo = (struct skb_shared_info*)(data + size); -+ atomic_set(&ninfo->dataref, 1); -+ ninfo->tso_size = skb_shinfo(skb)->tso_size; -+ ninfo->tso_segs = skb_shinfo(skb)->tso_segs; -+ ninfo->nr_frags = 0; -+ ninfo->frag_list = NULL; -+ -+ /* Offset between the two in bytes */ -+ offset = data - skb->head; -+ -+ /* Free old data. */ -+ skb_release_data(skb); -+ -+ skb->head = data; -+ skb->end = data + size; -+ -+ /* Set up new pointers */ -+ skb->h.raw += offset; -+ skb->nh.raw += offset; -+ skb->mac.raw += offset; -+ skb->tail += offset; -+ skb->data += offset; -+ -+ /* We are no longer a clone, even if we were. */ -+ skb->cloned = 0; -+ -+ skb->tail += skb->data_len; -+ skb->data_len = 0; -+ return 0; -+} -+ -+#define HARD_TX_LOCK(dev, cpu) { \ -+ if ((dev->features & NETIF_F_LLTX) == 0) { \ -+ spin_lock(&dev->xmit_lock); \ -+ dev->xmit_lock_owner = cpu; \ -+ } \ -+} -+ -+#define HARD_TX_UNLOCK(dev) { \ -+ if ((dev->features & NETIF_F_LLTX) == 0) { \ -+ dev->xmit_lock_owner = -1; \ -+ spin_unlock(&dev->xmit_lock); \ -+ } \ -+} -+ -+/** -+ * dev_queue_xmit - transmit a buffer -+ * @skb: buffer to transmit -+ * -+ * Queue a buffer for transmission to a network device. The caller must -+ * have set the device and priority and built the buffer before calling -+ * this function. The function can be called from an interrupt. -+ * -+ * A negative errno code is returned on a failure. A success does not -+ * guarantee the frame will be transmitted as it may be dropped due -+ * to congestion or traffic shaping. -+ * -+ * ----------------------------------------------------------------------------------- -+ * I notice this method can also return errors from the queue disciplines, -+ * including NET_XMIT_DROP, which is a positive value. So, errors can also -+ * be positive. -+ * -+ * Regardless of the return value, the skb is consumed, so it is currently -+ * difficult to retry a send to this method. (You can bump the ref count -+ * before sending to hold a reference for retry if you are careful.) -+ * -+ * When calling this method, interrupts MUST be enabled. This is because -+ * the BH enable code must have IRQs enabled so that it will not deadlock. -+ * --BLG -+ */ -+ -+int dev_queue_xmit(struct sk_buff *skb) -+{ -+ struct net_device *dev = skb->dev; -+ struct Qdisc *q; -+ int rc = -ENOMEM; -+ -+ if (skb_shinfo(skb)->frag_list && -+ !(dev->features & NETIF_F_FRAGLIST) && -+ __skb_linearize(skb, GFP_ATOMIC)) -+ goto out_kfree_skb; -+ -+ /* Fragmented skb is linearized if device does not support SG, -+ * or if at least one of fragments is in highmem and device -+ * does not support DMA from it. -+ */ -+ if (skb_shinfo(skb)->nr_frags && -+ (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && -+ __skb_linearize(skb, GFP_ATOMIC)) -+ goto out_kfree_skb; -+ -+ /* If packet is not checksummed and device does not support -+ * checksumming for this protocol, complete checksumming here. -+ */ -+ if (skb->ip_summed == CHECKSUM_HW && -+ (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) && -+ (!(dev->features & NETIF_F_IP_CSUM) || -+ skb->protocol != htons(ETH_P_IP)))) -+ if (skb_checksum_help(skb, 0)) -+ goto out_kfree_skb; -+ -+ /* Disable soft irqs for various locks below. Also -+ * stops preemption for RCU. -+ */ -+ local_bh_disable(); -+ -+ /* Updates of qdisc are serialized by queue_lock. -+ * The struct Qdisc which is pointed to by qdisc is now a -+ * rcu structure - it may be accessed without acquiring -+ * a lock (but the structure may be stale.) The freeing of the -+ * qdisc will be deferred until it's known that there are no -+ * more references to it. -+ * -+ * If the qdisc has an enqueue function, we still need to -+ * hold the queue_lock before calling it, since queue_lock -+ * also serializes access to the device queue. -+ */ -+ -+ q = rcu_dereference(dev->qdisc); -+#ifdef CONFIG_NET_CLS_ACT -+ skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); -+#endif -+ if (q->enqueue) { -+ /* Grab device queue */ -+ spin_lock(&dev->queue_lock); -+ -+ rc = q->enqueue(skb, q); -+ -+ qdisc_run(dev); -+ -+ spin_unlock(&dev->queue_lock); -+ rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; -+ goto out; -+ } -+ -+ /* The device has no queue. Common case for software devices: -+ loopback, all the sorts of tunnels... -+ -+ Really, it is unlikely that xmit_lock protection is necessary here. -+ (f.e. loopback and IP tunnels are clean ignoring statistics -+ counters.) -+ However, it is possible, that they rely on protection -+ made by us here. -+ -+ Check this and shot the lock. It is not prone from deadlocks. -+ Either shot noqueue qdisc, it is even simpler 8) -+ */ -+ if (dev->flags & IFF_UP) { -+ int cpu = smp_processor_id(); /* ok because BHs are off */ -+ -+ if (dev->xmit_lock_owner != cpu) { -+ -+ HARD_TX_LOCK(dev, cpu); -+ -+ if (!netif_queue_stopped(dev)) { -+ if (netdev_nit) -+ dev_queue_xmit_nit(skb, dev); -+ -+ rc = 0; -+ if (!dev->hard_start_xmit(skb, dev)) { -+ HARD_TX_UNLOCK(dev); -+ goto out; -+ } -+ } -+ HARD_TX_UNLOCK(dev); -+ if (net_ratelimit()) -+ printk(KERN_CRIT "Virtual device %s asks to " -+ "queue packet!\n", dev->name); -+ } else { -+ /* Recursion is detected! It is possible, -+ * unfortunately */ -+ if (net_ratelimit()) -+ printk(KERN_CRIT "Dead loop on virtual device " -+ "%s, fix it urgently!\n", dev->name); -+ } -+ } -+ -+ rc = -ENETDOWN; -+ local_bh_enable(); -+ -+out_kfree_skb: -+ kfree_skb(skb); -+ return rc; -+out: -+ local_bh_enable(); -+ return rc; -+} -+ -+ -+/*======================================================================= -+ Receiver routines -+ =======================================================================*/ -+ -+int netdev_max_backlog = 300; -+int weight_p = 64; /* old backlog weight */ -+/* These numbers are selected based on intuition and some -+ * experimentatiom, if you have more scientific way of doing this -+ * please go ahead and fix things. -+ */ -+int no_cong_thresh = 10; -+int no_cong = 20; -+int lo_cong = 100; -+int mod_cong = 290; -+ -+DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; -+ -+ -+static void get_sample_stats(int cpu) -+{ -+#ifdef RAND_LIE -+ unsigned long rd; -+ int rq; -+#endif -+ struct softnet_data *sd = &per_cpu(softnet_data, cpu); -+ int blog = sd->input_pkt_queue.qlen; -+ int avg_blog = sd->avg_blog; -+ -+ avg_blog = (avg_blog >> 1) + (blog >> 1); -+ -+ if (avg_blog > mod_cong) { -+ /* Above moderate congestion levels. */ -+ sd->cng_level = NET_RX_CN_HIGH; -+#ifdef RAND_LIE -+ rd = net_random(); -+ rq = rd % netdev_max_backlog; -+ if (rq < avg_blog) /* unlucky bastard */ -+ sd->cng_level = NET_RX_DROP; -+#endif -+ } else if (avg_blog > lo_cong) { -+ sd->cng_level = NET_RX_CN_MOD; -+#ifdef RAND_LIE -+ rd = net_random(); -+ rq = rd % netdev_max_backlog; -+ if (rq < avg_blog) /* unlucky bastard */ -+ sd->cng_level = NET_RX_CN_HIGH; -+#endif -+ } else if (avg_blog > no_cong) -+ sd->cng_level = NET_RX_CN_LOW; -+ else /* no congestion */ -+ sd->cng_level = NET_RX_SUCCESS; -+ -+ sd->avg_blog = avg_blog; -+} -+ -+#ifdef OFFLINE_SAMPLE -+static void sample_queue(unsigned long dummy) -+{ -+/* 10 ms 0r 1ms -- i don't care -- JHS */ -+ int next_tick = 1; -+ int cpu = smp_processor_id(); -+ -+ get_sample_stats(cpu); -+ next_tick += jiffies; -+ mod_timer(&samp_timer, next_tick); -+} -+#endif -+ -+ -+/** -+ * netif_rx - post buffer to the network code -+ * @skb: buffer to post -+ * -+ * This function receives a packet from a device driver and queues it for -+ * the upper (protocol) levels to process. It always succeeds. The buffer -+ * may be dropped during processing for congestion control or by the -+ * protocol layers. -+ * -+ * return values: -+ * NET_RX_SUCCESS (no congestion) -+ * NET_RX_CN_LOW (low congestion) -+ * NET_RX_CN_MOD (moderate congestion) -+ * NET_RX_CN_HIGH (high congestion) -+ * NET_RX_DROP (packet was dropped) -+ * -+ */ -+ -+int netif_rx(struct sk_buff *skb) -+{ -+ int this_cpu; -+ struct softnet_data *queue; -+ unsigned long flags; -+ -+ /* if netpoll wants it, pretend we never saw it */ -+ if (netpoll_rx(skb)) -+ return NET_RX_DROP; -+ -+ if (!skb->stamp.tv_sec) -+ net_timestamp(&skb->stamp); -+ -+ /* -+ * The code is rearranged so that the path is the most -+ * short when CPU is congested, but is still operating. -+ */ -+ local_irq_save(flags); -+ this_cpu = smp_processor_id(); -+ queue = &__get_cpu_var(softnet_data); -+ -+ __get_cpu_var(netdev_rx_stat).total++; -+ if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { -+ if (queue->input_pkt_queue.qlen) { -+ if (queue->throttle) -+ goto drop; -+ -+enqueue: -+ dev_hold(skb->dev); -+ __skb_queue_tail(&queue->input_pkt_queue, skb); -+#ifndef OFFLINE_SAMPLE -+ get_sample_stats(this_cpu); -+#endif -+ local_irq_restore(flags); -+ return queue->cng_level; -+ } -+ -+ if (queue->throttle) -+ queue->throttle = 0; -+ -+ netif_rx_schedule(&queue->backlog_dev); -+ goto enqueue; -+ } -+ -+ if (!queue->throttle) { -+ queue->throttle = 1; -+ __get_cpu_var(netdev_rx_stat).throttled++; -+ } -+ -+drop: -+ __get_cpu_var(netdev_rx_stat).dropped++; -+ local_irq_restore(flags); -+ -+ kfree_skb(skb); -+ return NET_RX_DROP; -+} -+ -+int netif_rx_ni(struct sk_buff *skb) -+{ -+ int err; -+ -+ preempt_disable(); -+ err = netif_rx(skb); -+ if (local_softirq_pending()) -+ do_softirq(); -+ preempt_enable(); -+ -+ return err; -+} -+ -+EXPORT_SYMBOL(netif_rx_ni); -+ -+static __inline__ void skb_bond(struct sk_buff *skb) -+{ -+ struct net_device *dev = skb->dev; -+ -+ if (dev->master) { -+ skb->real_dev = skb->dev; -+ skb->dev = dev->master; -+ } -+} -+ -+static void net_tx_action(struct softirq_action *h) -+{ -+ struct softnet_data *sd = &__get_cpu_var(softnet_data); -+ -+ if (sd->completion_queue) { -+ struct sk_buff *clist; -+ -+ local_irq_disable(); -+ clist = sd->completion_queue; -+ sd->completion_queue = NULL; -+ local_irq_enable(); -+ -+ while (clist) { -+ struct sk_buff *skb = clist; -+ clist = clist->next; -+ -+ BUG_TRAP(!atomic_read(&skb->users)); -+ __kfree_skb(skb); -+ } -+ } -+ -+ if (sd->output_queue) { -+ struct net_device *head; -+ -+ local_irq_disable(); -+ head = sd->output_queue; -+ sd->output_queue = NULL; -+ local_irq_enable(); -+ -+ while (head) { -+ struct net_device *dev = head; -+ head = head->next_sched; -+ -+ smp_mb__before_clear_bit(); -+ clear_bit(__LINK_STATE_SCHED, &dev->state); -+ -+ if (spin_trylock(&dev->queue_lock)) { -+ qdisc_run(dev); -+ spin_unlock(&dev->queue_lock); -+ } else { -+ netif_schedule(dev); -+ } -+ } -+ } -+} -+ -+static __inline__ int deliver_skb(struct sk_buff *skb, -+ struct packet_type *pt_prev) -+{ -+ atomic_inc(&skb->users); -+ return pt_prev->func(skb, skb->dev, pt_prev); -+} -+ -+#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) -+int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb); -+struct net_bridge; -+struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, -+ unsigned char *addr); -+void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); -+ -+static __inline__ int handle_bridge(struct sk_buff **pskb, -+ struct packet_type **pt_prev, int *ret) -+{ -+ struct net_bridge_port *port; -+ -+ if ((*pskb)->pkt_type == PACKET_LOOPBACK || -+ (port = rcu_dereference((*pskb)->dev->br_port)) == NULL) -+ return 0; -+ -+ if (*pt_prev) { -+ *ret = deliver_skb(*pskb, *pt_prev); -+ *pt_prev = NULL; -+ } -+ -+ return br_handle_frame_hook(port, pskb); -+} -+#else -+#define handle_bridge(skb, pt_prev, ret) (0) -+#endif -+ -+#ifdef CONFIG_NET_CLS_ACT -+/* TODO: Maybe we should just force sch_ingress to be compiled in -+ * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions -+ * a compare and 2 stores extra right now if we dont have it on -+ * but have CONFIG_NET_CLS_ACT -+ * NOTE: This doesnt stop any functionality; if you dont have -+ * the ingress scheduler, you just cant add policies on ingress. -+ * -+ */ -+static int ing_filter(struct sk_buff *skb) -+{ -+ struct Qdisc *q; -+ struct net_device *dev = skb->dev; -+ int result = TC_ACT_OK; -+ -+ if (dev->qdisc_ingress) { -+ __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); -+ if (MAX_RED_LOOP < ttl++) { -+ printk("Redir loop detected Dropping packet (%s->%s)\n", -+ skb->input_dev?skb->input_dev->name:"??",skb->dev->name); -+ return TC_ACT_SHOT; -+ } -+ -+ skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); -+ -+ skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); -+ if (NULL == skb->input_dev) { -+ skb->input_dev = skb->dev; -+ printk("ing_filter: fixed %s out %s\n",skb->input_dev->name,skb->dev->name); -+ } -+ spin_lock(&dev->ingress_lock); -+ if ((q = dev->qdisc_ingress) != NULL) -+ result = q->enqueue(skb, q); -+ spin_unlock(&dev->ingress_lock); -+ -+ } -+ -+ return result; -+} -+#endif -+ -+int netif_receive_skb(struct sk_buff *skb) -+{ -+ struct packet_type *ptype, *pt_prev; -+ int ret = NET_RX_DROP; -+ unsigned short type; -+ -+ /* if we've gotten here through NAPI, check netpoll */ -+ if (skb->dev->poll && netpoll_rx(skb)) -+ return NET_RX_DROP; -+ -+ if (!skb->stamp.tv_sec) -+ net_timestamp(&skb->stamp); -+ -+ skb_bond(skb); -+ -+ __get_cpu_var(netdev_rx_stat).total++; -+ -+ skb->h.raw = skb->nh.raw = skb->data; -+ skb->mac_len = skb->nh.raw - skb->mac.raw; -+ -+ pt_prev = NULL; -+ -+ rcu_read_lock(); -+ -+#ifdef CONFIG_NET_CLS_ACT -+ if (skb->tc_verd & TC_NCLS) { -+ skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); -+ goto ncls; -+ } -+#endif -+ -+ list_for_each_entry_rcu(ptype, &ptype_all, list) { -+ if (!ptype->dev || ptype->dev == skb->dev) { -+ if (pt_prev) -+ ret = deliver_skb(skb, pt_prev); -+ pt_prev = ptype; -+ } -+ } -+ -+#ifdef CONFIG_NET_CLS_ACT -+ if (pt_prev) { -+ ret = deliver_skb(skb, pt_prev); -+ pt_prev = NULL; /* noone else should process this after*/ -+ } else { -+ skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); -+ } -+ -+ ret = ing_filter(skb); -+ -+ if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) { -+ kfree_skb(skb); -+ goto out; -+ } -+ -+ skb->tc_verd = 0; -+ncls: -+#endif -+ -+ handle_diverter(skb); -+ -+ if (handle_bridge(&skb, &pt_prev, &ret)) -+ goto out; -+ -+ type = skb->protocol; -+ list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { -+ if (ptype->type == type && -+ (!ptype->dev || ptype->dev == skb->dev)) { -+ if (pt_prev) -+ ret = deliver_skb(skb, pt_prev); -+ pt_prev = ptype; -+ } -+ } -+ -+ if (pt_prev) { -+ ret = pt_prev->func(skb, skb->dev, pt_prev); -+ } else { -+ kfree_skb(skb); -+ /* Jamal, now you will not able to escape explaining -+ * me how you were going to use this. :-) -+ */ -+ ret = NET_RX_DROP; -+ } -+ -+out: -+ rcu_read_unlock(); -+ return ret; -+} -+ -+static int process_backlog(struct net_device *backlog_dev, int *budget) -+{ -+ int work = 0; -+ int quota = min(backlog_dev->quota, *budget); -+ struct softnet_data *queue = &__get_cpu_var(softnet_data); -+ unsigned long start_time = jiffies; -+ -+ backlog_dev->weight = weight_p; -+ for (;;) { -+ struct sk_buff *skb; -+ struct net_device *dev; -+ -+ local_irq_disable(); -+ skb = __skb_dequeue(&queue->input_pkt_queue); -+ if (!skb) -+ goto job_done; -+ local_irq_enable(); -+ -+ dev = skb->dev; -+ -+ netif_receive_skb(skb); -+ -+ dev_put(dev); -+ -+ work++; -+ -+ if (work >= quota || jiffies - start_time > 1) -+ break; -+ -+ } -+ -+ backlog_dev->quota -= work; -+ *budget -= work; -+ return -1; -+ -+job_done: -+ backlog_dev->quota -= work; -+ *budget -= work; -+ -+ list_del(&backlog_dev->poll_list); -+ smp_mb__before_clear_bit(); -+ netif_poll_enable(backlog_dev); -+ -+ if (queue->throttle) -+ queue->throttle = 0; -+ local_irq_enable(); -+ return 0; -+} -+ -+static void net_rx_action(struct softirq_action *h) -+{ -+ struct softnet_data *queue = &__get_cpu_var(softnet_data); -+ unsigned long start_time = jiffies; -+ int budget = netdev_max_backlog; -+ -+ -+ local_irq_disable(); -+ -+ while (!list_empty(&queue->poll_list)) { -+ struct net_device *dev; -+ -+ if (budget <= 0 || jiffies - start_time > 1) -+ goto softnet_break; -+ -+ local_irq_enable(); -+ -+ dev = list_entry(queue->poll_list.next, -+ struct net_device, poll_list); -+ netpoll_poll_lock(dev); -+ -+ if (dev->quota <= 0 || dev->poll(dev, &budget)) { -+ netpoll_poll_unlock(dev); -+ local_irq_disable(); -+ list_del(&dev->poll_list); -+ list_add_tail(&dev->poll_list, &queue->poll_list); -+ if (dev->quota < 0) -+ dev->quota += dev->weight; -+ else -+ dev->quota = dev->weight; -+ } else { -+ netpoll_poll_unlock(dev); -+ dev_put(dev); -+ local_irq_disable(); -+ } -+ } -+out: -+ local_irq_enable(); -+ return; -+ -+softnet_break: -+ __get_cpu_var(netdev_rx_stat).time_squeeze++; -+ __raise_softirq_irqoff(NET_RX_SOFTIRQ); -+ goto out; -+} -+ -+static gifconf_func_t * gifconf_list [NPROTO]; -+ -+/** -+ * register_gifconf - register a SIOCGIF handler -+ * @family: Address family -+ * @gifconf: Function handler -+ * -+ * Register protocol dependent address dumping routines. The handler -+ * that is passed must not be freed or reused until it has been replaced -+ * by another handler. -+ */ -+int register_gifconf(unsigned int family, gifconf_func_t * gifconf) -+{ -+ if (family >= NPROTO) -+ return -EINVAL; -+ gifconf_list[family] = gifconf; -+ return 0; -+} -+ -+ -+/* -+ * Map an interface index to its name (SIOCGIFNAME) -+ */ -+ -+/* -+ * We need this ioctl for efficient implementation of the -+ * if_indextoname() function required by the IPv6 API. Without -+ * it, we would have to search all the interfaces to find a -+ * match. --pb -+ */ -+ -+static int dev_ifname(struct ifreq __user *arg) -+{ -+ struct net_device *dev; -+ struct ifreq ifr; -+ -+ /* -+ * Fetch the caller's info block. -+ */ -+ -+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) -+ return -EFAULT; -+ -+ read_lock(&dev_base_lock); -+ dev = __dev_get_by_index(ifr.ifr_ifindex); -+ if (!dev) { -+ read_unlock(&dev_base_lock); -+ return -ENODEV; -+ } -+ -+ strcpy(ifr.ifr_name, dev->name); -+ read_unlock(&dev_base_lock); -+ -+ if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) -+ return -EFAULT; -+ return 0; -+} -+ -+/* -+ * Perform a SIOCGIFCONF call. This structure will change -+ * size eventually, and there is nothing I can do about it. -+ * Thus we will need a 'compatibility mode'. -+ */ -+ -+static int dev_ifconf(char __user *arg) -+{ -+ struct ifconf ifc; -+ struct net_device *dev; -+ char __user *pos; -+ int len; -+ int total; -+ int i; -+ -+ /* -+ * Fetch the caller's info block. -+ */ -+ -+ if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) -+ return -EFAULT; -+ -+ pos = ifc.ifc_buf; -+ len = ifc.ifc_len; -+ -+ /* -+ * Loop over the interfaces, and write an info block for each. -+ */ -+ -+ total = 0; -+ for (dev = dev_base; dev; dev = dev->next) { -+ for (i = 0; i < NPROTO; i++) { -+ if (gifconf_list[i]) { -+ int done; -+ if (!pos) -+ done = gifconf_list[i](dev, NULL, 0); -+ else -+ done = gifconf_list[i](dev, pos + total, -+ len - total); -+ if (done < 0) -+ return -EFAULT; -+ total += done; -+ } -+ } -+ } -+ -+ /* -+ * All done. Write the updated control block back to the caller. -+ */ -+ ifc.ifc_len = total; -+ -+ /* -+ * Both BSD and Solaris return 0 here, so we do too. -+ */ -+ return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; -+} -+ -+#ifdef CONFIG_PROC_FS -+/* -+ * This is invoked by the /proc filesystem handler to display a device -+ * in detail. -+ */ -+static __inline__ struct net_device *dev_get_idx(loff_t pos) -+{ -+ struct net_device *dev; -+ loff_t i; -+ -+ for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next); -+ -+ return i == pos ? dev : NULL; -+} -+ -+void *dev_seq_start(struct seq_file *seq, loff_t *pos) -+{ -+ read_lock(&dev_base_lock); -+ return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN; -+} -+ -+void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ ++*pos; -+ return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next; -+} -+ -+void dev_seq_stop(struct seq_file *seq, void *v) -+{ -+ read_unlock(&dev_base_lock); -+} -+ -+static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) -+{ -+ if (dev->get_stats) { -+ struct net_device_stats *stats = dev->get_stats(dev); -+ -+ seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " -+ "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", -+ dev->name, stats->rx_bytes, stats->rx_packets, -+ stats->rx_errors, -+ stats->rx_dropped + stats->rx_missed_errors, -+ stats->rx_fifo_errors, -+ stats->rx_length_errors + stats->rx_over_errors + -+ stats->rx_crc_errors + stats->rx_frame_errors, -+ stats->rx_compressed, stats->multicast, -+ stats->tx_bytes, stats->tx_packets, -+ stats->tx_errors, stats->tx_dropped, -+ stats->tx_fifo_errors, stats->collisions, -+ stats->tx_carrier_errors + -+ stats->tx_aborted_errors + -+ stats->tx_window_errors + -+ stats->tx_heartbeat_errors, -+ stats->tx_compressed); -+ } else -+ seq_printf(seq, "%6s: No statistics available.\n", dev->name); -+} -+ -+/* -+ * Called from the PROCfs module. This now uses the new arbitrary sized -+ * /proc/net interface to create /proc/net/dev -+ */ -+static int dev_seq_show(struct seq_file *seq, void *v) -+{ -+ if (v == SEQ_START_TOKEN) -+ seq_puts(seq, "Inter-| Receive " -+ " | Transmit\n" -+ " face |bytes packets errs drop fifo frame " -+ "compressed multicast|bytes packets errs " -+ "drop fifo colls carrier compressed\n"); -+ else -+ dev_seq_printf_stats(seq, v); -+ return 0; -+} -+ -+static struct netif_rx_stats *softnet_get_online(loff_t *pos) -+{ -+ struct netif_rx_stats *rc = NULL; -+ -+ while (*pos < NR_CPUS) -+ if (cpu_online(*pos)) { -+ rc = &per_cpu(netdev_rx_stat, *pos); -+ break; -+ } else -+ ++*pos; -+ return rc; -+} -+ -+static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) -+{ -+ return softnet_get_online(pos); -+} -+ -+static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ ++*pos; -+ return softnet_get_online(pos); -+} -+ -+static void softnet_seq_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static int softnet_seq_show(struct seq_file *seq, void *v) -+{ -+ struct netif_rx_stats *s = v; -+ -+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", -+ s->total, s->dropped, s->time_squeeze, s->throttled, -+ s->fastroute_hit, s->fastroute_success, s->fastroute_defer, -+ s->fastroute_deferred_out, -+#if 0 -+ s->fastroute_latency_reduction -+#else -+ s->cpu_collision -+#endif -+ ); -+ return 0; -+} -+ -+static struct seq_operations dev_seq_ops = { -+ .start = dev_seq_start, -+ .next = dev_seq_next, -+ .stop = dev_seq_stop, -+ .show = dev_seq_show, -+}; -+ -+static int dev_seq_open(struct inode *inode, struct file *file) -+{ -+ return seq_open(file, &dev_seq_ops); -+} -+ -+static struct file_operations dev_seq_fops = { -+ .owner = THIS_MODULE, -+ .open = dev_seq_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ -+static struct seq_operations softnet_seq_ops = { -+ .start = softnet_seq_start, -+ .next = softnet_seq_next, -+ .stop = softnet_seq_stop, -+ .show = softnet_seq_show, -+}; -+ -+static int softnet_seq_open(struct inode *inode, struct file *file) -+{ -+ return seq_open(file, &softnet_seq_ops); -+} -+ -+static struct file_operations softnet_seq_fops = { -+ .owner = THIS_MODULE, -+ .open = softnet_seq_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ -+#ifdef WIRELESS_EXT -+extern int wireless_proc_init(void); -+#else -+#define wireless_proc_init() 0 -+#endif -+ -+static int __init dev_proc_init(void) -+{ -+ int rc = -ENOMEM; -+ -+ if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops)) -+ goto out; -+ if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) -+ goto out_dev; -+ if (wireless_proc_init()) -+ goto out_softnet; -+ rc = 0; -+out: -+ return rc; -+out_softnet: -+ proc_net_remove("softnet_stat"); -+out_dev: -+ proc_net_remove("dev"); -+ goto out; -+} -+#else -+#define dev_proc_init() 0 -+#endif /* CONFIG_PROC_FS */ -+ -+ -+/** -+ * netdev_set_master - set up master/slave pair -+ * @slave: slave device -+ * @master: new master device -+ * -+ * Changes the master device of the slave. Pass %NULL to break the -+ * bonding. The caller must hold the RTNL semaphore. On a failure -+ * a negative errno code is returned. On success the reference counts -+ * are adjusted, %RTM_NEWLINK is sent to the routing socket and the -+ * function returns zero. -+ */ -+int netdev_set_master(struct net_device *slave, struct net_device *master) -+{ -+ struct net_device *old = slave->master; -+ -+ ASSERT_RTNL(); -+ -+ if (master) { -+ if (old) -+ return -EBUSY; -+ dev_hold(master); -+ } -+ -+ slave->master = master; -+ -+ synchronize_net(); -+ -+ if (old) -+ dev_put(old); -+ -+ if (master) -+ slave->flags |= IFF_SLAVE; -+ else -+ slave->flags &= ~IFF_SLAVE; -+ -+ rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); -+ return 0; -+} -+ -+/** -+ * dev_set_promiscuity - update promiscuity count on a device -+ * @dev: device -+ * @inc: modifier -+ * -+ * Add or remove promsicuity from a device. While the count in the device -+ * remains above zero the interface remains promiscuous. Once it hits zero -+ * the device reverts back to normal filtering operation. A negative inc -+ * value is used to drop promiscuity on the device. -+ */ -+void dev_set_promiscuity(struct net_device *dev, int inc) -+{ -+ unsigned short old_flags = dev->flags; -+ -+ dev->flags |= IFF_PROMISC; -+ if ((dev->promiscuity += inc) == 0) -+ dev->flags &= ~IFF_PROMISC; -+ if (dev->flags ^ old_flags) { -+ dev_mc_upload(dev); -+ printk(KERN_INFO "device %s %s promiscuous mode\n", -+ dev->name, (dev->flags & IFF_PROMISC) ? "entered" : -+ "left"); -+ } -+} -+ -+/** -+ * dev_set_allmulti - update allmulti count on a device -+ * @dev: device -+ * @inc: modifier -+ * -+ * Add or remove reception of all multicast frames to a device. While the -+ * count in the device remains above zero the interface remains listening -+ * to all interfaces. Once it hits zero the device reverts back to normal -+ * filtering operation. A negative @inc value is used to drop the counter -+ * when releasing a resource needing all multicasts. -+ */ -+ -+void dev_set_allmulti(struct net_device *dev, int inc) -+{ -+ unsigned short old_flags = dev->flags; -+ -+ dev->flags |= IFF_ALLMULTI; -+ if ((dev->allmulti += inc) == 0) -+ dev->flags &= ~IFF_ALLMULTI; -+ if (dev->flags ^ old_flags) -+ dev_mc_upload(dev); -+} -+ -+unsigned dev_get_flags(const struct net_device *dev) -+{ -+ unsigned flags; -+ -+ flags = (dev->flags & ~(IFF_PROMISC | -+ IFF_ALLMULTI | -+ IFF_RUNNING)) | -+ (dev->gflags & (IFF_PROMISC | -+ IFF_ALLMULTI)); -+ -+ if (netif_running(dev) && netif_carrier_ok(dev)) -+ flags |= IFF_RUNNING; -+ -+ return flags; -+} -+ -+int dev_change_flags(struct net_device *dev, unsigned flags) -+{ -+ int ret; -+ int old_flags = dev->flags; -+ -+ /* -+ * Set the flags on our device. -+ */ -+ -+ dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | -+ IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | -+ IFF_AUTOMEDIA)) | -+ (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | -+ IFF_ALLMULTI)); -+ -+ /* -+ * Load in the correct multicast list now the flags have changed. -+ */ -+ -+ dev_mc_upload(dev); -+ -+ /* -+ * Have we downed the interface. We handle IFF_UP ourselves -+ * according to user attempts to set it, rather than blindly -+ * setting it. -+ */ -+ -+ ret = 0; -+ if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ -+ ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); -+ -+ if (!ret) -+ dev_mc_upload(dev); -+ } -+ -+ if (dev->flags & IFF_UP && -+ ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | -+ IFF_VOLATILE))) -+ notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); -+ -+ if ((flags ^ dev->gflags) & IFF_PROMISC) { -+ int inc = (flags & IFF_PROMISC) ? +1 : -1; -+ dev->gflags ^= IFF_PROMISC; -+ dev_set_promiscuity(dev, inc); -+ } -+ -+ /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI -+ is important. Some (broken) drivers set IFF_PROMISC, when -+ IFF_ALLMULTI is requested not asking us and not reporting. -+ */ -+ if ((flags ^ dev->gflags) & IFF_ALLMULTI) { -+ int inc = (flags & IFF_ALLMULTI) ? +1 : -1; -+ dev->gflags ^= IFF_ALLMULTI; -+ dev_set_allmulti(dev, inc); -+ } -+ -+ if (old_flags ^ dev->flags) -+ rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags); -+ -+ return ret; -+} -+ -+int dev_set_mtu(struct net_device *dev, int new_mtu) -+{ -+ int err; -+ -+ if (new_mtu == dev->mtu) -+ return 0; -+ -+ /* MTU must be positive. */ -+ if (new_mtu < 0) -+ return -EINVAL; -+ -+ if (!netif_device_present(dev)) -+ return -ENODEV; -+ -+ err = 0; -+ if (dev->change_mtu) -+ err = dev->change_mtu(dev, new_mtu); -+ else -+ dev->mtu = new_mtu; -+ if (!err && dev->flags & IFF_UP) -+ notifier_call_chain(&netdev_chain, -+ NETDEV_CHANGEMTU, dev); -+ return err; -+} -+ -+int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) -+{ -+ int err; -+ -+ if (!dev->set_mac_address) -+ return -EOPNOTSUPP; -+ if (sa->sa_family != dev->type) -+ return -EINVAL; -+ if (!netif_device_present(dev)) -+ return -ENODEV; -+ err = dev->set_mac_address(dev, sa); -+ if (!err) -+ notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); -+ return err; -+} -+ -+/* -+ * Perform the SIOCxIFxxx calls. -+ */ -+static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) -+{ -+ int err; -+ struct net_device *dev = __dev_get_by_name(ifr->ifr_name); -+ -+ if (!dev) -+ return -ENODEV; -+ -+ switch (cmd) { -+ case SIOCGIFFLAGS: /* Get interface flags */ -+ ifr->ifr_flags = dev_get_flags(dev); -+ return 0; -+ -+ case SIOCSIFFLAGS: /* Set interface flags */ -+ return dev_change_flags(dev, ifr->ifr_flags); -+ -+ case SIOCGIFMETRIC: /* Get the metric on the interface -+ (currently unused) */ -+ ifr->ifr_metric = 0; -+ return 0; -+ -+ case SIOCSIFMETRIC: /* Set the metric on the interface -+ (currently unused) */ -+ return -EOPNOTSUPP; -+ -+ case SIOCGIFMTU: /* Get the MTU of a device */ -+ ifr->ifr_mtu = dev->mtu; -+ return 0; -+ -+ case SIOCSIFMTU: /* Set the MTU of a device */ -+ return dev_set_mtu(dev, ifr->ifr_mtu); -+ -+ case SIOCGIFHWADDR: -+ if (!dev->addr_len) -+ memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); -+ else -+ memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, -+ min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); -+ ifr->ifr_hwaddr.sa_family = dev->type; -+ return 0; -+ -+ case SIOCSIFHWADDR: -+ return dev_set_mac_address(dev, &ifr->ifr_hwaddr); -+ -+ case SIOCSIFHWBROADCAST: -+ if (ifr->ifr_hwaddr.sa_family != dev->type) -+ return -EINVAL; -+ memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, -+ min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); -+ notifier_call_chain(&netdev_chain, -+ NETDEV_CHANGEADDR, dev); -+ return 0; -+ -+ case SIOCGIFMAP: -+ ifr->ifr_map.mem_start = dev->mem_start; -+ ifr->ifr_map.mem_end = dev->mem_end; -+ ifr->ifr_map.base_addr = dev->base_addr; -+ ifr->ifr_map.irq = dev->irq; -+ ifr->ifr_map.dma = dev->dma; -+ ifr->ifr_map.port = dev->if_port; -+ return 0; -+ -+ case SIOCSIFMAP: -+ if (dev->set_config) { -+ if (!netif_device_present(dev)) -+ return -ENODEV; -+ return dev->set_config(dev, &ifr->ifr_map); -+ } -+ return -EOPNOTSUPP; -+ -+ case SIOCADDMULTI: -+ if (!dev->set_multicast_list || -+ ifr->ifr_hwaddr.sa_family != AF_UNSPEC) -+ return -EINVAL; -+ if (!netif_device_present(dev)) -+ return -ENODEV; -+ return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, -+ dev->addr_len, 1); -+ -+ case SIOCDELMULTI: -+ if (!dev->set_multicast_list || -+ ifr->ifr_hwaddr.sa_family != AF_UNSPEC) -+ return -EINVAL; -+ if (!netif_device_present(dev)) -+ return -ENODEV; -+ return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, -+ dev->addr_len, 1); -+ -+ case SIOCGIFINDEX: -+ ifr->ifr_ifindex = dev->ifindex; -+ return 0; -+ -+ case SIOCGIFTXQLEN: -+ ifr->ifr_qlen = dev->tx_queue_len; -+ return 0; -+ -+ case SIOCSIFTXQLEN: -+ if (ifr->ifr_qlen < 0) -+ return -EINVAL; -+ dev->tx_queue_len = ifr->ifr_qlen; -+ return 0; -+ -+ case SIOCSIFNAME: -+ ifr->ifr_newname[IFNAMSIZ-1] = '\0'; -+ return dev_change_name(dev, ifr->ifr_newname); -+ -+ /* -+ * Unknown or private ioctl -+ */ -+ -+ default: -+ if ((cmd >= SIOCDEVPRIVATE && -+ cmd <= SIOCDEVPRIVATE + 15) || -+ cmd == SIOCBONDENSLAVE || -+ cmd == SIOCBONDRELEASE || -+ cmd == SIOCBONDSETHWADDR || -+ cmd == SIOCBONDSLAVEINFOQUERY || -+ cmd == SIOCBONDINFOQUERY || -+ cmd == SIOCBONDCHANGEACTIVE || -+ cmd == SIOCGMIIPHY || -+ cmd == SIOCGMIIREG || -+ cmd == SIOCSMIIREG || -+ cmd == SIOCBRADDIF || -+ cmd == SIOCBRDELIF || -+ cmd == SIOCWANDEV) { -+ err = -EOPNOTSUPP; -+ if (dev->do_ioctl) { -+ if (netif_device_present(dev)) -+ err = dev->do_ioctl(dev, ifr, -+ cmd); -+ else -+ err = -ENODEV; -+ } -+ } else -+ err = -EINVAL; -+ -+ } -+ return err; -+} -+ -+/* -+ * This function handles all "interface"-type I/O control requests. The actual -+ * 'doing' part of this is dev_ifsioc above. -+ */ -+ -+/** -+ * dev_ioctl - network device ioctl -+ * @cmd: command to issue -+ * @arg: pointer to a struct ifreq in user space -+ * -+ * Issue ioctl functions to devices. This is normally called by the -+ * user space syscall interfaces but can sometimes be useful for -+ * other purposes. The return value is the return from the syscall if -+ * positive or a negative errno code on error. -+ */ -+ -+int dev_ioctl(unsigned int cmd, void __user *arg) -+{ -+ struct ifreq ifr; -+ int ret; -+ char *colon; -+ -+ /* One special case: SIOCGIFCONF takes ifconf argument -+ and requires shared lock, because it sleeps writing -+ to user space. -+ */ -+ -+ if (cmd == SIOCGIFCONF) { -+ rtnl_shlock(); -+ ret = dev_ifconf((char __user *) arg); -+ rtnl_shunlock(); -+ return ret; -+ } -+ if (cmd == SIOCGIFNAME) -+ return dev_ifname((struct ifreq __user *)arg); -+ -+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) -+ return -EFAULT; -+ -+ ifr.ifr_name[IFNAMSIZ-1] = 0; -+ -+ colon = strchr(ifr.ifr_name, ':'); -+ if (colon) -+ *colon = 0; -+ -+ /* -+ * See which interface the caller is talking about. -+ */ -+ -+ switch (cmd) { -+ /* -+ * These ioctl calls: -+ * - can be done by all. -+ * - atomic and do not require locking. -+ * - return a value -+ */ -+ case SIOCGIFFLAGS: -+ case SIOCGIFMETRIC: -+ case SIOCGIFMTU: -+ case SIOCGIFHWADDR: -+ case SIOCGIFSLAVE: -+ case SIOCGIFMAP: -+ case SIOCGIFINDEX: -+ case SIOCGIFTXQLEN: -+ dev_load(ifr.ifr_name); -+ read_lock(&dev_base_lock); -+ ret = dev_ifsioc(&ifr, cmd); -+ read_unlock(&dev_base_lock); -+ if (!ret) { -+ if (colon) -+ *colon = ':'; -+ if (copy_to_user(arg, &ifr, -+ sizeof(struct ifreq))) -+ ret = -EFAULT; -+ } -+ return ret; -+ -+ case SIOCETHTOOL: -+ dev_load(ifr.ifr_name); -+ rtnl_lock(); -+ ret = dev_ethtool(&ifr); -+ rtnl_unlock(); -+ if (!ret) { -+ if (colon) -+ *colon = ':'; -+ if (copy_to_user(arg, &ifr, -+ sizeof(struct ifreq))) -+ ret = -EFAULT; -+ } -+ return ret; -+ -+ /* -+ * These ioctl calls: -+ * - require superuser power. -+ * - require strict serialization. -+ * - return a value -+ */ -+ case SIOCGMIIPHY: -+ case SIOCGMIIREG: -+ case SIOCSIFNAME: -+ if (!capable(CAP_NET_ADMIN)) -+ return -EPERM; -+ dev_load(ifr.ifr_name); -+ rtnl_lock(); -+ ret = dev_ifsioc(&ifr, cmd); -+ rtnl_unlock(); -+ if (!ret) { -+ if (colon) -+ *colon = ':'; -+ if (copy_to_user(arg, &ifr, -+ sizeof(struct ifreq))) -+ ret = -EFAULT; -+ } -+ return ret; -+ -+ /* -+ * These ioctl calls: -+ * - require superuser power. -+ * - require strict serialization. -+ * - do not return a value -+ */ -+ case SIOCSIFFLAGS: -+ case SIOCSIFMETRIC: -+ case SIOCSIFMTU: -+ case SIOCSIFMAP: -+ case SIOCSIFHWADDR: -+ case SIOCSIFSLAVE: -+ case SIOCADDMULTI: -+ case SIOCDELMULTI: -+ case SIOCSIFHWBROADCAST: -+ case SIOCSIFTXQLEN: -+ case SIOCSMIIREG: -+ case SIOCBONDENSLAVE: -+ case SIOCBONDRELEASE: -+ case SIOCBONDSETHWADDR: -+ case SIOCBONDSLAVEINFOQUERY: -+ case SIOCBONDINFOQUERY: -+ case SIOCBONDCHANGEACTIVE: -+ case SIOCBRADDIF: -+ case SIOCBRDELIF: -+ if (!capable(CAP_NET_ADMIN)) -+ return -EPERM; -+ dev_load(ifr.ifr_name); -+ rtnl_lock(); -+ ret = dev_ifsioc(&ifr, cmd); -+ rtnl_unlock(); -+ return ret; -+ -+ case SIOCGIFMEM: -+ /* Get the per device memory space. We can add this but -+ * currently do not support it */ -+ case SIOCSIFMEM: -+ /* Set the per device memory buffer space. -+ * Not applicable in our case */ -+ case SIOCSIFLINK: -+ return -EINVAL; -+ -+ /* -+ * Unknown or private ioctl. -+ */ -+ default: -+ if (cmd == SIOCWANDEV || -+ (cmd >= SIOCDEVPRIVATE && -+ cmd <= SIOCDEVPRIVATE + 15)) { -+ dev_load(ifr.ifr_name); -+ rtnl_lock(); -+ ret = dev_ifsioc(&ifr, cmd); -+ rtnl_unlock(); -+ if (!ret && copy_to_user(arg, &ifr, -+ sizeof(struct ifreq))) -+ ret = -EFAULT; -+ return ret; -+ } -+#ifdef WIRELESS_EXT -+ /* Take care of Wireless Extensions */ -+ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { -+ /* If command is `set a parameter', or -+ * `get the encoding parameters', check if -+ * the user has the right to do it */ -+ if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) { -+ if (!capable(CAP_NET_ADMIN)) -+ return -EPERM; -+ } -+ dev_load(ifr.ifr_name); -+ rtnl_lock(); -+ /* Follow me in net/core/wireless.c */ -+ ret = wireless_process_ioctl(&ifr, cmd); -+ rtnl_unlock(); -+ if (IW_IS_GET(cmd) && -+ copy_to_user(arg, &ifr, -+ sizeof(struct ifreq))) -+ ret = -EFAULT; -+ return ret; -+ } -+#endif /* WIRELESS_EXT */ -+ return -EINVAL; -+ } -+} -+ -+ -+/** -+ * dev_new_index - allocate an ifindex -+ * -+ * Returns a suitable unique value for a new device interface -+ * number. The caller must hold the rtnl semaphore or the -+ * dev_base_lock to be sure it remains unique. -+ */ -+static int dev_new_index(void) -+{ -+ static int ifindex; -+ for (;;) { -+ if (++ifindex <= 0) -+ ifindex = 1; -+ if (!__dev_get_by_index(ifindex)) -+ return ifindex; -+ } -+} -+ -+static int dev_boot_phase = 1; -+ -+/* Delayed registration/unregisteration */ -+static DEFINE_SPINLOCK(net_todo_list_lock); -+static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); -+ -+static inline void net_set_todo(struct net_device *dev) -+{ -+ spin_lock(&net_todo_list_lock); -+ list_add_tail(&dev->todo_list, &net_todo_list); -+ spin_unlock(&net_todo_list_lock); -+} -+ -+/** -+ * register_netdevice - register a network device -+ * @dev: device to register -+ * -+ * Take a completed network device structure and add it to the kernel -+ * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier -+ * chain. 0 is returned on success. A negative errno code is returned -+ * on a failure to set up the device, or if the name is a duplicate. -+ * -+ * Callers must hold the rtnl semaphore. You may want -+ * register_netdev() instead of this. -+ * -+ * BUGS: -+ * The locking appears insufficient to guarantee two parallel registers -+ * will not get the same name. -+ */ -+ -+int register_netdevice(struct net_device *dev) -+{ -+ struct hlist_head *head; -+ struct hlist_node *p; -+ int ret; -+ -+ BUG_ON(dev_boot_phase); -+ ASSERT_RTNL(); -+ -+ /* When net_device's are persistent, this will be fatal. */ -+ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); -+ -+ spin_lock_init(&dev->queue_lock); -+ spin_lock_init(&dev->xmit_lock); -+ dev->xmit_lock_owner = -1; -+#ifdef CONFIG_NET_CLS_ACT -+ spin_lock_init(&dev->ingress_lock); -+#endif -+ -+ ret = alloc_divert_blk(dev); -+ if (ret) -+ goto out; -+ -+ dev->iflink = -1; -+ -+ /* Init, if this function is available */ -+ if (dev->init) { -+ ret = dev->init(dev); -+ if (ret) { -+ if (ret > 0) -+ ret = -EIO; -+ goto out_err; -+ } -+ } -+ -+ if (!dev_valid_name(dev->name)) { -+ ret = -EINVAL; -+ goto out_err; -+ } -+ -+ dev->ifindex = dev_new_index(); -+ if (dev->iflink == -1) -+ dev->iflink = dev->ifindex; -+ -+ /* Check for existence of name */ -+ head = dev_name_hash(dev->name); -+ hlist_for_each(p, head) { -+ struct net_device *d -+ = hlist_entry(p, struct net_device, name_hlist); -+ if (!strncmp(d->name, dev->name, IFNAMSIZ)) { -+ ret = -EEXIST; -+ goto out_err; -+ } -+ } -+ -+ /* Fix illegal SG+CSUM combinations. */ -+ if ((dev->features & NETIF_F_SG) && -+ !(dev->features & (NETIF_F_IP_CSUM | -+ NETIF_F_NO_CSUM | -+ NETIF_F_HW_CSUM))) { -+ printk("%s: Dropping NETIF_F_SG since no checksum feature.\n", -+ dev->name); -+ dev->features &= ~NETIF_F_SG; -+ } -+ -+ /* TSO requires that SG is present as well. */ -+ if ((dev->features & NETIF_F_TSO) && -+ !(dev->features & NETIF_F_SG)) { -+ printk("%s: Dropping NETIF_F_TSO since no SG feature.\n", -+ dev->name); -+ dev->features &= ~NETIF_F_TSO; -+ } -+ -+ /* -+ * nil rebuild_header routine, -+ * that should be never called and used as just bug trap. -+ */ -+ -+ if (!dev->rebuild_header) -+ dev->rebuild_header = default_rebuild_header; -+ -+ /* -+ * Default initial state at registry is that the -+ * device is present. -+ */ -+ -+ set_bit(__LINK_STATE_PRESENT, &dev->state); -+ -+ dev->next = NULL; -+ dev_init_scheduler(dev); -+ write_lock_bh(&dev_base_lock); -+ *dev_tail = dev; -+ dev_tail = &dev->next; -+ hlist_add_head(&dev->name_hlist, head); -+ hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); -+ dev_hold(dev); -+ dev->reg_state = NETREG_REGISTERING; -+ write_unlock_bh(&dev_base_lock); -+ -+ /* Notify protocols, that a new device appeared. */ -+ notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); -+ -+ /* Finish registration after unlock */ -+ net_set_todo(dev); -+ ret = 0; -+ -+out: -+ return ret; -+out_err: -+ free_divert_blk(dev); -+ goto out; -+} -+ -+/** -+ * register_netdev - register a network device -+ * @dev: device to register -+ * -+ * Take a completed network device structure and add it to the kernel -+ * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier -+ * chain. 0 is returned on success. A negative errno code is returned -+ * on a failure to set up the device, or if the name is a duplicate. -+ * -+ * This is a wrapper around register_netdev that takes the rtnl semaphore -+ * and expands the device name if you passed a format string to -+ * alloc_netdev. -+ */ -+int register_netdev(struct net_device *dev) -+{ -+ int err; -+ -+ rtnl_lock(); -+ -+ /* -+ * If the name is a format string the caller wants us to do a -+ * name allocation. -+ */ -+ if (strchr(dev->name, '%')) { -+ err = dev_alloc_name(dev, dev->name); -+ if (err < 0) -+ goto out; -+ } -+ -+ /* -+ * Back compatibility hook. Kill this one in 2.5 -+ */ -+ if (dev->name[0] == 0 || dev->name[0] == ' ') { -+ err = dev_alloc_name(dev, "eth%d"); -+ if (err < 0) -+ goto out; -+ } -+ -+ err = register_netdevice(dev); -+out: -+ rtnl_unlock(); -+ return err; -+} -+EXPORT_SYMBOL(register_netdev); -+ -+/* -+ * netdev_wait_allrefs - wait until all references are gone. -+ * -+ * This is called when unregistering network devices. -+ * -+ * Any protocol or device that holds a reference should register -+ * for netdevice notification, and cleanup and put back the -+ * reference if they receive an UNREGISTER event. -+ * We can get stuck here if buggy protocols don't correctly -+ * call dev_put. -+ */ -+static void netdev_wait_allrefs(struct net_device *dev) -+{ -+ unsigned long rebroadcast_time, warning_time; -+ -+ rebroadcast_time = warning_time = jiffies; -+ while (atomic_read(&dev->refcnt) != 0) { -+ if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { -+ rtnl_shlock(); -+ -+ /* Rebroadcast unregister notification */ -+ notifier_call_chain(&netdev_chain, -+ NETDEV_UNREGISTER, dev); -+ -+ if (test_bit(__LINK_STATE_LINKWATCH_PENDING, -+ &dev->state)) { -+ /* We must not have linkwatch events -+ * pending on unregister. If this -+ * happens, we simply run the queue -+ * unscheduled, resulting in a noop -+ * for this device. -+ */ -+ linkwatch_run_queue(); -+ } -+ -+ rtnl_shunlock(); -+ -+ rebroadcast_time = jiffies; -+ } -+ -+ msleep(250); -+ -+ if (time_after(jiffies, warning_time + 10 * HZ)) { -+ printk(KERN_EMERG "unregister_netdevice: " -+ "waiting for %s to become free. Usage " -+ "count = %d\n", -+ dev->name, atomic_read(&dev->refcnt)); -+ warning_time = jiffies; -+ } -+ } -+} -+ -+/* The sequence is: -+ * -+ * rtnl_lock(); -+ * ... -+ * register_netdevice(x1); -+ * register_netdevice(x2); -+ * ... -+ * unregister_netdevice(y1); -+ * unregister_netdevice(y2); -+ * ... -+ * rtnl_unlock(); -+ * free_netdev(y1); -+ * free_netdev(y2); -+ * -+ * We are invoked by rtnl_unlock() after it drops the semaphore. -+ * This allows us to deal with problems: -+ * 1) We can create/delete sysfs objects which invoke hotplug -+ * without deadlocking with linkwatch via keventd. -+ * 2) Since we run with the RTNL semaphore not held, we can sleep -+ * safely in order to wait for the netdev refcnt to drop to zero. -+ */ -+static DECLARE_MUTEX(net_todo_run_mutex); -+void netdev_run_todo(void) -+{ -+ struct list_head list = LIST_HEAD_INIT(list); -+ int err; -+ -+ -+ /* Need to guard against multiple cpu's getting out of order. */ -+ down(&net_todo_run_mutex); -+ -+ /* Not safe to do outside the semaphore. We must not return -+ * until all unregister events invoked by the local processor -+ * have been completed (either by this todo run, or one on -+ * another cpu). -+ */ -+ if (list_empty(&net_todo_list)) -+ goto out; -+ -+ /* Snapshot list, allow later requests */ -+ spin_lock(&net_todo_list_lock); -+ list_splice_init(&net_todo_list, &list); -+ spin_unlock(&net_todo_list_lock); -+ -+ while (!list_empty(&list)) { -+ struct net_device *dev -+ = list_entry(list.next, struct net_device, todo_list); -+ list_del(&dev->todo_list); -+ -+ switch(dev->reg_state) { -+ case NETREG_REGISTERING: -+ err = netdev_register_sysfs(dev); -+ if (err) -+ printk(KERN_ERR "%s: failed sysfs registration (%d)\n", -+ dev->name, err); -+ dev->reg_state = NETREG_REGISTERED; -+ break; -+ -+ case NETREG_UNREGISTERING: -+ netdev_unregister_sysfs(dev); -+ dev->reg_state = NETREG_UNREGISTERED; -+ -+ netdev_wait_allrefs(dev); -+ -+ /* paranoia */ -+ BUG_ON(atomic_read(&dev->refcnt)); -+ BUG_TRAP(!dev->ip_ptr); -+ BUG_TRAP(!dev->ip6_ptr); -+ BUG_TRAP(!dev->dn_ptr); -+ -+ -+ /* It must be the very last action, -+ * after this 'dev' may point to freed up memory. -+ */ -+ if (dev->destructor) -+ dev->destructor(dev); -+ break; -+ -+ default: -+ printk(KERN_ERR "network todo '%s' but state %d\n", -+ dev->name, dev->reg_state); -+ break; -+ } -+ } -+ -+out: -+ up(&net_todo_run_mutex); -+} -+ -+/** -+ * alloc_netdev - allocate network device -+ * @sizeof_priv: size of private data to allocate space for -+ * @name: device name format string -+ * @setup: callback to initialize device -+ * -+ * Allocates a struct net_device with private data area for driver use -+ * and performs basic initialization. -+ */ -+struct net_device *alloc_netdev(int sizeof_priv, const char *name, -+ void (*setup)(struct net_device *)) -+{ -+ void *p; -+ struct net_device *dev; -+ int alloc_size; -+ -+ /* ensure 32-byte alignment of both the device and private area */ -+ alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; -+ alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; -+ -+ p = kmalloc(alloc_size, GFP_KERNEL); -+ if (!p) { -+ printk(KERN_ERR "alloc_dev: Unable to allocate device.\n"); -+ return NULL; -+ } -+ memset(p, 0, alloc_size); -+ -+ dev = (struct net_device *) -+ (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); -+ dev->padded = (char *)dev - (char *)p; -+ -+ if (sizeof_priv) -+ dev->priv = netdev_priv(dev); -+ -+ setup(dev); -+ strcpy(dev->name, name); -+ return dev; -+} -+EXPORT_SYMBOL(alloc_netdev); -+ -+/** -+ * free_netdev - free network device -+ * @dev: device -+ * -+ * This function does the last stage of destroying an allocated device -+ * interface. The reference to the device object is released. -+ * If this is the last reference then it will be freed. -+ */ -+void free_netdev(struct net_device *dev) -+{ -+#ifdef CONFIG_SYSFS -+ /* Compatiablity with error handling in drivers */ -+ if (dev->reg_state == NETREG_UNINITIALIZED) { -+ kfree((char *)dev - dev->padded); -+ return; -+ } -+ -+ BUG_ON(dev->reg_state != NETREG_UNREGISTERED); -+ dev->reg_state = NETREG_RELEASED; -+ -+ /* will free via class release */ -+ class_device_put(&dev->class_dev); -+#else -+ kfree((char *)dev - dev->padded); -+#endif -+} -+ -+/* Synchronize with packet receive processing. */ -+void synchronize_net(void) -+{ -+ might_sleep(); -+ synchronize_rcu(); -+} -+ -+/** -+ * unregister_netdevice - remove device from the kernel -+ * @dev: device -+ * -+ * This function shuts down a device interface and removes it -+ * from the kernel tables. On success 0 is returned, on a failure -+ * a negative errno code is returned. -+ * -+ * Callers must hold the rtnl semaphore. You may want -+ * unregister_netdev() instead of this. -+ */ -+ -+int unregister_netdevice(struct net_device *dev) -+{ -+ struct net_device *d, **dp; -+ -+ BUG_ON(dev_boot_phase); -+ ASSERT_RTNL(); -+ -+ /* Some devices call without registering for initialization unwind. */ -+ if (dev->reg_state == NETREG_UNINITIALIZED) { -+ printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " -+ "was registered\n", dev->name, dev); -+ return -ENODEV; -+ } -+ -+ BUG_ON(dev->reg_state != NETREG_REGISTERED); -+ -+ /* If device is running, close it first. */ -+ if (dev->flags & IFF_UP) -+ dev_close(dev); -+ -+ /* And unlink it from device chain. */ -+ for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) { -+ if (d == dev) { -+ write_lock_bh(&dev_base_lock); -+ hlist_del(&dev->name_hlist); -+ hlist_del(&dev->index_hlist); -+ if (dev_tail == &dev->next) -+ dev_tail = dp; -+ *dp = d->next; -+ write_unlock_bh(&dev_base_lock); -+ break; -+ } -+ } -+ if (!d) { -+ printk(KERN_ERR "unregister net_device: '%s' not found\n", -+ dev->name); -+ return -ENODEV; -+ } -+ -+ dev->reg_state = NETREG_UNREGISTERING; -+ -+ synchronize_net(); -+ -+ /* Shutdown queueing discipline. */ -+ dev_shutdown(dev); -+ -+ -+ /* Notify protocols, that we are about to destroy -+ this device. They should clean all the things. -+ */ -+ notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); -+ -+ /* -+ * Flush the multicast chain -+ */ -+ dev_mc_discard(dev); -+ -+ if (dev->uninit) -+ dev->uninit(dev); -+ -+ /* Notifier chain MUST detach us from master device. */ -+ BUG_TRAP(!dev->master); -+ -+ free_divert_blk(dev); -+ -+ /* Finish processing unregister after unlock */ -+ net_set_todo(dev); -+ -+ synchronize_net(); -+ -+ dev_put(dev); -+ return 0; -+} -+ -+/** -+ * unregister_netdev - remove device from the kernel -+ * @dev: device -+ * -+ * This function shuts down a device interface and removes it -+ * from the kernel tables. On success 0 is returned, on a failure -+ * a negative errno code is returned. -+ * -+ * This is just a wrapper for unregister_netdevice that takes -+ * the rtnl semaphore. In general you want to use this and not -+ * unregister_netdevice. -+ */ -+void unregister_netdev(struct net_device *dev) -+{ -+ rtnl_lock(); -+ unregister_netdevice(dev); -+ rtnl_unlock(); -+} -+ -+EXPORT_SYMBOL(unregister_netdev); -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static int dev_cpu_callback(struct notifier_block *nfb, -+ unsigned long action, -+ void *ocpu) -+{ -+ struct sk_buff **list_skb; -+ struct net_device **list_net; -+ struct sk_buff *skb; -+ unsigned int cpu, oldcpu = (unsigned long)ocpu; -+ struct softnet_data *sd, *oldsd; -+ -+ if (action != CPU_DEAD) -+ return NOTIFY_OK; -+ -+ local_irq_disable(); -+ cpu = smp_processor_id(); -+ sd = &per_cpu(softnet_data, cpu); -+ oldsd = &per_cpu(softnet_data, oldcpu); -+ -+ /* Find end of our completion_queue. */ -+ list_skb = &sd->completion_queue; -+ while (*list_skb) -+ list_skb = &(*list_skb)->next; -+ /* Append completion queue from offline CPU. */ -+ *list_skb = oldsd->completion_queue; -+ oldsd->completion_queue = NULL; -+ -+ /* Find end of our output_queue. */ -+ list_net = &sd->output_queue; -+ while (*list_net) -+ list_net = &(*list_net)->next_sched; -+ /* Append output queue from offline CPU. */ -+ *list_net = oldsd->output_queue; -+ oldsd->output_queue = NULL; -+ -+ raise_softirq_irqoff(NET_TX_SOFTIRQ); -+ local_irq_enable(); -+ -+ /* Process offline CPU's input_pkt_queue */ -+ while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) -+ netif_rx(skb); -+ -+ return NOTIFY_OK; -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+ -+/* -+ * Initialize the DEV module. At boot time this walks the device list and -+ * unhooks any devices that fail to initialise (normally hardware not -+ * present) and leaves us with a valid list of present and active devices. -+ * -+ */ -+ -+/* -+ * This is called single threaded during boot, so no need -+ * to take the rtnl semaphore. -+ */ -+static int __init net_dev_init(void) -+{ -+ int i, rc = -ENOMEM; -+ -+ BUG_ON(!dev_boot_phase); -+ -+ net_random_init(); -+ -+ if (dev_proc_init()) -+ goto out; -+ -+ if (netdev_sysfs_init()) -+ goto out; -+ -+ INIT_LIST_HEAD(&ptype_all); -+ for (i = 0; i < 16; i++) -+ INIT_LIST_HEAD(&ptype_base[i]); -+ -+ for (i = 0; i < ARRAY_SIZE(dev_name_head); i++) -+ INIT_HLIST_HEAD(&dev_name_head[i]); -+ -+ for (i = 0; i < ARRAY_SIZE(dev_index_head); i++) -+ INIT_HLIST_HEAD(&dev_index_head[i]); -+ -+ /* -+ * Initialise the packet receive queues. -+ */ -+ -+ for (i = 0; i < NR_CPUS; i++) { -+ struct softnet_data *queue; -+ -+ queue = &per_cpu(softnet_data, i); -+ skb_queue_head_init(&queue->input_pkt_queue); -+ queue->throttle = 0; -+ queue->cng_level = 0; -+ queue->avg_blog = 10; /* arbitrary non-zero */ -+ queue->completion_queue = NULL; -+ INIT_LIST_HEAD(&queue->poll_list); -+ set_bit(__LINK_STATE_START, &queue->backlog_dev.state); -+ queue->backlog_dev.weight = weight_p; -+ queue->backlog_dev.poll = process_backlog; -+ atomic_set(&queue->backlog_dev.refcnt, 1); -+ } -+ -+#ifdef OFFLINE_SAMPLE -+ samp_timer.expires = jiffies + (10 * HZ); -+ add_timer(&samp_timer); -+#endif -+ -+ dev_boot_phase = 0; -+ -+ open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); -+ open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); -+ -+ hotcpu_notifier(dev_cpu_callback, 0); -+ dst_init(); -+ dev_mcast_init(); -+ rc = 0; -+out: -+ return rc; -+} -+ -+subsys_initcall(net_dev_init); -+ -+EXPORT_SYMBOL(__dev_get_by_index); -+EXPORT_SYMBOL(__dev_get_by_name); -+EXPORT_SYMBOL(__dev_remove_pack); -+EXPORT_SYMBOL(__skb_linearize); -+EXPORT_SYMBOL(dev_add_pack); -+EXPORT_SYMBOL(dev_alloc_name); -+EXPORT_SYMBOL(dev_close); -+EXPORT_SYMBOL(dev_get_by_flags); -+EXPORT_SYMBOL(dev_get_by_index); -+EXPORT_SYMBOL(dev_get_by_name); -+EXPORT_SYMBOL(dev_ioctl); -+EXPORT_SYMBOL(dev_open); -+EXPORT_SYMBOL(dev_queue_xmit); -+EXPORT_SYMBOL(dev_remove_pack); -+EXPORT_SYMBOL(dev_set_allmulti); -+EXPORT_SYMBOL(dev_set_promiscuity); -+EXPORT_SYMBOL(dev_change_flags); -+EXPORT_SYMBOL(dev_set_mtu); -+EXPORT_SYMBOL(dev_set_mac_address); -+EXPORT_SYMBOL(free_netdev); -+EXPORT_SYMBOL(netdev_boot_setup_check); -+EXPORT_SYMBOL(netdev_set_master); -+EXPORT_SYMBOL(netdev_state_change); -+EXPORT_SYMBOL(netif_receive_skb); -+EXPORT_SYMBOL(netif_rx); -+EXPORT_SYMBOL(register_gifconf); -+EXPORT_SYMBOL(register_netdevice); -+EXPORT_SYMBOL(register_netdevice_notifier); -+EXPORT_SYMBOL(skb_checksum_help); -+EXPORT_SYMBOL(synchronize_net); -+EXPORT_SYMBOL(unregister_netdevice); -+EXPORT_SYMBOL(unregister_netdevice_notifier); -+EXPORT_SYMBOL(net_enable_timestamp); -+EXPORT_SYMBOL(net_disable_timestamp); -+EXPORT_SYMBOL(dev_get_flags); -+ -+#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) -+EXPORT_SYMBOL(br_handle_frame_hook); -+EXPORT_SYMBOL(br_fdb_get_hook); -+EXPORT_SYMBOL(br_fdb_put_hook); -+#endif -+ -+#ifdef CONFIG_KMOD -+EXPORT_SYMBOL(dev_load); -+#endif -+ -+EXPORT_PER_CPU_SYMBOL(softnet_data); -diff --unified --recursive --new-file linux-2.6.12.5/net/ring/Kconfig linux-2.6.12.5-1-686-smp-ring3/net/ring/Kconfig ---- linux-2.6.12.5/net/ring/Kconfig 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.12.5-1-686-smp-ring3/net/ring/Kconfig 2005-10-22 23:50:45.539482000 +0200 -@@ -0,0 +1,14 @@ -+config RING -+ tristate "PF_RING sockets (EXPERIMENTAL)" -+ depends on EXPERIMENTAL -+ ---help--- -+ PF_RING socket family, optimized for packet capture. -+ If a PF_RING socket is bound to an adapter (via the bind() system -+ call), such adapter will be used in read-only mode until the socket -+ is destroyed. Whenever an incoming packet is received from the adapter -+ it will not passed to upper layers, but instead it is copied to a ring -+ buffer, which in turn is exported to user space applications via mmap. -+ Please refer to http://luca.ntop.org/Ring.pdf for more. -+ -+ Say N unless you know what you are doing. -+ -diff --unified --recursive --new-file linux-2.6.12.5/net/ring/Makefile linux-2.6.12.5-1-686-smp-ring3/net/ring/Makefile ---- linux-2.6.12.5/net/ring/Makefile 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.12.5-1-686-smp-ring3/net/ring/Makefile 2005-10-22 23:50:45.051451500 +0200 -@@ -0,0 +1,7 @@ -+# -+# Makefile for the ring driver. -+# -+ -+obj-m += ring.o -+ -+ring-objs := ring_packet.o -diff --unified --recursive --new-file linux-2.6.12.5/net/ring/ring_packet.c linux-2.6.12.5-1-686-smp-ring3/net/ring/ring_packet.c ---- linux-2.6.12.5/net/ring/ring_packet.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.12.5-1-686-smp-ring3/net/ring/ring_packet.c 2005-10-22 23:50:45.159458250 +0200 -@@ -0,0 +1,1592 @@ -+/* -+ * -+ * (C) 2004-05 - Luca Deri <deri@ntop.org> -+ * -+ * This code includes patches courtesy of -+ * - Jeff Randall <jrandall@nexvu.com> -+ * - Helmut Manck <helmut.manck@secunet.com> -+ * - Brad Doctor <bdoctor@ps-ax.com> -+ * -+ */ -+ -+/* FIX: add an entry inside the /proc filesystem */ -+ -+#include <linux/version.h> -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/kernel.h> -+#include <linux/socket.h> -+#include <linux/skbuff.h> -+#include <linux/rtnetlink.h> -+#include <linux/in.h> -+#include <linux/in6.h> -+#include <linux/init.h> -+#include <linux/filter.h> -+#include <linux/ring.h> -+#include <linux/ip.h> -+#include <linux/tcp.h> -+#include <linux/udp.h> -+#include <linux/list.h> -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) -+#include <net/xfrm.h> -+#else -+#include <linux/poll.h> -+#endif -+#include <net/sock.h> -+#include <asm/io.h> /* needed for virt_to_phys() */ -+ -+/* #define RING_DEBUG */ -+ -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)) -+static inline int remap_page_range(struct vm_area_struct *vma, -+ unsigned long uvaddr, -+ unsigned long paddr, -+ unsigned long size, -+ pgprot_t prot) { -+ return(remap_pfn_range(vma, uvaddr, paddr >> PAGE_SHIFT, -+ size, prot)); -+} -+#endif -+ -+/* ************************************************* */ -+ -+#define CLUSTER_LEN 8 -+ -+struct ring_cluster { -+ u_short cluster_id; /* 0 = no cluster */ -+ u_short num_cluster_elements; -+ enum cluster_type hashing_mode; -+ u_short hashing_id; -+ struct sock *sk[CLUSTER_LEN]; -+ struct ring_cluster *next; /* NULL = last element of the cluster */ -+}; -+ -+/* ************************************************* */ -+ -+struct ring_element { -+ struct list_head list; -+ struct sock *sk; -+}; -+ -+/* ************************************************* */ -+ -+struct ring_opt { -+ struct net_device *ring_netdev; -+ -+ /* Cluster */ -+ u_short cluster_id; /* 0 = no cluster */ -+ -+ /* Reflector */ -+ struct net_device *reflector_dev; -+ -+ /* Packet buffers */ -+ unsigned long order; -+ -+ /* Ring Slots */ -+ unsigned long ring_memory; -+ FlowSlotInfo *slots_info; /* Basically it points to ring_memory */ -+ char *ring_slots; /* Basically it points to ring_memory -+ +sizeof(FlowSlotInfo) */ -+ -+ /* Packet Sampling */ -+ u_int pktToSample, sample_rate; -+ -+ /* BPF Filter */ -+ struct sk_filter *bpfFilter; -+ -+ /* Locks */ -+ atomic_t num_ring_slots_waiters; -+ wait_queue_head_t ring_slots_waitqueue; -+ rwlock_t ring_index_lock; -+ -+ /* Indexes (Internal) */ -+ u_int insert_page_id, insert_slot_id; -+}; -+ -+/* ************************************************* */ -+ -+/* List of all ring sockets. */ -+static struct list_head ring_table; -+ -+/* List of all clusters */ -+static struct ring_cluster *ring_cluster_list; -+ -+static rwlock_t ring_mgmt_lock = RW_LOCK_UNLOCKED; -+ -+/* ********************************** */ -+ -+/* Forward */ -+static struct proto_ops ring_ops; -+ -+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11)) -+static struct proto ring_proto; -+#endif -+ -+static int skb_ring_handler(struct sk_buff *skb, u_char recv_packet, -+ u_char real_skb); -+static int buffer_ring_handler(struct net_device *dev, char *data, int len); -+static int remove_from_cluster(struct sock *sock, struct ring_opt *pfr); -+ -+/* Extern */ -+ -+/* ********************************** */ -+ -+/* Defaults */ -+static u_int bucket_len = 128, num_slots = 4096, sample_rate = 1, -+ transparent_mode = 0, enable_tx_capture = 0; -+ -+MODULE_PARM(bucket_len, "i"); -+MODULE_PARM_DESC(bucket_len, "Number of ring buckets"); -+MODULE_PARM(num_slots, "i"); -+MODULE_PARM_DESC(num_slots, "Number of ring slots"); -+MODULE_PARM(sample_rate, "i"); -+MODULE_PARM_DESC(sample_rate, "Ring packet sample rate"); -+MODULE_PARM(transparent_mode, "i"); -+MODULE_PARM_DESC(transparent_mode, -+ "Set to 1 to set transparent mode " -+ "(slower but backwards compatible)"); -+MODULE_PARM(enable_tx_capture, "i"); -+MODULE_PARM_DESC(enable_tx_capture, "Set to 1 to capture outgoing packets"); -+ -+/* ********************************** */ -+ -+#define MIN_QUEUED_PKTS 64 -+#define MAX_QUEUE_LOOPS 64 -+ -+ -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) -+#define ring_sk_datatype(__sk) ((struct ring_opt *)__sk) -+#define ring_sk(__sk) ((__sk)->sk_protinfo) -+#else -+#define ring_sk_datatype(a) (a) -+#define ring_sk(__sk) ((__sk)->protinfo.pf_ring) -+#endif -+ -+/* -+ int dev_queue_xmit(struct sk_buff *skb) -+ skb->dev; -+ struct net_device *dev_get_by_name(const char *name) -+*/ -+ -+/* ********************************** */ -+ -+static void ring_sock_destruct(struct sock *sk) { -+ -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) -+ skb_queue_purge(&sk->sk_receive_queue); -+ -+ if (!sock_flag(sk, SOCK_DEAD)) { -+#if defined(RING_DEBUG) -+ printk("Attempt to release alive ring socket: %p\n", sk); -+#endif -+ return; -+ } -+ -+ BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); -+ BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); -+#else -+ -+ BUG_TRAP(atomic_read(&sk->rmem_alloc)==0); -+ BUG_TRAP(atomic_read(&sk->wmem_alloc)==0); -+ -+ if (!sk->dead) { -+#if defined(RING_DEBUG) -+ printk("Attempt to release alive ring socket: %p\n", sk); -+#endif -+ return; -+ } -+#endif -+ -+ kfree(ring_sk(sk)); -+ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) -+ MOD_DEC_USE_COUNT; -+#endif -+} -+ -+/* ********************************** */ -+/* -+ * ring_insert() -+ * -+ * store the sk in a new element and add it -+ * to the head of the list. -+ */ -+static inline void ring_insert(struct sock *sk) { -+ struct ring_element *next; -+ -+#if defined(RING_DEBUG) -+ printk("RING: ring_insert()\n"); -+#endif -+ -+ next = kmalloc(sizeof(struct ring_element), GFP_ATOMIC); -+ if(next != NULL) { -+ next->sk = sk; -+ write_lock_irq(&ring_mgmt_lock); -+ list_add(&next->list, &ring_table); -+ write_unlock_irq(&ring_mgmt_lock); -+ } else { -+ if (net_ratelimit()) -+ printk("RING: could not kmalloc slot!!\n"); -+ } -+} -+ -+/* ********************************** */ -+/* -+ * ring_remove() -+ * -+ * For each of the elements in the list: -+ * - check if this is the element we want to delete -+ * - if it is, remove it from the list, and free it. -+ * -+ * stop when we find the one we're looking for (break), -+ * or when we reach the end of the list. -+ */ -+static inline void ring_remove(struct sock *sk) { -+ struct list_head *ptr; -+ struct ring_element *entry; -+ -+ -+ for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) { -+ entry = list_entry(ptr, struct ring_element, list); -+ -+ if(entry->sk == sk) { -+ write_lock_irq(&ring_mgmt_lock); -+ list_del(ptr); -+ kfree(ptr); -+ write_unlock_irq(&ring_mgmt_lock); -+ break; -+ } -+ } -+ -+} -+ -+/* ********************************** */ -+ -+static u_int32_t num_queued_pkts(struct ring_opt *pfr) { -+ -+ if(pfr->ring_slots != NULL) { -+ -+ u_int32_t tot_insert = pfr->slots_info->insert_idx, -+#if defined(RING_DEBUG) -+ tot_read = pfr->slots_info->tot_read, tot_pkts; -+#else -+ tot_read = pfr->slots_info->tot_read; -+#endif -+ -+ if(tot_insert >= tot_read) { -+#if defined(RING_DEBUG) -+ tot_pkts = tot_insert-tot_read; -+#endif -+ return(tot_insert-tot_read); -+ } else { -+#if defined(RING_DEBUG) -+ tot_pkts = ((u_int32_t)-1)+tot_insert-tot_read; -+#endif -+ return(((u_int32_t)-1)+tot_insert-tot_read); -+ } -+ -+#if defined(RING_DEBUG) -+ printk("-> num_queued_pkts=%d [tot_insert=%d][tot_read=%d]\n", -+ tot_pkts, tot_insert, tot_read); -+#endif -+ -+ } else -+ return(0); -+} -+ -+/* ********************************** */ -+ -+static inline FlowSlot* get_insert_slot(struct ring_opt *pfr) { -+#if defined(RING_DEBUG) -+ printk("get_insert_slot(%d)\n", pfr->slots_info->insert_idx); -+#endif -+ -+ if(pfr->ring_slots != NULL) { -+ FlowSlot *slot = (FlowSlot*)&(pfr->ring_slots[pfr->slots_info->insert_idx -+ *pfr->slots_info->slot_len]); -+ return(slot); -+ } else -+ return(NULL); -+} -+ -+/* ********************************** */ -+ -+static inline FlowSlot* get_remove_slot(struct ring_opt *pfr) { -+#if defined(RING_DEBUG) -+ printk("get_remove_slot(%d)\n", pfr->slots_info->remove_idx); -+#endif -+ -+ if(pfr->ring_slots != NULL) -+ return((FlowSlot*)&(pfr->ring_slots[pfr->slots_info->remove_idx* -+ pfr->slots_info->slot_len])); -+ else -+ return(NULL); -+} -+ -+/* ********************************** */ -+ -+static void add_skb_to_ring(struct sk_buff *skb, -+ struct ring_opt *pfr, -+ u_char recv_packet, -+ u_char real_skb /* 1=skb 0=faked skb */) { -+ FlowSlot *theSlot; -+ int idx, displ; -+ -+ if(recv_packet) { -+ /* Hack for identifying a packet received by the e1000 */ -+ if(real_skb) { -+ displ = SKB_DISPLACEMENT; -+ } else -+ displ = 0; /* Received by the e1000 wrapper */ -+ } else -+ displ = 0; -+ -+ write_lock(&pfr->ring_index_lock); -+ pfr->slots_info->tot_pkts++; -+ write_unlock(&pfr->ring_index_lock); -+ -+ /* BPF Filtering (from af_packet.c) */ -+ if(pfr->bpfFilter != NULL) { -+ unsigned res = 1, len; -+ -+ len = skb->len-skb->data_len; -+ -+ write_lock(&pfr->ring_index_lock); -+ skb->data -= displ; -+ res = sk_run_filter(skb, pfr->bpfFilter->insns, pfr->bpfFilter->len); -+ skb->data += displ; -+ write_unlock(&pfr->ring_index_lock); -+ -+ if(res == 0) { -+ /* Filter failed */ -+ -+#if defined(RING_DEBUG) -+ printk("add_skb_to_ring(skb): Filter failed [len=%d][tot=%llu]" -+ "[insertIdx=%d][pkt_type=%d][cloned=%d]\n", -+ (int)skb->len, pfr->slots_info->tot_pkts, -+ pfr->slots_info->insert_idx, -+ skb->pkt_type, skb->cloned); -+#endif -+ -+ return; -+ } -+ } -+ -+ /* ************************** */ -+ -+ if(pfr->sample_rate > 1) { -+ if(pfr->pktToSample == 0) { -+ write_lock(&pfr->ring_index_lock); -+ pfr->pktToSample = pfr->sample_rate; -+ write_unlock(&pfr->ring_index_lock); -+ } else { -+ write_lock(&pfr->ring_index_lock); -+ pfr->pktToSample--; -+ write_unlock(&pfr->ring_index_lock); -+ -+#if defined(RING_DEBUG) -+ printk("add_skb_to_ring(skb): sampled packet [len=%d]" -+ "[tot=%llu][insertIdx=%d][pkt_type=%d][cloned=%d]\n", -+ (int)skb->len, pfr->slots_info->tot_pkts, -+ pfr->slots_info->insert_idx, -+ skb->pkt_type, skb->cloned); -+#endif -+ return; -+ } -+ } -+ -+ /* ************************************* */ -+ -+ if((pfr->reflector_dev != NULL) -+ && (!netif_queue_stopped(pfr->reflector_dev))) { -+ int cpu = smp_processor_id(); -+ -+ /* increase reference counter so that this skb is not freed */ -+ atomic_inc(&skb->users); -+ -+ skb->data -= displ; -+ -+ /* send it */ -+ if (pfr->reflector_dev->xmit_lock_owner != cpu) { -+ spin_lock_bh(&pfr->reflector_dev->xmit_lock); -+ pfr->reflector_dev->xmit_lock_owner = cpu; -+ spin_unlock_bh(&pfr->reflector_dev->xmit_lock); -+ -+ if (pfr->reflector_dev->hard_start_xmit(skb, -+ pfr->reflector_dev) == 0) { -+ spin_lock_bh(&pfr->reflector_dev->xmit_lock); -+ pfr->reflector_dev->xmit_lock_owner = -1; -+ skb->data += displ; -+ spin_unlock_bh(&pfr->reflector_dev->xmit_lock); -+#if defined(RING_DEBUG) -+ printk("++ hard_start_xmit succeeded\n"); -+#endif -+ return; /* OK */ -+ } -+ -+ spin_lock_bh(&pfr->reflector_dev->xmit_lock); -+ pfr->reflector_dev->xmit_lock_owner = -1; -+ spin_unlock_bh(&pfr->reflector_dev->xmit_lock); -+ } -+ -+#if defined(RING_DEBUG) -+ printk("++ hard_start_xmit failed\n"); -+#endif -+ skb->data += displ; -+ return; /* -ENETDOWN */ -+ } -+ -+ /* ************************************* */ -+ -+#if defined(RING_DEBUG) -+ printk("add_skb_to_ring(skb) [len=%d][tot=%llu][insertIdx=%d]" -+ "[pkt_type=%d][cloned=%d]\n", -+ (int)skb->len, pfr->slots_info->tot_pkts, -+ pfr->slots_info->insert_idx, -+ skb->pkt_type, skb->cloned); -+#endif -+ -+ idx = pfr->slots_info->insert_idx; -+ theSlot = get_insert_slot(pfr); -+ -+ if((theSlot != NULL) && (theSlot->slot_state == 0)) { -+ struct pcap_pkthdr *hdr; -+ unsigned int bucketSpace; -+ char *bucket; -+ -+ /* Update Index */ -+ idx++; -+ -+ if(idx == pfr->slots_info->tot_slots) { -+ write_lock(&pfr->ring_index_lock); -+ pfr->slots_info->insert_idx = 0; -+ write_unlock(&pfr->ring_index_lock); -+ } else { -+ write_lock(&pfr->ring_index_lock); -+ pfr->slots_info->insert_idx = idx; -+ write_unlock(&pfr->ring_index_lock); -+ } -+ -+ bucketSpace = pfr->slots_info->slot_len -+#ifdef RING_MAGIC -+ - sizeof(u_char) -+#endif -+ - sizeof(u_char) /* flowSlot.slot_state */ -+ - sizeof(struct pcap_pkthdr) -+ - 1 /* 10 */ /* safe boundary */; -+ -+ bucket = &theSlot->bucket; -+ hdr = (struct pcap_pkthdr*)bucket; -+ -+ if(skb->stamp.tv_sec == 0) do_gettimeofday(&skb->stamp); -+ -+ hdr->ts.tv_sec = skb->stamp.tv_sec, hdr->ts.tv_usec = skb->stamp.tv_usec; -+ hdr->caplen = skb->len+displ; -+ -+ if(hdr->caplen > bucketSpace) -+ hdr->caplen = bucketSpace; -+ -+ hdr->len = skb->len+displ; -+ memcpy(&bucket[sizeof(struct pcap_pkthdr)], -+ skb->data-displ, hdr->caplen); -+ -+#if defined(RING_DEBUG) -+ { -+ static unsigned int lastLoss = 0; -+ -+ if(pfr->slots_info->tot_lost -+ && (lastLoss != pfr->slots_info->tot_lost)) { -+ printk("add_skb_to_ring(%d): [bucketSpace=%d]" -+ "[hdr.caplen=%d][skb->len=%d]" -+ "[pcap_pkthdr=%d][removeIdx=%d]" -+ "[loss=%lu][page=%u][slot=%u]\n", -+ idx-1, bucketSpace, hdr->caplen, skb->len, -+ sizeof(struct pcap_pkthdr), -+ pfr->slots_info->remove_idx, -+ (long unsigned int)pfr->slots_info->tot_lost, -+ pfr->insert_page_id, pfr->insert_slot_id); -+ -+ lastLoss = pfr->slots_info->tot_lost; -+ } -+ } -+#endif -+ -+ write_lock(&pfr->ring_index_lock); -+ pfr->slots_info->tot_insert++; -+ theSlot->slot_state = 1; -+ write_unlock(&pfr->ring_index_lock); -+ } else { -+ write_lock(&pfr->ring_index_lock); -+ pfr->slots_info->tot_lost++; -+ write_unlock(&pfr->ring_index_lock); -+ -+#if defined(RING_DEBUG) -+ printk("add_skb_to_ring(skb): packet lost [loss=%lu]" -+ "[removeIdx=%u][insertIdx=%u]\n", -+ (long unsigned int)pfr->slots_info->tot_lost, -+ pfr->slots_info->remove_idx, pfr->slots_info->insert_idx); -+#endif -+ } -+ -+ /* wakeup in case of poll() */ -+ if(waitqueue_active(&pfr->ring_slots_waitqueue)) -+ wake_up_interruptible(&pfr->ring_slots_waitqueue); -+} -+ -+/* ********************************** */ -+ -+static u_int hash_skb(struct ring_cluster *cluster_ptr, -+ struct sk_buff *skb, u_char recv_packet) { -+ u_int idx; -+ int displ; -+ struct iphdr *ip; -+ -+ if(cluster_ptr->hashing_mode == cluster_round_robin) { -+ idx = cluster_ptr->hashing_id++; -+ } else { -+ /* Per-flow clustering */ -+ if(skb->len > sizeof(struct iphdr)+sizeof(struct tcphdr)) { -+ if(recv_packet) -+ displ = 0; -+ else -+ displ = SKB_DISPLACEMENT; -+ -+ /* -+ skb->data+displ -+ -+ Always points to to the IP part of the packet -+ */ -+ -+ ip = (struct iphdr*)(skb->data+displ); -+ -+ idx = ip->saddr+ip->daddr+ip->protocol; -+ -+ if(ip->protocol == IPPROTO_TCP) { -+ struct tcphdr *tcp = (struct tcphdr*)(skb->data+displ -+ +sizeof(struct iphdr)); -+ idx += tcp->source+tcp->dest; -+ } else if(ip->protocol == IPPROTO_UDP) { -+ struct udphdr *udp = (struct udphdr*)(skb->data+displ -+ +sizeof(struct iphdr)); -+ idx += udp->source+udp->dest; -+ } -+ } else -+ idx = skb->len; -+ } -+ -+ return(idx % cluster_ptr->num_cluster_elements); -+} -+ -+/* ********************************** */ -+ -+static int skb_ring_handler(struct sk_buff *skb, -+ u_char recv_packet, -+ u_char real_skb /* 1=skb 0=faked skb */) { -+ struct sock *skElement; -+ int rc = 0; -+ struct list_head *ptr; -+ struct ring_cluster *cluster_ptr; -+ -+ if((!skb) /* Invalid skb */ -+ || ((!enable_tx_capture) && (!recv_packet))) { -+ /* -+ An outgoing packet is about to be sent out -+ but we decided not to handle transmitted -+ packets. -+ */ -+ return(0); -+ } -+ -+#if defined(RING_DEBUG) -+ if(0) { -+ printk("skb_ring_handler() [len=%d][dev=%s]\n", skb->len, -+ skb->dev->name == NULL ? "<NULL>" : skb->dev->name); -+ } -+#endif -+ -+ /* [1] Check unclustered sockets */ -+ for (ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) { -+ struct ring_opt *pfr; -+ struct ring_element *entry; -+ -+ entry = list_entry(ptr, struct ring_element, list); -+ -+ read_lock(&ring_mgmt_lock); -+ skElement = entry->sk; -+ pfr = ring_sk(skElement); -+ read_unlock(&ring_mgmt_lock); -+ -+ if((pfr != NULL) -+ && (pfr->cluster_id == 0 /* No cluster */) -+ && (pfr->ring_slots != NULL) -+ && (pfr->ring_netdev == skb->dev)) { -+ /* We've found the ring where the packet can be stored */ -+ read_lock(&ring_mgmt_lock); -+ add_skb_to_ring(skb, pfr, recv_packet, real_skb); -+ read_unlock(&ring_mgmt_lock); -+ -+ rc = 1; /* Ring found: we've done our job */ -+ } -+ } -+ -+ /* [2] Check socket clusters */ -+ cluster_ptr = ring_cluster_list; -+ -+ while(cluster_ptr != NULL) { -+ struct ring_opt *pfr; -+ -+ if(cluster_ptr->num_cluster_elements > 0) { -+ u_int skb_hash = hash_skb(cluster_ptr, skb, recv_packet); -+ -+ read_lock(&ring_mgmt_lock); -+ skElement = cluster_ptr->sk[skb_hash]; -+ read_unlock(&ring_mgmt_lock); -+ -+ if(skElement != NULL) { -+ pfr = ring_sk(skElement); -+ -+ if((pfr != NULL) -+ && (pfr->ring_slots != NULL) -+ && (pfr->ring_netdev == skb->dev)) { -+ /* We've found the ring where the packet can be stored */ -+ read_lock(&ring_mgmt_lock); -+ add_skb_to_ring(skb, pfr, recv_packet, real_skb); -+ read_unlock(&ring_mgmt_lock); -+ -+ rc = 1; /* Ring found: we've done our job */ -+ } -+ } -+ } -+ -+ cluster_ptr = cluster_ptr->next; -+ } -+ -+ if(transparent_mode) rc = 0; -+ -+ if((rc != 0) && real_skb) -+ dev_kfree_skb(skb); /* Free the skb */ -+ -+ return(rc); /* 0 = packet not handled */ -+} -+ -+/* ********************************** */ -+ -+struct sk_buff skb; -+ -+static int buffer_ring_handler(struct net_device *dev, -+ char *data, int len) { -+ -+#if defined(RING_DEBUG) -+ printk("buffer_ring_handler: [dev=%s][len=%d]\n", -+ dev->name == NULL ? "<NULL>" : dev->name, len); -+#endif -+ -+ skb.dev = dev, skb.len = len, skb.data = data, -+ skb.data_len = len, skb.stamp.tv_sec = 0; /* Calculate the time */ -+ -+ skb_ring_handler(&skb, 1, 0 /* fake skb */); -+ -+ return(0); -+} -+ -+/* ********************************** */ -+ -+static int ring_create(struct socket *sock, int protocol) { -+ struct sock *sk; -+ struct ring_opt *pfr; -+ int err; -+ -+#if defined(RING_DEBUG) -+ printk("RING: ring_create()\n"); -+#endif -+ -+ /* Are you root, superuser or so ? */ -+ if(!capable(CAP_NET_ADMIN)) -+ return -EPERM; -+ -+ if(sock->type != SOCK_RAW) -+ return -ESOCKTNOSUPPORT; -+ -+ if(protocol != htons(ETH_P_ALL)) -+ return -EPROTONOSUPPORT; -+ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) -+ MOD_INC_USE_COUNT; -+#endif -+ -+ err = -ENOMEM; -+ -+ // BD: -- broke this out to keep it more simple and clear as to what the -+ // options are. -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) -+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11)) -+ sk = sk_alloc(PF_RING, GFP_KERNEL, 1, NULL); -+#endif -+#endif -+ -+ // BD: API changed in 2.6.12, ref: -+ // http://svn.clkao.org/svnweb/linux/revision/?rev=28201 -+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11)) -+ sk = sk_alloc(PF_RING, GFP_ATOMIC, &ring_proto, 1); -+#endif -+ -+ if (sk == NULL) -+ goto out; -+ -+ sock->ops = &ring_ops; -+ sock_init_data(sock, sk); -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) -+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11)) -+ sk_set_owner(sk, THIS_MODULE); -+#endif -+#endif -+ -+ err = -ENOMEM; -+ ring_sk(sk) = ring_sk_datatype(kmalloc(sizeof(*pfr), GFP_KERNEL)); -+ -+ if (!(pfr = ring_sk(sk))) { -+ sk_free(sk); -+ goto out; -+ } -+ memset(pfr, 0, sizeof(*pfr)); -+ init_waitqueue_head(&pfr->ring_slots_waitqueue); -+ pfr->ring_index_lock = RW_LOCK_UNLOCKED; -+ atomic_set(&pfr->num_ring_slots_waiters, 0); -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) -+ sk->sk_family = PF_RING; -+ sk->sk_destruct = ring_sock_destruct; -+#else -+ sk->family = PF_RING; -+ sk->destruct = ring_sock_destruct; -+ sk->num = protocol; -+#endif -+ -+ ring_insert(sk); -+ -+#if defined(RING_DEBUG) -+ printk("RING: ring_create() - created\n"); -+#endif -+ -+ return(0); -+ out: -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) -+ MOD_DEC_USE_COUNT; -+#endif -+ return err; -+} -+ -+/* *********************************************** */ -+ -+static int ring_release(struct socket *sock) -+{ -+ struct sock *sk = sock->sk; -+ struct ring_opt *pfr = ring_sk(sk); -+ -+ if(!sk) -+ return 0; -+ -+#if defined(RING_DEBUG) -+ printk("RING: called ring_release\n"); -+#endif -+ -+#if defined(RING_DEBUG) -+ printk("RING: ring_release entered\n"); -+#endif -+ -+ ring_remove(sk); -+ -+ sock_orphan(sk); -+ sock->sk = NULL; -+ -+ /* Free the ring buffer */ -+ if(pfr->ring_memory) { -+ struct page *page, *page_end; -+ -+ page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1); -+ for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++) -+ ClearPageReserved(page); -+ -+ free_pages(pfr->ring_memory, pfr->order); -+ } -+ -+ kfree(pfr); -+ ring_sk(sk) = NULL; -+ -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) -+ skb_queue_purge(&sk->sk_write_queue); -+#endif -+ sock_put(sk); -+ -+#if defined(RING_DEBUG) -+ printk("RING: ring_release leaving\n"); -+#endif -+ -+ return 0; -+} -+ -+/* ********************************** */ -+/* -+ * We create a ring for this socket and bind it to the specified device -+ */ -+static int packet_ring_bind(struct sock *sk, struct net_device *dev) -+{ -+ u_int the_slot_len; -+ u_int32_t tot_mem; -+ struct ring_opt *pfr = ring_sk(sk); -+ struct page *page, *page_end; -+ -+ if(!dev) return(-1); -+ -+#if defined(RING_DEBUG) -+ printk("RING: packet_ring_bind(%s) called\n", dev->name); -+#endif -+ -+ /* ********************************************** -+ -+ ************************************* -+ * * -+ * FlowSlotInfo * -+ * * -+ ************************************* <-+ -+ * FlowSlot * | -+ ************************************* | -+ * FlowSlot * | -+ ************************************* +- num_slots -+ * FlowSlot * | -+ ************************************* | -+ * FlowSlot * | -+ ************************************* <-+ -+ -+ ********************************************** */ -+ -+ the_slot_len = sizeof(u_char) /* flowSlot.slot_state */ -+ + sizeof(u_short) /* flowSlot.slot_len */ -+ + bucket_len /* flowSlot.bucket */; -+ -+ tot_mem = sizeof(FlowSlotInfo) + num_slots*the_slot_len; -+ -+ /* -+ Calculate the value of the order parameter used later. -+ See http://www.linuxjournal.com/article.php?sid=1133 -+ */ -+ for(pfr->order = 0;(PAGE_SIZE << pfr->order) < tot_mem; pfr->order++) ; -+ -+ /* -+ We now try to allocate the memory as required. If we fail -+ we try to allocate a smaller amount or memory (hence a -+ smaller ring). -+ */ -+ while((pfr->ring_memory = __get_free_pages(GFP_ATOMIC, pfr->order)) == 0) -+ if(pfr->order-- == 0) -+ break; -+ -+ if(pfr->order == 0) { -+#if defined(RING_DEBUG) -+ printk("ERROR: not enough memory\n"); -+#endif -+ return(-1); -+ } else { -+#if defined(RING_DEBUG) -+ printk("RING: succesfully allocated %lu KB [tot_mem=%d][order=%ld]\n", -+ PAGE_SIZE >> (10 - pfr->order), tot_mem, pfr->order); -+#endif -+ } -+ -+ tot_mem = PAGE_SIZE << pfr->order; -+ memset((char*)pfr->ring_memory, 0, tot_mem); -+ -+ /* Now we need to reserve the pages */ -+ page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1); -+ for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++) -+ SetPageReserved(page); -+ -+ pfr->slots_info = (FlowSlotInfo*)pfr->ring_memory; -+ pfr->ring_slots = (char*)(pfr->ring_memory+sizeof(FlowSlotInfo)); -+ -+ pfr->slots_info->version = RING_FLOWSLOT_VERSION; -+ pfr->slots_info->slot_len = the_slot_len; -+ pfr->slots_info->tot_slots = (tot_mem-sizeof(FlowSlotInfo))/the_slot_len; -+ pfr->slots_info->tot_mem = tot_mem; -+ pfr->slots_info->sample_rate = sample_rate; -+ -+#if defined(RING_DEBUG) -+ printk("RING: allocated %d slots [slot_len=%d][tot_mem=%u]\n", -+ pfr->slots_info->tot_slots, pfr->slots_info->slot_len, -+ pfr->slots_info->tot_mem); -+#endif -+ -+#ifdef RING_MAGIC -+ { -+ int i; -+ -+ for(i=0; i<pfr->slots_info->tot_slots; i++) { -+ unsigned long idx = i*pfr->slots_info->slot_len; -+ FlowSlot *slot = (FlowSlot*)&pfr->ring_slots[idx]; -+ slot->magic = RING_MAGIC_VALUE; slot->slot_state = 0; -+ } -+ } -+#endif -+ -+ pfr->insert_page_id = 1, pfr->insert_slot_id = 0; -+ -+ /* -+ IMPORTANT -+ Leave this statement here as last one. In fact when -+ the ring_netdev != NULL the socket is ready to be used. -+ */ -+ pfr->ring_netdev = dev; -+ -+ return(0); -+} -+ -+/* ************************************* */ -+ -+/* Bind to a device */ -+static int ring_bind(struct socket *sock, -+ struct sockaddr *sa, int addr_len) -+{ -+ struct sock *sk=sock->sk; -+ struct net_device *dev = NULL; -+ -+#if defined(RING_DEBUG) -+ printk("RING: ring_bind() called\n"); -+#endif -+ -+ /* -+ * Check legality -+ */ -+ if (addr_len != sizeof(struct sockaddr)) -+ return -EINVAL; -+ if (sa->sa_family != PF_RING) -+ return -EINVAL; -+ -+ /* Safety check: add trailing zero if missing */ -+ sa->sa_data[sizeof(sa->sa_data)-1] = '\0'; -+ -+#if defined(RING_DEBUG) -+ printk("RING: searching device %s\n", sa->sa_data); -+#endif -+ -+ if((dev = __dev_get_by_name(sa->sa_data)) == NULL) { -+#if defined(RING_DEBUG) -+ printk("RING: search failed\n"); -+#endif -+ return(-EINVAL); -+ } else -+ return(packet_ring_bind(sk, dev)); -+} -+ -+/* ************************************* */ -+ -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) -+ -+volatile void* virt_to_kseg(volatile void* address) { -+ pte_t *pte; -+ pud_t *pud; -+ unsigned long addr = (unsigned long)address; -+ -+ pud = pud_offset(pgd_offset_k((unsigned long) address), -+ (unsigned long) address); -+ -+ /* -+ High-memory support courtesy of -+ Brad Doctor <bdoctor@ps-ax.com> -+ */ -+#if defined(CONFIG_X86_PAE) && (!defined(CONFIG_NOHIGHMEM)) -+ pte = pte_offset_map(pmd_offset(pud, addr), addr); -+#else -+ pte = pmd_offset_map(pud, addr); -+#endif -+ -+ return((volatile void*)pte_page(*pte)); -+} -+ -+#else /* 2.4 */ -+ -+/* http://www.scs.ch/~frey/linux/memorymap.html */ -+volatile void *virt_to_kseg(volatile void *address) -+{ -+ pgd_t *pgd; pmd_t *pmd; pte_t *ptep, pte; -+ unsigned long va, ret = 0UL; -+ -+ va=VMALLOC_VMADDR((unsigned long)address); -+ -+ /* get the page directory. Use the kernel memory map. */ -+ pgd = pgd_offset_k(va); -+ -+ /* check whether we found an entry */ -+ if (!pgd_none(*pgd)) -+ { -+ /* get the page middle directory */ -+ pmd = pmd_offset(pgd, va); -+ /* check whether we found an entry */ -+ if (!pmd_none(*pmd)) -+ { -+ /* get a pointer to the page table entry */ -+ ptep = pte_offset(pmd, va); -+ pte = *ptep; -+ /* check for a valid page */ -+ if (pte_present(pte)) -+ { -+ /* get the address the page is refering to */ -+ ret = (unsigned long)page_address(pte_page(pte)); -+ /* add the offset within the page to the page address */ -+ ret |= (va & (PAGE_SIZE -1)); -+ } -+ } -+ } -+ return((volatile void *)ret); -+} -+#endif -+ -+/* ************************************* */ -+ -+static int ring_mmap(struct file *file, -+ struct socket *sock, -+ struct vm_area_struct *vma) -+{ -+ struct sock *sk = sock->sk; -+ struct ring_opt *pfr = ring_sk(sk); -+ unsigned long size, start; -+ u_int pagesToMap; -+ char *ptr; -+ -+#if defined(RING_DEBUG) -+ printk("RING: ring_mmap() called\n"); -+#endif -+ -+ if(pfr->ring_memory == 0) { -+#if defined(RING_DEBUG) -+ printk("RING: ring_mmap() failed: mapping area to an unbound socket\n"); -+#endif -+ return -EINVAL; -+ } -+ -+ size = (unsigned long)(vma->vm_end-vma->vm_start); -+ -+ if(size % PAGE_SIZE) { -+#if defined(RING_DEBUG) -+ printk("RING: ring_mmap() failed: len is not multiple of PAGE_SIZE\n"); -+#endif -+ return(-EINVAL); -+ } -+ -+ /* if userspace tries to mmap beyond end of our buffer, fail */ -+ if(size > pfr->slots_info->tot_mem) { -+#if defined(RING_DEBUG) -+ printk("proc_mmap() failed: area too large [%ld > %d]\n", size, pfr->slots_info->tot_mem); -+#endif -+ return(-EINVAL); -+ } -+ -+ pagesToMap = size/PAGE_SIZE; -+ -+#if defined(RING_DEBUG) -+ printk("RING: ring_mmap() called. %d pages to map\n", pagesToMap); -+#endif -+ -+#if defined(RING_DEBUG) -+ printk("RING: mmap [slot_len=%d][tot_slots=%d] for ring on device %s\n", -+ pfr->slots_info->slot_len, pfr->slots_info->tot_slots, -+ pfr->ring_netdev->name); -+#endif -+ -+ /* we do not want to have this area swapped out, lock it */ -+ vma->vm_flags |= VM_LOCKED; -+ start = vma->vm_start; -+ -+ /* Ring slots start from page 1 (page 0 is reserved for FlowSlotInfo) */ -+ ptr = (char*)(start+PAGE_SIZE); -+ -+ if(remap_page_range( -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) -+ vma, -+#endif -+ start, -+ __pa(pfr->ring_memory), -+ PAGE_SIZE*pagesToMap, vma->vm_page_prot)) { -+#if defined(RING_DEBUG) -+ printk("remap_page_range() failed\n"); -+#endif -+ return(-EAGAIN); -+ } -+ -+#if defined(RING_DEBUG) -+ printk("proc_mmap(pagesToMap=%d): success.\n", pagesToMap); -+#endif -+ -+ return 0; -+} -+ -+/* ************************************* */ -+ -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) -+static int ring_recvmsg(struct kiocb *iocb, struct socket *sock, -+ struct msghdr *msg, size_t len, int flags) -+#else -+ static int ring_recvmsg(struct socket *sock, struct msghdr *msg, int len, -+ int flags, struct scm_cookie *scm) -+#endif -+{ -+ FlowSlot* slot; -+ struct ring_opt *pfr = ring_sk(sock->sk); -+ u_int32_t queued_pkts, num_loops = 0; -+ -+#if defined(RING_DEBUG) -+ printk("ring_recvmsg called\n"); -+#endif -+ -+ slot = get_remove_slot(pfr); -+ -+ while((queued_pkts = num_queued_pkts(pfr)) < MIN_QUEUED_PKTS) { -+ wait_event_interruptible(pfr->ring_slots_waitqueue, 1); -+ -+#if defined(RING_DEBUG) -+ printk("-> ring_recvmsg returning %d [queued_pkts=%d][num_loops=%d]\n", -+ slot->slot_state, queued_pkts, num_loops); -+#endif -+ -+ if(queued_pkts > 0) { -+ if(num_loops++ > MAX_QUEUE_LOOPS) -+ break; -+ } -+ } -+ -+#if defined(RING_DEBUG) -+ if(slot != NULL) -+ printk("ring_recvmsg is returning [queued_pkts=%d][num_loops=%d]\n", -+ queued_pkts, num_loops); -+#endif -+ -+ return(queued_pkts); -+} -+ -+/* ************************************* */ -+ -+unsigned int ring_poll(struct file * file, -+ struct socket *sock, poll_table *wait) -+{ -+ FlowSlot* slot; -+ struct ring_opt *pfr = ring_sk(sock->sk); -+ -+#if defined(RING_DEBUG) -+ printk("poll called\n"); -+#endif -+ -+ slot = get_remove_slot(pfr); -+ -+ if((slot != NULL) && (slot->slot_state == 0)) -+ poll_wait(file, &pfr->ring_slots_waitqueue, wait); -+ -+#if defined(RING_DEBUG) -+ printk("poll returning %d\n", slot->slot_state); -+#endif -+ -+ if((slot != NULL) && (slot->slot_state == 1)) -+ return(POLLIN | POLLRDNORM); -+ else -+ return(0); -+} -+ -+/* ************************************* */ -+ -+int add_to_cluster_list(struct ring_cluster *el, -+ struct sock *sock) { -+ -+ if(el->num_cluster_elements == CLUSTER_LEN) -+ return(-1); /* Cluster full */ -+ -+ ring_sk_datatype(ring_sk(sock))->cluster_id = el->cluster_id; -+ el->sk[el->num_cluster_elements] = sock; -+ el->num_cluster_elements++; -+ return(0); -+} -+ -+/* ************************************* */ -+ -+int remove_from_cluster_list(struct ring_cluster *el, -+ struct sock *sock) { -+ int i, j; -+ -+ for(i=0; i<CLUSTER_LEN; i++) -+ if(el->sk[i] == sock) { -+ el->num_cluster_elements--; -+ -+ if(el->num_cluster_elements > 0) { -+ /* The cluster contains other elements */ -+ for(j=i; j<CLUSTER_LEN-1; j++) -+ el->sk[j] = el->sk[j+1]; -+ -+ el->sk[CLUSTER_LEN-1] = NULL; -+ } else { -+ /* Empty cluster */ -+ memset(el->sk, 0, sizeof(el->sk)); -+ } -+ -+ return(0); -+ } -+ -+ return(-1); /* Not found */ -+} -+ -+/* ************************************* */ -+ -+static int remove_from_cluster(struct sock *sock, -+ struct ring_opt *pfr) -+{ -+ struct ring_cluster *el; -+ -+#if defined(RING_DEBUG) -+ printk("--> remove_from_cluster(%d)\n", pfr->cluster_id); -+#endif -+ -+ if(pfr->cluster_id == 0 /* 0 = No Cluster */) -+ return(0); /* Noting to do */ -+ -+ el = ring_cluster_list; -+ -+ while(el != NULL) { -+ if(el->cluster_id == pfr->cluster_id) { -+ return(remove_from_cluster_list(el, sock)); -+ } else -+ el = el->next; -+ } -+ -+ return(-EINVAL); /* Not found */ -+} -+ -+/* ************************************* */ -+ -+static int add_to_cluster(struct sock *sock, -+ struct ring_opt *pfr, -+ u_short cluster_id) -+{ -+ struct ring_cluster *el; -+ -+#ifndef RING_DEBUG -+ printk("--> add_to_cluster(%d)\n", cluster_id); -+#endif -+ -+ if(cluster_id == 0 /* 0 = No Cluster */) return(-EINVAL); -+ -+ if(pfr->cluster_id != 0) -+ remove_from_cluster(sock, pfr); -+ -+ el = ring_cluster_list; -+ -+ while(el != NULL) { -+ if(el->cluster_id == cluster_id) { -+ return(add_to_cluster_list(el, sock)); -+ } else -+ el = el->next; -+ } -+ -+ /* There's no existing cluster. We need to create one */ -+ if((el = kmalloc(sizeof(struct ring_cluster), GFP_KERNEL)) == NULL) -+ return(-ENOMEM); -+ -+ el->cluster_id = cluster_id; -+ el->num_cluster_elements = 1; -+ el->hashing_mode = cluster_per_flow; /* Default */ -+ el->hashing_id = 0; -+ -+ memset(el->sk, 0, sizeof(el->sk)); -+ el->sk[0] = sock; -+ el->next = ring_cluster_list; -+ ring_cluster_list = el; -+ pfr->cluster_id = cluster_id; -+ -+ return(0); /* 0 = OK */ -+} -+ -+/* ************************************* */ -+ -+/* Code taken/inspired from core/sock.c */ -+static int ring_setsockopt(struct socket *sock, -+ int level, int optname, -+ char *optval, int optlen) -+{ -+ struct ring_opt *pfr = ring_sk(sock->sk); -+ int val, found, ret = 0; -+ u_int cluster_id; -+ char devName[8]; -+ -+ if((optlen<sizeof(int)) || (pfr == NULL)) -+ return(-EINVAL); -+ -+ if (get_user(val, (int *)optval)) -+ return -EFAULT; -+ -+ found = 1; -+ -+ switch(optname) -+ { -+ case SO_ATTACH_FILTER: -+ ret = -EINVAL; -+ if (optlen == sizeof(struct sock_fprog)) { -+ unsigned int fsize; -+ struct sock_fprog fprog; -+ struct sk_filter *filter; -+ -+ ret = -EFAULT; -+ -+ /* -+ NOTE -+ -+ Do not call copy_from_user within a held -+ splinlock (e.g. ring_mgmt_lock) as this caused -+ problems when certain debugging was enabled under -+ 2.6.5 -- including hard lockups of the machine. -+ */ -+ if(copy_from_user(&fprog, optval, sizeof(fprog))) -+ break; -+ -+ fsize = sizeof(struct sock_filter) * fprog.len; -+ filter = kmalloc(fsize, GFP_KERNEL); -+ -+ if(filter == NULL) { -+ ret = -ENOMEM; -+ break; -+ } -+ -+ if(copy_from_user(filter->insns, fprog.filter, fsize)) -+ break; -+ -+ filter->len = fprog.len; -+ -+ if(sk_chk_filter(filter->insns, filter->len) != 0) { -+ /* Bad filter specified */ -+ kfree(filter); -+ pfr->bpfFilter = NULL; -+ break; -+ } -+ -+ /* get the lock, set the filter, release the lock */ -+ write_lock(&ring_mgmt_lock); -+ pfr->bpfFilter = filter; -+ write_unlock(&ring_mgmt_lock); -+ } -+ ret = 0; -+ break; -+ -+ case SO_DETACH_FILTER: -+ write_lock(&ring_mgmt_lock); -+ found = 1; -+ if(pfr->bpfFilter != NULL) { -+ kfree(pfr->bpfFilter); -+ pfr->bpfFilter = NULL; -+ write_unlock(&ring_mgmt_lock); -+ break; -+ } -+ ret = -ENONET; -+ break; -+ -+ case SO_ADD_TO_CLUSTER: -+ if (optlen!=sizeof(val)) -+ return -EINVAL; -+ -+ if (copy_from_user(&cluster_id, optval, sizeof(cluster_id))) -+ return -EFAULT; -+ -+ write_lock(&ring_mgmt_lock); -+ ret = add_to_cluster(sock->sk, pfr, cluster_id); -+ write_unlock(&ring_mgmt_lock); -+ break; -+ -+ case SO_REMOVE_FROM_CLUSTER: -+ write_lock(&ring_mgmt_lock); -+ ret = remove_from_cluster(sock->sk, pfr); -+ write_unlock(&ring_mgmt_lock); -+ break; -+ -+ case SO_SET_REFLECTOR: -+ if(optlen >= (sizeof(devName)-1)) -+ return -EINVAL; -+ -+ if(optlen > 0) { -+ if(copy_from_user(devName, optval, optlen)) -+ return -EFAULT; -+ } -+ -+ devName[optlen] = '\0'; -+ -+#if defined(RING_DEBUG) -+ printk("+++ SO_SET_REFLECTOR(%s)\n", devName); -+#endif -+ -+ write_lock(&ring_mgmt_lock); -+ pfr->reflector_dev = dev_get_by_name(devName); -+ write_unlock(&ring_mgmt_lock); -+ -+#if defined(RING_DEBUG) -+ if(pfr->reflector_dev != NULL) -+ printk("SO_SET_REFLECTOR(%s): succeded\n", devName); -+ else -+ printk("SO_SET_REFLECTOR(%s): device unknown\n", devName); -+#endif -+ break; -+ -+ default: -+ found = 0; -+ break; -+ } -+ -+ if(found) -+ return(ret); -+ else -+ return(sock_setsockopt(sock, level, optname, optval, optlen)); -+} -+ -+/* ************************************* */ -+ -+static int ring_ioctl(struct socket *sock, -+ unsigned int cmd, unsigned long arg) -+{ -+ switch(cmd) -+ { -+ case SIOCGIFFLAGS: -+ case SIOCSIFFLAGS: -+ case SIOCGIFCONF: -+ case SIOCGIFMETRIC: -+ case SIOCSIFMETRIC: -+ case SIOCGIFMEM: -+ case SIOCSIFMEM: -+ case SIOCGIFMTU: -+ case SIOCSIFMTU: -+ case SIOCSIFLINK: -+ case SIOCGIFHWADDR: -+ case SIOCSIFHWADDR: -+ case SIOCSIFMAP: -+ case SIOCGIFMAP: -+ case SIOCSIFSLAVE: -+ case SIOCGIFSLAVE: -+ case SIOCGIFINDEX: -+ case SIOCGIFNAME: -+ case SIOCGIFCOUNT: -+ case SIOCSIFHWBROADCAST: -+ return(dev_ioctl(cmd,(void *) arg)); -+ -+ default: -+ return -EOPNOTSUPP; -+ } -+ -+ return 0; -+} -+ -+/* ************************************* */ -+ -+static struct proto_ops ring_ops = { -+ .family = PF_RING, -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) -+ .owner = THIS_MODULE, -+#endif -+ -+ /* Operations that make no sense on ring sockets. */ -+ .connect = sock_no_connect, -+ .socketpair = sock_no_socketpair, -+ .accept = sock_no_accept, -+ .getname = sock_no_getname, -+ .listen = sock_no_listen, -+ .shutdown = sock_no_shutdown, -+ .sendpage = sock_no_sendpage, -+ .sendmsg = sock_no_sendmsg, -+ .getsockopt = sock_no_getsockopt, -+ -+ /* Now the operations that really occur. */ -+ .release = ring_release, -+ .bind = ring_bind, -+ .mmap = ring_mmap, -+ .poll = ring_poll, -+ .setsockopt = ring_setsockopt, -+ .ioctl = ring_ioctl, -+ .recvmsg = ring_recvmsg, -+}; -+ -+/* ************************************ */ -+ -+static struct net_proto_family ring_family_ops = { -+ .family = PF_RING, -+ .create = ring_create, -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) -+ .owner = THIS_MODULE, -+#endif -+}; -+ -+// BD: API changed in 2.6.12, ref: -+// http://svn.clkao.org/svnweb/linux/revision/?rev=28201 -+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11)) -+static struct proto ring_proto = { -+ .name = "PF_RING", -+ .owner = THIS_MODULE, -+ .obj_size = sizeof(struct sock), -+}; -+#endif -+ -+/* ************************************ */ -+ -+static void __exit ring_exit(void) -+{ -+ struct list_head *ptr; -+ struct ring_element *entry; -+ -+ for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) { -+ entry = list_entry(ptr, struct ring_element, list); -+ kfree(entry); -+ } -+ -+ while(ring_cluster_list != NULL) { -+ struct ring_cluster *next = ring_cluster_list->next; -+ kfree(ring_cluster_list); -+ ring_cluster_list = next; -+ } -+ -+ set_skb_ring_handler(NULL); -+ set_buffer_ring_handler(NULL); -+ sock_unregister(PF_RING); -+ -+ printk("PF_RING shut down.\n"); -+} -+ -+/* ************************************ */ -+ -+static int __init ring_init(void) -+{ -+ printk("Welcome to PF_RING %s\n(C) 2004 L.Deri <deri@ntop.org>\n", -+ RING_VERSION); -+ -+ INIT_LIST_HEAD(&ring_table); -+ ring_cluster_list = NULL; -+ -+ sock_register(&ring_family_ops); -+ -+ set_skb_ring_handler(skb_ring_handler); -+ set_buffer_ring_handler(buffer_ring_handler); -+ -+ if(get_buffer_ring_handler() != buffer_ring_handler) { -+ printk("PF_RING: set_buffer_ring_handler FAILED\n"); -+ -+ set_skb_ring_handler(NULL); -+ set_buffer_ring_handler(NULL); -+ sock_unregister(PF_RING); -+ return -1; -+ } else { -+ printk("PF_RING: bucket length %d bytes\n", bucket_len); -+ printk("PF_RING: ring slots %d\n", num_slots); -+ printk("PF_RING: sample rate %d [1=no sampling]\n", sample_rate); -+ printk("PF_RING: capture TX %s\n", -+ enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]"); -+ printk("PF_RING: transparent mode %s\n", -+ transparent_mode ? "Yes" : "No"); -+ -+ printk("PF_RING initialized correctly.\n"); -+ return 0; -+ } -+} -+ -+module_init(ring_init); -+module_exit(ring_exit); -+MODULE_LICENSE("GPL"); -+ -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) -+MODULE_ALIAS_NETPROTO(PF_RING); -+#endif |