From 30bf34f05c0f66d9d9221f7ebfe9808024337dd4 Mon Sep 17 00:00:00 2001 From: nbd Date: Sat, 22 Oct 2005 22:03:56 +0000 Subject: add pf_ring patches for kernel and libpcap git-svn-id: svn://svn.openwrt.org/openwrt/trunk/openwrt@2266 3c298f89-4303-0410-b956-a3cf2f4a3e73 --- package/libpcap/Makefile | 2 +- package/libpcap/patches/110-pf_ring.patch | 613 ++++++++++++++++++++++++++++++ 2 files changed, 614 insertions(+), 1 deletion(-) create mode 100644 package/libpcap/patches/110-pf_ring.patch (limited to 'package/libpcap') diff --git a/package/libpcap/Makefile b/package/libpcap/Makefile index fa79e7461..c7dfd9991 100644 --- a/package/libpcap/Makefile +++ b/package/libpcap/Makefile @@ -57,7 +57,7 @@ $(PKG_BUILD_DIR)/.built: rm -rf $(PKG_INSTALL_DIR) mkdir -p $(PKG_INSTALL_DIR) $(MAKE) -C $(PKG_BUILD_DIR) \ - CCOPT="$(TARGET_CFLAGS)" \ + CCOPT="$(TARGET_CFLAGS) -I$(BUILD_DIR)/linux/include" \ DESTDIR="$(PKG_INSTALL_DIR)" \ all install touch $@ diff --git a/package/libpcap/patches/110-pf_ring.patch b/package/libpcap/patches/110-pf_ring.patch new file mode 100644 index 000000000..1d5124fac --- /dev/null +++ b/package/libpcap/patches/110-pf_ring.patch @@ -0,0 +1,613 @@ +diff -urN libpcap.old/pcap-int.h libpcap.dev/pcap-int.h +--- libpcap.old/pcap-int.h 2003-12-15 02:42:24.000000000 +0100 ++++ libpcap.dev/pcap-int.h 2005-10-22 23:20:12.220060500 +0200 +@@ -30,7 +30,7 @@ + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * +- * @(#) $Header: /tcpdump/master/libpcap/pcap-int.h,v 1.55.2.4 2003/12/15 01:42:24 guy Exp $ (LBL) ++ * @(#) $Header: /export/home/ntop/PF_RING/userland/libpcap-0.8.1-ring/pcap-int.h,v 1.2 2004/11/25 09:58:00 deri Exp $ (LBL) + */ + + #ifndef pcap_int_h +@@ -46,6 +46,8 @@ + #include + #endif /* WIN32 */ + ++#define RING /* L.Deri */ ++ + /* + * Savefile + */ +@@ -93,6 +95,57 @@ + #endif + }; + ++/* **************************** */ ++ ++#ifdef RING ++ ++#include ++#include ++#include ++#include ++ ++#define PAGE_SIZE 4096 ++ ++#define HAVE_PCAP ++#include ++#endif ++ ++#ifdef RING ++ ++#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ ++ ++struct e1000_rx_desc { ++ u_int64_t buffer_addr; /* Address of the descriptor's data buffer */ ++ u_int16_t length; /* Length of data DMAed into data buffer */ ++ u_int16_t csum; /* Packet checksum */ ++ u_int8_t status; /* Descriptor status */ ++ u_int8_t errors; /* Descriptor Errors */ ++ u_int16_t special; ++}; ++ ++/* Transmit Descriptor */ ++struct e1000_tx_desc { ++ u_int64_t buffer_addr; /* Address of the descriptor's data buffer */ ++ union { ++ u_int32_t data; ++ struct { ++ u_int16_t length; /* Data buffer length */ ++ u_int8_t cso; /* Checksum offset */ ++ u_int8_t cmd; /* Descriptor control */ ++ } flags; ++ } lower; ++ union { ++ u_int32_t data; ++ struct { ++ u_int8_t status; /* Descriptor status */ ++ u_int8_t css; /* Checksum start */ ++ u_int16_t special; ++ } fields; ++ } upper; ++}; ++ ++#endif ++ + struct pcap { + #ifdef WIN32 + ADAPTER *adapter; +@@ -121,6 +174,14 @@ + u_char *bp; + int cc; + ++#ifdef RING ++ /* PF_RING */ ++ char *ring_buffer, *ring_slots; ++ int ring_fd; ++ FlowSlotInfo *slots_info; ++ u_int page_id, slot_id, pkts_per_page; ++ u_int poll_sleep; ++#endif + /* + * Place holder for pcap_next(). 
+ */ +diff -urN libpcap.old/pcap-linux.c libpcap.dev/pcap-linux.c +--- libpcap.old/pcap-linux.c 2003-11-21 11:20:46.000000000 +0100 ++++ libpcap.dev/pcap-linux.c 2005-10-22 23:43:59.726120250 +0200 +@@ -27,7 +27,7 @@ + + #ifndef lint + static const char rcsid[] _U_ = +- "@(#) $Header: /tcpdump/master/libpcap/pcap-linux.c,v 1.98.2.4 2003/11/21 10:20:46 guy Exp $ (LBL)"; ++ "@(#) $Header: /export/home/ntop/PF_RING/userland/libpcap-0.8.1-ring/pcap-linux.c,v 1.2 2004/11/25 09:58:00 deri Exp $ (LBL)"; + #endif + + /* +@@ -83,7 +83,7 @@ + #ifdef HAVE_DAG_API + #include "pcap-dag.h" + #endif /* HAVE_DAG_API */ +- ++ + #include + #include + #include +@@ -217,6 +217,83 @@ + = { 1, &total_insn }; + #endif + ++#define RING /* L.Deri */ ++#define SAFE_RING_MODE /* ++ Copy the bucket in order to avoid kernel ++ crash if the application faults ++ */ ++ ++#ifdef RING ++unsigned char *write_register; ++static struct pcap_stat ringStats; ++u_long numPollCalls = 0, numReadCalls = 0; ++ ++#define POLL_SLEEP_STEP 10 /* ns = 0.1 ms */ ++#define POLL_SLEEP_MIN POLL_SLEEP_STEP ++#define POLL_SLEEP_MAX 1000 /* ns */ ++#define POLL_QUEUE_MIN_LEN 500 /* # packets */ ++ ++#ifdef SAFE_RING_MODE ++static char staticBucket[2048]; ++#endif ++ ++ ++/* ******************************* */ ++ ++int pcap_set_cluster(pcap_t *handle, u_int clusterId) { ++ return(handle->ring_fd ? setsockopt(handle->ring_fd, 0, SO_ADD_TO_CLUSTER, ++ &clusterId, sizeof(clusterId)): -1); ++} ++ ++/* ******************************* */ ++ ++int pcap_remove_from_cluster(pcap_t *handle) { ++ return(handle->ring_fd ? ++ setsockopt(handle->ring_fd, 0, SO_REMOVE_FROM_CLUSTER, NULL, 0) : -1); ++} ++ ++/* ******************************* */ ++ ++int pcap_set_reflector(pcap_t *handle, char *reflectorDevice) { ++ return(handle->ring_fd ? 
++         setsockopt(handle->ring_fd, 0, SO_SET_REFLECTOR,
++                    reflectorDevice, strlen(reflectorDevice)) : -1); /* optval is the name string itself, not the address of the pointer variable */
++}
++
++/* ******************************* */
++
++static int set_if_promisc(const char *device, int set_promisc) { /* 0 = ok, -3 = NULL device, -2 = SIOCGIFFLAGS failed, -1 = socket()/SIOCSIFFLAGS failed */
++  int sock_fd;
++  struct ifreq ifr;
++
++  if(device == NULL) return(-3);
++
++  sock_fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
++  if(sock_fd < 0) return(-1); /* socket() fails with -1; descriptor 0 is valid */
++
++  memset(&ifr, 0, sizeof(ifr));
++  strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name) - 1); /* keep the terminating NUL left by memset() */
++  if(ioctl(sock_fd, SIOCGIFFLAGS, &ifr) == -1) {
++    close(sock_fd);
++    return(-2);
++  }
++
++  if(set_promisc) {
++    if((ifr.ifr_flags & IFF_PROMISC) == 0) ifr.ifr_flags |= IFF_PROMISC;
++  } else {
++    /* Remove promisc */
++    if((ifr.ifr_flags & IFF_PROMISC) != 0) ifr.ifr_flags &= ~IFF_PROMISC;
++  }
++
++  if(ioctl(sock_fd, SIOCSIFFLAGS, &ifr) == -1)
++    return(close(sock_fd), -1); /* release the socket before reporting the failure */
++
++  close(sock_fd);
++  return(0);
++}
++
++#endif
++
+ /*
+  * Get a handle for a live capture from the given device. You can
+  * pass NULL as device to get all packages (without link level
+@@ -258,6 +335,138 @@
+   handle->snapshot = snaplen;
+   handle->md.timeout = to_ms;
+ 
++#ifdef RING
++  handle->ring_fd = handle->fd = socket(PF_RING, SOCK_RAW, htons(ETH_P_ALL));
++
++  printf("Open RING [fd=%d]\n", handle->ring_fd);
++
++  if(handle->ring_fd > 0) {
++    struct sockaddr sa;
++    int rc;
++    u_int memSlotsLen;
++
++    err = 0;
++    sa.sa_family = PF_RING;
++    snprintf(sa.sa_data, sizeof(sa.sa_data), "%s", device);
++    rc = bind(handle->ring_fd, (struct sockaddr *)&sa, sizeof(sa));
++
++    if(rc == 0) {
++
++
++      handle->md.device = strdup(device);
++      handle->ring_buffer = (char *)mmap(NULL, PAGE_SIZE,
++                                         PROT_READ|PROT_WRITE,
++                                         MAP_SHARED,
++                                         handle->ring_fd, 0);
++
++      if(handle->ring_buffer == MAP_FAILED) {
++        sprintf(ebuf, "mmap() failed");
++        return (NULL);
++      }
++
++      handle->slots_info = (FlowSlotInfo *)handle->ring_buffer;
++      if(handle->slots_info->version != RING_FLOWSLOT_VERSION) {
++        snprintf(ebuf, PCAP_ERRBUF_SIZE, "Wrong RING version: "
++                 "kernel is %i, libpcap was compiled with %i\n",
++                 handle->slots_info->version, RING_FLOWSLOT_VERSION);
++        return (NULL);
++      }
++      memSlotsLen = handle->slots_info->tot_mem;
++      munmap(handle->ring_buffer, PAGE_SIZE);
++
++      handle->ring_buffer = (char *)mmap(NULL, memSlotsLen,
++                                         PROT_READ|PROT_WRITE,
++                                         MAP_SHARED, handle->ring_fd, 0);
++
++      if(handle->ring_buffer == MAP_FAILED) {
++        sprintf(ebuf, "mmap() failed");
++        return (NULL);
++      }
++
++      handle->slots_info = (FlowSlotInfo *)handle->ring_buffer;
++      handle->ring_slots = (char *)(handle->ring_buffer+sizeof(FlowSlotInfo));
++
++      /* Safety check */
++      if(handle->slots_info->remove_idx >= handle->slots_info->tot_slots)
++        handle->slots_info->remove_idx = 0;
++
++      handle->page_id = PAGE_SIZE, handle->slot_id = 0,
++        handle->pkts_per_page = 0;
++
++      if(0) {
++        int i;
++
++        for(i=0; i<handle->slots_info->tot_slots; i++) { /* loop bound restored: "<handle->" had been stripped from the text */
++          unsigned long idx = i*handle->slots_info->slot_len;
++          FlowSlot *slot = (FlowSlot*)&handle->ring_slots[idx];
++
++          printf("RING: Setting RING_MAGIC_VALUE into slot %d [displacement=%lu]\n", i, idx);
++          slot->magic = RING_MAGIC_VALUE; slot->slot_state = 0;
++          printf("RING: slot[%d]: magic=%d, slot_state=%d\n",
++                 i, slot->magic, slot->slot_state); /* three conversions need three arguments */
++        }
++      }
++
++
++      /* Set defaults */
++      handle->linktype = DLT_EN10MB;
++      handle->offset = 2;
++
++      printf("RING (%s): tot_slots=%d/slot_len=%d/"
++             "insertIdx=%d/remove_idx=%d/dropped=%d\n",
++             device,
++             handle->slots_info->tot_slots,
++             handle->slots_info->slot_len,
++             handle->slots_info->insert_idx,
++             handle->slots_info->remove_idx,
++             handle->slots_info->tot_lost);
++
++      ringStats.ps_recv = handle->slots_info->tot_read;
++      ringStats.ps_drop = handle->slots_info->tot_lost;
++
++      if(promisc) {
++        struct ifreq ifr;
++
++        err = 0;
++        memset(&ifr, 0, sizeof(ifr));
++        strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name) - 1); /* keep the terminating NUL left by memset() */
++        if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) {
++          snprintf(ebuf, PCAP_ERRBUF_SIZE,
++                   "ioctl: %s", pcap_strerror(errno));
++          err = 1;
++        }
++
++        if(err == 0) {
++          if ((ifr.ifr_flags & IFF_PROMISC) == 0) {
++            /*
++             * Promiscuous mode isn't currently on,
++             * so turn it on, and remember that
++             * we should turn it off when the
++             * pcap_t is closed.
++             */
++
++            ifr.ifr_flags |= IFF_PROMISC;
++            if (ioctl(handle->fd, SIOCSIFFLAGS, &ifr) == -1) {
++              snprintf(ebuf, PCAP_ERRBUF_SIZE,
++                       "ioctl: %s", pcap_strerror(errno));
++              err = 1;
++            }
++          }
++
++          if(err == 0)
++            handle->md.clear_promisc = 1;
++        }
++      }
++
++      if(err == 0)
++        goto open_open_live_final;
++    }
++
++    /* Don't put 'else' above... */
++    if(handle->ring_buffer != NULL) { munmap(handle->ring_buffer, handle->slots_info->tot_mem); handle->ring_buffer = NULL; } /* assumes ring_buffer starts out NULL, as the other ring_buffer checks do */
++    close(handle->ring_fd); handle->ring_fd = 0; /* Continue without ring support: clear ring_fd so later ring_fd tests see "no ring" */
++  }
++#endif
+   /*
+    * NULL and "any" are special devices which give us the hint to
+    * monitor all devices.
+@@ -397,6 +606,9 @@
+     return NULL;
+   }
+ 
++#ifdef RING
++ open_open_live_final:
++#endif
+   /*
+    * "handle->fd" is a socket, so "select()" and "poll()"
+    * should work on it.
+@@ -449,6 +661,120 @@
+   int packet_len, caplen;
+   struct pcap_pkthdr pcap_header;
+ 
++#ifdef RING
++  if(handle->ring_buffer != NULL) {
++    u_int idx, numRuns = 0, ptrAddr = 0; /* ptrAddr is only printed by the debug code; give it a defined value */
++    FlowSlot *slot;
++
++    slot = (FlowSlot*)&handle->ring_slots[handle->slots_info->remove_idx*handle->slots_info->slot_len];
++
++    while(1) {
++      u_int32_t queuedPkts;
++
++      if(handle->slots_info->tot_insert >= handle->slots_info->tot_read)
++        queuedPkts = handle->slots_info->tot_insert - handle->slots_info->tot_read;
++      else
++        queuedPkts = handle->slots_info->tot_slots + handle->slots_info->tot_insert - handle->slots_info->tot_read;
++
++      if(queuedPkts && (slot->slot_state == 1)) {
++        char *bucket = &slot->bucket;
++
++#ifdef RING_MAGIC
++        if(slot->magic != RING_MAGIC_VALUE) {
++          printf("==>> Bad Magic [remove_idx=%u][insert_idx=%u][ptrAddr=%u]\n",
++                 handle->slots_info->remove_idx,
++                 handle->slots_info->insert_idx,
++                 ptrAddr);
++          slot->magic = RING_MAGIC_VALUE;
++        }
++#endif
++
++
++        handle->md.stat.ps_recv++;
++
++#ifdef SAFE_RING_MODE
++        {
++          struct pcap_pkthdr *hdr = (struct pcap_pkthdr*)bucket;
++
int bktLen = hdr->caplen; ++ ++ if(bktLen > sizeof(staticBucket)) ++ bktLen = sizeof(staticBucket); ++ ++ memcpy(staticBucket, &bucket[sizeof(struct pcap_pkthdr)], bktLen); ++ ++#ifdef RING_DEBUG ++ printf("==>> [remove_idx=%u][insert_idx=%u][ptrAddr=%u]\n", ++ handle->slots_info->remove_idx, ++ handle->slots_info->insert_idx, ++ ptrAddr); ++#endif ++ ++ callback(userdata, hdr, staticBucket); ++ } ++#else ++ callback(userdata, ++ (const struct pcap_pkthdr*)bucket, ++ (const u_char*)&bucket[sizeof(struct pcap_pkthdr)]); ++#endif ++ ++ if(handle->slots_info->remove_idx >= (handle->slots_info->tot_slots-1)) { ++ handle->slots_info->remove_idx = 0; ++ handle->page_id = PAGE_SIZE, handle->slot_id = 0, handle->pkts_per_page = 0; ++ } else { ++ handle->slots_info->remove_idx++; ++ handle->pkts_per_page++, handle->slot_id += handle->slots_info->slot_len; ++ } ++ ++ handle->slots_info->tot_read++; ++ slot->slot_state = 0; ++ ++ return(1); ++ } else { ++ struct pollfd pfd; ++ int rc; ++ ++ /* Sleep when nothing is happening */ ++ pfd.fd = handle->ring_fd; ++ pfd.events = POLLIN|POLLERR; ++ pfd.revents = 0; ++ ++#ifdef RING_DEBUG ++ printf("==>> poll [remove_idx=%u][insert_idx=%u][loss=%d][queuedPkts=%u]" ++ "[slot_state=%d][tot_insert=%u][tot_read=%u]\n", ++ handle->slots_info->remove_idx, ++ handle->slots_info->insert_idx, ++ handle->slots_info->tot_lost, ++ queuedPkts, slot->slot_state, ++ handle->slots_info->tot_insert, ++ handle->slots_info->tot_read); ++ #endif ++ ++#ifdef RING_DEBUG ++ printf("==>> poll @ [remove_idx=%u][slot_id=%u]\n", handle->slots_info->remove_idx, handle->slot_id); ++#endif ++ errno = 0; ++ rc = poll(&pfd, 1, -1); ++#ifdef RING_DEBUG ++ printf("==>> poll returned %d [%s][errno=%d][break_loop=%d]\n", ++ rc, strerror(errno), errno, handle->break_loop); ++#endif ++ numPollCalls++; ++ ++ if(rc == -1) { ++ if(errno == EINTR) { ++ if(handle->break_loop) { ++ handle->break_loop = 0; ++ return(-2); ++ } else ++ return(0); ++ } else ++ return(-1); ++ } ++ } 
++    } /* while() */
++  }
++#endif
++
+ #ifdef HAVE_PF_PACKET_SOCKETS
+   /*
+    * If this is a cooked device, leave extra room for a
+@@ -688,6 +1014,22 @@
+   socklen_t len = sizeof (struct tpacket_stats);
+ #endif
+ 
++#ifdef RING
++  if(handle->ring_fd > 0) {
++    stats->ps_recv = handle->slots_info->tot_read-ringStats.ps_recv;
++    stats->ps_drop = handle->slots_info->tot_lost-ringStats.ps_drop;
++
++    printf("RING: numPollCalls=%lu [%.1f packets/call]\n",
++           numPollCalls, (float)stats->ps_recv/(float)numPollCalls); /* numPollCalls is u_long, so it needs %lu */
++    printf("RING: [tot_pkts=%u][tot_read=%u][tot_lost=%u]\n",
++           handle->slots_info->tot_pkts,
++           handle->slots_info->tot_read,
++           handle->slots_info->tot_lost);
++
++    return(0);
++  }
++#endif
++
+ #ifdef HAVE_TPACKET_STATS
+   /*
+    * Try to get the packet counts from the kernel.
+@@ -879,6 +1221,11 @@
+     }
+   }
+ 
++
++#ifdef RING
++  if(handle->ring_fd <= 0) can_filter_in_kernel = 0;
++#endif
++
+   if (can_filter_in_kernel) {
+     if ((err = set_kernel_filter(handle, &fcode)) == 0)
+     {
+@@ -1348,7 +1695,7 @@
+     memset(&mr, 0, sizeof(mr));
+     mr.mr_ifindex = device_id;
+     mr.mr_type = PACKET_MR_PROMISC;
+-    if (setsockopt(sock_fd, SOL_PACKET,
++    if (setsockopt(sock_fd, 0 /* SOL_PACKET */,
+       PACKET_ADD_MEMBERSHIP, &mr, sizeof(mr)) == -1)
+     {
+       snprintf(ebuf, PCAP_ERRBUF_SIZE,
+@@ -1425,10 +1772,11 @@
+ 
+   /* Any pending errors, e.g., network is down?
*/ + +- if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) { +- snprintf(ebuf, PCAP_ERRBUF_SIZE, +- "getsockopt: %s", pcap_strerror(errno)); +- return -2; ++ if ((getsockopt(fd, PF_RING, SO_ERROR, &err, &errlen) == -1) ++ && (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1)) { ++ snprintf(ebuf, PCAP_ERRBUF_SIZE, ++ "getsockopt: %s", pcap_strerror(errno)); ++ return -2; + } + + if (err > 0) { +@@ -1482,6 +1830,13 @@ + struct pcap *p, *prevp; + struct ifreq ifr; + ++#ifdef RING ++ if(handle->ring_buffer != NULL) { ++ munmap(handle->ring_buffer, handle->slots_info->tot_mem); ++ handle->ring_buffer = NULL; ++ } ++#endif ++ + if (handle->md.clear_promisc) { + /* + * We put the interface into promiscuous mode; take +@@ -1698,11 +2053,11 @@ + } + + /* Any pending errors, e.g., network is down? */ +- +- if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) { +- snprintf(ebuf, PCAP_ERRBUF_SIZE, +- "getsockopt: %s", pcap_strerror(errno)); +- return -1; ++ if((getsockopt(fd, PF_RING, SO_ERROR, &err, &errlen) == -1) ++ && (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1)) { ++ snprintf(ebuf, PCAP_ERRBUF_SIZE, ++ "getsockopt: %s", pcap_strerror(errno)); ++ return -1; + } + + if (err > 0) { +@@ -1924,8 +2279,11 @@ + * the filtering done in userland even if it could have been + * done in the kernel. + */ +- if (setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER, +- &total_fcode, sizeof(total_fcode)) == 0) { ++ printf("pcap[setsockopt(%d)]\n", 0); ++ if (setsockopt(handle->fd, 0 /* SOL_SOCKET */, ++ SO_ATTACH_FILTER, ++ &total_fcode, ++ sizeof(total_fcode)) == 0) { + char drain[1]; + + /* +@@ -1933,6 +2291,9 @@ + */ + total_filter_on = 1; + ++#ifdef RING ++ if(!handle->ring_fd) { ++#endif + /* + * Save the socket's current mode, and put it in + * non-blocking mode; we drain it by reading packets +@@ -1955,12 +2316,15 @@ + return -2; + } + } +- } ++#ifdef RING ++ } ++#endif ++} + + /* + * Now attach the new filter. 
+ */ +- ret = setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER, ++ ret = setsockopt(handle->fd, 0 /* SOL_SOCKET */, SO_ATTACH_FILTER, + fcode, sizeof(*fcode)); + if (ret == -1 && total_filter_on) { + /* +@@ -1993,7 +2357,8 @@ + /* setsockopt() barfs unless it get a dummy parameter */ + int dummy; + +- return setsockopt(handle->fd, SOL_SOCKET, SO_DETACH_FILTER, +- &dummy, sizeof(dummy)); ++ return setsockopt(handle->fd, handle->ring_fd > 0 ? PF_RING : SOL_SOCKET, ++ SO_DETACH_FILTER, ++ &dummy, sizeof(dummy)); + } + #endif -- cgit v1.2.3