summaryrefslogtreecommitdiffstats
path: root/obsolete-buildroot
diff options
context:
space:
mode:
authormbm <mbm@3c298f89-4303-0410-b956-a3cf2f4a3e73>2004-05-31 06:43:24 +0000
committermbm <mbm@3c298f89-4303-0410-b956-a3cf2f4a3e73>2004-05-31 06:43:24 +0000
commite934f65d803ce9309af3701a79ceca82a4fadcc8 (patch)
treeee916877ad89c8c5dc1b0aa565389ba36476e63a /obsolete-buildroot
parentcaa3383f5093fe2925c75a961db1f3d6f5e1b040 (diff)
nfs swap patch
git-svn-id: svn://svn.openwrt.org/openwrt/trunk@45 3c298f89-4303-0410-b956-a3cf2f4a3e73
Diffstat (limited to 'obsolete-buildroot')
-rw-r--r--obsolete-buildroot/sources/openwrt-wrt54g-nfsswap.patch2362
1 files changed, 2362 insertions, 0 deletions
diff --git a/obsolete-buildroot/sources/openwrt-wrt54g-nfsswap.patch b/obsolete-buildroot/sources/openwrt-wrt54g-nfsswap.patch
new file mode 100644
index 000000000..bf848c129
--- /dev/null
+++ b/obsolete-buildroot/sources/openwrt-wrt54g-nfsswap.patch
@@ -0,0 +1,2362 @@
+diff -Nurb src/linux/linux.orig/Documentation/netswap.txt src/linux/linux/Documentation/netswap.txt
+--- src/linux/linux.orig/Documentation/netswap.txt 1969-12-31 19:00:00.000000000 -0500
++++ src/linux/linux/Documentation/netswap.txt 2004-05-31 02:18:03.000000000 -0400
+@@ -0,0 +1,51 @@
++ Swapping over network
++
++Support for this is enabled via the CONFIG_NETSWAP option, which is
++automatically enabled when enabling swap files located on NFS volumes
++(CONFIG_SWAP_VIA_NFS).
++
++When swapping to files located on a network file system like NFS or
++CODA or others or to nbd (network block device, see `nbd.txt')
++partitions there is the problem that this requires additional memory,
++besides the page which is currently swapped in or out, probably at
++least two more pages for each page in question.
++
++This means that not only there needs to be free space left in the swap
++file or the swap partition, but in addition there must be enough free
++memory left in the system to perform the swap out of pages.
++
++This is particularly painful as receiving data over the network itself
++consumes memory, and this memory is allocated from an interrupt
++context (i.e. in the interrupt handler of the network card). That
++means that on a congested network there are chances that the machine
++runs out of memory, simply because the network device's interrupt
++routines allocate memory faster than it is freed by swapping via
++network.
++
++To cope with this problem, there is a new socket option `SO_SWAPPING'
++which has to be set on the `SOL_SOCKET' level with setsockopt() (see
++setsockopt(2)). When this option is set on any network socket, then
++the system will start to drop network packets it receives on any other
++socket when the number of free pages falls below a certain threshold.
++
++This threshold initially is 4 pages less than `freepages.min' (see
++`Documentation/sysctl/vm.txt') but can be tuned using the sysctl
++interface by writing to the file `/proc/sys/net/swapping/threshold'.
++
++There are two other files:
++
++`/proc/sys/net/swapping/dropped':
++ how many network packets have been dropped so far. This file is
++ writable, writing to it simply sets the counter to the given value
++ (useful for resetting the counter).
++
++`/proc/sys/net/swapping/sock_count':
++ How many network sockets have the `SO_SWAPPING' option set (read
++ only, of course).
++
++When using swap-files on NFS volumes, then the `SO_SWAPPING' option is
++set or cleared by swapon/swapoff system calls, so the user need not
++care about it.
++
++Swapping over the network is insecure unless the data is
++encrypted, which is not the case with NFS. It is also very slow.
+diff -Nurb src/linux/linux.orig/Documentation/nfsswap.txt src/linux/linux/Documentation/nfsswap.txt
+--- src/linux/linux.orig/Documentation/nfsswap.txt 1969-12-31 19:00:00.000000000 -0500
++++ src/linux/linux/Documentation/nfsswap.txt 2004-05-31 02:18:03.000000000 -0400
+@@ -0,0 +1,41 @@
++ Swapping to files on NFS volumes
++
++To do this you have to say `Y' or `M' to the CONFIG_SWAP_VIA_NFS
++configuration option. When compiling support for this as a module you
++should read `Documentation/modules.txt'. For auto-loading of the
++module during the `swapon' system call you have to place a line like
++
++alias swapfile-mod nfsswap
++
++in `/etc/modules.conf' (or `/etc/conf.modules', depending on your
++setup). NFS volumes holding swapfiles should be mounted with `rsize'
++and `wsize' set to something less than the size of a page, otherwise
++deadlocks caused by memory fragmentation can happen, i.e. mount the
++volume which is to hold the swapfiles with
++
++mount -t nfs -o rsize=2048,wsize=2048 NFS_SERVER_IP:/server_volume /mount_point
++
++or set the option in `/etc/fstab'. Read `Documentation/nfsroot.txt' to
++learn how to set mount options for the root file system, if your swap
++files are to be located on the root file system.
++
++Setting the `rsize' and `wsize' to anything less than PAGE_SIZE is a
++performance hit, so you probably want to have at least two volumes
++mounted, one for the swapfiles, one for the rest.
++
++You may want to read `Documentation/netswap.txt' as well.
++
++Swapfiles on NFS volumes can be treated like any other swapfile,
++i.e.
++
++dd if=/dev/zero of=/swapfiles/SWAPFILE bs=1k count=20480
++mkswap /swapfiles/SWAPFILE
++swapon /swapfiles/SWAPFILE
++
++will create a 20M swapfile and tell the system to use it. Actually,
++one could use lseek(2) to create an empty swapfile. This is different
++from swapfiles located on local harddisk.
++
++Swapping over the network is insecure unless the data is
++encrypted, which is not the case with NFS. It is also very slow.
++
+diff -Nurb src/linux/linux.orig/drivers/block/blkpg.c src/linux/linux/drivers/block/blkpg.c
+--- src/linux/linux.orig/drivers/block/blkpg.c 2003-07-04 04:11:31.000000000 -0400
++++ src/linux/linux/drivers/block/blkpg.c 2004-05-31 02:18:03.000000000 -0400
+@@ -34,7 +34,7 @@
+ #include <linux/blk.h> /* for set_device_ro() */
+ #include <linux/blkpg.h>
+ #include <linux/genhd.h>
+-#include <linux/swap.h> /* for is_swap_partition() */
++#include <linux/swap.h> /* for swap_run_test() */
+ #include <linux/module.h> /* for EXPORT_SYMBOL */
+
+ #include <asm/uaccess.h>
+@@ -114,6 +114,29 @@
+ return 0;
+ }
+
++/* swap_run_test() applies this hook to all swapfiles until it returns
++ * "1". If it never returns "1", the result of swap_run_test() is "0",
++ * otherwise "1".
++ */
++static int is_swap_partition_hook(unsigned int flags, struct file *swap_file,
++ void *testdata)
++{
++ kdev_t swap_dev = S_ISBLK(swap_file->f_dentry->d_inode->i_mode)
++ ? swap_file->f_dentry->d_inode->i_rdev : 0;
++ kdev_t dev = *((kdev_t *)testdata);
++
++ if (flags & SWP_USED && dev == swap_dev) {
++ return 1;
++ } else {
++ return 0;
++ }
++}
++
++static inline int is_swap_partition(kdev_t dev)
++{
++ return swap_run_test(is_swap_partition_hook, &dev);
++}
++
+ /*
+ * Delete a partition given by partition number
+ *
+diff -Nurb src/linux/linux.orig/fs/Config.in src/linux/linux/fs/Config.in
+--- src/linux/linux.orig/fs/Config.in 2004-05-31 02:02:43.000000000 -0400
++++ src/linux/linux/fs/Config.in 2004-05-31 02:18:03.000000000 -0400
+@@ -4,6 +4,12 @@
+ mainmenu_option next_comment
+ comment 'File systems'
+
++if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
++ tristate 'Swapping to block devices' CONFIG_BLKDEV_SWAP
++else
++ define_bool CONFIG_BLKDEV_SWAP y
++fi
++
+ bool 'Quota support' CONFIG_QUOTA
+ tristate 'Kernel automounter support' CONFIG_AUTOFS_FS
+ tristate 'Kernel automounter version 4 support (also supports v3)' CONFIG_AUTOFS4_FS
+@@ -110,6 +116,12 @@
+ dep_tristate 'NFS file system support' CONFIG_NFS_FS $CONFIG_INET
+ dep_mbool ' Provide NFSv3 client support' CONFIG_NFS_V3 $CONFIG_NFS_FS
+ dep_bool ' Root file system on NFS' CONFIG_ROOT_NFS $CONFIG_NFS_FS $CONFIG_IP_PNP
++ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
++ dep_tristate ' Swapping via NFS (EXPERIMENTAL)' CONFIG_SWAP_VIA_NFS $CONFIG_NFS_FS
++ if [ "$CONFIG_SWAP_VIA_NFS" = "y" -o "$CONFIG_SWAP_VIA_NFS" = "m" ]; then
++ define_bool CONFIG_NETSWAP y
++ fi
++ fi
+
+ dep_tristate 'NFS server support' CONFIG_NFSD $CONFIG_INET
+ dep_mbool ' Provide NFSv3 server support' CONFIG_NFSD_V3 $CONFIG_NFSD
+diff -Nurb src/linux/linux.orig/fs/Makefile src/linux/linux/fs/Makefile
+--- src/linux/linux.orig/fs/Makefile 2004-05-31 02:02:42.000000000 -0400
++++ src/linux/linux/fs/Makefile 2004-05-31 02:18:03.000000000 -0400
+@@ -8,7 +8,7 @@
+ O_TARGET := fs.o
+
+ export-objs := filesystems.o open.o dcache.o buffer.o
+-mod-subdirs := nls
++mod-subdirs := nls nfs
+
+ obj-y := open.o read_write.o devices.o file_table.o buffer.o \
+ super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \
+@@ -70,6 +70,7 @@
+ subdir-$(CONFIG_JFS_FS) += jfs
+ subdir-$(CONFIG_SQUASHFS) += squashfs
+
++obj-$(CONFIG_BLKDEV_SWAP) += blkdev_swap.o
+
+ obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
+ obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o
+diff -Nurb src/linux/linux.orig/fs/blkdev_swap.c src/linux/linux/fs/blkdev_swap.c
+--- src/linux/linux.orig/fs/blkdev_swap.c 1969-12-31 19:00:00.000000000 -0500
++++ src/linux/linux/fs/blkdev_swap.c 2004-05-31 02:18:03.000000000 -0400
+@@ -0,0 +1,309 @@
++/*
++ * Swapping to partitions or files located on partitions.
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/locks.h>
++#include <linux/blkdev.h>
++#include <linux/pagemap.h>
++#include <linux/swap.h>
++#include <linux/fs.h>
++
++#ifdef DEBUG_BLKDEV_SWAP
++# define dprintk(fmt...) printk(fmt)
++#else
++# define dprintk(fmt...) do { /* debug output compiled out */ } while (0)
++#endif
++
++#define BLKDEV_SWAP_ID "blkdev"
++#define BLKDEV_FILE_SWAP_ID "blkdev file"
++
++/*
++ * Helper function, copied here from buffer.c
++ */
++
++/*
++ * Start I/O on a page.
++ * This function expects the page to be locked and may return
++ * before I/O is complete. You then have to check page->locked
++ * and page->uptodate.
++ *
++ * brw_page() is SMP-safe, although it's being called with the
++ * kernel lock held - but the code is ready.
++ *
++ * FIXME: we need a swapper_inode->get_block function to remove
++ * some of the bmap kludges and interface ugliness here.
++ */
++int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size)
++{
++ struct buffer_head *head, *bh;
++
++ if (!PageLocked(page))
++ panic("brw_page: page not locked for I/O");
++
++ if (!page->buffers)
++ create_empty_buffers(page, dev, size);
++ head = bh = page->buffers;
++
++ /* Stage 1: lock all the buffers */
++ do {
++ lock_buffer(bh);
++ bh->b_blocknr = *(b++);
++ set_bit(BH_Mapped, &bh->b_state);
++ set_buffer_async_io(bh);
++ bh = bh->b_this_page;
++ } while (bh != head);
++
++ /* Stage 2: start the IO */
++ do {
++ struct buffer_head *next = bh->b_this_page;
++ submit_bh(rw, bh);
++ bh = next;
++ } while (bh != head);
++ return 0;
++}
++
++/*
++ * We implement two methods: swapping to partitions, and swapping to files
++ * located on partitions.
++ */
++
++struct blkdev_swap_data {
++ kdev_t dev;
++};
++
++struct test_data {
++ struct file * filp;
++ kdev_t dev;
++};
++
++static int is_blkdev_swapping(unsigned int flags,
++ struct file * swapf,
++ void *data)
++{
++ struct test_data *testdata = (struct test_data *) data;
++ struct file * filp = testdata->filp;
++ kdev_t dev = testdata->dev;
++
++ /* Only check filp's that don't match the one already opened
++ * for us by sys_swapon(). Otherwise, we will always flag a
++ * busy swap file.
++ */
++
++ if (swapf != filp) {
++ if (dev == swapf->f_dentry->d_inode->i_rdev)
++ return 1;
++ }
++ return 0;
++}
++
++static int blkdev_swap_open(struct file * filp, void **dptr)
++{
++	int swapfilesize;	/* device size in pages, returned on success */
++	kdev_t dev;
++	struct blkdev_swap_data *data;
++	int error;
++	struct test_data testdata;
++
++	MOD_INC_USE_COUNT;
++
++	if (!S_ISBLK(filp->f_dentry->d_inode->i_mode)) {
++		dprintk("blkdev_swap_open: can't handle this swap file: %s\n",
++			filp->f_dentry->d_name.name);
++		error = 0; /* not for us */
++		goto bad_swap;
++	}
++
++	dev = filp->f_dentry->d_inode->i_rdev;
++	set_blocksize(dev, PAGE_SIZE);
++	error = -ENODEV;
++	if (!dev ||
++	    (blk_size[MAJOR(dev)] && !blk_size[MAJOR(dev)][MINOR(dev)])) {
++		printk("blkdev_swap_open: blkdev weirdness for %s\n",
++		       filp->f_dentry->d_name.name);
++		goto bad_swap;
++	}
++
++	/* Check to make sure that we aren't already swapping. */
++	error = -EBUSY;
++	testdata.filp = filp;
++	testdata.dev = dev;
++	if (swap_run_test(is_blkdev_swapping, &testdata)) {
++		printk("blkdev_swap_open: already swapping to %s\n",
++		       filp->f_dentry->d_name.name);
++		goto bad_swap;
++	}
++
++	swapfilesize = 0;
++	if (blk_size[MAJOR(dev)])
++		swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
++			>> (PAGE_SHIFT - 10);	/* blk_size[] is in KiB */
++
++	if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) {
++		printk("blkdev_swap_open: can't allocate data for %s\n",
++		       filp->f_dentry->d_name.name);
++		error = -ENOMEM;
++		goto bad_swap;
++	}
++	data->dev = dev;
++	*dptr = data;
++
++	dprintk("blkdev_swap_open: returning %d\n", swapfilesize);
++	return swapfilesize;
++
++ bad_swap:
++	MOD_DEC_USE_COUNT;
++	return error; /* 0: not a block device (not for us); < 0: real error */
++}
++
++static int blkdev_swap_release(struct file * filp, void *data)
++{
++ dprintk("blkdev_swap_release: releasing swap device %s\n",
++ filp->f_dentry->d_name.name);
++ kfree(data);
++ MOD_DEC_USE_COUNT;
++ return 0;
++}
++
++static int blkdev_rw_page(int rw, struct page *page, unsigned long offset,
++			  void *ptr)
++{
++	int block = offset; /* brw_page() reads int block numbers; don't pun the long */
++	brw_page(rw, page, ((struct blkdev_swap_data *)ptr)->dev, &block, PAGE_SIZE);
++	return 1;
++}
++
++static struct swap_ops blkdev_swap_ops = {
++ blkdev_swap_open,
++ blkdev_swap_release,
++ blkdev_rw_page
++};
++
++struct blkdevfile_swap_data {
++ struct inode *swapf;
++};
++
++static int is_blkdevfile_swapping(unsigned int flags,
++ struct file * swapf,
++ void * data)
++{
++ struct file * filp = (struct file *) data;
++
++ /* Only check filp's that don't match the one already opened
++ * for us by sys_swapon(). Otherwise, we will always flag a
++ * busy swap file.
++ */
++
++ if (swapf != filp) {
++ if (filp->f_dentry->d_inode == swapf->f_dentry->d_inode)
++ return 1;
++ }
++ return 0;
++}
++
++static int blkdevfile_swap_open(struct file *swapf, void **dptr)
++{
++	int error = 0;		/* 0 means "not for us" on the bad_swap path */
++	int swapfilesize;	/* file size in pages, returned on success */
++	struct blkdevfile_swap_data *data;
++
++	MOD_INC_USE_COUNT;
++
++	/* first check whether this is a regular file located on a local
++	 * hard disk
++	 */
++	if (!S_ISREG(swapf->f_dentry->d_inode->i_mode)) {
++		dprintk("blkdevfile_swap_open: "
++			"can't handle this swap file: %s\n",
++			swapf->f_dentry->d_name.name);
++		error = 0; /* not for us */
++		goto bad_swap;
++	}
++	if (!swapf->f_dentry->d_inode->i_mapping->a_ops->bmap) {
++		dprintk("blkdevfile_swap_open: no bmap for file: %s\n",
++			swapf->f_dentry->d_name.name);
++		error = 0; /* not for us */
++		goto bad_swap;
++	}
++
++	if (swap_run_test(is_blkdevfile_swapping, swapf)) {
++		dprintk("blkdevfile_swap_open: already swapping to %s\n",
++			swapf->f_dentry->d_name.name);
++		error = -EBUSY;
++		goto bad_swap;
++	}
++	swapfilesize = swapf->f_dentry->d_inode->i_size >> PAGE_SHIFT;
++	if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) {
++		error = -ENOMEM;
++		goto bad_swap;
++	}
++	data->swapf = swapf->f_dentry->d_inode;
++	*dptr = data;
++	return swapfilesize;
++
++ bad_swap:
++	MOD_DEC_USE_COUNT;
++	return error;
++}
++
++static int blkdevfile_swap_release(struct file *swapf, void *data)
++{
++ kfree(data);
++ MOD_DEC_USE_COUNT;
++ return 0;
++}
++
++static int blkdevfile_rw_page(int rw, struct page *page, unsigned long offset,
++ void *ptr)
++{
++ struct blkdevfile_swap_data *data = (struct blkdevfile_swap_data *)ptr;
++ struct inode * swapf = data->swapf;
++ int i, j;
++ unsigned int block = offset
++ << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
++ kdev_t dev = swapf->i_dev;
++ int block_size;
++ int zones[PAGE_SIZE/512];
++ int zones_used;
++
++ block_size = swapf->i_sb->s_blocksize;
++ for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size)
++ if (!(zones[i] = bmap(swapf,block++))) {
++ printk("blkdevfile_rw_page: bad swap file\n");
++ return 0;
++ }
++ zones_used = i;
++
++ /* block_size == PAGE_SIZE/zones_used */
++ brw_page(rw, page, dev, zones, block_size);
++ return 1;
++}
++
++static struct swap_ops blkdevfile_swap_ops = {
++ blkdevfile_swap_open,
++ blkdevfile_swap_release,
++ blkdevfile_rw_page
++ };
++
++int __init blkdev_swap_init(void)
++{
++ (void)register_swap_method(BLKDEV_SWAP_ID, &blkdev_swap_ops);
++ (void)register_swap_method(BLKDEV_FILE_SWAP_ID, &blkdevfile_swap_ops);
++ return 0;
++}
++
++void __exit blkdev_swap_exit(void)
++{
++ unregister_swap_method(BLKDEV_SWAP_ID);
++ unregister_swap_method(BLKDEV_FILE_SWAP_ID);
++}
++
++module_init(blkdev_swap_init)
++module_exit(blkdev_swap_exit)
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Many. Stuffed into a module by cH (Claus-Justus Heine)");
++MODULE_DESCRIPTION("Swapping to partitions and files on local hard-disks");
+diff -Nurb src/linux/linux.orig/fs/buffer.c src/linux/linux/fs/buffer.c
+--- src/linux/linux.orig/fs/buffer.c 2003-07-04 04:12:05.000000000 -0400
++++ src/linux/linux/fs/buffer.c 2004-05-31 02:21:05.000000000 -0400
+@@ -743,7 +743,7 @@
+ bh->b_private = private;
+ }
+
+-static void end_buffer_io_async(struct buffer_head * bh, int uptodate)
++void end_buffer_io_async(struct buffer_head * bh, int uptodate)
+ {
+ static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
+ unsigned long flags;
+@@ -2344,35 +2344,6 @@
+ return err;
+ }
+
+-int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size)
+-{
+- struct buffer_head *head, *bh;
+-
+- if (!PageLocked(page))
+- panic("brw_page: page not locked for I/O");
+-
+- if (!page->buffers)
+- create_empty_buffers(page, dev, size);
+- head = bh = page->buffers;
+-
+- /* Stage 1: lock all the buffers */
+- do {
+- lock_buffer(bh);
+- bh->b_blocknr = *(b++);
+- set_bit(BH_Mapped, &bh->b_state);
+- set_buffer_async_io(bh);
+- bh = bh->b_this_page;
+- } while (bh != head);
+-
+- /* Stage 2: start the IO */
+- do {
+- struct buffer_head *next = bh->b_this_page;
+- submit_bh(rw, bh);
+- bh = next;
+- } while (bh != head);
+- return 0;
+-}
+-
+ int block_symlink(struct inode *inode, const char *symname, int len)
+ {
+ struct address_space *mapping = inode->i_mapping;
+diff -Nurb src/linux/linux.orig/fs/nfs/Makefile src/linux/linux/fs/nfs/Makefile
+--- src/linux/linux.orig/fs/nfs/Makefile 2003-07-04 04:12:07.000000000 -0400
++++ src/linux/linux/fs/nfs/Makefile 2004-05-31 02:18:03.000000000 -0400
+@@ -15,6 +15,14 @@
+ obj-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o
+ obj-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
+
+-obj-m := $(O_TARGET)
++obj-$(CONFIG_SWAP_VIA_NFS) += nfsswap.o
++ifeq ($(CONFIG_SWAP_VIA_NFS),m)
++export-objs := nfs_syms.o
++obj-y += nfs_syms.o
++endif
++
++ifeq ($(CONFIG_NFS_FS),m)
++obj-m += $(O_TARGET)
++endif
+
+ include $(TOPDIR)/Rules.make
+diff -Nurb src/linux/linux.orig/fs/nfs/file.c src/linux/linux/fs/nfs/file.c
+--- src/linux/linux.orig/fs/nfs/file.c 2003-07-04 04:12:07.000000000 -0400
++++ src/linux/linux/fs/nfs/file.c 2004-05-31 02:18:03.000000000 -0400
+@@ -58,11 +58,6 @@
+ setattr: nfs_notify_change,
+ };
+
+-/* Hack for future NFS swap support */
+-#ifndef IS_SWAPFILE
+-# define IS_SWAPFILE(inode) (0)
+-#endif
+-
+ /*
+ * Flush all dirty pages, and check for write errors.
+ *
+@@ -217,8 +212,6 @@
+ inode->i_ino, (unsigned long) count, (unsigned long) *ppos);
+
+ result = -EBUSY;
+- if (IS_SWAPFILE(inode))
+- goto out_swapfile;
+ result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (result)
+ goto out;
+@@ -230,10 +223,6 @@
+ result = generic_file_write(file, buf, count, ppos);
+ out:
+ return result;
+-
+-out_swapfile:
+- printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
+- goto out;
+ }
+
+ /*
+diff -Nurb src/linux/linux.orig/fs/nfs/nfs_syms.c src/linux/linux/fs/nfs/nfs_syms.c
+--- src/linux/linux.orig/fs/nfs/nfs_syms.c 1969-12-31 19:00:00.000000000 -0500
++++ src/linux/linux/fs/nfs/nfs_syms.c 2004-05-31 02:18:03.000000000 -0400
+@@ -0,0 +1,10 @@
++#include <linux/config.h>
++#define __NO_VERSION__
++#include <linux/module.h>
++#include <linux/types.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/nfs_fs.h>
++
++EXPORT_SYMBOL(__nfs_refresh_inode);
++EXPORT_SYMBOL(nfs_write_attributes);
++
+diff -Nurb src/linux/linux.orig/fs/nfs/nfsswap.c src/linux/linux/fs/nfs/nfsswap.c
+--- src/linux/linux.orig/fs/nfs/nfsswap.c 1969-12-31 19:00:00.000000000 -0500
++++ src/linux/linux/fs/nfs/nfsswap.c 2004-05-31 02:18:03.000000000 -0400
+@@ -0,0 +1,350 @@
++/*
++ * Swapping to files located on NFS mounted volumes
++ * Copyright (c) 2000 Claus-Justus Heine
++ *
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/types.h>
++#include <linux/slab.h>
++#include <linux/swap.h>
++#include <linux/pagemap.h>
++#include <linux/file.h>
++#include <linux/fs.h>
++#include <linux/socket.h>
++#include <linux/smp_lock.h>
++#include <net/netswapping.h>
++#include <net/sock.h>
++
++#include <linux/sunrpc/clnt.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_fs_sb.h>
++#include <asm/uaccess.h>
++
++#define NFSDBG_FACILITY NFSDBG_SWAP
++
++#define NFS_SWAP_ID "nfs file"
++
++/* we cache some values here. In principle, we only need the file.
++ */
++struct nfs_swap_data {
++ struct file *file;
++ struct inode *inode;
++ struct nfs_server *server;
++ struct socket *socket;
++};
++
++/* Nearly a clone of nfs_readpage_sync() in read.c, but "struct page" does not
++ * contain information about the file offset when swapping. So.
++ */
++static int nfs_read_swap_page(struct page *page,
++ struct nfs_server *server,
++ struct inode *inode,
++ struct file *file)
++{
++ unsigned int rsize = server->rsize;
++ unsigned int count = PAGE_SIZE;
++ unsigned int offset = 0; /* always at start of page */
++ int result, eof;
++ struct rpc_cred *cred;
++ struct nfs_fattr fattr;
++
++ cred = nfs_file_cred(file);
++
++ do {
++ if (count < rsize)
++ rsize = count;
++
++ lock_kernel();
++ result = NFS_PROTO(inode)->read(inode, cred,
++ &fattr,
++ NFS_RPC_SWAPFLAGS,
++ offset, rsize, page, &eof);
++ nfs_refresh_inode(inode, &fattr);
++ unlock_kernel();
++
++ /*
++ * Even if we had a partial success we can't mark the page
++ * cache valid.
++ */
++ if (result < 0) {
++ if (result == -EISDIR)
++ result = -EINVAL;
++ goto io_error;
++ }
++ count -= result;
++ offset += result;
++ if (result < rsize) /* NFSv2ism */
++ break;
++ } while (count);
++
++ if (count) {
++ char *kaddr = kmap(page);
++ memset(kaddr + offset, 0, count);
++ kunmap(page);
++ }
++ flush_dcache_page(page);
++ result = 0;
++
++io_error:
++ return result;
++}
++
++/* Like nfs_writepage_sync(), but when swapping page->index does not encode
++ * the offset in the swap file alone.
++ *
++ */
++static int nfs_write_swap_page(struct page *page,
++ struct nfs_server *server,
++ struct inode *inode,
++ struct file *file)
++{
++ struct rpc_cred *cred;
++ unsigned int wsize = server->wsize;
++ unsigned int count = PAGE_SIZE;
++ unsigned int offset = 0;
++ int result;
++ struct nfs_writeverf verf;
++ struct nfs_fattr fattr;
++
++ cred = nfs_file_cred(file);
++
++ do {
++ if (count < wsize)
++ wsize = count;
++
++ lock_kernel();
++ result = NFS_PROTO(inode)->write(inode, cred, &fattr,
++ NFS_RW_SWAP|NFS_RW_SYNC,
++ offset, wsize, page, &verf);
++ nfs_write_attributes(inode, &fattr);
++ unlock_kernel();
++
++ if (result < 0) {
++ goto io_error;
++ }
++ if (result != wsize)
++ printk("NFS: short write, wsize=%u, result=%d\n",
++ wsize, result);
++ offset += wsize;
++ count -= wsize;
++ /*
++ * If we've extended the file, update the inode
++ * now so we don't invalidate the cache.
++ */
++ if (offset > inode->i_size)
++ inode->i_size = offset;
++ } while (count);
++
++ result = 0;
++
++io_error:
++
++ return result;
++}
++
++/* Unluckily (for us) from 2.4.19 -> 2.4.20 the nfs-procs were
++ * changed and now expect a proper file-mapping page, where index
++ * encodes the offset alone.
++ *
++ * What we do: we save the original value of page->index, initialize
++ * page->index to what the NFS/sun-rpc subsystem expects and restore
++ * the index later.
++ */
++static int nfs_rw_swap_page(int rw, struct page *page,
++ unsigned long offset, void *dptr)
++{
++ int error;
++ struct nfs_swap_data *data = dptr;
++ unsigned long alloc_flag = current->flags & PF_MEMALLOC;
++ unsigned long page_index;
++
++ if (!PageLocked(page))
++ panic("nfs_rw_swap_page: page not locked for I/O");
++
++ /* prevent memory deadlocks */
++ if (!(current->flags & PF_MEMALLOC)) {
++ dprintk("nfs_rw_swap_page: Setting PF_MEMALLOC\n");
++ }
++ current->flags |= PF_MEMALLOC;
++
++ /* now tweak the page->index field ... */
++ page_index = page->index;
++ page->index = ((loff_t)offset*(loff_t)PAGE_SIZE) >> PAGE_CACHE_SHIFT;
++
++ if (rw == WRITE) {
++ error = nfs_write_swap_page(page,
++ data->server,
++ data->inode,
++ data->file);
++ } else {
++ error = nfs_read_swap_page(page,
++ data->server,
++ data->inode,
++ data->file);
++ }
++
++ if (!alloc_flag) {
++ current->flags &= ~PF_MEMALLOC;
++ }
++
++ /* now restore the page->index field ... */
++ page->index = page_index;
++
++ if (error) {
++ /* Must mark the page invalid after I/O error */
++ SetPageError(page);
++ ClearPageUptodate(page);
++ } else {
++ ClearPageError(page);
++ SetPageUptodate(page);
++ }
++
++ if (!error) { /* in case of an error rw_swap_page() likes to unlock
++ * itself.
++ */
++ UnlockPage(page);
++ }
++
++ return error < 0 ? 0 : 1;
++}
++
++static int is_nfsfile_swapping(unsigned int flags,
++ struct file * swapf,
++ void * data)
++{
++ struct file * filp = (struct file *) data;
++
++ /* Only check filp's that don't match the one already opened
++ * for us by sys_swapon(). Otherwise, we will always flag a
++ * busy swap file.
++ */
++
++ if (swapf != filp) {
++ if (filp->f_dentry->d_inode == swapf->f_dentry->d_inode)
++ return 1;
++ }
++ return 0;
++}
++
++static int nfs_swap_open(struct file *swapf, void **dptr)
++{
++ int error = 0;
++ int swapfilesize;
++ struct nfs_swap_data *data;
++ int on = 1;
++ mm_segment_t fs;
++ struct inode *inode = swapf->f_dentry->d_inode;
++
++ MOD_INC_USE_COUNT;
++
++ if (!S_ISREG(inode->i_mode)) {
++ dprintk("nfs_swap_open: can't handle this swap file: %s\n",
++ swapf->f_dentry->d_name.name);
++ error = 0; /* not for us */
++ goto bad_swap;
++ }
++ /* determine whether this file really is located on an NFS mounted
++ * volume
++ */
++ if (!inode->i_sb || inode->i_sb->s_magic != NFS_SUPER_MAGIC) {
++ dprintk("nfs_swap_open: %s is not an NFS file.\n",
++ swapf->f_dentry->d_name.name);
++ error = 0; /* not for us */
++ goto bad_swap;
++ }
++
++ if (swap_run_test(is_nfsfile_swapping, swapf)) {
++ dprintk("nfs_swap_open: already swapping to %s\n",
++ swapf->f_dentry->d_name.name);
++ error = -EBUSY;
++ goto bad_swap;
++ }
++ swapfilesize = inode->i_size >> PAGE_SHIFT;
++ if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) {
++ error = -ENOMEM;
++ goto bad_swap;
++ }
++ data->file = swapf;
++ data->inode = inode;
++ data->server = NFS_SERVER(inode);
++ data->socket = data->server->client->cl_xprt->sock;
++
++ /* set socket option SO_SWAPPING */
++ fs = get_fs();
++ set_fs(KERNEL_DS);
++ error = sock_setsockopt(data->socket, SOL_SOCKET, SO_SWAPPING,
++ (char *)&on, sizeof(on));
++ set_fs(fs);
++ if (error) {
++ dprintk("nfs_swap_open: error setting SO_SWAPPING\n");
++ goto bad_swap_2;
++ }
++
++ *dptr = data;
++ return swapfilesize;
++
++ bad_swap_2:
++ kfree(data);
++ bad_swap:
++ MOD_DEC_USE_COUNT;
++ return error;
++}
++
++static int nfs_swap_release(struct file *swapf, void *dptr)
++{
++	struct nfs_swap_data *data = (struct nfs_swap_data *)dptr;
++	int off = 0;
++	mm_segment_t fs;
++	int error;
++
++#if 1 /* paranoia: the cached swap data must still match the swap file */
++	if (swapf != data->file ||
++	    swapf->f_dentry->d_inode != data->inode ||
++	    !swapf->f_dentry->d_inode->i_sb ||
++	    swapf->f_dentry->d_inode->i_sb->s_magic != NFS_SUPER_MAGIC ||
++	    NFS_SERVER(swapf->f_dentry->d_inode) != data->server ||
++	    data->socket != data->server->client->cl_xprt->sock) {
++		panic("nfs_swap_release: nfs swap data messed up");
++	}
++#endif
++
++	/* remove socket option SO_SWAPPING */
++	fs = get_fs();
++	set_fs(KERNEL_DS);	/* option value lives in kernel space */
++	error = sock_setsockopt(data->socket, SOL_SOCKET, SO_SWAPPING,
++				(char *)&off, sizeof(off));
++	set_fs(fs);
++	if (error) {
++		dprintk("nfs_swap_release: error clearing SO_SWAPPING\n");
++	}
++	kfree(data);
++	MOD_DEC_USE_COUNT;
++	return error;
++}
++
++static struct swap_ops nfs_swap_ops = {
++ open: nfs_swap_open,
++ release: nfs_swap_release,
++ rw_page: nfs_rw_swap_page
++};
++
++int __init nfs_swap_init(void)
++{
++ (void)register_swap_method(NFS_SWAP_ID, &nfs_swap_ops);
++ return 0;
++}
++
++void __exit nfs_swap_exit(void)
++{
++ unregister_swap_method(NFS_SWAP_ID);
++}
++
++module_init(nfs_swap_init)
++module_exit(nfs_swap_exit)
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("(c) 1996-2002 cH (Claus-Justus Heine)");
++MODULE_DESCRIPTION("Swapping to files located on volumes mounted via NFS");
+diff -Nurb src/linux/linux.orig/fs/nfs/read.c src/linux/linux/fs/nfs/read.c
+--- src/linux/linux.orig/fs/nfs/read.c 2003-07-04 04:12:08.000000000 -0400
++++ src/linux/linux/fs/nfs/read.c 2004-05-31 02:18:03.000000000 -0400
+@@ -50,11 +50,6 @@
+ */
+ static void nfs_readpage_result(struct rpc_task *task);
+
+-/* Hack for future NFS swap support */
+-#ifndef IS_SWAPFILE
+-# define IS_SWAPFILE(inode) (0)
+-#endif
+-
+ static kmem_cache_t *nfs_rdata_cachep;
+
+ static __inline__ struct nfs_read_data *nfs_readdata_alloc(void)
+@@ -92,7 +87,6 @@
+ int rsize = NFS_SERVER(inode)->rsize;
+ int result;
+ int count = PAGE_CACHE_SIZE;
+- int flags = IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0;
+ int eof;
+
+ dprintk("NFS: nfs_readpage_sync(%p)\n", page);
+@@ -114,7 +108,7 @@
+ offset, rsize, page);
+
+ lock_kernel();
+- result = NFS_PROTO(inode)->read(inode, cred, &fattr, flags,
++ result = NFS_PROTO(inode)->read(inode, cred, &fattr, 0,
+ offset, rsize, page, &eof);
+ nfs_refresh_inode(inode, &fattr);
+ unlock_kernel();
+@@ -246,7 +240,7 @@
+ task = &data->task;
+
+ /* N.B. Do we need to test? Never called for swapfile inode */
+- flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
++ flags = RPC_TASK_ASYNC;
+
+ nfs_read_rpcsetup(head, data);
+
+@@ -476,8 +470,6 @@
+ }
+
+ error = nfs_readpage_sync(file, inode, page);
+- if (error < 0 && IS_SWAPFILE(inode))
+- printk("Aiee.. nfs swap-in of page failed!\n");
+ out:
+ return error;
+
+diff -Nurb src/linux/linux.orig/fs/nfs/write.c src/linux/linux/fs/nfs/write.c
+--- src/linux/linux.orig/fs/nfs/write.c 2003-07-04 04:12:08.000000000 -0400
++++ src/linux/linux/fs/nfs/write.c 2004-05-31 02:20:47.000000000 -0400
+@@ -3,7 +3,6 @@
+ #include <linux/config.h>
+ #include <linux/types.h>
+ #include <linux/slab.h>
+-#include <linux/swap.h>
+ #include <linux/pagemap.h>
+ #include <linux/file.h>
+
+@@ -46,11 +45,6 @@
+ static void nfs_commit_done(struct rpc_task *);
+ #endif
+
+-/* Hack for future NFS swap support */
+-#ifndef IS_SWAPFILE
+-# define IS_SWAPFILE(inode) (0)
+-#endif
+-
+ static kmem_cache_t *nfs_wdata_cachep;
+
+ static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
+@@ -82,7 +76,7 @@
+ * For the moment, we just call nfs_refresh_inode().
+ */
+ static __inline__ int
+-nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
++__nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
+ {
+ if ((fattr->valid & NFS_ATTR_FATTR) && !(fattr->valid & NFS_ATTR_WCC)) {
+ fattr->pre_size = NFS_CACHE_ISIZE(inode);
+@@ -93,6 +87,11 @@
+ return nfs_refresh_inode(inode, fattr);
+ }
+
++int nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
++{
++ return __nfs_write_attributes(inode, fattr);
++}
++
+ /*
+ * Write a page synchronously.
+ * Offset is the data offset within the page.
+@@ -104,8 +103,7 @@
+ struct rpc_cred *cred = NULL;
+ loff_t base;
+ unsigned int wsize = NFS_SERVER(inode)->wsize;
+- int result, refresh = 0, written = 0, flags;
+- u8 *buffer;
++ int result, refresh = 0, written = 0;
+ struct nfs_fattr fattr;
+ struct nfs_writeverf verf;
+
+@@ -121,15 +119,14 @@
+
+ base = page_offset(page) + offset;
+
+- flags = ((IS_SWAPFILE(inode)) ? NFS_RW_SWAP : 0) | NFS_RW_SYNC;
+-
+ do {
+- if (count < wsize && !IS_SWAPFILE(inode))
++ if (count < wsize)
+ wsize = count;
+
+- result = NFS_PROTO(inode)->write(inode, cred, &fattr, flags,
++ result = NFS_PROTO(inode)->write(inode, cred, &fattr,
++ NFS_RW_SYNC,
+ offset, wsize, page, &verf);
+- nfs_write_attributes(inode, &fattr);
++ __nfs_write_attributes(inode, &fattr);
+
+ if (result < 0) {
+ /* Must mark the page invalid after I/O error */
+@@ -140,7 +137,6 @@
+ printk("NFS: short write, wsize=%u, result=%d\n",
+ wsize, result);
+ refresh = 1;
+- buffer += wsize;
+ base += wsize;
+ offset += wsize;
+ written += wsize;
+@@ -979,7 +975,7 @@
+ }
+ #endif
+
+- nfs_write_attributes(inode, resp->fattr);
++ __nfs_write_attributes(inode, resp->fattr);
+ while (!list_empty(&data->pages)) {
+ req = nfs_list_entry(data->pages.next);
+ nfs_list_remove_request(req);
+@@ -1133,7 +1129,7 @@
+ if (nfs_async_handle_jukebox(task))
+ return;
+
+- nfs_write_attributes(inode, resp->fattr);
++ __nfs_write_attributes(inode, resp->fattr);
+ while (!list_empty(&data->pages)) {
+ req = nfs_list_entry(data->pages.next);
+ nfs_list_remove_request(req);
+diff -Nurb src/linux/linux.orig/include/linux/fs.h src/linux/linux/include/linux/fs.h
+--- src/linux/linux.orig/include/linux/fs.h 2004-05-31 02:06:19.000000000 -0400
++++ src/linux/linux/include/linux/fs.h 2004-05-31 02:18:03.000000000 -0400
+@@ -1500,6 +1500,10 @@
+ extern int inode_change_ok(struct inode *, struct iattr *);
+ extern int inode_setattr(struct inode *, struct iattr *);
+
++/* for swapping to block devices */
++void create_empty_buffers(struct page *page, kdev_t dev, unsigned long blocksize);
++void end_buffer_io_async(struct buffer_head * bh, int uptodate);
++
+ /*
+ * Common dentry functions for inclusion in the VFS
+ * or in other stackable file systems. Some of these
+diff -Nurb src/linux/linux.orig/include/linux/nfs_fs.h src/linux/linux/include/linux/nfs_fs.h
+--- src/linux/linux.orig/include/linux/nfs_fs.h 2004-05-31 02:06:28.000000000 -0400
++++ src/linux/linux/include/linux/nfs_fs.h 2004-05-31 02:18:03.000000000 -0400
+@@ -40,8 +40,8 @@
+ */
+ #define NFS_MAX_DIRCACHE 16
+
+-#define NFS_MAX_FILE_IO_BUFFER_SIZE 32768
+-#define NFS_DEF_FILE_IO_BUFFER_SIZE 4096
++#define NFS_MAX_FILE_IO_BUFFER_SIZE (8*PAGE_SIZE)
++#define NFS_DEF_FILE_IO_BUFFER_SIZE PAGE_SIZE
+
+ /*
+ * The upper limit on timeouts for the exponential backoff algorithm.
+@@ -205,6 +205,8 @@
+ extern int nfs_writepage(struct page *);
+ extern int nfs_flush_incompatible(struct file *file, struct page *page);
+ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
++extern int nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr);
++
+ /*
+ * Try to write back everything synchronously (but check the
+ * return value!)
+@@ -375,6 +377,7 @@
+ #define NFSDBG_XDR 0x0020
+ #define NFSDBG_FILE 0x0040
+ #define NFSDBG_ROOT 0x0080
++#define NFSDBG_SWAP 0x0100
+ #define NFSDBG_ALL 0xFFFF
+
+ #ifdef __KERNEL__
+diff -Nurb src/linux/linux.orig/include/linux/slab.h src/linux/linux/include/linux/slab.h
+--- src/linux/linux.orig/include/linux/slab.h 2004-05-31 02:06:19.000000000 -0400
++++ src/linux/linux/include/linux/slab.h 2004-05-31 02:18:03.000000000 -0400
+@@ -39,6 +39,7 @@
+ #define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */
+ #define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */
+ #define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */
++#define SLAB_LOW_GFP_ORDER 0x00010000UL /* use as low a gfp order as possible */
+
+ /* flags passed to a constructor func */
+ #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */
+diff -Nurb src/linux/linux.orig/include/linux/swap.h src/linux/linux/include/linux/swap.h
+--- src/linux/linux.orig/include/linux/swap.h 2004-05-31 02:06:19.000000000 -0400
++++ src/linux/linux/include/linux/swap.h 2004-05-31 02:18:03.000000000 -0400
+@@ -58,15 +58,29 @@
+ #define SWAP_MAP_MAX 0x7fff
+ #define SWAP_MAP_BAD 0x8000
+
++struct swap_ops {
++ int (*open)(struct file *swapf, void **data);
++ int (*release)(struct file *swapf, void *data);
++ int (*rw_page)(int rw,
++ struct page *page, unsigned long offset, void *data);
++};
++
++struct swap_method {
++ struct swap_method *next;
++ char * name;
++ struct swap_ops *ops;
++ int use_count;
++};
++
+ /*
+ * The in-memory structure used to track swap areas.
+ */
+ struct swap_info_struct {
+ unsigned int flags;
+- kdev_t swap_device;
++ struct file *swap_file;
++ struct swap_method *method;
++ void *data;
+ spinlock_t sdev_lock;
+- struct dentry * swap_file;
+- struct vfsmount *swap_vfsmnt;
+ unsigned short * swap_map;
+ unsigned int lowest_bit;
+ unsigned int highest_bit;
+@@ -141,11 +155,15 @@
+ extern int total_swap_pages;
+ extern unsigned int nr_swapfiles;
+ extern struct swap_info_struct swap_info[];
+-extern int is_swap_partition(kdev_t);
++extern int register_swap_method(char *name, struct swap_ops *ops);
++extern int unregister_swap_method(char *name);
++extern int swap_run_test(int (*test_fct)(unsigned int flags,
++ struct file *swap_file,
++ void *testdata), void *testdata);
+ extern void si_swapinfo(struct sysinfo *);
+ extern swp_entry_t get_swap_page(void);
+-extern void get_swaphandle_info(swp_entry_t, unsigned long *, kdev_t *,
+- struct inode **);
++struct swap_method *get_swaphandle_info(swp_entry_t entry,
++ unsigned long *offset, void **data);
+ extern int swap_duplicate(swp_entry_t);
+ extern int swap_count(struct page *);
+ extern int valid_swaphandles(swp_entry_t, unsigned long *);
+diff -Nurb src/linux/linux.orig/include/net/netswapping.h src/linux/linux/include/net/netswapping.h
+--- src/linux/linux.orig/include/net/netswapping.h 1969-12-31 19:00:00.000000000 -0500
++++ src/linux/linux/include/net/netswapping.h 2004-05-31 02:18:03.000000000 -0400
+@@ -0,0 +1,47 @@
++#ifndef _LINUX_NETSWAPPING_H
++#define _LINUX_NETSWAPPING_H
++
++#include <linux/swap.h>
++#include <linux/init.h>
++
++/* It is a mess. Socket options are defined in asm-ARCH/socket.h */
++
++#define SO_SWAPPING 0x00100000 /* hopefully not used by anybody else */
++
++#ifdef __KERNEL__
++
++#define CTL_NETSWAP 0x00100000
++
++enum {
++ NET_SWAP_DROPPED = 1,
++ NET_SWAP_DROP_THRESHOLD = 2,
++ NET_SWAP_SOCK_COUNT = 3
++};
++
++extern unsigned int netswap_free_pages_min;
++extern int netswap_sock_count;
++extern unsigned int netswap_dropped;
++
++/* this is "#defined" and not inline because sock.h includes us, but we need
++ * the "struct sock" definition.
++ */
++#define netswap_low_memory(sk, skb) \
++({ \
++ int _ret = 0; \
++ \
++ if (netswap_sock_count > 0 && /* anybody swapping via network? */ \
++ !(sk)->swapping && /* but we are not needed for swapping */ \
++ nr_free_pages() < netswap_free_pages_min) { /* so drop us */ \
++ printk("netswap_low_memory: " \
++ "dropping skb 0x%p@0x%p\n", skb, sk); \
++ netswap_dropped ++; \
++ _ret = 1; \
++ } \
++ _ret; \
++})
++
++extern int __init netswap_init(void);
++
++#endif
++
++#endif
+diff -Nurb src/linux/linux.orig/include/net/sock.h src/linux/linux/include/net/sock.h
+--- src/linux/linux.orig/include/net/sock.h 2004-05-31 02:07:17.000000000 -0400
++++ src/linux/linux/include/net/sock.h 2004-05-31 02:18:03.000000000 -0400
+@@ -103,6 +103,10 @@
+ #include <linux/filter.h>
+ #endif
+
++#ifdef CONFIG_NETSWAP
++#include <net/netswapping.h>
++#endif
++
+ #include <asm/atomic.h>
+ #include <net/dst.h>
+
+@@ -536,6 +540,12 @@
+ no_check,
+ broadcast,
+ bsdism;
++#ifdef CONFIG_NETSWAP
++ /* Increased by SO_SWAPPING with arg != 0, decreased by
++ * SO_SWAPPING with arg 0
++ */
++ int swapping;
++#endif
+ unsigned char debug;
+ unsigned char rcvtstamp;
+ unsigned char use_write_queue;
+@@ -1165,6 +1175,11 @@
+ return err; /* Toss packet */
+ }
+ #endif /* CONFIG_FILTER */
++#ifdef CONFIG_NETSWAP
++ /* a macro defined in net/netswapping.h */
++ if (netswap_low_memory(sk, skb))
++ return -ENOMEM;
++#endif /* CONFIG_NETSWAP */
+
+ skb->dev = NULL;
+ skb_set_owner_r(skb, sk);
+diff -Nurb src/linux/linux.orig/kernel/ksyms.c src/linux/linux/kernel/ksyms.c
+--- src/linux/linux.orig/kernel/ksyms.c 2004-05-31 02:02:43.000000000 -0400
++++ src/linux/linux/kernel/ksyms.c 2004-05-31 02:18:03.000000000 -0400
+@@ -41,6 +41,7 @@
+ #include <linux/mm.h>
+ #include <linux/capability.h>
+ #include <linux/highuid.h>
++#include <linux/swapctl.h>
+ #include <linux/brlock.h>
+ #include <linux/fs.h>
+ #include <linux/tty.h>
+@@ -127,6 +128,11 @@
+ EXPORT_SYMBOL(kmap_prot);
+ EXPORT_SYMBOL(kmap_pte);
+ #endif
++EXPORT_SYMBOL(nr_free_pages);
++/* EXPORT_SYMBOL(freepages); */
++EXPORT_SYMBOL(register_swap_method);
++EXPORT_SYMBOL(unregister_swap_method);
++EXPORT_SYMBOL(swap_run_test);
+
+ /* filesystem internal functions */
+ EXPORT_SYMBOL(def_blk_fops);
+@@ -531,7 +537,7 @@
+ EXPORT_SYMBOL(make_bad_inode);
+ EXPORT_SYMBOL(is_bad_inode);
+ EXPORT_SYMBOL(event);
+-EXPORT_SYMBOL(brw_page);
++EXPORT_SYMBOL(end_buffer_io_async);
+ EXPORT_SYMBOL(__inode_dir_notify);
+
+ #ifdef CONFIG_UID16
+diff -Nurb src/linux/linux.orig/mm/page_io.c src/linux/linux/mm/page_io.c
+--- src/linux/linux.orig/mm/page_io.c 2003-07-04 04:12:29.000000000 -0400
++++ src/linux/linux/mm/page_io.c 2004-05-31 02:18:03.000000000 -0400
+@@ -36,11 +36,8 @@
+ static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page)
+ {
+ unsigned long offset;
+- int zones[PAGE_SIZE/512];
+- int zones_used;
+- kdev_t dev = 0;
+- int block_size;
+- struct inode *swapf = 0;
++ struct swap_method *method;
++ void *data;
+
+ if (rw == READ) {
+ ClearPageUptodate(page);
+@@ -48,30 +45,11 @@
+ } else
+ kstat.pswpout++;
+
+- get_swaphandle_info(entry, &offset, &dev, &swapf);
+- if (dev) {
+- zones[0] = offset;
+- zones_used = 1;
+- block_size = PAGE_SIZE;
+- } else if (swapf) {
+- int i, j;
+- unsigned int block = offset
+- << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
+-
+- block_size = swapf->i_sb->s_blocksize;
+- for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size)
+- if (!(zones[i] = bmap(swapf,block++))) {
+- printk("rw_swap_page: bad swap file\n");
+- return 0;
+- }
+- zones_used = i;
+- dev = swapf->i_dev;
+- } else {
++ method = get_swaphandle_info(entry, &offset, &data);
++ if (!method || !method->ops->rw_page(rw, page, offset, data)) {
+ return 0;
+ }
+
+- /* block_size == PAGE_SIZE/zones_used */
+- brw_page(rw, page, dev, zones, block_size);
+ return 1;
+ }
+
+diff -Nurb src/linux/linux.orig/mm/slab.c src/linux/linux/mm/slab.c
+--- src/linux/linux.orig/mm/slab.c 2003-07-04 04:12:29.000000000 -0400
++++ src/linux/linux/mm/slab.c 2004-05-31 02:18:03.000000000 -0400
+@@ -111,10 +111,12 @@
+ # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
+ SLAB_POISON | SLAB_HWCACHE_ALIGN | \
+ SLAB_NO_REAP | SLAB_CACHE_DMA | \
+- SLAB_MUST_HWCACHE_ALIGN)
++ SLAB_MUST_HWCACHE_ALIGN | \
++ SLAB_LOW_GFP_ORDER)
+ #else
+ # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
+- SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN)
++ SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
++ SLAB_LOW_GFP_ORDER)
+ #endif
+
+ /*
+@@ -247,8 +249,13 @@
+ };
+
+ /* internal c_flags */
+-#define CFLGS_OFF_SLAB 0x010000UL /* slab management in own cache */
+-#define CFLGS_OPTIMIZE 0x020000UL /* optimized slab lookup */
++#define CFLGS_OFF_SLAB 0x020000UL /* slab management in own cache */
++#define CFLGS_OPTIMIZE 0x040000UL /* optimized slab lookup */
++#define CFLGS_MASK (CFLGS_OFF_SLAB | CFLGS_OPTIMIZE)
++
++#if (CFLGS_MASK & CREATE_MASK)
++# error BUG: internal and external SLAB flags overlap
++#endif
+
+ /* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
+ #define DFLGS_GROWN 0x000001UL /* don't reap a recently grown */
+@@ -452,7 +459,12 @@
+ snprintf(name, sizeof(name), "size-%Zd",sizes->cs_size);
+ if (!(sizes->cs_cachep =
+ kmem_cache_create(name, sizes->cs_size,
+- 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
++ 0,
++#ifdef CONFIG_NETSWAP
++ SLAB_LOW_GFP_ORDER| /* sorry */
++#endif
++ SLAB_HWCACHE_ALIGN,
++ NULL, NULL))) {
+ BUG();
+ }
+
+@@ -731,6 +743,8 @@
+ break;
+ if (!cachep->num)
+ goto next;
++ if (cachep->gfporder == 0 && (flags & SLAB_LOW_GFP_ORDER))
++ break;
+ if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
+ /* Oops, this num of objs will cause problems. */
+ cachep->gfporder--;
+diff -Nurb src/linux/linux.orig/mm/swapfile.c src/linux/linux/mm/swapfile.c
+--- src/linux/linux.orig/mm/swapfile.c 2003-07-04 04:12:29.000000000 -0400
++++ src/linux/linux/mm/swapfile.c 2004-05-31 02:18:03.000000000 -0400
+@@ -11,12 +11,17 @@
+ #include <linux/swap.h>
+ #include <linux/swapctl.h>
+ #include <linux/blkdev.h> /* for blk_size */
++#include <linux/file.h>
+ #include <linux/vmalloc.h>
+ #include <linux/pagemap.h>
+ #include <linux/shm.h>
+
+ #include <asm/pgtable.h>
+
++#ifdef CONFIG_KMOD
++#include <linux/kmod.h>
++#endif
++
+ spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
+ unsigned int nr_swapfiles;
+ int total_swap_pages;
+@@ -31,8 +36,78 @@
+
+ struct swap_info_struct swap_info[MAX_SWAPFILES];
+
++static struct swap_method *swap_methods = NULL;
++
+ #define SWAPFILE_CLUSTER 256
+
++int register_swap_method(char *name, struct swap_ops *ops)
++{
++ struct swap_method *pos;
++ struct swap_method *new;
++ int result = 0;
++
++ lock_kernel();
++
++ for (pos = swap_methods; pos; pos = pos->next) {
++ if (strcmp(pos->name, name) == 0) {
++ printk(KERN_ERR "register_swap_method: "
++ "method %s already registered\n", name);
++ result = -EBUSY;
++ goto out;
++ }
++ }
++
++ if (!(new = kmalloc(sizeof(*new), GFP_KERNEL))) {
++ printk(KERN_ERR "register_swap_method: "
++ "no memory for new method \"%s\"\n", name);
++ result = -ENOMEM;
++ goto out;
++ }
++
++ new->name = name;
++ new->ops = ops;
++ new->use_count = 0;
++
++ /* ok, insert at top of list */
++ printk("register_swap_method: method %s\n", name);
++ new->next = swap_methods;
++ swap_methods = new;
++ out:
++ unlock_kernel();
++ return result;
++}
++
++int unregister_swap_method(char *name)
++{
++ struct swap_method **method, *next;
++ int result = 0;
++
++ lock_kernel();
++
++ for (method = &swap_methods; *method; method = &(*method)->next) {
++ if (strcmp((*method)->name, name) == 0) {
++ if ((*method)->use_count > 0) {
++ printk(KERN_ERR "unregister_swap_method: "
++ "method \"%s\" is in use\n", name);
++ result = -EBUSY;
++ goto out;
++ }
++
++ next = (*method)->next;
++ kfree(*method);
++ *method = next;
++ printk("unregister_swap_method: method %s\n", name);
++ goto out;
++ }
++ }
++ /* not found */
++ printk("unregister_swap_method: no such method %s\n", name);
++ result = -ENOENT;
++ out:
++ unlock_kernel();
++ return result;
++}
++
+ static inline int scan_swap_map(struct swap_info_struct *si)
+ {
+ unsigned long offset;
+@@ -711,13 +786,14 @@
+ struct nameidata nd;
+ int i, type, prev;
+ int err;
++ struct file *swap_file;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ err = user_path_walk(specialfile, &nd);
+ if (err)
+- goto out;
++ return err;
+
+ lock_kernel();
+ prev = -1;
+@@ -725,15 +801,20 @@
+ for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
+ p = swap_info + type;
+ if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
+- if (p->swap_file == nd.dentry)
++ if (p->swap_file &&
++ p->swap_file->f_dentry == nd.dentry)
+ break;
+ }
+ prev = type;
+ }
+ err = -EINVAL;
++ /* p->swap_file contains all needed info, no need to keep nd, so
++ * release it now.
++ */
++ path_release(&nd);
+ if (type < 0) {
+ swap_list_unlock();
+- goto out_dput;
++ goto out;
+ }
+
+ if (prev < 0) {
+@@ -767,32 +848,30 @@
+ total_swap_pages += p->pages;
+ p->flags = SWP_WRITEOK;
+ swap_list_unlock();
+- goto out_dput;
++ goto out;
+ }
+- if (p->swap_device)
+- blkdev_put(p->swap_file->d_inode->i_bdev, BDEV_SWAP);
+- path_release(&nd);
+
++ if (p->method->ops->release)
++ p->method->ops->release(p->swap_file, p->data);
+ swap_list_lock();
+ swap_device_lock(p);
+- nd.mnt = p->swap_vfsmnt;
+- nd.dentry = p->swap_file;
+- p->swap_vfsmnt = NULL;
++ p->method->use_count --;
++ p->method = NULL;
++ p->data = NULL;
++ swap_file = p->swap_file;
+ p->swap_file = NULL;
+- p->swap_device = 0;
+ p->max = 0;
+ swap_map = p->swap_map;
+ p->swap_map = NULL;
+ p->flags = 0;
+ swap_device_unlock(p);
+ swap_list_unlock();
++ filp_close(swap_file, NULL);
+ vfree(swap_map);
+ err = 0;
+
+-out_dput:
+- unlock_kernel();
+- path_release(&nd);
+ out:
++ unlock_kernel();
+ return err;
+ }
+
+@@ -805,18 +884,17 @@
+ if (!page)
+ return -ENOMEM;
+
+- len += sprintf(buf, "Filename\t\t\tType\t\tSize\tUsed\tPriority\n");
++ len += sprintf(buf, "%-32s%-16s%-8s%-8sPriority\n",
++ "Filename", "Type", "Size", "Used");
+ for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
+ if ((ptr->flags & SWP_USED) && ptr->swap_map) {
+- char * path = d_path(ptr->swap_file, ptr->swap_vfsmnt,
++ char * path = d_path(ptr->swap_file->f_dentry,
++ ptr->swap_file->f_vfsmnt,
+ page, PAGE_SIZE);
+
+ len += sprintf(buf + len, "%-31s ", path);
+
+- if (!ptr->swap_device)
+- len += sprintf(buf + len, "file\t\t");
+- else
+- len += sprintf(buf + len, "partition\t");
++ len += sprintf(buf + len, "%-15s ", ptr->method->name);
+
+ usedswap = 0;
+ for (j = 0; j < ptr->max; ++j)
+@@ -827,7 +905,7 @@
+ default:
+ usedswap++;
+ }
+- len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10),
++ len += sprintf(buf + len, "%-8d%-8d%d\n", ptr->pages << (PAGE_SHIFT - 10),
+ usedswap << (PAGE_SHIFT - 10), ptr->prio);
+ }
+ }
+@@ -835,18 +913,55 @@
+ return len;
+ }
+
+-int is_swap_partition(kdev_t dev) {
++/* apply a test function to all active swap objects. E.g. for checking
++ * whether a partition is used for swapping
++ */
++int swap_run_test(int (*test_fct)(unsigned int flags,
++ struct file * swap_file,
++ void *testdata), void *testdata)
++{
+ struct swap_info_struct *ptr = swap_info;
+ int i;
+
+ for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
+- if (ptr->flags & SWP_USED)
+- if (ptr->swap_device == dev)
++ if (ptr->swap_file &&
++ test_fct(ptr->flags, ptr->swap_file, testdata))
+ return 1;
+ }
+ return 0;
+ }
+
++/* Walk through the list of known swap methods until somebody wants to
++ * handle this file. Pick the first one which claims to be able to
++ * swap to this kind of file.
++ *
++ * return value: < 0: error, 0: not found, > 0: swapfilesize
++ */
++int find_swap_method(struct file *swap_file,
++ struct swap_info_struct *p)
++{
++ int swapfilesize = 0;
++ struct swap_method *method;
++
++ p->method = NULL;
++ for (method = swap_methods; method; method = method->next) {
++ swapfilesize = method->ops->open(swap_file, &p->data);
++ if (swapfilesize == 0) {
++ continue;
++ }
++ if (swapfilesize > 0) {
++ p->method = method;
++ p->method->use_count ++;
++ p->swap_file = swap_file;
++ break;
++ }
++ if (swapfilesize < 0) {
++ break;
++ }
++ }
++ return swapfilesize;
++}
++
+ /*
+ * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
+ *
+@@ -855,8 +970,6 @@
+ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
+ {
+ struct swap_info_struct * p;
+- struct nameidata nd;
+- struct inode * swap_inode;
+ unsigned int type;
+ int i, j, prev;
+ int error;
+@@ -866,8 +979,9 @@
+ int nr_good_pages = 0;
+ unsigned long maxpages = 1;
+ int swapfilesize;
+- struct block_device *bdev = NULL;
+ unsigned short *swap_map;
++ char * tmp_specialfile;
++ struct file *swap_file;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+@@ -886,8 +1000,7 @@
+ nr_swapfiles = type+1;
+ p->flags = SWP_USED;
+ p->swap_file = NULL;
+- p->swap_vfsmnt = NULL;
+- p->swap_device = 0;
++ p->method = NULL;
+ p->swap_map = NULL;
+ p->lowest_bit = 0;
+ p->highest_bit = 0;
+@@ -901,53 +1014,58 @@
+ p->prio = --least_priority;
+ }
+ swap_list_unlock();
+- error = user_path_walk(specialfile, &nd);
+- if (error)
++
++ /* Open the swap using filp_open. Bail out on any errors. */
++ tmp_specialfile = getname(specialfile);
++ if (IS_ERR(tmp_specialfile)) {
++ error = PTR_ERR(tmp_specialfile);
+ goto bad_swap_2;
++ }
++ p->swap_file = filp_open(tmp_specialfile, O_RDWR, 0600);
++ putname(tmp_specialfile);
++ if (IS_ERR(p->swap_file)) {
++ error = PTR_ERR(p->swap_file);
++ /* no file to close: drop the ERR_PTR, skip filp_close() at bad_swap_1 */
++ p->swap_file = NULL;
++ goto bad_swap_2;
++ }
+
+- p->swap_file = nd.dentry;
+- p->swap_vfsmnt = nd.mnt;
+- swap_inode = nd.dentry->d_inode;
+ error = -EINVAL;
+
+- if (S_ISBLK(swap_inode->i_mode)) {
+- kdev_t dev = swap_inode->i_rdev;
+- struct block_device_operations *bdops;
+- devfs_handle_t de;
+-
+- p->swap_device = dev;
+- set_blocksize(dev, PAGE_SIZE);
+-
+- bd_acquire(swap_inode);
+- bdev = swap_inode->i_bdev;
+- de = devfs_get_handle_from_inode(swap_inode);
+- bdops = devfs_get_ops(de); /* Increments module use count */
+- if (bdops) bdev->bd_op = bdops;
+-
+- error = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_SWAP);
+- devfs_put_ops(de);/*Decrement module use count now we're safe*/
+- if (error)
+- goto bad_swap_2;
+- set_blocksize(dev, PAGE_SIZE);
+- error = -ENODEV;
+- if (!dev || (blk_size[MAJOR(dev)] &&
+- !blk_size[MAJOR(dev)][MINOR(dev)]))
+- goto bad_swap;
+- swapfilesize = 0;
+- if (blk_size[MAJOR(dev)])
+- swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
+- >> (PAGE_SHIFT - 10);
+- } else if (S_ISREG(swap_inode->i_mode))
+- swapfilesize = swap_inode->i_size >> PAGE_SHIFT;
+- else
+- goto bad_swap;
++ swapfilesize = find_swap_method(p->swap_file, p);
++ if (swapfilesize < 0) {
++ error = swapfilesize;
++ goto bad_swap_1;
++ }
++#ifdef CONFIG_KMOD
++ if (swapfilesize == 0) {
++ (void)request_module("swapfile-mod");
++
++ swapfilesize = find_swap_method(p->swap_file, p);
++ if (swapfilesize < 0) {
++ error = swapfilesize;
++ goto bad_swap_1;
++ }
++ }
++#endif
++ if (swapfilesize == 0) {
++ printk("Don't know how to swap to this kind of file\n");
++ goto bad_swap_1; /* free swap map */
++ }
++
++ /* After this point, the swap-file has been opened by the swap
++ * method. We must make sure to use the bad_swap label for any
++ * errors.
++ */
+
+ error = -EBUSY;
+ for (i = 0 ; i < nr_swapfiles ; i++) {
+ struct swap_info_struct *q = &swap_info[i];
+ if (i == type || !q->swap_file)
+ continue;
+- if (swap_inode->i_mapping == q->swap_file->d_inode->i_mapping)
++ if (p->swap_file->f_dentry->d_inode->i_mapping
++ ==
++ q->swap_file->f_dentry->d_inode->i_mapping)
+ goto bad_swap;
+ }
+
+@@ -1083,17 +1199,27 @@
+ swap_list_unlock();
+ error = 0;
+ goto out;
++
+ bad_swap:
+- if (bdev)
+- blkdev_put(bdev, BDEV_SWAP);
++ if (p->method->ops->release)
++ p->method->ops->release(p->swap_file, p->data);
++ swap_list_lock();
++ p->method->use_count --;
++ p->method = NULL;
++ p->data = NULL;
++ swap_list_unlock();
++
++bad_swap_1:
++ swap_list_lock();
++ swap_file = p->swap_file;
++ p->swap_file = NULL;
++ swap_list_unlock();
++ filp_close(swap_file, NULL);
++
+ bad_swap_2:
++
+ swap_list_lock();
+ swap_map = p->swap_map;
+- nd.mnt = p->swap_vfsmnt;
+- nd.dentry = p->swap_file;
+- p->swap_device = 0;
+- p->swap_file = NULL;
+- p->swap_vfsmnt = NULL;
+ p->swap_map = NULL;
+ p->flags = 0;
+ if (!(swap_flags & SWAP_FLAG_PREFER))
+@@ -1101,7 +1227,7 @@
+ swap_list_unlock();
+ if (swap_map)
+ vfree(swap_map);
+- path_release(&nd);
++
+ out:
+ if (swap_header)
+ free_page((long) swap_header);
+@@ -1217,8 +1343,8 @@
+ /*
+ * Prior swap_duplicate protects against swap device deletion.
+ */
+-void get_swaphandle_info(swp_entry_t entry, unsigned long *offset,
+- kdev_t *dev, struct inode **swapf)
++struct swap_method *get_swaphandle_info(swp_entry_t entry,
++ unsigned long *offset, void **data)
+ {
+ unsigned long type;
+ struct swap_info_struct *p;
+@@ -1226,32 +1352,26 @@
+ type = SWP_TYPE(entry);
+ if (type >= nr_swapfiles) {
+ printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_file, entry.val);
+- return;
++ return NULL;
+ }
+
+ p = &swap_info[type];
+ *offset = SWP_OFFSET(entry);
+ if (*offset >= p->max && *offset != 0) {
+ printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_offset, entry.val);
+- return;
++ return NULL;
+ }
+ if (p->swap_map && !p->swap_map[*offset]) {
+ printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_offset, entry.val);
+- return;
++ return NULL;
+ }
+ if (!(p->flags & SWP_USED)) {
+ printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_file, entry.val);
+- return;
++ return NULL;
+ }
+
+- if (p->swap_device) {
+- *dev = p->swap_device;
+- } else if (p->swap_file) {
+- *swapf = p->swap_file->d_inode;
+- } else {
+- printk(KERN_ERR "rw_swap_page: no swap file or device\n");
+- }
+- return;
++ *data = p->data;
++ return p->method;
+ }
+
+ /*
+diff -Nurb src/linux/linux.orig/net/Config.in src/linux/linux/net/Config.in
+--- src/linux/linux.orig/net/Config.in 2003-07-04 04:12:29.000000000 -0400
++++ src/linux/linux/net/Config.in 2004-05-31 02:18:03.000000000 -0400
+@@ -16,6 +16,9 @@
+ fi
+ bool 'Socket Filtering' CONFIG_FILTER
+ tristate 'Unix domain sockets' CONFIG_UNIX
++if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
++ bool 'Swapping via network sockets (EXPERIMENTAL)' CONFIG_NETSWAP
++fi
+ bool 'TCP/IP networking' CONFIG_INET
+ if [ "$CONFIG_INET" = "y" ]; then
+ source net/ipv4/Config.in
+diff -Nurb src/linux/linux.orig/net/Makefile src/linux/linux/net/Makefile
+--- src/linux/linux.orig/net/Makefile 2003-07-04 04:12:29.000000000 -0400
++++ src/linux/linux/net/Makefile 2004-05-31 02:18:03.000000000 -0400
+@@ -51,6 +51,7 @@
+ ifeq ($(CONFIG_NET),y)
+ obj-$(CONFIG_MODULES) += netsyms.o
+ obj-$(CONFIG_SYSCTL) += sysctl_net.o
++obj-$(CONFIG_NETSWAP) += netswapping.o
+ endif
+
+ include $(TOPDIR)/Rules.make
+diff -Nurb src/linux/linux.orig/net/core/sock.c src/linux/linux/net/core/sock.c
+--- src/linux/linux.orig/net/core/sock.c 2003-10-14 04:09:32.000000000 -0400
++++ src/linux/linux/net/core/sock.c 2004-05-31 02:18:03.000000000 -0400
+@@ -402,6 +402,21 @@
+ ret = -ENONET;
+ break;
+ #endif
++#ifdef CONFIG_NETSWAP
++ case SO_SWAPPING:
++ if (valbool) {
++ if (!sk->swapping) {
++ netswap_sock_count ++;
++ }
++ sk->swapping ++;
++ } else if (sk->swapping > 0) {
++ sk->swapping --;
++ if (!sk->swapping) {
++ netswap_sock_count --;
++ }
++ }
++ break;
++#endif
+ /* We implement the SO_SNDLOWAT etc to
+ not be settable (1003.1g 5.3) */
+ default:
+@@ -552,6 +567,12 @@
+ goto lenout;
+ }
+
++#ifdef CONFIG_NETSWAP
++ case SO_SWAPPING:
++ v.val = sk->swapping;
++ break;
++#endif
++
+ /* Dubious BSD thing... Probably nobody even uses it, but
+ * the UNIX standard wants it for whatever reason... -DaveM
+ */
+diff -Nurb src/linux/linux.orig/net/ipv4/tcp_ipv4.c src/linux/linux/net/ipv4/tcp_ipv4.c
+--- src/linux/linux.orig/net/ipv4/tcp_ipv4.c 2003-10-14 04:09:33.000000000 -0400
++++ src/linux/linux/net/ipv4/tcp_ipv4.c 2004-05-31 02:18:03.000000000 -0400
+@@ -1657,6 +1657,12 @@
+ if (filter && sk_filter(skb, filter))
+ goto discard;
+ #endif /* CONFIG_FILTER */
++#ifdef CONFIG_NETSWAP
++ /* tcp doesn't use sock_queue_rcv_skb() ... */
++ /* a macro defined in net/netswapping.h */
++ if (netswap_low_memory(sk, skb))
++ goto discard;
++#endif /* CONFIG_NETSWAP */
+
+ IP_INC_STATS_BH(IpInDelivers);
+
+diff -Nurb src/linux/linux.orig/net/ipv6/tcp_ipv6.c src/linux/linux/net/ipv6/tcp_ipv6.c
+--- src/linux/linux.orig/net/ipv6/tcp_ipv6.c 2003-10-14 04:09:34.000000000 -0400
++++ src/linux/linux/net/ipv6/tcp_ipv6.c 2004-05-31 02:18:03.000000000 -0400
+@@ -1424,6 +1424,12 @@
+ if (filter && sk_filter(skb, filter))
+ goto discard;
+ #endif /* CONFIG_FILTER */
++#ifdef CONFIG_NETSWAP
++ /* tcp doesn't use sock_queue_rcv_skb() ... */
++ /* a macro defined in net/netswapping.h */
++ if (netswap_low_memory(sk, skb))
++ goto discard;
++#endif /* CONFIG_NETSWAP */
+
+ /*
+ * socket locking is here for SMP purposes as backlog rcv
+diff -Nurb src/linux/linux.orig/net/netswapping.c src/linux/linux/net/netswapping.c
+--- src/linux/linux.orig/net/netswapping.c 1969-12-31 19:00:00.000000000 -0500
++++ src/linux/linux/net/netswapping.c 2004-05-31 02:18:03.000000000 -0400
+@@ -0,0 +1,76 @@
++/*
++ * linux/net/netswapping.c
++ *
++ * Support paging over network connections (inet only)
++ *
++ * (c) 2000 Claus-Justus Heine <heine@instmath.rwth-aachen.de>
++ */
++
++#include <linux/slab.h>
++#include <linux/swap.h>
++#include <linux/swapctl.h>
++#include <linux/skbuff.h>
++#include <linux/module.h>
++#include <linux/sysctl.h>
++#include <linux/init.h>
++#include <net/netswapping.h>
++#include <net/sock.h>
++#include <asm/uaccess.h>
++
++unsigned int netswap_dropped; /* statistics */
++unsigned int netswap_free_pages_min;
++int netswap_sock_count; /* how many sockets have swapping option set */
++
++#ifdef CONFIG_SYSCTL
++
++static ctl_table netswap_table[] = {
++ {NET_SWAP_DROPPED, "dropped",
++ &netswap_dropped, sizeof(int), 0644, NULL, &proc_dointvec },
++ {NET_SWAP_DROP_THRESHOLD, "threshold",
++ &netswap_free_pages_min, sizeof(int), 0644, NULL, &proc_dointvec },
++ {NET_SWAP_SOCK_COUNT, "sock_count",
++ &netswap_sock_count, sizeof(int), 0444, NULL, &proc_dointvec },
++ {0},
++};
++
++static struct ctl_table_header *netswap_sysctl_header;
++
++static ctl_table netswap_net_table[] = {
++ {CTL_NETSWAP, "swapping", NULL, 0, 0555, netswap_table},
++ {0}
++};
++
++static ctl_table netswap_root_table[] = {
++ {CTL_NET, "net", NULL, 0, 0555, netswap_net_table},
++ {0}
++};
++
++#endif
++
++int __init netswap_init(void)
++{
++ /* drop packets when below this threshold */
++ netswap_free_pages_min = 32 /* freepages.min */;
++#ifdef CONFIG_SYSCTL
++ netswap_sysctl_header = register_sysctl_table(netswap_root_table, 0);
++#endif
++ return 0;
++}
++
++void __exit netswap_exit(void)
++{
++#ifdef CONFIG_SYSCTL
++ unregister_sysctl_table(netswap_sysctl_header);
++#endif
++}
++
++/* linux/init.h -- VERY nice :-)
++ *
++ * On the other hand, we have no control over the order the initcalls
++ * are performed ...
++ *
++ * Actually, we are not compiled as module ...
++ */
++
++module_init(netswap_init)
++module_exit(netswap_exit)
+diff -Nurb src/linux/linux.orig/net/netsyms.c src/linux/linux/net/netsyms.c
+--- src/linux/linux.orig/net/netsyms.c 2004-05-31 02:02:49.000000000 -0400
++++ src/linux/linux/net/netsyms.c 2004-05-31 02:18:03.000000000 -0400
+@@ -601,4 +601,10 @@
+ EXPORT_SYMBOL(wireless_send_event);
+ #endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
+
++#ifdef CONFIG_NETSWAP
++EXPORT_SYMBOL(netswap_sock_count);
++EXPORT_SYMBOL(netswap_free_pages_min);
++EXPORT_SYMBOL(netswap_dropped);
++#endif
++
+ #endif /* CONFIG_NET */
+diff -Nurb src/linux/linux.orig/net/packet/af_packet.c src/linux/linux/net/packet/af_packet.c
+--- src/linux/linux.orig/net/packet/af_packet.c 2003-10-14 04:09:35.000000000 -0400
++++ src/linux/linux/net/packet/af_packet.c 2004-05-31 02:18:03.000000000 -0400
+@@ -449,6 +449,12 @@
+ snaplen = res;
+ }
+ #endif /* CONFIG_FILTER */
++#ifdef CONFIG_NETSWAP
++ /* packet doesn't use sock_queue_rcv_skb() ... */
++ /* a macro defined in net/netswapping.h */
++ if (netswap_low_memory(sk, skb))
++ goto drop_n_restore;
++#endif /* CONFIG_NETSWAP */
+
+ if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
+ goto drop_n_acct;
+@@ -496,7 +502,7 @@
+ po->stats.tp_drops++;
+ spin_unlock(&sk->receive_queue.lock);
+
+-#ifdef CONFIG_FILTER
++#if defined(CONFIG_FILTER) || defined(CONFIG_NETSWAP)
+ drop_n_restore:
+ #endif
+ if (skb_head != skb->data && skb_shared(skb)) {
+@@ -557,6 +563,12 @@
+ snaplen = res;
+ }
+ #endif
++#ifdef CONFIG_NETSWAP
++ /* packet doesn't use sock_queue_rcv_skb() ... */
++ /* a macro defined in net/netswapping.h */
++ if (netswap_low_memory(sk, skb))
++ goto drop_n_restore;
++#endif /* CONFIG_NETSWAP */
+
+ if (sk->type == SOCK_DGRAM) {
+ macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
+diff -Nurb src/linux/linux.orig/net/sunrpc/sched.c src/linux/linux/net/sunrpc/sched.c
+--- src/linux/linux.orig/net/sunrpc/sched.c 2003-07-04 04:12:33.000000000 -0400
++++ src/linux/linux/net/sunrpc/sched.c 2004-05-31 02:18:03.000000000 -0400
+@@ -79,10 +79,11 @@
+ */
+ static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED;
+
++#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
+ /*
+ * This is the last-ditch buffer for NFS swap requests
+ */
+-static u32 swap_buffer[PAGE_SIZE >> 2];
++static u32 swap_buffer[2*PAGE_SIZE >> 2]; /* (2*PAGE_SIZE)/4 entries, twice the old size */
+ static long swap_buffer_used;
+
+ /*
+@@ -96,6 +97,7 @@
+ {
+ clear_bit(1, &swap_buffer_used);
+ }
++#endif /* CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE */
+
+ /*
+ * Disable the timer for a given RPC task. Should be called with
+@@ -501,6 +503,7 @@
+ __rpc_execute(struct rpc_task *task)
+ {
+ int status = 0;
++ unsigned long alloc_flag = current->flags & PF_MEMALLOC; /* PF_MEMALLOC state on entry */
+
+ dprintk("RPC: %4d rpc_execute flgs %x\n",
+ task->tk_pid, task->tk_flags);
+@@ -510,6 +513,13 @@
+ return 0;
+ }
+
++ if (task->tk_flags & RPC_TASK_SWAPPER) { /* swapper tasks run with PF_MEMALLOC set */
++ if (!(current->flags & PF_MEMALLOC)) { /* log only when the flag is newly set */
++ dprintk("__rpc_execute: Setting PF_MEMALLOC\n");
++ }
++ current->flags |= PF_MEMALLOC;
++ }
++
+ restarted:
+ while (1) {
+ /*
+@@ -554,7 +564,8 @@
+ rpc_set_sleeping(task);
+ if (RPC_IS_ASYNC(task)) {
+ spin_unlock_bh(&rpc_queue_lock);
+- return 0;
++ status = 0;
++ goto out; /* leave via "out" so PF_MEMALLOC is restored */
+ }
+ }
+ spin_unlock_bh(&rpc_queue_lock);
+@@ -563,7 +574,12 @@
+ /* sync task: sleep here */
+ dprintk("RPC: %4d sync task going to sleep\n",
+ task->tk_pid);
+- if (current->pid == rpciod_pid)
++ /* it's ok to wait for rpciod when swapping,
++ * because this means it needed memory and is
++ * doing the swap-out itself.
++ */
++ if (current->pid == rpciod_pid &&
++ !(task->tk_flags & RPC_TASK_SWAPPER))
+ printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");
+
+ __wait_event(task->tk_wait, !RPC_IS_SLEEPING(task));
+@@ -608,6 +624,10 @@
+ /* Release all resources associated with the task */
+ rpc_release_task(task);
+
++ out:
++ if (!alloc_flag) { /* flag was clear on entry: clear it again */
++ current->flags &= ~PF_MEMALLOC;
++ }
+ return status;
+ }
+
+@@ -699,10 +719,16 @@
+ {
+ u32 *buffer;
+ int gfp;
++ unsigned long alloc_flag = current->flags & PF_MEMALLOC; /* PF_MEMALLOC state on entry */
++ void *ret = NULL;
+
+- if (flags & RPC_TASK_SWAPPER)
++ if (flags & RPC_TASK_SWAPPER) {
+ gfp = GFP_ATOMIC;
+- else if (flags & RPC_TASK_ASYNC)
++ if (!(current->flags & PF_MEMALLOC)) {
++ dprintk("rpc_allocate: Setting PF_MEMALLOC\n");
++ }
++ current->flags |= PF_MEMALLOC;
++ } else if (flags & RPC_TASK_ASYNC)
+ gfp = GFP_RPC;
+ else
+ gfp = GFP_KERNEL;
+@@ -710,29 +736,44 @@
+ do {
+ if ((buffer = (u32 *) kmalloc(size, gfp)) != NULL) {
+ dprintk("RPC: allocated buffer %p\n", buffer);
+- return buffer;
++ ret = buffer;
++ goto out;
+ }
++#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
+ if ((flags & RPC_TASK_SWAPPER) && size <= sizeof(swap_buffer)
+ && rpc_lock_swapbuf()) {
+ dprintk("RPC: used last-ditch swap buffer\n");
+- return swap_buffer;
++ ret = swap_buffer;
++ goto out;
++ }
++#endif /* closing brace kept inside the guard so braces balance when NFS swap is off */
++ if (flags & RPC_TASK_ASYNC) {
++ ret = NULL;
++ goto out;
+ }
+- if (flags & RPC_TASK_ASYNC)
+- return NULL;
+ yield();
+ } while (!signalled());
+
+- return NULL;
++ out:
++ if (!alloc_flag) { /* flag was clear on entry: clear it again */
++ current->flags &= ~PF_MEMALLOC;
++ }
++ return ret;
+ }
+
+ void
+ rpc_free(void *buffer)
+ {
++#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
+ if (buffer != swap_buffer) {
++#endif
+ kfree(buffer);
+ return;
++#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
+ }
+ rpc_unlock_swapbuf();
++ printk("RPC: Released swap buffer\n");
++#endif /* CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE */
+ }
+
+ /*
+diff -Nurb src/linux/linux.orig/net/sunrpc/xprt.c src/linux/linux/net/sunrpc/xprt.c
+--- src/linux/linux.orig/net/sunrpc/xprt.c 2003-07-04 04:12:33.000000000 -0400
++++ src/linux/linux/net/sunrpc/xprt.c 2004-05-31 02:18:03.000000000 -0400
+@@ -139,7 +139,7 @@
+ __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
+ {
+ if (!xprt->snd_task) {
+- if (xprt->nocong || __xprt_get_cong(xprt, task))
++ if (__xprt_get_cong(xprt, task))
+ xprt->snd_task = task;
+ }
+ if (xprt->snd_task != task) {
+@@ -179,7 +179,7 @@
+ if (!task)
+ return;
+ }
+- if (xprt->nocong || __xprt_get_cong(xprt, task))
++ if (__xprt_get_cong(xprt, task))
+ xprt->snd_task = task;
+ }
+
+@@ -276,6 +276,9 @@
+ {
+ struct rpc_rqst *req = task->tk_rqstp;
+
++ if (xprt->nocong || RPC_IS_SWAPPER(task))
++ return 1; /* nocong transport or swapper task: succeed before the rq_cong/cwnd checks */
++
+ if (req->rq_cong)
+ return 1;
+ dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n",