diff options
author | mbm <mbm@3c298f89-4303-0410-b956-a3cf2f4a3e73> | 2004-05-31 06:43:24 +0000 |
---|---|---|
committer | mbm <mbm@3c298f89-4303-0410-b956-a3cf2f4a3e73> | 2004-05-31 06:43:24 +0000 |
commit | e934f65d803ce9309af3701a79ceca82a4fadcc8 (patch) | |
tree | ee916877ad89c8c5dc1b0aa565389ba36476e63a /obsolete-buildroot | |
parent | caa3383f5093fe2925c75a961db1f3d6f5e1b040 (diff) |
nfs swap patch
git-svn-id: svn://svn.openwrt.org/openwrt/trunk@45 3c298f89-4303-0410-b956-a3cf2f4a3e73
Diffstat (limited to 'obsolete-buildroot')
-rw-r--r-- | obsolete-buildroot/sources/openwrt-wrt54g-nfsswap.patch | 2362 |
1 files changed, 2362 insertions, 0 deletions
diff --git a/obsolete-buildroot/sources/openwrt-wrt54g-nfsswap.patch b/obsolete-buildroot/sources/openwrt-wrt54g-nfsswap.patch new file mode 100644 index 000000000..bf848c129 --- /dev/null +++ b/obsolete-buildroot/sources/openwrt-wrt54g-nfsswap.patch @@ -0,0 +1,2362 @@ +diff -Nurb src/linux/linux.orig/Documentation/netswap.txt src/linux/linux/Documentation/netswap.txt +--- src/linux/linux.orig/Documentation/netswap.txt 1969-12-31 19:00:00.000000000 -0500 ++++ src/linux/linux/Documentation/netswap.txt 2004-05-31 02:18:03.000000000 -0400 +@@ -0,0 +1,51 @@ ++ Swapping over network ++ ++Support for this is enabled via the CONFIG_NETSWAP option, which is ++automatically enabled when enabling swap files located on NFS volumes ++(CONFIG_SWAP_VIA_NFS). ++ ++When swapping to files located on a network file system like NFS or ++CODA or others or to nbd (network block device, see `nbd.txt') ++partitions there is the problem that this requires additional memory, ++besides the page which is currently swapped in or out, probably at ++least two more pages for each page in question. ++ ++This means that not only does there need to be free space left in the swap ++file or the swap partition, but in addition there must be enough free ++memory left in the system to perform the swap out of pages. ++ ++This is particularly painful as receiving data over the network itself ++consumes memory, and this memory is allocated from an interrupt ++context (i.e. in the interrupt handler of the network card). That ++means that on a congested network there are chances that the machine ++runs out of memory, simply because the network device's interrupt ++routines allocate memory faster than it is freed by swapping via ++network. ++ ++To cope with this problem, there is a new socket option `SO_SWAPPING' ++which has to be set on the `SOL_SOCKET' level with setsockopt() (see ++setsockopt(2)). 
When this option is set on any network socket, then ++the system will start to drop network packets it receives on any other ++socket when the number of free pages falls below a certain threshold. ++ ++This threshold initially is 4 pages less than `freepages.min' (see ++`Documentation/sysctl/vm.txt') but can be tuned using the sysctl ++interface by writing to the file `/proc/sys/net/swapping/threshold'. ++ ++There are two other files: ++ ++`/proc/sys/net/swapping/dropped': ++ How many network packets have been dropped so far. This file is ++ writable, writing to it simply sets the counter to the given value ++ (useful for resetting the counter). ++ ++`/proc/sys/net/swapping/sock_count': ++ How many network sockets have the `SO_SWAPPING' option set (read ++ only, of course). ++ ++When using swap-files on NFS volumes, then the `SO_SWAPPING' option is ++set or cleared by swapon/swapoff system calls, so the user need not ++care about it. ++ ++Swapping over the network is insecure unless the data would be ++encrypted, which is not the case with NFS. It is also very slow. +diff -Nurb src/linux/linux.orig/Documentation/nfsswap.txt src/linux/linux/Documentation/nfsswap.txt +--- src/linux/linux.orig/Documentation/nfsswap.txt 1969-12-31 19:00:00.000000000 -0500 ++++ src/linux/linux/Documentation/nfsswap.txt 2004-05-31 02:18:03.000000000 -0400 +@@ -0,0 +1,41 @@ ++ Swapping to files on NFS volumes ++ ++To do this you have to say `Y' or `M' to the CONFIG_SWAP_VIA_NFS ++configuration option. When compiling support for this as a module you ++should read `Documentation/modules.txt'. For auto-loading of the ++module during the `swapon' system call you have to place a line like ++ ++alias swapfile-mod nfsswap ++ ++in `/etc/modules.conf' (or `/etc/conf.modules', depending on your ++setup). NFS volumes holding swapfiles should be mounted with `rsize' ++and `wsize' set to something less than the size of a page, otherwise ++deadlocks caused by memory fragmentation can happen, i.e. 
mount the ++volume which is to hold the swapfiles with ++ ++mount -t nfs -o rsize=2048,wsize=2048 NFS_SERVER_IP:/server_volume /mount_point ++ ++or set the option in `/etc/fstab'. Read `Documentation/nfsroot.txt' to ++learn how to set mount options for the root file system, if your swap ++files are to be located on the root file system. ++ ++Setting the `rsize' and `wsize' to anything less than PAGE_SIZE is a ++performance hit, so you probably want to have at least two volumes ++mounted, one for the swapfiles, one for the rest. ++ ++You may want to read `Documentation/netswap.txt' as well. ++ ++Swapfiles on NFS volumes can be treated like any other swapfile, ++i.e. ++ ++dd if=/dev/zero of=/swapfiles/SWAPFILE bs=1k count=20480 ++mkswap /swapfiles/SWAPFILE ++swapon /swapfiles/SWAPFILE ++ ++will create a 20M swapfile and tell the system to use it. Actually, ++one could use lseek(2) to create an empty swapfile. This is different ++from swapfiles located on local harddisk. ++ ++Swapping over the network is insecure unless the data would be ++encrypted, which is not the case with NFS. It is also very slow. ++ +diff -Nurb src/linux/linux.orig/drivers/block/blkpg.c src/linux/linux/drivers/block/blkpg.c +--- src/linux/linux.orig/drivers/block/blkpg.c 2003-07-04 04:11:31.000000000 -0400 ++++ src/linux/linux/drivers/block/blkpg.c 2004-05-31 02:18:03.000000000 -0400 +@@ -34,7 +34,7 @@ + #include <linux/blk.h> /* for set_device_ro() */ + #include <linux/blkpg.h> + #include <linux/genhd.h> +-#include <linux/swap.h> /* for is_swap_partition() */ ++#include <linux/swap.h> /* for swap_run_test() */ + #include <linux/module.h> /* for EXPORT_SYMBOL */ + + #include <asm/uaccess.h> +@@ -114,6 +114,29 @@ + return 0; + } + ++/* swap_run_test() applies this hook to all swapfiles until it returns ++ * "1". If it never returns "1", the result of swap_run_test() is "0", ++ * otherwise "1". 
++ */ ++static int is_swap_partition_hook(unsigned int flags, struct file *swap_file, ++ void *testdata) ++{ ++ kdev_t swap_dev = S_ISBLK(swap_file->f_dentry->d_inode->i_mode) ++ ? swap_file->f_dentry->d_inode->i_rdev : 0; ++ kdev_t dev = *((kdev_t *)testdata); ++ ++ if (flags & SWP_USED && dev == swap_dev) { ++ return 1; ++ } else { ++ return 0; ++ } ++} ++ ++static inline int is_swap_partition(kdev_t dev) ++{ ++ return swap_run_test(is_swap_partition_hook, &dev); ++} ++ + /* + * Delete a partition given by partition number + * +diff -Nurb src/linux/linux.orig/fs/Config.in src/linux/linux/fs/Config.in +--- src/linux/linux.orig/fs/Config.in 2004-05-31 02:02:43.000000000 -0400 ++++ src/linux/linux/fs/Config.in 2004-05-31 02:18:03.000000000 -0400 +@@ -4,6 +4,12 @@ + mainmenu_option next_comment + comment 'File systems' + ++if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then ++ tristate 'Swapping to block devices' CONFIG_BLKDEV_SWAP ++else ++ define_bool CONFIG_BLKDEV_SWAP y ++fi ++ + bool 'Quota support' CONFIG_QUOTA + tristate 'Kernel automounter support' CONFIG_AUTOFS_FS + tristate 'Kernel automounter version 4 support (also supports v3)' CONFIG_AUTOFS4_FS +@@ -110,6 +116,12 @@ + dep_tristate 'NFS file system support' CONFIG_NFS_FS $CONFIG_INET + dep_mbool ' Provide NFSv3 client support' CONFIG_NFS_V3 $CONFIG_NFS_FS + dep_bool ' Root file system on NFS' CONFIG_ROOT_NFS $CONFIG_NFS_FS $CONFIG_IP_PNP ++ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then ++ dep_tristate ' Swapping via NFS (EXPERIMENTAL)' CONFIG_SWAP_VIA_NFS $CONFIG_NFS_FS ++ if [ "$CONFIG_SWAP_VIA_NFS" = "y" -o "$CONFIG_SWAP_VIA_NFS" = "m" ]; then ++ define_bool CONFIG_NETSWAP y ++ fi ++ fi + + dep_tristate 'NFS server support' CONFIG_NFSD $CONFIG_INET + dep_mbool ' Provide NFSv3 server support' CONFIG_NFSD_V3 $CONFIG_NFSD +diff -Nurb src/linux/linux.orig/fs/Makefile src/linux/linux/fs/Makefile +--- src/linux/linux.orig/fs/Makefile 2004-05-31 02:02:42.000000000 -0400 ++++ src/linux/linux/fs/Makefile 2004-05-31 
02:18:03.000000000 -0400 +@@ -8,7 +8,7 @@ + O_TARGET := fs.o + + export-objs := filesystems.o open.o dcache.o buffer.o +-mod-subdirs := nls ++mod-subdirs := nls nfs + + obj-y := open.o read_write.o devices.o file_table.o buffer.o \ + super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \ +@@ -70,6 +70,7 @@ + subdir-$(CONFIG_JFS_FS) += jfs + subdir-$(CONFIG_SQUASHFS) += squashfs + ++obj-$(CONFIG_BLKDEV_SWAP) += blkdev_swap.o + + obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o + obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o +diff -Nurb src/linux/linux.orig/fs/blkdev_swap.c src/linux/linux/fs/blkdev_swap.c +--- src/linux/linux.orig/fs/blkdev_swap.c 1969-12-31 19:00:00.000000000 -0500 ++++ src/linux/linux/fs/blkdev_swap.c 2004-05-31 02:18:03.000000000 -0400 +@@ -0,0 +1,309 @@ ++/* ++ * Swapping to partitions or files located on partitions. ++ */ ++ ++#include <linux/config.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/slab.h> ++#include <linux/locks.h> ++#include <linux/blkdev.h> ++#include <linux/pagemap.h> ++#include <linux/swap.h> ++#include <linux/fs.h> ++ ++#ifdef DEBUG_BLKDEV_SWAP ++# define dprintk(fmt...) printk(##fmt) ++#else ++# define dprintk(fmt...) do { /* */ } while (0) ++#endif ++ ++#define BLKDEV_SWAP_ID "blkdev" ++#define BLKDEV_FILE_SWAP_ID "blkdev file" ++ ++/* ++ * Helper function, copied here from buffer.c ++ */ ++ ++/* ++ * Start I/O on a page. ++ * This function expects the page to be locked and may return ++ * before I/O is complete. You then have to check page->locked ++ * and page->uptodate. ++ * ++ * brw_page() is SMP-safe, although it's being called with the ++ * kernel lock held - but the code is ready. ++ * ++ * FIXME: we need a swapper_inode->get_block function to remove ++ * some of the bmap kludges and interface ugliness here. 
++ */ ++int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size) ++{ ++ struct buffer_head *head, *bh; ++ ++ if (!PageLocked(page)) ++ panic("brw_page: page not locked for I/O"); ++ ++ if (!page->buffers) ++ create_empty_buffers(page, dev, size); ++ head = bh = page->buffers; ++ ++ /* Stage 1: lock all the buffers */ ++ do { ++ lock_buffer(bh); ++ bh->b_blocknr = *(b++); ++ set_bit(BH_Mapped, &bh->b_state); ++ set_buffer_async_io(bh); ++ bh = bh->b_this_page; ++ } while (bh != head); ++ ++ /* Stage 2: start the IO */ ++ do { ++ struct buffer_head *next = bh->b_this_page; ++ submit_bh(rw, bh); ++ bh = next; ++ } while (bh != head); ++ return 0; ++} ++ ++/* ++ * We implement to methods: swapping to partitions, and swapping to files ++ * located on partitions. ++ */ ++ ++struct blkdev_swap_data { ++ kdev_t dev; ++}; ++ ++struct test_data { ++ struct file * filp; ++ kdev_t dev; ++}; ++ ++static int is_blkdev_swapping(unsigned int flags, ++ struct file * swapf, ++ void *data) ++{ ++ struct test_data *testdata = (struct test_data *) data; ++ struct file * filp = testdata->filp; ++ kdev_t dev = testdata->dev; ++ ++ /* Only check filp's that don't match the one already opened ++ * for us by sys_swapon(). Otherwise, we will always flag a ++ * busy swap file. 
++ */ ++ ++ if (swapf != filp) { ++ if (dev == swapf->f_dentry->d_inode->i_rdev) ++ return 1; ++ } ++ return 0; ++} ++ ++static int blkdev_swap_open(struct file * filp, void **dptr) ++{ ++ int swapfilesize; ++ kdev_t dev; ++ struct blkdev_swap_data *data; ++ int error; ++ struct test_data testdata; ++ ++ MOD_INC_USE_COUNT; ++ ++ if (!S_ISBLK(filp->f_dentry->d_inode->i_mode)) { ++ dprintk(__FUNCTION__": can't handle this swap file: %s\n", ++ swapf->d_name.name); ++ error = 0; /* not for us */ ++ goto bad_swap; ++ } ++ ++ dev = filp->f_dentry->d_inode->i_rdev; ++ set_blocksize(dev, PAGE_SIZE); ++ error = -ENODEV; ++ if (!dev || ++ (blk_size[MAJOR(dev)] && !blk_size[MAJOR(dev)][MINOR(dev)])) { ++ printk("blkdev_swap_open: blkdev weirdness for %s\n", ++ filp->f_dentry->d_name.name); ++ goto bad_swap; ++ } ++ ++ /* Check to make sure that we aren't already swapping. */ ++ error = -EBUSY; ++ testdata.filp = filp; ++ testdata.dev = dev; ++ if (swap_run_test(is_blkdev_swapping, &testdata)) { ++ printk("blkdev_swap_open: already swapping to %s\n", ++ filp->f_dentry->d_name.name); ++ goto bad_swap; ++ } ++ ++ swapfilesize = 0; ++ if (blk_size[MAJOR(dev)]) ++ swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)] ++ >> (PAGE_SHIFT - 10); ++ ++ if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) { ++ printk("blkdev_swap_open: can't allocate data for %s\n", ++ filp->f_dentry->d_name.name); ++ error = -ENOMEM; ++ goto bad_swap; ++ } ++ data->dev = dev; ++ *dptr = data; ++ ++ dprintk("blkdev_swap_open: returning %d\n", swapfilesize); ++ return swapfilesize; ++ ++ bad_swap: ++ MOD_DEC_USE_COUNT; ++ return error; /* this swap thing is not for us */ ++} ++ ++static int blkdev_swap_release(struct file * filp, void *data) ++{ ++ dprintk("blkdev_swap_release: releasing swap device %s\n", ++ filp->f_dentry->d_name.name); ++ kfree(data); ++ MOD_DEC_USE_COUNT; ++ return 0; ++} ++ ++static int blkdev_rw_page(int rw, struct page *page, unsigned long offset, ++ void *ptr) ++{ ++ struct 
blkdev_swap_data *data = (struct blkdev_swap_data *)ptr; ++ brw_page(rw, page, data->dev, (int *)&offset, PAGE_SIZE); ++ return 1; ++} ++ ++static struct swap_ops blkdev_swap_ops = { ++ blkdev_swap_open, ++ blkdev_swap_release, ++ blkdev_rw_page ++}; ++ ++struct blkdevfile_swap_data { ++ struct inode *swapf; ++}; ++ ++static int is_blkdevfile_swapping(unsigned int flags, ++ struct file * swapf, ++ void * data) ++{ ++ struct file * filp = (struct file *) data; ++ ++ /* Only check filp's that don't match the one already opened ++ * for us by sys_swapon(). Otherwise, we will always flag a ++ * busy swap file. ++ */ ++ ++ if (swapf != filp) { ++ if (filp->f_dentry->d_inode == swapf->f_dentry->d_inode) ++ return 1; ++ } ++ return 0; ++} ++ ++static int blkdevfile_swap_open(struct file *swapf, void **dptr) ++{ ++ int error = 0; ++ int swapfilesize; ++ struct blkdevfile_swap_data *data; ++ ++ MOD_INC_USE_COUNT; ++ ++ /* first check whether this is a regular file located on a local ++ * hard disk ++ */ ++ if (!S_ISREG(swapf->f_dentry->d_inode->i_mode)) { ++ dprintk("blkdevfile_swap_open: " ++ "can't handle this swap file: %s\n", ++ swapf->d_name.name); ++ error = 0; /* not for us */ ++ goto bad_swap; ++ } ++ if (!swapf->f_dentry->d_inode->i_mapping->a_ops->bmap) { ++ dprintk("blkdevfile_swap_open: no bmap for file: %s\n", ++ swapf->d_name.name); ++ error = 0; /* not for us */ ++ goto bad_swap; ++ } ++ ++ if (swap_run_test(is_blkdevfile_swapping, swapf)) { ++ dprintk("blkdevfile_swap_open: already swapping to %s\n", ++ swapf->d_name.name); ++ error = -EBUSY; ++ goto bad_swap; ++ } ++ swapfilesize = swapf->f_dentry->d_inode->i_size >> PAGE_SHIFT; ++ if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) { ++ error = -ENOMEM; ++ goto bad_swap; ++ } ++ data->swapf = swapf->f_dentry->d_inode; ++ *dptr = data; ++ return swapfilesize; ++ ++ bad_swap: ++ MOD_DEC_USE_COUNT; ++ return error; ++} ++ ++static int blkdevfile_swap_release(struct file *swapf, void *data) ++{ ++ 
kfree(data); ++ MOD_DEC_USE_COUNT; ++ return 0; ++} ++ ++static int blkdevfile_rw_page(int rw, struct page *page, unsigned long offset, ++ void *ptr) ++{ ++ struct blkdevfile_swap_data *data = (struct blkdevfile_swap_data *)ptr; ++ struct inode * swapf = data->swapf; ++ int i, j; ++ unsigned int block = offset ++ << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits); ++ kdev_t dev = swapf->i_dev; ++ int block_size; ++ int zones[PAGE_SIZE/512]; ++ int zones_used; ++ ++ block_size = swapf->i_sb->s_blocksize; ++ for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size) ++ if (!(zones[i] = bmap(swapf,block++))) { ++ printk("blkdevfile_rw_page: bad swap file\n"); ++ return 0; ++ } ++ zones_used = i; ++ ++ /* block_size == PAGE_SIZE/zones_used */ ++ brw_page(rw, page, dev, zones, block_size); ++ return 1; ++} ++ ++static struct swap_ops blkdevfile_swap_ops = { ++ blkdevfile_swap_open, ++ blkdevfile_swap_release, ++ blkdevfile_rw_page ++ }; ++ ++int __init blkdev_swap_init(void) ++{ ++ (void)register_swap_method(BLKDEV_SWAP_ID, &blkdev_swap_ops); ++ (void)register_swap_method(BLKDEV_FILE_SWAP_ID, &blkdevfile_swap_ops); ++ return 0; ++} ++ ++void __exit blkdev_swap_exit(void) ++{ ++ unregister_swap_method(BLKDEV_SWAP_ID); ++ unregister_swap_method(BLKDEV_FILE_SWAP_ID); ++} ++ ++module_init(blkdev_swap_init) ++module_exit(blkdev_swap_exit) ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Many. 
Stuffed into a module by cH (Claus-Justus Heine)"); ++MODULE_DESCRIPTION("Swapping to partitions and files on local hard-disks"); +diff -Nurb src/linux/linux.orig/fs/buffer.c src/linux/linux/fs/buffer.c +--- src/linux/linux.orig/fs/buffer.c 2003-07-04 04:12:05.000000000 -0400 ++++ src/linux/linux/fs/buffer.c 2004-05-31 02:21:05.000000000 -0400 +@@ -743,7 +743,7 @@ + bh->b_private = private; + } + +-static void end_buffer_io_async(struct buffer_head * bh, int uptodate) ++void end_buffer_io_async(struct buffer_head * bh, int uptodate) + { + static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED; + unsigned long flags; +@@ -2344,35 +2344,6 @@ + return err; + } + +-int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size) +-{ +- struct buffer_head *head, *bh; +- +- if (!PageLocked(page)) +- panic("brw_page: page not locked for I/O"); +- +- if (!page->buffers) +- create_empty_buffers(page, dev, size); +- head = bh = page->buffers; +- +- /* Stage 1: lock all the buffers */ +- do { +- lock_buffer(bh); +- bh->b_blocknr = *(b++); +- set_bit(BH_Mapped, &bh->b_state); +- set_buffer_async_io(bh); +- bh = bh->b_this_page; +- } while (bh != head); +- +- /* Stage 2: start the IO */ +- do { +- struct buffer_head *next = bh->b_this_page; +- submit_bh(rw, bh); +- bh = next; +- } while (bh != head); +- return 0; +-} +- + int block_symlink(struct inode *inode, const char *symname, int len) + { + struct address_space *mapping = inode->i_mapping; +diff -Nurb src/linux/linux.orig/fs/nfs/Makefile src/linux/linux/fs/nfs/Makefile +--- src/linux/linux.orig/fs/nfs/Makefile 2003-07-04 04:12:07.000000000 -0400 ++++ src/linux/linux/fs/nfs/Makefile 2004-05-31 02:18:03.000000000 -0400 +@@ -15,6 +15,14 @@ + obj-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o + obj-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o + +-obj-m := $(O_TARGET) ++obj-$(CONFIG_SWAP_VIA_NFS) += nfsswap.o ++ifeq ($(CONFIG_SWAP_VIA_NFS),m) ++export-objs := nfs_syms.o ++obj-y += nfs_syms.o ++endif ++ ++ifeq 
($(CONFIG_NFS_FS),m) ++obj-m += $(O_TARGET) ++endif + + include $(TOPDIR)/Rules.make +diff -Nurb src/linux/linux.orig/fs/nfs/file.c src/linux/linux/fs/nfs/file.c +--- src/linux/linux.orig/fs/nfs/file.c 2003-07-04 04:12:07.000000000 -0400 ++++ src/linux/linux/fs/nfs/file.c 2004-05-31 02:18:03.000000000 -0400 +@@ -58,11 +58,6 @@ + setattr: nfs_notify_change, + }; + +-/* Hack for future NFS swap support */ +-#ifndef IS_SWAPFILE +-# define IS_SWAPFILE(inode) (0) +-#endif +- + /* + * Flush all dirty pages, and check for write errors. + * +@@ -217,8 +212,6 @@ + inode->i_ino, (unsigned long) count, (unsigned long) *ppos); + + result = -EBUSY; +- if (IS_SWAPFILE(inode)) +- goto out_swapfile; + result = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (result) + goto out; +@@ -230,10 +223,6 @@ + result = generic_file_write(file, buf, count, ppos); + out: + return result; +- +-out_swapfile: +- printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); +- goto out; + } + + /* +diff -Nurb src/linux/linux.orig/fs/nfs/nfs_syms.c src/linux/linux/fs/nfs/nfs_syms.c +--- src/linux/linux.orig/fs/nfs/nfs_syms.c 1969-12-31 19:00:00.000000000 -0500 ++++ src/linux/linux/fs/nfs/nfs_syms.c 2004-05-31 02:18:03.000000000 -0400 +@@ -0,0 +1,10 @@ ++#include <linux/config.h> ++#define __NO_VERSION__ ++#include <linux/module.h> ++#include <linux/types.h> ++#include <linux/sunrpc/clnt.h> ++#include <linux/nfs_fs.h> ++ ++EXPORT_SYMBOL(__nfs_refresh_inode); ++EXPORT_SYMBOL(nfs_write_attributes); ++ +diff -Nurb src/linux/linux.orig/fs/nfs/nfsswap.c src/linux/linux/fs/nfs/nfsswap.c +--- src/linux/linux.orig/fs/nfs/nfsswap.c 1969-12-31 19:00:00.000000000 -0500 ++++ src/linux/linux/fs/nfs/nfsswap.c 2004-05-31 02:18:03.000000000 -0400 +@@ -0,0 +1,350 @@ ++/* ++ * Swapping to files located on NFS mounted volumes ++ * Copyright (c) 2000 Claus-Justus Heine ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/types.h> ++#include 
<linux/slab.h> ++#include <linux/swap.h> ++#include <linux/pagemap.h> ++#include <linux/file.h> ++#include <linux/fs.h> ++#include <linux/socket.h> ++#include <linux/smp_lock.h> ++#include <net/netswapping.h> ++#include <net/sock.h> ++ ++#include <linux/sunrpc/clnt.h> ++#include <linux/nfs_fs.h> ++#include <linux/nfs_fs_sb.h> ++#include <asm/uaccess.h> ++ ++#define NFSDBG_FACILITY NFSDBG_SWAP ++ ++#define NFS_SWAP_ID "nfs file" ++ ++/* we cache some values here. In principle, we only need the file. ++ */ ++struct nfs_swap_data { ++ struct file *file; ++ struct inode *inode; ++ struct nfs_server *server; ++ struct socket *socket; ++}; ++ ++/* Nearly a clone of nfs_readpage_sync() in read.c, but "struct page" does not ++ * contain information about the file offset when swapping. So. ++ */ ++static int nfs_read_swap_page(struct page *page, ++ struct nfs_server *server, ++ struct inode *inode, ++ struct file *file) ++{ ++ unsigned int rsize = server->rsize; ++ unsigned int count = PAGE_SIZE; ++ unsigned int offset = 0; /* always at start of page */ ++ int result, eof; ++ struct rpc_cred *cred; ++ struct nfs_fattr fattr; ++ ++ cred = nfs_file_cred(file); ++ ++ do { ++ if (count < rsize) ++ rsize = count; ++ ++ lock_kernel(); ++ result = NFS_PROTO(inode)->read(inode, cred, ++ &fattr, ++ NFS_RPC_SWAPFLAGS, ++ offset, rsize, page, &eof); ++ nfs_refresh_inode(inode, &fattr); ++ unlock_kernel(); ++ ++ /* ++ * Even if we had a partial success we can't mark the page ++ * cache valid. 
++ */ ++ if (result < 0) { ++ if (result == -EISDIR) ++ result = -EINVAL; ++ goto io_error; ++ } ++ count -= result; ++ offset += result; ++ if (result < rsize) /* NFSv2ism */ ++ break; ++ } while (count); ++ ++ if (count) { ++ char *kaddr = kmap(page); ++ memset(kaddr + offset, 0, count); ++ kunmap(page); ++ } ++ flush_dcache_page(page); ++ result = 0; ++ ++io_error: ++ return result; ++} ++ ++/* Like nfs_writepage_sync(), but when swapping page->index does not encode ++ * the offset in the swap file alone. ++ * ++ */ ++static int nfs_write_swap_page(struct page *page, ++ struct nfs_server *server, ++ struct inode *inode, ++ struct file *file) ++{ ++ struct rpc_cred *cred; ++ unsigned int wsize = server->wsize; ++ unsigned int count = PAGE_SIZE; ++ unsigned int offset = 0; ++ int result; ++ struct nfs_writeverf verf; ++ struct nfs_fattr fattr; ++ ++ cred = nfs_file_cred(file); ++ ++ do { ++ if (count < wsize) ++ wsize = count; ++ ++ lock_kernel(); ++ result = NFS_PROTO(inode)->write(inode, cred, &fattr, ++ NFS_RW_SWAP|NFS_RW_SYNC, ++ offset, wsize, page, &verf); ++ nfs_write_attributes(inode, &fattr); ++ unlock_kernel(); ++ ++ if (result < 0) { ++ goto io_error; ++ } ++ if (result != wsize) ++ printk("NFS: short write, wsize=%u, result=%d\n", ++ wsize, result); ++ offset += wsize; ++ count -= wsize; ++ /* ++ * If we've extended the file, update the inode ++ * now so we don't invalidate the cache. ++ */ ++ if (offset > inode->i_size) ++ inode->i_size = offset; ++ } while (count); ++ ++ result = 0; ++ ++io_error: ++ ++ return result; ++} ++ ++/* Unluckily (for us) form 2.4.19 -> 2.4.20 the nfs-proc's where ++ * changed and expect now a proper file-mapping page, where index ++ * encodes the offset alone. ++ * ++ * What we do: we save the original value of page->index, initialize ++ * page->index to what the NFS/sun-rpc subsystem expects and restore ++ * the index later. 
++ */ ++static int nfs_rw_swap_page(int rw, struct page *page, ++ unsigned long offset, void *dptr) ++{ ++ int error; ++ struct nfs_swap_data *data = dptr; ++ unsigned long alloc_flag = current->flags & PF_MEMALLOC; ++ unsigned long page_index; ++ ++ if (!PageLocked(page)) ++ panic("nfs_rw_swap_page: page not locked for I/O"); ++ ++ /* prevent memory deadlocks */ ++ if (!(current->flags & PF_MEMALLOC)) { ++ dprintk("nfs_rw_swap_page: Setting PF_MEMALLOC\n"); ++ } ++ current->flags |= PF_MEMALLOC; ++ ++ /* now tweak the page->index field ... */ ++ page_index = page->index; ++ page->index = ((loff_t)offset*(loff_t)PAGE_SIZE) >> PAGE_CACHE_SHIFT; ++ ++ if (rw == WRITE) { ++ error = nfs_write_swap_page(page, ++ data->server, ++ data->inode, ++ data->file); ++ } else { ++ error = nfs_read_swap_page(page, ++ data->server, ++ data->inode, ++ data->file); ++ } ++ ++ if (!alloc_flag) { ++ current->flags &= ~PF_MEMALLOC; ++ } ++ ++ /* now restore the page->index field ... */ ++ page->index = page_index; ++ ++ if (error) { ++ /* Must mark the page invalid after I/O error */ ++ SetPageError(page); ++ ClearPageUptodate(page); ++ } else { ++ ClearPageError(page); ++ SetPageUptodate(page); ++ } ++ ++ if (!error) { /* in case of an error rw_swap_page() likes to unlock ++ * itself. ++ */ ++ UnlockPage(page); ++ } ++ ++ return error < 0 ? 0 : 1; ++} ++ ++static int is_nfsfile_swapping(unsigned int flags, ++ struct file * swapf, ++ void * data) ++{ ++ struct file * filp = (struct file *) data; ++ ++ /* Only check filp's that don't match the one already opened ++ * for us by sys_swapon(). Otherwise, we will always flag a ++ * busy swap file. 
++ */ ++ ++ if (swapf != filp) { ++ if (filp->f_dentry->d_inode == swapf->f_dentry->d_inode) ++ return 1; ++ } ++ return 0; ++} ++ ++static int nfs_swap_open(struct file *swapf, void **dptr) ++{ ++ int error = 0; ++ int swapfilesize; ++ struct nfs_swap_data *data; ++ int on = 1; ++ mm_segment_t fs; ++ struct inode *inode = swapf->f_dentry->d_inode; ++ ++ MOD_INC_USE_COUNT; ++ ++ if (!S_ISREG(inode->i_mode)) { ++ dprintk("nfs_swap_open: can't handle this swap file: %s\n", ++ swapf->f_dentry->d_name.name); ++ error = 0; /* not for us */ ++ goto bad_swap; ++ } ++ /* determine whether this file really is located on an NFS mounted ++ * volume ++ */ ++ if (!inode->i_sb || inode->i_sb->s_magic != NFS_SUPER_MAGIC) { ++ dprintk("nfs_swap_open: %s is not an NFS file.\n", ++ swapf->f_dentry->d_name.name); ++ error = 0; /* not for us */ ++ goto bad_swap; ++ } ++ ++ if (swap_run_test(is_nfsfile_swapping, swapf)) { ++ dprintk("nfs_swap_open: already swapping to %s\n", ++ swapf->f_dentry->d_name.name); ++ error = -EBUSY; ++ goto bad_swap; ++ } ++ swapfilesize = inode->i_size >> PAGE_SHIFT; ++ if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) { ++ error = -ENOMEM; ++ goto bad_swap; ++ } ++ data->file = swapf; ++ data->inode = inode; ++ data->server = NFS_SERVER(inode); ++ data->socket = data->server->client->cl_xprt->sock; ++ ++ /* set socket option SO_SWAPPING */ ++ fs = get_fs(); ++ set_fs(KERNEL_DS); ++ error = sock_setsockopt(data->socket, SOL_SOCKET, SO_SWAPPING, ++ (char *)&on, sizeof(on)); ++ set_fs(fs); ++ if (error) { ++ dprintk("nfs_swap_open: error setting SO_SWAPPING\n"); ++ goto bad_swap_2; ++ } ++ ++ *dptr = data; ++ return swapfilesize; ++ ++ bad_swap_2: ++ kfree(data); ++ bad_swap: ++ MOD_DEC_USE_COUNT; ++ return error; ++} ++ ++static int nfs_swap_release(struct file *swapf, void *dptr) ++{ ++ struct nfs_swap_data *data = (struct nfs_swap_data *)dptr; ++ int off = 0; ++ mm_segment_t fs; ++ int error; ++ ++#if 1 ++ if (swapf != data->file || ++ 
swapf->f_dentry->d_inode != data->inode || ++ !swapf->f_dentry->d_inode->i_sb || ++ swapf->f_dentry->d_inode->i_sb->s_magic != NFS_SUPER_MAGIC || ++ NFS_SERVER(swapf->f_dentry->d_inode) != data->server || ++ data->socket != data->server->client->cl_xprt->sock) { ++ panic("nfs_swap_release: nfs swap data messed up"); ++ } ++#endif ++ ++ /* remove socket option SO_SWAPPING */ ++ fs = get_fs(); ++ set_fs(KERNEL_DS); ++ error = sock_setsockopt(data->socket, SOL_SOCKET, SO_SWAPPING, ++ (char *)&off, sizeof(off)); ++ set_fs(fs); ++ if (error) { ++ dprintk("nfs_swap_open: error clearing SO_SWAPPING\n"); ++ } ++ kfree(data); ++ MOD_DEC_USE_COUNT; ++ return error; ++} ++ ++static struct swap_ops nfs_swap_ops = { ++ open: nfs_swap_open, ++ release: nfs_swap_release, ++ rw_page: nfs_rw_swap_page ++}; ++ ++int __init nfs_swap_init(void) ++{ ++ (void)register_swap_method(NFS_SWAP_ID, &nfs_swap_ops); ++ return 0; ++} ++ ++void __exit nfs_swap_exit(void) ++{ ++ unregister_swap_method(NFS_SWAP_ID); ++} ++ ++module_init(nfs_swap_init) ++module_exit(nfs_swap_exit) ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("(c) 1996-2002 cH (Claus-Justus Heine)"); ++MODULE_DESCRIPTION("Swapping to files located on volumes mounted via NFS"); +diff -Nurb src/linux/linux.orig/fs/nfs/read.c src/linux/linux/fs/nfs/read.c +--- src/linux/linux.orig/fs/nfs/read.c 2003-07-04 04:12:08.000000000 -0400 ++++ src/linux/linux/fs/nfs/read.c 2004-05-31 02:18:03.000000000 -0400 +@@ -50,11 +50,6 @@ + */ + static void nfs_readpage_result(struct rpc_task *task); + +-/* Hack for future NFS swap support */ +-#ifndef IS_SWAPFILE +-# define IS_SWAPFILE(inode) (0) +-#endif +- + static kmem_cache_t *nfs_rdata_cachep; + + static __inline__ struct nfs_read_data *nfs_readdata_alloc(void) +@@ -92,7 +87,6 @@ + int rsize = NFS_SERVER(inode)->rsize; + int result; + int count = PAGE_CACHE_SIZE; +- int flags = IS_SWAPFILE(inode)? 
NFS_RPC_SWAPFLAGS : 0; + int eof; + + dprintk("NFS: nfs_readpage_sync(%p)\n", page); +@@ -114,7 +108,7 @@ + offset, rsize, page); + + lock_kernel(); +- result = NFS_PROTO(inode)->read(inode, cred, &fattr, flags, ++ result = NFS_PROTO(inode)->read(inode, cred, &fattr, 0, + offset, rsize, page, &eof); + nfs_refresh_inode(inode, &fattr); + unlock_kernel(); +@@ -246,7 +240,7 @@ + task = &data->task; + + /* N.B. Do we need to test? Never called for swapfile inode */ +- flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); ++ flags = RPC_TASK_ASYNC; + + nfs_read_rpcsetup(head, data); + +@@ -476,8 +470,6 @@ + } + + error = nfs_readpage_sync(file, inode, page); +- if (error < 0 && IS_SWAPFILE(inode)) +- printk("Aiee.. nfs swap-in of page failed!\n"); + out: + return error; + +diff -Nurb src/linux/linux.orig/fs/nfs/write.c src/linux/linux/fs/nfs/write.c +--- src/linux/linux.orig/fs/nfs/write.c 2003-07-04 04:12:08.000000000 -0400 ++++ src/linux/linux/fs/nfs/write.c 2004-05-31 02:20:47.000000000 -0400 +@@ -3,7 +3,6 @@ + #include <linux/config.h> + #include <linux/types.h> + #include <linux/slab.h> +-#include <linux/swap.h> + #include <linux/pagemap.h> + #include <linux/file.h> + +@@ -46,11 +45,6 @@ + static void nfs_commit_done(struct rpc_task *); + #endif + +-/* Hack for future NFS swap support */ +-#ifndef IS_SWAPFILE +-# define IS_SWAPFILE(inode) (0) +-#endif +- + static kmem_cache_t *nfs_wdata_cachep; + + static __inline__ struct nfs_write_data *nfs_writedata_alloc(void) +@@ -82,7 +76,7 @@ + * For the moment, we just call nfs_refresh_inode(). 
+ */ + static __inline__ int +-nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr) ++__nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr) + { + if ((fattr->valid & NFS_ATTR_FATTR) && !(fattr->valid & NFS_ATTR_WCC)) { + fattr->pre_size = NFS_CACHE_ISIZE(inode); +@@ -93,6 +87,11 @@ + return nfs_refresh_inode(inode, fattr); + } + ++int nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr) ++{ ++ return __nfs_write_attributes(inode, fattr); ++} ++ + /* + * Write a page synchronously. + * Offset is the data offset within the page. +@@ -104,8 +103,7 @@ + struct rpc_cred *cred = NULL; + loff_t base; + unsigned int wsize = NFS_SERVER(inode)->wsize; +- int result, refresh = 0, written = 0, flags; +- u8 *buffer; ++ int result, refresh = 0, written = 0; + struct nfs_fattr fattr; + struct nfs_writeverf verf; + +@@ -121,15 +119,14 @@ + + base = page_offset(page) + offset; + +- flags = ((IS_SWAPFILE(inode)) ? NFS_RW_SWAP : 0) | NFS_RW_SYNC; +- + do { +- if (count < wsize && !IS_SWAPFILE(inode)) ++ if (count < wsize) + wsize = count; + +- result = NFS_PROTO(inode)->write(inode, cred, &fattr, flags, ++ result = NFS_PROTO(inode)->write(inode, cred, &fattr, ++ NFS_RW_SYNC, + offset, wsize, page, &verf); +- nfs_write_attributes(inode, &fattr); ++ __nfs_write_attributes(inode, &fattr); + + if (result < 0) { + /* Must mark the page invalid after I/O error */ +@@ -140,7 +137,6 @@ + printk("NFS: short write, wsize=%u, result=%d\n", + wsize, result); + refresh = 1; +- buffer += wsize; + base += wsize; + offset += wsize; + written += wsize; +@@ -979,7 +975,7 @@ + } + #endif + +- nfs_write_attributes(inode, resp->fattr); ++ __nfs_write_attributes(inode, resp->fattr); + while (!list_empty(&data->pages)) { + req = nfs_list_entry(data->pages.next); + nfs_list_remove_request(req); +@@ -1133,7 +1129,7 @@ + if (nfs_async_handle_jukebox(task)) + return; + +- nfs_write_attributes(inode, resp->fattr); ++ __nfs_write_attributes(inode, resp->fattr); + while 
(!list_empty(&data->pages)) { + req = nfs_list_entry(data->pages.next); + nfs_list_remove_request(req); +diff -Nurb src/linux/linux.orig/include/linux/fs.h src/linux/linux/include/linux/fs.h +--- src/linux/linux.orig/include/linux/fs.h 2004-05-31 02:06:19.000000000 -0400 ++++ src/linux/linux/include/linux/fs.h 2004-05-31 02:18:03.000000000 -0400 +@@ -1500,6 +1500,10 @@ + extern int inode_change_ok(struct inode *, struct iattr *); + extern int inode_setattr(struct inode *, struct iattr *); + ++/* for swapping to block devices */ ++void create_empty_buffers(struct page *page, kdev_t dev, unsigned long blocksize); ++void end_buffer_io_async(struct buffer_head * bh, int uptodate); ++ + /* + * Common dentry functions for inclusion in the VFS + * or in other stackable file systems. Some of these +diff -Nurb src/linux/linux.orig/include/linux/nfs_fs.h src/linux/linux/include/linux/nfs_fs.h +--- src/linux/linux.orig/include/linux/nfs_fs.h 2004-05-31 02:06:28.000000000 -0400 ++++ src/linux/linux/include/linux/nfs_fs.h 2004-05-31 02:18:03.000000000 -0400 +@@ -40,8 +40,8 @@ + */ + #define NFS_MAX_DIRCACHE 16 + +-#define NFS_MAX_FILE_IO_BUFFER_SIZE 32768 +-#define NFS_DEF_FILE_IO_BUFFER_SIZE 4096 ++#define NFS_MAX_FILE_IO_BUFFER_SIZE (8*PAGE_SIZE) ++#define NFS_DEF_FILE_IO_BUFFER_SIZE PAGE_SIZE + + /* + * The upper limit on timeouts for the exponential backoff algorithm. +@@ -205,6 +205,8 @@ + extern int nfs_writepage(struct page *); + extern int nfs_flush_incompatible(struct file *file, struct page *page); + extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); ++extern int nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr); ++ + /* + * Try to write back everything synchronously (but check the + * return value!) 
+@@ -375,6 +377,7 @@ + #define NFSDBG_XDR 0x0020 + #define NFSDBG_FILE 0x0040 + #define NFSDBG_ROOT 0x0080 ++#define NFSDBG_SWAP 0x0100 + #define NFSDBG_ALL 0xFFFF + + #ifdef __KERNEL__ +diff -Nurb src/linux/linux.orig/include/linux/slab.h src/linux/linux/include/linux/slab.h +--- src/linux/linux.orig/include/linux/slab.h 2004-05-31 02:06:19.000000000 -0400 ++++ src/linux/linux/include/linux/slab.h 2004-05-31 02:18:03.000000000 -0400 +@@ -39,6 +39,7 @@ + #define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */ + #define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */ + #define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */ ++#define SLAB_LOW_GFP_ORDER 0x00010000UL /* use as low a gfp order as possible */ + + /* flags passed to a constructor func */ + #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */ +diff -Nurb src/linux/linux.orig/include/linux/swap.h src/linux/linux/include/linux/swap.h +--- src/linux/linux.orig/include/linux/swap.h 2004-05-31 02:06:19.000000000 -0400 ++++ src/linux/linux/include/linux/swap.h 2004-05-31 02:18:03.000000000 -0400 +@@ -58,15 +58,29 @@ + #define SWAP_MAP_MAX 0x7fff + #define SWAP_MAP_BAD 0x8000 + ++struct swap_ops { ++ int (*open)(struct file *swapf, void **data); ++ int (*release)(struct file *swapf, void *data); ++ int (*rw_page)(int rw, ++ struct page *page, unsigned long offset, void *data); ++}; ++ ++struct swap_method { ++ struct swap_method *next; ++ char * name; ++ struct swap_ops *ops; ++ int use_count; ++}; ++ + /* + * The in-memory structure used to track swap areas. 
+ */ + struct swap_info_struct { + unsigned int flags; +- kdev_t swap_device; ++ struct file *swap_file; ++ struct swap_method *method; ++ void *data; + spinlock_t sdev_lock; +- struct dentry * swap_file; +- struct vfsmount *swap_vfsmnt; + unsigned short * swap_map; + unsigned int lowest_bit; + unsigned int highest_bit; +@@ -141,11 +155,15 @@ + extern int total_swap_pages; + extern unsigned int nr_swapfiles; + extern struct swap_info_struct swap_info[]; +-extern int is_swap_partition(kdev_t); ++extern int register_swap_method(char *name, struct swap_ops *ops); ++extern int unregister_swap_method(char *name); ++extern int swap_run_test(int (*test_fct)(unsigned int flags, ++ struct file *swap_file, ++ void *testdata), void *testdata); + extern void si_swapinfo(struct sysinfo *); + extern swp_entry_t get_swap_page(void); +-extern void get_swaphandle_info(swp_entry_t, unsigned long *, kdev_t *, +- struct inode **); ++struct swap_method *get_swaphandle_info(swp_entry_t entry, ++ unsigned long *offset, void **data); + extern int swap_duplicate(swp_entry_t); + extern int swap_count(struct page *); + extern int valid_swaphandles(swp_entry_t, unsigned long *); +diff -Nurb src/linux/linux.orig/include/net/netswapping.h src/linux/linux/include/net/netswapping.h +--- src/linux/linux.orig/include/net/netswapping.h 1969-12-31 19:00:00.000000000 -0500 ++++ src/linux/linux/include/net/netswapping.h 2004-05-31 02:18:03.000000000 -0400 +@@ -0,0 +1,47 @@ ++#ifndef _LINUX_NETSWAPPING_H ++#define _LINUX_NETSWAPPING_H ++ ++#include <linux/swap.h> ++#include <linux/init.h> ++ ++/* It is a mess. 
Socket options are defined in asm-ARCH/socket.h */ ++ ++#define SO_SWAPPING 0x00100000 /* hopefully not used by anybody else */ ++ ++#ifdef __KERNEL__ ++ ++#define CTL_NETSWAP 0x00100000 ++ ++enum { ++ NET_SWAP_DROPPED = 1, ++ NET_SWAP_DROP_THRESHOLD = 2, ++ NET_SWAP_SOCK_COUNT = 3 ++}; ++ ++extern unsigned int netswap_free_pages_min; ++extern int netswap_sock_count; ++extern unsigned int netswap_dropped; ++ ++/* this is "#defined" and not inline because sock.h includes us, but we need ++ * the "struct sock" definition. ++ */ ++#define netswap_low_memory(sk, skb) \ ++({ \ ++ int _ret = 0; \ ++ \ ++ if (netswap_sock_count > 0 && /* anybody swapping via network? */ \ ++ !(sk)->swapping && /* but we are not needed for swapping */ \ ++ nr_free_pages() < netswap_free_pages_min) { /* so drop us */ \ ++ printk("netswap_low_memory: " \ ++ "dropping skb 0x%p@0x%p\n", skb, sk); \ ++ netswap_dropped ++; \ ++ _ret = 1; \ ++ } \ ++ _ret; \ ++}) ++ ++extern int __init netswap_init(void); ++ ++#endif ++ ++#endif +diff -Nurb src/linux/linux.orig/include/net/sock.h src/linux/linux/include/net/sock.h +--- src/linux/linux.orig/include/net/sock.h 2004-05-31 02:07:17.000000000 -0400 ++++ src/linux/linux/include/net/sock.h 2004-05-31 02:18:03.000000000 -0400 +@@ -103,6 +103,10 @@ + #include <linux/filter.h> + #endif + ++#ifdef CONFIG_NETSWAP ++#include <net/netswapping.h> ++#endif ++ + #include <asm/atomic.h> + #include <net/dst.h> + +@@ -536,6 +540,12 @@ + no_check, + broadcast, + bsdism; ++#ifdef CONFIG_NETSWAP ++ /* Increased by SO_SWAPPING with arg != 0, decreased by ++ * SO_SWAPPING with arg 0 ++ */ ++ int swapping; ++#endif + unsigned char debug; + unsigned char rcvtstamp; + unsigned char use_write_queue; +@@ -1165,6 +1175,11 @@ + return err; /* Toss packet */ + } + #endif /* CONFIG_FILTER */ ++#ifdef CONFIG_NETSWAP ++ /* an inline function defined in net/netswapping.h */ ++ if (netswap_low_memory(sk, skb)) ++ return -ENOMEM; ++#endif /* CONFIG_NETSWAP */ + + skb->dev = NULL; + 
skb_set_owner_r(skb, sk); +diff -Nurb src/linux/linux.orig/kernel/ksyms.c src/linux/linux/kernel/ksyms.c +--- src/linux/linux.orig/kernel/ksyms.c 2004-05-31 02:02:43.000000000 -0400 ++++ src/linux/linux/kernel/ksyms.c 2004-05-31 02:18:03.000000000 -0400 +@@ -41,6 +41,7 @@ + #include <linux/mm.h> + #include <linux/capability.h> + #include <linux/highuid.h> ++#include <linux/swapctl.h> + #include <linux/brlock.h> + #include <linux/fs.h> + #include <linux/tty.h> +@@ -127,6 +128,11 @@ + EXPORT_SYMBOL(kmap_prot); + EXPORT_SYMBOL(kmap_pte); + #endif ++EXPORT_SYMBOL(nr_free_pages); ++/* EXPORT_SYMBOL(freepages); */ ++EXPORT_SYMBOL(register_swap_method); ++EXPORT_SYMBOL(unregister_swap_method); ++EXPORT_SYMBOL(swap_run_test); + + /* filesystem internal functions */ + EXPORT_SYMBOL(def_blk_fops); +@@ -531,7 +537,7 @@ + EXPORT_SYMBOL(make_bad_inode); + EXPORT_SYMBOL(is_bad_inode); + EXPORT_SYMBOL(event); +-EXPORT_SYMBOL(brw_page); ++EXPORT_SYMBOL(end_buffer_io_async); + EXPORT_SYMBOL(__inode_dir_notify); + + #ifdef CONFIG_UID16 +diff -Nurb src/linux/linux.orig/mm/page_io.c src/linux/linux/mm/page_io.c +--- src/linux/linux.orig/mm/page_io.c 2003-07-04 04:12:29.000000000 -0400 ++++ src/linux/linux/mm/page_io.c 2004-05-31 02:18:03.000000000 -0400 +@@ -36,11 +36,8 @@ + static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page) + { + unsigned long offset; +- int zones[PAGE_SIZE/512]; +- int zones_used; +- kdev_t dev = 0; +- int block_size; +- struct inode *swapf = 0; ++ struct swap_method *method; ++ void *data; + + if (rw == READ) { + ClearPageUptodate(page); +@@ -48,30 +45,11 @@ + } else + kstat.pswpout++; + +- get_swaphandle_info(entry, &offset, &dev, &swapf); +- if (dev) { +- zones[0] = offset; +- zones_used = 1; +- block_size = PAGE_SIZE; +- } else if (swapf) { +- int i, j; +- unsigned int block = offset +- << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits); +- +- block_size = swapf->i_sb->s_blocksize; +- for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size) +- if 
(!(zones[i] = bmap(swapf,block++))) { +- printk("rw_swap_page: bad swap file\n"); +- return 0; +- } +- zones_used = i; +- dev = swapf->i_dev; +- } else { ++ method = get_swaphandle_info(entry, &offset, &data); ++ if (!method || !method->ops->rw_page(rw, page, offset, data)) { + return 0; + } + +- /* block_size == PAGE_SIZE/zones_used */ +- brw_page(rw, page, dev, zones, block_size); + return 1; + } + +diff -Nurb src/linux/linux.orig/mm/slab.c src/linux/linux/mm/slab.c +--- src/linux/linux.orig/mm/slab.c 2003-07-04 04:12:29.000000000 -0400 ++++ src/linux/linux/mm/slab.c 2004-05-31 02:18:03.000000000 -0400 +@@ -111,10 +111,12 @@ + # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ + SLAB_POISON | SLAB_HWCACHE_ALIGN | \ + SLAB_NO_REAP | SLAB_CACHE_DMA | \ +- SLAB_MUST_HWCACHE_ALIGN) ++ SLAB_MUST_HWCACHE_ALIGN | \ ++ SLAB_LOW_GFP_ORDER) + #else + # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \ +- SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN) ++ SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ ++ SLAB_LOW_GFP_ORDER) + #endif + + /* +@@ -247,8 +249,13 @@ + }; + + /* internal c_flags */ +-#define CFLGS_OFF_SLAB 0x010000UL /* slab management in own cache */ +-#define CFLGS_OPTIMIZE 0x020000UL /* optimized slab lookup */ ++#define CFLGS_OFF_SLAB 0x020000UL /* slab management in own cache */ ++#define CFLGS_OPTIMIZE 0x040000UL /* optimized slab lookup */ ++#define CFLGS_MASK (CFLGS_OFF_SLAB | CFLGS_OPTIMIZE) ++ ++#if (CFLGS_MASK & CREATE_MASK) ++# error BUG: internal and external SLAB flags overlap ++#endif + + /* c_dflags (dynamic flags). 
Need to hold the spinlock to access this member */ + #define DFLGS_GROWN 0x000001UL /* don't reap a recently grown */ +@@ -452,7 +459,12 @@ + snprintf(name, sizeof(name), "size-%Zd",sizes->cs_size); + if (!(sizes->cs_cachep = + kmem_cache_create(name, sizes->cs_size, +- 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) { ++ 0, ++#if CONFIG_NETSWAP ++ SLAB_LOW_GFP_ORDER| /* sorry */ ++#endif ++ SLAB_HWCACHE_ALIGN, ++ NULL, NULL))) { + BUG(); + } + +@@ -731,6 +743,8 @@ + break; + if (!cachep->num) + goto next; ++ if (cachep->gfporder == 0 && (flags & SLAB_LOW_GFP_ORDER)) ++ break; + if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) { + /* Oops, this num of objs will cause problems. */ + cachep->gfporder--; +diff -Nurb src/linux/linux.orig/mm/swapfile.c src/linux/linux/mm/swapfile.c +--- src/linux/linux.orig/mm/swapfile.c 2003-07-04 04:12:29.000000000 -0400 ++++ src/linux/linux/mm/swapfile.c 2004-05-31 02:18:03.000000000 -0400 +@@ -11,12 +11,17 @@ + #include <linux/swap.h> + #include <linux/swapctl.h> + #include <linux/blkdev.h> /* for blk_size */ ++#include <linux/file.h> + #include <linux/vmalloc.h> + #include <linux/pagemap.h> + #include <linux/shm.h> + + #include <asm/pgtable.h> + ++#ifdef CONFIG_KMOD ++#include <linux/kmod.h> ++#endif ++ + spinlock_t swaplock = SPIN_LOCK_UNLOCKED; + unsigned int nr_swapfiles; + int total_swap_pages; +@@ -31,8 +36,78 @@ + + struct swap_info_struct swap_info[MAX_SWAPFILES]; + ++static struct swap_method *swap_methods = NULL; ++ + #define SWAPFILE_CLUSTER 256 + ++int register_swap_method(char *name, struct swap_ops *ops) ++{ ++ struct swap_method *pos; ++ struct swap_method *new; ++ int result = 0; ++ ++ lock_kernel(); ++ ++ for (pos = swap_methods; pos; pos = pos->next) { ++ if (strcmp(pos->name, name) == 0) { ++ printk(KERN_ERR "register_swap_method: " ++ "method %s already registered\n", name); ++ result = -EBUSY; ++ goto out; ++ } ++ } ++ ++ if (!(new = kmalloc(sizeof(*new), GFP_KERNEL))) { ++ printk(KERN_ERR "register_swap_method: 
" ++ "no memory for new method \"%s\"\n", name); ++ result = -ENOMEM; ++ goto out; ++ } ++ ++ new->name = name; ++ new->ops = ops; ++ new->use_count = 0; ++ ++ /* ok, insert at top of list */ ++ printk("register_swap_method: method %s\n", name); ++ new->next = swap_methods; ++ swap_methods = new; ++ out: ++ unlock_kernel(); ++ return result; ++} ++ ++int unregister_swap_method(char *name) ++{ ++ struct swap_method **method, *next; ++ int result = 0; ++ ++ lock_kernel(); ++ ++ for (method = &swap_methods; *method; method = &(*method)->next) { ++ if (strcmp((*method)->name, name) == 0) { ++ if ((*method)->use_count > 0) { ++ printk(KERN_ERR "unregister_swap_method: " ++ "method \"%s\" is in use\n", name); ++ result = -EBUSY; ++ goto out; ++ } ++ ++ next = (*method)->next; ++ kfree(*method); ++ *method = next; ++ printk("unregister_swap_method: method %s\n", name); ++ goto out; ++ } ++ } ++ /* not found */ ++ printk("unregister_swap_method: no such method %s\n", name); ++ result = -ENOENT; ++ out: ++ unlock_kernel(); ++ return result; ++} ++ + static inline int scan_swap_map(struct swap_info_struct *si) + { + unsigned long offset; +@@ -711,13 +786,14 @@ + struct nameidata nd; + int i, type, prev; + int err; ++ struct file *swap_file; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + err = user_path_walk(specialfile, &nd); + if (err) +- goto out; ++ return err; + + lock_kernel(); + prev = -1; +@@ -725,15 +801,20 @@ + for (type = swap_list.head; type >= 0; type = swap_info[type].next) { + p = swap_info + type; + if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) { +- if (p->swap_file == nd.dentry) ++ if (p->swap_file && ++ p->swap_file->f_dentry == nd.dentry) + break; + } + prev = type; + } + err = -EINVAL; ++ /* p->swap_file contains all needed info, no need to keep nd, so ++ * release it now. 
++ */ ++ path_release(&nd); + if (type < 0) { + swap_list_unlock(); +- goto out_dput; ++ goto out; + } + + if (prev < 0) { +@@ -767,32 +848,30 @@ + total_swap_pages += p->pages; + p->flags = SWP_WRITEOK; + swap_list_unlock(); +- goto out_dput; ++ goto out; + } +- if (p->swap_device) +- blkdev_put(p->swap_file->d_inode->i_bdev, BDEV_SWAP); +- path_release(&nd); + ++ if (p->method->ops->release) ++ p->method->ops->release(p->swap_file, p->data); + swap_list_lock(); + swap_device_lock(p); +- nd.mnt = p->swap_vfsmnt; +- nd.dentry = p->swap_file; +- p->swap_vfsmnt = NULL; ++ p->method->use_count --; ++ p->method = NULL; ++ p->data = NULL; ++ swap_file = p->swap_file; + p->swap_file = NULL; +- p->swap_device = 0; + p->max = 0; + swap_map = p->swap_map; + p->swap_map = NULL; + p->flags = 0; + swap_device_unlock(p); + swap_list_unlock(); ++ filp_close(swap_file, NULL); + vfree(swap_map); + err = 0; + +-out_dput: +- unlock_kernel(); +- path_release(&nd); + out: ++ unlock_kernel(); + return err; + } + +@@ -805,18 +884,17 @@ + if (!page) + return -ENOMEM; + +- len += sprintf(buf, "Filename\t\t\tType\t\tSize\tUsed\tPriority\n"); ++ len += sprintf(buf, "%-32s%-16s%-8s%-8sPriority\n", ++ "Filename", "Type", "Size", "Used"); + for (i = 0 ; i < nr_swapfiles ; i++, ptr++) { + if ((ptr->flags & SWP_USED) && ptr->swap_map) { +- char * path = d_path(ptr->swap_file, ptr->swap_vfsmnt, ++ char * path = d_path(ptr->swap_file->f_dentry, ++ ptr->swap_file->f_vfsmnt, + page, PAGE_SIZE); + + len += sprintf(buf + len, "%-31s ", path); + +- if (!ptr->swap_device) +- len += sprintf(buf + len, "file\t\t"); +- else +- len += sprintf(buf + len, "partition\t"); ++ len += sprintf(buf + len, "%-15s ", ptr->method->name); + + usedswap = 0; + for (j = 0; j < ptr->max; ++j) +@@ -827,7 +905,7 @@ + default: + usedswap++; + } +- len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10), ++ len += sprintf(buf + len, "%-8d%-8d%d\n", ptr->pages << (PAGE_SHIFT - 10), + usedswap << (PAGE_SHIFT - 
10), ptr->prio); + } + } +@@ -835,18 +913,55 @@ + return len; + } + +-int is_swap_partition(kdev_t dev) { ++/* apply a test function to all active swap objects. E.g. for checking ++ * whether a partition is used for swapping ++ */ ++int swap_run_test(int (*test_fct)(unsigned int flags, ++ struct file * swap_file, ++ void *testdata), void *testdata) ++{ + struct swap_info_struct *ptr = swap_info; + int i; + + for (i = 0 ; i < nr_swapfiles ; i++, ptr++) { +- if (ptr->flags & SWP_USED) +- if (ptr->swap_device == dev) ++ if (ptr->swap_file && ++ test_fct(ptr->flags, ptr->swap_file, testdata)) + return 1; + } + return 0; + } + ++/* Walk through the list of known swap method until somebody wants to ++ * handle this file. Pick the first one which claims to be able to ++ * swap to this kind of file. ++ * ++ * return value: < 0: error, 0: not found, > 0: swapfilesize ++ */ ++int find_swap_method(struct file *swap_file, ++ struct swap_info_struct *p) ++{ ++ int swapfilesize = 0; ++ struct swap_method *method; ++ ++ p->method = NULL; ++ for (method = swap_methods; method; method = method->next) { ++ swapfilesize = method->ops->open(swap_file, &p->data); ++ if (swapfilesize == 0) { ++ continue; ++ } ++ if (swapfilesize > 0) { ++ p->method = method; ++ p->method->use_count ++; ++ p->swap_file = swap_file; ++ break; ++ } ++ if (swapfilesize < 0) { ++ break; ++ } ++ } ++ return swapfilesize; ++} ++ + /* + * Written 01/25/92 by Simmule Turner, heavily changed by Linus. 
+ * +@@ -855,8 +970,6 @@ + asmlinkage long sys_swapon(const char * specialfile, int swap_flags) + { + struct swap_info_struct * p; +- struct nameidata nd; +- struct inode * swap_inode; + unsigned int type; + int i, j, prev; + int error; +@@ -866,8 +979,9 @@ + int nr_good_pages = 0; + unsigned long maxpages = 1; + int swapfilesize; +- struct block_device *bdev = NULL; + unsigned short *swap_map; ++ char * tmp_specialfile; ++ struct file *swap_file; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; +@@ -886,8 +1000,7 @@ + nr_swapfiles = type+1; + p->flags = SWP_USED; + p->swap_file = NULL; +- p->swap_vfsmnt = NULL; +- p->swap_device = 0; ++ p->method = NULL; + p->swap_map = NULL; + p->lowest_bit = 0; + p->highest_bit = 0; +@@ -901,53 +1014,57 @@ + p->prio = --least_priority; + } + swap_list_unlock(); +- error = user_path_walk(specialfile, &nd); +- if (error) ++ ++ /* Open the swap using filp_open. Bail out on any errors. */ ++ tmp_specialfile = getname(specialfile); ++ if (IS_ERR(tmp_specialfile)) { ++ error = PTR_ERR(tmp_specialfile); + goto bad_swap_2; ++ } ++ p->swap_file = filp_open(tmp_specialfile, O_RDWR, 0600); ++ putname(tmp_specialfile); ++ if (IS_ERR(p->swap_file)) { ++ error = PTR_ERR(p->swap_file); ++ p->swap_file = NULL; /* nothing was opened: never pass an ERR_PTR to filp_close() */ ++ goto bad_swap_2; ++ } + +- p->swap_file = nd.dentry; +- p->swap_vfsmnt = nd.mnt; +- swap_inode = nd.dentry->d_inode; + error = -EINVAL; + +- if (S_ISBLK(swap_inode->i_mode)) { +- kdev_t dev = swap_inode->i_rdev; +- struct block_device_operations *bdops; +- devfs_handle_t de; +- +- p->swap_device = dev; +- set_blocksize(dev, PAGE_SIZE); +- +- bd_acquire(swap_inode); +- bdev = swap_inode->i_bdev; +- de = devfs_get_handle_from_inode(swap_inode); +- bdops = devfs_get_ops(de); /* Increments module use count */ +- if (bdops) bdev->bd_op = bdops; +- +- error = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_SWAP); +- devfs_put_ops(de);/*Decrement module use count now we're safe*/ +- if (error) +- goto bad_swap_2; +- set_blocksize(dev, PAGE_SIZE); +- error =
-ENODEV; +- if (!dev || (blk_size[MAJOR(dev)] && +- !blk_size[MAJOR(dev)][MINOR(dev)])) +- goto bad_swap; +- swapfilesize = 0; +- if (blk_size[MAJOR(dev)]) +- swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)] +- >> (PAGE_SHIFT - 10); +- } else if (S_ISREG(swap_inode->i_mode)) +- swapfilesize = swap_inode->i_size >> PAGE_SHIFT; +- else +- goto bad_swap; ++ swapfilesize = find_swap_method(p->swap_file, p); ++ if (swapfilesize < 0) { ++ error = swapfilesize; ++ goto bad_swap_1; ++ } ++#ifdef CONFIG_KMOD ++ if (swapfilesize == 0) { ++ (void)request_module("swapfile-mod"); ++ ++ swapfilesize = find_swap_method(p->swap_file, p); ++ if (swapfilesize < 0) { ++ error = swapfilesize; ++ goto bad_swap_1; ++ } ++ } ++#endif ++ if (swapfilesize == 0) { ++ printk("Don't know how to swap to this kind of file\n"); ++ goto bad_swap_1; /* free swap map */ ++ } ++ ++ /* After this point, the swap-file has been opened by the swap ++ * method. We must make sure to use the bad_swap label for any ++ * errors. ++ */ + + error = -EBUSY; + for (i = 0 ; i < nr_swapfiles ; i++) { + struct swap_info_struct *q = &swap_info[i]; + if (i == type || !q->swap_file) + continue; +- if (swap_inode->i_mapping == q->swap_file->d_inode->i_mapping) ++ if (p->swap_file->f_dentry->d_inode->i_mapping ++ == ++ q->swap_file->f_dentry->d_inode->i_mapping) + goto bad_swap; + } + +@@ -1083,17 +1199,27 @@ + swap_list_unlock(); + error = 0; + goto out; ++ + bad_swap: +- if (bdev) +- blkdev_put(bdev, BDEV_SWAP); ++ if (p->method->ops->release) ++ p->method->ops->release(p->swap_file, p->data); ++ swap_list_lock(); ++ p->method->use_count --; ++ p->method = NULL; ++ p->data = NULL; ++ swap_list_unlock(); ++ ++bad_swap_1: ++ swap_list_lock(); ++ swap_file = p->swap_file; ++ p->swap_file = NULL; ++ swap_list_unlock(); ++ filp_close(swap_file, NULL); ++ + bad_swap_2: ++ + swap_list_lock(); + swap_map = p->swap_map; +- nd.mnt = p->swap_vfsmnt; +- nd.dentry = p->swap_file; +- p->swap_device = 0; +- p->swap_file = NULL; +- 
p->swap_vfsmnt = NULL; + p->swap_map = NULL; + p->flags = 0; + if (!(swap_flags & SWAP_FLAG_PREFER)) +@@ -1101,7 +1227,7 @@ + swap_list_unlock(); + if (swap_map) + vfree(swap_map); +- path_release(&nd); ++ + out: + if (swap_header) + free_page((long) swap_header); +@@ -1217,8 +1343,8 @@ + /* + * Prior swap_duplicate protects against swap device deletion. + */ +-void get_swaphandle_info(swp_entry_t entry, unsigned long *offset, +- kdev_t *dev, struct inode **swapf) ++struct swap_method *get_swaphandle_info(swp_entry_t entry, ++ unsigned long *offset, void **data) + { + unsigned long type; + struct swap_info_struct *p; +@@ -1226,32 +1352,26 @@ + type = SWP_TYPE(entry); + if (type >= nr_swapfiles) { + printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_file, entry.val); +- return; ++ return NULL; + } + + p = &swap_info[type]; + *offset = SWP_OFFSET(entry); + if (*offset >= p->max && *offset != 0) { + printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_offset, entry.val); +- return; ++ return NULL; + } + if (p->swap_map && !p->swap_map[*offset]) { + printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_offset, entry.val); +- return; ++ return NULL; + } + if (!(p->flags & SWP_USED)) { + printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_file, entry.val); +- return; ++ return NULL; + } + +- if (p->swap_device) { +- *dev = p->swap_device; +- } else if (p->swap_file) { +- *swapf = p->swap_file->d_inode; +- } else { +- printk(KERN_ERR "rw_swap_page: no swap file or device\n"); +- } +- return; ++ *data = p->data; ++ return p->method; + } + + /* +diff -Nurb src/linux/linux.orig/net/Config.in src/linux/linux/net/Config.in +--- src/linux/linux.orig/net/Config.in 2003-07-04 04:12:29.000000000 -0400 ++++ src/linux/linux/net/Config.in 2004-05-31 02:18:03.000000000 -0400 +@@ -16,6 +16,9 @@ + fi + bool 'Socket Filtering' CONFIG_FILTER + tristate 'Unix domain sockets' CONFIG_UNIX ++if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then ++ bool 'Swapping via network sockets (EXPERIMENTAL)' CONFIG_NETSWAP ++fi + 
bool 'TCP/IP networking' CONFIG_INET + if [ "$CONFIG_INET" = "y" ]; then + source net/ipv4/Config.in +diff -Nurb src/linux/linux.orig/net/Makefile src/linux/linux/net/Makefile +--- src/linux/linux.orig/net/Makefile 2003-07-04 04:12:29.000000000 -0400 ++++ src/linux/linux/net/Makefile 2004-05-31 02:18:03.000000000 -0400 +@@ -51,6 +51,7 @@ + ifeq ($(CONFIG_NET),y) + obj-$(CONFIG_MODULES) += netsyms.o + obj-$(CONFIG_SYSCTL) += sysctl_net.o ++obj-$(CONFIG_NETSWAP) += netswapping.o + endif + + include $(TOPDIR)/Rules.make +diff -Nurb src/linux/linux.orig/net/core/sock.c src/linux/linux/net/core/sock.c +--- src/linux/linux.orig/net/core/sock.c 2003-10-14 04:09:32.000000000 -0400 ++++ src/linux/linux/net/core/sock.c 2004-05-31 02:18:03.000000000 -0400 +@@ -402,6 +402,21 @@ + ret = -ENONET; + break; + #endif ++#ifdef CONFIG_NETSWAP ++ case SO_SWAPPING: ++ if (valbool) { ++ if (!sk->swapping) { ++ netswap_sock_count ++; ++ } ++ sk->swapping ++; ++ } else if (sk->swapping > 0) { ++ sk->swapping --; ++ if (!sk->swapping) { ++ netswap_sock_count --; ++ } ++ } ++ break; ++#endif + /* We implement the SO_SNDLOWAT etc to + not be settable (1003.1g 5.3) */ + default: +@@ -552,6 +567,12 @@ + goto lenout; + } + ++#ifdef CONFIG_NETSWAP ++ case SO_SWAPPING: ++ v.val = sk->swapping; ++ break; ++#endif ++ + /* Dubious BSD thing... Probably nobody even uses it, but + * the UNIX standard wants it for whatever reason... -DaveM + */ +diff -Nurb src/linux/linux.orig/net/ipv4/tcp_ipv4.c src/linux/linux/net/ipv4/tcp_ipv4.c +--- src/linux/linux.orig/net/ipv4/tcp_ipv4.c 2003-10-14 04:09:33.000000000 -0400 ++++ src/linux/linux/net/ipv4/tcp_ipv4.c 2004-05-31 02:18:03.000000000 -0400 +@@ -1657,6 +1657,12 @@ + if (filter && sk_filter(skb, filter)) + goto discard; + #endif /* CONFIG_FILTER */ ++#ifdef CONFIG_NETSWAP ++ /* tcp doesn't use sock_queue_rcv_skb() ... 
*/ ++ /* an inline function defined in net/netswapping.h */ ++ if (netswap_low_memory(sk, skb)) ++ goto discard; ++#endif /* CONFIG_NETSWAP */ + + IP_INC_STATS_BH(IpInDelivers); + +diff -Nurb src/linux/linux.orig/net/ipv6/tcp_ipv6.c src/linux/linux/net/ipv6/tcp_ipv6.c +--- src/linux/linux.orig/net/ipv6/tcp_ipv6.c 2003-10-14 04:09:34.000000000 -0400 ++++ src/linux/linux/net/ipv6/tcp_ipv6.c 2004-05-31 02:18:03.000000000 -0400 +@@ -1424,6 +1424,12 @@ + if (filter && sk_filter(skb, filter)) + goto discard; + #endif /* CONFIG_FILTER */ ++#ifdef CONFIG_NETSWAP ++ /* tcp doesn't use sock_queue_rcv_skb() ... */ ++ /* an inline function defined in net/netswapping.h */ ++ if (netswap_low_memory(sk, skb)) ++ goto discard; ++#endif /* CONFIG_NETSWAP */ + + /* + * socket locking is here for SMP purposes as backlog rcv +diff -Nurb src/linux/linux.orig/net/netswapping.c src/linux/linux/net/netswapping.c +--- src/linux/linux.orig/net/netswapping.c 1969-12-31 19:00:00.000000000 -0500 ++++ src/linux/linux/net/netswapping.c 2004-05-31 02:18:03.000000000 -0400 +@@ -0,0 +1,76 @@ ++/* ++ * linux/net/swapping.c ++ * ++ * Support paging over network connections (inet only) ++ * ++ * (c) 2000 Claus-Justus Heine <heine@instmath.rwth-aachen.de> ++ */ ++ ++#include <linux/slab.h> ++#include <linux/swap.h> ++#include <linux/swapctl.h> ++#include <linux/skbuff.h> ++#include <linux/module.h> ++#include <linux/sysctl.h> ++#include <linux/init.h> ++#include <net/netswapping.h> ++#include <net/sock.h> ++#include <asm/uaccess.h> ++ ++unsigned int netswap_dropped; /* statistics */ ++unsigned int netswap_free_pages_min; ++int netswap_sock_count; /* how many sockets have swapping option set */ ++ ++#ifdef CONFIG_SYSCTL ++ ++static ctl_table netswap_table[] = { ++ {NET_SWAP_DROPPED, "dropped", ++ &netswap_dropped, sizeof(int), 0644, NULL, &proc_dointvec }, ++ {NET_SWAP_DROP_THRESHOLD, "threshold", ++ &netswap_free_pages_min, sizeof(int), 0644, NULL, &proc_dointvec }, ++ {NET_SWAP_SOCK_COUNT, 
"sock_count", ++ &netswap_sock_count, sizeof(int), 0444, NULL, &proc_dointvec }, ++ {0}, ++}; ++ ++static struct ctl_table_header *netswap_sysctl_header; ++ ++static ctl_table netswap_net_table[] = { ++ {CTL_NETSWAP, "swapping", NULL, 0, 0555, netswap_table}, ++ {0} ++}; ++ ++static ctl_table netswap_root_table[] = { ++ {CTL_NET, "net", NULL, 0, 0555, netswap_net_table}, ++ {0} ++}; ++ ++#endif ++ ++int __init netswap_init(void) ++{ ++ /* drop packets when below this threshold */ ++ netswap_free_pages_min = 32 /* freepages.min */; ++#ifdef CONFIG_SYSCTL ++ netswap_sysctl_header = register_sysctl_table(netswap_root_table, 0); ++#endif ++ return 0; ++} ++ ++void __exit netswap_exit(void) ++{ ++#ifdef CONFIG_SYSCTL ++ unregister_sysctl_table(netswap_sysctl_header); ++#endif ++} ++ ++/* linux/init.h -- VERY nice :-) ++ * ++ * On the other hand, we have no control over the order the initcalls ++ * are performed ... ++ * ++ * Actually, we are not compiled as module ... ++ */ ++ ++module_init(netswap_init) ++module_exit(netswap_exit) +diff -Nurb src/linux/linux.orig/net/netsyms.c src/linux/linux/net/netsyms.c +--- src/linux/linux.orig/net/netsyms.c 2004-05-31 02:02:49.000000000 -0400 ++++ src/linux/linux/net/netsyms.c 2004-05-31 02:18:03.000000000 -0400 +@@ -601,4 +601,10 @@ + EXPORT_SYMBOL(wireless_send_event); + #endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */ + ++#ifdef CONFIG_NETSWAP ++EXPORT_SYMBOL(netswap_sock_count); ++EXPORT_SYMBOL(netswap_free_pages_min); ++EXPORT_SYMBOL(netswap_dropped); ++#endif ++ + #endif /* CONFIG_NET */ +diff -Nurb src/linux/linux.orig/net/packet/af_packet.c src/linux/linux/net/packet/af_packet.c +--- src/linux/linux.orig/net/packet/af_packet.c 2003-10-14 04:09:35.000000000 -0400 ++++ src/linux/linux/net/packet/af_packet.c 2004-05-31 02:18:03.000000000 -0400 +@@ -449,6 +449,12 @@ + snaplen = res; + } + #endif /* CONFIG_FILTER */ ++#ifdef CONFIG_NETSWAP ++ /* packet doesn't use sock_queue_rcv_skb() ... 
*/ ++ /* an inline function defined in net/netswapping.h */ ++ if (netswap_low_memory(sk, skb)) ++ goto drop_n_restore; ++#endif /* CONFIG_NETSWAP */ + + if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf) + goto drop_n_acct; +@@ -496,7 +502,7 @@ + po->stats.tp_drops++; + spin_unlock(&sk->receive_queue.lock); + +-#ifdef CONFIG_FILTER ++#if defined(CONFIG_FILTER) || defined(CONFIG_NETSWAP) + drop_n_restore: + #endif + if (skb_head != skb->data && skb_shared(skb)) { +@@ -557,6 +563,12 @@ + snaplen = res; + } + #endif ++#ifdef CONFIG_NETSWAP ++ /* packet doesn't use sock_queue_rcv_skb() ... */ ++ /* an inline function defined in net/netswapping.h */ ++ if (netswap_low_memory(sk, skb)) ++ goto drop_n_restore; ++#endif /* CONFIG_NETSWAP */ + + if (sk->type == SOCK_DGRAM) { + macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; +diff -Nurb src/linux/linux.orig/net/sunrpc/sched.c src/linux/linux/net/sunrpc/sched.c +--- src/linux/linux.orig/net/sunrpc/sched.c 2003-07-04 04:12:33.000000000 -0400 ++++ src/linux/linux/net/sunrpc/sched.c 2004-05-31 02:18:03.000000000 -0400 +@@ -79,10 +79,11 @@ + */ + static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED; + ++#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE + /* + * This is the last-ditch buffer for NFS swap requests + */ +-static u32 swap_buffer[PAGE_SIZE >> 2]; ++static u32 swap_buffer[2*PAGE_SIZE >> 2]; + static long swap_buffer_used; + + /* +@@ -96,6 +97,7 @@ + { + clear_bit(1, &swap_buffer_used); + } ++#endif + + /* + * Disable the timer for a given RPC task. 
Should be called with +@@ -501,6 +503,7 @@ + __rpc_execute(struct rpc_task *task) + { + int status = 0; ++ unsigned long alloc_flag = current->flags & PF_MEMALLOC; + + dprintk("RPC: %4d rpc_execute flgs %x\n", + task->tk_pid, task->tk_flags); +@@ -510,6 +513,13 @@ + return 0; + } + ++ if (task->tk_flags & RPC_TASK_SWAPPER) { ++ if (!(current->flags & PF_MEMALLOC)) { ++ dprintk("__rpc_execute: Setting PF_MEMALLOC\n"); ++ } ++ current->flags |= PF_MEMALLOC; ++ } ++ + restarted: + while (1) { + /* +@@ -554,7 +564,8 @@ + rpc_set_sleeping(task); + if (RPC_IS_ASYNC(task)) { + spin_unlock_bh(&rpc_queue_lock); +- return 0; ++ status = 0; ++ goto out; + } + } + spin_unlock_bh(&rpc_queue_lock); +@@ -563,7 +574,12 @@ + /* sync task: sleep here */ + dprintk("RPC: %4d sync task going to sleep\n", + task->tk_pid); +- if (current->pid == rpciod_pid) ++ /* it's ok to wait for rpciod when swapping, ++ * because this means it needed memory and is ++ * doing the swap-out itself. ++ */ ++ if (current->pid == rpciod_pid && ++ !(task->tk_flags & RPC_TASK_SWAPPER)) + printk(KERN_ERR "RPC: rpciod waiting on sync task!\n"); + + __wait_event(task->tk_wait, !RPC_IS_SLEEPING(task)); +@@ -608,6 +624,10 @@ + /* Release all resources associated with the task */ + rpc_release_task(task); + ++ out: ++ if (!alloc_flag) { ++ current->flags &= ~PF_MEMALLOC; ++ } + return status; + } + +@@ -699,10 +719,16 @@ + { + u32 *buffer; + int gfp; ++ unsigned long alloc_flag = current->flags & PF_MEMALLOC; ++ void *ret = NULL; + +- if (flags & RPC_TASK_SWAPPER) ++ if (flags & RPC_TASK_SWAPPER) { + gfp = GFP_ATOMIC; +- else if (flags & RPC_TASK_ASYNC) ++ if (!(current->flags & PF_MEMALLOC)) { ++ dprintk("rpc_allocate: Setting PF_MEMALLOC\n"); ++ } ++ current->flags |= PF_MEMALLOC; ++ } else if (flags & RPC_TASK_ASYNC) + gfp = GFP_RPC; + else + gfp = GFP_KERNEL; +@@ -710,29 +736,44 @@ + do { + if ((buffer = (u32 *) kmalloc(size, gfp)) != NULL) { + dprintk("RPC: allocated buffer %p\n", buffer); +- return buffer; ++
ret = buffer; ++ goto out; + } ++#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE + if ((flags & RPC_TASK_SWAPPER) && size <= sizeof(swap_buffer) + && rpc_lock_swapbuf()) { + dprintk("RPC: used last-ditch swap buffer\n"); +- return swap_buffer; ++ ret = swap_buffer; ++ goto out; ++ } ++#endif ++ if (flags & RPC_TASK_ASYNC) { ++ ret = NULL; ++ goto out; + } +- if (flags & RPC_TASK_ASYNC) +- return NULL; + yield(); + } while (!signalled()); + +- return NULL; ++ out: ++ if (!alloc_flag) { ++ current->flags &= ~PF_MEMALLOC; ++ } ++ return ret; + } + + void + rpc_free(void *buffer) + { ++#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE + if (buffer != swap_buffer) { ++#endif + kfree(buffer); + return; ++#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE + } + rpc_unlock_swapbuf(); ++ printk("RPC: Released swap buffer\n"); ++#endif + } + + /* +diff -Nurb src/linux/linux.orig/net/sunrpc/xprt.c src/linux/linux/net/sunrpc/xprt.c +--- src/linux/linux.orig/net/sunrpc/xprt.c 2003-07-04 04:12:33.000000000 -0400 ++++ src/linux/linux/net/sunrpc/xprt.c 2004-05-31 02:18:03.000000000 -0400 +@@ -139,7 +139,7 @@ + __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) + { + if (!xprt->snd_task) { +- if (xprt->nocong || __xprt_get_cong(xprt, task)) ++ if (__xprt_get_cong(xprt, task)) + xprt->snd_task = task; + } + if (xprt->snd_task != task) { +@@ -179,7 +179,7 @@ + if (!task) + return; + } +- if (xprt->nocong || __xprt_get_cong(xprt, task)) ++ if (__xprt_get_cong(xprt, task)) + xprt->snd_task = task; + } + +@@ -276,6 +276,9 @@ + { + struct rpc_rqst *req = task->tk_rqstp; + ++ if (xprt->nocong || RPC_IS_SWAPPER(task)) ++ return 1; ++ + if (req->rq_cong) + return 1; + dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n", |