--- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_string.h @@ -0,0 +1,18 @@ +#ifndef _IPT_STRING_H +#define _IPT_STRING_H + +#define IPT_STRING_MAX_PATTERN_SIZE 128 +#define IPT_STRING_MAX_ALGO_NAME_SIZE 16 + +struct ipt_string_info +{ + u_int16_t from_offset; + u_int16_t to_offset; + char algo[IPT_STRING_MAX_ALGO_NAME_SIZE]; + char pattern[IPT_STRING_MAX_PATTERN_SIZE]; + u_int8_t patlen; + u_int8_t invert; + struct ts_config __attribute__((aligned(8))) *config; +}; + +#endif /*_IPT_STRING_H*/ --- a/net/ipv4/netfilter/Config.in +++ b/net/ipv4/netfilter/Config.in @@ -61,6 +61,7 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; fi if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then dep_tristate ' Unclean match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_UNCLEAN $CONFIG_IP_NF_IPTABLES + dep_tristate ' String match support (EXPERIMENTAL) ' CONFIG_IP_NF_MATCH_STRING $CONFIG_IP_NF_IPTABLES dep_tristate ' Owner match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_OWNER $CONFIG_IP_NF_IPTABLES dep_tristate ' Layer 7 match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_LAYER7 $CONFIG_IP_NF_CONNTRACK dep_mbool ' Layer 7 debugging output (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_LAYER7_DEBUG $CONFIG_IP_NF_MATCH_LAYER7 --- /dev/null +++ b/net/ipv4/netfilter/ipt_string.c @@ -0,0 +1,99 @@ +/* String matching match for iptables + * + * (C) 2005 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include "textsearch/textsearch.h" +#include "textsearch/textsearch.c" +#include "textsearch/ts_bm.c" +#include "textsearch/ts_kmp.c" + +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_DESCRIPTION("IP tables string match module"); +MODULE_LICENSE("GPL"); + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + struct iphdr *ip = skb->nh.iph; + struct ts_state state; + struct ipt_string_info *conf = (struct ipt_string_info *) matchinfo; + char *buf = (char *)ip+(ip->ihl*4); + int len = ntohs(ip->tot_len)-(ip->ihl*4); + + memset(&state, 0, sizeof(struct ts_state)); + + return (textsearch_find_continuous(conf->config, &state, buf, len) != UINT_MAX) && !conf->invert; +} + +#define STRING_TEXT_PRIV(m) ((struct ipt_string_info *) m) + +static int checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_string_info *conf = matchinfo; + struct ts_config *ts_conf; + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_string_info))) + return 0; + + /* Damn, can't handle this case properly with iptables... */ + if (conf->from_offset > conf->to_offset) + return 0; + + ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, + GFP_KERNEL, TS_AUTOLOAD); + if (IS_ERR(ts_conf)) + return 0; + + conf->config = ts_conf; + + return 1; +} + +static void destroy(void *matchinfo, unsigned int matchsize) +{ + textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); +} + +static struct ipt_match string_match = { + .name = "string", + .match = match, + .checkentry = checkentry, + .destroy = destroy, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + init_bm(); + init_kmp(); + return ipt_register_match(&string_match); +} + +static void __exit fini(void) +{ + exit_kmp(); + exit_bm(); + ipt_unregister_match(&string_match); +} + +module_init(init); +module_exit(fini); --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -99,6 +99,7 @@ obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_s obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o obj-$(CONFIG_IP_NF_MATCH_UNCLEAN) += ipt_unclean.o +obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o obj-$(CONFIG_IP_NF_MATCH_LAYER7) += ipt_layer7.o --- /dev/null +++ b/net/ipv4/netfilter/textsearch/textsearch.c @@ -0,0 +1,305 @@ +/* + * lib/textsearch.c Generic text search interface + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf + * Pablo Neira Ayuso + * + * ========================================================================== + * + * INTRODUCTION + * + * The textsearch infrastructure provides text searching facitilies for + * both linear and non-linear data. Individual search algorithms are + * implemented in modules and chosen by the user. + * + * ARCHITECTURE + * + * User + * +----------------+ + * | finish()|<--------------(6)-----------------+ + * |get_next_block()|<--------------(5)---------------+ | + * | | Algorithm | | + * | | +------------------------------+ + * | | | init() find() destroy() | + * | | +------------------------------+ + * | | Core API ^ ^ ^ + * | | +---------------+ (2) (4) (8) + * | (1)|----->| prepare() |---+ | | + * | (3)|----->| find()/next() |-----------+ | + * | (7)|----->| destroy() |----------------------+ + * +----------------+ +---------------+ + * + * (1) User configures a search by calling _prepare() specifying the + * search parameters such as the pattern and algorithm name. + * (2) Core requests the algorithm to allocate and initialize a search + * configuration according to the specified parameters. + * (3) User starts the search(es) by calling _find() or _next() to + * fetch subsequent occurrences. A state variable is provided + * to the algorihtm to store persistant variables. + * (4) Core eventually resets the search offset and forwards the find() + * request to the algorithm. + * (5) Algorithm calls get_next_block() provided by the user continously + * to fetch the data to be searched in block by block. + * (6) Algorithm invokes finish() after the last call to get_next_block + * to clean up any leftovers from get_next_block. (Optional) + * (7) User destroys the configuration by calling _destroy(). + * (8) Core notifies the algorithm to destroy algorithm specific + * allocations. (Optional) + * + * USAGE + * + * Before a search can be performed, a configuration must be created + * by calling textsearch_prepare() specyfing the searching algorithm and + * the pattern to look for. The returned configuration may then be used + * for an arbitary amount of times and even in parallel as long as a + * separate struct ts_state variable is provided to every instance. + * + * The actual search is performed by either calling textsearch_find_- + * continuous() for linear data or by providing an own get_next_block() + * implementation and calling textsearch_find(). Both functions return + * the position of the first occurrence of the patern or UINT_MAX if + * no match was found. Subsequent occurences can be found by calling + * textsearch_next() regardless of the linearity of the data. + * + * Once you're done using a configuration it must be given back via + * textsearch_destroy. + * + * EXAMPLE + * + * int pos; + * struct ts_config *conf; + * struct ts_state state; + * const char *pattern = "chicken"; + * const char *example = "We dance the funky chicken"; + * + * conf = textsearch_prepare("kmp", pattern, strlen(pattern), + * GFP_KERNEL, TS_AUTOLOAD); + * if (IS_ERR(conf)) { + * err = PTR_ERR(conf); + * goto errout; + * } + * + * pos = textsearch_find_continuous(conf, &state, example, strlen(example)); + * if (pos != UINT_MAX) + * panic("Oh my god, dancing chickens at %d\n", pos); + * + * textsearch_destroy(conf); + * + * ========================================================================== + */ + +#include +#include +#include +#include +#include +#include +#include "textsearch.h" + +static LIST_HEAD(ts_ops); +static spinlock_t ts_mod_lock = SPIN_LOCK_UNLOCKED; +static DECLARE_RWLOCK(ts_ops_lock); + +static inline struct ts_ops *lookup_ts_algo(const char *name) +{ + struct ts_ops *o; + + read_lock(&ts_ops_lock); + list_for_each_entry(o, &ts_ops, list) { + if (!strcmp(name, o->name)) { + MOD_INC_USE_COUNT; + read_unlock(&ts_ops_lock); + return o; + } + } + read_unlock(&ts_ops_lock); + + return NULL; +} + +/** + * textsearch_register - register a textsearch module + * @ops: operations lookup table + * + * This function must be called by textsearch modules to announce + * their presence. The specified &@ops must have %name set to a + * unique identifier and the callbacks find(), init(), get_pattern(), + * and get_pattern_len() must be implemented. + * + * Returns 0 or -EEXISTS if another module has already registered + * with same name. + */ +int textsearch_register(struct ts_ops *ops) +{ + int err = -EEXIST; + struct ts_ops *o; + + if (ops->name == NULL || ops->find == NULL || ops->init == NULL || + ops->get_pattern == NULL || ops->get_pattern_len == NULL) + return -EINVAL; + + spin_lock(&ts_mod_lock); + list_for_each_entry(o, &ts_ops, list) { + if (!strcmp(ops->name, o->name)) + goto errout; + } + + write_lock(&ts_ops_lock); + list_add_tail(&ops->list, &ts_ops); + write_unlock(&ts_ops_lock); + + err = 0; +errout: + spin_unlock(&ts_mod_lock); + return err; +} + +/** + * textsearch_unregister - unregister a textsearch module + * @ops: operations lookup table + * + * This function must be called by textsearch modules to announce + * their disappearance for examples when the module gets unloaded. + * The &ops parameter must be the same as the one during the + * registration. + * + * Returns 0 on success or -ENOENT if no matching textsearch + * registration was found. + */ +int textsearch_unregister(struct ts_ops *ops) +{ + int err = 0; + struct ts_ops *o; + + spin_lock(&ts_mod_lock); + list_for_each_entry(o, &ts_ops, list) { + if (o == ops) { + write_lock(&ts_ops_lock); + list_del(&o->list); + write_unlock(&ts_ops_lock); + goto out; + } + } + + err = -ENOENT; +out: + spin_unlock(&ts_mod_lock); + return err; +} + +struct ts_linear_state +{ + unsigned int len; + const void *data; +}; + +static unsigned int get_linear_data(unsigned int consumed, const u8 **dst, + struct ts_config *conf, + struct ts_state *state) +{ + struct ts_linear_state *st = (struct ts_linear_state *) state->cb; + + if (likely(consumed < st->len)) { + *dst = st->data + consumed; + return st->len - consumed; + } + + return 0; +} + +/** + * textsearch_find_continuous - search a pattern in continuous/linear data + * @conf: search configuration + * @state: search state + * @data: data to search in + * @len: length of data + * + * A simplified version of textsearch_find() for continuous/linear data. + * Call textsearch_next() to retrieve subsequent matches. + * + * Returns the position of first occurrence of the pattern or + * UINT_MAX if no occurrence was found. + */ +unsigned int textsearch_find_continuous(struct ts_config *conf, + struct ts_state *state, + const void *data, unsigned int len) +{ + struct ts_linear_state *st = (struct ts_linear_state *) state->cb; + + conf->get_next_block = get_linear_data; + st->data = data; + st->len = len; + + return textsearch_find(conf, state); +} + +/** + * textsearch_prepare - Prepare a search + * @algo: name of search algorithm + * @pattern: pattern data + * @len: length of pattern + * @gfp_mask: allocation mask + * @flags: search flags + * + * Looks up the search algorithm module and creates a new textsearch + * configuration for the specified pattern. Upon completion all + * necessary refcnts are held and the configuration must be put back + * using textsearch_put() after usage. + * + * Note: The format of the pattern may not be compatible between + * the various search algorithms. + * + * Returns a new textsearch configuration according to the specified + * parameters or a ERR_PTR(). + */ +struct ts_config *textsearch_prepare(const char *algo, const void *pattern, + unsigned int len, gfp_t gfp_mask, int flags) +{ + int err = -ENOENT; + struct ts_config *conf; + struct ts_ops *ops; + + ops = lookup_ts_algo(algo); + + if (ops == NULL) + goto errout; + + conf = ops->init(pattern, len, gfp_mask); + if (IS_ERR(conf)) { + err = PTR_ERR(conf); + goto errout; + } + + conf->ops = ops; + return conf; + +errout: + if (ops) + MOD_DEC_USE_COUNT; + + return ERR_PTR(err); +} + +/** + * textsearch_destroy - destroy a search configuration + * @conf: search configuration + * + * Releases all references of the configuration and frees + * up the memory. + */ +void textsearch_destroy(struct ts_config *conf) +{ + if (conf->ops) { + if (conf->ops->destroy) + conf->ops->destroy(conf); + MOD_DEC_USE_COUNT; + } + + kfree(conf); +} + --- /dev/null +++ b/net/ipv4/netfilter/textsearch/textsearch.h @@ -0,0 +1,182 @@ +#ifndef __LINUX_TEXTSEARCH_H +#define __LINUX_TEXTSEARCH_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +typedef int gfp_t; +struct ts_config; + +/** + * TS_AUTOLOAD - Automatically load textsearch modules when needed + */ +#define TS_AUTOLOAD 1 + +/** + * struct ts_state - search state + * @offset: offset for next match + * @cb: control buffer, for persistant variables of get_next_block() + */ +struct ts_state +{ + unsigned int offset; + char cb[40]; +}; + +/** + * struct ts_ops - search module operations + * @name: name of search algorithm + * @init: initialization function to prepare a search + * @find: find the next occurrence of the pattern + * @destroy: destroy algorithm specific parts of a search configuration + * @get_pattern: return head of pattern + * @get_pattern_len: return length of pattern + * @owner: module reference to algorithm + */ +struct ts_ops +{ + const char *name; + struct ts_config * (*init)(const void *, unsigned int, gfp_t); + unsigned int (*find)(struct ts_config *, + struct ts_state *); + void (*destroy)(struct ts_config *); + void * (*get_pattern)(struct ts_config *); + unsigned int (*get_pattern_len)(struct ts_config *); + struct module *owner; + struct list_head list; +}; + +/** + * struct ts_config - search configuration + * @ops: operations of chosen algorithm + * @get_next_block: callback to fetch the next block to search in + * @finish: callback to finalize a search + */ +struct ts_config +{ + struct ts_ops *ops; + + /** + * get_next_block - fetch next block of data + * @consumed: number of bytes consumed by the caller + * @dst: destination buffer + * @conf: search configuration + * @state: search state + * + * Called repeatedly until 0 is returned. Must assign the + * head of the next block of data to &*dst and return the length + * of the block or 0 if at the end. consumed == 0 indicates + * a new search. May store/read persistant values in state->cb. + */ + unsigned int (*get_next_block)(unsigned int consumed, + const u8 **dst, + struct ts_config *conf, + struct ts_state *state); + + /** + * finish - finalize/clean a series of get_next_block() calls + * @conf: search configuration + * @state: search state + * + * Called after the last use of get_next_block(), may be used + * to cleanup any leftovers. + */ + void (*finish)(struct ts_config *conf, + struct ts_state *state); +}; + +/** + * textsearch_next - continue searching for a pattern + * @conf: search configuration + * @state: search state + * + * Continues a search looking for more occurrences of the pattern. + * textsearch_find() must be called to find the first occurrence + * in order to reset the state. + * + * Returns the position of the next occurrence of the pattern or + * UINT_MAX if not match was found. + */ +static inline unsigned int textsearch_next(struct ts_config *conf, + struct ts_state *state) +{ + unsigned int ret = conf->ops->find(conf, state); + + if (conf->finish) + conf->finish(conf, state); + + return ret; +} + +/** + * textsearch_find - start searching for a pattern + * @conf: search configuration + * @state: search state + * + * Returns the position of first occurrence of the pattern or + * UINT_MAX if no match was found. + */ +static inline unsigned int textsearch_find(struct ts_config *conf, + struct ts_state *state) +{ + state->offset = 0; + return textsearch_next(conf, state); +} + +/** + * textsearch_get_pattern - return head of the pattern + * @conf: search configuration + */ +static inline void *textsearch_get_pattern(struct ts_config *conf) +{ + return conf->ops->get_pattern(conf); +} + +/** + * textsearch_get_pattern_len - return length of the pattern + * @conf: search configuration + */ +static inline unsigned int textsearch_get_pattern_len(struct ts_config *conf) +{ + return conf->ops->get_pattern_len(conf); +} + +extern int textsearch_register(struct ts_ops *); +extern int textsearch_unregister(struct ts_ops *); +extern struct ts_config *textsearch_prepare(const char *, const void *, + unsigned int, gfp_t, int); +extern void textsearch_destroy(struct ts_config *conf); +extern unsigned int textsearch_find_continuous(struct ts_config *, + struct ts_state *, + const void *, unsigned int); + + +#define TS_PRIV_ALIGNTO 8 +#define TS_PRIV_ALIGN(len) (((len) + TS_PRIV_ALIGNTO-1) & ~(TS_PRIV_ALIGNTO-1)) + +static inline struct ts_config *alloc_ts_config(size_t payload, + gfp_t gfp_mask) +{ + struct ts_config *conf; + + conf = kmalloc(TS_PRIV_ALIGN(sizeof(*conf)) + payload, gfp_mask); + if (conf == NULL) + return ERR_PTR(-ENOMEM); + + memset(conf, 0, TS_PRIV_ALIGN(sizeof(*conf)) + payload); + return conf; +} + +static inline void *ts_config_priv(struct ts_config *conf) +{ + return ((u8 *) conf + TS_PRIV_ALIGN(sizeof(struct ts_config))); +} + +#endif /* __KERNEL__ */ + +#endif --- /dev/null +++ b/net/ipv4/netfilter/textsearch/ts_bm.c @@ -0,0 +1,190 @@ +/* + * lib/ts_bm.c Boyer-Moore text search implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Pablo Neira Ayuso + * + * ========================================================================== + * + * Implements Boyer-Moore string matching algorithm: + * + * [1] A Fast String Searching Algorithm, R.S. Boyer and Moore. + * Communications of the Association for Computing Machinery, + * 20(10), 1977, pp. 762-772. + * http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf + * + * [2] Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004 + * http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf + * + * Note: Since Boyer-Moore (BM) performs searches for matchings from right + * to left, it's still possible that a matching could be spread over + * multiple blocks, in that case this algorithm won't find any coincidence. + * + * If you're willing to ensure that such thing won't ever happen, use the + * Knuth-Pratt-Morris (KMP) implementation instead. In conclusion, choose + * the proper string search algorithm depending on your setting. + * + * Say you're using the textsearch infrastructure for filtering, NIDS or + * any similar security focused purpose, then go KMP. Otherwise, if you + * really care about performance, say you're classifying packets to apply + * Quality of Service (QoS) policies, and you don't mind about possible + * matchings spread over multiple fragments, then go BM. + */ + +#include +#include +#include +#include +#include +#include "textsearch.h" + +/* Alphabet size, use ASCII */ +#define ASIZE 256 + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(args, format...) +#endif + +struct ts_bm +{ + u8 * pattern; + unsigned int patlen; + unsigned int bad_shift[ASIZE]; + unsigned int good_shift[0]; +}; + +static unsigned int bm_find(struct ts_config *conf, struct ts_state *state) +{ + struct ts_bm *bm = ts_config_priv(conf); + unsigned int i, text_len, consumed = state->offset; + const u8 *text; + int shift = bm->patlen, bs; + + for (;;) { + text_len = conf->get_next_block(consumed, &text, conf, state); + + if (unlikely(text_len == 0)) + break; + + while (shift < text_len) { + DEBUGP("Searching in position %d (%c)\n", + shift, text[shift]); + for (i = 0; i < bm->patlen; i++) + if (text[shift-i] != bm->pattern[bm->patlen-1-i]) + goto next; + + /* London calling... */ + DEBUGP("found!\n"); + return consumed += (shift-(bm->patlen-1)); + +next: bs = bm->bad_shift[text[shift-i]]; + + /* Now jumping to... */ + shift = max_t(int, shift-i+bs, shift+bm->good_shift[i]); + } + consumed += text_len; + } + + return UINT_MAX; +} + +static int subpattern(u8 *pattern, int i, int j, int g) +{ + int x = i+g-1, y = j+g-1, ret = 0; + + while(pattern[x--] == pattern[y--]) { + if (y < 0) { + ret = 1; + break; + } + if (--g == 0) { + ret = pattern[i-1] != pattern[j-1]; + break; + } + } + + return ret; +} + +static void bm_compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern, + unsigned int len) +{ + int i, j, g; + + for (i = 0; i < ASIZE; i++) + bm->bad_shift[i] = len; + for (i = 0; i < len - 1; i++) + bm->bad_shift[pattern[i]] = len - 1 - i; + + /* Compute the good shift array, used to match reocurrences + * of a subpattern */ + bm->good_shift[0] = 1; + for (i = 1; i < bm->patlen; i++) + bm->good_shift[i] = bm->patlen; + for (i = bm->patlen-1, g = 1; i > 0; g++, i--) { + for (j = i-1; j >= 1-g ; j--) + if (subpattern(bm->pattern, i, j, g)) { + bm->good_shift[g] = bm->patlen-j-g; + break; + } + } +} + +static struct ts_config *bm_init(const void *pattern, unsigned int len, + gfp_t gfp_mask) +{ + struct ts_config *conf; + struct ts_bm *bm; + unsigned int prefix_tbl_len = len * sizeof(unsigned int); + size_t priv_size = sizeof(*bm) + len + prefix_tbl_len; + + conf = alloc_ts_config(priv_size, gfp_mask); + if (IS_ERR(conf)) + return conf; + + bm = ts_config_priv(conf); + bm->patlen = len; + bm->pattern = (u8 *) bm->good_shift + prefix_tbl_len; + bm_compute_prefix_tbl(bm, pattern, len); + memcpy(bm->pattern, pattern, len); + + return conf; +} + +static void *bm_get_pattern(struct ts_config *conf) +{ + struct ts_bm *bm = ts_config_priv(conf); + return bm->pattern; +} + +static unsigned int bm_get_pattern_len(struct ts_config *conf) +{ + struct ts_bm *bm = ts_config_priv(conf); + return bm->patlen; +} + +static struct ts_ops bm_ops = { + .name = "bm", + .find = bm_find, + .init = bm_init, + .get_pattern = bm_get_pattern, + .get_pattern_len = bm_get_pattern_len, + .owner = THIS_MODULE, + .list = LIST_HEAD_INIT(bm_ops.list) +}; + +static int __init init_bm(void) +{ + return textsearch_register(&bm_ops); +} + +static void __exit exit_bm(void) +{ + textsearch_unregister(&bm_ops); +} --- /dev/null +++ b/net/ipv4/netfilter/textsearch/ts_kmp.c @@ -0,0 +1,141 @@ +/* + * lib/ts_kmp.c Knuth-Morris-Pratt text search implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf + * + * ========================================================================== + * + * Implements a linear-time string-matching algorithm due to Knuth, + * Morris, and Pratt [1]. Their algorithm avoids the explicit + * computation of the transition function DELTA altogether. Its + * matching time is O(n), for n being length(text), using just an + * auxiliary function PI[1..m], for m being length(pattern), + * precomputed from the pattern in time O(m). The array PI allows + * the transition function DELTA to be computed efficiently + * "on the fly" as needed. Roughly speaking, for any state + * "q" = 0,1,...,m and any character "a" in SIGMA, the value + * PI["q"] contains the information that is independent of "a" and + * is needed to compute DELTA("q", "a") [2]. Since the array PI + * has only m entries, whereas DELTA has O(m|SIGMA|) entries, we + * save a factor of |SIGMA| in the preprocessing time by computing + * PI rather than DELTA. + * + * [1] Cormen, Leiserson, Rivest, Stein + * Introdcution to Algorithms, 2nd Edition, MIT Press + * [2] See finite automation theory + */ + +#include +#include +#include +#include +#include "textsearch.h" + +struct ts_kmp +{ + u8 * pattern; + unsigned int pattern_len; + unsigned int prefix_tbl[0]; +}; + +static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state) +{ + struct ts_kmp *kmp = ts_config_priv(conf); + unsigned int i, q = 0, text_len, consumed = state->offset; + const u8 *text; + + for (;;) { + text_len = conf->get_next_block(consumed, &text, conf, state); + + if (unlikely(text_len == 0)) + break; + + for (i = 0; i < text_len; i++) { + while (q > 0 && kmp->pattern[q] != text[i]) + q = kmp->prefix_tbl[q - 1]; + if (kmp->pattern[q] == text[i]) + q++; + if (unlikely(q == kmp->pattern_len)) { + state->offset = consumed + i + 1; + return state->offset - kmp->pattern_len; + } + } + + consumed += text_len; + } + + return UINT_MAX; +} + +static inline void kmp_compute_prefix_tbl(const u8 *pattern, unsigned int len, + unsigned int *prefix_tbl) +{ + unsigned int k, q; + + for (k = 0, q = 1; q < len; q++) { + while (k > 0 && pattern[k] != pattern[q]) + k = prefix_tbl[k-1]; + if (pattern[k] == pattern[q]) + k++; + prefix_tbl[q] = k; + } +} + +static struct ts_config *kmp_init(const void *pattern, unsigned int len, + gfp_t gfp_mask) +{ + struct ts_config *conf; + struct ts_kmp *kmp; + unsigned int prefix_tbl_len = len * sizeof(unsigned int); + size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len; + + conf = alloc_ts_config(priv_size, gfp_mask); + if (IS_ERR(conf)) + return conf; + + kmp = ts_config_priv(conf); + kmp->pattern_len = len; + kmp_compute_prefix_tbl(pattern, len, kmp->prefix_tbl); + kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len; + memcpy(kmp->pattern, pattern, len); + + return conf; +} + +static void *kmp_get_pattern(struct ts_config *conf) +{ + struct ts_kmp *kmp = ts_config_priv(conf); + return kmp->pattern; +} + +static unsigned int kmp_get_pattern_len(struct ts_config *conf) +{ + struct ts_kmp *kmp = ts_config_priv(conf); + return kmp->pattern_len; +} + +static struct ts_ops kmp_ops = { + .name = "kmp", + .find = kmp_find, + .init = kmp_init, + .get_pattern = kmp_get_pattern, + .get_pattern_len = kmp_get_pattern_len, + .owner = THIS_MODULE, + .list = LIST_HEAD_INIT(kmp_ops.list) +}; + +static int __init init_kmp(void) +{ + return textsearch_register(&kmp_ops); +} + +static void __exit exit_kmp(void) +{ + textsearch_unregister(&kmp_ops); +} +