/*1* net/sched/ematch.c Extended Match API2*3* This program is free software; you can redistribute it and/or4* modify it under the terms of the GNU General Public License5* as published by the Free Software Foundation; either version6* 2 of the License, or (at your option) any later version.7*8* Authors: Thomas Graf <[email protected]>9*10* ==========================================================================11*12* An extended match (ematch) is a small classification tool not worth13* writing a full classifier for. Ematches can be interconnected to form14* a logic expression and get attached to classifiers to extend their15* functionatlity.16*17* The userspace part transforms the logic expressions into an array18* consisting of multiple sequences of interconnected ematches separated19* by markers. Precedence is implemented by a special ematch kind20* referencing a sequence beyond the marker of the current sequence21* causing the current position in the sequence to be pushed onto a stack22* to allow the current position to be overwritten by the position referenced23* in the special ematch. Matching continues in the new sequence until a24* marker is reached causing the position to be restored from the stack.25*26* Example:27* A AND (B1 OR B2) AND C AND D28*29* ------->-PUSH-------30* -->-- / -->-- \ -->--31* / \ / / \ \ / \32* +-------+-------+-------+-------+-------+--------+33* | A AND | B AND | C AND | D END | B1 OR | B2 END |34* +-------+-------+-------+-------+-------+--------+35* \ /36* --------<-POP---------37*38* where B is a virtual ematch referencing to sequence starting with B1.39*40* ==========================================================================41*42* How to write an ematch in 60 seconds43* ------------------------------------44*45* 1) Provide a matcher function:46* static int my_match(struct sk_buff *skb, struct tcf_ematch *m,47* struct tcf_pkt_info *info)48* {49* struct mydata *d = (struct mydata *) m->data;50*51* if (...matching goes here...)52* return 1;53* else54* return 0;55* }56*57* 2) Fill out a struct tcf_ematch_ops:58* static struct tcf_ematch_ops my_ops = {59* .kind = unique id,60* .datalen = sizeof(struct mydata),61* .match = my_match,62* .owner = THIS_MODULE,63* };64*65* 3) Register/Unregister your ematch:66* static int __init init_my_ematch(void)67* {68* return tcf_em_register(&my_ops);69* }70*71* static void __exit exit_my_ematch(void)72* {73* tcf_em_unregister(&my_ops);74* }75*76* module_init(init_my_ematch);77* module_exit(exit_my_ematch);78*79* 4) By now you should have two more seconds left, barely enough to80* open up a beer to watch the compilation going.81*/8283#include <linux/module.h>84#include <linux/slab.h>85#include <linux/types.h>86#include <linux/kernel.h>87#include <linux/errno.h>88#include <linux/rtnetlink.h>89#include <linux/skbuff.h>90#include <net/pkt_cls.h>9192static LIST_HEAD(ematch_ops);93static DEFINE_RWLOCK(ematch_mod_lock);9495static struct tcf_ematch_ops *tcf_em_lookup(u16 kind)96{97struct tcf_ematch_ops *e = NULL;9899read_lock(&ematch_mod_lock);100list_for_each_entry(e, &ematch_ops, link) {101if (kind == e->kind) {102if (!try_module_get(e->owner))103e = NULL;104read_unlock(&ematch_mod_lock);105return e;106}107}108read_unlock(&ematch_mod_lock);109110return NULL;111}112113/**114* tcf_em_register - register an extended match115*116* @ops: ematch operations lookup table117*118* This function must be called by ematches to announce their presence.119* The given @ops must have kind set to a unique identifier and the120* callback match() must be implemented. All other callbacks are optional121* and a fallback implementation is used instead.122*123* Returns -EEXISTS if an ematch of the same kind has already registered.124*/125int tcf_em_register(struct tcf_ematch_ops *ops)126{127int err = -EEXIST;128struct tcf_ematch_ops *e;129130if (ops->match == NULL)131return -EINVAL;132133write_lock(&ematch_mod_lock);134list_for_each_entry(e, &ematch_ops, link)135if (ops->kind == e->kind)136goto errout;137138list_add_tail(&ops->link, &ematch_ops);139err = 0;140errout:141write_unlock(&ematch_mod_lock);142return err;143}144EXPORT_SYMBOL(tcf_em_register);145146/**147* tcf_em_unregister - unregster and extended match148*149* @ops: ematch operations lookup table150*151* This function must be called by ematches to announce their disappearance152* for examples when the module gets unloaded. The @ops parameter must be153* the same as the one used for registration.154*155* Returns -ENOENT if no matching ematch was found.156*/157void tcf_em_unregister(struct tcf_ematch_ops *ops)158{159write_lock(&ematch_mod_lock);160list_del(&ops->link);161write_unlock(&ematch_mod_lock);162}163EXPORT_SYMBOL(tcf_em_unregister);164165static inline struct tcf_ematch *tcf_em_get_match(struct tcf_ematch_tree *tree,166int index)167{168return &tree->matches[index];169}170171172static int tcf_em_validate(struct tcf_proto *tp,173struct tcf_ematch_tree_hdr *tree_hdr,174struct tcf_ematch *em, struct nlattr *nla, int idx)175{176int err = -EINVAL;177struct tcf_ematch_hdr *em_hdr = nla_data(nla);178int data_len = nla_len(nla) - sizeof(*em_hdr);179void *data = (void *) em_hdr + sizeof(*em_hdr);180181if (!TCF_EM_REL_VALID(em_hdr->flags))182goto errout;183184if (em_hdr->kind == TCF_EM_CONTAINER) {185/* Special ematch called "container", carries an index186* referencing an external ematch sequence.187*/188u32 ref;189190if (data_len < sizeof(ref))191goto errout;192ref = *(u32 *) data;193194if (ref >= tree_hdr->nmatches)195goto errout;196197/* We do not allow backward jumps to avoid loops and jumps198* to our own position are of course illegal.199*/200if (ref <= idx)201goto errout;202203204em->data = ref;205} else {206/* Note: This lookup will increase the module refcnt207* of the ematch module referenced. In case of a failure,208* a destroy function is called by the underlying layer209* which automatically releases the reference again, therefore210* the module MUST not be given back under any circumstances211* here. Be aware, the destroy function assumes that the212* module is held if the ops field is non zero.213*/214em->ops = tcf_em_lookup(em_hdr->kind);215216if (em->ops == NULL) {217err = -ENOENT;218#ifdef CONFIG_MODULES219__rtnl_unlock();220request_module("ematch-kind-%u", em_hdr->kind);221rtnl_lock();222em->ops = tcf_em_lookup(em_hdr->kind);223if (em->ops) {224/* We dropped the RTNL mutex in order to225* perform the module load. Tell the caller226* to replay the request.227*/228module_put(em->ops->owner);229err = -EAGAIN;230}231#endif232goto errout;233}234235/* ematch module provides expected length of data, so we236* can do a basic sanity check.237*/238if (em->ops->datalen && data_len < em->ops->datalen)239goto errout;240241if (em->ops->change) {242err = em->ops->change(tp, data, data_len, em);243if (err < 0)244goto errout;245} else if (data_len > 0) {246/* ematch module doesn't provide an own change247* procedure and expects us to allocate and copy248* the ematch data.249*250* TCF_EM_SIMPLE may be specified stating that the251* data only consists of a u32 integer and the module252* does not expected a memory reference but rather253* the value carried.254*/255if (em_hdr->flags & TCF_EM_SIMPLE) {256if (data_len < sizeof(u32))257goto errout;258em->data = *(u32 *) data;259} else {260void *v = kmemdup(data, data_len, GFP_KERNEL);261if (v == NULL) {262err = -ENOBUFS;263goto errout;264}265em->data = (unsigned long) v;266}267}268}269270em->matchid = em_hdr->matchid;271em->flags = em_hdr->flags;272em->datalen = data_len;273274err = 0;275errout:276return err;277}278279static const struct nla_policy em_policy[TCA_EMATCH_TREE_MAX + 1] = {280[TCA_EMATCH_TREE_HDR] = { .len = sizeof(struct tcf_ematch_tree_hdr) },281[TCA_EMATCH_TREE_LIST] = { .type = NLA_NESTED },282};283284/**285* tcf_em_tree_validate - validate ematch config TLV and build ematch tree286*287* @tp: classifier kind handle288* @nla: ematch tree configuration TLV289* @tree: destination ematch tree variable to store the resulting290* ematch tree.291*292* This function validates the given configuration TLV @nla and builds an293* ematch tree in @tree. The resulting tree must later be copied into294* the private classifier data using tcf_em_tree_change(). You MUST NOT295* provide the ematch tree variable of the private classifier data directly,296* the changes would not be locked properly.297*298* Returns a negative error code if the configuration TLV contains errors.299*/300int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,301struct tcf_ematch_tree *tree)302{303int idx, list_len, matches_len, err;304struct nlattr *tb[TCA_EMATCH_TREE_MAX + 1];305struct nlattr *rt_match, *rt_hdr, *rt_list;306struct tcf_ematch_tree_hdr *tree_hdr;307struct tcf_ematch *em;308309memset(tree, 0, sizeof(*tree));310if (!nla)311return 0;312313err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy);314if (err < 0)315goto errout;316317err = -EINVAL;318rt_hdr = tb[TCA_EMATCH_TREE_HDR];319rt_list = tb[TCA_EMATCH_TREE_LIST];320321if (rt_hdr == NULL || rt_list == NULL)322goto errout;323324tree_hdr = nla_data(rt_hdr);325memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr));326327rt_match = nla_data(rt_list);328list_len = nla_len(rt_list);329matches_len = tree_hdr->nmatches * sizeof(*em);330331tree->matches = kzalloc(matches_len, GFP_KERNEL);332if (tree->matches == NULL)333goto errout;334335/* We do not use nla_parse_nested here because the maximum336* number of attributes is unknown. This saves us the allocation337* for a tb buffer which would serve no purpose at all.338*339* The array of rt attributes is parsed in the order as they are340* provided, their type must be incremental from 1 to n. Even341* if it does not serve any real purpose, a failure of sticking342* to this policy will result in parsing failure.343*/344for (idx = 0; nla_ok(rt_match, list_len); idx++) {345err = -EINVAL;346347if (rt_match->nla_type != (idx + 1))348goto errout_abort;349350if (idx >= tree_hdr->nmatches)351goto errout_abort;352353if (nla_len(rt_match) < sizeof(struct tcf_ematch_hdr))354goto errout_abort;355356em = tcf_em_get_match(tree, idx);357358err = tcf_em_validate(tp, tree_hdr, em, rt_match, idx);359if (err < 0)360goto errout_abort;361362rt_match = nla_next(rt_match, &list_len);363}364365/* Check if the number of matches provided by userspace actually366* complies with the array of matches. The number was used for367* the validation of references and a mismatch could lead to368* undefined references during the matching process.369*/370if (idx != tree_hdr->nmatches) {371err = -EINVAL;372goto errout_abort;373}374375err = 0;376errout:377return err;378379errout_abort:380tcf_em_tree_destroy(tp, tree);381return err;382}383EXPORT_SYMBOL(tcf_em_tree_validate);384385/**386* tcf_em_tree_destroy - destroy an ematch tree387*388* @tp: classifier kind handle389* @tree: ematch tree to be deleted390*391* This functions destroys an ematch tree previously created by392* tcf_em_tree_validate()/tcf_em_tree_change(). You must ensure that393* the ematch tree is not in use before calling this function.394*/395void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)396{397int i;398399if (tree->matches == NULL)400return;401402for (i = 0; i < tree->hdr.nmatches; i++) {403struct tcf_ematch *em = tcf_em_get_match(tree, i);404405if (em->ops) {406if (em->ops->destroy)407em->ops->destroy(tp, em);408else if (!tcf_em_is_simple(em))409kfree((void *) em->data);410module_put(em->ops->owner);411}412}413414tree->hdr.nmatches = 0;415kfree(tree->matches);416tree->matches = NULL;417}418EXPORT_SYMBOL(tcf_em_tree_destroy);419420/**421* tcf_em_tree_dump - dump ematch tree into a rtnl message422*423* @skb: skb holding the rtnl message424* @t: ematch tree to be dumped425* @tlv: TLV type to be used to encapsulate the tree426*427* This function dumps a ematch tree into a rtnl message. It is valid to428* call this function while the ematch tree is in use.429*430* Returns -1 if the skb tailroom is insufficient.431*/432int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)433{434int i;435u8 *tail;436struct nlattr *top_start;437struct nlattr *list_start;438439top_start = nla_nest_start(skb, tlv);440if (top_start == NULL)441goto nla_put_failure;442443NLA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);444445list_start = nla_nest_start(skb, TCA_EMATCH_TREE_LIST);446if (list_start == NULL)447goto nla_put_failure;448449tail = skb_tail_pointer(skb);450for (i = 0; i < tree->hdr.nmatches; i++) {451struct nlattr *match_start = (struct nlattr *)tail;452struct tcf_ematch *em = tcf_em_get_match(tree, i);453struct tcf_ematch_hdr em_hdr = {454.kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,455.matchid = em->matchid,456.flags = em->flags457};458459NLA_PUT(skb, i + 1, sizeof(em_hdr), &em_hdr);460461if (em->ops && em->ops->dump) {462if (em->ops->dump(skb, em) < 0)463goto nla_put_failure;464} else if (tcf_em_is_container(em) || tcf_em_is_simple(em)) {465u32 u = em->data;466nla_put_nohdr(skb, sizeof(u), &u);467} else if (em->datalen > 0)468nla_put_nohdr(skb, em->datalen, (void *) em->data);469470tail = skb_tail_pointer(skb);471match_start->nla_len = tail - (u8 *)match_start;472}473474nla_nest_end(skb, list_start);475nla_nest_end(skb, top_start);476477return 0;478479nla_put_failure:480return -1;481}482EXPORT_SYMBOL(tcf_em_tree_dump);483484static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,485struct tcf_pkt_info *info)486{487int r = em->ops->match(skb, em, info);488489return tcf_em_is_inverted(em) ? !r : r;490}491492/* Do not use this function directly, use tcf_em_tree_match instead */493int __tcf_em_tree_match(struct sk_buff *skb, struct tcf_ematch_tree *tree,494struct tcf_pkt_info *info)495{496int stackp = 0, match_idx = 0, res = 0;497struct tcf_ematch *cur_match;498int stack[CONFIG_NET_EMATCH_STACK];499500proceed:501while (match_idx < tree->hdr.nmatches) {502cur_match = tcf_em_get_match(tree, match_idx);503504if (tcf_em_is_container(cur_match)) {505if (unlikely(stackp >= CONFIG_NET_EMATCH_STACK))506goto stack_overflow;507508stack[stackp++] = match_idx;509match_idx = cur_match->data;510goto proceed;511}512513res = tcf_em_match(skb, cur_match, info);514515if (tcf_em_early_end(cur_match, res))516break;517518match_idx++;519}520521pop_stack:522if (stackp > 0) {523match_idx = stack[--stackp];524cur_match = tcf_em_get_match(tree, match_idx);525526if (tcf_em_early_end(cur_match, res))527goto pop_stack;528else {529match_idx++;530goto proceed;531}532}533534return res;535536stack_overflow:537if (net_ratelimit())538pr_warning("tc ematch: local stack overflow,"539" increase NET_EMATCH_STACK\n");540return -1;541}542EXPORT_SYMBOL(__tcf_em_tree_match);543544545