Root/drivers/net/imq.c

1/*
2 * Pseudo-driver for the intermediate queue device.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Patrick McHardy, <kaber@trash.net>
10 *
11 * The first version was written by Martin Devera, <devik@cdi.cz>
12 *
13 * Credits: Jan Rafaj <imq2t@cedric.vabo.cz>
14 * - Update patch to 2.4.21
15 * Sebastian Strollo <sstrollo@nortelnetworks.com>
16 * - Fix "Dead-loop on netdevice imq"-issue
17 * Marcel Sebek <sebek64@post.cz>
18 * - Update to 2.6.2-rc1
19 *
20 * After some time of inactivity there is a group taking care
21 * of IMQ again: http://www.linuximq.net
22 *
23 *
24 * 2004/06/30 - New version of IMQ patch to kernels <=2.6.7
25 * including the following changes:
26 *
27 * - Correction of ipv6 support "+"s issue (Hasso Tepper)
28 * - Correction of imq_init_devs() issue that resulted in
29 * kernel OOPS unloading IMQ as module (Norbert Buchmuller)
30 * - Addition of functionality to choose number of IMQ devices
31 * during kernel config (Andre Correa)
32 * - Addition of functionality to choose how IMQ hooks on
33 * PRE and POSTROUTING (after or before NAT) (Andre Correa)
34 * - Cosmetic corrections (Norbert Buchmuller) (Andre Correa)
35 *
36 *
37 * 2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were
38 * released with almost no problems. 2.6.14-x was released
39 * with some important changes: nfcache was removed; After
40 * some weeks of trouble we figured out that some IMQ fields
41 * in skb were missing in skbuff.c - skb_clone and copy_skb_header.
42 * These functions are correctly patched by this new patch version.
43 *
44 * Thanks for all who helped to figure out all the problems with
45 * 2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX,
46 * Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully
47 * I didn't forget anybody). I apologize again for my lack of time.
48 *
49 *
50 * 2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead
51 * of qdisc_restart() and moved qdisc_run() to tasklet to avoid
52 * recursive locking. New initialization routines to fix 'rmmod' not
53 * working anymore. Used code from ifb.c. (Jussi Kivilinna)
54 *
55 * 2008/08/06 - 2.6.26 - (JK)
56 * - Replaced tasklet with 'netif_schedule()'.
57 * - Cleaned up and added comments for imq_nf_queue().
58 *
59 * 2009/04/12
 * - Add skb_save_cb/skb_restore_cb helper functions for backing up the
 * control buffer. This is needed because the qdisc layer on kernels
 * 2.6.27 and newer overwrites the control buffer. (Jussi Kivilinna)
63 * - Add better locking for IMQ device. Hopefully this will solve
64 * SMP issues. (Jussi Kivilinna)
65 * - Port to 2.6.27
66 * - Port to 2.6.28
67 * - Port to 2.6.29 + fix rmmod not working
68 *
69 * 2009/04/20 - (Jussi Kivilinna)
70 * - Use netdevice feature flags to avoid extra packet handling
71 * by core networking layer and possibly increase performance.
72 *
 * Also, many thanks to Pablo Sebastian Greco for making the initial
 * patch and to those who helped with the testing.
75 *
76 * More info at: http://www.linuximq.net/ (Andre Correa)
77 */
78
79#include <linux/module.h>
80#include <linux/kernel.h>
81#include <linux/moduleparam.h>
82#include <linux/list.h>
83#include <linux/skbuff.h>
84#include <linux/netdevice.h>
85#include <linux/etherdevice.h>
86#include <linux/rtnetlink.h>
87#include <linux/if_arp.h>
88#include <linux/netfilter.h>
89#include <linux/netfilter_ipv4.h>
90#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
91    #include <linux/netfilter_ipv6.h>
92#endif
93#include <linux/imq.h>
94#include <net/pkt_sched.h>
95#include <net/netfilter/nf_queue.h>
96
/* Forward declaration: the hook is referenced by the nf_hook_ops tables
 * below and defined later in this file. */
static nf_hookfn imq_nf_hook;
98
99static struct nf_hook_ops imq_ingress_ipv4 = {
100    .hook = imq_nf_hook,
101    .owner = THIS_MODULE,
102    .pf = PF_INET,
103    .hooknum = NF_INET_PRE_ROUTING,
104#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
105    .priority = NF_IP_PRI_MANGLE + 1
106#else
107    .priority = NF_IP_PRI_NAT_DST + 1
108#endif
109};
110
111static struct nf_hook_ops imq_egress_ipv4 = {
112    .hook = imq_nf_hook,
113    .owner = THIS_MODULE,
114    .pf = PF_INET,
115    .hooknum = NF_INET_POST_ROUTING,
116#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
117    .priority = NF_IP_PRI_LAST
118#else
119    .priority = NF_IP_PRI_NAT_SRC - 1
120#endif
121};
122
123#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
124static struct nf_hook_ops imq_ingress_ipv6 = {
125    .hook = imq_nf_hook,
126    .owner = THIS_MODULE,
127    .pf = PF_INET6,
128    .hooknum = NF_INET_PRE_ROUTING,
129#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
130    .priority = NF_IP6_PRI_MANGLE + 1
131#else
132    .priority = NF_IP6_PRI_NAT_DST + 1
133#endif
134};
135
136static struct nf_hook_ops imq_egress_ipv6 = {
137    .hook = imq_nf_hook,
138    .owner = THIS_MODULE,
139    .pf = PF_INET6,
140    .hooknum = NF_INET_POST_ROUTING,
141#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
142    .priority = NF_IP6_PRI_LAST
143#else
144    .priority = NF_IP6_PRI_NAT_SRC - 1
145#endif
146};
147#endif
148
/* Number of imq devices created at init time. Defaults to the Kconfig value
 * when one is provided, otherwise to IMQ_MAX_DEVS; also exposed as a module
 * parameter at the bottom of this file. */
#if defined(CONFIG_IMQ_NUM_DEVS)
static unsigned int numdevs = CONFIG_IMQ_NUM_DEVS;
#else
static unsigned int numdevs = IMQ_MAX_DEVS;
#endif

/* Taken (with bottom halves disabled) around nf_reinject() in
 * imq_nf_reinject(). */
static DEFINE_SPINLOCK(imq_nf_queue_lock);

/* Device pointers indexed by imq device number; filled lazily by
 * imq_nf_queue() and zeroed again on init failure and on module cleanup. */
static struct net_device *imq_devs_cache[IMQ_MAX_DEVS];
158
159
160static struct net_device_stats *imq_get_stats(struct net_device *dev)
161{
162    return &dev->stats;
163}
164
165/* called for packets kfree'd in qdiscs at places other than enqueue */
166static void imq_skb_destructor(struct sk_buff *skb)
167{
168    struct nf_queue_entry *entry = skb->nf_queue_entry;
169
170    if (entry) {
171        nf_queue_entry_release_refs(entry);
172        kfree(entry);
173    }
174
175    skb_restore_cb(skb); /* kfree backup */
176}
177
178static void imq_nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
179{
180    int status;
181
182    if (!entry->next_outfn) {
183        spin_lock_bh(&imq_nf_queue_lock);
184        nf_reinject(entry, verdict);
185        spin_unlock_bh(&imq_nf_queue_lock);
186        return;
187    }
188
189    rcu_read_lock();
190    local_bh_disable();
191    status = entry->next_outfn(entry, entry->next_queuenum);
192    local_bh_enable();
193    if (status < 0) {
194        nf_queue_entry_release_refs(entry);
195        kfree_skb(entry->skb);
196        kfree(entry);
197    }
198
199    rcu_read_unlock();
200}
201
202static int imq_dev_xmit(struct sk_buff *skb, struct net_device *dev)
203{
204    dev->stats.tx_bytes += skb->len;
205    dev->stats.tx_packets++;
206
207    skb->imq_flags = 0;
208    skb->destructor = NULL;
209
210    skb_restore_cb(skb); /* restore skb->cb */
211
212    dev->trans_start = jiffies;
213    imq_nf_reinject(skb->nf_queue_entry, NF_ACCEPT);
214    return 0;
215}
216
217static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num)
218{
219    struct net_device *dev;
220    struct sk_buff *skb_orig, *skb, *skb_shared;
221    struct Qdisc *q;
222    struct netdev_queue *txq;
223    int users, index;
224    int retval = -EINVAL;
225
226    index = entry->skb->imq_flags & IMQ_F_IFMASK;
227    if (unlikely(index > numdevs - 1)) {
228        if (net_ratelimit())
229            printk(KERN_WARNING
230                   "IMQ: invalid device specified, highest is %u\n",
231                   numdevs - 1);
232        retval = -EINVAL;
233        goto out;
234    }
235
236    /* check for imq device by index from cache */
237    dev = imq_devs_cache[index];
238    if (unlikely(!dev)) {
239        char buf[8];
240
241        /* get device by name and cache result */
242        snprintf(buf, sizeof(buf), "imq%d", index);
243        dev = dev_get_by_name(&init_net, buf);
244        if (!dev) {
245            /* not found ?!*/
246            BUG();
247            retval = -ENODEV;
248            goto out;
249        }
250
251        imq_devs_cache[index] = dev;
252        dev_put(dev);
253    }
254
255    if (unlikely(!(dev->flags & IFF_UP))) {
256        entry->skb->imq_flags = 0;
257        imq_nf_reinject(entry, NF_ACCEPT);
258        retval = 0;
259        goto out;
260    }
261    dev->last_rx = jiffies;
262
263    skb = entry->skb;
264    skb_orig = NULL;
265
266    /* skb has owner? => make clone */
267    if (unlikely(skb->destructor)) {
268        skb_orig = skb;
269        skb = skb_clone(skb, GFP_ATOMIC);
270        if (!skb) {
271            retval = -ENOMEM;
272            goto out;
273        }
274        entry->skb = skb;
275    }
276
277    skb->nf_queue_entry = entry;
278
279    dev->stats.rx_bytes += skb->len;
280    dev->stats.rx_packets++;
281
282    txq = dev_pick_tx(dev, skb);
283
284    q = rcu_dereference(txq->qdisc);
285    if (unlikely(!q->enqueue))
286        goto packet_not_eaten_by_imq_dev;
287
288    spin_lock_bh(qdisc_lock(q));
289
290    users = atomic_read(&skb->users);
291
292    skb_shared = skb_get(skb); /* increase reference count by one */
293    skb_save_cb(skb_shared); /* backup skb->cb, as qdisc layer will
294                    overwrite it */
295    qdisc_enqueue_root(skb_shared, q); /* might kfree_skb */
296
297    if (likely(atomic_read(&skb_shared->users) == users + 1)) {
298        kfree_skb(skb_shared); /* decrease reference count by one */
299
300        skb->destructor = &imq_skb_destructor;
301
302        /* cloned? */
303        if (skb_orig)
304            kfree_skb(skb_orig); /* free original */
305
306        spin_unlock_bh(qdisc_lock(q));
307
308        /* schedule qdisc dequeue */
309        __netif_schedule(q);
310
311        retval = 0;
312        goto out;
313    } else {
314        skb_restore_cb(skb_shared); /* restore skb->cb */
315        /* qdisc dropped packet and decreased skb reference count of
316         * skb, so we don't really want to and try refree as that would
317         * actually destroy the skb. */
318        spin_unlock_bh(qdisc_lock(q));
319        goto packet_not_eaten_by_imq_dev;
320    }
321
322packet_not_eaten_by_imq_dev:
323    /* cloned? restore original */
324    if (skb_orig) {
325        kfree_skb(skb);
326        entry->skb = skb_orig;
327    }
328    retval = -1;
329out:
330    return retval;
331}
332
333static struct nf_queue_handler nfqh = {
334    .name = "imq",
335    .outfn = imq_nf_queue,
336};
337
338static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb,
339                const struct net_device *indev,
340                const struct net_device *outdev,
341                int (*okfn)(struct sk_buff *))
342{
343    if (pskb->imq_flags & IMQ_F_ENQUEUE)
344        return NF_QUEUE;
345
346    return NF_ACCEPT;
347}
348
/* ndo_stop callback: stop the device's transmit queue. Always returns 0. */
static int imq_close(struct net_device *dev)
{
    netif_stop_queue(dev);
    return 0;
}
354
/* ndo_open callback: start the device's transmit queue. Always returns 0. */
static int imq_open(struct net_device *dev)
{
    netif_start_queue(dev);
    return 0;
}
360
361static const struct net_device_ops imq_netdev_ops = {
362    .ndo_open = imq_open,
363    .ndo_stop = imq_close,
364    .ndo_start_xmit = imq_dev_xmit,
365    .ndo_get_stats = imq_get_stats,
366};
367
368static void imq_setup(struct net_device *dev)
369{
370    dev->netdev_ops = &imq_netdev_ops;
371    dev->type = ARPHRD_VOID;
372    dev->mtu = 16000;
373    dev->tx_queue_len = 11000;
374    dev->flags = IFF_NOARP;
375    dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
376                  NETIF_F_GSO | NETIF_F_HW_CSUM |
377                  NETIF_F_HIGHDMA;
378}
379
380static int imq_validate(struct nlattr *tb[], struct nlattr *data[])
381{
382    int ret = 0;
383
384    if (tb[IFLA_ADDRESS]) {
385        if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
386            ret = -EINVAL;
387            goto end;
388        }
389        if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
390            ret = -EADDRNOTAVAIL;
391            goto end;
392        }
393    }
394    return 0;
395end:
396    printk(KERN_WARNING "IMQ: imq_validate failed (%d)\n", ret);
397    return ret;
398}
399
400static struct rtnl_link_ops imq_link_ops __read_mostly = {
401    .kind = "imq",
402    .priv_size = 0,
403    .setup = imq_setup,
404    .validate = imq_validate,
405};
406
407static int __init imq_init_hooks(void)
408{
409    int err;
410
411    nf_register_queue_imq_handler(&nfqh);
412
413    err = nf_register_hook(&imq_ingress_ipv4);
414    if (err)
415        goto err1;
416
417    err = nf_register_hook(&imq_egress_ipv4);
418    if (err)
419        goto err2;
420
421#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
422    err = nf_register_hook(&imq_ingress_ipv6);
423    if (err)
424        goto err3;
425
426    err = nf_register_hook(&imq_egress_ipv6);
427    if (err)
428        goto err4;
429#endif
430
431    return 0;
432
433#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
434err4:
435    nf_unregister_hook(&imq_ingress_ipv6);
436err3:
437    nf_unregister_hook(&imq_egress_ipv4);
438#endif
439err2:
440    nf_unregister_hook(&imq_ingress_ipv4);
441err1:
442    nf_unregister_queue_imq_handler();
443    return err;
444}
445
446static int __init imq_init_one(int index)
447{
448    struct net_device *dev;
449    int ret;
450
451    dev = alloc_netdev(0, "imq%d", imq_setup);
452    if (!dev)
453        return -ENOMEM;
454
455    ret = dev_alloc_name(dev, dev->name);
456    if (ret < 0)
457        goto fail;
458
459    dev->rtnl_link_ops = &imq_link_ops;
460    ret = register_netdevice(dev);
461    if (ret < 0)
462        goto fail;
463
464    return 0;
465fail:
466    free_netdev(dev);
467    return ret;
468}
469
470static int __init imq_init_devs(void)
471{
472    int err, i;
473
474    if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) {
475        printk(KERN_ERR "IMQ: numdevs has to be betweed 1 and %u\n",
476               IMQ_MAX_DEVS);
477        return -EINVAL;
478    }
479
480    rtnl_lock();
481    err = __rtnl_link_register(&imq_link_ops);
482
483    for (i = 0; i < numdevs && !err; i++)
484        err = imq_init_one(i);
485
486    if (err) {
487        __rtnl_link_unregister(&imq_link_ops);
488        memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
489    }
490    rtnl_unlock();
491
492    return err;
493}
494
495static int __init imq_init_module(void)
496{
497    int err;
498
499#if defined(CONFIG_IMQ_NUM_DEVS)
500    BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16);
501    BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2);
502    BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK);
503#endif
504
505    err = imq_init_devs();
506    if (err) {
507        printk(KERN_ERR "IMQ: Error trying imq_init_devs(net)\n");
508        return err;
509    }
510
511    err = imq_init_hooks();
512    if (err) {
513        printk(KERN_ERR "IMQ: Error trying imq_init_hooks()\n");
514        rtnl_link_unregister(&imq_link_ops);
515        memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
516        return err;
517    }
518
519    printk(KERN_INFO "IMQ driver loaded successfully.\n");
520
521#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
522    printk(KERN_INFO "\tHooking IMQ before NAT on PREROUTING.\n");
523#else
524    printk(KERN_INFO "\tHooking IMQ after NAT on PREROUTING.\n");
525#endif
526#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB)
527    printk(KERN_INFO "\tHooking IMQ before NAT on POSTROUTING.\n");
528#else
529    printk(KERN_INFO "\tHooking IMQ after NAT on POSTROUTING.\n");
530#endif
531
532    return 0;
533}
534
/* Unregister every netfilter hook registered by imq_init_hooks() (the IPv6
 * hooks only when IPv6 is configured) and then remove the IMQ queue
 * handler. */
static void __exit imq_unhook(void)
{
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
    nf_unregister_hook(&imq_ingress_ipv6);
    nf_unregister_hook(&imq_egress_ipv6);
#endif
    nf_unregister_hook(&imq_ingress_ipv4);
    nf_unregister_hook(&imq_egress_ipv4);

    nf_unregister_queue_imq_handler();
}
546
/* Unregister the "imq" link ops and clear the cached device pointers so no
 * stale entries remain in imq_devs_cache. */
static void __exit imq_cleanup_devs(void)
{
    rtnl_link_unregister(&imq_link_ops);
    memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
}
552
/* Module exit point: remove the netfilter hooks first, then tear down the
 * link ops and device cache, and log the unload. */
static void __exit imq_exit_module(void)
{
    imq_unhook();
    imq_cleanup_devs();
    printk(KERN_INFO "IMQ driver unloaded successfully.\n");
}
559
560module_init(imq_init_module);
561module_exit(imq_exit_module);
562
563module_param(numdevs, int, 0);
564MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will "
565            "be created)");
566MODULE_AUTHOR("http://www.linuximq.net");
567MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See "
568            "http://www.linuximq.net/ for more information.");
569MODULE_LICENSE("GPL");
570MODULE_ALIAS_RTNL_LINK("imq");
571
572

Archive Download this file



interactive