security/device_cgroup.c

/*
 * device_cgroup.c - device cgroup subsystem
 *
 * Copyright 2007 IBM Corp
 */

#include <linux/device_cgroup.h>
#include <linux/cgroup.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>
#include <linux/mutex.h>

#define ACC_MKNOD 1
#define ACC_READ 2
#define ACC_WRITE 4
#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE)

#define DEV_BLOCK 1
#define DEV_CHAR 2
#define DEV_ALL 4 /* this represents all devices */

static DEFINE_MUTEX(devcgroup_mutex);

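/*
 * A devcgroup's behavior is either "allow by default" (its exceptions then
 * deny access to specific devices) or "deny by default" (its exceptions then
 * grant access).  DEVCG_DEFAULT_NONE marks a group that is not online (yet).
 */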
enum devcg_behavior {
    DEVCG_DEFAULT_NONE,
    DEVCG_DEFAULT_ALLOW,
    DEVCG_DEFAULT_DENY,
};

/*
 * exception list locking rules:
 * hold devcgroup_mutex for update/read.
 * hold rcu_read_lock() for read.
 */

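/*
 * One exception entry: @type is DEV_BLOCK or DEV_CHAR, @access is a mask of
 * ACC_* bits, and ~0 in @major or @minor stands for "any major/minor".
 */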
struct dev_exception_item {
    u32 major, minor;
    short type;
    short access;
    struct list_head list;
    struct rcu_head rcu;
};

struct dev_cgroup {
    struct cgroup_subsys_state css;
    struct list_head exceptions;
    enum devcg_behavior behavior;
};

static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
{
    return container_of(s, struct dev_cgroup, css);
}

static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
{
    return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
}

static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
{
    return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
}

struct cgroup_subsys devices_subsys;

static int devcgroup_can_attach(struct cgroup *new_cgrp,
                struct cgroup_taskset *set)
{
    struct task_struct *task = cgroup_taskset_first(set);

    if (current != task && !capable(CAP_SYS_ADMIN))
        return -EPERM;
    return 0;
}

/*
 * called under devcgroup_mutex
 */
static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
{
    struct dev_exception_item *ex, *tmp, *new;

    lockdep_assert_held(&devcgroup_mutex);

    list_for_each_entry(ex, orig, list) {
        new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
        if (!new)
            goto free_and_exit;
        list_add_tail(&new->list, dest);
    }

    return 0;

free_and_exit:
    list_for_each_entry_safe(ex, tmp, dest, list) {
        list_del(&ex->list);
        kfree(ex);
    }
    return -ENOMEM;
}

/*
 * called under devcgroup_mutex
 */
static int dev_exception_add(struct dev_cgroup *dev_cgroup,
                 struct dev_exception_item *ex)
{
    struct dev_exception_item *excopy, *walk;

    lockdep_assert_held(&devcgroup_mutex);

    excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
    if (!excopy)
        return -ENOMEM;

    list_for_each_entry(walk, &dev_cgroup->exceptions, list) {
        if (walk->type != ex->type)
            continue;
        if (walk->major != ex->major)
            continue;
        if (walk->minor != ex->minor)
            continue;

        walk->access |= ex->access;
        kfree(excopy);
        excopy = NULL;
    }

    if (excopy != NULL)
        list_add_tail_rcu(&excopy->list, &dev_cgroup->exceptions);
    return 0;
}

/*
 * called under devcgroup_mutex
 */
static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
                 struct dev_exception_item *ex)
{
    struct dev_exception_item *walk, *tmp;

    lockdep_assert_held(&devcgroup_mutex);

    list_for_each_entry_safe(walk, tmp, &dev_cgroup->exceptions, list) {
        if (walk->type != ex->type)
            continue;
        if (walk->major != ex->major)
            continue;
        if (walk->minor != ex->minor)
            continue;

        walk->access &= ~ex->access;
        if (!walk->access) {
            list_del_rcu(&walk->list);
            kfree_rcu(walk, rcu);
        }
    }
}

static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
{
    struct dev_exception_item *ex, *tmp;

    list_for_each_entry_safe(ex, tmp, &dev_cgroup->exceptions, list) {
        list_del_rcu(&ex->list);
        kfree_rcu(ex, rcu);
    }
}

/**
 * dev_exception_clean - frees all entries of the exception list
 * @dev_cgroup: dev_cgroup with the exception list to be cleaned
 *
 * called under devcgroup_mutex
 */
static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
{
    lockdep_assert_held(&devcgroup_mutex);

    __dev_exception_clean(dev_cgroup);
}

static inline bool is_devcg_online(const struct dev_cgroup *devcg)
{
    return (devcg->behavior != DEVCG_DEFAULT_NONE);
}

/**
 * devcgroup_online - initializes devcgroup's behavior and exceptions based on
 * parent's
 * @cgroup: cgroup getting online
 * returns 0 in case of success, error code otherwise
 */
static int devcgroup_online(struct cgroup *cgroup)
{
    struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL;
    int ret = 0;

    mutex_lock(&devcgroup_mutex);
    dev_cgroup = cgroup_to_devcgroup(cgroup);
    if (cgroup->parent)
        parent_dev_cgroup = cgroup_to_devcgroup(cgroup->parent);

    if (parent_dev_cgroup == NULL)
        dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
    else {
        ret = dev_exceptions_copy(&dev_cgroup->exceptions,
                      &parent_dev_cgroup->exceptions);
        if (!ret)
            dev_cgroup->behavior = parent_dev_cgroup->behavior;
    }
    mutex_unlock(&devcgroup_mutex);

    return ret;
}

static void devcgroup_offline(struct cgroup *cgroup)
{
    struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup);

    mutex_lock(&devcgroup_mutex);
    dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
    mutex_unlock(&devcgroup_mutex);
}

/*
 * called from kernel/cgroup.c with cgroup_lock() held.
 */
static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup)
{
    struct dev_cgroup *dev_cgroup;

    dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
    if (!dev_cgroup)
        return ERR_PTR(-ENOMEM);
    INIT_LIST_HEAD(&dev_cgroup->exceptions);
    dev_cgroup->behavior = DEVCG_DEFAULT_NONE;

    return &dev_cgroup->css;
}

static void devcgroup_css_free(struct cgroup *cgroup)
{
    struct dev_cgroup *dev_cgroup;

    dev_cgroup = cgroup_to_devcgroup(cgroup);
    __dev_exception_clean(dev_cgroup);
    kfree(dev_cgroup);
}

#define DEVCG_ALLOW 1
#define DEVCG_DENY 2
#define DEVCG_LIST 3

#define MAJMINLEN 13
#define ACCLEN 4

static void set_access(char *acc, short access)
{
    int idx = 0;
    memset(acc, 0, ACCLEN);
    if (access & ACC_READ)
        acc[idx++] = 'r';
    if (access & ACC_WRITE)
        acc[idx++] = 'w';
    if (access & ACC_MKNOD)
        acc[idx++] = 'm';
}

static char type_to_char(short type)
{
    if (type == DEV_ALL)
        return 'a';
    if (type == DEV_CHAR)
        return 'c';
    if (type == DEV_BLOCK)
        return 'b';
    return 'X';
}

static void set_majmin(char *str, unsigned m)
{
    if (m == ~0)
        strcpy(str, "*");
    else
        sprintf(str, "%u", m);
}

static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
                struct seq_file *m)
{
    struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup);
    struct dev_exception_item *ex;
    char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];

    rcu_read_lock();
    /*
     * To preserve the compatibility:
     * - Only show the "all devices" when the default policy is to allow
     * - List the exceptions in case the default policy is to deny
     * This way, the file remains as a "whitelist of devices"
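     * E.g. the file reads "a *:* rwm" under the default-allow policy, or
     * one line per exception (such as "c 1:3 mr") under default-deny.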
     */
    if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
        set_access(acc, ACC_MASK);
        set_majmin(maj, ~0);
        set_majmin(min, ~0);
        seq_printf(m, "%c %s:%s %s\n", type_to_char(DEV_ALL),
               maj, min, acc);
    } else {
        list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) {
            set_access(acc, ex->access);
            set_majmin(maj, ex->major);
            set_majmin(min, ex->minor);
            seq_printf(m, "%c %s:%s %s\n", type_to_char(ex->type),
                   maj, min, acc);
        }
    }
    rcu_read_unlock();

    return 0;
}

/**
 * may_access - verifies if a new exception is part of what is allowed
 * by a dev cgroup based on the default policy +
 * exceptions. This is used to make sure a child cgroup
 * won't have more privileges than its parent or to
 * verify if a certain access is allowed.
 * @dev_cgroup: dev cgroup to be tested against
 * @refex: new exception
 * @behavior: behavior of the exception
 */
static bool may_access(struct dev_cgroup *dev_cgroup,
               struct dev_exception_item *refex,
               enum devcg_behavior behavior)
{
    struct dev_exception_item *ex;
    bool match = false;

    rcu_lockdep_assert(rcu_read_lock_held() ||
               lockdep_is_held(&devcgroup_mutex),
               "device_cgroup::may_access() called without proper synchronization");

    list_for_each_entry_rcu(ex, &dev_cgroup->exceptions, list) {
        if ((refex->type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
            continue;
        if ((refex->type & DEV_CHAR) && !(ex->type & DEV_CHAR))
            continue;
        if (ex->major != ~0 && ex->major != refex->major)
            continue;
        if (ex->minor != ~0 && ex->minor != refex->minor)
            continue;
        if (refex->access & (~ex->access))
            continue;
        match = true;
        break;
    }

    if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
        if (behavior == DEVCG_DEFAULT_ALLOW) {
            /* the exception will deny access to certain devices */
            return true;
        } else {
            /* the exception will allow access to certain devices */
            if (match)
                /*
                 * a new exception allowing access shouldn't
                 * match a parent's exception
                 */
                return false;
            return true;
        }
    } else {
        /* only behavior == DEVCG_DEFAULT_DENY allowed here */
        if (match)
            /* parent has an exception that matches the proposed */
            return true;
        else
            return false;
    }
    return false;
}

/*
 * parent_has_perm:
 * when adding a new allow rule to a device exception list, the rule
 * must be allowed in the parent device cgroup
 */
static int parent_has_perm(struct dev_cgroup *childcg,
                  struct dev_exception_item *ex)
{
    struct cgroup *pcg = childcg->css.cgroup->parent;
    struct dev_cgroup *parent;

    if (!pcg)
        return 1;
    parent = cgroup_to_devcgroup(pcg);
    return may_access(parent, ex, childcg->behavior);
}

/**
 * may_allow_all - checks if it's possible to change the behavior to
 * allow based on parent's rules.
 * @parent: device cgroup's parent
 * returns: != 0 in case it's allowed, 0 otherwise
 */
static inline int may_allow_all(struct dev_cgroup *parent)
{
    if (!parent)
        return 1;
    return parent->behavior == DEVCG_DEFAULT_ALLOW;
}

/**
 * revalidate_active_exceptions - walks through the active exception list and
 * revalidates the exceptions based on parent's
 * behavior and exceptions. The exceptions that
 * are no longer valid will be removed.
 * Called with devcgroup_mutex held.
 * @devcg: cgroup which exceptions will be checked
 *
 * This is one of the three key functions for hierarchy implementation.
 * This function is responsible for re-evaluating all the cgroup's active
 * exceptions due to a parent's exception change.
 * Refer to Documentation/cgroups/devices.txt for more details.
 */
static void revalidate_active_exceptions(struct dev_cgroup *devcg)
{
    struct dev_exception_item *ex;
    struct list_head *this, *tmp;

    list_for_each_safe(this, tmp, &devcg->exceptions) {
        ex = container_of(this, struct dev_exception_item, list);
        if (!parent_has_perm(devcg, ex))
            dev_exception_rm(devcg, ex);
    }
}

/**
 * propagate_exception - propagates a new exception to the children
 * @devcg_root: device cgroup that added a new exception
 * @ex: new exception to be propagated
 *
 * returns: 0 in case of success, != 0 in case of error
 */
static int propagate_exception(struct dev_cgroup *devcg_root,
                   struct dev_exception_item *ex)
{
    struct cgroup *root = devcg_root->css.cgroup, *pos;
    int rc = 0;

    rcu_read_lock();

    cgroup_for_each_descendant_pre(pos, root) {
        struct dev_cgroup *devcg = cgroup_to_devcgroup(pos);

        /*
         * Because devcgroup_mutex is held, no devcg will become
         * online or offline during the tree walk (see on/offline
         * methods), and online ones are safe to access outside RCU
         * read lock without bumping refcnt.
         */
        if (!is_devcg_online(devcg))
            continue;

        rcu_read_unlock();

        /*
         * if both the root's and devcg's behavior are allow, a new
         * restriction means adding to the exception list
         */
        if (devcg_root->behavior == DEVCG_DEFAULT_ALLOW &&
            devcg->behavior == DEVCG_DEFAULT_ALLOW) {
            rc = dev_exception_add(devcg, ex);
            if (rc)
                break;
        } else {
            /*
             * in the other possible cases:
             * root's behavior: allow, devcg's: deny
             * root's behavior: deny, devcg's: deny
             * the exception will be removed
             */
            dev_exception_rm(devcg, ex);
        }
        revalidate_active_exceptions(devcg);

        rcu_read_lock();
    }

    rcu_read_unlock();
    return rc;
}

static inline bool has_children(struct dev_cgroup *devcgroup)
{
    struct cgroup *cgrp = devcgroup->css.cgroup;

    return !list_empty(&cgrp->children);
}

/*
 * Modify the exception list using allow/deny rules.
 * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD
 * so we can give a container CAP_MKNOD to let it create devices but not
 * modify the exception list.
 * It seems likely we'll want to add a CAP_CONTAINER capability to allow
 * us to also grant CAP_SYS_ADMIN to containers without giving away the
 * device exception list controls, but for now we'll stick with CAP_SYS_ADMIN
 *
 * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting
 * new access is only allowed if you're in the top-level cgroup, or your
 * parent cgroup has the access you're asking for.
 */
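
/*
 * Rule strings have the form "<type> <major>:<minor> <access>", where <type>
 * is 'b' (block) or 'c' (char), '*' may stand for any major or minor number,
 * and <access> is some combination of 'r', 'w' and 'm' (mknod), e.g.
 * "b 8:0 rwm" or "c 1:3 mr".  A lone "a" switches the default behavior
 * instead (see the DEVCG_ALLOW/DEVCG_DENY cases below).
 */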
static int devcgroup_update_access(struct dev_cgroup *devcgroup,
                   int filetype, const char *buffer)
{
    const char *b;
    char temp[12]; /* 11 + 1 characters needed for a u32 */
    int count, rc = 0;
    struct dev_exception_item ex;
    struct cgroup *p = devcgroup->css.cgroup;
    struct dev_cgroup *parent = NULL;

    if (!capable(CAP_SYS_ADMIN))
        return -EPERM;

    if (p->parent)
        parent = cgroup_to_devcgroup(p->parent);

    memset(&ex, 0, sizeof(ex));
    b = buffer;

    switch (*b) {
    case 'a':
        switch (filetype) {
        case DEVCG_ALLOW:
            if (has_children(devcgroup))
                return -EINVAL;

            if (!may_allow_all(parent))
                return -EPERM;
            dev_exception_clean(devcgroup);
            devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
            if (!parent)
                break;

            rc = dev_exceptions_copy(&devcgroup->exceptions,
                         &parent->exceptions);
            if (rc)
                return rc;
            break;
        case DEVCG_DENY:
            if (has_children(devcgroup))
                return -EINVAL;

            dev_exception_clean(devcgroup);
            devcgroup->behavior = DEVCG_DEFAULT_DENY;
            break;
        default:
            return -EINVAL;
        }
        return 0;
    case 'b':
        ex.type = DEV_BLOCK;
        break;
    case 'c':
        ex.type = DEV_CHAR;
        break;
    default:
        return -EINVAL;
    }
    b++;
    if (!isspace(*b))
        return -EINVAL;
    b++;
    if (*b == '*') {
        ex.major = ~0;
        b++;
    } else if (isdigit(*b)) {
        memset(temp, 0, sizeof(temp));
        for (count = 0; count < sizeof(temp) - 1; count++) {
            temp[count] = *b;
            b++;
            if (!isdigit(*b))
                break;
        }
        rc = kstrtou32(temp, 10, &ex.major);
        if (rc)
            return -EINVAL;
    } else {
        return -EINVAL;
    }
    if (*b != ':')
        return -EINVAL;
    b++;

    /* read minor */
    if (*b == '*') {
        ex.minor = ~0;
        b++;
    } else if (isdigit(*b)) {
        memset(temp, 0, sizeof(temp));
        for (count = 0; count < sizeof(temp) - 1; count++) {
            temp[count] = *b;
            b++;
            if (!isdigit(*b))
                break;
        }
        rc = kstrtou32(temp, 10, &ex.minor);
        if (rc)
            return -EINVAL;
    } else {
        return -EINVAL;
    }
    if (!isspace(*b))
        return -EINVAL;
    for (b++, count = 0; count < 3; count++, b++) {
        switch (*b) {
        case 'r':
            ex.access |= ACC_READ;
            break;
        case 'w':
            ex.access |= ACC_WRITE;
            break;
        case 'm':
            ex.access |= ACC_MKNOD;
            break;
        case '\n':
        case '\0':
            count = 3;
            break;
        default:
            return -EINVAL;
        }
    }

    switch (filetype) {
    case DEVCG_ALLOW:
        if (!parent_has_perm(devcgroup, &ex))
            return -EPERM;
        /*
         * If the default policy is to allow by default, try to remove
         * a matching exception instead. And be silent about it: we
         * don't want to break compatibility
         */
        if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
            dev_exception_rm(devcgroup, &ex);
            return 0;
        }
        rc = dev_exception_add(devcgroup, &ex);
        break;
    case DEVCG_DENY:
        /*
         * If the default policy is to deny by default, try to remove
         * a matching exception instead. And be silent about it: we
         * don't want to break compatibility
         */
        if (devcgroup->behavior == DEVCG_DEFAULT_DENY)
            dev_exception_rm(devcgroup, &ex);
        else
            rc = dev_exception_add(devcgroup, &ex);

        if (rc)
            break;
        /* we only propagate new restrictions */
        rc = propagate_exception(devcgroup, &ex);
        break;
    default:
        rc = -EINVAL;
    }
    return rc;
}

static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
                  const char *buffer)
{
    int retval;

    mutex_lock(&devcgroup_mutex);
    retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
                     cft->private, buffer);
    mutex_unlock(&devcgroup_mutex);
    return retval;
}

static struct cftype dev_cgroup_files[] = {
    {
        .name = "allow",
        .write_string = devcgroup_access_write,
        .private = DEVCG_ALLOW,
    },
    {
        .name = "deny",
        .write_string = devcgroup_access_write,
        .private = DEVCG_DENY,
    },
    {
        .name = "list",
        .read_seq_string = devcgroup_seq_read,
        .private = DEVCG_LIST,
    },
    { } /* terminate */
};
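
/*
 * Example usage of the resulting control files, as described in
 * Documentation/cgroups/devices.txt:
 *
 *     echo a > devices.deny            # deny access to all devices
 *     echo 'c 1:3 mr' > devices.allow  # allow mknod/read of /dev/null
 *     cat devices.list                 # show the current whitelist
 */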

struct cgroup_subsys devices_subsys = {
    .name = "devices",
    .can_attach = devcgroup_can_attach,
    .css_alloc = devcgroup_css_alloc,
    .css_free = devcgroup_css_free,
    .css_online = devcgroup_online,
    .css_offline = devcgroup_offline,
    .subsys_id = devices_subsys_id,
    .base_cftypes = dev_cgroup_files,
};

/**
 * __devcgroup_check_permission - checks if an inode operation is permitted
 * @type: device type
 * @major: device major number
 * @minor: device minor number
 * @access: combination of ACC_WRITE, ACC_READ and ACC_MKNOD
 *
 * The check is made against the device cgroup of the calling task.
 *
 * returns 0 on success, -EPERM in case the operation is not permitted
 */
static int __devcgroup_check_permission(short type, u32 major, u32 minor,
                        short access)
{
    struct dev_cgroup *dev_cgroup;
    struct dev_exception_item ex;
    int rc;

    memset(&ex, 0, sizeof(ex));
    ex.type = type;
    ex.major = major;
    ex.minor = minor;
    ex.access = access;

    rcu_read_lock();
    dev_cgroup = task_devcgroup(current);
    rc = may_access(dev_cgroup, &ex, dev_cgroup->behavior);
    rcu_read_unlock();

    if (!rc)
        return -EPERM;

    return 0;
}

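/*
 * Entry point used by the VFS permission checks, normally via the
 * devcgroup_inode_permission() wrapper in <linux/device_cgroup.h>;
 * @mask carries the requested MAY_READ/MAY_WRITE bits.
 */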
int __devcgroup_inode_permission(struct inode *inode, int mask)
{
    short type, access = 0;

    if (S_ISBLK(inode->i_mode))
        type = DEV_BLOCK;
    if (S_ISCHR(inode->i_mode))
        type = DEV_CHAR;
    if (mask & MAY_WRITE)
        access |= ACC_WRITE;
    if (mask & MAY_READ)
        access |= ACC_READ;

    return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
            access);
}

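/*
 * Called from the VFS mknod path: checks whether the current task's device
 * cgroup allows creating the device node @dev of the given @mode.
 */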
int devcgroup_inode_mknod(int mode, dev_t dev)
{
    short type;

    if (!S_ISBLK(mode) && !S_ISCHR(mode))
        return 0;

    if (S_ISBLK(mode))
        type = DEV_BLOCK;
    else
        type = DEV_CHAR;

    return __devcgroup_check_permission(type, MAJOR(dev), MINOR(dev),
            ACC_MKNOD);

}

