Root/
1 | /* |
2 | * Pid namespaces |
3 | * |
4 | * Authors: |
5 | * (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc. |
6 | * (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM |
7 | * Many thanks to Oleg Nesterov for comments and help |
8 | * |
9 | */ |
10 | |
11 | #include <linux/pid.h> |
12 | #include <linux/pid_namespace.h> |
13 | #include <linux/syscalls.h> |
14 | #include <linux/err.h> |
15 | #include <linux/acct.h> |
16 | #include <linux/slab.h> |
17 | #include <linux/proc_fs.h> |
18 | #include <linux/reboot.h> |
19 | |
20 | #define BITS_PER_PAGE (PAGE_SIZE*8) |
21 | |
22 | struct pid_cache { |
23 | int nr_ids; |
24 | char name[16]; |
25 | struct kmem_cache *cachep; |
26 | struct list_head list; |
27 | }; |
28 | |
29 | static LIST_HEAD(pid_caches_lh); |
30 | static DEFINE_MUTEX(pid_caches_mutex); |
31 | static struct kmem_cache *pid_ns_cachep; |
32 | |
33 | /* |
34 | * creates the kmem cache to allocate pids from. |
35 | * @nr_ids: the number of numerical ids this pid will have to carry |
36 | */ |
37 | |
38 | static struct kmem_cache *create_pid_cachep(int nr_ids) |
39 | { |
40 | struct pid_cache *pcache; |
41 | struct kmem_cache *cachep; |
42 | |
43 | mutex_lock(&pid_caches_mutex); |
44 | list_for_each_entry(pcache, &pid_caches_lh, list) |
45 | if (pcache->nr_ids == nr_ids) |
46 | goto out; |
47 | |
48 | pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL); |
49 | if (pcache == NULL) |
50 | goto err_alloc; |
51 | |
52 | snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids); |
53 | cachep = kmem_cache_create(pcache->name, |
54 | sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid), |
55 | 0, SLAB_HWCACHE_ALIGN, NULL); |
56 | if (cachep == NULL) |
57 | goto err_cachep; |
58 | |
59 | pcache->nr_ids = nr_ids; |
60 | pcache->cachep = cachep; |
61 | list_add(&pcache->list, &pid_caches_lh); |
62 | out: |
63 | mutex_unlock(&pid_caches_mutex); |
64 | return pcache->cachep; |
65 | |
66 | err_cachep: |
67 | kfree(pcache); |
68 | err_alloc: |
69 | mutex_unlock(&pid_caches_mutex); |
70 | return NULL; |
71 | } |
72 | |
73 | static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns) |
74 | { |
75 | struct pid_namespace *ns; |
76 | unsigned int level = parent_pid_ns->level + 1; |
77 | int i, err = -ENOMEM; |
78 | |
79 | ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); |
80 | if (ns == NULL) |
81 | goto out; |
82 | |
83 | ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); |
84 | if (!ns->pidmap[0].page) |
85 | goto out_free; |
86 | |
87 | ns->pid_cachep = create_pid_cachep(level + 1); |
88 | if (ns->pid_cachep == NULL) |
89 | goto out_free_map; |
90 | |
91 | kref_init(&ns->kref); |
92 | ns->level = level; |
93 | ns->parent = get_pid_ns(parent_pid_ns); |
94 | |
95 | set_bit(0, ns->pidmap[0].page); |
96 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); |
97 | |
98 | for (i = 1; i < PIDMAP_ENTRIES; i++) |
99 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); |
100 | |
101 | err = pid_ns_prepare_proc(ns); |
102 | if (err) |
103 | goto out_put_parent_pid_ns; |
104 | |
105 | return ns; |
106 | |
107 | out_put_parent_pid_ns: |
108 | put_pid_ns(parent_pid_ns); |
109 | out_free_map: |
110 | kfree(ns->pidmap[0].page); |
111 | out_free: |
112 | kmem_cache_free(pid_ns_cachep, ns); |
113 | out: |
114 | return ERR_PTR(err); |
115 | } |
116 | |
117 | static void destroy_pid_namespace(struct pid_namespace *ns) |
118 | { |
119 | int i; |
120 | |
121 | for (i = 0; i < PIDMAP_ENTRIES; i++) |
122 | kfree(ns->pidmap[i].page); |
123 | kmem_cache_free(pid_ns_cachep, ns); |
124 | } |
125 | |
126 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) |
127 | { |
128 | if (!(flags & CLONE_NEWPID)) |
129 | return get_pid_ns(old_ns); |
130 | if (flags & (CLONE_THREAD|CLONE_PARENT)) |
131 | return ERR_PTR(-EINVAL); |
132 | return create_pid_namespace(old_ns); |
133 | } |
134 | |
135 | void free_pid_ns(struct kref *kref) |
136 | { |
137 | struct pid_namespace *ns, *parent; |
138 | |
139 | ns = container_of(kref, struct pid_namespace, kref); |
140 | |
141 | parent = ns->parent; |
142 | destroy_pid_namespace(ns); |
143 | |
144 | if (parent != NULL) |
145 | put_pid_ns(parent); |
146 | } |
147 | |
148 | void zap_pid_ns_processes(struct pid_namespace *pid_ns) |
149 | { |
150 | int nr; |
151 | int rc; |
152 | struct task_struct *task; |
153 | |
154 | /* |
155 | * The last thread in the cgroup-init thread group is terminating. |
156 | * Find remaining pid_ts in the namespace, signal and wait for them |
157 | * to exit. |
158 | * |
159 | * Note: This signals each threads in the namespace - even those that |
160 | * belong to the same thread group, To avoid this, we would have |
161 | * to walk the entire tasklist looking a processes in this |
162 | * namespace, but that could be unnecessarily expensive if the |
163 | * pid namespace has just a few processes. Or we need to |
164 | * maintain a tasklist for each pid namespace. |
165 | * |
166 | */ |
167 | read_lock(&tasklist_lock); |
168 | nr = next_pidmap(pid_ns, 1); |
169 | while (nr > 0) { |
170 | rcu_read_lock(); |
171 | |
172 | task = pid_task(find_vpid(nr), PIDTYPE_PID); |
173 | if (task && !__fatal_signal_pending(task)) |
174 | send_sig_info(SIGKILL, SEND_SIG_FORCED, task); |
175 | |
176 | rcu_read_unlock(); |
177 | |
178 | nr = next_pidmap(pid_ns, nr); |
179 | } |
180 | read_unlock(&tasklist_lock); |
181 | |
182 | do { |
183 | clear_thread_flag(TIF_SIGPENDING); |
184 | rc = sys_wait4(-1, NULL, __WALL, NULL); |
185 | } while (rc != -ECHILD); |
186 | |
187 | if (pid_ns->reboot) |
188 | current->signal->group_exit_code = pid_ns->reboot; |
189 | |
190 | acct_exit_ns(pid_ns); |
191 | return; |
192 | } |
193 | |
194 | static int pid_ns_ctl_handler(struct ctl_table *table, int write, |
195 | void __user *buffer, size_t *lenp, loff_t *ppos) |
196 | { |
197 | struct ctl_table tmp = *table; |
198 | |
199 | if (write && !capable(CAP_SYS_ADMIN)) |
200 | return -EPERM; |
201 | |
202 | /* |
203 | * Writing directly to ns' last_pid field is OK, since this field |
204 | * is volatile in a living namespace anyway and a code writing to |
205 | * it should synchronize its usage with external means. |
206 | */ |
207 | |
208 | tmp.data = ¤t->nsproxy->pid_ns->last_pid; |
209 | return proc_dointvec(&tmp, write, buffer, lenp, ppos); |
210 | } |
211 | |
212 | static struct ctl_table pid_ns_ctl_table[] = { |
213 | { |
214 | .procname = "ns_last_pid", |
215 | .maxlen = sizeof(int), |
216 | .mode = 0666, /* permissions are checked in the handler */ |
217 | .proc_handler = pid_ns_ctl_handler, |
218 | }, |
219 | { } |
220 | }; |
221 | |
222 | static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } }; |
223 | |
224 | int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) |
225 | { |
226 | if (pid_ns == &init_pid_ns) |
227 | return 0; |
228 | |
229 | switch (cmd) { |
230 | case LINUX_REBOOT_CMD_RESTART2: |
231 | case LINUX_REBOOT_CMD_RESTART: |
232 | pid_ns->reboot = SIGHUP; |
233 | break; |
234 | |
235 | case LINUX_REBOOT_CMD_POWER_OFF: |
236 | case LINUX_REBOOT_CMD_HALT: |
237 | pid_ns->reboot = SIGINT; |
238 | break; |
239 | default: |
240 | return -EINVAL; |
241 | } |
242 | |
243 | read_lock(&tasklist_lock); |
244 | force_sig(SIGKILL, pid_ns->child_reaper); |
245 | read_unlock(&tasklist_lock); |
246 | |
247 | do_exit(0); |
248 | |
249 | /* Not reached */ |
250 | return 0; |
251 | } |
252 | |
253 | static __init int pid_namespaces_init(void) |
254 | { |
255 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); |
256 | register_sysctl_paths(kern_path, pid_ns_ctl_table); |
257 | return 0; |
258 | } |
259 | |
260 | __initcall(pid_namespaces_init); |
261 |
Branches:
ben-wpan
ben-wpan-stefan
javiroman/ks7010
jz-2.6.34
jz-2.6.34-rc5
jz-2.6.34-rc6
jz-2.6.34-rc7
jz-2.6.35
jz-2.6.36
jz-2.6.37
jz-2.6.38
jz-2.6.39
jz-3.0
jz-3.1
jz-3.11
jz-3.12
jz-3.13
jz-3.15
jz-3.16
jz-3.18-dt
jz-3.2
jz-3.3
jz-3.4
jz-3.5
jz-3.6
jz-3.6-rc2-pwm
jz-3.9
jz-3.9-clk
jz-3.9-rc8
jz47xx
jz47xx-2.6.38
master
Tags:
od-2011-09-04
od-2011-09-18
v2.6.34-rc5
v2.6.34-rc6
v2.6.34-rc7
v3.9