Root/
1 | /* |
2 | * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. |
3 | * |
4 | * This copyrighted material is made available to anyone wishing to use, |
5 | * modify, copy, or redistribute it subject to the terms and conditions |
6 | * of the GNU General Public License version 2. |
7 | */ |
8 | |
9 | #include <linux/fs.h> |
10 | #include <linux/miscdevice.h> |
11 | #include <linux/poll.h> |
12 | #include <linux/dlm.h> |
13 | #include <linux/dlm_plock.h> |
14 | #include <linux/slab.h> |
15 | |
16 | #include "dlm_internal.h" |
17 | #include "lockspace.h" |
18 | |
/* Serializes all access to send_list and recv_list. */
static spinlock_t ops_lock;
/* Ops queued for userspace (dlm_controld) to pick up via dev_read(). */
static struct list_head send_list;
/* Ops waiting for a result to be written back via dev_write(). */
static struct list_head recv_list;
/* Woken in send_op(); device readers/pollers wait here for work. */
static wait_queue_head_t send_wq;
/* Woken in dev_write(); lock/unlock/get callers sleep here on op->done. */
static wait_queue_head_t recv_wq;
24 | |
/*
 * One posix-lock request exchanged with userspace through the plock
 * misc device.  It sits on send_list until userspace reads it, then
 * (for ops that need a reply) on recv_list until the result arrives.
 */
struct plock_op {
	struct list_head list;		/* entry on send_list or recv_list */
	int done;			/* set by dev_write() when a result arrived */
	struct dlm_plock_info info;	/* request sent / result received */
};
30 | |
/*
 * Extended op allocated only for DLM_PLOCK_OP_LOCK requests (see
 * dlm_posix_lock()).  It must begin with a plock_op so a plock_op
 * pointer can be cast back to a plock_xop for lock requests.
 * NOTE(review): unlock/get ops are allocated as a bare plock_op, so
 * casting those to plock_xop reads past the allocation -- verify all
 * cast sites check the op type first.
 */
struct plock_xop {
	struct plock_op xop;
	void *callback;		/* fl_lmops->fl_grant for async (lockd) requests, else NULL */
	void *fl;		/* caller's file_lock, passed to the callback */
	void *file;		/* file being locked */
	struct file_lock flc;	/* private copy of *fl for local vfs bookkeeping */
};
38 | |
39 | |
/* Stamp the kernel's plock ABI version into an outgoing request. */
static inline void set_version(struct dlm_plock_info *info)
{
	info->version[0] = DLM_PLOCK_VERSION_MAJOR;
	info->version[1] = DLM_PLOCK_VERSION_MINOR;
	info->version[2] = DLM_PLOCK_VERSION_PATCH;
}
46 | |
47 | static int check_version(struct dlm_plock_info *info) |
48 | { |
49 | if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) || |
50 | (DLM_PLOCK_VERSION_MINOR < info->version[1])) { |
51 | log_print("plock device version mismatch: " |
52 | "kernel (%u.%u.%u), user (%u.%u.%u)", |
53 | DLM_PLOCK_VERSION_MAJOR, |
54 | DLM_PLOCK_VERSION_MINOR, |
55 | DLM_PLOCK_VERSION_PATCH, |
56 | info->version[0], |
57 | info->version[1], |
58 | info->version[2]); |
59 | return -EINVAL; |
60 | } |
61 | return 0; |
62 | } |
63 | |
64 | static void send_op(struct plock_op *op) |
65 | { |
66 | set_version(&op->info); |
67 | INIT_LIST_HEAD(&op->list); |
68 | spin_lock(&ops_lock); |
69 | list_add_tail(&op->list, &send_list); |
70 | spin_unlock(&ops_lock); |
71 | wake_up(&send_wq); |
72 | } |
73 | |
74 | /* If a process was killed while waiting for the only plock on a file, |
75 | locks_remove_posix will not see any lock on the file so it won't |
76 | send an unlock-close to us to pass on to userspace to clean up the |
77 | abandoned waiter. So, we have to insert the unlock-close when the |
78 | lock call is interrupted. */ |
79 | |
80 | static void do_unlock_close(struct dlm_ls *ls, u64 number, |
81 | struct file *file, struct file_lock *fl) |
82 | { |
83 | struct plock_op *op; |
84 | |
85 | op = kzalloc(sizeof(*op), GFP_NOFS); |
86 | if (!op) |
87 | return; |
88 | |
89 | op->info.optype = DLM_PLOCK_OP_UNLOCK; |
90 | op->info.pid = fl->fl_pid; |
91 | op->info.fsid = ls->ls_global_id; |
92 | op->info.number = number; |
93 | op->info.start = 0; |
94 | op->info.end = OFFSET_MAX; |
95 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) |
96 | op->info.owner = (__u64) fl->fl_pid; |
97 | else |
98 | op->info.owner = (__u64)(long) fl->fl_owner; |
99 | |
100 | op->info.flags |= DLM_PLOCK_FL_CLOSE; |
101 | send_op(op); |
102 | } |
103 | |
/*
 * dlm_posix_lock - acquire a cluster-wide posix lock through userspace
 * @lockspace: lockspace used by the caller's filesystem
 * @number: per-file lock resource number
 * @file: file being locked
 * @cmd: fcntl command, used only to detect a blocking request (SETLKW)
 * @fl: the posix lock request
 *
 * The request is queued for dlm_controld.  Normal callers sleep
 * (killably) until the result is written back.  Requests from lockd
 * (fl_lmops->fl_grant set) return FILE_LOCK_DEFERRED immediately and
 * complete asynchronously through dlm_plock_callback().
 */
int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		   int cmd, struct file_lock *fl)
{
	struct dlm_ls *ls;
	struct plock_op *op;
	struct plock_xop *xop;
	int rv;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	/* lock ops use the larger xop so an async grant can refer back
	   to the original file_lock */
	xop = kzalloc(sizeof(*xop), GFP_NOFS);
	if (!xop) {
		rv = -ENOMEM;
		goto out;
	}

	op = &xop->xop;
	op->info.optype = DLM_PLOCK_OP_LOCK;
	op->info.pid = fl->fl_pid;
	op->info.ex = (fl->fl_type == F_WRLCK);
	op->info.wait = IS_SETLKW(cmd);
	op->info.fsid = ls->ls_global_id;
	op->info.number = number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;
	if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
		/* fl_owner is lockd which doesn't distinguish
		   processes on the nfs client */
		op->info.owner = (__u64) fl->fl_pid;
		xop->callback = fl->fl_lmops->fl_grant;
		locks_init_lock(&xop->flc);
		locks_copy_lock(&xop->flc, fl);
		xop->fl = fl;
		xop->file = file;
	} else {
		op->info.owner = (__u64)(long) fl->fl_owner;
		xop->callback = NULL;
	}

	send_op(op);

	if (xop->callback == NULL) {
		/* sleep until dev_write() sets op->done; killable so an
		   interrupted waiter doesn't hang forever */
		rv = wait_event_killable(recv_wq, (op->done != 0));
		if (rv == -ERESTARTSYS) {
			log_debug(ls, "dlm_posix_lock: wait killed %llx",
				  (unsigned long long)number);
			spin_lock(&ops_lock);
			list_del(&op->list);
			spin_unlock(&ops_lock);
			kfree(xop);
			/* userspace may still grant the abandoned lock;
			   queue an unlock to clean it up (see the comment
			   above do_unlock_close) */
			do_unlock_close(ls, number, file, fl);
			goto out;
		}
	} else {
		/* async (lockd) request: the result is delivered later
		   via dlm_plock_callback(), which also frees xop */
		rv = FILE_LOCK_DEFERRED;
		goto out;
	}

	/* a completed op should already be off the lists; log if not */
	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
		log_error(ls, "dlm_posix_lock: op on list %llx",
			  (unsigned long long)number);
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	rv = op->info.rv;

	if (!rv) {
		/* cluster lock granted; mirror it in the local vfs
		   lock table */
		if (posix_lock_file_wait(file, fl) < 0)
			log_error(ls, "dlm_posix_lock: vfs lock error %llx",
				  (unsigned long long)number);
	}

	kfree(xop);
out:
	dlm_put_lockspace(ls);
	return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_lock);
186 | |
/* Returns failure iff a successful lock operation should be canceled */
/*
 * Complete an asynchronous (lockd) lock request after userspace wrote
 * back the result: mirror a granted lock in the local vfs lock table
 * and notify lockd through the fl_grant callback saved in the xop.
 * Called from dev_write(); frees the xop on every path.
 *
 * The cast below is valid because callbacks are only set on ops
 * created as plock_xop in dlm_posix_lock().
 */
static int dlm_plock_callback(struct plock_op *op)
{
	struct file *file;
	struct file_lock *fl;
	struct file_lock *flc;
	int (*notify)(void *, void *, int) = NULL;
	struct plock_xop *xop = (struct plock_xop *)op;
	int rv = 0;

	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
		log_print("dlm_plock_callback: op on list %llx",
			  (unsigned long long)op->info.number);
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	/* check if the following 2 are still valid or make a copy */
	file = xop->file;
	flc = &xop->flc;
	fl = xop->fl;
	notify = xop->callback;

	/* userspace denied the lock: just report the error to lockd */
	if (op->info.rv) {
		notify(fl, NULL, op->info.rv);
		goto out;
	}

	/* got fs lock; bookkeep locally as well: */
	flc->fl_flags &= ~FL_SLEEP;
	if (posix_lock_file(file, flc, NULL)) {
		/*
		 * This can only happen in the case of kmalloc() failure.
		 * The filesystem's own lock is the authoritative lock,
		 * so a failure to get the lock locally is not a disaster.
		 * As long as the fs cannot reliably cancel locks (especially
		 * in a low-memory situation), we're better off ignoring
		 * this failure than trying to recover.
		 */
		log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
			  (unsigned long long)op->info.number, file, fl);
	}

	rv = notify(fl, NULL, 0);
	if (rv) {
		/* XXX: We need to cancel the fs lock here: */
		log_print("dlm_plock_callback: lock granted after lock request "
			  "failed; dangling lock!\n");
		goto out;
	}

out:
	kfree(xop);
	return rv;
}
243 | |
/*
 * dlm_posix_unlock - release a cluster-wide posix lock
 * @lockspace: lockspace used by the caller's filesystem
 * @number: per-file lock resource number
 * @file: file being unlocked
 * @fl: the posix unlock request
 *
 * Removes the local vfs lock first, then tells userspace to drop the
 * cluster-wide state.  Unlocks generated by the vfs on close
 * (FL_CLOSE) are fire-and-forget: no reply is waited for and the op
 * is freed by dev_read().
 */
int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		     struct file_lock *fl)
{
	struct dlm_ls *ls;
	struct plock_op *op;
	int rv;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	op = kzalloc(sizeof(*op), GFP_NOFS);
	if (!op) {
		rv = -ENOMEM;
		goto out;
	}

	/* drop the local vfs bookkeeping before the cluster op */
	if (posix_lock_file_wait(file, fl) < 0)
		log_error(ls, "dlm_posix_unlock: vfs unlock error %llx",
			  (unsigned long long)number);

	op->info.optype = DLM_PLOCK_OP_UNLOCK;
	op->info.pid = fl->fl_pid;
	op->info.fsid = ls->ls_global_id;
	op->info.number = number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;
	/* lockd cannot distinguish nfs client processes, so those
	   requests are identified by pid rather than owner pointer */
	if (fl->fl_lmops && fl->fl_lmops->fl_grant)
		op->info.owner = (__u64) fl->fl_pid;
	else
		op->info.owner = (__u64)(long) fl->fl_owner;

	if (fl->fl_flags & FL_CLOSE) {
		op->info.flags |= DLM_PLOCK_FL_CLOSE;
		send_op(op);
		rv = 0;
		goto out;
	}

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	/* a completed op should already be off the lists; log if not */
	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
		log_error(ls, "dlm_posix_unlock: op on list %llx",
			  (unsigned long long)number);
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	rv = op->info.rv;

	/* "no lock found" is success for an unlock */
	if (rv == -ENOENT)
		rv = 0;

	kfree(op);
out:
	dlm_put_lockspace(ls);
	return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_unlock);
305 | |
/*
 * dlm_posix_get - test for a conflicting cluster-wide posix lock
 * @lockspace: lockspace used by the caller's filesystem
 * @number: per-file lock resource number
 * @file: file being tested
 * @fl: the lock being tested; on conflict it is overwritten with the
 *      conflicting lock's type/pid/range, otherwise fl_type = F_UNLCK
 *
 * Returns 0 on success (fl describes the result) or a negative error.
 */
int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		  struct file_lock *fl)
{
	struct dlm_ls *ls;
	struct plock_op *op;
	int rv;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	op = kzalloc(sizeof(*op), GFP_NOFS);
	if (!op) {
		rv = -ENOMEM;
		goto out;
	}

	op->info.optype = DLM_PLOCK_OP_GET;
	op->info.pid = fl->fl_pid;
	op->info.ex = (fl->fl_type == F_WRLCK);
	op->info.fsid = ls->ls_global_id;
	op->info.number = number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;
	/* lockd cannot distinguish nfs client processes, so those
	   requests are identified by pid rather than owner pointer */
	if (fl->fl_lmops && fl->fl_lmops->fl_grant)
		op->info.owner = (__u64) fl->fl_pid;
	else
		op->info.owner = (__u64)(long) fl->fl_owner;

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	/* a completed op should already be off the lists; log if not */
	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
		log_error(ls, "dlm_posix_get: op on list %llx",
			  (unsigned long long)number);
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	/* info.rv from userspace is 1 for conflict, 0 for no-conflict,
	   -ENOENT if there are no locks on the file */

	rv = op->info.rv;

	fl->fl_type = F_UNLCK;
	if (rv == -ENOENT)
		rv = 0;
	else if (rv > 0) {
		/* conflict: describe the blocking lock in *fl */
		locks_init_lock(fl);
		fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
		fl->fl_flags = FL_POSIX;
		fl->fl_pid = op->info.pid;
		fl->fl_start = op->info.start;
		fl->fl_end = op->info.end;
		rv = 0;
	}

	kfree(op);
out:
	dlm_put_lockspace(ls);
	return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_get);
370 | |
/* a read copies out one plock request from the send list */
static ssize_t dev_read(struct file *file, char __user *u, size_t count,
			loff_t *ppos)
{
	struct dlm_plock_info info;
	struct plock_op *op = NULL;

	if (count < sizeof(info))
		return -EINVAL;

	/* pop the oldest op; ops needing a reply move to recv_list so
	   dev_write() can match the result, close-unlocks leave the
	   lists entirely */
	spin_lock(&ops_lock);
	if (!list_empty(&send_list)) {
		op = list_entry(send_list.next, struct plock_op, list);
		if (op->info.flags & DLM_PLOCK_FL_CLOSE)
			list_del(&op->list);
		else
			list_move(&op->list, &recv_list);
		memcpy(&info, &op->info, sizeof(info));
	}
	spin_unlock(&ops_lock);

	if (!op)
		return -EAGAIN;

	/* there is no need to get a reply from userspace for unlocks
	   that were generated by the vfs cleaning up for a close
	   (the process did not make an unlock call). */

	if (op->info.flags & DLM_PLOCK_FL_CLOSE)
		kfree(op);

	/* NOTE(review): if this copy faults, the op was already dequeued
	   (and freed for FL_CLOSE), so the request is lost and a waiter
	   on recv_list may never complete -- confirm whether userspace
	   can recover after -EFAULT */
	if (copy_to_user(u, &info, sizeof(info)))
		return -EFAULT;
	return sizeof(info);
}
406 | |
407 | /* a write copies in one plock result that should match a plock_op |
408 | on the recv list */ |
409 | static ssize_t dev_write(struct file *file, const char __user *u, size_t count, |
410 | loff_t *ppos) |
411 | { |
412 | struct dlm_plock_info info; |
413 | struct plock_op *op; |
414 | int found = 0, do_callback = 0; |
415 | |
416 | if (count != sizeof(info)) |
417 | return -EINVAL; |
418 | |
419 | if (copy_from_user(&info, u, sizeof(info))) |
420 | return -EFAULT; |
421 | |
422 | if (check_version(&info)) |
423 | return -EINVAL; |
424 | |
425 | spin_lock(&ops_lock); |
426 | list_for_each_entry(op, &recv_list, list) { |
427 | if (op->info.fsid == info.fsid && |
428 | op->info.number == info.number && |
429 | op->info.owner == info.owner) { |
430 | struct plock_xop *xop = (struct plock_xop *)op; |
431 | list_del_init(&op->list); |
432 | memcpy(&op->info, &info, sizeof(info)); |
433 | if (xop->callback) |
434 | do_callback = 1; |
435 | else |
436 | op->done = 1; |
437 | found = 1; |
438 | break; |
439 | } |
440 | } |
441 | spin_unlock(&ops_lock); |
442 | |
443 | if (found) { |
444 | if (do_callback) |
445 | dlm_plock_callback(op); |
446 | else |
447 | wake_up(&recv_wq); |
448 | } else |
449 | log_print("dev_write no op %x %llx", info.fsid, |
450 | (unsigned long long)info.number); |
451 | return count; |
452 | } |
453 | |
454 | static unsigned int dev_poll(struct file *file, poll_table *wait) |
455 | { |
456 | unsigned int mask = 0; |
457 | |
458 | poll_wait(file, &send_wq, wait); |
459 | |
460 | spin_lock(&ops_lock); |
461 | if (!list_empty(&send_list)) |
462 | mask = POLLIN | POLLRDNORM; |
463 | spin_unlock(&ops_lock); |
464 | |
465 | return mask; |
466 | } |
467 | |
/* Operations for the plock misc device: userspace reads requests,
   writes results, and polls for pending work.  Seeking is a no-op. */
static const struct file_operations dev_fops = {
	.read    = dev_read,
	.write   = dev_write,
	.poll    = dev_poll,
	.owner   = THIS_MODULE,
	.llseek  = noop_llseek,
};
475 | |
/* Misc device (dynamic minor) through which dlm_controld services
   plock requests; the device name comes from DLM_PLOCK_MISC_NAME. */
static struct miscdevice plock_dev_misc = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = DLM_PLOCK_MISC_NAME,
	.fops = &dev_fops
};
481 | |
482 | int dlm_plock_init(void) |
483 | { |
484 | int rv; |
485 | |
486 | spin_lock_init(&ops_lock); |
487 | INIT_LIST_HEAD(&send_list); |
488 | INIT_LIST_HEAD(&recv_list); |
489 | init_waitqueue_head(&send_wq); |
490 | init_waitqueue_head(&recv_wq); |
491 | |
492 | rv = misc_register(&plock_dev_misc); |
493 | if (rv) |
494 | log_print("dlm_plock_init: misc_register failed %d", rv); |
495 | return rv; |
496 | } |
497 | |
498 | void dlm_plock_exit(void) |
499 | { |
500 | if (misc_deregister(&plock_dev_misc) < 0) |
501 | log_print("dlm_plock_exit: misc_deregister failed"); |
502 | } |
503 | |
504 |
Branches:
ben-wpan
ben-wpan-stefan
javiroman/ks7010
jz-2.6.34
jz-2.6.34-rc5
jz-2.6.34-rc6
jz-2.6.34-rc7
jz-2.6.35
jz-2.6.36
jz-2.6.37
jz-2.6.38
jz-2.6.39
jz-3.0
jz-3.1
jz-3.11
jz-3.12
jz-3.13
jz-3.15
jz-3.16
jz-3.18-dt
jz-3.2
jz-3.3
jz-3.4
jz-3.5
jz-3.6
jz-3.6-rc2-pwm
jz-3.9
jz-3.9-clk
jz-3.9-rc8
jz47xx
jz47xx-2.6.38
master
Tags:
od-2011-09-04
od-2011-09-18
v2.6.34-rc5
v2.6.34-rc6
v2.6.34-rc7
v3.9