Root/
1 | /* |
2 | * Copyright (C) 2007 Oracle. All rights reserved. |
3 | * |
4 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU General Public |
6 | * License v2 as published by the Free Software Foundation. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | * General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public |
14 | * License along with this program; if not, write to the |
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
16 | * Boston, MA 021110-1307, USA. |
17 | */ |
18 | |
19 | #include <linux/delay.h> |
20 | #include <linux/kthread.h> |
21 | #include <linux/pagemap.h> |
22 | |
23 | #include "ctree.h" |
24 | #include "disk-io.h" |
25 | #include "free-space-cache.h" |
26 | #include "inode-map.h" |
27 | #include "transaction.h" |
28 | |
29 | static int caching_kthread(void *data) |
30 | { |
31 | struct btrfs_root *root = data; |
32 | struct btrfs_fs_info *fs_info = root->fs_info; |
33 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; |
34 | struct btrfs_key key; |
35 | struct btrfs_path *path; |
36 | struct extent_buffer *leaf; |
37 | u64 last = (u64)-1; |
38 | int slot; |
39 | int ret; |
40 | |
41 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) |
42 | return 0; |
43 | |
44 | path = btrfs_alloc_path(); |
45 | if (!path) |
46 | return -ENOMEM; |
47 | |
48 | /* Since the commit root is read-only, we can safely skip locking. */ |
49 | path->skip_locking = 1; |
50 | path->search_commit_root = 1; |
51 | path->reada = 2; |
52 | |
53 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; |
54 | key.offset = 0; |
55 | key.type = BTRFS_INODE_ITEM_KEY; |
56 | again: |
57 | /* need to make sure the commit_root doesn't disappear */ |
58 | mutex_lock(&root->fs_commit_mutex); |
59 | |
60 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
61 | if (ret < 0) |
62 | goto out; |
63 | |
64 | while (1) { |
65 | if (btrfs_fs_closing(fs_info)) |
66 | goto out; |
67 | |
68 | leaf = path->nodes[0]; |
69 | slot = path->slots[0]; |
70 | if (slot >= btrfs_header_nritems(leaf)) { |
71 | ret = btrfs_next_leaf(root, path); |
72 | if (ret < 0) |
73 | goto out; |
74 | else if (ret > 0) |
75 | break; |
76 | |
77 | if (need_resched() || |
78 | btrfs_transaction_in_commit(fs_info)) { |
79 | leaf = path->nodes[0]; |
80 | |
81 | if (btrfs_header_nritems(leaf) == 0) { |
82 | WARN_ON(1); |
83 | break; |
84 | } |
85 | |
86 | /* |
87 | * Save the key so we can advances forward |
88 | * in the next search. |
89 | */ |
90 | btrfs_item_key_to_cpu(leaf, &key, 0); |
91 | btrfs_release_path(path); |
92 | root->cache_progress = last; |
93 | mutex_unlock(&root->fs_commit_mutex); |
94 | schedule_timeout(1); |
95 | goto again; |
96 | } else |
97 | continue; |
98 | } |
99 | |
100 | btrfs_item_key_to_cpu(leaf, &key, slot); |
101 | |
102 | if (key.type != BTRFS_INODE_ITEM_KEY) |
103 | goto next; |
104 | |
105 | if (key.objectid >= root->highest_objectid) |
106 | break; |
107 | |
108 | if (last != (u64)-1 && last + 1 != key.objectid) { |
109 | __btrfs_add_free_space(ctl, last + 1, |
110 | key.objectid - last - 1); |
111 | wake_up(&root->cache_wait); |
112 | } |
113 | |
114 | last = key.objectid; |
115 | next: |
116 | path->slots[0]++; |
117 | } |
118 | |
119 | if (last < root->highest_objectid - 1) { |
120 | __btrfs_add_free_space(ctl, last + 1, |
121 | root->highest_objectid - last - 1); |
122 | } |
123 | |
124 | spin_lock(&root->cache_lock); |
125 | root->cached = BTRFS_CACHE_FINISHED; |
126 | spin_unlock(&root->cache_lock); |
127 | |
128 | root->cache_progress = (u64)-1; |
129 | btrfs_unpin_free_ino(root); |
130 | out: |
131 | wake_up(&root->cache_wait); |
132 | mutex_unlock(&root->fs_commit_mutex); |
133 | |
134 | btrfs_free_path(path); |
135 | |
136 | return ret; |
137 | } |
138 | |
139 | static void start_caching(struct btrfs_root *root) |
140 | { |
141 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; |
142 | struct task_struct *tsk; |
143 | int ret; |
144 | u64 objectid; |
145 | |
146 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) |
147 | return; |
148 | |
149 | spin_lock(&root->cache_lock); |
150 | if (root->cached != BTRFS_CACHE_NO) { |
151 | spin_unlock(&root->cache_lock); |
152 | return; |
153 | } |
154 | |
155 | root->cached = BTRFS_CACHE_STARTED; |
156 | spin_unlock(&root->cache_lock); |
157 | |
158 | ret = load_free_ino_cache(root->fs_info, root); |
159 | if (ret == 1) { |
160 | spin_lock(&root->cache_lock); |
161 | root->cached = BTRFS_CACHE_FINISHED; |
162 | spin_unlock(&root->cache_lock); |
163 | return; |
164 | } |
165 | |
166 | /* |
167 | * It can be quite time-consuming to fill the cache by searching |
168 | * through the extent tree, and this can keep ino allocation path |
169 | * waiting. Therefore at start we quickly find out the highest |
170 | * inode number and we know we can use inode numbers which fall in |
171 | * [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID]. |
172 | */ |
173 | ret = btrfs_find_free_objectid(root, &objectid); |
174 | if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) { |
175 | __btrfs_add_free_space(ctl, objectid, |
176 | BTRFS_LAST_FREE_OBJECTID - objectid + 1); |
177 | } |
178 | |
179 | tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n", |
180 | root->root_key.objectid); |
181 | BUG_ON(IS_ERR(tsk)); |
182 | } |
183 | |
184 | int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) |
185 | { |
186 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) |
187 | return btrfs_find_free_objectid(root, objectid); |
188 | |
189 | again: |
190 | *objectid = btrfs_find_ino_for_alloc(root); |
191 | |
192 | if (*objectid != 0) |
193 | return 0; |
194 | |
195 | start_caching(root); |
196 | |
197 | wait_event(root->cache_wait, |
198 | root->cached == BTRFS_CACHE_FINISHED || |
199 | root->free_ino_ctl->free_space > 0); |
200 | |
201 | if (root->cached == BTRFS_CACHE_FINISHED && |
202 | root->free_ino_ctl->free_space == 0) |
203 | return -ENOSPC; |
204 | else |
205 | goto again; |
206 | } |
207 | |
208 | void btrfs_return_ino(struct btrfs_root *root, u64 objectid) |
209 | { |
210 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; |
211 | struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; |
212 | |
213 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) |
214 | return; |
215 | |
216 | again: |
217 | if (root->cached == BTRFS_CACHE_FINISHED) { |
218 | __btrfs_add_free_space(ctl, objectid, 1); |
219 | } else { |
220 | /* |
221 | * If we are in the process of caching free ino chunks, |
222 | * to avoid adding the same inode number to the free_ino |
223 | * tree twice due to cross transaction, we'll leave it |
224 | * in the pinned tree until a transaction is committed |
225 | * or the caching work is done. |
226 | */ |
227 | |
228 | mutex_lock(&root->fs_commit_mutex); |
229 | spin_lock(&root->cache_lock); |
230 | if (root->cached == BTRFS_CACHE_FINISHED) { |
231 | spin_unlock(&root->cache_lock); |
232 | mutex_unlock(&root->fs_commit_mutex); |
233 | goto again; |
234 | } |
235 | spin_unlock(&root->cache_lock); |
236 | |
237 | start_caching(root); |
238 | |
239 | if (objectid <= root->cache_progress || |
240 | objectid > root->highest_objectid) |
241 | __btrfs_add_free_space(ctl, objectid, 1); |
242 | else |
243 | __btrfs_add_free_space(pinned, objectid, 1); |
244 | |
245 | mutex_unlock(&root->fs_commit_mutex); |
246 | } |
247 | } |
248 | |
249 | /* |
250 | * When a transaction is committed, we'll move those inode numbers which |
251 | * are smaller than root->cache_progress from pinned tree to free_ino tree, |
252 | * and others will just be dropped, because the commit root we were |
253 | * searching has changed. |
254 | * |
255 | * Must be called with root->fs_commit_mutex held |
256 | */ |
257 | void btrfs_unpin_free_ino(struct btrfs_root *root) |
258 | { |
259 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; |
260 | struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset; |
261 | struct btrfs_free_space *info; |
262 | struct rb_node *n; |
263 | u64 count; |
264 | |
265 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) |
266 | return; |
267 | |
268 | while (1) { |
269 | n = rb_first(rbroot); |
270 | if (!n) |
271 | break; |
272 | |
273 | info = rb_entry(n, struct btrfs_free_space, offset_index); |
274 | BUG_ON(info->bitmap); |
275 | |
276 | if (info->offset > root->cache_progress) |
277 | goto free; |
278 | else if (info->offset + info->bytes > root->cache_progress) |
279 | count = root->cache_progress - info->offset + 1; |
280 | else |
281 | count = info->bytes; |
282 | |
283 | __btrfs_add_free_space(ctl, info->offset, count); |
284 | free: |
285 | rb_erase(&info->offset_index, rbroot); |
286 | kfree(info); |
287 | } |
288 | } |
289 | |
290 | #define INIT_THRESHOLD (((1024 * 32) / 2) / sizeof(struct btrfs_free_space)) |
291 | #define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8) |
292 | |
293 | /* |
294 | * The goal is to keep the memory used by the free_ino tree won't |
295 | * exceed the memory if we use bitmaps only. |
296 | */ |
297 | static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) |
298 | { |
299 | struct btrfs_free_space *info; |
300 | struct rb_node *n; |
301 | int max_ino; |
302 | int max_bitmaps; |
303 | |
304 | n = rb_last(&ctl->free_space_offset); |
305 | if (!n) { |
306 | ctl->extents_thresh = INIT_THRESHOLD; |
307 | return; |
308 | } |
309 | info = rb_entry(n, struct btrfs_free_space, offset_index); |
310 | |
311 | /* |
312 | * Find the maximum inode number in the filesystem. Note we |
313 | * ignore the fact that this can be a bitmap, because we are |
314 | * not doing precise calculation. |
315 | */ |
316 | max_ino = info->bytes - 1; |
317 | |
318 | max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP; |
319 | if (max_bitmaps <= ctl->total_bitmaps) { |
320 | ctl->extents_thresh = 0; |
321 | return; |
322 | } |
323 | |
324 | ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) * |
325 | PAGE_CACHE_SIZE / sizeof(*info); |
326 | } |
327 | |
328 | /* |
329 | * We don't fall back to bitmap, if we are below the extents threshold |
330 | * or this chunk of inode numbers is a big one. |
331 | */ |
332 | static bool use_bitmap(struct btrfs_free_space_ctl *ctl, |
333 | struct btrfs_free_space *info) |
334 | { |
335 | if (ctl->free_extents < ctl->extents_thresh || |
336 | info->bytes > INODES_PER_BITMAP / 10) |
337 | return false; |
338 | |
339 | return true; |
340 | } |
341 | |
342 | static struct btrfs_free_space_op free_ino_op = { |
343 | .recalc_thresholds = recalculate_thresholds, |
344 | .use_bitmap = use_bitmap, |
345 | }; |
346 | |
347 | static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl) |
348 | { |
349 | } |
350 | |
351 | static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl, |
352 | struct btrfs_free_space *info) |
353 | { |
354 | /* |
355 | * We always use extents for two reasons: |
356 | * |
357 | * - The pinned tree is only used during the process of caching |
358 | * work. |
359 | * - Make code simpler. See btrfs_unpin_free_ino(). |
360 | */ |
361 | return false; |
362 | } |
363 | |
364 | static struct btrfs_free_space_op pinned_free_ino_op = { |
365 | .recalc_thresholds = pinned_recalc_thresholds, |
366 | .use_bitmap = pinned_use_bitmap, |
367 | }; |
368 | |
369 | void btrfs_init_free_ino_ctl(struct btrfs_root *root) |
370 | { |
371 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; |
372 | struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; |
373 | |
374 | spin_lock_init(&ctl->tree_lock); |
375 | ctl->unit = 1; |
376 | ctl->start = 0; |
377 | ctl->private = NULL; |
378 | ctl->op = &free_ino_op; |
379 | |
380 | /* |
381 | * Initially we allow to use 16K of ram to cache chunks of |
382 | * inode numbers before we resort to bitmaps. This is somewhat |
383 | * arbitrary, but it will be adjusted in runtime. |
384 | */ |
385 | ctl->extents_thresh = INIT_THRESHOLD; |
386 | |
387 | spin_lock_init(&pinned->tree_lock); |
388 | pinned->unit = 1; |
389 | pinned->start = 0; |
390 | pinned->private = NULL; |
391 | pinned->extents_thresh = 0; |
392 | pinned->op = &pinned_free_ino_op; |
393 | } |
394 | |
395 | int btrfs_save_ino_cache(struct btrfs_root *root, |
396 | struct btrfs_trans_handle *trans) |
397 | { |
398 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; |
399 | struct btrfs_path *path; |
400 | struct inode *inode; |
401 | u64 alloc_hint = 0; |
402 | int ret; |
403 | int prealloc; |
404 | bool retry = false; |
405 | |
406 | /* only fs tree and subvol/snap needs ino cache */ |
407 | if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID && |
408 | (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID || |
409 | root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID)) |
410 | return 0; |
411 | |
412 | /* Don't save inode cache if we are deleting this root */ |
413 | if (btrfs_root_refs(&root->root_item) == 0 && |
414 | root != root->fs_info->tree_root) |
415 | return 0; |
416 | |
417 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) |
418 | return 0; |
419 | |
420 | path = btrfs_alloc_path(); |
421 | if (!path) |
422 | return -ENOMEM; |
423 | |
424 | again: |
425 | inode = lookup_free_ino_inode(root, path); |
426 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { |
427 | ret = PTR_ERR(inode); |
428 | goto out; |
429 | } |
430 | |
431 | if (IS_ERR(inode)) { |
432 | BUG_ON(retry); |
433 | retry = true; |
434 | |
435 | ret = create_free_ino_inode(root, trans, path); |
436 | if (ret) |
437 | goto out; |
438 | goto again; |
439 | } |
440 | |
441 | BTRFS_I(inode)->generation = 0; |
442 | ret = btrfs_update_inode(trans, root, inode); |
443 | WARN_ON(ret); |
444 | |
445 | if (i_size_read(inode) > 0) { |
446 | ret = btrfs_truncate_free_space_cache(root, trans, path, inode); |
447 | if (ret) |
448 | goto out_put; |
449 | } |
450 | |
451 | spin_lock(&root->cache_lock); |
452 | if (root->cached != BTRFS_CACHE_FINISHED) { |
453 | ret = -1; |
454 | spin_unlock(&root->cache_lock); |
455 | goto out_put; |
456 | } |
457 | spin_unlock(&root->cache_lock); |
458 | |
459 | spin_lock(&ctl->tree_lock); |
460 | prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents; |
461 | prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE); |
462 | prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE; |
463 | spin_unlock(&ctl->tree_lock); |
464 | |
465 | /* Just to make sure we have enough space */ |
466 | prealloc += 8 * PAGE_CACHE_SIZE; |
467 | |
468 | ret = btrfs_check_data_free_space(inode, prealloc); |
469 | if (ret) |
470 | goto out_put; |
471 | |
472 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, |
473 | prealloc, prealloc, &alloc_hint); |
474 | if (ret) |
475 | goto out_put; |
476 | btrfs_free_reserved_data_space(inode, prealloc); |
477 | |
478 | out_put: |
479 | iput(inode); |
480 | out: |
481 | if (ret == 0) |
482 | ret = btrfs_write_out_ino_cache(root, trans, path); |
483 | |
484 | btrfs_free_path(path); |
485 | return ret; |
486 | } |
487 | |
488 | static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid) |
489 | { |
490 | struct btrfs_path *path; |
491 | int ret; |
492 | struct extent_buffer *l; |
493 | struct btrfs_key search_key; |
494 | struct btrfs_key found_key; |
495 | int slot; |
496 | |
497 | path = btrfs_alloc_path(); |
498 | if (!path) |
499 | return -ENOMEM; |
500 | |
501 | search_key.objectid = BTRFS_LAST_FREE_OBJECTID; |
502 | search_key.type = -1; |
503 | search_key.offset = (u64)-1; |
504 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); |
505 | if (ret < 0) |
506 | goto error; |
507 | BUG_ON(ret == 0); |
508 | if (path->slots[0] > 0) { |
509 | slot = path->slots[0] - 1; |
510 | l = path->nodes[0]; |
511 | btrfs_item_key_to_cpu(l, &found_key, slot); |
512 | *objectid = max_t(u64, found_key.objectid, |
513 | BTRFS_FIRST_FREE_OBJECTID - 1); |
514 | } else { |
515 | *objectid = BTRFS_FIRST_FREE_OBJECTID - 1; |
516 | } |
517 | ret = 0; |
518 | error: |
519 | btrfs_free_path(path); |
520 | return ret; |
521 | } |
522 | |
523 | int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid) |
524 | { |
525 | int ret; |
526 | mutex_lock(&root->objectid_mutex); |
527 | |
528 | if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { |
529 | ret = btrfs_find_highest_objectid(root, |
530 | &root->highest_objectid); |
531 | if (ret) |
532 | goto out; |
533 | } |
534 | |
535 | if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) { |
536 | ret = -ENOSPC; |
537 | goto out; |
538 | } |
539 | |
540 | *objectid = ++root->highest_objectid; |
541 | ret = 0; |
542 | out: |
543 | mutex_unlock(&root->objectid_mutex); |
544 | return ret; |
545 | } |
546 |
Branches:
ben-wpan
ben-wpan-stefan
javiroman/ks7010
jz-2.6.34
jz-2.6.34-rc5
jz-2.6.34-rc6
jz-2.6.34-rc7
jz-2.6.35
jz-2.6.36
jz-2.6.37
jz-2.6.38
jz-2.6.39
jz-3.0
jz-3.1
jz-3.11
jz-3.12
jz-3.13
jz-3.15
jz-3.16
jz-3.18-dt
jz-3.2
jz-3.3
jz-3.4
jz-3.5
jz-3.6
jz-3.6-rc2-pwm
jz-3.9
jz-3.9-clk
jz-3.9-rc8
jz47xx
jz47xx-2.6.38
master
Tags:
od-2011-09-04
od-2011-09-18
v2.6.34-rc5
v2.6.34-rc6
v2.6.34-rc7
v3.9