fs/btrfs/free-space-cache.c

/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"

#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
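
/*
 * Worked example (assuming 4K pages and a 4K sectorsize): BITS_PER_BITMAP is
 * PAGE_CACHE_SIZE * 8 = 32768 bits, each bit covering one sector, so a single
 * bitmap spans 32768 * 4K = 128MB of the block group while costing one page
 * of memory.  MAX_CACHE_BYTES_PER_GIG is the memory budget (32K per 1GB of
 * block group space) that recalculate_thresholds() enforces when deciding how
 * many extent entries we may keep before new free space goes into bitmaps.
 */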

static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
                      u64 offset)
{
    BUG_ON(offset < bitmap_start);
    offset -= bitmap_start;
    return (unsigned long)(div64_u64(offset, sectorsize));
}

static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
    return (unsigned long)(div64_u64(bytes, sectorsize));
}

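/*
 * round 'offset' down to the start of the bitmap window that covers it,
 * expressed as a logical offset within the block group
 */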
static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
                   u64 offset)
{
    u64 bitmap_start;
    u64 bytes_per_bitmap;

    bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
    bitmap_start = offset - block_group->key.objectid;
    bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
    bitmap_start *= bytes_per_bitmap;
    bitmap_start += block_group->key.objectid;

    return bitmap_start;
}

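/*
 * insert 'node' into the rbtree keyed by offset; a bitmap entry sorts after
 * an extent entry that shares the same offset (see the comment below)
 */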
static int tree_insert_offset(struct rb_root *root, u64 offset,
                  struct rb_node *node, int bitmap)
{
    struct rb_node **p = &root->rb_node;
    struct rb_node *parent = NULL;
    struct btrfs_free_space *info;

    while (*p) {
        parent = *p;
        info = rb_entry(parent, struct btrfs_free_space, offset_index);

        if (offset < info->offset) {
            p = &(*p)->rb_left;
        } else if (offset > info->offset) {
            p = &(*p)->rb_right;
        } else {
            /*
             * we could have a bitmap entry and an extent entry
             * share the same offset. If this is the case, we want
             * the extent entry to always be found first if we do a
             * linear search through the tree, since we want to have
             * the quickest allocation time, and allocating from an
             * extent is faster than allocating from a bitmap. So
             * if we're inserting a bitmap and we find an entry at
             * this offset, we want to go right, or after this entry
             * logically. If we are inserting an extent and we've
             * found a bitmap, we want to go left, or before
             * logically.
             */
            if (bitmap) {
                WARN_ON(info->bitmap);
                p = &(*p)->rb_right;
            } else {
                WARN_ON(!info->bitmap);
                p = &(*p)->rb_left;
            }
        }
    }

    rb_link_node(node, parent, p);
    rb_insert_color(node, root);

    return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that is at least 'bytes' in size and comes at or after the
 * given offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
           u64 offset, int bitmap_only, int fuzzy)
{
    struct rb_node *n = block_group->free_space_offset.rb_node;
    struct btrfs_free_space *entry, *prev = NULL;

    /* find entry that is closest to the 'offset' */
    while (1) {
        if (!n) {
            entry = NULL;
            break;
        }

        entry = rb_entry(n, struct btrfs_free_space, offset_index);
        prev = entry;

        if (offset < entry->offset)
            n = n->rb_left;
        else if (offset > entry->offset)
            n = n->rb_right;
        else
            break;
    }

    if (bitmap_only) {
        if (!entry)
            return NULL;
        if (entry->bitmap)
            return entry;

        /*
         * bitmap entry and extent entry may share same offset,
         * in that case, bitmap entry comes after extent entry.
         */
        n = rb_next(n);
        if (!n)
            return NULL;
        entry = rb_entry(n, struct btrfs_free_space, offset_index);
        if (entry->offset != offset)
            return NULL;

        WARN_ON(!entry->bitmap);
        return entry;
    } else if (entry) {
        if (entry->bitmap) {
            /*
             * if previous extent entry covers the offset,
             * we should return it instead of the bitmap entry
             */
            n = &entry->offset_index;
            while (1) {
                n = rb_prev(n);
                if (!n)
                    break;
                prev = rb_entry(n, struct btrfs_free_space,
                        offset_index);
                if (!prev->bitmap) {
                    if (prev->offset + prev->bytes > offset)
                        entry = prev;
                    break;
                }
            }
        }
        return entry;
    }

    if (!prev)
        return NULL;

    /* find last entry before the 'offset' */
    entry = prev;
    if (entry->offset > offset) {
        n = rb_prev(&entry->offset_index);
        if (n) {
            entry = rb_entry(n, struct btrfs_free_space,
                    offset_index);
            BUG_ON(entry->offset > offset);
        } else {
            if (fuzzy)
                return entry;
            else
                return NULL;
        }
    }

    if (entry->bitmap) {
        n = &entry->offset_index;
        while (1) {
            n = rb_prev(n);
            if (!n)
                break;
            prev = rb_entry(n, struct btrfs_free_space,
                    offset_index);
            if (!prev->bitmap) {
                if (prev->offset + prev->bytes > offset)
                    return prev;
                break;
            }
        }
        if (entry->offset + BITS_PER_BITMAP *
            block_group->sectorsize > offset)
            return entry;
    } else if (entry->offset + entry->bytes > offset)
        return entry;

    if (!fuzzy)
        return NULL;

    while (1) {
        if (entry->bitmap) {
            if (entry->offset + BITS_PER_BITMAP *
                block_group->sectorsize > offset)
                break;
        } else {
            if (entry->offset + entry->bytes > offset)
                break;
        }

        n = rb_next(&entry->offset_index);
        if (!n)
            return NULL;
        entry = rb_entry(n, struct btrfs_free_space, offset_index);
    }
    return entry;
}

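/*
 * unlink_free_space()/link_free_space() remove and insert entries in the
 * per-block-group offset rbtree and keep the free_space and free_extents
 * counters in sync
 */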
static void unlink_free_space(struct btrfs_block_group_cache *block_group,
                  struct btrfs_free_space *info)
{
    rb_erase(&info->offset_index, &block_group->free_space_offset);
    block_group->free_extents--;
    block_group->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
               struct btrfs_free_space *info)
{
    int ret = 0;

    BUG_ON(!info->bitmap && !info->bytes);
    ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
                 &info->offset_index, (info->bitmap != NULL));
    if (ret)
        return ret;

    block_group->free_space += info->bytes;
    block_group->free_extents++;
    return ret;
}

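/*
 * recompute extents_thresh, the number of extent entries we allow before new
 * free space is pushed into bitmaps, so that the cache stays within the
 * MAX_CACHE_BYTES_PER_GIG memory budget
 */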
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
    u64 max_bytes;
    u64 bitmap_bytes;
    u64 extent_bytes;

    /*
     * The goal is to keep the total amount of memory used per 1GB of space
     * at or below 32KB, so we need to adjust how much memory we allow to be
     * used by extent based free space tracking
     */
    max_bytes = MAX_CACHE_BYTES_PER_GIG *
        (div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

    /*
     * we want to account for 1 more bitmap than what we have so we can make
     * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
     * we add more bitmaps.
     */
    bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;

    if (bitmap_bytes >= max_bytes) {
        block_group->extents_thresh = 0;
        return;
    }

    /*
     * we want the extent entry threshold to always be at most 1/2 the max
     * bytes we can have, or whatever is less than that.
     */
    extent_bytes = max_bytes - bitmap_bytes;
    extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

    block_group->extents_thresh =
        div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

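/*
 * clear (or, below, set) the bits covering [offset, offset + bytes) in a
 * bitmap entry and adjust the byte counters accordingly
 */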
static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
                  struct btrfs_free_space *info, u64 offset,
                  u64 bytes)
{
    unsigned long start, end;
    unsigned long i;

    start = offset_to_bit(info->offset, block_group->sectorsize, offset);
    end = start + bytes_to_bits(bytes, block_group->sectorsize);
    BUG_ON(end > BITS_PER_BITMAP);

    for (i = start; i < end; i++)
        clear_bit(i, info->bitmap);

    info->bytes -= bytes;
    block_group->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
                struct btrfs_free_space *info, u64 offset,
                u64 bytes)
{
    unsigned long start, end;
    unsigned long i;

    start = offset_to_bit(info->offset, block_group->sectorsize, offset);
    end = start + bytes_to_bits(bytes, block_group->sectorsize);
    BUG_ON(end > BITS_PER_BITMAP);

    for (i = start; i < end; i++)
        set_bit(i, info->bitmap);

    info->bytes += bytes;
    block_group->free_space += bytes;
}

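/*
 * look for a run of at least *bytes of free space in a bitmap entry, starting
 * at or after *offset; on success 0 is returned and *offset/*bytes are updated
 * to describe the run that was found
 */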
static int search_bitmap(struct btrfs_block_group_cache *block_group,
             struct btrfs_free_space *bitmap_info, u64 *offset,
             u64 *bytes)
{
    unsigned long found_bits = 0;
    unsigned long bits, i;
    unsigned long next_zero;

    i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
              max_t(u64, *offset, bitmap_info->offset));
    bits = bytes_to_bits(*bytes, block_group->sectorsize);

    for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
         i < BITS_PER_BITMAP;
         i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
        next_zero = find_next_zero_bit(bitmap_info->bitmap,
                           BITS_PER_BITMAP, i);
        if ((next_zero - i) >= bits) {
            found_bits = next_zero - i;
            break;
        }
        i = next_zero;
    }

    if (found_bits) {
        *offset = (u64)(i * block_group->sectorsize) +
            bitmap_info->offset;
        *bytes = (u64)(found_bits) * block_group->sectorsize;
        return 0;
    }

    return -1;
}

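/*
 * walk the offset rbtree starting at the bitmap window containing *offset and
 * return the first extent or bitmap entry that can satisfy the request,
 * updating *offset and *bytes to describe the space that was found
 */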
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
                        *block_group, u64 *offset,
                        u64 *bytes, int debug)
{
    struct btrfs_free_space *entry;
    struct rb_node *node;
    int ret;

    if (!block_group->free_space_offset.rb_node)
        return NULL;

    entry = tree_search_offset(block_group,
                   offset_to_bitmap(block_group, *offset),
                   0, 1);
    if (!entry)
        return NULL;

    for (node = &entry->offset_index; node; node = rb_next(node)) {
        entry = rb_entry(node, struct btrfs_free_space, offset_index);
        if (entry->bytes < *bytes)
            continue;

        if (entry->bitmap) {
            ret = search_bitmap(block_group, entry, offset, bytes);
            if (!ret)
                return entry;
            continue;
        }

        *offset = entry->offset;
        *bytes = entry->bytes;
        return entry;
    }

    return NULL;
}

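/*
 * turn a pre-allocated btrfs_free_space into a bitmap entry anchored at the
 * bitmap window containing 'offset' and link it into the tree
 */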
static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
               struct btrfs_free_space *info, u64 offset)
{
    u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
    int max_bitmaps = (int)div64_u64(block_group->key.offset +
                     bytes_per_bg - 1, bytes_per_bg);
    BUG_ON(block_group->total_bitmaps >= max_bitmaps);

    info->offset = offset_to_bitmap(block_group, offset);
    info->bytes = 0;
    link_free_space(block_group, info);
    block_group->total_bitmaps++;

    recalculate_thresholds(block_group);
}

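/*
 * clear the range [*offset, *offset + *bytes) out of one or more bitmap
 * entries; returns -EAGAIN when the remaining range is not covered by the
 * next bitmap, so the caller has to restart the search from the extent side
 */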
static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
                  struct btrfs_free_space *bitmap_info,
                  u64 *offset, u64 *bytes)
{
    u64 end;
    u64 search_start, search_bytes;
    int ret;

again:
    end = bitmap_info->offset +
        (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

    /*
     * XXX - this can go away after a few releases.
     *
     * since the only user of btrfs_remove_free_space is the tree logging
     * stuff, and the only way to test that is under crash conditions, we
     * want to have this debug stuff here just in case something's not
     * working. Search the bitmap for the space we are trying to use to
     * make sure it's actually there. If it's not there then we need to stop
     * because something has gone wrong.
     */
    search_start = *offset;
    search_bytes = *bytes;
    ret = search_bitmap(block_group, bitmap_info, &search_start,
                &search_bytes);
    BUG_ON(ret < 0 || search_start != *offset);

    if (*offset > bitmap_info->offset && *offset + *bytes > end) {
        bitmap_clear_bits(block_group, bitmap_info, *offset,
                  end - *offset + 1);
        *bytes -= end - *offset + 1;
        *offset = end + 1;
    } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
        bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
        *bytes = 0;
    }

    if (*bytes) {
        struct rb_node *next = rb_next(&bitmap_info->offset_index);
        if (!bitmap_info->bytes) {
            unlink_free_space(block_group, bitmap_info);
            kfree(bitmap_info->bitmap);
            kfree(bitmap_info);
            block_group->total_bitmaps--;
            recalculate_thresholds(block_group);
        }

        /*
         * no entry after this bitmap, but we still have bytes to
         * remove, so something has gone wrong.
         */
        if (!next)
            return -EINVAL;

        bitmap_info = rb_entry(next, struct btrfs_free_space,
                       offset_index);

        /*
         * if the next entry isn't a bitmap we need to return to let the
         * extent stuff do its work.
         */
        if (!bitmap_info->bitmap)
            return -EAGAIN;

        /*
         * Ok the next item is a bitmap, but it may not actually hold
         * the information for the rest of this free space stuff, so
         * look for it, and if we don't find it return so we can try
         * everything over again.
         */
        search_start = *offset;
        search_bytes = *bytes;
        ret = search_bitmap(block_group, bitmap_info, &search_start,
                    &search_bytes);
        if (ret < 0 || search_start != *offset)
            return -EAGAIN;

        goto again;
    } else if (!bitmap_info->bytes) {
        unlink_free_space(block_group, bitmap_info);
        kfree(bitmap_info->bitmap);
        kfree(bitmap_info);
        block_group->total_bitmaps--;
        recalculate_thresholds(block_group);
    }

    return 0;
}

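/*
 * try to store the free space described by 'info' in one or more bitmap
 * entries instead of as an extent entry; returns 0 if the caller should add
 * it as an extent, 1 if it was absorbed into bitmaps, or a negative errno
 */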
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
                  struct btrfs_free_space *info)
{
    struct btrfs_free_space *bitmap_info;
    int added = 0;
    u64 bytes, offset, end;
    int ret;

    /*
     * If we are below the extents threshold then we can add this as an
     * extent, and don't have to deal with the bitmap
     */
    if (block_group->free_extents < block_group->extents_thresh &&
        info->bytes > block_group->sectorsize * 4)
        return 0;

    /*
     * some block groups are so tiny they can't be enveloped by a bitmap, so
     * don't even bother to create a bitmap for this
     */
    if (BITS_PER_BITMAP * block_group->sectorsize >
        block_group->key.offset)
        return 0;

    bytes = info->bytes;
    offset = info->offset;

again:
    bitmap_info = tree_search_offset(block_group,
                     offset_to_bitmap(block_group, offset),
                     1, 0);
    if (!bitmap_info) {
        BUG_ON(added);
        goto new_bitmap;
    }

    end = bitmap_info->offset +
        (u64)(BITS_PER_BITMAP * block_group->sectorsize);

    if (offset >= bitmap_info->offset && offset + bytes > end) {
        bitmap_set_bits(block_group, bitmap_info, offset,
                end - offset);
        bytes -= end - offset;
        offset = end;
        added = 0;
    } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
        bitmap_set_bits(block_group, bitmap_info, offset, bytes);
        bytes = 0;
    } else {
        BUG();
    }

    if (!bytes) {
        ret = 1;
        goto out;
    } else
        goto again;

new_bitmap:
    if (info && info->bitmap) {
        add_new_bitmap(block_group, info, offset);
        added = 1;
        info = NULL;
        goto again;
    } else {
        spin_unlock(&block_group->tree_lock);

        /* no pre-allocated info, allocate a new one */
        if (!info) {
            info = kzalloc(sizeof(struct btrfs_free_space),
                       GFP_NOFS);
            if (!info) {
                spin_lock(&block_group->tree_lock);
                ret = -ENOMEM;
                goto out;
            }
        }

        /* allocate the bitmap */
        info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
        spin_lock(&block_group->tree_lock);
        if (!info->bitmap) {
            ret = -ENOMEM;
            goto out;
        }
        goto again;
    }

out:
    if (info) {
        if (info->bitmap)
            kfree(info->bitmap);
        kfree(info);
    }

    return ret;
}

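/*
 * add a range of free space to the cache for a block group, merging it with
 * any adjacent extent entries or pushing it into a bitmap if the extent
 * threshold has been reached
 */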
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
             u64 offset, u64 bytes)
{
    struct btrfs_free_space *right_info = NULL;
    struct btrfs_free_space *left_info = NULL;
    struct btrfs_free_space *info = NULL;
    int ret = 0;

    info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
    if (!info)
        return -ENOMEM;

    info->offset = offset;
    info->bytes = bytes;

    spin_lock(&block_group->tree_lock);

    /*
     * first we want to see if there is free space adjacent to the range we
     * are adding, if there is remove that struct and add a new one to
     * cover the entire range
     */
    right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
    if (right_info && rb_prev(&right_info->offset_index))
        left_info = rb_entry(rb_prev(&right_info->offset_index),
                     struct btrfs_free_space, offset_index);
    else
        left_info = tree_search_offset(block_group, offset - 1, 0, 0);

    /*
     * If there was no extent directly to the left or right of this new
     * extent then we know we're going to have to allocate a new extent, so
     * before we do that see if we need to drop this into a bitmap
     */
    if ((!left_info || left_info->bitmap) &&
        (!right_info || right_info->bitmap)) {
        ret = insert_into_bitmap(block_group, info);

        if (ret < 0) {
            goto out;
        } else if (ret) {
            ret = 0;
            goto out;
        }
    }

    if (right_info && !right_info->bitmap) {
        unlink_free_space(block_group, right_info);
        info->bytes += right_info->bytes;
        kfree(right_info);
    }

    if (left_info && !left_info->bitmap &&
        left_info->offset + left_info->bytes == offset) {
        unlink_free_space(block_group, left_info);
        info->offset = left_info->offset;
        info->bytes += left_info->bytes;
        kfree(left_info);
    }

    ret = link_free_space(block_group, info);
    if (ret)
        kfree(info);
out:
    spin_unlock(&block_group->tree_lock);

    if (ret) {
        printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
        BUG_ON(ret == -EEXIST);
    }

    return ret;
}

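/*
 * remove a range of space from the free space cache; the range may span
 * extent entries and bitmaps, in which case we keep retrying until the whole
 * range has been accounted for
 */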
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
                u64 offset, u64 bytes)
{
    struct btrfs_free_space *info;
    struct btrfs_free_space *next_info = NULL;
    int ret = 0;

    spin_lock(&block_group->tree_lock);

again:
    info = tree_search_offset(block_group, offset, 0, 0);
    if (!info) {
        /*
         * oops didn't find an extent that matched the space we wanted
         * to remove, look for a bitmap instead
         */
        info = tree_search_offset(block_group,
                      offset_to_bitmap(block_group, offset),
                      1, 0);
        if (!info) {
            WARN_ON(1);
            goto out_lock;
        }
    }

    if (info->bytes < bytes && rb_next(&info->offset_index)) {
        u64 end;
        next_info = rb_entry(rb_next(&info->offset_index),
                     struct btrfs_free_space,
                     offset_index);

        if (next_info->bitmap)
            end = next_info->offset + BITS_PER_BITMAP *
                block_group->sectorsize - 1;
        else
            end = next_info->offset + next_info->bytes;

        if (next_info->bytes < bytes ||
            next_info->offset > offset || offset > end) {
            printk(KERN_CRIT "Found free space at %llu, size %llu,"
                  " trying to use %llu\n",
                  (unsigned long long)info->offset,
                  (unsigned long long)info->bytes,
                  (unsigned long long)bytes);
            WARN_ON(1);
            ret = -EINVAL;
            goto out_lock;
        }

        info = next_info;
    }

    if (info->bytes == bytes) {
        unlink_free_space(block_group, info);
        if (info->bitmap) {
            kfree(info->bitmap);
            block_group->total_bitmaps--;
        }
        kfree(info);
        goto out_lock;
    }

    if (!info->bitmap && info->offset == offset) {
        unlink_free_space(block_group, info);
        info->offset += bytes;
        info->bytes -= bytes;
        link_free_space(block_group, info);
        goto out_lock;
    }

    if (!info->bitmap && info->offset <= offset &&
        info->offset + info->bytes >= offset + bytes) {
        u64 old_start = info->offset;
        /*
         * we're freeing space in the middle of the info,
         * this can happen during tree log replay
         *
         * first unlink the old info and then
         * insert it again after the hole we're creating
         */
        unlink_free_space(block_group, info);
        if (offset + bytes < info->offset + info->bytes) {
            u64 old_end = info->offset + info->bytes;

            info->offset = offset + bytes;
            info->bytes = old_end - info->offset;
            ret = link_free_space(block_group, info);
            WARN_ON(ret);
            if (ret)
                goto out_lock;
        } else {
            /* the hole we're creating ends at the end
             * of the info struct, just free the info
             */
            kfree(info);
        }
        spin_unlock(&block_group->tree_lock);

        /* step two, insert a new info struct to cover
         * anything before the hole
         */
        ret = btrfs_add_free_space(block_group, old_start,
                       offset - old_start);
        WARN_ON(ret);
        goto out;
    }

    ret = remove_from_bitmap(block_group, info, &offset, &bytes);
    if (ret == -EAGAIN)
        goto again;
    BUG_ON(ret);
out_lock:
    spin_unlock(&block_group->tree_lock);
out:
    return ret;
}

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
               u64 bytes)
{
    struct btrfs_free_space *info;
    struct rb_node *n;
    int count = 0;

    for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
        info = rb_entry(n, struct btrfs_free_space, offset_index);
        if (info->bytes >= bytes)
            count++;
        printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
               (unsigned long long)info->offset,
               (unsigned long long)info->bytes,
               (info->bitmap) ? "yes" : "no");
    }
    printk(KERN_INFO "block group has cluster?: %s\n",
           list_empty(&block_group->cluster_list) ? "no" : "yes");
    printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
           "\n", count);
}

u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
    struct btrfs_free_space *info;
    struct rb_node *n;
    u64 ret = 0;

    for (n = rb_first(&block_group->free_space_offset); n;
         n = rb_next(n)) {
        info = rb_entry(n, struct btrfs_free_space, offset_index);
        ret += info->bytes;
    }

    return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache. If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already. In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
                 struct btrfs_block_group_cache *block_group,
                 struct btrfs_free_cluster *cluster)
{
    struct btrfs_free_space *entry;
    struct rb_node *node;
    bool bitmap;

    spin_lock(&cluster->lock);
    if (cluster->block_group != block_group)
        goto out;

    bitmap = cluster->points_to_bitmap;
    cluster->block_group = NULL;
    cluster->window_start = 0;
    list_del_init(&cluster->block_group_list);
    cluster->points_to_bitmap = false;

    if (bitmap)
        goto out;

    node = rb_first(&cluster->root);
    while (node) {
        entry = rb_entry(node, struct btrfs_free_space, offset_index);
        node = rb_next(&entry->offset_index);
        rb_erase(&entry->offset_index, &cluster->root);
        BUG_ON(entry->bitmap);
        tree_insert_offset(&block_group->free_space_offset,
                   entry->offset, &entry->offset_index, 0);
    }
    cluster->root = RB_ROOT;

out:
    spin_unlock(&cluster->lock);
    btrfs_put_block_group(block_group);
    return 0;
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
    struct btrfs_free_space *info;
    struct rb_node *node;
    struct btrfs_free_cluster *cluster;
    struct list_head *head;

    spin_lock(&block_group->tree_lock);
    while ((head = block_group->cluster_list.next) !=
           &block_group->cluster_list) {
        cluster = list_entry(head, struct btrfs_free_cluster,
                     block_group_list);

        WARN_ON(cluster->block_group != block_group);
        __btrfs_return_cluster_to_free_space(block_group, cluster);
        if (need_resched()) {
            spin_unlock(&block_group->tree_lock);
            cond_resched();
            spin_lock(&block_group->tree_lock);
        }
    }

    while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
        info = rb_entry(node, struct btrfs_free_space, offset_index);
        unlink_free_space(block_group, info);
        if (info->bitmap)
            kfree(info->bitmap);
        kfree(info);
        if (need_resched()) {
            spin_unlock(&block_group->tree_lock);
            cond_resched();
            spin_lock(&block_group->tree_lock);
        }
    }

    spin_unlock(&block_group->tree_lock);
}

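/*
 * allocate 'bytes' (searching for bytes + empty_size) from the free space
 * cache of a block group; returns the start of the allocated range, or 0 if
 * nothing suitable was found
 */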
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
                   u64 offset, u64 bytes, u64 empty_size)
{
    struct btrfs_free_space *entry = NULL;
    u64 bytes_search = bytes + empty_size;
    u64 ret = 0;

    spin_lock(&block_group->tree_lock);
    entry = find_free_space(block_group, &offset, &bytes_search, 0);
    if (!entry)
        goto out;

    ret = offset;
    if (entry->bitmap) {
        bitmap_clear_bits(block_group, entry, offset, bytes);
        if (!entry->bytes) {
            unlink_free_space(block_group, entry);
            kfree(entry->bitmap);
            kfree(entry);
            block_group->total_bitmaps--;
            recalculate_thresholds(block_group);
        }
    } else {
        unlink_free_space(block_group, entry);
        entry->offset += bytes;
        entry->bytes -= bytes;
        if (!entry->bytes)
            kfree(entry);
        else
            link_free_space(block_group, entry);
    }

out:
    spin_unlock(&block_group->tree_lock);

    return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache. If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
                   struct btrfs_block_group_cache *block_group,
                   struct btrfs_free_cluster *cluster)
{
    int ret;

    /* first, get a safe pointer to the block group */
    spin_lock(&cluster->lock);
    if (!block_group) {
        block_group = cluster->block_group;
        if (!block_group) {
            spin_unlock(&cluster->lock);
            return 0;
        }
    } else if (cluster->block_group != block_group) {
        /* someone else has already freed it don't redo their work */
        spin_unlock(&cluster->lock);
        return 0;
    }
    atomic_inc(&block_group->count);
    spin_unlock(&cluster->lock);

    /* now return any extents the cluster had on it */
    spin_lock(&block_group->tree_lock);
    ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
    spin_unlock(&block_group->tree_lock);

    /* finally drop our ref */
    btrfs_put_block_group(block_group);
    return ret;
}

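/*
 * allocate 'bytes' from a cluster whose window points into a bitmap entry;
 * returns the start of the allocation, or 0 if the bitmap can't satisfy it
 */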
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
                   struct btrfs_free_cluster *cluster,
                   u64 bytes, u64 min_start)
{
    struct btrfs_free_space *entry;
    int err;
    u64 search_start = cluster->window_start;
    u64 search_bytes = bytes;
    u64 ret = 0;

    spin_lock(&block_group->tree_lock);
    spin_lock(&cluster->lock);

    if (!cluster->points_to_bitmap)
        goto out;

    if (cluster->block_group != block_group)
        goto out;

    /*
     * search_start is the beginning of the bitmap, but at some point it may
     * be a good idea to point to the actual start of the free area in the
     * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
     * to 1 to make sure we get the bitmap entry
     */
    entry = tree_search_offset(block_group,
                   offset_to_bitmap(block_group, search_start),
                   1, 0);
    if (!entry || !entry->bitmap)
        goto out;

    search_start = min_start;
    search_bytes = bytes;

    err = search_bitmap(block_group, entry, &search_start,
                &search_bytes);
    if (err)
        goto out;

    ret = search_start;
    bitmap_clear_bits(block_group, entry, ret, bytes);
out:
    spin_unlock(&cluster->lock);
    spin_unlock(&block_group->tree_lock);

    return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
                 struct btrfs_free_cluster *cluster, u64 bytes,
                 u64 min_start)
{
    struct btrfs_free_space *entry = NULL;
    struct rb_node *node;
    u64 ret = 0;

    if (cluster->points_to_bitmap)
        return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
                           min_start);

    spin_lock(&cluster->lock);
    if (bytes > cluster->max_size)
        goto out;

    if (cluster->block_group != block_group)
        goto out;

    node = rb_first(&cluster->root);
    if (!node)
        goto out;

    entry = rb_entry(node, struct btrfs_free_space, offset_index);

    while (1) {
        if (entry->bytes < bytes || entry->offset < min_start) {
            struct rb_node *node;

            node = rb_next(&entry->offset_index);
            if (!node)
                break;
            entry = rb_entry(node, struct btrfs_free_space,
                     offset_index);
            continue;
        }
        ret = entry->offset;

        entry->offset += bytes;
        entry->bytes -= bytes;

        if (entry->bytes == 0) {
            rb_erase(&entry->offset_index, &cluster->root);
            kfree(entry);
        }
        break;
    }
out:
    spin_unlock(&cluster->lock);

    return ret;
}

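/*
 * try to build a cluster window inside a single bitmap entry: find a run of
 * at least min_bytes, keep extending the window until we've seen 'bytes'
 * worth of free space, then point the cluster at the window start
 */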
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
                struct btrfs_free_space *entry,
                struct btrfs_free_cluster *cluster,
                u64 offset, u64 bytes, u64 min_bytes)
{
    unsigned long next_zero;
    unsigned long i;
    unsigned long search_bits;
    unsigned long total_bits;
    unsigned long found_bits;
    unsigned long start = 0;
    unsigned long total_found = 0;
    bool found = false;

    i = offset_to_bit(entry->offset, block_group->sectorsize,
              max_t(u64, offset, entry->offset));
    search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
    total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
    found_bits = 0;
    for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
         i < BITS_PER_BITMAP;
         i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
        next_zero = find_next_zero_bit(entry->bitmap,
                           BITS_PER_BITMAP, i);
        if (next_zero - i >= search_bits) {
            found_bits = next_zero - i;
            break;
        }
        i = next_zero;
    }

    if (!found_bits)
        return -1;

    if (!found) {
        start = i;
        found = true;
    }

    total_found += found_bits;

    if (cluster->max_size < found_bits * block_group->sectorsize)
        cluster->max_size = found_bits * block_group->sectorsize;

    if (total_found < total_bits) {
        i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
        if (i - start > total_bits * 2) {
            total_found = 0;
            cluster->max_size = 0;
            found = false;
        }
        goto again;
    }

    cluster->window_start = start * block_group->sectorsize +
        entry->offset;
    cluster->points_to_bitmap = true;

    return 0;
}

/*
 * here we try to find a cluster of blocks in a block group. The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -enospc
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
                 struct btrfs_root *root,
                 struct btrfs_block_group_cache *block_group,
                 struct btrfs_free_cluster *cluster,
                 u64 offset, u64 bytes, u64 empty_size)
{
    struct btrfs_free_space *entry = NULL;
    struct rb_node *node;
    struct btrfs_free_space *next;
    struct btrfs_free_space *last = NULL;
    u64 min_bytes;
    u64 window_start;
    u64 window_free;
    u64 max_extent = 0;
    bool found_bitmap = false;
    int ret;

    /* for metadata, allow allocations with more holes */
    if (btrfs_test_opt(root, SSD_SPREAD)) {
        min_bytes = bytes + empty_size;
    } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
        /*
         * we want to do larger allocations when we are
         * flushing out the delayed refs, it helps prevent
         * making more work as we go along.
         */
        if (trans->transaction->delayed_refs.flushing)
            min_bytes = max(bytes, (bytes + empty_size) >> 1);
        else
            min_bytes = max(bytes, (bytes + empty_size) >> 4);
    } else
        min_bytes = max(bytes, (bytes + empty_size) >> 2);

    spin_lock(&block_group->tree_lock);
    spin_lock(&cluster->lock);

    /* someone already found a cluster, hooray */
    if (cluster->block_group) {
        ret = 0;
        goto out;
    }
again:
    entry = tree_search_offset(block_group, offset, found_bitmap, 1);
    if (!entry) {
        ret = -ENOSPC;
        goto out;
    }

    /*
     * If found_bitmap is true, we exhausted our search for extent entries,
     * and we just want to search all of the bitmaps that we can find, and
     * ignore any extent entries we find.
     */
    while (entry->bitmap || found_bitmap ||
           (!entry->bitmap && entry->bytes < min_bytes)) {
        struct rb_node *node = rb_next(&entry->offset_index);

        if (entry->bitmap && entry->bytes > bytes + empty_size) {
            ret = btrfs_bitmap_cluster(block_group, entry, cluster,
                           offset, bytes + empty_size,
                           min_bytes);
            if (!ret)
                goto got_it;
        }

        if (!node) {
            ret = -ENOSPC;
            goto out;
        }
        entry = rb_entry(node, struct btrfs_free_space, offset_index);
    }

    /*
     * We already searched all the extent entries from the passed in offset
     * to the end and didn't find enough space for the cluster, and we also
     * didn't find any bitmaps that met our criteria, just go ahead and exit
     */
    if (found_bitmap) {
        ret = -ENOSPC;
        goto out;
    }

    cluster->points_to_bitmap = false;
    window_start = entry->offset;
    window_free = entry->bytes;
    last = entry;
    max_extent = entry->bytes;

    while (1) {
        /* our window is just right, let's fill it */
        if (window_free >= bytes + empty_size)
            break;

        node = rb_next(&last->offset_index);
        if (!node) {
            if (found_bitmap)
                goto again;
            ret = -ENOSPC;
            goto out;
        }
        next = rb_entry(node, struct btrfs_free_space, offset_index);

        /*
         * we found a bitmap, so if this search doesn't result in a
         * cluster, we know to go and search again for the bitmaps and
         * start looking for space there
         */
        if (next->bitmap) {
            if (!found_bitmap)
                offset = next->offset;
            found_bitmap = true;
            last = next;
            continue;
        }

        /*
         * we haven't filled the empty size and the window is
         * very large. reset and try again
         */
        if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
            next->offset - window_start > (bytes + empty_size) * 2) {
            entry = next;
            window_start = entry->offset;
            window_free = entry->bytes;
            last = entry;
            max_extent = entry->bytes;
        } else {
            last = next;
            window_free += next->bytes;
            /* track the largest extent added to the window */
            if (next->bytes > max_extent)
                max_extent = next->bytes;
        }
    }

    cluster->window_start = entry->offset;

    /*
     * now we've found our entries, pull them out of the free space
     * cache and put them into the cluster rbtree
     *
     * The cluster includes an rbtree, but only uses the offset index
     * of each free space cache entry.
     */
    while (1) {
        node = rb_next(&entry->offset_index);
        if (entry->bitmap && node) {
            entry = rb_entry(node, struct btrfs_free_space,
                     offset_index);
            continue;
        } else if (entry->bitmap && !node) {
            break;
        }

        rb_erase(&entry->offset_index, &block_group->free_space_offset);
        ret = tree_insert_offset(&cluster->root, entry->offset,
                     &entry->offset_index, 0);
        BUG_ON(ret);

        if (!node || entry == last)
            break;

        entry = rb_entry(node, struct btrfs_free_space, offset_index);
    }

    cluster->max_size = max_extent;
got_it:
    ret = 0;
    atomic_inc(&block_group->count);
    list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
    cluster->block_group = block_group;
out:
    spin_unlock(&cluster->lock);
    spin_unlock(&block_group->tree_lock);

    return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
    spin_lock_init(&cluster->lock);
    spin_lock_init(&cluster->refill_lock);
    cluster->root = RB_ROOT;
    cluster->max_size = 0;
    cluster->points_to_bitmap = false;
    INIT_LIST_HEAD(&cluster->block_group_list);
    cluster->block_group = NULL;
}
