Root/fs/btrfs/relocation.c

1/*
2 * Copyright (C) 2009 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include <linux/slab.h>
25#include "ctree.h"
26#include "disk-io.h"
27#include "transaction.h"
28#include "volumes.h"
29#include "locking.h"
30#include "btrfs_inode.h"
31#include "async-thread.h"
32
33/*
34 * backref_node, mapping_node and tree_block start with this
35 */
36struct tree_entry {
37    struct rb_node rb_node;
38    u64 bytenr;
39};
40
41/*
42 * present a tree block in the backref cache
43 */
44struct backref_node {
45    struct rb_node rb_node;
46    u64 bytenr;
47    /* objectid tree block owner */
48    u64 owner;
49    /* list of upper level blocks reference this block */
50    struct list_head upper;
51    /* list of child blocks in the cache */
52    struct list_head lower;
53    /* NULL if this node is not tree root */
54    struct btrfs_root *root;
55    /* extent buffer got by COW the block */
56    struct extent_buffer *eb;
57    /* level of tree block */
58    unsigned int level:8;
59    /* 1 if the block is root of old snapshot */
60    unsigned int old_root:1;
61    /* 1 if no child blocks in the cache */
62    unsigned int lowest:1;
63    /* is the extent buffer locked */
64    unsigned int locked:1;
65    /* has the block been processed */
66    unsigned int processed:1;
67    /* have backrefs of this block been checked */
68    unsigned int checked:1;
69};
70
71/*
72 * present a block pointer in the backref cache
73 */
74struct backref_edge {
75    struct list_head list[2];
76    struct backref_node *node[2];
77    u64 blockptr;
78};
79
80#define LOWER 0
81#define UPPER 1
82
83struct backref_cache {
84    /* red black tree of all backref nodes in the cache */
85    struct rb_root rb_root;
86    /* list of backref nodes with no child block in the cache */
87    struct list_head pending[BTRFS_MAX_LEVEL];
88    spinlock_t lock;
89};
90
91/*
92 * map address of tree root to tree
93 */
94struct mapping_node {
95    struct rb_node rb_node;
96    u64 bytenr;
97    void *data;
98};
99
100struct mapping_tree {
101    struct rb_root rb_root;
102    spinlock_t lock;
103};
104
105/*
106 * present a tree block to process
107 */
108struct tree_block {
109    struct rb_node rb_node;
110    u64 bytenr;
111    struct btrfs_key key;
112    unsigned int level:8;
113    unsigned int key_ready:1;
114};
115
116/* inode vector */
117#define INODEVEC_SIZE 16
118
119struct inodevec {
120    struct list_head list;
121    struct inode *inode[INODEVEC_SIZE];
122    int nr;
123};
124
125#define MAX_EXTENTS 128
126
127struct file_extent_cluster {
128    u64 start;
129    u64 end;
130    u64 boundary[MAX_EXTENTS];
131    unsigned int nr;
132};
133
134struct reloc_control {
135    /* block group to relocate */
136    struct btrfs_block_group_cache *block_group;
137    /* extent tree */
138    struct btrfs_root *extent_root;
139    /* inode for moving data */
140    struct inode *data_inode;
141    struct btrfs_workers workers;
142    /* tree blocks have been processed */
143    struct extent_io_tree processed_blocks;
144    /* map start of tree root to corresponding reloc tree */
145    struct mapping_tree reloc_root_tree;
146    /* list of reloc trees */
147    struct list_head reloc_roots;
148    u64 search_start;
149    u64 extents_found;
150    u64 extents_skipped;
151    int stage;
152    int create_reloc_root;
153    unsigned int found_file_extent:1;
154    unsigned int found_old_snapshot:1;
155};
156
157/* stages of data relocation */
158#define MOVE_DATA_EXTENTS 0
159#define UPDATE_DATA_PTRS 1
160
161/*
162 * merge reloc tree to corresponding fs tree in worker threads
163 */
164struct async_merge {
165    struct btrfs_work work;
166    struct reloc_control *rc;
167    struct btrfs_root *root;
168    struct completion *done;
169    atomic_t *num_pending;
170};
171
172static void mapping_tree_init(struct mapping_tree *tree)
173{
174    tree->rb_root = RB_ROOT;
175    spin_lock_init(&tree->lock);
176}
177
178static void backref_cache_init(struct backref_cache *cache)
179{
180    int i;
181    cache->rb_root = RB_ROOT;
182    for (i = 0; i < BTRFS_MAX_LEVEL; i++)
183        INIT_LIST_HEAD(&cache->pending[i]);
184    spin_lock_init(&cache->lock);
185}
186
187static void backref_node_init(struct backref_node *node)
188{
189    memset(node, 0, sizeof(*node));
190    INIT_LIST_HEAD(&node->upper);
191    INIT_LIST_HEAD(&node->lower);
192    RB_CLEAR_NODE(&node->rb_node);
193}
194
195static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
196                   struct rb_node *node)
197{
198    struct rb_node **p = &root->rb_node;
199    struct rb_node *parent = NULL;
200    struct tree_entry *entry;
201
202    while (*p) {
203        parent = *p;
204        entry = rb_entry(parent, struct tree_entry, rb_node);
205
206        if (bytenr < entry->bytenr)
207            p = &(*p)->rb_left;
208        else if (bytenr > entry->bytenr)
209            p = &(*p)->rb_right;
210        else
211            return parent;
212    }
213
214    rb_link_node(node, parent, p);
215    rb_insert_color(node, root);
216    return NULL;
217}
218
219static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
220{
221    struct rb_node *n = root->rb_node;
222    struct tree_entry *entry;
223
224    while (n) {
225        entry = rb_entry(n, struct tree_entry, rb_node);
226
227        if (bytenr < entry->bytenr)
228            n = n->rb_left;
229        else if (bytenr > entry->bytenr)
230            n = n->rb_right;
231        else
232            return n;
233    }
234    return NULL;
235}
236
237/*
238 * walk up backref nodes until reach node presents tree root
239 */
240static struct backref_node *walk_up_backref(struct backref_node *node,
241                        struct backref_edge *edges[],
242                        int *index)
243{
244    struct backref_edge *edge;
245    int idx = *index;
246
247    while (!list_empty(&node->upper)) {
248        edge = list_entry(node->upper.next,
249                  struct backref_edge, list[LOWER]);
250        edges[idx++] = edge;
251        node = edge->node[UPPER];
252    }
253    *index = idx;
254    return node;
255}
256
257/*
258 * walk down backref nodes to find start of next reference path
259 */
260static struct backref_node *walk_down_backref(struct backref_edge *edges[],
261                          int *index)
262{
263    struct backref_edge *edge;
264    struct backref_node *lower;
265    int idx = *index;
266
267    while (idx > 0) {
268        edge = edges[idx - 1];
269        lower = edge->node[LOWER];
270        if (list_is_last(&edge->list[LOWER], &lower->upper)) {
271            idx--;
272            continue;
273        }
274        edge = list_entry(edge->list[LOWER].next,
275                  struct backref_edge, list[LOWER]);
276        edges[idx - 1] = edge;
277        *index = idx;
278        return edge->node[UPPER];
279    }
280    *index = 0;
281    return NULL;
282}
283
284static void drop_node_buffer(struct backref_node *node)
285{
286    if (node->eb) {
287        if (node->locked) {
288            btrfs_tree_unlock(node->eb);
289            node->locked = 0;
290        }
291        free_extent_buffer(node->eb);
292        node->eb = NULL;
293    }
294}
295
296static void drop_backref_node(struct backref_cache *tree,
297                  struct backref_node *node)
298{
299    BUG_ON(!node->lowest);
300    BUG_ON(!list_empty(&node->upper));
301
302    drop_node_buffer(node);
303    list_del(&node->lower);
304
305    rb_erase(&node->rb_node, &tree->rb_root);
306    kfree(node);
307}
308
309/*
310 * remove a backref node from the backref cache
311 */
312static void remove_backref_node(struct backref_cache *cache,
313                struct backref_node *node)
314{
315    struct backref_node *upper;
316    struct backref_edge *edge;
317
318    if (!node)
319        return;
320
321    BUG_ON(!node->lowest);
322    while (!list_empty(&node->upper)) {
323        edge = list_entry(node->upper.next, struct backref_edge,
324                  list[LOWER]);
325        upper = edge->node[UPPER];
326        list_del(&edge->list[LOWER]);
327        list_del(&edge->list[UPPER]);
328        kfree(edge);
329        /*
330         * add the node to pending list if no other
331         * child block cached.
332         */
333        if (list_empty(&upper->lower)) {
334            list_add_tail(&upper->lower,
335                      &cache->pending[upper->level]);
336            upper->lowest = 1;
337        }
338    }
339    drop_backref_node(cache, node);
340}
341
342/*
343 * find reloc tree by address of tree root
344 */
345static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
346                      u64 bytenr)
347{
348    struct rb_node *rb_node;
349    struct mapping_node *node;
350    struct btrfs_root *root = NULL;
351
352    spin_lock(&rc->reloc_root_tree.lock);
353    rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
354    if (rb_node) {
355        node = rb_entry(rb_node, struct mapping_node, rb_node);
356        root = (struct btrfs_root *)node->data;
357    }
358    spin_unlock(&rc->reloc_root_tree.lock);
359    return root;
360}
361
362static int is_cowonly_root(u64 root_objectid)
363{
364    if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
365        root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
366        root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
367        root_objectid == BTRFS_DEV_TREE_OBJECTID ||
368        root_objectid == BTRFS_TREE_LOG_OBJECTID ||
369        root_objectid == BTRFS_CSUM_TREE_OBJECTID)
370        return 1;
371    return 0;
372}
373
374static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
375                    u64 root_objectid)
376{
377    struct btrfs_key key;
378
379    key.objectid = root_objectid;
380    key.type = BTRFS_ROOT_ITEM_KEY;
381    if (is_cowonly_root(root_objectid))
382        key.offset = 0;
383    else
384        key.offset = (u64)-1;
385
386    return btrfs_read_fs_root_no_name(fs_info, &key);
387}
388
389#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
390static noinline_for_stack
391struct btrfs_root *find_tree_root(struct reloc_control *rc,
392                  struct extent_buffer *leaf,
393                  struct btrfs_extent_ref_v0 *ref0)
394{
395    struct btrfs_root *root;
396    u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
397    u64 generation = btrfs_ref_generation_v0(leaf, ref0);
398
399    BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);
400
401    root = read_fs_root(rc->extent_root->fs_info, root_objectid);
402    BUG_ON(IS_ERR(root));
403
404    if (root->ref_cows &&
405        generation != btrfs_root_generation(&root->root_item))
406        return NULL;
407
408    return root;
409}
410#endif
411
412static noinline_for_stack
413int find_inline_backref(struct extent_buffer *leaf, int slot,
414            unsigned long *ptr, unsigned long *end)
415{
416    struct btrfs_extent_item *ei;
417    struct btrfs_tree_block_info *bi;
418    u32 item_size;
419
420    item_size = btrfs_item_size_nr(leaf, slot);
421#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
422    if (item_size < sizeof(*ei)) {
423        WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
424        return 1;
425    }
426#endif
427    ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
428    WARN_ON(!(btrfs_extent_flags(leaf, ei) &
429          BTRFS_EXTENT_FLAG_TREE_BLOCK));
430
431    if (item_size <= sizeof(*ei) + sizeof(*bi)) {
432        WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
433        return 1;
434    }
435
436    bi = (struct btrfs_tree_block_info *)(ei + 1);
437    *ptr = (unsigned long)(bi + 1);
438    *end = (unsigned long)ei + item_size;
439    return 0;
440}
441
442/*
443 * build backref tree for a given tree block. root of the backref tree
444 * corresponds the tree block, leaves of the backref tree correspond
445 * roots of b-trees that reference the tree block.
446 *
447 * the basic idea of this function is check backrefs of a given block
448 * to find upper level blocks that refernece the block, and then check
449 * bakcrefs of these upper level blocks recursively. the recursion stop
450 * when tree root is reached or backrefs for the block is cached.
451 *
452 * NOTE: if we find backrefs for a block are cached, we know backrefs
453 * for all upper level blocks that directly/indirectly reference the
454 * block are also cached.
455 */
456static struct backref_node *build_backref_tree(struct reloc_control *rc,
457                           struct backref_cache *cache,
458                           struct btrfs_key *node_key,
459                           int level, u64 bytenr)
460{
461    struct btrfs_path *path1;
462    struct btrfs_path *path2;
463    struct extent_buffer *eb;
464    struct btrfs_root *root;
465    struct backref_node *cur;
466    struct backref_node *upper;
467    struct backref_node *lower;
468    struct backref_node *node = NULL;
469    struct backref_node *exist = NULL;
470    struct backref_edge *edge;
471    struct rb_node *rb_node;
472    struct btrfs_key key;
473    unsigned long end;
474    unsigned long ptr;
475    LIST_HEAD(list);
476    int ret;
477    int err = 0;
478
479    path1 = btrfs_alloc_path();
480    path2 = btrfs_alloc_path();
481    if (!path1 || !path2) {
482        err = -ENOMEM;
483        goto out;
484    }
485
486    node = kmalloc(sizeof(*node), GFP_NOFS);
487    if (!node) {
488        err = -ENOMEM;
489        goto out;
490    }
491
492    backref_node_init(node);
493    node->bytenr = bytenr;
494    node->owner = 0;
495    node->level = level;
496    node->lowest = 1;
497    cur = node;
498again:
499    end = 0;
500    ptr = 0;
501    key.objectid = cur->bytenr;
502    key.type = BTRFS_EXTENT_ITEM_KEY;
503    key.offset = (u64)-1;
504
505    path1->search_commit_root = 1;
506    path1->skip_locking = 1;
507    ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
508                0, 0);
509    if (ret < 0) {
510        err = ret;
511        goto out;
512    }
513    BUG_ON(!ret || !path1->slots[0]);
514
515    path1->slots[0]--;
516
517    WARN_ON(cur->checked);
518    if (!list_empty(&cur->upper)) {
519        /*
520         * the backref was added previously when processsing
521         * backref of type BTRFS_TREE_BLOCK_REF_KEY
522         */
523        BUG_ON(!list_is_singular(&cur->upper));
524        edge = list_entry(cur->upper.next, struct backref_edge,
525                  list[LOWER]);
526        BUG_ON(!list_empty(&edge->list[UPPER]));
527        exist = edge->node[UPPER];
528        /*
529         * add the upper level block to pending list if we need
530         * check its backrefs
531         */
532        if (!exist->checked)
533            list_add_tail(&edge->list[UPPER], &list);
534    } else {
535        exist = NULL;
536    }
537
538    while (1) {
539        cond_resched();
540        eb = path1->nodes[0];
541
542        if (ptr >= end) {
543            if (path1->slots[0] >= btrfs_header_nritems(eb)) {
544                ret = btrfs_next_leaf(rc->extent_root, path1);
545                if (ret < 0) {
546                    err = ret;
547                    goto out;
548                }
549                if (ret > 0)
550                    break;
551                eb = path1->nodes[0];
552            }
553
554            btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
555            if (key.objectid != cur->bytenr) {
556                WARN_ON(exist);
557                break;
558            }
559
560            if (key.type == BTRFS_EXTENT_ITEM_KEY) {
561                ret = find_inline_backref(eb, path1->slots[0],
562                              &ptr, &end);
563                if (ret)
564                    goto next;
565            }
566        }
567
568        if (ptr < end) {
569            /* update key for inline back ref */
570            struct btrfs_extent_inline_ref *iref;
571            iref = (struct btrfs_extent_inline_ref *)ptr;
572            key.type = btrfs_extent_inline_ref_type(eb, iref);
573            key.offset = btrfs_extent_inline_ref_offset(eb, iref);
574            WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
575                key.type != BTRFS_SHARED_BLOCK_REF_KEY);
576        }
577
578        if (exist &&
579            ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
580              exist->owner == key.offset) ||
581             (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
582              exist->bytenr == key.offset))) {
583            exist = NULL;
584            goto next;
585        }
586
587#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
588        if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
589            key.type == BTRFS_EXTENT_REF_V0_KEY) {
590            if (key.objectid == key.offset &&
591                key.type == BTRFS_EXTENT_REF_V0_KEY) {
592                struct btrfs_extent_ref_v0 *ref0;
593                ref0 = btrfs_item_ptr(eb, path1->slots[0],
594                        struct btrfs_extent_ref_v0);
595                root = find_tree_root(rc, eb, ref0);
596                if (root)
597                    cur->root = root;
598                else
599                    cur->old_root = 1;
600                break;
601            }
602#else
603        BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
604        if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
605#endif
606            if (key.objectid == key.offset) {
607                /*
608                 * only root blocks of reloc trees use
609                 * backref of this type.
610                 */
611                root = find_reloc_root(rc, cur->bytenr);
612                BUG_ON(!root);
613                cur->root = root;
614                break;
615            }
616
617            edge = kzalloc(sizeof(*edge), GFP_NOFS);
618            if (!edge) {
619                err = -ENOMEM;
620                goto out;
621            }
622            rb_node = tree_search(&cache->rb_root, key.offset);
623            if (!rb_node) {
624                upper = kmalloc(sizeof(*upper), GFP_NOFS);
625                if (!upper) {
626                    kfree(edge);
627                    err = -ENOMEM;
628                    goto out;
629                }
630                backref_node_init(upper);
631                upper->bytenr = key.offset;
632                upper->owner = 0;
633                upper->level = cur->level + 1;
634                /*
635                 * backrefs for the upper level block isn't
636                 * cached, add the block to pending list
637                 */
638                list_add_tail(&edge->list[UPPER], &list);
639            } else {
640                upper = rb_entry(rb_node, struct backref_node,
641                         rb_node);
642                INIT_LIST_HEAD(&edge->list[UPPER]);
643            }
644            list_add(&edge->list[LOWER], &cur->upper);
645            edge->node[UPPER] = upper;
646            edge->node[LOWER] = cur;
647
648            goto next;
649        } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
650            goto next;
651        }
652
653        /* key.type == BTRFS_TREE_BLOCK_REF_KEY */
654        root = read_fs_root(rc->extent_root->fs_info, key.offset);
655        if (IS_ERR(root)) {
656            err = PTR_ERR(root);
657            goto out;
658        }
659
660        if (btrfs_root_level(&root->root_item) == cur->level) {
661            /* tree root */
662            BUG_ON(btrfs_root_bytenr(&root->root_item) !=
663                   cur->bytenr);
664            cur->root = root;
665            break;
666        }
667
668        level = cur->level + 1;
669
670        /*
671         * searching the tree to find upper level blocks
672         * reference the block.
673         */
674        path2->search_commit_root = 1;
675        path2->skip_locking = 1;
676        path2->lowest_level = level;
677        ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
678        path2->lowest_level = 0;
679        if (ret < 0) {
680            err = ret;
681            goto out;
682        }
683        if (ret > 0 && path2->slots[level] > 0)
684            path2->slots[level]--;
685
686        eb = path2->nodes[level];
687        WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
688            cur->bytenr);
689
690        lower = cur;
691        for (; level < BTRFS_MAX_LEVEL; level++) {
692            if (!path2->nodes[level]) {
693                BUG_ON(btrfs_root_bytenr(&root->root_item) !=
694                       lower->bytenr);
695                lower->root = root;
696                break;
697            }
698
699            edge = kzalloc(sizeof(*edge), GFP_NOFS);
700            if (!edge) {
701                err = -ENOMEM;
702                goto out;
703            }
704
705            eb = path2->nodes[level];
706            rb_node = tree_search(&cache->rb_root, eb->start);
707            if (!rb_node) {
708                upper = kmalloc(sizeof(*upper), GFP_NOFS);
709                if (!upper) {
710                    kfree(edge);
711                    err = -ENOMEM;
712                    goto out;
713                }
714                backref_node_init(upper);
715                upper->bytenr = eb->start;
716                upper->owner = btrfs_header_owner(eb);
717                upper->level = lower->level + 1;
718
719                /*
720                 * if we know the block isn't shared
721                 * we can void checking its backrefs.
722                 */
723                if (btrfs_block_can_be_shared(root, eb))
724                    upper->checked = 0;
725                else
726                    upper->checked = 1;
727
728                /*
729                 * add the block to pending list if we
730                 * need check its backrefs. only block
731                 * at 'cur->level + 1' is added to the
732                 * tail of pending list. this guarantees
733                 * we check backrefs from lower level
734                 * blocks to upper level blocks.
735                 */
736                if (!upper->checked &&
737                    level == cur->level + 1) {
738                    list_add_tail(&edge->list[UPPER],
739                              &list);
740                } else
741                    INIT_LIST_HEAD(&edge->list[UPPER]);
742            } else {
743                upper = rb_entry(rb_node, struct backref_node,
744                         rb_node);
745                BUG_ON(!upper->checked);
746                INIT_LIST_HEAD(&edge->list[UPPER]);
747            }
748            list_add_tail(&edge->list[LOWER], &lower->upper);
749            edge->node[UPPER] = upper;
750            edge->node[LOWER] = lower;
751
752            if (rb_node)
753                break;
754            lower = upper;
755            upper = NULL;
756        }
757        btrfs_release_path(root, path2);
758next:
759        if (ptr < end) {
760            ptr += btrfs_extent_inline_ref_size(key.type);
761            if (ptr >= end) {
762                WARN_ON(ptr > end);
763                ptr = 0;
764                end = 0;
765            }
766        }
767        if (ptr >= end)
768            path1->slots[0]++;
769    }
770    btrfs_release_path(rc->extent_root, path1);
771
772    cur->checked = 1;
773    WARN_ON(exist);
774
775    /* the pending list isn't empty, take the first block to process */
776    if (!list_empty(&list)) {
777        edge = list_entry(list.next, struct backref_edge, list[UPPER]);
778        list_del_init(&edge->list[UPPER]);
779        cur = edge->node[UPPER];
780        goto again;
781    }
782
783    /*
784     * everything goes well, connect backref nodes and insert backref nodes
785     * into the cache.
786     */
787    BUG_ON(!node->checked);
788    rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
789    BUG_ON(rb_node);
790
791    list_for_each_entry(edge, &node->upper, list[LOWER])
792        list_add_tail(&edge->list[UPPER], &list);
793
794    while (!list_empty(&list)) {
795        edge = list_entry(list.next, struct backref_edge, list[UPPER]);
796        list_del_init(&edge->list[UPPER]);
797        upper = edge->node[UPPER];
798
799        if (!RB_EMPTY_NODE(&upper->rb_node)) {
800            if (upper->lowest) {
801                list_del_init(&upper->lower);
802                upper->lowest = 0;
803            }
804
805            list_add_tail(&edge->list[UPPER], &upper->lower);
806            continue;
807        }
808
809        BUG_ON(!upper->checked);
810        rb_node = tree_insert(&cache->rb_root, upper->bytenr,
811                      &upper->rb_node);
812        BUG_ON(rb_node);
813
814        list_add_tail(&edge->list[UPPER], &upper->lower);
815
816        list_for_each_entry(edge, &upper->upper, list[LOWER])
817            list_add_tail(&edge->list[UPPER], &list);
818    }
819out:
820    btrfs_free_path(path1);
821    btrfs_free_path(path2);
822    if (err) {
823        INIT_LIST_HEAD(&list);
824        upper = node;
825        while (upper) {
826            if (RB_EMPTY_NODE(&upper->rb_node)) {
827                list_splice_tail(&upper->upper, &list);
828                kfree(upper);
829            }
830
831            if (list_empty(&list))
832                break;
833
834            edge = list_entry(list.next, struct backref_edge,
835                      list[LOWER]);
836            upper = edge->node[UPPER];
837            kfree(edge);
838        }
839        return ERR_PTR(err);
840    }
841    return node;
842}
843
844/*
845 * helper to add 'address of tree root -> reloc tree' mapping
846 */
847static int __add_reloc_root(struct btrfs_root *root)
848{
849    struct rb_node *rb_node;
850    struct mapping_node *node;
851    struct reloc_control *rc = root->fs_info->reloc_ctl;
852
853    node = kmalloc(sizeof(*node), GFP_NOFS);
854    BUG_ON(!node);
855
856    node->bytenr = root->node->start;
857    node->data = root;
858
859    spin_lock(&rc->reloc_root_tree.lock);
860    rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
861                  node->bytenr, &node->rb_node);
862    spin_unlock(&rc->reloc_root_tree.lock);
863    BUG_ON(rb_node);
864
865    list_add_tail(&root->root_list, &rc->reloc_roots);
866    return 0;
867}
868
869/*
870 * helper to update/delete the 'address of tree root -> reloc tree'
871 * mapping
872 */
873static int __update_reloc_root(struct btrfs_root *root, int del)
874{
875    struct rb_node *rb_node;
876    struct mapping_node *node = NULL;
877    struct reloc_control *rc = root->fs_info->reloc_ctl;
878
879    spin_lock(&rc->reloc_root_tree.lock);
880    rb_node = tree_search(&rc->reloc_root_tree.rb_root,
881                  root->commit_root->start);
882    if (rb_node) {
883        node = rb_entry(rb_node, struct mapping_node, rb_node);
884        rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
885    }
886    spin_unlock(&rc->reloc_root_tree.lock);
887
888    BUG_ON((struct btrfs_root *)node->data != root);
889
890    if (!del) {
891        spin_lock(&rc->reloc_root_tree.lock);
892        node->bytenr = root->node->start;
893        rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
894                      node->bytenr, &node->rb_node);
895        spin_unlock(&rc->reloc_root_tree.lock);
896        BUG_ON(rb_node);
897    } else {
898        list_del_init(&root->root_list);
899        kfree(node);
900    }
901    return 0;
902}
903
904/*
905 * create reloc tree for a given fs tree. reloc tree is just a
906 * snapshot of the fs tree with special root objectid.
907 */
908int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
909              struct btrfs_root *root)
910{
911    struct btrfs_root *reloc_root;
912    struct extent_buffer *eb;
913    struct btrfs_root_item *root_item;
914    struct btrfs_key root_key;
915    int ret;
916
917    if (root->reloc_root) {
918        reloc_root = root->reloc_root;
919        reloc_root->last_trans = trans->transid;
920        return 0;
921    }
922
923    if (!root->fs_info->reloc_ctl ||
924        !root->fs_info->reloc_ctl->create_reloc_root ||
925        root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
926        return 0;
927
928    root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
929    BUG_ON(!root_item);
930
931    root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
932    root_key.type = BTRFS_ROOT_ITEM_KEY;
933    root_key.offset = root->root_key.objectid;
934
935    ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
936                  BTRFS_TREE_RELOC_OBJECTID);
937    BUG_ON(ret);
938
939    btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1);
940    memcpy(root_item, &root->root_item, sizeof(*root_item));
941    btrfs_set_root_refs(root_item, 1);
942    btrfs_set_root_bytenr(root_item, eb->start);
943    btrfs_set_root_level(root_item, btrfs_header_level(eb));
944    btrfs_set_root_generation(root_item, trans->transid);
945    memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key));
946    root_item->drop_level = 0;
947
948    btrfs_tree_unlock(eb);
949    free_extent_buffer(eb);
950
951    ret = btrfs_insert_root(trans, root->fs_info->tree_root,
952                &root_key, root_item);
953    BUG_ON(ret);
954    kfree(root_item);
955
956    reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
957                         &root_key);
958    BUG_ON(IS_ERR(reloc_root));
959    reloc_root->last_trans = trans->transid;
960
961    __add_reloc_root(reloc_root);
962    root->reloc_root = reloc_root;
963    return 0;
964}
965
966/*
967 * update root item of reloc tree
968 */
969int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
970                struct btrfs_root *root)
971{
972    struct btrfs_root *reloc_root;
973    struct btrfs_root_item *root_item;
974    int del = 0;
975    int ret;
976
977    if (!root->reloc_root)
978        return 0;
979
980    reloc_root = root->reloc_root;
981    root_item = &reloc_root->root_item;
982
983    if (btrfs_root_refs(root_item) == 0) {
984        root->reloc_root = NULL;
985        del = 1;
986    }
987
988    __update_reloc_root(reloc_root, del);
989
990    if (reloc_root->commit_root != reloc_root->node) {
991        btrfs_set_root_node(root_item, reloc_root->node);
992        free_extent_buffer(reloc_root->commit_root);
993        reloc_root->commit_root = btrfs_root_node(reloc_root);
994    }
995
996    ret = btrfs_update_root(trans, root->fs_info->tree_root,
997                &reloc_root->root_key, root_item);
998    BUG_ON(ret);
999    return 0;
1000}
1001
1002/*
1003 * helper to find first cached inode with inode number >= objectid
1004 * in a subvolume
1005 */
1006static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
1007{
1008    struct rb_node *node;
1009    struct rb_node *prev;
1010    struct btrfs_inode *entry;
1011    struct inode *inode;
1012
1013    spin_lock(&root->inode_lock);
1014again:
1015    node = root->inode_tree.rb_node;
1016    prev = NULL;
1017    while (node) {
1018        prev = node;
1019        entry = rb_entry(node, struct btrfs_inode, rb_node);
1020
1021        if (objectid < entry->vfs_inode.i_ino)
1022            node = node->rb_left;
1023        else if (objectid > entry->vfs_inode.i_ino)
1024            node = node->rb_right;
1025        else
1026            break;
1027    }
1028    if (!node) {
1029        while (prev) {
1030            entry = rb_entry(prev, struct btrfs_inode, rb_node);
1031            if (objectid <= entry->vfs_inode.i_ino) {
1032                node = prev;
1033                break;
1034            }
1035            prev = rb_next(prev);
1036        }
1037    }
1038    while (node) {
1039        entry = rb_entry(node, struct btrfs_inode, rb_node);
1040        inode = igrab(&entry->vfs_inode);
1041        if (inode) {
1042            spin_unlock(&root->inode_lock);
1043            return inode;
1044        }
1045
1046        objectid = entry->vfs_inode.i_ino + 1;
1047        if (cond_resched_lock(&root->inode_lock))
1048            goto again;
1049
1050        node = rb_next(node);
1051    }
1052    spin_unlock(&root->inode_lock);
1053    return NULL;
1054}
1055
1056static int in_block_group(u64 bytenr,
1057              struct btrfs_block_group_cache *block_group)
1058{
1059    if (bytenr >= block_group->key.objectid &&
1060        bytenr < block_group->key.objectid + block_group->key.offset)
1061        return 1;
1062    return 0;
1063}
1064
1065/*
1066 * get new location of data
1067 */
1068static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1069                u64 bytenr, u64 num_bytes)
1070{
1071    struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
1072    struct btrfs_path *path;
1073    struct btrfs_file_extent_item *fi;
1074    struct extent_buffer *leaf;
1075    int ret;
1076
1077    path = btrfs_alloc_path();
1078    if (!path)
1079        return -ENOMEM;
1080
1081    bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1082    ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
1083                       bytenr, 0);
1084    if (ret < 0)
1085        goto out;
1086    if (ret > 0) {
1087        ret = -ENOENT;
1088        goto out;
1089    }
1090
1091    leaf = path->nodes[0];
1092    fi = btrfs_item_ptr(leaf, path->slots[0],
1093                struct btrfs_file_extent_item);
1094
1095    BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
1096           btrfs_file_extent_compression(leaf, fi) ||
1097           btrfs_file_extent_encryption(leaf, fi) ||
1098           btrfs_file_extent_other_encoding(leaf, fi));
1099
1100    if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1101        ret = 1;
1102        goto out;
1103    }
1104
1105    if (new_bytenr)
1106        *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1107    ret = 0;
1108out:
1109    btrfs_free_path(path);
1110    return ret;
1111}
1112
1113/*
1114 * update file extent items in the tree leaf to point to
1115 * the new locations.
1116 */
1117static int replace_file_extents(struct btrfs_trans_handle *trans,
1118                struct reloc_control *rc,
1119                struct btrfs_root *root,
1120                struct extent_buffer *leaf,
1121                struct list_head *inode_list)
1122{
1123    struct btrfs_key key;
1124    struct btrfs_file_extent_item *fi;
1125    struct inode *inode = NULL;
1126    struct inodevec *ivec = NULL;
1127    u64 parent;
1128    u64 bytenr;
1129    u64 new_bytenr;
1130    u64 num_bytes;
1131    u64 end;
1132    u32 nritems;
1133    u32 i;
1134    int ret;
1135    int first = 1;
1136    int dirty = 0;
1137
1138    if (rc->stage != UPDATE_DATA_PTRS)
1139        return 0;
1140
1141    /* reloc trees always use full backref */
1142    if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1143        parent = leaf->start;
1144    else
1145        parent = 0;
1146
1147    nritems = btrfs_header_nritems(leaf);
1148    for (i = 0; i < nritems; i++) {
1149        cond_resched();
1150        btrfs_item_key_to_cpu(leaf, &key, i);
1151        if (key.type != BTRFS_EXTENT_DATA_KEY)
1152            continue;
1153        fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1154        if (btrfs_file_extent_type(leaf, fi) ==
1155            BTRFS_FILE_EXTENT_INLINE)
1156            continue;
1157        bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1158        num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1159        if (bytenr == 0)
1160            continue;
1161        if (!in_block_group(bytenr, rc->block_group))
1162            continue;
1163
1164        /*
1165         * if we are modifying block in fs tree, wait for readpage
1166         * to complete and drop the extent cache
1167         */
1168        if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1169            if (!ivec || ivec->nr == INODEVEC_SIZE) {
1170                ivec = kmalloc(sizeof(*ivec), GFP_NOFS);
1171                BUG_ON(!ivec);
1172                ivec->nr = 0;
1173                list_add_tail(&ivec->list, inode_list);
1174            }
1175            if (first) {
1176                inode = find_next_inode(root, key.objectid);
1177                if (inode)
1178                    ivec->inode[ivec->nr++] = inode;
1179                first = 0;
1180            } else if (inode && inode->i_ino < key.objectid) {
1181                inode = find_next_inode(root, key.objectid);
1182                if (inode)
1183                    ivec->inode[ivec->nr++] = inode;
1184            }
1185            if (inode && inode->i_ino == key.objectid) {
1186                end = key.offset +
1187                      btrfs_file_extent_num_bytes(leaf, fi);
1188                WARN_ON(!IS_ALIGNED(key.offset,
1189                            root->sectorsize));
1190                WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1191                end--;
1192                ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
1193                              key.offset, end,
1194                              GFP_NOFS);
1195                if (!ret)
1196                    continue;
1197
1198                btrfs_drop_extent_cache(inode, key.offset, end,
1199                            1);
1200                unlock_extent(&BTRFS_I(inode)->io_tree,
1201                          key.offset, end, GFP_NOFS);
1202            }
1203        }
1204
1205        ret = get_new_location(rc->data_inode, &new_bytenr,
1206                       bytenr, num_bytes);
1207        if (ret > 0)
1208            continue;
1209        BUG_ON(ret < 0);
1210
1211        btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
1212        dirty = 1;
1213
1214        key.offset -= btrfs_file_extent_offset(leaf, fi);
1215        ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
1216                       num_bytes, parent,
1217                       btrfs_header_owner(leaf),
1218                       key.objectid, key.offset);
1219        BUG_ON(ret);
1220
1221        ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1222                    parent, btrfs_header_owner(leaf),
1223                    key.objectid, key.offset);
1224        BUG_ON(ret);
1225    }
1226    if (dirty)
1227        btrfs_mark_buffer_dirty(leaf);
1228    return 0;
1229}
1230
1231static noinline_for_stack
1232int memcmp_node_keys(struct extent_buffer *eb, int slot,
1233             struct btrfs_path *path, int level)
1234{
1235    struct btrfs_disk_key key1;
1236    struct btrfs_disk_key key2;
1237    btrfs_node_key(eb, &key1, slot);
1238    btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
1239    return memcmp(&key1, &key2, sizeof(key1));
1240}
1241
1242/*
1243 * try to replace tree blocks in fs tree with the new blocks
1244 * in reloc tree. tree blocks haven't been modified since the
1245 * reloc tree was create can be replaced.
1246 *
1247 * if a block was replaced, level of the block + 1 is returned.
1248 * if no block got replaced, 0 is returned. if there are other
1249 * errors, a negative error number is returned.
1250 */
1251static int replace_path(struct btrfs_trans_handle *trans,
1252            struct btrfs_root *dest, struct btrfs_root *src,
1253            struct btrfs_path *path, struct btrfs_key *next_key,
1254            struct extent_buffer **leaf,
1255            int lowest_level, int max_level)
1256{
1257    struct extent_buffer *eb;
1258    struct extent_buffer *parent;
1259    struct btrfs_key key;
1260    u64 old_bytenr;
1261    u64 new_bytenr;
1262    u64 old_ptr_gen;
1263    u64 new_ptr_gen;
1264    u64 last_snapshot;
1265    u32 blocksize;
1266    int level;
1267    int ret;
1268    int slot;
1269
1270    BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1271    BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
1272    BUG_ON(lowest_level > 1 && leaf);
1273
1274    last_snapshot = btrfs_root_last_snapshot(&src->root_item);
1275
1276    slot = path->slots[lowest_level];
1277    btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
1278
1279    eb = btrfs_lock_root_node(dest);
1280    btrfs_set_lock_blocking(eb);
1281    level = btrfs_header_level(eb);
1282
1283    if (level < lowest_level) {
1284        btrfs_tree_unlock(eb);
1285        free_extent_buffer(eb);
1286        return 0;
1287    }
1288
1289    ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
1290    BUG_ON(ret);
1291    btrfs_set_lock_blocking(eb);
1292
1293    if (next_key) {
1294        next_key->objectid = (u64)-1;
1295        next_key->type = (u8)-1;
1296        next_key->offset = (u64)-1;
1297    }
1298
1299    parent = eb;
1300    while (1) {
1301        level = btrfs_header_level(parent);
1302        BUG_ON(level < lowest_level);
1303
1304        ret = btrfs_bin_search(parent, &key, level, &slot);
1305        if (ret && slot > 0)
1306            slot--;
1307
1308        if (next_key && slot + 1 < btrfs_header_nritems(parent))
1309            btrfs_node_key_to_cpu(parent, next_key, slot + 1);
1310
1311        old_bytenr = btrfs_node_blockptr(parent, slot);
1312        blocksize = btrfs_level_size(dest, level - 1);
1313        old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
1314
1315        if (level <= max_level) {
1316            eb = path->nodes[level];
1317            new_bytenr = btrfs_node_blockptr(eb,
1318                            path->slots[level]);
1319            new_ptr_gen = btrfs_node_ptr_generation(eb,
1320                            path->slots[level]);
1321        } else {
1322            new_bytenr = 0;
1323            new_ptr_gen = 0;
1324        }
1325
1326        if (new_bytenr > 0 && new_bytenr == old_bytenr) {
1327            WARN_ON(1);
1328            ret = level;
1329            break;
1330        }
1331
1332        if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
1333            memcmp_node_keys(parent, slot, path, level)) {
1334            if (level <= lowest_level && !leaf) {
1335                ret = 0;
1336                break;
1337            }
1338
1339            eb = read_tree_block(dest, old_bytenr, blocksize,
1340                         old_ptr_gen);
1341            btrfs_tree_lock(eb);
1342            ret = btrfs_cow_block(trans, dest, eb, parent,
1343                          slot, &eb);
1344            BUG_ON(ret);
1345            btrfs_set_lock_blocking(eb);
1346
1347            if (level <= lowest_level) {
1348                *leaf = eb;
1349                ret = 0;
1350                break;
1351            }
1352
1353            btrfs_tree_unlock(parent);
1354            free_extent_buffer(parent);
1355
1356            parent = eb;
1357            continue;
1358        }
1359
1360        btrfs_node_key_to_cpu(path->nodes[level], &key,
1361                      path->slots[level]);
1362        btrfs_release_path(src, path);
1363
1364        path->lowest_level = level;
1365        ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
1366        path->lowest_level = 0;
1367        BUG_ON(ret);
1368
1369        /*
1370         * swap blocks in fs tree and reloc tree.
1371         */
1372        btrfs_set_node_blockptr(parent, slot, new_bytenr);
1373        btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
1374        btrfs_mark_buffer_dirty(parent);
1375
1376        btrfs_set_node_blockptr(path->nodes[level],
1377                    path->slots[level], old_bytenr);
1378        btrfs_set_node_ptr_generation(path->nodes[level],
1379                          path->slots[level], old_ptr_gen);
1380        btrfs_mark_buffer_dirty(path->nodes[level]);
1381
1382        ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
1383                    path->nodes[level]->start,
1384                    src->root_key.objectid, level - 1, 0);
1385        BUG_ON(ret);
1386        ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
1387                    0, dest->root_key.objectid, level - 1,
1388                    0);
1389        BUG_ON(ret);
1390
1391        ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
1392                    path->nodes[level]->start,
1393                    src->root_key.objectid, level - 1, 0);
1394        BUG_ON(ret);
1395
1396        ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
1397                    0, dest->root_key.objectid, level - 1,
1398                    0);
1399        BUG_ON(ret);
1400
1401        btrfs_unlock_up_safe(path, 0);
1402
1403        ret = level;
1404        break;
1405    }
1406    btrfs_tree_unlock(parent);
1407    free_extent_buffer(parent);
1408    return ret;
1409}
1410
1411/*
1412 * helper to find next relocated block in reloc tree
1413 */
1414static noinline_for_stack
1415int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1416               int *level)
1417{
1418    struct extent_buffer *eb;
1419    int i;
1420    u64 last_snapshot;
1421    u32 nritems;
1422
1423    last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1424
1425    for (i = 0; i < *level; i++) {
1426        free_extent_buffer(path->nodes[i]);
1427        path->nodes[i] = NULL;
1428    }
1429
1430    for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
1431        eb = path->nodes[i];
1432        nritems = btrfs_header_nritems(eb);
1433        while (path->slots[i] + 1 < nritems) {
1434            path->slots[i]++;
1435            if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
1436                last_snapshot)
1437                continue;
1438
1439            *level = i;
1440            return 0;
1441        }
1442        free_extent_buffer(path->nodes[i]);
1443        path->nodes[i] = NULL;
1444    }
1445    return 1;
1446}
1447
1448/*
1449 * walk down reloc tree to find relocated block of lowest level
1450 */
1451static noinline_for_stack
1452int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1453             int *level)
1454{
1455    struct extent_buffer *eb = NULL;
1456    int i;
1457    u64 bytenr;
1458    u64 ptr_gen = 0;
1459    u64 last_snapshot;
1460    u32 blocksize;
1461    u32 nritems;
1462
1463    last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1464
1465    for (i = *level; i > 0; i--) {
1466        eb = path->nodes[i];
1467        nritems = btrfs_header_nritems(eb);
1468        while (path->slots[i] < nritems) {
1469            ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
1470            if (ptr_gen > last_snapshot)
1471                break;
1472            path->slots[i]++;
1473        }
1474        if (path->slots[i] >= nritems) {
1475            if (i == *level)
1476                break;
1477            *level = i + 1;
1478            return 0;
1479        }
1480        if (i == 1) {
1481            *level = i;
1482            return 0;
1483        }
1484
1485        bytenr = btrfs_node_blockptr(eb, path->slots[i]);
1486        blocksize = btrfs_level_size(root, i - 1);
1487        eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
1488        BUG_ON(btrfs_header_level(eb) != i - 1);
1489        path->nodes[i - 1] = eb;
1490        path->slots[i - 1] = 0;
1491    }
1492    return 1;
1493}
1494
1495/*
1496 * invalidate extent cache for file extents whose key in range of
1497 * [min_key, max_key)
1498 */
1499static int invalidate_extent_cache(struct btrfs_root *root,
1500                   struct btrfs_key *min_key,
1501                   struct btrfs_key *max_key)
1502{
1503    struct inode *inode = NULL;
1504    u64 objectid;
1505    u64 start, end;
1506
1507    objectid = min_key->objectid;
1508    while (1) {
1509        cond_resched();
1510        iput(inode);
1511
1512        if (objectid > max_key->objectid)
1513            break;
1514
1515        inode = find_next_inode(root, objectid);
1516        if (!inode)
1517            break;
1518
1519        if (inode->i_ino > max_key->objectid) {
1520            iput(inode);
1521            break;
1522        }
1523
1524        objectid = inode->i_ino + 1;
1525        if (!S_ISREG(inode->i_mode))
1526            continue;
1527
1528        if (unlikely(min_key->objectid == inode->i_ino)) {
1529            if (min_key->type > BTRFS_EXTENT_DATA_KEY)
1530                continue;
1531            if (min_key->type < BTRFS_EXTENT_DATA_KEY)
1532                start = 0;
1533            else {
1534                start = min_key->offset;
1535                WARN_ON(!IS_ALIGNED(start, root->sectorsize));
1536            }
1537        } else {
1538            start = 0;
1539        }
1540
1541        if (unlikely(max_key->objectid == inode->i_ino)) {
1542            if (max_key->type < BTRFS_EXTENT_DATA_KEY)
1543                continue;
1544            if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
1545                end = (u64)-1;
1546            } else {
1547                if (max_key->offset == 0)
1548                    continue;
1549                end = max_key->offset;
1550                WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1551                end--;
1552            }
1553        } else {
1554            end = (u64)-1;
1555        }
1556
1557        /* the lock_extent waits for readpage to complete */
1558        lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
1559        btrfs_drop_extent_cache(inode, start, end, 1);
1560        unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
1561    }
1562    return 0;
1563}
1564
1565static void put_inodes(struct list_head *list)
1566{
1567    struct inodevec *ivec;
1568    while (!list_empty(list)) {
1569        ivec = list_entry(list->next, struct inodevec, list);
1570        list_del(&ivec->list);
1571        while (ivec->nr > 0) {
1572            ivec->nr--;
1573            iput(ivec->inode[ivec->nr]);
1574        }
1575        kfree(ivec);
1576    }
1577}
1578
1579static int find_next_key(struct btrfs_path *path, int level,
1580             struct btrfs_key *key)
1581
1582{
1583    while (level < BTRFS_MAX_LEVEL) {
1584        if (!path->nodes[level])
1585            break;
1586        if (path->slots[level] + 1 <
1587            btrfs_header_nritems(path->nodes[level])) {
1588            btrfs_node_key_to_cpu(path->nodes[level], key,
1589                          path->slots[level] + 1);
1590            return 0;
1591        }
1592        level++;
1593    }
1594    return 1;
1595}
1596
1597/*
1598 * merge the relocated tree blocks in reloc tree with corresponding
1599 * fs tree.
1600 */
1601static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1602                           struct btrfs_root *root)
1603{
1604    LIST_HEAD(inode_list);
1605    struct btrfs_key key;
1606    struct btrfs_key next_key;
1607    struct btrfs_trans_handle *trans;
1608    struct btrfs_root *reloc_root;
1609    struct btrfs_root_item *root_item;
1610    struct btrfs_path *path;
1611    struct extent_buffer *leaf = NULL;
1612    unsigned long nr;
1613    int level;
1614    int max_level;
1615    int replaced = 0;
1616    int ret;
1617    int err = 0;
1618
1619    path = btrfs_alloc_path();
1620    if (!path)
1621        return -ENOMEM;
1622
1623    reloc_root = root->reloc_root;
1624    root_item = &reloc_root->root_item;
1625
1626    if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1627        level = btrfs_root_level(root_item);
1628        extent_buffer_get(reloc_root->node);
1629        path->nodes[level] = reloc_root->node;
1630        path->slots[level] = 0;
1631    } else {
1632        btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1633
1634        level = root_item->drop_level;
1635        BUG_ON(level == 0);
1636        path->lowest_level = level;
1637        ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
1638        path->lowest_level = 0;
1639        if (ret < 0) {
1640            btrfs_free_path(path);
1641            return ret;
1642        }
1643
1644        btrfs_node_key_to_cpu(path->nodes[level], &next_key,
1645                      path->slots[level]);
1646        WARN_ON(memcmp(&key, &next_key, sizeof(key)));
1647
1648        btrfs_unlock_up_safe(path, 0);
1649    }
1650
1651    if (level == 0 && rc->stage == UPDATE_DATA_PTRS) {
1652        trans = btrfs_start_transaction(root, 1);
1653
1654        leaf = path->nodes[0];
1655        btrfs_item_key_to_cpu(leaf, &key, 0);
1656        btrfs_release_path(reloc_root, path);
1657
1658        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1659        if (ret < 0) {
1660            err = ret;
1661            goto out;
1662        }
1663
1664        leaf = path->nodes[0];
1665        btrfs_unlock_up_safe(path, 1);
1666        ret = replace_file_extents(trans, rc, root, leaf,
1667                       &inode_list);
1668        if (ret < 0)
1669            err = ret;
1670        goto out;
1671    }
1672
1673    memset(&next_key, 0, sizeof(next_key));
1674
1675    while (1) {
1676        leaf = NULL;
1677        replaced = 0;
1678        trans = btrfs_start_transaction(root, 1);
1679        max_level = level;
1680
1681        ret = walk_down_reloc_tree(reloc_root, path, &level);
1682        if (ret < 0) {
1683            err = ret;
1684            goto out;
1685        }
1686        if (ret > 0)
1687            break;
1688
1689        if (!find_next_key(path, level, &key) &&
1690            btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
1691            ret = 0;
1692        } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) {
1693            ret = replace_path(trans, root, reloc_root,
1694                       path, &next_key, &leaf,
1695                       level, max_level);
1696        } else {
1697            ret = replace_path(trans, root, reloc_root,
1698                       path, &next_key, NULL,
1699                       level, max_level);
1700        }
1701        if (ret < 0) {
1702            err = ret;
1703            goto out;
1704        }
1705
1706        if (ret > 0) {
1707            level = ret;
1708            btrfs_node_key_to_cpu(path->nodes[level], &key,
1709                          path->slots[level]);
1710            replaced = 1;
1711        } else if (leaf) {
1712            /*
1713             * no block got replaced, try replacing file extents
1714             */
1715            btrfs_item_key_to_cpu(leaf, &key, 0);
1716            ret = replace_file_extents(trans, rc, root, leaf,
1717                           &inode_list);
1718            btrfs_tree_unlock(leaf);
1719            free_extent_buffer(leaf);
1720            BUG_ON(ret < 0);
1721        }
1722
1723        ret = walk_up_reloc_tree(reloc_root, path, &level);
1724        if (ret > 0)
1725            break;
1726
1727        BUG_ON(level == 0);
1728        /*
1729         * save the merging progress in the drop_progress.
1730         * this is OK since root refs == 1 in this case.
1731         */
1732        btrfs_node_key(path->nodes[level], &root_item->drop_progress,
1733                   path->slots[level]);
1734        root_item->drop_level = level;
1735
1736        nr = trans->blocks_used;
1737        btrfs_end_transaction(trans, root);
1738
1739        btrfs_btree_balance_dirty(root, nr);
1740
1741        /*
1742         * put inodes outside transaction, otherwise we may deadlock.
1743         */
1744        put_inodes(&inode_list);
1745
1746        if (replaced && rc->stage == UPDATE_DATA_PTRS)
1747            invalidate_extent_cache(root, &key, &next_key);
1748    }
1749
1750    /*
1751     * handle the case only one block in the fs tree need to be
1752     * relocated and the block is tree root.
1753     */
1754    leaf = btrfs_lock_root_node(root);
1755    ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
1756    btrfs_tree_unlock(leaf);
1757    free_extent_buffer(leaf);
1758    if (ret < 0)
1759        err = ret;
1760out:
1761    btrfs_free_path(path);
1762
1763    if (err == 0) {
1764        memset(&root_item->drop_progress, 0,
1765               sizeof(root_item->drop_progress));
1766        root_item->drop_level = 0;
1767        btrfs_set_root_refs(root_item, 0);
1768    }
1769
1770    nr = trans->blocks_used;
1771    btrfs_end_transaction(trans, root);
1772
1773    btrfs_btree_balance_dirty(root, nr);
1774
1775    put_inodes(&inode_list);
1776
1777    if (replaced && rc->stage == UPDATE_DATA_PTRS)
1778        invalidate_extent_cache(root, &key, &next_key);
1779
1780    return err;
1781}
1782
1783/*
1784 * callback for the work threads.
1785 * this function merges reloc tree with corresponding fs tree,
1786 * and then drops the reloc tree.
1787 */
1788static void merge_func(struct btrfs_work *work)
1789{
1790    struct btrfs_trans_handle *trans;
1791    struct btrfs_root *root;
1792    struct btrfs_root *reloc_root;
1793    struct async_merge *async;
1794
1795    async = container_of(work, struct async_merge, work);
1796    reloc_root = async->root;
1797
1798    if (btrfs_root_refs(&reloc_root->root_item) > 0) {
1799        root = read_fs_root(reloc_root->fs_info,
1800                    reloc_root->root_key.offset);
1801        BUG_ON(IS_ERR(root));
1802        BUG_ON(root->reloc_root != reloc_root);
1803
1804        merge_reloc_root(async->rc, root);
1805
1806        trans = btrfs_start_transaction(root, 1);
1807        btrfs_update_reloc_root(trans, root);
1808        btrfs_end_transaction(trans, root);
1809    }
1810
1811    btrfs_drop_snapshot(reloc_root, 0);
1812
1813    if (atomic_dec_and_test(async->num_pending))
1814        complete(async->done);
1815
1816    kfree(async);
1817}
1818
1819static int merge_reloc_roots(struct reloc_control *rc)
1820{
1821    struct async_merge *async;
1822    struct btrfs_root *root;
1823    struct completion done;
1824    atomic_t num_pending;
1825
1826    init_completion(&done);
1827    atomic_set(&num_pending, 1);
1828
1829    while (!list_empty(&rc->reloc_roots)) {
1830        root = list_entry(rc->reloc_roots.next,
1831                  struct btrfs_root, root_list);
1832        list_del_init(&root->root_list);
1833
1834        async = kmalloc(sizeof(*async), GFP_NOFS);
1835        BUG_ON(!async);
1836        async->work.func = merge_func;
1837        async->work.flags = 0;
1838        async->rc = rc;
1839        async->root = root;
1840        async->done = &done;
1841        async->num_pending = &num_pending;
1842        atomic_inc(&num_pending);
1843        btrfs_queue_worker(&rc->workers, &async->work);
1844    }
1845
1846    if (!atomic_dec_and_test(&num_pending))
1847        wait_for_completion(&done);
1848
1849    BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
1850    return 0;
1851}
1852
1853static void free_block_list(struct rb_root *blocks)
1854{
1855    struct tree_block *block;
1856    struct rb_node *rb_node;
1857    while ((rb_node = rb_first(blocks))) {
1858        block = rb_entry(rb_node, struct tree_block, rb_node);
1859        rb_erase(rb_node, blocks);
1860        kfree(block);
1861    }
1862}
1863
1864static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
1865                      struct btrfs_root *reloc_root)
1866{
1867    struct btrfs_root *root;
1868
1869    if (reloc_root->last_trans == trans->transid)
1870        return 0;
1871
1872    root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset);
1873    BUG_ON(IS_ERR(root));
1874    BUG_ON(root->reloc_root != reloc_root);
1875
1876    return btrfs_record_root_in_trans(trans, root);
1877}
1878
1879/*
1880 * select one tree from trees that references the block.
1881 * for blocks in refernce counted trees, we preper reloc tree.
1882 * if no reloc tree found and reloc_only is true, NULL is returned.
1883 */
1884static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans,
1885                        struct backref_node *node,
1886                        struct backref_edge *edges[],
1887                        int *nr, int reloc_only)
1888{
1889    struct backref_node *next;
1890    struct btrfs_root *root;
1891    int index;
1892    int loop = 0;
1893again:
1894    index = 0;
1895    next = node;
1896    while (1) {
1897        cond_resched();
1898        next = walk_up_backref(next, edges, &index);
1899        root = next->root;
1900        if (!root) {
1901            BUG_ON(!node->old_root);
1902            goto skip;
1903        }
1904
1905        /* no other choice for non-refernce counted tree */
1906        if (!root->ref_cows) {
1907            BUG_ON(reloc_only);
1908            break;
1909        }
1910
1911        if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
1912            record_reloc_root_in_trans(trans, root);
1913            break;
1914        }
1915
1916        if (loop) {
1917            btrfs_record_root_in_trans(trans, root);
1918            break;
1919        }
1920
1921        if (reloc_only || next != node) {
1922            if (!root->reloc_root)
1923                btrfs_record_root_in_trans(trans, root);
1924            root = root->reloc_root;
1925            /*
1926             * if the reloc tree was created in current
1927             * transation, there is no node in backref tree
1928             * corresponds to the root of the reloc tree.
1929             */
1930            if (btrfs_root_last_snapshot(&root->root_item) ==
1931                trans->transid - 1)
1932                break;
1933        }
1934skip:
1935        root = NULL;
1936        next = walk_down_backref(edges, &index);
1937        if (!next || next->level <= node->level)
1938            break;
1939    }
1940
1941    if (!root && !loop && !reloc_only) {
1942        loop = 1;
1943        goto again;
1944    }
1945
1946    if (root)
1947        *nr = index;
1948    else
1949        *nr = 0;
1950
1951    return root;
1952}
1953
1954static noinline_for_stack
1955struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
1956                   struct backref_node *node)
1957{
1958    struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
1959    int nr;
1960    return __select_one_root(trans, node, edges, &nr, 0);
1961}
1962
1963static noinline_for_stack
1964struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
1965                     struct backref_node *node,
1966                     struct backref_edge *edges[], int *nr)
1967{
1968    return __select_one_root(trans, node, edges, nr, 1);
1969}
1970
1971static void grab_path_buffers(struct btrfs_path *path,
1972                  struct backref_node *node,
1973                  struct backref_edge *edges[], int nr)
1974{
1975    int i = 0;
1976    while (1) {
1977        drop_node_buffer(node);
1978        node->eb = path->nodes[node->level];
1979        BUG_ON(!node->eb);
1980        if (path->locks[node->level])
1981            node->locked = 1;
1982        path->nodes[node->level] = NULL;
1983        path->locks[node->level] = 0;
1984
1985        if (i >= nr)
1986            break;
1987
1988        edges[i]->blockptr = node->eb->start;
1989        node = edges[i]->node[UPPER];
1990        i++;
1991    }
1992}
1993
1994/*
1995 * relocate a block tree, and then update pointers in upper level
1996 * blocks that reference the block to point to the new location.
1997 *
1998 * if called by link_to_upper, the block has already been relocated.
1999 * in that case this function just updates pointers.
2000 */
2001static int do_relocation(struct btrfs_trans_handle *trans,
2002             struct backref_node *node,
2003             struct btrfs_key *key,
2004             struct btrfs_path *path, int lowest)
2005{
2006    struct backref_node *upper;
2007    struct backref_edge *edge;
2008    struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2009    struct btrfs_root *root;
2010    struct extent_buffer *eb;
2011    u32 blocksize;
2012    u64 bytenr;
2013    u64 generation;
2014    int nr;
2015    int slot;
2016    int ret;
2017    int err = 0;
2018
2019    BUG_ON(lowest && node->eb);
2020
2021    path->lowest_level = node->level + 1;
2022    list_for_each_entry(edge, &node->upper, list[LOWER]) {
2023        cond_resched();
2024        if (node->eb && node->eb->start == edge->blockptr)
2025            continue;
2026
2027        upper = edge->node[UPPER];
2028        root = select_reloc_root(trans, upper, edges, &nr);
2029        if (!root)
2030            continue;
2031
2032        if (upper->eb && !upper->locked)
2033            drop_node_buffer(upper);
2034
2035        if (!upper->eb) {
2036            ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2037            if (ret < 0) {
2038                err = ret;
2039                break;
2040            }
2041            BUG_ON(ret > 0);
2042
2043            slot = path->slots[upper->level];
2044
2045            btrfs_unlock_up_safe(path, upper->level + 1);
2046            grab_path_buffers(path, upper, edges, nr);
2047
2048            btrfs_release_path(NULL, path);
2049        } else {
2050            ret = btrfs_bin_search(upper->eb, key, upper->level,
2051                           &slot);
2052            BUG_ON(ret);
2053        }
2054
2055        bytenr = btrfs_node_blockptr(upper->eb, slot);
2056        if (!lowest) {
2057            if (node->eb->start == bytenr) {
2058                btrfs_tree_unlock(upper->eb);
2059                upper->locked = 0;
2060                continue;
2061            }
2062        } else {
2063            BUG_ON(node->bytenr != bytenr);
2064        }
2065
2066        blocksize = btrfs_level_size(root, node->level);
2067        generation = btrfs_node_ptr_generation(upper->eb, slot);
2068        eb = read_tree_block(root, bytenr, blocksize, generation);
2069        btrfs_tree_lock(eb);
2070        btrfs_set_lock_blocking(eb);
2071
2072        if (!node->eb) {
2073            ret = btrfs_cow_block(trans, root, eb, upper->eb,
2074                          slot, &eb);
2075            if (ret < 0) {
2076                err = ret;
2077                break;
2078            }
2079            btrfs_set_lock_blocking(eb);
2080            node->eb = eb;
2081            node->locked = 1;
2082        } else {
2083            btrfs_set_node_blockptr(upper->eb, slot,
2084                        node->eb->start);
2085            btrfs_set_node_ptr_generation(upper->eb, slot,
2086                              trans->transid);
2087            btrfs_mark_buffer_dirty(upper->eb);
2088
2089            ret = btrfs_inc_extent_ref(trans, root,
2090                        node->eb->start, blocksize,
2091                        upper->eb->start,
2092                        btrfs_header_owner(upper->eb),
2093                        node->level, 0);
2094            BUG_ON(ret);
2095
2096            ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
2097            BUG_ON(ret);
2098        }
2099        if (!lowest) {
2100            btrfs_tree_unlock(upper->eb);
2101            upper->locked = 0;
2102        }
2103    }
2104    path->lowest_level = 0;
2105    return err;
2106}
2107
2108static int link_to_upper(struct btrfs_trans_handle *trans,
2109             struct backref_node *node,
2110             struct btrfs_path *path)
2111{
2112    struct btrfs_key key;
2113    if (!node->eb || list_empty(&node->upper))
2114        return 0;
2115
2116    btrfs_node_key_to_cpu(node->eb, &key, 0);
2117    return do_relocation(trans, node, &key, path, 0);
2118}
2119
2120static int finish_pending_nodes(struct btrfs_trans_handle *trans,
2121                struct backref_cache *cache,
2122                struct btrfs_path *path)
2123{
2124    struct backref_node *node;
2125    int level;
2126    int ret;
2127    int err = 0;
2128
2129    for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2130        while (!list_empty(&cache->pending[level])) {
2131            node = list_entry(cache->pending[level].next,
2132                      struct backref_node, lower);
2133            BUG_ON(node->level != level);
2134
2135            ret = link_to_upper(trans, node, path);
2136            if (ret < 0)
2137                err = ret;
2138            /*
2139             * this remove the node from the pending list and
2140             * may add some other nodes to the level + 1
2141             * pending list
2142             */
2143            remove_backref_node(cache, node);
2144        }
2145    }
2146    BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
2147    return err;
2148}
2149
2150static void mark_block_processed(struct reloc_control *rc,
2151                 struct backref_node *node)
2152{
2153    u32 blocksize;
2154    if (node->level == 0 ||
2155        in_block_group(node->bytenr, rc->block_group)) {
2156        blocksize = btrfs_level_size(rc->extent_root, node->level);
2157        set_extent_bits(&rc->processed_blocks, node->bytenr,
2158                node->bytenr + blocksize - 1, EXTENT_DIRTY,
2159                GFP_NOFS);
2160    }
2161    node->processed = 1;
2162}
2163
2164/*
2165 * mark a block and all blocks directly/indirectly reference the block
2166 * as processed.
2167 */
2168static void update_processed_blocks(struct reloc_control *rc,
2169                    struct backref_node *node)
2170{
2171    struct backref_node *next = node;
2172    struct backref_edge *edge;
2173    struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2174    int index = 0;
2175
2176    while (next) {
2177        cond_resched();
2178        while (1) {
2179            if (next->processed)
2180                break;
2181
2182            mark_block_processed(rc, next);
2183
2184            if (list_empty(&next->upper))
2185                break;
2186
2187            edge = list_entry(next->upper.next,
2188                      struct backref_edge, list[LOWER]);
2189            edges[index++] = edge;
2190            next = edge->node[UPPER];
2191        }
2192        next = walk_down_backref(edges, &index);
2193    }
2194}
2195
2196static int tree_block_processed(u64 bytenr, u32 blocksize,
2197                struct reloc_control *rc)
2198{
2199    if (test_range_bit(&rc->processed_blocks, bytenr,
2200               bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
2201        return 1;
2202    return 0;
2203}
2204
2205/*
2206 * check if there are any file extent pointers in the leaf point to
2207 * data require processing
2208 */
2209static int check_file_extents(struct reloc_control *rc,
2210                  u64 bytenr, u32 blocksize, u64 ptr_gen)
2211{
2212    struct btrfs_key found_key;
2213    struct btrfs_file_extent_item *fi;
2214    struct extent_buffer *leaf;
2215    u32 nritems;
2216    int i;
2217    int ret = 0;
2218
2219    leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen);
2220
2221    nritems = btrfs_header_nritems(leaf);
2222    for (i = 0; i < nritems; i++) {
2223        cond_resched();
2224        btrfs_item_key_to_cpu(leaf, &found_key, i);
2225        if (found_key.type != BTRFS_EXTENT_DATA_KEY)
2226            continue;
2227        fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2228        if (btrfs_file_extent_type(leaf, fi) ==
2229            BTRFS_FILE_EXTENT_INLINE)
2230            continue;
2231        bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2232        if (bytenr == 0)
2233            continue;
2234        if (in_block_group(bytenr, rc->block_group)) {
2235            ret = 1;
2236            break;
2237        }
2238    }
2239    free_extent_buffer(leaf);
2240    return ret;
2241}
2242
2243/*
2244 * scan child blocks of a given block to find blocks require processing
2245 */
2246static int add_child_blocks(struct btrfs_trans_handle *trans,
2247                struct reloc_control *rc,
2248                struct backref_node *node,
2249                struct rb_root *blocks)
2250{
2251    struct tree_block *block;
2252    struct rb_node *rb_node;
2253    u64 bytenr;
2254    u64 ptr_gen;
2255    u32 blocksize;
2256    u32 nritems;
2257    int i;
2258    int err = 0;
2259
2260    nritems = btrfs_header_nritems(node->eb);
2261    blocksize = btrfs_level_size(rc->extent_root, node->level - 1);
2262    for (i = 0; i < nritems; i++) {
2263        cond_resched();
2264        bytenr = btrfs_node_blockptr(node->eb, i);
2265        ptr_gen = btrfs_node_ptr_generation(node->eb, i);
2266        if (ptr_gen == trans->transid)
2267            continue;
2268        if (!in_block_group(bytenr, rc->block_group) &&
2269            (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
2270            continue;
2271        if (tree_block_processed(bytenr, blocksize, rc))
2272            continue;
2273
2274        readahead_tree_block(rc->extent_root,
2275                     bytenr, blocksize, ptr_gen);
2276    }
2277
2278    for (i = 0; i < nritems; i++) {
2279        cond_resched();
2280        bytenr = btrfs_node_blockptr(node->eb, i);
2281        ptr_gen = btrfs_node_ptr_generation(node->eb, i);
2282        if (ptr_gen == trans->transid)
2283            continue;
2284        if (!in_block_group(bytenr, rc->block_group) &&
2285            (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
2286            continue;
2287        if (tree_block_processed(bytenr, blocksize, rc))
2288            continue;
2289        if (!in_block_group(bytenr, rc->block_group) &&
2290            !check_file_extents(rc, bytenr, blocksize, ptr_gen))
2291            continue;
2292
2293        block = kmalloc(sizeof(*block), GFP_NOFS);
2294        if (!block) {
2295            err = -ENOMEM;
2296            break;
2297        }
2298        block->bytenr = bytenr;
2299        btrfs_node_key_to_cpu(node->eb, &block->key, i);
2300        block->level = node->level - 1;
2301        block->key_ready = 1;
2302        rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
2303        BUG_ON(rb_node);
2304    }
2305    if (err)
2306        free_block_list(blocks);
2307    return err;
2308}
2309
2310/*
2311 * find adjacent blocks require processing
2312 */
2313static noinline_for_stack
2314int add_adjacent_blocks(struct btrfs_trans_handle *trans,
2315            struct reloc_control *rc,
2316            struct backref_cache *cache,
2317            struct rb_root *blocks, int level,
2318            struct backref_node **upper)
2319{
2320    struct backref_node *node;
2321    int ret = 0;
2322
2323    WARN_ON(!list_empty(&cache->pending[level]));
2324
2325    if (list_empty(&cache->pending[level + 1]))
2326        return 1;
2327
2328    node = list_entry(cache->pending[level + 1].next,
2329              struct backref_node, lower);
2330    if (node->eb)
2331        ret = add_child_blocks(trans, rc, node, blocks);
2332
2333    *upper = node;
2334    return ret;
2335}
2336
2337static int get_tree_block_key(struct reloc_control *rc,
2338                  struct tree_block *block)
2339{
2340    struct extent_buffer *eb;
2341
2342    BUG_ON(block->key_ready);
2343    eb = read_tree_block(rc->extent_root, block->bytenr,
2344                 block->key.objectid, block->key.offset);
2345    WARN_ON(btrfs_header_level(eb) != block->level);
2346    if (block->level == 0)
2347        btrfs_item_key_to_cpu(eb, &block->key, 0);
2348    else
2349        btrfs_node_key_to_cpu(eb, &block->key, 0);
2350    free_extent_buffer(eb);
2351    block->key_ready = 1;
2352    return 0;
2353}
2354
2355static int reada_tree_block(struct reloc_control *rc,
2356                struct tree_block *block)
2357{
2358    BUG_ON(block->key_ready);
2359    readahead_tree_block(rc->extent_root, block->bytenr,
2360                 block->key.objectid, block->key.offset);
2361    return 0;
2362}
2363
2364/*
2365 * helper function to relocate a tree block
2366 */
2367static int relocate_tree_block(struct btrfs_trans_handle *trans,
2368                struct reloc_control *rc,
2369                struct backref_node *node,
2370                struct btrfs_key *key,
2371                struct btrfs_path *path)
2372{
2373    struct btrfs_root *root;
2374    int ret;
2375
2376    root = select_one_root(trans, node);
2377    if (unlikely(!root)) {
2378        rc->found_old_snapshot = 1;
2379        update_processed_blocks(rc, node);
2380        return 0;
2381    }
2382
2383    if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2384        ret = do_relocation(trans, node, key, path, 1);
2385        if (ret < 0)
2386            goto out;
2387        if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) {
2388            ret = replace_file_extents(trans, rc, root,
2389                           node->eb, NULL);
2390            if (ret < 0)
2391                goto out;
2392        }
2393        drop_node_buffer(node);
2394    } else if (!root->ref_cows) {
2395        path->lowest_level = node->level;
2396        ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2397        btrfs_release_path(root, path);
2398        if (ret < 0)
2399            goto out;
2400    } else if (root != node->root) {
2401        WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS);
2402    }
2403
2404    update_processed_blocks(rc, node);
2405    ret = 0;
2406out:
2407    drop_node_buffer(node);
2408    return ret;
2409}
2410
2411/*
2412 * relocate a list of blocks
2413 */
2414static noinline_for_stack
2415int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2416             struct reloc_control *rc, struct rb_root *blocks)
2417{
2418    struct backref_cache *cache;
2419    struct backref_node *node;
2420    struct btrfs_path *path;
2421    struct tree_block *block;
2422    struct rb_node *rb_node;
2423    int level = -1;
2424    int ret;
2425    int err = 0;
2426
2427    path = btrfs_alloc_path();
2428    if (!path)
2429        return -ENOMEM;
2430
2431    cache = kmalloc(sizeof(*cache), GFP_NOFS);
2432    if (!cache) {
2433        btrfs_free_path(path);
2434        return -ENOMEM;
2435    }
2436
2437    backref_cache_init(cache);
2438
2439    rb_node = rb_first(blocks);
2440    while (rb_node) {
2441        block = rb_entry(rb_node, struct tree_block, rb_node);
2442        if (level == -1)
2443            level = block->level;
2444        else
2445            BUG_ON(level != block->level);
2446        if (!block->key_ready)
2447            reada_tree_block(rc, block);
2448        rb_node = rb_next(rb_node);
2449    }
2450
2451    rb_node = rb_first(blocks);
2452    while (rb_node) {
2453        block = rb_entry(rb_node, struct tree_block, rb_node);
2454        if (!block->key_ready)
2455            get_tree_block_key(rc, block);
2456        rb_node = rb_next(rb_node);
2457    }
2458
2459    rb_node = rb_first(blocks);
2460    while (rb_node) {
2461        block = rb_entry(rb_node, struct tree_block, rb_node);
2462
2463        node = build_backref_tree(rc, cache, &block->key,
2464                      block->level, block->bytenr);
2465        if (IS_ERR(node)) {
2466            err = PTR_ERR(node);
2467            goto out;
2468        }
2469
2470        ret = relocate_tree_block(trans, rc, node, &block->key,
2471                      path);
2472        if (ret < 0) {
2473            err = ret;
2474            goto out;
2475        }
2476        remove_backref_node(cache, node);
2477        rb_node = rb_next(rb_node);
2478    }
2479
2480    if (level > 0)
2481        goto out;
2482
2483    free_block_list(blocks);
2484
2485    /*
2486     * now backrefs of some upper level tree blocks have been cached,
2487     * try relocating blocks referenced by these upper level blocks.
2488     */
2489    while (1) {
2490        struct backref_node *upper = NULL;
2491        if (trans->transaction->in_commit ||
2492            trans->transaction->delayed_refs.flushing)
2493            break;
2494
2495        ret = add_adjacent_blocks(trans, rc, cache, blocks, level,
2496                      &upper);
2497        if (ret < 0)
2498            err = ret;
2499        if (ret != 0)
2500            break;
2501
2502        rb_node = rb_first(blocks);
2503        while (rb_node) {
2504            block = rb_entry(rb_node, struct tree_block, rb_node);
2505            if (trans->transaction->in_commit ||
2506                trans->transaction->delayed_refs.flushing)
2507                goto out;
2508            BUG_ON(!block->key_ready);
2509            node = build_backref_tree(rc, cache, &block->key,
2510                          level, block->bytenr);
2511            if (IS_ERR(node)) {
2512                err = PTR_ERR(node);
2513                goto out;
2514            }
2515
2516            ret = relocate_tree_block(trans, rc, node,
2517                          &block->key, path);
2518            if (ret < 0) {
2519                err = ret;
2520                goto out;
2521            }
2522            remove_backref_node(cache, node);
2523            rb_node = rb_next(rb_node);
2524        }
2525        free_block_list(blocks);
2526
2527        if (upper) {
2528            ret = link_to_upper(trans, upper, path);
2529            if (ret < 0) {
2530                err = ret;
2531                break;
2532            }
2533            remove_backref_node(cache, upper);
2534        }
2535    }
2536out:
2537    free_block_list(blocks);
2538
2539    ret = finish_pending_nodes(trans, cache, path);
2540    if (ret < 0)
2541        err = ret;
2542
2543    kfree(cache);
2544    btrfs_free_path(path);
2545    return err;
2546}
2547
2548static noinline_for_stack
2549int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2550             u64 block_start)
2551{
2552    struct btrfs_root *root = BTRFS_I(inode)->root;
2553    struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2554    struct extent_map *em;
2555    int ret = 0;
2556
2557    em = alloc_extent_map(GFP_NOFS);
2558    if (!em)
2559        return -ENOMEM;
2560
2561    em->start = start;
2562    em->len = end + 1 - start;
2563    em->block_len = em->len;
2564    em->block_start = block_start;
2565    em->bdev = root->fs_info->fs_devices->latest_bdev;
2566    set_bit(EXTENT_FLAG_PINNED, &em->flags);
2567
2568    lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2569    while (1) {
2570        write_lock(&em_tree->lock);
2571        ret = add_extent_mapping(em_tree, em);
2572        write_unlock(&em_tree->lock);
2573        if (ret != -EEXIST) {
2574            free_extent_map(em);
2575            break;
2576        }
2577        btrfs_drop_extent_cache(inode, start, end, 0);
2578    }
2579    unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2580    return ret;
2581}
2582
2583static int relocate_file_extent_cluster(struct inode *inode,
2584                    struct file_extent_cluster *cluster)
2585{
2586    u64 page_start;
2587    u64 page_end;
2588    u64 offset = BTRFS_I(inode)->index_cnt;
2589    unsigned long index;
2590    unsigned long last_index;
2591    unsigned int dirty_page = 0;
2592    struct page *page;
2593    struct file_ra_state *ra;
2594    int nr = 0;
2595    int ret = 0;
2596
2597    if (!cluster->nr)
2598        return 0;
2599
2600    ra = kzalloc(sizeof(*ra), GFP_NOFS);
2601    if (!ra)
2602        return -ENOMEM;
2603
2604    index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
2605    last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
2606
2607    mutex_lock(&inode->i_mutex);
2608
2609    i_size_write(inode, cluster->end + 1 - offset);
2610    ret = setup_extent_mapping(inode, cluster->start - offset,
2611                   cluster->end - offset, cluster->start);
2612    if (ret)
2613        goto out_unlock;
2614
2615    file_ra_state_init(ra, inode->i_mapping);
2616
2617    WARN_ON(cluster->start != cluster->boundary[0]);
2618    while (index <= last_index) {
2619        page = find_lock_page(inode->i_mapping, index);
2620        if (!page) {
2621            page_cache_sync_readahead(inode->i_mapping,
2622                          ra, NULL, index,
2623                          last_index + 1 - index);
2624            page = grab_cache_page(inode->i_mapping, index);
2625            if (!page) {
2626                ret = -ENOMEM;
2627                goto out_unlock;
2628            }
2629        }
2630
2631        if (PageReadahead(page)) {
2632            page_cache_async_readahead(inode->i_mapping,
2633                           ra, NULL, page, index,
2634                           last_index + 1 - index);
2635        }
2636
2637        if (!PageUptodate(page)) {
2638            btrfs_readpage(NULL, page);
2639            lock_page(page);
2640            if (!PageUptodate(page)) {
2641                unlock_page(page);
2642                page_cache_release(page);
2643                ret = -EIO;
2644                goto out_unlock;
2645            }
2646        }
2647
2648        page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2649        page_end = page_start + PAGE_CACHE_SIZE - 1;
2650
2651        lock_extent(&BTRFS_I(inode)->io_tree,
2652                page_start, page_end, GFP_NOFS);
2653
2654        set_page_extent_mapped(page);
2655
2656        if (nr < cluster->nr &&
2657            page_start + offset == cluster->boundary[nr]) {
2658            set_extent_bits(&BTRFS_I(inode)->io_tree,
2659                    page_start, page_end,
2660                    EXTENT_BOUNDARY, GFP_NOFS);
2661            nr++;
2662        }
2663        btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
2664
2665        set_page_dirty(page);
2666        dirty_page++;
2667
2668        unlock_extent(&BTRFS_I(inode)->io_tree,
2669                  page_start, page_end, GFP_NOFS);
2670        unlock_page(page);
2671        page_cache_release(page);
2672
2673        index++;
2674        if (nr < cluster->nr &&
2675            page_end + 1 + offset == cluster->boundary[nr]) {
2676            balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2677                               dirty_page);
2678            dirty_page = 0;
2679        }
2680    }
2681    if (dirty_page) {
2682        balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2683                           dirty_page);
2684    }
2685    WARN_ON(nr != cluster->nr);
2686out_unlock:
2687    mutex_unlock(&inode->i_mutex);
2688    kfree(ra);
2689    return ret;
2690}
2691
2692static noinline_for_stack
2693int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
2694             struct file_extent_cluster *cluster)
2695{
2696    int ret;
2697
2698    if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
2699        ret = relocate_file_extent_cluster(inode, cluster);
2700        if (ret)
2701            return ret;
2702        cluster->nr = 0;
2703    }
2704
2705    if (!cluster->nr)
2706        cluster->start = extent_key->objectid;
2707    else
2708        BUG_ON(cluster->nr >= MAX_EXTENTS);
2709    cluster->end = extent_key->objectid + extent_key->offset - 1;
2710    cluster->boundary[cluster->nr] = extent_key->objectid;
2711    cluster->nr++;
2712
2713    if (cluster->nr >= MAX_EXTENTS) {
2714        ret = relocate_file_extent_cluster(inode, cluster);
2715        if (ret)
2716            return ret;
2717        cluster->nr = 0;
2718    }
2719    return 0;
2720}
2721
2722#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2723static int get_ref_objectid_v0(struct reloc_control *rc,
2724                   struct btrfs_path *path,
2725                   struct btrfs_key *extent_key,
2726                   u64 *ref_objectid, int *path_change)
2727{
2728    struct btrfs_key key;
2729    struct extent_buffer *leaf;
2730    struct btrfs_extent_ref_v0 *ref0;
2731    int ret;
2732    int slot;
2733
2734    leaf = path->nodes[0];
2735    slot = path->slots[0];
2736    while (1) {
2737        if (slot >= btrfs_header_nritems(leaf)) {
2738            ret = btrfs_next_leaf(rc->extent_root, path);
2739            if (ret < 0)
2740                return ret;
2741            BUG_ON(ret > 0);
2742            leaf = path->nodes[0];
2743            slot = path->slots[0];
2744            if (path_change)
2745                *path_change = 1;
2746        }
2747        btrfs_item_key_to_cpu(leaf, &key, slot);
2748        if (key.objectid != extent_key->objectid)
2749            return -ENOENT;
2750
2751        if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
2752            slot++;
2753            continue;
2754        }
2755        ref0 = btrfs_item_ptr(leaf, slot,
2756                struct btrfs_extent_ref_v0);
2757        *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
2758        break;
2759    }
2760    return 0;
2761}
2762#endif
2763
2764/*
2765 * helper to add a tree block to the list.
2766 * the major work is getting the generation and level of the block
2767 */
2768static int add_tree_block(struct reloc_control *rc,
2769              struct btrfs_key *extent_key,
2770              struct btrfs_path *path,
2771              struct rb_root *blocks)
2772{
2773    struct extent_buffer *eb;
2774    struct btrfs_extent_item *ei;
2775    struct btrfs_tree_block_info *bi;
2776    struct tree_block *block;
2777    struct rb_node *rb_node;
2778    u32 item_size;
2779    int level = -1;
2780    int generation;
2781
2782    eb = path->nodes[0];
2783    item_size = btrfs_item_size_nr(eb, path->slots[0]);
2784
2785    if (item_size >= sizeof(*ei) + sizeof(*bi)) {
2786        ei = btrfs_item_ptr(eb, path->slots[0],
2787                struct btrfs_extent_item);
2788        bi = (struct btrfs_tree_block_info *)(ei + 1);
2789        generation = btrfs_extent_generation(eb, ei);
2790        level = btrfs_tree_block_level(eb, bi);
2791    } else {
2792#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2793        u64 ref_owner;
2794        int ret;
2795
2796        BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
2797        ret = get_ref_objectid_v0(rc, path, extent_key,
2798                      &ref_owner, NULL);
2799        BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
2800        level = (int)ref_owner;
2801        /* FIXME: get real generation */
2802        generation = 0;
2803#else
2804        BUG();
2805#endif
2806    }
2807
2808    btrfs_release_path(rc->extent_root, path);
2809
2810    BUG_ON(level == -1);
2811
2812    block = kmalloc(sizeof(*block), GFP_NOFS);
2813    if (!block)
2814        return -ENOMEM;
2815
2816    block->bytenr = extent_key->objectid;
2817    block->key.objectid = extent_key->offset;
2818    block->key.offset = generation;
2819    block->level = level;
2820    block->key_ready = 0;
2821
2822    rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
2823    BUG_ON(rb_node);
2824
2825    return 0;
2826}
2827
2828/*
2829 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
2830 */
2831static int __add_tree_block(struct reloc_control *rc,
2832                u64 bytenr, u32 blocksize,
2833                struct rb_root *blocks)
2834{
2835    struct btrfs_path *path;
2836    struct btrfs_key key;
2837    int ret;
2838
2839    if (tree_block_processed(bytenr, blocksize, rc))
2840        return 0;
2841
2842    if (tree_search(blocks, bytenr))
2843        return 0;
2844
2845    path = btrfs_alloc_path();
2846    if (!path)
2847        return -ENOMEM;
2848
2849    key.objectid = bytenr;
2850    key.type = BTRFS_EXTENT_ITEM_KEY;
2851    key.offset = blocksize;
2852
2853    path->search_commit_root = 1;
2854    path->skip_locking = 1;
2855    ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
2856    if (ret < 0)
2857        goto out;
2858    BUG_ON(ret);
2859
2860    btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2861    ret = add_tree_block(rc, &key, path, blocks);
2862out:
2863    btrfs_free_path(path);
2864    return ret;
2865}
2866
2867/*
2868 * helper to check if the block use full backrefs for pointers in it
2869 */
2870static int block_use_full_backref(struct reloc_control *rc,
2871                  struct extent_buffer *eb)
2872{
2873    struct btrfs_path *path;
2874    struct btrfs_extent_item *ei;
2875    struct btrfs_key key;
2876    u64 flags;
2877    int ret;
2878
2879    if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
2880        btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
2881        return 1;
2882
2883    path = btrfs_alloc_path();
2884    BUG_ON(!path);
2885
2886    key.objectid = eb->start;
2887    key.type = BTRFS_EXTENT_ITEM_KEY;
2888    key.offset = eb->len;
2889
2890    path->search_commit_root = 1;
2891    path->skip_locking = 1;
2892    ret = btrfs_search_slot(NULL, rc->extent_root,
2893                &key, path, 0, 0);
2894    BUG_ON(ret);
2895
2896    ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2897                struct btrfs_extent_item);
2898    flags = btrfs_extent_flags(path->nodes[0], ei);
2899    BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
2900    if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2901        ret = 1;
2902    else
2903        ret = 0;
2904    btrfs_free_path(path);
2905    return ret;
2906}
2907
2908/*
2909 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
2910 * this function scans fs tree to find blocks reference the data extent
2911 */
2912static int find_data_references(struct reloc_control *rc,
2913                struct btrfs_key *extent_key,
2914                struct extent_buffer *leaf,
2915                struct btrfs_extent_data_ref *ref,
2916                struct rb_root *blocks)
2917{
2918    struct btrfs_path *path;
2919    struct tree_block *block;
2920    struct btrfs_root *root;
2921    struct btrfs_file_extent_item *fi;
2922    struct rb_node *rb_node;
2923    struct btrfs_key key;
2924    u64 ref_root;
2925    u64 ref_objectid;
2926    u64 ref_offset;
2927    u32 ref_count;
2928    u32 nritems;
2929    int err = 0;
2930    int added = 0;
2931    int counted;
2932    int ret;
2933
2934    path = btrfs_alloc_path();
2935    if (!path)
2936        return -ENOMEM;
2937
2938    ref_root = btrfs_extent_data_ref_root(leaf, ref);
2939    ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
2940    ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
2941    ref_count = btrfs_extent_data_ref_count(leaf, ref);
2942
2943    root = read_fs_root(rc->extent_root->fs_info, ref_root);
2944    if (IS_ERR(root)) {
2945        err = PTR_ERR(root);
2946        goto out;
2947    }
2948
2949    key.objectid = ref_objectid;
2950    key.offset = ref_offset;
2951    key.type = BTRFS_EXTENT_DATA_KEY;
2952
2953    path->search_commit_root = 1;
2954    path->skip_locking = 1;
2955    ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2956    if (ret < 0) {
2957        err = ret;
2958        goto out;
2959    }
2960
2961    leaf = path->nodes[0];
2962    nritems = btrfs_header_nritems(leaf);
2963    /*
2964     * the references in tree blocks that use full backrefs
2965     * are not counted in
2966     */
2967    if (block_use_full_backref(rc, leaf))
2968        counted = 0;
2969    else
2970        counted = 1;
2971    rb_node = tree_search(blocks, leaf->start);
2972    if (rb_node) {
2973        if (counted)
2974            added = 1;
2975        else
2976            path->slots[0] = nritems;
2977    }
2978
2979    while (ref_count > 0) {
2980        while (path->slots[0] >= nritems) {
2981            ret = btrfs_next_leaf(root, path);
2982            if (ret < 0) {
2983                err = ret;
2984                goto out;
2985            }
2986            if (ret > 0) {
2987                WARN_ON(1);
2988                goto out;
2989            }
2990
2991            leaf = path->nodes[0];
2992            nritems = btrfs_header_nritems(leaf);
2993            added = 0;
2994
2995            if (block_use_full_backref(rc, leaf))
2996                counted = 0;
2997            else
2998                counted = 1;
2999            rb_node = tree_search(blocks, leaf->start);
3000            if (rb_node) {
3001                if (counted)
3002                    added = 1;
3003                else
3004                    path->slots[0] = nritems;
3005            }
3006        }
3007
3008        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3009        if (key.objectid != ref_objectid ||
3010            key.type != BTRFS_EXTENT_DATA_KEY) {
3011            WARN_ON(1);
3012            break;
3013        }
3014
3015        fi = btrfs_item_ptr(leaf, path->slots[0],
3016                    struct btrfs_file_extent_item);
3017
3018        if (btrfs_file_extent_type(leaf, fi) ==
3019            BTRFS_FILE_EXTENT_INLINE)
3020            goto next;
3021
3022        if (btrfs_file_extent_disk_bytenr(leaf, fi) !=
3023            extent_key->objectid)
3024            goto next;
3025
3026        key.offset -= btrfs_file_extent_offset(leaf, fi);
3027        if (key.offset != ref_offset)
3028            goto next;
3029
3030        if (counted)
3031            ref_count--;
3032        if (added)
3033            goto next;
3034
3035        if (!tree_block_processed(leaf->start, leaf->len, rc)) {
3036            block = kmalloc(sizeof(*block), GFP_NOFS);
3037            if (!block) {
3038                err = -ENOMEM;
3039                break;
3040            }
3041            block->bytenr = leaf->start;
3042            btrfs_item_key_to_cpu(leaf, &block->key, 0);
3043            block->level = 0;
3044            block->key_ready = 1;
3045            rb_node = tree_insert(blocks, block->bytenr,
3046                          &block->rb_node);
3047            BUG_ON(rb_node);
3048        }
3049        if (counted)
3050            added = 1;
3051        else
3052            path->slots[0] = nritems;
3053next:
3054        path->slots[0]++;
3055
3056    }
3057out:
3058    btrfs_free_path(path);
3059    return err;
3060}
3061
3062/*
3063 * hepler to find all tree blocks that reference a given data extent
3064 */
3065static noinline_for_stack
3066int add_data_references(struct reloc_control *rc,
3067            struct btrfs_key *extent_key,
3068            struct btrfs_path *path,
3069            struct rb_root *blocks)
3070{
3071    struct btrfs_key key;
3072    struct extent_buffer *eb;
3073    struct btrfs_extent_data_ref *dref;
3074    struct btrfs_extent_inline_ref *iref;
3075    unsigned long ptr;
3076    unsigned long end;
3077    u32 blocksize;
3078    int ret;
3079    int err = 0;
3080
3081    ret = get_new_location(rc->data_inode, NULL, extent_key->objectid,
3082                   extent_key->offset);
3083    BUG_ON(ret < 0);
3084    if (ret > 0) {
3085        /* the relocated data is fragmented */
3086        rc->extents_skipped++;
3087        btrfs_release_path(rc->extent_root, path);
3088        return 0;
3089    }
3090
3091    blocksize = btrfs_level_size(rc->extent_root, 0);
3092
3093    eb = path->nodes[0];
3094    ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
3095    end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
3096#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3097    if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
3098        ptr = end;
3099    else
3100#endif
3101        ptr += sizeof(struct btrfs_extent_item);
3102
3103    while (ptr < end) {
3104        iref = (struct btrfs_extent_inline_ref *)ptr;
3105        key.type = btrfs_extent_inline_ref_type(eb, iref);
3106        if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3107            key.offset = btrfs_extent_inline_ref_offset(eb, iref);
3108            ret = __add_tree_block(rc, key.offset, blocksize,
3109                           blocks);
3110        } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3111            dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3112            ret = find_data_references(rc, extent_key,
3113                           eb, dref, blocks);
3114        } else {
3115            BUG();
3116        }
3117        ptr += btrfs_extent_inline_ref_size(key.type);
3118    }
3119    WARN_ON(ptr > end);
3120
3121    while (1) {
3122        cond_resched();
3123        eb = path->nodes[0];
3124        if (path->slots[0] >= btrfs_header_nritems(eb)) {
3125            ret = btrfs_next_leaf(rc->extent_root, path);
3126            if (ret < 0) {
3127                err = ret;
3128                break;
3129            }
3130            if (ret > 0)
3131                break;
3132            eb = path->nodes[0];
3133        }
3134
3135        btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
3136        if (key.objectid != extent_key->objectid)
3137            break;
3138
3139#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3140        if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
3141            key.type == BTRFS_EXTENT_REF_V0_KEY) {
3142#else
3143        BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
3144        if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3145#endif
3146            ret = __add_tree_block(rc, key.offset, blocksize,
3147                           blocks);
3148        } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3149            dref = btrfs_item_ptr(eb, path->slots[0],
3150                          struct btrfs_extent_data_ref);
3151            ret = find_data_references(rc, extent_key,
3152                           eb, dref, blocks);
3153        } else {
3154            ret = 0;
3155        }
3156        if (ret) {
3157            err = ret;
3158            break;
3159        }
3160        path->slots[0]++;
3161    }
3162    btrfs_release_path(rc->extent_root, path);
3163    if (err)
3164        free_block_list(blocks);
3165    return err;
3166}
3167
3168/*
3169 * hepler to find next unprocessed extent
3170 */
3171static noinline_for_stack
3172int find_next_extent(struct btrfs_trans_handle *trans,
3173             struct reloc_control *rc, struct btrfs_path *path)
3174{
3175    struct btrfs_key key;
3176    struct extent_buffer *leaf;
3177    u64 start, end, last;
3178    int ret;
3179
3180    last = rc->block_group->key.objectid + rc->block_group->key.offset;
3181    while (1) {
3182        cond_resched();
3183        if (rc->search_start >= last) {
3184            ret = 1;
3185            break;
3186        }
3187
3188        key.objectid = rc->search_start;
3189        key.type = BTRFS_EXTENT_ITEM_KEY;
3190        key.offset = 0;
3191
3192        path->search_commit_root = 1;
3193        path->skip_locking = 1;
3194        ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
3195                    0, 0);
3196        if (ret < 0)
3197            break;
3198next:
3199        leaf = path->nodes[0];
3200        if (path->slots[0] >= btrfs_header_nritems(leaf)) {
3201            ret = btrfs_next_leaf(rc->extent_root, path);
3202            if (ret != 0)
3203                break;
3204            leaf = path->nodes[0];
3205        }
3206
3207        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3208        if (key.objectid >= last) {
3209            ret = 1;
3210            break;
3211        }
3212
3213        if (key.type != BTRFS_EXTENT_ITEM_KEY ||
3214            key.objectid + key.offset <= rc->search_start) {
3215            path->slots[0]++;
3216            goto next;
3217        }
3218
3219        ret = find_first_extent_bit(&rc->processed_blocks,
3220                        key.objectid, &start, &end,
3221                        EXTENT_DIRTY);
3222
3223        if (ret == 0 && start <= key.objectid) {
3224            btrfs_release_path(rc->extent_root, path);
3225            rc->search_start = end + 1;
3226        } else {
3227            rc->search_start = key.objectid + key.offset;
3228            return 0;
3229        }
3230    }
3231    btrfs_release_path(rc->extent_root, path);
3232    return ret;
3233}
3234
3235static void set_reloc_control(struct reloc_control *rc)
3236{
3237    struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3238    mutex_lock(&fs_info->trans_mutex);
3239    fs_info->reloc_ctl = rc;
3240    mutex_unlock(&fs_info->trans_mutex);
3241}
3242
3243static void unset_reloc_control(struct reloc_control *rc)
3244{
3245    struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3246    mutex_lock(&fs_info->trans_mutex);
3247    fs_info->reloc_ctl = NULL;
3248    mutex_unlock(&fs_info->trans_mutex);
3249}
3250
3251static int check_extent_flags(u64 flags)
3252{
3253    if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3254        (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3255        return 1;
3256    if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
3257        !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3258        return 1;
3259    if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3260        (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
3261        return 1;
3262    return 0;
3263}
3264
3265
3266static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3267{
3268    struct rb_root blocks = RB_ROOT;
3269    struct btrfs_key key;
3270    struct file_extent_cluster *cluster;
3271    struct btrfs_trans_handle *trans = NULL;
3272    struct btrfs_path *path;
3273    struct btrfs_extent_item *ei;
3274    unsigned long nr;
3275    u64 flags;
3276    u32 item_size;
3277    int ret;
3278    int err = 0;
3279
3280    cluster = kzalloc(sizeof(*cluster), GFP_NOFS);
3281    if (!cluster)
3282        return -ENOMEM;
3283
3284    path = btrfs_alloc_path();
3285    if (!path) {
3286        kfree(cluster);
3287        return -ENOMEM;
3288    }
3289
3290    rc->extents_found = 0;
3291    rc->extents_skipped = 0;
3292
3293    rc->search_start = rc->block_group->key.objectid;
3294    clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
3295              GFP_NOFS);
3296
3297    rc->create_reloc_root = 1;
3298    set_reloc_control(rc);
3299
3300    trans = btrfs_start_transaction(rc->extent_root, 1);
3301    btrfs_commit_transaction(trans, rc->extent_root);
3302
3303    while (1) {
3304        trans = btrfs_start_transaction(rc->extent_root, 1);
3305
3306        ret = find_next_extent(trans, rc, path);
3307        if (ret < 0)
3308            err = ret;
3309        if (ret != 0)
3310            break;
3311
3312        rc->extents_found++;
3313
3314        ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3315                    struct btrfs_extent_item);
3316        btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3317        item_size = btrfs_item_size_nr(path->nodes[0],
3318                           path->slots[0]);
3319        if (item_size >= sizeof(*ei)) {
3320            flags = btrfs_extent_flags(path->nodes[0], ei);
3321            ret = check_extent_flags(flags);
3322            BUG_ON(ret);
3323
3324        } else {
3325#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3326            u64 ref_owner;
3327            int path_change = 0;
3328
3329            BUG_ON(item_size !=
3330                   sizeof(struct btrfs_extent_item_v0));
3331            ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
3332                          &path_change);
3333            if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
3334                flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
3335            else
3336                flags = BTRFS_EXTENT_FLAG_DATA;
3337
3338            if (path_change) {
3339                btrfs_release_path(rc->extent_root, path);
3340
3341                path->search_commit_root = 1;
3342                path->skip_locking = 1;
3343                ret = btrfs_search_slot(NULL, rc->extent_root,
3344                            &key, path, 0, 0);
3345                if (ret < 0) {
3346                    err = ret;
3347                    break;
3348                }
3349                BUG_ON(ret > 0);
3350            }
3351#else
3352            BUG();
3353#endif
3354        }
3355
3356        if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
3357            ret = add_tree_block(rc, &key, path, &blocks);
3358        } else if (rc->stage == UPDATE_DATA_PTRS &&
3359             (flags & BTRFS_EXTENT_FLAG_DATA)) {
3360            ret = add_data_references(rc, &key, path, &blocks);
3361        } else {
3362            btrfs_release_path(rc->extent_root, path);
3363            ret = 0;
3364        }
3365        if (ret < 0) {
3366            err = 0;
3367            break;
3368        }
3369
3370        if (!RB_EMPTY_ROOT(&blocks)) {
3371            ret = relocate_tree_blocks(trans, rc, &blocks);
3372            if (ret < 0) {
3373                err = ret;
3374                break;
3375            }
3376        }
3377
3378        nr = trans->blocks_used;
3379        btrfs_end_transaction(trans, rc->extent_root);
3380        trans = NULL;
3381        btrfs_btree_balance_dirty(rc->extent_root, nr);
3382
3383        if (rc->stage == MOVE_DATA_EXTENTS &&
3384            (flags & BTRFS_EXTENT_FLAG_DATA)) {
3385            rc->found_file_extent = 1;
3386            ret = relocate_data_extent(rc->data_inode,
3387                           &key, cluster);
3388            if (ret < 0) {
3389                err = ret;
3390                break;
3391            }
3392        }
3393    }
3394    btrfs_free_path(path);
3395
3396    if (trans) {
3397        nr = trans->blocks_used;
3398        btrfs_end_transaction(trans, rc->extent_root);
3399        btrfs_btree_balance_dirty(rc->extent_root, nr);
3400    }
3401
3402    if (!err) {
3403        ret = relocate_file_extent_cluster(rc->data_inode, cluster);
3404        if (ret < 0)
3405            err = ret;
3406    }
3407
3408    kfree(cluster);
3409
3410    rc->create_reloc_root = 0;
3411    smp_mb();
3412
3413    if (rc->extents_found > 0) {
3414        trans = btrfs_start_transaction(rc->extent_root, 1);
3415        btrfs_commit_transaction(trans, rc->extent_root);
3416    }
3417
3418    merge_reloc_roots(rc);
3419
3420    unset_reloc_control(rc);
3421
3422    /* get rid of pinned extents */
3423    trans = btrfs_start_transaction(rc->extent_root, 1);
3424    btrfs_commit_transaction(trans, rc->extent_root);
3425
3426    return err;
3427}
3428
3429static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3430                 struct btrfs_root *root, u64 objectid)
3431{
3432    struct btrfs_path *path;
3433    struct btrfs_inode_item *item;
3434    struct extent_buffer *leaf;
3435    int ret;
3436
3437    path = btrfs_alloc_path();
3438    if (!path)
3439        return -ENOMEM;
3440
3441    ret = btrfs_insert_empty_inode(trans, root, path, objectid);
3442    if (ret)
3443        goto out;
3444
3445    leaf = path->nodes[0];
3446    item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3447    memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
3448    btrfs_set_inode_generation(leaf, item, 1);
3449    btrfs_set_inode_size(leaf, item, 0);
3450    btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
3451    btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
3452    btrfs_mark_buffer_dirty(leaf);
3453    btrfs_release_path(root, path);
3454out:
3455    btrfs_free_path(path);
3456    return ret;
3457}
3458
3459/*
3460 * helper to create inode for data relocation.
3461 * the inode is in data relocation tree and its link count is 0
3462 */
3463static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3464                    struct btrfs_block_group_cache *group)
3465{
3466    struct inode *inode = NULL;
3467    struct btrfs_trans_handle *trans;
3468    struct btrfs_root *root;
3469    struct btrfs_key key;
3470    unsigned long nr;
3471    u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
3472    int err = 0;
3473
3474    root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
3475    if (IS_ERR(root))
3476        return ERR_CAST(root);
3477
3478    trans = btrfs_start_transaction(root, 1);
3479    BUG_ON(!trans);
3480
3481    err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
3482    if (err)
3483        goto out;
3484
3485    err = __insert_orphan_inode(trans, root, objectid);
3486    BUG_ON(err);
3487
3488    key.objectid = objectid;
3489    key.type = BTRFS_INODE_ITEM_KEY;
3490    key.offset = 0;
3491    inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
3492    BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
3493    BTRFS_I(inode)->index_cnt = group->key.objectid;
3494
3495    err = btrfs_orphan_add(trans, inode);
3496out:
3497    nr = trans->blocks_used;
3498    btrfs_end_transaction(trans, root);
3499
3500    btrfs_btree_balance_dirty(root, nr);
3501    if (err) {
3502        if (inode)
3503            iput(inode);
3504        inode = ERR_PTR(err);
3505    }
3506    return inode;
3507}
3508
3509/*
3510 * function to relocate all extents in a block group.
3511 */
3512int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3513{
3514    struct btrfs_fs_info *fs_info = extent_root->fs_info;
3515    struct reloc_control *rc;
3516    int ret;
3517    int err = 0;
3518
3519    rc = kzalloc(sizeof(*rc), GFP_NOFS);
3520    if (!rc)
3521        return -ENOMEM;
3522
3523    mapping_tree_init(&rc->reloc_root_tree);
3524    extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
3525    INIT_LIST_HEAD(&rc->reloc_roots);
3526
3527    rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
3528    BUG_ON(!rc->block_group);
3529
3530    btrfs_init_workers(&rc->workers, "relocate",
3531               fs_info->thread_pool_size, NULL);
3532
3533    rc->extent_root = extent_root;
3534    btrfs_prepare_block_group_relocation(extent_root, rc->block_group);
3535
3536    rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
3537    if (IS_ERR(rc->data_inode)) {
3538        err = PTR_ERR(rc->data_inode);
3539        rc->data_inode = NULL;
3540        goto out;
3541    }
3542
3543    printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
3544           (unsigned long long)rc->block_group->key.objectid,
3545           (unsigned long long)rc->block_group->flags);
3546
3547    btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
3548    btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
3549
3550    while (1) {
3551        rc->extents_found = 0;
3552        rc->extents_skipped = 0;
3553
3554        mutex_lock(&fs_info->cleaner_mutex);
3555
3556        btrfs_clean_old_snapshots(fs_info->tree_root);
3557        ret = relocate_block_group(rc);
3558
3559        mutex_unlock(&fs_info->cleaner_mutex);
3560        if (ret < 0) {
3561            err = ret;
3562            break;
3563        }
3564
3565        if (rc->extents_found == 0)
3566            break;
3567
3568        printk(KERN_INFO "btrfs: found %llu extents\n",
3569            (unsigned long long)rc->extents_found);
3570
3571        if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
3572            btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
3573            invalidate_mapping_pages(rc->data_inode->i_mapping,
3574                         0, -1);
3575            rc->stage = UPDATE_DATA_PTRS;
3576        } else if (rc->stage == UPDATE_DATA_PTRS &&
3577               rc->extents_skipped >= rc->extents_found) {
3578            iput(rc->data_inode);
3579            rc->data_inode = create_reloc_inode(fs_info,
3580                                rc->block_group);
3581            if (IS_ERR(rc->data_inode)) {
3582                err = PTR_ERR(rc->data_inode);
3583                rc->data_inode = NULL;
3584                break;
3585            }
3586            rc->stage = MOVE_DATA_EXTENTS;
3587            rc->found_file_extent = 0;
3588        }
3589    }
3590
3591    filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
3592                     rc->block_group->key.objectid,
3593                     rc->block_group->key.objectid +
3594                     rc->block_group->key.offset - 1);
3595
3596    WARN_ON(rc->block_group->pinned > 0);
3597    WARN_ON(rc->block_group->reserved > 0);
3598    WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
3599out:
3600    iput(rc->data_inode);
3601    btrfs_stop_workers(&rc->workers);
3602    btrfs_put_block_group(rc->block_group);
3603    kfree(rc);
3604    return err;
3605}
3606
3607static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
3608{
3609    struct btrfs_trans_handle *trans;
3610    int ret;
3611
3612    trans = btrfs_start_transaction(root->fs_info->tree_root, 1);
3613
3614    memset(&root->root_item.drop_progress, 0,
3615        sizeof(root->root_item.drop_progress));
3616    root->root_item.drop_level = 0;
3617    btrfs_set_root_refs(&root->root_item, 0);
3618    ret = btrfs_update_root(trans, root->fs_info->tree_root,
3619                &root->root_key, &root->root_item);
3620    BUG_ON(ret);
3621
3622    ret = btrfs_end_transaction(trans, root->fs_info->tree_root);
3623    BUG_ON(ret);
3624    return 0;
3625}
3626
3627/*
3628 * recover relocation interrupted by system crash.
3629 *
3630 * this function resumes merging reloc trees with corresponding fs trees.
3631 * this is important for keeping the sharing of tree blocks
3632 */
3633int btrfs_recover_relocation(struct btrfs_root *root)
3634{
3635    LIST_HEAD(reloc_roots);
3636    struct btrfs_key key;
3637    struct btrfs_root *fs_root;
3638    struct btrfs_root *reloc_root;
3639    struct btrfs_path *path;
3640    struct extent_buffer *leaf;
3641    struct reloc_control *rc = NULL;
3642    struct btrfs_trans_handle *trans;
3643    int ret;
3644    int err = 0;
3645
3646    path = btrfs_alloc_path();
3647    if (!path)
3648        return -ENOMEM;
3649
3650    key.objectid = BTRFS_TREE_RELOC_OBJECTID;
3651    key.type = BTRFS_ROOT_ITEM_KEY;
3652    key.offset = (u64)-1;
3653
3654    while (1) {
3655        ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key,
3656                    path, 0, 0);
3657        if (ret < 0) {
3658            err = ret;
3659            goto out;
3660        }
3661        if (ret > 0) {
3662            if (path->slots[0] == 0)
3663                break;
3664            path->slots[0]--;
3665        }
3666        leaf = path->nodes[0];
3667        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3668        btrfs_release_path(root->fs_info->tree_root, path);
3669
3670        if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
3671            key.type != BTRFS_ROOT_ITEM_KEY)
3672            break;
3673
3674        reloc_root = btrfs_read_fs_root_no_radix(root, &key);
3675        if (IS_ERR(reloc_root)) {
3676            err = PTR_ERR(reloc_root);
3677            goto out;
3678        }
3679
3680        list_add(&reloc_root->root_list, &reloc_roots);
3681
3682        if (btrfs_root_refs(&reloc_root->root_item) > 0) {
3683            fs_root = read_fs_root(root->fs_info,
3684                           reloc_root->root_key.offset);
3685            if (IS_ERR(fs_root)) {
3686                ret = PTR_ERR(fs_root);
3687                if (ret != -ENOENT) {
3688                    err = ret;
3689                    goto out;
3690                }
3691                mark_garbage_root(reloc_root);
3692            }
3693        }
3694
3695        if (key.offset == 0)
3696            break;
3697
3698        key.offset--;
3699    }
3700    btrfs_release_path(root->fs_info->tree_root, path);
3701
3702    if (list_empty(&reloc_roots))
3703        goto out;
3704
3705    rc = kzalloc(sizeof(*rc), GFP_NOFS);
3706    if (!rc) {
3707        err = -ENOMEM;
3708        goto out;
3709    }
3710
3711    mapping_tree_init(&rc->reloc_root_tree);
3712    INIT_LIST_HEAD(&rc->reloc_roots);
3713    btrfs_init_workers(&rc->workers, "relocate",
3714               root->fs_info->thread_pool_size, NULL);
3715    rc->extent_root = root->fs_info->extent_root;
3716
3717    set_reloc_control(rc);
3718
3719    while (!list_empty(&reloc_roots)) {
3720        reloc_root = list_entry(reloc_roots.next,
3721                    struct btrfs_root, root_list);
3722        list_del(&reloc_root->root_list);
3723
3724        if (btrfs_root_refs(&reloc_root->root_item) == 0) {
3725            list_add_tail(&reloc_root->root_list,
3726                      &rc->reloc_roots);
3727            continue;
3728        }
3729
3730        fs_root = read_fs_root(root->fs_info,
3731                       reloc_root->root_key.offset);
3732        BUG_ON(IS_ERR(fs_root));
3733
3734        __add_reloc_root(reloc_root);
3735        fs_root->reloc_root = reloc_root;
3736    }
3737
3738    trans = btrfs_start_transaction(rc->extent_root, 1);
3739    btrfs_commit_transaction(trans, rc->extent_root);
3740
3741    merge_reloc_roots(rc);
3742
3743    unset_reloc_control(rc);
3744
3745    trans = btrfs_start_transaction(rc->extent_root, 1);
3746    btrfs_commit_transaction(trans, rc->extent_root);
3747out:
3748    if (rc) {
3749        btrfs_stop_workers(&rc->workers);
3750        kfree(rc);
3751    }
3752    while (!list_empty(&reloc_roots)) {
3753        reloc_root = list_entry(reloc_roots.next,
3754                    struct btrfs_root, root_list);
3755        list_del(&reloc_root->root_list);
3756        free_extent_buffer(reloc_root->node);
3757        free_extent_buffer(reloc_root->commit_root);
3758        kfree(reloc_root);
3759    }
3760    btrfs_free_path(path);
3761
3762    if (err == 0) {
3763        /* cleanup orphan inode in data relocation tree */
3764        fs_root = read_fs_root(root->fs_info,
3765                       BTRFS_DATA_RELOC_TREE_OBJECTID);
3766        if (IS_ERR(fs_root))
3767            err = PTR_ERR(fs_root);
3768        else
3769            btrfs_orphan_cleanup(fs_root);
3770    }
3771    return err;
3772}
3773
3774/*
3775 * helper to add ordered checksum for data relocation.
3776 *
3777 * cloning checksum properly handles the nodatasum extents.
3778 * it also saves CPU time to re-calculate the checksum.
3779 */
3780int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
3781{
3782    struct btrfs_ordered_sum *sums;
3783    struct btrfs_sector_sum *sector_sum;
3784    struct btrfs_ordered_extent *ordered;
3785    struct btrfs_root *root = BTRFS_I(inode)->root;
3786    size_t offset;
3787    int ret;
3788    u64 disk_bytenr;
3789    LIST_HEAD(list);
3790
3791    ordered = btrfs_lookup_ordered_extent(inode, file_pos);
3792    BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
3793
3794    disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
3795    ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
3796                       disk_bytenr + len - 1, &list);
3797
3798    while (!list_empty(&list)) {
3799        sums = list_entry(list.next, struct btrfs_ordered_sum, list);
3800        list_del_init(&sums->list);
3801
3802        sector_sum = sums->sums;
3803        sums->bytenr = ordered->start;
3804
3805        offset = 0;
3806        while (offset < sums->len) {
3807            sector_sum->bytenr += ordered->start - disk_bytenr;
3808            sector_sum++;
3809            offset += root->sectorsize;
3810        }
3811
3812        btrfs_add_ordered_sum(inode, ordered, sums);
3813    }
3814    btrfs_put_ordered_extent(ordered);
3815    return 0;
3816}
3817

Archive Download this file



interactive