LCOV - code coverage report
Current view: top level - fs/btrfs - extent_io.c (source / functions)
Test: btrfstest.info    Lines:     1502 / 1970 hit (76.2 %)
Date: 2014-11-28        Functions:  112 /  128 hit (87.5 %)

          Line data    Source code
       1             : #include <linux/bitops.h>
       2             : #include <linux/slab.h>
       3             : #include <linux/bio.h>
       4             : #include <linux/mm.h>
       5             : #include <linux/pagemap.h>
       6             : #include <linux/page-flags.h>
       7             : #include <linux/spinlock.h>
       8             : #include <linux/blkdev.h>
       9             : #include <linux/swap.h>
      10             : #include <linux/writeback.h>
      11             : #include <linux/pagevec.h>
      12             : #include <linux/prefetch.h>
      13             : #include <linux/cleancache.h>
      14             : #include "extent_io.h"
      15             : #include "extent_map.h"
      16             : #include "ctree.h"
      17             : #include "btrfs_inode.h"
      18             : #include "volumes.h"
      19             : #include "check-integrity.h"
      20             : #include "locking.h"
      21             : #include "rcu-string.h"
      22             : #include "backref.h"
      23             : 
      24             : static struct kmem_cache *extent_state_cache;
      25             : static struct kmem_cache *extent_buffer_cache;
      26             : static struct bio_set *btrfs_bioset;
      27             : 
      28             : #ifdef CONFIG_BTRFS_DEBUG
      29             : static LIST_HEAD(buffers);
      30             : static LIST_HEAD(states);
      31             : 
      32             : static DEFINE_SPINLOCK(leak_lock);
      33             : 
      34             : static inline
      35             : void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
      36             : {
      37             :         unsigned long flags;
      38             : 
      39             :         spin_lock_irqsave(&leak_lock, flags);
      40             :         list_add(new, head);
      41             :         spin_unlock_irqrestore(&leak_lock, flags);
      42             : }
      43             : 
      44             : static inline
      45             : void btrfs_leak_debug_del(struct list_head *entry)
      46             : {
      47             :         unsigned long flags;
      48             : 
      49             :         spin_lock_irqsave(&leak_lock, flags);
      50             :         list_del(entry);
      51             :         spin_unlock_irqrestore(&leak_lock, flags);
      52             : }
      53             : 
      54             : static inline
      55             : void btrfs_leak_debug_check(void)
      56             : {
      57             :         struct extent_state *state;
      58             :         struct extent_buffer *eb;
      59             : 
      60             :         while (!list_empty(&states)) {
      61             :                 state = list_entry(states.next, struct extent_state, leak_list);
      62             :                 printk(KERN_ERR "BTRFS: state leak: start %llu end %llu "
      63             :                        "state %lu in tree %p refs %d\n",
      64             :                        state->start, state->end, state->state, state->tree,
      65             :                        atomic_read(&state->refs));
      66             :                 list_del(&state->leak_list);
      67             :                 kmem_cache_free(extent_state_cache, state);
      68             :         }
      69             : 
      70             :         while (!list_empty(&buffers)) {
      71             :                 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
      72             :                 printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu "
      73             :                        "refs %d\n",
      74             :                        eb->start, eb->len, atomic_read(&eb->refs));
      75             :                 list_del(&eb->leak_list);
      76             :                 kmem_cache_free(extent_buffer_cache, eb);
      77             :         }
      78             : }
      79             : 
      80             : #define btrfs_debug_check_extent_io_range(tree, start, end)             \
      81             :         __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
      82             : static inline void __btrfs_debug_check_extent_io_range(const char *caller,
      83             :                 struct extent_io_tree *tree, u64 start, u64 end)
      84             : {
      85             :         struct inode *inode;
      86             :         u64 isize;
      87             : 
      88             :         if (!tree->mapping)
      89             :                 return;
      90             : 
      91             :         inode = tree->mapping->host;
      92             :         isize = i_size_read(inode);
      93             :         if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
      94             :                 printk_ratelimited(KERN_DEBUG
      95             :                     "BTRFS: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
      96             :                                 caller, btrfs_ino(inode), isize, start, end);
      97             :         }
      98             : }
      99             : #else
     100             : #define btrfs_leak_debug_add(new, head) do {} while (0)
     101             : #define btrfs_leak_debug_del(entry)     do {} while (0)
     102             : #define btrfs_leak_debug_check()        do {} while (0)
     103             : #define btrfs_debug_check_extent_io_range(c, s, e)      do {} while (0)
     104             : #endif
     105             : 
     106             : #define BUFFER_LRU_MAX 64
     107             : 
     108             : struct tree_entry {
     109             :         u64 start;
     110             :         u64 end;
     111             :         struct rb_node rb_node;
     112             : };
     113             : 
     114             : struct extent_page_data {
     115             :         struct bio *bio;
     116             :         struct extent_io_tree *tree;
     117             :         get_extent_t *get_extent;
     118             :         unsigned long bio_flags;
     119             : 
     120             :         /* tells writepage not to lock the state bits for this range
     121             :          * it still does the unlocking
     122             :          */
     123             :         unsigned int extent_locked:1;
     124             : 
     125             :         /* tells the submit_bio code to use a WRITE_SYNC */
     126             :         unsigned int sync_io:1;
     127             : };
     128             : 
     129             : static noinline void flush_write_bio(void *data);
     130             : static inline struct btrfs_fs_info *
     131             : tree_fs_info(struct extent_io_tree *tree)
     132             : {
     133           0 :         if (!tree->mapping)
     134             :                 return NULL;
     135           0 :         return btrfs_sb(tree->mapping->host->i_sb);
     136             : }
     137             : 
     138           0 : int __init extent_io_init(void)
     139             : {
     140           0 :         extent_state_cache = kmem_cache_create("btrfs_extent_state",
     141             :                         sizeof(struct extent_state), 0,
     142             :                         SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
     143           0 :         if (!extent_state_cache)
     144             :                 return -ENOMEM;
     145             : 
     146           0 :         extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
     147             :                         sizeof(struct extent_buffer), 0,
     148             :                         SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
     149           0 :         if (!extent_buffer_cache)
     150             :                 goto free_state_cache;
     151             : 
     152           0 :         btrfs_bioset = bioset_create(BIO_POOL_SIZE,
     153             :                                      offsetof(struct btrfs_io_bio, bio));
     154           0 :         if (!btrfs_bioset)
     155             :                 goto free_buffer_cache;
     156             : 
     157           0 :         if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
     158             :                 goto free_bioset;
     159             : 
     160             :         return 0;
     161             : 
     162             : free_bioset:
     163           0 :         bioset_free(btrfs_bioset);
     164           0 :         btrfs_bioset = NULL;
     165             : 
     166             : free_buffer_cache:
     167           0 :         kmem_cache_destroy(extent_buffer_cache);
     168           0 :         extent_buffer_cache = NULL;
     169             : 
     170             : free_state_cache:
     171           0 :         kmem_cache_destroy(extent_state_cache);
     172           0 :         extent_state_cache = NULL;
     173           0 :         return -ENOMEM;
     174             : }
     175             : 
     176           0 : void extent_io_exit(void)
     177             : {
     178             :         btrfs_leak_debug_check();
     179             : 
     180             :         /*
     181             :          * Make sure all delayed rcu free are flushed before we
     182             :          * destroy caches.
     183             :          */
     184           0 :         rcu_barrier();
     185           0 :         if (extent_state_cache)
     186           0 :                 kmem_cache_destroy(extent_state_cache);
     187           0 :         if (extent_buffer_cache)
     188           0 :                 kmem_cache_destroy(extent_buffer_cache);
     189           0 :         if (btrfs_bioset)
     190           0 :                 bioset_free(btrfs_bioset);
     191           0 : }
     192             : 
     193       57474 : void extent_io_tree_init(struct extent_io_tree *tree,
     194             :                          struct address_space *mapping)
     195             : {
     196       57474 :         tree->state = RB_ROOT;
     197       57474 :         tree->ops = NULL;
     198       57474 :         tree->dirty_bytes = 0;
     199       57474 :         spin_lock_init(&tree->lock);
     200       57474 :         tree->mapping = mapping;
     201       57474 : }
     202             : 
     203     2270839 : static struct extent_state *alloc_extent_state(gfp_t mask)
     204             : {
     205             :         struct extent_state *state;
     206             : 
     207     2270839 :         state = kmem_cache_alloc(extent_state_cache, mask);
     208     2271016 :         if (!state)
     209             :                 return state;
     210     2271022 :         state->state = 0;
     211     2271022 :         state->private = 0;
     212     2271022 :         state->tree = NULL;
     213             :         btrfs_leak_debug_add(&state->leak_list, &states);
     214             :         atomic_set(&state->refs, 1);
     215     2271022 :         init_waitqueue_head(&state->wq);
     216     2270878 :         trace_alloc_extent_state(state, mask, _RET_IP_);
     217     2270871 :         return state;
     218             : }
     219             : 
     220     4910880 : void free_extent_state(struct extent_state *state)
     221             : {
     222     4910880 :         if (!state)
     223     4911119 :                 return;
     224     4636122 :         if (atomic_dec_and_test(&state->refs)) {
     225     2271446 :                 WARN_ON(state->tree);
     226             :                 btrfs_leak_debug_del(&state->leak_list);
     227     2271446 :                 trace_free_extent_state(state, _RET_IP_);
     228     2271397 :                 kmem_cache_free(extent_state_cache, state);
     229             :         }
     230             : }
     231             : 
     232      962717 : static struct rb_node *tree_insert(struct rb_root *root,
     233             :                                    struct rb_node *search_start,
     234             :                                    u64 offset,
     235             :                                    struct rb_node *node,
     236             :                                    struct rb_node ***p_in,
     237             :                                    struct rb_node **parent_in)
     238             : {
     239             :         struct rb_node **p;
     240             :         struct rb_node *parent = NULL;
     241             :         struct tree_entry *entry;
     242             : 
     243      962717 :         if (p_in && parent_in) {
     244      307644 :                 p = *p_in;
     245      307644 :                 parent = *parent_in;
     246      307644 :                 goto do_insert;
     247             :         }
     248             : 
     249      655073 :         p = search_start ? &search_start : &root->rb_node;
     250     2261712 :         while (*p) {
     251             :                 parent = *p;
     252             :                 entry = rb_entry(parent, struct tree_entry, rb_node);
     253             : 
     254      951558 :                 if (offset < entry->start)
     255      733186 :                         p = &(*p)->rb_left;
     256      218372 :                 else if (offset > entry->end)
     257      218380 :                         p = &(*p)->rb_right;
     258             :                 else
     259             :                         return parent;
     260             :         }
     261             : 
     262             : do_insert:
     263             :         rb_link_node(node, parent, p);
     264      962725 :         rb_insert_color(node, root);
     265      962672 :         return NULL;
     266             : }
     267             : 
     268     3149917 : static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
     269             :                                       struct rb_node **prev_ret,
     270             :                                       struct rb_node **next_ret,
     271             :                                       struct rb_node ***p_ret,
     272             :                                       struct rb_node **parent_ret)
     273             : {
     274             :         struct rb_root *root = &tree->state;
     275     3149917 :         struct rb_node **n = &root->rb_node;
     276             :         struct rb_node *prev = NULL;
     277             :         struct rb_node *orig_prev = NULL;
     278             :         struct tree_entry *entry;
     279             :         struct tree_entry *prev_entry = NULL;
     280             : 
     281    10307844 :         while (*n) {
     282             :                 prev = *n;
     283     6293786 :                 entry = rb_entry(prev, struct tree_entry, rb_node);
     284             :                 prev_entry = entry;
     285             : 
     286     6293786 :                 if (offset < entry->start)
     287     1644278 :                         n = &(*n)->rb_left;
     288     4649508 :                 else if (offset > entry->end)
     289     2363732 :                         n = &(*n)->rb_right;
     290             :                 else
     291             :                         return *n;
     292             :         }
     293             : 
     294      864141 :         if (p_ret)
     295      364813 :                 *p_ret = n;
     296      864141 :         if (parent_ret)
     297      364813 :                 *parent_ret = prev;
     298             : 
     299      864141 :         if (prev_ret) {
     300             :                 orig_prev = prev;
     301     1157135 :                 while (prev && offset > prev_entry->end) {
     302      292995 :                         prev = rb_next(prev);
     303      292996 :                         prev_entry = rb_entry(prev, struct tree_entry, rb_node);
     304             :                 }
     305      864140 :                 *prev_ret = prev;
     306             :                 prev = orig_prev;
     307             :         }
     308             : 
     309      864142 :         if (next_ret) {
     310           0 :                 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
     311           0 :                 while (prev && offset < prev_entry->start) {
     312           0 :                         prev = rb_prev(prev);
     313           0 :                         prev_entry = rb_entry(prev, struct tree_entry, rb_node);
     314             :                 }
     315           0 :                 *next_ret = prev;
     316             :         }
     317             :         return NULL;
     318             : }
     319             : 
     320             : static inline struct rb_node *
     321     3150213 : tree_search_for_insert(struct extent_io_tree *tree,
     322             :                        u64 offset,
     323             :                        struct rb_node ***p_ret,
     324             :                        struct rb_node **parent_ret)
     325             : {
     326     3150213 :         struct rb_node *prev = NULL;
     327             :         struct rb_node *ret;
     328             : 
     329     3150213 :         ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
     330     3150178 :         if (!ret)
     331      864138 :                 return prev;
     332             :         return ret;
     333             : }
     334             : 
     335             : static inline struct rb_node *tree_search(struct extent_io_tree *tree,
     336             :                                           u64 offset)
     337             : {
     338     1989273 :         return tree_search_for_insert(tree, offset, NULL, NULL);
     339             : }
     340             : 
     341             : static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
     342             :                      struct extent_state *other)
     343             : {
     344      632459 :         if (tree->ops && tree->ops->merge_extent_hook)
     345      555607 :                 tree->ops->merge_extent_hook(tree->mapping->host, new,
     346             :                                              other);
     347             : }
     348             : 
     349             : /*
     350             :  * utility function to look for merge candidates inside a given range.
     351             :  * Any extents with matching state are merged together into a single
     352             :  * extent in the tree.  Extents with EXTENT_IO in their state field
     353             :  * are not merged because the end_io handlers need to be able to do
     354             :  * operations on them without sleeping (or doing allocations/splits).
     355             :  *
     356             :  * This should be called with the tree lock held.
     357             :  */
     358     2466574 : static void merge_state(struct extent_io_tree *tree,
     359             :                         struct extent_state *state)
     360             : {
     361             :         struct extent_state *other;
     362             :         struct rb_node *other_node;
     363             : 
     364     1834115 :         if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
     365     1834118 :                 return;
     366             : 
     367      794656 :         other_node = rb_prev(&state->rb_node);
     368      794660 :         if (other_node) {
     369      713642 :                 other = rb_entry(other_node, struct extent_state, rb_node);
     370     1349509 :                 if (other->end == state->start - 1 &&
     371      635867 :                     other->state == state->state) {
     372             :                         merge_cb(tree, state, other);
     373      606314 :                         state->start = other->start;
     374      606314 :                         other->tree = NULL;
     375      606314 :                         rb_erase(&other->rb_node, &tree->state);
     376      606313 :                         free_extent_state(other);
     377             :                 }
     378             :         }
     379      794662 :         other_node = rb_next(&state->rb_node);
     380      794660 :         if (other_node) {
     381      492668 :                 other = rb_entry(other_node, struct extent_state, rb_node);
     382      921487 :                 if (other->start == state->end + 1 &&
     383      428819 :                     other->state == state->state) {
     384             :                         merge_cb(tree, state, other);
     385       26145 :                         state->end = other->end;
     386       26145 :                         other->tree = NULL;
     387       26145 :                         rb_erase(&other->rb_node, &tree->state);
     388       26145 :                         free_extent_state(other);
     389             :                 }
     390             :         }
     391             : }
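
The merge test above is applied symmetrically to the previous and the next node. A minimal standalone sketch of that predicate, using an illustrative struct rather than the kernel's extent_state: two inclusive byte ranges coalesce only when they abut exactly and carry identical state bits.

        #include <stdbool.h>
        #include <stdint.h>

        /* Illustrative stand-in for the fields merge_state compares. */
        struct range {
                uint64_t start;         /* first byte, inclusive */
                uint64_t end;           /* last byte, inclusive */
                unsigned long state;    /* bit flags */
        };

        /* 'prev' can be folded into 'cur' only if it ends exactly one byte
         * before 'cur' starts and both carry the same state bits. */
        static bool can_merge(const struct range *prev, const struct range *cur)
        {
                return prev->end == cur->start - 1 && prev->state == cur->state;
        }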
     392             : 
     393             : static void set_state_cb(struct extent_io_tree *tree,
     394             :                          struct extent_state *state, unsigned long *bits)
     395             : {
     396     1242906 :         if (tree->ops && tree->ops->set_bit_hook)
     397     1083211 :                 tree->ops->set_bit_hook(tree->mapping->host, state, bits);
     398             : }
     399             : 
     400             : static void clear_state_cb(struct extent_io_tree *tree,
     401             :                            struct extent_state *state, unsigned long *bits)
     402             : {
     403      941819 :         if (tree->ops && tree->ops->clear_bit_hook)
     404      858665 :                 tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
     405             : }
     406             : 
     407             : static void set_state_bits(struct extent_io_tree *tree,
     408             :                            struct extent_state *state, unsigned long *bits);
     409             : 
     410             : /*
     411             :  * insert an extent_state struct into the tree.  'bits' are set on the
     412             :  * struct before it is inserted.
     413             :  *
     414             :  * This may return -EEXIST if the extent is already there, in which case the
     415             :  * state struct is freed.
     416             :  *
     417             :  * The tree lock is not taken internally.  This is a utility function and
     418             :  * probably isn't what you want to call (see set/clear_extent_bit).
     419             :  */
     420      364809 : static int insert_state(struct extent_io_tree *tree,
     421             :                         struct extent_state *state, u64 start, u64 end,
     422             :                         struct rb_node ***p,
     423             :                         struct rb_node **parent,
     424             :                         unsigned long *bits)
     425             : {
     426             :         struct rb_node *node;
     427             : 
     428      364809 :         if (end < start)
     429           0 :                 WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
     430             :                        end, start);
     431      364809 :         state->start = start;
     432      364809 :         state->end = end;
     433             : 
     434      364809 :         set_state_bits(tree, state, bits);
     435             : 
     436      364803 :         node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
     437      364785 :         if (node) {
     438             :                 struct extent_state *found;
     439             :                 found = rb_entry(node, struct extent_state, rb_node);
     440           0 :                 printk(KERN_ERR "BTRFS: found node %llu %llu on insert of "
     441             :                        "%llu %llu\n",
     442             :                        found->start, found->end, start, end);
     443           0 :                 return -EEXIST;
     444             :         }
     445      364785 :         state->tree = tree;
     446      364785 :         merge_state(tree, state);
     447      364796 :         return 0;
     448             : }
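
As the comment notes, this is a low-level helper; the caller pattern it implies (a hedged sketch condensed from how __set_extent_bit drives it later in this file) is: take the tree lock, preallocate the state up front so the insert itself cannot sleep, and treat -EEXIST as a locking bug.

        /* Hedged caller sketch; condensed from __set_extent_bit below,
         * with error handling as done there. */
        spin_lock(&tree->lock);
        prealloc = alloc_extent_state_atomic(prealloc);
        BUG_ON(!prealloc);
        err = insert_state(tree, prealloc, start, end, &p, &parent, &bits);
        if (err)
                extent_io_tree_panic(tree, err);   /* range already present */
        cache_state(prealloc, cached_state);
        spin_unlock(&tree->lock);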
     449             : 
     450             : static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
     451             :                      u64 split)
     452             : {
     453      597916 :         if (tree->ops && tree->ops->split_extent_hook)
     454      597393 :                 tree->ops->split_extent_hook(tree->mapping->host, orig, split);
     455             : }
     456             : 
     457             : /*
     458             :  * split a given extent state struct in two, inserting the preallocated
     459             :  * struct 'prealloc' as the newly created second half.  'split' indicates an
     460             :  * offset inside 'orig' where it should be split.
     461             :  *
     462             :  * Before calling,
     463             :  * the tree has 'orig' at [orig->start, orig->end].  After calling, there
     464             :  * are two extent state structs in the tree:
     465             :  * prealloc: [orig->start, split - 1]
     466             :  * orig: [ split, orig->end ]
     467             :  *
     468             :  * The tree locks are not taken by this function. They need to be held
     469             :  * by the caller.
     470             :  */
     471      597916 : static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
     472             :                        struct extent_state *prealloc, u64 split)
     473             : {
     474             :         struct rb_node *node;
     475             : 
     476             :         split_cb(tree, orig, split);
     477             : 
     478      597918 :         prealloc->start = orig->start;
     479      597918 :         prealloc->end = split - 1;
     480      597918 :         prealloc->state = orig->state;
     481      597918 :         orig->start = split;
     482             : 
     483      597918 :         node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
     484             :                            &prealloc->rb_node, NULL, NULL);
     485      597913 :         if (node) {
     486           0 :                 free_extent_state(prealloc);
     487           0 :                 return -EEXIST;
     488             :         }
     489      597913 :         prealloc->tree = tree;
     490      597913 :         return 0;
     491             : }
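
For a concrete instance of the layout described above (values purely illustrative): if 'orig' covers [0, 8191] and split = 4096, then after split_state() returns, 'prealloc' holds [0, 4095], 'orig' holds [4096, 8191], and both nodes sit in the tree, together tiling the original range.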
     492             : 
     493             : static struct extent_state *next_state(struct extent_state *state)
     494             : {
     495     1273983 :         struct rb_node *next = rb_next(&state->rb_node);
     496     1273983 :         if (next)
     497      798086 :                 return rb_entry(next, struct extent_state, rb_node);
     498             :         else
     499             :                 return NULL;
     500             : }
     501             : 
     502             : /*
     503             :  * utility function to clear some bits in an extent state struct.
     504             :  * it will optionally wake up any one waiting on this state (wake == 1).
     505             :  *
     506             :  * If no bits are set on the state struct after clearing things, the
     507             :  * struct is freed and removed from the tree
     508             :  */
     509     1883638 : static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
     510             :                                             struct extent_state *state,
     511             :                                             unsigned long *bits, int wake)
     512             : {
     513             :         struct extent_state *next;
     514      941819 :         unsigned long bits_to_clear = *bits & ~EXTENT_CTLBITS;
     515             : 
     516      941819 :         if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
     517       51157 :                 u64 range = state->end - state->start + 1;
     518       51157 :                 WARN_ON(range > tree->dirty_bytes);
     519       51157 :                 tree->dirty_bytes -= range;
     520             :         }
     521             :         clear_state_cb(tree, state, bits);
     522      941830 :         state->state &= ~bits_to_clear;
     523      941830 :         if (wake)
     524      834402 :                 wake_up(&state->wq);
     525      941844 :         if (state->state == 0) {
     526             :                 next = next_state(state);
     527      330457 :                 if (state->tree) {
     528      330457 :                         rb_erase(&state->rb_node, &tree->state);
     529      330456 :                         state->tree = NULL;
     530      330456 :                         free_extent_state(state);
     531             :                 } else {
     532           0 :                         WARN_ON(1);
     533             :                 }
     534             :         } else {
     535      611379 :                 merge_state(tree, state);
     536             :                 next = next_state(state);
     537             :         }
     538      941835 :         return next;
     539             : }
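
For example (bit names from extent_io.h, values illustrative): if a state carries EXTENT_LOCKED | EXTENT_DELALLOC and the caller clears only EXTENT_DELALLOC, the node stays in the tree with EXTENT_LOCKED still set and is merged with its neighbours where possible; clearing both bits leaves state->state == 0, so the node is erased from the tree and freed, and the following node is returned so the caller can continue its sweep.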
     540             : 
     541             : static struct extent_state *
     542             : alloc_extent_state_atomic(struct extent_state *prealloc)
     543             : {
     544      962737 :         if (!prealloc)
     545        2241 :                 prealloc = alloc_extent_state(GFP_ATOMIC);
     546             : 
     547             :         return prealloc;
     548             : }
     549             : 
     550           0 : static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
     551             : {
     552           0 :         btrfs_panic(tree_fs_info(tree), err, "Locking error: "
     553             :                     "Extent tree was modified by another "
     554             :                     "thread while locked.");
     555             : }
     556             : 
     557             : /*
     558             :  * clear some bits on a range in the tree.  This may require splitting
     559             :  * or inserting elements in the tree, so the gfp mask is used to
     560             :  * indicate which allocations or sleeping are allowed.
     561             :  *
     562             :  * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
     563             :  * the given range from the tree regardless of state (ie for truncate).
     564             :  *
     565             :  * the range [start, end] is inclusive.
     566             :  *
     567             :  * This takes the tree lock, and returns 0 on success and < 0 on error.
     568             :  */
     569     1100499 : int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
     570             :                      unsigned long bits, int wake, int delete,
     571             :                      struct extent_state **cached_state,
     572             :                      gfp_t mask)
     573             : {
     574             :         struct extent_state *state;
     575             :         struct extent_state *cached;
     576             :         struct extent_state *prealloc = NULL;
     577             :         struct rb_node *node;
     578             :         u64 last_end;
     579             :         int err;
     580             :         int clear = 0;
     581             : 
     582             :         btrfs_debug_check_extent_io_range(tree, start, end);
     583             : 
     584     1100499 :         if (bits & EXTENT_DELALLOC)
     585      526283 :                 bits |= EXTENT_NORESERVE;
     586             : 
     587     1100499 :         if (delete)
     588      204857 :                 bits |= ~EXTENT_CTLBITS;
     589     1100499 :         bits |= EXTENT_FIRST_DELALLOC;
     590             : 
     591     1100499 :         if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
     592             :                 clear = 1;
     593             : again:
     594     1101537 :         if (!prealloc && (mask & __GFP_WAIT)) {
     595     1069637 :                 prealloc = alloc_extent_state(mask);
     596     1069636 :                 if (!prealloc)
     597             :                         return -ENOMEM;
     598             :         }
     599             : 
     600             :         spin_lock(&tree->lock);
     601     1101623 :         if (cached_state) {
     602      843781 :                 cached = *cached_state;
     603             : 
     604      843781 :                 if (clear) {
     605      778088 :                         *cached_state = NULL;
     606             :                         cached_state = NULL;
     607             :                 }
     608             : 
     609     1665334 :                 if (cached && cached->tree && cached->start <= start &&
     610      821553 :                     cached->end > start) {
     611      821538 :                         if (clear)
     612      775898 :                                 atomic_dec(&cached->refs);
     613             :                         state = cached;
     614      821533 :                         goto hit_next;
     615             :                 }
     616       22243 :                 if (clear)
     617        2184 :                         free_extent_state(cached);
     618             :         }
     619             :         /*
     620             :          * this search will find the extents that end after
     621             :          * our range starts
     622             :          */
     623             :         node = tree_search(tree, start);
     624      280079 :         if (!node)
     625             :                 goto out;
     626      272885 :         state = rb_entry(node, struct extent_state, rb_node);
     627             : hit_next:
     628     1130665 :         if (state->start > end)
     629             :                 goto out;
     630      948435 :         WARN_ON(state->end < start);
     631      948438 :         last_end = state->end;
     632             : 
     633             :         /* the state doesn't have the wanted bits, go ahead */
     634      948438 :         if (!(state->state & bits)) {
     635             :                 state = next_state(state);
     636       26645 :                 goto next;
     637             :         }
     638             : 
     639             :         /*
     640             :          *     | ---- desired range ---- |
     641             :          *  | state | or
     642             :          *  | ------------- state -------------- |
     643             :          *
     644             :          * We need to split the extent we found, and may flip
     645             :          * bits on second half.
     646             :          *
     647             :          * If the extent we found extends past our range, we
     648             :          * just split and search again.  It'll get split again
     649             :          * the next time though.
     650             :          *
     651             :          * If the extent we found is inside our range, we clear
     652             :          * the desired bit on it.
     653             :          */
     654             : 
     655      921794 :         if (state->start < start) {
     656             :                 prealloc = alloc_extent_state_atomic(prealloc);
     657         249 :                 BUG_ON(!prealloc);
     658         249 :                 err = split_state(tree, state, prealloc, start);
     659         249 :                 if (err)
     660           0 :                         extent_io_tree_panic(tree, err);
     661             : 
     662             :                 prealloc = NULL;
     663         249 :                 if (err)
     664             :                         goto out;
     665         249 :                 if (state->end <= end) {
     666         249 :                         state = clear_state_bit(tree, state, &bits, wake);
     667         249 :                         goto next;
     668             :                 }
     669             :                 goto search_again;
     670             :         }
     671             :         /*
     672             :          * | ---- desired range ---- |
     673             :          *                        | state |
     674             :          * We need to split the extent, and clear the bit
     675             :          * on the first half
     676             :          */
     677      921545 :         if (state->start <= end && state->end > end) {
     678             :                 prealloc = alloc_extent_state_atomic(prealloc);
     679         443 :                 BUG_ON(!prealloc);
     680         443 :                 err = split_state(tree, state, prealloc, end + 1);
     681         443 :                 if (err)
     682           0 :                         extent_io_tree_panic(tree, err);
     683             : 
     684         443 :                 if (wake)
     685         189 :                         wake_up(&state->wq);
     686             : 
     687         443 :                 clear_state_bit(tree, prealloc, &bits, wake);
     688             : 
     689             :                 prealloc = NULL;
     690         443 :                 goto out;
     691             :         }
     692             : 
     693      921102 :         state = clear_state_bit(tree, state, &bits, wake);
     694             : next:
     695      947982 :         if (last_end == (u64)-1)
     696             :                 goto out;
     697      947881 :         start = last_end + 1;
     698      984208 :         if (start <= end && state && !need_resched())
     699             :                 goto hit_next;
     700             :         goto search_again;
     701             : 
     702             : out:
     703             :         spin_unlock(&tree->lock);
     704     1100554 :         if (prealloc)
     705     1069252 :                 free_extent_state(prealloc);
     706             : 
     707             :         return 0;
     708             : 
     709             : search_again:
     710      911634 :         if (start > end)
     711             :                 goto out;
     712             :         spin_unlock(&tree->lock);
     713        1038 :         if (mask & __GFP_WAIT)
     714         978 :                 cond_resched();
     715             :         goto again;
     716             : }
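
A hedged usage sketch (not a call site from this file; 'inode', 'start' and 'end' are assumed locals, and the bit names come from extent_io.h): drop the locked and delalloc bits on an inclusive range, waking any sleepers; a cached_state pointer, typically filled by an earlier locking call, lets the lookup skip the tree search, and NULL contents are also accepted.

        struct extent_state *cached = NULL;   /* usually set by a prior lock of the range */
        int ret;

        /* wake == 1 kicks waiters; delete == 0 leaves other bits in place. */
        ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end,
                               EXTENT_LOCKED | EXTENT_DELALLOC, 1, 0,
                               &cached, GFP_NOFS);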
     717             : 
     718          56 : static void wait_on_state(struct extent_io_tree *tree,
     719             :                           struct extent_state *state)
     720             :                 __releases(tree->lock)
     721             :                 __acquires(tree->lock)
     722             : {
     723         112 :         DEFINE_WAIT(wait);
     724          56 :         prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
     725             :         spin_unlock(&tree->lock);
     726          56 :         schedule();
     727             :         spin_lock(&tree->lock);
     728          56 :         finish_wait(&state->wq, &wait);
     729          56 : }
     730             : 
     731             : /*
     732             :  * waits for one or more bits to clear on a range in the state tree.
     733             :  * The range [start, end] is inclusive.
     734             :  * The tree lock is taken by this function
     735             :  */
     736          44 : static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
     737             :                             unsigned long bits)
     738             : {
     739             :         struct extent_state *state;
     740             :         struct rb_node *node;
     741             : 
     742             :         btrfs_debug_check_extent_io_range(tree, start, end);
     743             : 
     744             :         spin_lock(&tree->lock);
     745             : again:
     746             :         while (1) {
     747             :                 /*
     748             :                  * this search will find all the extents that end after
     749             :                  * our range starts
     750             :                  */
     751             :                 node = tree_search(tree, start);
     752             : process_node:
     753         132 :                 if (!node)
     754             :                         break;
     755             : 
     756         115 :                 state = rb_entry(node, struct extent_state, rb_node);
     757             : 
     758         115 :                 if (state->start > end)
     759             :                         goto out;
     760             : 
     761         115 :                 if (state->state & bits) {
     762             :                         start = state->start;
     763          56 :                         atomic_inc(&state->refs);
     764          56 :                         wait_on_state(tree, state);
     765          56 :                         free_extent_state(state);
     766          56 :                         goto again;
     767             :                 }
     768          59 :                 start = state->end + 1;
     769             : 
     770          59 :                 if (start > end)
     771             :                         break;
     772             : 
     773          32 :                 if (!cond_resched_lock(&tree->lock)) {
     774          32 :                         node = rb_next(node);
     775          32 :                         goto process_node;
     776             :                 }
     777             :         }
     778             : out:
     779             :         spin_unlock(&tree->lock);
     780          44 : }
     781             : 
     782     2485812 : static void set_state_bits(struct extent_io_tree *tree,
     783             :                            struct extent_state *state,
     784             :                            unsigned long *bits)
     785             : {
     786     1242906 :         unsigned long bits_to_set = *bits & ~EXTENT_CTLBITS;
     787             : 
     788             :         set_state_cb(tree, state, bits);
     789     1242945 :         if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
     790      126806 :                 u64 range = state->end - state->start + 1;
     791      126806 :                 tree->dirty_bytes += range;
     792             :         }
     793     1242945 :         state->state |= bits_to_set;
     794     1242945 : }
     795             : 
     796     1349025 : static void cache_state(struct extent_state *state,
     797             :                         struct extent_state **cached_ptr)
     798             : {
     799     1349025 :         if (cached_ptr && !(*cached_ptr)) {
     800      835966 :                 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) {
     801      775868 :                         *cached_ptr = state;
     802      775868 :                         atomic_inc(&state->refs);
     803             :                 }
     804             :         }
     805     1349062 : }
     806             : 
     807             : /*
     808             :  * set some bits on a range in the tree.  This may require allocations or
     809             :  * sleeping, so the gfp mask is used to indicate what is allowed.
     810             :  *
     811             :  * If any of the exclusive bits are set, this will fail with -EEXIST if some
     812             :  * part of the range already has the desired bits set.  The start of the
     813             :  * existing range is returned in failed_start in this case.
     814             :  *
     815             :  * [start, end] is inclusive This takes the tree lock.
     816             :  */
     817             : 
     818             : static int __must_check
     819     1186226 : __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
     820             :                  unsigned long bits, unsigned long exclusive_bits,
     821             :                  u64 *failed_start, struct extent_state **cached_state,
     822             :                  gfp_t mask)
     823             : {
     824             :         struct extent_state *state;
     825             :         struct extent_state *prealloc = NULL;
     826             :         struct rb_node *node;
     827             :         struct rb_node **p;
     828             :         struct rb_node *parent;
     829             :         int err = 0;
     830             :         u64 last_start;
     831             :         u64 last_end;
     832             : 
     833             :         btrfs_debug_check_extent_io_range(tree, start, end);
     834             : 
     835     1186226 :         bits |= EXTENT_FIRST_DELALLOC;
     836             : again:
     837     1209997 :         if (!prealloc && (mask & __GFP_WAIT)) {
     838     1179170 :                 prealloc = alloc_extent_state(mask);
     839     1179180 :                 BUG_ON(!prealloc);
     840             :         }
     841             : 
     842             :         spin_lock(&tree->lock);
     843     1210110 :         if (cached_state && *cached_state) {
     844             :                 state = *cached_state;
     845      140860 :                 if (state->start <= start && state->end > start &&
     846       68966 :                     state->tree) {
     847             :                         node = &state->rb_node;
     848             :                         goto hit_next;
     849             :                 }
     850             :         }
     851             :         /*
     852             :          * this search will find all the extents that end after
     853             :          * our range starts.
     854             :          */
     855     1141153 :         node = tree_search_for_insert(tree, start, &p, &parent);
     856     1141155 :         if (!node) {
     857             :                 prealloc = alloc_extent_state_atomic(prealloc);
     858      307658 :                 BUG_ON(!prealloc);
     859      307658 :                 err = insert_state(tree, prealloc, start, end,
     860             :                                    &p, &parent, &bits);
     861      307644 :                 if (err)
     862           0 :                         extent_io_tree_panic(tree, err);
     863             : 
     864      307644 :                 cache_state(prealloc, cached_state);
     865             :                 prealloc = NULL;
     866      307650 :                 goto out;
     867             :         }
     868      833497 :         state = rb_entry(node, struct extent_state, rb_node);
     869             : hit_next:
     870      929642 :         last_start = state->start;
     871      929642 :         last_end = state->end;
     872             : 
     873             :         /*
     874             :          * | ---- desired range ---- |
     875             :          * | state |
     876             :          *
     877             :          * Just lock what we found and keep going
     878             :          */
     879      929642 :         if (state->start == start && state->end <= end) {
     880      275246 :                 if (state->state & exclusive_bits) {
     881          34 :                         *failed_start = state->start;
     882             :                         err = -EEXIST;
     883          34 :                         goto out;
     884             :                 }
     885             : 
     886      275212 :                 set_state_bits(tree, state, &bits);
     887      275206 :                 cache_state(state, cached_state);
     888      275201 :                 merge_state(tree, state);
     889      275196 :                 if (last_end == (u64)-1)
     890             :                         goto out;
     891      275193 :                 start = last_end + 1;
     892             :                 state = next_state(state);
     893      290770 :                 if (start < end && state && state->start == start &&
     894             :                     !need_resched())
     895             :                         goto hit_next;
     896             :                 goto search_again;
     897             :         }
     898             : 
     899             :         /*
     900             :          *     | ---- desired range ---- |
     901             :          * | state |
     902             :          *   or
     903             :          * | ------------- state -------------- |
     904             :          *
     905             :          * We need to split the extent we found, and may flip bits on
     906             :          * second half.
     907             :          *
     908             :          * If the extent we found extends past our
     909             :          * range, we just split and search again.  It'll get split
     910             :          * again the next time though.
     911             :          *
     912             :          * If the extent we found is inside our range, we set the
     913             :          * desired bit on it.
     914             :          */
     915      654396 :         if (state->start < start) {
     916       44587 :                 if (state->state & exclusive_bits) {
     917           2 :                         *failed_start = start;
     918             :                         err = -EEXIST;
     919           2 :                         goto out;
     920             :                 }
     921             : 
     922             :                 prealloc = alloc_extent_state_atomic(prealloc);
     923       44585 :                 BUG_ON(!prealloc);
     924       44585 :                 err = split_state(tree, state, prealloc, start);
     925       44583 :                 if (err)
     926           0 :                         extent_io_tree_panic(tree, err);
     927             : 
     928             :                 prealloc = NULL;
     929       44583 :                 if (err)
     930             :                         goto out;
     931       44583 :                 if (state->end <= end) {
     932       30306 :                         set_state_bits(tree, state, &bits);
     933       30306 :                         cache_state(state, cached_state);
     934       30306 :                         merge_state(tree, state);
     935       30306 :                         if (last_end == (u64)-1)
     936             :                                 goto out;
     937       30306 :                         start = last_end + 1;
     938             :                         state = next_state(state);
     939       41981 :                         if (start < end && state && state->start == start &&
     940             :                             !need_resched())
     941             :                                 goto hit_next;
     942             :                 }
     943             :                 goto search_again;
     944             :         }
     945             :         /*
     946             :          * | ---- desired range ---- |
     947             :          *     | state | or               | state |
     948             :          *
     949             :          * There's a hole, we need to insert something in it and
     950             :          * ignore the extent we found.
     951             :          */
     952      609809 :         if (state->start > start) {
     953             :                 u64 this_end;
     954       57158 :                 if (end < last_start)
     955             :                         this_end = end;
     956             :                 else
     957        5786 :                         this_end = last_start - 1;
     958             : 
     959             :                 prealloc = alloc_extent_state_atomic(prealloc);
     960       57158 :                 BUG_ON(!prealloc);
     961             : 
     962             :                 /*
     963             :                  * Avoid to free 'prealloc' if it can be merged with
     964             :                  * the later extent.
     965             :                  */
     966       57158 :                 err = insert_state(tree, prealloc, start, this_end,
     967             :                                    NULL, NULL, &bits);
     968       57160 :                 if (err)
     969           0 :                         extent_io_tree_panic(tree, err);
     970             : 
     971       57160 :                 cache_state(prealloc, cached_state);
     972             :                 prealloc = NULL;
     973       57160 :                 start = this_end + 1;
     974       57160 :                 goto search_again;
     975             :         }
     976             :         /*
     977             :          * | ---- desired range ---- |
     978             :          *                        | state |
     979             :          * We need to split the extent, and set the bit
     980             :          * on the first half
     981             :          */
     982      552651 :         if (state->start <= end && state->end > end) {
     983      552652 :                 if (state->state & exclusive_bits) {
     984           8 :                         *failed_start = start;
     985             :                         err = -EEXIST;
     986           8 :                         goto out;
     987             :                 }
     988             : 
     989             :                 prealloc = alloc_extent_state_atomic(prealloc);
     990      552644 :                 BUG_ON(!prealloc);
     991      552644 :                 err = split_state(tree, state, prealloc, end + 1);
     992      552641 :                 if (err)
     993           0 :                         extent_io_tree_panic(tree, err);
     994             : 
     995      552641 :                 set_state_bits(tree, prealloc, &bits);
     996      552638 :                 cache_state(prealloc, cached_state);
     997      552643 :                 merge_state(tree, prealloc);
     998             :                 prealloc = NULL;
     999      552643 :                 goto out;
    1000             :         }
    1001             : 
    1002             :         goto search_again;
    1003             : 
    1004             : out:
    1005             :         spin_unlock(&tree->lock);
    1006     1186218 :         if (prealloc)
    1007      219202 :                 free_extent_state(prealloc);
    1008             : 
    1009     1186240 :         return err;
    1010             : 
    1011             : search_again:
    1012      349751 :         if (start > end)
    1013             :                 goto out;
    1014             :         spin_unlock(&tree->lock);
    1015       23771 :         if (mask & __GFP_WAIT)
    1016       23522 :                 cond_resched();
    1017             :         goto again;
    1018             : }
    1019             : 
    1020       25258 : int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
    1021             :                    unsigned long bits, u64 * failed_start,
    1022             :                    struct extent_state **cached_state, gfp_t mask)
    1023             : {
    1024      722852 :         return __set_extent_bit(tree, start, end, bits, 0, failed_start,
    1025             :                                 cached_state, mask);
    1026             : }
    1027             : 
    1028             : 
    1029             : /**
    1030             :  * convert_extent_bit - convert all bits in a given range from one bit to
    1031             :  *                      another
    1032             :  * @tree:       the io tree to search
    1033             :  * @start:      the start offset in bytes
    1034             :  * @end:        the end offset in bytes (inclusive)
    1035             :  * @bits:       the bits to set in this range
    1036             :  * @clear_bits: the bits to clear in this range
    1037             :  * @cached_state:       state that we're going to cache
    1038             :  * @mask:       the allocation mask
    1039             :  *
    1040             :  * This will go through and set bits for the given range.  If any states exist
     1041             :  * already in this range, they are set with the given bits and cleared of the
     1042             :  * clear_bits.  This is only meant to be used by things that are mergeable, i.e.
     1043             :  * converting from, say, DELALLOC to DIRTY.  This is not meant to be used with
    1044             :  * boundary bits like LOCK.
    1045             :  */
    1046       20035 : int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
    1047             :                        unsigned long bits, unsigned long clear_bits,
    1048             :                        struct extent_state **cached_state, gfp_t mask)
    1049             : {
    1050             :         struct extent_state *state;
    1051             :         struct extent_state *prealloc = NULL;
    1052             :         struct rb_node *node;
    1053             :         struct rb_node **p;
    1054             :         struct rb_node *parent;
    1055             :         int err = 0;
    1056             :         u64 last_start;
    1057             :         u64 last_end;
    1058             : 
    1059             :         btrfs_debug_check_extent_io_range(tree, start, end);
    1060             : 
    1061             : again:
    1062       20035 :         if (!prealloc && (mask & __GFP_WAIT)) {
    1063       20035 :                 prealloc = alloc_extent_state(mask);
    1064       20035 :                 if (!prealloc)
    1065             :                         return -ENOMEM;
    1066             :         }
    1067             : 
    1068             :         spin_lock(&tree->lock);
    1069       20035 :         if (cached_state && *cached_state) {
    1070             :                 state = *cached_state;
    1071           0 :                 if (state->start <= start && state->end > start &&
    1072           0 :                     state->tree) {
    1073             :                         node = &state->rb_node;
    1074             :                         goto hit_next;
    1075             :                 }
    1076             :         }
    1077             : 
    1078             :         /*
    1079             :          * this search will find all the extents that end after
    1080             :          * our range starts.
    1081             :          */
    1082       20035 :         node = tree_search_for_insert(tree, start, &p, &parent);
    1083       20035 :         if (!node) {
    1084             :                 prealloc = alloc_extent_state_atomic(prealloc);
    1085           0 :                 if (!prealloc) {
    1086             :                         err = -ENOMEM;
    1087             :                         goto out;
    1088             :                 }
    1089           0 :                 err = insert_state(tree, prealloc, start, end,
    1090             :                                    &p, &parent, &bits);
    1091           0 :                 if (err)
    1092           0 :                         extent_io_tree_panic(tree, err);
    1093           0 :                 cache_state(prealloc, cached_state);
    1094             :                 prealloc = NULL;
    1095           0 :                 goto out;
    1096             :         }
    1097       20035 :         state = rb_entry(node, struct extent_state, rb_node);
    1098             : hit_next:
    1099       20035 :         last_start = state->start;
    1100       20035 :         last_end = state->end;
    1101             : 
    1102             :         /*
    1103             :          * | ---- desired range ---- |
    1104             :          * | state |
    1105             :          *
    1106             :          * Just lock what we found and keep going
    1107             :          */
    1108       20035 :         if (state->start == start && state->end <= end) {
    1109       20035 :                 set_state_bits(tree, state, &bits);
    1110       20035 :                 cache_state(state, cached_state);
    1111       20035 :                 state = clear_state_bit(tree, state, &clear_bits, 0);
    1112       20035 :                 if (last_end == (u64)-1)
    1113             :                         goto out;
    1114       20035 :                 start = last_end + 1;
    1115       20035 :                 if (start < end && state && state->start == start &&
    1116             :                     !need_resched())
    1117             :                         goto hit_next;
    1118             :                 goto search_again;
    1119             :         }
    1120             : 
    1121             :         /*
    1122             :          *     | ---- desired range ---- |
    1123             :          * | state |
    1124             :          *   or
    1125             :          * | ------------- state -------------- |
    1126             :          *
    1127             :          * We need to split the extent we found, and may flip bits on
    1128             :          * second half.
    1129             :          *
    1130             :          * If the extent we found extends past our
    1131             :          * range, we just split and search again.  It'll get split
    1132             :          * again the next time though.
    1133             :          *
    1134             :          * If the extent we found is inside our range, we set the
    1135             :          * desired bit on it.
    1136             :          */
    1137           0 :         if (state->start < start) {
    1138             :                 prealloc = alloc_extent_state_atomic(prealloc);
    1139           0 :                 if (!prealloc) {
    1140             :                         err = -ENOMEM;
    1141             :                         goto out;
    1142             :                 }
    1143           0 :                 err = split_state(tree, state, prealloc, start);
    1144           0 :                 if (err)
    1145           0 :                         extent_io_tree_panic(tree, err);
    1146             :                 prealloc = NULL;
    1147           0 :                 if (err)
    1148             :                         goto out;
    1149           0 :                 if (state->end <= end) {
    1150           0 :                         set_state_bits(tree, state, &bits);
    1151           0 :                         cache_state(state, cached_state);
    1152           0 :                         state = clear_state_bit(tree, state, &clear_bits, 0);
    1153           0 :                         if (last_end == (u64)-1)
    1154             :                                 goto out;
    1155           0 :                         start = last_end + 1;
    1156           0 :                         if (start < end && state && state->start == start &&
    1157             :                             !need_resched())
    1158             :                                 goto hit_next;
    1159             :                 }
    1160             :                 goto search_again;
    1161             :         }
    1162             :         /*
    1163             :          * | ---- desired range ---- |
    1164             :          *     | state | or               | state |
    1165             :          *
    1166             :          * There's a hole, we need to insert something in it and
    1167             :          * ignore the extent we found.
    1168             :          */
    1169           0 :         if (state->start > start) {
    1170             :                 u64 this_end;
    1171           0 :                 if (end < last_start)
    1172             :                         this_end = end;
    1173             :                 else
    1174           0 :                         this_end = last_start - 1;
    1175             : 
    1176             :                 prealloc = alloc_extent_state_atomic(prealloc);
    1177           0 :                 if (!prealloc) {
    1178             :                         err = -ENOMEM;
    1179             :                         goto out;
    1180             :                 }
    1181             : 
    1182             :                 /*
     1183             :                  * Avoid freeing 'prealloc' if it can be merged with
     1184             :                  * the next extent.
    1185             :                  */
    1186           0 :                 err = insert_state(tree, prealloc, start, this_end,
    1187             :                                    NULL, NULL, &bits);
    1188           0 :                 if (err)
    1189           0 :                         extent_io_tree_panic(tree, err);
    1190           0 :                 cache_state(prealloc, cached_state);
    1191             :                 prealloc = NULL;
    1192           0 :                 start = this_end + 1;
    1193           0 :                 goto search_again;
    1194             :         }
    1195             :         /*
    1196             :          * | ---- desired range ---- |
    1197             :          *                        | state |
    1198             :          * We need to split the extent, and set the bit
    1199             :          * on the first half
    1200             :          */
    1201           0 :         if (state->start <= end && state->end > end) {
    1202             :                 prealloc = alloc_extent_state_atomic(prealloc);
    1203           0 :                 if (!prealloc) {
    1204             :                         err = -ENOMEM;
    1205             :                         goto out;
    1206             :                 }
    1207             : 
    1208           0 :                 err = split_state(tree, state, prealloc, end + 1);
    1209           0 :                 if (err)
    1210           0 :                         extent_io_tree_panic(tree, err);
    1211             : 
    1212           0 :                 set_state_bits(tree, prealloc, &bits);
    1213           0 :                 cache_state(prealloc, cached_state);
    1214           0 :                 clear_state_bit(tree, prealloc, &clear_bits, 0);
    1215             :                 prealloc = NULL;
    1216           0 :                 goto out;
    1217             :         }
    1218             : 
    1219             :         goto search_again;
    1220             : 
    1221             : out:
    1222             :         spin_unlock(&tree->lock);
    1223       20035 :         if (prealloc)
    1224       20035 :                 free_extent_state(prealloc);
    1225             : 
    1226       20035 :         return err;
    1227             : 
    1228             : search_again:
    1229       20035 :         if (start > end)
    1230             :                 goto out;
    1231             :         spin_unlock(&tree->lock);
    1232           0 :         if (mask & __GFP_WAIT)
    1233           0 :                 cond_resched();
    1234             :         goto again;
    1235             : }
    1236             : 
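/*
 * A minimal usage sketch (not part of this file or its coverage data),
 * assuming a caller that wants the DELALLOC -> DIRTY conversion mentioned
 * above.  The helper name and the range passed in are hypothetical.
 */
static int example_convert_delalloc_to_dirty(struct extent_io_tree *tree,
                                             u64 start, u64 end)
{
        struct extent_state *cached = NULL;
        int ret;

        /* set EXTENT_DIRTY and clear EXTENT_DELALLOC over [start, end] */
        ret = convert_extent_bit(tree, start, end, EXTENT_DIRTY,
                                 EXTENT_DELALLOC, &cached, GFP_NOFS);
        free_extent_state(cached);      /* drop the ref taken by cache_state */
        return ret;
}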
    1237             : /* wrappers around set/clear extent bit */
    1238      126731 : int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
    1239             :                      gfp_t mask)
    1240             : {
    1241      126737 :         return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
    1242             :                               NULL, mask);
    1243             : }
    1244             : 
    1245        3159 : int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
    1246             :                     unsigned long bits, gfp_t mask)
    1247             : {
    1248        3159 :         return set_extent_bit(tree, start, end, bits, NULL,
    1249             :                               NULL, mask);
    1250             : }
    1251             : 
    1252        3025 : int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
    1253             :                       unsigned long bits, gfp_t mask)
    1254             : {
    1255        3025 :         return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
    1256             : }
    1257             : 
    1258      131791 : int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
    1259             :                         struct extent_state **cached_state, gfp_t mask)
    1260             : {
    1261      131792 :         return set_extent_bit(tree, start, end,
    1262             :                               EXTENT_DELALLOC | EXTENT_UPTODATE,
    1263             :                               NULL, cached_state, mask);
    1264             : }
    1265             : 
    1266        1337 : int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
    1267             :                       struct extent_state **cached_state, gfp_t mask)
    1268             : {
    1269        1337 :         return set_extent_bit(tree, start, end,
    1270             :                               EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
    1271             :                               NULL, cached_state, mask);
    1272             : }
    1273             : 
    1274       32303 : int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
    1275             :                        gfp_t mask)
    1276             : {
    1277       32303 :         return clear_extent_bit(tree, start, end,
    1278             :                                 EXTENT_DIRTY | EXTENT_DELALLOC |
    1279             :                                 EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
    1280             : }
    1281             : 
    1282        1384 : int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
    1283             :                      gfp_t mask)
    1284             : {
    1285        1384 :         return set_extent_bit(tree, start, end, EXTENT_NEW, NULL,
    1286             :                               NULL, mask);
    1287             : }
    1288             : 
    1289         452 : int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
    1290             :                         struct extent_state **cached_state, gfp_t mask)
    1291             : {
    1292         452 :         return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
    1293             :                               cached_state, mask);
    1294             : }
    1295             : 
    1296           0 : int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
    1297             :                           struct extent_state **cached_state, gfp_t mask)
    1298             : {
    1299           0 :         return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
    1300             :                                 cached_state, mask);
    1301             : }
    1302             : 
    1303             : /*
     1304             :  * either insert or lock the state struct between start and end.  The call
     1305             :  * retries, waiting on EXTENT_LOCKED, until the whole range is locked.
    1306             :  */
    1307      463423 : int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
    1308             :                      unsigned long bits, struct extent_state **cached_state)
    1309             : {
    1310             :         int err;
    1311             :         u64 failed_start;
    1312             :         while (1) {
    1313      463467 :                 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
    1314             :                                        EXTENT_LOCKED, &failed_start,
    1315             :                                        cached_state, GFP_NOFS);
    1316      463459 :                 if (err == -EEXIST) {
    1317          44 :                         wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
    1318          44 :                         start = failed_start;
    1319             :                 } else
    1320             :                         break;
    1321          44 :                 WARN_ON(start > end);
    1322             :         }
    1323      463415 :         return err;
    1324             : }
    1325             : 
    1326        2746 : int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
    1327             : {
    1328       90403 :         return lock_extent_bits(tree, start, end, 0, NULL);
    1329             : }
    1330             : 
    1331           3 : int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
    1332             : {
    1333             :         int err;
    1334             :         u64 failed_start;
    1335             : 
    1336           3 :         err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
    1337             :                                &failed_start, NULL, GFP_NOFS);
    1338           3 :         if (err == -EEXIST) {
    1339           0 :                 if (failed_start > start)
    1340           0 :                         clear_extent_bit(tree, start, failed_start - 1,
    1341             :                                          EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
    1342             :                 return 0;
    1343             :         }
    1344             :         return 1;
    1345             : }
    1346             : 
    1347      112703 : int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
    1348             :                          struct extent_state **cached, gfp_t mask)
    1349             : {
    1350      547972 :         return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
    1351             :                                 mask);
    1352             : }
    1353             : 
    1354        2967 : int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
    1355             : {
    1356        2967 :         return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
    1357             :                                 GFP_NOFS);
    1358             : }
    1359             : 
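/*
 * A minimal usage sketch (not part of this file or its coverage data) of the
 * lock/unlock pairing provided above; the helper name is hypothetical, the
 * calls are the ones defined in this file.
 */
static void example_with_extent_locked(struct extent_io_tree *tree,
                                       u64 start, u64 end)
{
        lock_extent(tree, start, end);          /* waits until [start, end] is locked */
        /* ... operate on the extent state covering [start, end] ... */
        unlock_extent(tree, start, end);        /* clears EXTENT_LOCKED again */
}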
    1360         155 : int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
    1361             : {
    1362         155 :         unsigned long index = start >> PAGE_CACHE_SHIFT;
    1363         155 :         unsigned long end_index = end >> PAGE_CACHE_SHIFT;
    1364             :         struct page *page;
    1365             : 
    1366        4572 :         while (index <= end_index) {
    1367        4262 :                 page = find_get_page(inode->i_mapping, index);
    1368        4232 :                 BUG_ON(!page); /* Pages should be in the extent_io_tree */
    1369        4232 :                 clear_page_dirty_for_io(page);
    1370        4250 :                 page_cache_release(page);
    1371        4262 :                 index++;
    1372             :         }
    1373         155 :         return 0;
    1374             : }
    1375             : 
    1376           2 : int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
    1377             : {
    1378           2 :         unsigned long index = start >> PAGE_CACHE_SHIFT;
    1379           2 :         unsigned long end_index = end >> PAGE_CACHE_SHIFT;
    1380             :         struct page *page;
    1381             : 
    1382           6 :         while (index <= end_index) {
    1383           2 :                 page = find_get_page(inode->i_mapping, index);
    1384           2 :                 BUG_ON(!page); /* Pages should be in the extent_io_tree */
    1385           2 :                 account_page_redirty(page);
    1386           2 :                 __set_page_dirty_nobuffers(page);
    1387           2 :                 page_cache_release(page);
    1388           2 :                 index++;
    1389             :         }
    1390           2 :         return 0;
    1391             : }
    1392             : 
    1393             : /*
     1394             :  * helper function to mark both the pages and the extents in the tree as writeback
    1395             :  */
    1396     1308912 : static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
    1397             : {
    1398     1308912 :         unsigned long index = start >> PAGE_CACHE_SHIFT;
    1399     1308912 :         unsigned long end_index = end >> PAGE_CACHE_SHIFT;
    1400             :         struct page *page;
    1401             : 
    1402     2617824 :         while (index <= end_index) {
    1403     1308913 :                 page = find_get_page(tree->mapping, index);
    1404     1308914 :                 BUG_ON(!page); /* Pages should be in the extent_io_tree */
    1405             :                 set_page_writeback(page);
    1406     1308882 :                 page_cache_release(page);
    1407     1308912 :                 index++;
    1408             :         }
    1409     1308911 :         return 0;
    1410             : }
    1411             : 
    1412             : /* find the first state struct with 'bits' set after 'start', and
     1413             :  * return it.  tree->lock must be held.  NULL will be returned if
     1414             :  * nothing was found after 'start'.
    1415             :  */
    1416             : static struct extent_state *
    1417      123332 : find_first_extent_bit_state(struct extent_io_tree *tree,
    1418             :                             u64 start, unsigned long bits)
    1419             : {
    1420             :         struct rb_node *node;
    1421             :         struct extent_state *state;
    1422             : 
    1423             :         /*
    1424             :          * this search will find all the extents that end after
    1425             :          * our range starts.
    1426             :          */
    1427             :         node = tree_search(tree, start);
    1428      123332 :         if (!node)
    1429             :                 goto out;
    1430             : 
    1431             :         while (1) {
    1432      106257 :                 state = rb_entry(node, struct extent_state, rb_node);
    1433      106257 :                 if (state->end >= start && (state->state & bits))
    1434             :                         return state;
    1435             : 
    1436          66 :                 node = rb_next(node);
    1437          66 :                 if (!node)
    1438             :                         break;
    1439             :         }
    1440             : out:
    1441             :         return NULL;
    1442             : }
    1443             : 
    1444             : /*
    1445             :  * find the first offset in the io tree with 'bits' set. zero is
    1446             :  * returned if we find something, and *start_ret and *end_ret are
    1447             :  * set to reflect the state struct that was found.
    1448             :  *
     1449             :  * If nothing was found, 1 is returned; if something was found, 0 is returned.
    1450             :  */
    1451      123332 : int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
    1452             :                           u64 *start_ret, u64 *end_ret, unsigned long bits,
    1453             :                           struct extent_state **cached_state)
    1454             : {
    1455             :         struct extent_state *state;
    1456             :         struct rb_node *n;
    1457             :         int ret = 1;
    1458             : 
    1459             :         spin_lock(&tree->lock);
    1460      123332 :         if (cached_state && *cached_state) {
    1461             :                 state = *cached_state;
    1462           0 :                 if (state->end == start - 1 && state->tree) {
    1463           0 :                         n = rb_next(&state->rb_node);
    1464           0 :                         while (n) {
    1465           0 :                                 state = rb_entry(n, struct extent_state,
    1466             :                                                  rb_node);
    1467           0 :                                 if (state->state & bits)
    1468             :                                         goto got_it;
    1469           0 :                                 n = rb_next(n);
    1470             :                         }
    1471           0 :                         free_extent_state(*cached_state);
    1472           0 :                         *cached_state = NULL;
    1473           0 :                         goto out;
    1474             :                 }
    1475           0 :                 free_extent_state(*cached_state);
    1476           0 :                 *cached_state = NULL;
    1477             :         }
    1478             : 
    1479      123332 :         state = find_first_extent_bit_state(tree, start, bits);
    1480             : got_it:
    1481      123332 :         if (state) {
    1482      106191 :                 cache_state(state, cached_state);
    1483      106191 :                 *start_ret = state->start;
    1484      106191 :                 *end_ret = state->end;
    1485             :                 ret = 0;
    1486             :         }
    1487             : out:
    1488             :         spin_unlock(&tree->lock);
    1489      123332 :         return ret;
    1490             : }
    1491             : 
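/*
 * A minimal usage sketch (not part of this file or its coverage data):
 * walking every range with a given bit set, using the 0-on-success return
 * documented above.  The helper name and the choice of EXTENT_DIRTY are
 * hypothetical.
 */
static void example_walk_dirty_ranges(struct extent_io_tree *tree)
{
        u64 start = 0, found_start, found_end;

        while (!find_first_extent_bit(tree, start, &found_start, &found_end,
                                      EXTENT_DIRTY, NULL)) {
                /* [found_start, found_end] has EXTENT_DIRTY set */
                if (found_end == (u64)-1)
                        break;
                start = found_end + 1;
        }
}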
    1492             : /*
    1493             :  * find a contiguous range of bytes in the file marked as delalloc, not
     1494             :  * more than 'max_bytes'.  start and end are used to return the range.
    1495             :  *
    1496             :  * 1 is returned if we find something, 0 if nothing was in the tree
    1497             :  */
    1498     1312844 : static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
    1499             :                                         u64 *start, u64 *end, u64 max_bytes,
    1500             :                                         struct extent_state **cached_state)
    1501             : {
    1502             :         struct rb_node *node;
    1503             :         struct extent_state *state;
    1504     1312844 :         u64 cur_start = *start;
    1505             :         u64 found = 0;
    1506             :         u64 total_bytes = 0;
    1507             : 
    1508             :         spin_lock(&tree->lock);
    1509             : 
    1510             :         /*
    1511             :          * this search will find all the extents that end after
    1512             :          * our range starts.
    1513             :          */
    1514             :         node = tree_search(tree, cur_start);
    1515     1312861 :         if (!node) {
    1516             :                 if (!found)
    1517           0 :                         *end = (u64)-1;
    1518             :                 goto out;
    1519             :         }
    1520             : 
    1521             :         while (1) {
    1522     1324052 :                 state = rb_entry(node, struct extent_state, rb_node);
    1523     1332647 :                 if (found && (state->start != cur_start ||
    1524        8595 :                               (state->state & EXTENT_BOUNDARY))) {
    1525             :                         goto out;
    1526             :                 }
    1527     1320421 :                 if (!(state->state & EXTENT_DELALLOC)) {
    1528     1290539 :                         if (!found)
    1529     1282989 :                                 *end = state->end;
    1530             :                         goto out;
    1531             :                 }
    1532       29882 :                 if (!found) {
    1533       29879 :                         *start = state->start;
    1534       29879 :                         *cached_state = state;
    1535       29879 :                         atomic_inc(&state->refs);
    1536             :                 }
    1537       29884 :                 found++;
    1538       29884 :                 *end = state->end;
    1539       29884 :                 cur_start = state->end + 1;
    1540       29884 :                 node = rb_next(node);
    1541       29884 :                 total_bytes += state->end - state->start + 1;
    1542       29884 :                 if (total_bytes >= max_bytes)
    1543             :                         break;
    1544       29882 :                 if (!node)
    1545             :                         break;
    1546             :         }
    1547             : out:
    1548             :         spin_unlock(&tree->lock);
    1549     1312840 :         return found;
    1550             : }
    1551             : 
    1552           0 : static noinline void __unlock_for_delalloc(struct inode *inode,
    1553             :                                            struct page *locked_page,
    1554             :                                            u64 start, u64 end)
    1555             : {
    1556             :         int ret;
    1557             :         struct page *pages[16];
    1558           0 :         unsigned long index = start >> PAGE_CACHE_SHIFT;
    1559           0 :         unsigned long end_index = end >> PAGE_CACHE_SHIFT;
    1560           0 :         unsigned long nr_pages = end_index - index + 1;
    1561             :         int i;
    1562             : 
    1563           0 :         if (index == locked_page->index && end_index == index)
    1564           0 :                 return;
    1565             : 
    1566           0 :         while (nr_pages > 0) {
    1567           0 :                 ret = find_get_pages_contig(inode->i_mapping, index,
    1568           0 :                                      min_t(unsigned long, nr_pages,
    1569             :                                      ARRAY_SIZE(pages)), pages);
    1570           0 :                 for (i = 0; i < ret; i++) {
    1571           0 :                         if (pages[i] != locked_page)
    1572           0 :                                 unlock_page(pages[i]);
    1573           0 :                         page_cache_release(pages[i]);
    1574             :                 }
    1575           0 :                 nr_pages -= ret;
    1576           0 :                 index += ret;
    1577           0 :                 cond_resched();
    1578             :         }
    1579             : }
    1580             : 
    1581       29879 : static noinline int lock_delalloc_pages(struct inode *inode,
    1582             :                                         struct page *locked_page,
    1583             :                                         u64 delalloc_start,
    1584             :                                         u64 delalloc_end)
    1585             : {
    1586       29879 :         unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
    1587             :         unsigned long start_index = index;
    1588       29879 :         unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
    1589             :         unsigned long pages_locked = 0;
    1590             :         struct page *pages[16];
    1591             :         unsigned long nrpages;
    1592             :         int ret;
    1593             :         int i;
    1594             : 
    1595             :         /* the caller is responsible for locking the start index */
    1596       29879 :         if (index == locked_page->index && index == end_index)
    1597             :                 return 0;
    1598             : 
    1599             :         /* skip the page at the start index */
    1600       14714 :         nrpages = end_index - index + 1;
    1601      115205 :         while (nrpages > 0) {
    1602       85780 :                 ret = find_get_pages_contig(inode->i_mapping, index,
    1603       85780 :                                      min_t(unsigned long,
    1604             :                                      nrpages, ARRAY_SIZE(pages)), pages);
    1605       85779 :                 if (ret == 0) {
    1606             :                         ret = -EAGAIN;
    1607             :                         goto done;
    1608             :                 }
    1609             :                 /* now we have an array of pages, lock them all */
    1610     1300542 :                 for (i = 0; i < ret; i++) {
    1611             :                         /*
    1612             :                          * the caller is taking responsibility for
    1613             :                          * locked_page
    1614             :                          */
    1615     1300538 :                         if (pages[i] != locked_page) {
    1616     1285819 :                                 lock_page(pages[i]);
    1617     3857475 :                                 if (!PageDirty(pages[i]) ||
    1618     1285825 :                                     pages[i]->mapping != inode->i_mapping) {
    1619             :                                         ret = -EAGAIN;
    1620           0 :                                         unlock_page(pages[i]);
    1621           0 :                                         page_cache_release(pages[i]);
    1622           0 :                                         goto done;
    1623             :                                 }
    1624             :                         }
    1625     1300544 :                         page_cache_release(pages[i]);
    1626     1300542 :                         pages_locked++;
    1627             :                 }
    1628       85780 :                 nrpages -= ret;
    1629       85780 :                 index += ret;
    1630       85780 :                 cond_resched();
    1631             :         }
    1632             :         ret = 0;
    1633             : done:
    1634       14714 :         if (ret && pages_locked) {
    1635           0 :                 __unlock_for_delalloc(inode, locked_page,
    1636             :                               delalloc_start,
    1637           0 :                               ((u64)(start_index + pages_locked - 1)) <<
    1638             :                               PAGE_CACHE_SHIFT);
    1639             :         }
    1640       14714 :         return ret;
    1641             : }
    1642             : 
    1643             : /*
    1644             :  * find a contiguous range of bytes in the file marked as delalloc, not
     1645             :  * more than 'max_bytes'.  start and end are used to return the range.
    1646             :  *
    1647             :  * 1 is returned if we find something, 0 if nothing was in the tree
    1648             :  */
    1649     1312842 : STATIC u64 find_lock_delalloc_range(struct inode *inode,
    1650             :                                     struct extent_io_tree *tree,
    1651             :                                     struct page *locked_page, u64 *start,
    1652             :                                     u64 *end, u64 max_bytes)
    1653             : {
    1654             :         u64 delalloc_start;
    1655             :         u64 delalloc_end;
    1656             :         u64 found;
    1657     1312842 :         struct extent_state *cached_state = NULL;
    1658             :         int ret;
    1659             :         int loops = 0;
    1660             : 
    1661             : again:
    1662             :         /* step one, find a bunch of delalloc bytes starting at start */
    1663     1312842 :         delalloc_start = *start;
    1664     1312842 :         delalloc_end = 0;
    1665     1312842 :         found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
    1666             :                                     max_bytes, &cached_state);
    1667     1312835 :         if (!found || delalloc_end <= *start) {
    1668     1282956 :                 *start = delalloc_start;
    1669     1282956 :                 *end = delalloc_end;
    1670     1282956 :                 free_extent_state(cached_state);
    1671     1282987 :                 return 0;
    1672             :         }
    1673             : 
    1674             :         /*
    1675             :          * start comes from the offset of locked_page.  We have to lock
    1676             :          * pages in order, so we can't process delalloc bytes before
    1677             :          * locked_page
    1678             :          */
    1679       29879 :         if (delalloc_start < *start)
    1680          63 :                 delalloc_start = *start;
    1681             : 
    1682             :         /*
    1683             :          * make sure to limit the number of pages we try to lock down
    1684             :          */
    1685       29879 :         if (delalloc_end + 1 - delalloc_start > max_bytes)
    1686           2 :                 delalloc_end = delalloc_start + max_bytes - 1;
    1687             : 
    1688             :         /* step two, lock all the pages after the page that has start */
    1689       29879 :         ret = lock_delalloc_pages(inode, locked_page,
    1690             :                                   delalloc_start, delalloc_end);
    1691       29878 :         if (ret == -EAGAIN) {
    1692             :                 /* some of the pages are gone, lets avoid looping by
    1693             :                  * shortening the size of the delalloc range we're searching
    1694             :                  */
    1695           0 :                 free_extent_state(cached_state);
    1696           0 :                 cached_state = NULL;
    1697           0 :                 if (!loops) {
    1698             :                         max_bytes = PAGE_CACHE_SIZE;
    1699             :                         loops = 1;
    1700             :                         goto again;
    1701             :                 } else {
    1702             :                         found = 0;
    1703             :                         goto out_failed;
    1704             :                 }
    1705             :         }
    1706       29878 :         BUG_ON(ret); /* Only valid values are 0 and -EAGAIN */
    1707             : 
    1708             :         /* step three, lock the state bits for the whole range */
    1709       29878 :         lock_extent_bits(tree, delalloc_start, delalloc_end, 0, &cached_state);
    1710             : 
    1711             :         /* then test to make sure it is all still delalloc */
    1712       29880 :         ret = test_range_bit(tree, delalloc_start, delalloc_end,
    1713             :                              EXTENT_DELALLOC, 1, cached_state);
    1714       29881 :         if (!ret) {
    1715           0 :                 unlock_extent_cached(tree, delalloc_start, delalloc_end,
    1716             :                                      &cached_state, GFP_NOFS);
    1717           0 :                 __unlock_for_delalloc(inode, locked_page,
    1718             :                               delalloc_start, delalloc_end);
    1719           0 :                 cond_resched();
    1720           0 :                 goto again;
    1721             :         }
    1722       29881 :         free_extent_state(cached_state);
    1723       29881 :         *start = delalloc_start;
    1724       29881 :         *end = delalloc_end;
    1725             : out_failed:
    1726       29881 :         return found;
    1727             : }
    1728             : 
    1729       30095 : int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
    1730             :                                  struct page *locked_page,
    1731             :                                  unsigned long clear_bits,
    1732             :                                  unsigned long page_ops)
    1733             : {
    1734       30095 :         struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
    1735             :         int ret;
    1736             :         struct page *pages[16];
    1737       30095 :         unsigned long index = start >> PAGE_CACHE_SHIFT;
    1738       30095 :         unsigned long end_index = end >> PAGE_CACHE_SHIFT;
    1739       30095 :         unsigned long nr_pages = end_index - index + 1;
    1740             :         int i;
    1741             : 
    1742       30095 :         clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
    1743       30096 :         if (page_ops == 0)
    1744             :                 return 0;
    1745             : 
    1746      131205 :         while (nr_pages > 0) {
    1747      101111 :                 ret = find_get_pages_contig(inode->i_mapping, index,
    1748      101111 :                                      min_t(unsigned long,
    1749             :                                      nr_pages, ARRAY_SIZE(pages)), pages);
    1750     1416776 :                 for (i = 0; i < ret; i++) {
    1751             : 
    1752     1315666 :                         if (page_ops & PAGE_SET_PRIVATE2)
    1753     1308871 :                                 SetPagePrivate2(pages[i]);
    1754             : 
    1755     1315710 :                         if (pages[i] == locked_page) {
    1756       25901 :                                 page_cache_release(pages[i]);
    1757       25901 :                                 continue;
    1758             :                         }
    1759     1289809 :                         if (page_ops & PAGE_CLEAR_DIRTY)
    1760        6793 :                                 clear_page_dirty_for_io(pages[i]);
    1761     1289809 :                         if (page_ops & PAGE_SET_WRITEBACK)
    1762        6793 :                                 set_page_writeback(pages[i]);
    1763     1289809 :                         if (page_ops & PAGE_END_WRITEBACK)
    1764        3851 :                                 end_page_writeback(pages[i]);
    1765     1289809 :                         if (page_ops & PAGE_UNLOCK)
    1766     1289780 :                                 unlock_page(pages[i]);
    1767     1289806 :                         page_cache_release(pages[i]);
    1768             :                 }
    1769      101110 :                 nr_pages -= ret;
    1770      101110 :                 index += ret;
    1771      101110 :                 cond_resched();
    1772             :         }
    1773             :         return 0;
    1774             : }
    1775             : 
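/*
 * A minimal usage sketch (not part of this file or its coverage data) of how
 * a caller might release a delalloc range on error, combining extent bits to
 * clear with the per-page PAGE_* operations handled above.  The helper name
 * and the exact flag combination are illustrative, not taken from a real
 * caller.
 */
static void example_release_delalloc_range(struct inode *inode, u64 start,
                                           u64 end, struct page *locked_page)
{
        extent_clear_unlock_delalloc(inode, start, end, locked_page,
                                     EXTENT_LOCKED | EXTENT_DELALLOC |
                                     EXTENT_DO_ACCOUNTING,
                                     PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
                                     PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
}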
    1776             : /*
    1777             :  * count the number of bytes in the tree that have a given bit(s)
    1778             :  * set.  This can be fairly slow, except for EXTENT_DIRTY which is
    1779             :  * cached.  The total number found is returned.
    1780             :  */
    1781       81161 : u64 count_range_bits(struct extent_io_tree *tree,
    1782             :                      u64 *start, u64 search_end, u64 max_bytes,
    1783             :                      unsigned long bits, int contig)
    1784             : {
    1785             :         struct rb_node *node;
    1786             :         struct extent_state *state;
    1787       81161 :         u64 cur_start = *start;
    1788             :         u64 total_bytes = 0;
    1789             :         u64 last = 0;
    1790             :         int found = 0;
    1791             : 
    1792       81161 :         if (WARN_ON(search_end <= cur_start))
    1793             :                 return 0;
    1794             : 
    1795             :         spin_lock(&tree->lock);
    1796       81358 :         if (cur_start == 0 && bits == EXTENT_DIRTY) {
    1797       80670 :                 total_bytes = tree->dirty_bytes;
    1798       80670 :                 goto out;
    1799             :         }
    1800             :         /*
    1801             :          * this search will find all the extents that end after
    1802             :          * our range starts.
    1803             :          */
    1804             :         node = tree_search(tree, cur_start);
    1805         688 :         if (!node)
    1806             :                 goto out;
    1807             : 
    1808             :         while (1) {
    1809             :                 state = rb_entry(node, struct extent_state, rb_node);
    1810        1164 :                 if (state->start > search_end)
    1811             :                         break;
    1812        1087 :                 if (contig && found && state->start > last + 1)
    1813             :                         break;
    1814        1087 :                 if (state->end >= cur_start && (state->state & bits) == bits) {
    1815         146 :                         total_bytes += min(search_end, state->end) + 1 -
    1816          73 :                                        max(cur_start, state->start);
    1817          73 :                         if (total_bytes >= max_bytes)
    1818             :                                 break;
    1819          66 :                         if (!found) {
    1820          66 :                                 *start = max(cur_start, state->start);
    1821             :                                 found = 1;
    1822             :                         }
    1823          66 :                         last = state->end;
    1824        1014 :                 } else if (contig && found) {
    1825             :                         break;
    1826             :                 }
    1827        1021 :                 node = rb_next(node);
    1828        1021 :                 if (!node)
    1829             :                         break;
    1830             :         }
    1831             : out:
    1832             :         spin_unlock(&tree->lock);
    1833       81356 :         return total_bytes;
    1834             : }
    1835             : 
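/*
 * A minimal usage sketch (not part of this file or its coverage data):
 * counting dirty bytes with count_range_bits().  Starting at offset 0 with
 * EXTENT_DIRTY hits the cached tree->dirty_bytes fast path noted above; the
 * helper name is hypothetical.
 */
static u64 example_count_dirty_bytes(struct extent_io_tree *tree)
{
        u64 start = 0;

        return count_range_bits(tree, &start, (u64)-1, (u64)-1,
                                EXTENT_DIRTY, 0);
}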
    1836             : /*
    1837             :  * set the private field for a given byte offset in the tree.  If there isn't
     1838             :  * an extent_state there already, nothing is changed and -ENOENT is returned.
    1839             :  */
    1840           0 : static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
    1841             : {
    1842             :         struct rb_node *node;
    1843             :         struct extent_state *state;
    1844             :         int ret = 0;
    1845             : 
    1846             :         spin_lock(&tree->lock);
    1847             :         /*
    1848             :          * this search will find all the extents that end after
    1849             :          * our range starts.
    1850             :          */
    1851             :         node = tree_search(tree, start);
    1852           0 :         if (!node) {
    1853             :                 ret = -ENOENT;
    1854             :                 goto out;
    1855             :         }
    1856             :         state = rb_entry(node, struct extent_state, rb_node);
    1857           0 :         if (state->start != start) {
    1858             :                 ret = -ENOENT;
    1859             :                 goto out;
    1860             :         }
    1861           0 :         state->private = private;
    1862             : out:
    1863             :         spin_unlock(&tree->lock);
    1864           0 :         return ret;
    1865             : }
    1866             : 
    1867           0 : int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
    1868             : {
    1869             :         struct rb_node *node;
    1870             :         struct extent_state *state;
    1871             :         int ret = 0;
    1872             : 
    1873             :         spin_lock(&tree->lock);
    1874             :         /*
    1875             :          * this search will find all the extents that end after
    1876             :          * our range starts.
    1877             :          */
    1878             :         node = tree_search(tree, start);
    1879           0 :         if (!node) {
    1880             :                 ret = -ENOENT;
    1881             :                 goto out;
    1882             :         }
    1883             :         state = rb_entry(node, struct extent_state, rb_node);
    1884           0 :         if (state->start != start) {
    1885             :                 ret = -ENOENT;
    1886             :                 goto out;
    1887             :         }
    1888           0 :         *private = state->private;
    1889             : out:
    1890             :         spin_unlock(&tree->lock);
    1891           0 :         return ret;
    1892             : }
    1893             : 
    1894             : /*
    1895             :  * searches a range in the state tree for a given mask.
     1896             :  * If 'filled' == 1, this returns 1 only if every extent in the range
    1897             :  * has the bits set.  Otherwise, 1 is returned if any bit in the
    1898             :  * range is found set.
    1899             :  */
    1900      353484 : int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
    1901             :                    unsigned long bits, int filled, struct extent_state *cached)
    1902             : {
    1903             :         struct extent_state *state = NULL;
    1904             :         struct rb_node *node;
    1905             :         int bitset = 0;
    1906             : 
    1907             :         spin_lock(&tree->lock);
    1908      434797 :         if (cached && cached->tree && cached->start <= start &&
    1909       81299 :             cached->end > start)
    1910       81299 :                 node = &cached->rb_node;
    1911             :         else
    1912             :                 node = tree_search(tree, start);
    1913      353513 :         while (node && start <= end) {
    1914             :                 state = rb_entry(node, struct extent_state, rb_node);
    1915             : 
    1916      348935 :                 if (filled && state->start > start) {
    1917             :                         bitset = 0;
    1918             :                         break;
    1919             :                 }
    1920             : 
    1921      348933 :                 if (state->start > end)
    1922             :                         break;
    1923             : 
    1924      166806 :                 if (state->state & bits) {
    1925             :                         bitset = 1;
    1926       32129 :                         if (!filled)
    1927             :                                 break;
    1928      134677 :                 } else if (filled) {
    1929             :                         bitset = 0;
    1930             :                         break;
    1931             :                 }
    1932             : 
    1933       33752 :                 if (state->end == (u64)-1)
    1934             :                         break;
    1935             : 
    1936       33752 :                 start = state->end + 1;
    1937       33752 :                 if (start > end)
    1938             :                         break;
    1939          17 :                 node = rb_next(node);
    1940          17 :                 if (!node) {
    1941           0 :                         if (filled)
    1942             :                                 bitset = 0;
    1943             :                         break;
    1944             :                 }
    1945             :         }
    1946             :         spin_unlock(&tree->lock);
    1947      353492 :         return bitset;
    1948             : }
    1949             : 
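/*
 * A minimal usage sketch (not part of this file or its coverage data) of the
 * two 'filled' modes described above; the helper name and the chosen bits
 * are hypothetical.
 */
static void example_test_range(struct extent_io_tree *tree, u64 start, u64 end)
{
        /* 1 only if every byte of [start, end] has EXTENT_UPTODATE set */
        int all_uptodate = test_range_bit(tree, start, end,
                                          EXTENT_UPTODATE, 1, NULL);
        /* 1 if any part of [start, end] has EXTENT_DELALLOC set */
        int any_delalloc = test_range_bit(tree, start, end,
                                          EXTENT_DELALLOC, 0, NULL);

        (void)all_uptodate;
        (void)any_delalloc;
}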
    1950             : /*
    1951             :  * helper function to set a given page up to date if all the
    1952             :  * extents in the tree for that page are up to date
    1953             :  */
    1954         452 : static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
    1955             : {
    1956         452 :         u64 start = page_offset(page);
    1957         452 :         u64 end = start + PAGE_CACHE_SIZE - 1;
    1958         452 :         if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
    1959             :                 SetPageUptodate(page);
    1960         452 : }
    1961             : 
    1962             : /*
    1963             :  * When IO fails, either with EIO or csum verification fails, we
    1964             :  * try other mirrors that might have a good copy of the data.  This
    1965             :  * io_failure_record is used to record state as we go through all the
    1966             :  * mirrors.  If another mirror has good data, the page is set up to date
    1967             :  * and things continue.  If a good mirror can't be found, the original
    1968             :  * bio end_io callback is called to indicate things have failed.
    1969             :  */
    1970             : struct io_failure_record {
    1971             :         struct page *page;
    1972             :         u64 start;
    1973             :         u64 len;
    1974             :         u64 logical;
    1975             :         unsigned long bio_flags;
    1976             :         int this_mirror;
    1977             :         int failed_mirror;
    1978             :         int in_validation;
    1979             : };
    1980             : 
    1981           0 : static int free_io_failure(struct inode *inode, struct io_failure_record *rec,
    1982             :                                 int did_repair)
    1983             : {
    1984             :         int ret;
    1985             :         int err = 0;
    1986           0 :         struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
    1987             : 
    1988           0 :         set_state_private(failure_tree, rec->start, 0);
    1989           0 :         ret = clear_extent_bits(failure_tree, rec->start,
    1990           0 :                                 rec->start + rec->len - 1,
    1991             :                                 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
    1992           0 :         if (ret)
    1993             :                 err = ret;
    1994             : 
    1995           0 :         ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start,
    1996           0 :                                 rec->start + rec->len - 1,
    1997             :                                 EXTENT_DAMAGED, GFP_NOFS);
    1998           0 :         if (ret && !err)
    1999             :                 err = ret;
    2000             : 
    2001           0 :         kfree(rec);
    2002           0 :         return err;
    2003             : }
    2004             : 
    2005             : /*
    2006             :  * this bypasses the standard btrfs submit functions deliberately, as
    2007             :  * the standard behavior is to write all copies in a raid setup. here we only
    2008             :  * want to write the one bad copy. so we do the mapping for ourselves and issue
    2009             :  * submit_bio directly.
    2010             :  * to avoid any synchronization issues, we wait for the data after writing,
    2011             :  * which prevents the read that triggered the error from finishing.
    2012             :  * currently, there can be no more than two copies of every data bit. thus,
    2013             :  * exactly one rewrite is required.
    2014             :  */
    2015           0 : int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
    2016           0 :                         u64 length, u64 logical, struct page *page,
    2017             :                         int mirror_num)
    2018             : {
    2019             :         struct bio *bio;
    2020             :         struct btrfs_device *dev;
    2021           0 :         u64 map_length = 0;
    2022             :         u64 sector;
    2023           0 :         struct btrfs_bio *bbio = NULL;
    2024           0 :         struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
    2025             :         int ret;
    2026             : 
    2027             :         ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
    2028           0 :         BUG_ON(!mirror_num);
    2029             : 
    2030             :         /* we can't repair anything in raid56 yet */
    2031           0 :         if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
    2032             :                 return 0;
    2033             : 
    2034           0 :         bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
    2035           0 :         if (!bio)
    2036             :                 return -EIO;
    2037           0 :         bio->bi_iter.bi_size = 0;
    2038           0 :         map_length = length;
    2039             : 
    2040           0 :         ret = btrfs_map_block(fs_info, WRITE, logical,
    2041             :                               &map_length, &bbio, mirror_num);
    2042           0 :         if (ret) {
    2043           0 :                 bio_put(bio);
    2044           0 :                 return -EIO;
    2045             :         }
    2046           0 :         BUG_ON(mirror_num != bbio->mirror_num);
    2047           0 :         sector = bbio->stripes[mirror_num-1].physical >> 9;
    2048           0 :         bio->bi_iter.bi_sector = sector;
    2049           0 :         dev = bbio->stripes[mirror_num-1].dev;
    2050           0 :         kfree(bbio);
    2051           0 :         if (!dev || !dev->bdev || !dev->writeable) {
    2052           0 :                 bio_put(bio);
    2053           0 :                 return -EIO;
    2054             :         }
    2055           0 :         bio->bi_bdev = dev->bdev;
    2056           0 :         bio_add_page(bio, page, length, start - page_offset(page));
    2057             : 
    2058           0 :         if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
    2059             :                 /* try to remap that extent elsewhere? */
    2060           0 :                 bio_put(bio);
    2061           0 :                 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
    2062           0 :                 return -EIO;
    2063             :         }
    2064             : 
    2065           0 :         printk_ratelimited_in_rcu(KERN_INFO
    2066             :                         "BTRFS: read error corrected: ino %lu off %llu "
    2067             :                     "(dev %s sector %llu)\n", page->mapping->host->i_ino,
    2068             :                     start, rcu_str_deref(dev->name), sector);
    2069             : 
    2070           0 :         bio_put(bio);
    2071           0 :         return 0;
    2072             : }
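
/*
 * Editorial sketch, not part of extent_io.c: repair_io_failure() converts the
 * chosen stripe's byte offset to a 512-byte sector with ">> 9" before storing
 * it in bi_sector, because block-layer sector numbers are always in 512-byte
 * units regardless of the filesystem block size.  Stand-in values below.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t physical = 1048576 + 4096; /* stripe byte offset on the device */
	uint64_t sector = physical >> 9;    /* 512-byte sectors: 2056 here */

	printf("byte offset %llu -> sector %llu\n",
	       (unsigned long long)physical, (unsigned long long)sector);
	return 0;
}
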
    2073             : 
    2074           0 : int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
    2075             :                          int mirror_num)
    2076             : {
    2077           0 :         u64 start = eb->start;
    2078           0 :         unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
    2079             :         int ret = 0;
    2080             : 
    2081           0 :         if (root->fs_info->sb->s_flags & MS_RDONLY)
    2082             :                 return -EROFS;
    2083             : 
    2084           0 :         for (i = 0; i < num_pages; i++) {
    2085             :                 struct page *p = extent_buffer_page(eb, i);
    2086           0 :                 ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE,
    2087             :                                         start, p, mirror_num);
    2088           0 :                 if (ret)
    2089             :                         break;
    2090           0 :                 start += PAGE_CACHE_SIZE;
    2091             :         }
    2092             : 
    2093           0 :         return ret;
    2094             : }
    2095             : 
    2096             : /*
    2097             :  * each time an IO finishes, we do a fast check in the IO failure tree
    2098             :  * to see if we need to process or clean up an io_failure_record
    2099             :  */
    2100       80536 : static int clean_io_failure(u64 start, struct page *page)
    2101             : {
    2102             :         u64 private;
    2103             :         u64 private_failure;
    2104             :         struct io_failure_record *failrec;
    2105       80536 :         struct inode *inode = page->mapping->host;
    2106       80536 :         struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
    2107             :         struct extent_state *state;
    2108             :         int num_copies;
    2109             :         int did_repair = 0;
    2110             :         int ret;
    2111             : 
    2112       80536 :         private = 0;
    2113       80536 :         ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
    2114             :                                 (u64)-1, 1, EXTENT_DIRTY, 0);
    2115       80646 :         if (!ret)
    2116             :                 return 0;
    2117             : 
    2118           0 :         ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start,
    2119             :                                 &private_failure);
    2120           0 :         if (ret)
    2121             :                 return 0;
    2122             : 
    2123           0 :         failrec = (struct io_failure_record *)(unsigned long) private_failure;
    2124           0 :         BUG_ON(!failrec->this_mirror);
    2125             : 
    2126           0 :         if (failrec->in_validation) {
    2127             :                 /* there was no real error, just free the record */
    2128           0 :                 pr_debug("clean_io_failure: freeing dummy error at %llu\n",
    2129             :                          failrec->start);
    2130             :                 did_repair = 1;
    2131             :                 goto out;
    2132             :         }
    2133           0 :         if (fs_info->sb->s_flags & MS_RDONLY)
    2134             :                 goto out;
    2135             : 
    2136             :         spin_lock(&BTRFS_I(inode)->io_tree.lock);
    2137           0 :         state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
    2138             :                                             failrec->start,
    2139             :                                             EXTENT_LOCKED);
    2140             :         spin_unlock(&BTRFS_I(inode)->io_tree.lock);
    2141             : 
    2142           0 :         if (state && state->start <= failrec->start &&
    2143           0 :             state->end >= failrec->start + failrec->len - 1) {
    2144           0 :                 num_copies = btrfs_num_copies(fs_info, failrec->logical,
    2145             :                                               failrec->len);
    2146           0 :                 if (num_copies > 1)  {
    2147           0 :                         ret = repair_io_failure(fs_info, start, failrec->len,
    2148             :                                                 failrec->logical, page,
    2149             :                                                 failrec->failed_mirror);
    2150             :                         did_repair = !ret;
    2151             :                 }
    2152             :                 ret = 0;
    2153             :         }
    2154             : 
    2155             : out:
    2156           0 :         if (!ret)
    2157           0 :                 ret = free_io_failure(inode, failrec, did_repair);
    2158             : 
    2159           0 :         return ret;
    2160             : }
    2161             : 
    2162             : /*
    2163             :  * this is a generic handler for readpage errors (default
    2164             :  * readpage_io_failed_hook). if other copies exist, read those and write back
    2165             :  * good data to the failed position. it does not attempt to remap the
    2166             :  * failed extent elsewhere, hoping the device will be smart enough to do
    2167             :  * this as needed
    2168             :  */
    2169             : 
    2170           0 : static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
    2171           0 :                               struct page *page, u64 start, u64 end,
    2172             :                               int failed_mirror)
    2173             : {
    2174             :         struct io_failure_record *failrec = NULL;
    2175             :         u64 private;
    2176             :         struct extent_map *em;
    2177           0 :         struct inode *inode = page->mapping->host;
    2178           0 :         struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
    2179           0 :         struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
    2180           0 :         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
    2181             :         struct bio *bio;
    2182             :         struct btrfs_io_bio *btrfs_failed_bio;
    2183             :         struct btrfs_io_bio *btrfs_bio;
    2184             :         int num_copies;
    2185             :         int ret;
    2186             :         int read_mode;
    2187             :         u64 logical;
    2188             : 
    2189           0 :         BUG_ON(failed_bio->bi_rw & REQ_WRITE);
    2190             : 
    2191           0 :         ret = get_state_private(failure_tree, start, &private);
    2192           0 :         if (ret) {
    2193           0 :                 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
    2194           0 :                 if (!failrec)
    2195             :                         return -ENOMEM;
    2196           0 :                 failrec->start = start;
    2197           0 :                 failrec->len = end - start + 1;
    2198           0 :                 failrec->this_mirror = 0;
    2199           0 :                 failrec->bio_flags = 0;
    2200           0 :                 failrec->in_validation = 0;
    2201             : 
    2202           0 :                 read_lock(&em_tree->lock);
    2203           0 :                 em = lookup_extent_mapping(em_tree, start, failrec->len);
    2204           0 :                 if (!em) {
    2205             :                         read_unlock(&em_tree->lock);
    2206           0 :                         kfree(failrec);
    2207           0 :                         return -EIO;
    2208             :                 }
    2209             : 
    2210           0 :                 if (em->start > start || em->start + em->len <= start) {
    2211           0 :                         free_extent_map(em);
    2212             :                         em = NULL;
    2213             :                 }
    2214             :                 read_unlock(&em_tree->lock);
    2215             : 
    2216           0 :                 if (!em) {
    2217           0 :                         kfree(failrec);
    2218           0 :                         return -EIO;
    2219             :                 }
    2220           0 :                 logical = start - em->start;
    2221           0 :                 logical = em->block_start + logical;
    2222           0 :                 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
    2223             :                         logical = em->block_start;
    2224           0 :                         failrec->bio_flags = EXTENT_BIO_COMPRESSED;
    2225           0 :                         extent_set_compress_type(&failrec->bio_flags,
    2226           0 :                                                  em->compress_type);
    2227             :                 }
    2228           0 :                 pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, "
    2229             :                          "len=%llu\n", logical, start, failrec->len);
    2230           0 :                 failrec->logical = logical;
    2231           0 :                 free_extent_map(em);
    2232             : 
    2233             :                 /* set the bits in the private failure tree */
    2234             :                 ret = set_extent_bits(failure_tree, start, end,
    2235             :                                         EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
    2236           0 :                 if (ret >= 0)
    2237           0 :                         ret = set_state_private(failure_tree, start,
    2238             :                                                 (u64)(unsigned long)failrec);
    2239             :                 /* set the bits in the inode's tree */
    2240           0 :                 if (ret >= 0)
    2241             :                         ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED,
    2242             :                                                 GFP_NOFS);
    2243           0 :                 if (ret < 0) {
    2244           0 :                         kfree(failrec);
    2245           0 :                         return ret;
    2246             :                 }
    2247             :         } else {
    2248           0 :                 failrec = (struct io_failure_record *)(unsigned long)private;
    2249           0 :                 pr_debug("bio_readpage_error: (found) logical=%llu, "
    2250             :                          "start=%llu, len=%llu, validation=%d\n",
    2251             :                          failrec->logical, failrec->start, failrec->len,
    2252             :                          failrec->in_validation);
    2253             :                 /*
    2254             :                  * if data can exist on disk in more than two copies, extend
    2255             :                  * failrec here (e.g. with a list of failed mirrors) so that
    2256             :                  * clean_io_failure() can clean all those errors at once.
    2257             :                  */
    2258             :         }
    2259           0 :         num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
    2260             :                                       failrec->logical, failrec->len);
    2261           0 :         if (num_copies == 1) {
    2262             :                 /*
    2263             :                  * we only have a single copy of the data, so don't bother with
    2264             :                  * all the retry and error correction code that follows. no
    2265             :                  * matter what the error is, it is very likely to persist.
    2266             :                  */
    2267           0 :                 pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
    2268             :                          num_copies, failrec->this_mirror, failed_mirror);
    2269           0 :                 free_io_failure(inode, failrec, 0);
    2270           0 :                 return -EIO;
    2271             :         }
    2272             : 
    2273             :         /*
    2274             :          * there are two premises:
    2275             :          *      a) deliver good data to the caller
    2276             :          *      b) correct the bad sectors on disk
    2277             :          */
    2278           0 :         if (failed_bio->bi_vcnt > 1) {
    2279             :                 /*
    2280             :                  * to fulfill b), we need to know the exact failing sectors, as
    2281             :                  * we don't want to rewrite any more than the failed ones. thus,
    2282             :                  * we need separate read requests for the failed bio
    2283             :                  *
    2284             :                  * if the following BUG_ON triggers, our validation request got
    2285             :                  * merged. we need separate requests for our algorithm to work.
    2286             :                  */
    2287           0 :                 BUG_ON(failrec->in_validation);
    2288           0 :                 failrec->in_validation = 1;
    2289           0 :                 failrec->this_mirror = failed_mirror;
    2290             :                 read_mode = READ_SYNC | REQ_FAILFAST_DEV;
    2291             :         } else {
    2292             :                 /*
    2293             :                  * we're ready to fulfill a) and b) at the same time. get a
    2294             :                  * good copy of the failed sector, and if we succeed, we have
    2295             :                  * set up everything for repair_io_failure to do the rest for us.
    2296             :                  */
    2297           0 :                 if (failrec->in_validation) {
    2298           0 :                         BUG_ON(failrec->this_mirror != failed_mirror);
    2299           0 :                         failrec->in_validation = 0;
    2300           0 :                         failrec->this_mirror = 0;
    2301             :                 }
    2302           0 :                 failrec->failed_mirror = failed_mirror;
    2303           0 :                 failrec->this_mirror++;
    2304           0 :                 if (failrec->this_mirror == failed_mirror)
    2305           0 :                         failrec->this_mirror++;
    2306             :                 read_mode = READ_SYNC;
    2307             :         }
    2308             : 
    2309           0 :         if (failrec->this_mirror > num_copies) {
    2310           0 :                 pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
    2311             :                          num_copies, failrec->this_mirror, failed_mirror);
    2312           0 :                 free_io_failure(inode, failrec, 0);
    2313           0 :                 return -EIO;
    2314             :         }
    2315             : 
    2316           0 :         bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
    2317           0 :         if (!bio) {
    2318           0 :                 free_io_failure(inode, failrec, 0);
    2319           0 :                 return -EIO;
    2320             :         }
    2321           0 :         bio->bi_end_io = failed_bio->bi_end_io;
    2322           0 :         bio->bi_iter.bi_sector = failrec->logical >> 9;
    2323           0 :         bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
    2324           0 :         bio->bi_iter.bi_size = 0;
    2325             : 
    2326             :         btrfs_failed_bio = btrfs_io_bio(failed_bio);
    2327           0 :         if (btrfs_failed_bio->csum) {
    2328           0 :                 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
    2329           0 :                 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
    2330             : 
    2331             :                 btrfs_bio = btrfs_io_bio(bio);
    2332           0 :                 btrfs_bio->csum = btrfs_bio->csum_inline;
    2333           0 :                 phy_offset >>= inode->i_sb->s_blocksize_bits;
    2334           0 :                 phy_offset *= csum_size;
    2335           0 :                 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
    2336             :                        csum_size);
    2337             :         }
    2338             : 
    2339           0 :         bio_add_page(bio, page, failrec->len, start - page_offset(page));
    2340             : 
    2341           0 :         pr_debug("bio_readpage_error: submitting new read[%#x] to "
    2342             :                  "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode,
    2343             :                  failrec->this_mirror, num_copies, failrec->in_validation);
    2344             : 
    2345           0 :         ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
    2346             :                                          failrec->this_mirror,
    2347             :                                          failrec->bio_flags, 0);
    2348           0 :         return ret;
    2349             : }
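
/*
 * Editorial sketch, not part of extent_io.c: the retry path above walks the
 * mirrors 1..num_copies, skips the mirror that already failed, and gives up
 * once this_mirror would exceed num_copies.  Minimal userspace model of that
 * rotation with hypothetical helper names.
 */
#include <stdio.h>

static int next_mirror(int this_mirror, int failed_mirror, int num_copies)
{
	this_mirror++;
	if (this_mirror == failed_mirror) /* don't re-read the known-bad copy */
		this_mirror++;
	if (this_mirror > num_copies)     /* every other copy has been tried */
		return -1;
	return this_mirror;
}

int main(void)
{
	int num_copies = 2, failed_mirror = 1, m = 0;

	while ((m = next_mirror(m, failed_mirror, num_copies)) > 0)
		printf("retry read from mirror %d\n", m); /* mirror 2 only */
	return 0;
}
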
    2350             : 
    2351             : /* lots and lots of room for performance fixes in the end_bio funcs */
    2352             : 
    2353     1308880 : int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
    2354             : {
    2355     1308880 :         int uptodate = (err == 0);
    2356             :         struct extent_io_tree *tree;
    2357             :         int ret = 0;
    2358             : 
    2359     1308880 :         tree = &BTRFS_I(page->mapping->host)->io_tree;
    2360             : 
    2361     1308880 :         if (tree->ops && tree->ops->writepage_end_io_hook) {
    2362     1308880 :                 ret = tree->ops->writepage_end_io_hook(page, start,
    2363             :                                                end, NULL, uptodate);
    2364     1308916 :                 if (ret)
    2365             :                         uptodate = 0;
    2366             :         }
    2367             : 
    2368     1308916 :         if (!uptodate) {
    2369             :                 ClearPageUptodate(page);
    2370             :                 SetPageError(page);
    2371           0 :                 ret = ret < 0 ? ret : -EIO;
    2372           0 :                 mapping_set_error(page->mapping, ret);
    2373             :         }
    2374     1308916 :         return 0;
    2375             : }
    2376             : 
    2377             : /*
    2378             :  * after a writepage IO is done, we need to:
    2379             :  * clear the uptodate bits on error
    2380             :  * clear the writeback bits in the extent tree for this IO
    2381             :  * end_page_writeback if the page has no more pending IO
    2382             :  *
    2383             :  * Scheduling is not allowed, so the extent state tree is expected
    2384             :  * to have one and only one object corresponding to this IO.
    2385             :  */
    2386       65676 : static void end_bio_extent_writepage(struct bio *bio, int err)
    2387             : {
    2388             :         struct bio_vec *bvec;
    2389             :         u64 start;
    2390             :                 /* We always issue full-page writes, but if some block
    2391             :                  * in a page fails to write, blk_update_request() will
    2392             : 
    2393     1374571 :         bio_for_each_segment_all(bvec, bio, i) {
    2394     2617774 :                 struct page *page = bvec->bv_page;
    2395             : 
    2396             :                 /* We always issue full-page reads, but if some block
    2397             :                  * in a page fails to read, blk_update_request() will
    2398             :                  * advance bv_offset and adjust bv_len to compensate.
    2399             :                  * Print a warning for nonzero offsets, and an error
    2400             :                  * if they don't add up to a full page.  */
    2401     1308887 :                 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
    2402           0 :                         if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
    2403           0 :                                 btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
    2404             :                                    "partial page write in btrfs with offset %u and length %u",
    2405             :                                         bvec->bv_offset, bvec->bv_len);
    2406             :                         else
    2407           0 :                                 btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
    2408             :                                    "incomplete page write in btrfs with offset %u and "
    2409             :                                    "length %u",
    2410             :                                         bvec->bv_offset, bvec->bv_len);
    2411             :                 }
    2412             : 
    2413     1308887 :                 start = page_offset(page);
    2414     1308887 :                 end = start + bvec->bv_offset + bvec->bv_len - 1;
    2415             : 
    2416     1308887 :                 if (end_extent_writepage(page, err, start, end))
    2417           0 :                         continue;
    2418             : 
    2419     1308916 :                 end_page_writeback(page);
    2420             :         }
    2421             : 
    2422       65684 :         bio_put(bio);
    2423       65683 : }
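
/*
 * Editorial sketch, not part of extent_io.c: both end_io handlers expect every
 * bio_vec to cover exactly one full page.  The helper below models that sanity
 * test with plain integers: a nonzero offset or short length is reported, and
 * offset + len not adding up to a page is treated as the worse ("partial")
 * case, mirroring the btrfs_err()/btrfs_info() split above.
 */
#include <stdio.h>

#define MODEL_PAGE_SIZE 4096u

static void check_bvec(unsigned int bv_offset, unsigned int bv_len)
{
	if (bv_offset == 0 && bv_len == MODEL_PAGE_SIZE)
		return; /* the normal, full-page case */

	if (bv_offset + bv_len != MODEL_PAGE_SIZE)
		printf("partial page IO: offset %u len %u\n", bv_offset, bv_len);
	else
		printf("incomplete page IO: offset %u len %u\n", bv_offset, bv_len);
}

int main(void)
{
	check_bvec(0, 4096);   /* silent */
	check_bvec(512, 3584); /* incomplete: still adds up to a full page */
	check_bvec(0, 2048);   /* partial: does not add up to a full page */
	return 0;
}
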
    2424             : 
    2425             : static void
    2426       29590 : endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
    2427             :                               int uptodate)
    2428             : {
    2429       29590 :         struct extent_state *cached = NULL;
    2430       29590 :         u64 end = start + len - 1;
    2431             : 
    2432       29590 :         if (uptodate && tree->track_uptodate)
    2433             :                 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
    2434             :         unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
    2435       29591 : }
    2436             : 
    2437             : /*
    2438             :  * after a readpage IO is done, we need to:
    2439             :  * clear the uptodate bits on error
    2440             :  * set the uptodate bits if things worked
    2441             :  * set the page up to date if all extents in the tree are uptodate
    2442             :  * clear the lock bit in the extent tree
    2443             :  * unlock the page if there are no other extents locked for it
    2444             :  *
    2445             :  * Scheduling is not allowed, so the extent state tree is expected
    2446             :  * to have one and only one object corresponding to this IO.
    2447             :  */
    2448       28580 : static void end_bio_extent_readpage(struct bio *bio, int err)
    2449             : {
    2450             :         struct bio_vec *bvec;
    2451             :         int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
    2452       28580 :         struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
    2453             :         struct extent_io_tree *tree;
    2454             :         u64 offset = 0;
    2455             :         u64 start;
    2456             :         u64 end;
    2457             :         u64 len;
    2458             :         u64 extent_start = 0;
    2459             :         u64 extent_len = 0;
    2460             :         int mirror;
    2461             :         int ret;
    2462             :         int i;
    2463             : 
    2464       28580 :         if (err)
    2465             :                 uptodate = 0;
    2466             : 
    2467      109239 :         bio_for_each_segment_all(bvec, bio, i) {
    2468      161282 :                 struct page *page = bvec->bv_page;
    2469      161289 :                 struct inode *inode = page->mapping->host;
    2470             : 
    2471       80660 :                 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
    2472             :                          "mirror=%lu\n", (u64)bio->bi_iter.bi_sector, err,
    2473             :                          io_bio->mirror_num);
    2474       80622 :                 tree = &BTRFS_I(inode)->io_tree;
    2475             : 
    2476             :                 /* We always issue full-page reads, but if some block
    2477             :                  * in a page fails to read, blk_update_request() will
    2478             :                  * advance bv_offset and adjust bv_len to compensate.
    2479             :                  * Print a warning for nonzero offsets, and an error
    2480             :                  * if they don't add up to a full page.  */
    2481       80622 :                 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
    2482           0 :                         if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
    2483           0 :                                 btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
    2484             :                                    "partial page read in btrfs with offset %u and length %u",
    2485             :                                         bvec->bv_offset, bvec->bv_len);
    2486             :                         else
    2487           0 :                                 btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
    2488             :                                    "incomplete page read in btrfs with offset %u and "
    2489             :                                    "length %u",
    2490             :                                         bvec->bv_offset, bvec->bv_len);
    2491             :                 }
    2492             : 
    2493       80622 :                 start = page_offset(page);
    2494       80622 :                 end = start + bvec->bv_offset + bvec->bv_len - 1;
    2495             :                 len = bvec->bv_len;
    2496             : 
    2497       80622 :                 mirror = io_bio->mirror_num;
    2498       80622 :                 if (likely(uptodate && tree->ops &&
    2499             :                            tree->ops->readpage_end_io_hook)) {
    2500       80631 :                         ret = tree->ops->readpage_end_io_hook(io_bio, offset,
    2501             :                                                               page, start, end,
    2502             :                                                               mirror);
    2503       80537 :                         if (ret)
    2504             :                                 uptodate = 0;
    2505             :                         else
    2506       80538 :                                 clean_io_failure(start, page);
    2507             :                 }
    2508             : 
    2509       80629 :                 if (likely(uptodate))
    2510             :                         goto readpage_ok;
    2511             : 
    2512           0 :                 if (tree->ops && tree->ops->readpage_io_failed_hook) {
    2513           0 :                         ret = tree->ops->readpage_io_failed_hook(page, mirror);
    2514           0 :                         if (!ret && !err &&
    2515             :                             test_bit(BIO_UPTODATE, &bio->bi_flags))
    2516             :                                 uptodate = 1;
    2517             :                 } else {
    2518             :                         /*
    2519             :                          * The generic bio_readpage_error handles errors the
    2520             :                          * following way: If possible, new read requests are
    2521             :                          * created and submitted and will end up in
    2522             :                          * end_bio_extent_readpage as well (if we're lucky, not
    2523             :                          * in the !uptodate case). In that case it returns 0 and
    2524             :                          * we just go on with the next page in our bio. If it
    2525             :                          * can't handle the error it will return -EIO and we
    2526             :                          * remain responsible for that page.
    2527             :                          */
    2528           0 :                         ret = bio_readpage_error(bio, offset, page, start, end,
    2529             :                                                  mirror);
    2530           0 :                         if (ret == 0) {
    2531             :                                 uptodate =
    2532             :                                         test_bit(BIO_UPTODATE, &bio->bi_flags);
    2533           0 :                                 if (err)
    2534             :                                         uptodate = 0;
    2535           0 :                                 offset += len;
    2536           0 :                                 continue;
    2537             :                         }
    2538             :                 }
    2539             : readpage_ok:
    2540       80629 :                 if (likely(uptodate)) {
    2541             :                         loff_t i_size = i_size_read(inode);
    2542       80629 :                         pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
    2543             :                         unsigned offset;
    2544             : 
    2545             :                         /* Zero out the end if this page straddles i_size */
    2546       80629 :                         offset = i_size & (PAGE_CACHE_SIZE-1);
    2547       80629 :                         if (page->index == end_index && offset)
    2548             :                                 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
    2549             :                         SetPageUptodate(page);
    2550             :                 } else {
    2551             :                         ClearPageUptodate(page);
    2552             :                         SetPageError(page);
    2553             :                 }
    2554       80657 :                 unlock_page(page);
    2555       80656 :                 offset += len;
    2556             : 
    2557       80656 :                 if (unlikely(!uptodate)) {
    2558           0 :                         if (extent_len) {
    2559           0 :                                 endio_readpage_release_extent(tree,
    2560             :                                                               extent_start,
    2561             :                                                               extent_len, 1);
    2562             :                                 extent_start = 0;
    2563             :                                 extent_len = 0;
    2564             :                         }
    2565           0 :                         endio_readpage_release_extent(tree, start,
    2566           0 :                                                       end - start + 1, 0);
    2567       80656 :                 } else if (!extent_len) {
    2568             :                         extent_start = start;
    2569       28578 :                         extent_len = end + 1 - start;
    2570       52078 :                 } else if (extent_start + extent_len == start) {
    2571       51067 :                         extent_len += end + 1 - start;
    2572             :                 } else {
    2573        1011 :                         endio_readpage_release_extent(tree, extent_start,
    2574             :                                                       extent_len, uptodate);
    2575             :                         extent_start = start;
    2576        1011 :                         extent_len = end + 1 - start;
    2577             :                 }
    2578             :         }
    2579             : 
    2580       28579 :         if (extent_len)
    2581       28579 :                 endio_readpage_release_extent(tree, extent_start, extent_len,
    2582             :                                               uptodate);
    2583       28580 :         if (io_bio->end_io)
    2584         375 :                 io_bio->end_io(io_bio, err);
    2585       28580 :         bio_put(bio);
    2586       28580 : }
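
/*
 * Editorial sketch, not part of extent_io.c: the readpage end_io handler above
 * batches the per-page extent unlocks.  Contiguous pages grow a single
 * (extent_start, extent_len) run, and the run is only released when a gap
 * shows up or the bio ends.  Userspace model with printf() standing in for
 * endio_readpage_release_extent(); the error branch is left out.
 */
#include <stdint.h>
#include <stdio.h>

static void release_run(uint64_t start, uint64_t len)
{
	printf("release [%llu, %llu)\n",
	       (unsigned long long)start, (unsigned long long)(start + len));
}

int main(void)
{
	/* page-sized byte ranges as (start, end) pairs, end inclusive */
	uint64_t ranges[][2] = {
		{ 0, 4095 }, { 4096, 8191 }, { 8192, 12287 }, { 65536, 69631 },
	};
	uint64_t run_start = 0, run_len = 0;

	for (unsigned int i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
		uint64_t start = ranges[i][0], end = ranges[i][1];

		if (!run_len) {
			run_start = start;
			run_len = end + 1 - start;
		} else if (run_start + run_len == start) {
			run_len += end + 1 - start;      /* still contiguous */
		} else {
			release_run(run_start, run_len); /* gap: flush the run */
			run_start = start;
			run_len = end + 1 - start;
		}
	}
	if (run_len)
		release_run(run_start, run_len);         /* final flush */
	return 0;
}
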
    2587             : 
    2588             : /*
    2589             :  * this allocates from the btrfs_bioset.  We return a plain bio, but the
    2590             :  * caller can use btrfs_io_bio() for the appropriate container_of magic
    2591             :  */
    2592             : struct bio *
    2593      122377 : btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
    2594             :                 gfp_t gfp_flags)
    2595             : {
    2596             :         struct btrfs_io_bio *btrfs_bio;
    2597             :         struct bio *bio;
    2598             : 
    2599      122377 :         bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
    2600             : 
    2601      122375 :         if (bio == NULL && (current->flags & PF_MEMALLOC)) {
    2602           1 :                 while (!bio && (nr_vecs /= 2)) {
    2603           0 :                         bio = bio_alloc_bioset(gfp_flags,
    2604             :                                                nr_vecs, btrfs_bioset);
    2605             :                 }
    2606             :         }
    2607             : 
    2608      122376 :         if (bio) {
    2609      122376 :                 bio->bi_bdev = bdev;
    2610      122376 :                 bio->bi_iter.bi_sector = first_sector;
    2611             :                 btrfs_bio = btrfs_io_bio(bio);
    2612      122376 :                 btrfs_bio->csum = NULL;
    2613      122376 :                 btrfs_bio->csum_allocated = NULL;
    2614      122376 :                 btrfs_bio->end_io = NULL;
    2615             :         }
    2616      122376 :         return bio;
    2617             : }
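
/*
 * Editorial sketch, not part of extent_io.c: the "container_of magic" the
 * comment above refers to.  A bio allocated from a bioset with front padding
 * sits inside a larger wrapper struct, and the wrapper is recovered from the
 * embedded member by subtracting that member's offset.  Stand-in types below;
 * the real struct btrfs_io_bio is defined elsewhere in btrfs.
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define model_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct model_bio { unsigned long bi_sector; };

struct model_io_bio {
	unsigned int mirror_num;
	struct model_bio bio;       /* embedded member, like bio in btrfs_io_bio */
};

int main(void)
{
	struct model_io_bio wrapper = { .mirror_num = 2 };
	struct model_bio *bio = &wrapper.bio;

	/* what a helper like btrfs_io_bio(bio) boils down to */
	struct model_io_bio *back =
		model_container_of(bio, struct model_io_bio, bio);

	assert(back == &wrapper);
	printf("mirror_num via container_of: %u\n", back->mirror_num);
	return 0;
}
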
    2618             : 
    2619       90602 : struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
    2620             : {
    2621       90602 :         return bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
    2622             : }
    2623             : 
    2624             : 
    2625             : /* this also allocates from the btrfs_bioset */
    2626      183799 : struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
    2627             : {
    2628             :         struct btrfs_io_bio *btrfs_bio;
    2629             :         struct bio *bio;
    2630             : 
    2631      183799 :         bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
    2632      183812 :         if (bio) {
    2633             :                 btrfs_bio = btrfs_io_bio(bio);
    2634      183809 :                 btrfs_bio->csum = NULL;
    2635      183809 :                 btrfs_bio->csum_allocated = NULL;
    2636      183809 :                 btrfs_bio->end_io = NULL;
    2637             :         }
    2638      183812 :         return bio;
    2639             : }
    2640             : 
    2641             : 
    2642      122182 : static int __must_check submit_one_bio(int rw, struct bio *bio,
    2643             :                                        int mirror_num, unsigned long bio_flags)
    2644             : {
    2645             :         int ret = 0;
    2646      122182 :         struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
    2647      244364 :         struct page *page = bvec->bv_page;
    2648      122182 :         struct extent_io_tree *tree = bio->bi_private;
    2649             :         u64 start;
    2650             : 
    2651      122182 :         start = page_offset(page) + bvec->bv_offset;
    2652             : 
    2653      122182 :         bio->bi_private = NULL;
    2654             : 
    2655      122182 :         bio_get(bio);
    2656             : 
    2657      122185 :         if (tree->ops && tree->ops->submit_bio_hook)
    2658      122185 :                 ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
    2659             :                                            mirror_num, bio_flags, start);
    2660             :         else
    2661           0 :                 btrfsic_submit_bio(rw, bio);
    2662             : 
    2663      122184 :         if (bio_flagged(bio, BIO_EOPNOTSUPP))
    2664             :                 ret = -EOPNOTSUPP;
    2665      122184 :         bio_put(bio);
    2666      122185 :         return ret;
    2667             : }
    2668             : 
    2669     1495689 : static int merge_bio(int rw, struct extent_io_tree *tree, struct page *page,
    2670             :                      unsigned long offset, size_t size, struct bio *bio,
    2671             :                      unsigned long bio_flags)
    2672             : {
    2673             :         int ret = 0;
    2674     1495689 :         if (tree->ops && tree->ops->merge_bio_hook)
    2675     1495690 :                 ret = tree->ops->merge_bio_hook(rw, page, offset, size, bio,
    2676             :                                                 bio_flags);
    2677     1495708 :         BUG_ON(ret < 0);
    2678     1495708 :         return ret;
    2679             : 
    2680             : }
    2681             : 
    2682     3062299 : static int submit_extent_page(int rw, struct extent_io_tree *tree,
    2683             :                               struct page *page, sector_t sector,
    2684             :                               size_t size, unsigned long offset,
    2685             :                               struct block_device *bdev,
    2686             :                               struct bio **bio_ret,
    2687             :                               unsigned long max_pages,
    2688             :                               bio_end_io_t end_io_func,
    2689             :                               int mirror_num,
    2690             :                               unsigned long prev_bio_flags,
    2691             :                               unsigned long bio_flags)
    2692             : {
    2693             :         int ret = 0;
    2694             :         struct bio *bio;
    2695             :         int nr;
    2696             :         int contig = 0;
    2697     1566609 :         int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
    2698     1566609 :         int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
    2699     1566609 :         size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
    2700             : 
    2701     1566609 :         if (bio_ret && *bio_ret) {
    2702             :                 bio = *bio_ret;
    2703     1508257 :                 if (old_compressed)
    2704         297 :                         contig = bio->bi_iter.bi_sector == sector;
    2705             :                 else
    2706     1507960 :                         contig = bio_end_sector(bio) == sector;
    2707             : 
    2708     3003965 :                 if (prev_bio_flags != bio_flags || !contig ||
    2709     2943355 :                     merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
    2710     1447672 :                     bio_add_page(bio, page, page_size, offset) < page_size) {
    2711       63824 :                         ret = submit_one_bio(rw, bio, mirror_num,
    2712             :                                              prev_bio_flags);
    2713       63830 :                         if (ret < 0)
    2714             :                                 return ret;
    2715             :                         bio = NULL;
    2716             :                 } else {
    2717             :                         return 0;
    2718             :                 }
    2719             :         }
    2720      122184 :         if (this_compressed)
    2721             :                 nr = BIO_MAX_PAGES;
    2722             :         else
    2723      122142 :                 nr = bio_get_nr_vecs(bdev);
    2724             : 
    2725      122183 :         bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
    2726      122181 :         if (!bio)
    2727             :                 return -ENOMEM;
    2728             : 
    2729      122181 :         bio_add_page(bio, page, page_size, offset);
    2730      122183 :         bio->bi_end_io = end_io_func;
    2731      122183 :         bio->bi_private = tree;
    2732             : 
    2733      122183 :         if (bio_ret)
    2734      122183 :                 *bio_ret = bio;
    2735             :         else
    2736           0 :                 ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
    2737             : 
    2738             :         return ret;
    2739             : }
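
/*
 * Editorial sketch, not part of extent_io.c: submit_extent_page() keeps one
 * bio open per caller and only appends a page when the new IO is directly
 * contiguous with the bio's current end and the bio flags match; anything
 * else forces the open bio to be submitted first.  Simplified userspace model
 * of that contiguity test (the merge hook and bio_add_page() limits are left
 * out).
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct model_bio {
	uint64_t start_sector;
	uint64_t nr_sectors;
	unsigned long flags;
};

static bool can_append(const struct model_bio *bio, uint64_t sector,
		       unsigned long flags)
{
	if (bio->flags != flags)
		return false;
	/* contiguous means the new IO starts exactly where the bio ends */
	return bio->start_sector + bio->nr_sectors == sector;
}

int main(void)
{
	struct model_bio bio = {
		.start_sector = 1000, .nr_sectors = 8, .flags = 0,
	};

	printf("%d\n", can_append(&bio, 1008, 0)); /* 1: contiguous, same flags */
	printf("%d\n", can_append(&bio, 1024, 0)); /* 0: gap, submit first */
	printf("%d\n", can_append(&bio, 1008, 1)); /* 0: flag change, submit first */
	return 0;
}
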
    2740             : 
    2741      192393 : static void attach_extent_buffer_page(struct extent_buffer *eb,
    2742             :                                       struct page *page)
    2743             : {
    2744      192393 :         if (!PagePrivate(page)) {
    2745             :                 SetPagePrivate(page);
    2746      192395 :                 page_cache_get(page);
    2747      192394 :                 set_page_private(page, (unsigned long)eb);
    2748             :         } else {
    2749           0 :                 WARN_ON(page->private != (unsigned long)eb);
    2750             :         }
    2751      192394 : }
    2752             : 
    2753     3158407 : void set_page_extent_mapped(struct page *page)
    2754             : {
    2755     3158407 :         if (!PagePrivate(page)) {
    2756             :                 SetPagePrivate(page);
    2757     1757500 :                 page_cache_get(page);
    2758     1757494 :                 set_page_private(page, EXTENT_PAGE_PRIVATE);
    2759             :         }
    2760     3158408 : }
    2761             : 
    2762             : static struct extent_map *
    2763      462506 : __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
    2764             :                  u64 start, u64 len, get_extent_t *get_extent,
    2765             :                  struct extent_map **em_cached)
    2766             : {
    2767      426247 :         struct extent_map *em;
    2768             : 
    2769      462506 :         if (em_cached && *em_cached) {
    2770             :                 em = *em_cached;
    2771      852494 :                 if (extent_map_in_tree(em) && start >= em->start &&
    2772             :                     start < extent_map_end(em)) {
    2773      418787 :                         atomic_inc(&em->refs);
    2774      418787 :                         return em;
    2775             :                 }
    2776             : 
    2777        7460 :                 free_extent_map(em);
    2778        7460 :                 *em_cached = NULL;
    2779             :         }
    2780             : 
    2781       43719 :         em = get_extent(inode, page, pg_offset, start, len, 0);
    2782       75276 :         if (em_cached && !IS_ERR_OR_NULL(em)) {
    2783       31557 :                 BUG_ON(*em_cached);
    2784       31557 :                 atomic_inc(&em->refs);
    2785       31557 :                 *em_cached = em;
    2786             :         }
    2787       43719 :         return em;
    2788             : }
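
/*
 * Editorial sketch, not part of extent_io.c: __get_extent_map() above keeps a
 * one-entry cache of the last extent mapping so a run of contiguous pages
 * only takes one tree lookup.  The reuse test is a half-open range check,
 * modelled below with plain integers and a stand-in struct.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct model_em { uint64_t start; uint64_t len; };

static bool cache_covers(const struct model_em *em, uint64_t start)
{
	return start >= em->start && start < em->start + em->len;
}

int main(void)
{
	struct model_em cached = { .start = 8192, .len = 65536 };

	printf("%d\n", cache_covers(&cached, 8192));  /* 1: first cached byte */
	printf("%d\n", cache_covers(&cached, 73727)); /* 1: last cached byte */
	printf("%d\n", cache_covers(&cached, 73728)); /* 0: one past the end */
	return 0;
}
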
    2789             : /*
    2790             :  * basic readpage implementation.  Locked extent state structs are inserted
    2791             :  * into the tree and are removed when the IO is done (by the end_io
    2792             :  * handlers)
    2793             :  * XXX JDM: This needs looking at to ensure proper page locking
    2794             :  */
    2795      486456 : static int __do_readpage(struct extent_io_tree *tree,
    2796      486456 :                          struct page *page,
    2797             :                          get_extent_t *get_extent,
    2798             :                          struct extent_map **em_cached,
    2799             :                          struct bio **bio, int mirror_num,
    2800             :                          unsigned long *bio_flags, int rw)
    2801             : {
    2802      972912 :         struct inode *inode = page->mapping->host;
    2803      486456 :         u64 start = page_offset(page);
    2804      486456 :         u64 page_end = start + PAGE_CACHE_SIZE - 1;
    2805             :         u64 end;
    2806             :         u64 cur = start;
    2807             :         u64 extent_offset;
    2808      486456 :         u64 last_byte = i_size_read(inode);
    2809             :         u64 block_start;
    2810             :         u64 cur_end;
    2811             :         sector_t sector;
    2812      462506 :         struct extent_map *em;
    2813             :         struct block_device *bdev;
    2814             :         int ret;
    2815             :         int nr = 0;
    2816      486456 :         int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
    2817             :         size_t pg_offset = 0;
    2818             :         size_t iosize;
    2819             :         size_t disk_io_size;
    2820      486456 :         size_t blocksize = inode->i_sb->s_blocksize;
    2821      486456 :         unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
    2822             : 
    2823      486456 :         set_page_extent_mapped(page);
    2824             : 
    2825             :         end = page_end;
    2826      486456 :         if (!PageUptodate(page)) {
    2827      486456 :                 if (cleancache_get_page(page) == 0) {
    2828           0 :                         BUG_ON(blocksize != PAGE_SIZE);
    2829           0 :                         unlock_extent(tree, start, end);
    2830           0 :                         goto out;
    2831             :                 }
    2832             :         }
    2833             : 
    2834      486456 :         if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
    2835             :                 char *userpage;
    2836        8950 :                 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
    2837             : 
    2838        8950 :                 if (zero_offset) {
    2839        3432 :                         iosize = PAGE_CACHE_SIZE - zero_offset;
    2840             :                         userpage = kmap_atomic(page);
    2841        3432 :                         memset(userpage + zero_offset, 0, iosize);
    2842             :                         flush_dcache_page(page);
    2843             :                         kunmap_atomic(userpage);
    2844             :                 }
    2845             :         }
    2846      948962 :         while (cur <= end) {
    2847             :                 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
    2848             : 
    2849      486456 :                 if (cur >= last_byte) {
    2850             :                         char *userpage;
    2851       23950 :                         struct extent_state *cached = NULL;
    2852             : 
    2853       23950 :                         iosize = PAGE_CACHE_SIZE - pg_offset;
    2854             :                         userpage = kmap_atomic(page);
    2855       23950 :                         memset(userpage + pg_offset, 0, iosize);
    2856             :                         flush_dcache_page(page);
    2857             :                         kunmap_atomic(userpage);
    2858       23950 :                         set_extent_uptodate(tree, cur, cur + iosize - 1,
    2859             :                                             &cached, GFP_NOFS);
    2860       23950 :                         if (!parent_locked)
    2861             :                                 unlock_extent_cached(tree, cur,
    2862             :                                                      cur + iosize - 1,
    2863             :                                                      &cached, GFP_NOFS);
    2864             :                         break;
    2865             :                 }
    2866      462506 :                 em = __get_extent_map(inode, page, pg_offset, cur,
    2867      462506 :                                       end - cur + 1, get_extent, em_cached);
    2868      462506 :                 if (IS_ERR_OR_NULL(em)) {
    2869             :                         SetPageError(page);
    2870           0 :                         if (!parent_locked)
    2871           0 :                                 unlock_extent(tree, cur, end);
    2872             :                         break;
    2873             :                 }
    2874      462506 :                 extent_offset = cur - em->start;
    2875      462506 :                 BUG_ON(extent_map_end(em) <= cur);
    2876      462506 :                 BUG_ON(end < cur);
    2877             : 
    2878      462506 :                 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
    2879         332 :                         this_bio_flag |= EXTENT_BIO_COMPRESSED;
    2880         332 :                         extent_set_compress_type(&this_bio_flag,
    2881         332 :                                                  em->compress_type);
    2882             :                 }
    2883             : 
    2884      462506 :                 iosize = min(extent_map_end(em) - cur, end - cur + 1);
    2885      462506 :                 cur_end = min(extent_map_end(em) - 1, end);
    2886      462506 :                 iosize = ALIGN(iosize, blocksize);
    2887      462506 :                 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
    2888         332 :                         disk_io_size = em->block_len;
    2889         332 :                         sector = em->block_start >> 9;
    2890             :                 } else {
    2891      462174 :                         sector = (em->block_start + extent_offset) >> 9;
    2892             :                         disk_io_size = iosize;
    2893             :                 }
    2894      462506 :                 bdev = em->bdev;
    2895      462506 :                 block_start = em->block_start;
    2896      462506 :                 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
    2897             :                         block_start = EXTENT_MAP_HOLE;
    2898      462506 :                 free_extent_map(em);
    2899             :                 em = NULL;
    2900             : 
    2901             :                 /* we've found a hole, just zero and go on */
    2902      462506 :                 if (block_start == EXTENT_MAP_HOLE) {
    2903             :                         char *userpage;
    2904      381384 :                         struct extent_state *cached = NULL;
    2905             : 
    2906             :                         userpage = kmap_atomic(page);
    2907      381384 :                         memset(userpage + pg_offset, 0, iosize);
    2908             :                         flush_dcache_page(page);
    2909             :                         kunmap_atomic(userpage);
    2910             : 
    2911      381384 :                         set_extent_uptodate(tree, cur, cur + iosize - 1,
    2912             :                                             &cached, GFP_NOFS);
    2913             :                         unlock_extent_cached(tree, cur, cur + iosize - 1,
    2914             :                                              &cached, GFP_NOFS);
    2915             :                         cur = cur + iosize;
    2916      381384 :                         pg_offset += iosize;
    2917      381384 :                         continue;
    2918             :                 }
     2919             :                 /* the get_extent function already copied the data into the page */
    2920       81122 :                 if (test_range_bit(tree, cur, cur_end,
    2921             :                                    EXTENT_UPTODATE, 1, NULL)) {
    2922         452 :                         check_page_uptodate(tree, page);
    2923         452 :                         if (!parent_locked)
    2924         452 :                                 unlock_extent(tree, cur, cur + iosize - 1);
    2925         452 :                         cur = cur + iosize;
    2926         452 :                         pg_offset += iosize;
    2927         452 :                         continue;
    2928             :                 }
     2929             :                 /* we have an inline extent but it didn't get marked
     2930             :                  * uptodate.  Error out.
     2931             :                  */
    2932       80670 :                 if (block_start == EXTENT_MAP_INLINE) {
    2933             :                         SetPageError(page);
    2934           0 :                         if (!parent_locked)
    2935           0 :                                 unlock_extent(tree, cur, cur + iosize - 1);
    2936           0 :                         cur = cur + iosize;
    2937           0 :                         pg_offset += iosize;
    2938           0 :                         continue;
    2939             :                 }
    2940             : 
    2941             :                 pnr -= page->index;
    2942       80670 :                 ret = submit_extent_page(rw, tree, page,
    2943             :                                          sector, disk_io_size, pg_offset,
    2944             :                                          bdev, bio, pnr,
    2945             :                                          end_bio_extent_readpage, mirror_num,
    2946             :                                          *bio_flags,
    2947             :                                          this_bio_flag);
    2948       80670 :                 if (!ret) {
    2949       80670 :                         nr++;
    2950       80670 :                         *bio_flags = this_bio_flag;
    2951             :                 } else {
    2952             :                         SetPageError(page);
    2953           0 :                         if (!parent_locked)
    2954           0 :                                 unlock_extent(tree, cur, cur + iosize - 1);
    2955             :                 }
    2956       80670 :                 cur = cur + iosize;
    2957       80670 :                 pg_offset += iosize;
    2958             :         }
    2959             : out:
    2960      486456 :         if (!nr) {
    2961      405786 :                 if (!PageError(page))
    2962             :                         SetPageUptodate(page);
    2963      405786 :                 unlock_page(page);
    2964             :         }
    2965      486456 :         return 0;
    2966             : }
    2967             : 
    2968       51609 : static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
    2969             :                                              struct page *pages[], int nr_pages,
    2970             :                                              u64 start, u64 end,
    2971             :                                              get_extent_t *get_extent,
    2972             :                                              struct extent_map **em_cached,
    2973             :                                              struct bio **bio, int mirror_num,
    2974             :                                              unsigned long *bio_flags, int rw)
    2975             : {
    2976             :         struct inode *inode;
    2977             :         struct btrfs_ordered_extent *ordered;
    2978             :         int index;
    2979             : 
    2980       51609 :         inode = pages[0]->mapping->host;
    2981             :         while (1) {
    2982             :                 lock_extent(tree, start, end);
    2983       51609 :                 ordered = btrfs_lookup_ordered_range(inode, start,
    2984       51609 :                                                      end - start + 1);
    2985       51609 :                 if (!ordered)
    2986             :                         break;
    2987           0 :                 unlock_extent(tree, start, end);
    2988           0 :                 btrfs_start_ordered_extent(inode, ordered, 1);
    2989           0 :                 btrfs_put_ordered_extent(ordered);
    2990           0 :         }
    2991             : 
    2992      450344 :         for (index = 0; index < nr_pages; index++) {
    2993      450344 :                 __do_readpage(tree, pages[index], get_extent, em_cached, bio,
    2994             :                               mirror_num, bio_flags, rw);
    2995      450344 :                 page_cache_release(pages[index]);
    2996             :         }
    2997       51609 : }
    2998             : 
    2999       49223 : static void __extent_readpages(struct extent_io_tree *tree,
    3000             :                                struct page *pages[],
    3001             :                                int nr_pages, get_extent_t *get_extent,
    3002             :                                struct extent_map **em_cached,
    3003             :                                struct bio **bio, int mirror_num,
    3004             :                                unsigned long *bio_flags, int rw)
    3005             : {
    3006             :         u64 start = 0;
    3007             :         u64 end = 0;
    3008             :         u64 page_start;
    3009             :         int index;
    3010             :         int first_index = 0;
    3011             : 
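                      :         /*
                      :          * batch up runs of pages that are contiguous in the file so that
                      :          * __do_contiguous_readpages() can read each run under a single
                      :          * extent lock
                      :          */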
    3012      499567 :         for (index = 0; index < nr_pages; index++) {
    3013      900688 :                 page_start = page_offset(pages[index]);
    3014      450344 :                 if (!end) {
    3015             :                         start = page_start;
    3016       49223 :                         end = start + PAGE_CACHE_SIZE - 1;
    3017             :                         first_index = index;
    3018      401121 :                 } else if (end + 1 == page_start) {
    3019      398735 :                         end += PAGE_CACHE_SIZE;
    3020             :                 } else {
    3021        2386 :                         __do_contiguous_readpages(tree, &pages[first_index],
    3022             :                                                   index - first_index, start,
    3023             :                                                   end, get_extent, em_cached,
    3024             :                                                   bio, mirror_num, bio_flags,
    3025             :                                                   rw);
    3026             :                         start = page_start;
    3027        2386 :                         end = start + PAGE_CACHE_SIZE - 1;
    3028             :                         first_index = index;
    3029             :                 }
    3030             :         }
    3031             : 
    3032       49223 :         if (end)
    3033       49223 :                 __do_contiguous_readpages(tree, &pages[first_index],
    3034             :                                           index - first_index, start,
    3035             :                                           end, get_extent, em_cached, bio,
    3036             :                                           mirror_num, bio_flags, rw);
    3037       49223 : }
    3038             : 
    3039       36048 : static int __extent_read_full_page(struct extent_io_tree *tree,
    3040       36048 :                                    struct page *page,
    3041             :                                    get_extent_t *get_extent,
    3042             :                                    struct bio **bio, int mirror_num,
    3043             :                                    unsigned long *bio_flags, int rw)
    3044             : {
    3045       36048 :         struct inode *inode = page->mapping->host;
    3046             :         struct btrfs_ordered_extent *ordered;
    3047       36048 :         u64 start = page_offset(page);
    3048       36048 :         u64 end = start + PAGE_CACHE_SIZE - 1;
    3049             :         int ret;
    3050             : 
    3051             :         while (1) {
    3052             :                 lock_extent(tree, start, end);
    3053       36048 :                 ordered = btrfs_lookup_ordered_extent(inode, start);
    3054       36048 :                 if (!ordered)
    3055             :                         break;
    3056           0 :                 unlock_extent(tree, start, end);
    3057           0 :                 btrfs_start_ordered_extent(inode, ordered, 1);
    3058           0 :                 btrfs_put_ordered_extent(ordered);
    3059           0 :         }
    3060             : 
    3061       36048 :         ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
    3062             :                             bio_flags, rw);
    3063       36048 :         return ret;
    3064             : }
    3065             : 
    3066       27771 : int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
    3067             :                             get_extent_t *get_extent, int mirror_num)
    3068             : {
    3069       27771 :         struct bio *bio = NULL;
    3070       27771 :         unsigned long bio_flags = 0;
    3071             :         int ret;
    3072             : 
    3073       27771 :         ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
    3074             :                                       &bio_flags, READ);
    3075       27771 :         if (bio)
    3076         969 :                 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
    3077       27771 :         return ret;
    3078             : }
    3079             : 
    3080          64 : int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
    3081             :                                  get_extent_t *get_extent, int mirror_num)
    3082             : {
    3083          64 :         struct bio *bio = NULL;
    3084          64 :         unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
    3085             :         int ret;
    3086             : 
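                      :         /*
                      :          * EXTENT_BIO_PARENT_LOCKED tells __do_readpage that the caller
                      :          * already holds the extent range locked, so it skips the
                      :          * unlock_extent() calls it would otherwise make for each chunk
                      :          */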
    3087          64 :         ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
    3088             :                                       &bio_flags, READ);
    3089          64 :         if (bio)
    3090          64 :                 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
    3091          64 :         return ret;
    3092             : }
    3093             : 
    3094     1485946 : static noinline void update_nr_written(struct page *page,
    3095             :                                       struct writeback_control *wbc,
    3096             :                                       unsigned long nr_written)
    3097             : {
    3098     1485946 :         wbc->nr_to_write -= nr_written;
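                      :         /*
                      :          * only advance the mapping's writeback_index for cyclic or
                      :          * whole-file writeback, so the next pass resumes where this one
                      :          * left off
                      :          */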
    3099     2418637 :         if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
    3100     1722071 :             wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
    3101     1342513 :                 page->mapping->writeback_index = page->index + nr_written;
    3102     1485946 : }
    3103             : 
    3104             : /*
    3105             :  * helper for __extent_writepage, doing all of the delayed allocation setup.
    3106             :  *
    3107             :  * This returns 1 if our fill_delalloc function did all the work required
    3108             :  * to write the page (copy into inline extent).  In this case the IO has
    3109             :  * been started and the page is already unlocked.
    3110             :  *
    3111             :  * This returns 0 if all went well (page still locked)
    3112             :  * This returns < 0 if there were errors (page still locked)
    3113             :  */
    3114     1312952 : static noinline_for_stack int writepage_delalloc(struct inode *inode,
    3115             :                               struct page *page, struct writeback_control *wbc,
    3116             :                               struct extent_page_data *epd,
    3117             :                               u64 delalloc_start,
    3118             :                               unsigned long *nr_written)
    3119             : {
    3120     1312952 :         struct extent_io_tree *tree = epd->tree;
    3121     1312952 :         u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1;
    3122             :         u64 nr_delalloc;
    3123             :         u64 delalloc_to_write = 0;
    3124     1312952 :         u64 delalloc_end = 0;
    3125             :         int ret;
    3126     1312952 :         int page_started = 0;
    3127             : 
    3128     1312952 :         if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
    3129             :                 return 0;
    3130             : 
    3131     2625685 :         while (delalloc_end < page_end) {
    3132     1312843 :                 nr_delalloc = find_lock_delalloc_range(inode, tree,
    3133             :                                                page,
    3134             :                                                &delalloc_start,
    3135             :                                                &delalloc_end,
    3136             :                                                128 * 1024 * 1024);
    3137     1312839 :                 if (nr_delalloc == 0) {
    3138     1282958 :                         delalloc_start = delalloc_end + 1;
    3139     1282958 :                         continue;
    3140             :                 }
    3141       29881 :                 ret = tree->ops->fill_delalloc(inode, page,
    3142             :                                                delalloc_start,
    3143             :                                                delalloc_end,
    3144             :                                                &page_started,
    3145             :                                                nr_written);
    3146             :                 /* File system has been set read-only */
    3147       29880 :                 if (ret) {
    3148             :                         SetPageError(page);
     3149             :                         /* fill_delalloc should return < 0 on error, but
     3150             :                          * just in case it returned > 0 (meaning the IO
     3151             :                          * has been started), turn that into -EIO so we
     3152             :                          * never return > 0 unless things are going well.
     3153             :                          */
    3154           1 :                         ret = ret < 0 ? ret : -EIO;
    3155             :                         goto done;
    3156             :                 }
    3157             :                 /*
    3158             :                  * delalloc_end is already one less than the total
    3159             :                  * length, so we don't subtract one from
    3160             :                  * PAGE_CACHE_SIZE
    3161             :                  */
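                      :                 /*
                      :                  * e.g. with 4K pages, delalloc_start == 0 and
                      :                  * delalloc_end == 8191 (inclusive) cover 8192 bytes:
                      :                  * (8191 - 0 + 4096) >> 12 == 2 pages
                      :                  */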
    3162       59760 :                 delalloc_to_write += (delalloc_end - delalloc_start +
    3163       29880 :                                       PAGE_CACHE_SIZE) >>
    3164             :                                       PAGE_CACHE_SHIFT;
    3165       29880 :                 delalloc_start = delalloc_end + 1;
    3166             :         }
    3167     1312842 :         if (wbc->nr_to_write < delalloc_to_write) {
    3168             :                 int thresh = 8192;
    3169             : 
    3170           3 :                 if (delalloc_to_write < thresh * 2)
    3171           1 :                         thresh = delalloc_to_write;
    3172           3 :                 wbc->nr_to_write = min_t(u64, delalloc_to_write,
    3173             :                                          thresh);
    3174             :         }
    3175             : 
    3176             :         /* did the fill delalloc function already unlock and start
    3177             :          * the IO?
    3178             :          */
    3179     1312842 :         if (page_started) {
    3180             :                 /*
    3181             :                  * we've unlocked the page, so we can't update
    3182             :                  * the mapping's writeback index, just update
    3183             :                  * nr_to_write.
    3184             :                  */
    3185        4059 :                 wbc->nr_to_write -= *nr_written;
    3186             :                 return 1;
    3187             :         }
    3188             : 
    3189             :         ret = 0;
    3190             : 
    3191             : done:
    3192             :         return ret;
    3193             : }
    3194             : 
    3195             : /*
    3196             :  * helper for __extent_writepage.  This calls the writepage start hooks,
    3197             :  * and does the loop to map the page into extents and bios.
    3198             :  *
    3199             :  * We return 1 if the IO is started and the page is unlocked,
    3200             :  * 0 if all went well (page still locked)
    3201             :  * < 0 if there were errors (page still locked)
    3202             :  */
    3203     1308893 : static noinline_for_stack int __extent_writepage_io(struct inode *inode,
    3204     1308893 :                                  struct page *page,
    3205             :                                  struct writeback_control *wbc,
    3206             :                                  struct extent_page_data *epd,
    3207             :                                  loff_t i_size,
    3208             :                                  unsigned long nr_written,
    3209             :                                  int write_flags, int *nr_ret)
    3210             : {
    3211     1308893 :         struct extent_io_tree *tree = epd->tree;
    3212     1308893 :         u64 start = page_offset(page);
    3213     1308893 :         u64 page_end = start + PAGE_CACHE_SIZE - 1;
    3214             :         u64 end;
    3215             :         u64 cur = start;
    3216             :         u64 extent_offset;
    3217             :         u64 block_start;
    3218             :         u64 iosize;
    3219             :         sector_t sector;
    3220             :         struct extent_state *cached_state = NULL;
    3221     1308916 :         struct extent_map *em;
    3222             :         struct block_device *bdev;
    3223             :         size_t pg_offset = 0;
    3224             :         size_t blocksize;
    3225             :         int ret = 0;
    3226             :         int nr = 0;
    3227             :         bool compressed;
    3228             : 
    3229     1308893 :         if (tree->ops && tree->ops->writepage_start_hook) {
    3230     1308894 :                 ret = tree->ops->writepage_start_hook(page, start,
    3231             :                                                       page_end);
    3232     1308916 :                 if (ret) {
    3233             :                         /* Fixup worker will requeue */
    3234           0 :                         if (ret == -EBUSY)
    3235           0 :                                 wbc->pages_skipped++;
    3236             :                         else
    3237           0 :                                 redirty_page_for_writepage(wbc, page);
    3238             : 
    3239           0 :                         update_nr_written(page, wbc, nr_written);
    3240           0 :                         unlock_page(page);
    3241             :                         ret = 1;
    3242           0 :                         goto done_unlocked;
    3243             :                 }
    3244             :         }
    3245             : 
    3246             :         /*
    3247             :          * we don't want to touch the inode after unlocking the page,
    3248             :          * so we update the mapping writeback index now
    3249             :          */
    3250     1308915 :         update_nr_written(page, wbc, nr_written + 1);
    3251             : 
    3252             :         end = page_end;
    3253     1308915 :         if (i_size <= start) {
    3254           0 :                 if (tree->ops && tree->ops->writepage_end_io_hook)
    3255           0 :                         tree->ops->writepage_end_io_hook(page, start,
    3256             :                                                          page_end, NULL, 1);
    3257             :                 goto done;
    3258             :         }
    3259             : 
    3260     1308915 :         blocksize = inode->i_sb->s_blocksize;
    3261             : 
    3262     3926720 :         while (cur <= end) {
    3263             :                 u64 em_end;
    3264     1308915 :                 if (cur >= i_size) {
    3265           0 :                         if (tree->ops && tree->ops->writepage_end_io_hook)
    3266           0 :                                 tree->ops->writepage_end_io_hook(page, cur,
    3267             :                                                          page_end, NULL, 1);
    3268             :                         break;
    3269             :                 }
    3270     2617830 :                 em = epd->get_extent(inode, page, pg_offset, cur,
    3271     1308915 :                                      end - cur + 1, 1);
    3272     1308916 :                 if (IS_ERR_OR_NULL(em)) {
    3273             :                         SetPageError(page);
    3274             :                         ret = PTR_ERR_OR_ZERO(em);
    3275           0 :                         break;
    3276             :                 }
    3277             : 
    3278     1308916 :                 extent_offset = cur - em->start;
    3279             :                 em_end = extent_map_end(em);
    3280     1308916 :                 BUG_ON(em_end <= cur);
    3281     1308916 :                 BUG_ON(end < cur);
    3282     1308916 :                 iosize = min(em_end - cur, end - cur + 1);
    3283     1308916 :                 iosize = ALIGN(iosize, blocksize);
    3284     1308916 :                 sector = (em->block_start + extent_offset) >> 9;
    3285     1308916 :                 bdev = em->bdev;
    3286             :                 block_start = em->block_start;
    3287             :                 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
    3288     1308916 :                 free_extent_map(em);
    3289             :                 em = NULL;
    3290             : 
    3291             :                 /*
    3292             :                  * compressed and inline extents are written through other
    3293             :                  * paths in the FS
    3294             :                  */
    3295     1308913 :                 if (compressed || block_start == EXTENT_MAP_HOLE ||
    3296             :                     block_start == EXTENT_MAP_INLINE) {
    3297             :                         /*
    3298             :                          * end_io notification does not happen here for
    3299             :                          * compressed extents
    3300             :                          */
    3301           0 :                         if (!compressed && tree->ops &&
    3302           0 :                             tree->ops->writepage_end_io_hook)
    3303           0 :                                 tree->ops->writepage_end_io_hook(page, cur,
    3304           0 :                                                          cur + iosize - 1,
    3305             :                                                          NULL, 1);
    3306           0 :                         else if (compressed) {
    3307             :                                 /* we don't want to end_page_writeback on
    3308             :                                  * a compressed extent.  this happens
    3309             :                                  * elsewhere
    3310             :                                  */
    3311           0 :                                 nr++;
    3312             :                         }
    3313             : 
    3314           0 :                         cur += iosize;
    3315           0 :                         pg_offset += iosize;
    3316           0 :                         continue;
    3317             :                 }
    3318             : 
    3319     1308913 :                 if (tree->ops && tree->ops->writepage_io_hook) {
    3320           0 :                         ret = tree->ops->writepage_io_hook(page, cur,
    3321           0 :                                                 cur + iosize - 1);
    3322             :                 } else {
    3323             :                         ret = 0;
    3324             :                 }
    3325     1308913 :                 if (ret) {
    3326             :                         SetPageError(page);
    3327             :                 } else {
    3328             :                         unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1;
    3329             : 
    3330     1308913 :                         set_range_writeback(tree, cur, cur + iosize - 1);
    3331     1308912 :                         if (!PageWriteback(page)) {
    3332           0 :                                 btrfs_err(BTRFS_I(inode)->root->fs_info,
    3333             :                                            "page %lu not writeback, cur %llu end %llu",
    3334             :                                        page->index, cur, end);
    3335             :                         }
    3336             : 
    3337     1308912 :                         ret = submit_extent_page(write_flags, tree, page,
    3338             :                                                  sector, iosize, pg_offset,
    3339             :                                                  bdev, &epd->bio, max_nr,
    3340             :                                                  end_bio_extent_writepage,
    3341             :                                                  0, 0, 0);
    3342     1308890 :                         if (ret)
    3343             :                                 SetPageError(page);
    3344             :                 }
    3345     1308890 :                 cur = cur + iosize;
    3346     1308890 :                 pg_offset += iosize;
    3347     1308890 :                 nr++;
    3348             :         }
    3349             : done:
    3350     1308889 :         *nr_ret = nr;
    3351             : 
    3352             : done_unlocked:
    3353             : 
    3354             :         /* drop our reference on any cached states */
    3355     1308889 :         free_extent_state(cached_state);
    3356     1308888 :         return ret;
    3357             : }
    3358             : 
    3359             : /*
    3360             :  * the writepage semantics are similar to regular writepage.  extent
    3361             :  * records are inserted to lock ranges in the tree, and as dirty areas
    3362             :  * are found, they are marked writeback.  Then the lock bits are removed
    3363             :  * and the end_io handler clears the writeback ranges
    3364             :  */
    3365     2625928 : static int __extent_writepage(struct page *page, struct writeback_control *wbc,
    3366             :                               void *data)
    3367             : {
    3368     2625928 :         struct inode *inode = page->mapping->host;
    3369             :         struct extent_page_data *epd = data;
    3370     1312964 :         u64 start = page_offset(page);
    3371     1312964 :         u64 page_end = start + PAGE_CACHE_SIZE - 1;
    3372             :         int ret;
    3373     1312964 :         int nr = 0;
    3374             :         size_t pg_offset = 0;
    3375             :         loff_t i_size = i_size_read(inode);
    3376     1312964 :         unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
    3377             :         int write_flags;
    3378     1312964 :         unsigned long nr_written = 0;
    3379             : 
    3380     1312964 :         if (wbc->sync_mode == WB_SYNC_ALL)
    3381             :                 write_flags = WRITE_SYNC;
    3382             :         else
    3383             :                 write_flags = WRITE;
    3384             : 
    3385     1312964 :         trace___extent_writepage(page, inode, wbc);
    3386             : 
    3387     1312965 :         WARN_ON(!PageLocked(page));
    3388             : 
    3389             :         ClearPageError(page);
    3390             : 
    3391     1312965 :         pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
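                      :         /*
                      :          * e.g. with 4K pages and i_size == 10000: end_index == 2 and
                      :          * pg_offset == 1808, so pages past index 2 are invalidated and
                      :          * page 2 gets bytes 1808..4095 zeroed below.  If i_size were
                      :          * exactly 8192, page 2 would start at EOF and be invalidated too.
                      :          */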
    3392     1312965 :         if (page->index > end_index ||
    3393       10355 :            (page->index == end_index && !pg_offset)) {
    3394           4 :                 page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
    3395           3 :                 unlock_page(page);
    3396           3 :                 return 0;
    3397             :         }
    3398             : 
    3399     1312961 :         if (page->index == end_index) {
    3400             :                 char *userpage;
    3401             : 
    3402             :                 userpage = kmap_atomic(page);
    3403       10355 :                 memset(userpage + pg_offset, 0,
    3404             :                        PAGE_CACHE_SIZE - pg_offset);
    3405             :                 kunmap_atomic(userpage);
    3406             :                 flush_dcache_page(page);
    3407             :         }
    3408             : 
    3409             :         pg_offset = 0;
    3410             : 
    3411     1312961 :         set_page_extent_mapped(page);
    3412             : 
    3413     1312952 :         ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
    3414     1312951 :         if (ret == 1)
    3415             :                 goto done_unlocked;
    3416     1308893 :         if (ret)
    3417             :                 goto done;
    3418             : 
    3419     1308893 :         ret = __extent_writepage_io(inode, page, wbc, epd,
    3420             :                                     i_size, nr_written, write_flags, &nr);
    3421     1308889 :         if (ret == 1)
    3422             :                 goto done_unlocked;
    3423             : 
    3424             : done:
    3425     1308891 :         if (nr == 0) {
    3426             :                 /* make sure the mapping tag for page dirty gets cleared */
    3427             :                 set_page_writeback(page);
    3428           0 :                 end_page_writeback(page);
    3429             :         }
    3430     1308891 :         if (PageError(page)) {
    3431           0 :                 ret = ret < 0 ? ret : -EIO;
    3432           0 :                 end_extent_writepage(page, ret, start, page_end);
    3433             :         }
    3434     1308891 :         unlock_page(page);
    3435     1308901 :         return ret;
    3436             : 
    3437             : done_unlocked:
    3438             :         return 0;
    3439             : }
    3440             : 
    3441           0 : void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
    3442             : {
    3443          45 :         wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
    3444             :                        TASK_UNINTERRUPTIBLE);
    3445           0 : }
    3446             : 
    3447             : static noinline_for_stack int
    3448       57106 : lock_extent_buffer_for_io(struct extent_buffer *eb,
    3449             :                           struct btrfs_fs_info *fs_info,
    3450             :                           struct extent_page_data *epd)
    3451             : {
    3452             :         unsigned long i, num_pages;
    3453             :         int flush = 0;
    3454             :         int ret = 0;
    3455             : 
    3456       57106 :         if (!btrfs_try_tree_write_lock(eb)) {
    3457             :                 flush = 1;
    3458          50 :                 flush_write_bio(epd);
    3459          50 :                 btrfs_tree_lock(eb);
    3460             :         }
    3461             : 
    3462       57107 :         if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
    3463          45 :                 btrfs_tree_unlock(eb);
    3464          45 :                 if (!epd->sync_io)
    3465             :                         return 0;
    3466          45 :                 if (!flush) {
    3467          45 :                         flush_write_bio(epd);
    3468             :                         flush = 1;
    3469             :                 }
    3470             :                 while (1) {
    3471             :                         wait_on_extent_buffer_writeback(eb);
    3472          45 :                         btrfs_tree_lock(eb);
    3473          45 :                         if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
    3474             :                                 break;
    3475           0 :                         btrfs_tree_unlock(eb);
    3476           0 :                 }
    3477             :         }
    3478             : 
    3479             :         /*
     3480             :          * We need to do this to prevent races with anyone checking whether the
     3481             :          * eb is under IO, since we can end up having no IO bits set for a short
     3482             :          * period of time.
    3483             :          */
    3484             :         spin_lock(&eb->refs_lock);
    3485      114213 :         if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
    3486             :                 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
    3487             :                 spin_unlock(&eb->refs_lock);
    3488             :                 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
    3489      114052 :                 __percpu_counter_add(&fs_info->dirty_metadata_bytes,
    3490       57026 :                                      -eb->len,
    3491             :                                      fs_info->dirty_metadata_batch);
    3492             :                 ret = 1;
    3493             :         } else {
    3494             :                 spin_unlock(&eb->refs_lock);
    3495             :         }
    3496             : 
    3497       57107 :         btrfs_tree_unlock(eb);
    3498             : 
    3499       57107 :         if (!ret)
    3500             :                 return ret;
    3501             : 
    3502       57026 :         num_pages = num_extent_pages(eb->start, eb->len);
    3503      234073 :         for (i = 0; i < num_pages; i++) {
    3504             :                 struct page *p = extent_buffer_page(eb, i);
    3505             : 
    3506      177047 :                 if (!trylock_page(p)) {
    3507           0 :                         if (!flush) {
    3508           0 :                                 flush_write_bio(epd);
    3509             :                                 flush = 1;
    3510             :                         }
    3511           0 :                         lock_page(p);
    3512             :                 }
    3513             :         }
    3514             : 
    3515             :         return ret;
    3516             : }
    3517             : 
    3518       57026 : static void end_extent_buffer_writeback(struct extent_buffer *eb)
    3519             : {
    3520             :         clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
    3521       57026 :         smp_mb__after_atomic();
    3522       57026 :         wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
    3523       57026 : }
    3524             : 
    3525       27921 : static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
    3526             : {
    3527             :         struct bio_vec *bvec;
    3528             :         struct extent_buffer *eb;
    3529             :         int i, done;
    3530             : 
    3531      204968 :         bio_for_each_segment_all(bvec, bio, i) {
    3532      177047 :                 struct page *page = bvec->bv_page;
    3533             : 
    3534      177047 :                 eb = (struct extent_buffer *)page->private;
    3535      177047 :                 BUG_ON(!eb);
    3536      177047 :                 done = atomic_dec_and_test(&eb->io_pages);
    3537             : 
    3538      354094 :                 if (err || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
    3539             :                         set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
    3540             :                         ClearPageUptodate(page);
    3541             :                         SetPageError(page);
    3542             :                 }
    3543             : 
    3544      177047 :                 end_page_writeback(page);
    3545             : 
    3546      177047 :                 if (!done)
    3547      120021 :                         continue;
    3548             : 
    3549       57026 :                 end_extent_buffer_writeback(eb);
    3550             :         }
    3551             : 
    3552       27921 :         bio_put(bio);
    3553       27921 : }
    3554             : 
    3555      114052 : static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
    3556             :                         struct btrfs_fs_info *fs_info,
    3557             :                         struct writeback_control *wbc,
    3558             :                         struct extent_page_data *epd)
    3559             : {
    3560       57026 :         struct block_device *bdev = fs_info->fs_devices->latest_bdev;
    3561       57026 :         struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
    3562       57026 :         u64 offset = eb->start;
    3563             :         unsigned long i, num_pages;
    3564             :         unsigned long bio_flags = 0;
    3565       57026 :         int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
    3566             :         int ret = 0;
    3567             : 
    3568             :         clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
    3569       57026 :         num_pages = num_extent_pages(eb->start, eb->len);
    3570       57026 :         atomic_set(&eb->io_pages, num_pages);
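                      :         /*
                      :          * tag tree log blocks so that submit_extent_page() never merges
                      :          * them into the same bio as other metadata writes (it starts a
                      :          * new bio whenever the bio flags differ)
                      :          */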
    3571       57026 :         if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
    3572             :                 bio_flags = EXTENT_BIO_TREE_LOG;
    3573             : 
    3574      177047 :         for (i = 0; i < num_pages; i++) {
    3575             :                 struct page *p = extent_buffer_page(eb, i);
    3576             : 
    3577      177047 :                 clear_page_dirty_for_io(p);
    3578             :                 set_page_writeback(p);
    3579      177047 :                 ret = submit_extent_page(rw, tree, p, offset >> 9,
    3580             :                                          PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
    3581             :                                          -1, end_bio_extent_buffer_writepage,
    3582             :                                          0, epd->bio_flags, bio_flags);
    3583      177047 :                 epd->bio_flags = bio_flags;
    3584      177047 :                 if (ret) {
    3585             :                         set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
    3586             :                         SetPageError(p);
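                      :                         /*
                      :                          * drop the io_pages counts for the pages we never
                      :                          * submitted; if the pages that were submitted have
                      :                          * all completed, finish writeback on the eb here
                      :                          */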
    3587           0 :                         if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
    3588           0 :                                 end_extent_buffer_writeback(eb);
    3589             :                         ret = -EIO;
    3590             :                         break;
    3591             :                 }
    3592      177047 :                 offset += PAGE_CACHE_SIZE;
    3593      177047 :                 update_nr_written(p, wbc, 1);
    3594      177047 :                 unlock_page(p);
    3595             :         }
    3596             : 
    3597       57026 :         if (unlikely(ret)) {
    3598           0 :                 for (; i < num_pages; i++) {
    3599             :                         struct page *p = extent_buffer_page(eb, i);
    3600           0 :                         unlock_page(p);
    3601             :                 }
    3602             :         }
    3603             : 
    3604       57026 :         return ret;
    3605             : }
    3606             : 
    3607       21234 : int btree_write_cache_pages(struct address_space *mapping,
    3608             :                                    struct writeback_control *wbc)
    3609             : {
    3610       21234 :         struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
    3611       78260 :         struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
    3612             :         struct extent_buffer *eb, *prev_eb = NULL;
    3613       42468 :         struct extent_page_data epd = {
    3614             :                 .bio = NULL,
    3615             :                 .tree = tree,
    3616             :                 .extent_locked = 0,
    3617       21234 :                 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
    3618             :                 .bio_flags = 0,
    3619             :         };
    3620             :         int ret = 0;
    3621             :         int done = 0;
    3622             :         int nr_to_write_done = 0;
    3623             :         struct pagevec pvec;
    3624             :         int nr_pages;
    3625             :         pgoff_t index;
    3626             :         pgoff_t end;            /* Inclusive */
    3627             :         int scanned = 0;
    3628             :         int tag;
    3629             : 
    3630             :         pagevec_init(&pvec, 0);
    3631       21234 :         if (wbc->range_cyclic) {
    3632           0 :                 index = mapping->writeback_index; /* Start from prev offset */
    3633             :                 end = -1;
    3634             :         } else {
    3635       21234 :                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
    3636       21234 :                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
    3637             :                 scanned = 1;
    3638             :         }
    3639       21234 :         if (wbc->sync_mode == WB_SYNC_ALL)
    3640             :                 tag = PAGECACHE_TAG_TOWRITE;
    3641             :         else
    3642             :                 tag = PAGECACHE_TAG_DIRTY;
    3643             : retry:
    3644       21234 :         if (wbc->sync_mode == WB_SYNC_ALL)
    3645       21234 :                 tag_pages_for_writeback(mapping, index, end);
    3646       71732 :         while (!done && !nr_to_write_done && (index <= end) &&
    3647       29025 :                (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
    3648       29025 :                         min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
    3649             :                 unsigned i;
    3650             : 
    3651             :                 scanned = 1;
    3652      163899 :                 for (i = 0; i < nr_pages; i++) {
    3653      163969 :                         struct page *page = pvec.pages[i];
    3654             : 
    3655      163969 :                         if (!PagePrivate(page))
    3656           0 :                                 continue;
    3657             : 
    3658      163969 :                         if (!wbc->range_cyclic && page->index > end) {
    3659             :                                 done = 1;
    3660             :                                 break;
    3661             :                         }
    3662             : 
    3663             :                         spin_lock(&mapping->private_lock);
    3664      163899 :                         if (!PagePrivate(page)) {
    3665             :                                 spin_unlock(&mapping->private_lock);
    3666           0 :                                 continue;
    3667             :                         }
    3668             : 
    3669      163899 :                         eb = (struct extent_buffer *)page->private;
    3670             : 
    3671             :                         /*
    3672             :                          * Shouldn't happen and normally this would be a BUG_ON
     3673             :                          * but no sense in crashing the user's box for something
    3674             :                          * we can survive anyway.
    3675             :                          */
    3676      163899 :                         if (WARN_ON(!eb)) {
    3677             :                                 spin_unlock(&mapping->private_lock);
    3678           0 :                                 continue;
    3679             :                         }
    3680             : 
    3681      163899 :                         if (eb == prev_eb) {
    3682             :                                 spin_unlock(&mapping->private_lock);
    3683      106792 :                                 continue;
    3684             :                         }
    3685             : 
    3686             :                         ret = atomic_inc_not_zero(&eb->refs);
    3687             :                         spin_unlock(&mapping->private_lock);
    3688       57106 :                         if (!ret)
    3689           0 :                                 continue;
    3690             : 
    3691             :                         prev_eb = eb;
    3692       57106 :                         ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
    3693       57107 :                         if (!ret) {
    3694          81 :                                 free_extent_buffer(eb);
    3695          81 :                                 continue;
    3696             :                         }
    3697             : 
    3698       57026 :                         ret = write_one_eb(eb, fs_info, wbc, &epd);
    3699       57026 :                         if (ret) {
    3700             :                                 done = 1;
    3701           0 :                                 free_extent_buffer(eb);
    3702           0 :                                 break;
    3703             :                         }
    3704       57026 :                         free_extent_buffer(eb);
    3705             : 
    3706             :                         /*
    3707             :                          * the filesystem may choose to bump up nr_to_write.
    3708             :                          * We have to make sure to honor the new nr_to_write
    3709             :                          * at any time
    3710             :                          */
    3711       57026 :                         nr_to_write_done = wbc->nr_to_write <= 0;
    3712             :                 }
    3713             :                 pagevec_release(&pvec);
    3714       21473 :                 cond_resched();
    3715             :         }
    3716       21234 :         if (!scanned && !done) {
    3717             :                 /*
    3718             :                  * We hit the last page and there is more work to be done: wrap
    3719             :                  * back to the start of the file
    3720             :                  */
    3721             :                 scanned = 1;
    3722           0 :                 index = 0;
    3723           0 :                 goto retry;
    3724             :         }
    3725       21234 :         flush_write_bio(&epd);
    3726       21234 :         return ret;
    3727             : }
    3728             : 
    3729             : /**
     3730             :  * extent_write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
    3731             :  * @mapping: address space structure to write
    3732             :  * @wbc: subtract the number of written pages from *@wbc->nr_to_write
    3733             :  * @writepage: function called for each page
    3734             :  * @data: data passed to writepage function
    3735             :  *
     3736             :  * If a page is already under I/O, extent_write_cache_pages() skips it, even
    3737             :  * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
    3738             :  * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
    3739             :  * and msync() need to guarantee that all the data which was dirty at the time
    3740             :  * the call was made get new I/O started against them.  If wbc->sync_mode is
    3741             :  * WB_SYNC_ALL then we were called for data integrity and we must wait for
    3742             :  * existing IO to complete.
    3743             :  */
    3744       38221 : static int extent_write_cache_pages(struct extent_io_tree *tree,
    3745             :                              struct address_space *mapping,
    3746             :                              struct writeback_control *wbc,
    3747             :                              writepage_t writepage, void *data,
    3748             :                              void (*flush_fn)(void *))
    3749             : {
    3750       38221 :         struct inode *inode = mapping->host;
    3751             :         int ret = 0;
    3752             :         int done = 0;
    3753             :         int err = 0;
    3754             :         int nr_to_write_done = 0;
    3755             :         struct pagevec pvec;
    3756             :         int nr_pages;
    3757             :         pgoff_t index;
    3758             :         pgoff_t end;            /* Inclusive */
    3759             :         int scanned = 0;
    3760             :         int tag;
    3761             : 
    3762             :         /*
    3763             :          * We have to hold onto the inode so that ordered extents can do their
     3764             :          * work when the IO finishes.  The alternative is to fail adding the
     3765             :          * ordered extent when the igrab() there fails, and that is a huge pain
     3766             :          * to deal with, so instead just hold onto the inode throughout the
     3767             :          * writepages operation.  If the igrab() here fails we are freeing up
     3768             :          * the inode anyway and we'd rather not waste time writing out stuff
     3769             :          * that is about to be truncated.
    3770             :          */
    3771       38221 :         if (!igrab(inode))
    3772             :                 return 0;
    3773             : 
    3774             :         pagevec_init(&pvec, 0);
    3775       34946 :         if (wbc->range_cyclic) {
    3776        6687 :                 index = mapping->writeback_index; /* Start from prev offset */
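                      :         /*
                      :          * for WB_SYNC_ALL, tag the dirty pages up front and walk the
                      :          * TOWRITE tag so that pages dirtied while we write are not
                      :          * picked up again and the sweep cannot livelock
                      :          */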
    3777             :                 end = -1;
    3778             :         } else {
    3779       28259 :                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
    3780       28259 :                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
    3781             :                 scanned = 1;
    3782             :         }
    3783       34946 :         if (wbc->sync_mode == WB_SYNC_ALL)
    3784             :                 tag = PAGECACHE_TAG_TOWRITE;
    3785             :         else
    3786             :                 tag = PAGECACHE_TAG_DIRTY;
    3787             : retry:
    3788       35056 :         if (wbc->sync_mode == WB_SYNC_ALL)
    3789       17387 :                 tag_pages_for_writeback(mapping, index, end);
    3790      286121 :         while (!done && !nr_to_write_done && (index <= end) &&
    3791      142502 :                (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
    3792      142502 :                         min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
    3793             :                 unsigned i;
    3794             : 
    3795             :                 scanned = 1;
    3796     1314224 :                 for (i = 0; i < nr_pages; i++) {
    3797     1314224 :                         struct page *page = pvec.pages[i];
    3798             : 
    3799             :                         /*
    3800             :                          * At this point we hold neither mapping->tree_lock nor
    3801             :                          * lock on the page itself: the page may be truncated or
    3802             :                          * invalidated (changing page->mapping to NULL), or even
    3803             :                          * swizzled back from swapper_space to tmpfs file
    3804             :                          * mapping
    3805             :                          */
    3806     1314237 :                         if (!trylock_page(page)) {
    3807         154 :                                 flush_fn(data);
    3808         154 :                                 lock_page(page);
    3809             :                         }
    3810             : 
    3811     1314237 :                         if (unlikely(page->mapping != mapping)) {
    3812           0 :                                 unlock_page(page);
    3813           0 :                                 continue;
    3814             :                         }
    3815             : 
    3816     1314237 :                         if (!wbc->range_cyclic && page->index > end) {
    3817             :                                 done = 1;
    3818           0 :                                 unlock_page(page);
    3819           0 :                                 continue;
    3820             :                         }
    3821             : 
    3822     1314237 :                         if (wbc->sync_mode != WB_SYNC_NONE) {
    3823      199209 :                                 if (PageWriteback(page))
    3824          29 :                                         flush_fn(data);
    3825      199209 :                                 wait_on_page_writeback(page);
    3826             :                         }
    3827             : 
    3828     2627377 :                         if (PageWriteback(page) ||
    3829     1313154 :                             !clear_page_dirty_for_io(page)) {
    3830        1365 :                                 unlock_page(page);
    3831        1367 :                                 continue;
    3832             :                         }
    3833             : 
    3834     1312858 :                         ret = (*writepage)(page, wbc, data);
    3835             : 
    3836     1312858 :                         if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
    3837           0 :                                 unlock_page(page);
    3838             :                                 ret = 0;
    3839             :                         }
    3840     1312857 :                         if (!err && ret < 0)
    3841             :                                 err = ret;
    3842             : 
    3843             :                         /*
    3844             :                          * the filesystem may choose to bump up nr_to_write.
    3845             :                          * We have to make sure to honor the new nr_to_write
    3846             :                          * at any time
    3847             :                          */
    3848     1312857 :                         nr_to_write_done = wbc->nr_to_write <= 0;
    3849             :                 }
    3850             :                 pagevec_release(&pvec);
    3851      108565 :                 cond_resched();
    3852             :         }
    3853       35054 :         if (!scanned && !done && !err) {
    3854             :                 /*
    3855             :                  * We hit the last page and there is more work to be done: wrap
    3856             :                  * back to the start of the file
    3857             :                  */
    3858             :                 scanned = 1;
    3859         110 :                 index = 0;
    3860             :                 goto retry;
    3861             :         }
    3862       34944 :         btrfs_add_delayed_iput(inode);
    3863             :         return err;
    3864             : }
    3865             : 
    3866       59814 : static void flush_epd_write_bio(struct extent_page_data *epd)
    3867             : {
    3868       59814 :         if (epd->bio) {
    3869             :                 int rw = WRITE;
    3870             :                 int ret;
    3871             : 
    3872       31855 :                 if (epd->sync_io)
    3873             :                         rw = WRITE_SYNC;
    3874             : 
    3875       31855 :                 ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags);
    3876       31855 :                 BUG_ON(ret < 0); /* -ENOMEM */
    3877       31855 :                 epd->bio = NULL;
    3878             :         }
    3879       59814 : }
    3880             : 
    3881       21511 : static noinline void flush_write_bio(void *data)
    3882             : {
    3883             :         struct extent_page_data *epd = data;
    3884       21511 :         flush_epd_write_bio(epd);
    3885       21512 : }
    3886             : 
    3887           0 : int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
    3888             :                           get_extent_t *get_extent,
    3889             :                           struct writeback_control *wbc)
    3890             : {
    3891             :         int ret;
    3892           0 :         struct extent_page_data epd = {
    3893             :                 .bio = NULL,
    3894             :                 .tree = tree,
    3895             :                 .get_extent = get_extent,
    3896             :                 .extent_locked = 0,
    3897           0 :                 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
    3898             :                 .bio_flags = 0,
    3899             :         };
    3900             : 
    3901           0 :         ret = __extent_writepage(page, wbc, &epd);
    3902             : 
    3903           0 :         flush_epd_write_bio(&epd);
    3904           0 :         return ret;
    3905             : }
    3906             : 
    3907          81 : int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
    3908             :                               u64 start, u64 end, get_extent_t *get_extent,
    3909             :                               int mode)
    3910             : {
    3911             :         int ret = 0;
    3912          81 :         struct address_space *mapping = inode->i_mapping;
    3913             :         struct page *page;
    3914          81 :         unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
    3915             :                 PAGE_CACHE_SHIFT;
    3916             : 
    3917         162 :         struct extent_page_data epd = {
    3918             :                 .bio = NULL,
    3919             :                 .tree = tree,
    3920             :                 .get_extent = get_extent,
    3921             :                 .extent_locked = 1,
    3922          81 :                 .sync_io = mode == WB_SYNC_ALL,
    3923             :                 .bio_flags = 0,
    3924             :         };
    3925         243 :         struct writeback_control wbc_writepages = {
    3926             :                 .sync_mode      = mode,
    3927          81 :                 .nr_to_write    = nr_pages * 2,
    3928             :                 .range_start    = start,
    3929          81 :                 .range_end      = end + 1,
    3930             :         };
    3931             : 
    3932         270 :         while (start <= end) {
    3933         108 :                 page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
    3934         108 :                 if (clear_page_dirty_for_io(page))
    3935         108 :                         ret = __extent_writepage(page, &wbc_writepages, &epd);
    3936             :                 else {
    3937           0 :                         if (tree->ops && tree->ops->writepage_end_io_hook)
    3938           0 :                                 tree->ops->writepage_end_io_hook(page, start,
    3939             :                                                  start + PAGE_CACHE_SIZE - 1,
    3940             :                                                  NULL, 1);
    3941           0 :                         unlock_page(page);
    3942             :                 }
    3943         108 :                 page_cache_release(page);
    3944         108 :                 start += PAGE_CACHE_SIZE;
    3945             :         }
    3946             : 
    3947          81 :         flush_epd_write_bio(&epd);
    3948          81 :         return ret;
    3949             : }
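
A note on the nr_pages computation in extent_write_locked_range() above: 'end' is the last byte of the range (inclusive), so (end - start + PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT is a ceiling division of the range length by the page size. A minimal userspace sketch of the same arithmetic, assuming a 4 KiB page and hypothetical EX_* names (this is not part of extent_io.c):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT 12
#define EX_PAGE_SIZE  (1UL << EX_PAGE_SHIFT)

/* 'end' is inclusive, so the span is (end - start + 1) bytes and the
 * expression below is a ceiling division of that span by the page size. */
static unsigned long pages_in_range(uint64_t start, uint64_t end)
{
        return (end - start + EX_PAGE_SIZE) >> EX_PAGE_SHIFT;
}

int main(void)
{
        assert(pages_in_range(0, EX_PAGE_SIZE - 1) == 1);      /* one page */
        assert(pages_in_range(0, 2 * EX_PAGE_SIZE - 1) == 2);  /* two pages */
        assert(pages_in_range(0, EX_PAGE_SIZE) == 2);          /* spills into a second page */
        printf("ok\n");
        return 0;
}
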
    3950             : 
    3951       38223 : int extent_writepages(struct extent_io_tree *tree,
    3952             :                       struct address_space *mapping,
    3953             :                       get_extent_t *get_extent,
    3954             :                       struct writeback_control *wbc)
    3955             : {
    3956             :         int ret = 0;
    3957       76446 :         struct extent_page_data epd = {
    3958             :                 .bio = NULL,
    3959             :                 .tree = tree,
    3960             :                 .get_extent = get_extent,
    3961             :                 .extent_locked = 0,
    3962       38223 :                 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
    3963             :                 .bio_flags = 0,
    3964             :         };
    3965             : 
    3966       38223 :         ret = extent_write_cache_pages(tree, mapping, wbc,
    3967             :                                        __extent_writepage, &epd,
    3968             :                                        flush_write_bio);
    3969       38223 :         flush_epd_write_bio(&epd);
    3970       38224 :         return ret;
    3971             : }
    3972             : 
    3973       24097 : int extent_readpages(struct extent_io_tree *tree,
    3974             :                      struct address_space *mapping,
    3975             :                      struct list_head *pages, unsigned nr_pages,
    3976             :                      get_extent_t get_extent)
    3977             : {
    3978       24097 :         struct bio *bio = NULL;
    3979             :         unsigned page_idx;
    3980       24097 :         unsigned long bio_flags = 0;
    3981             :         struct page *pagepool[16];
    3982             :         struct page *page;
    3983       24097 :         struct extent_map *em_cached = NULL;
    3984             :         int nr = 0;
    3985             : 
    3986      474441 :         for (page_idx = 0; page_idx < nr_pages; page_idx++) {
    3987      450344 :                 page = list_entry(pages->prev, struct page, lru);
    3988             : 
    3989      450344 :                 prefetchw(&page->flags);
    3990      450344 :                 list_del(&page->lru);
    3991      450344 :                 if (add_to_page_cache_lru(page, mapping,
    3992             :                                         page->index, GFP_NOFS)) {
    3993           0 :                         page_cache_release(page);
    3994           0 :                         continue;
    3995             :                 }
    3996             : 
    3997      450344 :                 pagepool[nr++] = page;
    3998      450344 :                 if (nr < ARRAY_SIZE(pagepool))
    3999      424185 :                         continue;
    4000       26159 :                 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
    4001             :                                    &bio, 0, &bio_flags, READ);
    4002             :                 nr = 0;
    4003             :         }
    4004       24097 :         if (nr)
    4005       23064 :                 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
    4006             :                                    &bio, 0, &bio_flags, READ);
    4007             : 
    4008       24097 :         if (em_cached)
    4009       24097 :                 free_extent_map(em_cached);
    4010             : 
    4011       24097 :         BUG_ON(!list_empty(pages));
    4012       24097 :         if (bio)
    4013       23283 :                 return submit_one_bio(READ, bio, 0, bio_flags);
    4014             :         return 0;
    4015             : }
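
extent_readpages() above batches pages into the 16-entry on-stack pagepool[] and hands a full pool to __extent_readpages(), then flushes whatever partial batch is left after the loop. A self-contained sketch of that batching pattern, with hypothetical names and plain ints standing in for pages:

#include <stdio.h>

#define POOL_SIZE 16    /* mirrors the 16-entry pagepool[] above */

/* stands in for __extent_readpages(): consume one full or partial batch */
static void process_batch(const int *pool, int nr)
{
        printf("batch of %d item(s)\n", nr);
}

static void read_items(const int *items, int count)
{
        int pool[POOL_SIZE];
        int nr = 0;

        for (int i = 0; i < count; i++) {
                pool[nr++] = items[i];
                if (nr < POOL_SIZE)
                        continue;               /* keep filling the pool */
                process_batch(pool, nr);        /* pool is full: submit it */
                nr = 0;
        }
        if (nr)                                 /* submit the final partial batch */
                process_batch(pool, nr);
}

int main(void)
{
        int items[40];

        for (int i = 0; i < 40; i++)
                items[i] = i;
        read_items(items, 40);                  /* prints batches of 16, 16, 8 */
        return 0;
}
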
    4016             : 
    4017             : /*
    4018             :  * basic invalidatepage code, this waits on any locked or writeback
    4019             :  * ranges corresponding to the page, and then deletes any extent state
    4020             :  * records from the tree
    4021             :  */
    4022        1335 : int extent_invalidatepage(struct extent_io_tree *tree,
    4023        1335 :                           struct page *page, unsigned long offset)
    4024             : {
    4025        1335 :         struct extent_state *cached_state = NULL;
    4026        1335 :         u64 start = page_offset(page);
    4027        1335 :         u64 end = start + PAGE_CACHE_SIZE - 1;
    4028        1335 :         size_t blocksize = page->mapping->host->i_sb->s_blocksize;
    4029             : 
    4030        1335 :         start += ALIGN(offset, blocksize);
    4031        1335 :         if (start > end)
    4032             :                 return 0;
    4033             : 
    4034        1335 :         lock_extent_bits(tree, start, end, 0, &cached_state);
    4035        1335 :         wait_on_page_writeback(page);
    4036        1335 :         clear_extent_bit(tree, start, end,
    4037             :                          EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
    4038             :                          EXTENT_DO_ACCOUNTING,
    4039             :                          1, 1, &cached_state, GFP_NOFS);
    4040        1335 :         return 0;
    4041             : }
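
In extent_invalidatepage() above, 'start += ALIGN(offset, blocksize)' rounds the invalidation offset up to the next block boundary, so state covering a block that is only partially invalidated is kept. A userspace sketch of that power-of-two round-up (align_up is a hypothetical stand-in for the kernel's ALIGN macro):

#include <assert.h>
#include <stdint.h>

/* Round x up to the next multiple of a; a must be a power of two,
 * as the filesystem block size always is. */
static uint64_t align_up(uint64_t x, uint64_t a)
{
        return (x + a - 1) & ~(a - 1);
}

int main(void)
{
        assert(align_up(0, 4096) == 0);         /* already aligned */
        assert(align_up(1, 4096) == 4096);      /* partial block is kept */
        assert(align_up(4096, 4096) == 4096);
        return 0;
}
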
    4042             : 
    4043             : /*
    4044             :  * a helper for releasepage, this tests for areas of the page that
    4045             :  * are locked or under IO and drops the related state bits if it is safe
    4046             :  * to drop the page.
    4047             :  */
    4048      188224 : static int try_release_extent_state(struct extent_map_tree *map,
    4049             :                                     struct extent_io_tree *tree,
    4050      188224 :                                     struct page *page, gfp_t mask)
    4051             : {
    4052      188224 :         u64 start = page_offset(page);
    4053      188224 :         u64 end = start + PAGE_CACHE_SIZE - 1;
    4054             :         int ret = 1;
    4055             : 
    4056      188224 :         if (test_range_bit(tree, start, end,
    4057             :                            EXTENT_IOBITS, 0, NULL))
    4058             :                 ret = 0;
    4059             :         else {
    4060      188224 :                 if ((mask & GFP_NOFS) == GFP_NOFS)
    4061             :                         mask = GFP_NOFS;
    4062             :                 /*
    4063             :                  * at this point we can safely clear everything except the
    4064             :                  * locked bit and the nodatasum bit
    4065             :                  */
    4066      188224 :                 ret = clear_extent_bit(tree, start, end,
    4067             :                                  ~(EXTENT_LOCKED | EXTENT_NODATASUM),
    4068             :                                  0, 0, NULL, mask);
    4069             : 
    4070             :                 /* if clear_extent_bit failed for enomem reasons,
    4071             :                  * we can't allow the release to continue.
    4072             :                  */
    4073      188224 :                 if (ret < 0)
    4074             :                         ret = 0;
    4075             :                 else
    4076             :                         ret = 1;
    4077             :         }
    4078      188224 :         return ret;
    4079             : }
    4080             : 
    4081             : /*
    4082             :  * a helper for releasepage.  As long as there are no locked extents
    4083             :  * in the range corresponding to the page, both state records and extent
    4084             :  * map records are removed
    4085             :  */
    4086      188224 : int try_release_extent_mapping(struct extent_map_tree *map,
    4087      188224 :                                struct extent_io_tree *tree, struct page *page,
    4088             :                                gfp_t mask)
    4089             : {
    4090           0 :         struct extent_map *em;
    4091      188224 :         u64 start = page_offset(page);
    4092      188224 :         u64 end = start + PAGE_CACHE_SIZE - 1;
    4093             : 
    4094      375172 :         if ((mask & __GFP_WAIT) &&
    4095      186948 :             page->mapping->host->i_size > 16 * 1024 * 1024) {
    4096             :                 u64 len;
    4097           0 :                 while (start <= end) {
    4098           0 :                         len = end - start + 1;
    4099           0 :                         write_lock(&map->lock);
    4100           0 :                         em = lookup_extent_mapping(map, start, len);
    4101           0 :                         if (!em) {
    4102             :                                 write_unlock(&map->lock);
    4103             :                                 break;
    4104             :                         }
    4105           0 :                         if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
    4106           0 :                             em->start != start) {
    4107             :                                 write_unlock(&map->lock);
    4108           0 :                                 free_extent_map(em);
    4109           0 :                                 break;
    4110             :                         }
    4111           0 :                         if (!test_range_bit(tree, em->start,
    4112             :                                             extent_map_end(em) - 1,
    4113             :                                             EXTENT_LOCKED | EXTENT_WRITEBACK,
    4114             :                                             0, NULL)) {
    4115           0 :                                 remove_extent_mapping(map, em);
    4116             :                                 /* once for the rb tree */
    4117           0 :                                 free_extent_map(em);
    4118             :                         }
    4119             :                         start = extent_map_end(em);
    4120             :                         write_unlock(&map->lock);
    4121             : 
    4122             :                         /* once for us */
    4123           0 :                         free_extent_map(em);
    4124             :                 }
    4125             :         }
    4126      188224 :         return try_release_extent_state(map, tree, page, mask);
    4127             : }
    4128             : 
    4129             : /*
    4130             :  * helper function for fiemap, which doesn't want to see any holes.
    4131             :  * This maps until we find something past 'last'
    4132             :  */
    4133        1030 : static struct extent_map *get_extent_skip_holes(struct inode *inode,
    4134             :                                                 u64 offset,
    4135             :                                                 u64 last,
    4136             :                                                 get_extent_t *get_extent)
    4137             : {
    4138        1030 :         u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
    4139         409 :         struct extent_map *em;
    4140             :         u64 len;
    4141             : 
    4142        1030 :         if (offset >= last)
    4143             :                 return NULL;
    4144             : 
    4145             :         while (1) {
    4146        1096 :                 len = last - offset;
    4147        1096 :                 if (len == 0)
    4148             :                         break;
    4149        1096 :                 len = ALIGN(len, sectorsize);
    4150        1096 :                 em = get_extent(inode, NULL, 0, offset, len, 0);
    4151        1095 :                 if (IS_ERR_OR_NULL(em))
    4152             :                         return em;
    4153             : 
    4154             :                 /* if this isn't a hole return it */
    4155        2190 :                 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
    4156        1094 :                     em->block_start != EXTENT_MAP_HOLE) {
    4157             :                         return em;
    4158             :                 }
    4159             : 
    4160             :                 /* this is a hole, advance to the next extent */
    4161             :                 offset = extent_map_end(em);
    4162         409 :                 free_extent_map(em);
    4163         409 :                 if (offset >= last)
    4164             :                         break;
    4165             :         }
    4166             :         return NULL;
    4167             : }
    4168             : 
    4169        1120 : static noinline int count_ext_ref(u64 inum, u64 offset, u64 root_id, void *ctx)
    4170             : {
    4171        1120 :         unsigned long cnt = *((unsigned long *)ctx);
    4172             : 
    4173        1120 :         cnt++;
    4174        1120 :         *((unsigned long *)ctx) = cnt;
    4175             : 
    4176             :         /* Now we're sure that the extent is shared. */
    4177        1120 :         if (cnt > 1)
    4178             :                 return 1;
    4179         629 :         return 0;
    4180             : }
    4181             : 
    4182         688 : int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
    4183             :                 __u64 start, __u64 len, get_extent_t *get_extent)
    4184             : {
    4185             :         int ret = 0;
    4186             :         u64 off = start;
    4187         344 :         u64 max = start + len;
    4188             :         u32 flags = 0;
    4189             :         u32 found_type;
    4190             :         u64 last;
    4191             :         u64 last_for_get_extent = 0;
    4192             :         u64 disko = 0;
    4193         344 :         u64 isize = i_size_read(inode);
    4194             :         struct btrfs_key found_key;
    4195         686 :         struct extent_map *em = NULL;
    4196         344 :         struct extent_state *cached_state = NULL;
    4197             :         struct btrfs_path *path;
    4198             :         int end = 0;
    4199             :         u64 em_start = 0;
    4200             :         u64 em_len = 0;
    4201             :         u64 em_end = 0;
    4202             : 
    4203         344 :         if (len == 0)
    4204             :                 return -EINVAL;
    4205             : 
    4206         344 :         path = btrfs_alloc_path();
    4207         344 :         if (!path)
    4208             :                 return -ENOMEM;
    4209         344 :         path->leave_spinning = 1;
    4210             : 
    4211         344 :         start = round_down(start, BTRFS_I(inode)->root->sectorsize);
    4212         344 :         len = round_up(max, BTRFS_I(inode)->root->sectorsize) - start;
    4213             : 
    4214             :         /*
    4215             :          * lookup the last file extent.  We're not using i_size here
    4216             :          * because there might be preallocation past i_size
    4217             :          */
    4218         344 :         ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
    4219             :                                        path, btrfs_ino(inode), -1, 0);
    4220         344 :         if (ret < 0) {
    4221           0 :                 btrfs_free_path(path);
    4222           0 :                 return ret;
    4223             :         }
    4224         344 :         WARN_ON(!ret);
    4225         344 :         path->slots[0]--;
    4226         344 :         btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
    4227         344 :         found_type = btrfs_key_type(&found_key);
    4228             : 
    4229             :         /* No extents, but there might be delalloc bits */
    4230         688 :         if (found_key.objectid != btrfs_ino(inode) ||
    4231             :             found_type != BTRFS_EXTENT_DATA_KEY) {
    4232             :                 /* have to trust i_size as the end */
    4233             :                 last = (u64)-1;
    4234             :                 last_for_get_extent = isize;
    4235             :         } else {
    4236             :                 /*
    4237             :                  * remember the start of the last extent.  There are a
    4238             :                  * bunch of different factors that go into the length of the
    4239             :                  * extent, so it's much less complex to remember where it started
    4240             :                  */
    4241         298 :                 last = found_key.offset;
    4242         298 :                 last_for_get_extent = last + 1;
    4243             :         }
    4244         344 :         btrfs_release_path(path);
    4245             : 
    4246             :         /*
    4247             :          * we might have some extents allocated but more delalloc past those
    4248             :          * extents.  so, we trust isize unless the start of the last extent is
    4249             :          * beyond isize
    4250             :          */
    4251         344 :         if (last < isize) {
    4252             :                 last = (u64)-1;
    4253             :                 last_for_get_extent = isize;
    4254             :         }
    4255             : 
    4256         344 :         lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
    4257             :                          &cached_state);
    4258             : 
    4259         344 :         em = get_extent_skip_holes(inode, start, last_for_get_extent,
    4260             :                                    get_extent);
    4261         343 :         if (!em)
    4262             :                 goto out;
    4263         217 :         if (IS_ERR(em)) {
    4264           0 :                 ret = PTR_ERR(em);
    4265           0 :                 goto out;
    4266             :         }
    4267             : 
    4268         686 :         while (!end) {
    4269             :                 u64 offset_in_extent = 0;
    4270             : 
    4271             :                 /* break if the extent we found is outside the range */
    4272        1372 :                 if (em->start >= max || extent_map_end(em) < off)
    4273             :                         break;
    4274             : 
    4275             :                 /*
    4276             :                  * get_extent may return an extent that starts before our
    4277             :                  * requested range.  We have to make sure the ranges
    4278             :                  * we return to fiemap always move forward and don't
    4279             :                  * overlap, so adjust the offsets here
    4280             :                  */
    4281         687 :                 em_start = max(em->start, off);
    4282             : 
    4283             :                 /*
    4284             :                  * record the offset from the start of the extent
    4285             :                  * for adjusting the disk offset below.  Only do this if the
    4286             :                  * extent isn't compressed since our in ram offset may be past
    4287             :                  * what we have actually allocated on disk.
    4288             :                  */
    4289         687 :                 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
    4290         682 :                         offset_in_extent = em_start - em->start;
    4291             :                 em_end = extent_map_end(em);
    4292         687 :                 em_len = em_end - em_start;
    4293             :                 disko = 0;
    4294             :                 flags = 0;
    4295             : 
    4296             :                 /*
    4297             :                  * bump off for our next call to get_extent
    4298             :                  */
    4299             :                 off = extent_map_end(em);
    4300         687 :                 if (off >= max)
    4301             :                         end = 1;
    4302             : 
    4303         687 :                 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
    4304             :                         end = 1;
    4305             :                         flags |= FIEMAP_EXTENT_LAST;
    4306         686 :                 } else if (em->block_start == EXTENT_MAP_INLINE) {
    4307             :                         flags |= (FIEMAP_EXTENT_DATA_INLINE |
    4308             :                                   FIEMAP_EXTENT_NOT_ALIGNED);
    4309         686 :                 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
    4310             :                         flags |= (FIEMAP_EXTENT_DELALLOC |
    4311             :                                   FIEMAP_EXTENT_UNKNOWN);
    4312             :                 } else {
    4313         677 :                         unsigned long ref_cnt = 0;
    4314             : 
    4315         677 :                         disko = em->block_start + offset_in_extent;
    4316             : 
    4317             :                         /*
    4318             :                          * As btrfs supports shared space, this information
    4319             :                          * can be exported to userspace tools via
    4320             :                          * flag FIEMAP_EXTENT_SHARED.
    4321             :                          */
    4322         677 :                         ret = iterate_inodes_from_logical(
    4323             :                                         em->block_start,
    4324         677 :                                         BTRFS_I(inode)->root->fs_info,
    4325             :                                         path, count_ext_ref, &ref_cnt);
    4326         677 :                         if (ret < 0 && ret != -ENOENT)
    4327             :                                 goto out_free;
    4328             : 
    4329         677 :                         if (ref_cnt > 1)
    4330             :                                 flags |= FIEMAP_EXTENT_SHARED;
    4331             :                 }
    4332         687 :                 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
    4333           5 :                         flags |= FIEMAP_EXTENT_ENCODED;
    4334             : 
    4335         687 :                 free_extent_map(em);
    4336             :                 em = NULL;
    4337        1330 :                 if ((em_start >= last) || em_len == (u64)-1 ||
    4338         643 :                    (last == (u64)-1 && isize <= em_end)) {
    4339         191 :                         flags |= FIEMAP_EXTENT_LAST;
    4340             :                         end = 1;
    4341             :                 }
    4342             : 
    4343             :                 /* now scan forward to see if this is really the last extent. */
    4344         687 :                 em = get_extent_skip_holes(inode, off, last_for_get_extent,
    4345             :                                            get_extent);
    4346         687 :                 if (IS_ERR(em)) {
    4347           0 :                         ret = PTR_ERR(em);
    4348           0 :                         goto out;
    4349             :                 }
    4350         687 :                 if (!em) {
    4351         217 :                         flags |= FIEMAP_EXTENT_LAST;
    4352             :                         end = 1;
    4353             :                 }
    4354         687 :                 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
    4355             :                                               em_len, flags);
    4356         687 :                 if (ret)
    4357             :                         goto out_free;
    4358             :         }
    4359             : out_free:
    4360         217 :         free_extent_map(em);
    4361             : out:
    4362         343 :         btrfs_free_path(path);
    4363             :         unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
    4364             :                              &cached_state, GFP_NOFS);
    4365         344 :         return ret;
    4366             : }
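
extent_fiemap() above is the btrfs backend for the FIEMAP ioctl. As a rough illustration of the interface it serves, a userspace tool might query a file's extents roughly as follows (fixed 32-extent buffer, minimal error handling; illustrative only, not part of extent_io.c):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
        struct fiemap *fm;
        unsigned int i;
        int fd;

        if (argc != 2)
                return 1;
        fd = open(argv[1], O_RDONLY);
        if (fd < 0)
                return 1;

        /* header plus room for 32 extent records */
        fm = calloc(1, sizeof(*fm) + 32 * sizeof(struct fiemap_extent));
        if (!fm) {
                close(fd);
                return 1;
        }
        fm->fm_start = 0;
        fm->fm_length = ~0ULL;          /* map the whole file */
        fm->fm_extent_count = 32;

        if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0) {
                for (i = 0; i < fm->fm_mapped_extents; i++)
                        printf("logical %llu physical %llu len %llu flags 0x%x\n",
                               (unsigned long long)fm->fm_extents[i].fe_logical,
                               (unsigned long long)fm->fm_extents[i].fe_physical,
                               (unsigned long long)fm->fm_extents[i].fe_length,
                               fm->fm_extents[i].fe_flags);
        }
        free(fm);
        close(fd);
        return 0;
}
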
    4367             : 
    4368             : static void __free_extent_buffer(struct extent_buffer *eb)
    4369             : {
    4370             :         btrfs_leak_debug_del(&eb->leak_list);
    4371       61440 :         kmem_cache_free(extent_buffer_cache, eb);
    4372             : }
    4373             : 
    4374           0 : int extent_buffer_under_io(struct extent_buffer *eb)
    4375             : {
    4376      122471 :         return (atomic_read(&eb->io_pages) ||
    4377      245879 :                 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
    4378             :                 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
    4379             : }
    4380             : 
    4381             : /*
    4382             :  * Helper for releasing extent buffer page.
    4383             :  */
    4384       61460 : static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
    4385             :                                                 unsigned long start_idx)
    4386             : {
    4387             :         unsigned long index;
    4388             :         unsigned long num_pages;
    4389             :         struct page *page;
    4390       61460 :         int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
    4391             : 
    4392       61460 :         BUG_ON(extent_buffer_under_io(eb));
    4393             : 
    4394       61460 :         num_pages = num_extent_pages(eb->start, eb->len);
    4395       61460 :         index = start_idx + num_pages;
    4396       61460 :         if (start_idx >= index)
    4397       61461 :                 return;
    4398             : 
    4399             :         do {
    4400      191186 :                 index--;
    4401             :                 page = extent_buffer_page(eb, index);
    4402      191186 :                 if (page && mapped) {
    4403      188257 :                         spin_lock(&page->mapping->private_lock);
    4404             :                         /*
    4405             :                          * We do this since we'll remove the pages after we've
    4406             :                          * removed the eb from the radix tree, so we could race
    4407             :                          * and have this page now attached to the new eb.  So
    4408             :                          * only clear page_private if it's still connected to
    4409             :                          * this eb.
    4410             :                          */
    4411      376518 :                         if (PagePrivate(page) &&
    4412      188259 :                             page->private == (unsigned long)eb) {
    4413      188259 :                                 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
    4414      188259 :                                 BUG_ON(PageDirty(page));
    4415      188259 :                                 BUG_ON(PageWriteback(page));
    4416             :                                 /*
    4417             :                                  * We need to make sure we haven't been attached
    4418             :                                  * to a new eb.
    4419             :                                  */
    4420             :                                 ClearPagePrivate(page);
    4421      188258 :                                 set_page_private(page, 0);
    4422             :                                 /* One for the page private */
    4423      188258 :                                 page_cache_release(page);
    4424             :                         }
    4425      188258 :                         spin_unlock(&page->mapping->private_lock);
    4426             : 
    4427             :                 }
    4428      191185 :                 if (page) {
    4429             :                         /* One for when we allocated the page */
    4430      191185 :                         page_cache_release(page);
    4431             :                 }
    4432      191187 :         } while (index != start_idx);
    4433             : }
    4434             : 
    4435             : /*
    4436             :  * Helper for releasing the extent buffer.
    4437             :  */
    4438           0 : static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
    4439             : {
    4440           0 :         btrfs_release_extent_buffer_page(eb, 0);
    4441             :         __free_extent_buffer(eb);
    4442           0 : }
    4443             : 
    4444             : static struct extent_buffer *
    4445       61762 : __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
    4446             :                       unsigned long len, gfp_t mask)
    4447             : {
    4448             :         struct extent_buffer *eb = NULL;
    4449             : 
    4450       61762 :         eb = kmem_cache_zalloc(extent_buffer_cache, mask);
    4451       61760 :         if (eb == NULL)
    4452             :                 return NULL;
    4453       61760 :         eb->start = start;
    4454       61760 :         eb->len = len;
    4455       61760 :         eb->fs_info = fs_info;
    4456       61760 :         eb->bflags = 0;
    4457       61760 :         rwlock_init(&eb->lock);
    4458             :         atomic_set(&eb->write_locks, 0);
    4459             :         atomic_set(&eb->read_locks, 0);
    4460             :         atomic_set(&eb->blocking_readers, 0);
    4461             :         atomic_set(&eb->blocking_writers, 0);
    4462             :         atomic_set(&eb->spinning_readers, 0);
    4463             :         atomic_set(&eb->spinning_writers, 0);
    4464       61760 :         eb->lock_nested = 0;
    4465       61760 :         init_waitqueue_head(&eb->write_lock_wq);
    4466       61760 :         init_waitqueue_head(&eb->read_lock_wq);
    4467             : 
    4468             :         btrfs_leak_debug_add(&eb->leak_list, &buffers);
    4469             : 
    4470       61760 :         spin_lock_init(&eb->refs_lock);
    4471             :         atomic_set(&eb->refs, 1);
    4472             :         atomic_set(&eb->io_pages, 0);
    4473             : 
    4474             :         /*
    4475             :          * Sanity checks, currently the maximum is 64k covered by 16x 4k pages
    4476             :          */
    4477             :         BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
    4478             :                 > MAX_INLINE_EXTENT_BUFFER_SIZE);
    4479       61760 :         BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
    4480             : 
    4481             :         return eb;
    4482             : }
    4483             : 
    4484         731 : struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
    4485             : {
    4486             :         unsigned long i;
    4487             :         struct page *p;
    4488             :         struct extent_buffer *new;
    4489         731 :         unsigned long num_pages = num_extent_pages(src->start, src->len);
    4490             : 
    4491         731 :         new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS);
    4492         731 :         if (new == NULL)
    4493             :                 return NULL;
    4494             : 
    4495        2924 :         for (i = 0; i < num_pages; i++) {
    4496             :                 p = alloc_page(GFP_NOFS);
    4497        2924 :                 if (!p) {
    4498           0 :                         btrfs_release_extent_buffer(new);
    4499           0 :                         return NULL;
    4500             :                 }
    4501        2924 :                 attach_extent_buffer_page(new, p);
    4502        2924 :                 WARN_ON(PageDirty(p));
    4503             :                 SetPageUptodate(p);
    4504        2924 :                 new->pages[i] = p;
    4505             :         }
    4506             : 
    4507         731 :         copy_extent_buffer(new, src, 0, 0, src->len);
    4508             :         set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
    4509             :         set_bit(EXTENT_BUFFER_DUMMY, &new->bflags);
    4510             : 
    4511         731 :         return new;
    4512             : }
    4513             : 
    4514           1 : struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
    4515             : {
    4516           1 :         struct extent_buffer *eb;
    4517             :         unsigned long num_pages = num_extent_pages(0, len);
    4518             :         unsigned long i;
    4519             : 
    4520           1 :         eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS);
    4521           1 :         if (!eb)
    4522             :                 return NULL;
    4523             : 
    4524           4 :         for (i = 0; i < num_pages; i++) {
    4525           4 :                 eb->pages[i] = alloc_page(GFP_NOFS);
    4526           4 :                 if (!eb->pages[i])
    4527             :                         goto err;
    4528             :         }
    4529           1 :         set_extent_buffer_uptodate(eb);
    4530             :         btrfs_set_header_nritems(eb, 0);
    4531             :         set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
    4532             : 
    4533           1 :         return eb;
    4534             : err:
    4535           0 :         for (; i > 0; i--)
    4536           0 :                 __free_page(eb->pages[i - 1]);
    4537             :         __free_extent_buffer(eb);
    4538           0 :         return NULL;
    4539             : }
    4540             : 
    4541     4657738 : static void check_buffer_tree_ref(struct extent_buffer *eb)
    4542             : {
    4543             :         int refs;
    4544             :         /* the ref bit is tricky.  We have to make sure it is set
    4545             :          * if we have the buffer dirty.   Otherwise the
    4546             :          * code to free a buffer can end up dropping a dirty
    4547             :          * page
    4548             :          *
    4549             :          * Once the ref bit is set, it won't go away while the
    4550             :          * buffer is dirty or in writeback, and it also won't
    4551             :          * go away while we have the reference count on the
    4552             :          * eb bumped.
    4553             :          *
    4554             :          * We can't just set the ref bit without bumping the
    4555             :          * ref on the eb because free_extent_buffer might
    4556             :          * see the ref bit and try to clear it.  If this happens
    4557             :          * free_extent_buffer might end up dropping our original
    4558             :          * ref by mistake and freeing the page before we are able
    4559             :          * to add one more ref.
    4560             :          *
    4561             :          * So bump the ref count first, then set the bit.  If someone
    4562             :          * beat us to it, drop the ref we added.
    4563             :          */
    4564             :         refs = atomic_read(&eb->refs);
    4565     9254688 :         if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
    4566     4657982 :                 return;
    4567             : 
    4568             :         spin_lock(&eb->refs_lock);
    4569      122066 :         if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
    4570       61033 :                 atomic_inc(&eb->refs);
    4571             :         spin_unlock(&eb->refs_lock);
    4572             : }
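
The ordering described by the comment in check_buffer_tree_ref() (take the extra reference first, then set the bit, and drop the reference again if another thread set it first) can be sketched with C11 atomics. This is only an illustration of that ordering; the kernel code itself serializes with eb->refs_lock and test_and_set_bit():

#include <stdatomic.h>

struct buf {
        atomic_int  refs;
        atomic_flag tree_ref;
};

/* Take the reference first, then try to claim the flag; if another
 * thread claimed it in the meantime, give the reference back. */
static void take_tree_ref(struct buf *b)
{
        atomic_fetch_add(&b->refs, 1);
        if (atomic_flag_test_and_set(&b->tree_ref))
                atomic_fetch_sub(&b->refs, 1);
}

int main(void)
{
        struct buf b = { ATOMIC_VAR_INIT(1), ATOMIC_FLAG_INIT };

        take_tree_ref(&b);      /* first caller: refs becomes 2, flag set */
        take_tree_ref(&b);      /* flag already set, refs stays at 2 */
        return atomic_load(&b.refs) == 2 ? 0 : 1;
}
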
    4573             : 
    4574     3432902 : static void mark_extent_buffer_accessed(struct extent_buffer *eb,
    4575             :                 struct page *accessed)
    4576             : {
    4577             :         unsigned long num_pages, i;
    4578             : 
    4579     3432902 :         check_buffer_tree_ref(eb);
    4580             : 
    4581     3432447 :         num_pages = num_extent_pages(eb->start, eb->len);
    4582    16298546 :         for (i = 0; i < num_pages; i++) {
    4583             :                 struct page *p = extent_buffer_page(eb, i);
    4584    12865934 :                 if (p != accessed)
    4585    12865933 :                         mark_page_accessed(p);
    4586             :         }
    4587     3432612 : }
    4588             : 
    4589     3493030 : struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
    4590             :                                          u64 start)
    4591             : {
    4592             :         struct extent_buffer *eb;
    4593             : 
    4594             :         rcu_read_lock();
    4595     3493044 :         eb = radix_tree_lookup(&fs_info->buffer_radix,
    4596     3493044 :                                start >> PAGE_CACHE_SHIFT);
    4597     6926565 :         if (eb && atomic_inc_not_zero(&eb->refs)) {
    4598             :                 rcu_read_unlock();
    4599     3433100 :                 mark_extent_buffer_accessed(eb, NULL);
    4600     3432520 :                 return eb;
    4601             :         }
    4602             :         rcu_read_unlock();
    4603             : 
    4604       61556 :         return NULL;
    4605             : }
    4606             : 
    4607             : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
    4608             : struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
    4609             :                                                u64 start, unsigned long len)
    4610             : {
    4611             :         struct extent_buffer *eb, *exists = NULL;
    4612             :         int ret;
    4613             : 
    4614             :         eb = find_extent_buffer(fs_info, start);
    4615             :         if (eb)
    4616             :                 return eb;
    4617             :         eb = alloc_dummy_extent_buffer(start, len);
    4618             :         if (!eb)
    4619             :                 return NULL;
    4620             :         eb->fs_info = fs_info;
    4621             : again:
    4622             :         ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
    4623             :         if (ret)
    4624             :                 goto free_eb;
    4625             :         spin_lock(&fs_info->buffer_lock);
    4626             :         ret = radix_tree_insert(&fs_info->buffer_radix,
    4627             :                                 start >> PAGE_CACHE_SHIFT, eb);
    4628             :         spin_unlock(&fs_info->buffer_lock);
    4629             :         radix_tree_preload_end();
    4630             :         if (ret == -EEXIST) {
    4631             :                 exists = find_extent_buffer(fs_info, start);
    4632             :                 if (exists)
    4633             :                         goto free_eb;
    4634             :                 else
    4635             :                         goto again;
    4636             :         }
    4637             :         check_buffer_tree_ref(eb);
    4638             :         set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
    4639             : 
    4640             :         /*
    4641             :          * We will free dummy extent buffers if they come into
    4642             :          * free_extent_buffer with a ref count of 2, but if we are using this we
    4643             :          * want the buffers to stay in memory until we're done with them, so
    4644             :          * bump the ref count again.
    4645             :          */
    4646             :         atomic_inc(&eb->refs);
    4647             :         return eb;
    4648             : free_eb:
    4649             :         btrfs_release_extent_buffer(eb);
    4650             :         return exists;
    4651             : }
    4652             : #endif
    4653             : 
    4654     1268775 : struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
    4655             :                                           u64 start, unsigned long len)
    4656             : {
    4657             :         unsigned long num_pages = num_extent_pages(start, len);
    4658             :         unsigned long i;
    4659             :         unsigned long index = start >> PAGE_CACHE_SHIFT;
    4660             :         struct extent_buffer *eb;
    4661             :         struct extent_buffer *exists = NULL;
    4662             :         struct page *p;
    4663     1268775 :         struct address_space *mapping = fs_info->btree_inode->i_mapping;
    4664             :         int uptodate = 1;
    4665             :         int ret;
    4666             : 
    4667     1268775 :         eb = find_extent_buffer(fs_info, start);
    4668     1268782 :         if (eb)
    4669             :                 return eb;
    4670             : 
    4671       61031 :         eb = __alloc_extent_buffer(fs_info, start, len, GFP_NOFS);
    4672       61028 :         if (!eb)
    4673             :                 return NULL;
    4674             : 
    4675      189469 :         for (i = 0; i < num_pages; i++, index++) {
    4676             :                 p = find_or_create_page(mapping, index, GFP_NOFS);
    4677      189462 :                 if (!p)
    4678             :                         goto free_eb;
    4679             : 
    4680             :                 spin_lock(&mapping->private_lock);
    4681      189470 :                 if (PagePrivate(p)) {
    4682             :                         /*
    4683             :                          * We could have already allocated an eb for this page
    4684             :                          * and attached one so let's see if we can get a ref on
    4685             :                          * the existing eb, and if we can we know it's good and
    4686             :                          * we can just return that one, else we know we can just
    4687             :                          * overwrite page->private.
    4688             :                          */
    4689           0 :                         exists = (struct extent_buffer *)p->private;
    4690           0 :                         if (atomic_inc_not_zero(&exists->refs)) {
    4691             :                                 spin_unlock(&mapping->private_lock);
    4692           0 :                                 unlock_page(p);
    4693           0 :                                 page_cache_release(p);
    4694           0 :                                 mark_extent_buffer_accessed(exists, p);
    4695           0 :                                 goto free_eb;
    4696             :                         }
    4697             : 
    4698             :                         /*
    4699             :                          * Do this so attach doesn't complain and we need to
    4700             :                          * drop the ref the old guy had.
    4701             :                          */
    4702             :                         ClearPagePrivate(p);
    4703           0 :                         WARN_ON(PageDirty(p));
    4704           0 :                         page_cache_release(p);
    4705             :                 }
    4706      189470 :                 attach_extent_buffer_page(eb, p);
    4707             :                 spin_unlock(&mapping->private_lock);
    4708      189470 :                 WARN_ON(PageDirty(p));
    4709      189469 :                 eb->pages[i] = p;
    4710      189469 :                 if (!PageUptodate(p))
    4711             :                         uptodate = 0;
    4712             : 
    4713             :                 /*
    4714             :                  * see below about how we avoid a nasty race with release page
    4715             :                  * and why we unlock later
    4716             :                  */
    4717             :         }
    4718       61032 :         if (uptodate)
    4719             :                 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
    4720             : again:
    4721       61032 :         ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
    4722       61031 :         if (ret)
    4723             :                 goto free_eb;
    4724             : 
    4725             :         spin_lock(&fs_info->buffer_lock);
    4726       61032 :         ret = radix_tree_insert(&fs_info->buffer_radix,
    4727             :                                 start >> PAGE_CACHE_SHIFT, eb);
    4728             :         spin_unlock(&fs_info->buffer_lock);
    4729             :         radix_tree_preload_end();
    4730       61031 :         if (ret == -EEXIST) {
    4731           0 :                 exists = find_extent_buffer(fs_info, start);
    4732           0 :                 if (exists)
    4733             :                         goto free_eb;
    4734             :                 else
    4735             :                         goto again;
    4736             :         }
    4737             :         /* add one reference for the tree */
    4738       61031 :         check_buffer_tree_ref(eb);
    4739             :         set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
    4740             : 
    4741             :         /*
    4742             :          * there is a race where release page may have
    4743             :          * tried to find this extent buffer in the radix
    4744             :          * but failed.  It will tell the VM it is safe to
    4745             :          * reclaim the page, and it will clear the page private bit.
    4746             :          * We must make sure to set the page private bit properly
    4747             :          * after the extent buffer is in the radix tree so
    4748             :          * it doesn't get lost
    4749             :          */
    4750       61032 :         SetPageChecked(eb->pages[0]);
    4751      189468 :         for (i = 1; i < num_pages; i++) {
    4752             :                 p = extent_buffer_page(eb, i);
    4753             :                 ClearPageChecked(p);
    4754      128436 :                 unlock_page(p);
    4755             :         }
    4756       61032 :         unlock_page(eb->pages[0]);
    4757       61031 :         return eb;
    4758             : 
    4759             : free_eb:
    4760           0 :         for (i = 0; i < num_pages; i++) {
    4761           0 :                 if (eb->pages[i])
    4762           0 :                         unlock_page(eb->pages[i]);
    4763             :         }
    4764             : 
    4765           0 :         WARN_ON(!atomic_dec_and_test(&eb->refs));
    4766           0 :         btrfs_release_extent_buffer(eb);
    4767           0 :         return exists;
    4768             : }
    4769             : 
    4770       61440 : static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
    4771             : {
    4772       61440 :         struct extent_buffer *eb =
    4773             :                         container_of(head, struct extent_buffer, rcu_head);
    4774             : 
    4775             :         __free_extent_buffer(eb);
    4776       61436 : }
    4777             : 
    4778             : /* Expects to have eb->refs_lock already held */
    4779     4671365 : static int release_extent_buffer(struct extent_buffer *eb)
    4780             : {
    4781     4671365 :         WARN_ON(atomic_read(&eb->refs) == 0);
    4782     9343137 :         if (atomic_dec_and_test(&eb->refs)) {
    4783      122922 :                 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
    4784       60729 :                         struct btrfs_fs_info *fs_info = eb->fs_info;
    4785             : 
    4786             :                         spin_unlock(&eb->refs_lock);
    4787             : 
    4788             :                         spin_lock(&fs_info->buffer_lock);
    4789       60729 :                         radix_tree_delete(&fs_info->buffer_radix,
    4790       60729 :                                           eb->start >> PAGE_CACHE_SHIFT);
    4791             :                         spin_unlock(&fs_info->buffer_lock);
    4792             :                 } else {
    4793             :                         spin_unlock(&eb->refs_lock);
    4794             :                 }
    4795             : 
    4796             :                 /* Should be safe to release our pages at this point */
    4797       61461 :                 btrfs_release_extent_buffer_page(eb, 0);
    4798       61461 :                 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
    4799       61461 :                 return 1;
    4800             :         }
    4801             :         spin_unlock(&eb->refs_lock);
    4802             : 
    4803     4610241 :         return 0;
    4804             : }
    4805             : 
    4806     7021982 : void free_extent_buffer(struct extent_buffer *eb)
    4807             : {
    4808             :         int refs;
    4809             :         int old;
    4810     7021982 :         if (!eb)
    4811             :                 return;
    4812             : 
    4813             :         while (1) {
    4814             :                 refs = atomic_read(&eb->refs);
    4815     7023473 :                 if (refs <= 3)
    4816             :                         break;
    4817     2414965 :                 old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
    4818     2415103 :                 if (old == refs)
    4819             :                         return;
    4820             :         }
    4821             : 
    4822             :         spin_lock(&eb->refs_lock);
    4823     6907331 :         if (atomic_read(&eb->refs) == 2 &&
    4824             :             test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
    4825         732 :                 atomic_dec(&eb->refs);
    4826             : 
    4827     6906563 :         if (atomic_read(&eb->refs) == 2 &&
    4828       10806 :             test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
    4829       10464 :             !extent_buffer_under_io(eb) &&
    4830       10464 :             test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
    4831       10464 :                 atomic_dec(&eb->refs);
    4832             : 
    4833             :         /*
    4834             :          * I know this is terrible, but it's temporary until we stop tracking
    4835             :          * the uptodate bits and such for the extent buffers.
    4836             :          */
    4837     4610018 :         release_extent_buffer(eb);
    4838             : }
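
free_extent_buffer() drops a reference without taking eb->refs_lock whenever the count is still safely above 3: a cmpxchg loop retries the decrement until it wins or the count falls to that threshold, and only then does the locked slow path run. Below is a minimal user-space sketch of the same pattern using C11 atomics; the names and the threshold parameter are illustrative and not taken from extent_io.c.

    #include <stdatomic.h>
    #include <stdbool.h>

    /*
     * Sketch of the lockless fast path in free_extent_buffer(): decrement a
     * refcount with compare-and-swap while it is safely above a threshold
     * (3 in the kernel code).  Once the count reaches the threshold the
     * caller must take the lock and re-check.  Names are illustrative.
     */
    static bool put_ref_fast(atomic_int *refs, int threshold)
    {
            int old = atomic_load(refs);

            while (old > threshold) {
                    /* on failure, 'old' is reloaded with the current value */
                    if (atomic_compare_exchange_weak(refs, &old, old - 1))
                            return true;    /* dropped without taking a lock */
            }
            return false;   /* fall back to the locked slow path */
    }

    int main(void)
    {
            atomic_int refs = 5;

            while (put_ref_fast(&refs, 3))
                    ;                       /* drops 5 -> 4 -> 3, then stops */
            return atomic_load(&refs) == 3 ? 0 : 1;
    }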
    4839             : 
    4840       53689 : void free_extent_buffer_stale(struct extent_buffer *eb)
    4841             : {
    4842       53689 :         if (!eb)
    4843       53687 :                 return;
    4844             : 
    4845             :         spin_lock(&eb->refs_lock);
    4846             :         set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
    4847             : 
    4848      139185 :         if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
    4849             :             test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
    4850       42310 :                 atomic_dec(&eb->refs);
    4851       53689 :         release_extent_buffer(eb);
    4852             : }
    4853             : 
    4854        2006 : void clear_extent_buffer_dirty(struct extent_buffer *eb)
    4855             : {
    4856             :         unsigned long i;
    4857             :         unsigned long num_pages;
    4858             :         struct page *page;
    4859             : 
    4860        2006 :         num_pages = num_extent_pages(eb->start, eb->len);
    4861             : 
    4862        6712 :         for (i = 0; i < num_pages; i++) {
    4863             :                 page = extent_buffer_page(eb, i);
    4864        4706 :                 if (!PageDirty(page))
    4865           0 :                         continue;
    4866             : 
    4867        4706 :                 lock_page(page);
    4868        4706 :                 WARN_ON(!PagePrivate(page));
    4869             : 
    4870        4706 :                 clear_page_dirty_for_io(page);
    4871        4706 :                 spin_lock_irq(&page->mapping->tree_lock);
    4872        4706 :                 if (!PageDirty(page)) {
    4873        4706 :                         radix_tree_tag_clear(&page->mapping->page_tree,
    4874             :                                                 page_index(page),
    4875             :                                                 PAGECACHE_TAG_DIRTY);
    4876             :                 }
    4877        4706 :                 spin_unlock_irq(&page->mapping->tree_lock);
    4878             :                 ClearPageError(page);
    4879        4706 :                 unlock_page(page);
    4880             :         }
    4881        2006 :         WARN_ON(atomic_read(&eb->refs) == 0);
    4882        2006 : }
    4883             : 
    4884     1165610 : int set_extent_buffer_dirty(struct extent_buffer *eb)
    4885             : {
    4886             :         unsigned long i;
    4887             :         unsigned long num_pages;
    4888             :         int was_dirty = 0;
    4889             : 
    4890     1165610 :         check_buffer_tree_ref(eb);
    4891             : 
    4892     1165569 :         was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
    4893             : 
    4894     1165913 :         num_pages = num_extent_pages(eb->start, eb->len);
    4895     1165913 :         WARN_ON(atomic_read(&eb->refs) == 0);
    4896     1165913 :         WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
    4897             : 
    4898     4015547 :         for (i = 0; i < num_pages; i++)
    4899     4015207 :                 set_page_dirty(extent_buffer_page(eb, i));
    4900     1166253 :         return was_dirty;
    4901             : }
    4902             : 
    4903           0 : int clear_extent_buffer_uptodate(struct extent_buffer *eb)
    4904             : {
    4905             :         unsigned long i;
    4906             :         struct page *page;
    4907             :         unsigned long num_pages;
    4908             : 
    4909             :         clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
    4910           0 :         num_pages = num_extent_pages(eb->start, eb->len);
    4911           0 :         for (i = 0; i < num_pages; i++) {
    4912             :                 page = extent_buffer_page(eb, i);
    4913           0 :                 if (page)
    4914             :                         ClearPageUptodate(page);
    4915             :         }
    4916           0 :         return 0;
    4917             : }
    4918             : 
    4919       61290 : int set_extent_buffer_uptodate(struct extent_buffer *eb)
    4920             : {
    4921             :         unsigned long i;
    4922             :         struct page *page;
    4923             :         unsigned long num_pages;
    4924             : 
    4925             :         set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
    4926       61290 :         num_pages = num_extent_pages(eb->start, eb->len);
    4927      250958 :         for (i = 0; i < num_pages; i++) {
    4928             :                 page = extent_buffer_page(eb, i);
    4929             :                 SetPageUptodate(page);
    4930             :         }
    4931       61291 :         return 0;
    4932             : }
    4933             : 
    4934     3434517 : int extent_buffer_uptodate(struct extent_buffer *eb)
    4935             : {
    4936     3434517 :         return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
    4937             : }
    4938             : 
    4939     1209796 : int read_extent_buffer_pages(struct extent_io_tree *tree,
    4940             :                              struct extent_buffer *eb, u64 start, int wait,
    4941             :                              get_extent_t *get_extent, int mirror_num)
    4942             : {
    4943             :         unsigned long i;
    4944             :         unsigned long start_i;
    4945             :         struct page *page;
    4946             :         int err;
    4947             :         int ret = 0;
    4948             :         int locked_pages = 0;
    4949             :         int all_uptodate = 1;
    4950             :         unsigned long num_pages;
    4951             :         unsigned long num_reads = 0;
    4952     1209796 :         struct bio *bio = NULL;
    4953     1209796 :         unsigned long bio_flags = 0;
    4954             : 
    4955     1209796 :         if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
    4956             :                 return 0;
    4957             : 
    4958        2189 :         if (start) {
    4959           0 :                 WARN_ON(start < eb->start);
    4960           0 :                 start_i = (start >> PAGE_CACHE_SHIFT) -
    4961           0 :                         (eb->start >> PAGE_CACHE_SHIFT);
    4962             :         } else {
    4963             :                 start_i = 0;
    4964             :         }
    4965             : 
    4966        2189 :         num_pages = num_extent_pages(eb->start, eb->len);
    4967       10478 :         for (i = start_i; i < num_pages; i++) {
    4968             :                 page = extent_buffer_page(eb, i);
    4969        8291 :                 if (wait == WAIT_NONE) {
    4970         221 :                         if (!trylock_page(page))
    4971             :                                 goto unlock_exit;
    4972             :                 } else {
    4973        8070 :                         lock_page(page);
    4974             :                 }
    4975        8289 :                 locked_pages++;
    4976        8289 :                 if (!PageUptodate(page)) {
    4977        8277 :                         num_reads++;
    4978             :                         all_uptodate = 0;
    4979             :                 }
    4980             :         }
    4981        2187 :         if (all_uptodate) {
    4982           3 :                 if (start_i == 0)
    4983             :                         set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
    4984             :                 goto unlock_exit;
    4985             :         }
    4986             : 
    4987             :         clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
    4988        2184 :         eb->read_mirror = 0;
    4989        2184 :         atomic_set(&eb->io_pages, num_reads);
    4990       10461 :         for (i = start_i; i < num_pages; i++) {
    4991             :                 page = extent_buffer_page(eb, i);
    4992        8277 :                 if (!PageUptodate(page)) {
    4993             :                         ClearPageError(page);
    4994        8277 :                         err = __extent_read_full_page(tree, page,
    4995             :                                                       get_extent, &bio,
    4996             :                                                       mirror_num, &bio_flags,
    4997             :                                                       READ | REQ_META);
    4998        8277 :                         if (err)
    4999             :                                 ret = err;
    5000             :                 } else {
    5001           0 :                         unlock_page(page);
    5002             :                 }
    5003             :         }
    5004             : 
    5005        2184 :         if (bio) {
    5006        2184 :                 err = submit_one_bio(READ | REQ_META, bio, mirror_num,
    5007             :                                      bio_flags);
    5008        2184 :                 if (err)
    5009             :                         return err;
    5010             :         }
    5011             : 
    5012        2184 :         if (ret || wait != WAIT_COMPLETE)
    5013             :                 return ret;
    5014             : 
    5015        7485 :         for (i = start_i; i < num_pages; i++) {
    5016             :                 page = extent_buffer_page(eb, i);
    5017             :                 wait_on_page_locked(page);
    5018        7485 :                 if (!PageUptodate(page))
    5019             :                         ret = -EIO;
    5020             :         }
    5021             : 
    5022             :         return ret;
    5023             : 
    5024             : unlock_exit:
    5025             :         i = start_i;
    5026          22 :         while (locked_pages > 0) {
    5027             :                 page = extent_buffer_page(eb, i);
    5028          12 :                 i++;
    5029          12 :                 unlock_page(page);
    5030          12 :                 locked_pages--;
    5031             :         }
    5032             :         return ret;
    5033             : }
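
As the code above shows, the wait argument picks between three behaviours: WAIT_NONE only trylocks pages and bails out through unlock_exit as soon as one is contended, WAIT_COMPLETE waits on every page after the bio is submitted and returns -EIO if any page failed to become uptodate, and any other value returns as soon as the reads have been submitted.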
    5034             : 
    5035    12262845 : void read_extent_buffer(struct extent_buffer *eb, void *dstv,
    5036             :                         unsigned long start,
    5037             :                         unsigned long len)
    5038             : {
    5039             :         size_t cur;
    5040             :         size_t offset;
    5041             :         struct page *page;
    5042             :         char *kaddr;
    5043             :         char *dst = (char *)dstv;
    5044    12262845 :         size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
    5045    12262845 :         unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
    5046             : 
    5047    12262845 :         WARN_ON(start > eb->len);
    5048    12265417 :         WARN_ON(start + len > eb->start + eb->len);
    5049             : 
    5050    12265417 :         offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
    5051             : 
    5052    36985812 :         while (len > 0) {
    5053             :                 page = extent_buffer_page(eb, i);
    5054             : 
    5055    12454978 :                 cur = min(len, (PAGE_CACHE_SIZE - offset));
    5056             :                 kaddr = page_address(page);
    5057    12454978 :                 memcpy(dst, kaddr + offset, cur);
    5058             : 
    5059    12454978 :                 dst += cur;
    5060    12454978 :                 len -= cur;
    5061             :                 offset = 0;
    5062    12454978 :                 i++;
    5063             :         }
    5064    12265417 : }
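
read_extent_buffer() and the write/memset/memcmp helpers that follow all decompose a byte range of the logical buffer the same way: start_offset is how far eb->start sits into its first page, i is the page index, and each pass copies at most up to the end of the current page. A self-contained sketch of that arithmetic, assuming a 4096-byte page size and illustrative values:

    #include <stdio.h>

    #define PAGE_SIZE  4096UL
    #define PAGE_SHIFT 12

    /*
     * Print how a (start, len) byte range of a buffer is split across pages,
     * mirroring the loop in read_extent_buffer().  start_offset plays the
     * role of eb->start & (PAGE_CACHE_SIZE - 1).  Illustrative values only.
     */
    static void show_chunks(unsigned long start_offset, unsigned long start,
                            unsigned long len)
    {
            unsigned long i = (start_offset + start) >> PAGE_SHIFT;
            unsigned long offset = (start_offset + start) & (PAGE_SIZE - 1);

            while (len > 0) {
                    unsigned long cur = len < PAGE_SIZE - offset ?
                                        len : PAGE_SIZE - offset;

                    printf("page %lu: offset %lu, %lu bytes\n", i, offset, cur);
                    len -= cur;
                    offset = 0;
                    i++;
            }
    }

    int main(void)
    {
            show_chunks(2048, 100, 6000);   /* spans two pages */
            return 0;
    }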
    5065             : 
    5066         208 : int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
    5067             :                         unsigned long start,
    5068             :                         unsigned long len)
    5069             : {
    5070             :         size_t cur;
    5071             :         size_t offset;
    5072             :         struct page *page;
    5073             :         char *kaddr;
    5074             :         char __user *dst = (char __user *)dstv;
    5075         208 :         size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
    5076         208 :         unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
    5077             :         int ret = 0;
    5078             : 
    5079         208 :         WARN_ON(start > eb->len);
    5080         208 :         WARN_ON(start + len > eb->start + eb->len);
    5081             : 
    5082         208 :         offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
    5083             : 
    5084         625 :         while (len > 0) {
    5085             :                 page = extent_buffer_page(eb, i);
    5086             : 
    5087         209 :                 cur = min(len, (PAGE_CACHE_SIZE - offset));
    5088             :                 kaddr = page_address(page);
    5089         418 :                 if (copy_to_user(dst, kaddr + offset, cur)) {
    5090             :                         ret = -EFAULT;
    5091             :                         break;
    5092             :                 }
    5093             : 
    5094         209 :                 dst += cur;
    5095         209 :                 len -= cur;
    5096             :                 offset = 0;
    5097         209 :                 i++;
    5098             :         }
    5099             : 
    5100         208 :         return ret;
    5101             : }
    5102             : 
    5103    41588982 : int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
    5104             :                                unsigned long min_len, char **map,
    5105             :                                unsigned long *map_start,
    5106             :                                unsigned long *map_len)
    5107             : {
    5108             :         size_t offset = start & (PAGE_CACHE_SIZE - 1);
    5109             :         char *kaddr;
    5110             :         struct page *p;
    5111    41588982 :         size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
    5112    41588982 :         unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
    5113    41588982 :         unsigned long end_i = (start_offset + start + min_len - 1) >>
    5114             :                 PAGE_CACHE_SHIFT;
    5115             : 
    5116    41588982 :         if (i != end_i)
    5117             :                 return -EINVAL;
    5118             : 
    5119    41325202 :         if (i == 0) {
    5120             :                 offset = start_offset;
    5121    22961501 :                 *map_start = 0;
    5122             :         } else {
    5123             :                 offset = 0;
    5124    18363701 :                 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
    5125             :         }
    5126             : 
    5127    41325202 :         if (start + min_len > eb->len) {
    5128           0 :                 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
    5129             :                        "wanted %lu %lu\n",
    5130             :                        eb->start, eb->len, start, min_len);
    5131           0 :                 return -EINVAL;
    5132             :         }
    5133             : 
    5134             :         p = extent_buffer_page(eb, i);
    5135             :         kaddr = page_address(p);
    5136    41325202 :         *map = kaddr + offset;
    5137    41325202 :         *map_len = PAGE_CACHE_SIZE - offset;
    5138    41325202 :         return 0;
    5139             : }
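
map_private_extent_buffer() only hands back a direct kernel pointer when the requested [start, start + min_len) range sits entirely within one page of the buffer; callers that fail the i != end_i test fall back to read_extent_buffer()/write_extent_buffer(). A sketch of just that check, under the same page-size assumption as the previous example:

    #include <stdio.h>

    #define PAGE_SIZE  4096UL
    #define PAGE_SHIFT 12

    /*
     * The i != end_i test from map_private_extent_buffer(): a range can be
     * mapped directly only if its first and last bytes land on the same page.
     * start_offset plays the role of eb->start & (PAGE_CACHE_SIZE - 1).
     */
    static int fits_in_one_page(unsigned long start_offset, unsigned long start,
                                unsigned long min_len)
    {
            unsigned long i = (start_offset + start) >> PAGE_SHIFT;
            unsigned long end_i = (start_offset + start + min_len - 1) >> PAGE_SHIFT;

            return i == end_i;
    }

    int main(void)
    {
            printf("%d\n", fits_in_one_page(0, 4000, 64));    /* 1: fits in page 0 */
            printf("%d\n", fits_in_one_page(0, 4000, 200));   /* 0: crosses into page 1 */
            return 0;
    }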
    5140             : 
    5141      228578 : int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
    5142             :                           unsigned long start,
    5143             :                           unsigned long len)
    5144             : {
    5145             :         size_t cur;
    5146             :         size_t offset;
    5147             :         struct page *page;
    5148             :         char *kaddr;
    5149             :         char *ptr = (char *)ptrv;
    5150      228578 :         size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
    5151      228578 :         unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
    5152             :         int ret = 0;
    5153             : 
    5154      228578 :         WARN_ON(start > eb->len);
    5155      228578 :         WARN_ON(start + len > eb->start + eb->len);
    5156             : 
    5157      228578 :         offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
    5158             : 
    5159      492219 :         while (len > 0) {
    5160             :                 page = extent_buffer_page(eb, i);
    5161             : 
    5162      229177 :                 cur = min(len, (PAGE_CACHE_SIZE - offset));
    5163             : 
    5164             :                 kaddr = page_address(page);
    5165      229177 :                 ret = memcmp(ptr, kaddr + offset, cur);
    5166      229177 :                 if (ret)
    5167             :                         break;
    5168             : 
    5169       35063 :                 ptr += cur;
    5170       35063 :                 len -= cur;
    5171             :                 offset = 0;
    5172       35063 :                 i++;
    5173             :         }
    5174      228578 :         return ret;
    5175             : }
    5176             : 
    5177      897735 : void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
    5178             :                          unsigned long start, unsigned long len)
    5179             : {
    5180             :         size_t cur;
    5181             :         size_t offset;
    5182             :         struct page *page;
    5183             :         char *kaddr;
    5184             :         char *src = (char *)srcv;
    5185      897735 :         size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
    5186      897735 :         unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
    5187             : 
    5188      897735 :         WARN_ON(start > eb->len);
    5189      897739 :         WARN_ON(start + len > eb->start + eb->len);
    5190             : 
    5191      897739 :         offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
    5192             : 
    5193     2784773 :         while (len > 0) {
    5194             :                 page = extent_buffer_page(eb, i);
    5195      989295 :                 WARN_ON(!PageUptodate(page));
    5196             : 
    5197      989295 :                 cur = min(len, PAGE_CACHE_SIZE - offset);
    5198             :                 kaddr = page_address(page);
    5199      989295 :                 memcpy(kaddr + offset, src, cur);
    5200             : 
    5201      989295 :                 src += cur;
    5202      989295 :                 len -= cur;
    5203             :                 offset = 0;
    5204      989295 :                 i++;
    5205             :         }
    5206      897746 : }
    5207             : 
    5208       28122 : void memset_extent_buffer(struct extent_buffer *eb, char c,
    5209             :                           unsigned long start, unsigned long len)
    5210             : {
    5211             :         size_t cur;
    5212             :         size_t offset;
    5213             :         struct page *page;
    5214             :         char *kaddr;
    5215       28122 :         size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
    5216       28122 :         unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
    5217             : 
    5218       28122 :         WARN_ON(start > eb->len);
    5219       28122 :         WARN_ON(start + len > eb->start + eb->len);
    5220             : 
    5221       28122 :         offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
    5222             : 
    5223       85086 :         while (len > 0) {
    5224             :                 page = extent_buffer_page(eb, i);
    5225       28842 :                 WARN_ON(!PageUptodate(page));
    5226             : 
    5227       28842 :                 cur = min(len, PAGE_CACHE_SIZE - offset);
    5228             :                 kaddr = page_address(page);
    5229       28842 :                 memset(kaddr + offset, c, cur);
    5230             : 
    5231       28842 :                 len -= cur;
    5232             :                 offset = 0;
    5233       28842 :                 i++;
    5234             :         }
    5235       28122 : }
    5236             : 
    5237       93062 : void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
    5238             :                         unsigned long dst_offset, unsigned long src_offset,
    5239             :                         unsigned long len)
    5240             : {
    5241       93062 :         u64 dst_len = dst->len;
    5242             :         size_t cur;
    5243             :         size_t offset;
    5244             :         struct page *page;
    5245             :         char *kaddr;
    5246       93062 :         size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
    5247       93062 :         unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
    5248             : 
    5249       93062 :         WARN_ON(src->len != dst_len);
    5250             : 
    5251       93061 :         offset = (start_offset + dst_offset) &
    5252             :                 (PAGE_CACHE_SIZE - 1);
    5253             : 
    5254      400049 :         while (len > 0) {
    5255             :                 page = extent_buffer_page(dst, i);
    5256      213927 :                 WARN_ON(!PageUptodate(page));
    5257             : 
    5258      213927 :                 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
    5259             : 
    5260             :                 kaddr = page_address(page);
    5261      213927 :                 read_extent_buffer(src, kaddr + offset, src_offset, cur);
    5262             : 
    5263      213927 :                 src_offset += cur;
    5264      213927 :                 len -= cur;
    5265             :                 offset = 0;
    5266      213927 :                 i++;
    5267             :         }
    5268       93064 : }
    5269             : 
    5270             : static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
    5271             : {
    5272     1100198 :         unsigned long distance = (src > dst) ? src - dst : dst - src;
    5273             :         return distance < len;
    5274             : }
    5275             : 
    5276     1476644 : static void copy_pages(struct page *dst_page, struct page *src_page,
    5277             :                        unsigned long dst_off, unsigned long src_off,
    5278             :                        unsigned long len)
    5279             : {
    5280             :         char *dst_kaddr = page_address(dst_page);
    5281             :         char *src_kaddr;
    5282             :         int must_memmove = 0;
    5283             : 
    5284     1476644 :         if (dst_page != src_page) {
    5285             :                 src_kaddr = page_address(src_page);
    5286             :         } else {
    5287             :                 src_kaddr = dst_kaddr;
    5288     1100198 :                 if (areas_overlap(src_off, dst_off, len))
    5289             :                         must_memmove = 1;
    5290             :         }
    5291             : 
    5292     1476644 :         if (must_memmove)
    5293     1039557 :                 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
    5294             :         else
    5295      437087 :                 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
    5296     1476644 : }
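
copy_pages() only pays for memmove() when source and destination share a page and the byte ranges genuinely overlap; areas_overlap() reduces that test to comparing the distance between the two offsets against the length. A minimal user-space check of the same predicate:

    #include <stdbool.h>
    #include <stdio.h>

    /*
     * The predicate from areas_overlap(): two 'len'-byte ranges starting at
     * 'src' and 'dst' within one page overlap iff their starting offsets are
     * less than 'len' bytes apart.
     */
    static bool ranges_overlap(unsigned long src, unsigned long dst,
                               unsigned long len)
    {
            unsigned long distance = (src > dst) ? src - dst : dst - src;

            return distance < len;
    }

    int main(void)
    {
            printf("%d\n", ranges_overlap(0, 100, 50));    /* 0: disjoint, memcpy is fine */
            printf("%d\n", ranges_overlap(0, 100, 200));   /* 1: overlap, memmove needed */
            return 0;
    }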
    5297             : 
    5298      410701 : void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
    5299             :                            unsigned long src_offset, unsigned long len)
    5300             : {
    5301             :         size_t cur;
    5302             :         size_t dst_off_in_page;
    5303             :         size_t src_off_in_page;
    5304      410701 :         size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
    5305             :         unsigned long dst_i;
    5306             :         unsigned long src_i;
    5307             : 
    5308      410701 :         if (src_offset + len > dst->len) {
    5309           0 :                 printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
    5310             :                        "len %lu dst len %lu\n", src_offset, len, dst->len);
    5311           0 :                 BUG_ON(1);
    5312             :         }
    5313      410701 :         if (dst_offset + len > dst->len) {
    5314           0 :                 printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
    5315             :                        "len %lu dst len %lu\n", dst_offset, len, dst->len);
    5316           0 :                 BUG_ON(1);
    5317             :         }
    5318             : 
    5319     1279924 :         while (len > 0) {
    5320      869240 :                 dst_off_in_page = (start_offset + dst_offset) &
    5321             :                         (PAGE_CACHE_SIZE - 1);
    5322      869240 :                 src_off_in_page = (start_offset + src_offset) &
    5323             :                         (PAGE_CACHE_SIZE - 1);
    5324             : 
    5325      869240 :                 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
    5326      869240 :                 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
    5327             : 
    5328      869240 :                 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
    5329             :                                                src_off_in_page));
    5330      869240 :                 cur = min_t(unsigned long, cur,
    5331             :                         (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
    5332             : 
    5333      869240 :                 copy_pages(extent_buffer_page(dst, dst_i),
    5334             :                            extent_buffer_page(dst, src_i),
    5335             :                            dst_off_in_page, src_off_in_page, cur);
    5336             : 
    5337      869223 :                 src_offset += cur;
    5338      869223 :                 dst_offset += cur;
    5339      869223 :                 len -= cur;
    5340             :         }
    5341      410684 : }
    5342             : 
    5343      743372 : void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
    5344             :                            unsigned long src_offset, unsigned long len)
    5345             : {
    5346             :         size_t cur;
    5347             :         size_t dst_off_in_page;
    5348             :         size_t src_off_in_page;
    5349      743372 :         unsigned long dst_end = dst_offset + len - 1;
    5350      743372 :         unsigned long src_end = src_offset + len - 1;
    5351      743372 :         size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
    5352             :         unsigned long dst_i;
    5353             :         unsigned long src_i;
    5354             : 
    5355      743372 :         if (src_offset + len > dst->len) {
    5356           0 :                 printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
    5357             :                        "len %lu len %lu\n", src_offset, len, dst->len);
    5358           0 :                 BUG_ON(1);
    5359             :         }
    5360      743372 :         if (dst_offset + len > dst->len) {
    5361           0 :                 printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
    5362             :                        "len %lu len %lu\n", dst_offset, len, dst->len);
    5363           0 :                 BUG_ON(1);
    5364             :         }
    5365      743372 :         if (dst_offset < src_offset) {
    5366      408650 :                 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
    5367     1151959 :                 return;
    5368             :         }
    5369      942145 :         while (len > 0) {
    5370      607424 :                 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
    5371      607424 :                 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
    5372             : 
    5373      607424 :                 dst_off_in_page = (start_offset + dst_end) &
    5374             :                         (PAGE_CACHE_SIZE - 1);
    5375      607424 :                 src_off_in_page = (start_offset + src_end) &
    5376             :                         (PAGE_CACHE_SIZE - 1);
    5377             : 
    5378      607424 :                 cur = min_t(unsigned long, len, src_off_in_page + 1);
    5379      607424 :                 cur = min(cur, dst_off_in_page + 1);
    5380     1822272 :                 copy_pages(extent_buffer_page(dst, dst_i),
    5381             :                            extent_buffer_page(dst, src_i),
    5382      607424 :                            dst_off_in_page - cur + 1,
    5383      607424 :                            src_off_in_page - cur + 1, cur);
    5384             : 
    5385      607423 :                 dst_end -= cur;
    5386      607423 :                 src_end -= cur;
    5387      607423 :                 len -= cur;
    5388             :         }
    5389             : }
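
For dst_offset > src_offset the loop above cannot copy front-to-back without overwriting bytes it still needs, so it walks the range from its last byte backwards in page-bounded chunks. A minimal model of that back-to-front chunking on a flat buffer; the names are hypothetical and 'chunk' stands in for the per-page limit:

    #include <stdio.h>
    #include <string.h>

    /*
     * Back-to-front overlapping move in fixed-size chunks, modelling the
     * dst_offset > src_offset case of memmove_extent_buffer() on a flat
     * buffer instead of an array of pages.
     */
    static void move_backwards(char *buf, unsigned long dst_offset,
                               unsigned long src_offset, unsigned long len,
                               unsigned long chunk)
    {
            unsigned long dst_end = dst_offset + len - 1;
            unsigned long src_end = src_offset + len - 1;

            while (len > 0) {
                    unsigned long cur = len < chunk ? len : chunk;

                    /* move the last 'cur' not-yet-copied bytes */
                    memmove(buf + dst_end - cur + 1, buf + src_end - cur + 1, cur);
                    dst_end -= cur;
                    src_end -= cur;
                    len -= cur;
            }
    }

    int main(void)
    {
            char buf[32] = "abcdefghijklmnop";

            move_backwards(buf, 4, 0, 12, 5);   /* shift the first 12 bytes right by 4 */
            printf("%s\n", buf);                /* prints "abcdabcdefghijkl" */
            return 0;
    }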
    5390             : 
    5391       13164 : int try_release_extent_buffer(struct page *page)
    5392             : {
    5393             :         struct extent_buffer *eb;
    5394             : 
    5395             :         /*
    5396             :          * We need to make sure nobody is attaching this page to an eb right
    5397             :          * now.
    5398             :          */
    5399       13164 :         spin_lock(&page->mapping->private_lock);
    5400       13164 :         if (!PagePrivate(page)) {
    5401           0 :                 spin_unlock(&page->mapping->private_lock);
    5402           0 :                 return 1;
    5403             :         }
    5404             : 
    5405       13164 :         eb = (struct extent_buffer *)page->private;
    5406       13164 :         BUG_ON(!eb);
    5407             : 
    5408             :         /*
    5409             :          * This is a little awful but should be ok, we need to make sure that
    5410             :          * the eb doesn't disappear out from under us while we're looking at
    5411             :          * this page.
    5412             :          */
    5413             :         spin_lock(&eb->refs_lock);
    5414       21120 :         if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
    5415             :                 spin_unlock(&eb->refs_lock);
    5416        5208 :                 spin_unlock(&page->mapping->private_lock);
    5417        5208 :                 return 0;
    5418             :         }
    5419        7956 :         spin_unlock(&page->mapping->private_lock);
    5420             : 
    5421             :         /*
    5422             :          * If tree ref isn't set then we know the ref on this eb is a real ref,
    5423             :          * so just return, this page will likely be freed soon anyway.
    5424             :          */
    5425       15912 :         if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
    5426             :                 spin_unlock(&eb->refs_lock);
    5427           0 :                 return 0;
    5428             :         }
    5429             : 
    5430        7956 :         return release_extent_buffer(eb);
    5431             : }

Generated by: LCOV version 1.10