LCOV - code coverage report
Current view: top level - fs/btrfs - ctree.c (source / functions) Hit Total Coverage
Test: btrfstest.info Lines: 1909 2186 87.3 %
Date: 2014-11-28 Functions: 93 94 98.9 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2007,2008 Oracle.  All rights reserved.
       3             :  *
       4             :  * This program is free software; you can redistribute it and/or
       5             :  * modify it under the terms of the GNU General Public
       6             :  * License v2 as published by the Free Software Foundation.
       7             :  *
       8             :  * This program is distributed in the hope that it will be useful,
       9             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      10             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      11             :  * General Public License for more details.
      12             :  *
      13             :  * You should have received a copy of the GNU General Public
      14             :  * License along with this program; if not, write to the
      15             :  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
      16             :  * Boston, MA 02111-1307, USA.
      17             :  */
      18             : 
      19             : #include <linux/sched.h>
      20             : #include <linux/slab.h>
      21             : #include <linux/rbtree.h>
      22             : #include "ctree.h"
      23             : #include "disk-io.h"
      24             : #include "transaction.h"
      25             : #include "print-tree.h"
      26             : #include "locking.h"
      27             : 
      28             : static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
      29             :                       *root, struct btrfs_path *path, int level);
      30             : static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
      31             :                       *root, struct btrfs_key *ins_key,
      32             :                       struct btrfs_path *path, int data_size, int extend);
      33             : static int push_node_left(struct btrfs_trans_handle *trans,
      34             :                           struct btrfs_root *root, struct extent_buffer *dst,
      35             :                           struct extent_buffer *src, int empty);
      36             : static int balance_node_right(struct btrfs_trans_handle *trans,
      37             :                               struct btrfs_root *root,
      38             :                               struct extent_buffer *dst_buf,
      39             :                               struct extent_buffer *src_buf);
      40             : static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
      41             :                     int level, int slot);
      42             : static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
      43             :                                  struct extent_buffer *eb);
      44             : 
      45     1854047 : struct btrfs_path *btrfs_alloc_path(void)
      46             : {
      47             :         struct btrfs_path *path;
      48     1855831 :         path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
      49     1854158 :         return path;
      50             : }
      51             : 
      52             : /*
      53             :  * set all locked nodes in the path to blocking locks.  This should
      54             :  * be done before scheduling
      55             :  */
      56     2594767 : noinline void btrfs_set_path_blocking(struct btrfs_path *p)
      57             : {
      58             :         int i;
      59    23375417 :         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
      60    20777480 :                 if (!p->nodes[i] || !p->locks[i])
      61    19637098 :                         continue;
      62     1140382 :                 btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
      63     1143552 :                 if (p->locks[i] == BTRFS_READ_LOCK)
      64      409973 :                         p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
      65      733579 :                 else if (p->locks[i] == BTRFS_WRITE_LOCK)
      66      713014 :                         p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
      67             :         }
      68     2597937 : }
      69             : 
      70             : /*
      71             :  * reset all the locked nodes in the path to spinning locks.
      72             :  *
      73             :  * held is used to keep lockdep happy, when lockdep is enabled
      74             :  * we set held to a blocking lock before we go around and
      75             :  * retake all the spinlocks in the path.  You can safely use NULL
      76             :  * for held
      77             :  */
      78     5212087 : noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
      79             :                                         struct extent_buffer *held, int held_rw)
      80             : {
      81             :         int i;
      82             : 
      83             : #ifdef CONFIG_DEBUG_LOCK_ALLOC
      84             :         /* lockdep really cares that we take all of these spinlocks
      85             :          * in the right order.  If any of the locks in the path are not
      86             :          * currently blocking, it is going to complain.  So, make really
      87             :          * really sure by forcing the path to blocking before we clear
      88             :          * the path blocking.
      89             :          */
      90             :         if (held) {
      91             :                 btrfs_set_lock_blocking_rw(held, held_rw);
      92             :                 if (held_rw == BTRFS_WRITE_LOCK)
      93             :                         held_rw = BTRFS_WRITE_LOCK_BLOCKING;
      94             :                 else if (held_rw == BTRFS_READ_LOCK)
      95             :                         held_rw = BTRFS_READ_LOCK_BLOCKING;
      96             :         }
      97             :         btrfs_set_path_blocking(p);
      98             : #endif
      99             : 
     100    46861138 :         for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
     101    41652998 :                 if (p->nodes[i] && p->locks[i]) {
     102     3786076 :                         btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
     103     3782129 :                         if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
     104      326601 :                                 p->locks[i] = BTRFS_WRITE_LOCK;
     105     3455528 :                         else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
     106        5404 :                                 p->locks[i] = BTRFS_READ_LOCK;
     107             :                 }
     108             :         }
     109             : 
     110             : #ifdef CONFIG_DEBUG_LOCK_ALLOC
     111             :         if (held)
     112             :                 btrfs_clear_lock_blocking_rw(held, held_rw);
     113             : #endif
     114     5208140 : }
     115             : 
     116             : /* this also releases the path */
     117     1855866 : void btrfs_free_path(struct btrfs_path *p)
     118             : {
     119     1855866 :         if (!p)
     120     1855760 :                 return;
     121     1855874 :         btrfs_release_path(p);
     122     1855739 :         kmem_cache_free(btrfs_path_cachep, p);
     123             : }
     124             : 
     125             : /*
     126             :  * path release drops references on the extent buffers in the path
     127             :  * and it drops any locks held by this path
     128             :  *
     129             :  * It is safe to call this on paths that hold no locks or extent buffers.
     130             :  */
     131     4493213 : noinline void btrfs_release_path(struct btrfs_path *p)
     132             : {
     133             :         int i;
     134             : 
     135    40453389 :         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
     136    35957959 :                 p->slots[i] = 0;
     137    35957959 :                 if (!p->nodes[i])
     138    30953674 :                         continue;
     139     5004285 :                 if (p->locks[i]) {
     140     1384137 :                         btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
     141     1384409 :                         p->locks[i] = 0;
     142             :                 }
     143     5004557 :                 free_extent_buffer(p->nodes[i]);
     144     5006502 :                 p->nodes[i] = NULL;
     145             :         }
     146     4495430 : }
     147             : 
     148             : /*
     149             :  * safely gets a reference on the root node of a tree.  A lock
     150             :  * is not taken, so a concurrent writer may put a different node
     151             :  * at the root of the tree.  See btrfs_lock_root_node for the
     152             :  * looping required.
     153             :  *
     154             :  * The extent buffer returned by this has a reference taken, so
     155             :  * it won't disappear.  It may stop being the root of the tree
     156             :  * at any time because there are no locks held.
     157             :  */
     158     1872073 : struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
     159             : {
     160             :         struct extent_buffer *eb;
     161             : 
     162             :         while (1) {
     163             :                 rcu_read_lock();
     164     1872156 :                 eb = rcu_dereference(root->node);
     165             : 
     166             :                 /*
     167             :                  * RCU really hurts here: the root node may have been freed
     168             :                  * because it was cow'ed, while the new root node is not yet
     169             :                  * visible to us.  So do the inc_not_zero dance, and if it
     170             :                  * doesn't work then synchronize_rcu and try again.
     171             :                  */
     172     1873751 :                 if (atomic_inc_not_zero(&eb->refs)) {
     173             :                         rcu_read_unlock();
     174             :                         break;
     175             :                 }
     176             :                 rcu_read_unlock();
     177             :                 synchronize_rcu();
     178             :         }
     179     1873755 :         return eb;
     180             : }
     181             : 
     182             : /* loop around taking references on and locking the root node of the
     183             :  * tree until you end up with a lock on the root.  A locked buffer
     184             :  * is returned, with a reference held.
     185             :  */
     186      635185 : struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
     187             : {
     188             :         struct extent_buffer *eb;
     189             : 
     190             :         while (1) {
     191      635210 :                 eb = btrfs_root_node(root);
     192      635225 :                 btrfs_tree_lock(eb);
     193      635245 :                 if (eb == root->node)
     194             :                         break;
     195          25 :                 btrfs_tree_unlock(eb);
     196          25 :                 free_extent_buffer(eb);
     197          25 :         }
     198      635220 :         return eb;
     199             : }
     200             : 
     201             : /* loop around taking references on and read locking the root node of
     202             :  * the tree until you end up with a read lock on the root.  A read locked
     203             :  * buffer is returned, with a reference held.
     204             :  */
     205     1211496 : static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
     206             : {
     207             :         struct extent_buffer *eb;
     208             : 
     209             :         while (1) {
     210     1211636 :                 eb = btrfs_root_node(root);
     211     1212206 :                 btrfs_tree_read_lock(eb);
     212     1212141 :                 if (eb == root->node)
     213             :                         break;
     214         140 :                 btrfs_tree_read_unlock(eb);
     215         140 :                 free_extent_buffer(eb);
     216         140 :         }
     217     1212001 :         return eb;
     218             : }
     219             : 
     220             : /* cowonly roots (everything that is not a reference counted cow
     221             :  * subvolume) just get put onto a simple dirty list.  transaction.c walks
     222             :  * this list to make sure they get properly updated on disk.
     223             :  */
     224       13659 : static void add_root_to_dirty_list(struct btrfs_root *root)
     225             : {
     226       13659 :         spin_lock(&root->fs_info->trans_lock);
     227       19174 :         if (test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state) &&
     228        5515 :             list_empty(&root->dirty_list)) {
     229        4833 :                 list_add(&root->dirty_list,
     230        4833 :                          &root->fs_info->dirty_cowonly_roots);
     231             :         }
     232       13659 :         spin_unlock(&root->fs_info->trans_lock);
     233       13659 : }
     234             : 
     235             : /*
     236             :  * used by snapshot creation to make a copy of a root for a tree with
     237             :  * a given objectid.  The buffer with the new root node is returned in
     238             :  * cow_ret, and this func returns zero on success or a negative error code.
     239             :  */
     240         585 : int btrfs_copy_root(struct btrfs_trans_handle *trans,
     241             :                       struct btrfs_root *root,
     242        1170 :                       struct extent_buffer *buf,
     243             :                       struct extent_buffer **cow_ret, u64 new_root_objectid)
     244             : {
     245        1316 :         struct extent_buffer *cow;
     246             :         int ret = 0;
     247             :         int level;
     248             :         struct btrfs_disk_key disk_key;
     249             : 
     250         585 :         WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
     251             :                 trans->transid != root->fs_info->running_transaction->transid);
     252         585 :         WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
     253             :                 trans->transid != root->last_trans);
     254             : 
     255         585 :         level = btrfs_header_level(buf);
     256         585 :         if (level == 0)
     257             :                 btrfs_item_key(buf, &disk_key, 0);
     258             :         else
     259          79 :                 btrfs_node_key(buf, &disk_key, 0);
     260             : 
     261         585 :         cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
     262             :                                      new_root_objectid, &disk_key, level,
     263             :                                      buf->start, 0);
     264         585 :         if (IS_ERR(cow))
     265           0 :                 return PTR_ERR(cow);
     266             : 
     267         585 :         copy_extent_buffer(cow, buf, 0, 0, cow->len);
     268         585 :         btrfs_set_header_bytenr(cow, cow->start);
     269         585 :         btrfs_set_header_generation(cow, trans->transid);
     270             :         btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
     271             :         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
     272             :                                      BTRFS_HEADER_FLAG_RELOC);
     273         585 :         if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
     274             :                 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
     275             :         else
     276             :                 btrfs_set_header_owner(cow, new_root_objectid);
     277             : 
     278         585 :         write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
     279             :                             BTRFS_FSID_SIZE);
     280             : 
     281         585 :         WARN_ON(btrfs_header_generation(buf) > trans->transid);
     282         585 :         if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
     283         439 :                 ret = btrfs_inc_ref(trans, root, cow, 1);
     284             :         else
     285         146 :                 ret = btrfs_inc_ref(trans, root, cow, 0);
     286             : 
     287         585 :         if (ret)
     288             :                 return ret;
     289             : 
     290         585 :         btrfs_mark_buffer_dirty(cow);
     291         585 :         *cow_ret = cow;
     292         585 :         return 0;
     293             : }
     294             : 
     295             : enum mod_log_op {
     296             :         MOD_LOG_KEY_REPLACE,
     297             :         MOD_LOG_KEY_ADD,
     298             :         MOD_LOG_KEY_REMOVE,
     299             :         MOD_LOG_KEY_REMOVE_WHILE_FREEING,
     300             :         MOD_LOG_KEY_REMOVE_WHILE_MOVING,
     301             :         MOD_LOG_MOVE_KEYS,
     302             :         MOD_LOG_ROOT_REPLACE,
     303             : };
     304             : 
     305             : struct tree_mod_move {
     306             :         int dst_slot;
     307             :         int nr_items;
     308             : };
     309             : 
     310             : struct tree_mod_root {
     311             :         u64 logical;
     312             :         u8 level;
     313             : };
     314             : 
     315             : struct tree_mod_elem {
     316             :         struct rb_node node;
     317             :         u64 index;              /* shifted logical */
     318             :         u64 seq;
     319             :         enum mod_log_op op;
     320             : 
     321             :         /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
     322             :         int slot;
     323             : 
     324             :         /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */
     325             :         u64 generation;
     326             : 
     327             :         /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */
     328             :         struct btrfs_disk_key key;
     329             :         u64 blockptr;
     330             : 
     331             :         /* this is used for op == MOD_LOG_MOVE_KEYS */
     332             :         struct tree_mod_move move;
     333             : 
     334             :         /* this is used for op == MOD_LOG_ROOT_REPLACE */
     335             :         struct tree_mod_root old_root;
     336             : };
     337             : 
     338             : static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info)
     339             : {
     340       21988 :         read_lock(&fs_info->tree_mod_log_lock);
     341             : }
     342             : 
     343             : static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info)
     344             : {
     345             :         read_unlock(&fs_info->tree_mod_log_lock);
     346             : }
     347             : 
     348             : static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info)
     349             : {
     350       10596 :         write_lock(&fs_info->tree_mod_log_lock);
     351             : }
     352             : 
     353             : static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
     354             : {
     355             :         write_unlock(&fs_info->tree_mod_log_lock);
     356             : }
     357             : 
     358             : /*
     359             :  * Pull a new tree mod seq number for our operation.
     360             :  */
     361             : static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
     362             : {
     363       15698 :         return atomic64_inc_return(&fs_info->tree_mod_seq);
     364             : }
     365             : 
     366             : /*
     367             :  * This adds a new blocker to the tree mod log's blocker list if the @elem
     368             :  * passed does not already have a sequence number set. So when a caller expects
     369             :  * to record tree modifications, it should ensure to set elem->seq to zero
     370             :  * before calling btrfs_get_tree_mod_seq.
     371             :  * Returns a fresh, unused tree log modification sequence number, even if no new
     372             :  * blocker was added.
     373             :  */
     374        4917 : u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
     375             :                            struct seq_list *elem)
     376             : {
     377             :         tree_mod_log_write_lock(fs_info);
     378             :         spin_lock(&fs_info->tree_mod_seq_lock);
     379        4917 :         if (!elem->seq) {
     380        4917 :                 elem->seq = btrfs_inc_tree_mod_seq(fs_info);
     381        4917 :                 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
     382             :         }
     383             :         spin_unlock(&fs_info->tree_mod_seq_lock);
     384             :         tree_mod_log_write_unlock(fs_info);
     385             : 
     386        4917 :         return elem->seq;
     387             : }
     388             : 
     389       17271 : void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
     390             :                             struct seq_list *elem)
     391             : {
     392             :         struct rb_root *tm_root;
     393             :         struct rb_node *node;
     394             :         struct rb_node *next;
     395             :         struct seq_list *cur_elem;
     396             :         struct tree_mod_elem *tm;
     397             :         u64 min_seq = (u64)-1;
     398       17271 :         u64 seq_putting = elem->seq;
     399             : 
     400       17271 :         if (!seq_putting)
     401             :                 return;
     402             : 
     403             :         spin_lock(&fs_info->tree_mod_seq_lock);
     404        4917 :         list_del(&elem->list);
     405        4917 :         elem->seq = 0;
     406             : 
     407       30418 :         list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
     408       26763 :                 if (cur_elem->seq < min_seq) {
     409        2262 :                         if (seq_putting > cur_elem->seq) {
     410             :                                 /*
     411             :                                  * blocker with lower sequence number exists, we
     412             :                                  * cannot remove anything from the log
     413             :                                  */
     414             :                                 spin_unlock(&fs_info->tree_mod_seq_lock);
     415             :                                 return;
     416             :                         }
     417             :                         min_seq = cur_elem->seq;
     418             :                 }
     419             :         }
     420             :         spin_unlock(&fs_info->tree_mod_seq_lock);
     421             : 
     422             :         /*
     423             :          * anything that's lower than the lowest existing (read: blocked)
     424             :          * sequence number can be removed from the tree.
     425             :          */
     426             :         tree_mod_log_write_lock(fs_info);
     427        3655 :         tm_root = &fs_info->tree_mod_log;
     428      216105 :         for (node = rb_first(tm_root); node; node = next) {
     429      208795 :                 next = rb_next(node);
     430             :                 tm = container_of(node, struct tree_mod_elem, node);
     431      208795 :                 if (tm->seq > min_seq)
     432      205863 :                         continue;
     433        2932 :                 rb_erase(node, tm_root);
     434        2932 :                 kfree(tm);
     435             :         }
     436             :         tree_mod_log_write_unlock(fs_info);
     437             : }
     438             : 
     439             : /*
     440             :  * key order of the log:
     441             :  *       index -> sequence
     442             :  *
     443             :  * the index is the shifted logical of the *new* root node for root replace
     444             :  * operations, or the shifted logical of the affected block for all other
     445             :  * operations.
     446             :  *
     447             :  * Note: must be called with write lock (tree_mod_log_write_lock).
     448             :  */
     449             : static noinline int
     450        2932 : __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
     451             : {
     452             :         struct rb_root *tm_root;
     453             :         struct rb_node **new;
     454             :         struct rb_node *parent = NULL;
     455             :         struct tree_mod_elem *cur;
     456             : 
     457        2932 :         BUG_ON(!tm);
     458             : 
     459        2932 :         tm->seq = btrfs_inc_tree_mod_seq(fs_info);
     460             : 
     461        2932 :         tm_root = &fs_info->tree_mod_log;
     462        2932 :         new = &tm_root->rb_node;
     463       32774 :         while (*new) {
     464             :                 cur = container_of(*new, struct tree_mod_elem, node);
     465             :                 parent = *new;
     466       26910 :                 if (cur->index < tm->index)
     467         352 :                         new = &((*new)->rb_left);
     468       26558 :                 else if (cur->index > tm->index)
     469        4232 :                         new = &((*new)->rb_right);
     470       22326 :                 else if (cur->seq < tm->seq)
     471       22326 :                         new = &((*new)->rb_left);
     472           0 :                 else if (cur->seq > tm->seq)
     473           0 :                         new = &((*new)->rb_right);
     474             :                 else
     475             :                         return -EEXIST;
     476             :         }
     477             : 
     478        2932 :         rb_link_node(&tm->node, parent, new);
     479        2932 :         rb_insert_color(&tm->node, tm_root);
     480        2932 :         return 0;
     481             : }
     482             : 
     483             : /*
     484             :  * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it
     485             :  * returns zero with the tree_mod_log_lock acquired. The caller must hold
     486             :  * this until all tree mod log insertions are recorded in the rb tree and then
     487             :  * call tree_mod_log_write_unlock() to release.
     488             :  */
     489        2024 : static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
     490        2011 :                                     struct extent_buffer *eb) {
     491        2024 :         smp_mb();
     492        4048 :         if (list_empty(&(fs_info)->tree_mod_seq_list))
     493             :                 return 1;
     494        4035 :         if (eb && btrfs_header_level(eb) == 0)
     495             :                 return 1;
     496             : 
     497             :         tree_mod_log_write_lock(fs_info);
     498        2024 :         if (list_empty(&(fs_info)->tree_mod_seq_list)) {
     499             :                 tree_mod_log_write_unlock(fs_info);
     500           0 :                 return 1;
     501             :         }
     502             : 
     503             :         return 0;
     504             : }
     505             : 
     506             : /* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
     507             : static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
     508        2011 :                                     struct extent_buffer *eb)
     509             : {
     510       79644 :         smp_mb();
     511      159290 :         if (list_empty(&(fs_info)->tree_mod_seq_list))
     512             :                 return 0;
     513        4022 :         if (eb && btrfs_header_level(eb) == 0)
     514             :                 return 0;
     515             : 
     516             :         return 1;
     517             : }
     518             : 
     519             : static struct tree_mod_elem *
     520        2697 : alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
     521             :                     enum mod_log_op op, gfp_t flags)
     522             : {
     523             :         struct tree_mod_elem *tm;
     524             : 
     525        2697 :         tm = kzalloc(sizeof(*tm), flags);
     526        2697 :         if (!tm)
     527             :                 return NULL;
     528             : 
     529        2697 :         tm->index = eb->start >> PAGE_CACHE_SHIFT;
     530        2697 :         if (op != MOD_LOG_KEY_ADD) {
     531        2448 :                 btrfs_node_key(eb, &tm->key, slot);
     532        2448 :                 tm->blockptr = btrfs_node_blockptr(eb, slot);
     533             :         }
     534        2697 :         tm->op = op;
     535        2697 :         tm->slot = slot;
     536        2697 :         tm->generation = btrfs_node_ptr_generation(eb, slot);
     537        2697 :         RB_CLEAR_NODE(&tm->node);
     538             : 
     539        2697 :         return tm;
     540             : }
     541             : /*
                     :  * Record a single-slot operation @op on slot @slot of @eb in the tree
                     :  * mod log.
                     :  *
                     :  * Returns 0 when the element was inserted, or when either logging check
                     :  * (tree_mod_need_log() / tree_mod_dont_log()) reports that nothing has
                     :  * to be logged; -ENOMEM when the element cannot be allocated with
                     :  * @flags; otherwise the error returned by __tree_mod_log_insert().
                     :  *
                     :  * The element is allocated before tree_mod_dont_log() is called and is
                     :  * freed again when it turns out not to be needed or cannot be inserted.
                     :  *
                     :  * NOTE(review): the unconditional tree_mod_log_write_unlock() after the
                     :  * insert suggests that tree_mod_dont_log() returns 0 with the tree mod
                     :  * log write lock held - confirm against its definition, which is not
                     :  * part of this excerpt.
                     :  */
     542             : static noinline int
     543       58000 : tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
     544             :                         struct extent_buffer *eb, int slot,
     545             :                         enum mod_log_op op, gfp_t flags)
     546             : {
     547             :         struct tree_mod_elem *tm;
     548             :         int ret;
     549             : 
     550       58001 :         if (!tree_mod_need_log(fs_info, eb))
     551             :                 return 0;
     552             : 
     553        1783 :         tm = alloc_tree_mod_elem(eb, slot, op, flags);
     554        1783 :         if (!tm)
     555             :                 return -ENOMEM;
     556             : 
     557        1783 :         if (tree_mod_dont_log(fs_info, eb)) {
     558           0 :                 kfree(tm); /* not needed after all; not an error */
     559           0 :                 return 0;
     560             :         }
     561             : 
     562        1783 :         ret = __tree_mod_log_insert(fs_info, tm);
     563             :         tree_mod_log_write_unlock(fs_info);
     564        1783 :         if (ret)
     565           0 :                 kfree(tm); /* insert failed, so the element is still ours */
     566             : 
     567        1783 :         return ret;
     568             : }
     569             : /*
                     :  * Log a move of @nr_items key pointers inside @eb from @src_slot to
                     :  * @dst_slot.
                     :  *
                     :  * When the move goes towards the start of the buffer, the slots it
                     :  * overwrites (starting at @dst_slot, stopping before @src_slot, and at
                     :  * most @nr_items of them) are logged first as
                     :  * MOD_LOG_KEY_REMOVE_WHILE_MOVING.  One MOD_LOG_MOVE_KEYS element that
                     :  * describes the move itself is inserted last.
                     :  *
                     :  * Every element is allocated with @flags before tree_mod_dont_log() is
                     :  * called.  The pointer array comes from kcalloc(), which zero-fills it
                     :  * and fails the allocation instead of wrapping when the element count
                     :  * times the element size overflows.
                     :  *
                     :  * Returns 0 on success or when nothing has to be logged, -ENOMEM on
                     :  * allocation failure, or the error from __tree_mod_log_insert().  On
                     :  * every early exit the elements already linked into
                     :  * fs_info->tree_mod_log are erased again and everything allocated here
                     :  * is freed.
                     :  */
     570             : static noinline int
     571        4522 : tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
     572             :                          struct extent_buffer *eb, int dst_slot, int src_slot,
     573             :                          int nr_items, gfp_t flags)
     574             : {
     575             :         struct tree_mod_elem *tm = NULL;
     576             :         struct tree_mod_elem **tm_list = NULL;
     577             :         int ret = 0;
     578             :         int i;
     579             :         int locked = 0;
     580             : 
     581        4522 :         if (!tree_mod_need_log(fs_info, eb))
     582             :                 return 0;
     583             : 
     584         228 :         tm_list = kcalloc(nr_items, sizeof(*tm_list), flags);
     585         228 :         if (!tm_list)
     586             :                 return -ENOMEM;
     587             : 
     588         228 :         tm = kzalloc(sizeof(*tm), flags);
     589         228 :         if (!tm) {
     590             :                 ret = -ENOMEM;
     591             :                 goto free_tms;
     592             :         }
     593             : 
     594         228 :         tm->index = eb->start >> PAGE_CACHE_SHIFT;
     595         228 :         tm->slot = src_slot;
     596         228 :         tm->move.dst_slot = dst_slot;
     597         228 :         tm->move.nr_items = nr_items;
     598         228 :         tm->op = MOD_LOG_MOVE_KEYS;
     599             : 
     600         455 :         for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
     601         227 :                 tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
     602             :                     MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags);
     603         227 :                 if (!tm_list[i]) {
     604             :                         ret = -ENOMEM;
     605             :                         goto free_tms;
     606             :                 }
     607             :         }
     608             : 
     609         228 :         if (tree_mod_dont_log(fs_info, eb))
     610             :                 goto free_tms; /* ret is still 0: nothing to log */
     611             :         locked = 1; /* tells free_tms that the write unlock is due */
     612             : 
     613             :         /*
     614             :          * When we overwrite something during the move, we log these removals.
     615             :          * This can only happen when we move towards the beginning of the
     616             :          * buffer, i.e. dst_slot < src_slot.
     617             :          */
     618         227 :         for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
     619         227 :                 ret = __tree_mod_log_insert(fs_info, tm_list[i]);
     620         227 :                 if (ret)
     621             :                         goto free_tms;
     622             :         }
     623             : 
     624         228 :         ret = __tree_mod_log_insert(fs_info, tm);
     625         228 :         if (ret)
     626             :                 goto free_tms;
     627             :         tree_mod_log_write_unlock(fs_info);
     628         228 :         kfree(tm_list);
     629             : 
     630         228 :         return 0;
     631             : free_tms: /* tm_list is zero-filled, so never-allocated entries are NULL */
     632           0 :         for (i = 0; i < nr_items; i++) {
     633           0 :                 if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
     634           0 :                         rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
     635           0 :                 kfree(tm_list[i]);
     636             :         }
     637           0 :         if (locked)
     638             :                 tree_mod_log_write_unlock(fs_info);
     639           0 :         kfree(tm_list);
     640           0 :         kfree(tm);
     641             : 
     642           0 :         return ret;
     643             : }
     644             : /*
                     :  * Insert the @nritems elements of @tm_list into the tree mod log,
                     :  * walking the array from its last entry to its first.
                     :  *
                     :  * If an insertion fails, the elements inserted so far (the indices
                     :  * above the failing one) are erased from fs_info->tree_mod_log again
                     :  * and the error is returned.  Nothing is freed here; the elements stay
                     :  * owned by the caller.  Returns 0 when every element was inserted.
                     :  *
                     :  * No locking is done in this function; the callers visible in this file
                     :  * invoke it between tree_mod_dont_log() and
                     :  * tree_mod_log_write_unlock().
                     :  */
     645             : static inline int
     646           6 : __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
     647             :                        struct tree_mod_elem **tm_list,
     648             :                        int nritems)
     649             : {
     650             :         int i, j;
     651             :         int ret;
     652             : 
     653         227 :         for (i = nritems - 1; i >= 0; i--) {
     654         221 :                 ret = __tree_mod_log_insert(fs_info, tm_list[i]);
     655         221 :                 if (ret) {
     656           0 :                         for (j = nritems - 1; j > i; j--) /* undo earlier inserts */
     657           0 :                                 rb_erase(&tm_list[j]->node,
     658             :                                          &fs_info->tree_mod_log);
     659             :                         return ret;
     660             :                 }
     661             :         }
     662             : 
     663             :         return 0;
     664             : }
     665             : /*
                     :  * Log the replacement of root node @old_root by @new_root.
                     :  *
                     :  * If @log_removal is set and @old_root is not a leaf (header level
                     :  * above 0), each of its key pointers is logged first as
                     :  * MOD_LOG_KEY_REMOVE_WHILE_FREEING.  After that one
                     :  * MOD_LOG_ROOT_REPLACE element is inserted; it is indexed by
                     :  * @new_root->start and records the start, level and generation of
                     :  * @old_root.
                     :  *
                     :  * Every element is allocated with @flags before tree_mod_dont_log() is
                     :  * called.  The pointer array comes from kcalloc(), which zero-fills it
                     :  * and fails the allocation instead of wrapping when the element count
                     :  * times the element size overflows.
                     :  *
                     :  * Returns 0 on success or when nothing has to be logged, -ENOMEM on
                     :  * allocation failure, or the error from the insert helpers.
                     :  *
                     :  * NOTE(review): if __tree_mod_log_free_eb() succeeds but the following
                     :  * __tree_mod_log_insert() of the root element fails, free_tms kfree()s
                     :  * the tm_list[] entries without rb_erase(), although they were linked
                     :  * into fs_info->tree_mod_log.  Whether the insert can fail at that
                     :  * point depends on __tree_mod_log_insert(), which is not part of this
                     :  * excerpt - confirm.
                     :  */
     666             : static noinline int
     667       13659 : tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
     668          21 :                          struct extent_buffer *old_root,
     669             :                          struct extent_buffer *new_root, gfp_t flags,
     670             :                          int log_removal)
     671             : {
     672             :         struct tree_mod_elem *tm = NULL;
     673             :         struct tree_mod_elem **tm_list = NULL;
     674             :         int nritems = 0;
     675             :         int ret = 0;
     676             :         int i;
     677             : 
     678       13659 :         if (!tree_mod_need_log(fs_info, NULL))
     679             :                 return 0;
     680             : 
     681          14 :         if (log_removal && btrfs_header_level(old_root) > 0) {
     682           6 :                 nritems = btrfs_header_nritems(old_root);
     683           6 :                 tm_list = kcalloc(nritems, sizeof(*tm_list),
     684             :                                   flags);
     685           6 :                 if (!tm_list) {
     686             :                         ret = -ENOMEM;
     687             :                         goto free_tms;
     688             :                 }
     689         221 :                 for (i = 0; i < nritems; i++) {
     690         221 :                         tm_list[i] = alloc_tree_mod_elem(old_root, i,
     691             :                             MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags);
     692         221 :                         if (!tm_list[i]) {
     693             :                                 ret = -ENOMEM;
     694             :                                 goto free_tms;
     695             :                         }
     696             :                 }
     697             :         }
     698             : 
     699           7 :         tm = kzalloc(sizeof(*tm), flags);
     700           7 :         if (!tm) {
     701             :                 ret = -ENOMEM;
     702             :                 goto free_tms;
     703             :         }
     704             : 
     705           7 :         tm->index = new_root->start >> PAGE_CACHE_SHIFT;
     706           7 :         tm->old_root.logical = old_root->start;
     707           7 :         tm->old_root.level = btrfs_header_level(old_root);
     708           7 :         tm->generation = btrfs_header_generation(old_root);
     709           7 :         tm->op = MOD_LOG_ROOT_REPLACE;
     710             : 
     711           7 :         if (tree_mod_dont_log(fs_info, NULL))
     712             :                 goto free_tms; /* ret is still 0: nothing to log */
     713             : 
     714           7 :         if (tm_list)
     715           6 :                 ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
     716           7 :         if (!ret)
     717           7 :                 ret = __tree_mod_log_insert(fs_info, tm);
     718             : 
     719             :         tree_mod_log_write_unlock(fs_info);
     720           7 :         if (ret)
     721             :                 goto free_tms;
     722           7 :         kfree(tm_list);
     723             : 
     724             :         return ret;
     725             : 
     726             : free_tms:
     727           0 :         if (tm_list) {
     728           0 :                 for (i = 0; i < nritems; i++)
     729           0 :                         kfree(tm_list[i]);
     730           0 :                 kfree(tm_list);
     731             :         }
     732           0 :         kfree(tm);
     733             : 
     734             :         return ret;
     735             : }
     736             : /*
                     :  * Search fs_info->tree_mod_log for an element that belongs to the block
                     :  * at @start (its index is @start >> PAGE_CACHE_SHIFT) and whose seq is
                     :  * not below @min_seq.
                     :  *
                     :  * With @smallest set, the walk keeps the matching element with the
                     :  * smallest seq it meets and stops immediately on an element whose seq
                     :  * equals @min_seq.  Without it, the walk keeps the matching element
                     :  * with the highest seq.  Returns NULL when no element matches.
                     :  *
                     :  * The walk runs between tree_mod_log_read_lock() and
                     :  * tree_mod_log_read_unlock(); the element is returned after the unlock.
                     :  */
     737             : static struct tree_mod_elem *
     738       21967 : __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
     739             :                       int smallest)
     740             : {
     741             :         struct rb_root *tm_root;
     742             :         struct rb_node *node;
     743             :         struct tree_mod_elem *cur = NULL;
     744             :         struct tree_mod_elem *found = NULL;
     745       21967 :         u64 index = start >> PAGE_CACHE_SHIFT;
     746             : 
     747             :         tree_mod_log_read_lock(fs_info);
     748             :         tm_root = &fs_info->tree_mod_log;
     749       21967 :         node = tm_root->rb_node;
     750       57305 :         while (node) {
     751             :                 cur = container_of(node, struct tree_mod_elem, node);
     752       13371 :                 if (cur->index < index) {
     753        1048 :                         node = node->rb_left;
     754       12323 :                 } else if (cur->index > index) {
     755       12219 :                         node = node->rb_right;
     756         104 :                 } else if (cur->seq < min_seq) { /* same block, but too old */
     757           0 :                         node = node->rb_left;
     758         104 :                 } else if (!smallest) {
     759             :                         /* we want the node with the highest seq */
     760          39 :                         if (found)
     761          18 :                                 BUG_ON(found->seq > cur->seq);
     762             :                         found = cur;
     763          39 :                         node = node->rb_left;
     764          65 :                 } else if (cur->seq > min_seq) {
     765             :                         /* we want the node with the smallest seq */
     766          65 :                         if (found)
     767          16 :                                 BUG_ON(found->seq < cur->seq);
     768             :                         found = cur;
     769          65 :                         node = node->rb_right;
     770             :                 } else { /* seq == min_seq: nothing smaller can qualify */
     771             :                         found = cur;
     772             :                         break;
     773             :                 }
     774             :         }
     775             :         tree_mod_log_read_unlock(fs_info);
     776             : 
     777       21967 :         return found;
     778             : }
     779             : 
     780             : /*
     781             :  * This returns the element from the log with the smallest time sequence
     782             :  * value that's in the log (the oldest log item). Any element with a time
     783             :  * sequence lower than min_seq will be ignored.
                     :  *
                     :  * Thin wrapper: calls __tree_mod_log_search() with smallest = 1 and
                     :  * returns its result, which is NULL when nothing matches.
     784             :  */
     785             : static struct tree_mod_elem *
     786             : tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start,
     787             :                            u64 min_seq)
     788             : {
     789       20677 :         return __tree_mod_log_search(fs_info, start, min_seq, 1);
     790             : }
     791             : 
     792             : /*
     793             :  * This returns the element from the log with the largest time sequence
     794             :  * value that's in the log (the most recent log item). Any element with
     795             :  * a time sequence lower than min_seq will be ignored.
                     :  *
                     :  * Thin wrapper: calls __tree_mod_log_search() with smallest = 0 and
                     :  * returns its result, which is NULL when nothing matches.
     796             :  */
     797             : static struct tree_mod_elem *
     798             : tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
     799             : {
     800        1290 :         return __tree_mod_log_search(fs_info, start, min_seq, 0);
     801             : }
     802             : /*
                     :  * Log the copy of @nr_items key pointers from @src (starting at slot
                     :  * @src_offset) to @dst (starting at slot @dst_offset).
                     :  *
                     :  * For each copied pointer two elements are logged: a MOD_LOG_KEY_REMOVE
                     :  * for the source slot and a MOD_LOG_KEY_ADD for the destination slot.
                     :  * Nothing is logged when both buffers are leaves (header level 0).
                     :  *
                     :  * One array holds both halves: the first @nr_items entries are the add
                     :  * elements, the second @nr_items entries the remove elements.  It comes
                     :  * from kcalloc(), which zero-fills it and fails the allocation instead
                     :  * of wrapping when the element count times the element size overflows.
                     :  * All allocations use GFP_NOFS.
                     :  *
                     :  * Returns 0 on success or when nothing has to be logged, -ENOMEM on
                     :  * allocation failure, or the error from __tree_mod_log_insert().  On
                     :  * every early exit the elements already linked into
                     :  * fs_info->tree_mod_log are erased again and everything allocated here
                     :  * is freed.
                     :  */
     803             : static noinline int
     804         217 : tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
     805           0 :                      struct extent_buffer *src, unsigned long dst_offset,
     806             :                      unsigned long src_offset, int nr_items)
     807             : {
     808             :         int ret = 0;
     809             :         struct tree_mod_elem **tm_list = NULL;
     810             :         struct tree_mod_elem **tm_list_add, **tm_list_rem;
     811             :         int i;
     812             :         int locked = 0;
     813             : 
     814         211 :         if (!tree_mod_need_log(fs_info, NULL))
     815             :                 return 0;
     816             : 
     817           6 :         if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
     818             :                 return 0;
     819             : 
     820           6 :         tm_list = kcalloc(nr_items * 2, sizeof(*tm_list),
     821             :                           GFP_NOFS);
     822           6 :         if (!tm_list)
     823             :                 return -ENOMEM;
     824             : 
     825             :         tm_list_add = tm_list;
     826           6 :         tm_list_rem = tm_list + nr_items;
     827         239 :         for (i = 0; i < nr_items; i++) {
     828         233 :                 tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
     829             :                     MOD_LOG_KEY_REMOVE, GFP_NOFS);
     830         233 :                 if (!tm_list_rem[i]) {
     831             :                         ret = -ENOMEM;
     832             :                         goto free_tms;
     833             :                 }
     834             : 
     835         233 :                 tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
     836             :                     MOD_LOG_KEY_ADD, GFP_NOFS);
     837         233 :                 if (!tm_list_add[i]) {
     838             :                         ret = -ENOMEM;
     839             :                         goto free_tms;
     840             :                 }
     841             :         }
     842             : 
     843           6 :         if (tree_mod_dont_log(fs_info, NULL))
     844             :                 goto free_tms; /* ret is still 0: nothing to log */
     845             :         locked = 1; /* tells free_tms that the write unlock is due */
     846             : 
     847         233 :         for (i = 0; i < nr_items; i++) {
     848         233 :                 ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
     849         233 :                 if (ret)
     850             :                         goto free_tms;
     851         233 :                 ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
     852         233 :                 if (ret)
     853             :                         goto free_tms;
     854             :         }
     855             : 
     856             :         tree_mod_log_write_unlock(fs_info);
     857           6 :         kfree(tm_list);
     858             : 
     859           6 :         return 0;
     860             : 
     861             : free_tms: /* tm_list is zero-filled, so never-allocated entries are NULL */
     862           0 :         for (i = 0; i < nr_items * 2; i++) {
     863           0 :                 if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
     864           0 :                         rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
     865           0 :                 kfree(tm_list[i]);
     866             :         }
     867           0 :         if (locked)
     868             :                 tree_mod_log_write_unlock(fs_info);
     869           0 :         kfree(tm_list);
     870             : 
     871           0 :         return ret;
     872             : }
     873             : /*
                     :  * Log a move of @nr_items key pointers inside @dst from slot
                     :  * @src_offset to slot @dst_offset.
                     :  *
                     :  * Wrapper around tree_mod_log_insert_move() that always allocates with
                     :  * GFP_NOFS and treats any negative return value as fatal (BUG_ON).
                     :  */
     874             : static inline void
     875        4522 : tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
     876             :                      int dst_offset, int src_offset, int nr_items)
     877             : {
     878             :         int ret;
     879        4522 :         ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset,
     880             :                                        nr_items, GFP_NOFS);
     881        4522 :         BUG_ON(ret < 0);
     882        4522 : }
     883             : /*
                     :  * Log a MOD_LOG_KEY_REPLACE operation for slot @slot of @eb.
                     :  *
                     :  * Wrapper around tree_mod_log_insert_key().  A non-zero @atomic selects
                     :  * GFP_ATOMIC for the allocation, otherwise GFP_NOFS is used.  Any
                     :  * negative return value is treated as fatal (BUG_ON).
                     :  */
     884             : static noinline void
     885       13775 : tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
     886             :                           struct extent_buffer *eb, int slot, int atomic)
     887             : {
     888             :         int ret;
     889             : 
     890       13775 :         ret = tree_mod_log_insert_key(fs_info, eb, slot,
     891             :                                         MOD_LOG_KEY_REPLACE,
     892             :                                         atomic ? GFP_ATOMIC : GFP_NOFS);
     893       13775 :         BUG_ON(ret < 0);
     894       13775 : }
     895             : /*
                     :  * Log the removal of every key pointer of @eb as
                     :  * MOD_LOG_KEY_REMOVE_WHILE_FREEING.
                     :  *
                     :  * Leaves (header level 0) are not logged.  All allocations use
                     :  * GFP_NOFS.  The pointer array comes from kcalloc(), which zero-fills
                     :  * it and fails the allocation instead of wrapping when the element
                     :  * count times the element size overflows.
                     :  *
                     :  * Returns 0 on success or when nothing has to be logged, -ENOMEM on
                     :  * allocation failure, or the error from __tree_mod_log_free_eb().  On
                     :  * failure every element allocated here is freed;
                     :  * __tree_mod_log_free_eb() has already erased the ones it had linked
                     :  * into the log.
                     :  */
     896             : static noinline int
     897       38288 : tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
     898             : {
     899             :         struct tree_mod_elem **tm_list = NULL;
     900             :         int nritems = 0;
     901             :         int i;
     902             :         int ret = 0;
     903             : 
     904       38288 :         if (btrfs_header_level(eb) == 0)
     905             :                 return 0;
     906             : 
     907        3252 :         if (!tree_mod_need_log(fs_info, NULL))
     908             :                 return 0;
     909             : 
     910           0 :         nritems = btrfs_header_nritems(eb);
     911           0 :         tm_list = kcalloc(nritems, sizeof(*tm_list),
     912             :                           GFP_NOFS);
     913           0 :         if (!tm_list)
     914             :                 return -ENOMEM;
     915             : 
     916           0 :         for (i = 0; i < nritems; i++) {
     917           0 :                 tm_list[i] = alloc_tree_mod_elem(eb, i,
     918             :                     MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
     919           0 :                 if (!tm_list[i]) {
     920             :                         ret = -ENOMEM;
     921             :                         goto free_tms;
     922             :                 }
     923             :         }
     924             : 
     925           0 :         if (tree_mod_dont_log(fs_info, eb))
     926             :                 goto free_tms; /* ret is still 0: nothing to log */
     927             : 
     928           0 :         ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
     929             :         tree_mod_log_write_unlock(fs_info);
     930           0 :         if (ret)
     931             :                 goto free_tms;
     932           0 :         kfree(tm_list);
     933             : 
     934           0 :         return 0;
     935             : 
     936             : free_tms: /* tm_list is zero-filled, so never-allocated entries are NULL */
     937           0 :         for (i = 0; i < nritems; i++)
     938           0 :                 kfree(tm_list[i]);
     939           0 :         kfree(tm_list);
     940             : 
     941           0 :         return ret;
     942             : }
     943             : /*
                     :  * Log that @root->node is being replaced by @new_root_node.
                     :  *
                     :  * Wrapper around tree_mod_log_insert_root() that always allocates with
                     :  * GFP_NOFS, passes @log_removal through unchanged and treats any
                     :  * negative return value as fatal (BUG_ON).
                     :  */
     944             : static noinline void
     945       13659 : tree_mod_log_set_root_pointer(struct btrfs_root *root,
     946             :                               struct extent_buffer *new_root_node,
     947             :                               int log_removal)
     948             : {
     949             :         int ret;
     950       13659 :         ret = tree_mod_log_insert_root(root->fs_info, root->node,
     951             :                                        new_root_node, GFP_NOFS, log_removal);
     952       13658 :         BUG_ON(ret < 0);
     953       13658 : }
     954             : 
     955             : /*
     956             :  * Check if the tree block can be shared by multiple trees.
                     :  *
                     :  * Returns 1 when @buf may be shared and 0 when it is known not to be.
                     :  * Every positive answer requires BTRFS_ROOT_REF_COWS to be set in
                     :  * @root->state.
     957             :  */
     958       52290 : int btrfs_block_can_be_shared(struct btrfs_root *root,
     959       19589 :                               struct extent_buffer *buf)
     960             : {
     961             :         /*
     962             :          * Tree blocks not in reference counted trees and tree roots
     963             :          * are never shared. If a block was allocated after the last
     964             :          * snapshot and the block was not allocated by tree relocation,
     965             :          * we know the block is not shared.
     966             :          */
     967       75123 :         if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
     968       62011 :             buf != root->node && buf != root->commit_root &&
     969             :             (btrfs_header_generation(buf) <=
     970       18647 :              btrfs_root_last_snapshot(&root->root_item) ||
     971             :              btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
     972             :                 return 1;
     973             : #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
     974       73225 :         if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && /* old backref format */
     975             :             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
     976             :                 return 1;
     977             : #endif
     978       51341 :         return 0;
     979             : }
     980             : /*
                     :  * Adjust extent back references when tree block @buf is COWed into
                     :  * @cow.
                     :  *
                     :  * The reference count and flags of @buf are read with
                     :  * btrfs_lookup_extent_info() when btrfs_block_can_be_shared() says the
                     :  * block may be shared; otherwise a single reference is assumed and the
                     :  * flags are derived from the root objectid and the header's backref
                     :  * revision.
                     :  *
                     :  * With more than one reference, references are taken through
                     :  * btrfs_inc_ref() (and, for the relocation tree, dropped through
                     :  * btrfs_dec_ref()), and BTRFS_BLOCK_FLAG_FULL_BACKREF is set on the
                     :  * extent of @buf when the first branch was taken.  With a single
                     :  * reference, @buf is passed to clean_tree_block() and *@last_ref is set
                     :  * to 1; *@last_ref is left untouched in every other case.
                     :  *
                     :  * Returns 0 on success, the error from btrfs_lookup_extent_info() or
                     :  * btrfs_set_disk_extent_flags(), or -EROFS (after btrfs_std_error())
                     :  * when the lookup reports zero references.  A failing btrfs_inc_ref()
                     :  * or btrfs_dec_ref() hits BUG_ON().
                     :  */
     981       52284 : static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
     982             :                                        struct btrfs_root *root,
     983       53645 :                                        struct extent_buffer *buf,
     984             :                                        struct extent_buffer *cow,
     985             :                                        int *last_ref)
     986             : {
     987             :         u64 refs;
     988             :         u64 owner;
     989             :         u64 flags;
     990             :         u64 new_flags = 0;
     991             :         int ret;
     992             : 
     993             :         /*
     994             :          * Backrefs update rules:
     995             :          *
     996             :          * Always use full backrefs for extent pointers in tree blocks
     997             :          * allocated by tree relocation.
     998             :          *
     999             :          * If a shared tree block is no longer referenced by its owner
    1000             :          * tree (btrfs_header_owner(buf) == root->root_key.objectid),
    1001             :          * use full backrefs for extent pointers in tree block.
    1002             :          *
    1003             :          * If a tree block is being relocated
    1004             :          * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
    1005             :          * use full backrefs for extent pointers in tree block.
    1006             :          * The reason for this is some operations (such as drop tree)
    1007             :          * are only allowed for blocks that use full backrefs.
    1008             :          */
    1009             : 
    1010       52284 :         if (btrfs_block_can_be_shared(root, buf)) {
    1011         949 :                 ret = btrfs_lookup_extent_info(trans, root, buf->start,
    1012             :                                                btrfs_header_level(buf), 1,
    1013             :                                                &refs, &flags);
    1014         949 :                 if (ret)
    1015             :                         return ret;
    1016         949 :                 if (refs == 0) {
    1017             :                         ret = -EROFS;
    1018           0 :                         btrfs_std_error(root->fs_info, ret);
    1019           0 :                         return ret;
    1020             :                 }
    1021             :         } else {
    1022       51335 :                 refs = 1;
    1023      102668 :                 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
    1024             :                     btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
    1025           2 :                         flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
    1026             :                 else
    1027       51333 :                         flags = 0;
    1028             :         }
    1029             : 
    1030             :         owner = btrfs_header_owner(buf);
    1031       52284 :         BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
    1032             :                !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
    1033             : 
    1034       52284 :         if (refs > 1) {
    1035         470 :                 if ((owner == root->root_key.objectid ||
    1036         419 :                      root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
    1037         419 :                     !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
    1038         412 :                         ret = btrfs_inc_ref(trans, root, buf, 1);
    1039         412 :                         BUG_ON(ret); /* -ENOMEM */
    1040             : 
    1041         412 :                         if (root->root_key.objectid ==
    1042             :                             BTRFS_TREE_RELOC_OBJECTID) {
    1043           9 :                                 ret = btrfs_dec_ref(trans, root, buf, 0);
    1044           9 :                                 BUG_ON(ret); /* -ENOMEM */
    1045           9 :                                 ret = btrfs_inc_ref(trans, root, cow, 1);
    1046           9 :                                 BUG_ON(ret); /* -ENOMEM */
    1047             :                         }
    1048             :                         new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
    1049             :                 } else {
    1050             : 
    1051          58 :                         if (root->root_key.objectid ==
    1052             :                             BTRFS_TREE_RELOC_OBJECTID)
    1053           7 :                                 ret = btrfs_inc_ref(trans, root, cow, 1);
    1054             :                         else
    1055          51 :                                 ret = btrfs_inc_ref(trans, root, cow, 0);
    1056          58 :                         BUG_ON(ret); /* -ENOMEM */
    1057             :                 }
    1058         470 :                 if (new_flags != 0) { /* only set by the first branch above */
    1059         412 :                         int level = btrfs_header_level(buf);
    1060             : 
    1061         412 :                         ret = btrfs_set_disk_extent_flags(trans, root,
    1062             :                                                           buf->start,
    1063         412 :                                                           buf->len,
    1064             :                                                           new_flags, level, 0);
    1065         412 :                         if (ret)
    1066           0 :                                 return ret;
    1067             :                 }
    1068             :         } else {
    1069       51814 :                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
    1070         332 :                         if (root->root_key.objectid ==
    1071             :                             BTRFS_TREE_RELOC_OBJECTID)
    1072           2 :                                 ret = btrfs_inc_ref(trans, root, cow, 1);
    1073             :                         else
    1074         330 :                                 ret = btrfs_inc_ref(trans, root, cow, 0);
    1075         332 :                         BUG_ON(ret); /* -ENOMEM */
    1076         332 :                         ret = btrfs_dec_ref(trans, root, buf, 1);
    1077         332 :                         BUG_ON(ret); /* -ENOMEM */
    1078             :                 }
    1079       51814 :                 clean_tree_block(trans, root, buf);
    1080       51814 :                 *last_ref = 1; /* single reference: tell the caller via *last_ref */
    1081             :         }
    1082             :         return 0;
    1083             : }
    1084             : 
    1085             : /*
    1086             :  * does the dirty work in cow of a single block.  The parent block (if
    1087             :  * supplied) is updated to point to the new cow copy.  The new buffer is marked
    1088             :  * dirty and returned locked.  If you modify the block it needs to be marked
    1089             :  * dirty again.
    1090             :  *
    1091             :  * search_start -- an allocation hint for the new block
    1092             :  *
    1093             :  * empty_size -- a hint that you plan on doing more cow.  This is the size in
    1094             :  * bytes the allocator should try to find free next to the block it returns.
    1095             :  * This is just a hint and may be ignored by the allocator.
    1096             :  */
    1097       52285 : static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
    1098       13527 :                              struct btrfs_root *root,
    1099       52284 :                              struct extent_buffer *buf,
    1100       38757 :                              struct extent_buffer *parent, int parent_slot,
    1101             :                              struct extent_buffer **cow_ret,
    1102             :                              u64 search_start, u64 empty_size)
    1103             : {
    1104             :         struct btrfs_disk_key disk_key;
    1105      156840 :         struct extent_buffer *cow;
    1106             :         int level, ret;
    1107       52285 :         int last_ref = 0;
    1108             :         int unlock_orig = 0;
    1109             :         u64 parent_start;
    1110             : 
    1111       52285 :         if (*cow_ret == buf)
    1112             :                 unlock_orig = 1;
    1113             : 
    1114       52285 :         btrfs_assert_tree_locked(buf);
    1115             : 
    1116       52283 :         WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
    1117             :                 trans->transid != root->fs_info->running_transaction->transid);
    1118       52284 :         WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
    1119             :                 trans->transid != root->last_trans);
    1120             : 
    1121       52284 :         level = btrfs_header_level(buf);
    1122             : 
    1123       52284 :         if (level == 0)
    1124             :                 btrfs_item_key(buf, &disk_key, 0);
    1125             :         else
    1126        7972 :                 btrfs_node_key(buf, &disk_key, 0);
    1127             : 
    1128       52284 :         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
    1129          18 :                 if (parent)
    1130          16 :                         parent_start = parent->start;
    1131             :                 else
    1132             :                         parent_start = 0;
    1133             :         } else
    1134             :                 parent_start = 0;
    1135             : 
    1136       52284 :         cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
    1137             :                                      root->root_key.objectid, &disk_key,
    1138             :                                      level, search_start, empty_size);
    1139       52286 :         if (IS_ERR(cow))
    1140           0 :                 return PTR_ERR(cow);
    1141             : 
    1142             :         /* cow is set to blocking by btrfs_init_new_buffer */
    1143             : 
    1144       52286 :         copy_extent_buffer(cow, buf, 0, 0, cow->len);
    1145       52286 :         btrfs_set_header_bytenr(cow, cow->start);
    1146       52286 :         btrfs_set_header_generation(cow, trans->transid);
    1147             :         btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
    1148             :         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
    1149             :                                      BTRFS_HEADER_FLAG_RELOC);
    1150       52286 :         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
    1151             :                 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
    1152             :         else
    1153             :                 btrfs_set_header_owner(cow, root->root_key.objectid);
    1154             : 
    1155       52286 :         write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
    1156             :                             BTRFS_FSID_SIZE);
    1157             : 
    1158       52284 :         ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
    1159       52284 :         if (ret) {
    1160           0 :                 btrfs_abort_transaction(trans, root, ret);
    1161           0 :                 return ret;
    1162             :         }
    1163             : 
    1164       52284 :         if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
    1165       22831 :                 ret = btrfs_reloc_cow_block(trans, root, buf, cow);
    1166       22831 :                 if (ret)
    1167             :                         return ret;
    1168             :         }
    1169             : 
    1170       52285 :         if (buf == root->node) {
    1171       13528 :                 WARN_ON(parent && parent != buf);
    1172       27054 :                 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
    1173             :                     btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
    1174           2 :                         parent_start = buf->start;
    1175             :                 else
    1176             :                         parent_start = 0;
    1177             : 
    1178             :                 extent_buffer_get(cow);
    1179       13527 :                 tree_mod_log_set_root_pointer(root, cow, 1);
    1180       13527 :                 rcu_assign_pointer(root->node, cow);
    1181             : 
    1182       13527 :                 btrfs_free_tree_block(trans, root, buf, parent_start,
    1183             :                                       last_ref);
    1184       13528 :                 free_extent_buffer(buf);
    1185       13528 :                 add_root_to_dirty_list(root);
    1186             :         } else {
    1187       38757 :                 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
    1188          16 :                         parent_start = parent->start;
    1189             :                 else
    1190             :                         parent_start = 0;
    1191             : 
    1192       77514 :                 WARN_ON(trans->transid != btrfs_header_generation(parent));
    1193       38757 :                 tree_mod_log_insert_key(root->fs_info, parent, parent_slot,
    1194             :                                         MOD_LOG_KEY_REPLACE, GFP_NOFS);
    1195       38757 :                 btrfs_set_node_blockptr(parent, parent_slot,
    1196             :                                         cow->start);
    1197       38757 :                 btrfs_set_node_ptr_generation(parent, parent_slot,
    1198             :                                               trans->transid);
    1199       38756 :                 btrfs_mark_buffer_dirty(parent);
    1200       38758 :                 if (last_ref) {
    1201       38288 :                         ret = tree_mod_log_free_eb(root->fs_info, buf);
    1202       38288 :                         if (ret) {
    1203           0 :                                 btrfs_abort_transaction(trans, root, ret);
    1204           0 :                                 return ret;
    1205             :                         }
    1206             :                 }
    1207       38758 :                 btrfs_free_tree_block(trans, root, buf, parent_start,
    1208             :                                       last_ref);
    1209             :         }
    1210       52286 :         if (unlock_orig)
    1211       52286 :                 btrfs_tree_unlock(buf);
    1212       52286 :         free_extent_buffer_stale(buf);
    1213       52284 :         btrfs_mark_buffer_dirty(cow);
    1214       52285 :         *cow_ret = cow;
    1215       52285 :         return 0;
    1216             : }
    1217             : 
    1218             : /*
    1219             :  * returns the tree mod log entry describing the oldest predecessor of the
    1220             :  * given root (NULL if none is logged). entries older than time_seq are ignored.
    1221             :  */
    1222             : static struct tree_mod_elem *
    1223       20675 : __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
    1224             :                            struct extent_buffer *eb_root, u64 time_seq)
    1225             : {
    1226             :         struct tree_mod_elem *tm;
    1227             :         struct tree_mod_elem *found = NULL;
    1228       20675 :         u64 root_logical = eb_root->start;
    1229             :         int looped = 0;
    1230             : 
    1231       20675 :         if (!time_seq)
    1232             :                 return NULL;
    1233             : 
    1234             :         /*
    1235             :          * the very last operation that's logged for a root is the replacement
    1236             :          * operation (if it is replaced at all). this has the index of the *new*
    1237             :          * root, making it the very first operation that's logged for this root.
    1238             :          */
    1239             :         while (1) {
    1240             :                 tm = tree_mod_log_search_oldest(fs_info, root_logical,
    1241             :                                                 time_seq);
    1242       20677 :                 if (!looped && !tm)
    1243             :                         return NULL;
    1244             :                 /*
    1245             :                  * if there are no tree operations for the oldest root, we simply
    1246             :                  * return it. this should only happen if that (old) root is at
    1247             :                  * level 0.
    1248             :                  */
    1249          49 :                 if (!tm)
    1250             :                         break;
    1251             : 
    1252             :                 /*
    1253             :                  * if there's an operation that's not a root replacement, we
    1254             :                  * found the oldest version of our root. normally, we'll find a
    1255             :                  * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
    1256             :                  */
    1257          49 :                 if (tm->op != MOD_LOG_ROOT_REPLACE)
    1258             :                         break;
    1259             : 
    1260             :                 found = tm;
    1261           2 :                 root_logical = tm->old_root.logical;
    1262             :                 looped = 1;
    1263             :         }
    1264             : 
    1265             :         /* if there's no old root to return, return what we found instead */
    1266          47 :         if (!found)
    1267             :                 found = tm;
    1268             : 
    1269             :         return found;
    1270             : }
    1271             : 
    1272             : /*
    1273             :  * tm is a pointer to the first operation to rewind within eb. then, all
    1274             :  * previous operations will be rewound (until we reach something older than
    1275             :  * time_seq).
    1276             :  */
    1277             : static void
    1278          42 : __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
    1279             :                       u64 time_seq, struct tree_mod_elem *first_tm)
    1280             : {
    1281             :         u32 n;
    1282             :         struct rb_node *next;
    1283             :         struct tree_mod_elem *tm = first_tm;
    1284             :         unsigned long o_dst;
    1285             :         unsigned long o_src;
    1286             :         unsigned long p_size = sizeof(struct btrfs_key_ptr);
    1287             : 
    1288             :         n = btrfs_header_nritems(eb);
    1289             :         tree_mod_log_read_lock(fs_info);
    1290         238 :         while (tm && tm->seq >= time_seq) {
    1291             :                 /*
    1292             :                  * all the operations are recorded with the operator used for
    1293             :                  * the modification. as we're going backwards, we do the
    1294             :                  * opposite of each operation here.
    1295             :                  */
    1296         238 :                 switch (tm->op) {
    1297             :                 case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
    1298         208 :                         BUG_ON(tm->slot < n);
    1299             :                         /* Fallthrough */
    1300             :                 case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
    1301             :                 case MOD_LOG_KEY_REMOVE:
    1302         208 :                         btrfs_set_node_key(eb, &tm->key, tm->slot);
    1303         208 :                         btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
    1304         208 :                         btrfs_set_node_ptr_generation(eb, tm->slot,
    1305             :                                                       tm->generation);
    1306         208 :                         n++;
    1307         208 :                         break;
    1308             :                 case MOD_LOG_KEY_REPLACE:
    1309          27 :                         BUG_ON(tm->slot >= n);
    1310          27 :                         btrfs_set_node_key(eb, &tm->key, tm->slot);
    1311          27 :                         btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
    1312          27 :                         btrfs_set_node_ptr_generation(eb, tm->slot,
    1313             :                                                       tm->generation);
    1314             :                         break;
    1315             :                 case MOD_LOG_KEY_ADD:
    1316             :                         /* if a move operation is needed it's in the log */
    1317           3 :                         n--;
    1318           3 :                         break;
    1319             :                 case MOD_LOG_MOVE_KEYS:
    1320           0 :                         o_dst = btrfs_node_key_ptr_offset(tm->slot);
    1321           0 :                         o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot);
    1322           0 :                         memmove_extent_buffer(eb, o_dst, o_src,
    1323           0 :                                               tm->move.nr_items * p_size);
    1324           0 :                         break;
    1325             :                 case MOD_LOG_ROOT_REPLACE:
    1326             :                         /*
    1327             :                          * this operation is special. for roots, this must be
    1328             :                          * handled explicitly before rewinding.
    1329             :                          * for non-roots, this operation may exist if the node
    1330             :                          * was a root: root A -> child B; then A gets empty and
    1331             :                          * B is promoted to the new root. in the mod log, we'll
    1332             :                          * have a root-replace operation for B, a tree block
    1333             :                          * that is no root. we simply ignore that operation.
    1334             :                          */
    1335             :                         break;
    1336             :                 }
    1337         238 :                 next = rb_next(&tm->node);
    1338         238 :                 if (!next)
    1339             :                         break;
    1340             :                 tm = container_of(next, struct tree_mod_elem, node);
    1341         217 :                 if (tm->index != first_tm->index)
    1342             :                         break;
    1343             :         }
    1344             :         tree_mod_log_read_unlock(fs_info);
    1345             :         btrfs_set_header_nritems(eb, n);
    1346          21 : }
    1347             : 
    1348             : /*
    1349             :  * Called with eb read locked. If the buffer cannot be rewound, the same buffer
    1350             :  * is returned. If rewind operations happen, a fresh buffer is returned. The
    1351             :  * returned buffer is always read-locked. If the returned buffer is not the
    1352             :  * input buffer, the lock on the input buffer is released and the input buffer
    1353             :  * is freed (its refcount is decremented).
    1354             :  */
    1355             : static struct extent_buffer *
    1356        3188 : tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
    1357        3188 :                     struct extent_buffer *eb, u64 time_seq)
    1358             : {
    1359           0 :         struct extent_buffer *eb_rewin;
    1360             :         struct tree_mod_elem *tm;
    1361             : 
    1362        3188 :         if (!time_seq)
    1363             :                 return eb;
    1364             : 
    1365        3188 :         if (btrfs_header_level(eb) == 0)
    1366             :                 return eb;
    1367             : 
    1368        1269 :         tm = tree_mod_log_search(fs_info, eb->start, time_seq);
    1369        1269 :         if (!tm)
    1370             :                 return eb;
    1371             : 
    1372           0 :         btrfs_set_path_blocking(path);
    1373           0 :         btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
    1374             : 
    1375           0 :         if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
    1376           0 :                 BUG_ON(tm->slot != 0);
    1377           0 :                 eb_rewin = alloc_dummy_extent_buffer(eb->start,
    1378           0 :                                                 fs_info->tree_root->nodesize);
    1379           0 :                 if (!eb_rewin) {
    1380           0 :                         btrfs_tree_read_unlock_blocking(eb);
    1381           0 :                         free_extent_buffer(eb);
    1382           0 :                         return NULL;
    1383             :                 }
    1384           0 :                 btrfs_set_header_bytenr(eb_rewin, eb->start);
    1385             :                 btrfs_set_header_backref_rev(eb_rewin,
    1386             :                                              btrfs_header_backref_rev(eb));
    1387             :                 btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb));
    1388             :                 btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
    1389             :         } else {
    1390           0 :                 eb_rewin = btrfs_clone_extent_buffer(eb);
    1391           0 :                 if (!eb_rewin) {
    1392           0 :                         btrfs_tree_read_unlock_blocking(eb);
    1393           0 :                         free_extent_buffer(eb);
    1394           0 :                         return NULL;
    1395             :                 }
    1396             :         }
    1397             : 
    1398           0 :         btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK);
    1399           0 :         btrfs_tree_read_unlock_blocking(eb);
    1400           0 :         free_extent_buffer(eb);
    1401             : 
    1402             :         extent_buffer_get(eb_rewin);
    1403           0 :         btrfs_tree_read_lock(eb_rewin);
    1404           0 :         __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
    1405           0 :         WARN_ON(btrfs_header_nritems(eb_rewin) >
    1406             :                 BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root));
    1407             : 
    1408           0 :         return eb_rewin;
    1409             : }
    1410             : 
    1411             : /*
    1412             :  * get_old_root() rewinds the state of @root's root node to the given @time_seq
    1413             :  * value. If there are no changes, the current root->node is returned. If
    1414             :  * anything changed in between, there's a fresh buffer allocated on which the
    1415             :  * rewind operations are done. In any case, the returned buffer is read locked.
    1416             :  * Returns NULL on error (with no locks held).
    1417             :  */
    1418             : static inline struct extent_buffer *
    1419        7947 : get_old_root(struct btrfs_root *root, u64 time_seq)
    1420             : {
    1421             :         struct tree_mod_elem *tm;
    1422          25 :         struct extent_buffer *eb = NULL;
    1423        7948 :         struct extent_buffer *eb_root;
    1424             :         struct extent_buffer *old;
    1425             :         struct tree_mod_root *old_root = NULL;
    1426             :         u64 old_generation = 0;
    1427             :         u64 logical;
    1428             :         u32 blocksize;
    1429             : 
    1430        7947 :         eb_root = btrfs_read_lock_root_node(root);
    1431       15894 :         tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
    1432        7947 :         if (!tm)
    1433             :                 return eb_root;
    1434             : 
    1435          21 :         if (tm->op == MOD_LOG_ROOT_REPLACE) {
    1436           1 :                 old_root = &tm->old_root;
    1437           1 :                 old_generation = tm->generation;
    1438           1 :                 logical = old_root->logical;
    1439             :         } else {
    1440          20 :                 logical = eb_root->start;
    1441             :         }
    1442             : 
    1443          21 :         tm = tree_mod_log_search(root->fs_info, logical, time_seq);
    1444          21 :         if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
    1445           0 :                 btrfs_tree_read_unlock(eb_root);
    1446           0 :                 free_extent_buffer(eb_root);
    1447           0 :                 blocksize = btrfs_level_size(root, old_root->level);
    1448           0 :                 old = read_tree_block(root, logical, blocksize, 0);
    1449           0 :                 if (WARN_ON(!old || !extent_buffer_uptodate(old))) {
    1450           0 :                         free_extent_buffer(old);
    1451           0 :                         btrfs_warn(root->fs_info,
    1452             :                                 "failed to read tree block %llu from get_old_root", logical);
    1453             :                 } else {
    1454           0 :                         eb = btrfs_clone_extent_buffer(old);
    1455           0 :                         free_extent_buffer(old);
    1456             :                 }
    1457          21 :         } else if (old_root) {
    1458           1 :                 btrfs_tree_read_unlock(eb_root);
    1459           1 :                 free_extent_buffer(eb_root);
    1460           1 :                 eb = alloc_dummy_extent_buffer(logical, root->nodesize);
    1461             :         } else {
    1462          20 :                 btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
    1463          20 :                 eb = btrfs_clone_extent_buffer(eb_root);
    1464          20 :                 btrfs_tree_read_unlock_blocking(eb_root);
    1465          20 :                 free_extent_buffer(eb_root);
    1466             :         }
    1467             : 
    1468          21 :         if (!eb)
    1469             :                 return NULL;
    1470             :         extent_buffer_get(eb);
    1471          21 :         btrfs_tree_read_lock(eb);
    1472          21 :         if (old_root) {
    1473           1 :                 btrfs_set_header_bytenr(eb, eb->start);
    1474             :                 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
    1475             :                 btrfs_set_header_owner(eb, btrfs_header_owner(eb_root));
    1476           1 :                 btrfs_set_header_level(eb, old_root->level);
    1477             :                 btrfs_set_header_generation(eb, old_generation);
    1478             :         }
    1479          21 :         if (tm)
    1480          21 :                 __tree_mod_log_rewind(root->fs_info, eb, time_seq, tm);
    1481             :         else
    1482           0 :                 WARN_ON(btrfs_header_level(eb) != 0);
    1483          21 :         WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root));
    1484             : 
    1485          21 :         return eb;
    1486             : }
    1487             : 
    1488       12728 : int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
    1489             : {
    1490             :         struct tree_mod_elem *tm;
    1491             :         int level;
    1492       38183 :         struct extent_buffer *eb_root = btrfs_root_node(root);
    1493             : 
    1494       25456 :         tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
    1495       12728 :         if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
    1496           1 :                 level = tm->old_root.level;
    1497             :         } else {
    1498       12727 :                 level = btrfs_header_level(eb_root);
    1499             :         }
    1500       12728 :         free_extent_buffer(eb_root);
    1501             : 
    1502       12728 :         return level;
    1503             : }
    1504             : 
    1505     1600370 : static inline int should_cow_block(struct btrfs_trans_handle *trans,
    1506             :                                    struct btrfs_root *root,
    1507     1600379 :                                    struct extent_buffer *buf)
    1508             : {
    1509             : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
    1510             :         if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
    1511             :                 return 0;
    1512             : #endif
    1513             :         /* ensure we can see the force_cow */
    1514     1600370 :         smp_rmb();
    1515             : 
    1516             :         /*
    1517             :          * We do not need to cow a block if
    1518             :          * 1) this block was created or changed in this transaction and not yet written;
    1519             :          * 2) this block is not a RELOC-flagged block seen from a non-TREE_RELOC root;
    1520             :          * 3) the root is not forced COW.
    1521             :          *
    1522             :          * What is forced COW:
    1523             :          *    when we create a snapshot while committing the transaction,
    1524             :          *    after we've finished copying the src root, we must COW the shared
    1525             :          *    block to ensure the metadata consistency.
    1526             :          */
    1527     3119011 :         if (btrfs_header_generation(buf) == trans->transid &&
    1528     1490830 :             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
    1529     2981638 :             !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
    1530     1490838 :               btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
    1531             :             !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
    1532             :                 return 0;
    1533             :         return 1;
    1534             : }
    1535             : 
    1536             : /*
    1537             :  * cows a single block, see __btrfs_cow_block for the real work.
    1538             :  * This version of it has extra checks so that a block isn't cow'd more than
    1539             :  * once per transaction, as long as it hasn't been written yet
    1540             :  */
    1541       66091 : noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
    1542             :                     struct btrfs_root *root, struct extent_buffer *buf,
    1543             :                     struct extent_buffer *parent, int parent_slot,
    1544             :                     struct extent_buffer **cow_ret)
    1545             : {
    1546             :         u64 search_start;
    1547             :         int ret;
    1548             : 
    1549       66091 :         if (trans->transaction != root->fs_info->running_transaction)
    1550           0 :                 WARN(1, KERN_CRIT "trans %llu running %llu\n",
    1551             :                        trans->transid,
    1552             :                        root->fs_info->running_transaction->transid);
    1553             : 
    1554       66091 :         if (trans->transid != root->fs_info->generation)
    1555           0 :                 WARN(1, KERN_CRIT "trans %llu running %llu\n",
    1556             :                        trans->transid, root->fs_info->generation);
    1557             : 
    1558       66091 :         if (!should_cow_block(trans, root, buf)) {
    1559       13805 :                 *cow_ret = buf;
    1560       13805 :                 return 0;
    1561             :         }
    1562             : 
    1563       52286 :         search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
    1564             : 
    1565       52286 :         if (parent)
    1566             :                 btrfs_set_lock_blocking(parent);
    1567             :         btrfs_set_lock_blocking(buf);
    1568             : 
    1569       52285 :         ret = __btrfs_cow_block(trans, root, buf, parent,
    1570             :                                  parent_slot, cow_ret, search_start, 0);
    1571             : 
    1572       52284 :         trace_btrfs_cow_block(root, buf, *cow_ret);
    1573             : 
    1574       52284 :         return ret;
    1575             : }
    1576             : 
    1577             : /*
    1578             :  * helper function for defrag to decide if two blocks pointed to by a
    1579             :  * node are actually close by
    1580             :  */
    1581             : static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
    1582             : {
    1583           0 :         if (blocknr < other && other - (blocknr + blocksize) < 32768)
    1584             :                 return 1;
    1585           0 :         if (blocknr > other && blocknr - (other + blocksize) < 32768)
    1586             :                 return 1;
    1587             :         return 0;
    1588             : }
    1589             : 
    1590             : /*
    1591             :  * compare two keys in a memcmp fashion
    1592             :  */
    1593             : static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
    1594             : {
    1595             :         struct btrfs_key k1;
    1596             : 
    1597             :         btrfs_disk_key_to_cpu(&k1, disk);
    1598             : 
    1599             :         return btrfs_comp_cpu_keys(&k1, k2);
    1600             : }
    1601             : 
    1602             : /*
    1603             :  * same as comp_keys only with two btrfs_key's
    1604             :  */
    1605      275675 : int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
    1606             : {
    1607    28437019 :         if (k1->objectid > k2->objectid)
    1608             :                 return 1;
    1609    20136305 :         if (k1->objectid < k2->objectid)
    1610             :                 return -1;
    1611    12375604 :         if (k1->type > k2->type)
    1612             :                 return 1;
    1613    11863499 :         if (k1->type < k2->type)
    1614             :                 return -1;
    1615    11495661 :         if (k1->offset > k2->offset)
    1616             :                 return 1;
    1617     7012757 :         if (k1->offset < k2->offset)
    1618             :                 return -1;
    1619        6644 :         return 0;
    1620             : }
    1621             : 
    1622             : /*
    1623             :  * this is used by the defrag code to go through all the
    1624             :  * leaves pointed to by a node and reallocate them so that
    1625             :  * disk order is close to key order
    1626             :  */
    1627           0 : int btrfs_realloc_node(struct btrfs_trans_handle *trans,
    1628           0 :                        struct btrfs_root *root, struct extent_buffer *parent,
    1629             :                        int start_slot, u64 *last_ret,
    1630             :                        struct btrfs_key *progress)
    1631             : {
    1632             :         struct extent_buffer *cur;
    1633             :         u64 blocknr;
    1634             :         u64 gen;
    1635           0 :         u64 search_start = *last_ret;
    1636             :         u64 last_block = 0;
    1637             :         u64 other;
    1638             :         u32 parent_nritems;
    1639             :         int end_slot;
    1640             :         int i;
    1641             :         int err = 0;
    1642             :         int parent_level;
    1643             :         int uptodate;
    1644             :         u32 blocksize;
    1645             :         int progress_passed = 0;
    1646             :         struct btrfs_disk_key disk_key;
    1647             : 
    1648             :         parent_level = btrfs_header_level(parent);
    1649             : 
    1650           0 :         WARN_ON(trans->transaction != root->fs_info->running_transaction);
    1651           0 :         WARN_ON(trans->transid != root->fs_info->generation);
    1652             : 
    1653             :         parent_nritems = btrfs_header_nritems(parent);
    1654             :         blocksize = btrfs_level_size(root, parent_level - 1);
    1655           0 :         end_slot = parent_nritems;
    1656             : 
    1657           0 :         if (parent_nritems == 1)
    1658             :                 return 0;
    1659             : 
    1660             :         btrfs_set_lock_blocking(parent);
    1661             : 
    1662           0 :         for (i = start_slot; i < end_slot; i++) {
    1663             :                 int close = 1;
    1664             : 
    1665           0 :                 btrfs_node_key(parent, &disk_key, i);
    1666           0 :                 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
    1667           0 :                         continue;
    1668             : 
    1669             :                 progress_passed = 1;
    1670             :                 blocknr = btrfs_node_blockptr(parent, i);
    1671             :                 gen = btrfs_node_ptr_generation(parent, i);
    1672           0 :                 if (last_block == 0)
    1673             :                         last_block = blocknr;
    1674             : 
    1675           0 :                 if (i > 0) {
    1676           0 :                         other = btrfs_node_blockptr(parent, i - 1);
    1677             :                         close = close_blocks(blocknr, other, blocksize);
    1678             :                 }
    1679           0 :                 if (!close && i < end_slot - 2) {
    1680           0 :                         other = btrfs_node_blockptr(parent, i + 1);
    1681             :                         close = close_blocks(blocknr, other, blocksize);
    1682             :                 }
    1683           0 :                 if (close) {
    1684             :                         last_block = blocknr;
    1685           0 :                         continue;
    1686             :                 }
    1687             : 
    1688           0 :                 cur = btrfs_find_tree_block(root, blocknr, blocksize);
    1689           0 :                 if (cur)
    1690           0 :                         uptodate = btrfs_buffer_uptodate(cur, gen, 0);
    1691             :                 else
    1692             :                         uptodate = 0;
    1693           0 :                 if (!cur || !uptodate) {
    1694           0 :                         if (!cur) {
    1695           0 :                                 cur = read_tree_block(root, blocknr,
    1696             :                                                          blocksize, gen);
    1697           0 :                                 if (!cur || !extent_buffer_uptodate(cur)) {
    1698           0 :                                         free_extent_buffer(cur);
    1699           0 :                                         return -EIO;
    1700             :                                 }
    1701           0 :                         } else if (!uptodate) {
    1702           0 :                                 err = btrfs_read_buffer(cur, gen);
    1703           0 :                                 if (err) {
    1704           0 :                                         free_extent_buffer(cur);
    1705           0 :                                         return err;
    1706             :                                 }
    1707             :                         }
    1708             :                 }
    1709           0 :                 if (search_start == 0)
    1710             :                         search_start = last_block;
    1711             : 
    1712           0 :                 btrfs_tree_lock(cur);
    1713           0 :                 btrfs_set_lock_blocking(cur);
    1714           0 :                 err = __btrfs_cow_block(trans, root, cur, parent, i,
    1715             :                                         &cur, search_start,
    1716           0 :                                         min(16 * blocksize,
    1717             :                                             (end_slot - i) * blocksize));
    1718           0 :                 if (err) {
    1719           0 :                         btrfs_tree_unlock(cur);
    1720           0 :                         free_extent_buffer(cur);
    1721           0 :                         break;
    1722             :                 }
    1723           0 :                 search_start = cur->start;
    1724             :                 last_block = cur->start;
    1725           0 :                 *last_ret = search_start;
    1726           0 :                 btrfs_tree_unlock(cur);
    1727           0 :                 free_extent_buffer(cur);
    1728             :         }
    1729           0 :         return err;
    1730             : }
    1731             : 
    1732             : /*
    1733             :  * The leaf data grows from end-to-front in the node.
    1734             :  * this returns the address of the start of the last item,
    1735             :  * which is the stop of the leaf data stack
    1736             :  */
    1737      524875 : static inline unsigned int leaf_data_end(struct btrfs_root *root,
    1738      524875 :                                          struct extent_buffer *leaf)
    1739             : {
    1740             :         u32 nr = btrfs_header_nritems(leaf);
    1741      524875 :         if (nr == 0)
    1742        1779 :                 return BTRFS_LEAF_DATA_SIZE(root);
    1743      523096 :         return btrfs_item_offset_nr(leaf, nr - 1);
    1744             : }
    1745             : 
    1746             : 
    1747             : /*
    1748             :  * search for key in the extent_buffer.  The items start at offset p,
    1749             :  * and they are item_size apart.  There are 'max' items in p.
    1750             :  *
    1751             :  * the slot in the array is returned via slot, and it points to
    1752             :  * the place where you would insert key if it is not found in
    1753             :  * the array.
    1754             :  *
    1755             :  * slot may point to max if the key is bigger than all of the keys
    1756             :  */
    1757     4978020 : static noinline int generic_bin_search(struct extent_buffer *eb,
    1758             :                                        unsigned long p,
    1759             :                                        int item_size, struct btrfs_key *key,
    1760             :                                        int max, int *slot)
    1761             : {
    1762             :         int low = 0;
    1763             :         int high = max;
    1764             :         int mid;
    1765             :         int ret;
    1766             :         struct btrfs_disk_key *tmp = NULL;
    1767             :         struct btrfs_disk_key unaligned;
    1768             :         unsigned long offset;
    1769     4978020 :         char *kaddr = NULL;
    1770     4978020 :         unsigned long map_start = 0;
    1771     4978020 :         unsigned long map_len = 0;
    1772             :         int err;
    1773             : 
    1774    37022947 :         while (low < high) {
    1775    28139641 :                 mid = (low + high) / 2;
    1776    28139641 :                 offset = p + mid * item_size;
    1777             : 
    1778    51123818 :                 if (!kaddr || offset < map_start ||
    1779    22984177 :                     (offset + sizeof(struct btrfs_disk_key)) >
    1780    22984177 :                     map_start + map_len) {
    1781             : 
    1782     5503721 :                         err = map_private_extent_buffer(eb, offset,
    1783             :                                                 sizeof(struct btrfs_disk_key),
    1784             :                                                 &kaddr, &map_start, &map_len);
    1785             : 
    1786     5515117 :                         if (!err) {
    1787     5476843 :                                 tmp = (struct btrfs_disk_key *)(kaddr + offset -
    1788             :                                                         map_start);
    1789             :                         } else {
    1790       38274 :                                 read_extent_buffer(eb, &unaligned,
    1791             :                                                    offset, sizeof(unaligned));
    1792             :                                 tmp = &unaligned;
    1793             :                         }
    1794             : 
    1795             :                 } else {
    1796    22635920 :                         tmp = (struct btrfs_disk_key *)(kaddr + offset -
    1797             :                                                         map_start);
    1798             :                 }
    1799             :                 ret = comp_keys(tmp, key);
    1800             : 
    1801    28145279 :                 if (ret < 0)
    1802    13819832 :                         low = mid + 1;
    1803    14325447 :                 else if (ret > 0)
    1804             :                         high = mid;
    1805             :                 else {
    1806     1078372 :                         *slot = mid;
    1807     1078372 :                         return 0;
    1808             :                 }
    1809             :         }
    1810     3905286 :         *slot = low;
    1811     3905286 :         return 1;
    1812             : }
    1813             : 
    1814             : /*
    1815             :  * simple bin_search frontend that does the right thing for
    1816             :  * leaves vs nodes
    1817             :  */
    1818     9970782 : static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
    1819             :                       int level, int *slot)
    1820             : {
    1821     4985391 :         if (level == 0)
    1822     2131536 :                 return generic_bin_search(eb,
    1823             :                                           offsetof(struct btrfs_leaf, items),
    1824             :                                           sizeof(struct btrfs_item),
    1825             :                                           key, btrfs_header_nritems(eb),
    1826             :                                           slot);
    1827             :         else
    1828     2853855 :                 return generic_bin_search(eb,
    1829             :                                           offsetof(struct btrfs_node, ptrs),
    1830             :                                           sizeof(struct btrfs_key_ptr),
    1831             :                                           key, btrfs_header_nritems(eb),
    1832             :                                           slot);
    1833             : }
    1834             : 
    1835         247 : int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
    1836             :                      int level, int *slot)
    1837             : {
    1838         247 :         return bin_search(eb, key, level, slot);
    1839             : }
    1840             : 
    1841             : static void root_add_used(struct btrfs_root *root, u32 size)
    1842             : {
    1843             :         spin_lock(&root->accounting_lock);
    1844        5411 :         btrfs_set_root_used(&root->root_item,
    1845             :                             btrfs_root_used(&root->root_item) + size);
    1846             :         spin_unlock(&root->accounting_lock);
    1847             : }
    1848             : 
    1849             : static void root_sub_used(struct btrfs_root *root, u32 size)
    1850             : {
    1851             :         spin_lock(&root->accounting_lock);
    1852        1403 :         btrfs_set_root_used(&root->root_item,
    1853             :                             btrfs_root_used(&root->root_item) - size);
    1854             :         spin_unlock(&root->accounting_lock);
    1855             : }
    1856             : 
    1857             : /* given a node and slot number, this reads the blocks it points to.  The
    1858             :  * extent buffer is returned with a reference taken (but unlocked).
    1859             :  * NULL is returned on error.
    1860             :  */
    1861       43874 : static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
    1862       21977 :                                    struct extent_buffer *parent, int slot)
    1863             : {
    1864       21977 :         int level = btrfs_header_level(parent);
    1865             :         struct extent_buffer *eb;
    1866             : 
    1867       21977 :         if (slot < 0)
    1868             :                 return NULL;
    1869       43826 :         if (slot >= btrfs_header_nritems(parent))
    1870             :                 return NULL;
    1871             : 
    1872       21897 :         BUG_ON(level == 0);
    1873             : 
    1874       21897 :         eb = read_tree_block(root, btrfs_node_blockptr(parent, slot),
    1875             :                              btrfs_level_size(root, level - 1),
    1876             :                              btrfs_node_ptr_generation(parent, slot));
    1877       21897 :         if (eb && !extent_buffer_uptodate(eb)) {
    1878           0 :                 free_extent_buffer(eb);
    1879             :                 eb = NULL;
    1880             :         }
    1881             : 
    1882       21897 :         return eb;
    1883             : }
    1884             : 
    1885             : /*
    1886             :  * Node level balancing, used to make sure nodes are in proper order for
    1887             :  * item deletion.  We balance from the top down, so we have to make sure
    1888             :  * that a deletion won't leave a node completely empty later on.
                        *
                        * The node being balanced ("mid") is path->nodes[level]; the WARN_ONs
                        * below expect it to be write locked and to carry the generation of
                        * @trans.  Three cases are handled:
                        *  - mid has no parent in the path and holds exactly one pointer: the
                        *    child it points to is COWed and installed as the new root->node,
                        *    and mid is freed.
                        *  - mid holds more than a quarter of BTRFS_NODEPTRS_PER_BLOCK(root)
                        *    pointers: nothing is done.
                        *  - otherwise the left and right siblings are locked and COWed and
                        *    pointers are shuffled between left, mid and right; siblings or
                        *    mid that end up empty are removed from the parent and freed.
                        *
                        * On the way out path->nodes[level] and the slots at level and
                        * level + 1 may have been switched over to the left sibling so that
                        * the path keeps pointing at the same child block.
                        *
                        * Returns 0 when nothing had to be done or balancing succeeded,
                        * otherwise a negative error (-EROFS or whatever the helpers called
                        * here reported).
    1889             :  */
    1890      190233 : static noinline int balance_level(struct btrfs_trans_handle *trans,
    1891          32 :                          struct btrfs_root *root,
    1892             :                          struct btrfs_path *path, int level)
    1893             : {
    1894      190233 :         struct extent_buffer *right = NULL;
    1895      380487 :         struct extent_buffer *mid;
    1896      190233 :         struct extent_buffer *left = NULL;
    1897             :         struct extent_buffer *parent = NULL;
    1898             :         int ret = 0;
    1899             :         int wret;
    1900             :         int pslot;
    1901      190233 :         int orig_slot = path->slots[level];
    1902             :         u64 orig_ptr;
    1903             : 
                               /* level 0 is a leaf; only nodes are balanced here */
    1904      190233 :         if (level == 0)
    1905             :                 return 0;
    1906             : 
    1907      190233 :         mid = path->nodes[level];
    1908             : 
    1909      190233 :         WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
    1910             :                 path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
    1911      190233 :         WARN_ON(btrfs_header_generation(mid) != trans->transid);
    1912             : 
                               /* remembered so the path can be double checked before returning */
    1913             :         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
    1914             : 
                               /*
                                * parent stays NULL when mid sits at the highest possible level
                                * or when the path has no node above it
                                */
    1915      190232 :         if (level < BTRFS_MAX_LEVEL - 1) {
    1916      190233 :                 parent = path->nodes[level + 1];
    1917      190233 :                 pslot = path->slots[level + 1];
    1918             :         }
    1919             : 
    1920             :         /*
    1921             :          * deal with the case where there is only one pointer in the root
    1922             :          * by promoting the node below to a root
    1923             :          */
    1924      190232 :         if (!parent) {
    1925             :                 struct extent_buffer *child;
    1926             : 
    1927      178923 :                 if (btrfs_header_nritems(mid) != 1)
    1928      178923 :                         return 0;
    1929             : 
    1930             :                 /* promote the child to a root */
    1931          32 :                 child = read_node_slot(root, mid, 0);
    1932          32 :                 if (!child) {
    1933             :                         ret = -EROFS;
    1934           0 :                         btrfs_std_error(root->fs_info, ret);
    1935           0 :                         goto enospc;
    1936             :                 }
    1937             : 
    1938          32 :                 btrfs_tree_lock(child);
    1939          32 :                 btrfs_set_lock_blocking(child);
    1940          32 :                 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
    1941          32 :                 if (ret) {
    1942           0 :                         btrfs_tree_unlock(child);
    1943           0 :                         free_extent_buffer(child);
    1944           0 :                         goto enospc;
    1945             :                 }
    1946             : 
                                       /* publish the COWed child as the new root node */
    1947          64 :                 tree_mod_log_set_root_pointer(root, child, 1);
    1948          32 :                 rcu_assign_pointer(root->node, child);
    1949             : 
    1950          32 :                 add_root_to_dirty_list(root);
    1951          32 :                 btrfs_tree_unlock(child);
    1952             : 
                                       /* mid is no longer part of the path; unlock and release it */
    1953          32 :                 path->locks[level] = 0;
    1954          32 :                 path->nodes[level] = NULL;
    1955          32 :                 clean_tree_block(trans, root, mid);
    1956          32 :                 btrfs_tree_unlock(mid);
    1957             :                 /* once for the path */
    1958          32 :                 free_extent_buffer(mid);
    1959             : 
    1960          32 :                 root_sub_used(root, mid->len);
    1961          32 :                 btrfs_free_tree_block(trans, root, mid, 0, 1);
    1962             :                 /* once for the root ptr */
    1963          32 :                 free_extent_buffer_stale(mid);
    1964          32 :                 return 0;
    1965             :         }
                               /* nothing to do while mid holds more than a quarter of the max */
    1966       22618 :         if (btrfs_header_nritems(mid) >
    1967       11309 :             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
    1968             :                 return 0;
    1969             : 
                               /*
                                * lock and COW both siblings; read_node_slot() returns NULL when
                                * the slot is outside of the parent or the block cannot be read,
                                * in which case that sibling is simply not used
                                */
    1970          11 :         left = read_node_slot(root, parent, pslot - 1);
    1971          27 :         if (left) {
    1972           8 :                 btrfs_tree_lock(left);
    1973           8 :                 btrfs_set_lock_blocking(left);
    1974           8 :                 wret = btrfs_cow_block(trans, root, left,
    1975             :                                        parent, pslot - 1, &left);
    1976           8 :                 if (wret) {
    1977             :                         ret = wret;
    1978             :                         goto enospc;
    1979             :                 }
    1980             :         }
    1981          11 :         right = read_node_slot(root, parent, pslot + 1);
    1982          20 :         if (right) {
    1983           9 :                 btrfs_tree_lock(right);
    1984           9 :                 btrfs_set_lock_blocking(right);
    1985           9 :                 wret = btrfs_cow_block(trans, root, right,
    1986             :                                        parent, pslot + 1, &right);
    1987           9 :                 if (wret) {
    1988             :                         ret = wret;
    1989             :                         goto enospc;
    1990             :                 }
    1991             :         }
    1992             : 
    1993             :         /* first, try to make some room in the middle buffer */
    1994          11 :         if (left) {
                                       /* express orig_slot relative to left + mid taken together */
    1995           8 :                 orig_slot += btrfs_header_nritems(left);
    1996           8 :                 wret = push_node_left(trans, root, left, mid, 1);
    1997           8 :                 if (wret < 0)
    1998             :                         ret = wret;
    1999             :         }
    2000             : 
    2001             :         /*
    2002             :          * then try to empty the right most buffer into the middle
    2003             :          */
    2004          11 :         if (right) {
    2005           9 :                 wret = push_node_left(trans, root, mid, right, 1);
                                       /* -ENOSPC from this push is deliberately not recorded in ret */
    2006           9 :                 if (wret < 0 && wret != -ENOSPC)
    2007             :                         ret = wret;
    2008          18 :                 if (btrfs_header_nritems(right) == 0) {
                                               /* right is empty: drop it from the parent and free it */
    2009           9 :                         clean_tree_block(trans, root, right);
    2010           9 :                         btrfs_tree_unlock(right);
    2011           9 :                         del_ptr(root, path, level + 1, pslot + 1);
    2012           9 :                         root_sub_used(root, right->len);
    2013           9 :                         btrfs_free_tree_block(trans, root, right, 0, 1);
    2014           9 :                         free_extent_buffer_stale(right);
    2015           9 :                         right = NULL;
    2016             :                 } else {
                                               /* right kept some pointers: refresh its key in the parent */
    2017             :                         struct btrfs_disk_key right_key;
    2018           0 :                         btrfs_node_key(right, &right_key, 0);
    2019           0 :                         tree_mod_log_set_node_key(root->fs_info, parent,
    2020             :                                                   pslot + 1, 0);
    2021             :                         btrfs_set_node_key(parent, &right_key, pslot + 1);
    2022           0 :                         btrfs_mark_buffer_dirty(parent);
    2023             :                 }
    2024             :         }
    2025          11 :         if (btrfs_header_nritems(mid) == 1) {
    2026             :                 /*
    2027             :                  * we're not allowed to leave a node with one item in the
    2028             :                  * tree during a delete.  A deletion from lower in the tree
    2029             :                  * could try to delete the only pointer in this node.
    2030             :                  * So, pull some keys from the left.
    2031             :                  * There has to be a left pointer at this point because
    2032             :                  * otherwise we would have pulled some pointers from the
    2033             :                  * right
    2034             :                  */
    2035           0 :                 if (!left) {
    2036             :                         ret = -EROFS;
    2037           0 :                         btrfs_std_error(root->fs_info, ret);
    2038             :                         goto enospc;
    2039             :                 }
    2040           0 :                 wret = balance_node_right(trans, root, mid, left);
    2041           0 :                 if (wret < 0) {
    2042             :                         ret = wret;
    2043             :                         goto enospc;
    2044             :                 }
    2045           0 :                 if (wret == 1) {
    2046           0 :                         wret = push_node_left(trans, root, left, mid, 1);
    2047           0 :                         if (wret < 0)
    2048             :                                 ret = wret;
    2049             :                 }
    2050           0 :                 BUG_ON(wret == 1);
    2051             :         }
    2052          11 :         if (btrfs_header_nritems(mid) == 0) {
                                       /* mid is empty: drop it from the parent and free it */
    2053           2 :                 clean_tree_block(trans, root, mid);
    2054           2 :                 btrfs_tree_unlock(mid);
    2055           2 :                 del_ptr(root, path, level + 1, pslot);
    2056           2 :                 root_sub_used(root, mid->len);
    2057           2 :                 btrfs_free_tree_block(trans, root, mid, 0, 1);
    2058           2 :                 free_extent_buffer_stale(mid);
    2059             :                 mid = NULL;
    2060             :         } else {
    2061             :                 /* update the parent key to reflect our changes */
    2062             :                 struct btrfs_disk_key mid_key;
    2063           9 :                 btrfs_node_key(mid, &mid_key, 0);
    2064           9 :                 tree_mod_log_set_node_key(root->fs_info, parent,
    2065             :                                           pslot, 0);
    2066             :                 btrfs_set_node_key(parent, &mid_key, pslot);
    2067           9 :                 btrfs_mark_buffer_dirty(parent);
    2068             :         }
    2069             : 
    2070             :         /* update the path */
    2071          11 :         if (left) {
    2072           8 :                 if (btrfs_header_nritems(left) > orig_slot) {
                                               /*
                                                * the slot we came in with now lives in left: take an
                                                * extra reference for the path and switch the path over
                                                */
    2073             :                         extent_buffer_get(left);
    2074             :                         /* left was locked after cow */
    2075           8 :                         path->nodes[level] = left;
    2076           8 :                         path->slots[level + 1] -= 1;
    2077           8 :                         path->slots[level] = orig_slot;
    2078           8 :                         if (mid) {
    2079           6 :                                 btrfs_tree_unlock(mid);
    2080           6 :                                 free_extent_buffer(mid);
    2081             :                         }
    2082             :                 } else {
                                               /* still in mid: make the slot relative to mid again */
    2083           0 :                         orig_slot -= btrfs_header_nritems(left);
    2084           0 :                         path->slots[level] = orig_slot;
    2085             :                 }
    2086             :         }
    2087             :         /* double check we haven't messed things up */
    2088          11 :         if (orig_ptr !=
    2089          11 :             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
    2090           0 :                 BUG();
                       /*
                        * common exit, also used on success: release whatever sibling
                        * references are still held.  left keeps its lock when it was
                        * installed in the path above.
                        */
    2091             : enospc:
    2092          11 :         if (right) {
    2093           0 :                 btrfs_tree_unlock(right);
    2094           0 :                 free_extent_buffer(right);
    2095             :         }
    2096          11 :         if (left) {
    2097           8 :                 if (path->nodes[level] != left)
    2098           0 :                         btrfs_tree_unlock(left);
    2099           8 :                 free_extent_buffer(left);
    2100             :         }
    2101          11 :         return ret;
    2102             : }
    2103             : 
    2104             : /* Node balancing for insertion.  Here we only split or push nodes around
    2105             :  * when they are completely full.  This is also done top down, so we
    2106             :  * have to be pessimistic.
    2107             :  */
    2108         177 : static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
    2109             :                                           struct btrfs_root *root,
    2110             :                                           struct btrfs_path *path, int level)
    2111             : {
    2112         177 :         struct extent_buffer *right = NULL;
    2113         247 :         struct extent_buffer *mid;
    2114         177 :         struct extent_buffer *left = NULL;
    2115             :         struct extent_buffer *parent = NULL;
    2116             :         int ret = 0;
    2117             :         int wret;
    2118             :         int pslot;
    2119         177 :         int orig_slot = path->slots[level];
    2120             : 
    2121         177 :         if (level == 0)
    2122             :                 return 1;
    2123             : 
    2124         177 :         mid = path->nodes[level];
    2125         177 :         WARN_ON(btrfs_header_generation(mid) != trans->transid);
    2126             : 
    2127         177 :         if (level < BTRFS_MAX_LEVEL - 1) {
    2128         177 :                 parent = path->nodes[level + 1];
    2129         177 :                 pslot = path->slots[level + 1];
    2130             :         }
    2131             : 
    2132         177 :         if (!parent)
    2133             :                 return 1;
    2134             : 
    2135         177 :         left = read_node_slot(root, parent, pslot - 1);
    2136             : 
    2137             :         /* first, try to make some room in the middle buffer */
    2138         390 :         if (left) {
    2139             :                 u32 left_nr;
    2140             : 
    2141         116 :                 btrfs_tree_lock(left);
    2142         116 :                 btrfs_set_lock_blocking(left);
    2143             : 
    2144         116 :                 left_nr = btrfs_header_nritems(left);
    2145         116 :                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
    2146             :                         wret = 1;
    2147             :                 } else {
    2148          97 :                         ret = btrfs_cow_block(trans, root, left, parent,
    2149             :                                               pslot - 1, &left);
    2150          97 :                         if (ret)
    2151             :                                 wret = 1;
    2152             :                         else {
    2153          97 :                                 wret = push_node_left(trans, root,
    2154             :                                                       left, mid, 0);
    2155             :                         }
    2156             :                 }
    2157             :                 if (wret < 0)
    2158             :                         ret = wret;
    2159         116 :                 if (wret == 0) {
    2160             :                         struct btrfs_disk_key disk_key;
    2161          97 :                         orig_slot += left_nr;
    2162          97 :                         btrfs_node_key(mid, &disk_key, 0);
    2163          97 :                         tree_mod_log_set_node_key(root->fs_info, parent,
    2164             :                                                   pslot, 0);
    2165             :                         btrfs_set_node_key(parent, &disk_key, pslot);
    2166          97 :                         btrfs_mark_buffer_dirty(parent);
    2167         194 :                         if (btrfs_header_nritems(left) > orig_slot) {
    2168           9 :                                 path->nodes[level] = left;
    2169           9 :                                 path->slots[level + 1] -= 1;
    2170           9 :                                 path->slots[level] = orig_slot;
    2171           9 :                                 btrfs_tree_unlock(mid);
    2172           9 :                                 free_extent_buffer(mid);
    2173             :                         } else {
    2174          88 :                                 orig_slot -=
    2175             :                                         btrfs_header_nritems(left);
    2176          88 :                                 path->slots[level] = orig_slot;
    2177          88 :                                 btrfs_tree_unlock(left);
    2178          88 :                                 free_extent_buffer(left);
    2179             :                         }
    2180             :                         return 0;
    2181             :                 }
    2182          19 :                 btrfs_tree_unlock(left);
    2183          19 :                 free_extent_buffer(left);
    2184             :         }
    2185          80 :         right = read_node_slot(root, parent, pslot + 1);
    2186             : 
    2187             :         /*
    2188             :          * then try to empty the right most buffer into the middle
    2189             :          */
    2190         146 :         if (right) {
    2191             :                 u32 right_nr;
    2192             : 
    2193          66 :                 btrfs_tree_lock(right);
    2194          66 :                 btrfs_set_lock_blocking(right);
    2195             : 
    2196          66 :                 right_nr = btrfs_header_nritems(right);
    2197          66 :                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
    2198             :                         wret = 1;
    2199             :                 } else {
    2200          65 :                         ret = btrfs_cow_block(trans, root, right,
    2201             :                                               parent, pslot + 1,
    2202             :                                               &right);
    2203          65 :                         if (ret)
    2204             :                                 wret = 1;
    2205             :                         else {
    2206          65 :                                 wret = balance_node_right(trans, root,
    2207             :                                                           right, mid);
    2208             :                         }
    2209             :                 }
    2210             :                 if (wret < 0)
    2211             :                         ret = wret;
    2212          66 :                 if (wret == 0) {
    2213             :                         struct btrfs_disk_key disk_key;
    2214             : 
    2215          65 :                         btrfs_node_key(right, &disk_key, 0);
    2216          65 :                         tree_mod_log_set_node_key(root->fs_info, parent,
    2217             :                                                   pslot + 1, 0);
    2218             :                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
    2219          65 :                         btrfs_mark_buffer_dirty(parent);
    2220             : 
    2221          65 :                         if (btrfs_header_nritems(mid) <= orig_slot) {
    2222           5 :                                 path->nodes[level] = right;
    2223           5 :                                 path->slots[level + 1] += 1;
    2224           5 :                                 path->slots[level] = orig_slot -
    2225             :                                         btrfs_header_nritems(mid);
    2226           5 :                                 btrfs_tree_unlock(mid);
    2227           5 :                                 free_extent_buffer(mid);
    2228             :                         } else {
    2229          60 :                                 btrfs_tree_unlock(right);
    2230          60 :                                 free_extent_buffer(right);
    2231             :                         }
    2232             :                         return 0;
    2233             :                 }
    2234           1 :                 btrfs_tree_unlock(right);
    2235           1 :                 free_extent_buffer(right);
    2236             :         }
    2237             :         return 1;
    2238             : }
    2239             : 
    2240             : /*
    2241             :  * readahead one full node of leaves, finding things that are close
    2242             :  * to the block in 'slot', and triggering ra on them.
    2243             :  */
    2244         230 : static void reada_for_search(struct btrfs_root *root,
    2245             :                              struct btrfs_path *path,
    2246             :                              int level, int slot, u64 objectid)
    2247             : {
    2248         114 :         struct extent_buffer *node;
    2249             :         struct btrfs_disk_key disk_key;
    2250             :         u32 nritems;
    2251             :         u64 search;
    2252             :         u64 target;
    2253             :         u64 nread = 0;
    2254             :         u64 gen;
    2255         116 :         int direction = path->reada;
    2256             :         struct extent_buffer *eb;
    2257             :         u32 nr;
    2258             :         u32 blocksize;
    2259             :         u32 nscan = 0;
    2260             : 
    2261         116 :         if (level != 1)
    2262           2 :                 return;
    2263             : 
    2264         114 :         if (!path->nodes[level])
    2265             :                 return;
    2266             : 
    2267             :         node = path->nodes[level];
    2268             : 
    2269             :         search = btrfs_node_blockptr(node, slot);
    2270             :         blocksize = btrfs_level_size(root, level - 1);
    2271         114 :         eb = btrfs_find_tree_block(root, search, blocksize);
    2272         114 :         if (eb) {
    2273           0 :                 free_extent_buffer(eb);
    2274           0 :                 return;
    2275             :         }
    2276             : 
    2277             :         target = search;
    2278             : 
    2279             :         nritems = btrfs_header_nritems(node);
    2280         114 :         nr = slot;
    2281             : 
    2282             :         while (1) {
    2283        1308 :                 if (direction < 0) {
    2284          37 :                         if (nr == 0)
    2285             :                                 break;
    2286          37 :                         nr--;
    2287        1271 :                 } else if (direction > 0) {
    2288        1271 :                         nr++;
    2289        1271 :                         if (nr >= nritems)
    2290             :                                 break;
    2291             :                 }
    2292        1254 :                 if (path->reada < 0 && objectid) {
    2293          37 :                         btrfs_node_key(node, &disk_key, nr);
    2294          37 :                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
    2295             :                                 break;
    2296             :                 }
    2297        1217 :                 search = btrfs_node_blockptr(node, nr);
    2298        1217 :                 if ((search <= target && target - search <= 65536) ||
    2299         744 :                     (search > target && search - target <= 65536)) {
    2300             :                         gen = btrfs_node_ptr_generation(node, nr);
    2301         142 :                         readahead_tree_block(root, search, blocksize, gen);
    2302         142 :                         nread += blocksize;
    2303             :                 }
    2304        1217 :                 nscan++;
    2305        1217 :                 if ((nread > 65536 || nscan > 32))
    2306             :                         break;
    2307             :         }
    2308             : }
    2309             : 
    2310      201896 : static noinline void reada_for_balance(struct btrfs_root *root,
    2311             :                                        struct btrfs_path *path, int level)
    2312             : {
    2313             :         int slot;
    2314             :         int nritems;
    2315       11486 :         struct extent_buffer *parent;
    2316             :         struct extent_buffer *eb;
    2317             :         u64 gen;
    2318             :         u64 block1 = 0;
    2319             :         u64 block2 = 0;
    2320             :         int blocksize;
    2321             : 
    2322      190410 :         parent = path->nodes[level + 1];
    2323      190410 :         if (!parent)
    2324      190410 :                 return;
    2325             : 
    2326       11486 :         nritems = btrfs_header_nritems(parent);
    2327       11486 :         slot = path->slots[level + 1];
    2328             :         blocksize = btrfs_level_size(root, level);
    2329             : 
    2330       11486 :         if (slot > 0) {
    2331        9446 :                 block1 = btrfs_node_blockptr(parent, slot - 1);
    2332             :                 gen = btrfs_node_ptr_generation(parent, slot - 1);
    2333        9446 :                 eb = btrfs_find_tree_block(root, block1, blocksize);
    2334             :                 /*
    2335             :                  * if we get -eagain from btrfs_buffer_uptodate, we
    2336             :                  * don't want to return eagain here.  That will loop
    2337             :                  * forever
    2338             :                  */
    2339        9446 :                 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
    2340             :                         block1 = 0;
    2341        9446 :                 free_extent_buffer(eb);
    2342             :         }
    2343       11486 :         if (slot + 1 < nritems) {
    2344             :                 block2 = btrfs_node_blockptr(parent, slot + 1);
    2345             :                 gen = btrfs_node_ptr_generation(parent, slot + 1);
    2346        7098 :                 eb = btrfs_find_tree_block(root, block2, blocksize);
    2347        7098 :                 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
    2348             :                         block2 = 0;
    2349        7098 :                 free_extent_buffer(eb);
    2350             :         }
    2351             : 
    2352       11486 :         if (block1)
    2353           0 :                 readahead_tree_block(root, block1, blocksize, 0);
    2354       11486 :         if (block2)
    2355           0 :                 readahead_tree_block(root, block2, blocksize, 0);
    2356             : }
    2357             : 
    2358             : 
    2359             : /*
    2360             :  * when we walk down the tree, it is usually safe to unlock the higher layers
    2361             :  * in the tree.  The exceptions are when our path goes through slot 0, because
    2362             :  * operations on the tree might require changing key pointers higher up in the
    2363             :  * tree.
    2364             :  *
    2365             :  * callers might also have set path->keep_locks, which tells this code to keep
    2366             :  * the lock if the path points to the last slot in the block.  This is part of
    2367             :  * walking through the tree, and selecting the next slot in the higher block.
    2368             :  *
    2369             :  * lowest_unlock sets the lowest level in the tree we're allowed to unlock.  so
    2370             :  * if lowest_unlock is 1, level 0 won't be unlocked
    2371             :  */
    2372     4948754 : static noinline void unlock_up(struct btrfs_path *path, int level,
    2373             :                                int lowest_unlock, int min_write_lock_level,
    2374             :                                int *write_lock_level)
    2375             : {
    2376             :         int i;
    2377             :         int skip_level = level;
    2378             :         int no_skips = 0;
    2379      105520 :         struct extent_buffer *t;
    2380             : 
    2381     7861945 :         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
    2382     7860331 :                 if (!path->nodes[i])
    2383             :                         break;
    2384     6213724 :                 if (!path->locks[i])
    2385             :                         break;
    2386     2913116 :                 if (!no_skips && path->slots[i] == 0) {
    2387      262583 :                         skip_level = i + 1;
    2388      262583 :                         continue;
    2389             :                 }
    2390     2650533 :                 if (!no_skips && path->keep_locks) {
    2391             :                         u32 nritems;
    2392             :                         t = path->nodes[i];
    2393             :                         nritems = btrfs_header_nritems(t);
    2394      105520 :                         if (nritems < 1 || path->slots[i] >= nritems - 1) {
    2395       55929 :                                 skip_level = i + 1;
    2396       55929 :                                 continue;
    2397             :                         }
    2398             :                 }
    2399     2594604 :                 if (skip_level < i && i >= lowest_unlock)
    2400             :                         no_skips = 1;
    2401             : 
    2402             :                 t = path->nodes[i];
    2403     2594604 :                 if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
    2404      345066 :                         btrfs_tree_unlock_rw(t, path->locks[i]);
    2405      345141 :                         path->locks[i] = 0;
    2406      690282 :                         if (write_lock_level &&
    2407      392278 :                             i > min_write_lock_level &&
    2408       47137 :                             i <= *write_lock_level) {
    2409       21259 :                                 *write_lock_level = i - 1;
    2410             :                         }
    2411             :                 }
    2412             :         }
    2413     4948829 : }
    2414             : 
    2415             : /*
    2416             :  * This releases any locks held in the path starting at level and
    2417             :  * going all the way up to the root.
    2418             :  *
    2419             :  * btrfs_search_slot will keep the lock held on higher nodes in a few
    2420             :  * corner cases, such as COW of the block at slot zero in the node.  This
    2421             :  * ignores those rules, and it should only be called when there are no
    2422             :  * more updates to be done higher up in the tree.
    2423             :  */
    2424      295102 : noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
    2425             : {
    2426             :         int i;
    2427             : 
    2428      295102 :         if (path->keep_locks)
    2429      295296 :                 return;
    2430             : 
    2431     2054782 :         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
    2432     2054588 :                 if (!path->nodes[i])
    2433     1749008 :                         continue;
    2434      305580 :                 if (!path->locks[i])
    2435      239717 :                         continue;
    2436       65863 :                 btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
    2437       66057 :                 path->locks[i] = 0;
    2438             :         }
    2439             : }
    2440             : 
    2441             : /*
    2442             :  * helper function for btrfs_search_slot.  The goal is to find a block
    2443             :  * in cache without setting the path to blocking.  If we find the block
    2444             :  * we return zero and the path is unchanged.
    2445             :  *
    2446             :  * If we can't find the block, we set the path blocking and do some
    2447             :  * reada.  -EAGAIN is returned and the search must be repeated.
    2448             :  */
    2449             : static int
    2450     2208274 : read_block_for_search(struct btrfs_trans_handle *trans,
    2451     2207946 :                        struct btrfs_root *root, struct btrfs_path *p,
    2452             :                        struct extent_buffer **eb_ret, int level, int slot,
    2453             :                        struct btrfs_key *key, u64 time_seq)
    2454             : {
    2455             :         u64 blocknr;
    2456             :         u64 gen;
    2457             :         u32 blocksize;
    2458     2208274 :         struct extent_buffer *b = *eb_ret;
    2459             :         struct extent_buffer *tmp;
    2460             :         int ret;
    2461             : 
    2462             :         blocknr = btrfs_node_blockptr(b, slot);
    2463             :         gen = btrfs_node_ptr_generation(b, slot);
    2464             :         blocksize = btrfs_level_size(root, level - 1);
    2465             : 
    2466     2207946 :         tmp = btrfs_find_tree_block(root, blocknr, blocksize);
    2467     2208699 :         if (tmp) {
    2468             :                 /* first we do an atomic uptodate check */
    2469     2208304 :                 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
    2470     2208130 :                         *eb_ret = tmp;
    2471             :                         return 0;
    2472             :                 }
    2473             : 
    2474             :                 /* the pages were up to date, but we failed
    2475             :                  * the generation number check.  Do a full
    2476             :                  * read for the generation number that is correct.
    2477             :                  * We must do this without dropping locks so
    2478             :                  * we can trust our generation number
    2479             :                  */
    2480           3 :                 btrfs_set_path_blocking(p);
    2481             : 
    2482             :                 /* now we're allowed to do a blocking uptodate check */
    2483           3 :                 ret = btrfs_read_buffer(tmp, gen);
    2484           3 :                 if (!ret) {
    2485           3 :                         *eb_ret = tmp;
    2486             :                         return 0;
    2487             :                 }
    2488           0 :                 free_extent_buffer(tmp);
    2489           0 :                 btrfs_release_path(p);
    2490             :                 return -EIO;
    2491             :         }
    2492             : 
    2493             :         /*
    2494             :          * reduce lock contention at high levels
    2495             :          * of the btree by dropping locks before
    2496             :          * we read.  Don't release the lock on the current
    2497             :          * level because we need to walk this node to figure
    2498             :          * out which blocks to read.
    2499             :          */
    2500         395 :         btrfs_unlock_up_safe(p, level + 1);
    2501         395 :         btrfs_set_path_blocking(p);
    2502             : 
    2503         395 :         free_extent_buffer(tmp);
    2504         395 :         if (p->reada)
    2505         116 :                 reada_for_search(root, p, level, slot, key->objectid);
    2506             : 
    2507         395 :         btrfs_release_path(p);
    2508             : 
    2509             :         ret = -EAGAIN;
    2510         395 :         tmp = read_tree_block(root, blocknr, blocksize, 0);
    2511         395 :         if (tmp) {
    2512             :                 /*
    2513             :                  * If the read above didn't mark this buffer up to date,
    2514             :                  * it will never end up being up to date.  Set ret to EIO now
    2515             :                  * and give up so that our caller doesn't loop forever
    2516             :                  * on our EAGAINs.
    2517             :                  */
    2518         395 :                 if (!btrfs_buffer_uptodate(tmp, 0, 0))
    2519             :                         ret = -EIO;
    2520         395 :                 free_extent_buffer(tmp);
    2521             :         }
    2522             :         return ret;
    2523             : }
    2524             : 
    2525             : /*
    2526             :  * helper function for btrfs_search_slot.  This does all of the checks
    2527             :  * for node-level blocks and does any balancing required based on
    2528             :  * the ins_len.
    2529             :  *
    2530             :  * If no extra work was required, zero is returned.  If we had to
    2531             :  * drop the path, -EAGAIN is returned and btrfs_search_slot must
    2532             :  * start over
    2533             :  */
    2534             : static int
    2535     2844551 : setup_nodes_for_search(struct btrfs_trans_handle *trans,
    2536             :                        struct btrfs_root *root, struct btrfs_path *p,
    2537      833904 :                        struct extent_buffer *b, int level, int ins_len,
    2538             :                        int *write_lock_level)
    2539             : {
    2540             :         int ret;
    2541     3528429 :         if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
    2542      341939 :             BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
    2543             :                 int sret;
    2544             : 
    2545         299 :                 if (*write_lock_level < level + 1) {
    2546         119 :                         *write_lock_level = level + 1;
    2547         119 :                         btrfs_release_path(p);
    2548         119 :                         goto again;
    2549             :                 }
    2550             : 
    2551         180 :                 btrfs_set_path_blocking(p);
    2552         180 :                 reada_for_balance(root, p, level);
    2553         180 :                 sret = split_node(trans, root, p, level);
    2554         180 :                 btrfs_clear_path_blocking(p, NULL, 0);
    2555             : 
    2556         180 :                 BUG_ON(sret > 0);
    2557         180 :                 if (sret) {
    2558             :                         ret = sret;
    2559             :                         goto done;
    2560             :                 }
    2561             :                 b = p->nodes[level];
    2562     3447782 :         } else if (ins_len < 0 && btrfs_header_nritems(b) <
    2563      301765 :                    BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
    2564             :                 int sret;
    2565             : 
    2566      250402 :                 if (*write_lock_level < level + 1) {
    2567       60182 :                         *write_lock_level = level + 1;
    2568       60182 :                         btrfs_release_path(p);
    2569       60182 :                         goto again;
    2570             :                 }
    2571             : 
    2572      190220 :                 btrfs_set_path_blocking(p);
    2573      190230 :                 reada_for_balance(root, p, level);
    2574      190233 :                 sret = balance_level(trans, root, p, level);
    2575      190232 :                 btrfs_clear_path_blocking(p, NULL, 0);
    2576             : 
    2577      190231 :                 if (sret) {
    2578             :                         ret = sret;
    2579             :                         goto done;
    2580             :                 }
    2581      190232 :                 b = p->nodes[level];
    2582      190232 :                 if (!b) {
    2583          32 :                         btrfs_release_path(p);
    2584          32 :                         goto again;
    2585             :                 }
    2586      190200 :                 BUG_ON(btrfs_header_nritems(b) == 1);
    2587             :         }
    2588             :         return 0;
    2589             : 
    2590             : again:
    2591             :         ret = -EAGAIN;
    2592             : done:
    2593       60333 :         return ret;
    2594             : }
    2595             : 
    /*
     * Debugging aid for key_search(): with CONFIG_BTRFS_ASSERT enabled, assert
     * that the first key stored in 'b' is byte-for-byte equal to the on-disk
     * form of 'key'.  The first key is taken from the leaf item array when
     * level is 0 and from the node pointer array otherwise.  Without
     * CONFIG_BTRFS_ASSERT this function is empty.
     */
    static void key_search_validate(struct extent_buffer *b,
                                    struct btrfs_key *key,
                                    int level)
    {
    #ifdef CONFIG_BTRFS_ASSERT
            struct btrfs_disk_key disk_key;

            btrfs_cpu_key_to_disk(&disk_key, key);

            if (level != 0) {
                    ASSERT(!memcmp_extent_buffer(b, &disk_key,
                        offsetof(struct btrfs_node, ptrs[0].key),
                        sizeof(disk_key)));
            } else {
                    ASSERT(!memcmp_extent_buffer(b, &disk_key,
                        offsetof(struct btrfs_leaf, items[0].key),
                        sizeof(disk_key)));
            }
    #endif
    }
    2615             : 
    /*
     * Find the slot for 'key' in block 'b'.
     *
     * *prev_cmp carries the result of the previous call.  If it is 0 (the
     * previous comparison was an exact match), no search is done: *slot is set
     * to 0 and 0 is returned, after key_search_validate() has had a chance to
     * check that the first key of 'b' really is 'key'.  Otherwise bin_search()
     * is run, and its result is both stored in *prev_cmp and returned.
     */
    static int key_search(struct extent_buffer *b, struct btrfs_key *key,
                          int level, int *prev_cmp, int *slot)
    {
            if (*prev_cmp == 0) {
                    key_search_validate(b, key, level);
                    *slot = 0;
                    return 0;
            }

            *prev_cmp = bin_search(b, key, level, slot);
            return *prev_cmp;
    }
    2629             : 
    2630        6052 : int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
    2631             :                 u64 iobjectid, u64 ioff, u8 key_type,
    2632             :                 struct btrfs_key *found_key)
    2633             : {
    2634             :         int ret;
    2635             :         struct btrfs_key key;
    2636        5227 :         struct extent_buffer *eb;
    2637             :         struct btrfs_path *path;
    2638             : 
    2639        6052 :         key.type = key_type;
    2640        6052 :         key.objectid = iobjectid;
    2641        6052 :         key.offset = ioff;
    2642             : 
    2643        6052 :         if (found_path == NULL) {
    2644             :                 path = btrfs_alloc_path();
    2645         616 :                 if (!path)
    2646             :                         return -ENOMEM;
    2647             :         } else
    2648             :                 path = found_path;
    2649             : 
    2650        6052 :         ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
    2651        6052 :         if ((ret < 0) || (found_key == NULL)) {
    2652         821 :                 if (path != found_path)
    2653         616 :                         btrfs_free_path(path);
    2654         821 :                 return ret;
    2655             :         }
    2656             : 
    2657        5231 :         eb = path->nodes[0];
    2658       10458 :         if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
    2659             :                 ret = btrfs_next_leaf(fs_root, path);
    2660          26 :                 if (ret)
    2661             :                         return ret;
    2662          26 :                 eb = path->nodes[0];
    2663             :         }
    2664             : 
    2665        5231 :         btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
    2666        9999 :         if (found_key->type != key.type ||
    2667        4768 :                         found_key->objectid != key.objectid)
    2668             :                 return 1;
    2669             : 
    2670        4768 :         return 0;
    2671             : }
    2672             : 
    2673             : /*
    2674             :  * look for key in the tree.  path is filled in with nodes along the way
    2675             :  * if key is found, we return zero and you can find the item in the leaf
    2676             :  * level of the path (level 0)
    2677             :  *
    2678             :  * If the key isn't found, the path points to the slot where it should
    2679             :  * be inserted, and 1 is returned.  If there are other errors during the
    2680             :  * search a negative error number is returned.
    2681             :  *
    2682             :  * if ins_len > 0, nodes and leaves will be split as we walk down the
    2683             :  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
    2684             :  * possible)
    2685             :  */
    2686     2713725 : int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
    2687             :                       *root, struct btrfs_key *key, struct btrfs_path *p, int
    2688             :                       ins_len, int cow)
    2689             : {
    2690             :         struct extent_buffer *b;
    2691             :         int slot;
    2692             :         int ret;
    2693             :         int err;
    2694             :         int level;
    2695             :         int lowest_unlock = 1;
    2696             :         int root_lock;
    2697             :         /* everything at write_lock_level or lower must be write locked */
    2698     2713725 :         int write_lock_level = 0;
    2699             :         u8 lowest_level = 0;
    2700             :         int min_write_lock_level;
    2701             :         int prev_cmp;
    2702             : 
    2703     2713725 :         lowest_level = p->lowest_level;
    2704     2713725 :         WARN_ON(lowest_level && ins_len > 0);
    2705     2713711 :         WARN_ON(p->nodes[0] != NULL);
    2706     2713711 :         BUG_ON(!cow && ins_len);
    2707             : 
    2708     2713711 :         if (ins_len < 0) {
    2709             :                 lowest_unlock = 2;
    2710             : 
    2711             :                 /* when we are removing items, we might have to go up to level
    2712             :                  * two as we update tree pointers  Make sure we keep write
    2713             :                  * for those levels as well
    2714             :                  */
    2715      215608 :                 write_lock_level = 2;
    2716     2498103 :         } else if (ins_len > 0) {
    2717             :                 /*
    2718             :                  * for inserting items, make sure we have a write lock on
    2719             :                  * level 1 so we can update keys
    2720             :                  */
    2721      311751 :                 write_lock_level = 1;
    2722             :         }
    2723             : 
    2724     2713711 :         if (!cow)
    2725     1995650 :                 write_lock_level = -1;
    2726             : 
    2727     2713711 :         if (cow && (p->keep_locks || p->lowest_level))
    2728       17677 :                 write_lock_level = BTRFS_MAX_LEVEL;
    2729             : 
    2730     2713711 :         min_write_lock_level = write_lock_level;
    2731             : 
    2732             : again:
    2733             :         prev_cmp = -1;
    2734             :         /*
    2735             :          * we try very hard to do read locks on the root
    2736             :          */
    2737             :         root_lock = BTRFS_READ_LOCK;
    2738             :         level = 0;
    2739     2811656 :         if (p->search_commit_root) {
    2740             :                 /*
    2741             :                  * the commit roots are read only
    2742             :                  * so we always do read locks
    2743             :                  */
    2744     1610652 :                 if (p->need_commit_sem)
    2745        7783 :                         down_read(&root->fs_info->commit_root_sem);
    2746     1610652 :                 b = root->commit_root;
    2747     9582965 :                 extent_buffer_get(b);
    2748     3221362 :                 level = btrfs_header_level(b);
    2749     1610681 :                 if (p->need_commit_sem)
    2750        7783 :                         up_read(&root->fs_info->commit_root_sem);
    2751     1610576 :                 if (!p->skip_locking)
    2752           0 :                         btrfs_tree_read_lock(b);
    2753             :         } else {
    2754     1201004 :                 if (p->skip_locking) {
    2755           0 :                         b = btrfs_root_node(root);
    2756           0 :                         level = btrfs_header_level(b);
    2757             :                 } else {
    2758             :                         /* we don't know the level of the root node
    2759             :                          * until we actually have it read locked
    2760             :                          */
    2761     1201004 :                         b = btrfs_read_lock_root_node(root);
    2762     1201432 :                         level = btrfs_header_level(b);
    2763     1201432 :                         if (level <= write_lock_level) {
    2764             :                                 /* whoops, must trade for write lock */
    2765      632025 :                                 btrfs_tree_read_unlock(b);
    2766      632011 :                                 free_extent_buffer(b);
    2767      632021 :                                 b = btrfs_lock_root_node(root);
    2768             :                                 root_lock = BTRFS_WRITE_LOCK;
    2769             : 
    2770             :                                 /* the level might have changed, check again */
    2771      632051 :                                 level = btrfs_header_level(b);
    2772             :                         }
    2773             :                 }
    2774             :         }
    2775     2812034 :         p->nodes[level] = b;
    2776     2812034 :         if (!p->skip_locking)
    2777     1201364 :                 p->locks[level] = root_lock;
    2778             : 
    2779     4991657 :         while (b) {
    2780     4990304 :                 level = btrfs_header_level(b);
    2781             : 
    2782             :                 /*
    2783             :                  * setup the path here so we can release it under lock
    2784             :                  * contention with the cow code
    2785             :                  */
    2786     4990304 :                 if (cow) {
    2787             :                         /*
    2788             :                          * if we don't really need to cow this block
    2789             :                          * then we don't want to set the path blocking,
    2790             :                          * so we test it here
    2791             :                          */
    2792     1534341 :                         if (!should_cow_block(trans, root, b))
    2793             :                                 goto cow_done;
    2794             : 
    2795       58705 :                         btrfs_set_path_blocking(p);
    2796             : 
    2797             :                         /*
    2798             :                          * must have write locks on this node and the
    2799             :                          * parent
    2800             :                          */
    2801      115296 :                         if (level > write_lock_level ||
    2802       68755 :                             (level + 1 > write_lock_level &&
    2803       12164 :                             level + 1 < BTRFS_MAX_LEVEL &&
    2804       12164 :                             p->nodes[level + 1])) {
    2805        9533 :                                 write_lock_level = level + 1;
    2806        9533 :                                 btrfs_release_path(p);
    2807        9533 :                                 goto again;
    2808             :                         }
    2809             : 
    2810       49172 :                         err = btrfs_cow_block(trans, root, b,
    2811             :                                               p->nodes[level + 1],
    2812             :                                               p->slots[level + 1], &b);
    2813       49170 :                         if (err) {
    2814             :                                 ret = err;
    2815             :                                 goto done;
    2816             :                         }
    2817             :                 }
    2818             : cow_done:
    2819     4981692 :                 p->nodes[level] = b;
    2820     4981692 :                 btrfs_clear_path_blocking(p, NULL, 0);
    2821             : 
    2822             :                 /*
    2823             :                  * we have a lock on b and as long as we aren't changing
    2824             :                  * the tree, there is no way for the items in b to change.
    2825             :                  * It is safe to drop the lock on our parent before we
    2826             :                  * go through the expensive btree search on b.
    2827             :                  *
    2828             :                  * If we're inserting or deleting (ins_len != 0), then we might
    2829             :                  * be changing slot zero, which may require changing the parent.
    2830             :                  * So, we can't drop the lock until after we know which slot
    2831             :                  * we're operating on.
    2832             :                  */
    2833     4980504 :                 if (!ins_len && !p->keep_locks) {
    2834     3722115 :                         int u = level + 1;
    2835             : 
    2836     3722115 :                         if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
    2837      569200 :                                 btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
    2838      569254 :                                 p->locks[u] = 0;
    2839             :                         }
    2840             :                 }
    2841             : 
    2842     4980558 :                 ret = key_search(b, key, level, &prev_cmp, &slot);
    2843             : 
    2844     4980582 :                 if (level != 0) {
    2845             :                         int dec = 0;
    2846     2844572 :                         if (ret && slot > 0) {
    2847             :                                 dec = 1;
    2848     2251151 :                                 slot -= 1;
    2849             :                         }
    2850     2844572 :                         p->slots[level] = slot;
    2851     2844572 :                         err = setup_nodes_for_search(trans, root, p, b, level,
    2852             :                                              ins_len, &write_lock_level);
    2853     2844566 :                         if (err == -EAGAIN)
    2854             :                                 goto again;
    2855     2784346 :                         if (err) {
    2856             :                                 ret = err;
    2857             :                                 goto done;
    2858             :                         }
    2859     2784348 :                         b = p->nodes[level];
    2860     2784348 :                         slot = p->slots[level];
    2861             : 
    2862             :                         /*
    2863             :                          * slot 0 is special, if we change the key
    2864             :                          * we have to update the parent pointer
    2865             :                          * which means we must have a write lock
    2866             :                          * on the parent
    2867             :                          */
    2868     2896461 :                         if (slot == 0 && ins_len &&
    2869      112113 :                             write_lock_level < level + 1) {
    2870       27869 :                                 write_lock_level = level + 1;
    2871       27869 :                                 btrfs_release_path(p);
    2872       27868 :                                 goto again;
    2873             :                         }
    2874             : 
    2875     2756479 :                         unlock_up(p, level, lowest_unlock,
    2876             :                                   min_write_lock_level, &write_lock_level);
    2877             : 
    2878     2756520 :                         if (level == lowest_level) {
    2879      577272 :                                 if (dec)
    2880           0 :                                         p->slots[level]++;
    2881             :                                 goto done;
    2882             :                         }
    2883             : 
    2884     2179248 :                         err = read_block_for_search(trans, root, p,
    2885             :                                                     &b, level, slot, key, 0);
    2886     2179444 :                         if (err == -EAGAIN)
    2887             :                                 goto again;
    2888     2179120 :                         if (err) {
    2889             :                                 ret = err;
    2890             :                                 goto done;
    2891             :                         }
    2892             : 
    2893     2179371 :                         if (!p->skip_locking) {
    2894     2296994 :                                 level = btrfs_header_level(b);
    2895     1148497 :                                 if (level <= write_lock_level) {
    2896      701151 :                                         err = btrfs_try_tree_write_lock(b);
    2897      701355 :                                         if (!err) {
    2898        8794 :                                                 btrfs_set_path_blocking(p);
    2899        8797 :                                                 btrfs_tree_lock(b);
    2900        8799 :                                                 btrfs_clear_path_blocking(p, b,
    2901             :                                                                   BTRFS_WRITE_LOCK);
    2902             :                                         }
    2903      701300 :                                         p->locks[level] = BTRFS_WRITE_LOCK;
    2904             :                                 } else {
    2905      447346 :                                         err = btrfs_try_tree_read_lock(b);
    2906      447453 :                                         if (!err) {
    2907        1171 :                                                 btrfs_set_path_blocking(p);
    2908        1171 :                                                 btrfs_tree_read_lock(b);
    2909        1171 :                                                 btrfs_clear_path_blocking(p, b,
    2910             :                                                                   BTRFS_READ_LOCK);
    2911             :                                         }
    2912      447449 :                                         p->locks[level] = BTRFS_READ_LOCK;
    2913             :                                 }
    2914     1148749 :                                 p->nodes[level] = b;
    2915             :                         }
    2916             :                 } else {
    2917     2136010 :                         p->slots[level] = slot;
    2918     2447718 :                         if (ins_len > 0 &&
    2919      311731 :                             btrfs_leaf_free_space(root, b) < ins_len) {
    2920       13324 :                                 if (write_lock_level < 1) {
    2921           0 :                                         write_lock_level = 1;
    2922           0 :                                         btrfs_release_path(p);
    2923           0 :                                         goto again;
    2924             :                                 }
    2925             : 
    2926       13324 :                                 btrfs_set_path_blocking(p);
    2927       13324 :                                 err = split_leaf(trans, root, key,
    2928             :                                                  p, ins_len, ret == 0);
    2929       13324 :                                 btrfs_clear_path_blocking(p, NULL, 0);
    2930             : 
    2931       13324 :                                 BUG_ON(err > 0);
    2932       13324 :                                 if (err) {
    2933             :                                         ret = err;
    2934             :                                         goto done;
    2935             :                                 }
    2936             :                         }
    2937     2134241 :                         if (!p->search_for_split)
    2938     2134116 :                                 unlock_up(p, level, lowest_unlock,
    2939             :                                           min_write_lock_level, &write_lock_level);
    2940             :                         goto done;
    2941             :                 }
    2942             :         }
    2943             :         ret = 1;
    2944             : done:
    2945             :         /*
    2946             :          * we don't really know what they plan on doing with the path
    2947             :          * from here on, so for now just mark it as blocking
    2948             :          */
    2949     2713474 :         if (!p->leave_spinning)
    2950     2234443 :                 btrfs_set_path_blocking(p);
    2951     2713702 :         if (ret < 0)
    2952        1928 :                 btrfs_release_path(p);
    2953     2713702 :         return ret;
    2954             : }
    2955             : 
    2956             : /*
    2957             :  * Like btrfs_search_slot, this looks for a key in the given tree. It uses the
    2958             :  * current state of the tree together with the operations recorded in the tree
    2959             :  * modification log to search for the key in a previous version of this tree, as
    2960             :  * denoted by the time_seq parameter.
    2961             :  *
    2962             :  * Naturally, there is no support for insert, delete or cow operations.
    2963             :  *
    2964             :  * The resulting path and return value will be set up as if we called
    2965             :  * btrfs_search_slot at that point in time with ins_len and cow both set to 0.
    2966             :  */
    2967      608193 : int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
    2968             :                           struct btrfs_path *p, u64 time_seq)
    2969             : {
    2970             :         struct extent_buffer *b;
    2971             :         int slot;
    2972             :         int ret;
    2973             :         int err;
    2974             :         int level;
    2975             :         int lowest_unlock = 1;
    2976             :         u8 lowest_level = 0;
    2977             :         int prev_cmp = -1;
    2978             : 
    2979      608193 :         lowest_level = p->lowest_level;
    2980      608193 :         WARN_ON(p->nodes[0] != NULL);
    2981             : 
    2982      608193 :         if (p->search_commit_root) { /* commit-root searches must not pass a time_seq; delegate to the plain search with ins_len and cow both 0 */
    2983      600267 :                 BUG_ON(time_seq);
    2984      600267 :                 return btrfs_search_slot(NULL, root, key, p, 0, 0);
    2985             :         }
    2986             : 
    2987             : again:
    2988        7947 :         b = get_old_root(root, time_seq); /* NOTE(review): b is dereferenced on the next line, before the while (b) test - confirm get_old_root() cannot return NULL */
    2989       30217 :         level = btrfs_header_level(b);
    2990        7947 :         p->locks[level] = BTRFS_READ_LOCK; /* presumably get_old_root() hands the buffer back read locked - TODO confirm */
    2991             : 
    2992       19082 :         while (b) {
    2993       11135 :                 level = btrfs_header_level(b);
    2994       11135 :                 p->nodes[level] = b;
    2995       11135 :                 btrfs_clear_path_blocking(p, NULL, 0);
    2996             : 
    2997             :                 /*
    2998             :                  * we have a lock on b and as long as we aren't changing
    2999             :                  * the tree, there is no way for the items in b to change.
    3000             :                  * It is safe to drop the lock on our parent before we
    3001             :                  * go through the expensive btree search on b.
    3002             :                  */
    3003       11135 :                 btrfs_unlock_up_safe(p, level + 1);
    3004             : 
    3005             :                 /*
    3006             :                  * Since we can unwind eb's we want to do a real search every
    3007             :                  * time.
    3008             :                  */
    3009             :                 prev_cmp = -1;
    3010       11135 :                 ret = key_search(b, key, level, &prev_cmp, &slot);
    3011             : 
    3012       11135 :                 if (level != 0) {
    3013             :                         int dec = 0;
    3014        8093 :                         if (ret && slot > 0) { /* presumably ret != 0 means no exact match: follow the pointer just before the returned slot */
    3015             :                                 dec = 1;
    3016        3267 :                                 slot -= 1;
    3017             :                         }
    3018        8093 :                         p->slots[level] = slot;
    3019        8093 :                         unlock_up(p, level, lowest_unlock, 0, NULL);
    3020             : 
    3021        8093 :                         if (level == lowest_level) {
    3022        4884 :                                 if (dec)
    3023          90 :                                         p->slots[level]++; /* stopping at this level: undo the step back made above */
    3024             :                                 goto done;
    3025             :                         }
    3026             : 
    3027        3209 :                         err = read_block_for_search(NULL, root, p, &b, level,
    3028             :                                                     slot, key, time_seq);
    3029        3209 :                         if (err == -EAGAIN) /* restart the whole walk from a freshly fetched old root */
    3030             :                                 goto again;
    3031        3188 :                         if (err) {
    3032             :                                 ret = err;
    3033             :                                 goto done;
    3034             :                         }
    3035             : 
    3036        6376 :                         level = btrfs_header_level(b);
    3037        3188 :                         err = btrfs_try_tree_read_lock(b);
    3038        3188 :                         if (!err) { /* presumably 0 means the try-lock failed: fall back to a blocking read lock */
    3039           0 :                                 btrfs_set_path_blocking(p);
    3040           0 :                                 btrfs_tree_read_lock(b);
    3041           0 :                                 btrfs_clear_path_blocking(p, b,
    3042             :                                                           BTRFS_READ_LOCK);
    3043             :                         }
    3044        3188 :                         b = tree_mod_log_rewind(root->fs_info, p, b, time_seq); /* b is replaced by the returned buffer; NULL is reported as -ENOMEM below */
    3045        3188 :                         if (!b) {
    3046             :                                 ret = -ENOMEM;
    3047             :                                 goto done;
    3048             :                         }
    3049        3188 :                         p->locks[level] = BTRFS_READ_LOCK;
    3050        3188 :                         p->nodes[level] = b;
    3051             :                 } else {
    3052        3042 :                         p->slots[level] = slot; /* leaf level: record the slot and finish; ret is the key_search() result */
    3053        3042 :                         unlock_up(p, level, lowest_unlock, 0, NULL);
    3054        3042 :                         goto done;
    3055             :                 }
    3056             :         }
    3057             :         ret = 1; /* reached only when the while (b) loop exits without jumping to done */
    3058             : done:
    3059        7926 :         if (!p->leave_spinning) /* unless the caller set leave_spinning, hand the path back in blocking mode */
    3060        7855 :                 btrfs_set_path_blocking(p);
    3061        7926 :         if (ret < 0) /* on error the path is released before returning */
    3062           0 :                 btrfs_release_path(p);
    3063             : 
    3064        7926 :         return ret;
    3065             : }
    3066             : 
    3067             : /*
    3068             :  * helper to use instead of search slot if no exact match is needed but
    3069             :  * instead the next or previous item should be returned.
    3070             :  * When find_higher is true, the next higher item is returned, the next lower
    3071             :  * otherwise.
    3072             :  * When return_any and find_higher are both true, and no higher item is found,
    3073             :  * return the next lower instead.
    3074             :  * When return_any is true and find_higher is false, and no lower item is found,
    3075             :  * return the next higher instead.
    3076             :  * It returns 0 if any item is found, 1 if none is found (tree empty), and
    3077             :  * < 0 on error
    3078             :  */
    3079        1085 : int btrfs_search_slot_for_read(struct btrfs_root *root,
    3080             :                                struct btrfs_key *key, struct btrfs_path *p,
    3081             :                                int find_higher, int return_any)
    3082             : {
    3083             :         int ret;
    3084        1007 :         struct extent_buffer *leaf;
    3085             : 
    3086             : again:
    3087        1085 :         ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
    3088        1085 :         if (ret <= 0) /* exact match (0) or error (< 0): returned to the caller unchanged */
    3089             :                 return ret;
    3090             :         /*
    3091             :          * a return value of 1 means the path is at the position where the
    3092             :          * item should be inserted. Normally this is the next bigger item,
    3093             :          * but in case the previous item is the last in a leaf, path points
    3094             :          * to the first free slot in the previous leaf, i.e. at an invalid
    3095             :          * item.
    3096             :          */
    3097        1056 :         leaf = p->nodes[0];
    3098             : 
    3099        1056 :         if (find_higher) {
    3100        2014 :                 if (p->slots[0] >= btrfs_header_nritems(leaf)) { /* slot is past the last item of this leaf */
    3101             :                         ret = btrfs_next_leaf(root, p);
    3102          73 :                         if (ret <= 0) /* NOTE(review): presumably 0 means the path now sits on an item in the next leaf - confirm */
    3103             :                                 return ret;
    3104          16 :                         if (!return_any)
    3105             :                                 return 1;
    3106             :                         /*
    3107             :                          * no higher item found, return the next
    3108             :                          * lower instead
    3109             :                          */
    3110             :                         return_any = 0; /* cleared so the retry cannot flip direction a second time */
    3111             :                         find_higher = 0;
    3112           0 :                         btrfs_release_path(p);
    3113           0 :                         goto again;
    3114             :                 }
    3115             :         } else {
    3116          49 :                 if (p->slots[0] == 0) { /* nothing lower in this leaf: try the previous leaf */
    3117           0 :                         ret = btrfs_prev_leaf(root, p);
    3118           0 :                         if (ret < 0)
    3119             :                                 return ret;
    3120           0 :                         if (!ret) {
    3121           0 :                                 leaf = p->nodes[0];
    3122           0 :                                 if (p->slots[0] == btrfs_header_nritems(leaf)) /* one past the end: step back onto the last item */
    3123           0 :                                         p->slots[0]--;
    3124             :                                 return 0;
    3125             :                         }
    3126           0 :                         if (!return_any)
    3127             :                                 return 1;
    3128             :                         /*
    3129             :                          * no lower item found, return the next
    3130             :                          * higher instead
    3131             :                          */
    3132             :                         return_any = 0; /* cleared so the retry cannot flip direction a second time */
    3133             :                         find_higher = 1;
    3134           0 :                         btrfs_release_path(p);
    3135           0 :                         goto again;
    3136             :                 } else {
    3137          49 :                         --p->slots[0]; /* the item just below the insertion position */
    3138             :                 }
    3139             :         }
    3140             :         return 0;
    3141             : }
    3142             : 
    3143             : /*
    3144             :  * Adjust the key pointers going up the tree, starting at 'level',
    3145             :  * making sure the key at path->slots[i] of each node is set to 'key'.
    3146             :  * This is used after shifting pointers to the left, so it stops
    3147             :  * fixing up pointers when a given leaf/node is not in slot 0 of the
    3148             :  * higher levels. It also stops at the first level with no node in the path.
    3149             :  * Every node that is updated is marked dirty.
    3150             :  */
    3151       15232 : static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
    3152             :                            struct btrfs_disk_key *key, int level)
    3153             : {
    3154             :         int i;
    3155             :         struct extent_buffer *t;
    3156             : 
    3157        2593 :         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
    3158       17825 :                 int tslot = path->slots[i];
    3159       17825 :                 if (!path->nodes[i]) /* the path has no node at this level: nothing further up to fix */
    3160             :                         break;
    3161             :                 t = path->nodes[i];
    3162       13604 :                 tree_mod_log_set_node_key(root->fs_info, t, tslot, 1); /* presumably logs the key change for the tree mod log before it is overwritten - TODO confirm */
    3163             :                 btrfs_set_node_key(t, key, tslot);
    3164       13604 :                 btrfs_mark_buffer_dirty(path->nodes[i]);
    3165       13604 :                 if (tslot != 0) /* not in slot 0: the higher levels do not need fixing */
    3166             :                         break;
    3167             :         }
    3168       15232 : }
    3169             : 
    3170             : /*
    3171             :  * update item key.
    3172             :  *
    3173             :  * This function isn't completely safe. It's the caller's responsibility
    3174             :  * that the new key won't break the order
    3175             :  */
    3176        8488 : void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
    3177             :                              struct btrfs_key *new_key)
    3178             : {
    3179             :         struct btrfs_disk_key disk_key;
    3180        8488 :         struct extent_buffer *eb;
    3181             :         int slot;
    3182             : 
    3183        8488 :         eb = path->nodes[0]; /* operates on the leaf and slot the path currently points at */
    3184        8488 :         slot = path->slots[0];
    3185        8488 :         if (slot > 0) { /* the previous item must sort strictly before new_key (assuming comp_keys() compares like memcmp) */
    3186        7328 :                 btrfs_item_key(eb, &disk_key, slot - 1);
    3187        7328 :                 BUG_ON(comp_keys(&disk_key, new_key) >= 0);
    3188             :         }
    3189       16976 :         if (slot < btrfs_header_nritems(eb) - 1) { /* NOTE(review): if nritems is unsigned and 0 the subtraction wraps - confirm the leaf is never empty here */
    3190        7257 :                 btrfs_item_key(eb, &disk_key, slot + 1); /* the next item must sort strictly after new_key */
    3191        7257 :                 BUG_ON(comp_keys(&disk_key, new_key) <= 0);
    3192             :         }
    3193             : 
    3194             :         btrfs_cpu_key_to_disk(&disk_key, new_key); /* disk_key is reused: it now holds new_key converted for writing */
    3195             :         btrfs_set_item_key(eb, &disk_key, slot);
    3196        8488 :         btrfs_mark_buffer_dirty(eb);
    3197        8488 :         if (slot == 0) /* first key of the leaf changed: update the parent key pointers from level 1 up */
    3198        1160 :                 fixup_low_keys(root, path, &disk_key, 1);
    3199        8488 : }
    3200             : 
    3201             : /*
    3202             :  * try to push data from one node into the next node left in the
    3203             :  * tree.
    3204             :  *
    3205             :  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
    3206             :  * error, and > 0 if there was no room in the left hand block.
    3207             :  */
    3208         114 : static int push_node_left(struct btrfs_trans_handle *trans,
    3209         342 :                           struct btrfs_root *root, struct extent_buffer *dst,
    3210         228 :                           struct extent_buffer *src, int empty)
    3211             : { /* moves key pointers from the front of src onto the end of dst */
    3212             :         int push_items = 0;
    3213             :         int src_nritems;
    3214             :         int dst_nritems;
    3215             :         int ret = 0;
    3216             : 
    3217         114 :         src_nritems = btrfs_header_nritems(src);
    3218         114 :         dst_nritems = btrfs_header_nritems(dst);
    3219         114 :         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; /* pointer slots still unused in dst */
    3220         114 :         WARN_ON(btrfs_header_generation(src) != trans->transid); /* both buffers are expected to carry the running transaction id */
    3221         114 :         WARN_ON(btrfs_header_generation(dst) != trans->transid);
    3222             : 
    3223         114 :         if (!empty && src_nritems <= 8) /* not emptying src and it holds 8 or fewer pointers */
    3224             :                 return 1; /* nothing pushed */
    3225             : 
    3226         114 :         if (push_items <= 0) /* dst has no unused slot */
    3227             :                 return 1; /* nothing pushed */
    3228             : 
    3229         114 :         if (empty) {
    3230          17 :                 push_items = min(src_nritems, push_items); /* take all of src when dst can hold it */
    3231          17 :                 if (push_items < src_nritems) { /* src cannot be emptied completely */
    3232             :                         /* leave at least 8 pointers in the node if
    3233             :                          * we aren't going to empty it
    3234             :                          */
    3235           0 :                         if (src_nritems - push_items < 8) {
    3236           0 :                                 if (push_items <= 8)
    3237             :                                         return 1; /* nothing pushed */
    3238           0 :                                 push_items -= 8;
    3239             :                         }
    3240             :                 }
    3241             :         } else
    3242          97 :                 push_items = min(src_nritems - 8, push_items); /* keep 8 pointers behind in src */
    3243             : 
    3244         114 :         ret = tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
    3245             :                                    push_items); /* logged before either buffer is modified */
    3246         114 :         if (ret) {
    3247           0 :                 btrfs_abort_transaction(trans, root, ret);
    3248           0 :                 return ret; /* error from tree_mod_log_eb_copy */
    3249             :         }
    3250         228 :         copy_extent_buffer(dst, src,
    3251             :                            btrfs_node_key_ptr_offset(dst_nritems),
    3252             :                            btrfs_node_key_ptr_offset(0),
    3253             :                            push_items * sizeof(struct btrfs_key_ptr)); /* src slots [0, push_items) land after dst's existing pointers */
    3254             : 
    3255         114 :         if (push_items < src_nritems) {
    3256             :                 /*
    3257             :                  * don't call tree_mod_log_eb_move here, key removal was already
    3258             :                  * fully logged by tree_mod_log_eb_copy above.
    3259             :                  */
    3260         194 :                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
    3261             :                                       btrfs_node_key_ptr_offset(push_items),
    3262          97 :                                       (src_nritems - push_items) *
    3263             :                                       sizeof(struct btrfs_key_ptr)); /* slide the pointers that stay in src down to slot 0 */
    3264             :         }
    3265         114 :         btrfs_set_header_nritems(src, src_nritems - push_items);
    3266         114 :         btrfs_set_header_nritems(dst, dst_nritems + push_items);
    3267         114 :         btrfs_mark_buffer_dirty(src);
    3268         114 :         btrfs_mark_buffer_dirty(dst);
    3269             : 
    3270         114 :         return ret; /* 0 here: the non-zero case returned above */
    3271             : }
    3272             : 
    3273             : /*
    3274             :  * Try to push key pointers from the tail of src into the front of dst,
    3275             :  * the next node to the right in the tree.
    3276             :  *
    3277             :  * Returns 0 if some ptrs were pushed, < 0 if there was an error (the
    3278             :  * transaction is aborted first), and > 0 if nothing was pushed.
    3279             :  *
    3280             :  * This pushes at most src_nritems / 2 + 1 pointers and never empties src.
    3281             :  */
    3282          65 : static int balance_node_right(struct btrfs_trans_handle *trans,
    3283             :                               struct btrfs_root *root,
    3284         195 :                               struct extent_buffer *dst,
    3285         195 :                               struct extent_buffer *src)
    3286             : {
    3287             :         int push_items = 0;
    3288             :         int max_push;
    3289             :         int src_nritems;
    3290             :         int dst_nritems;
    3291             :         int ret = 0;
    3292             : 
    3293          65 :         WARN_ON(btrfs_header_generation(src) != trans->transid); /* both buffers are expected to carry the running transaction id */
    3294          65 :         WARN_ON(btrfs_header_generation(dst) != trans->transid);
    3295             : 
    3296          65 :         src_nritems = btrfs_header_nritems(src);
    3297          65 :         dst_nritems = btrfs_header_nritems(dst);
    3298          65 :         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; /* pointer slots still unused in dst */
    3299          65 :         if (push_items <= 0) /* dst has no unused slot */
    3300             :                 return 1;
    3301             : 
    3302          65 :         if (src_nritems < 4) /* src holds fewer than 4 pointers: leave it alone */
    3303             :                 return 1;
    3304             : 
    3305          65 :         max_push = src_nritems / 2 + 1;
    3306             :         /* don't try to empty the node */
    3307          65 :         if (max_push >= src_nritems)
    3308             :                 return 1;
    3309             : 
    3310          65 :         if (max_push < push_items) /* clamp to the per-call limit */
    3311             :                 push_items = max_push;
    3312             : 
    3313          65 :         tree_mod_log_eb_move(root->fs_info, dst, push_items, 0, dst_nritems); /* log the shift performed just below */
    3314         130 :         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
    3315             :                                       btrfs_node_key_ptr_offset(0),
    3316             :                                       (dst_nritems) *
    3317             :                                       sizeof(struct btrfs_key_ptr)); /* open push_items slots at the front of dst */
    3318             : 
    3319          65 :         ret = tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
    3320          65 :                                    src_nritems - push_items, push_items);
    3321          65 :         if (ret) {
    3322           0 :                 btrfs_abort_transaction(trans, root, ret);
    3323           0 :                 return ret; /* error from tree_mod_log_eb_copy */
    3324             :         }
    3325          65 :         copy_extent_buffer(dst, src,
    3326             :                            btrfs_node_key_ptr_offset(0),
    3327             :                            btrfs_node_key_ptr_offset(src_nritems - push_items),
    3328             :                            push_items * sizeof(struct btrfs_key_ptr)); /* last push_items pointers of src fill the gap in dst */
    3329             : 
    3330          65 :         btrfs_set_header_nritems(src, src_nritems - push_items);
    3331          65 :         btrfs_set_header_nritems(dst, dst_nritems + push_items);
    3332             : 
    3333          65 :         btrfs_mark_buffer_dirty(src);
    3334          65 :         btrfs_mark_buffer_dirty(dst);
    3335             : 
    3336          65 :         return ret; /* 0 here: the non-zero case returned above */
    3337             : }
    3338             : 
    3339             : /*
    3340             :  * Helper function to insert a new root level in the tree.
    3341             :  * A new node is allocated at 'level', and a single key pointer is inserted
    3342             :  * to point to the existing root; the node then becomes root->node.
    3343             :  *
    3344             :  * Returns zero on success or the PTR_ERR() of the failed block allocation.
    3345             :  */
    3346          99 : static noinline int insert_new_root(struct btrfs_trans_handle *trans,
    3347          99 :                            struct btrfs_root *root,
    3348             :                            struct btrfs_path *path, int level)
    3349             : {
    3350             :         u64 lower_gen;
    3351          99 :         struct extent_buffer *lower;
    3352         495 :         struct extent_buffer *c;
    3353             :         struct extent_buffer *old;
    3354             :         struct btrfs_disk_key lower_key;
    3355             : 
    3356          99 :         BUG_ON(path->nodes[level]); /* the new level must not be present in the path yet */
    3357          99 :         BUG_ON(path->nodes[level-1] != root->node); /* the level below must be the current root */
    3358             : 
    3359             :         lower = path->nodes[level-1];
    3360          99 :         if (level == 1) /* lower sits at level 0 here, so its first key is read as an item key */
    3361             :                 btrfs_item_key(lower, &lower_key, 0);
    3362             :         else
    3363           3 :                 btrfs_node_key(lower, &lower_key, 0);
    3364             : 
    3365          99 :         c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
    3366             :                                    root->root_key.objectid, &lower_key,
    3367          99 :                                    level, root->node->start, 0);
    3368          99 :         if (IS_ERR(c))
    3369           0 :                 return PTR_ERR(c);
    3370             : 
    3371          99 :         root_add_used(root, root->nodesize);
    3372             : 
    3373          99 :         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); /* zero the header, then fill in its fields */
    3374             :         btrfs_set_header_nritems(c, 1); /* the new root holds exactly one pointer */
    3375          99 :         btrfs_set_header_level(c, level);
    3376          99 :         btrfs_set_header_bytenr(c, c->start);
    3377          99 :         btrfs_set_header_generation(c, trans->transid);
    3378             :         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
    3379          99 :         btrfs_set_header_owner(c, root->root_key.objectid);
    3380             : 
    3381          99 :         write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(),
    3382             :                             BTRFS_FSID_SIZE);
    3383             : 
    3384          99 :         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
    3385             :                             btrfs_header_chunk_tree_uuid(c), BTRFS_UUID_SIZE);
    3386             : 
    3387             :         btrfs_set_node_key(c, &lower_key, 0); /* slot 0: first key of the old root ... */
    3388          99 :         btrfs_set_node_blockptr(c, 0, lower->start); /* ... its start offset ... */
    3389             :         lower_gen = btrfs_header_generation(lower);
    3390          99 :         WARN_ON(lower_gen != trans->transid);
    3391             : 
    3392             :         btrfs_set_node_ptr_generation(c, 0, lower_gen); /* ... and its generation */
    3393             : 
    3394          99 :         btrfs_mark_buffer_dirty(c);
    3395             : 
    3396          99 :         old = root->node;
    3397          99 :         tree_mod_log_set_root_pointer(root, c, 0); /* logged before the pointer is switched */
    3398          99 :         rcu_assign_pointer(root->node, c); /* c is the root from here on */
    3399             : 
    3400             :         /* the super has an extra ref to root->node */
    3401          99 :         free_extent_buffer(old);
    3402             : 
    3403          99 :         add_root_to_dirty_list(root);
    3404             :         extent_buffer_get(c); /* presumably the reference owned by path->nodes[level] -- TODO confirm */
    3405          99 :         path->nodes[level] = c;
    3406          99 :         path->locks[level] = BTRFS_WRITE_LOCK;
    3407          99 :         path->slots[level] = 0;
    3408          99 :         return 0;
    3409             : }
    3410             : 
    3411             : /*
    3412             :  * Worker function to insert a single key pointer in a node.
    3413             :  * The node must already have room for the pointer (BUG_ON otherwise).
    3414             :  *
    3415             :  * slot and level indicate where you want the key to go, and
    3416             :  * bytenr is the block the key points to.
    3417             :  */
    3418        5312 : static void insert_ptr(struct btrfs_trans_handle *trans,
    3419             :                        struct btrfs_root *root, struct btrfs_path *path,
    3420             :                        struct btrfs_disk_key *key, u64 bytenr,
    3421             :                        int slot, int level)
    3422             : {
    3423       10624 :         struct extent_buffer *lower;
    3424             :         int nritems;
    3425             :         int ret;
    3426             : 
    3427        5312 :         BUG_ON(!path->nodes[level]);
    3428        5312 :         btrfs_assert_tree_locked(path->nodes[level]);
    3429        5312 :         lower = path->nodes[level]; /* the node that receives the pointer */
    3430        5312 :         nritems = btrfs_header_nritems(lower);
    3431        5312 :         BUG_ON(slot > nritems); /* slot may be at most one past the last pointer */
    3432        5312 :         BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); /* the caller must have made room */
    3433        5312 :         if (slot != nritems) { /* not appending: open a gap at slot */
    3434        3242 :                 if (level)
    3435        3242 :                         tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
    3436             :                                              slot, nritems - slot);
    3437        9726 :                 memmove_extent_buffer(lower,
    3438             :                               btrfs_node_key_ptr_offset(slot + 1),
    3439             :                               btrfs_node_key_ptr_offset(slot),
    3440        3242 :                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
    3441             :         }
    3442        5312 :         if (level) {
    3443        5312 :                 ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
    3444             :                                               MOD_LOG_KEY_ADD, GFP_NOFS);
    3445        5312 :                 BUG_ON(ret < 0); /* a failed log insert is treated as fatal */
    3446             :         }
    3447             :         btrfs_set_node_key(lower, key, slot);
    3448             :         btrfs_set_node_blockptr(lower, slot, bytenr);
    3449        5312 :         WARN_ON(trans->transid == 0);
    3450        5312 :         btrfs_set_node_ptr_generation(lower, slot, trans->transid); /* the new pointer is stamped with the running transaction id */
    3451        5312 :         btrfs_set_header_nritems(lower, nritems + 1);
    3452        5312 :         btrfs_mark_buffer_dirty(lower);
    3453        5312 : }
    3454             : 
    3455             : /*
    3456             :  * Split the node at the specified level in path in two.
    3457             :  * The path is corrected to point to the appropriate node after the split.
    3458             :  *
    3459             :  * Before splitting a non-root node this tries to make some room by pushing
    3460             :  * left and right; if that leaves more than 3 free slots, it returns right away.
    3461             :  *
    3462             :  * Returns 0 on success and < 0 on failure.
    3463             :  */
    3464         180 : static noinline int split_node(struct btrfs_trans_handle *trans,
    3465             :                                struct btrfs_root *root,
    3466             :                                struct btrfs_path *path, int level)
    3467             : {
    3468         438 :         struct extent_buffer *c;
    3469         160 :         struct extent_buffer *split;
    3470             :         struct btrfs_disk_key disk_key;
    3471             :         int mid;
    3472             :         int ret;
    3473             :         u32 c_nritems;
    3474             : 
    3475         180 :         c = path->nodes[level];
    3476         180 :         WARN_ON(btrfs_header_generation(c) != trans->transid);
    3477         180 :         if (c == root->node) {
    3478             :                 /*
    3479             :                  * trying to split the root, lets make a new one
    3480             :                  *
    3481             :                  * tree mod log: We don't log_removal old root in
    3482             :                  * insert_new_root, because that root buffer will be kept as a
    3483             :                  * normal node. We are going to log removal of half of the
    3484             :                  * elements below with tree_mod_log_eb_copy. We're holding a
    3485             :                  * tree lock on the buffer, which is why we cannot race with
    3486             :                  * other tree_mod_log users.
    3487             :                  */
    3488           3 :                 ret = insert_new_root(trans, root, path, level + 1);
    3489           3 :                 if (ret)
    3490             :                         return ret;
    3491             :         } else {
    3492         177 :                 ret = push_nodes_for_insert(trans, root, path, level);
    3493         177 :                 c = path->nodes[level]; /* re-read: the push may have changed the node at this level */
    3494         501 :                 if (!ret && btrfs_header_nritems(c) <
    3495         162 :                     BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
    3496             :                         return 0; /* the push left more than 3 free slots: no split needed */
    3497          29 :                 if (ret < 0)
    3498             :                         return ret;
    3499             :         }
    3500             : 
    3501             :         c_nritems = btrfs_header_nritems(c);
    3502          32 :         mid = (c_nritems + 1) / 2; /* c keeps slots [0, mid); split receives [mid, c_nritems) */
    3503          32 :         btrfs_node_key(c, &disk_key, mid); /* first key of the new node */
    3504             : 
    3505          32 :         split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
    3506             :                                         root->root_key.objectid,
    3507             :                                         &disk_key, level, c->start, 0);
    3508          32 :         if (IS_ERR(split))
    3509           0 :                 return PTR_ERR(split);
    3510             : 
    3511          32 :         root_add_used(root, root->nodesize);
    3512             : 
    3513          32 :         memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); /* zero the header, then fill in its fields */
    3514             :         btrfs_set_header_level(split, btrfs_header_level(c));
    3515          32 :         btrfs_set_header_bytenr(split, split->start);
    3516          32 :         btrfs_set_header_generation(split, trans->transid);
    3517             :         btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
    3518          32 :         btrfs_set_header_owner(split, root->root_key.objectid);
    3519          32 :         write_extent_buffer(split, root->fs_info->fsid,
    3520             :                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
    3521          32 :         write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
    3522             :                             btrfs_header_chunk_tree_uuid(split),
    3523             :                             BTRFS_UUID_SIZE);
    3524             : 
    3525          32 :         ret = tree_mod_log_eb_copy(root->fs_info, split, c, 0,
    3526          32 :                                    mid, c_nritems - mid);
    3527          32 :         if (ret) {
    3528           0 :                 btrfs_abort_transaction(trans, root, ret);
    3529           0 :                 return ret;
    3530             :         }
    3531          64 :         copy_extent_buffer(split, c,
    3532             :                            btrfs_node_key_ptr_offset(0),
    3533             :                            btrfs_node_key_ptr_offset(mid),
    3534             :                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); /* upper part of c becomes the content of split */
    3535             :         btrfs_set_header_nritems(split, c_nritems - mid);
    3536             :         btrfs_set_header_nritems(c, mid);
    3537             :         ret = 0;
    3538             : 
    3539          32 :         btrfs_mark_buffer_dirty(c);
    3540          32 :         btrfs_mark_buffer_dirty(split);
    3541             : 
    3542          32 :         insert_ptr(trans, root, path, &disk_key, split->start,
    3543          32 :                    path->slots[level + 1] + 1, level + 1); /* link split into the parent, one slot after c */
    3544             : 
    3545          32 :         if (path->slots[level] >= mid) { /* the path's slot moved into split: follow it and drop c */
    3546          26 :                 path->slots[level] -= mid;
    3547          26 :                 btrfs_tree_unlock(c);
    3548          26 :                 free_extent_buffer(c);
    3549          26 :                 path->nodes[level] = split;
    3550          26 :                 path->slots[level + 1] += 1;
    3551             :         } else { /* the path's slot stayed in c: drop split */
    3552           6 :                 btrfs_tree_unlock(split);
    3553           6 :                 free_extent_buffer(split);
    3554             :         }
    3555             :         return ret;
    3556             : }
    3557             : 
    3558             : /*
    3559             :  * How many bytes are required to store the items in a leaf.  start
    3560             :  * and nr indicate which items in the leaf to check.  This totals up the
    3561             :  * space used both by the item structs and the item data; 0 when nr is 0.
    3562             :  */
    3563     1472520 : static int leaf_space_used(struct extent_buffer *l, int start, int nr)
    3564             : {
    3565             :         struct btrfs_item *start_item;
    3566             :         struct btrfs_item *end_item;
    3567             :         struct btrfs_map_token token;
    3568             :         int data_len;
    3569     1472520 :         int nritems = btrfs_header_nritems(l);
    3570     1472520 :         int end = min(nritems, start + nr) - 1; /* index of the last item measured, clamped to the leaf */
    3571             : 
    3572     1472520 :         if (!nr) /* nothing to measure */
    3573             :                 return 0;
    3574             :         btrfs_init_map_token(&token);
    3575             :         start_item = btrfs_item_nr(start);
    3576             :         end_item = btrfs_item_nr(end);
    3577     1469729 :         data_len = btrfs_token_item_offset(l, start_item, &token) +
    3578             :                 btrfs_token_item_size(l, start_item, &token); /* end of the start item's data */
    3579     1469192 :         data_len = data_len - btrfs_token_item_offset(l, end_item, &token); /* data bytes from the end item's offset up to that point */
    3580     1469192 :         data_len += sizeof(struct btrfs_item) * nr; /* NOTE(review): uses nr, not the clamped end - start + 1 -- confirm callers never pass start + nr > nritems */
    3581     1469192 :         WARN_ON(data_len < 0);
    3582     1469233 :         return data_len;
    3583             : }
    3584             : 
    3585             : /*
    3586             :  * The space between the end of the leaf items and
    3587             :  * the start of the leaf data.  IOW, how much room
    3588             :  * the leaf has left for both items and data.  May return a negative value.
    3589             :  */
    3590     1340412 : noinline int btrfs_leaf_free_space(struct btrfs_root *root,
    3591     1340412 :                                    struct extent_buffer *leaf)
    3592             : {
    3593     1340412 :         int nritems = btrfs_header_nritems(leaf);
    3594             :         int ret;
    3595     1340412 :         ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); /* leaf data size minus what all current items use */
    3596     1339636 :         if (ret < 0) { /* more used than the leaf can hold: log it, the negative value is still returned */
    3597           0 :                 btrfs_crit(root->fs_info,
    3598             :                         "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
    3599             :                        ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
    3600             :                        leaf_space_used(leaf, 0, nritems), nritems);
    3601             :         }
    3602     1339636 :         return ret;
    3603             : }
    3604             : 
    3605             : /*
    3606             :  * min slot controls the lowest index we're willing to push to the
    3607             :  * right.  We'll push up to and including min_slot, but no lower.
    3608             :  * Returns 0 if items were pushed and 1 if none were; right is unlocked and released unless the path moves onto it.
    3609             :  */
    3609        5925 : static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
    3610             :                                       struct btrfs_root *root,
    3611             :                                       struct btrfs_path *path,
    3612             :                                       int data_size, int empty,
    3613        8795 :                                       struct extent_buffer *right,
    3614             :                                       int free_space, u32 left_nritems,
    3615             :                                       u32 min_slot)
    3616             : {
    3617       10322 :         struct extent_buffer *left = path->nodes[0];
    3618        5925 :         struct extent_buffer *upper = path->nodes[1];
    3619             :         struct btrfs_map_token token;
    3620             :         struct btrfs_disk_key disk_key;
    3621             :         int slot;
    3622             :         u32 i;
    3623             :         int push_space = 0;
    3624             :         int push_items = 0;
    3625             :         struct btrfs_item *item;
    3626             :         u32 nr;
    3627             :         u32 right_nritems;
    3628             :         u32 data_end;
    3629             :         u32 this_item_size;
    3630             : 
    3631             :         btrfs_init_map_token(&token);
    3632             : 
    3633        5925 :         if (empty) /* nr is the lowest item index the scan below may reach */
    3634             :                 nr = 0;
    3635             :         else
    3636        4988 :                 nr = max_t(u32, 1, min_slot); /* when not emptying, item 0 always stays in left */
    3637             : 
    3638        5925 :         if (path->slots[0] >= left_nritems) /* the path's slot is past the last item: count data_size up front */
    3639             :                 push_space += data_size;
    3640             : 
    3641        5925 :         slot = path->slots[1];
    3642        5925 :         i = left_nritems - 1;
    3643      120382 :         while (i >= nr) { /* walk left's items from the last one downward */
    3644      114453 :                 item = btrfs_item_nr(i);
    3645             : 
    3646      114453 :                 if (!empty && push_items > 0) {
    3647      101420 :                         if (path->slots[0] > i) /* stop before pushing items below the path's slot */
    3648             :                                 break;
    3649      100659 :                         if (path->slots[0] == i) {
    3650        1249 :                                 int space = btrfs_leaf_free_space(root, left);
    3651        1249 :                                 if (space + push_space * 2 > free_space)
    3652             :                                         break;
    3653             :                         }
    3654             :                 }
    3655             : 
    3656      113137 :                 if (path->slots[0] == i)
    3657        1158 :                         push_space += data_size;
    3658             : 
    3659             :                 this_item_size = btrfs_item_size(left, item);
    3660      113137 :                 if (this_item_size + sizeof(*item) + push_space > free_space) /* this item no longer fits in right */
    3661             :                         break;
    3662             : 
    3663      108846 :                 push_items++;
    3664      108846 :                 push_space += this_item_size + sizeof(*item);
    3665      108846 :                 if (i == 0) /* i is unsigned: leave the loop before i-- would wrap */
    3666             :                         break;
    3667      108532 :                 i--;
    3668             :         }
    3669             : 
    3670        5925 :         if (push_items == 0)
    3671             :                 goto out_unlock;
    3672             : 
    3673        4398 :         WARN_ON(!empty && push_items == left_nritems);
    3674             : 
    3675             :         /* push left to right */
    3676             :         right_nritems = btrfs_header_nritems(right);
    3677             : 
    3678        4398 :         push_space = btrfs_item_end_nr(left, left_nritems - push_items); /* push_space is recomputed: bytes of item data to move */
    3679        4398 :         push_space -= leaf_data_end(root, left);
    3680             : 
    3681             :         /* make room in the right data area */
    3682        4398 :         data_end = leaf_data_end(root, right);
    3683        4398 :         memmove_extent_buffer(right,
    3684             :                               btrfs_leaf_data(right) + data_end - push_space,
    3685             :                               btrfs_leaf_data(right) + data_end,
    3686        4398 :                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
    3687             : 
    3688             :         /* copy from the left data area */
    3689       13194 :         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
    3690        8796 :                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
    3691        4398 :                      btrfs_leaf_data(left) + leaf_data_end(root, left),
    3692             :                      push_space);
    3693             : 
    3694        8796 :         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
    3695             :                               btrfs_item_nr_offset(0),
    3696             :                               right_nritems * sizeof(struct btrfs_item)); /* shift right's existing item structs up by push_items slots */
    3697             : 
    3698             :         /* copy the items from left to right */
    3699        4398 :         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
    3700             :                    btrfs_item_nr_offset(left_nritems - push_items),
    3701             :                    push_items * sizeof(struct btrfs_item));
    3702             : 
    3703             :         /* update the item pointers */
    3704        4397 :         right_nritems += push_items;
    3705             :         btrfs_set_header_nritems(right, right_nritems);
    3706        4397 :         push_space = BTRFS_LEAF_DATA_SIZE(root); /* push_space is reused as the running data offset */
    3707      442960 :         for (i = 0; i < right_nritems; i++) {
    3708      438563 :                 item = btrfs_item_nr(i);
    3709      877126 :                 push_space -= btrfs_token_item_size(right, item, &token);
    3710             :                 btrfs_set_token_item_offset(right, item, push_space, &token);
    3711             :         }
    3712             : 
    3713             :         left_nritems -= push_items;
    3714             :         btrfs_set_header_nritems(left, left_nritems);
    3715             : 
    3716        4397 :         if (left_nritems)
    3717        4083 :                 btrfs_mark_buffer_dirty(left);
    3718             :         else
    3719         314 :                 clean_tree_block(trans, root, left); /* left is now empty: cleaned instead of dirtied */
    3720             : 
    3721        4398 :         btrfs_mark_buffer_dirty(right);
    3722             : 
    3723             :         btrfs_item_key(right, &disk_key, 0);
    3724        4398 :         btrfs_set_node_key(upper, &disk_key, slot + 1); /* right's first key changed: refresh its key in the parent */
    3725        4397 :         btrfs_mark_buffer_dirty(upper);
    3726             : 
    3727             :         /* then fixup the leaf pointer in the path */
    3728        4398 :         if (path->slots[0] >= left_nritems) { /* the path's slot now lives in right */
    3729        1164 :                 path->slots[0] -= left_nritems;
    3730        2328 :                 if (btrfs_header_nritems(path->nodes[0]) == 0)
    3731         314 :                         clean_tree_block(trans, root, path->nodes[0]); /* NOTE(review): path->nodes[0] is still left here, already passed to clean_tree_block above when it became empty */
    3732        1164 :                 btrfs_tree_unlock(path->nodes[0]);
    3733        1164 :                 free_extent_buffer(path->nodes[0]);
    3734        1164 :                 path->nodes[0] = right;
    3735        1164 :                 path->slots[1] += 1;
    3736             :         } else { /* the path stays in left: right is no longer needed */
    3737        3234 :                 btrfs_tree_unlock(right);
    3738        3234 :                 free_extent_buffer(right);
    3739             :         }
    3740             :         return 0;
    3741             : 
    3742             : out_unlock:
    3743        1527 :         btrfs_tree_unlock(right);
    3744        1527 :         free_extent_buffer(right);
    3745        1527 :         return 1;
    3746             : }
    3747             : 
    3748             : /*
    3749             :  * push some data in the path leaf to its right sibling, trying to free up at
    3750             :  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
    3751             :  *
    3752             :  * returns 1 if there is no parent or right sibling, if the sibling can't be
    3753             :  * read or cowed or lacks room, or if the path leaf has no items.
    3754             :  *
    3755             :  * the move itself is done by __push_leaf_right, starting from min_slot to the
    3756             :  * end of the leaf.  It won't push any slot lower than min_slot
    3757             :  */
    3758       25319 : static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
    3759             :                            *root, struct btrfs_path *path,
    3760             :                            int min_data_size, int data_size,
    3761             :                            int empty, u32 min_slot)
    3762             : {
    3763       31370 :         struct extent_buffer *left = path->nodes[0];
    3764             :         struct extent_buffer *right;
    3765       14758 :         struct extent_buffer *upper;
    3766             :         int slot;
    3767             :         int free_space;
    3768             :         u32 left_nritems;
    3769             :         int ret;
    3770             : 
    3771       25319 :         if (!path->nodes[1])
    3772             :                 return 1;
    3773             : 
    3774       14758 :         slot = path->slots[1];
    3775             :         upper = path->nodes[1];
    3776       29516 :         if (slot >= btrfs_header_nritems(upper) - 1)
    3777             :                 return 1;
    3778             : 
    3779        9422 :         btrfs_assert_tree_locked(path->nodes[1]);
    3780             : 
    3781        9422 :         right = read_node_slot(root, upper, slot + 1);
    3782        9422 :         if (right == NULL)
    3783             :                 return 1;
    3784             : 
    3785        9422 :         btrfs_tree_lock(right);
    3786        9422 :         btrfs_set_lock_blocking(right);
    3787             : 
    3788        9422 :         free_space = btrfs_leaf_free_space(root, right);
    3789        9422 :         if (free_space < data_size)
    3790             :                 goto out_unlock;
    3791             : 
    3792             :         /* cow the right leaf, then re-check its free space */
    3793        6051 :         ret = btrfs_cow_block(trans, root, right, upper,
    3794             :                               slot + 1, &right);
    3795        6051 :         if (ret)
    3796             :                 goto out_unlock;
    3797             : 
    3798        6051 :         free_space = btrfs_leaf_free_space(root, right);
    3799        6051 :         if (free_space < data_size)
    3800             :                 goto out_unlock;
    3801             : 
    3802             :         left_nritems = btrfs_header_nritems(left);
    3803        6051 :         if (left_nritems == 0)
    3804             :                 goto out_unlock;
    3805             : 
    3806        6051 :         if (path->slots[0] == left_nritems && !empty) {
    3807             :                 /* Key greater than all keys in the leaf, right neighbor has
    3808             :                  * enough room for it and we're not emptying our leaf to delete
    3809             :                  * it, therefore use right neighbor to insert the new item and
    3810             :                  * no need to touch/dirty our left leaf. */
    3811         126 :                 btrfs_tree_unlock(left);
    3812         126 :                 free_extent_buffer(left);
    3813         126 :                 path->nodes[0] = right;
    3814         126 :                 path->slots[0] = 0;
    3815         126 :                 path->slots[1]++;
    3816         126 :                 return 0;
    3817             :         }
    3818             : 
    3819        5925 :         return __push_leaf_right(trans, root, path, min_data_size, empty,
    3820             :                                 right, free_space, left_nritems, min_slot);
    3821             : out_unlock:
    3822        3371 :         btrfs_tree_unlock(right);
    3823        3371 :         free_extent_buffer(right);
    3824        3371 :         return 1;
    3825             : }
    3826             : 
    3827             : /*
    3828             :  * move items from the front of the path leaf into 'left', which has
    3829             :  * free_space bytes available.  returns 0 if items were moved, 1 if none were.
    3830             :  * 'left' is unlocked and freed unless the path ends up pointing at it.
    3831             :  * max_slot can put a limit on how far into the leaf we'll push items.  The
    3832             :  * item at 'max_slot' won't be touched.  Use (u32)-1 to make us do all the
    3833             :  * items
    3834             :  */
    3835        7942 : static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
    3836             :                                      struct btrfs_root *root,
    3837             :                                      struct btrfs_path *path, int data_size,
    3838       15128 :                                      int empty, struct extent_buffer *left,
    3839             :                                      int free_space, u32 right_nritems,
    3840             :                                      u32 max_slot)
    3841             : {
    3842             :         struct btrfs_disk_key disk_key;
    3843       20842 :         struct extent_buffer *right = path->nodes[0];
    3844             :         int i;
    3845             :         int push_space = 0;
    3846             :         int push_items = 0;
    3847             :         struct btrfs_item *item;
    3848             :         u32 old_left_nritems;
    3849             :         u32 nr;
    3850             :         int ret = 0;
    3851             :         u32 this_item_size;
    3852             :         u32 old_left_item_size;
    3853             :         struct btrfs_map_token token;
    3854             : 
    3855             :         btrfs_init_map_token(&token);
    3856             : 
    3857        7942 :         if (empty)
    3858        3080 :                 nr = min(right_nritems, max_slot);
    3859             :         else
    3860        4862 :                 nr = min(right_nritems - 1, max_slot);
    3861             : 
    3862      113871 :         for (i = 0; i < nr; i++) {
    3863             :                 item = btrfs_item_nr(i);
    3864             : 
    3865      112954 :                 if (!empty && push_items > 0) {
    3866       88912 :                         if (path->slots[0] < i)
    3867             :                                 break;
    3868       88345 :                         if (path->slots[0] == i) {
    3869         889 :                                 int space = btrfs_leaf_free_space(root, right);
    3870         889 :                                 if (space + push_space * 2 > free_space)
    3871             :                                         break;
    3872             :                         }
    3873             :                 }
    3874             : 
    3875      112132 :                 if (path->slots[0] == i)
    3876        1635 :                         push_space += data_size;
    3877             : 
    3878             :                 this_item_size = btrfs_item_size(right, item);
    3879      112131 :                 if (this_item_size + sizeof(*item) + push_space > free_space)
    3880             :                         break;
    3881             : 
    3882      105929 :                 push_items++;
    3883      105929 :                 push_space += this_item_size + sizeof(*item);
    3884             :         }
    3885             : 
    3886        7941 :         if (push_items == 0) {
    3887             :                 ret = 1;
    3888             :                 goto out;
    3889             :         }
    3890        8716 :         WARN_ON(!empty && push_items == btrfs_header_nritems(right));
    3891             : 
    3892             :         /* copy the item headers, then their data, from right to left */
    3893       15126 :         copy_extent_buffer(left, right,
    3894             :                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
    3895             :                            btrfs_item_nr_offset(0),
    3896             :                            push_items * sizeof(struct btrfs_item));
    3897             : 
    3898       10086 :         push_space = BTRFS_LEAF_DATA_SIZE(root) -
    3899        5043 :                      btrfs_item_offset_nr(right, push_items - 1);
    3900             : 
    3901       10086 :         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
    3902        5043 :                      leaf_data_end(root, left) - push_space,
    3903             :                      btrfs_leaf_data(right) +
    3904             :                      btrfs_item_offset_nr(right, push_items - 1),
    3905             :                      push_space);
    3906             :         old_left_nritems = btrfs_header_nritems(left);
    3907        5043 :         BUG_ON(old_left_nritems <= 0);
    3908             : 
    3909        5043 :         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
    3910      110984 :         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
    3911             :                 u32 ioff;
    3912             : 
    3913             :                 item = btrfs_item_nr(i);
    3914             : 
    3915             :                 ioff = btrfs_token_item_offset(left, item, &token);
    3916      105941 :                 btrfs_set_token_item_offset(left, item,
    3917      105941 :                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size),
    3918             :                       &token);
    3919             :         }
    3920             :         btrfs_set_header_nritems(left, old_left_nritems + push_items);
    3921             : 
    3922             :         /* fixup right leaf: compact what remains of its data and item headers */
    3923        5043 :         if (push_items > right_nritems)
    3924           0 :                 WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
    3925             :                        right_nritems);
    3926             : 
    3927        5043 :         if (push_items < right_nritems) {
    3928        4183 :                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
    3929        4183 :                                                   leaf_data_end(root, right);
    3930       12549 :                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
    3931        8366 :                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
    3932             :                                       btrfs_leaf_data(right) +
    3933        4183 :                                       leaf_data_end(root, right), push_space);
    3934             : 
    3935        8366 :                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
    3936             :                               btrfs_item_nr_offset(push_items),
    3937        4183 :                              (btrfs_header_nritems(right) - push_items) *
    3938             :                              sizeof(struct btrfs_item));
    3939             :         }
    3940        5043 :         right_nritems -= push_items;
    3941             :         btrfs_set_header_nritems(right, right_nritems);
    3942        5043 :         push_space = BTRFS_LEAF_DATA_SIZE(root);
    3943      289376 :         for (i = 0; i < right_nritems; i++) {
    3944             :                 item = btrfs_item_nr(i);
    3945             : 
    3946      568666 :                 push_space = push_space - btrfs_token_item_size(right,
    3947             :                                                                 item, &token);
    3948             :                 btrfs_set_token_item_offset(right, item, push_space, &token);
    3949             :         }
    3950             : 
    3951        5043 :         btrfs_mark_buffer_dirty(left);
    3952        5043 :         if (right_nritems)
    3953        4183 :                 btrfs_mark_buffer_dirty(right);
    3954             :         else
    3955         860 :                 clean_tree_block(trans, root, right);
    3956             : 
    3957             :         btrfs_item_key(right, &disk_key, 0);
    3958        5043 :         fixup_low_keys(root, path, &disk_key, 1);
    3959             : 
    3960             :         /* then make the path point at the leaf that now holds our slot */
    3961        5043 :         if (path->slots[0] < push_items) {
    3962        1437 :                 path->slots[0] += old_left_nritems;
    3963        1437 :                 btrfs_tree_unlock(path->nodes[0]);
    3964        1437 :                 free_extent_buffer(path->nodes[0]);
    3965        1437 :                 path->nodes[0] = left;
    3966        1437 :                 path->slots[1] -= 1;
    3967             :         } else {
    3968        3606 :                 btrfs_tree_unlock(left);
    3969        3606 :                 free_extent_buffer(left);
    3970        3606 :                 path->slots[0] -= push_items;
    3971             :         }
    3972        5043 :         BUG_ON(path->slots[0] < 0);
    3973             :         return ret;
    3974             : out:
    3975        2899 :         btrfs_tree_unlock(left);
    3976        2899 :         free_extent_buffer(left);
    3977        2899 :         return ret;
    3978             : }
    3979             : 
    3980             : /*
    3981             :  * push some data in the path leaf to its left sibling, trying to free up at
    3982             :  * least data_size bytes.  returns 0 if the push worked, 1 if it was not
    3983             :  * possible; btrfs_cow_block errors other than -ENOSPC are returned as is.
    3984             :  * max_slot can put a limit on how far into the leaf we'll push items.  The
    3985             :  * item at 'max_slot' won't be touched.  Use (u32)-1 to make us push all the
    3986             :  * items
    3987             :  */
    3988       22351 : static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
    3989             :                           *root, struct btrfs_path *path, int min_data_size,
    3990             :                           int data_size, int empty, u32 max_slot)
    3991             : {
    3992       33615 :         struct extent_buffer *right = path->nodes[0];
    3993             :         struct extent_buffer *left;
    3994             :         int slot;
    3995             :         int free_space;
    3996             :         u32 right_nritems;
    3997             :         int ret = 0;
    3998             : 
    3999       22351 :         slot = path->slots[1];
    4000       22351 :         if (slot == 0)
    4001             :                 return 1;
    4002       11264 :         if (!path->nodes[1])
    4003             :                 return 1;
    4004             : 
    4005             :         right_nritems = btrfs_header_nritems(right);
    4006       11264 :         if (right_nritems == 0)
    4007             :                 return 1;
    4008             : 
    4009       11264 :         btrfs_assert_tree_locked(path->nodes[1]);
    4010             : 
    4011       11264 :         left = read_node_slot(root, path->nodes[1], slot - 1);
    4012       11264 :         if (left == NULL)
    4013             :                 return 1;
    4014             : 
    4015       11264 :         btrfs_tree_lock(left);
    4016       11264 :         btrfs_set_lock_blocking(left);
    4017             : 
    4018       11264 :         free_space = btrfs_leaf_free_space(root, left);
    4019       11264 :         if (free_space < data_size) {
    4020             :                 ret = 1;
    4021             :                 goto out;
    4022             :         }
    4023             : 
    4024             :         /* cow the left leaf, then re-check its free space */
    4025        7942 :         ret = btrfs_cow_block(trans, root, left,
    4026             :                               path->nodes[1], slot - 1, &left);
    4027        7942 :         if (ret) {
    4028             :                 /* we hit -ENOSPC, but it isn't fatal here */
    4029           0 :                 if (ret == -ENOSPC)
    4030             :                         ret = 1;
    4031             :                 goto out;
    4032             :         }
    4033             : 
    4034        7942 :         free_space = btrfs_leaf_free_space(root, left);
    4035        7942 :         if (free_space < data_size) {
    4036             :                 ret = 1;
    4037             :                 goto out;
    4038             :         }
    4039             : 
    4040        7942 :         return __push_leaf_left(trans, root, path, min_data_size,
    4041             :                                empty, left, free_space, right_nritems,
    4042             :                                max_slot);
    4043             : out:
    4044        3322 :         btrfs_tree_unlock(left);
    4045        3322 :         free_extent_buffer(left);
    4046        3322 :         return ret;
    4047             : }
    4048             : 
    4049             : /*
    4050             :  * helper for split_leaf: copy items mid..nritems-1 of leaf l into leaf right,
    4051             :  * insert a pointer to right in the parent and repoint the path if mid <= slot.
    4052             :  */
    4053        4137 : static noinline void copy_for_split(struct btrfs_trans_handle *trans,
    4054             :                                     struct btrfs_root *root,
    4055             :                                     struct btrfs_path *path,
    4056        4137 :                                     struct extent_buffer *l,
    4057        4137 :                                     struct extent_buffer *right,
    4058             :                                     int slot, int mid, int nritems)
    4059             : {
    4060             :         int data_copy_size;
    4061             :         int rt_data_off;
    4062             :         int i;
    4063             :         struct btrfs_disk_key disk_key;
    4064             :         struct btrfs_map_token token;
    4065             : 
    4066             :         btrfs_init_map_token(&token);
    4067             : 
    4068        4137 :         nritems = nritems - mid;
    4069        4137 :         btrfs_set_header_nritems(right, nritems);
    4070        4137 :         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
    4071             : 
    4072        4137 :         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
    4073             :                            btrfs_item_nr_offset(mid),
    4074             :                            nritems * sizeof(struct btrfs_item));
    4075             : 
    4076        8274 :         copy_extent_buffer(right, l,
    4077        4137 :                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
    4078             :                      data_copy_size, btrfs_leaf_data(l) +
    4079        4137 :                      leaf_data_end(root, l), data_copy_size);
    4080             : 
    4081        8275 :         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
    4082             :                       btrfs_item_end_nr(l, mid);
    4083             : 
    4084      140992 :         for (i = 0; i < nritems; i++) {
    4085             :                 struct btrfs_item *item = btrfs_item_nr(i);
    4086             :                 u32 ioff;
    4087             : 
    4088             :                 ioff = btrfs_token_item_offset(right, item, &token);
    4089      136854 :                 btrfs_set_token_item_offset(right, item,
    4090             :                                             ioff + rt_data_off, &token);
    4091             :         }
    4092             : 
    4093        4137 :         btrfs_set_header_nritems(l, mid);
    4094             :         btrfs_item_key(right, &disk_key, 0);
    4095        4137 :         insert_ptr(trans, root, path, &disk_key, right->start,
    4096        4137 :                    path->slots[1] + 1, 1);
    4097             : 
    4098        4137 :         btrfs_mark_buffer_dirty(right);
    4099        4137 :         btrfs_mark_buffer_dirty(l);
    4100        4137 :         BUG_ON(path->slots[0] != slot);
    4101             : 
    4102        4137 :         if (mid <= slot) {
    4103        3295 :                 btrfs_tree_unlock(path->nodes[0]);
    4104        3295 :                 free_extent_buffer(path->nodes[0]);
    4105        3295 :                 path->nodes[0] = right;
    4106        3295 :                 path->slots[0] -= mid;
    4107        3295 :                 path->slots[1] += 1;
    4108             :         } else {
    4109         842 :                 btrfs_tree_unlock(right);
    4110         842 :                 free_extent_buffer(right);
    4111             :         }
    4112             : 
    4113        4137 :         BUG_ON(path->slots[0] < 0);
    4114        4137 : }
    4115             : 
    4116             : /*
    4117             :  * double splits happen when we need to insert a big item in the middle
    4118             :  * of a leaf.  A double split can leave us with 3 mostly empty leaves:
    4119             :  * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
    4120             :  *          A                 B                 C
    4121             :  *
    4122             :  * We avoid this by trying to push the items on either side of our target
    4123             :  * into the adjacent leaves.  If all goes well we can avoid the double split
    4124             :  * completely.  Returns 1 if both pushes were tried and failed, < 0 on error, else 0.
    4125             :  */
    4126         946 : static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
    4127             :                                           struct btrfs_root *root,
    4128             :                                           struct btrfs_path *path,
    4129             :                                           int data_size)
    4130             : {
    4131             :         int ret;
    4132             :         int progress = 0;
    4133             :         int slot;
    4134             :         u32 nritems;
    4135             :         int space_needed = data_size;
    4136             : 
    4137         946 :         slot = path->slots[0];
    4138        2838 :         if (slot < btrfs_header_nritems(path->nodes[0]))
    4139         946 :                 space_needed -= btrfs_leaf_free_space(root, path->nodes[0]);
    4140             : 
    4141             :         /*
    4142             :          * try to push all the items after our slot into the
    4143             :          * right leaf
    4144             :          */
    4145         946 :         ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
    4146         946 :         if (ret < 0)
    4147             :                 return ret;
    4148             : 
    4149         946 :         if (ret == 0)
    4150             :                 progress++;
    4151             : 
    4152         946 :         nritems = btrfs_header_nritems(path->nodes[0]);
    4153             :         /*
    4154             :          * our goal is to get our slot at the start or end of a leaf.  If
    4155             :          * we've done so we're done
    4156             :          */
    4157         946 :         if (path->slots[0] == 0 || path->slots[0] == nritems)
    4158             :                 return 0;
    4159             : 
    4160         943 :         if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
    4161             :                 return 0;
    4162             : 
    4163             :         /* try to push all the items before our slot into the left leaf */
    4164         943 :         slot = path->slots[0];
    4165         943 :         ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
    4166         943 :         if (ret < 0)
    4167             :                 return ret;
    4168             : 
    4169         943 :         if (ret == 0)
    4170           0 :                 progress++;
    4171             : 
    4172         943 :         if (progress)
    4173             :                 return 0;
    4174         943 :         return 1;
    4175             : }
    4176             : 
    4177             : /*
    4178             :  * split the path's leaf in two, making sure there is at least data_size
    4179             :  * available for the resulting leaf level of the path.
    4180             :  *
    4181             :  * returns 0 if all went well and < 0 on failure.
    4182             :  */
    4183       13447 : static noinline int split_leaf(struct btrfs_trans_handle *trans,
    4184             :                                struct btrfs_root *root,
    4185             :                                struct btrfs_key *ins_key,
    4186             :                                struct btrfs_path *path, int data_size,
    4187             :                                int extend)
    4188             : {
    4189             :         struct btrfs_disk_key disk_key;
    4190       17649 :         struct extent_buffer *l;
    4191             :         u32 nritems;
    4192             :         int mid;
    4193             :         int slot;
    4194       22263 :         struct extent_buffer *right;
    4195             :         int ret = 0;
    4196             :         int wret;
    4197             :         int split;
    4198             :         int num_doubles = 0;
    4199             :         int tried_avoid_double = 0;
    4200             : 
    4201       13447 :         l = path->nodes[0];
    4202       13447 :         slot = path->slots[0];
    4203       17875 :         if (extend && data_size + btrfs_item_size_nr(l, slot) +
    4204        2214 :             sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
    4205             :                 return -EOVERFLOW;
    4206             : 
    4207             :         /* first try to make some room by pushing left and right */
    4208       11519 :         if (data_size && path->nodes[1]) {
    4209             :                 int space_needed = data_size;
    4210             : 
    4211       22846 :                 if (slot < btrfs_header_nritems(l))
    4212        8627 :                         space_needed -= btrfs_leaf_free_space(root, l);
    4213             : 
    4214       11423 :                 wret = push_leaf_right(trans, root, path, space_needed,
    4215             :                                        space_needed, 0, 0);
    4216       11423 :                 if (wret < 0)
    4217             :                         return wret;
    4218       11423 :                 if (wret) {
    4219        7384 :                         wret = push_leaf_left(trans, root, path, space_needed,
    4220             :                                               space_needed, 0, (u32)-1);
    4221        7384 :                         if (wret < 0)
    4222             :                                 return wret;
    4223             :                 }
    4224       11423 :                 l = path->nodes[0];
    4225             : 
    4226             :                 /* did the pushes work? */
    4227       11423 :                 if (btrfs_leaf_free_space(root, l) >= data_size)
    4228             :                         return 0;
    4229             :         }
    4230             : 
    4231        4337 :         if (!path->nodes[1]) {
    4232          96 :                 ret = insert_new_root(trans, root, path, 1);
    4233          96 :                 if (ret)
    4234             :                         return ret;
    4235             :         }
    4236             : again:
    4237             :         split = 1;
    4238        6226 :         l = path->nodes[0];
    4239        6226 :         slot = path->slots[0];
    4240             :         nritems = btrfs_header_nritems(l);
    4241        6226 :         mid = (nritems + 1) / 2;
    4242             : 
    4243        6226 :         if (mid <= slot) {
    4244        5390 :                 if (nritems == 1 ||
    4245        2678 :                     leaf_space_used(l, mid, nritems - mid) + data_size >
    4246        2678 :                         BTRFS_LEAF_DATA_SIZE(root)) {
    4247         504 :                         if (slot >= nritems) {
    4248             :                                 split = 0;
    4249             :                         } else {
    4250             :                                 mid = slot;
    4251         652 :                                 if (mid != nritems &&
    4252         326 :                                     leaf_space_used(l, mid, nritems - mid) +
    4253         326 :                                     data_size > BTRFS_LEAF_DATA_SIZE(root)) {
    4254         293 :                                         if (data_size && !tried_avoid_double)
    4255             :                                                 goto push_for_double;
    4256             :                                         split = 2;
    4257             :                                 }
    4258             :                         }
    4259             :                 }
    4260             :         } else {
    4261        7028 :                 if (leaf_space_used(l, 0, mid) + data_size >
    4262        3514 :                         BTRFS_LEAF_DATA_SIZE(root)) {
    4263        2672 :                         if (!extend && data_size && slot == 0) {
    4264             :                                 split = 0;
    4265        1707 :                         } else if ((extend || !data_size) && slot == 0) {
    4266             :                                 mid = 1;
    4267             :                         } else {
    4268             :                                 mid = slot;
    4269        3414 :                                 if (mid != nritems &&
    4270        1707 :                                     leaf_space_used(l, mid, nritems - mid) +
    4271        1707 :                                     data_size > BTRFS_LEAF_DATA_SIZE(root)) {
    4272        1596 :                                         if (data_size && !tried_avoid_double)
    4273             :                                                 goto push_for_double;
    4274             :                                         split = 2;
    4275             :                                 }
    4276             :                         }
    4277             :                 }
    4278             :         }
    4279             : 
    4280        5280 :         if (split == 0)
    4281             :                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
    4282             :         else
    4283             :                 btrfs_item_key(l, &disk_key, mid);
    4284             : 
    4285        5280 :         right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
    4286             :                                         root->root_key.objectid,
    4287             :                                         &disk_key, 0, l->start, 0);
    4288        5280 :         if (IS_ERR(right))
    4289           0 :                 return PTR_ERR(right);
    4290             : 
    4291        5280 :         root_add_used(root, root->leafsize);
    4292             : 
    4293        5280 :         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
    4294        5280 :         btrfs_set_header_bytenr(right, right->start);
    4295        5280 :         btrfs_set_header_generation(right, trans->transid);
    4296             :         btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
    4297        5280 :         btrfs_set_header_owner(right, root->root_key.objectid);
    4298             :         btrfs_set_header_level(right, 0);
    4299        5280 :         write_extent_buffer(right, root->fs_info->fsid,
    4300             :                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
    4301             : 
    4302        5280 :         write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
    4303             :                             btrfs_header_chunk_tree_uuid(right),
    4304             :                             BTRFS_UUID_SIZE);
    4305             : 
    4306        5280 :         if (split == 0) {
    4307        1143 :                 if (mid <= slot) {
    4308             :                         btrfs_set_header_nritems(right, 0);
    4309         178 :                         insert_ptr(trans, root, path, &disk_key, right->start,
    4310         178 :                                    path->slots[1] + 1, 1);
    4311         178 :                         btrfs_tree_unlock(path->nodes[0]);
    4312         178 :                         free_extent_buffer(path->nodes[0]);
    4313         178 :                         path->nodes[0] = right;
    4314         178 :                         path->slots[0] = 0;
    4315         178 :                         path->slots[1] += 1;
    4316             :                 } else {
    4317             :                         btrfs_set_header_nritems(right, 0);
    4318         965 :                         insert_ptr(trans, root, path, &disk_key, right->start,
    4319             :                                           path->slots[1], 1);
    4320         965 :                         btrfs_tree_unlock(path->nodes[0]);
    4321         965 :                         free_extent_buffer(path->nodes[0]);
    4322         965 :                         path->nodes[0] = right;
    4323         965 :                         path->slots[0] = 0;
    4324         965 :                         if (path->slots[1] == 0)
    4325           0 :                                 fixup_low_keys(root, path, &disk_key, 1);
    4326             :                 }
    4327        1143 :                 btrfs_mark_buffer_dirty(right);
    4328        1143 :                 return ret;
    4329             :         }
    4330             : 
    4331        4137 :         copy_for_split(trans, root, path, l, right, slot, mid, nritems);
    4332             : 
    4333        4137 :         if (split == 2) {
    4334         943 :                 BUG_ON(num_doubles != 0);
    4335         943 :                 num_doubles++;
    4336         943 :                 goto again;
    4337             :         }
    4338             : 
    4339             :         return 0;
    4340             : 
    4341             : push_for_double:
    4342         946 :         push_for_double_split(trans, root, path, data_size);
    4343             :         tried_avoid_double = 1;
    4344         946 :         if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
    4345             :                 return 0;
    4346             :         goto again;
    4347             : }
    4348             : /*
                     :  * Make sure the leaf holding the item at path->slots[0] has at least
                     :  * ins_len bytes free, so the caller can split or duplicate that item
                     :  * in place.  Only BTRFS_EXTENT_DATA_KEY and BTRFS_EXTENT_CSUM_KEY items
                     :  * are accepted (BUG_ON otherwise).
                     :  *
                     :  * If the leaf is too full, the path is released and the key is looked
                     :  * up again with keep_locks and search_for_split set, then the leaf is
                     :  * split.  The tree may change while the path is released, so the item
                     :  * is validated again after the second search.
                     :  *
                     :  * Returns 0 when the leaf already had room or was split successfully,
                     :  * -EAGAIN when the item is gone or changed (or the leaf gained room)
                     :  * after the second search so the caller has to start over, a negative
                     :  * error from btrfs_search_slot(), or the non-zero result of
                     :  * split_leaf().  path->keep_locks is cleared on every return that
                     :  * follows the second search.
                     :  */
    4349        3246 : static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
    4350             :                                          struct btrfs_root *root,
    4351             :                                          struct btrfs_path *path, int ins_len)
    4352             : {
    4353             :         struct btrfs_key key;
    4354             :         struct extent_buffer *leaf;
    4355             :         struct btrfs_file_extent_item *fi;
    4356             :         u64 extent_len = 0;
    4357             :         u32 item_size;
    4358             :         int ret;
    4359             : 
    4360        3246 :         leaf = path->nodes[0];
    4361        3246 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    4362             : 
    4363        3246 :         BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
    4364             :                key.type != BTRFS_EXTENT_CSUM_KEY);
    4365             : 
    4366        3246 :         if (btrfs_leaf_free_space(root, leaf) >= ins_len)
    4367             :                 return 0;
    4368             : 
                     :         /* remember what the item looks like so it can be re-checked below */
    4369         125 :         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
    4370         125 :         if (key.type == BTRFS_EXTENT_DATA_KEY) {
    4371         244 :                 fi = btrfs_item_ptr(leaf, path->slots[0],
    4372             :                                     struct btrfs_file_extent_item);
    4373             :                 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
    4374             :         }
    4375         125 :         btrfs_release_path(path);
    4376             : 
    4377         125 :         path->keep_locks = 1;
    4378         125 :         path->search_for_split = 1;
    4379         125 :         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
    4380         125 :         path->search_for_split = 0;
                     :         /*
                     :          * ret > 0 means the key is no longer in the tree.  path->slots[0] is
                     :          * then only an insertion position and must not be read as our item,
                     :          * so report it before ret is overwritten below.
                     :          */
                     :         if (ret > 0)
                     :                 ret = -EAGAIN;
    4381         125 :         if (ret < 0)
    4382             :                 goto err;
    4383             : 
    4384             :         ret = -EAGAIN;
    4385         125 :         leaf = path->nodes[0];
    4386             :         /* if our item changed size, return now */
    4387         250 :         if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
    4388             :                 goto err;
    4389             : 
    4390             :         /* the leaf has  changed, it now has room.  return now */
    4391         125 :         if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len)
    4392             :                 goto err;
    4393             : 
    4394         123 :         if (key.type == BTRFS_EXTENT_DATA_KEY) {
    4395         240 :                 fi = btrfs_item_ptr(leaf, path->slots[0],
    4396             :                                     struct btrfs_file_extent_item);
    4397         120 :                 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
    4398             :                         goto err;
    4399             :         }
    4400             : 
    4401         123 :         btrfs_set_path_blocking(path);
    4402         123 :         ret = split_leaf(trans, root, &key, path, ins_len, 1);
    4403         123 :         if (ret)
    4404             :                 goto err;
    4405             : 
    4406         123 :         path->keep_locks = 0;
    4407         123 :         btrfs_unlock_up_safe(path, 1);
    4408         123 :         return 0;
    4409             : err:
    4410           2 :         path->keep_locks = 0;
    4411           2 :         return ret;
    4412             : }
    4413             : /*
                     :  * Split the item at path->slots[0] in two, inside the same leaf.
                     :  *
                     :  * The original item keeps its key and the first split_offset bytes of
                     :  * its data.  A new item is created in the next slot with new_key and
                     :  * the remaining item_size - split_offset bytes.  The two items share
                     :  * the data range of the original item, so no other item data in the
                     :  * leaf is moved; only the item headers after the slot shift by one.
                     :  *
                     :  * The leaf must have room for one more struct btrfs_item (BUG_ON
                     :  * otherwise).
                     :  *
                     :  * Returns 0 on success or -ENOMEM if the temporary copy of the item
                     :  * cannot be allocated.
                     :  */
    4414        1183 : static noinline int split_item(struct btrfs_trans_handle *trans,
    4415             :                                struct btrfs_root *root,
    4416             :                                struct btrfs_path *path,
    4417             :                                struct btrfs_key *new_key,
    4418             :                                unsigned long split_offset)
    4419             : {
    4420        2366 :         struct extent_buffer *leaf;
    4421             :         struct btrfs_item *item;
    4422             :         struct btrfs_item *new_item;
    4423             :         int slot;
    4424             :         char *buf;
    4425             :         u32 nritems;
    4426             :         u32 item_size;
    4427             :         u32 orig_offset;
    4428             :         struct btrfs_disk_key disk_key;
    4429             : 
    4430        1183 :         leaf = path->nodes[0];
    4431        1183 :         BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
    4432             :         /*
                     :          * NOTE(review): the check above compares against sizeof(); if
                     :          * btrfs_leaf_free_space() returns a signed int, a negative value
                     :          * would be converted to unsigned and pass - confirm.
                     :          */
    4433        1183 :         btrfs_set_path_blocking(path);
    4434             : 
    4435        1183 :         item = btrfs_item_nr(path->slots[0]);
    4436             :         orig_offset = btrfs_item_offset(leaf, item);
    4437             :         item_size = btrfs_item_size(leaf, item);
    4438             :         /* stage the whole item; both halves are written back from this copy */
    4439        1183 :         buf = kmalloc(item_size, GFP_NOFS);
    4440        1183 :         if (!buf)
    4441             :                 return -ENOMEM;
    4442             : 
    4443        2366 :         read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
    4444             :                             path->slots[0]), item_size);
    4445             : 
    4446        1183 :         slot = path->slots[0] + 1;
    4447             :         nritems = btrfs_header_nritems(leaf);
    4448        1183 :         if (slot != nritems) {
    4449             :                 /* shift the items */
    4450        3498 :                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
    4451             :                                 btrfs_item_nr_offset(slot),
    4452        1166 :                                 (nritems - slot) * sizeof(struct btrfs_item));
    4453             :         }
    4454             :         /* header of the new item: new_key, data starting at the old offset */
    4455             :         btrfs_cpu_key_to_disk(&disk_key, new_key);
    4456             :         btrfs_set_item_key(leaf, &disk_key, slot);
    4457             : 
    4458             :         new_item = btrfs_item_nr(slot);
    4459             : 
    4460             :         btrfs_set_item_offset(leaf, new_item, orig_offset);
    4461        1183 :         btrfs_set_item_size(leaf, new_item, item_size - split_offset);
    4462             :         /* the original item keeps split_offset bytes at the top of the old range */
    4463        1183 :         btrfs_set_item_offset(leaf, item,
    4464        1183 :                               orig_offset + item_size - split_offset);
    4465             :         btrfs_set_item_size(leaf, item, split_offset);
    4466             : 
    4467        1183 :         btrfs_set_header_nritems(leaf, nritems + 1);
    4468             : 
    4469             :         /* write the data for the start of the original item */
    4470        1183 :         write_extent_buffer(leaf, buf,
    4471        1183 :                             btrfs_item_ptr_offset(leaf, path->slots[0]),
    4472             :                             split_offset);
    4473             : 
    4474             :         /* write the data for the new item */
    4475        2366 :         write_extent_buffer(leaf, buf + split_offset,
    4476             :                             btrfs_item_ptr_offset(leaf, slot),
    4477             :                             item_size - split_offset);
    4478        1183 :         btrfs_mark_buffer_dirty(leaf);
    4479             : 
    4480        1183 :         BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
    4481        1183 :         kfree(buf);
    4482             :         return 0;
    4483             : }
    4484             : 
    4485             : /*
    4486             :  * This function splits a single item into two items,
    4487             :  * giving 'new_key' to the new item and splitting the
    4488             :  * old one at split_offset (from the start of the item).
    4489             :  *
    4490             :  * The path may be released by this operation.  After
    4491             :  * the split, the path is pointing to the old item.  The
    4492             :  * new item is going to be in the same node as the old one.
    4493             :  *
    4494             :  * Note, the item being split must be small enough to live alone on
    4495             :  * a tree block with room for one extra struct btrfs_item.
    4496             :  *
    4497             :  * This allows us to split the item in place, keeping a lock on the
    4498             :  * leaf the entire time.
                     :  *
                     :  * Returns 0 on success.  A non-zero result from setup_leaf_for_split()
                     :  * (which is asked for room for one struct btrfs_item) or from
                     :  * split_item() is returned unchanged.
    4499             :  */
    4500        1183 : int btrfs_split_item(struct btrfs_trans_handle *trans,
    4501             :                      struct btrfs_root *root,
    4502             :                      struct btrfs_path *path,
    4503             :                      struct btrfs_key *new_key,
    4504             :                      unsigned long split_offset)
    4505             : {
    4506             :         int ret;
    4507        1183 :         ret = setup_leaf_for_split(trans, root, path,
    4508             :                                    sizeof(struct btrfs_item));
    4509        1183 :         if (ret)
    4510             :                 return ret;
    4511             :         /* room for one more item header was requested above; split in place */
    4512        1183 :         ret = split_item(trans, root, path, new_key, split_offset);
    4513        1183 :         return ret;
    4514             : }
    4515             : 
    4516             : /*
    4517             :  * This function duplicates an item, giving 'new_key' to the new item.
    4518             :  * It guarantees both items live in the same tree leaf and the new item
    4519             :  * is contiguous with the original item.
    4520             :  *
    4521             :  * This allows us to split file extent in place, keeping a lock on the
    4522             :  * leaf the entire time.
                     :  *
                     :  * On success path->slots[0] has been advanced by one and points at the
                     :  * new item, whose data is a copy of the original item's data.
                     :  *
                     :  * Returns 0 on success, or the non-zero result of
                     :  * setup_leaf_for_split() if the leaf could not be prepared.
    4523             :  */
    4524        2063 : int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
    4525             :                          struct btrfs_root *root,
    4526             :                          struct btrfs_path *path,
    4527             :                          struct btrfs_key *new_key)
    4528             : {
    4529             :         struct extent_buffer *leaf;
    4530             :         int ret;
    4531             :         u32 item_size;
    4532             : 
    4533        2063 :         leaf = path->nodes[0];
    4534        4126 :         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
    4535        2063 :         ret = setup_leaf_for_split(trans, root, path,
    4536             :                                    item_size + sizeof(struct btrfs_item));
    4537        2063 :         if (ret)
    4538             :                 return ret;
    4539             :         /* open one empty item of the same size right after the original */
    4540        2061 :         path->slots[0]++;
    4541        2061 :         setup_items_for_insert(root, path, new_key, &item_size,
    4542             :                                item_size, item_size +
    4543             :                                sizeof(struct btrfs_item), 1);
    4544        2061 :         leaf = path->nodes[0];
    4545        6183 :         memcpy_extent_buffer(leaf,
    4546        2061 :                              btrfs_item_ptr_offset(leaf, path->slots[0]),
    4547        2061 :                              btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
    4548             :                              item_size);
    4549        2061 :         return 0;
    4550             : }
    4551             : 
    4552             : /*
    4553             :  * make the item pointed to by the path smaller.  new_size indicates
    4554             :  * how small to make it, and from_end tells us if we just chop bytes
    4555             :  * off the end of the item or if we shift the item to chop bytes off
    4556             :  * the front.
                     :  *
                     :  * Nothing is done when new_size equals the current item size.  When
                     :  * bytes are chopped off the front (from_end == 0) the offset field of
                     :  * the item key is increased by the number of bytes removed, and
                     :  * fixup_low_keys() is called if the item is the first one in the leaf.
                     :  *
                     :  * new_size must not be larger than the current size: the difference is
                     :  * computed with unsigned arithmetic and would wrap.
    4557             :  */
    4558       16006 : void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
    4559             :                          u32 new_size, int from_end)
    4560             : {
    4561             :         int slot;
    4562       16006 :         struct extent_buffer *leaf;
    4563             :         struct btrfs_item *item;
    4564             :         u32 nritems;
    4565             :         unsigned int data_end;
    4566             :         unsigned int old_data_start;
    4567             :         unsigned int old_size;
    4568             :         unsigned int size_diff;
    4569             :         int i;
    4570             :         struct btrfs_map_token token;
    4571             : 
    4572             :         btrfs_init_map_token(&token);
    4573             : 
    4574       16006 :         leaf = path->nodes[0];
    4575       16006 :         slot = path->slots[0];
    4576             : 
    4577             :         old_size = btrfs_item_size_nr(leaf, slot);
    4578       16006 :         if (old_size == new_size)
    4579           0 :                 return;
    4580             : 
    4581             :         nritems = btrfs_header_nritems(leaf);
    4582       16006 :         data_end = leaf_data_end(root, leaf);
    4583             : 
    4584             :         old_data_start = btrfs_item_offset_nr(leaf, slot);
    4585             : 
    4586       16006 :         size_diff = old_size - new_size;
    4587             :         /* NOTE(review): slot was already used to read the item above; these checks come late */
    4588       16006 :         BUG_ON(slot < 0);
    4589       16006 :         BUG_ON(slot >= nritems);
    4590             : 
    4591             :         /*
    4592             :          * item0..itemN ... dataN.offset..dataN.size .. data0.size
    4593             :          */
    4594             :         /* first correct the data pointers */
    4595     1293333 :         for (i = slot; i < nritems; i++) {
    4596             :                 u32 ioff;
    4597             :                 item = btrfs_item_nr(i);
    4598             : 
    4599             :                 ioff = btrfs_token_item_offset(leaf, item, &token);
    4600     1293333 :                 btrfs_set_token_item_offset(leaf, item,
    4601             :                                             ioff + size_diff, &token);
    4602             :         }
    4603             : 
    4604             :         /* shift the data */
    4605       16006 :         if (from_end) {
    4606       10801 :                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
    4607             :                               data_end + size_diff, btrfs_leaf_data(leaf) +
    4608       10801 :                               data_end, old_data_start + new_size - data_end);
    4609             :         } else {
    4610             :                 struct btrfs_disk_key disk_key;
    4611             :                 u64 offset;
    4612             : 
    4613             :                 btrfs_item_key(leaf, &disk_key, slot);
    4614             : 
    4615        5205 :                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
    4616             :                         unsigned long ptr;
    4617             :                         struct btrfs_file_extent_item *fi;
    4618             :                         /*
                     :                          * The loop above already raised this item's offset by
                     :                          * size_diff, so (presumably) btrfs_item_ptr() yields the
                     :                          * new start and subtracting size_diff gives the old one.
                     :                          */
    4619           0 :                         fi = btrfs_item_ptr(leaf, slot,
    4620             :                                             struct btrfs_file_extent_item);
    4621           0 :                         fi = (struct btrfs_file_extent_item *)(
    4622           0 :                              (unsigned long)fi - size_diff);
    4623             :                         /* inline extent: carry the fields in front of disk_bytenr to the new start */
    4624           0 :                         if (btrfs_file_extent_type(leaf, fi) ==
    4625             :                             BTRFS_FILE_EXTENT_INLINE) {
    4626           0 :                                 ptr = btrfs_item_ptr_offset(leaf, slot);
    4627           0 :                                 memmove_extent_buffer(leaf, ptr,
    4628             :                                       (unsigned long)fi,
    4629             :                                       offsetof(struct btrfs_file_extent_item,
    4630             :                                                  disk_bytenr));
    4631             :                         }
    4632             :                 }
    4633             : 
    4634        5205 :                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
    4635             :                               data_end + size_diff, btrfs_leaf_data(leaf) +
    4636        5205 :                               data_end, old_data_start - data_end);
    4637             :                 /* the front of the item is gone, so the key offset moves by size_diff too */
    4638             :                 offset = btrfs_disk_key_offset(&disk_key);
    4639        5205 :                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
    4640             :                 btrfs_set_item_key(leaf, &disk_key, slot);
    4641        5205 :                 if (slot == 0)
    4642        1133 :                         fixup_low_keys(root, path, &disk_key, 1);
    4643             :         }
    4644             : 
    4645             :         item = btrfs_item_nr(slot);
    4646             :         btrfs_set_item_size(leaf, item, new_size);
    4647       16006 :         btrfs_mark_buffer_dirty(leaf);
    4648             : 
    4649       16006 :         if (btrfs_leaf_free_space(root, leaf) < 0) {
    4650           0 :                 btrfs_print_leaf(root, leaf);
    4651           0 :                 BUG();
    4652             :         }
    4653             : }
    4654             : 
    4655             : /*
    4656             :  * make the item pointed to by the path bigger, data_size is the added size.
                     :  *
                     :  * The leaf must already have at least data_size bytes free; otherwise
                     :  * the leaf is printed and BUG() is hit.  The data of this item and of
                     :  * every later item is moved data_size bytes towards lower offsets, the
                     :  * stored item offsets are lowered to match, and the item size is
                     :  * increased by data_size.  The leaf is marked dirty.
    4657             :  */
    4658       98804 : void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
    4659             :                        u32 data_size)
    4660             : {
    4661             :         int slot;
    4662       98804 :         struct extent_buffer *leaf;
    4663             :         struct btrfs_item *item;
    4664             :         u32 nritems;
    4665             :         unsigned int data_end;
    4666             :         unsigned int old_data;
    4667             :         unsigned int old_size;
    4668             :         int i;
    4669             :         struct btrfs_map_token token;
    4670             : 
    4671             :         btrfs_init_map_token(&token);
    4672             : 
    4673       98804 :         leaf = path->nodes[0];
    4674             : 
    4675             :         nritems = btrfs_header_nritems(leaf);
    4676       98804 :         data_end = leaf_data_end(root, leaf);
    4677             : 
    4678       98804 :         if (btrfs_leaf_free_space(root, leaf) < data_size) {
    4679           0 :                 btrfs_print_leaf(root, leaf);
    4680           0 :                 BUG();
    4681             :         }
    4682       98804 :         slot = path->slots[0];
    4683             :         old_data = btrfs_item_end_nr(leaf, slot);
    4684             :         /* NOTE(review): old_data above was read with slot before these range checks */
    4685       98823 :         BUG_ON(slot < 0);
    4686       98823 :         if (slot >= nritems) {
    4687           0 :                 btrfs_print_leaf(root, leaf);
    4688           0 :                 btrfs_crit(root->fs_info, "slot %d too large, nritems %d",
    4689             :                        slot, nritems);
    4690           0 :                 BUG_ON(1);
    4691             :         }
    4692             : 
    4693             :         /*
    4694             :          * item0..itemN ... dataN.offset..dataN.size .. data0.size
    4695             :          */
    4696             :         /* first correct the data pointers */
    4697     1700248 :         for (i = slot; i < nritems; i++) {
    4698             :                 u32 ioff;
    4699             :                 item = btrfs_item_nr(i);
    4700             : 
    4701             :                 ioff = btrfs_token_item_offset(leaf, item, &token);
    4702     1700249 :                 btrfs_set_token_item_offset(leaf, item,
    4703             :                                             ioff - data_size, &token);
    4704             :         }
    4705             : 
    4706             :         /* shift the data */
    4707       98804 :         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
    4708             :                       data_end - data_size, btrfs_leaf_data(leaf) +
    4709       98804 :                       data_end, old_data - data_end);
    4710             :         /* NOTE(review): the data_end assignment below is never read again in this function */
    4711             :         data_end = old_data;
    4712             :         old_size = btrfs_item_size_nr(leaf, slot);
    4713             :         item = btrfs_item_nr(slot);
    4714       98803 :         btrfs_set_item_size(leaf, item, old_size + data_size);
    4715       98803 :         btrfs_mark_buffer_dirty(leaf);
    4716             : 
    4717       98803 :         if (btrfs_leaf_free_space(root, leaf) < 0) {
    4718           0 :                 btrfs_print_leaf(root, leaf);
    4719           0 :                 BUG();
    4720             :         }
    4721       98802 : }
    4722             : 
    4723             : /*
    4724             :  * this is a helper for btrfs_insert_empty_items, the main goal here is
    4725             :  * to save stack depth by doing the bulk of the work in a function
    4726             :  * that doesn't call btrfs_search_slot
                     :  *
                     :  * Opens nr new items in the leaf at path->nodes[0], starting at
                     :  * path->slots[0].  Only the item headers (key, offset, size) are set
                     :  * up; the data area of the new items is not written here.
                     :  *
                     :  * cpu_key:    array of nr keys, one per new item
                     :  * data_size:  array of nr data sizes, one per new item
                     :  * total_data: number of bytes the existing data is shifted by (the
                     :  *             callers in this file pass the sum of data_size[])
                     :  * total_size: free space the leaf must have (the callers in this file
                     :  *             pass total_data + nr * sizeof(struct btrfs_item))
                     :  *
                     :  * The leaf is printed and BUG() is hit if it has less than total_size
                     :  * bytes free.  fixup_low_keys() is called when the insertion is at
                     :  * slot 0, btrfs_unlock_up_safe(path, 1) is called, and the leaf is
                     :  * marked dirty.
    4727             :  */
    4728      264568 : void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
    4729             :                             struct btrfs_key *cpu_key, u32 *data_size,
    4730             :                             u32 total_data, u32 total_size, int nr)
    4731             : {
    4732             :         struct btrfs_item *item;
    4733             :         int i;
    4734             :         u32 nritems;
    4735             :         unsigned int data_end;
    4736             :         struct btrfs_disk_key disk_key;
    4737      529126 :         struct extent_buffer *leaf;
    4738             :         int slot;
    4739             :         struct btrfs_map_token token;
    4740             : 
    4741             :         btrfs_init_map_token(&token);
    4742             : 
    4743      264568 :         leaf = path->nodes[0];
    4744      264568 :         slot = path->slots[0];
    4745             : 
    4746             :         nritems = btrfs_header_nritems(leaf);
    4747      264568 :         data_end = leaf_data_end(root, leaf);
    4748             : 
    4749      264561 :         if (btrfs_leaf_free_space(root, leaf) < total_size) {
    4750           0 :                 btrfs_print_leaf(root, leaf);
    4751           0 :                 btrfs_crit(root->fs_info, "not enough freespace need %u have %d",
    4752             :                        total_size, btrfs_leaf_free_space(root, leaf));
    4753           0 :                 BUG();
    4754             :         }
    4755             :         /* not appending at the end: move the existing items out of the way first */
    4756      264543 :         if (slot != nritems) {
    4757             :                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
    4758             :                 /* NOTE(review): old_data and data_end are unsigned but printed with %d below */
    4759      183630 :                 if (old_data < data_end) {
    4760           0 :                         btrfs_print_leaf(root, leaf);
    4761           0 :                         btrfs_crit(root->fs_info, "slot %d old_data %d data_end %d",
    4762             :                                slot, old_data, data_end);
    4763           0 :                         BUG_ON(1);
    4764             :                 }
    4765             :                 /*
    4766             :                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
    4767             :                  */
    4768             :                 /* first correct the data pointers */
    4769    10554215 :                 for (i = slot; i < nritems; i++) {
    4770             :                         u32 ioff;
    4771             : 
    4772             :                         item = btrfs_item_nr( i);
    4773             :                         ioff = btrfs_token_item_offset(leaf, item, &token);
    4774    10554227 :                         btrfs_set_token_item_offset(leaf, item,
    4775             :                                                     ioff - total_data, &token);
    4776             :                 }
    4777             :                 /* shift the items */
    4778      367242 :                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
    4779             :                               btrfs_item_nr_offset(slot),
    4780      183621 :                               (nritems - slot) * sizeof(struct btrfs_item));
    4781             : 
    4782             :                 /* shift the data */
    4783      183622 :                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
    4784             :                               data_end - total_data, btrfs_leaf_data(leaf) +
    4785      183622 :                               data_end, old_data - data_end);
    4786             :                 data_end = old_data;
    4787             :         }
    4788             : 
    4789             :         /* setup the item for the new data */
    4790      310631 :         for (i = 0; i < nr; i++) {
    4791      310640 :                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
    4792      310640 :                 btrfs_set_item_key(leaf, &disk_key, slot + i);
    4793             :                 item = btrfs_item_nr(slot + i);
    4794      310655 :                 btrfs_set_token_item_offset(leaf, item,
    4795      310655 :                                             data_end - data_size[i], &token);
    4796      310644 :                 data_end -= data_size[i];
    4797             :                 btrfs_set_token_item_size(leaf, item, data_size[i], &token);
    4798             :         }
    4799             : 
    4800      264558 :         btrfs_set_header_nritems(leaf, nritems + nr);
    4801             :         /* a new first item changes the lowest key of the leaf */
    4802      264558 :         if (slot == 0) {
    4803             :                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
    4804        3663 :                 fixup_low_keys(root, path, &disk_key, 1);
    4805             :         }
    4806      264558 :         btrfs_unlock_up_safe(path, 1);
    4807      264576 :         btrfs_mark_buffer_dirty(leaf);
    4808             : 
    4809      264596 :         if (btrfs_leaf_free_space(root, leaf) < 0) {
    4810           0 :                 btrfs_print_leaf(root, leaf);
    4811           0 :                 BUG();
    4812             :         }
    4813      264544 : }
    4814             : 
    4815             : /*
    4816             :  * Given a key and some data, insert items into the tree.
    4817             :  * This does all the path init required, making room in the tree if needed.
                     :  *
                     :  * cpu_key and data_size are arrays of nr entries.  The tree is searched
                     :  * for the first key, asking for room for all nr item headers plus
                     :  * their data, and the new items are then set up at the slot the search
                     :  * returned.  The item data itself is left for the caller to fill in.
                     :  *
                     :  * Returns 0 on success, -EEXIST if the first key is already present,
                     :  * or the negative error returned by btrfs_search_slot().
    4818             :  */
    4819      212207 : int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
    4820             :                             struct btrfs_root *root,
    4821             :                             struct btrfs_path *path,
    4822             :                             struct btrfs_key *cpu_key, u32 *data_size,
    4823             :                             int nr)
    4824             : {
    4825             :         int ret = 0;
    4826             :         int slot;
    4827             :         int i;
    4828             :         u32 total_size = 0;
    4829             :         u32 total_data = 0;
    4830             : 
    4831      456894 :         for (i = 0; i < nr; i++)
    4832      244687 :                 total_data += data_size[i];
    4833             :         /* every item needs its header in addition to its data */
    4834      212207 :         total_size = total_data + (nr * sizeof(struct btrfs_item));
    4835      212207 :         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
    4836      212209 :         if (ret == 0)
    4837             :                 return -EEXIST;
    4838      210124 :         if (ret < 0)
    4839             :                 return ret;
    4840             : 
    4841      208196 :         slot = path->slots[0];
    4842      208196 :         BUG_ON(slot < 0);
    4843             : 
    4844      208196 :         setup_items_for_insert(root, path, cpu_key, data_size,
    4845             :                                total_data, total_size, nr);
    4846      208179 :         return 0;
    4847             : }
    4848             : 
    4849             : /*
    4850             :  * Given a key and some data, insert an item into the tree.
    4851             :  * This does all the path init required, making room in the tree if needed.
                     :  *
                     :  * A temporary path is allocated and freed inside this function.  When
                     :  * btrfs_insert_empty_item() succeeds, data_size bytes from data are
                     :  * copied into the new item and the leaf is marked dirty.
                     :  *
                     :  * Returns -ENOMEM if the path cannot be allocated, otherwise the
                     :  * result of btrfs_insert_empty_item() (0 on success).
    4852             :  */
    4853        1136 : int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
    4854             :                       *root, struct btrfs_key *cpu_key, void *data, u32
    4855             :                       data_size)
    4856             : {
    4857             :         int ret = 0;
    4858             :         struct btrfs_path *path;
    4859             :         struct extent_buffer *leaf;
    4860             :         unsigned long ptr;
    4861             : 
    4862             :         path = btrfs_alloc_path();
    4863        1136 :         if (!path)
    4864             :                 return -ENOMEM;
    4865             :         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
    4866        1136 :         if (!ret) {
    4867        1136 :                 leaf = path->nodes[0];
    4868        2272 :                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
    4869        1136 :                 write_extent_buffer(leaf, data, ptr, data_size);
    4870        1136 :                 btrfs_mark_buffer_dirty(leaf);
    4871             :         }
    4872        1136 :         btrfs_free_path(path);
    4873        1136 :         return ret;
    4874             : }
    4875             : 
    4876             : /*
    4877             :  * delete the pointer at @slot from the node at path->nodes[level].
    4878             :  *
    4879             :  * the tree should have been previously balanced so the deletion does not
    4880             :  * empty a node.
    4881             :  */
    4882        1371 : static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
    4883             :                     int level, int slot)
    4884             : {
    4885        2742 :         struct extent_buffer *parent = path->nodes[level];
    4886             :         u32 nritems;
    4887             :         int ret;
    4888             : 
    4889             :         nritems = btrfs_header_nritems(parent);
    4890        1371 :         if (slot != nritems - 1) {      /* pointers follow @slot: close the gap */
    4891        1215 :                 if (level)
    4892        1215 :                         tree_mod_log_eb_move(root->fs_info, parent, slot,
    4893        1215 :                                              slot + 1, nritems - slot - 1);
    4894        2430 :                 memmove_extent_buffer(parent,
    4895             :                               btrfs_node_key_ptr_offset(slot),
    4896             :                               btrfs_node_key_ptr_offset(slot + 1),
    4897             :                               sizeof(struct btrfs_key_ptr) *
    4898        1215 :                               (nritems - slot - 1));
    4899         156 :         } else if (level) {     /* last pointer: nothing to move, record MOD_LOG_KEY_REMOVE */
    4900         156 :                 ret = tree_mod_log_insert_key(root->fs_info, parent, slot,
    4901             :                                               MOD_LOG_KEY_REMOVE, GFP_NOFS);
    4902         156 :                 BUG_ON(ret < 0);
    4903             :         }
    4904             : 
    4905             :         nritems--;
    4906             :         btrfs_set_header_nritems(parent, nritems);
    4907        1371 :         if (nritems == 0 && parent == root->node) {
    4908           0 :                 BUG_ON(btrfs_header_level(root->node) != 1);
    4909             :                 /* just turn the root into a leaf and break */
    4910             :                 btrfs_set_header_level(root->node, 0);
    4911        1371 :         } else if (slot == 0) { /* key 0 changed: hand the new one to fixup_low_keys() */
    4912             :                 struct btrfs_disk_key disk_key;
    4913             : 
    4914          80 :                 btrfs_node_key(parent, &disk_key, 0);
    4915          80 :                 fixup_low_keys(root, path, &disk_key, level + 1);
    4916             :         }
    4917        1371 :         btrfs_mark_buffer_dirty(parent);
    4918        1371 : }
    4919             : 
    4920             : /*
    4921             :  * a helper function to delete the leaf pointed to by path->slots[1] and
    4922             :  * path->nodes[1].
    4923             :  *
    4924             :  * This deletes the pointer in path->nodes[1] and frees the leaf
    4925             :  * block extent.  The function is void, so no status is reported.
    4926             :  *
    4927             :  * The path must have already been setup for deleting the leaf, including
    4928             :  * all the proper balancing.  path->nodes[1] must be locked.
    4929             :  */
    4930        1360 : static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
    4931             :                                     struct btrfs_root *root,
    4932             :                                     struct btrfs_path *path,
    4933        1360 :                                     struct extent_buffer *leaf)
    4934             : {
    4935        1360 :         WARN_ON(btrfs_header_generation(leaf) != trans->transid);  /* leaf generation must match this transaction */
    4936        1360 :         del_ptr(root, path, 1, path->slots[1]);
    4937             : 
    4938             :         /*
    4939             :          * btrfs_free_extent is expensive, we want to make sure we
    4940             :          * aren't holding any locks when we call it
    4941             :          */
    4942        1360 :         btrfs_unlock_up_safe(path, 0);
    4943             : 
    4944        1360 :         root_sub_used(root, leaf->len);
    4945             : 
    4946             :         extent_buffer_get(leaf);        /* presumably pairs with free_extent_buffer_stale() below */
    4947        1360 :         btrfs_free_tree_block(trans, root, leaf, 0, 1);
    4948        1360 :         free_extent_buffer_stale(leaf);
    4949        1360 : }
    4950             : /*
    4951             :  * delete @nr items starting at @slot from the leaf at path->nodes[0].  If
    4952             :  * that empties the leaf, remove it from the tree
    4953             :  * Returns 0, or a negative push_leaf_left()/push_leaf_right() result other than -ENOSPC. */
    4954      124167 : int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
    4955             :                     struct btrfs_path *path, int slot, int nr)
    4956             : {
    4957      275542 :         struct extent_buffer *leaf;
    4958             :         struct btrfs_item *item;
    4959             :         int last_off;
    4960             :         int dsize = 0;
    4961             :         int ret = 0;
    4962             :         int wret;
    4963             :         int i;
    4964             :         u32 nritems;
    4965             :         struct btrfs_map_token token;
    4966             : 
    4967             :         btrfs_init_map_token(&token);
    4968             : 
    4969      124167 :         leaf = path->nodes[0];
    4970      248335 :         last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);  /* offset of the last item being deleted */
    4971             : 
    4972      272602 :         for (i = 0; i < nr; i++)        /* dsize = total size of the deleted items */
    4973      296869 :                 dsize += btrfs_item_size_nr(leaf, slot + i);
    4974             : 
    4975             :         nritems = btrfs_header_nritems(leaf);
    4976             : 
    4977      124167 :         if (slot + nr != nritems) {     /* items follow the deleted range: compact the leaf */
    4978      110684 :                 int data_end = leaf_data_end(root, leaf);
    4979             : 
    4980      110684 :                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
    4981             :                               data_end + dsize,
    4982             :                               btrfs_leaf_data(leaf) + data_end,
    4983      110684 :                               last_off - data_end);
    4984             : 
    4985     6122663 :                 for (i = slot + nr; i < nritems; i++) { /* data moved by dsize: bump stored offsets */
    4986             :                         u32 ioff;
    4987             : 
    4988             :                         item = btrfs_item_nr(i);
    4989             :                         ioff = btrfs_token_item_offset(leaf, item, &token);
    4990     6011958 :                         btrfs_set_token_item_offset(leaf, item,
    4991             :                                                     ioff + dsize, &token);
    4992             :                 }
    4993             : 
    4994      221370 :                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), /* close the gap in the item array */
    4995             :                               btrfs_item_nr_offset(slot + nr),
    4996             :                               sizeof(struct btrfs_item) *
    4997      110685 :                               (nritems - slot - nr));
    4998             :         }
    4999      124168 :         btrfs_set_header_nritems(leaf, nritems - nr);
    5000             :         nritems -= nr;
    5001             : 
    5002             :         /* delete the leaf if we've emptied it */
    5003      124168 :         if (nritems == 0) {
    5004         214 :                 if (leaf == root->node) {       /* an empty root is kept; only its level is set to 0 */
    5005             :                         btrfs_set_header_level(leaf, 0);
    5006             :                 } else {
    5007         186 :                         btrfs_set_path_blocking(path);
    5008         186 :                         clean_tree_block(trans, root, leaf);
    5009         186 :                         btrfs_del_leaf(trans, root, path, leaf);
    5010             :                 }
    5011             :         } else {
    5012      123954 :                 int used = leaf_space_used(leaf, 0, nritems);
    5013      123951 :                 if (slot == 0) {        /* the leaf's first key changed */
    5014             :                         struct btrfs_disk_key disk_key;
    5015             : 
    5016             :                         btrfs_item_key(leaf, &disk_key, 0);
    5017        4153 :                         fixup_low_keys(root, path, &disk_key, 1);
    5018             :                 }
    5019             : 
    5020             :                 /* delete the leaf if it is mostly empty */
    5021      123951 :                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
    5022             :                         /* push_leaf_left fixes the path.
    5023             :                          * make sure the path still points to our leaf
    5024             :                          * for possible call to del_ptr below
    5025             :                          */
    5026       14022 :                         slot = path->slots[1];
    5027             :                         extent_buffer_get(leaf);
    5028             : 
    5029       14024 :                         btrfs_set_path_blocking(path);
    5030       14024 :                         wret = push_leaf_left(trans, root, path, 1, 1,
    5031             :                                               1, (u32)-1);
    5032       14024 :                         if (wret < 0 && wret != -ENOSPC)        /* -ENOSPC from a push is not an error here */
    5033             :                                 ret = wret;
    5034             : 
    5035       27179 :                         if (path->nodes[0] == leaf &&
    5036             :                             btrfs_header_nritems(leaf)) {
    5037       12950 :                                 wret = push_leaf_right(trans, root, path, 1,
    5038             :                                                        1, 1, 0);
    5039       12950 :                                 if (wret < 0 && wret != -ENOSPC)
    5040             :                                         ret = wret;
    5041             :                         }
    5042             : 
    5043       14024 :                         if (btrfs_header_nritems(leaf) == 0) {
    5044        1174 :                                 path->slots[1] = slot;  /* restore the slot saved before pushing */
    5045        1174 :                                 btrfs_del_leaf(trans, root, path, leaf);
    5046        1174 :                                 free_extent_buffer(leaf);
    5047             :                                 ret = 0;        /* discards any push error recorded above */
    5048             :                         } else {
    5049             :                                 /* if we're still in the path, make sure
    5050             :                                  * we're dirty.  Otherwise, one of the
    5051             :                                  * push_leaf functions must have already
    5052             :                                  * dirtied this buffer
    5053             :                                  */
    5054       12850 :                                 if (path->nodes[0] == leaf)
    5055       12551 :                                         btrfs_mark_buffer_dirty(leaf);
    5056       12850 :                                 free_extent_buffer(leaf);
    5057             :                         }
    5058             :                 } else {
    5059      109929 :                         btrfs_mark_buffer_dirty(leaf);
    5060             :                 }
    5061             :         }
    5062      124169 :         return ret;
    5063             : }
    5064             : 
    5065             : /*
    5066             :  * search the tree again to find a leaf with lesser keys
    5067             :  * returns 0 if it found something or 1 if there are no lesser leaves.
    5068             :  * returns < 0 on io errors.
    5069             :  *
    5070             :  * This may release the path, and so you may lose any locks held at the
    5071             :  * time you call it.
    5072             :  */
    5073         164 : int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
    5074             : {
    5075             :         struct btrfs_key key;
    5076             :         struct btrfs_disk_key found_key;
    5077             :         int ret;
    5078             : 
    5079         164 :         btrfs_item_key_to_cpu(path->nodes[0], &key, 0);  /* first key of the current leaf */
    5080             : 
    5081         164 :         if (key.offset > 0) {   /* compute the key immediately preceding it */
    5082         164 :                 key.offset--;
    5083           0 :         } else if (key.type > 0) {
    5084           0 :                 key.type--;
    5085           0 :                 key.offset = (u64)-1;
    5086           0 :         } else if (key.objectid > 0) {
    5087           0 :                 key.objectid--;
    5088           0 :                 key.type = (u8)-1;
    5089           0 :                 key.offset = (u64)-1;
    5090             :         } else {
    5091             :                 return 1;       /* objectid, type and offset are all 0: no smaller key */
    5092             :         }
    5093             : 
    5094         164 :         btrfs_release_path(path);
    5095         164 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    5096         164 :         if (ret < 0)
    5097             :                 return ret;
    5098         164 :         btrfs_item_key(path->nodes[0], &found_key, 0);  /* first key of the leaf we landed in */
    5099             :         ret = comp_keys(&found_key, &key);
    5100             :         /*
    5101             :          * We might have had an item with the previous key in the tree right
    5102             :          * before we released our path. And after we released our path, that
    5103             :          * item might have been pushed to the first slot (0) of the leaf we
    5104             :          * were holding due to a tree balance. Alternatively, an item with the
    5105             :          * previous key can exist as the only element of a leaf (big fat item).
    5106             :          * Therefore account for these 2 cases, so that our callers (like
    5107             :          * btrfs_previous_item) don't miss an existing item with a key matching
    5108             :          * the previous key we computed above.
    5109             :          */
    5110         164 :         if (ret <= 0)
    5111             :                 return 0;
    5112          80 :         return 1;
    5113             : }
    5114             : 
    5115             : /*
    5116             :  * A helper function to walk down the tree starting at min_key, and looking
    5117             :  * for nodes or leaves that have a minimum transaction id.
    5118             :  * This is used by the btree defrag code, and tree logging
    5119             :  *
    5120             :  * This does not cow, but it does stuff the starting key it finds back
    5121             :  * into min_key, so you can call btrfs_search_slot with cow=1 on the
    5122             :  * key and get a writable path.
    5123             :  *
    5124             :  * This does lock as it descends, and path->keep_locks should be set
    5125             :  * to 1 by the caller.
    5126             :  *
    5127             :  * This honors path->lowest_level to prevent descent past a given level
    5128             :  * of the tree.
    5129             :  *
    5130             :  * min_trans indicates the oldest transaction that you are interested
    5131             :  * in walking through.  Any nodes or leaves older than min_trans are
    5132             :  * skipped over (without reading them).
    5133             :  *
    5134             :  * returns zero if something useful was found, < 0 on error and 1 if there
    5135             :  * was nothing in the tree that matched the search criteria.
    5136             :  * NOTE(review): ret is only ever set to 0 or 1 below; no < 0 path is visible. */
    5137        2536 : int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
    5138             :                          struct btrfs_path *path,
    5139             :                          u64 min_trans)
    5140             : {
    5141        8832 :         struct extent_buffer *cur;
    5142             :         struct btrfs_key found_key;
    5143             :         int slot;
    5144             :         int sret;
    5145             :         u32 nritems;
    5146             :         int level;
    5147             :         int ret = 1;
    5148             : 
    5149        2536 :         WARN_ON(!path->keep_locks);
    5150             : again:
    5151        2627 :         cur = btrfs_read_lock_root_node(root);
    5152        2627 :         level = btrfs_header_level(cur);
    5153        2627 :         WARN_ON(path->nodes[level]);
    5154        2627 :         path->nodes[level] = cur;
    5155        2627 :         path->locks[level] = BTRFS_READ_LOCK;
    5156             : 
    5157        2627 :         if (btrfs_header_generation(cur) < min_trans) {  /* the root itself is older than min_trans */
    5158             :                 ret = 1;
    5159             :                 goto out;
    5160             :         }
    5161             :         while (1) {
    5162             :                 nritems = btrfs_header_nritems(cur);
    5163        3578 :                 level = btrfs_header_level(cur);
    5164        3578 :                 sret = bin_search(cur, min_key, level, &slot);
    5165             : 
    5166             :                 /* at the lowest level, we're done, setup the path and exit */
    5167        3578 :                 if (level == path->lowest_level) {
    5168        2623 :                         if (slot >= nritems)
    5169             :                                 goto find_next_key;
    5170             :                         ret = 0;
    5171        2277 :                         path->slots[level] = slot;
    5172        2277 :                         btrfs_item_key_to_cpu(cur, &found_key, slot);
    5173        2277 :                         goto out;
    5174             :                 }
    5175         955 :                 if (sret && slot > 0)   /* bin_search() returned non-zero: step back one slot */
    5176         851 :                         slot--;
    5177             :                 /*
    5178             :                  * check this node pointer against the min_trans parameters.
    5179             :                  * If it is too old, skip to the next one.
    5180             :                  */
    5181         976 :                 while (slot < nritems) {
    5182             :                         u64 gen;
    5183             : 
    5184             :                         gen = btrfs_node_ptr_generation(cur, slot);
    5185         976 :                         if (gen < min_trans) {
    5186          21 :                                 slot++;
    5187          21 :                                 continue;
    5188             :                         }
    5189             :                         break;
    5190             :                 }
    5191             : find_next_key:
    5192             :                 /*
    5193             :                  * we didn't find a candidate key in this node, walk forward
    5194             :                  * and find another one
    5195             :                  */
    5196        1301 :                 if (slot >= nritems) {
    5197         346 :                         path->slots[level] = slot;
    5198         346 :                         btrfs_set_path_blocking(path);
    5199         346 :                         sret = btrfs_find_next_key(root, path, min_key, level,
    5200             :                                                   min_trans);
    5201         346 :                         if (sret == 0) {        /* min_key was advanced: restart from the root */
    5202          91 :                                 btrfs_release_path(path);
    5203          91 :                                 goto again;
    5204             :                         } else {
    5205             :                                 goto out;       /* ret is still 1 here */
    5206             :                         }
    5207             :                 }
    5208             :                 /* save our key for returning back */
    5209             :                 btrfs_node_key_to_cpu(cur, &found_key, slot);
    5210         955 :                 path->slots[level] = slot;
    5211         955 :                 if (level == path->lowest_level) {      /* NOTE(review): looks unreachable, lowest_level is handled above */
    5212             :                         ret = 0;
    5213           0 :                         unlock_up(path, level, 1, 0, NULL);
    5214           0 :                         goto out;
    5215             :                 }
    5216         955 :                 btrfs_set_path_blocking(path);
    5217         955 :                 cur = read_node_slot(root, cur, slot);
    5218         955 :                 BUG_ON(!cur); /* -ENOMEM */
    5219             : 
    5220         955 :                 btrfs_tree_read_lock(cur);
    5221             : 
    5222         955 :                 path->locks[level - 1] = BTRFS_READ_LOCK;
    5223         955 :                 path->nodes[level - 1] = cur;
    5224         955 :                 unlock_up(path, level, 1, 0, NULL);
    5225         955 :                 btrfs_clear_path_blocking(path, NULL, 0);
    5226         955 :         }
    5227             : out:
    5228        2536 :         if (ret == 0)   /* hand the key that was found back through min_key */
    5229        2277 :                 memcpy(min_key, &found_key, sizeof(found_key));
    5230        2536 :         btrfs_set_path_blocking(path);
    5231        2536 :         return ret;
    5232             : }
    5233             : 
    5234          25 : static void tree_move_down(struct btrfs_root *root,
    5235             :                            struct btrfs_path *path,
    5236             :                            int *level, int root_level)
    5237             : {       /* Descend one level: load the child at slots[*level], enter it at slot 0. root_level is unused. */
    5238          25 :         BUG_ON(*level == 0);    /* there is no level below 0 */
    5239          25 :         path->nodes[*level - 1] = read_node_slot(root, path->nodes[*level],  /* NOTE(review): result stored unchecked */
    5240             :                                         path->slots[*level]);
    5241          25 :         path->slots[*level - 1] = 0;
    5242          25 :         (*level)--;
    5243          25 : }
    5244             : 
    5245        3684 : static int tree_move_next_or_upnext(struct btrfs_root *root,
    5246             :                                     struct btrfs_path *path,
    5247             :                                     int *level, int root_level)
    5248             : {       /* Go to the next slot at *level; while past the end, drop the node and retry one level up. */
    5249             :         int ret = 0;
    5250             :         int nritems;
    5251        7393 :         nritems = btrfs_header_nritems(path->nodes[*level]);
    5252             : 
    5253        3684 :         path->slots[*level]++;
    5254             : 
    5255        3709 :         while (path->slots[*level] >= nritems) {
    5256          57 :                 if (*level == root_level)
    5257             :                         return -1;      /* no next slot even at the root level */
    5258             : 
    5259             :                 /* move upnext */
    5260          25 :                 path->slots[*level] = 0;
    5261          25 :                 free_extent_buffer(path->nodes[*level]);
    5262          25 :                 path->nodes[*level] = NULL;
    5263          25 :                 (*level)++;
    5264          25 :                 path->slots[*level]++;
    5265             : 
    5266          50 :                 nritems = btrfs_header_nritems(path->nodes[*level]);
    5267             :                 ret = 1;        /* report that we had to move up */
    5268             :         }
    5269             :         return ret;
    5270             : }
    5271             : 
    5272             : /*
    5273             :  * Returns 1 if it had to move up and next. 0 is returned if it moved only next
    5274             :  * or down. -1 from tree_move_next_or_upnext() is passed through; *key is
    5275             :  * filled from the new position only when the result is >= 0. */
    5276        3709 : static int tree_advance(struct btrfs_root *root,
    5277             :                         struct btrfs_path *path,
    5278             :                         int *level, int root_level,
    5279             :                         int allow_down,
    5280             :                         struct btrfs_key *key)
    5281             : {
    5282             :         int ret;
    5283             : 
    5284        3709 :         if (*level == 0 || !allow_down) {       /* at a leaf, or descending not allowed */
    5285        3684 :                 ret = tree_move_next_or_upnext(root, path, level, root_level);
    5286             :         } else {
    5287          25 :                 tree_move_down(root, path, level, root_level);
    5288             :                 ret = 0;
    5289             :         }
    5290        3709 :         if (ret >= 0) {
    5291        3677 :                 if (*level == 0)        /* level 0 uses the item key accessor, other levels the node key accessor */
    5292        3650 :                         btrfs_item_key_to_cpu(path->nodes[*level], key,
    5293             :                                         path->slots[*level]);
    5294             :                 else
    5295          27 :                         btrfs_node_key_to_cpu(path->nodes[*level], key,
    5296             :                                         path->slots[*level]);
    5297             :         }
    5298        3709 :         return ret;
    5299             : }
    5300             : 
    5301         807 : static int tree_compare_item(struct btrfs_root *left_root,
    5302             :                              struct btrfs_path *left_path,
    5303             :                              struct btrfs_path *right_path,
    5304             :                              char *tmp_buf)
    5305             : {       /* Returns 0 when both current items match in size and content, 1 otherwise. left_root is unused. */
    5306             :         int cmp;
    5307             :         int len1, len2;
    5308             :         unsigned long off1, off2;
    5309             : 
    5310        1614 :         len1 = btrfs_item_size_nr(left_path->nodes[0], left_path->slots[0]);
    5311        1614 :         len2 = btrfs_item_size_nr(right_path->nodes[0], right_path->slots[0]);
    5312         807 :         if (len1 != len2)       /* different sizes: no need to compare the bytes */
    5313             :                 return 1;
    5314             : 
    5315        1586 :         off1 = btrfs_item_ptr_offset(left_path->nodes[0], left_path->slots[0]);
    5316        1586 :         off2 = btrfs_item_ptr_offset(right_path->nodes[0],
    5317             :                                 right_path->slots[0]);
    5318             : 
    5319         793 :         read_extent_buffer(left_path->nodes[0], tmp_buf, off1, len1);  /* tmp_buf must hold at least len1 bytes */
    5320             : 
    5321         793 :         cmp = memcmp_extent_buffer(right_path->nodes[0], tmp_buf, off2, len1);
    5322         793 :         if (cmp)
    5323             :                 return 1;
    5324             :         return 0;
    5325             : }
    5326             : 
    5327             : #define ADVANCE 1
    5328             : #define ADVANCE_ONLY_NEXT -1
    5329             : 
    5330             : /*
    5331             :  * This function compares two trees and calls the provided callback for
    5332             :  * every changed/new/deleted item it finds.
    5333             :  * If shared tree blocks are encountered, whole subtrees are skipped, making
    5334             :  * the compare pretty fast on snapshotted subvolumes.
    5335             :  *
    5336             :  * This currently works on commit roots only. As commit roots are read only,
    5337             :  * we don't do any locking. The commit roots are protected with transactions.
    5338             :  * Transactions are ended and rejoined when a commit is tried in between.
    5339             :  *
    5340             :  * This function checks for modifications done to the trees while comparing.
    5341             :  * If it detects a change, it aborts immediately.
    5342             :  */
    5343          16 : int btrfs_compare_trees(struct btrfs_root *left_root,
    5344             :                         struct btrfs_root *right_root,
    5345             :                         btrfs_changed_cb_t changed_cb, void *ctx)
    5346             : {
    5347             :         int ret;
    5348             :         int cmp;
    5349             :         struct btrfs_path *left_path = NULL;
    5350             :         struct btrfs_path *right_path = NULL;
    5351             :         struct btrfs_key left_key;
    5352             :         struct btrfs_key right_key;
    5353             :         char *tmp_buf = NULL;
    5354             :         int left_root_level;
    5355             :         int right_root_level;
    5356             :         int left_level;
    5357             :         int right_level;
    5358             :         int left_end_reached;
    5359             :         int right_end_reached;
    5360             :         int advance_left;
    5361             :         int advance_right;
    5362             :         u64 left_blockptr;
    5363             :         u64 right_blockptr;
    5364             :         u64 left_gen;
    5365             :         u64 right_gen;
    5366             : 
    5367             :         left_path = btrfs_alloc_path();
    5368          16 :         if (!left_path) {
    5369             :                 ret = -ENOMEM;
    5370             :                 goto out;
    5371             :         }
    5372             :         right_path = btrfs_alloc_path();
    5373          16 :         if (!right_path) {
    5374             :                 ret = -ENOMEM;
    5375             :                 goto out;
    5376             :         }
    5377             : 
    5378          16 :         tmp_buf = kmalloc(left_root->leafsize, GFP_NOFS);
    5379          16 :         if (!tmp_buf) {
    5380             :                 ret = -ENOMEM;
    5381             :                 goto out;
    5382             :         }
    5383             : 
    5384          16 :         left_path->search_commit_root = 1;
    5385          16 :         left_path->skip_locking = 1;
    5386          16 :         right_path->search_commit_root = 1;
    5387          16 :         right_path->skip_locking = 1;
    5388             : 
    5389             :         /*
    5390             :          * Strategy: Go to the first items of both trees. Then do
    5391             :          *
    5392             :          * If both trees are at level 0
    5393             :          *   Compare keys of current items
    5394             :          *     If left < right treat left item as new, advance left tree
    5395             :          *       and repeat
    5396             :          *     If left > right treat right item as deleted, advance right tree
    5397             :          *       and repeat
    5398             :          *     If left == right do deep compare of items, treat as changed if
    5399             :          *       needed, advance both trees and repeat
    5400             :          * If both trees are at the same level but not at level 0
    5401             :          *   Compare keys of current nodes/leafs
    5402             :          *     If left < right advance left tree and repeat
    5403             :          *     If left > right advance right tree and repeat
    5404             :          *     If left == right compare blockptrs of the next nodes/leafs
    5405             :          *       If they match advance both trees but stay at the same level
    5406             :          *         and repeat
    5407             :          *       If they don't match advance both trees while allowing to go
    5408             :          *         deeper and repeat
    5409             :          * If tree levels are different
    5410             :          *   Advance the tree that needs it and repeat
    5411             :          *
    5412             :          * Advancing a tree means:
    5413             :          *   If we are at level 0, try to go to the next slot. If that's not
    5414             :          *   possible, go one level up and repeat. Stop when we found a level
    5415             :          *   where we could go to the next slot. We may at this point be on a
    5416             :          *   node or a leaf.
    5417             :          *
    5418             :          *   If we are not at level 0 and not on shared tree blocks, go one
    5419             :          *   level deeper.
    5420             :          *
    5421             :          *   If we are not at level 0 and on shared tree blocks, go one slot to
    5422             :          *   the right if possible or go up and right.
    5423             :          */
    5424             : 
    5425          16 :         down_read(&left_root->fs_info->commit_root_sem);
    5426          32 :         left_level = btrfs_header_level(left_root->commit_root);
    5427             :         left_root_level = left_level;
    5428          16 :         left_path->nodes[left_level] = left_root->commit_root;
    5429          16 :         extent_buffer_get(left_path->nodes[left_level]);
    5430             : 
    5431          32 :         right_level = btrfs_header_level(right_root->commit_root);
    5432             :         right_root_level = right_level;
    5433          16 :         right_path->nodes[right_level] = right_root->commit_root;
    5434          16 :         extent_buffer_get(right_path->nodes[right_level]);
    5435          16 :         up_read(&left_root->fs_info->commit_root_sem);
    5436             : 
    5437          16 :         if (left_level == 0)
    5438          12 :                 btrfs_item_key_to_cpu(left_path->nodes[left_level],
    5439             :                                 &left_key, left_path->slots[left_level]);
    5440             :         else
    5441           4 :                 btrfs_node_key_to_cpu(left_path->nodes[left_level],
    5442             :                                 &left_key, left_path->slots[left_level]);
    5443          16 :         if (right_level == 0)
    5444          12 :                 btrfs_item_key_to_cpu(right_path->nodes[right_level],
    5445             :                                 &right_key, right_path->slots[right_level]);
    5446             :         else
    5447           4 :                 btrfs_node_key_to_cpu(right_path->nodes[right_level],
    5448             :                                 &right_key, right_path->slots[right_level]);
    5449             : 
    5450             :         left_end_reached = right_end_reached = 0;
    5451             :         advance_left = advance_right = 0;
    5452             : 
    5453             :         while (1) {
    5454        2907 :                 if (advance_left && !left_end_reached) {
    5455        1234 :                         ret = tree_advance(left_root, left_path, &left_level,
    5456             :                                         left_root_level,
    5457             :                                         advance_left != ADVANCE_ONLY_NEXT,
    5458             :                                         &left_key);
    5459        1234 :                         if (ret < 0)
    5460             :                                 left_end_reached = ADVANCE;
    5461             :                         advance_left = 0;
    5462             :                 }
    5463        2907 :                 if (advance_right && !right_end_reached) {
    5464        2475 :                         ret = tree_advance(right_root, right_path, &right_level,
    5465             :                                         right_root_level,
    5466             :                                         advance_right != ADVANCE_ONLY_NEXT,
    5467             :                                         &right_key);
    5468        2475 :                         if (ret < 0)
    5469             :                                 right_end_reached = ADVANCE;
    5470             :                         advance_right = 0;
    5471             :                 }
    5472             : 
    5473        2907 :                 if (left_end_reached && right_end_reached) {
    5474             :                         ret = 0;
    5475             :                         goto out;
    5476        2891 :                 } else if (left_end_reached) {
    5477        1421 :                         if (right_level == 0) {
    5478        1413 :                                 ret = changed_cb(left_root, right_root,
    5479             :                                                 left_path, right_path,
    5480             :                                                 &right_key,
    5481             :                                                 BTRFS_COMPARE_TREE_DELETED,
    5482             :                                                 ctx);
    5483        1413 :                                 if (ret < 0)
    5484             :                                         goto out;
    5485             :                         }
    5486             :                         advance_right = ADVANCE;
    5487        1421 :                         continue;
    5488        1470 :                 } else if (right_end_reached) {
    5489         150 :                         if (left_level == 0) {
    5490         149 :                                 ret = changed_cb(left_root, right_root,
    5491             :                                                 left_path, right_path,
    5492             :                                                 &left_key,
    5493             :                                                 BTRFS_COMPARE_TREE_NEW,
    5494             :                                                 ctx);
    5495         149 :                                 if (ret < 0)
    5496             :                                         goto out;
    5497             :                         }
    5498             :                         advance_left = ADVANCE;
    5499         150 :                         continue;
    5500             :                 }
    5501             : 
    5502        1320 :                 if (left_level == 0 && right_level == 0) {
    5503             :                         cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
    5504        1305 :                         if (cmp < 0) {
    5505         264 :                                 ret = changed_cb(left_root, right_root,
    5506             :                                                 left_path, right_path,
    5507             :                                                 &left_key,
    5508             :                                                 BTRFS_COMPARE_TREE_NEW,
    5509             :                                                 ctx);
    5510         264 :                                 if (ret < 0)
    5511             :                                         goto out;
    5512             :                                 advance_left = ADVANCE;
    5513        1041 :                         } else if (cmp > 0) {
    5514         234 :                                 ret = changed_cb(left_root, right_root,
    5515             :                                                 left_path, right_path,
    5516             :                                                 &right_key,
    5517             :                                                 BTRFS_COMPARE_TREE_DELETED,
    5518             :                                                 ctx);
    5519         234 :                                 if (ret < 0)
    5520             :                                         goto out;
    5521             :                                 advance_right = ADVANCE;
    5522             :                         } else {
    5523             :                                 enum btrfs_compare_tree_result cmp;
    5524             : 
    5525         807 :                                 WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
    5526         807 :                                 ret = tree_compare_item(left_root, left_path,
    5527             :                                                 right_path, tmp_buf);
    5528         807 :                                 if (ret)
    5529             :                                         cmp = BTRFS_COMPARE_TREE_CHANGED;
    5530             :                                 else
    5531             :                                         cmp = BTRFS_COMPARE_TREE_SAME;
    5532         807 :                                 ret = changed_cb(left_root, right_root,
    5533             :                                                  left_path, right_path,
    5534             :                                                  &left_key, cmp, ctx);
    5535         807 :                                 if (ret < 0)
    5536             :                                         goto out;
    5537             :                                 advance_left = ADVANCE;
    5538             :                                 advance_right = ADVANCE;
    5539             :                         }
    5540          15 :                 } else if (left_level == right_level) {
    5541             :                         cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
    5542          11 :                         if (cmp < 0) {
    5543             :                                 advance_left = ADVANCE;
    5544          11 :                         } else if (cmp > 0) {
    5545             :                                 advance_right = ADVANCE;
    5546             :                         } else {
    5547          11 :                                 left_blockptr = btrfs_node_blockptr(
    5548             :                                                 left_path->nodes[left_level],
    5549             :                                                 left_path->slots[left_level]);
    5550          11 :                                 right_blockptr = btrfs_node_blockptr(
    5551             :                                                 right_path->nodes[right_level],
    5552             :                                                 right_path->slots[right_level]);
    5553          11 :                                 left_gen = btrfs_node_ptr_generation(
    5554             :                                                 left_path->nodes[left_level],
    5555             :                                                 left_path->slots[left_level]);
    5556          11 :                                 right_gen = btrfs_node_ptr_generation(
    5557             :                                                 right_path->nodes[right_level],
    5558             :                                                 right_path->slots[right_level]);
    5559          22 :                                 if (left_blockptr == right_blockptr &&
    5560          11 :                                     left_gen == right_gen) {
    5561             :                                         /*
    5562             :                                          * As we're on a shared block, don't
    5563             :                                          * allow to go deeper.
    5564             :                                          */
    5565             :                                         advance_left = ADVANCE_ONLY_NEXT;
    5566             :                                         advance_right = ADVANCE_ONLY_NEXT;
    5567             :                                 } else {
    5568             :                                         advance_left = ADVANCE;
    5569             :                                         advance_right = ADVANCE;
    5570             :                                 }
    5571             :                         }
    5572           4 :                 } else if (left_level < right_level) {
    5573             :                         advance_right = ADVANCE;
    5574             :                 } else {
    5575             :                         advance_left = ADVANCE;
    5576             :                 }
    5577             :         }
    5578             : 
    5579             : out:
    5580          16 :         btrfs_free_path(left_path);
    5581          16 :         btrfs_free_path(right_path);
    5582          16 :         kfree(tmp_buf);
    5583          16 :         return ret;
    5584             : }
    5585             : 
    5586             : /*
    5587             :  * this is similar to btrfs_next_leaf, but does not try to preserve
    5588             :  * and fixup the path.  It looks for and returns the next key in the
    5589             :  * tree based on the current path and the min_trans parameters.
    5590             :  *
    5591             :  * 0 is returned if another key is found, < 0 if there are any errors
    5592             :  * and 1 is returned if there are no higher keys in the tree
    5593             :  *
    5594             :  * path->keep_locks should be set to 1 on the search made before
    5595             :  * calling this function.
                        *
                        * @root:      tree passed to btrfs_search_slot() when the path has to be
                        *             looked up again
                        * @path:      path from a previous search; it may be released and
                        *             searched again by this function
                        * @key:       output, filled with the key that was found (only written
                        *             when 0 is returned)
                        * @level:     level of the path at which to start looking
                        * @min_trans: above level 0, node pointers whose generation is lower
                        *             than this are skipped
    5596             :  */
    5597         346 : int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
    5598             :                         struct btrfs_key *key, int level, u64 min_trans)
    5599             : {
    5600             :         int slot;
    5601         585 :         struct extent_buffer *c;
    5602             : 
    5603         346 :         WARN_ON(!path->keep_locks);
    5604         465 :         while (level < BTRFS_MAX_LEVEL) {
    5605         465 :                 if (!path->nodes[level])
    5606             :                         return 1;
    5607             : 
                                       /* candidate: the slot right after the current one */
    5608         465 :                 slot = path->slots[level] + 1;
    5609             :                 c = path->nodes[level];
    5610             : next:
    5611        1170 :                 if (slot >= btrfs_header_nritems(c)) {
                                               /*
                                                * Ran off the end of this block, the next key
                                                * (if any) has to be reached through the parent.
                                                */
    5612             :                         int ret;
    5613             :                         int orig_lowest;
    5614             :                         struct btrfs_key cur_key;
                                               /* no parent in the path: nothing further right */
    5615         748 :                         if (level + 1 >= BTRFS_MAX_LEVEL ||
    5616         374 :                             !path->nodes[level + 1])
    5617         255 :                                 return 1;
    5618             : 
                                               /*
                                                * The path still has a lock recorded for the
                                                * parent, so retry one level up.
                                                */
    5619         119 :                         if (path->locks[level + 1]) {
    5620             :                                 level++;
    5621         119 :                                 continue;
    5622             :                         }
    5623             : 
                                               /*
                                                * No lock on the parent: take the last key of
                                                * this block, drop the path and search for that
                                                * key again with lowest_level temporarily set
                                                * to 'level' (presumably so the search stops at
                                                * this level - confirm in btrfs_search_slot).
                                                */
    5624           0 :                         slot = btrfs_header_nritems(c) - 1;
    5625           0 :                         if (level == 0)
    5626           0 :                                 btrfs_item_key_to_cpu(c, &cur_key, slot);
    5627             :                         else
    5628             :                                 btrfs_node_key_to_cpu(c, &cur_key, slot);
    5629             : 
    5630           0 :                         orig_lowest = path->lowest_level;
    5631           0 :                         btrfs_release_path(path);
    5632           0 :                         path->lowest_level = level;
    5633           0 :                         ret = btrfs_search_slot(NULL, root, &cur_key, path,
    5634             :                                                 0, 0);
    5635           0 :                         path->lowest_level = orig_lowest;
    5636           0 :                         if (ret < 0)
    5637             :                                 return ret;
    5638             : 
    5639           0 :                         c = path->nodes[level];
    5640           0 :                         slot = path->slots[level];
                                               /* exact match: that key was already seen, step past it */
    5641           0 :                         if (ret == 0)
    5642           0 :                                 slot++;
    5643           0 :                         goto next;
    5644             :                 }
    5645             : 
    5646         211 :                 if (level == 0)
    5647           0 :                         btrfs_item_key_to_cpu(c, key, slot);
    5648             :                 else {
    5649             :                         u64 gen = btrfs_node_ptr_generation(c, slot);
    5650             : 
                                               /* pointer is older than min_trans, try the next slot */
    5651         211 :                         if (gen < min_trans) {
    5652         120 :                                 slot++;
    5653         120 :                                 goto next;
    5654             :                         }
    5655             :                         btrfs_node_key_to_cpu(c, key, slot);
    5656             :                 }
    5657             :                 return 0;
    5658             :         }
    5659             :         return 1;
    5660             : }
    5661             : 
    5662             : /*
    5663             :  * search the tree again to find a leaf with greater keys
    5664             :  * returns 0 if it found something or 1 if there are no greater leaves.
    5665             :  * returns < 0 on io errors.
                        *
                        * This is btrfs_next_old_leaf() called with a time_seq of 0; the return
                        * value of that call is passed back unchanged.
    5666             :  */
    5667       25598 : int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
    5668             : {
    5669       25697 :         return btrfs_next_old_leaf(root, path, 0);
    5670             : }
    5671             : 
                       /*
                        * Advance @path from the end of its current leaf to the next item in
                        * the tree.
                        *
                        * The last key of the current leaf is remembered, the path is released
                        * and that key is searched for again (with btrfs_search_old_slot() when
                        * @time_seq is non-zero, with btrfs_search_slot() otherwise).  If the
                        * leaf found that way now holds items after that key, the path stays in
                        * that leaf.  Otherwise the path is walked up to the first level that
                        * has a slot to the right and then back down along slot 0 of every
                        * level below it.
                        *
                        * Returns 0 when the path points at a following item, 1 when there is
                        * none (empty leaf, or nothing to the right at any level) and < 0 on
                        * error.
                        *
                        * path->leave_spinning is forced to 1 while working and is put back to
                        * the caller's value at the 'done' label.
                        */
    5672       49082 : int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
    5673             :                         u64 time_seq)
    5674             : {
    5675             :         int slot;
    5676             :         int level;
    5677       38739 :         struct extent_buffer *c;
    5678             :         struct extent_buffer *next;
    5679             :         struct btrfs_key key;
    5680             :         u32 nritems;
    5681             :         int ret;
    5682       49082 :         int old_spinning = path->leave_spinning;
    5683             :         int next_rw_lock = 0;
    5684             : 
    5685       97032 :         nritems = btrfs_header_nritems(path->nodes[0]);
    5686       49082 :         if (nritems == 0)
    5687             :                 return 1;
    5688             : 
                               /* remember the last key of this leaf, it is searched for below */
    5689       47896 :         btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
    5690             : again:
    5691             :         level = 1;
    5692       47944 :         next = NULL;
    5693             :         next_rw_lock = 0;
    5694       47944 :         btrfs_release_path(path);
    5695             : 
    5696       47953 :         path->keep_locks = 1;
    5697       47953 :         path->leave_spinning = 1;
    5698             : 
    5699       47953 :         if (time_seq)
    5700          71 :                 ret = btrfs_search_old_slot(root, &key, path, time_seq);
    5701             :         else
    5702       47882 :                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    5703       47950 :         path->keep_locks = 0;
    5704             : 
                               /*
                                * NOTE(review): this return does not go through 'done', so
                                * path->leave_spinning is left at 1 instead of old_spinning -
                                * confirm that callers do not depend on it after an error.
                                */
    5705       47950 :         if (ret < 0)
    5706             :                 return ret;
    5707             : 
    5708       47950 :         nritems = btrfs_header_nritems(path->nodes[0]);
    5709             :         /*
    5710             :          * by releasing the path above we dropped all our locks.  A balance
    5711             :          * could have added more items next to the key that used to be
    5712             :          * at the very end of the block.  So, check again here and
    5713             :          * advance the path if there are now more items available.
    5714             :          */
    5715       47950 :         if (nritems > 0 && path->slots[0] < nritems - 1) {
    5716         207 :                 if (ret == 0)
    5717         207 :                         path->slots[0]++;
    5718             :                 ret = 0;
    5719             :                 goto done;
    5720             :         }
    5721             :         /*
    5722             :          * So the above check misses one case:
    5723             :          * - after releasing the path above, someone has removed the item that
    5724             :          *   used to be at the very end of the block, and balance between leafs
    5725             :          *   gets another one with bigger key.offset to replace it.
    5726             :          *
    5727             :          * This one should be returned as well, or we can get leaf corruption
    5728             :          * later(esp. in __btrfs_drop_extents()).
    5729             :          *
    5730             :          * And a bit more explanation about this check,
    5731             :          * with ret > 0, the key isn't found, the path points to the slot
    5732             :          * where it should be inserted, so the path->slots[0] item must be the
    5733             :          * bigger one.
    5734             :          */
    5735       47743 :         if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
    5736             :                 ret = 0;
    5737             :                 goto done;
    5738             :         }
    5739             : 
                               /*
                                * Walk up, starting at level 1, until a node is found that has
                                * a slot to the right of the one the path currently uses.
                                */
    5740       60633 :         while (level < BTRFS_MAX_LEVEL) {
    5741       60629 :                 if (!path->nodes[level]) {
    5742             :                         ret = 1;
    5743             :                         goto done;
    5744             :                 }
    5745             : 
    5746       38739 :                 slot = path->slots[level] + 1;
    5747             :                 c = path->nodes[level];
    5748       77478 :                 if (slot >= btrfs_header_nritems(c)) {
    5749       12891 :                         level++;
    5750       12891 :                         if (level == BTRFS_MAX_LEVEL) {
    5751             :                                 ret = 1;
    5752             :                                 goto done;
    5753             :                         }
    5754       12889 :                         continue;
    5755             :                 }
    5756             : 
                                       /*
                                        * NOTE(review): 'next' is reset to NULL at 'again' and
                                        * every path after 'next = c' below leaves this loop, so
                                        * this cleanup looks unreachable - confirm.
                                        */
    5757       25848 :                 if (next) {
    5758           0 :                         btrfs_tree_unlock_rw(next, next_rw_lock);
    5759           0 :                         free_extent_buffer(next);
    5760             :                 }
    5761             : 
                                       /*
                                        * read_block_for_search() is handed &next; presumably it
                                        * replaces it with the block that 'slot' of 'c' points
                                        * to - confirm against its definition.
                                        */
    5762       25848 :                 next = c;
    5763       25848 :                 next_rw_lock = path->locks[level];
    5764       25848 :                 ret = read_block_for_search(NULL, root, path, &next, level,
    5765             :                                             slot, &key, 0);
    5766       25848 :                 if (ret == -EAGAIN)
    5767             :                         goto again;
    5768             : 
    5769       25800 :                 if (ret < 0) {
    5770           0 :                         btrfs_release_path(path);
    5771           0 :                         goto done;
    5772             :                 }
    5773             : 
    5774       25800 :                 if (!path->skip_locking) {
    5775        2814 :                         ret = btrfs_try_tree_read_lock(next);
    5776        2814 :                         if (!ret && time_seq) {
    5777             :                                 /*
    5778             :                                  * If we don't get the lock, we may be racing
    5779             :                                  * with push_leaf_left, holding that lock while
    5780             :                                  * itself waiting for the leaf we've currently
    5781             :                                  * locked. To solve this situation, we give up
    5782             :                                  * on our lock and cycle.
    5783             :                                  */
    5784           0 :                                 free_extent_buffer(next);
    5785           0 :                                 btrfs_release_path(path);
    5786           0 :                                 cond_resched();
    5787           0 :                                 goto again;
    5788             :                         }
                                               /*
                                                * try-lock failed: switch the path to blocking
                                                * and take the read lock the blocking way
                                                */
    5789        2814 :                         if (!ret) {
    5790           0 :                                 btrfs_set_path_blocking(path);
    5791           0 :                                 btrfs_tree_read_lock(next);
    5792           0 :                                 btrfs_clear_path_blocking(path, next,
    5793             :                                                           BTRFS_READ_LOCK);
    5794             :                         }
    5795             :                         next_rw_lock = BTRFS_READ_LOCK;
    5796             :                 }
    5797             :                 break;
    5798             :         }
    5799       25800 :         path->slots[level] = slot;
                               /*
                                * Walk back down to level 0: at every level below, unlock and
                                * drop the block the path held, put 'next' in its place at
                                * slot 0 and, unless level 0 was reached, read and lock the
                                * block for the level underneath.
                                */
    5800             :         while (1) {
    5801       25824 :                 level--;
    5802       25824 :                 c = path->nodes[level];
    5803       25824 :                 if (path->locks[level])
    5804        2836 :                         btrfs_tree_unlock_rw(c, path->locks[level]);
    5805             : 
    5806       25824 :                 free_extent_buffer(c);
    5807       25824 :                 path->nodes[level] = next;
    5808       25824 :                 path->slots[level] = 0;
    5809       25824 :                 if (!path->skip_locking)
    5810        2836 :                         path->locks[level] = next_rw_lock;
    5811       25824 :                 if (!level)
    5812             :                         break;
    5813             : 
    5814          24 :                 ret = read_block_for_search(NULL, root, path, &next, level,
    5815             :                                             0, &key, 0);
    5816          24 :                 if (ret == -EAGAIN)
    5817             :                         goto again;
    5818             : 
    5819          24 :                 if (ret < 0) {
    5820           0 :                         btrfs_release_path(path);
    5821           0 :                         goto done;
    5822             :                 }
    5823             : 
    5824          24 :                 if (!path->skip_locking) {
    5825          22 :                         ret = btrfs_try_tree_read_lock(next);
    5826          22 :                         if (!ret) {
    5827           0 :                                 btrfs_set_path_blocking(path);
    5828           0 :                                 btrfs_tree_read_lock(next);
    5829           0 :                                 btrfs_clear_path_blocking(path, next,
    5830             :                                                           BTRFS_READ_LOCK);
    5831             :                         }
    5832             :                         next_rw_lock = BTRFS_READ_LOCK;
    5833             :                 }
    5834             :         }
    5835             :         ret = 0;
    5836             : done:
                               /* common exit: restore the caller's leave_spinning setting */
    5837       47898 :         unlock_up(path, 0, 1, 0, NULL);
    5838       47901 :         path->leave_spinning = old_spinning;
    5839       47901 :         if (!old_spinning)
    5840       47832 :                 btrfs_set_path_blocking(path);
    5841             : 
    5842       47920 :         return ret;
    5843             : }
    5844             : 
    5845             : /*
    5846             :  * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
    5847             :  * searching until it gets past min_objectid or finds an item of 'type'
    5848             :  *
    5849             :  * returns 0 if something is found, 1 if nothing was found and < 0 on error
                        *
                        * @root:         tree passed on to btrfs_prev_leaf()
                        * @path:         position to start from; when 0 is returned,
                        *                path->nodes[0] and path->slots[0] are the item whose key
                        *                matched
                        * @min_objectid: the walk stops once a key with a smaller objectid is seen
                        * @type:         key type to look for
                        *
                        * Any non-zero return value of btrfs_prev_leaf() is returned unchanged.
    5850             :  */
    5851         282 : int btrfs_previous_item(struct btrfs_root *root,
    5852             :                         struct btrfs_path *path, u64 min_objectid,
    5853             :                         int type)
    5854             : {
    5855             :         struct btrfs_key found_key;
    5856         282 :         struct extent_buffer *leaf;
    5857             :         u32 nritems;
    5858             :         int ret;
    5859             : 
    5860             :         while (1) {
                                       /* step one item back, moving to the previous leaf at slot 0 */
    5861         320 :                 if (path->slots[0] == 0) {
    5862          38 :                         btrfs_set_path_blocking(path);
    5863          38 :                         ret = btrfs_prev_leaf(root, path);
    5864          38 :                         if (ret != 0)
    5865             :                                 return ret;
    5866             :                 } else {
    5867         282 :                         path->slots[0]--;
    5868             :                 }
    5869         282 :                 leaf = path->nodes[0];
    5870             :                 nritems = btrfs_header_nritems(leaf);
    5871         282 :                 if (nritems == 0)
    5872             :                         return 1;
                                       /* slot is one past the last item: use the last item instead */
    5873         282 :                 if (path->slots[0] == nritems)
    5874           0 :                         path->slots[0]--;
    5875             : 
    5876         282 :                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                                       /* walked below min_objectid: give up */
    5877         282 :                 if (found_key.objectid < min_objectid)
    5878             :                         break;
    5879         218 :                 if (found_key.type == type)
    5880             :                         return 0;
                                       /* at min_objectid with a key type already below 'type': give up */
    5881          38 :                 if (found_key.objectid == min_objectid &&
    5882             :                     found_key.type < type)
    5883             :                         break;
    5884             :         }
    5885             :         return 1;
    5886             : }
    5887             : 
    5888             : /*
    5889             :  * search in extent tree to find a previous Metadata/Data extent item with
    5890             :  * min objectid.
    5891             :  *
    5892             :  * returns 0 if something is found, 1 if nothing was found and < 0 on error
    5893             :  */
    5894       33368 : int btrfs_previous_extent_item(struct btrfs_root *root,
    5895             :                         struct btrfs_path *path, u64 min_objectid)
    5896             : {
    5897             :         struct btrfs_key found_key;
    5898       33697 :         struct extent_buffer *leaf;
    5899             :         u32 nritems;
    5900             :         int ret;
    5901             : 
    5902             :         while (1) {
    5903       33738 :                 if (path->slots[0] == 0) {
    5904         105 :                         btrfs_set_path_blocking(path);
    5905         105 :                         ret = btrfs_prev_leaf(root, path);
    5906         105 :                         if (ret != 0)
    5907             :                                 return ret;
    5908             :                 } else {
    5909       33633 :                         path->slots[0]--;
    5910             :                 }
    5911       33697 :                 leaf = path->nodes[0];
    5912             :                 nritems = btrfs_header_nritems(leaf);
    5913       33697 :                 if (nritems == 0)
    5914             :                         return 1;
    5915       33697 :                 if (path->slots[0] == nritems)
    5916          63 :                         path->slots[0]--;
    5917             : 
    5918       33697 :                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
    5919       33697 :                 if (found_key.objectid < min_objectid)
    5920             :                         break;
    5921       33697 :                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
    5922             :                     found_key.type == BTRFS_METADATA_ITEM_KEY)
    5923             :                         return 0;
    5924         370 :                 if (found_key.objectid == min_objectid &&
    5925             :                     found_key.type < BTRFS_EXTENT_ITEM_KEY)
    5926             :                         break;
    5927             :         }
    5928             :         return 1;
    5929             : }

Generated by: LCOV version 1.10